├── .gitattributes ├── .gitignore ├── LICENSE ├── MANIFEST.in ├── Prediction-and-Submission.ipynb ├── README.md ├── Training.ipynb ├── assets ├── 4k_video.gif ├── detection_activations.png ├── detection_anchors.png ├── detection_final.png ├── detection_histograms.png ├── detection_masks.png ├── detection_refinement.png ├── detection_tensorboard.png ├── donuts.png ├── sheep.png └── street.png ├── coco.py ├── data └── _ ├── images ├── 1045023827_4ec3e8ba5c_z.jpg ├── 12283150_12d37e6389_z.jpg ├── 2383514521_1fc8d7b0de_z.jpg ├── 2502287818_41e4b0c4fb_z.jpg ├── 2516944023_d00345997d_z.jpg ├── 25691390_f9944f61b5_z.jpg ├── 262985539_1709e54576_z.jpg ├── 3132016470_c27baa00e8_z.jpg ├── 3627527276_6fe8cd9bfe_z.jpg ├── 3651581213_f81963d1dd_z.jpg ├── 3800883468_12af3c0b50_z.jpg ├── 3862500489_6fd195d183_z.jpg ├── 3878153025_8fde829928_z.jpg ├── 4410436637_7b0ca36ee7_z.jpg ├── 4782628554_668bc31826_z.jpg ├── 5951960966_d4e1cda5d0_z.jpg ├── 6584515005_fce9cec486_z.jpg ├── 6821351586_59aa0dc110_z.jpg ├── 7581246086_cf7bbb7255_z.jpg ├── 7933423348_c30bd9bd4e_z.jpg ├── 8053677163_d4c8f416be_z.jpg ├── 8239308689_efa6c11b08_z.jpg ├── 8433365521_9252889f9a_z.jpg ├── 8512296263_5fc5458e20_z.jpg ├── 8699757338_c3941051b6_z.jpg ├── 8734543718_37f6b8bd45_z.jpg ├── 8829708882_48f263491e_z.jpg ├── 9118579087_f9ffa19e63_z.jpg ├── 9247489789_132c0d534a_z.jpg ├── loss-plot.png └── predictions.png ├── mrcnn ├── __init__.py ├── cocoeval.py ├── config.py ├── dataset.py ├── evaluate.py ├── model.py ├── parallel_model.py ├── utils.py └── visualize.py ├── requirements.txt ├── samples ├── balloon │ ├── README.md │ ├── balloon.py │ ├── inspect_balloon_data.ipynb │ └── inspect_balloon_model.ipynb ├── coco │ ├── coco.py │ ├── inspect_data.ipynb │ ├── inspect_model.ipynb │ └── inspect_weights.ipynb ├── demo.ipynb └── shapes │ ├── shapes.py │ └── train_shapes.ipynb ├── setup.cfg └── setup.py /.gitattributes: -------------------------------------------------------------------------------- 1 | *.h5 filter=lfs diff=lfs merge=lfs -text 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Data files and directories common in repo root 2 | datasets/ 3 | logs/ 4 | *.h5 5 | results/ 6 | temp/ 7 | test/ 8 | 9 | *.ipynb 10 | data/* 11 | 12 | # Byte-compiled / optimized / DLL files 13 | __pycache__/ 14 | *.py[cod] 15 | *$py.class 16 | 17 | # Distribution / packaging 18 | .Python 19 | env/ 20 | build/ 21 | develop-eggs/ 22 | dist/ 23 | downloads/ 24 | eggs/ 25 | .eggs/ 26 | lib/ 27 | lib64/ 28 | parts/ 29 | sdist/ 30 | var/ 31 | wheels/ 32 | *.egg-info/ 33 | .installed.cfg 34 | *.egg 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # VS Studio Code 41 | .vscode 42 | 43 | # PyCharm 44 | .idea/ 45 | 46 | # Dropbox 47 | .dropbox.attr 48 | 49 | # Jupyter Notebook 50 | .ipynb_checkpoints 51 | 52 | # pyenv 53 | .python-version 54 | 55 | # dotenv 56 | .env 57 | 58 | # virtualenv 59 | .venv 60 | venv/ 61 | ENV/ 62 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Mask R-CNN 2 | 3 | The MIT License (MIT) 4 | 5 | Copyright (c) 2017 Matterport, Inc. 
6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in 15 | all copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | THE SOFTWARE. 24 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | include LICENSE 3 | include requirements.txt -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![CrowdAI-Logo](https://github.com/crowdAI/crowdai/raw/master/app/assets/images/misc/crowdai-logo-smile.svg?sanitize=true) 2 | 3 | >The research paper summarizing the corresponding benchmark and associated solutions can be found here : [Deep Learning for Understanding Satellite Imagery: An Experimental Survey](https://www.frontiersin.org/articles/10.3389/frai.2020.534696/full) 4 | 5 | # crowdAI Mapping Challenge : Baseline 6 | 7 | This repository contains the details of implementation of the Baseline submission using [Mask RCNN](https://arxiv.org/abs/1703.06870) which obtains a score of `[AP(IoU=0.5)=0.697 ; AR(IoU=0.5)=0.479]` for the [crowdAI Mapping Challenge](https://www.crowdai.org/challenges/mapping-challenge). 8 | 9 | # Installation 10 | ``` 11 | git clone https://github.com/crowdai/crowdai-mapping-challenge-mask-rcnn 12 | cd crowdai-mapping-challenge-mask-rcnn 13 | # Please ensure that you use python3.6 14 | pip install -r requirements.txt 15 | python setup.py install 16 | ``` 17 | 18 | # Notebooks 19 | Please follow the instructions on the relevant notebooks for the training, prediction and submissions. 
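Before opening the notebooks listed below, it can help to confirm that the `mrcnn` package imports cleanly after `python setup.py install`. A minimal sketch, condensed from the configuration step in Training.ipynb (the `SanityConfig` name and its values are illustrative, not the baseline settings):

```python
from mrcnn.config import Config

class SanityConfig(Config):
    # Illustrative values only; Training.ipynb uses IMAGES_PER_GPU = 5
    # and NUM_CLASSES = 1 + 1 (background + building).
    NAME = "sanity-check"
    IMAGES_PER_GPU = 1
    NUM_CLASSES = 1 + 1

config = SanityConfig()
config.display()  # prints the resolved configuration, as the notebook does
```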
20 | 21 | * [Training](Training.ipynb) 22 | * [Prediction and Submission](Prediction-and-Submission.ipynb) 23 | (_pre-trained weights for baseline submission included_) 24 | 25 | # Results 26 | ![sample_predictions](images/predictions.png) 27 | 28 | # Citation 29 | ``` 30 | @article{mohanty2020deep, 31 | title={Deep Learning for Understanding Satellite Imagery: An Experimental Survey}, 32 | author={Mohanty, Sharada Prasanna and Czakon, Jakub and Kaczmarek, Kamil A and Pyskir, Andrzej and Tarasiewicz, Piotr and Kunwar, Saket and Rohrbach, Janick and Luo, Dave and Prasad, Manjunath and Fleer, Sascha and others}, 33 | journal={Frontiers in Artificial Intelligence}, 34 | volume={3}, 35 | year={2020}, 36 | publisher={Frontiers Media SA} 37 | } 38 | 39 | @misc{crowdAIMappingChallengeBaseline2018, 40 | author = {Mohanty, Sharada Prasanna}, 41 | title = {CrowdAI Mapping Challenge 2018 : Baseline with Mask RCNN}, 42 | year = {2018}, 43 | publisher = {GitHub}, 44 | journal = {GitHub repository}, 45 | howpublished = {\url{https://github.com/crowdai/crowdai-mapping-challenge-mask-rcnn}}, 46 | commit = {bac1cf19adbc9d078122c6933da6f808c4ee590d} 47 | } 48 | ``` 49 | # Acknowledgements 50 | This repository heavily reuses code from the amazing [tensorflow Mask RCNN implementation](https://github.com/matterport/Mask_RCNN) by [@waleedka](https://github.com/waleedka/). 51 | Many thanks to all the contributors of that project. 52 | You are encouraged to checkout [https://github.com/matterport/Mask_RCNN](https://github.com/matterport/Mask_RCNN) for documentation on many other aspects of this code. 53 | 54 | # Author 55 | Sharada Mohanty [sharada.mohanty@epfl.ch](sharada.mohanty@epfl.ch) 56 | -------------------------------------------------------------------------------- /Training.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# mapping-challenge-mask_rcnn-training\n", 8 | "![CrowdAI-Logo](https://github.com/crowdAI/crowdai/raw/master/app/assets/images/misc/crowdai-logo-smile.svg?sanitize=true)\n", 9 | "\n", 10 | "This notebook contains the baseline code for the training a vanilla [Mask RCNN](https://arxiv.org/abs/1703.06870) model for the [crowdAI Mapping Challenge](https://www.crowdai.org/challenges/mapping-challenge).\n", 11 | "\n", 12 | "This code is adapted from the [Mask RCNN]() tensorflow implementation available here : [https://github.com/matterport/Mask_RCNN](https://github.com/matterport/Mask_RCNN).\n", 13 | "\n", 14 | "First we begin by importing all the necessary dependencies : " 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 12, 20 | "metadata": { 21 | "collapsed": true 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "import os\n", 26 | "import sys\n", 27 | "import time\n", 28 | "import numpy as np\n", 29 | "\n", 30 | "# Download and install the Python COCO tools from https://github.com/waleedka/coco\n", 31 | "# That's a fork from the original https://github.com/pdollar/coco with a bug\n", 32 | "# fix for Python 3.\n", 33 | "# I submitted a pull request https://github.com/cocodataset/cocoapi/pull/50\n", 34 | "# If the PR is merged then use the original repo.\n", 35 | "# Note: Edit PythonAPI/Makefile and replace \"python\" with \"python3\".\n", 36 | "# \n", 37 | "# A quick one liner to install the library \n", 38 | "# !pip install git+https://github.com/waleedka/coco.git#subdirectory=PythonAPI\n", 39 | "\n", 40 | "from pycocotools.coco 
import COCO\n", 41 | "from pycocotools.cocoeval import COCOeval\n", 42 | "from pycocotools import mask as maskUtils\n", 43 | "\n", 44 | "from mrcnn.evaluate import build_coco_results, evaluate_coco\n", 45 | "from mrcnn.dataset import MappingChallengeDataset\n", 46 | "\n", 47 | "import zipfile\n", 48 | "import urllib.request\n", 49 | "import shutil\n" 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "## Dataset location \n", 57 | "Now we have to download all the files in the datasets section and untar them to have the following structure :\n", 58 | "```\n", 59 | "├── data\n", 60 | "| ├── pretrained_weights.h5 (already included in this repository)\n", 61 | "│   ├── test\n", 62 | "│   │   └── images/\n", 63 | "│ │ └── annotation.json\n", 64 | "│   ├── train\n", 65 | "│   │   └── images/\n", 66 | "│ │ └── annotation.json\n", 67 | "│   └── val\n", 68 | "│   └── images/\n", 69 | "│ └── annotation.json\n", 70 | "```\n", 71 | "Note that the `pretrained_weights.h5` (available at [https://www.crowdai.org/challenges/mapping-challenge/dataset_files](https://www.crowdai.org/challenges/mapping-challenge/dataset_files)) are the weights used for the baseline submission, and are obtained by running the learning schedule mentioned later in the experiment. In the said experiment, the initial weights used can be found [here](https://github.com/matterport/Mask_RCNN/releases/download/v2.1/mask_rcnn_balloon.h5). " 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 19, 77 | "metadata": { 78 | "collapsed": true 79 | }, 80 | "outputs": [], 81 | "source": [ 82 | "ROOT_DIR = os.getcwd()\n", 83 | "\n", 84 | "# Import Mask RCNN\n", 85 | "sys.path.append(ROOT_DIR) # To find local version of the library\n", 86 | "from mrcnn.config import Config\n", 87 | "from mrcnn import model as modellib, utils\n", 88 | "\n", 89 | "\n", 90 | "PRETRAINED_MODEL_PATH = os.path.join(ROOT_DIR,\"data/\" \"pretrained_weights.h5\")\n", 91 | "LOGS_DIRECTORY = os.path.join(ROOT_DIR, \"logs\")" 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": {}, 97 | "source": [ 98 | "## Experiment Configuration" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": 22, 104 | "metadata": {}, 105 | "outputs": [ 106 | { 107 | "name": "stdout", 108 | "output_type": "stream", 109 | "text": [ 110 | "\n", 111 | "Configurations:\n", 112 | "BACKBONE resnet101\n", 113 | "BACKBONE_STRIDES [4, 8, 16, 32, 64]\n", 114 | "BATCH_SIZE 5\n", 115 | "BBOX_STD_DEV [0.1 0.1 0.2 0.2]\n", 116 | "DETECTION_MAX_INSTANCES 100\n", 117 | "DETECTION_MIN_CONFIDENCE 0.7\n", 118 | "DETECTION_NMS_THRESHOLD 0.3\n", 119 | "GPU_COUNT 1\n", 120 | "GRADIENT_CLIP_NORM 5.0\n", 121 | "IMAGES_PER_GPU 5\n", 122 | "IMAGE_MAX_DIM 320\n", 123 | "IMAGE_META_SIZE 14\n", 124 | "IMAGE_MIN_DIM 320\n", 125 | "IMAGE_RESIZE_MODE square\n", 126 | "IMAGE_SHAPE [320 320 3]\n", 127 | "LEARNING_MOMENTUM 0.9\n", 128 | "LEARNING_RATE 0.001\n", 129 | "MASK_POOL_SIZE 14\n", 130 | "MASK_SHAPE [28, 28]\n", 131 | "MAX_GT_INSTANCES 100\n", 132 | "MEAN_PIXEL [123.7 116.8 103.9]\n", 133 | "MINI_MASK_SHAPE (56, 56)\n", 134 | "NAME crowdai-mapping-challenge\n", 135 | "NUM_CLASSES 2\n", 136 | "POOL_SIZE 7\n", 137 | "POST_NMS_ROIS_INFERENCE 1000\n", 138 | "POST_NMS_ROIS_TRAINING 2000\n", 139 | "ROI_POSITIVE_RATIO 0.33\n", 140 | "RPN_ANCHOR_RATIOS [0.5, 1, 2]\n", 141 | "RPN_ANCHOR_SCALES (32, 64, 128, 256, 512)\n", 142 | "RPN_ANCHOR_STRIDE 1\n", 143 | "RPN_BBOX_STD_DEV [0.1 0.1 0.2 0.2]\n", 144 | "RPN_NMS_THRESHOLD 0.7\n", 145 
| "RPN_TRAIN_ANCHORS_PER_IMAGE 256\n", 146 | "STEPS_PER_EPOCH 1000\n", 147 | "TRAIN_BN False\n", 148 | "TRAIN_ROIS_PER_IMAGE 200\n", 149 | "USE_MINI_MASK True\n", 150 | "USE_RPN_ROIS True\n", 151 | "VALIDATION_STEPS 50\n", 152 | "WEIGHT_DECAY 0.0001\n", 153 | "\n", 154 | "\n" 155 | ] 156 | } 157 | ], 158 | "source": [ 159 | "class MappingChallengeConfig(Config):\n", 160 | " \"\"\"Configuration for training on data in MS COCO format.\n", 161 | " Derives from the base Config class and overrides values specific\n", 162 | " to the COCO dataset.\n", 163 | " \"\"\"\n", 164 | " # Give the configuration a recognizable name\n", 165 | " NAME = \"crowdai-mapping-challenge\"\n", 166 | "\n", 167 | " # We use a GPU with 12GB memory, which can fit two images.\n", 168 | " # Adjust down if you use a smaller GPU.\n", 169 | " IMAGES_PER_GPU = 5\n", 170 | "\n", 171 | " # Uncomment to train on 8 GPUs (default is 1)\n", 172 | " GPU_COUNT = 1\n", 173 | "\n", 174 | " # Number of classes (including background)\n", 175 | " NUM_CLASSES = 1 + 1 # 1 Backgroun + 1 Building\n", 176 | "\n", 177 | " STEPS_PER_EPOCH=1000\n", 178 | " VALIDATION_STEPS=50\n", 179 | "\n", 180 | "\n", 181 | " IMAGE_MAX_DIM=320\n", 182 | " IMAGE_MIN_DIM=320\n", 183 | "\n", 184 | "config = MappingChallengeConfig()\n", 185 | "config.display()" 186 | ] 187 | }, 188 | { 189 | "cell_type": "markdown", 190 | "metadata": {}, 191 | "source": [ 192 | "## Instantiate Model" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": 25, 198 | "metadata": { 199 | "collapsed": true 200 | }, 201 | "outputs": [], 202 | "source": [ 203 | "model = modellib.MaskRCNN(mode=\"training\", config=config, model_dir=LOGS_DIRECTORY)\n", 204 | "# Load pretrained weights\n", 205 | "model_path = PRETRAINED_MODEL_PATH\n", 206 | "model.load_weights(model_path, by_name=True)" 207 | ] 208 | }, 209 | { 210 | "cell_type": "markdown", 211 | "metadata": {}, 212 | "source": [ 213 | "## Load Training and Validation Dataset" 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": 32, 219 | "metadata": {}, 220 | "outputs": [ 221 | { 222 | "name": "stdout", 223 | "output_type": "stream", 224 | "text": [ 225 | "Annotation Path data/train/annotation-small.json\n", 226 | "Image Dir data/train/images\n", 227 | "loading annotations into memory...\n", 228 | "Done (t=1.12s)\n", 229 | "creating index...\n", 230 | "index created!\n", 231 | "Annotation Path data/val/annotation-small.json\n", 232 | "Image Dir data/val/images\n", 233 | "loading annotations into memory...\n", 234 | "Done (t=0.20s)\n", 235 | "creating index...\n", 236 | "index created!\n" 237 | ] 238 | } 239 | ], 240 | "source": [ 241 | "# Load training dataset\n", 242 | "dataset_train = MappingChallengeDataset()\n", 243 | "dataset_train.load_dataset(dataset_dir=os.path.join(\"data\", \"train\"), load_small=True)\n", 244 | "dataset_train.prepare()\n", 245 | "\n", 246 | "# Load validation dataset\n", 247 | "dataset_val = MappingChallengeDataset()\n", 248 | "val_coco = dataset_val.load_dataset(dataset_dir=os.path.join(\"data\", \"val\"), load_small=True, return_coco=True)\n", 249 | "dataset_val.prepare()" 250 | ] 251 | }, 252 | { 253 | "cell_type": "markdown", 254 | "metadata": {}, 255 | "source": [ 256 | "## Train" 257 | ] 258 | }, 259 | { 260 | "cell_type": "code", 261 | "execution_count": null, 262 | "metadata": {}, 263 | "outputs": [], 264 | "source": [ 265 | "# *** This training schedule is an example. 
Update to your needs ***\n", 266 | "\n", 267 | "# Training - Stage 1\n", 268 | "print(\"Training network heads\")\n", 269 | "model.train(dataset_train, dataset_val,\n", 270 | " learning_rate=config.LEARNING_RATE,\n", 271 | " epochs=40,\n", 272 | " layers='heads')\n", 273 | "\n", 274 | "# Training - Stage 2\n", 275 | "# Finetune layers from ResNet stage 4 and up\n", 276 | "print(\"Fine tune Resnet stage 4 and up\")\n", 277 | "model.train(dataset_train, dataset_val,\n", 278 | " learning_rate=config.LEARNING_RATE,\n", 279 | " epochs=120,\n", 280 | " layers='4+')\n", 281 | "\n", 282 | "# Training - Stage 3\n", 283 | "# Fine tune all layers\n", 284 | "print(\"Fine tune all layers\")\n", 285 | "model.train(dataset_train, dataset_val,\n", 286 | " learning_rate=config.LEARNING_RATE / 10,\n", 287 | " epochs=160,\n", 288 | " layers='all')" 289 | ] 290 | }, 291 | { 292 | "cell_type": "markdown", 293 | "metadata": {}, 294 | "source": [ 295 | "Now you can monitor the training by running : \n", 296 | "```\n", 297 | "tensorboard --logdir=logs/[path-to-your-experiment-logdir]\n", 298 | "```\n", 299 | "and if everything works great, you should see something like : \n", 300 | "![loss-plot](images/loss-plot.png)\n", 301 | "\n", 302 | "# Author\n", 303 | "Sharada Mohanty [sharada.mohanty@epfl.ch](sharada.mohanty@epfl.ch)" 304 | ] 305 | }, 306 | { 307 | "cell_type": "code", 308 | "execution_count": null, 309 | "metadata": { 310 | "collapsed": true 311 | }, 312 | "outputs": [], 313 | "source": [] 314 | } 315 | ], 316 | "metadata": { 317 | "kernelspec": { 318 | "display_name": "Python 2", 319 | "language": "python", 320 | "name": "python2" 321 | }, 322 | "language_info": { 323 | "codemirror_mode": { 324 | "name": "ipython", 325 | "version": 2 326 | }, 327 | "file_extension": ".py", 328 | "mimetype": "text/x-python", 329 | "name": "python", 330 | "nbconvert_exporter": "python", 331 | "pygments_lexer": "ipython2", 332 | "version": "2.7.14" 333 | } 334 | }, 335 | "nbformat": 4, 336 | "nbformat_minor": 2 337 | } 338 | -------------------------------------------------------------------------------- /assets/4k_video.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/assets/4k_video.gif -------------------------------------------------------------------------------- /assets/detection_activations.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/assets/detection_activations.png -------------------------------------------------------------------------------- /assets/detection_anchors.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/assets/detection_anchors.png -------------------------------------------------------------------------------- /assets/detection_final.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/assets/detection_final.png -------------------------------------------------------------------------------- /assets/detection_histograms.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/assets/detection_histograms.png -------------------------------------------------------------------------------- /assets/detection_masks.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/assets/detection_masks.png -------------------------------------------------------------------------------- /assets/detection_refinement.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/assets/detection_refinement.png -------------------------------------------------------------------------------- /assets/detection_tensorboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/assets/detection_tensorboard.png -------------------------------------------------------------------------------- /assets/donuts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/assets/donuts.png -------------------------------------------------------------------------------- /assets/sheep.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/assets/sheep.png -------------------------------------------------------------------------------- /assets/street.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/assets/street.png -------------------------------------------------------------------------------- /coco.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Configurations and data loading code for MS COCO. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 
6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | 9 | ------------------------------------------------------------ 10 | 11 | Usage: import the module (see Jupyter notebooks for examples), or run from 12 | the command line as such: 13 | 14 | # Train a new model starting from pre-trained COCO weights 15 | python3 coco.py train --dataset=/path/to/coco/ --model=coco 16 | 17 | # Train a new model starting from ImageNet weights 18 | python3 coco.py train --dataset=/path/to/coco/ --model=imagenet 19 | 20 | # Continue training a model that you had trained earlier 21 | python3 coco.py train --dataset=/path/to/coco/ --model=/path/to/weights.h5 22 | 23 | # Continue training the last model you trained 24 | python3 coco.py train --dataset=/path/to/coco/ --model=last 25 | 26 | # Run COCO evaluatoin on the last model you trained 27 | python3 coco.py evaluate --dataset=/path/to/coco/ --model=last 28 | """ 29 | 30 | import os 31 | import time 32 | import numpy as np 33 | 34 | # Download and install the Python COCO tools from https://github.com/waleedka/coco 35 | # 36 | # pip install git+https://github.com/waleedka/coco.git#subdirectory=PythonAPI 37 | # 38 | # That's a fork from the original https://github.com/pdollar/coco with a bug 39 | # fix for Python 3. 40 | # I submitted a pull request https://github.com/cocodataset/cocoapi/pull/50 41 | # If the PR is merged then use the original repo. 42 | # Note: Edit PythonAPI/Makefile and replace "python" with "python3". 43 | from pycocotools.coco import COCO 44 | from pycocotools.cocoeval import COCOeval 45 | from pycocotools import mask as maskUtils 46 | 47 | import zipfile 48 | import urllib.request 49 | import shutil 50 | 51 | from mrcnn.config import Config 52 | import mrcnn.utils as utils 53 | import mrcnn.model as modellib 54 | 55 | # Root directory of the project 56 | ROOT_DIR = os.getcwd() 57 | 58 | # Path to trained weights file 59 | COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5") 60 | 61 | # Directory to save logs and model checkpoints, if not provided 62 | # through the command line argument --logs 63 | DEFAULT_LOGS_DIR = os.path.join(ROOT_DIR, "logs") 64 | DEFAULT_DATASET_YEAR = "2014" 65 | 66 | ############################################################ 67 | # Configurations 68 | ############################################################ 69 | 70 | 71 | class CocoConfig(Config): 72 | """Configuration for training on MS COCO. 73 | Derives from the base Config class and overrides values specific 74 | to the COCO dataset. 75 | """ 76 | # Give the configuration a recognizable name 77 | NAME = "coco" 78 | 79 | # We use a GPU with 12GB memory, which can fit two images. 80 | # Adjust down if you use a smaller GPU. 81 | IMAGES_PER_GPU = 2 82 | 83 | # Uncomment to train on 8 GPUs (default is 1) 84 | # GPU_COUNT = 8 85 | 86 | # Number of classes (including background) 87 | NUM_CLASSES = 1 + 80 # COCO has 80 classes 88 | 89 | 90 | ############################################################ 91 | # Dataset 92 | ############################################################ 93 | 94 | class CocoDataset(utils.Dataset): 95 | def load_coco(self, dataset_dir, subset, year=DEFAULT_DATASET_YEAR, class_ids=None, 96 | class_map=None, return_coco=False, auto_download=False): 97 | """Load a subset of the COCO dataset. 98 | dataset_dir: The root directory of the COCO dataset. 
99 | subset: What to load (train, val, minival, valminusminival) 100 | year: What dataset year to load (2014, 2017) as a string, not an integer 101 | class_ids: If provided, only loads images that have the given classes. 102 | class_map: TODO: Not implemented yet. Supports maping classes from 103 | different datasets to the same class ID. 104 | return_coco: If True, returns the COCO object. 105 | auto_download: Automatically download and unzip MS-COCO images and annotations 106 | """ 107 | 108 | if auto_download is True: 109 | self.auto_download(dataset_dir, subset, year) 110 | 111 | coco = COCO("{}/annotations/instances_{}{}.json".format(dataset_dir, subset, year)) 112 | if subset == "minival" or subset == "valminusminival": 113 | subset = "val" 114 | image_dir = "{}/{}{}".format(dataset_dir, subset, year) 115 | 116 | # Load all classes or a subset? 117 | if not class_ids: 118 | # All classes 119 | class_ids = sorted(coco.getCatIds()) 120 | 121 | # All images or a subset? 122 | if class_ids: 123 | image_ids = [] 124 | for id in class_ids: 125 | image_ids.extend(list(coco.getImgIds(catIds=[id]))) 126 | # Remove duplicates 127 | image_ids = list(set(image_ids)) 128 | else: 129 | # All images 130 | image_ids = list(coco.imgs.keys()) 131 | 132 | # Add classes 133 | for i in class_ids: 134 | self.add_class("coco", i, coco.loadCats(i)[0]["name"]) 135 | 136 | # Add images 137 | for i in image_ids: 138 | self.add_image( 139 | "coco", image_id=i, 140 | path=os.path.join(image_dir, coco.imgs[i]['file_name']), 141 | width=coco.imgs[i]["width"], 142 | height=coco.imgs[i]["height"], 143 | annotations=coco.loadAnns(coco.getAnnIds( 144 | imgIds=[i], catIds=class_ids, iscrowd=None))) 145 | if return_coco: 146 | return coco 147 | 148 | def auto_download(self, dataDir, dataType, dataYear): 149 | """Download the COCO dataset/annotations if requested. 150 | dataDir: The root directory of the COCO dataset. 151 | dataType: What to load (train, val, minival, valminusminival) 152 | dataYear: What dataset year to load (2014, 2017) as a string, not an integer 153 | Note: 154 | For 2014, use "train", "val", "minival", or "valminusminival" 155 | For 2017, only "train" and "val" annotations are available 156 | """ 157 | 158 | # Setup paths and file names 159 | if dataType == "minival" or dataType == "valminusminival": 160 | imgDir = "{}/{}{}".format(dataDir, "val", dataYear) 161 | imgZipFile = "{}/{}{}.zip".format(dataDir, "val", dataYear) 162 | imgURL = "http://images.cocodataset.org/zips/{}{}.zip".format("val", dataYear) 163 | else: 164 | imgDir = "{}/{}{}".format(dataDir, dataType, dataYear) 165 | imgZipFile = "{}/{}{}.zip".format(dataDir, dataType, dataYear) 166 | imgURL = "http://images.cocodataset.org/zips/{}{}.zip".format(dataType, dataYear) 167 | # print("Image paths:"); print(imgDir); print(imgZipFile); print(imgURL) 168 | 169 | # Create main folder if it doesn't exist yet 170 | if not os.path.exists(dataDir): 171 | os.makedirs(dataDir) 172 | 173 | # Download images if not available locally 174 | if not os.path.exists(imgDir): 175 | os.makedirs(imgDir) 176 | print("Downloading images to " + imgZipFile + " ...") 177 | with urllib.request.urlopen(imgURL) as resp, open(imgZipFile, 'wb') as out: 178 | shutil.copyfileobj(resp, out) 179 | print("... done downloading.") 180 | print("Unzipping " + imgZipFile) 181 | with zipfile.ZipFile(imgZipFile, "r") as zip_ref: 182 | zip_ref.extractall(dataDir) 183 | print("... 
done unzipping") 184 | print("Will use images in " + imgDir) 185 | 186 | # Setup annotations data paths 187 | annDir = "{}/annotations".format(dataDir) 188 | if dataType == "minival": 189 | annZipFile = "{}/instances_minival2014.json.zip".format(dataDir) 190 | annFile = "{}/instances_minival2014.json".format(annDir) 191 | annURL = "https://dl.dropboxusercontent.com/s/o43o90bna78omob/instances_minival2014.json.zip?dl=0" 192 | unZipDir = annDir 193 | elif dataType == "valminusminival": 194 | annZipFile = "{}/instances_valminusminival2014.json.zip".format(dataDir) 195 | annFile = "{}/instances_valminusminival2014.json".format(annDir) 196 | annURL = "https://dl.dropboxusercontent.com/s/s3tw5zcg7395368/instances_valminusminival2014.json.zip?dl=0" 197 | unZipDir = annDir 198 | else: 199 | annZipFile = "{}/annotations_trainval{}.zip".format(dataDir, dataYear) 200 | annFile = "{}/instances_{}{}.json".format(annDir, dataType, dataYear) 201 | annURL = "http://images.cocodataset.org/annotations/annotations_trainval{}.zip".format(dataYear) 202 | unZipDir = dataDir 203 | # print("Annotations paths:"); print(annDir); print(annFile); print(annZipFile); print(annURL) 204 | 205 | # Download annotations if not available locally 206 | if not os.path.exists(annDir): 207 | os.makedirs(annDir) 208 | if not os.path.exists(annFile): 209 | if not os.path.exists(annZipFile): 210 | print("Downloading zipped annotations to " + annZipFile + " ...") 211 | with urllib.request.urlopen(annURL) as resp, open(annZipFile, 'wb') as out: 212 | shutil.copyfileobj(resp, out) 213 | print("... done downloading.") 214 | print("Unzipping " + annZipFile) 215 | with zipfile.ZipFile(annZipFile, "r") as zip_ref: 216 | zip_ref.extractall(unZipDir) 217 | print("... done unzipping") 218 | print("Will use annotations in " + annFile) 219 | 220 | def load_mask(self, image_id): 221 | """Load instance masks for the given image. 222 | 223 | Different datasets use different ways to store masks. This 224 | function converts the different mask format to one format 225 | in the form of a bitmap [height, width, instances]. 226 | 227 | Returns: 228 | masks: A bool array of shape [height, width, instance count] with 229 | one mask per instance. 230 | class_ids: a 1D array of class IDs of the instance masks. 231 | """ 232 | # If not a COCO image, delegate to parent class. 233 | image_info = self.image_info[image_id] 234 | if image_info["source"] != "coco": 235 | return super(CocoDataset, self).load_mask(image_id) 236 | 237 | instance_masks = [] 238 | class_ids = [] 239 | annotations = self.image_info[image_id]["annotations"] 240 | # Build mask of shape [height, width, instance_count] and list 241 | # of class IDs that correspond to each channel of the mask. 242 | for annotation in annotations: 243 | class_id = self.map_source_class_id( 244 | "coco.{}".format(annotation['category_id'])) 245 | if class_id: 246 | m = self.annToMask(annotation, image_info["height"], 247 | image_info["width"]) 248 | # Some objects are so small that they're less than 1 pixel area 249 | # and end up rounded out. Skip those objects. 250 | if m.max() < 1: 251 | continue 252 | # Is it a crowd? If so, use a negative class ID. 253 | if annotation['iscrowd']: 254 | # Use negative class ID for crowds 255 | class_id *= -1 256 | # For crowd masks, annToMask() sometimes returns a mask 257 | # smaller than the given dimensions. If so, resize it. 
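                # (Note: despite the comment above, the branch below does not
                # resize the mask; it substitutes an all-ones mask covering the
                # full image, so the crowd region is treated as spanning the
                # entire image.)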
258 | if m.shape[0] != image_info["height"] or m.shape[1] != image_info["width"]: 259 | m = np.ones([image_info["height"], image_info["width"]], dtype=bool) 260 | instance_masks.append(m) 261 | class_ids.append(class_id) 262 | 263 | # Pack instance masks into an array 264 | if class_ids: 265 | mask = np.stack(instance_masks, axis=2) 266 | class_ids = np.array(class_ids, dtype=np.int32) 267 | return mask, class_ids 268 | else: 269 | # Call super class to return an empty mask 270 | return super(CocoDataset, self).load_mask(image_id) 271 | 272 | def image_reference(self, image_id): 273 | """Return a link to the image in the COCO Website.""" 274 | info = self.image_info[image_id] 275 | if info["source"] == "coco": 276 | return "http://cocodataset.org/#explore?id={}".format(info["id"]) 277 | else: 278 | super(CocoDataset, self).image_reference(image_id) 279 | 280 | # The following two functions are from pycocotools with a few changes. 281 | 282 | def annToRLE(self, ann, height, width): 283 | """ 284 | Convert annotation which can be polygons, uncompressed RLE to RLE. 285 | :return: binary mask (numpy 2D array) 286 | """ 287 | segm = ann['segmentation'] 288 | if isinstance(segm, list): 289 | # polygon -- a single object might consist of multiple parts 290 | # we merge all parts into one mask rle code 291 | rles = maskUtils.frPyObjects(segm, height, width) 292 | rle = maskUtils.merge(rles) 293 | elif isinstance(segm['counts'], list): 294 | # uncompressed RLE 295 | rle = maskUtils.frPyObjects(segm, height, width) 296 | else: 297 | # rle 298 | rle = ann['segmentation'] 299 | return rle 300 | 301 | def annToMask(self, ann, height, width): 302 | """ 303 | Convert annotation which can be polygons, uncompressed RLE, or RLE to binary mask. 304 | :return: binary mask (numpy 2D array) 305 | """ 306 | rle = self.annToRLE(ann, height, width) 307 | m = maskUtils.decode(rle) 308 | return m 309 | 310 | 311 | ############################################################ 312 | # COCO Evaluation 313 | ############################################################ 314 | 315 | def build_coco_results(dataset, image_ids, rois, class_ids, scores, masks): 316 | """Arrange resutls to match COCO specs in http://cocodataset.org/#format 317 | """ 318 | # If no results, return an empty list 319 | if rois is None: 320 | return [] 321 | 322 | results = [] 323 | for image_id in image_ids: 324 | # Loop through detections 325 | for i in range(rois.shape[0]): 326 | class_id = class_ids[i] 327 | score = scores[i] 328 | bbox = np.around(rois[i], 1) 329 | mask = masks[:, :, i] 330 | 331 | result = { 332 | "image_id": image_id, 333 | "category_id": dataset.get_source_class_id(class_id, "coco"), 334 | "bbox": [bbox[1], bbox[0], bbox[3] - bbox[1], bbox[2] - bbox[0]], 335 | "score": score, 336 | "segmentation": maskUtils.encode(np.asfortranarray(mask)) 337 | } 338 | results.append(result) 339 | return results 340 | 341 | 342 | def evaluate_coco(model, dataset, coco, eval_type="bbox", limit=0, image_ids=None): 343 | """Runs official COCO evaluation. 344 | dataset: A Dataset object with valiadtion data 345 | eval_type: "bbox" or "segm" for bounding box or segmentation evaluation 346 | limit: if not 0, it's the number of images to use for evaluation 347 | """ 348 | # Pick COCO images from the dataset 349 | image_ids = image_ids or dataset.image_ids 350 | 351 | # Limit to a subset 352 | if limit: 353 | image_ids = image_ids[:limit] 354 | 355 | # Get corresponding COCO image IDs. 
356 | coco_image_ids = [dataset.image_info[id]["id"] for id in image_ids] 357 | 358 | t_prediction = 0 359 | t_start = time.time() 360 | 361 | results = [] 362 | for i, image_id in enumerate(image_ids): 363 | # Load image 364 | image = dataset.load_image(image_id) 365 | 366 | # Run detection 367 | t = time.time() 368 | r = model.detect([image], verbose=0)[0] 369 | t_prediction += (time.time() - t) 370 | 371 | # Convert results to COCO format 372 | image_results = build_coco_results(dataset, coco_image_ids[i:i + 1], 373 | r["rois"], r["class_ids"], 374 | r["scores"], r["masks"]) 375 | results.extend(image_results) 376 | 377 | # Load results. This modifies results with additional attributes. 378 | coco_results = coco.loadRes(results) 379 | 380 | # Evaluate 381 | cocoEval = COCOeval(coco, coco_results, eval_type) 382 | cocoEval.params.imgIds = coco_image_ids 383 | cocoEval.evaluate() 384 | cocoEval.accumulate() 385 | cocoEval.summarize() 386 | 387 | print("Prediction time: {}. Average {}/image".format( 388 | t_prediction, t_prediction / len(image_ids))) 389 | print("Total time: ", time.time() - t_start) 390 | 391 | 392 | ############################################################ 393 | # Training 394 | ############################################################ 395 | 396 | 397 | if __name__ == '__main__': 398 | import argparse 399 | 400 | # Parse command line arguments 401 | parser = argparse.ArgumentParser( 402 | description='Train Mask R-CNN on MS COCO.') 403 | parser.add_argument("command", 404 | metavar="", 405 | help="'train' or 'evaluate' on MS COCO") 406 | parser.add_argument('--dataset', required=True, 407 | metavar="/path/to/coco/", 408 | help='Directory of the MS-COCO dataset') 409 | parser.add_argument('--year', required=False, 410 | default=DEFAULT_DATASET_YEAR, 411 | metavar="", 412 | help='Year of the MS-COCO dataset (2014 or 2017) (default=2014)') 413 | parser.add_argument('--model', required=True, 414 | metavar="/path/to/weights.h5", 415 | help="Path to weights .h5 file or 'coco'") 416 | parser.add_argument('--logs', required=False, 417 | default=DEFAULT_LOGS_DIR, 418 | metavar="/path/to/logs/", 419 | help='Logs and checkpoints directory (default=logs/)') 420 | parser.add_argument('--limit', required=False, 421 | default=500, 422 | metavar="", 423 | help='Images to use for evaluation (default=500)') 424 | parser.add_argument('--download', required=False, 425 | default=False, 426 | metavar="", 427 | help='Automatically download and unzip MS-COCO files (default=False)', 428 | type=bool) 429 | args = parser.parse_args() 430 | print("Command: ", args.command) 431 | print("Model: ", args.model) 432 | print("Dataset: ", args.dataset) 433 | print("Year: ", args.year) 434 | print("Logs: ", args.logs) 435 | print("Auto Download: ", args.download) 436 | 437 | # Configurations 438 | if args.command == "train": 439 | config = CocoConfig() 440 | else: 441 | class InferenceConfig(CocoConfig): 442 | # Set batch size to 1 since we'll be running inference on 443 | # one image at a time. 
Batch size = GPU_COUNT * IMAGES_PER_GPU 444 | GPU_COUNT = 1 445 | IMAGES_PER_GPU = 1 446 | DETECTION_MIN_CONFIDENCE = 0 447 | config = InferenceConfig() 448 | config.display() 449 | 450 | # Create model 451 | if args.command == "train": 452 | model = modellib.MaskRCNN(mode="training", config=config, 453 | model_dir=args.logs) 454 | else: 455 | model = modellib.MaskRCNN(mode="inference", config=config, 456 | model_dir=args.logs) 457 | 458 | # Select weights file to load 459 | if args.model.lower() == "coco": 460 | model_path = COCO_MODEL_PATH 461 | elif args.model.lower() == "last": 462 | # Find last trained weights 463 | model_path = model.find_last()[1] 464 | elif args.model.lower() == "imagenet": 465 | # Start from ImageNet trained weights 466 | model_path = model.get_imagenet_weights() 467 | else: 468 | model_path = args.model 469 | 470 | # Load weights 471 | print("Loading weights ", model_path) 472 | model.load_weights(model_path, by_name=True) 473 | 474 | # Train or evaluate 475 | if args.command == "train": 476 | # Training dataset. Use the training set and 35K from the 477 | # validation set, as as in the Mask RCNN paper. 478 | dataset_train = CocoDataset() 479 | dataset_train.load_coco(args.dataset, "train", year=args.year, auto_download=args.download) 480 | dataset_train.load_coco(args.dataset, "valminusminival", year=args.year, auto_download=args.download) 481 | dataset_train.prepare() 482 | 483 | # Validation dataset 484 | dataset_val = CocoDataset() 485 | dataset_val.load_coco(args.dataset, "minival", year=args.year, auto_download=args.download) 486 | dataset_val.prepare() 487 | 488 | # *** This training schedule is an example. Update to your needs *** 489 | 490 | # Training - Stage 1 491 | print("Training network heads") 492 | model.train(dataset_train, dataset_val, 493 | learning_rate=config.LEARNING_RATE, 494 | epochs=40, 495 | layers='heads') 496 | 497 | # Training - Stage 2 498 | # Finetune layers from ResNet stage 4 and up 499 | print("Fine tune Resnet stage 4 and up") 500 | model.train(dataset_train, dataset_val, 501 | learning_rate=config.LEARNING_RATE, 502 | epochs=120, 503 | layers='4+') 504 | 505 | # Training - Stage 3 506 | # Fine tune all layers 507 | print("Fine tune all layers") 508 | model.train(dataset_train, dataset_val, 509 | learning_rate=config.LEARNING_RATE / 10, 510 | epochs=160, 511 | layers='all') 512 | 513 | elif args.command == "evaluate": 514 | # Validation dataset 515 | dataset_val = CocoDataset() 516 | coco = dataset_val.load_coco(args.dataset, "minival", year=args.year, return_coco=True, auto_download=args.download) 517 | dataset_val.prepare() 518 | print("Running COCO evaluation on {} images.".format(args.limit)) 519 | evaluate_coco(model, dataset_val, coco, "bbox", limit=int(args.limit)) 520 | else: 521 | print("'{}' is not recognized. 
" 522 | "Use 'train' or 'evaluate'".format(args.command)) 523 | -------------------------------------------------------------------------------- /data/_: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/data/_ -------------------------------------------------------------------------------- /images/1045023827_4ec3e8ba5c_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/1045023827_4ec3e8ba5c_z.jpg -------------------------------------------------------------------------------- /images/12283150_12d37e6389_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/12283150_12d37e6389_z.jpg -------------------------------------------------------------------------------- /images/2383514521_1fc8d7b0de_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/2383514521_1fc8d7b0de_z.jpg -------------------------------------------------------------------------------- /images/2502287818_41e4b0c4fb_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/2502287818_41e4b0c4fb_z.jpg -------------------------------------------------------------------------------- /images/2516944023_d00345997d_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/2516944023_d00345997d_z.jpg -------------------------------------------------------------------------------- /images/25691390_f9944f61b5_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/25691390_f9944f61b5_z.jpg -------------------------------------------------------------------------------- /images/262985539_1709e54576_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/262985539_1709e54576_z.jpg -------------------------------------------------------------------------------- /images/3132016470_c27baa00e8_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/3132016470_c27baa00e8_z.jpg -------------------------------------------------------------------------------- /images/3627527276_6fe8cd9bfe_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/3627527276_6fe8cd9bfe_z.jpg 
-------------------------------------------------------------------------------- /images/3651581213_f81963d1dd_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/3651581213_f81963d1dd_z.jpg -------------------------------------------------------------------------------- /images/3800883468_12af3c0b50_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/3800883468_12af3c0b50_z.jpg -------------------------------------------------------------------------------- /images/3862500489_6fd195d183_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/3862500489_6fd195d183_z.jpg -------------------------------------------------------------------------------- /images/3878153025_8fde829928_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/3878153025_8fde829928_z.jpg -------------------------------------------------------------------------------- /images/4410436637_7b0ca36ee7_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/4410436637_7b0ca36ee7_z.jpg -------------------------------------------------------------------------------- /images/4782628554_668bc31826_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/4782628554_668bc31826_z.jpg -------------------------------------------------------------------------------- /images/5951960966_d4e1cda5d0_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/5951960966_d4e1cda5d0_z.jpg -------------------------------------------------------------------------------- /images/6584515005_fce9cec486_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/6584515005_fce9cec486_z.jpg -------------------------------------------------------------------------------- /images/6821351586_59aa0dc110_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/6821351586_59aa0dc110_z.jpg -------------------------------------------------------------------------------- /images/7581246086_cf7bbb7255_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/7581246086_cf7bbb7255_z.jpg 
-------------------------------------------------------------------------------- /images/7933423348_c30bd9bd4e_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/7933423348_c30bd9bd4e_z.jpg -------------------------------------------------------------------------------- /images/8053677163_d4c8f416be_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/8053677163_d4c8f416be_z.jpg -------------------------------------------------------------------------------- /images/8239308689_efa6c11b08_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/8239308689_efa6c11b08_z.jpg -------------------------------------------------------------------------------- /images/8433365521_9252889f9a_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/8433365521_9252889f9a_z.jpg -------------------------------------------------------------------------------- /images/8512296263_5fc5458e20_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/8512296263_5fc5458e20_z.jpg -------------------------------------------------------------------------------- /images/8699757338_c3941051b6_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/8699757338_c3941051b6_z.jpg -------------------------------------------------------------------------------- /images/8734543718_37f6b8bd45_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/8734543718_37f6b8bd45_z.jpg -------------------------------------------------------------------------------- /images/8829708882_48f263491e_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/8829708882_48f263491e_z.jpg -------------------------------------------------------------------------------- /images/9118579087_f9ffa19e63_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/9118579087_f9ffa19e63_z.jpg -------------------------------------------------------------------------------- /images/9247489789_132c0d534a_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/9247489789_132c0d534a_z.jpg 
-------------------------------------------------------------------------------- /images/loss-plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/loss-plot.png -------------------------------------------------------------------------------- /images/predictions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/predictions.png -------------------------------------------------------------------------------- /mrcnn/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /mrcnn/cocoeval.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tsungyi' 2 | 3 | import numpy as np 4 | import datetime 5 | import time 6 | from collections import defaultdict 7 | from pycocotools import mask as maskUtils 8 | import copy 9 | 10 | """ 11 | This script has been taken (and modified) from : 12 | https://github.com/crowdAI/coco/blob/master/PythonAPI/pycocotools/cocoeval.py 13 | """ 14 | 15 | 16 | class COCOeval: 17 | # Interface for evaluating detection on the Microsoft COCO dataset. 18 | # 19 | # The usage for CocoEval is as follows: 20 | # cocoGt=..., cocoDt=... # load dataset and results 21 | # E = CocoEval(cocoGt,cocoDt); # initialize CocoEval object 22 | # E.params.recThrs = ...; # set parameters as desired 23 | # E.evaluate(); # run per image evaluation 24 | # E.accumulate(); # accumulate per image results 25 | # E.summarize(); # display summary metrics of results 26 | # For example usage see evalDemo.m and http://mscoco.org/. 27 | # 28 | # The evaluation parameters are as follows (defaults in brackets): 29 | # imgIds - [all] N img ids to use for evaluation 30 | # catIds - [all] K cat ids to use for evaluation 31 | # iouThrs - [.5:.05:.95] T=10 IoU thresholds for evaluation 32 | # recThrs - [0:.01:1] R=101 recall thresholds for evaluation 33 | # areaRng - [...] A=4 object area ranges for evaluation 34 | # maxDets - [1 10 100] M=3 thresholds on max detections per image 35 | # iouType - ['segm'] set iouType to 'segm', 'bbox' or 'keypoints' 36 | # iouType replaced the now DEPRECATED useSegm parameter. 37 | # useCats - [1] if true use category labels for evaluation 38 | # Note: if useCats=0 category labels are ignored as in proposal scoring. 39 | # Note: multiple areaRngs [Ax2] and maxDets [Mx1] can be specified. 
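    #  A concrete sketch of setting these parameters before evaluating
    #  (the imgIds slice and maxDets values are illustrative):
    #      E = COCOeval(cocoGt, cocoDt, iouType='segm')
    #      E.params.imgIds  = sorted(cocoGt.getImgIds())[:100]
    #      E.params.maxDets = [1, 10, 100]
    #      E.evaluate(); E.accumulate(); E.summarize()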
40 | # 41 | # evaluate(): evaluates detections on every image and every category and 42 | # concats the results into the "evalImgs" with fields: 43 | # dtIds - [1xD] id for each of the D detections (dt) 44 | # gtIds - [1xG] id for each of the G ground truths (gt) 45 | # dtMatches - [TxD] matching gt id at each IoU or 0 46 | # gtMatches - [TxG] matching dt id at each IoU or 0 47 | # dtScores - [1xD] confidence of each dt 48 | # gtIgnore - [1xG] ignore flag for each gt 49 | # dtIgnore - [TxD] ignore flag for each dt at each IoU 50 | # 51 | # accumulate(): accumulates the per-image, per-category evaluation 52 | # results in "evalImgs" into the dictionary "eval" with fields: 53 | # params - parameters used for evaluation 54 | # date - date evaluation was performed 55 | # counts - [T,R,K,A,M] parameter dimensions (see above) 56 | # precision - [TxRxKxAxM] precision for every evaluation setting 57 | # recall - [TxKxAxM] max recall for every evaluation setting 58 | # Note: precision and recall==-1 for settings with no gt objects. 59 | # 60 | # See also coco, mask, pycocoDemo, pycocoEvalDemo 61 | # 62 | # Microsoft COCO Toolbox. version 2.0 63 | # Data, paper, and tutorials available at: http://mscoco.org/ 64 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 65 | # Licensed under the Simplified BSD License [see coco/license.txt] 66 | def __init__(self, cocoGt=None, cocoDt=None, iouType='segm'): 67 | ''' 68 | Initialize CocoEval using coco APIs for gt and dt 69 | :param cocoGt: coco object with ground truth annotations 70 | :param cocoDt: coco object with detection results 71 | :return: None 72 | ''' 73 | if not iouType: 74 | print('iouType not specified. use default iouType segm') 75 | self.cocoGt = cocoGt # ground truth COCO API 76 | self.cocoDt = cocoDt # detections COCO API 77 | self.params = {} # evaluation parameters 78 | self.evalImgs = defaultdict(list) # per-image per-category evaluation results [KxAxI] elements 79 | self.eval = {} # accumulated evaluation results 80 | self._gts = defaultdict(list) # gt for evaluation 81 | self._dts = defaultdict(list) # dt for evaluation 82 | self.params = Params(iouType=iouType) # parameters 83 | self._paramsEval = {} # parameters for evaluation 84 | self.stats = [] # result summarization 85 | self.ious = {} # ious between all gts and dts 86 | if not cocoGt is None: 87 | self.params.imgIds = sorted(cocoGt.getImgIds()) 88 | self.params.catIds = sorted(cocoGt.getCatIds()) 89 | 90 | 91 | def _prepare(self): 92 | ''' 93 | Prepare ._gts and ._dts for evaluation based on params 94 | :return: None 95 | ''' 96 | def _toMask(anns, coco): 97 | # modify ann['segmentation'] by reference 98 | for ann in anns: 99 | rle = coco.annToRLE(ann) 100 | ann['segmentation'] = rle 101 | p = self.params 102 | if p.useCats: 103 | gts=self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds)) 104 | dts=self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds)) 105 | else: 106 | gts=self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds)) 107 | dts=self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds)) 108 | 109 | # convert ground truth to mask if iouType == 'segm' 110 | if p.iouType == 'segm': 111 | _toMask(gts, self.cocoGt) 112 | _toMask(dts, self.cocoDt) 113 | # set ignore flag 114 | for gt in gts: 115 | gt['ignore'] = gt['ignore'] if 'ignore' in gt else 0 116 | gt['ignore'] = 'iscrowd' in gt and gt['iscrowd'] 117 | if p.iouType == 'keypoints': 118 | gt['ignore'] = (gt['num_keypoints'] == 0) or gt['ignore'] 119 | self._gts = 
defaultdict(list) # gt for evaluation 120 | self._dts = defaultdict(list) # dt for evaluation 121 | for gt in gts: 122 | self._gts[gt['image_id'], gt['category_id']].append(gt) 123 | for dt in dts: 124 | self._dts[dt['image_id'], dt['category_id']].append(dt) 125 | self.evalImgs = defaultdict(list) # per-image per-category evaluation results 126 | self.eval = {} # accumulated evaluation results 127 | 128 | def evaluate(self): 129 | ''' 130 | Run per image evaluation on given images and store results (a list of dict) in self.evalImgs 131 | :return: None 132 | ''' 133 | tic = time.time() 134 | print('Running per image evaluation...') 135 | p = self.params 136 | # add backward compatibility if useSegm is specified in params 137 | if not p.useSegm is None: 138 | p.iouType = 'segm' if p.useSegm == 1 else 'bbox' 139 | print('useSegm (deprecated) is not None. Running {} evaluation'.format(p.iouType)) 140 | print('Evaluate annotation type *{}*'.format(p.iouType)) 141 | p.imgIds = list(np.unique(p.imgIds)) 142 | if p.useCats: 143 | p.catIds = list(np.unique(p.catIds)) 144 | p.maxDets = sorted(p.maxDets) 145 | self.params=p 146 | 147 | self._prepare() 148 | # loop through images, area range, max detection number 149 | catIds = p.catIds if p.useCats else [-1] 150 | 151 | if p.iouType == 'segm' or p.iouType == 'bbox': 152 | computeIoU = self.computeIoU 153 | elif p.iouType == 'keypoints': 154 | computeIoU = self.computeOks 155 | self.ious = {(imgId, catId): computeIoU(imgId, catId) \ 156 | for imgId in p.imgIds 157 | for catId in catIds} 158 | 159 | evaluateImg = self.evaluateImg 160 | maxDet = p.maxDets[-1] 161 | self.evalImgs = [evaluateImg(imgId, catId, areaRng, maxDet) 162 | for catId in catIds 163 | for areaRng in p.areaRng 164 | for imgId in p.imgIds 165 | ] 166 | self._paramsEval = copy.deepcopy(self.params) 167 | toc = time.time() 168 | print('DONE (t={:0.2f}s).'.format(toc-tic)) 169 | 170 | def computeIoU(self, imgId, catId): 171 | p = self.params 172 | if p.useCats: 173 | gt = self._gts[imgId,catId] 174 | dt = self._dts[imgId,catId] 175 | else: 176 | gt = [_ for cId in p.catIds for _ in self._gts[imgId,cId]] 177 | dt = [_ for cId in p.catIds for _ in self._dts[imgId,cId]] 178 | if len(gt) == 0 and len(dt) ==0: 179 | return [] 180 | inds = np.argsort([-d['score'] for d in dt], kind='mergesort') 181 | dt = [dt[i] for i in inds] 182 | if len(dt) > p.maxDets[-1]: 183 | dt=dt[0:p.maxDets[-1]] 184 | 185 | if p.iouType == 'segm': 186 | g = [g['segmentation'] for g in gt] 187 | d = [d['segmentation'] for d in dt] 188 | elif p.iouType == 'bbox': 189 | g = [g['bbox'] for g in gt] 190 | d = [d['bbox'] for d in dt] 191 | else: 192 | raise Exception('unknown iouType for iou computation') 193 | 194 | # compute iou between each dt and gt region 195 | iscrowd = [int(o['iscrowd']) for o in gt] 196 | ious = maskUtils.iou(d,g,iscrowd) 197 | return ious 198 | 199 | def computeOks(self, imgId, catId): 200 | p = self.params 201 | # dimention here should be Nxm 202 | gts = self._gts[imgId, catId] 203 | dts = self._dts[imgId, catId] 204 | inds = np.argsort([-d['score'] for d in dts], kind='mergesort') 205 | dts = [dts[i] for i in inds] 206 | if len(dts) > p.maxDets[-1]: 207 | dts = dts[0:p.maxDets[-1]] 208 | # if len(gts) == 0 and len(dts) == 0: 209 | if len(gts) == 0 or len(dts) == 0: 210 | return [] 211 | ious = np.zeros((len(dts), len(gts))) 212 | sigmas = np.array([.26, .25, .25, .35, .35, .79, .79, .72, .72, .62,.62, 1.07, 1.07, .87, .87, .89, .89])/10.0 213 | vars = (sigmas * 2)**2 214 | k = len(sigmas) 215 | 
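# Illustrative note: the nested loops below implement the standard COCO
# Object Keypoint Similarity,
#     OKS = mean_i( exp( -d_i**2 / (2 * area * (2 * sigma_i)**2) ) )
# where d_i is the distance between corresponding predicted and ground-truth
# keypoints, area is the ground-truth object area, and sigma_i are the
# per-keypoint constants defined above (so `vars` holds (2 * sigma_i)**2).
# Only labelled keypoints (vg > 0) contribute to the mean; if none are
# labelled, distances are measured against an ignore region twice the size
# of the ground-truth box.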
# compute oks between each detection and ground truth object
216 | for j, gt in enumerate(gts):
217 | # create bounds for ignore regions(double the gt bbox)
218 | g = np.array(gt['keypoints'])
219 | xg = g[0::3]; yg = g[1::3]; vg = g[2::3]
220 | k1 = np.count_nonzero(vg > 0)
221 | bb = gt['bbox']
222 | x0 = bb[0] - bb[2]; x1 = bb[0] + bb[2] * 2
223 | y0 = bb[1] - bb[3]; y1 = bb[1] + bb[3] * 2
224 | for i, dt in enumerate(dts):
225 | d = np.array(dt['keypoints'])
226 | xd = d[0::3]; yd = d[1::3]
227 | if k1>0:
228 | # measure the per-keypoint distance if keypoints visible
229 | dx = xd - xg
230 | dy = yd - yg
231 | else:
232 | # measure minimum distance to keypoints in (x0,y0) & (x1,y1)
233 | z = np.zeros((k))
234 | dx = np.max((z, x0-xd),axis=0)+np.max((z, xd-x1),axis=0)
235 | dy = np.max((z, y0-yd),axis=0)+np.max((z, yd-y1),axis=0)
236 | e = (dx**2 + dy**2) / vars / (gt['area']+np.spacing(1)) / 2
237 | if k1 > 0:
238 | e=e[vg > 0]
239 | ious[i, j] = np.sum(np.exp(-e)) / e.shape[0]
240 | return ious
241 | 
242 | def evaluateImg(self, imgId, catId, aRng, maxDet):
243 | '''
244 | perform evaluation for single category and image
245 | :return: dict (single image results)
246 | '''
247 | p = self.params
248 | if p.useCats:
249 | gt = self._gts[imgId,catId]
250 | dt = self._dts[imgId,catId]
251 | else:
252 | gt = [_ for cId in p.catIds for _ in self._gts[imgId,cId]]
253 | dt = [_ for cId in p.catIds for _ in self._dts[imgId,cId]]
254 | if len(gt) == 0 and len(dt) ==0:
255 | return None
256 | 
257 | for g in gt:
258 | if g['ignore'] or (g['area']<aRng[0] or g['area']>aRng[1]):
259 | g['_ignore'] = 1
260 | else:
261 | g['_ignore'] = 0
262 | 
263 | # sort dt highest score first, sort gt ignore last
264 | gtind = np.argsort([g['_ignore'] for g in gt], kind='mergesort')
265 | gt = [gt[i] for i in gtind]
266 | dtind = np.argsort([-d['score'] for d in dt], kind='mergesort')
267 | dt = [dt[i] for i in dtind[0:maxDet]]
268 | iscrowd = [int(o['iscrowd']) for o in gt]
269 | # load computed ious
270 | ious = self.ious[imgId, catId][:, gtind] if len(self.ious[imgId, catId]) > 0 else self.ious[imgId, catId]
271 | 
272 | T = len(p.iouThrs)
273 | G = len(gt)
274 | D = len(dt)
275 | gtm = np.zeros((T,G))
276 | dtm = np.zeros((T,D))
277 | gtIg = np.array([g['_ignore'] for g in gt])
278 | dtIg = np.zeros((T,D))
279 | if not len(ious)==0:
280 | for tind, t in enumerate(p.iouThrs):
281 | for dind, d in enumerate(dt):
282 | # information about best match so far (m=-1 -> unmatched)
283 | iou = min([t,1-1e-10])
284 | m = -1
285 | for gind, g in enumerate(gt):
286 | # if this gt already matched, and not a crowd, continue
287 | if gtm[tind,gind]>0 and not iscrowd[gind]:
288 | continue
289 | # if dt matched to reg gt, and on ignore gt, stop
290 | if m>-1 and gtIg[m]==0 and gtIg[gind]==1:
291 | break
292 | # continue to next gt unless better match made
293 | if ious[dind,gind] < iou:
294 | continue
295 | # if match successful and best so far, store appropriately
296 | iou=ious[dind,gind]
297 | m=gind
298 | # if match made store id of match for both dt and gt
299 | if m ==-1:
300 | continue
301 | dtIg[tind,dind] = gtIg[m]
302 | dtm[tind,dind] = gt[m]['id']
303 | gtm[tind,m] = d['id']
304 | # set unmatched detections outside of area range to ignore
305 | a = np.array([d['area']<aRng[0] or d['area']>aRng[1] for d in dt]).reshape((1, len(dt)))
306 | dtIg = np.logical_or(dtIg, np.logical_and(dtm==0, np.repeat(a,T,0)))
307 | # store results for given image and category
308 | return {
309 | 'image_id': imgId,
310 | 'category_id': catId,
311 | 'aRng': aRng,
312 | 'maxDet': 
maxDet, 313 | 'dtIds': [d['id'] for d in dt], 314 | 'gtIds': [g['id'] for g in gt], 315 | 'dtMatches': dtm, 316 | 'gtMatches': gtm, 317 | 'dtScores': [d['score'] for d in dt], 318 | 'gtIgnore': gtIg, 319 | 'dtIgnore': dtIg, 320 | } 321 | 322 | def accumulate(self, p = None): 323 | ''' 324 | Accumulate per image evaluation results and store the result in self.eval 325 | :param p: input params for evaluation 326 | :return: None 327 | ''' 328 | print('Accumulating evaluation results...') 329 | tic = time.time() 330 | if not self.evalImgs: 331 | print('Please run evaluate() first') 332 | # allows input customized parameters 333 | if p is None: 334 | p = self.params 335 | p.catIds = p.catIds if p.useCats == 1 else [-1] 336 | T = len(p.iouThrs) 337 | R = len(p.recThrs) 338 | K = len(p.catIds) if p.useCats else 1 339 | A = len(p.areaRng) 340 | M = len(p.maxDets) 341 | precision = -np.ones((T,R,K,A,M)) # -1 for the precision of absent categories 342 | recall = -np.ones((T,K,A,M)) 343 | 344 | # create dictionary for future indexing 345 | _pe = self._paramsEval 346 | catIds = _pe.catIds if _pe.useCats else [-1] 347 | setK = set(catIds) 348 | setA = set(map(tuple, _pe.areaRng)) 349 | setM = set(_pe.maxDets) 350 | setI = set(_pe.imgIds) 351 | # get inds to evaluate 352 | k_list = [n for n, k in enumerate(p.catIds) if k in setK] 353 | m_list = [m for n, m in enumerate(p.maxDets) if m in setM] 354 | a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA] 355 | i_list = [n for n, i in enumerate(p.imgIds) if i in setI] 356 | I0 = len(_pe.imgIds) 357 | A0 = len(_pe.areaRng) 358 | # retrieve E at each category, area range, and max number of detections 359 | for k, k0 in enumerate(k_list): 360 | Nk = k0*A0*I0 361 | for a, a0 in enumerate(a_list): 362 | Na = a0*I0 363 | for m, maxDet in enumerate(m_list): 364 | E = [self.evalImgs[Nk + Na + i] for i in i_list] 365 | E = [e for e in E if not e is None] 366 | if len(E) == 0: 367 | continue 368 | dtScores = np.concatenate([e['dtScores'][0:maxDet] for e in E]) 369 | 370 | # different sorting method generates slightly different results. 371 | # mergesort is used to be consistent as Matlab implementation. 
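# Illustrative note: mergesort is stable, so detections with equal scores keep
# their original relative order. The block that follows builds, for each IoU
# threshold, cumulative TP/FP counts over the globally score-sorted detections,
# converts them into a precision/recall curve, makes the precision envelope
# monotonically non-increasing, and samples it at the recall thresholds in
# p.recThrs to fill the `precision` array.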
372 | inds = np.argsort(-dtScores, kind='mergesort') 373 | 374 | dtm = np.concatenate([e['dtMatches'][:,0:maxDet] for e in E], axis=1)[:,inds] 375 | dtIg = np.concatenate([e['dtIgnore'][:,0:maxDet] for e in E], axis=1)[:,inds] 376 | gtIg = np.concatenate([e['gtIgnore'] for e in E]) 377 | npig = np.count_nonzero(gtIg==0 ) 378 | if npig == 0: 379 | continue 380 | tps = np.logical_and( dtm, np.logical_not(dtIg) ) 381 | fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg) ) 382 | 383 | tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float) 384 | fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float) 385 | for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)): 386 | tp = np.array(tp) 387 | fp = np.array(fp) 388 | nd = len(tp) 389 | rc = tp / npig 390 | pr = tp / (fp+tp+np.spacing(1)) 391 | q = np.zeros((R,)) 392 | 393 | if nd: 394 | recall[t,k,a,m] = rc[-1] 395 | else: 396 | recall[t,k,a,m] = 0 397 | 398 | # numpy is slow without cython optimization for accessing elements 399 | # use python array gets significant speed improvement 400 | pr = pr.tolist(); q = q.tolist() 401 | 402 | for i in range(nd-1, 0, -1): 403 | if pr[i] > pr[i-1]: 404 | pr[i-1] = pr[i] 405 | 406 | inds = np.searchsorted(rc, p.recThrs, side='left') 407 | try: 408 | for ri, pi in enumerate(inds): 409 | q[ri] = pr[pi] 410 | except: 411 | pass 412 | precision[t,:,k,a,m] = np.array(q) 413 | self.eval = { 414 | 'params': p, 415 | 'counts': [T, R, K, A, M], 416 | 'date': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), 417 | 'precision': precision, 418 | 'recall': recall, 419 | } 420 | toc = time.time() 421 | print('DONE (t={:0.2f}s).'.format( toc-tic)) 422 | 423 | def _summarize(self, ap=1, iouThr=None, areaRng='all', maxDets=100 ): 424 | p = self.params 425 | iStr = ' {:<18} {} @[ IoU={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}' 426 | titleStr = 'Average Precision' if ap == 1 else 'Average Recall' 427 | typeStr = '(AP)' if ap==1 else '(AR)' 428 | iouStr = '{:0.2f}:{:0.2f}'.format(p.iouThrs[0], p.iouThrs[-1]) \ 429 | if iouThr is None else '{:0.2f}'.format(iouThr) 430 | 431 | aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng] 432 | mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets] 433 | if ap == 1: 434 | # dimension of precision: [TxRxKxAxM] 435 | s = self.eval['precision'] 436 | # IoU 437 | if iouThr is not None: 438 | t = np.where(iouThr == p.iouThrs)[0] 439 | s = s[t] 440 | s = s[:,:,:,aind,mind] 441 | else: 442 | # dimension of recall: [TxKxAxM] 443 | s = self.eval['recall'] 444 | if iouThr is not None: 445 | t = np.where(iouThr == p.iouThrs)[0] 446 | s = s[t] 447 | s = s[:,:,aind,mind] 448 | if len(s[s>-1])==0: 449 | mean_s = -1 450 | else: 451 | mean_s = np.mean(s[s>-1]) 452 | print(iStr.format(titleStr, typeStr, iouStr, areaRng, maxDets, mean_s)) 453 | return mean_s 454 | 455 | def summarize(self): 456 | ''' 457 | Compute and display summary metrics for evaluation results. 
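Typical call order (illustrative; mrcnn/evaluate.py drives a similar sequence):
    E = COCOeval(cocoGt, cocoDt, iouType='segm')
    E.evaluate()
    E.accumulate()
    E.summarize()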
458 | Note this functin can *only* be applied on the default parameter setting 459 | ''' 460 | def _summarizeDets(): 461 | stats = np.zeros((12,)) 462 | stats[0] = self._summarize(1) 463 | stats[1] = self._summarize(1, iouThr=.5, maxDets=self.params.maxDets[2]) 464 | stats[2] = self._summarize(1, iouThr=.75, maxDets=self.params.maxDets[2]) 465 | stats[3] = self._summarize(1, areaRng='small', maxDets=self.params.maxDets[2]) 466 | stats[4] = self._summarize(1, areaRng='medium', maxDets=self.params.maxDets[2]) 467 | stats[5] = self._summarize(1, areaRng='large', maxDets=self.params.maxDets[2]) 468 | stats[6] = self._summarize(0, maxDets=self.params.maxDets[0]) 469 | stats[7] = self._summarize(0, maxDets=self.params.maxDets[1]) 470 | stats[8] = self._summarize(0, maxDets=self.params.maxDets[2]) 471 | stats[9] = self._summarize(0, areaRng='small', maxDets=self.params.maxDets[2]) 472 | stats[10] = self._summarize(0, areaRng='medium', maxDets=self.params.maxDets[2]) 473 | stats[11] = self._summarize(0, areaRng='large', maxDets=self.params.maxDets[2]) 474 | return stats 475 | def _summarizeKps(): 476 | stats = np.zeros((10,)) 477 | stats[0] = self._summarize(1, maxDets=20) 478 | stats[1] = self._summarize(1, maxDets=20, iouThr=.5) 479 | stats[2] = self._summarize(1, maxDets=20, iouThr=.75) 480 | stats[3] = self._summarize(1, maxDets=20, areaRng='medium') 481 | stats[4] = self._summarize(1, maxDets=20, areaRng='large') 482 | stats[5] = self._summarize(0, maxDets=20) 483 | stats[6] = self._summarize(0, maxDets=20, iouThr=.5) 484 | stats[7] = self._summarize(0, maxDets=20, iouThr=.75) 485 | stats[8] = self._summarize(0, maxDets=20, areaRng='medium') 486 | stats[9] = self._summarize(0, maxDets=20, areaRng='large') 487 | return stats 488 | if not self.eval: 489 | raise Exception('Please run accumulate() first') 490 | iouType = self.params.iouType 491 | if iouType == 'segm' or iouType == 'bbox': 492 | summarize = _summarizeDets 493 | elif iouType == 'keypoints': 494 | summarize = _summarizeKps 495 | self.stats = summarize() 496 | 497 | def __str__(self): 498 | self.summarize() 499 | 500 | class Params: 501 | ''' 502 | Params for coco evaluation api 503 | ''' 504 | def setDetParams(self): 505 | self.imgIds = [] 506 | self.catIds = [100] # For the Category ID of Building 507 | # np.arange causes trouble. the data point on arange is slightly larger than the true value 508 | self.iouThrs = np.linspace(.5, 0.95, np.round((0.95 - .5) / .05) + 1, endpoint=True) 509 | self.recThrs = np.linspace(.0, 1.00, np.round((1.00 - .0) / .01) + 1, endpoint=True) 510 | self.maxDets = [1, 10, 100] 511 | self.areaRng = [[0 ** 2, 1e5 ** 2], [0 ** 2, 32 ** 2], [32 ** 2, 96 ** 2], [96 ** 2, 1e5 ** 2]] 512 | self.areaRngLbl = ['all', 'small', 'medium', 'large'] 513 | self.useCats = 1 514 | 515 | def setKpParams(self): 516 | self.imgIds = [] 517 | self.catIds = [] 518 | # np.arange causes trouble. 
the data point on arange is slightly larger than the true value 519 | self.iouThrs = [0.5] 520 | self.recThrs = np.linspace(.0, 1.00, np.round((1.00 - .0) / .01) + 1, endpoint=True) 521 | self.maxDets = [20] # At max 20 objects detected per image 522 | self.areaRng = [[0 ** 2, 1e5 ** 2], [32 ** 2, 96 ** 2], [96 ** 2, 1e5 ** 2]] 523 | self.areaRngLbl = ['all'] #Consider all area ranges for evaluation 524 | self.useCats = 1 525 | 526 | def __init__(self, iouType='segm'): 527 | if iouType == 'segm' or iouType == 'bbox': 528 | self.setDetParams() 529 | elif iouType == 'keypoints': 530 | self.setKpParams() 531 | else: 532 | raise Exception('iouType not supported') 533 | self.iouType = iouType 534 | # useSegm is deprecated 535 | self.useSegm = None 536 | -------------------------------------------------------------------------------- /mrcnn/config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Base Configurations class. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | """ 9 | 10 | import math 11 | import numpy as np 12 | 13 | 14 | # Base Configuration Class 15 | # Don't use this class directly. Instead, sub-class it and override 16 | # the configurations you need to change. 17 | 18 | class Config(object): 19 | """Base configuration class. For custom configurations, create a 20 | sub-class that inherits from this one and override properties 21 | that need to be changed. 22 | """ 23 | # Name the configurations. For example, 'COCO', 'Experiment 3', ...etc. 24 | # Useful if your code needs to do things differently depending on which 25 | # experiment is running. 26 | NAME = None # Override in sub-classes 27 | 28 | # NUMBER OF GPUs to use. For CPU training, use 1 29 | GPU_COUNT = 1 30 | 31 | # Number of images to train with on each GPU. A 12GB GPU can typically 32 | # handle 2 images of 1024x1024px. 33 | # Adjust based on your GPU memory and image sizes. Use the highest 34 | # number that your GPU can handle for best performance. 35 | IMAGES_PER_GPU = 2 36 | 37 | # Number of training steps per epoch 38 | # This doesn't need to match the size of the training set. Tensorboard 39 | # updates are saved at the end of each epoch, so setting this to a 40 | # smaller number means getting more frequent TensorBoard updates. 41 | # Validation stats are also calculated at each epoch end and they 42 | # might take a while, so don't set this too small to avoid spending 43 | # a lot of time on validation stats. 44 | STEPS_PER_EPOCH = 1000 45 | 46 | # Number of validation steps to run at the end of every training epoch. 47 | # A bigger number improves accuracy of validation stats, but slows 48 | # down the training. 49 | VALIDATION_STEPS = 50 50 | 51 | # Backbone network architecture 52 | # Supported values are: resnet50, resnet101 53 | BACKBONE = "resnet101" 54 | 55 | # The strides of each layer of the FPN Pyramid. These values 56 | # are based on a Resnet101 backbone. 
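# Illustrative note: each stride is the downsampling factor between the input
# image and one FPN level (P2..P6), so with the default 1024x1024 input the
# feature maps are roughly 256, 128, 64, 32 and 16 pixels on a side. If the
# backbone is changed, these values must match its downsampling factors.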
57 | BACKBONE_STRIDES = [4, 8, 16, 32, 64] 58 | 59 | # Number of classification classes (including background) 60 | NUM_CLASSES = 1 # Override in sub-classes 61 | 62 | # Length of square anchor side in pixels 63 | RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512) 64 | 65 | # Ratios of anchors at each cell (width/height) 66 | # A value of 1 represents a square anchor, and 0.5 is a wide anchor 67 | RPN_ANCHOR_RATIOS = [0.5, 1, 2] 68 | 69 | # Anchor stride 70 | # If 1 then anchors are created for each cell in the backbone feature map. 71 | # If 2, then anchors are created for every other cell, and so on. 72 | RPN_ANCHOR_STRIDE = 1 73 | 74 | # Non-max suppression threshold to filter RPN proposals. 75 | # You can increase this during training to generate more propsals. 76 | RPN_NMS_THRESHOLD = 0.7 77 | 78 | # How many anchors per image to use for RPN training 79 | RPN_TRAIN_ANCHORS_PER_IMAGE = 256 80 | 81 | # ROIs kept after non-maximum supression (training and inference) 82 | POST_NMS_ROIS_TRAINING = 2000 83 | POST_NMS_ROIS_INFERENCE = 1000 84 | 85 | # If enabled, resizes instance masks to a smaller size to reduce 86 | # memory load. Recommended when using high-resolution images. 87 | USE_MINI_MASK = True 88 | MINI_MASK_SHAPE = (56, 56) # (height, width) of the mini-mask 89 | 90 | # Input image resizing 91 | # Generally, use the "square" resizing mode for training and inferencing 92 | # and it should work well in most cases. In this mode, images are scaled 93 | # up such that the small side is = IMAGE_MIN_DIM, but ensuring that the 94 | # scaling doesn't make the long side > IMAGE_MAX_DIM. Then the image is 95 | # padded with zeros to make it a square so multiple images can be put 96 | # in one batch. 97 | # Available resizing modes: 98 | # none: No resizing or padding. Return the image unchanged. 99 | # square: Resize and pad with zeros to get a square image 100 | # of size [max_dim, max_dim]. 101 | # pad64: Pads width and height with zeros to make them multiples of 64. 102 | # If IMAGE_MIN_DIM is not None, then scale the small side to 103 | # that size before padding. IMAGE_MAX_DIM is ignored in this mode. 104 | # The multiple of 64 is needed to ensure smooth scaling of feature 105 | # maps up and down the 6 levels of the FPN pyramid (2**6=64). 106 | IMAGE_RESIZE_MODE = "square" 107 | IMAGE_MIN_DIM = 800 108 | IMAGE_MAX_DIM = 1024 109 | 110 | # Image mean (RGB) 111 | MEAN_PIXEL = np.array([123.7, 116.8, 103.9]) 112 | 113 | # Number of ROIs per image to feed to classifier/mask heads 114 | # The Mask RCNN paper uses 512 but often the RPN doesn't generate 115 | # enough positive proposals to fill this and keep a positive:negative 116 | # ratio of 1:3. You can increase the number of proposals by adjusting 117 | # the RPN NMS threshold. 118 | TRAIN_ROIS_PER_IMAGE = 200 119 | 120 | # Percent of positive ROIs used to train classifier/mask heads 121 | ROI_POSITIVE_RATIO = 0.33 122 | 123 | # Pooled ROIs 124 | POOL_SIZE = 7 125 | MASK_POOL_SIZE = 14 126 | 127 | # Shape of output mask 128 | # To change this you also need to change the neural network mask branch 129 | MASK_SHAPE = [28, 28] 130 | 131 | # Maximum number of ground truth instances to use in one image 132 | MAX_GT_INSTANCES = 100 133 | 134 | # Bounding box refinement standard deviation for RPN and final detections. 
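# Illustrative note: these act as a normalization of the (dy, dx, log(dh),
# log(dw)) regression targets; targets are divided by these values when they
# are built and predicted deltas are scaled back by them when boxes are
# refined elsewhere in the model code, keeping the regression outputs in a
# convenient numeric range.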
135 | RPN_BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2]) 136 | BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2]) 137 | 138 | # Max number of final detections 139 | DETECTION_MAX_INSTANCES = 100 140 | 141 | # Minimum probability value to accept a detected instance 142 | # ROIs below this threshold are skipped 143 | DETECTION_MIN_CONFIDENCE = 0.7 144 | 145 | # Non-maximum suppression threshold for detection 146 | DETECTION_NMS_THRESHOLD = 0.3 147 | 148 | # Learning rate and momentum 149 | # The Mask RCNN paper uses lr=0.02, but on TensorFlow it causes 150 | # weights to explode. Likely due to differences in optimzer 151 | # implementation. 152 | LEARNING_RATE = 0.001 153 | LEARNING_MOMENTUM = 0.9 154 | 155 | # Weight decay regularization 156 | WEIGHT_DECAY = 0.0001 157 | 158 | # Use RPN ROIs or externally generated ROIs for training 159 | # Keep this True for most situations. Set to False if you want to train 160 | # the head branches on ROI generated by code rather than the ROIs from 161 | # the RPN. For example, to debug the classifier head without having to 162 | # train the RPN. 163 | USE_RPN_ROIS = True 164 | 165 | # Train or freeze batch normalization layers 166 | # None: Train BN layers. This is the normal mode 167 | # False: Freeze BN layers. Good when using a small batch size 168 | # True: (don't use). Set layer in training mode even when inferencing 169 | TRAIN_BN = False # Defaulting to False since batch size is often small 170 | 171 | # Gradient norm clipping 172 | GRADIENT_CLIP_NORM = 5.0 173 | 174 | def __init__(self): 175 | """Set values of computed attributes.""" 176 | # Effective batch size 177 | self.BATCH_SIZE = self.IMAGES_PER_GPU * self.GPU_COUNT 178 | 179 | # Input image size 180 | self.IMAGE_SHAPE = np.array( 181 | [self.IMAGE_MAX_DIM, self.IMAGE_MAX_DIM, 3]) 182 | 183 | # Image meta data length 184 | # See compose_image_meta() for details 185 | self.IMAGE_META_SIZE = 1 + 3 + 3 + 4 + 1 + self.NUM_CLASSES 186 | 187 | def display(self): 188 | """Display Configuration values.""" 189 | print("\nConfigurations:") 190 | for a in dir(self): 191 | if not a.startswith("__") and not callable(getattr(self, a)): 192 | print("{:30} {}".format(a, getattr(self, a))) 193 | print("\n") 194 | -------------------------------------------------------------------------------- /mrcnn/dataset.py: -------------------------------------------------------------------------------- 1 | from mrcnn import utils 2 | import numpy as np 3 | 4 | from pycocotools.coco import COCO 5 | from pycocotools.cocoeval import COCOeval 6 | from pycocotools import mask as maskUtils 7 | 8 | import os 9 | 10 | class MappingChallengeDataset(utils.Dataset): 11 | def load_dataset(self, dataset_dir, load_small=False, return_coco=True): 12 | """ Loads dataset released for the crowdAI Mapping Challenge(https://www.crowdai.org/challenges/mapping-challenge) 13 | Params: 14 | - dataset_dir : root directory of the dataset (can point to the train/val folder) 15 | - load_small : Boolean value which signals if the annotations for all the images need to be loaded into the memory, 16 | or if only a small subset of the same should be loaded into memory 17 | """ 18 | self.load_small = load_small 19 | if self.load_small: 20 | annotation_path = os.path.join(dataset_dir, "annotation-small.json") 21 | else: 22 | annotation_path = os.path.join(dataset_dir, "annotation.json") 23 | 24 | image_dir = os.path.join(dataset_dir, "images") 25 | print("Annotation Path ", annotation_path) 26 | print("Image Dir ", image_dir) 27 | assert 
os.path.exists(annotation_path) and os.path.exists(image_dir) 28 | 29 | self.coco = COCO(annotation_path) 30 | self.image_dir = image_dir 31 | 32 | # Load all classes (Only Building in this version) 33 | classIds = self.coco.getCatIds() 34 | 35 | # Load all images 36 | image_ids = list(self.coco.imgs.keys()) 37 | 38 | # register classes 39 | for _class_id in classIds: 40 | self.add_class("crowdai-mapping-challenge", _class_id, self.coco.loadCats(_class_id)[0]["name"]) 41 | 42 | # Register Images 43 | for _img_id in image_ids: 44 | assert(os.path.exists(os.path.join(image_dir, self.coco.imgs[_img_id]['file_name']))) 45 | self.add_image( 46 | "crowdai-mapping-challenge", image_id=_img_id, 47 | path=os.path.join(image_dir, self.coco.imgs[_img_id]['file_name']), 48 | width=self.coco.imgs[_img_id]["width"], 49 | height=self.coco.imgs[_img_id]["height"], 50 | annotations=self.coco.loadAnns(self.coco.getAnnIds( 51 | imgIds=[_img_id], 52 | catIds=classIds, 53 | iscrowd=None))) 54 | 55 | if return_coco: 56 | return self.coco 57 | 58 | def load_mask(self, image_id): 59 | """ Loads instance mask for a given image 60 | This function converts mask from the coco format to a 61 | a bitmap [height, width, instance] 62 | Params: 63 | - image_id : reference id for a given image 64 | 65 | Returns: 66 | masks : A bool array of shape [height, width, instances] with 67 | one mask per instance 68 | class_ids : a 1D array of classIds of the corresponding instance masks 69 | (In this version of the challenge it will be of shape [instances] and always be filled with the class-id of the "Building" class.) 70 | """ 71 | 72 | image_info = self.image_info[image_id] 73 | assert image_info["source"] == "crowdai-mapping-challenge" 74 | 75 | instance_masks = [] 76 | class_ids = [] 77 | annotations = self.image_info[image_id]["annotations"] 78 | # Build mask of shape [height, width, instance_count] and list 79 | # of class IDs that correspond to each channel of the mask. 80 | for annotation in annotations: 81 | class_id = self.map_source_class_id( 82 | "crowdai-mapping-challenge.{}".format(annotation['category_id'])) 83 | if class_id: 84 | m = self.annToMask(annotation, image_info["height"], 85 | image_info["width"]) 86 | # Some objects are so small that they're less than 1 pixel area 87 | # and end up rounded out. Skip those objects. 88 | if m.max() < 1: 89 | continue 90 | 91 | # Ignore the notion of "is_crowd" as specified in the coco format 92 | # as we donot have the said annotation in the current version of the dataset 93 | 94 | instance_masks.append(m) 95 | class_ids.append(class_id) 96 | # Pack instance masks into an array 97 | if class_ids: 98 | mask = np.stack(instance_masks, axis=2) 99 | class_ids = np.array(class_ids, dtype=np.int32) 100 | return mask, class_ids 101 | else: 102 | # Call super class to return an empty mask 103 | return super(MappingChallengeDataset, self).load_mask(image_id) 104 | 105 | 106 | def image_reference(self, image_id): 107 | """Return a reference for a particular image 108 | 109 | Ideally you this function is supposed to return a URL 110 | but in this case, we will simply return the image_id 111 | """ 112 | return "crowdai-mapping-challenge::{}".format(image_id) 113 | # The following two functions are from pycocotools with a few changes. 114 | 115 | def annToRLE(self, ann, height, width): 116 | """ 117 | Convert annotation which can be polygons, uncompressed RLE to RLE. 
118 | :return: binary mask (numpy 2D array) 119 | """ 120 | segm = ann['segmentation'] 121 | if isinstance(segm, list): 122 | # polygon -- a single object might consist of multiple parts 123 | # we merge all parts into one mask rle code 124 | rles = maskUtils.frPyObjects(segm, height, width) 125 | rle = maskUtils.merge(rles) 126 | elif isinstance(segm['counts'], list): 127 | # uncompressed RLE 128 | rle = maskUtils.frPyObjects(segm, height, width) 129 | else: 130 | # rle 131 | rle = ann['segmentation'] 132 | return rle 133 | 134 | def annToMask(self, ann, height, width): 135 | """ 136 | Convert annotation which can be polygons, uncompressed RLE, or RLE to binary mask. 137 | :return: binary mask (numpy 2D array) 138 | """ 139 | rle = self.annToRLE(ann, height, width) 140 | m = maskUtils.decode(rle) 141 | return m 142 | -------------------------------------------------------------------------------- /mrcnn/evaluate.py: -------------------------------------------------------------------------------- 1 | from pycocotools.coco import COCO 2 | from mrcnn.cocoeval import COCOeval 3 | from pycocotools import mask as maskUtils 4 | import time 5 | import numpy as np 6 | 7 | ############################################################ 8 | # COCO Evaluation 9 | ############################################################ 10 | 11 | def build_coco_results(dataset, image_ids, rois, class_ids, scores, masks): 12 | """Arrange resutls to match COCO specs in http://cocodataset.org/#format 13 | """ 14 | # If no results, return an empty list 15 | if rois is None: 16 | return [] 17 | 18 | results = [] 19 | for image_id in image_ids: 20 | # Loop through detections 21 | for i in range(rois.shape[0]): 22 | class_id = class_ids[i] 23 | score = scores[i] 24 | bbox = np.around(rois[i], 1) 25 | mask = masks[:, :, i] 26 | 27 | result = { 28 | "image_id": image_id, 29 | "category_id": dataset.get_source_class_id(class_id, "crowdai-mapping-challenge"), 30 | "bbox": [bbox[1], bbox[0], bbox[3] - bbox[1], bbox[2] - bbox[0]], 31 | "score": score, 32 | "segmentation": maskUtils.encode(np.asfortranarray(mask)).encode('utf-8') 33 | } 34 | results.append(result) 35 | return results 36 | 37 | 38 | def evaluate_coco(model, dataset, coco, eval_type="bbox", limit=0, image_ids=None): 39 | """Runs official COCO evaluation. 40 | dataset: A Dataset object with valiadtion data 41 | eval_type: "bbox" or "segm" for bounding box or segmentation evaluation 42 | limit: if not 0, it's the number of images to use for evaluation 43 | """ 44 | # Pick COCO images from the dataset 45 | image_ids = image_ids or dataset.image_ids 46 | 47 | # Limit to a subset 48 | if limit: 49 | image_ids = image_ids[:limit] 50 | 51 | # Get corresponding COCO image IDs. 
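# Illustrative note: build_coco_results() above emits one dict per detection in
# the standard COCO results format, roughly
#     {"image_id": ..., "category_id": 100, "bbox": [x, y, width, height],
#      "score": 0.98, "segmentation": <RLE-encoded mask>}
# (values shown are placeholders); that list is what coco.loadRes() expects
# further below.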
52 | coco_image_ids = [dataset.image_info[id]["id"] for id in image_ids] 53 | 54 | t_prediction = 0 55 | t_start = time.time() 56 | 57 | results = [] 58 | 59 | for i, image_id in enumerate(image_ids): 60 | # Load image 61 | image = dataset.load_image(image_id) 62 | 63 | # Run detection 64 | t = time.time() 65 | print("="*100) 66 | print("Image shape ", image.shape) 67 | r = model.detect([image]) 68 | r = r[0] 69 | t_prediction += (time.time() - t) 70 | print("Prediction time : ", (time.time() - t)) 71 | # Convert results to COCO format 72 | image_results = build_coco_results(dataset, coco_image_ids[i:i + 1], 73 | r["rois"], r["class_ids"], 74 | r["scores"], r["masks"]) 75 | print("Number of detections : ", len(r["rois"])) 76 | print("Classes Predicted : ", r["class_ids"]) 77 | print("Scores : ", r["scores"]) 78 | results.extend(image_results) 79 | 80 | # Load results. This modifies results with additional attributes. 81 | coco_results = coco.loadRes(results) 82 | 83 | # Evaluate 84 | cocoEval = COCOeval(coco, coco_results, eval_type) 85 | cocoEval.params.imgIds = coco_image_ids 86 | cocoEval.evaluate() 87 | cocoEval.accumulate() 88 | ap = cocoEval._summarize(ap=1, iouThr=0.5, areaRng="all", maxDets=100) 89 | ar = cocoEval._summarize(ap=0, areaRng="all", maxDets=100) 90 | print("Precision : ", ap, " Recall : ", ar) 91 | 92 | print("Prediction time: {}. Average {}/image".format( 93 | t_prediction, t_prediction / len(image_ids))) 94 | print("Total time: ", time.time() - t_start) 95 | -------------------------------------------------------------------------------- /mrcnn/parallel_model.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Multi-GPU Support for Keras. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | 9 | Ideas and a small code snippets from these sources: 10 | https://github.com/fchollet/keras/issues/2436 11 | https://medium.com/@kuza55/transparent-multi-gpu-training-on-tensorflow-with-keras-8b0016fd9012 12 | https://github.com/avolkov1/keras_experiments/blob/master/keras_exp/multigpu/ 13 | https://github.com/fchollet/keras/blob/master/keras/utils/training_utils.py 14 | """ 15 | 16 | import tensorflow as tf 17 | import keras.backend as K 18 | import keras.layers as KL 19 | import keras.models as KM 20 | 21 | 22 | class ParallelModel(KM.Model): 23 | """Subclasses the standard Keras Model and adds multi-GPU support. 24 | It works by creating a copy of the model on each GPU. Then it slices 25 | the inputs and sends a slice to each copy of the model, and then 26 | merges the outputs together and applies the loss on the combined 27 | outputs. 28 | """ 29 | 30 | def __init__(self, keras_model, gpu_count): 31 | """Class constructor. 32 | keras_model: The Keras model to parallelize 33 | gpu_count: Number of GPUs. Must be > 1 34 | """ 35 | self.inner_model = keras_model 36 | self.gpu_count = gpu_count 37 | merged_outputs = self.make_parallel() 38 | super(ParallelModel, self).__init__(inputs=self.inner_model.inputs, 39 | outputs=merged_outputs) 40 | 41 | def __getattribute__(self, attrname): 42 | """Redirect loading and saving methods to the inner model. 
That's where 43 | the weights are stored.""" 44 | if 'load' in attrname or 'save' in attrname: 45 | return getattr(self.inner_model, attrname) 46 | return super(ParallelModel, self).__getattribute__(attrname) 47 | 48 | def summary(self, *args, **kwargs): 49 | """Override summary() to display summaries of both, the wrapper 50 | and inner models.""" 51 | super(ParallelModel, self).summary(*args, **kwargs) 52 | self.inner_model.summary(*args, **kwargs) 53 | 54 | def make_parallel(self): 55 | """Creates a new wrapper model that consists of multiple replicas of 56 | the original model placed on different GPUs. 57 | """ 58 | # Slice inputs. Slice inputs on the CPU to avoid sending a copy 59 | # of the full inputs to all GPUs. Saves on bandwidth and memory. 60 | input_slices = {name: tf.split(x, self.gpu_count) 61 | for name, x in zip(self.inner_model.input_names, 62 | self.inner_model.inputs)} 63 | 64 | output_names = self.inner_model.output_names 65 | outputs_all = [] 66 | for i in range(len(self.inner_model.outputs)): 67 | outputs_all.append([]) 68 | 69 | # Run the model call() on each GPU to place the ops there 70 | for i in range(self.gpu_count): 71 | with tf.device('/gpu:%d' % i): 72 | with tf.name_scope('tower_%d' % i): 73 | # Run a slice of inputs through this replica 74 | zipped_inputs = zip(self.inner_model.input_names, 75 | self.inner_model.inputs) 76 | inputs = [ 77 | KL.Lambda(lambda s: input_slices[name][i], 78 | output_shape=lambda s: (None,) + s[1:])(tensor) 79 | for name, tensor in zipped_inputs] 80 | # Create the model replica and get the outputs 81 | outputs = self.inner_model(inputs) 82 | if not isinstance(outputs, list): 83 | outputs = [outputs] 84 | # Save the outputs for merging back together later 85 | for l, o in enumerate(outputs): 86 | outputs_all[l].append(o) 87 | 88 | # Merge outputs on CPU 89 | with tf.device('/cpu:0'): 90 | merged = [] 91 | for outputs, name in zip(outputs_all, output_names): 92 | # If outputs are numbers without dimensions, add a batch dim. 93 | def add_dim(tensor): 94 | """Add a dimension to tensors that don't have any.""" 95 | if K.int_shape(tensor) == (): 96 | return KL.Lambda(lambda t: K.reshape(t, [1, 1]))(tensor) 97 | return tensor 98 | outputs = list(map(add_dim, outputs)) 99 | 100 | # Concatenate 101 | merged.append(KL.Concatenate(axis=0, name=name)(outputs)) 102 | return merged 103 | 104 | 105 | if __name__ == "__main__": 106 | # Testing code below. It creates a simple model to train on MNIST and 107 | # tries to run it on 2 GPUs. It saves the graph so it can be viewed 108 | # in TensorBoard. Run it as: 109 | # 110 | # python3 parallel_model.py 111 | 112 | import os 113 | import numpy as np 114 | import keras.optimizers 115 | from keras.datasets import mnist 116 | from keras.preprocessing.image import ImageDataGenerator 117 | 118 | GPU_COUNT = 2 119 | 120 | # Root directory of the project 121 | ROOT_DIR = os.path.abspath("../") 122 | 123 | # Directory to save logs and trained model 124 | MODEL_DIR = os.path.join(ROOT_DIR, "logs") 125 | 126 | def build_model(x_train, num_classes): 127 | # Reset default graph. Keras leaves old ops in the graph, 128 | # which are ignored for execution but clutter graph 129 | # visualization in TensorBoard. 
130 | tf.reset_default_graph() 131 | 132 | inputs = KL.Input(shape=x_train.shape[1:], name="input_image") 133 | x = KL.Conv2D(32, (3, 3), activation='relu', padding="same", 134 | name="conv1")(inputs) 135 | x = KL.Conv2D(64, (3, 3), activation='relu', padding="same", 136 | name="conv2")(x) 137 | x = KL.MaxPooling2D(pool_size=(2, 2), name="pool1")(x) 138 | x = KL.Flatten(name="flat1")(x) 139 | x = KL.Dense(128, activation='relu', name="dense1")(x) 140 | x = KL.Dense(num_classes, activation='softmax', name="dense2")(x) 141 | 142 | return KM.Model(inputs, x, "digit_classifier_model") 143 | 144 | # Load MNIST Data 145 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 146 | x_train = np.expand_dims(x_train, -1).astype('float32') / 255 147 | x_test = np.expand_dims(x_test, -1).astype('float32') / 255 148 | 149 | print('x_train shape:', x_train.shape) 150 | print('x_test shape:', x_test.shape) 151 | 152 | # Build data generator and model 153 | datagen = ImageDataGenerator() 154 | model = build_model(x_train, 10) 155 | 156 | # Add multi-GPU support. 157 | model = ParallelModel(model, GPU_COUNT) 158 | 159 | optimizer = keras.optimizers.SGD(lr=0.01, momentum=0.9, clipnorm=5.0) 160 | 161 | model.compile(loss='sparse_categorical_crossentropy', 162 | optimizer=optimizer, metrics=['accuracy']) 163 | 164 | model.summary() 165 | 166 | # Train 167 | model.fit_generator( 168 | datagen.flow(x_train, y_train, batch_size=64), 169 | steps_per_epoch=50, epochs=10, verbose=1, 170 | validation_data=(x_test, y_test), 171 | callbacks=[keras.callbacks.TensorBoard(log_dir=MODEL_DIR, 172 | write_graph=True)] 173 | ) 174 | -------------------------------------------------------------------------------- /mrcnn/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Common utility functions and classes. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | """ 9 | 10 | import sys 11 | import os 12 | import math 13 | import random 14 | import numpy as np 15 | import tensorflow as tf 16 | import scipy 17 | import skimage.color 18 | import skimage.io 19 | import skimage.transform 20 | import urllib.request 21 | import shutil 22 | import warnings 23 | 24 | # URL from which to download the latest COCO trained weights 25 | COCO_MODEL_URL = "https://github.com/matterport/Mask_RCNN/releases/download/v2.0/mask_rcnn_coco.h5" 26 | 27 | 28 | ############################################################ 29 | # Bounding Boxes 30 | ############################################################ 31 | 32 | def extract_bboxes(mask): 33 | """Compute bounding boxes from masks. 34 | mask: [height, width, num_instances]. Mask pixels are either 1 or 0. 35 | 36 | Returns: bbox array [num_instances, (y1, x1, y2, x2)]. 37 | """ 38 | boxes = np.zeros([mask.shape[-1], 4], dtype=np.int32) 39 | for i in range(mask.shape[-1]): 40 | m = mask[:, :, i] 41 | # Bounding box. 42 | horizontal_indicies = np.where(np.any(m, axis=0))[0] 43 | vertical_indicies = np.where(np.any(m, axis=1))[0] 44 | if horizontal_indicies.shape[0]: 45 | x1, x2 = horizontal_indicies[[0, -1]] 46 | y1, y2 = vertical_indicies[[0, -1]] 47 | # x2 and y2 should not be part of the box. Increment by 1. 48 | x2 += 1 49 | y2 += 1 50 | else: 51 | # No mask for this instance. Might happen due to 52 | # resizing or cropping. 
Set bbox to zeros 53 | x1, x2, y1, y2 = 0, 0, 0, 0 54 | boxes[i] = np.array([y1, x1, y2, x2]) 55 | return boxes.astype(np.int32) 56 | 57 | 58 | def compute_iou(box, boxes, box_area, boxes_area): 59 | """Calculates IoU of the given box with the array of the given boxes. 60 | box: 1D vector [y1, x1, y2, x2] 61 | boxes: [boxes_count, (y1, x1, y2, x2)] 62 | box_area: float. the area of 'box' 63 | boxes_area: array of length boxes_count. 64 | 65 | Note: the areas are passed in rather than calculated here for 66 | efficency. Calculate once in the caller to avoid duplicate work. 67 | """ 68 | # Calculate intersection areas 69 | y1 = np.maximum(box[0], boxes[:, 0]) 70 | y2 = np.minimum(box[2], boxes[:, 2]) 71 | x1 = np.maximum(box[1], boxes[:, 1]) 72 | x2 = np.minimum(box[3], boxes[:, 3]) 73 | intersection = np.maximum(x2 - x1, 0) * np.maximum(y2 - y1, 0) 74 | union = box_area + boxes_area[:] - intersection[:] 75 | iou = intersection / union 76 | return iou 77 | 78 | 79 | def compute_overlaps(boxes1, boxes2): 80 | """Computes IoU overlaps between two sets of boxes. 81 | boxes1, boxes2: [N, (y1, x1, y2, x2)]. 82 | 83 | For better performance, pass the largest set first and the smaller second. 84 | """ 85 | # Areas of anchors and GT boxes 86 | area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1]) 87 | area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1]) 88 | 89 | # Compute overlaps to generate matrix [boxes1 count, boxes2 count] 90 | # Each cell contains the IoU value. 91 | overlaps = np.zeros((boxes1.shape[0], boxes2.shape[0])) 92 | for i in range(overlaps.shape[1]): 93 | box2 = boxes2[i] 94 | overlaps[:, i] = compute_iou(box2, boxes1, area2[i], area1) 95 | return overlaps 96 | 97 | 98 | def compute_overlaps_masks(masks1, masks2): 99 | '''Computes IoU overlaps between two sets of masks. 100 | masks1, masks2: [Height, Width, instances] 101 | ''' 102 | # flatten masks 103 | masks1 = np.reshape(masks1 > .5, (-1, masks1.shape[-1])).astype(np.float32) 104 | masks2 = np.reshape(masks2 > .5, (-1, masks2.shape[-1])).astype(np.float32) 105 | area1 = np.sum(masks1, axis=0) 106 | area2 = np.sum(masks2, axis=0) 107 | 108 | # intersections and union 109 | intersections = np.dot(masks1.T, masks2) 110 | union = area1[:, None] + area2[None, :] - intersections 111 | overlaps = intersections / union 112 | 113 | return overlaps 114 | 115 | 116 | def non_max_suppression(boxes, scores, threshold): 117 | """Performs non-maximum supression and returns indicies of kept boxes. 118 | boxes: [N, (y1, x1, y2, x2)]. Notice that (y2, x2) lays outside the box. 119 | scores: 1-D array of box scores. 120 | threshold: Float. IoU threshold to use for filtering. 121 | """ 122 | assert boxes.shape[0] > 0 123 | if boxes.dtype.kind != "f": 124 | boxes = boxes.astype(np.float32) 125 | 126 | # Compute box areas 127 | y1 = boxes[:, 0] 128 | x1 = boxes[:, 1] 129 | y2 = boxes[:, 2] 130 | x2 = boxes[:, 3] 131 | area = (y2 - y1) * (x2 - x1) 132 | 133 | # Get indicies of boxes sorted by scores (highest first) 134 | ixs = scores.argsort()[::-1] 135 | 136 | pick = [] 137 | while len(ixs) > 0: 138 | # Pick top box and add its index to the list 139 | i = ixs[0] 140 | pick.append(i) 141 | # Compute IoU of the picked box with the rest 142 | iou = compute_iou(boxes[i], boxes[ixs[1:]], area[i], area[ixs[1:]]) 143 | # Identify boxes with IoU over the threshold. This 144 | # returns indicies into ixs[1:], so add 1 to get 145 | # indicies into ixs. 
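# Example (illustrative): if ixs = [3, 0, 2, 1] and the boxes with ids 0 and 2
# overlap box 3 above the threshold, np.where() returns positions [0, 1] within
# ixs[1:]; adding 1 maps them to positions [1, 2] of ixs, i.e. back to box
# ids 0 and 2, which are then removed.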
146 | remove_ixs = np.where(iou > threshold)[0] + 1 147 | # Remove indicies of the picked and overlapped boxes. 148 | ixs = np.delete(ixs, remove_ixs) 149 | ixs = np.delete(ixs, 0) 150 | return np.array(pick, dtype=np.int32) 151 | 152 | 153 | def apply_box_deltas(boxes, deltas): 154 | """Applies the given deltas to the given boxes. 155 | boxes: [N, (y1, x1, y2, x2)]. Note that (y2, x2) is outside the box. 156 | deltas: [N, (dy, dx, log(dh), log(dw))] 157 | """ 158 | boxes = boxes.astype(np.float32) 159 | # Convert to y, x, h, w 160 | height = boxes[:, 2] - boxes[:, 0] 161 | width = boxes[:, 3] - boxes[:, 1] 162 | center_y = boxes[:, 0] + 0.5 * height 163 | center_x = boxes[:, 1] + 0.5 * width 164 | # Apply deltas 165 | center_y += deltas[:, 0] * height 166 | center_x += deltas[:, 1] * width 167 | height *= np.exp(deltas[:, 2]) 168 | width *= np.exp(deltas[:, 3]) 169 | # Convert back to y1, x1, y2, x2 170 | y1 = center_y - 0.5 * height 171 | x1 = center_x - 0.5 * width 172 | y2 = y1 + height 173 | x2 = x1 + width 174 | return np.stack([y1, x1, y2, x2], axis=1) 175 | 176 | 177 | def box_refinement_graph(box, gt_box): 178 | """Compute refinement needed to transform box to gt_box. 179 | box and gt_box are [N, (y1, x1, y2, x2)] 180 | """ 181 | box = tf.cast(box, tf.float32) 182 | gt_box = tf.cast(gt_box, tf.float32) 183 | 184 | height = box[:, 2] - box[:, 0] 185 | width = box[:, 3] - box[:, 1] 186 | center_y = box[:, 0] + 0.5 * height 187 | center_x = box[:, 1] + 0.5 * width 188 | 189 | gt_height = gt_box[:, 2] - gt_box[:, 0] 190 | gt_width = gt_box[:, 3] - gt_box[:, 1] 191 | gt_center_y = gt_box[:, 0] + 0.5 * gt_height 192 | gt_center_x = gt_box[:, 1] + 0.5 * gt_width 193 | 194 | dy = (gt_center_y - center_y) / height 195 | dx = (gt_center_x - center_x) / width 196 | dh = tf.log(gt_height / height) 197 | dw = tf.log(gt_width / width) 198 | 199 | result = tf.stack([dy, dx, dh, dw], axis=1) 200 | return result 201 | 202 | 203 | def box_refinement(box, gt_box): 204 | """Compute refinement needed to transform box to gt_box. 205 | box and gt_box are [N, (y1, x1, y2, x2)]. (y2, x2) is 206 | assumed to be outside the box. 207 | """ 208 | box = box.astype(np.float32) 209 | gt_box = gt_box.astype(np.float32) 210 | 211 | height = box[:, 2] - box[:, 0] 212 | width = box[:, 3] - box[:, 1] 213 | center_y = box[:, 0] + 0.5 * height 214 | center_x = box[:, 1] + 0.5 * width 215 | 216 | gt_height = gt_box[:, 2] - gt_box[:, 0] 217 | gt_width = gt_box[:, 3] - gt_box[:, 1] 218 | gt_center_y = gt_box[:, 0] + 0.5 * gt_height 219 | gt_center_x = gt_box[:, 1] + 0.5 * gt_width 220 | 221 | dy = (gt_center_y - center_y) / height 222 | dx = (gt_center_x - center_x) / width 223 | dh = np.log(gt_height / height) 224 | dw = np.log(gt_width / width) 225 | 226 | return np.stack([dy, dx, dh, dw], axis=1) 227 | 228 | 229 | ############################################################ 230 | # Dataset 231 | ############################################################ 232 | 233 | class Dataset(object): 234 | """The base class for dataset classes. 235 | To use it, create a new class that adds functions specific to the dataset 236 | you want to use. For example: 237 | 238 | class CatsAndDogsDataset(Dataset): 239 | def load_cats_and_dogs(self): 240 | ... 241 | def load_mask(self, image_id): 242 | ... 243 | def image_reference(self, image_id): 244 | ... 245 | 246 | See COCODataset and ShapesDataset as examples. 
247 | """ 248 | 249 | def __init__(self, class_map=None): 250 | self._image_ids = [] 251 | self.image_info = [] 252 | # Background is always the first class 253 | self.class_info = [{"source": "", "id": 0, "name": "BG"}] 254 | self.source_class_ids = {} 255 | 256 | def add_class(self, source, class_id, class_name): 257 | assert "." not in source, "Source name cannot contain a dot" 258 | # Does the class exist already? 259 | for info in self.class_info: 260 | if info['source'] == source and info["id"] == class_id: 261 | # source.class_id combination already available, skip 262 | return 263 | # Add the class 264 | self.class_info.append({ 265 | "source": source, 266 | "id": class_id, 267 | "name": class_name, 268 | }) 269 | 270 | def add_image(self, source, image_id, path, **kwargs): 271 | image_info = { 272 | "id": image_id, 273 | "source": source, 274 | "path": path, 275 | } 276 | image_info.update(kwargs) 277 | self.image_info.append(image_info) 278 | 279 | def image_reference(self, image_id): 280 | """Return a link to the image in its source Website or details about 281 | the image that help looking it up or debugging it. 282 | 283 | Override for your dataset, but pass to this function 284 | if you encounter images not in your dataset. 285 | """ 286 | return "" 287 | 288 | def prepare(self, class_map=None): 289 | """Prepares the Dataset class for use. 290 | 291 | TODO: class map is not supported yet. When done, it should handle mapping 292 | classes from different datasets to the same class ID. 293 | """ 294 | 295 | def clean_name(name): 296 | """Returns a shorter version of object names for cleaner display.""" 297 | return ",".join(name.split(",")[:1]) 298 | 299 | # Build (or rebuild) everything else from the info dicts. 300 | self.num_classes = len(self.class_info) 301 | self.class_ids = np.arange(self.num_classes) 302 | self.class_names = [clean_name(c["name"]) for c in self.class_info] 303 | self.num_images = len(self.image_info) 304 | self._image_ids = np.arange(self.num_images) 305 | 306 | self.class_from_source_map = {"{}.{}".format(info['source'], info['id']): id 307 | for info, id in zip(self.class_info, self.class_ids)} 308 | 309 | # Map sources to class_ids they support 310 | self.sources = list(set([i['source'] for i in self.class_info])) 311 | self.source_class_ids = {} 312 | # Loop over datasets 313 | for source in self.sources: 314 | self.source_class_ids[source] = [] 315 | # Find classes that belong to this dataset 316 | for i, info in enumerate(self.class_info): 317 | # Include BG class in all datasets 318 | if i == 0 or source == info['source']: 319 | self.source_class_ids[source].append(i) 320 | 321 | def map_source_class_id(self, source_class_id): 322 | """Takes a source class ID and returns the int class ID assigned to it. 323 | 324 | For example: 325 | dataset.map_source_class_id("coco.12") -> 23 326 | """ 327 | return self.class_from_source_map[source_class_id] 328 | 329 | def get_source_class_id(self, class_id, source): 330 | """Map an internal class ID to the corresponding class ID in the source dataset.""" 331 | info = self.class_info[class_id] 332 | assert info['source'] == source 333 | return info['id'] 334 | 335 | def append_data(self, class_info, image_info): 336 | self.external_to_class_id = {} 337 | for i, c in enumerate(self.class_info): 338 | for ds, id in c["map"]: 339 | self.external_to_class_id[ds + str(id)] = i 340 | 341 | # Map external image IDs to internal ones. 
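# Illustrative note: the same source<->internal mapping is used for classes.
# For this dataset, prepare() maps the source key
# "crowdai-mapping-challenge.100" (100 being the Building category id) to
# internal class id 1 (0 is the background class), and
# get_source_class_id(1, "crowdai-mapping-challenge") maps it back to 100 when
# detections are written out in mrcnn/evaluate.py.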
342 | self.external_to_image_id = {} 343 | for i, info in enumerate(self.image_info): 344 | self.external_to_image_id[info["ds"] + str(info["id"])] = i 345 | 346 | @property 347 | def image_ids(self): 348 | return self._image_ids 349 | 350 | def source_image_link(self, image_id): 351 | """Returns the path or URL to the image. 352 | Override this to return a URL to the image if it's availble online for easy 353 | debugging. 354 | """ 355 | return self.image_info[image_id]["path"] 356 | 357 | def load_image(self, image_id): 358 | """Load the specified image and return a [H,W,3] Numpy array. 359 | """ 360 | # Load image 361 | image = skimage.io.imread(self.image_info[image_id]['path']) 362 | # If grayscale. Convert to RGB for consistency. 363 | if image.ndim != 3: 364 | image = skimage.color.gray2rgb(image) 365 | # If has an alpha channel, remove it for consistency 366 | if image.shape[-1] == 4: 367 | image = image[..., :3] 368 | return image 369 | 370 | def load_mask(self, image_id): 371 | """Load instance masks for the given image. 372 | 373 | Different datasets use different ways to store masks. Override this 374 | method to load instance masks and return them in the form of am 375 | array of binary masks of shape [height, width, instances]. 376 | 377 | Returns: 378 | masks: A bool array of shape [height, width, instance count] with 379 | a binary mask per instance. 380 | class_ids: a 1D array of class IDs of the instance masks. 381 | """ 382 | # Override this function to load a mask from your dataset. 383 | # Otherwise, it returns an empty mask. 384 | mask = np.empty([0, 0, 0]) 385 | class_ids = np.empty([0], np.int32) 386 | return mask, class_ids 387 | 388 | 389 | def resize_image(image, min_dim=None, max_dim=None, mode="square"): 390 | """Resizes an image keeping the aspect ratio unchanged. 391 | 392 | min_dim: if provided, resizes the image such that it's smaller 393 | dimension == min_dim 394 | max_dim: if provided, ensures that the image longest side doesn't 395 | exceed this value. 396 | mode: Resizing mode. 397 | none: No resizing. Return the image unchanged. 398 | square: Resize and pad with zeros to get a square image 399 | of size [max_dim, max_dim]. 400 | pad64: Pads width and height with zeros to make them multiples of 64. 401 | If min_dim is provided, it scales the small side to >= min_dim 402 | before padding. max_dim is ignored in this mode. 403 | The multiple of 64 is needed to ensure smooth scaling of feature 404 | maps up and down the 6 levels of the FPN pyramid (2**6=64). 405 | 406 | Returns: 407 | image: the resized image 408 | window: (y1, x1, y2, x2). If max_dim is provided, padding might 409 | be inserted in the returned image. If so, this window is the 410 | coordinates of the image part of the full image (excluding 411 | the padding). The x2, y2 pixels are not included. 412 | scale: The scale factor used to resize the image 413 | padding: Padding added to the image [(top, bottom), (left, right), (0, 0)] 414 | """ 415 | # Keep track of image dtype and return results in the same dtype 416 | image_dtype = image.dtype 417 | # Default window (y1, x1, y2, x2) and default scale == 1. 418 | h, w = image.shape[:2] 419 | window = (0, 0, h, w) 420 | scale = 1 421 | padding = [(0, 0), (0, 0), (0, 0)] 422 | 423 | if mode == "none": 424 | return image, window, scale, padding 425 | 426 | # Scale? 427 | if min_dim: 428 | # Scale up but not down 429 | scale = max(1, min_dim / min(h, w)) 430 | # Does it exceed max dim? 
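# Worked example (illustrative): a 600x800 (h x w) image with min_dim=800,
# max_dim=1024 and mode="square" first gets scale = 800/600 = 1.33; since
# 800 * 1.33 would exceed 1024, the scale is reduced to 1024/800 = 1.28 below,
# the image is resized to 768x1024, and it is then zero-padded to 1024x1024
# with `window` marking the un-padded region.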
431 | if max_dim and mode == "square": 432 | image_max = max(h, w) 433 | if round(image_max * scale) > max_dim: 434 | scale = max_dim / image_max 435 | 436 | # Resize image using bilinear interpolation 437 | if scale != 1: 438 | image = skimage.transform.resize( 439 | image, (round(h * scale), round(w * scale)), 440 | order=1, mode="constant", preserve_range=True) 441 | # Need padding? 442 | if mode == "square": 443 | # Get new height and width 444 | h, w = image.shape[:2] 445 | top_pad = (max_dim - h) // 2 446 | bottom_pad = max_dim - h - top_pad 447 | left_pad = (max_dim - w) // 2 448 | right_pad = max_dim - w - left_pad 449 | padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)] 450 | image = np.pad(image, padding, mode='constant', constant_values=0) 451 | window = (top_pad, left_pad, h + top_pad, w + left_pad) 452 | elif mode == "pad64": 453 | h, w = image.shape[:2] 454 | # Both sides must be divisible by 64 455 | assert min_dim % 64 == 0, "Minimum dimension must be a multiple of 64" 456 | # Height 457 | if h % 64 > 0: 458 | max_h = h - (h % 64) + 64 459 | top_pad = (max_h - h) // 2 460 | bottom_pad = max_h - h - top_pad 461 | else: 462 | top_pad = bottom_pad = 0 463 | # Width 464 | if w % 64 > 0: 465 | max_w = w - (w % 64) + 64 466 | left_pad = (max_w - w) // 2 467 | right_pad = max_w - w - left_pad 468 | else: 469 | left_pad = right_pad = 0 470 | padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)] 471 | image = np.pad(image, padding, mode='constant', constant_values=0) 472 | window = (top_pad, left_pad, h + top_pad, w + left_pad) 473 | else: 474 | raise Exception("Mode {} not supported".format(mode)) 475 | return image.astype(image_dtype), window, scale, padding 476 | 477 | 478 | def resize_mask(mask, scale, padding): 479 | """Resizes a mask using the given scale and padding. 480 | Typically, you get the scale and padding from resize_image() to 481 | ensure both, the image and the mask, are resized consistently. 482 | 483 | scale: mask scaling factor 484 | padding: Padding to add to the mask in the form 485 | [(top, bottom), (left, right), (0, 0)] 486 | """ 487 | # Suppress warning from scipy 0.13.0, the output shape of zoom() is 488 | # calculated with round() instead of int() 489 | with warnings.catch_warnings(): 490 | warnings.simplefilter("ignore") 491 | mask = scipy.ndimage.zoom(mask, zoom=[scale, scale, 1], order=0) 492 | mask = np.pad(mask, padding, mode='constant', constant_values=0) 493 | return mask 494 | 495 | 496 | def minimize_mask(bbox, mask, mini_shape): 497 | """Resize masks to a smaller version to reduce memory load. 498 | Mini-masks can be resized back to image scale using expand_masks() 499 | 500 | See inspect_data.ipynb notebook for more details. 501 | """ 502 | mini_mask = np.zeros(mini_shape + (mask.shape[-1],), dtype=bool) 503 | for i in range(mask.shape[-1]): 504 | # Pick slice and cast to bool in case load_mask() returned wrong dtype 505 | m = mask[:, :, i].astype(bool) 506 | y1, x1, y2, x2 = bbox[i][:4] 507 | m = m[y1:y2, x1:x2] 508 | if m.size == 0: 509 | raise Exception("Invalid bounding box with area of zero") 510 | # Resize with bilinear interpolation 511 | m = skimage.transform.resize(m, mini_shape, order=1, mode="constant") 512 | mini_mask[:, :, i] = np.around(m).astype(np.bool) 513 | return mini_mask 514 | 515 | 516 | def expand_mask(bbox, mini_mask, image_shape): 517 | """Resizes mini masks back to image size. Reverses the change 518 | of minimize_mask(). 519 | 520 | See inspect_data.ipynb notebook for more details. 
521 | """ 522 | mask = np.zeros(image_shape[:2] + (mini_mask.shape[-1],), dtype=bool) 523 | for i in range(mask.shape[-1]): 524 | m = mini_mask[:, :, i] 525 | y1, x1, y2, x2 = bbox[i][:4] 526 | h = y2 - y1 527 | w = x2 - x1 528 | # Resize with bilinear interpolation 529 | m = skimage.transform.resize(m, (h, w), order=1, mode="constant") 530 | mask[y1:y2, x1:x2, i] = np.around(m).astype(np.bool) 531 | return mask 532 | 533 | 534 | # TODO: Build and use this function to reduce code duplication 535 | def mold_mask(mask, config): 536 | pass 537 | 538 | 539 | def unmold_mask(mask, bbox, image_shape): 540 | """Converts a mask generated by the neural network to a format similar 541 | to its original shape. 542 | mask: [height, width] of type float. A small, typically 28x28 mask. 543 | bbox: [y1, x1, y2, x2]. The box to fit the mask in. 544 | 545 | Returns a binary mask with the same size as the original image. 546 | """ 547 | threshold = 0.5 548 | y1, x1, y2, x2 = bbox 549 | mask = skimage.transform.resize(mask, (y2 - y1, x2 - x1), order=1, mode="constant") 550 | mask = np.where(mask >= threshold, 1, 0).astype(np.bool) 551 | 552 | # Put the mask in the right location. 553 | full_mask = np.zeros(image_shape[:2], dtype=np.bool) 554 | full_mask[y1:y2, x1:x2] = mask 555 | return full_mask 556 | 557 | 558 | ############################################################ 559 | # Anchors 560 | ############################################################ 561 | 562 | def generate_anchors(scales, ratios, shape, feature_stride, anchor_stride): 563 | """ 564 | scales: 1D array of anchor sizes in pixels. Example: [32, 64, 128] 565 | ratios: 1D array of anchor ratios of width/height. Example: [0.5, 1, 2] 566 | shape: [height, width] spatial shape of the feature map over which 567 | to generate anchors. 568 | feature_stride: Stride of the feature map relative to the image in pixels. 569 | anchor_stride: Stride of anchors on the feature map. For example, if the 570 | value is 2 then generate anchors for every other feature map pixel. 571 | """ 572 | # Get all combinations of scales and ratios 573 | scales, ratios = np.meshgrid(np.array(scales), np.array(ratios)) 574 | scales = scales.flatten() 575 | ratios = ratios.flatten() 576 | 577 | # Enumerate heights and widths from scales and ratios 578 | heights = scales / np.sqrt(ratios) 579 | widths = scales * np.sqrt(ratios) 580 | 581 | # Enumerate shifts in feature space 582 | shifts_y = np.arange(0, shape[0], anchor_stride) * feature_stride 583 | shifts_x = np.arange(0, shape[1], anchor_stride) * feature_stride 584 | shifts_x, shifts_y = np.meshgrid(shifts_x, shifts_y) 585 | 586 | # Enumerate combinations of shifts, widths, and heights 587 | box_widths, box_centers_x = np.meshgrid(widths, shifts_x) 588 | box_heights, box_centers_y = np.meshgrid(heights, shifts_y) 589 | 590 | # Reshape to get a list of (y, x) and a list of (h, w) 591 | box_centers = np.stack( 592 | [box_centers_y, box_centers_x], axis=2).reshape([-1, 2]) 593 | box_sizes = np.stack([box_heights, box_widths], axis=2).reshape([-1, 2]) 594 | 595 | # Convert to corner coordinates (y1, x1, y2, x2) 596 | boxes = np.concatenate([box_centers - 0.5 * box_sizes, 597 | box_centers + 0.5 * box_sizes], axis=1) 598 | return boxes 599 | 600 | 601 | def generate_pyramid_anchors(scales, ratios, feature_shapes, feature_strides, 602 | anchor_stride): 603 | """Generate anchors at different levels of a feature pyramid. 
Each scale 604 | is associated with a level of the pyramid, but each ratio is used in 605 | all levels of the pyramid. 606 | 607 | Returns: 608 | anchors: [N, (y1, x1, y2, x2)]. All generated anchors in one array. Sorted 609 | with the same order of the given scales. So, anchors of scale[0] come 610 | first, then anchors of scale[1], and so on. 611 | """ 612 | # Anchors 613 | # [anchor_count, (y1, x1, y2, x2)] 614 | anchors = [] 615 | for i in range(len(scales)): 616 | anchors.append(generate_anchors(scales[i], ratios, feature_shapes[i], 617 | feature_strides[i], anchor_stride)) 618 | return np.concatenate(anchors, axis=0) 619 | 620 | 621 | ############################################################ 622 | # Miscellaneous 623 | ############################################################ 624 | 625 | def trim_zeros(x): 626 | """It's common to have tensors larger than the available data and 627 | pad with zeros. This function removes rows that are all zeros. 628 | 629 | x: [rows, columns]. 630 | """ 631 | assert len(x.shape) == 2 632 | return x[~np.all(x == 0, axis=1)] 633 | 634 | 635 | def compute_matches(gt_boxes, gt_class_ids, gt_masks, 636 | pred_boxes, pred_class_ids, pred_scores, pred_masks, 637 | iou_threshold=0.5, score_threshold=0.0): 638 | """Finds matches between prediction and ground truth instances. 639 | 640 | Returns: 641 | gt_match: 1-D array. For each GT box it has the index of the matched 642 | predicted box. 643 | pred_match: 1-D array. For each predicted box, it has the index of 644 | the matched ground truth box. 645 | overlaps: [pred_boxes, gt_boxes] IoU overlaps. 646 | """ 647 | # Trim zero padding 648 | # TODO: cleaner to do zero unpadding upstream 649 | gt_boxes = trim_zeros(gt_boxes) 650 | gt_masks = gt_masks[..., :gt_boxes.shape[0]] 651 | pred_boxes = trim_zeros(pred_boxes) 652 | pred_scores = pred_scores[:pred_boxes.shape[0]] 653 | # Sort predictions by score from high to low 654 | indices = np.argsort(pred_scores)[::-1] 655 | pred_boxes = pred_boxes[indices] 656 | pred_class_ids = pred_class_ids[indices] 657 | pred_scores = pred_scores[indices] 658 | pred_masks = pred_masks[..., indices] 659 | 660 | # Compute IoU overlaps [pred_masks, gt_masks] 661 | overlaps = compute_overlaps_masks(pred_masks, gt_masks) 662 | 663 | # Loop through predictions and find matching ground truth boxes 664 | match_count = 0 665 | pred_match = -1 * np.ones([pred_boxes.shape[0]]) 666 | gt_match = -1 * np.ones([gt_boxes.shape[0]]) 667 | for i in range(len(pred_boxes)): 668 | # Find best matching ground truth box 669 | # 1. Sort matches by score 670 | sorted_ixs = np.argsort(overlaps[i])[::-1] 671 | # 2. Remove low scores 672 | low_score_idx = np.where(overlaps[i, sorted_ixs] < score_threshold)[0] 673 | if low_score_idx.size > 0: 674 | sorted_ixs = sorted_ixs[:low_score_idx[0]] 675 | # 3. Find the match 676 | for j in sorted_ixs: 677 | # If ground truth box is already matched, go to next one 678 | if gt_match[j] > 0: 679 | continue 680 | # If we reach IoU smaller than the threshold, end the loop 681 | iou = overlaps[i, j] 682 | if iou < iou_threshold: 683 | break 684 | # Do we have a match? 685 | if pred_class_ids[i] == gt_class_ids[j]: 686 | match_count += 1 687 | gt_match[j] = i 688 | pred_match[i] = j 689 | break 690 | 691 | return gt_match, pred_match, overlaps 692 | 693 | 694 | def compute_ap(gt_boxes, gt_class_ids, gt_masks, 695 | pred_boxes, pred_class_ids, pred_scores, pred_masks, 696 | iou_threshold=0.5): 697 | """Compute Average Precision at a set IoU threshold (default 0.5). 
698 | 699 | Returns: 700 | mAP: Mean Average Precision 701 | precisions: List of precisions at different class score thresholds. 702 | recalls: List of recall values at different class score thresholds. 703 | overlaps: [pred_boxes, gt_boxes] IoU overlaps. 704 | """ 705 | # Get matches and overlaps 706 | gt_match, pred_match, overlaps = compute_matches( 707 | gt_boxes, gt_class_ids, gt_masks, 708 | pred_boxes, pred_class_ids, pred_scores, pred_masks, 709 | iou_threshold) 710 | 711 | # Compute precision and recall at each prediction box step 712 | precisions = np.cumsum(pred_match > -1) / (np.arange(len(pred_match)) + 1) 713 | recalls = np.cumsum(pred_match > -1).astype(np.float32) / len(gt_match) 714 | 715 | # Pad with start and end values to simplify the math 716 | precisions = np.concatenate([[0], precisions, [0]]) 717 | recalls = np.concatenate([[0], recalls, [1]]) 718 | 719 | # Ensure precision values decrease but don't increase. This way, the 720 | # precision value at each recall threshold is the maximum it can be 721 | # for all following recall thresholds, as specified by the VOC paper. 722 | for i in range(len(precisions) - 2, -1, -1): 723 | precisions[i] = np.maximum(precisions[i], precisions[i + 1]) 724 | 725 | # Compute mean AP over recall range 726 | indices = np.where(recalls[:-1] != recalls[1:])[0] + 1 727 | mAP = np.sum((recalls[indices] - recalls[indices - 1]) * 728 | precisions[indices]) 729 | 730 | return mAP, precisions, recalls, overlaps 731 | 732 | 733 | def compute_recall(pred_boxes, gt_boxes, iou): 734 | """Compute the recall at the given IoU threshold. It's an indication 735 | of how many GT boxes were found by the given prediction boxes. 736 | 737 | pred_boxes: [N, (y1, x1, y2, x2)] in image coordinates 738 | gt_boxes: [N, (y1, x1, y2, x2)] in image coordinates 739 | """ 740 | # Measure overlaps 741 | overlaps = compute_overlaps(pred_boxes, gt_boxes) 742 | iou_max = np.max(overlaps, axis=1) 743 | iou_argmax = np.argmax(overlaps, axis=1) 744 | positive_ids = np.where(iou_max >= iou)[0] 745 | matched_gt_boxes = iou_argmax[positive_ids] 746 | 747 | recall = len(set(matched_gt_boxes)) / gt_boxes.shape[0] 748 | return recall, positive_ids 749 | 750 | 751 | # ## Batch Slicing 752 | # Some custom layers support a batch size of 1 only, and require a lot of work 753 | # to support batches greater than 1. This function slices an input tensor 754 | # across the batch dimension and feeds batches of size 1. Effectively, 755 | # an easy way to support batches > 1 quickly with little code modification. 756 | # In the long run, it's more efficient to modify the code to support large 757 | # batches and getting rid of this function. Consider this a temporary solution 758 | def batch_slice(inputs, graph_fn, batch_size, names=None): 759 | """Splits inputs into slices and feeds each slice to a copy of the given 760 | computation graph and then combines the results. It allows you to run a 761 | graph on a batch of inputs even if the graph is written to support one 762 | instance only. 763 | 764 | inputs: list of tensors. All must have the same first dimension length 765 | graph_fn: A function that returns a TF tensor that's part of a graph. 766 | batch_size: number of slices to divide the data into. 767 | names: If provided, assigns names to the resulting tensors. 
768 | """ 769 | if not isinstance(inputs, list): 770 | inputs = [inputs] 771 | 772 | outputs = [] 773 | for i in range(batch_size): 774 | inputs_slice = [x[i] for x in inputs] 775 | output_slice = graph_fn(*inputs_slice) 776 | if not isinstance(output_slice, (tuple, list)): 777 | output_slice = [output_slice] 778 | outputs.append(output_slice) 779 | # Change outputs from a list of slices where each is 780 | # a list of outputs to a list of outputs and each has 781 | # a list of slices 782 | outputs = list(zip(*outputs)) 783 | 784 | if names is None: 785 | names = [None] * len(outputs) 786 | 787 | result = [tf.stack(o, axis=0, name=n) 788 | for o, n in zip(outputs, names)] 789 | if len(result) == 1: 790 | result = result[0] 791 | 792 | return result 793 | 794 | 795 | def download_trained_weights(coco_model_path, verbose=1): 796 | """Download COCO trained weights from Releases. 797 | 798 | coco_model_path: local path of COCO trained weights 799 | """ 800 | if verbose > 0: 801 | print("Downloading pretrained model to " + coco_model_path + " ...") 802 | with urllib.request.urlopen(COCO_MODEL_URL) as resp, open(coco_model_path, 'wb') as out: 803 | shutil.copyfileobj(resp, out) 804 | if verbose > 0: 805 | print("... done downloading pretrained model!") 806 | 807 | 808 | def norm_boxes(boxes, shape): 809 | """Converts boxes from pixel coordinates to normalized coordinates. 810 | boxes: [N, (y1, x1, y2, x2)] in pixel coordinates 811 | shape: [..., (height, width)] in pixels 812 | 813 | Note: In pixel coordinates (y2, x2) is outside the box. But in normalized 814 | coordinates it's inside the box. 815 | 816 | Returns: 817 | [N, (y1, x1, y2, x2)] in normalized coordinates 818 | """ 819 | h, w = shape 820 | scale = np.array([h - 1, w - 1, h - 1, w - 1]) 821 | shift = np.array([0, 0, 1, 1]) 822 | return np.divide((boxes - shift), scale).astype(np.float32) 823 | 824 | 825 | def denorm_boxes(boxes, shape): 826 | """Converts boxes from normalized coordinates to pixel coordinates. 827 | boxes: [N, (y1, x1, y2, x2)] in normalized coordinates 828 | shape: [..., (height, width)] in pixels 829 | 830 | Note: In pixel coordinates (y2, x2) is outside the box. But in normalized 831 | coordinates it's inside the box. 832 | 833 | Returns: 834 | [N, (y1, x1, y2, x2)] in pixel coordinates 835 | """ 836 | h, w = shape 837 | scale = np.array([h - 1, w - 1, h - 1, w - 1]) 838 | shift = np.array([0, 0, 1, 1]) 839 | return np.around(np.multiply(boxes, scale) + shift).astype(np.int32) 840 | -------------------------------------------------------------------------------- /mrcnn/visualize.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Display and Visualization Functions. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 
6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | """ 9 | 10 | import os 11 | import sys 12 | import logging 13 | import random 14 | import itertools 15 | import colorsys 16 | 17 | import numpy as np 18 | from skimage.measure import find_contours 19 | import matplotlib.pyplot as plt 20 | from matplotlib import patches, lines 21 | from matplotlib.patches import Polygon 22 | import IPython.display 23 | 24 | # Root directory of the project 25 | ROOT_DIR = os.path.abspath("../") 26 | 27 | # Import Mask RCNN 28 | sys.path.append(ROOT_DIR) # To find local version of the library 29 | from mrcnn import utils 30 | 31 | 32 | ############################################################ 33 | # Visualization 34 | ############################################################ 35 | 36 | def display_images(images, titles=None, cols=4, cmap=None, norm=None, 37 | interpolation=None): 38 | """Display the given set of images, optionally with titles. 39 | images: list or array of image tensors in HWC format. 40 | titles: optional. A list of titles to display with each image. 41 | cols: number of images per row 42 | cmap: Optional. Color map to use. For example, "Blues". 43 | norm: Optional. A Normalize instance to map values to colors. 44 | interpolation: Optional. Image interporlation to use for display. 45 | """ 46 | titles = titles if titles is not None else [""] * len(images) 47 | rows = len(images) // cols + 1 48 | plt.figure(figsize=(14, 14 * rows // cols)) 49 | i = 1 50 | for image, title in zip(images, titles): 51 | plt.subplot(rows, cols, i) 52 | plt.title(title, fontsize=9) 53 | plt.axis('off') 54 | plt.imshow(image.astype(np.uint8), cmap=cmap, 55 | norm=norm, interpolation=interpolation) 56 | i += 1 57 | plt.show() 58 | 59 | 60 | def random_colors(N, bright=True): 61 | """ 62 | Generate random colors. 63 | To get visually distinct colors, generate them in HSV space then 64 | convert to RGB. 65 | """ 66 | brightness = 1.0 if bright else 0.7 67 | hsv = [(i / N, 1, brightness) for i in range(N)] 68 | colors = list(map(lambda c: colorsys.hsv_to_rgb(*c), hsv)) 69 | random.shuffle(colors) 70 | return colors 71 | 72 | 73 | def apply_mask(image, mask, color, alpha=0.5): 74 | """Apply the given mask to the image. 75 | """ 76 | for c in range(3): 77 | image[:, :, c] = np.where(mask == 1, 78 | image[:, :, c] * 79 | (1 - alpha) + alpha * color[c] * 255, 80 | image[:, :, c]) 81 | return image 82 | 83 | 84 | def display_instances(image, boxes, masks, class_ids, class_names, 85 | scores=None, title="", 86 | figsize=(16, 16), ax=None): 87 | """ 88 | boxes: [num_instance, (y1, x1, y2, x2, class_id)] in image coordinates. 89 | masks: [height, width, num_instances] 90 | class_ids: [num_instances] 91 | class_names: list of class names of the dataset 92 | scores: (optional) confidence scores for each box 93 | figsize: (optional) the size of the image. 94 | """ 95 | # Number of instances 96 | N = boxes.shape[0] 97 | if not N: 98 | print("\n*** No instances to display *** \n") 99 | else: 100 | assert boxes.shape[0] == masks.shape[-1] == class_ids.shape[0] 101 | 102 | if not ax: 103 | _, ax = plt.subplots(1, figsize=figsize) 104 | 105 | # Generate random colors 106 | colors = random_colors(N) 107 | 108 | # Show area outside image boundaries. 
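    # (The y-limits run from height + 10 down to -10, which flips the axis so the
    #  origin sits at the top-left corner, matching image coordinates.)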
109 | height, width = image.shape[:2] 110 | ax.set_ylim(height + 10, -10) 111 | ax.set_xlim(-10, width + 10) 112 | ax.axis('off') 113 | ax.set_title(title) 114 | 115 | masked_image = image.astype(np.uint32).copy() 116 | for i in range(N): 117 | color = colors[i] 118 | 119 | # Bounding box 120 | if not np.any(boxes[i]): 121 | # Skip this instance. Has no bbox. Likely lost in image cropping. 122 | continue 123 | y1, x1, y2, x2 = boxes[i] 124 | p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, 125 | alpha=0.7, linestyle="dashed", 126 | edgecolor=color, facecolor='none') 127 | ax.add_patch(p) 128 | 129 | # Label 130 | class_id = class_ids[i] 131 | score = scores[i] if scores is not None else None 132 | label = class_names[class_id] 133 | x = random.randint(x1, (x1 + x2) // 2) 134 | caption = "{} {:.3f}".format(label, score) if score else label 135 | ax.text(x1, y1 + 8, caption, 136 | color='w', size=11, backgroundcolor="none") 137 | 138 | # Mask 139 | mask = masks[:, :, i] 140 | masked_image = apply_mask(masked_image, mask, color) 141 | 142 | # Mask Polygon 143 | # Pad to ensure proper polygons for masks that touch image edges. 144 | padded_mask = np.zeros( 145 | (mask.shape[0] + 2, mask.shape[1] + 2), dtype=np.uint8) 146 | padded_mask[1:-1, 1:-1] = mask 147 | contours = find_contours(padded_mask, 0.5) 148 | for verts in contours: 149 | # Subtract the padding and flip (y, x) to (x, y) 150 | verts = np.fliplr(verts) - 1 151 | p = Polygon(verts, facecolor="none", edgecolor=color) 152 | ax.add_patch(p) 153 | ax.imshow(masked_image.astype(np.uint8)) 154 | plt.show() 155 | 156 | 157 | def draw_rois(image, rois, refined_rois, mask, class_ids, class_names, limit=10): 158 | """ 159 | anchors: [n, (y1, x1, y2, x2)] list of anchors in image coordinates. 160 | proposals: [n, 4] the same anchors but refined to fit objects better. 161 | """ 162 | masked_image = image.copy() 163 | 164 | # Pick random anchors in case there are too many. 165 | ids = np.arange(rois.shape[0], dtype=np.int32) 166 | ids = np.random.choice( 167 | ids, limit, replace=False) if ids.shape[0] > limit else ids 168 | 169 | fig, ax = plt.subplots(1, figsize=(12, 12)) 170 | if rois.shape[0] > limit: 171 | plt.title("Showing {} random ROIs out of {}".format( 172 | len(ids), rois.shape[0])) 173 | else: 174 | plt.title("{} ROIs".format(len(ids))) 175 | 176 | # Show area outside image boundaries. 
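    # As above, the inverted y-limits keep the origin at the top-left of the image.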
177 | ax.set_ylim(image.shape[0] + 20, -20) 178 | ax.set_xlim(-50, image.shape[1] + 20) 179 | ax.axis('off') 180 | 181 | for i, id in enumerate(ids): 182 | color = np.random.rand(3) 183 | class_id = class_ids[id] 184 | # ROI 185 | y1, x1, y2, x2 = rois[id] 186 | p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, 187 | edgecolor=color if class_id else "gray", 188 | facecolor='none', linestyle="dashed") 189 | ax.add_patch(p) 190 | # Refined ROI 191 | if class_id: 192 | ry1, rx1, ry2, rx2 = refined_rois[id] 193 | p = patches.Rectangle((rx1, ry1), rx2 - rx1, ry2 - ry1, linewidth=2, 194 | edgecolor=color, facecolor='none') 195 | ax.add_patch(p) 196 | # Connect the top-left corners of the anchor and proposal for easy visualization 197 | ax.add_line(lines.Line2D([x1, rx1], [y1, ry1], color=color)) 198 | 199 | # Label 200 | label = class_names[class_id] 201 | ax.text(rx1, ry1 + 8, "{}".format(label), 202 | color='w', size=11, backgroundcolor="none") 203 | 204 | # Mask 205 | m = utils.unmold_mask(mask[id], rois[id] 206 | [:4].astype(np.int32), image.shape) 207 | masked_image = apply_mask(masked_image, m, color) 208 | 209 | ax.imshow(masked_image) 210 | 211 | # Print stats 212 | print("Positive ROIs: ", class_ids[class_ids > 0].shape[0]) 213 | print("Negative ROIs: ", class_ids[class_ids == 0].shape[0]) 214 | print("Positive Ratio: {:.2f}".format( 215 | class_ids[class_ids > 0].shape[0] / class_ids.shape[0])) 216 | 217 | 218 | # TODO: Replace with matplotlib equivalent? 219 | def draw_box(image, box, color): 220 | """Draw 3-pixel width bounding boxes on the given image array. 221 | color: list of 3 int values for RGB. 222 | """ 223 | y1, x1, y2, x2 = box 224 | image[y1:y1 + 2, x1:x2] = color 225 | image[y2:y2 + 2, x1:x2] = color 226 | image[y1:y2, x1:x1 + 2] = color 227 | image[y1:y2, x2:x2 + 2] = color 228 | return image 229 | 230 | 231 | def display_top_masks(image, mask, class_ids, class_names, limit=4): 232 | """Display the given image and the top few class masks.""" 233 | to_display = [] 234 | titles = [] 235 | to_display.append(image) 236 | titles.append("H x W={}x{}".format(image.shape[0], image.shape[1])) 237 | # Pick top prominent classes in this image 238 | unique_class_ids = np.unique(class_ids) 239 | mask_area = [np.sum(mask[:, :, np.where(class_ids == i)[0]]) 240 | for i in unique_class_ids] 241 | top_ids = [v[0] for v in sorted(zip(unique_class_ids, mask_area), 242 | key=lambda r: r[1], reverse=True) if v[1] > 0] 243 | # Generate images and titles 244 | for i in range(limit): 245 | class_id = top_ids[i] if i < len(top_ids) else -1 246 | # Pull masks of instances belonging to the same class. 247 | m = mask[:, :, np.where(class_ids == class_id)[0]] 248 | m = np.sum(m * np.arange(1, m.shape[-1] + 1), -1) 249 | to_display.append(m) 250 | titles.append(class_names[class_id] if class_id != -1 else "-") 251 | display_images(to_display, titles=titles, cols=limit + 1, cmap="Blues_r") 252 | 253 | 254 | def plot_precision_recall(AP, precisions, recalls): 255 | """Draw the precision-recall curve. 256 | 257 | AP: Average precision at IoU >= 0.5 258 | precisions: list of precision values 259 | recalls: list of recall values 260 | """ 261 | # Plot the Precision-Recall curve 262 | _, ax = plt.subplots(1) 263 | ax.set_title("Precision-Recall Curve. 
AP@50 = {:.3f}".format(AP)) 264 | ax.set_ylim(0, 1.1) 265 | ax.set_xlim(0, 1.1) 266 | _ = ax.plot(recalls, precisions) 267 | 268 | 269 | def plot_overlaps(gt_class_ids, pred_class_ids, pred_scores, 270 | overlaps, class_names, threshold=0.5): 271 | """Draw a grid showing how ground truth objects are classified. 272 | gt_class_ids: [N] int. Ground truth class IDs 273 | pred_class_id: [N] int. Predicted class IDs 274 | pred_scores: [N] float. The probability scores of predicted classes 275 | overlaps: [pred_boxes, gt_boxes] IoU overlaps of predictins and GT boxes. 276 | class_names: list of all class names in the dataset 277 | threshold: Float. The prediction probability required to predict a class 278 | """ 279 | gt_class_ids = gt_class_ids[gt_class_ids != 0] 280 | pred_class_ids = pred_class_ids[pred_class_ids != 0] 281 | 282 | plt.figure(figsize=(12, 10)) 283 | plt.imshow(overlaps, interpolation='nearest', cmap=plt.cm.Blues) 284 | plt.yticks(np.arange(len(pred_class_ids)), 285 | ["{} ({:.2f})".format(class_names[int(id)], pred_scores[i]) 286 | for i, id in enumerate(pred_class_ids)]) 287 | plt.xticks(np.arange(len(gt_class_ids)), 288 | [class_names[int(id)] for id in gt_class_ids], rotation=90) 289 | 290 | thresh = overlaps.max() / 2. 291 | for i, j in itertools.product(range(overlaps.shape[0]), 292 | range(overlaps.shape[1])): 293 | text = "" 294 | if overlaps[i, j] > threshold: 295 | text = "match" if gt_class_ids[j] == pred_class_ids[i] else "wrong" 296 | color = ("white" if overlaps[i, j] > thresh 297 | else "black" if overlaps[i, j] > 0 298 | else "grey") 299 | plt.text(j, i, "{:.3f}\n{}".format(overlaps[i, j], text), 300 | horizontalalignment="center", verticalalignment="center", 301 | fontsize=9, color=color) 302 | 303 | plt.tight_layout() 304 | plt.xlabel("Ground Truth") 305 | plt.ylabel("Predictions") 306 | 307 | 308 | def draw_boxes(image, boxes=None, refined_boxes=None, 309 | masks=None, captions=None, visibilities=None, 310 | title="", ax=None): 311 | """Draw bounding boxes and segmentation masks with differnt 312 | customizations. 313 | 314 | boxes: [N, (y1, x1, y2, x2, class_id)] in image coordinates. 315 | refined_boxes: Like boxes, but draw with solid lines to show 316 | that they're the result of refining 'boxes'. 317 | masks: [N, height, width] 318 | captions: List of N titles to display on each box 319 | visibilities: (optional) List of values of 0, 1, or 2. Determine how 320 | prominant each bounding box should be. 321 | title: An optional title to show over the image 322 | ax: (optional) Matplotlib axis to draw on. 323 | """ 324 | # Number of boxes 325 | assert boxes is not None or refined_boxes is not None 326 | N = boxes.shape[0] if boxes is not None else refined_boxes.shape[0] 327 | 328 | # Matplotlib Axis 329 | if not ax: 330 | _, ax = plt.subplots(1, figsize=(12, 12)) 331 | 332 | # Generate random colors 333 | colors = random_colors(N) 334 | 335 | # Show area outside image boundaries. 
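    # The margin is 10% of the image height and is applied on all four sides below.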
336 | margin = image.shape[0] // 10 337 | ax.set_ylim(image.shape[0] + margin, -margin) 338 | ax.set_xlim(-margin, image.shape[1] + margin) 339 | ax.axis('off') 340 | 341 | ax.set_title(title) 342 | 343 | masked_image = image.astype(np.uint32).copy() 344 | for i in range(N): 345 | # Box visibility 346 | visibility = visibilities[i] if visibilities is not None else 1 347 | if visibility == 0: 348 | color = "gray" 349 | style = "dotted" 350 | alpha = 0.5 351 | elif visibility == 1: 352 | color = colors[i] 353 | style = "dotted" 354 | alpha = 1 355 | elif visibility == 2: 356 | color = colors[i] 357 | style = "solid" 358 | alpha = 1 359 | 360 | # Boxes 361 | if boxes is not None: 362 | if not np.any(boxes[i]): 363 | # Skip this instance. Has no bbox. Likely lost in cropping. 364 | continue 365 | y1, x1, y2, x2 = boxes[i] 366 | p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, 367 | alpha=alpha, linestyle=style, 368 | edgecolor=color, facecolor='none') 369 | ax.add_patch(p) 370 | 371 | # Refined boxes 372 | if refined_boxes is not None and visibility > 0: 373 | ry1, rx1, ry2, rx2 = refined_boxes[i].astype(np.int32) 374 | p = patches.Rectangle((rx1, ry1), rx2 - rx1, ry2 - ry1, linewidth=2, 375 | edgecolor=color, facecolor='none') 376 | ax.add_patch(p) 377 | # Connect the top-left corners of the anchor and proposal 378 | if boxes is not None: 379 | ax.add_line(lines.Line2D([x1, rx1], [y1, ry1], color=color)) 380 | 381 | # Captions 382 | if captions is not None: 383 | caption = captions[i] 384 | # If there are refined boxes, display captions on them 385 | if refined_boxes is not None: 386 | y1, x1, y2, x2 = ry1, rx1, ry2, rx2 387 | x = random.randint(x1, (x1 + x2) // 2) 388 | ax.text(x1, y1, caption, size=11, verticalalignment='top', 389 | color='w', backgroundcolor="none", 390 | bbox={'facecolor': color, 'alpha': 0.5, 391 | 'pad': 2, 'edgecolor': 'none'}) 392 | 393 | # Masks 394 | if masks is not None: 395 | mask = masks[:, :, i] 396 | masked_image = apply_mask(masked_image, mask, color) 397 | # Mask Polygon 398 | # Pad to ensure proper polygons for masks that touch image edges. 399 | padded_mask = np.zeros( 400 | (mask.shape[0] + 2, mask.shape[1] + 2), dtype=np.uint8) 401 | padded_mask[1:-1, 1:-1] = mask 402 | contours = find_contours(padded_mask, 0.5) 403 | for verts in contours: 404 | # Subtract the padding and flip (y, x) to (x, y) 405 | verts = np.fliplr(verts) - 1 406 | p = Polygon(verts, facecolor="none", edgecolor=color) 407 | ax.add_patch(p) 408 | ax.imshow(masked_image.astype(np.uint8)) 409 | 410 | 411 | def display_table(table): 412 | """Display values in a table format. 413 | table: an iterable of rows, and each row is an iterable of values. 414 | """ 415 | html = "" 416 | for row in table: 417 | row_html = "" 418 | for col in row: 419 | row_html += "{:40}".format(str(col)) 420 | html += "" + row_html + "" 421 | html = "" + html + "
" 422 | IPython.display.display(IPython.display.HTML(html)) 423 | 424 | 425 | def display_weight_stats(model): 426 | """Scans all the weights in the model and returns a list of tuples 427 | that contain stats about each weight. 428 | """ 429 | layers = model.get_trainable_layers() 430 | table = [["WEIGHT NAME", "SHAPE", "MIN", "MAX", "STD"]] 431 | for l in layers: 432 | weight_values = l.get_weights() # list of Numpy arrays 433 | weight_tensors = l.weights # list of TF tensors 434 | for i, w in enumerate(weight_values): 435 | weight_name = weight_tensors[i].name 436 | # Detect problematic layers. Exclude biases of conv layers. 437 | alert = "" 438 | if w.min() == w.max() and not (l.__class__.__name__ == "Conv2D" and i == 1): 439 | alert += "*** dead?" 440 | if np.abs(w.min()) > 1000 or np.abs(w.max()) > 1000: 441 | alert += "*** Overflow?" 442 | # Add row 443 | table.append([ 444 | weight_name + alert, 445 | str(w.shape), 446 | "{:+9.4f}".format(w.min()), 447 | "{:+10.4f}".format(w.max()), 448 | "{:+9.4f}".format(w.std()), 449 | ]) 450 | display_table(table) 451 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | scipy 3 | Pillow 4 | cython 5 | matplotlib 6 | scikit-image 7 | tensorflow>=1.3.0 8 | keras>=2.0.8 9 | opencv-python 10 | h5py 11 | imgaug 12 | IPython[all] 13 | tqdm==4.19.9 14 | jupyter-client==5.2.3 15 | jupyter-core==4.4.0 16 | -------------------------------------------------------------------------------- /samples/balloon/README.md: -------------------------------------------------------------------------------- 1 | # Color Spash Example 2 | 3 | This is an example showing the use of Mask RCNN in a real application. 4 | We train the model to detect balloons only, and then we use the generated 5 | masks to keep balloons in color while changing the rest of the image to 6 | grayscale. 7 | 8 | ## Installation 9 | From the [Releases page](https://github.com/matterport/Mask_RCNN/releases) page: 10 | 1. Download `mask_rcnn_balloon.h5`. Save it in the root directory of the repo (the `mask_rcnn` directory). 11 | 2. Download `balloon_dataset.p3`. Expand it such that it's in the path `mask_rcnn/datasets/balloon/`. 12 | 13 | ## Apply color splash using the provided weights 14 | Apply splash effect on an image: 15 | 16 | ```bash 17 | python3 balloon.py splash --weights=/path/to/mask_rcnn/mask_rcnn_balloon.h5 --image= 18 | ``` 19 | 20 | Apply splash effect on a video. Requires OpenCV 3.2+: 21 | 22 | ```bash 23 | python3 balloon.py splash --weights=/path/to/mask_rcnn/mask_rcnn_balloon.h5 --video= 24 | ``` 25 | 26 | 27 | ## Run Jupyter notebooks 28 | Open the `inspect_balloon_data.ipynb` or `inspect_balloon_model.ipynb` Jupter notebooks. You can use these notebooks to explore the dataset and run through the detection pipelie step by step. 
29 | 30 | ## Train the Balloon model 31 | 32 | Train a new model starting from pre-trained COCO weights 33 | ``` 34 | python3 balloon.py train --dataset=/path/to/balloon/dataset --weights=coco 35 | ``` 36 | 37 | Resume training a model that you had trained earlier 38 | ``` 39 | python3 balloon.py train --dataset=/path/to/balloon/dataset --weights=last 40 | ``` 41 | 42 | Train a new model starting from ImageNet weights 43 | ``` 44 | python3 balloon.py train --dataset=/path/to/balloon/dataset --weights=imagenet 45 | ``` 46 | 47 | The code in `balloon.py` is set to train for 3K steps (30 epochs of 100 steps each), and using a batch size of 2. 48 | Update the schedule to fit your needs. 49 | -------------------------------------------------------------------------------- /samples/balloon/balloon.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Train on the toy Balloon dataset and implement color splash effect. 4 | 5 | Copyright (c) 2018 Matterport, Inc. 6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | 9 | ------------------------------------------------------------ 10 | 11 | Usage: import the module (see Jupyter notebooks for examples), or run from 12 | the command line as such: 13 | 14 | # Train a new model starting from pre-trained COCO weights 15 | python3 balloon.py train --dataset=/path/to/balloon/dataset --weights=coco 16 | 17 | # Resume training a model that you had trained earlier 18 | python3 balloon.py train --dataset=/path/to/balloon/dataset --weights=last 19 | 20 | # Train a new model starting from ImageNet weights 21 | python3 balloon.py train --dataset=/path/to/balloon/dataset --weights=imagenet 22 | 23 | # Apply color splash to an image 24 | python3 balloon.py splash --weights=/path/to/weights/file.h5 --image= 25 | 26 | # Apply color splash to video using the last weights you trained 27 | python3 balloon.py splash --weights=last --video= 28 | """ 29 | 30 | import os 31 | import sys 32 | import json 33 | import datetime 34 | import numpy as np 35 | import skimage.draw 36 | 37 | # Root directory of the project 38 | ROOT_DIR = os.path.abspath("../../") 39 | 40 | # Import Mask RCNN 41 | sys.path.append(ROOT_DIR) # To find local version of the library 42 | from mrcnn.config import Config 43 | from mrcnn import model as modellib, utils 44 | 45 | # Path to trained weights file 46 | COCO_WEIGHTS_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5") 47 | 48 | # Directory to save logs and model checkpoints, if not provided 49 | # through the command line argument --logs 50 | DEFAULT_LOGS_DIR = os.path.join(ROOT_DIR, "logs") 51 | 52 | ############################################################ 53 | # Configurations 54 | ############################################################ 55 | 56 | 57 | class BalloonConfig(Config): 58 | """Configuration for training on the toy dataset. 59 | Derives from the base Config class and overrides some values. 60 | """ 61 | # Give the configuration a recognizable name 62 | NAME = "balloon" 63 | 64 | # We use a GPU with 12GB memory, which can fit two images. 65 | # Adjust down if you use a smaller GPU. 
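    # The effective batch size is GPU_COUNT * IMAGES_PER_GPU.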
66 | IMAGES_PER_GPU = 2 67 | 68 | # Number of classes (including background) 69 | NUM_CLASSES = 1 + 1 # Background + baloon 70 | 71 | # Number of training steps per epoch 72 | STEPS_PER_EPOCH = 100 73 | 74 | # Skip detections with < 90% confidence 75 | DETECTION_MIN_CONFIDENCE = 0.9 76 | 77 | 78 | ############################################################ 79 | # Dataset 80 | ############################################################ 81 | 82 | class BalloonDataset(utils.Dataset): 83 | 84 | def load_balloon(self, dataset_dir, subset): 85 | """Load a subset of the Balloon dataset. 86 | dataset_dir: Root directory of the dataset. 87 | subset: Subset to load: train or val 88 | """ 89 | # Add classes. We have only one class to add. 90 | self.add_class("balloon", 1, "balloon") 91 | 92 | # Train or validation dataset? 93 | assert subset in ["train", "val"] 94 | dataset_dir = os.path.join(dataset_dir, subset) 95 | 96 | # Load annotations 97 | # VGG Image Annotator saves each image in the form: 98 | # { 'filename': '28503151_5b5b7ec140_b.jpg', 99 | # 'regions': { 100 | # '0': { 101 | # 'region_attributes': {}, 102 | # 'shape_attributes': { 103 | # 'all_points_x': [...], 104 | # 'all_points_y': [...], 105 | # 'name': 'polygon'}}, 106 | # ... more regions ... 107 | # }, 108 | # 'size': 100202 109 | # } 110 | # We mostly care about the x and y coordinates of each region 111 | annotations = json.load(open(os.path.join(dataset_dir, "via_region_data.json"))) 112 | annotations = list(annotations.values()) # don't need the dict keys 113 | 114 | # The VIA tool saves images in the JSON even if they don't have any 115 | # annotations. Skip unannotated images. 116 | annotations = [a for a in annotations if a['regions']] 117 | 118 | # Add images 119 | for a in annotations: 120 | # Get the x, y coordinaets of points of the polygons that make up 121 | # the outline of each object instance. There are stores in the 122 | # shape_attributes (see json format above) 123 | polygons = [r['shape_attributes'] for r in a['regions'].values()] 124 | 125 | # load_mask() needs the image size to convert polygons to masks. 126 | # Unfortunately, VIA doesn't include it in JSON, so we must read 127 | # the image. This is only managable since the dataset is tiny. 128 | image_path = os.path.join(dataset_dir, a['filename']) 129 | image = skimage.io.imread(image_path) 130 | height, width = image.shape[:2] 131 | 132 | self.add_image( 133 | "balloon", 134 | image_id=a['filename'], # use file name as a unique image id 135 | path=image_path, 136 | width=width, height=height, 137 | polygons=polygons) 138 | 139 | def load_mask(self, image_id): 140 | """Generate instance masks for an image. 141 | Returns: 142 | masks: A bool array of shape [height, width, instance count] with 143 | one mask per instance. 144 | class_ids: a 1D array of class IDs of the instance masks. 145 | """ 146 | # If not a balloon dataset image, delegate to parent class. 
147 | image_info = self.image_info[image_id] 148 | if image_info["source"] != "balloon": 149 | return super(self.__class__, self).load_mask(image_id) 150 | 151 | # Convert polygons to a bitmap mask of shape 152 | # [height, width, instance_count] 153 | info = self.image_info[image_id] 154 | mask = np.zeros([info["height"], info["width"], len(info["polygons"])], 155 | dtype=np.uint8) 156 | for i, p in enumerate(info["polygons"]): 157 | # Get indexes of pixels inside the polygon and set them to 1 158 | rr, cc = skimage.draw.polygon(p['all_points_y'], p['all_points_x']) 159 | mask[rr, cc, i] = 1 160 | 161 | # Return mask, and array of class IDs of each instance. Since we have 162 | # one class ID only, we return an array of 1s 163 | return mask.astype(np.bool), np.ones([mask.shape[-1]], dtype=np.int32) 164 | 165 | def image_reference(self, image_id): 166 | """Return the path of the image.""" 167 | info = self.image_info[image_id] 168 | if info["source"] == "balloon": 169 | return info["path"] 170 | else: 171 | super(self.__class__, self).image_reference(image_id) 172 | 173 | 174 | def train(model): 175 | """Train the model.""" 176 | # Training dataset. 177 | dataset_train = BalloonDataset() 178 | dataset_train.load_balloon(args.dataset, "train") 179 | dataset_train.prepare() 180 | 181 | # Validation dataset 182 | dataset_val = BalloonDataset() 183 | dataset_val.load_balloon(args.dataset, "val") 184 | dataset_val.prepare() 185 | 186 | # *** This training schedule is an example. Update to your needs *** 187 | # Since we're using a very small dataset, and starting from 188 | # COCO trained weights, we don't need to train too long. Also, 189 | # no need to train all layers, just the heads should do it. 190 | print("Training network heads") 191 | model.train(dataset_train, dataset_val, 192 | learning_rate=config.LEARNING_RATE, 193 | epochs=30, 194 | layers='heads') 195 | 196 | 197 | def color_splash(image, mask): 198 | """Apply color splash effect. 199 | image: RGB image [height, width, 3] 200 | mask: instance segmentation mask [height, width, instance count] 201 | 202 | Returns result image. 203 | """ 204 | # Make a grayscale copy of the image. The grayscale copy still 205 | # has 3 RGB channels, though. 206 | gray = skimage.color.gray2rgb(skimage.color.rgb2gray(image)) * 255 207 | # We're treating all instances as one, so collapse the mask into one layer 208 | mask = (np.sum(mask, -1, keepdims=True) >= 1) 209 | # Copy color pixels from the original color image where mask is set 210 | if mask.shape[0] > 0: 211 | splash = np.where(mask, image, gray).astype(np.uint8) 212 | else: 213 | splash = gray 214 | return splash 215 | 216 | 217 | def detect_and_color_splash(model, image_path=None, video_path=None): 218 | assert image_path or video_path 219 | 220 | # Image or video? 
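    # At least one of image_path / video_path is required (asserted above);
    # if both are given, the image branch takes precedence.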
221 | if image_path: 222 | # Run model detection and generate the color splash effect 223 | print("Running on {}".format(args.image)) 224 | # Read image 225 | image = skimage.io.imread(args.image) 226 | # Detect objects 227 | r = model.detect([image], verbose=1)[0] 228 | # Color splash 229 | splash = color_splash(image, r['masks']) 230 | # Save output 231 | file_name = "splash_{:%Y%m%dT%H%M%S}.png".format(datetime.datetime.now()) 232 | skimage.io.imsave(file_name, splash) 233 | elif video_path: 234 | import cv2 235 | # Video capture 236 | vcapture = cv2.VideoCapture(video_path) 237 | width = int(vcapture.get(cv2.CAP_PROP_FRAME_WIDTH)) 238 | height = int(vcapture.get(cv2.CAP_PROP_FRAME_HEIGHT)) 239 | fps = vcapture.get(cv2.CAP_PROP_FPS) 240 | 241 | # Define codec and create video writer 242 | file_name = "splash_{:%Y%m%dT%H%M%S}.avi".format(datetime.datetime.now()) 243 | vwriter = cv2.VideoWriter(file_name, 244 | cv2.VideoWriter_fourcc(*'MJPG'), 245 | fps, (width, height)) 246 | 247 | count = 0 248 | success = True 249 | while success: 250 | print("frame: ", count) 251 | # Read next image 252 | success, image = vcapture.read() 253 | if success: 254 | # OpenCV returns images as BGR, convert to RGB 255 | image = image[..., ::-1] 256 | # Detect objects 257 | r = model.detect([image], verbose=0)[0] 258 | # Color splash 259 | splash = color_splash(image, r['masks']) 260 | # RGB -> BGR to save image to video 261 | splash = splash[..., ::-1] 262 | # Add image to video writer 263 | vwriter.write(splash) 264 | count += 1 265 | vwriter.release() 266 | print("Saved to ", file_name) 267 | 268 | 269 | ############################################################ 270 | # Training 271 | ############################################################ 272 | 273 | if __name__ == '__main__': 274 | import argparse 275 | 276 | # Parse command line arguments 277 | parser = argparse.ArgumentParser( 278 | description='Train Mask R-CNN to detect balloons.') 279 | parser.add_argument("command", 280 | metavar="", 281 | help="'train' or 'splash'") 282 | parser.add_argument('--dataset', required=False, 283 | metavar="/path/to/balloon/dataset/", 284 | help='Directory of the Balloon dataset') 285 | parser.add_argument('--weights', required=True, 286 | metavar="/path/to/weights.h5", 287 | help="Path to weights .h5 file or 'coco'") 288 | parser.add_argument('--logs', required=False, 289 | default=DEFAULT_LOGS_DIR, 290 | metavar="/path/to/logs/", 291 | help='Logs and checkpoints directory (default=logs/)') 292 | parser.add_argument('--image', required=False, 293 | metavar="path or URL to image", 294 | help='Image to apply the color splash effect on') 295 | parser.add_argument('--video', required=False, 296 | metavar="path or URL to video", 297 | help='Video to apply the color splash effect on') 298 | args = parser.parse_args() 299 | 300 | # Validate arguments 301 | if args.command == "train": 302 | assert args.dataset, "Argument --dataset is required for training" 303 | elif args.command == "splash": 304 | assert args.image or args.video,\ 305 | "Provide --image or --video to apply color splash" 306 | 307 | print("Weights: ", args.weights) 308 | print("Dataset: ", args.dataset) 309 | print("Logs: ", args.logs) 310 | 311 | # Configurations 312 | if args.command == "train": 313 | config = BalloonConfig() 314 | else: 315 | class InferenceConfig(BalloonConfig): 316 | # Set batch size to 1 since we'll be running inference on 317 | # one image at a time. 
Batch size = GPU_COUNT * IMAGES_PER_GPU 318 | GPU_COUNT = 1 319 | IMAGES_PER_GPU = 1 320 | config = InferenceConfig() 321 | config.display() 322 | 323 | # Create model 324 | if args.command == "train": 325 | model = modellib.MaskRCNN(mode="training", config=config, 326 | model_dir=args.logs) 327 | else: 328 | model = modellib.MaskRCNN(mode="inference", config=config, 329 | model_dir=args.logs) 330 | 331 | # Select weights file to load 332 | if args.weights.lower() == "coco": 333 | weights_path = COCO_WEIGHTS_PATH 334 | # Download weights file 335 | if not os.path.exists(weights_path): 336 | utils.download_trained_weights(weights_path) 337 | elif args.weights.lower() == "last": 338 | # Find last trained weights 339 | weights_path = model.find_last()[1] 340 | elif args.weights.lower() == "imagenet": 341 | # Start from ImageNet trained weights 342 | weights_path = model.get_imagenet_weights() 343 | else: 344 | weights_path = args.weights 345 | 346 | # Load weights 347 | print("Loading weights ", weights_path) 348 | if args.weights.lower() == "coco": 349 | # Exclude the last layers because they require a matching 350 | # number of classes 351 | model.load_weights(weights_path, by_name=True, exclude=[ 352 | "mrcnn_class_logits", "mrcnn_bbox_fc", 353 | "mrcnn_bbox", "mrcnn_mask"]) 354 | else: 355 | model.load_weights(weights_path, by_name=True) 356 | 357 | # Train or evaluate 358 | if args.command == "train": 359 | train(model) 360 | elif args.command == "splash": 361 | detect_and_color_splash(model, image_path=args.image, 362 | video_path=args.video) 363 | else: 364 | print("'{}' is not recognized. " 365 | "Use 'train' or 'splash'".format(args.command)) 366 | -------------------------------------------------------------------------------- /samples/coco/coco.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Configurations and data loading code for MS COCO. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | 9 | ------------------------------------------------------------ 10 | 11 | Usage: import the module (see Jupyter notebooks for examples), or run from 12 | the command line as such: 13 | 14 | # Train a new model starting from pre-trained COCO weights 15 | python3 coco.py train --dataset=/path/to/coco/ --model=coco 16 | 17 | # Train a new model starting from ImageNet weights 18 | python3 coco.py train --dataset=/path/to/coco/ --model=imagenet 19 | 20 | # Continue training a model that you had trained earlier 21 | python3 coco.py train --dataset=/path/to/coco/ --model=/path/to/weights.h5 22 | 23 | # Continue training the last model you trained 24 | python3 coco.py train --dataset=/path/to/coco/ --model=last 25 | 26 | # Run COCO evaluatoin on the last model you trained 27 | python3 coco.py evaluate --dataset=/path/to/coco/ --model=last 28 | """ 29 | 30 | import os 31 | import sys 32 | import time 33 | import numpy as np 34 | import imgaug # https://github.com/aleju/imgaug (pip3 install imageaug) 35 | 36 | # Download and install the Python COCO tools from https://github.com/waleedka/coco 37 | # That's a fork from the original https://github.com/pdollar/coco with a bug 38 | # fix for Python 3. 39 | # I submitted a pull request https://github.com/cocodataset/cocoapi/pull/50 40 | # If the PR is merged then use the original repo. 41 | # Note: Edit PythonAPI/Makefile and replace "python" with "python3". 
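# (For reference, one way to install that fork -- treat this as a suggestion that
#  depends on your environment -- is via pip's VCS support, e.g.:
#  pip3 install "git+https://github.com/waleedka/coco.git#subdirectory=PythonAPI")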
42 | from pycocotools.coco import COCO 43 | from pycocotools.cocoeval import COCOeval 44 | from pycocotools import mask as maskUtils 45 | 46 | import zipfile 47 | import urllib.request 48 | import shutil 49 | 50 | # Root directory of the project 51 | ROOT_DIR = os.path.abspath("../../") 52 | 53 | # Import Mask RCNN 54 | sys.path.append(ROOT_DIR) # To find local version of the library 55 | from mrcnn.config import Config 56 | from mrcnn import model as modellib, utils 57 | 58 | # Path to trained weights file 59 | COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5") 60 | 61 | # Directory to save logs and model checkpoints, if not provided 62 | # through the command line argument --logs 63 | DEFAULT_LOGS_DIR = os.path.join(ROOT_DIR, "logs") 64 | DEFAULT_DATASET_YEAR = "2014" 65 | 66 | ############################################################ 67 | # Configurations 68 | ############################################################ 69 | 70 | 71 | class CocoConfig(Config): 72 | """Configuration for training on MS COCO. 73 | Derives from the base Config class and overrides values specific 74 | to the COCO dataset. 75 | """ 76 | # Give the configuration a recognizable name 77 | NAME = "coco" 78 | 79 | # We use a GPU with 12GB memory, which can fit two images. 80 | # Adjust down if you use a smaller GPU. 81 | IMAGES_PER_GPU = 2 82 | 83 | # Uncomment to train on 8 GPUs (default is 1) 84 | # GPU_COUNT = 8 85 | 86 | # Number of classes (including background) 87 | NUM_CLASSES = 1 + 80 # COCO has 80 classes 88 | 89 | 90 | ############################################################ 91 | # Dataset 92 | ############################################################ 93 | 94 | class CocoDataset(utils.Dataset): 95 | def load_coco(self, dataset_dir, subset, year=DEFAULT_DATASET_YEAR, class_ids=None, 96 | class_map=None, return_coco=False, auto_download=False): 97 | """Load a subset of the COCO dataset. 98 | dataset_dir: The root directory of the COCO dataset. 99 | subset: What to load (train, val, minival, valminusminival) 100 | year: What dataset year to load (2014, 2017) as a string, not an integer 101 | class_ids: If provided, only loads images that have the given classes. 102 | class_map: TODO: Not implemented yet. Supports maping classes from 103 | different datasets to the same class ID. 104 | return_coco: If True, returns the COCO object. 105 | auto_download: Automatically download and unzip MS-COCO images and annotations 106 | """ 107 | 108 | if auto_download is True: 109 | self.auto_download(dataset_dir, subset, year) 110 | 111 | coco = COCO("{}/annotations/instances_{}{}.json".format(dataset_dir, subset, year)) 112 | if subset == "minival" or subset == "valminusminival": 113 | subset = "val" 114 | image_dir = "{}/{}{}".format(dataset_dir, subset, year) 115 | 116 | # Load all classes or a subset? 117 | if not class_ids: 118 | # All classes 119 | class_ids = sorted(coco.getCatIds()) 120 | 121 | # All images or a subset? 
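        # When class_ids is given, gather the IDs of all images that contain those
        # classes; duplicates (images containing several of them) are removed below.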
122 | if class_ids: 123 | image_ids = [] 124 | for id in class_ids: 125 | image_ids.extend(list(coco.getImgIds(catIds=[id]))) 126 | # Remove duplicates 127 | image_ids = list(set(image_ids)) 128 | else: 129 | # All images 130 | image_ids = list(coco.imgs.keys()) 131 | 132 | # Add classes 133 | for i in class_ids: 134 | self.add_class("coco", i, coco.loadCats(i)[0]["name"]) 135 | 136 | # Add images 137 | for i in image_ids: 138 | self.add_image( 139 | "coco", image_id=i, 140 | path=os.path.join(image_dir, coco.imgs[i]['file_name']), 141 | width=coco.imgs[i]["width"], 142 | height=coco.imgs[i]["height"], 143 | annotations=coco.loadAnns(coco.getAnnIds( 144 | imgIds=[i], catIds=class_ids, iscrowd=None))) 145 | if return_coco: 146 | return coco 147 | 148 | def auto_download(self, dataDir, dataType, dataYear): 149 | """Download the COCO dataset/annotations if requested. 150 | dataDir: The root directory of the COCO dataset. 151 | dataType: What to load (train, val, minival, valminusminival) 152 | dataYear: What dataset year to load (2014, 2017) as a string, not an integer 153 | Note: 154 | For 2014, use "train", "val", "minival", or "valminusminival" 155 | For 2017, only "train" and "val" annotations are available 156 | """ 157 | 158 | # Setup paths and file names 159 | if dataType == "minival" or dataType == "valminusminival": 160 | imgDir = "{}/{}{}".format(dataDir, "val", dataYear) 161 | imgZipFile = "{}/{}{}.zip".format(dataDir, "val", dataYear) 162 | imgURL = "http://images.cocodataset.org/zips/{}{}.zip".format("val", dataYear) 163 | else: 164 | imgDir = "{}/{}{}".format(dataDir, dataType, dataYear) 165 | imgZipFile = "{}/{}{}.zip".format(dataDir, dataType, dataYear) 166 | imgURL = "http://images.cocodataset.org/zips/{}{}.zip".format(dataType, dataYear) 167 | # print("Image paths:"); print(imgDir); print(imgZipFile); print(imgURL) 168 | 169 | # Create main folder if it doesn't exist yet 170 | if not os.path.exists(dataDir): 171 | os.makedirs(dataDir) 172 | 173 | # Download images if not available locally 174 | if not os.path.exists(imgDir): 175 | os.makedirs(imgDir) 176 | print("Downloading images to " + imgZipFile + " ...") 177 | with urllib.request.urlopen(imgURL) as resp, open(imgZipFile, 'wb') as out: 178 | shutil.copyfileobj(resp, out) 179 | print("... done downloading.") 180 | print("Unzipping " + imgZipFile) 181 | with zipfile.ZipFile(imgZipFile, "r") as zip_ref: 182 | zip_ref.extractall(dataDir) 183 | print("... 
done unzipping") 184 | print("Will use images in " + imgDir) 185 | 186 | # Setup annotations data paths 187 | annDir = "{}/annotations".format(dataDir) 188 | if dataType == "minival": 189 | annZipFile = "{}/instances_minival2014.json.zip".format(dataDir) 190 | annFile = "{}/instances_minival2014.json".format(annDir) 191 | annURL = "https://dl.dropboxusercontent.com/s/o43o90bna78omob/instances_minival2014.json.zip?dl=0" 192 | unZipDir = annDir 193 | elif dataType == "valminusminival": 194 | annZipFile = "{}/instances_valminusminival2014.json.zip".format(dataDir) 195 | annFile = "{}/instances_valminusminival2014.json".format(annDir) 196 | annURL = "https://dl.dropboxusercontent.com/s/s3tw5zcg7395368/instances_valminusminival2014.json.zip?dl=0" 197 | unZipDir = annDir 198 | else: 199 | annZipFile = "{}/annotations_trainval{}.zip".format(dataDir, dataYear) 200 | annFile = "{}/instances_{}{}.json".format(annDir, dataType, dataYear) 201 | annURL = "http://images.cocodataset.org/annotations/annotations_trainval{}.zip".format(dataYear) 202 | unZipDir = dataDir 203 | # print("Annotations paths:"); print(annDir); print(annFile); print(annZipFile); print(annURL) 204 | 205 | # Download annotations if not available locally 206 | if not os.path.exists(annDir): 207 | os.makedirs(annDir) 208 | if not os.path.exists(annFile): 209 | if not os.path.exists(annZipFile): 210 | print("Downloading zipped annotations to " + annZipFile + " ...") 211 | with urllib.request.urlopen(annURL) as resp, open(annZipFile, 'wb') as out: 212 | shutil.copyfileobj(resp, out) 213 | print("... done downloading.") 214 | print("Unzipping " + annZipFile) 215 | with zipfile.ZipFile(annZipFile, "r") as zip_ref: 216 | zip_ref.extractall(unZipDir) 217 | print("... done unzipping") 218 | print("Will use annotations in " + annFile) 219 | 220 | def load_mask(self, image_id): 221 | """Load instance masks for the given image. 222 | 223 | Different datasets use different ways to store masks. This 224 | function converts the different mask format to one format 225 | in the form of a bitmap [height, width, instances]. 226 | 227 | Returns: 228 | masks: A bool array of shape [height, width, instance count] with 229 | one mask per instance. 230 | class_ids: a 1D array of class IDs of the instance masks. 231 | """ 232 | # If not a COCO image, delegate to parent class. 233 | image_info = self.image_info[image_id] 234 | if image_info["source"] != "coco": 235 | return super(CocoDataset, self).load_mask(image_id) 236 | 237 | instance_masks = [] 238 | class_ids = [] 239 | annotations = self.image_info[image_id]["annotations"] 240 | # Build mask of shape [height, width, instance_count] and list 241 | # of class IDs that correspond to each channel of the mask. 242 | for annotation in annotations: 243 | class_id = self.map_source_class_id( 244 | "coco.{}".format(annotation['category_id'])) 245 | if class_id: 246 | m = self.annToMask(annotation, image_info["height"], 247 | image_info["width"]) 248 | # Some objects are so small that they're less than 1 pixel area 249 | # and end up rounded out. Skip those objects. 250 | if m.max() < 1: 251 | continue 252 | # Is it a crowd? If so, use a negative class ID. 253 | if annotation['iscrowd']: 254 | # Use negative class ID for crowds 255 | class_id *= -1 256 | # For crowd masks, annToMask() sometimes returns a mask 257 | # smaller than the given dimensions. If so, resize it. 
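                # Fall back to a full-image placeholder mask instead of resizing.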
258 | if m.shape[0] != image_info["height"] or m.shape[1] != image_info["width"]: 259 | m = np.ones([image_info["height"], image_info["width"]], dtype=bool) 260 | instance_masks.append(m) 261 | class_ids.append(class_id) 262 | 263 | # Pack instance masks into an array 264 | if class_ids: 265 | mask = np.stack(instance_masks, axis=2).astype(np.bool) 266 | class_ids = np.array(class_ids, dtype=np.int32) 267 | return mask, class_ids 268 | else: 269 | # Call super class to return an empty mask 270 | return super(CocoDataset, self).load_mask(image_id) 271 | 272 | def image_reference(self, image_id): 273 | """Return a link to the image in the COCO Website.""" 274 | info = self.image_info[image_id] 275 | if info["source"] == "coco": 276 | return "http://cocodataset.org/#explore?id={}".format(info["id"]) 277 | else: 278 | super(CocoDataset, self).image_reference(image_id) 279 | 280 | # The following two functions are from pycocotools with a few changes. 281 | 282 | def annToRLE(self, ann, height, width): 283 | """ 284 | Convert annotation which can be polygons, uncompressed RLE to RLE. 285 | :return: binary mask (numpy 2D array) 286 | """ 287 | segm = ann['segmentation'] 288 | if isinstance(segm, list): 289 | # polygon -- a single object might consist of multiple parts 290 | # we merge all parts into one mask rle code 291 | rles = maskUtils.frPyObjects(segm, height, width) 292 | rle = maskUtils.merge(rles) 293 | elif isinstance(segm['counts'], list): 294 | # uncompressed RLE 295 | rle = maskUtils.frPyObjects(segm, height, width) 296 | else: 297 | # rle 298 | rle = ann['segmentation'] 299 | return rle 300 | 301 | def annToMask(self, ann, height, width): 302 | """ 303 | Convert annotation which can be polygons, uncompressed RLE, or RLE to binary mask. 304 | :return: binary mask (numpy 2D array) 305 | """ 306 | rle = self.annToRLE(ann, height, width) 307 | m = maskUtils.decode(rle) 308 | return m 309 | 310 | 311 | ############################################################ 312 | # COCO Evaluation 313 | ############################################################ 314 | 315 | def build_coco_results(dataset, image_ids, rois, class_ids, scores, masks): 316 | """Arrange resutls to match COCO specs in http://cocodataset.org/#format 317 | """ 318 | # If no results, return an empty list 319 | if rois is None: 320 | return [] 321 | 322 | results = [] 323 | for image_id in image_ids: 324 | # Loop through detections 325 | for i in range(rois.shape[0]): 326 | class_id = class_ids[i] 327 | score = scores[i] 328 | bbox = np.around(rois[i], 1) 329 | mask = masks[:, :, i] 330 | 331 | result = { 332 | "image_id": image_id, 333 | "category_id": dataset.get_source_class_id(class_id, "coco"), 334 | "bbox": [bbox[1], bbox[0], bbox[3] - bbox[1], bbox[2] - bbox[0]], 335 | "score": score, 336 | "segmentation": maskUtils.encode(np.asfortranarray(mask)) 337 | } 338 | results.append(result) 339 | return results 340 | 341 | 342 | def evaluate_coco(model, dataset, coco, eval_type="bbox", limit=0, image_ids=None): 343 | """Runs official COCO evaluation. 344 | dataset: A Dataset object with valiadtion data 345 | eval_type: "bbox" or "segm" for bounding box or segmentation evaluation 346 | limit: if not 0, it's the number of images to use for evaluation 347 | """ 348 | # Pick COCO images from the dataset 349 | image_ids = image_ids or dataset.image_ids 350 | 351 | # Limit to a subset 352 | if limit: 353 | image_ids = image_ids[:limit] 354 | 355 | # Get corresponding COCO image IDs. 
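    # Map the dataset's internal indices back to the original COCO image IDs
    # stored in each image_info entry.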
356 | coco_image_ids = [dataset.image_info[id]["id"] for id in image_ids] 357 | 358 | t_prediction = 0 359 | t_start = time.time() 360 | 361 | results = [] 362 | for i, image_id in enumerate(image_ids): 363 | # Load image 364 | image = dataset.load_image(image_id) 365 | 366 | # Run detection 367 | t = time.time() 368 | r = model.detect([image], verbose=0)[0] 369 | t_prediction += (time.time() - t) 370 | 371 | # Convert results to COCO format 372 | # Cast masks to uint8 because COCO tools errors out on bool 373 | image_results = build_coco_results(dataset, coco_image_ids[i:i + 1], 374 | r["rois"], r["class_ids"], 375 | r["scores"], 376 | r["masks"].astype(np.uint8)) 377 | results.extend(image_results) 378 | 379 | # Load results. This modifies results with additional attributes. 380 | coco_results = coco.loadRes(results) 381 | 382 | # Evaluate 383 | cocoEval = COCOeval(coco, coco_results, eval_type) 384 | cocoEval.params.imgIds = coco_image_ids 385 | cocoEval.evaluate() 386 | cocoEval.accumulate() 387 | cocoEval.summarize() 388 | 389 | print("Prediction time: {}. Average {}/image".format( 390 | t_prediction, t_prediction / len(image_ids))) 391 | print("Total time: ", time.time() - t_start) 392 | 393 | 394 | ############################################################ 395 | # Training 396 | ############################################################ 397 | 398 | 399 | if __name__ == '__main__': 400 | import argparse 401 | 402 | # Parse command line arguments 403 | parser = argparse.ArgumentParser( 404 | description='Train Mask R-CNN on MS COCO.') 405 | parser.add_argument("command", 406 | metavar="", 407 | help="'train' or 'evaluate' on MS COCO") 408 | parser.add_argument('--dataset', required=True, 409 | metavar="/path/to/coco/", 410 | help='Directory of the MS-COCO dataset') 411 | parser.add_argument('--year', required=False, 412 | default=DEFAULT_DATASET_YEAR, 413 | metavar="", 414 | help='Year of the MS-COCO dataset (2014 or 2017) (default=2014)') 415 | parser.add_argument('--model', required=True, 416 | metavar="/path/to/weights.h5", 417 | help="Path to weights .h5 file or 'coco'") 418 | parser.add_argument('--logs', required=False, 419 | default=DEFAULT_LOGS_DIR, 420 | metavar="/path/to/logs/", 421 | help='Logs and checkpoints directory (default=logs/)') 422 | parser.add_argument('--limit', required=False, 423 | default=500, 424 | metavar="", 425 | help='Images to use for evaluation (default=500)') 426 | parser.add_argument('--download', required=False, 427 | default=False, 428 | metavar="", 429 | help='Automatically download and unzip MS-COCO files (default=False)', 430 | type=bool) 431 | args = parser.parse_args() 432 | print("Command: ", args.command) 433 | print("Model: ", args.model) 434 | print("Dataset: ", args.dataset) 435 | print("Year: ", args.year) 436 | print("Logs: ", args.logs) 437 | print("Auto Download: ", args.download) 438 | 439 | # Configurations 440 | if args.command == "train": 441 | config = CocoConfig() 442 | else: 443 | class InferenceConfig(CocoConfig): 444 | # Set batch size to 1 since we'll be running inference on 445 | # one image at a time. 
Batch size = GPU_COUNT * IMAGES_PER_GPU
446 |             GPU_COUNT = 1
447 |             IMAGES_PER_GPU = 1
448 |             DETECTION_MIN_CONFIDENCE = 0
449 |         config = InferenceConfig()
450 |     config.display()
451 | 
452 |     # Create model
453 |     if args.command == "train":
454 |         model = modellib.MaskRCNN(mode="training", config=config,
455 |                                   model_dir=args.logs)
456 |     else:
457 |         model = modellib.MaskRCNN(mode="inference", config=config,
458 |                                   model_dir=args.logs)
459 | 
460 |     # Select weights file to load
461 |     if args.model.lower() == "coco":
462 |         model_path = COCO_MODEL_PATH
463 |     elif args.model.lower() == "last":
464 |         # Find last trained weights
465 |         model_path = model.find_last()[1]
466 |     elif args.model.lower() == "imagenet":
467 |         # Start from ImageNet trained weights
468 |         model_path = model.get_imagenet_weights()
469 |     else:
470 |         model_path = args.model
471 | 
472 |     # Load weights
473 |     print("Loading weights ", model_path)
474 |     model.load_weights(model_path, by_name=True)
475 | 
476 |     # Train or evaluate
477 |     if args.command == "train":
478 |         # Training dataset. Use the training set and 35K from the
479 |         # validation set, as in the Mask R-CNN paper.
480 |         dataset_train = CocoDataset()
481 |         dataset_train.load_coco(args.dataset, "train", year=args.year, auto_download=args.download)
482 |         dataset_train.load_coco(args.dataset, "valminusminival", year=args.year, auto_download=args.download)
483 |         dataset_train.prepare()
484 | 
485 |         # Validation dataset
486 |         dataset_val = CocoDataset()
487 |         dataset_val.load_coco(args.dataset, "minival", year=args.year, auto_download=args.download)
488 |         dataset_val.prepare()
489 | 
490 |         # Image Augmentation
491 |         # Right/Left flip 50% of the time
492 |         augmentation = imgaug.augmenters.Fliplr(0.5)
493 | 
494 |         # *** This training schedule is an example. Update to your needs ***
495 | 
496 |         # Training - Stage 1
497 |         print("Training network heads")
498 |         model.train(dataset_train, dataset_val,
499 |                     learning_rate=config.LEARNING_RATE,
500 |                     epochs=40,
501 |                     layers='heads',
502 |                     augmentation=augmentation)
503 | 
504 |         # Training - Stage 2
505 |         # Finetune layers from ResNet stage 4 and up
506 |         print("Fine tune ResNet stage 4 and up")
507 |         model.train(dataset_train, dataset_val,
508 |                     learning_rate=config.LEARNING_RATE,
509 |                     epochs=120,
510 |                     layers='4+',
511 |                     augmentation=augmentation)
512 | 
513 |         # Training - Stage 3
514 |         # Fine tune all layers
515 |         print("Fine tune all layers")
516 |         model.train(dataset_train, dataset_val,
517 |                     learning_rate=config.LEARNING_RATE / 10,
518 |                     epochs=160,
519 |                     layers='all',
520 |                     augmentation=augmentation)
521 | 
522 |     elif args.command == "evaluate":
523 |         # Validation dataset
524 |         dataset_val = CocoDataset()
525 |         coco = dataset_val.load_coco(args.dataset, "minival", year=args.year, return_coco=True, auto_download=args.download)
526 |         dataset_val.prepare()
527 |         print("Running COCO evaluation on {} images.".format(args.limit))
528 |         evaluate_coco(model, dataset_val, coco, "bbox", limit=int(args.limit))
529 |     else:
530 |         print("'{}' is not recognized. "
531 |               "Use 'train' or 'evaluate'".format(args.command))
532 | 
--------------------------------------------------------------------------------
/samples/shapes/shapes.py:
--------------------------------------------------------------------------------
1 | """
2 | Mask R-CNN
3 | Configurations and data loading code for the synthetic Shapes dataset.
4 | This is a duplicate of the code in the notebook train_shapes.ipynb for easy
5 | import into other notebooks, such as inspect_model.ipynb.
6 | 7 | Copyright (c) 2017 Matterport, Inc. 8 | Licensed under the MIT License (see LICENSE for details) 9 | Written by Waleed Abdulla 10 | """ 11 | 12 | import os 13 | import sys 14 | import math 15 | import random 16 | import numpy as np 17 | import cv2 18 | 19 | # Root directory of the project 20 | ROOT_DIR = os.path.abspath("../../") 21 | 22 | # Import Mask RCNN 23 | sys.path.append(ROOT_DIR) # To find local version of the library 24 | from mrcnn.config import Config 25 | from mrcnn import utils 26 | 27 | 28 | class ShapesConfig(Config): 29 | """Configuration for training on the toy shapes dataset. 30 | Derives from the base Config class and overrides values specific 31 | to the toy shapes dataset. 32 | """ 33 | # Give the configuration a recognizable name 34 | NAME = "shapes" 35 | 36 | # Train on 1 GPU and 8 images per GPU. We can put multiple images on each 37 | # GPU because the images are small. Batch size is 8 (GPUs * images/GPU). 38 | GPU_COUNT = 1 39 | IMAGES_PER_GPU = 8 40 | 41 | # Number of classes (including background) 42 | NUM_CLASSES = 1 + 3 # background + 3 shapes 43 | 44 | # Use small images for faster training. Set the limits of the small side 45 | # the large side, and that determines the image shape. 46 | IMAGE_MIN_DIM = 128 47 | IMAGE_MAX_DIM = 128 48 | 49 | # Use smaller anchors because our image and objects are small 50 | RPN_ANCHOR_SCALES = (8, 16, 32, 64, 128) # anchor side in pixels 51 | 52 | # Reduce training ROIs per image because the images are small and have 53 | # few objects. Aim to allow ROI sampling to pick 33% positive ROIs. 54 | TRAIN_ROIS_PER_IMAGE = 32 55 | 56 | # Use a small epoch since the data is simple 57 | STEPS_PER_EPOCH = 100 58 | 59 | # use small validation steps since the epoch is small 60 | VALIDATION_STEPS = 5 61 | 62 | 63 | class ShapesDataset(utils.Dataset): 64 | """Generates the shapes synthetic dataset. The dataset consists of simple 65 | shapes (triangles, squares, circles) placed randomly on a blank surface. 66 | The images are generated on the fly. No file access required. 67 | """ 68 | 69 | def load_shapes(self, count, height, width): 70 | """Generate the requested number of synthetic images. 71 | count: number of images to generate. 72 | height, width: the size of the generated images. 73 | """ 74 | # Add classes 75 | self.add_class("shapes", 1, "square") 76 | self.add_class("shapes", 2, "circle") 77 | self.add_class("shapes", 3, "triangle") 78 | 79 | # Add images 80 | # Generate random specifications of images (i.e. color and 81 | # list of shapes sizes and locations). This is more compact than 82 | # actual images. Images are generated on the fly in load_image(). 83 | for i in range(count): 84 | bg_color, shapes = self.random_image(height, width) 85 | self.add_image("shapes", image_id=i, path=None, 86 | width=width, height=height, 87 | bg_color=bg_color, shapes=shapes) 88 | 89 | def load_image(self, image_id): 90 | """Generate an image from the specs of the given image ID. 91 | Typically this function loads the image from a file, but 92 | in this case it generates the image on the fly from the 93 | specs in image_info. 
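        Returns an RGB image of shape [height, width, 3] with dtype uint8.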
94 |         """
95 |         info = self.image_info[image_id]
96 |         bg_color = np.array(info['bg_color']).reshape([1, 1, 3])
97 |         image = np.ones([info['height'], info['width'], 3], dtype=np.uint8)
98 |         image = image * bg_color.astype(np.uint8)
99 |         for shape, color, dims in info['shapes']:
100 |             image = self.draw_shape(image, shape, dims, color)
101 |         return image
102 | 
103 |     def image_reference(self, image_id):
104 |         """Return the shapes data of the image."""
105 |         info = self.image_info[image_id]
106 |         if info["source"] == "shapes":
107 |             return info["shapes"]
108 |         else:
109 |             return super(ShapesDataset, self).image_reference(image_id)
110 | 
111 |     def load_mask(self, image_id):
112 |         """Generate instance masks for shapes of the given image ID.
113 |         """
114 |         info = self.image_info[image_id]
115 |         shapes = info['shapes']
116 |         count = len(shapes)
117 |         mask = np.zeros([info['height'], info['width'], count], dtype=np.uint8)
118 |         for i, (shape, _, dims) in enumerate(info['shapes']):
119 |             mask[:, :, i:i + 1] = self.draw_shape(mask[:, :, i:i + 1].copy(),
120 |                                                   shape, dims, 1)
121 |         # Handle occlusions
122 |         occlusion = np.logical_not(mask[:, :, -1]).astype(np.uint8)
123 |         for i in range(count - 2, -1, -1):
124 |             mask[:, :, i] = mask[:, :, i] * occlusion
125 |             occlusion = np.logical_and(
126 |                 occlusion, np.logical_not(mask[:, :, i]))
127 |         # Map class names to class IDs.
128 |         class_ids = np.array([self.class_names.index(s[0]) for s in shapes])
129 |         return mask, class_ids.astype(np.int32)
130 | 
131 |     def draw_shape(self, image, shape, dims, color):
132 |         """Draws a shape from the given specs."""
133 |         # Get the center x, y and the size s
134 |         x, y, s = dims
135 |         if shape == 'square':
136 |             image = cv2.rectangle(image, (x - s, y - s),
137 |                                   (x + s, y + s), color, -1)
138 |         elif shape == "circle":
139 |             image = cv2.circle(image, (x, y), s, color, -1)
140 |         elif shape == "triangle":
141 |             points = np.array([[(x, y - s),
142 |                                 (x - s / math.sin(math.radians(60)), y + s),
143 |                                 (x + s / math.sin(math.radians(60)), y + s),
144 |                                 ]], dtype=np.int32)
145 |             image = cv2.fillPoly(image, points, color)
146 |         return image
147 | 
148 |     def random_shape(self, height, width):
149 |         """Generates specifications of a random shape that lies within
150 |         the given height and width boundaries.
151 |         Returns a tuple of three values:
152 |         * The shape name (square, circle, ...)
153 |         * Shape color: a tuple of 3 values, RGB.
154 |         * Shape dimensions: A tuple of values that define the shape size
155 |           and location. Differs per shape type.
156 |         """
157 |         # Shape
158 |         shape = random.choice(["square", "circle", "triangle"])
159 |         # Color
160 |         color = tuple([random.randint(0, 255) for _ in range(3)])
161 |         # Center x, y
162 |         buffer = 20
163 |         y = random.randint(buffer, height - buffer - 1)
164 |         x = random.randint(buffer, width - buffer - 1)
165 |         # Size
166 |         s = random.randint(buffer, height // 4)
167 |         return shape, color, (x, y, s)
168 | 
169 |     def random_image(self, height, width):
170 |         """Creates random specifications of an image with multiple shapes.
171 |         Returns the background color of the image and a list of shape
172 |         specifications that can be used to draw the image.
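        Shapes whose bounding boxes overlap heavily are removed by
        non-max suppression, so fewer shapes than were sampled may remain.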
173 |         """
174 |         # Pick random background color
175 |         bg_color = np.array([random.randint(0, 255) for _ in range(3)])
176 |         # Generate a few random shapes and record their
177 |         # bounding boxes
178 |         shapes = []
179 |         boxes = []
180 |         N = random.randint(1, 4)
181 |         for _ in range(N):
182 |             shape, color, dims = self.random_shape(height, width)
183 |             shapes.append((shape, color, dims))
184 |             x, y, s = dims
185 |             boxes.append([y - s, x - s, y + s, x + s])
186 |         # Apply non-max suppression with a 0.3 threshold to avoid
187 |         # shapes covering each other
188 |         keep_ixs = utils.non_max_suppression(
189 |             np.array(boxes), np.arange(N), 0.3)
190 |         shapes = [s for i, s in enumerate(shapes) if i in keep_ixs]
191 |         return bg_color, shapes
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | description-file = README.md
3 | license-file = LICENSE
4 | requirements-file = requirements.txt
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | """
2 | The build/compilation setup
3 | 
4 | >> pip install -r requirements.txt
5 | >> python setup.py install
6 | """
7 | import pip
8 | import logging
9 | import pkg_resources
10 | try:
11 |     from setuptools import setup
12 | except ImportError:
13 |     from distutils.core import setup
14 | 
15 | 
16 | def _parse_requirements(file_path):
17 |     pip_ver = pkg_resources.get_distribution('pip').version
18 |     pip_version = list(map(int, pip_ver.split('.')[:2]))
19 |     if pip_version >= [6, 0]:
20 |         raw = pip.req.parse_requirements(file_path,
21 |                                          session=pip.download.PipSession())
22 |     else:
23 |         raw = pip.req.parse_requirements(file_path)
24 |     return [str(i.req) for i in raw]
25 | 
26 | 
27 | # parse_requirements() returns generator of pip.req.InstallRequirement objects
28 | try:
29 |     install_reqs = _parse_requirements("requirements.txt")
30 | except Exception:
31 |     logging.warning('Failed to load requirements file, so using default ones.')
32 |     install_reqs = []
33 | 
34 | setup(
35 |     name='mask-rcnn',
36 |     version='2.1',
37 |     url='https://github.com/matterport/Mask_RCNN',
38 |     author='Matterport',
39 |     author_email='waleed.abdulla@gmail.com',
40 |     license='MIT',
41 |     description='Mask R-CNN for object detection and instance segmentation',
42 |     packages=["mrcnn"],
43 |     install_requires=install_reqs,
44 |     include_package_data=True,
45 |     python_requires='>=3.4',
46 |     long_description="""This is an implementation of Mask R-CNN on Python 3, Keras, and TensorFlow.
47 | The model generates bounding boxes and segmentation masks for each instance of an object in the image.
48 | It's based on Feature Pyramid Network (FPN) and a ResNet101 backbone.""", 49 | classifiers=[ 50 | "Development Status :: 5 - Production/Stable", 51 | "Environment :: Console", 52 | "Intended Audience :: Developers", 53 | "Intended Audience :: Information Technology", 54 | "Intended Audience :: Education", 55 | "Intended Audience :: Science/Research", 56 | "License :: OSI Approved :: MIT License", 57 | "Natural Language :: English", 58 | "Operating System :: OS Independent", 59 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 60 | "Topic :: Scientific/Engineering :: Image Recognition", 61 | "Topic :: Scientific/Engineering :: Visualization", 62 | "Topic :: Scientific/Engineering :: Image Segmentation", 63 | 'Programming Language :: Python :: 3.4', 64 | 'Programming Language :: Python :: 3.5', 65 | 'Programming Language :: Python :: 3.6', 66 | ], 67 | keywords="image instance segmentation object detection mask rcnn r-cnn tensorflow keras", 68 | ) 69 | --------------------------------------------------------------------------------
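As a closing usage note (not part of the repository): a minimal sketch of exercising the synthetic `ShapesDataset` from `samples/shapes/shapes.py` on its own. It assumes the repository root is the working directory and that the packages in `requirements.txt` are installed; the 128×128 size matches `ShapesConfig`.

```python
import sys
sys.path.append("samples/shapes")  # assumption: run from the repository root
from shapes import ShapesDataset

# Generate specifications for 50 synthetic 128x128 images; images and masks
# are drawn on the fly by load_image()/load_mask(), so nothing touches disk.
dataset = ShapesDataset()
dataset.load_shapes(count=50, height=128, width=128)
dataset.prepare()

# Inspect one sample.
image_id = dataset.image_ids[0]
image = dataset.load_image(image_id)            # [128, 128, 3] uint8
masks, class_ids = dataset.load_mask(image_id)  # [128, 128, N] masks and N class IDs
print(image.shape, masks.shape, class_ids)
```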