├── .gitattributes ├── .gitignore ├── LICENSE ├── MANIFEST.in ├── Prediction-and-Submission.ipynb ├── README.md ├── Training.ipynb ├── assets ├── 4k_video.gif ├── detection_activations.png ├── detection_anchors.png ├── detection_final.png ├── detection_histograms.png ├── detection_masks.png ├── detection_refinement.png ├── detection_tensorboard.png ├── donuts.png ├── sheep.png └── street.png ├── coco.py ├── data └── _ ├── images ├── 1045023827_4ec3e8ba5c_z.jpg ├── 12283150_12d37e6389_z.jpg ├── 2383514521_1fc8d7b0de_z.jpg ├── 2502287818_41e4b0c4fb_z.jpg ├── 2516944023_d00345997d_z.jpg ├── 25691390_f9944f61b5_z.jpg ├── 262985539_1709e54576_z.jpg ├── 3132016470_c27baa00e8_z.jpg ├── 3627527276_6fe8cd9bfe_z.jpg ├── 3651581213_f81963d1dd_z.jpg ├── 3800883468_12af3c0b50_z.jpg ├── 3862500489_6fd195d183_z.jpg ├── 3878153025_8fde829928_z.jpg ├── 4410436637_7b0ca36ee7_z.jpg ├── 4782628554_668bc31826_z.jpg ├── 5951960966_d4e1cda5d0_z.jpg ├── 6584515005_fce9cec486_z.jpg ├── 6821351586_59aa0dc110_z.jpg ├── 7581246086_cf7bbb7255_z.jpg ├── 7933423348_c30bd9bd4e_z.jpg ├── 8053677163_d4c8f416be_z.jpg ├── 8239308689_efa6c11b08_z.jpg ├── 8433365521_9252889f9a_z.jpg ├── 8512296263_5fc5458e20_z.jpg ├── 8699757338_c3941051b6_z.jpg ├── 8734543718_37f6b8bd45_z.jpg ├── 8829708882_48f263491e_z.jpg ├── 9118579087_f9ffa19e63_z.jpg ├── 9247489789_132c0d534a_z.jpg ├── loss-plot.png └── predictions.png ├── mrcnn ├── __init__.py ├── cocoeval.py ├── config.py ├── dataset.py ├── evaluate.py ├── model.py ├── parallel_model.py ├── utils.py └── visualize.py ├── requirements.txt ├── samples ├── balloon │ ├── README.md │ ├── balloon.py │ ├── inspect_balloon_data.ipynb │ └── inspect_balloon_model.ipynb ├── coco │ ├── coco.py │ ├── inspect_data.ipynb │ ├── inspect_model.ipynb │ └── inspect_weights.ipynb ├── demo.ipynb └── shapes │ ├── shapes.py │ └── train_shapes.ipynb ├── setup.cfg └── setup.py /.gitattributes: -------------------------------------------------------------------------------- 1 | *.h5 filter=lfs diff=lfs merge=lfs -text 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Data files and directories common in repo root 2 | datasets/ 3 | logs/ 4 | *.h5 5 | results/ 6 | temp/ 7 | test/ 8 | 9 | *.ipynb 10 | data/* 11 | 12 | # Byte-compiled / optimized / DLL files 13 | __pycache__/ 14 | *.py[cod] 15 | *$py.class 16 | 17 | # Distribution / packaging 18 | .Python 19 | env/ 20 | build/ 21 | develop-eggs/ 22 | dist/ 23 | downloads/ 24 | eggs/ 25 | .eggs/ 26 | lib/ 27 | lib64/ 28 | parts/ 29 | sdist/ 30 | var/ 31 | wheels/ 32 | *.egg-info/ 33 | .installed.cfg 34 | *.egg 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # VS Studio Code 41 | .vscode 42 | 43 | # PyCharm 44 | .idea/ 45 | 46 | # Dropbox 47 | .dropbox.attr 48 | 49 | # Jupyter Notebook 50 | .ipynb_checkpoints 51 | 52 | # pyenv 53 | .python-version 54 | 55 | # dotenv 56 | .env 57 | 58 | # virtualenv 59 | .venv 60 | venv/ 61 | ENV/ 62 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Mask R-CNN 2 | 3 | The MIT License (MIT) 4 | 5 | Copyright (c) 2017 Matterport, Inc. 
6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in 15 | all copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | THE SOFTWARE. 24 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | include LICENSE 3 | include requirements.txt -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![CrowdAI-Logo](https://github.com/crowdAI/crowdai/raw/master/app/assets/images/misc/crowdai-logo-smile.svg?sanitize=true) 2 | 3 | >The research paper summarizing the corresponding benchmark and associated solutions can be found here : [Deep Learning for Understanding Satellite Imagery: An Experimental Survey](https://www.frontiersin.org/articles/10.3389/frai.2020.534696/full) 4 | 5 | # crowdAI Mapping Challenge : Baseline 6 | 7 | This repository contains the details of implementation of the Baseline submission using [Mask RCNN](https://arxiv.org/abs/1703.06870) which obtains a score of `[AP(IoU=0.5)=0.697 ; AR(IoU=0.5)=0.479]` for the [crowdAI Mapping Challenge](https://www.crowdai.org/challenges/mapping-challenge). 8 | 9 | # Installation 10 | ``` 11 | git clone https://github.com/crowdai/crowdai-mapping-challenge-mask-rcnn 12 | cd crowdai-mapping-challenge-mask-rcnn 13 | # Please ensure that you use python3.6 14 | pip install -r requirements.txt 15 | python setup.py install 16 | ``` 17 | 18 | # Notebooks 19 | Please follow the instructions on the relevant notebooks for the training, prediction and submissions. 
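Before opening the notebooks listed below, it can help to confirm that the `mrcnn` package imports cleanly after `python setup.py install`. A minimal sketch, condensed from the configuration step in Training.ipynb (the `SanityConfig` name and its values are illustrative, not the baseline settings):

```python
from mrcnn.config import Config

class SanityConfig(Config):
    # Illustrative values only; Training.ipynb uses IMAGES_PER_GPU = 5
    # and NUM_CLASSES = 1 + 1 (background + building).
    NAME = "sanity-check"
    IMAGES_PER_GPU = 1
    NUM_CLASSES = 1 + 1

config = SanityConfig()
config.display()  # prints the resolved configuration, as the notebook does
```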
20 | 21 | * [Training](Training.ipynb) 22 | * [Prediction and Submission](Prediction-and-Submission.ipynb) 23 | (_pre-trained weights for baseline submission included_) 24 | 25 | # Results 26 | ![sample_predictions](images/predictions.png) 27 | 28 | # Citation 29 | ``` 30 | @article{mohanty2020deep, 31 | title={Deep Learning for Understanding Satellite Imagery: An Experimental Survey}, 32 | author={Mohanty, Sharada Prasanna and Czakon, Jakub and Kaczmarek, Kamil A and Pyskir, Andrzej and Tarasiewicz, Piotr and Kunwar, Saket and Rohrbach, Janick and Luo, Dave and Prasad, Manjunath and Fleer, Sascha and others}, 33 | journal={Frontiers in Artificial Intelligence}, 34 | volume={3}, 35 | year={2020}, 36 | publisher={Frontiers Media SA} 37 | } 38 | 39 | @misc{crowdAIMappingChallengeBaseline2018, 40 | author = {Mohanty, Sharada Prasanna}, 41 | title = {CrowdAI Mapping Challenge 2018 : Baseline with Mask RCNN}, 42 | year = {2018}, 43 | publisher = {GitHub}, 44 | journal = {GitHub repository}, 45 | howpublished = {\url{https://github.com/crowdai/crowdai-mapping-challenge-mask-rcnn}}, 46 | commit = {bac1cf19adbc9d078122c6933da6f808c4ee590d} 47 | } 48 | ``` 49 | # Acknowledgements 50 | This repository heavily reuses code from the amazing [tensorflow Mask RCNN implementation](https://github.com/matterport/Mask_RCNN) by [@waleedka](https://github.com/waleedka/). 51 | Many thanks to all the contributors of that project. 52 | You are encouraged to checkout [https://github.com/matterport/Mask_RCNN](https://github.com/matterport/Mask_RCNN) for documentation on many other aspects of this code. 53 | 54 | # Author 55 | Sharada Mohanty [sharada.mohanty@epfl.ch](sharada.mohanty@epfl.ch) 56 | -------------------------------------------------------------------------------- /Training.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# mapping-challenge-mask_rcnn-training\n", 8 | "![CrowdAI-Logo](https://github.com/crowdAI/crowdai/raw/master/app/assets/images/misc/crowdai-logo-smile.svg?sanitize=true)\n", 9 | "\n", 10 | "This notebook contains the baseline code for the training a vanilla [Mask RCNN](https://arxiv.org/abs/1703.06870) model for the [crowdAI Mapping Challenge](https://www.crowdai.org/challenges/mapping-challenge).\n", 11 | "\n", 12 | "This code is adapted from the [Mask RCNN]() tensorflow implementation available here : [https://github.com/matterport/Mask_RCNN](https://github.com/matterport/Mask_RCNN).\n", 13 | "\n", 14 | "First we begin by importing all the necessary dependencies : " 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 12, 20 | "metadata": { 21 | "collapsed": true 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "import os\n", 26 | "import sys\n", 27 | "import time\n", 28 | "import numpy as np\n", 29 | "\n", 30 | "# Download and install the Python COCO tools from https://github.com/waleedka/coco\n", 31 | "# That's a fork from the original https://github.com/pdollar/coco with a bug\n", 32 | "# fix for Python 3.\n", 33 | "# I submitted a pull request https://github.com/cocodataset/cocoapi/pull/50\n", 34 | "# If the PR is merged then use the original repo.\n", 35 | "# Note: Edit PythonAPI/Makefile and replace \"python\" with \"python3\".\n", 36 | "# \n", 37 | "# A quick one liner to install the library \n", 38 | "# !pip install git+https://github.com/waleedka/coco.git#subdirectory=PythonAPI\n", 39 | "\n", 40 | "from pycocotools.coco 
import COCO\n", 41 | "from pycocotools.cocoeval import COCOeval\n", 42 | "from pycocotools import mask as maskUtils\n", 43 | "\n", 44 | "from mrcnn.evaluate import build_coco_results, evaluate_coco\n", 45 | "from mrcnn.dataset import MappingChallengeDataset\n", 46 | "\n", 47 | "import zipfile\n", 48 | "import urllib.request\n", 49 | "import shutil\n" 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "## Dataset location \n", 57 | "Now we have to download all the files in the datasets section and untar them to have the following structure :\n", 58 | "```\n", 59 | "├── data\n", 60 | "| ├── pretrained_weights.h5 (already included in this repository)\n", 61 | "│   ├── test\n", 62 | "│   │   └── images/\n", 63 | "│ │ └── annotation.json\n", 64 | "│   ├── train\n", 65 | "│   │   └── images/\n", 66 | "│ │ └── annotation.json\n", 67 | "│   └── val\n", 68 | "│   └── images/\n", 69 | "│ └── annotation.json\n", 70 | "```\n", 71 | "Note that the `pretrained_weights.h5` (available at [https://www.crowdai.org/challenges/mapping-challenge/dataset_files](https://www.crowdai.org/challenges/mapping-challenge/dataset_files)) are the weights used for the baseline submission, and are obtained by running the learning schedule mentioned later in the experiment. In the said experiment, the initial weights used can be found [here](https://github.com/matterport/Mask_RCNN/releases/download/v2.1/mask_rcnn_balloon.h5). " 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 19, 77 | "metadata": { 78 | "collapsed": true 79 | }, 80 | "outputs": [], 81 | "source": [ 82 | "ROOT_DIR = os.getcwd()\n", 83 | "\n", 84 | "# Import Mask RCNN\n", 85 | "sys.path.append(ROOT_DIR) # To find local version of the library\n", 86 | "from mrcnn.config import Config\n", 87 | "from mrcnn import model as modellib, utils\n", 88 | "\n", 89 | "\n", 90 | "PRETRAINED_MODEL_PATH = os.path.join(ROOT_DIR,\"data/\" \"pretrained_weights.h5\")\n", 91 | "LOGS_DIRECTORY = os.path.join(ROOT_DIR, \"logs\")" 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": {}, 97 | "source": [ 98 | "## Experiment Configuration" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": 22, 104 | "metadata": {}, 105 | "outputs": [ 106 | { 107 | "name": "stdout", 108 | "output_type": "stream", 109 | "text": [ 110 | "\n", 111 | "Configurations:\n", 112 | "BACKBONE resnet101\n", 113 | "BACKBONE_STRIDES [4, 8, 16, 32, 64]\n", 114 | "BATCH_SIZE 5\n", 115 | "BBOX_STD_DEV [0.1 0.1 0.2 0.2]\n", 116 | "DETECTION_MAX_INSTANCES 100\n", 117 | "DETECTION_MIN_CONFIDENCE 0.7\n", 118 | "DETECTION_NMS_THRESHOLD 0.3\n", 119 | "GPU_COUNT 1\n", 120 | "GRADIENT_CLIP_NORM 5.0\n", 121 | "IMAGES_PER_GPU 5\n", 122 | "IMAGE_MAX_DIM 320\n", 123 | "IMAGE_META_SIZE 14\n", 124 | "IMAGE_MIN_DIM 320\n", 125 | "IMAGE_RESIZE_MODE square\n", 126 | "IMAGE_SHAPE [320 320 3]\n", 127 | "LEARNING_MOMENTUM 0.9\n", 128 | "LEARNING_RATE 0.001\n", 129 | "MASK_POOL_SIZE 14\n", 130 | "MASK_SHAPE [28, 28]\n", 131 | "MAX_GT_INSTANCES 100\n", 132 | "MEAN_PIXEL [123.7 116.8 103.9]\n", 133 | "MINI_MASK_SHAPE (56, 56)\n", 134 | "NAME crowdai-mapping-challenge\n", 135 | "NUM_CLASSES 2\n", 136 | "POOL_SIZE 7\n", 137 | "POST_NMS_ROIS_INFERENCE 1000\n", 138 | "POST_NMS_ROIS_TRAINING 2000\n", 139 | "ROI_POSITIVE_RATIO 0.33\n", 140 | "RPN_ANCHOR_RATIOS [0.5, 1, 2]\n", 141 | "RPN_ANCHOR_SCALES (32, 64, 128, 256, 512)\n", 142 | "RPN_ANCHOR_STRIDE 1\n", 143 | "RPN_BBOX_STD_DEV [0.1 0.1 0.2 0.2]\n", 144 | "RPN_NMS_THRESHOLD 0.7\n", 145 
| "RPN_TRAIN_ANCHORS_PER_IMAGE 256\n", 146 | "STEPS_PER_EPOCH 1000\n", 147 | "TRAIN_BN False\n", 148 | "TRAIN_ROIS_PER_IMAGE 200\n", 149 | "USE_MINI_MASK True\n", 150 | "USE_RPN_ROIS True\n", 151 | "VALIDATION_STEPS 50\n", 152 | "WEIGHT_DECAY 0.0001\n", 153 | "\n", 154 | "\n" 155 | ] 156 | } 157 | ], 158 | "source": [ 159 | "class MappingChallengeConfig(Config):\n", 160 | " \"\"\"Configuration for training on data in MS COCO format.\n", 161 | " Derives from the base Config class and overrides values specific\n", 162 | " to the COCO dataset.\n", 163 | " \"\"\"\n", 164 | " # Give the configuration a recognizable name\n", 165 | " NAME = \"crowdai-mapping-challenge\"\n", 166 | "\n", 167 | " # We use a GPU with 12GB memory, which can fit two images.\n", 168 | " # Adjust down if you use a smaller GPU.\n", 169 | " IMAGES_PER_GPU = 5\n", 170 | "\n", 171 | " # Uncomment to train on 8 GPUs (default is 1)\n", 172 | " GPU_COUNT = 1\n", 173 | "\n", 174 | " # Number of classes (including background)\n", 175 | " NUM_CLASSES = 1 + 1 # 1 Backgroun + 1 Building\n", 176 | "\n", 177 | " STEPS_PER_EPOCH=1000\n", 178 | " VALIDATION_STEPS=50\n", 179 | "\n", 180 | "\n", 181 | " IMAGE_MAX_DIM=320\n", 182 | " IMAGE_MIN_DIM=320\n", 183 | "\n", 184 | "config = MappingChallengeConfig()\n", 185 | "config.display()" 186 | ] 187 | }, 188 | { 189 | "cell_type": "markdown", 190 | "metadata": {}, 191 | "source": [ 192 | "## Instantiate Model" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": 25, 198 | "metadata": { 199 | "collapsed": true 200 | }, 201 | "outputs": [], 202 | "source": [ 203 | "model = modellib.MaskRCNN(mode=\"training\", config=config, model_dir=LOGS_DIRECTORY)\n", 204 | "# Load pretrained weights\n", 205 | "model_path = PRETRAINED_MODEL_PATH\n", 206 | "model.load_weights(model_path, by_name=True)" 207 | ] 208 | }, 209 | { 210 | "cell_type": "markdown", 211 | "metadata": {}, 212 | "source": [ 213 | "## Load Training and Validation Dataset" 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": 32, 219 | "metadata": {}, 220 | "outputs": [ 221 | { 222 | "name": "stdout", 223 | "output_type": "stream", 224 | "text": [ 225 | "Annotation Path data/train/annotation-small.json\n", 226 | "Image Dir data/train/images\n", 227 | "loading annotations into memory...\n", 228 | "Done (t=1.12s)\n", 229 | "creating index...\n", 230 | "index created!\n", 231 | "Annotation Path data/val/annotation-small.json\n", 232 | "Image Dir data/val/images\n", 233 | "loading annotations into memory...\n", 234 | "Done (t=0.20s)\n", 235 | "creating index...\n", 236 | "index created!\n" 237 | ] 238 | } 239 | ], 240 | "source": [ 241 | "# Load training dataset\n", 242 | "dataset_train = MappingChallengeDataset()\n", 243 | "dataset_train.load_dataset(dataset_dir=os.path.join(\"data\", \"train\"), load_small=True)\n", 244 | "dataset_train.prepare()\n", 245 | "\n", 246 | "# Load validation dataset\n", 247 | "dataset_val = MappingChallengeDataset()\n", 248 | "val_coco = dataset_val.load_dataset(dataset_dir=os.path.join(\"data\", \"val\"), load_small=True, return_coco=True)\n", 249 | "dataset_val.prepare()" 250 | ] 251 | }, 252 | { 253 | "cell_type": "markdown", 254 | "metadata": {}, 255 | "source": [ 256 | "## Train" 257 | ] 258 | }, 259 | { 260 | "cell_type": "code", 261 | "execution_count": null, 262 | "metadata": {}, 263 | "outputs": [], 264 | "source": [ 265 | "# *** This training schedule is an example. 
Update to your needs ***\n", 266 | "\n", 267 | "# Training - Stage 1\n", 268 | "print(\"Training network heads\")\n", 269 | "model.train(dataset_train, dataset_val,\n", 270 | " learning_rate=config.LEARNING_RATE,\n", 271 | " epochs=40,\n", 272 | " layers='heads')\n", 273 | "\n", 274 | "# Training - Stage 2\n", 275 | "# Finetune layers from ResNet stage 4 and up\n", 276 | "print(\"Fine tune Resnet stage 4 and up\")\n", 277 | "model.train(dataset_train, dataset_val,\n", 278 | " learning_rate=config.LEARNING_RATE,\n", 279 | " epochs=120,\n", 280 | " layers='4+')\n", 281 | "\n", 282 | "# Training - Stage 3\n", 283 | "# Fine tune all layers\n", 284 | "print(\"Fine tune all layers\")\n", 285 | "model.train(dataset_train, dataset_val,\n", 286 | " learning_rate=config.LEARNING_RATE / 10,\n", 287 | " epochs=160,\n", 288 | " layers='all')" 289 | ] 290 | }, 291 | { 292 | "cell_type": "markdown", 293 | "metadata": {}, 294 | "source": [ 295 | "Now you can monitor the training by running : \n", 296 | "```\n", 297 | "tensorboard --logdir=logs/[path-to-your-experiment-logdir]\n", 298 | "```\n", 299 | "and if everything works great, you should see something like : \n", 300 | "![loss-plot](images/loss-plot.png)\n", 301 | "\n", 302 | "# Author\n", 303 | "Sharada Mohanty [sharada.mohanty@epfl.ch](sharada.mohanty@epfl.ch)" 304 | ] 305 | }, 306 | { 307 | "cell_type": "code", 308 | "execution_count": null, 309 | "metadata": { 310 | "collapsed": true 311 | }, 312 | "outputs": [], 313 | "source": [] 314 | } 315 | ], 316 | "metadata": { 317 | "kernelspec": { 318 | "display_name": "Python 2", 319 | "language": "python", 320 | "name": "python2" 321 | }, 322 | "language_info": { 323 | "codemirror_mode": { 324 | "name": "ipython", 325 | "version": 2 326 | }, 327 | "file_extension": ".py", 328 | "mimetype": "text/x-python", 329 | "name": "python", 330 | "nbconvert_exporter": "python", 331 | "pygments_lexer": "ipython2", 332 | "version": "2.7.14" 333 | } 334 | }, 335 | "nbformat": 4, 336 | "nbformat_minor": 2 337 | } 338 | -------------------------------------------------------------------------------- /assets/4k_video.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/assets/4k_video.gif -------------------------------------------------------------------------------- /assets/detection_activations.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/assets/detection_activations.png -------------------------------------------------------------------------------- /assets/detection_anchors.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/assets/detection_anchors.png -------------------------------------------------------------------------------- /assets/detection_final.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/assets/detection_final.png -------------------------------------------------------------------------------- /assets/detection_histograms.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/assets/detection_histograms.png -------------------------------------------------------------------------------- /assets/detection_masks.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/assets/detection_masks.png -------------------------------------------------------------------------------- /assets/detection_refinement.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/assets/detection_refinement.png -------------------------------------------------------------------------------- /assets/detection_tensorboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/assets/detection_tensorboard.png -------------------------------------------------------------------------------- /assets/donuts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/assets/donuts.png -------------------------------------------------------------------------------- /assets/sheep.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/assets/sheep.png -------------------------------------------------------------------------------- /assets/street.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/assets/street.png -------------------------------------------------------------------------------- /coco.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Configurations and data loading code for MS COCO. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 
6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | 9 | ------------------------------------------------------------ 10 | 11 | Usage: import the module (see Jupyter notebooks for examples), or run from 12 | the command line as such: 13 | 14 | # Train a new model starting from pre-trained COCO weights 15 | python3 coco.py train --dataset=/path/to/coco/ --model=coco 16 | 17 | # Train a new model starting from ImageNet weights 18 | python3 coco.py train --dataset=/path/to/coco/ --model=imagenet 19 | 20 | # Continue training a model that you had trained earlier 21 | python3 coco.py train --dataset=/path/to/coco/ --model=/path/to/weights.h5 22 | 23 | # Continue training the last model you trained 24 | python3 coco.py train --dataset=/path/to/coco/ --model=last 25 | 26 | # Run COCO evaluatoin on the last model you trained 27 | python3 coco.py evaluate --dataset=/path/to/coco/ --model=last 28 | """ 29 | 30 | import os 31 | import time 32 | import numpy as np 33 | 34 | # Download and install the Python COCO tools from https://github.com/waleedka/coco 35 | # 36 | # pip install git+https://github.com/waleedka/coco.git#subdirectory=PythonAPI 37 | # 38 | # That's a fork from the original https://github.com/pdollar/coco with a bug 39 | # fix for Python 3. 40 | # I submitted a pull request https://github.com/cocodataset/cocoapi/pull/50 41 | # If the PR is merged then use the original repo. 42 | # Note: Edit PythonAPI/Makefile and replace "python" with "python3". 43 | from pycocotools.coco import COCO 44 | from pycocotools.cocoeval import COCOeval 45 | from pycocotools import mask as maskUtils 46 | 47 | import zipfile 48 | import urllib.request 49 | import shutil 50 | 51 | from mrcnn.config import Config 52 | import mrcnn.utils as utils 53 | import mrcnn.model as modellib 54 | 55 | # Root directory of the project 56 | ROOT_DIR = os.getcwd() 57 | 58 | # Path to trained weights file 59 | COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5") 60 | 61 | # Directory to save logs and model checkpoints, if not provided 62 | # through the command line argument --logs 63 | DEFAULT_LOGS_DIR = os.path.join(ROOT_DIR, "logs") 64 | DEFAULT_DATASET_YEAR = "2014" 65 | 66 | ############################################################ 67 | # Configurations 68 | ############################################################ 69 | 70 | 71 | class CocoConfig(Config): 72 | """Configuration for training on MS COCO. 73 | Derives from the base Config class and overrides values specific 74 | to the COCO dataset. 75 | """ 76 | # Give the configuration a recognizable name 77 | NAME = "coco" 78 | 79 | # We use a GPU with 12GB memory, which can fit two images. 80 | # Adjust down if you use a smaller GPU. 81 | IMAGES_PER_GPU = 2 82 | 83 | # Uncomment to train on 8 GPUs (default is 1) 84 | # GPU_COUNT = 8 85 | 86 | # Number of classes (including background) 87 | NUM_CLASSES = 1 + 80 # COCO has 80 classes 88 | 89 | 90 | ############################################################ 91 | # Dataset 92 | ############################################################ 93 | 94 | class CocoDataset(utils.Dataset): 95 | def load_coco(self, dataset_dir, subset, year=DEFAULT_DATASET_YEAR, class_ids=None, 96 | class_map=None, return_coco=False, auto_download=False): 97 | """Load a subset of the COCO dataset. 98 | dataset_dir: The root directory of the COCO dataset. 
99 | subset: What to load (train, val, minival, valminusminival) 100 | year: What dataset year to load (2014, 2017) as a string, not an integer 101 | class_ids: If provided, only loads images that have the given classes. 102 | class_map: TODO: Not implemented yet. Supports maping classes from 103 | different datasets to the same class ID. 104 | return_coco: If True, returns the COCO object. 105 | auto_download: Automatically download and unzip MS-COCO images and annotations 106 | """ 107 | 108 | if auto_download is True: 109 | self.auto_download(dataset_dir, subset, year) 110 | 111 | coco = COCO("{}/annotations/instances_{}{}.json".format(dataset_dir, subset, year)) 112 | if subset == "minival" or subset == "valminusminival": 113 | subset = "val" 114 | image_dir = "{}/{}{}".format(dataset_dir, subset, year) 115 | 116 | # Load all classes or a subset? 117 | if not class_ids: 118 | # All classes 119 | class_ids = sorted(coco.getCatIds()) 120 | 121 | # All images or a subset? 122 | if class_ids: 123 | image_ids = [] 124 | for id in class_ids: 125 | image_ids.extend(list(coco.getImgIds(catIds=[id]))) 126 | # Remove duplicates 127 | image_ids = list(set(image_ids)) 128 | else: 129 | # All images 130 | image_ids = list(coco.imgs.keys()) 131 | 132 | # Add classes 133 | for i in class_ids: 134 | self.add_class("coco", i, coco.loadCats(i)[0]["name"]) 135 | 136 | # Add images 137 | for i in image_ids: 138 | self.add_image( 139 | "coco", image_id=i, 140 | path=os.path.join(image_dir, coco.imgs[i]['file_name']), 141 | width=coco.imgs[i]["width"], 142 | height=coco.imgs[i]["height"], 143 | annotations=coco.loadAnns(coco.getAnnIds( 144 | imgIds=[i], catIds=class_ids, iscrowd=None))) 145 | if return_coco: 146 | return coco 147 | 148 | def auto_download(self, dataDir, dataType, dataYear): 149 | """Download the COCO dataset/annotations if requested. 150 | dataDir: The root directory of the COCO dataset. 151 | dataType: What to load (train, val, minival, valminusminival) 152 | dataYear: What dataset year to load (2014, 2017) as a string, not an integer 153 | Note: 154 | For 2014, use "train", "val", "minival", or "valminusminival" 155 | For 2017, only "train" and "val" annotations are available 156 | """ 157 | 158 | # Setup paths and file names 159 | if dataType == "minival" or dataType == "valminusminival": 160 | imgDir = "{}/{}{}".format(dataDir, "val", dataYear) 161 | imgZipFile = "{}/{}{}.zip".format(dataDir, "val", dataYear) 162 | imgURL = "http://images.cocodataset.org/zips/{}{}.zip".format("val", dataYear) 163 | else: 164 | imgDir = "{}/{}{}".format(dataDir, dataType, dataYear) 165 | imgZipFile = "{}/{}{}.zip".format(dataDir, dataType, dataYear) 166 | imgURL = "http://images.cocodataset.org/zips/{}{}.zip".format(dataType, dataYear) 167 | # print("Image paths:"); print(imgDir); print(imgZipFile); print(imgURL) 168 | 169 | # Create main folder if it doesn't exist yet 170 | if not os.path.exists(dataDir): 171 | os.makedirs(dataDir) 172 | 173 | # Download images if not available locally 174 | if not os.path.exists(imgDir): 175 | os.makedirs(imgDir) 176 | print("Downloading images to " + imgZipFile + " ...") 177 | with urllib.request.urlopen(imgURL) as resp, open(imgZipFile, 'wb') as out: 178 | shutil.copyfileobj(resp, out) 179 | print("... done downloading.") 180 | print("Unzipping " + imgZipFile) 181 | with zipfile.ZipFile(imgZipFile, "r") as zip_ref: 182 | zip_ref.extractall(dataDir) 183 | print("... 
done unzipping") 184 | print("Will use images in " + imgDir) 185 | 186 | # Setup annotations data paths 187 | annDir = "{}/annotations".format(dataDir) 188 | if dataType == "minival": 189 | annZipFile = "{}/instances_minival2014.json.zip".format(dataDir) 190 | annFile = "{}/instances_minival2014.json".format(annDir) 191 | annURL = "https://dl.dropboxusercontent.com/s/o43o90bna78omob/instances_minival2014.json.zip?dl=0" 192 | unZipDir = annDir 193 | elif dataType == "valminusminival": 194 | annZipFile = "{}/instances_valminusminival2014.json.zip".format(dataDir) 195 | annFile = "{}/instances_valminusminival2014.json".format(annDir) 196 | annURL = "https://dl.dropboxusercontent.com/s/s3tw5zcg7395368/instances_valminusminival2014.json.zip?dl=0" 197 | unZipDir = annDir 198 | else: 199 | annZipFile = "{}/annotations_trainval{}.zip".format(dataDir, dataYear) 200 | annFile = "{}/instances_{}{}.json".format(annDir, dataType, dataYear) 201 | annURL = "http://images.cocodataset.org/annotations/annotations_trainval{}.zip".format(dataYear) 202 | unZipDir = dataDir 203 | # print("Annotations paths:"); print(annDir); print(annFile); print(annZipFile); print(annURL) 204 | 205 | # Download annotations if not available locally 206 | if not os.path.exists(annDir): 207 | os.makedirs(annDir) 208 | if not os.path.exists(annFile): 209 | if not os.path.exists(annZipFile): 210 | print("Downloading zipped annotations to " + annZipFile + " ...") 211 | with urllib.request.urlopen(annURL) as resp, open(annZipFile, 'wb') as out: 212 | shutil.copyfileobj(resp, out) 213 | print("... done downloading.") 214 | print("Unzipping " + annZipFile) 215 | with zipfile.ZipFile(annZipFile, "r") as zip_ref: 216 | zip_ref.extractall(unZipDir) 217 | print("... done unzipping") 218 | print("Will use annotations in " + annFile) 219 | 220 | def load_mask(self, image_id): 221 | """Load instance masks for the given image. 222 | 223 | Different datasets use different ways to store masks. This 224 | function converts the different mask format to one format 225 | in the form of a bitmap [height, width, instances]. 226 | 227 | Returns: 228 | masks: A bool array of shape [height, width, instance count] with 229 | one mask per instance. 230 | class_ids: a 1D array of class IDs of the instance masks. 231 | """ 232 | # If not a COCO image, delegate to parent class. 233 | image_info = self.image_info[image_id] 234 | if image_info["source"] != "coco": 235 | return super(CocoDataset, self).load_mask(image_id) 236 | 237 | instance_masks = [] 238 | class_ids = [] 239 | annotations = self.image_info[image_id]["annotations"] 240 | # Build mask of shape [height, width, instance_count] and list 241 | # of class IDs that correspond to each channel of the mask. 242 | for annotation in annotations: 243 | class_id = self.map_source_class_id( 244 | "coco.{}".format(annotation['category_id'])) 245 | if class_id: 246 | m = self.annToMask(annotation, image_info["height"], 247 | image_info["width"]) 248 | # Some objects are so small that they're less than 1 pixel area 249 | # and end up rounded out. Skip those objects. 250 | if m.max() < 1: 251 | continue 252 | # Is it a crowd? If so, use a negative class ID. 253 | if annotation['iscrowd']: 254 | # Use negative class ID for crowds 255 | class_id *= -1 256 | # For crowd masks, annToMask() sometimes returns a mask 257 | # smaller than the given dimensions. If so, resize it. 
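                # (Note: despite the comment above, the branch below does not
                # resize the mask; it substitutes an all-ones mask covering the
                # full image, so the crowd region is treated as spanning the
                # entire image.)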
258 | if m.shape[0] != image_info["height"] or m.shape[1] != image_info["width"]: 259 | m = np.ones([image_info["height"], image_info["width"]], dtype=bool) 260 | instance_masks.append(m) 261 | class_ids.append(class_id) 262 | 263 | # Pack instance masks into an array 264 | if class_ids: 265 | mask = np.stack(instance_masks, axis=2) 266 | class_ids = np.array(class_ids, dtype=np.int32) 267 | return mask, class_ids 268 | else: 269 | # Call super class to return an empty mask 270 | return super(CocoDataset, self).load_mask(image_id) 271 | 272 | def image_reference(self, image_id): 273 | """Return a link to the image in the COCO Website.""" 274 | info = self.image_info[image_id] 275 | if info["source"] == "coco": 276 | return "http://cocodataset.org/#explore?id={}".format(info["id"]) 277 | else: 278 | super(CocoDataset, self).image_reference(image_id) 279 | 280 | # The following two functions are from pycocotools with a few changes. 281 | 282 | def annToRLE(self, ann, height, width): 283 | """ 284 | Convert annotation which can be polygons, uncompressed RLE to RLE. 285 | :return: binary mask (numpy 2D array) 286 | """ 287 | segm = ann['segmentation'] 288 | if isinstance(segm, list): 289 | # polygon -- a single object might consist of multiple parts 290 | # we merge all parts into one mask rle code 291 | rles = maskUtils.frPyObjects(segm, height, width) 292 | rle = maskUtils.merge(rles) 293 | elif isinstance(segm['counts'], list): 294 | # uncompressed RLE 295 | rle = maskUtils.frPyObjects(segm, height, width) 296 | else: 297 | # rle 298 | rle = ann['segmentation'] 299 | return rle 300 | 301 | def annToMask(self, ann, height, width): 302 | """ 303 | Convert annotation which can be polygons, uncompressed RLE, or RLE to binary mask. 304 | :return: binary mask (numpy 2D array) 305 | """ 306 | rle = self.annToRLE(ann, height, width) 307 | m = maskUtils.decode(rle) 308 | return m 309 | 310 | 311 | ############################################################ 312 | # COCO Evaluation 313 | ############################################################ 314 | 315 | def build_coco_results(dataset, image_ids, rois, class_ids, scores, masks): 316 | """Arrange resutls to match COCO specs in http://cocodataset.org/#format 317 | """ 318 | # If no results, return an empty list 319 | if rois is None: 320 | return [] 321 | 322 | results = [] 323 | for image_id in image_ids: 324 | # Loop through detections 325 | for i in range(rois.shape[0]): 326 | class_id = class_ids[i] 327 | score = scores[i] 328 | bbox = np.around(rois[i], 1) 329 | mask = masks[:, :, i] 330 | 331 | result = { 332 | "image_id": image_id, 333 | "category_id": dataset.get_source_class_id(class_id, "coco"), 334 | "bbox": [bbox[1], bbox[0], bbox[3] - bbox[1], bbox[2] - bbox[0]], 335 | "score": score, 336 | "segmentation": maskUtils.encode(np.asfortranarray(mask)) 337 | } 338 | results.append(result) 339 | return results 340 | 341 | 342 | def evaluate_coco(model, dataset, coco, eval_type="bbox", limit=0, image_ids=None): 343 | """Runs official COCO evaluation. 344 | dataset: A Dataset object with valiadtion data 345 | eval_type: "bbox" or "segm" for bounding box or segmentation evaluation 346 | limit: if not 0, it's the number of images to use for evaluation 347 | """ 348 | # Pick COCO images from the dataset 349 | image_ids = image_ids or dataset.image_ids 350 | 351 | # Limit to a subset 352 | if limit: 353 | image_ids = image_ids[:limit] 354 | 355 | # Get corresponding COCO image IDs. 
356 | coco_image_ids = [dataset.image_info[id]["id"] for id in image_ids] 357 | 358 | t_prediction = 0 359 | t_start = time.time() 360 | 361 | results = [] 362 | for i, image_id in enumerate(image_ids): 363 | # Load image 364 | image = dataset.load_image(image_id) 365 | 366 | # Run detection 367 | t = time.time() 368 | r = model.detect([image], verbose=0)[0] 369 | t_prediction += (time.time() - t) 370 | 371 | # Convert results to COCO format 372 | image_results = build_coco_results(dataset, coco_image_ids[i:i + 1], 373 | r["rois"], r["class_ids"], 374 | r["scores"], r["masks"]) 375 | results.extend(image_results) 376 | 377 | # Load results. This modifies results with additional attributes. 378 | coco_results = coco.loadRes(results) 379 | 380 | # Evaluate 381 | cocoEval = COCOeval(coco, coco_results, eval_type) 382 | cocoEval.params.imgIds = coco_image_ids 383 | cocoEval.evaluate() 384 | cocoEval.accumulate() 385 | cocoEval.summarize() 386 | 387 | print("Prediction time: {}. Average {}/image".format( 388 | t_prediction, t_prediction / len(image_ids))) 389 | print("Total time: ", time.time() - t_start) 390 | 391 | 392 | ############################################################ 393 | # Training 394 | ############################################################ 395 | 396 | 397 | if __name__ == '__main__': 398 | import argparse 399 | 400 | # Parse command line arguments 401 | parser = argparse.ArgumentParser( 402 | description='Train Mask R-CNN on MS COCO.') 403 | parser.add_argument("command", 404 | metavar="", 405 | help="'train' or 'evaluate' on MS COCO") 406 | parser.add_argument('--dataset', required=True, 407 | metavar="/path/to/coco/", 408 | help='Directory of the MS-COCO dataset') 409 | parser.add_argument('--year', required=False, 410 | default=DEFAULT_DATASET_YEAR, 411 | metavar="", 412 | help='Year of the MS-COCO dataset (2014 or 2017) (default=2014)') 413 | parser.add_argument('--model', required=True, 414 | metavar="/path/to/weights.h5", 415 | help="Path to weights .h5 file or 'coco'") 416 | parser.add_argument('--logs', required=False, 417 | default=DEFAULT_LOGS_DIR, 418 | metavar="/path/to/logs/", 419 | help='Logs and checkpoints directory (default=logs/)') 420 | parser.add_argument('--limit', required=False, 421 | default=500, 422 | metavar="", 423 | help='Images to use for evaluation (default=500)') 424 | parser.add_argument('--download', required=False, 425 | default=False, 426 | metavar="", 427 | help='Automatically download and unzip MS-COCO files (default=False)', 428 | type=bool) 429 | args = parser.parse_args() 430 | print("Command: ", args.command) 431 | print("Model: ", args.model) 432 | print("Dataset: ", args.dataset) 433 | print("Year: ", args.year) 434 | print("Logs: ", args.logs) 435 | print("Auto Download: ", args.download) 436 | 437 | # Configurations 438 | if args.command == "train": 439 | config = CocoConfig() 440 | else: 441 | class InferenceConfig(CocoConfig): 442 | # Set batch size to 1 since we'll be running inference on 443 | # one image at a time. 
Batch size = GPU_COUNT * IMAGES_PER_GPU 444 | GPU_COUNT = 1 445 | IMAGES_PER_GPU = 1 446 | DETECTION_MIN_CONFIDENCE = 0 447 | config = InferenceConfig() 448 | config.display() 449 | 450 | # Create model 451 | if args.command == "train": 452 | model = modellib.MaskRCNN(mode="training", config=config, 453 | model_dir=args.logs) 454 | else: 455 | model = modellib.MaskRCNN(mode="inference", config=config, 456 | model_dir=args.logs) 457 | 458 | # Select weights file to load 459 | if args.model.lower() == "coco": 460 | model_path = COCO_MODEL_PATH 461 | elif args.model.lower() == "last": 462 | # Find last trained weights 463 | model_path = model.find_last()[1] 464 | elif args.model.lower() == "imagenet": 465 | # Start from ImageNet trained weights 466 | model_path = model.get_imagenet_weights() 467 | else: 468 | model_path = args.model 469 | 470 | # Load weights 471 | print("Loading weights ", model_path) 472 | model.load_weights(model_path, by_name=True) 473 | 474 | # Train or evaluate 475 | if args.command == "train": 476 | # Training dataset. Use the training set and 35K from the 477 | # validation set, as as in the Mask RCNN paper. 478 | dataset_train = CocoDataset() 479 | dataset_train.load_coco(args.dataset, "train", year=args.year, auto_download=args.download) 480 | dataset_train.load_coco(args.dataset, "valminusminival", year=args.year, auto_download=args.download) 481 | dataset_train.prepare() 482 | 483 | # Validation dataset 484 | dataset_val = CocoDataset() 485 | dataset_val.load_coco(args.dataset, "minival", year=args.year, auto_download=args.download) 486 | dataset_val.prepare() 487 | 488 | # *** This training schedule is an example. Update to your needs *** 489 | 490 | # Training - Stage 1 491 | print("Training network heads") 492 | model.train(dataset_train, dataset_val, 493 | learning_rate=config.LEARNING_RATE, 494 | epochs=40, 495 | layers='heads') 496 | 497 | # Training - Stage 2 498 | # Finetune layers from ResNet stage 4 and up 499 | print("Fine tune Resnet stage 4 and up") 500 | model.train(dataset_train, dataset_val, 501 | learning_rate=config.LEARNING_RATE, 502 | epochs=120, 503 | layers='4+') 504 | 505 | # Training - Stage 3 506 | # Fine tune all layers 507 | print("Fine tune all layers") 508 | model.train(dataset_train, dataset_val, 509 | learning_rate=config.LEARNING_RATE / 10, 510 | epochs=160, 511 | layers='all') 512 | 513 | elif args.command == "evaluate": 514 | # Validation dataset 515 | dataset_val = CocoDataset() 516 | coco = dataset_val.load_coco(args.dataset, "minival", year=args.year, return_coco=True, auto_download=args.download) 517 | dataset_val.prepare() 518 | print("Running COCO evaluation on {} images.".format(args.limit)) 519 | evaluate_coco(model, dataset_val, coco, "bbox", limit=int(args.limit)) 520 | else: 521 | print("'{}' is not recognized. 
" 522 | "Use 'train' or 'evaluate'".format(args.command)) 523 | -------------------------------------------------------------------------------- /data/_: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/data/_ -------------------------------------------------------------------------------- /images/1045023827_4ec3e8ba5c_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/1045023827_4ec3e8ba5c_z.jpg -------------------------------------------------------------------------------- /images/12283150_12d37e6389_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/12283150_12d37e6389_z.jpg -------------------------------------------------------------------------------- /images/2383514521_1fc8d7b0de_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/2383514521_1fc8d7b0de_z.jpg -------------------------------------------------------------------------------- /images/2502287818_41e4b0c4fb_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/2502287818_41e4b0c4fb_z.jpg -------------------------------------------------------------------------------- /images/2516944023_d00345997d_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/2516944023_d00345997d_z.jpg -------------------------------------------------------------------------------- /images/25691390_f9944f61b5_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/25691390_f9944f61b5_z.jpg -------------------------------------------------------------------------------- /images/262985539_1709e54576_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/262985539_1709e54576_z.jpg -------------------------------------------------------------------------------- /images/3132016470_c27baa00e8_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/3132016470_c27baa00e8_z.jpg -------------------------------------------------------------------------------- /images/3627527276_6fe8cd9bfe_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/3627527276_6fe8cd9bfe_z.jpg 
-------------------------------------------------------------------------------- /images/3651581213_f81963d1dd_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/3651581213_f81963d1dd_z.jpg -------------------------------------------------------------------------------- /images/3800883468_12af3c0b50_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/3800883468_12af3c0b50_z.jpg -------------------------------------------------------------------------------- /images/3862500489_6fd195d183_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/3862500489_6fd195d183_z.jpg -------------------------------------------------------------------------------- /images/3878153025_8fde829928_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/3878153025_8fde829928_z.jpg -------------------------------------------------------------------------------- /images/4410436637_7b0ca36ee7_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/4410436637_7b0ca36ee7_z.jpg -------------------------------------------------------------------------------- /images/4782628554_668bc31826_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/4782628554_668bc31826_z.jpg -------------------------------------------------------------------------------- /images/5951960966_d4e1cda5d0_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/5951960966_d4e1cda5d0_z.jpg -------------------------------------------------------------------------------- /images/6584515005_fce9cec486_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/6584515005_fce9cec486_z.jpg -------------------------------------------------------------------------------- /images/6821351586_59aa0dc110_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/6821351586_59aa0dc110_z.jpg -------------------------------------------------------------------------------- /images/7581246086_cf7bbb7255_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/7581246086_cf7bbb7255_z.jpg 
-------------------------------------------------------------------------------- /images/7933423348_c30bd9bd4e_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/7933423348_c30bd9bd4e_z.jpg -------------------------------------------------------------------------------- /images/8053677163_d4c8f416be_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/8053677163_d4c8f416be_z.jpg -------------------------------------------------------------------------------- /images/8239308689_efa6c11b08_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/8239308689_efa6c11b08_z.jpg -------------------------------------------------------------------------------- /images/8433365521_9252889f9a_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/8433365521_9252889f9a_z.jpg -------------------------------------------------------------------------------- /images/8512296263_5fc5458e20_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/8512296263_5fc5458e20_z.jpg -------------------------------------------------------------------------------- /images/8699757338_c3941051b6_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/8699757338_c3941051b6_z.jpg -------------------------------------------------------------------------------- /images/8734543718_37f6b8bd45_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/8734543718_37f6b8bd45_z.jpg -------------------------------------------------------------------------------- /images/8829708882_48f263491e_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/8829708882_48f263491e_z.jpg -------------------------------------------------------------------------------- /images/9118579087_f9ffa19e63_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/9118579087_f9ffa19e63_z.jpg -------------------------------------------------------------------------------- /images/9247489789_132c0d534a_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/9247489789_132c0d534a_z.jpg 
-------------------------------------------------------------------------------- /images/loss-plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/loss-plot.png -------------------------------------------------------------------------------- /images/predictions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crowdAI/crowdai-mapping-challenge-mask-rcnn/663ae2603b08195726c3726835de6830bc07d317/images/predictions.png -------------------------------------------------------------------------------- /mrcnn/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /mrcnn/cocoeval.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tsungyi' 2 | 3 | import numpy as np 4 | import datetime 5 | import time 6 | from collections import defaultdict 7 | from pycocotools import mask as maskUtils 8 | import copy 9 | 10 | """ 11 | This script has been taken (and modified) from : 12 | https://github.com/crowdAI/coco/blob/master/PythonAPI/pycocotools/cocoeval.py 13 | """ 14 | 15 | 16 | class COCOeval: 17 | # Interface for evaluating detection on the Microsoft COCO dataset. 18 | # 19 | # The usage for CocoEval is as follows: 20 | # cocoGt=..., cocoDt=... # load dataset and results 21 | # E = CocoEval(cocoGt,cocoDt); # initialize CocoEval object 22 | # E.params.recThrs = ...; # set parameters as desired 23 | # E.evaluate(); # run per image evaluation 24 | # E.accumulate(); # accumulate per image results 25 | # E.summarize(); # display summary metrics of results 26 | # For example usage see evalDemo.m and http://mscoco.org/. 27 | # 28 | # The evaluation parameters are as follows (defaults in brackets): 29 | # imgIds - [all] N img ids to use for evaluation 30 | # catIds - [all] K cat ids to use for evaluation 31 | # iouThrs - [.5:.05:.95] T=10 IoU thresholds for evaluation 32 | # recThrs - [0:.01:1] R=101 recall thresholds for evaluation 33 | # areaRng - [...] A=4 object area ranges for evaluation 34 | # maxDets - [1 10 100] M=3 thresholds on max detections per image 35 | # iouType - ['segm'] set iouType to 'segm', 'bbox' or 'keypoints' 36 | # iouType replaced the now DEPRECATED useSegm parameter. 37 | # useCats - [1] if true use category labels for evaluation 38 | # Note: if useCats=0 category labels are ignored as in proposal scoring. 39 | # Note: multiple areaRngs [Ax2] and maxDets [Mx1] can be specified. 
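    #  A concrete sketch of setting these parameters before evaluating
    #  (the imgIds slice and maxDets values are illustrative):
    #      E = COCOeval(cocoGt, cocoDt, iouType='segm')
    #      E.params.imgIds  = sorted(cocoGt.getImgIds())[:100]
    #      E.params.maxDets = [1, 10, 100]
    #      E.evaluate(); E.accumulate(); E.summarize()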
40 | # 41 | # evaluate(): evaluates detections on every image and every category and 42 | # concats the results into the "evalImgs" with fields: 43 | # dtIds - [1xD] id for each of the D detections (dt) 44 | # gtIds - [1xG] id for each of the G ground truths (gt) 45 | # dtMatches - [TxD] matching gt id at each IoU or 0 46 | # gtMatches - [TxG] matching dt id at each IoU or 0 47 | # dtScores - [1xD] confidence of each dt 48 | # gtIgnore - [1xG] ignore flag for each gt 49 | # dtIgnore - [TxD] ignore flag for each dt at each IoU 50 | # 51 | # accumulate(): accumulates the per-image, per-category evaluation 52 | # results in "evalImgs" into the dictionary "eval" with fields: 53 | # params - parameters used for evaluation 54 | # date - date evaluation was performed 55 | # counts - [T,R,K,A,M] parameter dimensions (see above) 56 | # precision - [TxRxKxAxM] precision for every evaluation setting 57 | # recall - [TxKxAxM] max recall for every evaluation setting 58 | # Note: precision and recall==-1 for settings with no gt objects. 59 | # 60 | # See also coco, mask, pycocoDemo, pycocoEvalDemo 61 | # 62 | # Microsoft COCO Toolbox. version 2.0 63 | # Data, paper, and tutorials available at: http://mscoco.org/ 64 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 65 | # Licensed under the Simplified BSD License [see coco/license.txt] 66 | def __init__(self, cocoGt=None, cocoDt=None, iouType='segm'): 67 | ''' 68 | Initialize CocoEval using coco APIs for gt and dt 69 | :param cocoGt: coco object with ground truth annotations 70 | :param cocoDt: coco object with detection results 71 | :return: None 72 | ''' 73 | if not iouType: 74 | print('iouType not specified. use default iouType segm') 75 | self.cocoGt = cocoGt # ground truth COCO API 76 | self.cocoDt = cocoDt # detections COCO API 77 | self.params = {} # evaluation parameters 78 | self.evalImgs = defaultdict(list) # per-image per-category evaluation results [KxAxI] elements 79 | self.eval = {} # accumulated evaluation results 80 | self._gts = defaultdict(list) # gt for evaluation 81 | self._dts = defaultdict(list) # dt for evaluation 82 | self.params = Params(iouType=iouType) # parameters 83 | self._paramsEval = {} # parameters for evaluation 84 | self.stats = [] # result summarization 85 | self.ious = {} # ious between all gts and dts 86 | if not cocoGt is None: 87 | self.params.imgIds = sorted(cocoGt.getImgIds()) 88 | self.params.catIds = sorted(cocoGt.getCatIds()) 89 | 90 | 91 | def _prepare(self): 92 | ''' 93 | Prepare ._gts and ._dts for evaluation based on params 94 | :return: None 95 | ''' 96 | def _toMask(anns, coco): 97 | # modify ann['segmentation'] by reference 98 | for ann in anns: 99 | rle = coco.annToRLE(ann) 100 | ann['segmentation'] = rle 101 | p = self.params 102 | if p.useCats: 103 | gts=self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds)) 104 | dts=self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds)) 105 | else: 106 | gts=self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds)) 107 | dts=self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds)) 108 | 109 | # convert ground truth to mask if iouType == 'segm' 110 | if p.iouType == 'segm': 111 | _toMask(gts, self.cocoGt) 112 | _toMask(dts, self.cocoDt) 113 | # set ignore flag 114 | for gt in gts: 115 | gt['ignore'] = gt['ignore'] if 'ignore' in gt else 0 116 | gt['ignore'] = 'iscrowd' in gt and gt['iscrowd'] 117 | if p.iouType == 'keypoints': 118 | gt['ignore'] = (gt['num_keypoints'] == 0) or gt['ignore'] 119 | self._gts = 
defaultdict(list) # gt for evaluation 120 | self._dts = defaultdict(list) # dt for evaluation 121 | for gt in gts: 122 | self._gts[gt['image_id'], gt['category_id']].append(gt) 123 | for dt in dts: 124 | self._dts[dt['image_id'], dt['category_id']].append(dt) 125 | self.evalImgs = defaultdict(list) # per-image per-category evaluation results 126 | self.eval = {} # accumulated evaluation results 127 | 128 | def evaluate(self): 129 | ''' 130 | Run per image evaluation on given images and store results (a list of dict) in self.evalImgs 131 | :return: None 132 | ''' 133 | tic = time.time() 134 | print('Running per image evaluation...') 135 | p = self.params 136 | # add backward compatibility if useSegm is specified in params 137 | if not p.useSegm is None: 138 | p.iouType = 'segm' if p.useSegm == 1 else 'bbox' 139 | print('useSegm (deprecated) is not None. Running {} evaluation'.format(p.iouType)) 140 | print('Evaluate annotation type *{}*'.format(p.iouType)) 141 | p.imgIds = list(np.unique(p.imgIds)) 142 | if p.useCats: 143 | p.catIds = list(np.unique(p.catIds)) 144 | p.maxDets = sorted(p.maxDets) 145 | self.params=p 146 | 147 | self._prepare() 148 | # loop through images, area range, max detection number 149 | catIds = p.catIds if p.useCats else [-1] 150 | 151 | if p.iouType == 'segm' or p.iouType == 'bbox': 152 | computeIoU = self.computeIoU 153 | elif p.iouType == 'keypoints': 154 | computeIoU = self.computeOks 155 | self.ious = {(imgId, catId): computeIoU(imgId, catId) \ 156 | for imgId in p.imgIds 157 | for catId in catIds} 158 | 159 | evaluateImg = self.evaluateImg 160 | maxDet = p.maxDets[-1] 161 | self.evalImgs = [evaluateImg(imgId, catId, areaRng, maxDet) 162 | for catId in catIds 163 | for areaRng in p.areaRng 164 | for imgId in p.imgIds 165 | ] 166 | self._paramsEval = copy.deepcopy(self.params) 167 | toc = time.time() 168 | print('DONE (t={:0.2f}s).'.format(toc-tic)) 169 | 170 | def computeIoU(self, imgId, catId): 171 | p = self.params 172 | if p.useCats: 173 | gt = self._gts[imgId,catId] 174 | dt = self._dts[imgId,catId] 175 | else: 176 | gt = [_ for cId in p.catIds for _ in self._gts[imgId,cId]] 177 | dt = [_ for cId in p.catIds for _ in self._dts[imgId,cId]] 178 | if len(gt) == 0 and len(dt) ==0: 179 | return [] 180 | inds = np.argsort([-d['score'] for d in dt], kind='mergesort') 181 | dt = [dt[i] for i in inds] 182 | if len(dt) > p.maxDets[-1]: 183 | dt=dt[0:p.maxDets[-1]] 184 | 185 | if p.iouType == 'segm': 186 | g = [g['segmentation'] for g in gt] 187 | d = [d['segmentation'] for d in dt] 188 | elif p.iouType == 'bbox': 189 | g = [g['bbox'] for g in gt] 190 | d = [d['bbox'] for d in dt] 191 | else: 192 | raise Exception('unknown iouType for iou computation') 193 | 194 | # compute iou between each dt and gt region 195 | iscrowd = [int(o['iscrowd']) for o in gt] 196 | ious = maskUtils.iou(d,g,iscrowd) 197 | return ious 198 | 199 | def computeOks(self, imgId, catId): 200 | p = self.params 201 | # dimention here should be Nxm 202 | gts = self._gts[imgId, catId] 203 | dts = self._dts[imgId, catId] 204 | inds = np.argsort([-d['score'] for d in dts], kind='mergesort') 205 | dts = [dts[i] for i in inds] 206 | if len(dts) > p.maxDets[-1]: 207 | dts = dts[0:p.maxDets[-1]] 208 | # if len(gts) == 0 and len(dts) == 0: 209 | if len(gts) == 0 or len(dts) == 0: 210 | return [] 211 | ious = np.zeros((len(dts), len(gts))) 212 | sigmas = np.array([.26, .25, .25, .35, .35, .79, .79, .72, .72, .62,.62, 1.07, 1.07, .87, .87, .89, .89])/10.0 213 | vars = (sigmas * 2)**2 214 | k = len(sigmas) 215 | 
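# Illustrative note: the nested loops below implement the standard COCO
# Object Keypoint Similarity,
#     OKS = mean_i( exp( -d_i**2 / (2 * area * (2 * sigma_i)**2) ) )
# where d_i is the distance between corresponding predicted and ground-truth
# keypoints, area is the ground-truth object area, and sigma_i are the
# per-keypoint constants defined above (so `vars` holds (2 * sigma_i)**2).
# Only labelled keypoints (vg > 0) contribute to the mean; if none are
# labelled, distances are measured against an ignore region twice the size
# of the ground-truth box.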
# compute oks between each detection and ground truth object
216 | for j, gt in enumerate(gts):
217 | # create bounds for ignore regions(double the gt bbox)
218 | g = np.array(gt['keypoints'])
219 | xg = g[0::3]; yg = g[1::3]; vg = g[2::3]
220 | k1 = np.count_nonzero(vg > 0)
221 | bb = gt['bbox']
222 | x0 = bb[0] - bb[2]; x1 = bb[0] + bb[2] * 2
223 | y0 = bb[1] - bb[3]; y1 = bb[1] + bb[3] * 2
224 | for i, dt in enumerate(dts):
225 | d = np.array(dt['keypoints'])
226 | xd = d[0::3]; yd = d[1::3]
227 | if k1>0:
228 | # measure the per-keypoint distance if keypoints visible
229 | dx = xd - xg
230 | dy = yd - yg
231 | else:
232 | # measure minimum distance to keypoints in (x0,y0) & (x1,y1)
233 | z = np.zeros((k))
234 | dx = np.max((z, x0-xd),axis=0)+np.max((z, xd-x1),axis=0)
235 | dy = np.max((z, y0-yd),axis=0)+np.max((z, yd-y1),axis=0)
236 | e = (dx**2 + dy**2) / vars / (gt['area']+np.spacing(1)) / 2
237 | if k1 > 0:
238 | e=e[vg > 0]
239 | ious[i, j] = np.sum(np.exp(-e)) / e.shape[0]
240 | return ious
241 | 
242 | def evaluateImg(self, imgId, catId, aRng, maxDet):
243 | '''
244 | perform evaluation for single category and image
245 | :return: dict (single image results)
246 | '''
247 | p = self.params
248 | if p.useCats:
249 | gt = self._gts[imgId,catId]
250 | dt = self._dts[imgId,catId]
251 | else:
252 | gt = [_ for cId in p.catIds for _ in self._gts[imgId,cId]]
253 | dt = [_ for cId in p.catIds for _ in self._dts[imgId,cId]]
254 | if len(gt) == 0 and len(dt) ==0:
255 | return None
256 | 
257 | for g in gt:
258 | if g['ignore'] or (g['area']<aRng[0] or g['area']>aRng[1]):
259 | g['_ignore'] = 1
260 | else:
261 | g['_ignore'] = 0
262 | 
263 | # sort dt highest score first, sort gt ignore last
264 | gtind = np.argsort([g['_ignore'] for g in gt], kind='mergesort')
265 | gt = [gt[i] for i in gtind]
266 | dtind = np.argsort([-d['score'] for d in dt], kind='mergesort')
267 | dt = [dt[i] for i in dtind[0:maxDet]]
268 | iscrowd = [int(o['iscrowd']) for o in gt]
269 | # load computed ious
270 | ious = self.ious[imgId, catId][:, gtind] if len(self.ious[imgId, catId]) > 0 else self.ious[imgId, catId]
271 | 
272 | T = len(p.iouThrs)
273 | G = len(gt)
274 | D = len(dt)
275 | gtm = np.zeros((T,G))
276 | dtm = np.zeros((T,D))
277 | gtIg = np.array([g['_ignore'] for g in gt])
278 | dtIg = np.zeros((T,D))
279 | if not len(ious)==0:
280 | for tind, t in enumerate(p.iouThrs):
281 | for dind, d in enumerate(dt):
282 | # information about best match so far (m=-1 -> unmatched)
283 | iou = min([t,1-1e-10])
284 | m = -1
285 | for gind, g in enumerate(gt):
286 | # if this gt already matched, and not a crowd, continue
287 | if gtm[tind,gind]>0 and not iscrowd[gind]:
288 | continue
289 | # if dt matched to reg gt, and on ignore gt, stop
290 | if m>-1 and gtIg[m]==0 and gtIg[gind]==1:
291 | break
292 | # continue to next gt unless better match made
293 | if ious[dind,gind] < iou:
294 | continue
295 | # if match successful and best so far, store appropriately
296 | iou=ious[dind,gind]
297 | m=gind
298 | # if match made store id of match for both dt and gt
299 | if m ==-1:
300 | continue
301 | dtIg[tind,dind] = gtIg[m]
302 | dtm[tind,dind] = gt[m]['id']
303 | gtm[tind,m] = d['id']
304 | # set unmatched detections outside of area range to ignore
305 | a = np.array([d['area']<aRng[0] or d['area']>aRng[1] for d in dt]).reshape((1, len(dt)))
306 | dtIg = np.logical_or(dtIg, np.logical_and(dtm==0, np.repeat(a,T,0)))
307 | # store results for given image and category
308 | return {
309 | 'image_id': imgId,
310 | 'category_id': catId,
311 | 'aRng': aRng,
312 | 'maxDet': 
maxDet, 313 | 'dtIds': [d['id'] for d in dt], 314 | 'gtIds': [g['id'] for g in gt], 315 | 'dtMatches': dtm, 316 | 'gtMatches': gtm, 317 | 'dtScores': [d['score'] for d in dt], 318 | 'gtIgnore': gtIg, 319 | 'dtIgnore': dtIg, 320 | } 321 | 322 | def accumulate(self, p = None): 323 | ''' 324 | Accumulate per image evaluation results and store the result in self.eval 325 | :param p: input params for evaluation 326 | :return: None 327 | ''' 328 | print('Accumulating evaluation results...') 329 | tic = time.time() 330 | if not self.evalImgs: 331 | print('Please run evaluate() first') 332 | # allows input customized parameters 333 | if p is None: 334 | p = self.params 335 | p.catIds = p.catIds if p.useCats == 1 else [-1] 336 | T = len(p.iouThrs) 337 | R = len(p.recThrs) 338 | K = len(p.catIds) if p.useCats else 1 339 | A = len(p.areaRng) 340 | M = len(p.maxDets) 341 | precision = -np.ones((T,R,K,A,M)) # -1 for the precision of absent categories 342 | recall = -np.ones((T,K,A,M)) 343 | 344 | # create dictionary for future indexing 345 | _pe = self._paramsEval 346 | catIds = _pe.catIds if _pe.useCats else [-1] 347 | setK = set(catIds) 348 | setA = set(map(tuple, _pe.areaRng)) 349 | setM = set(_pe.maxDets) 350 | setI = set(_pe.imgIds) 351 | # get inds to evaluate 352 | k_list = [n for n, k in enumerate(p.catIds) if k in setK] 353 | m_list = [m for n, m in enumerate(p.maxDets) if m in setM] 354 | a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA] 355 | i_list = [n for n, i in enumerate(p.imgIds) if i in setI] 356 | I0 = len(_pe.imgIds) 357 | A0 = len(_pe.areaRng) 358 | # retrieve E at each category, area range, and max number of detections 359 | for k, k0 in enumerate(k_list): 360 | Nk = k0*A0*I0 361 | for a, a0 in enumerate(a_list): 362 | Na = a0*I0 363 | for m, maxDet in enumerate(m_list): 364 | E = [self.evalImgs[Nk + Na + i] for i in i_list] 365 | E = [e for e in E if not e is None] 366 | if len(E) == 0: 367 | continue 368 | dtScores = np.concatenate([e['dtScores'][0:maxDet] for e in E]) 369 | 370 | # different sorting method generates slightly different results. 371 | # mergesort is used to be consistent as Matlab implementation. 
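# Illustrative note: mergesort is stable, so detections with equal scores keep
# their original relative order. The block that follows builds, for each IoU
# threshold, cumulative TP/FP counts over the globally score-sorted detections,
# converts them into a precision/recall curve, makes the precision envelope
# monotonically non-increasing, and samples it at the recall thresholds in
# p.recThrs to fill the `precision` array.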
372 | inds = np.argsort(-dtScores, kind='mergesort') 373 | 374 | dtm = np.concatenate([e['dtMatches'][:,0:maxDet] for e in E], axis=1)[:,inds] 375 | dtIg = np.concatenate([e['dtIgnore'][:,0:maxDet] for e in E], axis=1)[:,inds] 376 | gtIg = np.concatenate([e['gtIgnore'] for e in E]) 377 | npig = np.count_nonzero(gtIg==0 ) 378 | if npig == 0: 379 | continue 380 | tps = np.logical_and( dtm, np.logical_not(dtIg) ) 381 | fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg) ) 382 | 383 | tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float) 384 | fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float) 385 | for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)): 386 | tp = np.array(tp) 387 | fp = np.array(fp) 388 | nd = len(tp) 389 | rc = tp / npig 390 | pr = tp / (fp+tp+np.spacing(1)) 391 | q = np.zeros((R,)) 392 | 393 | if nd: 394 | recall[t,k,a,m] = rc[-1] 395 | else: 396 | recall[t,k,a,m] = 0 397 | 398 | # numpy is slow without cython optimization for accessing elements 399 | # use python array gets significant speed improvement 400 | pr = pr.tolist(); q = q.tolist() 401 | 402 | for i in range(nd-1, 0, -1): 403 | if pr[i] > pr[i-1]: 404 | pr[i-1] = pr[i] 405 | 406 | inds = np.searchsorted(rc, p.recThrs, side='left') 407 | try: 408 | for ri, pi in enumerate(inds): 409 | q[ri] = pr[pi] 410 | except: 411 | pass 412 | precision[t,:,k,a,m] = np.array(q) 413 | self.eval = { 414 | 'params': p, 415 | 'counts': [T, R, K, A, M], 416 | 'date': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), 417 | 'precision': precision, 418 | 'recall': recall, 419 | } 420 | toc = time.time() 421 | print('DONE (t={:0.2f}s).'.format( toc-tic)) 422 | 423 | def _summarize(self, ap=1, iouThr=None, areaRng='all', maxDets=100 ): 424 | p = self.params 425 | iStr = ' {:<18} {} @[ IoU={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}' 426 | titleStr = 'Average Precision' if ap == 1 else 'Average Recall' 427 | typeStr = '(AP)' if ap==1 else '(AR)' 428 | iouStr = '{:0.2f}:{:0.2f}'.format(p.iouThrs[0], p.iouThrs[-1]) \ 429 | if iouThr is None else '{:0.2f}'.format(iouThr) 430 | 431 | aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng] 432 | mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets] 433 | if ap == 1: 434 | # dimension of precision: [TxRxKxAxM] 435 | s = self.eval['precision'] 436 | # IoU 437 | if iouThr is not None: 438 | t = np.where(iouThr == p.iouThrs)[0] 439 | s = s[t] 440 | s = s[:,:,:,aind,mind] 441 | else: 442 | # dimension of recall: [TxKxAxM] 443 | s = self.eval['recall'] 444 | if iouThr is not None: 445 | t = np.where(iouThr == p.iouThrs)[0] 446 | s = s[t] 447 | s = s[:,:,aind,mind] 448 | if len(s[s>-1])==0: 449 | mean_s = -1 450 | else: 451 | mean_s = np.mean(s[s>-1]) 452 | print(iStr.format(titleStr, typeStr, iouStr, areaRng, maxDets, mean_s)) 453 | return mean_s 454 | 455 | def summarize(self): 456 | ''' 457 | Compute and display summary metrics for evaluation results. 
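Typical call order (illustrative; mrcnn/evaluate.py drives a similar sequence):
    E = COCOeval(cocoGt, cocoDt, iouType='segm')
    E.evaluate()
    E.accumulate()
    E.summarize()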
458 | Note this functin can *only* be applied on the default parameter setting 459 | ''' 460 | def _summarizeDets(): 461 | stats = np.zeros((12,)) 462 | stats[0] = self._summarize(1) 463 | stats[1] = self._summarize(1, iouThr=.5, maxDets=self.params.maxDets[2]) 464 | stats[2] = self._summarize(1, iouThr=.75, maxDets=self.params.maxDets[2]) 465 | stats[3] = self._summarize(1, areaRng='small', maxDets=self.params.maxDets[2]) 466 | stats[4] = self._summarize(1, areaRng='medium', maxDets=self.params.maxDets[2]) 467 | stats[5] = self._summarize(1, areaRng='large', maxDets=self.params.maxDets[2]) 468 | stats[6] = self._summarize(0, maxDets=self.params.maxDets[0]) 469 | stats[7] = self._summarize(0, maxDets=self.params.maxDets[1]) 470 | stats[8] = self._summarize(0, maxDets=self.params.maxDets[2]) 471 | stats[9] = self._summarize(0, areaRng='small', maxDets=self.params.maxDets[2]) 472 | stats[10] = self._summarize(0, areaRng='medium', maxDets=self.params.maxDets[2]) 473 | stats[11] = self._summarize(0, areaRng='large', maxDets=self.params.maxDets[2]) 474 | return stats 475 | def _summarizeKps(): 476 | stats = np.zeros((10,)) 477 | stats[0] = self._summarize(1, maxDets=20) 478 | stats[1] = self._summarize(1, maxDets=20, iouThr=.5) 479 | stats[2] = self._summarize(1, maxDets=20, iouThr=.75) 480 | stats[3] = self._summarize(1, maxDets=20, areaRng='medium') 481 | stats[4] = self._summarize(1, maxDets=20, areaRng='large') 482 | stats[5] = self._summarize(0, maxDets=20) 483 | stats[6] = self._summarize(0, maxDets=20, iouThr=.5) 484 | stats[7] = self._summarize(0, maxDets=20, iouThr=.75) 485 | stats[8] = self._summarize(0, maxDets=20, areaRng='medium') 486 | stats[9] = self._summarize(0, maxDets=20, areaRng='large') 487 | return stats 488 | if not self.eval: 489 | raise Exception('Please run accumulate() first') 490 | iouType = self.params.iouType 491 | if iouType == 'segm' or iouType == 'bbox': 492 | summarize = _summarizeDets 493 | elif iouType == 'keypoints': 494 | summarize = _summarizeKps 495 | self.stats = summarize() 496 | 497 | def __str__(self): 498 | self.summarize() 499 | 500 | class Params: 501 | ''' 502 | Params for coco evaluation api 503 | ''' 504 | def setDetParams(self): 505 | self.imgIds = [] 506 | self.catIds = [100] # For the Category ID of Building 507 | # np.arange causes trouble. the data point on arange is slightly larger than the true value 508 | self.iouThrs = np.linspace(.5, 0.95, np.round((0.95 - .5) / .05) + 1, endpoint=True) 509 | self.recThrs = np.linspace(.0, 1.00, np.round((1.00 - .0) / .01) + 1, endpoint=True) 510 | self.maxDets = [1, 10, 100] 511 | self.areaRng = [[0 ** 2, 1e5 ** 2], [0 ** 2, 32 ** 2], [32 ** 2, 96 ** 2], [96 ** 2, 1e5 ** 2]] 512 | self.areaRngLbl = ['all', 'small', 'medium', 'large'] 513 | self.useCats = 1 514 | 515 | def setKpParams(self): 516 | self.imgIds = [] 517 | self.catIds = [] 518 | # np.arange causes trouble. 
the data point on arange is slightly larger than the true value 519 | self.iouThrs = [0.5] 520 | self.recThrs = np.linspace(.0, 1.00, np.round((1.00 - .0) / .01) + 1, endpoint=True) 521 | self.maxDets = [20] # At max 20 objects detected per image 522 | self.areaRng = [[0 ** 2, 1e5 ** 2], [32 ** 2, 96 ** 2], [96 ** 2, 1e5 ** 2]] 523 | self.areaRngLbl = ['all'] #Consider all area ranges for evaluation 524 | self.useCats = 1 525 | 526 | def __init__(self, iouType='segm'): 527 | if iouType == 'segm' or iouType == 'bbox': 528 | self.setDetParams() 529 | elif iouType == 'keypoints': 530 | self.setKpParams() 531 | else: 532 | raise Exception('iouType not supported') 533 | self.iouType = iouType 534 | # useSegm is deprecated 535 | self.useSegm = None 536 | -------------------------------------------------------------------------------- /mrcnn/config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Base Configurations class. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | """ 9 | 10 | import math 11 | import numpy as np 12 | 13 | 14 | # Base Configuration Class 15 | # Don't use this class directly. Instead, sub-class it and override 16 | # the configurations you need to change. 17 | 18 | class Config(object): 19 | """Base configuration class. For custom configurations, create a 20 | sub-class that inherits from this one and override properties 21 | that need to be changed. 22 | """ 23 | # Name the configurations. For example, 'COCO', 'Experiment 3', ...etc. 24 | # Useful if your code needs to do things differently depending on which 25 | # experiment is running. 26 | NAME = None # Override in sub-classes 27 | 28 | # NUMBER OF GPUs to use. For CPU training, use 1 29 | GPU_COUNT = 1 30 | 31 | # Number of images to train with on each GPU. A 12GB GPU can typically 32 | # handle 2 images of 1024x1024px. 33 | # Adjust based on your GPU memory and image sizes. Use the highest 34 | # number that your GPU can handle for best performance. 35 | IMAGES_PER_GPU = 2 36 | 37 | # Number of training steps per epoch 38 | # This doesn't need to match the size of the training set. Tensorboard 39 | # updates are saved at the end of each epoch, so setting this to a 40 | # smaller number means getting more frequent TensorBoard updates. 41 | # Validation stats are also calculated at each epoch end and they 42 | # might take a while, so don't set this too small to avoid spending 43 | # a lot of time on validation stats. 44 | STEPS_PER_EPOCH = 1000 45 | 46 | # Number of validation steps to run at the end of every training epoch. 47 | # A bigger number improves accuracy of validation stats, but slows 48 | # down the training. 49 | VALIDATION_STEPS = 50 50 | 51 | # Backbone network architecture 52 | # Supported values are: resnet50, resnet101 53 | BACKBONE = "resnet101" 54 | 55 | # The strides of each layer of the FPN Pyramid. These values 56 | # are based on a Resnet101 backbone. 
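# Illustrative note: each stride is the downsampling factor between the input
# image and one FPN level (P2..P6), so with the default 1024x1024 input the
# feature maps are roughly 256, 128, 64, 32 and 16 pixels on a side. If the
# backbone is changed, these values must match its downsampling factors.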
57 | BACKBONE_STRIDES = [4, 8, 16, 32, 64] 58 | 59 | # Number of classification classes (including background) 60 | NUM_CLASSES = 1 # Override in sub-classes 61 | 62 | # Length of square anchor side in pixels 63 | RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512) 64 | 65 | # Ratios of anchors at each cell (width/height) 66 | # A value of 1 represents a square anchor, and 0.5 is a wide anchor 67 | RPN_ANCHOR_RATIOS = [0.5, 1, 2] 68 | 69 | # Anchor stride 70 | # If 1 then anchors are created for each cell in the backbone feature map. 71 | # If 2, then anchors are created for every other cell, and so on. 72 | RPN_ANCHOR_STRIDE = 1 73 | 74 | # Non-max suppression threshold to filter RPN proposals. 75 | # You can increase this during training to generate more propsals. 76 | RPN_NMS_THRESHOLD = 0.7 77 | 78 | # How many anchors per image to use for RPN training 79 | RPN_TRAIN_ANCHORS_PER_IMAGE = 256 80 | 81 | # ROIs kept after non-maximum supression (training and inference) 82 | POST_NMS_ROIS_TRAINING = 2000 83 | POST_NMS_ROIS_INFERENCE = 1000 84 | 85 | # If enabled, resizes instance masks to a smaller size to reduce 86 | # memory load. Recommended when using high-resolution images. 87 | USE_MINI_MASK = True 88 | MINI_MASK_SHAPE = (56, 56) # (height, width) of the mini-mask 89 | 90 | # Input image resizing 91 | # Generally, use the "square" resizing mode for training and inferencing 92 | # and it should work well in most cases. In this mode, images are scaled 93 | # up such that the small side is = IMAGE_MIN_DIM, but ensuring that the 94 | # scaling doesn't make the long side > IMAGE_MAX_DIM. Then the image is 95 | # padded with zeros to make it a square so multiple images can be put 96 | # in one batch. 97 | # Available resizing modes: 98 | # none: No resizing or padding. Return the image unchanged. 99 | # square: Resize and pad with zeros to get a square image 100 | # of size [max_dim, max_dim]. 101 | # pad64: Pads width and height with zeros to make them multiples of 64. 102 | # If IMAGE_MIN_DIM is not None, then scale the small side to 103 | # that size before padding. IMAGE_MAX_DIM is ignored in this mode. 104 | # The multiple of 64 is needed to ensure smooth scaling of feature 105 | # maps up and down the 6 levels of the FPN pyramid (2**6=64). 106 | IMAGE_RESIZE_MODE = "square" 107 | IMAGE_MIN_DIM = 800 108 | IMAGE_MAX_DIM = 1024 109 | 110 | # Image mean (RGB) 111 | MEAN_PIXEL = np.array([123.7, 116.8, 103.9]) 112 | 113 | # Number of ROIs per image to feed to classifier/mask heads 114 | # The Mask RCNN paper uses 512 but often the RPN doesn't generate 115 | # enough positive proposals to fill this and keep a positive:negative 116 | # ratio of 1:3. You can increase the number of proposals by adjusting 117 | # the RPN NMS threshold. 118 | TRAIN_ROIS_PER_IMAGE = 200 119 | 120 | # Percent of positive ROIs used to train classifier/mask heads 121 | ROI_POSITIVE_RATIO = 0.33 122 | 123 | # Pooled ROIs 124 | POOL_SIZE = 7 125 | MASK_POOL_SIZE = 14 126 | 127 | # Shape of output mask 128 | # To change this you also need to change the neural network mask branch 129 | MASK_SHAPE = [28, 28] 130 | 131 | # Maximum number of ground truth instances to use in one image 132 | MAX_GT_INSTANCES = 100 133 | 134 | # Bounding box refinement standard deviation for RPN and final detections. 
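# Illustrative note: these act as a normalization of the (dy, dx, log(dh),
# log(dw)) regression targets; targets are divided by these values when they
# are built and predicted deltas are scaled back by them when boxes are
# refined elsewhere in the model code, keeping the regression outputs in a
# convenient numeric range.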
135 | RPN_BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2]) 136 | BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2]) 137 | 138 | # Max number of final detections 139 | DETECTION_MAX_INSTANCES = 100 140 | 141 | # Minimum probability value to accept a detected instance 142 | # ROIs below this threshold are skipped 143 | DETECTION_MIN_CONFIDENCE = 0.7 144 | 145 | # Non-maximum suppression threshold for detection 146 | DETECTION_NMS_THRESHOLD = 0.3 147 | 148 | # Learning rate and momentum 149 | # The Mask RCNN paper uses lr=0.02, but on TensorFlow it causes 150 | # weights to explode. Likely due to differences in optimzer 151 | # implementation. 152 | LEARNING_RATE = 0.001 153 | LEARNING_MOMENTUM = 0.9 154 | 155 | # Weight decay regularization 156 | WEIGHT_DECAY = 0.0001 157 | 158 | # Use RPN ROIs or externally generated ROIs for training 159 | # Keep this True for most situations. Set to False if you want to train 160 | # the head branches on ROI generated by code rather than the ROIs from 161 | # the RPN. For example, to debug the classifier head without having to 162 | # train the RPN. 163 | USE_RPN_ROIS = True 164 | 165 | # Train or freeze batch normalization layers 166 | # None: Train BN layers. This is the normal mode 167 | # False: Freeze BN layers. Good when using a small batch size 168 | # True: (don't use). Set layer in training mode even when inferencing 169 | TRAIN_BN = False # Defaulting to False since batch size is often small 170 | 171 | # Gradient norm clipping 172 | GRADIENT_CLIP_NORM = 5.0 173 | 174 | def __init__(self): 175 | """Set values of computed attributes.""" 176 | # Effective batch size 177 | self.BATCH_SIZE = self.IMAGES_PER_GPU * self.GPU_COUNT 178 | 179 | # Input image size 180 | self.IMAGE_SHAPE = np.array( 181 | [self.IMAGE_MAX_DIM, self.IMAGE_MAX_DIM, 3]) 182 | 183 | # Image meta data length 184 | # See compose_image_meta() for details 185 | self.IMAGE_META_SIZE = 1 + 3 + 3 + 4 + 1 + self.NUM_CLASSES 186 | 187 | def display(self): 188 | """Display Configuration values.""" 189 | print("\nConfigurations:") 190 | for a in dir(self): 191 | if not a.startswith("__") and not callable(getattr(self, a)): 192 | print("{:30} {}".format(a, getattr(self, a))) 193 | print("\n") 194 | -------------------------------------------------------------------------------- /mrcnn/dataset.py: -------------------------------------------------------------------------------- 1 | from mrcnn import utils 2 | import numpy as np 3 | 4 | from pycocotools.coco import COCO 5 | from pycocotools.cocoeval import COCOeval 6 | from pycocotools import mask as maskUtils 7 | 8 | import os 9 | 10 | class MappingChallengeDataset(utils.Dataset): 11 | def load_dataset(self, dataset_dir, load_small=False, return_coco=True): 12 | """ Loads dataset released for the crowdAI Mapping Challenge(https://www.crowdai.org/challenges/mapping-challenge) 13 | Params: 14 | - dataset_dir : root directory of the dataset (can point to the train/val folder) 15 | - load_small : Boolean value which signals if the annotations for all the images need to be loaded into the memory, 16 | or if only a small subset of the same should be loaded into memory 17 | """ 18 | self.load_small = load_small 19 | if self.load_small: 20 | annotation_path = os.path.join(dataset_dir, "annotation-small.json") 21 | else: 22 | annotation_path = os.path.join(dataset_dir, "annotation.json") 23 | 24 | image_dir = os.path.join(dataset_dir, "images") 25 | print("Annotation Path ", annotation_path) 26 | print("Image Dir ", image_dir) 27 | assert 
os.path.exists(annotation_path) and os.path.exists(image_dir) 28 | 29 | self.coco = COCO(annotation_path) 30 | self.image_dir = image_dir 31 | 32 | # Load all classes (Only Building in this version) 33 | classIds = self.coco.getCatIds() 34 | 35 | # Load all images 36 | image_ids = list(self.coco.imgs.keys()) 37 | 38 | # register classes 39 | for _class_id in classIds: 40 | self.add_class("crowdai-mapping-challenge", _class_id, self.coco.loadCats(_class_id)[0]["name"]) 41 | 42 | # Register Images 43 | for _img_id in image_ids: 44 | assert(os.path.exists(os.path.join(image_dir, self.coco.imgs[_img_id]['file_name']))) 45 | self.add_image( 46 | "crowdai-mapping-challenge", image_id=_img_id, 47 | path=os.path.join(image_dir, self.coco.imgs[_img_id]['file_name']), 48 | width=self.coco.imgs[_img_id]["width"], 49 | height=self.coco.imgs[_img_id]["height"], 50 | annotations=self.coco.loadAnns(self.coco.getAnnIds( 51 | imgIds=[_img_id], 52 | catIds=classIds, 53 | iscrowd=None))) 54 | 55 | if return_coco: 56 | return self.coco 57 | 58 | def load_mask(self, image_id): 59 | """ Loads instance mask for a given image 60 | This function converts mask from the coco format to a 61 | a bitmap [height, width, instance] 62 | Params: 63 | - image_id : reference id for a given image 64 | 65 | Returns: 66 | masks : A bool array of shape [height, width, instances] with 67 | one mask per instance 68 | class_ids : a 1D array of classIds of the corresponding instance masks 69 | (In this version of the challenge it will be of shape [instances] and always be filled with the class-id of the "Building" class.) 70 | """ 71 | 72 | image_info = self.image_info[image_id] 73 | assert image_info["source"] == "crowdai-mapping-challenge" 74 | 75 | instance_masks = [] 76 | class_ids = [] 77 | annotations = self.image_info[image_id]["annotations"] 78 | # Build mask of shape [height, width, instance_count] and list 79 | # of class IDs that correspond to each channel of the mask. 80 | for annotation in annotations: 81 | class_id = self.map_source_class_id( 82 | "crowdai-mapping-challenge.{}".format(annotation['category_id'])) 83 | if class_id: 84 | m = self.annToMask(annotation, image_info["height"], 85 | image_info["width"]) 86 | # Some objects are so small that they're less than 1 pixel area 87 | # and end up rounded out. Skip those objects. 88 | if m.max() < 1: 89 | continue 90 | 91 | # Ignore the notion of "is_crowd" as specified in the coco format 92 | # as we donot have the said annotation in the current version of the dataset 93 | 94 | instance_masks.append(m) 95 | class_ids.append(class_id) 96 | # Pack instance masks into an array 97 | if class_ids: 98 | mask = np.stack(instance_masks, axis=2) 99 | class_ids = np.array(class_ids, dtype=np.int32) 100 | return mask, class_ids 101 | else: 102 | # Call super class to return an empty mask 103 | return super(MappingChallengeDataset, self).load_mask(image_id) 104 | 105 | 106 | def image_reference(self, image_id): 107 | """Return a reference for a particular image 108 | 109 | Ideally you this function is supposed to return a URL 110 | but in this case, we will simply return the image_id 111 | """ 112 | return "crowdai-mapping-challenge::{}".format(image_id) 113 | # The following two functions are from pycocotools with a few changes. 114 | 115 | def annToRLE(self, ann, height, width): 116 | """ 117 | Convert annotation which can be polygons, uncompressed RLE to RLE. 
118 | :return: binary mask (numpy 2D array) 119 | """ 120 | segm = ann['segmentation'] 121 | if isinstance(segm, list): 122 | # polygon -- a single object might consist of multiple parts 123 | # we merge all parts into one mask rle code 124 | rles = maskUtils.frPyObjects(segm, height, width) 125 | rle = maskUtils.merge(rles) 126 | elif isinstance(segm['counts'], list): 127 | # uncompressed RLE 128 | rle = maskUtils.frPyObjects(segm, height, width) 129 | else: 130 | # rle 131 | rle = ann['segmentation'] 132 | return rle 133 | 134 | def annToMask(self, ann, height, width): 135 | """ 136 | Convert annotation which can be polygons, uncompressed RLE, or RLE to binary mask. 137 | :return: binary mask (numpy 2D array) 138 | """ 139 | rle = self.annToRLE(ann, height, width) 140 | m = maskUtils.decode(rle) 141 | return m 142 | -------------------------------------------------------------------------------- /mrcnn/evaluate.py: -------------------------------------------------------------------------------- 1 | from pycocotools.coco import COCO 2 | from mrcnn.cocoeval import COCOeval 3 | from pycocotools import mask as maskUtils 4 | import time 5 | import numpy as np 6 | 7 | ############################################################ 8 | # COCO Evaluation 9 | ############################################################ 10 | 11 | def build_coco_results(dataset, image_ids, rois, class_ids, scores, masks): 12 | """Arrange resutls to match COCO specs in http://cocodataset.org/#format 13 | """ 14 | # If no results, return an empty list 15 | if rois is None: 16 | return [] 17 | 18 | results = [] 19 | for image_id in image_ids: 20 | # Loop through detections 21 | for i in range(rois.shape[0]): 22 | class_id = class_ids[i] 23 | score = scores[i] 24 | bbox = np.around(rois[i], 1) 25 | mask = masks[:, :, i] 26 | 27 | result = { 28 | "image_id": image_id, 29 | "category_id": dataset.get_source_class_id(class_id, "crowdai-mapping-challenge"), 30 | "bbox": [bbox[1], bbox[0], bbox[3] - bbox[1], bbox[2] - bbox[0]], 31 | "score": score, 32 | "segmentation": maskUtils.encode(np.asfortranarray(mask)).encode('utf-8') 33 | } 34 | results.append(result) 35 | return results 36 | 37 | 38 | def evaluate_coco(model, dataset, coco, eval_type="bbox", limit=0, image_ids=None): 39 | """Runs official COCO evaluation. 40 | dataset: A Dataset object with valiadtion data 41 | eval_type: "bbox" or "segm" for bounding box or segmentation evaluation 42 | limit: if not 0, it's the number of images to use for evaluation 43 | """ 44 | # Pick COCO images from the dataset 45 | image_ids = image_ids or dataset.image_ids 46 | 47 | # Limit to a subset 48 | if limit: 49 | image_ids = image_ids[:limit] 50 | 51 | # Get corresponding COCO image IDs. 
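# Illustrative note: build_coco_results() above emits one dict per detection in
# the standard COCO results format, roughly
#     {"image_id": ..., "category_id": 100, "bbox": [x, y, width, height],
#      "score": 0.98, "segmentation": <RLE-encoded mask>}
# (values shown are placeholders); that list is what coco.loadRes() expects
# further below.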
52 | coco_image_ids = [dataset.image_info[id]["id"] for id in image_ids] 53 | 54 | t_prediction = 0 55 | t_start = time.time() 56 | 57 | results = [] 58 | 59 | for i, image_id in enumerate(image_ids): 60 | # Load image 61 | image = dataset.load_image(image_id) 62 | 63 | # Run detection 64 | t = time.time() 65 | print("="*100) 66 | print("Image shape ", image.shape) 67 | r = model.detect([image]) 68 | r = r[0] 69 | t_prediction += (time.time() - t) 70 | print("Prediction time : ", (time.time() - t)) 71 | # Convert results to COCO format 72 | image_results = build_coco_results(dataset, coco_image_ids[i:i + 1], 73 | r["rois"], r["class_ids"], 74 | r["scores"], r["masks"]) 75 | print("Number of detections : ", len(r["rois"])) 76 | print("Classes Predicted : ", r["class_ids"]) 77 | print("Scores : ", r["scores"]) 78 | results.extend(image_results) 79 | 80 | # Load results. This modifies results with additional attributes. 81 | coco_results = coco.loadRes(results) 82 | 83 | # Evaluate 84 | cocoEval = COCOeval(coco, coco_results, eval_type) 85 | cocoEval.params.imgIds = coco_image_ids 86 | cocoEval.evaluate() 87 | cocoEval.accumulate() 88 | ap = cocoEval._summarize(ap=1, iouThr=0.5, areaRng="all", maxDets=100) 89 | ar = cocoEval._summarize(ap=0, areaRng="all", maxDets=100) 90 | print("Precision : ", ap, " Recall : ", ar) 91 | 92 | print("Prediction time: {}. Average {}/image".format( 93 | t_prediction, t_prediction / len(image_ids))) 94 | print("Total time: ", time.time() - t_start) 95 | -------------------------------------------------------------------------------- /mrcnn/parallel_model.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Multi-GPU Support for Keras. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | 9 | Ideas and a small code snippets from these sources: 10 | https://github.com/fchollet/keras/issues/2436 11 | https://medium.com/@kuza55/transparent-multi-gpu-training-on-tensorflow-with-keras-8b0016fd9012 12 | https://github.com/avolkov1/keras_experiments/blob/master/keras_exp/multigpu/ 13 | https://github.com/fchollet/keras/blob/master/keras/utils/training_utils.py 14 | """ 15 | 16 | import tensorflow as tf 17 | import keras.backend as K 18 | import keras.layers as KL 19 | import keras.models as KM 20 | 21 | 22 | class ParallelModel(KM.Model): 23 | """Subclasses the standard Keras Model and adds multi-GPU support. 24 | It works by creating a copy of the model on each GPU. Then it slices 25 | the inputs and sends a slice to each copy of the model, and then 26 | merges the outputs together and applies the loss on the combined 27 | outputs. 28 | """ 29 | 30 | def __init__(self, keras_model, gpu_count): 31 | """Class constructor. 32 | keras_model: The Keras model to parallelize 33 | gpu_count: Number of GPUs. Must be > 1 34 | """ 35 | self.inner_model = keras_model 36 | self.gpu_count = gpu_count 37 | merged_outputs = self.make_parallel() 38 | super(ParallelModel, self).__init__(inputs=self.inner_model.inputs, 39 | outputs=merged_outputs) 40 | 41 | def __getattribute__(self, attrname): 42 | """Redirect loading and saving methods to the inner model. 
That's where 43 | the weights are stored.""" 44 | if 'load' in attrname or 'save' in attrname: 45 | return getattr(self.inner_model, attrname) 46 | return super(ParallelModel, self).__getattribute__(attrname) 47 | 48 | def summary(self, *args, **kwargs): 49 | """Override summary() to display summaries of both, the wrapper 50 | and inner models.""" 51 | super(ParallelModel, self).summary(*args, **kwargs) 52 | self.inner_model.summary(*args, **kwargs) 53 | 54 | def make_parallel(self): 55 | """Creates a new wrapper model that consists of multiple replicas of 56 | the original model placed on different GPUs. 57 | """ 58 | # Slice inputs. Slice inputs on the CPU to avoid sending a copy 59 | # of the full inputs to all GPUs. Saves on bandwidth and memory. 60 | input_slices = {name: tf.split(x, self.gpu_count) 61 | for name, x in zip(self.inner_model.input_names, 62 | self.inner_model.inputs)} 63 | 64 | output_names = self.inner_model.output_names 65 | outputs_all = [] 66 | for i in range(len(self.inner_model.outputs)): 67 | outputs_all.append([]) 68 | 69 | # Run the model call() on each GPU to place the ops there 70 | for i in range(self.gpu_count): 71 | with tf.device('/gpu:%d' % i): 72 | with tf.name_scope('tower_%d' % i): 73 | # Run a slice of inputs through this replica 74 | zipped_inputs = zip(self.inner_model.input_names, 75 | self.inner_model.inputs) 76 | inputs = [ 77 | KL.Lambda(lambda s: input_slices[name][i], 78 | output_shape=lambda s: (None,) + s[1:])(tensor) 79 | for name, tensor in zipped_inputs] 80 | # Create the model replica and get the outputs 81 | outputs = self.inner_model(inputs) 82 | if not isinstance(outputs, list): 83 | outputs = [outputs] 84 | # Save the outputs for merging back together later 85 | for l, o in enumerate(outputs): 86 | outputs_all[l].append(o) 87 | 88 | # Merge outputs on CPU 89 | with tf.device('/cpu:0'): 90 | merged = [] 91 | for outputs, name in zip(outputs_all, output_names): 92 | # If outputs are numbers without dimensions, add a batch dim. 93 | def add_dim(tensor): 94 | """Add a dimension to tensors that don't have any.""" 95 | if K.int_shape(tensor) == (): 96 | return KL.Lambda(lambda t: K.reshape(t, [1, 1]))(tensor) 97 | return tensor 98 | outputs = list(map(add_dim, outputs)) 99 | 100 | # Concatenate 101 | merged.append(KL.Concatenate(axis=0, name=name)(outputs)) 102 | return merged 103 | 104 | 105 | if __name__ == "__main__": 106 | # Testing code below. It creates a simple model to train on MNIST and 107 | # tries to run it on 2 GPUs. It saves the graph so it can be viewed 108 | # in TensorBoard. Run it as: 109 | # 110 | # python3 parallel_model.py 111 | 112 | import os 113 | import numpy as np 114 | import keras.optimizers 115 | from keras.datasets import mnist 116 | from keras.preprocessing.image import ImageDataGenerator 117 | 118 | GPU_COUNT = 2 119 | 120 | # Root directory of the project 121 | ROOT_DIR = os.path.abspath("../") 122 | 123 | # Directory to save logs and trained model 124 | MODEL_DIR = os.path.join(ROOT_DIR, "logs") 125 | 126 | def build_model(x_train, num_classes): 127 | # Reset default graph. Keras leaves old ops in the graph, 128 | # which are ignored for execution but clutter graph 129 | # visualization in TensorBoard. 
130 | tf.reset_default_graph() 131 | 132 | inputs = KL.Input(shape=x_train.shape[1:], name="input_image") 133 | x = KL.Conv2D(32, (3, 3), activation='relu', padding="same", 134 | name="conv1")(inputs) 135 | x = KL.Conv2D(64, (3, 3), activation='relu', padding="same", 136 | name="conv2")(x) 137 | x = KL.MaxPooling2D(pool_size=(2, 2), name="pool1")(x) 138 | x = KL.Flatten(name="flat1")(x) 139 | x = KL.Dense(128, activation='relu', name="dense1")(x) 140 | x = KL.Dense(num_classes, activation='softmax', name="dense2")(x) 141 | 142 | return KM.Model(inputs, x, "digit_classifier_model") 143 | 144 | # Load MNIST Data 145 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 146 | x_train = np.expand_dims(x_train, -1).astype('float32') / 255 147 | x_test = np.expand_dims(x_test, -1).astype('float32') / 255 148 | 149 | print('x_train shape:', x_train.shape) 150 | print('x_test shape:', x_test.shape) 151 | 152 | # Build data generator and model 153 | datagen = ImageDataGenerator() 154 | model = build_model(x_train, 10) 155 | 156 | # Add multi-GPU support. 157 | model = ParallelModel(model, GPU_COUNT) 158 | 159 | optimizer = keras.optimizers.SGD(lr=0.01, momentum=0.9, clipnorm=5.0) 160 | 161 | model.compile(loss='sparse_categorical_crossentropy', 162 | optimizer=optimizer, metrics=['accuracy']) 163 | 164 | model.summary() 165 | 166 | # Train 167 | model.fit_generator( 168 | datagen.flow(x_train, y_train, batch_size=64), 169 | steps_per_epoch=50, epochs=10, verbose=1, 170 | validation_data=(x_test, y_test), 171 | callbacks=[keras.callbacks.TensorBoard(log_dir=MODEL_DIR, 172 | write_graph=True)] 173 | ) 174 | -------------------------------------------------------------------------------- /mrcnn/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Common utility functions and classes. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | """ 9 | 10 | import sys 11 | import os 12 | import math 13 | import random 14 | import numpy as np 15 | import tensorflow as tf 16 | import scipy 17 | import skimage.color 18 | import skimage.io 19 | import skimage.transform 20 | import urllib.request 21 | import shutil 22 | import warnings 23 | 24 | # URL from which to download the latest COCO trained weights 25 | COCO_MODEL_URL = "https://github.com/matterport/Mask_RCNN/releases/download/v2.0/mask_rcnn_coco.h5" 26 | 27 | 28 | ############################################################ 29 | # Bounding Boxes 30 | ############################################################ 31 | 32 | def extract_bboxes(mask): 33 | """Compute bounding boxes from masks. 34 | mask: [height, width, num_instances]. Mask pixels are either 1 or 0. 35 | 36 | Returns: bbox array [num_instances, (y1, x1, y2, x2)]. 37 | """ 38 | boxes = np.zeros([mask.shape[-1], 4], dtype=np.int32) 39 | for i in range(mask.shape[-1]): 40 | m = mask[:, :, i] 41 | # Bounding box. 42 | horizontal_indicies = np.where(np.any(m, axis=0))[0] 43 | vertical_indicies = np.where(np.any(m, axis=1))[0] 44 | if horizontal_indicies.shape[0]: 45 | x1, x2 = horizontal_indicies[[0, -1]] 46 | y1, y2 = vertical_indicies[[0, -1]] 47 | # x2 and y2 should not be part of the box. Increment by 1. 48 | x2 += 1 49 | y2 += 1 50 | else: 51 | # No mask for this instance. Might happen due to 52 | # resizing or cropping. 
Set bbox to zeros 53 | x1, x2, y1, y2 = 0, 0, 0, 0 54 | boxes[i] = np.array([y1, x1, y2, x2]) 55 | return boxes.astype(np.int32) 56 | 57 | 58 | def compute_iou(box, boxes, box_area, boxes_area): 59 | """Calculates IoU of the given box with the array of the given boxes. 60 | box: 1D vector [y1, x1, y2, x2] 61 | boxes: [boxes_count, (y1, x1, y2, x2)] 62 | box_area: float. the area of 'box' 63 | boxes_area: array of length boxes_count. 64 | 65 | Note: the areas are passed in rather than calculated here for 66 | efficency. Calculate once in the caller to avoid duplicate work. 67 | """ 68 | # Calculate intersection areas 69 | y1 = np.maximum(box[0], boxes[:, 0]) 70 | y2 = np.minimum(box[2], boxes[:, 2]) 71 | x1 = np.maximum(box[1], boxes[:, 1]) 72 | x2 = np.minimum(box[3], boxes[:, 3]) 73 | intersection = np.maximum(x2 - x1, 0) * np.maximum(y2 - y1, 0) 74 | union = box_area + boxes_area[:] - intersection[:] 75 | iou = intersection / union 76 | return iou 77 | 78 | 79 | def compute_overlaps(boxes1, boxes2): 80 | """Computes IoU overlaps between two sets of boxes. 81 | boxes1, boxes2: [N, (y1, x1, y2, x2)]. 82 | 83 | For better performance, pass the largest set first and the smaller second. 84 | """ 85 | # Areas of anchors and GT boxes 86 | area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1]) 87 | area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1]) 88 | 89 | # Compute overlaps to generate matrix [boxes1 count, boxes2 count] 90 | # Each cell contains the IoU value. 91 | overlaps = np.zeros((boxes1.shape[0], boxes2.shape[0])) 92 | for i in range(overlaps.shape[1]): 93 | box2 = boxes2[i] 94 | overlaps[:, i] = compute_iou(box2, boxes1, area2[i], area1) 95 | return overlaps 96 | 97 | 98 | def compute_overlaps_masks(masks1, masks2): 99 | '''Computes IoU overlaps between two sets of masks. 100 | masks1, masks2: [Height, Width, instances] 101 | ''' 102 | # flatten masks 103 | masks1 = np.reshape(masks1 > .5, (-1, masks1.shape[-1])).astype(np.float32) 104 | masks2 = np.reshape(masks2 > .5, (-1, masks2.shape[-1])).astype(np.float32) 105 | area1 = np.sum(masks1, axis=0) 106 | area2 = np.sum(masks2, axis=0) 107 | 108 | # intersections and union 109 | intersections = np.dot(masks1.T, masks2) 110 | union = area1[:, None] + area2[None, :] - intersections 111 | overlaps = intersections / union 112 | 113 | return overlaps 114 | 115 | 116 | def non_max_suppression(boxes, scores, threshold): 117 | """Performs non-maximum supression and returns indicies of kept boxes. 118 | boxes: [N, (y1, x1, y2, x2)]. Notice that (y2, x2) lays outside the box. 119 | scores: 1-D array of box scores. 120 | threshold: Float. IoU threshold to use for filtering. 121 | """ 122 | assert boxes.shape[0] > 0 123 | if boxes.dtype.kind != "f": 124 | boxes = boxes.astype(np.float32) 125 | 126 | # Compute box areas 127 | y1 = boxes[:, 0] 128 | x1 = boxes[:, 1] 129 | y2 = boxes[:, 2] 130 | x2 = boxes[:, 3] 131 | area = (y2 - y1) * (x2 - x1) 132 | 133 | # Get indicies of boxes sorted by scores (highest first) 134 | ixs = scores.argsort()[::-1] 135 | 136 | pick = [] 137 | while len(ixs) > 0: 138 | # Pick top box and add its index to the list 139 | i = ixs[0] 140 | pick.append(i) 141 | # Compute IoU of the picked box with the rest 142 | iou = compute_iou(boxes[i], boxes[ixs[1:]], area[i], area[ixs[1:]]) 143 | # Identify boxes with IoU over the threshold. This 144 | # returns indicies into ixs[1:], so add 1 to get 145 | # indicies into ixs. 
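# Example (illustrative): if ixs = [3, 0, 2, 1] and the boxes with ids 0 and 2
# overlap box 3 above the threshold, np.where() returns positions [0, 1] within
# ixs[1:]; adding 1 maps them to positions [1, 2] of ixs, i.e. back to box
# ids 0 and 2, which are then removed.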
146 | remove_ixs = np.where(iou > threshold)[0] + 1 147 | # Remove indicies of the picked and overlapped boxes. 148 | ixs = np.delete(ixs, remove_ixs) 149 | ixs = np.delete(ixs, 0) 150 | return np.array(pick, dtype=np.int32) 151 | 152 | 153 | def apply_box_deltas(boxes, deltas): 154 | """Applies the given deltas to the given boxes. 155 | boxes: [N, (y1, x1, y2, x2)]. Note that (y2, x2) is outside the box. 156 | deltas: [N, (dy, dx, log(dh), log(dw))] 157 | """ 158 | boxes = boxes.astype(np.float32) 159 | # Convert to y, x, h, w 160 | height = boxes[:, 2] - boxes[:, 0] 161 | width = boxes[:, 3] - boxes[:, 1] 162 | center_y = boxes[:, 0] + 0.5 * height 163 | center_x = boxes[:, 1] + 0.5 * width 164 | # Apply deltas 165 | center_y += deltas[:, 0] * height 166 | center_x += deltas[:, 1] * width 167 | height *= np.exp(deltas[:, 2]) 168 | width *= np.exp(deltas[:, 3]) 169 | # Convert back to y1, x1, y2, x2 170 | y1 = center_y - 0.5 * height 171 | x1 = center_x - 0.5 * width 172 | y2 = y1 + height 173 | x2 = x1 + width 174 | return np.stack([y1, x1, y2, x2], axis=1) 175 | 176 | 177 | def box_refinement_graph(box, gt_box): 178 | """Compute refinement needed to transform box to gt_box. 179 | box and gt_box are [N, (y1, x1, y2, x2)] 180 | """ 181 | box = tf.cast(box, tf.float32) 182 | gt_box = tf.cast(gt_box, tf.float32) 183 | 184 | height = box[:, 2] - box[:, 0] 185 | width = box[:, 3] - box[:, 1] 186 | center_y = box[:, 0] + 0.5 * height 187 | center_x = box[:, 1] + 0.5 * width 188 | 189 | gt_height = gt_box[:, 2] - gt_box[:, 0] 190 | gt_width = gt_box[:, 3] - gt_box[:, 1] 191 | gt_center_y = gt_box[:, 0] + 0.5 * gt_height 192 | gt_center_x = gt_box[:, 1] + 0.5 * gt_width 193 | 194 | dy = (gt_center_y - center_y) / height 195 | dx = (gt_center_x - center_x) / width 196 | dh = tf.log(gt_height / height) 197 | dw = tf.log(gt_width / width) 198 | 199 | result = tf.stack([dy, dx, dh, dw], axis=1) 200 | return result 201 | 202 | 203 | def box_refinement(box, gt_box): 204 | """Compute refinement needed to transform box to gt_box. 205 | box and gt_box are [N, (y1, x1, y2, x2)]. (y2, x2) is 206 | assumed to be outside the box. 207 | """ 208 | box = box.astype(np.float32) 209 | gt_box = gt_box.astype(np.float32) 210 | 211 | height = box[:, 2] - box[:, 0] 212 | width = box[:, 3] - box[:, 1] 213 | center_y = box[:, 0] + 0.5 * height 214 | center_x = box[:, 1] + 0.5 * width 215 | 216 | gt_height = gt_box[:, 2] - gt_box[:, 0] 217 | gt_width = gt_box[:, 3] - gt_box[:, 1] 218 | gt_center_y = gt_box[:, 0] + 0.5 * gt_height 219 | gt_center_x = gt_box[:, 1] + 0.5 * gt_width 220 | 221 | dy = (gt_center_y - center_y) / height 222 | dx = (gt_center_x - center_x) / width 223 | dh = np.log(gt_height / height) 224 | dw = np.log(gt_width / width) 225 | 226 | return np.stack([dy, dx, dh, dw], axis=1) 227 | 228 | 229 | ############################################################ 230 | # Dataset 231 | ############################################################ 232 | 233 | class Dataset(object): 234 | """The base class for dataset classes. 235 | To use it, create a new class that adds functions specific to the dataset 236 | you want to use. For example: 237 | 238 | class CatsAndDogsDataset(Dataset): 239 | def load_cats_and_dogs(self): 240 | ... 241 | def load_mask(self, image_id): 242 | ... 243 | def image_reference(self, image_id): 244 | ... 245 | 246 | See COCODataset and ShapesDataset as examples. 
247 | """ 248 | 249 | def __init__(self, class_map=None): 250 | self._image_ids = [] 251 | self.image_info = [] 252 | # Background is always the first class 253 | self.class_info = [{"source": "", "id": 0, "name": "BG"}] 254 | self.source_class_ids = {} 255 | 256 | def add_class(self, source, class_id, class_name): 257 | assert "." not in source, "Source name cannot contain a dot" 258 | # Does the class exist already? 259 | for info in self.class_info: 260 | if info['source'] == source and info["id"] == class_id: 261 | # source.class_id combination already available, skip 262 | return 263 | # Add the class 264 | self.class_info.append({ 265 | "source": source, 266 | "id": class_id, 267 | "name": class_name, 268 | }) 269 | 270 | def add_image(self, source, image_id, path, **kwargs): 271 | image_info = { 272 | "id": image_id, 273 | "source": source, 274 | "path": path, 275 | } 276 | image_info.update(kwargs) 277 | self.image_info.append(image_info) 278 | 279 | def image_reference(self, image_id): 280 | """Return a link to the image in its source Website or details about 281 | the image that help looking it up or debugging it. 282 | 283 | Override for your dataset, but pass to this function 284 | if you encounter images not in your dataset. 285 | """ 286 | return "" 287 | 288 | def prepare(self, class_map=None): 289 | """Prepares the Dataset class for use. 290 | 291 | TODO: class map is not supported yet. When done, it should handle mapping 292 | classes from different datasets to the same class ID. 293 | """ 294 | 295 | def clean_name(name): 296 | """Returns a shorter version of object names for cleaner display.""" 297 | return ",".join(name.split(",")[:1]) 298 | 299 | # Build (or rebuild) everything else from the info dicts. 300 | self.num_classes = len(self.class_info) 301 | self.class_ids = np.arange(self.num_classes) 302 | self.class_names = [clean_name(c["name"]) for c in self.class_info] 303 | self.num_images = len(self.image_info) 304 | self._image_ids = np.arange(self.num_images) 305 | 306 | self.class_from_source_map = {"{}.{}".format(info['source'], info['id']): id 307 | for info, id in zip(self.class_info, self.class_ids)} 308 | 309 | # Map sources to class_ids they support 310 | self.sources = list(set([i['source'] for i in self.class_info])) 311 | self.source_class_ids = {} 312 | # Loop over datasets 313 | for source in self.sources: 314 | self.source_class_ids[source] = [] 315 | # Find classes that belong to this dataset 316 | for i, info in enumerate(self.class_info): 317 | # Include BG class in all datasets 318 | if i == 0 or source == info['source']: 319 | self.source_class_ids[source].append(i) 320 | 321 | def map_source_class_id(self, source_class_id): 322 | """Takes a source class ID and returns the int class ID assigned to it. 323 | 324 | For example: 325 | dataset.map_source_class_id("coco.12") -> 23 326 | """ 327 | return self.class_from_source_map[source_class_id] 328 | 329 | def get_source_class_id(self, class_id, source): 330 | """Map an internal class ID to the corresponding class ID in the source dataset.""" 331 | info = self.class_info[class_id] 332 | assert info['source'] == source 333 | return info['id'] 334 | 335 | def append_data(self, class_info, image_info): 336 | self.external_to_class_id = {} 337 | for i, c in enumerate(self.class_info): 338 | for ds, id in c["map"]: 339 | self.external_to_class_id[ds + str(id)] = i 340 | 341 | # Map external image IDs to internal ones. 
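# Illustrative note: the same source<->internal mapping is used for classes.
# For this dataset, prepare() maps the source key
# "crowdai-mapping-challenge.100" (100 being the Building category id) to
# internal class id 1 (0 is the background class), and
# get_source_class_id(1, "crowdai-mapping-challenge") maps it back to 100 when
# detections are written out in mrcnn/evaluate.py.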
342 | self.external_to_image_id = {} 343 | for i, info in enumerate(self.image_info): 344 | self.external_to_image_id[info["ds"] + str(info["id"])] = i 345 | 346 | @property 347 | def image_ids(self): 348 | return self._image_ids 349 | 350 | def source_image_link(self, image_id): 351 | """Returns the path or URL to the image. 352 | Override this to return a URL to the image if it's availble online for easy 353 | debugging. 354 | """ 355 | return self.image_info[image_id]["path"] 356 | 357 | def load_image(self, image_id): 358 | """Load the specified image and return a [H,W,3] Numpy array. 359 | """ 360 | # Load image 361 | image = skimage.io.imread(self.image_info[image_id]['path']) 362 | # If grayscale. Convert to RGB for consistency. 363 | if image.ndim != 3: 364 | image = skimage.color.gray2rgb(image) 365 | # If has an alpha channel, remove it for consistency 366 | if image.shape[-1] == 4: 367 | image = image[..., :3] 368 | return image 369 | 370 | def load_mask(self, image_id): 371 | """Load instance masks for the given image. 372 | 373 | Different datasets use different ways to store masks. Override this 374 | method to load instance masks and return them in the form of am 375 | array of binary masks of shape [height, width, instances]. 376 | 377 | Returns: 378 | masks: A bool array of shape [height, width, instance count] with 379 | a binary mask per instance. 380 | class_ids: a 1D array of class IDs of the instance masks. 381 | """ 382 | # Override this function to load a mask from your dataset. 383 | # Otherwise, it returns an empty mask. 384 | mask = np.empty([0, 0, 0]) 385 | class_ids = np.empty([0], np.int32) 386 | return mask, class_ids 387 | 388 | 389 | def resize_image(image, min_dim=None, max_dim=None, mode="square"): 390 | """Resizes an image keeping the aspect ratio unchanged. 391 | 392 | min_dim: if provided, resizes the image such that it's smaller 393 | dimension == min_dim 394 | max_dim: if provided, ensures that the image longest side doesn't 395 | exceed this value. 396 | mode: Resizing mode. 397 | none: No resizing. Return the image unchanged. 398 | square: Resize and pad with zeros to get a square image 399 | of size [max_dim, max_dim]. 400 | pad64: Pads width and height with zeros to make them multiples of 64. 401 | If min_dim is provided, it scales the small side to >= min_dim 402 | before padding. max_dim is ignored in this mode. 403 | The multiple of 64 is needed to ensure smooth scaling of feature 404 | maps up and down the 6 levels of the FPN pyramid (2**6=64). 405 | 406 | Returns: 407 | image: the resized image 408 | window: (y1, x1, y2, x2). If max_dim is provided, padding might 409 | be inserted in the returned image. If so, this window is the 410 | coordinates of the image part of the full image (excluding 411 | the padding). The x2, y2 pixels are not included. 412 | scale: The scale factor used to resize the image 413 | padding: Padding added to the image [(top, bottom), (left, right), (0, 0)] 414 | """ 415 | # Keep track of image dtype and return results in the same dtype 416 | image_dtype = image.dtype 417 | # Default window (y1, x1, y2, x2) and default scale == 1. 418 | h, w = image.shape[:2] 419 | window = (0, 0, h, w) 420 | scale = 1 421 | padding = [(0, 0), (0, 0), (0, 0)] 422 | 423 | if mode == "none": 424 | return image, window, scale, padding 425 | 426 | # Scale? 427 | if min_dim: 428 | # Scale up but not down 429 | scale = max(1, min_dim / min(h, w)) 430 | # Does it exceed max dim? 
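# Worked example (illustrative): a 600x800 (h x w) image with min_dim=800,
# max_dim=1024 and mode="square" first gets scale = 800/600 = 1.33; since
# 800 * 1.33 would exceed 1024, the scale is reduced to 1024/800 = 1.28 below,
# the image is resized to 768x1024, and it is then zero-padded to 1024x1024
# with `window` marking the un-padded region.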
431 | if max_dim and mode == "square": 432 | image_max = max(h, w) 433 | if round(image_max * scale) > max_dim: 434 | scale = max_dim / image_max 435 | 436 | # Resize image using bilinear interpolation 437 | if scale != 1: 438 | image = skimage.transform.resize( 439 | image, (round(h * scale), round(w * scale)), 440 | order=1, mode="constant", preserve_range=True) 441 | # Need padding? 442 | if mode == "square": 443 | # Get new height and width 444 | h, w = image.shape[:2] 445 | top_pad = (max_dim - h) // 2 446 | bottom_pad = max_dim - h - top_pad 447 | left_pad = (max_dim - w) // 2 448 | right_pad = max_dim - w - left_pad 449 | padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)] 450 | image = np.pad(image, padding, mode='constant', constant_values=0) 451 | window = (top_pad, left_pad, h + top_pad, w + left_pad) 452 | elif mode == "pad64": 453 | h, w = image.shape[:2] 454 | # Both sides must be divisible by 64 455 | assert min_dim % 64 == 0, "Minimum dimension must be a multiple of 64" 456 | # Height 457 | if h % 64 > 0: 458 | max_h = h - (h % 64) + 64 459 | top_pad = (max_h - h) // 2 460 | bottom_pad = max_h - h - top_pad 461 | else: 462 | top_pad = bottom_pad = 0 463 | # Width 464 | if w % 64 > 0: 465 | max_w = w - (w % 64) + 64 466 | left_pad = (max_w - w) // 2 467 | right_pad = max_w - w - left_pad 468 | else: 469 | left_pad = right_pad = 0 470 | padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)] 471 | image = np.pad(image, padding, mode='constant', constant_values=0) 472 | window = (top_pad, left_pad, h + top_pad, w + left_pad) 473 | else: 474 | raise Exception("Mode {} not supported".format(mode)) 475 | return image.astype(image_dtype), window, scale, padding 476 | 477 | 478 | def resize_mask(mask, scale, padding): 479 | """Resizes a mask using the given scale and padding. 480 | Typically, you get the scale and padding from resize_image() to 481 | ensure both, the image and the mask, are resized consistently. 482 | 483 | scale: mask scaling factor 484 | padding: Padding to add to the mask in the form 485 | [(top, bottom), (left, right), (0, 0)] 486 | """ 487 | # Suppress warning from scipy 0.13.0, the output shape of zoom() is 488 | # calculated with round() instead of int() 489 | with warnings.catch_warnings(): 490 | warnings.simplefilter("ignore") 491 | mask = scipy.ndimage.zoom(mask, zoom=[scale, scale, 1], order=0) 492 | mask = np.pad(mask, padding, mode='constant', constant_values=0) 493 | return mask 494 | 495 | 496 | def minimize_mask(bbox, mask, mini_shape): 497 | """Resize masks to a smaller version to reduce memory load. 498 | Mini-masks can be resized back to image scale using expand_masks() 499 | 500 | See inspect_data.ipynb notebook for more details. 501 | """ 502 | mini_mask = np.zeros(mini_shape + (mask.shape[-1],), dtype=bool) 503 | for i in range(mask.shape[-1]): 504 | # Pick slice and cast to bool in case load_mask() returned wrong dtype 505 | m = mask[:, :, i].astype(bool) 506 | y1, x1, y2, x2 = bbox[i][:4] 507 | m = m[y1:y2, x1:x2] 508 | if m.size == 0: 509 | raise Exception("Invalid bounding box with area of zero") 510 | # Resize with bilinear interpolation 511 | m = skimage.transform.resize(m, mini_shape, order=1, mode="constant") 512 | mini_mask[:, :, i] = np.around(m).astype(np.bool) 513 | return mini_mask 514 | 515 | 516 | def expand_mask(bbox, mini_mask, image_shape): 517 | """Resizes mini masks back to image size. Reverses the change 518 | of minimize_mask(). 519 | 520 | See inspect_data.ipynb notebook for more details. 
521 | """ 522 | mask = np.zeros(image_shape[:2] + (mini_mask.shape[-1],), dtype=bool) 523 | for i in range(mask.shape[-1]): 524 | m = mini_mask[:, :, i] 525 | y1, x1, y2, x2 = bbox[i][:4] 526 | h = y2 - y1 527 | w = x2 - x1 528 | # Resize with bilinear interpolation 529 | m = skimage.transform.resize(m, (h, w), order=1, mode="constant") 530 | mask[y1:y2, x1:x2, i] = np.around(m).astype(np.bool) 531 | return mask 532 | 533 | 534 | # TODO: Build and use this function to reduce code duplication 535 | def mold_mask(mask, config): 536 | pass 537 | 538 | 539 | def unmold_mask(mask, bbox, image_shape): 540 | """Converts a mask generated by the neural network to a format similar 541 | to its original shape. 542 | mask: [height, width] of type float. A small, typically 28x28 mask. 543 | bbox: [y1, x1, y2, x2]. The box to fit the mask in. 544 | 545 | Returns a binary mask with the same size as the original image. 546 | """ 547 | threshold = 0.5 548 | y1, x1, y2, x2 = bbox 549 | mask = skimage.transform.resize(mask, (y2 - y1, x2 - x1), order=1, mode="constant") 550 | mask = np.where(mask >= threshold, 1, 0).astype(np.bool) 551 | 552 | # Put the mask in the right location. 553 | full_mask = np.zeros(image_shape[:2], dtype=np.bool) 554 | full_mask[y1:y2, x1:x2] = mask 555 | return full_mask 556 | 557 | 558 | ############################################################ 559 | # Anchors 560 | ############################################################ 561 | 562 | def generate_anchors(scales, ratios, shape, feature_stride, anchor_stride): 563 | """ 564 | scales: 1D array of anchor sizes in pixels. Example: [32, 64, 128] 565 | ratios: 1D array of anchor ratios of width/height. Example: [0.5, 1, 2] 566 | shape: [height, width] spatial shape of the feature map over which 567 | to generate anchors. 568 | feature_stride: Stride of the feature map relative to the image in pixels. 569 | anchor_stride: Stride of anchors on the feature map. For example, if the 570 | value is 2 then generate anchors for every other feature map pixel. 571 | """ 572 | # Get all combinations of scales and ratios 573 | scales, ratios = np.meshgrid(np.array(scales), np.array(ratios)) 574 | scales = scales.flatten() 575 | ratios = ratios.flatten() 576 | 577 | # Enumerate heights and widths from scales and ratios 578 | heights = scales / np.sqrt(ratios) 579 | widths = scales * np.sqrt(ratios) 580 | 581 | # Enumerate shifts in feature space 582 | shifts_y = np.arange(0, shape[0], anchor_stride) * feature_stride 583 | shifts_x = np.arange(0, shape[1], anchor_stride) * feature_stride 584 | shifts_x, shifts_y = np.meshgrid(shifts_x, shifts_y) 585 | 586 | # Enumerate combinations of shifts, widths, and heights 587 | box_widths, box_centers_x = np.meshgrid(widths, shifts_x) 588 | box_heights, box_centers_y = np.meshgrid(heights, shifts_y) 589 | 590 | # Reshape to get a list of (y, x) and a list of (h, w) 591 | box_centers = np.stack( 592 | [box_centers_y, box_centers_x], axis=2).reshape([-1, 2]) 593 | box_sizes = np.stack([box_heights, box_widths], axis=2).reshape([-1, 2]) 594 | 595 | # Convert to corner coordinates (y1, x1, y2, x2) 596 | boxes = np.concatenate([box_centers - 0.5 * box_sizes, 597 | box_centers + 0.5 * box_sizes], axis=1) 598 | return boxes 599 | 600 | 601 | def generate_pyramid_anchors(scales, ratios, feature_shapes, feature_strides, 602 | anchor_stride): 603 | """Generate anchors at different levels of a feature pyramid. 
Each scale 604 | is associated with a level of the pyramid, but each ratio is used in 605 | all levels of the pyramid. 606 | 607 | Returns: 608 | anchors: [N, (y1, x1, y2, x2)]. All generated anchors in one array. Sorted 609 | with the same order of the given scales. So, anchors of scale[0] come 610 | first, then anchors of scale[1], and so on. 611 | """ 612 | # Anchors 613 | # [anchor_count, (y1, x1, y2, x2)] 614 | anchors = [] 615 | for i in range(len(scales)): 616 | anchors.append(generate_anchors(scales[i], ratios, feature_shapes[i], 617 | feature_strides[i], anchor_stride)) 618 | return np.concatenate(anchors, axis=0) 619 | 620 | 621 | ############################################################ 622 | # Miscellaneous 623 | ############################################################ 624 | 625 | def trim_zeros(x): 626 | """It's common to have tensors larger than the available data and 627 | pad with zeros. This function removes rows that are all zeros. 628 | 629 | x: [rows, columns]. 630 | """ 631 | assert len(x.shape) == 2 632 | return x[~np.all(x == 0, axis=1)] 633 | 634 | 635 | def compute_matches(gt_boxes, gt_class_ids, gt_masks, 636 | pred_boxes, pred_class_ids, pred_scores, pred_masks, 637 | iou_threshold=0.5, score_threshold=0.0): 638 | """Finds matches between prediction and ground truth instances. 639 | 640 | Returns: 641 | gt_match: 1-D array. For each GT box it has the index of the matched 642 | predicted box. 643 | pred_match: 1-D array. For each predicted box, it has the index of 644 | the matched ground truth box. 645 | overlaps: [pred_boxes, gt_boxes] IoU overlaps. 646 | """ 647 | # Trim zero padding 648 | # TODO: cleaner to do zero unpadding upstream 649 | gt_boxes = trim_zeros(gt_boxes) 650 | gt_masks = gt_masks[..., :gt_boxes.shape[0]] 651 | pred_boxes = trim_zeros(pred_boxes) 652 | pred_scores = pred_scores[:pred_boxes.shape[0]] 653 | # Sort predictions by score from high to low 654 | indices = np.argsort(pred_scores)[::-1] 655 | pred_boxes = pred_boxes[indices] 656 | pred_class_ids = pred_class_ids[indices] 657 | pred_scores = pred_scores[indices] 658 | pred_masks = pred_masks[..., indices] 659 | 660 | # Compute IoU overlaps [pred_masks, gt_masks] 661 | overlaps = compute_overlaps_masks(pred_masks, gt_masks) 662 | 663 | # Loop through predictions and find matching ground truth boxes 664 | match_count = 0 665 | pred_match = -1 * np.ones([pred_boxes.shape[0]]) 666 | gt_match = -1 * np.ones([gt_boxes.shape[0]]) 667 | for i in range(len(pred_boxes)): 668 | # Find best matching ground truth box 669 | # 1. Sort matches by score 670 | sorted_ixs = np.argsort(overlaps[i])[::-1] 671 | # 2. Remove low scores 672 | low_score_idx = np.where(overlaps[i, sorted_ixs] < score_threshold)[0] 673 | if low_score_idx.size > 0: 674 | sorted_ixs = sorted_ixs[:low_score_idx[0]] 675 | # 3. Find the match 676 | for j in sorted_ixs: 677 | # If ground truth box is already matched, go to next one 678 | if gt_match[j] > 0: 679 | continue 680 | # If we reach IoU smaller than the threshold, end the loop 681 | iou = overlaps[i, j] 682 | if iou < iou_threshold: 683 | break 684 | # Do we have a match? 685 | if pred_class_ids[i] == gt_class_ids[j]: 686 | match_count += 1 687 | gt_match[j] = i 688 | pred_match[i] = j 689 | break 690 | 691 | return gt_match, pred_match, overlaps 692 | 693 | 694 | def compute_ap(gt_boxes, gt_class_ids, gt_masks, 695 | pred_boxes, pred_class_ids, pred_scores, pred_masks, 696 | iou_threshold=0.5): 697 | """Compute Average Precision at a set IoU threshold (default 0.5). 
698 | 699 | Returns: 700 | mAP: Mean Average Precision 701 | precisions: List of precisions at different class score thresholds. 702 | recalls: List of recall values at different class score thresholds. 703 | overlaps: [pred_boxes, gt_boxes] IoU overlaps. 704 | """ 705 | # Get matches and overlaps 706 | gt_match, pred_match, overlaps = compute_matches( 707 | gt_boxes, gt_class_ids, gt_masks, 708 | pred_boxes, pred_class_ids, pred_scores, pred_masks, 709 | iou_threshold) 710 | 711 | # Compute precision and recall at each prediction box step 712 | precisions = np.cumsum(pred_match > -1) / (np.arange(len(pred_match)) + 1) 713 | recalls = np.cumsum(pred_match > -1).astype(np.float32) / len(gt_match) 714 | 715 | # Pad with start and end values to simplify the math 716 | precisions = np.concatenate([[0], precisions, [0]]) 717 | recalls = np.concatenate([[0], recalls, [1]]) 718 | 719 | # Ensure precision values decrease but don't increase. This way, the 720 | # precision value at each recall threshold is the maximum it can be 721 | # for all following recall thresholds, as specified by the VOC paper. 722 | for i in range(len(precisions) - 2, -1, -1): 723 | precisions[i] = np.maximum(precisions[i], precisions[i + 1]) 724 | 725 | # Compute mean AP over recall range 726 | indices = np.where(recalls[:-1] != recalls[1:])[0] + 1 727 | mAP = np.sum((recalls[indices] - recalls[indices - 1]) * 728 | precisions[indices]) 729 | 730 | return mAP, precisions, recalls, overlaps 731 | 732 | 733 | def compute_recall(pred_boxes, gt_boxes, iou): 734 | """Compute the recall at the given IoU threshold. It's an indication 735 | of how many GT boxes were found by the given prediction boxes. 736 | 737 | pred_boxes: [N, (y1, x1, y2, x2)] in image coordinates 738 | gt_boxes: [N, (y1, x1, y2, x2)] in image coordinates 739 | """ 740 | # Measure overlaps 741 | overlaps = compute_overlaps(pred_boxes, gt_boxes) 742 | iou_max = np.max(overlaps, axis=1) 743 | iou_argmax = np.argmax(overlaps, axis=1) 744 | positive_ids = np.where(iou_max >= iou)[0] 745 | matched_gt_boxes = iou_argmax[positive_ids] 746 | 747 | recall = len(set(matched_gt_boxes)) / gt_boxes.shape[0] 748 | return recall, positive_ids 749 | 750 | 751 | # ## Batch Slicing 752 | # Some custom layers support a batch size of 1 only, and require a lot of work 753 | # to support batches greater than 1. This function slices an input tensor 754 | # across the batch dimension and feeds batches of size 1. Effectively, 755 | # an easy way to support batches > 1 quickly with little code modification. 756 | # In the long run, it's more efficient to modify the code to support large 757 | # batches and getting rid of this function. Consider this a temporary solution 758 | def batch_slice(inputs, graph_fn, batch_size, names=None): 759 | """Splits inputs into slices and feeds each slice to a copy of the given 760 | computation graph and then combines the results. It allows you to run a 761 | graph on a batch of inputs even if the graph is written to support one 762 | instance only. 763 | 764 | inputs: list of tensors. All must have the same first dimension length 765 | graph_fn: A function that returns a TF tensor that's part of a graph. 766 | batch_size: number of slices to divide the data into. 767 | names: If provided, assigns names to the resulting tensors. 
768 | """ 769 | if not isinstance(inputs, list): 770 | inputs = [inputs] 771 | 772 | outputs = [] 773 | for i in range(batch_size): 774 | inputs_slice = [x[i] for x in inputs] 775 | output_slice = graph_fn(*inputs_slice) 776 | if not isinstance(output_slice, (tuple, list)): 777 | output_slice = [output_slice] 778 | outputs.append(output_slice) 779 | # Change outputs from a list of slices where each is 780 | # a list of outputs to a list of outputs and each has 781 | # a list of slices 782 | outputs = list(zip(*outputs)) 783 | 784 | if names is None: 785 | names = [None] * len(outputs) 786 | 787 | result = [tf.stack(o, axis=0, name=n) 788 | for o, n in zip(outputs, names)] 789 | if len(result) == 1: 790 | result = result[0] 791 | 792 | return result 793 | 794 | 795 | def download_trained_weights(coco_model_path, verbose=1): 796 | """Download COCO trained weights from Releases. 797 | 798 | coco_model_path: local path of COCO trained weights 799 | """ 800 | if verbose > 0: 801 | print("Downloading pretrained model to " + coco_model_path + " ...") 802 | with urllib.request.urlopen(COCO_MODEL_URL) as resp, open(coco_model_path, 'wb') as out: 803 | shutil.copyfileobj(resp, out) 804 | if verbose > 0: 805 | print("... done downloading pretrained model!") 806 | 807 | 808 | def norm_boxes(boxes, shape): 809 | """Converts boxes from pixel coordinates to normalized coordinates. 810 | boxes: [N, (y1, x1, y2, x2)] in pixel coordinates 811 | shape: [..., (height, width)] in pixels 812 | 813 | Note: In pixel coordinates (y2, x2) is outside the box. But in normalized 814 | coordinates it's inside the box. 815 | 816 | Returns: 817 | [N, (y1, x1, y2, x2)] in normalized coordinates 818 | """ 819 | h, w = shape 820 | scale = np.array([h - 1, w - 1, h - 1, w - 1]) 821 | shift = np.array([0, 0, 1, 1]) 822 | return np.divide((boxes - shift), scale).astype(np.float32) 823 | 824 | 825 | def denorm_boxes(boxes, shape): 826 | """Converts boxes from normalized coordinates to pixel coordinates. 827 | boxes: [N, (y1, x1, y2, x2)] in normalized coordinates 828 | shape: [..., (height, width)] in pixels 829 | 830 | Note: In pixel coordinates (y2, x2) is outside the box. But in normalized 831 | coordinates it's inside the box. 832 | 833 | Returns: 834 | [N, (y1, x1, y2, x2)] in pixel coordinates 835 | """ 836 | h, w = shape 837 | scale = np.array([h - 1, w - 1, h - 1, w - 1]) 838 | shift = np.array([0, 0, 1, 1]) 839 | return np.around(np.multiply(boxes, scale) + shift).astype(np.int32) 840 | -------------------------------------------------------------------------------- /mrcnn/visualize.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Display and Visualization Functions. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 
6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | """ 9 | 10 | import os 11 | import sys 12 | import logging 13 | import random 14 | import itertools 15 | import colorsys 16 | 17 | import numpy as np 18 | from skimage.measure import find_contours 19 | import matplotlib.pyplot as plt 20 | from matplotlib import patches, lines 21 | from matplotlib.patches import Polygon 22 | import IPython.display 23 | 24 | # Root directory of the project 25 | ROOT_DIR = os.path.abspath("../") 26 | 27 | # Import Mask RCNN 28 | sys.path.append(ROOT_DIR) # To find local version of the library 29 | from mrcnn import utils 30 | 31 | 32 | ############################################################ 33 | # Visualization 34 | ############################################################ 35 | 36 | def display_images(images, titles=None, cols=4, cmap=None, norm=None, 37 | interpolation=None): 38 | """Display the given set of images, optionally with titles. 39 | images: list or array of image tensors in HWC format. 40 | titles: optional. A list of titles to display with each image. 41 | cols: number of images per row 42 | cmap: Optional. Color map to use. For example, "Blues". 43 | norm: Optional. A Normalize instance to map values to colors. 44 | interpolation: Optional. Image interporlation to use for display. 45 | """ 46 | titles = titles if titles is not None else [""] * len(images) 47 | rows = len(images) // cols + 1 48 | plt.figure(figsize=(14, 14 * rows // cols)) 49 | i = 1 50 | for image, title in zip(images, titles): 51 | plt.subplot(rows, cols, i) 52 | plt.title(title, fontsize=9) 53 | plt.axis('off') 54 | plt.imshow(image.astype(np.uint8), cmap=cmap, 55 | norm=norm, interpolation=interpolation) 56 | i += 1 57 | plt.show() 58 | 59 | 60 | def random_colors(N, bright=True): 61 | """ 62 | Generate random colors. 63 | To get visually distinct colors, generate them in HSV space then 64 | convert to RGB. 65 | """ 66 | brightness = 1.0 if bright else 0.7 67 | hsv = [(i / N, 1, brightness) for i in range(N)] 68 | colors = list(map(lambda c: colorsys.hsv_to_rgb(*c), hsv)) 69 | random.shuffle(colors) 70 | return colors 71 | 72 | 73 | def apply_mask(image, mask, color, alpha=0.5): 74 | """Apply the given mask to the image. 75 | """ 76 | for c in range(3): 77 | image[:, :, c] = np.where(mask == 1, 78 | image[:, :, c] * 79 | (1 - alpha) + alpha * color[c] * 255, 80 | image[:, :, c]) 81 | return image 82 | 83 | 84 | def display_instances(image, boxes, masks, class_ids, class_names, 85 | scores=None, title="", 86 | figsize=(16, 16), ax=None): 87 | """ 88 | boxes: [num_instance, (y1, x1, y2, x2, class_id)] in image coordinates. 89 | masks: [height, width, num_instances] 90 | class_ids: [num_instances] 91 | class_names: list of class names of the dataset 92 | scores: (optional) confidence scores for each box 93 | figsize: (optional) the size of the image. 94 | """ 95 | # Number of instances 96 | N = boxes.shape[0] 97 | if not N: 98 | print("\n*** No instances to display *** \n") 99 | else: 100 | assert boxes.shape[0] == masks.shape[-1] == class_ids.shape[0] 101 | 102 | if not ax: 103 | _, ax = plt.subplots(1, figsize=figsize) 104 | 105 | # Generate random colors 106 | colors = random_colors(N) 107 | 108 | # Show area outside image boundaries. 
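    # (The y-limits run from height + 10 down to -10, which flips the axis so the
    #  origin sits at the top-left corner, matching image coordinates.)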
109 | height, width = image.shape[:2] 110 | ax.set_ylim(height + 10, -10) 111 | ax.set_xlim(-10, width + 10) 112 | ax.axis('off') 113 | ax.set_title(title) 114 | 115 | masked_image = image.astype(np.uint32).copy() 116 | for i in range(N): 117 | color = colors[i] 118 | 119 | # Bounding box 120 | if not np.any(boxes[i]): 121 | # Skip this instance. Has no bbox. Likely lost in image cropping. 122 | continue 123 | y1, x1, y2, x2 = boxes[i] 124 | p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, 125 | alpha=0.7, linestyle="dashed", 126 | edgecolor=color, facecolor='none') 127 | ax.add_patch(p) 128 | 129 | # Label 130 | class_id = class_ids[i] 131 | score = scores[i] if scores is not None else None 132 | label = class_names[class_id] 133 | x = random.randint(x1, (x1 + x2) // 2) 134 | caption = "{} {:.3f}".format(label, score) if score else label 135 | ax.text(x1, y1 + 8, caption, 136 | color='w', size=11, backgroundcolor="none") 137 | 138 | # Mask 139 | mask = masks[:, :, i] 140 | masked_image = apply_mask(masked_image, mask, color) 141 | 142 | # Mask Polygon 143 | # Pad to ensure proper polygons for masks that touch image edges. 144 | padded_mask = np.zeros( 145 | (mask.shape[0] + 2, mask.shape[1] + 2), dtype=np.uint8) 146 | padded_mask[1:-1, 1:-1] = mask 147 | contours = find_contours(padded_mask, 0.5) 148 | for verts in contours: 149 | # Subtract the padding and flip (y, x) to (x, y) 150 | verts = np.fliplr(verts) - 1 151 | p = Polygon(verts, facecolor="none", edgecolor=color) 152 | ax.add_patch(p) 153 | ax.imshow(masked_image.astype(np.uint8)) 154 | plt.show() 155 | 156 | 157 | def draw_rois(image, rois, refined_rois, mask, class_ids, class_names, limit=10): 158 | """ 159 | anchors: [n, (y1, x1, y2, x2)] list of anchors in image coordinates. 160 | proposals: [n, 4] the same anchors but refined to fit objects better. 161 | """ 162 | masked_image = image.copy() 163 | 164 | # Pick random anchors in case there are too many. 165 | ids = np.arange(rois.shape[0], dtype=np.int32) 166 | ids = np.random.choice( 167 | ids, limit, replace=False) if ids.shape[0] > limit else ids 168 | 169 | fig, ax = plt.subplots(1, figsize=(12, 12)) 170 | if rois.shape[0] > limit: 171 | plt.title("Showing {} random ROIs out of {}".format( 172 | len(ids), rois.shape[0])) 173 | else: 174 | plt.title("{} ROIs".format(len(ids))) 175 | 176 | # Show area outside image boundaries. 
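    # As above, the inverted y-limits keep the origin at the top-left of the image.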
177 | ax.set_ylim(image.shape[0] + 20, -20) 178 | ax.set_xlim(-50, image.shape[1] + 20) 179 | ax.axis('off') 180 | 181 | for i, id in enumerate(ids): 182 | color = np.random.rand(3) 183 | class_id = class_ids[id] 184 | # ROI 185 | y1, x1, y2, x2 = rois[id] 186 | p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, 187 | edgecolor=color if class_id else "gray", 188 | facecolor='none', linestyle="dashed") 189 | ax.add_patch(p) 190 | # Refined ROI 191 | if class_id: 192 | ry1, rx1, ry2, rx2 = refined_rois[id] 193 | p = patches.Rectangle((rx1, ry1), rx2 - rx1, ry2 - ry1, linewidth=2, 194 | edgecolor=color, facecolor='none') 195 | ax.add_patch(p) 196 | # Connect the top-left corners of the anchor and proposal for easy visualization 197 | ax.add_line(lines.Line2D([x1, rx1], [y1, ry1], color=color)) 198 | 199 | # Label 200 | label = class_names[class_id] 201 | ax.text(rx1, ry1 + 8, "{}".format(label), 202 | color='w', size=11, backgroundcolor="none") 203 | 204 | # Mask 205 | m = utils.unmold_mask(mask[id], rois[id] 206 | [:4].astype(np.int32), image.shape) 207 | masked_image = apply_mask(masked_image, m, color) 208 | 209 | ax.imshow(masked_image) 210 | 211 | # Print stats 212 | print("Positive ROIs: ", class_ids[class_ids > 0].shape[0]) 213 | print("Negative ROIs: ", class_ids[class_ids == 0].shape[0]) 214 | print("Positive Ratio: {:.2f}".format( 215 | class_ids[class_ids > 0].shape[0] / class_ids.shape[0])) 216 | 217 | 218 | # TODO: Replace with matplotlib equivalent? 219 | def draw_box(image, box, color): 220 | """Draw 3-pixel width bounding boxes on the given image array. 221 | color: list of 3 int values for RGB. 222 | """ 223 | y1, x1, y2, x2 = box 224 | image[y1:y1 + 2, x1:x2] = color 225 | image[y2:y2 + 2, x1:x2] = color 226 | image[y1:y2, x1:x1 + 2] = color 227 | image[y1:y2, x2:x2 + 2] = color 228 | return image 229 | 230 | 231 | def display_top_masks(image, mask, class_ids, class_names, limit=4): 232 | """Display the given image and the top few class masks.""" 233 | to_display = [] 234 | titles = [] 235 | to_display.append(image) 236 | titles.append("H x W={}x{}".format(image.shape[0], image.shape[1])) 237 | # Pick top prominent classes in this image 238 | unique_class_ids = np.unique(class_ids) 239 | mask_area = [np.sum(mask[:, :, np.where(class_ids == i)[0]]) 240 | for i in unique_class_ids] 241 | top_ids = [v[0] for v in sorted(zip(unique_class_ids, mask_area), 242 | key=lambda r: r[1], reverse=True) if v[1] > 0] 243 | # Generate images and titles 244 | for i in range(limit): 245 | class_id = top_ids[i] if i < len(top_ids) else -1 246 | # Pull masks of instances belonging to the same class. 247 | m = mask[:, :, np.where(class_ids == class_id)[0]] 248 | m = np.sum(m * np.arange(1, m.shape[-1] + 1), -1) 249 | to_display.append(m) 250 | titles.append(class_names[class_id] if class_id != -1 else "-") 251 | display_images(to_display, titles=titles, cols=limit + 1, cmap="Blues_r") 252 | 253 | 254 | def plot_precision_recall(AP, precisions, recalls): 255 | """Draw the precision-recall curve. 256 | 257 | AP: Average precision at IoU >= 0.5 258 | precisions: list of precision values 259 | recalls: list of recall values 260 | """ 261 | # Plot the Precision-Recall curve 262 | _, ax = plt.subplots(1) 263 | ax.set_title("Precision-Recall Curve. 
AP@50 = {:.3f}".format(AP)) 264 | ax.set_ylim(0, 1.1) 265 | ax.set_xlim(0, 1.1) 266 | _ = ax.plot(recalls, precisions) 267 | 268 | 269 | def plot_overlaps(gt_class_ids, pred_class_ids, pred_scores, 270 | overlaps, class_names, threshold=0.5): 271 | """Draw a grid showing how ground truth objects are classified. 272 | gt_class_ids: [N] int. Ground truth class IDs 273 | pred_class_id: [N] int. Predicted class IDs 274 | pred_scores: [N] float. The probability scores of predicted classes 275 | overlaps: [pred_boxes, gt_boxes] IoU overlaps of predictins and GT boxes. 276 | class_names: list of all class names in the dataset 277 | threshold: Float. The prediction probability required to predict a class 278 | """ 279 | gt_class_ids = gt_class_ids[gt_class_ids != 0] 280 | pred_class_ids = pred_class_ids[pred_class_ids != 0] 281 | 282 | plt.figure(figsize=(12, 10)) 283 | plt.imshow(overlaps, interpolation='nearest', cmap=plt.cm.Blues) 284 | plt.yticks(np.arange(len(pred_class_ids)), 285 | ["{} ({:.2f})".format(class_names[int(id)], pred_scores[i]) 286 | for i, id in enumerate(pred_class_ids)]) 287 | plt.xticks(np.arange(len(gt_class_ids)), 288 | [class_names[int(id)] for id in gt_class_ids], rotation=90) 289 | 290 | thresh = overlaps.max() / 2. 291 | for i, j in itertools.product(range(overlaps.shape[0]), 292 | range(overlaps.shape[1])): 293 | text = "" 294 | if overlaps[i, j] > threshold: 295 | text = "match" if gt_class_ids[j] == pred_class_ids[i] else "wrong" 296 | color = ("white" if overlaps[i, j] > thresh 297 | else "black" if overlaps[i, j] > 0 298 | else "grey") 299 | plt.text(j, i, "{:.3f}\n{}".format(overlaps[i, j], text), 300 | horizontalalignment="center", verticalalignment="center", 301 | fontsize=9, color=color) 302 | 303 | plt.tight_layout() 304 | plt.xlabel("Ground Truth") 305 | plt.ylabel("Predictions") 306 | 307 | 308 | def draw_boxes(image, boxes=None, refined_boxes=None, 309 | masks=None, captions=None, visibilities=None, 310 | title="", ax=None): 311 | """Draw bounding boxes and segmentation masks with differnt 312 | customizations. 313 | 314 | boxes: [N, (y1, x1, y2, x2, class_id)] in image coordinates. 315 | refined_boxes: Like boxes, but draw with solid lines to show 316 | that they're the result of refining 'boxes'. 317 | masks: [N, height, width] 318 | captions: List of N titles to display on each box 319 | visibilities: (optional) List of values of 0, 1, or 2. Determine how 320 | prominant each bounding box should be. 321 | title: An optional title to show over the image 322 | ax: (optional) Matplotlib axis to draw on. 323 | """ 324 | # Number of boxes 325 | assert boxes is not None or refined_boxes is not None 326 | N = boxes.shape[0] if boxes is not None else refined_boxes.shape[0] 327 | 328 | # Matplotlib Axis 329 | if not ax: 330 | _, ax = plt.subplots(1, figsize=(12, 12)) 331 | 332 | # Generate random colors 333 | colors = random_colors(N) 334 | 335 | # Show area outside image boundaries. 
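    # The margin is 10% of the image height and is applied on all four sides below.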
336 | margin = image.shape[0] // 10 337 | ax.set_ylim(image.shape[0] + margin, -margin) 338 | ax.set_xlim(-margin, image.shape[1] + margin) 339 | ax.axis('off') 340 | 341 | ax.set_title(title) 342 | 343 | masked_image = image.astype(np.uint32).copy() 344 | for i in range(N): 345 | # Box visibility 346 | visibility = visibilities[i] if visibilities is not None else 1 347 | if visibility == 0: 348 | color = "gray" 349 | style = "dotted" 350 | alpha = 0.5 351 | elif visibility == 1: 352 | color = colors[i] 353 | style = "dotted" 354 | alpha = 1 355 | elif visibility == 2: 356 | color = colors[i] 357 | style = "solid" 358 | alpha = 1 359 | 360 | # Boxes 361 | if boxes is not None: 362 | if not np.any(boxes[i]): 363 | # Skip this instance. Has no bbox. Likely lost in cropping. 364 | continue 365 | y1, x1, y2, x2 = boxes[i] 366 | p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, 367 | alpha=alpha, linestyle=style, 368 | edgecolor=color, facecolor='none') 369 | ax.add_patch(p) 370 | 371 | # Refined boxes 372 | if refined_boxes is not None and visibility > 0: 373 | ry1, rx1, ry2, rx2 = refined_boxes[i].astype(np.int32) 374 | p = patches.Rectangle((rx1, ry1), rx2 - rx1, ry2 - ry1, linewidth=2, 375 | edgecolor=color, facecolor='none') 376 | ax.add_patch(p) 377 | # Connect the top-left corners of the anchor and proposal 378 | if boxes is not None: 379 | ax.add_line(lines.Line2D([x1, rx1], [y1, ry1], color=color)) 380 | 381 | # Captions 382 | if captions is not None: 383 | caption = captions[i] 384 | # If there are refined boxes, display captions on them 385 | if refined_boxes is not None: 386 | y1, x1, y2, x2 = ry1, rx1, ry2, rx2 387 | x = random.randint(x1, (x1 + x2) // 2) 388 | ax.text(x1, y1, caption, size=11, verticalalignment='top', 389 | color='w', backgroundcolor="none", 390 | bbox={'facecolor': color, 'alpha': 0.5, 391 | 'pad': 2, 'edgecolor': 'none'}) 392 | 393 | # Masks 394 | if masks is not None: 395 | mask = masks[:, :, i] 396 | masked_image = apply_mask(masked_image, mask, color) 397 | # Mask Polygon 398 | # Pad to ensure proper polygons for masks that touch image edges. 399 | padded_mask = np.zeros( 400 | (mask.shape[0] + 2, mask.shape[1] + 2), dtype=np.uint8) 401 | padded_mask[1:-1, 1:-1] = mask 402 | contours = find_contours(padded_mask, 0.5) 403 | for verts in contours: 404 | # Subtract the padding and flip (y, x) to (x, y) 405 | verts = np.fliplr(verts) - 1 406 | p = Polygon(verts, facecolor="none", edgecolor=color) 407 | ax.add_patch(p) 408 | ax.imshow(masked_image.astype(np.uint8)) 409 | 410 | 411 | def display_table(table): 412 | """Display values in a table format. 413 | table: an iterable of rows, and each row is an iterable of values. 414 | """ 415 | html = "" 416 | for row in table: 417 | row_html = "" 418 | for col in row: 419 | row_html += "{:40}".format(str(col)) 420 | html += "" + row_html + "" 421 | html = "" + html + "
" 422 | IPython.display.display(IPython.display.HTML(html)) 423 | 424 | 425 | def display_weight_stats(model): 426 | """Scans all the weights in the model and returns a list of tuples 427 | that contain stats about each weight. 428 | """ 429 | layers = model.get_trainable_layers() 430 | table = [["WEIGHT NAME", "SHAPE", "MIN", "MAX", "STD"]] 431 | for l in layers: 432 | weight_values = l.get_weights() # list of Numpy arrays 433 | weight_tensors = l.weights # list of TF tensors 434 | for i, w in enumerate(weight_values): 435 | weight_name = weight_tensors[i].name 436 | # Detect problematic layers. Exclude biases of conv layers. 437 | alert = "" 438 | if w.min() == w.max() and not (l.__class__.__name__ == "Conv2D" and i == 1): 439 | alert += "*** dead?" 440 | if np.abs(w.min()) > 1000 or np.abs(w.max()) > 1000: 441 | alert += "*** Overflow?" 442 | # Add row 443 | table.append([ 444 | weight_name + alert, 445 | str(w.shape), 446 | "{:+9.4f}".format(w.min()), 447 | "{:+10.4f}".format(w.max()), 448 | "{:+9.4f}".format(w.std()), 449 | ]) 450 | display_table(table) 451 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | scipy 3 | Pillow 4 | cython 5 | matplotlib 6 | scikit-image 7 | tensorflow>=1.3.0 8 | keras>=2.0.8 9 | opencv-python 10 | h5py 11 | imgaug 12 | IPython[all] 13 | tqdm==4.19.9 14 | jupyter-client==5.2.3 15 | jupyter-core==4.4.0 16 | -------------------------------------------------------------------------------- /samples/balloon/README.md: -------------------------------------------------------------------------------- 1 | # Color Spash Example 2 | 3 | This is an example showing the use of Mask RCNN in a real application. 4 | We train the model to detect balloons only, and then we use the generated 5 | masks to keep balloons in color while changing the rest of the image to 6 | grayscale. 7 | 8 | ## Installation 9 | From the [Releases page](https://github.com/matterport/Mask_RCNN/releases) page: 10 | 1. Download `mask_rcnn_balloon.h5`. Save it in the root directory of the repo (the `mask_rcnn` directory). 11 | 2. Download `balloon_dataset.p3`. Expand it such that it's in the path `mask_rcnn/datasets/balloon/`. 12 | 13 | ## Apply color splash using the provided weights 14 | Apply splash effect on an image: 15 | 16 | ```bash 17 | python3 balloon.py splash --weights=/path/to/mask_rcnn/mask_rcnn_balloon.h5 --image= 18 | ``` 19 | 20 | Apply splash effect on a video. Requires OpenCV 3.2+: 21 | 22 | ```bash 23 | python3 balloon.py splash --weights=/path/to/mask_rcnn/mask_rcnn_balloon.h5 --video= 24 | ``` 25 | 26 | 27 | ## Run Jupyter notebooks 28 | Open the `inspect_balloon_data.ipynb` or `inspect_balloon_model.ipynb` Jupter notebooks. You can use these notebooks to explore the dataset and run through the detection pipelie step by step. 
29 | 30 | ## Train the Balloon model 31 | 32 | Train a new model starting from pre-trained COCO weights 33 | ``` 34 | python3 balloon.py train --dataset=/path/to/balloon/dataset --weights=coco 35 | ``` 36 | 37 | Resume training a model that you had trained earlier 38 | ``` 39 | python3 balloon.py train --dataset=/path/to/balloon/dataset --weights=last 40 | ``` 41 | 42 | Train a new model starting from ImageNet weights 43 | ``` 44 | python3 balloon.py train --dataset=/path/to/balloon/dataset --weights=imagenet 45 | ``` 46 | 47 | The code in `balloon.py` is set to train for 3K steps (30 epochs of 100 steps each), and using a batch size of 2. 48 | Update the schedule to fit your needs. 49 | -------------------------------------------------------------------------------- /samples/balloon/balloon.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Train on the toy Balloon dataset and implement color splash effect. 4 | 5 | Copyright (c) 2018 Matterport, Inc. 6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | 9 | ------------------------------------------------------------ 10 | 11 | Usage: import the module (see Jupyter notebooks for examples), or run from 12 | the command line as such: 13 | 14 | # Train a new model starting from pre-trained COCO weights 15 | python3 balloon.py train --dataset=/path/to/balloon/dataset --weights=coco 16 | 17 | # Resume training a model that you had trained earlier 18 | python3 balloon.py train --dataset=/path/to/balloon/dataset --weights=last 19 | 20 | # Train a new model starting from ImageNet weights 21 | python3 balloon.py train --dataset=/path/to/balloon/dataset --weights=imagenet 22 | 23 | # Apply color splash to an image 24 | python3 balloon.py splash --weights=/path/to/weights/file.h5 --image= 25 | 26 | # Apply color splash to video using the last weights you trained 27 | python3 balloon.py splash --weights=last --video= 28 | """ 29 | 30 | import os 31 | import sys 32 | import json 33 | import datetime 34 | import numpy as np 35 | import skimage.draw 36 | 37 | # Root directory of the project 38 | ROOT_DIR = os.path.abspath("../../") 39 | 40 | # Import Mask RCNN 41 | sys.path.append(ROOT_DIR) # To find local version of the library 42 | from mrcnn.config import Config 43 | from mrcnn import model as modellib, utils 44 | 45 | # Path to trained weights file 46 | COCO_WEIGHTS_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5") 47 | 48 | # Directory to save logs and model checkpoints, if not provided 49 | # through the command line argument --logs 50 | DEFAULT_LOGS_DIR = os.path.join(ROOT_DIR, "logs") 51 | 52 | ############################################################ 53 | # Configurations 54 | ############################################################ 55 | 56 | 57 | class BalloonConfig(Config): 58 | """Configuration for training on the toy dataset. 59 | Derives from the base Config class and overrides some values. 60 | """ 61 | # Give the configuration a recognizable name 62 | NAME = "balloon" 63 | 64 | # We use a GPU with 12GB memory, which can fit two images. 65 | # Adjust down if you use a smaller GPU. 
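    # The effective batch size is GPU_COUNT * IMAGES_PER_GPU.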
66 | IMAGES_PER_GPU = 2 67 | 68 | # Number of classes (including background) 69 | NUM_CLASSES = 1 + 1 # Background + baloon 70 | 71 | # Number of training steps per epoch 72 | STEPS_PER_EPOCH = 100 73 | 74 | # Skip detections with < 90% confidence 75 | DETECTION_MIN_CONFIDENCE = 0.9 76 | 77 | 78 | ############################################################ 79 | # Dataset 80 | ############################################################ 81 | 82 | class BalloonDataset(utils.Dataset): 83 | 84 | def load_balloon(self, dataset_dir, subset): 85 | """Load a subset of the Balloon dataset. 86 | dataset_dir: Root directory of the dataset. 87 | subset: Subset to load: train or val 88 | """ 89 | # Add classes. We have only one class to add. 90 | self.add_class("balloon", 1, "balloon") 91 | 92 | # Train or validation dataset? 93 | assert subset in ["train", "val"] 94 | dataset_dir = os.path.join(dataset_dir, subset) 95 | 96 | # Load annotations 97 | # VGG Image Annotator saves each image in the form: 98 | # { 'filename': '28503151_5b5b7ec140_b.jpg', 99 | # 'regions': { 100 | # '0': { 101 | # 'region_attributes': {}, 102 | # 'shape_attributes': { 103 | # 'all_points_x': [...], 104 | # 'all_points_y': [...], 105 | # 'name': 'polygon'}}, 106 | # ... more regions ... 107 | # }, 108 | # 'size': 100202 109 | # } 110 | # We mostly care about the x and y coordinates of each region 111 | annotations = json.load(open(os.path.join(dataset_dir, "via_region_data.json"))) 112 | annotations = list(annotations.values()) # don't need the dict keys 113 | 114 | # The VIA tool saves images in the JSON even if they don't have any 115 | # annotations. Skip unannotated images. 116 | annotations = [a for a in annotations if a['regions']] 117 | 118 | # Add images 119 | for a in annotations: 120 | # Get the x, y coordinaets of points of the polygons that make up 121 | # the outline of each object instance. There are stores in the 122 | # shape_attributes (see json format above) 123 | polygons = [r['shape_attributes'] for r in a['regions'].values()] 124 | 125 | # load_mask() needs the image size to convert polygons to masks. 126 | # Unfortunately, VIA doesn't include it in JSON, so we must read 127 | # the image. This is only managable since the dataset is tiny. 128 | image_path = os.path.join(dataset_dir, a['filename']) 129 | image = skimage.io.imread(image_path) 130 | height, width = image.shape[:2] 131 | 132 | self.add_image( 133 | "balloon", 134 | image_id=a['filename'], # use file name as a unique image id 135 | path=image_path, 136 | width=width, height=height, 137 | polygons=polygons) 138 | 139 | def load_mask(self, image_id): 140 | """Generate instance masks for an image. 141 | Returns: 142 | masks: A bool array of shape [height, width, instance count] with 143 | one mask per instance. 144 | class_ids: a 1D array of class IDs of the instance masks. 145 | """ 146 | # If not a balloon dataset image, delegate to parent class. 
147 | image_info = self.image_info[image_id] 148 | if image_info["source"] != "balloon": 149 | return super(self.__class__, self).load_mask(image_id) 150 | 151 | # Convert polygons to a bitmap mask of shape 152 | # [height, width, instance_count] 153 | info = self.image_info[image_id] 154 | mask = np.zeros([info["height"], info["width"], len(info["polygons"])], 155 | dtype=np.uint8) 156 | for i, p in enumerate(info["polygons"]): 157 | # Get indexes of pixels inside the polygon and set them to 1 158 | rr, cc = skimage.draw.polygon(p['all_points_y'], p['all_points_x']) 159 | mask[rr, cc, i] = 1 160 | 161 | # Return mask, and array of class IDs of each instance. Since we have 162 | # one class ID only, we return an array of 1s 163 | return mask.astype(np.bool), np.ones([mask.shape[-1]], dtype=np.int32) 164 | 165 | def image_reference(self, image_id): 166 | """Return the path of the image.""" 167 | info = self.image_info[image_id] 168 | if info["source"] == "balloon": 169 | return info["path"] 170 | else: 171 | super(self.__class__, self).image_reference(image_id) 172 | 173 | 174 | def train(model): 175 | """Train the model.""" 176 | # Training dataset. 177 | dataset_train = BalloonDataset() 178 | dataset_train.load_balloon(args.dataset, "train") 179 | dataset_train.prepare() 180 | 181 | # Validation dataset 182 | dataset_val = BalloonDataset() 183 | dataset_val.load_balloon(args.dataset, "val") 184 | dataset_val.prepare() 185 | 186 | # *** This training schedule is an example. Update to your needs *** 187 | # Since we're using a very small dataset, and starting from 188 | # COCO trained weights, we don't need to train too long. Also, 189 | # no need to train all layers, just the heads should do it. 190 | print("Training network heads") 191 | model.train(dataset_train, dataset_val, 192 | learning_rate=config.LEARNING_RATE, 193 | epochs=30, 194 | layers='heads') 195 | 196 | 197 | def color_splash(image, mask): 198 | """Apply color splash effect. 199 | image: RGB image [height, width, 3] 200 | mask: instance segmentation mask [height, width, instance count] 201 | 202 | Returns result image. 203 | """ 204 | # Make a grayscale copy of the image. The grayscale copy still 205 | # has 3 RGB channels, though. 206 | gray = skimage.color.gray2rgb(skimage.color.rgb2gray(image)) * 255 207 | # We're treating all instances as one, so collapse the mask into one layer 208 | mask = (np.sum(mask, -1, keepdims=True) >= 1) 209 | # Copy color pixels from the original color image where mask is set 210 | if mask.shape[0] > 0: 211 | splash = np.where(mask, image, gray).astype(np.uint8) 212 | else: 213 | splash = gray 214 | return splash 215 | 216 | 217 | def detect_and_color_splash(model, image_path=None, video_path=None): 218 | assert image_path or video_path 219 | 220 | # Image or video? 
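    # At least one of image_path / video_path is required (asserted above);
    # if both are given, the image branch takes precedence.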
221 | if image_path: 222 | # Run model detection and generate the color splash effect 223 | print("Running on {}".format(args.image)) 224 | # Read image 225 | image = skimage.io.imread(args.image) 226 | # Detect objects 227 | r = model.detect([image], verbose=1)[0] 228 | # Color splash 229 | splash = color_splash(image, r['masks']) 230 | # Save output 231 | file_name = "splash_{:%Y%m%dT%H%M%S}.png".format(datetime.datetime.now()) 232 | skimage.io.imsave(file_name, splash) 233 | elif video_path: 234 | import cv2 235 | # Video capture 236 | vcapture = cv2.VideoCapture(video_path) 237 | width = int(vcapture.get(cv2.CAP_PROP_FRAME_WIDTH)) 238 | height = int(vcapture.get(cv2.CAP_PROP_FRAME_HEIGHT)) 239 | fps = vcapture.get(cv2.CAP_PROP_FPS) 240 | 241 | # Define codec and create video writer 242 | file_name = "splash_{:%Y%m%dT%H%M%S}.avi".format(datetime.datetime.now()) 243 | vwriter = cv2.VideoWriter(file_name, 244 | cv2.VideoWriter_fourcc(*'MJPG'), 245 | fps, (width, height)) 246 | 247 | count = 0 248 | success = True 249 | while success: 250 | print("frame: ", count) 251 | # Read next image 252 | success, image = vcapture.read() 253 | if success: 254 | # OpenCV returns images as BGR, convert to RGB 255 | image = image[..., ::-1] 256 | # Detect objects 257 | r = model.detect([image], verbose=0)[0] 258 | # Color splash 259 | splash = color_splash(image, r['masks']) 260 | # RGB -> BGR to save image to video 261 | splash = splash[..., ::-1] 262 | # Add image to video writer 263 | vwriter.write(splash) 264 | count += 1 265 | vwriter.release() 266 | print("Saved to ", file_name) 267 | 268 | 269 | ############################################################ 270 | # Training 271 | ############################################################ 272 | 273 | if __name__ == '__main__': 274 | import argparse 275 | 276 | # Parse command line arguments 277 | parser = argparse.ArgumentParser( 278 | description='Train Mask R-CNN to detect balloons.') 279 | parser.add_argument("command", 280 | metavar="", 281 | help="'train' or 'splash'") 282 | parser.add_argument('--dataset', required=False, 283 | metavar="/path/to/balloon/dataset/", 284 | help='Directory of the Balloon dataset') 285 | parser.add_argument('--weights', required=True, 286 | metavar="/path/to/weights.h5", 287 | help="Path to weights .h5 file or 'coco'") 288 | parser.add_argument('--logs', required=False, 289 | default=DEFAULT_LOGS_DIR, 290 | metavar="/path/to/logs/", 291 | help='Logs and checkpoints directory (default=logs/)') 292 | parser.add_argument('--image', required=False, 293 | metavar="path or URL to image", 294 | help='Image to apply the color splash effect on') 295 | parser.add_argument('--video', required=False, 296 | metavar="path or URL to video", 297 | help='Video to apply the color splash effect on') 298 | args = parser.parse_args() 299 | 300 | # Validate arguments 301 | if args.command == "train": 302 | assert args.dataset, "Argument --dataset is required for training" 303 | elif args.command == "splash": 304 | assert args.image or args.video,\ 305 | "Provide --image or --video to apply color splash" 306 | 307 | print("Weights: ", args.weights) 308 | print("Dataset: ", args.dataset) 309 | print("Logs: ", args.logs) 310 | 311 | # Configurations 312 | if args.command == "train": 313 | config = BalloonConfig() 314 | else: 315 | class InferenceConfig(BalloonConfig): 316 | # Set batch size to 1 since we'll be running inference on 317 | # one image at a time. 
Batch size = GPU_COUNT * IMAGES_PER_GPU 318 | GPU_COUNT = 1 319 | IMAGES_PER_GPU = 1 320 | config = InferenceConfig() 321 | config.display() 322 | 323 | # Create model 324 | if args.command == "train": 325 | model = modellib.MaskRCNN(mode="training", config=config, 326 | model_dir=args.logs) 327 | else: 328 | model = modellib.MaskRCNN(mode="inference", config=config, 329 | model_dir=args.logs) 330 | 331 | # Select weights file to load 332 | if args.weights.lower() == "coco": 333 | weights_path = COCO_WEIGHTS_PATH 334 | # Download weights file 335 | if not os.path.exists(weights_path): 336 | utils.download_trained_weights(weights_path) 337 | elif args.weights.lower() == "last": 338 | # Find last trained weights 339 | weights_path = model.find_last()[1] 340 | elif args.weights.lower() == "imagenet": 341 | # Start from ImageNet trained weights 342 | weights_path = model.get_imagenet_weights() 343 | else: 344 | weights_path = args.weights 345 | 346 | # Load weights 347 | print("Loading weights ", weights_path) 348 | if args.weights.lower() == "coco": 349 | # Exclude the last layers because they require a matching 350 | # number of classes 351 | model.load_weights(weights_path, by_name=True, exclude=[ 352 | "mrcnn_class_logits", "mrcnn_bbox_fc", 353 | "mrcnn_bbox", "mrcnn_mask"]) 354 | else: 355 | model.load_weights(weights_path, by_name=True) 356 | 357 | # Train or evaluate 358 | if args.command == "train": 359 | train(model) 360 | elif args.command == "splash": 361 | detect_and_color_splash(model, image_path=args.image, 362 | video_path=args.video) 363 | else: 364 | print("'{}' is not recognized. " 365 | "Use 'train' or 'splash'".format(args.command)) 366 | -------------------------------------------------------------------------------- /samples/coco/coco.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Configurations and data loading code for MS COCO. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | 9 | ------------------------------------------------------------ 10 | 11 | Usage: import the module (see Jupyter notebooks for examples), or run from 12 | the command line as such: 13 | 14 | # Train a new model starting from pre-trained COCO weights 15 | python3 coco.py train --dataset=/path/to/coco/ --model=coco 16 | 17 | # Train a new model starting from ImageNet weights 18 | python3 coco.py train --dataset=/path/to/coco/ --model=imagenet 19 | 20 | # Continue training a model that you had trained earlier 21 | python3 coco.py train --dataset=/path/to/coco/ --model=/path/to/weights.h5 22 | 23 | # Continue training the last model you trained 24 | python3 coco.py train --dataset=/path/to/coco/ --model=last 25 | 26 | # Run COCO evaluatoin on the last model you trained 27 | python3 coco.py evaluate --dataset=/path/to/coco/ --model=last 28 | """ 29 | 30 | import os 31 | import sys 32 | import time 33 | import numpy as np 34 | import imgaug # https://github.com/aleju/imgaug (pip3 install imageaug) 35 | 36 | # Download and install the Python COCO tools from https://github.com/waleedka/coco 37 | # That's a fork from the original https://github.com/pdollar/coco with a bug 38 | # fix for Python 3. 39 | # I submitted a pull request https://github.com/cocodataset/cocoapi/pull/50 40 | # If the PR is merged then use the original repo. 41 | # Note: Edit PythonAPI/Makefile and replace "python" with "python3". 
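# (For reference, one way to install that fork -- treat this as a suggestion that
#  depends on your environment -- is via pip's VCS support, e.g.:
#  pip3 install "git+https://github.com/waleedka/coco.git#subdirectory=PythonAPI")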
42 | from pycocotools.coco import COCO 43 | from pycocotools.cocoeval import COCOeval 44 | from pycocotools import mask as maskUtils 45 | 46 | import zipfile 47 | import urllib.request 48 | import shutil 49 | 50 | # Root directory of the project 51 | ROOT_DIR = os.path.abspath("../../") 52 | 53 | # Import Mask RCNN 54 | sys.path.append(ROOT_DIR) # To find local version of the library 55 | from mrcnn.config import Config 56 | from mrcnn import model as modellib, utils 57 | 58 | # Path to trained weights file 59 | COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5") 60 | 61 | # Directory to save logs and model checkpoints, if not provided 62 | # through the command line argument --logs 63 | DEFAULT_LOGS_DIR = os.path.join(ROOT_DIR, "logs") 64 | DEFAULT_DATASET_YEAR = "2014" 65 | 66 | ############################################################ 67 | # Configurations 68 | ############################################################ 69 | 70 | 71 | class CocoConfig(Config): 72 | """Configuration for training on MS COCO. 73 | Derives from the base Config class and overrides values specific 74 | to the COCO dataset. 75 | """ 76 | # Give the configuration a recognizable name 77 | NAME = "coco" 78 | 79 | # We use a GPU with 12GB memory, which can fit two images. 80 | # Adjust down if you use a smaller GPU. 81 | IMAGES_PER_GPU = 2 82 | 83 | # Uncomment to train on 8 GPUs (default is 1) 84 | # GPU_COUNT = 8 85 | 86 | # Number of classes (including background) 87 | NUM_CLASSES = 1 + 80 # COCO has 80 classes 88 | 89 | 90 | ############################################################ 91 | # Dataset 92 | ############################################################ 93 | 94 | class CocoDataset(utils.Dataset): 95 | def load_coco(self, dataset_dir, subset, year=DEFAULT_DATASET_YEAR, class_ids=None, 96 | class_map=None, return_coco=False, auto_download=False): 97 | """Load a subset of the COCO dataset. 98 | dataset_dir: The root directory of the COCO dataset. 99 | subset: What to load (train, val, minival, valminusminival) 100 | year: What dataset year to load (2014, 2017) as a string, not an integer 101 | class_ids: If provided, only loads images that have the given classes. 102 | class_map: TODO: Not implemented yet. Supports maping classes from 103 | different datasets to the same class ID. 104 | return_coco: If True, returns the COCO object. 105 | auto_download: Automatically download and unzip MS-COCO images and annotations 106 | """ 107 | 108 | if auto_download is True: 109 | self.auto_download(dataset_dir, subset, year) 110 | 111 | coco = COCO("{}/annotations/instances_{}{}.json".format(dataset_dir, subset, year)) 112 | if subset == "minival" or subset == "valminusminival": 113 | subset = "val" 114 | image_dir = "{}/{}{}".format(dataset_dir, subset, year) 115 | 116 | # Load all classes or a subset? 117 | if not class_ids: 118 | # All classes 119 | class_ids = sorted(coco.getCatIds()) 120 | 121 | # All images or a subset? 
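        # When class_ids is given, gather the IDs of all images that contain those
        # classes; duplicates (images containing several of them) are removed below.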
122 | if class_ids: 123 | image_ids = [] 124 | for id in class_ids: 125 | image_ids.extend(list(coco.getImgIds(catIds=[id]))) 126 | # Remove duplicates 127 | image_ids = list(set(image_ids)) 128 | else: 129 | # All images 130 | image_ids = list(coco.imgs.keys()) 131 | 132 | # Add classes 133 | for i in class_ids: 134 | self.add_class("coco", i, coco.loadCats(i)[0]["name"]) 135 | 136 | # Add images 137 | for i in image_ids: 138 | self.add_image( 139 | "coco", image_id=i, 140 | path=os.path.join(image_dir, coco.imgs[i]['file_name']), 141 | width=coco.imgs[i]["width"], 142 | height=coco.imgs[i]["height"], 143 | annotations=coco.loadAnns(coco.getAnnIds( 144 | imgIds=[i], catIds=class_ids, iscrowd=None))) 145 | if return_coco: 146 | return coco 147 | 148 | def auto_download(self, dataDir, dataType, dataYear): 149 | """Download the COCO dataset/annotations if requested. 150 | dataDir: The root directory of the COCO dataset. 151 | dataType: What to load (train, val, minival, valminusminival) 152 | dataYear: What dataset year to load (2014, 2017) as a string, not an integer 153 | Note: 154 | For 2014, use "train", "val", "minival", or "valminusminival" 155 | For 2017, only "train" and "val" annotations are available 156 | """ 157 | 158 | # Setup paths and file names 159 | if dataType == "minival" or dataType == "valminusminival": 160 | imgDir = "{}/{}{}".format(dataDir, "val", dataYear) 161 | imgZipFile = "{}/{}{}.zip".format(dataDir, "val", dataYear) 162 | imgURL = "http://images.cocodataset.org/zips/{}{}.zip".format("val", dataYear) 163 | else: 164 | imgDir = "{}/{}{}".format(dataDir, dataType, dataYear) 165 | imgZipFile = "{}/{}{}.zip".format(dataDir, dataType, dataYear) 166 | imgURL = "http://images.cocodataset.org/zips/{}{}.zip".format(dataType, dataYear) 167 | # print("Image paths:"); print(imgDir); print(imgZipFile); print(imgURL) 168 | 169 | # Create main folder if it doesn't exist yet 170 | if not os.path.exists(dataDir): 171 | os.makedirs(dataDir) 172 | 173 | # Download images if not available locally 174 | if not os.path.exists(imgDir): 175 | os.makedirs(imgDir) 176 | print("Downloading images to " + imgZipFile + " ...") 177 | with urllib.request.urlopen(imgURL) as resp, open(imgZipFile, 'wb') as out: 178 | shutil.copyfileobj(resp, out) 179 | print("... done downloading.") 180 | print("Unzipping " + imgZipFile) 181 | with zipfile.ZipFile(imgZipFile, "r") as zip_ref: 182 | zip_ref.extractall(dataDir) 183 | print("... 
done unzipping") 184 | print("Will use images in " + imgDir) 185 | 186 | # Setup annotations data paths 187 | annDir = "{}/annotations".format(dataDir) 188 | if dataType == "minival": 189 | annZipFile = "{}/instances_minival2014.json.zip".format(dataDir) 190 | annFile = "{}/instances_minival2014.json".format(annDir) 191 | annURL = "https://dl.dropboxusercontent.com/s/o43o90bna78omob/instances_minival2014.json.zip?dl=0" 192 | unZipDir = annDir 193 | elif dataType == "valminusminival": 194 | annZipFile = "{}/instances_valminusminival2014.json.zip".format(dataDir) 195 | annFile = "{}/instances_valminusminival2014.json".format(annDir) 196 | annURL = "https://dl.dropboxusercontent.com/s/s3tw5zcg7395368/instances_valminusminival2014.json.zip?dl=0" 197 | unZipDir = annDir 198 | else: 199 | annZipFile = "{}/annotations_trainval{}.zip".format(dataDir, dataYear) 200 | annFile = "{}/instances_{}{}.json".format(annDir, dataType, dataYear) 201 | annURL = "http://images.cocodataset.org/annotations/annotations_trainval{}.zip".format(dataYear) 202 | unZipDir = dataDir 203 | # print("Annotations paths:"); print(annDir); print(annFile); print(annZipFile); print(annURL) 204 | 205 | # Download annotations if not available locally 206 | if not os.path.exists(annDir): 207 | os.makedirs(annDir) 208 | if not os.path.exists(annFile): 209 | if not os.path.exists(annZipFile): 210 | print("Downloading zipped annotations to " + annZipFile + " ...") 211 | with urllib.request.urlopen(annURL) as resp, open(annZipFile, 'wb') as out: 212 | shutil.copyfileobj(resp, out) 213 | print("... done downloading.") 214 | print("Unzipping " + annZipFile) 215 | with zipfile.ZipFile(annZipFile, "r") as zip_ref: 216 | zip_ref.extractall(unZipDir) 217 | print("... done unzipping") 218 | print("Will use annotations in " + annFile) 219 | 220 | def load_mask(self, image_id): 221 | """Load instance masks for the given image. 222 | 223 | Different datasets use different ways to store masks. This 224 | function converts the different mask format to one format 225 | in the form of a bitmap [height, width, instances]. 226 | 227 | Returns: 228 | masks: A bool array of shape [height, width, instance count] with 229 | one mask per instance. 230 | class_ids: a 1D array of class IDs of the instance masks. 231 | """ 232 | # If not a COCO image, delegate to parent class. 233 | image_info = self.image_info[image_id] 234 | if image_info["source"] != "coco": 235 | return super(CocoDataset, self).load_mask(image_id) 236 | 237 | instance_masks = [] 238 | class_ids = [] 239 | annotations = self.image_info[image_id]["annotations"] 240 | # Build mask of shape [height, width, instance_count] and list 241 | # of class IDs that correspond to each channel of the mask. 242 | for annotation in annotations: 243 | class_id = self.map_source_class_id( 244 | "coco.{}".format(annotation['category_id'])) 245 | if class_id: 246 | m = self.annToMask(annotation, image_info["height"], 247 | image_info["width"]) 248 | # Some objects are so small that they're less than 1 pixel area 249 | # and end up rounded out. Skip those objects. 250 | if m.max() < 1: 251 | continue 252 | # Is it a crowd? If so, use a negative class ID. 253 | if annotation['iscrowd']: 254 | # Use negative class ID for crowds 255 | class_id *= -1 256 | # For crowd masks, annToMask() sometimes returns a mask 257 | # smaller than the given dimensions. If so, resize it. 
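                # Fall back to a full-image placeholder mask instead of resizing.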
258 | if m.shape[0] != image_info["height"] or m.shape[1] != image_info["width"]: 259 | m = np.ones([image_info["height"], image_info["width"]], dtype=bool) 260 | instance_masks.append(m) 261 | class_ids.append(class_id) 262 | 263 | # Pack instance masks into an array 264 | if class_ids: 265 | mask = np.stack(instance_masks, axis=2).astype(np.bool) 266 | class_ids = np.array(class_ids, dtype=np.int32) 267 | return mask, class_ids 268 | else: 269 | # Call super class to return an empty mask 270 | return super(CocoDataset, self).load_mask(image_id) 271 | 272 | def image_reference(self, image_id): 273 | """Return a link to the image in the COCO Website.""" 274 | info = self.image_info[image_id] 275 | if info["source"] == "coco": 276 | return "http://cocodataset.org/#explore?id={}".format(info["id"]) 277 | else: 278 | super(CocoDataset, self).image_reference(image_id) 279 | 280 | # The following two functions are from pycocotools with a few changes. 281 | 282 | def annToRLE(self, ann, height, width): 283 | """ 284 | Convert annotation which can be polygons, uncompressed RLE to RLE. 285 | :return: binary mask (numpy 2D array) 286 | """ 287 | segm = ann['segmentation'] 288 | if isinstance(segm, list): 289 | # polygon -- a single object might consist of multiple parts 290 | # we merge all parts into one mask rle code 291 | rles = maskUtils.frPyObjects(segm, height, width) 292 | rle = maskUtils.merge(rles) 293 | elif isinstance(segm['counts'], list): 294 | # uncompressed RLE 295 | rle = maskUtils.frPyObjects(segm, height, width) 296 | else: 297 | # rle 298 | rle = ann['segmentation'] 299 | return rle 300 | 301 | def annToMask(self, ann, height, width): 302 | """ 303 | Convert annotation which can be polygons, uncompressed RLE, or RLE to binary mask. 304 | :return: binary mask (numpy 2D array) 305 | """ 306 | rle = self.annToRLE(ann, height, width) 307 | m = maskUtils.decode(rle) 308 | return m 309 | 310 | 311 | ############################################################ 312 | # COCO Evaluation 313 | ############################################################ 314 | 315 | def build_coco_results(dataset, image_ids, rois, class_ids, scores, masks): 316 | """Arrange resutls to match COCO specs in http://cocodataset.org/#format 317 | """ 318 | # If no results, return an empty list 319 | if rois is None: 320 | return [] 321 | 322 | results = [] 323 | for image_id in image_ids: 324 | # Loop through detections 325 | for i in range(rois.shape[0]): 326 | class_id = class_ids[i] 327 | score = scores[i] 328 | bbox = np.around(rois[i], 1) 329 | mask = masks[:, :, i] 330 | 331 | result = { 332 | "image_id": image_id, 333 | "category_id": dataset.get_source_class_id(class_id, "coco"), 334 | "bbox": [bbox[1], bbox[0], bbox[3] - bbox[1], bbox[2] - bbox[0]], 335 | "score": score, 336 | "segmentation": maskUtils.encode(np.asfortranarray(mask)) 337 | } 338 | results.append(result) 339 | return results 340 | 341 | 342 | def evaluate_coco(model, dataset, coco, eval_type="bbox", limit=0, image_ids=None): 343 | """Runs official COCO evaluation. 344 | dataset: A Dataset object with valiadtion data 345 | eval_type: "bbox" or "segm" for bounding box or segmentation evaluation 346 | limit: if not 0, it's the number of images to use for evaluation 347 | """ 348 | # Pick COCO images from the dataset 349 | image_ids = image_ids or dataset.image_ids 350 | 351 | # Limit to a subset 352 | if limit: 353 | image_ids = image_ids[:limit] 354 | 355 | # Get corresponding COCO image IDs. 
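    # Map the dataset's internal indices back to the original COCO image IDs
    # stored in each image_info entry.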
356 | coco_image_ids = [dataset.image_info[id]["id"] for id in image_ids] 357 | 358 | t_prediction = 0 359 | t_start = time.time() 360 | 361 | results = [] 362 | for i, image_id in enumerate(image_ids): 363 | # Load image 364 | image = dataset.load_image(image_id) 365 | 366 | # Run detection 367 | t = time.time() 368 | r = model.detect([image], verbose=0)[0] 369 | t_prediction += (time.time() - t) 370 | 371 | # Convert results to COCO format 372 | # Cast masks to uint8 because COCO tools errors out on bool 373 | image_results = build_coco_results(dataset, coco_image_ids[i:i + 1], 374 | r["rois"], r["class_ids"], 375 | r["scores"], 376 | r["masks"].astype(np.uint8)) 377 | results.extend(image_results) 378 | 379 | # Load results. This modifies results with additional attributes. 380 | coco_results = coco.loadRes(results) 381 | 382 | # Evaluate 383 | cocoEval = COCOeval(coco, coco_results, eval_type) 384 | cocoEval.params.imgIds = coco_image_ids 385 | cocoEval.evaluate() 386 | cocoEval.accumulate() 387 | cocoEval.summarize() 388 | 389 | print("Prediction time: {}. Average {}/image".format( 390 | t_prediction, t_prediction / len(image_ids))) 391 | print("Total time: ", time.time() - t_start) 392 | 393 | 394 | ############################################################ 395 | # Training 396 | ############################################################ 397 | 398 | 399 | if __name__ == '__main__': 400 | import argparse 401 | 402 | # Parse command line arguments 403 | parser = argparse.ArgumentParser( 404 | description='Train Mask R-CNN on MS COCO.') 405 | parser.add_argument("command", 406 | metavar="", 407 | help="'train' or 'evaluate' on MS COCO") 408 | parser.add_argument('--dataset', required=True, 409 | metavar="/path/to/coco/", 410 | help='Directory of the MS-COCO dataset') 411 | parser.add_argument('--year', required=False, 412 | default=DEFAULT_DATASET_YEAR, 413 | metavar="", 414 | help='Year of the MS-COCO dataset (2014 or 2017) (default=2014)') 415 | parser.add_argument('--model', required=True, 416 | metavar="/path/to/weights.h5", 417 | help="Path to weights .h5 file or 'coco'") 418 | parser.add_argument('--logs', required=False, 419 | default=DEFAULT_LOGS_DIR, 420 | metavar="/path/to/logs/", 421 | help='Logs and checkpoints directory (default=logs/)') 422 | parser.add_argument('--limit', required=False, 423 | default=500, 424 | metavar="", 425 | help='Images to use for evaluation (default=500)') 426 | parser.add_argument('--download', required=False, 427 | default=False, 428 | metavar="", 429 | help='Automatically download and unzip MS-COCO files (default=False)', 430 | type=bool) 431 | args = parser.parse_args() 432 | print("Command: ", args.command) 433 | print("Model: ", args.model) 434 | print("Dataset: ", args.dataset) 435 | print("Year: ", args.year) 436 | print("Logs: ", args.logs) 437 | print("Auto Download: ", args.download) 438 | 439 | # Configurations 440 | if args.command == "train": 441 | config = CocoConfig() 442 | else: 443 | class InferenceConfig(CocoConfig): 444 | # Set batch size to 1 since we'll be running inference on 445 | # one image at a time. 
Batch size = GPU_COUNT * IMAGES_PER_GPU
446 |             GPU_COUNT = 1
447 |             IMAGES_PER_GPU = 1
448 |             DETECTION_MIN_CONFIDENCE = 0
449 |         config = InferenceConfig()
450 |     config.display()
451 | 
452 |     # Create model
453 |     if args.command == "train":
454 |         model = modellib.MaskRCNN(mode="training", config=config,
455 |                                   model_dir=args.logs)
456 |     else:
457 |         model = modellib.MaskRCNN(mode="inference", config=config,
458 |                                   model_dir=args.logs)
459 | 
460 |     # Select weights file to load
461 |     if args.model.lower() == "coco":
462 |         model_path = COCO_MODEL_PATH
463 |     elif args.model.lower() == "last":
464 |         # Find last trained weights
465 |         model_path = model.find_last()[1]
466 |     elif args.model.lower() == "imagenet":
467 |         # Start from ImageNet trained weights
468 |         model_path = model.get_imagenet_weights()
469 |     else:
470 |         model_path = args.model
471 | 
472 |     # Load weights
473 |     print("Loading weights ", model_path)
474 |     model.load_weights(model_path, by_name=True)
475 | 
476 |     # Train or evaluate
477 |     if args.command == "train":
478 |         # Training dataset. Use the training set and 35K from the
479 |         # validation set, as in the Mask R-CNN paper.
480 |         dataset_train = CocoDataset()
481 |         dataset_train.load_coco(args.dataset, "train", year=args.year, auto_download=args.download)
482 |         dataset_train.load_coco(args.dataset, "valminusminival", year=args.year, auto_download=args.download)
483 |         dataset_train.prepare()
484 | 
485 |         # Validation dataset
486 |         dataset_val = CocoDataset()
487 |         dataset_val.load_coco(args.dataset, "minival", year=args.year, auto_download=args.download)
488 |         dataset_val.prepare()
489 | 
490 |         # Image Augmentation
491 |         # Right/Left flip 50% of the time
492 |         augmentation = imgaug.augmenters.Fliplr(0.5)
493 | 
494 |         # *** This training schedule is an example. Update to your needs ***
495 | 
496 |         # Training - Stage 1
497 |         print("Training network heads")
498 |         model.train(dataset_train, dataset_val,
499 |                     learning_rate=config.LEARNING_RATE,
500 |                     epochs=40,
501 |                     layers='heads',
502 |                     augmentation=augmentation)
503 | 
504 |         # Training - Stage 2
505 |         # Finetune layers from ResNet stage 4 and up
506 |         print("Fine tune ResNet stage 4 and up")
507 |         model.train(dataset_train, dataset_val,
508 |                     learning_rate=config.LEARNING_RATE,
509 |                     epochs=120,
510 |                     layers='4+',
511 |                     augmentation=augmentation)
512 | 
513 |         # Training - Stage 3
514 |         # Fine tune all layers
515 |         print("Fine tune all layers")
516 |         model.train(dataset_train, dataset_val,
517 |                     learning_rate=config.LEARNING_RATE / 10,
518 |                     epochs=160,
519 |                     layers='all',
520 |                     augmentation=augmentation)
521 | 
522 |     elif args.command == "evaluate":
523 |         # Validation dataset
524 |         dataset_val = CocoDataset()
525 |         coco = dataset_val.load_coco(args.dataset, "minival", year=args.year, return_coco=True, auto_download=args.download)
526 |         dataset_val.prepare()
527 |         print("Running COCO evaluation on {} images.".format(args.limit))
528 |         evaluate_coco(model, dataset_val, coco, "bbox", limit=int(args.limit))
529 |     else:
530 |         print("'{}' is not recognized. "
531 |               "Use 'train' or 'evaluate'".format(args.command))
532 | 
--------------------------------------------------------------------------------
/samples/shapes/shapes.py:
--------------------------------------------------------------------------------
1 | """
2 | Mask R-CNN
3 | Configurations and data loading code for the synthetic Shapes dataset.
4 | This is a duplicate of the code in the notebook train_shapes.ipynb for easy
5 | import into other notebooks, such as inspect_model.ipynb.
6 | 7 | Copyright (c) 2017 Matterport, Inc. 8 | Licensed under the MIT License (see LICENSE for details) 9 | Written by Waleed Abdulla 10 | """ 11 | 12 | import os 13 | import sys 14 | import math 15 | import random 16 | import numpy as np 17 | import cv2 18 | 19 | # Root directory of the project 20 | ROOT_DIR = os.path.abspath("../../") 21 | 22 | # Import Mask RCNN 23 | sys.path.append(ROOT_DIR) # To find local version of the library 24 | from mrcnn.config import Config 25 | from mrcnn import utils 26 | 27 | 28 | class ShapesConfig(Config): 29 | """Configuration for training on the toy shapes dataset. 30 | Derives from the base Config class and overrides values specific 31 | to the toy shapes dataset. 32 | """ 33 | # Give the configuration a recognizable name 34 | NAME = "shapes" 35 | 36 | # Train on 1 GPU and 8 images per GPU. We can put multiple images on each 37 | # GPU because the images are small. Batch size is 8 (GPUs * images/GPU). 38 | GPU_COUNT = 1 39 | IMAGES_PER_GPU = 8 40 | 41 | # Number of classes (including background) 42 | NUM_CLASSES = 1 + 3 # background + 3 shapes 43 | 44 | # Use small images for faster training. Set the limits of the small side 45 | # the large side, and that determines the image shape. 46 | IMAGE_MIN_DIM = 128 47 | IMAGE_MAX_DIM = 128 48 | 49 | # Use smaller anchors because our image and objects are small 50 | RPN_ANCHOR_SCALES = (8, 16, 32, 64, 128) # anchor side in pixels 51 | 52 | # Reduce training ROIs per image because the images are small and have 53 | # few objects. Aim to allow ROI sampling to pick 33% positive ROIs. 54 | TRAIN_ROIS_PER_IMAGE = 32 55 | 56 | # Use a small epoch since the data is simple 57 | STEPS_PER_EPOCH = 100 58 | 59 | # use small validation steps since the epoch is small 60 | VALIDATION_STEPS = 5 61 | 62 | 63 | class ShapesDataset(utils.Dataset): 64 | """Generates the shapes synthetic dataset. The dataset consists of simple 65 | shapes (triangles, squares, circles) placed randomly on a blank surface. 66 | The images are generated on the fly. No file access required. 67 | """ 68 | 69 | def load_shapes(self, count, height, width): 70 | """Generate the requested number of synthetic images. 71 | count: number of images to generate. 72 | height, width: the size of the generated images. 73 | """ 74 | # Add classes 75 | self.add_class("shapes", 1, "square") 76 | self.add_class("shapes", 2, "circle") 77 | self.add_class("shapes", 3, "triangle") 78 | 79 | # Add images 80 | # Generate random specifications of images (i.e. color and 81 | # list of shapes sizes and locations). This is more compact than 82 | # actual images. Images are generated on the fly in load_image(). 83 | for i in range(count): 84 | bg_color, shapes = self.random_image(height, width) 85 | self.add_image("shapes", image_id=i, path=None, 86 | width=width, height=height, 87 | bg_color=bg_color, shapes=shapes) 88 | 89 | def load_image(self, image_id): 90 | """Generate an image from the specs of the given image ID. 91 | Typically this function loads the image from a file, but 92 | in this case it generates the image on the fly from the 93 | specs in image_info. 
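        Returns an RGB image of shape [height, width, 3] with dtype uint8.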
94 |         """
95 |         info = self.image_info[image_id]
96 |         bg_color = np.array(info['bg_color']).reshape([1, 1, 3])
97 |         image = np.ones([info['height'], info['width'], 3], dtype=np.uint8)
98 |         image = image * bg_color.astype(np.uint8)
99 |         for shape, color, dims in info['shapes']:
100 |             image = self.draw_shape(image, shape, dims, color)
101 |         return image
102 | 
103 |     def image_reference(self, image_id):
104 |         """Return the shapes data of the image."""
105 |         info = self.image_info[image_id]
106 |         if info["source"] == "shapes":
107 |             return info["shapes"]
108 |         else:
109 |             return super(ShapesDataset, self).image_reference(image_id)
110 | 
111 |     def load_mask(self, image_id):
112 |         """Generate instance masks for shapes of the given image ID.
113 |         """
114 |         info = self.image_info[image_id]
115 |         shapes = info['shapes']
116 |         count = len(shapes)
117 |         mask = np.zeros([info['height'], info['width'], count], dtype=np.uint8)
118 |         for i, (shape, _, dims) in enumerate(info['shapes']):
119 |             mask[:, :, i:i + 1] = self.draw_shape(mask[:, :, i:i + 1].copy(),
120 |                                                   shape, dims, 1)
121 |         # Handle occlusions
122 |         occlusion = np.logical_not(mask[:, :, -1]).astype(np.uint8)
123 |         for i in range(count - 2, -1, -1):
124 |             mask[:, :, i] = mask[:, :, i] * occlusion
125 |             occlusion = np.logical_and(
126 |                 occlusion, np.logical_not(mask[:, :, i]))
127 |         # Map class names to class IDs.
128 |         class_ids = np.array([self.class_names.index(s[0]) for s in shapes])
129 |         return mask, class_ids.astype(np.int32)
130 | 
131 |     def draw_shape(self, image, shape, dims, color):
132 |         """Draws a shape from the given specs."""
133 |         # Get the center x, y and the size s
134 |         x, y, s = dims
135 |         if shape == 'square':
136 |             image = cv2.rectangle(image, (x - s, y - s),
137 |                                   (x + s, y + s), color, -1)
138 |         elif shape == "circle":
139 |             image = cv2.circle(image, (x, y), s, color, -1)
140 |         elif shape == "triangle":
141 |             points = np.array([[(x, y - s),
142 |                                 (x - s / math.sin(math.radians(60)), y + s),
143 |                                 (x + s / math.sin(math.radians(60)), y + s),
144 |                                 ]], dtype=np.int32)
145 |             image = cv2.fillPoly(image, points, color)
146 |         return image
147 | 
148 |     def random_shape(self, height, width):
149 |         """Generates specifications of a random shape that lies within
150 |         the given height and width boundaries.
151 |         Returns a tuple of three values:
152 |         * The shape name (square, circle, ...)
153 |         * Shape color: a tuple of 3 values, RGB.
154 |         * Shape dimensions: A tuple of values that define the shape size
155 |           and location. Differs per shape type.
156 |         """
157 |         # Shape
158 |         shape = random.choice(["square", "circle", "triangle"])
159 |         # Color
160 |         color = tuple([random.randint(0, 255) for _ in range(3)])
161 |         # Center x, y
162 |         buffer = 20
163 |         y = random.randint(buffer, height - buffer - 1)
164 |         x = random.randint(buffer, width - buffer - 1)
165 |         # Size
166 |         s = random.randint(buffer, height // 4)
167 |         return shape, color, (x, y, s)
168 | 
169 |     def random_image(self, height, width):
170 |         """Creates random specifications of an image with multiple shapes.
171 |         Returns the background color of the image and a list of shape
172 |         specifications that can be used to draw the image.
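        Shapes whose bounding boxes overlap heavily are removed by
        non-max suppression, so fewer shapes than were sampled may remain.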
173 |         """
174 |         # Pick random background color
175 |         bg_color = np.array([random.randint(0, 255) for _ in range(3)])
176 |         # Generate a few random shapes and record their
177 |         # bounding boxes
178 |         shapes = []
179 |         boxes = []
180 |         N = random.randint(1, 4)
181 |         for _ in range(N):
182 |             shape, color, dims = self.random_shape(height, width)
183 |             shapes.append((shape, color, dims))
184 |             x, y, s = dims
185 |             boxes.append([y - s, x - s, y + s, x + s])
186 |         # Apply non-max suppression with a 0.3 threshold to avoid
187 |         # shapes covering each other
188 |         keep_ixs = utils.non_max_suppression(
189 |             np.array(boxes), np.arange(N), 0.3)
190 |         shapes = [s for i, s in enumerate(shapes) if i in keep_ixs]
191 |         return bg_color, shapes
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | description-file = README.md
3 | license-file = LICENSE
4 | requirements-file = requirements.txt
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | """
2 | The build/compilation setup
3 | 
4 | >> pip install -r requirements.txt
5 | >> python setup.py install
6 | """
7 | import pip
8 | import logging
9 | import pkg_resources
10 | try:
11 |     from setuptools import setup
12 | except ImportError:
13 |     from distutils.core import setup
14 | 
15 | 
16 | def _parse_requirements(file_path):
17 |     pip_ver = pkg_resources.get_distribution('pip').version
18 |     pip_version = list(map(int, pip_ver.split('.')[:2]))
19 |     if pip_version >= [6, 0]:
20 |         raw = pip.req.parse_requirements(file_path,
21 |                                          session=pip.download.PipSession())
22 |     else:
23 |         raw = pip.req.parse_requirements(file_path)
24 |     return [str(i.req) for i in raw]
25 | 
26 | 
27 | # parse_requirements() returns generator of pip.req.InstallRequirement objects
28 | try:
29 |     install_reqs = _parse_requirements("requirements.txt")
30 | except Exception:
31 |     logging.warning('Failed to load requirements file, so using default ones.')
32 |     install_reqs = []
33 | 
34 | setup(
35 |     name='mask-rcnn',
36 |     version='2.1',
37 |     url='https://github.com/matterport/Mask_RCNN',
38 |     author='Matterport',
39 |     author_email='waleed.abdulla@gmail.com',
40 |     license='MIT',
41 |     description='Mask R-CNN for object detection and instance segmentation',
42 |     packages=["mrcnn"],
43 |     install_requires=install_reqs,
44 |     include_package_data=True,
45 |     python_requires='>=3.4',
46 |     long_description="""This is an implementation of Mask R-CNN on Python 3, Keras, and TensorFlow.
47 | The model generates bounding boxes and segmentation masks for each instance of an object in the image.
48 | It's based on Feature Pyramid Network (FPN) and a ResNet101 backbone.""", 49 | classifiers=[ 50 | "Development Status :: 5 - Production/Stable", 51 | "Environment :: Console", 52 | "Intended Audience :: Developers", 53 | "Intended Audience :: Information Technology", 54 | "Intended Audience :: Education", 55 | "Intended Audience :: Science/Research", 56 | "License :: OSI Approved :: MIT License", 57 | "Natural Language :: English", 58 | "Operating System :: OS Independent", 59 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 60 | "Topic :: Scientific/Engineering :: Image Recognition", 61 | "Topic :: Scientific/Engineering :: Visualization", 62 | "Topic :: Scientific/Engineering :: Image Segmentation", 63 | 'Programming Language :: Python :: 3.4', 64 | 'Programming Language :: Python :: 3.5', 65 | 'Programming Language :: Python :: 3.6', 66 | ], 67 | keywords="image instance segmentation object detection mask rcnn r-cnn tensorflow keras", 68 | ) 69 | --------------------------------------------------------------------------------
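As a closing usage note (not part of the repository): a minimal sketch of exercising the synthetic `ShapesDataset` from `samples/shapes/shapes.py` on its own. It assumes the repository root is the working directory and that the packages in `requirements.txt` are installed; the 128×128 size matches `ShapesConfig`.

```python
import sys
sys.path.append("samples/shapes")  # assumption: run from the repository root
from shapes import ShapesDataset

# Generate specifications for 50 synthetic 128x128 images; images and masks
# are drawn on the fly by load_image()/load_mask(), so nothing touches disk.
dataset = ShapesDataset()
dataset.load_shapes(count=50, height=128, width=128)
dataset.prepare()

# Inspect one sample.
image_id = dataset.image_ids[0]
image = dataset.load_image(image_id)            # [128, 128, 3] uint8
masks, class_ids = dataset.load_mask(image_id)  # [128, 128, N] masks and N class IDs
print(image.shape, masks.shape, class_ids)
```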