├── .gitignore ├── Demo.ipynb ├── InstallPyTorchSourceCUDA.ipynb ├── LICENSE ├── README.md ├── Setup.ipynb ├── Train.ipynb ├── build_pytorch1.0_macosx_10.13.sh ├── coco.py ├── config.py ├── demo.py ├── fish_pytorch_style.py ├── images ├── find_fish.png ├── rec_fish.png └── vgg_annotated_fish.jpg ├── lib ├── nms │ ├── __init__.py │ ├── build.py │ ├── nms_wrapper.py │ ├── pth_nms.py │ └── src │ │ ├── cuda │ │ ├── nms_kernel.cu │ │ └── nms_kernel.h │ │ ├── nms.c │ │ ├── nms.h │ │ ├── nms_cuda.c │ │ └── nms_cuda.h └── roi_align │ ├── __init__.py │ ├── build.py │ ├── crop_and_resize.py │ ├── roi_align.py │ └── src │ ├── crop_and_resize.c │ ├── crop_and_resize.h │ ├── crop_and_resize_gpu.c │ ├── crop_and_resize_gpu.h │ └── cuda │ ├── crop_and_resize_kernel.cu │ └── crop_and_resize_kernel.h ├── model.py ├── requirements.txt ├── utils.py └── visualize.py /.gitignore: -------------------------------------------------------------------------------- 1 | tmp 2 | tmp_install 3 | bdist 4 | pytorch 5 | *.bkup 6 | scripts 7 | coco 8 | data 9 | pycocotools 10 | .DS_Store 11 | *.h5 12 | *.pth 13 | fish_pics/ 14 | logs/ 15 | assets/ 16 | 17 | _ext/ 18 | pycocotools/ 19 | cocoapi/ 20 | *.cu.o 21 | 22 | *.exe 23 | 24 | # Byte-compiled / optimized / DLL files 25 | __pycache__/ 26 | *.py[cod] 27 | *$py.class 28 | 29 | # C extensions 30 | *.so 31 | 32 | # Distribution / packaging 33 | .Python 34 | env/ 35 | build/ 36 | develop-eggs/ 37 | dist/ 38 | downloads/ 39 | eggs/ 40 | .eggs/ 41 | #lib/ 42 | lib64/ 43 | parts/ 44 | sdist/ 45 | var/ 46 | wheels/ 47 | *.egg-info/ 48 | .installed.cfg 49 | *.egg 50 | 51 | # PyInstaller 52 | # Usually these files are written by a python script from a template 53 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
54 | *.manifest 55 | *.spec 56 | 57 | # Installer logs 58 | pip-log.txt 59 | pip-delete-this-directory.txt 60 | 61 | # Unit test / coverage reports 62 | htmlcov/ 63 | .tox/ 64 | .coverage 65 | .coverage.* 66 | .cache 67 | nosetests.xml 68 | coverage.xml 69 | *.cover 70 | .hypothesis/ 71 | 72 | # Translations 73 | *.mo 74 | *.pot 75 | 76 | # Django stuff: 77 | *.log 78 | local_settings.py 79 | 80 | # Flask stuff: 81 | instance/ 82 | .webassets-cache 83 | 84 | # Scrapy stuff: 85 | .scrapy 86 | 87 | # Sphinx documentation 88 | docs/_build/ 89 | 90 | # PyBuilder 91 | target/ 92 | 93 | # Jupyter Notebook 94 | .ipynb_checkpoints 95 | 96 | # pyenv 97 | .python-version 98 | 99 | # celery beat schedule file 100 | celerybeat-schedule 101 | 102 | # SageMath parsed files 103 | *.sage.py 104 | 105 | # dotenv 106 | .env 107 | 108 | # virtualenv 109 | .venv 110 | venv/ 111 | ENV/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | -------------------------------------------------------------------------------- /Demo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Demo for PyTorch Mask RCNN\n", 8 | "Base Repo: https://github.com/multimodallearning/pytorch-mask-rcnn\n", 9 | "\n", 10 | "Sample Repo: https://github.com/michhar/pytorch-mask-rcnn-samples\n", 11 | "\n", 12 | "**Note: Run this notebook inside of the Sample Repo after cloning**" 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": {}, 18 | "source": [ 19 | "## Prerequisites\n", 20 | "\n", 21 | "1. PyTorch (tested with version below) - **note**: further testing required as PyTorch has updated to 1.0 which is in preview (as of this notebook update) and the code was originally using 0.4. It's likely the model class and training script will need to be updated as well. Troubleshooting is happening for the custom extension builds as well. (2018-09-28)" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "## Check Setup" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "import sys\n", 38 | "sys.prefix" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "import torch\n", 48 | "torch.__version__" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": {}, 54 | "source": [ 55 | "### Download the Model\n", 56 | "\n", 57 | "Here: https://drive.google.com/open?id=1VV6WgX_RNl6a9Yi9-Pe7ZyVKHRJZSKkm\n", 58 | "\n", 59 | "And upload to the root directory of the repo on this machine (where this notebook lives)." 
60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": {}, 65 | "source": [ 66 | "## Setup and Run Inference" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "# Am I running 32 or 64 bit Python\n", 76 | "import struct\n", 77 | "print(struct.calcsize(\"P\") * 8)" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [ 86 | "%load_ext autoreload\n", 87 | "%autoreload\n", 88 | "import os\n", 89 | "import sys\n", 90 | "import random\n", 91 | "import math\n", 92 | "import numpy as np\n", 93 | "import skimage.io\n", 94 | "import matplotlib\n", 95 | "import matplotlib.pyplot as plt\n", 96 | "import glob\n", 97 | "from io import BytesIO\n", 98 | "import requests\n", 99 | "from PIL import Image\n", 100 | "%matplotlib inline\n", 101 | "\n", 102 | "import coco\n", 103 | "# from pycocotools import coco\n", 104 | "import utils\n", 105 | "import model as modellib\n", 106 | "import visualize\n", 107 | "\n", 108 | "import torch\n", 109 | "# import pycocotools\n", 110 | "\n", 111 | "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n", 112 | "print(device)\n", 113 | "\n", 114 | "# Root directory of the project\n", 115 | "ROOT_DIR = os.getcwd()\n", 116 | "\n", 117 | "# Directory to save logs and trained model\n", 118 | "MODEL_DIR = os.path.join(ROOT_DIR, \"logs\")\n", 119 | "\n", 120 | "# Path to trained weights file\n", 121 | "# Download this file and place in the root of your\n", 122 | "# project (See README file for details)\n", 123 | "COCO_MODEL_PATH = os.path.join(ROOT_DIR, \"models\", \"mask_rcnn_coco.pth\")\n", 124 | "\n", 125 | "# Directory of images to run detection on\n", 126 | "IMAGE_DIR = os.path.join(ROOT_DIR, \"images\")\n", 127 | "\n", 128 | "class InferenceConfig(coco.CocoConfig):\n", 129 | " # Set batch size to 1 since we'll be running inference on\n", 130 | " # one image at a time. Batch size = GPU_COUNT * IMAGES_PER_GPU\n", 131 | " # GPU_COUNT = 0 for CPU\n", 132 | " GPU_COUNT = 0\n", 133 | " IMAGES_PER_GPU = 1\n", 134 | " COCO_MODEL_PATH = os.path.join(ROOT_DIR, \"mask_rcnn_coco.pth\")\n", 135 | "\n", 136 | "\n", 137 | "config = InferenceConfig()\n", 138 | "config.display()\n", 139 | "\n", 140 | "# Create model object.\n", 141 | "model = modellib.MaskRCNN(model_dir=MODEL_DIR, config=config)\n", 142 | "model = model.to(device=device)" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": null, 148 | "metadata": {}, 149 | "outputs": [], 150 | "source": [ 151 | "# Load weights trained on MS-COCO\n", 152 | "print(COCO_MODEL_PATH)\n", 153 | "model.load_state_dict(torch.load(COCO_MODEL_PATH))" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": null, 159 | "metadata": {}, 160 | "outputs": [], 161 | "source": [ 162 | "# COCO Class names\n", 163 | "# Index of the class in the list is its ID. 
For example, to get ID of\n", 164 | "# the teddy bear class, use: class_names.index('teddy bear')\n", 165 | "class_names = ['BG', 'person', 'bicycle', 'car', 'motorcycle', 'airplane',\n", 166 | " 'bus', 'train', 'truck', 'boat', 'traffic light',\n", 167 | " 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird',\n", 168 | " 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear',\n", 169 | " 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',\n", 170 | " 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',\n", 171 | " 'kite', 'baseball bat', 'baseball glove', 'skateboard',\n", 172 | " 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup',\n", 173 | " 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',\n", 174 | " 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',\n", 175 | " 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',\n", 176 | " 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',\n", 177 | " 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster',\n", 178 | " 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors',\n", 179 | " 'teddy bear', 'hair drier', 'toothbrush']\n", 180 | "\n", 181 | "# Load a random image from the images folder\n", 182 | "# file_names = glob.glob(os.path.join('images', '*.jpg'))\n", 183 | "# image = skimage.io.imread(os.path.join(random.choice(file_names)))\n", 184 | "\n", 185 | "# Or load file from the internet\n", 186 | "req = requests.get('https://cdn.pixabay.com/photo/2015/06/20/13/55/man-815795__340.jpg')\n", 187 | "image = np.asarray(Image.open(BytesIO(req.content)))" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": null, 193 | "metadata": { 194 | "scrolled": false 195 | }, 196 | "outputs": [], 197 | "source": [ 198 | "# Run detection\n", 199 | "results = model.detect([image])" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": null, 205 | "metadata": {}, 206 | "outputs": [], 207 | "source": [ 208 | "# Visualize results\n", 209 | "r = results[0]\n", 210 | "visualize.display_instances(image, r['rois'], r['masks'], r['class_ids'],\n", 211 | " class_names, r['scores'])\n", 212 | "plt.show()" 213 | ] 214 | }, 215 | { 216 | "cell_type": "markdown", 217 | "metadata": {}, 218 | "source": [ 219 | "## Run an actual training experiment" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": null, 225 | "metadata": {}, 226 | "outputs": [], 227 | "source": [ 228 | "# ! 
python coco.py train --dataset=mask_rcnn_coco.pth --model=imagenet" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": null, 234 | "metadata": {}, 235 | "outputs": [], 236 | "source": [ 237 | "%%writefile demo.py\n", 238 | "import os\n", 239 | "import sys\n", 240 | "import random\n", 241 | "import math\n", 242 | "import numpy as np\n", 243 | "import skimage.io\n", 244 | "import matplotlib\n", 245 | "import matplotlib.pyplot as plt\n", 246 | "import glob\n", 247 | "from io import BytesIO\n", 248 | "import requests\n", 249 | "from PIL import Image\n", 250 | "\n", 251 | "# import coco\n", 252 | "from pycocotools import coco\n", 253 | "import utils\n", 254 | "import model as modellib\n", 255 | "import visualize\n", 256 | "\n", 257 | "import torch\n", 258 | "import pycocotools\n", 259 | "\n", 260 | "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n", 261 | "print(device)\n", 262 | "\n", 263 | "# Root directory of the project\n", 264 | "ROOT_DIR = os.getcwd()\n", 265 | "\n", 266 | "# Directory to save logs and trained model\n", 267 | "MODEL_DIR = os.path.join(ROOT_DIR, \"logs\")\n", 268 | "\n", 269 | "# Path to trained weights file\n", 270 | "# Download this file and place in the root of your\n", 271 | "# project (See README file for details)\n", 272 | "COCO_MODEL_PATH = os.path.join(ROOT_DIR, \"models\", \"mask_rcnn_coco.pth\")\n", 273 | "\n", 274 | "# Directory of images to run detection on\n", 275 | "IMAGE_DIR = os.path.join(ROOT_DIR, \"images\")\n", 276 | "\n", 277 | "class InferenceConfig(coco.CocoConfig):\n", 278 | " # Set batch size to 1 since we'll be running inference on\n", 279 | " # one image at a time. Batch size = GPU_COUNT * IMAGES_PER_GPU\n", 280 | " # GPU_COUNT = 0 for CPU\n", 281 | " GPU_COUNT = 0\n", 282 | " IMAGES_PER_GPU = 1\n", 283 | " COCO_MODEL_PATH = os.path.join(ROOT_DIR, \"mask_rcnn_coco.pth\")\n", 284 | "\n", 285 | "\n", 286 | "config = InferenceConfig()\n", 287 | "config.display()\n", 288 | "\n", 289 | "# Create model object.\n", 290 | "model = modellib.MaskRCNN(model_dir=MODEL_DIR, config=config)\n", 291 | "model = model.to(device=device)\n", 292 | "\n", 293 | "# Load weights trained on MS-COCO\n", 294 | "print(COCO_MODEL_PATH)\n", 295 | "model.load_state_dict(torch.load(COCO_MODEL_PATH))\n", 296 | "\n", 297 | "# COCO Class names\n", 298 | "# Index of the class in the list is its ID. 
For example, to get ID of\n", 299 | "# the teddy bear class, use: class_names.index('teddy bear')\n", 300 | "class_names = ['BG', 'person', 'bicycle', 'car', 'motorcycle', 'airplane',\n", 301 | " 'bus', 'train', 'truck', 'boat', 'traffic light',\n", 302 | " 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird',\n", 303 | " 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear',\n", 304 | " 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',\n", 305 | " 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',\n", 306 | " 'kite', 'baseball bat', 'baseball glove', 'skateboard',\n", 307 | " 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup',\n", 308 | " 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',\n", 309 | " 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',\n", 310 | " 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',\n", 311 | " 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',\n", 312 | " 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster',\n", 313 | " 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors',\n", 314 | " 'teddy bear', 'hair drier', 'toothbrush']\n", 315 | "\n", 316 | "# Load a random image from the images folder\n", 317 | "# file_names = glob.glob(os.path.join('images', '*.jpg'))\n", 318 | "# image = skimage.io.imread(os.path.join(random.choice(file_names)))\n", 319 | "\n", 320 | "# Or load file from the internet\n", 321 | "req = requests.get('https://cdn.pixabay.com/photo/2015/06/20/13/55/man-815795__340.jpg')\n", 322 | "image = np.asarray(Image.open(BytesIO(req.content)))\n", 323 | "\n", 324 | "# Run detection\n", 325 | "results = model.detect([image])\n", 326 | "\n", 327 | "# Visualize results\n", 328 | "r = results[0]\n", 329 | "visualize.display_instances(image, r['rois'], r['masks'], r['class_ids'],\n", 330 | " class_names, r['scores'])\n", 331 | "plt.show()" 332 | ] 333 | }, 334 | { 335 | "cell_type": "code", 336 | "execution_count": null, 337 | "metadata": {}, 338 | "outputs": [], 339 | "source": [] 340 | } 341 | ], 342 | "metadata": { 343 | "kernelspec": { 344 | "display_name": "Python 3.6 (sys)", 345 | "language": "python", 346 | "name": "py36sys" 347 | }, 348 | "language_info": { 349 | "codemirror_mode": { 350 | "name": "ipython", 351 | "version": 3 352 | }, 353 | "file_extension": ".py", 354 | "mimetype": "text/x-python", 355 | "name": "python", 356 | "nbconvert_exporter": "python", 357 | "pygments_lexer": "ipython3", 358 | "version": "3.6.6" 359 | }, 360 | "nav_menu": {}, 361 | "toc": { 362 | "navigate_menu": true, 363 | "number_sections": true, 364 | "sideBar": true, 365 | "threshold": 6, 366 | "toc_cell": false, 367 | "toc_section_display": "block", 368 | "toc_window_display": false 369 | }, 370 | "toc_position": { 371 | "height": "635.696px", 372 | "left": "0px", 373 | "right": "1488.18px", 374 | "top": "133.438px", 375 | "width": "25.4545px" 376 | } 377 | }, 378 | "nbformat": 4, 379 | "nbformat_minor": 2 380 | } 381 | -------------------------------------------------------------------------------- /InstallPyTorchSourceCUDA.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Building PyTorch from Source on Local Machine with CUDA Support\n", 8 | "Compiling from source to **be able to run PyTorch with GPU support and compile PyTorch extensions with CUDA support**\n", 9 | "\n", 10 | "Systems tested:\n", 11 | "* [TBD system] with CUDA 10.0 and 
cuDNN 7.3.0 ([CUDA setup](https://docs.nvidia.com/cuda/cuda-installation-guide-mac-os-x/index.html) and [cuDNN setup](https://docs.nvidia.com/deeplearning/sdk/cudnn-install/index.html)) and Python 3.6 (non-conda install)" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "# What pytorch do we have right now?\n", 21 | "import sys\n", 22 | "print(sys.prefix)\n", 23 | "! {sys.prefix}/bin/pip freeze | grep torch" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "Please have done the following if on macOS (tested with 10.13):\n", 31 | "1. Pip installed all packages from `requirements.txt` (`pip install -r requirements.txt`)\n", 32 | "2. Make sure using `clang` and `clang++` from Apple through XCode.\n", 33 | "3. Homebrew install of `libbind`: `brew install libbind`\n", 34 | "4. NVIDIA CUDA Toolkit setup ([NVIDIA CUDA Toolkit Docs](https://docs.nvidia.com/cuda/index.html))\n", 35 | " * NEED NVIDIA CUDA >= 9.2 (testing here with 10.0)\n" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "These instructions are based on \n", 43 | "* https://github.com/QuantScientist/Deep-Learning-Boot-Camp/blob/master/day02-PyTORCH-and-PyCUDA/PyTorch/build_torch.sh\n", 44 | "* http://www.cs.rochester.edu/u/kautz/Installing-Pytorch-Cuda-on-Macbook.html\n", 45 | "* https://github.com/pytorch/pytorch/blob/master/.jenkins/pytorch/macos-build.sh\n", 46 | "* https://github.com/apache/incubator-mxnet/issues/9217#issuecomment-354982838" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": { 53 | "scrolled": false 54 | }, 55 | "outputs": [], 56 | "source": [ 57 | "! git clone --recursive https://github.com/pytorch/pytorch.git" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "metadata": { 64 | "scrolled": false 65 | }, 66 | "outputs": [], 67 | "source": [ 68 | "! export PATH=/usr/local/cuda/bin:$PATH &&\\\n", 69 | " export LD_LIBRARY_PATH=/usr/local/cuda/lib:$LD_LIBRARY_PATH &&\\\n", 70 | " export CUDA_BIN_PATH=/usr/local/cuda/bin &&\\\n", 71 | " export CUDA_TOOLKIT_ROOT_DIR=/Developer/NVIDIA/CUDA-10.0 &&\\\n", 72 | " export CUDNN_LIB_DIR=/usr/local/cuda/lib &&\\\n", 73 | " export CUDA_HOST_COMPILER=clang &&\\\n", 74 | " export USE_CUDA=1 &&\\\n", 75 | " export USE_NNPACK=1 &&\\\n", 76 | " export CC=clang &&\\\n", 77 | " export CXX=clang++ &&\\\n", 78 | " cd pytorch &&\\\n", 79 | " pip3 uninstall torch --yes &&\\\n", 80 | " python3 setup.py clean &&\\\n", 81 | " TORCH_CUDA_ARCH_LIST=\"5.2\" python3 setup.py install" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "# # To redo above, remove the folder that was git cloned\n", 91 | "# ! rm -fr pytorch" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": {}, 98 | "outputs": [], 99 | "source": [ 100 | "import torch\n", 101 | "torch.__version__" 102 | ] 103 | }, 104 | { 105 | "cell_type": "markdown", 106 | "metadata": {}, 107 | "source": [ 108 | "It might be prudent to build a wheel for this build. 
To do so, on the command line or in a notebook cell, in the `pytorch` folder: [wip command]\n", 109 | "\n", 110 | "`python3 setup.py clean && PATH=/Developer/NVIDIA/CUDA-10.0/bin:$PATH LD_LIBRARY_PATH=//Developer/NVIDIA/CUDA-10.0/lib:$LD_LIBRARY_PATH CUDA_BIN_PATH=/Developer/NVIDIA/CUDA-10.0/bin CUDA_TOOLKIT_ROOT_DIR=/Developer/NVIDIA/CUDA-10.0 CUDNN_LIB_DIR=//Developer/NVIDIA/CUDA-10.0/lib USE_CUDA=1 CC=clang CXX=clang++ CUDA_HOME=/Developer/NVIDIA/CUDA-10.0/ USE_NNPACK=0 LDFLAGS=\"-Wl,-no_compact_unwind\" python3 setup.py sdist bdist_wheel`\n", 111 | "\n", 112 | "Then the .whl file will be in the `bdist` folder." 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "Building on an NVIDIA PX2 Drive/TX2 Jetson (CUDA 9.0, cuDNN 7.0, Python 3.5, Ubuntu 16.04):\n", 120 | "\n", 121 | "* Note, need a swap file on PX2 - instructions here https://support.rackspace.com/how-to/create-a-linux-swap-file/ (created 3GB swap file).\n", 122 | "* This will build a wheel under `dist`" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "metadata": { 129 | "scrolled": false 130 | }, 131 | "outputs": [], 132 | "source": [ 133 | "! export PATH=/usr/local/cuda/bin:$PATH &&\\\n", 134 | " export USE_OPENCV=1 &&\\\n", 135 | " export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH &&\\\n", 136 | " export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH &&\\\n", 137 | " export CUDA_BIN_PATH=/usr/local/cuda/bin &&\\\n", 138 | " export CUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda/ &&\\\n", 139 | " export CUDNN_LIB_DIR=/usr/local/cuda/lib64 &&\\\n", 140 | " export CUDA_HOST_COMPILER=cc &&\\\n", 141 | " export USE_CUDA=1 &&\\\n", 142 | " export USE_NNPACK=1 &&\\\n", 143 | " export CC=cc &&\\\n", 144 | " export CXX=c++ &&\\\n", 145 | " cd pytorch &&\\\n", 146 | " python3 setup.py clean &&\\\n", 147 | " TORCH_CUDA_ARCH_LIST=\"3.5 5.2 6.0 6.1+PTX\" TORCH_NVCC_FLAGS=\"-Xfatbin -compress-all\" python3 setup.py bdist_wheel" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": null, 153 | "metadata": {}, 154 | "outputs": [], 155 | "source": [ 156 | "! pip install " 157 | ] 158 | } 159 | ], 160 | "metadata": { 161 | "kernelspec": { 162 | "display_name": "Python 3.6 (sys)", 163 | "language": "python", 164 | "name": "py36sys" 165 | }, 166 | "language_info": { 167 | "codemirror_mode": { 168 | "name": "ipython", 169 | "version": 3 170 | }, 171 | "file_extension": ".py", 172 | "mimetype": "text/x-python", 173 | "name": "python", 174 | "nbconvert_exporter": "python", 175 | "pygments_lexer": "ipython3", 176 | "version": "3.6.6" 177 | }, 178 | "nav_menu": {}, 179 | "toc": { 180 | "navigate_menu": true, 181 | "number_sections": true, 182 | "sideBar": true, 183 | "threshold": 6, 184 | "toc_cell": false, 185 | "toc_section_display": "block", 186 | "toc_window_display": false 187 | } 188 | }, 189 | "nbformat": 4, 190 | "nbformat_minor": 2 191 | } 192 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Mask R-CNN 2 | 3 | The MIT License (MIT) 4 | 5 | Copyright (c) 2017 Matterport, Inc. 
6 | Copyright (c) 2018 Micheleen Harris 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this software and associated documentation files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | THE SOFTWARE. 25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # A Sample Inspired by a PyTorch Port of MaskRCNN for Instance Segmentation 2 | 3 | Inspired by: https://github.com/multimodallearning/pytorch-mask-rcnn 4 | 5 | 6 | 7 | **NOTES**: 8 | * This project is working with _PyTorch 0.4.1_. If you'd like to help update this, please feel free to fork and create a PR. 9 | * There are two C-extensions that require the NVIDIA compiler and CUDA support. 10 | 11 | ## Setup 12 | 13 | System Tested (Linux and NVIDIA GPU required with CUDA/cuDNN): 14 | * CUDA 9.0 15 | * NVIDIA Tesla K80 16 | * Ubuntu 16.04 17 | 18 | These sets of Jupyter notebooks may also be run in a Docker container running on CUDA-capable GPU hardware. 19 | 20 | ## Base Model 21 | 22 | * Download the COCO model (base for transfer learning) from [google drive](https://drive.google.com/drive/folders/1LXUgC2IZUYNEoXr05tdqyKFZY0pZyPDc). You could also choose to start with the ImageNet model. 23 | 24 | ## Setup and Demo 25 | 26 | Work through the notebooks: 27 | 28 | * Setup.ipynb - install the PyTorch extensions and grab a few other tools 29 | * Demo.ipynb - to test setup and perform inference with a base model 30 | 31 | ### Collect and Label Custom Data 32 | 33 | 1. Choose images with your object(s) of interest 34 | 2. Label with the VGG Image Annotator tool (http://www.robots.ox.ac.uk/~vgg/software/via/) 35 | 36 | ![VGG annotated fish pic](images/vgg_annotated_fish.jpg) 37 |

*Adult Schoolmaster Snappers (Lutjanus apodus); Source: Florent Charpin, http://reefguide.org/pixhtml/schoolmaster2.html*


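For a rough sense of what the custom data loader has to do with these labels, here is a minimal, illustrative sketch (the helper name `region_to_mask` is hypothetical; the project's actual loader is `fish_pytorch_style.py`, used in the Train section below) that rasterizes a single polygon region from a VIA JSON export into a boolean mask, assuming the default `shape_attributes` / `all_points_x` / `all_points_y` layout:

```python
import numpy as np
from skimage import draw

def region_to_mask(region, height, width):
    """Rasterize one VIA polygon annotation into a [height, width] boolean mask."""
    attrs = region["shape_attributes"]
    # skimage.draw.polygon takes row (y) coordinates first; shape= clips to the image.
    rr, cc = draw.polygon(attrs["all_points_y"], attrs["all_points_x"],
                          shape=(height, width))
    mask = np.zeros((height, width), dtype=bool)
    mask[rr, cc] = True
    return mask
```

Mask R-CNN expects one such boolean channel per object instance, stacked into a `[height, width, instances]` array (the same convention `load_mask()` in `coco.py` follows).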
38 | 39 | 40 | ### Train 41 | 42 | * Train.ipynb - train on custom-labeled data, supported by a custom PyTorch `DataSet` class (`fish_pytorch_style.py`) 43 | 44 | ## Wish to Build PyTorch for Your System? 45 | 46 | If you wish to build PyTorch latest or from a commit, follow one of the two notebooks: 47 | 48 | * InstallPyTorchSourceCUDA.ipynb - build from source with CUDA support 49 | 50 | 51 | ## Additional Information and Credits 52 | 53 | * See the original repo for more information beyond the two notebooks. https://github.com/multimodallearning/pytorch-mask-rcnn 54 | * See this original blog post for some of the concepts and processes behind the PyTorch and, subsequently, this repo (the TensorFlow version): https://engineering.matterport.com/splash-of-color-instance-segmentation-with-mask-r-cnn-and-tensorflow-7c761e238b46 55 | * Related project - https://github.com/svanbodegraven/Machine-Learning-Containers 56 | * Related project - https://github.com/Azadehkhojandi/computer-vision-fish-frame-proposal 57 | * Merging VGG annotations - https://github.com/Azadehkhojandi/VGG-Image-Annotator-Json-Merger 58 | 59 | > TIP: You can run this project inside a Docker image such as the `rheartpython/cvdeep` public image that has many Deep Learning frameworks preinstalled. (more info at https://github.com/michhar/custom-jupyterhub-linux-vm) 60 | -------------------------------------------------------------------------------- /Setup.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Setup for PyTorch Mask-RCNN" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## Installs\n", 15 | "\n", 16 | "System (Linux and NVIDIA GPU required with CUDA/cuDNN):\n", 17 | "* CUDA 9.0\n", 18 | "* NVIDIA Tesla K80\n", 19 | "* Ubuntu 16.04\n", 20 | "\n", 21 | "\n", 22 | "The Python used (tested with Anaconda 3.5, 3.6, and 3.7):\n", 23 | "* DSVM: `/anaconda/envs/py35/bin/python` or `/anaconda/envs/pytorch041/bin/python` if available\n", 24 | "* macOS: `/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/`\n" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 3, 30 | "metadata": {}, 31 | "outputs": [ 32 | { 33 | "name": "stdout", 34 | "output_type": "stream", 35 | "text": [ 36 | "Sun May 12 03:17:16 2019 \r\n", 37 | "+-----------------------------------------------------------------------------+\r\n", 38 | "| NVIDIA-SMI 396.44 Driver Version: 396.44 |\r\n", 39 | "|-------------------------------+----------------------+----------------------+\r\n", 40 | "| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\r\n", 41 | "| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. 
|\r\n", 42 | "|===============================+======================+======================|\r\n", 43 | "| 0 Tesla K80 On | 00004BBC:00:00.0 Off | 0 |\r\n", 44 | "| N/A 34C P8 34W / 149W | 0MiB / 11441MiB | 0% Default |\r\n", 45 | "+-------------------------------+----------------------+----------------------+\r\n", 46 | " \r\n", 47 | "+-----------------------------------------------------------------------------+\r\n", 48 | "| Processes: GPU Memory |\r\n", 49 | "| GPU PID Type Process name Usage |\r\n", 50 | "|=============================================================================|\r\n", 51 | "| No running processes found |\r\n", 52 | "+-----------------------------------------------------------------------------+\r\n" 53 | ] 54 | } 55 | ], 56 | "source": [ 57 | "! nvidia-smi" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 4, 63 | "metadata": {}, 64 | "outputs": [ 65 | { 66 | "data": { 67 | "text/plain": [ 68 | "'/anaconda/envs/pytorch041/bin/python'" 69 | ] 70 | }, 71 | "execution_count": 4, 72 | "metadata": {}, 73 | "output_type": "execute_result" 74 | } 75 | ], 76 | "source": [ 77 | "# What Python\n", 78 | "import sys\n", 79 | "sys.executable" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": null, 85 | "metadata": { 86 | "scrolled": false 87 | }, 88 | "outputs": [], 89 | "source": [ 90 | "# Install libs from requirements.txt library\n", 91 | "! sudo LC_ALL=C {sys.executable} -m pip install -r requirements.txt" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 5, 97 | "metadata": {}, 98 | "outputs": [ 99 | { 100 | "data": { 101 | "text/plain": [ 102 | "'0.4.1.post2'" 103 | ] 104 | }, 105 | "execution_count": 5, 106 | "metadata": {}, 107 | "output_type": "execute_result" 108 | } 109 | ], 110 | "source": [ 111 | "# What PyTorch\n", 112 | "import torch\n", 113 | "torch.__version__" 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": {}, 119 | "source": [ 120 | "## Building the extensions\n", 121 | "\n", 122 | "Could also do:\n", 123 | "\n", 124 | "```\n", 125 | "! 
export PATH=/usr/local/cuda/bin:$PATH &&\\\n", 126 | " export USE_OPENCV=1 &&\\\n", 127 | " export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH &&\\\n", 128 | " export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH &&\\\n", 129 | " export CUDA_BIN_PATH=/usr/local/cuda/bin &&\\\n", 130 | " export CUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda/ &&\\\n", 131 | " export CUDNN_LIB_DIR=/usr/local/cuda/lib64 &&\\\n", 132 | " export CUDA_HOST_COMPILER=cc &&\\\n", 133 | " export USE_CUDA=1 &&\\\n", 134 | " export USE_NNPACK=1 &&\\\n", 135 | " cd nms/src/cuda/ && nvcc -c -o nms_kernel.cu.o nms_kernel.cu -x cu -Xcompiler -fPIC &&\\\n", 136 | " cd ../../ &&\\\n", 137 | " TORCH_CUDA_ARCH_LIST=\"3.5 5.2 6.0 6.1+PTX\" TORCH_NVCC_FLAGS=\"-Xfatbin -compress-all\" &&\\\n", 138 | " CC=cc CXX=cc++ python3 build.py\n", 139 | "```" 140 | ] 141 | }, 142 | { 143 | "cell_type": "markdown", 144 | "metadata": {}, 145 | "source": [ 146 | "**NOTE**: Update the path to Python executable below before running the following bash script" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 16, 152 | "metadata": { 153 | "scrolled": false 154 | }, 155 | "outputs": [ 156 | { 157 | "name": "stdout", 158 | "output_type": "stream", 159 | "text": [ 160 | "Compiling crop_and_resize kernels by nvcc...\n", 161 | "Including CUDA code.\n", 162 | "/data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/lib/roi_align\n", 163 | "generating /tmp/tmp7w45x_xn/_crop_and_resize.c\n", 164 | "setting the current directory to '/tmp/tmp7w45x_xn'\n", 165 | "running build_ext\n", 166 | "building '_crop_and_resize' extension\n", 167 | "creating data\n", 168 | "creating data/home\n", 169 | "creating data/home/wonderwoman\n", 170 | "creating data/home/wonderwoman/notebooks\n", 171 | "creating data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples\n", 172 | "creating data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/lib\n", 173 | "creating data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/lib/roi_align\n", 174 | "creating data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/lib/roi_align/src\n", 175 | "gcc -pthread -B /data/anaconda/envs/pytorch041/compiler_compat -Wl,--sysroot=/ -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -Wstrict-prototypes -fPIC -DWITH_CUDA -I/anaconda/envs/pytorch041/lib/python3.7/site-packages/torch/utils/ffi/../../lib/include -I/anaconda/envs/pytorch041/lib/python3.7/site-packages/torch/utils/ffi/../../lib/include/TH -I/anaconda/envs/pytorch041/lib/python3.7/site-packages/torch/utils/ffi/../../lib/include/THC -I/usr/local/cuda/include -I/anaconda/envs/pytorch041/include/python3.7m -c _crop_and_resize.c -o ./_crop_and_resize.o -std=c99 -std=c99\n", 176 | "gcc -pthread -B /data/anaconda/envs/pytorch041/compiler_compat -Wl,--sysroot=/ -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -Wstrict-prototypes -fPIC -DWITH_CUDA -I/anaconda/envs/pytorch041/lib/python3.7/site-packages/torch/utils/ffi/../../lib/include -I/anaconda/envs/pytorch041/lib/python3.7/site-packages/torch/utils/ffi/../../lib/include/TH -I/anaconda/envs/pytorch041/lib/python3.7/site-packages/torch/utils/ffi/../../lib/include/THC -I/usr/local/cuda/include -I/anaconda/envs/pytorch041/include/python3.7m -c /data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/lib/roi_align/src/crop_and_resize.c -o ./data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/lib/roi_align/src/crop_and_resize.o -std=c99 -std=c99\n", 177 | "gcc -pthread -B /data/anaconda/envs/pytorch041/compiler_compat -Wl,--sysroot=/ -Wsign-compare 
-DNDEBUG -g -fwrapv -O3 -Wall -Wstrict-prototypes -fPIC -DWITH_CUDA -I/anaconda/envs/pytorch041/lib/python3.7/site-packages/torch/utils/ffi/../../lib/include -I/anaconda/envs/pytorch041/lib/python3.7/site-packages/torch/utils/ffi/../../lib/include/TH -I/anaconda/envs/pytorch041/lib/python3.7/site-packages/torch/utils/ffi/../../lib/include/THC -I/usr/local/cuda/include -I/anaconda/envs/pytorch041/include/python3.7m -c /data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/lib/roi_align/src/crop_and_resize_gpu.c -o ./data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/lib/roi_align/src/crop_and_resize_gpu.o -std=c99 -std=c99\n", 178 | "gcc -pthread -shared -B /data/anaconda/envs/pytorch041/compiler_compat -L/data/anaconda/envs/pytorch041/lib -Wl,-rpath=/data/anaconda/envs/pytorch041/lib -Wl,--no-as-needed -Wl,--sysroot=/ ./_crop_and_resize.o ./data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/lib/roi_align/src/crop_and_resize.o ./data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/lib/roi_align/src/crop_and_resize_gpu.o /data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/lib/roi_align/src/cuda/crop_and_resize_kernel.cu.o -o ./_crop_and_resize.so\n", 179 | "Compiling nms kernels by nvcc...\n", 180 | "Including CUDA code.\n", 181 | "/data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/lib/nms\n", 182 | "generating /tmp/tmp1t_4ld5y/_nms.c\n", 183 | "setting the current directory to '/tmp/tmp1t_4ld5y'\n", 184 | "running build_ext\n", 185 | "building '_nms' extension\n", 186 | "creating data\n", 187 | "creating data/home\n", 188 | "creating data/home/wonderwoman\n", 189 | "creating data/home/wonderwoman/notebooks\n", 190 | "creating data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples\n", 191 | "creating data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/lib\n", 192 | "creating data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/lib/nms\n", 193 | "creating data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/lib/nms/src\n", 194 | "gcc -pthread -B /data/anaconda/envs/pytorch041/compiler_compat -Wl,--sysroot=/ -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -Wstrict-prototypes -fPIC -DWITH_CUDA -I/anaconda/envs/pytorch041/lib/python3.7/site-packages/torch/utils/ffi/../../lib/include -I/anaconda/envs/pytorch041/lib/python3.7/site-packages/torch/utils/ffi/../../lib/include/TH -I/anaconda/envs/pytorch041/lib/python3.7/site-packages/torch/utils/ffi/../../lib/include/THC -I/usr/local/cuda/include -I/anaconda/envs/pytorch041/include/python3.7m -c _nms.c -o ./_nms.o -std=c99\n", 195 | "gcc -pthread -B /data/anaconda/envs/pytorch041/compiler_compat -Wl,--sysroot=/ -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -Wstrict-prototypes -fPIC -DWITH_CUDA -I/anaconda/envs/pytorch041/lib/python3.7/site-packages/torch/utils/ffi/../../lib/include -I/anaconda/envs/pytorch041/lib/python3.7/site-packages/torch/utils/ffi/../../lib/include/TH -I/anaconda/envs/pytorch041/lib/python3.7/site-packages/torch/utils/ffi/../../lib/include/THC -I/usr/local/cuda/include -I/anaconda/envs/pytorch041/include/python3.7m -c /data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/lib/nms/src/nms.c -o ./data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/lib/nms/src/nms.o -std=c99\n", 196 | "gcc -pthread -B /data/anaconda/envs/pytorch041/compiler_compat -Wl,--sysroot=/ -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -Wstrict-prototypes -fPIC -DWITH_CUDA -I/anaconda/envs/pytorch041/lib/python3.7/site-packages/torch/utils/ffi/../../lib/include 
-I/anaconda/envs/pytorch041/lib/python3.7/site-packages/torch/utils/ffi/../../lib/include/TH -I/anaconda/envs/pytorch041/lib/python3.7/site-packages/torch/utils/ffi/../../lib/include/THC -I/usr/local/cuda/include -I/anaconda/envs/pytorch041/include/python3.7m -c /data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/lib/nms/src/nms_cuda.c -o ./data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/lib/nms/src/nms_cuda.o -std=c99\n", 197 | "gcc -pthread -shared -B /data/anaconda/envs/pytorch041/compiler_compat -L/data/anaconda/envs/pytorch041/lib -Wl,-rpath=/data/anaconda/envs/pytorch041/lib -Wl,--no-as-needed -Wl,--sysroot=/ ./_nms.o ./data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/lib/nms/src/nms.o ./data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/lib/nms/src/nms_cuda.o /data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/lib/nms/src/cuda/nms_kernel.cu.o -o ./_nms.so\n" 198 | ] 199 | }, 200 | { 201 | "name": "stderr", 202 | "output_type": "stream", 203 | "text": [ 204 | "/data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/lib/roi_align/src/crop_and_resize.c: In function ‘CropAndResizePerBox’:\n", 205 | "/data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/lib/roi_align/src/crop_and_resize.c:30:0: warning: ignoring #pragma omp parallel [-Wunknown-pragmas]\n", 206 | " #pragma omp parallel for\n", 207 | " ^\n", 208 | "/data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/lib/nms/src/nms_cuda.c: In function ‘gpu_nms’:\n", 209 | "/data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/lib/nms/src/nms_cuda.c:29:35: warning: initialization from incompatible pointer type [-Wincompatible-pointer-types]\n", 210 | " unsigned long long* mask_flat = THCudaLongTensor_data(state, mask);\n", 211 | " ^\n", 212 | "/data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/lib/nms/src/nms_cuda.c:37:40: warning: initialization from incompatible pointer type [-Wincompatible-pointer-types]\n", 213 | " unsigned long long * mask_cpu_flat = THLongTensor_data(mask_cpu);\n", 214 | " ^\n", 215 | "/data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/lib/nms/src/nms_cuda.c:40:39: warning: initialization from incompatible pointer type [-Wincompatible-pointer-types]\n", 216 | " unsigned long long* remv_cpu_flat = THLongTensor_data(remv_cpu);\n", 217 | " ^\n", 218 | "/data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/lib/nms/src/nms_cuda.c:23:7: warning: unused variable ‘boxes_dim’ [-Wunused-variable]\n", 219 | " int boxes_dim = THCudaTensor_size(state, boxes, 1);\n", 220 | " ^\n" 221 | ] 222 | } 223 | ], 224 | "source": [ 225 | "%%bash\n", 226 | "\n", 227 | "export PATH=/anaconda/envs/pytorch041/bin/:$PATH\n", 228 | "\n", 229 | "CUDA_ARCH=\"-gencode arch=compute_30,code=sm_30 \\\n", 230 | " -gencode arch=compute_35,code=sm_35 \\\n", 231 | " -gencode arch=compute_50,code=sm_50 \\\n", 232 | " -gencode arch=compute_52,code=sm_52 \\\n", 233 | " -gencode arch=compute_60,code=sm_60 \\\n", 234 | " -gencode arch=compute_61,code=sm_61 \\\n", 235 | "\t -gencode arch=compute_70,code=sm_70 \"\n", 236 | "\n", 237 | "\n", 238 | "# Build RoIAlign\n", 239 | "cd lib/roi_align/src/cuda\n", 240 | "echo 'Compiling crop_and_resize kernels by nvcc...'\n", 241 | "/usr/local/cuda/bin/nvcc -c -o crop_and_resize_kernel.cu.o crop_and_resize_kernel.cu -x cu -Xcompiler -fPIC $CUDA_ARCH\n", 242 | "cd ../../\n", 243 | "python build.py\n", 244 | "cd ../..\n", 245 | "\n", 246 | "# Build NMS\n", 247 | "cd lib/nms/src/cuda\n", 248 | "echo \"Compiling nms kernels by nvcc...\"\n", 249 | 
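"# The NMS extension builds in two steps: nvcc first compiles the CUDA kernel into an\n",
"# object file, then build.py (via torch.utils.ffi) compiles the C wrapper and links the\n",
"# _nms module against it, mirroring the RoIAlign build above.\n",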
"/usr/local/cuda/bin/nvcc -c -o nms_kernel.cu.o nms_kernel.cu -x cu -Xcompiler -fPIC $CUDA_ARCH\n", 250 | "cd ../../\n", 251 | "python build.py\n", 252 | "cd ../.." 253 | ] 254 | }, 255 | { 256 | "cell_type": "markdown", 257 | "metadata": {}, 258 | "source": [ 259 | "## `pycocotools` and `coco` libraries" 260 | ] 261 | }, 262 | { 263 | "cell_type": "markdown", 264 | "metadata": {}, 265 | "source": [ 266 | "**NOTE**: Update the path to Python executable below before running the following bash script" 267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": 20, 272 | "metadata": {}, 273 | "outputs": [ 274 | { 275 | "name": "stdout", 276 | "output_type": "stream", 277 | "text": [ 278 | "Compiling pycocotools/_mask.pyx because it changed.\n", 279 | "[1/1] Cythonizing pycocotools/_mask.pyx\n", 280 | "running build_ext\n", 281 | "building 'pycocotools._mask' extension\n", 282 | "creating build\n", 283 | "creating build/temp.linux-x86_64-3.7\n", 284 | "creating build/temp.linux-x86_64-3.7/pycocotools\n", 285 | "creating build/common\n", 286 | "cc -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -Wstrict-prototypes -fPIC -I/anaconda/envs/pytorch041/lib/python3.7/site-packages/numpy/core/include -I../common -I/anaconda/envs/pytorch041/include/python3.7m -c pycocotools/_mask.c -o build/temp.linux-x86_64-3.7/pycocotools/_mask.o -Wno-cpp -Wno-unused-function -std=c99\n", 287 | "cc -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -Wstrict-prototypes -fPIC -I/anaconda/envs/pytorch041/lib/python3.7/site-packages/numpy/core/include -I../common -I/anaconda/envs/pytorch041/include/python3.7m -c ../common/maskApi.c -o build/temp.linux-x86_64-3.7/../common/maskApi.o -Wno-cpp -Wno-unused-function -std=c99\n", 288 | "gcc -pthread -shared -B /data/anaconda/envs/pytorch041/compiler_compat -L/data/anaconda/envs/pytorch041/lib -Wl,-rpath=/data/anaconda/envs/pytorch041/lib -Wl,--no-as-needed -Wl,--sysroot=/ build/temp.linux-x86_64-3.7/pycocotools/_mask.o build/temp.linux-x86_64-3.7/../common/maskApi.o -o /data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/coco/PythonAPI/pycocotools/_mask.cpython-37m-x86_64-linux-gnu.so\n" 289 | ] 290 | }, 291 | { 292 | "name": "stderr", 293 | "output_type": "stream", 294 | "text": [ 295 | "Cloning into 'coco'...\n", 296 | "/anaconda/envs/pytorch041/lib/python3.7/site-packages/Cython/Compiler/Main.py:367: FutureWarning: Cython directive 'language_level' not set, using 2 for now (Py2). This will change in a later release! 
File: /data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/coco/PythonAPI/pycocotools/_mask.pyx\n", 297 | " tree = Parsing.p_module(s, pxd, full_module_name)\n" 298 | ] 299 | } 300 | ], 301 | "source": [ 302 | "%%bash\n", 303 | "\n", 304 | "export PATH=/anaconda/envs/pytorch041/bin/:$PATH\n", 305 | "\n", 306 | "rm -fr coco\n", 307 | "rm -fr cocoapi\n", 308 | "rm -fr pycocotools\n", 309 | "\n", 310 | "export CC=cc\n", 311 | "export CXX=cc++\n", 312 | "\n", 313 | "git clone https://github.com/waleedka/coco.git\n", 314 | "# git clone https://github.com/cocodataset/cocoapi.git &&\\\n", 315 | "cd coco/PythonAPI/ && python setup.py build_ext --inplace\n", 316 | "cd ../..\n", 317 | "ln -s coco/PythonAPI/pycocotools/ pycocotools" 318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "execution_count": 21, 323 | "metadata": {}, 324 | "outputs": [ 325 | { 326 | "name": "stdout", 327 | "output_type": "stream", 328 | "text": [ 329 | "build_pytorch1.0_macosx_10.13.sh lib\r\n", 330 | "coco\t\t\t\t LICENSE\r\n", 331 | "coco.py\t\t\t\t model.py\r\n", 332 | "config.py\t\t\t __pycache__\r\n", 333 | "data\t\t\t\t pycocotools\r\n", 334 | "Demo.ipynb\t\t\t README.md\r\n", 335 | "demo.py\t\t\t\t requirements.txt\r\n", 336 | "fish_maskrcnn.zip\t\t Setup.ipynb\r\n", 337 | "fish_pytorch_style.py\t\t Train.ipynb\r\n", 338 | "images\t\t\t\t utils.py\r\n", 339 | "InstallPyTorchSourceCPU.ipynb\t visualize.py\r\n", 340 | "InstallPyTorchSourceCUDA.ipynb\r\n" 341 | ] 342 | } 343 | ], 344 | "source": [ 345 | "! ls" 346 | ] 347 | }, 348 | { 349 | "cell_type": "markdown", 350 | "metadata": {}, 351 | "source": [ 352 | "Note, here, for macOS, using Xcode 9.4.1 command line tools" 353 | ] 354 | }, 355 | { 356 | "cell_type": "code", 357 | "execution_count": null, 358 | "metadata": {}, 359 | "outputs": [], 360 | "source": [ 361 | "# # MacOS, see https://pytorch.org/ for other distributions\n", 362 | "# ! 
pip install torch_nightly -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html" 363 | ] 364 | }, 365 | { 366 | "cell_type": "markdown", 367 | "metadata": {}, 368 | "source": [ 369 | "More relevant resolved GitHub issues:\n", 370 | "* https://github.com/uber/horovod/issues/274#issuecomment-390813445\n", 371 | "* https://github.com/pytorch/pytorch/issues/10234" 372 | ] 373 | }, 374 | { 375 | "cell_type": "code", 376 | "execution_count": null, 377 | "metadata": {}, 378 | "outputs": [], 379 | "source": [] 380 | } 381 | ], 382 | "metadata": { 383 | "kernelspec": { 384 | "display_name": "Python 3.5 - PyTorch 0.4.1", 385 | "language": "python", 386 | "name": "pytorch_041" 387 | }, 388 | "language_info": { 389 | "codemirror_mode": { 390 | "name": "ipython", 391 | "version": 3 392 | }, 393 | "file_extension": ".py", 394 | "mimetype": "text/x-python", 395 | "name": "python", 396 | "nbconvert_exporter": "python", 397 | "pygments_lexer": "ipython3", 398 | "version": "3.7.2" 399 | }, 400 | "nav_menu": {}, 401 | "toc": { 402 | "navigate_menu": true, 403 | "number_sections": true, 404 | "sideBar": true, 405 | "threshold": 6, 406 | "toc_cell": false, 407 | "toc_section_display": "block", 408 | "toc_window_display": false 409 | } 410 | }, 411 | "nbformat": 4, 412 | "nbformat_minor": 2 413 | } 414 | -------------------------------------------------------------------------------- /build_pytorch1.0_macosx_10.13.sh: -------------------------------------------------------------------------------- 1 | git clone --recursive https://github.com/pytorch/pytorch.git 2 | pip3 uninstall torch --yes 3 | 4 | cd pytorch && \ 5 | git checkout -b 8619230 &&\ 6 | USE_OPENCV=1 \ 7 | CMAKE_PREFIX_PATH="/Library/Frameworks/Python.framework/Versions/3.6/bin/" \ 8 | LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH \ 9 | LD_LIBRARY_PATH=/usr/local/cuda/lib:$LD_LIBRARY_PATH \ 10 | CUDA_BIN_PATH=/usr/local/cuda/bin \ 11 | CUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda/ \ 12 | CUDNN_LIB_DIR=/usr/local/cuda/lib \ 13 | CUDA_HOST_COMPILER=/usr/local/opt/llvm/bin/clang \ 14 | USE_CUDA=0 \ 15 | USE_NNPACK=0 \ 16 | CC=/usr/local/opt/llvm/bin/clang \ 17 | CXX=/usr/local/opt/llvm/bin/clang++ \ 18 | # BUILD_TORCH=ON \ 19 | TORCH_CUDA_ARCH_LIST="3.5 5.2 6.0 6.1+PTX" \ 20 | TORCH_NVCC_FLAGS="-Xfatbin -compress-all" \ 21 | python3 setup.py bdist_wheel 22 | 23 | -------------------------------------------------------------------------------- /coco.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Configurations and data loading code for MS COCO. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 
6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | 9 | ------------------------------------------------------------ 10 | 11 | Usage: import the module (see Jupyter notebooks for examples), or run from 12 | the command line as such: 13 | 14 | # Train a new model starting from pre-trained COCO weights 15 | python3 coco.py train --dataset=/path/to/coco/ --model=coco 16 | 17 | # Train a new model starting from ImageNet weights 18 | python3 coco.py train --dataset=/path/to/coco/ --model=imagenet 19 | 20 | # Continue training a model that you had trained earlier 21 | python3 coco.py train --dataset=/path/to/coco/ --model=/path/to/weights.h5 22 | 23 | # Continue training the last model you trained 24 | python3 coco.py train --dataset=/path/to/coco/ --model=last 25 | 26 | # Run COCO evaluatoin on the last model you trained 27 | python3 coco.py evaluate --dataset=/path/to/coco/ --model=last 28 | """ 29 | 30 | import os 31 | import time 32 | import numpy as np 33 | 34 | # Download and install the Python COCO tools from https://github.com/waleedka/coco 35 | # That's a fork from the original https://github.com/pdollar/coco with a bug 36 | # fix for Python 3. 37 | # I submitted a pull request https://github.com/cocodataset/cocoapi/pull/50 38 | # If the PR is merged then use the original repo. 39 | # Note: Edit PythonAPI/Makefile and replace "python" with "python3". 40 | from pycocotools.coco import COCO 41 | from pycocotools.cocoeval import COCOeval 42 | from pycocotools import mask as maskUtils 43 | 44 | import zipfile 45 | import urllib.request 46 | import shutil 47 | 48 | from config import Config 49 | import utils 50 | import model as modellib 51 | 52 | import torch 53 | 54 | # Root directory of the project 55 | ROOT_DIR = os.getcwd() 56 | 57 | # Path to trained weights file 58 | COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.pth") 59 | 60 | # Directory to save logs and model checkpoints, if not provided 61 | # through the command line argument --logs 62 | DEFAULT_LOGS_DIR = os.path.join(ROOT_DIR, "logs") 63 | DEFAULT_DATASET_YEAR = "2014" 64 | 65 | ############################################################ 66 | # Configurations 67 | ############################################################ 68 | 69 | class CocoConfig(Config): 70 | """Configuration for training on MS COCO. 71 | Derives from the base Config class and overrides values specific 72 | to the COCO dataset. 73 | """ 74 | # Give the configuration a recognizable name 75 | NAME = "coco" 76 | 77 | # We use one GPU with 8GB memory, which can fit one image. 78 | # Adjust down if you use a smaller GPU. 79 | IMAGES_PER_GPU = 16 80 | 81 | # Uncomment to train on 8 GPUs (default is 1) 82 | GPU_COUNT = 0 83 | 84 | # Number of classes (including background) 85 | NUM_CLASSES = 1 + 80 # COCO has 80 classes 86 | 87 | 88 | ############################################################ 89 | # Dataset 90 | ############################################################ 91 | 92 | class CocoDataset(utils.Dataset): 93 | def load_coco(self, dataset_dir, subset, year=DEFAULT_DATASET_YEAR, class_ids=None, 94 | class_map=None, return_coco=False, auto_download=False): 95 | """Load a subset of the COCO dataset. 96 | dataset_dir: The root directory of the COCO dataset. 97 | subset: What to load (train, val, minival, valminusminival) 98 | year: What dataset year to load (2014, 2017) as a string, not an integer 99 | class_ids: If provided, only loads images that have the given classes. 
100 | class_map: TODO: Not implemented yet. Supports maping classes from 101 | different datasets to the same class ID. 102 | return_coco: If True, returns the COCO object. 103 | auto_download: Automatically download and unzip MS-COCO images and annotations 104 | """ 105 | 106 | if auto_download is True: 107 | self.auto_download(dataset_dir, subset, year) 108 | 109 | coco = COCO("{}/annotations/instances_{}{}.json".format(dataset_dir, subset, year)) 110 | if subset == "minival" or subset == "valminusminival": 111 | subset = "val" 112 | image_dir = "{}/{}{}".format(dataset_dir, subset, year) 113 | 114 | # Load all classes or a subset? 115 | if not class_ids: 116 | # All classes 117 | class_ids = sorted(coco.getCatIds()) 118 | 119 | # All images or a subset? 120 | if class_ids: 121 | image_ids = [] 122 | for id in class_ids: 123 | image_ids.extend(list(coco.getImgIds(catIds=[id]))) 124 | # Remove duplicates 125 | image_ids = list(set(image_ids)) 126 | else: 127 | # All images 128 | image_ids = list(coco.imgs.keys()) 129 | 130 | # Add classes 131 | for i in class_ids: 132 | self.add_class("coco", i, coco.loadCats(i)[0]["name"]) 133 | 134 | # Add images 135 | for i in image_ids: 136 | self.add_image( 137 | "coco", image_id=i, 138 | path=os.path.join(image_dir, coco.imgs[i]['file_name']), 139 | width=coco.imgs[i]["width"], 140 | height=coco.imgs[i]["height"], 141 | annotations=coco.loadAnns(coco.getAnnIds( 142 | imgIds=[i], catIds=class_ids, iscrowd=None))) 143 | if return_coco: 144 | return coco 145 | 146 | def auto_download(self, dataDir, dataType, dataYear): 147 | """Download the COCO dataset/annotations if requested. 148 | dataDir: The root directory of the COCO dataset. 149 | dataType: What to load (train, val, minival, valminusminival) 150 | dataYear: What dataset year to load (2014, 2017) as a string, not an integer 151 | Note: 152 | For 2014, use "train", "val", "minival", or "valminusminival" 153 | For 2017, only "train" and "val" annotations are available 154 | """ 155 | 156 | # Setup paths and file names 157 | if dataType == "minival" or dataType == "valminusminival": 158 | imgDir = "{}/{}{}".format(dataDir, "val", dataYear) 159 | imgZipFile = "{}/{}{}.zip".format(dataDir, "val", dataYear) 160 | imgURL = "http://images.cocodataset.org/zips/{}{}.zip".format("val", dataYear) 161 | else: 162 | imgDir = "{}/{}{}".format(dataDir, dataType, dataYear) 163 | imgZipFile = "{}/{}{}.zip".format(dataDir, dataType, dataYear) 164 | imgURL = "http://images.cocodataset.org/zips/{}{}.zip".format(dataType, dataYear) 165 | # print("Image paths:"); print(imgDir); print(imgZipFile); print(imgURL) 166 | 167 | # Create main folder if it doesn't exist yet 168 | if not os.path.exists(dataDir): 169 | os.makedirs(dataDir) 170 | 171 | # Download images if not available locally 172 | if not os.path.exists(imgDir): 173 | os.makedirs(imgDir) 174 | print("Downloading images to " + imgZipFile + " ...") 175 | with urllib.request.urlopen(imgURL) as resp, open(imgZipFile, 'wb') as out: 176 | shutil.copyfileobj(resp, out) 177 | print("... done downloading.") 178 | print("Unzipping " + imgZipFile) 179 | with zipfile.ZipFile(imgZipFile, "r") as zip_ref: 180 | zip_ref.extractall(dataDir) 181 | print("... 
done unzipping") 182 | print("Will use images in " + imgDir) 183 | 184 | # Setup annotations data paths 185 | annDir = "{}/annotations".format(dataDir) 186 | if dataType == "minival": 187 | annZipFile = "{}/instances_minival2014.json.zip".format(dataDir) 188 | annFile = "{}/instances_minival2014.json".format(annDir) 189 | annURL = "https://dl.dropboxusercontent.com/s/o43o90bna78omob/instances_minival2014.json.zip?dl=0" 190 | unZipDir = annDir 191 | elif dataType == "valminusminival": 192 | annZipFile = "{}/instances_valminusminival2014.json.zip".format(dataDir) 193 | annFile = "{}/instances_valminusminival2014.json".format(annDir) 194 | annURL = "https://dl.dropboxusercontent.com/s/s3tw5zcg7395368/instances_valminusminival2014.json.zip?dl=0" 195 | unZipDir = annDir 196 | else: 197 | annZipFile = "{}/annotations_trainval{}.zip".format(dataDir, dataYear) 198 | annFile = "{}/instances_{}{}.json".format(annDir, dataType, dataYear) 199 | annURL = "http://images.cocodataset.org/annotations/annotations_trainval{}.zip".format(dataYear) 200 | unZipDir = dataDir 201 | # print("Annotations paths:"); print(annDir); print(annFile); print(annZipFile); print(annURL) 202 | 203 | # Download annotations if not available locally 204 | if not os.path.exists(annDir): 205 | os.makedirs(annDir) 206 | if not os.path.exists(annFile): 207 | if not os.path.exists(annZipFile): 208 | print("Downloading zipped annotations to " + annZipFile + " ...") 209 | with urllib.request.urlopen(annURL) as resp, open(annZipFile, 'wb') as out: 210 | shutil.copyfileobj(resp, out) 211 | print("... done downloading.") 212 | print("Unzipping " + annZipFile) 213 | with zipfile.ZipFile(annZipFile, "r") as zip_ref: 214 | zip_ref.extractall(unZipDir) 215 | print("... done unzipping") 216 | print("Will use annotations in " + annFile) 217 | 218 | def load_mask(self, image_id): 219 | """Load instance masks for the given image. 220 | 221 | Different datasets use different ways to store masks. This 222 | function converts the different mask format to one format 223 | in the form of a bitmap [height, width, instances]. 224 | 225 | Returns: 226 | masks: A bool array of shape [height, width, instance count] with 227 | one mask per instance. 228 | class_ids: a 1D array of class IDs of the instance masks. 229 | """ 230 | # If not a COCO image, delegate to parent class. 231 | image_info = self.image_info[image_id] 232 | if image_info["source"] != "coco": 233 | return super(CocoDataset, self).load_mask(image_id) 234 | 235 | instance_masks = [] 236 | class_ids = [] 237 | annotations = self.image_info[image_id]["annotations"] 238 | # Build mask of shape [height, width, instance_count] and list 239 | # of class IDs that correspond to each channel of the mask. 240 | for annotation in annotations: 241 | class_id = self.map_source_class_id( 242 | "coco.{}".format(annotation['category_id'])) 243 | if class_id: 244 | m = self.annToMask(annotation, image_info["height"], 245 | image_info["width"]) 246 | # Some objects are so small that they're less than 1 pixel area 247 | # and end up rounded out. Skip those objects. 248 | if m.max() < 1: 249 | continue 250 | # Is it a crowd? If so, use a negative class ID. 251 | if annotation['iscrowd']: 252 | # Use negative class ID for crowds 253 | class_id *= -1 254 | # For crowd masks, annToMask() sometimes returns a mask 255 | # smaller than the given dimensions. If so, resize it. 
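# Note: rather than resizing, the code below substitutes a full-image mask of ones when the returned crowd mask is smaller than the image.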
256 | if m.shape[0] != image_info["height"] or m.shape[1] != image_info["width"]: 257 | m = np.ones([image_info["height"], image_info["width"]], dtype=bool) 258 | instance_masks.append(m) 259 | class_ids.append(class_id) 260 | 261 | # Pack instance masks into an array 262 | if class_ids: 263 | mask = np.stack(instance_masks, axis=2) 264 | class_ids = np.array(class_ids, dtype=np.int32) 265 | return mask, class_ids 266 | else: 267 | # Call super class to return an empty mask 268 | return super(CocoDataset, self).load_mask(image_id) 269 | 270 | def image_reference(self, image_id): 271 | """Return a link to the image in the COCO Website.""" 272 | info = self.image_info[image_id] 273 | if info["source"] == "coco": 274 | return "http://cocodataset.org/#explore?id={}".format(info["id"]) 275 | else: 276 | super(CocoDataset, self).image_reference(image_id) 277 | 278 | # The following two functions are from pycocotools with a few changes. 279 | 280 | def annToRLE(self, ann, height, width): 281 | """ 282 | Convert annotation which can be polygons, uncompressed RLE to RLE. 283 | :return: binary mask (numpy 2D array) 284 | """ 285 | segm = ann['segmentation'] 286 | if isinstance(segm, list): 287 | # polygon -- a single object might consist of multiple parts 288 | # we merge all parts into one mask rle code 289 | rles = maskUtils.frPyObjects(segm, height, width) 290 | rle = maskUtils.merge(rles) 291 | elif isinstance(segm['counts'], list): 292 | # uncompressed RLE 293 | rle = maskUtils.frPyObjects(segm, height, width) 294 | else: 295 | # rle 296 | rle = ann['segmentation'] 297 | return rle 298 | 299 | def annToMask(self, ann, height, width): 300 | """ 301 | Convert annotation which can be polygons, uncompressed RLE, or RLE to binary mask. 302 | :return: binary mask (numpy 2D array) 303 | """ 304 | rle = self.annToRLE(ann, height, width) 305 | m = maskUtils.decode(rle) 306 | return m 307 | 308 | 309 | ############################################################ 310 | # COCO Evaluation 311 | ############################################################ 312 | 313 | def build_coco_results(dataset, image_ids, rois, class_ids, scores, masks): 314 | """Arrange resutls to match COCO specs in http://cocodataset.org/#format 315 | """ 316 | # If no results, return an empty list 317 | if rois is None: 318 | return [] 319 | 320 | results = [] 321 | for image_id in image_ids: 322 | # Loop through detections 323 | for i in range(rois.shape[0]): 324 | class_id = class_ids[i] 325 | score = scores[i] 326 | bbox = np.around(rois[i], 1) 327 | mask = masks[:, :, i] 328 | 329 | result = { 330 | "image_id": image_id, 331 | "category_id": dataset.get_source_class_id(class_id, "coco"), 332 | "bbox": [bbox[1], bbox[0], bbox[3] - bbox[1], bbox[2] - bbox[0]], 333 | "score": score, 334 | "segmentation": maskUtils.encode(np.asfortranarray(mask)) 335 | } 336 | results.append(result) 337 | return results 338 | 339 | 340 | def evaluate_coco(model, dataset, coco, eval_type="bbox", limit=0, image_ids=None): 341 | """Runs official COCO evaluation. 342 | dataset: A Dataset object with valiadtion data 343 | eval_type: "bbox" or "segm" for bounding box or segmentation evaluation 344 | limit: if not 0, it's the number of images to use for evaluation 345 | """ 346 | # Pick COCO images from the dataset 347 | image_ids = image_ids or dataset.image_ids 348 | 349 | # Limit to a subset 350 | if limit: 351 | image_ids = image_ids[:limit] 352 | 353 | # Get corresponding COCO image IDs. 
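    # (dataset.image_ids here are the Dataset class's internal indices; the
    # original COCO ids stored under image_info[...]["id"] are what COCOeval
    # expects in params.imgIds.)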
354 | coco_image_ids = [dataset.image_info[id]["id"] for id in image_ids] 355 | 356 | t_prediction = 0 357 | t_start = time.time() 358 | 359 | results = [] 360 | for i, image_id in enumerate(image_ids): 361 | # Load image 362 | image = dataset.load_image(image_id) 363 | 364 | # Run detection 365 | t = time.time() 366 | r = model.detect([image])[0] 367 | t_prediction += (time.time() - t) 368 | 369 | # Convert results to COCO format 370 | image_results = build_coco_results(dataset, coco_image_ids[i:i + 1], 371 | r["rois"], r["class_ids"], 372 | r["scores"], r["masks"]) 373 | results.extend(image_results) 374 | 375 | # Load results. This modifies results with additional attributes. 376 | coco_results = coco.loadRes(results) 377 | 378 | # Evaluate 379 | cocoEval = COCOeval(coco, coco_results, eval_type) 380 | cocoEval.params.imgIds = coco_image_ids 381 | cocoEval.evaluate() 382 | cocoEval.accumulate() 383 | cocoEval.summarize() 384 | 385 | print("Prediction time: {}. Average {}/image".format( 386 | t_prediction, t_prediction / len(image_ids))) 387 | print("Total time: ", time.time() - t_start) 388 | 389 | 390 | ############################################################ 391 | # Training 392 | ############################################################ 393 | 394 | 395 | if __name__ == '__main__': 396 | import argparse 397 | 398 | # Parse command line arguments 399 | parser = argparse.ArgumentParser( 400 | description='Train Mask R-CNN on MS COCO.') 401 | parser.add_argument("command", 402 | metavar="", 403 | help="'train' or 'evaluate' on MS COCO") 404 | parser.add_argument('--dataset', required=True, 405 | metavar="/path/to/coco/", 406 | help='Directory of the MS-COCO dataset') 407 | parser.add_argument('--year', required=False, 408 | default=DEFAULT_DATASET_YEAR, 409 | metavar="", 410 | help='Year of the MS-COCO dataset (2014 or 2017) (default=2014)') 411 | parser.add_argument('--model', required=False, 412 | metavar="/path/to/weights.pth", 413 | help="Path to weights .pth file or 'coco'") 414 | parser.add_argument('--logs', required=False, 415 | default=DEFAULT_LOGS_DIR, 416 | metavar="/path/to/logs/", 417 | help='Logs and checkpoints directory (default=logs/)') 418 | parser.add_argument('--limit', required=False, 419 | default=500, 420 | metavar="", 421 | help='Images to use for evaluation (default=500)') 422 | parser.add_argument('--download', required=False, 423 | default=False, 424 | metavar="", 425 | help='Automatically download and unzip MS-COCO files (default=False)', 426 | type=bool) 427 | args = parser.parse_args() 428 | print("Command: ", args.command) 429 | print("Model: ", args.model) 430 | print("Dataset: ", args.dataset) 431 | print("Year: ", args.year) 432 | print("Logs: ", args.logs) 433 | print("Auto Download: ", args.download) 434 | 435 | # Configurations 436 | if args.command == "train": 437 | config = CocoConfig() 438 | else: 439 | class InferenceConfig(CocoConfig): 440 | # Set batch size to 1 since we'll be running inference on 441 | # one image at a time. 
Batch size = GPU_COUNT * IMAGES_PER_GPU 442 | GPU_COUNT = 1 443 | IMAGES_PER_GPU = 1 444 | DETECTION_MIN_CONFIDENCE = 0 445 | config = InferenceConfig() 446 | config.display() 447 | 448 | # Create model 449 | if args.command == "train": 450 | model = modellib.MaskRCNN(config=config, 451 | model_dir=args.logs) 452 | else: 453 | model = modellib.MaskRCNN(config=config, 454 | model_dir=args.logs) 455 | if config.GPU_COUNT: 456 | model = model.cuda() 457 | 458 | # Select weights file to load 459 | if args.model: 460 | if args.model.lower() == "coco": 461 | model_path = COCO_MODEL_PATH 462 | elif args.model.lower() == "last": 463 | # Find last trained weights 464 | model_path = model.find_last()[1] 465 | elif args.model.lower() == "imagenet": 466 | # Start from ImageNet trained weights 467 | model_path = config.IMAGENET_MODEL_PATH 468 | else: 469 | model_path = args.model 470 | else: 471 | model_path = "" 472 | 473 | # Load weights 474 | print("Loading weights ", model_path) 475 | model.load_weights(model_path) 476 | 477 | # Train or evaluate 478 | if args.command == "train": 479 | # Training dataset. Use the training set and 35K from the 480 | # validation set, as as in the Mask RCNN paper. 481 | dataset_train = CocoDataset() 482 | dataset_train.load_coco(args.dataset, "train", year=args.year, auto_download=args.download) 483 | dataset_train.load_coco(args.dataset, "valminusminival", year=args.year, auto_download=args.download) 484 | dataset_train.prepare() 485 | 486 | # Validation dataset 487 | dataset_val = CocoDataset() 488 | dataset_val.load_coco(args.dataset, "minival", year=args.year, auto_download=args.download) 489 | dataset_val.prepare() 490 | 491 | # *** This training schedule is an example. Update to your needs *** 492 | 493 | # Training - Stage 1 494 | print("Training network heads") 495 | model.train_model(dataset_train, dataset_val, 496 | learning_rate=config.LEARNING_RATE, 497 | epochs=40, 498 | layers='heads') 499 | 500 | # Training - Stage 2 501 | # Finetune layers from ResNet stage 4 and up 502 | print("Fine tune Resnet stage 4 and up") 503 | model.train_model(dataset_train, dataset_val, 504 | learning_rate=config.LEARNING_RATE, 505 | epochs=120, 506 | layers='4+') 507 | 508 | # Training - Stage 3 509 | # Fine tune all layers 510 | print("Fine tune all layers") 511 | model.train_model(dataset_train, dataset_val, 512 | learning_rate=config.LEARNING_RATE / 10, 513 | epochs=160, 514 | layers='all') 515 | 516 | elif args.command == "evaluate": 517 | # Validation dataset 518 | dataset_val = CocoDataset() 519 | coco = dataset_val.load_coco(args.dataset, "minival", year=args.year, return_coco=True, auto_download=args.download) 520 | dataset_val.prepare() 521 | print("Running COCO evaluation on {} images.".format(args.limit)) 522 | evaluate_coco(model, dataset_val, coco, "bbox", limit=int(args.limit)) 523 | evaluate_coco(model, dataset_val, coco, "segm", limit=int(args.limit)) 524 | else: 525 | print("'{}' is not recognized. " 526 | "Use 'train' or 'evaluate'".format(args.command)) 527 | -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Base Configurations class. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 
6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | """ 9 | 10 | import math 11 | import numpy as np 12 | import os 13 | 14 | 15 | # Base Configuration Class 16 | # Don't use this class directly. Instead, sub-class it and override 17 | # the configurations you need to change. 18 | 19 | class Config(object): 20 | """Base configuration class. For custom configurations, create a 21 | sub-class that inherits from this one and override properties 22 | that need to be changed. 23 | """ 24 | # Name the configurations. For example, 'COCO', 'Experiment 3', ...etc. 25 | # Useful if your code needs to do things differently depending on which 26 | # experiment is running. 27 | NAME = None # Override in sub-classes 28 | 29 | # Path to pretrained imagenet model 30 | IMAGENET_MODEL_PATH = os.path.join(os.getcwd(), "resnet50_imagenet.pth") 31 | 32 | # NUMBER OF GPUs to use. For CPU use 0 33 | GPU_COUNT = 1 34 | 35 | # Number of images to train with on each GPU. A 12GB GPU can typically 36 | # handle 2 images of 1024x1024px. 37 | # Adjust based on your GPU memory and image sizes. Use the highest 38 | # number that your GPU can handle for best performance. 39 | IMAGES_PER_GPU = 2 40 | 41 | # Number of training steps per epoch 42 | # This doesn't need to match the size of the training set. Tensorboard 43 | # updates are saved at the end of each epoch, so setting this to a 44 | # smaller number means getting more frequent TensorBoard updates. 45 | # Validation stats are also calculated at each epoch end and they 46 | # might take a while, so don't set this too small to avoid spending 47 | # a lot of time on validation stats. 48 | STEPS_PER_EPOCH = 1000 49 | 50 | # Number of validation steps to run at the end of every training epoch. 51 | # A bigger number improves accuracy of validation stats, but slows 52 | # down the training. 53 | VALIDATION_STEPS = 50 54 | 55 | # The strides of each layer of the FPN Pyramid. These values 56 | # are based on a Resnet101 backbone. 57 | BACKBONE_STRIDES = [4, 8, 16, 32, 64] 58 | 59 | # Number of classification classes (including background) 60 | NUM_CLASSES = 1 # Override in sub-classes 61 | 62 | # Length of square anchor side in pixels 63 | RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512) 64 | 65 | # Ratios of anchors at each cell (width/height) 66 | # A value of 1 represents a square anchor, and 0.5 is a wide anchor 67 | RPN_ANCHOR_RATIOS = [0.5, 1, 2] 68 | 69 | # Anchor stride 70 | # If 1 then anchors are created for each cell in the backbone feature map. 71 | # If 2, then anchors are created for every other cell, and so on. 72 | RPN_ANCHOR_STRIDE = 1 73 | 74 | # Non-max suppression threshold to filter RPN proposals. 75 | # You can reduce this during training to generate more propsals. 76 | RPN_NMS_THRESHOLD = 0.7 77 | 78 | # How many anchors per image to use for RPN training 79 | RPN_TRAIN_ANCHORS_PER_IMAGE = 256 80 | 81 | # ROIs kept after non-maximum supression (training and inference) 82 | POST_NMS_ROIS_TRAINING = 2000 83 | POST_NMS_ROIS_INFERENCE = 1000 84 | 85 | # If enabled, resizes instance masks to a smaller size to reduce 86 | # memory load. Recommended when using high-resolution images. 87 | USE_MINI_MASK = True 88 | MINI_MASK_SHAPE = (56, 56) # (height, width) of the mini-mask 89 | 90 | # Input image resing 91 | # Images are resized such that the smallest side is >= IMAGE_MIN_DIM and 92 | # the longest side is <= IMAGE_MAX_DIM. In case both conditions can't 93 | # be satisfied together the IMAGE_MAX_DIM is enforced. 
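    # Illustrative example (assumes the resize logic in utils.resize_image):
    # a 600x1200 image is first scaled by 800/600 to satisfy IMAGE_MIN_DIM,
    # giving 800x1600; 1600 exceeds IMAGE_MAX_DIM, so the scale is reduced to
    # 1024/1200 and the image becomes 512x1024 before padding to 1024x1024.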
94 | IMAGE_MIN_DIM = 800 95 | IMAGE_MAX_DIM = 1024 96 | # If True, pad images with zeros such that they're (max_dim by max_dim) 97 | IMAGE_PADDING = True # currently, the False option is not supported 98 | 99 | # Image mean (RGB) 100 | MEAN_PIXEL = np.array([123.7, 116.8, 103.9]) 101 | 102 | # Number of ROIs per image to feed to classifier/mask heads 103 | # The Mask RCNN paper uses 512 but often the RPN doesn't generate 104 | # enough positive proposals to fill this and keep a positive:negative 105 | # ratio of 1:3. You can increase the number of proposals by adjusting 106 | # the RPN NMS threshold. 107 | TRAIN_ROIS_PER_IMAGE = 200 108 | 109 | # Percent of positive ROIs used to train classifier/mask heads 110 | ROI_POSITIVE_RATIO = 0.33 111 | 112 | # Pooled ROIs 113 | POOL_SIZE = 7 114 | MASK_POOL_SIZE = 14 115 | MASK_SHAPE = [28, 28] 116 | 117 | # Maximum number of ground truth instances to use in one image 118 | MAX_GT_INSTANCES = 100 119 | 120 | # Bounding box refinement standard deviation for RPN and final detections. 121 | RPN_BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2]) 122 | BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2]) 123 | 124 | # Max number of final detections 125 | DETECTION_MAX_INSTANCES = 100 126 | 127 | # Minimum probability value to accept a detected instance 128 | # ROIs below this threshold are skipped 129 | DETECTION_MIN_CONFIDENCE = 0.7 130 | 131 | # Non-maximum suppression threshold for detection 132 | DETECTION_NMS_THRESHOLD = 0.3 133 | 134 | # Learning rate and momentum 135 | # The Mask RCNN paper uses lr=0.02, but on TensorFlow it causes 136 | # weights to explode. Likely due to differences in optimzer 137 | # implementation. 138 | LEARNING_RATE = 0.001 139 | LEARNING_MOMENTUM = 0.9 140 | 141 | # Weight decay regularization 142 | WEIGHT_DECAY = 0.0001 143 | 144 | # Use RPN ROIs or externally generated ROIs for training 145 | # Keep this True for most situations. Set to False if you want to train 146 | # the head branches on ROI generated by code rather than the ROIs from 147 | # the RPN. For example, to debug the classifier head without having to 148 | # train the RPN. 
149 | USE_RPN_ROIS = True 150 | 151 | # Number of workers to use in Data loading (the data 152 | # generators) within model design, e.g., as in: 153 | # torch.utils.data.DataLoader(train_set, batch_size=1, shuffle=True, num_workers=self.config.NUM_WORKERS) 154 | NUM_WORKERS = 4 155 | 156 | def __init__(self): 157 | """Set values of computed attributes.""" 158 | # Effective batch size 159 | if self.GPU_COUNT > 0: 160 | self.BATCH_SIZE = self.IMAGES_PER_GPU * self.GPU_COUNT 161 | else: 162 | self.BATCH_SIZE = self.IMAGES_PER_GPU 163 | 164 | # Adjust step size based on batch size 165 | self.STEPS_PER_EPOCH = self.BATCH_SIZE * self.STEPS_PER_EPOCH 166 | 167 | # Input image size 168 | self.IMAGE_SHAPE = np.array( 169 | [self.IMAGE_MAX_DIM, self.IMAGE_MAX_DIM, 3]) 170 | 171 | # Compute backbone size from input image size 172 | self.BACKBONE_SHAPES = np.array( 173 | [[int(math.ceil(self.IMAGE_SHAPE[0] / stride)), 174 | int(math.ceil(self.IMAGE_SHAPE[1] / stride))] 175 | for stride in self.BACKBONE_STRIDES]) 176 | 177 | def display(self): 178 | """Display Configuration values.""" 179 | print("\nConfigurations:") 180 | for a in dir(self): 181 | if not a.startswith("__") and not callable(getattr(self, a)): 182 | print("{:30} {}".format(a, getattr(self, a))) 183 | print("\n") 184 | -------------------------------------------------------------------------------- /demo.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import random 4 | import math 5 | import numpy as np 6 | import skimage.io 7 | import matplotlib 8 | import matplotlib.pyplot as plt 9 | import glob 10 | from io import BytesIO 11 | import requests 12 | from PIL import Image 13 | 14 | # import coco 15 | from pycocotools import coco 16 | import utils 17 | import model as modellib 18 | import visualize 19 | 20 | import torch 21 | import pycocotools 22 | 23 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 24 | print(device) 25 | 26 | # Root directory of the project 27 | ROOT_DIR = os.getcwd() 28 | 29 | # Directory to save logs and trained model 30 | MODEL_DIR = os.path.join(ROOT_DIR, "logs") 31 | 32 | # Path to trained weights file 33 | # Download this file and place in the root of your 34 | # project (See README file for details) 35 | COCO_MODEL_PATH = os.path.join(ROOT_DIR, "models", "mask_rcnn_coco.pth") 36 | 37 | # Directory of images to run detection on 38 | IMAGE_DIR = os.path.join(ROOT_DIR, "images") 39 | 40 | class InferenceConfig(coco.CocoConfig): 41 | # Set batch size to 1 since we'll be running inference on 42 | # one image at a time. Batch size = GPU_COUNT * IMAGES_PER_GPU 43 | # GPU_COUNT = 0 for CPU 44 | GPU_COUNT = 0 45 | IMAGES_PER_GPU = 1 46 | COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.pth") 47 | 48 | 49 | config = InferenceConfig() 50 | config.display() 51 | 52 | # Create model object. 53 | model = modellib.MaskRCNN(model_dir=MODEL_DIR, config=config) 54 | model = model.to(device=device) 55 | 56 | # Load weights trained on MS-COCO 57 | print(COCO_MODEL_PATH) 58 | model.load_state_dict(torch.load(COCO_MODEL_PATH)) 59 | 60 | # COCO Class names 61 | # Index of the class in the list is its ID. 
For example, to get ID of 62 | # the teddy bear class, use: class_names.index('teddy bear') 63 | class_names = ['BG', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 64 | 'bus', 'train', 'truck', 'boat', 'traffic light', 65 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 66 | 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 67 | 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 68 | 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 69 | 'kite', 'baseball bat', 'baseball glove', 'skateboard', 70 | 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 71 | 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 72 | 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 73 | 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 74 | 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 75 | 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 76 | 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 77 | 'teddy bear', 'hair drier', 'toothbrush'] 78 | 79 | # Load a random image from the images folder 80 | # file_names = glob.glob(os.path.join('images', '*.jpg')) 81 | # image = skimage.io.imread(os.path.join(random.choice(file_names))) 82 | 83 | # Or load file from the internet 84 | req = requests.get('https://cdn.pixabay.com/photo/2015/06/20/13/55/man-815795__340.jpg') 85 | image = np.asarray(Image.open(BytesIO(req.content))) -------------------------------------------------------------------------------- /fish_pytorch_style.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Configurations and data loading code for MS COCO. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | 9 | ------------------------------------------------------------ 10 | 11 | Usage: import the module (see Jupyter notebooks for examples), or run from 12 | the command line as such: 13 | 14 | # Train a new model starting from pre-trained COCO weights 15 | python3 coco.py train --dataset=/path/to/coco/ --model=coco 16 | 17 | # Train a new model starting from ImageNet weights 18 | python3 coco.py train --dataset=/path/to/coco/ --model=imagenet 19 | 20 | # Continue training a model that you had trained earlier 21 | python3 coco.py train --dataset=/path/to/coco/ --model=/path/to/weights.h5 22 | 23 | # Continue training the last model you trained 24 | python3 coco.py train --dataset=/path/to/coco/ --model=last 25 | 26 | # Run COCO evaluatoin on the last model you trained 27 | python3 coco.py evaluate --dataset=/path/to/coco/ --model=last 28 | """ 29 | 30 | import os 31 | import time 32 | import numpy as np 33 | 34 | # Download and install the Python COCO tools from https://github.com/waleedka/coco 35 | # That's a fork from the original https://github.com/pdollar/coco with a bug 36 | # fix for Python 3. 37 | # I submitted a pull request https://github.com/cocodataset/cocoapi/pull/50 38 | # If the PR is merged then use the original repo. 39 | # Note: Edit PythonAPI/Makefile and replace "python" with "python3". 
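# json and skimage are needed by FishDataset.load_fish() and load_mask() below
# but are not imported anywhere else in this file, so import them here.
import json
import skimage.draw
import skimage.io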
40 | from pycocotools.coco import COCO 41 | from pycocotools.cocoeval import COCOeval 42 | from pycocotools import mask as maskUtils 43 | 44 | import zipfile 45 | import urllib.request 46 | import shutil 47 | 48 | from config import Config 49 | import utils 50 | import model as modellib 51 | 52 | import torch 53 | 54 | # Root directory of the project 55 | ROOT_DIR = os.getcwd() 56 | 57 | # Path to trained weights file 58 | COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.pth") 59 | 60 | # Directory to save logs and model checkpoints, if not provided 61 | # through the command line argument --logs 62 | DEFAULT_LOGS_DIR = os.path.join(ROOT_DIR, "logs") 63 | DEFAULT_DATASET_YEAR = "2014" 64 | 65 | ############################################################ 66 | # Configurations 67 | ############################################################ 68 | 69 | 70 | class FishConfig(Config): 71 | """Configuration for training on the toy dataset. 72 | Derives from the base Config class and overrides some values. 73 | """ 74 | # Give the configuration a recognizable name 75 | NAME = "fish" 76 | 77 | # We use a GPU with 12GB memory, which can fit two images. 78 | # Adjust down if you use a smaller GPU. 79 | IMAGES_PER_GPU = 2 80 | 81 | # Number of classes (including background) 82 | NUM_CLASSES = 1 + 1 # Background + baloon 83 | 84 | # Number of training steps per epoch 85 | STEPS_PER_EPOCH = 100 86 | 87 | # Skip detections with < 90% confidence 88 | DETECTION_MIN_CONFIDENCE = 0.9 89 | 90 | 91 | ############################################################ 92 | # Dataset 93 | ############################################################ 94 | 95 | class FishDataset(utils.Dataset): 96 | 97 | def load_fish(self, dataset_dir, subset): 98 | """Load a subset of the fish dataset. 99 | dataset_dir: Root directory of the dataset. 100 | subset: Subset to load: train or val 101 | """ 102 | # Add classes. We have only one class to add. 103 | self.add_class("fish", 1, "fish") 104 | 105 | # Train or validation dataset? 106 | assert subset in ["train", "val"] 107 | dataset_dir = os.path.join(dataset_dir, subset) 108 | 109 | # Load annotations 110 | # VGG Image Annotator saves each image in the form: 111 | # { 'filename': '28503151_5b5b7ec140_b.jpg', 112 | # 'regions': { 113 | # '0': { 114 | # 'region_attributes': {}, 115 | # 'shape_attributes': { 116 | # 'all_points_x': [...], 117 | # 'all_points_y': [...], 118 | # 'name': 'polygon'}}, 119 | # ... more regions ... 120 | # }, 121 | # 'size': 100202 122 | # } 123 | # We mostly care about the x and y coordinates of each region 124 | annotations = json.load(open(os.path.join(dataset_dir, "via_region_data.json"))) 125 | annotations = list(annotations.values()) # don't need the dict keys 126 | 127 | # The VIA tool saves images in the JSON even if they don't have any 128 | # annotations. Skip unannotated images. 129 | annotations = [a for a in annotations if a['regions']] 130 | 131 | # Add images 132 | for a in annotations: 133 | # Get the x, y coordinaets of points of the polygons that make up 134 | # the outline of each object instance. There are stores in the 135 | # shape_attributes (see json format above) 136 | polygons = [r['shape_attributes'] for r in a['regions'].values()] 137 | 138 | # load_mask() needs the image size to convert polygons to masks. 139 | # Unfortunately, VIA doesn't include it in JSON, so we must read 140 | # the image. This is only managable since the dataset is tiny. 
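            # (Note on the 'regions' handling above: VIA 1.x exports 'regions'
            # as a dict, which is what .values() assumes; VIA 2.x exports a
            # list, in which case use
            #     polygons = [r['shape_attributes'] for r in a['regions']]
            # instead.)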
141 | image_path = os.path.join(dataset_dir, a['filename']) 142 | image = skimage.io.imread(image_path) 143 | height, width = image.shape[:2] 144 | 145 | self.add_image( 146 | "fish", 147 | image_id=a['filename'], # use file name as a unique image id 148 | path=image_path, 149 | width=width, height=height, 150 | polygons=polygons) 151 | 152 | def load_mask(self, image_id): 153 | """Generate instance masks for an image. 154 | Returns: 155 | masks: A bool array of shape [height, width, instance count] with 156 | one mask per instance. 157 | class_ids: a 1D array of class IDs of the instance masks. 158 | """ 159 | # If not a fish dataset image, delegate to parent class. 160 | image_info = self.image_info[image_id] 161 | if image_info["source"] != "fish": 162 | print ("Not a Baloon") 163 | return super(self.__class__, self).load_mask(image_id) 164 | 165 | # Convert polygons to a bitmap mask of shape 166 | # [height, width, instance_count] 167 | info = self.image_info[image_id] 168 | mask = np.zeros([info["height"], info["width"], len(info["polygons"])], 169 | dtype=np.uint8) 170 | for i, p in enumerate(info["polygons"]): 171 | # Get indexes of pixels inside the polygon and set them to 1 172 | rr, cc = skimage.draw.polygon(p['all_points_y'], p['all_points_x']) 173 | mask[rr, cc, i] = 1 174 | 175 | # Return mask, and array of class IDs of each instance. Since we have 176 | # one class ID only, we return an array of 1s 177 | return mask, np.ones([mask.shape[-1]], dtype=np.int32) 178 | 179 | def image_reference(self, image_id): 180 | """Return the path of the image.""" 181 | info = self.image_info[image_id] 182 | if info["source"] == "fish": 183 | return info["path"] 184 | else: 185 | super(self.__class__, self).image_reference(image_id) 186 | 187 | 188 | 189 | 190 | ############################################################ 191 | # Configurations 192 | ############################################################ 193 | 194 | class CocoConfig(Config): 195 | """Configuration for training on MS COCO. 196 | Derives from the base Config class and overrides values specific 197 | to the COCO dataset. 198 | """ 199 | # Give the configuration a recognizable name 200 | NAME = "coco" 201 | 202 | # We use one GPU with 8GB memory, which can fit one image. 203 | # Adjust down if you use a smaller GPU. 204 | IMAGES_PER_GPU = 16 205 | 206 | # Uncomment to train on 8 GPUs (default is 1) 207 | # GPU_COUNT = 8 208 | 209 | # Number of classes (including background) 210 | NUM_CLASSES = 1 + 80 # COCO has 80 classes 211 | 212 | 213 | ############################################################ 214 | # Dataset 215 | ############################################################ 216 | 217 | class CocoDataset(utils.Dataset): 218 | def load_coco(self, dataset_dir, subset, year=DEFAULT_DATASET_YEAR, class_ids=None, 219 | class_map=None, return_coco=False, auto_download=False): 220 | """Load a subset of the COCO dataset. 221 | dataset_dir: The root directory of the COCO dataset. 222 | subset: What to load (train, val, minival, valminusminival) 223 | year: What dataset year to load (2014, 2017) as a string, not an integer 224 | class_ids: If provided, only loads images that have the given classes. 225 | class_map: TODO: Not implemented yet. Supports maping classes from 226 | different datasets to the same class ID. 227 | return_coco: If True, returns the COCO object. 
228 | auto_download: Automatically download and unzip MS-COCO images and annotations 229 | """ 230 | 231 | if auto_download is True: 232 | self.auto_download(dataset_dir, subset, year) 233 | 234 | coco = COCO("{}/annotations/instances_{}{}.json".format(dataset_dir, subset, year)) 235 | if subset == "minival" or subset == "valminusminival": 236 | subset = "val" 237 | image_dir = "{}/{}{}".format(dataset_dir, subset, year) 238 | 239 | # Load all classes or a subset? 240 | if not class_ids: 241 | # All classes 242 | class_ids = sorted(coco.getCatIds()) 243 | 244 | # All images or a subset? 245 | if class_ids: 246 | image_ids = [] 247 | for id in class_ids: 248 | image_ids.extend(list(coco.getImgIds(catIds=[id]))) 249 | # Remove duplicates 250 | image_ids = list(set(image_ids)) 251 | else: 252 | # All images 253 | image_ids = list(coco.imgs.keys()) 254 | 255 | # Add classes 256 | for i in class_ids: 257 | self.add_class("coco", i, coco.loadCats(i)[0]["name"]) 258 | 259 | # Add images 260 | for i in image_ids: 261 | self.add_image( 262 | "coco", image_id=i, 263 | path=os.path.join(image_dir, coco.imgs[i]['file_name']), 264 | width=coco.imgs[i]["width"], 265 | height=coco.imgs[i]["height"], 266 | annotations=coco.loadAnns(coco.getAnnIds( 267 | imgIds=[i], catIds=class_ids, iscrowd=None))) 268 | if return_coco: 269 | return coco 270 | 271 | def auto_download(self, dataDir, dataType, dataYear): 272 | """Download the COCO dataset/annotations if requested. 273 | dataDir: The root directory of the COCO dataset. 274 | dataType: What to load (train, val, minival, valminusminival) 275 | dataYear: What dataset year to load (2014, 2017) as a string, not an integer 276 | Note: 277 | For 2014, use "train", "val", "minival", or "valminusminival" 278 | For 2017, only "train" and "val" annotations are available 279 | """ 280 | 281 | # Setup paths and file names 282 | if dataType == "minival" or dataType == "valminusminival": 283 | imgDir = "{}/{}{}".format(dataDir, "val", dataYear) 284 | imgZipFile = "{}/{}{}.zip".format(dataDir, "val", dataYear) 285 | imgURL = "http://images.cocodataset.org/zips/{}{}.zip".format("val", dataYear) 286 | else: 287 | imgDir = "{}/{}{}".format(dataDir, dataType, dataYear) 288 | imgZipFile = "{}/{}{}.zip".format(dataDir, dataType, dataYear) 289 | imgURL = "http://images.cocodataset.org/zips/{}{}.zip".format(dataType, dataYear) 290 | # print("Image paths:"); print(imgDir); print(imgZipFile); print(imgURL) 291 | 292 | # Create main folder if it doesn't exist yet 293 | if not os.path.exists(dataDir): 294 | os.makedirs(dataDir) 295 | 296 | # Download images if not available locally 297 | if not os.path.exists(imgDir): 298 | os.makedirs(imgDir) 299 | print("Downloading images to " + imgZipFile + " ...") 300 | with urllib.request.urlopen(imgURL) as resp, open(imgZipFile, 'wb') as out: 301 | shutil.copyfileobj(resp, out) 302 | print("... done downloading.") 303 | print("Unzipping " + imgZipFile) 304 | with zipfile.ZipFile(imgZipFile, "r") as zip_ref: 305 | zip_ref.extractall(dataDir) 306 | print("... 
done unzipping") 307 | print("Will use images in " + imgDir) 308 | 309 | # Setup annotations data paths 310 | annDir = "{}/annotations".format(dataDir) 311 | if dataType == "minival": 312 | annZipFile = "{}/instances_minival2014.json.zip".format(dataDir) 313 | annFile = "{}/instances_minival2014.json".format(annDir) 314 | annURL = "https://dl.dropboxusercontent.com/s/o43o90bna78omob/instances_minival2014.json.zip?dl=0" 315 | unZipDir = annDir 316 | elif dataType == "valminusminival": 317 | annZipFile = "{}/instances_valminusminival2014.json.zip".format(dataDir) 318 | annFile = "{}/instances_valminusminival2014.json".format(annDir) 319 | annURL = "https://dl.dropboxusercontent.com/s/s3tw5zcg7395368/instances_valminusminival2014.json.zip?dl=0" 320 | unZipDir = annDir 321 | else: 322 | annZipFile = "{}/annotations_trainval{}.zip".format(dataDir, dataYear) 323 | annFile = "{}/instances_{}{}.json".format(annDir, dataType, dataYear) 324 | annURL = "http://images.cocodataset.org/annotations/annotations_trainval{}.zip".format(dataYear) 325 | unZipDir = dataDir 326 | # print("Annotations paths:"); print(annDir); print(annFile); print(annZipFile); print(annURL) 327 | 328 | # Download annotations if not available locally 329 | if not os.path.exists(annDir): 330 | os.makedirs(annDir) 331 | if not os.path.exists(annFile): 332 | if not os.path.exists(annZipFile): 333 | print("Downloading zipped annotations to " + annZipFile + " ...") 334 | with urllib.request.urlopen(annURL) as resp, open(annZipFile, 'wb') as out: 335 | shutil.copyfileobj(resp, out) 336 | print("... done downloading.") 337 | print("Unzipping " + annZipFile) 338 | with zipfile.ZipFile(annZipFile, "r") as zip_ref: 339 | zip_ref.extractall(unZipDir) 340 | print("... done unzipping") 341 | print("Will use annotations in " + annFile) 342 | 343 | def load_mask(self, image_id): 344 | """Load instance masks for the given image. 345 | 346 | Different datasets use different ways to store masks. This 347 | function converts the different mask format to one format 348 | in the form of a bitmap [height, width, instances]. 349 | 350 | Returns: 351 | masks: A bool array of shape [height, width, instance count] with 352 | one mask per instance. 353 | class_ids: a 1D array of class IDs of the instance masks. 354 | """ 355 | # If not a COCO image, delegate to parent class. 356 | image_info = self.image_info[image_id] 357 | if image_info["source"] != "coco": 358 | return super(CocoDataset, self).load_mask(image_id) 359 | 360 | instance_masks = [] 361 | class_ids = [] 362 | annotations = self.image_info[image_id]["annotations"] 363 | # Build mask of shape [height, width, instance_count] and list 364 | # of class IDs that correspond to each channel of the mask. 365 | for annotation in annotations: 366 | class_id = self.map_source_class_id( 367 | "coco.{}".format(annotation['category_id'])) 368 | if class_id: 369 | m = self.annToMask(annotation, image_info["height"], 370 | image_info["width"]) 371 | # Some objects are so small that they're less than 1 pixel area 372 | # and end up rounded out. Skip those objects. 373 | if m.max() < 1: 374 | continue 375 | # Is it a crowd? If so, use a negative class ID. 376 | if annotation['iscrowd']: 377 | # Use negative class ID for crowds 378 | class_id *= -1 379 | # For crowd masks, annToMask() sometimes returns a mask 380 | # smaller than the given dimensions. If so, resize it. 
381 | if m.shape[0] != image_info["height"] or m.shape[1] != image_info["width"]: 382 | m = np.ones([image_info["height"], image_info["width"]], dtype=bool) 383 | instance_masks.append(m) 384 | class_ids.append(class_id) 385 | 386 | # Pack instance masks into an array 387 | if class_ids: 388 | mask = np.stack(instance_masks, axis=2) 389 | class_ids = np.array(class_ids, dtype=np.int32) 390 | return mask, class_ids 391 | else: 392 | # Call super class to return an empty mask 393 | return super(CocoDataset, self).load_mask(image_id) 394 | 395 | def image_reference(self, image_id): 396 | """Return a link to the image in the COCO Website.""" 397 | info = self.image_info[image_id] 398 | if info["source"] == "coco": 399 | return "http://cocodataset.org/#explore?id={}".format(info["id"]) 400 | else: 401 | super(CocoDataset, self).image_reference(image_id) 402 | 403 | # The following two functions are from pycocotools with a few changes. 404 | 405 | def annToRLE(self, ann, height, width): 406 | """ 407 | Convert annotation which can be polygons, uncompressed RLE to RLE. 408 | :return: binary mask (numpy 2D array) 409 | """ 410 | segm = ann['segmentation'] 411 | if isinstance(segm, list): 412 | # polygon -- a single object might consist of multiple parts 413 | # we merge all parts into one mask rle code 414 | rles = maskUtils.frPyObjects(segm, height, width) 415 | rle = maskUtils.merge(rles) 416 | elif isinstance(segm['counts'], list): 417 | # uncompressed RLE 418 | rle = maskUtils.frPyObjects(segm, height, width) 419 | else: 420 | # rle 421 | rle = ann['segmentation'] 422 | return rle 423 | 424 | def annToMask(self, ann, height, width): 425 | """ 426 | Convert annotation which can be polygons, uncompressed RLE, or RLE to binary mask. 427 | :return: binary mask (numpy 2D array) 428 | """ 429 | rle = self.annToRLE(ann, height, width) 430 | m = maskUtils.decode(rle) 431 | return m 432 | 433 | 434 | ############################################################ 435 | # COCO Evaluation 436 | ############################################################ 437 | 438 | def build_coco_results(dataset, image_ids, rois, class_ids, scores, masks): 439 | """Arrange resutls to match COCO specs in http://cocodataset.org/#format 440 | """ 441 | # If no results, return an empty list 442 | if rois is None: 443 | return [] 444 | 445 | results = [] 446 | for image_id in image_ids: 447 | # Loop through detections 448 | for i in range(rois.shape[0]): 449 | class_id = class_ids[i] 450 | score = scores[i] 451 | bbox = np.around(rois[i], 1) 452 | mask = masks[:, :, i] 453 | 454 | result = { 455 | "image_id": image_id, 456 | "category_id": dataset.get_source_class_id(class_id, "coco"), 457 | "bbox": [bbox[1], bbox[0], bbox[3] - bbox[1], bbox[2] - bbox[0]], 458 | "score": score, 459 | "segmentation": maskUtils.encode(np.asfortranarray(mask)) 460 | } 461 | results.append(result) 462 | return results 463 | 464 | 465 | def evaluate_coco(model, dataset, coco, eval_type="bbox", limit=0, image_ids=None): 466 | """Runs official COCO evaluation. 467 | dataset: A Dataset object with valiadtion data 468 | eval_type: "bbox" or "segm" for bounding box or segmentation evaluation 469 | limit: if not 0, it's the number of images to use for evaluation 470 | """ 471 | # Pick COCO images from the dataset 472 | image_ids = image_ids or dataset.image_ids 473 | 474 | # Limit to a subset 475 | if limit: 476 | image_ids = image_ids[:limit] 477 | 478 | # Get corresponding COCO image IDs. 
479 | coco_image_ids = [dataset.image_info[id]["id"] for id in image_ids] 480 | 481 | t_prediction = 0 482 | t_start = time.time() 483 | 484 | results = [] 485 | for i, image_id in enumerate(image_ids): 486 | # Load image 487 | image = dataset.load_image(image_id) 488 | 489 | # Run detection 490 | t = time.time() 491 | r = model.detect([image])[0] 492 | t_prediction += (time.time() - t) 493 | 494 | # Convert results to COCO format 495 | image_results = build_coco_results(dataset, coco_image_ids[i:i + 1], 496 | r["rois"], r["class_ids"], 497 | r["scores"], r["masks"]) 498 | results.extend(image_results) 499 | 500 | # Load results. This modifies results with additional attributes. 501 | coco_results = coco.loadRes(results) 502 | 503 | # Evaluate 504 | cocoEval = COCOeval(coco, coco_results, eval_type) 505 | cocoEval.params.imgIds = coco_image_ids 506 | cocoEval.evaluate() 507 | cocoEval.accumulate() 508 | cocoEval.summarize() 509 | 510 | print("Prediction time: {}. Average {}/image".format( 511 | t_prediction, t_prediction / len(image_ids))) 512 | print("Total time: ", time.time() - t_start) 513 | 514 | 515 | ############################################################ 516 | # Training 517 | ############################################################ 518 | 519 | 520 | if __name__ == '__main__': 521 | import argparse 522 | 523 | # Parse command line arguments 524 | parser = argparse.ArgumentParser( 525 | description='Train Mask R-CNN on MS COCO.') 526 | parser.add_argument("command", 527 | metavar="", 528 | help="'train' or 'evaluate' on MS COCO") 529 | parser.add_argument('--dataset', required=True, 530 | metavar="/path/to/coco/", 531 | help='Directory of the MS-COCO dataset') 532 | parser.add_argument('--year', required=False, 533 | default=DEFAULT_DATASET_YEAR, 534 | metavar="", 535 | help='Year of the MS-COCO dataset (2014 or 2017) (default=2014)') 536 | parser.add_argument('--model', required=False, 537 | metavar="/path/to/weights.pth", 538 | help="Path to weights .pth file or 'coco'") 539 | parser.add_argument('--logs', required=False, 540 | default=DEFAULT_LOGS_DIR, 541 | metavar="/path/to/logs/", 542 | help='Logs and checkpoints directory (default=logs/)') 543 | parser.add_argument('--limit', required=False, 544 | default=500, 545 | metavar="", 546 | help='Images to use for evaluation (default=500)') 547 | parser.add_argument('--download', required=False, 548 | default=False, 549 | metavar="", 550 | help='Automatically download and unzip MS-COCO files (default=False)', 551 | type=bool) 552 | args = parser.parse_args() 553 | print("Command: ", args.command) 554 | print("Model: ", args.model) 555 | print("Dataset: ", args.dataset) 556 | print("Year: ", args.year) 557 | print("Logs: ", args.logs) 558 | print("Auto Download: ", args.download) 559 | 560 | # Configurations 561 | if args.command == "train": 562 | config = CocoConfig() 563 | else: 564 | class InferenceConfig(CocoConfig): 565 | # Set batch size to 1 since we'll be running inference on 566 | # one image at a time. 
Batch size = GPU_COUNT * IMAGES_PER_GPU 567 | GPU_COUNT = 1 568 | IMAGES_PER_GPU = 1 569 | DETECTION_MIN_CONFIDENCE = 0 570 | config = InferenceConfig() 571 | config.display() 572 | 573 | # Create model 574 | if args.command == "train": 575 | model = modellib.MaskRCNN(config=config, 576 | model_dir=args.logs) 577 | else: 578 | model = modellib.MaskRCNN(config=config, 579 | model_dir=args.logs) 580 | if config.GPU_COUNT: 581 | model = model.cuda() 582 | 583 | # Select weights file to load 584 | if args.model: 585 | if args.model.lower() == "coco": 586 | model_path = COCO_MODEL_PATH 587 | elif args.model.lower() == "last": 588 | # Find last trained weights 589 | model_path = model.find_last()[1] 590 | elif args.model.lower() == "imagenet": 591 | # Start from ImageNet trained weights 592 | model_path = config.IMAGENET_MODEL_PATH 593 | else: 594 | model_path = args.model 595 | else: 596 | model_path = "" 597 | 598 | # Load weights 599 | print("Loading weights ", model_path) 600 | model.load_weights(model_path) 601 | 602 | # Train or evaluate 603 | if args.command == "train": 604 | # Training dataset. Use the training set and 35K from the 605 | # validation set, as as in the Mask RCNN paper. 606 | dataset_train = CocoDataset() 607 | dataset_train.load_coco(args.dataset, "train", year=args.year, auto_download=args.download) 608 | dataset_train.load_coco(args.dataset, "valminusminival", year=args.year, auto_download=args.download) 609 | dataset_train.prepare() 610 | 611 | # Validation dataset 612 | dataset_val = CocoDataset() 613 | dataset_val.load_coco(args.dataset, "minival", year=args.year, auto_download=args.download) 614 | dataset_val.prepare() 615 | 616 | # *** This training schedule is an example. Update to your needs *** 617 | 618 | # Training - Stage 1 619 | print("Training network heads") 620 | model.train_model(dataset_train, dataset_val, 621 | learning_rate=config.LEARNING_RATE, 622 | epochs=40, 623 | layers='heads') 624 | 625 | # Training - Stage 2 626 | # Finetune layers from ResNet stage 4 and up 627 | print("Fine tune Resnet stage 4 and up") 628 | model.train_model(dataset_train, dataset_val, 629 | learning_rate=config.LEARNING_RATE, 630 | epochs=120, 631 | layers='4+') 632 | 633 | # Training - Stage 3 634 | # Fine tune all layers 635 | print("Fine tune all layers") 636 | model.train_model(dataset_train, dataset_val, 637 | learning_rate=config.LEARNING_RATE / 10, 638 | epochs=160, 639 | layers='all') 640 | 641 | elif args.command == "evaluate": 642 | # Validation dataset 643 | dataset_val = CocoDataset() 644 | coco = dataset_val.load_coco(args.dataset, "minival", year=args.year, return_coco=True, auto_download=args.download) 645 | dataset_val.prepare() 646 | print("Running COCO evaluation on {} images.".format(args.limit)) 647 | evaluate_coco(model, dataset_val, coco, "bbox", limit=int(args.limit)) 648 | evaluate_coco(model, dataset_val, coco, "segm", limit=int(args.limit)) 649 | else: 650 | print("'{}' is not recognized. 
" 651 | "Use 'train' or 'evaluate'".format(args.command)) 652 | -------------------------------------------------------------------------------- /images/find_fish.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michhar/pytorch-mask-rcnn-samples/f03054ab01f162c23bcccfc5de32a633041c5237/images/find_fish.png -------------------------------------------------------------------------------- /images/rec_fish.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michhar/pytorch-mask-rcnn-samples/f03054ab01f162c23bcccfc5de32a633041c5237/images/rec_fish.png -------------------------------------------------------------------------------- /images/vgg_annotated_fish.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michhar/pytorch-mask-rcnn-samples/f03054ab01f162c23bcccfc5de32a633041c5237/images/vgg_annotated_fish.jpg -------------------------------------------------------------------------------- /lib/nms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michhar/pytorch-mask-rcnn-samples/f03054ab01f162c23bcccfc5de32a633041c5237/lib/nms/__init__.py -------------------------------------------------------------------------------- /lib/nms/build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.ffi import create_extension 4 | 5 | 6 | sources = ['src/nms.c'] 7 | headers = ['src/nms.h'] 8 | defines = [] 9 | with_cuda = False 10 | 11 | if torch.cuda.is_available(): 12 | print('Including CUDA code.') 13 | sources += ['src/nms_cuda.c'] 14 | headers += ['src/nms_cuda.h'] 15 | defines += [('WITH_CUDA', None)] 16 | with_cuda = True 17 | 18 | this_file = os.path.dirname(os.path.realpath(__file__)) 19 | print(this_file) 20 | extra_objects = ['src/cuda/nms_kernel.cu.o'] 21 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 22 | 23 | ffi = create_extension( 24 | '_ext.nms', 25 | headers=headers, 26 | sources=sources, 27 | define_macros=defines, 28 | relative_to=__file__, 29 | with_cuda=with_cuda, 30 | extra_objects=extra_objects 31 | ) 32 | 33 | if __name__ == '__main__': 34 | ffi.build() 35 | -------------------------------------------------------------------------------- /lib/nms/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from lib.nms.pth_nms import pth_nms 12 | 13 | 14 | def nms(dets, thresh): 15 | """Dispatch to either CPU or GPU NMS implementations. 
16 | Accept dets as tensor""" 17 | return pth_nms(dets, thresh) 18 | -------------------------------------------------------------------------------- /lib/nms/pth_nms.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from ._ext import nms 3 | import numpy as np 4 | 5 | def pth_nms(dets, thresh): 6 | """ 7 | dets has to be a tensor 8 | """ 9 | if not dets.is_cuda: 10 | x1 = dets[:, 0] 11 | y1 = dets[:, 1] 12 | x2 = dets[:, 2] 13 | y2 = dets[:, 3] 14 | scores = dets[:, 4] 15 | 16 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 17 | order = scores.sort(0, descending=True)[1] 18 | # order = torch.from_numpy(np.ascontiguousarray(scores.numpy().argsort()[::-1])).long() 19 | 20 | keep = torch.LongTensor(dets.size(0)) 21 | num_out = torch.LongTensor(1) 22 | nms.cpu_nms(keep, num_out, dets, order, areas, thresh) 23 | 24 | return keep[:num_out[0]] 25 | else: 26 | x1 = dets[:, 0] 27 | y1 = dets[:, 1] 28 | x2 = dets[:, 2] 29 | y2 = dets[:, 3] 30 | scores = dets[:, 4] 31 | 32 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 33 | order = scores.sort(0, descending=True)[1] 34 | # order = torch.from_numpy(np.ascontiguousarray(scores.cpu().numpy().argsort()[::-1])).long().cuda() 35 | 36 | dets = dets[order].contiguous() 37 | 38 | keep = torch.LongTensor(dets.size(0)) 39 | num_out = torch.LongTensor(1) 40 | # keep = torch.cuda.LongTensor(dets.size(0)) 41 | # num_out = torch.cuda.LongTensor(1) 42 | nms.gpu_nms(keep, num_out, dets, thresh) 43 | 44 | return order[keep[:num_out[0]].cuda()].contiguous() 45 | # return order[keep[:num_out[0]]].contiguous() 46 | 47 | -------------------------------------------------------------------------------- /lib/nms/src/cuda/nms_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // ------------------------------------------------------------------ 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | #include 12 | #include 13 | #include 14 | #include "nms_kernel.h" 15 | 16 | __device__ inline float devIoU(float const * const a, float const * const b) { 17 | float left = fmaxf(a[0], b[0]), right = fminf(a[2], b[2]); 18 | float top = fmaxf(a[1], b[1]), bottom = fminf(a[3], b[3]); 19 | float width = fmaxf(right - left + 1, 0.f), height = fmaxf(bottom - top + 1, 0.f); 20 | float interS = width * height; 21 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 22 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 23 | return interS / (Sa + Sb - interS); 24 | } 25 | 26 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 27 | const float *dev_boxes, unsigned long long *dev_mask) { 28 | const int row_start = blockIdx.y; 29 | const int col_start = blockIdx.x; 30 | 31 | // if (row_start > col_start) return; 32 | 33 | const int row_size = 34 | fminf(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 35 | const int col_size = 36 | fminf(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 37 | 38 | __shared__ float block_boxes[threadsPerBlock * 5]; 39 | if (threadIdx.x < col_size) { 40 | block_boxes[threadIdx.x * 5 + 0] = 41 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 42 | block_boxes[threadIdx.x * 5 + 1] = 43 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 44 | block_boxes[threadIdx.x * 5 + 
2] = 45 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 46 | block_boxes[threadIdx.x * 5 + 3] = 47 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 48 | block_boxes[threadIdx.x * 5 + 4] = 49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 50 | } 51 | __syncthreads(); 52 | 53 | if (threadIdx.x < row_size) { 54 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 55 | const float *cur_box = dev_boxes + cur_box_idx * 5; 56 | int i = 0; 57 | unsigned long long t = 0; 58 | int start = 0; 59 | if (row_start == col_start) { 60 | start = threadIdx.x + 1; 61 | } 62 | for (i = start; i < col_size; i++) { 63 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 64 | t |= 1ULL << i; 65 | } 66 | } 67 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 68 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 69 | } 70 | } 71 | 72 | 73 | void _nms(int boxes_num, float * boxes_dev, 74 | unsigned long long * mask_dev, float nms_overlap_thresh) { 75 | 76 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 77 | DIVUP(boxes_num, threadsPerBlock)); 78 | dim3 threads(threadsPerBlock); 79 | nms_kernel<<>>(boxes_num, 80 | nms_overlap_thresh, 81 | boxes_dev, 82 | mask_dev); 83 | } 84 | 85 | #ifdef __cplusplus 86 | } 87 | #endif 88 | -------------------------------------------------------------------------------- /lib/nms/src/cuda/nms_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _NMS_KERNEL 2 | #define _NMS_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 9 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 10 | 11 | void _nms(int boxes_num, float * boxes_dev, 12 | unsigned long long * mask_dev, float nms_overlap_thresh); 13 | 14 | #ifdef __cplusplus 15 | } 16 | #endif 17 | 18 | #endif 19 | 20 | -------------------------------------------------------------------------------- /lib/nms/src/nms.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int cpu_nms(THLongTensor * keep_out, THLongTensor * num_out, THFloatTensor * boxes, THLongTensor * order, THFloatTensor * areas, float nms_overlap_thresh) { 5 | // boxes has to be sorted 6 | THArgCheck(THLongTensor_isContiguous(keep_out), 0, "keep_out must be contiguous"); 7 | THArgCheck(THLongTensor_isContiguous(boxes), 2, "boxes must be contiguous"); 8 | THArgCheck(THLongTensor_isContiguous(order), 3, "order must be contiguous"); 9 | THArgCheck(THLongTensor_isContiguous(areas), 4, "areas must be contiguous"); 10 | // Number of ROIs 11 | long boxes_num = THFloatTensor_size(boxes, 0); 12 | long boxes_dim = THFloatTensor_size(boxes, 1); 13 | 14 | long * keep_out_flat = THLongTensor_data(keep_out); 15 | float * boxes_flat = THFloatTensor_data(boxes); 16 | long * order_flat = THLongTensor_data(order); 17 | float * areas_flat = THFloatTensor_data(areas); 18 | 19 | THByteTensor* suppressed = THByteTensor_newWithSize1d(boxes_num); 20 | THByteTensor_fill(suppressed, 0); 21 | unsigned char * suppressed_flat = THByteTensor_data(suppressed); 22 | 23 | // nominal indices 24 | int i, j; 25 | // sorted indices 26 | int _i, _j; 27 | // temp variables for box i's (the box currently under consideration) 28 | float ix1, iy1, ix2, iy2, iarea; 29 | // variables for computing overlap with box j (lower scoring box) 30 | float xx1, yy1, xx2, yy2; 31 | float w, h; 32 | float inter, ovr; 33 | 34 | long num_to_keep = 
0; 35 | for (_i=0; _i < boxes_num; ++_i) { 36 | i = order_flat[_i]; 37 | if (suppressed_flat[i] == 1) { 38 | continue; 39 | } 40 | keep_out_flat[num_to_keep++] = i; 41 | ix1 = boxes_flat[i * boxes_dim]; 42 | iy1 = boxes_flat[i * boxes_dim + 1]; 43 | ix2 = boxes_flat[i * boxes_dim + 2]; 44 | iy2 = boxes_flat[i * boxes_dim + 3]; 45 | iarea = areas_flat[i]; 46 | for (_j = _i + 1; _j < boxes_num; ++_j) { 47 | j = order_flat[_j]; 48 | if (suppressed_flat[j] == 1) { 49 | continue; 50 | } 51 | xx1 = fmaxf(ix1, boxes_flat[j * boxes_dim]); 52 | yy1 = fmaxf(iy1, boxes_flat[j * boxes_dim + 1]); 53 | xx2 = fminf(ix2, boxes_flat[j * boxes_dim + 2]); 54 | yy2 = fminf(iy2, boxes_flat[j * boxes_dim + 3]); 55 | w = fmaxf(0.0, xx2 - xx1 + 1); 56 | h = fmaxf(0.0, yy2 - yy1 + 1); 57 | inter = w * h; 58 | ovr = inter / (iarea + areas_flat[j] - inter); 59 | if (ovr >= nms_overlap_thresh) { 60 | suppressed_flat[j] = 1; 61 | } 62 | } 63 | } 64 | 65 | long *num_out_flat = THLongTensor_data(num_out); 66 | *num_out_flat = num_to_keep; 67 | THByteTensor_free(suppressed); 68 | return 1; 69 | } -------------------------------------------------------------------------------- /lib/nms/src/nms.h: -------------------------------------------------------------------------------- 1 | int cpu_nms(THLongTensor * keep_out, THLongTensor * num_out, THFloatTensor * boxes, THLongTensor * order, THFloatTensor * areas, float nms_overlap_thresh); -------------------------------------------------------------------------------- /lib/nms/src/nms_cuda.c: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // ------------------------------------------------------------------ 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "cuda/nms_kernel.h" 13 | 14 | 15 | extern THCState *state; 16 | 17 | int gpu_nms(THLongTensor * keep, THLongTensor* num_out, THCudaTensor * boxes, float nms_overlap_thresh) { 18 | // boxes has to be sorted 19 | THArgCheck(THLongTensor_isContiguous(keep), 0, "boxes must be contiguous"); 20 | THArgCheck(THCudaTensor_isContiguous(state, boxes), 2, "boxes must be contiguous"); 21 | // Number of ROIs 22 | int boxes_num = THCudaTensor_size(state, boxes, 0); 23 | int boxes_dim = THCudaTensor_size(state, boxes, 1); 24 | 25 | float* boxes_flat = THCudaTensor_data(state, boxes); 26 | 27 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 28 | THCudaLongTensor * mask = THCudaLongTensor_newWithSize2d(state, boxes_num, col_blocks); 29 | unsigned long long* mask_flat = THCudaLongTensor_data(state, mask); 30 | 31 | _nms(boxes_num, boxes_flat, mask_flat, nms_overlap_thresh); 32 | 33 | THLongTensor * mask_cpu = THLongTensor_newWithSize2d(boxes_num, col_blocks); 34 | THLongTensor_copyCuda(state, mask_cpu, mask); 35 | THCudaLongTensor_free(state, mask); 36 | 37 | unsigned long long * mask_cpu_flat = THLongTensor_data(mask_cpu); 38 | 39 | THLongTensor * remv_cpu = THLongTensor_newWithSize1d(col_blocks); 40 | unsigned long long* remv_cpu_flat = THLongTensor_data(remv_cpu); 41 | THLongTensor_fill(remv_cpu, 0); 42 | 43 | long * keep_flat = THLongTensor_data(keep); 44 | long num_to_keep = 0; 45 | 46 | int i, j; 47 | for (i = 0; i < boxes_num; i++) { 48 | int nblock = i / threadsPerBlock; 49 | int inblock = i % threadsPerBlock; 50 | 51 | if 
(!(remv_cpu_flat[nblock] & (1ULL << inblock))) { 52 | keep_flat[num_to_keep++] = i; 53 | unsigned long long *p = &mask_cpu_flat[0] + i * col_blocks; 54 | for (j = nblock; j < col_blocks; j++) { 55 | remv_cpu_flat[j] |= p[j]; 56 | } 57 | } 58 | } 59 | 60 | long * num_out_flat = THLongTensor_data(num_out); 61 | * num_out_flat = num_to_keep; 62 | 63 | THLongTensor_free(mask_cpu); 64 | THLongTensor_free(remv_cpu); 65 | 66 | return 1; 67 | } 68 | -------------------------------------------------------------------------------- /lib/nms/src/nms_cuda.h: -------------------------------------------------------------------------------- 1 | int gpu_nms(THLongTensor * keep_out, THLongTensor* num_out, THCudaTensor * boxes, float nms_overlap_thresh); -------------------------------------------------------------------------------- /lib/roi_align/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michhar/pytorch-mask-rcnn-samples/f03054ab01f162c23bcccfc5de32a633041c5237/lib/roi_align/__init__.py -------------------------------------------------------------------------------- /lib/roi_align/build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.ffi import create_extension 4 | 5 | 6 | sources = ['src/crop_and_resize.c'] 7 | headers = ['src/crop_and_resize.h'] 8 | defines = [] 9 | with_cuda = False 10 | 11 | extra_objects = [] 12 | if torch.cuda.is_available(): 13 | print('Including CUDA code.') 14 | sources += ['src/crop_and_resize_gpu.c'] 15 | headers += ['src/crop_and_resize_gpu.h'] 16 | defines += [('WITH_CUDA', None)] 17 | extra_objects += ['src/cuda/crop_and_resize_kernel.cu.o'] 18 | with_cuda = True 19 | 20 | extra_compile_args = ['-std=c99'] 21 | 22 | this_file = os.path.dirname(os.path.realpath(__file__)) 23 | print(this_file) 24 | sources = [os.path.join(this_file, fname) for fname in sources] 25 | headers = [os.path.join(this_file, fname) for fname in headers] 26 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 27 | 28 | ffi = create_extension( 29 | '_ext.crop_and_resize', 30 | headers=headers, 31 | sources=sources, 32 | define_macros=defines, 33 | relative_to=__file__, 34 | with_cuda=with_cuda, 35 | extra_objects=extra_objects, 36 | extra_compile_args=extra_compile_args 37 | ) 38 | 39 | if __name__ == '__main__': 40 | ffi.build() 41 | -------------------------------------------------------------------------------- /lib/roi_align/crop_and_resize.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from torch.autograd import Function 6 | 7 | from ._ext import crop_and_resize as _backend 8 | 9 | 10 | class CropAndResizeFunction(Function): 11 | 12 | def __init__(self, crop_height, crop_width, extrapolation_value=0): 13 | self.crop_height = crop_height 14 | self.crop_width = crop_width 15 | self.extrapolation_value = extrapolation_value 16 | 17 | def forward(self, image, boxes, box_ind): 18 | crops = torch.zeros_like(image) 19 | 20 | if image.is_cuda: 21 | _backend.crop_and_resize_gpu_forward( 22 | image, boxes, box_ind, 23 | self.extrapolation_value, self.crop_height, self.crop_width, crops) 24 | else: 25 | _backend.crop_and_resize_forward( 26 | image, boxes, box_ind, 27 | self.extrapolation_value, self.crop_height, self.crop_width, crops) 28 | 29 | # save for backward 30 | 
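        # Note: keeping state on self and calling save_for_backward this way
        # follows the old-style autograd.Function API used around PyTorch 0.4;
        # newer releases expect static forward/backward methods that receive a
        # ctx argument instead.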
self.im_size = image.size() 31 | self.save_for_backward(boxes, box_ind) 32 | 33 | return crops 34 | 35 | def backward(self, grad_outputs): 36 | boxes, box_ind = self.saved_tensors 37 | 38 | grad_outputs = grad_outputs.contiguous() 39 | grad_image = torch.zeros_like(grad_outputs).resize_(*self.im_size) 40 | 41 | if grad_outputs.is_cuda: 42 | _backend.crop_and_resize_gpu_backward( 43 | grad_outputs, boxes, box_ind, grad_image 44 | ) 45 | else: 46 | _backend.crop_and_resize_backward( 47 | grad_outputs, boxes, box_ind, grad_image 48 | ) 49 | 50 | return grad_image, None, None 51 | 52 | 53 | class CropAndResize(nn.Module): 54 | """ 55 | Crop and resize ported from tensorflow 56 | See more details on https://www.tensorflow.org/api_docs/python/tf/image/crop_and_resize 57 | """ 58 | 59 | def __init__(self, crop_height, crop_width, extrapolation_value=0): 60 | super(CropAndResize, self).__init__() 61 | 62 | self.crop_height = crop_height 63 | self.crop_width = crop_width 64 | self.extrapolation_value = extrapolation_value 65 | 66 | def forward(self, image, boxes, box_ind): 67 | return CropAndResizeFunction(self.crop_height, self.crop_width, self.extrapolation_value)(image, boxes, box_ind) 68 | -------------------------------------------------------------------------------- /lib/roi_align/roi_align.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | from .crop_and_resize import CropAndResizeFunction, CropAndResize 5 | 6 | 7 | class RoIAlign(nn.Module): 8 | 9 | def __init__(self, crop_height, crop_width, extrapolation_value=0, transform_fpcoor=True): 10 | super(RoIAlign, self).__init__() 11 | 12 | self.crop_height = crop_height 13 | self.crop_width = crop_width 14 | self.extrapolation_value = extrapolation_value 15 | self.transform_fpcoor = transform_fpcoor 16 | 17 | def forward(self, featuremap, boxes, box_ind): 18 | """ 19 | RoIAlign based on crop_and_resize. 
20 | See more details on https://github.com/ppwwyyxx/tensorpack/blob/6d5ba6a970710eaaa14b89d24aace179eb8ee1af/examples/FasterRCNN/model.py#L301 21 | :param featuremap: NxCxHxW 22 | :param boxes: Mx4 float box with (x1, y1, x2, y2) **without normalization** 23 | :param box_ind: M 24 | :return: MxCxoHxoW 25 | """ 26 | x1, y1, x2, y2 = torch.split(boxes, 1, dim=1) 27 | image_height, image_width = featuremap.size()[2:4] 28 | 29 | if self.transform_fpcoor: 30 | spacing_w = (x2 - x1) / float(self.crop_width) 31 | spacing_h = (y2 - y1) / float(self.crop_height) 32 | 33 | nx0 = (x1 + spacing_w / 2 - 0.5) / float(image_width - 1) 34 | ny0 = (y1 + spacing_h / 2 - 0.5) / float(image_height - 1) 35 | nw = spacing_w * float(self.crop_width - 1) / float(image_width - 1) 36 | nh = spacing_h * float(self.crop_height - 1) / float(image_height - 1) 37 | 38 | boxes = torch.cat((ny0, nx0, ny0 + nh, nx0 + nw), 1) 39 | else: 40 | x1 = x1 / float(image_width - 1) 41 | x2 = x2 / float(image_width - 1) 42 | y1 = y1 / float(image_height - 1) 43 | y2 = y2 / float(image_height - 1) 44 | boxes = torch.cat((y1, x1, y2, x2), 1) 45 | 46 | boxes = boxes.detach().contiguous() 47 | box_ind = box_ind.detach() 48 | return CropAndResizeFunction(self.crop_height, self.crop_width, self.extrapolation_value)(featuremap, boxes, box_ind) 49 | -------------------------------------------------------------------------------- /lib/roi_align/src/crop_and_resize.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | 6 | void CropAndResizePerBox( 7 | const float * image_data, 8 | const int batch_size, 9 | const int depth, 10 | const int image_height, 11 | const int image_width, 12 | 13 | const float * boxes_data, 14 | const int * box_index_data, 15 | const int start_box, 16 | const int limit_box, 17 | 18 | float * corps_data, 19 | const int crop_height, 20 | const int crop_width, 21 | const float extrapolation_value 22 | ) { 23 | const int image_channel_elements = image_height * image_width; 24 | const int image_elements = depth * image_channel_elements; 25 | 26 | const int channel_elements = crop_height * crop_width; 27 | const int crop_elements = depth * channel_elements; 28 | 29 | int b; 30 | #pragma omp parallel for 31 | for (b = start_box; b < limit_box; ++b) { 32 | const float * box = boxes_data + b * 4; 33 | const float y1 = box[0]; 34 | const float x1 = box[1]; 35 | const float y2 = box[2]; 36 | const float x2 = box[3]; 37 | 38 | const int b_in = box_index_data[b]; 39 | if (b_in < 0 || b_in >= batch_size) { 40 | printf("Error: batch_index %d out of range [0, %d)\n", b_in, batch_size); 41 | exit(-1); 42 | } 43 | 44 | const float height_scale = 45 | (crop_height > 1) 46 | ? (y2 - y1) * (image_height - 1) / (crop_height - 1) 47 | : 0; 48 | const float width_scale = 49 | (crop_width > 1) ? (x2 - x1) * (image_width - 1) / (crop_width - 1) 50 | : 0; 51 | 52 | for (int y = 0; y < crop_height; ++y) 53 | { 54 | const float in_y = (crop_height > 1) 55 | ? 
y1 * (image_height - 1) + y * height_scale 56 | : 0.5 * (y1 + y2) * (image_height - 1); 57 | 58 | if (in_y < 0 || in_y > image_height - 1) 59 | { 60 | for (int x = 0; x < crop_width; ++x) 61 | { 62 | for (int d = 0; d < depth; ++d) 63 | { 64 | // crops(b, y, x, d) = extrapolation_value; 65 | corps_data[crop_elements * b + channel_elements * d + y * crop_width + x] = extrapolation_value; 66 | } 67 | } 68 | continue; 69 | } 70 | 71 | const int top_y_index = floorf(in_y); 72 | const int bottom_y_index = ceilf(in_y); 73 | const float y_lerp = in_y - top_y_index; 74 | 75 | for (int x = 0; x < crop_width; ++x) 76 | { 77 | const float in_x = (crop_width > 1) 78 | ? x1 * (image_width - 1) + x * width_scale 79 | : 0.5 * (x1 + x2) * (image_width - 1); 80 | if (in_x < 0 || in_x > image_width - 1) 81 | { 82 | for (int d = 0; d < depth; ++d) 83 | { 84 | corps_data[crop_elements * b + channel_elements * d + y * crop_width + x] = extrapolation_value; 85 | } 86 | continue; 87 | } 88 | 89 | const int left_x_index = floorf(in_x); 90 | const int right_x_index = ceilf(in_x); 91 | const float x_lerp = in_x - left_x_index; 92 | 93 | for (int d = 0; d < depth; ++d) 94 | { 95 | const float *pimage = image_data + b_in * image_elements + d * image_channel_elements; 96 | 97 | const float top_left = pimage[top_y_index * image_width + left_x_index]; 98 | const float top_right = pimage[top_y_index * image_width + right_x_index]; 99 | const float bottom_left = pimage[bottom_y_index * image_width + left_x_index]; 100 | const float bottom_right = pimage[bottom_y_index * image_width + right_x_index]; 101 | 102 | const float top = top_left + (top_right - top_left) * x_lerp; 103 | const float bottom = 104 | bottom_left + (bottom_right - bottom_left) * x_lerp; 105 | 106 | corps_data[crop_elements * b + channel_elements * d + y * crop_width + x] = top + (bottom - top) * y_lerp; 107 | } 108 | } // end for x 109 | } // end for y 110 | } // end for b 111 | 112 | } 113 | 114 | 115 | void crop_and_resize_forward( 116 | THFloatTensor * image, 117 | THFloatTensor * boxes, // [y1, x1, y2, x2] 118 | THIntTensor * box_index, // range in [0, batch_size) 119 | const float extrapolation_value, 120 | const int crop_height, 121 | const int crop_width, 122 | THFloatTensor * crops 123 | ) { 124 | //const int batch_size = image->size[0]; 125 | //const int depth = image->size[1]; 126 | //const int image_height = image->size[2]; 127 | //const int image_width = image->size[3]; 128 | 129 | //const int num_boxes = boxes->size[0]; 130 | 131 | const int batch_size = THFloatTensor_size(image, 0); 132 | const int depth = THFloatTensor_size(image, 1); 133 | const int image_height = THFloatTensor_size(image, 2); 134 | const int image_width = THFloatTensor_size(image, 3); 135 | 136 | const int num_boxes = THFloatTensor_size(boxes, 0); 137 | 138 | // init output space 139 | THFloatTensor_resize4d(crops, num_boxes, depth, crop_height, crop_width); 140 | THFloatTensor_zero(crops); 141 | 142 | // crop_and_resize for each box 143 | CropAndResizePerBox( 144 | THFloatTensor_data(image), 145 | batch_size, 146 | depth, 147 | image_height, 148 | image_width, 149 | 150 | THFloatTensor_data(boxes), 151 | THIntTensor_data(box_index), 152 | 0, 153 | num_boxes, 154 | 155 | THFloatTensor_data(crops), 156 | crop_height, 157 | crop_width, 158 | extrapolation_value 159 | ); 160 | 161 | } 162 | 163 | 164 | void crop_and_resize_backward( 165 | THFloatTensor * grads, 166 | THFloatTensor * boxes, // [y1, x1, y2, x2] 167 | THIntTensor * box_index, // range in [0, batch_size) 168 
| THFloatTensor * grads_image // resize to [bsize, c, hc, wc] 169 | ) 170 | { 171 | // shape 172 | //const int batch_size = grads_image->size[0]; 173 | //const int depth = grads_image->size[1]; 174 | //const int image_height = grads_image->size[2]; 175 | //const int image_width = grads_image->size[3]; 176 | 177 | //const int num_boxes = grads->size[0]; 178 | //const int crop_height = grads->size[2]; 179 | //const int crop_width = grads->size[3]; 180 | 181 | const int batch_size = THFloatTensor_size(grads_image, 0); 182 | const int depth = THFloatTensor_size(grads_image, 1); 183 | const int image_height = THFloatTensor_size(grads_image, 2); 184 | const int image_width = THFloatTensor_size(grads_image, 3); 185 | 186 | const int num_boxes = THFloatTensor_size(grads, 0); 187 | const int crop_height = THFloatTensor_size(grads,2); 188 | const int crop_width = THFloatTensor_size(grads,3); 189 | 190 | 191 | // n_elements 192 | const int image_channel_elements = image_height * image_width; 193 | const int image_elements = depth * image_channel_elements; 194 | 195 | const int channel_elements = crop_height * crop_width; 196 | const int crop_elements = depth * channel_elements; 197 | 198 | // init output space 199 | THFloatTensor_zero(grads_image); 200 | 201 | // data pointer 202 | const float * grads_data = THFloatTensor_data(grads); 203 | const float * boxes_data = THFloatTensor_data(boxes); 204 | const int * box_index_data = THIntTensor_data(box_index); 205 | float * grads_image_data = THFloatTensor_data(grads_image); 206 | 207 | for (int b = 0; b < num_boxes; ++b) { 208 | const float * box = boxes_data + b * 4; 209 | const float y1 = box[0]; 210 | const float x1 = box[1]; 211 | const float y2 = box[2]; 212 | const float x2 = box[3]; 213 | 214 | const int b_in = box_index_data[b]; 215 | if (b_in < 0 || b_in >= batch_size) { 216 | printf("Error: batch_index %d out of range [0, %d)\n", b_in, batch_size); 217 | exit(-1); 218 | } 219 | 220 | const float height_scale = 221 | (crop_height > 1) ? (y2 - y1) * (image_height - 1) / (crop_height - 1) 222 | : 0; 223 | const float width_scale = 224 | (crop_width > 1) ? (x2 - x1) * (image_width - 1) / (crop_width - 1) 225 | : 0; 226 | 227 | for (int y = 0; y < crop_height; ++y) 228 | { 229 | const float in_y = (crop_height > 1) 230 | ? y1 * (image_height - 1) + y * height_scale 231 | : 0.5 * (y1 + y2) * (image_height - 1); 232 | if (in_y < 0 || in_y > image_height - 1) 233 | { 234 | continue; 235 | } 236 | const int top_y_index = floorf(in_y); 237 | const int bottom_y_index = ceilf(in_y); 238 | const float y_lerp = in_y - top_y_index; 239 | 240 | for (int x = 0; x < crop_width; ++x) 241 | { 242 | const float in_x = (crop_width > 1) 243 | ? 
x1 * (image_width - 1) + x * width_scale 244 | : 0.5 * (x1 + x2) * (image_width - 1); 245 | if (in_x < 0 || in_x > image_width - 1) 246 | { 247 | continue; 248 | } 249 | const int left_x_index = floorf(in_x); 250 | const int right_x_index = ceilf(in_x); 251 | const float x_lerp = in_x - left_x_index; 252 | 253 | for (int d = 0; d < depth; ++d) 254 | { 255 | float *pimage = grads_image_data + b_in * image_elements + d * image_channel_elements; 256 | const float grad_val = grads_data[crop_elements * b + channel_elements * d + y * crop_width + x]; 257 | 258 | const float dtop = (1 - y_lerp) * grad_val; 259 | pimage[top_y_index * image_width + left_x_index] += (1 - x_lerp) * dtop; 260 | pimage[top_y_index * image_width + right_x_index] += x_lerp * dtop; 261 | 262 | const float dbottom = y_lerp * grad_val; 263 | pimage[bottom_y_index * image_width + left_x_index] += (1 - x_lerp) * dbottom; 264 | pimage[bottom_y_index * image_width + right_x_index] += x_lerp * dbottom; 265 | } // end d 266 | } // end x 267 | } // end y 268 | } // end b 269 | } -------------------------------------------------------------------------------- /lib/roi_align/src/crop_and_resize.h: -------------------------------------------------------------------------------- 1 | void crop_and_resize_forward( 2 | THFloatTensor * image, 3 | THFloatTensor * boxes, // [y1, x1, y2, x2] 4 | THIntTensor * box_index, // range in [0, batch_size) 5 | const float extrapolation_value, 6 | const int crop_height, 7 | const int crop_width, 8 | THFloatTensor * crops 9 | ); 10 | 11 | void crop_and_resize_backward( 12 | THFloatTensor * grads, 13 | THFloatTensor * boxes, // [y1, x1, y2, x2] 14 | THIntTensor * box_index, // range in [0, batch_size) 15 | THFloatTensor * grads_image // resize to [bsize, c, hc, wc] 16 | ); -------------------------------------------------------------------------------- /lib/roi_align/src/crop_and_resize_gpu.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "cuda/crop_and_resize_kernel.h" 3 | 4 | extern THCState *state; 5 | 6 | 7 | void crop_and_resize_gpu_forward( 8 | THCudaTensor * image, 9 | THCudaTensor * boxes, // [y1, x1, y2, x2] 10 | THCudaIntTensor * box_index, // range in [0, batch_size) 11 | const float extrapolation_value, 12 | const int crop_height, 13 | const int crop_width, 14 | THCudaTensor * crops 15 | ) { 16 | const int batch_size = THCudaTensor_size(state, image, 0); 17 | const int depth = THCudaTensor_size(state, image, 1); 18 | const int image_height = THCudaTensor_size(state, image, 2); 19 | const int image_width = THCudaTensor_size(state, image, 3); 20 | 21 | const int num_boxes = THCudaTensor_size(state, boxes, 0); 22 | 23 | // init output space 24 | THCudaTensor_resize4d(state, crops, num_boxes, depth, crop_height, crop_width); 25 | THCudaTensor_zero(state, crops); 26 | 27 | cudaStream_t stream = THCState_getCurrentStream(state); 28 | CropAndResizeLaucher( 29 | THCudaTensor_data(state, image), 30 | THCudaTensor_data(state, boxes), 31 | THCudaIntTensor_data(state, box_index), 32 | num_boxes, batch_size, image_height, image_width, 33 | crop_height, crop_width, depth, extrapolation_value, 34 | THCudaTensor_data(state, crops), 35 | stream 36 | ); 37 | } 38 | 39 | 40 | void crop_and_resize_gpu_backward( 41 | THCudaTensor * grads, 42 | THCudaTensor * boxes, // [y1, x1, y2, x2] 43 | THCudaIntTensor * box_index, // range in [0, batch_size) 44 | THCudaTensor * grads_image // resize to [bsize, c, hc, wc] 45 | ) { 46 | // shape 47 | const int 
batch_size = THCudaTensor_size(state, grads_image, 0); 48 | const int depth = THCudaTensor_size(state, grads_image, 1); 49 | const int image_height = THCudaTensor_size(state, grads_image, 2); 50 | const int image_width = THCudaTensor_size(state, grads_image, 3); 51 | 52 | const int num_boxes = THCudaTensor_size(state, grads, 0); 53 | const int crop_height = THCudaTensor_size(state, grads, 2); 54 | const int crop_width = THCudaTensor_size(state, grads, 3); 55 | 56 | // init output space 57 | THCudaTensor_zero(state, grads_image); 58 | 59 | cudaStream_t stream = THCState_getCurrentStream(state); 60 | CropAndResizeBackpropImageLaucher( 61 | THCudaTensor_data(state, grads), 62 | THCudaTensor_data(state, boxes), 63 | THCudaIntTensor_data(state, box_index), 64 | num_boxes, batch_size, image_height, image_width, 65 | crop_height, crop_width, depth, 66 | THCudaTensor_data(state, grads_image), 67 | stream 68 | ); 69 | } -------------------------------------------------------------------------------- /lib/roi_align/src/crop_and_resize_gpu.h: -------------------------------------------------------------------------------- 1 | void crop_and_resize_gpu_forward( 2 | THCudaTensor * image, 3 | THCudaTensor * boxes, // [y1, x1, y2, x2] 4 | THCudaIntTensor * box_index, // range in [0, batch_size) 5 | const float extrapolation_value, 6 | const int crop_height, 7 | const int crop_width, 8 | THCudaTensor * crops 9 | ); 10 | 11 | void crop_and_resize_gpu_backward( 12 | THCudaTensor * grads, 13 | THCudaTensor * boxes, // [y1, x1, y2, x2] 14 | THCudaIntTensor * box_index, // range in [0, batch_size) 15 | THCudaTensor * grads_image // resize to [bsize, c, hc, wc] 16 | ); -------------------------------------------------------------------------------- /lib/roi_align/src/cuda/crop_and_resize_kernel.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "crop_and_resize_kernel.h" 4 | 5 | #define CUDA_1D_KERNEL_LOOP(i, n) \ 6 | for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ 7 | i += blockDim.x * gridDim.x) 8 | 9 | 10 | __global__ 11 | void CropAndResizeKernel( 12 | const int nthreads, const float *image_ptr, const float *boxes_ptr, 13 | const int *box_ind_ptr, int num_boxes, int batch, int image_height, 14 | int image_width, int crop_height, int crop_width, int depth, 15 | float extrapolation_value, float *crops_ptr) 16 | { 17 | CUDA_1D_KERNEL_LOOP(out_idx, nthreads) 18 | { 19 | // NHWC: out_idx = d + depth * (w + crop_width * (h + crop_height * b)) 20 | // NCHW: out_idx = w + crop_width * (h + crop_height * (d + depth * b)) 21 | int idx = out_idx; 22 | const int x = idx % crop_width; 23 | idx /= crop_width; 24 | const int y = idx % crop_height; 25 | idx /= crop_height; 26 | const int d = idx % depth; 27 | const int b = idx / depth; 28 | 29 | const float y1 = boxes_ptr[b * 4]; 30 | const float x1 = boxes_ptr[b * 4 + 1]; 31 | const float y2 = boxes_ptr[b * 4 + 2]; 32 | const float x2 = boxes_ptr[b * 4 + 3]; 33 | 34 | const int b_in = box_ind_ptr[b]; 35 | if (b_in < 0 || b_in >= batch) 36 | { 37 | continue; 38 | } 39 | 40 | const float height_scale = 41 | (crop_height > 1) ? (y2 - y1) * (image_height - 1) / (crop_height - 1) 42 | : 0; 43 | const float width_scale = 44 | (crop_width > 1) ? (x2 - x1) * (image_width - 1) / (crop_width - 1) : 0; 45 | 46 | const float in_y = (crop_height > 1) 47 | ? 
y1 * (image_height - 1) + y * height_scale 48 | : 0.5 * (y1 + y2) * (image_height - 1); 49 | if (in_y < 0 || in_y > image_height - 1) 50 | { 51 | crops_ptr[out_idx] = extrapolation_value; 52 | continue; 53 | } 54 | 55 | const float in_x = (crop_width > 1) 56 | ? x1 * (image_width - 1) + x * width_scale 57 | : 0.5 * (x1 + x2) * (image_width - 1); 58 | if (in_x < 0 || in_x > image_width - 1) 59 | { 60 | crops_ptr[out_idx] = extrapolation_value; 61 | continue; 62 | } 63 | 64 | const int top_y_index = floorf(in_y); 65 | const int bottom_y_index = ceilf(in_y); 66 | const float y_lerp = in_y - top_y_index; 67 | 68 | const int left_x_index = floorf(in_x); 69 | const int right_x_index = ceilf(in_x); 70 | const float x_lerp = in_x - left_x_index; 71 | 72 | const float *pimage = image_ptr + (b_in * depth + d) * image_height * image_width; 73 | const float top_left = pimage[top_y_index * image_width + left_x_index]; 74 | const float top_right = pimage[top_y_index * image_width + right_x_index]; 75 | const float bottom_left = pimage[bottom_y_index * image_width + left_x_index]; 76 | const float bottom_right = pimage[bottom_y_index * image_width + right_x_index]; 77 | 78 | const float top = top_left + (top_right - top_left) * x_lerp; 79 | const float bottom = bottom_left + (bottom_right - bottom_left) * x_lerp; 80 | crops_ptr[out_idx] = top + (bottom - top) * y_lerp; 81 | } 82 | } 83 | 84 | __global__ 85 | void CropAndResizeBackpropImageKernel( 86 | const int nthreads, const float *grads_ptr, const float *boxes_ptr, 87 | const int *box_ind_ptr, int num_boxes, int batch, int image_height, 88 | int image_width, int crop_height, int crop_width, int depth, 89 | float *grads_image_ptr) 90 | { 91 | CUDA_1D_KERNEL_LOOP(out_idx, nthreads) 92 | { 93 | // NHWC: out_idx = d + depth * (w + crop_width * (h + crop_height * b)) 94 | // NCHW: out_idx = w + crop_width * (h + crop_height * (d + depth * b)) 95 | int idx = out_idx; 96 | const int x = idx % crop_width; 97 | idx /= crop_width; 98 | const int y = idx % crop_height; 99 | idx /= crop_height; 100 | const int d = idx % depth; 101 | const int b = idx / depth; 102 | 103 | const float y1 = boxes_ptr[b * 4]; 104 | const float x1 = boxes_ptr[b * 4 + 1]; 105 | const float y2 = boxes_ptr[b * 4 + 2]; 106 | const float x2 = boxes_ptr[b * 4 + 3]; 107 | 108 | const int b_in = box_ind_ptr[b]; 109 | if (b_in < 0 || b_in >= batch) 110 | { 111 | continue; 112 | } 113 | 114 | const float height_scale = 115 | (crop_height > 1) ? (y2 - y1) * (image_height - 1) / (crop_height - 1) 116 | : 0; 117 | const float width_scale = 118 | (crop_width > 1) ? (x2 - x1) * (image_width - 1) / (crop_width - 1) : 0; 119 | 120 | const float in_y = (crop_height > 1) 121 | ? y1 * (image_height - 1) + y * height_scale 122 | : 0.5 * (y1 + y2) * (image_height - 1); 123 | if (in_y < 0 || in_y > image_height - 1) 124 | { 125 | continue; 126 | } 127 | 128 | const float in_x = (crop_width > 1) 129 | ? 
x1 * (image_width - 1) + x * width_scale 130 | : 0.5 * (x1 + x2) * (image_width - 1); 131 | if (in_x < 0 || in_x > image_width - 1) 132 | { 133 | continue; 134 | } 135 | 136 | const int top_y_index = floorf(in_y); 137 | const int bottom_y_index = ceilf(in_y); 138 | const float y_lerp = in_y - top_y_index; 139 | 140 | const int left_x_index = floorf(in_x); 141 | const int right_x_index = ceilf(in_x); 142 | const float x_lerp = in_x - left_x_index; 143 | 144 | float *pimage = grads_image_ptr + (b_in * depth + d) * image_height * image_width; 145 | const float dtop = (1 - y_lerp) * grads_ptr[out_idx]; 146 | atomicAdd( 147 | pimage + top_y_index * image_width + left_x_index, 148 | (1 - x_lerp) * dtop 149 | ); 150 | atomicAdd( 151 | pimage + top_y_index * image_width + right_x_index, 152 | x_lerp * dtop 153 | ); 154 | 155 | const float dbottom = y_lerp * grads_ptr[out_idx]; 156 | atomicAdd( 157 | pimage + bottom_y_index * image_width + left_x_index, 158 | (1 - x_lerp) * dbottom 159 | ); 160 | atomicAdd( 161 | pimage + bottom_y_index * image_width + right_x_index, 162 | x_lerp * dbottom 163 | ); 164 | } 165 | } 166 | 167 | 168 | void CropAndResizeLaucher( 169 | const float *image_ptr, const float *boxes_ptr, 170 | const int *box_ind_ptr, int num_boxes, int batch, int image_height, 171 | int image_width, int crop_height, int crop_width, int depth, 172 | float extrapolation_value, float *crops_ptr, cudaStream_t stream) 173 | { 174 | const int total_count = num_boxes * crop_height * crop_width * depth; 175 | const int thread_per_block = 1024; 176 | const int block_count = (total_count + thread_per_block - 1) / thread_per_block; 177 | cudaError_t err; 178 | 179 | if (total_count > 0) 180 | { 181 | CropAndResizeKernel<<<block_count, thread_per_block, 0, stream>>>( 182 | total_count, image_ptr, boxes_ptr, 183 | box_ind_ptr, num_boxes, batch, image_height, image_width, 184 | crop_height, crop_width, depth, extrapolation_value, crops_ptr); 185 | 186 | err = cudaGetLastError(); 187 | if (cudaSuccess != err) 188 | { 189 | fprintf(stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString(err)); 190 | exit(-1); 191 | } 192 | } 193 | } 194 | 195 | 196 | void CropAndResizeBackpropImageLaucher( 197 | const float *grads_ptr, const float *boxes_ptr, 198 | const int *box_ind_ptr, int num_boxes, int batch, int image_height, 199 | int image_width, int crop_height, int crop_width, int depth, 200 | float *grads_image_ptr, cudaStream_t stream) 201 | { 202 | const int total_count = num_boxes * crop_height * crop_width * depth; 203 | const int thread_per_block = 1024; 204 | const int block_count = (total_count + thread_per_block - 1) / thread_per_block; 205 | cudaError_t err; 206 | 207 | if (total_count > 0) 208 | { 209 | CropAndResizeBackpropImageKernel<<<block_count, thread_per_block, 0, stream>>>( 210 | total_count, grads_ptr, boxes_ptr, 211 | box_ind_ptr, num_boxes, batch, image_height, image_width, 212 | crop_height, crop_width, depth, grads_image_ptr); 213 | 214 | err = cudaGetLastError(); 215 | if (cudaSuccess != err) 216 | { 217 | fprintf(stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString(err)); 218 | exit(-1); 219 | } 220 | } 221 | } -------------------------------------------------------------------------------- /lib/roi_align/src/cuda/crop_and_resize_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _CropAndResize_Kernel 2 | #define _CropAndResize_Kernel 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | void CropAndResizeLaucher( 9 | const float *image_ptr, const float *boxes_ptr, 10 | const int *box_ind_ptr, int 
num_boxes, int batch, int image_height, 11 | int image_width, int crop_height, int crop_width, int depth, 12 | float extrapolation_value, float *crops_ptr, cudaStream_t stream); 13 | 14 | void CropAndResizeBackpropImageLaucher( 15 | const float *grads_ptr, const float *boxes_ptr, 16 | const int *box_ind_ptr, int num_boxes, int batch, int image_height, 17 | int image_width, int crop_height, int crop_width, int depth, 18 | float *grads_image_ptr, cudaStream_t stream); 19 | 20 | #ifdef __cplusplus 21 | } 22 | #endif 23 | 24 | #endif -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | h5py==2.8.0 2 | torchvision==0.2.1 3 | cffi==1.11.5 4 | scikit-image==0.14.0 5 | Cython==0.28.5 6 | docopt==0.6.2 7 | clint==0.5.1 8 | crontab==0.22.2 9 | tablib==0.12.1 10 | typing==3.6.6 11 | pyyaml 12 | mkl==2019.0 13 | mkl-include==2019.0 14 | setuptools 15 | cmake==3.12.0 16 | azure 17 | setuptools -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Common utility functions and classes. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | """ 9 | 10 | import sys 11 | import os 12 | import math 13 | import random 14 | import numpy as np 15 | import scipy.misc 16 | import scipy.ndimage 17 | import skimage.color 18 | import skimage.io 19 | import torch 20 | import urllib.request 21 | import shutil 22 | import warnings 23 | 24 | # URL from which to download the latest COCO trained weights 25 | COCO_MODEL_URL = "https://drive.google.com/file/d/1VV6WgX_RNl6a9Yi9-Pe7ZyVKHRJZSKkm/view?usp=sharing" 26 | 27 | ############################################################ 28 | # Bounding Boxes 29 | ############################################################ 30 | 31 | def extract_bboxes(mask): 32 | """Compute bounding boxes from masks. 33 | mask: [height, width, num_instances]. Mask pixels are either 1 or 0. 34 | 35 | Returns: bbox array [num_instances, (y1, x1, y2, x2)]. 36 | """ 37 | boxes = np.zeros([mask.shape[-1], 4], dtype=np.int32) 38 | for i in range(mask.shape[-1]): 39 | m = mask[:, :, i] 40 | # Bounding box. 41 | horizontal_indicies = np.where(np.any(m, axis=0))[0] 42 | vertical_indicies = np.where(np.any(m, axis=1))[0] 43 | if horizontal_indicies.shape[0]: 44 | x1, x2 = horizontal_indicies[[0, -1]] 45 | y1, y2 = vertical_indicies[[0, -1]] 46 | # x2 and y2 should not be part of the box. Increment by 1. 47 | x2 += 1 48 | y2 += 1 49 | else: 50 | # No mask for this instance. Might happen due to 51 | # resizing or cropping. Set bbox to zeros 52 | x1, x2, y1, y2 = 0, 0, 0, 0 53 | boxes[i] = np.array([y1, x1, y2, x2]) 54 | return boxes.astype(np.int32) 55 | 56 | 57 | def compute_iou(box, boxes, box_area, boxes_area): 58 | """Calculates IoU of the given box with the array of the given boxes. 59 | box: 1D vector [y1, x1, y2, x2] 60 | boxes: [boxes_count, (y1, x1, y2, x2)] 61 | box_area: float. the area of 'box' 62 | boxes_area: array of length boxes_count. 63 | 64 | Note: the areas are passed in rather than calculated here for 65 | efficency. Calculate once in the caller to avoid duplicate work. 
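    For illustration, assuming box = [0, 0, 10, 10] and boxes = [[5, 5, 15, 15]]
    with box_area = 100 and boxes_area = [100]: the intersection is 5 * 5 = 25,
    the union is 100 + 100 - 25 = 175, and the returned IoU is 25 / 175 ~= 0.14.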
66 | """ 67 | # Calculate intersection areas 68 | y1 = np.maximum(box[0], boxes[:, 0]) 69 | y2 = np.minimum(box[2], boxes[:, 2]) 70 | x1 = np.maximum(box[1], boxes[:, 1]) 71 | x2 = np.minimum(box[3], boxes[:, 3]) 72 | intersection = np.maximum(x2 - x1, 0) * np.maximum(y2 - y1, 0) 73 | union = box_area + boxes_area[:] - intersection[:] 74 | iou = intersection / union 75 | return iou 76 | 77 | def compute_ap(gt_boxes, gt_class_ids, gt_masks, 78 | pred_boxes, pred_class_ids, pred_scores, pred_masks, 79 | iou_threshold=0.5): 80 | """Compute Average Precision at a set IoU threshold (default 0.5). 81 | Returns: 82 | mAP: Mean Average Precision 83 | precisions: List of precisions at different class score thresholds. 84 | recalls: List of recall values at different class score thresholds. 85 | overlaps: [pred_boxes, gt_boxes] IoU overlaps. 86 | """ 87 | # Get matches and overlaps 88 | gt_match, pred_match, overlaps = compute_matches( 89 | gt_boxes, gt_class_ids, gt_masks, 90 | pred_boxes, pred_class_ids, pred_scores, pred_masks, 91 | iou_threshold) 92 | 93 | # Compute precision and recall at each prediction box step 94 | precisions = np.cumsum(pred_match > -1) / (np.arange(len(pred_match)) + 1) 95 | recalls = np.cumsum(pred_match > -1).astype(np.float32) / len(gt_match) 96 | 97 | # Pad with start and end values to simplify the math 98 | precisions = np.concatenate([[0], precisions, [0]]) 99 | recalls = np.concatenate([[0], recalls, [1]]) 100 | 101 | # Ensure precision values decrease but don't increase. This way, the 102 | # precision value at each recall threshold is the maximum it can be 103 | # for all following recall thresholds, as specified by the VOC paper. 104 | for i in range(len(precisions) - 2, -1, -1): 105 | precisions[i] = np.maximum(precisions[i], precisions[i + 1]) 106 | 107 | # Compute mean AP over recall range 108 | indices = np.where(recalls[:-1] != recalls[1:])[0] + 1 109 | mAP = np.sum((recalls[indices] - recalls[indices - 1]) * 110 | precisions[indices]) 111 | 112 | return mAP, precisions, recalls, overlaps 113 | 114 | ############################################################ 115 | # Miscellaneous 116 | ############################################################ 117 | 118 | def trim_zeros(x): 119 | """It's common to have tensors larger than the available data and 120 | pad with zeros. This function removes rows that are all zeros. 121 | x: [rows, columns]. 122 | """ 123 | assert len(x.shape) == 2 124 | return x[~np.all(x == 0, axis=1)] 125 | 126 | 127 | def compute_matches(gt_boxes, gt_class_ids, gt_masks, 128 | pred_boxes, pred_class_ids, pred_scores, pred_masks, 129 | iou_threshold=0.5, score_threshold=0.0): 130 | """Finds matches between prediction and ground truth instances. 131 | Returns: 132 | gt_match: 1-D array. For each GT box it has the index of the matched 133 | predicted box. 134 | pred_match: 1-D array. For each predicted box, it has the index of 135 | the matched ground truth box. 136 | overlaps: [pred_boxes, gt_boxes] IoU overlaps. 
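    The matching below is greedy: predictions are visited in descending score
    order, and each one is matched to the remaining ground truth box of the
    same class with the highest mask IoU, as long as that IoU is at least
    iou_threshold.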
137 | """ 138 | # Trim zero padding 139 | # TODO: cleaner to do zero unpadding upstream 140 | gt_boxes = trim_zeros(gt_boxes) 141 | gt_masks = gt_masks[..., :gt_boxes.shape[0]] 142 | pred_boxes = trim_zeros(pred_boxes) 143 | pred_scores = pred_scores[:pred_boxes.shape[0]] 144 | # Sort predictions by score from high to low 145 | indices = np.argsort(pred_scores)[::-1] 146 | pred_boxes = pred_boxes[indices] 147 | pred_class_ids = pred_class_ids[indices] 148 | pred_scores = pred_scores[indices] 149 | pred_masks = pred_masks[..., indices] 150 | 151 | # Compute IoU overlaps [pred_masks, gt_masks] 152 | overlaps = compute_overlaps_masks(pred_masks, gt_masks) 153 | 154 | # Loop through predictions and find matching ground truth boxes 155 | match_count = 0 156 | pred_match = -1 * np.ones([pred_boxes.shape[0]]) 157 | gt_match = -1 * np.ones([gt_boxes.shape[0]]) 158 | for i in range(len(pred_boxes)): 159 | # Find best matching ground truth box 160 | # 1. Sort matches by score 161 | sorted_ixs = np.argsort(overlaps[i])[::-1] 162 | # 2. Remove low scores 163 | low_score_idx = np.where(overlaps[i, sorted_ixs] < score_threshold)[0] 164 | if low_score_idx.size > 0: 165 | sorted_ixs = sorted_ixs[:low_score_idx[0]] 166 | # 3. Find the match 167 | for j in sorted_ixs: 168 | # If ground truth box is already matched, go to next one 169 | if gt_match[j] > 0: 170 | continue 171 | # If we reach IoU smaller than the threshold, end the loop 172 | iou = overlaps[i, j] 173 | if iou < iou_threshold: 174 | break 175 | # Do we have a match? 176 | if pred_class_ids[i] == gt_class_ids[j]: 177 | match_count += 1 178 | gt_match[j] = i 179 | pred_match[i] = j 180 | break 181 | 182 | return gt_match, pred_match, overlaps 183 | 184 | 185 | def compute_ap(gt_boxes, gt_class_ids, gt_masks, 186 | pred_boxes, pred_class_ids, pred_scores, pred_masks, 187 | iou_threshold=0.5): 188 | """Compute Average Precision at a set IoU threshold (default 0.5). 189 | Returns: 190 | mAP: Mean Average Precision 191 | precisions: List of precisions at different class score thresholds. 192 | recalls: List of recall values at different class score thresholds. 193 | overlaps: [pred_boxes, gt_boxes] IoU overlaps. 194 | """ 195 | # Get matches and overlaps 196 | gt_match, pred_match, overlaps = compute_matches( 197 | gt_boxes, gt_class_ids, gt_masks, 198 | pred_boxes, pred_class_ids, pred_scores, pred_masks, 199 | iou_threshold) 200 | 201 | # Compute precision and recall at each prediction box step 202 | precisions = np.cumsum(pred_match > -1) / (np.arange(len(pred_match)) + 1) 203 | recalls = np.cumsum(pred_match > -1).astype(np.float32) / len(gt_match) 204 | 205 | # Pad with start and end values to simplify the math 206 | precisions = np.concatenate([[0], precisions, [0]]) 207 | recalls = np.concatenate([[0], recalls, [1]]) 208 | 209 | # Ensure precision values decrease but don't increase. This way, the 210 | # precision value at each recall threshold is the maximum it can be 211 | # for all following recall thresholds, as specified by the VOC paper. 
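    # For illustration, assuming precisions = [0, 1.0, 0.5, 0.67, 0]: the
    # backward pass below produces [1.0, 1.0, 0.67, 0.67, 0], so each entry
    # holds the best precision achievable at that recall level or beyond.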
212 | for i in range(len(precisions) - 2, -1, -1): 213 | precisions[i] = np.maximum(precisions[i], precisions[i + 1]) 214 | 215 | # Compute mean AP over recall range 216 | indices = np.where(recalls[:-1] != recalls[1:])[0] + 1 217 | mAP = np.sum((recalls[indices] - recalls[indices - 1]) * 218 | precisions[indices]) 219 | 220 | return mAP, precisions, recalls, overlaps 221 | 222 | 223 | def compute_ap_range(gt_box, gt_class_id, gt_mask, 224 | pred_box, pred_class_id, pred_score, pred_mask, 225 | iou_thresholds=None, verbose=1): 226 | """Compute AP over a range or IoU thresholds. Default range is 0.5-0.95.""" 227 | # Default is 0.5 to 0.95 with increments of 0.05 228 | iou_thresholds = iou_thresholds or np.arange(0.5, 1.0, 0.05) 229 | 230 | # Compute AP over range of IoU thresholds 231 | AP = [] 232 | for iou_threshold in iou_thresholds: 233 | ap, precisions, recalls, overlaps =\ 234 | compute_ap(gt_box, gt_class_id, gt_mask, 235 | pred_box, pred_class_id, pred_score, pred_mask, 236 | iou_threshold=iou_threshold) 237 | if verbose: 238 | print("AP @{:.2f}:\t {:.3f}".format(iou_threshold, ap)) 239 | AP.append(ap) 240 | AP = np.array(AP).mean() 241 | if verbose: 242 | print("AP @{:.2f}-{:.2f}:\t {:.3f}".format( 243 | iou_thresholds[0], iou_thresholds[-1], AP)) 244 | return AP 245 | 246 | 247 | def compute_recall(pred_boxes, gt_boxes, iou): 248 | """Compute the recall at the given IoU threshold. It's an indication 249 | of how many GT boxes were found by the given prediction boxes. 250 | pred_boxes: [N, (y1, x1, y2, x2)] in image coordinates 251 | gt_boxes: [N, (y1, x1, y2, x2)] in image coordinates 252 | """ 253 | # Measure overlaps 254 | overlaps = compute_overlaps(pred_boxes, gt_boxes) 255 | iou_max = np.max(overlaps, axis=1) 256 | iou_argmax = np.argmax(overlaps, axis=1) 257 | positive_ids = np.where(iou_max >= iou)[0] 258 | matched_gt_boxes = iou_argmax[positive_ids] 259 | 260 | recall = len(set(matched_gt_boxes)) / gt_boxes.shape[0] 261 | return recall, positive_ids 262 | 263 | 264 | # ## Batch Slicing 265 | # Some custom layers support a batch size of 1 only, and require a lot of work 266 | # to support batches greater than 1. This function slices an input tensor 267 | # across the batch dimension and feeds batches of size 1. Effectively, 268 | # an easy way to support batches > 1 quickly with little code modification. 269 | # In the long run, it's more efficient to modify the code to support large 270 | # batches and getting rid of this function. Consider this a temporary solution 271 | def batch_slice(inputs, graph_fn, batch_size, names=None): 272 | """Splits inputs into slices and feeds each slice to a copy of the given 273 | computation graph and then combines the results. It allows you to run a 274 | graph on a batch of inputs even if the graph is written to support one 275 | instance only. 276 | inputs: list of tensors. All must have the same first dimension length 277 | graph_fn: A function that returns a TF tensor that's part of a graph. 278 | batch_size: number of slices to divide the data into. 279 | names: If provided, assigns names to the resulting tensors. 
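    Note: the tf.stack call below appears to be left over from the TensorFlow
    version of this code base; TensorFlow is not imported in this module, so
    calling this helper as-is raises a NameError.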
280 | """ 281 | if not isinstance(inputs, list): 282 | inputs = [inputs] 283 | 284 | outputs = [] 285 | for i in range(batch_size): 286 | inputs_slice = [x[i] for x in inputs] 287 | output_slice = graph_fn(*inputs_slice) 288 | if not isinstance(output_slice, (tuple, list)): 289 | output_slice = [output_slice] 290 | outputs.append(output_slice) 291 | # Change outputs from a list of slices where each is 292 | # a list of outputs to a list of outputs and each has 293 | # a list of slices 294 | outputs = list(zip(*outputs)) 295 | 296 | if names is None: 297 | names = [None] * len(outputs) 298 | 299 | result = [tf.stack(o, axis=0, name=n) 300 | for o, n in zip(outputs, names)] 301 | if len(result) == 1: 302 | result = result[0] 303 | 304 | return result 305 | 306 | 307 | def download_trained_weights(coco_model_path, verbose=1): 308 | """Download COCO trained weights from Releases. 309 | coco_model_path: local path of COCO trained weights 310 | """ 311 | if verbose > 0: 312 | print("Downloading pretrained model to " + coco_model_path + " ...") 313 | with urllib.request.urlopen(COCO_MODEL_URL) as resp, open(coco_model_path, 'wb') as out: 314 | shutil.copyfileobj(resp, out) 315 | if verbose > 0: 316 | print("... done downloading pretrained model!") 317 | 318 | 319 | def norm_boxes(boxes, shape): 320 | """Converts boxes from pixel coordinates to normalized coordinates. 321 | boxes: [N, (y1, x1, y2, x2)] in pixel coordinates 322 | shape: [..., (height, width)] in pixels 323 | Note: In pixel coordinates (y2, x2) is outside the box. But in normalized 324 | coordinates it's inside the box. 325 | Returns: 326 | [N, (y1, x1, y2, x2)] in normalized coordinates 327 | """ 328 | h, w = shape 329 | scale = np.array([h - 1, w - 1, h - 1, w - 1]) 330 | shift = np.array([0, 0, 1, 1]) 331 | return np.divide((boxes - shift), scale).astype(np.float32) 332 | 333 | 334 | def denorm_boxes(boxes, shape): 335 | """Converts boxes from normalized coordinates to pixel coordinates. 336 | boxes: [N, (y1, x1, y2, x2)] in normalized coordinates 337 | shape: [..., (height, width)] in pixels 338 | Note: In pixel coordinates (y2, x2) is outside the box. But in normalized 339 | coordinates it's inside the box. 340 | Returns: 341 | [N, (y1, x1, y2, x2)] in pixel coordinates 342 | """ 343 | h, w = shape 344 | scale = np.array([h - 1, w - 1, h - 1, w - 1]) 345 | shift = np.array([0, 0, 1, 1]) 346 | return np.around(np.multiply(boxes, scale) + shift).astype(np.int32) 347 | 348 | def compute_overlaps(boxes1, boxes2): 349 | """Computes IoU overlaps between two sets of boxes. 350 | boxes1, boxes2: [N, (y1, x1, y2, x2)]. 351 | 352 | For better performance, pass the largest set first and the smaller second. 353 | """ 354 | # Areas of anchors and GT boxes 355 | area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1]) 356 | area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1]) 357 | 358 | # Compute overlaps to generate matrix [boxes1 count, boxes2 count] 359 | # Each cell contains the IoU value. 360 | overlaps = np.zeros((boxes1.shape[0], boxes2.shape[0])) 361 | for i in range(overlaps.shape[1]): 362 | box2 = boxes2[i] 363 | overlaps[:, i] = compute_iou(box2, boxes1, area2[i], area1) 364 | return overlaps 365 | 366 | def compute_overlaps_masks(masks1, masks2): 367 | '''Computes IoU overlaps between two sets of masks. 
368 | masks1, masks2: [Height, Width, instances] 369 | ''' 370 | # flatten masks 371 | masks1 = np.reshape(masks1 > .5, (-1, masks1.shape[-1])).astype(np.float32) 372 | masks2 = np.reshape(masks2 > .5, (-1, masks2.shape[-1])).astype(np.float32) 373 | area1 = np.sum(masks1, axis=0) 374 | area2 = np.sum(masks2, axis=0) 375 | 376 | # intersections and union 377 | intersections = np.dot(masks1.T, masks2) 378 | union = area1[:, None] + area2[None, :] - intersections 379 | overlaps = intersections / union 380 | 381 | return overlaps 382 | 383 | 384 | def non_max_suppression(boxes, scores, threshold): 385 | """Performs non-maximum supression and returns indicies of kept boxes. 386 | boxes: [N, (y1, x1, y2, x2)]. Notice that (y2, x2) lays outside the box. 387 | scores: 1-D array of box scores. 388 | threshold: Float. IoU threshold to use for filtering. 389 | """ 390 | assert boxes.shape[0] > 0 391 | if boxes.dtype.kind != "f": 392 | boxes = boxes.astype(np.float32) 393 | 394 | # Compute box areas 395 | y1 = boxes[:, 0] 396 | x1 = boxes[:, 1] 397 | y2 = boxes[:, 2] 398 | x2 = boxes[:, 3] 399 | area = (y2 - y1) * (x2 - x1) 400 | 401 | # Get indicies of boxes sorted by scores (highest first) 402 | ixs = scores.argsort()[::-1] 403 | 404 | pick = [] 405 | while len(ixs) > 0: 406 | # Pick top box and add its index to the list 407 | i = ixs[0] 408 | pick.append(i) 409 | # Compute IoU of the picked box with the rest 410 | iou = compute_iou(boxes[i], boxes[ixs[1:]], area[i], area[ixs[1:]]) 411 | # Identify boxes with IoU over the threshold. This 412 | # returns indicies into ixs[1:], so add 1 to get 413 | # indicies into ixs. 414 | remove_ixs = np.where(iou > threshold)[0] + 1 415 | # Remove indicies of the picked and overlapped boxes. 416 | ixs = np.delete(ixs, remove_ixs) 417 | ixs = np.delete(ixs, 0) 418 | return np.array(pick, dtype=np.int32) 419 | 420 | 421 | 422 | def box_refinement(box, gt_box): 423 | """Compute refinement needed to transform box to gt_box. 424 | box and gt_box are [N, (y1, x1, y2, x2)] 425 | """ 426 | 427 | height = box[:, 2] - box[:, 0] 428 | width = box[:, 3] - box[:, 1] 429 | center_y = box[:, 0] + 0.5 * height 430 | center_x = box[:, 1] + 0.5 * width 431 | 432 | gt_height = gt_box[:, 2] - gt_box[:, 0] 433 | gt_width = gt_box[:, 3] - gt_box[:, 1] 434 | gt_center_y = gt_box[:, 0] + 0.5 * gt_height 435 | gt_center_x = gt_box[:, 1] + 0.5 * gt_width 436 | 437 | dy = (gt_center_y - center_y) / height 438 | dx = (gt_center_x - center_x) / width 439 | dh = torch.log(gt_height / height) 440 | dw = torch.log(gt_width / width) 441 | 442 | result = torch.stack([dy, dx, dh, dw], dim=1) 443 | return result 444 | 445 | 446 | ############################################################ 447 | # Dataset 448 | ############################################################ 449 | 450 | class Dataset(object): 451 | """The base class for dataset classes. 452 | To use it, create a new class that adds functions specific to the dataset 453 | you want to use. For example: 454 | 455 | class CatsAndDogsDataset(Dataset): 456 | def load_cats_and_dogs(self): 457 | ... 458 | def load_mask(self, image_id): 459 | ... 460 | def image_reference(self, image_id): 461 | ... 462 | 463 | See COCODataset and ShapesDataset as examples. 
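    A minimal usage sketch (CatsAndDogsDataset and its loader are hypothetical):

        dataset = CatsAndDogsDataset()
        dataset.load_cats_and_dogs()
        dataset.prepare()
        image = dataset.load_image(dataset.image_ids[0])
        masks, class_ids = dataset.load_mask(dataset.image_ids[0])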
464 | """ 465 | 466 | def __init__(self, class_map=None): 467 | self._image_ids = [] 468 | self.image_info = [] 469 | # Background is always the first class 470 | self.class_info = [{"source": "", "id": 0, "name": "BG"}] 471 | self.source_class_ids = {} 472 | 473 | def add_class(self, source, class_id, class_name): 474 | assert "." not in source, "Source name cannot contain a dot" 475 | # Does the class exist already? 476 | for info in self.class_info: 477 | if info['source'] == source and info["id"] == class_id: 478 | # source.class_id combination already available, skip 479 | return 480 | # Add the class 481 | self.class_info.append({ 482 | "source": source, 483 | "id": class_id, 484 | "name": class_name, 485 | }) 486 | 487 | def add_image(self, source, image_id, path, **kwargs): 488 | image_info = { 489 | "id": image_id, 490 | "source": source, 491 | "path": path, 492 | } 493 | image_info.update(kwargs) 494 | self.image_info.append(image_info) 495 | 496 | def image_reference(self, image_id): 497 | """Return a link to the image in its source Website or details about 498 | the image that help looking it up or debugging it. 499 | 500 | Override for your dataset, but pass to this function 501 | if you encounter images not in your dataset. 502 | """ 503 | return "" 504 | 505 | def prepare(self, class_map=None): 506 | """Prepares the Dataset class for use. 507 | 508 | TODO: class map is not supported yet. When done, it should handle mapping 509 | classes from different datasets to the same class ID. 510 | """ 511 | def clean_name(name): 512 | """Returns a shorter version of object names for cleaner display.""" 513 | return ",".join(name.split(",")[:1]) 514 | 515 | # Build (or rebuild) everything else from the info dicts. 516 | self.num_classes = len(self.class_info) 517 | self.class_ids = np.arange(self.num_classes) 518 | self.class_names = [clean_name(c["name"]) for c in self.class_info] 519 | self.num_images = len(self.image_info) 520 | self._image_ids = np.arange(self.num_images) 521 | 522 | self.class_from_source_map = {"{}.{}".format(info['source'], info['id']): id 523 | for info, id in zip(self.class_info, self.class_ids)} 524 | 525 | # Map sources to class_ids they support 526 | self.sources = list(set([i['source'] for i in self.class_info])) 527 | self.source_class_ids = {} 528 | # Loop over datasets 529 | for source in self.sources: 530 | self.source_class_ids[source] = [] 531 | # Find classes that belong to this dataset 532 | for i, info in enumerate(self.class_info): 533 | # Include BG class in all datasets 534 | if i == 0 or source == info['source']: 535 | self.source_class_ids[source].append(i) 536 | 537 | def map_source_class_id(self, source_class_id): 538 | """Takes a source class ID and returns the int class ID assigned to it. 539 | 540 | For example: 541 | dataset.map_source_class_id("coco.12") -> 23 542 | """ 543 | return self.class_from_source_map[source_class_id] 544 | 545 | def get_source_class_id(self, class_id, source): 546 | """Map an internal class ID to the corresponding class ID in the source dataset.""" 547 | info = self.class_info[class_id] 548 | assert info['source'] == source 549 | return info['id'] 550 | 551 | def append_data(self, class_info, image_info): 552 | self.external_to_class_id = {} 553 | for i, c in enumerate(self.class_info): 554 | for ds, id in c["map"]: 555 | self.external_to_class_id[ds + str(id)] = i 556 | 557 | # Map external image IDs to internal ones. 
558 | self.external_to_image_id = {} 559 | for i, info in enumerate(self.image_info): 560 | self.external_to_image_id[info["ds"] + str(info["id"])] = i 561 | 562 | @property 563 | def image_ids(self): 564 | return self._image_ids 565 | 566 | def source_image_link(self, image_id): 567 | """Returns the path or URL to the image. 568 | Override this to return a URL to the image if it's availble online for easy 569 | debugging. 570 | """ 571 | return self.image_info[image_id]["path"] 572 | 573 | def load_image(self, image_id): 574 | """Load the specified image and return a [H,W,3] Numpy array. 575 | """ 576 | # Load image 577 | image = skimage.io.imread(self.image_info[image_id]['path']) 578 | # If grayscale. Convert to RGB for consistency. 579 | if image.ndim != 3: 580 | image = skimage.color.gray2rgb(image) 581 | return image 582 | 583 | def load_mask(self, image_id): 584 | """Load instance masks for the given image. 585 | 586 | Different datasets use different ways to store masks. Override this 587 | method to load instance masks and return them in the form of am 588 | array of binary masks of shape [height, width, instances]. 589 | 590 | Returns: 591 | masks: A bool array of shape [height, width, instance count] with 592 | a binary mask per instance. 593 | class_ids: a 1D array of class IDs of the instance masks. 594 | """ 595 | # Override this function to load a mask from your dataset. 596 | # Otherwise, it returns an empty mask. 597 | mask = np.empty([0, 0, 0]) 598 | class_ids = np.empty([0], np.int32) 599 | return mask, class_ids 600 | 601 | 602 | def resize_image(image, min_dim=None, max_dim=None, padding=False): 603 | """ 604 | Resizes an image keeping the aspect ratio. 605 | 606 | min_dim: if provided, resizes the image such that it's smaller 607 | dimension == min_dim 608 | max_dim: if provided, ensures that the image longest side doesn't 609 | exceed this value. 610 | padding: If true, pads image with zeros so it's size is max_dim x max_dim 611 | 612 | Returns: 613 | image: the resized image 614 | window: (y1, x1, y2, x2). If max_dim is provided, padding might 615 | be inserted in the returned image. If so, this window is the 616 | coordinates of the image part of the full image (excluding 617 | the padding). The x2, y2 pixels are not included. 618 | scale: The scale factor used to resize the image 619 | padding: Padding added to the image [(top, bottom), (left, right), (0, 0)] 620 | """ 621 | # Default window (y1, x1, y2, x2) and default scale == 1. 622 | h, w = image.shape[:2] 623 | window = (0, 0, h, w) 624 | scale = 1 625 | 626 | # Scale? 627 | if min_dim: 628 | # Scale up but not down 629 | scale = max(1, min_dim / min(h, w)) 630 | # Does it exceed max dim? 631 | if max_dim: 632 | image_max = max(h, w) 633 | if round(image_max * scale) > max_dim: 634 | scale = max_dim / image_max 635 | # Resize image and mask 636 | if scale != 1: 637 | image = scipy.misc.imresize( 638 | image, (round(h * scale), round(w * scale))) 639 | # Need padding? 
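    # For illustration, assuming min_dim=800, max_dim=1024 and a 400x600 image:
    # scale starts at max(1, 800 / 400) = 2, but 600 * 2 exceeds max_dim, so it
    # drops to 1024 / 600 ~= 1.71 and the image is resized to about 683x1024;
    # the padding below then centers it on a 1024x1024 canvas with top_pad=170,
    # bottom_pad=171 and window=(170, 0, 853, 1024).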
640 | if padding: 641 | # Get new height and width 642 | h, w = image.shape[:2] 643 | top_pad = (max_dim - h) // 2 644 | bottom_pad = max_dim - h - top_pad 645 | left_pad = (max_dim - w) // 2 646 | right_pad = max_dim - w - left_pad 647 | padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)] 648 | image = np.pad(image, padding, mode='constant', constant_values=0) 649 | window = (top_pad, left_pad, h + top_pad, w + left_pad) 650 | return image, window, scale, padding 651 | 652 | 653 | def resize_mask(mask, scale, padding): 654 | """Resizes a mask using the given scale and padding. 655 | Typically, you get the scale and padding from resize_image() to 656 | ensure both, the image and the mask, are resized consistently. 657 | 658 | scale: mask scaling factor 659 | padding: Padding to add to the mask in the form 660 | [(top, bottom), (left, right), (0, 0)] 661 | """ 662 | h, w = mask.shape[:2] 663 | mask = scipy.ndimage.zoom(mask, zoom=[scale, scale, 1], order=0) 664 | mask = np.pad(mask, padding, mode='constant', constant_values=0) 665 | return mask 666 | 667 | 668 | def minimize_mask(bbox, mask, mini_shape): 669 | """Resize masks to a smaller version to cut memory load. 670 | Mini-masks can then resized back to image scale using expand_masks() 671 | 672 | See inspect_data.ipynb notebook for more details. 673 | """ 674 | mini_mask = np.zeros(mini_shape + (mask.shape[-1],), dtype=bool) 675 | for i in range(mask.shape[-1]): 676 | m = mask[:, :, i] 677 | y1, x1, y2, x2 = bbox[i][:4] 678 | m = m[y1:y2, x1:x2] 679 | if m.size == 0: 680 | raise Exception("Invalid bounding box with area of zero") 681 | m = scipy.misc.imresize(m.astype(float), mini_shape, interp='bilinear') 682 | mini_mask[:, :, i] = np.where(m >= 128, 1, 0) 683 | return mini_mask 684 | 685 | 686 | def expand_mask(bbox, mini_mask, image_shape): 687 | """Resizes mini masks back to image size. Reverses the change 688 | of minimize_mask(). 689 | 690 | See inspect_data.ipynb notebook for more details. 691 | """ 692 | mask = np.zeros(image_shape[:2] + (mini_mask.shape[-1],), dtype=bool) 693 | for i in range(mask.shape[-1]): 694 | m = mini_mask[:, :, i] 695 | y1, x1, y2, x2 = bbox[i][:4] 696 | h = y2 - y1 697 | w = x2 - x1 698 | m = scipy.misc.imresize(m.astype(float), (h, w), interp='bilinear') 699 | mask[y1:y2, x1:x2, i] = np.where(m >= 128, 1, 0) 700 | return mask 701 | 702 | 703 | # TODO: Build and use this function to reduce code duplication 704 | def mold_mask(mask, config): 705 | pass 706 | 707 | 708 | def unmold_mask(mask, bbox, image_shape): 709 | """Converts a mask generated by the neural network into a format similar 710 | to it's original shape. 711 | mask: [height, width] of type float. A small, typically 28x28 mask. 712 | bbox: [y1, x1, y2, x2]. The box to fit the mask in. 713 | 714 | Returns a binary mask with the same size as the original image. 715 | """ 716 | threshold = 0.5 717 | y1, x1, y2, x2 = bbox 718 | mask = scipy.misc.imresize( 719 | mask, (y2 - y1, x2 - x1), interp='bilinear').astype(np.float32) / 255.0 720 | mask = np.where(mask >= threshold, 1, 0).astype(np.uint8) 721 | 722 | # Put the mask in the right location. 
723 | full_mask = np.zeros(image_shape[:2], dtype=np.uint8) 724 | full_mask[y1:y2, x1:x2] = mask 725 | return full_mask 726 | 727 | 728 | ############################################################ 729 | # Anchors 730 | ############################################################ 731 | 732 | def generate_anchors(scales, ratios, shape, feature_stride, anchor_stride): 733 | """ 734 | scales: 1D array of anchor sizes in pixels. Example: [32, 64, 128] 735 | ratios: 1D array of anchor ratios of width/height. Example: [0.5, 1, 2] 736 | shape: [height, width] spatial shape of the feature map over which 737 | to generate anchors. 738 | feature_stride: Stride of the feature map relative to the image in pixels. 739 | anchor_stride: Stride of anchors on the feature map. For example, if the 740 | value is 2 then generate anchors for every other feature map pixel. 741 | """ 742 | # Get all combinations of scales and ratios 743 | scales, ratios = np.meshgrid(np.array(scales), np.array(ratios)) 744 | scales = scales.flatten() 745 | ratios = ratios.flatten() 746 | 747 | # Enumerate heights and widths from scales and ratios 748 | heights = scales / np.sqrt(ratios) 749 | widths = scales * np.sqrt(ratios) 750 | 751 | # Enumerate shifts in feature space 752 | shifts_y = np.arange(0, shape[0], anchor_stride) * feature_stride 753 | shifts_x = np.arange(0, shape[1], anchor_stride) * feature_stride 754 | shifts_x, shifts_y = np.meshgrid(shifts_x, shifts_y) 755 | 756 | # Enumerate combinations of shifts, widths, and heights 757 | box_widths, box_centers_x = np.meshgrid(widths, shifts_x) 758 | box_heights, box_centers_y = np.meshgrid(heights, shifts_y) 759 | 760 | # Reshape to get a list of (y, x) and a list of (h, w) 761 | box_centers = np.stack( 762 | [box_centers_y, box_centers_x], axis=2).reshape([-1, 2]) 763 | box_sizes = np.stack([box_heights, box_widths], axis=2).reshape([-1, 2]) 764 | 765 | # Convert to corner coordinates (y1, x1, y2, x2) 766 | boxes = np.concatenate([box_centers - 0.5 * box_sizes, 767 | box_centers + 0.5 * box_sizes], axis=1) 768 | return boxes 769 | 770 | 771 | def generate_pyramid_anchors(scales, ratios, feature_shapes, feature_strides, 772 | anchor_stride): 773 | """Generate anchors at different levels of a feature pyramid. Each scale 774 | is associated with a level of the pyramid, but each ratio is used in 775 | all levels of the pyramid. 776 | 777 | Returns: 778 | anchors: [N, (y1, x1, y2, x2)]. All generated anchors in one array. Sorted 779 | with the same order of the given scales. So, anchors of scale[0] come 780 | first, then anchors of scale[1], and so on. 781 | """ 782 | # Anchors 783 | # [anchor_count, (y1, x1, y2, x2)] 784 | anchors = [] 785 | for i in range(len(scales)): 786 | anchors.append(generate_anchors(scales[i], ratios, feature_shapes[i], 787 | feature_strides[i], anchor_stride)) 788 | return np.concatenate(anchors, axis=0) 789 | 790 | 791 | def download_trained_weights(coco_model_path, verbose=1): 792 | """Download COCO trained weights from Releases. 793 | 794 | coco_model_path: local path of COCO trained weights 795 | """ 796 | if verbose > 0: 797 | print("Downloading pretrained model to " + coco_model_path + " ...") 798 | with urllib.request.urlopen(COCO_MODEL_URL) as resp, open(coco_model_path, 'wb') as out: 799 | shutil.copyfileobj(resp, out) 800 | if verbose > 0: 801 | print("... 
done downloading pretrained model!") 802 | 803 | 804 | 805 | 806 | 807 | -------------------------------------------------------------------------------- /visualize.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Display and Visualization Functions. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | """ 9 | 10 | import os 11 | import random 12 | import itertools 13 | import colorsys 14 | import numpy as np 15 | from skimage.measure import find_contours 16 | import matplotlib.pyplot as plt 17 | if "DISPLAY" not in os.environ: 18 | plt.switch_backend('agg') 19 | import matplotlib.patches as patches 20 | import matplotlib.lines as lines 21 | from matplotlib.patches import Polygon 22 | 23 | import utils 24 | 25 | 26 | ############################################################ 27 | # Visualization 28 | ############################################################ 29 | 30 | def display_images(images, titles=None, cols=4, cmap=None, norm=None, 31 | interpolation=None): 32 | """Display the given set of images, optionally with titles. 33 | images: list or array of image tensors in HWC format. 34 | titles: optional. A list of titles to display with each image. 35 | cols: number of images per row 36 | cmap: Optional. Color map to use. For example, "Blues". 37 | norm: Optional. A Normalize instance to map values to colors. 38 | interpolation: Optional. Image interpolation to use for display. 39 | """ 40 | titles = titles if titles is not None else [""] * len(images) 41 | rows = len(images) // cols + 1 42 | plt.figure(figsize=(14, 14 * rows // cols)) 43 | i = 1 44 | for image, title in zip(images, titles): 45 | plt.subplot(rows, cols, i) 46 | plt.title(title, fontsize=9) 47 | plt.axis('off') 48 | plt.imshow(image.astype(np.uint8), cmap=cmap, 49 | norm=norm, interpolation=interpolation) 50 | i += 1 51 | plt.show() 52 | 53 | 54 | def random_colors(N, bright=True): 55 | """ 56 | Generate random colors. 57 | To get visually distinct colors, generate them in HSV space then 58 | convert to RGB. 59 | """ 60 | brightness = 1.0 if bright else 0.7 61 | hsv = [(i / N, 1, brightness) for i in range(N)] 62 | colors = list(map(lambda c: colorsys.hsv_to_rgb(*c), hsv)) 63 | random.shuffle(colors) 64 | return colors 65 | 66 | 67 | def apply_mask(image, mask, color, alpha=0.5): 68 | """Apply the given mask to the image. 69 | """ 70 | for c in range(3): 71 | image[:, :, c] = np.where(mask == 1, 72 | image[:, :, c] * 73 | (1 - alpha) + alpha * color[c] * 255, 74 | image[:, :, c]) 75 | return image 76 | 77 | 78 | def display_instances(image, boxes, masks, class_ids, class_names, 79 | scores=None, title="", 80 | figsize=(16, 16), ax=None): 81 | """ 82 | boxes: [num_instance, (y1, x1, y2, x2, class_id)] in image coordinates. 83 | masks: [height, width, num_instances] 84 | class_ids: [num_instances] 85 | class_names: list of class names of the dataset 86 | scores: (optional) confidence scores for each box 87 | figsize: (optional) the size of the image. 88 | """ 89 | # Number of instances 90 | N = boxes.shape[0] 91 | if not N: 92 | print("\n*** No instances to display *** \n") 93 | else: 94 | assert boxes.shape[0] == masks.shape[-1] == class_ids.shape[0] 95 | 96 | if not ax: 97 | _, ax = plt.subplots(1, figsize=figsize) 98 | 99 | # Generate random colors 100 | colors = random_colors(N) 101 | 102 | # Show area outside image boundaries.
103 | height, width = image.shape[:2] 104 | ax.set_ylim(height + 10, -10) 105 | ax.set_xlim(-10, width + 10) 106 | ax.axis('off') 107 | ax.set_title(title) 108 | 109 | masked_image = image.astype(np.uint32).copy() 110 | for i in range(N): 111 | color = colors[i] 112 | 113 | # Bounding box 114 | if not np.any(boxes[i]): 115 | # Skip this instance. Has no bbox. Likely lost in image cropping. 116 | continue 117 | y1, x1, y2, x2 = boxes[i] 118 | p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, 119 | alpha=0.7, linestyle="dashed", 120 | edgecolor=color, facecolor='none') 121 | ax.add_patch(p) 122 | 123 | # Label 124 | class_id = class_ids[i] 125 | score = scores[i] if scores is not None else None 126 | label = class_names[class_id] 127 | x = random.randint(x1, (x1 + x2) // 2) 128 | caption = "{} {:.3f}".format(label, score) if score else label 129 | ax.text(x1, y1 + 8, caption, 130 | color='w', size=11, backgroundcolor="none") 131 | 132 | # Mask 133 | mask = masks[:, :, i] 134 | masked_image = apply_mask(masked_image, mask, color) 135 | 136 | # Mask Polygon 137 | # Pad to ensure proper polygons for masks that touch image edges. 138 | padded_mask = np.zeros( 139 | (mask.shape[0] + 2, mask.shape[1] + 2), dtype=np.uint8) 140 | padded_mask[1:-1, 1:-1] = mask 141 | contours = find_contours(padded_mask, 0.5) 142 | for verts in contours: 143 | # Subtract the padding and flip (y, x) to (x, y) 144 | verts = np.fliplr(verts) - 1 145 | p = Polygon(verts, facecolor="none", edgecolor=color) 146 | ax.add_patch(p) 147 | ax.imshow(masked_image.astype(np.uint8)) 148 | plt.show() 149 | 150 | 151 | def draw_rois(image, rois, refined_rois, mask, class_ids, class_names, limit=10): 152 | """ 153 | anchors: [n, (y1, x1, y2, x2)] list of anchors in image coordinates. 154 | proposals: [n, 4] the same anchors but refined to fit objects better. 155 | """ 156 | masked_image = image.copy() 157 | 158 | # Pick random anchors in case there are too many. 159 | ids = np.arange(rois.shape[0], dtype=np.int32) 160 | ids = np.random.choice( 161 | ids, limit, replace=False) if ids.shape[0] > limit else ids 162 | 163 | fig, ax = plt.subplots(1, figsize=(12, 12)) 164 | if rois.shape[0] > limit: 165 | plt.title("Showing {} random ROIs out of {}".format( 166 | len(ids), rois.shape[0])) 167 | else: 168 | plt.title("{} ROIs".format(len(ids))) 169 | 170 | # Show area outside image boundaries. 
171 | ax.set_ylim(image.shape[0] + 20, -20) 172 | ax.set_xlim(-50, image.shape[1] + 20) 173 | ax.axis('off') 174 | 175 | for i, id in enumerate(ids): 176 | color = np.random.rand(3) 177 | class_id = class_ids[id] 178 | # ROI 179 | y1, x1, y2, x2 = rois[id] 180 | p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, 181 | edgecolor=color if class_id else "gray", 182 | facecolor='none', linestyle="dashed") 183 | ax.add_patch(p) 184 | # Refined ROI 185 | if class_id: 186 | ry1, rx1, ry2, rx2 = refined_rois[id] 187 | p = patches.Rectangle((rx1, ry1), rx2 - rx1, ry2 - ry1, linewidth=2, 188 | edgecolor=color, facecolor='none') 189 | ax.add_patch(p) 190 | # Connect the top-left corners of the anchor and proposal for easy visualization 191 | ax.add_line(lines.Line2D([x1, rx1], [y1, ry1], color=color)) 192 | 193 | # Label 194 | label = class_names[class_id] 195 | ax.text(rx1, ry1 + 8, "{}".format(label), 196 | color='w', size=11, backgroundcolor="none") 197 | 198 | # Mask 199 | m = utils.unmold_mask(mask[id], rois[id] 200 | [:4].astype(np.int32), image.shape) 201 | masked_image = apply_mask(masked_image, m, color) 202 | 203 | ax.imshow(masked_image) 204 | 205 | # Print stats 206 | print("Positive ROIs: ", class_ids[class_ids > 0].shape[0]) 207 | print("Negative ROIs: ", class_ids[class_ids == 0].shape[0]) 208 | print("Positive Ratio: {:.2f}".format( 209 | class_ids[class_ids > 0].shape[0] / class_ids.shape[0])) 210 | 211 | 212 | # TODO: Replace with matplotlib equivalent? 213 | def draw_box(image, box, color): 214 | """Draw 3-pixel width bounding boxes on the given image array. 215 | color: list of 3 int values for RGB. 216 | """ 217 | y1, x1, y2, x2 = box 218 | image[y1:y1 + 2, x1:x2] = color 219 | image[y2:y2 + 2, x1:x2] = color 220 | image[y1:y2, x1:x1 + 2] = color 221 | image[y1:y2, x2:x2 + 2] = color 222 | return image 223 | 224 | 225 | def display_top_masks(image, mask, class_ids, class_names, limit=4): 226 | """Display the given image and the top few class masks.""" 227 | to_display = [] 228 | titles = [] 229 | to_display.append(image) 230 | titles.append("H x W={}x{}".format(image.shape[0], image.shape[1])) 231 | # Pick top prominent classes in this image 232 | unique_class_ids = np.unique(class_ids) 233 | mask_area = [np.sum(mask[:, :, np.where(class_ids == i)[0]]) 234 | for i in unique_class_ids] 235 | top_ids = [v[0] for v in sorted(zip(unique_class_ids, mask_area), 236 | key=lambda r: r[1], reverse=True) if v[1] > 0] 237 | # Generate images and titles 238 | for i in range(limit): 239 | class_id = top_ids[i] if i < len(top_ids) else -1 240 | # Pull masks of instances belonging to the same class. 241 | m = mask[:, :, np.where(class_ids == class_id)[0]] 242 | m = np.sum(m * np.arange(1, m.shape[-1] + 1), -1) 243 | to_display.append(m) 244 | titles.append(class_names[class_id] if class_id != -1 else "-") 245 | display_images(to_display, titles=titles, cols=limit + 1, cmap="Blues_r") 246 | 247 | 248 | def plot_precision_recall(AP, precisions, recalls): 249 | """Draw the precision-recall curve. 250 | 251 | AP: Average precision at IoU >= 0.5 252 | precisions: list of precision values 253 | recalls: list of recall values 254 | """ 255 | # Plot the Precision-Recall curve 256 | _, ax = plt.subplots(1) 257 | ax.set_title("Precision-Recall Curve. 
AP@50 = {:.3f}".format(AP)) 258 | ax.set_ylim(0, 1.1) 259 | ax.set_xlim(0, 1.1) 260 | _ = ax.plot(recalls, precisions) 261 | 262 | 263 | def plot_overlaps(gt_class_ids, pred_class_ids, pred_scores, 264 | overlaps, class_names, threshold=0.5): 265 | """Draw a grid showing how ground truth objects are classified. 266 | gt_class_ids: [N] int. Ground truth class IDs 267 | pred_class_ids: [N] int. Predicted class IDs 268 | pred_scores: [N] float. The probability scores of predicted classes 269 | overlaps: [pred_boxes, gt_boxes] IoU overlaps of predictions and GT boxes. 270 | class_names: list of all class names in the dataset 271 | threshold: Float. The prediction probability required to predict a class 272 | """ 273 | gt_class_ids = gt_class_ids[gt_class_ids != 0] 274 | pred_class_ids = pred_class_ids[pred_class_ids != 0] 275 | 276 | plt.figure(figsize=(12, 10)) 277 | plt.imshow(overlaps, interpolation='nearest', cmap=plt.cm.Blues) 278 | plt.yticks(np.arange(len(pred_class_ids)), 279 | ["{} ({:.2f})".format(class_names[int(id)], pred_scores[i]) 280 | for i, id in enumerate(pred_class_ids)]) 281 | plt.xticks(np.arange(len(gt_class_ids)), 282 | [class_names[int(id)] for id in gt_class_ids], rotation=90) 283 | 284 | thresh = overlaps.max() / 2. 285 | for i, j in itertools.product(range(overlaps.shape[0]), 286 | range(overlaps.shape[1])): 287 | text = "" 288 | if overlaps[i, j] > threshold: 289 | text = "match" if gt_class_ids[j] == pred_class_ids[i] else "wrong" 290 | color = ("white" if overlaps[i, j] > thresh 291 | else "black" if overlaps[i, j] > 0 292 | else "grey") 293 | plt.text(j, i, "{:.3f}\n{}".format(overlaps[i, j], text), 294 | horizontalalignment="center", verticalalignment="center", 295 | fontsize=9, color=color) 296 | 297 | plt.tight_layout() 298 | plt.xlabel("Ground Truth") 299 | plt.ylabel("Predictions") 300 | 301 | 302 | def draw_boxes(image, boxes=None, refined_boxes=None, 303 | masks=None, captions=None, visibilities=None, 304 | title="", ax=None): 305 | """Draw bounding boxes and segmentation masks with different 306 | customizations. 307 | 308 | boxes: [N, (y1, x1, y2, x2, class_id)] in image coordinates. 309 | refined_boxes: Like boxes, but draw with solid lines to show 310 | that they're the result of refining 'boxes'. 311 | masks: [height, width, N] 312 | captions: List of N titles to display on each box 313 | visibilities: (optional) List of values of 0, 1, or 2. Determine how 314 | prominent each bounding box should be. 315 | title: An optional title to show over the image 316 | ax: (optional) Matplotlib axis to draw on. 317 | """ 318 | # Number of boxes 319 | assert boxes is not None or refined_boxes is not None 320 | N = boxes.shape[0] if boxes is not None else refined_boxes.shape[0] 321 | 322 | # Matplotlib Axis 323 | if not ax: 324 | _, ax = plt.subplots(1, figsize=(12, 12)) 325 | 326 | # Generate random colors 327 | colors = random_colors(N) 328 | 329 | # Show area outside image boundaries.
330 | margin = image.shape[0] // 10 331 | ax.set_ylim(image.shape[0] + margin, -margin) 332 | ax.set_xlim(-margin, image.shape[1] + margin) 333 | ax.axis('off') 334 | 335 | ax.set_title(title) 336 | 337 | masked_image = image.astype(np.uint32).copy() 338 | for i in range(N): 339 | # Box visibility 340 | visibility = visibilities[i] if visibilities is not None else 1 341 | if visibility == 0: 342 | color = "gray" 343 | style = "dotted" 344 | alpha = 0.5 345 | elif visibility == 1: 346 | color = colors[i] 347 | style = "dotted" 348 | alpha = 1 349 | elif visibility == 2: 350 | color = colors[i] 351 | style = "solid" 352 | alpha = 1 353 | 354 | # Boxes 355 | if boxes is not None: 356 | if not np.any(boxes[i]): 357 | # Skip this instance. Has no bbox. Likely lost in cropping. 358 | continue 359 | y1, x1, y2, x2 = boxes[i] 360 | p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, 361 | alpha=alpha, linestyle=style, 362 | edgecolor=color, facecolor='none') 363 | ax.add_patch(p) 364 | 365 | # Refined boxes 366 | if refined_boxes is not None and visibility > 0: 367 | ry1, rx1, ry2, rx2 = refined_boxes[i].astype(np.int32) 368 | p = patches.Rectangle((rx1, ry1), rx2 - rx1, ry2 - ry1, linewidth=2, 369 | edgecolor=color, facecolor='none') 370 | ax.add_patch(p) 371 | # Connect the top-left corners of the anchor and proposal 372 | if boxes is not None: 373 | ax.add_line(lines.Line2D([x1, rx1], [y1, ry1], color=color)) 374 | 375 | # Captions 376 | if captions is not None: 377 | caption = captions[i] 378 | # If there are refined boxes, display captions on them 379 | if refined_boxes is not None: 380 | y1, x1, y2, x2 = ry1, rx1, ry2, rx2 381 | x = random.randint(x1, (x1 + x2) // 2) 382 | ax.text(x1, y1, caption, size=11, verticalalignment='top', 383 | color='w', backgroundcolor="none", 384 | bbox={'facecolor': color, 'alpha': 0.5, 385 | 'pad': 2, 'edgecolor': 'none'}) 386 | 387 | # Masks 388 | if masks is not None: 389 | mask = masks[:, :, i] 390 | masked_image = apply_mask(masked_image, mask, color) 391 | # Mask Polygon 392 | # Pad to ensure proper polygons for masks that touch image edges. 
393 | padded_mask = np.zeros( 394 | (mask.shape[0] + 2, mask.shape[1] + 2), dtype=np.uint8) 395 | padded_mask[1:-1, 1:-1] = mask 396 | contours = find_contours(padded_mask, 0.5) 397 | for verts in contours: 398 | # Subtract the padding and flip (y, x) to (x, y) 399 | verts = np.fliplr(verts) - 1 400 | p = Polygon(verts, facecolor="none", edgecolor=color) 401 | ax.add_patch(p) 402 | ax.imshow(masked_image.astype(np.uint8)) 403 | 404 | def plot_loss(loss, val_loss, save=True, log_dir=None): 405 | loss = np.array(loss) 406 | val_loss = np.array(val_loss) 407 | 408 | plt.figure("loss") 409 | plt.gcf().clear() 410 | plt.plot(loss[:, 0], label='train') 411 | plt.plot(val_loss[:, 0], label='valid') 412 | plt.xlabel('epoch') 413 | plt.ylabel('loss') 414 | plt.legend() 415 | if save: 416 | save_path = os.path.join(log_dir, "loss.png") 417 | plt.savefig(save_path) 418 | else: 419 | plt.show(block=False) 420 | plt.pause(0.1) 421 | 422 | plt.figure("rpn_class_loss") 423 | plt.gcf().clear() 424 | plt.plot(loss[:, 1], label='train') 425 | plt.plot(val_loss[:, 1], label='valid') 426 | plt.xlabel('epoch') 427 | plt.ylabel('loss') 428 | plt.legend() 429 | if save: 430 | save_path = os.path.join(log_dir, "rpn_class_loss.png") 431 | plt.savefig(save_path) 432 | else: 433 | plt.show(block=False) 434 | plt.pause(0.1) 435 | 436 | plt.figure("rpn_bbox_loss") 437 | plt.gcf().clear() 438 | plt.plot(loss[:, 2], label='train') 439 | plt.plot(val_loss[:, 2], label='valid') 440 | plt.xlabel('epoch') 441 | plt.ylabel('loss') 442 | plt.legend() 443 | if save: 444 | save_path = os.path.join(log_dir, "rpn_bbox_loss.png") 445 | plt.savefig(save_path) 446 | else: 447 | plt.show(block=False) 448 | plt.pause(0.1) 449 | 450 | plt.figure("mrcnn_class_loss") 451 | plt.gcf().clear() 452 | plt.plot(loss[:, 3], label='train') 453 | plt.plot(val_loss[:, 3], label='valid') 454 | plt.xlabel('epoch') 455 | plt.ylabel('loss') 456 | plt.legend() 457 | if save: 458 | save_path = os.path.join(log_dir, "mrcnn_class_loss.png") 459 | plt.savefig(save_path) 460 | else: 461 | plt.show(block=False) 462 | plt.pause(0.1) 463 | 464 | plt.figure("mrcnn_bbox_loss") 465 | plt.gcf().clear() 466 | plt.plot(loss[:, 4], label='train') 467 | plt.plot(val_loss[:, 4], label='valid') 468 | plt.xlabel('epoch') 469 | plt.ylabel('loss') 470 | plt.legend() 471 | if save: 472 | save_path = os.path.join(log_dir, "mrcnn_bbox_loss.png") 473 | plt.savefig(save_path) 474 | else: 475 | plt.show(block=False) 476 | plt.pause(0.1) 477 | 478 | plt.figure("mrcnn_mask_loss") 479 | plt.gcf().clear() 480 | plt.plot(loss[:, 5], label='train') 481 | plt.plot(val_loss[:, 5], label='valid') 482 | plt.xlabel('epoch') 483 | plt.ylabel('loss') 484 | plt.legend() 485 | if save: 486 | save_path = os.path.join(log_dir, "mrcnn_mask_loss.png") 487 | plt.savefig(save_path) 488 | else: 489 | plt.show(block=False) 490 | plt.pause(0.1) 491 | 492 | 493 | --------------------------------------------------------------------------------
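A minimal usage sketch for the two modules above (utils.py and visualize.py), not part of the repo: it reads an image from a placeholder path, resizes it to a padded square with utils.resize_image(), and draws one fabricated detection purely to exercise visualize.display_instances(). The path, the 800/1024 min/max dims, the fake box/mask/score, and the two-entry class list are illustrative assumptions; real boxes, masks, and scores would come from the trained model, and resize_image() depends on the older scipy.misc.imresize API used throughout utils.py.

import numpy as np
import skimage.io

import utils        # utils.py shown above
import visualize    # visualize.py shown above

# Placeholder path; any RGB image works.
image = skimage.io.imread("path/to/image.jpg")

# Resize while keeping the aspect ratio; 800/1024 are assumed, typical
# Mask R-CNN min/max dims. `window` is the un-padded region of the result.
image, window, scale, padding = utils.resize_image(
    image, min_dim=800, max_dim=1024, padding=True)

# Fabricate a single detection covering the window, only to exercise the
# plotting API; real detections come from the model, not from this snippet.
y1, x1, y2, x2 = window
boxes = np.array([[y1, x1, y2, x2]], dtype=np.int32)       # [N, (y1, x1, y2, x2)]
masks = np.zeros(image.shape[:2] + (1,), dtype=np.uint8)   # [H, W, N]
masks[y1:y2, x1:x2, 0] = 1
class_ids = np.array([1], dtype=np.int32)
scores = np.array([0.99], dtype=np.float32)

visualize.display_instances(image, boxes, masks, class_ids,
                            class_names=["BG", "object"], scores=scores)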