├── .gitignore ├── Demo.ipynb ├── InstallPyTorchSourceCUDA.ipynb ├── LICENSE ├── README.md ├── Setup.ipynb ├── Train.ipynb ├── build_pytorch1.0_macosx_10.13.sh ├── coco.py ├── config.py ├── demo.py ├── fish_pytorch_style.py ├── images ├── find_fish.png ├── rec_fish.png └── vgg_annotated_fish.jpg ├── lib ├── nms │ ├── __init__.py │ ├── build.py │ ├── nms_wrapper.py │ ├── pth_nms.py │ └── src │ │ ├── cuda │ │ ├── nms_kernel.cu │ │ └── nms_kernel.h │ │ ├── nms.c │ │ ├── nms.h │ │ ├── nms_cuda.c │ │ └── nms_cuda.h └── roi_align │ ├── __init__.py │ ├── build.py │ ├── crop_and_resize.py │ ├── roi_align.py │ └── src │ ├── crop_and_resize.c │ ├── crop_and_resize.h │ ├── crop_and_resize_gpu.c │ ├── crop_and_resize_gpu.h │ └── cuda │ ├── crop_and_resize_kernel.cu │ └── crop_and_resize_kernel.h ├── model.py ├── requirements.txt ├── utils.py └── visualize.py /.gitignore: -------------------------------------------------------------------------------- 1 | tmp 2 | tmp_install 3 | bdist 4 | pytorch 5 | *.bkup 6 | scripts 7 | coco 8 | data 9 | pycocotools 10 | .DS_Store 11 | *.h5 12 | *.pth 13 | fish_pics/ 14 | logs/ 15 | assets/ 16 | 17 | _ext/ 18 | pycocotools/ 19 | cocoapi/ 20 | *.cu.o 21 | 22 | *.exe 23 | 24 | # Byte-compiled / optimized / DLL files 25 | __pycache__/ 26 | *.py[cod] 27 | *$py.class 28 | 29 | # C extensions 30 | *.so 31 | 32 | # Distribution / packaging 33 | .Python 34 | env/ 35 | build/ 36 | develop-eggs/ 37 | dist/ 38 | downloads/ 39 | eggs/ 40 | .eggs/ 41 | #lib/ 42 | lib64/ 43 | parts/ 44 | sdist/ 45 | var/ 46 | wheels/ 47 | *.egg-info/ 48 | .installed.cfg 49 | *.egg 50 | 51 | # PyInstaller 52 | # Usually these files are written by a python script from a template 53 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
54 | *.manifest 55 | *.spec 56 | 57 | # Installer logs 58 | pip-log.txt 59 | pip-delete-this-directory.txt 60 | 61 | # Unit test / coverage reports 62 | htmlcov/ 63 | .tox/ 64 | .coverage 65 | .coverage.* 66 | .cache 67 | nosetests.xml 68 | coverage.xml 69 | *.cover 70 | .hypothesis/ 71 | 72 | # Translations 73 | *.mo 74 | *.pot 75 | 76 | # Django stuff: 77 | *.log 78 | local_settings.py 79 | 80 | # Flask stuff: 81 | instance/ 82 | .webassets-cache 83 | 84 | # Scrapy stuff: 85 | .scrapy 86 | 87 | # Sphinx documentation 88 | docs/_build/ 89 | 90 | # PyBuilder 91 | target/ 92 | 93 | # Jupyter Notebook 94 | .ipynb_checkpoints 95 | 96 | # pyenv 97 | .python-version 98 | 99 | # celery beat schedule file 100 | celerybeat-schedule 101 | 102 | # SageMath parsed files 103 | *.sage.py 104 | 105 | # dotenv 106 | .env 107 | 108 | # virtualenv 109 | .venv 110 | venv/ 111 | ENV/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | -------------------------------------------------------------------------------- /Demo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Demo for PyTorch Mask RCNN\n", 8 | "Base Repo: https://github.com/multimodallearning/pytorch-mask-rcnn\n", 9 | "\n", 10 | "Sample Repo: https://github.com/michhar/pytorch-mask-rcnn-samples\n", 11 | "\n", 12 | "**Note: Run this notebook inside of the Sample Repo after cloning**" 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": {}, 18 | "source": [ 19 | "## Prerequisites\n", 20 | "\n", 21 | "1. PyTorch (tested with version below) - **note**: further testing required as PyTorch has updated to 1.0 which is in preview (as of this notebook update) and the code was originally using 0.4. It's likely the model class and training script will need to be updated as well. Troubleshooting is happening for the custom extension builds as well. (2018-09-28)" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "## Check Setup" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "import sys\n", 38 | "sys.prefix" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "import torch\n", 48 | "torch.__version__" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": {}, 54 | "source": [ 55 | "### Download the Model\n", 56 | "\n", 57 | "Here: https://drive.google.com/open?id=1VV6WgX_RNl6a9Yi9-Pe7ZyVKHRJZSKkm\n", 58 | "\n", 59 | "And upload to the root directory of the repo on this machine (where this notebook lives)." 
60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": {}, 65 | "source": [ 66 | "## Setup and Run Inference" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "# Am I running 32 or 64 bit Python\n", 76 | "import struct\n", 77 | "print(struct.calcsize(\"P\") * 8)" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [ 86 | "%load_ext autoreload\n", 87 | "%autoreload\n", 88 | "import os\n", 89 | "import sys\n", 90 | "import random\n", 91 | "import math\n", 92 | "import numpy as np\n", 93 | "import skimage.io\n", 94 | "import matplotlib\n", 95 | "import matplotlib.pyplot as plt\n", 96 | "import glob\n", 97 | "from io import BytesIO\n", 98 | "import requests\n", 99 | "from PIL import Image\n", 100 | "%matplotlib inline\n", 101 | "\n", 102 | "import coco\n", 103 | "# from pycocotools import coco\n", 104 | "import utils\n", 105 | "import model as modellib\n", 106 | "import visualize\n", 107 | "\n", 108 | "import torch\n", 109 | "# import pycocotools\n", 110 | "\n", 111 | "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n", 112 | "print(device)\n", 113 | "\n", 114 | "# Root directory of the project\n", 115 | "ROOT_DIR = os.getcwd()\n", 116 | "\n", 117 | "# Directory to save logs and trained model\n", 118 | "MODEL_DIR = os.path.join(ROOT_DIR, \"logs\")\n", 119 | "\n", 120 | "# Path to trained weights file\n", 121 | "# Download this file and place in the root of your\n", 122 | "# project (See README file for details)\n", 123 | "COCO_MODEL_PATH = os.path.join(ROOT_DIR, \"models\", \"mask_rcnn_coco.pth\")\n", 124 | "\n", 125 | "# Directory of images to run detection on\n", 126 | "IMAGE_DIR = os.path.join(ROOT_DIR, \"images\")\n", 127 | "\n", 128 | "class InferenceConfig(coco.CocoConfig):\n", 129 | " # Set batch size to 1 since we'll be running inference on\n", 130 | " # one image at a time. Batch size = GPU_COUNT * IMAGES_PER_GPU\n", 131 | " # GPU_COUNT = 0 for CPU\n", 132 | " GPU_COUNT = 0\n", 133 | " IMAGES_PER_GPU = 1\n", 134 | " COCO_MODEL_PATH = os.path.join(ROOT_DIR, \"mask_rcnn_coco.pth\")\n", 135 | "\n", 136 | "\n", 137 | "config = InferenceConfig()\n", 138 | "config.display()\n", 139 | "\n", 140 | "# Create model object.\n", 141 | "model = modellib.MaskRCNN(model_dir=MODEL_DIR, config=config)\n", 142 | "model = model.to(device=device)" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": null, 148 | "metadata": {}, 149 | "outputs": [], 150 | "source": [ 151 | "# Load weights trained on MS-COCO\n", 152 | "print(COCO_MODEL_PATH)\n", 153 | "model.load_state_dict(torch.load(COCO_MODEL_PATH))" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": null, 159 | "metadata": {}, 160 | "outputs": [], 161 | "source": [ 162 | "# COCO Class names\n", 163 | "# Index of the class in the list is its ID. 
For example, to get ID of\n", 164 | "# the teddy bear class, use: class_names.index('teddy bear')\n", 165 | "class_names = ['BG', 'person', 'bicycle', 'car', 'motorcycle', 'airplane',\n", 166 | " 'bus', 'train', 'truck', 'boat', 'traffic light',\n", 167 | " 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird',\n", 168 | " 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear',\n", 169 | " 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',\n", 170 | " 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',\n", 171 | " 'kite', 'baseball bat', 'baseball glove', 'skateboard',\n", 172 | " 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup',\n", 173 | " 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',\n", 174 | " 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',\n", 175 | " 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',\n", 176 | " 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',\n", 177 | " 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster',\n", 178 | " 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors',\n", 179 | " 'teddy bear', 'hair drier', 'toothbrush']\n", 180 | "\n", 181 | "# Load a random image from the images folder\n", 182 | "# file_names = glob.glob(os.path.join('images', '*.jpg'))\n", 183 | "# image = skimage.io.imread(os.path.join(random.choice(file_names)))\n", 184 | "\n", 185 | "# Or load file from the internet\n", 186 | "req = requests.get('https://cdn.pixabay.com/photo/2015/06/20/13/55/man-815795__340.jpg')\n", 187 | "image = np.asarray(Image.open(BytesIO(req.content)))" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": null, 193 | "metadata": { 194 | "scrolled": false 195 | }, 196 | "outputs": [], 197 | "source": [ 198 | "# Run detection\n", 199 | "results = model.detect([image])" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": null, 205 | "metadata": {}, 206 | "outputs": [], 207 | "source": [ 208 | "# Visualize results\n", 209 | "r = results[0]\n", 210 | "visualize.display_instances(image, r['rois'], r['masks'], r['class_ids'],\n", 211 | " class_names, r['scores'])\n", 212 | "plt.show()" 213 | ] 214 | }, 215 | { 216 | "cell_type": "markdown", 217 | "metadata": {}, 218 | "source": [ 219 | "## Run an actual training experiment" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": null, 225 | "metadata": {}, 226 | "outputs": [], 227 | "source": [ 228 | "# ! 
python coco.py train --dataset=mask_rcnn_coco.pth --model=imagenet" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": null, 234 | "metadata": {}, 235 | "outputs": [], 236 | "source": [ 237 | "%%writefile demo.py\n", 238 | "import os\n", 239 | "import sys\n", 240 | "import random\n", 241 | "import math\n", 242 | "import numpy as np\n", 243 | "import skimage.io\n", 244 | "import matplotlib\n", 245 | "import matplotlib.pyplot as plt\n", 246 | "import glob\n", 247 | "from io import BytesIO\n", 248 | "import requests\n", 249 | "from PIL import Image\n", 250 | "\n", 251 | "# import coco\n", 252 | "from pycocotools import coco\n", 253 | "import utils\n", 254 | "import model as modellib\n", 255 | "import visualize\n", 256 | "\n", 257 | "import torch\n", 258 | "import pycocotools\n", 259 | "\n", 260 | "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n", 261 | "print(device)\n", 262 | "\n", 263 | "# Root directory of the project\n", 264 | "ROOT_DIR = os.getcwd()\n", 265 | "\n", 266 | "# Directory to save logs and trained model\n", 267 | "MODEL_DIR = os.path.join(ROOT_DIR, \"logs\")\n", 268 | "\n", 269 | "# Path to trained weights file\n", 270 | "# Download this file and place in the root of your\n", 271 | "# project (See README file for details)\n", 272 | "COCO_MODEL_PATH = os.path.join(ROOT_DIR, \"models\", \"mask_rcnn_coco.pth\")\n", 273 | "\n", 274 | "# Directory of images to run detection on\n", 275 | "IMAGE_DIR = os.path.join(ROOT_DIR, \"images\")\n", 276 | "\n", 277 | "class InferenceConfig(coco.CocoConfig):\n", 278 | " # Set batch size to 1 since we'll be running inference on\n", 279 | " # one image at a time. Batch size = GPU_COUNT * IMAGES_PER_GPU\n", 280 | " # GPU_COUNT = 0 for CPU\n", 281 | " GPU_COUNT = 0\n", 282 | " IMAGES_PER_GPU = 1\n", 283 | " COCO_MODEL_PATH = os.path.join(ROOT_DIR, \"mask_rcnn_coco.pth\")\n", 284 | "\n", 285 | "\n", 286 | "config = InferenceConfig()\n", 287 | "config.display()\n", 288 | "\n", 289 | "# Create model object.\n", 290 | "model = modellib.MaskRCNN(model_dir=MODEL_DIR, config=config)\n", 291 | "model = model.to(device=device)\n", 292 | "\n", 293 | "# Load weights trained on MS-COCO\n", 294 | "print(COCO_MODEL_PATH)\n", 295 | "model.load_state_dict(torch.load(COCO_MODEL_PATH))\n", 296 | "\n", 297 | "# COCO Class names\n", 298 | "# Index of the class in the list is its ID. 
For example, to get ID of\n", 299 | "# the teddy bear class, use: class_names.index('teddy bear')\n", 300 | "class_names = ['BG', 'person', 'bicycle', 'car', 'motorcycle', 'airplane',\n", 301 | " 'bus', 'train', 'truck', 'boat', 'traffic light',\n", 302 | " 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird',\n", 303 | " 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear',\n", 304 | " 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',\n", 305 | " 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',\n", 306 | " 'kite', 'baseball bat', 'baseball glove', 'skateboard',\n", 307 | " 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup',\n", 308 | " 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',\n", 309 | " 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',\n", 310 | " 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',\n", 311 | " 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',\n", 312 | " 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster',\n", 313 | " 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors',\n", 314 | " 'teddy bear', 'hair drier', 'toothbrush']\n", 315 | "\n", 316 | "# Load a random image from the images folder\n", 317 | "# file_names = glob.glob(os.path.join('images', '*.jpg'))\n", 318 | "# image = skimage.io.imread(os.path.join(random.choice(file_names)))\n", 319 | "\n", 320 | "# Or load file from the internet\n", 321 | "req = requests.get('https://cdn.pixabay.com/photo/2015/06/20/13/55/man-815795__340.jpg')\n", 322 | "image = np.asarray(Image.open(BytesIO(req.content)))\n", 323 | "\n", 324 | "# Run detection\n", 325 | "results = model.detect([image])\n", 326 | "\n", 327 | "# Visualize results\n", 328 | "r = results[0]\n", 329 | "visualize.display_instances(image, r['rois'], r['masks'], r['class_ids'],\n", 330 | " class_names, r['scores'])\n", 331 | "plt.show()" 332 | ] 333 | }, 334 | { 335 | "cell_type": "code", 336 | "execution_count": null, 337 | "metadata": {}, 338 | "outputs": [], 339 | "source": [] 340 | } 341 | ], 342 | "metadata": { 343 | "kernelspec": { 344 | "display_name": "Python 3.6 (sys)", 345 | "language": "python", 346 | "name": "py36sys" 347 | }, 348 | "language_info": { 349 | "codemirror_mode": { 350 | "name": "ipython", 351 | "version": 3 352 | }, 353 | "file_extension": ".py", 354 | "mimetype": "text/x-python", 355 | "name": "python", 356 | "nbconvert_exporter": "python", 357 | "pygments_lexer": "ipython3", 358 | "version": "3.6.6" 359 | }, 360 | "nav_menu": {}, 361 | "toc": { 362 | "navigate_menu": true, 363 | "number_sections": true, 364 | "sideBar": true, 365 | "threshold": 6, 366 | "toc_cell": false, 367 | "toc_section_display": "block", 368 | "toc_window_display": false 369 | }, 370 | "toc_position": { 371 | "height": "635.696px", 372 | "left": "0px", 373 | "right": "1488.18px", 374 | "top": "133.438px", 375 | "width": "25.4545px" 376 | } 377 | }, 378 | "nbformat": 4, 379 | "nbformat_minor": 2 380 | } 381 | -------------------------------------------------------------------------------- /InstallPyTorchSourceCUDA.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Building PyTorch from Source on Local Machine with CUDA Support\n", 8 | "Compiling from source to **be able to run PyTorch with GPU support and compile PyTorch extensions with CUDA support**\n", 9 | "\n", 10 | "Systems tested:\n", 11 | "* [TBD system] with CUDA 10.0 and 
cuDNN 7.3.0 ([CUDA setup](https://docs.nvidia.com/cuda/cuda-installation-guide-mac-os-x/index.html) and [cuDNN setup](https://docs.nvidia.com/deeplearning/sdk/cudnn-install/index.html)) and Python 3.6 (non-conda install)" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "# What pytorch do we have right now?\n", 21 | "import sys\n", 22 | "print(sys.prefix)\n", 23 | "! {sys.prefix}/bin/pip freeze | grep torch" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "Please have done the following if on macOS (tested with 10.13):\n", 31 | "1. Pip installed all packages from `requirements.txt` (`pip install -r requirements.txt`)\n", 32 | "2. Make sure using `clang` and `clang++` from Apple through XCode.\n", 33 | "3. Homebrew install of `libbind`: `brew install libbind`\n", 34 | "4. NVIDIA CUDA Toolkit setup ([NVIDIA CUDA Toolkit Docs](https://docs.nvidia.com/cuda/index.html))\n", 35 | " * NEED NVIDIA CUDA >= 9.2 (testing here with 10.0)\n" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "These instructions are based on \n", 43 | "* https://github.com/QuantScientist/Deep-Learning-Boot-Camp/blob/master/day02-PyTORCH-and-PyCUDA/PyTorch/build_torch.sh\n", 44 | "* http://www.cs.rochester.edu/u/kautz/Installing-Pytorch-Cuda-on-Macbook.html\n", 45 | "* https://github.com/pytorch/pytorch/blob/master/.jenkins/pytorch/macos-build.sh\n", 46 | "* https://github.com/apache/incubator-mxnet/issues/9217#issuecomment-354982838" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": { 53 | "scrolled": false 54 | }, 55 | "outputs": [], 56 | "source": [ 57 | "! git clone --recursive https://github.com/pytorch/pytorch.git" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "metadata": { 64 | "scrolled": false 65 | }, 66 | "outputs": [], 67 | "source": [ 68 | "! export PATH=/usr/local/cuda/bin:$PATH &&\\\n", 69 | " export LD_LIBRARY_PATH=/usr/local/cuda/lib:$LD_LIBRARY_PATH &&\\\n", 70 | " export CUDA_BIN_PATH=/usr/local/cuda/bin &&\\\n", 71 | " export CUDA_TOOLKIT_ROOT_DIR=/Developer/NVIDIA/CUDA-10.0 &&\\\n", 72 | " export CUDNN_LIB_DIR=/usr/local/cuda/lib &&\\\n", 73 | " export CUDA_HOST_COMPILER=clang &&\\\n", 74 | " export USE_CUDA=1 &&\\\n", 75 | " export USE_NNPACK=1 &&\\\n", 76 | " export CC=clang &&\\\n", 77 | " export CXX=clang++ &&\\\n", 78 | " cd pytorch &&\\\n", 79 | " pip3 uninstall torch --yes &&\\\n", 80 | " python3 setup.py clean &&\\\n", 81 | " TORCH_CUDA_ARCH_LIST=\"5.2\" python3 setup.py install" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "# # To redo above, remove the folder that was git cloned\n", 91 | "# ! rm -fr pytorch" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": {}, 98 | "outputs": [], 99 | "source": [ 100 | "import torch\n", 101 | "torch.__version__" 102 | ] 103 | }, 104 | { 105 | "cell_type": "markdown", 106 | "metadata": {}, 107 | "source": [ 108 | "It might be prudent to build a wheel for this build. 
To do so, on the command line or in a notebook cell, in the `pytorch` folder: [wip command]\n", 109 | "\n", 110 | "`python3 setup.py clean && PATH=/Developer/NVIDIA/CUDA-10.0/bin:$PATH LD_LIBRARY_PATH=//Developer/NVIDIA/CUDA-10.0/lib:$LD_LIBRARY_PATH CUDA_BIN_PATH=/Developer/NVIDIA/CUDA-10.0/bin CUDA_TOOLKIT_ROOT_DIR=/Developer/NVIDIA/CUDA-10.0 CUDNN_LIB_DIR=//Developer/NVIDIA/CUDA-10.0/lib USE_CUDA=1 CC=clang CXX=clang++ CUDA_HOME=/Developer/NVIDIA/CUDA-10.0/ USE_NNPACK=0 LDFLAGS=\"-Wl,-no_compact_unwind\" python3 setup.py sdist bdist_wheel`\n", 111 | "\n", 112 | "Then the .whl file will be in the `bdist` folder." 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "Building on an NVIDIA PX2 Drive/TX2 Jetson (CUDA 9.0, cuDNN 7.0, Python 3.5, Ubuntu 16.04):\n", 120 | "\n", 121 | "* Note, need a swap file on PX2 - instructions here https://support.rackspace.com/how-to/create-a-linux-swap-file/ (created 3GB swap file).\n", 122 | "* This will build a wheel under `dist`" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "metadata": { 129 | "scrolled": false 130 | }, 131 | "outputs": [], 132 | "source": [ 133 | "! export PATH=/usr/local/cuda/bin:$PATH &&\\\n", 134 | " export USE_OPENCV=1 &&\\\n", 135 | " export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH &&\\\n", 136 | " export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH &&\\\n", 137 | " export CUDA_BIN_PATH=/usr/local/cuda/bin &&\\\n", 138 | " export CUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda/ &&\\\n", 139 | " export CUDNN_LIB_DIR=/usr/local/cuda/lib64 &&\\\n", 140 | " export CUDA_HOST_COMPILER=cc &&\\\n", 141 | " export USE_CUDA=1 &&\\\n", 142 | " export USE_NNPACK=1 &&\\\n", 143 | " export CC=cc &&\\\n", 144 | " export CXX=c++ &&\\\n", 145 | " cd pytorch &&\\\n", 146 | " python3 setup.py clean &&\\\n", 147 | " TORCH_CUDA_ARCH_LIST=\"3.5 5.2 6.0 6.1+PTX\" TORCH_NVCC_FLAGS=\"-Xfatbin -compress-all\" python3 setup.py bdist_wheel" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": null, 153 | "metadata": {}, 154 | "outputs": [], 155 | "source": [ 156 | "! pip install " 157 | ] 158 | } 159 | ], 160 | "metadata": { 161 | "kernelspec": { 162 | "display_name": "Python 3.6 (sys)", 163 | "language": "python", 164 | "name": "py36sys" 165 | }, 166 | "language_info": { 167 | "codemirror_mode": { 168 | "name": "ipython", 169 | "version": 3 170 | }, 171 | "file_extension": ".py", 172 | "mimetype": "text/x-python", 173 | "name": "python", 174 | "nbconvert_exporter": "python", 175 | "pygments_lexer": "ipython3", 176 | "version": "3.6.6" 177 | }, 178 | "nav_menu": {}, 179 | "toc": { 180 | "navigate_menu": true, 181 | "number_sections": true, 182 | "sideBar": true, 183 | "threshold": 6, 184 | "toc_cell": false, 185 | "toc_section_display": "block", 186 | "toc_window_display": false 187 | } 188 | }, 189 | "nbformat": 4, 190 | "nbformat_minor": 2 191 | } 192 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Mask R-CNN 2 | 3 | The MIT License (MIT) 4 | 5 | Copyright (c) 2017 Matterport, Inc. 
6 | Copyright (c) 2018 Micheleen Harris 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this software and associated documentation files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | THE SOFTWARE. 25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # A Sample Inspired by a PyTorch Port of MaskRCNN for Instance Segmentation 2 | 3 | Inspired by: https://github.com/multimodallearning/pytorch-mask-rcnn 4 | 5 | 6 | 7 | **NOTES**: 8 | * This project is working with _PyTorch 0.4.1_. If you'd like to help update this, please feel free to fork and create a PR. 9 | * There are two C-extensions that require the NVIDIA compiler and CUDA support. 10 | 11 | ## Setup 12 | 13 | System Tested (Linux and NVIDIA GPU required with CUDA/cuDNN): 14 | * CUDA 9.0 15 | * NVIDIA Tesla K80 16 | * Ubuntu 16.04 17 | 18 | These sets of Jupyter notebooks may also be run in a Docker container running on CUDA-capable GPU hardware. 19 | 20 | ## Base Model 21 | 22 | * Download the COCO model (base for transfer learning) from [google drive](https://drive.google.com/drive/folders/1LXUgC2IZUYNEoXr05tdqyKFZY0pZyPDc). You could also choose to start with the ImageNet model. 23 | 24 | ## Setup and Demo 25 | 26 | Work through the notebooks: 27 | 28 | * Setup.ipynb - install the PyTorch extensions and grab a few other tools 29 | * Demo.ipynb - to test setup and perform inference with a base model 30 | 31 | ### Collect and Label Custom Data 32 | 33 | 1. Choose images with your object(s) of interest 34 | 2. Label with the VGG Image Annotator tool (http://www.robots.ox.ac.uk/~vgg/software/via/) 35 | 36 | ![VGG annotated fish pic](images/vgg_annotated_fish.jpg) 37 |

*Adult Schoolmaster Snappers (Lutjanus apodus); Source: Florent Charpin, http://reefguide.org/pixhtml/schoolmaster2.html*


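For a rough sense of what the custom data loader has to do with these labels, here is a minimal, illustrative sketch (the helper name `region_to_mask` is hypothetical; the project's actual loader is `fish_pytorch_style.py`, used in the Train section below) that rasterizes a single polygon region from a VIA JSON export into a boolean mask, assuming the default `shape_attributes` / `all_points_x` / `all_points_y` layout:

```python
import numpy as np
from skimage import draw

def region_to_mask(region, height, width):
    """Rasterize one VIA polygon annotation into a [height, width] boolean mask."""
    attrs = region["shape_attributes"]
    # skimage.draw.polygon takes row (y) coordinates first; shape= clips to the image.
    rr, cc = draw.polygon(attrs["all_points_y"], attrs["all_points_x"],
                          shape=(height, width))
    mask = np.zeros((height, width), dtype=bool)
    mask[rr, cc] = True
    return mask
```

Mask R-CNN expects one such boolean channel per object instance, stacked into a `[height, width, instances]` array (the same convention `load_mask()` in `coco.py` follows).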
38 | 39 | 40 | ### Train 41 | 42 | * Train.ipynb - train on custom-labeled data, supported by a custom PyTorch `DataSet` class (`fish_pytorch_style.py`) 43 | 44 | ## Wish to Build PyTorch for Your System? 45 | 46 | If you wish to build PyTorch latest or from a commit, follow one of the two notebooks: 47 | 48 | * InstallPyTorchSourceCUDA.ipynb - build from source with CUDA support 49 | 50 | 51 | ## Additional Information and Credits 52 | 53 | * See the original repo for more information beyond the two notebooks. https://github.com/multimodallearning/pytorch-mask-rcnn 54 | * See this original blog post for some of the concepts and processes behind the PyTorch and, subsequently, this repo (the TensorFlow version): https://engineering.matterport.com/splash-of-color-instance-segmentation-with-mask-r-cnn-and-tensorflow-7c761e238b46 55 | * Related project - https://github.com/svanbodegraven/Machine-Learning-Containers 56 | * Related project - https://github.com/Azadehkhojandi/computer-vision-fish-frame-proposal 57 | * Merging VGG annotations - https://github.com/Azadehkhojandi/VGG-Image-Annotator-Json-Merger 58 | 59 | > TIP: You can run this project inside a Docker image such as the `rheartpython/cvdeep` public image that has many Deep Learning frameworks preinstalled. (more info at https://github.com/michhar/custom-jupyterhub-linux-vm) 60 | -------------------------------------------------------------------------------- /Setup.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Setup for PyTorch Mask-RCNN" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## Installs\n", 15 | "\n", 16 | "System (Linux and NVIDIA GPU required with CUDA/cuDNN):\n", 17 | "* CUDA 9.0\n", 18 | "* NVIDIA Tesla K80\n", 19 | "* Ubuntu 16.04\n", 20 | "\n", 21 | "\n", 22 | "The Python used (tested with Anaconda 3.5, 3.6, and 3.7):\n", 23 | "* DSVM: `/anaconda/envs/py35/bin/python` or `/anaconda/envs/pytorch041/bin/python` if available\n", 24 | "* macOS: `/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/`\n" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 3, 30 | "metadata": {}, 31 | "outputs": [ 32 | { 33 | "name": "stdout", 34 | "output_type": "stream", 35 | "text": [ 36 | "Sun May 12 03:17:16 2019 \r\n", 37 | "+-----------------------------------------------------------------------------+\r\n", 38 | "| NVIDIA-SMI 396.44 Driver Version: 396.44 |\r\n", 39 | "|-------------------------------+----------------------+----------------------+\r\n", 40 | "| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\r\n", 41 | "| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. 
|\r\n", 42 | "|===============================+======================+======================|\r\n", 43 | "| 0 Tesla K80 On | 00004BBC:00:00.0 Off | 0 |\r\n", 44 | "| N/A 34C P8 34W / 149W | 0MiB / 11441MiB | 0% Default |\r\n", 45 | "+-------------------------------+----------------------+----------------------+\r\n", 46 | " \r\n", 47 | "+-----------------------------------------------------------------------------+\r\n", 48 | "| Processes: GPU Memory |\r\n", 49 | "| GPU PID Type Process name Usage |\r\n", 50 | "|=============================================================================|\r\n", 51 | "| No running processes found |\r\n", 52 | "+-----------------------------------------------------------------------------+\r\n" 53 | ] 54 | } 55 | ], 56 | "source": [ 57 | "! nvidia-smi" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 4, 63 | "metadata": {}, 64 | "outputs": [ 65 | { 66 | "data": { 67 | "text/plain": [ 68 | "'/anaconda/envs/pytorch041/bin/python'" 69 | ] 70 | }, 71 | "execution_count": 4, 72 | "metadata": {}, 73 | "output_type": "execute_result" 74 | } 75 | ], 76 | "source": [ 77 | "# What Python\n", 78 | "import sys\n", 79 | "sys.executable" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": null, 85 | "metadata": { 86 | "scrolled": false 87 | }, 88 | "outputs": [], 89 | "source": [ 90 | "# Install libs from requirements.txt library\n", 91 | "! sudo LC_ALL=C {sys.executable} -m pip install -r requirements.txt" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 5, 97 | "metadata": {}, 98 | "outputs": [ 99 | { 100 | "data": { 101 | "text/plain": [ 102 | "'0.4.1.post2'" 103 | ] 104 | }, 105 | "execution_count": 5, 106 | "metadata": {}, 107 | "output_type": "execute_result" 108 | } 109 | ], 110 | "source": [ 111 | "# What PyTorch\n", 112 | "import torch\n", 113 | "torch.__version__" 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": {}, 119 | "source": [ 120 | "## Building the extensions\n", 121 | "\n", 122 | "Could also do:\n", 123 | "\n", 124 | "```\n", 125 | "! 
export PATH=/usr/local/cuda/bin:$PATH &&\\\n", 126 | " export USE_OPENCV=1 &&\\\n", 127 | " export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH &&\\\n", 128 | " export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH &&\\\n", 129 | " export CUDA_BIN_PATH=/usr/local/cuda/bin &&\\\n", 130 | " export CUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda/ &&\\\n", 131 | " export CUDNN_LIB_DIR=/usr/local/cuda/lib64 &&\\\n", 132 | " export CUDA_HOST_COMPILER=cc &&\\\n", 133 | " export USE_CUDA=1 &&\\\n", 134 | " export USE_NNPACK=1 &&\\\n", 135 | " cd nms/src/cuda/ && nvcc -c -o nms_kernel.cu.o nms_kernel.cu -x cu -Xcompiler -fPIC &&\\\n", 136 | " cd ../../ &&\\\n", 137 | " TORCH_CUDA_ARCH_LIST=\"3.5 5.2 6.0 6.1+PTX\" TORCH_NVCC_FLAGS=\"-Xfatbin -compress-all\" &&\\\n", 138 | " CC=cc CXX=cc++ python3 build.py\n", 139 | "```" 140 | ] 141 | }, 142 | { 143 | "cell_type": "markdown", 144 | "metadata": {}, 145 | "source": [ 146 | "**NOTE**: Update the path to Python executable below before running the following bash script" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 16, 152 | "metadata": { 153 | "scrolled": false 154 | }, 155 | "outputs": [ 156 | { 157 | "name": "stdout", 158 | "output_type": "stream", 159 | "text": [ 160 | "Compiling crop_and_resize kernels by nvcc...\n", 161 | "Including CUDA code.\n", 162 | "/data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/lib/roi_align\n", 163 | "generating /tmp/tmp7w45x_xn/_crop_and_resize.c\n", 164 | "setting the current directory to '/tmp/tmp7w45x_xn'\n", 165 | "running build_ext\n", 166 | "building '_crop_and_resize' extension\n", 167 | "creating data\n", 168 | "creating data/home\n", 169 | "creating data/home/wonderwoman\n", 170 | "creating data/home/wonderwoman/notebooks\n", 171 | "creating data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples\n", 172 | "creating data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/lib\n", 173 | "creating data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/lib/roi_align\n", 174 | "creating data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/lib/roi_align/src\n", 175 | "gcc -pthread -B /data/anaconda/envs/pytorch041/compiler_compat -Wl,--sysroot=/ -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -Wstrict-prototypes -fPIC -DWITH_CUDA -I/anaconda/envs/pytorch041/lib/python3.7/site-packages/torch/utils/ffi/../../lib/include -I/anaconda/envs/pytorch041/lib/python3.7/site-packages/torch/utils/ffi/../../lib/include/TH -I/anaconda/envs/pytorch041/lib/python3.7/site-packages/torch/utils/ffi/../../lib/include/THC -I/usr/local/cuda/include -I/anaconda/envs/pytorch041/include/python3.7m -c _crop_and_resize.c -o ./_crop_and_resize.o -std=c99 -std=c99\n", 176 | "gcc -pthread -B /data/anaconda/envs/pytorch041/compiler_compat -Wl,--sysroot=/ -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -Wstrict-prototypes -fPIC -DWITH_CUDA -I/anaconda/envs/pytorch041/lib/python3.7/site-packages/torch/utils/ffi/../../lib/include -I/anaconda/envs/pytorch041/lib/python3.7/site-packages/torch/utils/ffi/../../lib/include/TH -I/anaconda/envs/pytorch041/lib/python3.7/site-packages/torch/utils/ffi/../../lib/include/THC -I/usr/local/cuda/include -I/anaconda/envs/pytorch041/include/python3.7m -c /data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/lib/roi_align/src/crop_and_resize.c -o ./data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/lib/roi_align/src/crop_and_resize.o -std=c99 -std=c99\n", 177 | "gcc -pthread -B /data/anaconda/envs/pytorch041/compiler_compat -Wl,--sysroot=/ -Wsign-compare 
-DNDEBUG -g -fwrapv -O3 -Wall -Wstrict-prototypes -fPIC -DWITH_CUDA -I/anaconda/envs/pytorch041/lib/python3.7/site-packages/torch/utils/ffi/../../lib/include -I/anaconda/envs/pytorch041/lib/python3.7/site-packages/torch/utils/ffi/../../lib/include/TH -I/anaconda/envs/pytorch041/lib/python3.7/site-packages/torch/utils/ffi/../../lib/include/THC -I/usr/local/cuda/include -I/anaconda/envs/pytorch041/include/python3.7m -c /data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/lib/roi_align/src/crop_and_resize_gpu.c -o ./data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/lib/roi_align/src/crop_and_resize_gpu.o -std=c99 -std=c99\n", 178 | "gcc -pthread -shared -B /data/anaconda/envs/pytorch041/compiler_compat -L/data/anaconda/envs/pytorch041/lib -Wl,-rpath=/data/anaconda/envs/pytorch041/lib -Wl,--no-as-needed -Wl,--sysroot=/ ./_crop_and_resize.o ./data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/lib/roi_align/src/crop_and_resize.o ./data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/lib/roi_align/src/crop_and_resize_gpu.o /data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/lib/roi_align/src/cuda/crop_and_resize_kernel.cu.o -o ./_crop_and_resize.so\n", 179 | "Compiling nms kernels by nvcc...\n", 180 | "Including CUDA code.\n", 181 | "/data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/lib/nms\n", 182 | "generating /tmp/tmp1t_4ld5y/_nms.c\n", 183 | "setting the current directory to '/tmp/tmp1t_4ld5y'\n", 184 | "running build_ext\n", 185 | "building '_nms' extension\n", 186 | "creating data\n", 187 | "creating data/home\n", 188 | "creating data/home/wonderwoman\n", 189 | "creating data/home/wonderwoman/notebooks\n", 190 | "creating data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples\n", 191 | "creating data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/lib\n", 192 | "creating data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/lib/nms\n", 193 | "creating data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/lib/nms/src\n", 194 | "gcc -pthread -B /data/anaconda/envs/pytorch041/compiler_compat -Wl,--sysroot=/ -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -Wstrict-prototypes -fPIC -DWITH_CUDA -I/anaconda/envs/pytorch041/lib/python3.7/site-packages/torch/utils/ffi/../../lib/include -I/anaconda/envs/pytorch041/lib/python3.7/site-packages/torch/utils/ffi/../../lib/include/TH -I/anaconda/envs/pytorch041/lib/python3.7/site-packages/torch/utils/ffi/../../lib/include/THC -I/usr/local/cuda/include -I/anaconda/envs/pytorch041/include/python3.7m -c _nms.c -o ./_nms.o -std=c99\n", 195 | "gcc -pthread -B /data/anaconda/envs/pytorch041/compiler_compat -Wl,--sysroot=/ -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -Wstrict-prototypes -fPIC -DWITH_CUDA -I/anaconda/envs/pytorch041/lib/python3.7/site-packages/torch/utils/ffi/../../lib/include -I/anaconda/envs/pytorch041/lib/python3.7/site-packages/torch/utils/ffi/../../lib/include/TH -I/anaconda/envs/pytorch041/lib/python3.7/site-packages/torch/utils/ffi/../../lib/include/THC -I/usr/local/cuda/include -I/anaconda/envs/pytorch041/include/python3.7m -c /data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/lib/nms/src/nms.c -o ./data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/lib/nms/src/nms.o -std=c99\n", 196 | "gcc -pthread -B /data/anaconda/envs/pytorch041/compiler_compat -Wl,--sysroot=/ -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -Wstrict-prototypes -fPIC -DWITH_CUDA -I/anaconda/envs/pytorch041/lib/python3.7/site-packages/torch/utils/ffi/../../lib/include 
-I/anaconda/envs/pytorch041/lib/python3.7/site-packages/torch/utils/ffi/../../lib/include/TH -I/anaconda/envs/pytorch041/lib/python3.7/site-packages/torch/utils/ffi/../../lib/include/THC -I/usr/local/cuda/include -I/anaconda/envs/pytorch041/include/python3.7m -c /data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/lib/nms/src/nms_cuda.c -o ./data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/lib/nms/src/nms_cuda.o -std=c99\n", 197 | "gcc -pthread -shared -B /data/anaconda/envs/pytorch041/compiler_compat -L/data/anaconda/envs/pytorch041/lib -Wl,-rpath=/data/anaconda/envs/pytorch041/lib -Wl,--no-as-needed -Wl,--sysroot=/ ./_nms.o ./data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/lib/nms/src/nms.o ./data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/lib/nms/src/nms_cuda.o /data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/lib/nms/src/cuda/nms_kernel.cu.o -o ./_nms.so\n" 198 | ] 199 | }, 200 | { 201 | "name": "stderr", 202 | "output_type": "stream", 203 | "text": [ 204 | "/data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/lib/roi_align/src/crop_and_resize.c: In function ‘CropAndResizePerBox’:\n", 205 | "/data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/lib/roi_align/src/crop_and_resize.c:30:0: warning: ignoring #pragma omp parallel [-Wunknown-pragmas]\n", 206 | " #pragma omp parallel for\n", 207 | " ^\n", 208 | "/data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/lib/nms/src/nms_cuda.c: In function ‘gpu_nms’:\n", 209 | "/data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/lib/nms/src/nms_cuda.c:29:35: warning: initialization from incompatible pointer type [-Wincompatible-pointer-types]\n", 210 | " unsigned long long* mask_flat = THCudaLongTensor_data(state, mask);\n", 211 | " ^\n", 212 | "/data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/lib/nms/src/nms_cuda.c:37:40: warning: initialization from incompatible pointer type [-Wincompatible-pointer-types]\n", 213 | " unsigned long long * mask_cpu_flat = THLongTensor_data(mask_cpu);\n", 214 | " ^\n", 215 | "/data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/lib/nms/src/nms_cuda.c:40:39: warning: initialization from incompatible pointer type [-Wincompatible-pointer-types]\n", 216 | " unsigned long long* remv_cpu_flat = THLongTensor_data(remv_cpu);\n", 217 | " ^\n", 218 | "/data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/lib/nms/src/nms_cuda.c:23:7: warning: unused variable ‘boxes_dim’ [-Wunused-variable]\n", 219 | " int boxes_dim = THCudaTensor_size(state, boxes, 1);\n", 220 | " ^\n" 221 | ] 222 | } 223 | ], 224 | "source": [ 225 | "%%bash\n", 226 | "\n", 227 | "export PATH=/anaconda/envs/pytorch041/bin/:$PATH\n", 228 | "\n", 229 | "CUDA_ARCH=\"-gencode arch=compute_30,code=sm_30 \\\n", 230 | " -gencode arch=compute_35,code=sm_35 \\\n", 231 | " -gencode arch=compute_50,code=sm_50 \\\n", 232 | " -gencode arch=compute_52,code=sm_52 \\\n", 233 | " -gencode arch=compute_60,code=sm_60 \\\n", 234 | " -gencode arch=compute_61,code=sm_61 \\\n", 235 | "\t -gencode arch=compute_70,code=sm_70 \"\n", 236 | "\n", 237 | "\n", 238 | "# Build RoIAlign\n", 239 | "cd lib/roi_align/src/cuda\n", 240 | "echo 'Compiling crop_and_resize kernels by nvcc...'\n", 241 | "/usr/local/cuda/bin/nvcc -c -o crop_and_resize_kernel.cu.o crop_and_resize_kernel.cu -x cu -Xcompiler -fPIC $CUDA_ARCH\n", 242 | "cd ../../\n", 243 | "python build.py\n", 244 | "cd ../..\n", 245 | "\n", 246 | "# Build NMS\n", 247 | "cd lib/nms/src/cuda\n", 248 | "echo \"Compiling nms kernels by nvcc...\"\n", 249 | 
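"# The NMS extension builds in two steps: nvcc first compiles the CUDA kernel into an\n",
"# object file, then build.py (via torch.utils.ffi) compiles the C wrapper and links the\n",
"# _nms module against it, mirroring the RoIAlign build above.\n",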
"/usr/local/cuda/bin/nvcc -c -o nms_kernel.cu.o nms_kernel.cu -x cu -Xcompiler -fPIC $CUDA_ARCH\n", 250 | "cd ../../\n", 251 | "python build.py\n", 252 | "cd ../.." 253 | ] 254 | }, 255 | { 256 | "cell_type": "markdown", 257 | "metadata": {}, 258 | "source": [ 259 | "## `pycocotools` and `coco` libraries" 260 | ] 261 | }, 262 | { 263 | "cell_type": "markdown", 264 | "metadata": {}, 265 | "source": [ 266 | "**NOTE**: Update the path to Python executable below before running the following bash script" 267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": 20, 272 | "metadata": {}, 273 | "outputs": [ 274 | { 275 | "name": "stdout", 276 | "output_type": "stream", 277 | "text": [ 278 | "Compiling pycocotools/_mask.pyx because it changed.\n", 279 | "[1/1] Cythonizing pycocotools/_mask.pyx\n", 280 | "running build_ext\n", 281 | "building 'pycocotools._mask' extension\n", 282 | "creating build\n", 283 | "creating build/temp.linux-x86_64-3.7\n", 284 | "creating build/temp.linux-x86_64-3.7/pycocotools\n", 285 | "creating build/common\n", 286 | "cc -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -Wstrict-prototypes -fPIC -I/anaconda/envs/pytorch041/lib/python3.7/site-packages/numpy/core/include -I../common -I/anaconda/envs/pytorch041/include/python3.7m -c pycocotools/_mask.c -o build/temp.linux-x86_64-3.7/pycocotools/_mask.o -Wno-cpp -Wno-unused-function -std=c99\n", 287 | "cc -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -Wstrict-prototypes -fPIC -I/anaconda/envs/pytorch041/lib/python3.7/site-packages/numpy/core/include -I../common -I/anaconda/envs/pytorch041/include/python3.7m -c ../common/maskApi.c -o build/temp.linux-x86_64-3.7/../common/maskApi.o -Wno-cpp -Wno-unused-function -std=c99\n", 288 | "gcc -pthread -shared -B /data/anaconda/envs/pytorch041/compiler_compat -L/data/anaconda/envs/pytorch041/lib -Wl,-rpath=/data/anaconda/envs/pytorch041/lib -Wl,--no-as-needed -Wl,--sysroot=/ build/temp.linux-x86_64-3.7/pycocotools/_mask.o build/temp.linux-x86_64-3.7/../common/maskApi.o -o /data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/coco/PythonAPI/pycocotools/_mask.cpython-37m-x86_64-linux-gnu.so\n" 289 | ] 290 | }, 291 | { 292 | "name": "stderr", 293 | "output_type": "stream", 294 | "text": [ 295 | "Cloning into 'coco'...\n", 296 | "/anaconda/envs/pytorch041/lib/python3.7/site-packages/Cython/Compiler/Main.py:367: FutureWarning: Cython directive 'language_level' not set, using 2 for now (Py2). This will change in a later release! 
File: /data/home/wonderwoman/notebooks/pytorch-mask-rcnn-samples/coco/PythonAPI/pycocotools/_mask.pyx\n", 297 | " tree = Parsing.p_module(s, pxd, full_module_name)\n" 298 | ] 299 | } 300 | ], 301 | "source": [ 302 | "%%bash\n", 303 | "\n", 304 | "export PATH=/anaconda/envs/pytorch041/bin/:$PATH\n", 305 | "\n", 306 | "rm -fr coco\n", 307 | "rm -fr cocoapi\n", 308 | "rm -fr pycocotools\n", 309 | "\n", 310 | "export CC=cc\n", 311 | "export CXX=cc++\n", 312 | "\n", 313 | "git clone https://github.com/waleedka/coco.git\n", 314 | "# git clone https://github.com/cocodataset/cocoapi.git &&\\\n", 315 | "cd coco/PythonAPI/ && python setup.py build_ext --inplace\n", 316 | "cd ../..\n", 317 | "ln -s coco/PythonAPI/pycocotools/ pycocotools" 318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "execution_count": 21, 323 | "metadata": {}, 324 | "outputs": [ 325 | { 326 | "name": "stdout", 327 | "output_type": "stream", 328 | "text": [ 329 | "build_pytorch1.0_macosx_10.13.sh lib\r\n", 330 | "coco\t\t\t\t LICENSE\r\n", 331 | "coco.py\t\t\t\t model.py\r\n", 332 | "config.py\t\t\t __pycache__\r\n", 333 | "data\t\t\t\t pycocotools\r\n", 334 | "Demo.ipynb\t\t\t README.md\r\n", 335 | "demo.py\t\t\t\t requirements.txt\r\n", 336 | "fish_maskrcnn.zip\t\t Setup.ipynb\r\n", 337 | "fish_pytorch_style.py\t\t Train.ipynb\r\n", 338 | "images\t\t\t\t utils.py\r\n", 339 | "InstallPyTorchSourceCPU.ipynb\t visualize.py\r\n", 340 | "InstallPyTorchSourceCUDA.ipynb\r\n" 341 | ] 342 | } 343 | ], 344 | "source": [ 345 | "! ls" 346 | ] 347 | }, 348 | { 349 | "cell_type": "markdown", 350 | "metadata": {}, 351 | "source": [ 352 | "Note, here, for macOS, using Xcode 9.4.1 command line tools" 353 | ] 354 | }, 355 | { 356 | "cell_type": "code", 357 | "execution_count": null, 358 | "metadata": {}, 359 | "outputs": [], 360 | "source": [ 361 | "# # MacOS, see https://pytorch.org/ for other distributions\n", 362 | "# ! 
pip install torch_nightly -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html" 363 | ] 364 | }, 365 | { 366 | "cell_type": "markdown", 367 | "metadata": {}, 368 | "source": [ 369 | "More relevant resolved GitHub issues:\n", 370 | "* https://github.com/uber/horovod/issues/274#issuecomment-390813445\n", 371 | "* https://github.com/pytorch/pytorch/issues/10234" 372 | ] 373 | }, 374 | { 375 | "cell_type": "code", 376 | "execution_count": null, 377 | "metadata": {}, 378 | "outputs": [], 379 | "source": [] 380 | } 381 | ], 382 | "metadata": { 383 | "kernelspec": { 384 | "display_name": "Python 3.5 - PyTorch 0.4.1", 385 | "language": "python", 386 | "name": "pytorch_041" 387 | }, 388 | "language_info": { 389 | "codemirror_mode": { 390 | "name": "ipython", 391 | "version": 3 392 | }, 393 | "file_extension": ".py", 394 | "mimetype": "text/x-python", 395 | "name": "python", 396 | "nbconvert_exporter": "python", 397 | "pygments_lexer": "ipython3", 398 | "version": "3.7.2" 399 | }, 400 | "nav_menu": {}, 401 | "toc": { 402 | "navigate_menu": true, 403 | "number_sections": true, 404 | "sideBar": true, 405 | "threshold": 6, 406 | "toc_cell": false, 407 | "toc_section_display": "block", 408 | "toc_window_display": false 409 | } 410 | }, 411 | "nbformat": 4, 412 | "nbformat_minor": 2 413 | } 414 | -------------------------------------------------------------------------------- /build_pytorch1.0_macosx_10.13.sh: -------------------------------------------------------------------------------- 1 | git clone --recursive https://github.com/pytorch/pytorch.git 2 | pip3 uninstall torch --yes 3 | 4 | cd pytorch && \ 5 | git checkout -b 8619230 &&\ 6 | USE_OPENCV=1 \ 7 | CMAKE_PREFIX_PATH="/Library/Frameworks/Python.framework/Versions/3.6/bin/" \ 8 | LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH \ 9 | LD_LIBRARY_PATH=/usr/local/cuda/lib:$LD_LIBRARY_PATH \ 10 | CUDA_BIN_PATH=/usr/local/cuda/bin \ 11 | CUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda/ \ 12 | CUDNN_LIB_DIR=/usr/local/cuda/lib \ 13 | CUDA_HOST_COMPILER=/usr/local/opt/llvm/bin/clang \ 14 | USE_CUDA=0 \ 15 | USE_NNPACK=0 \ 16 | CC=/usr/local/opt/llvm/bin/clang \ 17 | CXX=/usr/local/opt/llvm/bin/clang++ \ 18 | # BUILD_TORCH=ON \ 19 | TORCH_CUDA_ARCH_LIST="3.5 5.2 6.0 6.1+PTX" \ 20 | TORCH_NVCC_FLAGS="-Xfatbin -compress-all" \ 21 | python3 setup.py bdist_wheel 22 | 23 | -------------------------------------------------------------------------------- /coco.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Configurations and data loading code for MS COCO. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 
6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | 9 | ------------------------------------------------------------ 10 | 11 | Usage: import the module (see Jupyter notebooks for examples), or run from 12 | the command line as such: 13 | 14 | # Train a new model starting from pre-trained COCO weights 15 | python3 coco.py train --dataset=/path/to/coco/ --model=coco 16 | 17 | # Train a new model starting from ImageNet weights 18 | python3 coco.py train --dataset=/path/to/coco/ --model=imagenet 19 | 20 | # Continue training a model that you had trained earlier 21 | python3 coco.py train --dataset=/path/to/coco/ --model=/path/to/weights.h5 22 | 23 | # Continue training the last model you trained 24 | python3 coco.py train --dataset=/path/to/coco/ --model=last 25 | 26 | # Run COCO evaluatoin on the last model you trained 27 | python3 coco.py evaluate --dataset=/path/to/coco/ --model=last 28 | """ 29 | 30 | import os 31 | import time 32 | import numpy as np 33 | 34 | # Download and install the Python COCO tools from https://github.com/waleedka/coco 35 | # That's a fork from the original https://github.com/pdollar/coco with a bug 36 | # fix for Python 3. 37 | # I submitted a pull request https://github.com/cocodataset/cocoapi/pull/50 38 | # If the PR is merged then use the original repo. 39 | # Note: Edit PythonAPI/Makefile and replace "python" with "python3". 40 | from pycocotools.coco import COCO 41 | from pycocotools.cocoeval import COCOeval 42 | from pycocotools import mask as maskUtils 43 | 44 | import zipfile 45 | import urllib.request 46 | import shutil 47 | 48 | from config import Config 49 | import utils 50 | import model as modellib 51 | 52 | import torch 53 | 54 | # Root directory of the project 55 | ROOT_DIR = os.getcwd() 56 | 57 | # Path to trained weights file 58 | COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.pth") 59 | 60 | # Directory to save logs and model checkpoints, if not provided 61 | # through the command line argument --logs 62 | DEFAULT_LOGS_DIR = os.path.join(ROOT_DIR, "logs") 63 | DEFAULT_DATASET_YEAR = "2014" 64 | 65 | ############################################################ 66 | # Configurations 67 | ############################################################ 68 | 69 | class CocoConfig(Config): 70 | """Configuration for training on MS COCO. 71 | Derives from the base Config class and overrides values specific 72 | to the COCO dataset. 73 | """ 74 | # Give the configuration a recognizable name 75 | NAME = "coco" 76 | 77 | # We use one GPU with 8GB memory, which can fit one image. 78 | # Adjust down if you use a smaller GPU. 79 | IMAGES_PER_GPU = 16 80 | 81 | # Uncomment to train on 8 GPUs (default is 1) 82 | GPU_COUNT = 0 83 | 84 | # Number of classes (including background) 85 | NUM_CLASSES = 1 + 80 # COCO has 80 classes 86 | 87 | 88 | ############################################################ 89 | # Dataset 90 | ############################################################ 91 | 92 | class CocoDataset(utils.Dataset): 93 | def load_coco(self, dataset_dir, subset, year=DEFAULT_DATASET_YEAR, class_ids=None, 94 | class_map=None, return_coco=False, auto_download=False): 95 | """Load a subset of the COCO dataset. 96 | dataset_dir: The root directory of the COCO dataset. 97 | subset: What to load (train, val, minival, valminusminival) 98 | year: What dataset year to load (2014, 2017) as a string, not an integer 99 | class_ids: If provided, only loads images that have the given classes. 
100 | class_map: TODO: Not implemented yet. Supports maping classes from 101 | different datasets to the same class ID. 102 | return_coco: If True, returns the COCO object. 103 | auto_download: Automatically download and unzip MS-COCO images and annotations 104 | """ 105 | 106 | if auto_download is True: 107 | self.auto_download(dataset_dir, subset, year) 108 | 109 | coco = COCO("{}/annotations/instances_{}{}.json".format(dataset_dir, subset, year)) 110 | if subset == "minival" or subset == "valminusminival": 111 | subset = "val" 112 | image_dir = "{}/{}{}".format(dataset_dir, subset, year) 113 | 114 | # Load all classes or a subset? 115 | if not class_ids: 116 | # All classes 117 | class_ids = sorted(coco.getCatIds()) 118 | 119 | # All images or a subset? 120 | if class_ids: 121 | image_ids = [] 122 | for id in class_ids: 123 | image_ids.extend(list(coco.getImgIds(catIds=[id]))) 124 | # Remove duplicates 125 | image_ids = list(set(image_ids)) 126 | else: 127 | # All images 128 | image_ids = list(coco.imgs.keys()) 129 | 130 | # Add classes 131 | for i in class_ids: 132 | self.add_class("coco", i, coco.loadCats(i)[0]["name"]) 133 | 134 | # Add images 135 | for i in image_ids: 136 | self.add_image( 137 | "coco", image_id=i, 138 | path=os.path.join(image_dir, coco.imgs[i]['file_name']), 139 | width=coco.imgs[i]["width"], 140 | height=coco.imgs[i]["height"], 141 | annotations=coco.loadAnns(coco.getAnnIds( 142 | imgIds=[i], catIds=class_ids, iscrowd=None))) 143 | if return_coco: 144 | return coco 145 | 146 | def auto_download(self, dataDir, dataType, dataYear): 147 | """Download the COCO dataset/annotations if requested. 148 | dataDir: The root directory of the COCO dataset. 149 | dataType: What to load (train, val, minival, valminusminival) 150 | dataYear: What dataset year to load (2014, 2017) as a string, not an integer 151 | Note: 152 | For 2014, use "train", "val", "minival", or "valminusminival" 153 | For 2017, only "train" and "val" annotations are available 154 | """ 155 | 156 | # Setup paths and file names 157 | if dataType == "minival" or dataType == "valminusminival": 158 | imgDir = "{}/{}{}".format(dataDir, "val", dataYear) 159 | imgZipFile = "{}/{}{}.zip".format(dataDir, "val", dataYear) 160 | imgURL = "http://images.cocodataset.org/zips/{}{}.zip".format("val", dataYear) 161 | else: 162 | imgDir = "{}/{}{}".format(dataDir, dataType, dataYear) 163 | imgZipFile = "{}/{}{}.zip".format(dataDir, dataType, dataYear) 164 | imgURL = "http://images.cocodataset.org/zips/{}{}.zip".format(dataType, dataYear) 165 | # print("Image paths:"); print(imgDir); print(imgZipFile); print(imgURL) 166 | 167 | # Create main folder if it doesn't exist yet 168 | if not os.path.exists(dataDir): 169 | os.makedirs(dataDir) 170 | 171 | # Download images if not available locally 172 | if not os.path.exists(imgDir): 173 | os.makedirs(imgDir) 174 | print("Downloading images to " + imgZipFile + " ...") 175 | with urllib.request.urlopen(imgURL) as resp, open(imgZipFile, 'wb') as out: 176 | shutil.copyfileobj(resp, out) 177 | print("... done downloading.") 178 | print("Unzipping " + imgZipFile) 179 | with zipfile.ZipFile(imgZipFile, "r") as zip_ref: 180 | zip_ref.extractall(dataDir) 181 | print("... 
done unzipping") 182 | print("Will use images in " + imgDir) 183 | 184 | # Setup annotations data paths 185 | annDir = "{}/annotations".format(dataDir) 186 | if dataType == "minival": 187 | annZipFile = "{}/instances_minival2014.json.zip".format(dataDir) 188 | annFile = "{}/instances_minival2014.json".format(annDir) 189 | annURL = "https://dl.dropboxusercontent.com/s/o43o90bna78omob/instances_minival2014.json.zip?dl=0" 190 | unZipDir = annDir 191 | elif dataType == "valminusminival": 192 | annZipFile = "{}/instances_valminusminival2014.json.zip".format(dataDir) 193 | annFile = "{}/instances_valminusminival2014.json".format(annDir) 194 | annURL = "https://dl.dropboxusercontent.com/s/s3tw5zcg7395368/instances_valminusminival2014.json.zip?dl=0" 195 | unZipDir = annDir 196 | else: 197 | annZipFile = "{}/annotations_trainval{}.zip".format(dataDir, dataYear) 198 | annFile = "{}/instances_{}{}.json".format(annDir, dataType, dataYear) 199 | annURL = "http://images.cocodataset.org/annotations/annotations_trainval{}.zip".format(dataYear) 200 | unZipDir = dataDir 201 | # print("Annotations paths:"); print(annDir); print(annFile); print(annZipFile); print(annURL) 202 | 203 | # Download annotations if not available locally 204 | if not os.path.exists(annDir): 205 | os.makedirs(annDir) 206 | if not os.path.exists(annFile): 207 | if not os.path.exists(annZipFile): 208 | print("Downloading zipped annotations to " + annZipFile + " ...") 209 | with urllib.request.urlopen(annURL) as resp, open(annZipFile, 'wb') as out: 210 | shutil.copyfileobj(resp, out) 211 | print("... done downloading.") 212 | print("Unzipping " + annZipFile) 213 | with zipfile.ZipFile(annZipFile, "r") as zip_ref: 214 | zip_ref.extractall(unZipDir) 215 | print("... done unzipping") 216 | print("Will use annotations in " + annFile) 217 | 218 | def load_mask(self, image_id): 219 | """Load instance masks for the given image. 220 | 221 | Different datasets use different ways to store masks. This 222 | function converts the different mask format to one format 223 | in the form of a bitmap [height, width, instances]. 224 | 225 | Returns: 226 | masks: A bool array of shape [height, width, instance count] with 227 | one mask per instance. 228 | class_ids: a 1D array of class IDs of the instance masks. 229 | """ 230 | # If not a COCO image, delegate to parent class. 231 | image_info = self.image_info[image_id] 232 | if image_info["source"] != "coco": 233 | return super(CocoDataset, self).load_mask(image_id) 234 | 235 | instance_masks = [] 236 | class_ids = [] 237 | annotations = self.image_info[image_id]["annotations"] 238 | # Build mask of shape [height, width, instance_count] and list 239 | # of class IDs that correspond to each channel of the mask. 240 | for annotation in annotations: 241 | class_id = self.map_source_class_id( 242 | "coco.{}".format(annotation['category_id'])) 243 | if class_id: 244 | m = self.annToMask(annotation, image_info["height"], 245 | image_info["width"]) 246 | # Some objects are so small that they're less than 1 pixel area 247 | # and end up rounded out. Skip those objects. 248 | if m.max() < 1: 249 | continue 250 | # Is it a crowd? If so, use a negative class ID. 251 | if annotation['iscrowd']: 252 | # Use negative class ID for crowds 253 | class_id *= -1 254 | # For crowd masks, annToMask() sometimes returns a mask 255 | # smaller than the given dimensions. If so, resize it. 
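# Note: rather than resizing, the code below substitutes a full-image mask of ones when the returned crowd mask is smaller than the image.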
256 | if m.shape[0] != image_info["height"] or m.shape[1] != image_info["width"]: 257 | m = np.ones([image_info["height"], image_info["width"]], dtype=bool) 258 | instance_masks.append(m) 259 | class_ids.append(class_id) 260 | 261 | # Pack instance masks into an array 262 | if class_ids: 263 | mask = np.stack(instance_masks, axis=2) 264 | class_ids = np.array(class_ids, dtype=np.int32) 265 | return mask, class_ids 266 | else: 267 | # Call super class to return an empty mask 268 | return super(CocoDataset, self).load_mask(image_id) 269 | 270 | def image_reference(self, image_id): 271 | """Return a link to the image in the COCO Website.""" 272 | info = self.image_info[image_id] 273 | if info["source"] == "coco": 274 | return "http://cocodataset.org/#explore?id={}".format(info["id"]) 275 | else: 276 | super(CocoDataset, self).image_reference(image_id) 277 | 278 | # The following two functions are from pycocotools with a few changes. 279 | 280 | def annToRLE(self, ann, height, width): 281 | """ 282 | Convert annotation which can be polygons, uncompressed RLE to RLE. 283 | :return: binary mask (numpy 2D array) 284 | """ 285 | segm = ann['segmentation'] 286 | if isinstance(segm, list): 287 | # polygon -- a single object might consist of multiple parts 288 | # we merge all parts into one mask rle code 289 | rles = maskUtils.frPyObjects(segm, height, width) 290 | rle = maskUtils.merge(rles) 291 | elif isinstance(segm['counts'], list): 292 | # uncompressed RLE 293 | rle = maskUtils.frPyObjects(segm, height, width) 294 | else: 295 | # rle 296 | rle = ann['segmentation'] 297 | return rle 298 | 299 | def annToMask(self, ann, height, width): 300 | """ 301 | Convert annotation which can be polygons, uncompressed RLE, or RLE to binary mask. 302 | :return: binary mask (numpy 2D array) 303 | """ 304 | rle = self.annToRLE(ann, height, width) 305 | m = maskUtils.decode(rle) 306 | return m 307 | 308 | 309 | ############################################################ 310 | # COCO Evaluation 311 | ############################################################ 312 | 313 | def build_coco_results(dataset, image_ids, rois, class_ids, scores, masks): 314 | """Arrange resutls to match COCO specs in http://cocodataset.org/#format 315 | """ 316 | # If no results, return an empty list 317 | if rois is None: 318 | return [] 319 | 320 | results = [] 321 | for image_id in image_ids: 322 | # Loop through detections 323 | for i in range(rois.shape[0]): 324 | class_id = class_ids[i] 325 | score = scores[i] 326 | bbox = np.around(rois[i], 1) 327 | mask = masks[:, :, i] 328 | 329 | result = { 330 | "image_id": image_id, 331 | "category_id": dataset.get_source_class_id(class_id, "coco"), 332 | "bbox": [bbox[1], bbox[0], bbox[3] - bbox[1], bbox[2] - bbox[0]], 333 | "score": score, 334 | "segmentation": maskUtils.encode(np.asfortranarray(mask)) 335 | } 336 | results.append(result) 337 | return results 338 | 339 | 340 | def evaluate_coco(model, dataset, coco, eval_type="bbox", limit=0, image_ids=None): 341 | """Runs official COCO evaluation. 342 | dataset: A Dataset object with valiadtion data 343 | eval_type: "bbox" or "segm" for bounding box or segmentation evaluation 344 | limit: if not 0, it's the number of images to use for evaluation 345 | """ 346 | # Pick COCO images from the dataset 347 | image_ids = image_ids or dataset.image_ids 348 | 349 | # Limit to a subset 350 | if limit: 351 | image_ids = image_ids[:limit] 352 | 353 | # Get corresponding COCO image IDs. 
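    # (dataset.image_ids here are the Dataset class's internal indices; the
    # original COCO ids stored under image_info[...]["id"] are what COCOeval
    # expects in params.imgIds.)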
354 | coco_image_ids = [dataset.image_info[id]["id"] for id in image_ids] 355 | 356 | t_prediction = 0 357 | t_start = time.time() 358 | 359 | results = [] 360 | for i, image_id in enumerate(image_ids): 361 | # Load image 362 | image = dataset.load_image(image_id) 363 | 364 | # Run detection 365 | t = time.time() 366 | r = model.detect([image])[0] 367 | t_prediction += (time.time() - t) 368 | 369 | # Convert results to COCO format 370 | image_results = build_coco_results(dataset, coco_image_ids[i:i + 1], 371 | r["rois"], r["class_ids"], 372 | r["scores"], r["masks"]) 373 | results.extend(image_results) 374 | 375 | # Load results. This modifies results with additional attributes. 376 | coco_results = coco.loadRes(results) 377 | 378 | # Evaluate 379 | cocoEval = COCOeval(coco, coco_results, eval_type) 380 | cocoEval.params.imgIds = coco_image_ids 381 | cocoEval.evaluate() 382 | cocoEval.accumulate() 383 | cocoEval.summarize() 384 | 385 | print("Prediction time: {}. Average {}/image".format( 386 | t_prediction, t_prediction / len(image_ids))) 387 | print("Total time: ", time.time() - t_start) 388 | 389 | 390 | ############################################################ 391 | # Training 392 | ############################################################ 393 | 394 | 395 | if __name__ == '__main__': 396 | import argparse 397 | 398 | # Parse command line arguments 399 | parser = argparse.ArgumentParser( 400 | description='Train Mask R-CNN on MS COCO.') 401 | parser.add_argument("command", 402 | metavar="", 403 | help="'train' or 'evaluate' on MS COCO") 404 | parser.add_argument('--dataset', required=True, 405 | metavar="/path/to/coco/", 406 | help='Directory of the MS-COCO dataset') 407 | parser.add_argument('--year', required=False, 408 | default=DEFAULT_DATASET_YEAR, 409 | metavar="", 410 | help='Year of the MS-COCO dataset (2014 or 2017) (default=2014)') 411 | parser.add_argument('--model', required=False, 412 | metavar="/path/to/weights.pth", 413 | help="Path to weights .pth file or 'coco'") 414 | parser.add_argument('--logs', required=False, 415 | default=DEFAULT_LOGS_DIR, 416 | metavar="/path/to/logs/", 417 | help='Logs and checkpoints directory (default=logs/)') 418 | parser.add_argument('--limit', required=False, 419 | default=500, 420 | metavar="", 421 | help='Images to use for evaluation (default=500)') 422 | parser.add_argument('--download', required=False, 423 | default=False, 424 | metavar="", 425 | help='Automatically download and unzip MS-COCO files (default=False)', 426 | type=bool) 427 | args = parser.parse_args() 428 | print("Command: ", args.command) 429 | print("Model: ", args.model) 430 | print("Dataset: ", args.dataset) 431 | print("Year: ", args.year) 432 | print("Logs: ", args.logs) 433 | print("Auto Download: ", args.download) 434 | 435 | # Configurations 436 | if args.command == "train": 437 | config = CocoConfig() 438 | else: 439 | class InferenceConfig(CocoConfig): 440 | # Set batch size to 1 since we'll be running inference on 441 | # one image at a time. 
Batch size = GPU_COUNT * IMAGES_PER_GPU 442 | GPU_COUNT = 1 443 | IMAGES_PER_GPU = 1 444 | DETECTION_MIN_CONFIDENCE = 0 445 | config = InferenceConfig() 446 | config.display() 447 | 448 | # Create model 449 | if args.command == "train": 450 | model = modellib.MaskRCNN(config=config, 451 | model_dir=args.logs) 452 | else: 453 | model = modellib.MaskRCNN(config=config, 454 | model_dir=args.logs) 455 | if config.GPU_COUNT: 456 | model = model.cuda() 457 | 458 | # Select weights file to load 459 | if args.model: 460 | if args.model.lower() == "coco": 461 | model_path = COCO_MODEL_PATH 462 | elif args.model.lower() == "last": 463 | # Find last trained weights 464 | model_path = model.find_last()[1] 465 | elif args.model.lower() == "imagenet": 466 | # Start from ImageNet trained weights 467 | model_path = config.IMAGENET_MODEL_PATH 468 | else: 469 | model_path = args.model 470 | else: 471 | model_path = "" 472 | 473 | # Load weights 474 | print("Loading weights ", model_path) 475 | model.load_weights(model_path) 476 | 477 | # Train or evaluate 478 | if args.command == "train": 479 | # Training dataset. Use the training set and 35K from the 480 | # validation set, as as in the Mask RCNN paper. 481 | dataset_train = CocoDataset() 482 | dataset_train.load_coco(args.dataset, "train", year=args.year, auto_download=args.download) 483 | dataset_train.load_coco(args.dataset, "valminusminival", year=args.year, auto_download=args.download) 484 | dataset_train.prepare() 485 | 486 | # Validation dataset 487 | dataset_val = CocoDataset() 488 | dataset_val.load_coco(args.dataset, "minival", year=args.year, auto_download=args.download) 489 | dataset_val.prepare() 490 | 491 | # *** This training schedule is an example. Update to your needs *** 492 | 493 | # Training - Stage 1 494 | print("Training network heads") 495 | model.train_model(dataset_train, dataset_val, 496 | learning_rate=config.LEARNING_RATE, 497 | epochs=40, 498 | layers='heads') 499 | 500 | # Training - Stage 2 501 | # Finetune layers from ResNet stage 4 and up 502 | print("Fine tune Resnet stage 4 and up") 503 | model.train_model(dataset_train, dataset_val, 504 | learning_rate=config.LEARNING_RATE, 505 | epochs=120, 506 | layers='4+') 507 | 508 | # Training - Stage 3 509 | # Fine tune all layers 510 | print("Fine tune all layers") 511 | model.train_model(dataset_train, dataset_val, 512 | learning_rate=config.LEARNING_RATE / 10, 513 | epochs=160, 514 | layers='all') 515 | 516 | elif args.command == "evaluate": 517 | # Validation dataset 518 | dataset_val = CocoDataset() 519 | coco = dataset_val.load_coco(args.dataset, "minival", year=args.year, return_coco=True, auto_download=args.download) 520 | dataset_val.prepare() 521 | print("Running COCO evaluation on {} images.".format(args.limit)) 522 | evaluate_coco(model, dataset_val, coco, "bbox", limit=int(args.limit)) 523 | evaluate_coco(model, dataset_val, coco, "segm", limit=int(args.limit)) 524 | else: 525 | print("'{}' is not recognized. " 526 | "Use 'train' or 'evaluate'".format(args.command)) 527 | -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Base Configurations class. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 
6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | """ 9 | 10 | import math 11 | import numpy as np 12 | import os 13 | 14 | 15 | # Base Configuration Class 16 | # Don't use this class directly. Instead, sub-class it and override 17 | # the configurations you need to change. 18 | 19 | class Config(object): 20 | """Base configuration class. For custom configurations, create a 21 | sub-class that inherits from this one and override properties 22 | that need to be changed. 23 | """ 24 | # Name the configurations. For example, 'COCO', 'Experiment 3', ...etc. 25 | # Useful if your code needs to do things differently depending on which 26 | # experiment is running. 27 | NAME = None # Override in sub-classes 28 | 29 | # Path to pretrained imagenet model 30 | IMAGENET_MODEL_PATH = os.path.join(os.getcwd(), "resnet50_imagenet.pth") 31 | 32 | # NUMBER OF GPUs to use. For CPU use 0 33 | GPU_COUNT = 1 34 | 35 | # Number of images to train with on each GPU. A 12GB GPU can typically 36 | # handle 2 images of 1024x1024px. 37 | # Adjust based on your GPU memory and image sizes. Use the highest 38 | # number that your GPU can handle for best performance. 39 | IMAGES_PER_GPU = 2 40 | 41 | # Number of training steps per epoch 42 | # This doesn't need to match the size of the training set. Tensorboard 43 | # updates are saved at the end of each epoch, so setting this to a 44 | # smaller number means getting more frequent TensorBoard updates. 45 | # Validation stats are also calculated at each epoch end and they 46 | # might take a while, so don't set this too small to avoid spending 47 | # a lot of time on validation stats. 48 | STEPS_PER_EPOCH = 1000 49 | 50 | # Number of validation steps to run at the end of every training epoch. 51 | # A bigger number improves accuracy of validation stats, but slows 52 | # down the training. 53 | VALIDATION_STEPS = 50 54 | 55 | # The strides of each layer of the FPN Pyramid. These values 56 | # are based on a Resnet101 backbone. 57 | BACKBONE_STRIDES = [4, 8, 16, 32, 64] 58 | 59 | # Number of classification classes (including background) 60 | NUM_CLASSES = 1 # Override in sub-classes 61 | 62 | # Length of square anchor side in pixels 63 | RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512) 64 | 65 | # Ratios of anchors at each cell (width/height) 66 | # A value of 1 represents a square anchor, and 0.5 is a wide anchor 67 | RPN_ANCHOR_RATIOS = [0.5, 1, 2] 68 | 69 | # Anchor stride 70 | # If 1 then anchors are created for each cell in the backbone feature map. 71 | # If 2, then anchors are created for every other cell, and so on. 72 | RPN_ANCHOR_STRIDE = 1 73 | 74 | # Non-max suppression threshold to filter RPN proposals. 75 | # You can reduce this during training to generate more propsals. 76 | RPN_NMS_THRESHOLD = 0.7 77 | 78 | # How many anchors per image to use for RPN training 79 | RPN_TRAIN_ANCHORS_PER_IMAGE = 256 80 | 81 | # ROIs kept after non-maximum supression (training and inference) 82 | POST_NMS_ROIS_TRAINING = 2000 83 | POST_NMS_ROIS_INFERENCE = 1000 84 | 85 | # If enabled, resizes instance masks to a smaller size to reduce 86 | # memory load. Recommended when using high-resolution images. 87 | USE_MINI_MASK = True 88 | MINI_MASK_SHAPE = (56, 56) # (height, width) of the mini-mask 89 | 90 | # Input image resing 91 | # Images are resized such that the smallest side is >= IMAGE_MIN_DIM and 92 | # the longest side is <= IMAGE_MAX_DIM. In case both conditions can't 93 | # be satisfied together the IMAGE_MAX_DIM is enforced. 
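    # Illustrative example (assumes the resize logic in utils.resize_image):
    # a 600x1200 image is first scaled by 800/600 to satisfy IMAGE_MIN_DIM,
    # giving 800x1600; 1600 exceeds IMAGE_MAX_DIM, so the scale is reduced to
    # 1024/1200 and the image becomes 512x1024 before padding to 1024x1024.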
94 | IMAGE_MIN_DIM = 800 95 | IMAGE_MAX_DIM = 1024 96 | # If True, pad images with zeros such that they're (max_dim by max_dim) 97 | IMAGE_PADDING = True # currently, the False option is not supported 98 | 99 | # Image mean (RGB) 100 | MEAN_PIXEL = np.array([123.7, 116.8, 103.9]) 101 | 102 | # Number of ROIs per image to feed to classifier/mask heads 103 | # The Mask RCNN paper uses 512 but often the RPN doesn't generate 104 | # enough positive proposals to fill this and keep a positive:negative 105 | # ratio of 1:3. You can increase the number of proposals by adjusting 106 | # the RPN NMS threshold. 107 | TRAIN_ROIS_PER_IMAGE = 200 108 | 109 | # Percent of positive ROIs used to train classifier/mask heads 110 | ROI_POSITIVE_RATIO = 0.33 111 | 112 | # Pooled ROIs 113 | POOL_SIZE = 7 114 | MASK_POOL_SIZE = 14 115 | MASK_SHAPE = [28, 28] 116 | 117 | # Maximum number of ground truth instances to use in one image 118 | MAX_GT_INSTANCES = 100 119 | 120 | # Bounding box refinement standard deviation for RPN and final detections. 121 | RPN_BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2]) 122 | BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2]) 123 | 124 | # Max number of final detections 125 | DETECTION_MAX_INSTANCES = 100 126 | 127 | # Minimum probability value to accept a detected instance 128 | # ROIs below this threshold are skipped 129 | DETECTION_MIN_CONFIDENCE = 0.7 130 | 131 | # Non-maximum suppression threshold for detection 132 | DETECTION_NMS_THRESHOLD = 0.3 133 | 134 | # Learning rate and momentum 135 | # The Mask RCNN paper uses lr=0.02, but on TensorFlow it causes 136 | # weights to explode. Likely due to differences in optimzer 137 | # implementation. 138 | LEARNING_RATE = 0.001 139 | LEARNING_MOMENTUM = 0.9 140 | 141 | # Weight decay regularization 142 | WEIGHT_DECAY = 0.0001 143 | 144 | # Use RPN ROIs or externally generated ROIs for training 145 | # Keep this True for most situations. Set to False if you want to train 146 | # the head branches on ROI generated by code rather than the ROIs from 147 | # the RPN. For example, to debug the classifier head without having to 148 | # train the RPN. 
149 | USE_RPN_ROIS = True 150 | 151 | # Number of workers to use in Data loading (the data 152 | # generators) within model design, e.g., as in: 153 | # torch.utils.data.DataLoader(train_set, batch_size=1, shuffle=True, num_workers=self.config.NUM_WORKERS) 154 | NUM_WORKERS = 4 155 | 156 | def __init__(self): 157 | """Set values of computed attributes.""" 158 | # Effective batch size 159 | if self.GPU_COUNT > 0: 160 | self.BATCH_SIZE = self.IMAGES_PER_GPU * self.GPU_COUNT 161 | else: 162 | self.BATCH_SIZE = self.IMAGES_PER_GPU 163 | 164 | # Adjust step size based on batch size 165 | self.STEPS_PER_EPOCH = self.BATCH_SIZE * self.STEPS_PER_EPOCH 166 | 167 | # Input image size 168 | self.IMAGE_SHAPE = np.array( 169 | [self.IMAGE_MAX_DIM, self.IMAGE_MAX_DIM, 3]) 170 | 171 | # Compute backbone size from input image size 172 | self.BACKBONE_SHAPES = np.array( 173 | [[int(math.ceil(self.IMAGE_SHAPE[0] / stride)), 174 | int(math.ceil(self.IMAGE_SHAPE[1] / stride))] 175 | for stride in self.BACKBONE_STRIDES]) 176 | 177 | def display(self): 178 | """Display Configuration values.""" 179 | print("\nConfigurations:") 180 | for a in dir(self): 181 | if not a.startswith("__") and not callable(getattr(self, a)): 182 | print("{:30} {}".format(a, getattr(self, a))) 183 | print("\n") 184 | -------------------------------------------------------------------------------- /demo.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import random 4 | import math 5 | import numpy as np 6 | import skimage.io 7 | import matplotlib 8 | import matplotlib.pyplot as plt 9 | import glob 10 | from io import BytesIO 11 | import requests 12 | from PIL import Image 13 | 14 | # import coco 15 | from pycocotools import coco 16 | import utils 17 | import model as modellib 18 | import visualize 19 | 20 | import torch 21 | import pycocotools 22 | 23 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 24 | print(device) 25 | 26 | # Root directory of the project 27 | ROOT_DIR = os.getcwd() 28 | 29 | # Directory to save logs and trained model 30 | MODEL_DIR = os.path.join(ROOT_DIR, "logs") 31 | 32 | # Path to trained weights file 33 | # Download this file and place in the root of your 34 | # project (See README file for details) 35 | COCO_MODEL_PATH = os.path.join(ROOT_DIR, "models", "mask_rcnn_coco.pth") 36 | 37 | # Directory of images to run detection on 38 | IMAGE_DIR = os.path.join(ROOT_DIR, "images") 39 | 40 | class InferenceConfig(coco.CocoConfig): 41 | # Set batch size to 1 since we'll be running inference on 42 | # one image at a time. Batch size = GPU_COUNT * IMAGES_PER_GPU 43 | # GPU_COUNT = 0 for CPU 44 | GPU_COUNT = 0 45 | IMAGES_PER_GPU = 1 46 | COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.pth") 47 | 48 | 49 | config = InferenceConfig() 50 | config.display() 51 | 52 | # Create model object. 53 | model = modellib.MaskRCNN(model_dir=MODEL_DIR, config=config) 54 | model = model.to(device=device) 55 | 56 | # Load weights trained on MS-COCO 57 | print(COCO_MODEL_PATH) 58 | model.load_state_dict(torch.load(COCO_MODEL_PATH)) 59 | 60 | # COCO Class names 61 | # Index of the class in the list is its ID. 
For example, to get ID of 62 | # the teddy bear class, use: class_names.index('teddy bear') 63 | class_names = ['BG', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 64 | 'bus', 'train', 'truck', 'boat', 'traffic light', 65 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 66 | 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 67 | 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 68 | 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 69 | 'kite', 'baseball bat', 'baseball glove', 'skateboard', 70 | 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 71 | 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 72 | 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 73 | 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 74 | 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 75 | 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 76 | 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 77 | 'teddy bear', 'hair drier', 'toothbrush'] 78 | 79 | # Load a random image from the images folder 80 | # file_names = glob.glob(os.path.join('images', '*.jpg')) 81 | # image = skimage.io.imread(os.path.join(random.choice(file_names))) 82 | 83 | # Or load file from the internet 84 | req = requests.get('https://cdn.pixabay.com/photo/2015/06/20/13/55/man-815795__340.jpg') 85 | image = np.asarray(Image.open(BytesIO(req.content))) -------------------------------------------------------------------------------- /fish_pytorch_style.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Configurations and data loading code for MS COCO. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | 9 | ------------------------------------------------------------ 10 | 11 | Usage: import the module (see Jupyter notebooks for examples), or run from 12 | the command line as such: 13 | 14 | # Train a new model starting from pre-trained COCO weights 15 | python3 coco.py train --dataset=/path/to/coco/ --model=coco 16 | 17 | # Train a new model starting from ImageNet weights 18 | python3 coco.py train --dataset=/path/to/coco/ --model=imagenet 19 | 20 | # Continue training a model that you had trained earlier 21 | python3 coco.py train --dataset=/path/to/coco/ --model=/path/to/weights.h5 22 | 23 | # Continue training the last model you trained 24 | python3 coco.py train --dataset=/path/to/coco/ --model=last 25 | 26 | # Run COCO evaluatoin on the last model you trained 27 | python3 coco.py evaluate --dataset=/path/to/coco/ --model=last 28 | """ 29 | 30 | import os 31 | import time 32 | import numpy as np 33 | 34 | # Download and install the Python COCO tools from https://github.com/waleedka/coco 35 | # That's a fork from the original https://github.com/pdollar/coco with a bug 36 | # fix for Python 3. 37 | # I submitted a pull request https://github.com/cocodataset/cocoapi/pull/50 38 | # If the PR is merged then use the original repo. 39 | # Note: Edit PythonAPI/Makefile and replace "python" with "python3". 
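# json and skimage are needed by FishDataset.load_fish() and load_mask() below
# but are not imported anywhere else in this file, so import them here.
import json
import skimage.draw
import skimage.io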
40 | from pycocotools.coco import COCO 41 | from pycocotools.cocoeval import COCOeval 42 | from pycocotools import mask as maskUtils 43 | 44 | import zipfile 45 | import urllib.request 46 | import shutil 47 | 48 | from config import Config 49 | import utils 50 | import model as modellib 51 | 52 | import torch 53 | 54 | # Root directory of the project 55 | ROOT_DIR = os.getcwd() 56 | 57 | # Path to trained weights file 58 | COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.pth") 59 | 60 | # Directory to save logs and model checkpoints, if not provided 61 | # through the command line argument --logs 62 | DEFAULT_LOGS_DIR = os.path.join(ROOT_DIR, "logs") 63 | DEFAULT_DATASET_YEAR = "2014" 64 | 65 | ############################################################ 66 | # Configurations 67 | ############################################################ 68 | 69 | 70 | class FishConfig(Config): 71 | """Configuration for training on the toy dataset. 72 | Derives from the base Config class and overrides some values. 73 | """ 74 | # Give the configuration a recognizable name 75 | NAME = "fish" 76 | 77 | # We use a GPU with 12GB memory, which can fit two images. 78 | # Adjust down if you use a smaller GPU. 79 | IMAGES_PER_GPU = 2 80 | 81 | # Number of classes (including background) 82 | NUM_CLASSES = 1 + 1 # Background + baloon 83 | 84 | # Number of training steps per epoch 85 | STEPS_PER_EPOCH = 100 86 | 87 | # Skip detections with < 90% confidence 88 | DETECTION_MIN_CONFIDENCE = 0.9 89 | 90 | 91 | ############################################################ 92 | # Dataset 93 | ############################################################ 94 | 95 | class FishDataset(utils.Dataset): 96 | 97 | def load_fish(self, dataset_dir, subset): 98 | """Load a subset of the fish dataset. 99 | dataset_dir: Root directory of the dataset. 100 | subset: Subset to load: train or val 101 | """ 102 | # Add classes. We have only one class to add. 103 | self.add_class("fish", 1, "fish") 104 | 105 | # Train or validation dataset? 106 | assert subset in ["train", "val"] 107 | dataset_dir = os.path.join(dataset_dir, subset) 108 | 109 | # Load annotations 110 | # VGG Image Annotator saves each image in the form: 111 | # { 'filename': '28503151_5b5b7ec140_b.jpg', 112 | # 'regions': { 113 | # '0': { 114 | # 'region_attributes': {}, 115 | # 'shape_attributes': { 116 | # 'all_points_x': [...], 117 | # 'all_points_y': [...], 118 | # 'name': 'polygon'}}, 119 | # ... more regions ... 120 | # }, 121 | # 'size': 100202 122 | # } 123 | # We mostly care about the x and y coordinates of each region 124 | annotations = json.load(open(os.path.join(dataset_dir, "via_region_data.json"))) 125 | annotations = list(annotations.values()) # don't need the dict keys 126 | 127 | # The VIA tool saves images in the JSON even if they don't have any 128 | # annotations. Skip unannotated images. 129 | annotations = [a for a in annotations if a['regions']] 130 | 131 | # Add images 132 | for a in annotations: 133 | # Get the x, y coordinaets of points of the polygons that make up 134 | # the outline of each object instance. There are stores in the 135 | # shape_attributes (see json format above) 136 | polygons = [r['shape_attributes'] for r in a['regions'].values()] 137 | 138 | # load_mask() needs the image size to convert polygons to masks. 139 | # Unfortunately, VIA doesn't include it in JSON, so we must read 140 | # the image. This is only managable since the dataset is tiny. 
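            # (Note on the 'regions' handling above: VIA 1.x exports 'regions'
            # as a dict, which is what .values() assumes; VIA 2.x exports a
            # list, in which case use
            #     polygons = [r['shape_attributes'] for r in a['regions']]
            # instead.)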
141 | image_path = os.path.join(dataset_dir, a['filename']) 142 | image = skimage.io.imread(image_path) 143 | height, width = image.shape[:2] 144 | 145 | self.add_image( 146 | "fish", 147 | image_id=a['filename'], # use file name as a unique image id 148 | path=image_path, 149 | width=width, height=height, 150 | polygons=polygons) 151 | 152 | def load_mask(self, image_id): 153 | """Generate instance masks for an image. 154 | Returns: 155 | masks: A bool array of shape [height, width, instance count] with 156 | one mask per instance. 157 | class_ids: a 1D array of class IDs of the instance masks. 158 | """ 159 | # If not a fish dataset image, delegate to parent class. 160 | image_info = self.image_info[image_id] 161 | if image_info["source"] != "fish": 162 | print ("Not a Baloon") 163 | return super(self.__class__, self).load_mask(image_id) 164 | 165 | # Convert polygons to a bitmap mask of shape 166 | # [height, width, instance_count] 167 | info = self.image_info[image_id] 168 | mask = np.zeros([info["height"], info["width"], len(info["polygons"])], 169 | dtype=np.uint8) 170 | for i, p in enumerate(info["polygons"]): 171 | # Get indexes of pixels inside the polygon and set them to 1 172 | rr, cc = skimage.draw.polygon(p['all_points_y'], p['all_points_x']) 173 | mask[rr, cc, i] = 1 174 | 175 | # Return mask, and array of class IDs of each instance. Since we have 176 | # one class ID only, we return an array of 1s 177 | return mask, np.ones([mask.shape[-1]], dtype=np.int32) 178 | 179 | def image_reference(self, image_id): 180 | """Return the path of the image.""" 181 | info = self.image_info[image_id] 182 | if info["source"] == "fish": 183 | return info["path"] 184 | else: 185 | super(self.__class__, self).image_reference(image_id) 186 | 187 | 188 | 189 | 190 | ############################################################ 191 | # Configurations 192 | ############################################################ 193 | 194 | class CocoConfig(Config): 195 | """Configuration for training on MS COCO. 196 | Derives from the base Config class and overrides values specific 197 | to the COCO dataset. 198 | """ 199 | # Give the configuration a recognizable name 200 | NAME = "coco" 201 | 202 | # We use one GPU with 8GB memory, which can fit one image. 203 | # Adjust down if you use a smaller GPU. 204 | IMAGES_PER_GPU = 16 205 | 206 | # Uncomment to train on 8 GPUs (default is 1) 207 | # GPU_COUNT = 8 208 | 209 | # Number of classes (including background) 210 | NUM_CLASSES = 1 + 80 # COCO has 80 classes 211 | 212 | 213 | ############################################################ 214 | # Dataset 215 | ############################################################ 216 | 217 | class CocoDataset(utils.Dataset): 218 | def load_coco(self, dataset_dir, subset, year=DEFAULT_DATASET_YEAR, class_ids=None, 219 | class_map=None, return_coco=False, auto_download=False): 220 | """Load a subset of the COCO dataset. 221 | dataset_dir: The root directory of the COCO dataset. 222 | subset: What to load (train, val, minival, valminusminival) 223 | year: What dataset year to load (2014, 2017) as a string, not an integer 224 | class_ids: If provided, only loads images that have the given classes. 225 | class_map: TODO: Not implemented yet. Supports maping classes from 226 | different datasets to the same class ID. 227 | return_coco: If True, returns the COCO object. 
228 | auto_download: Automatically download and unzip MS-COCO images and annotations 229 | """ 230 | 231 | if auto_download is True: 232 | self.auto_download(dataset_dir, subset, year) 233 | 234 | coco = COCO("{}/annotations/instances_{}{}.json".format(dataset_dir, subset, year)) 235 | if subset == "minival" or subset == "valminusminival": 236 | subset = "val" 237 | image_dir = "{}/{}{}".format(dataset_dir, subset, year) 238 | 239 | # Load all classes or a subset? 240 | if not class_ids: 241 | # All classes 242 | class_ids = sorted(coco.getCatIds()) 243 | 244 | # All images or a subset? 245 | if class_ids: 246 | image_ids = [] 247 | for id in class_ids: 248 | image_ids.extend(list(coco.getImgIds(catIds=[id]))) 249 | # Remove duplicates 250 | image_ids = list(set(image_ids)) 251 | else: 252 | # All images 253 | image_ids = list(coco.imgs.keys()) 254 | 255 | # Add classes 256 | for i in class_ids: 257 | self.add_class("coco", i, coco.loadCats(i)[0]["name"]) 258 | 259 | # Add images 260 | for i in image_ids: 261 | self.add_image( 262 | "coco", image_id=i, 263 | path=os.path.join(image_dir, coco.imgs[i]['file_name']), 264 | width=coco.imgs[i]["width"], 265 | height=coco.imgs[i]["height"], 266 | annotations=coco.loadAnns(coco.getAnnIds( 267 | imgIds=[i], catIds=class_ids, iscrowd=None))) 268 | if return_coco: 269 | return coco 270 | 271 | def auto_download(self, dataDir, dataType, dataYear): 272 | """Download the COCO dataset/annotations if requested. 273 | dataDir: The root directory of the COCO dataset. 274 | dataType: What to load (train, val, minival, valminusminival) 275 | dataYear: What dataset year to load (2014, 2017) as a string, not an integer 276 | Note: 277 | For 2014, use "train", "val", "minival", or "valminusminival" 278 | For 2017, only "train" and "val" annotations are available 279 | """ 280 | 281 | # Setup paths and file names 282 | if dataType == "minival" or dataType == "valminusminival": 283 | imgDir = "{}/{}{}".format(dataDir, "val", dataYear) 284 | imgZipFile = "{}/{}{}.zip".format(dataDir, "val", dataYear) 285 | imgURL = "http://images.cocodataset.org/zips/{}{}.zip".format("val", dataYear) 286 | else: 287 | imgDir = "{}/{}{}".format(dataDir, dataType, dataYear) 288 | imgZipFile = "{}/{}{}.zip".format(dataDir, dataType, dataYear) 289 | imgURL = "http://images.cocodataset.org/zips/{}{}.zip".format(dataType, dataYear) 290 | # print("Image paths:"); print(imgDir); print(imgZipFile); print(imgURL) 291 | 292 | # Create main folder if it doesn't exist yet 293 | if not os.path.exists(dataDir): 294 | os.makedirs(dataDir) 295 | 296 | # Download images if not available locally 297 | if not os.path.exists(imgDir): 298 | os.makedirs(imgDir) 299 | print("Downloading images to " + imgZipFile + " ...") 300 | with urllib.request.urlopen(imgURL) as resp, open(imgZipFile, 'wb') as out: 301 | shutil.copyfileobj(resp, out) 302 | print("... done downloading.") 303 | print("Unzipping " + imgZipFile) 304 | with zipfile.ZipFile(imgZipFile, "r") as zip_ref: 305 | zip_ref.extractall(dataDir) 306 | print("... 
done unzipping") 307 | print("Will use images in " + imgDir) 308 | 309 | # Setup annotations data paths 310 | annDir = "{}/annotations".format(dataDir) 311 | if dataType == "minival": 312 | annZipFile = "{}/instances_minival2014.json.zip".format(dataDir) 313 | annFile = "{}/instances_minival2014.json".format(annDir) 314 | annURL = "https://dl.dropboxusercontent.com/s/o43o90bna78omob/instances_minival2014.json.zip?dl=0" 315 | unZipDir = annDir 316 | elif dataType == "valminusminival": 317 | annZipFile = "{}/instances_valminusminival2014.json.zip".format(dataDir) 318 | annFile = "{}/instances_valminusminival2014.json".format(annDir) 319 | annURL = "https://dl.dropboxusercontent.com/s/s3tw5zcg7395368/instances_valminusminival2014.json.zip?dl=0" 320 | unZipDir = annDir 321 | else: 322 | annZipFile = "{}/annotations_trainval{}.zip".format(dataDir, dataYear) 323 | annFile = "{}/instances_{}{}.json".format(annDir, dataType, dataYear) 324 | annURL = "http://images.cocodataset.org/annotations/annotations_trainval{}.zip".format(dataYear) 325 | unZipDir = dataDir 326 | # print("Annotations paths:"); print(annDir); print(annFile); print(annZipFile); print(annURL) 327 | 328 | # Download annotations if not available locally 329 | if not os.path.exists(annDir): 330 | os.makedirs(annDir) 331 | if not os.path.exists(annFile): 332 | if not os.path.exists(annZipFile): 333 | print("Downloading zipped annotations to " + annZipFile + " ...") 334 | with urllib.request.urlopen(annURL) as resp, open(annZipFile, 'wb') as out: 335 | shutil.copyfileobj(resp, out) 336 | print("... done downloading.") 337 | print("Unzipping " + annZipFile) 338 | with zipfile.ZipFile(annZipFile, "r") as zip_ref: 339 | zip_ref.extractall(unZipDir) 340 | print("... done unzipping") 341 | print("Will use annotations in " + annFile) 342 | 343 | def load_mask(self, image_id): 344 | """Load instance masks for the given image. 345 | 346 | Different datasets use different ways to store masks. This 347 | function converts the different mask format to one format 348 | in the form of a bitmap [height, width, instances]. 349 | 350 | Returns: 351 | masks: A bool array of shape [height, width, instance count] with 352 | one mask per instance. 353 | class_ids: a 1D array of class IDs of the instance masks. 354 | """ 355 | # If not a COCO image, delegate to parent class. 356 | image_info = self.image_info[image_id] 357 | if image_info["source"] != "coco": 358 | return super(CocoDataset, self).load_mask(image_id) 359 | 360 | instance_masks = [] 361 | class_ids = [] 362 | annotations = self.image_info[image_id]["annotations"] 363 | # Build mask of shape [height, width, instance_count] and list 364 | # of class IDs that correspond to each channel of the mask. 365 | for annotation in annotations: 366 | class_id = self.map_source_class_id( 367 | "coco.{}".format(annotation['category_id'])) 368 | if class_id: 369 | m = self.annToMask(annotation, image_info["height"], 370 | image_info["width"]) 371 | # Some objects are so small that they're less than 1 pixel area 372 | # and end up rounded out. Skip those objects. 373 | if m.max() < 1: 374 | continue 375 | # Is it a crowd? If so, use a negative class ID. 376 | if annotation['iscrowd']: 377 | # Use negative class ID for crowds 378 | class_id *= -1 379 | # For crowd masks, annToMask() sometimes returns a mask 380 | # smaller than the given dimensions. If so, resize it. 
381 | if m.shape[0] != image_info["height"] or m.shape[1] != image_info["width"]: 382 | m = np.ones([image_info["height"], image_info["width"]], dtype=bool) 383 | instance_masks.append(m) 384 | class_ids.append(class_id) 385 | 386 | # Pack instance masks into an array 387 | if class_ids: 388 | mask = np.stack(instance_masks, axis=2) 389 | class_ids = np.array(class_ids, dtype=np.int32) 390 | return mask, class_ids 391 | else: 392 | # Call super class to return an empty mask 393 | return super(CocoDataset, self).load_mask(image_id) 394 | 395 | def image_reference(self, image_id): 396 | """Return a link to the image in the COCO Website.""" 397 | info = self.image_info[image_id] 398 | if info["source"] == "coco": 399 | return "http://cocodataset.org/#explore?id={}".format(info["id"]) 400 | else: 401 | super(CocoDataset, self).image_reference(image_id) 402 | 403 | # The following two functions are from pycocotools with a few changes. 404 | 405 | def annToRLE(self, ann, height, width): 406 | """ 407 | Convert annotation which can be polygons, uncompressed RLE to RLE. 408 | :return: binary mask (numpy 2D array) 409 | """ 410 | segm = ann['segmentation'] 411 | if isinstance(segm, list): 412 | # polygon -- a single object might consist of multiple parts 413 | # we merge all parts into one mask rle code 414 | rles = maskUtils.frPyObjects(segm, height, width) 415 | rle = maskUtils.merge(rles) 416 | elif isinstance(segm['counts'], list): 417 | # uncompressed RLE 418 | rle = maskUtils.frPyObjects(segm, height, width) 419 | else: 420 | # rle 421 | rle = ann['segmentation'] 422 | return rle 423 | 424 | def annToMask(self, ann, height, width): 425 | """ 426 | Convert annotation which can be polygons, uncompressed RLE, or RLE to binary mask. 427 | :return: binary mask (numpy 2D array) 428 | """ 429 | rle = self.annToRLE(ann, height, width) 430 | m = maskUtils.decode(rle) 431 | return m 432 | 433 | 434 | ############################################################ 435 | # COCO Evaluation 436 | ############################################################ 437 | 438 | def build_coco_results(dataset, image_ids, rois, class_ids, scores, masks): 439 | """Arrange resutls to match COCO specs in http://cocodataset.org/#format 440 | """ 441 | # If no results, return an empty list 442 | if rois is None: 443 | return [] 444 | 445 | results = [] 446 | for image_id in image_ids: 447 | # Loop through detections 448 | for i in range(rois.shape[0]): 449 | class_id = class_ids[i] 450 | score = scores[i] 451 | bbox = np.around(rois[i], 1) 452 | mask = masks[:, :, i] 453 | 454 | result = { 455 | "image_id": image_id, 456 | "category_id": dataset.get_source_class_id(class_id, "coco"), 457 | "bbox": [bbox[1], bbox[0], bbox[3] - bbox[1], bbox[2] - bbox[0]], 458 | "score": score, 459 | "segmentation": maskUtils.encode(np.asfortranarray(mask)) 460 | } 461 | results.append(result) 462 | return results 463 | 464 | 465 | def evaluate_coco(model, dataset, coco, eval_type="bbox", limit=0, image_ids=None): 466 | """Runs official COCO evaluation. 467 | dataset: A Dataset object with valiadtion data 468 | eval_type: "bbox" or "segm" for bounding box or segmentation evaluation 469 | limit: if not 0, it's the number of images to use for evaluation 470 | """ 471 | # Pick COCO images from the dataset 472 | image_ids = image_ids or dataset.image_ids 473 | 474 | # Limit to a subset 475 | if limit: 476 | image_ids = image_ids[:limit] 477 | 478 | # Get corresponding COCO image IDs. 
479 | coco_image_ids = [dataset.image_info[id]["id"] for id in image_ids] 480 | 481 | t_prediction = 0 482 | t_start = time.time() 483 | 484 | results = [] 485 | for i, image_id in enumerate(image_ids): 486 | # Load image 487 | image = dataset.load_image(image_id) 488 | 489 | # Run detection 490 | t = time.time() 491 | r = model.detect([image])[0] 492 | t_prediction += (time.time() - t) 493 | 494 | # Convert results to COCO format 495 | image_results = build_coco_results(dataset, coco_image_ids[i:i + 1], 496 | r["rois"], r["class_ids"], 497 | r["scores"], r["masks"]) 498 | results.extend(image_results) 499 | 500 | # Load results. This modifies results with additional attributes. 501 | coco_results = coco.loadRes(results) 502 | 503 | # Evaluate 504 | cocoEval = COCOeval(coco, coco_results, eval_type) 505 | cocoEval.params.imgIds = coco_image_ids 506 | cocoEval.evaluate() 507 | cocoEval.accumulate() 508 | cocoEval.summarize() 509 | 510 | print("Prediction time: {}. Average {}/image".format( 511 | t_prediction, t_prediction / len(image_ids))) 512 | print("Total time: ", time.time() - t_start) 513 | 514 | 515 | ############################################################ 516 | # Training 517 | ############################################################ 518 | 519 | 520 | if __name__ == '__main__': 521 | import argparse 522 | 523 | # Parse command line arguments 524 | parser = argparse.ArgumentParser( 525 | description='Train Mask R-CNN on MS COCO.') 526 | parser.add_argument("command", 527 | metavar="", 528 | help="'train' or 'evaluate' on MS COCO") 529 | parser.add_argument('--dataset', required=True, 530 | metavar="/path/to/coco/", 531 | help='Directory of the MS-COCO dataset') 532 | parser.add_argument('--year', required=False, 533 | default=DEFAULT_DATASET_YEAR, 534 | metavar="", 535 | help='Year of the MS-COCO dataset (2014 or 2017) (default=2014)') 536 | parser.add_argument('--model', required=False, 537 | metavar="/path/to/weights.pth", 538 | help="Path to weights .pth file or 'coco'") 539 | parser.add_argument('--logs', required=False, 540 | default=DEFAULT_LOGS_DIR, 541 | metavar="/path/to/logs/", 542 | help='Logs and checkpoints directory (default=logs/)') 543 | parser.add_argument('--limit', required=False, 544 | default=500, 545 | metavar="", 546 | help='Images to use for evaluation (default=500)') 547 | parser.add_argument('--download', required=False, 548 | default=False, 549 | metavar="", 550 | help='Automatically download and unzip MS-COCO files (default=False)', 551 | type=bool) 552 | args = parser.parse_args() 553 | print("Command: ", args.command) 554 | print("Model: ", args.model) 555 | print("Dataset: ", args.dataset) 556 | print("Year: ", args.year) 557 | print("Logs: ", args.logs) 558 | print("Auto Download: ", args.download) 559 | 560 | # Configurations 561 | if args.command == "train": 562 | config = CocoConfig() 563 | else: 564 | class InferenceConfig(CocoConfig): 565 | # Set batch size to 1 since we'll be running inference on 566 | # one image at a time. 
Batch size = GPU_COUNT * IMAGES_PER_GPU 567 | GPU_COUNT = 1 568 | IMAGES_PER_GPU = 1 569 | DETECTION_MIN_CONFIDENCE = 0 570 | config = InferenceConfig() 571 | config.display() 572 | 573 | # Create model 574 | if args.command == "train": 575 | model = modellib.MaskRCNN(config=config, 576 | model_dir=args.logs) 577 | else: 578 | model = modellib.MaskRCNN(config=config, 579 | model_dir=args.logs) 580 | if config.GPU_COUNT: 581 | model = model.cuda() 582 | 583 | # Select weights file to load 584 | if args.model: 585 | if args.model.lower() == "coco": 586 | model_path = COCO_MODEL_PATH 587 | elif args.model.lower() == "last": 588 | # Find last trained weights 589 | model_path = model.find_last()[1] 590 | elif args.model.lower() == "imagenet": 591 | # Start from ImageNet trained weights 592 | model_path = config.IMAGENET_MODEL_PATH 593 | else: 594 | model_path = args.model 595 | else: 596 | model_path = "" 597 | 598 | # Load weights 599 | print("Loading weights ", model_path) 600 | model.load_weights(model_path) 601 | 602 | # Train or evaluate 603 | if args.command == "train": 604 | # Training dataset. Use the training set and 35K from the 605 | # validation set, as as in the Mask RCNN paper. 606 | dataset_train = CocoDataset() 607 | dataset_train.load_coco(args.dataset, "train", year=args.year, auto_download=args.download) 608 | dataset_train.load_coco(args.dataset, "valminusminival", year=args.year, auto_download=args.download) 609 | dataset_train.prepare() 610 | 611 | # Validation dataset 612 | dataset_val = CocoDataset() 613 | dataset_val.load_coco(args.dataset, "minival", year=args.year, auto_download=args.download) 614 | dataset_val.prepare() 615 | 616 | # *** This training schedule is an example. Update to your needs *** 617 | 618 | # Training - Stage 1 619 | print("Training network heads") 620 | model.train_model(dataset_train, dataset_val, 621 | learning_rate=config.LEARNING_RATE, 622 | epochs=40, 623 | layers='heads') 624 | 625 | # Training - Stage 2 626 | # Finetune layers from ResNet stage 4 and up 627 | print("Fine tune Resnet stage 4 and up") 628 | model.train_model(dataset_train, dataset_val, 629 | learning_rate=config.LEARNING_RATE, 630 | epochs=120, 631 | layers='4+') 632 | 633 | # Training - Stage 3 634 | # Fine tune all layers 635 | print("Fine tune all layers") 636 | model.train_model(dataset_train, dataset_val, 637 | learning_rate=config.LEARNING_RATE / 10, 638 | epochs=160, 639 | layers='all') 640 | 641 | elif args.command == "evaluate": 642 | # Validation dataset 643 | dataset_val = CocoDataset() 644 | coco = dataset_val.load_coco(args.dataset, "minival", year=args.year, return_coco=True, auto_download=args.download) 645 | dataset_val.prepare() 646 | print("Running COCO evaluation on {} images.".format(args.limit)) 647 | evaluate_coco(model, dataset_val, coco, "bbox", limit=int(args.limit)) 648 | evaluate_coco(model, dataset_val, coco, "segm", limit=int(args.limit)) 649 | else: 650 | print("'{}' is not recognized. 
" 651 | "Use 'train' or 'evaluate'".format(args.command)) 652 | -------------------------------------------------------------------------------- /images/find_fish.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michhar/pytorch-mask-rcnn-samples/f03054ab01f162c23bcccfc5de32a633041c5237/images/find_fish.png -------------------------------------------------------------------------------- /images/rec_fish.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michhar/pytorch-mask-rcnn-samples/f03054ab01f162c23bcccfc5de32a633041c5237/images/rec_fish.png -------------------------------------------------------------------------------- /images/vgg_annotated_fish.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michhar/pytorch-mask-rcnn-samples/f03054ab01f162c23bcccfc5de32a633041c5237/images/vgg_annotated_fish.jpg -------------------------------------------------------------------------------- /lib/nms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michhar/pytorch-mask-rcnn-samples/f03054ab01f162c23bcccfc5de32a633041c5237/lib/nms/__init__.py -------------------------------------------------------------------------------- /lib/nms/build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.ffi import create_extension 4 | 5 | 6 | sources = ['src/nms.c'] 7 | headers = ['src/nms.h'] 8 | defines = [] 9 | with_cuda = False 10 | 11 | if torch.cuda.is_available(): 12 | print('Including CUDA code.') 13 | sources += ['src/nms_cuda.c'] 14 | headers += ['src/nms_cuda.h'] 15 | defines += [('WITH_CUDA', None)] 16 | with_cuda = True 17 | 18 | this_file = os.path.dirname(os.path.realpath(__file__)) 19 | print(this_file) 20 | extra_objects = ['src/cuda/nms_kernel.cu.o'] 21 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 22 | 23 | ffi = create_extension( 24 | '_ext.nms', 25 | headers=headers, 26 | sources=sources, 27 | define_macros=defines, 28 | relative_to=__file__, 29 | with_cuda=with_cuda, 30 | extra_objects=extra_objects 31 | ) 32 | 33 | if __name__ == '__main__': 34 | ffi.build() 35 | -------------------------------------------------------------------------------- /lib/nms/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from lib.nms.pth_nms import pth_nms 12 | 13 | 14 | def nms(dets, thresh): 15 | """Dispatch to either CPU or GPU NMS implementations. 
16 | Accept dets as tensor""" 17 | return pth_nms(dets, thresh) 18 | -------------------------------------------------------------------------------- /lib/nms/pth_nms.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from ._ext import nms 3 | import numpy as np 4 | 5 | def pth_nms(dets, thresh): 6 | """ 7 | dets has to be a tensor 8 | """ 9 | if not dets.is_cuda: 10 | x1 = dets[:, 0] 11 | y1 = dets[:, 1] 12 | x2 = dets[:, 2] 13 | y2 = dets[:, 3] 14 | scores = dets[:, 4] 15 | 16 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 17 | order = scores.sort(0, descending=True)[1] 18 | # order = torch.from_numpy(np.ascontiguousarray(scores.numpy().argsort()[::-1])).long() 19 | 20 | keep = torch.LongTensor(dets.size(0)) 21 | num_out = torch.LongTensor(1) 22 | nms.cpu_nms(keep, num_out, dets, order, areas, thresh) 23 | 24 | return keep[:num_out[0]] 25 | else: 26 | x1 = dets[:, 0] 27 | y1 = dets[:, 1] 28 | x2 = dets[:, 2] 29 | y2 = dets[:, 3] 30 | scores = dets[:, 4] 31 | 32 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 33 | order = scores.sort(0, descending=True)[1] 34 | # order = torch.from_numpy(np.ascontiguousarray(scores.cpu().numpy().argsort()[::-1])).long().cuda() 35 | 36 | dets = dets[order].contiguous() 37 | 38 | keep = torch.LongTensor(dets.size(0)) 39 | num_out = torch.LongTensor(1) 40 | # keep = torch.cuda.LongTensor(dets.size(0)) 41 | # num_out = torch.cuda.LongTensor(1) 42 | nms.gpu_nms(keep, num_out, dets, thresh) 43 | 44 | return order[keep[:num_out[0]].cuda()].contiguous() 45 | # return order[keep[:num_out[0]]].contiguous() 46 | 47 | -------------------------------------------------------------------------------- /lib/nms/src/cuda/nms_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // ------------------------------------------------------------------ 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | #include 12 | #include 13 | #include 14 | #include "nms_kernel.h" 15 | 16 | __device__ inline float devIoU(float const * const a, float const * const b) { 17 | float left = fmaxf(a[0], b[0]), right = fminf(a[2], b[2]); 18 | float top = fmaxf(a[1], b[1]), bottom = fminf(a[3], b[3]); 19 | float width = fmaxf(right - left + 1, 0.f), height = fmaxf(bottom - top + 1, 0.f); 20 | float interS = width * height; 21 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 22 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 23 | return interS / (Sa + Sb - interS); 24 | } 25 | 26 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 27 | const float *dev_boxes, unsigned long long *dev_mask) { 28 | const int row_start = blockIdx.y; 29 | const int col_start = blockIdx.x; 30 | 31 | // if (row_start > col_start) return; 32 | 33 | const int row_size = 34 | fminf(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 35 | const int col_size = 36 | fminf(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 37 | 38 | __shared__ float block_boxes[threadsPerBlock * 5]; 39 | if (threadIdx.x < col_size) { 40 | block_boxes[threadIdx.x * 5 + 0] = 41 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 42 | block_boxes[threadIdx.x * 5 + 1] = 43 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 44 | block_boxes[threadIdx.x * 5 + 
2] = 45 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 46 | block_boxes[threadIdx.x * 5 + 3] = 47 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 48 | block_boxes[threadIdx.x * 5 + 4] = 49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 50 | } 51 | __syncthreads(); 52 | 53 | if (threadIdx.x < row_size) { 54 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 55 | const float *cur_box = dev_boxes + cur_box_idx * 5; 56 | int i = 0; 57 | unsigned long long t = 0; 58 | int start = 0; 59 | if (row_start == col_start) { 60 | start = threadIdx.x + 1; 61 | } 62 | for (i = start; i < col_size; i++) { 63 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 64 | t |= 1ULL << i; 65 | } 66 | } 67 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 68 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 69 | } 70 | } 71 | 72 | 73 | void _nms(int boxes_num, float * boxes_dev, 74 | unsigned long long * mask_dev, float nms_overlap_thresh) { 75 | 76 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 77 | DIVUP(boxes_num, threadsPerBlock)); 78 | dim3 threads(threadsPerBlock); 79 | nms_kernel<<>>(boxes_num, 80 | nms_overlap_thresh, 81 | boxes_dev, 82 | mask_dev); 83 | } 84 | 85 | #ifdef __cplusplus 86 | } 87 | #endif 88 | -------------------------------------------------------------------------------- /lib/nms/src/cuda/nms_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _NMS_KERNEL 2 | #define _NMS_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 9 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 10 | 11 | void _nms(int boxes_num, float * boxes_dev, 12 | unsigned long long * mask_dev, float nms_overlap_thresh); 13 | 14 | #ifdef __cplusplus 15 | } 16 | #endif 17 | 18 | #endif 19 | 20 | -------------------------------------------------------------------------------- /lib/nms/src/nms.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int cpu_nms(THLongTensor * keep_out, THLongTensor * num_out, THFloatTensor * boxes, THLongTensor * order, THFloatTensor * areas, float nms_overlap_thresh) { 5 | // boxes has to be sorted 6 | THArgCheck(THLongTensor_isContiguous(keep_out), 0, "keep_out must be contiguous"); 7 | THArgCheck(THLongTensor_isContiguous(boxes), 2, "boxes must be contiguous"); 8 | THArgCheck(THLongTensor_isContiguous(order), 3, "order must be contiguous"); 9 | THArgCheck(THLongTensor_isContiguous(areas), 4, "areas must be contiguous"); 10 | // Number of ROIs 11 | long boxes_num = THFloatTensor_size(boxes, 0); 12 | long boxes_dim = THFloatTensor_size(boxes, 1); 13 | 14 | long * keep_out_flat = THLongTensor_data(keep_out); 15 | float * boxes_flat = THFloatTensor_data(boxes); 16 | long * order_flat = THLongTensor_data(order); 17 | float * areas_flat = THFloatTensor_data(areas); 18 | 19 | THByteTensor* suppressed = THByteTensor_newWithSize1d(boxes_num); 20 | THByteTensor_fill(suppressed, 0); 21 | unsigned char * suppressed_flat = THByteTensor_data(suppressed); 22 | 23 | // nominal indices 24 | int i, j; 25 | // sorted indices 26 | int _i, _j; 27 | // temp variables for box i's (the box currently under consideration) 28 | float ix1, iy1, ix2, iy2, iarea; 29 | // variables for computing overlap with box j (lower scoring box) 30 | float xx1, yy1, xx2, yy2; 31 | float w, h; 32 | float inter, ovr; 33 | 34 | long num_to_keep = 
0; 35 | for (_i=0; _i < boxes_num; ++_i) { 36 | i = order_flat[_i]; 37 | if (suppressed_flat[i] == 1) { 38 | continue; 39 | } 40 | keep_out_flat[num_to_keep++] = i; 41 | ix1 = boxes_flat[i * boxes_dim]; 42 | iy1 = boxes_flat[i * boxes_dim + 1]; 43 | ix2 = boxes_flat[i * boxes_dim + 2]; 44 | iy2 = boxes_flat[i * boxes_dim + 3]; 45 | iarea = areas_flat[i]; 46 | for (_j = _i + 1; _j < boxes_num; ++_j) { 47 | j = order_flat[_j]; 48 | if (suppressed_flat[j] == 1) { 49 | continue; 50 | } 51 | xx1 = fmaxf(ix1, boxes_flat[j * boxes_dim]); 52 | yy1 = fmaxf(iy1, boxes_flat[j * boxes_dim + 1]); 53 | xx2 = fminf(ix2, boxes_flat[j * boxes_dim + 2]); 54 | yy2 = fminf(iy2, boxes_flat[j * boxes_dim + 3]); 55 | w = fmaxf(0.0, xx2 - xx1 + 1); 56 | h = fmaxf(0.0, yy2 - yy1 + 1); 57 | inter = w * h; 58 | ovr = inter / (iarea + areas_flat[j] - inter); 59 | if (ovr >= nms_overlap_thresh) { 60 | suppressed_flat[j] = 1; 61 | } 62 | } 63 | } 64 | 65 | long *num_out_flat = THLongTensor_data(num_out); 66 | *num_out_flat = num_to_keep; 67 | THByteTensor_free(suppressed); 68 | return 1; 69 | } -------------------------------------------------------------------------------- /lib/nms/src/nms.h: -------------------------------------------------------------------------------- 1 | int cpu_nms(THLongTensor * keep_out, THLongTensor * num_out, THFloatTensor * boxes, THLongTensor * order, THFloatTensor * areas, float nms_overlap_thresh); -------------------------------------------------------------------------------- /lib/nms/src/nms_cuda.c: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // ------------------------------------------------------------------ 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "cuda/nms_kernel.h" 13 | 14 | 15 | extern THCState *state; 16 | 17 | int gpu_nms(THLongTensor * keep, THLongTensor* num_out, THCudaTensor * boxes, float nms_overlap_thresh) { 18 | // boxes has to be sorted 19 | THArgCheck(THLongTensor_isContiguous(keep), 0, "boxes must be contiguous"); 20 | THArgCheck(THCudaTensor_isContiguous(state, boxes), 2, "boxes must be contiguous"); 21 | // Number of ROIs 22 | int boxes_num = THCudaTensor_size(state, boxes, 0); 23 | int boxes_dim = THCudaTensor_size(state, boxes, 1); 24 | 25 | float* boxes_flat = THCudaTensor_data(state, boxes); 26 | 27 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 28 | THCudaLongTensor * mask = THCudaLongTensor_newWithSize2d(state, boxes_num, col_blocks); 29 | unsigned long long* mask_flat = THCudaLongTensor_data(state, mask); 30 | 31 | _nms(boxes_num, boxes_flat, mask_flat, nms_overlap_thresh); 32 | 33 | THLongTensor * mask_cpu = THLongTensor_newWithSize2d(boxes_num, col_blocks); 34 | THLongTensor_copyCuda(state, mask_cpu, mask); 35 | THCudaLongTensor_free(state, mask); 36 | 37 | unsigned long long * mask_cpu_flat = THLongTensor_data(mask_cpu); 38 | 39 | THLongTensor * remv_cpu = THLongTensor_newWithSize1d(col_blocks); 40 | unsigned long long* remv_cpu_flat = THLongTensor_data(remv_cpu); 41 | THLongTensor_fill(remv_cpu, 0); 42 | 43 | long * keep_flat = THLongTensor_data(keep); 44 | long num_to_keep = 0; 45 | 46 | int i, j; 47 | for (i = 0; i < boxes_num; i++) { 48 | int nblock = i / threadsPerBlock; 49 | int inblock = i % threadsPerBlock; 50 | 51 | if 
(!(remv_cpu_flat[nblock] & (1ULL << inblock))) { 52 | keep_flat[num_to_keep++] = i; 53 | unsigned long long *p = &mask_cpu_flat[0] + i * col_blocks; 54 | for (j = nblock; j < col_blocks; j++) { 55 | remv_cpu_flat[j] |= p[j]; 56 | } 57 | } 58 | } 59 | 60 | long * num_out_flat = THLongTensor_data(num_out); 61 | * num_out_flat = num_to_keep; 62 | 63 | THLongTensor_free(mask_cpu); 64 | THLongTensor_free(remv_cpu); 65 | 66 | return 1; 67 | } 68 | -------------------------------------------------------------------------------- /lib/nms/src/nms_cuda.h: -------------------------------------------------------------------------------- 1 | int gpu_nms(THLongTensor * keep_out, THLongTensor* num_out, THCudaTensor * boxes, float nms_overlap_thresh); -------------------------------------------------------------------------------- /lib/roi_align/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michhar/pytorch-mask-rcnn-samples/f03054ab01f162c23bcccfc5de32a633041c5237/lib/roi_align/__init__.py -------------------------------------------------------------------------------- /lib/roi_align/build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.ffi import create_extension 4 | 5 | 6 | sources = ['src/crop_and_resize.c'] 7 | headers = ['src/crop_and_resize.h'] 8 | defines = [] 9 | with_cuda = False 10 | 11 | extra_objects = [] 12 | if torch.cuda.is_available(): 13 | print('Including CUDA code.') 14 | sources += ['src/crop_and_resize_gpu.c'] 15 | headers += ['src/crop_and_resize_gpu.h'] 16 | defines += [('WITH_CUDA', None)] 17 | extra_objects += ['src/cuda/crop_and_resize_kernel.cu.o'] 18 | with_cuda = True 19 | 20 | extra_compile_args = ['-std=c99'] 21 | 22 | this_file = os.path.dirname(os.path.realpath(__file__)) 23 | print(this_file) 24 | sources = [os.path.join(this_file, fname) for fname in sources] 25 | headers = [os.path.join(this_file, fname) for fname in headers] 26 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 27 | 28 | ffi = create_extension( 29 | '_ext.crop_and_resize', 30 | headers=headers, 31 | sources=sources, 32 | define_macros=defines, 33 | relative_to=__file__, 34 | with_cuda=with_cuda, 35 | extra_objects=extra_objects, 36 | extra_compile_args=extra_compile_args 37 | ) 38 | 39 | if __name__ == '__main__': 40 | ffi.build() 41 | -------------------------------------------------------------------------------- /lib/roi_align/crop_and_resize.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from torch.autograd import Function 6 | 7 | from ._ext import crop_and_resize as _backend 8 | 9 | 10 | class CropAndResizeFunction(Function): 11 | 12 | def __init__(self, crop_height, crop_width, extrapolation_value=0): 13 | self.crop_height = crop_height 14 | self.crop_width = crop_width 15 | self.extrapolation_value = extrapolation_value 16 | 17 | def forward(self, image, boxes, box_ind): 18 | crops = torch.zeros_like(image) 19 | 20 | if image.is_cuda: 21 | _backend.crop_and_resize_gpu_forward( 22 | image, boxes, box_ind, 23 | self.extrapolation_value, self.crop_height, self.crop_width, crops) 24 | else: 25 | _backend.crop_and_resize_forward( 26 | image, boxes, box_ind, 27 | self.extrapolation_value, self.crop_height, self.crop_width, crops) 28 | 29 | # save for backward 30 | 
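        # Note: keeping state on self and calling save_for_backward this way
        # follows the old-style autograd.Function API used around PyTorch 0.4;
        # newer releases expect static forward/backward methods that receive a
        # ctx argument instead.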
self.im_size = image.size() 31 | self.save_for_backward(boxes, box_ind) 32 | 33 | return crops 34 | 35 | def backward(self, grad_outputs): 36 | boxes, box_ind = self.saved_tensors 37 | 38 | grad_outputs = grad_outputs.contiguous() 39 | grad_image = torch.zeros_like(grad_outputs).resize_(*self.im_size) 40 | 41 | if grad_outputs.is_cuda: 42 | _backend.crop_and_resize_gpu_backward( 43 | grad_outputs, boxes, box_ind, grad_image 44 | ) 45 | else: 46 | _backend.crop_and_resize_backward( 47 | grad_outputs, boxes, box_ind, grad_image 48 | ) 49 | 50 | return grad_image, None, None 51 | 52 | 53 | class CropAndResize(nn.Module): 54 | """ 55 | Crop and resize ported from tensorflow 56 | See more details on https://www.tensorflow.org/api_docs/python/tf/image/crop_and_resize 57 | """ 58 | 59 | def __init__(self, crop_height, crop_width, extrapolation_value=0): 60 | super(CropAndResize, self).__init__() 61 | 62 | self.crop_height = crop_height 63 | self.crop_width = crop_width 64 | self.extrapolation_value = extrapolation_value 65 | 66 | def forward(self, image, boxes, box_ind): 67 | return CropAndResizeFunction(self.crop_height, self.crop_width, self.extrapolation_value)(image, boxes, box_ind) 68 | -------------------------------------------------------------------------------- /lib/roi_align/roi_align.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | from .crop_and_resize import CropAndResizeFunction, CropAndResize 5 | 6 | 7 | class RoIAlign(nn.Module): 8 | 9 | def __init__(self, crop_height, crop_width, extrapolation_value=0, transform_fpcoor=True): 10 | super(RoIAlign, self).__init__() 11 | 12 | self.crop_height = crop_height 13 | self.crop_width = crop_width 14 | self.extrapolation_value = extrapolation_value 15 | self.transform_fpcoor = transform_fpcoor 16 | 17 | def forward(self, featuremap, boxes, box_ind): 18 | """ 19 | RoIAlign based on crop_and_resize. 
20 | See more details on https://github.com/ppwwyyxx/tensorpack/blob/6d5ba6a970710eaaa14b89d24aace179eb8ee1af/examples/FasterRCNN/model.py#L301 21 | :param featuremap: NxCxHxW 22 | :param boxes: Mx4 float box with (x1, y1, x2, y2) **without normalization** 23 | :param box_ind: M 24 | :return: MxCxoHxoW 25 | """ 26 | x1, y1, x2, y2 = torch.split(boxes, 1, dim=1) 27 | image_height, image_width = featuremap.size()[2:4] 28 | 29 | if self.transform_fpcoor: 30 | spacing_w = (x2 - x1) / float(self.crop_width) 31 | spacing_h = (y2 - y1) / float(self.crop_height) 32 | 33 | nx0 = (x1 + spacing_w / 2 - 0.5) / float(image_width - 1) 34 | ny0 = (y1 + spacing_h / 2 - 0.5) / float(image_height - 1) 35 | nw = spacing_w * float(self.crop_width - 1) / float(image_width - 1) 36 | nh = spacing_h * float(self.crop_height - 1) / float(image_height - 1) 37 | 38 | boxes = torch.cat((ny0, nx0, ny0 + nh, nx0 + nw), 1) 39 | else: 40 | x1 = x1 / float(image_width - 1) 41 | x2 = x2 / float(image_width - 1) 42 | y1 = y1 / float(image_height - 1) 43 | y2 = y2 / float(image_height - 1) 44 | boxes = torch.cat((y1, x1, y2, x2), 1) 45 | 46 | boxes = boxes.detach().contiguous() 47 | box_ind = box_ind.detach() 48 | return CropAndResizeFunction(self.crop_height, self.crop_width, self.extrapolation_value)(featuremap, boxes, box_ind) 49 | -------------------------------------------------------------------------------- /lib/roi_align/src/crop_and_resize.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | 6 | void CropAndResizePerBox( 7 | const float * image_data, 8 | const int batch_size, 9 | const int depth, 10 | const int image_height, 11 | const int image_width, 12 | 13 | const float * boxes_data, 14 | const int * box_index_data, 15 | const int start_box, 16 | const int limit_box, 17 | 18 | float * corps_data, 19 | const int crop_height, 20 | const int crop_width, 21 | const float extrapolation_value 22 | ) { 23 | const int image_channel_elements = image_height * image_width; 24 | const int image_elements = depth * image_channel_elements; 25 | 26 | const int channel_elements = crop_height * crop_width; 27 | const int crop_elements = depth * channel_elements; 28 | 29 | int b; 30 | #pragma omp parallel for 31 | for (b = start_box; b < limit_box; ++b) { 32 | const float * box = boxes_data + b * 4; 33 | const float y1 = box[0]; 34 | const float x1 = box[1]; 35 | const float y2 = box[2]; 36 | const float x2 = box[3]; 37 | 38 | const int b_in = box_index_data[b]; 39 | if (b_in < 0 || b_in >= batch_size) { 40 | printf("Error: batch_index %d out of range [0, %d)\n", b_in, batch_size); 41 | exit(-1); 42 | } 43 | 44 | const float height_scale = 45 | (crop_height > 1) 46 | ? (y2 - y1) * (image_height - 1) / (crop_height - 1) 47 | : 0; 48 | const float width_scale = 49 | (crop_width > 1) ? (x2 - x1) * (image_width - 1) / (crop_width - 1) 50 | : 0; 51 | 52 | for (int y = 0; y < crop_height; ++y) 53 | { 54 | const float in_y = (crop_height > 1) 55 | ? 
y1 * (image_height - 1) + y * height_scale 56 | : 0.5 * (y1 + y2) * (image_height - 1); 57 | 58 | if (in_y < 0 || in_y > image_height - 1) 59 | { 60 | for (int x = 0; x < crop_width; ++x) 61 | { 62 | for (int d = 0; d < depth; ++d) 63 | { 64 | // crops(b, y, x, d) = extrapolation_value; 65 | corps_data[crop_elements * b + channel_elements * d + y * crop_width + x] = extrapolation_value; 66 | } 67 | } 68 | continue; 69 | } 70 | 71 | const int top_y_index = floorf(in_y); 72 | const int bottom_y_index = ceilf(in_y); 73 | const float y_lerp = in_y - top_y_index; 74 | 75 | for (int x = 0; x < crop_width; ++x) 76 | { 77 | const float in_x = (crop_width > 1) 78 | ? x1 * (image_width - 1) + x * width_scale 79 | : 0.5 * (x1 + x2) * (image_width - 1); 80 | if (in_x < 0 || in_x > image_width - 1) 81 | { 82 | for (int d = 0; d < depth; ++d) 83 | { 84 | corps_data[crop_elements * b + channel_elements * d + y * crop_width + x] = extrapolation_value; 85 | } 86 | continue; 87 | } 88 | 89 | const int left_x_index = floorf(in_x); 90 | const int right_x_index = ceilf(in_x); 91 | const float x_lerp = in_x - left_x_index; 92 | 93 | for (int d = 0; d < depth; ++d) 94 | { 95 | const float *pimage = image_data + b_in * image_elements + d * image_channel_elements; 96 | 97 | const float top_left = pimage[top_y_index * image_width + left_x_index]; 98 | const float top_right = pimage[top_y_index * image_width + right_x_index]; 99 | const float bottom_left = pimage[bottom_y_index * image_width + left_x_index]; 100 | const float bottom_right = pimage[bottom_y_index * image_width + right_x_index]; 101 | 102 | const float top = top_left + (top_right - top_left) * x_lerp; 103 | const float bottom = 104 | bottom_left + (bottom_right - bottom_left) * x_lerp; 105 | 106 | corps_data[crop_elements * b + channel_elements * d + y * crop_width + x] = top + (bottom - top) * y_lerp; 107 | } 108 | } // end for x 109 | } // end for y 110 | } // end for b 111 | 112 | } 113 | 114 | 115 | void crop_and_resize_forward( 116 | THFloatTensor * image, 117 | THFloatTensor * boxes, // [y1, x1, y2, x2] 118 | THIntTensor * box_index, // range in [0, batch_size) 119 | const float extrapolation_value, 120 | const int crop_height, 121 | const int crop_width, 122 | THFloatTensor * crops 123 | ) { 124 | //const int batch_size = image->size[0]; 125 | //const int depth = image->size[1]; 126 | //const int image_height = image->size[2]; 127 | //const int image_width = image->size[3]; 128 | 129 | //const int num_boxes = boxes->size[0]; 130 | 131 | const int batch_size = THFloatTensor_size(image, 0); 132 | const int depth = THFloatTensor_size(image, 1); 133 | const int image_height = THFloatTensor_size(image, 2); 134 | const int image_width = THFloatTensor_size(image, 3); 135 | 136 | const int num_boxes = THFloatTensor_size(boxes, 0); 137 | 138 | // init output space 139 | THFloatTensor_resize4d(crops, num_boxes, depth, crop_height, crop_width); 140 | THFloatTensor_zero(crops); 141 | 142 | // crop_and_resize for each box 143 | CropAndResizePerBox( 144 | THFloatTensor_data(image), 145 | batch_size, 146 | depth, 147 | image_height, 148 | image_width, 149 | 150 | THFloatTensor_data(boxes), 151 | THIntTensor_data(box_index), 152 | 0, 153 | num_boxes, 154 | 155 | THFloatTensor_data(crops), 156 | crop_height, 157 | crop_width, 158 | extrapolation_value 159 | ); 160 | 161 | } 162 | 163 | 164 | void crop_and_resize_backward( 165 | THFloatTensor * grads, 166 | THFloatTensor * boxes, // [y1, x1, y2, x2] 167 | THIntTensor * box_index, // range in [0, batch_size) 168 
| THFloatTensor * grads_image // resize to [bsize, c, hc, wc] 169 | ) 170 | { 171 | // shape 172 | //const int batch_size = grads_image->size[0]; 173 | //const int depth = grads_image->size[1]; 174 | //const int image_height = grads_image->size[2]; 175 | //const int image_width = grads_image->size[3]; 176 | 177 | //const int num_boxes = grads->size[0]; 178 | //const int crop_height = grads->size[2]; 179 | //const int crop_width = grads->size[3]; 180 | 181 | const int batch_size = THFloatTensor_size(grads_image, 0); 182 | const int depth = THFloatTensor_size(grads_image, 1); 183 | const int image_height = THFloatTensor_size(grads_image, 2); 184 | const int image_width = THFloatTensor_size(grads_image, 3); 185 | 186 | const int num_boxes = THFloatTensor_size(grads, 0); 187 | const int crop_height = THFloatTensor_size(grads,2); 188 | const int crop_width = THFloatTensor_size(grads,3); 189 | 190 | 191 | // n_elements 192 | const int image_channel_elements = image_height * image_width; 193 | const int image_elements = depth * image_channel_elements; 194 | 195 | const int channel_elements = crop_height * crop_width; 196 | const int crop_elements = depth * channel_elements; 197 | 198 | // init output space 199 | THFloatTensor_zero(grads_image); 200 | 201 | // data pointer 202 | const float * grads_data = THFloatTensor_data(grads); 203 | const float * boxes_data = THFloatTensor_data(boxes); 204 | const int * box_index_data = THIntTensor_data(box_index); 205 | float * grads_image_data = THFloatTensor_data(grads_image); 206 | 207 | for (int b = 0; b < num_boxes; ++b) { 208 | const float * box = boxes_data + b * 4; 209 | const float y1 = box[0]; 210 | const float x1 = box[1]; 211 | const float y2 = box[2]; 212 | const float x2 = box[3]; 213 | 214 | const int b_in = box_index_data[b]; 215 | if (b_in < 0 || b_in >= batch_size) { 216 | printf("Error: batch_index %d out of range [0, %d)\n", b_in, batch_size); 217 | exit(-1); 218 | } 219 | 220 | const float height_scale = 221 | (crop_height > 1) ? (y2 - y1) * (image_height - 1) / (crop_height - 1) 222 | : 0; 223 | const float width_scale = 224 | (crop_width > 1) ? (x2 - x1) * (image_width - 1) / (crop_width - 1) 225 | : 0; 226 | 227 | for (int y = 0; y < crop_height; ++y) 228 | { 229 | const float in_y = (crop_height > 1) 230 | ? y1 * (image_height - 1) + y * height_scale 231 | : 0.5 * (y1 + y2) * (image_height - 1); 232 | if (in_y < 0 || in_y > image_height - 1) 233 | { 234 | continue; 235 | } 236 | const int top_y_index = floorf(in_y); 237 | const int bottom_y_index = ceilf(in_y); 238 | const float y_lerp = in_y - top_y_index; 239 | 240 | for (int x = 0; x < crop_width; ++x) 241 | { 242 | const float in_x = (crop_width > 1) 243 | ? 
x1 * (image_width - 1) + x * width_scale 244 | : 0.5 * (x1 + x2) * (image_width - 1); 245 | if (in_x < 0 || in_x > image_width - 1) 246 | { 247 | continue; 248 | } 249 | const int left_x_index = floorf(in_x); 250 | const int right_x_index = ceilf(in_x); 251 | const float x_lerp = in_x - left_x_index; 252 | 253 | for (int d = 0; d < depth; ++d) 254 | { 255 | float *pimage = grads_image_data + b_in * image_elements + d * image_channel_elements; 256 | const float grad_val = grads_data[crop_elements * b + channel_elements * d + y * crop_width + x]; 257 | 258 | const float dtop = (1 - y_lerp) * grad_val; 259 | pimage[top_y_index * image_width + left_x_index] += (1 - x_lerp) * dtop; 260 | pimage[top_y_index * image_width + right_x_index] += x_lerp * dtop; 261 | 262 | const float dbottom = y_lerp * grad_val; 263 | pimage[bottom_y_index * image_width + left_x_index] += (1 - x_lerp) * dbottom; 264 | pimage[bottom_y_index * image_width + right_x_index] += x_lerp * dbottom; 265 | } // end d 266 | } // end x 267 | } // end y 268 | } // end b 269 | } -------------------------------------------------------------------------------- /lib/roi_align/src/crop_and_resize.h: -------------------------------------------------------------------------------- 1 | void crop_and_resize_forward( 2 | THFloatTensor * image, 3 | THFloatTensor * boxes, // [y1, x1, y2, x2] 4 | THIntTensor * box_index, // range in [0, batch_size) 5 | const float extrapolation_value, 6 | const int crop_height, 7 | const int crop_width, 8 | THFloatTensor * crops 9 | ); 10 | 11 | void crop_and_resize_backward( 12 | THFloatTensor * grads, 13 | THFloatTensor * boxes, // [y1, x1, y2, x2] 14 | THIntTensor * box_index, // range in [0, batch_size) 15 | THFloatTensor * grads_image // resize to [bsize, c, hc, wc] 16 | ); -------------------------------------------------------------------------------- /lib/roi_align/src/crop_and_resize_gpu.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "cuda/crop_and_resize_kernel.h" 3 | 4 | extern THCState *state; 5 | 6 | 7 | void crop_and_resize_gpu_forward( 8 | THCudaTensor * image, 9 | THCudaTensor * boxes, // [y1, x1, y2, x2] 10 | THCudaIntTensor * box_index, // range in [0, batch_size) 11 | const float extrapolation_value, 12 | const int crop_height, 13 | const int crop_width, 14 | THCudaTensor * crops 15 | ) { 16 | const int batch_size = THCudaTensor_size(state, image, 0); 17 | const int depth = THCudaTensor_size(state, image, 1); 18 | const int image_height = THCudaTensor_size(state, image, 2); 19 | const int image_width = THCudaTensor_size(state, image, 3); 20 | 21 | const int num_boxes = THCudaTensor_size(state, boxes, 0); 22 | 23 | // init output space 24 | THCudaTensor_resize4d(state, crops, num_boxes, depth, crop_height, crop_width); 25 | THCudaTensor_zero(state, crops); 26 | 27 | cudaStream_t stream = THCState_getCurrentStream(state); 28 | CropAndResizeLaucher( 29 | THCudaTensor_data(state, image), 30 | THCudaTensor_data(state, boxes), 31 | THCudaIntTensor_data(state, box_index), 32 | num_boxes, batch_size, image_height, image_width, 33 | crop_height, crop_width, depth, extrapolation_value, 34 | THCudaTensor_data(state, crops), 35 | stream 36 | ); 37 | } 38 | 39 | 40 | void crop_and_resize_gpu_backward( 41 | THCudaTensor * grads, 42 | THCudaTensor * boxes, // [y1, x1, y2, x2] 43 | THCudaIntTensor * box_index, // range in [0, batch_size) 44 | THCudaTensor * grads_image // resize to [bsize, c, hc, wc] 45 | ) { 46 | // shape 47 | const int 
batch_size = THCudaTensor_size(state, grads_image, 0); 48 | const int depth = THCudaTensor_size(state, grads_image, 1); 49 | const int image_height = THCudaTensor_size(state, grads_image, 2); 50 | const int image_width = THCudaTensor_size(state, grads_image, 3); 51 | 52 | const int num_boxes = THCudaTensor_size(state, grads, 0); 53 | const int crop_height = THCudaTensor_size(state, grads, 2); 54 | const int crop_width = THCudaTensor_size(state, grads, 3); 55 | 56 | // init output space 57 | THCudaTensor_zero(state, grads_image); 58 | 59 | cudaStream_t stream = THCState_getCurrentStream(state); 60 | CropAndResizeBackpropImageLaucher( 61 | THCudaTensor_data(state, grads), 62 | THCudaTensor_data(state, boxes), 63 | THCudaIntTensor_data(state, box_index), 64 | num_boxes, batch_size, image_height, image_width, 65 | crop_height, crop_width, depth, 66 | THCudaTensor_data(state, grads_image), 67 | stream 68 | ); 69 | } -------------------------------------------------------------------------------- /lib/roi_align/src/crop_and_resize_gpu.h: -------------------------------------------------------------------------------- 1 | void crop_and_resize_gpu_forward( 2 | THCudaTensor * image, 3 | THCudaTensor * boxes, // [y1, x1, y2, x2] 4 | THCudaIntTensor * box_index, // range in [0, batch_size) 5 | const float extrapolation_value, 6 | const int crop_height, 7 | const int crop_width, 8 | THCudaTensor * crops 9 | ); 10 | 11 | void crop_and_resize_gpu_backward( 12 | THCudaTensor * grads, 13 | THCudaTensor * boxes, // [y1, x1, y2, x2] 14 | THCudaIntTensor * box_index, // range in [0, batch_size) 15 | THCudaTensor * grads_image // resize to [bsize, c, hc, wc] 16 | ); -------------------------------------------------------------------------------- /lib/roi_align/src/cuda/crop_and_resize_kernel.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "crop_and_resize_kernel.h" 4 | 5 | #define CUDA_1D_KERNEL_LOOP(i, n) \ 6 | for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ 7 | i += blockDim.x * gridDim.x) 8 | 9 | 10 | __global__ 11 | void CropAndResizeKernel( 12 | const int nthreads, const float *image_ptr, const float *boxes_ptr, 13 | const int *box_ind_ptr, int num_boxes, int batch, int image_height, 14 | int image_width, int crop_height, int crop_width, int depth, 15 | float extrapolation_value, float *crops_ptr) 16 | { 17 | CUDA_1D_KERNEL_LOOP(out_idx, nthreads) 18 | { 19 | // NHWC: out_idx = d + depth * (w + crop_width * (h + crop_height * b)) 20 | // NCHW: out_idx = w + crop_width * (h + crop_height * (d + depth * b)) 21 | int idx = out_idx; 22 | const int x = idx % crop_width; 23 | idx /= crop_width; 24 | const int y = idx % crop_height; 25 | idx /= crop_height; 26 | const int d = idx % depth; 27 | const int b = idx / depth; 28 | 29 | const float y1 = boxes_ptr[b * 4]; 30 | const float x1 = boxes_ptr[b * 4 + 1]; 31 | const float y2 = boxes_ptr[b * 4 + 2]; 32 | const float x2 = boxes_ptr[b * 4 + 3]; 33 | 34 | const int b_in = box_ind_ptr[b]; 35 | if (b_in < 0 || b_in >= batch) 36 | { 37 | continue; 38 | } 39 | 40 | const float height_scale = 41 | (crop_height > 1) ? (y2 - y1) * (image_height - 1) / (crop_height - 1) 42 | : 0; 43 | const float width_scale = 44 | (crop_width > 1) ? (x2 - x1) * (image_width - 1) / (crop_width - 1) : 0; 45 | 46 | const float in_y = (crop_height > 1) 47 | ? 
y1 * (image_height - 1) + y * height_scale 48 | : 0.5 * (y1 + y2) * (image_height - 1); 49 | if (in_y < 0 || in_y > image_height - 1) 50 | { 51 | crops_ptr[out_idx] = extrapolation_value; 52 | continue; 53 | } 54 | 55 | const float in_x = (crop_width > 1) 56 | ? x1 * (image_width - 1) + x * width_scale 57 | : 0.5 * (x1 + x2) * (image_width - 1); 58 | if (in_x < 0 || in_x > image_width - 1) 59 | { 60 | crops_ptr[out_idx] = extrapolation_value; 61 | continue; 62 | } 63 | 64 | const int top_y_index = floorf(in_y); 65 | const int bottom_y_index = ceilf(in_y); 66 | const float y_lerp = in_y - top_y_index; 67 | 68 | const int left_x_index = floorf(in_x); 69 | const int right_x_index = ceilf(in_x); 70 | const float x_lerp = in_x - left_x_index; 71 | 72 | const float *pimage = image_ptr + (b_in * depth + d) * image_height * image_width; 73 | const float top_left = pimage[top_y_index * image_width + left_x_index]; 74 | const float top_right = pimage[top_y_index * image_width + right_x_index]; 75 | const float bottom_left = pimage[bottom_y_index * image_width + left_x_index]; 76 | const float bottom_right = pimage[bottom_y_index * image_width + right_x_index]; 77 | 78 | const float top = top_left + (top_right - top_left) * x_lerp; 79 | const float bottom = bottom_left + (bottom_right - bottom_left) * x_lerp; 80 | crops_ptr[out_idx] = top + (bottom - top) * y_lerp; 81 | } 82 | } 83 | 84 | __global__ 85 | void CropAndResizeBackpropImageKernel( 86 | const int nthreads, const float *grads_ptr, const float *boxes_ptr, 87 | const int *box_ind_ptr, int num_boxes, int batch, int image_height, 88 | int image_width, int crop_height, int crop_width, int depth, 89 | float *grads_image_ptr) 90 | { 91 | CUDA_1D_KERNEL_LOOP(out_idx, nthreads) 92 | { 93 | // NHWC: out_idx = d + depth * (w + crop_width * (h + crop_height * b)) 94 | // NCHW: out_idx = w + crop_width * (h + crop_height * (d + depth * b)) 95 | int idx = out_idx; 96 | const int x = idx % crop_width; 97 | idx /= crop_width; 98 | const int y = idx % crop_height; 99 | idx /= crop_height; 100 | const int d = idx % depth; 101 | const int b = idx / depth; 102 | 103 | const float y1 = boxes_ptr[b * 4]; 104 | const float x1 = boxes_ptr[b * 4 + 1]; 105 | const float y2 = boxes_ptr[b * 4 + 2]; 106 | const float x2 = boxes_ptr[b * 4 + 3]; 107 | 108 | const int b_in = box_ind_ptr[b]; 109 | if (b_in < 0 || b_in >= batch) 110 | { 111 | continue; 112 | } 113 | 114 | const float height_scale = 115 | (crop_height > 1) ? (y2 - y1) * (image_height - 1) / (crop_height - 1) 116 | : 0; 117 | const float width_scale = 118 | (crop_width > 1) ? (x2 - x1) * (image_width - 1) / (crop_width - 1) : 0; 119 | 120 | const float in_y = (crop_height > 1) 121 | ? y1 * (image_height - 1) + y * height_scale 122 | : 0.5 * (y1 + y2) * (image_height - 1); 123 | if (in_y < 0 || in_y > image_height - 1) 124 | { 125 | continue; 126 | } 127 | 128 | const float in_x = (crop_width > 1) 129 | ? 
x1 * (image_width - 1) + x * width_scale 130 | : 0.5 * (x1 + x2) * (image_width - 1); 131 | if (in_x < 0 || in_x > image_width - 1) 132 | { 133 | continue; 134 | } 135 | 136 | const int top_y_index = floorf(in_y); 137 | const int bottom_y_index = ceilf(in_y); 138 | const float y_lerp = in_y - top_y_index; 139 | 140 | const int left_x_index = floorf(in_x); 141 | const int right_x_index = ceilf(in_x); 142 | const float x_lerp = in_x - left_x_index; 143 | 144 | float *pimage = grads_image_ptr + (b_in * depth + d) * image_height * image_width; 145 | const float dtop = (1 - y_lerp) * grads_ptr[out_idx]; 146 | atomicAdd( 147 | pimage + top_y_index * image_width + left_x_index, 148 | (1 - x_lerp) * dtop 149 | ); 150 | atomicAdd( 151 | pimage + top_y_index * image_width + right_x_index, 152 | x_lerp * dtop 153 | ); 154 | 155 | const float dbottom = y_lerp * grads_ptr[out_idx]; 156 | atomicAdd( 157 | pimage + bottom_y_index * image_width + left_x_index, 158 | (1 - x_lerp) * dbottom 159 | ); 160 | atomicAdd( 161 | pimage + bottom_y_index * image_width + right_x_index, 162 | x_lerp * dbottom 163 | ); 164 | } 165 | } 166 | 167 | 168 | void CropAndResizeLaucher( 169 | const float *image_ptr, const float *boxes_ptr, 170 | const int *box_ind_ptr, int num_boxes, int batch, int image_height, 171 | int image_width, int crop_height, int crop_width, int depth, 172 | float extrapolation_value, float *crops_ptr, cudaStream_t stream) 173 | { 174 | const int total_count = num_boxes * crop_height * crop_width * depth; 175 | const int thread_per_block = 1024; 176 | const int block_count = (total_count + thread_per_block - 1) / thread_per_block; 177 | cudaError_t err; 178 | 179 | if (total_count > 0) 180 | { 181 | CropAndResizeKernel<<<block_count, thread_per_block, 0, stream>>>( 182 | total_count, image_ptr, boxes_ptr, 183 | box_ind_ptr, num_boxes, batch, image_height, image_width, 184 | crop_height, crop_width, depth, extrapolation_value, crops_ptr); 185 | 186 | err = cudaGetLastError(); 187 | if (cudaSuccess != err) 188 | { 189 | fprintf(stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString(err)); 190 | exit(-1); 191 | } 192 | } 193 | } 194 | 195 | 196 | void CropAndResizeBackpropImageLaucher( 197 | const float *grads_ptr, const float *boxes_ptr, 198 | const int *box_ind_ptr, int num_boxes, int batch, int image_height, 199 | int image_width, int crop_height, int crop_width, int depth, 200 | float *grads_image_ptr, cudaStream_t stream) 201 | { 202 | const int total_count = num_boxes * crop_height * crop_width * depth; 203 | const int thread_per_block = 1024; 204 | const int block_count = (total_count + thread_per_block - 1) / thread_per_block; 205 | cudaError_t err; 206 | 207 | if (total_count > 0) 208 | { 209 | CropAndResizeBackpropImageKernel<<<block_count, thread_per_block, 0, stream>>>( 210 | total_count, grads_ptr, boxes_ptr, 211 | box_ind_ptr, num_boxes, batch, image_height, image_width, 212 | crop_height, crop_width, depth, grads_image_ptr); 213 | 214 | err = cudaGetLastError(); 215 | if (cudaSuccess != err) 216 | { 217 | fprintf(stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString(err)); 218 | exit(-1); 219 | } 220 | } 221 | } -------------------------------------------------------------------------------- /lib/roi_align/src/cuda/crop_and_resize_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _CropAndResize_Kernel 2 | #define _CropAndResize_Kernel 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | void CropAndResizeLaucher( 9 | const float *image_ptr, const float *boxes_ptr, 10 | const int *box_ind_ptr, int 
num_boxes, int batch, int image_height, 11 | int image_width, int crop_height, int crop_width, int depth, 12 | float extrapolation_value, float *crops_ptr, cudaStream_t stream); 13 | 14 | void CropAndResizeBackpropImageLaucher( 15 | const float *grads_ptr, const float *boxes_ptr, 16 | const int *box_ind_ptr, int num_boxes, int batch, int image_height, 17 | int image_width, int crop_height, int crop_width, int depth, 18 | float *grads_image_ptr, cudaStream_t stream); 19 | 20 | #ifdef __cplusplus 21 | } 22 | #endif 23 | 24 | #endif -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | h5py==2.8.0 2 | torchvision==0.2.1 3 | cffi==1.11.5 4 | scikit-image==0.14.0 5 | Cython==0.28.5 6 | docopt==0.6.2 7 | clint==0.5.1 8 | crontab==0.22.2 9 | tablib==0.12.1 10 | typing==3.6.6 11 | pyyaml 12 | mkl==2019.0 13 | mkl-include==2019.0 14 | setuptools 15 | cmake==3.12.0 16 | azure 17 | setuptools -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Common utility functions and classes. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | """ 9 | 10 | import sys 11 | import os 12 | import math 13 | import random 14 | import numpy as np 15 | import scipy.misc 16 | import scipy.ndimage 17 | import skimage.color 18 | import skimage.io 19 | import torch 20 | import urllib.request 21 | import shutil 22 | import warnings 23 | 24 | # URL from which to download the latest COCO trained weights 25 | COCO_MODEL_URL = "https://drive.google.com/file/d/1VV6WgX_RNl6a9Yi9-Pe7ZyVKHRJZSKkm/view?usp=sharing" 26 | 27 | ############################################################ 28 | # Bounding Boxes 29 | ############################################################ 30 | 31 | def extract_bboxes(mask): 32 | """Compute bounding boxes from masks. 33 | mask: [height, width, num_instances]. Mask pixels are either 1 or 0. 34 | 35 | Returns: bbox array [num_instances, (y1, x1, y2, x2)]. 36 | """ 37 | boxes = np.zeros([mask.shape[-1], 4], dtype=np.int32) 38 | for i in range(mask.shape[-1]): 39 | m = mask[:, :, i] 40 | # Bounding box. 41 | horizontal_indicies = np.where(np.any(m, axis=0))[0] 42 | vertical_indicies = np.where(np.any(m, axis=1))[0] 43 | if horizontal_indicies.shape[0]: 44 | x1, x2 = horizontal_indicies[[0, -1]] 45 | y1, y2 = vertical_indicies[[0, -1]] 46 | # x2 and y2 should not be part of the box. Increment by 1. 47 | x2 += 1 48 | y2 += 1 49 | else: 50 | # No mask for this instance. Might happen due to 51 | # resizing or cropping. Set bbox to zeros 52 | x1, x2, y1, y2 = 0, 0, 0, 0 53 | boxes[i] = np.array([y1, x1, y2, x2]) 54 | return boxes.astype(np.int32) 55 | 56 | 57 | def compute_iou(box, boxes, box_area, boxes_area): 58 | """Calculates IoU of the given box with the array of the given boxes. 59 | box: 1D vector [y1, x1, y2, x2] 60 | boxes: [boxes_count, (y1, x1, y2, x2)] 61 | box_area: float. the area of 'box' 62 | boxes_area: array of length boxes_count. 63 | 64 | Note: the areas are passed in rather than calculated here for 65 | efficency. Calculate once in the caller to avoid duplicate work. 
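    For illustration, assuming box = [0, 0, 10, 10] and boxes = [[5, 5, 15, 15]]
    with box_area = 100 and boxes_area = [100]: the intersection is 5 * 5 = 25,
    the union is 100 + 100 - 25 = 175, and the returned IoU is 25 / 175 ~= 0.14.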
66 | """ 67 | # Calculate intersection areas 68 | y1 = np.maximum(box[0], boxes[:, 0]) 69 | y2 = np.minimum(box[2], boxes[:, 2]) 70 | x1 = np.maximum(box[1], boxes[:, 1]) 71 | x2 = np.minimum(box[3], boxes[:, 3]) 72 | intersection = np.maximum(x2 - x1, 0) * np.maximum(y2 - y1, 0) 73 | union = box_area + boxes_area[:] - intersection[:] 74 | iou = intersection / union 75 | return iou 76 | 77 | def compute_ap(gt_boxes, gt_class_ids, gt_masks, 78 | pred_boxes, pred_class_ids, pred_scores, pred_masks, 79 | iou_threshold=0.5): 80 | """Compute Average Precision at a set IoU threshold (default 0.5). 81 | Returns: 82 | mAP: Mean Average Precision 83 | precisions: List of precisions at different class score thresholds. 84 | recalls: List of recall values at different class score thresholds. 85 | overlaps: [pred_boxes, gt_boxes] IoU overlaps. 86 | """ 87 | # Get matches and overlaps 88 | gt_match, pred_match, overlaps = compute_matches( 89 | gt_boxes, gt_class_ids, gt_masks, 90 | pred_boxes, pred_class_ids, pred_scores, pred_masks, 91 | iou_threshold) 92 | 93 | # Compute precision and recall at each prediction box step 94 | precisions = np.cumsum(pred_match > -1) / (np.arange(len(pred_match)) + 1) 95 | recalls = np.cumsum(pred_match > -1).astype(np.float32) / len(gt_match) 96 | 97 | # Pad with start and end values to simplify the math 98 | precisions = np.concatenate([[0], precisions, [0]]) 99 | recalls = np.concatenate([[0], recalls, [1]]) 100 | 101 | # Ensure precision values decrease but don't increase. This way, the 102 | # precision value at each recall threshold is the maximum it can be 103 | # for all following recall thresholds, as specified by the VOC paper. 104 | for i in range(len(precisions) - 2, -1, -1): 105 | precisions[i] = np.maximum(precisions[i], precisions[i + 1]) 106 | 107 | # Compute mean AP over recall range 108 | indices = np.where(recalls[:-1] != recalls[1:])[0] + 1 109 | mAP = np.sum((recalls[indices] - recalls[indices - 1]) * 110 | precisions[indices]) 111 | 112 | return mAP, precisions, recalls, overlaps 113 | 114 | ############################################################ 115 | # Miscellaneous 116 | ############################################################ 117 | 118 | def trim_zeros(x): 119 | """It's common to have tensors larger than the available data and 120 | pad with zeros. This function removes rows that are all zeros. 121 | x: [rows, columns]. 122 | """ 123 | assert len(x.shape) == 2 124 | return x[~np.all(x == 0, axis=1)] 125 | 126 | 127 | def compute_matches(gt_boxes, gt_class_ids, gt_masks, 128 | pred_boxes, pred_class_ids, pred_scores, pred_masks, 129 | iou_threshold=0.5, score_threshold=0.0): 130 | """Finds matches between prediction and ground truth instances. 131 | Returns: 132 | gt_match: 1-D array. For each GT box it has the index of the matched 133 | predicted box. 134 | pred_match: 1-D array. For each predicted box, it has the index of 135 | the matched ground truth box. 136 | overlaps: [pred_boxes, gt_boxes] IoU overlaps. 
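    The matching below is greedy: predictions are visited in descending score
    order, and each one is matched to the remaining ground truth box of the
    same class with the highest mask IoU, as long as that IoU is at least
    iou_threshold.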
137 | """ 138 | # Trim zero padding 139 | # TODO: cleaner to do zero unpadding upstream 140 | gt_boxes = trim_zeros(gt_boxes) 141 | gt_masks = gt_masks[..., :gt_boxes.shape[0]] 142 | pred_boxes = trim_zeros(pred_boxes) 143 | pred_scores = pred_scores[:pred_boxes.shape[0]] 144 | # Sort predictions by score from high to low 145 | indices = np.argsort(pred_scores)[::-1] 146 | pred_boxes = pred_boxes[indices] 147 | pred_class_ids = pred_class_ids[indices] 148 | pred_scores = pred_scores[indices] 149 | pred_masks = pred_masks[..., indices] 150 | 151 | # Compute IoU overlaps [pred_masks, gt_masks] 152 | overlaps = compute_overlaps_masks(pred_masks, gt_masks) 153 | 154 | # Loop through predictions and find matching ground truth boxes 155 | match_count = 0 156 | pred_match = -1 * np.ones([pred_boxes.shape[0]]) 157 | gt_match = -1 * np.ones([gt_boxes.shape[0]]) 158 | for i in range(len(pred_boxes)): 159 | # Find best matching ground truth box 160 | # 1. Sort matches by score 161 | sorted_ixs = np.argsort(overlaps[i])[::-1] 162 | # 2. Remove low scores 163 | low_score_idx = np.where(overlaps[i, sorted_ixs] < score_threshold)[0] 164 | if low_score_idx.size > 0: 165 | sorted_ixs = sorted_ixs[:low_score_idx[0]] 166 | # 3. Find the match 167 | for j in sorted_ixs: 168 | # If ground truth box is already matched, go to next one 169 | if gt_match[j] > 0: 170 | continue 171 | # If we reach IoU smaller than the threshold, end the loop 172 | iou = overlaps[i, j] 173 | if iou < iou_threshold: 174 | break 175 | # Do we have a match? 176 | if pred_class_ids[i] == gt_class_ids[j]: 177 | match_count += 1 178 | gt_match[j] = i 179 | pred_match[i] = j 180 | break 181 | 182 | return gt_match, pred_match, overlaps 183 | 184 | 185 | def compute_ap(gt_boxes, gt_class_ids, gt_masks, 186 | pred_boxes, pred_class_ids, pred_scores, pred_masks, 187 | iou_threshold=0.5): 188 | """Compute Average Precision at a set IoU threshold (default 0.5). 189 | Returns: 190 | mAP: Mean Average Precision 191 | precisions: List of precisions at different class score thresholds. 192 | recalls: List of recall values at different class score thresholds. 193 | overlaps: [pred_boxes, gt_boxes] IoU overlaps. 194 | """ 195 | # Get matches and overlaps 196 | gt_match, pred_match, overlaps = compute_matches( 197 | gt_boxes, gt_class_ids, gt_masks, 198 | pred_boxes, pred_class_ids, pred_scores, pred_masks, 199 | iou_threshold) 200 | 201 | # Compute precision and recall at each prediction box step 202 | precisions = np.cumsum(pred_match > -1) / (np.arange(len(pred_match)) + 1) 203 | recalls = np.cumsum(pred_match > -1).astype(np.float32) / len(gt_match) 204 | 205 | # Pad with start and end values to simplify the math 206 | precisions = np.concatenate([[0], precisions, [0]]) 207 | recalls = np.concatenate([[0], recalls, [1]]) 208 | 209 | # Ensure precision values decrease but don't increase. This way, the 210 | # precision value at each recall threshold is the maximum it can be 211 | # for all following recall thresholds, as specified by the VOC paper. 
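    # For illustration, assuming precisions = [0, 1.0, 0.5, 0.67, 0]: the
    # backward pass below produces [1.0, 1.0, 0.67, 0.67, 0], so each entry
    # holds the best precision achievable at that recall level or beyond.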
212 | for i in range(len(precisions) - 2, -1, -1): 213 | precisions[i] = np.maximum(precisions[i], precisions[i + 1]) 214 | 215 | # Compute mean AP over recall range 216 | indices = np.where(recalls[:-1] != recalls[1:])[0] + 1 217 | mAP = np.sum((recalls[indices] - recalls[indices - 1]) * 218 | precisions[indices]) 219 | 220 | return mAP, precisions, recalls, overlaps 221 | 222 | 223 | def compute_ap_range(gt_box, gt_class_id, gt_mask, 224 | pred_box, pred_class_id, pred_score, pred_mask, 225 | iou_thresholds=None, verbose=1): 226 | """Compute AP over a range or IoU thresholds. Default range is 0.5-0.95.""" 227 | # Default is 0.5 to 0.95 with increments of 0.05 228 | iou_thresholds = iou_thresholds or np.arange(0.5, 1.0, 0.05) 229 | 230 | # Compute AP over range of IoU thresholds 231 | AP = [] 232 | for iou_threshold in iou_thresholds: 233 | ap, precisions, recalls, overlaps =\ 234 | compute_ap(gt_box, gt_class_id, gt_mask, 235 | pred_box, pred_class_id, pred_score, pred_mask, 236 | iou_threshold=iou_threshold) 237 | if verbose: 238 | print("AP @{:.2f}:\t {:.3f}".format(iou_threshold, ap)) 239 | AP.append(ap) 240 | AP = np.array(AP).mean() 241 | if verbose: 242 | print("AP @{:.2f}-{:.2f}:\t {:.3f}".format( 243 | iou_thresholds[0], iou_thresholds[-1], AP)) 244 | return AP 245 | 246 | 247 | def compute_recall(pred_boxes, gt_boxes, iou): 248 | """Compute the recall at the given IoU threshold. It's an indication 249 | of how many GT boxes were found by the given prediction boxes. 250 | pred_boxes: [N, (y1, x1, y2, x2)] in image coordinates 251 | gt_boxes: [N, (y1, x1, y2, x2)] in image coordinates 252 | """ 253 | # Measure overlaps 254 | overlaps = compute_overlaps(pred_boxes, gt_boxes) 255 | iou_max = np.max(overlaps, axis=1) 256 | iou_argmax = np.argmax(overlaps, axis=1) 257 | positive_ids = np.where(iou_max >= iou)[0] 258 | matched_gt_boxes = iou_argmax[positive_ids] 259 | 260 | recall = len(set(matched_gt_boxes)) / gt_boxes.shape[0] 261 | return recall, positive_ids 262 | 263 | 264 | # ## Batch Slicing 265 | # Some custom layers support a batch size of 1 only, and require a lot of work 266 | # to support batches greater than 1. This function slices an input tensor 267 | # across the batch dimension and feeds batches of size 1. Effectively, 268 | # an easy way to support batches > 1 quickly with little code modification. 269 | # In the long run, it's more efficient to modify the code to support large 270 | # batches and getting rid of this function. Consider this a temporary solution 271 | def batch_slice(inputs, graph_fn, batch_size, names=None): 272 | """Splits inputs into slices and feeds each slice to a copy of the given 273 | computation graph and then combines the results. It allows you to run a 274 | graph on a batch of inputs even if the graph is written to support one 275 | instance only. 276 | inputs: list of tensors. All must have the same first dimension length 277 | graph_fn: A function that returns a TF tensor that's part of a graph. 278 | batch_size: number of slices to divide the data into. 279 | names: If provided, assigns names to the resulting tensors. 
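    Note: the tf.stack call below appears to be left over from the TensorFlow
    version of this code base; TensorFlow is not imported in this module, so
    calling this helper as-is raises a NameError.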
280 | """ 281 | if not isinstance(inputs, list): 282 | inputs = [inputs] 283 | 284 | outputs = [] 285 | for i in range(batch_size): 286 | inputs_slice = [x[i] for x in inputs] 287 | output_slice = graph_fn(*inputs_slice) 288 | if not isinstance(output_slice, (tuple, list)): 289 | output_slice = [output_slice] 290 | outputs.append(output_slice) 291 | # Change outputs from a list of slices where each is 292 | # a list of outputs to a list of outputs and each has 293 | # a list of slices 294 | outputs = list(zip(*outputs)) 295 | 296 | if names is None: 297 | names = [None] * len(outputs) 298 | 299 | result = [tf.stack(o, axis=0, name=n) 300 | for o, n in zip(outputs, names)] 301 | if len(result) == 1: 302 | result = result[0] 303 | 304 | return result 305 | 306 | 307 | def download_trained_weights(coco_model_path, verbose=1): 308 | """Download COCO trained weights from Releases. 309 | coco_model_path: local path of COCO trained weights 310 | """ 311 | if verbose > 0: 312 | print("Downloading pretrained model to " + coco_model_path + " ...") 313 | with urllib.request.urlopen(COCO_MODEL_URL) as resp, open(coco_model_path, 'wb') as out: 314 | shutil.copyfileobj(resp, out) 315 | if verbose > 0: 316 | print("... done downloading pretrained model!") 317 | 318 | 319 | def norm_boxes(boxes, shape): 320 | """Converts boxes from pixel coordinates to normalized coordinates. 321 | boxes: [N, (y1, x1, y2, x2)] in pixel coordinates 322 | shape: [..., (height, width)] in pixels 323 | Note: In pixel coordinates (y2, x2) is outside the box. But in normalized 324 | coordinates it's inside the box. 325 | Returns: 326 | [N, (y1, x1, y2, x2)] in normalized coordinates 327 | """ 328 | h, w = shape 329 | scale = np.array([h - 1, w - 1, h - 1, w - 1]) 330 | shift = np.array([0, 0, 1, 1]) 331 | return np.divide((boxes - shift), scale).astype(np.float32) 332 | 333 | 334 | def denorm_boxes(boxes, shape): 335 | """Converts boxes from normalized coordinates to pixel coordinates. 336 | boxes: [N, (y1, x1, y2, x2)] in normalized coordinates 337 | shape: [..., (height, width)] in pixels 338 | Note: In pixel coordinates (y2, x2) is outside the box. But in normalized 339 | coordinates it's inside the box. 340 | Returns: 341 | [N, (y1, x1, y2, x2)] in pixel coordinates 342 | """ 343 | h, w = shape 344 | scale = np.array([h - 1, w - 1, h - 1, w - 1]) 345 | shift = np.array([0, 0, 1, 1]) 346 | return np.around(np.multiply(boxes, scale) + shift).astype(np.int32) 347 | 348 | def compute_overlaps(boxes1, boxes2): 349 | """Computes IoU overlaps between two sets of boxes. 350 | boxes1, boxes2: [N, (y1, x1, y2, x2)]. 351 | 352 | For better performance, pass the largest set first and the smaller second. 353 | """ 354 | # Areas of anchors and GT boxes 355 | area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1]) 356 | area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1]) 357 | 358 | # Compute overlaps to generate matrix [boxes1 count, boxes2 count] 359 | # Each cell contains the IoU value. 360 | overlaps = np.zeros((boxes1.shape[0], boxes2.shape[0])) 361 | for i in range(overlaps.shape[1]): 362 | box2 = boxes2[i] 363 | overlaps[:, i] = compute_iou(box2, boxes1, area2[i], area1) 364 | return overlaps 365 | 366 | def compute_overlaps_masks(masks1, masks2): 367 | '''Computes IoU overlaps between two sets of masks. 
368 | masks1, masks2: [Height, Width, instances] 369 | ''' 370 | # flatten masks 371 | masks1 = np.reshape(masks1 > .5, (-1, masks1.shape[-1])).astype(np.float32) 372 | masks2 = np.reshape(masks2 > .5, (-1, masks2.shape[-1])).astype(np.float32) 373 | area1 = np.sum(masks1, axis=0) 374 | area2 = np.sum(masks2, axis=0) 375 | 376 | # intersections and union 377 | intersections = np.dot(masks1.T, masks2) 378 | union = area1[:, None] + area2[None, :] - intersections 379 | overlaps = intersections / union 380 | 381 | return overlaps 382 | 383 | 384 | def non_max_suppression(boxes, scores, threshold): 385 | """Performs non-maximum supression and returns indicies of kept boxes. 386 | boxes: [N, (y1, x1, y2, x2)]. Notice that (y2, x2) lays outside the box. 387 | scores: 1-D array of box scores. 388 | threshold: Float. IoU threshold to use for filtering. 389 | """ 390 | assert boxes.shape[0] > 0 391 | if boxes.dtype.kind != "f": 392 | boxes = boxes.astype(np.float32) 393 | 394 | # Compute box areas 395 | y1 = boxes[:, 0] 396 | x1 = boxes[:, 1] 397 | y2 = boxes[:, 2] 398 | x2 = boxes[:, 3] 399 | area = (y2 - y1) * (x2 - x1) 400 | 401 | # Get indicies of boxes sorted by scores (highest first) 402 | ixs = scores.argsort()[::-1] 403 | 404 | pick = [] 405 | while len(ixs) > 0: 406 | # Pick top box and add its index to the list 407 | i = ixs[0] 408 | pick.append(i) 409 | # Compute IoU of the picked box with the rest 410 | iou = compute_iou(boxes[i], boxes[ixs[1:]], area[i], area[ixs[1:]]) 411 | # Identify boxes with IoU over the threshold. This 412 | # returns indicies into ixs[1:], so add 1 to get 413 | # indicies into ixs. 414 | remove_ixs = np.where(iou > threshold)[0] + 1 415 | # Remove indicies of the picked and overlapped boxes. 416 | ixs = np.delete(ixs, remove_ixs) 417 | ixs = np.delete(ixs, 0) 418 | return np.array(pick, dtype=np.int32) 419 | 420 | 421 | 422 | def box_refinement(box, gt_box): 423 | """Compute refinement needed to transform box to gt_box. 424 | box and gt_box are [N, (y1, x1, y2, x2)] 425 | """ 426 | 427 | height = box[:, 2] - box[:, 0] 428 | width = box[:, 3] - box[:, 1] 429 | center_y = box[:, 0] + 0.5 * height 430 | center_x = box[:, 1] + 0.5 * width 431 | 432 | gt_height = gt_box[:, 2] - gt_box[:, 0] 433 | gt_width = gt_box[:, 3] - gt_box[:, 1] 434 | gt_center_y = gt_box[:, 0] + 0.5 * gt_height 435 | gt_center_x = gt_box[:, 1] + 0.5 * gt_width 436 | 437 | dy = (gt_center_y - center_y) / height 438 | dx = (gt_center_x - center_x) / width 439 | dh = torch.log(gt_height / height) 440 | dw = torch.log(gt_width / width) 441 | 442 | result = torch.stack([dy, dx, dh, dw], dim=1) 443 | return result 444 | 445 | 446 | ############################################################ 447 | # Dataset 448 | ############################################################ 449 | 450 | class Dataset(object): 451 | """The base class for dataset classes. 452 | To use it, create a new class that adds functions specific to the dataset 453 | you want to use. For example: 454 | 455 | class CatsAndDogsDataset(Dataset): 456 | def load_cats_and_dogs(self): 457 | ... 458 | def load_mask(self, image_id): 459 | ... 460 | def image_reference(self, image_id): 461 | ... 462 | 463 | See COCODataset and ShapesDataset as examples. 
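    A minimal usage sketch (CatsAndDogsDataset and its loader are hypothetical):

        dataset = CatsAndDogsDataset()
        dataset.load_cats_and_dogs()
        dataset.prepare()
        image = dataset.load_image(dataset.image_ids[0])
        masks, class_ids = dataset.load_mask(dataset.image_ids[0])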
464 | """ 465 | 466 | def __init__(self, class_map=None): 467 | self._image_ids = [] 468 | self.image_info = [] 469 | # Background is always the first class 470 | self.class_info = [{"source": "", "id": 0, "name": "BG"}] 471 | self.source_class_ids = {} 472 | 473 | def add_class(self, source, class_id, class_name): 474 | assert "." not in source, "Source name cannot contain a dot" 475 | # Does the class exist already? 476 | for info in self.class_info: 477 | if info['source'] == source and info["id"] == class_id: 478 | # source.class_id combination already available, skip 479 | return 480 | # Add the class 481 | self.class_info.append({ 482 | "source": source, 483 | "id": class_id, 484 | "name": class_name, 485 | }) 486 | 487 | def add_image(self, source, image_id, path, **kwargs): 488 | image_info = { 489 | "id": image_id, 490 | "source": source, 491 | "path": path, 492 | } 493 | image_info.update(kwargs) 494 | self.image_info.append(image_info) 495 | 496 | def image_reference(self, image_id): 497 | """Return a link to the image in its source Website or details about 498 | the image that help looking it up or debugging it. 499 | 500 | Override for your dataset, but pass to this function 501 | if you encounter images not in your dataset. 502 | """ 503 | return "" 504 | 505 | def prepare(self, class_map=None): 506 | """Prepares the Dataset class for use. 507 | 508 | TODO: class map is not supported yet. When done, it should handle mapping 509 | classes from different datasets to the same class ID. 510 | """ 511 | def clean_name(name): 512 | """Returns a shorter version of object names for cleaner display.""" 513 | return ",".join(name.split(",")[:1]) 514 | 515 | # Build (or rebuild) everything else from the info dicts. 516 | self.num_classes = len(self.class_info) 517 | self.class_ids = np.arange(self.num_classes) 518 | self.class_names = [clean_name(c["name"]) for c in self.class_info] 519 | self.num_images = len(self.image_info) 520 | self._image_ids = np.arange(self.num_images) 521 | 522 | self.class_from_source_map = {"{}.{}".format(info['source'], info['id']): id 523 | for info, id in zip(self.class_info, self.class_ids)} 524 | 525 | # Map sources to class_ids they support 526 | self.sources = list(set([i['source'] for i in self.class_info])) 527 | self.source_class_ids = {} 528 | # Loop over datasets 529 | for source in self.sources: 530 | self.source_class_ids[source] = [] 531 | # Find classes that belong to this dataset 532 | for i, info in enumerate(self.class_info): 533 | # Include BG class in all datasets 534 | if i == 0 or source == info['source']: 535 | self.source_class_ids[source].append(i) 536 | 537 | def map_source_class_id(self, source_class_id): 538 | """Takes a source class ID and returns the int class ID assigned to it. 539 | 540 | For example: 541 | dataset.map_source_class_id("coco.12") -> 23 542 | """ 543 | return self.class_from_source_map[source_class_id] 544 | 545 | def get_source_class_id(self, class_id, source): 546 | """Map an internal class ID to the corresponding class ID in the source dataset.""" 547 | info = self.class_info[class_id] 548 | assert info['source'] == source 549 | return info['id'] 550 | 551 | def append_data(self, class_info, image_info): 552 | self.external_to_class_id = {} 553 | for i, c in enumerate(self.class_info): 554 | for ds, id in c["map"]: 555 | self.external_to_class_id[ds + str(id)] = i 556 | 557 | # Map external image IDs to internal ones. 
558 | self.external_to_image_id = {} 559 | for i, info in enumerate(self.image_info): 560 | self.external_to_image_id[info["ds"] + str(info["id"])] = i 561 | 562 | @property 563 | def image_ids(self): 564 | return self._image_ids 565 | 566 | def source_image_link(self, image_id): 567 | """Returns the path or URL to the image. 568 | Override this to return a URL to the image if it's availble online for easy 569 | debugging. 570 | """ 571 | return self.image_info[image_id]["path"] 572 | 573 | def load_image(self, image_id): 574 | """Load the specified image and return a [H,W,3] Numpy array. 575 | """ 576 | # Load image 577 | image = skimage.io.imread(self.image_info[image_id]['path']) 578 | # If grayscale. Convert to RGB for consistency. 579 | if image.ndim != 3: 580 | image = skimage.color.gray2rgb(image) 581 | return image 582 | 583 | def load_mask(self, image_id): 584 | """Load instance masks for the given image. 585 | 586 | Different datasets use different ways to store masks. Override this 587 | method to load instance masks and return them in the form of am 588 | array of binary masks of shape [height, width, instances]. 589 | 590 | Returns: 591 | masks: A bool array of shape [height, width, instance count] with 592 | a binary mask per instance. 593 | class_ids: a 1D array of class IDs of the instance masks. 594 | """ 595 | # Override this function to load a mask from your dataset. 596 | # Otherwise, it returns an empty mask. 597 | mask = np.empty([0, 0, 0]) 598 | class_ids = np.empty([0], np.int32) 599 | return mask, class_ids 600 | 601 | 602 | def resize_image(image, min_dim=None, max_dim=None, padding=False): 603 | """ 604 | Resizes an image keeping the aspect ratio. 605 | 606 | min_dim: if provided, resizes the image such that it's smaller 607 | dimension == min_dim 608 | max_dim: if provided, ensures that the image longest side doesn't 609 | exceed this value. 610 | padding: If true, pads image with zeros so it's size is max_dim x max_dim 611 | 612 | Returns: 613 | image: the resized image 614 | window: (y1, x1, y2, x2). If max_dim is provided, padding might 615 | be inserted in the returned image. If so, this window is the 616 | coordinates of the image part of the full image (excluding 617 | the padding). The x2, y2 pixels are not included. 618 | scale: The scale factor used to resize the image 619 | padding: Padding added to the image [(top, bottom), (left, right), (0, 0)] 620 | """ 621 | # Default window (y1, x1, y2, x2) and default scale == 1. 622 | h, w = image.shape[:2] 623 | window = (0, 0, h, w) 624 | scale = 1 625 | 626 | # Scale? 627 | if min_dim: 628 | # Scale up but not down 629 | scale = max(1, min_dim / min(h, w)) 630 | # Does it exceed max dim? 631 | if max_dim: 632 | image_max = max(h, w) 633 | if round(image_max * scale) > max_dim: 634 | scale = max_dim / image_max 635 | # Resize image and mask 636 | if scale != 1: 637 | image = scipy.misc.imresize( 638 | image, (round(h * scale), round(w * scale))) 639 | # Need padding? 
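    # For illustration, assuming min_dim=800, max_dim=1024 and a 400x600 image:
    # scale starts at max(1, 800 / 400) = 2, but 600 * 2 exceeds max_dim, so it
    # drops to 1024 / 600 ~= 1.71 and the image is resized to about 683x1024;
    # the padding below then centers it on a 1024x1024 canvas with top_pad=170,
    # bottom_pad=171 and window=(170, 0, 853, 1024).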
640 | if padding: 641 | # Get new height and width 642 | h, w = image.shape[:2] 643 | top_pad = (max_dim - h) // 2 644 | bottom_pad = max_dim - h - top_pad 645 | left_pad = (max_dim - w) // 2 646 | right_pad = max_dim - w - left_pad 647 | padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)] 648 | image = np.pad(image, padding, mode='constant', constant_values=0) 649 | window = (top_pad, left_pad, h + top_pad, w + left_pad) 650 | return image, window, scale, padding 651 | 652 | 653 | def resize_mask(mask, scale, padding): 654 | """Resizes a mask using the given scale and padding. 655 | Typically, you get the scale and padding from resize_image() to 656 | ensure both, the image and the mask, are resized consistently. 657 | 658 | scale: mask scaling factor 659 | padding: Padding to add to the mask in the form 660 | [(top, bottom), (left, right), (0, 0)] 661 | """ 662 | h, w = mask.shape[:2] 663 | mask = scipy.ndimage.zoom(mask, zoom=[scale, scale, 1], order=0) 664 | mask = np.pad(mask, padding, mode='constant', constant_values=0) 665 | return mask 666 | 667 | 668 | def minimize_mask(bbox, mask, mini_shape): 669 | """Resize masks to a smaller version to cut memory load. 670 | Mini-masks can then resized back to image scale using expand_masks() 671 | 672 | See inspect_data.ipynb notebook for more details. 673 | """ 674 | mini_mask = np.zeros(mini_shape + (mask.shape[-1],), dtype=bool) 675 | for i in range(mask.shape[-1]): 676 | m = mask[:, :, i] 677 | y1, x1, y2, x2 = bbox[i][:4] 678 | m = m[y1:y2, x1:x2] 679 | if m.size == 0: 680 | raise Exception("Invalid bounding box with area of zero") 681 | m = scipy.misc.imresize(m.astype(float), mini_shape, interp='bilinear') 682 | mini_mask[:, :, i] = np.where(m >= 128, 1, 0) 683 | return mini_mask 684 | 685 | 686 | def expand_mask(bbox, mini_mask, image_shape): 687 | """Resizes mini masks back to image size. Reverses the change 688 | of minimize_mask(). 689 | 690 | See inspect_data.ipynb notebook for more details. 691 | """ 692 | mask = np.zeros(image_shape[:2] + (mini_mask.shape[-1],), dtype=bool) 693 | for i in range(mask.shape[-1]): 694 | m = mini_mask[:, :, i] 695 | y1, x1, y2, x2 = bbox[i][:4] 696 | h = y2 - y1 697 | w = x2 - x1 698 | m = scipy.misc.imresize(m.astype(float), (h, w), interp='bilinear') 699 | mask[y1:y2, x1:x2, i] = np.where(m >= 128, 1, 0) 700 | return mask 701 | 702 | 703 | # TODO: Build and use this function to reduce code duplication 704 | def mold_mask(mask, config): 705 | pass 706 | 707 | 708 | def unmold_mask(mask, bbox, image_shape): 709 | """Converts a mask generated by the neural network into a format similar 710 | to it's original shape. 711 | mask: [height, width] of type float. A small, typically 28x28 mask. 712 | bbox: [y1, x1, y2, x2]. The box to fit the mask in. 713 | 714 | Returns a binary mask with the same size as the original image. 715 | """ 716 | threshold = 0.5 717 | y1, x1, y2, x2 = bbox 718 | mask = scipy.misc.imresize( 719 | mask, (y2 - y1, x2 - x1), interp='bilinear').astype(np.float32) / 255.0 720 | mask = np.where(mask >= threshold, 1, 0).astype(np.uint8) 721 | 722 | # Put the mask in the right location. 
723 | full_mask = np.zeros(image_shape[:2], dtype=np.uint8) 724 | full_mask[y1:y2, x1:x2] = mask 725 | return full_mask 726 | 727 | 728 | ############################################################ 729 | # Anchors 730 | ############################################################ 731 | 732 | def generate_anchors(scales, ratios, shape, feature_stride, anchor_stride): 733 | """ 734 | scales: 1D array of anchor sizes in pixels. Example: [32, 64, 128] 735 | ratios: 1D array of anchor ratios of width/height. Example: [0.5, 1, 2] 736 | shape: [height, width] spatial shape of the feature map over which 737 | to generate anchors. 738 | feature_stride: Stride of the feature map relative to the image in pixels. 739 | anchor_stride: Stride of anchors on the feature map. For example, if the 740 | value is 2 then generate anchors for every other feature map pixel. 741 | """ 742 | # Get all combinations of scales and ratios 743 | scales, ratios = np.meshgrid(np.array(scales), np.array(ratios)) 744 | scales = scales.flatten() 745 | ratios = ratios.flatten() 746 | 747 | # Enumerate heights and widths from scales and ratios 748 | heights = scales / np.sqrt(ratios) 749 | widths = scales * np.sqrt(ratios) 750 | 751 | # Enumerate shifts in feature space 752 | shifts_y = np.arange(0, shape[0], anchor_stride) * feature_stride 753 | shifts_x = np.arange(0, shape[1], anchor_stride) * feature_stride 754 | shifts_x, shifts_y = np.meshgrid(shifts_x, shifts_y) 755 | 756 | # Enumerate combinations of shifts, widths, and heights 757 | box_widths, box_centers_x = np.meshgrid(widths, shifts_x) 758 | box_heights, box_centers_y = np.meshgrid(heights, shifts_y) 759 | 760 | # Reshape to get a list of (y, x) and a list of (h, w) 761 | box_centers = np.stack( 762 | [box_centers_y, box_centers_x], axis=2).reshape([-1, 2]) 763 | box_sizes = np.stack([box_heights, box_widths], axis=2).reshape([-1, 2]) 764 | 765 | # Convert to corner coordinates (y1, x1, y2, x2) 766 | boxes = np.concatenate([box_centers - 0.5 * box_sizes, 767 | box_centers + 0.5 * box_sizes], axis=1) 768 | return boxes 769 | 770 | 771 | def generate_pyramid_anchors(scales, ratios, feature_shapes, feature_strides, 772 | anchor_stride): 773 | """Generate anchors at different levels of a feature pyramid. Each scale 774 | is associated with a level of the pyramid, but each ratio is used in 775 | all levels of the pyramid. 776 | 777 | Returns: 778 | anchors: [N, (y1, x1, y2, x2)]. All generated anchors in one array. Sorted 779 | with the same order of the given scales. So, anchors of scale[0] come 780 | first, then anchors of scale[1], and so on. 781 | """ 782 | # Anchors 783 | # [anchor_count, (y1, x1, y2, x2)] 784 | anchors = [] 785 | for i in range(len(scales)): 786 | anchors.append(generate_anchors(scales[i], ratios, feature_shapes[i], 787 | feature_strides[i], anchor_stride)) 788 | return np.concatenate(anchors, axis=0) 789 | 790 | 791 | def download_trained_weights(coco_model_path, verbose=1): 792 | """Download COCO trained weights from Releases. 793 | 794 | coco_model_path: local path of COCO trained weights 795 | """ 796 | if verbose > 0: 797 | print("Downloading pretrained model to " + coco_model_path + " ...") 798 | with urllib.request.urlopen(COCO_MODEL_URL) as resp, open(coco_model_path, 'wb') as out: 799 | shutil.copyfileobj(resp, out) 800 | if verbose > 0: 801 | print("... 
done downloading pretrained model!") 802 | 803 | 804 | 805 | 806 | 807 | -------------------------------------------------------------------------------- /visualize.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Display and Visualization Functions. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | """ 9 | 10 | import os 11 | import random 12 | import itertools 13 | import colorsys 14 | import numpy as np 15 | from skimage.measure import find_contours 16 | import matplotlib.pyplot as plt 17 | if "DISPLAY" not in os.environ: 18 | plt.switch_backend('agg') 19 | import matplotlib.patches as patches 20 | import matplotlib.lines as lines 21 | from matplotlib.patches import Polygon 22 | 23 | import utils 24 | 25 | 26 | ############################################################ 27 | # Visualization 28 | ############################################################ 29 | 30 | def display_images(images, titles=None, cols=4, cmap=None, norm=None, 31 | interpolation=None): 32 | """Display the given set of images, optionally with titles. 33 | images: list or array of image tensors in HWC format. 34 | titles: optional. A list of titles to display with each image. 35 | cols: number of images per row 36 | cmap: Optional. Color map to use. For example, "Blues". 37 | norm: Optional. A Normalize instance to map values to colors. 38 | interpolation: Optional. Image interpolation to use for display. 39 | """ 40 | titles = titles if titles is not None else [""] * len(images) 41 | rows = len(images) // cols + 1 42 | plt.figure(figsize=(14, 14 * rows // cols)) 43 | i = 1 44 | for image, title in zip(images, titles): 45 | plt.subplot(rows, cols, i) 46 | plt.title(title, fontsize=9) 47 | plt.axis('off') 48 | plt.imshow(image.astype(np.uint8), cmap=cmap, 49 | norm=norm, interpolation=interpolation) 50 | i += 1 51 | plt.show() 52 | 53 | 54 | def random_colors(N, bright=True): 55 | """ 56 | Generate random colors. 57 | To get visually distinct colors, generate them in HSV space then 58 | convert to RGB. 59 | """ 60 | brightness = 1.0 if bright else 0.7 61 | hsv = [(i / N, 1, brightness) for i in range(N)] 62 | colors = list(map(lambda c: colorsys.hsv_to_rgb(*c), hsv)) 63 | random.shuffle(colors) 64 | return colors 65 | 66 | 67 | def apply_mask(image, mask, color, alpha=0.5): 68 | """Apply the given mask to the image. 69 | """ 70 | for c in range(3): 71 | image[:, :, c] = np.where(mask == 1, 72 | image[:, :, c] * 73 | (1 - alpha) + alpha * color[c] * 255, 74 | image[:, :, c]) 75 | return image 76 | 77 | 78 | def display_instances(image, boxes, masks, class_ids, class_names, 79 | scores=None, title="", 80 | figsize=(16, 16), ax=None): 81 | """ 82 | boxes: [num_instance, (y1, x1, y2, x2, class_id)] in image coordinates. 83 | masks: [height, width, num_instances] 84 | class_ids: [num_instances] 85 | class_names: list of class names of the dataset 86 | scores: (optional) confidence scores for each box 87 | figsize: (optional) the size of the image. 88 | """ 89 | # Number of instances 90 | N = boxes.shape[0] 91 | if not N: 92 | print("\n*** No instances to display *** \n") 93 | else: 94 | assert boxes.shape[0] == masks.shape[-1] == class_ids.shape[0] 95 | 96 | if not ax: 97 | _, ax = plt.subplots(1, figsize=figsize) 98 | 99 | # Generate random colors 100 | colors = random_colors(N) 101 | 102 | # Show area outside image boundaries.
103 | height, width = image.shape[:2] 104 | ax.set_ylim(height + 10, -10) 105 | ax.set_xlim(-10, width + 10) 106 | ax.axis('off') 107 | ax.set_title(title) 108 | 109 | masked_image = image.astype(np.uint32).copy() 110 | for i in range(N): 111 | color = colors[i] 112 | 113 | # Bounding box 114 | if not np.any(boxes[i]): 115 | # Skip this instance. Has no bbox. Likely lost in image cropping. 116 | continue 117 | y1, x1, y2, x2 = boxes[i] 118 | p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, 119 | alpha=0.7, linestyle="dashed", 120 | edgecolor=color, facecolor='none') 121 | ax.add_patch(p) 122 | 123 | # Label 124 | class_id = class_ids[i] 125 | score = scores[i] if scores is not None else None 126 | label = class_names[class_id] 127 | x = random.randint(x1, (x1 + x2) // 2) 128 | caption = "{} {:.3f}".format(label, score) if score else label 129 | ax.text(x1, y1 + 8, caption, 130 | color='w', size=11, backgroundcolor="none") 131 | 132 | # Mask 133 | mask = masks[:, :, i] 134 | masked_image = apply_mask(masked_image, mask, color) 135 | 136 | # Mask Polygon 137 | # Pad to ensure proper polygons for masks that touch image edges. 138 | padded_mask = np.zeros( 139 | (mask.shape[0] + 2, mask.shape[1] + 2), dtype=np.uint8) 140 | padded_mask[1:-1, 1:-1] = mask 141 | contours = find_contours(padded_mask, 0.5) 142 | for verts in contours: 143 | # Subtract the padding and flip (y, x) to (x, y) 144 | verts = np.fliplr(verts) - 1 145 | p = Polygon(verts, facecolor="none", edgecolor=color) 146 | ax.add_patch(p) 147 | ax.imshow(masked_image.astype(np.uint8)) 148 | plt.show() 149 | 150 | 151 | def draw_rois(image, rois, refined_rois, mask, class_ids, class_names, limit=10): 152 | """ 153 | anchors: [n, (y1, x1, y2, x2)] list of anchors in image coordinates. 154 | proposals: [n, 4] the same anchors but refined to fit objects better. 155 | """ 156 | masked_image = image.copy() 157 | 158 | # Pick random anchors in case there are too many. 159 | ids = np.arange(rois.shape[0], dtype=np.int32) 160 | ids = np.random.choice( 161 | ids, limit, replace=False) if ids.shape[0] > limit else ids 162 | 163 | fig, ax = plt.subplots(1, figsize=(12, 12)) 164 | if rois.shape[0] > limit: 165 | plt.title("Showing {} random ROIs out of {}".format( 166 | len(ids), rois.shape[0])) 167 | else: 168 | plt.title("{} ROIs".format(len(ids))) 169 | 170 | # Show area outside image boundaries. 
171 | ax.set_ylim(image.shape[0] + 20, -20) 172 | ax.set_xlim(-50, image.shape[1] + 20) 173 | ax.axis('off') 174 | 175 | for i, id in enumerate(ids): 176 | color = np.random.rand(3) 177 | class_id = class_ids[id] 178 | # ROI 179 | y1, x1, y2, x2 = rois[id] 180 | p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, 181 | edgecolor=color if class_id else "gray", 182 | facecolor='none', linestyle="dashed") 183 | ax.add_patch(p) 184 | # Refined ROI 185 | if class_id: 186 | ry1, rx1, ry2, rx2 = refined_rois[id] 187 | p = patches.Rectangle((rx1, ry1), rx2 - rx1, ry2 - ry1, linewidth=2, 188 | edgecolor=color, facecolor='none') 189 | ax.add_patch(p) 190 | # Connect the top-left corners of the anchor and proposal for easy visualization 191 | ax.add_line(lines.Line2D([x1, rx1], [y1, ry1], color=color)) 192 | 193 | # Label 194 | label = class_names[class_id] 195 | ax.text(rx1, ry1 + 8, "{}".format(label), 196 | color='w', size=11, backgroundcolor="none") 197 | 198 | # Mask 199 | m = utils.unmold_mask(mask[id], rois[id] 200 | [:4].astype(np.int32), image.shape) 201 | masked_image = apply_mask(masked_image, m, color) 202 | 203 | ax.imshow(masked_image) 204 | 205 | # Print stats 206 | print("Positive ROIs: ", class_ids[class_ids > 0].shape[0]) 207 | print("Negative ROIs: ", class_ids[class_ids == 0].shape[0]) 208 | print("Positive Ratio: {:.2f}".format( 209 | class_ids[class_ids > 0].shape[0] / class_ids.shape[0])) 210 | 211 | 212 | # TODO: Replace with matplotlib equivalent? 213 | def draw_box(image, box, color): 214 | """Draw 3-pixel width bounding boxes on the given image array. 215 | color: list of 3 int values for RGB. 216 | """ 217 | y1, x1, y2, x2 = box 218 | image[y1:y1 + 2, x1:x2] = color 219 | image[y2:y2 + 2, x1:x2] = color 220 | image[y1:y2, x1:x1 + 2] = color 221 | image[y1:y2, x2:x2 + 2] = color 222 | return image 223 | 224 | 225 | def display_top_masks(image, mask, class_ids, class_names, limit=4): 226 | """Display the given image and the top few class masks.""" 227 | to_display = [] 228 | titles = [] 229 | to_display.append(image) 230 | titles.append("H x W={}x{}".format(image.shape[0], image.shape[1])) 231 | # Pick top prominent classes in this image 232 | unique_class_ids = np.unique(class_ids) 233 | mask_area = [np.sum(mask[:, :, np.where(class_ids == i)[0]]) 234 | for i in unique_class_ids] 235 | top_ids = [v[0] for v in sorted(zip(unique_class_ids, mask_area), 236 | key=lambda r: r[1], reverse=True) if v[1] > 0] 237 | # Generate images and titles 238 | for i in range(limit): 239 | class_id = top_ids[i] if i < len(top_ids) else -1 240 | # Pull masks of instances belonging to the same class. 241 | m = mask[:, :, np.where(class_ids == class_id)[0]] 242 | m = np.sum(m * np.arange(1, m.shape[-1] + 1), -1) 243 | to_display.append(m) 244 | titles.append(class_names[class_id] if class_id != -1 else "-") 245 | display_images(to_display, titles=titles, cols=limit + 1, cmap="Blues_r") 246 | 247 | 248 | def plot_precision_recall(AP, precisions, recalls): 249 | """Draw the precision-recall curve. 250 | 251 | AP: Average precision at IoU >= 0.5 252 | precisions: list of precision values 253 | recalls: list of recall values 254 | """ 255 | # Plot the Precision-Recall curve 256 | _, ax = plt.subplots(1) 257 | ax.set_title("Precision-Recall Curve. 
AP@50 = {:.3f}".format(AP)) 258 | ax.set_ylim(0, 1.1) 259 | ax.set_xlim(0, 1.1) 260 | _ = ax.plot(recalls, precisions) 261 | 262 | 263 | def plot_overlaps(gt_class_ids, pred_class_ids, pred_scores, 264 | overlaps, class_names, threshold=0.5): 265 | """Draw a grid showing how ground truth objects are classified. 266 | gt_class_ids: [N] int. Ground truth class IDs 267 | pred_class_ids: [N] int. Predicted class IDs 268 | pred_scores: [N] float. The probability scores of predicted classes 269 | overlaps: [pred_boxes, gt_boxes] IoU overlaps of predictions and GT boxes. 270 | class_names: list of all class names in the dataset 271 | threshold: Float. The prediction probability required to predict a class 272 | """ 273 | gt_class_ids = gt_class_ids[gt_class_ids != 0] 274 | pred_class_ids = pred_class_ids[pred_class_ids != 0] 275 | 276 | plt.figure(figsize=(12, 10)) 277 | plt.imshow(overlaps, interpolation='nearest', cmap=plt.cm.Blues) 278 | plt.yticks(np.arange(len(pred_class_ids)), 279 | ["{} ({:.2f})".format(class_names[int(id)], pred_scores[i]) 280 | for i, id in enumerate(pred_class_ids)]) 281 | plt.xticks(np.arange(len(gt_class_ids)), 282 | [class_names[int(id)] for id in gt_class_ids], rotation=90) 283 | 284 | thresh = overlaps.max() / 2. 285 | for i, j in itertools.product(range(overlaps.shape[0]), 286 | range(overlaps.shape[1])): 287 | text = "" 288 | if overlaps[i, j] > threshold: 289 | text = "match" if gt_class_ids[j] == pred_class_ids[i] else "wrong" 290 | color = ("white" if overlaps[i, j] > thresh 291 | else "black" if overlaps[i, j] > 0 292 | else "grey") 293 | plt.text(j, i, "{:.3f}\n{}".format(overlaps[i, j], text), 294 | horizontalalignment="center", verticalalignment="center", 295 | fontsize=9, color=color) 296 | 297 | plt.tight_layout() 298 | plt.xlabel("Ground Truth") 299 | plt.ylabel("Predictions") 300 | 301 | 302 | def draw_boxes(image, boxes=None, refined_boxes=None, 303 | masks=None, captions=None, visibilities=None, 304 | title="", ax=None): 305 | """Draw bounding boxes and segmentation masks with different 306 | customizations. 307 | 308 | boxes: [N, (y1, x1, y2, x2, class_id)] in image coordinates. 309 | refined_boxes: Like boxes, but draw with solid lines to show 310 | that they're the result of refining 'boxes'. 311 | masks: [height, width, N] 312 | captions: List of N titles to display on each box 313 | visibilities: (optional) List of values of 0, 1, or 2. Determine how 314 | prominent each bounding box should be. 315 | title: An optional title to show over the image 316 | ax: (optional) Matplotlib axis to draw on. 317 | """ 318 | # Number of boxes 319 | assert boxes is not None or refined_boxes is not None 320 | N = boxes.shape[0] if boxes is not None else refined_boxes.shape[0] 321 | 322 | # Matplotlib Axis 323 | if not ax: 324 | _, ax = plt.subplots(1, figsize=(12, 12)) 325 | 326 | # Generate random colors 327 | colors = random_colors(N) 328 | 329 | # Show area outside image boundaries.
330 | margin = image.shape[0] // 10 331 | ax.set_ylim(image.shape[0] + margin, -margin) 332 | ax.set_xlim(-margin, image.shape[1] + margin) 333 | ax.axis('off') 334 | 335 | ax.set_title(title) 336 | 337 | masked_image = image.astype(np.uint32).copy() 338 | for i in range(N): 339 | # Box visibility 340 | visibility = visibilities[i] if visibilities is not None else 1 341 | if visibility == 0: 342 | color = "gray" 343 | style = "dotted" 344 | alpha = 0.5 345 | elif visibility == 1: 346 | color = colors[i] 347 | style = "dotted" 348 | alpha = 1 349 | elif visibility == 2: 350 | color = colors[i] 351 | style = "solid" 352 | alpha = 1 353 | 354 | # Boxes 355 | if boxes is not None: 356 | if not np.any(boxes[i]): 357 | # Skip this instance. Has no bbox. Likely lost in cropping. 358 | continue 359 | y1, x1, y2, x2 = boxes[i] 360 | p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, 361 | alpha=alpha, linestyle=style, 362 | edgecolor=color, facecolor='none') 363 | ax.add_patch(p) 364 | 365 | # Refined boxes 366 | if refined_boxes is not None and visibility > 0: 367 | ry1, rx1, ry2, rx2 = refined_boxes[i].astype(np.int32) 368 | p = patches.Rectangle((rx1, ry1), rx2 - rx1, ry2 - ry1, linewidth=2, 369 | edgecolor=color, facecolor='none') 370 | ax.add_patch(p) 371 | # Connect the top-left corners of the anchor and proposal 372 | if boxes is not None: 373 | ax.add_line(lines.Line2D([x1, rx1], [y1, ry1], color=color)) 374 | 375 | # Captions 376 | if captions is not None: 377 | caption = captions[i] 378 | # If there are refined boxes, display captions on them 379 | if refined_boxes is not None: 380 | y1, x1, y2, x2 = ry1, rx1, ry2, rx2 381 | x = random.randint(x1, (x1 + x2) // 2) 382 | ax.text(x1, y1, caption, size=11, verticalalignment='top', 383 | color='w', backgroundcolor="none", 384 | bbox={'facecolor': color, 'alpha': 0.5, 385 | 'pad': 2, 'edgecolor': 'none'}) 386 | 387 | # Masks 388 | if masks is not None: 389 | mask = masks[:, :, i] 390 | masked_image = apply_mask(masked_image, mask, color) 391 | # Mask Polygon 392 | # Pad to ensure proper polygons for masks that touch image edges. 
393 | padded_mask = np.zeros( 394 | (mask.shape[0] + 2, mask.shape[1] + 2), dtype=np.uint8) 395 | padded_mask[1:-1, 1:-1] = mask 396 | contours = find_contours(padded_mask, 0.5) 397 | for verts in contours: 398 | # Subtract the padding and flip (y, x) to (x, y) 399 | verts = np.fliplr(verts) - 1 400 | p = Polygon(verts, facecolor="none", edgecolor=color) 401 | ax.add_patch(p) 402 | ax.imshow(masked_image.astype(np.uint8)) 403 | 404 | def plot_loss(loss, val_loss, save=True, log_dir=None): 405 | loss = np.array(loss) 406 | val_loss = np.array(val_loss) 407 | 408 | plt.figure("loss") 409 | plt.gcf().clear() 410 | plt.plot(loss[:, 0], label='train') 411 | plt.plot(val_loss[:, 0], label='valid') 412 | plt.xlabel('epoch') 413 | plt.ylabel('loss') 414 | plt.legend() 415 | if save: 416 | save_path = os.path.join(log_dir, "loss.png") 417 | plt.savefig(save_path) 418 | else: 419 | plt.show(block=False) 420 | plt.pause(0.1) 421 | 422 | plt.figure("rpn_class_loss") 423 | plt.gcf().clear() 424 | plt.plot(loss[:, 1], label='train') 425 | plt.plot(val_loss[:, 1], label='valid') 426 | plt.xlabel('epoch') 427 | plt.ylabel('loss') 428 | plt.legend() 429 | if save: 430 | save_path = os.path.join(log_dir, "rpn_class_loss.png") 431 | plt.savefig(save_path) 432 | else: 433 | plt.show(block=False) 434 | plt.pause(0.1) 435 | 436 | plt.figure("rpn_bbox_loss") 437 | plt.gcf().clear() 438 | plt.plot(loss[:, 2], label='train') 439 | plt.plot(val_loss[:, 2], label='valid') 440 | plt.xlabel('epoch') 441 | plt.ylabel('loss') 442 | plt.legend() 443 | if save: 444 | save_path = os.path.join(log_dir, "rpn_bbox_loss.png") 445 | plt.savefig(save_path) 446 | else: 447 | plt.show(block=False) 448 | plt.pause(0.1) 449 | 450 | plt.figure("mrcnn_class_loss") 451 | plt.gcf().clear() 452 | plt.plot(loss[:, 3], label='train') 453 | plt.plot(val_loss[:, 3], label='valid') 454 | plt.xlabel('epoch') 455 | plt.ylabel('loss') 456 | plt.legend() 457 | if save: 458 | save_path = os.path.join(log_dir, "mrcnn_class_loss.png") 459 | plt.savefig(save_path) 460 | else: 461 | plt.show(block=False) 462 | plt.pause(0.1) 463 | 464 | plt.figure("mrcnn_bbox_loss") 465 | plt.gcf().clear() 466 | plt.plot(loss[:, 4], label='train') 467 | plt.plot(val_loss[:, 4], label='valid') 468 | plt.xlabel('epoch') 469 | plt.ylabel('loss') 470 | plt.legend() 471 | if save: 472 | save_path = os.path.join(log_dir, "mrcnn_bbox_loss.png") 473 | plt.savefig(save_path) 474 | else: 475 | plt.show(block=False) 476 | plt.pause(0.1) 477 | 478 | plt.figure("mrcnn_mask_loss") 479 | plt.gcf().clear() 480 | plt.plot(loss[:, 5], label='train') 481 | plt.plot(val_loss[:, 5], label='valid') 482 | plt.xlabel('epoch') 483 | plt.ylabel('loss') 484 | plt.legend() 485 | if save: 486 | save_path = os.path.join(log_dir, "mrcnn_mask_loss.png") 487 | plt.savefig(save_path) 488 | else: 489 | plt.show(block=False) 490 | plt.pause(0.1) 491 | 492 | 493 | --------------------------------------------------------------------------------
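A minimal usage sketch for the two modules above (utils.py and visualize.py), not part of the repo: it reads an image from a placeholder path, resizes it to a padded square with utils.resize_image(), and draws one fabricated detection purely to exercise visualize.display_instances(). The path, the 800/1024 min/max dims, the fake box/mask/score, and the two-entry class list are illustrative assumptions; real boxes, masks, and scores would come from the trained model, and resize_image() depends on the older scipy.misc.imresize API used throughout utils.py.

import numpy as np
import skimage.io

import utils        # utils.py shown above
import visualize    # visualize.py shown above

# Placeholder path; any RGB image works.
image = skimage.io.imread("path/to/image.jpg")

# Resize while keeping the aspect ratio; 800/1024 are assumed, typical
# Mask R-CNN min/max dims. `window` is the un-padded region of the result.
image, window, scale, padding = utils.resize_image(
    image, min_dim=800, max_dim=1024, padding=True)

# Fabricate a single detection covering the window, only to exercise the
# plotting API; real detections come from the model, not from this snippet.
y1, x1, y2, x2 = window
boxes = np.array([[y1, x1, y2, x2]], dtype=np.int32)       # [N, (y1, x1, y2, x2)]
masks = np.zeros(image.shape[:2] + (1,), dtype=np.uint8)   # [H, W, N]
masks[y1:y2, x1:x2, 0] = 1
class_ids = np.array([1], dtype=np.int32)
scores = np.array([0.99], dtype=np.float32)

visualize.display_instances(image, boxes, masks, class_ids,
                            class_names=["BG", "object"], scores=scores)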