├── .gitignore ├── Anaconda Setup ├── README.MD ├── condaenv.em7g437d.requirements.txt ├── mask_rcnn.yml └── requirments.txt ├── LICENSE ├── MANIFEST.in ├── README.md ├── assets ├── 4k_video.gif ├── balloon_color_splash.gif ├── detection_activations.png ├── detection_anchors.png ├── detection_final.png ├── detection_histograms.png ├── detection_masks.png ├── detection_refinement.png ├── detection_tensorboard.png ├── images_to_osm.png ├── mapping_challenge.png ├── nucleus_segmentation.png ├── project_3dbuildings.png ├── project_grass_gis.png ├── project_ice_wedge_polygons.png ├── project_shiny1.jpg ├── project_usiigaci1.gif ├── project_usiigaci2.gif └── street.png ├── coco ├── coco.py ├── inspect_data.ipynb ├── inspect_model.ipynb └── inspect_weights.ipynb ├── demo.ipynb ├── images ├── 1045023827_4ec3e8ba5c_z.jpg ├── 12283150_12d37e6389_z.jpg ├── 2383514521_1fc8d7b0de_z.jpg ├── 2502287818_41e4b0c4fb_z.jpg ├── 2516944023_d00345997d_z.jpg ├── 25691390_f9944f61b5_z.jpg ├── 262985539_1709e54576_z.jpg ├── 3132016470_c27baa00e8_z.jpg ├── 3627527276_6fe8cd9bfe_z.jpg ├── 3651581213_f81963d1dd_z.jpg ├── 3800883468_12af3c0b50_z.jpg ├── 3862500489_6fd195d183_z.jpg ├── 3878153025_8fde829928_z.jpg ├── 4410436637_7b0ca36ee7_z.jpg ├── 4782628554_668bc31826_z.jpg ├── 5951960966_d4e1cda5d0_z.jpg ├── 6584515005_fce9cec486_z.jpg ├── 6821351586_59aa0dc110_z.jpg ├── 7581246086_cf7bbb7255_z.jpg ├── 7933423348_c30bd9bd4e_z.jpg ├── 8053677163_d4c8f416be_z.jpg ├── 8239308689_efa6c11b08_z.jpg ├── 8433365521_9252889f9a_z.jpg ├── 8512296263_5fc5458e20_z.jpg ├── 8699757338_c3941051b6_z.jpg ├── 8734543718_37f6b8bd45_z.jpg ├── 8829708882_48f263491e_z.jpg ├── 9118579087_f9ffa19e63_z.jpg └── 9247489789_132c0d534a_z.jpg ├── mrcnn ├── __init__.py ├── config.py ├── model.py ├── parallel_model.py ├── utils.py └── visualize.py ├── requirements.txt ├── samples ├── README.md ├── balloon │ ├── README.md │ ├── balloon.py │ ├── inspect_balloon_data.ipynb │ └── inspect_balloon_model.ipynb ├── coco │ ├── coco.py │ ├── inspect_data.ipynb │ ├── inspect_model.ipynb │ └── inspect_weights.ipynb ├── demo.ipynb ├── nucleus │ ├── README.md │ ├── inspect_nucleus_data.ipynb │ ├── inspect_nucleus_model.ipynb │ └── nucleus.py └── shapes │ ├── shapes.py │ └── train_shapes.ipynb ├── setup.cfg ├── setup.py ├── video_demo.py └── visualize_cv2.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Data files and directories common in repo root 2 | datasets/ 3 | logs/ 4 | *.h5 5 | results/ 6 | temp/ 7 | test/ 8 | 9 | # Byte-compiled / optimized / DLL files 10 | __pycache__/ 11 | *.py[cod] 12 | *$py.class 13 | 14 | # Distribution / packaging 15 | .Python 16 | env/ 17 | build/ 18 | develop-eggs/ 19 | dist/ 20 | downloads/ 21 | eggs/ 22 | .eggs/ 23 | lib/ 24 | lib64/ 25 | parts/ 26 | sdist/ 27 | var/ 28 | wheels/ 29 | *.egg-info/ 30 | .installed.cfg 31 | *.egg 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # VS Studio Code 38 | .vscode 39 | 40 | # PyCharm 41 | .idea/ 42 | 43 | # Dropbox 44 | .dropbox.attr 45 | 46 | # Jupyter Notebook 47 | .ipynb_checkpoints 48 | 49 | # pyenv 50 | .python-version 51 | 52 | # dotenv 53 | .env 54 | 55 | # virtualenv 56 | .venv 57 | venv/ 58 | ENV/ 59 | -------------------------------------------------------------------------------- /Anaconda Setup/README.MD: -------------------------------------------------------------------------------- 1 | # Installation Guide for Anaconda 2 | Hey guys and welcome back. 
This lecture is going to be a quick lesson on how to install Anaconda for Mask RCNN. Let's get started. 3 | First, go to Google, search for "Anaconda download", and click on the first link. 4 | ## Installing Anaconda 5 | 6 | * Select Anaconda for Windows and download the Python 3.7 version 7 | * Save the installer anywhere you like 8 | * Once it has downloaded, open the installer and follow the instructions. 9 | * Everything can be left at the defaults except for the advanced options, where we recommend ticking "Add Anaconda to my PATH environment variable" 10 | * Click Next and let it install 11 | * Now open the Anaconda Prompt (or Command Prompt) from the Windows Start menu. 12 | 13 | ## Creating the Environment 14 | 15 | Now type in this command: 16 | 17 | ```conda env create -f mask_rcnn.yml``` 18 | 19 | This will create a new conda environment from the .yml file, which you will find in the GitHub repo attached to this video. Make sure you are in the folder that contains the .yml file before running it. 20 | * Press Enter to execute. 21 | * To activate the new conda environment, execute the following command: 22 | 23 | ```conda activate mask_rcnn``` 24 | * You should see the new environment activate, with mask_rcnn shown in brackets at the prompt. 25 | 26 | ## Installing the Dependencies 27 | 28 | * Lastly, install the remaining dependencies using pip: 29 | * ```pip install -r requirments.txt``` 30 | * To test that everything is working, run the demo script (a minimal import-check sketch is also included after the setup files below): 31 | * ```python demo.py``` 32 | If everything was imported correctly, you should get the output shown in the lecture. 33 | 34 | 35 | Okay, that's it from me. In the next lecture I will show you how to set up and run Mask RCNN. 36 | 37 | -------------------------------------------------------------------------------- /Anaconda Setup/condaenv.em7g437d.requirements.txt: -------------------------------------------------------------------------------- 1 | imgaug==0.2.8 2 | opencv-python==4.0.0.21 3 | pycocotools==2.0 -------------------------------------------------------------------------------- /Anaconda Setup/mask_rcnn.yml: -------------------------------------------------------------------------------- 1 | name: my_personal_lab_mask 2 | channels: 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | - _tflow_select=2.3.0=mkl 7 | - absl-py=0.7.0=py36_0 8 | - astor=0.7.1=py36_0 9 | - attrs=19.1.0=py36_1 10 | - backcall=0.1.0=py36_0 11 | - blas=1.0=mkl 12 | - bleach=3.1.0=py36_0 13 | - ca-certificates=2019.3.9=hecc5488_0 14 | - certifi=2019.3.9=py36_0 15 | - cloudpickle=0.8.0=py36_0 16 | - colorama=0.4.1=py36_0 17 | - cycler=0.10.0=py36h009560c_0 18 | - cython=0.29.6=py36h6538335_0 19 | - cytoolz=0.9.0.1=py36hfa6e2cd_1 20 | - dask-core=1.1.4=py36_1 21 | - decorator=4.4.0=py36_1 22 | - defusedxml=0.5.0=py36_1 23 | - entrypoints=0.3=py36_0 24 | - freetype=2.9.1=ha9979f8_1 25 | - gast=0.2.2=py36_0 26 | - geos=3.7.1=h33f27b4_0 27 | - grpcio=1.16.1=py36h351948d_1 28 | - h5py=2.8.0=py36hf7173ca_2 29 | - hdf5=1.8.20=hac2f561_1 30 | - icc_rt=2019.0.0=h0cc432a_1 31 | - icu=58.2=ha66f8fd_1 32 | - imageio=2.5.0=py36_0 33 | - intel-openmp=2019.3=203 34 | - ipykernel=5.1.0=py36h39e3cac_0 35 | - ipython=7.3.0=py36h39e3cac_0 36 | - ipython_genutils=0.2.0=py36h3c5d0ee_0 37 | - ipywidgets=7.4.2=py36_0 38 | - jedi=0.13.3=py36_0 39 | - jinja2=2.10=py36_0 40 | - jpeg=9b=hb83a4c4_2 41 | - jsonschema=3.0.1=py36_0 42 | - jupyter=1.0.0=py36_7 43 | - jupyter_client=5.2.4=py36_0 44 | - jupyter_console=6.0.0=py36_0 45 | - jupyter_core=4.4.0=py36_0 46 | - keras=2.2.4=0 47 | -
keras-applications=1.0.7=py_0 48 | - keras-base=2.2.4=py36_0 49 | - keras-preprocessing=1.0.9=py_0 50 | - kiwisolver=1.0.1=py36h6538335_0 51 | - libmklml=2019.0.3=0 52 | - libopencv=3.4.2=h20b85fd_0 53 | - libpng=1.6.36=h2a8f88b_0 54 | - libprotobuf=3.6.1=h7bd577a_0 55 | - libsodium=1.0.16=h9d3ae62_0 56 | - libtiff=4.0.10=hb898794_2 57 | - m2w64-gcc-libgfortran=5.3.0=6 58 | - m2w64-gcc-libs=5.3.0=7 59 | - m2w64-gcc-libs-core=5.3.0=7 60 | - m2w64-gmp=6.1.0=2 61 | - m2w64-libwinpthread-git=5.0.0.4634.697f757=2 62 | - markdown=3.0.1=py36_0 63 | - markupsafe=1.1.1=py36he774522_0 64 | - matplotlib=3.0.3=py36hc8f65d3_0 65 | - mistune=0.8.4=py36he774522_0 66 | - mkl=2019.3=203 67 | - mkl_fft=1.0.10=py36h14836fe_0 68 | - mkl_random=1.0.2=py36h343c172_0 69 | - mock=2.0.0=py36h9086845_0 70 | - msys2-conda-epoch=20160418=1 71 | - nbconvert=5.4.1=py36_3 72 | - nbformat=4.4.0=py36h3a5bc1b_0 73 | - networkx=2.2=py36_1 74 | - notebook=5.7.6=py36_0 75 | - numpy=1.16.2=py36h19fb1c0_0 76 | - numpy-base=1.16.2=py36hc3f5095_0 77 | - olefile=0.46=py36_0 78 | - opencv=3.4.2=py36h40b0b35_0 79 | - openssl=1.1.1b=hfa6e2cd_2 80 | - pandoc=2.2.3.2=0 81 | - pandocfilters=1.4.2=py36_1 82 | - parso=0.3.4=py36_0 83 | - pbr=5.1.3=py_0 84 | - pickleshare=0.7.5=py36_0 85 | - pillow=5.4.1=py36hdc69c19_0 86 | - pip=19.0.3=py36_0 87 | - prometheus_client=0.6.0=py36_0 88 | - prompt_toolkit=2.0.9=py36_0 89 | - protobuf=3.6.1=py36h33f27b4_0 90 | - py-opencv=3.4.2=py36hc319ecb_0 91 | - pygments=2.3.1=py36_0 92 | - pyparsing=2.3.1=py36_0 93 | - pyqt=5.9.2=py36h6538335_2 94 | - pyreadline=2.1=py36_1 95 | - pyrsistent=0.14.11=py36he774522_0 96 | - python=3.6.8=h9f7ef89_7 97 | - python-dateutil=2.8.0=py36_0 98 | - pytz=2018.9=py36_0 99 | - pywavelets=1.0.2=py36h8c2d366_0 100 | - pywinpty=0.5.5=py36_1000 101 | - pyyaml=5.1=py36he774522_0 102 | - pyzmq=18.0.0=py36ha925a31_0 103 | - qt=5.9.7=vc14h73c81de_0 104 | - qtconsole=4.4.3=py36_0 105 | - scikit-image=0.14.2=py36ha925a31_0 106 | - scipy=1.2.1=py36h29ff71c_0 107 | - send2trash=1.5.0=py36_0 108 | - setuptools=40.8.0=py36_0 109 | - shapely=1.6.4=py36h222a598_0 110 | - sip=4.19.8=py36h6538335_0 111 | - six=1.12.0=py36_0 112 | - sqlite=3.27.2=he774522_0 113 | - tensorboard=1.13.1=py36h33f27b4_0 114 | - tensorflow=1.13.1=mkl_py36hd212fbe_0 115 | - tensorflow-base=1.13.1=mkl_py36hcaf7020_0 116 | - tensorflow-estimator=1.13.0=py_0 117 | - termcolor=1.1.0=py36_1 118 | - terminado=0.8.1=py36_1 119 | - testpath=0.4.2=py36_0 120 | - tk=8.6.8=hfa6e2cd_0 121 | - toolz=0.9.0=py36_0 122 | - tornado=6.0.1=py36he774522_0 123 | - traitlets=4.3.2=py36h096827d_0 124 | - vc=14.1=h0510ff6_4 125 | - vs2015_runtime=14.15.26706=h3a45250_0 126 | - wcwidth=0.1.7=py36h3d5aa90_0 127 | - webencodings=0.5.1=py36_1 128 | - werkzeug=0.14.1=py36_0 129 | - wheel=0.33.1=py36_0 130 | - widgetsnbextension=3.4.2=py36_0 131 | - wincertstore=0.2=py36h7fe50ca_0 132 | - winpty=0.4.3=4 133 | - xz=5.2.4=h2fa13f4_4 134 | - yaml=0.1.7=hc54c509_2 135 | - zeromq=4.3.1=h33f27b4_3 136 | - zlib=1.2.11=h62dcd97_3 137 | - zstd=1.3.7=h508b16e_0 138 | - pip: 139 | - imgaug==0.2.8 140 | - opencv-python==4.0.0.21 141 | - pycocotools==2.0 142 | prefix: C:\Users\GB-PC07\Anaconda3\envs\myenv 143 | 144 | -------------------------------------------------------------------------------- /Anaconda Setup/requirments.txt: -------------------------------------------------------------------------------- 1 | imgaug==0.2.8 2 | git+https://github.com/philferriere/cocoapi.git#egg=pycocotools&subdirectory=PythonAPI 
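The setup guide above finishes by running `python demo.py` as a smoke test. If you only want to confirm that the conda environment itself was built correctly, a minimal import check along the following lines can be used. This is just a sketch under stated assumptions: the file name `check_setup.py` is hypothetical (it is not shipped with this repo), and the package list is taken from `mask_rcnn.yml` and `requirments.txt` above.

```python
# check_setup.py -- hypothetical helper, not part of this repo.
# Minimal smoke test: verify that the core dependencies used by the
# Mask R-CNN demo import cleanly and report their versions.
import importlib

# Import names assumed from mask_rcnn.yml / requirments.txt above.
PACKAGES = ["tensorflow", "keras", "cv2", "imgaug", "skimage", "pycocotools"]


def check(name):
    """Try to import a package and print its version (or the import error)."""
    try:
        module = importlib.import_module(name)
        print("OK   {:<12} {}".format(name, getattr(module, "__version__", "unknown")))
        return True
    except ImportError as err:
        print("FAIL {:<12} {}".format(name, err))
        return False


if __name__ == "__main__":
    # Run every check (no short-circuiting) so all failures are listed at once.
    if all([check(name) for name in PACKAGES]):
        print("Environment looks ready for the Mask R-CNN demo.")
    else:
        print("Some imports failed -- re-check the conda and pip steps above.")
```

Run it from the activated environment with `python check_setup.py`; every package should report `OK` before moving on to the demo in the next lecture.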
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Mask R-CNN 2 | 3 | The MIT License (MIT) 4 | 5 | Copyright (c) 2017 Matterport, Inc. 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in 15 | all copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | THE SOFTWARE. 24 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | include LICENSE 3 | include requirements.txt -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Mask R-CNN for Object Detection and Segmentation 2 | 3 | This is an implementation of [Mask R-CNN](https://arxiv.org/abs/1703.06870) on Python 3, Keras, and TensorFlow. The model generates bounding boxes and segmentation masks for each instance of an object in the image. It's based on Feature Pyramid Network (FPN) and a ResNet101 backbone. 4 | 5 | ![Instance Segmentation Sample](assets/street.png) 6 | 7 | The repository includes: 8 | * Source code of Mask R-CNN built on FPN and ResNet101. 9 | * Training code for MS COCO 10 | * Pre-trained weights for MS COCO 11 | * Jupyter notebooks to visualize the detection pipeline at every step 12 | * ParallelModel class for multi-GPU training 13 | * Evaluation on MS COCO metrics (AP) 14 | * Example of training on your own dataset 15 | 16 | 17 | The code is documented and designed to be easy to extend. If you use it in your research, please consider citing this repository (bibtex below). If you work on 3D vision, you might find our recently released [Matterport3D](https://matterport.com/blog/2017/09/20/announcing-matterport3d-research-dataset/) dataset useful as well. 18 | This dataset was created from 3D-reconstructed spaces captured by our customers who agreed to make them publicly available for academic use. You can see more examples [here](https://matterport.com/gallery/). 19 | 20 | # Getting Started 21 | * [demo.ipynb](samples/demo.ipynb) Is the easiest way to start. It shows an example of using a model pre-trained on MS COCO to segment objects in your own images. 22 | It includes code to run object detection and instance segmentation on arbitrary images. 23 | 24 | * [train_shapes.ipynb](samples/shapes/train_shapes.ipynb) shows how to train Mask R-CNN on your own dataset. 
This notebook introduces a toy dataset (Shapes) to demonstrate training on a new dataset. 25 | 26 | * ([model.py](mrcnn/model.py), [utils.py](mrcnn/utils.py), [config.py](mrcnn/config.py)): These files contain the main Mask RCNN implementation. 27 | 28 | 29 | * [inspect_data.ipynb](samples/coco/inspect_data.ipynb). This notebook visualizes the different pre-processing steps 30 | to prepare the training data. 31 | 32 | * [inspect_model.ipynb](samples/coco/inspect_model.ipynb) This notebook goes in depth into the steps performed to detect and segment objects. It provides visualizations of every step of the pipeline. 33 | 34 | * [inspect_weights.ipynb](samples/coco/inspect_weights.ipynb) 35 | This notebook inspects the weights of a trained model and looks for anomalies and odd patterns. 36 | 37 | 38 | # Step by Step Detection 39 | To help with debugging and understanding the model, there are 3 notebooks 40 | ([inspect_data.ipynb](samples/coco/inspect_data.ipynb), [inspect_model.ipynb](samples/coco/inspect_model.ipynb), 41 | [inspect_weights.ipynb](samples/coco/inspect_weights.ipynb)) that provide a lot of visualizations and allow running the model step by step to inspect the output at each point. Here are a few examples: 42 | 43 | 44 | 45 | ## 1. Anchor sorting and filtering 46 | Visualizes every step of the first-stage Region Proposal Network and displays positive and negative anchors along with anchor box refinement. 47 | ![](assets/detection_anchors.png) 48 | 49 | ## 2. Bounding Box Refinement 50 | This is an example of final detection boxes (dotted lines) and the refinement applied to them (solid lines) in the second stage. 51 | ![](assets/detection_refinement.png) 52 | 53 | ## 3. Mask Generation 54 | Examples of generated masks. These then get scaled and placed on the image in the right location. 55 | 56 | ![](assets/detection_masks.png) 57 | 58 | ## 4. Layer activations 59 | Often it's useful to inspect the activations at different layers to look for signs of trouble (all zeros or random noise). 60 | 61 | ![](assets/detection_activations.png) 62 | 63 | ## 5. Weight Histograms 64 | Another useful debugging tool is to inspect the weight histograms. These are included in the inspect_weights.ipynb notebook. 65 | 66 | ![](assets/detection_histograms.png) 67 | 68 | ## 6. Logging to TensorBoard 69 | TensorBoard is another great debugging and visualization tool. The model is configured to log losses and save weights at the end of every epoch. 70 | 71 | ![](assets/detection_tensorboard.png) 72 | 73 | ## 7. Composing the different pieces into a final result 74 | 75 | ![](assets/detection_final.png) 76 | 77 | 78 | # Training on MS COCO 79 | We're providing pre-trained weights for MS COCO to make it easier to start. You can 80 | use those weights as a starting point to train your own variation on the network. 81 | Training and evaluation code is in `samples/coco/coco.py`.
You can import this 82 | module in Jupyter notebook (see the provided notebooks for examples) or you 83 | can run it directly from the command line as such: 84 | 85 | ``` 86 | # Train a new model starting from pre-trained COCO weights 87 | python3 samples/coco/coco.py train --dataset=/path/to/coco/ --model=coco 88 | 89 | # Train a new model starting from ImageNet weights 90 | python3 samples/coco/coco.py train --dataset=/path/to/coco/ --model=imagenet 91 | 92 | # Continue training a model that you had trained earlier 93 | python3 samples/coco/coco.py train --dataset=/path/to/coco/ --model=/path/to/weights.h5 94 | 95 | # Continue training the last model you trained. This will find 96 | # the last trained weights in the model directory. 97 | python3 samples/coco/coco.py train --dataset=/path/to/coco/ --model=last 98 | ``` 99 | 100 | You can also run the COCO evaluation code with: 101 | ``` 102 | # Run COCO evaluation on the last trained model 103 | python3 samples/coco/coco.py evaluate --dataset=/path/to/coco/ --model=last 104 | ``` 105 | 106 | The training schedule, learning rate, and other parameters should be set in `samples/coco/coco.py`. 107 | 108 | 109 | # Training on Your Own Dataset 110 | 111 | Start by reading this [blog post about the balloon color splash sample](https://engineering.matterport.com/splash-of-color-instance-segmentation-with-mask-r-cnn-and-tensorflow-7c761e238b46). It covers the process starting from annotating images to training to using the results in a sample application. 112 | 113 | In summary, to train the model on your own dataset you'll need to extend two classes: 114 | 115 | ```Config``` 116 | This class contains the default configuration. Subclass it and modify the attributes you need to change. 117 | 118 | ```Dataset``` 119 | This class provides a consistent way to work with any dataset. 120 | It allows you to use new datasets for training without having to change 121 | the code of the model. It also supports loading multiple datasets at the 122 | same time, which is useful if the objects you want to detect are not 123 | all available in one dataset. 124 | 125 | See examples in `samples/shapes/train_shapes.ipynb`, `samples/coco/coco.py`, `samples/balloon/balloon.py`, and `samples/nucleus/nucleus.py`. 126 | 127 | ## Differences from the Official Paper 128 | This implementation follows the Mask RCNN paper for the most part, but there are a few cases where we deviated in favor of code simplicity and generalization. These are some of the differences we're aware of. If you encounter other differences, please do let us know. 129 | 130 | * **Image Resizing:** To support training multiple images per batch we resize all images to the same size. For example, 1024x1024px on MS COCO. We preserve the aspect ratio, so if an image is not square we pad it with zeros. In the paper the resizing is done such that the smallest side is 800px and the largest is trimmed at 1000px. 131 | * **Bounding Boxes**: Some datasets provide bounding boxes and some provide masks only. To support training on multiple datasets we opted to ignore the bounding boxes that come with the dataset and generate them on the fly instead. We pick the smallest box that encapsulates all the pixels of the mask as the bounding box. This simplifies the implementation and also makes it easy to apply image augmentations that would otherwise be harder to apply to bounding boxes, such as image rotation. 
132 | 133 | To validate this approach, we compared our computed bounding boxes to those provided by the COCO dataset. 134 | We found that ~2% of bounding boxes differed by 1px or more, ~0.05% differed by 5px or more, 135 | and only 0.01% differed by 10px or more. 136 | 137 | * **Learning Rate:** The paper uses a learning rate of 0.02, but we found that to be 138 | too high, and often causes the weights to explode, especially when using a small batch 139 | size. It might be related to differences between how Caffe and TensorFlow compute 140 | gradients (sum vs mean across batches and GPUs). Or, maybe the official model uses gradient 141 | clipping to avoid this issue. We do use gradient clipping, but don't set it too aggressively. 142 | We found that smaller learning rates converge faster anyway so we go with that. 143 | 144 | ## Citation 145 | Use this bibtex to cite this repository: 146 | ``` 147 | @misc{matterport_maskrcnn_2017, 148 | title={Mask R-CNN for object detection and instance segmentation on Keras and TensorFlow}, 149 | author={Waleed Abdulla}, 150 | year={2017}, 151 | publisher={Github}, 152 | journal={GitHub repository}, 153 | howpublished={\url{https://github.com/matterport/Mask_RCNN}}, 154 | } 155 | ``` 156 | 157 | ## Contributing 158 | Contributions to this repository are welcome. Examples of things you can contribute: 159 | * Speed Improvements. Like re-writing some Python code in TensorFlow or Cython. 160 | * Training on other datasets. 161 | * Accuracy Improvements. 162 | * Visualizations and examples. 163 | 164 | You can also [join our team](https://matterport.com/careers/) and help us build even more projects like this one. 165 | 166 | ## Requirements 167 | Python 3.4, TensorFlow 1.3, Keras 2.0.8 and other common packages listed in `requirements.txt`. 168 | 169 | ### MS COCO Requirements: 170 | To train or test on MS COCO, you'll also need: 171 | * pycocotools (installation instructions below) 172 | * [MS COCO Dataset](http://cocodataset.org/#home) 173 | * Download the 5K [minival](https://dl.dropboxusercontent.com/s/o43o90bna78omob/instances_minival2014.json.zip?dl=0) 174 | and the 35K [validation-minus-minival](https://dl.dropboxusercontent.com/s/s3tw5zcg7395368/instances_valminusminival2014.json.zip?dl=0) 175 | subsets. More details in the original [Faster R-CNN implementation](https://github.com/rbgirshick/py-faster-rcnn/blob/master/data/README.md). 176 | 177 | If you use Docker, the code has been verified to work on 178 | [this Docker container](https://hub.docker.com/r/waleedka/modern-deep-learning/). 179 | 180 | 181 | ## Installation 182 | 1. Clone this repository 183 | 2. Install dependencies 184 | ```bash 185 | pip3 install -r requirements.txt 186 | ``` 187 | 3. Run setup from the repository root directory 188 | ```bash 189 | python3 setup.py install 190 | ``` 191 | 3. Download pre-trained COCO weights (mask_rcnn_coco.h5) from the [releases page](https://github.com/matterport/Mask_RCNN/releases). 192 | 4. (Optional) To train or test on MS COCO install `pycocotools` from one of these repos. They are forks of the original pycocotools with fixes for Python3 and Windows (the official repo doesn't seem to be active anymore). 193 | 194 | * Linux: https://github.com/waleedka/coco 195 | * Windows: https://github.com/philferriere/cocoapi. 
196 | You must have the Visual C++ 2015 build tools on your path (see the repo for additional details) 197 | 198 | # Projects Using this Model 199 | If you extend this model to other datasets or build projects that use it, we'd love to hear from you. 200 | 201 | ### [4K Video Demo](https://www.youtube.com/watch?v=OOT3UIXZztE) by Karol Majek. 202 | [![Mask RCNN on 4K Video](assets/4k_video.gif)](https://www.youtube.com/watch?v=OOT3UIXZztE) 203 | 204 | ### [Images to OSM](https://github.com/jremillard/images-to-osm): Improve OpenStreetMap by adding baseball, soccer, tennis, football, and basketball fields. 205 | 206 | ![Identify sport fields in satellite images](assets/images_to_osm.png) 207 | 208 | ### [Splash of Color](https://engineering.matterport.com/splash-of-color-instance-segmentation-with-mask-r-cnn-and-tensorflow-7c761e238b46). A blog post explaining how to train this model from scratch and use it to implement a color splash effect. 209 | ![Balloon Color Splash](assets/balloon_color_splash.gif) 210 | 211 | 212 | ### [Segmenting Nuclei in Microscopy Images](samples/nucleus). Built for the [2018 Data Science Bowl](https://www.kaggle.com/c/data-science-bowl-2018) 213 | Code is in the `samples/nucleus` directory. 214 | 215 | ![Nucleus Segmentation](assets/nucleus_segmentation.png) 216 | 217 | ### [Detection and Segmentation for Surgery Robots](https://github.com/SUYEgit/Surgery-Robot-Detection-Segmentation) by the NUS Control & Mechatronics Lab. 218 | ![Surgery Robot Detection and Segmentation](https://github.com/SUYEgit/Surgery-Robot-Detection-Segmentation/raw/master/assets/video.gif) 219 | 220 | ### [Reconstructing 3D buildings from aerial LiDAR](https://medium.com/geoai/reconstructing-3d-buildings-from-aerial-lidar-with-ai-details-6a81cb3079c0) 221 | A proof of concept project by [Esri](https://www.esri.com/), in collaboration with Nvidia and Miami-Dade County. Along with a great write up and code by Dmitry Kudinov, Daniel Hedges, and Omar Maher. 222 | ![3D Building Reconstruction](assets/project_3dbuildings.png) 223 | 224 | ### [Usiigaci: Label-free Cell Tracking in Phase Contrast Microscopy](https://github.com/oist/usiigaci) 225 | A project from Japan to automatically track cells in a microfluidics platform. Paper is pending, but the source code is released. 226 | 227 | ![](assets/project_usiigaci1.gif) ![](assets/project_usiigaci2.gif) 228 | 229 | ### [Characterization of Arctic Ice-Wedge Polygons in Very High Spatial Resolution Aerial Imagery](http://www.mdpi.com/2072-4292/10/9/1487) 230 | Research project to understand the complex processes between degradations in the Arctic and climate change. By Weixing Zhang, Chandi Witharana, Anna Liljedahl, and Mikhail Kanevskiy. 231 | ![image](assets/project_ice_wedge_polygons.png) 232 | 233 | ### [Mask-RCNN Shiny](https://github.com/huuuuusy/Mask-RCNN-Shiny) 234 | A computer vision class project by HU Shiyu to apply the color pop effect on people with beautiful results. 235 | ![](assets/project_shiny1.jpg) 236 | 237 | ### [Mapping Challenge](https://github.com/crowdAI/crowdai-mapping-challenge-mask-rcnn): Convert satellite imagery to maps for use by humanitarian organisations. 238 | ![Mapping Challenge](assets/mapping_challenge.png) 239 | 240 | ### [GRASS GIS Addon](https://github.com/ctu-geoforall-lab/i.ann.maskrcnn) to generate vector masks from geospatial imagery. Based on a [Master's thesis](https://github.com/ctu-geoforall-lab-projects/dp-pesek-2018) by Ondřej Pešek. 
241 | ![GRASS GIS Image](assets/project_grass_gis.png) 242 | -------------------------------------------------------------------------------- /assets/4k_video.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/assets/4k_video.gif -------------------------------------------------------------------------------- /assets/balloon_color_splash.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/assets/balloon_color_splash.gif -------------------------------------------------------------------------------- /assets/detection_activations.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/assets/detection_activations.png -------------------------------------------------------------------------------- /assets/detection_anchors.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/assets/detection_anchors.png -------------------------------------------------------------------------------- /assets/detection_final.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/assets/detection_final.png -------------------------------------------------------------------------------- /assets/detection_histograms.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/assets/detection_histograms.png -------------------------------------------------------------------------------- /assets/detection_masks.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/assets/detection_masks.png -------------------------------------------------------------------------------- /assets/detection_refinement.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/assets/detection_refinement.png -------------------------------------------------------------------------------- /assets/detection_tensorboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/assets/detection_tensorboard.png -------------------------------------------------------------------------------- /assets/images_to_osm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/assets/images_to_osm.png -------------------------------------------------------------------------------- /assets/mapping_challenge.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/assets/mapping_challenge.png -------------------------------------------------------------------------------- /assets/nucleus_segmentation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/assets/nucleus_segmentation.png -------------------------------------------------------------------------------- /assets/project_3dbuildings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/assets/project_3dbuildings.png -------------------------------------------------------------------------------- /assets/project_grass_gis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/assets/project_grass_gis.png -------------------------------------------------------------------------------- /assets/project_ice_wedge_polygons.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/assets/project_ice_wedge_polygons.png -------------------------------------------------------------------------------- /assets/project_shiny1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/assets/project_shiny1.jpg -------------------------------------------------------------------------------- /assets/project_usiigaci1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/assets/project_usiigaci1.gif -------------------------------------------------------------------------------- /assets/project_usiigaci2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/assets/project_usiigaci2.gif -------------------------------------------------------------------------------- /assets/street.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/assets/street.png -------------------------------------------------------------------------------- /coco/coco.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Configurations and data loading code for MS COCO. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | 9 | ------------------------------------------------------------ 10 | 11 | Usage: import the module (see Jupyter notebooks for examples), or run from 12 | the command line as such: 13 | 14 | # Train a new model starting from pre-trained COCO weights 15 | python3 coco.py train --dataset=/path/to/coco/ --model=coco 16 | 17 | # Train a new model starting from ImageNet weights. 
Also auto download COCO dataset 18 | python3 coco.py train --dataset=/path/to/coco/ --model=imagenet --download=True 19 | 20 | # Continue training a model that you had trained earlier 21 | python3 coco.py train --dataset=/path/to/coco/ --model=/path/to/weights.h5 22 | 23 | # Continue training the last model you trained 24 | python3 coco.py train --dataset=/path/to/coco/ --model=last 25 | 26 | # Run COCO evaluatoin on the last model you trained 27 | python3 coco.py evaluate --dataset=/path/to/coco/ --model=last 28 | """ 29 | 30 | import os 31 | import sys 32 | import time 33 | import numpy as np 34 | import imgaug # https://github.com/aleju/imgaug (pip3 install imgaug) 35 | 36 | # Download and install the Python COCO tools from https://github.com/waleedka/coco 37 | # That's a fork from the original https://github.com/pdollar/coco with a bug 38 | # fix for Python 3. 39 | # I submitted a pull request https://github.com/cocodataset/cocoapi/pull/50 40 | # If the PR is merged then use the original repo. 41 | # Note: Edit PythonAPI/Makefile and replace "python" with "python3". 42 | from pycocotools.coco import COCO 43 | from pycocotools.cocoeval import COCOeval 44 | from pycocotools import mask as maskUtils 45 | 46 | import zipfile 47 | import urllib.request 48 | import shutil 49 | 50 | # Root directory of the project 51 | ROOT_DIR = os.path.abspath("../../") 52 | 53 | # Import Mask RCNN 54 | sys.path.append(ROOT_DIR) # To find local version of the library 55 | from mrcnn.config import Config 56 | from mrcnn import model as modellib, utils 57 | 58 | # Path to trained weights file 59 | COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5") 60 | 61 | # Directory to save logs and model checkpoints, if not provided 62 | # through the command line argument --logs 63 | DEFAULT_LOGS_DIR = os.path.join(ROOT_DIR, "logs") 64 | DEFAULT_DATASET_YEAR = "2014" 65 | 66 | ############################################################ 67 | # Configurations 68 | ############################################################ 69 | 70 | 71 | class CocoConfig(Config): 72 | """Configuration for training on MS COCO. 73 | Derives from the base Config class and overrides values specific 74 | to the COCO dataset. 75 | """ 76 | # Give the configuration a recognizable name 77 | NAME = "coco" 78 | 79 | # We use a GPU with 12GB memory, which can fit two images. 80 | # Adjust down if you use a smaller GPU. 81 | IMAGES_PER_GPU = 2 82 | 83 | # Uncomment to train on 8 GPUs (default is 1) 84 | # GPU_COUNT = 8 85 | 86 | # Number of classes (including background) 87 | NUM_CLASSES = 1 + 80 # COCO has 80 classes 88 | 89 | 90 | ############################################################ 91 | # Dataset 92 | ############################################################ 93 | 94 | class CocoDataset(utils.Dataset): 95 | def load_coco(self, dataset_dir, subset, year=DEFAULT_DATASET_YEAR, class_ids=None, 96 | class_map=None, return_coco=False, auto_download=False): 97 | """Load a subset of the COCO dataset. 98 | dataset_dir: The root directory of the COCO dataset. 99 | subset: What to load (train, val, minival, valminusminival) 100 | year: What dataset year to load (2014, 2017) as a string, not an integer 101 | class_ids: If provided, only loads images that have the given classes. 102 | class_map: TODO: Not implemented yet. Supports maping classes from 103 | different datasets to the same class ID. 104 | return_coco: If True, returns the COCO object. 
105 | auto_download: Automatically download and unzip MS-COCO images and annotations 106 | """ 107 | 108 | if auto_download is True: 109 | self.auto_download(dataset_dir, subset, year) 110 | 111 | coco = COCO("{}/annotations/instances_{}{}.json".format(dataset_dir, subset, year)) 112 | if subset == "minival" or subset == "valminusminival": 113 | subset = "val" 114 | image_dir = "{}/{}{}".format(dataset_dir, subset, year) 115 | 116 | # Load all classes or a subset? 117 | if not class_ids: 118 | # All classes 119 | class_ids = sorted(coco.getCatIds()) 120 | 121 | # All images or a subset? 122 | if class_ids: 123 | image_ids = [] 124 | for id in class_ids: 125 | image_ids.extend(list(coco.getImgIds(catIds=[id]))) 126 | # Remove duplicates 127 | image_ids = list(set(image_ids)) 128 | else: 129 | # All images 130 | image_ids = list(coco.imgs.keys()) 131 | 132 | # Add classes 133 | for i in class_ids: 134 | self.add_class("coco", i, coco.loadCats(i)[0]["name"]) 135 | 136 | # Add images 137 | for i in image_ids: 138 | self.add_image( 139 | "coco", image_id=i, 140 | path=os.path.join(image_dir, coco.imgs[i]['file_name']), 141 | width=coco.imgs[i]["width"], 142 | height=coco.imgs[i]["height"], 143 | annotations=coco.loadAnns(coco.getAnnIds( 144 | imgIds=[i], catIds=class_ids, iscrowd=None))) 145 | if return_coco: 146 | return coco 147 | 148 | def auto_download(self, dataDir, dataType, dataYear): 149 | """Download the COCO dataset/annotations if requested. 150 | dataDir: The root directory of the COCO dataset. 151 | dataType: What to load (train, val, minival, valminusminival) 152 | dataYear: What dataset year to load (2014, 2017) as a string, not an integer 153 | Note: 154 | For 2014, use "train", "val", "minival", or "valminusminival" 155 | For 2017, only "train" and "val" annotations are available 156 | """ 157 | 158 | # Setup paths and file names 159 | if dataType == "minival" or dataType == "valminusminival": 160 | imgDir = "{}/{}{}".format(dataDir, "val", dataYear) 161 | imgZipFile = "{}/{}{}.zip".format(dataDir, "val", dataYear) 162 | imgURL = "http://images.cocodataset.org/zips/{}{}.zip".format("val", dataYear) 163 | else: 164 | imgDir = "{}/{}{}".format(dataDir, dataType, dataYear) 165 | imgZipFile = "{}/{}{}.zip".format(dataDir, dataType, dataYear) 166 | imgURL = "http://images.cocodataset.org/zips/{}{}.zip".format(dataType, dataYear) 167 | # print("Image paths:"); print(imgDir); print(imgZipFile); print(imgURL) 168 | 169 | # Create main folder if it doesn't exist yet 170 | if not os.path.exists(dataDir): 171 | os.makedirs(dataDir) 172 | 173 | # Download images if not available locally 174 | if not os.path.exists(imgDir): 175 | os.makedirs(imgDir) 176 | print("Downloading images to " + imgZipFile + " ...") 177 | with urllib.request.urlopen(imgURL) as resp, open(imgZipFile, 'wb') as out: 178 | shutil.copyfileobj(resp, out) 179 | print("... done downloading.") 180 | print("Unzipping " + imgZipFile) 181 | with zipfile.ZipFile(imgZipFile, "r") as zip_ref: 182 | zip_ref.extractall(dataDir) 183 | print("... 
done unzipping") 184 | print("Will use images in " + imgDir) 185 | 186 | # Setup annotations data paths 187 | annDir = "{}/annotations".format(dataDir) 188 | if dataType == "minival": 189 | annZipFile = "{}/instances_minival2014.json.zip".format(dataDir) 190 | annFile = "{}/instances_minival2014.json".format(annDir) 191 | annURL = "https://dl.dropboxusercontent.com/s/o43o90bna78omob/instances_minival2014.json.zip?dl=0" 192 | unZipDir = annDir 193 | elif dataType == "valminusminival": 194 | annZipFile = "{}/instances_valminusminival2014.json.zip".format(dataDir) 195 | annFile = "{}/instances_valminusminival2014.json".format(annDir) 196 | annURL = "https://dl.dropboxusercontent.com/s/s3tw5zcg7395368/instances_valminusminival2014.json.zip?dl=0" 197 | unZipDir = annDir 198 | else: 199 | annZipFile = "{}/annotations_trainval{}.zip".format(dataDir, dataYear) 200 | annFile = "{}/instances_{}{}.json".format(annDir, dataType, dataYear) 201 | annURL = "http://images.cocodataset.org/annotations/annotations_trainval{}.zip".format(dataYear) 202 | unZipDir = dataDir 203 | # print("Annotations paths:"); print(annDir); print(annFile); print(annZipFile); print(annURL) 204 | 205 | # Download annotations if not available locally 206 | if not os.path.exists(annDir): 207 | os.makedirs(annDir) 208 | if not os.path.exists(annFile): 209 | if not os.path.exists(annZipFile): 210 | print("Downloading zipped annotations to " + annZipFile + " ...") 211 | with urllib.request.urlopen(annURL) as resp, open(annZipFile, 'wb') as out: 212 | shutil.copyfileobj(resp, out) 213 | print("... done downloading.") 214 | print("Unzipping " + annZipFile) 215 | with zipfile.ZipFile(annZipFile, "r") as zip_ref: 216 | zip_ref.extractall(unZipDir) 217 | print("... done unzipping") 218 | print("Will use annotations in " + annFile) 219 | 220 | def load_mask(self, image_id): 221 | """Load instance masks for the given image. 222 | 223 | Different datasets use different ways to store masks. This 224 | function converts the different mask format to one format 225 | in the form of a bitmap [height, width, instances]. 226 | 227 | Returns: 228 | masks: A bool array of shape [height, width, instance count] with 229 | one mask per instance. 230 | class_ids: a 1D array of class IDs of the instance masks. 231 | """ 232 | # If not a COCO image, delegate to parent class. 233 | image_info = self.image_info[image_id] 234 | if image_info["source"] != "coco": 235 | return super(CocoDataset, self).load_mask(image_id) 236 | 237 | instance_masks = [] 238 | class_ids = [] 239 | annotations = self.image_info[image_id]["annotations"] 240 | # Build mask of shape [height, width, instance_count] and list 241 | # of class IDs that correspond to each channel of the mask. 242 | for annotation in annotations: 243 | class_id = self.map_source_class_id( 244 | "coco.{}".format(annotation['category_id'])) 245 | if class_id: 246 | m = self.annToMask(annotation, image_info["height"], 247 | image_info["width"]) 248 | # Some objects are so small that they're less than 1 pixel area 249 | # and end up rounded out. Skip those objects. 250 | if m.max() < 1: 251 | continue 252 | # Is it a crowd? If so, use a negative class ID. 253 | if annotation['iscrowd']: 254 | # Use negative class ID for crowds 255 | class_id *= -1 256 | # For crowd masks, annToMask() sometimes returns a mask 257 | # smaller than the given dimensions. If so, resize it. 
258 | if m.shape[0] != image_info["height"] or m.shape[1] != image_info["width"]: 259 | m = np.ones([image_info["height"], image_info["width"]], dtype=bool) 260 | instance_masks.append(m) 261 | class_ids.append(class_id) 262 | 263 | # Pack instance masks into an array 264 | if class_ids: 265 | mask = np.stack(instance_masks, axis=2).astype(np.bool) 266 | class_ids = np.array(class_ids, dtype=np.int32) 267 | return mask, class_ids 268 | else: 269 | # Call super class to return an empty mask 270 | return super(CocoDataset, self).load_mask(image_id) 271 | 272 | def image_reference(self, image_id): 273 | """Return a link to the image in the COCO Website.""" 274 | info = self.image_info[image_id] 275 | if info["source"] == "coco": 276 | return "http://cocodataset.org/#explore?id={}".format(info["id"]) 277 | else: 278 | super(CocoDataset, self).image_reference(image_id) 279 | 280 | # The following two functions are from pycocotools with a few changes. 281 | 282 | def annToRLE(self, ann, height, width): 283 | """ 284 | Convert annotation which can be polygons, uncompressed RLE to RLE. 285 | :return: binary mask (numpy 2D array) 286 | """ 287 | segm = ann['segmentation'] 288 | if isinstance(segm, list): 289 | # polygon -- a single object might consist of multiple parts 290 | # we merge all parts into one mask rle code 291 | rles = maskUtils.frPyObjects(segm, height, width) 292 | rle = maskUtils.merge(rles) 293 | elif isinstance(segm['counts'], list): 294 | # uncompressed RLE 295 | rle = maskUtils.frPyObjects(segm, height, width) 296 | else: 297 | # rle 298 | rle = ann['segmentation'] 299 | return rle 300 | 301 | def annToMask(self, ann, height, width): 302 | """ 303 | Convert annotation which can be polygons, uncompressed RLE, or RLE to binary mask. 304 | :return: binary mask (numpy 2D array) 305 | """ 306 | rle = self.annToRLE(ann, height, width) 307 | m = maskUtils.decode(rle) 308 | return m 309 | 310 | 311 | ############################################################ 312 | # COCO Evaluation 313 | ############################################################ 314 | 315 | def build_coco_results(dataset, image_ids, rois, class_ids, scores, masks): 316 | """Arrange resutls to match COCO specs in http://cocodataset.org/#format 317 | """ 318 | # If no results, return an empty list 319 | if rois is None: 320 | return [] 321 | 322 | results = [] 323 | for image_id in image_ids: 324 | # Loop through detections 325 | for i in range(rois.shape[0]): 326 | class_id = class_ids[i] 327 | score = scores[i] 328 | bbox = np.around(rois[i], 1) 329 | mask = masks[:, :, i] 330 | 331 | result = { 332 | "image_id": image_id, 333 | "category_id": dataset.get_source_class_id(class_id, "coco"), 334 | "bbox": [bbox[1], bbox[0], bbox[3] - bbox[1], bbox[2] - bbox[0]], 335 | "score": score, 336 | "segmentation": maskUtils.encode(np.asfortranarray(mask)) 337 | } 338 | results.append(result) 339 | return results 340 | 341 | 342 | def evaluate_coco(model, dataset, coco, eval_type="bbox", limit=0, image_ids=None): 343 | """Runs official COCO evaluation. 344 | dataset: A Dataset object with valiadtion data 345 | eval_type: "bbox" or "segm" for bounding box or segmentation evaluation 346 | limit: if not 0, it's the number of images to use for evaluation 347 | """ 348 | # Pick COCO images from the dataset 349 | image_ids = image_ids or dataset.image_ids 350 | 351 | # Limit to a subset 352 | if limit: 353 | image_ids = image_ids[:limit] 354 | 355 | # Get corresponding COCO image IDs. 
356 | coco_image_ids = [dataset.image_info[id]["id"] for id in image_ids] 357 | 358 | t_prediction = 0 359 | t_start = time.time() 360 | 361 | results = [] 362 | for i, image_id in enumerate(image_ids): 363 | # Load image 364 | image = dataset.load_image(image_id) 365 | 366 | # Run detection 367 | t = time.time() 368 | r = model.detect([image], verbose=0)[0] 369 | t_prediction += (time.time() - t) 370 | 371 | # Convert results to COCO format 372 | # Cast masks to uint8 because COCO tools errors out on bool 373 | image_results = build_coco_results(dataset, coco_image_ids[i:i + 1], 374 | r["rois"], r["class_ids"], 375 | r["scores"], 376 | r["masks"].astype(np.uint8)) 377 | results.extend(image_results) 378 | 379 | # Load results. This modifies results with additional attributes. 380 | coco_results = coco.loadRes(results) 381 | 382 | # Evaluate 383 | cocoEval = COCOeval(coco, coco_results, eval_type) 384 | cocoEval.params.imgIds = coco_image_ids 385 | cocoEval.evaluate() 386 | cocoEval.accumulate() 387 | cocoEval.summarize() 388 | 389 | print("Prediction time: {}. Average {}/image".format( 390 | t_prediction, t_prediction / len(image_ids))) 391 | print("Total time: ", time.time() - t_start) 392 | 393 | 394 | ############################################################ 395 | # Training 396 | ############################################################ 397 | 398 | 399 | if __name__ == '__main__': 400 | import argparse 401 | 402 | # Parse command line arguments 403 | parser = argparse.ArgumentParser( 404 | description='Train Mask R-CNN on MS COCO.') 405 | parser.add_argument("command", 406 | metavar="", 407 | help="'train' or 'evaluate' on MS COCO") 408 | parser.add_argument('--dataset', required=True, 409 | metavar="/path/to/coco/", 410 | help='Directory of the MS-COCO dataset') 411 | parser.add_argument('--year', required=False, 412 | default=DEFAULT_DATASET_YEAR, 413 | metavar="", 414 | help='Year of the MS-COCO dataset (2014 or 2017) (default=2014)') 415 | parser.add_argument('--model', required=True, 416 | metavar="/path/to/weights.h5", 417 | help="Path to weights .h5 file or 'coco'") 418 | parser.add_argument('--logs', required=False, 419 | default=DEFAULT_LOGS_DIR, 420 | metavar="/path/to/logs/", 421 | help='Logs and checkpoints directory (default=logs/)') 422 | parser.add_argument('--limit', required=False, 423 | default=500, 424 | metavar="", 425 | help='Images to use for evaluation (default=500)') 426 | parser.add_argument('--download', required=False, 427 | default=False, 428 | metavar="", 429 | help='Automatically download and unzip MS-COCO files (default=False)', 430 | type=bool) 431 | args = parser.parse_args() 432 | print("Command: ", args.command) 433 | print("Model: ", args.model) 434 | print("Dataset: ", args.dataset) 435 | print("Year: ", args.year) 436 | print("Logs: ", args.logs) 437 | print("Auto Download: ", args.download) 438 | 439 | # Configurations 440 | if args.command == "train": 441 | config = CocoConfig() 442 | else: 443 | class InferenceConfig(CocoConfig): 444 | # Set batch size to 1 since we'll be running inference on 445 | # one image at a time. 
Batch size = GPU_COUNT * IMAGES_PER_GPU 446 | GPU_COUNT = 1 447 | IMAGES_PER_GPU = 1 448 | DETECTION_MIN_CONFIDENCE = 0 449 | config = InferenceConfig() 450 | config.display() 451 | 452 | # Create model 453 | if args.command == "train": 454 | model = modellib.MaskRCNN(mode="training", config=config, 455 | model_dir=args.logs) 456 | else: 457 | model = modellib.MaskRCNN(mode="inference", config=config, 458 | model_dir=args.logs) 459 | 460 | # Select weights file to load 461 | if args.model.lower() == "coco": 462 | model_path = COCO_MODEL_PATH 463 | elif args.model.lower() == "last": 464 | # Find last trained weights 465 | model_path = model.find_last() 466 | elif args.model.lower() == "imagenet": 467 | # Start from ImageNet trained weights 468 | model_path = model.get_imagenet_weights() 469 | else: 470 | model_path = args.model 471 | 472 | # Load weights 473 | print("Loading weights ", model_path) 474 | model.load_weights(model_path, by_name=True) 475 | 476 | # Train or evaluate 477 | if args.command == "train": 478 | # Training dataset. Use the training set and 35K from the 479 | # validation set, as as in the Mask RCNN paper. 480 | dataset_train = CocoDataset() 481 | dataset_train.load_coco(args.dataset, "train", year=args.year, auto_download=args.download) 482 | if args.year in '2014': 483 | dataset_train.load_coco(args.dataset, "valminusminival", year=args.year, auto_download=args.download) 484 | dataset_train.prepare() 485 | 486 | # Validation dataset 487 | dataset_val = CocoDataset() 488 | val_type = "val" if args.year in '2017' else "minival" 489 | dataset_val.load_coco(args.dataset, val_type, year=args.year, auto_download=args.download) 490 | dataset_val.prepare() 491 | 492 | # Image Augmentation 493 | # Right/Left flip 50% of the time 494 | augmentation = imgaug.augmenters.Fliplr(0.5) 495 | 496 | # *** This training schedule is an example. Update to your needs *** 497 | 498 | # Training - Stage 1 499 | print("Training network heads") 500 | model.train(dataset_train, dataset_val, 501 | learning_rate=config.LEARNING_RATE, 502 | epochs=40, 503 | layers='heads', 504 | augmentation=augmentation) 505 | 506 | # Training - Stage 2 507 | # Finetune layers from ResNet stage 4 and up 508 | print("Fine tune Resnet stage 4 and up") 509 | model.train(dataset_train, dataset_val, 510 | learning_rate=config.LEARNING_RATE, 511 | epochs=120, 512 | layers='4+', 513 | augmentation=augmentation) 514 | 515 | # Training - Stage 3 516 | # Fine tune all layers 517 | print("Fine tune all layers") 518 | model.train(dataset_train, dataset_val, 519 | learning_rate=config.LEARNING_RATE / 10, 520 | epochs=160, 521 | layers='all', 522 | augmentation=augmentation) 523 | 524 | elif args.command == "evaluate": 525 | # Validation dataset 526 | dataset_val = CocoDataset() 527 | val_type = "val" if args.year in '2017' else "minival" 528 | coco = dataset_val.load_coco(args.dataset, val_type, year=args.year, return_coco=True, auto_download=args.download) 529 | dataset_val.prepare() 530 | print("Running COCO evaluation on {} images.".format(args.limit)) 531 | evaluate_coco(model, dataset_val, coco, "bbox", limit=int(args.limit)) 532 | else: 533 | print("'{}' is not recognized. 
" 534 | "Use 'train' or 'evaluate'".format(args.command)) 535 | -------------------------------------------------------------------------------- /demo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Mask R-CNN Demo\n", 8 | "\n", 9 | "A quick intro to using the pre-trained model to detect and segment objects." 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "name": "stderr", 19 | "output_type": "stream", 20 | "text": [ 21 | "Using TensorFlow backend.\n" 22 | ] 23 | } 24 | ], 25 | "source": [ 26 | "import os\n", 27 | "import sys\n", 28 | "import random\n", 29 | "import math\n", 30 | "import numpy as np\n", 31 | "import skimage.io\n", 32 | "import matplotlib\n", 33 | "import matplotlib.pyplot as plt\n", 34 | "\n", 35 | "# Root directory of the project\n", 36 | "ROOT_DIR = os.path.abspath(\"../\")\n", 37 | "\n", 38 | "# Import Mask RCNN\n", 39 | "sys.path.append(ROOT_DIR) # To find local version of the library\n", 40 | "from mrcnn import utils\n", 41 | "import mrcnn.model as modellib\n", 42 | "from mrcnn import visualize\n", 43 | "# Import COCO config\n", 44 | "sys.path.append(os.path.join(ROOT_DIR, \"samples/coco/\")) # To find local version\n", 45 | "import coco\n", 46 | "\n", 47 | "%matplotlib inline \n", 48 | "\n", 49 | "# Directory to save logs and trained model\n", 50 | "MODEL_DIR = os.path.join(ROOT_DIR, \"logs\")\n", 51 | "\n", 52 | "# Local path to trained weights file\n", 53 | "COCO_MODEL_PATH = os.path.join(ROOT_DIR, \"mask_rcnn_coco.h5\")\n", 54 | "# Download COCO trained weights from Releases if needed\n", 55 | "if not os.path.exists(COCO_MODEL_PATH):\n", 56 | " utils.download_trained_weights(COCO_MODEL_PATH)\n", 57 | "\n", 58 | "# Directory of images to run detection on\n", 59 | "IMAGE_DIR = os.path.join(ROOT_DIR, \"images\")" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": {}, 65 | "source": [ 66 | "## Configurations\n", 67 | "\n", 68 | "We'll be using a model trained on the MS-COCO dataset. The configurations of this model are in the ```CocoConfig``` class in ```coco.py```.\n", 69 | "\n", 70 | "For inferencing, modify the configurations a bit to fit the task. To do so, sub-class the ```CocoConfig``` class and override the attributes you need to change." 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 2, 76 | "metadata": {}, 77 | "outputs": [ 78 | { 79 | "ename": "AttributeError", 80 | "evalue": "module 'coco' has no attribute 'CocoConfig'", 81 | "output_type": "error", 82 | "traceback": [ 83 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 84 | "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)", 85 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[1;32mclass\u001b[0m \u001b[0mInferenceConfig\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcoco\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mCocoConfig\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2\u001b[0m \u001b[1;31m# Set batch size to 1 since we'll be running inference on\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[1;31m# one image at a time. 
Batch size = GPU_COUNT * IMAGES_PER_GPU\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[0mGPU_COUNT\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[0mIMAGES_PER_GPU\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 86 | "\u001b[1;31mAttributeError\u001b[0m: module 'coco' has no attribute 'CocoConfig'" 87 | ] 88 | } 89 | ], 90 | "source": [ 91 | "class InferenceConfig(coco.CocoConfig):\n", 92 | " # Set batch size to 1 since we'll be running inference on\n", 93 | " # one image at a time. Batch size = GPU_COUNT * IMAGES_PER_GPU\n", 94 | " GPU_COUNT = 1\n", 95 | " IMAGES_PER_GPU = 1\n", 96 | "\n", 97 | "config = InferenceConfig()\n", 98 | "config.display()" 99 | ] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": {}, 104 | "source": [ 105 | "## Create Model and Load Trained Weights" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": { 112 | "scrolled": false 113 | }, 114 | "outputs": [], 115 | "source": [ 116 | "# Create model object in inference mode.\n", 117 | "model = modellib.MaskRCNN(mode=\"inference\", model_dir=MODEL_DIR, config=config)\n", 118 | "\n", 119 | "# Load weights trained on MS-COCO\n", 120 | "model.load_weights(COCO_MODEL_PATH, by_name=True)" 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "metadata": {}, 126 | "source": [ 127 | "## Class Names\n", 128 | "\n", 129 | "The model classifies objects and returns class IDs, which are integer value that identify each class. Some datasets assign integer values to their classes and some don't. For example, in the MS-COCO dataset, the 'person' class is 1 and 'teddy bear' is 88. The IDs are often sequential, but not always. The COCO dataset, for example, has classes associated with class IDs 70 and 72, but not 71.\n", 130 | "\n", 131 | "To improve consistency, and to support training on data from multiple sources at the same time, our ```Dataset``` class assigns it's own sequential integer IDs to each class. For example, if you load the COCO dataset using our ```Dataset``` class, the 'person' class would get class ID = 1 (just like COCO) and the 'teddy bear' class is 78 (different from COCO). Keep that in mind when mapping class IDs to class names.\n", 132 | "\n", 133 | "To get the list of class names, you'd load the dataset and then use the ```class_names``` property like this.\n", 134 | "```\n", 135 | "# Load COCO dataset\n", 136 | "dataset = coco.CocoDataset()\n", 137 | "dataset.load_coco(COCO_DIR, \"train\")\n", 138 | "dataset.prepare()\n", 139 | "\n", 140 | "# Print class names\n", 141 | "print(dataset.class_names)\n", 142 | "```\n", 143 | "\n", 144 | "We don't want to require you to download the COCO dataset just to run this demo, so we're including the list of class names below. The index of the class name in the list represent its ID (first class is 0, second is 1, third is 2, ...etc.)" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "metadata": {}, 151 | "outputs": [], 152 | "source": [ 153 | "# COCO Class names\n", 154 | "# Index of the class in the list is its ID. 
For example, to get ID of\n", 155 | "# the teddy bear class, use: class_names.index('teddy bear')\n", 156 | "class_names = ['BG', 'person', 'bicycle', 'car', 'motorcycle', 'airplane',\n", 157 | " 'bus', 'train', 'truck', 'boat', 'traffic light',\n", 158 | " 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird',\n", 159 | " 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear',\n", 160 | " 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',\n", 161 | " 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',\n", 162 | " 'kite', 'baseball bat', 'baseball glove', 'skateboard',\n", 163 | " 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup',\n", 164 | " 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',\n", 165 | " 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',\n", 166 | " 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',\n", 167 | " 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',\n", 168 | " 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster',\n", 169 | " 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors',\n", 170 | " 'teddy bear', 'hair drier', 'toothbrush']" 171 | ] 172 | }, 173 | { 174 | "cell_type": "markdown", 175 | "metadata": {}, 176 | "source": [ 177 | "## Run Object Detection" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": null, 183 | "metadata": { 184 | "scrolled": false 185 | }, 186 | "outputs": [], 187 | "source": [ 188 | "# Load a random image from the images folder\n", 189 | "file_names = next(os.walk(IMAGE_DIR))[2]\n", 190 | "image = skimage.io.imread(os.path.join(IMAGE_DIR, random.choice(file_names)))\n", 191 | "\n", 192 | "# Run detection\n", 193 | "results = model.detect([image], verbose=1)\n", 194 | "\n", 195 | "# Visualize results\n", 196 | "r = results[0]\n", 197 | "visualize.display_instances(image, r['rois'], r['masks'], r['class_ids'], \n", 198 | " class_names, r['scores'])" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": null, 204 | "metadata": {}, 205 | "outputs": [], 206 | "source": [] 207 | } 208 | ], 209 | "metadata": { 210 | "kernelspec": { 211 | "display_name": "Python 3", 212 | "language": "python", 213 | "name": "python3" 214 | }, 215 | "language_info": { 216 | "codemirror_mode": { 217 | "name": "ipython", 218 | "version": 3 219 | }, 220 | "file_extension": ".py", 221 | "mimetype": "text/x-python", 222 | "name": "python", 223 | "nbconvert_exporter": "python", 224 | "pygments_lexer": "ipython3", 225 | "version": "3.6.8" 226 | } 227 | }, 228 | "nbformat": 4, 229 | "nbformat_minor": 2 230 | } 231 | -------------------------------------------------------------------------------- /images/1045023827_4ec3e8ba5c_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/images/1045023827_4ec3e8ba5c_z.jpg -------------------------------------------------------------------------------- /images/12283150_12d37e6389_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/images/12283150_12d37e6389_z.jpg -------------------------------------------------------------------------------- /images/2383514521_1fc8d7b0de_z.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/images/2383514521_1fc8d7b0de_z.jpg -------------------------------------------------------------------------------- /images/2502287818_41e4b0c4fb_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/images/2502287818_41e4b0c4fb_z.jpg -------------------------------------------------------------------------------- /images/2516944023_d00345997d_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/images/2516944023_d00345997d_z.jpg -------------------------------------------------------------------------------- /images/25691390_f9944f61b5_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/images/25691390_f9944f61b5_z.jpg -------------------------------------------------------------------------------- /images/262985539_1709e54576_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/images/262985539_1709e54576_z.jpg -------------------------------------------------------------------------------- /images/3132016470_c27baa00e8_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/images/3132016470_c27baa00e8_z.jpg -------------------------------------------------------------------------------- /images/3627527276_6fe8cd9bfe_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/images/3627527276_6fe8cd9bfe_z.jpg -------------------------------------------------------------------------------- /images/3651581213_f81963d1dd_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/images/3651581213_f81963d1dd_z.jpg -------------------------------------------------------------------------------- /images/3800883468_12af3c0b50_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/images/3800883468_12af3c0b50_z.jpg -------------------------------------------------------------------------------- /images/3862500489_6fd195d183_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/images/3862500489_6fd195d183_z.jpg -------------------------------------------------------------------------------- /images/3878153025_8fde829928_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/images/3878153025_8fde829928_z.jpg 
-------------------------------------------------------------------------------- /images/4410436637_7b0ca36ee7_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/images/4410436637_7b0ca36ee7_z.jpg -------------------------------------------------------------------------------- /images/4782628554_668bc31826_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/images/4782628554_668bc31826_z.jpg -------------------------------------------------------------------------------- /images/5951960966_d4e1cda5d0_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/images/5951960966_d4e1cda5d0_z.jpg -------------------------------------------------------------------------------- /images/6584515005_fce9cec486_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/images/6584515005_fce9cec486_z.jpg -------------------------------------------------------------------------------- /images/6821351586_59aa0dc110_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/images/6821351586_59aa0dc110_z.jpg -------------------------------------------------------------------------------- /images/7581246086_cf7bbb7255_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/images/7581246086_cf7bbb7255_z.jpg -------------------------------------------------------------------------------- /images/7933423348_c30bd9bd4e_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/images/7933423348_c30bd9bd4e_z.jpg -------------------------------------------------------------------------------- /images/8053677163_d4c8f416be_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/images/8053677163_d4c8f416be_z.jpg -------------------------------------------------------------------------------- /images/8239308689_efa6c11b08_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/images/8239308689_efa6c11b08_z.jpg -------------------------------------------------------------------------------- /images/8433365521_9252889f9a_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/images/8433365521_9252889f9a_z.jpg -------------------------------------------------------------------------------- /images/8512296263_5fc5458e20_z.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/images/8512296263_5fc5458e20_z.jpg -------------------------------------------------------------------------------- /images/8699757338_c3941051b6_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/images/8699757338_c3941051b6_z.jpg -------------------------------------------------------------------------------- /images/8734543718_37f6b8bd45_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/images/8734543718_37f6b8bd45_z.jpg -------------------------------------------------------------------------------- /images/8829708882_48f263491e_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/images/8829708882_48f263491e_z.jpg -------------------------------------------------------------------------------- /images/9118579087_f9ffa19e63_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/images/9118579087_f9ffa19e63_z.jpg -------------------------------------------------------------------------------- /images/9247489789_132c0d534a_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/images/9247489789_132c0d534a_z.jpg -------------------------------------------------------------------------------- /mrcnn/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /mrcnn/config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Base Configurations class. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | """ 9 | 10 | import numpy as np 11 | 12 | 13 | # Base Configuration Class 14 | # Don't use this class directly. Instead, sub-class it and override 15 | # the configurations you need to change. 16 | 17 | class Config(object): 18 | """Base configuration class. For custom configurations, create a 19 | sub-class that inherits from this one and override properties 20 | that need to be changed. 21 | """ 22 | # Name the configurations. For example, 'COCO', 'Experiment 3', ...etc. 23 | # Useful if your code needs to do things differently depending on which 24 | # experiment is running. 25 | NAME = None # Override in sub-classes 26 | 27 | # NUMBER OF GPUs to use. When using only a CPU, this needs to be set to 1. 28 | GPU_COUNT = 1 29 | 30 | # Number of images to train with on each GPU. A 12GB GPU can typically 31 | # handle 2 images of 1024x1024px. 32 | # Adjust based on your GPU memory and image sizes. Use the highest 33 | # number that your GPU can handle for best performance. 
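    # Illustrative example (using the defaults in this class): GPU_COUNT = 1
    # with IMAGES_PER_GPU = 2 gives an effective batch size of
    # BATCH_SIZE = IMAGES_PER_GPU * GPU_COUNT = 2, as computed in __init__ below.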
34 | IMAGES_PER_GPU = 2 35 | 36 | # Number of training steps per epoch 37 | # This doesn't need to match the size of the training set. Tensorboard 38 | # updates are saved at the end of each epoch, so setting this to a 39 | # smaller number means getting more frequent TensorBoard updates. 40 | # Validation stats are also calculated at each epoch end and they 41 | # might take a while, so don't set this too small to avoid spending 42 | # a lot of time on validation stats. 43 | STEPS_PER_EPOCH = 1000 44 | 45 | # Number of validation steps to run at the end of every training epoch. 46 | # A bigger number improves accuracy of validation stats, but slows 47 | # down the training. 48 | VALIDATION_STEPS = 50 49 | 50 | # Backbone network architecture 51 | # Supported values are: resnet50, resnet101. 52 | # You can also provide a callable that should have the signature 53 | # of model.resnet_graph. If you do so, you need to supply a callable 54 | # to COMPUTE_BACKBONE_SHAPE as well 55 | BACKBONE = "resnet101" 56 | 57 | # Only useful if you supply a callable to BACKBONE. Should compute 58 | # the shape of each layer of the FPN Pyramid. 59 | # See model.compute_backbone_shapes 60 | COMPUTE_BACKBONE_SHAPE = None 61 | 62 | # The strides of each layer of the FPN Pyramid. These values 63 | # are based on a Resnet101 backbone. 64 | BACKBONE_STRIDES = [4, 8, 16, 32, 64] 65 | 66 | # Size of the fully-connected layers in the classification graph 67 | FPN_CLASSIF_FC_LAYERS_SIZE = 1024 68 | 69 | # Size of the top-down layers used to build the feature pyramid 70 | TOP_DOWN_PYRAMID_SIZE = 256 71 | 72 | # Number of classification classes (including background) 73 | NUM_CLASSES = 1 # Override in sub-classes 74 | 75 | # Length of square anchor side in pixels 76 | RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512) 77 | 78 | # Ratios of anchors at each cell (width/height) 79 | # A value of 1 represents a square anchor, and 0.5 is a wide anchor 80 | RPN_ANCHOR_RATIOS = [0.5, 1, 2] 81 | 82 | # Anchor stride 83 | # If 1 then anchors are created for each cell in the backbone feature map. 84 | # If 2, then anchors are created for every other cell, and so on. 85 | RPN_ANCHOR_STRIDE = 1 86 | 87 | # Non-max suppression threshold to filter RPN proposals. 88 | # You can increase this during training to generate more propsals. 89 | RPN_NMS_THRESHOLD = 0.7 90 | 91 | # How many anchors per image to use for RPN training 92 | RPN_TRAIN_ANCHORS_PER_IMAGE = 256 93 | 94 | # ROIs kept after tf.nn.top_k and before non-maximum suppression 95 | PRE_NMS_LIMIT = 6000 96 | 97 | # ROIs kept after non-maximum suppression (training and inference) 98 | POST_NMS_ROIS_TRAINING = 2000 99 | POST_NMS_ROIS_INFERENCE = 1000 100 | 101 | # If enabled, resizes instance masks to a smaller size to reduce 102 | # memory load. Recommended when using high-resolution images. 103 | USE_MINI_MASK = True 104 | MINI_MASK_SHAPE = (56, 56) # (height, width) of the mini-mask 105 | 106 | # Input image resizing 107 | # Generally, use the "square" resizing mode for training and predicting 108 | # and it should work well in most cases. In this mode, images are scaled 109 | # up such that the small side is = IMAGE_MIN_DIM, but ensuring that the 110 | # scaling doesn't make the long side > IMAGE_MAX_DIM. Then the image is 111 | # padded with zeros to make it a square so multiple images can be put 112 | # in one batch. 113 | # Available resizing modes: 114 | # none: No resizing or padding. Return the image unchanged. 
115 | # square: Resize and pad with zeros to get a square image 116 | # of size [max_dim, max_dim]. 117 | # pad64: Pads width and height with zeros to make them multiples of 64. 118 | # If IMAGE_MIN_DIM or IMAGE_MIN_SCALE are not None, then it scales 119 | # up before padding. IMAGE_MAX_DIM is ignored in this mode. 120 | # The multiple of 64 is needed to ensure smooth scaling of feature 121 | # maps up and down the 6 levels of the FPN pyramid (2**6=64). 122 | # crop: Picks random crops from the image. First, scales the image based 123 | # on IMAGE_MIN_DIM and IMAGE_MIN_SCALE, then picks a random crop of 124 | # size IMAGE_MIN_DIM x IMAGE_MIN_DIM. Can be used in training only. 125 | # IMAGE_MAX_DIM is not used in this mode. 126 | IMAGE_RESIZE_MODE = "square" 127 | IMAGE_MIN_DIM = 800 128 | IMAGE_MAX_DIM = 1024 129 | # Minimum scaling ratio. Checked after MIN_IMAGE_DIM and can force further 130 | # up scaling. For example, if set to 2 then images are scaled up to double 131 | # the width and height, or more, even if MIN_IMAGE_DIM doesn't require it. 132 | # However, in 'square' mode, it can be overruled by IMAGE_MAX_DIM. 133 | IMAGE_MIN_SCALE = 0 134 | # Number of color channels per image. RGB = 3, grayscale = 1, RGB-D = 4 135 | # Changing this requires other changes in the code. See the WIKI for more 136 | # details: https://github.com/matterport/Mask_RCNN/wiki 137 | IMAGE_CHANNEL_COUNT = 3 138 | 139 | # Image mean (RGB) 140 | MEAN_PIXEL = np.array([123.7, 116.8, 103.9]) 141 | 142 | # Number of ROIs per image to feed to classifier/mask heads 143 | # The Mask RCNN paper uses 512 but often the RPN doesn't generate 144 | # enough positive proposals to fill this and keep a positive:negative 145 | # ratio of 1:3. You can increase the number of proposals by adjusting 146 | # the RPN NMS threshold. 147 | TRAIN_ROIS_PER_IMAGE = 200 148 | 149 | # Percent of positive ROIs used to train classifier/mask heads 150 | ROI_POSITIVE_RATIO = 0.33 151 | 152 | # Pooled ROIs 153 | POOL_SIZE = 7 154 | MASK_POOL_SIZE = 14 155 | 156 | # Shape of output mask 157 | # To change this you also need to change the neural network mask branch 158 | MASK_SHAPE = [28, 28] 159 | 160 | # Maximum number of ground truth instances to use in one image 161 | MAX_GT_INSTANCES = 100 162 | 163 | # Bounding box refinement standard deviation for RPN and final detections. 164 | RPN_BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2]) 165 | BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2]) 166 | 167 | # Max number of final detections 168 | DETECTION_MAX_INSTANCES = 100 169 | 170 | # Minimum probability value to accept a detected instance 171 | # ROIs below this threshold are skipped 172 | DETECTION_MIN_CONFIDENCE = 0.7 173 | 174 | # Non-maximum suppression threshold for detection 175 | DETECTION_NMS_THRESHOLD = 0.3 176 | 177 | # Learning rate and momentum 178 | # The Mask RCNN paper uses lr=0.02, but on TensorFlow it causes 179 | # weights to explode. Likely due to differences in optimizer 180 | # implementation. 181 | LEARNING_RATE = 0.001 182 | LEARNING_MOMENTUM = 0.9 183 | 184 | # Weight decay regularization 185 | WEIGHT_DECAY = 0.0001 186 | 187 | # Loss weights for more precise optimization. 188 | # Can be used for R-CNN training setup. 189 | LOSS_WEIGHTS = { 190 | "rpn_class_loss": 1., 191 | "rpn_bbox_loss": 1., 192 | "mrcnn_class_loss": 1., 193 | "mrcnn_bbox_loss": 1., 194 | "mrcnn_mask_loss": 1. 195 | } 196 | 197 | # Use RPN ROIs or externally generated ROIs for training 198 | # Keep this True for most situations. 
Set to False if you want to train 199 | # the head branches on ROI generated by code rather than the ROIs from 200 | # the RPN. For example, to debug the classifier head without having to 201 | # train the RPN. 202 | USE_RPN_ROIS = True 203 | 204 | # Train or freeze batch normalization layers 205 | # None: Train BN layers. This is the normal mode 206 | # False: Freeze BN layers. Good when using a small batch size 207 | # True: (don't use). Set layer in training mode even when predicting 208 | TRAIN_BN = False # Defaulting to False since batch size is often small 209 | 210 | # Gradient norm clipping 211 | GRADIENT_CLIP_NORM = 5.0 212 | 213 | def __init__(self): 214 | """Set values of computed attributes.""" 215 | # Effective batch size 216 | self.BATCH_SIZE = self.IMAGES_PER_GPU * self.GPU_COUNT 217 | 218 | # Input image size 219 | if self.IMAGE_RESIZE_MODE == "crop": 220 | self.IMAGE_SHAPE = np.array([self.IMAGE_MIN_DIM, self.IMAGE_MIN_DIM, 221 | self.IMAGE_CHANNEL_COUNT]) 222 | else: 223 | self.IMAGE_SHAPE = np.array([self.IMAGE_MAX_DIM, self.IMAGE_MAX_DIM, 224 | self.IMAGE_CHANNEL_COUNT]) 225 | 226 | # Image meta data length 227 | # See compose_image_meta() for details 228 | self.IMAGE_META_SIZE = 1 + 3 + 3 + 4 + 1 + self.NUM_CLASSES 229 | 230 | def display(self): 231 | """Display Configuration values.""" 232 | print("\nConfigurations:") 233 | for a in dir(self): 234 | if not a.startswith("__") and not callable(getattr(self, a)): 235 | print("{:30} {}".format(a, getattr(self, a))) 236 | print("\n") 237 | -------------------------------------------------------------------------------- /mrcnn/parallel_model.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Multi-GPU Support for Keras. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | 9 | Ideas and a small code snippets from these sources: 10 | https://github.com/fchollet/keras/issues/2436 11 | https://medium.com/@kuza55/transparent-multi-gpu-training-on-tensorflow-with-keras-8b0016fd9012 12 | https://github.com/avolkov1/keras_experiments/blob/master/keras_exp/multigpu/ 13 | https://github.com/fchollet/keras/blob/master/keras/utils/training_utils.py 14 | """ 15 | 16 | import tensorflow as tf 17 | import keras.backend as K 18 | import keras.layers as KL 19 | import keras.models as KM 20 | 21 | 22 | class ParallelModel(KM.Model): 23 | """Subclasses the standard Keras Model and adds multi-GPU support. 24 | It works by creating a copy of the model on each GPU. Then it slices 25 | the inputs and sends a slice to each copy of the model, and then 26 | merges the outputs together and applies the loss on the combined 27 | outputs. 28 | """ 29 | 30 | def __init__(self, keras_model, gpu_count): 31 | """Class constructor. 32 | keras_model: The Keras model to parallelize 33 | gpu_count: Number of GPUs. Must be > 1 34 | """ 35 | self.inner_model = keras_model 36 | self.gpu_count = gpu_count 37 | merged_outputs = self.make_parallel() 38 | super(ParallelModel, self).__init__(inputs=self.inner_model.inputs, 39 | outputs=merged_outputs) 40 | 41 | def __getattribute__(self, attrname): 42 | """Redirect loading and saving methods to the inner model. 
That's where 43 | the weights are stored.""" 44 | if 'load' in attrname or 'save' in attrname: 45 | return getattr(self.inner_model, attrname) 46 | return super(ParallelModel, self).__getattribute__(attrname) 47 | 48 | def summary(self, *args, **kwargs): 49 | """Override summary() to display summaries of both, the wrapper 50 | and inner models.""" 51 | super(ParallelModel, self).summary(*args, **kwargs) 52 | self.inner_model.summary(*args, **kwargs) 53 | 54 | def make_parallel(self): 55 | """Creates a new wrapper model that consists of multiple replicas of 56 | the original model placed on different GPUs. 57 | """ 58 | # Slice inputs. Slice inputs on the CPU to avoid sending a copy 59 | # of the full inputs to all GPUs. Saves on bandwidth and memory. 60 | input_slices = {name: tf.split(x, self.gpu_count) 61 | for name, x in zip(self.inner_model.input_names, 62 | self.inner_model.inputs)} 63 | 64 | output_names = self.inner_model.output_names 65 | outputs_all = [] 66 | for i in range(len(self.inner_model.outputs)): 67 | outputs_all.append([]) 68 | 69 | # Run the model call() on each GPU to place the ops there 70 | for i in range(self.gpu_count): 71 | with tf.device('/gpu:%d' % i): 72 | with tf.name_scope('tower_%d' % i): 73 | # Run a slice of inputs through this replica 74 | zipped_inputs = zip(self.inner_model.input_names, 75 | self.inner_model.inputs) 76 | inputs = [ 77 | KL.Lambda(lambda s: input_slices[name][i], 78 | output_shape=lambda s: (None,) + s[1:])(tensor) 79 | for name, tensor in zipped_inputs] 80 | # Create the model replica and get the outputs 81 | outputs = self.inner_model(inputs) 82 | if not isinstance(outputs, list): 83 | outputs = [outputs] 84 | # Save the outputs for merging back together later 85 | for l, o in enumerate(outputs): 86 | outputs_all[l].append(o) 87 | 88 | # Merge outputs on CPU 89 | with tf.device('/cpu:0'): 90 | merged = [] 91 | for outputs, name in zip(outputs_all, output_names): 92 | # Concatenate or average outputs? 93 | # Outputs usually have a batch dimension and we concatenate 94 | # across it. If they don't, then the output is likely a loss 95 | # or a metric value that gets averaged across the batch. 96 | # Keras expects losses and metrics to be scalars. 97 | if K.int_shape(outputs[0]) == (): 98 | # Average 99 | m = KL.Lambda(lambda o: tf.add_n(o) / len(outputs), name=name)(outputs) 100 | else: 101 | # Concatenate 102 | m = KL.Concatenate(axis=0, name=name)(outputs) 103 | merged.append(m) 104 | return merged 105 | 106 | 107 | if __name__ == "__main__": 108 | # Testing code below. It creates a simple model to train on MNIST and 109 | # tries to run it on 2 GPUs. It saves the graph so it can be viewed 110 | # in TensorBoard. Run it as: 111 | # 112 | # python3 parallel_model.py 113 | 114 | import os 115 | import numpy as np 116 | import keras.optimizers 117 | from keras.datasets import mnist 118 | from keras.preprocessing.image import ImageDataGenerator 119 | 120 | GPU_COUNT = 2 121 | 122 | # Root directory of the project 123 | ROOT_DIR = os.path.abspath("../") 124 | 125 | # Directory to save logs and trained model 126 | MODEL_DIR = os.path.join(ROOT_DIR, "logs") 127 | 128 | def build_model(x_train, num_classes): 129 | # Reset default graph. Keras leaves old ops in the graph, 130 | # which are ignored for execution but clutter graph 131 | # visualization in TensorBoard. 
132 | tf.reset_default_graph() 133 | 134 | inputs = KL.Input(shape=x_train.shape[1:], name="input_image") 135 | x = KL.Conv2D(32, (3, 3), activation='relu', padding="same", 136 | name="conv1")(inputs) 137 | x = KL.Conv2D(64, (3, 3), activation='relu', padding="same", 138 | name="conv2")(x) 139 | x = KL.MaxPooling2D(pool_size=(2, 2), name="pool1")(x) 140 | x = KL.Flatten(name="flat1")(x) 141 | x = KL.Dense(128, activation='relu', name="dense1")(x) 142 | x = KL.Dense(num_classes, activation='softmax', name="dense2")(x) 143 | 144 | return KM.Model(inputs, x, "digit_classifier_model") 145 | 146 | # Load MNIST Data 147 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 148 | x_train = np.expand_dims(x_train, -1).astype('float32') / 255 149 | x_test = np.expand_dims(x_test, -1).astype('float32') / 255 150 | 151 | print('x_train shape:', x_train.shape) 152 | print('x_test shape:', x_test.shape) 153 | 154 | # Build data generator and model 155 | datagen = ImageDataGenerator() 156 | model = build_model(x_train, 10) 157 | 158 | # Add multi-GPU support. 159 | model = ParallelModel(model, GPU_COUNT) 160 | 161 | optimizer = keras.optimizers.SGD(lr=0.01, momentum=0.9, clipnorm=5.0) 162 | 163 | model.compile(loss='sparse_categorical_crossentropy', 164 | optimizer=optimizer, metrics=['accuracy']) 165 | 166 | model.summary() 167 | 168 | # Train 169 | model.fit_generator( 170 | datagen.flow(x_train, y_train, batch_size=64), 171 | steps_per_epoch=50, epochs=10, verbose=1, 172 | validation_data=(x_test, y_test), 173 | callbacks=[keras.callbacks.TensorBoard(log_dir=MODEL_DIR, 174 | write_graph=True)] 175 | ) 176 | -------------------------------------------------------------------------------- /mrcnn/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Common utility functions and classes. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | """ 9 | 10 | import sys 11 | import os 12 | import logging 13 | import math 14 | import random 15 | import numpy as np 16 | import tensorflow as tf 17 | import scipy 18 | import skimage.color 19 | import skimage.io 20 | import skimage.transform 21 | import urllib.request 22 | import shutil 23 | import warnings 24 | from distutils.version import LooseVersion 25 | 26 | # URL from which to download the latest COCO trained weights 27 | COCO_MODEL_URL = "https://github.com/matterport/Mask_RCNN/releases/download/v2.0/mask_rcnn_coco.h5" 28 | 29 | 30 | ############################################################ 31 | # Bounding Boxes 32 | ############################################################ 33 | 34 | def extract_bboxes(mask): 35 | """Compute bounding boxes from masks. 36 | mask: [height, width, num_instances]. Mask pixels are either 1 or 0. 37 | 38 | Returns: bbox array [num_instances, (y1, x1, y2, x2)]. 39 | """ 40 | boxes = np.zeros([mask.shape[-1], 4], dtype=np.int32) 41 | for i in range(mask.shape[-1]): 42 | m = mask[:, :, i] 43 | # Bounding box. 44 | horizontal_indicies = np.where(np.any(m, axis=0))[0] 45 | vertical_indicies = np.where(np.any(m, axis=1))[0] 46 | if horizontal_indicies.shape[0]: 47 | x1, x2 = horizontal_indicies[[0, -1]] 48 | y1, y2 = vertical_indicies[[0, -1]] 49 | # x2 and y2 should not be part of the box. Increment by 1. 50 | x2 += 1 51 | y2 += 1 52 | else: 53 | # No mask for this instance. Might happen due to 54 | # resizing or cropping. 
Set bbox to zeros 55 | x1, x2, y1, y2 = 0, 0, 0, 0 56 | boxes[i] = np.array([y1, x1, y2, x2]) 57 | return boxes.astype(np.int32) 58 | 59 | 60 | def compute_iou(box, boxes, box_area, boxes_area): 61 | """Calculates IoU of the given box with the array of the given boxes. 62 | box: 1D vector [y1, x1, y2, x2] 63 | boxes: [boxes_count, (y1, x1, y2, x2)] 64 | box_area: float. the area of 'box' 65 | boxes_area: array of length boxes_count. 66 | 67 | Note: the areas are passed in rather than calculated here for 68 | efficiency. Calculate once in the caller to avoid duplicate work. 69 | """ 70 | # Calculate intersection areas 71 | y1 = np.maximum(box[0], boxes[:, 0]) 72 | y2 = np.minimum(box[2], boxes[:, 2]) 73 | x1 = np.maximum(box[1], boxes[:, 1]) 74 | x2 = np.minimum(box[3], boxes[:, 3]) 75 | intersection = np.maximum(x2 - x1, 0) * np.maximum(y2 - y1, 0) 76 | union = box_area + boxes_area[:] - intersection[:] 77 | iou = intersection / union 78 | return iou 79 | 80 | 81 | def compute_overlaps(boxes1, boxes2): 82 | """Computes IoU overlaps between two sets of boxes. 83 | boxes1, boxes2: [N, (y1, x1, y2, x2)]. 84 | 85 | For better performance, pass the largest set first and the smaller second. 86 | """ 87 | # Areas of anchors and GT boxes 88 | area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1]) 89 | area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1]) 90 | 91 | # Compute overlaps to generate matrix [boxes1 count, boxes2 count] 92 | # Each cell contains the IoU value. 93 | overlaps = np.zeros((boxes1.shape[0], boxes2.shape[0])) 94 | for i in range(overlaps.shape[1]): 95 | box2 = boxes2[i] 96 | overlaps[:, i] = compute_iou(box2, boxes1, area2[i], area1) 97 | return overlaps 98 | 99 | 100 | def compute_overlaps_masks(masks1, masks2): 101 | """Computes IoU overlaps between two sets of masks. 102 | masks1, masks2: [Height, Width, instances] 103 | """ 104 | 105 | # If either set of masks is empty return empty result 106 | if masks1.shape[-1] == 0 or masks2.shape[-1] == 0: 107 | return np.zeros((masks1.shape[-1], masks2.shape[-1])) 108 | # flatten masks and compute their areas 109 | masks1 = np.reshape(masks1 > .5, (-1, masks1.shape[-1])).astype(np.float32) 110 | masks2 = np.reshape(masks2 > .5, (-1, masks2.shape[-1])).astype(np.float32) 111 | area1 = np.sum(masks1, axis=0) 112 | area2 = np.sum(masks2, axis=0) 113 | 114 | # intersections and union 115 | intersections = np.dot(masks1.T, masks2) 116 | union = area1[:, None] + area2[None, :] - intersections 117 | overlaps = intersections / union 118 | 119 | return overlaps 120 | 121 | 122 | def non_max_suppression(boxes, scores, threshold): 123 | """Performs non-maximum suppression and returns indices of kept boxes. 124 | boxes: [N, (y1, x1, y2, x2)]. Notice that (y2, x2) lays outside the box. 125 | scores: 1-D array of box scores. 126 | threshold: Float. IoU threshold to use for filtering. 
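    Note: this is greedy NMS -- boxes are visited in descending score order,
    and any remaining box whose IoU with the most recently kept box exceeds
    the threshold is discarded.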
127 | """ 128 | assert boxes.shape[0] > 0 129 | if boxes.dtype.kind != "f": 130 | boxes = boxes.astype(np.float32) 131 | 132 | # Compute box areas 133 | y1 = boxes[:, 0] 134 | x1 = boxes[:, 1] 135 | y2 = boxes[:, 2] 136 | x2 = boxes[:, 3] 137 | area = (y2 - y1) * (x2 - x1) 138 | 139 | # Get indicies of boxes sorted by scores (highest first) 140 | ixs = scores.argsort()[::-1] 141 | 142 | pick = [] 143 | while len(ixs) > 0: 144 | # Pick top box and add its index to the list 145 | i = ixs[0] 146 | pick.append(i) 147 | # Compute IoU of the picked box with the rest 148 | iou = compute_iou(boxes[i], boxes[ixs[1:]], area[i], area[ixs[1:]]) 149 | # Identify boxes with IoU over the threshold. This 150 | # returns indices into ixs[1:], so add 1 to get 151 | # indices into ixs. 152 | remove_ixs = np.where(iou > threshold)[0] + 1 153 | # Remove indices of the picked and overlapped boxes. 154 | ixs = np.delete(ixs, remove_ixs) 155 | ixs = np.delete(ixs, 0) 156 | return np.array(pick, dtype=np.int32) 157 | 158 | 159 | def apply_box_deltas(boxes, deltas): 160 | """Applies the given deltas to the given boxes. 161 | boxes: [N, (y1, x1, y2, x2)]. Note that (y2, x2) is outside the box. 162 | deltas: [N, (dy, dx, log(dh), log(dw))] 163 | """ 164 | boxes = boxes.astype(np.float32) 165 | # Convert to y, x, h, w 166 | height = boxes[:, 2] - boxes[:, 0] 167 | width = boxes[:, 3] - boxes[:, 1] 168 | center_y = boxes[:, 0] + 0.5 * height 169 | center_x = boxes[:, 1] + 0.5 * width 170 | # Apply deltas 171 | center_y += deltas[:, 0] * height 172 | center_x += deltas[:, 1] * width 173 | height *= np.exp(deltas[:, 2]) 174 | width *= np.exp(deltas[:, 3]) 175 | # Convert back to y1, x1, y2, x2 176 | y1 = center_y - 0.5 * height 177 | x1 = center_x - 0.5 * width 178 | y2 = y1 + height 179 | x2 = x1 + width 180 | return np.stack([y1, x1, y2, x2], axis=1) 181 | 182 | 183 | def box_refinement_graph(box, gt_box): 184 | """Compute refinement needed to transform box to gt_box. 185 | box and gt_box are [N, (y1, x1, y2, x2)] 186 | """ 187 | box = tf.cast(box, tf.float32) 188 | gt_box = tf.cast(gt_box, tf.float32) 189 | 190 | height = box[:, 2] - box[:, 0] 191 | width = box[:, 3] - box[:, 1] 192 | center_y = box[:, 0] + 0.5 * height 193 | center_x = box[:, 1] + 0.5 * width 194 | 195 | gt_height = gt_box[:, 2] - gt_box[:, 0] 196 | gt_width = gt_box[:, 3] - gt_box[:, 1] 197 | gt_center_y = gt_box[:, 0] + 0.5 * gt_height 198 | gt_center_x = gt_box[:, 1] + 0.5 * gt_width 199 | 200 | dy = (gt_center_y - center_y) / height 201 | dx = (gt_center_x - center_x) / width 202 | dh = tf.log(gt_height / height) 203 | dw = tf.log(gt_width / width) 204 | 205 | result = tf.stack([dy, dx, dh, dw], axis=1) 206 | return result 207 | 208 | 209 | def box_refinement(box, gt_box): 210 | """Compute refinement needed to transform box to gt_box. 211 | box and gt_box are [N, (y1, x1, y2, x2)]. (y2, x2) is 212 | assumed to be outside the box. 
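    Returns: [N, (dy, dx, dh, dw)], where dy/dx are the center shifts normalized
    by the source box height/width and dh/dw are log scale ratios -- the deltas
    that apply_box_deltas() would apply to map box onto gt_box.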
213 | """ 214 | box = box.astype(np.float32) 215 | gt_box = gt_box.astype(np.float32) 216 | 217 | height = box[:, 2] - box[:, 0] 218 | width = box[:, 3] - box[:, 1] 219 | center_y = box[:, 0] + 0.5 * height 220 | center_x = box[:, 1] + 0.5 * width 221 | 222 | gt_height = gt_box[:, 2] - gt_box[:, 0] 223 | gt_width = gt_box[:, 3] - gt_box[:, 1] 224 | gt_center_y = gt_box[:, 0] + 0.5 * gt_height 225 | gt_center_x = gt_box[:, 1] + 0.5 * gt_width 226 | 227 | dy = (gt_center_y - center_y) / height 228 | dx = (gt_center_x - center_x) / width 229 | dh = np.log(gt_height / height) 230 | dw = np.log(gt_width / width) 231 | 232 | return np.stack([dy, dx, dh, dw], axis=1) 233 | 234 | 235 | ############################################################ 236 | # Dataset 237 | ############################################################ 238 | 239 | class Dataset(object): 240 | """The base class for dataset classes. 241 | To use it, create a new class that adds functions specific to the dataset 242 | you want to use. For example: 243 | 244 | class CatsAndDogsDataset(Dataset): 245 | def load_cats_and_dogs(self): 246 | ... 247 | def load_mask(self, image_id): 248 | ... 249 | def image_reference(self, image_id): 250 | ... 251 | 252 | See COCODataset and ShapesDataset as examples. 253 | """ 254 | 255 | def __init__(self, class_map=None): 256 | self._image_ids = [] 257 | self.image_info = [] 258 | # Background is always the first class 259 | self.class_info = [{"source": "", "id": 0, "name": "BG"}] 260 | self.source_class_ids = {} 261 | 262 | def add_class(self, source, class_id, class_name): 263 | assert "." not in source, "Source name cannot contain a dot" 264 | # Does the class exist already? 265 | for info in self.class_info: 266 | if info['source'] == source and info["id"] == class_id: 267 | # source.class_id combination already available, skip 268 | return 269 | # Add the class 270 | self.class_info.append({ 271 | "source": source, 272 | "id": class_id, 273 | "name": class_name, 274 | }) 275 | 276 | def add_image(self, source, image_id, path, **kwargs): 277 | image_info = { 278 | "id": image_id, 279 | "source": source, 280 | "path": path, 281 | } 282 | image_info.update(kwargs) 283 | self.image_info.append(image_info) 284 | 285 | def image_reference(self, image_id): 286 | """Return a link to the image in its source Website or details about 287 | the image that help looking it up or debugging it. 288 | 289 | Override for your dataset, but pass to this function 290 | if you encounter images not in your dataset. 291 | """ 292 | return "" 293 | 294 | def prepare(self, class_map=None): 295 | """Prepares the Dataset class for use. 296 | 297 | TODO: class map is not supported yet. When done, it should handle mapping 298 | classes from different datasets to the same class ID. 299 | """ 300 | 301 | def clean_name(name): 302 | """Returns a shorter version of object names for cleaner display.""" 303 | return ",".join(name.split(",")[:1]) 304 | 305 | # Build (or rebuild) everything else from the info dicts. 
306 | self.num_classes = len(self.class_info) 307 | self.class_ids = np.arange(self.num_classes) 308 | self.class_names = [clean_name(c["name"]) for c in self.class_info] 309 | self.num_images = len(self.image_info) 310 | self._image_ids = np.arange(self.num_images) 311 | 312 | # Mapping from source class and image IDs to internal IDs 313 | self.class_from_source_map = {"{}.{}".format(info['source'], info['id']): id 314 | for info, id in zip(self.class_info, self.class_ids)} 315 | self.image_from_source_map = {"{}.{}".format(info['source'], info['id']): id 316 | for info, id in zip(self.image_info, self.image_ids)} 317 | 318 | # Map sources to class_ids they support 319 | self.sources = list(set([i['source'] for i in self.class_info])) 320 | self.source_class_ids = {} 321 | # Loop over datasets 322 | for source in self.sources: 323 | self.source_class_ids[source] = [] 324 | # Find classes that belong to this dataset 325 | for i, info in enumerate(self.class_info): 326 | # Include BG class in all datasets 327 | if i == 0 or source == info['source']: 328 | self.source_class_ids[source].append(i) 329 | 330 | def map_source_class_id(self, source_class_id): 331 | """Takes a source class ID and returns the int class ID assigned to it. 332 | 333 | For example: 334 | dataset.map_source_class_id("coco.12") -> 23 335 | """ 336 | return self.class_from_source_map[source_class_id] 337 | 338 | def get_source_class_id(self, class_id, source): 339 | """Map an internal class ID to the corresponding class ID in the source dataset.""" 340 | info = self.class_info[class_id] 341 | assert info['source'] == source 342 | return info['id'] 343 | 344 | @property 345 | def image_ids(self): 346 | return self._image_ids 347 | 348 | def source_image_link(self, image_id): 349 | """Returns the path or URL to the image. 350 | Override this to return a URL to the image if it's available online for easy 351 | debugging. 352 | """ 353 | return self.image_info[image_id]["path"] 354 | 355 | def load_image(self, image_id): 356 | """Load the specified image and return a [H,W,3] Numpy array. 357 | """ 358 | # Load image 359 | image = skimage.io.imread(self.image_info[image_id]['path']) 360 | # If grayscale. Convert to RGB for consistency. 361 | if image.ndim != 3: 362 | image = skimage.color.gray2rgb(image) 363 | # If has an alpha channel, remove it for consistency 364 | if image.shape[-1] == 4: 365 | image = image[..., :3] 366 | return image 367 | 368 | def load_mask(self, image_id): 369 | """Load instance masks for the given image. 370 | 371 | Different datasets use different ways to store masks. Override this 372 | method to load instance masks and return them in the form of am 373 | array of binary masks of shape [height, width, instances]. 374 | 375 | Returns: 376 | masks: A bool array of shape [height, width, instance count] with 377 | a binary mask per instance. 378 | class_ids: a 1D array of class IDs of the instance masks. 379 | """ 380 | # Override this function to load a mask from your dataset. 381 | # Otherwise, it returns an empty mask. 382 | logging.warning("You are using the default load_mask(), maybe you need to define your own one.") 383 | mask = np.empty([0, 0, 0]) 384 | class_ids = np.empty([0], np.int32) 385 | return mask, class_ids 386 | 387 | 388 | def resize_image(image, min_dim=None, max_dim=None, min_scale=None, mode="square"): 389 | """Resizes an image keeping the aspect ratio unchanged. 
390 | 391 | min_dim: if provided, resizes the image such that it's smaller 392 | dimension == min_dim 393 | max_dim: if provided, ensures that the image longest side doesn't 394 | exceed this value. 395 | min_scale: if provided, ensure that the image is scaled up by at least 396 | this percent even if min_dim doesn't require it. 397 | mode: Resizing mode. 398 | none: No resizing. Return the image unchanged. 399 | square: Resize and pad with zeros to get a square image 400 | of size [max_dim, max_dim]. 401 | pad64: Pads width and height with zeros to make them multiples of 64. 402 | If min_dim or min_scale are provided, it scales the image up 403 | before padding. max_dim is ignored in this mode. 404 | The multiple of 64 is needed to ensure smooth scaling of feature 405 | maps up and down the 6 levels of the FPN pyramid (2**6=64). 406 | crop: Picks random crops from the image. First, scales the image based 407 | on min_dim and min_scale, then picks a random crop of 408 | size min_dim x min_dim. Can be used in training only. 409 | max_dim is not used in this mode. 410 | 411 | Returns: 412 | image: the resized image 413 | window: (y1, x1, y2, x2). If max_dim is provided, padding might 414 | be inserted in the returned image. If so, this window is the 415 | coordinates of the image part of the full image (excluding 416 | the padding). The x2, y2 pixels are not included. 417 | scale: The scale factor used to resize the image 418 | padding: Padding added to the image [(top, bottom), (left, right), (0, 0)] 419 | """ 420 | # Keep track of image dtype and return results in the same dtype 421 | image_dtype = image.dtype 422 | # Default window (y1, x1, y2, x2) and default scale == 1. 423 | h, w = image.shape[:2] 424 | window = (0, 0, h, w) 425 | scale = 1 426 | padding = [(0, 0), (0, 0), (0, 0)] 427 | crop = None 428 | 429 | if mode == "none": 430 | return image, window, scale, padding, crop 431 | 432 | # Scale? 433 | if min_dim: 434 | # Scale up but not down 435 | scale = max(1, min_dim / min(h, w)) 436 | if min_scale and scale < min_scale: 437 | scale = min_scale 438 | 439 | # Does it exceed max dim? 440 | if max_dim and mode == "square": 441 | image_max = max(h, w) 442 | if round(image_max * scale) > max_dim: 443 | scale = max_dim / image_max 444 | 445 | # Resize image using bilinear interpolation 446 | if scale != 1: 447 | image = resize(image, (round(h * scale), round(w * scale)), 448 | preserve_range=True) 449 | 450 | # Need padding or cropping? 
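    # Illustrative example with the default config (IMAGE_MIN_DIM=800,
    # IMAGE_MAX_DIM=1024): an 800x600 input is scaled by 1024/800 = 1.28 to
    # 1024x768, then in "square" mode padded with 128 zero-valued pixels on the
    # left and right to 1024x1024, so window = (0, 128, 1024, 896), scale = 1.28.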
451 | if mode == "square": 452 | # Get new height and width 453 | h, w = image.shape[:2] 454 | top_pad = (max_dim - h) // 2 455 | bottom_pad = max_dim - h - top_pad 456 | left_pad = (max_dim - w) // 2 457 | right_pad = max_dim - w - left_pad 458 | padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)] 459 | image = np.pad(image, padding, mode='constant', constant_values=0) 460 | window = (top_pad, left_pad, h + top_pad, w + left_pad) 461 | elif mode == "pad64": 462 | h, w = image.shape[:2] 463 | # Both sides must be divisible by 64 464 | assert min_dim % 64 == 0, "Minimum dimension must be a multiple of 64" 465 | # Height 466 | if h % 64 > 0: 467 | max_h = h - (h % 64) + 64 468 | top_pad = (max_h - h) // 2 469 | bottom_pad = max_h - h - top_pad 470 | else: 471 | top_pad = bottom_pad = 0 472 | # Width 473 | if w % 64 > 0: 474 | max_w = w - (w % 64) + 64 475 | left_pad = (max_w - w) // 2 476 | right_pad = max_w - w - left_pad 477 | else: 478 | left_pad = right_pad = 0 479 | padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)] 480 | image = np.pad(image, padding, mode='constant', constant_values=0) 481 | window = (top_pad, left_pad, h + top_pad, w + left_pad) 482 | elif mode == "crop": 483 | # Pick a random crop 484 | h, w = image.shape[:2] 485 | y = random.randint(0, (h - min_dim)) 486 | x = random.randint(0, (w - min_dim)) 487 | crop = (y, x, min_dim, min_dim) 488 | image = image[y:y + min_dim, x:x + min_dim] 489 | window = (0, 0, min_dim, min_dim) 490 | else: 491 | raise Exception("Mode {} not supported".format(mode)) 492 | return image.astype(image_dtype), window, scale, padding, crop 493 | 494 | 495 | def resize_mask(mask, scale, padding, crop=None): 496 | """Resizes a mask using the given scale and padding. 497 | Typically, you get the scale and padding from resize_image() to 498 | ensure both, the image and the mask, are resized consistently. 499 | 500 | scale: mask scaling factor 501 | padding: Padding to add to the mask in the form 502 | [(top, bottom), (left, right), (0, 0)] 503 | """ 504 | # Suppress warning from scipy 0.13.0, the output shape of zoom() is 505 | # calculated with round() instead of int() 506 | with warnings.catch_warnings(): 507 | warnings.simplefilter("ignore") 508 | mask = scipy.ndimage.zoom(mask, zoom=[scale, scale, 1], order=0) 509 | if crop is not None: 510 | y, x, h, w = crop 511 | mask = mask[y:y + h, x:x + w] 512 | else: 513 | mask = np.pad(mask, padding, mode='constant', constant_values=0) 514 | return mask 515 | 516 | 517 | def minimize_mask(bbox, mask, mini_shape): 518 | """Resize masks to a smaller version to reduce memory load. 519 | Mini-masks can be resized back to image scale using expand_masks() 520 | 521 | See inspect_data.ipynb notebook for more details. 522 | """ 523 | mini_mask = np.zeros(mini_shape + (mask.shape[-1],), dtype=bool) 524 | for i in range(mask.shape[-1]): 525 | # Pick slice and cast to bool in case load_mask() returned wrong dtype 526 | m = mask[:, :, i].astype(bool) 527 | y1, x1, y2, x2 = bbox[i][:4] 528 | m = m[y1:y2, x1:x2] 529 | if m.size == 0: 530 | raise Exception("Invalid bounding box with area of zero") 531 | # Resize with bilinear interpolation 532 | m = resize(m, mini_shape) 533 | mini_mask[:, :, i] = np.around(m).astype(np.bool) 534 | return mini_mask 535 | 536 | 537 | def expand_mask(bbox, mini_mask, image_shape): 538 | """Resizes mini masks back to image size. Reverses the change 539 | of minimize_mask(). 540 | 541 | See inspect_data.ipynb notebook for more details. 
542 | """ 543 | mask = np.zeros(image_shape[:2] + (mini_mask.shape[-1],), dtype=bool) 544 | for i in range(mask.shape[-1]): 545 | m = mini_mask[:, :, i] 546 | y1, x1, y2, x2 = bbox[i][:4] 547 | h = y2 - y1 548 | w = x2 - x1 549 | # Resize with bilinear interpolation 550 | m = resize(m, (h, w)) 551 | mask[y1:y2, x1:x2, i] = np.around(m).astype(np.bool) 552 | return mask 553 | 554 | 555 | # TODO: Build and use this function to reduce code duplication 556 | def mold_mask(mask, config): 557 | pass 558 | 559 | 560 | def unmold_mask(mask, bbox, image_shape): 561 | """Converts a mask generated by the neural network to a format similar 562 | to its original shape. 563 | mask: [height, width] of type float. A small, typically 28x28 mask. 564 | bbox: [y1, x1, y2, x2]. The box to fit the mask in. 565 | 566 | Returns a binary mask with the same size as the original image. 567 | """ 568 | threshold = 0.5 569 | y1, x1, y2, x2 = bbox 570 | mask = resize(mask, (y2 - y1, x2 - x1)) 571 | mask = np.where(mask >= threshold, 1, 0).astype(np.bool) 572 | 573 | # Put the mask in the right location. 574 | full_mask = np.zeros(image_shape[:2], dtype=np.bool) 575 | full_mask[y1:y2, x1:x2] = mask 576 | return full_mask 577 | 578 | 579 | ############################################################ 580 | # Anchors 581 | ############################################################ 582 | 583 | def generate_anchors(scales, ratios, shape, feature_stride, anchor_stride): 584 | """ 585 | scales: 1D array of anchor sizes in pixels. Example: [32, 64, 128] 586 | ratios: 1D array of anchor ratios of width/height. Example: [0.5, 1, 2] 587 | shape: [height, width] spatial shape of the feature map over which 588 | to generate anchors. 589 | feature_stride: Stride of the feature map relative to the image in pixels. 590 | anchor_stride: Stride of anchors on the feature map. For example, if the 591 | value is 2 then generate anchors for every other feature map pixel. 592 | """ 593 | # Get all combinations of scales and ratios 594 | scales, ratios = np.meshgrid(np.array(scales), np.array(ratios)) 595 | scales = scales.flatten() 596 | ratios = ratios.flatten() 597 | 598 | # Enumerate heights and widths from scales and ratios 599 | heights = scales / np.sqrt(ratios) 600 | widths = scales * np.sqrt(ratios) 601 | 602 | # Enumerate shifts in feature space 603 | shifts_y = np.arange(0, shape[0], anchor_stride) * feature_stride 604 | shifts_x = np.arange(0, shape[1], anchor_stride) * feature_stride 605 | shifts_x, shifts_y = np.meshgrid(shifts_x, shifts_y) 606 | 607 | # Enumerate combinations of shifts, widths, and heights 608 | box_widths, box_centers_x = np.meshgrid(widths, shifts_x) 609 | box_heights, box_centers_y = np.meshgrid(heights, shifts_y) 610 | 611 | # Reshape to get a list of (y, x) and a list of (h, w) 612 | box_centers = np.stack( 613 | [box_centers_y, box_centers_x], axis=2).reshape([-1, 2]) 614 | box_sizes = np.stack([box_heights, box_widths], axis=2).reshape([-1, 2]) 615 | 616 | # Convert to corner coordinates (y1, x1, y2, x2) 617 | boxes = np.concatenate([box_centers - 0.5 * box_sizes, 618 | box_centers + 0.5 * box_sizes], axis=1) 619 | return boxes 620 | 621 | 622 | def generate_pyramid_anchors(scales, ratios, feature_shapes, feature_strides, 623 | anchor_stride): 624 | """Generate anchors at different levels of a feature pyramid. Each scale 625 | is associated with a level of the pyramid, but each ratio is used in 626 | all levels of the pyramid. 627 | 628 | Returns: 629 | anchors: [N, (y1, x1, y2, x2)]. 
All generated anchors in one array. Sorted 630 | with the same order of the given scales. So, anchors of scale[0] come 631 | first, then anchors of scale[1], and so on. 632 | """ 633 | # Anchors 634 | # [anchor_count, (y1, x1, y2, x2)] 635 | anchors = [] 636 | for i in range(len(scales)): 637 | anchors.append(generate_anchors(scales[i], ratios, feature_shapes[i], 638 | feature_strides[i], anchor_stride)) 639 | return np.concatenate(anchors, axis=0) 640 | 641 | 642 | ############################################################ 643 | # Miscellaneous 644 | ############################################################ 645 | 646 | def trim_zeros(x): 647 | """It's common to have tensors larger than the available data and 648 | pad with zeros. This function removes rows that are all zeros. 649 | 650 | x: [rows, columns]. 651 | """ 652 | assert len(x.shape) == 2 653 | return x[~np.all(x == 0, axis=1)] 654 | 655 | 656 | def compute_matches(gt_boxes, gt_class_ids, gt_masks, 657 | pred_boxes, pred_class_ids, pred_scores, pred_masks, 658 | iou_threshold=0.5, score_threshold=0.0): 659 | """Finds matches between prediction and ground truth instances. 660 | 661 | Returns: 662 | gt_match: 1-D array. For each GT box it has the index of the matched 663 | predicted box. 664 | pred_match: 1-D array. For each predicted box, it has the index of 665 | the matched ground truth box. 666 | overlaps: [pred_boxes, gt_boxes] IoU overlaps. 667 | """ 668 | # Trim zero padding 669 | # TODO: cleaner to do zero unpadding upstream 670 | gt_boxes = trim_zeros(gt_boxes) 671 | gt_masks = gt_masks[..., :gt_boxes.shape[0]] 672 | pred_boxes = trim_zeros(pred_boxes) 673 | pred_scores = pred_scores[:pred_boxes.shape[0]] 674 | # Sort predictions by score from high to low 675 | indices = np.argsort(pred_scores)[::-1] 676 | pred_boxes = pred_boxes[indices] 677 | pred_class_ids = pred_class_ids[indices] 678 | pred_scores = pred_scores[indices] 679 | pred_masks = pred_masks[..., indices] 680 | 681 | # Compute IoU overlaps [pred_masks, gt_masks] 682 | overlaps = compute_overlaps_masks(pred_masks, gt_masks) 683 | 684 | # Loop through predictions and find matching ground truth boxes 685 | match_count = 0 686 | pred_match = -1 * np.ones([pred_boxes.shape[0]]) 687 | gt_match = -1 * np.ones([gt_boxes.shape[0]]) 688 | for i in range(len(pred_boxes)): 689 | # Find best matching ground truth box 690 | # 1. Sort matches by score 691 | sorted_ixs = np.argsort(overlaps[i])[::-1] 692 | # 2. Remove low scores 693 | low_score_idx = np.where(overlaps[i, sorted_ixs] < score_threshold)[0] 694 | if low_score_idx.size > 0: 695 | sorted_ixs = sorted_ixs[:low_score_idx[0]] 696 | # 3. Find the match 697 | for j in sorted_ixs: 698 | # If ground truth box is already matched, go to next one 699 | if gt_match[j] > -1: 700 | continue 701 | # If we reach IoU smaller than the threshold, end the loop 702 | iou = overlaps[i, j] 703 | if iou < iou_threshold: 704 | break 705 | # Do we have a match? 706 | if pred_class_ids[i] == gt_class_ids[j]: 707 | match_count += 1 708 | gt_match[j] = i 709 | pred_match[i] = j 710 | break 711 | 712 | return gt_match, pred_match, overlaps 713 | 714 | 715 | def compute_ap(gt_boxes, gt_class_ids, gt_masks, 716 | pred_boxes, pred_class_ids, pred_scores, pred_masks, 717 | iou_threshold=0.5): 718 | """Compute Average Precision at a set IoU threshold (default 0.5). 719 | 720 | Returns: 721 | mAP: Mean Average Precision 722 | precisions: List of precisions at different class score thresholds. 
723 | recalls: List of recall values at different class score thresholds. 724 | overlaps: [pred_boxes, gt_boxes] IoU overlaps. 725 | """ 726 | # Get matches and overlaps 727 | gt_match, pred_match, overlaps = compute_matches( 728 | gt_boxes, gt_class_ids, gt_masks, 729 | pred_boxes, pred_class_ids, pred_scores, pred_masks, 730 | iou_threshold) 731 | 732 | # Compute precision and recall at each prediction box step 733 | precisions = np.cumsum(pred_match > -1) / (np.arange(len(pred_match)) + 1) 734 | recalls = np.cumsum(pred_match > -1).astype(np.float32) / len(gt_match) 735 | 736 | # Pad with start and end values to simplify the math 737 | precisions = np.concatenate([[0], precisions, [0]]) 738 | recalls = np.concatenate([[0], recalls, [1]]) 739 | 740 | # Ensure precision values decrease but don't increase. This way, the 741 | # precision value at each recall threshold is the maximum it can be 742 | # for all following recall thresholds, as specified by the VOC paper. 743 | for i in range(len(precisions) - 2, -1, -1): 744 | precisions[i] = np.maximum(precisions[i], precisions[i + 1]) 745 | 746 | # Compute mean AP over recall range 747 | indices = np.where(recalls[:-1] != recalls[1:])[0] + 1 748 | mAP = np.sum((recalls[indices] - recalls[indices - 1]) * 749 | precisions[indices]) 750 | 751 | return mAP, precisions, recalls, overlaps 752 | 753 | 754 | def compute_ap_range(gt_box, gt_class_id, gt_mask, 755 | pred_box, pred_class_id, pred_score, pred_mask, 756 | iou_thresholds=None, verbose=1): 757 | """Compute AP over a range or IoU thresholds. Default range is 0.5-0.95.""" 758 | # Default is 0.5 to 0.95 with increments of 0.05 759 | iou_thresholds = iou_thresholds or np.arange(0.5, 1.0, 0.05) 760 | 761 | # Compute AP over range of IoU thresholds 762 | AP = [] 763 | for iou_threshold in iou_thresholds: 764 | ap, precisions, recalls, overlaps =\ 765 | compute_ap(gt_box, gt_class_id, gt_mask, 766 | pred_box, pred_class_id, pred_score, pred_mask, 767 | iou_threshold=iou_threshold) 768 | if verbose: 769 | print("AP @{:.2f}:\t {:.3f}".format(iou_threshold, ap)) 770 | AP.append(ap) 771 | AP = np.array(AP).mean() 772 | if verbose: 773 | print("AP @{:.2f}-{:.2f}:\t {:.3f}".format( 774 | iou_thresholds[0], iou_thresholds[-1], AP)) 775 | return AP 776 | 777 | 778 | def compute_recall(pred_boxes, gt_boxes, iou): 779 | """Compute the recall at the given IoU threshold. It's an indication 780 | of how many GT boxes were found by the given prediction boxes. 781 | 782 | pred_boxes: [N, (y1, x1, y2, x2)] in image coordinates 783 | gt_boxes: [N, (y1, x1, y2, x2)] in image coordinates 784 | """ 785 | # Measure overlaps 786 | overlaps = compute_overlaps(pred_boxes, gt_boxes) 787 | iou_max = np.max(overlaps, axis=1) 788 | iou_argmax = np.argmax(overlaps, axis=1) 789 | positive_ids = np.where(iou_max >= iou)[0] 790 | matched_gt_boxes = iou_argmax[positive_ids] 791 | 792 | recall = len(set(matched_gt_boxes)) / gt_boxes.shape[0] 793 | return recall, positive_ids 794 | 795 | 796 | # ## Batch Slicing 797 | # Some custom layers support a batch size of 1 only, and require a lot of work 798 | # to support batches greater than 1. This function slices an input tensor 799 | # across the batch dimension and feeds batches of size 1. Effectively, 800 | # an easy way to support batches > 1 quickly with little code modification. 801 | # In the long run, it's more efficient to modify the code to support large 802 | # batches and getting rid of this function. 
Consider this a temporary solution 803 | def batch_slice(inputs, graph_fn, batch_size, names=None): 804 | """Splits inputs into slices and feeds each slice to a copy of the given 805 | computation graph and then combines the results. It allows you to run a 806 | graph on a batch of inputs even if the graph is written to support one 807 | instance only. 808 | 809 | inputs: list of tensors. All must have the same first dimension length 810 | graph_fn: A function that returns a TF tensor that's part of a graph. 811 | batch_size: number of slices to divide the data into. 812 | names: If provided, assigns names to the resulting tensors. 813 | """ 814 | if not isinstance(inputs, list): 815 | inputs = [inputs] 816 | 817 | outputs = [] 818 | for i in range(batch_size): 819 | inputs_slice = [x[i] for x in inputs] 820 | output_slice = graph_fn(*inputs_slice) 821 | if not isinstance(output_slice, (tuple, list)): 822 | output_slice = [output_slice] 823 | outputs.append(output_slice) 824 | # Change outputs from a list of slices where each is 825 | # a list of outputs to a list of outputs and each has 826 | # a list of slices 827 | outputs = list(zip(*outputs)) 828 | 829 | if names is None: 830 | names = [None] * len(outputs) 831 | 832 | result = [tf.stack(o, axis=0, name=n) 833 | for o, n in zip(outputs, names)] 834 | if len(result) == 1: 835 | result = result[0] 836 | 837 | return result 838 | 839 | 840 | def download_trained_weights(coco_model_path, verbose=1): 841 | """Download COCO trained weights from Releases. 842 | 843 | coco_model_path: local path of COCO trained weights 844 | """ 845 | if verbose > 0: 846 | print("Downloading pretrained model to " + coco_model_path + " ...") 847 | with urllib.request.urlopen(COCO_MODEL_URL) as resp, open(coco_model_path, 'wb') as out: 848 | shutil.copyfileobj(resp, out) 849 | if verbose > 0: 850 | print("... done downloading pretrained model!") 851 | 852 | 853 | def norm_boxes(boxes, shape): 854 | """Converts boxes from pixel coordinates to normalized coordinates. 855 | boxes: [N, (y1, x1, y2, x2)] in pixel coordinates 856 | shape: [..., (height, width)] in pixels 857 | 858 | Note: In pixel coordinates (y2, x2) is outside the box. But in normalized 859 | coordinates it's inside the box. 860 | 861 | Returns: 862 | [N, (y1, x1, y2, x2)] in normalized coordinates 863 | """ 864 | h, w = shape 865 | scale = np.array([h - 1, w - 1, h - 1, w - 1]) 866 | shift = np.array([0, 0, 1, 1]) 867 | return np.divide((boxes - shift), scale).astype(np.float32) 868 | 869 | 870 | def denorm_boxes(boxes, shape): 871 | """Converts boxes from normalized coordinates to pixel coordinates. 872 | boxes: [N, (y1, x1, y2, x2)] in normalized coordinates 873 | shape: [..., (height, width)] in pixels 874 | 875 | Note: In pixel coordinates (y2, x2) is outside the box. But in normalized 876 | coordinates it's inside the box. 877 | 878 | Returns: 879 | [N, (y1, x1, y2, x2)] in pixel coordinates 880 | """ 881 | h, w = shape 882 | scale = np.array([h - 1, w - 1, h - 1, w - 1]) 883 | shift = np.array([0, 0, 1, 1]) 884 | return np.around(np.multiply(boxes, scale) + shift).astype(np.int32) 885 | 886 | 887 | def resize(image, output_shape, order=1, mode='constant', cval=0, clip=True, 888 | preserve_range=False, anti_aliasing=False, anti_aliasing_sigma=None): 889 | """A wrapper for Scikit-Image resize(). 890 | 891 | Scikit-Image generates warnings on every call to resize() if it doesn't 892 | receive the right parameters. The right parameters depend on the version 893 | of skimage. 
This solves the problem by using different parameters per 894 | version. And it provides a central place to control resizing defaults. 895 | """ 896 | if LooseVersion(skimage.__version__) >= LooseVersion("0.14"): 897 | # New in 0.14: anti_aliasing. Default it to False for backward 898 | # compatibility with skimage 0.13. 899 | return skimage.transform.resize( 900 | image, output_shape, 901 | order=order, mode=mode, cval=cval, clip=clip, 902 | preserve_range=preserve_range, anti_aliasing=anti_aliasing, 903 | anti_aliasing_sigma=anti_aliasing_sigma) 904 | else: 905 | return skimage.transform.resize( 906 | image, output_shape, 907 | order=order, mode=mode, cval=cval, clip=clip, 908 | preserve_range=preserve_range) 909 | -------------------------------------------------------------------------------- /mrcnn/visualize.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Display and Visualization Functions. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | """ 9 | 10 | import os 11 | import sys 12 | import random 13 | import itertools 14 | import colorsys 15 | 16 | import numpy as np 17 | from skimage.measure import find_contours 18 | import matplotlib.pyplot as plt 19 | from matplotlib import patches, lines 20 | from matplotlib.patches import Polygon 21 | import IPython.display 22 | 23 | # Root directory of the project 24 | ROOT_DIR = os.path.abspath("../") 25 | 26 | # Import Mask RCNN 27 | sys.path.append(ROOT_DIR) # To find local version of the library 28 | from mrcnn import utils 29 | 30 | 31 | ############################################################ 32 | # Visualization 33 | ############################################################ 34 | 35 | def display_images(images, titles=None, cols=4, cmap=None, norm=None, 36 | interpolation=None): 37 | """Display the given set of images, optionally with titles. 38 | images: list or array of image tensors in HWC format. 39 | titles: optional. A list of titles to display with each image. 40 | cols: number of images per row 41 | cmap: Optional. Color map to use. For example, "Blues". 42 | norm: Optional. A Normalize instance to map values to colors. 43 | interpolation: Optional. Image interpolation to use for display. 44 | """ 45 | titles = titles if titles is not None else [""] * len(images) 46 | rows = len(images) // cols + 1 47 | plt.figure(figsize=(14, 14 * rows // cols)) 48 | i = 1 49 | for image, title in zip(images, titles): 50 | plt.subplot(rows, cols, i) 51 | plt.title(title, fontsize=9) 52 | plt.axis('off') 53 | plt.imshow(image.astype(np.uint8), cmap=cmap, 54 | norm=norm, interpolation=interpolation) 55 | i += 1 56 | plt.show() 57 | 58 | 59 | def random_colors(N, bright=True): 60 | """ 61 | Generate random colors. 62 | To get visually distinct colors, generate them in HSV space then 63 | convert to RGB. 64 | """ 65 | brightness = 1.0 if bright else 0.7 66 | hsv = [(i / N, 1, brightness) for i in range(N)] 67 | colors = list(map(lambda c: colorsys.hsv_to_rgb(*c), hsv)) 68 | random.shuffle(colors) 69 | return colors 70 | 71 | 72 | def apply_mask(image, mask, color, alpha=0.5): 73 | """Apply the given mask to the image. 
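    image: [height, width, 3] image array; modified in place and also returned.
    mask: [height, width] binary mask. Pixels where mask == 1 are tinted.
    color: (r, g, b) tuple with values in the 0-1 range, e.g. from random_colors().
    alpha: blending factor. 0 leaves the image unchanged, 1 paints the solid color.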
74 | """ 75 | for c in range(3): 76 | image[:, :, c] = np.where(mask == 1, 77 | image[:, :, c] * 78 | (1 - alpha) + alpha * color[c] * 255, 79 | image[:, :, c]) 80 | return image 81 | 82 | 83 | def display_instances(image, boxes, masks, class_ids, class_names, 84 | scores=None, title="", 85 | figsize=(16, 16), ax=None, 86 | show_mask=True, show_bbox=True, 87 | colors=None, captions=None): 88 | """ 89 | boxes: [num_instance, (y1, x1, y2, x2, class_id)] in image coordinates. 90 | masks: [height, width, num_instances] 91 | class_ids: [num_instances] 92 | class_names: list of class names of the dataset 93 | scores: (optional) confidence scores for each box 94 | title: (optional) Figure title 95 | show_mask, show_bbox: To show masks and bounding boxes or not 96 | figsize: (optional) the size of the image 97 | colors: (optional) An array or colors to use with each object 98 | captions: (optional) A list of strings to use as captions for each object 99 | """ 100 | # Number of instances 101 | N = boxes.shape[0] 102 | if not N: 103 | print("\n*** No instances to display *** \n") 104 | else: 105 | assert boxes.shape[0] == masks.shape[-1] == class_ids.shape[0] 106 | 107 | # If no axis is passed, create one and automatically call show() 108 | auto_show = False 109 | if not ax: 110 | _, ax = plt.subplots(1, figsize=figsize) 111 | auto_show = True 112 | 113 | # Generate random colors 114 | colors = colors or random_colors(N) 115 | 116 | # Show area outside image boundaries. 117 | height, width = image.shape[:2] 118 | ax.set_ylim(height + 10, -10) 119 | ax.set_xlim(-10, width + 10) 120 | ax.axis('off') 121 | ax.set_title(title) 122 | 123 | masked_image = image.astype(np.uint32).copy() 124 | for i in range(N): 125 | color = colors[i] 126 | 127 | # Bounding box 128 | if not np.any(boxes[i]): 129 | # Skip this instance. Has no bbox. Likely lost in image cropping. 130 | continue 131 | y1, x1, y2, x2 = boxes[i] 132 | if show_bbox: 133 | p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, 134 | alpha=0.7, linestyle="dashed", 135 | edgecolor=color, facecolor='none') 136 | ax.add_patch(p) 137 | 138 | # Label 139 | if not captions: 140 | class_id = class_ids[i] 141 | score = scores[i] if scores is not None else None 142 | label = class_names[class_id] 143 | caption = "{} {:.3f}".format(label, score) if score else label 144 | else: 145 | caption = captions[i] 146 | ax.text(x1, y1 + 8, caption, 147 | color='w', size=11, backgroundcolor="none") 148 | 149 | # Mask 150 | mask = masks[:, :, i] 151 | if show_mask: 152 | masked_image = apply_mask(masked_image, mask, color) 153 | 154 | # Mask Polygon 155 | # Pad to ensure proper polygons for masks that touch image edges. 
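        # skimage's find_contours() traces iso-valued contours at level 0.5 and
        # only closes them where values drop below that level, so a one-pixel
        # zero border is added first to close contours of masks that touch the
        # image edge.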
156 | padded_mask = np.zeros( 157 | (mask.shape[0] + 2, mask.shape[1] + 2), dtype=np.uint8) 158 | padded_mask[1:-1, 1:-1] = mask 159 | contours = find_contours(padded_mask, 0.5) 160 | for verts in contours: 161 | # Subtract the padding and flip (y, x) to (x, y) 162 | verts = np.fliplr(verts) - 1 163 | p = Polygon(verts, facecolor="none", edgecolor=color) 164 | ax.add_patch(p) 165 | ax.imshow(masked_image.astype(np.uint8)) 166 | if auto_show: 167 | plt.show() 168 | 169 | 170 | def display_differences(image, 171 | gt_box, gt_class_id, gt_mask, 172 | pred_box, pred_class_id, pred_score, pred_mask, 173 | class_names, title="", ax=None, 174 | show_mask=True, show_box=True, 175 | iou_threshold=0.5, score_threshold=0.5): 176 | """Display ground truth and prediction instances on the same image.""" 177 | # Match predictions to ground truth 178 | gt_match, pred_match, overlaps = utils.compute_matches( 179 | gt_box, gt_class_id, gt_mask, 180 | pred_box, pred_class_id, pred_score, pred_mask, 181 | iou_threshold=iou_threshold, score_threshold=score_threshold) 182 | # Ground truth = green. Predictions = red 183 | colors = [(0, 1, 0, .8)] * len(gt_match)\ 184 | + [(1, 0, 0, 1)] * len(pred_match) 185 | # Concatenate GT and predictions 186 | class_ids = np.concatenate([gt_class_id, pred_class_id]) 187 | scores = np.concatenate([np.zeros([len(gt_match)]), pred_score]) 188 | boxes = np.concatenate([gt_box, pred_box]) 189 | masks = np.concatenate([gt_mask, pred_mask], axis=-1) 190 | # Captions per instance show score/IoU 191 | captions = ["" for m in gt_match] + ["{:.2f} / {:.2f}".format( 192 | pred_score[i], 193 | (overlaps[i, int(pred_match[i])] 194 | if pred_match[i] > -1 else overlaps[i].max())) 195 | for i in range(len(pred_match))] 196 | # Set title if not provided 197 | title = title or "Ground Truth and Detections\n GT=green, pred=red, captions: score/IoU" 198 | # Display 199 | display_instances( 200 | image, 201 | boxes, masks, class_ids, 202 | class_names, scores, ax=ax, 203 | show_bbox=show_box, show_mask=show_mask, 204 | colors=colors, captions=captions, 205 | title=title) 206 | 207 | 208 | def draw_rois(image, rois, refined_rois, mask, class_ids, class_names, limit=10): 209 | """ 210 | anchors: [n, (y1, x1, y2, x2)] list of anchors in image coordinates. 211 | proposals: [n, 4] the same anchors but refined to fit objects better. 212 | """ 213 | masked_image = image.copy() 214 | 215 | # Pick random anchors in case there are too many. 216 | ids = np.arange(rois.shape[0], dtype=np.int32) 217 | ids = np.random.choice( 218 | ids, limit, replace=False) if ids.shape[0] > limit else ids 219 | 220 | fig, ax = plt.subplots(1, figsize=(12, 12)) 221 | if rois.shape[0] > limit: 222 | plt.title("Showing {} random ROIs out of {}".format( 223 | len(ids), rois.shape[0])) 224 | else: 225 | plt.title("{} ROIs".format(len(ids))) 226 | 227 | # Show area outside image boundaries. 
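    # Passing the limits top-to-bottom inverts the y-axis so the origin sits at
    # the top-left corner, matching image (row, column) coordinates; the extra
    # margin keeps ROIs near the borders fully visible.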
228 | ax.set_ylim(image.shape[0] + 20, -20) 229 | ax.set_xlim(-50, image.shape[1] + 20) 230 | ax.axis('off') 231 | 232 | for i, id in enumerate(ids): 233 | color = np.random.rand(3) 234 | class_id = class_ids[id] 235 | # ROI 236 | y1, x1, y2, x2 = rois[id] 237 | p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, 238 | edgecolor=color if class_id else "gray", 239 | facecolor='none', linestyle="dashed") 240 | ax.add_patch(p) 241 | # Refined ROI 242 | if class_id: 243 | ry1, rx1, ry2, rx2 = refined_rois[id] 244 | p = patches.Rectangle((rx1, ry1), rx2 - rx1, ry2 - ry1, linewidth=2, 245 | edgecolor=color, facecolor='none') 246 | ax.add_patch(p) 247 | # Connect the top-left corners of the anchor and proposal for easy visualization 248 | ax.add_line(lines.Line2D([x1, rx1], [y1, ry1], color=color)) 249 | 250 | # Label 251 | label = class_names[class_id] 252 | ax.text(rx1, ry1 + 8, "{}".format(label), 253 | color='w', size=11, backgroundcolor="none") 254 | 255 | # Mask 256 | m = utils.unmold_mask(mask[id], rois[id] 257 | [:4].astype(np.int32), image.shape) 258 | masked_image = apply_mask(masked_image, m, color) 259 | 260 | ax.imshow(masked_image) 261 | 262 | # Print stats 263 | print("Positive ROIs: ", class_ids[class_ids > 0].shape[0]) 264 | print("Negative ROIs: ", class_ids[class_ids == 0].shape[0]) 265 | print("Positive Ratio: {:.2f}".format( 266 | class_ids[class_ids > 0].shape[0] / class_ids.shape[0])) 267 | 268 | 269 | # TODO: Replace with matplotlib equivalent? 270 | def draw_box(image, box, color): 271 | """Draw 3-pixel width bounding boxes on the given image array. 272 | color: list of 3 int values for RGB. 273 | """ 274 | y1, x1, y2, x2 = box 275 | image[y1:y1 + 2, x1:x2] = color 276 | image[y2:y2 + 2, x1:x2] = color 277 | image[y1:y2, x1:x1 + 2] = color 278 | image[y1:y2, x2:x2 + 2] = color 279 | return image 280 | 281 | 282 | def display_top_masks(image, mask, class_ids, class_names, limit=4): 283 | """Display the given image and the top few class masks.""" 284 | to_display = [] 285 | titles = [] 286 | to_display.append(image) 287 | titles.append("H x W={}x{}".format(image.shape[0], image.shape[1])) 288 | # Pick top prominent classes in this image 289 | unique_class_ids = np.unique(class_ids) 290 | mask_area = [np.sum(mask[:, :, np.where(class_ids == i)[0]]) 291 | for i in unique_class_ids] 292 | top_ids = [v[0] for v in sorted(zip(unique_class_ids, mask_area), 293 | key=lambda r: r[1], reverse=True) if v[1] > 0] 294 | # Generate images and titles 295 | for i in range(limit): 296 | class_id = top_ids[i] if i < len(top_ids) else -1 297 | # Pull masks of instances belonging to the same class. 298 | m = mask[:, :, np.where(class_ids == class_id)[0]] 299 | m = np.sum(m * np.arange(1, m.shape[-1] + 1), -1) 300 | to_display.append(m) 301 | titles.append(class_names[class_id] if class_id != -1 else "-") 302 | display_images(to_display, titles=titles, cols=limit + 1, cmap="Blues_r") 303 | 304 | 305 | def plot_precision_recall(AP, precisions, recalls): 306 | """Draw the precision-recall curve. 307 | 308 | AP: Average precision at IoU >= 0.5 309 | precisions: list of precision values 310 | recalls: list of recall values 311 | """ 312 | # Plot the Precision-Recall curve 313 | _, ax = plt.subplots(1) 314 | ax.set_title("Precision-Recall Curve. 
AP@50 = {:.3f}".format(AP)) 315 | ax.set_ylim(0, 1.1) 316 | ax.set_xlim(0, 1.1) 317 | _ = ax.plot(recalls, precisions) 318 | 319 | 320 | def plot_overlaps(gt_class_ids, pred_class_ids, pred_scores, 321 | overlaps, class_names, threshold=0.5): 322 | """Draw a grid showing how ground truth objects are classified. 323 | gt_class_ids: [N] int. Ground truth class IDs 324 | pred_class_id: [N] int. Predicted class IDs 325 | pred_scores: [N] float. The probability scores of predicted classes 326 | overlaps: [pred_boxes, gt_boxes] IoU overlaps of predictions and GT boxes. 327 | class_names: list of all class names in the dataset 328 | threshold: Float. The prediction probability required to predict a class 329 | """ 330 | gt_class_ids = gt_class_ids[gt_class_ids != 0] 331 | pred_class_ids = pred_class_ids[pred_class_ids != 0] 332 | 333 | plt.figure(figsize=(12, 10)) 334 | plt.imshow(overlaps, interpolation='nearest', cmap=plt.cm.Blues) 335 | plt.yticks(np.arange(len(pred_class_ids)), 336 | ["{} ({:.2f})".format(class_names[int(id)], pred_scores[i]) 337 | for i, id in enumerate(pred_class_ids)]) 338 | plt.xticks(np.arange(len(gt_class_ids)), 339 | [class_names[int(id)] for id in gt_class_ids], rotation=90) 340 | 341 | thresh = overlaps.max() / 2. 342 | for i, j in itertools.product(range(overlaps.shape[0]), 343 | range(overlaps.shape[1])): 344 | text = "" 345 | if overlaps[i, j] > threshold: 346 | text = "match" if gt_class_ids[j] == pred_class_ids[i] else "wrong" 347 | color = ("white" if overlaps[i, j] > thresh 348 | else "black" if overlaps[i, j] > 0 349 | else "grey") 350 | plt.text(j, i, "{:.3f}\n{}".format(overlaps[i, j], text), 351 | horizontalalignment="center", verticalalignment="center", 352 | fontsize=9, color=color) 353 | 354 | plt.tight_layout() 355 | plt.xlabel("Ground Truth") 356 | plt.ylabel("Predictions") 357 | 358 | 359 | def draw_boxes(image, boxes=None, refined_boxes=None, 360 | masks=None, captions=None, visibilities=None, 361 | title="", ax=None): 362 | """Draw bounding boxes and segmentation masks with different 363 | customizations. 364 | 365 | boxes: [N, (y1, x1, y2, x2, class_id)] in image coordinates. 366 | refined_boxes: Like boxes, but draw with solid lines to show 367 | that they're the result of refining 'boxes'. 368 | masks: [N, height, width] 369 | captions: List of N titles to display on each box 370 | visibilities: (optional) List of values of 0, 1, or 2. Determine how 371 | prominent each bounding box should be. 372 | title: An optional title to show over the image 373 | ax: (optional) Matplotlib axis to draw on. 374 | """ 375 | # Number of boxes 376 | assert boxes is not None or refined_boxes is not None 377 | N = boxes.shape[0] if boxes is not None else refined_boxes.shape[0] 378 | 379 | # Matplotlib Axis 380 | if not ax: 381 | _, ax = plt.subplots(1, figsize=(12, 12)) 382 | 383 | # Generate random colors 384 | colors = random_colors(N) 385 | 386 | # Show area outside image boundaries. 
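    # A margin of 10% of the image height is added on every side so boxes that
    # extend slightly past the image edges remain visible.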
387 | margin = image.shape[0] // 10 388 | ax.set_ylim(image.shape[0] + margin, -margin) 389 | ax.set_xlim(-margin, image.shape[1] + margin) 390 | ax.axis('off') 391 | 392 | ax.set_title(title) 393 | 394 | masked_image = image.astype(np.uint32).copy() 395 | for i in range(N): 396 | # Box visibility 397 | visibility = visibilities[i] if visibilities is not None else 1 398 | if visibility == 0: 399 | color = "gray" 400 | style = "dotted" 401 | alpha = 0.5 402 | elif visibility == 1: 403 | color = colors[i] 404 | style = "dotted" 405 | alpha = 1 406 | elif visibility == 2: 407 | color = colors[i] 408 | style = "solid" 409 | alpha = 1 410 | 411 | # Boxes 412 | if boxes is not None: 413 | if not np.any(boxes[i]): 414 | # Skip this instance. Has no bbox. Likely lost in cropping. 415 | continue 416 | y1, x1, y2, x2 = boxes[i] 417 | p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, 418 | alpha=alpha, linestyle=style, 419 | edgecolor=color, facecolor='none') 420 | ax.add_patch(p) 421 | 422 | # Refined boxes 423 | if refined_boxes is not None and visibility > 0: 424 | ry1, rx1, ry2, rx2 = refined_boxes[i].astype(np.int32) 425 | p = patches.Rectangle((rx1, ry1), rx2 - rx1, ry2 - ry1, linewidth=2, 426 | edgecolor=color, facecolor='none') 427 | ax.add_patch(p) 428 | # Connect the top-left corners of the anchor and proposal 429 | if boxes is not None: 430 | ax.add_line(lines.Line2D([x1, rx1], [y1, ry1], color=color)) 431 | 432 | # Captions 433 | if captions is not None: 434 | caption = captions[i] 435 | # If there are refined boxes, display captions on them 436 | if refined_boxes is not None: 437 | y1, x1, y2, x2 = ry1, rx1, ry2, rx2 438 | ax.text(x1, y1, caption, size=11, verticalalignment='top', 439 | color='w', backgroundcolor="none", 440 | bbox={'facecolor': color, 'alpha': 0.5, 441 | 'pad': 2, 'edgecolor': 'none'}) 442 | 443 | # Masks 444 | if masks is not None: 445 | mask = masks[:, :, i] 446 | masked_image = apply_mask(masked_image, mask, color) 447 | # Mask Polygon 448 | # Pad to ensure proper polygons for masks that touch image edges. 449 | padded_mask = np.zeros( 450 | (mask.shape[0] + 2, mask.shape[1] + 2), dtype=np.uint8) 451 | padded_mask[1:-1, 1:-1] = mask 452 | contours = find_contours(padded_mask, 0.5) 453 | for verts in contours: 454 | # Subtract the padding and flip (y, x) to (x, y) 455 | verts = np.fliplr(verts) - 1 456 | p = Polygon(verts, facecolor="none", edgecolor=color) 457 | ax.add_patch(p) 458 | ax.imshow(masked_image.astype(np.uint8)) 459 | 460 | 461 | def display_table(table): 462 | """Display values in a table format. 463 | table: an iterable of rows, and each row is an iterable of values. 464 | """ 465 | html = "" 466 | for row in table: 467 | row_html = "" 468 | for col in row: 469 | row_html += "{:40}".format(str(col)) 470 | html += "" + row_html + "" 471 | html = "" + html + "
" 472 | IPython.display.display(IPython.display.HTML(html)) 473 | 474 | 475 | def display_weight_stats(model): 476 | """Scans all the weights in the model and returns a list of tuples 477 | that contain stats about each weight. 478 | """ 479 | layers = model.get_trainable_layers() 480 | table = [["WEIGHT NAME", "SHAPE", "MIN", "MAX", "STD"]] 481 | for l in layers: 482 | weight_values = l.get_weights() # list of Numpy arrays 483 | weight_tensors = l.weights # list of TF tensors 484 | for i, w in enumerate(weight_values): 485 | weight_name = weight_tensors[i].name 486 | # Detect problematic layers. Exclude biases of conv layers. 487 | alert = "" 488 | if w.min() == w.max() and not (l.__class__.__name__ == "Conv2D" and i == 1): 489 | alert += "*** dead?" 490 | if np.abs(w.min()) > 1000 or np.abs(w.max()) > 1000: 491 | alert += "*** Overflow?" 492 | # Add row 493 | table.append([ 494 | weight_name + alert, 495 | str(w.shape), 496 | "{:+9.4f}".format(w.min()), 497 | "{:+10.4f}".format(w.max()), 498 | "{:+9.4f}".format(w.std()), 499 | ]) 500 | display_table(table) 501 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | scipy 3 | Pillow 4 | cython 5 | matplotlib 6 | scikit-image 7 | tensorflow>=1.3.0 8 | keras>=2.0.8 9 | opencv-python 10 | h5py 11 | imgaug 12 | IPython[all] -------------------------------------------------------------------------------- /samples/README.md: -------------------------------------------------------------------------------- 1 | # Mask R-CNN for Object Detection and Segmentation -------------------------------------------------------------------------------- /samples/balloon/README.md: -------------------------------------------------------------------------------- 1 | # Color Splash Example 2 | 3 | This is an example showing the use of Mask RCNN in a real application. 4 | We train the model to detect balloons only, and then we use the generated 5 | masks to keep balloons in color while changing the rest of the image to 6 | grayscale. 7 | 8 | 9 | [This blog post](https://engineering.matterport.com/splash-of-color-instance-segmentation-with-mask-r-cnn-and-tensorflow-7c761e238b46) describes this sample in more detail. 10 | 11 | ![Balloon Color Splash](/assets/balloon_color_splash.gif) 12 | 13 | 14 | ## Installation 15 | From the [Releases page](https://github.com/matterport/Mask_RCNN/releases) page: 16 | 1. Download `mask_rcnn_balloon.h5`. Save it in the root directory of the repo (the `mask_rcnn` directory). 17 | 2. Download `balloon_dataset.zip`. Expand it such that it's in the path `mask_rcnn/datasets/balloon/`. 18 | 19 | ## Apply color splash using the provided weights 20 | Apply splash effect on an image: 21 | 22 | ```bash 23 | python3 balloon.py splash --weights=/path/to/mask_rcnn/mask_rcnn_balloon.h5 --image= 24 | ``` 25 | 26 | Apply splash effect on a video. Requires OpenCV 3.2+: 27 | 28 | ```bash 29 | python3 balloon.py splash --weights=/path/to/mask_rcnn/mask_rcnn_balloon.h5 --video= 30 | ``` 31 | 32 | 33 | ## Run Jupyter notebooks 34 | Open the `inspect_balloon_data.ipynb` or `inspect_balloon_model.ipynb` Jupter notebooks. You can use these notebooks to explore the dataset and run through the detection pipelie step by step. 
35 | 36 | ## Train the Balloon model 37 | 38 | Train a new model starting from pre-trained COCO weights 39 | ``` 40 | python3 balloon.py train --dataset=/path/to/balloon/dataset --weights=coco 41 | ``` 42 | 43 | Resume training a model that you had trained earlier 44 | ``` 45 | python3 balloon.py train --dataset=/path/to/balloon/dataset --weights=last 46 | ``` 47 | 48 | Train a new model starting from ImageNet weights 49 | ``` 50 | python3 balloon.py train --dataset=/path/to/balloon/dataset --weights=imagenet 51 | ``` 52 | 53 | The code in `balloon.py` is set to train for 3K steps (30 epochs of 100 steps each), and using a batch size of 2. 54 | Update the schedule to fit your needs. 55 | -------------------------------------------------------------------------------- /samples/balloon/balloon.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Train on the toy Balloon dataset and implement color splash effect. 4 | 5 | Copyright (c) 2018 Matterport, Inc. 6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | 9 | ------------------------------------------------------------ 10 | 11 | Usage: import the module (see Jupyter notebooks for examples), or run from 12 | the command line as such: 13 | 14 | # Train a new model starting from pre-trained COCO weights 15 | python3 balloon.py train --dataset=/path/to/balloon/dataset --weights=coco 16 | 17 | # Resume training a model that you had trained earlier 18 | python3 balloon.py train --dataset=/path/to/balloon/dataset --weights=last 19 | 20 | # Train a new model starting from ImageNet weights 21 | python3 balloon.py train --dataset=/path/to/balloon/dataset --weights=imagenet 22 | 23 | # Apply color splash to an image 24 | python3 balloon.py splash --weights=/path/to/weights/file.h5 --image= 25 | 26 | # Apply color splash to video using the last weights you trained 27 | python3 balloon.py splash --weights=last --video= 28 | """ 29 | 30 | import os 31 | import sys 32 | import json 33 | import datetime 34 | import numpy as np 35 | import skimage.draw 36 | 37 | # Root directory of the project 38 | ROOT_DIR = os.path.abspath("../../") 39 | 40 | # Import Mask RCNN 41 | sys.path.append(ROOT_DIR) # To find local version of the library 42 | from mrcnn.config import Config 43 | from mrcnn import model as modellib, utils 44 | 45 | # Path to trained weights file 46 | COCO_WEIGHTS_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5") 47 | 48 | # Directory to save logs and model checkpoints, if not provided 49 | # through the command line argument --logs 50 | DEFAULT_LOGS_DIR = os.path.join(ROOT_DIR, "logs") 51 | 52 | ############################################################ 53 | # Configurations 54 | ############################################################ 55 | 56 | 57 | class BalloonConfig(Config): 58 | """Configuration for training on the toy dataset. 59 | Derives from the base Config class and overrides some values. 60 | """ 61 | # Give the configuration a recognizable name 62 | NAME = "balloon" 63 | 64 | # We use a GPU with 12GB memory, which can fit two images. 65 | # Adjust down if you use a smaller GPU. 
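    # The effective batch size is GPU_COUNT * IMAGES_PER_GPU (GPU_COUNT defaults
    # to 1 in the base Config), so this trains with a batch of 2. On a smaller
    # GPU, IMAGES_PER_GPU = 1 is a reasonable starting point.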
66 | IMAGES_PER_GPU = 2 67 | 68 | # Number of classes (including background) 69 | NUM_CLASSES = 1 + 1 # Background + balloon 70 | 71 | # Number of training steps per epoch 72 | STEPS_PER_EPOCH = 100 73 | 74 | # Skip detections with < 90% confidence 75 | DETECTION_MIN_CONFIDENCE = 0.9 76 | 77 | 78 | ############################################################ 79 | # Dataset 80 | ############################################################ 81 | 82 | class BalloonDataset(utils.Dataset): 83 | 84 | def load_balloon(self, dataset_dir, subset): 85 | """Load a subset of the Balloon dataset. 86 | dataset_dir: Root directory of the dataset. 87 | subset: Subset to load: train or val 88 | """ 89 | # Add classes. We have only one class to add. 90 | self.add_class("balloon", 1, "balloon") 91 | 92 | # Train or validation dataset? 93 | assert subset in ["train", "val"] 94 | dataset_dir = os.path.join(dataset_dir, subset) 95 | 96 | # Load annotations 97 | # VGG Image Annotator (up to version 1.6) saves each image in the form: 98 | # { 'filename': '28503151_5b5b7ec140_b.jpg', 99 | # 'regions': { 100 | # '0': { 101 | # 'region_attributes': {}, 102 | # 'shape_attributes': { 103 | # 'all_points_x': [...], 104 | # 'all_points_y': [...], 105 | # 'name': 'polygon'}}, 106 | # ... more regions ... 107 | # }, 108 | # 'size': 100202 109 | # } 110 | # We mostly care about the x and y coordinates of each region 111 | # Note: In VIA 2.0, regions was changed from a dict to a list. 112 | annotations = json.load(open(os.path.join(dataset_dir, "via_region_data.json"))) 113 | annotations = list(annotations.values()) # don't need the dict keys 114 | 115 | # The VIA tool saves images in the JSON even if they don't have any 116 | # annotations. Skip unannotated images. 117 | annotations = [a for a in annotations if a['regions']] 118 | 119 | # Add images 120 | for a in annotations: 121 | # Get the x, y coordinaets of points of the polygons that make up 122 | # the outline of each object instance. These are stores in the 123 | # shape_attributes (see json format above) 124 | # The if condition is needed to support VIA versions 1.x and 2.x. 125 | if type(a['regions']) is dict: 126 | polygons = [r['shape_attributes'] for r in a['regions'].values()] 127 | else: 128 | polygons = [r['shape_attributes'] for r in a['regions']] 129 | 130 | # load_mask() needs the image size to convert polygons to masks. 131 | # Unfortunately, VIA doesn't include it in JSON, so we must read 132 | # the image. This is only managable since the dataset is tiny. 133 | image_path = os.path.join(dataset_dir, a['filename']) 134 | image = skimage.io.imread(image_path) 135 | height, width = image.shape[:2] 136 | 137 | self.add_image( 138 | "balloon", 139 | image_id=a['filename'], # use file name as a unique image id 140 | path=image_path, 141 | width=width, height=height, 142 | polygons=polygons) 143 | 144 | def load_mask(self, image_id): 145 | """Generate instance masks for an image. 146 | Returns: 147 | masks: A bool array of shape [height, width, instance count] with 148 | one mask per instance. 149 | class_ids: a 1D array of class IDs of the instance masks. 150 | """ 151 | # If not a balloon dataset image, delegate to parent class. 
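        # The base Dataset class returns an empty mask and an empty class ID
        # array for unknown sources, so such images are handled gracefully.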
152 | image_info = self.image_info[image_id] 153 | if image_info["source"] != "balloon": 154 | return super(self.__class__, self).load_mask(image_id) 155 | 156 | # Convert polygons to a bitmap mask of shape 157 | # [height, width, instance_count] 158 | info = self.image_info[image_id] 159 | mask = np.zeros([info["height"], info["width"], len(info["polygons"])], 160 | dtype=np.uint8) 161 | for i, p in enumerate(info["polygons"]): 162 | # Get indexes of pixels inside the polygon and set them to 1 163 | rr, cc = skimage.draw.polygon(p['all_points_y'], p['all_points_x']) 164 | mask[rr, cc, i] = 1 165 | 166 | # Return mask, and array of class IDs of each instance. Since we have 167 | # one class ID only, we return an array of 1s 168 | return mask.astype(np.bool), np.ones([mask.shape[-1]], dtype=np.int32) 169 | 170 | def image_reference(self, image_id): 171 | """Return the path of the image.""" 172 | info = self.image_info[image_id] 173 | if info["source"] == "balloon": 174 | return info["path"] 175 | else: 176 | super(self.__class__, self).image_reference(image_id) 177 | 178 | 179 | def train(model): 180 | """Train the model.""" 181 | # Training dataset. 182 | dataset_train = BalloonDataset() 183 | dataset_train.load_balloon(args.dataset, "train") 184 | dataset_train.prepare() 185 | 186 | # Validation dataset 187 | dataset_val = BalloonDataset() 188 | dataset_val.load_balloon(args.dataset, "val") 189 | dataset_val.prepare() 190 | 191 | # *** This training schedule is an example. Update to your needs *** 192 | # Since we're using a very small dataset, and starting from 193 | # COCO trained weights, we don't need to train too long. Also, 194 | # no need to train all layers, just the heads should do it. 195 | print("Training network heads") 196 | model.train(dataset_train, dataset_val, 197 | learning_rate=config.LEARNING_RATE, 198 | epochs=30, 199 | layers='heads') 200 | 201 | 202 | def color_splash(image, mask): 203 | """Apply color splash effect. 204 | image: RGB image [height, width, 3] 205 | mask: instance segmentation mask [height, width, instance count] 206 | 207 | Returns result image. 208 | """ 209 | # Make a grayscale copy of the image. The grayscale copy still 210 | # has 3 RGB channels, though. 211 | gray = skimage.color.gray2rgb(skimage.color.rgb2gray(image)) * 255 212 | # Copy color pixels from the original color image where mask is set 213 | if mask.shape[-1] > 0: 214 | # We're treating all instances as one, so collapse the mask into one layer 215 | mask = (np.sum(mask, -1, keepdims=True) >= 1) 216 | splash = np.where(mask, image, gray).astype(np.uint8) 217 | else: 218 | splash = gray.astype(np.uint8) 219 | return splash 220 | 221 | 222 | def detect_and_color_splash(model, image_path=None, video_path=None): 223 | assert image_path or video_path 224 | 225 | # Image or video? 
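    # A single image is processed once and saved as a timestamped .png;
    # a video is processed frame by frame and written to a timestamped .avi.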
226 | if image_path: 227 | # Run model detection and generate the color splash effect 228 | print("Running on {}".format(args.image)) 229 | # Read image 230 | image = skimage.io.imread(args.image) 231 | # Detect objects 232 | r = model.detect([image], verbose=1)[0] 233 | # Color splash 234 | splash = color_splash(image, r['masks']) 235 | # Save output 236 | file_name = "splash_{:%Y%m%dT%H%M%S}.png".format(datetime.datetime.now()) 237 | skimage.io.imsave(file_name, splash) 238 | elif video_path: 239 | import cv2 240 | # Video capture 241 | vcapture = cv2.VideoCapture(video_path) 242 | width = int(vcapture.get(cv2.CAP_PROP_FRAME_WIDTH)) 243 | height = int(vcapture.get(cv2.CAP_PROP_FRAME_HEIGHT)) 244 | fps = vcapture.get(cv2.CAP_PROP_FPS) 245 | 246 | # Define codec and create video writer 247 | file_name = "splash_{:%Y%m%dT%H%M%S}.avi".format(datetime.datetime.now()) 248 | vwriter = cv2.VideoWriter(file_name, 249 | cv2.VideoWriter_fourcc(*'MJPG'), 250 | fps, (width, height)) 251 | 252 | count = 0 253 | success = True 254 | while success: 255 | print("frame: ", count) 256 | # Read next image 257 | success, image = vcapture.read() 258 | if success: 259 | # OpenCV returns images as BGR, convert to RGB 260 | image = image[..., ::-1] 261 | # Detect objects 262 | r = model.detect([image], verbose=0)[0] 263 | # Color splash 264 | splash = color_splash(image, r['masks']) 265 | # RGB -> BGR to save image to video 266 | splash = splash[..., ::-1] 267 | # Add image to video writer 268 | vwriter.write(splash) 269 | count += 1 270 | vwriter.release() 271 | print("Saved to ", file_name) 272 | 273 | 274 | ############################################################ 275 | # Training 276 | ############################################################ 277 | 278 | if __name__ == '__main__': 279 | import argparse 280 | 281 | # Parse command line arguments 282 | parser = argparse.ArgumentParser( 283 | description='Train Mask R-CNN to detect balloons.') 284 | parser.add_argument("command", 285 | metavar="", 286 | help="'train' or 'splash'") 287 | parser.add_argument('--dataset', required=False, 288 | metavar="/path/to/balloon/dataset/", 289 | help='Directory of the Balloon dataset') 290 | parser.add_argument('--weights', required=True, 291 | metavar="/path/to/weights.h5", 292 | help="Path to weights .h5 file or 'coco'") 293 | parser.add_argument('--logs', required=False, 294 | default=DEFAULT_LOGS_DIR, 295 | metavar="/path/to/logs/", 296 | help='Logs and checkpoints directory (default=logs/)') 297 | parser.add_argument('--image', required=False, 298 | metavar="path or URL to image", 299 | help='Image to apply the color splash effect on') 300 | parser.add_argument('--video', required=False, 301 | metavar="path or URL to video", 302 | help='Video to apply the color splash effect on') 303 | args = parser.parse_args() 304 | 305 | # Validate arguments 306 | if args.command == "train": 307 | assert args.dataset, "Argument --dataset is required for training" 308 | elif args.command == "splash": 309 | assert args.image or args.video,\ 310 | "Provide --image or --video to apply color splash" 311 | 312 | print("Weights: ", args.weights) 313 | print("Dataset: ", args.dataset) 314 | print("Logs: ", args.logs) 315 | 316 | # Configurations 317 | if args.command == "train": 318 | config = BalloonConfig() 319 | else: 320 | class InferenceConfig(BalloonConfig): 321 | # Set batch size to 1 since we'll be running inference on 322 | # one image at a time. 
Batch size = GPU_COUNT * IMAGES_PER_GPU 323 | GPU_COUNT = 1 324 | IMAGES_PER_GPU = 1 325 | config = InferenceConfig() 326 | config.display() 327 | 328 | # Create model 329 | if args.command == "train": 330 | model = modellib.MaskRCNN(mode="training", config=config, 331 | model_dir=args.logs) 332 | else: 333 | model = modellib.MaskRCNN(mode="inference", config=config, 334 | model_dir=args.logs) 335 | 336 | # Select weights file to load 337 | if args.weights.lower() == "coco": 338 | weights_path = COCO_WEIGHTS_PATH 339 | # Download weights file 340 | if not os.path.exists(weights_path): 341 | utils.download_trained_weights(weights_path) 342 | elif args.weights.lower() == "last": 343 | # Find last trained weights 344 | weights_path = model.find_last() 345 | elif args.weights.lower() == "imagenet": 346 | # Start from ImageNet trained weights 347 | weights_path = model.get_imagenet_weights() 348 | else: 349 | weights_path = args.weights 350 | 351 | # Load weights 352 | print("Loading weights ", weights_path) 353 | if args.weights.lower() == "coco": 354 | # Exclude the last layers because they require a matching 355 | # number of classes 356 | model.load_weights(weights_path, by_name=True, exclude=[ 357 | "mrcnn_class_logits", "mrcnn_bbox_fc", 358 | "mrcnn_bbox", "mrcnn_mask"]) 359 | else: 360 | model.load_weights(weights_path, by_name=True) 361 | 362 | # Train or evaluate 363 | if args.command == "train": 364 | train(model) 365 | elif args.command == "splash": 366 | detect_and_color_splash(model, image_path=args.image, 367 | video_path=args.video) 368 | else: 369 | print("'{}' is not recognized. " 370 | "Use 'train' or 'splash'".format(args.command)) 371 | -------------------------------------------------------------------------------- /samples/coco/coco.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Configurations and data loading code for MS COCO. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | 9 | ------------------------------------------------------------ 10 | 11 | Usage: import the module (see Jupyter notebooks for examples), or run from 12 | the command line as such: 13 | 14 | # Train a new model starting from pre-trained COCO weights 15 | python3 coco.py train --dataset=/path/to/coco/ --model=coco 16 | 17 | # Train a new model starting from ImageNet weights. Also auto download COCO dataset 18 | python3 coco.py train --dataset=/path/to/coco/ --model=imagenet --download=True 19 | 20 | # Continue training a model that you had trained earlier 21 | python3 coco.py train --dataset=/path/to/coco/ --model=/path/to/weights.h5 22 | 23 | # Continue training the last model you trained 24 | python3 coco.py train --dataset=/path/to/coco/ --model=last 25 | 26 | # Run COCO evaluatoin on the last model you trained 27 | python3 coco.py evaluate --dataset=/path/to/coco/ --model=last 28 | """ 29 | 30 | import os 31 | import sys 32 | import time 33 | import numpy as np 34 | import imgaug # https://github.com/aleju/imgaug (pip3 install imgaug) 35 | 36 | # Download and install the Python COCO tools from https://github.com/waleedka/coco 37 | # That's a fork from the original https://github.com/pdollar/coco with a bug 38 | # fix for Python 3. 39 | # I submitted a pull request https://github.com/cocodataset/cocoapi/pull/50 40 | # If the PR is merged then use the original repo. 41 | # Note: Edit PythonAPI/Makefile and replace "python" with "python3". 
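# If a prebuilt package is available for your platform, `pip install pycocotools`
# from PyPI is usually the simplest route; the fork above is mainly needed where
# that install fails (Windows builds were a common case at the time).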
42 | from pycocotools.coco import COCO 43 | from pycocotools.cocoeval import COCOeval 44 | from pycocotools import mask as maskUtils 45 | 46 | import zipfile 47 | import urllib.request 48 | import shutil 49 | 50 | # Root directory of the project 51 | ROOT_DIR = os.path.abspath("../../") 52 | 53 | # Import Mask RCNN 54 | sys.path.append(ROOT_DIR) # To find local version of the library 55 | from mrcnn.config import Config 56 | from mrcnn import model as modellib, utils 57 | 58 | # Path to trained weights file 59 | COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5") 60 | 61 | # Directory to save logs and model checkpoints, if not provided 62 | # through the command line argument --logs 63 | DEFAULT_LOGS_DIR = os.path.join(ROOT_DIR, "logs") 64 | DEFAULT_DATASET_YEAR = "2014" 65 | 66 | ############################################################ 67 | # Configurations 68 | ############################################################ 69 | 70 | 71 | class CocoConfig(Config): 72 | """Configuration for training on MS COCO. 73 | Derives from the base Config class and overrides values specific 74 | to the COCO dataset. 75 | """ 76 | # Give the configuration a recognizable name 77 | NAME = "coco" 78 | 79 | # We use a GPU with 12GB memory, which can fit two images. 80 | # Adjust down if you use a smaller GPU. 81 | IMAGES_PER_GPU = 2 82 | 83 | # Uncomment to train on 8 GPUs (default is 1) 84 | # GPU_COUNT = 8 85 | 86 | # Number of classes (including background) 87 | NUM_CLASSES = 1 + 80 # COCO has 80 classes 88 | 89 | 90 | ############################################################ 91 | # Dataset 92 | ############################################################ 93 | 94 | class CocoDataset(utils.Dataset): 95 | def load_coco(self, dataset_dir, subset, year=DEFAULT_DATASET_YEAR, class_ids=None, 96 | class_map=None, return_coco=False, auto_download=False): 97 | """Load a subset of the COCO dataset. 98 | dataset_dir: The root directory of the COCO dataset. 99 | subset: What to load (train, val, minival, valminusminival) 100 | year: What dataset year to load (2014, 2017) as a string, not an integer 101 | class_ids: If provided, only loads images that have the given classes. 102 | class_map: TODO: Not implemented yet. Supports maping classes from 103 | different datasets to the same class ID. 104 | return_coco: If True, returns the COCO object. 105 | auto_download: Automatically download and unzip MS-COCO images and annotations 106 | """ 107 | 108 | if auto_download is True: 109 | self.auto_download(dataset_dir, subset, year) 110 | 111 | coco = COCO("{}/annotations/instances_{}{}.json".format(dataset_dir, subset, year)) 112 | if subset == "minival" or subset == "valminusminival": 113 | subset = "val" 114 | image_dir = "{}/{}{}".format(dataset_dir, subset, year) 115 | 116 | # Load all classes or a subset? 117 | if not class_ids: 118 | # All classes 119 | class_ids = sorted(coco.getCatIds()) 120 | 121 | # All images or a subset? 
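        # When specific classes are requested, gather the IDs of every image that
        # contains at least one of them, then deduplicate (an image can contain
        # several of the requested classes).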
122 | if class_ids: 123 | image_ids = [] 124 | for id in class_ids: 125 | image_ids.extend(list(coco.getImgIds(catIds=[id]))) 126 | # Remove duplicates 127 | image_ids = list(set(image_ids)) 128 | else: 129 | # All images 130 | image_ids = list(coco.imgs.keys()) 131 | 132 | # Add classes 133 | for i in class_ids: 134 | self.add_class("coco", i, coco.loadCats(i)[0]["name"]) 135 | 136 | # Add images 137 | for i in image_ids: 138 | self.add_image( 139 | "coco", image_id=i, 140 | path=os.path.join(image_dir, coco.imgs[i]['file_name']), 141 | width=coco.imgs[i]["width"], 142 | height=coco.imgs[i]["height"], 143 | annotations=coco.loadAnns(coco.getAnnIds( 144 | imgIds=[i], catIds=class_ids, iscrowd=None))) 145 | if return_coco: 146 | return coco 147 | 148 | def auto_download(self, dataDir, dataType, dataYear): 149 | """Download the COCO dataset/annotations if requested. 150 | dataDir: The root directory of the COCO dataset. 151 | dataType: What to load (train, val, minival, valminusminival) 152 | dataYear: What dataset year to load (2014, 2017) as a string, not an integer 153 | Note: 154 | For 2014, use "train", "val", "minival", or "valminusminival" 155 | For 2017, only "train" and "val" annotations are available 156 | """ 157 | 158 | # Setup paths and file names 159 | if dataType == "minival" or dataType == "valminusminival": 160 | imgDir = "{}/{}{}".format(dataDir, "val", dataYear) 161 | imgZipFile = "{}/{}{}.zip".format(dataDir, "val", dataYear) 162 | imgURL = "http://images.cocodataset.org/zips/{}{}.zip".format("val", dataYear) 163 | else: 164 | imgDir = "{}/{}{}".format(dataDir, dataType, dataYear) 165 | imgZipFile = "{}/{}{}.zip".format(dataDir, dataType, dataYear) 166 | imgURL = "http://images.cocodataset.org/zips/{}{}.zip".format(dataType, dataYear) 167 | # print("Image paths:"); print(imgDir); print(imgZipFile); print(imgURL) 168 | 169 | # Create main folder if it doesn't exist yet 170 | if not os.path.exists(dataDir): 171 | os.makedirs(dataDir) 172 | 173 | # Download images if not available locally 174 | if not os.path.exists(imgDir): 175 | os.makedirs(imgDir) 176 | print("Downloading images to " + imgZipFile + " ...") 177 | with urllib.request.urlopen(imgURL) as resp, open(imgZipFile, 'wb') as out: 178 | shutil.copyfileobj(resp, out) 179 | print("... done downloading.") 180 | print("Unzipping " + imgZipFile) 181 | with zipfile.ZipFile(imgZipFile, "r") as zip_ref: 182 | zip_ref.extractall(dataDir) 183 | print("... 
done unzipping") 184 | print("Will use images in " + imgDir) 185 | 186 | # Setup annotations data paths 187 | annDir = "{}/annotations".format(dataDir) 188 | if dataType == "minival": 189 | annZipFile = "{}/instances_minival2014.json.zip".format(dataDir) 190 | annFile = "{}/instances_minival2014.json".format(annDir) 191 | annURL = "https://dl.dropboxusercontent.com/s/o43o90bna78omob/instances_minival2014.json.zip?dl=0" 192 | unZipDir = annDir 193 | elif dataType == "valminusminival": 194 | annZipFile = "{}/instances_valminusminival2014.json.zip".format(dataDir) 195 | annFile = "{}/instances_valminusminival2014.json".format(annDir) 196 | annURL = "https://dl.dropboxusercontent.com/s/s3tw5zcg7395368/instances_valminusminival2014.json.zip?dl=0" 197 | unZipDir = annDir 198 | else: 199 | annZipFile = "{}/annotations_trainval{}.zip".format(dataDir, dataYear) 200 | annFile = "{}/instances_{}{}.json".format(annDir, dataType, dataYear) 201 | annURL = "http://images.cocodataset.org/annotations/annotations_trainval{}.zip".format(dataYear) 202 | unZipDir = dataDir 203 | # print("Annotations paths:"); print(annDir); print(annFile); print(annZipFile); print(annURL) 204 | 205 | # Download annotations if not available locally 206 | if not os.path.exists(annDir): 207 | os.makedirs(annDir) 208 | if not os.path.exists(annFile): 209 | if not os.path.exists(annZipFile): 210 | print("Downloading zipped annotations to " + annZipFile + " ...") 211 | with urllib.request.urlopen(annURL) as resp, open(annZipFile, 'wb') as out: 212 | shutil.copyfileobj(resp, out) 213 | print("... done downloading.") 214 | print("Unzipping " + annZipFile) 215 | with zipfile.ZipFile(annZipFile, "r") as zip_ref: 216 | zip_ref.extractall(unZipDir) 217 | print("... done unzipping") 218 | print("Will use annotations in " + annFile) 219 | 220 | def load_mask(self, image_id): 221 | """Load instance masks for the given image. 222 | 223 | Different datasets use different ways to store masks. This 224 | function converts the different mask format to one format 225 | in the form of a bitmap [height, width, instances]. 226 | 227 | Returns: 228 | masks: A bool array of shape [height, width, instance count] with 229 | one mask per instance. 230 | class_ids: a 1D array of class IDs of the instance masks. 231 | """ 232 | # If not a COCO image, delegate to parent class. 233 | image_info = self.image_info[image_id] 234 | if image_info["source"] != "coco": 235 | return super(CocoDataset, self).load_mask(image_id) 236 | 237 | instance_masks = [] 238 | class_ids = [] 239 | annotations = self.image_info[image_id]["annotations"] 240 | # Build mask of shape [height, width, instance_count] and list 241 | # of class IDs that correspond to each channel of the mask. 242 | for annotation in annotations: 243 | class_id = self.map_source_class_id( 244 | "coco.{}".format(annotation['category_id'])) 245 | if class_id: 246 | m = self.annToMask(annotation, image_info["height"], 247 | image_info["width"]) 248 | # Some objects are so small that they're less than 1 pixel area 249 | # and end up rounded out. Skip those objects. 250 | if m.max() < 1: 251 | continue 252 | # Is it a crowd? If so, use a negative class ID. 253 | if annotation['iscrowd']: 254 | # Use negative class ID for crowds 255 | class_id *= -1 256 | # For crowd masks, annToMask() sometimes returns a mask 257 | # smaller than the given dimensions. If so, resize it. 
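                    # Note: the replacement below is a full-size mask of ones rather
                    # than a true resize. Crowd instances were given a negative class
                    # ID above and are excluded when training targets are built, so
                    # the exact mask contents matter little here.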
258 | if m.shape[0] != image_info["height"] or m.shape[1] != image_info["width"]: 259 | m = np.ones([image_info["height"], image_info["width"]], dtype=bool) 260 | instance_masks.append(m) 261 | class_ids.append(class_id) 262 | 263 | # Pack instance masks into an array 264 | if class_ids: 265 | mask = np.stack(instance_masks, axis=2).astype(np.bool) 266 | class_ids = np.array(class_ids, dtype=np.int32) 267 | return mask, class_ids 268 | else: 269 | # Call super class to return an empty mask 270 | return super(CocoDataset, self).load_mask(image_id) 271 | 272 | def image_reference(self, image_id): 273 | """Return a link to the image in the COCO Website.""" 274 | info = self.image_info[image_id] 275 | if info["source"] == "coco": 276 | return "http://cocodataset.org/#explore?id={}".format(info["id"]) 277 | else: 278 | super(CocoDataset, self).image_reference(image_id) 279 | 280 | # The following two functions are from pycocotools with a few changes. 281 | 282 | def annToRLE(self, ann, height, width): 283 | """ 284 | Convert annotation which can be polygons, uncompressed RLE to RLE. 285 | :return: binary mask (numpy 2D array) 286 | """ 287 | segm = ann['segmentation'] 288 | if isinstance(segm, list): 289 | # polygon -- a single object might consist of multiple parts 290 | # we merge all parts into one mask rle code 291 | rles = maskUtils.frPyObjects(segm, height, width) 292 | rle = maskUtils.merge(rles) 293 | elif isinstance(segm['counts'], list): 294 | # uncompressed RLE 295 | rle = maskUtils.frPyObjects(segm, height, width) 296 | else: 297 | # rle 298 | rle = ann['segmentation'] 299 | return rle 300 | 301 | def annToMask(self, ann, height, width): 302 | """ 303 | Convert annotation which can be polygons, uncompressed RLE, or RLE to binary mask. 304 | :return: binary mask (numpy 2D array) 305 | """ 306 | rle = self.annToRLE(ann, height, width) 307 | m = maskUtils.decode(rle) 308 | return m 309 | 310 | 311 | ############################################################ 312 | # COCO Evaluation 313 | ############################################################ 314 | 315 | def build_coco_results(dataset, image_ids, rois, class_ids, scores, masks): 316 | """Arrange resutls to match COCO specs in http://cocodataset.org/#format 317 | """ 318 | # If no results, return an empty list 319 | if rois is None: 320 | return [] 321 | 322 | results = [] 323 | for image_id in image_ids: 324 | # Loop through detections 325 | for i in range(rois.shape[0]): 326 | class_id = class_ids[i] 327 | score = scores[i] 328 | bbox = np.around(rois[i], 1) 329 | mask = masks[:, :, i] 330 | 331 | result = { 332 | "image_id": image_id, 333 | "category_id": dataset.get_source_class_id(class_id, "coco"), 334 | "bbox": [bbox[1], bbox[0], bbox[3] - bbox[1], bbox[2] - bbox[0]], 335 | "score": score, 336 | "segmentation": maskUtils.encode(np.asfortranarray(mask)) 337 | } 338 | results.append(result) 339 | return results 340 | 341 | 342 | def evaluate_coco(model, dataset, coco, eval_type="bbox", limit=0, image_ids=None): 343 | """Runs official COCO evaluation. 344 | dataset: A Dataset object with valiadtion data 345 | eval_type: "bbox" or "segm" for bounding box or segmentation evaluation 346 | limit: if not 0, it's the number of images to use for evaluation 347 | """ 348 | # Pick COCO images from the dataset 349 | image_ids = image_ids or dataset.image_ids 350 | 351 | # Limit to a subset 352 | if limit: 353 | image_ids = image_ids[:limit] 354 | 355 | # Get corresponding COCO image IDs. 
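    # The Dataset class assigns its own internal indices; the original COCO image
    # IDs that COCOeval expects were stored in image_info when the images were added.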
356 | coco_image_ids = [dataset.image_info[id]["id"] for id in image_ids] 357 | 358 | t_prediction = 0 359 | t_start = time.time() 360 | 361 | results = [] 362 | for i, image_id in enumerate(image_ids): 363 | # Load image 364 | image = dataset.load_image(image_id) 365 | 366 | # Run detection 367 | t = time.time() 368 | r = model.detect([image], verbose=0)[0] 369 | t_prediction += (time.time() - t) 370 | 371 | # Convert results to COCO format 372 | # Cast masks to uint8 because COCO tools errors out on bool 373 | image_results = build_coco_results(dataset, coco_image_ids[i:i + 1], 374 | r["rois"], r["class_ids"], 375 | r["scores"], 376 | r["masks"].astype(np.uint8)) 377 | results.extend(image_results) 378 | 379 | # Load results. This modifies results with additional attributes. 380 | coco_results = coco.loadRes(results) 381 | 382 | # Evaluate 383 | cocoEval = COCOeval(coco, coco_results, eval_type) 384 | cocoEval.params.imgIds = coco_image_ids 385 | cocoEval.evaluate() 386 | cocoEval.accumulate() 387 | cocoEval.summarize() 388 | 389 | print("Prediction time: {}. Average {}/image".format( 390 | t_prediction, t_prediction / len(image_ids))) 391 | print("Total time: ", time.time() - t_start) 392 | 393 | 394 | ############################################################ 395 | # Training 396 | ############################################################ 397 | 398 | 399 | if __name__ == '__main__': 400 | import argparse 401 | 402 | # Parse command line arguments 403 | parser = argparse.ArgumentParser( 404 | description='Train Mask R-CNN on MS COCO.') 405 | parser.add_argument("command", 406 | metavar="", 407 | help="'train' or 'evaluate' on MS COCO") 408 | parser.add_argument('--dataset', required=True, 409 | metavar="/path/to/coco/", 410 | help='Directory of the MS-COCO dataset') 411 | parser.add_argument('--year', required=False, 412 | default=DEFAULT_DATASET_YEAR, 413 | metavar="", 414 | help='Year of the MS-COCO dataset (2014 or 2017) (default=2014)') 415 | parser.add_argument('--model', required=True, 416 | metavar="/path/to/weights.h5", 417 | help="Path to weights .h5 file or 'coco'") 418 | parser.add_argument('--logs', required=False, 419 | default=DEFAULT_LOGS_DIR, 420 | metavar="/path/to/logs/", 421 | help='Logs and checkpoints directory (default=logs/)') 422 | parser.add_argument('--limit', required=False, 423 | default=500, 424 | metavar="", 425 | help='Images to use for evaluation (default=500)') 426 | parser.add_argument('--download', required=False, 427 | default=False, 428 | metavar="", 429 | help='Automatically download and unzip MS-COCO files (default=False)', 430 | type=bool) 431 | args = parser.parse_args() 432 | print("Command: ", args.command) 433 | print("Model: ", args.model) 434 | print("Dataset: ", args.dataset) 435 | print("Year: ", args.year) 436 | print("Logs: ", args.logs) 437 | print("Auto Download: ", args.download) 438 | 439 | # Configurations 440 | if args.command == "train": 441 | config = CocoConfig() 442 | else: 443 | class InferenceConfig(CocoConfig): 444 | # Set batch size to 1 since we'll be running inference on 445 | # one image at a time. 
Batch size = GPU_COUNT * IMAGES_PER_GPU 446 | GPU_COUNT = 1 447 | IMAGES_PER_GPU = 1 448 | DETECTION_MIN_CONFIDENCE = 0 449 | config = InferenceConfig() 450 | config.display() 451 | 452 | # Create model 453 | if args.command == "train": 454 | model = modellib.MaskRCNN(mode="training", config=config, 455 | model_dir=args.logs) 456 | else: 457 | model = modellib.MaskRCNN(mode="inference", config=config, 458 | model_dir=args.logs) 459 | 460 | # Select weights file to load 461 | if args.model.lower() == "coco": 462 | model_path = COCO_MODEL_PATH 463 | elif args.model.lower() == "last": 464 | # Find last trained weights 465 | model_path = model.find_last() 466 | elif args.model.lower() == "imagenet": 467 | # Start from ImageNet trained weights 468 | model_path = model.get_imagenet_weights() 469 | else: 470 | model_path = args.model 471 | 472 | # Load weights 473 | print("Loading weights ", model_path) 474 | model.load_weights(model_path, by_name=True) 475 | 476 | # Train or evaluate 477 | if args.command == "train": 478 | # Training dataset. Use the training set and 35K from the 479 | # validation set, as as in the Mask RCNN paper. 480 | dataset_train = CocoDataset() 481 | dataset_train.load_coco(args.dataset, "train", year=args.year, auto_download=args.download) 482 | if args.year in '2014': 483 | dataset_train.load_coco(args.dataset, "valminusminival", year=args.year, auto_download=args.download) 484 | dataset_train.prepare() 485 | 486 | # Validation dataset 487 | dataset_val = CocoDataset() 488 | val_type = "val" if args.year in '2017' else "minival" 489 | dataset_val.load_coco(args.dataset, val_type, year=args.year, auto_download=args.download) 490 | dataset_val.prepare() 491 | 492 | # Image Augmentation 493 | # Right/Left flip 50% of the time 494 | augmentation = imgaug.augmenters.Fliplr(0.5) 495 | 496 | # *** This training schedule is an example. Update to your needs *** 497 | 498 | # Training - Stage 1 499 | print("Training network heads") 500 | model.train(dataset_train, dataset_val, 501 | learning_rate=config.LEARNING_RATE, 502 | epochs=40, 503 | layers='heads', 504 | augmentation=augmentation) 505 | 506 | # Training - Stage 2 507 | # Finetune layers from ResNet stage 4 and up 508 | print("Fine tune Resnet stage 4 and up") 509 | model.train(dataset_train, dataset_val, 510 | learning_rate=config.LEARNING_RATE, 511 | epochs=120, 512 | layers='4+', 513 | augmentation=augmentation) 514 | 515 | # Training - Stage 3 516 | # Fine tune all layers 517 | print("Fine tune all layers") 518 | model.train(dataset_train, dataset_val, 519 | learning_rate=config.LEARNING_RATE / 10, 520 | epochs=160, 521 | layers='all', 522 | augmentation=augmentation) 523 | 524 | elif args.command == "evaluate": 525 | # Validation dataset 526 | dataset_val = CocoDataset() 527 | val_type = "val" if args.year in '2017' else "minival" 528 | coco = dataset_val.load_coco(args.dataset, val_type, year=args.year, return_coco=True, auto_download=args.download) 529 | dataset_val.prepare() 530 | print("Running COCO evaluation on {} images.".format(args.limit)) 531 | evaluate_coco(model, dataset_val, coco, "bbox", limit=int(args.limit)) 532 | else: 533 | print("'{}' is not recognized. 
" 534 | "Use 'train' or 'evaluate'".format(args.command)) 535 | -------------------------------------------------------------------------------- /samples/nucleus/README.md: -------------------------------------------------------------------------------- 1 | # Nuclei Counting and Segmentation 2 | 3 | This sample implements the [2018 Data Science Bowl challenge](https://www.kaggle.com/c/data-science-bowl-2018). 4 | The goal is to segment individual nuclei in microscopy images. 5 | The `nucleus.py` file contains the main parts of the code, and the two Jupyter notebooks 6 | 7 | 8 | ## Command line Usage 9 | Train a new model starting from ImageNet weights using `train` dataset (which is `stage1_train` minus validation set) 10 | ``` 11 | python3 nucleus.py train --dataset=/path/to/dataset --subset=train --weights=imagenet 12 | ``` 13 | 14 | Train a new model starting from specific weights file using the full `stage1_train` dataset 15 | ``` 16 | python3 nucleus.py train --dataset=/path/to/dataset --subset=stage1_train --weights=/path/to/weights.h5 17 | ``` 18 | 19 | Resume training a model that you had trained earlier 20 | ``` 21 | python3 nucleus.py train --dataset=/path/to/dataset --subset=train --weights=last 22 | ``` 23 | 24 | Generate submission file from `stage1_test` images 25 | ``` 26 | python3 nucleus.py detect --dataset=/path/to/dataset --subset=stage1_test --weights= 27 | ``` 28 | 29 | 30 | ## Jupyter notebooks 31 | Two Jupyter notebooks are provided as well: `inspect_nucleus_data.ipynb` and `inspect_nucleus_model.ipynb`. 32 | They explore the dataset, run stats on it, and go through the detection process step by step. 33 | -------------------------------------------------------------------------------- /samples/nucleus/nucleus.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Train on the nuclei segmentation dataset from the 4 | Kaggle 2018 Data Science Bowl 5 | https://www.kaggle.com/c/data-science-bowl-2018/ 6 | 7 | Licensed under the MIT License (see LICENSE for details) 8 | Written by Waleed Abdulla 9 | 10 | ------------------------------------------------------------ 11 | 12 | Usage: import the module (see Jupyter notebooks for examples), or run from 13 | the command line as such: 14 | 15 | # Train a new model starting from ImageNet weights 16 | python3 nucleus.py train --dataset=/path/to/dataset --subset=train --weights=imagenet 17 | 18 | # Train a new model starting from specific weights file 19 | python3 nucleus.py train --dataset=/path/to/dataset --subset=train --weights=/path/to/weights.h5 20 | 21 | # Resume training a model that you had trained earlier 22 | python3 nucleus.py train --dataset=/path/to/dataset --subset=train --weights=last 23 | 24 | # Generate submission file 25 | python3 nucleus.py detect --dataset=/path/to/dataset --subset=train --weights= 26 | """ 27 | 28 | # Set matplotlib backend 29 | # This has to be done before other importa that might 30 | # set it, but only if we're running in script mode 31 | # rather than being imported. 
32 | if __name__ == '__main__': 33 | import matplotlib 34 | # Agg backend runs without a display 35 | matplotlib.use('Agg') 36 | import matplotlib.pyplot as plt 37 | 38 | import os 39 | import sys 40 | import json 41 | import datetime 42 | import numpy as np 43 | import skimage.io 44 | from imgaug import augmenters as iaa 45 | 46 | # Root directory of the project 47 | ROOT_DIR = os.path.abspath("../../") 48 | 49 | # Import Mask RCNN 50 | sys.path.append(ROOT_DIR) # To find local version of the library 51 | from mrcnn.config import Config 52 | from mrcnn import utils 53 | from mrcnn import model as modellib 54 | from mrcnn import visualize 55 | 56 | # Path to trained weights file 57 | COCO_WEIGHTS_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5") 58 | 59 | # Directory to save logs and model checkpoints, if not provided 60 | # through the command line argument --logs 61 | DEFAULT_LOGS_DIR = os.path.join(ROOT_DIR, "logs") 62 | 63 | # Results directory 64 | # Save submission files here 65 | RESULTS_DIR = os.path.join(ROOT_DIR, "results/nucleus/") 66 | 67 | # The dataset doesn't have a standard train/val split, so I picked 68 | # a variety of images to surve as a validation set. 69 | VAL_IMAGE_IDS = [ 70 | "0c2550a23b8a0f29a7575de8c61690d3c31bc897dd5ba66caec201d201a278c2", 71 | "92f31f591929a30e4309ab75185c96ff4314ce0a7ead2ed2c2171897ad1da0c7", 72 | "1e488c42eb1a54a3e8412b1f12cde530f950f238d71078f2ede6a85a02168e1f", 73 | "c901794d1a421d52e5734500c0a2a8ca84651fb93b19cec2f411855e70cae339", 74 | "8e507d58f4c27cd2a82bee79fe27b069befd62a46fdaed20970a95a2ba819c7b", 75 | "60cb718759bff13f81c4055a7679e81326f78b6a193a2d856546097c949b20ff", 76 | "da5f98f2b8a64eee735a398de48ed42cd31bf17a6063db46a9e0783ac13cd844", 77 | "9ebcfaf2322932d464f15b5662cae4d669b2d785b8299556d73fffcae8365d32", 78 | "1b44d22643830cd4f23c9deadb0bd499fb392fb2cd9526d81547d93077d983df", 79 | "97126a9791f0c1176e4563ad679a301dac27c59011f579e808bbd6e9f4cd1034", 80 | "e81c758e1ca177b0942ecad62cf8d321ffc315376135bcbed3df932a6e5b40c0", 81 | "f29fd9c52e04403cd2c7d43b6fe2479292e53b2f61969d25256d2d2aca7c6a81", 82 | "0ea221716cf13710214dcd331a61cea48308c3940df1d28cfc7fd817c83714e1", 83 | "3ab9cab6212fabd723a2c5a1949c2ded19980398b56e6080978e796f45cbbc90", 84 | "ebc18868864ad075548cc1784f4f9a237bb98335f9645ee727dac8332a3e3716", 85 | "bb61fc17daf8bdd4e16fdcf50137a8d7762bec486ede9249d92e511fcb693676", 86 | "e1bcb583985325d0ef5f3ef52957d0371c96d4af767b13e48102bca9d5351a9b", 87 | "947c0d94c8213ac7aaa41c4efc95d854246550298259cf1bb489654d0e969050", 88 | "cbca32daaae36a872a11da4eaff65d1068ff3f154eedc9d3fc0c214a4e5d32bd", 89 | "f4c4db3df4ff0de90f44b027fc2e28c16bf7e5c75ea75b0a9762bbb7ac86e7a3", 90 | "4193474b2f1c72f735b13633b219d9cabdd43c21d9c2bb4dfc4809f104ba4c06", 91 | "f73e37957c74f554be132986f38b6f1d75339f636dfe2b681a0cf3f88d2733af", 92 | "a4c44fc5f5bf213e2be6091ccaed49d8bf039d78f6fbd9c4d7b7428cfcb2eda4", 93 | "cab4875269f44a701c5e58190a1d2f6fcb577ea79d842522dcab20ccb39b7ad2", 94 | "8ecdb93582b2d5270457b36651b62776256ade3aaa2d7432ae65c14f07432d49", 95 | ] 96 | 97 | 98 | ############################################################ 99 | # Configurations 100 | ############################################################ 101 | 102 | class NucleusConfig(Config): 103 | """Configuration for training on the nucleus segmentation dataset.""" 104 | # Give the configuration a recognizable name 105 | NAME = "nucleus" 106 | 107 | # Adjust depending on your GPU memory 108 | IMAGES_PER_GPU = 6 109 | 110 | # Number of classes (including background) 111 | NUM_CLASSES = 
1 + 1 # Background + nucleus 112 | 113 | # Number of training and validation steps per epoch 114 | STEPS_PER_EPOCH = (657 - len(VAL_IMAGE_IDS)) // IMAGES_PER_GPU 115 | VALIDATION_STEPS = max(1, len(VAL_IMAGE_IDS) // IMAGES_PER_GPU) 116 | 117 | # Don't exclude based on confidence. Since we have two classes 118 | # then 0.5 is the minimum anyway as it picks between nucleus and BG 119 | DETECTION_MIN_CONFIDENCE = 0 120 | 121 | # Backbone network architecture 122 | # Supported values are: resnet50, resnet101 123 | BACKBONE = "resnet50" 124 | 125 | # Input image resizing 126 | # Random crops of size 512x512 127 | IMAGE_RESIZE_MODE = "crop" 128 | IMAGE_MIN_DIM = 512 129 | IMAGE_MAX_DIM = 512 130 | IMAGE_MIN_SCALE = 2.0 131 | 132 | # Length of square anchor side in pixels 133 | RPN_ANCHOR_SCALES = (8, 16, 32, 64, 128) 134 | 135 | # ROIs kept after non-maximum suppression (training and inference) 136 | POST_NMS_ROIS_TRAINING = 1000 137 | POST_NMS_ROIS_INFERENCE = 2000 138 | 139 | # Non-max suppression threshold to filter RPN proposals. 140 | # You can increase this during training to generate more proposals. 141 | RPN_NMS_THRESHOLD = 0.9 142 | 143 | # How many anchors per image to use for RPN training 144 | RPN_TRAIN_ANCHORS_PER_IMAGE = 64 145 | 146 | # Image mean (RGB) 147 | MEAN_PIXEL = np.array([43.53, 39.56, 48.22]) 148 | 149 | # If enabled, resizes instance masks to a smaller size to reduce 150 | # memory load. Recommended when using high-resolution images. 151 | USE_MINI_MASK = True 152 | MINI_MASK_SHAPE = (56, 56) # (height, width) of the mini-mask 153 | 154 | # Number of ROIs per image to feed to classifier/mask heads 155 | # The Mask RCNN paper uses 512 but often the RPN doesn't generate 156 | # enough positive proposals to fill this and keep a positive:negative 157 | # ratio of 1:3. You can increase the number of proposals by adjusting 158 | # the RPN NMS threshold. 159 | TRAIN_ROIS_PER_IMAGE = 128 160 | 161 | # Maximum number of ground truth instances to use in one image 162 | MAX_GT_INSTANCES = 200 163 | 164 | # Max number of final detections per image 165 | DETECTION_MAX_INSTANCES = 400 166 | 167 | 168 | class NucleusInferenceConfig(NucleusConfig): 169 | # Set batch size to 1 to run one image at a time 170 | GPU_COUNT = 1 171 | IMAGES_PER_GPU = 1 172 | # Don't resize images for inference 173 | IMAGE_RESIZE_MODE = "pad64" 174 | # Non-max suppression threshold to filter RPN proposals. 175 | # You can increase this during training to generate more proposals. 176 | RPN_NMS_THRESHOLD = 0.7 177 | 178 | 179 | ############################################################ 180 | # Dataset 181 | ############################################################ 182 | 183 | class NucleusDataset(utils.Dataset): 184 | 185 | def load_nucleus(self, dataset_dir, subset): 186 | """Load a subset of the nuclei dataset. 187 | 188 | dataset_dir: Root directory of the dataset 189 | subset: Subset to load. Either the name of the sub-directory, 190 | such as stage1_train, stage1_test, etc., or one of: 191 | * train: stage1_train excluding validation images 192 | * val: validation images from VAL_IMAGE_IDS 193 | """ 194 | # Add classes. We have one class. 195 | # Naming the dataset nucleus, and the class nucleus 196 | self.add_class("nucleus", 1, "nucleus") 197 | 198 | # Which subset?
199 | # "val": use hard-coded list above 200 | # "train": use data from stage1_train minus the hard-coded list above 201 | # else: use the data from the specified sub-directory 202 | assert subset in ["train", "val", "stage1_train", "stage1_test", "stage2_test"] 203 | subset_dir = "stage1_train" if subset in ["train", "val"] else subset 204 | dataset_dir = os.path.join(dataset_dir, subset_dir) 205 | if subset == "val": 206 | image_ids = VAL_IMAGE_IDS 207 | else: 208 | # Get image ids from directory names 209 | image_ids = next(os.walk(dataset_dir))[1] 210 | if subset == "train": 211 | image_ids = list(set(image_ids) - set(VAL_IMAGE_IDS)) 212 | 213 | # Add images 214 | for image_id in image_ids: 215 | self.add_image( 216 | "nucleus", 217 | image_id=image_id, 218 | path=os.path.join(dataset_dir, image_id, "images/{}.png".format(image_id))) 219 | 220 | def load_mask(self, image_id): 221 | """Generate instance masks for an image. 222 | Returns: 223 | masks: A bool array of shape [height, width, instance count] with 224 | one mask per instance. 225 | class_ids: a 1D array of class IDs of the instance masks. 226 | """ 227 | info = self.image_info[image_id] 228 | # Get mask directory from image path 229 | mask_dir = os.path.join(os.path.dirname(os.path.dirname(info['path'])), "masks") 230 | 231 | # Read mask files from .png image 232 | mask = [] 233 | for f in next(os.walk(mask_dir))[2]: 234 | if f.endswith(".png"): 235 | m = skimage.io.imread(os.path.join(mask_dir, f)).astype(np.bool) 236 | mask.append(m) 237 | mask = np.stack(mask, axis=-1) 238 | # Return mask, and array of class IDs of each instance. Since we have 239 | # one class ID, we return an array of ones 240 | return mask, np.ones([mask.shape[-1]], dtype=np.int32) 241 | 242 | def image_reference(self, image_id): 243 | """Return the path of the image.""" 244 | info = self.image_info[image_id] 245 | if info["source"] == "nucleus": 246 | return info["id"] 247 | else: 248 | super(self.__class__, self).image_reference(image_id) 249 | 250 | 251 | ############################################################ 252 | # Training 253 | ############################################################ 254 | 255 | def train(model, dataset_dir, subset): 256 | """Train the model.""" 257 | # Training dataset. 258 | dataset_train = NucleusDataset() 259 | dataset_train.load_nucleus(dataset_dir, subset) 260 | dataset_train.prepare() 261 | 262 | # Validation dataset 263 | dataset_val = NucleusDataset() 264 | dataset_val.load_nucleus(dataset_dir, "val") 265 | dataset_val.prepare() 266 | 267 | # Image augmentation 268 | # http://imgaug.readthedocs.io/en/latest/source/augmenters.html 269 | augmentation = iaa.SomeOf((0, 2), [ 270 | iaa.Fliplr(0.5), 271 | iaa.Flipud(0.5), 272 | iaa.OneOf([iaa.Affine(rotate=90), 273 | iaa.Affine(rotate=180), 274 | iaa.Affine(rotate=270)]), 275 | iaa.Multiply((0.8, 1.5)), 276 | iaa.GaussianBlur(sigma=(0.0, 5.0)) 277 | ]) 278 | 279 | # *** This training schedule is an example. 
Update to your needs *** 280 | 281 | # If starting from imagenet, train heads only for a bit 282 | # since they have random weights 283 | print("Train network heads") 284 | model.train(dataset_train, dataset_val, 285 | learning_rate=config.LEARNING_RATE, 286 | epochs=20, 287 | augmentation=augmentation, 288 | layers='heads') 289 | 290 | print("Train all layers") 291 | model.train(dataset_train, dataset_val, 292 | learning_rate=config.LEARNING_RATE, 293 | epochs=40, 294 | augmentation=augmentation, 295 | layers='all') 296 | 297 | 298 | ############################################################ 299 | # RLE Encoding 300 | ############################################################ 301 | 302 | def rle_encode(mask): 303 | """Encodes a mask in Run Length Encoding (RLE). 304 | Returns a string of space-separated values. 305 | """ 306 | assert mask.ndim == 2, "Mask must be of shape [Height, Width]" 307 | # Flatten it column-wise 308 | m = mask.T.flatten() 309 | # Compute gradient. Equals 1 or -1 at transition points 310 | g = np.diff(np.concatenate([[0], m, [0]]), n=1) 311 | # 1-based indices of transition points (where gradient != 0) 312 | rle = np.where(g != 0)[0].reshape([-1, 2]) + 1 313 | # Convert second index in each pair to length 314 | rle[:, 1] = rle[:, 1] - rle[:, 0] 315 | return " ".join(map(str, rle.flatten())) 316 | 317 | 318 | def rle_decode(rle, shape): 319 | """Decodes an RLE encoded list of space separated 320 | numbers and returns a binary mask.""" 321 | rle = list(map(int, rle.split())) 322 | rle = np.array(rle, dtype=np.int32).reshape([-1, 2]) 323 | rle[:, 1] += rle[:, 0] 324 | rle -= 1 325 | mask = np.zeros([shape[0] * shape[1]], np.bool) 326 | for s, e in rle: 327 | assert 0 <= s < mask.shape[0] 328 | assert 1 <= e <= mask.shape[0], "shape: {} s {} e {}".format(shape, s, e) 329 | mask[s:e] = 1 330 | # Reshape and transpose 331 | mask = mask.reshape([shape[1], shape[0]]).T 332 | return mask 333 | 334 | 335 | def mask_to_rle(image_id, mask, scores): 336 | "Encodes instance masks to submission format."
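A quick round trip through `rle_encode()` and `rle_decode()` above makes the format concrete (an illustrative sketch; the toy mask is made up here): a 2x3 mask whose middle column is set flattens column-wise to `[0, 0, 1, 1, 0, 0]`, i.e. a single run starting at 1-based position 3 with length 2.

```python
import numpy as np

# Toy 2x3 mask with only the middle column set.
toy_mask = np.array([[0, 1, 0],
                     [0, 1, 0]], dtype=bool)

print(rle_encode(toy_mask))       # -> "3 2"
print(rle_decode("3 2", (2, 3)))  # -> recovers the same 2x3 boolean mask
```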
337 | assert mask.ndim == 3, "Mask must be [H, W, count]" 338 | # If mask is empty, return line with image ID only 339 | if mask.shape[-1] == 0: 340 | return "{},".format(image_id) 341 | # Remove mask overlaps 342 | # Multiply each instance mask by its score order 343 | # then take the maximum across the last dimension 344 | order = np.argsort(scores)[::-1] + 1 # 1-based descending 345 | mask = np.max(mask * np.reshape(order, [1, 1, -1]), -1) 346 | # Loop over instance masks 347 | lines = [] 348 | for o in order: 349 | m = np.where(mask == o, 1, 0) 350 | # Skip if empty 351 | if m.sum() == 0.0: 352 | continue 353 | rle = rle_encode(m) 354 | lines.append("{}, {}".format(image_id, rle)) 355 | return "\n".join(lines) 356 | 357 | 358 | ############################################################ 359 | # Detection 360 | ############################################################ 361 | 362 | def detect(model, dataset_dir, subset): 363 | """Run detection on images in the given directory.""" 364 | print("Running on {}".format(dataset_dir)) 365 | 366 | # Create directory 367 | if not os.path.exists(RESULTS_DIR): 368 | os.makedirs(RESULTS_DIR) 369 | submit_dir = "submit_{:%Y%m%dT%H%M%S}".format(datetime.datetime.now()) 370 | submit_dir = os.path.join(RESULTS_DIR, submit_dir) 371 | os.makedirs(submit_dir) 372 | 373 | # Read dataset 374 | dataset = NucleusDataset() 375 | dataset.load_nucleus(dataset_dir, subset) 376 | dataset.prepare() 377 | # Load over images 378 | submission = [] 379 | for image_id in dataset.image_ids: 380 | # Load image and run detection 381 | image = dataset.load_image(image_id) 382 | # Detect objects 383 | r = model.detect([image], verbose=0)[0] 384 | # Encode image to RLE. Returns a string of multiple lines 385 | source_id = dataset.image_info[image_id]["id"] 386 | rle = mask_to_rle(source_id, r["masks"], r["scores"]) 387 | submission.append(rle) 388 | # Save image with masks 389 | visualize.display_instances( 390 | image, r['rois'], r['masks'], r['class_ids'], 391 | dataset.class_names, r['scores'], 392 | show_bbox=False, show_mask=False, 393 | title="Predictions") 394 | plt.savefig("{}/{}.png".format(submit_dir, dataset.image_info[image_id]["id"])) 395 | 396 | # Save to csv file 397 | submission = "ImageId,EncodedPixels\n" + "\n".join(submission) 398 | file_path = os.path.join(submit_dir, "submit.csv") 399 | with open(file_path, "w") as f: 400 | f.write(submission) 401 | print("Saved to ", submit_dir) 402 | 403 | 404 | ############################################################ 405 | # Command Line 406 | ############################################################ 407 | 408 | if __name__ == '__main__': 409 | import argparse 410 | 411 | # Parse command line arguments 412 | parser = argparse.ArgumentParser( 413 | description='Mask R-CNN for nuclei counting and segmentation') 414 | parser.add_argument("command", 415 | metavar="", 416 | help="'train' or 'detect'") 417 | parser.add_argument('--dataset', required=False, 418 | metavar="/path/to/dataset/", 419 | help='Root directory of the dataset') 420 | parser.add_argument('--weights', required=True, 421 | metavar="/path/to/weights.h5", 422 | help="Path to weights .h5 file or 'coco'") 423 | parser.add_argument('--logs', required=False, 424 | default=DEFAULT_LOGS_DIR, 425 | metavar="/path/to/logs/", 426 | help='Logs and checkpoints directory (default=logs/)') 427 | parser.add_argument('--subset', required=False, 428 | metavar="Dataset sub-directory", 429 | help="Subset of dataset to run prediction on") 430 | args = 
parser.parse_args() 431 | 432 | # Validate arguments 433 | if args.command == "train": 434 | assert args.dataset, "Argument --dataset is required for training" 435 | elif args.command == "detect": 436 | assert args.subset, "Provide --subset to run prediction on" 437 | 438 | print("Weights: ", args.weights) 439 | print("Dataset: ", args.dataset) 440 | if args.subset: 441 | print("Subset: ", args.subset) 442 | print("Logs: ", args.logs) 443 | 444 | # Configurations 445 | if args.command == "train": 446 | config = NucleusConfig() 447 | else: 448 | config = NucleusInferenceConfig() 449 | config.display() 450 | 451 | # Create model 452 | if args.command == "train": 453 | model = modellib.MaskRCNN(mode="training", config=config, 454 | model_dir=args.logs) 455 | else: 456 | model = modellib.MaskRCNN(mode="inference", config=config, 457 | model_dir=args.logs) 458 | 459 | # Select weights file to load 460 | if args.weights.lower() == "coco": 461 | weights_path = COCO_WEIGHTS_PATH 462 | # Download weights file 463 | if not os.path.exists(weights_path): 464 | utils.download_trained_weights(weights_path) 465 | elif args.weights.lower() == "last": 466 | # Find last trained weights 467 | weights_path = model.find_last() 468 | elif args.weights.lower() == "imagenet": 469 | # Start from ImageNet trained weights 470 | weights_path = model.get_imagenet_weights() 471 | else: 472 | weights_path = args.weights 473 | 474 | # Load weights 475 | print("Loading weights ", weights_path) 476 | if args.weights.lower() == "coco": 477 | # Exclude the last layers because they require a matching 478 | # number of classes 479 | model.load_weights(weights_path, by_name=True, exclude=[ 480 | "mrcnn_class_logits", "mrcnn_bbox_fc", 481 | "mrcnn_bbox", "mrcnn_mask"]) 482 | else: 483 | model.load_weights(weights_path, by_name=True) 484 | 485 | # Train or evaluate 486 | if args.command == "train": 487 | train(model, args.dataset, args.subset) 488 | elif args.command == "detect": 489 | detect(model, args.dataset, args.subset) 490 | else: 491 | print("'{}' is not recognized. " 492 | "Use 'train' or 'detect'".format(args.command)) 493 | -------------------------------------------------------------------------------- /samples/shapes/shapes.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Configurations and data loading code for the synthetic Shapes dataset. 4 | This is a duplicate of the code in the noteobook train_shapes.ipynb for easy 5 | import into other notebooks, such as inspect_model.ipynb. 6 | 7 | Copyright (c) 2017 Matterport, Inc. 8 | Licensed under the MIT License (see LICENSE for details) 9 | Written by Waleed Abdulla 10 | """ 11 | 12 | import os 13 | import sys 14 | import math 15 | import random 16 | import numpy as np 17 | import cv2 18 | 19 | # Root directory of the project 20 | ROOT_DIR = os.path.abspath("../../") 21 | 22 | # Import Mask RCNN 23 | sys.path.append(ROOT_DIR) # To find local version of the library 24 | from mrcnn.config import Config 25 | from mrcnn import utils 26 | 27 | 28 | class ShapesConfig(Config): 29 | """Configuration for training on the toy shapes dataset. 30 | Derives from the base Config class and overrides values specific 31 | to the toy shapes dataset. 32 | """ 33 | # Give the configuration a recognizable name 34 | NAME = "shapes" 35 | 36 | # Train on 1 GPU and 8 images per GPU. We can put multiple images on each 37 | # GPU because the images are small. Batch size is 8 (GPUs * images/GPU). 
38 | GPU_COUNT = 1 39 | IMAGES_PER_GPU = 8 40 | 41 | # Number of classes (including background) 42 | NUM_CLASSES = 1 + 3 # background + 3 shapes 43 | 44 | # Use small images for faster training. Set the limits of the small side 45 | # the large side, and that determines the image shape. 46 | IMAGE_MIN_DIM = 128 47 | IMAGE_MAX_DIM = 128 48 | 49 | # Use smaller anchors because our image and objects are small 50 | RPN_ANCHOR_SCALES = (8, 16, 32, 64, 128) # anchor side in pixels 51 | 52 | # Reduce training ROIs per image because the images are small and have 53 | # few objects. Aim to allow ROI sampling to pick 33% positive ROIs. 54 | TRAIN_ROIS_PER_IMAGE = 32 55 | 56 | # Use a small epoch since the data is simple 57 | STEPS_PER_EPOCH = 100 58 | 59 | # use small validation steps since the epoch is small 60 | VALIDATION_STEPS = 5 61 | 62 | 63 | class ShapesDataset(utils.Dataset): 64 | """Generates the shapes synthetic dataset. The dataset consists of simple 65 | shapes (triangles, squares, circles) placed randomly on a blank surface. 66 | The images are generated on the fly. No file access required. 67 | """ 68 | 69 | def load_shapes(self, count, height, width): 70 | """Generate the requested number of synthetic images. 71 | count: number of images to generate. 72 | height, width: the size of the generated images. 73 | """ 74 | # Add classes 75 | self.add_class("shapes", 1, "square") 76 | self.add_class("shapes", 2, "circle") 77 | self.add_class("shapes", 3, "triangle") 78 | 79 | # Add images 80 | # Generate random specifications of images (i.e. color and 81 | # list of shapes sizes and locations). This is more compact than 82 | # actual images. Images are generated on the fly in load_image(). 83 | for i in range(count): 84 | bg_color, shapes = self.random_image(height, width) 85 | self.add_image("shapes", image_id=i, path=None, 86 | width=width, height=height, 87 | bg_color=bg_color, shapes=shapes) 88 | 89 | def load_image(self, image_id): 90 | """Generate an image from the specs of the given image ID. 91 | Typically this function loads the image from a file, but 92 | in this case it generates the image on the fly from the 93 | specs in image_info. 94 | """ 95 | info = self.image_info[image_id] 96 | bg_color = np.array(info['bg_color']).reshape([1, 1, 3]) 97 | image = np.ones([info['height'], info['width'], 3], dtype=np.uint8) 98 | image = image * bg_color.astype(np.uint8) 99 | for shape, color, dims in info['shapes']: 100 | image = self.draw_shape(image, shape, dims, color) 101 | return image 102 | 103 | def image_reference(self, image_id): 104 | """Return the shapes data of the image.""" 105 | info = self.image_info[image_id] 106 | if info["source"] == "shapes": 107 | return info["shapes"] 108 | else: 109 | super(self.__class__).image_reference(self, image_id) 110 | 111 | def load_mask(self, image_id): 112 | """Generate instance masks for shapes of the given image ID. 
113 | """ 114 | info = self.image_info[image_id] 115 | shapes = info['shapes'] 116 | count = len(shapes) 117 | mask = np.zeros([info['height'], info['width'], count], dtype=np.uint8) 118 | for i, (shape, _, dims) in enumerate(info['shapes']): 119 | mask[:, :, i:i + 1] = self.draw_shape(mask[:, :, i:i + 1].copy(), 120 | shape, dims, 1) 121 | # Handle occlusions 122 | occlusion = np.logical_not(mask[:, :, -1]).astype(np.uint8) 123 | for i in range(count - 2, -1, -1): 124 | mask[:, :, i] = mask[:, :, i] * occlusion 125 | occlusion = np.logical_and( 126 | occlusion, np.logical_not(mask[:, :, i])) 127 | # Map class names to class IDs. 128 | class_ids = np.array([self.class_names.index(s[0]) for s in shapes]) 129 | return mask, class_ids.astype(np.int32) 130 | 131 | def draw_shape(self, image, shape, dims, color): 132 | """Draws a shape from the given specs.""" 133 | # Get the center x, y and the size s 134 | x, y, s = dims 135 | if shape == 'square': 136 | image = cv2.rectangle(image, (x - s, y - s), 137 | (x + s, y + s), color, -1) 138 | elif shape == "circle": 139 | image = cv2.circle(image, (x, y), s, color, -1) 140 | elif shape == "triangle": 141 | points = np.array([[(x, y - s), 142 | (x - s / math.sin(math.radians(60)), y + s), 143 | (x + s / math.sin(math.radians(60)), y + s), 144 | ]], dtype=np.int32) 145 | image = cv2.fillPoly(image, points, color) 146 | return image 147 | 148 | def random_shape(self, height, width): 149 | """Generates specifications of a random shape that lies within 150 | the given height and width boundaries. 151 | Returns a tuple of three valus: 152 | * The shape name (square, circle, ...) 153 | * Shape color: a tuple of 3 values, RGB. 154 | * Shape dimensions: A tuple of values that define the shape size 155 | and location. Differs per shape type. 156 | """ 157 | # Shape 158 | shape = random.choice(["square", "circle", "triangle"]) 159 | # Color 160 | color = tuple([random.randint(0, 255) for _ in range(3)]) 161 | # Center x, y 162 | buffer = 20 163 | y = random.randint(buffer, height - buffer - 1) 164 | x = random.randint(buffer, width - buffer - 1) 165 | # Size 166 | s = random.randint(buffer, height // 4) 167 | return shape, color, (x, y, s) 168 | 169 | def random_image(self, height, width): 170 | """Creates random specifications of an image with multiple shapes. 171 | Returns the background color of the image and a list of shape 172 | specifications that can be used to draw the image. 
173 | """ 174 | # Pick random background color 175 | bg_color = np.array([random.randint(0, 255) for _ in range(3)]) 176 | # Generate a few random shapes and record their 177 | # bounding boxes 178 | shapes = [] 179 | boxes = [] 180 | N = random.randint(1, 4) 181 | for _ in range(N): 182 | shape, color, dims = self.random_shape(height, width) 183 | shapes.append((shape, color, dims)) 184 | x, y, s = dims 185 | boxes.append([y - s, x - s, y + s, x + s]) 186 | # Apply non-max suppression wit 0.3 threshold to avoid 187 | # shapes covering each other 188 | keep_ixs = utils.non_max_suppression( 189 | np.array(boxes), np.arange(N), 0.3) 190 | shapes = [s for i, s in enumerate(shapes) if i in keep_ixs] 191 | return bg_color, shapes 192 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md 3 | license-file = LICENSE 4 | requirements-file = requirements.txt -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """ 2 | The build/compilations setup 3 | 4 | >> pip install -r requirements.txt 5 | >> python setup.py install 6 | """ 7 | import pip 8 | import logging 9 | import pkg_resources 10 | try: 11 | from setuptools import setup 12 | except ImportError: 13 | from distutils.core import setup 14 | 15 | 16 | def _parse_requirements(file_path): 17 | pip_ver = pkg_resources.get_distribution('pip').version 18 | pip_version = list(map(int, pip_ver.split('.')[:2])) 19 | if pip_version >= [6, 0]: 20 | raw = pip.req.parse_requirements(file_path, 21 | session=pip.download.PipSession()) 22 | else: 23 | raw = pip.req.parse_requirements(file_path) 24 | return [str(i.req) for i in raw] 25 | 26 | 27 | # parse_requirements() returns generator of pip.req.InstallRequirement objects 28 | try: 29 | install_reqs = _parse_requirements("requirements.txt") 30 | except Exception: 31 | logging.warning('Fail load requirements file, so using default ones.') 32 | install_reqs = [] 33 | 34 | setup( 35 | name='mask-rcnn', 36 | version='2.1', 37 | url='https://github.com/matterport/Mask_RCNN', 38 | author='Matterport', 39 | author_email='waleed.abdulla@gmail.com', 40 | license='MIT', 41 | description='Mask R-CNN for object detection and instance segmentation', 42 | packages=["mrcnn"], 43 | install_requires=install_reqs, 44 | include_package_data=True, 45 | python_requires='>=3.4', 46 | long_description="""This is an implementation of Mask R-CNN on Python 3, Keras, and TensorFlow. 47 | The model generates bounding boxes and segmentation masks for each instance of an object in the image. 
48 | It's based on Feature Pyramid Network (FPN) and a ResNet101 backbone.""", 49 | classifiers=[ 50 | "Development Status :: 5 - Production/Stable", 51 | "Environment :: Console", 52 | "Intended Audience :: Developers", 53 | "Intended Audience :: Information Technology", 54 | "Intended Audience :: Education", 55 | "Intended Audience :: Science/Research", 56 | "License :: OSI Approved :: MIT License", 57 | "Natural Language :: English", 58 | "Operating System :: OS Independent", 59 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 60 | "Topic :: Scientific/Engineering :: Image Recognition", 61 | "Topic :: Scientific/Engineering :: Visualization", 62 | "Topic :: Scientific/Engineering :: Image Segmentation", 63 | 'Programming Language :: Python :: 3.4', 64 | 'Programming Language :: Python :: 3.5', 65 | 'Programming Language :: Python :: 3.6', 66 | ], 67 | keywords="image instance segmentation object detection mask rcnn r-cnn tensorflow keras", 68 | ) 69 | -------------------------------------------------------------------------------- /video_demo.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | from visualize_cv2 import model, display_instances, class_names 3 | import sys 4 | 5 | args = sys.argv 6 | if(len(args) < 2): 7 | print("run command: python video_demo.py 0 or video file name") 8 | sys.exit(0) 9 | name = args[1] 10 | if(len(args[1]) == 1): 11 | name = int(args[1]) 12 | 13 | stream = cv2.VideoCapture(name) 14 | 15 | while True: 16 | ret , frame = stream.read() 17 | if not ret: 18 | print("unable to fetch frame") 19 | break 20 | results = model.detect([frame], verbose=1) 21 | 22 | # Visualize results 23 | r = results[0] 24 | masked_image = display_instances(frame, r['rois'], r['masks'], r['class_ids'], 25 | class_names, r['scores']) 26 | cv2.imshow("masked_image",masked_image) 27 | if(cv2.waitKey(1) & 0xFF == ord('q')): 28 | break 29 | stream.release() 30 | cv2.destroyWindow("masked_image") -------------------------------------------------------------------------------- /visualize_cv2.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import os 4 | import sys 5 | 6 | from mrcnn import utils 7 | from mrcnn import model as modellib 8 | 9 | ROOT_DIR = os.path.abspath("./") 10 | MODEL_DIR = os.path.join(ROOT_DIR, "logs") 11 | sys.path.append(os.path.join(ROOT_DIR,"samples/coco/")) 12 | import coco 13 | COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5") 14 | if not os.path.exists(COCO_MODEL_PATH): 15 | utils.download_trained_weights(COCO_MODEL_PATH) 16 | 17 | 18 | class InferenceConfig(coco.CocoConfig): 19 | GPU_COUNT = 1 20 | IMAGES_PER_GPU = 1 21 | 22 | 23 | config = InferenceConfig() 24 | config.display() 25 | 26 | model = modellib.MaskRCNN( 27 | mode="inference", model_dir=MODEL_DIR, config=config 28 | ) 29 | model.load_weights(COCO_MODEL_PATH, by_name=True) 30 | class_names = [ 31 | 'BG', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 32 | 'bus', 'train', 'truck', 'boat', 'traffic light', 33 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 34 | 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 35 | 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 36 | 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 37 | 'kite', 'baseball bat', 'baseball glove', 'skateboard', 38 | 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 39 | 'fork', 'knife', 'spoon', 'bowl', 'banana', 
'apple', 40 | 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 41 | 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 42 | 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 43 | 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 44 | 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 45 | 'teddy bear', 'hair drier', 'toothbrush' 46 | ] 47 | 48 | 49 | def random_colors(N): 50 | np.random.seed(1) 51 | colors = [tuple(255 * np.random.rand(3)) for _ in range(N)] 52 | return colors 53 | 54 | 55 | colors = random_colors(len(class_names)) 56 | class_dict = { 57 | name: color for name, color in zip(class_names, colors) 58 | } 59 | 60 | 61 | def apply_mask(image, mask, color, alpha=0.5): 62 | """apply mask to image""" 63 | for n, c in enumerate(color): 64 | image[:, :, n] = np.where( 65 | mask == 1, 66 | image[:, :, n] * (1 - alpha) + alpha * c, 67 | image[:, :, n] 68 | ) 69 | return image 70 | 71 | 72 | def display_instances(image, boxes, masks, ids, names, scores): 73 | """ 74 | take the image and results and apply the mask, box, and Label 75 | """ 76 | n_instances = boxes.shape[0] 77 | 78 | if not n_instances: 79 | print('NO INSTANCES TO DISPLAY') 80 | else: 81 | assert boxes.shape[0] == masks.shape[-1] == ids.shape[0] 82 | 83 | for i in range(n_instances): 84 | if not np.any(boxes[i]): 85 | continue 86 | 87 | y1, x1, y2, x2 = boxes[i] 88 | label = names[ids[i]] 89 | color = class_dict[label] 90 | score = scores[i] if scores is not None else None 91 | caption = '{} {:.2f}'.format(label, score) if score else label 92 | mask = masks[:, :, i] 93 | 94 | image = apply_mask(image, mask, color) 95 | image = cv2.rectangle(image, (x1, y1), (x2, y2), color, 2) 96 | image = cv2.putText( 97 | image, caption, (x1, y1), cv2.FONT_HERSHEY_COMPLEX, 0.7, color, 2 98 | ) 99 | 100 | return image 101 | 102 | 103 | if __name__ == '__main__': 104 | """ 105 | test everything 106 | """ 107 | 108 | capture = cv2.VideoCapture(0) 109 | 110 | # these 2 lines can be removed if you dont have a 1080p camera. 111 | capture.set(cv2.CAP_PROP_FRAME_WIDTH, 1920) 112 | capture.set(cv2.CAP_PROP_FRAME_HEIGHT, 1080) 113 | 114 | while True: 115 | ret, frame = capture.read() 116 | results = model.detect([frame], verbose=0) 117 | r = results[0] 118 | frame = display_instances( 119 | frame, r['rois'], r['masks'], r['class_ids'], class_names, r['scores'] 120 | ) 121 | cv2.imshow('frame', frame) 122 | if cv2.waitKey(1) & 0xFF == ord('q'): 123 | break 124 | 125 | capture.release() 126 | cv2.destroyAllWindows() --------------------------------------------------------------------------------
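The same pieces exported by `visualize_cv2.py` (`model`, `display_instances`, `class_names`) can also be pointed at a single still image rather than a webcam or video stream. The sketch below is illustrative only; the input and output file names are placeholders.

```python
import cv2
from visualize_cv2 import model, display_instances, class_names

# Placeholder paths -- substitute your own files.
image = cv2.imread("images/sample.jpg")   # BGR frame, as in the video scripts
r = model.detect([image], verbose=0)[0]

# Draw masks, boxes, and class labels, then save the annotated frame.
annotated = display_instances(image, r['rois'], r['masks'], r['class_ids'],
                              class_names, r['scores'])
cv2.imwrite("sample_masked.png", annotated)
```

As with `video_demo.py`, the frame is passed to `model.detect()` in OpenCV's BGR order, and the overlay colors come from the `class_dict` built in `visualize_cv2.py`.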