├── .gitignore ├── Anaconda Setup ├── README.MD ├── condaenv.em7g437d.requirements.txt ├── mask_rcnn.yml └── requirments.txt ├── LICENSE ├── MANIFEST.in ├── README.md ├── assets ├── 4k_video.gif ├── balloon_color_splash.gif ├── detection_activations.png ├── detection_anchors.png ├── detection_final.png ├── detection_histograms.png ├── detection_masks.png ├── detection_refinement.png ├── detection_tensorboard.png ├── images_to_osm.png ├── mapping_challenge.png ├── nucleus_segmentation.png ├── project_3dbuildings.png ├── project_grass_gis.png ├── project_ice_wedge_polygons.png ├── project_shiny1.jpg ├── project_usiigaci1.gif ├── project_usiigaci2.gif └── street.png ├── coco ├── coco.py ├── inspect_data.ipynb ├── inspect_model.ipynb └── inspect_weights.ipynb ├── demo.ipynb ├── images ├── 1045023827_4ec3e8ba5c_z.jpg ├── 12283150_12d37e6389_z.jpg ├── 2383514521_1fc8d7b0de_z.jpg ├── 2502287818_41e4b0c4fb_z.jpg ├── 2516944023_d00345997d_z.jpg ├── 25691390_f9944f61b5_z.jpg ├── 262985539_1709e54576_z.jpg ├── 3132016470_c27baa00e8_z.jpg ├── 3627527276_6fe8cd9bfe_z.jpg ├── 3651581213_f81963d1dd_z.jpg ├── 3800883468_12af3c0b50_z.jpg ├── 3862500489_6fd195d183_z.jpg ├── 3878153025_8fde829928_z.jpg ├── 4410436637_7b0ca36ee7_z.jpg ├── 4782628554_668bc31826_z.jpg ├── 5951960966_d4e1cda5d0_z.jpg ├── 6584515005_fce9cec486_z.jpg ├── 6821351586_59aa0dc110_z.jpg ├── 7581246086_cf7bbb7255_z.jpg ├── 7933423348_c30bd9bd4e_z.jpg ├── 8053677163_d4c8f416be_z.jpg ├── 8239308689_efa6c11b08_z.jpg ├── 8433365521_9252889f9a_z.jpg ├── 8512296263_5fc5458e20_z.jpg ├── 8699757338_c3941051b6_z.jpg ├── 8734543718_37f6b8bd45_z.jpg ├── 8829708882_48f263491e_z.jpg ├── 9118579087_f9ffa19e63_z.jpg └── 9247489789_132c0d534a_z.jpg ├── mrcnn ├── __init__.py ├── config.py ├── model.py ├── parallel_model.py ├── utils.py └── visualize.py ├── requirements.txt ├── samples ├── README.md ├── balloon │ ├── README.md │ ├── balloon.py │ ├── inspect_balloon_data.ipynb │ └── inspect_balloon_model.ipynb ├── coco │ ├── coco.py │ ├── inspect_data.ipynb │ ├── inspect_model.ipynb │ └── inspect_weights.ipynb ├── demo.ipynb ├── nucleus │ ├── README.md │ ├── inspect_nucleus_data.ipynb │ ├── inspect_nucleus_model.ipynb │ └── nucleus.py └── shapes │ ├── shapes.py │ └── train_shapes.ipynb ├── setup.cfg ├── setup.py ├── video_demo.py └── visualize_cv2.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Data files and directories common in repo root 2 | datasets/ 3 | logs/ 4 | *.h5 5 | results/ 6 | temp/ 7 | test/ 8 | 9 | # Byte-compiled / optimized / DLL files 10 | __pycache__/ 11 | *.py[cod] 12 | *$py.class 13 | 14 | # Distribution / packaging 15 | .Python 16 | env/ 17 | build/ 18 | develop-eggs/ 19 | dist/ 20 | downloads/ 21 | eggs/ 22 | .eggs/ 23 | lib/ 24 | lib64/ 25 | parts/ 26 | sdist/ 27 | var/ 28 | wheels/ 29 | *.egg-info/ 30 | .installed.cfg 31 | *.egg 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # VS Studio Code 38 | .vscode 39 | 40 | # PyCharm 41 | .idea/ 42 | 43 | # Dropbox 44 | .dropbox.attr 45 | 46 | # Jupyter Notebook 47 | .ipynb_checkpoints 48 | 49 | # pyenv 50 | .python-version 51 | 52 | # dotenv 53 | .env 54 | 55 | # virtualenv 56 | .venv 57 | venv/ 58 | ENV/ 59 | -------------------------------------------------------------------------------- /Anaconda Setup/README.MD: -------------------------------------------------------------------------------- 1 | # Installation Guide for Anaconda 2 | Hey guys and welcome back. 
This lecture is going to be a quick lesson on how to install Anaconda for Mask RCNN. Let's get started. 3 | First, go to Google, search for "Anaconda download", and click on the first link. 4 | ## Installing Anaconda 5 | 6 | * Select Anaconda for Windows and download the Python 3.7 version 7 | * Save the installer anywhere you like 8 | * Once it has downloaded, open the installer and follow the instructions. 9 | * Everything can be left at the defaults except for the advanced options, where we recommend ticking "Add Anaconda to my PATH environment variable" 10 | * Click Next and let it install 11 | * Now open the Anaconda Prompt (or Command Prompt) from the Windows Start menu. 12 | 13 | ## Creating the Environment 14 | 15 | Now type in this command: 16 | 17 | ```conda env create -f mask_rcnn.yml``` 18 | 19 | This will create a new conda environment from the .yml file, which you will find in the GitHub repo attached to this video. Make sure you are in the folder that contains the .yml file before running it. 20 | * Press Enter to execute. 21 | * To activate the new conda environment, execute the following command: 22 | 23 | ```conda activate mask_rcnn``` 24 | * You should see the new environment activate, with mask_rcnn shown in brackets at the prompt. 25 | 26 | ## Installing the Dependencies 27 | 28 | * Lastly, install the remaining dependencies using pip: 29 | * ```pip install -r requirments.txt``` 30 | * To test that everything is working, run the demo script (a minimal import-check sketch is also included after the setup files below): 31 | * ```python demo.py``` 32 | If everything was imported correctly, you should get the output shown in the lecture. 33 | 34 | 35 | Okay, that's it from me. In the next lecture I will show you how to set up and run Mask RCNN. 36 | 37 | -------------------------------------------------------------------------------- /Anaconda Setup/condaenv.em7g437d.requirements.txt: -------------------------------------------------------------------------------- 1 | imgaug==0.2.8 2 | opencv-python==4.0.0.21 3 | pycocotools==2.0 -------------------------------------------------------------------------------- /Anaconda Setup/mask_rcnn.yml: -------------------------------------------------------------------------------- 1 | name: my_personal_lab_mask 2 | channels: 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | - _tflow_select=2.3.0=mkl 7 | - absl-py=0.7.0=py36_0 8 | - astor=0.7.1=py36_0 9 | - attrs=19.1.0=py36_1 10 | - backcall=0.1.0=py36_0 11 | - blas=1.0=mkl 12 | - bleach=3.1.0=py36_0 13 | - ca-certificates=2019.3.9=hecc5488_0 14 | - certifi=2019.3.9=py36_0 15 | - cloudpickle=0.8.0=py36_0 16 | - colorama=0.4.1=py36_0 17 | - cycler=0.10.0=py36h009560c_0 18 | - cython=0.29.6=py36h6538335_0 19 | - cytoolz=0.9.0.1=py36hfa6e2cd_1 20 | - dask-core=1.1.4=py36_1 21 | - decorator=4.4.0=py36_1 22 | - defusedxml=0.5.0=py36_1 23 | - entrypoints=0.3=py36_0 24 | - freetype=2.9.1=ha9979f8_1 25 | - gast=0.2.2=py36_0 26 | - geos=3.7.1=h33f27b4_0 27 | - grpcio=1.16.1=py36h351948d_1 28 | - h5py=2.8.0=py36hf7173ca_2 29 | - hdf5=1.8.20=hac2f561_1 30 | - icc_rt=2019.0.0=h0cc432a_1 31 | - icu=58.2=ha66f8fd_1 32 | - imageio=2.5.0=py36_0 33 | - intel-openmp=2019.3=203 34 | - ipykernel=5.1.0=py36h39e3cac_0 35 | - ipython=7.3.0=py36h39e3cac_0 36 | - ipython_genutils=0.2.0=py36h3c5d0ee_0 37 | - ipywidgets=7.4.2=py36_0 38 | - jedi=0.13.3=py36_0 39 | - jinja2=2.10=py36_0 40 | - jpeg=9b=hb83a4c4_2 41 | - jsonschema=3.0.1=py36_0 42 | - jupyter=1.0.0=py36_7 43 | - jupyter_client=5.2.4=py36_0 44 | - jupyter_console=6.0.0=py36_0 45 | - jupyter_core=4.4.0=py36_0 46 | - keras=2.2.4=0 47 | -
keras-applications=1.0.7=py_0 48 | - keras-base=2.2.4=py36_0 49 | - keras-preprocessing=1.0.9=py_0 50 | - kiwisolver=1.0.1=py36h6538335_0 51 | - libmklml=2019.0.3=0 52 | - libopencv=3.4.2=h20b85fd_0 53 | - libpng=1.6.36=h2a8f88b_0 54 | - libprotobuf=3.6.1=h7bd577a_0 55 | - libsodium=1.0.16=h9d3ae62_0 56 | - libtiff=4.0.10=hb898794_2 57 | - m2w64-gcc-libgfortran=5.3.0=6 58 | - m2w64-gcc-libs=5.3.0=7 59 | - m2w64-gcc-libs-core=5.3.0=7 60 | - m2w64-gmp=6.1.0=2 61 | - m2w64-libwinpthread-git=5.0.0.4634.697f757=2 62 | - markdown=3.0.1=py36_0 63 | - markupsafe=1.1.1=py36he774522_0 64 | - matplotlib=3.0.3=py36hc8f65d3_0 65 | - mistune=0.8.4=py36he774522_0 66 | - mkl=2019.3=203 67 | - mkl_fft=1.0.10=py36h14836fe_0 68 | - mkl_random=1.0.2=py36h343c172_0 69 | - mock=2.0.0=py36h9086845_0 70 | - msys2-conda-epoch=20160418=1 71 | - nbconvert=5.4.1=py36_3 72 | - nbformat=4.4.0=py36h3a5bc1b_0 73 | - networkx=2.2=py36_1 74 | - notebook=5.7.6=py36_0 75 | - numpy=1.16.2=py36h19fb1c0_0 76 | - numpy-base=1.16.2=py36hc3f5095_0 77 | - olefile=0.46=py36_0 78 | - opencv=3.4.2=py36h40b0b35_0 79 | - openssl=1.1.1b=hfa6e2cd_2 80 | - pandoc=2.2.3.2=0 81 | - pandocfilters=1.4.2=py36_1 82 | - parso=0.3.4=py36_0 83 | - pbr=5.1.3=py_0 84 | - pickleshare=0.7.5=py36_0 85 | - pillow=5.4.1=py36hdc69c19_0 86 | - pip=19.0.3=py36_0 87 | - prometheus_client=0.6.0=py36_0 88 | - prompt_toolkit=2.0.9=py36_0 89 | - protobuf=3.6.1=py36h33f27b4_0 90 | - py-opencv=3.4.2=py36hc319ecb_0 91 | - pygments=2.3.1=py36_0 92 | - pyparsing=2.3.1=py36_0 93 | - pyqt=5.9.2=py36h6538335_2 94 | - pyreadline=2.1=py36_1 95 | - pyrsistent=0.14.11=py36he774522_0 96 | - python=3.6.8=h9f7ef89_7 97 | - python-dateutil=2.8.0=py36_0 98 | - pytz=2018.9=py36_0 99 | - pywavelets=1.0.2=py36h8c2d366_0 100 | - pywinpty=0.5.5=py36_1000 101 | - pyyaml=5.1=py36he774522_0 102 | - pyzmq=18.0.0=py36ha925a31_0 103 | - qt=5.9.7=vc14h73c81de_0 104 | - qtconsole=4.4.3=py36_0 105 | - scikit-image=0.14.2=py36ha925a31_0 106 | - scipy=1.2.1=py36h29ff71c_0 107 | - send2trash=1.5.0=py36_0 108 | - setuptools=40.8.0=py36_0 109 | - shapely=1.6.4=py36h222a598_0 110 | - sip=4.19.8=py36h6538335_0 111 | - six=1.12.0=py36_0 112 | - sqlite=3.27.2=he774522_0 113 | - tensorboard=1.13.1=py36h33f27b4_0 114 | - tensorflow=1.13.1=mkl_py36hd212fbe_0 115 | - tensorflow-base=1.13.1=mkl_py36hcaf7020_0 116 | - tensorflow-estimator=1.13.0=py_0 117 | - termcolor=1.1.0=py36_1 118 | - terminado=0.8.1=py36_1 119 | - testpath=0.4.2=py36_0 120 | - tk=8.6.8=hfa6e2cd_0 121 | - toolz=0.9.0=py36_0 122 | - tornado=6.0.1=py36he774522_0 123 | - traitlets=4.3.2=py36h096827d_0 124 | - vc=14.1=h0510ff6_4 125 | - vs2015_runtime=14.15.26706=h3a45250_0 126 | - wcwidth=0.1.7=py36h3d5aa90_0 127 | - webencodings=0.5.1=py36_1 128 | - werkzeug=0.14.1=py36_0 129 | - wheel=0.33.1=py36_0 130 | - widgetsnbextension=3.4.2=py36_0 131 | - wincertstore=0.2=py36h7fe50ca_0 132 | - winpty=0.4.3=4 133 | - xz=5.2.4=h2fa13f4_4 134 | - yaml=0.1.7=hc54c509_2 135 | - zeromq=4.3.1=h33f27b4_3 136 | - zlib=1.2.11=h62dcd97_3 137 | - zstd=1.3.7=h508b16e_0 138 | - pip: 139 | - imgaug==0.2.8 140 | - opencv-python==4.0.0.21 141 | - pycocotools==2.0 142 | prefix: C:\Users\GB-PC07\Anaconda3\envs\myenv 143 | 144 | -------------------------------------------------------------------------------- /Anaconda Setup/requirments.txt: -------------------------------------------------------------------------------- 1 | imgaug==0.2.8 2 | git+https://github.com/philferriere/cocoapi.git#egg=pycocotools&subdirectory=PythonAPI 
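The setup guide above finishes by running `python demo.py` as a smoke test. If you only want to confirm that the conda environment itself was built correctly, a minimal import check along the following lines can be used. This is just a sketch under stated assumptions: the file name `check_setup.py` is hypothetical (it is not shipped with this repo), and the package list is taken from `mask_rcnn.yml` and `requirments.txt` above.

```python
# check_setup.py -- hypothetical helper, not part of this repo.
# Minimal smoke test: verify that the core dependencies used by the
# Mask R-CNN demo import cleanly and report their versions.
import importlib

# Import names assumed from mask_rcnn.yml / requirments.txt above.
PACKAGES = ["tensorflow", "keras", "cv2", "imgaug", "skimage", "pycocotools"]


def check(name):
    """Try to import a package and print its version (or the import error)."""
    try:
        module = importlib.import_module(name)
        print("OK   {:<12} {}".format(name, getattr(module, "__version__", "unknown")))
        return True
    except ImportError as err:
        print("FAIL {:<12} {}".format(name, err))
        return False


if __name__ == "__main__":
    # Run every check (no short-circuiting) so all failures are listed at once.
    if all([check(name) for name in PACKAGES]):
        print("Environment looks ready for the Mask R-CNN demo.")
    else:
        print("Some imports failed -- re-check the conda and pip steps above.")
```

Run it from the activated environment with `python check_setup.py`; every package should report `OK` before moving on to the demo in the next lecture.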
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Mask R-CNN 2 | 3 | The MIT License (MIT) 4 | 5 | Copyright (c) 2017 Matterport, Inc. 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in 15 | all copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | THE SOFTWARE. 24 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | include LICENSE 3 | include requirements.txt -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Mask R-CNN for Object Detection and Segmentation 2 | 3 | This is an implementation of [Mask R-CNN](https://arxiv.org/abs/1703.06870) on Python 3, Keras, and TensorFlow. The model generates bounding boxes and segmentation masks for each instance of an object in the image. It's based on Feature Pyramid Network (FPN) and a ResNet101 backbone. 4 | 5 | ![Instance Segmentation Sample](assets/street.png) 6 | 7 | The repository includes: 8 | * Source code of Mask R-CNN built on FPN and ResNet101. 9 | * Training code for MS COCO 10 | * Pre-trained weights for MS COCO 11 | * Jupyter notebooks to visualize the detection pipeline at every step 12 | * ParallelModel class for multi-GPU training 13 | * Evaluation on MS COCO metrics (AP) 14 | * Example of training on your own dataset 15 | 16 | 17 | The code is documented and designed to be easy to extend. If you use it in your research, please consider citing this repository (bibtex below). If you work on 3D vision, you might find our recently released [Matterport3D](https://matterport.com/blog/2017/09/20/announcing-matterport3d-research-dataset/) dataset useful as well. 18 | This dataset was created from 3D-reconstructed spaces captured by our customers who agreed to make them publicly available for academic use. You can see more examples [here](https://matterport.com/gallery/). 19 | 20 | # Getting Started 21 | * [demo.ipynb](samples/demo.ipynb) Is the easiest way to start. It shows an example of using a model pre-trained on MS COCO to segment objects in your own images. 22 | It includes code to run object detection and instance segmentation on arbitrary images. 23 | 24 | * [train_shapes.ipynb](samples/shapes/train_shapes.ipynb) shows how to train Mask R-CNN on your own dataset. 
This notebook introduces a toy dataset (Shapes) to demonstrate training on a new dataset. 25 | 26 | * ([model.py](mrcnn/model.py), [utils.py](mrcnn/utils.py), [config.py](mrcnn/config.py)): These files contain the main Mask RCNN implementation. 27 | 28 | 29 | * [inspect_data.ipynb](samples/coco/inspect_data.ipynb). This notebook visualizes the different pre-processing steps 30 | to prepare the training data. 31 | 32 | * [inspect_model.ipynb](samples/coco/inspect_model.ipynb) This notebook goes in depth into the steps performed to detect and segment objects. It provides visualizations of every step of the pipeline. 33 | 34 | * [inspect_weights.ipynb](samples/coco/inspect_weights.ipynb) 35 | This notebook inspects the weights of a trained model and looks for anomalies and odd patterns. 36 | 37 | 38 | # Step by Step Detection 39 | To help with debugging and understanding the model, there are 3 notebooks 40 | ([inspect_data.ipynb](samples/coco/inspect_data.ipynb), [inspect_model.ipynb](samples/coco/inspect_model.ipynb), 41 | [inspect_weights.ipynb](samples/coco/inspect_weights.ipynb)) that provide a lot of visualizations and allow running the model step by step to inspect the output at each point. Here are a few examples: 42 | 43 | 44 | 45 | ## 1. Anchor sorting and filtering 46 | Visualizes every step of the first-stage Region Proposal Network and displays positive and negative anchors along with anchor box refinement. 47 | ![](assets/detection_anchors.png) 48 | 49 | ## 2. Bounding Box Refinement 50 | This is an example of final detection boxes (dotted lines) and the refinement applied to them (solid lines) in the second stage. 51 | ![](assets/detection_refinement.png) 52 | 53 | ## 3. Mask Generation 54 | Examples of generated masks. These then get scaled and placed on the image in the right location. 55 | 56 | ![](assets/detection_masks.png) 57 | 58 | ## 4. Layer activations 59 | Often it's useful to inspect the activations at different layers to look for signs of trouble (all zeros or random noise). 60 | 61 | ![](assets/detection_activations.png) 62 | 63 | ## 5. Weight Histograms 64 | Another useful debugging tool is to inspect the weight histograms. These are included in the inspect_weights.ipynb notebook. 65 | 66 | ![](assets/detection_histograms.png) 67 | 68 | ## 6. Logging to TensorBoard 69 | TensorBoard is another great debugging and visualization tool. The model is configured to log losses and save weights at the end of every epoch. 70 | 71 | ![](assets/detection_tensorboard.png) 72 | 73 | ## 7. Composing the different pieces into a final result 74 | 75 | ![](assets/detection_final.png) 76 | 77 | 78 | # Training on MS COCO 79 | We're providing pre-trained weights for MS COCO to make it easier to start. You can 80 | use those weights as a starting point to train your own variation on the network. 81 | Training and evaluation code is in `samples/coco/coco.py`.
You can import this 82 | module in Jupyter notebook (see the provided notebooks for examples) or you 83 | can run it directly from the command line as such: 84 | 85 | ``` 86 | # Train a new model starting from pre-trained COCO weights 87 | python3 samples/coco/coco.py train --dataset=/path/to/coco/ --model=coco 88 | 89 | # Train a new model starting from ImageNet weights 90 | python3 samples/coco/coco.py train --dataset=/path/to/coco/ --model=imagenet 91 | 92 | # Continue training a model that you had trained earlier 93 | python3 samples/coco/coco.py train --dataset=/path/to/coco/ --model=/path/to/weights.h5 94 | 95 | # Continue training the last model you trained. This will find 96 | # the last trained weights in the model directory. 97 | python3 samples/coco/coco.py train --dataset=/path/to/coco/ --model=last 98 | ``` 99 | 100 | You can also run the COCO evaluation code with: 101 | ``` 102 | # Run COCO evaluation on the last trained model 103 | python3 samples/coco/coco.py evaluate --dataset=/path/to/coco/ --model=last 104 | ``` 105 | 106 | The training schedule, learning rate, and other parameters should be set in `samples/coco/coco.py`. 107 | 108 | 109 | # Training on Your Own Dataset 110 | 111 | Start by reading this [blog post about the balloon color splash sample](https://engineering.matterport.com/splash-of-color-instance-segmentation-with-mask-r-cnn-and-tensorflow-7c761e238b46). It covers the process starting from annotating images to training to using the results in a sample application. 112 | 113 | In summary, to train the model on your own dataset you'll need to extend two classes: 114 | 115 | ```Config``` 116 | This class contains the default configuration. Subclass it and modify the attributes you need to change. 117 | 118 | ```Dataset``` 119 | This class provides a consistent way to work with any dataset. 120 | It allows you to use new datasets for training without having to change 121 | the code of the model. It also supports loading multiple datasets at the 122 | same time, which is useful if the objects you want to detect are not 123 | all available in one dataset. 124 | 125 | See examples in `samples/shapes/train_shapes.ipynb`, `samples/coco/coco.py`, `samples/balloon/balloon.py`, and `samples/nucleus/nucleus.py`. 126 | 127 | ## Differences from the Official Paper 128 | This implementation follows the Mask RCNN paper for the most part, but there are a few cases where we deviated in favor of code simplicity and generalization. These are some of the differences we're aware of. If you encounter other differences, please do let us know. 129 | 130 | * **Image Resizing:** To support training multiple images per batch we resize all images to the same size. For example, 1024x1024px on MS COCO. We preserve the aspect ratio, so if an image is not square we pad it with zeros. In the paper the resizing is done such that the smallest side is 800px and the largest is trimmed at 1000px. 131 | * **Bounding Boxes**: Some datasets provide bounding boxes and some provide masks only. To support training on multiple datasets we opted to ignore the bounding boxes that come with the dataset and generate them on the fly instead. We pick the smallest box that encapsulates all the pixels of the mask as the bounding box. This simplifies the implementation and also makes it easy to apply image augmentations that would otherwise be harder to apply to bounding boxes, such as image rotation. 
132 | 133 | To validate this approach, we compared our computed bounding boxes to those provided by the COCO dataset. 134 | We found that ~2% of bounding boxes differed by 1px or more, ~0.05% differed by 5px or more, 135 | and only 0.01% differed by 10px or more. 136 | 137 | * **Learning Rate:** The paper uses a learning rate of 0.02, but we found that to be 138 | too high, and often causes the weights to explode, especially when using a small batch 139 | size. It might be related to differences between how Caffe and TensorFlow compute 140 | gradients (sum vs mean across batches and GPUs). Or, maybe the official model uses gradient 141 | clipping to avoid this issue. We do use gradient clipping, but don't set it too aggressively. 142 | We found that smaller learning rates converge faster anyway so we go with that. 143 | 144 | ## Citation 145 | Use this bibtex to cite this repository: 146 | ``` 147 | @misc{matterport_maskrcnn_2017, 148 | title={Mask R-CNN for object detection and instance segmentation on Keras and TensorFlow}, 149 | author={Waleed Abdulla}, 150 | year={2017}, 151 | publisher={Github}, 152 | journal={GitHub repository}, 153 | howpublished={\url{https://github.com/matterport/Mask_RCNN}}, 154 | } 155 | ``` 156 | 157 | ## Contributing 158 | Contributions to this repository are welcome. Examples of things you can contribute: 159 | * Speed Improvements. Like re-writing some Python code in TensorFlow or Cython. 160 | * Training on other datasets. 161 | * Accuracy Improvements. 162 | * Visualizations and examples. 163 | 164 | You can also [join our team](https://matterport.com/careers/) and help us build even more projects like this one. 165 | 166 | ## Requirements 167 | Python 3.4, TensorFlow 1.3, Keras 2.0.8 and other common packages listed in `requirements.txt`. 168 | 169 | ### MS COCO Requirements: 170 | To train or test on MS COCO, you'll also need: 171 | * pycocotools (installation instructions below) 172 | * [MS COCO Dataset](http://cocodataset.org/#home) 173 | * Download the 5K [minival](https://dl.dropboxusercontent.com/s/o43o90bna78omob/instances_minival2014.json.zip?dl=0) 174 | and the 35K [validation-minus-minival](https://dl.dropboxusercontent.com/s/s3tw5zcg7395368/instances_valminusminival2014.json.zip?dl=0) 175 | subsets. More details in the original [Faster R-CNN implementation](https://github.com/rbgirshick/py-faster-rcnn/blob/master/data/README.md). 176 | 177 | If you use Docker, the code has been verified to work on 178 | [this Docker container](https://hub.docker.com/r/waleedka/modern-deep-learning/). 179 | 180 | 181 | ## Installation 182 | 1. Clone this repository 183 | 2. Install dependencies 184 | ```bash 185 | pip3 install -r requirements.txt 186 | ``` 187 | 3. Run setup from the repository root directory 188 | ```bash 189 | python3 setup.py install 190 | ``` 191 | 3. Download pre-trained COCO weights (mask_rcnn_coco.h5) from the [releases page](https://github.com/matterport/Mask_RCNN/releases). 192 | 4. (Optional) To train or test on MS COCO install `pycocotools` from one of these repos. They are forks of the original pycocotools with fixes for Python3 and Windows (the official repo doesn't seem to be active anymore). 193 | 194 | * Linux: https://github.com/waleedka/coco 195 | * Windows: https://github.com/philferriere/cocoapi. 
196 | You must have the Visual C++ 2015 build tools on your path (see the repo for additional details) 197 | 198 | # Projects Using this Model 199 | If you extend this model to other datasets or build projects that use it, we'd love to hear from you. 200 | 201 | ### [4K Video Demo](https://www.youtube.com/watch?v=OOT3UIXZztE) by Karol Majek. 202 | [![Mask RCNN on 4K Video](assets/4k_video.gif)](https://www.youtube.com/watch?v=OOT3UIXZztE) 203 | 204 | ### [Images to OSM](https://github.com/jremillard/images-to-osm): Improve OpenStreetMap by adding baseball, soccer, tennis, football, and basketball fields. 205 | 206 | ![Identify sport fields in satellite images](assets/images_to_osm.png) 207 | 208 | ### [Splash of Color](https://engineering.matterport.com/splash-of-color-instance-segmentation-with-mask-r-cnn-and-tensorflow-7c761e238b46). A blog post explaining how to train this model from scratch and use it to implement a color splash effect. 209 | ![Balloon Color Splash](assets/balloon_color_splash.gif) 210 | 211 | 212 | ### [Segmenting Nuclei in Microscopy Images](samples/nucleus). Built for the [2018 Data Science Bowl](https://www.kaggle.com/c/data-science-bowl-2018) 213 | Code is in the `samples/nucleus` directory. 214 | 215 | ![Nucleus Segmentation](assets/nucleus_segmentation.png) 216 | 217 | ### [Detection and Segmentation for Surgery Robots](https://github.com/SUYEgit/Surgery-Robot-Detection-Segmentation) by the NUS Control & Mechatronics Lab. 218 | ![Surgery Robot Detection and Segmentation](https://github.com/SUYEgit/Surgery-Robot-Detection-Segmentation/raw/master/assets/video.gif) 219 | 220 | ### [Reconstructing 3D buildings from aerial LiDAR](https://medium.com/geoai/reconstructing-3d-buildings-from-aerial-lidar-with-ai-details-6a81cb3079c0) 221 | A proof of concept project by [Esri](https://www.esri.com/), in collaboration with Nvidia and Miami-Dade County. Along with a great write up and code by Dmitry Kudinov, Daniel Hedges, and Omar Maher. 222 | ![3D Building Reconstruction](assets/project_3dbuildings.png) 223 | 224 | ### [Usiigaci: Label-free Cell Tracking in Phase Contrast Microscopy](https://github.com/oist/usiigaci) 225 | A project from Japan to automatically track cells in a microfluidics platform. Paper is pending, but the source code is released. 226 | 227 | ![](assets/project_usiigaci1.gif) ![](assets/project_usiigaci2.gif) 228 | 229 | ### [Characterization of Arctic Ice-Wedge Polygons in Very High Spatial Resolution Aerial Imagery](http://www.mdpi.com/2072-4292/10/9/1487) 230 | Research project to understand the complex processes between degradations in the Arctic and climate change. By Weixing Zhang, Chandi Witharana, Anna Liljedahl, and Mikhail Kanevskiy. 231 | ![image](assets/project_ice_wedge_polygons.png) 232 | 233 | ### [Mask-RCNN Shiny](https://github.com/huuuuusy/Mask-RCNN-Shiny) 234 | A computer vision class project by HU Shiyu to apply the color pop effect on people with beautiful results. 235 | ![](assets/project_shiny1.jpg) 236 | 237 | ### [Mapping Challenge](https://github.com/crowdAI/crowdai-mapping-challenge-mask-rcnn): Convert satellite imagery to maps for use by humanitarian organisations. 238 | ![Mapping Challenge](assets/mapping_challenge.png) 239 | 240 | ### [GRASS GIS Addon](https://github.com/ctu-geoforall-lab/i.ann.maskrcnn) to generate vector masks from geospatial imagery. Based on a [Master's thesis](https://github.com/ctu-geoforall-lab-projects/dp-pesek-2018) by Ondřej Pešek. 
241 | ![GRASS GIS Image](assets/project_grass_gis.png) 242 | -------------------------------------------------------------------------------- /assets/4k_video.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/assets/4k_video.gif -------------------------------------------------------------------------------- /assets/balloon_color_splash.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/assets/balloon_color_splash.gif -------------------------------------------------------------------------------- /assets/detection_activations.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/assets/detection_activations.png -------------------------------------------------------------------------------- /assets/detection_anchors.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/assets/detection_anchors.png -------------------------------------------------------------------------------- /assets/detection_final.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/assets/detection_final.png -------------------------------------------------------------------------------- /assets/detection_histograms.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/assets/detection_histograms.png -------------------------------------------------------------------------------- /assets/detection_masks.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/assets/detection_masks.png -------------------------------------------------------------------------------- /assets/detection_refinement.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/assets/detection_refinement.png -------------------------------------------------------------------------------- /assets/detection_tensorboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/assets/detection_tensorboard.png -------------------------------------------------------------------------------- /assets/images_to_osm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/assets/images_to_osm.png -------------------------------------------------------------------------------- /assets/mapping_challenge.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/assets/mapping_challenge.png -------------------------------------------------------------------------------- /assets/nucleus_segmentation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/assets/nucleus_segmentation.png -------------------------------------------------------------------------------- /assets/project_3dbuildings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/assets/project_3dbuildings.png -------------------------------------------------------------------------------- /assets/project_grass_gis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/assets/project_grass_gis.png -------------------------------------------------------------------------------- /assets/project_ice_wedge_polygons.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/assets/project_ice_wedge_polygons.png -------------------------------------------------------------------------------- /assets/project_shiny1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/assets/project_shiny1.jpg -------------------------------------------------------------------------------- /assets/project_usiigaci1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/assets/project_usiigaci1.gif -------------------------------------------------------------------------------- /assets/project_usiigaci2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/assets/project_usiigaci2.gif -------------------------------------------------------------------------------- /assets/street.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/assets/street.png -------------------------------------------------------------------------------- /coco/coco.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Configurations and data loading code for MS COCO. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | 9 | ------------------------------------------------------------ 10 | 11 | Usage: import the module (see Jupyter notebooks for examples), or run from 12 | the command line as such: 13 | 14 | # Train a new model starting from pre-trained COCO weights 15 | python3 coco.py train --dataset=/path/to/coco/ --model=coco 16 | 17 | # Train a new model starting from ImageNet weights. 
Also auto download COCO dataset 18 | python3 coco.py train --dataset=/path/to/coco/ --model=imagenet --download=True 19 | 20 | # Continue training a model that you had trained earlier 21 | python3 coco.py train --dataset=/path/to/coco/ --model=/path/to/weights.h5 22 | 23 | # Continue training the last model you trained 24 | python3 coco.py train --dataset=/path/to/coco/ --model=last 25 | 26 | # Run COCO evaluatoin on the last model you trained 27 | python3 coco.py evaluate --dataset=/path/to/coco/ --model=last 28 | """ 29 | 30 | import os 31 | import sys 32 | import time 33 | import numpy as np 34 | import imgaug # https://github.com/aleju/imgaug (pip3 install imgaug) 35 | 36 | # Download and install the Python COCO tools from https://github.com/waleedka/coco 37 | # That's a fork from the original https://github.com/pdollar/coco with a bug 38 | # fix for Python 3. 39 | # I submitted a pull request https://github.com/cocodataset/cocoapi/pull/50 40 | # If the PR is merged then use the original repo. 41 | # Note: Edit PythonAPI/Makefile and replace "python" with "python3". 42 | from pycocotools.coco import COCO 43 | from pycocotools.cocoeval import COCOeval 44 | from pycocotools import mask as maskUtils 45 | 46 | import zipfile 47 | import urllib.request 48 | import shutil 49 | 50 | # Root directory of the project 51 | ROOT_DIR = os.path.abspath("../../") 52 | 53 | # Import Mask RCNN 54 | sys.path.append(ROOT_DIR) # To find local version of the library 55 | from mrcnn.config import Config 56 | from mrcnn import model as modellib, utils 57 | 58 | # Path to trained weights file 59 | COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5") 60 | 61 | # Directory to save logs and model checkpoints, if not provided 62 | # through the command line argument --logs 63 | DEFAULT_LOGS_DIR = os.path.join(ROOT_DIR, "logs") 64 | DEFAULT_DATASET_YEAR = "2014" 65 | 66 | ############################################################ 67 | # Configurations 68 | ############################################################ 69 | 70 | 71 | class CocoConfig(Config): 72 | """Configuration for training on MS COCO. 73 | Derives from the base Config class and overrides values specific 74 | to the COCO dataset. 75 | """ 76 | # Give the configuration a recognizable name 77 | NAME = "coco" 78 | 79 | # We use a GPU with 12GB memory, which can fit two images. 80 | # Adjust down if you use a smaller GPU. 81 | IMAGES_PER_GPU = 2 82 | 83 | # Uncomment to train on 8 GPUs (default is 1) 84 | # GPU_COUNT = 8 85 | 86 | # Number of classes (including background) 87 | NUM_CLASSES = 1 + 80 # COCO has 80 classes 88 | 89 | 90 | ############################################################ 91 | # Dataset 92 | ############################################################ 93 | 94 | class CocoDataset(utils.Dataset): 95 | def load_coco(self, dataset_dir, subset, year=DEFAULT_DATASET_YEAR, class_ids=None, 96 | class_map=None, return_coco=False, auto_download=False): 97 | """Load a subset of the COCO dataset. 98 | dataset_dir: The root directory of the COCO dataset. 99 | subset: What to load (train, val, minival, valminusminival) 100 | year: What dataset year to load (2014, 2017) as a string, not an integer 101 | class_ids: If provided, only loads images that have the given classes. 102 | class_map: TODO: Not implemented yet. Supports maping classes from 103 | different datasets to the same class ID. 104 | return_coco: If True, returns the COCO object. 
105 | auto_download: Automatically download and unzip MS-COCO images and annotations 106 | """ 107 | 108 | if auto_download is True: 109 | self.auto_download(dataset_dir, subset, year) 110 | 111 | coco = COCO("{}/annotations/instances_{}{}.json".format(dataset_dir, subset, year)) 112 | if subset == "minival" or subset == "valminusminival": 113 | subset = "val" 114 | image_dir = "{}/{}{}".format(dataset_dir, subset, year) 115 | 116 | # Load all classes or a subset? 117 | if not class_ids: 118 | # All classes 119 | class_ids = sorted(coco.getCatIds()) 120 | 121 | # All images or a subset? 122 | if class_ids: 123 | image_ids = [] 124 | for id in class_ids: 125 | image_ids.extend(list(coco.getImgIds(catIds=[id]))) 126 | # Remove duplicates 127 | image_ids = list(set(image_ids)) 128 | else: 129 | # All images 130 | image_ids = list(coco.imgs.keys()) 131 | 132 | # Add classes 133 | for i in class_ids: 134 | self.add_class("coco", i, coco.loadCats(i)[0]["name"]) 135 | 136 | # Add images 137 | for i in image_ids: 138 | self.add_image( 139 | "coco", image_id=i, 140 | path=os.path.join(image_dir, coco.imgs[i]['file_name']), 141 | width=coco.imgs[i]["width"], 142 | height=coco.imgs[i]["height"], 143 | annotations=coco.loadAnns(coco.getAnnIds( 144 | imgIds=[i], catIds=class_ids, iscrowd=None))) 145 | if return_coco: 146 | return coco 147 | 148 | def auto_download(self, dataDir, dataType, dataYear): 149 | """Download the COCO dataset/annotations if requested. 150 | dataDir: The root directory of the COCO dataset. 151 | dataType: What to load (train, val, minival, valminusminival) 152 | dataYear: What dataset year to load (2014, 2017) as a string, not an integer 153 | Note: 154 | For 2014, use "train", "val", "minival", or "valminusminival" 155 | For 2017, only "train" and "val" annotations are available 156 | """ 157 | 158 | # Setup paths and file names 159 | if dataType == "minival" or dataType == "valminusminival": 160 | imgDir = "{}/{}{}".format(dataDir, "val", dataYear) 161 | imgZipFile = "{}/{}{}.zip".format(dataDir, "val", dataYear) 162 | imgURL = "http://images.cocodataset.org/zips/{}{}.zip".format("val", dataYear) 163 | else: 164 | imgDir = "{}/{}{}".format(dataDir, dataType, dataYear) 165 | imgZipFile = "{}/{}{}.zip".format(dataDir, dataType, dataYear) 166 | imgURL = "http://images.cocodataset.org/zips/{}{}.zip".format(dataType, dataYear) 167 | # print("Image paths:"); print(imgDir); print(imgZipFile); print(imgURL) 168 | 169 | # Create main folder if it doesn't exist yet 170 | if not os.path.exists(dataDir): 171 | os.makedirs(dataDir) 172 | 173 | # Download images if not available locally 174 | if not os.path.exists(imgDir): 175 | os.makedirs(imgDir) 176 | print("Downloading images to " + imgZipFile + " ...") 177 | with urllib.request.urlopen(imgURL) as resp, open(imgZipFile, 'wb') as out: 178 | shutil.copyfileobj(resp, out) 179 | print("... done downloading.") 180 | print("Unzipping " + imgZipFile) 181 | with zipfile.ZipFile(imgZipFile, "r") as zip_ref: 182 | zip_ref.extractall(dataDir) 183 | print("... 
done unzipping") 184 | print("Will use images in " + imgDir) 185 | 186 | # Setup annotations data paths 187 | annDir = "{}/annotations".format(dataDir) 188 | if dataType == "minival": 189 | annZipFile = "{}/instances_minival2014.json.zip".format(dataDir) 190 | annFile = "{}/instances_minival2014.json".format(annDir) 191 | annURL = "https://dl.dropboxusercontent.com/s/o43o90bna78omob/instances_minival2014.json.zip?dl=0" 192 | unZipDir = annDir 193 | elif dataType == "valminusminival": 194 | annZipFile = "{}/instances_valminusminival2014.json.zip".format(dataDir) 195 | annFile = "{}/instances_valminusminival2014.json".format(annDir) 196 | annURL = "https://dl.dropboxusercontent.com/s/s3tw5zcg7395368/instances_valminusminival2014.json.zip?dl=0" 197 | unZipDir = annDir 198 | else: 199 | annZipFile = "{}/annotations_trainval{}.zip".format(dataDir, dataYear) 200 | annFile = "{}/instances_{}{}.json".format(annDir, dataType, dataYear) 201 | annURL = "http://images.cocodataset.org/annotations/annotations_trainval{}.zip".format(dataYear) 202 | unZipDir = dataDir 203 | # print("Annotations paths:"); print(annDir); print(annFile); print(annZipFile); print(annURL) 204 | 205 | # Download annotations if not available locally 206 | if not os.path.exists(annDir): 207 | os.makedirs(annDir) 208 | if not os.path.exists(annFile): 209 | if not os.path.exists(annZipFile): 210 | print("Downloading zipped annotations to " + annZipFile + " ...") 211 | with urllib.request.urlopen(annURL) as resp, open(annZipFile, 'wb') as out: 212 | shutil.copyfileobj(resp, out) 213 | print("... done downloading.") 214 | print("Unzipping " + annZipFile) 215 | with zipfile.ZipFile(annZipFile, "r") as zip_ref: 216 | zip_ref.extractall(unZipDir) 217 | print("... done unzipping") 218 | print("Will use annotations in " + annFile) 219 | 220 | def load_mask(self, image_id): 221 | """Load instance masks for the given image. 222 | 223 | Different datasets use different ways to store masks. This 224 | function converts the different mask format to one format 225 | in the form of a bitmap [height, width, instances]. 226 | 227 | Returns: 228 | masks: A bool array of shape [height, width, instance count] with 229 | one mask per instance. 230 | class_ids: a 1D array of class IDs of the instance masks. 231 | """ 232 | # If not a COCO image, delegate to parent class. 233 | image_info = self.image_info[image_id] 234 | if image_info["source"] != "coco": 235 | return super(CocoDataset, self).load_mask(image_id) 236 | 237 | instance_masks = [] 238 | class_ids = [] 239 | annotations = self.image_info[image_id]["annotations"] 240 | # Build mask of shape [height, width, instance_count] and list 241 | # of class IDs that correspond to each channel of the mask. 242 | for annotation in annotations: 243 | class_id = self.map_source_class_id( 244 | "coco.{}".format(annotation['category_id'])) 245 | if class_id: 246 | m = self.annToMask(annotation, image_info["height"], 247 | image_info["width"]) 248 | # Some objects are so small that they're less than 1 pixel area 249 | # and end up rounded out. Skip those objects. 250 | if m.max() < 1: 251 | continue 252 | # Is it a crowd? If so, use a negative class ID. 253 | if annotation['iscrowd']: 254 | # Use negative class ID for crowds 255 | class_id *= -1 256 | # For crowd masks, annToMask() sometimes returns a mask 257 | # smaller than the given dimensions. If so, resize it. 
258 | if m.shape[0] != image_info["height"] or m.shape[1] != image_info["width"]: 259 | m = np.ones([image_info["height"], image_info["width"]], dtype=bool) 260 | instance_masks.append(m) 261 | class_ids.append(class_id) 262 | 263 | # Pack instance masks into an array 264 | if class_ids: 265 | mask = np.stack(instance_masks, axis=2).astype(np.bool) 266 | class_ids = np.array(class_ids, dtype=np.int32) 267 | return mask, class_ids 268 | else: 269 | # Call super class to return an empty mask 270 | return super(CocoDataset, self).load_mask(image_id) 271 | 272 | def image_reference(self, image_id): 273 | """Return a link to the image in the COCO Website.""" 274 | info = self.image_info[image_id] 275 | if info["source"] == "coco": 276 | return "http://cocodataset.org/#explore?id={}".format(info["id"]) 277 | else: 278 | super(CocoDataset, self).image_reference(image_id) 279 | 280 | # The following two functions are from pycocotools with a few changes. 281 | 282 | def annToRLE(self, ann, height, width): 283 | """ 284 | Convert annotation which can be polygons, uncompressed RLE to RLE. 285 | :return: binary mask (numpy 2D array) 286 | """ 287 | segm = ann['segmentation'] 288 | if isinstance(segm, list): 289 | # polygon -- a single object might consist of multiple parts 290 | # we merge all parts into one mask rle code 291 | rles = maskUtils.frPyObjects(segm, height, width) 292 | rle = maskUtils.merge(rles) 293 | elif isinstance(segm['counts'], list): 294 | # uncompressed RLE 295 | rle = maskUtils.frPyObjects(segm, height, width) 296 | else: 297 | # rle 298 | rle = ann['segmentation'] 299 | return rle 300 | 301 | def annToMask(self, ann, height, width): 302 | """ 303 | Convert annotation which can be polygons, uncompressed RLE, or RLE to binary mask. 304 | :return: binary mask (numpy 2D array) 305 | """ 306 | rle = self.annToRLE(ann, height, width) 307 | m = maskUtils.decode(rle) 308 | return m 309 | 310 | 311 | ############################################################ 312 | # COCO Evaluation 313 | ############################################################ 314 | 315 | def build_coco_results(dataset, image_ids, rois, class_ids, scores, masks): 316 | """Arrange resutls to match COCO specs in http://cocodataset.org/#format 317 | """ 318 | # If no results, return an empty list 319 | if rois is None: 320 | return [] 321 | 322 | results = [] 323 | for image_id in image_ids: 324 | # Loop through detections 325 | for i in range(rois.shape[0]): 326 | class_id = class_ids[i] 327 | score = scores[i] 328 | bbox = np.around(rois[i], 1) 329 | mask = masks[:, :, i] 330 | 331 | result = { 332 | "image_id": image_id, 333 | "category_id": dataset.get_source_class_id(class_id, "coco"), 334 | "bbox": [bbox[1], bbox[0], bbox[3] - bbox[1], bbox[2] - bbox[0]], 335 | "score": score, 336 | "segmentation": maskUtils.encode(np.asfortranarray(mask)) 337 | } 338 | results.append(result) 339 | return results 340 | 341 | 342 | def evaluate_coco(model, dataset, coco, eval_type="bbox", limit=0, image_ids=None): 343 | """Runs official COCO evaluation. 344 | dataset: A Dataset object with valiadtion data 345 | eval_type: "bbox" or "segm" for bounding box or segmentation evaluation 346 | limit: if not 0, it's the number of images to use for evaluation 347 | """ 348 | # Pick COCO images from the dataset 349 | image_ids = image_ids or dataset.image_ids 350 | 351 | # Limit to a subset 352 | if limit: 353 | image_ids = image_ids[:limit] 354 | 355 | # Get corresponding COCO image IDs. 
356 | coco_image_ids = [dataset.image_info[id]["id"] for id in image_ids] 357 | 358 | t_prediction = 0 359 | t_start = time.time() 360 | 361 | results = [] 362 | for i, image_id in enumerate(image_ids): 363 | # Load image 364 | image = dataset.load_image(image_id) 365 | 366 | # Run detection 367 | t = time.time() 368 | r = model.detect([image], verbose=0)[0] 369 | t_prediction += (time.time() - t) 370 | 371 | # Convert results to COCO format 372 | # Cast masks to uint8 because COCO tools errors out on bool 373 | image_results = build_coco_results(dataset, coco_image_ids[i:i + 1], 374 | r["rois"], r["class_ids"], 375 | r["scores"], 376 | r["masks"].astype(np.uint8)) 377 | results.extend(image_results) 378 | 379 | # Load results. This modifies results with additional attributes. 380 | coco_results = coco.loadRes(results) 381 | 382 | # Evaluate 383 | cocoEval = COCOeval(coco, coco_results, eval_type) 384 | cocoEval.params.imgIds = coco_image_ids 385 | cocoEval.evaluate() 386 | cocoEval.accumulate() 387 | cocoEval.summarize() 388 | 389 | print("Prediction time: {}. Average {}/image".format( 390 | t_prediction, t_prediction / len(image_ids))) 391 | print("Total time: ", time.time() - t_start) 392 | 393 | 394 | ############################################################ 395 | # Training 396 | ############################################################ 397 | 398 | 399 | if __name__ == '__main__': 400 | import argparse 401 | 402 | # Parse command line arguments 403 | parser = argparse.ArgumentParser( 404 | description='Train Mask R-CNN on MS COCO.') 405 | parser.add_argument("command", 406 | metavar="", 407 | help="'train' or 'evaluate' on MS COCO") 408 | parser.add_argument('--dataset', required=True, 409 | metavar="/path/to/coco/", 410 | help='Directory of the MS-COCO dataset') 411 | parser.add_argument('--year', required=False, 412 | default=DEFAULT_DATASET_YEAR, 413 | metavar="", 414 | help='Year of the MS-COCO dataset (2014 or 2017) (default=2014)') 415 | parser.add_argument('--model', required=True, 416 | metavar="/path/to/weights.h5", 417 | help="Path to weights .h5 file or 'coco'") 418 | parser.add_argument('--logs', required=False, 419 | default=DEFAULT_LOGS_DIR, 420 | metavar="/path/to/logs/", 421 | help='Logs and checkpoints directory (default=logs/)') 422 | parser.add_argument('--limit', required=False, 423 | default=500, 424 | metavar="", 425 | help='Images to use for evaluation (default=500)') 426 | parser.add_argument('--download', required=False, 427 | default=False, 428 | metavar="", 429 | help='Automatically download and unzip MS-COCO files (default=False)', 430 | type=bool) 431 | args = parser.parse_args() 432 | print("Command: ", args.command) 433 | print("Model: ", args.model) 434 | print("Dataset: ", args.dataset) 435 | print("Year: ", args.year) 436 | print("Logs: ", args.logs) 437 | print("Auto Download: ", args.download) 438 | 439 | # Configurations 440 | if args.command == "train": 441 | config = CocoConfig() 442 | else: 443 | class InferenceConfig(CocoConfig): 444 | # Set batch size to 1 since we'll be running inference on 445 | # one image at a time. 
Batch size = GPU_COUNT * IMAGES_PER_GPU 446 | GPU_COUNT = 1 447 | IMAGES_PER_GPU = 1 448 | DETECTION_MIN_CONFIDENCE = 0 449 | config = InferenceConfig() 450 | config.display() 451 | 452 | # Create model 453 | if args.command == "train": 454 | model = modellib.MaskRCNN(mode="training", config=config, 455 | model_dir=args.logs) 456 | else: 457 | model = modellib.MaskRCNN(mode="inference", config=config, 458 | model_dir=args.logs) 459 | 460 | # Select weights file to load 461 | if args.model.lower() == "coco": 462 | model_path = COCO_MODEL_PATH 463 | elif args.model.lower() == "last": 464 | # Find last trained weights 465 | model_path = model.find_last() 466 | elif args.model.lower() == "imagenet": 467 | # Start from ImageNet trained weights 468 | model_path = model.get_imagenet_weights() 469 | else: 470 | model_path = args.model 471 | 472 | # Load weights 473 | print("Loading weights ", model_path) 474 | model.load_weights(model_path, by_name=True) 475 | 476 | # Train or evaluate 477 | if args.command == "train": 478 | # Training dataset. Use the training set and 35K from the 479 | # validation set, as as in the Mask RCNN paper. 480 | dataset_train = CocoDataset() 481 | dataset_train.load_coco(args.dataset, "train", year=args.year, auto_download=args.download) 482 | if args.year in '2014': 483 | dataset_train.load_coco(args.dataset, "valminusminival", year=args.year, auto_download=args.download) 484 | dataset_train.prepare() 485 | 486 | # Validation dataset 487 | dataset_val = CocoDataset() 488 | val_type = "val" if args.year in '2017' else "minival" 489 | dataset_val.load_coco(args.dataset, val_type, year=args.year, auto_download=args.download) 490 | dataset_val.prepare() 491 | 492 | # Image Augmentation 493 | # Right/Left flip 50% of the time 494 | augmentation = imgaug.augmenters.Fliplr(0.5) 495 | 496 | # *** This training schedule is an example. Update to your needs *** 497 | 498 | # Training - Stage 1 499 | print("Training network heads") 500 | model.train(dataset_train, dataset_val, 501 | learning_rate=config.LEARNING_RATE, 502 | epochs=40, 503 | layers='heads', 504 | augmentation=augmentation) 505 | 506 | # Training - Stage 2 507 | # Finetune layers from ResNet stage 4 and up 508 | print("Fine tune Resnet stage 4 and up") 509 | model.train(dataset_train, dataset_val, 510 | learning_rate=config.LEARNING_RATE, 511 | epochs=120, 512 | layers='4+', 513 | augmentation=augmentation) 514 | 515 | # Training - Stage 3 516 | # Fine tune all layers 517 | print("Fine tune all layers") 518 | model.train(dataset_train, dataset_val, 519 | learning_rate=config.LEARNING_RATE / 10, 520 | epochs=160, 521 | layers='all', 522 | augmentation=augmentation) 523 | 524 | elif args.command == "evaluate": 525 | # Validation dataset 526 | dataset_val = CocoDataset() 527 | val_type = "val" if args.year in '2017' else "minival" 528 | coco = dataset_val.load_coco(args.dataset, val_type, year=args.year, return_coco=True, auto_download=args.download) 529 | dataset_val.prepare() 530 | print("Running COCO evaluation on {} images.".format(args.limit)) 531 | evaluate_coco(model, dataset_val, coco, "bbox", limit=int(args.limit)) 532 | else: 533 | print("'{}' is not recognized. 
" 534 | "Use 'train' or 'evaluate'".format(args.command)) 535 | -------------------------------------------------------------------------------- /demo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Mask R-CNN Demo\n", 8 | "\n", 9 | "A quick intro to using the pre-trained model to detect and segment objects." 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "name": "stderr", 19 | "output_type": "stream", 20 | "text": [ 21 | "Using TensorFlow backend.\n" 22 | ] 23 | } 24 | ], 25 | "source": [ 26 | "import os\n", 27 | "import sys\n", 28 | "import random\n", 29 | "import math\n", 30 | "import numpy as np\n", 31 | "import skimage.io\n", 32 | "import matplotlib\n", 33 | "import matplotlib.pyplot as plt\n", 34 | "\n", 35 | "# Root directory of the project\n", 36 | "ROOT_DIR = os.path.abspath(\"../\")\n", 37 | "\n", 38 | "# Import Mask RCNN\n", 39 | "sys.path.append(ROOT_DIR) # To find local version of the library\n", 40 | "from mrcnn import utils\n", 41 | "import mrcnn.model as modellib\n", 42 | "from mrcnn import visualize\n", 43 | "# Import COCO config\n", 44 | "sys.path.append(os.path.join(ROOT_DIR, \"samples/coco/\")) # To find local version\n", 45 | "import coco\n", 46 | "\n", 47 | "%matplotlib inline \n", 48 | "\n", 49 | "# Directory to save logs and trained model\n", 50 | "MODEL_DIR = os.path.join(ROOT_DIR, \"logs\")\n", 51 | "\n", 52 | "# Local path to trained weights file\n", 53 | "COCO_MODEL_PATH = os.path.join(ROOT_DIR, \"mask_rcnn_coco.h5\")\n", 54 | "# Download COCO trained weights from Releases if needed\n", 55 | "if not os.path.exists(COCO_MODEL_PATH):\n", 56 | " utils.download_trained_weights(COCO_MODEL_PATH)\n", 57 | "\n", 58 | "# Directory of images to run detection on\n", 59 | "IMAGE_DIR = os.path.join(ROOT_DIR, \"images\")" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": {}, 65 | "source": [ 66 | "## Configurations\n", 67 | "\n", 68 | "We'll be using a model trained on the MS-COCO dataset. The configurations of this model are in the ```CocoConfig``` class in ```coco.py```.\n", 69 | "\n", 70 | "For inferencing, modify the configurations a bit to fit the task. To do so, sub-class the ```CocoConfig``` class and override the attributes you need to change." 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 2, 76 | "metadata": {}, 77 | "outputs": [ 78 | { 79 | "ename": "AttributeError", 80 | "evalue": "module 'coco' has no attribute 'CocoConfig'", 81 | "output_type": "error", 82 | "traceback": [ 83 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 84 | "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)", 85 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[1;32mclass\u001b[0m \u001b[0mInferenceConfig\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcoco\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mCocoConfig\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2\u001b[0m \u001b[1;31m# Set batch size to 1 since we'll be running inference on\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[1;31m# one image at a time. 
Batch size = GPU_COUNT * IMAGES_PER_GPU\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[0mGPU_COUNT\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[0mIMAGES_PER_GPU\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 86 | "\u001b[1;31mAttributeError\u001b[0m: module 'coco' has no attribute 'CocoConfig'" 87 | ] 88 | } 89 | ], 90 | "source": [ 91 | "class InferenceConfig(coco.CocoConfig):\n", 92 | " # Set batch size to 1 since we'll be running inference on\n", 93 | " # one image at a time. Batch size = GPU_COUNT * IMAGES_PER_GPU\n", 94 | " GPU_COUNT = 1\n", 95 | " IMAGES_PER_GPU = 1\n", 96 | "\n", 97 | "config = InferenceConfig()\n", 98 | "config.display()" 99 | ] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": {}, 104 | "source": [ 105 | "## Create Model and Load Trained Weights" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": { 112 | "scrolled": false 113 | }, 114 | "outputs": [], 115 | "source": [ 116 | "# Create model object in inference mode.\n", 117 | "model = modellib.MaskRCNN(mode=\"inference\", model_dir=MODEL_DIR, config=config)\n", 118 | "\n", 119 | "# Load weights trained on MS-COCO\n", 120 | "model.load_weights(COCO_MODEL_PATH, by_name=True)" 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "metadata": {}, 126 | "source": [ 127 | "## Class Names\n", 128 | "\n", 129 | "The model classifies objects and returns class IDs, which are integer value that identify each class. Some datasets assign integer values to their classes and some don't. For example, in the MS-COCO dataset, the 'person' class is 1 and 'teddy bear' is 88. The IDs are often sequential, but not always. The COCO dataset, for example, has classes associated with class IDs 70 and 72, but not 71.\n", 130 | "\n", 131 | "To improve consistency, and to support training on data from multiple sources at the same time, our ```Dataset``` class assigns it's own sequential integer IDs to each class. For example, if you load the COCO dataset using our ```Dataset``` class, the 'person' class would get class ID = 1 (just like COCO) and the 'teddy bear' class is 78 (different from COCO). Keep that in mind when mapping class IDs to class names.\n", 132 | "\n", 133 | "To get the list of class names, you'd load the dataset and then use the ```class_names``` property like this.\n", 134 | "```\n", 135 | "# Load COCO dataset\n", 136 | "dataset = coco.CocoDataset()\n", 137 | "dataset.load_coco(COCO_DIR, \"train\")\n", 138 | "dataset.prepare()\n", 139 | "\n", 140 | "# Print class names\n", 141 | "print(dataset.class_names)\n", 142 | "```\n", 143 | "\n", 144 | "We don't want to require you to download the COCO dataset just to run this demo, so we're including the list of class names below. The index of the class name in the list represent its ID (first class is 0, second is 1, third is 2, ...etc.)" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "metadata": {}, 151 | "outputs": [], 152 | "source": [ 153 | "# COCO Class names\n", 154 | "# Index of the class in the list is its ID. 
For example, to get ID of\n", 155 | "# the teddy bear class, use: class_names.index('teddy bear')\n", 156 | "class_names = ['BG', 'person', 'bicycle', 'car', 'motorcycle', 'airplane',\n", 157 | " 'bus', 'train', 'truck', 'boat', 'traffic light',\n", 158 | " 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird',\n", 159 | " 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear',\n", 160 | " 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',\n", 161 | " 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',\n", 162 | " 'kite', 'baseball bat', 'baseball glove', 'skateboard',\n", 163 | " 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup',\n", 164 | " 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',\n", 165 | " 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',\n", 166 | " 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',\n", 167 | " 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',\n", 168 | " 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster',\n", 169 | " 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors',\n", 170 | " 'teddy bear', 'hair drier', 'toothbrush']" 171 | ] 172 | }, 173 | { 174 | "cell_type": "markdown", 175 | "metadata": {}, 176 | "source": [ 177 | "## Run Object Detection" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": null, 183 | "metadata": { 184 | "scrolled": false 185 | }, 186 | "outputs": [], 187 | "source": [ 188 | "# Load a random image from the images folder\n", 189 | "file_names = next(os.walk(IMAGE_DIR))[2]\n", 190 | "image = skimage.io.imread(os.path.join(IMAGE_DIR, random.choice(file_names)))\n", 191 | "\n", 192 | "# Run detection\n", 193 | "results = model.detect([image], verbose=1)\n", 194 | "\n", 195 | "# Visualize results\n", 196 | "r = results[0]\n", 197 | "visualize.display_instances(image, r['rois'], r['masks'], r['class_ids'], \n", 198 | " class_names, r['scores'])" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": null, 204 | "metadata": {}, 205 | "outputs": [], 206 | "source": [] 207 | } 208 | ], 209 | "metadata": { 210 | "kernelspec": { 211 | "display_name": "Python 3", 212 | "language": "python", 213 | "name": "python3" 214 | }, 215 | "language_info": { 216 | "codemirror_mode": { 217 | "name": "ipython", 218 | "version": 3 219 | }, 220 | "file_extension": ".py", 221 | "mimetype": "text/x-python", 222 | "name": "python", 223 | "nbconvert_exporter": "python", 224 | "pygments_lexer": "ipython3", 225 | "version": "3.6.8" 226 | } 227 | }, 228 | "nbformat": 4, 229 | "nbformat_minor": 2 230 | } 231 | -------------------------------------------------------------------------------- /images/1045023827_4ec3e8ba5c_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/images/1045023827_4ec3e8ba5c_z.jpg -------------------------------------------------------------------------------- /images/12283150_12d37e6389_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/images/12283150_12d37e6389_z.jpg -------------------------------------------------------------------------------- /images/2383514521_1fc8d7b0de_z.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/images/2383514521_1fc8d7b0de_z.jpg -------------------------------------------------------------------------------- /images/2502287818_41e4b0c4fb_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/images/2502287818_41e4b0c4fb_z.jpg -------------------------------------------------------------------------------- /images/2516944023_d00345997d_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/images/2516944023_d00345997d_z.jpg -------------------------------------------------------------------------------- /images/25691390_f9944f61b5_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/images/25691390_f9944f61b5_z.jpg -------------------------------------------------------------------------------- /images/262985539_1709e54576_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/images/262985539_1709e54576_z.jpg -------------------------------------------------------------------------------- /images/3132016470_c27baa00e8_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/images/3132016470_c27baa00e8_z.jpg -------------------------------------------------------------------------------- /images/3627527276_6fe8cd9bfe_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/images/3627527276_6fe8cd9bfe_z.jpg -------------------------------------------------------------------------------- /images/3651581213_f81963d1dd_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/images/3651581213_f81963d1dd_z.jpg -------------------------------------------------------------------------------- /images/3800883468_12af3c0b50_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/images/3800883468_12af3c0b50_z.jpg -------------------------------------------------------------------------------- /images/3862500489_6fd195d183_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/images/3862500489_6fd195d183_z.jpg -------------------------------------------------------------------------------- /images/3878153025_8fde829928_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/images/3878153025_8fde829928_z.jpg 
-------------------------------------------------------------------------------- /images/4410436637_7b0ca36ee7_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/images/4410436637_7b0ca36ee7_z.jpg -------------------------------------------------------------------------------- /images/4782628554_668bc31826_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/images/4782628554_668bc31826_z.jpg -------------------------------------------------------------------------------- /images/5951960966_d4e1cda5d0_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/images/5951960966_d4e1cda5d0_z.jpg -------------------------------------------------------------------------------- /images/6584515005_fce9cec486_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/images/6584515005_fce9cec486_z.jpg -------------------------------------------------------------------------------- /images/6821351586_59aa0dc110_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/images/6821351586_59aa0dc110_z.jpg -------------------------------------------------------------------------------- /images/7581246086_cf7bbb7255_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/images/7581246086_cf7bbb7255_z.jpg -------------------------------------------------------------------------------- /images/7933423348_c30bd9bd4e_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/images/7933423348_c30bd9bd4e_z.jpg -------------------------------------------------------------------------------- /images/8053677163_d4c8f416be_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/images/8053677163_d4c8f416be_z.jpg -------------------------------------------------------------------------------- /images/8239308689_efa6c11b08_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/images/8239308689_efa6c11b08_z.jpg -------------------------------------------------------------------------------- /images/8433365521_9252889f9a_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/images/8433365521_9252889f9a_z.jpg -------------------------------------------------------------------------------- /images/8512296263_5fc5458e20_z.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/images/8512296263_5fc5458e20_z.jpg -------------------------------------------------------------------------------- /images/8699757338_c3941051b6_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/images/8699757338_c3941051b6_z.jpg -------------------------------------------------------------------------------- /images/8734543718_37f6b8bd45_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/images/8734543718_37f6b8bd45_z.jpg -------------------------------------------------------------------------------- /images/8829708882_48f263491e_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/images/8829708882_48f263491e_z.jpg -------------------------------------------------------------------------------- /images/9118579087_f9ffa19e63_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/images/9118579087_f9ffa19e63_z.jpg -------------------------------------------------------------------------------- /images/9247489789_132c0d534a_z.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vimallinuxworld13/Mask-R-CNN/5330a40f4c5cc6a3d237dc551649d06a22012d6e/images/9247489789_132c0d534a_z.jpg -------------------------------------------------------------------------------- /mrcnn/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /mrcnn/config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Base Configurations class. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | """ 9 | 10 | import numpy as np 11 | 12 | 13 | # Base Configuration Class 14 | # Don't use this class directly. Instead, sub-class it and override 15 | # the configurations you need to change. 16 | 17 | class Config(object): 18 | """Base configuration class. For custom configurations, create a 19 | sub-class that inherits from this one and override properties 20 | that need to be changed. 21 | """ 22 | # Name the configurations. For example, 'COCO', 'Experiment 3', ...etc. 23 | # Useful if your code needs to do things differently depending on which 24 | # experiment is running. 25 | NAME = None # Override in sub-classes 26 | 27 | # NUMBER OF GPUs to use. When using only a CPU, this needs to be set to 1. 28 | GPU_COUNT = 1 29 | 30 | # Number of images to train with on each GPU. A 12GB GPU can typically 31 | # handle 2 images of 1024x1024px. 32 | # Adjust based on your GPU memory and image sizes. Use the highest 33 | # number that your GPU can handle for best performance. 
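    # Illustrative example (using the defaults in this class): GPU_COUNT = 1
    # with IMAGES_PER_GPU = 2 gives an effective batch size of
    # BATCH_SIZE = IMAGES_PER_GPU * GPU_COUNT = 2, as computed in __init__ below.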
34 | IMAGES_PER_GPU = 2 35 | 36 | # Number of training steps per epoch 37 | # This doesn't need to match the size of the training set. Tensorboard 38 | # updates are saved at the end of each epoch, so setting this to a 39 | # smaller number means getting more frequent TensorBoard updates. 40 | # Validation stats are also calculated at each epoch end and they 41 | # might take a while, so don't set this too small to avoid spending 42 | # a lot of time on validation stats. 43 | STEPS_PER_EPOCH = 1000 44 | 45 | # Number of validation steps to run at the end of every training epoch. 46 | # A bigger number improves accuracy of validation stats, but slows 47 | # down the training. 48 | VALIDATION_STEPS = 50 49 | 50 | # Backbone network architecture 51 | # Supported values are: resnet50, resnet101. 52 | # You can also provide a callable that should have the signature 53 | # of model.resnet_graph. If you do so, you need to supply a callable 54 | # to COMPUTE_BACKBONE_SHAPE as well 55 | BACKBONE = "resnet101" 56 | 57 | # Only useful if you supply a callable to BACKBONE. Should compute 58 | # the shape of each layer of the FPN Pyramid. 59 | # See model.compute_backbone_shapes 60 | COMPUTE_BACKBONE_SHAPE = None 61 | 62 | # The strides of each layer of the FPN Pyramid. These values 63 | # are based on a Resnet101 backbone. 64 | BACKBONE_STRIDES = [4, 8, 16, 32, 64] 65 | 66 | # Size of the fully-connected layers in the classification graph 67 | FPN_CLASSIF_FC_LAYERS_SIZE = 1024 68 | 69 | # Size of the top-down layers used to build the feature pyramid 70 | TOP_DOWN_PYRAMID_SIZE = 256 71 | 72 | # Number of classification classes (including background) 73 | NUM_CLASSES = 1 # Override in sub-classes 74 | 75 | # Length of square anchor side in pixels 76 | RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512) 77 | 78 | # Ratios of anchors at each cell (width/height) 79 | # A value of 1 represents a square anchor, and 0.5 is a wide anchor 80 | RPN_ANCHOR_RATIOS = [0.5, 1, 2] 81 | 82 | # Anchor stride 83 | # If 1 then anchors are created for each cell in the backbone feature map. 84 | # If 2, then anchors are created for every other cell, and so on. 85 | RPN_ANCHOR_STRIDE = 1 86 | 87 | # Non-max suppression threshold to filter RPN proposals. 88 | # You can increase this during training to generate more propsals. 89 | RPN_NMS_THRESHOLD = 0.7 90 | 91 | # How many anchors per image to use for RPN training 92 | RPN_TRAIN_ANCHORS_PER_IMAGE = 256 93 | 94 | # ROIs kept after tf.nn.top_k and before non-maximum suppression 95 | PRE_NMS_LIMIT = 6000 96 | 97 | # ROIs kept after non-maximum suppression (training and inference) 98 | POST_NMS_ROIS_TRAINING = 2000 99 | POST_NMS_ROIS_INFERENCE = 1000 100 | 101 | # If enabled, resizes instance masks to a smaller size to reduce 102 | # memory load. Recommended when using high-resolution images. 103 | USE_MINI_MASK = True 104 | MINI_MASK_SHAPE = (56, 56) # (height, width) of the mini-mask 105 | 106 | # Input image resizing 107 | # Generally, use the "square" resizing mode for training and predicting 108 | # and it should work well in most cases. In this mode, images are scaled 109 | # up such that the small side is = IMAGE_MIN_DIM, but ensuring that the 110 | # scaling doesn't make the long side > IMAGE_MAX_DIM. Then the image is 111 | # padded with zeros to make it a square so multiple images can be put 112 | # in one batch. 113 | # Available resizing modes: 114 | # none: No resizing or padding. Return the image unchanged. 
115 | # square: Resize and pad with zeros to get a square image 116 | # of size [max_dim, max_dim]. 117 | # pad64: Pads width and height with zeros to make them multiples of 64. 118 | # If IMAGE_MIN_DIM or IMAGE_MIN_SCALE are not None, then it scales 119 | # up before padding. IMAGE_MAX_DIM is ignored in this mode. 120 | # The multiple of 64 is needed to ensure smooth scaling of feature 121 | # maps up and down the 6 levels of the FPN pyramid (2**6=64). 122 | # crop: Picks random crops from the image. First, scales the image based 123 | # on IMAGE_MIN_DIM and IMAGE_MIN_SCALE, then picks a random crop of 124 | # size IMAGE_MIN_DIM x IMAGE_MIN_DIM. Can be used in training only. 125 | # IMAGE_MAX_DIM is not used in this mode. 126 | IMAGE_RESIZE_MODE = "square" 127 | IMAGE_MIN_DIM = 800 128 | IMAGE_MAX_DIM = 1024 129 | # Minimum scaling ratio. Checked after MIN_IMAGE_DIM and can force further 130 | # up scaling. For example, if set to 2 then images are scaled up to double 131 | # the width and height, or more, even if MIN_IMAGE_DIM doesn't require it. 132 | # However, in 'square' mode, it can be overruled by IMAGE_MAX_DIM. 133 | IMAGE_MIN_SCALE = 0 134 | # Number of color channels per image. RGB = 3, grayscale = 1, RGB-D = 4 135 | # Changing this requires other changes in the code. See the WIKI for more 136 | # details: https://github.com/matterport/Mask_RCNN/wiki 137 | IMAGE_CHANNEL_COUNT = 3 138 | 139 | # Image mean (RGB) 140 | MEAN_PIXEL = np.array([123.7, 116.8, 103.9]) 141 | 142 | # Number of ROIs per image to feed to classifier/mask heads 143 | # The Mask RCNN paper uses 512 but often the RPN doesn't generate 144 | # enough positive proposals to fill this and keep a positive:negative 145 | # ratio of 1:3. You can increase the number of proposals by adjusting 146 | # the RPN NMS threshold. 147 | TRAIN_ROIS_PER_IMAGE = 200 148 | 149 | # Percent of positive ROIs used to train classifier/mask heads 150 | ROI_POSITIVE_RATIO = 0.33 151 | 152 | # Pooled ROIs 153 | POOL_SIZE = 7 154 | MASK_POOL_SIZE = 14 155 | 156 | # Shape of output mask 157 | # To change this you also need to change the neural network mask branch 158 | MASK_SHAPE = [28, 28] 159 | 160 | # Maximum number of ground truth instances to use in one image 161 | MAX_GT_INSTANCES = 100 162 | 163 | # Bounding box refinement standard deviation for RPN and final detections. 164 | RPN_BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2]) 165 | BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2]) 166 | 167 | # Max number of final detections 168 | DETECTION_MAX_INSTANCES = 100 169 | 170 | # Minimum probability value to accept a detected instance 171 | # ROIs below this threshold are skipped 172 | DETECTION_MIN_CONFIDENCE = 0.7 173 | 174 | # Non-maximum suppression threshold for detection 175 | DETECTION_NMS_THRESHOLD = 0.3 176 | 177 | # Learning rate and momentum 178 | # The Mask RCNN paper uses lr=0.02, but on TensorFlow it causes 179 | # weights to explode. Likely due to differences in optimizer 180 | # implementation. 181 | LEARNING_RATE = 0.001 182 | LEARNING_MOMENTUM = 0.9 183 | 184 | # Weight decay regularization 185 | WEIGHT_DECAY = 0.0001 186 | 187 | # Loss weights for more precise optimization. 188 | # Can be used for R-CNN training setup. 189 | LOSS_WEIGHTS = { 190 | "rpn_class_loss": 1., 191 | "rpn_bbox_loss": 1., 192 | "mrcnn_class_loss": 1., 193 | "mrcnn_bbox_loss": 1., 194 | "mrcnn_mask_loss": 1. 195 | } 196 | 197 | # Use RPN ROIs or externally generated ROIs for training 198 | # Keep this True for most situations. 
Set to False if you want to train 199 | # the head branches on ROI generated by code rather than the ROIs from 200 | # the RPN. For example, to debug the classifier head without having to 201 | # train the RPN. 202 | USE_RPN_ROIS = True 203 | 204 | # Train or freeze batch normalization layers 205 | # None: Train BN layers. This is the normal mode 206 | # False: Freeze BN layers. Good when using a small batch size 207 | # True: (don't use). Set layer in training mode even when predicting 208 | TRAIN_BN = False # Defaulting to False since batch size is often small 209 | 210 | # Gradient norm clipping 211 | GRADIENT_CLIP_NORM = 5.0 212 | 213 | def __init__(self): 214 | """Set values of computed attributes.""" 215 | # Effective batch size 216 | self.BATCH_SIZE = self.IMAGES_PER_GPU * self.GPU_COUNT 217 | 218 | # Input image size 219 | if self.IMAGE_RESIZE_MODE == "crop": 220 | self.IMAGE_SHAPE = np.array([self.IMAGE_MIN_DIM, self.IMAGE_MIN_DIM, 221 | self.IMAGE_CHANNEL_COUNT]) 222 | else: 223 | self.IMAGE_SHAPE = np.array([self.IMAGE_MAX_DIM, self.IMAGE_MAX_DIM, 224 | self.IMAGE_CHANNEL_COUNT]) 225 | 226 | # Image meta data length 227 | # See compose_image_meta() for details 228 | self.IMAGE_META_SIZE = 1 + 3 + 3 + 4 + 1 + self.NUM_CLASSES 229 | 230 | def display(self): 231 | """Display Configuration values.""" 232 | print("\nConfigurations:") 233 | for a in dir(self): 234 | if not a.startswith("__") and not callable(getattr(self, a)): 235 | print("{:30} {}".format(a, getattr(self, a))) 236 | print("\n") 237 | -------------------------------------------------------------------------------- /mrcnn/parallel_model.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Multi-GPU Support for Keras. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | 9 | Ideas and a small code snippets from these sources: 10 | https://github.com/fchollet/keras/issues/2436 11 | https://medium.com/@kuza55/transparent-multi-gpu-training-on-tensorflow-with-keras-8b0016fd9012 12 | https://github.com/avolkov1/keras_experiments/blob/master/keras_exp/multigpu/ 13 | https://github.com/fchollet/keras/blob/master/keras/utils/training_utils.py 14 | """ 15 | 16 | import tensorflow as tf 17 | import keras.backend as K 18 | import keras.layers as KL 19 | import keras.models as KM 20 | 21 | 22 | class ParallelModel(KM.Model): 23 | """Subclasses the standard Keras Model and adds multi-GPU support. 24 | It works by creating a copy of the model on each GPU. Then it slices 25 | the inputs and sends a slice to each copy of the model, and then 26 | merges the outputs together and applies the loss on the combined 27 | outputs. 28 | """ 29 | 30 | def __init__(self, keras_model, gpu_count): 31 | """Class constructor. 32 | keras_model: The Keras model to parallelize 33 | gpu_count: Number of GPUs. Must be > 1 34 | """ 35 | self.inner_model = keras_model 36 | self.gpu_count = gpu_count 37 | merged_outputs = self.make_parallel() 38 | super(ParallelModel, self).__init__(inputs=self.inner_model.inputs, 39 | outputs=merged_outputs) 40 | 41 | def __getattribute__(self, attrname): 42 | """Redirect loading and saving methods to the inner model. 
That's where 43 | the weights are stored.""" 44 | if 'load' in attrname or 'save' in attrname: 45 | return getattr(self.inner_model, attrname) 46 | return super(ParallelModel, self).__getattribute__(attrname) 47 | 48 | def summary(self, *args, **kwargs): 49 | """Override summary() to display summaries of both, the wrapper 50 | and inner models.""" 51 | super(ParallelModel, self).summary(*args, **kwargs) 52 | self.inner_model.summary(*args, **kwargs) 53 | 54 | def make_parallel(self): 55 | """Creates a new wrapper model that consists of multiple replicas of 56 | the original model placed on different GPUs. 57 | """ 58 | # Slice inputs. Slice inputs on the CPU to avoid sending a copy 59 | # of the full inputs to all GPUs. Saves on bandwidth and memory. 60 | input_slices = {name: tf.split(x, self.gpu_count) 61 | for name, x in zip(self.inner_model.input_names, 62 | self.inner_model.inputs)} 63 | 64 | output_names = self.inner_model.output_names 65 | outputs_all = [] 66 | for i in range(len(self.inner_model.outputs)): 67 | outputs_all.append([]) 68 | 69 | # Run the model call() on each GPU to place the ops there 70 | for i in range(self.gpu_count): 71 | with tf.device('/gpu:%d' % i): 72 | with tf.name_scope('tower_%d' % i): 73 | # Run a slice of inputs through this replica 74 | zipped_inputs = zip(self.inner_model.input_names, 75 | self.inner_model.inputs) 76 | inputs = [ 77 | KL.Lambda(lambda s: input_slices[name][i], 78 | output_shape=lambda s: (None,) + s[1:])(tensor) 79 | for name, tensor in zipped_inputs] 80 | # Create the model replica and get the outputs 81 | outputs = self.inner_model(inputs) 82 | if not isinstance(outputs, list): 83 | outputs = [outputs] 84 | # Save the outputs for merging back together later 85 | for l, o in enumerate(outputs): 86 | outputs_all[l].append(o) 87 | 88 | # Merge outputs on CPU 89 | with tf.device('/cpu:0'): 90 | merged = [] 91 | for outputs, name in zip(outputs_all, output_names): 92 | # Concatenate or average outputs? 93 | # Outputs usually have a batch dimension and we concatenate 94 | # across it. If they don't, then the output is likely a loss 95 | # or a metric value that gets averaged across the batch. 96 | # Keras expects losses and metrics to be scalars. 97 | if K.int_shape(outputs[0]) == (): 98 | # Average 99 | m = KL.Lambda(lambda o: tf.add_n(o) / len(outputs), name=name)(outputs) 100 | else: 101 | # Concatenate 102 | m = KL.Concatenate(axis=0, name=name)(outputs) 103 | merged.append(m) 104 | return merged 105 | 106 | 107 | if __name__ == "__main__": 108 | # Testing code below. It creates a simple model to train on MNIST and 109 | # tries to run it on 2 GPUs. It saves the graph so it can be viewed 110 | # in TensorBoard. Run it as: 111 | # 112 | # python3 parallel_model.py 113 | 114 | import os 115 | import numpy as np 116 | import keras.optimizers 117 | from keras.datasets import mnist 118 | from keras.preprocessing.image import ImageDataGenerator 119 | 120 | GPU_COUNT = 2 121 | 122 | # Root directory of the project 123 | ROOT_DIR = os.path.abspath("../") 124 | 125 | # Directory to save logs and trained model 126 | MODEL_DIR = os.path.join(ROOT_DIR, "logs") 127 | 128 | def build_model(x_train, num_classes): 129 | # Reset default graph. Keras leaves old ops in the graph, 130 | # which are ignored for execution but clutter graph 131 | # visualization in TensorBoard. 
132 | tf.reset_default_graph() 133 | 134 | inputs = KL.Input(shape=x_train.shape[1:], name="input_image") 135 | x = KL.Conv2D(32, (3, 3), activation='relu', padding="same", 136 | name="conv1")(inputs) 137 | x = KL.Conv2D(64, (3, 3), activation='relu', padding="same", 138 | name="conv2")(x) 139 | x = KL.MaxPooling2D(pool_size=(2, 2), name="pool1")(x) 140 | x = KL.Flatten(name="flat1")(x) 141 | x = KL.Dense(128, activation='relu', name="dense1")(x) 142 | x = KL.Dense(num_classes, activation='softmax', name="dense2")(x) 143 | 144 | return KM.Model(inputs, x, "digit_classifier_model") 145 | 146 | # Load MNIST Data 147 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 148 | x_train = np.expand_dims(x_train, -1).astype('float32') / 255 149 | x_test = np.expand_dims(x_test, -1).astype('float32') / 255 150 | 151 | print('x_train shape:', x_train.shape) 152 | print('x_test shape:', x_test.shape) 153 | 154 | # Build data generator and model 155 | datagen = ImageDataGenerator() 156 | model = build_model(x_train, 10) 157 | 158 | # Add multi-GPU support. 159 | model = ParallelModel(model, GPU_COUNT) 160 | 161 | optimizer = keras.optimizers.SGD(lr=0.01, momentum=0.9, clipnorm=5.0) 162 | 163 | model.compile(loss='sparse_categorical_crossentropy', 164 | optimizer=optimizer, metrics=['accuracy']) 165 | 166 | model.summary() 167 | 168 | # Train 169 | model.fit_generator( 170 | datagen.flow(x_train, y_train, batch_size=64), 171 | steps_per_epoch=50, epochs=10, verbose=1, 172 | validation_data=(x_test, y_test), 173 | callbacks=[keras.callbacks.TensorBoard(log_dir=MODEL_DIR, 174 | write_graph=True)] 175 | ) 176 | -------------------------------------------------------------------------------- /mrcnn/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Common utility functions and classes. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | """ 9 | 10 | import sys 11 | import os 12 | import logging 13 | import math 14 | import random 15 | import numpy as np 16 | import tensorflow as tf 17 | import scipy 18 | import skimage.color 19 | import skimage.io 20 | import skimage.transform 21 | import urllib.request 22 | import shutil 23 | import warnings 24 | from distutils.version import LooseVersion 25 | 26 | # URL from which to download the latest COCO trained weights 27 | COCO_MODEL_URL = "https://github.com/matterport/Mask_RCNN/releases/download/v2.0/mask_rcnn_coco.h5" 28 | 29 | 30 | ############################################################ 31 | # Bounding Boxes 32 | ############################################################ 33 | 34 | def extract_bboxes(mask): 35 | """Compute bounding boxes from masks. 36 | mask: [height, width, num_instances]. Mask pixels are either 1 or 0. 37 | 38 | Returns: bbox array [num_instances, (y1, x1, y2, x2)]. 39 | """ 40 | boxes = np.zeros([mask.shape[-1], 4], dtype=np.int32) 41 | for i in range(mask.shape[-1]): 42 | m = mask[:, :, i] 43 | # Bounding box. 44 | horizontal_indicies = np.where(np.any(m, axis=0))[0] 45 | vertical_indicies = np.where(np.any(m, axis=1))[0] 46 | if horizontal_indicies.shape[0]: 47 | x1, x2 = horizontal_indicies[[0, -1]] 48 | y1, y2 = vertical_indicies[[0, -1]] 49 | # x2 and y2 should not be part of the box. Increment by 1. 50 | x2 += 1 51 | y2 += 1 52 | else: 53 | # No mask for this instance. Might happen due to 54 | # resizing or cropping. 
Set bbox to zeros 55 | x1, x2, y1, y2 = 0, 0, 0, 0 56 | boxes[i] = np.array([y1, x1, y2, x2]) 57 | return boxes.astype(np.int32) 58 | 59 | 60 | def compute_iou(box, boxes, box_area, boxes_area): 61 | """Calculates IoU of the given box with the array of the given boxes. 62 | box: 1D vector [y1, x1, y2, x2] 63 | boxes: [boxes_count, (y1, x1, y2, x2)] 64 | box_area: float. the area of 'box' 65 | boxes_area: array of length boxes_count. 66 | 67 | Note: the areas are passed in rather than calculated here for 68 | efficiency. Calculate once in the caller to avoid duplicate work. 69 | """ 70 | # Calculate intersection areas 71 | y1 = np.maximum(box[0], boxes[:, 0]) 72 | y2 = np.minimum(box[2], boxes[:, 2]) 73 | x1 = np.maximum(box[1], boxes[:, 1]) 74 | x2 = np.minimum(box[3], boxes[:, 3]) 75 | intersection = np.maximum(x2 - x1, 0) * np.maximum(y2 - y1, 0) 76 | union = box_area + boxes_area[:] - intersection[:] 77 | iou = intersection / union 78 | return iou 79 | 80 | 81 | def compute_overlaps(boxes1, boxes2): 82 | """Computes IoU overlaps between two sets of boxes. 83 | boxes1, boxes2: [N, (y1, x1, y2, x2)]. 84 | 85 | For better performance, pass the largest set first and the smaller second. 86 | """ 87 | # Areas of anchors and GT boxes 88 | area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1]) 89 | area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1]) 90 | 91 | # Compute overlaps to generate matrix [boxes1 count, boxes2 count] 92 | # Each cell contains the IoU value. 93 | overlaps = np.zeros((boxes1.shape[0], boxes2.shape[0])) 94 | for i in range(overlaps.shape[1]): 95 | box2 = boxes2[i] 96 | overlaps[:, i] = compute_iou(box2, boxes1, area2[i], area1) 97 | return overlaps 98 | 99 | 100 | def compute_overlaps_masks(masks1, masks2): 101 | """Computes IoU overlaps between two sets of masks. 102 | masks1, masks2: [Height, Width, instances] 103 | """ 104 | 105 | # If either set of masks is empty return empty result 106 | if masks1.shape[-1] == 0 or masks2.shape[-1] == 0: 107 | return np.zeros((masks1.shape[-1], masks2.shape[-1])) 108 | # flatten masks and compute their areas 109 | masks1 = np.reshape(masks1 > .5, (-1, masks1.shape[-1])).astype(np.float32) 110 | masks2 = np.reshape(masks2 > .5, (-1, masks2.shape[-1])).astype(np.float32) 111 | area1 = np.sum(masks1, axis=0) 112 | area2 = np.sum(masks2, axis=0) 113 | 114 | # intersections and union 115 | intersections = np.dot(masks1.T, masks2) 116 | union = area1[:, None] + area2[None, :] - intersections 117 | overlaps = intersections / union 118 | 119 | return overlaps 120 | 121 | 122 | def non_max_suppression(boxes, scores, threshold): 123 | """Performs non-maximum suppression and returns indices of kept boxes. 124 | boxes: [N, (y1, x1, y2, x2)]. Notice that (y2, x2) lays outside the box. 125 | scores: 1-D array of box scores. 126 | threshold: Float. IoU threshold to use for filtering. 
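    Note: this is greedy NMS -- boxes are visited in descending score order,
    and any remaining box whose IoU with the most recently kept box exceeds
    the threshold is discarded.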
127 | """ 128 | assert boxes.shape[0] > 0 129 | if boxes.dtype.kind != "f": 130 | boxes = boxes.astype(np.float32) 131 | 132 | # Compute box areas 133 | y1 = boxes[:, 0] 134 | x1 = boxes[:, 1] 135 | y2 = boxes[:, 2] 136 | x2 = boxes[:, 3] 137 | area = (y2 - y1) * (x2 - x1) 138 | 139 | # Get indicies of boxes sorted by scores (highest first) 140 | ixs = scores.argsort()[::-1] 141 | 142 | pick = [] 143 | while len(ixs) > 0: 144 | # Pick top box and add its index to the list 145 | i = ixs[0] 146 | pick.append(i) 147 | # Compute IoU of the picked box with the rest 148 | iou = compute_iou(boxes[i], boxes[ixs[1:]], area[i], area[ixs[1:]]) 149 | # Identify boxes with IoU over the threshold. This 150 | # returns indices into ixs[1:], so add 1 to get 151 | # indices into ixs. 152 | remove_ixs = np.where(iou > threshold)[0] + 1 153 | # Remove indices of the picked and overlapped boxes. 154 | ixs = np.delete(ixs, remove_ixs) 155 | ixs = np.delete(ixs, 0) 156 | return np.array(pick, dtype=np.int32) 157 | 158 | 159 | def apply_box_deltas(boxes, deltas): 160 | """Applies the given deltas to the given boxes. 161 | boxes: [N, (y1, x1, y2, x2)]. Note that (y2, x2) is outside the box. 162 | deltas: [N, (dy, dx, log(dh), log(dw))] 163 | """ 164 | boxes = boxes.astype(np.float32) 165 | # Convert to y, x, h, w 166 | height = boxes[:, 2] - boxes[:, 0] 167 | width = boxes[:, 3] - boxes[:, 1] 168 | center_y = boxes[:, 0] + 0.5 * height 169 | center_x = boxes[:, 1] + 0.5 * width 170 | # Apply deltas 171 | center_y += deltas[:, 0] * height 172 | center_x += deltas[:, 1] * width 173 | height *= np.exp(deltas[:, 2]) 174 | width *= np.exp(deltas[:, 3]) 175 | # Convert back to y1, x1, y2, x2 176 | y1 = center_y - 0.5 * height 177 | x1 = center_x - 0.5 * width 178 | y2 = y1 + height 179 | x2 = x1 + width 180 | return np.stack([y1, x1, y2, x2], axis=1) 181 | 182 | 183 | def box_refinement_graph(box, gt_box): 184 | """Compute refinement needed to transform box to gt_box. 185 | box and gt_box are [N, (y1, x1, y2, x2)] 186 | """ 187 | box = tf.cast(box, tf.float32) 188 | gt_box = tf.cast(gt_box, tf.float32) 189 | 190 | height = box[:, 2] - box[:, 0] 191 | width = box[:, 3] - box[:, 1] 192 | center_y = box[:, 0] + 0.5 * height 193 | center_x = box[:, 1] + 0.5 * width 194 | 195 | gt_height = gt_box[:, 2] - gt_box[:, 0] 196 | gt_width = gt_box[:, 3] - gt_box[:, 1] 197 | gt_center_y = gt_box[:, 0] + 0.5 * gt_height 198 | gt_center_x = gt_box[:, 1] + 0.5 * gt_width 199 | 200 | dy = (gt_center_y - center_y) / height 201 | dx = (gt_center_x - center_x) / width 202 | dh = tf.log(gt_height / height) 203 | dw = tf.log(gt_width / width) 204 | 205 | result = tf.stack([dy, dx, dh, dw], axis=1) 206 | return result 207 | 208 | 209 | def box_refinement(box, gt_box): 210 | """Compute refinement needed to transform box to gt_box. 211 | box and gt_box are [N, (y1, x1, y2, x2)]. (y2, x2) is 212 | assumed to be outside the box. 
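    Returns: [N, (dy, dx, dh, dw)], where dy/dx are the center shifts normalized
    by the source box height/width and dh/dw are log scale ratios -- the deltas
    that apply_box_deltas() would apply to map box onto gt_box.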
213 | """ 214 | box = box.astype(np.float32) 215 | gt_box = gt_box.astype(np.float32) 216 | 217 | height = box[:, 2] - box[:, 0] 218 | width = box[:, 3] - box[:, 1] 219 | center_y = box[:, 0] + 0.5 * height 220 | center_x = box[:, 1] + 0.5 * width 221 | 222 | gt_height = gt_box[:, 2] - gt_box[:, 0] 223 | gt_width = gt_box[:, 3] - gt_box[:, 1] 224 | gt_center_y = gt_box[:, 0] + 0.5 * gt_height 225 | gt_center_x = gt_box[:, 1] + 0.5 * gt_width 226 | 227 | dy = (gt_center_y - center_y) / height 228 | dx = (gt_center_x - center_x) / width 229 | dh = np.log(gt_height / height) 230 | dw = np.log(gt_width / width) 231 | 232 | return np.stack([dy, dx, dh, dw], axis=1) 233 | 234 | 235 | ############################################################ 236 | # Dataset 237 | ############################################################ 238 | 239 | class Dataset(object): 240 | """The base class for dataset classes. 241 | To use it, create a new class that adds functions specific to the dataset 242 | you want to use. For example: 243 | 244 | class CatsAndDogsDataset(Dataset): 245 | def load_cats_and_dogs(self): 246 | ... 247 | def load_mask(self, image_id): 248 | ... 249 | def image_reference(self, image_id): 250 | ... 251 | 252 | See COCODataset and ShapesDataset as examples. 253 | """ 254 | 255 | def __init__(self, class_map=None): 256 | self._image_ids = [] 257 | self.image_info = [] 258 | # Background is always the first class 259 | self.class_info = [{"source": "", "id": 0, "name": "BG"}] 260 | self.source_class_ids = {} 261 | 262 | def add_class(self, source, class_id, class_name): 263 | assert "." not in source, "Source name cannot contain a dot" 264 | # Does the class exist already? 265 | for info in self.class_info: 266 | if info['source'] == source and info["id"] == class_id: 267 | # source.class_id combination already available, skip 268 | return 269 | # Add the class 270 | self.class_info.append({ 271 | "source": source, 272 | "id": class_id, 273 | "name": class_name, 274 | }) 275 | 276 | def add_image(self, source, image_id, path, **kwargs): 277 | image_info = { 278 | "id": image_id, 279 | "source": source, 280 | "path": path, 281 | } 282 | image_info.update(kwargs) 283 | self.image_info.append(image_info) 284 | 285 | def image_reference(self, image_id): 286 | """Return a link to the image in its source Website or details about 287 | the image that help looking it up or debugging it. 288 | 289 | Override for your dataset, but pass to this function 290 | if you encounter images not in your dataset. 291 | """ 292 | return "" 293 | 294 | def prepare(self, class_map=None): 295 | """Prepares the Dataset class for use. 296 | 297 | TODO: class map is not supported yet. When done, it should handle mapping 298 | classes from different datasets to the same class ID. 299 | """ 300 | 301 | def clean_name(name): 302 | """Returns a shorter version of object names for cleaner display.""" 303 | return ",".join(name.split(",")[:1]) 304 | 305 | # Build (or rebuild) everything else from the info dicts. 
306 | self.num_classes = len(self.class_info) 307 | self.class_ids = np.arange(self.num_classes) 308 | self.class_names = [clean_name(c["name"]) for c in self.class_info] 309 | self.num_images = len(self.image_info) 310 | self._image_ids = np.arange(self.num_images) 311 | 312 | # Mapping from source class and image IDs to internal IDs 313 | self.class_from_source_map = {"{}.{}".format(info['source'], info['id']): id 314 | for info, id in zip(self.class_info, self.class_ids)} 315 | self.image_from_source_map = {"{}.{}".format(info['source'], info['id']): id 316 | for info, id in zip(self.image_info, self.image_ids)} 317 | 318 | # Map sources to class_ids they support 319 | self.sources = list(set([i['source'] for i in self.class_info])) 320 | self.source_class_ids = {} 321 | # Loop over datasets 322 | for source in self.sources: 323 | self.source_class_ids[source] = [] 324 | # Find classes that belong to this dataset 325 | for i, info in enumerate(self.class_info): 326 | # Include BG class in all datasets 327 | if i == 0 or source == info['source']: 328 | self.source_class_ids[source].append(i) 329 | 330 | def map_source_class_id(self, source_class_id): 331 | """Takes a source class ID and returns the int class ID assigned to it. 332 | 333 | For example: 334 | dataset.map_source_class_id("coco.12") -> 23 335 | """ 336 | return self.class_from_source_map[source_class_id] 337 | 338 | def get_source_class_id(self, class_id, source): 339 | """Map an internal class ID to the corresponding class ID in the source dataset.""" 340 | info = self.class_info[class_id] 341 | assert info['source'] == source 342 | return info['id'] 343 | 344 | @property 345 | def image_ids(self): 346 | return self._image_ids 347 | 348 | def source_image_link(self, image_id): 349 | """Returns the path or URL to the image. 350 | Override this to return a URL to the image if it's available online for easy 351 | debugging. 352 | """ 353 | return self.image_info[image_id]["path"] 354 | 355 | def load_image(self, image_id): 356 | """Load the specified image and return a [H,W,3] Numpy array. 357 | """ 358 | # Load image 359 | image = skimage.io.imread(self.image_info[image_id]['path']) 360 | # If grayscale. Convert to RGB for consistency. 361 | if image.ndim != 3: 362 | image = skimage.color.gray2rgb(image) 363 | # If has an alpha channel, remove it for consistency 364 | if image.shape[-1] == 4: 365 | image = image[..., :3] 366 | return image 367 | 368 | def load_mask(self, image_id): 369 | """Load instance masks for the given image. 370 | 371 | Different datasets use different ways to store masks. Override this 372 | method to load instance masks and return them in the form of am 373 | array of binary masks of shape [height, width, instances]. 374 | 375 | Returns: 376 | masks: A bool array of shape [height, width, instance count] with 377 | a binary mask per instance. 378 | class_ids: a 1D array of class IDs of the instance masks. 379 | """ 380 | # Override this function to load a mask from your dataset. 381 | # Otherwise, it returns an empty mask. 382 | logging.warning("You are using the default load_mask(), maybe you need to define your own one.") 383 | mask = np.empty([0, 0, 0]) 384 | class_ids = np.empty([0], np.int32) 385 | return mask, class_ids 386 | 387 | 388 | def resize_image(image, min_dim=None, max_dim=None, min_scale=None, mode="square"): 389 | """Resizes an image keeping the aspect ratio unchanged. 
390 | 391 | min_dim: if provided, resizes the image such that it's smaller 392 | dimension == min_dim 393 | max_dim: if provided, ensures that the image longest side doesn't 394 | exceed this value. 395 | min_scale: if provided, ensure that the image is scaled up by at least 396 | this percent even if min_dim doesn't require it. 397 | mode: Resizing mode. 398 | none: No resizing. Return the image unchanged. 399 | square: Resize and pad with zeros to get a square image 400 | of size [max_dim, max_dim]. 401 | pad64: Pads width and height with zeros to make them multiples of 64. 402 | If min_dim or min_scale are provided, it scales the image up 403 | before padding. max_dim is ignored in this mode. 404 | The multiple of 64 is needed to ensure smooth scaling of feature 405 | maps up and down the 6 levels of the FPN pyramid (2**6=64). 406 | crop: Picks random crops from the image. First, scales the image based 407 | on min_dim and min_scale, then picks a random crop of 408 | size min_dim x min_dim. Can be used in training only. 409 | max_dim is not used in this mode. 410 | 411 | Returns: 412 | image: the resized image 413 | window: (y1, x1, y2, x2). If max_dim is provided, padding might 414 | be inserted in the returned image. If so, this window is the 415 | coordinates of the image part of the full image (excluding 416 | the padding). The x2, y2 pixels are not included. 417 | scale: The scale factor used to resize the image 418 | padding: Padding added to the image [(top, bottom), (left, right), (0, 0)] 419 | """ 420 | # Keep track of image dtype and return results in the same dtype 421 | image_dtype = image.dtype 422 | # Default window (y1, x1, y2, x2) and default scale == 1. 423 | h, w = image.shape[:2] 424 | window = (0, 0, h, w) 425 | scale = 1 426 | padding = [(0, 0), (0, 0), (0, 0)] 427 | crop = None 428 | 429 | if mode == "none": 430 | return image, window, scale, padding, crop 431 | 432 | # Scale? 433 | if min_dim: 434 | # Scale up but not down 435 | scale = max(1, min_dim / min(h, w)) 436 | if min_scale and scale < min_scale: 437 | scale = min_scale 438 | 439 | # Does it exceed max dim? 440 | if max_dim and mode == "square": 441 | image_max = max(h, w) 442 | if round(image_max * scale) > max_dim: 443 | scale = max_dim / image_max 444 | 445 | # Resize image using bilinear interpolation 446 | if scale != 1: 447 | image = resize(image, (round(h * scale), round(w * scale)), 448 | preserve_range=True) 449 | 450 | # Need padding or cropping? 
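    # Illustrative example with the default config (IMAGE_MIN_DIM=800,
    # IMAGE_MAX_DIM=1024): an 800x600 input is scaled by 1024/800 = 1.28 to
    # 1024x768, then in "square" mode padded with 128 zero-valued pixels on the
    # left and right to 1024x1024, so window = (0, 128, 1024, 896), scale = 1.28.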
451 | if mode == "square": 452 | # Get new height and width 453 | h, w = image.shape[:2] 454 | top_pad = (max_dim - h) // 2 455 | bottom_pad = max_dim - h - top_pad 456 | left_pad = (max_dim - w) // 2 457 | right_pad = max_dim - w - left_pad 458 | padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)] 459 | image = np.pad(image, padding, mode='constant', constant_values=0) 460 | window = (top_pad, left_pad, h + top_pad, w + left_pad) 461 | elif mode == "pad64": 462 | h, w = image.shape[:2] 463 | # Both sides must be divisible by 64 464 | assert min_dim % 64 == 0, "Minimum dimension must be a multiple of 64" 465 | # Height 466 | if h % 64 > 0: 467 | max_h = h - (h % 64) + 64 468 | top_pad = (max_h - h) // 2 469 | bottom_pad = max_h - h - top_pad 470 | else: 471 | top_pad = bottom_pad = 0 472 | # Width 473 | if w % 64 > 0: 474 | max_w = w - (w % 64) + 64 475 | left_pad = (max_w - w) // 2 476 | right_pad = max_w - w - left_pad 477 | else: 478 | left_pad = right_pad = 0 479 | padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)] 480 | image = np.pad(image, padding, mode='constant', constant_values=0) 481 | window = (top_pad, left_pad, h + top_pad, w + left_pad) 482 | elif mode == "crop": 483 | # Pick a random crop 484 | h, w = image.shape[:2] 485 | y = random.randint(0, (h - min_dim)) 486 | x = random.randint(0, (w - min_dim)) 487 | crop = (y, x, min_dim, min_dim) 488 | image = image[y:y + min_dim, x:x + min_dim] 489 | window = (0, 0, min_dim, min_dim) 490 | else: 491 | raise Exception("Mode {} not supported".format(mode)) 492 | return image.astype(image_dtype), window, scale, padding, crop 493 | 494 | 495 | def resize_mask(mask, scale, padding, crop=None): 496 | """Resizes a mask using the given scale and padding. 497 | Typically, you get the scale and padding from resize_image() to 498 | ensure both, the image and the mask, are resized consistently. 499 | 500 | scale: mask scaling factor 501 | padding: Padding to add to the mask in the form 502 | [(top, bottom), (left, right), (0, 0)] 503 | """ 504 | # Suppress warning from scipy 0.13.0, the output shape of zoom() is 505 | # calculated with round() instead of int() 506 | with warnings.catch_warnings(): 507 | warnings.simplefilter("ignore") 508 | mask = scipy.ndimage.zoom(mask, zoom=[scale, scale, 1], order=0) 509 | if crop is not None: 510 | y, x, h, w = crop 511 | mask = mask[y:y + h, x:x + w] 512 | else: 513 | mask = np.pad(mask, padding, mode='constant', constant_values=0) 514 | return mask 515 | 516 | 517 | def minimize_mask(bbox, mask, mini_shape): 518 | """Resize masks to a smaller version to reduce memory load. 519 | Mini-masks can be resized back to image scale using expand_masks() 520 | 521 | See inspect_data.ipynb notebook for more details. 522 | """ 523 | mini_mask = np.zeros(mini_shape + (mask.shape[-1],), dtype=bool) 524 | for i in range(mask.shape[-1]): 525 | # Pick slice and cast to bool in case load_mask() returned wrong dtype 526 | m = mask[:, :, i].astype(bool) 527 | y1, x1, y2, x2 = bbox[i][:4] 528 | m = m[y1:y2, x1:x2] 529 | if m.size == 0: 530 | raise Exception("Invalid bounding box with area of zero") 531 | # Resize with bilinear interpolation 532 | m = resize(m, mini_shape) 533 | mini_mask[:, :, i] = np.around(m).astype(np.bool) 534 | return mini_mask 535 | 536 | 537 | def expand_mask(bbox, mini_mask, image_shape): 538 | """Resizes mini masks back to image size. Reverses the change 539 | of minimize_mask(). 540 | 541 | See inspect_data.ipynb notebook for more details. 
542 | """ 543 | mask = np.zeros(image_shape[:2] + (mini_mask.shape[-1],), dtype=bool) 544 | for i in range(mask.shape[-1]): 545 | m = mini_mask[:, :, i] 546 | y1, x1, y2, x2 = bbox[i][:4] 547 | h = y2 - y1 548 | w = x2 - x1 549 | # Resize with bilinear interpolation 550 | m = resize(m, (h, w)) 551 | mask[y1:y2, x1:x2, i] = np.around(m).astype(np.bool) 552 | return mask 553 | 554 | 555 | # TODO: Build and use this function to reduce code duplication 556 | def mold_mask(mask, config): 557 | pass 558 | 559 | 560 | def unmold_mask(mask, bbox, image_shape): 561 | """Converts a mask generated by the neural network to a format similar 562 | to its original shape. 563 | mask: [height, width] of type float. A small, typically 28x28 mask. 564 | bbox: [y1, x1, y2, x2]. The box to fit the mask in. 565 | 566 | Returns a binary mask with the same size as the original image. 567 | """ 568 | threshold = 0.5 569 | y1, x1, y2, x2 = bbox 570 | mask = resize(mask, (y2 - y1, x2 - x1)) 571 | mask = np.where(mask >= threshold, 1, 0).astype(np.bool) 572 | 573 | # Put the mask in the right location. 574 | full_mask = np.zeros(image_shape[:2], dtype=np.bool) 575 | full_mask[y1:y2, x1:x2] = mask 576 | return full_mask 577 | 578 | 579 | ############################################################ 580 | # Anchors 581 | ############################################################ 582 | 583 | def generate_anchors(scales, ratios, shape, feature_stride, anchor_stride): 584 | """ 585 | scales: 1D array of anchor sizes in pixels. Example: [32, 64, 128] 586 | ratios: 1D array of anchor ratios of width/height. Example: [0.5, 1, 2] 587 | shape: [height, width] spatial shape of the feature map over which 588 | to generate anchors. 589 | feature_stride: Stride of the feature map relative to the image in pixels. 590 | anchor_stride: Stride of anchors on the feature map. For example, if the 591 | value is 2 then generate anchors for every other feature map pixel. 592 | """ 593 | # Get all combinations of scales and ratios 594 | scales, ratios = np.meshgrid(np.array(scales), np.array(ratios)) 595 | scales = scales.flatten() 596 | ratios = ratios.flatten() 597 | 598 | # Enumerate heights and widths from scales and ratios 599 | heights = scales / np.sqrt(ratios) 600 | widths = scales * np.sqrt(ratios) 601 | 602 | # Enumerate shifts in feature space 603 | shifts_y = np.arange(0, shape[0], anchor_stride) * feature_stride 604 | shifts_x = np.arange(0, shape[1], anchor_stride) * feature_stride 605 | shifts_x, shifts_y = np.meshgrid(shifts_x, shifts_y) 606 | 607 | # Enumerate combinations of shifts, widths, and heights 608 | box_widths, box_centers_x = np.meshgrid(widths, shifts_x) 609 | box_heights, box_centers_y = np.meshgrid(heights, shifts_y) 610 | 611 | # Reshape to get a list of (y, x) and a list of (h, w) 612 | box_centers = np.stack( 613 | [box_centers_y, box_centers_x], axis=2).reshape([-1, 2]) 614 | box_sizes = np.stack([box_heights, box_widths], axis=2).reshape([-1, 2]) 615 | 616 | # Convert to corner coordinates (y1, x1, y2, x2) 617 | boxes = np.concatenate([box_centers - 0.5 * box_sizes, 618 | box_centers + 0.5 * box_sizes], axis=1) 619 | return boxes 620 | 621 | 622 | def generate_pyramid_anchors(scales, ratios, feature_shapes, feature_strides, 623 | anchor_stride): 624 | """Generate anchors at different levels of a feature pyramid. Each scale 625 | is associated with a level of the pyramid, but each ratio is used in 626 | all levels of the pyramid. 627 | 628 | Returns: 629 | anchors: [N, (y1, x1, y2, x2)]. 
All generated anchors in one array. Sorted 630 | with the same order of the given scales. So, anchors of scale[0] come 631 | first, then anchors of scale[1], and so on. 632 | """ 633 | # Anchors 634 | # [anchor_count, (y1, x1, y2, x2)] 635 | anchors = [] 636 | for i in range(len(scales)): 637 | anchors.append(generate_anchors(scales[i], ratios, feature_shapes[i], 638 | feature_strides[i], anchor_stride)) 639 | return np.concatenate(anchors, axis=0) 640 | 641 | 642 | ############################################################ 643 | # Miscellaneous 644 | ############################################################ 645 | 646 | def trim_zeros(x): 647 | """It's common to have tensors larger than the available data and 648 | pad with zeros. This function removes rows that are all zeros. 649 | 650 | x: [rows, columns]. 651 | """ 652 | assert len(x.shape) == 2 653 | return x[~np.all(x == 0, axis=1)] 654 | 655 | 656 | def compute_matches(gt_boxes, gt_class_ids, gt_masks, 657 | pred_boxes, pred_class_ids, pred_scores, pred_masks, 658 | iou_threshold=0.5, score_threshold=0.0): 659 | """Finds matches between prediction and ground truth instances. 660 | 661 | Returns: 662 | gt_match: 1-D array. For each GT box it has the index of the matched 663 | predicted box. 664 | pred_match: 1-D array. For each predicted box, it has the index of 665 | the matched ground truth box. 666 | overlaps: [pred_boxes, gt_boxes] IoU overlaps. 667 | """ 668 | # Trim zero padding 669 | # TODO: cleaner to do zero unpadding upstream 670 | gt_boxes = trim_zeros(gt_boxes) 671 | gt_masks = gt_masks[..., :gt_boxes.shape[0]] 672 | pred_boxes = trim_zeros(pred_boxes) 673 | pred_scores = pred_scores[:pred_boxes.shape[0]] 674 | # Sort predictions by score from high to low 675 | indices = np.argsort(pred_scores)[::-1] 676 | pred_boxes = pred_boxes[indices] 677 | pred_class_ids = pred_class_ids[indices] 678 | pred_scores = pred_scores[indices] 679 | pred_masks = pred_masks[..., indices] 680 | 681 | # Compute IoU overlaps [pred_masks, gt_masks] 682 | overlaps = compute_overlaps_masks(pred_masks, gt_masks) 683 | 684 | # Loop through predictions and find matching ground truth boxes 685 | match_count = 0 686 | pred_match = -1 * np.ones([pred_boxes.shape[0]]) 687 | gt_match = -1 * np.ones([gt_boxes.shape[0]]) 688 | for i in range(len(pred_boxes)): 689 | # Find best matching ground truth box 690 | # 1. Sort matches by score 691 | sorted_ixs = np.argsort(overlaps[i])[::-1] 692 | # 2. Remove low scores 693 | low_score_idx = np.where(overlaps[i, sorted_ixs] < score_threshold)[0] 694 | if low_score_idx.size > 0: 695 | sorted_ixs = sorted_ixs[:low_score_idx[0]] 696 | # 3. Find the match 697 | for j in sorted_ixs: 698 | # If ground truth box is already matched, go to next one 699 | if gt_match[j] > -1: 700 | continue 701 | # If we reach IoU smaller than the threshold, end the loop 702 | iou = overlaps[i, j] 703 | if iou < iou_threshold: 704 | break 705 | # Do we have a match? 706 | if pred_class_ids[i] == gt_class_ids[j]: 707 | match_count += 1 708 | gt_match[j] = i 709 | pred_match[i] = j 710 | break 711 | 712 | return gt_match, pred_match, overlaps 713 | 714 | 715 | def compute_ap(gt_boxes, gt_class_ids, gt_masks, 716 | pred_boxes, pred_class_ids, pred_scores, pred_masks, 717 | iou_threshold=0.5): 718 | """Compute Average Precision at a set IoU threshold (default 0.5). 719 | 720 | Returns: 721 | mAP: Mean Average Precision 722 | precisions: List of precisions at different class score thresholds. 
723 | recalls: List of recall values at different class score thresholds. 724 | overlaps: [pred_boxes, gt_boxes] IoU overlaps. 725 | """ 726 | # Get matches and overlaps 727 | gt_match, pred_match, overlaps = compute_matches( 728 | gt_boxes, gt_class_ids, gt_masks, 729 | pred_boxes, pred_class_ids, pred_scores, pred_masks, 730 | iou_threshold) 731 | 732 | # Compute precision and recall at each prediction box step 733 | precisions = np.cumsum(pred_match > -1) / (np.arange(len(pred_match)) + 1) 734 | recalls = np.cumsum(pred_match > -1).astype(np.float32) / len(gt_match) 735 | 736 | # Pad with start and end values to simplify the math 737 | precisions = np.concatenate([[0], precisions, [0]]) 738 | recalls = np.concatenate([[0], recalls, [1]]) 739 | 740 | # Ensure precision values decrease but don't increase. This way, the 741 | # precision value at each recall threshold is the maximum it can be 742 | # for all following recall thresholds, as specified by the VOC paper. 743 | for i in range(len(precisions) - 2, -1, -1): 744 | precisions[i] = np.maximum(precisions[i], precisions[i + 1]) 745 | 746 | # Compute mean AP over recall range 747 | indices = np.where(recalls[:-1] != recalls[1:])[0] + 1 748 | mAP = np.sum((recalls[indices] - recalls[indices - 1]) * 749 | precisions[indices]) 750 | 751 | return mAP, precisions, recalls, overlaps 752 | 753 | 754 | def compute_ap_range(gt_box, gt_class_id, gt_mask, 755 | pred_box, pred_class_id, pred_score, pred_mask, 756 | iou_thresholds=None, verbose=1): 757 | """Compute AP over a range or IoU thresholds. Default range is 0.5-0.95.""" 758 | # Default is 0.5 to 0.95 with increments of 0.05 759 | iou_thresholds = iou_thresholds or np.arange(0.5, 1.0, 0.05) 760 | 761 | # Compute AP over range of IoU thresholds 762 | AP = [] 763 | for iou_threshold in iou_thresholds: 764 | ap, precisions, recalls, overlaps =\ 765 | compute_ap(gt_box, gt_class_id, gt_mask, 766 | pred_box, pred_class_id, pred_score, pred_mask, 767 | iou_threshold=iou_threshold) 768 | if verbose: 769 | print("AP @{:.2f}:\t {:.3f}".format(iou_threshold, ap)) 770 | AP.append(ap) 771 | AP = np.array(AP).mean() 772 | if verbose: 773 | print("AP @{:.2f}-{:.2f}:\t {:.3f}".format( 774 | iou_thresholds[0], iou_thresholds[-1], AP)) 775 | return AP 776 | 777 | 778 | def compute_recall(pred_boxes, gt_boxes, iou): 779 | """Compute the recall at the given IoU threshold. It's an indication 780 | of how many GT boxes were found by the given prediction boxes. 781 | 782 | pred_boxes: [N, (y1, x1, y2, x2)] in image coordinates 783 | gt_boxes: [N, (y1, x1, y2, x2)] in image coordinates 784 | """ 785 | # Measure overlaps 786 | overlaps = compute_overlaps(pred_boxes, gt_boxes) 787 | iou_max = np.max(overlaps, axis=1) 788 | iou_argmax = np.argmax(overlaps, axis=1) 789 | positive_ids = np.where(iou_max >= iou)[0] 790 | matched_gt_boxes = iou_argmax[positive_ids] 791 | 792 | recall = len(set(matched_gt_boxes)) / gt_boxes.shape[0] 793 | return recall, positive_ids 794 | 795 | 796 | # ## Batch Slicing 797 | # Some custom layers support a batch size of 1 only, and require a lot of work 798 | # to support batches greater than 1. This function slices an input tensor 799 | # across the batch dimension and feeds batches of size 1. Effectively, 800 | # an easy way to support batches > 1 quickly with little code modification. 801 | # In the long run, it's more efficient to modify the code to support large 802 | # batches and getting rid of this function. 
Consider this a temporary solution 803 | def batch_slice(inputs, graph_fn, batch_size, names=None): 804 | """Splits inputs into slices and feeds each slice to a copy of the given 805 | computation graph and then combines the results. It allows you to run a 806 | graph on a batch of inputs even if the graph is written to support one 807 | instance only. 808 | 809 | inputs: list of tensors. All must have the same first dimension length 810 | graph_fn: A function that returns a TF tensor that's part of a graph. 811 | batch_size: number of slices to divide the data into. 812 | names: If provided, assigns names to the resulting tensors. 813 | """ 814 | if not isinstance(inputs, list): 815 | inputs = [inputs] 816 | 817 | outputs = [] 818 | for i in range(batch_size): 819 | inputs_slice = [x[i] for x in inputs] 820 | output_slice = graph_fn(*inputs_slice) 821 | if not isinstance(output_slice, (tuple, list)): 822 | output_slice = [output_slice] 823 | outputs.append(output_slice) 824 | # Change outputs from a list of slices where each is 825 | # a list of outputs to a list of outputs and each has 826 | # a list of slices 827 | outputs = list(zip(*outputs)) 828 | 829 | if names is None: 830 | names = [None] * len(outputs) 831 | 832 | result = [tf.stack(o, axis=0, name=n) 833 | for o, n in zip(outputs, names)] 834 | if len(result) == 1: 835 | result = result[0] 836 | 837 | return result 838 | 839 | 840 | def download_trained_weights(coco_model_path, verbose=1): 841 | """Download COCO trained weights from Releases. 842 | 843 | coco_model_path: local path of COCO trained weights 844 | """ 845 | if verbose > 0: 846 | print("Downloading pretrained model to " + coco_model_path + " ...") 847 | with urllib.request.urlopen(COCO_MODEL_URL) as resp, open(coco_model_path, 'wb') as out: 848 | shutil.copyfileobj(resp, out) 849 | if verbose > 0: 850 | print("... done downloading pretrained model!") 851 | 852 | 853 | def norm_boxes(boxes, shape): 854 | """Converts boxes from pixel coordinates to normalized coordinates. 855 | boxes: [N, (y1, x1, y2, x2)] in pixel coordinates 856 | shape: [..., (height, width)] in pixels 857 | 858 | Note: In pixel coordinates (y2, x2) is outside the box. But in normalized 859 | coordinates it's inside the box. 860 | 861 | Returns: 862 | [N, (y1, x1, y2, x2)] in normalized coordinates 863 | """ 864 | h, w = shape 865 | scale = np.array([h - 1, w - 1, h - 1, w - 1]) 866 | shift = np.array([0, 0, 1, 1]) 867 | return np.divide((boxes - shift), scale).astype(np.float32) 868 | 869 | 870 | def denorm_boxes(boxes, shape): 871 | """Converts boxes from normalized coordinates to pixel coordinates. 872 | boxes: [N, (y1, x1, y2, x2)] in normalized coordinates 873 | shape: [..., (height, width)] in pixels 874 | 875 | Note: In pixel coordinates (y2, x2) is outside the box. But in normalized 876 | coordinates it's inside the box. 877 | 878 | Returns: 879 | [N, (y1, x1, y2, x2)] in pixel coordinates 880 | """ 881 | h, w = shape 882 | scale = np.array([h - 1, w - 1, h - 1, w - 1]) 883 | shift = np.array([0, 0, 1, 1]) 884 | return np.around(np.multiply(boxes, scale) + shift).astype(np.int32) 885 | 886 | 887 | def resize(image, output_shape, order=1, mode='constant', cval=0, clip=True, 888 | preserve_range=False, anti_aliasing=False, anti_aliasing_sigma=None): 889 | """A wrapper for Scikit-Image resize(). 890 | 891 | Scikit-Image generates warnings on every call to resize() if it doesn't 892 | receive the right parameters. The right parameters depend on the version 893 | of skimage. 
This solves the problem by using different parameters per 894 | version. And it provides a central place to control resizing defaults. 895 | """ 896 | if LooseVersion(skimage.__version__) >= LooseVersion("0.14"): 897 | # New in 0.14: anti_aliasing. Default it to False for backward 898 | # compatibility with skimage 0.13. 899 | return skimage.transform.resize( 900 | image, output_shape, 901 | order=order, mode=mode, cval=cval, clip=clip, 902 | preserve_range=preserve_range, anti_aliasing=anti_aliasing, 903 | anti_aliasing_sigma=anti_aliasing_sigma) 904 | else: 905 | return skimage.transform.resize( 906 | image, output_shape, 907 | order=order, mode=mode, cval=cval, clip=clip, 908 | preserve_range=preserve_range) 909 | -------------------------------------------------------------------------------- /mrcnn/visualize.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Display and Visualization Functions. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | """ 9 | 10 | import os 11 | import sys 12 | import random 13 | import itertools 14 | import colorsys 15 | 16 | import numpy as np 17 | from skimage.measure import find_contours 18 | import matplotlib.pyplot as plt 19 | from matplotlib import patches, lines 20 | from matplotlib.patches import Polygon 21 | import IPython.display 22 | 23 | # Root directory of the project 24 | ROOT_DIR = os.path.abspath("../") 25 | 26 | # Import Mask RCNN 27 | sys.path.append(ROOT_DIR) # To find local version of the library 28 | from mrcnn import utils 29 | 30 | 31 | ############################################################ 32 | # Visualization 33 | ############################################################ 34 | 35 | def display_images(images, titles=None, cols=4, cmap=None, norm=None, 36 | interpolation=None): 37 | """Display the given set of images, optionally with titles. 38 | images: list or array of image tensors in HWC format. 39 | titles: optional. A list of titles to display with each image. 40 | cols: number of images per row 41 | cmap: Optional. Color map to use. For example, "Blues". 42 | norm: Optional. A Normalize instance to map values to colors. 43 | interpolation: Optional. Image interpolation to use for display. 44 | """ 45 | titles = titles if titles is not None else [""] * len(images) 46 | rows = len(images) // cols + 1 47 | plt.figure(figsize=(14, 14 * rows // cols)) 48 | i = 1 49 | for image, title in zip(images, titles): 50 | plt.subplot(rows, cols, i) 51 | plt.title(title, fontsize=9) 52 | plt.axis('off') 53 | plt.imshow(image.astype(np.uint8), cmap=cmap, 54 | norm=norm, interpolation=interpolation) 55 | i += 1 56 | plt.show() 57 | 58 | 59 | def random_colors(N, bright=True): 60 | """ 61 | Generate random colors. 62 | To get visually distinct colors, generate them in HSV space then 63 | convert to RGB. 64 | """ 65 | brightness = 1.0 if bright else 0.7 66 | hsv = [(i / N, 1, brightness) for i in range(N)] 67 | colors = list(map(lambda c: colorsys.hsv_to_rgb(*c), hsv)) 68 | random.shuffle(colors) 69 | return colors 70 | 71 | 72 | def apply_mask(image, mask, color, alpha=0.5): 73 | """Apply the given mask to the image. 
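    image: [height, width, 3] image array; modified in place and also returned.
    mask: [height, width] binary mask. Pixels where mask == 1 are tinted.
    color: (r, g, b) tuple with values in the 0-1 range, e.g. from random_colors().
    alpha: blending factor. 0 leaves the image unchanged, 1 paints the solid color.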
74 | """ 75 | for c in range(3): 76 | image[:, :, c] = np.where(mask == 1, 77 | image[:, :, c] * 78 | (1 - alpha) + alpha * color[c] * 255, 79 | image[:, :, c]) 80 | return image 81 | 82 | 83 | def display_instances(image, boxes, masks, class_ids, class_names, 84 | scores=None, title="", 85 | figsize=(16, 16), ax=None, 86 | show_mask=True, show_bbox=True, 87 | colors=None, captions=None): 88 | """ 89 | boxes: [num_instance, (y1, x1, y2, x2, class_id)] in image coordinates. 90 | masks: [height, width, num_instances] 91 | class_ids: [num_instances] 92 | class_names: list of class names of the dataset 93 | scores: (optional) confidence scores for each box 94 | title: (optional) Figure title 95 | show_mask, show_bbox: To show masks and bounding boxes or not 96 | figsize: (optional) the size of the image 97 | colors: (optional) An array or colors to use with each object 98 | captions: (optional) A list of strings to use as captions for each object 99 | """ 100 | # Number of instances 101 | N = boxes.shape[0] 102 | if not N: 103 | print("\n*** No instances to display *** \n") 104 | else: 105 | assert boxes.shape[0] == masks.shape[-1] == class_ids.shape[0] 106 | 107 | # If no axis is passed, create one and automatically call show() 108 | auto_show = False 109 | if not ax: 110 | _, ax = plt.subplots(1, figsize=figsize) 111 | auto_show = True 112 | 113 | # Generate random colors 114 | colors = colors or random_colors(N) 115 | 116 | # Show area outside image boundaries. 117 | height, width = image.shape[:2] 118 | ax.set_ylim(height + 10, -10) 119 | ax.set_xlim(-10, width + 10) 120 | ax.axis('off') 121 | ax.set_title(title) 122 | 123 | masked_image = image.astype(np.uint32).copy() 124 | for i in range(N): 125 | color = colors[i] 126 | 127 | # Bounding box 128 | if not np.any(boxes[i]): 129 | # Skip this instance. Has no bbox. Likely lost in image cropping. 130 | continue 131 | y1, x1, y2, x2 = boxes[i] 132 | if show_bbox: 133 | p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, 134 | alpha=0.7, linestyle="dashed", 135 | edgecolor=color, facecolor='none') 136 | ax.add_patch(p) 137 | 138 | # Label 139 | if not captions: 140 | class_id = class_ids[i] 141 | score = scores[i] if scores is not None else None 142 | label = class_names[class_id] 143 | caption = "{} {:.3f}".format(label, score) if score else label 144 | else: 145 | caption = captions[i] 146 | ax.text(x1, y1 + 8, caption, 147 | color='w', size=11, backgroundcolor="none") 148 | 149 | # Mask 150 | mask = masks[:, :, i] 151 | if show_mask: 152 | masked_image = apply_mask(masked_image, mask, color) 153 | 154 | # Mask Polygon 155 | # Pad to ensure proper polygons for masks that touch image edges. 
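        # skimage's find_contours() traces iso-valued contours at level 0.5 and
        # only closes them where values drop below that level, so a one-pixel
        # zero border is added first to close contours of masks that touch the
        # image edge.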
156 | padded_mask = np.zeros( 157 | (mask.shape[0] + 2, mask.shape[1] + 2), dtype=np.uint8) 158 | padded_mask[1:-1, 1:-1] = mask 159 | contours = find_contours(padded_mask, 0.5) 160 | for verts in contours: 161 | # Subtract the padding and flip (y, x) to (x, y) 162 | verts = np.fliplr(verts) - 1 163 | p = Polygon(verts, facecolor="none", edgecolor=color) 164 | ax.add_patch(p) 165 | ax.imshow(masked_image.astype(np.uint8)) 166 | if auto_show: 167 | plt.show() 168 | 169 | 170 | def display_differences(image, 171 | gt_box, gt_class_id, gt_mask, 172 | pred_box, pred_class_id, pred_score, pred_mask, 173 | class_names, title="", ax=None, 174 | show_mask=True, show_box=True, 175 | iou_threshold=0.5, score_threshold=0.5): 176 | """Display ground truth and prediction instances on the same image.""" 177 | # Match predictions to ground truth 178 | gt_match, pred_match, overlaps = utils.compute_matches( 179 | gt_box, gt_class_id, gt_mask, 180 | pred_box, pred_class_id, pred_score, pred_mask, 181 | iou_threshold=iou_threshold, score_threshold=score_threshold) 182 | # Ground truth = green. Predictions = red 183 | colors = [(0, 1, 0, .8)] * len(gt_match)\ 184 | + [(1, 0, 0, 1)] * len(pred_match) 185 | # Concatenate GT and predictions 186 | class_ids = np.concatenate([gt_class_id, pred_class_id]) 187 | scores = np.concatenate([np.zeros([len(gt_match)]), pred_score]) 188 | boxes = np.concatenate([gt_box, pred_box]) 189 | masks = np.concatenate([gt_mask, pred_mask], axis=-1) 190 | # Captions per instance show score/IoU 191 | captions = ["" for m in gt_match] + ["{:.2f} / {:.2f}".format( 192 | pred_score[i], 193 | (overlaps[i, int(pred_match[i])] 194 | if pred_match[i] > -1 else overlaps[i].max())) 195 | for i in range(len(pred_match))] 196 | # Set title if not provided 197 | title = title or "Ground Truth and Detections\n GT=green, pred=red, captions: score/IoU" 198 | # Display 199 | display_instances( 200 | image, 201 | boxes, masks, class_ids, 202 | class_names, scores, ax=ax, 203 | show_bbox=show_box, show_mask=show_mask, 204 | colors=colors, captions=captions, 205 | title=title) 206 | 207 | 208 | def draw_rois(image, rois, refined_rois, mask, class_ids, class_names, limit=10): 209 | """ 210 | anchors: [n, (y1, x1, y2, x2)] list of anchors in image coordinates. 211 | proposals: [n, 4] the same anchors but refined to fit objects better. 212 | """ 213 | masked_image = image.copy() 214 | 215 | # Pick random anchors in case there are too many. 216 | ids = np.arange(rois.shape[0], dtype=np.int32) 217 | ids = np.random.choice( 218 | ids, limit, replace=False) if ids.shape[0] > limit else ids 219 | 220 | fig, ax = plt.subplots(1, figsize=(12, 12)) 221 | if rois.shape[0] > limit: 222 | plt.title("Showing {} random ROIs out of {}".format( 223 | len(ids), rois.shape[0])) 224 | else: 225 | plt.title("{} ROIs".format(len(ids))) 226 | 227 | # Show area outside image boundaries. 
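    # Passing the limits top-to-bottom inverts the y-axis so the origin sits at
    # the top-left corner, matching image (row, column) coordinates; the extra
    # margin keeps ROIs near the borders fully visible.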
228 | ax.set_ylim(image.shape[0] + 20, -20) 229 | ax.set_xlim(-50, image.shape[1] + 20) 230 | ax.axis('off') 231 | 232 | for i, id in enumerate(ids): 233 | color = np.random.rand(3) 234 | class_id = class_ids[id] 235 | # ROI 236 | y1, x1, y2, x2 = rois[id] 237 | p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, 238 | edgecolor=color if class_id else "gray", 239 | facecolor='none', linestyle="dashed") 240 | ax.add_patch(p) 241 | # Refined ROI 242 | if class_id: 243 | ry1, rx1, ry2, rx2 = refined_rois[id] 244 | p = patches.Rectangle((rx1, ry1), rx2 - rx1, ry2 - ry1, linewidth=2, 245 | edgecolor=color, facecolor='none') 246 | ax.add_patch(p) 247 | # Connect the top-left corners of the anchor and proposal for easy visualization 248 | ax.add_line(lines.Line2D([x1, rx1], [y1, ry1], color=color)) 249 | 250 | # Label 251 | label = class_names[class_id] 252 | ax.text(rx1, ry1 + 8, "{}".format(label), 253 | color='w', size=11, backgroundcolor="none") 254 | 255 | # Mask 256 | m = utils.unmold_mask(mask[id], rois[id] 257 | [:4].astype(np.int32), image.shape) 258 | masked_image = apply_mask(masked_image, m, color) 259 | 260 | ax.imshow(masked_image) 261 | 262 | # Print stats 263 | print("Positive ROIs: ", class_ids[class_ids > 0].shape[0]) 264 | print("Negative ROIs: ", class_ids[class_ids == 0].shape[0]) 265 | print("Positive Ratio: {:.2f}".format( 266 | class_ids[class_ids > 0].shape[0] / class_ids.shape[0])) 267 | 268 | 269 | # TODO: Replace with matplotlib equivalent? 270 | def draw_box(image, box, color): 271 | """Draw 3-pixel width bounding boxes on the given image array. 272 | color: list of 3 int values for RGB. 273 | """ 274 | y1, x1, y2, x2 = box 275 | image[y1:y1 + 2, x1:x2] = color 276 | image[y2:y2 + 2, x1:x2] = color 277 | image[y1:y2, x1:x1 + 2] = color 278 | image[y1:y2, x2:x2 + 2] = color 279 | return image 280 | 281 | 282 | def display_top_masks(image, mask, class_ids, class_names, limit=4): 283 | """Display the given image and the top few class masks.""" 284 | to_display = [] 285 | titles = [] 286 | to_display.append(image) 287 | titles.append("H x W={}x{}".format(image.shape[0], image.shape[1])) 288 | # Pick top prominent classes in this image 289 | unique_class_ids = np.unique(class_ids) 290 | mask_area = [np.sum(mask[:, :, np.where(class_ids == i)[0]]) 291 | for i in unique_class_ids] 292 | top_ids = [v[0] for v in sorted(zip(unique_class_ids, mask_area), 293 | key=lambda r: r[1], reverse=True) if v[1] > 0] 294 | # Generate images and titles 295 | for i in range(limit): 296 | class_id = top_ids[i] if i < len(top_ids) else -1 297 | # Pull masks of instances belonging to the same class. 298 | m = mask[:, :, np.where(class_ids == class_id)[0]] 299 | m = np.sum(m * np.arange(1, m.shape[-1] + 1), -1) 300 | to_display.append(m) 301 | titles.append(class_names[class_id] if class_id != -1 else "-") 302 | display_images(to_display, titles=titles, cols=limit + 1, cmap="Blues_r") 303 | 304 | 305 | def plot_precision_recall(AP, precisions, recalls): 306 | """Draw the precision-recall curve. 307 | 308 | AP: Average precision at IoU >= 0.5 309 | precisions: list of precision values 310 | recalls: list of recall values 311 | """ 312 | # Plot the Precision-Recall curve 313 | _, ax = plt.subplots(1) 314 | ax.set_title("Precision-Recall Curve. 
AP@50 = {:.3f}".format(AP)) 315 | ax.set_ylim(0, 1.1) 316 | ax.set_xlim(0, 1.1) 317 | _ = ax.plot(recalls, precisions) 318 | 319 | 320 | def plot_overlaps(gt_class_ids, pred_class_ids, pred_scores, 321 | overlaps, class_names, threshold=0.5): 322 | """Draw a grid showing how ground truth objects are classified. 323 | gt_class_ids: [N] int. Ground truth class IDs 324 | pred_class_id: [N] int. Predicted class IDs 325 | pred_scores: [N] float. The probability scores of predicted classes 326 | overlaps: [pred_boxes, gt_boxes] IoU overlaps of predictions and GT boxes. 327 | class_names: list of all class names in the dataset 328 | threshold: Float. The prediction probability required to predict a class 329 | """ 330 | gt_class_ids = gt_class_ids[gt_class_ids != 0] 331 | pred_class_ids = pred_class_ids[pred_class_ids != 0] 332 | 333 | plt.figure(figsize=(12, 10)) 334 | plt.imshow(overlaps, interpolation='nearest', cmap=plt.cm.Blues) 335 | plt.yticks(np.arange(len(pred_class_ids)), 336 | ["{} ({:.2f})".format(class_names[int(id)], pred_scores[i]) 337 | for i, id in enumerate(pred_class_ids)]) 338 | plt.xticks(np.arange(len(gt_class_ids)), 339 | [class_names[int(id)] for id in gt_class_ids], rotation=90) 340 | 341 | thresh = overlaps.max() / 2. 342 | for i, j in itertools.product(range(overlaps.shape[0]), 343 | range(overlaps.shape[1])): 344 | text = "" 345 | if overlaps[i, j] > threshold: 346 | text = "match" if gt_class_ids[j] == pred_class_ids[i] else "wrong" 347 | color = ("white" if overlaps[i, j] > thresh 348 | else "black" if overlaps[i, j] > 0 349 | else "grey") 350 | plt.text(j, i, "{:.3f}\n{}".format(overlaps[i, j], text), 351 | horizontalalignment="center", verticalalignment="center", 352 | fontsize=9, color=color) 353 | 354 | plt.tight_layout() 355 | plt.xlabel("Ground Truth") 356 | plt.ylabel("Predictions") 357 | 358 | 359 | def draw_boxes(image, boxes=None, refined_boxes=None, 360 | masks=None, captions=None, visibilities=None, 361 | title="", ax=None): 362 | """Draw bounding boxes and segmentation masks with different 363 | customizations. 364 | 365 | boxes: [N, (y1, x1, y2, x2, class_id)] in image coordinates. 366 | refined_boxes: Like boxes, but draw with solid lines to show 367 | that they're the result of refining 'boxes'. 368 | masks: [N, height, width] 369 | captions: List of N titles to display on each box 370 | visibilities: (optional) List of values of 0, 1, or 2. Determine how 371 | prominent each bounding box should be. 372 | title: An optional title to show over the image 373 | ax: (optional) Matplotlib axis to draw on. 374 | """ 375 | # Number of boxes 376 | assert boxes is not None or refined_boxes is not None 377 | N = boxes.shape[0] if boxes is not None else refined_boxes.shape[0] 378 | 379 | # Matplotlib Axis 380 | if not ax: 381 | _, ax = plt.subplots(1, figsize=(12, 12)) 382 | 383 | # Generate random colors 384 | colors = random_colors(N) 385 | 386 | # Show area outside image boundaries. 
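    # A margin of 10% of the image height is added on every side so boxes that
    # extend slightly past the image edges remain visible.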
387 | margin = image.shape[0] // 10 388 | ax.set_ylim(image.shape[0] + margin, -margin) 389 | ax.set_xlim(-margin, image.shape[1] + margin) 390 | ax.axis('off') 391 | 392 | ax.set_title(title) 393 | 394 | masked_image = image.astype(np.uint32).copy() 395 | for i in range(N): 396 | # Box visibility 397 | visibility = visibilities[i] if visibilities is not None else 1 398 | if visibility == 0: 399 | color = "gray" 400 | style = "dotted" 401 | alpha = 0.5 402 | elif visibility == 1: 403 | color = colors[i] 404 | style = "dotted" 405 | alpha = 1 406 | elif visibility == 2: 407 | color = colors[i] 408 | style = "solid" 409 | alpha = 1 410 | 411 | # Boxes 412 | if boxes is not None: 413 | if not np.any(boxes[i]): 414 | # Skip this instance. Has no bbox. Likely lost in cropping. 415 | continue 416 | y1, x1, y2, x2 = boxes[i] 417 | p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, 418 | alpha=alpha, linestyle=style, 419 | edgecolor=color, facecolor='none') 420 | ax.add_patch(p) 421 | 422 | # Refined boxes 423 | if refined_boxes is not None and visibility > 0: 424 | ry1, rx1, ry2, rx2 = refined_boxes[i].astype(np.int32) 425 | p = patches.Rectangle((rx1, ry1), rx2 - rx1, ry2 - ry1, linewidth=2, 426 | edgecolor=color, facecolor='none') 427 | ax.add_patch(p) 428 | # Connect the top-left corners of the anchor and proposal 429 | if boxes is not None: 430 | ax.add_line(lines.Line2D([x1, rx1], [y1, ry1], color=color)) 431 | 432 | # Captions 433 | if captions is not None: 434 | caption = captions[i] 435 | # If there are refined boxes, display captions on them 436 | if refined_boxes is not None: 437 | y1, x1, y2, x2 = ry1, rx1, ry2, rx2 438 | ax.text(x1, y1, caption, size=11, verticalalignment='top', 439 | color='w', backgroundcolor="none", 440 | bbox={'facecolor': color, 'alpha': 0.5, 441 | 'pad': 2, 'edgecolor': 'none'}) 442 | 443 | # Masks 444 | if masks is not None: 445 | mask = masks[:, :, i] 446 | masked_image = apply_mask(masked_image, mask, color) 447 | # Mask Polygon 448 | # Pad to ensure proper polygons for masks that touch image edges. 449 | padded_mask = np.zeros( 450 | (mask.shape[0] + 2, mask.shape[1] + 2), dtype=np.uint8) 451 | padded_mask[1:-1, 1:-1] = mask 452 | contours = find_contours(padded_mask, 0.5) 453 | for verts in contours: 454 | # Subtract the padding and flip (y, x) to (x, y) 455 | verts = np.fliplr(verts) - 1 456 | p = Polygon(verts, facecolor="none", edgecolor=color) 457 | ax.add_patch(p) 458 | ax.imshow(masked_image.astype(np.uint8)) 459 | 460 | 461 | def display_table(table): 462 | """Display values in a table format. 463 | table: an iterable of rows, and each row is an iterable of values. 464 | """ 465 | html = "" 466 | for row in table: 467 | row_html = "" 468 | for col in row: 469 | row_html += "{:40}".format(str(col)) 470 | html += "" + row_html + "" 471 | html = "" + html + "
" 472 | IPython.display.display(IPython.display.HTML(html)) 473 | 474 | 475 | def display_weight_stats(model): 476 | """Scans all the weights in the model and returns a list of tuples 477 | that contain stats about each weight. 478 | """ 479 | layers = model.get_trainable_layers() 480 | table = [["WEIGHT NAME", "SHAPE", "MIN", "MAX", "STD"]] 481 | for l in layers: 482 | weight_values = l.get_weights() # list of Numpy arrays 483 | weight_tensors = l.weights # list of TF tensors 484 | for i, w in enumerate(weight_values): 485 | weight_name = weight_tensors[i].name 486 | # Detect problematic layers. Exclude biases of conv layers. 487 | alert = "" 488 | if w.min() == w.max() and not (l.__class__.__name__ == "Conv2D" and i == 1): 489 | alert += "*** dead?" 490 | if np.abs(w.min()) > 1000 or np.abs(w.max()) > 1000: 491 | alert += "*** Overflow?" 492 | # Add row 493 | table.append([ 494 | weight_name + alert, 495 | str(w.shape), 496 | "{:+9.4f}".format(w.min()), 497 | "{:+10.4f}".format(w.max()), 498 | "{:+9.4f}".format(w.std()), 499 | ]) 500 | display_table(table) 501 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | scipy 3 | Pillow 4 | cython 5 | matplotlib 6 | scikit-image 7 | tensorflow>=1.3.0 8 | keras>=2.0.8 9 | opencv-python 10 | h5py 11 | imgaug 12 | IPython[all] -------------------------------------------------------------------------------- /samples/README.md: -------------------------------------------------------------------------------- 1 | # Mask R-CNN for Object Detection and Segmentation -------------------------------------------------------------------------------- /samples/balloon/README.md: -------------------------------------------------------------------------------- 1 | # Color Splash Example 2 | 3 | This is an example showing the use of Mask RCNN in a real application. 4 | We train the model to detect balloons only, and then we use the generated 5 | masks to keep balloons in color while changing the rest of the image to 6 | grayscale. 7 | 8 | 9 | [This blog post](https://engineering.matterport.com/splash-of-color-instance-segmentation-with-mask-r-cnn-and-tensorflow-7c761e238b46) describes this sample in more detail. 10 | 11 | ![Balloon Color Splash](/assets/balloon_color_splash.gif) 12 | 13 | 14 | ## Installation 15 | From the [Releases page](https://github.com/matterport/Mask_RCNN/releases) page: 16 | 1. Download `mask_rcnn_balloon.h5`. Save it in the root directory of the repo (the `mask_rcnn` directory). 17 | 2. Download `balloon_dataset.zip`. Expand it such that it's in the path `mask_rcnn/datasets/balloon/`. 18 | 19 | ## Apply color splash using the provided weights 20 | Apply splash effect on an image: 21 | 22 | ```bash 23 | python3 balloon.py splash --weights=/path/to/mask_rcnn/mask_rcnn_balloon.h5 --image= 24 | ``` 25 | 26 | Apply splash effect on a video. Requires OpenCV 3.2+: 27 | 28 | ```bash 29 | python3 balloon.py splash --weights=/path/to/mask_rcnn/mask_rcnn_balloon.h5 --video= 30 | ``` 31 | 32 | 33 | ## Run Jupyter notebooks 34 | Open the `inspect_balloon_data.ipynb` or `inspect_balloon_model.ipynb` Jupter notebooks. You can use these notebooks to explore the dataset and run through the detection pipelie step by step. 
35 | 36 | ## Train the Balloon model 37 | 38 | Train a new model starting from pre-trained COCO weights 39 | ``` 40 | python3 balloon.py train --dataset=/path/to/balloon/dataset --weights=coco 41 | ``` 42 | 43 | Resume training a model that you had trained earlier 44 | ``` 45 | python3 balloon.py train --dataset=/path/to/balloon/dataset --weights=last 46 | ``` 47 | 48 | Train a new model starting from ImageNet weights 49 | ``` 50 | python3 balloon.py train --dataset=/path/to/balloon/dataset --weights=imagenet 51 | ``` 52 | 53 | The code in `balloon.py` is set to train for 3K steps (30 epochs of 100 steps each), and using a batch size of 2. 54 | Update the schedule to fit your needs. 55 | -------------------------------------------------------------------------------- /samples/balloon/balloon.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Train on the toy Balloon dataset and implement color splash effect. 4 | 5 | Copyright (c) 2018 Matterport, Inc. 6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | 9 | ------------------------------------------------------------ 10 | 11 | Usage: import the module (see Jupyter notebooks for examples), or run from 12 | the command line as such: 13 | 14 | # Train a new model starting from pre-trained COCO weights 15 | python3 balloon.py train --dataset=/path/to/balloon/dataset --weights=coco 16 | 17 | # Resume training a model that you had trained earlier 18 | python3 balloon.py train --dataset=/path/to/balloon/dataset --weights=last 19 | 20 | # Train a new model starting from ImageNet weights 21 | python3 balloon.py train --dataset=/path/to/balloon/dataset --weights=imagenet 22 | 23 | # Apply color splash to an image 24 | python3 balloon.py splash --weights=/path/to/weights/file.h5 --image= 25 | 26 | # Apply color splash to video using the last weights you trained 27 | python3 balloon.py splash --weights=last --video= 28 | """ 29 | 30 | import os 31 | import sys 32 | import json 33 | import datetime 34 | import numpy as np 35 | import skimage.draw 36 | 37 | # Root directory of the project 38 | ROOT_DIR = os.path.abspath("../../") 39 | 40 | # Import Mask RCNN 41 | sys.path.append(ROOT_DIR) # To find local version of the library 42 | from mrcnn.config import Config 43 | from mrcnn import model as modellib, utils 44 | 45 | # Path to trained weights file 46 | COCO_WEIGHTS_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5") 47 | 48 | # Directory to save logs and model checkpoints, if not provided 49 | # through the command line argument --logs 50 | DEFAULT_LOGS_DIR = os.path.join(ROOT_DIR, "logs") 51 | 52 | ############################################################ 53 | # Configurations 54 | ############################################################ 55 | 56 | 57 | class BalloonConfig(Config): 58 | """Configuration for training on the toy dataset. 59 | Derives from the base Config class and overrides some values. 60 | """ 61 | # Give the configuration a recognizable name 62 | NAME = "balloon" 63 | 64 | # We use a GPU with 12GB memory, which can fit two images. 65 | # Adjust down if you use a smaller GPU. 
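    # The effective batch size is GPU_COUNT * IMAGES_PER_GPU (GPU_COUNT defaults
    # to 1 in the base Config), so this trains with a batch of 2. On a smaller
    # GPU, IMAGES_PER_GPU = 1 is a reasonable starting point.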
66 | IMAGES_PER_GPU = 2 67 | 68 | # Number of classes (including background) 69 | NUM_CLASSES = 1 + 1 # Background + balloon 70 | 71 | # Number of training steps per epoch 72 | STEPS_PER_EPOCH = 100 73 | 74 | # Skip detections with < 90% confidence 75 | DETECTION_MIN_CONFIDENCE = 0.9 76 | 77 | 78 | ############################################################ 79 | # Dataset 80 | ############################################################ 81 | 82 | class BalloonDataset(utils.Dataset): 83 | 84 | def load_balloon(self, dataset_dir, subset): 85 | """Load a subset of the Balloon dataset. 86 | dataset_dir: Root directory of the dataset. 87 | subset: Subset to load: train or val 88 | """ 89 | # Add classes. We have only one class to add. 90 | self.add_class("balloon", 1, "balloon") 91 | 92 | # Train or validation dataset? 93 | assert subset in ["train", "val"] 94 | dataset_dir = os.path.join(dataset_dir, subset) 95 | 96 | # Load annotations 97 | # VGG Image Annotator (up to version 1.6) saves each image in the form: 98 | # { 'filename': '28503151_5b5b7ec140_b.jpg', 99 | # 'regions': { 100 | # '0': { 101 | # 'region_attributes': {}, 102 | # 'shape_attributes': { 103 | # 'all_points_x': [...], 104 | # 'all_points_y': [...], 105 | # 'name': 'polygon'}}, 106 | # ... more regions ... 107 | # }, 108 | # 'size': 100202 109 | # } 110 | # We mostly care about the x and y coordinates of each region 111 | # Note: In VIA 2.0, regions was changed from a dict to a list. 112 | annotations = json.load(open(os.path.join(dataset_dir, "via_region_data.json"))) 113 | annotations = list(annotations.values()) # don't need the dict keys 114 | 115 | # The VIA tool saves images in the JSON even if they don't have any 116 | # annotations. Skip unannotated images. 117 | annotations = [a for a in annotations if a['regions']] 118 | 119 | # Add images 120 | for a in annotations: 121 | # Get the x, y coordinaets of points of the polygons that make up 122 | # the outline of each object instance. These are stores in the 123 | # shape_attributes (see json format above) 124 | # The if condition is needed to support VIA versions 1.x and 2.x. 125 | if type(a['regions']) is dict: 126 | polygons = [r['shape_attributes'] for r in a['regions'].values()] 127 | else: 128 | polygons = [r['shape_attributes'] for r in a['regions']] 129 | 130 | # load_mask() needs the image size to convert polygons to masks. 131 | # Unfortunately, VIA doesn't include it in JSON, so we must read 132 | # the image. This is only managable since the dataset is tiny. 133 | image_path = os.path.join(dataset_dir, a['filename']) 134 | image = skimage.io.imread(image_path) 135 | height, width = image.shape[:2] 136 | 137 | self.add_image( 138 | "balloon", 139 | image_id=a['filename'], # use file name as a unique image id 140 | path=image_path, 141 | width=width, height=height, 142 | polygons=polygons) 143 | 144 | def load_mask(self, image_id): 145 | """Generate instance masks for an image. 146 | Returns: 147 | masks: A bool array of shape [height, width, instance count] with 148 | one mask per instance. 149 | class_ids: a 1D array of class IDs of the instance masks. 150 | """ 151 | # If not a balloon dataset image, delegate to parent class. 
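        # The base Dataset class returns an empty mask and an empty class ID
        # array for unknown sources, so such images are handled gracefully.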
152 | image_info = self.image_info[image_id] 153 | if image_info["source"] != "balloon": 154 | return super(self.__class__, self).load_mask(image_id) 155 | 156 | # Convert polygons to a bitmap mask of shape 157 | # [height, width, instance_count] 158 | info = self.image_info[image_id] 159 | mask = np.zeros([info["height"], info["width"], len(info["polygons"])], 160 | dtype=np.uint8) 161 | for i, p in enumerate(info["polygons"]): 162 | # Get indexes of pixels inside the polygon and set them to 1 163 | rr, cc = skimage.draw.polygon(p['all_points_y'], p['all_points_x']) 164 | mask[rr, cc, i] = 1 165 | 166 | # Return mask, and array of class IDs of each instance. Since we have 167 | # one class ID only, we return an array of 1s 168 | return mask.astype(np.bool), np.ones([mask.shape[-1]], dtype=np.int32) 169 | 170 | def image_reference(self, image_id): 171 | """Return the path of the image.""" 172 | info = self.image_info[image_id] 173 | if info["source"] == "balloon": 174 | return info["path"] 175 | else: 176 | super(self.__class__, self).image_reference(image_id) 177 | 178 | 179 | def train(model): 180 | """Train the model.""" 181 | # Training dataset. 182 | dataset_train = BalloonDataset() 183 | dataset_train.load_balloon(args.dataset, "train") 184 | dataset_train.prepare() 185 | 186 | # Validation dataset 187 | dataset_val = BalloonDataset() 188 | dataset_val.load_balloon(args.dataset, "val") 189 | dataset_val.prepare() 190 | 191 | # *** This training schedule is an example. Update to your needs *** 192 | # Since we're using a very small dataset, and starting from 193 | # COCO trained weights, we don't need to train too long. Also, 194 | # no need to train all layers, just the heads should do it. 195 | print("Training network heads") 196 | model.train(dataset_train, dataset_val, 197 | learning_rate=config.LEARNING_RATE, 198 | epochs=30, 199 | layers='heads') 200 | 201 | 202 | def color_splash(image, mask): 203 | """Apply color splash effect. 204 | image: RGB image [height, width, 3] 205 | mask: instance segmentation mask [height, width, instance count] 206 | 207 | Returns result image. 208 | """ 209 | # Make a grayscale copy of the image. The grayscale copy still 210 | # has 3 RGB channels, though. 211 | gray = skimage.color.gray2rgb(skimage.color.rgb2gray(image)) * 255 212 | # Copy color pixels from the original color image where mask is set 213 | if mask.shape[-1] > 0: 214 | # We're treating all instances as one, so collapse the mask into one layer 215 | mask = (np.sum(mask, -1, keepdims=True) >= 1) 216 | splash = np.where(mask, image, gray).astype(np.uint8) 217 | else: 218 | splash = gray.astype(np.uint8) 219 | return splash 220 | 221 | 222 | def detect_and_color_splash(model, image_path=None, video_path=None): 223 | assert image_path or video_path 224 | 225 | # Image or video? 
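    # A single image is processed once and saved as a timestamped .png;
    # a video is processed frame by frame and written to a timestamped .avi.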
226 | if image_path: 227 | # Run model detection and generate the color splash effect 228 | print("Running on {}".format(args.image)) 229 | # Read image 230 | image = skimage.io.imread(args.image) 231 | # Detect objects 232 | r = model.detect([image], verbose=1)[0] 233 | # Color splash 234 | splash = color_splash(image, r['masks']) 235 | # Save output 236 | file_name = "splash_{:%Y%m%dT%H%M%S}.png".format(datetime.datetime.now()) 237 | skimage.io.imsave(file_name, splash) 238 | elif video_path: 239 | import cv2 240 | # Video capture 241 | vcapture = cv2.VideoCapture(video_path) 242 | width = int(vcapture.get(cv2.CAP_PROP_FRAME_WIDTH)) 243 | height = int(vcapture.get(cv2.CAP_PROP_FRAME_HEIGHT)) 244 | fps = vcapture.get(cv2.CAP_PROP_FPS) 245 | 246 | # Define codec and create video writer 247 | file_name = "splash_{:%Y%m%dT%H%M%S}.avi".format(datetime.datetime.now()) 248 | vwriter = cv2.VideoWriter(file_name, 249 | cv2.VideoWriter_fourcc(*'MJPG'), 250 | fps, (width, height)) 251 | 252 | count = 0 253 | success = True 254 | while success: 255 | print("frame: ", count) 256 | # Read next image 257 | success, image = vcapture.read() 258 | if success: 259 | # OpenCV returns images as BGR, convert to RGB 260 | image = image[..., ::-1] 261 | # Detect objects 262 | r = model.detect([image], verbose=0)[0] 263 | # Color splash 264 | splash = color_splash(image, r['masks']) 265 | # RGB -> BGR to save image to video 266 | splash = splash[..., ::-1] 267 | # Add image to video writer 268 | vwriter.write(splash) 269 | count += 1 270 | vwriter.release() 271 | print("Saved to ", file_name) 272 | 273 | 274 | ############################################################ 275 | # Training 276 | ############################################################ 277 | 278 | if __name__ == '__main__': 279 | import argparse 280 | 281 | # Parse command line arguments 282 | parser = argparse.ArgumentParser( 283 | description='Train Mask R-CNN to detect balloons.') 284 | parser.add_argument("command", 285 | metavar="", 286 | help="'train' or 'splash'") 287 | parser.add_argument('--dataset', required=False, 288 | metavar="/path/to/balloon/dataset/", 289 | help='Directory of the Balloon dataset') 290 | parser.add_argument('--weights', required=True, 291 | metavar="/path/to/weights.h5", 292 | help="Path to weights .h5 file or 'coco'") 293 | parser.add_argument('--logs', required=False, 294 | default=DEFAULT_LOGS_DIR, 295 | metavar="/path/to/logs/", 296 | help='Logs and checkpoints directory (default=logs/)') 297 | parser.add_argument('--image', required=False, 298 | metavar="path or URL to image", 299 | help='Image to apply the color splash effect on') 300 | parser.add_argument('--video', required=False, 301 | metavar="path or URL to video", 302 | help='Video to apply the color splash effect on') 303 | args = parser.parse_args() 304 | 305 | # Validate arguments 306 | if args.command == "train": 307 | assert args.dataset, "Argument --dataset is required for training" 308 | elif args.command == "splash": 309 | assert args.image or args.video,\ 310 | "Provide --image or --video to apply color splash" 311 | 312 | print("Weights: ", args.weights) 313 | print("Dataset: ", args.dataset) 314 | print("Logs: ", args.logs) 315 | 316 | # Configurations 317 | if args.command == "train": 318 | config = BalloonConfig() 319 | else: 320 | class InferenceConfig(BalloonConfig): 321 | # Set batch size to 1 since we'll be running inference on 322 | # one image at a time. 
Batch size = GPU_COUNT * IMAGES_PER_GPU 323 | GPU_COUNT = 1 324 | IMAGES_PER_GPU = 1 325 | config = InferenceConfig() 326 | config.display() 327 | 328 | # Create model 329 | if args.command == "train": 330 | model = modellib.MaskRCNN(mode="training", config=config, 331 | model_dir=args.logs) 332 | else: 333 | model = modellib.MaskRCNN(mode="inference", config=config, 334 | model_dir=args.logs) 335 | 336 | # Select weights file to load 337 | if args.weights.lower() == "coco": 338 | weights_path = COCO_WEIGHTS_PATH 339 | # Download weights file 340 | if not os.path.exists(weights_path): 341 | utils.download_trained_weights(weights_path) 342 | elif args.weights.lower() == "last": 343 | # Find last trained weights 344 | weights_path = model.find_last() 345 | elif args.weights.lower() == "imagenet": 346 | # Start from ImageNet trained weights 347 | weights_path = model.get_imagenet_weights() 348 | else: 349 | weights_path = args.weights 350 | 351 | # Load weights 352 | print("Loading weights ", weights_path) 353 | if args.weights.lower() == "coco": 354 | # Exclude the last layers because they require a matching 355 | # number of classes 356 | model.load_weights(weights_path, by_name=True, exclude=[ 357 | "mrcnn_class_logits", "mrcnn_bbox_fc", 358 | "mrcnn_bbox", "mrcnn_mask"]) 359 | else: 360 | model.load_weights(weights_path, by_name=True) 361 | 362 | # Train or evaluate 363 | if args.command == "train": 364 | train(model) 365 | elif args.command == "splash": 366 | detect_and_color_splash(model, image_path=args.image, 367 | video_path=args.video) 368 | else: 369 | print("'{}' is not recognized. " 370 | "Use 'train' or 'splash'".format(args.command)) 371 | -------------------------------------------------------------------------------- /samples/coco/coco.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Configurations and data loading code for MS COCO. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | 9 | ------------------------------------------------------------ 10 | 11 | Usage: import the module (see Jupyter notebooks for examples), or run from 12 | the command line as such: 13 | 14 | # Train a new model starting from pre-trained COCO weights 15 | python3 coco.py train --dataset=/path/to/coco/ --model=coco 16 | 17 | # Train a new model starting from ImageNet weights. Also auto download COCO dataset 18 | python3 coco.py train --dataset=/path/to/coco/ --model=imagenet --download=True 19 | 20 | # Continue training a model that you had trained earlier 21 | python3 coco.py train --dataset=/path/to/coco/ --model=/path/to/weights.h5 22 | 23 | # Continue training the last model you trained 24 | python3 coco.py train --dataset=/path/to/coco/ --model=last 25 | 26 | # Run COCO evaluatoin on the last model you trained 27 | python3 coco.py evaluate --dataset=/path/to/coco/ --model=last 28 | """ 29 | 30 | import os 31 | import sys 32 | import time 33 | import numpy as np 34 | import imgaug # https://github.com/aleju/imgaug (pip3 install imgaug) 35 | 36 | # Download and install the Python COCO tools from https://github.com/waleedka/coco 37 | # That's a fork from the original https://github.com/pdollar/coco with a bug 38 | # fix for Python 3. 39 | # I submitted a pull request https://github.com/cocodataset/cocoapi/pull/50 40 | # If the PR is merged then use the original repo. 41 | # Note: Edit PythonAPI/Makefile and replace "python" with "python3". 
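# If a prebuilt package is available for your platform, `pip install pycocotools`
# from PyPI is usually the simplest route; the fork above is mainly needed where
# that install fails (Windows builds were a common case at the time).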
42 | from pycocotools.coco import COCO 43 | from pycocotools.cocoeval import COCOeval 44 | from pycocotools import mask as maskUtils 45 | 46 | import zipfile 47 | import urllib.request 48 | import shutil 49 | 50 | # Root directory of the project 51 | ROOT_DIR = os.path.abspath("../../") 52 | 53 | # Import Mask RCNN 54 | sys.path.append(ROOT_DIR) # To find local version of the library 55 | from mrcnn.config import Config 56 | from mrcnn import model as modellib, utils 57 | 58 | # Path to trained weights file 59 | COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5") 60 | 61 | # Directory to save logs and model checkpoints, if not provided 62 | # through the command line argument --logs 63 | DEFAULT_LOGS_DIR = os.path.join(ROOT_DIR, "logs") 64 | DEFAULT_DATASET_YEAR = "2014" 65 | 66 | ############################################################ 67 | # Configurations 68 | ############################################################ 69 | 70 | 71 | class CocoConfig(Config): 72 | """Configuration for training on MS COCO. 73 | Derives from the base Config class and overrides values specific 74 | to the COCO dataset. 75 | """ 76 | # Give the configuration a recognizable name 77 | NAME = "coco" 78 | 79 | # We use a GPU with 12GB memory, which can fit two images. 80 | # Adjust down if you use a smaller GPU. 81 | IMAGES_PER_GPU = 2 82 | 83 | # Uncomment to train on 8 GPUs (default is 1) 84 | # GPU_COUNT = 8 85 | 86 | # Number of classes (including background) 87 | NUM_CLASSES = 1 + 80 # COCO has 80 classes 88 | 89 | 90 | ############################################################ 91 | # Dataset 92 | ############################################################ 93 | 94 | class CocoDataset(utils.Dataset): 95 | def load_coco(self, dataset_dir, subset, year=DEFAULT_DATASET_YEAR, class_ids=None, 96 | class_map=None, return_coco=False, auto_download=False): 97 | """Load a subset of the COCO dataset. 98 | dataset_dir: The root directory of the COCO dataset. 99 | subset: What to load (train, val, minival, valminusminival) 100 | year: What dataset year to load (2014, 2017) as a string, not an integer 101 | class_ids: If provided, only loads images that have the given classes. 102 | class_map: TODO: Not implemented yet. Supports maping classes from 103 | different datasets to the same class ID. 104 | return_coco: If True, returns the COCO object. 105 | auto_download: Automatically download and unzip MS-COCO images and annotations 106 | """ 107 | 108 | if auto_download is True: 109 | self.auto_download(dataset_dir, subset, year) 110 | 111 | coco = COCO("{}/annotations/instances_{}{}.json".format(dataset_dir, subset, year)) 112 | if subset == "minival" or subset == "valminusminival": 113 | subset = "val" 114 | image_dir = "{}/{}{}".format(dataset_dir, subset, year) 115 | 116 | # Load all classes or a subset? 117 | if not class_ids: 118 | # All classes 119 | class_ids = sorted(coco.getCatIds()) 120 | 121 | # All images or a subset? 
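        # When specific classes are requested, gather the IDs of every image that
        # contains at least one of them, then deduplicate (an image can contain
        # several of the requested classes).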
122 | if class_ids: 123 | image_ids = [] 124 | for id in class_ids: 125 | image_ids.extend(list(coco.getImgIds(catIds=[id]))) 126 | # Remove duplicates 127 | image_ids = list(set(image_ids)) 128 | else: 129 | # All images 130 | image_ids = list(coco.imgs.keys()) 131 | 132 | # Add classes 133 | for i in class_ids: 134 | self.add_class("coco", i, coco.loadCats(i)[0]["name"]) 135 | 136 | # Add images 137 | for i in image_ids: 138 | self.add_image( 139 | "coco", image_id=i, 140 | path=os.path.join(image_dir, coco.imgs[i]['file_name']), 141 | width=coco.imgs[i]["width"], 142 | height=coco.imgs[i]["height"], 143 | annotations=coco.loadAnns(coco.getAnnIds( 144 | imgIds=[i], catIds=class_ids, iscrowd=None))) 145 | if return_coco: 146 | return coco 147 | 148 | def auto_download(self, dataDir, dataType, dataYear): 149 | """Download the COCO dataset/annotations if requested. 150 | dataDir: The root directory of the COCO dataset. 151 | dataType: What to load (train, val, minival, valminusminival) 152 | dataYear: What dataset year to load (2014, 2017) as a string, not an integer 153 | Note: 154 | For 2014, use "train", "val", "minival", or "valminusminival" 155 | For 2017, only "train" and "val" annotations are available 156 | """ 157 | 158 | # Setup paths and file names 159 | if dataType == "minival" or dataType == "valminusminival": 160 | imgDir = "{}/{}{}".format(dataDir, "val", dataYear) 161 | imgZipFile = "{}/{}{}.zip".format(dataDir, "val", dataYear) 162 | imgURL = "http://images.cocodataset.org/zips/{}{}.zip".format("val", dataYear) 163 | else: 164 | imgDir = "{}/{}{}".format(dataDir, dataType, dataYear) 165 | imgZipFile = "{}/{}{}.zip".format(dataDir, dataType, dataYear) 166 | imgURL = "http://images.cocodataset.org/zips/{}{}.zip".format(dataType, dataYear) 167 | # print("Image paths:"); print(imgDir); print(imgZipFile); print(imgURL) 168 | 169 | # Create main folder if it doesn't exist yet 170 | if not os.path.exists(dataDir): 171 | os.makedirs(dataDir) 172 | 173 | # Download images if not available locally 174 | if not os.path.exists(imgDir): 175 | os.makedirs(imgDir) 176 | print("Downloading images to " + imgZipFile + " ...") 177 | with urllib.request.urlopen(imgURL) as resp, open(imgZipFile, 'wb') as out: 178 | shutil.copyfileobj(resp, out) 179 | print("... done downloading.") 180 | print("Unzipping " + imgZipFile) 181 | with zipfile.ZipFile(imgZipFile, "r") as zip_ref: 182 | zip_ref.extractall(dataDir) 183 | print("... 
done unzipping") 184 | print("Will use images in " + imgDir) 185 | 186 | # Setup annotations data paths 187 | annDir = "{}/annotations".format(dataDir) 188 | if dataType == "minival": 189 | annZipFile = "{}/instances_minival2014.json.zip".format(dataDir) 190 | annFile = "{}/instances_minival2014.json".format(annDir) 191 | annURL = "https://dl.dropboxusercontent.com/s/o43o90bna78omob/instances_minival2014.json.zip?dl=0" 192 | unZipDir = annDir 193 | elif dataType == "valminusminival": 194 | annZipFile = "{}/instances_valminusminival2014.json.zip".format(dataDir) 195 | annFile = "{}/instances_valminusminival2014.json".format(annDir) 196 | annURL = "https://dl.dropboxusercontent.com/s/s3tw5zcg7395368/instances_valminusminival2014.json.zip?dl=0" 197 | unZipDir = annDir 198 | else: 199 | annZipFile = "{}/annotations_trainval{}.zip".format(dataDir, dataYear) 200 | annFile = "{}/instances_{}{}.json".format(annDir, dataType, dataYear) 201 | annURL = "http://images.cocodataset.org/annotations/annotations_trainval{}.zip".format(dataYear) 202 | unZipDir = dataDir 203 | # print("Annotations paths:"); print(annDir); print(annFile); print(annZipFile); print(annURL) 204 | 205 | # Download annotations if not available locally 206 | if not os.path.exists(annDir): 207 | os.makedirs(annDir) 208 | if not os.path.exists(annFile): 209 | if not os.path.exists(annZipFile): 210 | print("Downloading zipped annotations to " + annZipFile + " ...") 211 | with urllib.request.urlopen(annURL) as resp, open(annZipFile, 'wb') as out: 212 | shutil.copyfileobj(resp, out) 213 | print("... done downloading.") 214 | print("Unzipping " + annZipFile) 215 | with zipfile.ZipFile(annZipFile, "r") as zip_ref: 216 | zip_ref.extractall(unZipDir) 217 | print("... done unzipping") 218 | print("Will use annotations in " + annFile) 219 | 220 | def load_mask(self, image_id): 221 | """Load instance masks for the given image. 222 | 223 | Different datasets use different ways to store masks. This 224 | function converts the different mask format to one format 225 | in the form of a bitmap [height, width, instances]. 226 | 227 | Returns: 228 | masks: A bool array of shape [height, width, instance count] with 229 | one mask per instance. 230 | class_ids: a 1D array of class IDs of the instance masks. 231 | """ 232 | # If not a COCO image, delegate to parent class. 233 | image_info = self.image_info[image_id] 234 | if image_info["source"] != "coco": 235 | return super(CocoDataset, self).load_mask(image_id) 236 | 237 | instance_masks = [] 238 | class_ids = [] 239 | annotations = self.image_info[image_id]["annotations"] 240 | # Build mask of shape [height, width, instance_count] and list 241 | # of class IDs that correspond to each channel of the mask. 242 | for annotation in annotations: 243 | class_id = self.map_source_class_id( 244 | "coco.{}".format(annotation['category_id'])) 245 | if class_id: 246 | m = self.annToMask(annotation, image_info["height"], 247 | image_info["width"]) 248 | # Some objects are so small that they're less than 1 pixel area 249 | # and end up rounded out. Skip those objects. 250 | if m.max() < 1: 251 | continue 252 | # Is it a crowd? If so, use a negative class ID. 253 | if annotation['iscrowd']: 254 | # Use negative class ID for crowds 255 | class_id *= -1 256 | # For crowd masks, annToMask() sometimes returns a mask 257 | # smaller than the given dimensions. If so, resize it. 
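                    # Note: the replacement below is a full-size mask of ones rather
                    # than a true resize. Crowd instances were given a negative class
                    # ID above and are excluded when training targets are built, so
                    # the exact mask contents matter little here.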
258 | if m.shape[0] != image_info["height"] or m.shape[1] != image_info["width"]: 259 | m = np.ones([image_info["height"], image_info["width"]], dtype=bool) 260 | instance_masks.append(m) 261 | class_ids.append(class_id) 262 | 263 | # Pack instance masks into an array 264 | if class_ids: 265 | mask = np.stack(instance_masks, axis=2).astype(np.bool) 266 | class_ids = np.array(class_ids, dtype=np.int32) 267 | return mask, class_ids 268 | else: 269 | # Call super class to return an empty mask 270 | return super(CocoDataset, self).load_mask(image_id) 271 | 272 | def image_reference(self, image_id): 273 | """Return a link to the image in the COCO Website.""" 274 | info = self.image_info[image_id] 275 | if info["source"] == "coco": 276 | return "http://cocodataset.org/#explore?id={}".format(info["id"]) 277 | else: 278 | super(CocoDataset, self).image_reference(image_id) 279 | 280 | # The following two functions are from pycocotools with a few changes. 281 | 282 | def annToRLE(self, ann, height, width): 283 | """ 284 | Convert annotation which can be polygons, uncompressed RLE to RLE. 285 | :return: binary mask (numpy 2D array) 286 | """ 287 | segm = ann['segmentation'] 288 | if isinstance(segm, list): 289 | # polygon -- a single object might consist of multiple parts 290 | # we merge all parts into one mask rle code 291 | rles = maskUtils.frPyObjects(segm, height, width) 292 | rle = maskUtils.merge(rles) 293 | elif isinstance(segm['counts'], list): 294 | # uncompressed RLE 295 | rle = maskUtils.frPyObjects(segm, height, width) 296 | else: 297 | # rle 298 | rle = ann['segmentation'] 299 | return rle 300 | 301 | def annToMask(self, ann, height, width): 302 | """ 303 | Convert annotation which can be polygons, uncompressed RLE, or RLE to binary mask. 304 | :return: binary mask (numpy 2D array) 305 | """ 306 | rle = self.annToRLE(ann, height, width) 307 | m = maskUtils.decode(rle) 308 | return m 309 | 310 | 311 | ############################################################ 312 | # COCO Evaluation 313 | ############################################################ 314 | 315 | def build_coco_results(dataset, image_ids, rois, class_ids, scores, masks): 316 | """Arrange resutls to match COCO specs in http://cocodataset.org/#format 317 | """ 318 | # If no results, return an empty list 319 | if rois is None: 320 | return [] 321 | 322 | results = [] 323 | for image_id in image_ids: 324 | # Loop through detections 325 | for i in range(rois.shape[0]): 326 | class_id = class_ids[i] 327 | score = scores[i] 328 | bbox = np.around(rois[i], 1) 329 | mask = masks[:, :, i] 330 | 331 | result = { 332 | "image_id": image_id, 333 | "category_id": dataset.get_source_class_id(class_id, "coco"), 334 | "bbox": [bbox[1], bbox[0], bbox[3] - bbox[1], bbox[2] - bbox[0]], 335 | "score": score, 336 | "segmentation": maskUtils.encode(np.asfortranarray(mask)) 337 | } 338 | results.append(result) 339 | return results 340 | 341 | 342 | def evaluate_coco(model, dataset, coco, eval_type="bbox", limit=0, image_ids=None): 343 | """Runs official COCO evaluation. 344 | dataset: A Dataset object with valiadtion data 345 | eval_type: "bbox" or "segm" for bounding box or segmentation evaluation 346 | limit: if not 0, it's the number of images to use for evaluation 347 | """ 348 | # Pick COCO images from the dataset 349 | image_ids = image_ids or dataset.image_ids 350 | 351 | # Limit to a subset 352 | if limit: 353 | image_ids = image_ids[:limit] 354 | 355 | # Get corresponding COCO image IDs. 
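    # The Dataset class assigns its own internal indices; the original COCO image
    # IDs that COCOeval expects were stored in image_info when the images were added.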
356 | coco_image_ids = [dataset.image_info[id]["id"] for id in image_ids] 357 | 358 | t_prediction = 0 359 | t_start = time.time() 360 | 361 | results = [] 362 | for i, image_id in enumerate(image_ids): 363 | # Load image 364 | image = dataset.load_image(image_id) 365 | 366 | # Run detection 367 | t = time.time() 368 | r = model.detect([image], verbose=0)[0] 369 | t_prediction += (time.time() - t) 370 | 371 | # Convert results to COCO format 372 | # Cast masks to uint8 because COCO tools errors out on bool 373 | image_results = build_coco_results(dataset, coco_image_ids[i:i + 1], 374 | r["rois"], r["class_ids"], 375 | r["scores"], 376 | r["masks"].astype(np.uint8)) 377 | results.extend(image_results) 378 | 379 | # Load results. This modifies results with additional attributes. 380 | coco_results = coco.loadRes(results) 381 | 382 | # Evaluate 383 | cocoEval = COCOeval(coco, coco_results, eval_type) 384 | cocoEval.params.imgIds = coco_image_ids 385 | cocoEval.evaluate() 386 | cocoEval.accumulate() 387 | cocoEval.summarize() 388 | 389 | print("Prediction time: {}. Average {}/image".format( 390 | t_prediction, t_prediction / len(image_ids))) 391 | print("Total time: ", time.time() - t_start) 392 | 393 | 394 | ############################################################ 395 | # Training 396 | ############################################################ 397 | 398 | 399 | if __name__ == '__main__': 400 | import argparse 401 | 402 | # Parse command line arguments 403 | parser = argparse.ArgumentParser( 404 | description='Train Mask R-CNN on MS COCO.') 405 | parser.add_argument("command", 406 | metavar="", 407 | help="'train' or 'evaluate' on MS COCO") 408 | parser.add_argument('--dataset', required=True, 409 | metavar="/path/to/coco/", 410 | help='Directory of the MS-COCO dataset') 411 | parser.add_argument('--year', required=False, 412 | default=DEFAULT_DATASET_YEAR, 413 | metavar="", 414 | help='Year of the MS-COCO dataset (2014 or 2017) (default=2014)') 415 | parser.add_argument('--model', required=True, 416 | metavar="/path/to/weights.h5", 417 | help="Path to weights .h5 file or 'coco'") 418 | parser.add_argument('--logs', required=False, 419 | default=DEFAULT_LOGS_DIR, 420 | metavar="/path/to/logs/", 421 | help='Logs and checkpoints directory (default=logs/)') 422 | parser.add_argument('--limit', required=False, 423 | default=500, 424 | metavar="", 425 | help='Images to use for evaluation (default=500)') 426 | parser.add_argument('--download', required=False, 427 | default=False, 428 | metavar="", 429 | help='Automatically download and unzip MS-COCO files (default=False)', 430 | type=bool) 431 | args = parser.parse_args() 432 | print("Command: ", args.command) 433 | print("Model: ", args.model) 434 | print("Dataset: ", args.dataset) 435 | print("Year: ", args.year) 436 | print("Logs: ", args.logs) 437 | print("Auto Download: ", args.download) 438 | 439 | # Configurations 440 | if args.command == "train": 441 | config = CocoConfig() 442 | else: 443 | class InferenceConfig(CocoConfig): 444 | # Set batch size to 1 since we'll be running inference on 445 | # one image at a time. 
Batch size = GPU_COUNT * IMAGES_PER_GPU 446 | GPU_COUNT = 1 447 | IMAGES_PER_GPU = 1 448 | DETECTION_MIN_CONFIDENCE = 0 449 | config = InferenceConfig() 450 | config.display() 451 | 452 | # Create model 453 | if args.command == "train": 454 | model = modellib.MaskRCNN(mode="training", config=config, 455 | model_dir=args.logs) 456 | else: 457 | model = modellib.MaskRCNN(mode="inference", config=config, 458 | model_dir=args.logs) 459 | 460 | # Select weights file to load 461 | if args.model.lower() == "coco": 462 | model_path = COCO_MODEL_PATH 463 | elif args.model.lower() == "last": 464 | # Find last trained weights 465 | model_path = model.find_last() 466 | elif args.model.lower() == "imagenet": 467 | # Start from ImageNet trained weights 468 | model_path = model.get_imagenet_weights() 469 | else: 470 | model_path = args.model 471 | 472 | # Load weights 473 | print("Loading weights ", model_path) 474 | model.load_weights(model_path, by_name=True) 475 | 476 | # Train or evaluate 477 | if args.command == "train": 478 | # Training dataset. Use the training set and 35K from the 479 | # validation set, as as in the Mask RCNN paper. 480 | dataset_train = CocoDataset() 481 | dataset_train.load_coco(args.dataset, "train", year=args.year, auto_download=args.download) 482 | if args.year in '2014': 483 | dataset_train.load_coco(args.dataset, "valminusminival", year=args.year, auto_download=args.download) 484 | dataset_train.prepare() 485 | 486 | # Validation dataset 487 | dataset_val = CocoDataset() 488 | val_type = "val" if args.year in '2017' else "minival" 489 | dataset_val.load_coco(args.dataset, val_type, year=args.year, auto_download=args.download) 490 | dataset_val.prepare() 491 | 492 | # Image Augmentation 493 | # Right/Left flip 50% of the time 494 | augmentation = imgaug.augmenters.Fliplr(0.5) 495 | 496 | # *** This training schedule is an example. Update to your needs *** 497 | 498 | # Training - Stage 1 499 | print("Training network heads") 500 | model.train(dataset_train, dataset_val, 501 | learning_rate=config.LEARNING_RATE, 502 | epochs=40, 503 | layers='heads', 504 | augmentation=augmentation) 505 | 506 | # Training - Stage 2 507 | # Finetune layers from ResNet stage 4 and up 508 | print("Fine tune Resnet stage 4 and up") 509 | model.train(dataset_train, dataset_val, 510 | learning_rate=config.LEARNING_RATE, 511 | epochs=120, 512 | layers='4+', 513 | augmentation=augmentation) 514 | 515 | # Training - Stage 3 516 | # Fine tune all layers 517 | print("Fine tune all layers") 518 | model.train(dataset_train, dataset_val, 519 | learning_rate=config.LEARNING_RATE / 10, 520 | epochs=160, 521 | layers='all', 522 | augmentation=augmentation) 523 | 524 | elif args.command == "evaluate": 525 | # Validation dataset 526 | dataset_val = CocoDataset() 527 | val_type = "val" if args.year in '2017' else "minival" 528 | coco = dataset_val.load_coco(args.dataset, val_type, year=args.year, return_coco=True, auto_download=args.download) 529 | dataset_val.prepare() 530 | print("Running COCO evaluation on {} images.".format(args.limit)) 531 | evaluate_coco(model, dataset_val, coco, "bbox", limit=int(args.limit)) 532 | else: 533 | print("'{}' is not recognized. 
" 534 | "Use 'train' or 'evaluate'".format(args.command)) 535 | -------------------------------------------------------------------------------- /samples/nucleus/README.md: -------------------------------------------------------------------------------- 1 | # Nuclei Counting and Segmentation 2 | 3 | This sample implements the [2018 Data Science Bowl challenge](https://www.kaggle.com/c/data-science-bowl-2018). 4 | The goal is to segment individual nuclei in microscopy images. 5 | The `nucleus.py` file contains the main parts of the code, and the two Jupyter notebooks 6 | 7 | 8 | ## Command line Usage 9 | Train a new model starting from ImageNet weights using `train` dataset (which is `stage1_train` minus validation set) 10 | ``` 11 | python3 nucleus.py train --dataset=/path/to/dataset --subset=train --weights=imagenet 12 | ``` 13 | 14 | Train a new model starting from specific weights file using the full `stage1_train` dataset 15 | ``` 16 | python3 nucleus.py train --dataset=/path/to/dataset --subset=stage1_train --weights=/path/to/weights.h5 17 | ``` 18 | 19 | Resume training a model that you had trained earlier 20 | ``` 21 | python3 nucleus.py train --dataset=/path/to/dataset --subset=train --weights=last 22 | ``` 23 | 24 | Generate submission file from `stage1_test` images 25 | ``` 26 | python3 nucleus.py detect --dataset=/path/to/dataset --subset=stage1_test --weights= 27 | ``` 28 | 29 | 30 | ## Jupyter notebooks 31 | Two Jupyter notebooks are provided as well: `inspect_nucleus_data.ipynb` and `inspect_nucleus_model.ipynb`. 32 | They explore the dataset, run stats on it, and go through the detection process step by step. 33 | -------------------------------------------------------------------------------- /samples/nucleus/nucleus.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Train on the nuclei segmentation dataset from the 4 | Kaggle 2018 Data Science Bowl 5 | https://www.kaggle.com/c/data-science-bowl-2018/ 6 | 7 | Licensed under the MIT License (see LICENSE for details) 8 | Written by Waleed Abdulla 9 | 10 | ------------------------------------------------------------ 11 | 12 | Usage: import the module (see Jupyter notebooks for examples), or run from 13 | the command line as such: 14 | 15 | # Train a new model starting from ImageNet weights 16 | python3 nucleus.py train --dataset=/path/to/dataset --subset=train --weights=imagenet 17 | 18 | # Train a new model starting from specific weights file 19 | python3 nucleus.py train --dataset=/path/to/dataset --subset=train --weights=/path/to/weights.h5 20 | 21 | # Resume training a model that you had trained earlier 22 | python3 nucleus.py train --dataset=/path/to/dataset --subset=train --weights=last 23 | 24 | # Generate submission file 25 | python3 nucleus.py detect --dataset=/path/to/dataset --subset=train --weights= 26 | """ 27 | 28 | # Set matplotlib backend 29 | # This has to be done before other importa that might 30 | # set it, but only if we're running in script mode 31 | # rather than being imported. 
32 | if __name__ == '__main__': 33 | import matplotlib 34 | # Agg backend runs without a display 35 | matplotlib.use('Agg') 36 | import matplotlib.pyplot as plt 37 | 38 | import os 39 | import sys 40 | import json 41 | import datetime 42 | import numpy as np 43 | import skimage.io 44 | from imgaug import augmenters as iaa 45 | 46 | # Root directory of the project 47 | ROOT_DIR = os.path.abspath("../../") 48 | 49 | # Import Mask RCNN 50 | sys.path.append(ROOT_DIR) # To find local version of the library 51 | from mrcnn.config import Config 52 | from mrcnn import utils 53 | from mrcnn import model as modellib 54 | from mrcnn import visualize 55 | 56 | # Path to trained weights file 57 | COCO_WEIGHTS_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5") 58 | 59 | # Directory to save logs and model checkpoints, if not provided 60 | # through the command line argument --logs 61 | DEFAULT_LOGS_DIR = os.path.join(ROOT_DIR, "logs") 62 | 63 | # Results directory 64 | # Save submission files here 65 | RESULTS_DIR = os.path.join(ROOT_DIR, "results/nucleus/") 66 | 67 | # The dataset doesn't have a standard train/val split, so I picked 68 | # a variety of images to surve as a validation set. 69 | VAL_IMAGE_IDS = [ 70 | "0c2550a23b8a0f29a7575de8c61690d3c31bc897dd5ba66caec201d201a278c2", 71 | "92f31f591929a30e4309ab75185c96ff4314ce0a7ead2ed2c2171897ad1da0c7", 72 | "1e488c42eb1a54a3e8412b1f12cde530f950f238d71078f2ede6a85a02168e1f", 73 | "c901794d1a421d52e5734500c0a2a8ca84651fb93b19cec2f411855e70cae339", 74 | "8e507d58f4c27cd2a82bee79fe27b069befd62a46fdaed20970a95a2ba819c7b", 75 | "60cb718759bff13f81c4055a7679e81326f78b6a193a2d856546097c949b20ff", 76 | "da5f98f2b8a64eee735a398de48ed42cd31bf17a6063db46a9e0783ac13cd844", 77 | "9ebcfaf2322932d464f15b5662cae4d669b2d785b8299556d73fffcae8365d32", 78 | "1b44d22643830cd4f23c9deadb0bd499fb392fb2cd9526d81547d93077d983df", 79 | "97126a9791f0c1176e4563ad679a301dac27c59011f579e808bbd6e9f4cd1034", 80 | "e81c758e1ca177b0942ecad62cf8d321ffc315376135bcbed3df932a6e5b40c0", 81 | "f29fd9c52e04403cd2c7d43b6fe2479292e53b2f61969d25256d2d2aca7c6a81", 82 | "0ea221716cf13710214dcd331a61cea48308c3940df1d28cfc7fd817c83714e1", 83 | "3ab9cab6212fabd723a2c5a1949c2ded19980398b56e6080978e796f45cbbc90", 84 | "ebc18868864ad075548cc1784f4f9a237bb98335f9645ee727dac8332a3e3716", 85 | "bb61fc17daf8bdd4e16fdcf50137a8d7762bec486ede9249d92e511fcb693676", 86 | "e1bcb583985325d0ef5f3ef52957d0371c96d4af767b13e48102bca9d5351a9b", 87 | "947c0d94c8213ac7aaa41c4efc95d854246550298259cf1bb489654d0e969050", 88 | "cbca32daaae36a872a11da4eaff65d1068ff3f154eedc9d3fc0c214a4e5d32bd", 89 | "f4c4db3df4ff0de90f44b027fc2e28c16bf7e5c75ea75b0a9762bbb7ac86e7a3", 90 | "4193474b2f1c72f735b13633b219d9cabdd43c21d9c2bb4dfc4809f104ba4c06", 91 | "f73e37957c74f554be132986f38b6f1d75339f636dfe2b681a0cf3f88d2733af", 92 | "a4c44fc5f5bf213e2be6091ccaed49d8bf039d78f6fbd9c4d7b7428cfcb2eda4", 93 | "cab4875269f44a701c5e58190a1d2f6fcb577ea79d842522dcab20ccb39b7ad2", 94 | "8ecdb93582b2d5270457b36651b62776256ade3aaa2d7432ae65c14f07432d49", 95 | ] 96 | 97 | 98 | ############################################################ 99 | # Configurations 100 | ############################################################ 101 | 102 | class NucleusConfig(Config): 103 | """Configuration for training on the nucleus segmentation dataset.""" 104 | # Give the configuration a recognizable name 105 | NAME = "nucleus" 106 | 107 | # Adjust depending on your GPU memory 108 | IMAGES_PER_GPU = 6 109 | 110 | # Number of classes (including background) 111 | NUM_CLASSES = 
1 + 1 # Background + nucleus 112 | 113 | # Number of training and validation steps per epoch 114 | STEPS_PER_EPOCH = (657 - len(VAL_IMAGE_IDS)) // IMAGES_PER_GPU 115 | VALIDATION_STEPS = max(1, len(VAL_IMAGE_IDS) // IMAGES_PER_GPU) 116 | 117 | # Don't exclude based on confidence. Since we have two classes 118 | # then 0.5 is the minimum anyway as it picks between nucleus and BG 119 | DETECTION_MIN_CONFIDENCE = 0 120 | 121 | # Backbone network architecture 122 | # Supported values are: resnet50, resnet101 123 | BACKBONE = "resnet50" 124 | 125 | # Input image resizing 126 | # Random crops of size 512x512 127 | IMAGE_RESIZE_MODE = "crop" 128 | IMAGE_MIN_DIM = 512 129 | IMAGE_MAX_DIM = 512 130 | IMAGE_MIN_SCALE = 2.0 131 | 132 | # Length of square anchor side in pixels 133 | RPN_ANCHOR_SCALES = (8, 16, 32, 64, 128) 134 | 135 | # ROIs kept after non-maximum suppression (training and inference) 136 | POST_NMS_ROIS_TRAINING = 1000 137 | POST_NMS_ROIS_INFERENCE = 2000 138 | 139 | # Non-max suppression threshold to filter RPN proposals. 140 | # You can increase this during training to generate more proposals. 141 | RPN_NMS_THRESHOLD = 0.9 142 | 143 | # How many anchors per image to use for RPN training 144 | RPN_TRAIN_ANCHORS_PER_IMAGE = 64 145 | 146 | # Image mean (RGB) 147 | MEAN_PIXEL = np.array([43.53, 39.56, 48.22]) 148 | 149 | # If enabled, resizes instance masks to a smaller size to reduce 150 | # memory load. Recommended when using high-resolution images. 151 | USE_MINI_MASK = True 152 | MINI_MASK_SHAPE = (56, 56) # (height, width) of the mini-mask 153 | 154 | # Number of ROIs per image to feed to classifier/mask heads 155 | # The Mask RCNN paper uses 512 but often the RPN doesn't generate 156 | # enough positive proposals to fill this and keep a positive:negative 157 | # ratio of 1:3. You can increase the number of proposals by adjusting 158 | # the RPN NMS threshold. 159 | TRAIN_ROIS_PER_IMAGE = 128 160 | 161 | # Maximum number of ground truth instances to use in one image 162 | MAX_GT_INSTANCES = 200 163 | 164 | # Max number of final detections per image 165 | DETECTION_MAX_INSTANCES = 400 166 | 167 | 168 | class NucleusInferenceConfig(NucleusConfig): 169 | # Set batch size to 1 to run one image at a time 170 | GPU_COUNT = 1 171 | IMAGES_PER_GPU = 1 172 | # Don't resize images for inference 173 | IMAGE_RESIZE_MODE = "pad64" 174 | # Non-max suppression threshold to filter RPN proposals. 175 | # You can increase this during training to generate more proposals. 176 | RPN_NMS_THRESHOLD = 0.7 177 | 178 | 179 | ############################################################ 180 | # Dataset 181 | ############################################################ 182 | 183 | class NucleusDataset(utils.Dataset): 184 | 185 | def load_nucleus(self, dataset_dir, subset): 186 | """Load a subset of the nuclei dataset. 187 | 188 | dataset_dir: Root directory of the dataset 189 | subset: Subset to load. Either the name of the sub-directory, 190 | such as stage1_train, stage1_test, etc., or one of: 191 | * train: stage1_train excluding validation images 192 | * val: validation images from VAL_IMAGE_IDS 193 | """ 194 | # Add classes. We have one class. 195 | # Naming the dataset nucleus, and the class nucleus 196 | self.add_class("nucleus", 1, "nucleus") 197 | 198 | # Which subset?
199 | # "val": use hard-coded list above 200 | # "train": use data from stage1_train minus the hard-coded list above 201 | # else: use the data from the specified sub-directory 202 | assert subset in ["train", "val", "stage1_train", "stage1_test", "stage2_test"] 203 | subset_dir = "stage1_train" if subset in ["train", "val"] else subset 204 | dataset_dir = os.path.join(dataset_dir, subset_dir) 205 | if subset == "val": 206 | image_ids = VAL_IMAGE_IDS 207 | else: 208 | # Get image ids from directory names 209 | image_ids = next(os.walk(dataset_dir))[1] 210 | if subset == "train": 211 | image_ids = list(set(image_ids) - set(VAL_IMAGE_IDS)) 212 | 213 | # Add images 214 | for image_id in image_ids: 215 | self.add_image( 216 | "nucleus", 217 | image_id=image_id, 218 | path=os.path.join(dataset_dir, image_id, "images/{}.png".format(image_id))) 219 | 220 | def load_mask(self, image_id): 221 | """Generate instance masks for an image. 222 | Returns: 223 | masks: A bool array of shape [height, width, instance count] with 224 | one mask per instance. 225 | class_ids: a 1D array of class IDs of the instance masks. 226 | """ 227 | info = self.image_info[image_id] 228 | # Get mask directory from image path 229 | mask_dir = os.path.join(os.path.dirname(os.path.dirname(info['path'])), "masks") 230 | 231 | # Read mask files from .png image 232 | mask = [] 233 | for f in next(os.walk(mask_dir))[2]: 234 | if f.endswith(".png"): 235 | m = skimage.io.imread(os.path.join(mask_dir, f)).astype(np.bool) 236 | mask.append(m) 237 | mask = np.stack(mask, axis=-1) 238 | # Return mask, and array of class IDs of each instance. Since we have 239 | # one class ID, we return an array of ones 240 | return mask, np.ones([mask.shape[-1]], dtype=np.int32) 241 | 242 | def image_reference(self, image_id): 243 | """Return the path of the image.""" 244 | info = self.image_info[image_id] 245 | if info["source"] == "nucleus": 246 | return info["id"] 247 | else: 248 | super(self.__class__, self).image_reference(image_id) 249 | 250 | 251 | ############################################################ 252 | # Training 253 | ############################################################ 254 | 255 | def train(model, dataset_dir, subset): 256 | """Train the model.""" 257 | # Training dataset. 258 | dataset_train = NucleusDataset() 259 | dataset_train.load_nucleus(dataset_dir, subset) 260 | dataset_train.prepare() 261 | 262 | # Validation dataset 263 | dataset_val = NucleusDataset() 264 | dataset_val.load_nucleus(dataset_dir, "val") 265 | dataset_val.prepare() 266 | 267 | # Image augmentation 268 | # http://imgaug.readthedocs.io/en/latest/source/augmenters.html 269 | augmentation = iaa.SomeOf((0, 2), [ 270 | iaa.Fliplr(0.5), 271 | iaa.Flipud(0.5), 272 | iaa.OneOf([iaa.Affine(rotate=90), 273 | iaa.Affine(rotate=180), 274 | iaa.Affine(rotate=270)]), 275 | iaa.Multiply((0.8, 1.5)), 276 | iaa.GaussianBlur(sigma=(0.0, 5.0)) 277 | ]) 278 | 279 | # *** This training schedule is an example. 
Update to your needs *** 280 | 281 | # If starting from imagenet, train heads only for a bit 282 | # since they have random weights 283 | print("Train network heads") 284 | model.train(dataset_train, dataset_val, 285 | learning_rate=config.LEARNING_RATE, 286 | epochs=20, 287 | augmentation=augmentation, 288 | layers='heads') 289 | 290 | print("Train all layers") 291 | model.train(dataset_train, dataset_val, 292 | learning_rate=config.LEARNING_RATE, 293 | epochs=40, 294 | augmentation=augmentation, 295 | layers='all') 296 | 297 | 298 | ############################################################ 299 | # RLE Encoding 300 | ############################################################ 301 | 302 | def rle_encode(mask): 303 | """Encodes a mask in Run Length Encoding (RLE). 304 | Returns a string of space-separated values. 305 | """ 306 | assert mask.ndim == 2, "Mask must be of shape [Height, Width]" 307 | # Flatten it column-wise 308 | m = mask.T.flatten() 309 | # Compute gradient. Equals 1 or -1 at transition points 310 | g = np.diff(np.concatenate([[0], m, [0]]), n=1) 311 | # 1-based indices of transition points (where gradient != 0) 312 | rle = np.where(g != 0)[0].reshape([-1, 2]) + 1 313 | # Convert second index in each pair to length 314 | rle[:, 1] = rle[:, 1] - rle[:, 0] 315 | return " ".join(map(str, rle.flatten())) 316 | 317 | 318 | def rle_decode(rle, shape): 319 | """Decodes an RLE encoded list of space separated 320 | numbers and returns a binary mask.""" 321 | rle = list(map(int, rle.split())) 322 | rle = np.array(rle, dtype=np.int32).reshape([-1, 2]) 323 | rle[:, 1] += rle[:, 0] 324 | rle -= 1 325 | mask = np.zeros([shape[0] * shape[1]], np.bool) 326 | for s, e in rle: 327 | assert 0 <= s < mask.shape[0] 328 | assert 1 <= e <= mask.shape[0], "shape: {} s {} e {}".format(shape, s, e) 329 | mask[s:e] = 1 330 | # Reshape and transpose 331 | mask = mask.reshape([shape[1], shape[0]]).T 332 | return mask 333 | 334 | 335 | def mask_to_rle(image_id, mask, scores): 336 | "Encodes instance masks to submission format."
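A quick round trip through `rle_encode()` and `rle_decode()` above makes the format concrete (an illustrative sketch; the toy mask is made up here): a 2x3 mask whose middle column is set flattens column-wise to `[0, 0, 1, 1, 0, 0]`, i.e. a single run starting at 1-based position 3 with length 2.

```python
import numpy as np

# Toy 2x3 mask with only the middle column set.
toy_mask = np.array([[0, 1, 0],
                     [0, 1, 0]], dtype=bool)

print(rle_encode(toy_mask))       # -> "3 2"
print(rle_decode("3 2", (2, 3)))  # -> recovers the same 2x3 boolean mask
```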
337 | assert mask.ndim == 3, "Mask must be [H, W, count]" 338 | # If mask is empty, return line with image ID only 339 | if mask.shape[-1] == 0: 340 | return "{},".format(image_id) 341 | # Remove mask overlaps 342 | # Multiply each instance mask by its score order 343 | # then take the maximum across the last dimension 344 | order = np.argsort(scores)[::-1] + 1 # 1-based descending 345 | mask = np.max(mask * np.reshape(order, [1, 1, -1]), -1) 346 | # Loop over instance masks 347 | lines = [] 348 | for o in order: 349 | m = np.where(mask == o, 1, 0) 350 | # Skip if empty 351 | if m.sum() == 0.0: 352 | continue 353 | rle = rle_encode(m) 354 | lines.append("{}, {}".format(image_id, rle)) 355 | return "\n".join(lines) 356 | 357 | 358 | ############################################################ 359 | # Detection 360 | ############################################################ 361 | 362 | def detect(model, dataset_dir, subset): 363 | """Run detection on images in the given directory.""" 364 | print("Running on {}".format(dataset_dir)) 365 | 366 | # Create directory 367 | if not os.path.exists(RESULTS_DIR): 368 | os.makedirs(RESULTS_DIR) 369 | submit_dir = "submit_{:%Y%m%dT%H%M%S}".format(datetime.datetime.now()) 370 | submit_dir = os.path.join(RESULTS_DIR, submit_dir) 371 | os.makedirs(submit_dir) 372 | 373 | # Read dataset 374 | dataset = NucleusDataset() 375 | dataset.load_nucleus(dataset_dir, subset) 376 | dataset.prepare() 377 | # Load over images 378 | submission = [] 379 | for image_id in dataset.image_ids: 380 | # Load image and run detection 381 | image = dataset.load_image(image_id) 382 | # Detect objects 383 | r = model.detect([image], verbose=0)[0] 384 | # Encode image to RLE. Returns a string of multiple lines 385 | source_id = dataset.image_info[image_id]["id"] 386 | rle = mask_to_rle(source_id, r["masks"], r["scores"]) 387 | submission.append(rle) 388 | # Save image with masks 389 | visualize.display_instances( 390 | image, r['rois'], r['masks'], r['class_ids'], 391 | dataset.class_names, r['scores'], 392 | show_bbox=False, show_mask=False, 393 | title="Predictions") 394 | plt.savefig("{}/{}.png".format(submit_dir, dataset.image_info[image_id]["id"])) 395 | 396 | # Save to csv file 397 | submission = "ImageId,EncodedPixels\n" + "\n".join(submission) 398 | file_path = os.path.join(submit_dir, "submit.csv") 399 | with open(file_path, "w") as f: 400 | f.write(submission) 401 | print("Saved to ", submit_dir) 402 | 403 | 404 | ############################################################ 405 | # Command Line 406 | ############################################################ 407 | 408 | if __name__ == '__main__': 409 | import argparse 410 | 411 | # Parse command line arguments 412 | parser = argparse.ArgumentParser( 413 | description='Mask R-CNN for nuclei counting and segmentation') 414 | parser.add_argument("command", 415 | metavar="", 416 | help="'train' or 'detect'") 417 | parser.add_argument('--dataset', required=False, 418 | metavar="/path/to/dataset/", 419 | help='Root directory of the dataset') 420 | parser.add_argument('--weights', required=True, 421 | metavar="/path/to/weights.h5", 422 | help="Path to weights .h5 file or 'coco'") 423 | parser.add_argument('--logs', required=False, 424 | default=DEFAULT_LOGS_DIR, 425 | metavar="/path/to/logs/", 426 | help='Logs and checkpoints directory (default=logs/)') 427 | parser.add_argument('--subset', required=False, 428 | metavar="Dataset sub-directory", 429 | help="Subset of dataset to run prediction on") 430 | args = 
parser.parse_args() 431 | 432 | # Validate arguments 433 | if args.command == "train": 434 | assert args.dataset, "Argument --dataset is required for training" 435 | elif args.command == "detect": 436 | assert args.subset, "Provide --subset to run prediction on" 437 | 438 | print("Weights: ", args.weights) 439 | print("Dataset: ", args.dataset) 440 | if args.subset: 441 | print("Subset: ", args.subset) 442 | print("Logs: ", args.logs) 443 | 444 | # Configurations 445 | if args.command == "train": 446 | config = NucleusConfig() 447 | else: 448 | config = NucleusInferenceConfig() 449 | config.display() 450 | 451 | # Create model 452 | if args.command == "train": 453 | model = modellib.MaskRCNN(mode="training", config=config, 454 | model_dir=args.logs) 455 | else: 456 | model = modellib.MaskRCNN(mode="inference", config=config, 457 | model_dir=args.logs) 458 | 459 | # Select weights file to load 460 | if args.weights.lower() == "coco": 461 | weights_path = COCO_WEIGHTS_PATH 462 | # Download weights file 463 | if not os.path.exists(weights_path): 464 | utils.download_trained_weights(weights_path) 465 | elif args.weights.lower() == "last": 466 | # Find last trained weights 467 | weights_path = model.find_last() 468 | elif args.weights.lower() == "imagenet": 469 | # Start from ImageNet trained weights 470 | weights_path = model.get_imagenet_weights() 471 | else: 472 | weights_path = args.weights 473 | 474 | # Load weights 475 | print("Loading weights ", weights_path) 476 | if args.weights.lower() == "coco": 477 | # Exclude the last layers because they require a matching 478 | # number of classes 479 | model.load_weights(weights_path, by_name=True, exclude=[ 480 | "mrcnn_class_logits", "mrcnn_bbox_fc", 481 | "mrcnn_bbox", "mrcnn_mask"]) 482 | else: 483 | model.load_weights(weights_path, by_name=True) 484 | 485 | # Train or evaluate 486 | if args.command == "train": 487 | train(model, args.dataset, args.subset) 488 | elif args.command == "detect": 489 | detect(model, args.dataset, args.subset) 490 | else: 491 | print("'{}' is not recognized. " 492 | "Use 'train' or 'detect'".format(args.command)) 493 | -------------------------------------------------------------------------------- /samples/shapes/shapes.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Configurations and data loading code for the synthetic Shapes dataset. 4 | This is a duplicate of the code in the noteobook train_shapes.ipynb for easy 5 | import into other notebooks, such as inspect_model.ipynb. 6 | 7 | Copyright (c) 2017 Matterport, Inc. 8 | Licensed under the MIT License (see LICENSE for details) 9 | Written by Waleed Abdulla 10 | """ 11 | 12 | import os 13 | import sys 14 | import math 15 | import random 16 | import numpy as np 17 | import cv2 18 | 19 | # Root directory of the project 20 | ROOT_DIR = os.path.abspath("../../") 21 | 22 | # Import Mask RCNN 23 | sys.path.append(ROOT_DIR) # To find local version of the library 24 | from mrcnn.config import Config 25 | from mrcnn import utils 26 | 27 | 28 | class ShapesConfig(Config): 29 | """Configuration for training on the toy shapes dataset. 30 | Derives from the base Config class and overrides values specific 31 | to the toy shapes dataset. 32 | """ 33 | # Give the configuration a recognizable name 34 | NAME = "shapes" 35 | 36 | # Train on 1 GPU and 8 images per GPU. We can put multiple images on each 37 | # GPU because the images are small. Batch size is 8 (GPUs * images/GPU). 
38 | GPU_COUNT = 1 39 | IMAGES_PER_GPU = 8 40 | 41 | # Number of classes (including background) 42 | NUM_CLASSES = 1 + 3 # background + 3 shapes 43 | 44 | # Use small images for faster training. Set the limits of the small side 45 | # the large side, and that determines the image shape. 46 | IMAGE_MIN_DIM = 128 47 | IMAGE_MAX_DIM = 128 48 | 49 | # Use smaller anchors because our image and objects are small 50 | RPN_ANCHOR_SCALES = (8, 16, 32, 64, 128) # anchor side in pixels 51 | 52 | # Reduce training ROIs per image because the images are small and have 53 | # few objects. Aim to allow ROI sampling to pick 33% positive ROIs. 54 | TRAIN_ROIS_PER_IMAGE = 32 55 | 56 | # Use a small epoch since the data is simple 57 | STEPS_PER_EPOCH = 100 58 | 59 | # use small validation steps since the epoch is small 60 | VALIDATION_STEPS = 5 61 | 62 | 63 | class ShapesDataset(utils.Dataset): 64 | """Generates the shapes synthetic dataset. The dataset consists of simple 65 | shapes (triangles, squares, circles) placed randomly on a blank surface. 66 | The images are generated on the fly. No file access required. 67 | """ 68 | 69 | def load_shapes(self, count, height, width): 70 | """Generate the requested number of synthetic images. 71 | count: number of images to generate. 72 | height, width: the size of the generated images. 73 | """ 74 | # Add classes 75 | self.add_class("shapes", 1, "square") 76 | self.add_class("shapes", 2, "circle") 77 | self.add_class("shapes", 3, "triangle") 78 | 79 | # Add images 80 | # Generate random specifications of images (i.e. color and 81 | # list of shapes sizes and locations). This is more compact than 82 | # actual images. Images are generated on the fly in load_image(). 83 | for i in range(count): 84 | bg_color, shapes = self.random_image(height, width) 85 | self.add_image("shapes", image_id=i, path=None, 86 | width=width, height=height, 87 | bg_color=bg_color, shapes=shapes) 88 | 89 | def load_image(self, image_id): 90 | """Generate an image from the specs of the given image ID. 91 | Typically this function loads the image from a file, but 92 | in this case it generates the image on the fly from the 93 | specs in image_info. 94 | """ 95 | info = self.image_info[image_id] 96 | bg_color = np.array(info['bg_color']).reshape([1, 1, 3]) 97 | image = np.ones([info['height'], info['width'], 3], dtype=np.uint8) 98 | image = image * bg_color.astype(np.uint8) 99 | for shape, color, dims in info['shapes']: 100 | image = self.draw_shape(image, shape, dims, color) 101 | return image 102 | 103 | def image_reference(self, image_id): 104 | """Return the shapes data of the image.""" 105 | info = self.image_info[image_id] 106 | if info["source"] == "shapes": 107 | return info["shapes"] 108 | else: 109 | super(self.__class__).image_reference(self, image_id) 110 | 111 | def load_mask(self, image_id): 112 | """Generate instance masks for shapes of the given image ID. 
113 | """ 114 | info = self.image_info[image_id] 115 | shapes = info['shapes'] 116 | count = len(shapes) 117 | mask = np.zeros([info['height'], info['width'], count], dtype=np.uint8) 118 | for i, (shape, _, dims) in enumerate(info['shapes']): 119 | mask[:, :, i:i + 1] = self.draw_shape(mask[:, :, i:i + 1].copy(), 120 | shape, dims, 1) 121 | # Handle occlusions 122 | occlusion = np.logical_not(mask[:, :, -1]).astype(np.uint8) 123 | for i in range(count - 2, -1, -1): 124 | mask[:, :, i] = mask[:, :, i] * occlusion 125 | occlusion = np.logical_and( 126 | occlusion, np.logical_not(mask[:, :, i])) 127 | # Map class names to class IDs. 128 | class_ids = np.array([self.class_names.index(s[0]) for s in shapes]) 129 | return mask, class_ids.astype(np.int32) 130 | 131 | def draw_shape(self, image, shape, dims, color): 132 | """Draws a shape from the given specs.""" 133 | # Get the center x, y and the size s 134 | x, y, s = dims 135 | if shape == 'square': 136 | image = cv2.rectangle(image, (x - s, y - s), 137 | (x + s, y + s), color, -1) 138 | elif shape == "circle": 139 | image = cv2.circle(image, (x, y), s, color, -1) 140 | elif shape == "triangle": 141 | points = np.array([[(x, y - s), 142 | (x - s / math.sin(math.radians(60)), y + s), 143 | (x + s / math.sin(math.radians(60)), y + s), 144 | ]], dtype=np.int32) 145 | image = cv2.fillPoly(image, points, color) 146 | return image 147 | 148 | def random_shape(self, height, width): 149 | """Generates specifications of a random shape that lies within 150 | the given height and width boundaries. 151 | Returns a tuple of three valus: 152 | * The shape name (square, circle, ...) 153 | * Shape color: a tuple of 3 values, RGB. 154 | * Shape dimensions: A tuple of values that define the shape size 155 | and location. Differs per shape type. 156 | """ 157 | # Shape 158 | shape = random.choice(["square", "circle", "triangle"]) 159 | # Color 160 | color = tuple([random.randint(0, 255) for _ in range(3)]) 161 | # Center x, y 162 | buffer = 20 163 | y = random.randint(buffer, height - buffer - 1) 164 | x = random.randint(buffer, width - buffer - 1) 165 | # Size 166 | s = random.randint(buffer, height // 4) 167 | return shape, color, (x, y, s) 168 | 169 | def random_image(self, height, width): 170 | """Creates random specifications of an image with multiple shapes. 171 | Returns the background color of the image and a list of shape 172 | specifications that can be used to draw the image. 
173 | """ 174 | # Pick random background color 175 | bg_color = np.array([random.randint(0, 255) for _ in range(3)]) 176 | # Generate a few random shapes and record their 177 | # bounding boxes 178 | shapes = [] 179 | boxes = [] 180 | N = random.randint(1, 4) 181 | for _ in range(N): 182 | shape, color, dims = self.random_shape(height, width) 183 | shapes.append((shape, color, dims)) 184 | x, y, s = dims 185 | boxes.append([y - s, x - s, y + s, x + s]) 186 | # Apply non-max suppression wit 0.3 threshold to avoid 187 | # shapes covering each other 188 | keep_ixs = utils.non_max_suppression( 189 | np.array(boxes), np.arange(N), 0.3) 190 | shapes = [s for i, s in enumerate(shapes) if i in keep_ixs] 191 | return bg_color, shapes 192 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md 3 | license-file = LICENSE 4 | requirements-file = requirements.txt -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """ 2 | The build/compilations setup 3 | 4 | >> pip install -r requirements.txt 5 | >> python setup.py install 6 | """ 7 | import pip 8 | import logging 9 | import pkg_resources 10 | try: 11 | from setuptools import setup 12 | except ImportError: 13 | from distutils.core import setup 14 | 15 | 16 | def _parse_requirements(file_path): 17 | pip_ver = pkg_resources.get_distribution('pip').version 18 | pip_version = list(map(int, pip_ver.split('.')[:2])) 19 | if pip_version >= [6, 0]: 20 | raw = pip.req.parse_requirements(file_path, 21 | session=pip.download.PipSession()) 22 | else: 23 | raw = pip.req.parse_requirements(file_path) 24 | return [str(i.req) for i in raw] 25 | 26 | 27 | # parse_requirements() returns generator of pip.req.InstallRequirement objects 28 | try: 29 | install_reqs = _parse_requirements("requirements.txt") 30 | except Exception: 31 | logging.warning('Fail load requirements file, so using default ones.') 32 | install_reqs = [] 33 | 34 | setup( 35 | name='mask-rcnn', 36 | version='2.1', 37 | url='https://github.com/matterport/Mask_RCNN', 38 | author='Matterport', 39 | author_email='waleed.abdulla@gmail.com', 40 | license='MIT', 41 | description='Mask R-CNN for object detection and instance segmentation', 42 | packages=["mrcnn"], 43 | install_requires=install_reqs, 44 | include_package_data=True, 45 | python_requires='>=3.4', 46 | long_description="""This is an implementation of Mask R-CNN on Python 3, Keras, and TensorFlow. 47 | The model generates bounding boxes and segmentation masks for each instance of an object in the image. 
48 | It's based on Feature Pyramid Network (FPN) and a ResNet101 backbone.""", 49 | classifiers=[ 50 | "Development Status :: 5 - Production/Stable", 51 | "Environment :: Console", 52 | "Intended Audience :: Developers", 53 | "Intended Audience :: Information Technology", 54 | "Intended Audience :: Education", 55 | "Intended Audience :: Science/Research", 56 | "License :: OSI Approved :: MIT License", 57 | "Natural Language :: English", 58 | "Operating System :: OS Independent", 59 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 60 | "Topic :: Scientific/Engineering :: Image Recognition", 61 | "Topic :: Scientific/Engineering :: Visualization", 62 | "Topic :: Scientific/Engineering :: Image Segmentation", 63 | 'Programming Language :: Python :: 3.4', 64 | 'Programming Language :: Python :: 3.5', 65 | 'Programming Language :: Python :: 3.6', 66 | ], 67 | keywords="image instance segmentation object detection mask rcnn r-cnn tensorflow keras", 68 | ) 69 | -------------------------------------------------------------------------------- /video_demo.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | from visualize_cv2 import model, display_instances, class_names 3 | import sys 4 | 5 | args = sys.argv 6 | if(len(args) < 2): 7 | print("run command: python video_demo.py 0 or video file name") 8 | sys.exit(0) 9 | name = args[1] 10 | if(len(args[1]) == 1): 11 | name = int(args[1]) 12 | 13 | stream = cv2.VideoCapture(name) 14 | 15 | while True: 16 | ret , frame = stream.read() 17 | if not ret: 18 | print("unable to fetch frame") 19 | break 20 | results = model.detect([frame], verbose=1) 21 | 22 | # Visualize results 23 | r = results[0] 24 | masked_image = display_instances(frame, r['rois'], r['masks'], r['class_ids'], 25 | class_names, r['scores']) 26 | cv2.imshow("masked_image",masked_image) 27 | if(cv2.waitKey(1) & 0xFF == ord('q')): 28 | break 29 | stream.release() 30 | cv2.destroyWindow("masked_image") -------------------------------------------------------------------------------- /visualize_cv2.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import os 4 | import sys 5 | 6 | from mrcnn import utils 7 | from mrcnn import model as modellib 8 | 9 | ROOT_DIR = os.path.abspath("./") 10 | MODEL_DIR = os.path.join(ROOT_DIR, "logs") 11 | sys.path.append(os.path.join(ROOT_DIR,"samples/coco/")) 12 | import coco 13 | COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5") 14 | if not os.path.exists(COCO_MODEL_PATH): 15 | utils.download_trained_weights(COCO_MODEL_PATH) 16 | 17 | 18 | class InferenceConfig(coco.CocoConfig): 19 | GPU_COUNT = 1 20 | IMAGES_PER_GPU = 1 21 | 22 | 23 | config = InferenceConfig() 24 | config.display() 25 | 26 | model = modellib.MaskRCNN( 27 | mode="inference", model_dir=MODEL_DIR, config=config 28 | ) 29 | model.load_weights(COCO_MODEL_PATH, by_name=True) 30 | class_names = [ 31 | 'BG', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 32 | 'bus', 'train', 'truck', 'boat', 'traffic light', 33 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 34 | 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 35 | 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 36 | 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 37 | 'kite', 'baseball bat', 'baseball glove', 'skateboard', 38 | 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 39 | 'fork', 'knife', 'spoon', 'bowl', 'banana', 
'apple', 40 | 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 41 | 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 42 | 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 43 | 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 44 | 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 45 | 'teddy bear', 'hair drier', 'toothbrush' 46 | ] 47 | 48 | 49 | def random_colors(N): 50 | np.random.seed(1) 51 | colors = [tuple(255 * np.random.rand(3)) for _ in range(N)] 52 | return colors 53 | 54 | 55 | colors = random_colors(len(class_names)) 56 | class_dict = { 57 | name: color for name, color in zip(class_names, colors) 58 | } 59 | 60 | 61 | def apply_mask(image, mask, color, alpha=0.5): 62 | """apply mask to image""" 63 | for n, c in enumerate(color): 64 | image[:, :, n] = np.where( 65 | mask == 1, 66 | image[:, :, n] * (1 - alpha) + alpha * c, 67 | image[:, :, n] 68 | ) 69 | return image 70 | 71 | 72 | def display_instances(image, boxes, masks, ids, names, scores): 73 | """ 74 | take the image and results and apply the mask, box, and Label 75 | """ 76 | n_instances = boxes.shape[0] 77 | 78 | if not n_instances: 79 | print('NO INSTANCES TO DISPLAY') 80 | else: 81 | assert boxes.shape[0] == masks.shape[-1] == ids.shape[0] 82 | 83 | for i in range(n_instances): 84 | if not np.any(boxes[i]): 85 | continue 86 | 87 | y1, x1, y2, x2 = boxes[i] 88 | label = names[ids[i]] 89 | color = class_dict[label] 90 | score = scores[i] if scores is not None else None 91 | caption = '{} {:.2f}'.format(label, score) if score else label 92 | mask = masks[:, :, i] 93 | 94 | image = apply_mask(image, mask, color) 95 | image = cv2.rectangle(image, (x1, y1), (x2, y2), color, 2) 96 | image = cv2.putText( 97 | image, caption, (x1, y1), cv2.FONT_HERSHEY_COMPLEX, 0.7, color, 2 98 | ) 99 | 100 | return image 101 | 102 | 103 | if __name__ == '__main__': 104 | """ 105 | test everything 106 | """ 107 | 108 | capture = cv2.VideoCapture(0) 109 | 110 | # these 2 lines can be removed if you dont have a 1080p camera. 111 | capture.set(cv2.CAP_PROP_FRAME_WIDTH, 1920) 112 | capture.set(cv2.CAP_PROP_FRAME_HEIGHT, 1080) 113 | 114 | while True: 115 | ret, frame = capture.read() 116 | results = model.detect([frame], verbose=0) 117 | r = results[0] 118 | frame = display_instances( 119 | frame, r['rois'], r['masks'], r['class_ids'], class_names, r['scores'] 120 | ) 121 | cv2.imshow('frame', frame) 122 | if cv2.waitKey(1) & 0xFF == ord('q'): 123 | break 124 | 125 | capture.release() 126 | cv2.destroyAllWindows() --------------------------------------------------------------------------------
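The same pieces exported by `visualize_cv2.py` (`model`, `display_instances`, `class_names`) can also be pointed at a single still image rather than a webcam or video stream. The sketch below is illustrative only; the input and output file names are placeholders.

```python
import cv2
from visualize_cv2 import model, display_instances, class_names

# Placeholder paths -- substitute your own files.
image = cv2.imread("images/sample.jpg")   # BGR frame, as in the video scripts
r = model.detect([image], verbose=0)[0]

# Draw masks, boxes, and class labels, then save the annotated frame.
annotated = display_instances(image, r['rois'], r['masks'], r['class_ids'],
                              class_names, r['scores'])
cv2.imwrite("sample_masked.png", annotated)
```

As with `video_demo.py`, the frame is passed to `model.detect()` in OpenCV's BGR order, and the overlay colors come from the `class_dict` built in `visualize_cv2.py`.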