├── bags
│   └── .placeholder
├── nodes
│   ├── __init__.py
│   ├── coco.pyc
│   ├── config.pyc
│   ├── model.pyc
│   ├── utils.pyc
│   ├── visualize.pyc
│   ├── __pycache__
│   │   ├── coco.cpython-36.pyc
│   │   ├── config.cpython-36.pyc
│   │   ├── model.cpython-36.pyc
│   │   ├── utils.cpython-36.pyc
│   │   └── visualize.cpython-36.pyc
│   ├── config.py
│   ├── parallel_model.py
│   ├── mask_rcnn_node
│   ├── shapes.py
│   ├── visualize.py
│   ├── coco.py
│   └── utils.py
├── src
│   └── mask_rcnn_ros
│       ├── __init__.py
│       ├── config.py
│       ├── parallel_model.py
│       ├── shapes.py
│       ├── visualize.py
│       ├── coco.py
│       └── utils.py
├── doc
│   ├── mask_r-cnn_1.png
│   └── mask_r-cnn_2.png
├── scripts
│   └── download_freiburg3_rgbd_example_bag.sh
├── requirements.txt
├── setup.py
├── msg
│   └── Result.msg
├── launch
│   └── freiburg3_rgbd_example.launch
├── package.xml
├── CMakeLists.txt
├── .gitignore
├── LICENSE
├── LICENSE.Mask_R-CNN
├── README.md
└── rviz
    └── mask_rcnn_ros.rviz
/bags/.placeholder: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /nodes/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/mask_rcnn_ros/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /nodes/coco.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qixuxiang/mask_rcnn_ros/HEAD/nodes/coco.pyc -------------------------------------------------------------------------------- /nodes/config.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qixuxiang/mask_rcnn_ros/HEAD/nodes/config.pyc -------------------------------------------------------------------------------- /nodes/model.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qixuxiang/mask_rcnn_ros/HEAD/nodes/model.pyc -------------------------------------------------------------------------------- /nodes/utils.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qixuxiang/mask_rcnn_ros/HEAD/nodes/utils.pyc -------------------------------------------------------------------------------- /nodes/visualize.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qixuxiang/mask_rcnn_ros/HEAD/nodes/visualize.pyc -------------------------------------------------------------------------------- /doc/mask_r-cnn_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qixuxiang/mask_rcnn_ros/HEAD/doc/mask_r-cnn_1.png -------------------------------------------------------------------------------- /doc/mask_r-cnn_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qixuxiang/mask_rcnn_ros/HEAD/doc/mask_r-cnn_2.png -------------------------------------------------------------------------------- /nodes/__pycache__/coco.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qixuxiang/mask_rcnn_ros/HEAD/nodes/__pycache__/coco.cpython-36.pyc
-------------------------------------------------------------------------------- /nodes/__pycache__/config.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qixuxiang/mask_rcnn_ros/HEAD/nodes/__pycache__/config.cpython-36.pyc -------------------------------------------------------------------------------- /nodes/__pycache__/model.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qixuxiang/mask_rcnn_ros/HEAD/nodes/__pycache__/model.cpython-36.pyc -------------------------------------------------------------------------------- /nodes/__pycache__/utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qixuxiang/mask_rcnn_ros/HEAD/nodes/__pycache__/utils.cpython-36.pyc -------------------------------------------------------------------------------- /nodes/__pycache__/visualize.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qixuxiang/mask_rcnn_ros/HEAD/nodes/__pycache__/visualize.cpython-36.pyc -------------------------------------------------------------------------------- /scripts/download_freiburg3_rgbd_example_bag.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | wget https://vision.in.tum.de/rgbd/dataset/freiburg3/rgbd_dataset_freiburg3_long_office_household.bag -P bags 3 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | h5py==2.7.0 2 | Keras==2.1.2 3 | numpy==1.13.3 4 | opencv-python==3.4.0.12 5 | scikit-image==0.13.0 6 | scikit-learn==0.19.1 7 | scipy==0.19.1 8 | matplotlib==2.2.3 9 | tensorflow-gpu==1.4.1 10 | ipython==5.2.0 -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | ## !
DO NOT MANUALLY INVOKE THIS setup.py, USE CATKIN INSTEAD 2 | 3 | from distutils.core import setup 4 | from catkin_pkg.python_setup import generate_distutils_setup 5 | 6 | # fetch values from package.xml 7 | setup_args = generate_distutils_setup( 8 | packages=['mask_rcnn_ros',], 9 | package_dir={'': 'src'}) 10 | 11 | setup(**setup_args) 12 | -------------------------------------------------------------------------------- /msg/Result.msg: -------------------------------------------------------------------------------- 1 | std_msgs/Header header 2 | 3 | # Bounding boxes in pixels 4 | sensor_msgs/RegionOfInterest[] boxes 5 | 6 | # Integer class IDs for each bounding box 7 | int32[] class_ids 8 | 9 | # String class names for each bounding box 10 | string[] class_names 11 | 12 | # Float probability scores of the class_id 13 | float32[] scores 14 | 15 | # Instance masks as Image 16 | sensor_msgs/Image[] masks 17 | 18 | -------------------------------------------------------------------------------- /launch/freiburg3_rgbd_example.launch: -------------------------------------------------------------------------------- [roslaunch XML tags were stripped from this dump and are not recoverable; per the README, this launch file plays the downloaded example bag, starts the mask_rcnn node, and opens RViz with rviz/mask_rcnn_ros.rviz] -------------------------------------------------------------------------------- /package.xml: -------------------------------------------------------------------------------- 1 | <?xml version="1.0"?> 2 | <package> 3 | <name>mask_rcnn_ros</name> 4 | <version>0.1.0</version> 5 | <description>The Mask R-CNN for ROS</description> 6 | 7 | <maintainer>Akio Ochiai</maintainer> 8 | 9 | <license>MIT</license> 10 | 11 | <url>http://wiki.ros.org/mask_rcnn_ros</url> 12 | 13 | <author>Akio Ochiai</author> 14 | <author>Matterport, Inc.</author> 15 | 16 | <buildtool_depend>catkin</buildtool_depend> 17 | <build_depend>message_generation</build_depend> 18 | 19 | <run_depend>rospy</run_depend> 20 | <run_depend>message_runtime</run_depend> 21 | <run_depend>std_msgs</run_depend> 22 | <run_depend>sensor_msgs</run_depend> 23 | <run_depend>cv_bridge</run_depend> 24 | <run_depend>vision_opencv</run_depend> 25 | </package> -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8.3) 2 | project(mask_rcnn_ros) 3 | 4 | find_package(catkin REQUIRED COMPONENTS std_msgs sensor_msgs message_generation) 5 | 6 | catkin_python_setup() 7 | 8 | add_message_files( 9 | FILES 10 | Result.msg 11 | ) 12 | 13 | 14 | generate_messages( 15 | DEPENDENCIES std_msgs sensor_msgs 16 | ) 17 | 18 | 19 | 20 | catkin_package(CATKIN_DEPENDS message_runtime) 21 | 22 | ############# 23 | ## Install ## 24 | ############# 25 | 26 | install(PROGRAMS 27 | nodes/mask_rcnn_node 28 | DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} 29 | ) 30 | 31 | install(DIRECTORY 32 | msg 33 | DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION} 34 | ) 35 | 36 | 37 | ############# 38 | ## Testing ## 39 | ############# 40 | 41 | 42 | ## Add folders to be run by python nosetests 43 | # catkin_add_nosetests(test) -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | devel/ 2 | logs/ 3 | build/ 4 | bin/ 5 | lib/ 6 | msg_gen/ 7 | srv_gen/ 8 | msg/*Action.msg 9 | msg/*ActionFeedback.msg 10 | msg/*ActionGoal.msg 11 | msg/*ActionResult.msg 12 | msg/*Feedback.msg 13 | msg/*Goal.msg 14 | msg/*Result.msg 15 | msg/_*.py 16 | build_isolated/ 17 | devel_isolated/ 18 | src/CMakeLists.txt 19 | .catkin_workspace 20 | result/data/*.txt 21 | result/data/*.csv 22 | src/cmake-build-debug/ 23 | src/.idea/ 24 | src/multisensor/cmake-build-debug/ 25 | src/multisensor/.idea/ 26 | .vscode/ 27 | 28 | 29 | # Generated by dynamic reconfigure 30 | *.cfgc 31 | /cfg/cpp/ 32 | /cfg/*.py 33 | 34 | # Ignore generated docs 35 | *.dox 36 | *.wikidoc 37 | 38 | # eclipse
stuff 39 | .project 40 | .cproject 41 | 42 | # qtcreator stuff 43 | CMakeLists.txt.user 44 | 45 | srv/_*.py 46 | *.pcd 47 | *.pyc 48 | qtcreator-* 49 | *.user 50 | 51 | /planning/cfg 52 | /planning/docs 53 | /planning/src 54 | 55 | *~ 56 | 57 | # Emacs 58 | .#* 59 | 60 | # Catkin custom files 61 | CATKIN_IGNORE 62 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | mask_rcnn_ros 2 | 3 | The MIT License (MIT) 4 | 5 | Copyright (c) 2017 Akio Ochiai, Inc. 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in 15 | all copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | THE SOFTWARE. 24 | -------------------------------------------------------------------------------- /LICENSE.Mask_R-CNN: -------------------------------------------------------------------------------- 1 | Mask R-CNN 2 | 3 | The MIT License (MIT) 4 | 5 | Copyright (c) 2017 Matterport, Inc. 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in 15 | all copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | THE SOFTWARE. 24 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # The ROS Package of Mask R-CNN for Object Detection and Segmentation 2 | 3 | This is a ROS package of the [Mask R-CNN](https://arxiv.org/abs/1703.06870) algorithm for object detection and segmentation. 4 | 5 | The package contains a ROS node of Mask R-CNN with a topic-based ROS interface.
6 | 7 | Most of the core algorithm code is based on the [Mask R-CNN implementation by Matterport, Inc.](https://github.com/matterport/Mask_RCNN) 8 | 9 | ## Training 10 | 11 | This repository doesn't contain code for training the Mask R-CNN network model. 12 | If you want to train the model on your own class definition or dataset, train it in [the upstream repository](https://github.com/matterport/Mask_RCNN) and pass the resulting weights to the `model_path` parameter. 13 | 14 | 15 | ## Requirements 16 | * ROS Indigo/Kinetic 17 | * TensorFlow 1.3+ 18 | * Keras 2.0.8+ 19 | * Numpy, skimage, scipy, Pillow, cython, h5py 20 | * The code has only been tested on Python 2.7; it may work on Python 3.x. 21 | * See more dependency and version details in [requirements.txt](https://github.com/qixuxiang/mask_rcnn_ros/blob/master/requirements.txt) 22 | 23 | ## ROS Interfaces 24 | 25 | ### Parameters 26 | 27 | * `~model_path: string` 28 | 29 | Path to the HDF5 model file. 30 | If `model_path` is left at the default value and the file doesn't exist, the node automatically downloads the file. 31 | 32 | Default: `$ROS_HOME/mask_rcnn_coco.h5` 33 | 34 | * `~visualization: bool` 35 | 36 | If true, the node publishes visualized images to the `~visualization` topic. 37 | Default: `true` 38 | 39 | * `~class_names: string[]` 40 | 41 | Class names to be treated as detection targets. 42 | Default: All MS COCO classes. 43 | 44 | ### Topics Published 45 | 46 | * `~result: mask_rcnn_ros/Result` 47 | 48 | Result of detection. See also `Result.msg` for a detailed description. 49 | 50 | * `~visualization: sensor_msgs/Image` 51 | 52 | Visualized result over an input image. 53 | 54 | 55 | ### Topics Subscribed 56 | 57 | * `~input: sensor_msgs/Image` 58 | 59 | Input image to be processed 60 | 61 | ## Getting Started 62 | 63 | 1. Clone this repository into your catkin workspace, build the workspace, and source the devel environment 64 | ``` 65 | $ cd ~/catkin_ws/src 66 | $ git clone https://github.com/qixuxiang/mask_rcnn_ros.git 67 | $ cd mask_rcnn_ros 68 | $ python2 -m pip install --upgrade pip 69 | $ python2 -m pip install -r requirements.txt 70 | $ cd ../.. 71 | $ catkin_make 72 | $ source devel/setup.bash 73 | 74 | ``` 75 | 76 | 2. Run the mask_rcnn node 77 | ~~~bash 78 | $ rosrun mask_rcnn_ros mask_rcnn_node 79 | ~~~ 80 | 81 | ## Example 82 | 83 | There is a simple example launch file using the [RGB-D SLAM Dataset](https://vision.in.tum.de/data/datasets/rgbd-dataset/download). 84 | 85 | ~~~bash 86 | $ chmod +x scripts/download_freiburg3_rgbd_example_bag.sh 87 | $ ./scripts/download_freiburg3_rgbd_example_bag.sh 88 | $ roslaunch mask_rcnn_ros freiburg3_rgbd_example.launch 89 | ~~~ 90 | 91 | Then an RViz window will appear and show results like the following: 92 | 93 | ![example1](doc/mask_r-cnn_1.png) 94 | 95 | ![example2](doc/mask_r-cnn_2.png) 96 | 97 | ## Other issues 98 | 99 | * If you have Anaconda Python installed, please delete or comment out the line `export PATH=/home/soft/conda3/bin:$PATH` in your `~/.bashrc` file. 100 | 101 | * When you run the code, please allow a moment for results to appear; there is some delay while the bag file plays and the images are processed. 102 | 103 | * Feel free to submit an issue if you run into problems, and include your system details, such as Ubuntu 14/16, ROS Indigo/Kinetic, Python 2/Python 3, TensorFlow 1.4, etc.
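
## Consuming the result topic

The snippet below is a minimal sketch of a client node, added here for illustration only (it is not shipped with this package). It assumes the node runs under its default name, so `~result` resolves to `/mask_rcnn/result`; adjust the topic name if you remap it.

~~~python
#!/usr/bin/env python
# Illustrative sketch (not part of this package): consume mask_rcnn_ros/Result.
import numpy as np
import rospy
from cv_bridge import CvBridge
from mask_rcnn_ros.msg import Result

bridge = CvBridge()

def callback(msg):
    # boxes, class_ids, class_names, scores and masks are parallel arrays.
    for i, name in enumerate(msg.class_names):
        box = msg.boxes[i]
        rospy.loginfo('%s (%.2f) at x=%d y=%d size %dx%d',
                      name, msg.scores[i], box.x_offset, box.y_offset,
                      box.width, box.height)
        # Each mask is published as a mono8 image with values 0 or 255.
        mask = bridge.imgmsg_to_cv2(msg.masks[i], 'mono8') > 0
        rospy.loginfo('  mask covers %d pixels', int(np.count_nonzero(mask)))

rospy.init_node('mask_rcnn_result_listener')
rospy.Subscriber('/mask_rcnn/result', Result, callback, queue_size=1)
rospy.spin()
~~~

Parameters can also be overridden per run with the usual ROS private-parameter syntax, e.g. `rosrun mask_rcnn_ros mask_rcnn_node _visualization:=false`.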
104 | -------------------------------------------------------------------------------- /rviz/mask_rcnn_ros.rviz: -------------------------------------------------------------------------------- 1 | Panels: 2 | - Class: rviz/Displays 3 | Help Height: 0 4 | Name: Displays 5 | Property Tree Widget: 6 | Expanded: 7 | - /Global Options1 8 | - /Status1 9 | - /Image1 10 | - /Image2 11 | Splitter Ratio: 0.755813956 12 | Tree Height: 614 13 | - Class: rviz/Selection 14 | Name: Selection 15 | - Class: rviz/Tool Properties 16 | Expanded: 17 | - /2D Pose Estimate1 18 | - /2D Nav Goal1 19 | - /Publish Point1 20 | Name: Tool Properties 21 | Splitter Ratio: 0.588679016 22 | - Class: rviz/Views 23 | Expanded: 24 | - /Current View1 25 | Name: Views 26 | Splitter Ratio: 0.5 27 | - Class: rviz/Time 28 | Experimental: false 29 | Name: Time 30 | SyncMode: 0 31 | SyncSource: Image 32 | Visualization Manager: 33 | Class: "" 34 | Displays: 35 | - Alpha: 0.5 36 | Cell Size: 1 37 | Class: rviz/Grid 38 | Color: 160; 160; 164 39 | Enabled: true 40 | Line Style: 41 | Line Width: 0.0299999993 42 | Value: Lines 43 | Name: Grid 44 | Normal Cell Count: 0 45 | Offset: 46 | X: 0 47 | Y: 0 48 | Z: 0 49 | Plane: XY 50 | Plane Cell Count: 10 51 | Reference Frame: 52 | Value: true 53 | - Class: rviz/Image 54 | Enabled: true 55 | Image Topic: /camera/rgb/image_color 56 | Max Value: 1 57 | Median window: 5 58 | Min Value: 0 59 | Name: Image 60 | Normalize Range: true 61 | Queue Size: 2 62 | Transport Hint: raw 63 | Unreliable: false 64 | Value: true 65 | - Class: rviz/Image 66 | Enabled: true 67 | Image Topic: /mask_rcnn/visualization 68 | Max Value: 1 69 | Median window: 5 70 | Min Value: 0 71 | Name: Image 72 | Normalize Range: true 73 | Queue Size: 2 74 | Transport Hint: raw 75 | Unreliable: false 76 | Value: true 77 | Enabled: true 78 | Global Options: 79 | Background Color: 48; 48; 48 80 | Fixed Frame: world 81 | Frame Rate: 30 82 | Name: root 83 | Tools: 84 | - Class: rviz/Interact 85 | Hide Inactive Objects: true 86 | - Class: rviz/MoveCamera 87 | - Class: rviz/Select 88 | - Class: rviz/FocusCamera 89 | - Class: rviz/Measure 90 | - Class: rviz/SetInitialPose 91 | Topic: /initialpose 92 | - Class: rviz/SetGoal 93 | Topic: /move_base_simple/goal 94 | - Class: rviz/PublishPoint 95 | Single click: true 96 | Topic: /clicked_point 97 | Value: true 98 | Views: 99 | Current: 100 | Class: rviz/Orbit 101 | Distance: 3.30293489 102 | Enable Stereo Rendering: 103 | Stereo Eye Separation: 0.0599999987 104 | Stereo Focal Distance: 1 105 | Swap Stereo Eyes: false 106 | Value: false 107 | Focal Point: 108 | X: 0.919049203 109 | Y: 0.11560297 110 | Z: 0.632362902 111 | Focal Shape Fixed Size: true 112 | Focal Shape Size: 0.0500000007 113 | Invert Z Axis: false 114 | Name: Current View 115 | Near Clip Distance: 0.00999999978 116 | Pitch: 0.185397774 117 | Target Frame: 118 | Value: Orbit (rviz) 119 | Yaw: 5.57856464 120 | Saved: ~ 121 | Window Geometry: 122 | Displays: 123 | collapsed: false 124 | Height: 817 125 | Hide Left Dock: false 126 | Hide Right Dock: false 127 | Image: 128 | collapsed: false 129 | QMainWindow State: 
000000ff00000000fd00000004000000000000016a000002a7fc0200000009fb0000001200530065006c0065006300740069006f006e00000001e10000009b0000006400fffffffb0000001e0054006f006f006c002000500072006f007000650072007400690065007302000001ed000001df00000185000000a3fb000000120056006900650077007300200054006f006f02000001df000002110000018500000122fb000000200054006f006f006c002000500072006f0070006500720074006900650073003203000002880000011d000002210000017afb000000100044006900730070006c0061007900730100000028000002a7000000dd00fffffffb0000000a0056006900650077007300000001ba000000f3000000b000fffffffb0000002000730065006c0065006300740069006f006e00200062007500660066006500720200000138000000aa0000023a00000294fb00000014005700690064006500530074006500720065006f02000000e6000000d2000003ee0000030bfb0000000c004b0069006e0065006300740200000186000001060000030c000002610000000100000216000002a7fc0200000005fb0000001e0054006f006f006c002000500072006f00700065007200740069006500730100000041000000780000000000000000fb0000000a0049006d0061006700650100000028000001430000001600fffffffb0000000a0049006d00610067006501000001710000015e0000001600fffffffb0000000a0049006d00610067006501000001fe000000af0000000000000000fb0000001200530065006c0065006300740069006f006e010000025a000000b200000000000000000000000200000490000000a9fc0100000001fb0000000a00560069006500770073030000004e00000080000002e10000019700000003000004eb0000003efc0100000002fb0000000800540069006d00650100000000000004eb0000030000fffffffb0000000800540069006d006501000000000000045000000000000000000000015f000002a700000004000000040000000800000008fc0000000100000002000000010000000a0054006f006f006c00730100000000ffffffff0000000000000000 130 | Selection: 131 | collapsed: false 132 | Time: 133 | collapsed: false 134 | Tool Properties: 135 | collapsed: false 136 | Views: 137 | collapsed: false 138 | Width: 1259 139 | X: 618 140 | Y: 126 141 | -------------------------------------------------------------------------------- /nodes/config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Base Configurations class. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | """ 9 | 10 | import math 11 | import numpy as np 12 | 13 | 14 | # Base Configuration Class 15 | # Don't use this class directly. Instead, sub-class it and override 16 | # the configurations you need to change. 17 | 18 | class Config(object): 19 | """Base configuration class. For custom configurations, create a 20 | sub-class that inherits from this one and override properties 21 | that need to be changed. 22 | """ 23 | # Name the configurations. For example, 'COCO', 'Experiment 3', ...etc. 24 | # Useful if your code needs to do things differently depending on which 25 | # experiment is running. 26 | NAME = None # Override in sub-classes 27 | 28 | # NUMBER OF GPUs to use. For CPU training, use 1 29 | GPU_COUNT = 1 30 | 31 | # Number of images to train with on each GPU. A 12GB GPU can typically 32 | # handle 2 images of 1024x1024px. 33 | # Adjust based on your GPU memory and image sizes. Use the highest 34 | # number that your GPU can handle for best performance. 35 | IMAGES_PER_GPU = 2 36 | 37 | # Number of training steps per epoch 38 | # This doesn't need to match the size of the training set. Tensorboard 39 | # updates are saved at the end of each epoch, so setting this to a 40 | # smaller number means getting more frequent TensorBoard updates. 
41 | # Validation stats are also calculated at each epoch end and they 42 | # might take a while, so don't set this too small to avoid spending 43 | # a lot of time on validation stats. 44 | STEPS_PER_EPOCH = 1000 45 | 46 | # Number of validation steps to run at the end of every training epoch. 47 | # A bigger number improves accuracy of validation stats, but slows 48 | # down the training. 49 | VALIDATION_STEPS = 50 50 | 51 | # The strides of each layer of the FPN Pyramid. These values 52 | # are based on a Resnet101 backbone. 53 | BACKBONE_STRIDES = [4, 8, 16, 32, 64] 54 | 55 | # Number of classification classes (including background) 56 | NUM_CLASSES = 1 # Override in sub-classes 57 | 58 | # Length of square anchor side in pixels 59 | RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512) 60 | 61 | # Ratios of anchors at each cell (width/height) 62 | # A value of 1 represents a square anchor, and 0.5 is a tall anchor 63 | RPN_ANCHOR_RATIOS = [0.5, 1, 2] 64 | 65 | # Anchor stride 66 | # If 1 then anchors are created for each cell in the backbone feature map. 67 | # If 2, then anchors are created for every other cell, and so on. 68 | RPN_ANCHOR_STRIDE = 1 69 | 70 | # Non-max suppression threshold to filter RPN proposals. 71 | # You can reduce this during training to generate more proposals. 72 | RPN_NMS_THRESHOLD = 0.7 73 | 74 | # How many anchors per image to use for RPN training 75 | RPN_TRAIN_ANCHORS_PER_IMAGE = 256 76 | 77 | # ROIs kept after non-maximum suppression (training and inference) 78 | POST_NMS_ROIS_TRAINING = 2000 79 | POST_NMS_ROIS_INFERENCE = 1000 80 | 81 | # If enabled, resizes instance masks to a smaller size to reduce 82 | # memory load. Recommended when using high-resolution images. 83 | USE_MINI_MASK = True 84 | MINI_MASK_SHAPE = (56, 56) # (height, width) of the mini-mask 85 | 86 | # Input image resizing 87 | # Images are resized such that the smallest side is >= IMAGE_MIN_DIM and 88 | # the longest side is <= IMAGE_MAX_DIM. In case both conditions can't 89 | # be satisfied together the IMAGE_MAX_DIM is enforced. 90 | IMAGE_MIN_DIM = 800 91 | IMAGE_MAX_DIM = 1024 92 | # If True, pad images with zeros such that they're (max_dim by max_dim) 93 | IMAGE_PADDING = True # currently, the False option is not supported 94 | 95 | # Image mean (RGB) 96 | MEAN_PIXEL = np.array([123.7, 116.8, 103.9]) 97 | 98 | # Number of ROIs per image to feed to classifier/mask heads 99 | # The Mask RCNN paper uses 512 but often the RPN doesn't generate 100 | # enough positive proposals to fill this and keep a positive:negative 101 | # ratio of 1:3. You can increase the number of proposals by adjusting 102 | # the RPN NMS threshold. 103 | TRAIN_ROIS_PER_IMAGE = 200 104 | 105 | # Percent of positive ROIs used to train classifier/mask heads 106 | ROI_POSITIVE_RATIO = 0.33 107 | 108 | # Pooled ROIs 109 | POOL_SIZE = 7 110 | MASK_POOL_SIZE = 14 111 | MASK_SHAPE = [28, 28] 112 | 113 | # Maximum number of ground truth instances to use in one image 114 | MAX_GT_INSTANCES = 100 115 | 116 | # Bounding box refinement standard deviation for RPN and final detections.
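# Editor's note (based on the upstream Matterport implementation): box refinements are encoded as (dy, dx, log(dh), log(dw)) and divided by these standard deviations, so the network regresses roughly unit-variance targets.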
117 | RPN_BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2]) 118 | BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2]) 119 | 120 | # Max number of final detections 121 | DETECTION_MAX_INSTANCES = 100 122 | 123 | # Minimum probability value to accept a detected instance 124 | # ROIs below this threshold are skipped 125 | DETECTION_MIN_CONFIDENCE = 0.7 126 | 127 | # Non-maximum suppression threshold for detection 128 | DETECTION_NMS_THRESHOLD = 0.3 129 | 130 | # Learning rate and momentum 131 | # The Mask RCNN paper uses lr=0.02, but on TensorFlow it causes 132 | # weights to explode. Likely due to differences in optimizer 133 | # implementation. 134 | LEARNING_RATE = 0.001 135 | LEARNING_MOMENTUM = 0.9 136 | 137 | # Weight decay regularization 138 | WEIGHT_DECAY = 0.0001 139 | 140 | # Use RPN ROIs or externally generated ROIs for training 141 | # Keep this True for most situations. Set to False if you want to train 142 | # the head branches on ROI generated by code rather than the ROIs from 143 | # the RPN. For example, to debug the classifier head without having to 144 | # train the RPN. 145 | USE_RPN_ROIS = True 146 | 147 | def __init__(self): 148 | """Set values of computed attributes.""" 149 | # Effective batch size 150 | self.BATCH_SIZE = self.IMAGES_PER_GPU * self.GPU_COUNT 151 | 152 | # Input image size 153 | self.IMAGE_SHAPE = np.array( 154 | [self.IMAGE_MAX_DIM, self.IMAGE_MAX_DIM, 3]) 155 | 156 | # Compute backbone size from input image size 157 | self.BACKBONE_SHAPES = np.array( 158 | [[int(math.ceil(self.IMAGE_SHAPE[0] / stride)), 159 | int(math.ceil(self.IMAGE_SHAPE[1] / stride))] 160 | for stride in self.BACKBONE_STRIDES]) 161 | 162 | def display(self): 163 | """Display Configuration values.""" 164 | print("\nConfigurations:") 165 | for a in dir(self): 166 | if not a.startswith("__") and not callable(getattr(self, a)): 167 | print("{:30} {}".format(a, getattr(self, a))) 168 | print("\n") 169 | -------------------------------------------------------------------------------- /src/mask_rcnn_ros/config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Base Configurations class. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | """ 9 | 10 | import math 11 | import numpy as np 12 | 13 | 14 | # Base Configuration Class 15 | # Don't use this class directly. Instead, sub-class it and override 16 | # the configurations you need to change. 17 | 18 | class Config(object): 19 | """Base configuration class. For custom configurations, create a 20 | sub-class that inherits from this one and override properties 21 | that need to be changed. 22 | """ 23 | # Name the configurations. For example, 'COCO', 'Experiment 3', ...etc. 24 | # Useful if your code needs to do things differently depending on which 25 | # experiment is running. 26 | NAME = None # Override in sub-classes 27 | 28 | # NUMBER OF GPUs to use. For CPU training, use 1 29 | GPU_COUNT = 1 30 | 31 | # Number of images to train with on each GPU. A 12GB GPU can typically 32 | # handle 2 images of 1024x1024px. 33 | # Adjust based on your GPU memory and image sizes. Use the highest 34 | # number that your GPU can handle for best performance. 35 | IMAGES_PER_GPU = 2 36 | 37 | # Number of training steps per epoch 38 | # This doesn't need to match the size of the training set.
Tensorboard 39 | # updates are saved at the end of each epoch, so setting this to a 40 | # smaller number means getting more frequent TensorBoard updates. 41 | # Validation stats are also calculated at each epoch end and they 42 | # might take a while, so don't set this too small to avoid spending 43 | # a lot of time on validation stats. 44 | STEPS_PER_EPOCH = 1000 45 | 46 | # Number of validation steps to run at the end of every training epoch. 47 | # A bigger number improves accuracy of validation stats, but slows 48 | # down the training. 49 | VALIDATION_STEPS = 50 50 | 51 | # The strides of each layer of the FPN Pyramid. These values 52 | # are based on a Resnet101 backbone. 53 | BACKBONE_STRIDES = [4, 8, 16, 32, 64] 54 | 55 | # Number of classification classes (including background) 56 | NUM_CLASSES = 1 # Override in sub-classes 57 | 58 | # Length of square anchor side in pixels 59 | RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512) 60 | 61 | # Ratios of anchors at each cell (width/height) 62 | # A value of 1 represents a square anchor, and 0.5 is a tall anchor 63 | RPN_ANCHOR_RATIOS = [0.5, 1, 2] 64 | 65 | # Anchor stride 66 | # If 1 then anchors are created for each cell in the backbone feature map. 67 | # If 2, then anchors are created for every other cell, and so on. 68 | RPN_ANCHOR_STRIDE = 1 69 | 70 | # Non-max suppression threshold to filter RPN proposals. 71 | # You can reduce this during training to generate more proposals. 72 | RPN_NMS_THRESHOLD = 0.7 73 | 74 | # How many anchors per image to use for RPN training 75 | RPN_TRAIN_ANCHORS_PER_IMAGE = 256 76 | 77 | # ROIs kept after non-maximum suppression (training and inference) 78 | POST_NMS_ROIS_TRAINING = 2000 79 | POST_NMS_ROIS_INFERENCE = 1000 80 | 81 | # If enabled, resizes instance masks to a smaller size to reduce 82 | # memory load. Recommended when using high-resolution images. 83 | USE_MINI_MASK = True 84 | MINI_MASK_SHAPE = (56, 56) # (height, width) of the mini-mask 85 | 86 | # Input image resizing 87 | # Images are resized such that the smallest side is >= IMAGE_MIN_DIM and 88 | # the longest side is <= IMAGE_MAX_DIM. In case both conditions can't 89 | # be satisfied together the IMAGE_MAX_DIM is enforced. 90 | IMAGE_MIN_DIM = 800 91 | IMAGE_MAX_DIM = 1024 92 | # If True, pad images with zeros such that they're (max_dim by max_dim) 93 | IMAGE_PADDING = True # currently, the False option is not supported 94 | 95 | # Image mean (RGB) 96 | MEAN_PIXEL = np.array([123.7, 116.8, 103.9]) 97 | 98 | # Number of ROIs per image to feed to classifier/mask heads 99 | # The Mask RCNN paper uses 512 but often the RPN doesn't generate 100 | # enough positive proposals to fill this and keep a positive:negative 101 | # ratio of 1:3. You can increase the number of proposals by adjusting 102 | # the RPN NMS threshold. 103 | TRAIN_ROIS_PER_IMAGE = 200 104 | 105 | # Percent of positive ROIs used to train classifier/mask heads 106 | ROI_POSITIVE_RATIO = 0.33 107 | 108 | # Pooled ROIs 109 | POOL_SIZE = 7 110 | MASK_POOL_SIZE = 14 111 | MASK_SHAPE = [28, 28] 112 | 113 | # Maximum number of ground truth instances to use in one image 114 | MAX_GT_INSTANCES = 100 115 | 116 | # Bounding box refinement standard deviation for RPN and final detections.
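# Editor's note (based on the upstream Matterport implementation): box refinements are encoded as (dy, dx, log(dh), log(dw)) and divided by these standard deviations, so the network regresses roughly unit-variance targets.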
117 | RPN_BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2]) 118 | BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2]) 119 | 120 | # Max number of final detections 121 | DETECTION_MAX_INSTANCES = 100 122 | 123 | # Minimum probability value to accept a detected instance 124 | # ROIs below this threshold are skipped 125 | DETECTION_MIN_CONFIDENCE = 0.7 126 | 127 | # Non-maximum suppression threshold for detection 128 | DETECTION_NMS_THRESHOLD = 0.3 129 | 130 | # Learning rate and momentum 131 | # The Mask RCNN paper uses lr=0.02, but on TensorFlow it causes 132 | # weights to explode. Likely due to differences in optimizer 133 | # implementation. 134 | LEARNING_RATE = 0.001 135 | LEARNING_MOMENTUM = 0.9 136 | 137 | # Weight decay regularization 138 | WEIGHT_DECAY = 0.0001 139 | 140 | # Use RPN ROIs or externally generated ROIs for training 141 | # Keep this True for most situations. Set to False if you want to train 142 | # the head branches on ROI generated by code rather than the ROIs from 143 | # the RPN. For example, to debug the classifier head without having to 144 | # train the RPN. 145 | USE_RPN_ROIS = True 146 | 147 | def __init__(self): 148 | """Set values of computed attributes.""" 149 | # Effective batch size 150 | self.BATCH_SIZE = self.IMAGES_PER_GPU * self.GPU_COUNT 151 | 152 | # Input image size 153 | self.IMAGE_SHAPE = np.array( 154 | [self.IMAGE_MAX_DIM, self.IMAGE_MAX_DIM, 3]) 155 | 156 | # Compute backbone size from input image size 157 | self.BACKBONE_SHAPES = np.array( 158 | [[int(math.ceil(self.IMAGE_SHAPE[0] / stride)), 159 | int(math.ceil(self.IMAGE_SHAPE[1] / stride))] 160 | for stride in self.BACKBONE_STRIDES]) 161 | 162 | def display(self): 163 | """Display Configuration values.""" 164 | print("\nConfigurations:") 165 | for a in dir(self): 166 | if not a.startswith("__") and not callable(getattr(self, a)): 167 | print("{:30} {}".format(a, getattr(self, a))) 168 | print("\n") 169 | -------------------------------------------------------------------------------- /nodes/parallel_model.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Multi-GPU Support for Keras. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | 9 | Ideas and small code snippets from these sources: 10 | https://github.com/fchollet/keras/issues/2436 11 | https://medium.com/@kuza55/transparent-multi-gpu-training-on-tensorflow-with-keras-8b0016fd9012 12 | https://github.com/avolkov1/keras_experiments/blob/master/keras_exp/multigpu/ 13 | https://github.com/fchollet/keras/blob/master/keras/utils/training_utils.py 14 | """ 15 | 16 | import tensorflow as tf 17 | import keras.backend as K 18 | import keras.layers as KL 19 | import keras.models as KM 20 | 21 | 22 | class ParallelModel(KM.Model): 23 | """Subclasses the standard Keras Model and adds multi-GPU support. 24 | It works by creating a copy of the model on each GPU. Then it slices 25 | the inputs and sends a slice to each copy of the model, and then 26 | merges the outputs together and applies the loss on the combined 27 | outputs. 28 | """ 29 | 30 | def __init__(self, keras_model, gpu_count): 31 | """Class constructor. 32 | keras_model: The Keras model to parallelize 33 | gpu_count: Number of GPUs.
Must be > 1 34 | """ 35 | self.inner_model = keras_model 36 | self.gpu_count = gpu_count 37 | merged_outputs = self.make_parallel() 38 | super(ParallelModel, self).__init__(inputs=self.inner_model.inputs, 39 | outputs=merged_outputs) 40 | 41 | def __getattribute__(self, attrname): 42 | """Redirect loading and saving methods to the inner model. That's where 43 | the weights are stored.""" 44 | if 'load' in attrname or 'save' in attrname: 45 | return getattr(self.inner_model, attrname) 46 | return super(ParallelModel, self).__getattribute__(attrname) 47 | 48 | def summary(self, *args, **kwargs): 49 | """Override summary() to display summaries of both, the wrapper 50 | and inner models.""" 51 | super(ParallelModel, self).summary(*args, **kwargs) 52 | self.inner_model.summary(*args, **kwargs) 53 | 54 | def make_parallel(self): 55 | """Creates a new wrapper model that consists of multiple replicas of 56 | the original model placed on different GPUs. 57 | """ 58 | # Slice inputs. Slice inputs on the CPU to avoid sending a copy 59 | # of the full inputs to all GPUs. Saves on bandwidth and memory. 60 | input_slices = {name: tf.split(x, self.gpu_count) 61 | for name, x in zip(self.inner_model.input_names, 62 | self.inner_model.inputs)} 63 | 64 | output_names = self.inner_model.output_names 65 | outputs_all = [] 66 | for i in range(len(self.inner_model.outputs)): 67 | outputs_all.append([]) 68 | 69 | # Run the model call() on each GPU to place the ops there 70 | for i in range(self.gpu_count): 71 | with tf.device('/gpu:%d' % i): 72 | with tf.name_scope('tower_%d' % i): 73 | # Run a slice of inputs through this replica 74 | zipped_inputs = zip(self.inner_model.input_names, 75 | self.inner_model.inputs) 76 | inputs = [ 77 | KL.Lambda(lambda s: input_slices[name][i], 78 | output_shape=lambda s: (None,) + s[1:])(tensor) 79 | for name, tensor in zipped_inputs] 80 | # Create the model replica and get the outputs 81 | outputs = self.inner_model(inputs) 82 | if not isinstance(outputs, list): 83 | outputs = [outputs] 84 | # Save the outputs for merging back together later 85 | for l, o in enumerate(outputs): 86 | outputs_all[l].append(o) 87 | 88 | # Merge outputs on CPU 89 | with tf.device('/cpu:0'): 90 | merged = [] 91 | for outputs, name in zip(outputs_all, output_names): 92 | # If outputs are numbers without dimensions, add a batch dim. 93 | def add_dim(tensor): 94 | """Add a dimension to tensors that don't have any.""" 95 | if K.int_shape(tensor) == (): 96 | return KL.Lambda(lambda t: K.reshape(t, [1, 1]))(tensor) 97 | return tensor 98 | outputs = list(map(add_dim, outputs)) 99 | 100 | # Concatenate 101 | merged.append(KL.Concatenate(axis=0, name=name)(outputs)) 102 | return merged 103 | 104 | 105 | if __name__ == "__main__": 106 | # Testing code below. It creates a simple model to train on MNIST and 107 | # tries to run it on 2 GPUs. It saves the graph so it can be viewed 108 | # in TensorBoard. Run it as: 109 | # 110 | # python3 parallel_model.py 111 | 112 | import os 113 | import numpy as np 114 | import keras.optimizers 115 | from keras.datasets import mnist 116 | from keras.preprocessing.image import ImageDataGenerator 117 | 118 | GPU_COUNT = 2 119 | 120 | # Root directory of the project 121 | ROOT_DIR = os.getcwd() 122 | 123 | # Directory to save logs and trained model 124 | MODEL_DIR = os.path.join(ROOT_DIR, "logs/parallel") 125 | 126 | def build_model(x_train, num_classes): 127 | # Reset default graph. 
Keras leaves old ops in the graph, 128 | # which are ignored for execution but clutter graph 129 | # visualization in TensorBoard. 130 | tf.reset_default_graph() 131 | 132 | inputs = KL.Input(shape=x_train.shape[1:], name="input_image") 133 | x = KL.Conv2D(32, (3, 3), activation='relu', padding="same", 134 | name="conv1")(inputs) 135 | x = KL.Conv2D(64, (3, 3), activation='relu', padding="same", 136 | name="conv2")(x) 137 | x = KL.MaxPooling2D(pool_size=(2, 2), name="pool1")(x) 138 | x = KL.Flatten(name="flat1")(x) 139 | x = KL.Dense(128, activation='relu', name="dense1")(x) 140 | x = KL.Dense(num_classes, activation='softmax', name="dense2")(x) 141 | 142 | return KM.Model(inputs, x, "digit_classifier_model") 143 | 144 | # Load MNIST Data 145 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 146 | x_train = np.expand_dims(x_train, -1).astype('float32') / 255 147 | x_test = np.expand_dims(x_test, -1).astype('float32') / 255 148 | 149 | print('x_train shape:', x_train.shape) 150 | print('x_test shape:', x_test.shape) 151 | 152 | # Build data generator and model 153 | datagen = ImageDataGenerator() 154 | model = build_model(x_train, 10) 155 | 156 | # Add multi-GPU support. 157 | model = ParallelModel(model, GPU_COUNT) 158 | 159 | optimizer = keras.optimizers.SGD(lr=0.01, momentum=0.9, clipnorm=5.0) 160 | 161 | model.compile(loss='sparse_categorical_crossentropy', 162 | optimizer=optimizer, metrics=['accuracy']) 163 | 164 | model.summary() 165 | 166 | # Train 167 | model.fit_generator( 168 | datagen.flow(x_train, y_train, batch_size=64), 169 | steps_per_epoch=50, epochs=10, verbose=1, 170 | validation_data=(x_test, y_test), 171 | callbacks=[keras.callbacks.TensorBoard(log_dir=MODEL_DIR, 172 | write_graph=True)] 173 | ) 174 | -------------------------------------------------------------------------------- /src/mask_rcnn_ros/parallel_model.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Multi-GPU Support for Keras. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | 9 | Ideas and small code snippets from these sources: 10 | https://github.com/fchollet/keras/issues/2436 11 | https://medium.com/@kuza55/transparent-multi-gpu-training-on-tensorflow-with-keras-8b0016fd9012 12 | https://github.com/avolkov1/keras_experiments/blob/master/keras_exp/multigpu/ 13 | https://github.com/fchollet/keras/blob/master/keras/utils/training_utils.py 14 | """ 15 | 16 | import tensorflow as tf 17 | import keras.backend as K 18 | import keras.layers as KL 19 | import keras.models as KM 20 | 21 | 22 | class ParallelModel(KM.Model): 23 | """Subclasses the standard Keras Model and adds multi-GPU support. 24 | It works by creating a copy of the model on each GPU. Then it slices 25 | the inputs and sends a slice to each copy of the model, and then 26 | merges the outputs together and applies the loss on the combined 27 | outputs. 28 | """ 29 | 30 | def __init__(self, keras_model, gpu_count): 31 | """Class constructor. 32 | keras_model: The Keras model to parallelize 33 | gpu_count: Number of GPUs. Must be > 1 34 | """ 35 | self.inner_model = keras_model 36 | self.gpu_count = gpu_count 37 | merged_outputs = self.make_parallel() 38 | super(ParallelModel, self).__init__(inputs=self.inner_model.inputs, 39 | outputs=merged_outputs) 40 | 41 | def __getattribute__(self, attrname): 42 | """Redirect loading and saving methods to the inner model.
That's where 43 | the weights are stored.""" 44 | if 'load' in attrname or 'save' in attrname: 45 | return getattr(self.inner_model, attrname) 46 | return super(ParallelModel, self).__getattribute__(attrname) 47 | 48 | def summary(self, *args, **kwargs): 49 | """Override summary() to display summaries of both, the wrapper 50 | and inner models.""" 51 | super(ParallelModel, self).summary(*args, **kwargs) 52 | self.inner_model.summary(*args, **kwargs) 53 | 54 | def make_parallel(self): 55 | """Creates a new wrapper model that consists of multiple replicas of 56 | the original model placed on different GPUs. 57 | """ 58 | # Slice inputs. Slice inputs on the CPU to avoid sending a copy 59 | # of the full inputs to all GPUs. Saves on bandwidth and memory. 60 | input_slices = {name: tf.split(x, self.gpu_count) 61 | for name, x in zip(self.inner_model.input_names, 62 | self.inner_model.inputs)} 63 | 64 | output_names = self.inner_model.output_names 65 | outputs_all = [] 66 | for i in range(len(self.inner_model.outputs)): 67 | outputs_all.append([]) 68 | 69 | # Run the model call() on each GPU to place the ops there 70 | for i in range(self.gpu_count): 71 | with tf.device('/gpu:%d' % i): 72 | with tf.name_scope('tower_%d' % i): 73 | # Run a slice of inputs through this replica 74 | zipped_inputs = zip(self.inner_model.input_names, 75 | self.inner_model.inputs) 76 | inputs = [ 77 | KL.Lambda(lambda s: input_slices[name][i], 78 | output_shape=lambda s: (None,) + s[1:])(tensor) 79 | for name, tensor in zipped_inputs] 80 | # Create the model replica and get the outputs 81 | outputs = self.inner_model(inputs) 82 | if not isinstance(outputs, list): 83 | outputs = [outputs] 84 | # Save the outputs for merging back together later 85 | for l, o in enumerate(outputs): 86 | outputs_all[l].append(o) 87 | 88 | # Merge outputs on CPU 89 | with tf.device('/cpu:0'): 90 | merged = [] 91 | for outputs, name in zip(outputs_all, output_names): 92 | # If outputs are numbers without dimensions, add a batch dim. 93 | def add_dim(tensor): 94 | """Add a dimension to tensors that don't have any.""" 95 | if K.int_shape(tensor) == (): 96 | return KL.Lambda(lambda t: K.reshape(t, [1, 1]))(tensor) 97 | return tensor 98 | outputs = list(map(add_dim, outputs)) 99 | 100 | # Concatenate 101 | merged.append(KL.Concatenate(axis=0, name=name)(outputs)) 102 | return merged 103 | 104 | 105 | if __name__ == "__main__": 106 | # Testing code below. It creates a simple model to train on MNIST and 107 | # tries to run it on 2 GPUs. It saves the graph so it can be viewed 108 | # in TensorBoard. Run it as: 109 | # 110 | # python3 parallel_model.py 111 | 112 | import os 113 | import numpy as np 114 | import keras.optimizers 115 | from keras.datasets import mnist 116 | from keras.preprocessing.image import ImageDataGenerator 117 | 118 | GPU_COUNT = 2 119 | 120 | # Root directory of the project 121 | ROOT_DIR = os.getcwd() 122 | 123 | # Directory to save logs and trained model 124 | MODEL_DIR = os.path.join(ROOT_DIR, "logs/parallel") 125 | 126 | def build_model(x_train, num_classes): 127 | # Reset default graph. Keras leaves old ops in the graph, 128 | # which are ignored for execution but clutter graph 129 | # visualization in TensorBoard. 
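# Editor's note: tf.reset_default_graph is TF1-era API; on TF2 it is only available as tf.compat.v1.reset_default_graph.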
130 | tf.reset_default_graph() 131 | 132 | inputs = KL.Input(shape=x_train.shape[1:], name="input_image") 133 | x = KL.Conv2D(32, (3, 3), activation='relu', padding="same", 134 | name="conv1")(inputs) 135 | x = KL.Conv2D(64, (3, 3), activation='relu', padding="same", 136 | name="conv2")(x) 137 | x = KL.MaxPooling2D(pool_size=(2, 2), name="pool1")(x) 138 | x = KL.Flatten(name="flat1")(x) 139 | x = KL.Dense(128, activation='relu', name="dense1")(x) 140 | x = KL.Dense(num_classes, activation='softmax', name="dense2")(x) 141 | 142 | return KM.Model(inputs, x, "digit_classifier_model") 143 | 144 | # Load MNIST Data 145 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 146 | x_train = np.expand_dims(x_train, -1).astype('float32') / 255 147 | x_test = np.expand_dims(x_test, -1).astype('float32') / 255 148 | 149 | print('x_train shape:', x_train.shape) 150 | print('x_test shape:', x_test.shape) 151 | 152 | # Build data generator and model 153 | datagen = ImageDataGenerator() 154 | model = build_model(x_train, 10) 155 | 156 | # Add multi-GPU support. 157 | model = ParallelModel(model, GPU_COUNT) 158 | 159 | optimizer = keras.optimizers.SGD(lr=0.01, momentum=0.9, clipnorm=5.0) 160 | 161 | model.compile(loss='sparse_categorical_crossentropy', 162 | optimizer=optimizer, metrics=['accuracy']) 163 | 164 | model.summary() 165 | 166 | # Train 167 | model.fit_generator( 168 | datagen.flow(x_train, y_train, batch_size=64), 169 | steps_per_epoch=50, epochs=10, verbose=1, 170 | validation_data=(x_test, y_test), 171 | callbacks=[keras.callbacks.TensorBoard(log_dir=MODEL_DIR, 172 | write_graph=True)] 173 | ) 174 | -------------------------------------------------------------------------------- /nodes/mask_rcnn_node: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os 3 | import threading 4 | import numpy as np 5 | 6 | import cv2 7 | from cv_bridge import CvBridge 8 | import rospy 9 | from sensor_msgs.msg import Image 10 | from sensor_msgs.msg import RegionOfInterest 11 | 12 | import coco 13 | import utils 14 | import model as modellib 15 | import visualize 16 | from mask_rcnn_ros.msg import Result 17 | 18 | 19 | # Local path to trained weights file 20 | ROS_HOME = os.environ.get('ROS_HOME', os.path.join(os.environ['HOME'], '.ros')) 21 | COCO_MODEL_PATH = os.path.join(ROS_HOME, 'mask_rcnn_coco.h5') 22 | 23 | # COCO Class names 24 | # Index of the class in the list is its ID. 
For example, to get ID of 25 | # the teddy bear class, use: CLASS_NAMES.index('teddy bear') 26 | CLASS_NAMES = ['BG', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 27 | 'bus', 'train', 'truck', 'boat', 'traffic light', 28 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 29 | 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 30 | 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 31 | 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 32 | 'kite', 'baseball bat', 'baseball glove', 'skateboard', 33 | 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 34 | 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 35 | 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 36 | 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 37 | 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 38 | 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 39 | 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 40 | 'teddy bear', 'hair drier', 'toothbrush'] 41 | 42 | 43 | class InferenceConfig(coco.CocoConfig): 44 | # Set batch size to 1 since we'll be running inference on 45 | # one image at a time. Batch size = GPU_COUNT * IMAGES_PER_GPU 46 | GPU_COUNT = 1 47 | IMAGES_PER_GPU = 1 48 | 49 | 50 | class MaskRCNNNode(object): 51 | def __init__(self): 52 | self._cv_bridge = CvBridge() 53 | 54 | config = InferenceConfig() 55 | config.display() 56 | 57 | self._visualization = rospy.get_param('~visualization', True) 58 | 59 | # Create model object in inference mode. 60 | self._model = modellib.MaskRCNN(mode="inference", model_dir="", 61 | config=config) 62 | # Load weights trained on MS-COCO 63 | model_path = rospy.get_param('~model_path', COCO_MODEL_PATH) 64 | # Download COCO trained weights from Releases if needed 65 | if model_path == COCO_MODEL_PATH and not os.path.exists(COCO_MODEL_PATH): 66 | utils.download_trained_weights(COCO_MODEL_PATH) 67 | 68 | self._model.load_weights(model_path, by_name=True) 69 | 70 | self._class_names = rospy.get_param('~class_names', CLASS_NAMES) 71 | 72 | self._last_msg = None 73 | self._msg_lock = threading.Lock() 74 | 75 | self._class_colors = visualize.random_colors(len(CLASS_NAMES)) 76 | 77 | self._publish_rate = rospy.get_param('~publish_rate', 100) 78 | 79 | def run(self): 80 | self._result_pub = rospy.Publisher('~result', Result, queue_size=1) 81 | vis_pub = rospy.Publisher('~visualization', Image, queue_size=1) 82 | rospy.Subscriber('~input', Image, 83 | self._image_callback, queue_size=1) 84 | 85 | rate = rospy.Rate(self._publish_rate) 86 | while not rospy.is_shutdown(): 87 | if self._msg_lock.acquire(False): 88 | msg = self._last_msg 89 | self._last_msg = None 90 | self._msg_lock.release() 91 | else: 92 | rate.sleep() 93 | continue 94 | 95 | if msg is not None: 96 | np_image = self._cv_bridge.imgmsg_to_cv2(msg, 'bgr8') 97 | 98 | # Run detection 99 | results = self._model.detect([np_image], verbose=0) 100 | result = results[0] 101 | result_msg = self._build_result_msg(msg, result) 102 | self._result_pub.publish(result_msg) 103 | 104 | # Visualize results 105 | if self._visualization: 106 | cv_result = self._visualize_cv(result, np_image) 107 | image_msg = self._cv_bridge.cv2_to_imgmsg(cv_result, 'bgr8') 108 | vis_pub.publish(image_msg) 109 | 110 | rate.sleep() 111 | 112 | def _build_result_msg(self, msg, result): 113 | result_msg = Result() 114 | result_msg.header = msg.header 115 | for i, (y1, x1, y2, x2) in enumerate(result['rois']): 116 | box = RegionOfInterest() 117 | 
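# Editor's note: np.asscalar converts a NumPy scalar to a plain Python number; it is deprecated in newer NumPy (ndarray.item() is the replacement) but works with the numpy==1.13.3 pinned in requirements.txt.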
box.x_offset = np.asscalar(x1) 118 | box.y_offset = np.asscalar(y1) 119 | box.height = np.asscalar(y2 - y1) 120 | box.width = np.asscalar(x2 - x1) 121 | result_msg.boxes.append(box) 122 | 123 | class_id = result['class_ids'][i] 124 | result_msg.class_ids.append(class_id) 125 | 126 | class_name = self._class_names[class_id] 127 | result_msg.class_names.append(class_name) 128 | 129 | score = result['scores'][i] 130 | result_msg.scores.append(score) 131 | 132 | mask = Image() 133 | mask.header = msg.header 134 | mask.height = result['masks'].shape[0] 135 | mask.width = result['masks'].shape[1] 136 | mask.encoding = "mono8" 137 | mask.is_bigendian = False 138 | mask.step = mask.width 139 | mask.data = (result['masks'][:, :, i] * 255).tobytes() 140 | result_msg.masks.append(mask) 141 | return result_msg 142 | 143 | def _visualize(self, result, image): 144 | from matplotlib.backends.backend_agg import FigureCanvasAgg 145 | from matplotlib.figure import Figure 146 | 147 | fig = Figure() 148 | canvas = FigureCanvasAgg(fig) 149 | axes = fig.gca() 150 | visualize.display_instances(image, result['rois'], result['masks'], 151 | result['class_ids'], CLASS_NAMES, 152 | result['scores'], ax=axes, 153 | class_colors=self._class_colors) 154 | fig.tight_layout() 155 | canvas.draw() 156 | result = np.fromstring(canvas.tostring_rgb(), dtype='uint8') 157 | 158 | _, _, w, h = fig.bbox.bounds 159 | result = result.reshape((int(h), int(w), 3)) 160 | return result 161 | 162 | def _visualize_cv(self, result, image): 163 | 164 | image = visualize.display_instances_cv(image, result['rois'], result['masks'], 165 | result['class_ids'], CLASS_NAMES, 166 | result['scores'], 167 | class_colors=self._class_colors) 168 | 169 | return image 170 | 171 | def _image_callback(self, msg): 172 | rospy.logdebug("Get an image") 173 | if self._msg_lock.acquire(False): 174 | self._last_msg = msg 175 | self._msg_lock.release() 176 | 177 | 178 | def main(): 179 | rospy.init_node('mask_rcnn') 180 | 181 | node = MaskRCNNNode() 182 | node.run() 183 | 184 | 185 | if __name__ == '__main__': 186 | main() 187 | -------------------------------------------------------------------------------- /nodes/shapes.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Configurations and data loading code for the synthetic Shapes dataset. 4 | This is a duplicate of the code in the notebook train_shapes.ipynb for easy 5 | import into other notebooks, such as inspect_model.ipynb. 6 | 7 | Copyright (c) 2017 Matterport, Inc. 8 | Licensed under the MIT License (see LICENSE for details) 9 | Written by Waleed Abdulla 10 | """ 11 | 12 | import math 13 | import random 14 | import numpy as np 15 | import cv2 16 | 17 | from config import Config 18 | import utils 19 | 20 | 21 | class ShapesConfig(Config): 22 | """Configuration for training on the toy shapes dataset. 23 | Derives from the base Config class and overrides values specific 24 | to the toy shapes dataset. 25 | """ 26 | # Give the configuration a recognizable name 27 | NAME = "shapes" 28 | 29 | # Train on 1 GPU and 8 images per GPU. We can put multiple images on each 30 | # GPU because the images are small. Batch size is 8 (GPUs * images/GPU). 31 | GPU_COUNT = 1 32 | IMAGES_PER_GPU = 8 33 | 34 | # Number of classes (including background) 35 | NUM_CLASSES = 1 + 3 # background + 3 shapes 36 | 37 | # Use small images for faster training. Set the limits of the small side and 38 | # the large side, and that determines the image shape.
39 | IMAGE_MIN_DIM = 128 40 | IMAGE_MAX_DIM = 128 41 | 42 | # Use smaller anchors because our image and objects are small 43 | RPN_ANCHOR_SCALES = (8, 16, 32, 64, 128) # anchor side in pixels 44 | 45 | # Reduce training ROIs per image because the images are small and have 46 | # few objects. Aim to allow ROI sampling to pick 33% positive ROIs. 47 | TRAIN_ROIS_PER_IMAGE = 32 48 | 49 | # Use a small epoch since the data is simple 50 | STEPS_PER_EPOCH = 100 51 | 52 | # use small validation steps since the epoch is small 53 | VALIDATION_STEPS = 5 54 | 55 | 56 | class ShapesDataset(utils.Dataset): 57 | """Generates the shapes synthetic dataset. The dataset consists of simple 58 | shapes (triangles, squares, circles) placed randomly on a blank surface. 59 | The images are generated on the fly. No file access required. 60 | """ 61 | 62 | def load_shapes(self, count, height, width): 63 | """Generate the requested number of synthetic images. 64 | count: number of images to generate. 65 | height, width: the size of the generated images. 66 | """ 67 | # Add classes 68 | self.add_class("shapes", 1, "square") 69 | self.add_class("shapes", 2, "circle") 70 | self.add_class("shapes", 3, "triangle") 71 | 72 | # Add images 73 | # Generate random specifications of images (i.e. color and 74 | # list of shapes sizes and locations). This is more compact than 75 | # actual images. Images are generated on the fly in load_image(). 76 | for i in range(count): 77 | bg_color, shapes = self.random_image(height, width) 78 | self.add_image("shapes", image_id=i, path=None, 79 | width=width, height=height, 80 | bg_color=bg_color, shapes=shapes) 81 | 82 | def load_image(self, image_id): 83 | """Generate an image from the specs of the given image ID. 84 | Typically this function loads the image from a file, but 85 | in this case it generates the image on the fly from the 86 | specs in image_info. 87 | """ 88 | info = self.image_info[image_id] 89 | bg_color = np.array(info['bg_color']).reshape([1, 1, 3]) 90 | image = np.ones([info['height'], info['width'], 3], dtype=np.uint8) 91 | image = image * bg_color.astype(np.uint8) 92 | for shape, color, dims in info['shapes']: 93 | image = self.draw_shape(image, shape, dims, color) 94 | return image 95 | 96 | def image_reference(self, image_id): 97 | """Return the shapes data of the image.""" 98 | info = self.image_info[image_id] 99 | if info["source"] == "shapes": 100 | return info["shapes"] 101 | else: 102 | return super(ShapesDataset, self).image_reference(image_id) 103 | 104 | def load_mask(self, image_id): 105 | """Generate instance masks for shapes of the given image ID. 106 | """ 107 | info = self.image_info[image_id] 108 | shapes = info['shapes'] 109 | count = len(shapes) 110 | mask = np.zeros([info['height'], info['width'], count], dtype=np.uint8) 111 | for i, (shape, _, dims) in enumerate(info['shapes']): 112 | mask[:, :, i:i + 1] = self.draw_shape(mask[:, :, i:i + 1].copy(), 113 | shape, dims, 1) 114 | # Handle occlusions 115 | occlusion = np.logical_not(mask[:, :, -1]).astype(np.uint8) 116 | for i in range(count - 2, -1, -1): 117 | mask[:, :, i] = mask[:, :, i] * occlusion 118 | occlusion = np.logical_and( 119 | occlusion, np.logical_not(mask[:, :, i])) 120 | # Map class names to class IDs.
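# Editor's note: self.class_names is assumed to be populated from the add_class() calls above by Dataset.prepare() in the upstream utils module, so index() maps a shape name back to its integer class ID.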
121 |         class_ids = np.array([self.class_names.index(s[0]) for s in shapes])
122 |         return mask, class_ids.astype(np.int32)
123 | 
124 |     def draw_shape(self, image, shape, dims, color):
125 |         """Draws a shape from the given specs."""
126 |         # Get the center x, y and the size s
127 |         x, y, s = dims
128 |         if shape == 'square':
129 |             image = cv2.rectangle(image, (x - s, y - s),
130 |                                   (x + s, y + s), color, -1)
131 |         elif shape == "circle":
132 |             image = cv2.circle(image, (x, y), s, color, -1)
133 |         elif shape == "triangle":
134 |             points = np.array([[(x, y - s),
135 |                                 (x - s / math.sin(math.radians(60)), y + s),
136 |                                 (x + s / math.sin(math.radians(60)), y + s),
137 |                                 ]], dtype=np.int32)
138 |             image = cv2.fillPoly(image, points, color)
139 |         return image
140 | 
141 |     def random_shape(self, height, width):
142 |         """Generates specifications of a random shape that lies within
143 |         the given height and width boundaries.
144 |         Returns a tuple of three values:
145 |         * The shape name (square, circle, ...)
146 |         * Shape color: a tuple of 3 values, RGB.
147 |         * Shape dimensions: A tuple of values that define the shape size
148 |           and location. Differs per shape type.
149 |         """
150 |         # Shape
151 |         shape = random.choice(["square", "circle", "triangle"])
152 |         # Color
153 |         color = tuple([random.randint(0, 255) for _ in range(3)])
154 |         # Center x, y
155 |         buffer = 20
156 |         y = random.randint(buffer, height - buffer - 1)
157 |         x = random.randint(buffer, width - buffer - 1)
158 |         # Size
159 |         s = random.randint(buffer, height // 4)
160 |         return shape, color, (x, y, s)
161 | 
162 |     def random_image(self, height, width):
163 |         """Creates random specifications of an image with multiple shapes.
164 |         Returns the background color of the image and a list of shape
165 |         specifications that can be used to draw the image.
166 |         """
167 |         # Pick a random background color
168 |         bg_color = np.array([random.randint(0, 255) for _ in range(3)])
169 |         # Generate a few random shapes and record their
170 |         # bounding boxes
171 |         shapes = []
172 |         boxes = []
173 |         N = random.randint(1, 4)
174 |         for _ in range(N):
175 |             shape, color, dims = self.random_shape(height, width)
176 |             shapes.append((shape, color, dims))
177 |             x, y, s = dims
178 |             boxes.append([y - s, x - s, y + s, x + s])
179 |         # Apply non-max suppression with a 0.3 threshold to avoid
180 |         # shapes covering each other
181 |         keep_ixs = utils.non_max_suppression(
182 |             np.array(boxes), np.arange(N), 0.3)
183 |         shapes = [s for i, s in enumerate(shapes) if i in keep_ixs]
184 |         return bg_color, shapes
185 | 
--------------------------------------------------------------------------------
/src/mask_rcnn_ros/shapes.py:
--------------------------------------------------------------------------------
1 | """
2 | Mask R-CNN
3 | Configurations and data loading code for the synthetic Shapes dataset.
4 | This is a duplicate of the code in the notebook train_shapes.ipynb for easy
5 | import into other notebooks, such as inspect_model.ipynb.
6 | 
7 | Copyright (c) 2017 Matterport, Inc.
8 | Licensed under the MIT License (see LICENSE for details)
9 | Written by Waleed Abdulla
10 | """
11 | 
12 | import math
13 | import random
14 | import numpy as np
15 | import cv2
16 | 
17 | from config import Config
18 | import utils
19 | 
20 | 
21 | class ShapesConfig(Config):
22 |     """Configuration for training on the toy shapes dataset.
23 |     Derives from the base Config class and overrides values specific
24 |     to the toy shapes dataset.
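 
    For example (a minimal sketch, assuming the base Config API from
    config.py):
 
        config = ShapesConfig()
        config.display()  # print the resulting settings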
25 | """ 26 | # Give the configuration a recognizable name 27 | NAME = "shapes" 28 | 29 | # Train on 1 GPU and 8 images per GPU. We can put multiple images on each 30 | # GPU because the images are small. Batch size is 8 (GPUs * images/GPU). 31 | GPU_COUNT = 1 32 | IMAGES_PER_GPU = 8 33 | 34 | # Number of classes (including background) 35 | NUM_CLASSES = 1 + 3 # background + 3 shapes 36 | 37 | # Use small images for faster training. Set the limits of the small side 38 | # the large side, and that determines the image shape. 39 | IMAGE_MIN_DIM = 128 40 | IMAGE_MAX_DIM = 128 41 | 42 | # Use smaller anchors because our image and objects are small 43 | RPN_ANCHOR_SCALES = (8, 16, 32, 64, 128) # anchor side in pixels 44 | 45 | # Reduce training ROIs per image because the images are small and have 46 | # few objects. Aim to allow ROI sampling to pick 33% positive ROIs. 47 | TRAIN_ROIS_PER_IMAGE = 32 48 | 49 | # Use a small epoch since the data is simple 50 | STEPS_PER_EPOCH = 100 51 | 52 | # use small validation steps since the epoch is small 53 | VALIDATION_STEPS = 5 54 | 55 | 56 | class ShapesDataset(utils.Dataset): 57 | """Generates the shapes synthetic dataset. The dataset consists of simple 58 | shapes (triangles, squares, circles) placed randomly on a blank surface. 59 | The images are generated on the fly. No file access required. 60 | """ 61 | 62 | def load_shapes(self, count, height, width): 63 | """Generate the requested number of synthetic images. 64 | count: number of images to generate. 65 | height, width: the size of the generated images. 66 | """ 67 | # Add classes 68 | self.add_class("shapes", 1, "square") 69 | self.add_class("shapes", 2, "circle") 70 | self.add_class("shapes", 3, "triangle") 71 | 72 | # Add images 73 | # Generate random specifications of images (i.e. color and 74 | # list of shapes sizes and locations). This is more compact than 75 | # actual images. Images are generated on the fly in load_image(). 76 | for i in range(count): 77 | bg_color, shapes = self.random_image(height, width) 78 | self.add_image("shapes", image_id=i, path=None, 79 | width=width, height=height, 80 | bg_color=bg_color, shapes=shapes) 81 | 82 | def load_image(self, image_id): 83 | """Generate an image from the specs of the given image ID. 84 | Typically this function loads the image from a file, but 85 | in this case it generates the image on the fly from the 86 | specs in image_info. 87 | """ 88 | info = self.image_info[image_id] 89 | bg_color = np.array(info['bg_color']).reshape([1, 1, 3]) 90 | image = np.ones([info['height'], info['width'], 3], dtype=np.uint8) 91 | image = image * bg_color.astype(np.uint8) 92 | for shape, color, dims in info['shapes']: 93 | image = self.draw_shape(image, shape, dims, color) 94 | return image 95 | 96 | def image_reference(self, image_id): 97 | """Return the shapes data of the image.""" 98 | info = self.image_info[image_id] 99 | if info["source"] == "shapes": 100 | return info["shapes"] 101 | else: 102 | super(self.__class__).image_reference(self, image_id) 103 | 104 | def load_mask(self, image_id): 105 | """Generate instance masks for shapes of the given image ID. 
106 | """ 107 | info = self.image_info[image_id] 108 | shapes = info['shapes'] 109 | count = len(shapes) 110 | mask = np.zeros([info['height'], info['width'], count], dtype=np.uint8) 111 | for i, (shape, _, dims) in enumerate(info['shapes']): 112 | mask[:, :, i:i + 1] = self.draw_shape(mask[:, :, i:i + 1].copy(), 113 | shape, dims, 1) 114 | # Handle occlusions 115 | occlusion = np.logical_not(mask[:, :, -1]).astype(np.uint8) 116 | for i in range(count - 2, -1, -1): 117 | mask[:, :, i] = mask[:, :, i] * occlusion 118 | occlusion = np.logical_and( 119 | occlusion, np.logical_not(mask[:, :, i])) 120 | # Map class names to class IDs. 121 | class_ids = np.array([self.class_names.index(s[0]) for s in shapes]) 122 | return mask, class_ids.astype(np.int32) 123 | 124 | def draw_shape(self, image, shape, dims, color): 125 | """Draws a shape from the given specs.""" 126 | # Get the center x, y and the size s 127 | x, y, s = dims 128 | if shape == 'square': 129 | image = cv2.rectangle(image, (x - s, y - s), 130 | (x + s, y + s), color, -1) 131 | elif shape == "circle": 132 | image = cv2.circle(image, (x, y), s, color, -1) 133 | elif shape == "triangle": 134 | points = np.array([[(x, y - s), 135 | (x - s / math.sin(math.radians(60)), y + s), 136 | (x + s / math.sin(math.radians(60)), y + s), 137 | ]], dtype=np.int32) 138 | image = cv2.fillPoly(image, points, color) 139 | return image 140 | 141 | def random_shape(self, height, width): 142 | """Generates specifications of a random shape that lies within 143 | the given height and width boundaries. 144 | Returns a tuple of three valus: 145 | * The shape name (square, circle, ...) 146 | * Shape color: a tuple of 3 values, RGB. 147 | * Shape dimensions: A tuple of values that define the shape size 148 | and location. Differs per shape type. 149 | """ 150 | # Shape 151 | shape = random.choice(["square", "circle", "triangle"]) 152 | # Color 153 | color = tuple([random.randint(0, 255) for _ in range(3)]) 154 | # Center x, y 155 | buffer = 20 156 | y = random.randint(buffer, height - buffer - 1) 157 | x = random.randint(buffer, width - buffer - 1) 158 | # Size 159 | s = random.randint(buffer, height // 4) 160 | return shape, color, (x, y, s) 161 | 162 | def random_image(self, height, width): 163 | """Creates random specifications of an image with multiple shapes. 164 | Returns the background color of the image and a list of shape 165 | specifications that can be used to draw the image. 166 | """ 167 | # Pick random background color 168 | bg_color = np.array([random.randint(0, 255) for _ in range(3)]) 169 | # Generate a few random shapes and record their 170 | # bounding boxes 171 | shapes = [] 172 | boxes = [] 173 | N = random.randint(1, 4) 174 | for _ in range(N): 175 | shape, color, dims = self.random_shape(height, width) 176 | shapes.append((shape, color, dims)) 177 | x, y, s = dims 178 | boxes.append([y - s, x - s, y + s, x + s]) 179 | # Apply non-max suppression wit 0.3 threshold to avoid 180 | # shapes covering each other 181 | keep_ixs = utils.non_max_suppression( 182 | np.array(boxes), np.arange(N), 0.3) 183 | shapes = [s for i, s in enumerate(shapes) if i in keep_ixs] 184 | return bg_color, shapes 185 | -------------------------------------------------------------------------------- /nodes/visualize.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Display and Visualization Functions. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 
6 | Licensed under the MIT License (see LICENSE for details)
7 | Written by Waleed Abdulla
8 | """
9 | 
10 | import sys
11 | # Drop the ROS Python 2 path (if present) before importing cv2 so the system OpenCV bindings are picked up instead of the ROS-shipped build.
12 | if '/opt/ros/indigo/lib/python2.7/dist-packages' in sys.path:
13 |     sys.path.remove('/opt/ros/indigo/lib/python2.7/dist-packages')
14 | import random
15 | import itertools
16 | import colorsys
17 | import numpy as np
18 | from skimage.measure import find_contours
19 | import matplotlib.pyplot as plt
20 | import matplotlib.patches as patches
21 | import matplotlib.lines as lines
22 | from matplotlib.patches import Polygon
23 | import cv2
24 | import IPython.display
25 | import utils
26 | 
27 | ############################################################
28 | # Visualization
29 | ############################################################
30 | def display_images(images, titles=None, cols=4, cmap=None, norm=None,
31 |                    interpolation=None):
32 |     """Display the given set of images, optionally with titles.
33 |     images: list or array of image tensors in HWC format.
34 |     titles: optional. A list of titles to display with each image.
35 |     cols: number of images per row
36 |     cmap: Optional. Color map to use. For example, "Blues".
37 |     norm: Optional. A Normalize instance to map values to colors.
38 |     interpolation: Optional. Image interpolation to use for display.
39 |     """
40 |     titles = titles if titles is not None else [""] * len(images)
41 |     rows = len(images) // cols + 1
42 |     plt.figure(figsize=(14, 14 * rows // cols))
43 |     i = 1
44 |     for image, title in zip(images, titles):
45 |         plt.subplot(rows, cols, i)
46 |         plt.title(title, fontsize=9)
47 |         plt.axis('off')
48 |         plt.imshow(image.astype(np.uint8), cmap=cmap,
49 |                    norm=norm, interpolation=interpolation)
50 |         i += 1
51 |     plt.show()
52 | 
53 | 
54 | def random_colors(N, bright=True):
55 |     """
56 |     Generate random colors.
57 |     To get visually distinct colors, generate them in HSV space then
58 |     convert to RGB.
59 |     """
60 |     brightness = 1.0 if bright else 0.7
61 |     hsv = [(float(i) / N, 1, brightness) for i in range(N)]
62 |     colors = list(map(lambda c: colorsys.hsv_to_rgb(*c), hsv))
63 |     random.shuffle(colors)
64 |     return colors
65 | 
66 | 
67 | def apply_mask(image, mask, color, alpha=0.5):
68 |     """Apply the given mask to the image.
69 |     """
70 |     for c in range(3):
71 |         image[:, :, c] = np.where(mask == 1,
72 |                                   image[:, :, c] *
73 |                                   (1 - alpha) + alpha * color[c] * 255,
74 |                                   image[:, :, c])
75 |     return image
76 | 
77 | 
78 | def display_instances(image, boxes, masks, class_ids, class_names,
79 |                       scores=None, title="",
80 |                       figsize=(16, 16), ax=None, class_colors=None):
81 |     """
82 |     boxes: [num_instance, (y1, x1, y2, x2, class_id)] in image coordinates.
83 |     masks: [height, width, num_instances]
84 |     class_ids: [num_instances]
85 |     class_names: list of class names of the dataset
86 |     scores: (optional) confidence scores for each box
87 |     figsize: (optional) the size of the image.
88 |     """
89 |     # Number of instances
90 |     N = boxes.shape[0]
91 |     if not N:
92 |         print("\n*** No instances to display *** \n")
93 |     else:
94 |         assert boxes.shape[0] == masks.shape[-1] == class_ids.shape[0]
95 | 
96 |     if not ax:
97 |         _, ax = plt.subplots(1, figsize=figsize)
98 | 
99 |     # Generate random colors
100 |     if class_colors is None:
101 |         colors = random_colors(N)
102 | 
103 |     # Show area outside image boundaries.
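    # (ylim runs from height+10 down to -10, i.e. the y-axis is inverted to
    # match image coordinates with the origin at the top-left, plus a margin.)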
104 | height, width = image.shape[:2] 105 | ax.set_ylim(height + 10, -10) 106 | ax.set_xlim(-10, width + 10) 107 | ax.axis('off') 108 | ax.set_title(title) 109 | 110 | masked_image = image.astype(np.uint32).copy() 111 | for i in range(N): 112 | class_id = class_ids[i] 113 | if class_colors is None: 114 | color = colors[i] 115 | else: 116 | color = class_colors[class_id] 117 | 118 | # Bounding box 119 | if not np.any(boxes[i]): 120 | # Skip this instance. Has no bbox. Likely lost in image cropping. 121 | continue 122 | y1, x1, y2, x2 = boxes[i] 123 | p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, 124 | alpha=0.7, linestyle="dashed", 125 | edgecolor=color, facecolor='none') 126 | ax.add_patch(p) 127 | 128 | # Label 129 | score = scores[i] if scores is not None else None 130 | label = class_names[class_id] 131 | x = random.randint(x1, (x1 + x2) // 2) 132 | caption = "{} {:.3f}".format(label, score) if score else label 133 | ax.text(x1, y1 + 8, caption, 134 | color='w', size=11, backgroundcolor="none") 135 | 136 | # Mask 137 | mask = masks[:, :, i] 138 | masked_image = apply_mask(masked_image, mask, color) 139 | 140 | # Mask Polygon 141 | # Pad to ensure proper polygons for masks that touch image edges. 142 | padded_mask = np.zeros( 143 | (mask.shape[0] + 2, mask.shape[1] + 2), dtype=np.uint8) 144 | padded_mask[1:-1, 1:-1] = mask 145 | contours = find_contours(padded_mask, 0.5) 146 | for verts in contours: 147 | # Subtract the padding and flip (y, x) to (x, y) 148 | verts = np.fliplr(verts) - 1 149 | p = Polygon(verts, facecolor="none", edgecolor=color) 150 | ax.add_patch(p) 151 | ax.imshow(masked_image.astype(np.uint8)) 152 | #plt.show() 153 | 154 | 155 | def display_instances_cv(image, boxes, masks, class_ids, class_names, 156 | scores=None, class_colors=None, alpha=0.7): 157 | """ 158 | boxes: [num_instance, (y1, x1, y2, x2, class_id)] in image coordinates. 159 | masks: [height, width, num_instances] 160 | class_ids: [num_instances] 161 | class_names: list of class names of the dataset 162 | scores: (optional) confidence scores for each box 163 | class_colors: a list mapping class ids to their colors 164 | alpha: the amount of transparency of the mask overlay 165 | """ 166 | # Number of instances 167 | n = boxes.shape[0] 168 | if n: 169 | assert boxes.shape[0] == masks.shape[-1] == class_ids.shape[0] 170 | 171 | # Generate random colors 172 | if class_colors is None: 173 | colors = random_colors(n) 174 | 175 | for i in range(n): 176 | class_id = class_ids[i] 177 | if class_colors is None: 178 | color = colors[i] 179 | else: 180 | color = class_colors[class_id] 181 | 182 | # Transform class colors to BGR and rescale [0-255] for OpenCv 183 | bgr_color = tuple(c*255 for c in color[::-1]) 184 | 185 | # Draw bounding boxes 186 | if not np.any(boxes[i]): 187 | # Skip this instance. Has no bbox. Likely lost in image cropping. 
188 | continue 189 | y1, x1, y2, x2 = boxes[i] 190 | cv2.rectangle(image, (x1, y1), (x2, y2), color=bgr_color, thickness=2) 191 | 192 | # Draw transparent mask 193 | overlay = image.copy() 194 | mask = masks[:, :, i] 195 | __, thresh = cv2.threshold(mask, 0.5, 1, cv2.THRESH_BINARY) 196 | _, contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) 197 | cv2.drawContours(image, contours, -1, color=bgr_color, thickness=cv2.FILLED) 198 | cv2.addWeighted(overlay, alpha, image, 1 - alpha, 0, image) 199 | 200 | # Draw text label 201 | score = scores[i] if scores is not None else None 202 | label = class_names[class_id] 203 | caption = "{} {:.3f}".format(label, score) if score else label 204 | cv2.putText(image, caption, (x1, y1 + 12), fontFace=cv2.FONT_HERSHEY_COMPLEX, fontScale=0.5, 205 | color=(255, 255, 255)) 206 | 207 | return image 208 | 209 | 210 | def draw_rois(image, rois, refined_rois, mask, class_ids, class_names, limit=10): 211 | """ 212 | anchors: [n, (y1, x1, y2, x2)] list of anchors in image coordinates. 213 | proposals: [n, 4] the same anchors but refined to fit objects better. 214 | """ 215 | masked_image = image.copy() 216 | 217 | # Pick random anchors in case there are too many. 218 | ids = np.arange(rois.shape[0], dtype=np.int32) 219 | ids = np.random.choice( 220 | ids, limit, replace=False) if ids.shape[0] > limit else ids 221 | 222 | fig, ax = plt.subplots(1, figsize=(12, 12)) 223 | if rois.shape[0] > limit: 224 | plt.title("Showing {} random ROIs out of {}".format( 225 | len(ids), rois.shape[0])) 226 | else: 227 | plt.title("{} ROIs".format(len(ids))) 228 | 229 | # Show area outside image boundaries. 230 | ax.set_ylim(image.shape[0] + 20, -20) 231 | ax.set_xlim(-50, image.shape[1] + 20) 232 | ax.axis('off') 233 | 234 | for i, id in enumerate(ids): 235 | color = np.random.rand(3) 236 | class_id = class_ids[id] 237 | # ROI 238 | y1, x1, y2, x2 = rois[id] 239 | p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, 240 | edgecolor=color if class_id else "gray", 241 | facecolor='none', linestyle="dashed") 242 | ax.add_patch(p) 243 | # Refined ROI 244 | if class_id: 245 | ry1, rx1, ry2, rx2 = refined_rois[id] 246 | p = patches.Rectangle((rx1, ry1), rx2 - rx1, ry2 - ry1, linewidth=2, 247 | edgecolor=color, facecolor='none') 248 | ax.add_patch(p) 249 | # Connect the top-left corners of the anchor and proposal for easy visualization 250 | ax.add_line(lines.Line2D([x1, rx1], [y1, ry1], color=color)) 251 | 252 | # Label 253 | label = class_names[class_id] 254 | ax.text(rx1, ry1 + 8, "{}".format(label), 255 | color='w', size=11, backgroundcolor="none") 256 | 257 | # Mask 258 | m = utils.unmold_mask(mask[id], rois[id] 259 | [:4].astype(np.int32), image.shape) 260 | masked_image = apply_mask(masked_image, m, color) 261 | 262 | ax.imshow(masked_image) 263 | 264 | # Print stats 265 | print("Positive ROIs: ", class_ids[class_ids > 0].shape[0]) 266 | print("Negative ROIs: ", class_ids[class_ids == 0].shape[0]) 267 | print("Positive Ratio: {:.2f}".format( 268 | class_ids[class_ids > 0].shape[0] / class_ids.shape[0])) 269 | 270 | 271 | # TODO: Replace with matplotlib equivalent? 272 | def draw_box(image, box, color): 273 | """Draw 3-pixel width bounding boxes on the given image array. 274 | color: list of 3 int values for RGB. 
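 
    Example (illustrative):
        image = draw_box(image, [y1, x1, y2, x2], [255, 0, 0])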
275 | """ 276 | y1, x1, y2, x2 = box 277 | image[y1:y1 + 2, x1:x2] = color 278 | image[y2:y2 + 2, x1:x2] = color 279 | image[y1:y2, x1:x1 + 2] = color 280 | image[y1:y2, x2:x2 + 2] = color 281 | return image 282 | 283 | 284 | def display_top_masks(image, mask, class_ids, class_names, limit=4): 285 | """Display the given image and the top few class masks.""" 286 | to_display = [] 287 | titles = [] 288 | to_display.append(image) 289 | titles.append("H x W={}x{}".format(image.shape[0], image.shape[1])) 290 | # Pick top prominent classes in this image 291 | unique_class_ids = np.unique(class_ids) 292 | mask_area = [np.sum(mask[:, :, np.where(class_ids == i)[0]]) 293 | for i in unique_class_ids] 294 | top_ids = [v[0] for v in sorted(zip(unique_class_ids, mask_area), 295 | key=lambda r: r[1], reverse=True) if v[1] > 0] 296 | # Generate images and titles 297 | for i in range(limit): 298 | class_id = top_ids[i] if i < len(top_ids) else -1 299 | # Pull masks of instances belonging to the same class. 300 | m = mask[:, :, np.where(class_ids == class_id)[0]] 301 | m = np.sum(m * np.arange(1, m.shape[-1] + 1), -1) 302 | to_display.append(m) 303 | titles.append(class_names[class_id] if class_id != -1 else "-") 304 | display_images(to_display, titles=titles, cols=limit + 1, cmap="Blues_r") 305 | 306 | 307 | def plot_precision_recall(AP, precisions, recalls): 308 | """Draw the precision-recall curve. 309 | 310 | AP: Average precision at IoU >= 0.5 311 | precisions: list of precision values 312 | recalls: list of recall values 313 | """ 314 | # Plot the Precision-Recall curve 315 | _, ax = plt.subplots(1) 316 | ax.set_title("Precision-Recall Curve. AP@50 = {:.3f}".format(AP)) 317 | ax.set_ylim(0, 1.1) 318 | ax.set_xlim(0, 1.1) 319 | _ = ax.plot(recalls, precisions) 320 | 321 | 322 | def plot_overlaps(gt_class_ids, pred_class_ids, pred_scores, 323 | overlaps, class_names, threshold=0.5): 324 | """Draw a grid showing how ground truth objects are classified. 325 | gt_class_ids: [N] int. Ground truth class IDs 326 | pred_class_id: [N] int. Predicted class IDs 327 | pred_scores: [N] float. The probability scores of predicted classes 328 | overlaps: [pred_boxes, gt_boxes] IoU overlaps of predictins and GT boxes. 329 | class_names: list of all class names in the dataset 330 | threshold: Float. The prediction probability required to predict a class 331 | """ 332 | gt_class_ids = gt_class_ids[gt_class_ids != 0] 333 | pred_class_ids = pred_class_ids[pred_class_ids != 0] 334 | 335 | plt.figure(figsize=(12, 10)) 336 | plt.imshow(overlaps, interpolation='nearest', cmap=plt.cm.Blues) 337 | plt.yticks(np.arange(len(pred_class_ids)), 338 | ["{} ({:.2f})".format(class_names[int(id)], pred_scores[i]) 339 | for i, id in enumerate(pred_class_ids)]) 340 | plt.xticks(np.arange(len(gt_class_ids)), 341 | [class_names[int(id)] for id in gt_class_ids], rotation=90) 342 | 343 | thresh = overlaps.max() / 2. 
344 |     for i, j in itertools.product(range(overlaps.shape[0]),
345 |                                   range(overlaps.shape[1])):
346 |         text = ""
347 |         if overlaps[i, j] > threshold:
348 |             text = "match" if gt_class_ids[j] == pred_class_ids[i] else "wrong"
349 |         color = ("white" if overlaps[i, j] > thresh
350 |                  else "black" if overlaps[i, j] > 0
351 |                  else "grey")
352 |         plt.text(j, i, "{:.3f}\n{}".format(overlaps[i, j], text),
353 |                  horizontalalignment="center", verticalalignment="center",
354 |                  fontsize=9, color=color)
355 | 
356 |     plt.tight_layout()
357 |     plt.xlabel("Ground Truth")
358 |     plt.ylabel("Predictions")
359 | 
360 | 
361 | def draw_boxes(image, boxes=None, refined_boxes=None,
362 |                masks=None, captions=None, visibilities=None,
363 |                title="", ax=None):
364 |     """Draw bounding boxes and segmentation masks with different
365 |     customizations.
366 | 
367 |     boxes: [N, (y1, x1, y2, x2, class_id)] in image coordinates.
368 |     refined_boxes: Like boxes, but draw with solid lines to show
369 |         that they're the result of refining 'boxes'.
370 |     masks: [N, height, width]
371 |     captions: List of N titles to display on each box
372 |     visibilities: (optional) List of values of 0, 1, or 2. Determines how
373 |         prominent each bounding box should be.
374 |     title: An optional title to show over the image
375 |     ax: (optional) Matplotlib axis to draw on.
376 |     """
377 |     # Number of boxes
378 |     assert boxes is not None or refined_boxes is not None
379 |     N = boxes.shape[0] if boxes is not None else refined_boxes.shape[0]
380 | 
381 |     # Matplotlib Axis
382 |     if not ax:
383 |         _, ax = plt.subplots(1, figsize=(12, 12))
384 | 
385 |     # Generate random colors
386 |     colors = random_colors(N)
387 | 
388 |     # Show area outside image boundaries.
389 |     margin = image.shape[0] // 10
390 |     ax.set_ylim(image.shape[0] + margin, -margin)
391 |     ax.set_xlim(-margin, image.shape[1] + margin)
392 |     ax.axis('off')
393 | 
394 |     ax.set_title(title)
395 | 
396 |     masked_image = image.astype(np.uint32).copy()
397 |     for i in range(N):
398 |         # Box visibility
399 |         visibility = visibilities[i] if visibilities is not None else 1
400 |         if visibility == 0:
401 |             color = "gray"
402 |             style = "dotted"
403 |             alpha = 0.5
404 |         elif visibility == 1:
405 |             color = colors[i]
406 |             style = "dotted"
407 |             alpha = 1
408 |         elif visibility == 2:
409 |             color = colors[i]
410 |             style = "solid"
411 |             alpha = 1
412 | 
413 |         # Boxes
414 |         if boxes is not None:
415 |             if not np.any(boxes[i]):
416 |                 # Skip this instance. Has no bbox. Likely lost in cropping.
417 |                 continue
418 |             y1, x1, y2, x2 = boxes[i]
419 |             p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2,
420 |                                   alpha=alpha, linestyle=style,
421 |                                   edgecolor=color, facecolor='none')
422 |             ax.add_patch(p)
423 | 
424 |         # Refined boxes
425 |         if refined_boxes is not None and visibility > 0:
426 |             ry1, rx1, ry2, rx2 = refined_boxes[i].astype(np.int32)
427 |             p = patches.Rectangle((rx1, ry1), rx2 - rx1, ry2 - ry1, linewidth=2,
428 |                                   edgecolor=color, facecolor='none')
429 |             ax.add_patch(p)
430 |             # Connect the top-left corners of the anchor and proposal
431 |             if boxes is not None:
432 |                 ax.add_line(lines.Line2D([x1, rx1], [y1, ry1], color=color))
433 | 
434 |         # Captions
435 |         if captions is not None:
436 |             caption = captions[i]
437 |             # If there are refined boxes, display captions on them
438 |             if refined_boxes is not None:
439 |                 y1, x1, y2, x2 = ry1, rx1, ry2, rx2
440 |             x = random.randint(x1, (x1 + x2) // 2)
441 |             ax.text(x1, y1, caption, size=11, verticalalignment='top',
442 |                     color='w', backgroundcolor="none",
443 |                     bbox={'facecolor': color, 'alpha': 0.5,
444 |                           'pad': 2, 'edgecolor': 'none'})
445 | 
446 |         # Masks
447 |         if masks is not None:
448 |             mask = masks[:, :, i]
449 |             masked_image = apply_mask(masked_image, mask, color)
450 |             # Mask Polygon
451 |             # Pad to ensure proper polygons for masks that touch image edges.
452 |             padded_mask = np.zeros(
453 |                 (mask.shape[0] + 2, mask.shape[1] + 2), dtype=np.uint8)
454 |             padded_mask[1:-1, 1:-1] = mask
455 |             contours = find_contours(padded_mask, 0.5)
456 |             for verts in contours:
457 |                 # Subtract the padding and flip (y, x) to (x, y)
458 |                 verts = np.fliplr(verts) - 1
459 |                 p = Polygon(verts, facecolor="none", edgecolor=color)
460 |                 ax.add_patch(p)
461 |     ax.imshow(masked_image.astype(np.uint8))
462 | 
463 | 
464 | def display_table(table):
465 |     """Display values in a table format.
466 |     table: an iterable of rows, and each row is an iterable of values.
467 |     """
468 |     html = ""
469 |     for row in table:
470 |         row_html = ""
471 |         for col in row:
472 |             row_html += "<td>{:40}</td>".format(str(col))
473 |         html += "<tr>" + row_html + "</tr>"
474 |     html = "<table>" + html + "</table>"
" 475 | IPython.display.display(IPython.display.HTML(html)) 476 | 477 | 478 | def display_weight_stats(model): 479 | """Scans all the weights in the model and returns a list of tuples 480 | that contain stats about each weight. 481 | """ 482 | layers = model.get_trainable_layers() 483 | table = [["WEIGHT NAME", "SHAPE", "MIN", "MAX", "STD"]] 484 | for l in layers: 485 | weight_values = l.get_weights() # list of Numpy arrays 486 | weight_tensors = l.weights # list of TF tensors 487 | for i, w in enumerate(weight_values): 488 | weight_name = weight_tensors[i].name 489 | # Detect problematic layers. Exclude biases of conv layers. 490 | alert = "" 491 | if w.min() == w.max() and not (l.__class__.__name__ == "Conv2D" and i == 1): 492 | alert += "*** dead?" 493 | if np.abs(w.min()) > 1000 or np.abs(w.max()) > 1000: 494 | alert += "*** Overflow?" 495 | # Add row 496 | table.append([ 497 | weight_name + alert, 498 | str(w.shape), 499 | "{:+9.4f}".format(w.min()), 500 | "{:+10.4f}".format(w.max()), 501 | "{:+9.4f}".format(w.std()), 502 | ]) 503 | display_table(table) 504 | -------------------------------------------------------------------------------- /src/mask_rcnn_ros/visualize.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Display and Visualization Functions. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | """ 9 | 10 | import random 11 | import itertools 12 | import colorsys 13 | import numpy as np 14 | from skimage.measure import find_contours 15 | import matplotlib.pyplot as plt 16 | import matplotlib.patches as patches 17 | import matplotlib.lines as lines 18 | from matplotlib.patches import Polygon 19 | import cv2 20 | import IPython.display 21 | 22 | import utils 23 | import sys 24 | sys.path.remove('/opt/ros/indigo/lib/python2.7/dist-packages') 25 | 26 | ############################################################ 27 | # Visualization 28 | ############################################################ 29 | 30 | def display_images(images, titles=None, cols=4, cmap=None, norm=None, 31 | interpolation=None): 32 | """Display the given set of images, optionally with titles. 33 | images: list or array of image tensors in HWC format. 34 | titles: optional. A list of titles to display with each image. 35 | cols: number of images per row 36 | cmap: Optional. Color map to use. For example, "Blues". 37 | norm: Optional. A Normalize instance to map values to colors. 38 | interpolation: Optional. Image interporlation to use for display. 39 | """ 40 | titles = titles if titles is not None else [""] * len(images) 41 | rows = len(images) // cols + 1 42 | plt.figure(figsize=(14, 14 * rows // cols)) 43 | i = 1 44 | for image, title in zip(images, titles): 45 | plt.subplot(rows, cols, i) 46 | plt.title(title, fontsize=9) 47 | plt.axis('off') 48 | plt.imshow(image.astype(np.uint8), cmap=cmap, 49 | norm=norm, interpolation=interpolation) 50 | i += 1 51 | plt.show() 52 | 53 | 54 | def random_colors(N, bright=True): 55 | """ 56 | Generate random colors. 57 | To get visually distinct colors, generate them in HSV space then 58 | convert to RGB. 
59 | """ 60 | brightness = 1.0 if bright else 0.7 61 | hsv = [(float(i) / N, 1, brightness) for i in range(N)] 62 | colors = list(map(lambda c: colorsys.hsv_to_rgb(*c), hsv)) 63 | random.shuffle(colors) 64 | return colors 65 | 66 | 67 | def apply_mask(image, mask, color, alpha=0.5): 68 | """Apply the given mask to the image. 69 | """ 70 | for c in range(3): 71 | image[:, :, c] = np.where(mask == 1, 72 | image[:, :, c] * 73 | (1 - alpha) + alpha * color[c] * 255, 74 | image[:, :, c]) 75 | return image 76 | 77 | 78 | def display_instances(image, boxes, masks, class_ids, class_names, 79 | scores=None, title="", 80 | figsize=(16, 16), ax=None, class_colors=None): 81 | """ 82 | boxes: [num_instance, (y1, x1, y2, x2, class_id)] in image coordinates. 83 | masks: [height, width, num_instances] 84 | class_ids: [num_instances] 85 | class_names: list of class names of the dataset 86 | scores: (optional) confidence scores for each box 87 | figsize: (optional) the size of the image. 88 | """ 89 | # Number of instances 90 | N = boxes.shape[0] 91 | if not N: 92 | print("\n*** No instances to display *** \n") 93 | else: 94 | assert boxes.shape[0] == masks.shape[-1] == class_ids.shape[0] 95 | 96 | if not ax: 97 | _, ax = plt.subplots(1, figsize=figsize) 98 | 99 | # Generate random colors 100 | if class_colors is None: 101 | colors = random_colors(N) 102 | 103 | # Show area outside image boundaries. 104 | height, width = image.shape[:2] 105 | ax.set_ylim(height + 10, -10) 106 | ax.set_xlim(-10, width + 10) 107 | ax.axis('off') 108 | ax.set_title(title) 109 | 110 | masked_image = image.astype(np.uint32).copy() 111 | for i in range(N): 112 | class_id = class_ids[i] 113 | if class_colors is None: 114 | color = colors[i] 115 | else: 116 | color = class_colors[class_id] 117 | 118 | # Bounding box 119 | if not np.any(boxes[i]): 120 | # Skip this instance. Has no bbox. Likely lost in image cropping. 121 | continue 122 | y1, x1, y2, x2 = boxes[i] 123 | p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, 124 | alpha=0.7, linestyle="dashed", 125 | edgecolor=color, facecolor='none') 126 | ax.add_patch(p) 127 | 128 | # Label 129 | score = scores[i] if scores is not None else None 130 | label = class_names[class_id] 131 | x = random.randint(x1, (x1 + x2) // 2) 132 | caption = "{} {:.3f}".format(label, score) if score else label 133 | ax.text(x1, y1 + 8, caption, 134 | color='w', size=11, backgroundcolor="none") 135 | 136 | # Mask 137 | mask = masks[:, :, i] 138 | masked_image = apply_mask(masked_image, mask, color) 139 | 140 | # Mask Polygon 141 | # Pad to ensure proper polygons for masks that touch image edges. 142 | padded_mask = np.zeros( 143 | (mask.shape[0] + 2, mask.shape[1] + 2), dtype=np.uint8) 144 | padded_mask[1:-1, 1:-1] = mask 145 | contours = find_contours(padded_mask, 0.5) 146 | for verts in contours: 147 | # Subtract the padding and flip (y, x) to (x, y) 148 | verts = np.fliplr(verts) - 1 149 | p = Polygon(verts, facecolor="none", edgecolor=color) 150 | ax.add_patch(p) 151 | ax.imshow(masked_image.astype(np.uint8)) 152 | #plt.show() 153 | 154 | 155 | def display_instances_cv(image, boxes, masks, class_ids, class_names, 156 | scores=None, class_colors=None, alpha=0.7): 157 | """ 158 | boxes: [num_instance, (y1, x1, y2, x2, class_id)] in image coordinates. 
159 | masks: [height, width, num_instances] 160 | class_ids: [num_instances] 161 | class_names: list of class names of the dataset 162 | scores: (optional) confidence scores for each box 163 | class_colors: a list mapping class ids to their colors 164 | alpha: the amount of transparency of the mask overlay 165 | """ 166 | # Number of instances 167 | n = boxes.shape[0] 168 | if n: 169 | assert boxes.shape[0] == masks.shape[-1] == class_ids.shape[0] 170 | 171 | # Generate random colors 172 | if class_colors is None: 173 | colors = random_colors(n) 174 | 175 | for i in range(n): 176 | class_id = class_ids[i] 177 | if class_colors is None: 178 | color = colors[i] 179 | else: 180 | color = class_colors[class_id] 181 | 182 | # Transform class colors to BGR and rescale [0-255] for OpenCv 183 | bgr_color = tuple(c*255 for c in color[::-1]) 184 | 185 | # Draw bounding boxes 186 | if not np.any(boxes[i]): 187 | # Skip this instance. Has no bbox. Likely lost in image cropping. 188 | continue 189 | y1, x1, y2, x2 = boxes[i] 190 | cv2.rectangle(image, (x1, y1), (x2, y2), color=bgr_color, thickness=2) 191 | 192 | # Draw transparent mask 193 | overlay = image.copy() 194 | mask = masks[:, :, i] 195 | __, thresh = cv2.threshold(mask, 0.5, 1, cv2.THRESH_BINARY) 196 | _, contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) 197 | cv2.drawContours(image, contours, -1, color=bgr_color, thickness=cv2.FILLED) 198 | cv2.addWeighted(overlay, alpha, image, 1 - alpha, 0, image) 199 | 200 | # Draw text label 201 | score = scores[i] if scores is not None else None 202 | label = class_names[class_id] 203 | caption = "{} {:.3f}".format(label, score) if score else label 204 | cv2.putText(image, caption, (x1, y1 + 12), fontFace=cv2.FONT_HERSHEY_COMPLEX, fontScale=0.5, 205 | color=(255, 255, 255)) 206 | 207 | return image 208 | 209 | 210 | def draw_rois(image, rois, refined_rois, mask, class_ids, class_names, limit=10): 211 | """ 212 | anchors: [n, (y1, x1, y2, x2)] list of anchors in image coordinates. 213 | proposals: [n, 4] the same anchors but refined to fit objects better. 214 | """ 215 | masked_image = image.copy() 216 | 217 | # Pick random anchors in case there are too many. 218 | ids = np.arange(rois.shape[0], dtype=np.int32) 219 | ids = np.random.choice( 220 | ids, limit, replace=False) if ids.shape[0] > limit else ids 221 | 222 | fig, ax = plt.subplots(1, figsize=(12, 12)) 223 | if rois.shape[0] > limit: 224 | plt.title("Showing {} random ROIs out of {}".format( 225 | len(ids), rois.shape[0])) 226 | else: 227 | plt.title("{} ROIs".format(len(ids))) 228 | 229 | # Show area outside image boundaries. 
230 | ax.set_ylim(image.shape[0] + 20, -20) 231 | ax.set_xlim(-50, image.shape[1] + 20) 232 | ax.axis('off') 233 | 234 | for i, id in enumerate(ids): 235 | color = np.random.rand(3) 236 | class_id = class_ids[id] 237 | # ROI 238 | y1, x1, y2, x2 = rois[id] 239 | p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, 240 | edgecolor=color if class_id else "gray", 241 | facecolor='none', linestyle="dashed") 242 | ax.add_patch(p) 243 | # Refined ROI 244 | if class_id: 245 | ry1, rx1, ry2, rx2 = refined_rois[id] 246 | p = patches.Rectangle((rx1, ry1), rx2 - rx1, ry2 - ry1, linewidth=2, 247 | edgecolor=color, facecolor='none') 248 | ax.add_patch(p) 249 | # Connect the top-left corners of the anchor and proposal for easy visualization 250 | ax.add_line(lines.Line2D([x1, rx1], [y1, ry1], color=color)) 251 | 252 | # Label 253 | label = class_names[class_id] 254 | ax.text(rx1, ry1 + 8, "{}".format(label), 255 | color='w', size=11, backgroundcolor="none") 256 | 257 | # Mask 258 | m = utils.unmold_mask(mask[id], rois[id] 259 | [:4].astype(np.int32), image.shape) 260 | masked_image = apply_mask(masked_image, m, color) 261 | 262 | ax.imshow(masked_image) 263 | 264 | # Print stats 265 | print("Positive ROIs: ", class_ids[class_ids > 0].shape[0]) 266 | print("Negative ROIs: ", class_ids[class_ids == 0].shape[0]) 267 | print("Positive Ratio: {:.2f}".format( 268 | class_ids[class_ids > 0].shape[0] / class_ids.shape[0])) 269 | 270 | 271 | # TODO: Replace with matplotlib equivalent? 272 | def draw_box(image, box, color): 273 | """Draw 3-pixel width bounding boxes on the given image array. 274 | color: list of 3 int values for RGB. 275 | """ 276 | y1, x1, y2, x2 = box 277 | image[y1:y1 + 2, x1:x2] = color 278 | image[y2:y2 + 2, x1:x2] = color 279 | image[y1:y2, x1:x1 + 2] = color 280 | image[y1:y2, x2:x2 + 2] = color 281 | return image 282 | 283 | 284 | def display_top_masks(image, mask, class_ids, class_names, limit=4): 285 | """Display the given image and the top few class masks.""" 286 | to_display = [] 287 | titles = [] 288 | to_display.append(image) 289 | titles.append("H x W={}x{}".format(image.shape[0], image.shape[1])) 290 | # Pick top prominent classes in this image 291 | unique_class_ids = np.unique(class_ids) 292 | mask_area = [np.sum(mask[:, :, np.where(class_ids == i)[0]]) 293 | for i in unique_class_ids] 294 | top_ids = [v[0] for v in sorted(zip(unique_class_ids, mask_area), 295 | key=lambda r: r[1], reverse=True) if v[1] > 0] 296 | # Generate images and titles 297 | for i in range(limit): 298 | class_id = top_ids[i] if i < len(top_ids) else -1 299 | # Pull masks of instances belonging to the same class. 300 | m = mask[:, :, np.where(class_ids == class_id)[0]] 301 | m = np.sum(m * np.arange(1, m.shape[-1] + 1), -1) 302 | to_display.append(m) 303 | titles.append(class_names[class_id] if class_id != -1 else "-") 304 | display_images(to_display, titles=titles, cols=limit + 1, cmap="Blues_r") 305 | 306 | 307 | def plot_precision_recall(AP, precisions, recalls): 308 | """Draw the precision-recall curve. 309 | 310 | AP: Average precision at IoU >= 0.5 311 | precisions: list of precision values 312 | recalls: list of recall values 313 | """ 314 | # Plot the Precision-Recall curve 315 | _, ax = plt.subplots(1) 316 | ax.set_title("Precision-Recall Curve. 
AP@50 = {:.3f}".format(AP))
317 |     ax.set_ylim(0, 1.1)
318 |     ax.set_xlim(0, 1.1)
319 |     _ = ax.plot(recalls, precisions)
320 | 
321 | 
322 | def plot_overlaps(gt_class_ids, pred_class_ids, pred_scores,
323 |                   overlaps, class_names, threshold=0.5):
324 |     """Draw a grid showing how ground truth objects are classified.
325 |     gt_class_ids: [N] int. Ground truth class IDs
326 |     pred_class_ids: [N] int. Predicted class IDs
327 |     pred_scores: [N] float. The probability scores of predicted classes
328 |     overlaps: [pred_boxes, gt_boxes] IoU overlaps of predictions and GT boxes.
329 |     class_names: list of all class names in the dataset
330 |     threshold: Float. The prediction probability required to predict a class
331 |     """
332 |     gt_class_ids = gt_class_ids[gt_class_ids != 0]
333 |     pred_class_ids = pred_class_ids[pred_class_ids != 0]
334 | 
335 |     plt.figure(figsize=(12, 10))
336 |     plt.imshow(overlaps, interpolation='nearest', cmap=plt.cm.Blues)
337 |     plt.yticks(np.arange(len(pred_class_ids)),
338 |                ["{} ({:.2f})".format(class_names[int(id)], pred_scores[i])
339 |                 for i, id in enumerate(pred_class_ids)])
340 |     plt.xticks(np.arange(len(gt_class_ids)),
341 |                [class_names[int(id)] for id in gt_class_ids], rotation=90)
342 | 
343 |     thresh = overlaps.max() / 2.
344 |     for i, j in itertools.product(range(overlaps.shape[0]),
345 |                                   range(overlaps.shape[1])):
346 |         text = ""
347 |         if overlaps[i, j] > threshold:
348 |             text = "match" if gt_class_ids[j] == pred_class_ids[i] else "wrong"
349 |         color = ("white" if overlaps[i, j] > thresh
350 |                  else "black" if overlaps[i, j] > 0
351 |                  else "grey")
352 |         plt.text(j, i, "{:.3f}\n{}".format(overlaps[i, j], text),
353 |                  horizontalalignment="center", verticalalignment="center",
354 |                  fontsize=9, color=color)
355 | 
356 |     plt.tight_layout()
357 |     plt.xlabel("Ground Truth")
358 |     plt.ylabel("Predictions")
359 | 
360 | 
361 | def draw_boxes(image, boxes=None, refined_boxes=None,
362 |                masks=None, captions=None, visibilities=None,
363 |                title="", ax=None):
364 |     """Draw bounding boxes and segmentation masks with different
365 |     customizations.
366 | 
367 |     boxes: [N, (y1, x1, y2, x2, class_id)] in image coordinates.
368 |     refined_boxes: Like boxes, but draw with solid lines to show
369 |         that they're the result of refining 'boxes'.
370 |     masks: [N, height, width]
371 |     captions: List of N titles to display on each box
372 |     visibilities: (optional) List of values of 0, 1, or 2. Determines how
373 |         prominent each bounding box should be.
374 |     title: An optional title to show over the image
375 |     ax: (optional) Matplotlib axis to draw on.
376 |     """
377 |     # Number of boxes
378 |     assert boxes is not None or refined_boxes is not None
379 |     N = boxes.shape[0] if boxes is not None else refined_boxes.shape[0]
380 | 
381 |     # Matplotlib Axis
382 |     if not ax:
383 |         _, ax = plt.subplots(1, figsize=(12, 12))
384 | 
385 |     # Generate random colors
386 |     colors = random_colors(N)
387 | 
388 |     # Show area outside image boundaries.
389 |     margin = image.shape[0] // 10
390 |     ax.set_ylim(image.shape[0] + margin, -margin)
391 |     ax.set_xlim(-margin, image.shape[1] + margin)
392 |     ax.axis('off')
393 | 
394 |     ax.set_title(title)
395 | 
396 |     masked_image = image.astype(np.uint32).copy()
397 |     for i in range(N):
398 |         # Box visibility
399 |         visibility = visibilities[i] if visibilities is not None else 1
400 |         if visibility == 0:
401 |             color = "gray"
402 |             style = "dotted"
403 |             alpha = 0.5
404 |         elif visibility == 1:
405 |             color = colors[i]
406 |             style = "dotted"
407 |             alpha = 1
408 |         elif visibility == 2:
409 |             color = colors[i]
410 |             style = "solid"
411 |             alpha = 1
412 | 
413 |         # Boxes
414 |         if boxes is not None:
415 |             if not np.any(boxes[i]):
416 |                 # Skip this instance. Has no bbox. Likely lost in cropping.
417 |                 continue
418 |             y1, x1, y2, x2 = boxes[i]
419 |             p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2,
420 |                                   alpha=alpha, linestyle=style,
421 |                                   edgecolor=color, facecolor='none')
422 |             ax.add_patch(p)
423 | 
424 |         # Refined boxes
425 |         if refined_boxes is not None and visibility > 0:
426 |             ry1, rx1, ry2, rx2 = refined_boxes[i].astype(np.int32)
427 |             p = patches.Rectangle((rx1, ry1), rx2 - rx1, ry2 - ry1, linewidth=2,
428 |                                   edgecolor=color, facecolor='none')
429 |             ax.add_patch(p)
430 |             # Connect the top-left corners of the anchor and proposal
431 |             if boxes is not None:
432 |                 ax.add_line(lines.Line2D([x1, rx1], [y1, ry1], color=color))
433 | 
434 |         # Captions
435 |         if captions is not None:
436 |             caption = captions[i]
437 |             # If there are refined boxes, display captions on them
438 |             if refined_boxes is not None:
439 |                 y1, x1, y2, x2 = ry1, rx1, ry2, rx2
440 |             x = random.randint(x1, (x1 + x2) // 2)
441 |             ax.text(x1, y1, caption, size=11, verticalalignment='top',
442 |                     color='w', backgroundcolor="none",
443 |                     bbox={'facecolor': color, 'alpha': 0.5,
444 |                           'pad': 2, 'edgecolor': 'none'})
445 | 
446 |         # Masks
447 |         if masks is not None:
448 |             mask = masks[:, :, i]
449 |             masked_image = apply_mask(masked_image, mask, color)
450 |             # Mask Polygon
451 |             # Pad to ensure proper polygons for masks that touch image edges.
452 |             padded_mask = np.zeros(
453 |                 (mask.shape[0] + 2, mask.shape[1] + 2), dtype=np.uint8)
454 |             padded_mask[1:-1, 1:-1] = mask
455 |             contours = find_contours(padded_mask, 0.5)
456 |             for verts in contours:
457 |                 # Subtract the padding and flip (y, x) to (x, y)
458 |                 verts = np.fliplr(verts) - 1
459 |                 p = Polygon(verts, facecolor="none", edgecolor=color)
460 |                 ax.add_patch(p)
461 |     ax.imshow(masked_image.astype(np.uint8))
462 | 
463 | 
464 | def display_table(table):
465 |     """Display values in a table format.
466 |     table: an iterable of rows, and each row is an iterable of values.
467 |     """
468 |     html = ""
469 |     for row in table:
470 |         row_html = ""
471 |         for col in row:
472 |             row_html += "<td>{:40}</td>".format(str(col))
473 |         html += "<tr>" + row_html + "</tr>"
474 |     html = "<table>" + html + "</table>"
" 475 | IPython.display.display(IPython.display.HTML(html)) 476 | 477 | 478 | def display_weight_stats(model): 479 | """Scans all the weights in the model and returns a list of tuples 480 | that contain stats about each weight. 481 | """ 482 | layers = model.get_trainable_layers() 483 | table = [["WEIGHT NAME", "SHAPE", "MIN", "MAX", "STD"]] 484 | for l in layers: 485 | weight_values = l.get_weights() # list of Numpy arrays 486 | weight_tensors = l.weights # list of TF tensors 487 | for i, w in enumerate(weight_values): 488 | weight_name = weight_tensors[i].name 489 | # Detect problematic layers. Exclude biases of conv layers. 490 | alert = "" 491 | if w.min() == w.max() and not (l.__class__.__name__ == "Conv2D" and i == 1): 492 | alert += "*** dead?" 493 | if np.abs(w.min()) > 1000 or np.abs(w.max()) > 1000: 494 | alert += "*** Overflow?" 495 | # Add row 496 | table.append([ 497 | weight_name + alert, 498 | str(w.shape), 499 | "{:+9.4f}".format(w.min()), 500 | "{:+10.4f}".format(w.max()), 501 | "{:+9.4f}".format(w.std()), 502 | ]) 503 | display_table(table) 504 | -------------------------------------------------------------------------------- /nodes/coco.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Configurations and data loading code for MS COCO. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | 9 | ------------------------------------------------------------ 10 | 11 | Usage: import the module (see Jupyter notebooks for examples), or run from 12 | the command line as such: 13 | 14 | # Train a new model starting from pre-trained COCO weights 15 | python3 coco.py train --dataset=/path/to/coco/ --model=coco 16 | 17 | # Train a new model starting from ImageNet weights 18 | python3 coco.py train --dataset=/path/to/coco/ --model=imagenet 19 | 20 | # Continue training a model that you had trained earlier 21 | python3 coco.py train --dataset=/path/to/coco/ --model=/path/to/weights.h5 22 | 23 | # Continue training the last model you trained 24 | python3 coco.py train --dataset=/path/to/coco/ --model=last 25 | 26 | # Run COCO evaluatoin on the last model you trained 27 | python3 coco.py evaluate --dataset=/path/to/coco/ --model=last 28 | """ 29 | 30 | import os 31 | import time 32 | import numpy as np 33 | 34 | # Download and install the Python COCO tools from https://github.com/waleedka/coco 35 | # That's a fork from the original https://github.com/pdollar/coco with a bug 36 | # fix for Python 3. 37 | # I submitted a pull request https://github.com/cocodataset/cocoapi/pull/50 38 | # If the PR is merged then use the original repo. 39 | # Note: Edit PythonAPI/Makefile and replace "python" with "python3". 
40 | # from pycocotools.coco import COCO
41 | # from pycocotools.cocoeval import COCOeval
42 | # from pycocotools import mask as maskUtils
43 | # NOTE: these three imports are disabled, so CocoDataset.load_mask and the COCO evaluation code below will raise NameError unless they are re-enabled (pycocotools is not in requirements.txt).
44 | import zipfile
45 | from six.moves.urllib import request
46 | import shutil
47 | 
48 | from config import Config
49 | import utils
50 | import model as modellib
51 | 
52 | # Root directory of the project
53 | ROOT_DIR = os.path.dirname(__file__)
54 | 
55 | # Path to trained weights file
56 | COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")
57 | 
58 | # Directory to save logs and model checkpoints, if not provided
59 | # through the command line argument --logs
60 | DEFAULT_LOGS_DIR = os.path.join(ROOT_DIR, "logs")
61 | DEFAULT_DATASET_YEAR = "2014"
62 | 
63 | ############################################################
64 | # Configurations
65 | ############################################################
66 | 
67 | 
68 | class CocoConfig(Config):
69 |     """Configuration for training on MS COCO.
70 |     Derives from the base Config class and overrides values specific
71 |     to the COCO dataset.
72 |     """
73 |     # Give the configuration a recognizable name
74 |     NAME = "coco"
75 | 
76 |     # We use a GPU with 12GB memory, which can fit two images.
77 |     # Adjust down if you use a smaller GPU.
78 |     IMAGES_PER_GPU = 2
79 | 
80 |     # Uncomment to train on 8 GPUs (default is 1)
81 |     # GPU_COUNT = 8
82 | 
83 |     # Number of classes (including background)
84 |     NUM_CLASSES = 1 + 80  # COCO has 80 classes
85 | 
86 | 
87 | ############################################################
88 | # Dataset
89 | ############################################################
90 | 
91 | class CocoDataset(utils.Dataset):
92 |     def load_coco(self, dataset_dir, subset, year=DEFAULT_DATASET_YEAR, class_ids=None,
93 |                   class_map=None, return_coco=False, auto_download=False):
94 |         """Load a subset of the COCO dataset.
95 |         dataset_dir: The root directory of the COCO dataset.
96 |         subset: What to load (train, val, minival, valminusminival)
97 |         year: What dataset year to load (2014, 2017) as a string, not an integer
98 |         class_ids: If provided, only loads images that have the given classes.
99 |         class_map: TODO: Not implemented yet. Supports mapping classes from
100 |             different datasets to the same class ID.
101 |         return_coco: If True, returns the COCO object.
102 |         auto_download: Automatically download and unzip MS-COCO images and annotations
103 |         """
104 | 
105 |         if auto_download is True:
106 |             self.auto_download(dataset_dir, subset, year)
107 | 
108 |         coco = COCO("{}/annotations/instances_{}{}.json".format(dataset_dir, subset, year))
109 |         if subset == "minival" or subset == "valminusminival":
110 |             subset = "val"
111 |         image_dir = "{}/{}{}".format(dataset_dir, subset, year)
112 | 
113 |         # Load all classes or a subset?
114 |         if not class_ids:
115 |             # All classes
116 |             class_ids = sorted(coco.getCatIds())
117 | 
118 |         # All images or a subset?
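        # e.g. class_ids=[1] (the COCO "person" category) would keep only
        # images that carry at least one person annotation.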
119 | if class_ids: 120 | image_ids = [] 121 | for id in class_ids: 122 | image_ids.extend(list(coco.getImgIds(catIds=[id]))) 123 | # Remove duplicates 124 | image_ids = list(set(image_ids)) 125 | else: 126 | # All images 127 | image_ids = list(coco.imgs.keys()) 128 | 129 | # Add classes 130 | for i in class_ids: 131 | self.add_class("coco", i, coco.loadCats(i)[0]["name"]) 132 | 133 | # Add images 134 | for i in image_ids: 135 | self.add_image( 136 | "coco", image_id=i, 137 | path=os.path.join(image_dir, coco.imgs[i]['file_name']), 138 | width=coco.imgs[i]["width"], 139 | height=coco.imgs[i]["height"], 140 | annotations=coco.loadAnns(coco.getAnnIds( 141 | imgIds=[i], catIds=class_ids, iscrowd=None))) 142 | if return_coco: 143 | return coco 144 | 145 | def auto_download(self, dataDir, dataType, dataYear): 146 | """Download the COCO dataset/annotations if requested. 147 | dataDir: The root directory of the COCO dataset. 148 | dataType: What to load (train, val, minival, valminusminival) 149 | dataYear: What dataset year to load (2014, 2017) as a string, not an integer 150 | Note: 151 | For 2014, use "train", "val", "minival", or "valminusminival" 152 | For 2017, only "train" and "val" annotations are available 153 | """ 154 | 155 | # Setup paths and file names 156 | if dataType == "minival" or dataType == "valminusminival": 157 | imgDir = "{}/{}{}".format(dataDir, "val", dataYear) 158 | imgZipFile = "{}/{}{}.zip".format(dataDir, "val", dataYear) 159 | imgURL = "http://images.cocodataset.org/zips/{}{}.zip".format("val", dataYear) 160 | else: 161 | imgDir = "{}/{}{}".format(dataDir, dataType, dataYear) 162 | imgZipFile = "{}/{}{}.zip".format(dataDir, dataType, dataYear) 163 | imgURL = "http://images.cocodataset.org/zips/{}{}.zip".format(dataType, dataYear) 164 | # print("Image paths:"); print(imgDir); print(imgZipFile); print(imgURL) 165 | 166 | # Create main folder if it doesn't exist yet 167 | if not os.path.exists(dataDir): 168 | os.makedirs(dataDir) 169 | 170 | # Download images if not available locally 171 | if not os.path.exists(imgDir): 172 | os.makedirs(imgDir) 173 | print("Downloading images to " + imgZipFile + " ...") 174 | with request.urlopen(imgURL) as resp, open(imgZipFile, 'wb') as out: 175 | shutil.copyfileobj(resp, out) 176 | print("... done downloading.") 177 | print("Unzipping " + imgZipFile) 178 | with zipfile.ZipFile(imgZipFile, "r") as zip_ref: 179 | zip_ref.extractall(dataDir) 180 | print("... 
done unzipping")
181 |             print("Will use images in " + imgDir)
182 | 
183 |         # Setup annotations data paths
184 |         annDir = "{}/annotations".format(dataDir)
185 |         if dataType == "minival":
186 |             annZipFile = "{}/instances_minival2014.json.zip".format(dataDir)
187 |             annFile = "{}/instances_minival2014.json".format(annDir)
188 |             annURL = "https://dl.dropboxusercontent.com/s/o43o90bna78omob/instances_minival2014.json.zip?dl=0"
189 |             unZipDir = annDir
190 |         elif dataType == "valminusminival":
191 |             annZipFile = "{}/instances_valminusminival2014.json.zip".format(dataDir)
192 |             annFile = "{}/instances_valminusminival2014.json".format(annDir)
193 |             annURL = "https://dl.dropboxusercontent.com/s/s3tw5zcg7395368/instances_valminusminival2014.json.zip?dl=0"
194 |             unZipDir = annDir
195 |         else:
196 |             annZipFile = "{}/annotations_trainval{}.zip".format(dataDir, dataYear)
197 |             annFile = "{}/instances_{}{}.json".format(annDir, dataType, dataYear)
198 |             annURL = "http://images.cocodataset.org/annotations/annotations_trainval{}.zip".format(dataYear)
199 |             unZipDir = dataDir
200 |         # print("Annotations paths:"); print(annDir); print(annFile); print(annZipFile); print(annURL)
201 | 
202 |         # Download annotations if not available locally
203 |         if not os.path.exists(annDir):
204 |             os.makedirs(annDir)
205 |         if not os.path.exists(annFile):
206 |             if not os.path.exists(annZipFile):
207 |                 print("Downloading zipped annotations to " + annZipFile + " ...")
208 |                 with request.urlopen(annURL) as resp, open(annZipFile, 'wb') as out:
209 |                     shutil.copyfileobj(resp, out)
210 |                 print("... done downloading.")
211 |             print("Unzipping " + annZipFile)
212 |             with zipfile.ZipFile(annZipFile, "r") as zip_ref:
213 |                 zip_ref.extractall(unZipDir)
214 |             print("... done unzipping")
215 |         print("Will use annotations in " + annFile)
216 | 
217 |     def load_mask(self, image_id):
218 |         """Load instance masks for the given image.
219 | 
220 |         Different datasets use different ways to store masks. This
221 |         function converts the different mask format to one format
222 |         in the form of a bitmap [height, width, instances].
223 | 
224 |         Returns:
225 |             masks: A bool array of shape [height, width, instance count] with
226 |                 one mask per instance.
227 |             class_ids: a 1D array of class IDs of the instance masks.
228 |         """
229 |         # If not a COCO image, delegate to parent class.
230 |         image_info = self.image_info[image_id]
231 |         if image_info["source"] != "coco":
232 |             return super(CocoDataset, self).load_mask(image_id)
233 | 
234 |         instance_masks = []
235 |         class_ids = []
236 |         annotations = self.image_info[image_id]["annotations"]
237 |         # Build mask of shape [height, width, instance_count] and list
238 |         # of class IDs that correspond to each channel of the mask.
239 |         for annotation in annotations:
240 |             class_id = self.map_source_class_id(
241 |                 "coco.{}".format(annotation['category_id']))
242 |             if class_id:
243 |                 m = self.annToMask(annotation, image_info["height"],
244 |                                    image_info["width"])
245 |                 # Some objects are so small that they're less than 1 pixel area
246 |                 # and end up rounded out. Skip those objects.
247 |                 if m.max() < 1:
248 |                     continue
249 |                 # Is it a crowd? If so, use a negative class ID.
250 |                 if annotation['iscrowd']:
251 |                     # Use negative class ID for crowds
252 |                     class_id *= -1
253 |                     # For crowd masks, annToMask() sometimes returns a mask
254 |                     # smaller than the given dimensions. If so, resize it.
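                    # Note: rather than resizing, the fallback below substitutes
                    # a full-size all-ones mask for such crowd regions.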
255 |                 if m.shape[0] != image_info["height"] or m.shape[1] != image_info["width"]:
256 |                     m = np.ones([image_info["height"], image_info["width"]], dtype=bool)
257 |             instance_masks.append(m)
258 |             class_ids.append(class_id)
259 | 
260 |         # Pack instance masks into an array
261 |         if class_ids:
262 |             mask = np.stack(instance_masks, axis=2)
263 |             class_ids = np.array(class_ids, dtype=np.int32)
264 |             return mask, class_ids
265 |         else:
266 |             # Call super class to return an empty mask
267 |             return super(CocoDataset, self).load_mask(image_id)
268 | 
269 |     def image_reference(self, image_id):
270 |         """Return a link to the image in the COCO Website."""
271 |         info = self.image_info[image_id]
272 |         if info["source"] == "coco":
273 |             return "http://cocodataset.org/#explore?id={}".format(info["id"])
274 |         else:
275 |             return super(CocoDataset, self).image_reference(image_id)
276 | 
277 |     # The following two functions are from pycocotools with a few changes.
278 | 
279 |     def annToRLE(self, ann, height, width):
280 |         """
281 |         Convert annotation which can be polygons or uncompressed RLE to compressed RLE.
282 |         :return: RLE (run-length encoding of the mask)
283 |         """
284 |         segm = ann['segmentation']
285 |         if isinstance(segm, list):
286 |             # polygon -- a single object might consist of multiple parts
287 |             # we merge all parts into one mask rle code
288 |             rles = maskUtils.frPyObjects(segm, height, width)
289 |             rle = maskUtils.merge(rles)
290 |         elif isinstance(segm['counts'], list):
291 |             # uncompressed RLE
292 |             rle = maskUtils.frPyObjects(segm, height, width)
293 |         else:
294 |             # rle
295 |             rle = ann['segmentation']
296 |         return rle
297 | 
298 |     def annToMask(self, ann, height, width):
299 |         """
300 |         Convert annotation which can be polygons, uncompressed RLE, or RLE to binary mask.
301 |         :return: binary mask (numpy 2D array)
302 |         """
303 |         rle = self.annToRLE(ann, height, width)
304 |         m = maskUtils.decode(rle)
305 |         return m
306 | 
307 | 
308 | ############################################################
309 | # COCO Evaluation
310 | ############################################################
311 | 
312 | def build_coco_results(dataset, image_ids, rois, class_ids, scores, masks):
313 |     """Arrange results to match COCO specs in http://cocodataset.org/#format
314 |     """
315 |     # If no results, return an empty list
316 |     if rois is None:
317 |         return []
318 | 
319 |     results = []
320 |     for image_id in image_ids:
321 |         # Loop through detections
322 |         for i in range(rois.shape[0]):
323 |             class_id = class_ids[i]
324 |             score = scores[i]
325 |             bbox = np.around(rois[i], 1)
326 |             mask = masks[:, :, i]
327 | 
328 |             result = {
329 |                 "image_id": image_id,
330 |                 "category_id": dataset.get_source_class_id(class_id, "coco"),
331 |                 "bbox": [bbox[1], bbox[0], bbox[3] - bbox[1], bbox[2] - bbox[0]],
332 |                 "score": score,
333 |                 "segmentation": maskUtils.encode(np.asfortranarray(mask))
334 |             }
335 |             results.append(result)
336 |     return results
337 | 
338 | 
339 | def evaluate_coco(model, dataset, coco, eval_type="bbox", limit=0, image_ids=None):
340 |     """Runs official COCO evaluation.
341 |     dataset: A Dataset object with validation data
342 |     eval_type: "bbox" or "segm" for bounding box or segmentation evaluation
343 |     limit: if not 0, it's the number of images to use for evaluation
344 |     """
345 |     # Pick COCO images from the dataset
346 |     image_ids = image_ids or dataset.image_ids
347 | 
348 |     # Limit to a subset
349 |     if limit:
350 |         image_ids = image_ids[:limit]
351 | 
352 |     # Get corresponding COCO image IDs.
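
For a concrete feel for the annotation-to-mask conversion above, here is a toy run of the same pycocotools calls on an invented polygon annotation, packed into the [height, width, instance_count] layout that load_mask() returns. The annotation dict is made up for illustration:

import numpy as np
from pycocotools import mask as maskUtils

height, width = 8, 8
ann = {"segmentation": [[2, 2, 6, 2, 6, 6, 2, 6]],  # one square polygon, (x, y) pairs
       "category_id": 1, "iscrowd": 0}

# polygon -> RLE -> binary mask, the same path annToRLE()/annToMask() take
rles = maskUtils.frPyObjects(ann["segmentation"], height, width)
rle = maskUtils.merge(rles)
m = maskUtils.decode(rle)                  # [height, width], values 0/1

# pack into the [height, width, instance_count] stack load_mask() builds
mask = np.stack([m.astype(bool)], axis=2)
class_ids = np.array([ann["category_id"]], dtype=np.int32)
print(mask.shape, class_ids)               # (8, 8, 1) [1]
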
353 | coco_image_ids = [dataset.image_info[id]["id"] for id in image_ids] 354 | 355 | t_prediction = 0 356 | t_start = time.time() 357 | 358 | results = [] 359 | for i, image_id in enumerate(image_ids): 360 | # Load image 361 | image = dataset.load_image(image_id) 362 | 363 | # Run detection 364 | t = time.time() 365 | r = model.detect([image], verbose=0)[0] 366 | t_prediction += (time.time() - t) 367 | 368 | # Convert results to COCO format 369 | image_results = build_coco_results(dataset, coco_image_ids[i:i + 1], 370 | r["rois"], r["class_ids"], 371 | r["scores"], r["masks"]) 372 | results.extend(image_results) 373 | 374 | # Load results. This modifies results with additional attributes. 375 | coco_results = coco.loadRes(results) 376 | 377 | # Evaluate 378 | cocoEval = COCOeval(coco, coco_results, eval_type) 379 | cocoEval.params.imgIds = coco_image_ids 380 | cocoEval.evaluate() 381 | cocoEval.accumulate() 382 | cocoEval.summarize() 383 | 384 | print("Prediction time: {}. Average {}/image".format( 385 | t_prediction, t_prediction / len(image_ids))) 386 | print("Total time: ", time.time() - t_start) 387 | 388 | 389 | ############################################################ 390 | # Training 391 | ############################################################ 392 | 393 | 394 | if __name__ == '__main__': 395 | import argparse 396 | 397 | # Parse command line arguments 398 | parser = argparse.ArgumentParser( 399 | description='Train Mask R-CNN on MS COCO.') 400 | parser.add_argument("command", 401 | metavar="", 402 | help="'train' or 'evaluate' on MS COCO") 403 | parser.add_argument('--dataset', required=True, 404 | metavar="/path/to/coco/", 405 | help='Directory of the MS-COCO dataset') 406 | parser.add_argument('--year', required=False, 407 | default=DEFAULT_DATASET_YEAR, 408 | metavar="", 409 | help='Year of the MS-COCO dataset (2014 or 2017) (default=2014)') 410 | parser.add_argument('--model', required=True, 411 | metavar="/path/to/weights.h5", 412 | help="Path to weights .h5 file or 'coco'") 413 | parser.add_argument('--logs', required=False, 414 | default=DEFAULT_LOGS_DIR, 415 | metavar="/path/to/logs/", 416 | help='Logs and checkpoints directory (default=logs/)') 417 | parser.add_argument('--limit', required=False, 418 | default=500, 419 | metavar="", 420 | help='Images to use for evaluation (default=500)') 421 | parser.add_argument('--download', required=False, 422 | default=False, 423 | metavar="", 424 | help='Automatically download and unzip MS-COCO files (default=False)', 425 | type=bool) 426 | args = parser.parse_args() 427 | print("Command: ", args.command) 428 | print("Model: ", args.model) 429 | print("Dataset: ", args.dataset) 430 | print("Year: ", args.year) 431 | print("Logs: ", args.logs) 432 | print("Auto Download: ", args.download) 433 | 434 | # Configurations 435 | if args.command == "train": 436 | config = CocoConfig() 437 | else: 438 | class InferenceConfig(CocoConfig): 439 | # Set batch size to 1 since we'll be running inference on 440 | # one image at a time. 
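
One caveat in the argument parsing above: argparse applies type=bool directly to the raw string, and bool() of any non-empty string is True, so "--download False" on the command line still enables downloading. A common workaround, sketched here and not part of this repo, is an explicit string-to-boolean parser passed as the type:

import argparse

def str2bool(value):
    """Parse 'true'/'false'-style strings into a real boolean."""
    if value.lower() in ("yes", "true", "t", "1"):
        return True
    if value.lower() in ("no", "false", "f", "0"):
        return False
    raise argparse.ArgumentTypeError("Boolean value expected, got %r" % value)

# usage: parser.add_argument('--download', type=str2bool, default=False, ...)
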
Batch size = GPU_COUNT * IMAGES_PER_GPU
441 |             GPU_COUNT = 1
442 |             IMAGES_PER_GPU = 1
443 |             DETECTION_MIN_CONFIDENCE = 0
444 |         config = InferenceConfig()
445 |     config.display()
446 | 
447 |     # Create model
448 |     if args.command == "train":
449 |         model = modellib.MaskRCNN(mode="training", config=config,
450 |                                   model_dir=args.logs)
451 |     else:
452 |         model = modellib.MaskRCNN(mode="inference", config=config,
453 |                                   model_dir=args.logs)
454 | 
455 |     # Select weights file to load
456 |     if args.model.lower() == "coco":
457 |         model_path = COCO_MODEL_PATH
458 |     elif args.model.lower() == "last":
459 |         # Find last trained weights
460 |         model_path = model.find_last()[1]
461 |     elif args.model.lower() == "imagenet":
462 |         # Start from ImageNet trained weights
463 |         model_path = model.get_imagenet_weights()
464 |     else:
465 |         model_path = args.model
466 | 
467 |     # Load weights
468 |     print("Loading weights ", model_path)
469 |     model.load_weights(model_path, by_name=True)
470 | 
471 |     # Train or evaluate
472 |     if args.command == "train":
473 |         # Training dataset. Use the training set and 35K from the
474 |         # validation set, as in the Mask RCNN paper.
475 |         dataset_train = CocoDataset()
476 |         dataset_train.load_coco(args.dataset, "train", year=args.year, auto_download=args.download)
477 |         dataset_train.load_coco(args.dataset, "valminusminival", year=args.year, auto_download=args.download)
478 |         dataset_train.prepare()
479 | 
480 |         # Validation dataset
481 |         dataset_val = CocoDataset()
482 |         dataset_val.load_coco(args.dataset, "minival", year=args.year, auto_download=args.download)
483 |         dataset_val.prepare()
484 | 
485 |         # *** This training schedule is an example. Update to your needs ***
486 | 
487 |         # Training - Stage 1
488 |         print("Training network heads")
489 |         model.train(dataset_train, dataset_val,
490 |                     learning_rate=config.LEARNING_RATE,
491 |                     epochs=40,
492 |                     layers='heads')
493 | 
494 |         # Training - Stage 2
495 |         # Finetune layers from ResNet stage 4 and up
496 |         print("Fine tune Resnet stage 4 and up")
497 |         model.train(dataset_train, dataset_val,
498 |                     learning_rate=config.LEARNING_RATE,
499 |                     epochs=120,
500 |                     layers='4+')
501 | 
502 |         # Training - Stage 3
503 |         # Fine tune all layers
504 |         print("Fine tune all layers")
505 |         model.train(dataset_train, dataset_val,
506 |                     learning_rate=config.LEARNING_RATE / 10,
507 |                     epochs=160,
508 |                     layers='all')
509 | 
510 |     elif args.command == "evaluate":
511 |         # Validation dataset
512 |         dataset_val = CocoDataset()
513 |         coco = dataset_val.load_coco(args.dataset, "minival", year=args.year, return_coco=True, auto_download=args.download)
514 |         dataset_val.prepare()
515 |         print("Running COCO evaluation on {} images.".format(args.limit))
516 |         evaluate_coco(model, dataset_val, coco, "bbox", limit=int(args.limit))
517 |     else:
518 |         print("'{}' is not recognized. "
519 |               "Use 'train' or 'evaluate'".format(args.command))
520 | 
--------------------------------------------------------------------------------
/nodes/utils.py:
--------------------------------------------------------------------------------
1 | """
2 | Mask R-CNN
3 | Common utility functions and classes.
4 | 
5 | Copyright (c) 2017 Matterport, Inc.
6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | """ 9 | 10 | import sys 11 | import os 12 | import math 13 | import random 14 | import numpy as np 15 | import tensorflow as tf 16 | import scipy.misc 17 | import skimage.color 18 | import skimage.io 19 | from six.moves.urllib import request 20 | import shutil 21 | import contextlib 22 | 23 | # URL from which to download the latest COCO trained weights 24 | COCO_MODEL_URL = "https://github.com/matterport/Mask_RCNN/releases/download/v2.0/mask_rcnn_coco.h5" 25 | 26 | 27 | ############################################################ 28 | # Bounding Boxes 29 | ############################################################ 30 | 31 | def extract_bboxes(mask): 32 | """Compute bounding boxes from masks. 33 | mask: [height, width, num_instances]. Mask pixels are either 1 or 0. 34 | 35 | Returns: bbox array [num_instances, (y1, x1, y2, x2)]. 36 | """ 37 | boxes = np.zeros([mask.shape[-1], 4], dtype=np.int32) 38 | for i in range(mask.shape[-1]): 39 | m = mask[:, :, i] 40 | # Bounding box. 41 | horizontal_indicies = np.where(np.any(m, axis=0))[0] 42 | vertical_indicies = np.where(np.any(m, axis=1))[0] 43 | if horizontal_indicies.shape[0]: 44 | x1, x2 = horizontal_indicies[[0, -1]] 45 | y1, y2 = vertical_indicies[[0, -1]] 46 | # x2 and y2 should not be part of the box. Increment by 1. 47 | x2 += 1 48 | y2 += 1 49 | else: 50 | # No mask for this instance. Might happen due to 51 | # resizing or cropping. Set bbox to zeros 52 | x1, x2, y1, y2 = 0, 0, 0, 0 53 | boxes[i] = np.array([y1, x1, y2, x2]) 54 | return boxes.astype(np.int32) 55 | 56 | 57 | def compute_iou(box, boxes, box_area, boxes_area): 58 | """Calculates IoU of the given box with the array of the given boxes. 59 | box: 1D vector [y1, x1, y2, x2] 60 | boxes: [boxes_count, (y1, x1, y2, x2)] 61 | box_area: float. the area of 'box' 62 | boxes_area: array of length boxes_count. 63 | 64 | Note: the areas are passed in rather than calculated here for 65 | efficency. Calculate once in the caller to avoid duplicate work. 66 | """ 67 | # Calculate intersection areas 68 | y1 = np.maximum(box[0], boxes[:, 0]) 69 | y2 = np.minimum(box[2], boxes[:, 2]) 70 | x1 = np.maximum(box[1], boxes[:, 1]) 71 | x2 = np.minimum(box[3], boxes[:, 3]) 72 | intersection = np.maximum(x2 - x1, 0) * np.maximum(y2 - y1, 0) 73 | union = box_area + boxes_area[:] - intersection[:] 74 | iou = intersection / union 75 | return iou 76 | 77 | 78 | def compute_overlaps(boxes1, boxes2): 79 | """Computes IoU overlaps between two sets of boxes. 80 | boxes1, boxes2: [N, (y1, x1, y2, x2)]. 81 | 82 | For better performance, pass the largest set first and the smaller second. 83 | """ 84 | # Areas of anchors and GT boxes 85 | area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1]) 86 | area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1]) 87 | 88 | # Compute overlaps to generate matrix [boxes1 count, boxes2 count] 89 | # Each cell contains the IoU value. 90 | overlaps = np.zeros((boxes1.shape[0], boxes2.shape[0])) 91 | for i in range(overlaps.shape[1]): 92 | box2 = boxes2[i] 93 | overlaps[:, i] = compute_iou(box2, boxes1, area2[i], area1) 94 | return overlaps 95 | 96 | 97 | def non_max_suppression(boxes, scores, threshold): 98 | """Performs non-maximum supression and returns indicies of kept boxes. 99 | boxes: [N, (y1, x1, y2, x2)]. Notice that (y2, x2) lays outside the box. 100 | scores: 1-D array of box scores. 101 | threshold: Float. IoU threshold to use for filtering. 
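
A quick worked example of extract_bboxes() on a made-up single-instance mask, assuming this module is importable as utils, as in the rest of the repo. Note that (y2, x2) are exclusive, so the box ends one past the last masked row and column:

import numpy as np
from utils import extract_bboxes

# One instance mask with a filled region spanning rows 2-4 and columns 1-4.
mask = np.zeros((8, 8, 1), dtype=np.uint8)
mask[2:5, 1:5, 0] = 1
print(extract_bboxes(mask))   # [[2 1 5 5]]
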
102 | """ 103 | assert boxes.shape[0] > 0 104 | if boxes.dtype.kind != "f": 105 | boxes = boxes.astype(np.float32) 106 | 107 | # Compute box areas 108 | y1 = boxes[:, 0] 109 | x1 = boxes[:, 1] 110 | y2 = boxes[:, 2] 111 | x2 = boxes[:, 3] 112 | area = (y2 - y1) * (x2 - x1) 113 | 114 | # Get indicies of boxes sorted by scores (highest first) 115 | ixs = scores.argsort()[::-1] 116 | 117 | pick = [] 118 | while len(ixs) > 0: 119 | # Pick top box and add its index to the list 120 | i = ixs[0] 121 | pick.append(i) 122 | # Compute IoU of the picked box with the rest 123 | iou = compute_iou(boxes[i], boxes[ixs[1:]], area[i], area[ixs[1:]]) 124 | # Identify boxes with IoU over the threshold. This 125 | # returns indicies into ixs[1:], so add 1 to get 126 | # indicies into ixs. 127 | remove_ixs = np.where(iou > threshold)[0] + 1 128 | # Remove indicies of the picked and overlapped boxes. 129 | ixs = np.delete(ixs, remove_ixs) 130 | ixs = np.delete(ixs, 0) 131 | return np.array(pick, dtype=np.int32) 132 | 133 | 134 | def apply_box_deltas(boxes, deltas): 135 | """Applies the given deltas to the given boxes. 136 | boxes: [N, (y1, x1, y2, x2)]. Note that (y2, x2) is outside the box. 137 | deltas: [N, (dy, dx, log(dh), log(dw))] 138 | """ 139 | boxes = boxes.astype(np.float32) 140 | # Convert to y, x, h, w 141 | height = boxes[:, 2] - boxes[:, 0] 142 | width = boxes[:, 3] - boxes[:, 1] 143 | center_y = boxes[:, 0] + 0.5 * height 144 | center_x = boxes[:, 1] + 0.5 * width 145 | # Apply deltas 146 | center_y += deltas[:, 0] * height 147 | center_x += deltas[:, 1] * width 148 | height *= np.exp(deltas[:, 2]) 149 | width *= np.exp(deltas[:, 3]) 150 | # Convert back to y1, x1, y2, x2 151 | y1 = center_y - 0.5 * height 152 | x1 = center_x - 0.5 * width 153 | y2 = y1 + height 154 | x2 = x1 + width 155 | return np.stack([y1, x1, y2, x2], axis=1) 156 | 157 | 158 | def box_refinement_graph(box, gt_box): 159 | """Compute refinement needed to transform box to gt_box. 160 | box and gt_box are [N, (y1, x1, y2, x2)] 161 | """ 162 | box = tf.cast(box, tf.float32) 163 | gt_box = tf.cast(gt_box, tf.float32) 164 | 165 | height = box[:, 2] - box[:, 0] 166 | width = box[:, 3] - box[:, 1] 167 | center_y = box[:, 0] + 0.5 * height 168 | center_x = box[:, 1] + 0.5 * width 169 | 170 | gt_height = gt_box[:, 2] - gt_box[:, 0] 171 | gt_width = gt_box[:, 3] - gt_box[:, 1] 172 | gt_center_y = gt_box[:, 0] + 0.5 * gt_height 173 | gt_center_x = gt_box[:, 1] + 0.5 * gt_width 174 | 175 | dy = (gt_center_y - center_y) / height 176 | dx = (gt_center_x - center_x) / width 177 | dh = tf.log(gt_height / height) 178 | dw = tf.log(gt_width / width) 179 | 180 | result = tf.stack([dy, dx, dh, dw], axis=1) 181 | return result 182 | 183 | 184 | def box_refinement(box, gt_box): 185 | """Compute refinement needed to transform box to gt_box. 186 | box and gt_box are [N, (y1, x1, y2, x2)]. (y2, x2) is 187 | assumed to be outside the box. 
188 | """ 189 | box = box.astype(np.float32) 190 | gt_box = gt_box.astype(np.float32) 191 | 192 | height = box[:, 2] - box[:, 0] 193 | width = box[:, 3] - box[:, 1] 194 | center_y = box[:, 0] + 0.5 * height 195 | center_x = box[:, 1] + 0.5 * width 196 | 197 | gt_height = gt_box[:, 2] - gt_box[:, 0] 198 | gt_width = gt_box[:, 3] - gt_box[:, 1] 199 | gt_center_y = gt_box[:, 0] + 0.5 * gt_height 200 | gt_center_x = gt_box[:, 1] + 0.5 * gt_width 201 | 202 | dy = (gt_center_y - center_y) / height 203 | dx = (gt_center_x - center_x) / width 204 | dh = np.log(gt_height / height) 205 | dw = np.log(gt_width / width) 206 | 207 | return np.stack([dy, dx, dh, dw], axis=1) 208 | 209 | 210 | ############################################################ 211 | # Dataset 212 | ############################################################ 213 | 214 | class Dataset(object): 215 | """The base class for dataset classes. 216 | To use it, create a new class that adds functions specific to the dataset 217 | you want to use. For example: 218 | 219 | class CatsAndDogsDataset(Dataset): 220 | def load_cats_and_dogs(self): 221 | ... 222 | def load_mask(self, image_id): 223 | ... 224 | def image_reference(self, image_id): 225 | ... 226 | 227 | See COCODataset and ShapesDataset as examples. 228 | """ 229 | 230 | def __init__(self, class_map=None): 231 | self._image_ids = [] 232 | self.image_info = [] 233 | # Background is always the first class 234 | self.class_info = [{"source": "", "id": 0, "name": "BG"}] 235 | self.source_class_ids = {} 236 | 237 | def add_class(self, source, class_id, class_name): 238 | assert "." not in source, "Source name cannot contain a dot" 239 | # Does the class exist already? 240 | for info in self.class_info: 241 | if info['source'] == source and info["id"] == class_id: 242 | # source.class_id combination already available, skip 243 | return 244 | # Add the class 245 | self.class_info.append({ 246 | "source": source, 247 | "id": class_id, 248 | "name": class_name, 249 | }) 250 | 251 | def add_image(self, source, image_id, path, **kwargs): 252 | image_info = { 253 | "id": image_id, 254 | "source": source, 255 | "path": path, 256 | } 257 | image_info.update(kwargs) 258 | self.image_info.append(image_info) 259 | 260 | def image_reference(self, image_id): 261 | """Return a link to the image in its source Website or details about 262 | the image that help looking it up or debugging it. 263 | 264 | Override for your dataset, but pass to this function 265 | if you encounter images not in your dataset. 266 | """ 267 | return "" 268 | 269 | def prepare(self, class_map=None): 270 | """Prepares the Dataset class for use. 271 | 272 | TODO: class map is not supported yet. When done, it should handle mapping 273 | classes from different datasets to the same class ID. 274 | """ 275 | def clean_name(name): 276 | """Returns a shorter version of object names for cleaner display.""" 277 | return ",".join(name.split(",")[:1]) 278 | 279 | # Build (or rebuild) everything else from the info dicts. 
280 |         self.num_classes = len(self.class_info)
281 |         self.class_ids = np.arange(self.num_classes)
282 |         self.class_names = [clean_name(c["name"]) for c in self.class_info]
283 |         self.num_images = len(self.image_info)
284 |         self._image_ids = np.arange(self.num_images)
285 | 
286 |         self.class_from_source_map = {"{}.{}".format(info['source'], info['id']): id
287 |                                       for info, id in zip(self.class_info, self.class_ids)}
288 | 
289 |         # Map sources to class_ids they support
290 |         self.sources = list(set([i['source'] for i in self.class_info]))
291 |         self.source_class_ids = {}
292 |         # Loop over datasets
293 |         for source in self.sources:
294 |             self.source_class_ids[source] = []
295 |             # Find classes that belong to this dataset
296 |             for i, info in enumerate(self.class_info):
297 |                 # Include BG class in all datasets
298 |                 if i == 0 or source == info['source']:
299 |                     self.source_class_ids[source].append(i)
300 | 
301 |     def map_source_class_id(self, source_class_id):
302 |         """Takes a source class ID and returns the int class ID assigned to it.
303 | 
304 |         For example:
305 |         dataset.map_source_class_id("coco.12") -> 23
306 |         """
307 |         return self.class_from_source_map[source_class_id]
308 | 
309 |     def get_source_class_id(self, class_id, source):
310 |         """Map an internal class ID to the corresponding class ID in the source dataset."""
311 |         info = self.class_info[class_id]
312 |         assert info['source'] == source
313 |         return info['id']
314 | 
315 |     def append_data(self, class_info, image_info):
316 |         self.external_to_class_id = {}
317 |         for i, c in enumerate(self.class_info):
318 |             for ds, id in c["map"]:
319 |                 self.external_to_class_id[ds + str(id)] = i
320 | 
321 |         # Map external image IDs to internal ones.
322 |         self.external_to_image_id = {}
323 |         for i, info in enumerate(self.image_info):
324 |             self.external_to_image_id[info["ds"] + str(info["id"])] = i
325 | 
326 |     @property
327 |     def image_ids(self):
328 |         return self._image_ids
329 | 
330 |     def source_image_link(self, image_id):
331 |         """Returns the path or URL to the image.
332 |         Override this to return a URL to the image if it's available online for easy
333 |         debugging.
334 |         """
335 |         return self.image_info[image_id]["path"]
336 | 
337 |     def load_image(self, image_id):
338 |         """Load the specified image and return a [H,W,3] Numpy array.
339 |         """
340 |         # Load image
341 |         image = skimage.io.imread(self.image_info[image_id]['path'])
342 |         # If grayscale, convert to RGB for consistency.
343 |         if image.ndim != 3:
344 |             image = skimage.color.gray2rgb(image)
345 |         return image
346 | 
347 |     def load_mask(self, image_id):
348 |         """Load instance masks for the given image.
349 | 
350 |         Different datasets use different ways to store masks. Override this
351 |         method to load instance masks and return them in the form of an
352 |         array of binary masks of shape [height, width, instances].
353 | 
354 |         Returns:
355 |             masks: A bool array of shape [height, width, instance count] with
356 |                 a binary mask per instance.
357 |             class_ids: a 1D array of class IDs of the instance masks.
358 |         """
359 |         # Override this function to load a mask from your dataset.
360 |         # Otherwise, it returns an empty mask.
361 |         mask = np.empty([0, 0, 0])
362 |         class_ids = np.empty([0], np.int32)
363 |         return mask, class_ids
364 | 
365 | 
366 | def resize_image(image, min_dim=None, max_dim=None, padding=False):
367 |     """
368 |     Resizes an image keeping the aspect ratio.
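
A minimal sketch of the Dataset bookkeeping above, using an invented one-class subclass. ToyDataset and its single class and image are made up for illustration; the import assumes this module is on the path as utils:

from utils import Dataset

class ToyDataset(Dataset):
    """A made-up one-class dataset, just to show the bookkeeping."""
    def load_toy(self):
        self.add_class("toy", 1, "widget")
        self.add_image("toy", image_id=0, path="/tmp/widget.png")

ds = ToyDataset()
ds.load_toy()
ds.prepare()
print(ds.num_classes)                    # 2 -- BG plus "widget"
print(ds.class_names)                    # ['BG', 'widget']
print(ds.map_source_class_id("toy.1"))   # 1
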
369 | 
370 |     min_dim: if provided, resizes the image such that its smaller
371 |         dimension == min_dim
372 |     max_dim: if provided, ensures that the image's longest side doesn't
373 |         exceed this value.
374 |     padding: If true, pads image with zeros so its size is max_dim x max_dim
375 | 
376 |     Returns:
377 |     image: the resized image
378 |     window: (y1, x1, y2, x2). If max_dim is provided, padding might
379 |         be inserted in the returned image. If so, this window is the
380 |         coordinates of the image part of the full image (excluding
381 |         the padding). The x2, y2 pixels are not included.
382 |     scale: The scale factor used to resize the image
383 |     padding: Padding added to the image [(top, bottom), (left, right), (0, 0)]
384 |     """
385 |     # Default window (y1, x1, y2, x2) and default scale == 1.
386 |     h, w = image.shape[:2]
387 |     window = (0, 0, h, w)
388 |     scale = 1
389 | 
390 |     # Scale?
391 |     if min_dim:
392 |         # Scale up but not down
393 |         scale = max(1, min_dim / min(h, w))
394 |     # Does it exceed max dim?
395 |     if max_dim:
396 |         image_max = max(h, w)
397 |         if round(image_max * scale) > max_dim:
398 |             scale = max_dim / image_max
399 |     # Resize image and mask
400 |     if scale != 1:
401 |         image = scipy.misc.imresize(
402 |             image, (round(h * scale), round(w * scale)))
403 |     # Need padding?
404 |     if padding:
405 |         # Get new height and width
406 |         h, w = image.shape[:2]
407 |         top_pad = (max_dim - h) // 2
408 |         bottom_pad = max_dim - h - top_pad
409 |         left_pad = (max_dim - w) // 2
410 |         right_pad = max_dim - w - left_pad
411 |         padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)]
412 |         image = np.pad(image, padding, mode='constant', constant_values=0)
413 |         window = (top_pad, left_pad, h + top_pad, w + left_pad)
414 |     return image, window, scale, padding
415 | 
416 | 
417 | def resize_mask(mask, scale, padding):
418 |     """Resizes a mask using the given scale and padding.
419 |     Typically, you get the scale and padding from resize_image() to
420 |     ensure both the image and the mask are resized consistently.
421 | 
422 |     scale: mask scaling factor
423 |     padding: Padding to add to the mask in the form
424 |         [(top, bottom), (left, right), (0, 0)]
425 |     """
426 |     h, w = mask.shape[:2]
427 |     mask = scipy.ndimage.zoom(mask, zoom=[scale, scale, 1], order=0)
428 |     mask = np.pad(mask, padding, mode='constant', constant_values=0)
429 |     return mask
430 | 
431 | 
432 | def minimize_mask(bbox, mask, mini_shape):
433 |     """Resize masks to a smaller version to reduce memory load.
434 |     Mini-masks can then be resized back to image scale using expand_masks()
435 | 
436 |     See inspect_data.ipynb notebook for more details.
437 |     """
438 |     mini_mask = np.zeros(mini_shape + (mask.shape[-1],), dtype=bool)
439 |     for i in range(mask.shape[-1]):
440 |         m = mask[:, :, i]
441 |         y1, x1, y2, x2 = bbox[i][:4]
442 |         m = m[y1:y2, x1:x2]
443 |         if m.size == 0:
444 |             raise Exception("Invalid bounding box with area of zero")
445 |         m = scipy.misc.imresize(m.astype(float), mini_shape, interp='bilinear')
446 |         mini_mask[:, :, i] = np.where(m >= 128, 1, 0)
447 |     return mini_mask
448 | 
449 | 
450 | def expand_mask(bbox, mini_mask, image_shape):
451 |     """Resizes mini masks back to image size. Reverses the change
452 |     of minimize_mask().
453 | 
454 |     See inspect_data.ipynb notebook for more details.
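
A worked example of resize_image() showing how max_dim caps the scale that min_dim requests. Two cautions: resize_mask() above uses scipy.ndimage, which is worth importing explicitly alongside scipy.misc, and this sketch assumes the pinned pre-deprecation scipy (with scipy.misc.imresize) from requirements.txt, plus this module importable as utils:

import numpy as np
from utils import resize_image

# A 100x200 image with min_dim=256 and max_dim=256: min_dim alone would give
# scale 2.56, but that pushes the long side to 512, so max_dim caps the scale
# at 256/200 = 1.28 and the result is padded vertically into a square.
image = np.zeros((100, 200, 3), dtype=np.uint8)
resized, window, scale, padding = resize_image(
    image, min_dim=256, max_dim=256, padding=True)
print(resized.shape)   # (256, 256, 3)
print(scale)           # 1.28
print(window)          # (64, 0, 192, 256) -- the un-padded image region
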
455 | """ 456 | mask = np.zeros(image_shape[:2] + (mini_mask.shape[-1],), dtype=bool) 457 | for i in range(mask.shape[-1]): 458 | m = mini_mask[:, :, i] 459 | y1, x1, y2, x2 = bbox[i][:4] 460 | h = y2 - y1 461 | w = x2 - x1 462 | m = scipy.misc.imresize(m.astype(float), (h, w), interp='bilinear') 463 | mask[y1:y2, x1:x2, i] = np.where(m >= 128, 1, 0) 464 | return mask 465 | 466 | 467 | # TODO: Build and use this function to reduce code duplication 468 | def mold_mask(mask, config): 469 | pass 470 | 471 | 472 | def unmold_mask(mask, bbox, image_shape): 473 | """Converts a mask generated by the neural network into a format similar 474 | to it's original shape. 475 | mask: [height, width] of type float. A small, typically 28x28 mask. 476 | bbox: [y1, x1, y2, x2]. The box to fit the mask in. 477 | 478 | Returns a binary mask with the same size as the original image. 479 | """ 480 | threshold = 0.5 481 | y1, x1, y2, x2 = bbox 482 | mask = scipy.misc.imresize( 483 | mask, (y2 - y1, x2 - x1), interp='bilinear').astype(np.float32) / 255.0 484 | mask = np.where(mask >= threshold, 1, 0).astype(np.uint8) 485 | 486 | # Put the mask in the right location. 487 | full_mask = np.zeros(image_shape[:2], dtype=np.uint8) 488 | full_mask[y1:y2, x1:x2] = mask 489 | return full_mask 490 | 491 | 492 | ############################################################ 493 | # Anchors 494 | ############################################################ 495 | 496 | def generate_anchors(scales, ratios, shape, feature_stride, anchor_stride): 497 | """ 498 | scales: 1D array of anchor sizes in pixels. Example: [32, 64, 128] 499 | ratios: 1D array of anchor ratios of width/height. Example: [0.5, 1, 2] 500 | shape: [height, width] spatial shape of the feature map over which 501 | to generate anchors. 502 | feature_stride: Stride of the feature map relative to the image in pixels. 503 | anchor_stride: Stride of anchors on the feature map. For example, if the 504 | value is 2 then generate anchors for every other feature map pixel. 505 | """ 506 | # Get all combinations of scales and ratios 507 | scales, ratios = np.meshgrid(np.array(scales), np.array(ratios)) 508 | scales = scales.flatten() 509 | ratios = ratios.flatten() 510 | 511 | # Enumerate heights and widths from scales and ratios 512 | heights = scales / np.sqrt(ratios) 513 | widths = scales * np.sqrt(ratios) 514 | 515 | # Enumerate shifts in feature space 516 | shifts_y = np.arange(0, shape[0], anchor_stride) * feature_stride 517 | shifts_x = np.arange(0, shape[1], anchor_stride) * feature_stride 518 | shifts_x, shifts_y = np.meshgrid(shifts_x, shifts_y) 519 | 520 | # Enumerate combinations of shifts, widths, and heights 521 | box_widths, box_centers_x = np.meshgrid(widths, shifts_x) 522 | box_heights, box_centers_y = np.meshgrid(heights, shifts_y) 523 | 524 | # Reshape to get a list of (y, x) and a list of (h, w) 525 | box_centers = np.stack( 526 | [box_centers_y, box_centers_x], axis=2).reshape([-1, 2]) 527 | box_sizes = np.stack([box_heights, box_widths], axis=2).reshape([-1, 2]) 528 | 529 | # Convert to corner coordinates (y1, x1, y2, x2) 530 | boxes = np.concatenate([box_centers - 0.5 * box_sizes, 531 | box_centers + 0.5 * box_sizes], axis=1) 532 | return boxes 533 | 534 | 535 | def generate_pyramid_anchors(scales, ratios, feature_shapes, feature_strides, 536 | anchor_stride): 537 | """Generate anchors at different levels of a feature pyramid. Each scale 538 | is associated with a level of the pyramid, but each ratio is used in 539 | all levels of the pyramid. 
540 | 541 | Returns: 542 | anchors: [N, (y1, x1, y2, x2)]. All generated anchors in one array. Sorted 543 | with the same order of the given scales. So, anchors of scale[0] come 544 | first, then anchors of scale[1], and so on. 545 | """ 546 | # Anchors 547 | # [anchor_count, (y1, x1, y2, x2)] 548 | anchors = [] 549 | for i in range(len(scales)): 550 | anchors.append(generate_anchors(scales[i], ratios, feature_shapes[i], 551 | feature_strides[i], anchor_stride)) 552 | return np.concatenate(anchors, axis=0) 553 | 554 | 555 | ############################################################ 556 | # Miscellaneous 557 | ############################################################ 558 | 559 | def trim_zeros(x): 560 | """It's common to have tensors larger than the available data and 561 | pad with zeros. This function removes rows that are all zeros. 562 | 563 | x: [rows, columns]. 564 | """ 565 | assert len(x.shape) == 2 566 | return x[~np.all(x == 0, axis=1)] 567 | 568 | 569 | def compute_ap(gt_boxes, gt_class_ids, 570 | pred_boxes, pred_class_ids, pred_scores, 571 | iou_threshold=0.5): 572 | """Compute Average Precision at a set IoU threshold (default 0.5). 573 | 574 | Returns: 575 | mAP: Mean Average Precision 576 | precisions: List of precisions at different class score thresholds. 577 | recalls: List of recall values at different class score thresholds. 578 | overlaps: [pred_boxes, gt_boxes] IoU overlaps. 579 | """ 580 | # Trim zero padding and sort predictions by score from high to low 581 | # TODO: cleaner to do zero unpadding upstream 582 | gt_boxes = trim_zeros(gt_boxes) 583 | pred_boxes = trim_zeros(pred_boxes) 584 | pred_scores = pred_scores[:pred_boxes.shape[0]] 585 | indices = np.argsort(pred_scores)[::-1] 586 | pred_boxes = pred_boxes[indices] 587 | pred_class_ids = pred_class_ids[indices] 588 | pred_scores = pred_scores[indices] 589 | 590 | # Compute IoU overlaps [pred_boxes, gt_boxes] 591 | overlaps = compute_overlaps(pred_boxes, gt_boxes) 592 | 593 | # Loop through ground truth boxes and find matching predictions 594 | match_count = 0 595 | pred_match = np.zeros([pred_boxes.shape[0]]) 596 | gt_match = np.zeros([gt_boxes.shape[0]]) 597 | for i in range(len(pred_boxes)): 598 | # Find best matching ground truth box 599 | sorted_ixs = np.argsort(overlaps[i])[::-1] 600 | for j in sorted_ixs: 601 | # If ground truth box is already matched, go to next one 602 | if gt_match[j] == 1: 603 | continue 604 | # If we reach IoU smaller than the threshold, end the loop 605 | iou = overlaps[i, j] 606 | if iou < iou_threshold: 607 | break 608 | # Do we have a match? 609 | if pred_class_ids[i] == gt_class_ids[j]: 610 | match_count += 1 611 | gt_match[j] = 1 612 | pred_match[i] = 1 613 | break 614 | 615 | # Compute precision and recall at each prediction box step 616 | precisions = np.cumsum(pred_match) / (np.arange(len(pred_match)) + 1) 617 | recalls = np.cumsum(pred_match).astype(np.float32) / len(gt_match) 618 | 619 | # Pad with start and end values to simplify the math 620 | precisions = np.concatenate([[0], precisions, [0]]) 621 | recalls = np.concatenate([[0], recalls, [1]]) 622 | 623 | # Ensure precision values decrease but don't increase. This way, the 624 | # precision value at each recall threshold is the maximum it can be 625 | # for all following recall thresholds, as specified by the VOC paper. 
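
A small worked run of generate_anchors() from the Anchors section above, with invented parameters and the module assumed importable as utils. One scale (32 px) and three aspect ratios over a 2x2 feature map with a feature stride of 16 gives 2*2 positions x 3 ratios = 12 anchors:

import numpy as np
from utils import generate_anchors

anchors = generate_anchors(scales=32, ratios=[0.5, 1, 2], shape=[2, 2],
                           feature_stride=16, anchor_stride=1)
print(anchors.shape)   # (12, 4)
print(anchors[1])      # the square (ratio 1) anchor centered at (0, 0): [-16. -16.  16.  16.]
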
626 | for i in range(len(precisions) - 2, -1, -1): 627 | precisions[i] = np.maximum(precisions[i], precisions[i + 1]) 628 | 629 | # Compute mean AP over recall range 630 | indices = np.where(recalls[:-1] != recalls[1:])[0] + 1 631 | mAP = np.sum((recalls[indices] - recalls[indices - 1]) * 632 | precisions[indices]) 633 | 634 | return mAP, precisions, recalls, overlaps 635 | 636 | 637 | def compute_recall(pred_boxes, gt_boxes, iou): 638 | """Compute the recall at the given IoU threshold. It's an indication 639 | of how many GT boxes were found by the given prediction boxes. 640 | 641 | pred_boxes: [N, (y1, x1, y2, x2)] in image coordinates 642 | gt_boxes: [N, (y1, x1, y2, x2)] in image coordinates 643 | """ 644 | # Measure overlaps 645 | overlaps = compute_overlaps(pred_boxes, gt_boxes) 646 | iou_max = np.max(overlaps, axis=1) 647 | iou_argmax = np.argmax(overlaps, axis=1) 648 | positive_ids = np.where(iou_max >= iou)[0] 649 | matched_gt_boxes = iou_argmax[positive_ids] 650 | 651 | recall = len(set(matched_gt_boxes)) / gt_boxes.shape[0] 652 | return recall, positive_ids 653 | 654 | 655 | # ## Batch Slicing 656 | # Some custom layers support a batch size of 1 only, and require a lot of work 657 | # to support batches greater than 1. This function slices an input tensor 658 | # across the batch dimension and feeds batches of size 1. Effectively, 659 | # an easy way to support batches > 1 quickly with little code modification. 660 | # In the long run, it's more efficient to modify the code to support large 661 | # batches and getting rid of this function. Consider this a temporary solution 662 | def batch_slice(inputs, graph_fn, batch_size, names=None): 663 | """Splits inputs into slices and feeds each slice to a copy of the given 664 | computation graph and then combines the results. It allows you to run a 665 | graph on a batch of inputs even if the graph is written to support one 666 | instance only. 667 | 668 | inputs: list of tensors. All must have the same first dimension length 669 | graph_fn: A function that returns a TF tensor that's part of a graph. 670 | batch_size: number of slices to divide the data into. 671 | names: If provided, assigns names to the resulting tensors. 672 | """ 673 | if not isinstance(inputs, list): 674 | inputs = [inputs] 675 | 676 | outputs = [] 677 | for i in range(batch_size): 678 | inputs_slice = [x[i] for x in inputs] 679 | output_slice = graph_fn(*inputs_slice) 680 | if not isinstance(output_slice, (tuple, list)): 681 | output_slice = [output_slice] 682 | outputs.append(output_slice) 683 | # Change outputs from a list of slices where each is 684 | # a list of outputs to a list of outputs and each has 685 | # a list of slices 686 | outputs = list(zip(*outputs)) 687 | 688 | if names is None: 689 | names = [None] * len(outputs) 690 | 691 | result = [tf.stack(o, axis=0, name=n) 692 | for o, n in zip(outputs, names)] 693 | if len(result) == 1: 694 | result = result[0] 695 | 696 | return result 697 | 698 | 699 | def download_trained_weights(coco_model_path, verbose=1): 700 | """Download COCO trained weights from Releases. 701 | 702 | coco_model_path: local path of COCO trained weights 703 | """ 704 | if verbose > 0: 705 | print("Downloading pretrained model to " + coco_model_path + " ...") 706 | with contextlib.closing(request.urlopen(COCO_MODEL_URL)) as resp, open(coco_model_path, 'wb') as out: 707 | shutil.copyfileobj(resp, out) 708 | if verbose > 0: 709 | print("... 
done downloading pretrained model!") 710 | -------------------------------------------------------------------------------- /src/mask_rcnn_ros/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Common utility functions and classes. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | """ 9 | 10 | import sys 11 | import os 12 | import math 13 | import random 14 | import numpy as np 15 | import tensorflow as tf 16 | import scipy.misc 17 | import skimage.color 18 | import skimage.io 19 | import urllib.request 20 | import shutil 21 | 22 | # URL from which to download the latest COCO trained weights 23 | COCO_MODEL_URL = "https://github.com/matterport/Mask_RCNN/releases/download/v2.0/mask_rcnn_coco.h5" 24 | 25 | 26 | ############################################################ 27 | # Bounding Boxes 28 | ############################################################ 29 | 30 | def extract_bboxes(mask): 31 | """Compute bounding boxes from masks. 32 | mask: [height, width, num_instances]. Mask pixels are either 1 or 0. 33 | 34 | Returns: bbox array [num_instances, (y1, x1, y2, x2)]. 35 | """ 36 | boxes = np.zeros([mask.shape[-1], 4], dtype=np.int32) 37 | for i in range(mask.shape[-1]): 38 | m = mask[:, :, i] 39 | # Bounding box. 40 | horizontal_indicies = np.where(np.any(m, axis=0))[0] 41 | vertical_indicies = np.where(np.any(m, axis=1))[0] 42 | if horizontal_indicies.shape[0]: 43 | x1, x2 = horizontal_indicies[[0, -1]] 44 | y1, y2 = vertical_indicies[[0, -1]] 45 | # x2 and y2 should not be part of the box. Increment by 1. 46 | x2 += 1 47 | y2 += 1 48 | else: 49 | # No mask for this instance. Might happen due to 50 | # resizing or cropping. Set bbox to zeros 51 | x1, x2, y1, y2 = 0, 0, 0, 0 52 | boxes[i] = np.array([y1, x1, y2, x2]) 53 | return boxes.astype(np.int32) 54 | 55 | 56 | def compute_iou(box, boxes, box_area, boxes_area): 57 | """Calculates IoU of the given box with the array of the given boxes. 58 | box: 1D vector [y1, x1, y2, x2] 59 | boxes: [boxes_count, (y1, x1, y2, x2)] 60 | box_area: float. the area of 'box' 61 | boxes_area: array of length boxes_count. 62 | 63 | Note: the areas are passed in rather than calculated here for 64 | efficency. Calculate once in the caller to avoid duplicate work. 65 | """ 66 | # Calculate intersection areas 67 | y1 = np.maximum(box[0], boxes[:, 0]) 68 | y2 = np.minimum(box[2], boxes[:, 2]) 69 | x1 = np.maximum(box[1], boxes[:, 1]) 70 | x2 = np.minimum(box[3], boxes[:, 3]) 71 | intersection = np.maximum(x2 - x1, 0) * np.maximum(y2 - y1, 0) 72 | union = box_area + boxes_area[:] - intersection[:] 73 | iou = intersection / union 74 | return iou 75 | 76 | 77 | def compute_overlaps(boxes1, boxes2): 78 | """Computes IoU overlaps between two sets of boxes. 79 | boxes1, boxes2: [N, (y1, x1, y2, x2)]. 80 | 81 | For better performance, pass the largest set first and the smaller second. 82 | """ 83 | # Areas of anchors and GT boxes 84 | area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1]) 85 | area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1]) 86 | 87 | # Compute overlaps to generate matrix [boxes1 count, boxes2 count] 88 | # Each cell contains the IoU value. 
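
A quick numeric check of compute_iou() above with invented boxes, assuming the module is importable as utils. The first candidate matches exactly; the second overlaps by a 5x5 region:

import numpy as np
from utils import compute_iou

box = np.array([0, 0, 10, 10])                      # [y1, x1, y2, x2]
boxes = np.array([[0, 0, 10, 10], [5, 5, 15, 15]])
print(compute_iou(box, boxes, 100.0, np.array([100.0, 100.0])))
# [1.         0.14285714] -- a perfect match, then 25 / (100 + 100 - 25)
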
96 | def compute_overlaps_masks(masks1, masks2):
97 |     """Computes IoU overlaps between two sets of masks.
98 |     masks1, masks2: [Height, Width, instances]
99 |     """
100 |     # flatten masks
101 |     masks1 = np.reshape(masks1 > .5, (-1, masks1.shape[-1])).astype(np.float32)
102 |     masks2 = np.reshape(masks2 > .5, (-1, masks2.shape[-1])).astype(np.float32)
103 |     area1 = np.sum(masks1, axis=0)
104 |     area2 = np.sum(masks2, axis=0)
105 |
106 |     # intersections and union
107 |     intersections = np.dot(masks1.T, masks2)
108 |     union = area1[:, None] + area2[None, :] - intersections
109 |     overlaps = intersections / union
110 |
111 |     return overlaps
112 |
113 |
114 | def non_max_suppression(boxes, scores, threshold):
115 |     """Performs non-maximum suppression and returns indices of kept boxes.
116 |     boxes: [N, (y1, x1, y2, x2)]. Notice that (y2, x2) lies outside the box.
117 |     scores: 1-D array of box scores.
118 |     threshold: Float. IoU threshold to use for filtering.
119 |     """
120 |     assert boxes.shape[0] > 0
121 |     if boxes.dtype.kind != "f":
122 |         boxes = boxes.astype(np.float32)
123 |
124 |     # Compute box areas
125 |     y1 = boxes[:, 0]
126 |     x1 = boxes[:, 1]
127 |     y2 = boxes[:, 2]
128 |     x2 = boxes[:, 3]
129 |     area = (y2 - y1) * (x2 - x1)
130 |
131 |     # Get indices of boxes sorted by scores (highest first)
132 |     ixs = scores.argsort()[::-1]
133 |
134 |     pick = []
135 |     while len(ixs) > 0:
136 |         # Pick top box and add its index to the list
137 |         i = ixs[0]
138 |         pick.append(i)
139 |         # Compute IoU of the picked box with the rest
140 |         iou = compute_iou(boxes[i], boxes[ixs[1:]], area[i], area[ixs[1:]])
141 |         # Identify boxes with IoU over the threshold. This
142 |         # returns indices into ixs[1:], so add 1 to get
143 |         # indices into ixs.
144 |         remove_ixs = np.where(iou > threshold)[0] + 1
145 |         # Remove indices of the picked and overlapped boxes.
146 |         ixs = np.delete(ixs, remove_ixs)
147 |         ixs = np.delete(ixs, 0)
148 |     return np.array(pick, dtype=np.int32)
149 |
150 |
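# --- Editor's illustration (not part of the original file) ---
# With two near-duplicate boxes and one distant box, NMS at threshold 0.3
# keeps the higher-scoring duplicate plus the distant box:
#
#   boxes = np.array([[0, 0, 10, 10], [1, 1, 11, 11], [50, 50, 60, 60]])
#   scores = np.array([0.9, 0.8, 0.7])
#   non_max_suppression(boxes, scores, threshold=0.3)
#   # -> array([0, 2], dtype=int32); box 1 overlaps box 0 with IoU ~0.68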
151 | def apply_box_deltas(boxes, deltas):
152 |     """Applies the given deltas to the given boxes.
153 |     boxes: [N, (y1, x1, y2, x2)]. Note that (y2, x2) is outside the box.
154 |     deltas: [N, (dy, dx, log(dh), log(dw))]
155 |     """
156 |     boxes = boxes.astype(np.float32)
157 |     # Convert to y, x, h, w
158 |     height = boxes[:, 2] - boxes[:, 0]
159 |     width = boxes[:, 3] - boxes[:, 1]
160 |     center_y = boxes[:, 0] + 0.5 * height
161 |     center_x = boxes[:, 1] + 0.5 * width
162 |     # Apply deltas
163 |     center_y += deltas[:, 0] * height
164 |     center_x += deltas[:, 1] * width
165 |     height *= np.exp(deltas[:, 2])
166 |     width *= np.exp(deltas[:, 3])
167 |     # Convert back to y1, x1, y2, x2
168 |     y1 = center_y - 0.5 * height
169 |     x1 = center_x - 0.5 * width
170 |     y2 = y1 + height
171 |     x2 = x1 + width
172 |     return np.stack([y1, x1, y2, x2], axis=1)
173 |
174 |
175 | def box_refinement_graph(box, gt_box):
176 |     """Compute refinement needed to transform box to gt_box.
177 |     box and gt_box are [N, (y1, x1, y2, x2)]
178 |     """
179 |     box = tf.cast(box, tf.float32)
180 |     gt_box = tf.cast(gt_box, tf.float32)
181 |
182 |     height = box[:, 2] - box[:, 0]
183 |     width = box[:, 3] - box[:, 1]
184 |     center_y = box[:, 0] + 0.5 * height
185 |     center_x = box[:, 1] + 0.5 * width
186 |
187 |     gt_height = gt_box[:, 2] - gt_box[:, 0]
188 |     gt_width = gt_box[:, 3] - gt_box[:, 1]
189 |     gt_center_y = gt_box[:, 0] + 0.5 * gt_height
190 |     gt_center_x = gt_box[:, 1] + 0.5 * gt_width
191 |
192 |     dy = (gt_center_y - center_y) / height
193 |     dx = (gt_center_x - center_x) / width
194 |     dh = tf.log(gt_height / height)
195 |     dw = tf.log(gt_width / width)
196 |
197 |     result = tf.stack([dy, dx, dh, dw], axis=1)
198 |     return result
199 |
200 |
201 | def box_refinement(box, gt_box):
202 |     """Compute refinement needed to transform box to gt_box.
203 |     box and gt_box are [N, (y1, x1, y2, x2)]. (y2, x2) is
204 |     assumed to be outside the box.
205 |     """
206 |     box = box.astype(np.float32)
207 |     gt_box = gt_box.astype(np.float32)
208 |
209 |     height = box[:, 2] - box[:, 0]
210 |     width = box[:, 3] - box[:, 1]
211 |     center_y = box[:, 0] + 0.5 * height
212 |     center_x = box[:, 1] + 0.5 * width
213 |
214 |     gt_height = gt_box[:, 2] - gt_box[:, 0]
215 |     gt_width = gt_box[:, 3] - gt_box[:, 1]
216 |     gt_center_y = gt_box[:, 0] + 0.5 * gt_height
217 |     gt_center_x = gt_box[:, 1] + 0.5 * gt_width
218 |
219 |     dy = (gt_center_y - center_y) / height
220 |     dx = (gt_center_x - center_x) / width
221 |     dh = np.log(gt_height / height)
222 |     dw = np.log(gt_width / width)
223 |
224 |     return np.stack([dy, dx, dh, dw], axis=1)
225 |
226 |
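# --- Editor's illustration (not part of the original file) ---
# box_refinement() and apply_box_deltas() are inverses of each other, so
# refining a box toward a target and applying the deltas recovers the target:
#
#   box = np.array([[0, 0, 10, 10]])
#   gt = np.array([[5, 5, 25, 25]])
#   deltas = box_refinement(box, gt)     # [dy, dx, log(dh), log(dw)]
#   apply_box_deltas(box, deltas)        # -> [[5., 5., 25., 25.]]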
227 | ############################################################
228 | #  Dataset
229 | ############################################################
230 |
231 | class Dataset(object):
232 |     """The base class for dataset classes.
233 |     To use it, create a new class that adds functions specific to the dataset
234 |     you want to use. For example:
235 |
236 |     class CatsAndDogsDataset(Dataset):
237 |         def load_cats_and_dogs(self):
238 |             ...
239 |         def load_mask(self, image_id):
240 |             ...
241 |         def image_reference(self, image_id):
242 |             ...
243 |
244 |     See COCODataset and ShapesDataset as examples.
245 |     """
246 |
247 |     def __init__(self, class_map=None):
248 |         self._image_ids = []
249 |         self.image_info = []
250 |         # Background is always the first class
251 |         self.class_info = [{"source": "", "id": 0, "name": "BG"}]
252 |         self.source_class_ids = {}
253 |
254 |     def add_class(self, source, class_id, class_name):
255 |         assert "." not in source, "Source name cannot contain a dot"
256 |         # Does the class exist already?
257 |         for info in self.class_info:
258 |             if info['source'] == source and info["id"] == class_id:
259 |                 # source.class_id combination already available, skip
260 |                 return
261 |         # Add the class
262 |         self.class_info.append({
263 |             "source": source,
264 |             "id": class_id,
265 |             "name": class_name,
266 |         })
267 |
268 |     def add_image(self, source, image_id, path, **kwargs):
269 |         image_info = {
270 |             "id": image_id,
271 |             "source": source,
272 |             "path": path,
273 |         }
274 |         image_info.update(kwargs)
275 |         self.image_info.append(image_info)
276 |
277 |     def image_reference(self, image_id):
278 |         """Return a link to the image in its source website or details about
279 |         the image that help in looking it up or debugging it.
280 |
281 |         Override for your dataset, but pass to this function
282 |         if you encounter images not in your dataset.
283 |         """
284 |         return ""
285 |
286 |     def prepare(self, class_map=None):
287 |         """Prepares the Dataset class for use.
288 |
289 |         TODO: class map is not supported yet. When done, it should handle mapping
290 |         classes from different datasets to the same class ID.
291 |         """
292 |
293 |         def clean_name(name):
294 |             """Returns a shorter version of object names for cleaner display."""
295 |             return ",".join(name.split(",")[:1])
296 |
297 |         # Build (or rebuild) everything else from the info dicts.
298 |         self.num_classes = len(self.class_info)
299 |         self.class_ids = np.arange(self.num_classes)
300 |         self.class_names = [clean_name(c["name"]) for c in self.class_info]
301 |         self.num_images = len(self.image_info)
302 |         self._image_ids = np.arange(self.num_images)
303 |
304 |         self.class_from_source_map = {"{}.{}".format(info['source'], info['id']): id
305 |                                       for info, id in zip(self.class_info, self.class_ids)}
306 |
307 |         # Map sources to class_ids they support
308 |         self.sources = list(set([i['source'] for i in self.class_info]))
309 |         self.source_class_ids = {}
310 |         # Loop over datasets
311 |         for source in self.sources:
312 |             self.source_class_ids[source] = []
313 |             # Find classes that belong to this dataset
314 |             for i, info in enumerate(self.class_info):
315 |                 # Include BG class in all datasets
316 |                 if i == 0 or source == info['source']:
317 |                     self.source_class_ids[source].append(i)
318 |
319 |     def map_source_class_id(self, source_class_id):
320 |         """Takes a source class ID and returns the int class ID assigned to it.
321 |
322 |         For example:
323 |         dataset.map_source_class_id("coco.12") -> 23
324 |         """
325 |         return self.class_from_source_map[source_class_id]
326 |
327 |     def get_source_class_id(self, class_id, source):
328 |         """Map an internal class ID to the corresponding class ID in the source dataset."""
329 |         info = self.class_info[class_id]
330 |         assert info['source'] == source
331 |         return info['id']
332 |
333 |     def append_data(self, class_info, image_info):
334 |         self.external_to_class_id = {}
335 |         for i, c in enumerate(self.class_info):
336 |             for ds, id in c["map"]:
337 |                 self.external_to_class_id[ds + str(id)] = i
338 |
339 |         # Map external image IDs to internal ones.
340 |         self.external_to_image_id = {}
341 |         for i, info in enumerate(self.image_info):
342 |             self.external_to_image_id[info["ds"] + str(info["id"])] = i
343 |
344 |     @property
345 |     def image_ids(self):
346 |         return self._image_ids
347 |
348 |     def source_image_link(self, image_id):
349 |         """Returns the path or URL to the image.
350 |         Override this to return a URL to the image if it's available online for easy
351 |         debugging.
352 |         """
353 |         return self.image_info[image_id]["path"]
354 |
355 |     def load_image(self, image_id):
356 |         """Load the specified image and return a [H,W,3] Numpy array.
357 |         """
358 |         # Load image
359 |         image = skimage.io.imread(self.image_info[image_id]['path'])
360 |         # If grayscale, convert to RGB for consistency.
361 |         if image.ndim != 3:
362 |             image = skimage.color.gray2rgb(image)
363 |         return image
364 |
365 |     def load_mask(self, image_id):
366 |         """Load instance masks for the given image.
367 |
368 |         Different datasets use different ways to store masks. Override this
369 |         method to load instance masks and return them in the form of an
370 |         array of binary masks of shape [height, width, instances].
371 |
372 |         Returns:
373 |             masks: A bool array of shape [height, width, instance count] with
374 |                 a binary mask per instance.
375 |             class_ids: a 1D array of class IDs of the instance masks.
376 |         """
377 |         # Override this function to load a mask from your dataset.
378 |         # Otherwise, it returns an empty mask.
379 |         mask = np.empty([0, 0, 0])
380 |         class_ids = np.empty([0], np.int32)
381 |         return mask, class_ids
382 |
383 |
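# --- Editor's illustration (not part of the original file) ---
# The smallest useful Dataset subclass registers classes and images, then
# calls prepare(). All names below ("demo", "widget", the path) are
# hypothetical placeholders:
#
#   class DemoDataset(Dataset):
#       def load_demo(self):
#           self.add_class("demo", 1, "widget")
#           self.add_image("demo", image_id=0, path="/tmp/widget_0.png")
#
#   ds = DemoDataset()
#   ds.load_demo()
#   ds.prepare()
#   ds.class_names                       # -> ['BG', 'widget']
#   ds.map_source_class_id("demo.1")     # -> 1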
384 | def resize_image(image, min_dim=None, max_dim=None, padding=False):
385 |     """
386 |     Resizes an image keeping the aspect ratio.
387 |
388 |     min_dim: if provided, resizes the image such that its smaller
389 |         dimension == min_dim
390 |     max_dim: if provided, ensures that the image's longest side doesn't
391 |         exceed this value.
392 |     padding: If true, pads image with zeros so its size is max_dim x max_dim
393 |
394 |     Returns:
395 |     image: the resized image
396 |     window: (y1, x1, y2, x2). If max_dim is provided, padding might
397 |         be inserted in the returned image. If so, this window is the
398 |         coordinates of the image part of the full image (excluding
399 |         the padding). The x2, y2 pixels are not included.
400 |     scale: The scale factor used to resize the image
401 |     padding: Padding added to the image [(top, bottom), (left, right), (0, 0)]
402 |     """
403 |     # Default window (y1, x1, y2, x2) and default scale == 1.
404 |     h, w = image.shape[:2]
405 |     window = (0, 0, h, w)
406 |     scale = 1
407 |
408 |     # Scale?
409 |     if min_dim:
410 |         # Scale up but not down
411 |         scale = max(1, min_dim / min(h, w))
412 |     # Does it exceed max dim?
413 |     if max_dim:
414 |         image_max = max(h, w)
415 |         if round(image_max * scale) > max_dim:
416 |             scale = max_dim / image_max
417 |     # Resize image
418 |     if scale != 1:
419 |         image = scipy.misc.imresize(
420 |             image, (round(h * scale), round(w * scale)))
421 |     # Need padding?
422 |     if padding:
423 |         # Get new height and width
424 |         h, w = image.shape[:2]
425 |         top_pad = (max_dim - h) // 2
426 |         bottom_pad = max_dim - h - top_pad
427 |         left_pad = (max_dim - w) // 2
428 |         right_pad = max_dim - w - left_pad
429 |         padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)]
430 |         image = np.pad(image, padding, mode='constant', constant_values=0)
431 |         window = (top_pad, left_pad, h + top_pad, w + left_pad)
432 |     return image, window, scale, padding
433 |
434 |
435 | def resize_mask(mask, scale, padding):
436 |     """Resizes a mask using the given scale and padding.
437 |     Typically, you get the scale and padding from resize_image() to
438 |     ensure both the image and the mask are resized consistently.
439 |
440 |     scale: mask scaling factor
441 |     padding: Padding to add to the mask in the form
442 |         [(top, bottom), (left, right), (0, 0)]
443 |     """
444 |     h, w = mask.shape[:2]
445 |     mask = scipy.ndimage.zoom(mask, zoom=[scale, scale, 1], order=0)
446 |     mask = np.pad(mask, padding, mode='constant', constant_values=0)
447 |     return mask
448 |
449 |
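# --- Editor's illustration (not part of the original file) ---
# Resizing an image and its mask consistently: a 100x200 input with
# min_dim=256, max_dim=256, padding=True is capped at scale 256/200 = 1.28,
# resized to 128x256, then padded to 256x256 (64 pixels top and bottom).
#
#   image = np.zeros((100, 200, 3), dtype=np.uint8)
#   mask = np.zeros((100, 200, 1), dtype=bool)
#   image, window, scale, padding = resize_image(
#       image, min_dim=256, max_dim=256, padding=True)
#   mask = resize_mask(mask, scale, padding)
#   # image.shape == (256, 256, 3), window == (64, 0, 192, 256)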
450 | def minimize_mask(bbox, mask, mini_shape):
451 |     """Resize masks to a smaller version to cut memory load.
452 |     Mini-masks can then be resized back to image scale using expand_mask()
453 |
454 |     See inspect_data.ipynb notebook for more details.
455 |     """
456 |     mini_mask = np.zeros(mini_shape + (mask.shape[-1],), dtype=bool)
457 |     for i in range(mask.shape[-1]):
458 |         m = mask[:, :, i]
459 |         y1, x1, y2, x2 = bbox[i][:4]
460 |         m = m[y1:y2, x1:x2]
461 |         if m.size == 0:
462 |             raise Exception("Invalid bounding box with area of zero")
463 |         m = scipy.misc.imresize(m.astype(float), mini_shape, interp='bilinear')
464 |         mini_mask[:, :, i] = np.where(m >= 128, 1, 0)
465 |     return mini_mask
466 |
467 |
468 | def expand_mask(bbox, mini_mask, image_shape):
469 |     """Resizes mini masks back to image size. Reverses the change
470 |     of minimize_mask().
471 |
472 |     See inspect_data.ipynb notebook for more details.
473 |     """
474 |     mask = np.zeros(image_shape[:2] + (mini_mask.shape[-1],), dtype=bool)
475 |     for i in range(mask.shape[-1]):
476 |         m = mini_mask[:, :, i]
477 |         y1, x1, y2, x2 = bbox[i][:4]
478 |         h = y2 - y1
479 |         w = x2 - x1
480 |         m = scipy.misc.imresize(m.astype(float), (h, w), interp='bilinear')
481 |         mask[y1:y2, x1:x2, i] = np.where(m >= 128, 1, 0)
482 |     return mask
483 |
484 |
485 | # TODO: Build and use this function to reduce code duplication
486 | def mold_mask(mask, config):
487 |     pass
488 |
489 |
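# --- Editor's illustration (not part of the original file) ---
# Mini-mask round trip: store only the crop inside each box at a fixed
# small size, then expand it back at the original box location.
#
#   mask = np.zeros((128, 128, 1), dtype=bool)
#   mask[10:50, 20:80, 0] = True
#   bbox = extract_bboxes(mask)                  # -> [[10, 20, 50, 80]]
#   mini = minimize_mask(bbox, mask, mini_shape=(56, 56))
#   full = expand_mask(bbox, mini, image_shape=(128, 128))
#   # mini.shape == (56, 56, 1); full approximates the original mask.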
490 | def unmold_mask(mask, bbox, image_shape):
491 |     """Converts a mask generated by the neural network into a format similar
492 |     to its original shape.
493 |     mask: [height, width] of type float. A small, typically 28x28 mask.
494 |     bbox: [y1, x1, y2, x2]. The box to fit the mask in.
495 |
496 |     Returns a binary mask with the same size as the original image.
497 |     """
498 |     threshold = 0.5
499 |     y1, x1, y2, x2 = bbox
500 |     mask = scipy.misc.imresize(
501 |         mask, (y2 - y1, x2 - x1), interp='bilinear').astype(np.float32) / 255.0
502 |     mask = np.where(mask >= threshold, 1, 0).astype(np.uint8)
503 |
504 |     # Put the mask in the right location.
505 |     full_mask = np.zeros(image_shape[:2], dtype=np.uint8)
506 |     full_mask[y1:y2, x1:x2] = mask
507 |     return full_mask
508 |
509 |
510 | ############################################################
511 | #  Anchors
512 | ############################################################
513 |
514 | def generate_anchors(scales, ratios, shape, feature_stride, anchor_stride):
515 |     """
516 |     scales: 1D array of anchor sizes in pixels. Example: [32, 64, 128]
517 |     ratios: 1D array of anchor ratios of width/height. Example: [0.5, 1, 2]
518 |     shape: [height, width] spatial shape of the feature map over which
519 |         to generate anchors.
520 |     feature_stride: Stride of the feature map relative to the image in pixels.
521 |     anchor_stride: Stride of anchors on the feature map. For example, if the
522 |         value is 2 then generate anchors for every other feature map pixel.
523 |     """
524 |     # Get all combinations of scales and ratios
525 |     scales, ratios = np.meshgrid(np.array(scales), np.array(ratios))
526 |     scales = scales.flatten()
527 |     ratios = ratios.flatten()
528 |
529 |     # Enumerate heights and widths from scales and ratios
530 |     heights = scales / np.sqrt(ratios)
531 |     widths = scales * np.sqrt(ratios)
532 |
533 |     # Enumerate shifts in feature space
534 |     shifts_y = np.arange(0, shape[0], anchor_stride) * feature_stride
535 |     shifts_x = np.arange(0, shape[1], anchor_stride) * feature_stride
536 |     shifts_x, shifts_y = np.meshgrid(shifts_x, shifts_y)
537 |
538 |     # Enumerate combinations of shifts, widths, and heights
539 |     box_widths, box_centers_x = np.meshgrid(widths, shifts_x)
540 |     box_heights, box_centers_y = np.meshgrid(heights, shifts_y)
541 |
542 |     # Reshape to get a list of (y, x) and a list of (h, w)
543 |     box_centers = np.stack(
544 |         [box_centers_y, box_centers_x], axis=2).reshape([-1, 2])
545 |     box_sizes = np.stack([box_heights, box_widths], axis=2).reshape([-1, 2])
546 |
547 |     # Convert to corner coordinates (y1, x1, y2, x2)
548 |     boxes = np.concatenate([box_centers - 0.5 * box_sizes,
549 |                             box_centers + 0.5 * box_sizes], axis=1)
550 |     return boxes
551 |
552 |
553 | def generate_pyramid_anchors(scales, ratios, feature_shapes, feature_strides,
554 |                              anchor_stride):
555 |     """Generate anchors at different levels of a feature pyramid. Each scale
556 |     is associated with a level of the pyramid, but each ratio is used in
557 |     all levels of the pyramid.
558 |
559 |     Returns:
560 |     anchors: [N, (y1, x1, y2, x2)]. All generated anchors in one array, sorted
561 |         in the same order as the given scales. So, anchors of scale[0] come
562 |         first, then anchors of scale[1], and so on.
563 |     """
564 |     # Anchors
565 |     # [anchor_count, (y1, x1, y2, x2)]
566 |     anchors = []
567 |     for i in range(len(scales)):
568 |         anchors.append(generate_anchors(scales[i], ratios, feature_shapes[i],
569 |                                         feature_strides[i], anchor_stride))
570 |     return np.concatenate(anchors, axis=0)
571 |
572 |
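# --- Editor's illustration (not part of the original file) ---
# Anchor counts follow the feature map sizes: for two pyramid levels with
# 3 ratios each, 64*64*3 + 32*32*3 = 15360 anchors are generated.
#
#   anchors = generate_pyramid_anchors(
#       scales=[32, 64], ratios=[0.5, 1, 2],
#       feature_shapes=[(64, 64), (32, 32)],
#       feature_strides=[4, 8], anchor_stride=1)
#   # anchors.shape == (15360, 4), rows are (y1, x1, y2, x2)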
573 | ############################################################
574 | #  Miscellaneous
575 | ############################################################
576 |
577 | def trim_zeros(x):
578 |     """It's common to have tensors larger than the available data and
579 |     pad with zeros. This function removes rows that are all zeros.
580 |
581 |     x: [rows, columns].
582 |     """
583 |     assert len(x.shape) == 2
584 |     return x[~np.all(x == 0, axis=1)]
585 |
586 |
587 | def compute_ap(gt_boxes, gt_class_ids, gt_masks,
588 |                pred_boxes, pred_class_ids, pred_scores, pred_masks,
589 |                iou_threshold=0.5):
590 |     """Compute Average Precision at a set IoU threshold (default 0.5).
591 |
592 |     Returns:
593 |     mAP: Mean Average Precision
594 |     precisions: List of precisions at different class score thresholds.
595 |     recalls: List of recall values at different class score thresholds.
596 |     overlaps: [pred_boxes, gt_boxes] IoU overlaps.
597 |     """
598 |     # Trim zero padding and sort predictions by score from high to low
599 |     # TODO: cleaner to do zero unpadding upstream
600 |     gt_boxes = trim_zeros(gt_boxes)
601 |     gt_masks = gt_masks[..., :gt_boxes.shape[0]]
602 |     pred_boxes = trim_zeros(pred_boxes)
603 |     pred_scores = pred_scores[:pred_boxes.shape[0]]
604 |     indices = np.argsort(pred_scores)[::-1]
605 |     pred_boxes = pred_boxes[indices]
606 |     pred_class_ids = pred_class_ids[indices]
607 |     pred_scores = pred_scores[indices]
608 |     pred_masks = pred_masks[..., indices]
609 |
610 |     # Compute IoU overlaps [pred_masks, gt_masks]
611 |     overlaps = compute_overlaps_masks(pred_masks, gt_masks)
612 |
613 |     # Loop through predictions and find matching ground truth boxes
614 |     match_count = 0
615 |     pred_match = np.zeros([pred_boxes.shape[0]])
616 |     gt_match = np.zeros([gt_boxes.shape[0]])
617 |     for i in range(len(pred_boxes)):
618 |         # Find best matching ground truth box
619 |         sorted_ixs = np.argsort(overlaps[i])[::-1]
620 |         for j in sorted_ixs:
621 |             # If ground truth box is already matched, go to next one
622 |             if gt_match[j] == 1:
623 |                 continue
624 |             # If we reach IoU smaller than the threshold, end the loop
625 |             iou = overlaps[i, j]
626 |             if iou < iou_threshold:
627 |                 break
628 |             # Do we have a match?
629 |             if pred_class_ids[i] == gt_class_ids[j]:
630 |                 match_count += 1
631 |                 gt_match[j] = 1
632 |                 pred_match[i] = 1
633 |                 break
634 |
635 |     # Compute precision and recall at each prediction box step
636 |     precisions = np.cumsum(pred_match) / (np.arange(len(pred_match)) + 1)
637 |     recalls = np.cumsum(pred_match).astype(np.float32) / len(gt_match)
638 |
639 |     # Pad with start and end values to simplify the math
640 |     precisions = np.concatenate([[0], precisions, [0]])
641 |     recalls = np.concatenate([[0], recalls, [1]])
642 |
643 |     # Ensure precision values decrease but don't increase. This way, the
644 |     # precision value at each recall threshold is the maximum it can be
645 |     # for all following recall thresholds, as specified by the VOC paper.
646 |     for i in range(len(precisions) - 2, -1, -1):
647 |         precisions[i] = np.maximum(precisions[i], precisions[i + 1])
648 |
649 |     # Compute mean AP over recall range
650 |     indices = np.where(recalls[:-1] != recalls[1:])[0] + 1
651 |     mAP = np.sum((recalls[indices] - recalls[indices - 1]) *
652 |                  precisions[indices])
653 |
654 |     return mAP, precisions, recalls, overlaps
655 |
656 |
657 | def compute_recall(pred_boxes, gt_boxes, iou):
658 |     """Compute the recall at the given IoU threshold. It's an indication
659 |     of how many GT boxes were found by the given prediction boxes.
660 |
661 |     pred_boxes: [N, (y1, x1, y2, x2)] in image coordinates
662 |     gt_boxes: [N, (y1, x1, y2, x2)] in image coordinates
663 |     """
664 |     # Measure overlaps
665 |     overlaps = compute_overlaps(pred_boxes, gt_boxes)
666 |     iou_max = np.max(overlaps, axis=1)
667 |     iou_argmax = np.argmax(overlaps, axis=1)
668 |     positive_ids = np.where(iou_max >= iou)[0]
669 |     matched_gt_boxes = iou_argmax[positive_ids]
670 |
671 |     recall = len(set(matched_gt_boxes)) / gt_boxes.shape[0]
672 |     return recall, positive_ids
673 |
674 |
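# --- Editor's illustration (not part of the original file) ---
# A single perfect detection scores AP = 1.0. Masks follow the
# [height, width, instances] layout used throughout this file:
#
#   gt_boxes = np.array([[0, 0, 10, 10]])
#   gt_ids = np.array([1])
#   gt_masks = np.zeros((20, 20, 1), dtype=bool)
#   gt_masks[0:10, 0:10, 0] = True
#   mAP, precisions, recalls, overlaps = compute_ap(
#       gt_boxes, gt_ids, gt_masks,
#       pred_boxes=gt_boxes, pred_class_ids=gt_ids,
#       pred_scores=np.array([0.9]), pred_masks=gt_masks)
#   # -> mAP == 1.0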
675 | # ## Batch Slicing
676 | # Some custom layers support a batch size of 1 only, and require a lot of work
677 | # to support batches greater than 1. This function slices an input tensor
678 | # across the batch dimension and feeds batches of size 1. Effectively, it's
679 | # an easy way to support batches > 1 quickly with little code modification.
680 | # In the long run, it's more efficient to modify the code to support large
681 | # batches and get rid of this function. Consider this a temporary solution.
682 | def batch_slice(inputs, graph_fn, batch_size, names=None):
683 |     """Splits inputs into slices and feeds each slice to a copy of the given
684 |     computation graph and then combines the results. It allows you to run a
685 |     graph on a batch of inputs even if the graph is written to support one
686 |     instance only.
687 |
688 |     inputs: list of tensors. All must have the same first dimension length.
689 |     graph_fn: A function that returns a TF tensor that's part of a graph.
690 |     batch_size: number of slices to divide the data into.
691 |     names: If provided, assigns names to the resulting tensors.
692 |     """
693 |     if not isinstance(inputs, list):
694 |         inputs = [inputs]
695 |
696 |     outputs = []
697 |     for i in range(batch_size):
698 |         inputs_slice = [x[i] for x in inputs]
699 |         output_slice = graph_fn(*inputs_slice)
700 |         if not isinstance(output_slice, (tuple, list)):
701 |             output_slice = [output_slice]
702 |         outputs.append(output_slice)
703 |     # Transpose outputs: from a list of slices (each a list of
704 |     # outputs) to a list of outputs (each a list of slices),
705 |     # so matching outputs can be stacked across the batch.
706 |     outputs = list(zip(*outputs))
707 |
708 |     if names is None:
709 |         names = [None] * len(outputs)
710 |
711 |     result = [tf.stack(o, axis=0, name=n)
712 |               for o, n in zip(outputs, names)]
713 |     if len(result) == 1:
714 |         result = result[0]
715 |
716 |     return result
717 |
718 |
719 | def download_trained_weights(coco_model_path, verbose=1):
720 |     """Download COCO trained weights from Releases.
721 |
722 |     coco_model_path: local path of COCO trained weights
723 |     """
724 |     if verbose > 0:
725 |         print("Downloading pretrained model to " + coco_model_path + " ...")
726 |     with urllib.request.urlopen(COCO_MODEL_URL) as resp, open(coco_model_path, 'wb') as out:
727 |         shutil.copyfileobj(resp, out)
728 |     if verbose > 0:
729 |         print("... done downloading pretrained model!")
730 |
--------------------------------------------------------------------------------