├── bags
│   └── .placeholder
├── nodes
│   ├── __init__.py
│   ├── coco.pyc
│   ├── config.pyc
│   ├── model.pyc
│   ├── utils.pyc
│   ├── visualize.pyc
│   ├── __pycache__
│   │   ├── coco.cpython-36.pyc
│   │   ├── config.cpython-36.pyc
│   │   ├── model.cpython-36.pyc
│   │   ├── utils.cpython-36.pyc
│   │   └── visualize.cpython-36.pyc
│   ├── config.py
│   ├── parallel_model.py
│   ├── mask_rcnn_node
│   ├── shapes.py
│   ├── visualize.py
│   ├── coco.py
│   └── utils.py
├── src
│   └── mask_rcnn_ros
│       ├── __init__.py
│       ├── config.py
│       ├── parallel_model.py
│       ├── shapes.py
│       ├── visualize.py
│       ├── coco.py
│       └── utils.py
├── doc
│   ├── mask_r-cnn_1.png
│   └── mask_r-cnn_2.png
├── scripts
│   └── download_freiburg3_rgbd_example_bag.sh
├── requirements.txt
├── setup.py
├── msg
│   └── Result.msg
├── launch
│   └── freiburg3_rgbd_example.launch
├── package.xml
├── CMakeLists.txt
├── .gitignore
├── LICENSE
├── LICENSE.Mask_R-CNN
├── README.md
└── rviz
    └── mask_rcnn_ros.rviz
/bags/.placeholder:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/nodes/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/src/mask_rcnn_ros/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/nodes/coco.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qixuxiang/mask_rcnn_ros/HEAD/nodes/coco.pyc
--------------------------------------------------------------------------------
/nodes/config.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qixuxiang/mask_rcnn_ros/HEAD/nodes/config.pyc
--------------------------------------------------------------------------------
/nodes/model.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qixuxiang/mask_rcnn_ros/HEAD/nodes/model.pyc
--------------------------------------------------------------------------------
/nodes/utils.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qixuxiang/mask_rcnn_ros/HEAD/nodes/utils.pyc
--------------------------------------------------------------------------------
/nodes/visualize.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qixuxiang/mask_rcnn_ros/HEAD/nodes/visualize.pyc
--------------------------------------------------------------------------------
/doc/mask_r-cnn_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qixuxiang/mask_rcnn_ros/HEAD/doc/mask_r-cnn_1.png
--------------------------------------------------------------------------------
/doc/mask_r-cnn_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qixuxiang/mask_rcnn_ros/HEAD/doc/mask_r-cnn_2.png
--------------------------------------------------------------------------------
/nodes/__pycache__/coco.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qixuxiang/mask_rcnn_ros/HEAD/nodes/__pycache__/coco.cpython-36.pyc
--------------------------------------------------------------------------------
/nodes/__pycache__/config.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qixuxiang/mask_rcnn_ros/HEAD/nodes/__pycache__/config.cpython-36.pyc
--------------------------------------------------------------------------------
/nodes/__pycache__/model.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qixuxiang/mask_rcnn_ros/HEAD/nodes/__pycache__/model.cpython-36.pyc
--------------------------------------------------------------------------------
/nodes/__pycache__/utils.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qixuxiang/mask_rcnn_ros/HEAD/nodes/__pycache__/utils.cpython-36.pyc
--------------------------------------------------------------------------------
/nodes/__pycache__/visualize.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qixuxiang/mask_rcnn_ros/HEAD/nodes/__pycache__/visualize.cpython-36.pyc
--------------------------------------------------------------------------------
/scripts/download_freiburg3_rgbd_example_bag.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | wget https://vision.in.tum.de/rgbd/dataset/freiburg3/rgbd_dataset_freiburg3_long_office_household.bag -P bags
3 |
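4 | # Note: run this script from the package root so that the bag lands in the
5 | # package's bags/ directory, where the example launch file looks for it.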
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | h5py==2.7.0
2 | Keras==2.1.2
3 | numpy==1.13.3
4 | opencv-python==3.4.0.12
5 | scikit-image==0.13.0
6 | scikit-learn==0.19.1
7 | scipy==0.19.1
8 | matplotlib==2.2.3
9 | tensorflow-gpu==1.4.1
10 | ipython==5.2.0
11 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | ## ! DO NOT MANUALLY INVOKE THIS setup.py, USE CATKIN INSTEAD
2 |
3 | from distutils.core import setup
4 | from catkin_pkg.python_setup import generate_distutils_setup
5 |
6 | # fetch values from package.xml
7 | setup_args = generate_distutils_setup(
8 | packages=['mask_rcnn_ros',],
9 | package_dir={'': 'src'})
10 |
11 | setup(**setup_args)
12 |
--------------------------------------------------------------------------------
/msg/Result.msg:
--------------------------------------------------------------------------------
1 | std_msgs/Header header
2 |
3 | # Bounding boxes in pixels
4 | sensor_msgs/RegionOfInterest[] boxes
5 |
6 | # Integer class IDs for each bounding box
7 | int32[] class_ids
8 |
9 | # String class IDs for each bounding box
10 | string[] class_names
11 |
12 | # Float probability scores for each class_id
13 | float32[] scores
14 |
15 | # Instance masks as Image
16 | sensor_msgs/Image[] masks
17 |
18 |
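19 | # Note: each mask has the same height and width as the input image; pixels
20 | # that belong to the instance are 255 and background pixels are 0
21 | # (see _build_result_msg in nodes/mask_rcnn_node).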
--------------------------------------------------------------------------------
/launch/freiburg3_rgbd_example.launch:
--------------------------------------------------------------------------------
1 | <launch>
2 |   <param name="use_sim_time" value="true" />
3 |
4 |   <node pkg="rosbag" type="play" name="rosbag" args="--clock $(find mask_rcnn_ros)/bags/rgbd_dataset_freiburg3_long_office_household.bag" />
5 |
6 |   <node pkg="mask_rcnn_ros" type="mask_rcnn_node" name="mask_rcnn">
7 |     <remap from="~input" to="/camera/rgb/image_color" />
8 |   </node>
9 |
10 |   <node pkg="rviz" type="rviz" name="rviz" args="-d $(find mask_rcnn_ros)/rviz/mask_rcnn_ros.rviz" />
11 | </launch>
12 |
--------------------------------------------------------------------------------
/package.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0"?>
2 | <package>
3 |   <name>mask_rcnn_ros</name>
4 |   <version>0.1.0</version>
5 |   <description>The Mask R-CNN for ROS</description>
6 |
7 |   <maintainer>Akio Ochiai</maintainer>
8 |
9 |   <license>MIT</license>
10 |
11 |   <url>http://wiki.ros.org/mask_rcnn_ros</url>
12 |
13 |   <author>Akio Ochiai</author>
14 |   <author>Matterport, Inc.</author>
15 |
16 |   <buildtool_depend>catkin</buildtool_depend>
17 |   <build_depend>message_generation</build_depend>
18 |
19 |   <run_depend>rospy</run_depend>
20 |   <run_depend>message_runtime</run_depend>
21 |   <run_depend>std_msgs</run_depend>
22 |   <run_depend>sensor_msgs</run_depend>
23 |   <run_depend>cv_bridge</run_depend>
24 |   <run_depend>vision_opencv</run_depend>
25 |
26 | </package>
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 2.8.3)
2 | project(mask_rcnn_ros)
3 |
4 | find_package(catkin REQUIRED COMPONENTS std_msgs sensor_msgs message_generation)
5 |
6 | catkin_python_setup()
7 |
8 | add_message_files(
9 | FILES
10 | Result.msg
11 | )
12 |
13 |
14 | generate_messages(
15 | DEPENDENCIES std_msgs sensor_msgs
16 | )
17 |
18 |
19 |
20 | catkin_package(CATKIN_DEPENDS message_runtime)
21 |
22 | #############
23 | ## Install ##
24 | #############
25 |
26 | install(PROGRAMS
27 | nodes/mask_rcnn_node
28 | DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
29 | )
30 |
31 | install(DIRECTORY
32 | msg
33 | DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}
34 | )
35 |
36 |
37 | #############
38 | ## Testing ##
39 | #############
40 |
41 |
42 | ## Add folders to be run by python nosetests
43 | # catkin_add_nosetests(test)
44 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | devel/
2 | logs/
3 | build/
4 | bin/
5 | lib/
6 | msg_gen/
7 | srv_gen/
8 | msg/*Action.msg
9 | msg/*ActionFeedback.msg
10 | msg/*ActionGoal.msg
11 | msg/*ActionResult.msg
12 | msg/*Feedback.msg
13 | msg/*Goal.msg
14 | msg/*Result.msg
15 | msg/_*.py
16 | build_isolated/
17 | devel_isolated/
18 | src/CMakeLists.txt
19 | .catkin_workspace
20 | result/data/*.txt
21 | result/data/*.csv
22 | src/cmake-build-debug/
23 | src/.idea/
24 | src/multisensor/cmake-build-debug/
25 | src/multisensor/.idea/
26 | .vscode/
27 |
28 |
29 | # Generated by dynamic reconfigure
30 | *.cfgc
31 | /cfg/cpp/
32 | /cfg/*.py
33 |
34 | # Ignore generated docs
35 | *.dox
36 | *.wikidoc
37 |
38 | # eclipse stuff
39 | .project
40 | .cproject
41 |
42 | # qcreator stuff
43 | CMakeLists.txt.user
44 |
45 | srv/_*.py
46 | *.pcd
47 | *.pyc
48 | qtcreator-*
49 | *.user
50 |
51 | /planning/cfg
52 | /planning/docs
53 | /planning/src
54 |
55 | *~
56 |
57 | # Emacs
58 | .#*
59 |
60 | # Catkin custom files
61 | CATKIN_IGNORE
62 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | mask_rcnn_ros
2 |
3 | The MIT License (MIT)
4 |
5 | Copyright (c) 2017 Akio Ochiai, Inc.
6 |
7 | Permission is hereby granted, free of charge, to any person obtaining a copy
8 | of this software and associated documentation files (the "Software"), to deal
9 | in the Software without restriction, including without limitation the rights
10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | copies of the Software, and to permit persons to whom the Software is
12 | furnished to do so, subject to the following conditions:
13 |
14 | The above copyright notice and this permission notice shall be included in
15 | all copies or substantial portions of the Software.
16 |
17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | THE SOFTWARE.
24 |
--------------------------------------------------------------------------------
/LICENSE.Mask_R-CNN:
--------------------------------------------------------------------------------
1 | Mask R-CNN
2 |
3 | The MIT License (MIT)
4 |
5 | Copyright (c) 2017 Matterport, Inc.
6 |
7 | Permission is hereby granted, free of charge, to any person obtaining a copy
8 | of this software and associated documentation files (the "Software"), to deal
9 | in the Software without restriction, including without limitation the rights
10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | copies of the Software, and to permit persons to whom the Software is
12 | furnished to do so, subject to the following conditions:
13 |
14 | The above copyright notice and this permission notice shall be included in
15 | all copies or substantial portions of the Software.
16 |
17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | THE SOFTWARE.
24 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # The ROS Package of Mask R-CNN for Object Detection and Segmentation
2 |
3 | This is a ROS package for the [Mask R-CNN](https://arxiv.org/abs/1703.06870) object detection and segmentation algorithm.
4 |
5 | The package provides a Mask R-CNN ROS node with a topic-based interface.
6 |
7 | Most of the core algorithm code is based on the [Mask R-CNN implementation by Matterport, Inc.](https://github.com/matterport/Mask_RCNN).
8 |
9 | ## Training
10 |
11 | This repository doesn't contain code for training the Mask R-CNN network model.
12 | If you want to train the model on your own class definitions or dataset, do so in [the upstream repository](https://github.com/matterport/Mask_RCNN) and pass the resulting weights file via the `model_path` parameter.
13 |
14 |
15 | ## Requirements
16 | * ROS Indigo/Kinetic
17 | * TensorFlow 1.3+
18 | * Keras 2.0.8+
19 | * Numpy, skimage, scipy, Pillow, cython, h5py
20 | * The code has only been tested on Python 2.7; it may also work on Python 3.x.
21 | * See full dependency and version details in [requirements.txt](https://github.com/qixuxiang/mask_rcnn_ros/blob/master/requirements.txt).
22 |
23 | ## ROS Interfaces
24 |
25 | ### Parameters
26 |
27 | * `~model_path: string`
28 |
29 | Path to the HDF5 model file.
30 | If `model_path` is left at its default value and the file doesn't exist, the node downloads it automatically.
31 |
32 | Default: `$ROS_HOME/mask_rcnn_coco.h5`
33 |
34 | * `~visualization: bool`
35 |
36 | If true, the node publishes visualized images to the `~visualization` topic.
37 | Default: `true`
38 |
39 | * `~class_names: string[]`
40 |
41 | Class names to be treated as detection targets.
42 | Default: All MS COCO classes.
43 |
44 | ### Topics Published
45 |
46 | * `~result: mask_rcnn_ros/Result`
47 |
48 | Detection result. See `Result.msg` for a detailed description.
49 |
50 | * `~visualization: sensor_msgs/Image`
51 |
52 | Visualized result over an input image.
53 |
54 |
55 | ### Topics Subscribed
56 |
57 | * `~input: sensor_msgs/Image`
58 |
59 | Input image to be processed.
60 |
61 | ## Getting Started
62 |
63 | 1. Clone this repository into your catkin workspace, then build the workspace and source the devel environment:
64 | ```
65 | $ cd ~/.catkin_ws/src
66 | $ git clone https://github.com/qixuxiang/mask_rcnn_ros.git
67 | $ cd mask_rcnn_ros
68 | $ python2 -m pip install --upgrade pip
69 | $ python2 -m pip install -r requirements.txt
70 | $ cd ../..
71 | $ catkin_make
72 | $ source devel/setup.bash
73 |
74 | ```
75 |
76 | 2. Run the mask_rcnn node:
77 | ~~~bash
78 | $ rosrun mask_rcnn_ros mask_rcnn_node
79 | ~~~
80 |
81 | ## Example
82 |
83 | There is a simple example launch file that uses the [RGB-D SLAM Dataset](https://vision.in.tum.de/data/datasets/rgbd-dataset/download).
84 |
85 | ~~~bash
86 | $ sudo chmod 777 scripts/download_freiburg3_rgbd_example_bag.sh
87 | $ ./scripts/download_freiburg3_rgbd_example_bag.sh
88 | $ roslaunch mask_rcnn_ros freiburg3_rgbd_example.launch
89 | ~~~
90 |
91 | An RViz window will then appear and show results like the following:
92 |
93 | 
94 |
95 | 
96 |
97 | ## Other issues
98 |
99 | * If you have installed Anaconda Python, please delete or comment out the line `export PATH=/home/soft/conda3/bin:$PATH` in your `~/.bashrc` file.
100 |
101 | * When you run the code, allow a moment for results to appear; there is some delay while the bag file plays and the images are processed.
102 |
103 | * Feel free to open an issue if you run into problems, and include your system details, such as Ubuntu 14/16, ROS Indigo/Kinetic, Python 2/3, TensorFlow 1.4, etc.
104 |
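105 | ## Subscribing to the result topic
106 |
107 | For reference, here is a minimal subscriber sketch that consumes the `~result` topic. It assumes the node runs under its default name `mask_rcnn`, so the topic resolves to `/mask_rcnn/result`; see `msg/Result.msg` for the field definitions.
108 |
109 | ~~~python
110 | #!/usr/bin/env python
111 | import rospy
112 | from cv_bridge import CvBridge
113 | from mask_rcnn_ros.msg import Result
114 |
115 | bridge = CvBridge()
116 |
117 | def callback(msg):
118 |     for name, score, box, mask in zip(msg.class_names, msg.scores,
119 |                                       msg.boxes, msg.masks):
120 |         # Each mask arrives as a mono8 sensor_msgs/Image; convert it
121 |         # back to a numpy array (255 = instance, 0 = background).
122 |         mask_np = bridge.imgmsg_to_cv2(mask, 'mono8')
123 |         rospy.loginfo('%s (%.2f): box (%d, %d) %dx%d, mask pixels: %d',
124 |                       name, score, box.x_offset, box.y_offset,
125 |                       box.width, box.height, (mask_np > 0).sum())
126 |
127 | rospy.init_node('result_listener')
128 | rospy.Subscriber('/mask_rcnn/result', Result, callback, queue_size=1)
129 | rospy.spin()
130 | ~~~
131 |
132 | For a quick check without writing code, `rostopic echo /mask_rcnn/result/class_names` prints the detected class names as messages arrive.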
--------------------------------------------------------------------------------
/rviz/mask_rcnn_ros.rviz:
--------------------------------------------------------------------------------
1 | Panels:
2 | - Class: rviz/Displays
3 | Help Height: 0
4 | Name: Displays
5 | Property Tree Widget:
6 | Expanded:
7 | - /Global Options1
8 | - /Status1
9 | - /Image1
10 | - /Image2
11 | Splitter Ratio: 0.755813956
12 | Tree Height: 614
13 | - Class: rviz/Selection
14 | Name: Selection
15 | - Class: rviz/Tool Properties
16 | Expanded:
17 | - /2D Pose Estimate1
18 | - /2D Nav Goal1
19 | - /Publish Point1
20 | Name: Tool Properties
21 | Splitter Ratio: 0.588679016
22 | - Class: rviz/Views
23 | Expanded:
24 | - /Current View1
25 | Name: Views
26 | Splitter Ratio: 0.5
27 | - Class: rviz/Time
28 | Experimental: false
29 | Name: Time
30 | SyncMode: 0
31 | SyncSource: Image
32 | Visualization Manager:
33 | Class: ""
34 | Displays:
35 | - Alpha: 0.5
36 | Cell Size: 1
37 | Class: rviz/Grid
38 | Color: 160; 160; 164
39 | Enabled: true
40 | Line Style:
41 | Line Width: 0.0299999993
42 | Value: Lines
43 | Name: Grid
44 | Normal Cell Count: 0
45 | Offset:
46 | X: 0
47 | Y: 0
48 | Z: 0
49 | Plane: XY
50 | Plane Cell Count: 10
51 | Reference Frame:
52 | Value: true
53 | - Class: rviz/Image
54 | Enabled: true
55 | Image Topic: /camera/rgb/image_color
56 | Max Value: 1
57 | Median window: 5
58 | Min Value: 0
59 | Name: Image
60 | Normalize Range: true
61 | Queue Size: 2
62 | Transport Hint: raw
63 | Unreliable: false
64 | Value: true
65 | - Class: rviz/Image
66 | Enabled: true
67 | Image Topic: /mask_rcnn/visualization
68 | Max Value: 1
69 | Median window: 5
70 | Min Value: 0
71 | Name: Image
72 | Normalize Range: true
73 | Queue Size: 2
74 | Transport Hint: raw
75 | Unreliable: false
76 | Value: true
77 | Enabled: true
78 | Global Options:
79 | Background Color: 48; 48; 48
80 | Fixed Frame: world
81 | Frame Rate: 30
82 | Name: root
83 | Tools:
84 | - Class: rviz/Interact
85 | Hide Inactive Objects: true
86 | - Class: rviz/MoveCamera
87 | - Class: rviz/Select
88 | - Class: rviz/FocusCamera
89 | - Class: rviz/Measure
90 | - Class: rviz/SetInitialPose
91 | Topic: /initialpose
92 | - Class: rviz/SetGoal
93 | Topic: /move_base_simple/goal
94 | - Class: rviz/PublishPoint
95 | Single click: true
96 | Topic: /clicked_point
97 | Value: true
98 | Views:
99 | Current:
100 | Class: rviz/Orbit
101 | Distance: 3.30293489
102 | Enable Stereo Rendering:
103 | Stereo Eye Separation: 0.0599999987
104 | Stereo Focal Distance: 1
105 | Swap Stereo Eyes: false
106 | Value: false
107 | Focal Point:
108 | X: 0.919049203
109 | Y: 0.11560297
110 | Z: 0.632362902
111 | Focal Shape Fixed Size: true
112 | Focal Shape Size: 0.0500000007
113 | Invert Z Axis: false
114 | Name: Current View
115 | Near Clip Distance: 0.00999999978
116 | Pitch: 0.185397774
117 | Target Frame:
118 | Value: Orbit (rviz)
119 | Yaw: 5.57856464
120 | Saved: ~
121 | Window Geometry:
122 | Displays:
123 | collapsed: false
124 | Height: 817
125 | Hide Left Dock: false
126 | Hide Right Dock: false
127 | Image:
128 | collapsed: false
129 | QMainWindow State: 000000ff00000000fd00000004000000000000016a000002a7fc0200000009fb0000001200530065006c0065006300740069006f006e00000001e10000009b0000006400fffffffb0000001e0054006f006f006c002000500072006f007000650072007400690065007302000001ed000001df00000185000000a3fb000000120056006900650077007300200054006f006f02000001df000002110000018500000122fb000000200054006f006f006c002000500072006f0070006500720074006900650073003203000002880000011d000002210000017afb000000100044006900730070006c0061007900730100000028000002a7000000dd00fffffffb0000000a0056006900650077007300000001ba000000f3000000b000fffffffb0000002000730065006c0065006300740069006f006e00200062007500660066006500720200000138000000aa0000023a00000294fb00000014005700690064006500530074006500720065006f02000000e6000000d2000003ee0000030bfb0000000c004b0069006e0065006300740200000186000001060000030c000002610000000100000216000002a7fc0200000005fb0000001e0054006f006f006c002000500072006f00700065007200740069006500730100000041000000780000000000000000fb0000000a0049006d0061006700650100000028000001430000001600fffffffb0000000a0049006d00610067006501000001710000015e0000001600fffffffb0000000a0049006d00610067006501000001fe000000af0000000000000000fb0000001200530065006c0065006300740069006f006e010000025a000000b200000000000000000000000200000490000000a9fc0100000001fb0000000a00560069006500770073030000004e00000080000002e10000019700000003000004eb0000003efc0100000002fb0000000800540069006d00650100000000000004eb0000030000fffffffb0000000800540069006d006501000000000000045000000000000000000000015f000002a700000004000000040000000800000008fc0000000100000002000000010000000a0054006f006f006c00730100000000ffffffff0000000000000000
130 | Selection:
131 | collapsed: false
132 | Time:
133 | collapsed: false
134 | Tool Properties:
135 | collapsed: false
136 | Views:
137 | collapsed: false
138 | Width: 1259
139 | X: 618
140 | Y: 126
141 |
--------------------------------------------------------------------------------
/nodes/config.py:
--------------------------------------------------------------------------------
1 | """
2 | Mask R-CNN
3 | Base Configurations class.
4 |
5 | Copyright (c) 2017 Matterport, Inc.
6 | Licensed under the MIT License (see LICENSE for details)
7 | Written by Waleed Abdulla
8 | """
9 |
10 | import math
11 | import numpy as np
12 |
13 |
14 | # Base Configuration Class
15 | # Don't use this class directly. Instead, sub-class it and override
16 | # the configurations you need to change.
17 |
18 | class Config(object):
19 | """Base configuration class. For custom configurations, create a
20 | sub-class that inherits from this one and override properties
21 | that need to be changed.
22 | """
23 | # Name the configurations. For example, 'COCO', 'Experiment 3', ...etc.
24 | # Useful if your code needs to do things differently depending on which
25 | # experiment is running.
26 | NAME = None # Override in sub-classes
27 |
28 | # NUMBER OF GPUs to use. For CPU training, use 1
29 | GPU_COUNT = 1
30 |
31 | # Number of images to train with on each GPU. A 12GB GPU can typically
32 | # handle 2 images of 1024x1024px.
33 | # Adjust based on your GPU memory and image sizes. Use the highest
34 | # number that your GPU can handle for best performance.
35 | IMAGES_PER_GPU = 2
36 |
37 | # Number of training steps per epoch
38 | # This doesn't need to match the size of the training set. Tensorboard
39 | # updates are saved at the end of each epoch, so setting this to a
40 | # smaller number means getting more frequent TensorBoard updates.
41 | # Validation stats are also calculated at each epoch end and they
42 | # might take a while, so don't set this too small to avoid spending
43 | # a lot of time on validation stats.
44 | STEPS_PER_EPOCH = 1000
45 |
46 | # Number of validation steps to run at the end of every training epoch.
47 | # A bigger number improves accuracy of validation stats, but slows
48 | # down the training.
49 | VALIDATION_STEPS = 50
50 |
51 | # The strides of each layer of the FPN Pyramid. These values
52 | # are based on a Resnet101 backbone.
53 | BACKBONE_STRIDES = [4, 8, 16, 32, 64]
54 |
55 | # Number of classification classes (including background)
56 | NUM_CLASSES = 1 # Override in sub-classes
57 |
58 | # Length of square anchor side in pixels
59 | RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512)
60 |
61 | # Ratios of anchors at each cell (width/height)
62 | # A value of 1 represents a square anchor, and 0.5 is a wide anchor
63 | RPN_ANCHOR_RATIOS = [0.5, 1, 2]
64 |
65 | # Anchor stride
66 | # If 1 then anchors are created for each cell in the backbone feature map.
67 | # If 2, then anchors are created for every other cell, and so on.
68 | RPN_ANCHOR_STRIDE = 1
69 |
70 | # Non-max suppression threshold to filter RPN proposals.
71 | # You can reduce this during training to generate more proposals.
72 | RPN_NMS_THRESHOLD = 0.7
73 |
74 | # How many anchors per image to use for RPN training
75 | RPN_TRAIN_ANCHORS_PER_IMAGE = 256
76 |
77 | # ROIs kept after non-maximum suppression (training and inference)
78 | POST_NMS_ROIS_TRAINING = 2000
79 | POST_NMS_ROIS_INFERENCE = 1000
80 |
81 | # If enabled, resizes instance masks to a smaller size to reduce
82 | # memory load. Recommended when using high-resolution images.
83 | USE_MINI_MASK = True
84 | MINI_MASK_SHAPE = (56, 56) # (height, width) of the mini-mask
85 |
86 | # Input image resizing
87 | # Images are resized such that the smallest side is >= IMAGE_MIN_DIM and
88 | # the longest side is <= IMAGE_MAX_DIM. In case both conditions can't
89 | # be satisfied together the IMAGE_MAX_DIM is enforced.
90 | IMAGE_MIN_DIM = 800
91 | IMAGE_MAX_DIM = 1024
92 | # If True, pad images with zeros such that they're (max_dim by max_dim)
93 | IMAGE_PADDING = True # currently, the False option is not supported
94 |
95 | # Image mean (RGB)
96 | MEAN_PIXEL = np.array([123.7, 116.8, 103.9])
97 |
98 | # Number of ROIs per image to feed to classifier/mask heads
99 | # The Mask RCNN paper uses 512 but often the RPN doesn't generate
100 | # enough positive proposals to fill this and keep a positive:negative
101 | # ratio of 1:3. You can increase the number of proposals by adjusting
102 | # the RPN NMS threshold.
103 | TRAIN_ROIS_PER_IMAGE = 200
104 |
105 | # Percent of positive ROIs used to train classifier/mask heads
106 | ROI_POSITIVE_RATIO = 0.33
107 |
108 | # Pooled ROIs
109 | POOL_SIZE = 7
110 | MASK_POOL_SIZE = 14
111 | MASK_SHAPE = [28, 28]
112 |
113 | # Maximum number of ground truth instances to use in one image
114 | MAX_GT_INSTANCES = 100
115 |
116 | # Bounding box refinement standard deviation for RPN and final detections.
117 | RPN_BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2])
118 | BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2])
119 |
120 | # Max number of final detections
121 | DETECTION_MAX_INSTANCES = 100
122 |
123 | # Minimum probability value to accept a detected instance
124 | # ROIs below this threshold are skipped
125 | DETECTION_MIN_CONFIDENCE = 0.7
126 |
127 | # Non-maximum suppression threshold for detection
128 | DETECTION_NMS_THRESHOLD = 0.3
129 |
130 | # Learning rate and momentum
131 | # The Mask RCNN paper uses lr=0.02, but on TensorFlow it causes
132 | # weights to explode. Likely due to differences in optimizer
133 | # implementation.
134 | LEARNING_RATE = 0.001
135 | LEARNING_MOMENTUM = 0.9
136 |
137 | # Weight decay regularization
138 | WEIGHT_DECAY = 0.0001
139 |
140 | # Use RPN ROIs or externally generated ROIs for training
141 | # Keep this True for most situations. Set to False if you want to train
142 | # the head branches on ROI generated by code rather than the ROIs from
143 | # the RPN. For example, to debug the classifier head without having to
144 | # train the RPN.
145 | USE_RPN_ROIS = True
146 |
147 | def __init__(self):
148 | """Set values of computed attributes."""
149 | # Effective batch size
150 | self.BATCH_SIZE = self.IMAGES_PER_GPU * self.GPU_COUNT
151 |
152 | # Input image size
153 | self.IMAGE_SHAPE = np.array(
154 | [self.IMAGE_MAX_DIM, self.IMAGE_MAX_DIM, 3])
155 |
156 | # Compute backbone size from input image size
157 | self.BACKBONE_SHAPES = np.array(
158 | [[int(math.ceil(self.IMAGE_SHAPE[0] / stride)),
159 | int(math.ceil(self.IMAGE_SHAPE[1] / stride))]
160 | for stride in self.BACKBONE_STRIDES])
161 |
162 | def display(self):
163 | """Display Configuration values."""
164 | print("\nConfigurations:")
165 | for a in dir(self):
166 | if not a.startswith("__") and not callable(getattr(self, a)):
167 | print("{:30} {}".format(a, getattr(self, a)))
168 | print("\n")
169 |
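170 |
171 | # Usage sketch: subclass Config and override only the values that change.
172 | # (nodes/mask_rcnn_node does exactly this with an InferenceConfig derived
173 | # from coco.CocoConfig.) For example:
174 | #
175 | #   class InferenceConfig(Config):
176 | #       NAME = "inference"        # illustrative name
177 | #       GPU_COUNT = 1
178 | #       IMAGES_PER_GPU = 1        # BATCH_SIZE = GPU_COUNT * IMAGES_PER_GPU = 1
179 | #
180 | #   config = InferenceConfig()
181 | #   config.display()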
--------------------------------------------------------------------------------
/src/mask_rcnn_ros/config.py:
--------------------------------------------------------------------------------
1 | """
2 | Mask R-CNN
3 | Base Configurations class.
4 |
5 | Copyright (c) 2017 Matterport, Inc.
6 | Licensed under the MIT License (see LICENSE for details)
7 | Written by Waleed Abdulla
8 | """
9 |
10 | import math
11 | import numpy as np
12 |
13 |
14 | # Base Configuration Class
15 | # Don't use this class directly. Instead, sub-class it and override
16 | # the configurations you need to change.
17 |
18 | class Config(object):
19 | """Base configuration class. For custom configurations, create a
20 | sub-class that inherits from this one and override properties
21 | that need to be changed.
22 | """
23 | # Name the configurations. For example, 'COCO', 'Experiment 3', ...etc.
24 | # Useful if your code needs to do things differently depending on which
25 | # experiment is running.
26 | NAME = None # Override in sub-classes
27 |
28 | # NUMBER OF GPUs to use. For CPU training, use 1
29 | GPU_COUNT = 1
30 |
31 | # Number of images to train with on each GPU. A 12GB GPU can typically
32 | # handle 2 images of 1024x1024px.
33 | # Adjust based on your GPU memory and image sizes. Use the highest
34 | # number that your GPU can handle for best performance.
35 | IMAGES_PER_GPU = 2
36 |
37 | # Number of training steps per epoch
38 | # This doesn't need to match the size of the training set. Tensorboard
39 | # updates are saved at the end of each epoch, so setting this to a
40 | # smaller number means getting more frequent TensorBoard updates.
41 | # Validation stats are also calculated at each epoch end and they
42 | # might take a while, so don't set this too small to avoid spending
43 | # a lot of time on validation stats.
44 | STEPS_PER_EPOCH = 1000
45 |
46 | # Number of validation steps to run at the end of every training epoch.
47 | # A bigger number improves accuracy of validation stats, but slows
48 | # down the training.
49 | VALIDATION_STEPS = 50
50 |
51 | # The strides of each layer of the FPN Pyramid. These values
52 | # are based on a Resnet101 backbone.
53 | BACKBONE_STRIDES = [4, 8, 16, 32, 64]
54 |
55 | # Number of classification classes (including background)
56 | NUM_CLASSES = 1 # Override in sub-classes
57 |
58 | # Length of square anchor side in pixels
59 | RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512)
60 |
61 | # Ratios of anchors at each cell (width/height)
62 | # A value of 1 represents a square anchor, and 0.5 is a wide anchor
63 | RPN_ANCHOR_RATIOS = [0.5, 1, 2]
64 |
65 | # Anchor stride
66 | # If 1 then anchors are created for each cell in the backbone feature map.
67 | # If 2, then anchors are created for every other cell, and so on.
68 | RPN_ANCHOR_STRIDE = 1
69 |
70 | # Non-max suppression threshold to filter RPN proposals.
71 | # You can reduce this during training to generate more proposals.
72 | RPN_NMS_THRESHOLD = 0.7
73 |
74 | # How many anchors per image to use for RPN training
75 | RPN_TRAIN_ANCHORS_PER_IMAGE = 256
76 |
77 | # ROIs kept after non-maximum suppression (training and inference)
78 | POST_NMS_ROIS_TRAINING = 2000
79 | POST_NMS_ROIS_INFERENCE = 1000
80 |
81 | # If enabled, resizes instance masks to a smaller size to reduce
82 | # memory load. Recommended when using high-resolution images.
83 | USE_MINI_MASK = True
84 | MINI_MASK_SHAPE = (56, 56) # (height, width) of the mini-mask
85 |
86 | # Input image resizing
87 | # Images are resized such that the smallest side is >= IMAGE_MIN_DIM and
88 | # the longest side is <= IMAGE_MAX_DIM. In case both conditions can't
89 | # be satisfied together the IMAGE_MAX_DIM is enforced.
90 | IMAGE_MIN_DIM = 800
91 | IMAGE_MAX_DIM = 1024
92 | # If True, pad images with zeros such that they're (max_dim by max_dim)
93 | IMAGE_PADDING = True # currently, the False option is not supported
94 |
95 | # Image mean (RGB)
96 | MEAN_PIXEL = np.array([123.7, 116.8, 103.9])
97 |
98 | # Number of ROIs per image to feed to classifier/mask heads
99 | # The Mask RCNN paper uses 512 but often the RPN doesn't generate
100 | # enough positive proposals to fill this and keep a positive:negative
101 | # ratio of 1:3. You can increase the number of proposals by adjusting
102 | # the RPN NMS threshold.
103 | TRAIN_ROIS_PER_IMAGE = 200
104 |
105 | # Percent of positive ROIs used to train classifier/mask heads
106 | ROI_POSITIVE_RATIO = 0.33
107 |
108 | # Pooled ROIs
109 | POOL_SIZE = 7
110 | MASK_POOL_SIZE = 14
111 | MASK_SHAPE = [28, 28]
112 |
113 | # Maximum number of ground truth instances to use in one image
114 | MAX_GT_INSTANCES = 100
115 |
116 | # Bounding box refinement standard deviation for RPN and final detections.
117 | RPN_BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2])
118 | BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2])
119 |
120 | # Max number of final detections
121 | DETECTION_MAX_INSTANCES = 100
122 |
123 | # Minimum probability value to accept a detected instance
124 | # ROIs below this threshold are skipped
125 | DETECTION_MIN_CONFIDENCE = 0.7
126 |
127 | # Non-maximum suppression threshold for detection
128 | DETECTION_NMS_THRESHOLD = 0.3
129 |
130 | # Learning rate and momentum
131 | # The Mask RCNN paper uses lr=0.02, but on TensorFlow it causes
132 | # weights to explode. Likely due to differences in optimizer
133 | # implementation.
134 | LEARNING_RATE = 0.001
135 | LEARNING_MOMENTUM = 0.9
136 |
137 | # Weight decay regularization
138 | WEIGHT_DECAY = 0.0001
139 |
140 | # Use RPN ROIs or externally generated ROIs for training
141 | # Keep this True for most situations. Set to False if you want to train
142 | # the head branches on ROI generated by code rather than the ROIs from
143 | # the RPN. For example, to debug the classifier head without having to
144 | # train the RPN.
145 | USE_RPN_ROIS = True
146 |
147 | def __init__(self):
148 | """Set values of computed attributes."""
149 | # Effective batch size
150 | self.BATCH_SIZE = self.IMAGES_PER_GPU * self.GPU_COUNT
151 |
152 | # Input image size
153 | self.IMAGE_SHAPE = np.array(
154 | [self.IMAGE_MAX_DIM, self.IMAGE_MAX_DIM, 3])
155 |
156 | # Compute backbone size from input image size
157 | self.BACKBONE_SHAPES = np.array(
158 | [[int(math.ceil(self.IMAGE_SHAPE[0] / stride)),
159 | int(math.ceil(self.IMAGE_SHAPE[1] / stride))]
160 | for stride in self.BACKBONE_STRIDES])
161 |
162 | def display(self):
163 | """Display Configuration values."""
164 | print("\nConfigurations:")
165 | for a in dir(self):
166 | if not a.startswith("__") and not callable(getattr(self, a)):
167 | print("{:30} {}".format(a, getattr(self, a)))
168 | print("\n")
169 |
--------------------------------------------------------------------------------
/nodes/parallel_model.py:
--------------------------------------------------------------------------------
1 | """
2 | Mask R-CNN
3 | Multi-GPU Support for Keras.
4 |
5 | Copyright (c) 2017 Matterport, Inc.
6 | Licensed under the MIT License (see LICENSE for details)
7 | Written by Waleed Abdulla
8 |
9 | Ideas and small code snippets from these sources:
10 | https://github.com/fchollet/keras/issues/2436
11 | https://medium.com/@kuza55/transparent-multi-gpu-training-on-tensorflow-with-keras-8b0016fd9012
12 | https://github.com/avolkov1/keras_experiments/blob/master/keras_exp/multigpu/
13 | https://github.com/fchollet/keras/blob/master/keras/utils/training_utils.py
14 | """
15 |
16 | import tensorflow as tf
17 | import keras.backend as K
18 | import keras.layers as KL
19 | import keras.models as KM
20 |
21 |
22 | class ParallelModel(KM.Model):
23 | """Subclasses the standard Keras Model and adds multi-GPU support.
24 | It works by creating a copy of the model on each GPU. Then it slices
25 | the inputs and sends a slice to each copy of the model, and then
26 | merges the outputs together and applies the loss on the combined
27 | outputs.
28 | """
29 |
30 | def __init__(self, keras_model, gpu_count):
31 | """Class constructor.
32 | keras_model: The Keras model to parallelize
33 | gpu_count: Number of GPUs. Must be > 1
34 | """
35 | self.inner_model = keras_model
36 | self.gpu_count = gpu_count
37 | merged_outputs = self.make_parallel()
38 | super(ParallelModel, self).__init__(inputs=self.inner_model.inputs,
39 | outputs=merged_outputs)
40 |
41 | def __getattribute__(self, attrname):
42 | """Redirect loading and saving methods to the inner model. That's where
43 | the weights are stored."""
44 | if 'load' in attrname or 'save' in attrname:
45 | return getattr(self.inner_model, attrname)
46 | return super(ParallelModel, self).__getattribute__(attrname)
47 |
48 | def summary(self, *args, **kwargs):
49 | """Override summary() to display summaries of both, the wrapper
50 | and inner models."""
51 | super(ParallelModel, self).summary(*args, **kwargs)
52 | self.inner_model.summary(*args, **kwargs)
53 |
54 | def make_parallel(self):
55 | """Creates a new wrapper model that consists of multiple replicas of
56 | the original model placed on different GPUs.
57 | """
58 | # Slice inputs. Slice inputs on the CPU to avoid sending a copy
59 | # of the full inputs to all GPUs. Saves on bandwidth and memory.
60 | input_slices = {name: tf.split(x, self.gpu_count)
61 | for name, x in zip(self.inner_model.input_names,
62 | self.inner_model.inputs)}
63 |
64 | output_names = self.inner_model.output_names
65 | outputs_all = []
66 | for i in range(len(self.inner_model.outputs)):
67 | outputs_all.append([])
68 |
69 | # Run the model call() on each GPU to place the ops there
70 | for i in range(self.gpu_count):
71 | with tf.device('/gpu:%d' % i):
72 | with tf.name_scope('tower_%d' % i):
73 | # Run a slice of inputs through this replica
74 | zipped_inputs = zip(self.inner_model.input_names,
75 | self.inner_model.inputs)
76 | inputs = [
77 | KL.Lambda(lambda s: input_slices[name][i],
78 | output_shape=lambda s: (None,) + s[1:])(tensor)
79 | for name, tensor in zipped_inputs]
80 | # Create the model replica and get the outputs
81 | outputs = self.inner_model(inputs)
82 | if not isinstance(outputs, list):
83 | outputs = [outputs]
84 | # Save the outputs for merging back together later
85 | for l, o in enumerate(outputs):
86 | outputs_all[l].append(o)
87 |
88 | # Merge outputs on CPU
89 | with tf.device('/cpu:0'):
90 | merged = []
91 | for outputs, name in zip(outputs_all, output_names):
92 | # If outputs are numbers without dimensions, add a batch dim.
93 | def add_dim(tensor):
94 | """Add a dimension to tensors that don't have any."""
95 | if K.int_shape(tensor) == ():
96 | return KL.Lambda(lambda t: K.reshape(t, [1, 1]))(tensor)
97 | return tensor
98 | outputs = list(map(add_dim, outputs))
99 |
100 | # Concatenate
101 | merged.append(KL.Concatenate(axis=0, name=name)(outputs))
102 | return merged
103 |
104 |
105 | if __name__ == "__main__":
106 | # Testing code below. It creates a simple model to train on MNIST and
107 | # tries to run it on 2 GPUs. It saves the graph so it can be viewed
108 | # in TensorBoard. Run it as:
109 | #
110 | # python3 parallel_model.py
111 |
112 | import os
113 | import numpy as np
114 | import keras.optimizers
115 | from keras.datasets import mnist
116 | from keras.preprocessing.image import ImageDataGenerator
117 |
118 | GPU_COUNT = 2
119 |
120 | # Root directory of the project
121 | ROOT_DIR = os.getcwd()
122 |
123 | # Directory to save logs and trained model
124 | MODEL_DIR = os.path.join(ROOT_DIR, "logs/parallel")
125 |
126 | def build_model(x_train, num_classes):
127 | # Reset default graph. Keras leaves old ops in the graph,
128 | # which are ignored for execution but clutter graph
129 | # visualization in TensorBoard.
130 | tf.reset_default_graph()
131 |
132 | inputs = KL.Input(shape=x_train.shape[1:], name="input_image")
133 | x = KL.Conv2D(32, (3, 3), activation='relu', padding="same",
134 | name="conv1")(inputs)
135 | x = KL.Conv2D(64, (3, 3), activation='relu', padding="same",
136 | name="conv2")(x)
137 | x = KL.MaxPooling2D(pool_size=(2, 2), name="pool1")(x)
138 | x = KL.Flatten(name="flat1")(x)
139 | x = KL.Dense(128, activation='relu', name="dense1")(x)
140 | x = KL.Dense(num_classes, activation='softmax', name="dense2")(x)
141 |
142 | return KM.Model(inputs, x, "digit_classifier_model")
143 |
144 | # Load MNIST Data
145 | (x_train, y_train), (x_test, y_test) = mnist.load_data()
146 | x_train = np.expand_dims(x_train, -1).astype('float32') / 255
147 | x_test = np.expand_dims(x_test, -1).astype('float32') / 255
148 |
149 | print('x_train shape:', x_train.shape)
150 | print('x_test shape:', x_test.shape)
151 |
152 | # Build data generator and model
153 | datagen = ImageDataGenerator()
154 | model = build_model(x_train, 10)
155 |
156 | # Add multi-GPU support.
157 | model = ParallelModel(model, GPU_COUNT)
158 |
159 | optimizer = keras.optimizers.SGD(lr=0.01, momentum=0.9, clipnorm=5.0)
160 |
161 | model.compile(loss='sparse_categorical_crossentropy',
162 | optimizer=optimizer, metrics=['accuracy'])
163 |
164 | model.summary()
165 |
166 | # Train
167 | model.fit_generator(
168 | datagen.flow(x_train, y_train, batch_size=64),
169 | steps_per_epoch=50, epochs=10, verbose=1,
170 | validation_data=(x_test, y_test),
171 | callbacks=[keras.callbacks.TensorBoard(log_dir=MODEL_DIR,
172 | write_graph=True)]
173 | )
174 |
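175 | # Note: tf.split in make_parallel() requires the input batch dimension to
176 | # be evenly divisible by gpu_count, so feed the wrapper batches whose size
177 | # is a multiple of the GPU count (e.g. batch_size=64 with GPU_COUNT=2 above).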
--------------------------------------------------------------------------------
/src/mask_rcnn_ros/parallel_model.py:
--------------------------------------------------------------------------------
1 | """
2 | Mask R-CNN
3 | Multi-GPU Support for Keras.
4 |
5 | Copyright (c) 2017 Matterport, Inc.
6 | Licensed under the MIT License (see LICENSE for details)
7 | Written by Waleed Abdulla
8 |
9 | Ideas and small code snippets from these sources:
10 | https://github.com/fchollet/keras/issues/2436
11 | https://medium.com/@kuza55/transparent-multi-gpu-training-on-tensorflow-with-keras-8b0016fd9012
12 | https://github.com/avolkov1/keras_experiments/blob/master/keras_exp/multigpu/
13 | https://github.com/fchollet/keras/blob/master/keras/utils/training_utils.py
14 | """
15 |
16 | import tensorflow as tf
17 | import keras.backend as K
18 | import keras.layers as KL
19 | import keras.models as KM
20 |
21 |
22 | class ParallelModel(KM.Model):
23 | """Subclasses the standard Keras Model and adds multi-GPU support.
24 | It works by creating a copy of the model on each GPU. Then it slices
25 | the inputs and sends a slice to each copy of the model, and then
26 | merges the outputs together and applies the loss on the combined
27 | outputs.
28 | """
29 |
30 | def __init__(self, keras_model, gpu_count):
31 | """Class constructor.
32 | keras_model: The Keras model to parallelize
33 | gpu_count: Number of GPUs. Must be > 1
34 | """
35 | self.inner_model = keras_model
36 | self.gpu_count = gpu_count
37 | merged_outputs = self.make_parallel()
38 | super(ParallelModel, self).__init__(inputs=self.inner_model.inputs,
39 | outputs=merged_outputs)
40 |
41 | def __getattribute__(self, attrname):
42 | """Redirect loading and saving methods to the inner model. That's where
43 | the weights are stored."""
44 | if 'load' in attrname or 'save' in attrname:
45 | return getattr(self.inner_model, attrname)
46 | return super(ParallelModel, self).__getattribute__(attrname)
47 |
48 | def summary(self, *args, **kwargs):
49 | """Override summary() to display summaries of both, the wrapper
50 | and inner models."""
51 | super(ParallelModel, self).summary(*args, **kwargs)
52 | self.inner_model.summary(*args, **kwargs)
53 |
54 | def make_parallel(self):
55 | """Creates a new wrapper model that consists of multiple replicas of
56 | the original model placed on different GPUs.
57 | """
58 | # Slice inputs. Slice inputs on the CPU to avoid sending a copy
59 | # of the full inputs to all GPUs. Saves on bandwidth and memory.
60 | input_slices = {name: tf.split(x, self.gpu_count)
61 | for name, x in zip(self.inner_model.input_names,
62 | self.inner_model.inputs)}
63 |
64 | output_names = self.inner_model.output_names
65 | outputs_all = []
66 | for i in range(len(self.inner_model.outputs)):
67 | outputs_all.append([])
68 |
69 | # Run the model call() on each GPU to place the ops there
70 | for i in range(self.gpu_count):
71 | with tf.device('/gpu:%d' % i):
72 | with tf.name_scope('tower_%d' % i):
73 | # Run a slice of inputs through this replica
74 | zipped_inputs = zip(self.inner_model.input_names,
75 | self.inner_model.inputs)
76 | inputs = [
77 | KL.Lambda(lambda s: input_slices[name][i],
78 | output_shape=lambda s: (None,) + s[1:])(tensor)
79 | for name, tensor in zipped_inputs]
80 | # Create the model replica and get the outputs
81 | outputs = self.inner_model(inputs)
82 | if not isinstance(outputs, list):
83 | outputs = [outputs]
84 | # Save the outputs for merging back together later
85 | for l, o in enumerate(outputs):
86 | outputs_all[l].append(o)
87 |
88 | # Merge outputs on CPU
89 | with tf.device('/cpu:0'):
90 | merged = []
91 | for outputs, name in zip(outputs_all, output_names):
92 | # If outputs are numbers without dimensions, add a batch dim.
93 | def add_dim(tensor):
94 | """Add a dimension to tensors that don't have any."""
95 | if K.int_shape(tensor) == ():
96 | return KL.Lambda(lambda t: K.reshape(t, [1, 1]))(tensor)
97 | return tensor
98 | outputs = list(map(add_dim, outputs))
99 |
100 | # Concatenate
101 | merged.append(KL.Concatenate(axis=0, name=name)(outputs))
102 | return merged
103 |
104 |
105 | if __name__ == "__main__":
106 | # Testing code below. It creates a simple model to train on MNIST and
107 | # tries to run it on 2 GPUs. It saves the graph so it can be viewed
108 | # in TensorBoard. Run it as:
109 | #
110 | # python3 parallel_model.py
111 |
112 | import os
113 | import numpy as np
114 | import keras.optimizers
115 | from keras.datasets import mnist
116 | from keras.preprocessing.image import ImageDataGenerator
117 |
118 | GPU_COUNT = 2
119 |
120 | # Root directory of the project
121 | ROOT_DIR = os.getcwd()
122 |
123 | # Directory to save logs and trained model
124 | MODEL_DIR = os.path.join(ROOT_DIR, "logs/parallel")
125 |
126 | def build_model(x_train, num_classes):
127 | # Reset default graph. Keras leaves old ops in the graph,
128 | # which are ignored for execution but clutter graph
129 | # visualization in TensorBoard.
130 | tf.reset_default_graph()
131 |
132 | inputs = KL.Input(shape=x_train.shape[1:], name="input_image")
133 | x = KL.Conv2D(32, (3, 3), activation='relu', padding="same",
134 | name="conv1")(inputs)
135 | x = KL.Conv2D(64, (3, 3), activation='relu', padding="same",
136 | name="conv2")(x)
137 | x = KL.MaxPooling2D(pool_size=(2, 2), name="pool1")(x)
138 | x = KL.Flatten(name="flat1")(x)
139 | x = KL.Dense(128, activation='relu', name="dense1")(x)
140 | x = KL.Dense(num_classes, activation='softmax', name="dense2")(x)
141 |
142 | return KM.Model(inputs, x, "digit_classifier_model")
143 |
144 | # Load MNIST Data
145 | (x_train, y_train), (x_test, y_test) = mnist.load_data()
146 | x_train = np.expand_dims(x_train, -1).astype('float32') / 255
147 | x_test = np.expand_dims(x_test, -1).astype('float32') / 255
148 |
149 | print('x_train shape:', x_train.shape)
150 | print('x_test shape:', x_test.shape)
151 |
152 | # Build data generator and model
153 | datagen = ImageDataGenerator()
154 | model = build_model(x_train, 10)
155 |
156 | # Add multi-GPU support.
157 | model = ParallelModel(model, GPU_COUNT)
158 |
159 | optimizer = keras.optimizers.SGD(lr=0.01, momentum=0.9, clipnorm=5.0)
160 |
161 | model.compile(loss='sparse_categorical_crossentropy',
162 | optimizer=optimizer, metrics=['accuracy'])
163 |
164 | model.summary()
165 |
166 | # Train
167 | model.fit_generator(
168 | datagen.flow(x_train, y_train, batch_size=64),
169 | steps_per_epoch=50, epochs=10, verbose=1,
170 | validation_data=(x_test, y_test),
171 | callbacks=[keras.callbacks.TensorBoard(log_dir=MODEL_DIR,
172 | write_graph=True)]
173 | )
174 |
--------------------------------------------------------------------------------
/nodes/mask_rcnn_node:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import os
3 | import threading
4 | import numpy as np
5 |
6 | import cv2
7 | from cv_bridge import CvBridge
8 | import rospy
9 | from sensor_msgs.msg import Image
10 | from sensor_msgs.msg import RegionOfInterest
11 |
12 | import coco
13 | import utils
14 | import model as modellib
15 | import visualize
16 | from mask_rcnn_ros.msg import Result
17 |
18 |
19 | # Local path to trained weights file
20 | ROS_HOME = os.environ.get('ROS_HOME', os.path.join(os.environ['HOME'], '.ros'))
21 | COCO_MODEL_PATH = os.path.join(ROS_HOME, 'mask_rcnn_coco.h5')
22 |
23 | # COCO Class names
24 | # Index of the class in the list is its ID. For example, to get ID of
25 | # the teddy bear class, use: CLASS_NAMES.index('teddy bear')
26 | CLASS_NAMES = ['BG', 'person', 'bicycle', 'car', 'motorcycle', 'airplane',
27 | 'bus', 'train', 'truck', 'boat', 'traffic light',
28 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird',
29 | 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear',
30 | 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',
31 | 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
32 | 'kite', 'baseball bat', 'baseball glove', 'skateboard',
33 | 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup',
34 | 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
35 | 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
36 | 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',
37 | 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
38 | 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster',
39 | 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors',
40 | 'teddy bear', 'hair drier', 'toothbrush']
41 |
42 |
43 | class InferenceConfig(coco.CocoConfig):
44 | # Set batch size to 1 since we'll be running inference on
45 | # one image at a time. Batch size = GPU_COUNT * IMAGES_PER_GPU
46 | GPU_COUNT = 1
47 | IMAGES_PER_GPU = 1
48 |
49 |
50 | class MaskRCNNNode(object):
51 | def __init__(self):
52 | self._cv_bridge = CvBridge()
53 |
54 | config = InferenceConfig()
55 | config.display()
56 |
57 | self._visualization = rospy.get_param('~visualization', True)
58 |
59 | # Create model object in inference mode.
60 | self._model = modellib.MaskRCNN(mode="inference", model_dir="",
61 | config=config)
62 | # Load weights trained on MS-COCO
63 | model_path = rospy.get_param('~model_path', COCO_MODEL_PATH)
64 | # Download COCO trained weights from Releases if needed
65 | if model_path == COCO_MODEL_PATH and not os.path.exists(COCO_MODEL_PATH):
66 | utils.download_trained_weights(COCO_MODEL_PATH)
67 |
68 | self._model.load_weights(model_path, by_name=True)
69 |
70 | self._class_names = rospy.get_param('~class_names', CLASS_NAMES)
71 |
72 | self._last_msg = None
73 | self._msg_lock = threading.Lock()
74 |
75 | self._class_colors = visualize.random_colors(len(CLASS_NAMES))
76 |
77 | self._publish_rate = rospy.get_param('~publish_rate', 100)
78 |
79 | def run(self):
80 | self._result_pub = rospy.Publisher('~result', Result, queue_size=1)
81 | vis_pub = rospy.Publisher('~visualization', Image, queue_size=1)
82 | rospy.Subscriber('~input', Image,
83 | self._image_callback, queue_size=1)
84 |
85 | rate = rospy.Rate(self._publish_rate)
86 | while not rospy.is_shutdown():
87 | if self._msg_lock.acquire(False):
88 | msg = self._last_msg
89 | self._last_msg = None
90 | self._msg_lock.release()
91 | else:
92 | rate.sleep()
93 | continue
94 |
95 | if msg is not None:
96 | np_image = self._cv_bridge.imgmsg_to_cv2(msg, 'bgr8')
97 |
98 | # Run detection
99 | results = self._model.detect([np_image], verbose=0)
100 | result = results[0]
101 | result_msg = self._build_result_msg(msg, result)
102 | self._result_pub.publish(result_msg)
103 |
104 | # Visualize results
105 | if self._visualization:
106 | cv_result = self._visualize_cv(result, np_image)
107 | image_msg = self._cv_bridge.cv2_to_imgmsg(cv_result, 'bgr8')
108 | vis_pub.publish(image_msg)
109 |
110 | rate.sleep()
111 |
112 | def _build_result_msg(self, msg, result):
113 | result_msg = Result()
114 | result_msg.header = msg.header
115 | for i, (y1, x1, y2, x2) in enumerate(result['rois']):
116 | box = RegionOfInterest()
117 | box.x_offset = np.asscalar(x1)
118 | box.y_offset = np.asscalar(y1)
119 | box.height = np.asscalar(y2 - y1)
120 | box.width = np.asscalar(x2 - x1)
121 | result_msg.boxes.append(box)
122 |
123 | class_id = result['class_ids'][i]
124 | result_msg.class_ids.append(class_id)
125 |
126 | class_name = self._class_names[class_id]
127 | result_msg.class_names.append(class_name)
128 |
129 | score = result['scores'][i]
130 | result_msg.scores.append(score)
131 |
132 | mask = Image()
133 | mask.header = msg.header
134 | mask.height = result['masks'].shape[0]
135 | mask.width = result['masks'].shape[1]
136 | mask.encoding = "mono8"
137 | mask.is_bigendian = False
138 | mask.step = mask.width
139 | mask.data = (result['masks'][:, :, i] * 255).tobytes()
140 | result_msg.masks.append(mask)
141 | return result_msg
142 |
143 | def _visualize(self, result, image):
144 | from matplotlib.backends.backend_agg import FigureCanvasAgg
145 | from matplotlib.figure import Figure
146 |
147 | fig = Figure()
148 | canvas = FigureCanvasAgg(fig)
149 | axes = fig.gca()
150 | visualize.display_instances(image, result['rois'], result['masks'],
151 | result['class_ids'], CLASS_NAMES,
152 | result['scores'], ax=axes,
153 | class_colors=self._class_colors)
154 | fig.tight_layout()
155 | canvas.draw()
156 | result = np.fromstring(canvas.tostring_rgb(), dtype='uint8')
157 |
158 | _, _, w, h = fig.bbox.bounds
159 | result = result.reshape((int(h), int(w), 3))
160 | return result
161 |
162 | def _visualize_cv(self, result, image):
163 |
164 | image = visualize.display_instances_cv(image, result['rois'], result['masks'],
165 | result['class_ids'], CLASS_NAMES,
166 | result['scores'],
167 | class_colors=self._class_colors)
168 |
169 | return image
170 |
171 | def _image_callback(self, msg):
172 | rospy.logdebug("Got an image")
173 | if self._msg_lock.acquire(False):
174 | self._last_msg = msg
175 | self._msg_lock.release()
176 |
177 |
178 | def main():
179 | rospy.init_node('mask_rcnn')
180 |
181 | node = MaskRCNNNode()
182 | node.run()
183 |
184 |
185 | if __name__ == '__main__':
186 | main()
187 |
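188 | # Usage sketch (assuming the package is built and the workspace is sourced):
189 | # remap the input topic and set private parameters on the command line, e.g.
190 | #
191 | #   rosrun mask_rcnn_ros mask_rcnn_node ~input:=/camera/rgb/image_color \
192 | #       _visualization:=true _publish_rate:=100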
--------------------------------------------------------------------------------
/nodes/shapes.py:
--------------------------------------------------------------------------------
1 | """
2 | Mask R-CNN
3 | Configurations and data loading code for the synthetic Shapes dataset.
4 | This is a duplicate of the code in the notebook train_shapes.ipynb for easy
5 | import into other notebooks, such as inspect_model.ipynb.
6 |
7 | Copyright (c) 2017 Matterport, Inc.
8 | Licensed under the MIT License (see LICENSE for details)
9 | Written by Waleed Abdulla
10 | """
11 |
12 | import math
13 | import random
14 | import numpy as np
15 | import cv2
16 |
17 | from config import Config
18 | import utils
19 |
20 |
21 | class ShapesConfig(Config):
22 | """Configuration for training on the toy shapes dataset.
23 | Derives from the base Config class and overrides values specific
24 | to the toy shapes dataset.
25 | """
26 | # Give the configuration a recognizable name
27 | NAME = "shapes"
28 |
29 | # Train on 1 GPU and 8 images per GPU. We can put multiple images on each
30 | # GPU because the images are small. Batch size is 8 (GPUs * images/GPU).
31 | GPU_COUNT = 1
32 | IMAGES_PER_GPU = 8
33 |
34 | # Number of classes (including background)
35 | NUM_CLASSES = 1 + 3 # background + 3 shapes
36 |
37 | # Use small images for faster training. Set the limits of the small side
38 | # and the large side; together they determine the image shape.
39 | IMAGE_MIN_DIM = 128
40 | IMAGE_MAX_DIM = 128
41 |
42 | # Use smaller anchors because our image and objects are small
43 | RPN_ANCHOR_SCALES = (8, 16, 32, 64, 128) # anchor side in pixels
44 |
45 | # Reduce training ROIs per image because the images are small and have
46 | # few objects. Aim to allow ROI sampling to pick 33% positive ROIs.
47 | TRAIN_ROIS_PER_IMAGE = 32
48 |
49 | # Use a small epoch since the data is simple
50 | STEPS_PER_EPOCH = 100
51 |
52 | # use small validation steps since the epoch is small
53 | VALIDATION_STEPS = 5
54 |
55 |
56 | class ShapesDataset(utils.Dataset):
57 | """Generates the shapes synthetic dataset. The dataset consists of simple
58 | shapes (triangles, squares, circles) placed randomly on a blank surface.
59 | The images are generated on the fly. No file access required.
60 | """
61 |
62 | def load_shapes(self, count, height, width):
63 | """Generate the requested number of synthetic images.
64 | count: number of images to generate.
65 | height, width: the size of the generated images.
66 | """
67 | # Add classes
68 | self.add_class("shapes", 1, "square")
69 | self.add_class("shapes", 2, "circle")
70 | self.add_class("shapes", 3, "triangle")
71 |
72 | # Add images
73 | # Generate random specifications of images (i.e. color and
74 | # list of shapes sizes and locations). This is more compact than
75 | # actual images. Images are generated on the fly in load_image().
76 | for i in range(count):
77 | bg_color, shapes = self.random_image(height, width)
78 | self.add_image("shapes", image_id=i, path=None,
79 | width=width, height=height,
80 | bg_color=bg_color, shapes=shapes)
81 |
82 | def load_image(self, image_id):
83 | """Generate an image from the specs of the given image ID.
84 | Typically this function loads the image from a file, but
85 | in this case it generates the image on the fly from the
86 | specs in image_info.
87 | """
88 | info = self.image_info[image_id]
89 | bg_color = np.array(info['bg_color']).reshape([1, 1, 3])
90 | image = np.ones([info['height'], info['width'], 3], dtype=np.uint8)
91 | image = image * bg_color.astype(np.uint8)
92 | for shape, color, dims in info['shapes']:
93 | image = self.draw_shape(image, shape, dims, color)
94 | return image
95 |
96 | def image_reference(self, image_id):
97 | """Return the shapes data of the image."""
98 | info = self.image_info[image_id]
99 | if info["source"] == "shapes":
100 | return info["shapes"]
101 | else:
102 |             return super(ShapesDataset, self).image_reference(image_id)
103 |
104 | def load_mask(self, image_id):
105 | """Generate instance masks for shapes of the given image ID.
106 | """
107 | info = self.image_info[image_id]
108 | shapes = info['shapes']
109 | count = len(shapes)
110 | mask = np.zeros([info['height'], info['width'], count], dtype=np.uint8)
111 | for i, (shape, _, dims) in enumerate(info['shapes']):
112 | mask[:, :, i:i + 1] = self.draw_shape(mask[:, :, i:i + 1].copy(),
113 | shape, dims, 1)
114 | # Handle occlusions
115 | occlusion = np.logical_not(mask[:, :, -1]).astype(np.uint8)
116 | for i in range(count - 2, -1, -1):
117 | mask[:, :, i] = mask[:, :, i] * occlusion
118 | occlusion = np.logical_and(
119 | occlusion, np.logical_not(mask[:, :, i]))
120 | # Map class names to class IDs.
121 | class_ids = np.array([self.class_names.index(s[0]) for s in shapes])
122 | return mask, class_ids.astype(np.int32)
123 |
124 | def draw_shape(self, image, shape, dims, color):
125 | """Draws a shape from the given specs."""
126 | # Get the center x, y and the size s
127 | x, y, s = dims
128 | if shape == 'square':
129 | image = cv2.rectangle(image, (x - s, y - s),
130 | (x + s, y + s), color, -1)
131 | elif shape == "circle":
132 | image = cv2.circle(image, (x, y), s, color, -1)
133 | elif shape == "triangle":
134 | points = np.array([[(x, y - s),
135 | (x - s / math.sin(math.radians(60)), y + s),
136 | (x + s / math.sin(math.radians(60)), y + s),
137 | ]], dtype=np.int32)
138 | image = cv2.fillPoly(image, points, color)
139 | return image
140 |
141 | def random_shape(self, height, width):
142 | """Generates specifications of a random shape that lies within
143 | the given height and width boundaries.
144 |         Returns a tuple of three values:
145 | * The shape name (square, circle, ...)
146 | * Shape color: a tuple of 3 values, RGB.
147 | * Shape dimensions: A tuple of values that define the shape size
148 | and location. Differs per shape type.
149 | """
150 | # Shape
151 | shape = random.choice(["square", "circle", "triangle"])
152 | # Color
153 | color = tuple([random.randint(0, 255) for _ in range(3)])
154 | # Center x, y
155 | buffer = 20
156 | y = random.randint(buffer, height - buffer - 1)
157 | x = random.randint(buffer, width - buffer - 1)
158 | # Size
159 | s = random.randint(buffer, height // 4)
160 | return shape, color, (x, y, s)
161 |
162 | def random_image(self, height, width):
163 | """Creates random specifications of an image with multiple shapes.
164 | Returns the background color of the image and a list of shape
165 | specifications that can be used to draw the image.
166 | """
167 | # Pick random background color
168 | bg_color = np.array([random.randint(0, 255) for _ in range(3)])
169 | # Generate a few random shapes and record their
170 | # bounding boxes
171 | shapes = []
172 | boxes = []
173 | N = random.randint(1, 4)
174 | for _ in range(N):
175 | shape, color, dims = self.random_shape(height, width)
176 | shapes.append((shape, color, dims))
177 | x, y, s = dims
178 | boxes.append([y - s, x - s, y + s, x + s])
179 |         # Apply non-max suppression with a 0.3 threshold to avoid
180 | # shapes covering each other
181 | keep_ixs = utils.non_max_suppression(
182 | np.array(boxes), np.arange(N), 0.3)
183 | shapes = [s for i, s in enumerate(shapes) if i in keep_ixs]
184 | return bg_color, shapes
185 |
--------------------------------------------------------------------------------
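A minimal usage sketch for the ShapesConfig and ShapesDataset classes above.
It assumes config.py and utils.py are importable and that utils.Dataset
provides the usual prepare() method from the Matterport code; every image
and mask is synthesized on demand, so no files are read:

    from shapes import ShapesConfig, ShapesDataset

    config = ShapesConfig()

    dataset = ShapesDataset()
    dataset.load_shapes(count=50, height=128, width=128)
    dataset.prepare()   # builds the class/image indices in utils.Dataset

    image = dataset.load_image(0)              # (128, 128, 3) uint8 array
    masks, class_ids = dataset.load_mask(0)    # (128, 128, N) and (N,)
    print(image.shape, masks.shape, class_ids)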
/src/mask_rcnn_ros/shapes.py:
--------------------------------------------------------------------------------
1 | """
2 | Mask R-CNN
3 | Configurations and data loading code for the synthetic Shapes dataset.
4 | This is a duplicate of the code in the notebook train_shapes.ipynb for easy
5 | import into other notebooks, such as inspect_model.ipynb.
6 |
7 | Copyright (c) 2017 Matterport, Inc.
8 | Licensed under the MIT License (see LICENSE for details)
9 | Written by Waleed Abdulla
10 | """
11 |
12 | import math
13 | import random
14 | import numpy as np
15 | import cv2
16 |
17 | from config import Config
18 | import utils
19 |
20 |
21 | class ShapesConfig(Config):
22 | """Configuration for training on the toy shapes dataset.
23 | Derives from the base Config class and overrides values specific
24 | to the toy shapes dataset.
25 | """
26 | # Give the configuration a recognizable name
27 | NAME = "shapes"
28 |
29 | # Train on 1 GPU and 8 images per GPU. We can put multiple images on each
30 | # GPU because the images are small. Batch size is 8 (GPUs * images/GPU).
31 | GPU_COUNT = 1
32 | IMAGES_PER_GPU = 8
33 |
34 | # Number of classes (including background)
35 | NUM_CLASSES = 1 + 3 # background + 3 shapes
36 |
37 |     # Use small images for faster training. Set the limits of the small side
38 |     # and the large side; together they determine the image shape.
39 | IMAGE_MIN_DIM = 128
40 | IMAGE_MAX_DIM = 128
41 |
42 | # Use smaller anchors because our image and objects are small
43 | RPN_ANCHOR_SCALES = (8, 16, 32, 64, 128) # anchor side in pixels
44 |
45 | # Reduce training ROIs per image because the images are small and have
46 | # few objects. Aim to allow ROI sampling to pick 33% positive ROIs.
47 | TRAIN_ROIS_PER_IMAGE = 32
48 |
49 | # Use a small epoch since the data is simple
50 | STEPS_PER_EPOCH = 100
51 |
52 | # use small validation steps since the epoch is small
53 | VALIDATION_STEPS = 5
54 |
55 |
56 | class ShapesDataset(utils.Dataset):
57 | """Generates the shapes synthetic dataset. The dataset consists of simple
58 | shapes (triangles, squares, circles) placed randomly on a blank surface.
59 | The images are generated on the fly. No file access required.
60 | """
61 |
62 | def load_shapes(self, count, height, width):
63 | """Generate the requested number of synthetic images.
64 | count: number of images to generate.
65 | height, width: the size of the generated images.
66 | """
67 | # Add classes
68 | self.add_class("shapes", 1, "square")
69 | self.add_class("shapes", 2, "circle")
70 | self.add_class("shapes", 3, "triangle")
71 |
72 | # Add images
73 |         # Generate random specifications of images (i.e. color and a
74 |         # list of shape sizes and locations). This is more compact than
75 | # actual images. Images are generated on the fly in load_image().
76 | for i in range(count):
77 | bg_color, shapes = self.random_image(height, width)
78 | self.add_image("shapes", image_id=i, path=None,
79 | width=width, height=height,
80 | bg_color=bg_color, shapes=shapes)
81 |
82 | def load_image(self, image_id):
83 | """Generate an image from the specs of the given image ID.
84 | Typically this function loads the image from a file, but
85 | in this case it generates the image on the fly from the
86 | specs in image_info.
87 | """
88 | info = self.image_info[image_id]
89 | bg_color = np.array(info['bg_color']).reshape([1, 1, 3])
90 | image = np.ones([info['height'], info['width'], 3], dtype=np.uint8)
91 | image = image * bg_color.astype(np.uint8)
92 | for shape, color, dims in info['shapes']:
93 | image = self.draw_shape(image, shape, dims, color)
94 | return image
95 |
96 | def image_reference(self, image_id):
97 | """Return the shapes data of the image."""
98 | info = self.image_info[image_id]
99 | if info["source"] == "shapes":
100 | return info["shapes"]
101 | else:
102 |             return super(ShapesDataset, self).image_reference(image_id)
103 |
104 | def load_mask(self, image_id):
105 | """Generate instance masks for shapes of the given image ID.
106 | """
107 | info = self.image_info[image_id]
108 | shapes = info['shapes']
109 | count = len(shapes)
110 | mask = np.zeros([info['height'], info['width'], count], dtype=np.uint8)
111 | for i, (shape, _, dims) in enumerate(info['shapes']):
112 | mask[:, :, i:i + 1] = self.draw_shape(mask[:, :, i:i + 1].copy(),
113 | shape, dims, 1)
114 | # Handle occlusions
115 | occlusion = np.logical_not(mask[:, :, -1]).astype(np.uint8)
116 | for i in range(count - 2, -1, -1):
117 | mask[:, :, i] = mask[:, :, i] * occlusion
118 | occlusion = np.logical_and(
119 | occlusion, np.logical_not(mask[:, :, i]))
120 | # Map class names to class IDs.
121 | class_ids = np.array([self.class_names.index(s[0]) for s in shapes])
122 | return mask, class_ids.astype(np.int32)
123 |
124 | def draw_shape(self, image, shape, dims, color):
125 | """Draws a shape from the given specs."""
126 | # Get the center x, y and the size s
127 | x, y, s = dims
128 | if shape == 'square':
129 | image = cv2.rectangle(image, (x - s, y - s),
130 | (x + s, y + s), color, -1)
131 | elif shape == "circle":
132 | image = cv2.circle(image, (x, y), s, color, -1)
133 | elif shape == "triangle":
134 | points = np.array([[(x, y - s),
135 | (x - s / math.sin(math.radians(60)), y + s),
136 | (x + s / math.sin(math.radians(60)), y + s),
137 | ]], dtype=np.int32)
138 | image = cv2.fillPoly(image, points, color)
139 | return image
140 |
141 | def random_shape(self, height, width):
142 | """Generates specifications of a random shape that lies within
143 | the given height and width boundaries.
144 |         Returns a tuple of three values:
145 | * The shape name (square, circle, ...)
146 | * Shape color: a tuple of 3 values, RGB.
147 | * Shape dimensions: A tuple of values that define the shape size
148 | and location. Differs per shape type.
149 | """
150 | # Shape
151 | shape = random.choice(["square", "circle", "triangle"])
152 | # Color
153 | color = tuple([random.randint(0, 255) for _ in range(3)])
154 | # Center x, y
155 | buffer = 20
156 | y = random.randint(buffer, height - buffer - 1)
157 | x = random.randint(buffer, width - buffer - 1)
158 | # Size
159 | s = random.randint(buffer, height // 4)
160 | return shape, color, (x, y, s)
161 |
162 | def random_image(self, height, width):
163 | """Creates random specifications of an image with multiple shapes.
164 | Returns the background color of the image and a list of shape
165 | specifications that can be used to draw the image.
166 | """
167 | # Pick random background color
168 | bg_color = np.array([random.randint(0, 255) for _ in range(3)])
169 | # Generate a few random shapes and record their
170 | # bounding boxes
171 | shapes = []
172 | boxes = []
173 | N = random.randint(1, 4)
174 | for _ in range(N):
175 | shape, color, dims = self.random_shape(height, width)
176 | shapes.append((shape, color, dims))
177 | x, y, s = dims
178 | boxes.append([y - s, x - s, y + s, x + s])
179 |         # Apply non-max suppression with a 0.3 threshold to avoid
180 | # shapes covering each other
181 | keep_ixs = utils.non_max_suppression(
182 | np.array(boxes), np.arange(N), 0.3)
183 | shapes = [s for i, s in enumerate(shapes) if i in keep_ixs]
184 | return bg_color, shapes
185 |
--------------------------------------------------------------------------------
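A tiny numeric check of the occlusion handling in load_mask above: the last
mask channel is treated as the topmost shape, and each earlier channel is
ANDed with the complement of everything drawn above it, so overlapped pixels
belong only to the top instance:

    import numpy as np

    mask = np.zeros([4, 4, 2], dtype=np.uint8)
    mask[0:3, 0:3, 0] = 1    # bottom shape
    mask[1:4, 1:4, 1] = 1    # top shape, overlapping the bottom one

    occlusion = np.logical_not(mask[:, :, -1]).astype(np.uint8)
    for i in range(mask.shape[-1] - 2, -1, -1):
        mask[:, :, i] = mask[:, :, i] * occlusion
        occlusion = np.logical_and(occlusion, np.logical_not(mask[:, :, i]))

    print(mask[:, :, 0])   # the overlapped 2x2 corner is zeroed out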
/nodes/visualize.py:
--------------------------------------------------------------------------------
1 | """
2 | Mask R-CNN
3 | Display and Visualization Functions.
4 |
5 | Copyright (c) 2017 Matterport, Inc.
6 | Licensed under the MIT License (see LICENSE for details)
7 | Written by Waleed Abdulla
8 | """
9 |
10 | import random
11 | import itertools
12 | import colorsys
13 | import numpy as np
14 | from skimage.measure import find_contours
15 | import matplotlib.pyplot as plt
16 | import matplotlib.patches as patches
17 | import matplotlib.lines as lines
18 | from matplotlib.patches import Polygon
19 | import sys
20 | sys.path.remove('/opt/ros/indigo/lib/python2.7/dist-packages')  # must run before importing cv2
21 | import cv2
22 | import IPython.display
23 |
24 | import utils
25 |
26 | ############################################################
27 | # Visualization
28 | ############################################################
29 |
30 | def display_images(images, titles=None, cols=4, cmap=None, norm=None,
31 | interpolation=None):
32 | """Display the given set of images, optionally with titles.
33 | images: list or array of image tensors in HWC format.
34 | titles: optional. A list of titles to display with each image.
35 | cols: number of images per row
36 | cmap: Optional. Color map to use. For example, "Blues".
37 | norm: Optional. A Normalize instance to map values to colors.
38 |     interpolation: Optional. Image interpolation to use for display.
39 | """
40 | titles = titles if titles is not None else [""] * len(images)
41 | rows = len(images) // cols + 1
42 | plt.figure(figsize=(14, 14 * rows // cols))
43 | i = 1
44 | for image, title in zip(images, titles):
45 | plt.subplot(rows, cols, i)
46 | plt.title(title, fontsize=9)
47 | plt.axis('off')
48 | plt.imshow(image.astype(np.uint8), cmap=cmap,
49 | norm=norm, interpolation=interpolation)
50 | i += 1
51 | plt.show()
52 |
53 |
54 | def random_colors(N, bright=True):
55 | """
56 | Generate random colors.
57 | To get visually distinct colors, generate them in HSV space then
58 | convert to RGB.
59 | """
60 | brightness = 1.0 if bright else 0.7
61 | hsv = [(float(i) / N, 1, brightness) for i in range(N)]
62 | colors = list(map(lambda c: colorsys.hsv_to_rgb(*c), hsv))
63 | random.shuffle(colors)
64 | return colors
65 |
66 |
67 | def apply_mask(image, mask, color, alpha=0.5):
68 | """Apply the given mask to the image.
69 | """
70 | for c in range(3):
71 | image[:, :, c] = np.where(mask == 1,
72 | image[:, :, c] *
73 | (1 - alpha) + alpha * color[c] * 255,
74 | image[:, :, c])
75 | return image
76 |
77 |
78 | def display_instances(image, boxes, masks, class_ids, class_names,
79 | scores=None, title="",
80 | figsize=(16, 16), ax=None, class_colors=None):
81 | """
82 | boxes: [num_instance, (y1, x1, y2, x2, class_id)] in image coordinates.
83 | masks: [height, width, num_instances]
84 | class_ids: [num_instances]
85 | class_names: list of class names of the dataset
86 | scores: (optional) confidence scores for each box
87 | figsize: (optional) the size of the image.
88 | """
89 | # Number of instances
90 | N = boxes.shape[0]
91 | if not N:
92 | print("\n*** No instances to display *** \n")
93 | else:
94 | assert boxes.shape[0] == masks.shape[-1] == class_ids.shape[0]
95 |
96 | if not ax:
97 | _, ax = plt.subplots(1, figsize=figsize)
98 |
99 | # Generate random colors
100 | if class_colors is None:
101 | colors = random_colors(N)
102 |
103 | # Show area outside image boundaries.
104 | height, width = image.shape[:2]
105 | ax.set_ylim(height + 10, -10)
106 | ax.set_xlim(-10, width + 10)
107 | ax.axis('off')
108 | ax.set_title(title)
109 |
110 | masked_image = image.astype(np.uint32).copy()
111 | for i in range(N):
112 | class_id = class_ids[i]
113 | if class_colors is None:
114 | color = colors[i]
115 | else:
116 | color = class_colors[class_id]
117 |
118 | # Bounding box
119 | if not np.any(boxes[i]):
120 | # Skip this instance. Has no bbox. Likely lost in image cropping.
121 | continue
122 | y1, x1, y2, x2 = boxes[i]
123 | p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2,
124 | alpha=0.7, linestyle="dashed",
125 | edgecolor=color, facecolor='none')
126 | ax.add_patch(p)
127 |
128 | # Label
129 | score = scores[i] if scores is not None else None
130 | label = class_names[class_id]
131 | x = random.randint(x1, (x1 + x2) // 2)
132 | caption = "{} {:.3f}".format(label, score) if score else label
133 | ax.text(x1, y1 + 8, caption,
134 | color='w', size=11, backgroundcolor="none")
135 |
136 | # Mask
137 | mask = masks[:, :, i]
138 | masked_image = apply_mask(masked_image, mask, color)
139 |
140 | # Mask Polygon
141 | # Pad to ensure proper polygons for masks that touch image edges.
142 | padded_mask = np.zeros(
143 | (mask.shape[0] + 2, mask.shape[1] + 2), dtype=np.uint8)
144 | padded_mask[1:-1, 1:-1] = mask
145 | contours = find_contours(padded_mask, 0.5)
146 | for verts in contours:
147 | # Subtract the padding and flip (y, x) to (x, y)
148 | verts = np.fliplr(verts) - 1
149 | p = Polygon(verts, facecolor="none", edgecolor=color)
150 | ax.add_patch(p)
151 | ax.imshow(masked_image.astype(np.uint8))
152 | #plt.show()
153 |
154 |
155 | def display_instances_cv(image, boxes, masks, class_ids, class_names,
156 | scores=None, class_colors=None, alpha=0.7):
157 | """
158 | boxes: [num_instance, (y1, x1, y2, x2, class_id)] in image coordinates.
159 | masks: [height, width, num_instances]
160 | class_ids: [num_instances]
161 | class_names: list of class names of the dataset
162 | scores: (optional) confidence scores for each box
163 | class_colors: a list mapping class ids to their colors
164 | alpha: the amount of transparency of the mask overlay
165 | """
166 | # Number of instances
167 | n = boxes.shape[0]
168 | if n:
169 | assert boxes.shape[0] == masks.shape[-1] == class_ids.shape[0]
170 |
171 | # Generate random colors
172 | if class_colors is None:
173 | colors = random_colors(n)
174 |
175 | for i in range(n):
176 | class_id = class_ids[i]
177 | if class_colors is None:
178 | color = colors[i]
179 | else:
180 | color = class_colors[class_id]
181 |
182 |         # Transform class colors to BGR and rescale to [0, 255] for OpenCV
183 | bgr_color = tuple(c*255 for c in color[::-1])
184 |
185 | # Draw bounding boxes
186 | if not np.any(boxes[i]):
187 | # Skip this instance. Has no bbox. Likely lost in image cropping.
188 | continue
189 | y1, x1, y2, x2 = boxes[i]
190 | cv2.rectangle(image, (x1, y1), (x2, y2), color=bgr_color, thickness=2)
191 |
192 | # Draw transparent mask
193 | overlay = image.copy()
194 | mask = masks[:, :, i]
195 | __, thresh = cv2.threshold(mask, 0.5, 1, cv2.THRESH_BINARY)
196 | _, contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
197 | cv2.drawContours(image, contours, -1, color=bgr_color, thickness=cv2.FILLED)
198 | cv2.addWeighted(overlay, alpha, image, 1 - alpha, 0, image)
199 |
200 | # Draw text label
201 | score = scores[i] if scores is not None else None
202 | label = class_names[class_id]
203 | caption = "{} {:.3f}".format(label, score) if score else label
204 | cv2.putText(image, caption, (x1, y1 + 12), fontFace=cv2.FONT_HERSHEY_COMPLEX, fontScale=0.5,
205 | color=(255, 255, 255))
206 |
207 | return image
208 |
209 |
210 | def draw_rois(image, rois, refined_rois, mask, class_ids, class_names, limit=10):
211 | """
212 |     rois: [n, (y1, x1, y2, x2)] list of anchors in image coordinates.
213 |     refined_rois: [n, 4] the same anchors but refined to fit objects better.
214 | """
215 | masked_image = image.copy()
216 |
217 | # Pick random anchors in case there are too many.
218 | ids = np.arange(rois.shape[0], dtype=np.int32)
219 | ids = np.random.choice(
220 | ids, limit, replace=False) if ids.shape[0] > limit else ids
221 |
222 | fig, ax = plt.subplots(1, figsize=(12, 12))
223 | if rois.shape[0] > limit:
224 | plt.title("Showing {} random ROIs out of {}".format(
225 | len(ids), rois.shape[0]))
226 | else:
227 | plt.title("{} ROIs".format(len(ids)))
228 |
229 | # Show area outside image boundaries.
230 | ax.set_ylim(image.shape[0] + 20, -20)
231 | ax.set_xlim(-50, image.shape[1] + 20)
232 | ax.axis('off')
233 |
234 | for i, id in enumerate(ids):
235 | color = np.random.rand(3)
236 | class_id = class_ids[id]
237 | # ROI
238 | y1, x1, y2, x2 = rois[id]
239 | p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2,
240 | edgecolor=color if class_id else "gray",
241 | facecolor='none', linestyle="dashed")
242 | ax.add_patch(p)
243 | # Refined ROI
244 | if class_id:
245 | ry1, rx1, ry2, rx2 = refined_rois[id]
246 | p = patches.Rectangle((rx1, ry1), rx2 - rx1, ry2 - ry1, linewidth=2,
247 | edgecolor=color, facecolor='none')
248 | ax.add_patch(p)
249 | # Connect the top-left corners of the anchor and proposal for easy visualization
250 | ax.add_line(lines.Line2D([x1, rx1], [y1, ry1], color=color))
251 |
252 | # Label
253 | label = class_names[class_id]
254 | ax.text(rx1, ry1 + 8, "{}".format(label),
255 | color='w', size=11, backgroundcolor="none")
256 |
257 | # Mask
258 | m = utils.unmold_mask(mask[id], rois[id]
259 | [:4].astype(np.int32), image.shape)
260 | masked_image = apply_mask(masked_image, m, color)
261 |
262 | ax.imshow(masked_image)
263 |
264 | # Print stats
265 | print("Positive ROIs: ", class_ids[class_ids > 0].shape[0])
266 | print("Negative ROIs: ", class_ids[class_ids == 0].shape[0])
267 | print("Positive Ratio: {:.2f}".format(
268 | class_ids[class_ids > 0].shape[0] / class_ids.shape[0]))
269 |
270 |
271 | # TODO: Replace with matplotlib equivalent?
272 | def draw_box(image, box, color):
273 | """Draw 3-pixel width bounding boxes on the given image array.
274 | color: list of 3 int values for RGB.
275 | """
276 | y1, x1, y2, x2 = box
277 | image[y1:y1 + 2, x1:x2] = color
278 | image[y2:y2 + 2, x1:x2] = color
279 | image[y1:y2, x1:x1 + 2] = color
280 | image[y1:y2, x2:x2 + 2] = color
281 | return image
282 |
283 |
284 | def display_top_masks(image, mask, class_ids, class_names, limit=4):
285 | """Display the given image and the top few class masks."""
286 | to_display = []
287 | titles = []
288 | to_display.append(image)
289 | titles.append("H x W={}x{}".format(image.shape[0], image.shape[1]))
290 | # Pick top prominent classes in this image
291 | unique_class_ids = np.unique(class_ids)
292 | mask_area = [np.sum(mask[:, :, np.where(class_ids == i)[0]])
293 | for i in unique_class_ids]
294 | top_ids = [v[0] for v in sorted(zip(unique_class_ids, mask_area),
295 | key=lambda r: r[1], reverse=True) if v[1] > 0]
296 | # Generate images and titles
297 | for i in range(limit):
298 | class_id = top_ids[i] if i < len(top_ids) else -1
299 | # Pull masks of instances belonging to the same class.
300 | m = mask[:, :, np.where(class_ids == class_id)[0]]
301 | m = np.sum(m * np.arange(1, m.shape[-1] + 1), -1)
302 | to_display.append(m)
303 | titles.append(class_names[class_id] if class_id != -1 else "-")
304 | display_images(to_display, titles=titles, cols=limit + 1, cmap="Blues_r")
305 |
306 |
307 | def plot_precision_recall(AP, precisions, recalls):
308 | """Draw the precision-recall curve.
309 |
310 | AP: Average precision at IoU >= 0.5
311 | precisions: list of precision values
312 | recalls: list of recall values
313 | """
314 | # Plot the Precision-Recall curve
315 | _, ax = plt.subplots(1)
316 | ax.set_title("Precision-Recall Curve. AP@50 = {:.3f}".format(AP))
317 | ax.set_ylim(0, 1.1)
318 | ax.set_xlim(0, 1.1)
319 | _ = ax.plot(recalls, precisions)
320 |
321 |
322 | def plot_overlaps(gt_class_ids, pred_class_ids, pred_scores,
323 | overlaps, class_names, threshold=0.5):
324 | """Draw a grid showing how ground truth objects are classified.
325 | gt_class_ids: [N] int. Ground truth class IDs
326 |     pred_class_ids: [N] int. Predicted class IDs
327 |     pred_scores: [N] float. The probability scores of predicted classes
328 |     overlaps: [pred_boxes, gt_boxes] IoU overlaps of predictions and GT boxes.
329 | class_names: list of all class names in the dataset
330 | threshold: Float. The prediction probability required to predict a class
331 | """
332 | gt_class_ids = gt_class_ids[gt_class_ids != 0]
333 | pred_class_ids = pred_class_ids[pred_class_ids != 0]
334 |
335 | plt.figure(figsize=(12, 10))
336 | plt.imshow(overlaps, interpolation='nearest', cmap=plt.cm.Blues)
337 | plt.yticks(np.arange(len(pred_class_ids)),
338 | ["{} ({:.2f})".format(class_names[int(id)], pred_scores[i])
339 | for i, id in enumerate(pred_class_ids)])
340 | plt.xticks(np.arange(len(gt_class_ids)),
341 | [class_names[int(id)] for id in gt_class_ids], rotation=90)
342 |
343 | thresh = overlaps.max() / 2.
344 | for i, j in itertools.product(range(overlaps.shape[0]),
345 | range(overlaps.shape[1])):
346 | text = ""
347 | if overlaps[i, j] > threshold:
348 | text = "match" if gt_class_ids[j] == pred_class_ids[i] else "wrong"
349 | color = ("white" if overlaps[i, j] > thresh
350 | else "black" if overlaps[i, j] > 0
351 | else "grey")
352 | plt.text(j, i, "{:.3f}\n{}".format(overlaps[i, j], text),
353 | horizontalalignment="center", verticalalignment="center",
354 | fontsize=9, color=color)
355 |
356 | plt.tight_layout()
357 | plt.xlabel("Ground Truth")
358 | plt.ylabel("Predictions")
359 |
360 |
361 | def draw_boxes(image, boxes=None, refined_boxes=None,
362 | masks=None, captions=None, visibilities=None,
363 | title="", ax=None):
364 | """Draw bounding boxes and segmentation masks with differnt
365 | customizations.
366 |
367 | boxes: [N, (y1, x1, y2, x2, class_id)] in image coordinates.
368 | refined_boxes: Like boxes, but draw with solid lines to show
369 | that they're the result of refining 'boxes'.
370 | masks: [N, height, width]
371 | captions: List of N titles to display on each box
372 | visibilities: (optional) List of values of 0, 1, or 2. Determine how
373 |         prominent each bounding box should be.
374 | title: An optional title to show over the image
375 | ax: (optional) Matplotlib axis to draw on.
376 | """
377 | # Number of boxes
378 | assert boxes is not None or refined_boxes is not None
379 | N = boxes.shape[0] if boxes is not None else refined_boxes.shape[0]
380 |
381 | # Matplotlib Axis
382 | if not ax:
383 | _, ax = plt.subplots(1, figsize=(12, 12))
384 |
385 | # Generate random colors
386 | colors = random_colors(N)
387 |
388 | # Show area outside image boundaries.
389 | margin = image.shape[0] // 10
390 | ax.set_ylim(image.shape[0] + margin, -margin)
391 | ax.set_xlim(-margin, image.shape[1] + margin)
392 | ax.axis('off')
393 |
394 | ax.set_title(title)
395 |
396 | masked_image = image.astype(np.uint32).copy()
397 | for i in range(N):
398 | # Box visibility
399 | visibility = visibilities[i] if visibilities is not None else 1
400 | if visibility == 0:
401 | color = "gray"
402 | style = "dotted"
403 | alpha = 0.5
404 | elif visibility == 1:
405 | color = colors[i]
406 | style = "dotted"
407 | alpha = 1
408 | elif visibility == 2:
409 | color = colors[i]
410 | style = "solid"
411 | alpha = 1
412 |
413 | # Boxes
414 | if boxes is not None:
415 | if not np.any(boxes[i]):
416 | # Skip this instance. Has no bbox. Likely lost in cropping.
417 | continue
418 | y1, x1, y2, x2 = boxes[i]
419 | p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2,
420 | alpha=alpha, linestyle=style,
421 | edgecolor=color, facecolor='none')
422 | ax.add_patch(p)
423 |
424 | # Refined boxes
425 | if refined_boxes is not None and visibility > 0:
426 | ry1, rx1, ry2, rx2 = refined_boxes[i].astype(np.int32)
427 | p = patches.Rectangle((rx1, ry1), rx2 - rx1, ry2 - ry1, linewidth=2,
428 | edgecolor=color, facecolor='none')
429 | ax.add_patch(p)
430 | # Connect the top-left corners of the anchor and proposal
431 | if boxes is not None:
432 | ax.add_line(lines.Line2D([x1, rx1], [y1, ry1], color=color))
433 |
434 | # Captions
435 | if captions is not None:
436 | caption = captions[i]
437 | # If there are refined boxes, display captions on them
438 | if refined_boxes is not None:
439 | y1, x1, y2, x2 = ry1, rx1, ry2, rx2
440 | x = random.randint(x1, (x1 + x2) // 2)
441 | ax.text(x1, y1, caption, size=11, verticalalignment='top',
442 | color='w', backgroundcolor="none",
443 | bbox={'facecolor': color, 'alpha': 0.5,
444 | 'pad': 2, 'edgecolor': 'none'})
445 |
446 | # Masks
447 | if masks is not None:
448 | mask = masks[:, :, i]
449 | masked_image = apply_mask(masked_image, mask, color)
450 | # Mask Polygon
451 | # Pad to ensure proper polygons for masks that touch image edges.
452 | padded_mask = np.zeros(
453 | (mask.shape[0] + 2, mask.shape[1] + 2), dtype=np.uint8)
454 | padded_mask[1:-1, 1:-1] = mask
455 | contours = find_contours(padded_mask, 0.5)
456 | for verts in contours:
457 | # Subtract the padding and flip (y, x) to (x, y)
458 | verts = np.fliplr(verts) - 1
459 | p = Polygon(verts, facecolor="none", edgecolor=color)
460 | ax.add_patch(p)
461 | ax.imshow(masked_image.astype(np.uint8))
462 |
463 |
464 | def display_table(table):
465 | """Display values in a table format.
466 | table: an iterable of rows, and each row is an iterable of values.
467 | """
468 | html = ""
469 | for row in table:
470 | row_html = ""
471 | for col in row:
472 |             row_html += "<td>{:40}</td>".format(str(col))
473 |         html += "<tr>" + row_html + "</tr>"
474 |     html = "<table>" + html + "</table>"
475 | IPython.display.display(IPython.display.HTML(html))
476 |
477 |
478 | def display_weight_stats(model):
479 | """Scans all the weights in the model and returns a list of tuples
480 | that contain stats about each weight.
481 | """
482 | layers = model.get_trainable_layers()
483 | table = [["WEIGHT NAME", "SHAPE", "MIN", "MAX", "STD"]]
484 | for l in layers:
485 | weight_values = l.get_weights() # list of Numpy arrays
486 | weight_tensors = l.weights # list of TF tensors
487 | for i, w in enumerate(weight_values):
488 | weight_name = weight_tensors[i].name
489 | # Detect problematic layers. Exclude biases of conv layers.
490 | alert = ""
491 | if w.min() == w.max() and not (l.__class__.__name__ == "Conv2D" and i == 1):
492 | alert += "*** dead?"
493 | if np.abs(w.min()) > 1000 or np.abs(w.max()) > 1000:
494 | alert += "*** Overflow?"
495 | # Add row
496 | table.append([
497 | weight_name + alert,
498 | str(w.shape),
499 | "{:+9.4f}".format(w.min()),
500 | "{:+10.4f}".format(w.max()),
501 | "{:+9.4f}".format(w.std()),
502 | ])
503 | display_table(table)
504 |
--------------------------------------------------------------------------------
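A standalone sketch of the transparent-overlay idiom that
display_instances_cv uses: keep a clean copy of the frame, paint the mask
region solid, then blend the copy back with cv2.addWeighted so the mask shows
through at reduced opacity. The frame, mask, and file name here are synthetic
examples, not part of this package:

    import numpy as np
    import cv2

    image = np.full((120, 160, 3), 200, dtype=np.uint8)   # plain grey frame
    mask = np.zeros((120, 160), dtype=np.uint8)
    cv2.circle(mask, (80, 60), 30, 1, thickness=-1)       # fake instance mask

    alpha = 0.7
    overlay = image.copy()                 # untouched copy of the frame
    image[mask == 1] = (0, 0, 255)         # solid red where the mask is set
    # alpha of the clean frame + (1 - alpha) of the painted frame:
    # the mask region ends up at 30% opacity, as in display_instances_cv.
    cv2.addWeighted(overlay, alpha, image, 1 - alpha, 0, image)
    cv2.rectangle(image, (50, 30), (110, 90), (0, 0, 255), 2)   # opaque box
    cv2.imwrite("overlay_demo.png", image)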
/src/mask_rcnn_ros/visualize.py:
--------------------------------------------------------------------------------
1 | """
2 | Mask R-CNN
3 | Display and Visualization Functions.
4 |
5 | Copyright (c) 2017 Matterport, Inc.
6 | Licensed under the MIT License (see LICENSE for details)
7 | Written by Waleed Abdulla
8 | """
9 |
10 | import random
11 | import itertools
12 | import colorsys
13 | import numpy as np
14 | from skimage.measure import find_contours
15 | import matplotlib.pyplot as plt
16 | import matplotlib.patches as patches
17 | import matplotlib.lines as lines
18 | from matplotlib.patches import Polygon
19 | import sys
20 | sys.path.remove('/opt/ros/indigo/lib/python2.7/dist-packages')  # must run before importing cv2
21 | import cv2
22 | import IPython.display
23 |
24 | import utils
25 |
26 | ############################################################
27 | # Visualization
28 | ############################################################
29 |
30 | def display_images(images, titles=None, cols=4, cmap=None, norm=None,
31 | interpolation=None):
32 | """Display the given set of images, optionally with titles.
33 | images: list or array of image tensors in HWC format.
34 | titles: optional. A list of titles to display with each image.
35 | cols: number of images per row
36 | cmap: Optional. Color map to use. For example, "Blues".
37 | norm: Optional. A Normalize instance to map values to colors.
38 |     interpolation: Optional. Image interpolation to use for display.
39 | """
40 | titles = titles if titles is not None else [""] * len(images)
41 | rows = len(images) // cols + 1
42 | plt.figure(figsize=(14, 14 * rows // cols))
43 | i = 1
44 | for image, title in zip(images, titles):
45 | plt.subplot(rows, cols, i)
46 | plt.title(title, fontsize=9)
47 | plt.axis('off')
48 | plt.imshow(image.astype(np.uint8), cmap=cmap,
49 | norm=norm, interpolation=interpolation)
50 | i += 1
51 | plt.show()
52 |
53 |
54 | def random_colors(N, bright=True):
55 | """
56 | Generate random colors.
57 | To get visually distinct colors, generate them in HSV space then
58 | convert to RGB.
59 | """
60 | brightness = 1.0 if bright else 0.7
61 | hsv = [(float(i) / N, 1, brightness) for i in range(N)]
62 | colors = list(map(lambda c: colorsys.hsv_to_rgb(*c), hsv))
63 | random.shuffle(colors)
64 | return colors
65 |
66 |
67 | def apply_mask(image, mask, color, alpha=0.5):
68 | """Apply the given mask to the image.
69 | """
70 | for c in range(3):
71 | image[:, :, c] = np.where(mask == 1,
72 | image[:, :, c] *
73 | (1 - alpha) + alpha * color[c] * 255,
74 | image[:, :, c])
75 | return image
76 |
77 |
78 | def display_instances(image, boxes, masks, class_ids, class_names,
79 | scores=None, title="",
80 | figsize=(16, 16), ax=None, class_colors=None):
81 | """
82 | boxes: [num_instance, (y1, x1, y2, x2, class_id)] in image coordinates.
83 | masks: [height, width, num_instances]
84 | class_ids: [num_instances]
85 | class_names: list of class names of the dataset
86 | scores: (optional) confidence scores for each box
87 | figsize: (optional) the size of the image.
88 | """
89 | # Number of instances
90 | N = boxes.shape[0]
91 | if not N:
92 | print("\n*** No instances to display *** \n")
93 | else:
94 | assert boxes.shape[0] == masks.shape[-1] == class_ids.shape[0]
95 |
96 | if not ax:
97 | _, ax = plt.subplots(1, figsize=figsize)
98 |
99 | # Generate random colors
100 | if class_colors is None:
101 | colors = random_colors(N)
102 |
103 | # Show area outside image boundaries.
104 | height, width = image.shape[:2]
105 | ax.set_ylim(height + 10, -10)
106 | ax.set_xlim(-10, width + 10)
107 | ax.axis('off')
108 | ax.set_title(title)
109 |
110 | masked_image = image.astype(np.uint32).copy()
111 | for i in range(N):
112 | class_id = class_ids[i]
113 | if class_colors is None:
114 | color = colors[i]
115 | else:
116 | color = class_colors[class_id]
117 |
118 | # Bounding box
119 | if not np.any(boxes[i]):
120 | # Skip this instance. Has no bbox. Likely lost in image cropping.
121 | continue
122 | y1, x1, y2, x2 = boxes[i]
123 | p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2,
124 | alpha=0.7, linestyle="dashed",
125 | edgecolor=color, facecolor='none')
126 | ax.add_patch(p)
127 |
128 | # Label
129 | score = scores[i] if scores is not None else None
130 | label = class_names[class_id]
131 | x = random.randint(x1, (x1 + x2) // 2)
132 | caption = "{} {:.3f}".format(label, score) if score else label
133 | ax.text(x1, y1 + 8, caption,
134 | color='w', size=11, backgroundcolor="none")
135 |
136 | # Mask
137 | mask = masks[:, :, i]
138 | masked_image = apply_mask(masked_image, mask, color)
139 |
140 | # Mask Polygon
141 | # Pad to ensure proper polygons for masks that touch image edges.
142 | padded_mask = np.zeros(
143 | (mask.shape[0] + 2, mask.shape[1] + 2), dtype=np.uint8)
144 | padded_mask[1:-1, 1:-1] = mask
145 | contours = find_contours(padded_mask, 0.5)
146 | for verts in contours:
147 | # Subtract the padding and flip (y, x) to (x, y)
148 | verts = np.fliplr(verts) - 1
149 | p = Polygon(verts, facecolor="none", edgecolor=color)
150 | ax.add_patch(p)
151 | ax.imshow(masked_image.astype(np.uint8))
152 | #plt.show()
153 |
154 |
155 | def display_instances_cv(image, boxes, masks, class_ids, class_names,
156 | scores=None, class_colors=None, alpha=0.7):
157 | """
158 | boxes: [num_instance, (y1, x1, y2, x2, class_id)] in image coordinates.
159 | masks: [height, width, num_instances]
160 | class_ids: [num_instances]
161 | class_names: list of class names of the dataset
162 | scores: (optional) confidence scores for each box
163 | class_colors: a list mapping class ids to their colors
164 | alpha: the amount of transparency of the mask overlay
165 | """
166 | # Number of instances
167 | n = boxes.shape[0]
168 | if n:
169 | assert boxes.shape[0] == masks.shape[-1] == class_ids.shape[0]
170 |
171 | # Generate random colors
172 | if class_colors is None:
173 | colors = random_colors(n)
174 |
175 | for i in range(n):
176 | class_id = class_ids[i]
177 | if class_colors is None:
178 | color = colors[i]
179 | else:
180 | color = class_colors[class_id]
181 |
182 |         # Transform class colors to BGR and rescale to [0, 255] for OpenCV
183 | bgr_color = tuple(c*255 for c in color[::-1])
184 |
185 | # Draw bounding boxes
186 | if not np.any(boxes[i]):
187 | # Skip this instance. Has no bbox. Likely lost in image cropping.
188 | continue
189 | y1, x1, y2, x2 = boxes[i]
190 | cv2.rectangle(image, (x1, y1), (x2, y2), color=bgr_color, thickness=2)
191 |
192 | # Draw transparent mask
193 | overlay = image.copy()
194 | mask = masks[:, :, i]
195 | __, thresh = cv2.threshold(mask, 0.5, 1, cv2.THRESH_BINARY)
196 | _, contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
197 | cv2.drawContours(image, contours, -1, color=bgr_color, thickness=cv2.FILLED)
198 | cv2.addWeighted(overlay, alpha, image, 1 - alpha, 0, image)
199 |
200 | # Draw text label
201 | score = scores[i] if scores is not None else None
202 | label = class_names[class_id]
203 | caption = "{} {:.3f}".format(label, score) if score else label
204 | cv2.putText(image, caption, (x1, y1 + 12), fontFace=cv2.FONT_HERSHEY_COMPLEX, fontScale=0.5,
205 | color=(255, 255, 255))
206 |
207 | return image
208 |
209 |
210 | def draw_rois(image, rois, refined_rois, mask, class_ids, class_names, limit=10):
211 | """
212 |     rois: [n, (y1, x1, y2, x2)] list of anchors in image coordinates.
213 |     refined_rois: [n, 4] the same anchors but refined to fit objects better.
214 | """
215 | masked_image = image.copy()
216 |
217 | # Pick random anchors in case there are too many.
218 | ids = np.arange(rois.shape[0], dtype=np.int32)
219 | ids = np.random.choice(
220 | ids, limit, replace=False) if ids.shape[0] > limit else ids
221 |
222 | fig, ax = plt.subplots(1, figsize=(12, 12))
223 | if rois.shape[0] > limit:
224 | plt.title("Showing {} random ROIs out of {}".format(
225 | len(ids), rois.shape[0]))
226 | else:
227 | plt.title("{} ROIs".format(len(ids)))
228 |
229 | # Show area outside image boundaries.
230 | ax.set_ylim(image.shape[0] + 20, -20)
231 | ax.set_xlim(-50, image.shape[1] + 20)
232 | ax.axis('off')
233 |
234 | for i, id in enumerate(ids):
235 | color = np.random.rand(3)
236 | class_id = class_ids[id]
237 | # ROI
238 | y1, x1, y2, x2 = rois[id]
239 | p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2,
240 | edgecolor=color if class_id else "gray",
241 | facecolor='none', linestyle="dashed")
242 | ax.add_patch(p)
243 | # Refined ROI
244 | if class_id:
245 | ry1, rx1, ry2, rx2 = refined_rois[id]
246 | p = patches.Rectangle((rx1, ry1), rx2 - rx1, ry2 - ry1, linewidth=2,
247 | edgecolor=color, facecolor='none')
248 | ax.add_patch(p)
249 | # Connect the top-left corners of the anchor and proposal for easy visualization
250 | ax.add_line(lines.Line2D([x1, rx1], [y1, ry1], color=color))
251 |
252 | # Label
253 | label = class_names[class_id]
254 | ax.text(rx1, ry1 + 8, "{}".format(label),
255 | color='w', size=11, backgroundcolor="none")
256 |
257 | # Mask
258 | m = utils.unmold_mask(mask[id], rois[id]
259 | [:4].astype(np.int32), image.shape)
260 | masked_image = apply_mask(masked_image, m, color)
261 |
262 | ax.imshow(masked_image)
263 |
264 | # Print stats
265 | print("Positive ROIs: ", class_ids[class_ids > 0].shape[0])
266 | print("Negative ROIs: ", class_ids[class_ids == 0].shape[0])
267 | print("Positive Ratio: {:.2f}".format(
268 | class_ids[class_ids > 0].shape[0] / class_ids.shape[0]))
269 |
270 |
271 | # TODO: Replace with matplotlib equivalent?
272 | def draw_box(image, box, color):
273 | """Draw 3-pixel width bounding boxes on the given image array.
274 | color: list of 3 int values for RGB.
275 | """
276 | y1, x1, y2, x2 = box
277 | image[y1:y1 + 2, x1:x2] = color
278 | image[y2:y2 + 2, x1:x2] = color
279 | image[y1:y2, x1:x1 + 2] = color
280 | image[y1:y2, x2:x2 + 2] = color
281 | return image
282 |
283 |
284 | def display_top_masks(image, mask, class_ids, class_names, limit=4):
285 | """Display the given image and the top few class masks."""
286 | to_display = []
287 | titles = []
288 | to_display.append(image)
289 | titles.append("H x W={}x{}".format(image.shape[0], image.shape[1]))
290 | # Pick top prominent classes in this image
291 | unique_class_ids = np.unique(class_ids)
292 | mask_area = [np.sum(mask[:, :, np.where(class_ids == i)[0]])
293 | for i in unique_class_ids]
294 | top_ids = [v[0] for v in sorted(zip(unique_class_ids, mask_area),
295 | key=lambda r: r[1], reverse=True) if v[1] > 0]
296 | # Generate images and titles
297 | for i in range(limit):
298 | class_id = top_ids[i] if i < len(top_ids) else -1
299 | # Pull masks of instances belonging to the same class.
300 | m = mask[:, :, np.where(class_ids == class_id)[0]]
301 | m = np.sum(m * np.arange(1, m.shape[-1] + 1), -1)
302 | to_display.append(m)
303 | titles.append(class_names[class_id] if class_id != -1 else "-")
304 | display_images(to_display, titles=titles, cols=limit + 1, cmap="Blues_r")
305 |
306 |
307 | def plot_precision_recall(AP, precisions, recalls):
308 | """Draw the precision-recall curve.
309 |
310 | AP: Average precision at IoU >= 0.5
311 | precisions: list of precision values
312 | recalls: list of recall values
313 | """
314 | # Plot the Precision-Recall curve
315 | _, ax = plt.subplots(1)
316 | ax.set_title("Precision-Recall Curve. AP@50 = {:.3f}".format(AP))
317 | ax.set_ylim(0, 1.1)
318 | ax.set_xlim(0, 1.1)
319 | _ = ax.plot(recalls, precisions)
320 |
321 |
322 | def plot_overlaps(gt_class_ids, pred_class_ids, pred_scores,
323 | overlaps, class_names, threshold=0.5):
324 | """Draw a grid showing how ground truth objects are classified.
325 | gt_class_ids: [N] int. Ground truth class IDs
326 |     pred_class_ids: [N] int. Predicted class IDs
327 |     pred_scores: [N] float. The probability scores of predicted classes
328 |     overlaps: [pred_boxes, gt_boxes] IoU overlaps of predictions and GT boxes.
329 | class_names: list of all class names in the dataset
330 | threshold: Float. The prediction probability required to predict a class
331 | """
332 | gt_class_ids = gt_class_ids[gt_class_ids != 0]
333 | pred_class_ids = pred_class_ids[pred_class_ids != 0]
334 |
335 | plt.figure(figsize=(12, 10))
336 | plt.imshow(overlaps, interpolation='nearest', cmap=plt.cm.Blues)
337 | plt.yticks(np.arange(len(pred_class_ids)),
338 | ["{} ({:.2f})".format(class_names[int(id)], pred_scores[i])
339 | for i, id in enumerate(pred_class_ids)])
340 | plt.xticks(np.arange(len(gt_class_ids)),
341 | [class_names[int(id)] for id in gt_class_ids], rotation=90)
342 |
343 | thresh = overlaps.max() / 2.
344 | for i, j in itertools.product(range(overlaps.shape[0]),
345 | range(overlaps.shape[1])):
346 | text = ""
347 | if overlaps[i, j] > threshold:
348 | text = "match" if gt_class_ids[j] == pred_class_ids[i] else "wrong"
349 | color = ("white" if overlaps[i, j] > thresh
350 | else "black" if overlaps[i, j] > 0
351 | else "grey")
352 | plt.text(j, i, "{:.3f}\n{}".format(overlaps[i, j], text),
353 | horizontalalignment="center", verticalalignment="center",
354 | fontsize=9, color=color)
355 |
356 | plt.tight_layout()
357 | plt.xlabel("Ground Truth")
358 | plt.ylabel("Predictions")
359 |
360 |
361 | def draw_boxes(image, boxes=None, refined_boxes=None,
362 | masks=None, captions=None, visibilities=None,
363 | title="", ax=None):
364 | """Draw bounding boxes and segmentation masks with differnt
365 | customizations.
366 |
367 | boxes: [N, (y1, x1, y2, x2, class_id)] in image coordinates.
368 | refined_boxes: Like boxes, but draw with solid lines to show
369 | that they're the result of refining 'boxes'.
370 | masks: [N, height, width]
371 | captions: List of N titles to display on each box
372 | visibilities: (optional) List of values of 0, 1, or 2. Determine how
373 |         prominent each bounding box should be.
374 | title: An optional title to show over the image
375 | ax: (optional) Matplotlib axis to draw on.
376 | """
377 | # Number of boxes
378 | assert boxes is not None or refined_boxes is not None
379 | N = boxes.shape[0] if boxes is not None else refined_boxes.shape[0]
380 |
381 | # Matplotlib Axis
382 | if not ax:
383 | _, ax = plt.subplots(1, figsize=(12, 12))
384 |
385 | # Generate random colors
386 | colors = random_colors(N)
387 |
388 | # Show area outside image boundaries.
389 | margin = image.shape[0] // 10
390 | ax.set_ylim(image.shape[0] + margin, -margin)
391 | ax.set_xlim(-margin, image.shape[1] + margin)
392 | ax.axis('off')
393 |
394 | ax.set_title(title)
395 |
396 | masked_image = image.astype(np.uint32).copy()
397 | for i in range(N):
398 | # Box visibility
399 | visibility = visibilities[i] if visibilities is not None else 1
400 | if visibility == 0:
401 | color = "gray"
402 | style = "dotted"
403 | alpha = 0.5
404 | elif visibility == 1:
405 | color = colors[i]
406 | style = "dotted"
407 | alpha = 1
408 | elif visibility == 2:
409 | color = colors[i]
410 | style = "solid"
411 | alpha = 1
412 |
413 | # Boxes
414 | if boxes is not None:
415 | if not np.any(boxes[i]):
416 | # Skip this instance. Has no bbox. Likely lost in cropping.
417 | continue
418 | y1, x1, y2, x2 = boxes[i]
419 | p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2,
420 | alpha=alpha, linestyle=style,
421 | edgecolor=color, facecolor='none')
422 | ax.add_patch(p)
423 |
424 | # Refined boxes
425 | if refined_boxes is not None and visibility > 0:
426 | ry1, rx1, ry2, rx2 = refined_boxes[i].astype(np.int32)
427 | p = patches.Rectangle((rx1, ry1), rx2 - rx1, ry2 - ry1, linewidth=2,
428 | edgecolor=color, facecolor='none')
429 | ax.add_patch(p)
430 | # Connect the top-left corners of the anchor and proposal
431 | if boxes is not None:
432 | ax.add_line(lines.Line2D([x1, rx1], [y1, ry1], color=color))
433 |
434 | # Captions
435 | if captions is not None:
436 | caption = captions[i]
437 | # If there are refined boxes, display captions on them
438 | if refined_boxes is not None:
439 | y1, x1, y2, x2 = ry1, rx1, ry2, rx2
440 | x = random.randint(x1, (x1 + x2) // 2)
441 | ax.text(x1, y1, caption, size=11, verticalalignment='top',
442 | color='w', backgroundcolor="none",
443 | bbox={'facecolor': color, 'alpha': 0.5,
444 | 'pad': 2, 'edgecolor': 'none'})
445 |
446 | # Masks
447 | if masks is not None:
448 | mask = masks[:, :, i]
449 | masked_image = apply_mask(masked_image, mask, color)
450 | # Mask Polygon
451 | # Pad to ensure proper polygons for masks that touch image edges.
452 | padded_mask = np.zeros(
453 | (mask.shape[0] + 2, mask.shape[1] + 2), dtype=np.uint8)
454 | padded_mask[1:-1, 1:-1] = mask
455 | contours = find_contours(padded_mask, 0.5)
456 | for verts in contours:
457 | # Subtract the padding and flip (y, x) to (x, y)
458 | verts = np.fliplr(verts) - 1
459 | p = Polygon(verts, facecolor="none", edgecolor=color)
460 | ax.add_patch(p)
461 | ax.imshow(masked_image.astype(np.uint8))
462 |
463 |
464 | def display_table(table):
465 | """Display values in a table format.
466 | table: an iterable of rows, and each row is an iterable of values.
467 | """
468 | html = ""
469 | for row in table:
470 | row_html = ""
471 | for col in row:
472 |             row_html += "<td>{:40}</td>".format(str(col))
473 |         html += "<tr>" + row_html + "</tr>"
474 |     html = "<table>" + html + "</table>"
475 | IPython.display.display(IPython.display.HTML(html))
476 |
477 |
478 | def display_weight_stats(model):
479 | """Scans all the weights in the model and returns a list of tuples
480 | that contain stats about each weight.
481 | """
482 | layers = model.get_trainable_layers()
483 | table = [["WEIGHT NAME", "SHAPE", "MIN", "MAX", "STD"]]
484 | for l in layers:
485 | weight_values = l.get_weights() # list of Numpy arrays
486 | weight_tensors = l.weights # list of TF tensors
487 | for i, w in enumerate(weight_values):
488 | weight_name = weight_tensors[i].name
489 | # Detect problematic layers. Exclude biases of conv layers.
490 | alert = ""
491 | if w.min() == w.max() and not (l.__class__.__name__ == "Conv2D" and i == 1):
492 | alert += "*** dead?"
493 | if np.abs(w.min()) > 1000 or np.abs(w.max()) > 1000:
494 | alert += "*** Overflow?"
495 | # Add row
496 | table.append([
497 | weight_name + alert,
498 | str(w.shape),
499 | "{:+9.4f}".format(w.min()),
500 | "{:+10.4f}".format(w.max()),
501 | "{:+9.4f}".format(w.std()),
502 | ])
503 | display_table(table)
504 |
--------------------------------------------------------------------------------
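A small worked example of the apply_mask blend above: where the mask is 1,
each channel becomes pixel * (1 - alpha) + alpha * color * 255, which pulls
the pixel halfway toward the overlay color at alpha = 0.5:

    import numpy as np

    image = np.full((2, 2, 3), 100, dtype=np.float32)
    mask = np.array([[1, 0], [0, 1]])
    color, alpha = (1.0, 0.0, 0.0), 0.5    # pure red at 50% opacity

    for c in range(3):
        image[:, :, c] = np.where(mask == 1,
                                  image[:, :, c] * (1 - alpha) + alpha * color[c] * 255,
                                  image[:, :, c])

    print(image[0, 0])   # [177.5  50.   50. ]: blended halfway toward red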
/nodes/coco.py:
--------------------------------------------------------------------------------
1 | """
2 | Mask R-CNN
3 | Configurations and data loading code for MS COCO.
4 |
5 | Copyright (c) 2017 Matterport, Inc.
6 | Licensed under the MIT License (see LICENSE for details)
7 | Written by Waleed Abdulla
8 |
9 | ------------------------------------------------------------
10 |
11 | Usage: import the module (see Jupyter notebooks for examples), or run from
12 | the command line as such:
13 |
14 | # Train a new model starting from pre-trained COCO weights
15 | python3 coco.py train --dataset=/path/to/coco/ --model=coco
16 |
17 | # Train a new model starting from ImageNet weights
18 | python3 coco.py train --dataset=/path/to/coco/ --model=imagenet
19 |
20 | # Continue training a model that you had trained earlier
21 | python3 coco.py train --dataset=/path/to/coco/ --model=/path/to/weights.h5
22 |
23 | # Continue training the last model you trained
24 | python3 coco.py train --dataset=/path/to/coco/ --model=last
25 |
26 |     # Run COCO evaluation on the last model you trained
27 | python3 coco.py evaluate --dataset=/path/to/coco/ --model=last
28 | """
29 |
30 | import os
31 | import time
32 | import numpy as np
33 |
34 | # Download and install the Python COCO tools from https://github.com/waleedka/coco
35 | # That's a fork from the original https://github.com/pdollar/coco with a bug
36 | # fix for Python 3.
37 | # I submitted a pull request https://github.com/cocodataset/cocoapi/pull/50
38 | # If the PR is merged then use the original repo.
39 | # Note: Edit PythonAPI/Makefile and replace "python" with "python3".
40 | from pycocotools.coco import COCO
41 | from pycocotools.cocoeval import COCOeval
42 | from pycocotools import mask as maskUtils
43 |
44 | import zipfile
45 | from six.moves.urllib import request
46 | import shutil
47 |
48 | from config import Config
49 | import utils
50 | import model as modellib
51 |
52 | # Root directory of the project
53 | ROOT_DIR = os.path.dirname(__file__)
54 |
55 | # Path to trained weights file
56 | COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")
57 |
58 | # Directory to save logs and model checkpoints, if not provided
59 | # through the command line argument --logs
60 | DEFAULT_LOGS_DIR = os.path.join(ROOT_DIR, "logs")
61 | DEFAULT_DATASET_YEAR = "2014"
62 |
63 | ############################################################
64 | # Configurations
65 | ############################################################
66 |
67 |
68 | class CocoConfig(Config):
69 | """Configuration for training on MS COCO.
70 | Derives from the base Config class and overrides values specific
71 | to the COCO dataset.
72 | """
73 | # Give the configuration a recognizable name
74 | NAME = "coco"
75 |
76 | # We use a GPU with 12GB memory, which can fit two images.
77 | # Adjust down if you use a smaller GPU.
78 | IMAGES_PER_GPU = 2
79 |
80 | # Uncomment to train on 8 GPUs (default is 1)
81 | # GPU_COUNT = 8
82 |
83 | # Number of classes (including background)
84 | NUM_CLASSES = 1 + 80 # COCO has 80 classes
85 |
86 |
87 | ############################################################
88 | # Dataset
89 | ############################################################
90 |
91 | class CocoDataset(utils.Dataset):
92 | def load_coco(self, dataset_dir, subset, year=DEFAULT_DATASET_YEAR, class_ids=None,
93 | class_map=None, return_coco=False, auto_download=False):
94 | """Load a subset of the COCO dataset.
95 | dataset_dir: The root directory of the COCO dataset.
96 | subset: What to load (train, val, minival, valminusminival)
97 | year: What dataset year to load (2014, 2017) as a string, not an integer
98 | class_ids: If provided, only loads images that have the given classes.
99 |         class_map: TODO: Not implemented yet. Supports mapping classes from
100 | different datasets to the same class ID.
101 | return_coco: If True, returns the COCO object.
102 | auto_download: Automatically download and unzip MS-COCO images and annotations
103 | """
104 |
105 | if auto_download is True:
106 | self.auto_download(dataset_dir, subset, year)
107 |
108 | coco = COCO("{}/annotations/instances_{}{}.json".format(dataset_dir, subset, year))
109 | if subset == "minival" or subset == "valminusminival":
110 | subset = "val"
111 | image_dir = "{}/{}{}".format(dataset_dir, subset, year)
112 |
113 | # Load all classes or a subset?
114 | if not class_ids:
115 | # All classes
116 | class_ids = sorted(coco.getCatIds())
117 |
118 | # All images or a subset?
119 | if class_ids:
120 | image_ids = []
121 | for id in class_ids:
122 | image_ids.extend(list(coco.getImgIds(catIds=[id])))
123 | # Remove duplicates
124 | image_ids = list(set(image_ids))
125 | else:
126 | # All images
127 | image_ids = list(coco.imgs.keys())
128 |
129 | # Add classes
130 | for i in class_ids:
131 | self.add_class("coco", i, coco.loadCats(i)[0]["name"])
132 |
133 | # Add images
134 | for i in image_ids:
135 | self.add_image(
136 | "coco", image_id=i,
137 | path=os.path.join(image_dir, coco.imgs[i]['file_name']),
138 | width=coco.imgs[i]["width"],
139 | height=coco.imgs[i]["height"],
140 | annotations=coco.loadAnns(coco.getAnnIds(
141 | imgIds=[i], catIds=class_ids, iscrowd=None)))
142 | if return_coco:
143 | return coco
144 |
145 | def auto_download(self, dataDir, dataType, dataYear):
146 | """Download the COCO dataset/annotations if requested.
147 | dataDir: The root directory of the COCO dataset.
148 | dataType: What to load (train, val, minival, valminusminival)
149 | dataYear: What dataset year to load (2014, 2017) as a string, not an integer
150 | Note:
151 | For 2014, use "train", "val", "minival", or "valminusminival"
152 | For 2017, only "train" and "val" annotations are available
153 | """
154 |
155 | # Setup paths and file names
156 | if dataType == "minival" or dataType == "valminusminival":
157 | imgDir = "{}/{}{}".format(dataDir, "val", dataYear)
158 | imgZipFile = "{}/{}{}.zip".format(dataDir, "val", dataYear)
159 | imgURL = "http://images.cocodataset.org/zips/{}{}.zip".format("val", dataYear)
160 | else:
161 | imgDir = "{}/{}{}".format(dataDir, dataType, dataYear)
162 | imgZipFile = "{}/{}{}.zip".format(dataDir, dataType, dataYear)
163 | imgURL = "http://images.cocodataset.org/zips/{}{}.zip".format(dataType, dataYear)
164 | # print("Image paths:"); print(imgDir); print(imgZipFile); print(imgURL)
165 |
166 | # Create main folder if it doesn't exist yet
167 | if not os.path.exists(dataDir):
168 | os.makedirs(dataDir)
169 |
170 | # Download images if not available locally
171 | if not os.path.exists(imgDir):
172 | os.makedirs(imgDir)
173 | print("Downloading images to " + imgZipFile + " ...")
174 | with request.urlopen(imgURL) as resp, open(imgZipFile, 'wb') as out:
175 | shutil.copyfileobj(resp, out)
176 | print("... done downloading.")
177 | print("Unzipping " + imgZipFile)
178 | with zipfile.ZipFile(imgZipFile, "r") as zip_ref:
179 | zip_ref.extractall(dataDir)
180 | print("... done unzipping")
181 | print("Will use images in " + imgDir)
182 |
183 | # Setup annotations data paths
184 | annDir = "{}/annotations".format(dataDir)
185 | if dataType == "minival":
186 | annZipFile = "{}/instances_minival2014.json.zip".format(dataDir)
187 | annFile = "{}/instances_minival2014.json".format(annDir)
188 | annURL = "https://dl.dropboxusercontent.com/s/o43o90bna78omob/instances_minival2014.json.zip?dl=0"
189 | unZipDir = annDir
190 | elif dataType == "valminusminival":
191 | annZipFile = "{}/instances_valminusminival2014.json.zip".format(dataDir)
192 | annFile = "{}/instances_valminusminival2014.json".format(annDir)
193 | annURL = "https://dl.dropboxusercontent.com/s/s3tw5zcg7395368/instances_valminusminival2014.json.zip?dl=0"
194 | unZipDir = annDir
195 | else:
196 | annZipFile = "{}/annotations_trainval{}.zip".format(dataDir, dataYear)
197 | annFile = "{}/instances_{}{}.json".format(annDir, dataType, dataYear)
198 | annURL = "http://images.cocodataset.org/annotations/annotations_trainval{}.zip".format(dataYear)
199 | unZipDir = dataDir
200 | # print("Annotations paths:"); print(annDir); print(annFile); print(annZipFile); print(annURL)
201 |
202 | # Download annotations if not available locally
203 | if not os.path.exists(annDir):
204 | os.makedirs(annDir)
205 | if not os.path.exists(annFile):
206 | if not os.path.exists(annZipFile):
207 | print("Downloading zipped annotations to " + annZipFile + " ...")
208 | with request.urlopen(annURL) as resp, open(annZipFile, 'wb') as out:
209 | shutil.copyfileobj(resp, out)
210 | print("... done downloading.")
211 | print("Unzipping " + annZipFile)
212 | with zipfile.ZipFile(annZipFile, "r") as zip_ref:
213 | zip_ref.extractall(unZipDir)
214 | print("... done unzipping")
215 | print("Will use annotations in " + annFile)
216 |
217 | def load_mask(self, image_id):
218 | """Load instance masks for the given image.
219 |
220 | Different datasets use different ways to store masks. This
221 |         function converts the different mask formats into one format
222 | in the form of a bitmap [height, width, instances].
223 |
224 | Returns:
225 | masks: A bool array of shape [height, width, instance count] with
226 | one mask per instance.
227 | class_ids: a 1D array of class IDs of the instance masks.
228 | """
229 | # If not a COCO image, delegate to parent class.
230 | image_info = self.image_info[image_id]
231 | if image_info["source"] != "coco":
232 | return super(CocoDataset, self).load_mask(image_id)
233 |
234 | instance_masks = []
235 | class_ids = []
236 | annotations = self.image_info[image_id]["annotations"]
237 | # Build mask of shape [height, width, instance_count] and list
238 | # of class IDs that correspond to each channel of the mask.
239 | for annotation in annotations:
240 | class_id = self.map_source_class_id(
241 | "coco.{}".format(annotation['category_id']))
242 | if class_id:
243 | m = self.annToMask(annotation, image_info["height"],
244 | image_info["width"])
245 |                 # Some objects are so small that their area is under one
246 |                 # pixel, so the rasterized mask comes out empty. Skip those objects.
247 | if m.max() < 1:
248 | continue
249 | # Is it a crowd? If so, use a negative class ID.
250 | if annotation['iscrowd']:
251 | # Use negative class ID for crowds
252 | class_id *= -1
253 |                     # For crowd masks, annToMask() sometimes returns a mask
254 |                     # smaller than the given dimensions. If so, substitute a full-size mask of ones.
255 | if m.shape[0] != image_info["height"] or m.shape[1] != image_info["width"]:
256 | m = np.ones([image_info["height"], image_info["width"]], dtype=bool)
257 | instance_masks.append(m)
258 | class_ids.append(class_id)
259 |
260 | # Pack instance masks into an array
261 | if class_ids:
262 | mask = np.stack(instance_masks, axis=2)
263 | class_ids = np.array(class_ids, dtype=np.int32)
264 | return mask, class_ids
265 | else:
266 | # Call super class to return an empty mask
267 | return super(CocoDataset, self).load_mask(image_id)
268 |
269 | def image_reference(self, image_id):
270 | """Return a link to the image in the COCO Website."""
271 | info = self.image_info[image_id]
272 | if info["source"] == "coco":
273 | return "http://cocodataset.org/#explore?id={}".format(info["id"])
274 | else:
275 |             return super(CocoDataset, self).image_reference(image_id)
276 |
277 | # The following two functions are from pycocotools with a few changes.
278 |
279 | def annToRLE(self, ann, height, width):
280 | """
281 |         Convert an annotation, which can be polygons or uncompressed RLE, to RLE.
282 |         :return: RLE (run-length encoding of the mask)
283 | """
284 | segm = ann['segmentation']
285 | if isinstance(segm, list):
286 | # polygon -- a single object might consist of multiple parts
287 | # we merge all parts into one mask rle code
288 | rles = maskUtils.frPyObjects(segm, height, width)
289 | rle = maskUtils.merge(rles)
290 | elif isinstance(segm['counts'], list):
291 | # uncompressed RLE
292 | rle = maskUtils.frPyObjects(segm, height, width)
293 | else:
294 | # rle
295 | rle = ann['segmentation']
296 | return rle
297 |
298 | def annToMask(self, ann, height, width):
299 | """
300 | Convert annotation which can be polygons, uncompressed RLE, or RLE to binary mask.
301 | :return: binary mask (numpy 2D array)
302 | """
303 | rle = self.annToRLE(ann, height, width)
304 | m = maskUtils.decode(rle)
305 | return m
306 |
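    # A sketch of the three segmentation formats annToRLE()/annToMask() accept,
    # with hypothetical values:
    #
    #   polygon:          {"segmentation": [[x1, y1, x2, y2, ...], ...]}
    #   uncompressed RLE: {"segmentation": {"counts": [137, 4, 5, ...], "size": [h, w]}}
    #   compressed RLE:   {"segmentation": {"counts": "b`Q1...", "size": [h, w]}}
    #
    # In all three cases annToMask(ann, h, w) returns an [h, w] np.uint8 array
    # of 0s and 1s.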
307 |
308 | ############################################################
309 | # COCO Evaluation
310 | ############################################################
311 |
312 | def build_coco_results(dataset, image_ids, rois, class_ids, scores, masks):
313 |     """Arrange results to match COCO specs in http://cocodataset.org/#format
314 | """
315 | # If no results, return an empty list
316 | if rois is None:
317 | return []
318 |
319 | results = []
320 | for image_id in image_ids:
321 | # Loop through detections
322 | for i in range(rois.shape[0]):
323 | class_id = class_ids[i]
324 | score = scores[i]
325 | bbox = np.around(rois[i], 1)
326 | mask = masks[:, :, i]
327 |
328 | result = {
329 | "image_id": image_id,
330 | "category_id": dataset.get_source_class_id(class_id, "coco"),
331 | "bbox": [bbox[1], bbox[0], bbox[3] - bbox[1], bbox[2] - bbox[0]],
332 | "score": score,
333 | "segmentation": maskUtils.encode(np.asfortranarray(mask))
334 | }
335 | results.append(result)
336 | return results
337 |
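# A sketch of one result dict as build_coco_results() emits it, with
# hypothetical values. COCO bboxes are [x, y, width, height], which is why the
# [y1, x1, y2, x2] ROIs are reordered above:
#
#   {
#       "image_id": 139,
#       "category_id": 1,                              # source (COCO) class ID
#       "bbox": [12.3, 45.6, 100.0, 50.0],             # [x, y, width, height]
#       "score": 0.98,
#       "segmentation": {"size": [480, 640], "counts": "..."},  # compressed RLE
#   }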
338 |
339 | def evaluate_coco(model, dataset, coco, eval_type="bbox", limit=0, image_ids=None):
340 | """Runs official COCO evaluation.
341 |     dataset: A Dataset object with validation data
342 | eval_type: "bbox" or "segm" for bounding box or segmentation evaluation
343 | limit: if not 0, it's the number of images to use for evaluation
344 | """
345 | # Pick COCO images from the dataset
346 | image_ids = image_ids or dataset.image_ids
347 |
348 | # Limit to a subset
349 | if limit:
350 | image_ids = image_ids[:limit]
351 |
352 | # Get corresponding COCO image IDs.
353 | coco_image_ids = [dataset.image_info[id]["id"] for id in image_ids]
354 |
355 | t_prediction = 0
356 | t_start = time.time()
357 |
358 | results = []
359 | for i, image_id in enumerate(image_ids):
360 | # Load image
361 | image = dataset.load_image(image_id)
362 |
363 | # Run detection
364 | t = time.time()
365 | r = model.detect([image], verbose=0)[0]
366 | t_prediction += (time.time() - t)
367 |
368 | # Convert results to COCO format
369 | image_results = build_coco_results(dataset, coco_image_ids[i:i + 1],
370 | r["rois"], r["class_ids"],
371 | r["scores"], r["masks"])
372 | results.extend(image_results)
373 |
374 | # Load results. This modifies results with additional attributes.
375 | coco_results = coco.loadRes(results)
376 |
377 | # Evaluate
378 | cocoEval = COCOeval(coco, coco_results, eval_type)
379 | cocoEval.params.imgIds = coco_image_ids
380 | cocoEval.evaluate()
381 | cocoEval.accumulate()
382 | cocoEval.summarize()
383 |
384 | print("Prediction time: {}. Average {}/image".format(
385 | t_prediction, t_prediction / len(image_ids)))
386 | print("Total time: ", time.time() - t_start)
387 |
388 |
389 | ############################################################
390 | # Training
391 | ############################################################
392 |
393 |
394 | if __name__ == '__main__':
395 | import argparse
396 |
397 | # Parse command line arguments
398 | parser = argparse.ArgumentParser(
399 | description='Train Mask R-CNN on MS COCO.')
400 | parser.add_argument("command",
401 | metavar="",
402 | help="'train' or 'evaluate' on MS COCO")
403 | parser.add_argument('--dataset', required=True,
404 | metavar="/path/to/coco/",
405 | help='Directory of the MS-COCO dataset')
406 | parser.add_argument('--year', required=False,
407 | default=DEFAULT_DATASET_YEAR,
408 | metavar="",
409 | help='Year of the MS-COCO dataset (2014 or 2017) (default=2014)')
410 | parser.add_argument('--model', required=True,
411 | metavar="/path/to/weights.h5",
412 | help="Path to weights .h5 file or 'coco'")
413 | parser.add_argument('--logs', required=False,
414 | default=DEFAULT_LOGS_DIR,
415 | metavar="/path/to/logs/",
416 | help='Logs and checkpoints directory (default=logs/)')
417 | parser.add_argument('--limit', required=False,
418 | default=500,
419 | metavar="",
420 | help='Images to use for evaluation (default=500)')
421 | parser.add_argument('--download', required=False,
422 | default=False,
423 | metavar="",
424 | help='Automatically download and unzip MS-COCO files (default=False)',
425 |                         type=lambda s: s.lower() in ('true', '1', 'yes'))  # plain type=bool would treat any non-empty string, even "False", as True
426 | args = parser.parse_args()
427 | print("Command: ", args.command)
428 | print("Model: ", args.model)
429 | print("Dataset: ", args.dataset)
430 | print("Year: ", args.year)
431 | print("Logs: ", args.logs)
432 | print("Auto Download: ", args.download)
433 |
434 | # Configurations
435 | if args.command == "train":
436 | config = CocoConfig()
437 | else:
438 | class InferenceConfig(CocoConfig):
439 | # Set batch size to 1 since we'll be running inference on
440 | # one image at a time. Batch size = GPU_COUNT * IMAGES_PER_GPU
441 | GPU_COUNT = 1
442 | IMAGES_PER_GPU = 1
443 | DETECTION_MIN_CONFIDENCE = 0
444 | config = InferenceConfig()
445 | config.display()
446 |
447 | # Create model
448 | if args.command == "train":
449 | model = modellib.MaskRCNN(mode="training", config=config,
450 | model_dir=args.logs)
451 | else:
452 | model = modellib.MaskRCNN(mode="inference", config=config,
453 | model_dir=args.logs)
454 |
455 | # Select weights file to load
456 | if args.model.lower() == "coco":
457 | model_path = COCO_MODEL_PATH
458 | elif args.model.lower() == "last":
459 | # Find last trained weights
460 | model_path = model.find_last()[1]
461 | elif args.model.lower() == "imagenet":
462 | # Start from ImageNet trained weights
463 | model_path = model.get_imagenet_weights()
464 | else:
465 | model_path = args.model
466 |
467 | # Load weights
468 | print("Loading weights ", model_path)
469 | model.load_weights(model_path, by_name=True)
470 |
471 | # Train or evaluate
472 | if args.command == "train":
473 | # Training dataset. Use the training set and 35K from the
474 |         # validation set, as in the Mask R-CNN paper.
475 | dataset_train = CocoDataset()
476 | dataset_train.load_coco(args.dataset, "train", year=args.year, auto_download=args.download)
477 | dataset_train.load_coco(args.dataset, "valminusminival", year=args.year, auto_download=args.download)
478 | dataset_train.prepare()
479 |
480 | # Validation dataset
481 | dataset_val = CocoDataset()
482 | dataset_val.load_coco(args.dataset, "minival", year=args.year, auto_download=args.download)
483 | dataset_val.prepare()
484 |
485 | # *** This training schedule is an example. Update to your needs ***
486 |
487 | # Training - Stage 1
488 | print("Training network heads")
489 | model.train(dataset_train, dataset_val,
490 | learning_rate=config.LEARNING_RATE,
491 | epochs=40,
492 | layers='heads')
493 |
494 | # Training - Stage 2
495 |         # Fine-tune layers from ResNet stage 4 and up
496 |         print("Fine-tuning ResNet stage 4 and up")
497 | model.train(dataset_train, dataset_val,
498 | learning_rate=config.LEARNING_RATE,
499 | epochs=120,
500 | layers='4+')
501 |
502 | # Training - Stage 3
503 |         # Fine-tune all layers
504 |         print("Fine-tuning all layers")
505 | model.train(dataset_train, dataset_val,
506 | learning_rate=config.LEARNING_RATE / 10,
507 | epochs=160,
508 | layers='all')
509 |
510 | elif args.command == "evaluate":
511 | # Validation dataset
512 | dataset_val = CocoDataset()
513 | coco = dataset_val.load_coco(args.dataset, "minival", year=args.year, return_coco=True, auto_download=args.download)
514 | dataset_val.prepare()
515 | print("Running COCO evaluation on {} images.".format(args.limit))
516 | evaluate_coco(model, dataset_val, coco, "bbox", limit=int(args.limit))
517 | else:
518 | print("'{}' is not recognized. "
519 | "Use 'train' or 'evaluate'".format(args.command))
520 |
--------------------------------------------------------------------------------
/src/mask_rcnn_ros/coco.py:
--------------------------------------------------------------------------------
1 | """
2 | Mask R-CNN
3 | Configurations and data loading code for MS COCO.
4 |
5 | Copyright (c) 2017 Matterport, Inc.
6 | Licensed under the MIT License (see LICENSE for details)
7 | Written by Waleed Abdulla
8 |
9 | ------------------------------------------------------------
10 |
11 | Usage: import the module (see Jupyter notebooks for examples), or run from
12 | the command line as such:
13 |
14 | # Train a new model starting from pre-trained COCO weights
15 | python3 coco.py train --dataset=/path/to/coco/ --model=coco
16 |
17 | # Train a new model starting from ImageNet weights
18 | python3 coco.py train --dataset=/path/to/coco/ --model=imagenet
19 |
20 | # Continue training a model that you had trained earlier
21 | python3 coco.py train --dataset=/path/to/coco/ --model=/path/to/weights.h5
22 |
23 | # Continue training the last model you trained
24 | python3 coco.py train --dataset=/path/to/coco/ --model=last
25 |
26 |     # Run COCO evaluation on the last model you trained
27 | python3 coco.py evaluate --dataset=/path/to/coco/ --model=last
28 | """
29 |
30 | import os
31 | import time
32 | import numpy as np
33 |
34 | # Download and install the Python COCO tools from https://github.com/waleedka/coco
35 | # That's a fork from the original https://github.com/pdollar/coco with a bug
36 | # fix for Python 3.
37 | # I submitted a pull request https://github.com/cocodataset/cocoapi/pull/50
38 | # If the PR is merged then use the original repo.
39 | # Note: Edit PythonAPI/Makefile and replace "python" with "python3".
40 | from pycocotools.coco import COCO
41 | from pycocotools.cocoeval import COCOeval
42 | from pycocotools import mask as maskUtils
43 |
44 | import zipfile
45 | from six.moves.urllib import request
46 | import shutil
47 |
48 | from config import Config
49 | import utils
50 | import model as modellib
51 |
52 | # Root directory of the project
53 | ROOT_DIR = os.path.dirname(__file__)
54 |
55 | # Path to trained weights file
56 | COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")
57 |
58 | # Directory to save logs and model checkpoints, if not provided
59 | # through the command line argument --logs
60 | DEFAULT_LOGS_DIR = os.path.join(ROOT_DIR, "logs")
61 | DEFAULT_DATASET_YEAR = "2014"
62 |
63 | ############################################################
64 | # Configurations
65 | ############################################################
66 |
67 |
68 | class CocoConfig(Config):
69 | """Configuration for training on MS COCO.
70 | Derives from the base Config class and overrides values specific
71 | to the COCO dataset.
72 | """
73 | # Give the configuration a recognizable name
74 | NAME = "coco"
75 |
76 | # We use a GPU with 12GB memory, which can fit two images.
77 | # Adjust down if you use a smaller GPU.
78 | IMAGES_PER_GPU = 2
79 |
80 | # Uncomment to train on 8 GPUs (default is 1)
81 | # GPU_COUNT = 8
82 |
83 | # Number of classes (including background)
84 | NUM_CLASSES = 1 + 80 # COCO has 80 classes
85 |
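# A minimal override sketch for smaller GPUs (the class name and value are
# hypothetical): subclass CocoConfig and shrink the per-GPU batch.
#
#   class SmallGPUCocoConfig(CocoConfig):
#       IMAGES_PER_GPU = 1
#
#   config = SmallGPUCocoConfig()
#   config.display()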
86 |
87 | ############################################################
88 | # Dataset
89 | ############################################################
90 |
91 | class CocoDataset(utils.Dataset):
92 | def load_coco(self, dataset_dir, subset, year=DEFAULT_DATASET_YEAR, class_ids=None,
93 | class_map=None, return_coco=False, auto_download=False):
94 | """Load a subset of the COCO dataset.
95 | dataset_dir: The root directory of the COCO dataset.
96 | subset: What to load (train, val, minival, valminusminival)
97 | year: What dataset year to load (2014, 2017) as a string, not an integer
98 | class_ids: If provided, only loads images that have the given classes.
99 |         class_map: TODO: Not implemented yet. Supports mapping classes from
100 | different datasets to the same class ID.
101 | return_coco: If True, returns the COCO object.
102 | auto_download: Automatically download and unzip MS-COCO images and annotations
103 | """
104 |
105 | if auto_download is True:
106 | self.auto_download(dataset_dir, subset, year)
107 |
108 | coco = COCO("{}/annotations/instances_{}{}.json".format(dataset_dir, subset, year))
109 | if subset == "minival" or subset == "valminusminival":
110 | subset = "val"
111 | image_dir = "{}/{}{}".format(dataset_dir, subset, year)
112 |
113 | # Load all classes or a subset?
114 | if not class_ids:
115 | # All classes
116 | class_ids = sorted(coco.getCatIds())
117 |
118 | # All images or a subset?
119 | if class_ids:
120 | image_ids = []
121 | for id in class_ids:
122 | image_ids.extend(list(coco.getImgIds(catIds=[id])))
123 | # Remove duplicates
124 | image_ids = list(set(image_ids))
125 | else:
126 | # All images
127 | image_ids = list(coco.imgs.keys())
128 |
129 | # Add classes
130 | for i in class_ids:
131 | self.add_class("coco", i, coco.loadCats(i)[0]["name"])
132 |
133 | # Add images
134 | for i in image_ids:
135 | self.add_image(
136 | "coco", image_id=i,
137 | path=os.path.join(image_dir, coco.imgs[i]['file_name']),
138 | width=coco.imgs[i]["width"],
139 | height=coco.imgs[i]["height"],
140 | annotations=coco.loadAnns(coco.getAnnIds(
141 | imgIds=[i], catIds=class_ids, iscrowd=None)))
142 | if return_coco:
143 | return coco
144 |
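    # A minimal usage sketch (the dataset path is hypothetical):
    #
    #   dataset = CocoDataset()
    #   dataset.load_coco("/path/to/coco", "train", year="2014")
    #   dataset.prepare()
    #   print(dataset.num_images, dataset.num_classes)  # e.g. 82783 and 81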
145 | def auto_download(self, dataDir, dataType, dataYear):
146 | """Download the COCO dataset/annotations if requested.
147 | dataDir: The root directory of the COCO dataset.
148 | dataType: What to load (train, val, minival, valminusminival)
149 | dataYear: What dataset year to load (2014, 2017) as a string, not an integer
150 | Note:
151 | For 2014, use "train", "val", "minival", or "valminusminival"
152 | For 2017, only "train" and "val" annotations are available
153 | """
154 |
155 | # Setup paths and file names
156 | if dataType == "minival" or dataType == "valminusminival":
157 | imgDir = "{}/{}{}".format(dataDir, "val", dataYear)
158 | imgZipFile = "{}/{}{}.zip".format(dataDir, "val", dataYear)
159 | imgURL = "http://images.cocodataset.org/zips/{}{}.zip".format("val", dataYear)
160 | else:
161 | imgDir = "{}/{}{}".format(dataDir, dataType, dataYear)
162 | imgZipFile = "{}/{}{}.zip".format(dataDir, dataType, dataYear)
163 | imgURL = "http://images.cocodataset.org/zips/{}{}.zip".format(dataType, dataYear)
164 | # print("Image paths:"); print(imgDir); print(imgZipFile); print(imgURL)
165 |
166 | # Create main folder if it doesn't exist yet
167 | if not os.path.exists(dataDir):
168 | os.makedirs(dataDir)
169 |
170 | # Download images if not available locally
171 | if not os.path.exists(imgDir):
172 | os.makedirs(imgDir)
173 | print("Downloading images to " + imgZipFile + " ...")
174 | with request.urlopen(imgURL) as resp, open(imgZipFile, 'wb') as out:
175 | shutil.copyfileobj(resp, out)
176 | print("... done downloading.")
177 | print("Unzipping " + imgZipFile)
178 | with zipfile.ZipFile(imgZipFile, "r") as zip_ref:
179 | zip_ref.extractall(dataDir)
180 | print("... done unzipping")
181 | print("Will use images in " + imgDir)
182 |
183 | # Setup annotations data paths
184 | annDir = "{}/annotations".format(dataDir)
185 | if dataType == "minival":
186 | annZipFile = "{}/instances_minival2014.json.zip".format(dataDir)
187 | annFile = "{}/instances_minival2014.json".format(annDir)
188 | annURL = "https://dl.dropboxusercontent.com/s/o43o90bna78omob/instances_minival2014.json.zip?dl=0"
189 | unZipDir = annDir
190 | elif dataType == "valminusminival":
191 | annZipFile = "{}/instances_valminusminival2014.json.zip".format(dataDir)
192 | annFile = "{}/instances_valminusminival2014.json".format(annDir)
193 | annURL = "https://dl.dropboxusercontent.com/s/s3tw5zcg7395368/instances_valminusminival2014.json.zip?dl=0"
194 | unZipDir = annDir
195 | else:
196 | annZipFile = "{}/annotations_trainval{}.zip".format(dataDir, dataYear)
197 | annFile = "{}/instances_{}{}.json".format(annDir, dataType, dataYear)
198 | annURL = "http://images.cocodataset.org/annotations/annotations_trainval{}.zip".format(dataYear)
199 | unZipDir = dataDir
200 | # print("Annotations paths:"); print(annDir); print(annFile); print(annZipFile); print(annURL)
201 |
202 | # Download annotations if not available locally
203 | if not os.path.exists(annDir):
204 | os.makedirs(annDir)
205 | if not os.path.exists(annFile):
206 | if not os.path.exists(annZipFile):
207 | print("Downloading zipped annotations to " + annZipFile + " ...")
208 | with request.urlopen(annURL) as resp, open(annZipFile, 'wb') as out:
209 | shutil.copyfileobj(resp, out)
210 | print("... done downloading.")
211 | print("Unzipping " + annZipFile)
212 | with zipfile.ZipFile(annZipFile, "r") as zip_ref:
213 | zip_ref.extractall(unZipDir)
214 | print("... done unzipping")
215 | print("Will use annotations in " + annFile)
216 |
217 | def load_mask(self, image_id):
218 | """Load instance masks for the given image.
219 |
220 | Different datasets use different ways to store masks. This
221 |         function converts the different mask formats into one format
222 | in the form of a bitmap [height, width, instances].
223 |
224 | Returns:
225 | masks: A bool array of shape [height, width, instance count] with
226 | one mask per instance.
227 | class_ids: a 1D array of class IDs of the instance masks.
228 | """
229 | # If not a COCO image, delegate to parent class.
230 | image_info = self.image_info[image_id]
231 | if image_info["source"] != "coco":
232 | return super(CocoDataset, self).load_mask(image_id)
233 |
234 | instance_masks = []
235 | class_ids = []
236 | annotations = self.image_info[image_id]["annotations"]
237 | # Build mask of shape [height, width, instance_count] and list
238 | # of class IDs that correspond to each channel of the mask.
239 | for annotation in annotations:
240 | class_id = self.map_source_class_id(
241 | "coco.{}".format(annotation['category_id']))
242 | if class_id:
243 | m = self.annToMask(annotation, image_info["height"],
244 | image_info["width"])
245 |                 # Some objects are so small that their area is under one
246 |                 # pixel, so the rasterized mask comes out empty. Skip those objects.
247 | if m.max() < 1:
248 | continue
249 | # Is it a crowd? If so, use a negative class ID.
250 | if annotation['iscrowd']:
251 | # Use negative class ID for crowds
252 | class_id *= -1
253 |                     # For crowd masks, annToMask() sometimes returns a mask
254 |                     # smaller than the given dimensions. If so, substitute a full-size mask of ones.
255 | if m.shape[0] != image_info["height"] or m.shape[1] != image_info["width"]:
256 | m = np.ones([image_info["height"], image_info["width"]], dtype=bool)
257 | instance_masks.append(m)
258 | class_ids.append(class_id)
259 |
260 | # Pack instance masks into an array
261 | if class_ids:
262 | mask = np.stack(instance_masks, axis=2)
263 | class_ids = np.array(class_ids, dtype=np.int32)
264 | return mask, class_ids
265 | else:
266 | # Call super class to return an empty mask
267 | return super(CocoDataset, self).load_mask(image_id)
268 |
269 | def image_reference(self, image_id):
270 | """Return a link to the image in the COCO Website."""
271 | info = self.image_info[image_id]
272 | if info["source"] == "coco":
273 | return "http://cocodataset.org/#explore?id={}".format(info["id"])
274 | else:
275 |             return super(CocoDataset, self).image_reference(image_id)
276 |
277 | # The following two functions are from pycocotools with a few changes.
278 |
279 | def annToRLE(self, ann, height, width):
280 | """
281 |         Convert an annotation, which can be polygons or uncompressed RLE, to RLE.
282 |         :return: RLE (run-length encoding of the mask)
283 | """
284 | segm = ann['segmentation']
285 | if isinstance(segm, list):
286 | # polygon -- a single object might consist of multiple parts
287 | # we merge all parts into one mask rle code
288 | rles = maskUtils.frPyObjects(segm, height, width)
289 | rle = maskUtils.merge(rles)
290 | elif isinstance(segm['counts'], list):
291 | # uncompressed RLE
292 | rle = maskUtils.frPyObjects(segm, height, width)
293 | else:
294 | # rle
295 | rle = ann['segmentation']
296 | return rle
297 |
298 | def annToMask(self, ann, height, width):
299 | """
300 | Convert annotation which can be polygons, uncompressed RLE, or RLE to binary mask.
301 | :return: binary mask (numpy 2D array)
302 | """
303 | rle = self.annToRLE(ann, height, width)
304 | m = maskUtils.decode(rle)
305 | return m
306 |
307 |
308 | ############################################################
309 | # COCO Evaluation
310 | ############################################################
311 |
312 | def build_coco_results(dataset, image_ids, rois, class_ids, scores, masks):
313 |     """Arrange results to match COCO specs in http://cocodataset.org/#format
314 | """
315 | # If no results, return an empty list
316 | if rois is None:
317 | return []
318 |
319 | results = []
320 | for image_id in image_ids:
321 | # Loop through detections
322 | for i in range(rois.shape[0]):
323 | class_id = class_ids[i]
324 | score = scores[i]
325 | bbox = np.around(rois[i], 1)
326 | mask = masks[:, :, i]
327 |
328 | result = {
329 | "image_id": image_id,
330 | "category_id": dataset.get_source_class_id(class_id, "coco"),
331 | "bbox": [bbox[1], bbox[0], bbox[3] - bbox[1], bbox[2] - bbox[0]],
332 | "score": score,
333 | "segmentation": maskUtils.encode(np.asfortranarray(mask))
334 | }
335 | results.append(result)
336 | return results
337 |
338 |
339 | def evaluate_coco(model, dataset, coco, eval_type="bbox", limit=0, image_ids=None):
340 | """Runs official COCO evaluation.
341 |     dataset: A Dataset object with validation data
342 | eval_type: "bbox" or "segm" for bounding box or segmentation evaluation
343 | limit: if not 0, it's the number of images to use for evaluation
344 | """
345 | # Pick COCO images from the dataset
346 | image_ids = image_ids or dataset.image_ids
347 |
348 | # Limit to a subset
349 | if limit:
350 | image_ids = image_ids[:limit]
351 |
352 | # Get corresponding COCO image IDs.
353 | coco_image_ids = [dataset.image_info[id]["id"] for id in image_ids]
354 |
355 | t_prediction = 0
356 | t_start = time.time()
357 |
358 | results = []
359 | for i, image_id in enumerate(image_ids):
360 | # Load image
361 | image = dataset.load_image(image_id)
362 |
363 | # Run detection
364 | t = time.time()
365 | r = model.detect([image], verbose=0)[0]
366 | t_prediction += (time.time() - t)
367 |
368 | # Convert results to COCO format
369 | image_results = build_coco_results(dataset, coco_image_ids[i:i + 1],
370 | r["rois"], r["class_ids"],
371 | r["scores"], r["masks"])
372 | results.extend(image_results)
373 |
374 | # Load results. This modifies results with additional attributes.
375 | coco_results = coco.loadRes(results)
376 |
377 | # Evaluate
378 | cocoEval = COCOeval(coco, coco_results, eval_type)
379 | cocoEval.params.imgIds = coco_image_ids
380 | cocoEval.evaluate()
381 | cocoEval.accumulate()
382 | cocoEval.summarize()
383 |
384 | print("Prediction time: {}. Average {}/image".format(
385 | t_prediction, t_prediction / len(image_ids)))
386 | print("Total time: ", time.time() - t_start)
387 |
388 |
389 | ############################################################
390 | # Training
391 | ############################################################
392 |
393 |
394 | if __name__ == '__main__':
395 | import argparse
396 |
397 | # Parse command line arguments
398 | parser = argparse.ArgumentParser(
399 | description='Train Mask R-CNN on MS COCO.')
400 | parser.add_argument("command",
401 | metavar="",
402 | help="'train' or 'evaluate' on MS COCO")
403 | parser.add_argument('--dataset', required=True,
404 | metavar="/path/to/coco/",
405 | help='Directory of the MS-COCO dataset')
406 | parser.add_argument('--year', required=False,
407 | default=DEFAULT_DATASET_YEAR,
408 | metavar="",
409 | help='Year of the MS-COCO dataset (2014 or 2017) (default=2014)')
410 | parser.add_argument('--model', required=True,
411 | metavar="/path/to/weights.h5",
412 | help="Path to weights .h5 file or 'coco'")
413 | parser.add_argument('--logs', required=False,
414 | default=DEFAULT_LOGS_DIR,
415 | metavar="/path/to/logs/",
416 | help='Logs and checkpoints directory (default=logs/)')
417 | parser.add_argument('--limit', required=False,
418 | default=500,
419 | metavar="",
420 | help='Images to use for evaluation (default=500)')
421 | parser.add_argument('--download', required=False,
422 | default=False,
423 | metavar="",
424 | help='Automatically download and unzip MS-COCO files (default=False)',
425 |                         type=lambda s: s.lower() in ('true', '1', 'yes'))  # plain type=bool would treat any non-empty string, even "False", as True
426 | args = parser.parse_args()
427 | print("Command: ", args.command)
428 | print("Model: ", args.model)
429 | print("Dataset: ", args.dataset)
430 | print("Year: ", args.year)
431 | print("Logs: ", args.logs)
432 | print("Auto Download: ", args.download)
433 |
434 | # Configurations
435 | if args.command == "train":
436 | config = CocoConfig()
437 | else:
438 | class InferenceConfig(CocoConfig):
439 | # Set batch size to 1 since we'll be running inference on
440 | # one image at a time. Batch size = GPU_COUNT * IMAGES_PER_GPU
441 | GPU_COUNT = 1
442 | IMAGES_PER_GPU = 1
443 | DETECTION_MIN_CONFIDENCE = 0
444 | config = InferenceConfig()
445 | config.display()
446 |
447 | # Create model
448 | if args.command == "train":
449 | model = modellib.MaskRCNN(mode="training", config=config,
450 | model_dir=args.logs)
451 | else:
452 | model = modellib.MaskRCNN(mode="inference", config=config,
453 | model_dir=args.logs)
454 |
455 | # Select weights file to load
456 | if args.model.lower() == "coco":
457 | model_path = COCO_MODEL_PATH
458 | elif args.model.lower() == "last":
459 | # Find last trained weights
460 | model_path = model.find_last()[1]
461 | elif args.model.lower() == "imagenet":
462 | # Start from ImageNet trained weights
463 | model_path = model.get_imagenet_weights()
464 | else:
465 | model_path = args.model
466 |
467 | # Load weights
468 | print("Loading weights ", model_path)
469 | model.load_weights(model_path, by_name=True)
470 |
471 | # Train or evaluate
472 | if args.command == "train":
473 | # Training dataset. Use the training set and 35K from the
474 |         # validation set, as in the Mask R-CNN paper.
475 | dataset_train = CocoDataset()
476 | dataset_train.load_coco(args.dataset, "train", year=args.year, auto_download=args.download)
477 | dataset_train.load_coco(args.dataset, "valminusminival", year=args.year, auto_download=args.download)
478 | dataset_train.prepare()
479 |
480 | # Validation dataset
481 | dataset_val = CocoDataset()
482 | dataset_val.load_coco(args.dataset, "minival", year=args.year, auto_download=args.download)
483 | dataset_val.prepare()
484 |
485 | # *** This training schedule is an example. Update to your needs ***
486 |
487 | # Training - Stage 1
488 | print("Training network heads")
489 | model.train(dataset_train, dataset_val,
490 | learning_rate=config.LEARNING_RATE,
491 | epochs=40,
492 | layers='heads')
493 |
494 | # Training - Stage 2
495 | # Finetune layers from ResNet stage 4 and up
496 | print("Fine tune Resnet stage 4 and up")
497 | model.train(dataset_train, dataset_val,
498 | learning_rate=config.LEARNING_RATE,
499 | epochs=120,
500 | layers='4+')
501 |
502 | # Training - Stage 3
503 |         # Fine-tune all layers
504 |         print("Fine-tuning all layers")
505 | model.train(dataset_train, dataset_val,
506 | learning_rate=config.LEARNING_RATE / 10,
507 | epochs=160,
508 | layers='all')
509 |
510 | elif args.command == "evaluate":
511 | # Validation dataset
512 | dataset_val = CocoDataset()
513 | coco = dataset_val.load_coco(args.dataset, "minival", year=args.year, return_coco=True, auto_download=args.download)
514 | dataset_val.prepare()
515 | print("Running COCO evaluation on {} images.".format(args.limit))
516 | evaluate_coco(model, dataset_val, coco, "bbox", limit=int(args.limit))
517 | else:
518 | print("'{}' is not recognized. "
519 | "Use 'train' or 'evaluate'".format(args.command))
520 |
--------------------------------------------------------------------------------
/nodes/utils.py:
--------------------------------------------------------------------------------
1 | """
2 | Mask R-CNN
3 | Common utility functions and classes.
4 |
5 | Copyright (c) 2017 Matterport, Inc.
6 | Licensed under the MIT License (see LICENSE for details)
7 | Written by Waleed Abdulla
8 | """
9 |
10 | import sys
11 | import os
12 | import math
13 | import random
14 | import numpy as np
15 | import tensorflow as tf
16 | import scipy.misc, scipy.ndimage  # scipy.ndimage is needed by resize_mask()
17 | import skimage.color
18 | import skimage.io
19 | from six.moves.urllib import request
20 | import shutil
21 | import contextlib
22 |
23 | # URL from which to download the latest COCO trained weights
24 | COCO_MODEL_URL = "https://github.com/matterport/Mask_RCNN/releases/download/v2.0/mask_rcnn_coco.h5"
25 |
26 |
27 | ############################################################
28 | # Bounding Boxes
29 | ############################################################
30 |
31 | def extract_bboxes(mask):
32 | """Compute bounding boxes from masks.
33 | mask: [height, width, num_instances]. Mask pixels are either 1 or 0.
34 |
35 | Returns: bbox array [num_instances, (y1, x1, y2, x2)].
36 | """
37 | boxes = np.zeros([mask.shape[-1], 4], dtype=np.int32)
38 | for i in range(mask.shape[-1]):
39 | m = mask[:, :, i]
40 | # Bounding box.
41 | horizontal_indicies = np.where(np.any(m, axis=0))[0]
42 | vertical_indicies = np.where(np.any(m, axis=1))[0]
43 | if horizontal_indicies.shape[0]:
44 | x1, x2 = horizontal_indicies[[0, -1]]
45 | y1, y2 = vertical_indicies[[0, -1]]
46 | # x2 and y2 should not be part of the box. Increment by 1.
47 | x2 += 1
48 | y2 += 1
49 | else:
50 | # No mask for this instance. Might happen due to
51 | # resizing or cropping. Set bbox to zeros
52 | x1, x2, y1, y2 = 0, 0, 0, 0
53 | boxes[i] = np.array([y1, x1, y2, x2])
54 | return boxes.astype(np.int32)
55 |
56 |
57 | def compute_iou(box, boxes, box_area, boxes_area):
58 | """Calculates IoU of the given box with the array of the given boxes.
59 | box: 1D vector [y1, x1, y2, x2]
60 | boxes: [boxes_count, (y1, x1, y2, x2)]
61 | box_area: float. the area of 'box'
62 | boxes_area: array of length boxes_count.
63 |
64 | Note: the areas are passed in rather than calculated here for
65 |     efficiency. Calculate once in the caller to avoid duplicate work.
66 | """
67 | # Calculate intersection areas
68 | y1 = np.maximum(box[0], boxes[:, 0])
69 | y2 = np.minimum(box[2], boxes[:, 2])
70 | x1 = np.maximum(box[1], boxes[:, 1])
71 | x2 = np.minimum(box[3], boxes[:, 3])
72 | intersection = np.maximum(x2 - x1, 0) * np.maximum(y2 - y1, 0)
73 | union = box_area + boxes_area[:] - intersection[:]
74 | iou = intersection / union
75 | return iou
76 |
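# A worked example with toy boxes: box [0, 0, 10, 10] (area 100) against
# [5, 5, 15, 15] (area 100). The intersection is 5 * 5 = 25 and the union is
# 100 + 100 - 25 = 175, so IoU = 25 / 175 ~= 0.143:
#
#   compute_iou(np.array([0, 0, 10, 10]),
#               np.array([[5, 5, 15, 15]]), 100.0, np.array([100.0]))
#   # -> array([0.14285714])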
77 |
78 | def compute_overlaps(boxes1, boxes2):
79 | """Computes IoU overlaps between two sets of boxes.
80 | boxes1, boxes2: [N, (y1, x1, y2, x2)].
81 |
82 | For better performance, pass the largest set first and the smaller second.
83 | """
84 | # Areas of anchors and GT boxes
85 | area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
86 | area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])
87 |
88 | # Compute overlaps to generate matrix [boxes1 count, boxes2 count]
89 | # Each cell contains the IoU value.
90 | overlaps = np.zeros((boxes1.shape[0], boxes2.shape[0]))
91 | for i in range(overlaps.shape[1]):
92 | box2 = boxes2[i]
93 | overlaps[:, i] = compute_iou(box2, boxes1, area2[i], area1)
94 | return overlaps
95 |
96 |
97 | def non_max_suppression(boxes, scores, threshold):
98 |     """Performs non-maximum suppression and returns indices of kept boxes.
99 |     boxes: [N, (y1, x1, y2, x2)]. Notice that (y2, x2) lies outside the box.
100 | scores: 1-D array of box scores.
101 | threshold: Float. IoU threshold to use for filtering.
102 | """
103 | assert boxes.shape[0] > 0
104 | if boxes.dtype.kind != "f":
105 | boxes = boxes.astype(np.float32)
106 |
107 | # Compute box areas
108 | y1 = boxes[:, 0]
109 | x1 = boxes[:, 1]
110 | y2 = boxes[:, 2]
111 | x2 = boxes[:, 3]
112 | area = (y2 - y1) * (x2 - x1)
113 |
114 |     # Get indices of boxes sorted by scores (highest first)
115 | ixs = scores.argsort()[::-1]
116 |
117 | pick = []
118 | while len(ixs) > 0:
119 | # Pick top box and add its index to the list
120 | i = ixs[0]
121 | pick.append(i)
122 | # Compute IoU of the picked box with the rest
123 | iou = compute_iou(boxes[i], boxes[ixs[1:]], area[i], area[ixs[1:]])
124 | # Identify boxes with IoU over the threshold. This
125 |         # returns indices into ixs[1:], so add 1 to get
126 |         # indices into ixs.
127 |         remove_ixs = np.where(iou > threshold)[0] + 1
128 |         # Remove indices of the picked and overlapping boxes.
129 | ixs = np.delete(ixs, remove_ixs)
130 | ixs = np.delete(ixs, 0)
131 | return np.array(pick, dtype=np.int32)
132 |
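# A worked example: two heavily overlapping boxes plus one distant box. With
# threshold 0.3, the lower-scoring overlap (IoU ~= 0.68) is suppressed:
#
#   boxes = np.array([[0, 0, 10, 10], [1, 1, 11, 11], [50, 50, 60, 60]])
#   scores = np.array([0.9, 0.8, 0.7])
#   non_max_suppression(boxes, scores, 0.3)  # -> array([0, 2], dtype=int32)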
133 |
134 | def apply_box_deltas(boxes, deltas):
135 | """Applies the given deltas to the given boxes.
136 | boxes: [N, (y1, x1, y2, x2)]. Note that (y2, x2) is outside the box.
137 | deltas: [N, (dy, dx, log(dh), log(dw))]
138 | """
139 | boxes = boxes.astype(np.float32)
140 | # Convert to y, x, h, w
141 | height = boxes[:, 2] - boxes[:, 0]
142 | width = boxes[:, 3] - boxes[:, 1]
143 | center_y = boxes[:, 0] + 0.5 * height
144 | center_x = boxes[:, 1] + 0.5 * width
145 | # Apply deltas
146 | center_y += deltas[:, 0] * height
147 | center_x += deltas[:, 1] * width
148 | height *= np.exp(deltas[:, 2])
149 | width *= np.exp(deltas[:, 3])
150 | # Convert back to y1, x1, y2, x2
151 | y1 = center_y - 0.5 * height
152 | x1 = center_x - 0.5 * width
153 | y2 = y1 + height
154 | x2 = x1 + width
155 | return np.stack([y1, x1, y2, x2], axis=1)
156 |
157 |
158 | def box_refinement_graph(box, gt_box):
159 | """Compute refinement needed to transform box to gt_box.
160 | box and gt_box are [N, (y1, x1, y2, x2)]
161 | """
162 | box = tf.cast(box, tf.float32)
163 | gt_box = tf.cast(gt_box, tf.float32)
164 |
165 | height = box[:, 2] - box[:, 0]
166 | width = box[:, 3] - box[:, 1]
167 | center_y = box[:, 0] + 0.5 * height
168 | center_x = box[:, 1] + 0.5 * width
169 |
170 | gt_height = gt_box[:, 2] - gt_box[:, 0]
171 | gt_width = gt_box[:, 3] - gt_box[:, 1]
172 | gt_center_y = gt_box[:, 0] + 0.5 * gt_height
173 | gt_center_x = gt_box[:, 1] + 0.5 * gt_width
174 |
175 | dy = (gt_center_y - center_y) / height
176 | dx = (gt_center_x - center_x) / width
177 | dh = tf.log(gt_height / height)
178 | dw = tf.log(gt_width / width)
179 |
180 | result = tf.stack([dy, dx, dh, dw], axis=1)
181 | return result
182 |
183 |
184 | def box_refinement(box, gt_box):
185 | """Compute refinement needed to transform box to gt_box.
186 | box and gt_box are [N, (y1, x1, y2, x2)]. (y2, x2) is
187 | assumed to be outside the box.
188 | """
189 | box = box.astype(np.float32)
190 | gt_box = gt_box.astype(np.float32)
191 |
192 | height = box[:, 2] - box[:, 0]
193 | width = box[:, 3] - box[:, 1]
194 | center_y = box[:, 0] + 0.5 * height
195 | center_x = box[:, 1] + 0.5 * width
196 |
197 | gt_height = gt_box[:, 2] - gt_box[:, 0]
198 | gt_width = gt_box[:, 3] - gt_box[:, 1]
199 | gt_center_y = gt_box[:, 0] + 0.5 * gt_height
200 | gt_center_x = gt_box[:, 1] + 0.5 * gt_width
201 |
202 | dy = (gt_center_y - center_y) / height
203 | dx = (gt_center_x - center_x) / width
204 | dh = np.log(gt_height / height)
205 | dw = np.log(gt_width / width)
206 |
207 | return np.stack([dy, dx, dh, dw], axis=1)
208 |
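# box_refinement() and apply_box_deltas() are inverses of each other: applying
# the deltas computed for (box, gt_box) maps box back onto gt_box. A
# round-trip sketch with toy coordinates:
#
#   box = np.array([[10., 10., 20., 20.]])
#   gt = np.array([[12., 12., 22., 22.]])
#   deltas = box_refinement(box, gt)   # [[0.2, 0.2, 0.0, 0.0]]
#   apply_box_deltas(box, deltas)      # -> [[12., 12., 22., 22.]]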
209 |
210 | ############################################################
211 | # Dataset
212 | ############################################################
213 |
214 | class Dataset(object):
215 | """The base class for dataset classes.
216 | To use it, create a new class that adds functions specific to the dataset
217 | you want to use. For example:
218 |
219 | class CatsAndDogsDataset(Dataset):
220 | def load_cats_and_dogs(self):
221 | ...
222 | def load_mask(self, image_id):
223 | ...
224 | def image_reference(self, image_id):
225 | ...
226 |
227 | See COCODataset and ShapesDataset as examples.
228 | """
229 |
230 | def __init__(self, class_map=None):
231 | self._image_ids = []
232 | self.image_info = []
233 | # Background is always the first class
234 | self.class_info = [{"source": "", "id": 0, "name": "BG"}]
235 | self.source_class_ids = {}
236 |
237 | def add_class(self, source, class_id, class_name):
238 | assert "." not in source, "Source name cannot contain a dot"
239 | # Does the class exist already?
240 | for info in self.class_info:
241 | if info['source'] == source and info["id"] == class_id:
242 | # source.class_id combination already available, skip
243 | return
244 | # Add the class
245 | self.class_info.append({
246 | "source": source,
247 | "id": class_id,
248 | "name": class_name,
249 | })
250 |
251 | def add_image(self, source, image_id, path, **kwargs):
252 | image_info = {
253 | "id": image_id,
254 | "source": source,
255 | "path": path,
256 | }
257 | image_info.update(kwargs)
258 | self.image_info.append(image_info)
259 |
260 | def image_reference(self, image_id):
261 | """Return a link to the image in its source Website or details about
262 | the image that help looking it up or debugging it.
263 |
264 | Override for your dataset, but pass to this function
265 | if you encounter images not in your dataset.
266 | """
267 | return ""
268 |
269 | def prepare(self, class_map=None):
270 | """Prepares the Dataset class for use.
271 |
272 | TODO: class map is not supported yet. When done, it should handle mapping
273 | classes from different datasets to the same class ID.
274 | """
275 | def clean_name(name):
276 | """Returns a shorter version of object names for cleaner display."""
277 | return ",".join(name.split(",")[:1])
278 |
279 | # Build (or rebuild) everything else from the info dicts.
280 | self.num_classes = len(self.class_info)
281 | self.class_ids = np.arange(self.num_classes)
282 | self.class_names = [clean_name(c["name"]) for c in self.class_info]
283 | self.num_images = len(self.image_info)
284 | self._image_ids = np.arange(self.num_images)
285 |
286 | self.class_from_source_map = {"{}.{}".format(info['source'], info['id']): id
287 | for info, id in zip(self.class_info, self.class_ids)}
288 |
289 | # Map sources to class_ids they support
290 | self.sources = list(set([i['source'] for i in self.class_info]))
291 | self.source_class_ids = {}
292 | # Loop over datasets
293 | for source in self.sources:
294 | self.source_class_ids[source] = []
295 | # Find classes that belong to this dataset
296 | for i, info in enumerate(self.class_info):
297 | # Include BG class in all datasets
298 | if i == 0 or source == info['source']:
299 | self.source_class_ids[source].append(i)
300 |
301 | def map_source_class_id(self, source_class_id):
302 | """Takes a source class ID and returns the int class ID assigned to it.
303 |
304 | For example:
305 | dataset.map_source_class_id("coco.12") -> 23
306 | """
307 | return self.class_from_source_map[source_class_id]
308 |
309 | def get_source_class_id(self, class_id, source):
310 | """Map an internal class ID to the corresponding class ID in the source dataset."""
311 | info = self.class_info[class_id]
312 | assert info['source'] == source
313 | return info['id']
314 |
315 | def append_data(self, class_info, image_info):
316 | self.external_to_class_id = {}
317 | for i, c in enumerate(self.class_info):
318 | for ds, id in c["map"]:
319 | self.external_to_class_id[ds + str(id)] = i
320 |
321 | # Map external image IDs to internal ones.
322 | self.external_to_image_id = {}
323 | for i, info in enumerate(self.image_info):
324 | self.external_to_image_id[info["ds"] + str(info["id"])] = i
325 |
326 | @property
327 | def image_ids(self):
328 | return self._image_ids
329 |
330 | def source_image_link(self, image_id):
331 | """Returns the path or URL to the image.
332 |         Override this to return a URL to the image if it's available online for easy
333 | debugging.
334 | """
335 | return self.image_info[image_id]["path"]
336 |
337 | def load_image(self, image_id):
338 | """Load the specified image and return a [H,W,3] Numpy array.
339 | """
340 | # Load image
341 | image = skimage.io.imread(self.image_info[image_id]['path'])
342 | # If grayscale. Convert to RGB for consistency.
343 | if image.ndim != 3:
344 | image = skimage.color.gray2rgb(image)
345 | return image
346 |
347 | def load_mask(self, image_id):
348 | """Load instance masks for the given image.
349 |
350 | Different datasets use different ways to store masks. Override this
351 |         method to load instance masks and return them in the form of an
352 | array of binary masks of shape [height, width, instances].
353 |
354 | Returns:
355 | masks: A bool array of shape [height, width, instance count] with
356 | a binary mask per instance.
357 | class_ids: a 1D array of class IDs of the instance masks.
358 | """
359 | # Override this function to load a mask from your dataset.
360 | # Otherwise, it returns an empty mask.
361 | mask = np.empty([0, 0, 0])
362 | class_ids = np.empty([0], np.int32)
363 | return mask, class_ids
364 |
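# A minimal subclass sketch (names and data are hypothetical), following the
# pattern described in the Dataset docstring above:
#
#   class ToyDataset(Dataset):
#       def load_toy(self):
#           self.add_class("toy", 1, "square")
#           self.add_image("toy", image_id=0, path="/tmp/img0.png",
#                          width=64, height=64)
#
#       def load_mask(self, image_id):
#           mask = np.zeros([64, 64, 1], dtype=bool)
#           mask[16:48, 16:48, 0] = True  # one square instance
#           return mask, np.array([1], dtype=np.int32)
#
#   dataset = ToyDataset()
#   dataset.load_toy()
#   dataset.prepare()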
365 |
366 | def resize_image(image, min_dim=None, max_dim=None, padding=False):
367 | """
368 | Resizes an image keeping the aspect ratio.
369 |
370 | min_dim: if provided, resizes the image such that it's smaller
371 | dimension == min_dim
372 | max_dim: if provided, ensures that the image longest side doesn't
373 | exceed this value.
374 |     padding: If true, pads image with zeros so its size is max_dim x max_dim
375 |
376 | Returns:
377 | image: the resized image
378 | window: (y1, x1, y2, x2). If max_dim is provided, padding might
379 | be inserted in the returned image. If so, this window is the
380 | coordinates of the image part of the full image (excluding
381 | the padding). The x2, y2 pixels are not included.
382 | scale: The scale factor used to resize the image
383 | padding: Padding added to the image [(top, bottom), (left, right), (0, 0)]
384 | """
385 | # Default window (y1, x1, y2, x2) and default scale == 1.
386 | h, w = image.shape[:2]
387 | window = (0, 0, h, w)
388 | scale = 1
389 |
390 | # Scale?
391 | if min_dim:
392 | # Scale up but not down
393 | scale = max(1, min_dim / min(h, w))
394 | # Does it exceed max dim?
395 | if max_dim:
396 | image_max = max(h, w)
397 | if round(image_max * scale) > max_dim:
398 | scale = max_dim / image_max
399 | # Resize image and mask
400 | if scale != 1:
401 | image = scipy.misc.imresize(
402 | image, (round(h * scale), round(w * scale)))
403 | # Need padding?
404 | if padding:
405 | # Get new height and width
406 | h, w = image.shape[:2]
407 | top_pad = (max_dim - h) // 2
408 | bottom_pad = max_dim - h - top_pad
409 | left_pad = (max_dim - w) // 2
410 | right_pad = max_dim - w - left_pad
411 | padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)]
412 | image = np.pad(image, padding, mode='constant', constant_values=0)
413 | window = (top_pad, left_pad, h + top_pad, w + left_pad)
414 | return image, window, scale, padding
415 |
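# A worked example: a 600x800 image with min_dim=800, max_dim=1024 and
# padding=True (`img` below stands in for the [600, 800, 3] input array).
# The min_dim scale (800/600 ~= 1.33) would push the long side to 1067 > 1024,
# so it is clamped to 1024/800 = 1.28, giving a 768x1024 image padded with
# 128 zero rows on top and bottom:
#
#   image, window, scale, pad = resize_image(img, min_dim=800, max_dim=1024,
#                                            padding=True)
#   # scale == 1.28, window == (128, 0, 896, 1024)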
416 |
417 | def resize_mask(mask, scale, padding):
418 | """Resizes a mask using the given scale and padding.
419 | Typically, you get the scale and padding from resize_image() to
420 | ensure both, the image and the mask, are resized consistently.
421 |
422 | scale: mask scaling factor
423 | padding: Padding to add to the mask in the form
424 | [(top, bottom), (left, right), (0, 0)]
425 | """
426 | h, w = mask.shape[:2]
427 | mask = scipy.ndimage.zoom(mask, zoom=[scale, scale, 1], order=0)
428 | mask = np.pad(mask, padding, mode='constant', constant_values=0)
429 | return mask
430 |
431 |
432 | def minimize_mask(bbox, mask, mini_shape):
433 | """Resize masks to a smaller version to cut memory load.
434 |     Mini-masks can then be resized back to image scale using expand_mask().
435 |
436 | See inspect_data.ipynb notebook for more details.
437 | """
438 | mini_mask = np.zeros(mini_shape + (mask.shape[-1],), dtype=bool)
439 | for i in range(mask.shape[-1]):
440 | m = mask[:, :, i]
441 | y1, x1, y2, x2 = bbox[i][:4]
442 | m = m[y1:y2, x1:x2]
443 | if m.size == 0:
444 | raise Exception("Invalid bounding box with area of zero")
445 | m = scipy.misc.imresize(m.astype(float), mini_shape, interp='bilinear')
446 | mini_mask[:, :, i] = np.where(m >= 128, 1, 0)
447 | return mini_mask
448 |
449 |
450 | def expand_mask(bbox, mini_mask, image_shape):
451 | """Resizes mini masks back to image size. Reverses the change
452 | of minimize_mask().
453 |
454 | See inspect_data.ipynb notebook for more details.
455 | """
456 | mask = np.zeros(image_shape[:2] + (mini_mask.shape[-1],), dtype=bool)
457 | for i in range(mask.shape[-1]):
458 | m = mini_mask[:, :, i]
459 | y1, x1, y2, x2 = bbox[i][:4]
460 | h = y2 - y1
461 | w = x2 - x1
462 | m = scipy.misc.imresize(m.astype(float), (h, w), interp='bilinear')
463 | mask[y1:y2, x1:x2, i] = np.where(m >= 128, 1, 0)
464 | return mask
465 |
466 |
467 | # TODO: Build and use this function to reduce code duplication
468 | def mold_mask(mask, config):
469 | pass
470 |
471 |
472 | def unmold_mask(mask, bbox, image_shape):
473 | """Converts a mask generated by the neural network into a format similar
474 |     to its original shape.
475 | mask: [height, width] of type float. A small, typically 28x28 mask.
476 | bbox: [y1, x1, y2, x2]. The box to fit the mask in.
477 |
478 | Returns a binary mask with the same size as the original image.
479 | """
480 | threshold = 0.5
481 | y1, x1, y2, x2 = bbox
482 | mask = scipy.misc.imresize(
483 | mask, (y2 - y1, x2 - x1), interp='bilinear').astype(np.float32) / 255.0
484 | mask = np.where(mask >= threshold, 1, 0).astype(np.uint8)
485 |
486 | # Put the mask in the right location.
487 | full_mask = np.zeros(image_shape[:2], dtype=np.uint8)
488 | full_mask[y1:y2, x1:x2] = mask
489 | return full_mask
490 |
491 |
492 | ############################################################
493 | # Anchors
494 | ############################################################
495 |
496 | def generate_anchors(scales, ratios, shape, feature_stride, anchor_stride):
497 | """
498 | scales: 1D array of anchor sizes in pixels. Example: [32, 64, 128]
499 | ratios: 1D array of anchor ratios of width/height. Example: [0.5, 1, 2]
500 | shape: [height, width] spatial shape of the feature map over which
501 | to generate anchors.
502 | feature_stride: Stride of the feature map relative to the image in pixels.
503 | anchor_stride: Stride of anchors on the feature map. For example, if the
504 | value is 2 then generate anchors for every other feature map pixel.
505 | """
506 | # Get all combinations of scales and ratios
507 | scales, ratios = np.meshgrid(np.array(scales), np.array(ratios))
508 | scales = scales.flatten()
509 | ratios = ratios.flatten()
510 |
511 | # Enumerate heights and widths from scales and ratios
512 | heights = scales / np.sqrt(ratios)
513 | widths = scales * np.sqrt(ratios)
514 |
515 | # Enumerate shifts in feature space
516 | shifts_y = np.arange(0, shape[0], anchor_stride) * feature_stride
517 | shifts_x = np.arange(0, shape[1], anchor_stride) * feature_stride
518 | shifts_x, shifts_y = np.meshgrid(shifts_x, shifts_y)
519 |
520 | # Enumerate combinations of shifts, widths, and heights
521 | box_widths, box_centers_x = np.meshgrid(widths, shifts_x)
522 | box_heights, box_centers_y = np.meshgrid(heights, shifts_y)
523 |
524 | # Reshape to get a list of (y, x) and a list of (h, w)
525 | box_centers = np.stack(
526 | [box_centers_y, box_centers_x], axis=2).reshape([-1, 2])
527 | box_sizes = np.stack([box_heights, box_widths], axis=2).reshape([-1, 2])
528 |
529 | # Convert to corner coordinates (y1, x1, y2, x2)
530 | boxes = np.concatenate([box_centers - 0.5 * box_sizes,
531 | box_centers + 0.5 * box_sizes], axis=1)
532 | return boxes
533 |
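# A worked example: scale 32 with ratios [0.5, 1, 2] yields (h, w) pairs of
# roughly (45.3, 22.6), (32, 32) and (22.6, 45.3) (equal areas, different
# aspect ratios). On a 2x2 feature map those 3 shapes are placed at all 4
# cells:
#
#   generate_anchors(32, [0.5, 1, 2], [2, 2], feature_stride=16,
#                    anchor_stride=1).shape  # -> (12, 4)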
534 |
535 | def generate_pyramid_anchors(scales, ratios, feature_shapes, feature_strides,
536 | anchor_stride):
537 | """Generate anchors at different levels of a feature pyramid. Each scale
538 | is associated with a level of the pyramid, but each ratio is used in
539 | all levels of the pyramid.
540 |
541 | Returns:
542 | anchors: [N, (y1, x1, y2, x2)]. All generated anchors in one array. Sorted
543 | with the same order of the given scales. So, anchors of scale[0] come
544 | first, then anchors of scale[1], and so on.
545 | """
546 | # Anchors
547 | # [anchor_count, (y1, x1, y2, x2)]
548 | anchors = []
549 | for i in range(len(scales)):
550 | anchors.append(generate_anchors(scales[i], ratios, feature_shapes[i],
551 | feature_strides[i], anchor_stride))
552 | return np.concatenate(anchors, axis=0)
553 |
554 |
555 | ############################################################
556 | # Miscellaneous
557 | ############################################################
558 |
559 | def trim_zeros(x):
560 | """It's common to have tensors larger than the available data and
561 | pad with zeros. This function removes rows that are all zeros.
562 |
563 | x: [rows, columns].
564 | """
565 | assert len(x.shape) == 2
566 | return x[~np.all(x == 0, axis=1)]
567 |
568 |
569 | def compute_ap(gt_boxes, gt_class_ids,
570 | pred_boxes, pred_class_ids, pred_scores,
571 | iou_threshold=0.5):
572 | """Compute Average Precision at a set IoU threshold (default 0.5).
573 |
574 | Returns:
575 | mAP: Mean Average Precision
576 | precisions: List of precisions at different class score thresholds.
577 | recalls: List of recall values at different class score thresholds.
578 | overlaps: [pred_boxes, gt_boxes] IoU overlaps.
579 | """
580 | # Trim zero padding and sort predictions by score from high to low
581 | # TODO: cleaner to do zero unpadding upstream
582 | gt_boxes = trim_zeros(gt_boxes)
583 | pred_boxes = trim_zeros(pred_boxes)
584 | pred_scores = pred_scores[:pred_boxes.shape[0]]
585 | indices = np.argsort(pred_scores)[::-1]
586 | pred_boxes = pred_boxes[indices]
587 | pred_class_ids = pred_class_ids[indices]
588 | pred_scores = pred_scores[indices]
589 |
590 | # Compute IoU overlaps [pred_boxes, gt_boxes]
591 | overlaps = compute_overlaps(pred_boxes, gt_boxes)
592 |
593 | # Loop through ground truth boxes and find matching predictions
594 | match_count = 0
595 | pred_match = np.zeros([pred_boxes.shape[0]])
596 | gt_match = np.zeros([gt_boxes.shape[0]])
597 | for i in range(len(pred_boxes)):
598 | # Find best matching ground truth box
599 | sorted_ixs = np.argsort(overlaps[i])[::-1]
600 | for j in sorted_ixs:
601 | # If ground truth box is already matched, go to next one
602 | if gt_match[j] == 1:
603 | continue
604 | # If we reach IoU smaller than the threshold, end the loop
605 | iou = overlaps[i, j]
606 | if iou < iou_threshold:
607 | break
608 | # Do we have a match?
609 | if pred_class_ids[i] == gt_class_ids[j]:
610 | match_count += 1
611 | gt_match[j] = 1
612 | pred_match[i] = 1
613 | break
614 |
615 | # Compute precision and recall at each prediction box step
616 | precisions = np.cumsum(pred_match) / (np.arange(len(pred_match)) + 1)
617 | recalls = np.cumsum(pred_match).astype(np.float32) / len(gt_match)
618 |
619 | # Pad with start and end values to simplify the math
620 | precisions = np.concatenate([[0], precisions, [0]])
621 | recalls = np.concatenate([[0], recalls, [1]])
622 |
623 | # Ensure precision values decrease but don't increase. This way, the
624 | # precision value at each recall threshold is the maximum it can be
625 | # for all following recall thresholds, as specified by the VOC paper.
626 | for i in range(len(precisions) - 2, -1, -1):
627 | precisions[i] = np.maximum(precisions[i], precisions[i + 1])
628 |
629 | # Compute mean AP over recall range
630 | indices = np.where(recalls[:-1] != recalls[1:])[0] + 1
631 | mAP = np.sum((recalls[indices] - recalls[indices - 1]) *
632 | precisions[indices])
633 |
634 | return mAP, precisions, recalls, overlaps
635 |
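# A small numeric sketch of the precision-envelope step above: if the raw
# precisions at successive recall points were [1.0, 0.5, 0.67, 0.5], the
# backward maximum pass rewrites them to [1.0, 0.67, 0.67, 0.5], so precision
# never increases as recall grows (the VOC-style interpolated AP).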
636 |
637 | def compute_recall(pred_boxes, gt_boxes, iou):
638 | """Compute the recall at the given IoU threshold. It's an indication
639 | of how many GT boxes were found by the given prediction boxes.
640 |
641 | pred_boxes: [N, (y1, x1, y2, x2)] in image coordinates
642 | gt_boxes: [N, (y1, x1, y2, x2)] in image coordinates
643 | """
644 | # Measure overlaps
645 | overlaps = compute_overlaps(pred_boxes, gt_boxes)
646 | iou_max = np.max(overlaps, axis=1)
647 | iou_argmax = np.argmax(overlaps, axis=1)
648 | positive_ids = np.where(iou_max >= iou)[0]
649 | matched_gt_boxes = iou_argmax[positive_ids]
650 |
651 | recall = len(set(matched_gt_boxes)) / gt_boxes.shape[0]
652 | return recall, positive_ids
653 |
654 |
655 | # ## Batch Slicing
656 | # Some custom layers support a batch size of 1 only, and require a lot of work
657 | # to support batches greater than 1. This function slices an input tensor
658 | # across the batch dimension and feeds batches of size 1. Effectively,
659 | # an easy way to support batches > 1 quickly with little code modification.
660 | # In the long run, it's more efficient to modify the code to support large
661 | # batches and get rid of this function. Consider this a temporary solution.
662 | def batch_slice(inputs, graph_fn, batch_size, names=None):
663 | """Splits inputs into slices and feeds each slice to a copy of the given
664 | computation graph and then combines the results. It allows you to run a
665 | graph on a batch of inputs even if the graph is written to support one
666 | instance only.
667 |
668 | inputs: list of tensors. All must have the same first dimension length
669 | graph_fn: A function that returns a TF tensor that's part of a graph.
670 | batch_size: number of slices to divide the data into.
671 | names: If provided, assigns names to the resulting tensors.
672 | """
673 | if not isinstance(inputs, list):
674 | inputs = [inputs]
675 |
676 | outputs = []
677 | for i in range(batch_size):
678 | inputs_slice = [x[i] for x in inputs]
679 | output_slice = graph_fn(*inputs_slice)
680 | if not isinstance(output_slice, (tuple, list)):
681 | output_slice = [output_slice]
682 | outputs.append(output_slice)
683 | # Change outputs from a list of slices where each is
684 | # a list of outputs to a list of outputs and each has
685 | # a list of slices
686 | outputs = list(zip(*outputs))
687 |
688 | if names is None:
689 | names = [None] * len(outputs)
690 |
691 | result = [tf.stack(o, axis=0, name=n)
692 | for o, n in zip(outputs, names)]
693 | if len(result) == 1:
694 | result = result[0]
695 |
696 | return result
697 |
698 |
699 | def download_trained_weights(coco_model_path, verbose=1):
700 | """Download COCO trained weights from Releases.
701 |
702 | coco_model_path: local path of COCO trained weights
703 | """
704 | if verbose > 0:
705 | print("Downloading pretrained model to " + coco_model_path + " ...")
706 | with contextlib.closing(request.urlopen(COCO_MODEL_URL)) as resp, open(coco_model_path, 'wb') as out:
707 | shutil.copyfileobj(resp, out)
708 | if verbose > 0:
709 | print("... done downloading pretrained model!")
710 |
--------------------------------------------------------------------------------
/src/mask_rcnn_ros/utils.py:
--------------------------------------------------------------------------------
1 | """
2 | Mask R-CNN
3 | Common utility functions and classes.
4 |
5 | Copyright (c) 2017 Matterport, Inc.
6 | Licensed under the MIT License (see LICENSE for details)
7 | Written by Waleed Abdulla
8 | """
9 |
10 | import sys
11 | import os
12 | import math
13 | import random
14 | import numpy as np
15 | import tensorflow as tf
16 | import scipy.misc, scipy.ndimage  # scipy.ndimage is needed by resize_mask() below
17 | import skimage.color
18 | import skimage.io
19 | import urllib.request
20 | import shutil
21 |
22 | # URL from which to download the latest COCO trained weights
23 | COCO_MODEL_URL = "https://github.com/matterport/Mask_RCNN/releases/download/v2.0/mask_rcnn_coco.h5"
24 |
25 |
26 | ############################################################
27 | # Bounding Boxes
28 | ############################################################
29 |
30 | def extract_bboxes(mask):
31 | """Compute bounding boxes from masks.
32 | mask: [height, width, num_instances]. Mask pixels are either 1 or 0.
33 |
34 | Returns: bbox array [num_instances, (y1, x1, y2, x2)].
35 | """
36 | boxes = np.zeros([mask.shape[-1], 4], dtype=np.int32)
37 | for i in range(mask.shape[-1]):
38 | m = mask[:, :, i]
39 | # Bounding box.
40 | horizontal_indices = np.where(np.any(m, axis=0))[0]
41 | vertical_indices = np.where(np.any(m, axis=1))[0]
42 | if horizontal_indices.shape[0]:
43 | x1, x2 = horizontal_indices[[0, -1]]
44 | y1, y2 = vertical_indices[[0, -1]]
45 | # x2 and y2 should not be part of the box. Increment by 1.
46 | x2 += 1
47 | y2 += 1
48 | else:
49 | # No mask for this instance. Might happen due to
50 | # resizing or cropping. Set bbox to zeros
51 | x1, x2, y1, y2 = 0, 0, 0, 0
52 | boxes[i] = np.array([y1, x1, y2, x2])
53 | return boxes.astype(np.int32)
54 |
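# Editor's note: a minimal doctest-style sketch of extract_bboxes, not part of
# the original file; the mask contents below are made-up illustration values.
# >>> mask = np.zeros((8, 8, 1), dtype=np.uint8)
# >>> mask[2:5, 3:7, 0] = 1          # one instance covering rows 2-4, cols 3-6
# >>> extract_bboxes(mask).tolist()  # (y1, x1, y2, x2); y2/x2 are exclusive
# [[2, 3, 5, 7]]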
55 |
56 | def compute_iou(box, boxes, box_area, boxes_area):
57 | """Calculates IoU of the given box with the array of the given boxes.
58 | box: 1D vector [y1, x1, y2, x2]
59 | boxes: [boxes_count, (y1, x1, y2, x2)]
60 | box_area: float. the area of 'box'
61 | boxes_area: array of length boxes_count.
62 |
63 | Note: the areas are passed in rather than calculated here for
64 | efficiency. Calculate once in the caller to avoid duplicate work.
65 | """
66 | # Calculate intersection areas
67 | y1 = np.maximum(box[0], boxes[:, 0])
68 | y2 = np.minimum(box[2], boxes[:, 2])
69 | x1 = np.maximum(box[1], boxes[:, 1])
70 | x2 = np.minimum(box[3], boxes[:, 3])
71 | intersection = np.maximum(x2 - x1, 0) * np.maximum(y2 - y1, 0)
72 | union = box_area + boxes_area[:] - intersection[:]
73 | iou = intersection / union
74 | return iou
75 |
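# Editor's note: a hedged usage sketch, not in the original file. Areas are
# precomputed by the caller, as the docstring above requires.
# >>> box = np.array([0, 0, 10, 10])
# >>> boxes = np.array([[0, 0, 10, 10], [5, 5, 15, 15]])
# >>> compute_iou(box, boxes, 100.0, np.array([100.0, 100.0])).round(3).tolist()
# [1.0, 0.143]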
76 |
77 | def compute_overlaps(boxes1, boxes2):
78 | """Computes IoU overlaps between two sets of boxes.
79 | boxes1, boxes2: [N, (y1, x1, y2, x2)].
80 |
81 | For better performance, pass the largest set first and the smaller second.
82 | """
83 | # Areas of anchors and GT boxes
84 | area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
85 | area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])
86 |
87 | # Compute overlaps to generate matrix [boxes1 count, boxes2 count]
88 | # Each cell contains the IoU value.
89 | overlaps = np.zeros((boxes1.shape[0], boxes2.shape[0]))
90 | for i in range(overlaps.shape[1]):
91 | box2 = boxes2[i]
92 | overlaps[:, i] = compute_iou(box2, boxes1, area2[i], area1)
93 | return overlaps
94 |
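# Editor's note: an illustrative sketch, not in the original file. The result
# is a [len(boxes1), len(boxes2)] matrix of IoU values.
# >>> a = np.array([[0, 0, 10, 10], [20, 20, 30, 30]])
# >>> b = np.array([[0, 0, 10, 10]])
# >>> compute_overlaps(a, b).tolist()
# [[1.0], [0.0]]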
95 |
96 | def compute_overlaps_masks(masks1, masks2):
97 | """Computes IoU overlaps between two sets of masks.
98 | masks1, masks2: [Height, Width, instances]
99 | """
100 | # flatten masks
101 | masks1 = np.reshape(masks1 > .5, (-1, masks1.shape[-1])).astype(np.float32)
102 | masks2 = np.reshape(masks2 > .5, (-1, masks2.shape[-1])).astype(np.float32)
103 | area1 = np.sum(masks1, axis=0)
104 | area2 = np.sum(masks2, axis=0)
105 |
106 | # intersections and union
107 | intersections = np.dot(masks1.T, masks2)
108 | union = area1[:, None] + area2[None, :] - intersections
109 | overlaps = intersections / union
110 |
111 | return overlaps
112 |
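# Editor's note: a small sketch, not in the original file. Masks are
# [H, W, instances]; the result is an [instances1, instances2] IoU matrix.
# >>> m1 = np.zeros((4, 4, 1)); m1[:2, :2, 0] = 1
# >>> m2 = np.zeros((4, 4, 2)); m2[:2, :2, 0] = 1; m2[2:, 2:, 1] = 1
# >>> compute_overlaps_masks(m1, m2).tolist()
# [[1.0, 0.0]]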
113 |
114 | def non_max_suppression(boxes, scores, threshold):
115 | """Performs non-maximum supression and returns indicies of kept boxes.
116 | boxes: [N, (y1, x1, y2, x2)]. Notice that (y2, x2) lays outside the box.
117 | scores: 1-D array of box scores.
118 | threshold: Float. IoU threshold to use for filtering.
119 | """
120 | assert boxes.shape[0] > 0
121 | if boxes.dtype.kind != "f":
122 | boxes = boxes.astype(np.float32)
123 |
124 | # Compute box areas
125 | y1 = boxes[:, 0]
126 | x1 = boxes[:, 1]
127 | y2 = boxes[:, 2]
128 | x2 = boxes[:, 3]
129 | area = (y2 - y1) * (x2 - x1)
130 |
131 | # Get indices of boxes sorted by scores (highest first)
132 | ixs = scores.argsort()[::-1]
133 |
134 | pick = []
135 | while len(ixs) > 0:
136 | # Pick top box and add its index to the list
137 | i = ixs[0]
138 | pick.append(i)
139 | # Compute IoU of the picked box with the rest
140 | iou = compute_iou(boxes[i], boxes[ixs[1:]], area[i], area[ixs[1:]])
141 | # Identify boxes with IoU over the threshold. This
142 | # returns indices into ixs[1:], so add 1 to get
143 | # indices into ixs.
144 | remove_ixs = np.where(iou > threshold)[0] + 1
145 | # Remove indices of the picked and overlapped boxes.
146 | ixs = np.delete(ixs, remove_ixs)
147 | ixs = np.delete(ixs, 0)
148 | return np.array(pick, dtype=np.int32)
149 |
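# Editor's note: a hedged sketch, not in the original file. Of two heavily
# overlapping boxes the higher-scoring one survives, plus the distant box.
# >>> boxes = np.array([[0, 0, 10, 10], [1, 1, 11, 11], [20, 20, 30, 30]])
# >>> scores = np.array([0.9, 0.8, 0.7])
# >>> non_max_suppression(boxes, scores, threshold=0.3).tolist()
# [0, 2]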
150 |
151 | def apply_box_deltas(boxes, deltas):
152 | """Applies the given deltas to the given boxes.
153 | boxes: [N, (y1, x1, y2, x2)]. Note that (y2, x2) is outside the box.
154 | deltas: [N, (dy, dx, log(dh), log(dw))]
155 | """
156 | boxes = boxes.astype(np.float32)
157 | # Convert to y, x, h, w
158 | height = boxes[:, 2] - boxes[:, 0]
159 | width = boxes[:, 3] - boxes[:, 1]
160 | center_y = boxes[:, 0] + 0.5 * height
161 | center_x = boxes[:, 1] + 0.5 * width
162 | # Apply deltas
163 | center_y += deltas[:, 0] * height
164 | center_x += deltas[:, 1] * width
165 | height *= np.exp(deltas[:, 2])
166 | width *= np.exp(deltas[:, 3])
167 | # Convert back to y1, x1, y2, x2
168 | y1 = center_y - 0.5 * height
169 | x1 = center_x - 0.5 * width
170 | y2 = y1 + height
171 | x2 = x1 + width
172 | return np.stack([y1, x1, y2, x2], axis=1)
173 |
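# Editor's note: a quick sketch, not in the original file. A delta of
# (0.1, 0.1, 0, 0) shifts the box by 10% of its height/width, keeping its size.
# >>> boxes = np.array([[0., 0., 10., 10.]])
# >>> deltas = np.array([[0.1, 0.1, 0., 0.]])
# >>> apply_box_deltas(boxes, deltas).tolist()
# [[1.0, 1.0, 11.0, 11.0]]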
174 |
175 | def box_refinement_graph(box, gt_box):
176 | """Compute refinement needed to transform box to gt_box.
177 | box and gt_box are [N, (y1, x1, y2, x2)]
178 | """
179 | box = tf.cast(box, tf.float32)
180 | gt_box = tf.cast(gt_box, tf.float32)
181 |
182 | height = box[:, 2] - box[:, 0]
183 | width = box[:, 3] - box[:, 1]
184 | center_y = box[:, 0] + 0.5 * height
185 | center_x = box[:, 1] + 0.5 * width
186 |
187 | gt_height = gt_box[:, 2] - gt_box[:, 0]
188 | gt_width = gt_box[:, 3] - gt_box[:, 1]
189 | gt_center_y = gt_box[:, 0] + 0.5 * gt_height
190 | gt_center_x = gt_box[:, 1] + 0.5 * gt_width
191 |
192 | dy = (gt_center_y - center_y) / height
193 | dx = (gt_center_x - center_x) / width
194 | dh = tf.log(gt_height / height)
195 | dw = tf.log(gt_width / width)
196 |
197 | result = tf.stack([dy, dx, dh, dw], axis=1)
198 | return result
199 |
200 |
201 | def box_refinement(box, gt_box):
202 | """Compute refinement needed to transform box to gt_box.
203 | box and gt_box are [N, (y1, x1, y2, x2)]. (y2, x2) is
204 | assumed to be outside the box.
205 | """
206 | box = box.astype(np.float32)
207 | gt_box = gt_box.astype(np.float32)
208 |
209 | height = box[:, 2] - box[:, 0]
210 | width = box[:, 3] - box[:, 1]
211 | center_y = box[:, 0] + 0.5 * height
212 | center_x = box[:, 1] + 0.5 * width
213 |
214 | gt_height = gt_box[:, 2] - gt_box[:, 0]
215 | gt_width = gt_box[:, 3] - gt_box[:, 1]
216 | gt_center_y = gt_box[:, 0] + 0.5 * gt_height
217 | gt_center_x = gt_box[:, 1] + 0.5 * gt_width
218 |
219 | dy = (gt_center_y - center_y) / height
220 | dx = (gt_center_x - center_x) / width
221 | dh = np.log(gt_height / height)
222 | dw = np.log(gt_width / width)
223 |
224 | return np.stack([dy, dx, dh, dw], axis=1)
225 |
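# Editor's note, not in the original file: box_refinement is the inverse of
# apply_box_deltas above, so a round trip recovers the target box.
# >>> box = np.array([[0., 0., 10., 10.]])
# >>> gt = np.array([[1., 1., 11., 11.]])
# >>> apply_box_deltas(box, box_refinement(box, gt)).round(3).tolist()
# [[1.0, 1.0, 11.0, 11.0]]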
226 |
227 | ############################################################
228 | # Dataset
229 | ############################################################
230 |
231 | class Dataset(object):
232 | """The base class for dataset classes.
233 | To use it, create a new class that adds functions specific to the dataset
234 | you want to use. For example:
235 |
236 | class CatsAndDogsDataset(Dataset):
237 | def load_cats_and_dogs(self):
238 | ...
239 | def load_mask(self, image_id):
240 | ...
241 | def image_reference(self, image_id):
242 | ...
243 |
244 | See COCODataset and ShapesDataset as examples.
245 | """
246 |
247 | def __init__(self, class_map=None):
248 | self._image_ids = []
249 | self.image_info = []
250 | # Background is always the first class
251 | self.class_info = [{"source": "", "id": 0, "name": "BG"}]
252 | self.source_class_ids = {}
253 |
254 | def add_class(self, source, class_id, class_name):
255 | assert "." not in source, "Source name cannot contain a dot"
256 | # Does the class exist already?
257 | for info in self.class_info:
258 | if info['source'] == source and info["id"] == class_id:
259 | # source.class_id combination already available, skip
260 | return
261 | # Add the class
262 | self.class_info.append({
263 | "source": source,
264 | "id": class_id,
265 | "name": class_name,
266 | })
267 |
268 | def add_image(self, source, image_id, path, **kwargs):
269 | image_info = {
270 | "id": image_id,
271 | "source": source,
272 | "path": path,
273 | }
274 | image_info.update(kwargs)
275 | self.image_info.append(image_info)
276 |
277 | def image_reference(self, image_id):
278 | """Return a link to the image in its source Website or details about
279 | the image that help looking it up or debugging it.
280 |
281 | Override for your dataset, but pass to this function
282 | if you encounter images not in your dataset.
283 | """
284 | return ""
285 |
286 | def prepare(self, class_map=None):
287 | """Prepares the Dataset class for use.
288 |
289 | TODO: class map is not supported yet. When done, it should handle mapping
290 | classes from different datasets to the same class ID.
291 | """
292 |
293 | def clean_name(name):
294 | """Returns a shorter version of object names for cleaner display."""
295 | return ",".join(name.split(",")[:1])
296 |
297 | # Build (or rebuild) everything else from the info dicts.
298 | self.num_classes = len(self.class_info)
299 | self.class_ids = np.arange(self.num_classes)
300 | self.class_names = [clean_name(c["name"]) for c in self.class_info]
301 | self.num_images = len(self.image_info)
302 | self._image_ids = np.arange(self.num_images)
303 |
304 | self.class_from_source_map = {"{}.{}".format(info['source'], info['id']): id
305 | for info, id in zip(self.class_info, self.class_ids)}
306 |
307 | # Map sources to class_ids they support
308 | self.sources = list(set([i['source'] for i in self.class_info]))
309 | self.source_class_ids = {}
310 | # Loop over datasets
311 | for source in self.sources:
312 | self.source_class_ids[source] = []
313 | # Find classes that belong to this dataset
314 | for i, info in enumerate(self.class_info):
315 | # Include BG class in all datasets
316 | if i == 0 or source == info['source']:
317 | self.source_class_ids[source].append(i)
318 |
319 | def map_source_class_id(self, source_class_id):
320 | """Takes a source class ID and returns the int class ID assigned to it.
321 |
322 | For example:
323 | dataset.map_source_class_id("coco.12") -> 23
324 | """
325 | return self.class_from_source_map[source_class_id]
326 |
327 | def get_source_class_id(self, class_id, source):
328 | """Map an internal class ID to the corresponding class ID in the source dataset."""
329 | info = self.class_info[class_id]
330 | assert info['source'] == source
331 | return info['id']
332 |
333 | def append_data(self, class_info, image_info):
334 | self.external_to_class_id = {}
335 | for i, c in enumerate(self.class_info):
336 | for ds, id in c["map"]:
337 | self.external_to_class_id[ds + str(id)] = i
338 |
339 | # Map external image IDs to internal ones.
340 | self.external_to_image_id = {}
341 | for i, info in enumerate(self.image_info):
342 | self.external_to_image_id[info["ds"] + str(info["id"])] = i
343 |
344 | @property
345 | def image_ids(self):
346 | return self._image_ids
347 |
348 | def source_image_link(self, image_id):
349 | """Returns the path or URL to the image.
350 | Override this to return a URL to the image if it's available online for easy
351 | debugging.
352 | """
353 | return self.image_info[image_id]["path"]
354 |
355 | def load_image(self, image_id):
356 | """Load the specified image and return a [H,W,3] Numpy array.
357 | """
358 | # Load image
359 | image = skimage.io.imread(self.image_info[image_id]['path'])
360 | # If grayscale. Convert to RGB for consistency.
361 | if image.ndim != 3:
362 | image = skimage.color.gray2rgb(image)
363 | return image
364 |
365 | def load_mask(self, image_id):
366 | """Load instance masks for the given image.
367 |
368 | Different datasets use different ways to store masks. Override this
369 | method to load instance masks and return them in the form of an
370 | array of binary masks of shape [height, width, instances].
371 |
372 | Returns:
373 | masks: A bool array of shape [height, width, instance count] with
374 | a binary mask per instance.
375 | class_ids: a 1D array of class IDs of the instance masks.
376 | """
377 | # Override this function to load a mask from your dataset.
378 | # Otherwise, it returns an empty mask.
379 | mask = np.empty([0, 0, 0])
380 | class_ids = np.empty([0], np.int32)
381 | return mask, class_ids
382 |
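# Editor's note: a minimal subclassing sketch, not in the original file; the
# "shapes" source and the class/image entries are made-up illustration values.
# >>> class ToyDataset(Dataset):
# ...     def load_toy(self):
# ...         self.add_class("shapes", 1, "square")
# ...         self.add_image("shapes", image_id=0, path="/tmp/toy.png")
# >>> ds = ToyDataset()
# >>> ds.load_toy()
# >>> ds.prepare()
# >>> ds.num_classes, ds.class_names
# (2, ['BG', 'square'])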
383 |
384 | def resize_image(image, min_dim=None, max_dim=None, padding=False):
385 | """
386 | Resizes an image keeping the aspect ratio.
387 |
388 | min_dim: if provided, resizes the image such that its smaller
389 | dimension == min_dim
390 | max_dim: if provided, ensures that the image longest side doesn't
391 | exceed this value.
392 | padding: If true, pads image with zeros so its size is max_dim x max_dim
393 |
394 | Returns:
395 | image: the resized image
396 | window: (y1, x1, y2, x2). If max_dim is provided, padding might
397 | be inserted in the returned image. If so, this window is the
398 | coordinates of the image part of the full image (excluding
399 | the padding). The x2, y2 pixels are not included.
400 | scale: The scale factor used to resize the image
401 | padding: Padding added to the image [(top, bottom), (left, right), (0, 0)]
402 | """
403 | # Default window (y1, x1, y2, x2) and default scale == 1.
404 | h, w = image.shape[:2]
405 | window = (0, 0, h, w)
406 | scale = 1
407 |
408 | # Scale?
409 | if min_dim:
410 | # Scale up but not down
411 | scale = max(1, min_dim / min(h, w))
412 | # Does it exceed max dim?
413 | if max_dim:
414 | image_max = max(h, w)
415 | if round(image_max * scale) > max_dim:
416 | scale = max_dim / image_max
417 | # Resize image and mask
418 | if scale != 1:
419 | image = scipy.misc.imresize(
420 | image, (round(h * scale), round(w * scale)))
421 | # Need padding?
422 | if padding:
423 | # Get new height and width
424 | h, w = image.shape[:2]
425 | top_pad = (max_dim - h) // 2
426 | bottom_pad = max_dim - h - top_pad
427 | left_pad = (max_dim - w) // 2
428 | right_pad = max_dim - w - left_pad
429 | padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)]
430 | image = np.pad(image, padding, mode='constant', constant_values=0)
431 | window = (top_pad, left_pad, h + top_pad, w + left_pad)
432 | return image, window, scale, padding
433 |
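# Editor's note: an arithmetic sketch, not in the original file; it needs the
# legacy scipy.misc.imresize (removed in SciPy 1.3). For a 100x200 image,
# scale = max(1, 128/100) = 1.28, the result is padded to 256x256, and the
# window marks the un-padded region.
# >>> img = np.zeros((100, 200, 3), dtype=np.uint8)
# >>> _, window, scale, pad = resize_image(img, min_dim=128, max_dim=256, padding=True)
# >>> window, scale
# ((64, 0, 192, 256), 1.28)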
434 |
435 | def resize_mask(mask, scale, padding):
436 | """Resizes a mask using the given scale and padding.
437 | Typically, you get the scale and padding from resize_image() to
438 | ensure both the image and the mask are resized consistently.
439 |
440 | scale: mask scaling factor
441 | padding: Padding to add to the mask in the form
442 | [(top, bottom), (left, right), (0, 0)]
443 | """
444 | h, w = mask.shape[:2]
445 | mask = scipy.ndimage.zoom(mask, zoom=[scale, scale, 1], order=0)
446 | mask = np.pad(mask, padding, mode='constant', constant_values=0)
447 | return mask
448 |
449 |
450 | def minimize_mask(bbox, mask, mini_shape):
451 | """Resize masks to a smaller version to cut memory load.
452 | Mini-masks can then be resized back to image scale using expand_mask().
453 |
454 | See inspect_data.ipynb notebook for more details.
455 | """
456 | mini_mask = np.zeros(mini_shape + (mask.shape[-1],), dtype=bool)
457 | for i in range(mask.shape[-1]):
458 | m = mask[:, :, i]
459 | y1, x1, y2, x2 = bbox[i][:4]
460 | m = m[y1:y2, x1:x2]
461 | if m.size == 0:
462 | raise Exception("Invalid bounding box with area of zero")
463 | m = scipy.misc.imresize(m.astype(float), mini_shape, interp='bilinear')
464 | mini_mask[:, :, i] = np.where(m >= 128, 1, 0)
465 | return mini_mask
466 |
467 |
468 | def expand_mask(bbox, mini_mask, image_shape):
469 | """Resizes mini masks back to image size. Reverses the change
470 | of minimize_mask().
471 |
472 | See inspect_data.ipynb notebook for more details.
473 | """
474 | mask = np.zeros(image_shape[:2] + (mini_mask.shape[-1],), dtype=bool)
475 | for i in range(mask.shape[-1]):
476 | m = mini_mask[:, :, i]
477 | y1, x1, y2, x2 = bbox[i][:4]
478 | h = y2 - y1
479 | w = x2 - x1
480 | m = scipy.misc.imresize(m.astype(float), (h, w), interp='bilinear')
481 | mask[y1:y2, x1:x2, i] = np.where(m >= 128, 1, 0)
482 | return mask
483 |
484 |
485 | # TODO: Build and use this function to reduce code duplication
486 | def mold_mask(mask, config):
487 | pass
488 |
489 |
490 | def unmold_mask(mask, bbox, image_shape):
491 | """Converts a mask generated by the neural network into a format similar
492 | to its original shape.
493 | mask: [height, width] of type float. A small, typically 28x28 mask.
494 | bbox: [y1, x1, y2, x2]. The box to fit the mask in.
495 |
496 | Returns a binary mask with the same size as the original image.
497 | """
498 | threshold = 0.5
499 | y1, x1, y2, x2 = bbox
500 | mask = scipy.misc.imresize(
501 | mask, (y2 - y1, x2 - x1), interp='bilinear').astype(np.float32) / 255.0
502 | mask = np.where(mask >= threshold, 1, 0).astype(np.uint8)
503 |
504 | # Put the mask in the right location.
505 | full_mask = np.zeros(image_shape[:2], dtype=np.uint8)
506 | full_mask[y1:y2, x1:x2] = mask
507 | return full_mask
508 |
509 |
510 | ############################################################
511 | # Anchors
512 | ############################################################
513 |
514 | def generate_anchors(scales, ratios, shape, feature_stride, anchor_stride):
515 | """
516 | scales: 1D array of anchor sizes in pixels. Example: [32, 64, 128]
517 | ratios: 1D array of anchor ratios of width/height. Example: [0.5, 1, 2]
518 | shape: [height, width] spatial shape of the feature map over which
519 | to generate anchors.
520 | feature_stride: Stride of the feature map relative to the image in pixels.
521 | anchor_stride: Stride of anchors on the feature map. For example, if the
522 | value is 2 then generate anchors for every other feature map pixel.
523 | """
524 | # Get all combinations of scales and ratios
525 | scales, ratios = np.meshgrid(np.array(scales), np.array(ratios))
526 | scales = scales.flatten()
527 | ratios = ratios.flatten()
528 |
529 | # Enumerate heights and widths from scales and ratios
530 | heights = scales / np.sqrt(ratios)
531 | widths = scales * np.sqrt(ratios)
532 |
533 | # Enumerate shifts in feature space
534 | shifts_y = np.arange(0, shape[0], anchor_stride) * feature_stride
535 | shifts_x = np.arange(0, shape[1], anchor_stride) * feature_stride
536 | shifts_x, shifts_y = np.meshgrid(shifts_x, shifts_y)
537 |
538 | # Enumerate combinations of shifts, widths, and heights
539 | box_widths, box_centers_x = np.meshgrid(widths, shifts_x)
540 | box_heights, box_centers_y = np.meshgrid(heights, shifts_y)
541 |
542 | # Reshape to get a list of (y, x) and a list of (h, w)
543 | box_centers = np.stack(
544 | [box_centers_y, box_centers_x], axis=2).reshape([-1, 2])
545 | box_sizes = np.stack([box_heights, box_widths], axis=2).reshape([-1, 2])
546 |
547 | # Convert to corner coordinates (y1, x1, y2, x2)
548 | boxes = np.concatenate([box_centers - 0.5 * box_sizes,
549 | box_centers + 0.5 * box_sizes], axis=1)
550 | return boxes
551 |
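# Editor's note: a shape-only sketch, not in the original file. One scale and
# three ratios over a 2x2 feature map give 2 * 2 * 3 = 12 anchors.
# >>> generate_anchors([32], [0.5, 1, 2], [2, 2], feature_stride=8, anchor_stride=1).shape
# (12, 4)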
552 |
553 | def generate_pyramid_anchors(scales, ratios, feature_shapes, feature_strides,
554 | anchor_stride):
555 | """Generate anchors at different levels of a feature pyramid. Each scale
556 | is associated with a level of the pyramid, but each ratio is used in
557 | all levels of the pyramid.
558 |
559 | Returns:
560 | anchors: [N, (y1, x1, y2, x2)]. All generated anchors in one array. Sorted
561 | with the same order of the given scales. So, anchors of scale[0] come
562 | first, then anchors of scale[1], and so on.
563 | """
564 | # Anchors
565 | # [anchor_count, (y1, x1, y2, x2)]
566 | anchors = []
567 | for i in range(len(scales)):
568 | anchors.append(generate_anchors(scales[i], ratios, feature_shapes[i],
569 | feature_strides[i], anchor_stride))
570 | return np.concatenate(anchors, axis=0)
571 |
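# Editor's note: a shape-only sketch, not in the original file. Two pyramid
# levels contribute 4*4*3 = 48 and 2*2*3 = 12 anchors respectively.
# >>> generate_pyramid_anchors([16, 32], [0.5, 1, 2], [[4, 4], [2, 2]],
# ...                          [4, 8], anchor_stride=1).shape
# (60, 4)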
572 |
573 | ############################################################
574 | # Miscellaneous
575 | ############################################################
576 |
577 | def trim_zeros(x):
578 | """It's common to have tensors larger than the available data and
579 | pad with zeros. This function removes rows that are all zeros.
580 |
581 | x: [rows, columns].
582 | """
583 | assert len(x.shape) == 2
584 | return x[~np.all(x == 0, axis=1)]
585 |
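# Editor's note: a one-line sketch, not in the original file.
# >>> trim_zeros(np.array([[1, 2], [0, 0], [3, 4]])).tolist()
# [[1, 2], [3, 4]]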
586 |
587 | def compute_ap(gt_boxes, gt_class_ids, gt_masks,
588 | pred_boxes, pred_class_ids, pred_scores, pred_masks,
589 | iou_threshold=0.5):
590 | """Compute Average Precision at a set IoU threshold (default 0.5).
591 |
592 | Returns:
593 | mAP: Mean Average Precision
594 | precisions: List of precisions at different class score thresholds.
595 | recalls: List of recall values at different class score thresholds.
596 | overlaps: [pred_boxes, gt_boxes] IoU overlaps.
597 | """
598 | # Trim zero padding and sort predictions by score from high to low
599 | # TODO: cleaner to do zero unpadding upstream
600 | gt_boxes = trim_zeros(gt_boxes)
601 | gt_masks = gt_masks[..., :gt_boxes.shape[0]]
602 | pred_boxes = trim_zeros(pred_boxes)
603 | pred_scores = pred_scores[:pred_boxes.shape[0]]
604 | indices = np.argsort(pred_scores)[::-1]
605 | pred_boxes = pred_boxes[indices]
606 | pred_class_ids = pred_class_ids[indices]
607 | pred_scores = pred_scores[indices]
608 | pred_masks = pred_masks[..., indices]
609 |
610 | # Compute IoU overlaps [pred_masks, gt_masks]
611 | overlaps = compute_overlaps_masks(pred_masks, gt_masks)
612 |
613 | # Loop through ground truth boxes and find matching predictions
614 | match_count = 0
615 | pred_match = np.zeros([pred_boxes.shape[0]])
616 | gt_match = np.zeros([gt_boxes.shape[0]])
617 | for i in range(len(pred_boxes)):
618 | # Find best matching ground truth box
619 | sorted_ixs = np.argsort(overlaps[i])[::-1]
620 | for j in sorted_ixs:
621 | # If ground truth box is already matched, go to next one
622 | if gt_match[j] == 1:
623 | continue
624 | # If we reach IoU smaller than the threshold, end the loop
625 | iou = overlaps[i, j]
626 | if iou < iou_threshold:
627 | break
628 | # Do we have a match?
629 | if pred_class_ids[i] == gt_class_ids[j]:
630 | match_count += 1
631 | gt_match[j] = 1
632 | pred_match[i] = 1
633 | break
634 |
635 | # Compute precision and recall at each prediction box step
636 | precisions = np.cumsum(pred_match) / (np.arange(len(pred_match)) + 1)
637 | recalls = np.cumsum(pred_match).astype(np.float32) / len(gt_match)
638 |
639 | # Pad with start and end values to simplify the math
640 | precisions = np.concatenate([[0], precisions, [0]])
641 | recalls = np.concatenate([[0], recalls, [1]])
642 |
643 | # Ensure precision values are monotonically non-increasing. This way, the
644 | # precision value at each recall threshold is the maximum it can be
645 | # for all following recall thresholds, as specified by the VOC paper.
646 | for i in range(len(precisions) - 2, -1, -1):
647 | precisions[i] = np.maximum(precisions[i], precisions[i + 1])
648 |
649 | # Compute mean AP over recall range
650 | indices = np.where(recalls[:-1] != recalls[1:])[0] + 1
651 | mAP = np.sum((recalls[indices] - recalls[indices - 1]) *
652 | precisions[indices])
653 |
654 | return mAP, precisions, recalls, overlaps
655 |
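# Editor's note: a tiny end-to-end sketch, not in the original file. One GT
# instance and one perfectly matching prediction yield an AP of 1.0.
# >>> gt_box = np.array([[0, 0, 4, 4]]); gt_id = np.array([1])
# >>> gt_mask = np.zeros((8, 8, 1)); gt_mask[:4, :4, 0] = 1
# >>> mAP, P, R, ov = compute_ap(gt_box, gt_id, gt_mask,
# ...                            gt_box, gt_id, np.array([0.9]), gt_mask)
# >>> round(float(mAP), 3)
# 1.0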
656 |
657 | def compute_recall(pred_boxes, gt_boxes, iou):
658 | """Compute the recall at the given IoU threshold. It's an indication
659 | of how many GT boxes were found by the given prediction boxes.
660 |
661 | pred_boxes: [N, (y1, x1, y2, x2)] in image coordinates
662 | gt_boxes: [N, (y1, x1, y2, x2)] in image coordinates
663 | """
664 | # Measure overlaps
665 | overlaps = compute_overlaps(pred_boxes, gt_boxes)
666 | iou_max = np.max(overlaps, axis=1)
667 | iou_argmax = np.argmax(overlaps, axis=1)
668 | positive_ids = np.where(iou_max >= iou)[0]
669 | matched_gt_boxes = iou_argmax[positive_ids]
670 |
671 | recall = len(set(matched_gt_boxes)) / gt_boxes.shape[0]
672 | return recall, positive_ids
673 |
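# Editor's note: a quick sketch, not in the original file. One of the two GT
# boxes is matched at IoU >= 0.5, so recall is 0.5.
# >>> pred = np.array([[0, 0, 10, 10], [50, 50, 60, 60]])
# >>> gt = np.array([[0, 0, 10, 10], [100, 100, 110, 110]])
# >>> recall, ids = compute_recall(pred, gt, 0.5)
# >>> recall, ids.tolist()
# (0.5, [0])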
674 |
675 | # ## Batch Slicing
676 | # Some custom layers support a batch size of 1 only, and require a lot of work
677 | # to support batches greater than 1. This function slices an input tensor
678 | # across the batch dimension and feeds batches of size 1. Effectively, it's
679 | # an easy way to support batches > 1 quickly with little code modification.
680 | # In the long run, it's more efficient to modify the code to support large
681 | # batches and get rid of this function. Consider this a temporary solution.
682 | def batch_slice(inputs, graph_fn, batch_size, names=None):
683 | """Splits inputs into slices and feeds each slice to a copy of the given
684 | computation graph and then combines the results. It allows you to run a
685 | graph on a batch of inputs even if the graph is written to support one
686 | instance only.
687 |
688 | inputs: list of tensors. All must have the same first dimension length
689 | graph_fn: A function that returns a TF tensor that's part of a graph.
690 | batch_size: number of slices to divide the data into.
691 | names: If provided, assigns names to the resulting tensors.
692 | """
693 | if not isinstance(inputs, list):
694 | inputs = [inputs]
695 |
696 | outputs = []
697 | for i in range(batch_size):
698 | inputs_slice = [x[i] for x in inputs]
699 | output_slice = graph_fn(*inputs_slice)
700 | if not isinstance(output_slice, (tuple, list)):
701 | output_slice = [output_slice]
702 | outputs.append(output_slice)
703 | # Change outputs from a list of slices where each is
704 | # a list of outputs to a list of outputs and each has
705 | # a list of slices
706 | outputs = list(zip(*outputs))
707 |
708 | if names is None:
709 | names = [None] * len(outputs)
710 |
711 | result = [tf.stack(o, axis=0, name=n)
712 | for o, n in zip(outputs, names)]
713 | if len(result) == 1:
714 | result = result[0]
715 |
716 | return result
717 |
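# Editor's note: a hedged graph-mode sketch, not in the original file; this
# codebase targets TensorFlow 1.x (note tf.log above), so the result is a
# symbolic tensor to be evaluated in a session.
# >>> x = tf.constant([[1., 2.], [3., 4.]])        # "batch" of 2 rows
# >>> y = batch_slice(x, lambda row: row * 2., 2)  # doubles each row slice
# >>> # tf.Session().run(y) -> [[2., 4.], [6., 8.]]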
718 |
719 | def download_trained_weights(coco_model_path, verbose=1):
720 | """Download COCO trained weights from Releases.
721 |
722 | coco_model_path: local path of COCO trained weights
723 | """
724 | if verbose > 0:
725 | print("Downloading pretrained model to " + coco_model_path + " ...")
726 | with urllib.request.urlopen(COCO_MODEL_URL) as resp, open(coco_model_path, 'wb') as out:
727 | shutil.copyfileobj(resp, out)
728 | if verbose > 0:
729 | print("... done downloading pretrained model!")
730 |
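# Editor's note: typical usage, not in the original file. This fetches the
# (large, hundreds of MB) weights file from COCO_MODEL_URL defined above.
# >>> # download_trained_weights("/tmp/mask_rcnn_coco.h5")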
--------------------------------------------------------------------------------