├── bags
│   └── .placeholder
├── nodes
│   ├── __init__.py
│   ├── coco.pyc
│   ├── config.pyc
│   ├── model.pyc
│   ├── utils.pyc
│   ├── visualize.pyc
│   ├── __pycache__
│   │   ├── coco.cpython-36.pyc
│   │   ├── config.cpython-36.pyc
│   │   ├── model.cpython-36.pyc
│   │   ├── utils.cpython-36.pyc
│   │   └── visualize.cpython-36.pyc
│   ├── config.py
│   ├── parallel_model.py
│   ├── mask_rcnn_node
│   ├── shapes.py
│   ├── visualize.py
│   ├── coco.py
│   └── utils.py
├── src
│   └── mask_rcnn_ros
│       ├── __init__.py
│       ├── config.py
│       ├── parallel_model.py
│       ├── shapes.py
│       ├── visualize.py
│       ├── coco.py
│       └── utils.py
├── doc
│   ├── mask_r-cnn_1.png
│   └── mask_r-cnn_2.png
├── scripts
│   └── download_freiburg3_rgbd_example_bag.sh
├── requirements.txt
├── setup.py
├── msg
│   └── Result.msg
├── launch
│   └── freiburg3_rgbd_example.launch
├── package.xml
├── CMakeLists.txt
├── .gitignore
├── LICENSE
├── LICENSE.Mask_R-CNN
├── README.md
└── rviz
    └── mask_rcnn_ros.rviz
/bags/.placeholder: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /nodes/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/mask_rcnn_ros/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /nodes/coco.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qixuxiang/mask_rcnn_ros/HEAD/nodes/coco.pyc -------------------------------------------------------------------------------- /nodes/config.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qixuxiang/mask_rcnn_ros/HEAD/nodes/config.pyc -------------------------------------------------------------------------------- /nodes/model.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qixuxiang/mask_rcnn_ros/HEAD/nodes/model.pyc -------------------------------------------------------------------------------- /nodes/utils.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qixuxiang/mask_rcnn_ros/HEAD/nodes/utils.pyc -------------------------------------------------------------------------------- /nodes/visualize.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qixuxiang/mask_rcnn_ros/HEAD/nodes/visualize.pyc -------------------------------------------------------------------------------- /doc/mask_r-cnn_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qixuxiang/mask_rcnn_ros/HEAD/doc/mask_r-cnn_1.png -------------------------------------------------------------------------------- /doc/mask_r-cnn_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qixuxiang/mask_rcnn_ros/HEAD/doc/mask_r-cnn_2.png -------------------------------------------------------------------------------- /nodes/__pycache__/coco.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qixuxiang/mask_rcnn_ros/HEAD/nodes/__pycache__/coco.cpython-36.pyc
-------------------------------------------------------------------------------- /nodes/__pycache__/config.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qixuxiang/mask_rcnn_ros/HEAD/nodes/__pycache__/config.cpython-36.pyc -------------------------------------------------------------------------------- /nodes/__pycache__/model.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qixuxiang/mask_rcnn_ros/HEAD/nodes/__pycache__/model.cpython-36.pyc -------------------------------------------------------------------------------- /nodes/__pycache__/utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qixuxiang/mask_rcnn_ros/HEAD/nodes/__pycache__/utils.cpython-36.pyc -------------------------------------------------------------------------------- /nodes/__pycache__/visualize.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qixuxiang/mask_rcnn_ros/HEAD/nodes/__pycache__/visualize.cpython-36.pyc -------------------------------------------------------------------------------- /scripts/download_freiburg3_rgbd_example_bag.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | wget https://vision.in.tum.de/rgbd/dataset/freiburg3/rgbd_dataset_freiburg3_long_office_household.bag -P bags 3 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | h5py==2.7.0 2 | Keras==2.1.2 3 | numpy==1.13.3 4 | opencv-python==3.4.0.12 5 | scikit-image==0.13.0 6 | scikit-learn==0.19.1 7 | scipy==0.19.1 8 | matplotlib==2.2.3 9 | tensorflow-gpu==1.4.1 10 | ipython==5.2.0 -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | ## !
DO NOT MANUALLY INVOKE THIS setup.py, USE CATKIN INSTEAD 2 | 3 | from distutils.core import setup 4 | from catkin_pkg.python_setup import generate_distutils_setup 5 | 6 | # fetch values from package.xml 7 | setup_args = generate_distutils_setup( 8 | packages=['mask_rcnn_ros',], 9 | package_dir={'': 'src'}) 10 | 11 | setup(**setup_args) 12 | -------------------------------------------------------------------------------- /msg/Result.msg: -------------------------------------------------------------------------------- 1 | std_msgs/Header header 2 | 3 | # Bounding boxes in pixels 4 | sensor_msgs/RegionOfInterest[] boxes 5 | 6 | # Integer class IDs for each bounding box 7 | int32[] class_ids 8 | 9 | # String class names for each bounding box 10 | string[] class_names 11 | 12 | # Float probability scores of the class_id 13 | float32[] scores 14 | 15 | # Instance masks as Image 16 | sensor_msgs/Image[] masks 17 | 18 | -------------------------------------------------------------------------------- /launch/freiburg3_rgbd_example.launch: -------------------------------------------------------------------------------- [roslaunch XML tags were stripped from this dump and are not recoverable; per the README, this launch file plays the downloaded example bag, starts the mask_rcnn node, and opens RViz with rviz/mask_rcnn_ros.rviz] -------------------------------------------------------------------------------- /package.xml: -------------------------------------------------------------------------------- 1 | <?xml version="1.0"?> 2 | <package> 3 | <name>mask_rcnn_ros</name> 4 | <version>0.1.0</version> 5 | <description>The Mask R-CNN for ROS</description> 6 | 7 | <maintainer>Akio Ochiai</maintainer> 8 | 9 | <license>MIT</license> 10 | 11 | <url>http://wiki.ros.org/mask_rcnn_ros</url> 12 | 13 | <author>Akio Ochiai</author> 14 | <author>Matterport, Inc.</author> 15 | 16 | <buildtool_depend>catkin</buildtool_depend> 17 | <build_depend>message_generation</build_depend> 18 | 19 | <run_depend>rospy</run_depend> 20 | <run_depend>message_runtime</run_depend> 21 | <run_depend>std_msgs</run_depend> 22 | <run_depend>sensor_msgs</run_depend> 23 | <run_depend>cv_bridge</run_depend> 24 | <run_depend>vision_opencv</run_depend> 25 | </package> -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8.3) 2 | project(mask_rcnn_ros) 3 | 4 | find_package(catkin REQUIRED COMPONENTS std_msgs sensor_msgs message_generation) 5 | 6 | catkin_python_setup() 7 | 8 | add_message_files( 9 | FILES 10 | Result.msg 11 | ) 12 | 13 | 14 | generate_messages( 15 | DEPENDENCIES std_msgs sensor_msgs 16 | ) 17 | 18 | 19 | 20 | catkin_package(CATKIN_DEPENDS message_runtime) 21 | 22 | ############# 23 | ## Install ## 24 | ############# 25 | 26 | install(PROGRAMS 27 | nodes/mask_rcnn_node 28 | DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} 29 | ) 30 | 31 | install(DIRECTORY 32 | msg 33 | DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION} 34 | ) 35 | 36 | 37 | ############# 38 | ## Testing ## 39 | ############# 40 | 41 | 42 | ## Add folders to be run by python nosetests 43 | # catkin_add_nosetests(test) -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | devel/ 2 | logs/ 3 | build/ 4 | bin/ 5 | lib/ 6 | msg_gen/ 7 | srv_gen/ 8 | msg/*Action.msg 9 | msg/*ActionFeedback.msg 10 | msg/*ActionGoal.msg 11 | msg/*ActionResult.msg 12 | msg/*Feedback.msg 13 | msg/*Goal.msg 14 | msg/*Result.msg 15 | msg/_*.py 16 | build_isolated/ 17 | devel_isolated/ 18 | src/CMakeLists.txt 19 | .catkin_workspace 20 | result/data/*.txt 21 | result/data/*.csv 22 | src/cmake-build-debug/ 23 | src/.idea/ 24 | src/multisensor/cmake-build-debug/ 25 | src/multisensor/.idea/ 26 | .vscode/ 27 | 28 | 29 | # Generated by dynamic reconfigure 30 | *.cfgc 31 | /cfg/cpp/ 32 | /cfg/*.py 33 | 34 | # Ignore generated docs 35 | *.dox 36 | *.wikidoc 37 | 38 | # eclipse
stuff 39 | .project 40 | .cproject 41 | 42 | # qtcreator stuff 43 | CMakeLists.txt.user 44 | 45 | srv/_*.py 46 | *.pcd 47 | *.pyc 48 | qtcreator-* 49 | *.user 50 | 51 | /planning/cfg 52 | /planning/docs 53 | /planning/src 54 | 55 | *~ 56 | 57 | # Emacs 58 | .#* 59 | 60 | # Catkin custom files 61 | CATKIN_IGNORE 62 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | mask_rcnn_ros 2 | 3 | The MIT License (MIT) 4 | 5 | Copyright (c) 2017 Akio Ochiai, Inc. 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in 15 | all copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | THE SOFTWARE. 24 | -------------------------------------------------------------------------------- /LICENSE.Mask_R-CNN: -------------------------------------------------------------------------------- 1 | Mask R-CNN 2 | 3 | The MIT License (MIT) 4 | 5 | Copyright (c) 2017 Matterport, Inc. 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in 15 | all copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | THE SOFTWARE. 24 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # The ROS Package of Mask R-CNN for Object Detection and Segmentation 2 | 3 | This is a ROS package of the [Mask R-CNN](https://arxiv.org/abs/1703.06870) algorithm for object detection and segmentation. 4 | 5 | The package contains a ROS node of Mask R-CNN with a topic-based ROS interface.
6 | 7 | Most of the core algorithm code is based on the [Mask R-CNN implementation by Matterport, Inc.](https://github.com/matterport/Mask_RCNN) 8 | 9 | ## Training 10 | 11 | This repository doesn't contain code for training the Mask R-CNN network model. 12 | If you want to train the model on your own class definition or dataset, train it in [the upstream repository](https://github.com/matterport/Mask_RCNN) and pass the resulting weights to the `model_path` parameter. 13 | 14 | 15 | ## Requirements 16 | * ROS Indigo/Kinetic 17 | * TensorFlow 1.3+ 18 | * Keras 2.0.8+ 19 | * Numpy, skimage, scipy, Pillow, cython, h5py 20 | * The code has only been tested on Python 2.7; it may work on Python 3.x. 21 | * See more dependency and version details in [requirements.txt](https://github.com/qixuxiang/mask_rcnn_ros/blob/master/requirements.txt) 22 | 23 | ## ROS Interfaces 24 | 25 | ### Parameters 26 | 27 | * `~model_path: string` 28 | 29 | Path to the HDF5 model file. 30 | If `model_path` is left at the default value and the file doesn't exist, the node automatically downloads the file. 31 | 32 | Default: `$ROS_HOME/mask_rcnn_coco.h5` 33 | 34 | * `~visualization: bool` 35 | 36 | If true, the node publishes visualized images to the `~visualization` topic. 37 | Default: `true` 38 | 39 | * `~class_names: string[]` 40 | 41 | Class names to be treated as detection targets. 42 | Default: All MS COCO classes. 43 | 44 | ### Topics Published 45 | 46 | * `~result: mask_rcnn_ros/Result` 47 | 48 | Result of detection. See also `Result.msg` for a detailed description. 49 | 50 | * `~visualization: sensor_msgs/Image` 51 | 52 | Visualized result over an input image. 53 | 54 | 55 | ### Topics Subscribed 56 | 57 | * `~input: sensor_msgs/Image` 58 | 59 | Input image to be processed 60 | 61 | ## Getting Started 62 | 63 | 1. Clone this repository into your catkin workspace, build the workspace, and source the devel environment 64 | ``` 65 | $ cd ~/catkin_ws/src 66 | $ git clone https://github.com/qixuxiang/mask_rcnn_ros.git 67 | $ cd mask_rcnn_ros 68 | $ python2 -m pip install --upgrade pip 69 | $ python2 -m pip install -r requirements.txt 70 | $ cd ../.. 71 | $ catkin_make 72 | $ source devel/setup.bash 73 | 74 | ``` 75 | 76 | 2. Run the mask_rcnn node 77 | ~~~bash 78 | $ rosrun mask_rcnn_ros mask_rcnn_node 79 | ~~~ 80 | 81 | ## Example 82 | 83 | There is a simple example launch file using the [RGB-D SLAM Dataset](https://vision.in.tum.de/data/datasets/rgbd-dataset/download). 84 | 85 | ~~~bash 86 | $ chmod +x scripts/download_freiburg3_rgbd_example_bag.sh 87 | $ ./scripts/download_freiburg3_rgbd_example_bag.sh 88 | $ roslaunch mask_rcnn_ros freiburg3_rgbd_example.launch 89 | ~~~ 90 | 91 | Then an RViz window will appear and show results like the following: 92 | 93 | ![example1](doc/mask_r-cnn_1.png) 94 | 95 | ![example2](doc/mask_r-cnn_2.png) 96 | 97 | ## Other issues 98 | 99 | * If you have Anaconda Python installed, please delete or comment out the line `export PATH=/home/soft/conda3/bin:$PATH` in your `~/.bashrc` file. 100 | 101 | * When you run the code, please allow a moment for results to appear; there is some delay while the bag file plays and the images are processed. 102 | 103 | * Feel free to submit an issue if you run into problems, and include your system details, such as Ubuntu 14/16, ROS Indigo/Kinetic, Python 2/Python 3, TensorFlow 1.4, etc.
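
## Consuming the result topic

The snippet below is a minimal sketch of a client node, added here for illustration only (it is not shipped with this package). It assumes the node runs under its default name, so `~result` resolves to `/mask_rcnn/result`; adjust the topic name if you remap it.

~~~python
#!/usr/bin/env python
# Illustrative sketch (not part of this package): consume mask_rcnn_ros/Result.
import numpy as np
import rospy
from cv_bridge import CvBridge
from mask_rcnn_ros.msg import Result

bridge = CvBridge()

def callback(msg):
    # boxes, class_ids, class_names, scores and masks are parallel arrays.
    for i, name in enumerate(msg.class_names):
        box = msg.boxes[i]
        rospy.loginfo('%s (%.2f) at x=%d y=%d size %dx%d',
                      name, msg.scores[i], box.x_offset, box.y_offset,
                      box.width, box.height)
        # Each mask is published as a mono8 image with values 0 or 255.
        mask = bridge.imgmsg_to_cv2(msg.masks[i], 'mono8') > 0
        rospy.loginfo('  mask covers %d pixels', int(np.count_nonzero(mask)))

rospy.init_node('mask_rcnn_result_listener')
rospy.Subscriber('/mask_rcnn/result', Result, callback, queue_size=1)
rospy.spin()
~~~

Parameters can also be overridden per run with the usual ROS private-parameter syntax, e.g. `rosrun mask_rcnn_ros mask_rcnn_node _visualization:=false`.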
104 | -------------------------------------------------------------------------------- /rviz/mask_rcnn_ros.rviz: -------------------------------------------------------------------------------- 1 | Panels: 2 | - Class: rviz/Displays 3 | Help Height: 0 4 | Name: Displays 5 | Property Tree Widget: 6 | Expanded: 7 | - /Global Options1 8 | - /Status1 9 | - /Image1 10 | - /Image2 11 | Splitter Ratio: 0.755813956 12 | Tree Height: 614 13 | - Class: rviz/Selection 14 | Name: Selection 15 | - Class: rviz/Tool Properties 16 | Expanded: 17 | - /2D Pose Estimate1 18 | - /2D Nav Goal1 19 | - /Publish Point1 20 | Name: Tool Properties 21 | Splitter Ratio: 0.588679016 22 | - Class: rviz/Views 23 | Expanded: 24 | - /Current View1 25 | Name: Views 26 | Splitter Ratio: 0.5 27 | - Class: rviz/Time 28 | Experimental: false 29 | Name: Time 30 | SyncMode: 0 31 | SyncSource: Image 32 | Visualization Manager: 33 | Class: "" 34 | Displays: 35 | - Alpha: 0.5 36 | Cell Size: 1 37 | Class: rviz/Grid 38 | Color: 160; 160; 164 39 | Enabled: true 40 | Line Style: 41 | Line Width: 0.0299999993 42 | Value: Lines 43 | Name: Grid 44 | Normal Cell Count: 0 45 | Offset: 46 | X: 0 47 | Y: 0 48 | Z: 0 49 | Plane: XY 50 | Plane Cell Count: 10 51 | Reference Frame: 52 | Value: true 53 | - Class: rviz/Image 54 | Enabled: true 55 | Image Topic: /camera/rgb/image_color 56 | Max Value: 1 57 | Median window: 5 58 | Min Value: 0 59 | Name: Image 60 | Normalize Range: true 61 | Queue Size: 2 62 | Transport Hint: raw 63 | Unreliable: false 64 | Value: true 65 | - Class: rviz/Image 66 | Enabled: true 67 | Image Topic: /mask_rcnn/visualization 68 | Max Value: 1 69 | Median window: 5 70 | Min Value: 0 71 | Name: Image 72 | Normalize Range: true 73 | Queue Size: 2 74 | Transport Hint: raw 75 | Unreliable: false 76 | Value: true 77 | Enabled: true 78 | Global Options: 79 | Background Color: 48; 48; 48 80 | Fixed Frame: world 81 | Frame Rate: 30 82 | Name: root 83 | Tools: 84 | - Class: rviz/Interact 85 | Hide Inactive Objects: true 86 | - Class: rviz/MoveCamera 87 | - Class: rviz/Select 88 | - Class: rviz/FocusCamera 89 | - Class: rviz/Measure 90 | - Class: rviz/SetInitialPose 91 | Topic: /initialpose 92 | - Class: rviz/SetGoal 93 | Topic: /move_base_simple/goal 94 | - Class: rviz/PublishPoint 95 | Single click: true 96 | Topic: /clicked_point 97 | Value: true 98 | Views: 99 | Current: 100 | Class: rviz/Orbit 101 | Distance: 3.30293489 102 | Enable Stereo Rendering: 103 | Stereo Eye Separation: 0.0599999987 104 | Stereo Focal Distance: 1 105 | Swap Stereo Eyes: false 106 | Value: false 107 | Focal Point: 108 | X: 0.919049203 109 | Y: 0.11560297 110 | Z: 0.632362902 111 | Focal Shape Fixed Size: true 112 | Focal Shape Size: 0.0500000007 113 | Invert Z Axis: false 114 | Name: Current View 115 | Near Clip Distance: 0.00999999978 116 | Pitch: 0.185397774 117 | Target Frame: 118 | Value: Orbit (rviz) 119 | Yaw: 5.57856464 120 | Saved: ~ 121 | Window Geometry: 122 | Displays: 123 | collapsed: false 124 | Height: 817 125 | Hide Left Dock: false 126 | Hide Right Dock: false 127 | Image: 128 | collapsed: false 129 | QMainWindow State: 
000000ff00000000fd00000004000000000000016a000002a7fc0200000009fb0000001200530065006c0065006300740069006f006e00000001e10000009b0000006400fffffffb0000001e0054006f006f006c002000500072006f007000650072007400690065007302000001ed000001df00000185000000a3fb000000120056006900650077007300200054006f006f02000001df000002110000018500000122fb000000200054006f006f006c002000500072006f0070006500720074006900650073003203000002880000011d000002210000017afb000000100044006900730070006c0061007900730100000028000002a7000000dd00fffffffb0000000a0056006900650077007300000001ba000000f3000000b000fffffffb0000002000730065006c0065006300740069006f006e00200062007500660066006500720200000138000000aa0000023a00000294fb00000014005700690064006500530074006500720065006f02000000e6000000d2000003ee0000030bfb0000000c004b0069006e0065006300740200000186000001060000030c000002610000000100000216000002a7fc0200000005fb0000001e0054006f006f006c002000500072006f00700065007200740069006500730100000041000000780000000000000000fb0000000a0049006d0061006700650100000028000001430000001600fffffffb0000000a0049006d00610067006501000001710000015e0000001600fffffffb0000000a0049006d00610067006501000001fe000000af0000000000000000fb0000001200530065006c0065006300740069006f006e010000025a000000b200000000000000000000000200000490000000a9fc0100000001fb0000000a00560069006500770073030000004e00000080000002e10000019700000003000004eb0000003efc0100000002fb0000000800540069006d00650100000000000004eb0000030000fffffffb0000000800540069006d006501000000000000045000000000000000000000015f000002a700000004000000040000000800000008fc0000000100000002000000010000000a0054006f006f006c00730100000000ffffffff0000000000000000 130 | Selection: 131 | collapsed: false 132 | Time: 133 | collapsed: false 134 | Tool Properties: 135 | collapsed: false 136 | Views: 137 | collapsed: false 138 | Width: 1259 139 | X: 618 140 | Y: 126 141 | -------------------------------------------------------------------------------- /nodes/config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Base Configurations class. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | """ 9 | 10 | import math 11 | import numpy as np 12 | 13 | 14 | # Base Configuration Class 15 | # Don't use this class directly. Instead, sub-class it and override 16 | # the configurations you need to change. 17 | 18 | class Config(object): 19 | """Base configuration class. For custom configurations, create a 20 | sub-class that inherits from this one and override properties 21 | that need to be changed. 22 | """ 23 | # Name the configurations. For example, 'COCO', 'Experiment 3', ...etc. 24 | # Useful if your code needs to do things differently depending on which 25 | # experiment is running. 26 | NAME = None # Override in sub-classes 27 | 28 | # NUMBER OF GPUs to use. For CPU training, use 1 29 | GPU_COUNT = 1 30 | 31 | # Number of images to train with on each GPU. A 12GB GPU can typically 32 | # handle 2 images of 1024x1024px. 33 | # Adjust based on your GPU memory and image sizes. Use the highest 34 | # number that your GPU can handle for best performance. 35 | IMAGES_PER_GPU = 2 36 | 37 | # Number of training steps per epoch 38 | # This doesn't need to match the size of the training set. Tensorboard 39 | # updates are saved at the end of each epoch, so setting this to a 40 | # smaller number means getting more frequent TensorBoard updates. 
41 | # Validation stats are also calculated at each epoch end and they 42 | # might take a while, so don't set this too small to avoid spending 43 | # a lot of time on validation stats. 44 | STEPS_PER_EPOCH = 1000 45 | 46 | # Number of validation steps to run at the end of every training epoch. 47 | # A bigger number improves accuracy of validation stats, but slows 48 | # down the training. 49 | VALIDATION_STEPS = 50 50 | 51 | # The strides of each layer of the FPN Pyramid. These values 52 | # are based on a Resnet101 backbone. 53 | BACKBONE_STRIDES = [4, 8, 16, 32, 64] 54 | 55 | # Number of classification classes (including background) 56 | NUM_CLASSES = 1 # Override in sub-classes 57 | 58 | # Length of square anchor side in pixels 59 | RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512) 60 | 61 | # Ratios of anchors at each cell (width/height) 62 | # A value of 1 represents a square anchor, and 0.5 is a tall anchor 63 | RPN_ANCHOR_RATIOS = [0.5, 1, 2] 64 | 65 | # Anchor stride 66 | # If 1 then anchors are created for each cell in the backbone feature map. 67 | # If 2, then anchors are created for every other cell, and so on. 68 | RPN_ANCHOR_STRIDE = 1 69 | 70 | # Non-max suppression threshold to filter RPN proposals. 71 | # You can reduce this during training to generate more proposals. 72 | RPN_NMS_THRESHOLD = 0.7 73 | 74 | # How many anchors per image to use for RPN training 75 | RPN_TRAIN_ANCHORS_PER_IMAGE = 256 76 | 77 | # ROIs kept after non-maximum suppression (training and inference) 78 | POST_NMS_ROIS_TRAINING = 2000 79 | POST_NMS_ROIS_INFERENCE = 1000 80 | 81 | # If enabled, resizes instance masks to a smaller size to reduce 82 | # memory load. Recommended when using high-resolution images. 83 | USE_MINI_MASK = True 84 | MINI_MASK_SHAPE = (56, 56) # (height, width) of the mini-mask 85 | 86 | # Input image resizing 87 | # Images are resized such that the smallest side is >= IMAGE_MIN_DIM and 88 | # the longest side is <= IMAGE_MAX_DIM. In case both conditions can't 89 | # be satisfied together the IMAGE_MAX_DIM is enforced. 90 | IMAGE_MIN_DIM = 800 91 | IMAGE_MAX_DIM = 1024 92 | # If True, pad images with zeros such that they're (max_dim by max_dim) 93 | IMAGE_PADDING = True # currently, the False option is not supported 94 | 95 | # Image mean (RGB) 96 | MEAN_PIXEL = np.array([123.7, 116.8, 103.9]) 97 | 98 | # Number of ROIs per image to feed to classifier/mask heads 99 | # The Mask RCNN paper uses 512 but often the RPN doesn't generate 100 | # enough positive proposals to fill this and keep a positive:negative 101 | # ratio of 1:3. You can increase the number of proposals by adjusting 102 | # the RPN NMS threshold. 103 | TRAIN_ROIS_PER_IMAGE = 200 104 | 105 | # Percent of positive ROIs used to train classifier/mask heads 106 | ROI_POSITIVE_RATIO = 0.33 107 | 108 | # Pooled ROIs 109 | POOL_SIZE = 7 110 | MASK_POOL_SIZE = 14 111 | MASK_SHAPE = [28, 28] 112 | 113 | # Maximum number of ground truth instances to use in one image 114 | MAX_GT_INSTANCES = 100 115 | 116 | # Bounding box refinement standard deviation for RPN and final detections.
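# Editor's note (based on the upstream Matterport implementation): box refinements are encoded as (dy, dx, log(dh), log(dw)) and divided by these standard deviations, so the network regresses roughly unit-variance targets.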
117 | RPN_BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2]) 118 | BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2]) 119 | 120 | # Max number of final detections 121 | DETECTION_MAX_INSTANCES = 100 122 | 123 | # Minimum probability value to accept a detected instance 124 | # ROIs below this threshold are skipped 125 | DETECTION_MIN_CONFIDENCE = 0.7 126 | 127 | # Non-maximum suppression threshold for detection 128 | DETECTION_NMS_THRESHOLD = 0.3 129 | 130 | # Learning rate and momentum 131 | # The Mask RCNN paper uses lr=0.02, but on TensorFlow it causes 132 | # weights to explode. Likely due to differences in optimizer 133 | # implementation. 134 | LEARNING_RATE = 0.001 135 | LEARNING_MOMENTUM = 0.9 136 | 137 | # Weight decay regularization 138 | WEIGHT_DECAY = 0.0001 139 | 140 | # Use RPN ROIs or externally generated ROIs for training 141 | # Keep this True for most situations. Set to False if you want to train 142 | # the head branches on ROI generated by code rather than the ROIs from 143 | # the RPN. For example, to debug the classifier head without having to 144 | # train the RPN. 145 | USE_RPN_ROIS = True 146 | 147 | def __init__(self): 148 | """Set values of computed attributes.""" 149 | # Effective batch size 150 | self.BATCH_SIZE = self.IMAGES_PER_GPU * self.GPU_COUNT 151 | 152 | # Input image size 153 | self.IMAGE_SHAPE = np.array( 154 | [self.IMAGE_MAX_DIM, self.IMAGE_MAX_DIM, 3]) 155 | 156 | # Compute backbone size from input image size 157 | self.BACKBONE_SHAPES = np.array( 158 | [[int(math.ceil(self.IMAGE_SHAPE[0] / stride)), 159 | int(math.ceil(self.IMAGE_SHAPE[1] / stride))] 160 | for stride in self.BACKBONE_STRIDES]) 161 | 162 | def display(self): 163 | """Display Configuration values.""" 164 | print("\nConfigurations:") 165 | for a in dir(self): 166 | if not a.startswith("__") and not callable(getattr(self, a)): 167 | print("{:30} {}".format(a, getattr(self, a))) 168 | print("\n") 169 | -------------------------------------------------------------------------------- /src/mask_rcnn_ros/config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Base Configurations class. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | """ 9 | 10 | import math 11 | import numpy as np 12 | 13 | 14 | # Base Configuration Class 15 | # Don't use this class directly. Instead, sub-class it and override 16 | # the configurations you need to change. 17 | 18 | class Config(object): 19 | """Base configuration class. For custom configurations, create a 20 | sub-class that inherits from this one and override properties 21 | that need to be changed. 22 | """ 23 | # Name the configurations. For example, 'COCO', 'Experiment 3', ...etc. 24 | # Useful if your code needs to do things differently depending on which 25 | # experiment is running. 26 | NAME = None # Override in sub-classes 27 | 28 | # NUMBER OF GPUs to use. For CPU training, use 1 29 | GPU_COUNT = 1 30 | 31 | # Number of images to train with on each GPU. A 12GB GPU can typically 32 | # handle 2 images of 1024x1024px. 33 | # Adjust based on your GPU memory and image sizes. Use the highest 34 | # number that your GPU can handle for best performance. 35 | IMAGES_PER_GPU = 2 36 | 37 | # Number of training steps per epoch 38 | # This doesn't need to match the size of the training set.
Tensorboard 39 | # updates are saved at the end of each epoch, so setting this to a 40 | # smaller number means getting more frequent TensorBoard updates. 41 | # Validation stats are also calculated at each epoch end and they 42 | # might take a while, so don't set this too small to avoid spending 43 | # a lot of time on validation stats. 44 | STEPS_PER_EPOCH = 1000 45 | 46 | # Number of validation steps to run at the end of every training epoch. 47 | # A bigger number improves accuracy of validation stats, but slows 48 | # down the training. 49 | VALIDATION_STEPS = 50 50 | 51 | # The strides of each layer of the FPN Pyramid. These values 52 | # are based on a Resnet101 backbone. 53 | BACKBONE_STRIDES = [4, 8, 16, 32, 64] 54 | 55 | # Number of classification classes (including background) 56 | NUM_CLASSES = 1 # Override in sub-classes 57 | 58 | # Length of square anchor side in pixels 59 | RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512) 60 | 61 | # Ratios of anchors at each cell (width/height) 62 | # A value of 1 represents a square anchor, and 0.5 is a tall anchor 63 | RPN_ANCHOR_RATIOS = [0.5, 1, 2] 64 | 65 | # Anchor stride 66 | # If 1 then anchors are created for each cell in the backbone feature map. 67 | # If 2, then anchors are created for every other cell, and so on. 68 | RPN_ANCHOR_STRIDE = 1 69 | 70 | # Non-max suppression threshold to filter RPN proposals. 71 | # You can reduce this during training to generate more proposals. 72 | RPN_NMS_THRESHOLD = 0.7 73 | 74 | # How many anchors per image to use for RPN training 75 | RPN_TRAIN_ANCHORS_PER_IMAGE = 256 76 | 77 | # ROIs kept after non-maximum suppression (training and inference) 78 | POST_NMS_ROIS_TRAINING = 2000 79 | POST_NMS_ROIS_INFERENCE = 1000 80 | 81 | # If enabled, resizes instance masks to a smaller size to reduce 82 | # memory load. Recommended when using high-resolution images. 83 | USE_MINI_MASK = True 84 | MINI_MASK_SHAPE = (56, 56) # (height, width) of the mini-mask 85 | 86 | # Input image resizing 87 | # Images are resized such that the smallest side is >= IMAGE_MIN_DIM and 88 | # the longest side is <= IMAGE_MAX_DIM. In case both conditions can't 89 | # be satisfied together the IMAGE_MAX_DIM is enforced. 90 | IMAGE_MIN_DIM = 800 91 | IMAGE_MAX_DIM = 1024 92 | # If True, pad images with zeros such that they're (max_dim by max_dim) 93 | IMAGE_PADDING = True # currently, the False option is not supported 94 | 95 | # Image mean (RGB) 96 | MEAN_PIXEL = np.array([123.7, 116.8, 103.9]) 97 | 98 | # Number of ROIs per image to feed to classifier/mask heads 99 | # The Mask RCNN paper uses 512 but often the RPN doesn't generate 100 | # enough positive proposals to fill this and keep a positive:negative 101 | # ratio of 1:3. You can increase the number of proposals by adjusting 102 | # the RPN NMS threshold. 103 | TRAIN_ROIS_PER_IMAGE = 200 104 | 105 | # Percent of positive ROIs used to train classifier/mask heads 106 | ROI_POSITIVE_RATIO = 0.33 107 | 108 | # Pooled ROIs 109 | POOL_SIZE = 7 110 | MASK_POOL_SIZE = 14 111 | MASK_SHAPE = [28, 28] 112 | 113 | # Maximum number of ground truth instances to use in one image 114 | MAX_GT_INSTANCES = 100 115 | 116 | # Bounding box refinement standard deviation for RPN and final detections.
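# Editor's note (based on the upstream Matterport implementation): box refinements are encoded as (dy, dx, log(dh), log(dw)) and divided by these standard deviations, so the network regresses roughly unit-variance targets.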
117 | RPN_BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2]) 118 | BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2]) 119 | 120 | # Max number of final detections 121 | DETECTION_MAX_INSTANCES = 100 122 | 123 | # Minimum probability value to accept a detected instance 124 | # ROIs below this threshold are skipped 125 | DETECTION_MIN_CONFIDENCE = 0.7 126 | 127 | # Non-maximum suppression threshold for detection 128 | DETECTION_NMS_THRESHOLD = 0.3 129 | 130 | # Learning rate and momentum 131 | # The Mask RCNN paper uses lr=0.02, but on TensorFlow it causes 132 | # weights to explode. Likely due to differences in optimizer 133 | # implementation. 134 | LEARNING_RATE = 0.001 135 | LEARNING_MOMENTUM = 0.9 136 | 137 | # Weight decay regularization 138 | WEIGHT_DECAY = 0.0001 139 | 140 | # Use RPN ROIs or externally generated ROIs for training 141 | # Keep this True for most situations. Set to False if you want to train 142 | # the head branches on ROI generated by code rather than the ROIs from 143 | # the RPN. For example, to debug the classifier head without having to 144 | # train the RPN. 145 | USE_RPN_ROIS = True 146 | 147 | def __init__(self): 148 | """Set values of computed attributes.""" 149 | # Effective batch size 150 | self.BATCH_SIZE = self.IMAGES_PER_GPU * self.GPU_COUNT 151 | 152 | # Input image size 153 | self.IMAGE_SHAPE = np.array( 154 | [self.IMAGE_MAX_DIM, self.IMAGE_MAX_DIM, 3]) 155 | 156 | # Compute backbone size from input image size 157 | self.BACKBONE_SHAPES = np.array( 158 | [[int(math.ceil(self.IMAGE_SHAPE[0] / stride)), 159 | int(math.ceil(self.IMAGE_SHAPE[1] / stride))] 160 | for stride in self.BACKBONE_STRIDES]) 161 | 162 | def display(self): 163 | """Display Configuration values.""" 164 | print("\nConfigurations:") 165 | for a in dir(self): 166 | if not a.startswith("__") and not callable(getattr(self, a)): 167 | print("{:30} {}".format(a, getattr(self, a))) 168 | print("\n") 169 | -------------------------------------------------------------------------------- /nodes/parallel_model.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Multi-GPU Support for Keras. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | 9 | Ideas and small code snippets from these sources: 10 | https://github.com/fchollet/keras/issues/2436 11 | https://medium.com/@kuza55/transparent-multi-gpu-training-on-tensorflow-with-keras-8b0016fd9012 12 | https://github.com/avolkov1/keras_experiments/blob/master/keras_exp/multigpu/ 13 | https://github.com/fchollet/keras/blob/master/keras/utils/training_utils.py 14 | """ 15 | 16 | import tensorflow as tf 17 | import keras.backend as K 18 | import keras.layers as KL 19 | import keras.models as KM 20 | 21 | 22 | class ParallelModel(KM.Model): 23 | """Subclasses the standard Keras Model and adds multi-GPU support. 24 | It works by creating a copy of the model on each GPU. Then it slices 25 | the inputs and sends a slice to each copy of the model, and then 26 | merges the outputs together and applies the loss on the combined 27 | outputs. 28 | """ 29 | 30 | def __init__(self, keras_model, gpu_count): 31 | """Class constructor. 32 | keras_model: The Keras model to parallelize 33 | gpu_count: Number of GPUs.
Must be > 1 34 | """ 35 | self.inner_model = keras_model 36 | self.gpu_count = gpu_count 37 | merged_outputs = self.make_parallel() 38 | super(ParallelModel, self).__init__(inputs=self.inner_model.inputs, 39 | outputs=merged_outputs) 40 | 41 | def __getattribute__(self, attrname): 42 | """Redirect loading and saving methods to the inner model. That's where 43 | the weights are stored.""" 44 | if 'load' in attrname or 'save' in attrname: 45 | return getattr(self.inner_model, attrname) 46 | return super(ParallelModel, self).__getattribute__(attrname) 47 | 48 | def summary(self, *args, **kwargs): 49 | """Override summary() to display summaries of both, the wrapper 50 | and inner models.""" 51 | super(ParallelModel, self).summary(*args, **kwargs) 52 | self.inner_model.summary(*args, **kwargs) 53 | 54 | def make_parallel(self): 55 | """Creates a new wrapper model that consists of multiple replicas of 56 | the original model placed on different GPUs. 57 | """ 58 | # Slice inputs. Slice inputs on the CPU to avoid sending a copy 59 | # of the full inputs to all GPUs. Saves on bandwidth and memory. 60 | input_slices = {name: tf.split(x, self.gpu_count) 61 | for name, x in zip(self.inner_model.input_names, 62 | self.inner_model.inputs)} 63 | 64 | output_names = self.inner_model.output_names 65 | outputs_all = [] 66 | for i in range(len(self.inner_model.outputs)): 67 | outputs_all.append([]) 68 | 69 | # Run the model call() on each GPU to place the ops there 70 | for i in range(self.gpu_count): 71 | with tf.device('/gpu:%d' % i): 72 | with tf.name_scope('tower_%d' % i): 73 | # Run a slice of inputs through this replica 74 | zipped_inputs = zip(self.inner_model.input_names, 75 | self.inner_model.inputs) 76 | inputs = [ 77 | KL.Lambda(lambda s: input_slices[name][i], 78 | output_shape=lambda s: (None,) + s[1:])(tensor) 79 | for name, tensor in zipped_inputs] 80 | # Create the model replica and get the outputs 81 | outputs = self.inner_model(inputs) 82 | if not isinstance(outputs, list): 83 | outputs = [outputs] 84 | # Save the outputs for merging back together later 85 | for l, o in enumerate(outputs): 86 | outputs_all[l].append(o) 87 | 88 | # Merge outputs on CPU 89 | with tf.device('/cpu:0'): 90 | merged = [] 91 | for outputs, name in zip(outputs_all, output_names): 92 | # If outputs are numbers without dimensions, add a batch dim. 93 | def add_dim(tensor): 94 | """Add a dimension to tensors that don't have any.""" 95 | if K.int_shape(tensor) == (): 96 | return KL.Lambda(lambda t: K.reshape(t, [1, 1]))(tensor) 97 | return tensor 98 | outputs = list(map(add_dim, outputs)) 99 | 100 | # Concatenate 101 | merged.append(KL.Concatenate(axis=0, name=name)(outputs)) 102 | return merged 103 | 104 | 105 | if __name__ == "__main__": 106 | # Testing code below. It creates a simple model to train on MNIST and 107 | # tries to run it on 2 GPUs. It saves the graph so it can be viewed 108 | # in TensorBoard. Run it as: 109 | # 110 | # python3 parallel_model.py 111 | 112 | import os 113 | import numpy as np 114 | import keras.optimizers 115 | from keras.datasets import mnist 116 | from keras.preprocessing.image import ImageDataGenerator 117 | 118 | GPU_COUNT = 2 119 | 120 | # Root directory of the project 121 | ROOT_DIR = os.getcwd() 122 | 123 | # Directory to save logs and trained model 124 | MODEL_DIR = os.path.join(ROOT_DIR, "logs/parallel") 125 | 126 | def build_model(x_train, num_classes): 127 | # Reset default graph. 
Keras leaves old ops in the graph, 128 | # which are ignored for execution but clutter graph 129 | # visualization in TensorBoard. 130 | tf.reset_default_graph() 131 | 132 | inputs = KL.Input(shape=x_train.shape[1:], name="input_image") 133 | x = KL.Conv2D(32, (3, 3), activation='relu', padding="same", 134 | name="conv1")(inputs) 135 | x = KL.Conv2D(64, (3, 3), activation='relu', padding="same", 136 | name="conv2")(x) 137 | x = KL.MaxPooling2D(pool_size=(2, 2), name="pool1")(x) 138 | x = KL.Flatten(name="flat1")(x) 139 | x = KL.Dense(128, activation='relu', name="dense1")(x) 140 | x = KL.Dense(num_classes, activation='softmax', name="dense2")(x) 141 | 142 | return KM.Model(inputs, x, "digit_classifier_model") 143 | 144 | # Load MNIST Data 145 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 146 | x_train = np.expand_dims(x_train, -1).astype('float32') / 255 147 | x_test = np.expand_dims(x_test, -1).astype('float32') / 255 148 | 149 | print('x_train shape:', x_train.shape) 150 | print('x_test shape:', x_test.shape) 151 | 152 | # Build data generator and model 153 | datagen = ImageDataGenerator() 154 | model = build_model(x_train, 10) 155 | 156 | # Add multi-GPU support. 157 | model = ParallelModel(model, GPU_COUNT) 158 | 159 | optimizer = keras.optimizers.SGD(lr=0.01, momentum=0.9, clipnorm=5.0) 160 | 161 | model.compile(loss='sparse_categorical_crossentropy', 162 | optimizer=optimizer, metrics=['accuracy']) 163 | 164 | model.summary() 165 | 166 | # Train 167 | model.fit_generator( 168 | datagen.flow(x_train, y_train, batch_size=64), 169 | steps_per_epoch=50, epochs=10, verbose=1, 170 | validation_data=(x_test, y_test), 171 | callbacks=[keras.callbacks.TensorBoard(log_dir=MODEL_DIR, 172 | write_graph=True)] 173 | ) 174 | -------------------------------------------------------------------------------- /src/mask_rcnn_ros/parallel_model.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Multi-GPU Support for Keras. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | 9 | Ideas and small code snippets from these sources: 10 | https://github.com/fchollet/keras/issues/2436 11 | https://medium.com/@kuza55/transparent-multi-gpu-training-on-tensorflow-with-keras-8b0016fd9012 12 | https://github.com/avolkov1/keras_experiments/blob/master/keras_exp/multigpu/ 13 | https://github.com/fchollet/keras/blob/master/keras/utils/training_utils.py 14 | """ 15 | 16 | import tensorflow as tf 17 | import keras.backend as K 18 | import keras.layers as KL 19 | import keras.models as KM 20 | 21 | 22 | class ParallelModel(KM.Model): 23 | """Subclasses the standard Keras Model and adds multi-GPU support. 24 | It works by creating a copy of the model on each GPU. Then it slices 25 | the inputs and sends a slice to each copy of the model, and then 26 | merges the outputs together and applies the loss on the combined 27 | outputs. 28 | """ 29 | 30 | def __init__(self, keras_model, gpu_count): 31 | """Class constructor. 32 | keras_model: The Keras model to parallelize 33 | gpu_count: Number of GPUs. Must be > 1 34 | """ 35 | self.inner_model = keras_model 36 | self.gpu_count = gpu_count 37 | merged_outputs = self.make_parallel() 38 | super(ParallelModel, self).__init__(inputs=self.inner_model.inputs, 39 | outputs=merged_outputs) 40 | 41 | def __getattribute__(self, attrname): 42 | """Redirect loading and saving methods to the inner model.
That's where 43 | the weights are stored.""" 44 | if 'load' in attrname or 'save' in attrname: 45 | return getattr(self.inner_model, attrname) 46 | return super(ParallelModel, self).__getattribute__(attrname) 47 | 48 | def summary(self, *args, **kwargs): 49 | """Override summary() to display summaries of both, the wrapper 50 | and inner models.""" 51 | super(ParallelModel, self).summary(*args, **kwargs) 52 | self.inner_model.summary(*args, **kwargs) 53 | 54 | def make_parallel(self): 55 | """Creates a new wrapper model that consists of multiple replicas of 56 | the original model placed on different GPUs. 57 | """ 58 | # Slice inputs. Slice inputs on the CPU to avoid sending a copy 59 | # of the full inputs to all GPUs. Saves on bandwidth and memory. 60 | input_slices = {name: tf.split(x, self.gpu_count) 61 | for name, x in zip(self.inner_model.input_names, 62 | self.inner_model.inputs)} 63 | 64 | output_names = self.inner_model.output_names 65 | outputs_all = [] 66 | for i in range(len(self.inner_model.outputs)): 67 | outputs_all.append([]) 68 | 69 | # Run the model call() on each GPU to place the ops there 70 | for i in range(self.gpu_count): 71 | with tf.device('/gpu:%d' % i): 72 | with tf.name_scope('tower_%d' % i): 73 | # Run a slice of inputs through this replica 74 | zipped_inputs = zip(self.inner_model.input_names, 75 | self.inner_model.inputs) 76 | inputs = [ 77 | KL.Lambda(lambda s: input_slices[name][i], 78 | output_shape=lambda s: (None,) + s[1:])(tensor) 79 | for name, tensor in zipped_inputs] 80 | # Create the model replica and get the outputs 81 | outputs = self.inner_model(inputs) 82 | if not isinstance(outputs, list): 83 | outputs = [outputs] 84 | # Save the outputs for merging back together later 85 | for l, o in enumerate(outputs): 86 | outputs_all[l].append(o) 87 | 88 | # Merge outputs on CPU 89 | with tf.device('/cpu:0'): 90 | merged = [] 91 | for outputs, name in zip(outputs_all, output_names): 92 | # If outputs are numbers without dimensions, add a batch dim. 93 | def add_dim(tensor): 94 | """Add a dimension to tensors that don't have any.""" 95 | if K.int_shape(tensor) == (): 96 | return KL.Lambda(lambda t: K.reshape(t, [1, 1]))(tensor) 97 | return tensor 98 | outputs = list(map(add_dim, outputs)) 99 | 100 | # Concatenate 101 | merged.append(KL.Concatenate(axis=0, name=name)(outputs)) 102 | return merged 103 | 104 | 105 | if __name__ == "__main__": 106 | # Testing code below. It creates a simple model to train on MNIST and 107 | # tries to run it on 2 GPUs. It saves the graph so it can be viewed 108 | # in TensorBoard. Run it as: 109 | # 110 | # python3 parallel_model.py 111 | 112 | import os 113 | import numpy as np 114 | import keras.optimizers 115 | from keras.datasets import mnist 116 | from keras.preprocessing.image import ImageDataGenerator 117 | 118 | GPU_COUNT = 2 119 | 120 | # Root directory of the project 121 | ROOT_DIR = os.getcwd() 122 | 123 | # Directory to save logs and trained model 124 | MODEL_DIR = os.path.join(ROOT_DIR, "logs/parallel") 125 | 126 | def build_model(x_train, num_classes): 127 | # Reset default graph. Keras leaves old ops in the graph, 128 | # which are ignored for execution but clutter graph 129 | # visualization in TensorBoard. 
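# Editor's note: tf.reset_default_graph is TF1-era API; on TF2 it is only available as tf.compat.v1.reset_default_graph.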
130 | tf.reset_default_graph() 131 | 132 | inputs = KL.Input(shape=x_train.shape[1:], name="input_image") 133 | x = KL.Conv2D(32, (3, 3), activation='relu', padding="same", 134 | name="conv1")(inputs) 135 | x = KL.Conv2D(64, (3, 3), activation='relu', padding="same", 136 | name="conv2")(x) 137 | x = KL.MaxPooling2D(pool_size=(2, 2), name="pool1")(x) 138 | x = KL.Flatten(name="flat1")(x) 139 | x = KL.Dense(128, activation='relu', name="dense1")(x) 140 | x = KL.Dense(num_classes, activation='softmax', name="dense2")(x) 141 | 142 | return KM.Model(inputs, x, "digit_classifier_model") 143 | 144 | # Load MNIST Data 145 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 146 | x_train = np.expand_dims(x_train, -1).astype('float32') / 255 147 | x_test = np.expand_dims(x_test, -1).astype('float32') / 255 148 | 149 | print('x_train shape:', x_train.shape) 150 | print('x_test shape:', x_test.shape) 151 | 152 | # Build data generator and model 153 | datagen = ImageDataGenerator() 154 | model = build_model(x_train, 10) 155 | 156 | # Add multi-GPU support. 157 | model = ParallelModel(model, GPU_COUNT) 158 | 159 | optimizer = keras.optimizers.SGD(lr=0.01, momentum=0.9, clipnorm=5.0) 160 | 161 | model.compile(loss='sparse_categorical_crossentropy', 162 | optimizer=optimizer, metrics=['accuracy']) 163 | 164 | model.summary() 165 | 166 | # Train 167 | model.fit_generator( 168 | datagen.flow(x_train, y_train, batch_size=64), 169 | steps_per_epoch=50, epochs=10, verbose=1, 170 | validation_data=(x_test, y_test), 171 | callbacks=[keras.callbacks.TensorBoard(log_dir=MODEL_DIR, 172 | write_graph=True)] 173 | ) 174 | -------------------------------------------------------------------------------- /nodes/mask_rcnn_node: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os 3 | import threading 4 | import numpy as np 5 | 6 | import cv2 7 | from cv_bridge import CvBridge 8 | import rospy 9 | from sensor_msgs.msg import Image 10 | from sensor_msgs.msg import RegionOfInterest 11 | 12 | import coco 13 | import utils 14 | import model as modellib 15 | import visualize 16 | from mask_rcnn_ros.msg import Result 17 | 18 | 19 | # Local path to trained weights file 20 | ROS_HOME = os.environ.get('ROS_HOME', os.path.join(os.environ['HOME'], '.ros')) 21 | COCO_MODEL_PATH = os.path.join(ROS_HOME, 'mask_rcnn_coco.h5') 22 | 23 | # COCO Class names 24 | # Index of the class in the list is its ID. 
For example, to get ID of 25 | # the teddy bear class, use: CLASS_NAMES.index('teddy bear') 26 | CLASS_NAMES = ['BG', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 27 | 'bus', 'train', 'truck', 'boat', 'traffic light', 28 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 29 | 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 30 | 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 31 | 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 32 | 'kite', 'baseball bat', 'baseball glove', 'skateboard', 33 | 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 34 | 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 35 | 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 36 | 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 37 | 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 38 | 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 39 | 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 40 | 'teddy bear', 'hair drier', 'toothbrush'] 41 | 42 | 43 | class InferenceConfig(coco.CocoConfig): 44 | # Set batch size to 1 since we'll be running inference on 45 | # one image at a time. Batch size = GPU_COUNT * IMAGES_PER_GPU 46 | GPU_COUNT = 1 47 | IMAGES_PER_GPU = 1 48 | 49 | 50 | class MaskRCNNNode(object): 51 | def __init__(self): 52 | self._cv_bridge = CvBridge() 53 | 54 | config = InferenceConfig() 55 | config.display() 56 | 57 | self._visualization = rospy.get_param('~visualization', True) 58 | 59 | # Create model object in inference mode. 60 | self._model = modellib.MaskRCNN(mode="inference", model_dir="", 61 | config=config) 62 | # Load weights trained on MS-COCO 63 | model_path = rospy.get_param('~model_path', COCO_MODEL_PATH) 64 | # Download COCO trained weights from Releases if needed 65 | if model_path == COCO_MODEL_PATH and not os.path.exists(COCO_MODEL_PATH): 66 | utils.download_trained_weights(COCO_MODEL_PATH) 67 | 68 | self._model.load_weights(model_path, by_name=True) 69 | 70 | self._class_names = rospy.get_param('~class_names', CLASS_NAMES) 71 | 72 | self._last_msg = None 73 | self._msg_lock = threading.Lock() 74 | 75 | self._class_colors = visualize.random_colors(len(CLASS_NAMES)) 76 | 77 | self._publish_rate = rospy.get_param('~publish_rate', 100) 78 | 79 | def run(self): 80 | self._result_pub = rospy.Publisher('~result', Result, queue_size=1) 81 | vis_pub = rospy.Publisher('~visualization', Image, queue_size=1) 82 | rospy.Subscriber('~input', Image, 83 | self._image_callback, queue_size=1) 84 | 85 | rate = rospy.Rate(self._publish_rate) 86 | while not rospy.is_shutdown(): 87 | if self._msg_lock.acquire(False): 88 | msg = self._last_msg 89 | self._last_msg = None 90 | self._msg_lock.release() 91 | else: 92 | rate.sleep() 93 | continue 94 | 95 | if msg is not None: 96 | np_image = self._cv_bridge.imgmsg_to_cv2(msg, 'bgr8') 97 | 98 | # Run detection 99 | results = self._model.detect([np_image], verbose=0) 100 | result = results[0] 101 | result_msg = self._build_result_msg(msg, result) 102 | self._result_pub.publish(result_msg) 103 | 104 | # Visualize results 105 | if self._visualization: 106 | cv_result = self._visualize_cv(result, np_image) 107 | image_msg = self._cv_bridge.cv2_to_imgmsg(cv_result, 'bgr8') 108 | vis_pub.publish(image_msg) 109 | 110 | rate.sleep() 111 | 112 | def _build_result_msg(self, msg, result): 113 | result_msg = Result() 114 | result_msg.header = msg.header 115 | for i, (y1, x1, y2, x2) in enumerate(result['rois']): 116 | box = RegionOfInterest() 117 | 
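# Editor's note: np.asscalar converts a NumPy scalar to a plain Python number; it is deprecated in newer NumPy (ndarray.item() is the replacement) but works with the numpy==1.13.3 pinned in requirements.txt.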
box.x_offset = np.asscalar(x1) 118 | box.y_offset = np.asscalar(y1) 119 | box.height = np.asscalar(y2 - y1) 120 | box.width = np.asscalar(x2 - x1) 121 | result_msg.boxes.append(box) 122 | 123 | class_id = result['class_ids'][i] 124 | result_msg.class_ids.append(class_id) 125 | 126 | class_name = self._class_names[class_id] 127 | result_msg.class_names.append(class_name) 128 | 129 | score = result['scores'][i] 130 | result_msg.scores.append(score) 131 | 132 | mask = Image() 133 | mask.header = msg.header 134 | mask.height = result['masks'].shape[0] 135 | mask.width = result['masks'].shape[1] 136 | mask.encoding = "mono8" 137 | mask.is_bigendian = False 138 | mask.step = mask.width 139 | mask.data = (result['masks'][:, :, i] * 255).tobytes() 140 | result_msg.masks.append(mask) 141 | return result_msg 142 | 143 | def _visualize(self, result, image): 144 | from matplotlib.backends.backend_agg import FigureCanvasAgg 145 | from matplotlib.figure import Figure 146 | 147 | fig = Figure() 148 | canvas = FigureCanvasAgg(fig) 149 | axes = fig.gca() 150 | visualize.display_instances(image, result['rois'], result['masks'], 151 | result['class_ids'], CLASS_NAMES, 152 | result['scores'], ax=axes, 153 | class_colors=self._class_colors) 154 | fig.tight_layout() 155 | canvas.draw() 156 | result = np.fromstring(canvas.tostring_rgb(), dtype='uint8') 157 | 158 | _, _, w, h = fig.bbox.bounds 159 | result = result.reshape((int(h), int(w), 3)) 160 | return result 161 | 162 | def _visualize_cv(self, result, image): 163 | 164 | image = visualize.display_instances_cv(image, result['rois'], result['masks'], 165 | result['class_ids'], CLASS_NAMES, 166 | result['scores'], 167 | class_colors=self._class_colors) 168 | 169 | return image 170 | 171 | def _image_callback(self, msg): 172 | rospy.logdebug("Get an image") 173 | if self._msg_lock.acquire(False): 174 | self._last_msg = msg 175 | self._msg_lock.release() 176 | 177 | 178 | def main(): 179 | rospy.init_node('mask_rcnn') 180 | 181 | node = MaskRCNNNode() 182 | node.run() 183 | 184 | 185 | if __name__ == '__main__': 186 | main() 187 | -------------------------------------------------------------------------------- /nodes/shapes.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Configurations and data loading code for the synthetic Shapes dataset. 4 | This is a duplicate of the code in the notebook train_shapes.ipynb for easy 5 | import into other notebooks, such as inspect_model.ipynb. 6 | 7 | Copyright (c) 2017 Matterport, Inc. 8 | Licensed under the MIT License (see LICENSE for details) 9 | Written by Waleed Abdulla 10 | """ 11 | 12 | import math 13 | import random 14 | import numpy as np 15 | import cv2 16 | 17 | from config import Config 18 | import utils 19 | 20 | 21 | class ShapesConfig(Config): 22 | """Configuration for training on the toy shapes dataset. 23 | Derives from the base Config class and overrides values specific 24 | to the toy shapes dataset. 25 | """ 26 | # Give the configuration a recognizable name 27 | NAME = "shapes" 28 | 29 | # Train on 1 GPU and 8 images per GPU. We can put multiple images on each 30 | # GPU because the images are small. Batch size is 8 (GPUs * images/GPU). 31 | GPU_COUNT = 1 32 | IMAGES_PER_GPU = 8 33 | 34 | # Number of classes (including background) 35 | NUM_CLASSES = 1 + 3 # background + 3 shapes 36 | 37 | # Use small images for faster training. Set the limits of the small side and 38 | # the large side, and that determines the image shape.
39 | IMAGE_MIN_DIM = 128 40 | IMAGE_MAX_DIM = 128 41 | 42 | # Use smaller anchors because our image and objects are small 43 | RPN_ANCHOR_SCALES = (8, 16, 32, 64, 128) # anchor side in pixels 44 | 45 | # Reduce training ROIs per image because the images are small and have 46 | # few objects. Aim to allow ROI sampling to pick 33% positive ROIs. 47 | TRAIN_ROIS_PER_IMAGE = 32 48 | 49 | # Use a small epoch since the data is simple 50 | STEPS_PER_EPOCH = 100 51 | 52 | # use small validation steps since the epoch is small 53 | VALIDATION_STEPS = 5 54 | 55 | 56 | class ShapesDataset(utils.Dataset): 57 | """Generates the shapes synthetic dataset. The dataset consists of simple 58 | shapes (triangles, squares, circles) placed randomly on a blank surface. 59 | The images are generated on the fly. No file access required. 60 | """ 61 | 62 | def load_shapes(self, count, height, width): 63 | """Generate the requested number of synthetic images. 64 | count: number of images to generate. 65 | height, width: the size of the generated images. 66 | """ 67 | # Add classes 68 | self.add_class("shapes", 1, "square") 69 | self.add_class("shapes", 2, "circle") 70 | self.add_class("shapes", 3, "triangle") 71 | 72 | # Add images 73 | # Generate random specifications of images (i.e. color and 74 | # list of shapes sizes and locations). This is more compact than 75 | # actual images. Images are generated on the fly in load_image(). 76 | for i in range(count): 77 | bg_color, shapes = self.random_image(height, width) 78 | self.add_image("shapes", image_id=i, path=None, 79 | width=width, height=height, 80 | bg_color=bg_color, shapes=shapes) 81 | 82 | def load_image(self, image_id): 83 | """Generate an image from the specs of the given image ID. 84 | Typically this function loads the image from a file, but 85 | in this case it generates the image on the fly from the 86 | specs in image_info. 87 | """ 88 | info = self.image_info[image_id] 89 | bg_color = np.array(info['bg_color']).reshape([1, 1, 3]) 90 | image = np.ones([info['height'], info['width'], 3], dtype=np.uint8) 91 | image = image * bg_color.astype(np.uint8) 92 | for shape, color, dims in info['shapes']: 93 | image = self.draw_shape(image, shape, dims, color) 94 | return image 95 | 96 | def image_reference(self, image_id): 97 | """Return the shapes data of the image.""" 98 | info = self.image_info[image_id] 99 | if info["source"] == "shapes": 100 | return info["shapes"] 101 | else: 102 | return super(ShapesDataset, self).image_reference(image_id) 103 | 104 | def load_mask(self, image_id): 105 | """Generate instance masks for shapes of the given image ID. 106 | """ 107 | info = self.image_info[image_id] 108 | shapes = info['shapes'] 109 | count = len(shapes) 110 | mask = np.zeros([info['height'], info['width'], count], dtype=np.uint8) 111 | for i, (shape, _, dims) in enumerate(info['shapes']): 112 | mask[:, :, i:i + 1] = self.draw_shape(mask[:, :, i:i + 1].copy(), 113 | shape, dims, 1) 114 | # Handle occlusions 115 | occlusion = np.logical_not(mask[:, :, -1]).astype(np.uint8) 116 | for i in range(count - 2, -1, -1): 117 | mask[:, :, i] = mask[:, :, i] * occlusion 118 | occlusion = np.logical_and( 119 | occlusion, np.logical_not(mask[:, :, i])) 120 | # Map class names to class IDs.
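# Editor's note: self.class_names is assumed to be populated from the add_class() calls above by Dataset.prepare() in the upstream utils module, so index() maps a shape name back to its integer class ID.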
121 |         class_ids = np.array([self.class_names.index(s[0]) for s in shapes])
122 |         return mask, class_ids.astype(np.int32)
123 | 
124 |     def draw_shape(self, image, shape, dims, color):
125 |         """Draws a shape from the given specs."""
126 |         # Get the center x, y and the size s
127 |         x, y, s = dims
128 |         if shape == 'square':
129 |             image = cv2.rectangle(image, (x - s, y - s),
130 |                                   (x + s, y + s), color, -1)
131 |         elif shape == "circle":
132 |             image = cv2.circle(image, (x, y), s, color, -1)
133 |         elif shape == "triangle":
134 |             points = np.array([[(x, y - s),
135 |                                 (x - s / math.sin(math.radians(60)), y + s),
136 |                                 (x + s / math.sin(math.radians(60)), y + s),
137 |                                 ]], dtype=np.int32)
138 |             image = cv2.fillPoly(image, points, color)
139 |         return image
140 | 
141 |     def random_shape(self, height, width):
142 |         """Generates specifications of a random shape that lies within
143 |         the given height and width boundaries.
144 |         Returns a tuple of three values:
145 |         * The shape name (square, circle, ...)
146 |         * Shape color: a tuple of 3 values, RGB.
147 |         * Shape dimensions: A tuple of values that define the shape size
148 |           and location. Differs per shape type.
149 |         """
150 |         # Shape
151 |         shape = random.choice(["square", "circle", "triangle"])
152 |         # Color
153 |         color = tuple([random.randint(0, 255) for _ in range(3)])
154 |         # Center x, y
155 |         buffer = 20
156 |         y = random.randint(buffer, height - buffer - 1)
157 |         x = random.randint(buffer, width - buffer - 1)
158 |         # Size
159 |         s = random.randint(buffer, height // 4)
160 |         return shape, color, (x, y, s)
161 | 
162 |     def random_image(self, height, width):
163 |         """Creates random specifications of an image with multiple shapes.
164 |         Returns the background color of the image and a list of shape
165 |         specifications that can be used to draw the image.
166 |         """
167 |         # Pick a random background color
168 |         bg_color = np.array([random.randint(0, 255) for _ in range(3)])
169 |         # Generate a few random shapes and record their
170 |         # bounding boxes
171 |         shapes = []
172 |         boxes = []
173 |         N = random.randint(1, 4)
174 |         for _ in range(N):
175 |             shape, color, dims = self.random_shape(height, width)
176 |             shapes.append((shape, color, dims))
177 |             x, y, s = dims
178 |             boxes.append([y - s, x - s, y + s, x + s])
179 |         # Apply non-max suppression with a 0.3 threshold to avoid
180 |         # shapes covering each other
181 |         keep_ixs = utils.non_max_suppression(
182 |             np.array(boxes), np.arange(N), 0.3)
183 |         shapes = [s for i, s in enumerate(shapes) if i in keep_ixs]
184 |         return bg_color, shapes
185 | 
--------------------------------------------------------------------------------
/src/mask_rcnn_ros/shapes.py:
--------------------------------------------------------------------------------
1 | """
2 | Mask R-CNN
3 | Configurations and data loading code for the synthetic Shapes dataset.
4 | This is a duplicate of the code in the notebook train_shapes.ipynb for easy
5 | import into other notebooks, such as inspect_model.ipynb.
6 | 
7 | Copyright (c) 2017 Matterport, Inc.
8 | Licensed under the MIT License (see LICENSE for details)
9 | Written by Waleed Abdulla
10 | """
11 | 
12 | import math
13 | import random
14 | import numpy as np
15 | import cv2
16 | 
17 | from config import Config
18 | import utils
19 | 
20 | 
21 | class ShapesConfig(Config):
22 |     """Configuration for training on the toy shapes dataset.
23 |     Derives from the base Config class and overrides values specific
24 |     to the toy shapes dataset.
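 
    For example (a minimal sketch, assuming the base Config API from
    config.py):
 
        config = ShapesConfig()
        config.display()  # print the resulting settings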
25 | """ 26 | # Give the configuration a recognizable name 27 | NAME = "shapes" 28 | 29 | # Train on 1 GPU and 8 images per GPU. We can put multiple images on each 30 | # GPU because the images are small. Batch size is 8 (GPUs * images/GPU). 31 | GPU_COUNT = 1 32 | IMAGES_PER_GPU = 8 33 | 34 | # Number of classes (including background) 35 | NUM_CLASSES = 1 + 3 # background + 3 shapes 36 | 37 | # Use small images for faster training. Set the limits of the small side 38 | # the large side, and that determines the image shape. 39 | IMAGE_MIN_DIM = 128 40 | IMAGE_MAX_DIM = 128 41 | 42 | # Use smaller anchors because our image and objects are small 43 | RPN_ANCHOR_SCALES = (8, 16, 32, 64, 128) # anchor side in pixels 44 | 45 | # Reduce training ROIs per image because the images are small and have 46 | # few objects. Aim to allow ROI sampling to pick 33% positive ROIs. 47 | TRAIN_ROIS_PER_IMAGE = 32 48 | 49 | # Use a small epoch since the data is simple 50 | STEPS_PER_EPOCH = 100 51 | 52 | # use small validation steps since the epoch is small 53 | VALIDATION_STEPS = 5 54 | 55 | 56 | class ShapesDataset(utils.Dataset): 57 | """Generates the shapes synthetic dataset. The dataset consists of simple 58 | shapes (triangles, squares, circles) placed randomly on a blank surface. 59 | The images are generated on the fly. No file access required. 60 | """ 61 | 62 | def load_shapes(self, count, height, width): 63 | """Generate the requested number of synthetic images. 64 | count: number of images to generate. 65 | height, width: the size of the generated images. 66 | """ 67 | # Add classes 68 | self.add_class("shapes", 1, "square") 69 | self.add_class("shapes", 2, "circle") 70 | self.add_class("shapes", 3, "triangle") 71 | 72 | # Add images 73 | # Generate random specifications of images (i.e. color and 74 | # list of shapes sizes and locations). This is more compact than 75 | # actual images. Images are generated on the fly in load_image(). 76 | for i in range(count): 77 | bg_color, shapes = self.random_image(height, width) 78 | self.add_image("shapes", image_id=i, path=None, 79 | width=width, height=height, 80 | bg_color=bg_color, shapes=shapes) 81 | 82 | def load_image(self, image_id): 83 | """Generate an image from the specs of the given image ID. 84 | Typically this function loads the image from a file, but 85 | in this case it generates the image on the fly from the 86 | specs in image_info. 87 | """ 88 | info = self.image_info[image_id] 89 | bg_color = np.array(info['bg_color']).reshape([1, 1, 3]) 90 | image = np.ones([info['height'], info['width'], 3], dtype=np.uint8) 91 | image = image * bg_color.astype(np.uint8) 92 | for shape, color, dims in info['shapes']: 93 | image = self.draw_shape(image, shape, dims, color) 94 | return image 95 | 96 | def image_reference(self, image_id): 97 | """Return the shapes data of the image.""" 98 | info = self.image_info[image_id] 99 | if info["source"] == "shapes": 100 | return info["shapes"] 101 | else: 102 | super(self.__class__).image_reference(self, image_id) 103 | 104 | def load_mask(self, image_id): 105 | """Generate instance masks for shapes of the given image ID. 
106 | """ 107 | info = self.image_info[image_id] 108 | shapes = info['shapes'] 109 | count = len(shapes) 110 | mask = np.zeros([info['height'], info['width'], count], dtype=np.uint8) 111 | for i, (shape, _, dims) in enumerate(info['shapes']): 112 | mask[:, :, i:i + 1] = self.draw_shape(mask[:, :, i:i + 1].copy(), 113 | shape, dims, 1) 114 | # Handle occlusions 115 | occlusion = np.logical_not(mask[:, :, -1]).astype(np.uint8) 116 | for i in range(count - 2, -1, -1): 117 | mask[:, :, i] = mask[:, :, i] * occlusion 118 | occlusion = np.logical_and( 119 | occlusion, np.logical_not(mask[:, :, i])) 120 | # Map class names to class IDs. 121 | class_ids = np.array([self.class_names.index(s[0]) for s in shapes]) 122 | return mask, class_ids.astype(np.int32) 123 | 124 | def draw_shape(self, image, shape, dims, color): 125 | """Draws a shape from the given specs.""" 126 | # Get the center x, y and the size s 127 | x, y, s = dims 128 | if shape == 'square': 129 | image = cv2.rectangle(image, (x - s, y - s), 130 | (x + s, y + s), color, -1) 131 | elif shape == "circle": 132 | image = cv2.circle(image, (x, y), s, color, -1) 133 | elif shape == "triangle": 134 | points = np.array([[(x, y - s), 135 | (x - s / math.sin(math.radians(60)), y + s), 136 | (x + s / math.sin(math.radians(60)), y + s), 137 | ]], dtype=np.int32) 138 | image = cv2.fillPoly(image, points, color) 139 | return image 140 | 141 | def random_shape(self, height, width): 142 | """Generates specifications of a random shape that lies within 143 | the given height and width boundaries. 144 | Returns a tuple of three valus: 145 | * The shape name (square, circle, ...) 146 | * Shape color: a tuple of 3 values, RGB. 147 | * Shape dimensions: A tuple of values that define the shape size 148 | and location. Differs per shape type. 149 | """ 150 | # Shape 151 | shape = random.choice(["square", "circle", "triangle"]) 152 | # Color 153 | color = tuple([random.randint(0, 255) for _ in range(3)]) 154 | # Center x, y 155 | buffer = 20 156 | y = random.randint(buffer, height - buffer - 1) 157 | x = random.randint(buffer, width - buffer - 1) 158 | # Size 159 | s = random.randint(buffer, height // 4) 160 | return shape, color, (x, y, s) 161 | 162 | def random_image(self, height, width): 163 | """Creates random specifications of an image with multiple shapes. 164 | Returns the background color of the image and a list of shape 165 | specifications that can be used to draw the image. 166 | """ 167 | # Pick random background color 168 | bg_color = np.array([random.randint(0, 255) for _ in range(3)]) 169 | # Generate a few random shapes and record their 170 | # bounding boxes 171 | shapes = [] 172 | boxes = [] 173 | N = random.randint(1, 4) 174 | for _ in range(N): 175 | shape, color, dims = self.random_shape(height, width) 176 | shapes.append((shape, color, dims)) 177 | x, y, s = dims 178 | boxes.append([y - s, x - s, y + s, x + s]) 179 | # Apply non-max suppression wit 0.3 threshold to avoid 180 | # shapes covering each other 181 | keep_ixs = utils.non_max_suppression( 182 | np.array(boxes), np.arange(N), 0.3) 183 | shapes = [s for i, s in enumerate(shapes) if i in keep_ixs] 184 | return bg_color, shapes 185 | -------------------------------------------------------------------------------- /nodes/visualize.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Display and Visualization Functions. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 
6 | Licensed under the MIT License (see LICENSE for details)
7 | Written by Waleed Abdulla
8 | """
9 | 
10 | import sys
11 | # Drop the ROS Python 2 path (if present) before importing cv2 so the system OpenCV bindings are picked up instead of the ROS-shipped build.
12 | if '/opt/ros/indigo/lib/python2.7/dist-packages' in sys.path:
13 |     sys.path.remove('/opt/ros/indigo/lib/python2.7/dist-packages')
14 | import random
15 | import itertools
16 | import colorsys
17 | import numpy as np
18 | from skimage.measure import find_contours
19 | import matplotlib.pyplot as plt
20 | import matplotlib.patches as patches
21 | import matplotlib.lines as lines
22 | from matplotlib.patches import Polygon
23 | import cv2
24 | import IPython.display
25 | import utils
26 | 
27 | ############################################################
28 | # Visualization
29 | ############################################################
30 | def display_images(images, titles=None, cols=4, cmap=None, norm=None,
31 |                    interpolation=None):
32 |     """Display the given set of images, optionally with titles.
33 |     images: list or array of image tensors in HWC format.
34 |     titles: optional. A list of titles to display with each image.
35 |     cols: number of images per row
36 |     cmap: Optional. Color map to use. For example, "Blues".
37 |     norm: Optional. A Normalize instance to map values to colors.
38 |     interpolation: Optional. Image interpolation to use for display.
39 |     """
40 |     titles = titles if titles is not None else [""] * len(images)
41 |     rows = len(images) // cols + 1
42 |     plt.figure(figsize=(14, 14 * rows // cols))
43 |     i = 1
44 |     for image, title in zip(images, titles):
45 |         plt.subplot(rows, cols, i)
46 |         plt.title(title, fontsize=9)
47 |         plt.axis('off')
48 |         plt.imshow(image.astype(np.uint8), cmap=cmap,
49 |                    norm=norm, interpolation=interpolation)
50 |         i += 1
51 |     plt.show()
52 | 
53 | 
54 | def random_colors(N, bright=True):
55 |     """
56 |     Generate random colors.
57 |     To get visually distinct colors, generate them in HSV space then
58 |     convert to RGB.
59 |     """
60 |     brightness = 1.0 if bright else 0.7
61 |     hsv = [(float(i) / N, 1, brightness) for i in range(N)]
62 |     colors = list(map(lambda c: colorsys.hsv_to_rgb(*c), hsv))
63 |     random.shuffle(colors)
64 |     return colors
65 | 
66 | 
67 | def apply_mask(image, mask, color, alpha=0.5):
68 |     """Apply the given mask to the image.
69 |     """
70 |     for c in range(3):
71 |         image[:, :, c] = np.where(mask == 1,
72 |                                   image[:, :, c] *
73 |                                   (1 - alpha) + alpha * color[c] * 255,
74 |                                   image[:, :, c])
75 |     return image
76 | 
77 | 
78 | def display_instances(image, boxes, masks, class_ids, class_names,
79 |                       scores=None, title="",
80 |                       figsize=(16, 16), ax=None, class_colors=None):
81 |     """
82 |     boxes: [num_instance, (y1, x1, y2, x2, class_id)] in image coordinates.
83 |     masks: [height, width, num_instances]
84 |     class_ids: [num_instances]
85 |     class_names: list of class names of the dataset
86 |     scores: (optional) confidence scores for each box
87 |     figsize: (optional) the size of the image.
88 |     """
89 |     # Number of instances
90 |     N = boxes.shape[0]
91 |     if not N:
92 |         print("\n*** No instances to display *** \n")
93 |     else:
94 |         assert boxes.shape[0] == masks.shape[-1] == class_ids.shape[0]
95 | 
96 |     if not ax:
97 |         _, ax = plt.subplots(1, figsize=figsize)
98 | 
99 |     # Generate random colors
100 |     if class_colors is None:
101 |         colors = random_colors(N)
102 | 
103 |     # Show area outside image boundaries.
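    # (ylim runs from height+10 down to -10, i.e. the y-axis is inverted to
    # match image coordinates with the origin at the top-left, plus a margin.)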
104 | height, width = image.shape[:2] 105 | ax.set_ylim(height + 10, -10) 106 | ax.set_xlim(-10, width + 10) 107 | ax.axis('off') 108 | ax.set_title(title) 109 | 110 | masked_image = image.astype(np.uint32).copy() 111 | for i in range(N): 112 | class_id = class_ids[i] 113 | if class_colors is None: 114 | color = colors[i] 115 | else: 116 | color = class_colors[class_id] 117 | 118 | # Bounding box 119 | if not np.any(boxes[i]): 120 | # Skip this instance. Has no bbox. Likely lost in image cropping. 121 | continue 122 | y1, x1, y2, x2 = boxes[i] 123 | p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, 124 | alpha=0.7, linestyle="dashed", 125 | edgecolor=color, facecolor='none') 126 | ax.add_patch(p) 127 | 128 | # Label 129 | score = scores[i] if scores is not None else None 130 | label = class_names[class_id] 131 | x = random.randint(x1, (x1 + x2) // 2) 132 | caption = "{} {:.3f}".format(label, score) if score else label 133 | ax.text(x1, y1 + 8, caption, 134 | color='w', size=11, backgroundcolor="none") 135 | 136 | # Mask 137 | mask = masks[:, :, i] 138 | masked_image = apply_mask(masked_image, mask, color) 139 | 140 | # Mask Polygon 141 | # Pad to ensure proper polygons for masks that touch image edges. 142 | padded_mask = np.zeros( 143 | (mask.shape[0] + 2, mask.shape[1] + 2), dtype=np.uint8) 144 | padded_mask[1:-1, 1:-1] = mask 145 | contours = find_contours(padded_mask, 0.5) 146 | for verts in contours: 147 | # Subtract the padding and flip (y, x) to (x, y) 148 | verts = np.fliplr(verts) - 1 149 | p = Polygon(verts, facecolor="none", edgecolor=color) 150 | ax.add_patch(p) 151 | ax.imshow(masked_image.astype(np.uint8)) 152 | #plt.show() 153 | 154 | 155 | def display_instances_cv(image, boxes, masks, class_ids, class_names, 156 | scores=None, class_colors=None, alpha=0.7): 157 | """ 158 | boxes: [num_instance, (y1, x1, y2, x2, class_id)] in image coordinates. 159 | masks: [height, width, num_instances] 160 | class_ids: [num_instances] 161 | class_names: list of class names of the dataset 162 | scores: (optional) confidence scores for each box 163 | class_colors: a list mapping class ids to their colors 164 | alpha: the amount of transparency of the mask overlay 165 | """ 166 | # Number of instances 167 | n = boxes.shape[0] 168 | if n: 169 | assert boxes.shape[0] == masks.shape[-1] == class_ids.shape[0] 170 | 171 | # Generate random colors 172 | if class_colors is None: 173 | colors = random_colors(n) 174 | 175 | for i in range(n): 176 | class_id = class_ids[i] 177 | if class_colors is None: 178 | color = colors[i] 179 | else: 180 | color = class_colors[class_id] 181 | 182 | # Transform class colors to BGR and rescale [0-255] for OpenCv 183 | bgr_color = tuple(c*255 for c in color[::-1]) 184 | 185 | # Draw bounding boxes 186 | if not np.any(boxes[i]): 187 | # Skip this instance. Has no bbox. Likely lost in image cropping. 
188 | continue 189 | y1, x1, y2, x2 = boxes[i] 190 | cv2.rectangle(image, (x1, y1), (x2, y2), color=bgr_color, thickness=2) 191 | 192 | # Draw transparent mask 193 | overlay = image.copy() 194 | mask = masks[:, :, i] 195 | __, thresh = cv2.threshold(mask, 0.5, 1, cv2.THRESH_BINARY) 196 | _, contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) 197 | cv2.drawContours(image, contours, -1, color=bgr_color, thickness=cv2.FILLED) 198 | cv2.addWeighted(overlay, alpha, image, 1 - alpha, 0, image) 199 | 200 | # Draw text label 201 | score = scores[i] if scores is not None else None 202 | label = class_names[class_id] 203 | caption = "{} {:.3f}".format(label, score) if score else label 204 | cv2.putText(image, caption, (x1, y1 + 12), fontFace=cv2.FONT_HERSHEY_COMPLEX, fontScale=0.5, 205 | color=(255, 255, 255)) 206 | 207 | return image 208 | 209 | 210 | def draw_rois(image, rois, refined_rois, mask, class_ids, class_names, limit=10): 211 | """ 212 | anchors: [n, (y1, x1, y2, x2)] list of anchors in image coordinates. 213 | proposals: [n, 4] the same anchors but refined to fit objects better. 214 | """ 215 | masked_image = image.copy() 216 | 217 | # Pick random anchors in case there are too many. 218 | ids = np.arange(rois.shape[0], dtype=np.int32) 219 | ids = np.random.choice( 220 | ids, limit, replace=False) if ids.shape[0] > limit else ids 221 | 222 | fig, ax = plt.subplots(1, figsize=(12, 12)) 223 | if rois.shape[0] > limit: 224 | plt.title("Showing {} random ROIs out of {}".format( 225 | len(ids), rois.shape[0])) 226 | else: 227 | plt.title("{} ROIs".format(len(ids))) 228 | 229 | # Show area outside image boundaries. 230 | ax.set_ylim(image.shape[0] + 20, -20) 231 | ax.set_xlim(-50, image.shape[1] + 20) 232 | ax.axis('off') 233 | 234 | for i, id in enumerate(ids): 235 | color = np.random.rand(3) 236 | class_id = class_ids[id] 237 | # ROI 238 | y1, x1, y2, x2 = rois[id] 239 | p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, 240 | edgecolor=color if class_id else "gray", 241 | facecolor='none', linestyle="dashed") 242 | ax.add_patch(p) 243 | # Refined ROI 244 | if class_id: 245 | ry1, rx1, ry2, rx2 = refined_rois[id] 246 | p = patches.Rectangle((rx1, ry1), rx2 - rx1, ry2 - ry1, linewidth=2, 247 | edgecolor=color, facecolor='none') 248 | ax.add_patch(p) 249 | # Connect the top-left corners of the anchor and proposal for easy visualization 250 | ax.add_line(lines.Line2D([x1, rx1], [y1, ry1], color=color)) 251 | 252 | # Label 253 | label = class_names[class_id] 254 | ax.text(rx1, ry1 + 8, "{}".format(label), 255 | color='w', size=11, backgroundcolor="none") 256 | 257 | # Mask 258 | m = utils.unmold_mask(mask[id], rois[id] 259 | [:4].astype(np.int32), image.shape) 260 | masked_image = apply_mask(masked_image, m, color) 261 | 262 | ax.imshow(masked_image) 263 | 264 | # Print stats 265 | print("Positive ROIs: ", class_ids[class_ids > 0].shape[0]) 266 | print("Negative ROIs: ", class_ids[class_ids == 0].shape[0]) 267 | print("Positive Ratio: {:.2f}".format( 268 | class_ids[class_ids > 0].shape[0] / class_ids.shape[0])) 269 | 270 | 271 | # TODO: Replace with matplotlib equivalent? 272 | def draw_box(image, box, color): 273 | """Draw 3-pixel width bounding boxes on the given image array. 274 | color: list of 3 int values for RGB. 
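 
    Example (illustrative):
        image = draw_box(image, [y1, x1, y2, x2], [255, 0, 0])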
275 | """ 276 | y1, x1, y2, x2 = box 277 | image[y1:y1 + 2, x1:x2] = color 278 | image[y2:y2 + 2, x1:x2] = color 279 | image[y1:y2, x1:x1 + 2] = color 280 | image[y1:y2, x2:x2 + 2] = color 281 | return image 282 | 283 | 284 | def display_top_masks(image, mask, class_ids, class_names, limit=4): 285 | """Display the given image and the top few class masks.""" 286 | to_display = [] 287 | titles = [] 288 | to_display.append(image) 289 | titles.append("H x W={}x{}".format(image.shape[0], image.shape[1])) 290 | # Pick top prominent classes in this image 291 | unique_class_ids = np.unique(class_ids) 292 | mask_area = [np.sum(mask[:, :, np.where(class_ids == i)[0]]) 293 | for i in unique_class_ids] 294 | top_ids = [v[0] for v in sorted(zip(unique_class_ids, mask_area), 295 | key=lambda r: r[1], reverse=True) if v[1] > 0] 296 | # Generate images and titles 297 | for i in range(limit): 298 | class_id = top_ids[i] if i < len(top_ids) else -1 299 | # Pull masks of instances belonging to the same class. 300 | m = mask[:, :, np.where(class_ids == class_id)[0]] 301 | m = np.sum(m * np.arange(1, m.shape[-1] + 1), -1) 302 | to_display.append(m) 303 | titles.append(class_names[class_id] if class_id != -1 else "-") 304 | display_images(to_display, titles=titles, cols=limit + 1, cmap="Blues_r") 305 | 306 | 307 | def plot_precision_recall(AP, precisions, recalls): 308 | """Draw the precision-recall curve. 309 | 310 | AP: Average precision at IoU >= 0.5 311 | precisions: list of precision values 312 | recalls: list of recall values 313 | """ 314 | # Plot the Precision-Recall curve 315 | _, ax = plt.subplots(1) 316 | ax.set_title("Precision-Recall Curve. AP@50 = {:.3f}".format(AP)) 317 | ax.set_ylim(0, 1.1) 318 | ax.set_xlim(0, 1.1) 319 | _ = ax.plot(recalls, precisions) 320 | 321 | 322 | def plot_overlaps(gt_class_ids, pred_class_ids, pred_scores, 323 | overlaps, class_names, threshold=0.5): 324 | """Draw a grid showing how ground truth objects are classified. 325 | gt_class_ids: [N] int. Ground truth class IDs 326 | pred_class_id: [N] int. Predicted class IDs 327 | pred_scores: [N] float. The probability scores of predicted classes 328 | overlaps: [pred_boxes, gt_boxes] IoU overlaps of predictins and GT boxes. 329 | class_names: list of all class names in the dataset 330 | threshold: Float. The prediction probability required to predict a class 331 | """ 332 | gt_class_ids = gt_class_ids[gt_class_ids != 0] 333 | pred_class_ids = pred_class_ids[pred_class_ids != 0] 334 | 335 | plt.figure(figsize=(12, 10)) 336 | plt.imshow(overlaps, interpolation='nearest', cmap=plt.cm.Blues) 337 | plt.yticks(np.arange(len(pred_class_ids)), 338 | ["{} ({:.2f})".format(class_names[int(id)], pred_scores[i]) 339 | for i, id in enumerate(pred_class_ids)]) 340 | plt.xticks(np.arange(len(gt_class_ids)), 341 | [class_names[int(id)] for id in gt_class_ids], rotation=90) 342 | 343 | thresh = overlaps.max() / 2. 
344 |     for i, j in itertools.product(range(overlaps.shape[0]),
345 |                                   range(overlaps.shape[1])):
346 |         text = ""
347 |         if overlaps[i, j] > threshold:
348 |             text = "match" if gt_class_ids[j] == pred_class_ids[i] else "wrong"
349 |         color = ("white" if overlaps[i, j] > thresh
350 |                  else "black" if overlaps[i, j] > 0
351 |                  else "grey")
352 |         plt.text(j, i, "{:.3f}\n{}".format(overlaps[i, j], text),
353 |                  horizontalalignment="center", verticalalignment="center",
354 |                  fontsize=9, color=color)
355 | 
356 |     plt.tight_layout()
357 |     plt.xlabel("Ground Truth")
358 |     plt.ylabel("Predictions")
359 | 
360 | 
361 | def draw_boxes(image, boxes=None, refined_boxes=None,
362 |                masks=None, captions=None, visibilities=None,
363 |                title="", ax=None):
364 |     """Draw bounding boxes and segmentation masks with different
365 |     customizations.
366 | 
367 |     boxes: [N, (y1, x1, y2, x2, class_id)] in image coordinates.
368 |     refined_boxes: Like boxes, but draw with solid lines to show
369 |         that they're the result of refining 'boxes'.
370 |     masks: [N, height, width]
371 |     captions: List of N titles to display on each box
372 |     visibilities: (optional) List of values of 0, 1, or 2. Determines how
373 |         prominent each bounding box should be.
374 |     title: An optional title to show over the image
375 |     ax: (optional) Matplotlib axis to draw on.
376 |     """
377 |     # Number of boxes
378 |     assert boxes is not None or refined_boxes is not None
379 |     N = boxes.shape[0] if boxes is not None else refined_boxes.shape[0]
380 | 
381 |     # Matplotlib Axis
382 |     if not ax:
383 |         _, ax = plt.subplots(1, figsize=(12, 12))
384 | 
385 |     # Generate random colors
386 |     colors = random_colors(N)
387 | 
388 |     # Show area outside image boundaries.
389 |     margin = image.shape[0] // 10
390 |     ax.set_ylim(image.shape[0] + margin, -margin)
391 |     ax.set_xlim(-margin, image.shape[1] + margin)
392 |     ax.axis('off')
393 | 
394 |     ax.set_title(title)
395 | 
396 |     masked_image = image.astype(np.uint32).copy()
397 |     for i in range(N):
398 |         # Box visibility
399 |         visibility = visibilities[i] if visibilities is not None else 1
400 |         if visibility == 0:
401 |             color = "gray"
402 |             style = "dotted"
403 |             alpha = 0.5
404 |         elif visibility == 1:
405 |             color = colors[i]
406 |             style = "dotted"
407 |             alpha = 1
408 |         elif visibility == 2:
409 |             color = colors[i]
410 |             style = "solid"
411 |             alpha = 1
412 | 
413 |         # Boxes
414 |         if boxes is not None:
415 |             if not np.any(boxes[i]):
416 |                 # Skip this instance. Has no bbox. Likely lost in cropping.
417 |                 continue
418 |             y1, x1, y2, x2 = boxes[i]
419 |             p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2,
420 |                                   alpha=alpha, linestyle=style,
421 |                                   edgecolor=color, facecolor='none')
422 |             ax.add_patch(p)
423 | 
424 |         # Refined boxes
425 |         if refined_boxes is not None and visibility > 0:
426 |             ry1, rx1, ry2, rx2 = refined_boxes[i].astype(np.int32)
427 |             p = patches.Rectangle((rx1, ry1), rx2 - rx1, ry2 - ry1, linewidth=2,
428 |                                   edgecolor=color, facecolor='none')
429 |             ax.add_patch(p)
430 |             # Connect the top-left corners of the anchor and proposal
431 |             if boxes is not None:
432 |                 ax.add_line(lines.Line2D([x1, rx1], [y1, ry1], color=color))
433 | 
434 |         # Captions
435 |         if captions is not None:
436 |             caption = captions[i]
437 |             # If there are refined boxes, display captions on them
438 |             if refined_boxes is not None:
439 |                 y1, x1, y2, x2 = ry1, rx1, ry2, rx2
440 |             x = random.randint(x1, (x1 + x2) // 2)
441 |             ax.text(x1, y1, caption, size=11, verticalalignment='top',
442 |                     color='w', backgroundcolor="none",
443 |                     bbox={'facecolor': color, 'alpha': 0.5,
444 |                           'pad': 2, 'edgecolor': 'none'})
445 | 
446 |         # Masks
447 |         if masks is not None:
448 |             mask = masks[:, :, i]
449 |             masked_image = apply_mask(masked_image, mask, color)
450 |             # Mask Polygon
451 |             # Pad to ensure proper polygons for masks that touch image edges.
452 |             padded_mask = np.zeros(
453 |                 (mask.shape[0] + 2, mask.shape[1] + 2), dtype=np.uint8)
454 |             padded_mask[1:-1, 1:-1] = mask
455 |             contours = find_contours(padded_mask, 0.5)
456 |             for verts in contours:
457 |                 # Subtract the padding and flip (y, x) to (x, y)
458 |                 verts = np.fliplr(verts) - 1
459 |                 p = Polygon(verts, facecolor="none", edgecolor=color)
460 |                 ax.add_patch(p)
461 |     ax.imshow(masked_image.astype(np.uint8))
462 | 
463 | 
464 | def display_table(table):
465 |     """Display values in a table format.
466 |     table: an iterable of rows, and each row is an iterable of values.
467 |     """
468 |     html = ""
469 |     for row in table:
470 |         row_html = ""
471 |         for col in row:
472 |             row_html += "<td>{:40}</td>".format(str(col))
473 |         html += "<tr>" + row_html + "</tr>"
474 |     html = "<table>" + html + "</table>"
" 475 | IPython.display.display(IPython.display.HTML(html)) 476 | 477 | 478 | def display_weight_stats(model): 479 | """Scans all the weights in the model and returns a list of tuples 480 | that contain stats about each weight. 481 | """ 482 | layers = model.get_trainable_layers() 483 | table = [["WEIGHT NAME", "SHAPE", "MIN", "MAX", "STD"]] 484 | for l in layers: 485 | weight_values = l.get_weights() # list of Numpy arrays 486 | weight_tensors = l.weights # list of TF tensors 487 | for i, w in enumerate(weight_values): 488 | weight_name = weight_tensors[i].name 489 | # Detect problematic layers. Exclude biases of conv layers. 490 | alert = "" 491 | if w.min() == w.max() and not (l.__class__.__name__ == "Conv2D" and i == 1): 492 | alert += "*** dead?" 493 | if np.abs(w.min()) > 1000 or np.abs(w.max()) > 1000: 494 | alert += "*** Overflow?" 495 | # Add row 496 | table.append([ 497 | weight_name + alert, 498 | str(w.shape), 499 | "{:+9.4f}".format(w.min()), 500 | "{:+10.4f}".format(w.max()), 501 | "{:+9.4f}".format(w.std()), 502 | ]) 503 | display_table(table) 504 | -------------------------------------------------------------------------------- /src/mask_rcnn_ros/visualize.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Display and Visualization Functions. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | """ 9 | 10 | import random 11 | import itertools 12 | import colorsys 13 | import numpy as np 14 | from skimage.measure import find_contours 15 | import matplotlib.pyplot as plt 16 | import matplotlib.patches as patches 17 | import matplotlib.lines as lines 18 | from matplotlib.patches import Polygon 19 | import cv2 20 | import IPython.display 21 | 22 | import utils 23 | import sys 24 | sys.path.remove('/opt/ros/indigo/lib/python2.7/dist-packages') 25 | 26 | ############################################################ 27 | # Visualization 28 | ############################################################ 29 | 30 | def display_images(images, titles=None, cols=4, cmap=None, norm=None, 31 | interpolation=None): 32 | """Display the given set of images, optionally with titles. 33 | images: list or array of image tensors in HWC format. 34 | titles: optional. A list of titles to display with each image. 35 | cols: number of images per row 36 | cmap: Optional. Color map to use. For example, "Blues". 37 | norm: Optional. A Normalize instance to map values to colors. 38 | interpolation: Optional. Image interporlation to use for display. 39 | """ 40 | titles = titles if titles is not None else [""] * len(images) 41 | rows = len(images) // cols + 1 42 | plt.figure(figsize=(14, 14 * rows // cols)) 43 | i = 1 44 | for image, title in zip(images, titles): 45 | plt.subplot(rows, cols, i) 46 | plt.title(title, fontsize=9) 47 | plt.axis('off') 48 | plt.imshow(image.astype(np.uint8), cmap=cmap, 49 | norm=norm, interpolation=interpolation) 50 | i += 1 51 | plt.show() 52 | 53 | 54 | def random_colors(N, bright=True): 55 | """ 56 | Generate random colors. 57 | To get visually distinct colors, generate them in HSV space then 58 | convert to RGB. 
59 | """ 60 | brightness = 1.0 if bright else 0.7 61 | hsv = [(float(i) / N, 1, brightness) for i in range(N)] 62 | colors = list(map(lambda c: colorsys.hsv_to_rgb(*c), hsv)) 63 | random.shuffle(colors) 64 | return colors 65 | 66 | 67 | def apply_mask(image, mask, color, alpha=0.5): 68 | """Apply the given mask to the image. 69 | """ 70 | for c in range(3): 71 | image[:, :, c] = np.where(mask == 1, 72 | image[:, :, c] * 73 | (1 - alpha) + alpha * color[c] * 255, 74 | image[:, :, c]) 75 | return image 76 | 77 | 78 | def display_instances(image, boxes, masks, class_ids, class_names, 79 | scores=None, title="", 80 | figsize=(16, 16), ax=None, class_colors=None): 81 | """ 82 | boxes: [num_instance, (y1, x1, y2, x2, class_id)] in image coordinates. 83 | masks: [height, width, num_instances] 84 | class_ids: [num_instances] 85 | class_names: list of class names of the dataset 86 | scores: (optional) confidence scores for each box 87 | figsize: (optional) the size of the image. 88 | """ 89 | # Number of instances 90 | N = boxes.shape[0] 91 | if not N: 92 | print("\n*** No instances to display *** \n") 93 | else: 94 | assert boxes.shape[0] == masks.shape[-1] == class_ids.shape[0] 95 | 96 | if not ax: 97 | _, ax = plt.subplots(1, figsize=figsize) 98 | 99 | # Generate random colors 100 | if class_colors is None: 101 | colors = random_colors(N) 102 | 103 | # Show area outside image boundaries. 104 | height, width = image.shape[:2] 105 | ax.set_ylim(height + 10, -10) 106 | ax.set_xlim(-10, width + 10) 107 | ax.axis('off') 108 | ax.set_title(title) 109 | 110 | masked_image = image.astype(np.uint32).copy() 111 | for i in range(N): 112 | class_id = class_ids[i] 113 | if class_colors is None: 114 | color = colors[i] 115 | else: 116 | color = class_colors[class_id] 117 | 118 | # Bounding box 119 | if not np.any(boxes[i]): 120 | # Skip this instance. Has no bbox. Likely lost in image cropping. 121 | continue 122 | y1, x1, y2, x2 = boxes[i] 123 | p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, 124 | alpha=0.7, linestyle="dashed", 125 | edgecolor=color, facecolor='none') 126 | ax.add_patch(p) 127 | 128 | # Label 129 | score = scores[i] if scores is not None else None 130 | label = class_names[class_id] 131 | x = random.randint(x1, (x1 + x2) // 2) 132 | caption = "{} {:.3f}".format(label, score) if score else label 133 | ax.text(x1, y1 + 8, caption, 134 | color='w', size=11, backgroundcolor="none") 135 | 136 | # Mask 137 | mask = masks[:, :, i] 138 | masked_image = apply_mask(masked_image, mask, color) 139 | 140 | # Mask Polygon 141 | # Pad to ensure proper polygons for masks that touch image edges. 142 | padded_mask = np.zeros( 143 | (mask.shape[0] + 2, mask.shape[1] + 2), dtype=np.uint8) 144 | padded_mask[1:-1, 1:-1] = mask 145 | contours = find_contours(padded_mask, 0.5) 146 | for verts in contours: 147 | # Subtract the padding and flip (y, x) to (x, y) 148 | verts = np.fliplr(verts) - 1 149 | p = Polygon(verts, facecolor="none", edgecolor=color) 150 | ax.add_patch(p) 151 | ax.imshow(masked_image.astype(np.uint8)) 152 | #plt.show() 153 | 154 | 155 | def display_instances_cv(image, boxes, masks, class_ids, class_names, 156 | scores=None, class_colors=None, alpha=0.7): 157 | """ 158 | boxes: [num_instance, (y1, x1, y2, x2, class_id)] in image coordinates. 
159 | masks: [height, width, num_instances] 160 | class_ids: [num_instances] 161 | class_names: list of class names of the dataset 162 | scores: (optional) confidence scores for each box 163 | class_colors: a list mapping class ids to their colors 164 | alpha: the amount of transparency of the mask overlay 165 | """ 166 | # Number of instances 167 | n = boxes.shape[0] 168 | if n: 169 | assert boxes.shape[0] == masks.shape[-1] == class_ids.shape[0] 170 | 171 | # Generate random colors 172 | if class_colors is None: 173 | colors = random_colors(n) 174 | 175 | for i in range(n): 176 | class_id = class_ids[i] 177 | if class_colors is None: 178 | color = colors[i] 179 | else: 180 | color = class_colors[class_id] 181 | 182 | # Transform class colors to BGR and rescale [0-255] for OpenCv 183 | bgr_color = tuple(c*255 for c in color[::-1]) 184 | 185 | # Draw bounding boxes 186 | if not np.any(boxes[i]): 187 | # Skip this instance. Has no bbox. Likely lost in image cropping. 188 | continue 189 | y1, x1, y2, x2 = boxes[i] 190 | cv2.rectangle(image, (x1, y1), (x2, y2), color=bgr_color, thickness=2) 191 | 192 | # Draw transparent mask 193 | overlay = image.copy() 194 | mask = masks[:, :, i] 195 | __, thresh = cv2.threshold(mask, 0.5, 1, cv2.THRESH_BINARY) 196 | _, contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) 197 | cv2.drawContours(image, contours, -1, color=bgr_color, thickness=cv2.FILLED) 198 | cv2.addWeighted(overlay, alpha, image, 1 - alpha, 0, image) 199 | 200 | # Draw text label 201 | score = scores[i] if scores is not None else None 202 | label = class_names[class_id] 203 | caption = "{} {:.3f}".format(label, score) if score else label 204 | cv2.putText(image, caption, (x1, y1 + 12), fontFace=cv2.FONT_HERSHEY_COMPLEX, fontScale=0.5, 205 | color=(255, 255, 255)) 206 | 207 | return image 208 | 209 | 210 | def draw_rois(image, rois, refined_rois, mask, class_ids, class_names, limit=10): 211 | """ 212 | anchors: [n, (y1, x1, y2, x2)] list of anchors in image coordinates. 213 | proposals: [n, 4] the same anchors but refined to fit objects better. 214 | """ 215 | masked_image = image.copy() 216 | 217 | # Pick random anchors in case there are too many. 218 | ids = np.arange(rois.shape[0], dtype=np.int32) 219 | ids = np.random.choice( 220 | ids, limit, replace=False) if ids.shape[0] > limit else ids 221 | 222 | fig, ax = plt.subplots(1, figsize=(12, 12)) 223 | if rois.shape[0] > limit: 224 | plt.title("Showing {} random ROIs out of {}".format( 225 | len(ids), rois.shape[0])) 226 | else: 227 | plt.title("{} ROIs".format(len(ids))) 228 | 229 | # Show area outside image boundaries. 
230 | ax.set_ylim(image.shape[0] + 20, -20) 231 | ax.set_xlim(-50, image.shape[1] + 20) 232 | ax.axis('off') 233 | 234 | for i, id in enumerate(ids): 235 | color = np.random.rand(3) 236 | class_id = class_ids[id] 237 | # ROI 238 | y1, x1, y2, x2 = rois[id] 239 | p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, 240 | edgecolor=color if class_id else "gray", 241 | facecolor='none', linestyle="dashed") 242 | ax.add_patch(p) 243 | # Refined ROI 244 | if class_id: 245 | ry1, rx1, ry2, rx2 = refined_rois[id] 246 | p = patches.Rectangle((rx1, ry1), rx2 - rx1, ry2 - ry1, linewidth=2, 247 | edgecolor=color, facecolor='none') 248 | ax.add_patch(p) 249 | # Connect the top-left corners of the anchor and proposal for easy visualization 250 | ax.add_line(lines.Line2D([x1, rx1], [y1, ry1], color=color)) 251 | 252 | # Label 253 | label = class_names[class_id] 254 | ax.text(rx1, ry1 + 8, "{}".format(label), 255 | color='w', size=11, backgroundcolor="none") 256 | 257 | # Mask 258 | m = utils.unmold_mask(mask[id], rois[id] 259 | [:4].astype(np.int32), image.shape) 260 | masked_image = apply_mask(masked_image, m, color) 261 | 262 | ax.imshow(masked_image) 263 | 264 | # Print stats 265 | print("Positive ROIs: ", class_ids[class_ids > 0].shape[0]) 266 | print("Negative ROIs: ", class_ids[class_ids == 0].shape[0]) 267 | print("Positive Ratio: {:.2f}".format( 268 | class_ids[class_ids > 0].shape[0] / class_ids.shape[0])) 269 | 270 | 271 | # TODO: Replace with matplotlib equivalent? 272 | def draw_box(image, box, color): 273 | """Draw 3-pixel width bounding boxes on the given image array. 274 | color: list of 3 int values for RGB. 275 | """ 276 | y1, x1, y2, x2 = box 277 | image[y1:y1 + 2, x1:x2] = color 278 | image[y2:y2 + 2, x1:x2] = color 279 | image[y1:y2, x1:x1 + 2] = color 280 | image[y1:y2, x2:x2 + 2] = color 281 | return image 282 | 283 | 284 | def display_top_masks(image, mask, class_ids, class_names, limit=4): 285 | """Display the given image and the top few class masks.""" 286 | to_display = [] 287 | titles = [] 288 | to_display.append(image) 289 | titles.append("H x W={}x{}".format(image.shape[0], image.shape[1])) 290 | # Pick top prominent classes in this image 291 | unique_class_ids = np.unique(class_ids) 292 | mask_area = [np.sum(mask[:, :, np.where(class_ids == i)[0]]) 293 | for i in unique_class_ids] 294 | top_ids = [v[0] for v in sorted(zip(unique_class_ids, mask_area), 295 | key=lambda r: r[1], reverse=True) if v[1] > 0] 296 | # Generate images and titles 297 | for i in range(limit): 298 | class_id = top_ids[i] if i < len(top_ids) else -1 299 | # Pull masks of instances belonging to the same class. 300 | m = mask[:, :, np.where(class_ids == class_id)[0]] 301 | m = np.sum(m * np.arange(1, m.shape[-1] + 1), -1) 302 | to_display.append(m) 303 | titles.append(class_names[class_id] if class_id != -1 else "-") 304 | display_images(to_display, titles=titles, cols=limit + 1, cmap="Blues_r") 305 | 306 | 307 | def plot_precision_recall(AP, precisions, recalls): 308 | """Draw the precision-recall curve. 309 | 310 | AP: Average precision at IoU >= 0.5 311 | precisions: list of precision values 312 | recalls: list of recall values 313 | """ 314 | # Plot the Precision-Recall curve 315 | _, ax = plt.subplots(1) 316 | ax.set_title("Precision-Recall Curve. 
AP@50 = {:.3f}".format(AP))
317 |     ax.set_ylim(0, 1.1)
318 |     ax.set_xlim(0, 1.1)
319 |     _ = ax.plot(recalls, precisions)
320 | 
321 | 
322 | def plot_overlaps(gt_class_ids, pred_class_ids, pred_scores,
323 |                   overlaps, class_names, threshold=0.5):
324 |     """Draw a grid showing how ground truth objects are classified.
325 |     gt_class_ids: [N] int. Ground truth class IDs
326 |     pred_class_ids: [N] int. Predicted class IDs
327 |     pred_scores: [N] float. The probability scores of predicted classes
328 |     overlaps: [pred_boxes, gt_boxes] IoU overlaps of predictions and GT boxes.
329 |     class_names: list of all class names in the dataset
330 |     threshold: Float. The prediction probability required to predict a class
331 |     """
332 |     gt_class_ids = gt_class_ids[gt_class_ids != 0]
333 |     pred_class_ids = pred_class_ids[pred_class_ids != 0]
334 | 
335 |     plt.figure(figsize=(12, 10))
336 |     plt.imshow(overlaps, interpolation='nearest', cmap=plt.cm.Blues)
337 |     plt.yticks(np.arange(len(pred_class_ids)),
338 |                ["{} ({:.2f})".format(class_names[int(id)], pred_scores[i])
339 |                 for i, id in enumerate(pred_class_ids)])
340 |     plt.xticks(np.arange(len(gt_class_ids)),
341 |                [class_names[int(id)] for id in gt_class_ids], rotation=90)
342 | 
343 |     thresh = overlaps.max() / 2.
344 |     for i, j in itertools.product(range(overlaps.shape[0]),
345 |                                   range(overlaps.shape[1])):
346 |         text = ""
347 |         if overlaps[i, j] > threshold:
348 |             text = "match" if gt_class_ids[j] == pred_class_ids[i] else "wrong"
349 |         color = ("white" if overlaps[i, j] > thresh
350 |                  else "black" if overlaps[i, j] > 0
351 |                  else "grey")
352 |         plt.text(j, i, "{:.3f}\n{}".format(overlaps[i, j], text),
353 |                  horizontalalignment="center", verticalalignment="center",
354 |                  fontsize=9, color=color)
355 | 
356 |     plt.tight_layout()
357 |     plt.xlabel("Ground Truth")
358 |     plt.ylabel("Predictions")
359 | 
360 | 
361 | def draw_boxes(image, boxes=None, refined_boxes=None,
362 |                masks=None, captions=None, visibilities=None,
363 |                title="", ax=None):
364 |     """Draw bounding boxes and segmentation masks with different
365 |     customizations.
366 | 
367 |     boxes: [N, (y1, x1, y2, x2, class_id)] in image coordinates.
368 |     refined_boxes: Like boxes, but draw with solid lines to show
369 |         that they're the result of refining 'boxes'.
370 |     masks: [N, height, width]
371 |     captions: List of N titles to display on each box
372 |     visibilities: (optional) List of values of 0, 1, or 2. Determines how
373 |         prominent each bounding box should be.
374 |     title: An optional title to show over the image
375 |     ax: (optional) Matplotlib axis to draw on.
376 |     """
377 |     # Number of boxes
378 |     assert boxes is not None or refined_boxes is not None
379 |     N = boxes.shape[0] if boxes is not None else refined_boxes.shape[0]
380 | 
381 |     # Matplotlib Axis
382 |     if not ax:
383 |         _, ax = plt.subplots(1, figsize=(12, 12))
384 | 
385 |     # Generate random colors
386 |     colors = random_colors(N)
387 | 
388 |     # Show area outside image boundaries.
389 |     margin = image.shape[0] // 10
390 |     ax.set_ylim(image.shape[0] + margin, -margin)
391 |     ax.set_xlim(-margin, image.shape[1] + margin)
392 |     ax.axis('off')
393 | 
394 |     ax.set_title(title)
395 | 
396 |     masked_image = image.astype(np.uint32).copy()
397 |     for i in range(N):
398 |         # Box visibility
399 |         visibility = visibilities[i] if visibilities is not None else 1
400 |         if visibility == 0:
401 |             color = "gray"
402 |             style = "dotted"
403 |             alpha = 0.5
404 |         elif visibility == 1:
405 |             color = colors[i]
406 |             style = "dotted"
407 |             alpha = 1
408 |         elif visibility == 2:
409 |             color = colors[i]
410 |             style = "solid"
411 |             alpha = 1
412 | 
413 |         # Boxes
414 |         if boxes is not None:
415 |             if not np.any(boxes[i]):
416 |                 # Skip this instance. Has no bbox. Likely lost in cropping.
417 |                 continue
418 |             y1, x1, y2, x2 = boxes[i]
419 |             p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2,
420 |                                   alpha=alpha, linestyle=style,
421 |                                   edgecolor=color, facecolor='none')
422 |             ax.add_patch(p)
423 | 
424 |         # Refined boxes
425 |         if refined_boxes is not None and visibility > 0:
426 |             ry1, rx1, ry2, rx2 = refined_boxes[i].astype(np.int32)
427 |             p = patches.Rectangle((rx1, ry1), rx2 - rx1, ry2 - ry1, linewidth=2,
428 |                                   edgecolor=color, facecolor='none')
429 |             ax.add_patch(p)
430 |             # Connect the top-left corners of the anchor and proposal
431 |             if boxes is not None:
432 |                 ax.add_line(lines.Line2D([x1, rx1], [y1, ry1], color=color))
433 | 
434 |         # Captions
435 |         if captions is not None:
436 |             caption = captions[i]
437 |             # If there are refined boxes, display captions on them
438 |             if refined_boxes is not None:
439 |                 y1, x1, y2, x2 = ry1, rx1, ry2, rx2
440 |             x = random.randint(x1, (x1 + x2) // 2)
441 |             ax.text(x1, y1, caption, size=11, verticalalignment='top',
442 |                     color='w', backgroundcolor="none",
443 |                     bbox={'facecolor': color, 'alpha': 0.5,
444 |                           'pad': 2, 'edgecolor': 'none'})
445 | 
446 |         # Masks
447 |         if masks is not None:
448 |             mask = masks[:, :, i]
449 |             masked_image = apply_mask(masked_image, mask, color)
450 |             # Mask Polygon
451 |             # Pad to ensure proper polygons for masks that touch image edges.
452 |             padded_mask = np.zeros(
453 |                 (mask.shape[0] + 2, mask.shape[1] + 2), dtype=np.uint8)
454 |             padded_mask[1:-1, 1:-1] = mask
455 |             contours = find_contours(padded_mask, 0.5)
456 |             for verts in contours:
457 |                 # Subtract the padding and flip (y, x) to (x, y)
458 |                 verts = np.fliplr(verts) - 1
459 |                 p = Polygon(verts, facecolor="none", edgecolor=color)
460 |                 ax.add_patch(p)
461 |     ax.imshow(masked_image.astype(np.uint8))
462 | 
463 | 
464 | def display_table(table):
465 |     """Display values in a table format.
466 |     table: an iterable of rows, and each row is an iterable of values.
467 |     """
468 |     html = ""
469 |     for row in table:
470 |         row_html = ""
471 |         for col in row:
472 |             row_html += "<td>{:40}</td>".format(str(col))
473 |         html += "<tr>" + row_html + "</tr>"
474 |     html = "<table>" + html + "</table>"
" 475 | IPython.display.display(IPython.display.HTML(html)) 476 | 477 | 478 | def display_weight_stats(model): 479 | """Scans all the weights in the model and returns a list of tuples 480 | that contain stats about each weight. 481 | """ 482 | layers = model.get_trainable_layers() 483 | table = [["WEIGHT NAME", "SHAPE", "MIN", "MAX", "STD"]] 484 | for l in layers: 485 | weight_values = l.get_weights() # list of Numpy arrays 486 | weight_tensors = l.weights # list of TF tensors 487 | for i, w in enumerate(weight_values): 488 | weight_name = weight_tensors[i].name 489 | # Detect problematic layers. Exclude biases of conv layers. 490 | alert = "" 491 | if w.min() == w.max() and not (l.__class__.__name__ == "Conv2D" and i == 1): 492 | alert += "*** dead?" 493 | if np.abs(w.min()) > 1000 or np.abs(w.max()) > 1000: 494 | alert += "*** Overflow?" 495 | # Add row 496 | table.append([ 497 | weight_name + alert, 498 | str(w.shape), 499 | "{:+9.4f}".format(w.min()), 500 | "{:+10.4f}".format(w.max()), 501 | "{:+9.4f}".format(w.std()), 502 | ]) 503 | display_table(table) 504 | -------------------------------------------------------------------------------- /nodes/coco.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Configurations and data loading code for MS COCO. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | 9 | ------------------------------------------------------------ 10 | 11 | Usage: import the module (see Jupyter notebooks for examples), or run from 12 | the command line as such: 13 | 14 | # Train a new model starting from pre-trained COCO weights 15 | python3 coco.py train --dataset=/path/to/coco/ --model=coco 16 | 17 | # Train a new model starting from ImageNet weights 18 | python3 coco.py train --dataset=/path/to/coco/ --model=imagenet 19 | 20 | # Continue training a model that you had trained earlier 21 | python3 coco.py train --dataset=/path/to/coco/ --model=/path/to/weights.h5 22 | 23 | # Continue training the last model you trained 24 | python3 coco.py train --dataset=/path/to/coco/ --model=last 25 | 26 | # Run COCO evaluatoin on the last model you trained 27 | python3 coco.py evaluate --dataset=/path/to/coco/ --model=last 28 | """ 29 | 30 | import os 31 | import time 32 | import numpy as np 33 | 34 | # Download and install the Python COCO tools from https://github.com/waleedka/coco 35 | # That's a fork from the original https://github.com/pdollar/coco with a bug 36 | # fix for Python 3. 37 | # I submitted a pull request https://github.com/cocodataset/cocoapi/pull/50 38 | # If the PR is merged then use the original repo. 39 | # Note: Edit PythonAPI/Makefile and replace "python" with "python3". 
40 | # from pycocotools.coco import COCO
41 | # from pycocotools.cocoeval import COCOeval
42 | # from pycocotools import mask as maskUtils
43 | # NOTE: these three imports are disabled, so CocoDataset.load_mask and the COCO evaluation code below will raise NameError unless they are re-enabled (pycocotools is not in requirements.txt).
44 | import zipfile
45 | from six.moves.urllib import request
46 | import shutil
47 | 
48 | from config import Config
49 | import utils
50 | import model as modellib
51 | 
52 | # Root directory of the project
53 | ROOT_DIR = os.path.dirname(__file__)
54 | 
55 | # Path to trained weights file
56 | COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")
57 | 
58 | # Directory to save logs and model checkpoints, if not provided
59 | # through the command line argument --logs
60 | DEFAULT_LOGS_DIR = os.path.join(ROOT_DIR, "logs")
61 | DEFAULT_DATASET_YEAR = "2014"
62 | 
63 | ############################################################
64 | # Configurations
65 | ############################################################
66 | 
67 | 
68 | class CocoConfig(Config):
69 |     """Configuration for training on MS COCO.
70 |     Derives from the base Config class and overrides values specific
71 |     to the COCO dataset.
72 |     """
73 |     # Give the configuration a recognizable name
74 |     NAME = "coco"
75 | 
76 |     # We use a GPU with 12GB memory, which can fit two images.
77 |     # Adjust down if you use a smaller GPU.
78 |     IMAGES_PER_GPU = 2
79 | 
80 |     # Uncomment to train on 8 GPUs (default is 1)
81 |     # GPU_COUNT = 8
82 | 
83 |     # Number of classes (including background)
84 |     NUM_CLASSES = 1 + 80  # COCO has 80 classes
85 | 
86 | 
87 | ############################################################
88 | # Dataset
89 | ############################################################
90 | 
91 | class CocoDataset(utils.Dataset):
92 |     def load_coco(self, dataset_dir, subset, year=DEFAULT_DATASET_YEAR, class_ids=None,
93 |                   class_map=None, return_coco=False, auto_download=False):
94 |         """Load a subset of the COCO dataset.
95 |         dataset_dir: The root directory of the COCO dataset.
96 |         subset: What to load (train, val, minival, valminusminival)
97 |         year: What dataset year to load (2014, 2017) as a string, not an integer
98 |         class_ids: If provided, only loads images that have the given classes.
99 |         class_map: TODO: Not implemented yet. Supports mapping classes from
100 |             different datasets to the same class ID.
101 |         return_coco: If True, returns the COCO object.
102 |         auto_download: Automatically download and unzip MS-COCO images and annotations
103 |         """
104 | 
105 |         if auto_download is True:
106 |             self.auto_download(dataset_dir, subset, year)
107 | 
108 |         coco = COCO("{}/annotations/instances_{}{}.json".format(dataset_dir, subset, year))
109 |         if subset == "minival" or subset == "valminusminival":
110 |             subset = "val"
111 |         image_dir = "{}/{}{}".format(dataset_dir, subset, year)
112 | 
113 |         # Load all classes or a subset?
114 |         if not class_ids:
115 |             # All classes
116 |             class_ids = sorted(coco.getCatIds())
117 | 
118 |         # All images or a subset?
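        # e.g. class_ids=[1] (the COCO "person" category) would keep only
        # images that carry at least one person annotation.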
119 | if class_ids: 120 | image_ids = [] 121 | for id in class_ids: 122 | image_ids.extend(list(coco.getImgIds(catIds=[id]))) 123 | # Remove duplicates 124 | image_ids = list(set(image_ids)) 125 | else: 126 | # All images 127 | image_ids = list(coco.imgs.keys()) 128 | 129 | # Add classes 130 | for i in class_ids: 131 | self.add_class("coco", i, coco.loadCats(i)[0]["name"]) 132 | 133 | # Add images 134 | for i in image_ids: 135 | self.add_image( 136 | "coco", image_id=i, 137 | path=os.path.join(image_dir, coco.imgs[i]['file_name']), 138 | width=coco.imgs[i]["width"], 139 | height=coco.imgs[i]["height"], 140 | annotations=coco.loadAnns(coco.getAnnIds( 141 | imgIds=[i], catIds=class_ids, iscrowd=None))) 142 | if return_coco: 143 | return coco 144 | 145 | def auto_download(self, dataDir, dataType, dataYear): 146 | """Download the COCO dataset/annotations if requested. 147 | dataDir: The root directory of the COCO dataset. 148 | dataType: What to load (train, val, minival, valminusminival) 149 | dataYear: What dataset year to load (2014, 2017) as a string, not an integer 150 | Note: 151 | For 2014, use "train", "val", "minival", or "valminusminival" 152 | For 2017, only "train" and "val" annotations are available 153 | """ 154 | 155 | # Setup paths and file names 156 | if dataType == "minival" or dataType == "valminusminival": 157 | imgDir = "{}/{}{}".format(dataDir, "val", dataYear) 158 | imgZipFile = "{}/{}{}.zip".format(dataDir, "val", dataYear) 159 | imgURL = "http://images.cocodataset.org/zips/{}{}.zip".format("val", dataYear) 160 | else: 161 | imgDir = "{}/{}{}".format(dataDir, dataType, dataYear) 162 | imgZipFile = "{}/{}{}.zip".format(dataDir, dataType, dataYear) 163 | imgURL = "http://images.cocodataset.org/zips/{}{}.zip".format(dataType, dataYear) 164 | # print("Image paths:"); print(imgDir); print(imgZipFile); print(imgURL) 165 | 166 | # Create main folder if it doesn't exist yet 167 | if not os.path.exists(dataDir): 168 | os.makedirs(dataDir) 169 | 170 | # Download images if not available locally 171 | if not os.path.exists(imgDir): 172 | os.makedirs(imgDir) 173 | print("Downloading images to " + imgZipFile + " ...") 174 | with request.urlopen(imgURL) as resp, open(imgZipFile, 'wb') as out: 175 | shutil.copyfileobj(resp, out) 176 | print("... done downloading.") 177 | print("Unzipping " + imgZipFile) 178 | with zipfile.ZipFile(imgZipFile, "r") as zip_ref: 179 | zip_ref.extractall(dataDir) 180 | print("... 
done unzipping")
181 |             print("Will use images in " + imgDir)
182 | 
183 |         # Setup annotations data paths
184 |         annDir = "{}/annotations".format(dataDir)
185 |         if dataType == "minival":
186 |             annZipFile = "{}/instances_minival2014.json.zip".format(dataDir)
187 |             annFile = "{}/instances_minival2014.json".format(annDir)
188 |             annURL = "https://dl.dropboxusercontent.com/s/o43o90bna78omob/instances_minival2014.json.zip?dl=0"
189 |             unZipDir = annDir
190 |         elif dataType == "valminusminival":
191 |             annZipFile = "{}/instances_valminusminival2014.json.zip".format(dataDir)
192 |             annFile = "{}/instances_valminusminival2014.json".format(annDir)
193 |             annURL = "https://dl.dropboxusercontent.com/s/s3tw5zcg7395368/instances_valminusminival2014.json.zip?dl=0"
194 |             unZipDir = annDir
195 |         else:
196 |             annZipFile = "{}/annotations_trainval{}.zip".format(dataDir, dataYear)
197 |             annFile = "{}/instances_{}{}.json".format(annDir, dataType, dataYear)
198 |             annURL = "http://images.cocodataset.org/annotations/annotations_trainval{}.zip".format(dataYear)
199 |             unZipDir = dataDir
200 |         # print("Annotations paths:"); print(annDir); print(annFile); print(annZipFile); print(annURL)
201 | 
202 |         # Download annotations if not available locally
203 |         if not os.path.exists(annDir):
204 |             os.makedirs(annDir)
205 |         if not os.path.exists(annFile):
206 |             if not os.path.exists(annZipFile):
207 |                 print("Downloading zipped annotations to " + annZipFile + " ...")
208 |                 with request.urlopen(annURL) as resp, open(annZipFile, 'wb') as out:
209 |                     shutil.copyfileobj(resp, out)
210 |                 print("... done downloading.")
211 |             print("Unzipping " + annZipFile)
212 |             with zipfile.ZipFile(annZipFile, "r") as zip_ref:
213 |                 zip_ref.extractall(unZipDir)
214 |             print("... done unzipping")
215 |         print("Will use annotations in " + annFile)
216 | 
217 |     def load_mask(self, image_id):
218 |         """Load instance masks for the given image.
219 | 
220 |         Different datasets use different ways to store masks. This
221 |         function converts the different mask format to one format
222 |         in the form of a bitmap [height, width, instances].
223 | 
224 |         Returns:
225 |             masks: A bool array of shape [height, width, instance count] with
226 |                 one mask per instance.
227 |             class_ids: a 1D array of class IDs of the instance masks.
228 |         """
229 |         # If not a COCO image, delegate to parent class.
230 |         image_info = self.image_info[image_id]
231 |         if image_info["source"] != "coco":
232 |             return super(CocoDataset, self).load_mask(image_id)
233 | 
234 |         instance_masks = []
235 |         class_ids = []
236 |         annotations = self.image_info[image_id]["annotations"]
237 |         # Build mask of shape [height, width, instance_count] and list
238 |         # of class IDs that correspond to each channel of the mask.
239 |         for annotation in annotations:
240 |             class_id = self.map_source_class_id(
241 |                 "coco.{}".format(annotation['category_id']))
242 |             if class_id:
243 |                 m = self.annToMask(annotation, image_info["height"],
244 |                                    image_info["width"])
245 |                 # Some objects are so small that they're less than 1 pixel area
246 |                 # and end up rounded out. Skip those objects.
247 |                 if m.max() < 1:
248 |                     continue
249 |                 # Is it a crowd? If so, use a negative class ID.
250 |                 if annotation['iscrowd']:
251 |                     # Use negative class ID for crowds
252 |                     class_id *= -1
253 |                     # For crowd masks, annToMask() sometimes returns a mask
254 |                     # smaller than the given dimensions. If so, resize it.
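                    # Note: rather than resizing, the fallback below substitutes
                    # a full-size all-ones mask for such crowd regions.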
255 |                 if m.shape[0] != image_info["height"] or m.shape[1] != image_info["width"]:
256 |                     m = np.ones([image_info["height"], image_info["width"]], dtype=bool)
257 |             instance_masks.append(m)
258 |             class_ids.append(class_id)
259 | 
260 |         # Pack instance masks into an array
261 |         if class_ids:
262 |             mask = np.stack(instance_masks, axis=2)
263 |             class_ids = np.array(class_ids, dtype=np.int32)
264 |             return mask, class_ids
265 |         else:
266 |             # Call super class to return an empty mask
267 |             return super(CocoDataset, self).load_mask(image_id)
268 | 
269 |     def image_reference(self, image_id):
270 |         """Return a link to the image in the COCO Website."""
271 |         info = self.image_info[image_id]
272 |         if info["source"] == "coco":
273 |             return "http://cocodataset.org/#explore?id={}".format(info["id"])
274 |         else:
275 |             return super(CocoDataset, self).image_reference(image_id)
276 | 
277 |     # The following two functions are from pycocotools with a few changes.
278 | 
279 |     def annToRLE(self, ann, height, width):
280 |         """
281 |         Convert annotation which can be polygons or uncompressed RLE to compressed RLE.
282 |         :return: RLE (run-length encoding of the mask)
283 |         """
284 |         segm = ann['segmentation']
285 |         if isinstance(segm, list):
286 |             # polygon -- a single object might consist of multiple parts
287 |             # we merge all parts into one mask rle code
288 |             rles = maskUtils.frPyObjects(segm, height, width)
289 |             rle = maskUtils.merge(rles)
290 |         elif isinstance(segm['counts'], list):
291 |             # uncompressed RLE
292 |             rle = maskUtils.frPyObjects(segm, height, width)
293 |         else:
294 |             # rle
295 |             rle = ann['segmentation']
296 |         return rle
297 | 
298 |     def annToMask(self, ann, height, width):
299 |         """
300 |         Convert annotation which can be polygons, uncompressed RLE, or RLE to binary mask.
301 |         :return: binary mask (numpy 2D array)
302 |         """
303 |         rle = self.annToRLE(ann, height, width)
304 |         m = maskUtils.decode(rle)
305 |         return m
306 | 
307 | 
308 | ############################################################
309 | # COCO Evaluation
310 | ############################################################
311 | 
312 | def build_coco_results(dataset, image_ids, rois, class_ids, scores, masks):
313 |     """Arrange results to match COCO specs in http://cocodataset.org/#format
314 |     """
315 |     # If no results, return an empty list
316 |     if rois is None:
317 |         return []
318 | 
319 |     results = []
320 |     for image_id in image_ids:
321 |         # Loop through detections
322 |         for i in range(rois.shape[0]):
323 |             class_id = class_ids[i]
324 |             score = scores[i]
325 |             bbox = np.around(rois[i], 1)
326 |             mask = masks[:, :, i]
327 | 
328 |             result = {
329 |                 "image_id": image_id,
330 |                 "category_id": dataset.get_source_class_id(class_id, "coco"),
331 |                 "bbox": [bbox[1], bbox[0], bbox[3] - bbox[1], bbox[2] - bbox[0]],
332 |                 "score": score,
333 |                 "segmentation": maskUtils.encode(np.asfortranarray(mask))
334 |             }
335 |             results.append(result)
336 |     return results
337 | 
338 | 
339 | def evaluate_coco(model, dataset, coco, eval_type="bbox", limit=0, image_ids=None):
340 |     """Runs official COCO evaluation.
341 |     dataset: A Dataset object with validation data
342 |     eval_type: "bbox" or "segm" for bounding box or segmentation evaluation
343 |     limit: if not 0, it's the number of images to use for evaluation
344 |     """
345 |     # Pick COCO images from the dataset
346 |     image_ids = image_ids or dataset.image_ids
347 | 
348 |     # Limit to a subset
349 |     if limit:
350 |         image_ids = image_ids[:limit]
351 | 
352 |     # Get corresponding COCO image IDs.
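
For a concrete feel for the annotation-to-mask conversion above, here is a toy run of the same pycocotools calls on an invented polygon annotation, packed into the [height, width, instance_count] layout that load_mask() returns. The annotation dict is made up for illustration:

import numpy as np
from pycocotools import mask as maskUtils

height, width = 8, 8
ann = {"segmentation": [[2, 2, 6, 2, 6, 6, 2, 6]],  # one square polygon, (x, y) pairs
       "category_id": 1, "iscrowd": 0}

# polygon -> RLE -> binary mask, the same path annToRLE()/annToMask() take
rles = maskUtils.frPyObjects(ann["segmentation"], height, width)
rle = maskUtils.merge(rles)
m = maskUtils.decode(rle)                  # [height, width], values 0/1

# pack into the [height, width, instance_count] stack load_mask() builds
mask = np.stack([m.astype(bool)], axis=2)
class_ids = np.array([ann["category_id"]], dtype=np.int32)
print(mask.shape, class_ids)               # (8, 8, 1) [1]
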
353 | coco_image_ids = [dataset.image_info[id]["id"] for id in image_ids] 354 | 355 | t_prediction = 0 356 | t_start = time.time() 357 | 358 | results = [] 359 | for i, image_id in enumerate(image_ids): 360 | # Load image 361 | image = dataset.load_image(image_id) 362 | 363 | # Run detection 364 | t = time.time() 365 | r = model.detect([image], verbose=0)[0] 366 | t_prediction += (time.time() - t) 367 | 368 | # Convert results to COCO format 369 | image_results = build_coco_results(dataset, coco_image_ids[i:i + 1], 370 | r["rois"], r["class_ids"], 371 | r["scores"], r["masks"]) 372 | results.extend(image_results) 373 | 374 | # Load results. This modifies results with additional attributes. 375 | coco_results = coco.loadRes(results) 376 | 377 | # Evaluate 378 | cocoEval = COCOeval(coco, coco_results, eval_type) 379 | cocoEval.params.imgIds = coco_image_ids 380 | cocoEval.evaluate() 381 | cocoEval.accumulate() 382 | cocoEval.summarize() 383 | 384 | print("Prediction time: {}. Average {}/image".format( 385 | t_prediction, t_prediction / len(image_ids))) 386 | print("Total time: ", time.time() - t_start) 387 | 388 | 389 | ############################################################ 390 | # Training 391 | ############################################################ 392 | 393 | 394 | if __name__ == '__main__': 395 | import argparse 396 | 397 | # Parse command line arguments 398 | parser = argparse.ArgumentParser( 399 | description='Train Mask R-CNN on MS COCO.') 400 | parser.add_argument("command", 401 | metavar="", 402 | help="'train' or 'evaluate' on MS COCO") 403 | parser.add_argument('--dataset', required=True, 404 | metavar="/path/to/coco/", 405 | help='Directory of the MS-COCO dataset') 406 | parser.add_argument('--year', required=False, 407 | default=DEFAULT_DATASET_YEAR, 408 | metavar="", 409 | help='Year of the MS-COCO dataset (2014 or 2017) (default=2014)') 410 | parser.add_argument('--model', required=True, 411 | metavar="/path/to/weights.h5", 412 | help="Path to weights .h5 file or 'coco'") 413 | parser.add_argument('--logs', required=False, 414 | default=DEFAULT_LOGS_DIR, 415 | metavar="/path/to/logs/", 416 | help='Logs and checkpoints directory (default=logs/)') 417 | parser.add_argument('--limit', required=False, 418 | default=500, 419 | metavar="", 420 | help='Images to use for evaluation (default=500)') 421 | parser.add_argument('--download', required=False, 422 | default=False, 423 | metavar="", 424 | help='Automatically download and unzip MS-COCO files (default=False)', 425 | type=bool) 426 | args = parser.parse_args() 427 | print("Command: ", args.command) 428 | print("Model: ", args.model) 429 | print("Dataset: ", args.dataset) 430 | print("Year: ", args.year) 431 | print("Logs: ", args.logs) 432 | print("Auto Download: ", args.download) 433 | 434 | # Configurations 435 | if args.command == "train": 436 | config = CocoConfig() 437 | else: 438 | class InferenceConfig(CocoConfig): 439 | # Set batch size to 1 since we'll be running inference on 440 | # one image at a time. 
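
One caveat in the argument parsing above: argparse applies type=bool directly to the raw string, and bool() of any non-empty string is True, so "--download False" on the command line still enables downloading. A common workaround, sketched here and not part of this repo, is an explicit string-to-boolean parser passed as the type:

import argparse

def str2bool(value):
    """Parse 'true'/'false'-style strings into a real boolean."""
    if value.lower() in ("yes", "true", "t", "1"):
        return True
    if value.lower() in ("no", "false", "f", "0"):
        return False
    raise argparse.ArgumentTypeError("Boolean value expected, got %r" % value)

# usage: parser.add_argument('--download', type=str2bool, default=False, ...)
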
Batch size = GPU_COUNT * IMAGES_PER_GPU
441 |             GPU_COUNT = 1
442 |             IMAGES_PER_GPU = 1
443 |             DETECTION_MIN_CONFIDENCE = 0
444 |         config = InferenceConfig()
445 |     config.display()
446 | 
447 |     # Create model
448 |     if args.command == "train":
449 |         model = modellib.MaskRCNN(mode="training", config=config,
450 |                                   model_dir=args.logs)
451 |     else:
452 |         model = modellib.MaskRCNN(mode="inference", config=config,
453 |                                   model_dir=args.logs)
454 | 
455 |     # Select weights file to load
456 |     if args.model.lower() == "coco":
457 |         model_path = COCO_MODEL_PATH
458 |     elif args.model.lower() == "last":
459 |         # Find last trained weights
460 |         model_path = model.find_last()[1]
461 |     elif args.model.lower() == "imagenet":
462 |         # Start from ImageNet trained weights
463 |         model_path = model.get_imagenet_weights()
464 |     else:
465 |         model_path = args.model
466 | 
467 |     # Load weights
468 |     print("Loading weights ", model_path)
469 |     model.load_weights(model_path, by_name=True)
470 | 
471 |     # Train or evaluate
472 |     if args.command == "train":
473 |         # Training dataset. Use the training set and 35K from the
474 |         # validation set, as in the Mask RCNN paper.
475 |         dataset_train = CocoDataset()
476 |         dataset_train.load_coco(args.dataset, "train", year=args.year, auto_download=args.download)
477 |         dataset_train.load_coco(args.dataset, "valminusminival", year=args.year, auto_download=args.download)
478 |         dataset_train.prepare()
479 | 
480 |         # Validation dataset
481 |         dataset_val = CocoDataset()
482 |         dataset_val.load_coco(args.dataset, "minival", year=args.year, auto_download=args.download)
483 |         dataset_val.prepare()
484 | 
485 |         # *** This training schedule is an example. Update to your needs ***
486 | 
487 |         # Training - Stage 1
488 |         print("Training network heads")
489 |         model.train(dataset_train, dataset_val,
490 |                     learning_rate=config.LEARNING_RATE,
491 |                     epochs=40,
492 |                     layers='heads')
493 | 
494 |         # Training - Stage 2
495 |         # Finetune layers from ResNet stage 4 and up
496 |         print("Fine tune Resnet stage 4 and up")
497 |         model.train(dataset_train, dataset_val,
498 |                     learning_rate=config.LEARNING_RATE,
499 |                     epochs=120,
500 |                     layers='4+')
501 | 
502 |         # Training - Stage 3
503 |         # Fine tune all layers
504 |         print("Fine tune all layers")
505 |         model.train(dataset_train, dataset_val,
506 |                     learning_rate=config.LEARNING_RATE / 10,
507 |                     epochs=160,
508 |                     layers='all')
509 | 
510 |     elif args.command == "evaluate":
511 |         # Validation dataset
512 |         dataset_val = CocoDataset()
513 |         coco = dataset_val.load_coco(args.dataset, "minival", year=args.year, return_coco=True, auto_download=args.download)
514 |         dataset_val.prepare()
515 |         print("Running COCO evaluation on {} images.".format(args.limit))
516 |         evaluate_coco(model, dataset_val, coco, "bbox", limit=int(args.limit))
517 |     else:
518 |         print("'{}' is not recognized. "
519 |               "Use 'train' or 'evaluate'".format(args.command))
520 | 
--------------------------------------------------------------------------------
/nodes/utils.py:
--------------------------------------------------------------------------------
1 | """
2 | Mask R-CNN
3 | Common utility functions and classes.
4 | 
5 | Copyright (c) 2017 Matterport, Inc.
6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | """ 9 | 10 | import sys 11 | import os 12 | import math 13 | import random 14 | import numpy as np 15 | import tensorflow as tf 16 | import scipy.misc 17 | import skimage.color 18 | import skimage.io 19 | from six.moves.urllib import request 20 | import shutil 21 | import contextlib 22 | 23 | # URL from which to download the latest COCO trained weights 24 | COCO_MODEL_URL = "https://github.com/matterport/Mask_RCNN/releases/download/v2.0/mask_rcnn_coco.h5" 25 | 26 | 27 | ############################################################ 28 | # Bounding Boxes 29 | ############################################################ 30 | 31 | def extract_bboxes(mask): 32 | """Compute bounding boxes from masks. 33 | mask: [height, width, num_instances]. Mask pixels are either 1 or 0. 34 | 35 | Returns: bbox array [num_instances, (y1, x1, y2, x2)]. 36 | """ 37 | boxes = np.zeros([mask.shape[-1], 4], dtype=np.int32) 38 | for i in range(mask.shape[-1]): 39 | m = mask[:, :, i] 40 | # Bounding box. 41 | horizontal_indicies = np.where(np.any(m, axis=0))[0] 42 | vertical_indicies = np.where(np.any(m, axis=1))[0] 43 | if horizontal_indicies.shape[0]: 44 | x1, x2 = horizontal_indicies[[0, -1]] 45 | y1, y2 = vertical_indicies[[0, -1]] 46 | # x2 and y2 should not be part of the box. Increment by 1. 47 | x2 += 1 48 | y2 += 1 49 | else: 50 | # No mask for this instance. Might happen due to 51 | # resizing or cropping. Set bbox to zeros 52 | x1, x2, y1, y2 = 0, 0, 0, 0 53 | boxes[i] = np.array([y1, x1, y2, x2]) 54 | return boxes.astype(np.int32) 55 | 56 | 57 | def compute_iou(box, boxes, box_area, boxes_area): 58 | """Calculates IoU of the given box with the array of the given boxes. 59 | box: 1D vector [y1, x1, y2, x2] 60 | boxes: [boxes_count, (y1, x1, y2, x2)] 61 | box_area: float. the area of 'box' 62 | boxes_area: array of length boxes_count. 63 | 64 | Note: the areas are passed in rather than calculated here for 65 | efficency. Calculate once in the caller to avoid duplicate work. 66 | """ 67 | # Calculate intersection areas 68 | y1 = np.maximum(box[0], boxes[:, 0]) 69 | y2 = np.minimum(box[2], boxes[:, 2]) 70 | x1 = np.maximum(box[1], boxes[:, 1]) 71 | x2 = np.minimum(box[3], boxes[:, 3]) 72 | intersection = np.maximum(x2 - x1, 0) * np.maximum(y2 - y1, 0) 73 | union = box_area + boxes_area[:] - intersection[:] 74 | iou = intersection / union 75 | return iou 76 | 77 | 78 | def compute_overlaps(boxes1, boxes2): 79 | """Computes IoU overlaps between two sets of boxes. 80 | boxes1, boxes2: [N, (y1, x1, y2, x2)]. 81 | 82 | For better performance, pass the largest set first and the smaller second. 83 | """ 84 | # Areas of anchors and GT boxes 85 | area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1]) 86 | area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1]) 87 | 88 | # Compute overlaps to generate matrix [boxes1 count, boxes2 count] 89 | # Each cell contains the IoU value. 90 | overlaps = np.zeros((boxes1.shape[0], boxes2.shape[0])) 91 | for i in range(overlaps.shape[1]): 92 | box2 = boxes2[i] 93 | overlaps[:, i] = compute_iou(box2, boxes1, area2[i], area1) 94 | return overlaps 95 | 96 | 97 | def non_max_suppression(boxes, scores, threshold): 98 | """Performs non-maximum supression and returns indicies of kept boxes. 99 | boxes: [N, (y1, x1, y2, x2)]. Notice that (y2, x2) lays outside the box. 100 | scores: 1-D array of box scores. 101 | threshold: Float. IoU threshold to use for filtering. 
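
A quick worked example of extract_bboxes() on a made-up single-instance mask, assuming this module is importable as utils, as in the rest of the repo. Note that (y2, x2) are exclusive, so the box ends one past the last masked row and column:

import numpy as np
from utils import extract_bboxes

# One instance mask with a filled region spanning rows 2-4 and columns 1-4.
mask = np.zeros((8, 8, 1), dtype=np.uint8)
mask[2:5, 1:5, 0] = 1
print(extract_bboxes(mask))   # [[2 1 5 5]]
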
102 | """ 103 | assert boxes.shape[0] > 0 104 | if boxes.dtype.kind != "f": 105 | boxes = boxes.astype(np.float32) 106 | 107 | # Compute box areas 108 | y1 = boxes[:, 0] 109 | x1 = boxes[:, 1] 110 | y2 = boxes[:, 2] 111 | x2 = boxes[:, 3] 112 | area = (y2 - y1) * (x2 - x1) 113 | 114 | # Get indicies of boxes sorted by scores (highest first) 115 | ixs = scores.argsort()[::-1] 116 | 117 | pick = [] 118 | while len(ixs) > 0: 119 | # Pick top box and add its index to the list 120 | i = ixs[0] 121 | pick.append(i) 122 | # Compute IoU of the picked box with the rest 123 | iou = compute_iou(boxes[i], boxes[ixs[1:]], area[i], area[ixs[1:]]) 124 | # Identify boxes with IoU over the threshold. This 125 | # returns indicies into ixs[1:], so add 1 to get 126 | # indicies into ixs. 127 | remove_ixs = np.where(iou > threshold)[0] + 1 128 | # Remove indicies of the picked and overlapped boxes. 129 | ixs = np.delete(ixs, remove_ixs) 130 | ixs = np.delete(ixs, 0) 131 | return np.array(pick, dtype=np.int32) 132 | 133 | 134 | def apply_box_deltas(boxes, deltas): 135 | """Applies the given deltas to the given boxes. 136 | boxes: [N, (y1, x1, y2, x2)]. Note that (y2, x2) is outside the box. 137 | deltas: [N, (dy, dx, log(dh), log(dw))] 138 | """ 139 | boxes = boxes.astype(np.float32) 140 | # Convert to y, x, h, w 141 | height = boxes[:, 2] - boxes[:, 0] 142 | width = boxes[:, 3] - boxes[:, 1] 143 | center_y = boxes[:, 0] + 0.5 * height 144 | center_x = boxes[:, 1] + 0.5 * width 145 | # Apply deltas 146 | center_y += deltas[:, 0] * height 147 | center_x += deltas[:, 1] * width 148 | height *= np.exp(deltas[:, 2]) 149 | width *= np.exp(deltas[:, 3]) 150 | # Convert back to y1, x1, y2, x2 151 | y1 = center_y - 0.5 * height 152 | x1 = center_x - 0.5 * width 153 | y2 = y1 + height 154 | x2 = x1 + width 155 | return np.stack([y1, x1, y2, x2], axis=1) 156 | 157 | 158 | def box_refinement_graph(box, gt_box): 159 | """Compute refinement needed to transform box to gt_box. 160 | box and gt_box are [N, (y1, x1, y2, x2)] 161 | """ 162 | box = tf.cast(box, tf.float32) 163 | gt_box = tf.cast(gt_box, tf.float32) 164 | 165 | height = box[:, 2] - box[:, 0] 166 | width = box[:, 3] - box[:, 1] 167 | center_y = box[:, 0] + 0.5 * height 168 | center_x = box[:, 1] + 0.5 * width 169 | 170 | gt_height = gt_box[:, 2] - gt_box[:, 0] 171 | gt_width = gt_box[:, 3] - gt_box[:, 1] 172 | gt_center_y = gt_box[:, 0] + 0.5 * gt_height 173 | gt_center_x = gt_box[:, 1] + 0.5 * gt_width 174 | 175 | dy = (gt_center_y - center_y) / height 176 | dx = (gt_center_x - center_x) / width 177 | dh = tf.log(gt_height / height) 178 | dw = tf.log(gt_width / width) 179 | 180 | result = tf.stack([dy, dx, dh, dw], axis=1) 181 | return result 182 | 183 | 184 | def box_refinement(box, gt_box): 185 | """Compute refinement needed to transform box to gt_box. 186 | box and gt_box are [N, (y1, x1, y2, x2)]. (y2, x2) is 187 | assumed to be outside the box. 
188 | """ 189 | box = box.astype(np.float32) 190 | gt_box = gt_box.astype(np.float32) 191 | 192 | height = box[:, 2] - box[:, 0] 193 | width = box[:, 3] - box[:, 1] 194 | center_y = box[:, 0] + 0.5 * height 195 | center_x = box[:, 1] + 0.5 * width 196 | 197 | gt_height = gt_box[:, 2] - gt_box[:, 0] 198 | gt_width = gt_box[:, 3] - gt_box[:, 1] 199 | gt_center_y = gt_box[:, 0] + 0.5 * gt_height 200 | gt_center_x = gt_box[:, 1] + 0.5 * gt_width 201 | 202 | dy = (gt_center_y - center_y) / height 203 | dx = (gt_center_x - center_x) / width 204 | dh = np.log(gt_height / height) 205 | dw = np.log(gt_width / width) 206 | 207 | return np.stack([dy, dx, dh, dw], axis=1) 208 | 209 | 210 | ############################################################ 211 | # Dataset 212 | ############################################################ 213 | 214 | class Dataset(object): 215 | """The base class for dataset classes. 216 | To use it, create a new class that adds functions specific to the dataset 217 | you want to use. For example: 218 | 219 | class CatsAndDogsDataset(Dataset): 220 | def load_cats_and_dogs(self): 221 | ... 222 | def load_mask(self, image_id): 223 | ... 224 | def image_reference(self, image_id): 225 | ... 226 | 227 | See COCODataset and ShapesDataset as examples. 228 | """ 229 | 230 | def __init__(self, class_map=None): 231 | self._image_ids = [] 232 | self.image_info = [] 233 | # Background is always the first class 234 | self.class_info = [{"source": "", "id": 0, "name": "BG"}] 235 | self.source_class_ids = {} 236 | 237 | def add_class(self, source, class_id, class_name): 238 | assert "." not in source, "Source name cannot contain a dot" 239 | # Does the class exist already? 240 | for info in self.class_info: 241 | if info['source'] == source and info["id"] == class_id: 242 | # source.class_id combination already available, skip 243 | return 244 | # Add the class 245 | self.class_info.append({ 246 | "source": source, 247 | "id": class_id, 248 | "name": class_name, 249 | }) 250 | 251 | def add_image(self, source, image_id, path, **kwargs): 252 | image_info = { 253 | "id": image_id, 254 | "source": source, 255 | "path": path, 256 | } 257 | image_info.update(kwargs) 258 | self.image_info.append(image_info) 259 | 260 | def image_reference(self, image_id): 261 | """Return a link to the image in its source Website or details about 262 | the image that help looking it up or debugging it. 263 | 264 | Override for your dataset, but pass to this function 265 | if you encounter images not in your dataset. 266 | """ 267 | return "" 268 | 269 | def prepare(self, class_map=None): 270 | """Prepares the Dataset class for use. 271 | 272 | TODO: class map is not supported yet. When done, it should handle mapping 273 | classes from different datasets to the same class ID. 274 | """ 275 | def clean_name(name): 276 | """Returns a shorter version of object names for cleaner display.""" 277 | return ",".join(name.split(",")[:1]) 278 | 279 | # Build (or rebuild) everything else from the info dicts. 
280 |         self.num_classes = len(self.class_info)
281 |         self.class_ids = np.arange(self.num_classes)
282 |         self.class_names = [clean_name(c["name"]) for c in self.class_info]
283 |         self.num_images = len(self.image_info)
284 |         self._image_ids = np.arange(self.num_images)
285 | 
286 |         self.class_from_source_map = {"{}.{}".format(info['source'], info['id']): id
287 |                                       for info, id in zip(self.class_info, self.class_ids)}
288 | 
289 |         # Map sources to class_ids they support
290 |         self.sources = list(set([i['source'] for i in self.class_info]))
291 |         self.source_class_ids = {}
292 |         # Loop over datasets
293 |         for source in self.sources:
294 |             self.source_class_ids[source] = []
295 |             # Find classes that belong to this dataset
296 |             for i, info in enumerate(self.class_info):
297 |                 # Include BG class in all datasets
298 |                 if i == 0 or source == info['source']:
299 |                     self.source_class_ids[source].append(i)
300 | 
301 |     def map_source_class_id(self, source_class_id):
302 |         """Takes a source class ID and returns the int class ID assigned to it.
303 | 
304 |         For example:
305 |         dataset.map_source_class_id("coco.12") -> 23
306 |         """
307 |         return self.class_from_source_map[source_class_id]
308 | 
309 |     def get_source_class_id(self, class_id, source):
310 |         """Map an internal class ID to the corresponding class ID in the source dataset."""
311 |         info = self.class_info[class_id]
312 |         assert info['source'] == source
313 |         return info['id']
314 | 
315 |     def append_data(self, class_info, image_info):
316 |         self.external_to_class_id = {}
317 |         for i, c in enumerate(self.class_info):
318 |             for ds, id in c["map"]:
319 |                 self.external_to_class_id[ds + str(id)] = i
320 | 
321 |         # Map external image IDs to internal ones.
322 |         self.external_to_image_id = {}
323 |         for i, info in enumerate(self.image_info):
324 |             self.external_to_image_id[info["ds"] + str(info["id"])] = i
325 | 
326 |     @property
327 |     def image_ids(self):
328 |         return self._image_ids
329 | 
330 |     def source_image_link(self, image_id):
331 |         """Returns the path or URL to the image.
332 |         Override this to return a URL to the image if it's available online for easy
333 |         debugging.
334 |         """
335 |         return self.image_info[image_id]["path"]
336 | 
337 |     def load_image(self, image_id):
338 |         """Load the specified image and return a [H,W,3] Numpy array.
339 |         """
340 |         # Load image
341 |         image = skimage.io.imread(self.image_info[image_id]['path'])
342 |         # If grayscale, convert to RGB for consistency.
343 |         if image.ndim != 3:
344 |             image = skimage.color.gray2rgb(image)
345 |         return image
346 | 
347 |     def load_mask(self, image_id):
348 |         """Load instance masks for the given image.
349 | 
350 |         Different datasets use different ways to store masks. Override this
351 |         method to load instance masks and return them in the form of an
352 |         array of binary masks of shape [height, width, instances].
353 | 
354 |         Returns:
355 |             masks: A bool array of shape [height, width, instance count] with
356 |                 a binary mask per instance.
357 |             class_ids: a 1D array of class IDs of the instance masks.
358 |         """
359 |         # Override this function to load a mask from your dataset.
360 |         # Otherwise, it returns an empty mask.
361 |         mask = np.empty([0, 0, 0])
362 |         class_ids = np.empty([0], np.int32)
363 |         return mask, class_ids
364 | 
365 | 
366 | def resize_image(image, min_dim=None, max_dim=None, padding=False):
367 |     """
368 |     Resizes an image keeping the aspect ratio.
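
A minimal sketch of the Dataset bookkeeping above, using an invented one-class subclass. ToyDataset and its single class and image are made up for illustration; the import assumes this module is on the path as utils:

from utils import Dataset

class ToyDataset(Dataset):
    """A made-up one-class dataset, just to show the bookkeeping."""
    def load_toy(self):
        self.add_class("toy", 1, "widget")
        self.add_image("toy", image_id=0, path="/tmp/widget.png")

ds = ToyDataset()
ds.load_toy()
ds.prepare()
print(ds.num_classes)                    # 2 -- BG plus "widget"
print(ds.class_names)                    # ['BG', 'widget']
print(ds.map_source_class_id("toy.1"))   # 1
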
369 | 
370 |     min_dim: if provided, resizes the image such that its smaller
371 |         dimension == min_dim
372 |     max_dim: if provided, ensures that the image's longest side doesn't
373 |         exceed this value.
374 |     padding: If true, pads image with zeros so its size is max_dim x max_dim
375 | 
376 |     Returns:
377 |     image: the resized image
378 |     window: (y1, x1, y2, x2). If max_dim is provided, padding might
379 |         be inserted in the returned image. If so, this window is the
380 |         coordinates of the image part of the full image (excluding
381 |         the padding). The x2, y2 pixels are not included.
382 |     scale: The scale factor used to resize the image
383 |     padding: Padding added to the image [(top, bottom), (left, right), (0, 0)]
384 |     """
385 |     # Default window (y1, x1, y2, x2) and default scale == 1.
386 |     h, w = image.shape[:2]
387 |     window = (0, 0, h, w)
388 |     scale = 1
389 | 
390 |     # Scale?
391 |     if min_dim:
392 |         # Scale up but not down
393 |         scale = max(1, min_dim / min(h, w))
394 |     # Does it exceed max dim?
395 |     if max_dim:
396 |         image_max = max(h, w)
397 |         if round(image_max * scale) > max_dim:
398 |             scale = max_dim / image_max
399 |     # Resize image and mask
400 |     if scale != 1:
401 |         image = scipy.misc.imresize(
402 |             image, (round(h * scale), round(w * scale)))
403 |     # Need padding?
404 |     if padding:
405 |         # Get new height and width
406 |         h, w = image.shape[:2]
407 |         top_pad = (max_dim - h) // 2
408 |         bottom_pad = max_dim - h - top_pad
409 |         left_pad = (max_dim - w) // 2
410 |         right_pad = max_dim - w - left_pad
411 |         padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)]
412 |         image = np.pad(image, padding, mode='constant', constant_values=0)
413 |         window = (top_pad, left_pad, h + top_pad, w + left_pad)
414 |     return image, window, scale, padding
415 | 
416 | 
417 | def resize_mask(mask, scale, padding):
418 |     """Resizes a mask using the given scale and padding.
419 |     Typically, you get the scale and padding from resize_image() to
420 |     ensure both the image and the mask are resized consistently.
421 | 
422 |     scale: mask scaling factor
423 |     padding: Padding to add to the mask in the form
424 |         [(top, bottom), (left, right), (0, 0)]
425 |     """
426 |     h, w = mask.shape[:2]
427 |     mask = scipy.ndimage.zoom(mask, zoom=[scale, scale, 1], order=0)
428 |     mask = np.pad(mask, padding, mode='constant', constant_values=0)
429 |     return mask
430 | 
431 | 
432 | def minimize_mask(bbox, mask, mini_shape):
433 |     """Resize masks to a smaller version to reduce memory load.
434 |     Mini-masks can then be resized back to image scale using expand_masks()
435 | 
436 |     See inspect_data.ipynb notebook for more details.
437 |     """
438 |     mini_mask = np.zeros(mini_shape + (mask.shape[-1],), dtype=bool)
439 |     for i in range(mask.shape[-1]):
440 |         m = mask[:, :, i]
441 |         y1, x1, y2, x2 = bbox[i][:4]
442 |         m = m[y1:y2, x1:x2]
443 |         if m.size == 0:
444 |             raise Exception("Invalid bounding box with area of zero")
445 |         m = scipy.misc.imresize(m.astype(float), mini_shape, interp='bilinear')
446 |         mini_mask[:, :, i] = np.where(m >= 128, 1, 0)
447 |     return mini_mask
448 | 
449 | 
450 | def expand_mask(bbox, mini_mask, image_shape):
451 |     """Resizes mini masks back to image size. Reverses the change
452 |     of minimize_mask().
453 | 
454 |     See inspect_data.ipynb notebook for more details.
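
A worked example of resize_image() showing how max_dim caps the scale that min_dim requests. Two cautions: resize_mask() above uses scipy.ndimage, which is worth importing explicitly alongside scipy.misc, and this sketch assumes the pinned pre-deprecation scipy (with scipy.misc.imresize) from requirements.txt, plus this module importable as utils:

import numpy as np
from utils import resize_image

# A 100x200 image with min_dim=256 and max_dim=256: min_dim alone would give
# scale 2.56, but that pushes the long side to 512, so max_dim caps the scale
# at 256/200 = 1.28 and the result is padded vertically into a square.
image = np.zeros((100, 200, 3), dtype=np.uint8)
resized, window, scale, padding = resize_image(
    image, min_dim=256, max_dim=256, padding=True)
print(resized.shape)   # (256, 256, 3)
print(scale)           # 1.28
print(window)          # (64, 0, 192, 256) -- the un-padded image region
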
455 | """ 456 | mask = np.zeros(image_shape[:2] + (mini_mask.shape[-1],), dtype=bool) 457 | for i in range(mask.shape[-1]): 458 | m = mini_mask[:, :, i] 459 | y1, x1, y2, x2 = bbox[i][:4] 460 | h = y2 - y1 461 | w = x2 - x1 462 | m = scipy.misc.imresize(m.astype(float), (h, w), interp='bilinear') 463 | mask[y1:y2, x1:x2, i] = np.where(m >= 128, 1, 0) 464 | return mask 465 | 466 | 467 | # TODO: Build and use this function to reduce code duplication 468 | def mold_mask(mask, config): 469 | pass 470 | 471 | 472 | def unmold_mask(mask, bbox, image_shape): 473 | """Converts a mask generated by the neural network into a format similar 474 | to it's original shape. 475 | mask: [height, width] of type float. A small, typically 28x28 mask. 476 | bbox: [y1, x1, y2, x2]. The box to fit the mask in. 477 | 478 | Returns a binary mask with the same size as the original image. 479 | """ 480 | threshold = 0.5 481 | y1, x1, y2, x2 = bbox 482 | mask = scipy.misc.imresize( 483 | mask, (y2 - y1, x2 - x1), interp='bilinear').astype(np.float32) / 255.0 484 | mask = np.where(mask >= threshold, 1, 0).astype(np.uint8) 485 | 486 | # Put the mask in the right location. 487 | full_mask = np.zeros(image_shape[:2], dtype=np.uint8) 488 | full_mask[y1:y2, x1:x2] = mask 489 | return full_mask 490 | 491 | 492 | ############################################################ 493 | # Anchors 494 | ############################################################ 495 | 496 | def generate_anchors(scales, ratios, shape, feature_stride, anchor_stride): 497 | """ 498 | scales: 1D array of anchor sizes in pixels. Example: [32, 64, 128] 499 | ratios: 1D array of anchor ratios of width/height. Example: [0.5, 1, 2] 500 | shape: [height, width] spatial shape of the feature map over which 501 | to generate anchors. 502 | feature_stride: Stride of the feature map relative to the image in pixels. 503 | anchor_stride: Stride of anchors on the feature map. For example, if the 504 | value is 2 then generate anchors for every other feature map pixel. 505 | """ 506 | # Get all combinations of scales and ratios 507 | scales, ratios = np.meshgrid(np.array(scales), np.array(ratios)) 508 | scales = scales.flatten() 509 | ratios = ratios.flatten() 510 | 511 | # Enumerate heights and widths from scales and ratios 512 | heights = scales / np.sqrt(ratios) 513 | widths = scales * np.sqrt(ratios) 514 | 515 | # Enumerate shifts in feature space 516 | shifts_y = np.arange(0, shape[0], anchor_stride) * feature_stride 517 | shifts_x = np.arange(0, shape[1], anchor_stride) * feature_stride 518 | shifts_x, shifts_y = np.meshgrid(shifts_x, shifts_y) 519 | 520 | # Enumerate combinations of shifts, widths, and heights 521 | box_widths, box_centers_x = np.meshgrid(widths, shifts_x) 522 | box_heights, box_centers_y = np.meshgrid(heights, shifts_y) 523 | 524 | # Reshape to get a list of (y, x) and a list of (h, w) 525 | box_centers = np.stack( 526 | [box_centers_y, box_centers_x], axis=2).reshape([-1, 2]) 527 | box_sizes = np.stack([box_heights, box_widths], axis=2).reshape([-1, 2]) 528 | 529 | # Convert to corner coordinates (y1, x1, y2, x2) 530 | boxes = np.concatenate([box_centers - 0.5 * box_sizes, 531 | box_centers + 0.5 * box_sizes], axis=1) 532 | return boxes 533 | 534 | 535 | def generate_pyramid_anchors(scales, ratios, feature_shapes, feature_strides, 536 | anchor_stride): 537 | """Generate anchors at different levels of a feature pyramid. Each scale 538 | is associated with a level of the pyramid, but each ratio is used in 539 | all levels of the pyramid. 
540 | 541 | Returns: 542 | anchors: [N, (y1, x1, y2, x2)]. All generated anchors in one array. Sorted 543 | with the same order of the given scales. So, anchors of scale[0] come 544 | first, then anchors of scale[1], and so on. 545 | """ 546 | # Anchors 547 | # [anchor_count, (y1, x1, y2, x2)] 548 | anchors = [] 549 | for i in range(len(scales)): 550 | anchors.append(generate_anchors(scales[i], ratios, feature_shapes[i], 551 | feature_strides[i], anchor_stride)) 552 | return np.concatenate(anchors, axis=0) 553 | 554 | 555 | ############################################################ 556 | # Miscellaneous 557 | ############################################################ 558 | 559 | def trim_zeros(x): 560 | """It's common to have tensors larger than the available data and 561 | pad with zeros. This function removes rows that are all zeros. 562 | 563 | x: [rows, columns]. 564 | """ 565 | assert len(x.shape) == 2 566 | return x[~np.all(x == 0, axis=1)] 567 | 568 | 569 | def compute_ap(gt_boxes, gt_class_ids, 570 | pred_boxes, pred_class_ids, pred_scores, 571 | iou_threshold=0.5): 572 | """Compute Average Precision at a set IoU threshold (default 0.5). 573 | 574 | Returns: 575 | mAP: Mean Average Precision 576 | precisions: List of precisions at different class score thresholds. 577 | recalls: List of recall values at different class score thresholds. 578 | overlaps: [pred_boxes, gt_boxes] IoU overlaps. 579 | """ 580 | # Trim zero padding and sort predictions by score from high to low 581 | # TODO: cleaner to do zero unpadding upstream 582 | gt_boxes = trim_zeros(gt_boxes) 583 | pred_boxes = trim_zeros(pred_boxes) 584 | pred_scores = pred_scores[:pred_boxes.shape[0]] 585 | indices = np.argsort(pred_scores)[::-1] 586 | pred_boxes = pred_boxes[indices] 587 | pred_class_ids = pred_class_ids[indices] 588 | pred_scores = pred_scores[indices] 589 | 590 | # Compute IoU overlaps [pred_boxes, gt_boxes] 591 | overlaps = compute_overlaps(pred_boxes, gt_boxes) 592 | 593 | # Loop through ground truth boxes and find matching predictions 594 | match_count = 0 595 | pred_match = np.zeros([pred_boxes.shape[0]]) 596 | gt_match = np.zeros([gt_boxes.shape[0]]) 597 | for i in range(len(pred_boxes)): 598 | # Find best matching ground truth box 599 | sorted_ixs = np.argsort(overlaps[i])[::-1] 600 | for j in sorted_ixs: 601 | # If ground truth box is already matched, go to next one 602 | if gt_match[j] == 1: 603 | continue 604 | # If we reach IoU smaller than the threshold, end the loop 605 | iou = overlaps[i, j] 606 | if iou < iou_threshold: 607 | break 608 | # Do we have a match? 609 | if pred_class_ids[i] == gt_class_ids[j]: 610 | match_count += 1 611 | gt_match[j] = 1 612 | pred_match[i] = 1 613 | break 614 | 615 | # Compute precision and recall at each prediction box step 616 | precisions = np.cumsum(pred_match) / (np.arange(len(pred_match)) + 1) 617 | recalls = np.cumsum(pred_match).astype(np.float32) / len(gt_match) 618 | 619 | # Pad with start and end values to simplify the math 620 | precisions = np.concatenate([[0], precisions, [0]]) 621 | recalls = np.concatenate([[0], recalls, [1]]) 622 | 623 | # Ensure precision values decrease but don't increase. This way, the 624 | # precision value at each recall threshold is the maximum it can be 625 | # for all following recall thresholds, as specified by the VOC paper. 
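
A small worked run of generate_anchors() from the Anchors section above, with invented parameters and the module assumed importable as utils. One scale (32 px) and three aspect ratios over a 2x2 feature map with a feature stride of 16 gives 2*2 positions x 3 ratios = 12 anchors:

import numpy as np
from utils import generate_anchors

anchors = generate_anchors(scales=32, ratios=[0.5, 1, 2], shape=[2, 2],
                           feature_stride=16, anchor_stride=1)
print(anchors.shape)   # (12, 4)
print(anchors[1])      # the square (ratio 1) anchor centered at (0, 0): [-16. -16.  16.  16.]
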
626 | for i in range(len(precisions) - 2, -1, -1): 627 | precisions[i] = np.maximum(precisions[i], precisions[i + 1]) 628 | 629 | # Compute mean AP over recall range 630 | indices = np.where(recalls[:-1] != recalls[1:])[0] + 1 631 | mAP = np.sum((recalls[indices] - recalls[indices - 1]) * 632 | precisions[indices]) 633 | 634 | return mAP, precisions, recalls, overlaps 635 | 636 | 637 | def compute_recall(pred_boxes, gt_boxes, iou): 638 | """Compute the recall at the given IoU threshold. It's an indication 639 | of how many GT boxes were found by the given prediction boxes. 640 | 641 | pred_boxes: [N, (y1, x1, y2, x2)] in image coordinates 642 | gt_boxes: [N, (y1, x1, y2, x2)] in image coordinates 643 | """ 644 | # Measure overlaps 645 | overlaps = compute_overlaps(pred_boxes, gt_boxes) 646 | iou_max = np.max(overlaps, axis=1) 647 | iou_argmax = np.argmax(overlaps, axis=1) 648 | positive_ids = np.where(iou_max >= iou)[0] 649 | matched_gt_boxes = iou_argmax[positive_ids] 650 | 651 | recall = len(set(matched_gt_boxes)) / gt_boxes.shape[0] 652 | return recall, positive_ids 653 | 654 | 655 | # ## Batch Slicing 656 | # Some custom layers support a batch size of 1 only, and require a lot of work 657 | # to support batches greater than 1. This function slices an input tensor 658 | # across the batch dimension and feeds batches of size 1. Effectively, 659 | # an easy way to support batches > 1 quickly with little code modification. 660 | # In the long run, it's more efficient to modify the code to support large 661 | # batches and getting rid of this function. Consider this a temporary solution 662 | def batch_slice(inputs, graph_fn, batch_size, names=None): 663 | """Splits inputs into slices and feeds each slice to a copy of the given 664 | computation graph and then combines the results. It allows you to run a 665 | graph on a batch of inputs even if the graph is written to support one 666 | instance only. 667 | 668 | inputs: list of tensors. All must have the same first dimension length 669 | graph_fn: A function that returns a TF tensor that's part of a graph. 670 | batch_size: number of slices to divide the data into. 671 | names: If provided, assigns names to the resulting tensors. 672 | """ 673 | if not isinstance(inputs, list): 674 | inputs = [inputs] 675 | 676 | outputs = [] 677 | for i in range(batch_size): 678 | inputs_slice = [x[i] for x in inputs] 679 | output_slice = graph_fn(*inputs_slice) 680 | if not isinstance(output_slice, (tuple, list)): 681 | output_slice = [output_slice] 682 | outputs.append(output_slice) 683 | # Change outputs from a list of slices where each is 684 | # a list of outputs to a list of outputs and each has 685 | # a list of slices 686 | outputs = list(zip(*outputs)) 687 | 688 | if names is None: 689 | names = [None] * len(outputs) 690 | 691 | result = [tf.stack(o, axis=0, name=n) 692 | for o, n in zip(outputs, names)] 693 | if len(result) == 1: 694 | result = result[0] 695 | 696 | return result 697 | 698 | 699 | def download_trained_weights(coco_model_path, verbose=1): 700 | """Download COCO trained weights from Releases. 701 | 702 | coco_model_path: local path of COCO trained weights 703 | """ 704 | if verbose > 0: 705 | print("Downloading pretrained model to " + coco_model_path + " ...") 706 | with contextlib.closing(request.urlopen(COCO_MODEL_URL)) as resp, open(coco_model_path, 'wb') as out: 707 | shutil.copyfileobj(resp, out) 708 | if verbose > 0: 709 | print("... 
done downloading pretrained model!") 710 | -------------------------------------------------------------------------------- /src/mask_rcnn_ros/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Common utility functions and classes. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | """ 9 | 10 | import sys 11 | import os 12 | import math 13 | import random 14 | import numpy as np 15 | import tensorflow as tf 16 | import scipy.misc 17 | import skimage.color 18 | import skimage.io 19 | import urllib.request 20 | import shutil 21 | 22 | # URL from which to download the latest COCO trained weights 23 | COCO_MODEL_URL = "https://github.com/matterport/Mask_RCNN/releases/download/v2.0/mask_rcnn_coco.h5" 24 | 25 | 26 | ############################################################ 27 | # Bounding Boxes 28 | ############################################################ 29 | 30 | def extract_bboxes(mask): 31 | """Compute bounding boxes from masks. 32 | mask: [height, width, num_instances]. Mask pixels are either 1 or 0. 33 | 34 | Returns: bbox array [num_instances, (y1, x1, y2, x2)]. 35 | """ 36 | boxes = np.zeros([mask.shape[-1], 4], dtype=np.int32) 37 | for i in range(mask.shape[-1]): 38 | m = mask[:, :, i] 39 | # Bounding box. 40 | horizontal_indicies = np.where(np.any(m, axis=0))[0] 41 | vertical_indicies = np.where(np.any(m, axis=1))[0] 42 | if horizontal_indicies.shape[0]: 43 | x1, x2 = horizontal_indicies[[0, -1]] 44 | y1, y2 = vertical_indicies[[0, -1]] 45 | # x2 and y2 should not be part of the box. Increment by 1. 46 | x2 += 1 47 | y2 += 1 48 | else: 49 | # No mask for this instance. Might happen due to 50 | # resizing or cropping. Set bbox to zeros 51 | x1, x2, y1, y2 = 0, 0, 0, 0 52 | boxes[i] = np.array([y1, x1, y2, x2]) 53 | return boxes.astype(np.int32) 54 | 55 | 56 | def compute_iou(box, boxes, box_area, boxes_area): 57 | """Calculates IoU of the given box with the array of the given boxes. 58 | box: 1D vector [y1, x1, y2, x2] 59 | boxes: [boxes_count, (y1, x1, y2, x2)] 60 | box_area: float. the area of 'box' 61 | boxes_area: array of length boxes_count. 62 | 63 | Note: the areas are passed in rather than calculated here for 64 | efficency. Calculate once in the caller to avoid duplicate work. 65 | """ 66 | # Calculate intersection areas 67 | y1 = np.maximum(box[0], boxes[:, 0]) 68 | y2 = np.minimum(box[2], boxes[:, 2]) 69 | x1 = np.maximum(box[1], boxes[:, 1]) 70 | x2 = np.minimum(box[3], boxes[:, 3]) 71 | intersection = np.maximum(x2 - x1, 0) * np.maximum(y2 - y1, 0) 72 | union = box_area + boxes_area[:] - intersection[:] 73 | iou = intersection / union 74 | return iou 75 | 76 | 77 | def compute_overlaps(boxes1, boxes2): 78 | """Computes IoU overlaps between two sets of boxes. 79 | boxes1, boxes2: [N, (y1, x1, y2, x2)]. 80 | 81 | For better performance, pass the largest set first and the smaller second. 82 | """ 83 | # Areas of anchors and GT boxes 84 | area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1]) 85 | area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1]) 86 | 87 | # Compute overlaps to generate matrix [boxes1 count, boxes2 count] 88 | # Each cell contains the IoU value. 
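
A quick numeric check of compute_iou() above with invented boxes, assuming the module is importable as utils. The first candidate matches exactly; the second overlaps by a 5x5 region:

import numpy as np
from utils import compute_iou

box = np.array([0, 0, 10, 10])                      # [y1, x1, y2, x2]
boxes = np.array([[0, 0, 10, 10], [5, 5, 15, 15]])
print(compute_iou(box, boxes, 100.0, np.array([100.0, 100.0])))
# [1.         0.14285714] -- a perfect match, then 25 / (100 + 100 - 25)
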
96 | def compute_overlaps_masks(masks1, masks2):
97 |     """Computes IoU overlaps between two sets of masks.
98 |     masks1, masks2: [Height, Width, instances]
99 |     """
100 |     # flatten masks
101 |     masks1 = np.reshape(masks1 > .5, (-1, masks1.shape[-1])).astype(np.float32)
102 |     masks2 = np.reshape(masks2 > .5, (-1, masks2.shape[-1])).astype(np.float32)
103 |     area1 = np.sum(masks1, axis=0)
104 |     area2 = np.sum(masks2, axis=0)
105 |
106 |     # intersections and union
107 |     intersections = np.dot(masks1.T, masks2)
108 |     union = area1[:, None] + area2[None, :] - intersections
109 |     overlaps = intersections / union
110 |
111 |     return overlaps
112 |
113 |
114 | def non_max_suppression(boxes, scores, threshold):
115 |     """Performs non-maximum suppression and returns indices of kept boxes.
116 |     boxes: [N, (y1, x1, y2, x2)]. Notice that (y2, x2) lies outside the box.
117 |     scores: 1-D array of box scores.
118 |     threshold: Float. IoU threshold to use for filtering.
119 |     """
120 |     assert boxes.shape[0] > 0
121 |     if boxes.dtype.kind != "f":
122 |         boxes = boxes.astype(np.float32)
123 |
124 |     # Compute box areas
125 |     y1 = boxes[:, 0]
126 |     x1 = boxes[:, 1]
127 |     y2 = boxes[:, 2]
128 |     x2 = boxes[:, 3]
129 |     area = (y2 - y1) * (x2 - x1)
130 |
131 |     # Get indices of boxes sorted by scores (highest first)
132 |     ixs = scores.argsort()[::-1]
133 |
134 |     pick = []
135 |     while len(ixs) > 0:
136 |         # Pick top box and add its index to the list
137 |         i = ixs[0]
138 |         pick.append(i)
139 |         # Compute IoU of the picked box with the rest
140 |         iou = compute_iou(boxes[i], boxes[ixs[1:]], area[i], area[ixs[1:]])
141 |         # Identify boxes with IoU over the threshold. This
142 |         # returns indices into ixs[1:], so add 1 to get
143 |         # indices into ixs.
144 |         remove_ixs = np.where(iou > threshold)[0] + 1
145 |         # Remove indices of the picked and overlapped boxes.
146 |         ixs = np.delete(ixs, remove_ixs)
147 |         ixs = np.delete(ixs, 0)
148 |     return np.array(pick, dtype=np.int32)
149 |
150 |
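# --- Editor's illustration (not part of the original file) ---
# With two near-duplicate boxes and one distant box, NMS at threshold 0.3
# keeps the higher-scoring duplicate plus the distant box:
#
#   boxes = np.array([[0, 0, 10, 10], [1, 1, 11, 11], [50, 50, 60, 60]])
#   scores = np.array([0.9, 0.8, 0.7])
#   non_max_suppression(boxes, scores, threshold=0.3)
#   # -> array([0, 2], dtype=int32); box 1 overlaps box 0 with IoU ~0.68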
151 | def apply_box_deltas(boxes, deltas):
152 |     """Applies the given deltas to the given boxes.
153 |     boxes: [N, (y1, x1, y2, x2)]. Note that (y2, x2) is outside the box.
154 |     deltas: [N, (dy, dx, log(dh), log(dw))]
155 |     """
156 |     boxes = boxes.astype(np.float32)
157 |     # Convert to y, x, h, w
158 |     height = boxes[:, 2] - boxes[:, 0]
159 |     width = boxes[:, 3] - boxes[:, 1]
160 |     center_y = boxes[:, 0] + 0.5 * height
161 |     center_x = boxes[:, 1] + 0.5 * width
162 |     # Apply deltas
163 |     center_y += deltas[:, 0] * height
164 |     center_x += deltas[:, 1] * width
165 |     height *= np.exp(deltas[:, 2])
166 |     width *= np.exp(deltas[:, 3])
167 |     # Convert back to y1, x1, y2, x2
168 |     y1 = center_y - 0.5 * height
169 |     x1 = center_x - 0.5 * width
170 |     y2 = y1 + height
171 |     x2 = x1 + width
172 |     return np.stack([y1, x1, y2, x2], axis=1)
173 |
174 |
175 | def box_refinement_graph(box, gt_box):
176 |     """Compute refinement needed to transform box to gt_box.
177 |     box and gt_box are [N, (y1, x1, y2, x2)]
178 |     """
179 |     box = tf.cast(box, tf.float32)
180 |     gt_box = tf.cast(gt_box, tf.float32)
181 |
182 |     height = box[:, 2] - box[:, 0]
183 |     width = box[:, 3] - box[:, 1]
184 |     center_y = box[:, 0] + 0.5 * height
185 |     center_x = box[:, 1] + 0.5 * width
186 |
187 |     gt_height = gt_box[:, 2] - gt_box[:, 0]
188 |     gt_width = gt_box[:, 3] - gt_box[:, 1]
189 |     gt_center_y = gt_box[:, 0] + 0.5 * gt_height
190 |     gt_center_x = gt_box[:, 1] + 0.5 * gt_width
191 |
192 |     dy = (gt_center_y - center_y) / height
193 |     dx = (gt_center_x - center_x) / width
194 |     dh = tf.log(gt_height / height)
195 |     dw = tf.log(gt_width / width)
196 |
197 |     result = tf.stack([dy, dx, dh, dw], axis=1)
198 |     return result
199 |
200 |
201 | def box_refinement(box, gt_box):
202 |     """Compute refinement needed to transform box to gt_box.
203 |     box and gt_box are [N, (y1, x1, y2, x2)]. (y2, x2) is
204 |     assumed to be outside the box.
205 |     """
206 |     box = box.astype(np.float32)
207 |     gt_box = gt_box.astype(np.float32)
208 |
209 |     height = box[:, 2] - box[:, 0]
210 |     width = box[:, 3] - box[:, 1]
211 |     center_y = box[:, 0] + 0.5 * height
212 |     center_x = box[:, 1] + 0.5 * width
213 |
214 |     gt_height = gt_box[:, 2] - gt_box[:, 0]
215 |     gt_width = gt_box[:, 3] - gt_box[:, 1]
216 |     gt_center_y = gt_box[:, 0] + 0.5 * gt_height
217 |     gt_center_x = gt_box[:, 1] + 0.5 * gt_width
218 |
219 |     dy = (gt_center_y - center_y) / height
220 |     dx = (gt_center_x - center_x) / width
221 |     dh = np.log(gt_height / height)
222 |     dw = np.log(gt_width / width)
223 |
224 |     return np.stack([dy, dx, dh, dw], axis=1)
225 |
226 |
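# --- Editor's illustration (not part of the original file) ---
# box_refinement() and apply_box_deltas() are inverses of each other, so
# refining a box toward a target and applying the deltas recovers the target:
#
#   box = np.array([[0, 0, 10, 10]])
#   gt = np.array([[5, 5, 25, 25]])
#   deltas = box_refinement(box, gt)     # [dy, dx, log(dh), log(dw)]
#   apply_box_deltas(box, deltas)        # -> [[5., 5., 25., 25.]]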
227 | ############################################################
228 | #  Dataset
229 | ############################################################
230 |
231 | class Dataset(object):
232 |     """The base class for dataset classes.
233 |     To use it, create a new class that adds functions specific to the dataset
234 |     you want to use. For example:
235 |
236 |     class CatsAndDogsDataset(Dataset):
237 |         def load_cats_and_dogs(self):
238 |             ...
239 |         def load_mask(self, image_id):
240 |             ...
241 |         def image_reference(self, image_id):
242 |             ...
243 |
244 |     See COCODataset and ShapesDataset as examples.
245 |     """
246 |
247 |     def __init__(self, class_map=None):
248 |         self._image_ids = []
249 |         self.image_info = []
250 |         # Background is always the first class
251 |         self.class_info = [{"source": "", "id": 0, "name": "BG"}]
252 |         self.source_class_ids = {}
253 |
254 |     def add_class(self, source, class_id, class_name):
255 |         assert "." not in source, "Source name cannot contain a dot"
256 |         # Does the class exist already?
257 |         for info in self.class_info:
258 |             if info['source'] == source and info["id"] == class_id:
259 |                 # source.class_id combination already available, skip
260 |                 return
261 |         # Add the class
262 |         self.class_info.append({
263 |             "source": source,
264 |             "id": class_id,
265 |             "name": class_name,
266 |         })
267 |
268 |     def add_image(self, source, image_id, path, **kwargs):
269 |         image_info = {
270 |             "id": image_id,
271 |             "source": source,
272 |             "path": path,
273 |         }
274 |         image_info.update(kwargs)
275 |         self.image_info.append(image_info)
276 |
277 |     def image_reference(self, image_id):
278 |         """Return a link to the image in its source website or details about
279 |         the image that help in looking it up or debugging it.
280 |
281 |         Override for your dataset, but pass to this function
282 |         if you encounter images not in your dataset.
283 |         """
284 |         return ""
285 |
286 |     def prepare(self, class_map=None):
287 |         """Prepares the Dataset class for use.
288 |
289 |         TODO: class map is not supported yet. When done, it should handle mapping
290 |         classes from different datasets to the same class ID.
291 |         """
292 |
293 |         def clean_name(name):
294 |             """Returns a shorter version of object names for cleaner display."""
295 |             return ",".join(name.split(",")[:1])
296 |
297 |         # Build (or rebuild) everything else from the info dicts.
298 |         self.num_classes = len(self.class_info)
299 |         self.class_ids = np.arange(self.num_classes)
300 |         self.class_names = [clean_name(c["name"]) for c in self.class_info]
301 |         self.num_images = len(self.image_info)
302 |         self._image_ids = np.arange(self.num_images)
303 |
304 |         self.class_from_source_map = {"{}.{}".format(info['source'], info['id']): id
305 |                                       for info, id in zip(self.class_info, self.class_ids)}
306 |
307 |         # Map sources to class_ids they support
308 |         self.sources = list(set([i['source'] for i in self.class_info]))
309 |         self.source_class_ids = {}
310 |         # Loop over datasets
311 |         for source in self.sources:
312 |             self.source_class_ids[source] = []
313 |             # Find classes that belong to this dataset
314 |             for i, info in enumerate(self.class_info):
315 |                 # Include BG class in all datasets
316 |                 if i == 0 or source == info['source']:
317 |                     self.source_class_ids[source].append(i)
318 |
319 |     def map_source_class_id(self, source_class_id):
320 |         """Takes a source class ID and returns the int class ID assigned to it.
321 |
322 |         For example:
323 |         dataset.map_source_class_id("coco.12") -> 23
324 |         """
325 |         return self.class_from_source_map[source_class_id]
326 |
327 |     def get_source_class_id(self, class_id, source):
328 |         """Map an internal class ID to the corresponding class ID in the source dataset."""
329 |         info = self.class_info[class_id]
330 |         assert info['source'] == source
331 |         return info['id']
332 |
333 |     def append_data(self, class_info, image_info):
334 |         self.external_to_class_id = {}
335 |         for i, c in enumerate(self.class_info):
336 |             for ds, id in c["map"]:
337 |                 self.external_to_class_id[ds + str(id)] = i
338 |
339 |         # Map external image IDs to internal ones.
340 |         self.external_to_image_id = {}
341 |         for i, info in enumerate(self.image_info):
342 |             self.external_to_image_id[info["ds"] + str(info["id"])] = i
343 |
344 |     @property
345 |     def image_ids(self):
346 |         return self._image_ids
347 |
348 |     def source_image_link(self, image_id):
349 |         """Returns the path or URL to the image.
350 |         Override this to return a URL to the image if it's available online for easy
351 |         debugging.
352 |         """
353 |         return self.image_info[image_id]["path"]
354 |
355 |     def load_image(self, image_id):
356 |         """Load the specified image and return a [H,W,3] Numpy array.
357 |         """
358 |         # Load image
359 |         image = skimage.io.imread(self.image_info[image_id]['path'])
360 |         # If grayscale, convert to RGB for consistency.
361 |         if image.ndim != 3:
362 |             image = skimage.color.gray2rgb(image)
363 |         return image
364 |
365 |     def load_mask(self, image_id):
366 |         """Load instance masks for the given image.
367 |
368 |         Different datasets use different ways to store masks. Override this
369 |         method to load instance masks and return them in the form of an
370 |         array of binary masks of shape [height, width, instances].
371 |
372 |         Returns:
373 |             masks: A bool array of shape [height, width, instance count] with
374 |                 a binary mask per instance.
375 |             class_ids: a 1D array of class IDs of the instance masks.
376 |         """
377 |         # Override this function to load a mask from your dataset.
378 |         # Otherwise, it returns an empty mask.
379 |         mask = np.empty([0, 0, 0])
380 |         class_ids = np.empty([0], np.int32)
381 |         return mask, class_ids
382 |
383 |
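# --- Editor's illustration (not part of the original file) ---
# The smallest useful Dataset subclass registers classes and images, then
# calls prepare(). All names below ("demo", "widget", the path) are
# hypothetical placeholders:
#
#   class DemoDataset(Dataset):
#       def load_demo(self):
#           self.add_class("demo", 1, "widget")
#           self.add_image("demo", image_id=0, path="/tmp/widget_0.png")
#
#   ds = DemoDataset()
#   ds.load_demo()
#   ds.prepare()
#   ds.class_names                       # -> ['BG', 'widget']
#   ds.map_source_class_id("demo.1")     # -> 1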
384 | def resize_image(image, min_dim=None, max_dim=None, padding=False):
385 |     """
386 |     Resizes an image keeping the aspect ratio.
387 |
388 |     min_dim: if provided, resizes the image such that its smaller
389 |         dimension == min_dim
390 |     max_dim: if provided, ensures that the image's longest side doesn't
391 |         exceed this value.
392 |     padding: If true, pads image with zeros so its size is max_dim x max_dim
393 |
394 |     Returns:
395 |     image: the resized image
396 |     window: (y1, x1, y2, x2). If max_dim is provided, padding might
397 |         be inserted in the returned image. If so, this window is the
398 |         coordinates of the image part of the full image (excluding
399 |         the padding). The x2, y2 pixels are not included.
400 |     scale: The scale factor used to resize the image
401 |     padding: Padding added to the image [(top, bottom), (left, right), (0, 0)]
402 |     """
403 |     # Default window (y1, x1, y2, x2) and default scale == 1.
404 |     h, w = image.shape[:2]
405 |     window = (0, 0, h, w)
406 |     scale = 1
407 |
408 |     # Scale?
409 |     if min_dim:
410 |         # Scale up but not down
411 |         scale = max(1, min_dim / min(h, w))
412 |     # Does it exceed max dim?
413 |     if max_dim:
414 |         image_max = max(h, w)
415 |         if round(image_max * scale) > max_dim:
416 |             scale = max_dim / image_max
417 |     # Resize image
418 |     if scale != 1:
419 |         image = scipy.misc.imresize(
420 |             image, (round(h * scale), round(w * scale)))
421 |     # Need padding?
422 |     if padding:
423 |         # Get new height and width
424 |         h, w = image.shape[:2]
425 |         top_pad = (max_dim - h) // 2
426 |         bottom_pad = max_dim - h - top_pad
427 |         left_pad = (max_dim - w) // 2
428 |         right_pad = max_dim - w - left_pad
429 |         padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)]
430 |         image = np.pad(image, padding, mode='constant', constant_values=0)
431 |         window = (top_pad, left_pad, h + top_pad, w + left_pad)
432 |     return image, window, scale, padding
433 |
434 |
435 | def resize_mask(mask, scale, padding):
436 |     """Resizes a mask using the given scale and padding.
437 |     Typically, you get the scale and padding from resize_image() to
438 |     ensure both the image and the mask are resized consistently.
439 |
440 |     scale: mask scaling factor
441 |     padding: Padding to add to the mask in the form
442 |         [(top, bottom), (left, right), (0, 0)]
443 |     """
444 |     h, w = mask.shape[:2]
445 |     mask = scipy.ndimage.zoom(mask, zoom=[scale, scale, 1], order=0)
446 |     mask = np.pad(mask, padding, mode='constant', constant_values=0)
447 |     return mask
448 |
449 |
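# --- Editor's illustration (not part of the original file) ---
# Resizing an image and its mask consistently: a 100x200 input with
# min_dim=256, max_dim=256, padding=True is capped at scale 256/200 = 1.28,
# resized to 128x256, then padded to 256x256 (64 pixels top and bottom).
#
#   image = np.zeros((100, 200, 3), dtype=np.uint8)
#   mask = np.zeros((100, 200, 1), dtype=bool)
#   image, window, scale, padding = resize_image(
#       image, min_dim=256, max_dim=256, padding=True)
#   mask = resize_mask(mask, scale, padding)
#   # image.shape == (256, 256, 3), window == (64, 0, 192, 256)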
450 | def minimize_mask(bbox, mask, mini_shape):
451 |     """Resize masks to a smaller version to cut memory load.
452 |     Mini-masks can then be resized back to image scale using expand_mask()
453 |
454 |     See inspect_data.ipynb notebook for more details.
455 |     """
456 |     mini_mask = np.zeros(mini_shape + (mask.shape[-1],), dtype=bool)
457 |     for i in range(mask.shape[-1]):
458 |         m = mask[:, :, i]
459 |         y1, x1, y2, x2 = bbox[i][:4]
460 |         m = m[y1:y2, x1:x2]
461 |         if m.size == 0:
462 |             raise Exception("Invalid bounding box with area of zero")
463 |         m = scipy.misc.imresize(m.astype(float), mini_shape, interp='bilinear')
464 |         mini_mask[:, :, i] = np.where(m >= 128, 1, 0)
465 |     return mini_mask
466 |
467 |
468 | def expand_mask(bbox, mini_mask, image_shape):
469 |     """Resizes mini masks back to image size. Reverses the change
470 |     of minimize_mask().
471 |
472 |     See inspect_data.ipynb notebook for more details.
473 |     """
474 |     mask = np.zeros(image_shape[:2] + (mini_mask.shape[-1],), dtype=bool)
475 |     for i in range(mask.shape[-1]):
476 |         m = mini_mask[:, :, i]
477 |         y1, x1, y2, x2 = bbox[i][:4]
478 |         h = y2 - y1
479 |         w = x2 - x1
480 |         m = scipy.misc.imresize(m.astype(float), (h, w), interp='bilinear')
481 |         mask[y1:y2, x1:x2, i] = np.where(m >= 128, 1, 0)
482 |     return mask
483 |
484 |
485 | # TODO: Build and use this function to reduce code duplication
486 | def mold_mask(mask, config):
487 |     pass
488 |
489 |
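# --- Editor's illustration (not part of the original file) ---
# Mini-mask round trip: store only the crop inside each box at a fixed
# small size, then expand it back at the original box location.
#
#   mask = np.zeros((128, 128, 1), dtype=bool)
#   mask[10:50, 20:80, 0] = True
#   bbox = extract_bboxes(mask)                  # -> [[10, 20, 50, 80]]
#   mini = minimize_mask(bbox, mask, mini_shape=(56, 56))
#   full = expand_mask(bbox, mini, image_shape=(128, 128))
#   # mini.shape == (56, 56, 1); full approximates the original mask.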
490 | def unmold_mask(mask, bbox, image_shape):
491 |     """Converts a mask generated by the neural network into a format similar
492 |     to its original shape.
493 |     mask: [height, width] of type float. A small, typically 28x28 mask.
494 |     bbox: [y1, x1, y2, x2]. The box to fit the mask in.
495 |
496 |     Returns a binary mask with the same size as the original image.
497 |     """
498 |     threshold = 0.5
499 |     y1, x1, y2, x2 = bbox
500 |     mask = scipy.misc.imresize(
501 |         mask, (y2 - y1, x2 - x1), interp='bilinear').astype(np.float32) / 255.0
502 |     mask = np.where(mask >= threshold, 1, 0).astype(np.uint8)
503 |
504 |     # Put the mask in the right location.
505 |     full_mask = np.zeros(image_shape[:2], dtype=np.uint8)
506 |     full_mask[y1:y2, x1:x2] = mask
507 |     return full_mask
508 |
509 |
510 | ############################################################
511 | #  Anchors
512 | ############################################################
513 |
514 | def generate_anchors(scales, ratios, shape, feature_stride, anchor_stride):
515 |     """
516 |     scales: 1D array of anchor sizes in pixels. Example: [32, 64, 128]
517 |     ratios: 1D array of anchor ratios of width/height. Example: [0.5, 1, 2]
518 |     shape: [height, width] spatial shape of the feature map over which
519 |         to generate anchors.
520 |     feature_stride: Stride of the feature map relative to the image in pixels.
521 |     anchor_stride: Stride of anchors on the feature map. For example, if the
522 |         value is 2 then generate anchors for every other feature map pixel.
523 |     """
524 |     # Get all combinations of scales and ratios
525 |     scales, ratios = np.meshgrid(np.array(scales), np.array(ratios))
526 |     scales = scales.flatten()
527 |     ratios = ratios.flatten()
528 |
529 |     # Enumerate heights and widths from scales and ratios
530 |     heights = scales / np.sqrt(ratios)
531 |     widths = scales * np.sqrt(ratios)
532 |
533 |     # Enumerate shifts in feature space
534 |     shifts_y = np.arange(0, shape[0], anchor_stride) * feature_stride
535 |     shifts_x = np.arange(0, shape[1], anchor_stride) * feature_stride
536 |     shifts_x, shifts_y = np.meshgrid(shifts_x, shifts_y)
537 |
538 |     # Enumerate combinations of shifts, widths, and heights
539 |     box_widths, box_centers_x = np.meshgrid(widths, shifts_x)
540 |     box_heights, box_centers_y = np.meshgrid(heights, shifts_y)
541 |
542 |     # Reshape to get a list of (y, x) and a list of (h, w)
543 |     box_centers = np.stack(
544 |         [box_centers_y, box_centers_x], axis=2).reshape([-1, 2])
545 |     box_sizes = np.stack([box_heights, box_widths], axis=2).reshape([-1, 2])
546 |
547 |     # Convert to corner coordinates (y1, x1, y2, x2)
548 |     boxes = np.concatenate([box_centers - 0.5 * box_sizes,
549 |                             box_centers + 0.5 * box_sizes], axis=1)
550 |     return boxes
551 |
552 |
553 | def generate_pyramid_anchors(scales, ratios, feature_shapes, feature_strides,
554 |                              anchor_stride):
555 |     """Generate anchors at different levels of a feature pyramid. Each scale
556 |     is associated with a level of the pyramid, but each ratio is used in
557 |     all levels of the pyramid.
558 |
559 |     Returns:
560 |     anchors: [N, (y1, x1, y2, x2)]. All generated anchors in one array, sorted
561 |         in the same order as the given scales. So, anchors of scale[0] come
562 |         first, then anchors of scale[1], and so on.
563 |     """
564 |     # Anchors
565 |     # [anchor_count, (y1, x1, y2, x2)]
566 |     anchors = []
567 |     for i in range(len(scales)):
568 |         anchors.append(generate_anchors(scales[i], ratios, feature_shapes[i],
569 |                                         feature_strides[i], anchor_stride))
570 |     return np.concatenate(anchors, axis=0)
571 |
572 |
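# --- Editor's illustration (not part of the original file) ---
# Anchor counts follow the feature map sizes: for two pyramid levels with
# 3 ratios each, 64*64*3 + 32*32*3 = 15360 anchors are generated.
#
#   anchors = generate_pyramid_anchors(
#       scales=[32, 64], ratios=[0.5, 1, 2],
#       feature_shapes=[(64, 64), (32, 32)],
#       feature_strides=[4, 8], anchor_stride=1)
#   # anchors.shape == (15360, 4), rows are (y1, x1, y2, x2)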
573 | ############################################################
574 | #  Miscellaneous
575 | ############################################################
576 |
577 | def trim_zeros(x):
578 |     """It's common to have tensors larger than the available data and
579 |     pad with zeros. This function removes rows that are all zeros.
580 |
581 |     x: [rows, columns].
582 |     """
583 |     assert len(x.shape) == 2
584 |     return x[~np.all(x == 0, axis=1)]
585 |
586 |
587 | def compute_ap(gt_boxes, gt_class_ids, gt_masks,
588 |                pred_boxes, pred_class_ids, pred_scores, pred_masks,
589 |                iou_threshold=0.5):
590 |     """Compute Average Precision at a set IoU threshold (default 0.5).
591 |
592 |     Returns:
593 |     mAP: Mean Average Precision
594 |     precisions: List of precisions at different class score thresholds.
595 |     recalls: List of recall values at different class score thresholds.
596 |     overlaps: [pred_boxes, gt_boxes] IoU overlaps.
597 |     """
598 |     # Trim zero padding and sort predictions by score from high to low
599 |     # TODO: cleaner to do zero unpadding upstream
600 |     gt_boxes = trim_zeros(gt_boxes)
601 |     gt_masks = gt_masks[..., :gt_boxes.shape[0]]
602 |     pred_boxes = trim_zeros(pred_boxes)
603 |     pred_scores = pred_scores[:pred_boxes.shape[0]]
604 |     indices = np.argsort(pred_scores)[::-1]
605 |     pred_boxes = pred_boxes[indices]
606 |     pred_class_ids = pred_class_ids[indices]
607 |     pred_scores = pred_scores[indices]
608 |     pred_masks = pred_masks[..., indices]
609 |
610 |     # Compute IoU overlaps [pred_masks, gt_masks]
611 |     overlaps = compute_overlaps_masks(pred_masks, gt_masks)
612 |
613 |     # Loop through predictions and find matching ground truth boxes
614 |     match_count = 0
615 |     pred_match = np.zeros([pred_boxes.shape[0]])
616 |     gt_match = np.zeros([gt_boxes.shape[0]])
617 |     for i in range(len(pred_boxes)):
618 |         # Find best matching ground truth box
619 |         sorted_ixs = np.argsort(overlaps[i])[::-1]
620 |         for j in sorted_ixs:
621 |             # If ground truth box is already matched, go to next one
622 |             if gt_match[j] == 1:
623 |                 continue
624 |             # If we reach IoU smaller than the threshold, end the loop
625 |             iou = overlaps[i, j]
626 |             if iou < iou_threshold:
627 |                 break
628 |             # Do we have a match?
629 |             if pred_class_ids[i] == gt_class_ids[j]:
630 |                 match_count += 1
631 |                 gt_match[j] = 1
632 |                 pred_match[i] = 1
633 |                 break
634 |
635 |     # Compute precision and recall at each prediction box step
636 |     precisions = np.cumsum(pred_match) / (np.arange(len(pred_match)) + 1)
637 |     recalls = np.cumsum(pred_match).astype(np.float32) / len(gt_match)
638 |
639 |     # Pad with start and end values to simplify the math
640 |     precisions = np.concatenate([[0], precisions, [0]])
641 |     recalls = np.concatenate([[0], recalls, [1]])
642 |
643 |     # Ensure precision values decrease but don't increase. This way, the
644 |     # precision value at each recall threshold is the maximum it can be
645 |     # for all following recall thresholds, as specified by the VOC paper.
646 |     for i in range(len(precisions) - 2, -1, -1):
647 |         precisions[i] = np.maximum(precisions[i], precisions[i + 1])
648 |
649 |     # Compute mean AP over recall range
650 |     indices = np.where(recalls[:-1] != recalls[1:])[0] + 1
651 |     mAP = np.sum((recalls[indices] - recalls[indices - 1]) *
652 |                  precisions[indices])
653 |
654 |     return mAP, precisions, recalls, overlaps
655 |
656 |
657 | def compute_recall(pred_boxes, gt_boxes, iou):
658 |     """Compute the recall at the given IoU threshold. It's an indication
659 |     of how many GT boxes were found by the given prediction boxes.
660 |
661 |     pred_boxes: [N, (y1, x1, y2, x2)] in image coordinates
662 |     gt_boxes: [N, (y1, x1, y2, x2)] in image coordinates
663 |     """
664 |     # Measure overlaps
665 |     overlaps = compute_overlaps(pred_boxes, gt_boxes)
666 |     iou_max = np.max(overlaps, axis=1)
667 |     iou_argmax = np.argmax(overlaps, axis=1)
668 |     positive_ids = np.where(iou_max >= iou)[0]
669 |     matched_gt_boxes = iou_argmax[positive_ids]
670 |
671 |     recall = len(set(matched_gt_boxes)) / gt_boxes.shape[0]
672 |     return recall, positive_ids
673 |
674 |
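# --- Editor's illustration (not part of the original file) ---
# A single perfect detection scores AP = 1.0. Masks follow the
# [height, width, instances] layout used throughout this file:
#
#   gt_boxes = np.array([[0, 0, 10, 10]])
#   gt_ids = np.array([1])
#   gt_masks = np.zeros((20, 20, 1), dtype=bool)
#   gt_masks[0:10, 0:10, 0] = True
#   mAP, precisions, recalls, overlaps = compute_ap(
#       gt_boxes, gt_ids, gt_masks,
#       pred_boxes=gt_boxes, pred_class_ids=gt_ids,
#       pred_scores=np.array([0.9]), pred_masks=gt_masks)
#   # -> mAP == 1.0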
675 | # ## Batch Slicing
676 | # Some custom layers support a batch size of 1 only, and require a lot of work
677 | # to support batches greater than 1. This function slices an input tensor
678 | # across the batch dimension and feeds batches of size 1. Effectively, it's
679 | # an easy way to support batches > 1 quickly with little code modification.
680 | # In the long run, it's more efficient to modify the code to support large
681 | # batches and get rid of this function. Consider this a temporary solution.
682 | def batch_slice(inputs, graph_fn, batch_size, names=None):
683 |     """Splits inputs into slices and feeds each slice to a copy of the given
684 |     computation graph and then combines the results. It allows you to run a
685 |     graph on a batch of inputs even if the graph is written to support one
686 |     instance only.
687 |
688 |     inputs: list of tensors. All must have the same first dimension length.
689 |     graph_fn: A function that returns a TF tensor that's part of a graph.
690 |     batch_size: number of slices to divide the data into.
691 |     names: If provided, assigns names to the resulting tensors.
692 |     """
693 |     if not isinstance(inputs, list):
694 |         inputs = [inputs]
695 |
696 |     outputs = []
697 |     for i in range(batch_size):
698 |         inputs_slice = [x[i] for x in inputs]
699 |         output_slice = graph_fn(*inputs_slice)
700 |         if not isinstance(output_slice, (tuple, list)):
701 |             output_slice = [output_slice]
702 |         outputs.append(output_slice)
703 |     # Transpose outputs: from a list of slices (each a list of
704 |     # outputs) to a list of outputs (each a list of slices),
705 |     # so matching outputs can be stacked across the batch.
706 |     outputs = list(zip(*outputs))
707 |
708 |     if names is None:
709 |         names = [None] * len(outputs)
710 |
711 |     result = [tf.stack(o, axis=0, name=n)
712 |               for o, n in zip(outputs, names)]
713 |     if len(result) == 1:
714 |         result = result[0]
715 |
716 |     return result
717 |
718 |
719 | def download_trained_weights(coco_model_path, verbose=1):
720 |     """Download COCO trained weights from Releases.
721 |
722 |     coco_model_path: local path of COCO trained weights
723 |     """
724 |     if verbose > 0:
725 |         print("Downloading pretrained model to " + coco_model_path + " ...")
726 |     with urllib.request.urlopen(COCO_MODEL_URL) as resp, open(coco_model_path, 'wb') as out:
727 |         shutil.copyfileobj(resp, out)
728 |     if verbose > 0:
729 |         print("... done downloading pretrained model!")
730 |
--------------------------------------------------------------------------------