├── README.md ├── screenshot.gif ├── screenshot1.gif └── semantic_segmentation ├── CMakeLists.txt ├── LICENSE.txt ├── nodes ├── __pycache__ │ ├── cv_bridge.cpython-36.pyc │ └── cv_resize.cpython-36.pyc ├── cv_bridge.py ├── cv_resize.py ├── models │ ├── __init__.py │ ├── __pycache__ │ │ └── __init__.cpython-36.pyc │ ├── espnetv2_bdd100k_driveable │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-36.pyc │ │ │ └── cv_resize.cpython-36.pyc │ │ ├── categories.json │ │ ├── cnn │ │ │ ├── Model.py │ │ │ ├── SegmentationModel.py │ │ │ ├── __pycache__ │ │ │ │ ├── Model.cpython-36.pyc │ │ │ │ ├── SegmentationModel.cpython-36.pyc │ │ │ │ └── cnn_utils.cpython-36.pyc │ │ │ └── cnn_utils.py │ │ ├── cv_resize.py │ │ └── weights.pth │ ├── mnv2_bdd100k_driveable_513 │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ └── __init__.cpython-36.pyc │ │ ├── categories.json │ │ ├── graph.pb │ │ └── notes.txt │ ├── mnv2_coco2017_driving_513 │ │ ├── __init__.py │ │ ├── categories.json │ │ └── graph.pb │ └── mnv2_dm05_voc │ │ ├── __init__.py │ │ ├── categories.json │ │ ├── cv_resize.py │ │ └── graph.pb └── segmentation_node └── package.xml /README.md: -------------------------------------------------------------------------------- 1 | # ros-semantic-segmentation 2 | 3 | A generalized semantic segmentation package for ROS that is agnostic to deep learning framework and model. 4 | 5 | Three models are provided. All are extremely lightweight, fast models so they can be included inside the repo without asking you to download some zip file from Dropbox. They are not the most accurate models. You can implement your own following the examples. 6 | 7 | **TensorFlow models** 8 | * **mnv2_bdd100k_driveable_513** -- TensorFlow >= 1.11, Deeplab V3+ on a MobileNet v2 backbone, trained on BDD100K driveable area, 513x513 input size. 9 | * **mnv2_coco2017_driving_513** -- TensorFlow >= 1.11, Deeplab V3+ on a MobileNet v2 backbone, trained on a subset of 13 COCO2017 classes related to driving, 513x513 input size. 10 | * **mnv2_dm05_voc** -- TensorFlow >= 1.11, Deeplab V3+ on VOC 2012 classes (pretrained Google model mnv2_dm05_coco_voc_trainaug). 11 | 12 | **PyTorch models** 13 | * **espnetv2_bdd100k_driveable** -- PyTorch, ESPNETv2 on BDD100K driveable area, 1024x512 input size, scale 1.0. 14 | 15 | To implement another model, you can follow these examples: create a new directory under `models` containing an `__init__.py` with a class called `Model` that implements `infer()`. 16 | 17 | ![screenshot](/screenshot.gif?raw=true "screenshot") 18 | ![screenshot1](/screenshot1.gif?raw=true "screenshot1") 19 | 20 | ## Try it 21 | 22 | ```rosrun semantic_segmentation segmentation_node __ns:=/camera``` 23 | 24 | ## Parameters: 25 | 26 | * **model** (string) -- name of the model to use. Defaults to "mnv2_bdd100k_driveable_513". 27 | * **rate** (float) -- the maximum frame rate to run inferences. Defaults to 30.0. Note that if your system is too slow, it will run at the maximum speed possible while dropping frames. 28 | * **topic_image** (string) -- topic to listen for images. Defaults to "image_raw". 29 | * **topic_semantic** (string) -- topic to output semantic predictions. Defaults to "semantic". Outputs a mono8 image indicating semantic classes at each pixel. 30 | * **topic_semantic_color** (string) -- topic to output a colored RGB version of the semantic predictions for visualization purposes. Defaults to "semantic_color". Outputs an rgb8 image. 31 | 32 | When the node is initialized, it will set an additional ROS parameter **semantic_categories** as defined in the chosen model. This parameter can be read by other nodes to know which IDs correspond to which classes.
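For example, a downstream node might map class IDs to names like this (a minimal sketch, not part of the package; it assumes the segmentation node was started under the /camera namespace as in the "Try it" section above):

```python
#!/usr/bin/env python3
# Hypothetical consumer of the semantic_categories parameter.
import rospy

rospy.init_node("semantic_consumer")

# The segmentation node sets this parameter from the model's categories.json;
# the list index is the class ID that appears in the mono8 "semantic" image.
categories = rospy.get_param("/camera/semantic_categories")
id_to_name = {class_id: c["name"] for class_id, c in enumerate(categories)}
# e.g. {0: "background", 1: "driveable", 2: "driveable_alt"} for the default model
```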
33 | 34 | ## Subscribers: 35 | 36 | * **image_raw** (sensor_msgs/Image) 37 | 38 | ## Publishers: 39 | 40 | * **semantic** (sensor_msgs/Image) 41 | * **semantic_color** (sensor_msgs/Image) 42 | 43 | ## Disclaimer 44 | 45 | This is not intended to be used for production autonomous vehicles. This is provided "as-is" for educational purposes. I am not liable for any damage or injury that may result from the use of this software. 46 | -------------------------------------------------------------------------------- /screenshot.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dheera/ros-semantic-segmentation/e11468489bb5cf898bb14517ebd43c4e69565d2a/screenshot.gif -------------------------------------------------------------------------------- /screenshot1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dheera/ros-semantic-segmentation/e11468489bb5cf898bb14517ebd43c4e69565d2a/screenshot1.gif -------------------------------------------------------------------------------- /semantic_segmentation/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8.3) 2 | project(semantic_segmentation) 3 | 4 | ## Find catkin macros and libraries 5 | ## if COMPONENTS list like find_package(catkin REQUIRED COMPONENTS xyz) 6 | ## is used, also find other catkin packages 7 | find_package(catkin REQUIRED COMPONENTS 8 | roscpp 9 | rospy 10 | std_msgs 11 | ) 12 | 13 | catkin_package() 14 | 15 | include_directories( 16 | ${catkin_INCLUDE_DIRS} 17 | ) 18 | 19 | catkin_install_python(PROGRAMS 20 | nodes/segmentation_node 21 | DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} 22 | ) 23 | 24 | install(DIRECTORY 25 | nodes/models 26 | DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} 27 | ) 28 | 29 | -------------------------------------------------------------------------------- /semantic_segmentation/LICENSE.txt: -------------------------------------------------------------------------------- 1 | 3-Clause BSD License 2 | 3 | Copyright 2019 Dheera Venkatraman 4 | Contact: `echo qurren | sed -e "s/\(.*\)/\1@\1.arg/" | tr a-z n-za-m` 5 | 6 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 9 | 10 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 11 | 12 | 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 15 | -------------------------------------------------------------------------------- /semantic_segmentation/nodes/__pycache__/cv_bridge.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dheera/ros-semantic-segmentation/e11468489bb5cf898bb14517ebd43c4e69565d2a/semantic_segmentation/nodes/__pycache__/cv_bridge.cpython-36.pyc -------------------------------------------------------------------------------- /semantic_segmentation/nodes/__pycache__/cv_resize.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dheera/ros-semantic-segmentation/e11468489bb5cf898bb14517ebd43c4e69565d2a/semantic_segmentation/nodes/__pycache__/cv_resize.cpython-36.pyc -------------------------------------------------------------------------------- /semantic_segmentation/nodes/cv_bridge.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | Mimics the functionality of cv_bridge but in pure python, without OpenCV dependency. 5 | 6 | An additional optional flip_channels argument is provided in the imgmsg_to_cv2() function 7 | for convenience (e.g. if resulting numpy array is going to be fed directly into a neural network 8 | that expects channels in RGB order instead of OpenCV-ish BGR). 9 | 10 | Author: dheera@dheera.net 11 | """ 12 | 13 | from sensor_msgs.msg import Image 14 | import numpy 15 | import rospy 16 | 17 | BPP = { 18 | 'bgr8': 3, 19 | 'rgb8': 3, 20 | '16UC1': 2, 21 | '8UC1': 1, 22 | 'mono16': 2, 23 | 'mono8': 1 24 | } 25 | 26 | def imgmsg_to_cv2(data, desired_encoding="passthrough", flip_channels=False): 27 | """ 28 | Converts a ROS image to an OpenCV image without using the cv_bridge package, 29 | for compatibility purposes. 30 | """ 31 | 32 | if desired_encoding == "passthrough": 33 | encoding = data.encoding 34 | else: 35 | encoding = desired_encoding 36 | 37 | if (encoding == 'bgr8' and not flip_channels) or (encoding == 'rgb8' and flip_channels): 38 | return numpy.frombuffer(data.data, numpy.uint8).reshape((data.height, data.width, 3)) 39 | elif (encoding == 'rgb8' and not flip_channels) or (encoding == 'bgr8' and flip_channels): 40 | return numpy.frombuffer(data.data, numpy.uint8).reshape((data.height, data.width, 3))[:, :, ::-1] 41 | elif encoding == 'mono8' or encoding == '8UC1': 42 | return numpy.frombuffer(data.data, numpy.uint8).reshape((data.height, data.width)) 43 | elif encoding == 'mono16' or encoding == '16UC1': 44 | return numpy.frombuffer(data.data, numpy.uint16).reshape((data.height, data.width)) 45 | else: 46 | rospy.logwarn("Unsupported encoding %s" % encoding) 47 | return None 48 | 49 | def cv2_to_imgmsg(cv2img, encoding='bgr8'): 50 | """ 51 | Converts an OpenCV image to a ROS image without using the cv_bridge package, 52 | for compatibility purposes.
53 | """ 54 | 55 | msg = Image() 56 | msg.width = cv2img.shape[1] 57 | msg.height = cv2img.shape[0] 58 | msg.encoding = encoding 59 | msg.step = BPP[encoding]*cv2img.shape[1] 60 | msg.data = numpy.ascontiguousarray(cv2img).tobytes() 61 | 62 | return msg 63 | -------------------------------------------------------------------------------- /semantic_segmentation/nodes/cv_resize.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | Nearest neighbor resize using numpy only. 5 | Unfortunately the ROS-provided cv2.so is fantastically compiled without python3 bindings so it cannot be used with python3. This allows people who only need nearest neighbor resize to avoid these shenanigans entirely and avoid OpenCV dependency. 6 | 7 | Author: dheera@dheera.net 8 | """ 9 | 10 | import numpy as np 11 | 12 | def resize(img, new_shape): 13 | """ 14 | Nearest-neighbor resize. 15 | img = an opencv image 16 | shape = (width, height) as per OpenCV convention NOT numpy convention 17 | """ 18 | 19 | new_width, new_height = new_shape # cv2 convention 20 | old_height, old_width = img.shape[0], img.shape[1] # numpy shenanigans 21 | 22 | new_i, new_j = np.meshgrid( 23 | np.linspace(0, old_width - 1, new_width).astype(np.uint16), 24 | np.linspace(0, old_height -1, new_height).astype(np.uint16) 25 | ) 26 | 27 | return img[new_j, new_i] 28 | -------------------------------------------------------------------------------- /semantic_segmentation/nodes/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dheera/ros-semantic-segmentation/e11468489bb5cf898bb14517ebd43c4e69565d2a/semantic_segmentation/nodes/models/__init__.py -------------------------------------------------------------------------------- /semantic_segmentation/nodes/models/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dheera/ros-semantic-segmentation/e11468489bb5cf898bb14517ebd43c4e69565d2a/semantic_segmentation/nodes/models/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /semantic_segmentation/nodes/models/espnetv2_bdd100k_driveable/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import json 4 | import os 5 | import numpy as np 6 | import time 7 | import torch 8 | from .cnn import SegmentationModel as net 9 | from .cv_resize import resize 10 | 11 | PATH = os.path.dirname(__file__) 12 | SCALE = 1.0 13 | WEIGHTS_FILENAME = "weights.pth" 14 | CATEGORIES_FILENAME = "categories.json" 15 | INPUT_WIDTH = 1024 16 | INPUT_HEIGHT = 512 17 | USE_GPU = True 18 | MEAN = np.array([[[72.3923111, 82.90893555, 73.15840149]]], dtype = np.float32) 19 | STD = np.array([[[45.3192215, 46.15289307, 44.91483307]]], dtype = np.float32) 20 | 21 | class Model(object): 22 | def __init__(self): 23 | color_map_list = [] 24 | with open(os.path.join(PATH, CATEGORIES_FILENAME)) as f: 25 | self._categories = json.loads(f.read()) 26 | self._color_map = np.array([category["color"] for category in self._categories], dtype = np.uint8) 27 | 28 | self.model = net.EESPNet_Seg(len(self._categories), s = SCALE) 29 | 30 | self.model = torch.nn.DataParallel(self.model) 31 | self.model.load_state_dict(torch.load(os.path.join(PATH, WEIGHTS_FILENAME))) 32 | if USE_GPU: 33 | self.model = 
self.model.cuda() 34 | 35 | self.model.eval() 36 | 37 | @property 38 | def trace(self): 39 | # Tracing is a no-op for this PyTorch model; only the TensorFlow models implement it. 40 | return False 41 | 42 | @trace.setter 43 | def trace(self, value): 44 | # Accept and ignore the value; the original body referenced 45 | # tf.RunOptions, which does not exist in this PyTorch model. 46 | pass 47 | 48 | @property 49 | def color_map(self): 50 | return self._color_map 51 | 52 | @property 53 | def categories(self): 54 | return self._categories 55 | 56 | def infer(self, images): 57 | # TODO: support batch size > 1 58 | 59 | img = images[0] 60 | img = img.astype(np.float32) 61 | img = resize(img, (INPUT_WIDTH, INPUT_HEIGHT)) 62 | img -= MEAN 63 | img /= STD 64 | 65 | img = img / 255 66 | img = img.transpose((2, 0, 1)) 67 | 68 | img_tensor = torch.from_numpy(img) 69 | img_tensor = torch.unsqueeze(img_tensor, 0) 70 | 71 | if USE_GPU: 72 | img_tensor = img_tensor.cuda() 73 | 74 | img_out = self.model(img_tensor) 75 | classMap_numpy = img_out[0].max(0)[1].byte().cpu().data.numpy() 76 | return [classMap_numpy] 77 | 78 | -------------------------------------------------------------------------------- /semantic_segmentation/nodes/models/espnetv2_bdd100k_driveable/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dheera/ros-semantic-segmentation/e11468489bb5cf898bb14517ebd43c4e69565d2a/semantic_segmentation/nodes/models/espnetv2_bdd100k_driveable/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /semantic_segmentation/nodes/models/espnetv2_bdd100k_driveable/__pycache__/cv_resize.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dheera/ros-semantic-segmentation/e11468489bb5cf898bb14517ebd43c4e69565d2a/semantic_segmentation/nodes/models/espnetv2_bdd100k_driveable/__pycache__/cv_resize.cpython-36.pyc -------------------------------------------------------------------------------- /semantic_segmentation/nodes/models/espnetv2_bdd100k_driveable/categories.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "color": [ 0, 0, 0 ], 4 | "name": "background" 5 | }, 6 | { 7 | "color": [ 255, 0, 0 ], 8 | "name": "driveable" 9 | }, 10 | { 11 | "color": [ 0, 127, 255 ], 12 | "name": "driveable_alt" 13 | } 14 | ] 15 | -------------------------------------------------------------------------------- /semantic_segmentation/nodes/models/espnetv2_bdd100k_driveable/cnn/Model.py: -------------------------------------------------------------------------------- 1 | from torch.nn import init 2 | import torch.nn.functional as F 3 | from .cnn_utils import * 4 | import math 5 | import torch 6 | 7 | __author__ = "Sachin Mehta" 8 | __version__ = "1.0.1" 9 | __maintainer__ = "Sachin Mehta" 10 | 11 | class EESP(nn.Module): 12 | ''' 13 | This class defines the EESP block, which is based on the following principle 14 | REDUCE ---> SPLIT ---> TRANSFORM --> MERGE 15 | ''' 16 | 17 | def __init__(self, nIn, nOut, stride=1, k=4, r_lim=7, down_method='esp'): #down_method --> ['avg' or 'esp'] 18 | ''' 19 | :param nIn: number of input channels 20 | :param nOut: number of output channels 21 | :param stride: factor by which we should skip (useful for down-sampling).
If 2, then down-samples the feature map by 2 22 | :param k: # of parallel branches 23 | :param r_lim: A maximum value of receptive field allowed for EESP block 24 | :param g: number of groups to be used in the feature map reduction step. 25 | ''' 26 | super().__init__() 27 | self.stride = stride 28 | n = int(nOut / k) 29 | n1 = nOut - (k - 1) * n 30 | assert down_method in ['avg', 'esp'], 'One of these is supported (avg or esp)' 31 | assert n == n1, "n(={}) and n1(={}) should be equal for Depth-wise Convolution ".format(n, n1) 32 | #assert nIn%k == 0, "Number of input channels ({}) should be divisible by # of branches ({})".format(nIn, k) 33 | #assert n % k == 0, "Number of output channels ({}) should be divisible by # of branches ({})".format(n, k) 34 | self.proj_1x1 = CBR(nIn, n, 1, stride=1, groups=k) 35 | 36 | # (For convenience) Mapping between dilation rate and receptive field for a 3x3 kernel 37 | map_receptive_ksize = {3: 1, 5: 2, 7: 3, 9: 4, 11: 5, 13: 6, 15: 7, 17: 8} 38 | self.k_sizes = list() 39 | for i in range(k): 40 | ksize = int(3 + 2 * i) 41 | # After reaching the receptive field limit, fall back to the base kernel size of 3 with a dilation rate of 1 42 | ksize = ksize if ksize <= r_lim else 3 43 | self.k_sizes.append(ksize) 44 | # sort (in ascending order) these kernel sizes based on their receptive field 45 | # This enables us to ignore the kernels (3x3 in our case) with the same effective receptive field in hierarchical 46 | # feature fusion because kernels with 3x3 receptive fields do not have gridding artifacts. 47 | self.k_sizes.sort() 48 | self.spp_dw = nn.ModuleList() 49 | #self.bn = nn.ModuleList() 50 | for i in range(k): 51 | d_rate = map_receptive_ksize[self.k_sizes[i]] 52 | self.spp_dw.append(CDilated(n, n, kSize=3, stride=stride, groups=n, d=d_rate)) 53 | #self.bn.append(nn.BatchNorm2d(n)) 54 | self.conv_1x1_exp = CB(nOut, nOut, 1, 1, groups=k) 55 | self.br_after_cat = BR(nOut) 56 | self.module_act = nn.PReLU(nOut) 57 | self.downAvg = True if down_method == 'avg' else False 58 | 59 | def forward(self, input): 60 | ''' 61 | :param input: input feature map 62 | :return: transformed feature map 63 | ''' 64 | 65 | # Reduce --> project high-dimensional feature maps to low-dimensional space 66 | output1 = self.proj_1x1(input) 67 | output = [self.spp_dw[0](output1)] 68 | # compute the output for each branch and hierarchically fuse them 69 | # i.e. Split --> Transform --> HFF 70 | for k in range(1, len(self.spp_dw)): 71 | out_k = self.spp_dw[k](output1) 72 | # HFF 73 | # We do not combine the branches that have the same effective receptive field (3x3 in our case) 74 | # because there are no holes in those kernels. 75 | out_k = out_k + output[k - 1] 76 | #apply batch norm after fusion and then append to the list 77 | output.append(out_k) 78 | # Merge 79 | expanded = self.conv_1x1_exp( # Aggregate the feature maps using point-wise convolution 80 | self.br_after_cat( # apply batch normalization followed by activation function (PRelu in this case) 81 | torch.cat(output, 1) # concatenate the output of different branches 82 | ) 83 | ) 84 | del output 85 | # if down-sampling, then return the concatenated vector 86 | # as Downsampling function will combine it with avg.
pooled feature map and then threshold it 87 | if self.stride == 2 and self.downAvg: 88 | return expanded 89 | 90 | # if dimensions of input and concatenated vector are the same, add them (RESIDUAL LINK) 91 | if expanded.size() == input.size(): 92 | expanded = expanded + input 93 | 94 | # Threshold the feature map using activation function (PReLU in this case) 95 | return self.module_act(expanded) 96 | 97 | 98 | class DownSampler(nn.Module): 99 | ''' 100 | Down-sampling function that has two parallel branches: (1) avg pooling 101 | and (2) EESP block with stride of 2. The output feature maps of these branches 102 | are then concatenated and thresholded using an activation function (PReLU in our 103 | case) to produce the final output. 104 | ''' 105 | 106 | def __init__(self, nin, nout, k=4, r_lim=9, reinf=True): 107 | ''' 108 | :param nin: number of input channels 109 | :param nout: number of output channels 110 | :param k: # of parallel branches 111 | :param r_lim: A maximum value of receptive field allowed for EESP block 112 | :param g: number of groups to be used in the feature map reduction step. 113 | ''' 114 | super().__init__() 115 | nout_new = nout - nin 116 | self.eesp = EESP(nin, nout_new, stride=2, k=k, r_lim=r_lim, down_method='avg') 117 | self.avg = nn.AvgPool2d(kernel_size=3, padding=1, stride=2) 118 | if reinf: 119 | self.inp_reinf = nn.Sequential( 120 | CBR(config_inp_reinf, config_inp_reinf, 3, 1), 121 | CB(config_inp_reinf, nout, 1, 1) 122 | ) 123 | self.act = nn.PReLU(nout) 124 | 125 | def forward(self, input, input2=None): 126 | ''' 127 | :param input: input feature map 128 | :return: feature map down-sampled by a factor of 2 129 | ''' 130 | avg_out = self.avg(input) 131 | eesp_out = self.eesp(input) 132 | output = torch.cat([avg_out, eesp_out], 1) 133 | if input2 is not None: 134 | #assuming the input is a square image 135 | w1 = avg_out.size(2) 136 | while True: 137 | input2 = F.avg_pool2d(input2, kernel_size=3, padding=1, stride=2) 138 | w2 = input2.size(2) 139 | if w2 == w1: 140 | break 141 | output = output + self.inp_reinf(input2) 142 | 143 | return self.act(output) #self.act(output) 144 | 145 | class EESPNet(nn.Module): 146 | ''' 147 | This class defines the ESPNetv2 architecture for the ImageNet classification 148 | ''' 149 | 150 | def __init__(self, classes=20, s=1): 151 | ''' 152 | :param classes: number of classes in the dataset. Default is 20 for the cityscapes 153 | :param s: factor that scales the number of output feature maps 154 | ''' 155 | super().__init__() 156 | reps = [0, 3, 7, 3] # how many times EESP blocks should be repeated. 157 | channels = 3 158 | 159 | r_lim = [13, 11, 9, 7, 5] # receptive field at each spatial level 160 | K = [4]*len(r_lim) # No. of parallel branches at different levels 161 | 162 | base = 32 #base configuration 163 | config_len = 5 164 | config = [base] * config_len 165 | base_s = 0 166 | for i in range(config_len): 167 | if i== 0: 168 | base_s = int(base * s) 169 | base_s = math.ceil(base_s / K[0]) * K[0] 170 | config[i] = base if base_s > base else base_s 171 | else: 172 | config[i] = base_s * pow(2, i) 173 | if s <= 1.5: 174 | config.append(1024) 175 | elif s in [1.5, 2]: 176 | config.append(1280) 177 | else: 178 | raise ValueError('Configuration not supported') 179 | 180 | #print('Config: ', config) 181 | 182 | global config_inp_reinf 183 | config_inp_reinf = 3 184 | self.input_reinforcement = True 185 | assert len(K) == len(r_lim), 'Length of branching factor array and receptive field array should be the same.'
186 | 187 | self.level1 = CBR(channels, config[0], 3, 2) # 112 L1 188 | 189 | self.level2_0 = DownSampler(config[0], config[1], k=K[0], r_lim=r_lim[0], reinf=self.input_reinforcement) # out = 56 190 | self.level3_0 = DownSampler(config[1], config[2], k=K[1], r_lim=r_lim[1], reinf=self.input_reinforcement) # out = 28 191 | self.level3 = nn.ModuleList() 192 | for i in range(reps[1]): 193 | self.level3.append(EESP(config[2], config[2], stride=1, k=K[2], r_lim=r_lim[2])) 194 | 195 | self.level4_0 = DownSampler(config[2], config[3], k=K[2], r_lim=r_lim[2], reinf=self.input_reinforcement) #out = 14 196 | self.level4 = nn.ModuleList() 197 | for i in range(reps[2]): 198 | self.level4.append(EESP(config[3], config[3], stride=1, k=K[3], r_lim=r_lim[3])) 199 | 200 | self.level5_0 = DownSampler(config[3], config[4], k=K[3], r_lim=r_lim[3]) #7 201 | self.level5 = nn.ModuleList() 202 | for i in range(reps[3]): 203 | self.level5.append(EESP(config[4], config[4], stride=1, k=K[4], r_lim=r_lim[4])) 204 | 205 | # expand the feature maps using depth-wise separable convolution 206 | self.level5.append(CBR(config[4], config[4], 3, 1, groups=config[4])) 207 | self.level5.append(CBR(config[4], config[5], 1, 1, groups=K[4])) 208 | 209 | 210 | 211 | #self.level5_exp = nn.ModuleList() 212 | #assert config[5]%config[4] == 0, '{} should be divisible by {}'.format(config[5], config[4]) 213 | #gr = int(config[5]/config[4]) 214 | #for i in range(gr): 215 | # self.level5_exp.append(CBR(config[4], config[4], 1, 1, groups=pow(2, i))) 216 | 217 | self.classifier = nn.Linear(config[5], classes) 218 | self.init_params() 219 | 220 | def init_params(self): 221 | ''' 222 | Function to initialize the parameters 223 | ''' 224 | for m in self.modules(): 225 | if isinstance(m, nn.Conv2d): 226 | init.kaiming_normal_(m.weight, mode='fan_out') 227 | if m.bias is not None: 228 | init.constant_(m.bias, 0) 229 | elif isinstance(m, nn.BatchNorm2d): 230 | init.constant_(m.weight, 1) 231 | init.constant_(m.bias, 0) 232 | elif isinstance(m, nn.Linear): 233 | init.normal_(m.weight, std=0.001) 234 | if m.bias is not None: 235 | init.constant_(m.bias, 0) 236 | 237 | def forward(self, input, p=0.2, seg=True): 238 | ''' 239 | :param input: Receives the input RGB image 240 | :return: a C-dimensional vector (C = # of classes) if seg=False, else the level 1-4 feature maps 241 | ''' 242 | out_l1 = self.level1(input) # 112 243 | if not self.input_reinforcement: 244 | del input 245 | input = None 246 | 247 | out_l2 = self.level2_0(out_l1, input) # 56 248 | 249 | out_l3_0 = self.level3_0(out_l2, input) # out_l2_inp_rein 250 | for i, layer in enumerate(self.level3): 251 | if i == 0: 252 | out_l3 = layer(out_l3_0) 253 | else: 254 | out_l3 = layer(out_l3) 255 | 256 | out_l4_0 = self.level4_0(out_l3, input) # down-sampled 257 | for i, layer in enumerate(self.level4): 258 | if i == 0: 259 | out_l4 = layer(out_l4_0) 260 | else: 261 | out_l4 = layer(out_l4) 262 | 263 | if not seg: 264 | out_l5_0 = self.level5_0(out_l4) # down-sampled 265 | for i, layer in enumerate(self.level5): 266 | if i == 0: 267 | out_l5 = layer(out_l5_0) 268 | else: 269 | out_l5 = layer(out_l5) 270 | 271 | #out_e = [] 272 | #for layer in self.level5_exp: 273 | # out_e.append(layer(out_l5)) 274 | #out_exp = torch.cat(out_e, dim=1) 275 | 276 | 277 | 278 | output_g = F.adaptive_avg_pool2d(out_l5, output_size=1) 279 | output_g = F.dropout(output_g, p=p, training=self.training) 280 | output_1x1 = output_g.view(output_g.size(0), -1) 281 | 282 | return self.classifier(output_1x1) 283 | return out_l1, out_l2, out_l3, out_l4 284 | 285 | 286 |
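For orientation, a minimal smoke test of the backbone above (not part of the repository; it assumes PyTorch is installed and that it is run from the espnetv2_bdd100k_driveable directory so that cnn resolves as a package):

```python
# Hypothetical usage sketch for EESPNet -- not in the original repo.
import torch
from cnn.Model import EESPNet  # within the model package this is .cnn.Model

net = EESPNet(classes=1000, s=1.0)
net.eval()
with torch.no_grad():
    x = torch.randn(1, 3, 224, 224)  # NCHW RGB input
    # With seg=True, forward() returns the level-1..4 feature maps
    # instead of classification logits.
    out_l1, out_l2, out_l3, out_l4 = net(x, seg=True)

# Spatial size halves at each level: 112, 56, 28, 14 for a 224x224 input,
# with 32, 64, 128, 256 channels respectively at scale s=1.0.
print([tuple(t.shape) for t in (out_l1, out_l2, out_l3, out_l4)])
```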
-------------------------------------------------------------------------------- /semantic_segmentation/nodes/models/espnetv2_bdd100k_driveable/cnn/SegmentationModel.py: -------------------------------------------------------------------------------- 1 | #============================================ 2 | __author__ = "Sachin Mehta" 3 | __license__ = "MIT" 4 | __maintainer__ = "Sachin Mehta" 5 | #============================================ 6 | import torch 7 | from torch import nn 8 | 9 | from .Model import EESPNet, EESP 10 | import os 11 | import torch.nn.functional as F 12 | from .cnn_utils import * 13 | 14 | class EESPNet_Seg(nn.Module): 15 | def __init__(self, classes=20, s=1, pretrained=None, gpus=1): 16 | super().__init__() 17 | classificationNet = EESPNet(classes=1000, s=s) 18 | if gpus >= 1: 19 | classificationNet = nn.DataParallel(classificationNet) 20 | # load the pretrained weights 21 | if pretrained: 22 | if not os.path.isfile(pretrained): 23 | print('Weight file does not exist. Training without pre-trained weights') 24 | else: 25 | classificationNet.load_state_dict(torch.load(pretrained)) # model initialized with pretrained weights 26 | 27 | self.net = classificationNet.module 28 | 29 | del classificationNet 30 | # delete last few layers 31 | del self.net.classifier 32 | del self.net.level5 33 | del self.net.level5_0 34 | if s <= 0.5: 35 | p = 0.1 36 | else: 37 | p = 0.2 38 | 39 | self.proj_L4_C = CBR(self.net.level4[-1].module_act.num_parameters, self.net.level3[-1].module_act.num_parameters, 1, 1) 40 | pspSize = 2*self.net.level3[-1].module_act.num_parameters 41 | self.pspMod = nn.Sequential(EESP(pspSize, pspSize //2, stride=1, k=4, r_lim=7), 42 | PSPModule(pspSize // 2, pspSize //2)) 43 | self.project_l3 = nn.Sequential(nn.Dropout2d(p=p), C(pspSize // 2, classes, 1, 1)) 44 | self.act_l3 = BR(classes) 45 | self.project_l2 = CBR(self.net.level2_0.act.num_parameters + classes, classes, 1, 1) 46 | self.project_l1 = nn.Sequential(nn.Dropout2d(p=p), C(self.net.level1.act.num_parameters + classes, classes, 1, 1)) 47 | 48 | def hierarchicalUpsample(self, x, factor=3): 49 | for i in range(factor): 50 | x = F.interpolate(x, scale_factor=2, mode='bilinear', align_corners=True) 51 | return x 52 | 53 | 54 | def forward(self, input): 55 | out_l1, out_l2, out_l3, out_l4 = self.net(input, seg=True) 56 | out_l4_proj = self.proj_L4_C(out_l4) 57 | up_l4_to_l3 = F.interpolate(out_l4_proj, scale_factor=2, mode='bilinear', align_corners=True) 58 | merged_l3_upl4 = self.pspMod(torch.cat([out_l3, up_l4_to_l3], 1)) 59 | proj_merge_l3_bef_act = self.project_l3(merged_l3_upl4) 60 | proj_merge_l3 = self.act_l3(proj_merge_l3_bef_act) 61 | out_up_l3 = F.interpolate(proj_merge_l3, scale_factor=2, mode='bilinear', align_corners=True) 62 | merge_l2 = self.project_l2(torch.cat([out_l2, out_up_l3], 1)) 63 | out_up_l2 = F.interpolate(merge_l2, scale_factor=2, mode='bilinear', align_corners=True) 64 | merge_l1 = self.project_l1(torch.cat([out_l1, out_up_l2], 1)) 65 | if self.training: 66 | return F.interpolate(merge_l1, scale_factor=2, mode='bilinear', align_corners=True), self.hierarchicalUpsample(proj_merge_l3_bef_act) 67 | else: 68 | return F.interpolate(merge_l1, scale_factor=2, mode='bilinear', align_corners=True) 69 | 70 | 71 | if __name__ == '__main__': 72 | input = torch.Tensor(1, 3, 512, 1024).cuda() 73 | net = EESPNet_Seg(classes=20, s=2).cuda() 74 | out_x_8 = net(input) 75 | print(out_x_8.size()) 76 | 77 | --------------------------------------------------------------------------------
/semantic_segmentation/nodes/models/espnetv2_bdd100k_driveable/cnn/__pycache__/Model.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dheera/ros-semantic-segmentation/e11468489bb5cf898bb14517ebd43c4e69565d2a/semantic_segmentation/nodes/models/espnetv2_bdd100k_driveable/cnn/__pycache__/Model.cpython-36.pyc -------------------------------------------------------------------------------- /semantic_segmentation/nodes/models/espnetv2_bdd100k_driveable/cnn/__pycache__/SegmentationModel.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dheera/ros-semantic-segmentation/e11468489bb5cf898bb14517ebd43c4e69565d2a/semantic_segmentation/nodes/models/espnetv2_bdd100k_driveable/cnn/__pycache__/SegmentationModel.cpython-36.pyc -------------------------------------------------------------------------------- /semantic_segmentation/nodes/models/espnetv2_bdd100k_driveable/cnn/__pycache__/cnn_utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dheera/ros-semantic-segmentation/e11468489bb5cf898bb14517ebd43c4e69565d2a/semantic_segmentation/nodes/models/espnetv2_bdd100k_driveable/cnn/__pycache__/cnn_utils.cpython-36.pyc -------------------------------------------------------------------------------- /semantic_segmentation/nodes/models/espnetv2_bdd100k_driveable/cnn/cnn_utils.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | import torch.nn.functional as F 4 | 5 | 6 | __author__ = "Sachin Mehta" 7 | __version__ = "1.0.1" 8 | __maintainer__ = "Sachin Mehta" 9 | 10 | 11 | class PSPModule(nn.Module): 12 | def __init__(self, features, out_features=1024, sizes=(1, 2, 4, 8)): 13 | super().__init__() 14 | self.stages = [] 15 | self.stages = nn.ModuleList([C(features, features, 3, 1, groups=features) for size in sizes]) 16 | self.project = CBR(features * (len(sizes) + 1), out_features, 1, 1) 17 | 18 | def forward(self, feats): 19 | h, w = feats.size(2), feats.size(3) 20 | out = [feats] 21 | for stage in self.stages: 22 | feats = F.avg_pool2d(feats, kernel_size=3, stride=2, padding=1) 23 | upsampled = F.interpolate(input=stage(feats), size=(h, w), mode='bilinear', align_corners=True) 24 | out.append(upsampled) 25 | return self.project(torch.cat(out, dim=1)) 26 | 27 | class CBR(nn.Module): 28 | ''' 29 | This class defines the convolution layer with batch normalization and PReLU activation 30 | ''' 31 | 32 | def __init__(self, nIn, nOut, kSize, stride=1, groups=1): 33 | ''' 34 | 35 | :param nIn: number of input channels 36 | :param nOut: number of output channels 37 | :param kSize: kernel size 38 | :param stride: stride rate for down-sampling. 
Default is 1 39 | ''' 40 | super().__init__() 41 | padding = int((kSize - 1) / 2) 42 | self.conv = nn.Conv2d(nIn, nOut, kSize, stride=stride, padding=padding, bias=False, groups=groups) 43 | self.bn = nn.BatchNorm2d(nOut) 44 | self.act = nn.PReLU(nOut) 45 | 46 | def forward(self, input): 47 | ''' 48 | :param input: input feature map 49 | :return: transformed feature map 50 | ''' 51 | output = self.conv(input) 52 | # output = self.conv1(output) 53 | output = self.bn(output) 54 | output = self.act(output) 55 | return output 56 | 57 | 58 | class BR(nn.Module): 59 | ''' 60 | This class groups the batch normalization and PReLU activation 61 | ''' 62 | 63 | def __init__(self, nOut): 64 | ''' 65 | :param nOut: output feature maps 66 | ''' 67 | super().__init__() 68 | self.bn = nn.BatchNorm2d(nOut) 69 | self.act = nn.PReLU(nOut) 70 | 71 | def forward(self, input): 72 | ''' 73 | :param input: input feature map 74 | :return: normalized and thresholded feature map 75 | ''' 76 | output = self.bn(input) 77 | output = self.act(output) 78 | return output 79 | 80 | 81 | class CB(nn.Module): 82 | ''' 83 | This class groups the convolution and batch normalization 84 | ''' 85 | 86 | def __init__(self, nIn, nOut, kSize, stride=1, groups=1): 87 | ''' 88 | :param nIn: number of input channels 89 | :param nOut: number of output channels 90 | :param kSize: kernel size 91 | :param stride: optional stride for down-sampling 92 | ''' 93 | super().__init__() 94 | padding = int((kSize - 1) / 2) 95 | self.conv = nn.Conv2d(nIn, nOut, kSize, stride=stride, padding=padding, bias=False, 96 | groups=groups) 97 | self.bn = nn.BatchNorm2d(nOut) 98 | 99 | def forward(self, input): 100 | ''' 101 | 102 | :param input: input feature map 103 | :return: transformed feature map 104 | ''' 105 | output = self.conv(input) 106 | output = self.bn(output) 107 | return output 108 | 109 | 110 | class C(nn.Module): 111 | ''' 112 | This class is for a convolutional layer. 113 | ''' 114 | 115 | def __init__(self, nIn, nOut, kSize, stride=1, groups=1): 116 | ''' 117 | 118 | :param nIn: number of input channels 119 | :param nOut: number of output channels 120 | :param kSize: kernel size 121 | :param stride: optional stride rate for down-sampling 122 | ''' 123 | super().__init__() 124 | padding = int((kSize - 1) / 2) 125 | self.conv = nn.Conv2d(nIn, nOut, kSize, stride=stride, padding=padding, bias=False, 126 | groups=groups) 127 | 128 | def forward(self, input): 129 | ''' 130 | :param input: input feature map 131 | :return: transformed feature map 132 | ''' 133 | output = self.conv(input) 134 | return output 135 | 136 | 137 | class CDilated(nn.Module): 138 | ''' 139 | This class defines the dilated convolution. 140 | ''' 141 | 142 | def __init__(self, nIn, nOut, kSize, stride=1, d=1, groups=1): 143 | ''' 144 | :param nIn: number of input channels 145 | :param nOut: number of output channels 146 | :param kSize: kernel size 147 | :param stride: optional stride rate for down-sampling 148 | :param d: optional dilation rate 149 | ''' 150 | super().__init__() 151 | padding = int((kSize - 1) / 2) * d 152 | self.conv = nn.Conv2d(nIn, nOut, kSize, stride=stride, padding=padding, bias=False, 153 | dilation=d, groups=groups) 154 | 155 | def forward(self, input): 156 | ''' 157 | :param input: input feature map 158 | :return: transformed feature map 159 | ''' 160 | output = self.conv(input) 161 | return output 162 | 163 | class CDilatedB(nn.Module): 164 | ''' 165 | This class defines the dilated convolution with batch normalization.
166 | ''' 167 | 168 | def __init__(self, nIn, nOut, kSize, stride=1, d=1, groups=1): 169 | ''' 170 | :param nIn: number of input channels 171 | :param nOut: number of output channels 172 | :param kSize: kernel size 173 | :param stride: optional stride rate for down-sampling 174 | :param d: optional dilation rate 175 | ''' 176 | super().__init__() 177 | padding = int((kSize - 1) / 2) * d 178 | self.conv = nn.Conv2d(nIn, nOut,kSize, stride=stride, padding=padding, bias=False, 179 | dilation=d, groups=groups) 180 | self.bn = nn.BatchNorm2d(nOut) 181 | 182 | def forward(self, input): 183 | ''' 184 | :param input: input feature map 185 | :return: transformed feature map 186 | ''' 187 | return self.bn(self.conv(input)) 188 | -------------------------------------------------------------------------------- /semantic_segmentation/nodes/models/espnetv2_bdd100k_driveable/cv_resize.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | Nearest neighbor resize using numpy only. 5 | Unfortunately the ROS-provided cv2.so is fantastically compiled without python3 bindings so it cannot be used with python3. This allows people who only need nearest neighbor resize to avoid these shenanigans entirely and avoid OpenCV dependency. 6 | 7 | Author: dheera@dheera.net 8 | """ 9 | 10 | import numpy as np 11 | 12 | def resize(img, new_shape): 13 | """ 14 | Nearest-neighbor resize. 15 | img = an opencv image 16 | shape = (width, height) as per OpenCV convention NOT numpy convention 17 | """ 18 | 19 | new_width, new_height = new_shape # cv2 convention 20 | old_height, old_width = img.shape[0], img.shape[1] # numpy shenanigans 21 | 22 | new_i, new_j = np.meshgrid( 23 | np.linspace(0, old_width - 1, new_width).astype(np.uint16), 24 | np.linspace(0, old_height -1, new_height).astype(np.uint16) 25 | ) 26 | 27 | return img[new_j, new_i] 28 | -------------------------------------------------------------------------------- /semantic_segmentation/nodes/models/espnetv2_bdd100k_driveable/weights.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dheera/ros-semantic-segmentation/e11468489bb5cf898bb14517ebd43c4e69565d2a/semantic_segmentation/nodes/models/espnetv2_bdd100k_driveable/weights.pth -------------------------------------------------------------------------------- /semantic_segmentation/nodes/models/mnv2_bdd100k_driveable_513/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import json 4 | import os 5 | import numpy as np 6 | import tensorflow as tf 7 | import time 8 | from tensorflow.python.client import timeline 9 | 10 | PATH = os.path.dirname(__file__) 11 | MODEL_FILENAME = "graph.pb" 12 | CATEGORIES_FILENAME = "categories.json" 13 | INPUT_SIZE = 513 14 | INPUT_TENSOR_NAME = "ImageTensor:0" 15 | OUTPUT_TENSOR_NAME = "SemanticPredictions:0" 16 | 17 | class Model(object): 18 | def __init__(self): 19 | self.graph = tf.Graph() 20 | self.graph_def = None 21 | 22 | color_map_list = [] 23 | with open(os.path.join(PATH, CATEGORIES_FILENAME)) as f: 24 | self._categories = json.loads(f.read()) 25 | self._color_map = np.array([category["color"] for category in self._categories], dtype = np.uint8) 26 | 27 | with open(os.path.join(PATH, MODEL_FILENAME), 'rb') as f: 28 | self.graph_def = tf.GraphDef.FromString(f.read()) 29 | 30 | if self.graph_def is None: 31 | print('Error loading graph') 32 | 33 | with 
self.graph.as_default(): 34 | tf.import_graph_def(self.graph_def, name='') 35 | 36 | self.config = tf.ConfigProto() 37 | self.config.gpu_options.allow_growth = True 38 | self.session = tf.Session(graph=self.graph, config = self.config) 39 | self.run_options = tf.RunOptions() 40 | self.run_metadata = tf.RunMetadata() 41 | 42 | @property 43 | def trace(self): 44 | return (self.run_options.trace_level > 0) 45 | 46 | @trace.setter 47 | def trace(self, value): 48 | if value: 49 | self.run_options.trace_level = tf.RunOptions.FULL_TRACE 50 | else: 51 | self.run_options.trace_level = 0 52 | 53 | @property 54 | def color_map(self): 55 | return self._color_map 56 | 57 | @property 58 | def categories(self): 59 | return self._categories 60 | 61 | def infer(self, images): 62 | seg_maps = self.session.run( 63 | OUTPUT_TENSOR_NAME, 64 | options = self.run_options, 65 | run_metadata = self.run_metadata, 66 | feed_dict = { INPUT_TENSOR_NAME: images } 67 | ) 68 | 69 | if(self.run_options.trace_level > 0): 70 | fetched_timeline = timeline.Timeline(self.run_metadata.step_stats) 71 | chrome_trace = fetched_timeline.generate_chrome_trace_format() 72 | filename = "/tmp/trace-" + str(time.time()) + ".json" 73 | with open(filename, "w") as f: 74 | f.write(chrome_trace) 75 | 76 | return(seg_maps) 77 | 78 | -------------------------------------------------------------------------------- /semantic_segmentation/nodes/models/mnv2_bdd100k_driveable_513/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dheera/ros-semantic-segmentation/e11468489bb5cf898bb14517ebd43c4e69565d2a/semantic_segmentation/nodes/models/mnv2_bdd100k_driveable_513/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /semantic_segmentation/nodes/models/mnv2_bdd100k_driveable_513/categories.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "color": [ 0, 0, 0 ], 4 | "name": "background" 5 | }, 6 | { 7 | "color": [ 255, 0, 0 ], 8 | "name": "driveable" 9 | }, 10 | { 11 | "color": [ 0, 127, 255 ], 12 | "name": "driveable_alt" 13 | } 14 | ] 15 | -------------------------------------------------------------------------------- /semantic_segmentation/nodes/models/mnv2_bdd100k_driveable_513/graph.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dheera/ros-semantic-segmentation/e11468489bb5cf898bb14517ebd43c4e69565d2a/semantic_segmentation/nodes/models/mnv2_bdd100k_driveable_513/graph.pb -------------------------------------------------------------------------------- /semantic_segmentation/nodes/models/mnv2_bdd100k_driveable_513/notes.txt: -------------------------------------------------------------------------------- 1 | BASE_LEARNING_RATE=0.007 2 | BATCH_SIZE=32 3 | DATASET="bdd100k_driveable" 4 | FINE_TUNE_BATCH_NORM="true" 5 | INIT_CKPT_NAME="mobilenet_v2_cityscapes" 6 | INIT_LAST_LAYER="false" 7 | INPUT_SIZE=513 8 | LEARNING_POWER=0.9 9 | MAX_SCALE_FACTOR=2.00 10 | MIN_SCALE_FACTOR=0.75 11 | MODEL_VARIANT="mobilenet_v2" 12 | NUM_ITERATIONS=100000 13 | OUTPUT_STRIDE=16 14 | PROB_OF_FLIP = 0.0 15 | TRAIN_SPLIT="train" 16 | -------------------------------------------------------------------------------- /semantic_segmentation/nodes/models/mnv2_coco2017_driving_513/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env 
python3 2 | 3 | import json 4 | import os 5 | import numpy as np 6 | import tensorflow as tf 7 | import time 8 | from tensorflow.python.client import timeline 9 | 10 | PATH = os.path.dirname(__file__) 11 | MODEL_FILENAME = "graph.pb" 12 | CATEGORIES_FILENAME = "categories.json" 13 | INPUT_SIZE = 513 14 | INPUT_TENSOR_NAME = "ImageTensor:0" 15 | OUTPUT_TENSOR_NAME = "SemanticPredictions:0" 16 | 17 | class Model(object): 18 | def __init__(self): 19 | self.graph = tf.Graph() 20 | self.graph_def = None 21 | 22 | color_map_list = [] 23 | with open(os.path.join(PATH, CATEGORIES_FILENAME)) as f: 24 | self._categories = json.loads(f.read()) 25 | self._color_map = np.array([category["color"] for category in self._categories], dtype = np.uint8) 26 | 27 | with open(os.path.join(PATH, MODEL_FILENAME), 'rb') as f: 28 | self.graph_def = tf.GraphDef.FromString(f.read()) 29 | 30 | if self.graph_def is None: 31 | print('Error loading graph') 32 | 33 | with self.graph.as_default(): 34 | tf.import_graph_def(self.graph_def, name='') 35 | 36 | self.config = tf.ConfigProto() 37 | self.config.gpu_options.allow_growth = True 38 | self.session = tf.Session(graph=self.graph, config = self.config) 39 | self.run_options = tf.RunOptions() 40 | self.run_metadata = tf.RunMetadata() 41 | 42 | @property 43 | def trace(self): 44 | return (self.run_options.trace_level > 0) 45 | 46 | @trace.setter 47 | def trace(self, value): 48 | if value: 49 | self.run_options.trace_level = tf.RunOptions.FULL_TRACE 50 | else: 51 | self.run_options.trace_level = 0 52 | 53 | @property 54 | def color_map(self): 55 | return self._color_map 56 | 57 | @property 58 | def categories(self): 59 | return self._categories 60 | 61 | def infer(self, images): 62 | seg_maps = self.session.run( 63 | OUTPUT_TENSOR_NAME, 64 | options = self.run_options, 65 | run_metadata = self.run_metadata, 66 | feed_dict = { INPUT_TENSOR_NAME: images } 67 | ) 68 | 69 | if(self.run_options.trace_level > 0): 70 | fetched_timeline = timeline.Timeline(self.run_metadata.step_stats) 71 | chrome_trace = fetched_timeline.generate_chrome_trace_format() 72 | filename = "/tmp/trace-" + str(time.time()) + ".json" 73 | with open(filename, "w") as f: 74 | f.write(chrome_trace) 75 | 76 | return(seg_maps) 77 | 78 | -------------------------------------------------------------------------------- /semantic_segmentation/nodes/models/mnv2_coco2017_driving_513/categories.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "color": [ 0, 0, 0 ], 4 | "name": "background", 5 | "sources": {} 6 | }, 7 | { 8 | "color": [ 255, 0, 0 ], 9 | "name": "person", 10 | "sources": { "coco2017": [ 1 ] } 11 | }, 12 | { 13 | "color": [ 0, 255, 0 ], 14 | "name": "bicycle", 15 | "sources": { "coco2017": [ 2 ] } 16 | }, 17 | { 18 | "color": [ 0, 127, 255 ], 19 | "name": "car", 20 | "sources": { "coco2017": [ 3 ] } 21 | }, 22 | { 23 | "color": [ 0, 255, 255 ], 24 | "name": "motorcycle", 25 | "sources": { "coco2017": [ 4 ] } 26 | }, 27 | { 28 | "color": [ 0, 255, 127 ], 29 | "name": "bus", 30 | "sources": { "coco2017": [ 6 ] } 31 | }, 32 | { 33 | "color": [ 0, 127, 127 ], 34 | "name": "train", 35 | "sources": { "coco2017": [ 7 ] } 36 | }, 37 | { 38 | "color": [ 127, 255, 255 ], 39 | "name": "truck", 40 | "sources": { "coco2017": [ 8 ] } 41 | }, 42 | { 43 | "color": [ 255, 255, 0 ], 44 | "name": "traffic_light", 45 | "sources": { "coco2017": [ 10 ] } 46 | }, 47 | { 48 | "color": [ 255, 127, 0 ], 49 | "name": "stop_sign", 50 | "sources": { "coco2017": [ 13 ] } 51 | }, 
52 | { 53 | "color": [ 191, 191, 0 ], 54 | "name": "pavement", 55 | "sources": { "coco2017": [ 140 ] } 56 | }, 57 | { 58 | "color": [ 191, 191, 191 ], 59 | "name": "railroad", 60 | "sources": { "coco2017": [ 147 ] } 61 | }, 62 | { 63 | "color": [ 127, 0, 127 ], 64 | "name": "road", 65 | "sources": { "coco2017": [ 149 ] } 66 | } 67 | ] 68 | -------------------------------------------------------------------------------- /semantic_segmentation/nodes/models/mnv2_coco2017_driving_513/graph.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dheera/ros-semantic-segmentation/e11468489bb5cf898bb14517ebd43c4e69565d2a/semantic_segmentation/nodes/models/mnv2_coco2017_driving_513/graph.pb -------------------------------------------------------------------------------- /semantic_segmentation/nodes/models/mnv2_dm05_voc/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import json 4 | import os 5 | import numpy as np 6 | import tensorflow as tf 7 | import time 8 | from tensorflow.python.client import timeline 9 | from .cv_resize import resize 10 | 11 | PATH = os.path.dirname(__file__) 12 | MODEL_FILENAME = "graph.pb" 13 | CATEGORIES_FILENAME = "categories.json" 14 | INPUT_SIZE = 513 15 | INPUT_TENSOR_NAME = "ImageTensor:0" 16 | OUTPUT_TENSOR_NAME = "SemanticPredictions:0" 17 | 18 | class Model(object): 19 | def __init__(self): 20 | self.graph = tf.Graph() 21 | self.graph_def = None 22 | 23 | color_map_list = [] 24 | with open(os.path.join(PATH, CATEGORIES_FILENAME)) as f: 25 | self._categories = json.loads(f.read()) 26 | self._color_map = np.array([category["color"] for category in self._categories], dtype = np.uint8) 27 | 28 | with open(os.path.join(PATH, MODEL_FILENAME), 'rb') as f: 29 | self.graph_def = tf.GraphDef.FromString(f.read()) 30 | 31 | if self.graph_def is None: 32 | print('Error loading graph') 33 | 34 | with self.graph.as_default(): 35 | tf.import_graph_def(self.graph_def, name='') 36 | 37 | self.config = tf.ConfigProto() 38 | self.config.gpu_options.allow_growth = True 39 | self.session = tf.Session(graph=self.graph, config = self.config) 40 | self.run_options = tf.RunOptions() 41 | self.run_metadata = tf.RunMetadata() 42 | 43 | @property 44 | def trace(self): 45 | return (self.run_options.trace_level > 0) 46 | 47 | @trace.setter 48 | def trace(self, value): 49 | if value: 50 | self.run_options.trace_level = tf.RunOptions.FULL_TRACE 51 | else: 52 | self.run_options.trace_level = 0 53 | 54 | @property 55 | def color_map(self): 56 | return self._color_map 57 | 58 | @property 59 | def categories(self): 60 | return self._categories 61 | 62 | def infer(self, images): 63 | resized_images = [resize(image, (INPUT_SIZE, INPUT_SIZE)) \ 64 | for image in images] 65 | 66 | seg_maps = self.session.run( 67 | OUTPUT_TENSOR_NAME, 68 | options = self.run_options, 69 | run_metadata = self.run_metadata, 70 | feed_dict = { INPUT_TENSOR_NAME: resized_images } 71 | ) 72 | 73 | if(self.run_options.trace_level > 0): 74 | fetched_timeline = timeline.Timeline(self.run_metadata.step_stats) 75 | chrome_trace = fetched_timeline.generate_chrome_trace_format() 76 | filename = "/tmp/trace-" + str(time.time()) + ".json" 77 | with open(filename, "w") as f: 78 | f.write(chrome_trace) 79 | 80 | return(seg_maps) 81 | 82 | -------------------------------------------------------------------------------- /semantic_segmentation/nodes/models/mnv2_dm05_voc/categories.json: 
-------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "color": [ 0, 0, 0 ], 4 | "name": "background" 5 | }, 6 | { 7 | "color": [ 255, 0, 0 ], 8 | "name": "aeroplane" 9 | }, 10 | { 11 | "color": [ 0, 127, 255 ], 12 | "name": "bicycle" 13 | }, 14 | { 15 | "color": [ 255, 127, 0 ], 16 | "name": "bird" 17 | }, 18 | { 19 | "color": [ 255, 0, 127 ], 20 | "name": "boat" 21 | }, 22 | { 23 | "color": [ 127, 0, 255 ], 24 | "name": "bottle" 25 | }, 26 | { 27 | "color": [ 0, 255, 127 ], 28 | "name": "bus" 29 | }, 30 | { 31 | "color": [ 127, 255, 0 ], 32 | "name": "car" 33 | }, 34 | { 35 | "color": [ 0, 255, 0 ], 36 | "name": "cat" 37 | }, 38 | { 39 | "color": [ 0, 0, 255 ], 40 | "name": "chair" 41 | }, 42 | { 43 | "color": [ 255, 0, 255 ], 44 | "name": "cow" 45 | }, 46 | { 47 | "color": [ 255, 255, 0 ], 48 | "name": "diningtable" 49 | }, 50 | { 51 | "color": [ 0, 255, 255 ], 52 | "name": "dog" 53 | }, 54 | { 55 | "color": [ 255, 127, 255 ], 56 | "name": "horse" 57 | }, 58 | { 59 | "color": [ 255, 255, 127 ], 60 | "name": "motorbike" 61 | }, 62 | { 63 | "color": [ 127, 255, 255 ], 64 | "name": "person" 65 | }, 66 | { 67 | "color": [ 127, 63, 0 ], 68 | "name": "pottedplant" 69 | }, 70 | { 71 | "color": [ 255, 255, 255 ], 72 | "name": "sheep" 73 | }, 74 | { 75 | "color": [ 63, 127, 0 ], 76 | "name": "sofa" 77 | }, 78 | { 79 | "color": [ 0, 63, 127 ], 80 | "name": "train" 81 | }, 82 | { 83 | "color": [ 0, 127, 63 ], 84 | "name": "tvmonitor" 85 | } 86 | ] 87 | -------------------------------------------------------------------------------- /semantic_segmentation/nodes/models/mnv2_dm05_voc/cv_resize.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | Nearest neighbor resize using numpy only. 5 | Unfortunately the ROS-provided cv2.so is fantastically compiled without python3 bindings so it cannot be used with python3. This allows people who only need nearest neighbor resize to avoid these shenanigans entirely and avoid OpenCV dependency. 6 | 7 | Author: dheera@dheera.net 8 | """ 9 | 10 | import numpy as np 11 | 12 | def resize(img, new_shape): 13 | """ 14 | Nearest-neighbor resize. 
15 | img = an opencv image 16 | shape = (width, height) as per OpenCV convention NOT numpy convention 17 | """ 18 | 19 | new_width, new_height = new_shape # cv2 convention 20 | old_height, old_width = img.shape[0], img.shape[1] # numpy shenanigans 21 | 22 | new_i, new_j = np.meshgrid( 23 | np.linspace(0, old_width - 1, new_width).astype(np.uint16), 24 | np.linspace(0, old_height -1, new_height).astype(np.uint16) 25 | ) 26 | 27 | return img[new_j, new_i] 28 | -------------------------------------------------------------------------------- /semantic_segmentation/nodes/models/mnv2_dm05_voc/graph.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dheera/ros-semantic-segmentation/e11468489bb5cf898bb14517ebd43c4e69565d2a/semantic_segmentation/nodes/models/mnv2_dm05_voc/graph.pb -------------------------------------------------------------------------------- /semantic_segmentation/nodes/segmentation_node: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import numpy as np 4 | import rospy 5 | import cv_bridge 6 | from sensor_msgs.msg import Image 7 | 8 | def on_image(msg): 9 | on_image.last_image = msg 10 | on_image.last_image = None 11 | 12 | if __name__ == "__main__": 13 | rospy.init_node('road_segmentation_node') 14 | 15 | MODEL = rospy.get_param('~model', 'mnv2_bdd100k_driveable_513') 16 | TOPIC_IMAGE = rospy.get_param('~topic_image', 'image_raw') 17 | TOPIC_SEMANTIC = rospy.get_param('~topic_semantic', 'semantic') 18 | TOPIC_SEMANTIC_COLOR = rospy.get_param('~topic_semantic_color', 'semantic_color') 19 | RATE = rospy.get_param('~rate', 30.0) 20 | 21 | sub_image = rospy.Subscriber(TOPIC_IMAGE, Image, on_image) 22 | pub_semantic = rospy.Publisher(TOPIC_SEMANTIC, Image, queue_size = 1) 23 | pub_semantic_color = rospy.Publisher(TOPIC_SEMANTIC_COLOR, Image, queue_size = 1) 24 | 25 | rate = rospy.Rate(RATE) 26 | 27 | model = getattr(__import__('models', globals(), locals(), fromlist = [MODEL]), MODEL).Model() 28 | rospy.set_param("semantic_categories", model.categories) 29 | 30 | while not rospy.is_shutdown(): 31 | rate.sleep() 32 | 33 | if on_image.last_image is None: 34 | continue 35 | 36 | header = on_image.last_image.header 37 | semantic = model.infer([cv_bridge.imgmsg_to_cv2(on_image.last_image)])[0] 38 | 39 | if pub_semantic.get_num_connections() > 0: 40 | m = cv_bridge.cv2_to_imgmsg(semantic.astype(np.uint8), encoding = 'mono8') 41 | m.header.stamp.secs = header.stamp.secs 42 | m.header.stamp.nsecs = header.stamp.nsecs 43 | pub_semantic.publish(m) 44 | 45 | if pub_semantic_color.get_num_connections() > 0: 46 | m = cv_bridge.cv2_to_imgmsg(model.color_map[semantic.astype(np.uint8)], encoding = 'rgb8') 47 | m.header.stamp.secs = header.stamp.secs 48 | m.header.stamp.nsecs = header.stamp.nsecs 49 | pub_semantic_color.publish(m) 50 | 51 | -------------------------------------------------------------------------------- /semantic_segmentation/package.xml: -------------------------------------------------------------------------------- 1 | <?xml version="1.0"?> 2 | <package> 3 | <name>semantic_segmentation</name> 4 | <version>0.0.0</version> 5 | <description>Semantic segmentation</description> 6 | 7 | <maintainer email="dheera@dheera.net">dheera</maintainer> 8 | 9 | <license>BSD</license> 10 | 11 | <buildtool_depend>catkin</buildtool_depend> 12 | <build_depend>rospy</build_depend> 13 | <build_depend>std_msgs</build_depend> 14 | <build_depend>sensor_msgs</build_depend> 15 | <run_depend>rospy</run_depend> 16 | <run_depend>std_msgs</run_depend> 17 | <run_depend>sensor_msgs</run_depend> 18 | 19 | <export> 20 | </export> 21 | </package> 22 | --------------------------------------------------------------------------------
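Addendum (not part of the repository): the README's recipe for adding a model -- a new directory under nodes/models whose __init__.py exposes a Model class with infer(), categories, and color_map -- could look like the following minimal sketch. The directory name my_model and the all-background "inference" are placeholders; only the interface is taken from the model implementations above.

```python
#!/usr/bin/env python3
# nodes/models/my_model/__init__.py -- hypothetical skeleton.

import json
import os
import numpy as np

PATH = os.path.dirname(__file__)
CATEGORIES_FILENAME = "categories.json"

class Model(object):
    def __init__(self):
        # categories.json is a list of {"color": [r, g, b], "name": ...};
        # the list index is the class ID emitted by infer().
        with open(os.path.join(PATH, CATEGORIES_FILENAME)) as f:
            self._categories = json.loads(f.read())
        self._color_map = np.array(
            [category["color"] for category in self._categories],
            dtype=np.uint8)

    @property
    def color_map(self):
        return self._color_map

    @property
    def categories(self):
        return self._categories

    def infer(self, images):
        # Replace with real inference: must return one HxW array of class
        # IDs (castable to uint8) per input image, in the same order.
        return [np.zeros(image.shape[:2], dtype=np.uint8) for image in images]
```

The segmentation_node above only touches this interface: it instantiates Model(), publishes .categories as the semantic_categories parameter, indexes .color_map with the inferred class map, and publishes the result.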