├── LICENSE
├── README.md
├── checkpoint
├── image
│   ├── test_3000.png
│   └── test_5000.png
├── input_velodyne.py
├── model_01_deconv.py
├── model_025_deconv_norm.py
├── model_025_deconv_norm_validate.py
├── multiview_2d.py
├── parse_xml.py
├── thesis.txt
├── velodyne_025_deconv_norm_valid40.ckpt.data-00000-of-00001
├── velodyne_025_deconv_norm_valid40.ckpt.index
└── velodyne_025_deconv_norm_valid40.ckpt.meta

/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2019 Yuki Tsuji

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# KITTI Data Processing and 3D CNN for Vehicle Detection
## 3D CNN for Vehicle Detection
A 3D fully convolutional network that detects vehicles in point cloud data.
Reference: "3D Fully Convolutional Network for Vehicle Detection in Point Cloud"

The main model file is "model_01_deconv.py".
The data loading functions are in "input_velodyne.py".

### Example (3D bounding boxes, 8 vertices each; NMS is not applied)
(see image/test_3000.png and image/test_5000.png)

## Requirements
- Python 2
- TensorFlow
- ROS
- python-pcl (if you do not use PCL, comment out the related code in input_velodyne.py)

## KITTI Data Processing
The dataset is the KITTI 3D Object Detection dataset:

- Velodyne point clouds
- Training labels
- Calibration files
--------------------------------------------------------------------------------
/checkpoint:
--------------------------------------------------------------------------------
model_checkpoint_path: "velodyne_10th_try_100.ckpt"
all_model_checkpoint_paths: "velodyne_10th_try_60.ckpt"
all_model_checkpoint_paths: "velodyne_10th_try_70.ckpt"
all_model_checkpoint_paths: "velodyne_10th_try_80.ckpt"
all_model_checkpoint_paths: "velodyne_10th_try_90.ckpt"
all_model_checkpoint_paths: "velodyne_10th_try_100.ckpt"
--------------------------------------------------------------------------------
/image/test_3000.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yukitsuji/3D_CNN_tensorflow/5104ecf3f75a4e51a2170b72244e3f2a8993ce25/image/test_3000.png
--------------------------------------------------------------------------------
/image/test_5000.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yukitsuji/3D_CNN_tensorflow/5104ecf3f75a4e51a2170b72244e3f2a8993ce25/image/test_5000.png
--------------------------------------------------------------------------------
/input_velodyne.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
import sys
import os
import rospy
import numpy as np
import cv2
import pcl
import glob
import math
import std_msgs.msg
import sensor_msgs.point_cloud2 as pc2
from sensor_msgs.msg import PointCloud2
from parse_xml import parseXML


def load_pc_from_pcd(pcd_path):
    """Load PointCloud data from pcd file."""
    p = pcl.load(pcd_path)
    return np.array(list(p), dtype=np.float32)

def load_pc_from_bin(bin_path):
    """Load PointCloud data from bin file."""
    obj = np.fromfile(bin_path, dtype=np.float32).reshape(-1, 4)
    return obj

def read_label_from_txt(label_path):
    """Read bounding-box labels from a KITTI txt file."""
    bounding_box = []
    with open(label_path, "r") as f:
        labels = f.read().split("\n")
        for label in labels:
            if not label:
                continue
            label = label.split(" ")
            if label[0] == "DontCare":
                continue

            if label[0] in ("Car", "Van"):  # add "Truck" here to include trucks
                bounding_box.append(label[8:15])

    if bounding_box:
        data = np.array(bounding_box, dtype=np.float32)
        return data[:, 3:6], data[:, :3], data[:, 6]
    else:
        return None, None, None
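# Illustrative note (not part of the original file): in a KITTI label line the
# fields at indices 8:15 are [h, w, l, x, y, z, rotation_y], so the slices
# returned above are places (x, y, z), size (h, w, l) and the yaw angle.
# A minimal sketch of parsing one well-formed label line, with a hypothetical
# helper name:
def _example_parse_label_line(line):
    """Hypothetical helper: split one KITTI label line into its box fields."""
    fields = line.split(" ")
    cls = fields[0]                              # e.g. "Car"
    h, w, l = [float(v) for v in fields[8:11]]   # box size in metres
    x, y, z = [float(v) for v in fields[11:14]]  # location in camera coordinates
    rotation_y = float(fields[14])               # yaw around the camera y axis
    return cls, (h, w, l), (x, y, z), rotation_y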
54 | """ 55 | labels = parseXML(label_path) 56 | label_dic = {} 57 | for label in labels: 58 | first_frame = label.firstFrame 59 | nframes = label.nFrames 60 | size = label.size 61 | obj_type = label.objectType 62 | for index, place, rotate in zip(range(first_frame, first_frame+nframes), label.trans, label.rots): 63 | if index in label_dic.keys(): 64 | label_dic[index]["place"] = np.vstack((label_dic[index]["place"], place)) 65 | label_dic[index]["size"] = np.vstack((label_dic[index]["size"], np.array(size))) 66 | label_dic[index]["rotate"] = np.vstack((label_dic[index]["rotate"], rotate)) 67 | else: 68 | label_dic[index] = {} 69 | label_dic[index]["place"] = place 70 | label_dic[index]["rotate"] = rotate 71 | label_dic[index]["size"] = np.array(size) 72 | return label_dic, size 73 | 74 | def read_calib_file(calib_path): 75 | """Read a calibration file.""" 76 | data = {} 77 | with open(calib_path, 'r') as f: 78 | for line in f.readlines(): 79 | if not line or line == "\n": 80 | continue 81 | key, value = line.split(':', 1) 82 | try: 83 | data[key] = np.array([float(x) for x in value.split()]) 84 | except ValueError: 85 | pass 86 | return data 87 | 88 | def proj_to_velo(calib_data): 89 | """Projection matrix to 3D axis for 3D Label""" 90 | rect = calib_data["R0_rect"].reshape(3, 3) 91 | velo_to_cam = calib_data["Tr_velo_to_cam"].reshape(3, 4) 92 | inv_rect = np.linalg.inv(rect) 93 | inv_velo_to_cam = np.linalg.pinv(velo_to_cam[:, :3]) 94 | return np.dot(inv_velo_to_cam, inv_rect) 95 | 96 | 97 | def filter_camera_angle(places): 98 | """Filter camera angles for KiTTI Datasets""" 99 | bool_in = np.logical_and((places[:, 1] < places[:, 0] - 0.27), (-places[:, 1] < places[:, 0] - 0.27)) 100 | # bool_in = np.logical_and((places[:, 1] < places[:, 0]), (-places[:, 1] < places[:, 0])) 101 | return places[bool_in] 102 | 103 | def create_publish_obj(obj, places, rotates, size): 104 | """Create object of correct data for publisher""" 105 | for place, rotate, sz in zip(places, rotates, size): 106 | x, y, z = place 107 | obj.append((x, y, z)) 108 | h, w, l = sz 109 | if l > 10: 110 | continue 111 | for hei in range(0, int(h*100)): 112 | for wid in range(0, int(w*100)): 113 | for le in range(0, int(l*100)): 114 | a = (x - l / 2.) + le / 100. 115 | b = (y - w / 2.) + wid / 100. 116 | c = (z) + hei / 100. 
def get_boxcorners(places, rotates, size):
    """Create 8 corners of bounding box from bottom center."""
    corners = []
    for place, rotate, sz in zip(places, rotates, size):
        x, y, z = place
        h, w, l = sz
        if l > 10:
            continue

        corner = np.array([
            [x - l / 2., y - w / 2., z],
            [x + l / 2., y - w / 2., z],
            [x - l / 2., y + w / 2., z],
            [x - l / 2., y - w / 2., z + h],
            [x - l / 2., y + w / 2., z + h],
            [x + l / 2., y + w / 2., z],
            [x + l / 2., y - w / 2., z + h],
            [x + l / 2., y + w / 2., z + h],
        ])

        corner -= np.array([x, y, z])

        rotate_matrix = np.array([
            [np.cos(rotate), -np.sin(rotate), 0],
            [np.sin(rotate), np.cos(rotate), 0],
            [0, 0, 1]
        ])

        a = np.dot(corner, rotate_matrix.transpose())
        a += np.array([x, y, z])
        corners.append(a)
    return np.array(corners)

def publish_pc2(pc, obj):
    """Publisher of PointCloud data"""
    pub = rospy.Publisher("/points_raw", PointCloud2, queue_size=1000000)
    rospy.init_node("pc2_publisher")
    header = std_msgs.msg.Header()
    header.stamp = rospy.Time.now()
    header.frame_id = "velodyne"
    points = pc2.create_cloud_xyz32(header, pc[:, :3])

    pub2 = rospy.Publisher("/points_raw1", PointCloud2, queue_size=1000000)
    header = std_msgs.msg.Header()
    header.stamp = rospy.Time.now()
    header.frame_id = "velodyne"
    points2 = pc2.create_cloud_xyz32(header, obj)

    r = rospy.Rate(0.1)
    while not rospy.is_shutdown():
        pub.publish(points)
        pub2.publish(points2)
        r.sleep()

def raw_to_voxel(pc, resolution=0.50, x=(0, 90), y=(-50, 50), z=(-4.5, 5.5)):
    """Convert a raw point cloud to a binary occupancy voxel grid.

    Points outside the (x, y, z) ranges are dropped; reflectance is discarded.
    """
    logic_x = np.logical_and(pc[:, 0] >= x[0], pc[:, 0] < x[1])
    logic_y = np.logical_and(pc[:, 1] >= y[0], pc[:, 1] < y[1])
    logic_z = np.logical_and(pc[:, 2] >= z[0], pc[:, 2] < z[1])
    pc = pc[:, :3][np.logical_and(logic_x, np.logical_and(logic_y, logic_z))]
    pc = ((pc - np.array([x[0], y[0], z[0]])) / resolution).astype(np.int32)
    voxel = np.zeros((int((x[1] - x[0]) / resolution), int((y[1] - y[0]) / resolution), int(round((z[1] - z[0]) / resolution))))
    voxel[pc[:, 0], pc[:, 1], pc[:, 2]] = 1
    return voxel
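# Illustrative note (not part of the original file): with the defaults above, a
# 90 m x 100 m x 10 m range at 0.5 m resolution yields a (180, 200, 20) grid.
# A minimal runnable sketch on synthetic points, using a hypothetical helper:
def _example_voxelize_random_cloud():
    """Hypothetical demo: voxelize a random cloud and report the grid shape."""
    rng_pc = np.random.rand(1000, 4).astype(np.float32)  # x, y, z, reflectance in [0, 1)
    rng_pc[:, 0] *= 90.0                                 # spread x over 0..90 m
    rng_pc[:, 1] = rng_pc[:, 1] * 100.0 - 50.0           # spread y over -50..50 m
    rng_pc[:, 2] = rng_pc[:, 2] * 10.0 - 4.5             # spread z over -4.5..5.5 m
    voxel = raw_to_voxel(rng_pc)
    return voxel.shape                                   # (180, 200, 20)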
def center_to_sphere(places, size, resolution=0.50, min_value=np.array([0., -50., -4.5]), scale=4, x=(0, 90), y=(-50, 50), z=(-4.5, 5.5)):
    """Convert object labels to training labels for the objectness loss.

    Maps each metric box center to an index in the output feature map,
    which is `scale` times coarser than the input voxel grid.
    """
    x_logical = np.logical_and((places[:, 0] < x[1]), (places[:, 0] >= x[0]))
    y_logical = np.logical_and((places[:, 1] < y[1]), (places[:, 1] >= y[0]))
    z_logical = np.logical_and((places[:, 2] < z[1]), (places[:, 2] >= z[0]))
    xyz_logical = np.logical_and(x_logical, np.logical_and(y_logical, z_logical))
    center = places.copy()
    center[:, 2] = center[:, 2] + size[:, 0] / 2.  # shift from bottom to box center
    sphere_center = ((center[xyz_logical] - min_value) / (resolution * scale)).astype(np.int32)
    return sphere_center

def sphere_to_center(p_sphere, resolution=0.5, scale=4, min_value=np.array([0., -50., -4.5])):
    """From feature-map indices back to metric label centers."""
    center = p_sphere * (resolution * scale) + min_value
    return center

def voxel_to_corner(corner_vox, resolution, center):  # TODO
    """Create 3D corners from a voxel center and the predicted corner offsets."""
    corners = center + corner_vox
    return corners

def read_labels(label_path, label_type, calib_path=None, is_velo_cam=False, proj_velo=None):
    """Read labels from an xml or txt file.

    The original label positions are shifted about 0.27 m from the object
    center, so the object positions need to be corrected.
    """
    if label_type == "txt":  # TODO
        places, size, rotates = read_label_from_txt(label_path)
        if places is None:
            return None, None, None
        rotates = np.pi / 2 - rotates
        dummy = places.copy()
        if calib_path:
            places = np.dot(dummy, proj_velo.transpose())[:, :3]
        else:
            places = dummy
        if is_velo_cam:
            places[:, 0] += 0.27

    elif label_type == "xml":
        bounding_boxes, size = read_label_from_xml(label_path)
        places = bounding_boxes[30]["place"]
        rotates = bounding_boxes[30]["rotate"][:, 2]
        size = bounding_boxes[30]["size"]

    return places, rotates, size

def create_label(places, size, corners, resolution=0.50, x=(0, 90), y=(-50, 50), z=(-4.5, 5.5), scale=4, min_value=np.array([0., -50., -4.5])):
    """Create training labels which satisfy the range of the experiment."""
    x_logical = np.logical_and((places[:, 0] < x[1]), (places[:, 0] >= x[0]))
    y_logical = np.logical_and((places[:, 1] < y[1]), (places[:, 1] >= y[0]))
    z_logical = np.logical_and((places[:, 2] + size[:, 0]/2. < z[1]), (places[:, 2] + size[:, 0]/2. >= z[0]))
    xyz_logical = np.logical_and(x_logical, np.logical_and(y_logical, z_logical))

    center = places.copy()
    center[:, 2] = center[:, 2] + size[:, 0] / 2.  # Move bottom to center
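    # Illustrative note (not part of the original file): with resolution=0.5 and
    # scale=4 each output cell covers 2 m. For example, a car whose bottom centre
    # is at (20.0, 5.0, -1.5) with height 1.5 m has box centre z = -0.75 and maps
    # to sphere_center = floor(((20.0, 5.0, -0.75) - (0, -50, -4.5)) / 2.0)
    #                  = (10, 27, 1).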
    sphere_center = ((center[xyz_logical] - min_value) / (resolution * scale)).astype(np.int32)

    train_corners = corners[xyz_logical].copy()
    anchor_center = sphere_to_center(sphere_center, resolution=resolution, scale=scale, min_value=min_value)  # sphere to center
    for index, (corner, center) in enumerate(zip(corners[xyz_logical], anchor_center)):
        train_corners[index] = corner - center
    return sphere_center, train_corners

def corner_to_train(corners, sphere_center, resolution=0.50, x=(0, 90), y=(-50, 50), z=(-4.5, 5.5), scale=4, min_value=np.array([0., -50., -4.5])):
    """Convert corners to training labels for the regression loss."""
    x_logical = np.logical_and((corners[:, :, 0] < x[1]), (corners[:, :, 0] >= x[0]))
    y_logical = np.logical_and((corners[:, :, 1] < y[1]), (corners[:, :, 1] >= y[0]))
    z_logical = np.logical_and((corners[:, :, 2] < z[1]), (corners[:, :, 2] >= z[0]))
    xyz_logical = np.logical_and(x_logical, np.logical_and(y_logical, z_logical)).all(axis=1)
    train_corners = corners[xyz_logical].copy()
    sphere_center = sphere_to_center(sphere_center, resolution=resolution, scale=scale, min_value=min_value)  # sphere to center
    for index, (corner, center) in enumerate(zip(corners[xyz_logical], sphere_center)):
        train_corners[index] = corner - center
    return train_corners

def corner_to_voxel(voxel_shape, corners, sphere_center, scale=4):
    """Create the final regression label from corners.

    Relies on Python 2 integer division; voxel_shape must be divisible by scale.
    """
    corner_voxel = np.zeros((voxel_shape[0] / scale, voxel_shape[1] / scale, voxel_shape[2] / scale, 24))
    corner_voxel[sphere_center[:, 0], sphere_center[:, 1], sphere_center[:, 2]] = corners
    return corner_voxel

def create_objectness_label(sphere_center, resolution=0.5, x=90, y=100, z=10, scale=4):
    """Create objectness label"""
    obj_maps = np.zeros((int(x / (resolution * scale)), int(y / (resolution * scale)), int(round(z / (resolution * scale)))))
    obj_maps[sphere_center[:, 0], sphere_center[:, 1], sphere_center[:, 2]] = 1
    return obj_maps

def process(velodyne_path, label_path=None, calib_path=None, dataformat="pcd", label_type="txt", is_velo_cam=False):
    p = []
    pc = None
    bounding_boxes = None
    places = None
    rotates = None
    size = None
    proj_velo = None

    if dataformat == "bin":
        pc = load_pc_from_bin(velodyne_path)
    elif dataformat == "pcd":
        pc = load_pc_from_pcd(velodyne_path)

    if calib_path:
        calib = read_calib_file(calib_path)
        proj_velo = proj_to_velo(calib)[:, :3]

    if label_path:
        places, rotates, size = read_labels(label_path, label_type, calib_path=calib_path, is_velo_cam=is_velo_cam, proj_velo=proj_velo)

    corners = get_boxcorners(places, rotates, size)
    print "################", len(pc)  # debug
    pc = filter_camera_angle(pc)
    # obj = []
    # obj = create_publish_obj(obj, places, rotates, size)

    p.append((0, 0, 0))
    p.append((0, 0, -1))
    print pc.shape  # debug
    # publish_pc2(pc, obj)
    a = center_to_sphere(places, size, resolution=0.25)
    print places  # debug
    print a  # debug
    print sphere_to_center(a, resolution=0.25)  # debug
    bbox = sphere_to_center(a, resolution=0.25)
    print corners.shape  # debug
    # publish_pc2(pc, bbox.reshape(-1, 3))
    publish_pc2(pc, corners.reshape(-1, 3))

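# Illustrative note (not part of the original file): a minimal sketch of how the
# functions above fit together for one training frame, assuming KITTI-style
# paths; it mirrors what process() and the training generator do.
def _example_frame_to_training_labels(bin_path, label_path, calib_path):
    """Hypothetical helper: turn one frame into network input and labels."""
    pc = load_pc_from_bin(bin_path)                     # (N, 4) points
    calib = read_calib_file(calib_path)
    proj_velo = proj_to_velo(calib)[:, :3]              # camera -> velodyne
    places, rotates, size = read_labels(label_path, "txt", calib_path=calib_path,
                                        is_velo_cam=True, proj_velo=proj_velo)
    corners = get_boxcorners(places, rotates, size)     # (M, 8, 3)
    pc = filter_camera_angle(pc)
    voxel = raw_to_voxel(pc)                            # binary occupancy grid
    sphere_center, train_corners = create_label(places, size, corners)
    return voxel, sphere_center, train_corners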
"../data/training/velodyne/000012.pcd" 317 | # label_path = "../data/training/label_2/000012.txt" 318 | # calib_path = "../data/training/calib/000012.txt" 319 | # process(pcd_path, label_path, calib_path=calib_path, dataformat="pcd") 320 | 321 | # bin_path = "../data/2011_09_26/2011_09_26_drive_0001_sync/velodyne_points/data/0000000030.bin" 322 | # xml_path = "../data/2011_09_26/2011_09_26_drive_0001_sync/tracklet_labels.xml" 323 | # process(bin_path, xml_path, dataformat="bin", label_type="xml") 324 | 325 | pcd_path = "/home/katou01/download/training/velodyne/000410.bin" 326 | label_path = "/home/katou01/download/training/label_2/000410.txt" 327 | calib_path = "/home/katou01/download/training/calib/000410.txt" 328 | process(pcd_path, label_path, calib_path=calib_path, dataformat="bin", is_velo_cam=True) 329 | -------------------------------------------------------------------------------- /model_01_deconv.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import sys 3 | import numpy as np 4 | import tensorflow as tf 5 | from input_velodyne import * 6 | import glob 7 | 8 | def batch_norm(inputs, phase_train, decay=0.9, eps=1e-5): 9 | """Batch Normalization 10 | 11 | Args: 12 | inputs: input data(Batch size) from last layer 13 | phase_train: when you test, please set phase_train "None" 14 | Returns: 15 | output for next layer 16 | """ 17 | gamma = tf.get_variable("gamma", shape=inputs.get_shape()[-1], dtype=tf.float32, initializer=tf.constant_initializer(1.0)) 18 | beta = tf.get_variable("beta", shape=inputs.get_shape()[-1], dtype=tf.float32, initializer=tf.constant_initializer(0.0)) 19 | pop_mean = tf.get_variable("pop_mean", trainable=False, shape=inputs.get_shape()[-1], dtype=tf.float32, initializer=tf.constant_initializer(0.0)) 20 | pop_var = tf.get_variable("pop_var", trainable=False, shape=inputs.get_shape()[-1], dtype=tf.float32, initializer=tf.constant_initializer(1.0)) 21 | axes = range(len(inputs.get_shape()) - 1) 22 | 23 | if phase_train != None: 24 | batch_mean, batch_var = tf.nn.moments(inputs, axes) 25 | train_mean = tf.assign(pop_mean, pop_mean * decay + batch_mean*(1 - decay)) 26 | train_var = tf.assign(pop_var, pop_var * decay + batch_var * (1 - decay)) 27 | with tf.control_dependencies([train_mean, train_var]): 28 | return tf.nn.batch_normalization(inputs, batch_mean, batch_var, beta, gamma, eps) 29 | else: 30 | return tf.nn.batch_normalization(inputs, pop_mean, pop_var, beta, gamma, eps) 31 | 32 | def conv3DLayer(input_layer, input_dim, output_dim, height, width, length, stride, activation=tf.nn.relu, padding="SAME", name="", is_training=True): 33 | with tf.variable_scope("conv3D" + name): 34 | kernel = tf.get_variable("weights", shape=[length, height, width, input_dim, output_dim], \ 35 | dtype=tf.float32, initializer=tf.truncated_normal_initializer(stddev=0.01)) 36 | b = tf.get_variable("bias", shape=[output_dim], dtype=tf.float32, initializer=tf.constant_initializer(0.0)) 37 | conv = tf.nn.conv3d(input_layer, kernel, stride, padding=padding) 38 | bias = tf.nn.bias_add(conv, b) 39 | if activation: 40 | bias = activation(bias, name="activation") 41 | bias = batch_norm(bias, is_training) 42 | return bias 43 | 44 | def conv3D_to_output(input_layer, input_dim, output_dim, height, width, length, stride, activation=tf.nn.relu, padding="SAME", name=""): 45 | with tf.variable_scope("conv3D" + name): 46 | kernel = tf.get_variable("weights", shape=[length, height, width, input_dim, output_dim], \ 47 | 
dtype=tf.float32, initializer=tf.constant_initializer(0.01)) 48 | conv = tf.nn.conv3d(input_layer, kernel, stride, padding=padding) 49 | return conv 50 | 51 | def deconv3D_to_output(input_layer, input_dim, output_dim, height, width, length, stride, output_shape, activation=tf.nn.relu, padding="SAME", name=""): 52 | with tf.variable_scope("deconv3D"+name): 53 | kernel = tf.get_variable("weights", shape=[length, height, width, output_dim, input_dim], \ 54 | dtype=tf.float32, initializer=tf.constant_initializer(0.01)) 55 | deconv = tf.nn.conv3d_transpose(input_layer, kernel, output_shape, stride, padding="SAME") 56 | return deconv 57 | 58 | def fully_connected(input_layer, shape, name="", is_training=True): 59 | with tf.variable_scope("fully" + name): 60 | kernel = tf.get_variable("weights", shape=shape, \ 61 | dtype=tf.float32, initializer=tf.truncated_normal_initializer(stddev=0.01)) 62 | fully = tf.matmul(input_layer, kernel) 63 | fully = tf.nn.relu(fully) 64 | fully = batch_norm(fully, is_training) 65 | return fully 66 | 67 | class BNBLayer(object): 68 | def __init__(self): 69 | pass 70 | 71 | def build_graph(self, voxel, activation=tf.nn.relu, is_training=True): 72 | self.layer1 = conv3DLayer(voxel, 1, 16, 5, 5, 5, [1, 2, 2, 2, 1], name="layer1", activation=activation, is_training=is_training) 73 | self.layer2 = conv3DLayer(self.layer1, 16, 32, 5, 5, 5, [1, 2, 2, 2, 1], name="layer2", activation=activation, is_training=is_training) 74 | self.layer3 = conv3DLayer(self.layer2, 32, 64, 3, 3, 3, [1, 2, 2, 2, 1], name="layer3", activation=activation, is_training=is_training) 75 | self.layer4 = conv3DLayer(self.layer3, 64, 64, 3, 3, 3, [1, 1, 1, 1, 1], name="layer4", activation=activation, is_training=is_training) 76 | # base_shape = self.layer3.get_shape().as_list() 77 | # obj_output_shape = [tf.shape(self.layer4)[0], base_shape[1], base_shape[2], base_shape[3], 2] 78 | # cord_output_shape = [tf.shape(self.layer4)[0], base_shape[1], base_shape[2], base_shape[3], 24] 79 | self.objectness = conv3D_to_output(self.layer4, 64, 2, 3, 3, 3, [1, 1, 1, 1, 1], name="objectness", activation=None) 80 | self.cordinate = conv3D_to_output(self.layer4, 64, 24, 3, 3, 3, [1, 1, 1, 1, 1], name="cordinate", activation=None) 81 | # self.objectness = deconv3D_to_output(self.layer4, 32, 2, 3, 3, 3, [1, 2, 2, 2, 1], obj_output_shape, name="objectness", activation=None) 82 | # self.cordinate = deconv3D_to_output(self.layer4, 32, 24, 3, 3, 3, [1, 2, 2, 2, 1], cord_output_shape, name="cordinate", activation=None) 83 | self.y = tf.nn.softmax(self.objectness, dim=-1) 84 | # #original 85 | # def build_graph(self, voxel, activation=tf.nn.relu, is_training=True): 86 | # self.layer1 = conv3DLayer(voxel, 1, 10, 5, 5, 5, [1, 2, 2, 2, 1], name="layer1", activation=activation, is_training=is_training) 87 | # self.layer2 = conv3DLayer(self.layer1, 10, 20, 5, 5, 5, [1, 2, 2, 2, 1], name="layer2", activation=activation, is_training=is_training) 88 | # self.layer3 = conv3DLayer(self.layer2, 20, 30, 3, 3, 3, [1, 2, 2, 2, 1], name="layer3", activation=activation, is_training=is_training) 89 | # base_shape = self.layer2.get_shape().as_list() 90 | # obj_output_shape = [tf.shape(self.layer3)[0], base_shape[1], base_shape[2], base_shape[3], 2] 91 | # cord_output_shape = [tf.shape(self.layer3)[0], base_shape[1], base_shape[2], base_shape[3], 24] 92 | # self.objectness = deconv3D_to_output(self.layer3, 30, 2, 3, 3, 3, [1, 2, 2, 2, 1], obj_output_shape, name="objectness", activation=None) 93 | # self.cordinate = 
    #         deconv3D_to_output(self.layer3, 30, 24, 3, 3, 3, [1, 2, 2, 2, 1], cord_output_shape, name="cordinate", activation=None)
    #     self.y = tf.nn.softmax(self.objectness, dim=-1)

def ssd_model(sess, voxel_shape=(300, 300, 300), activation=tf.nn.relu, is_training=True):
    voxel = tf.placeholder(tf.float32, [None, voxel_shape[0], voxel_shape[1], voxel_shape[2], 1])
    phase_train = tf.placeholder(tf.bool, name='phase_train') if is_training else None
    with tf.variable_scope("3D_CNN_model") as scope:
        bnb_model = BNBLayer()
        bnb_model.build_graph(voxel, activation=activation, is_training=phase_train)

    if is_training:
        initialized_var = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="3D_CNN_model")
        sess.run(tf.variables_initializer(initialized_var))
    return bnb_model, voxel, phase_train

def loss_func(model):
    g_map = tf.placeholder(tf.float32, model.cordinate.get_shape().as_list()[:4])
    g_cord = tf.placeholder(tf.float32, model.cordinate.get_shape().as_list())
    object_loss = tf.multiply(g_map, model.objectness[:, :, :, :, 0])
    non_gmap = tf.subtract(tf.ones_like(g_map, dtype=tf.float32), g_map)
    nonobject_loss = tf.multiply(non_gmap, model.objectness[:, :, :, :, 1])
    # sum_object_loss = tf.add(tf.exp(object_loss), tf.exp(nonobject_loss))
    sum_object_loss = tf.exp(-tf.add(object_loss, nonobject_loss))
    # sum_object_loss = tf.exp(-nonobject_loss)
    bunbo = tf.add(tf.exp(-model.objectness[:, :, :, :, 0]), tf.exp(-model.objectness[:, :, :, :, 1]))  # bunbo: "denominator" in Japanese
    obj_loss = 0.005 * tf.reduce_sum(-tf.log(tf.div(sum_object_loss, bunbo)))

    cord_diff = tf.multiply(g_map, tf.reduce_sum(tf.square(tf.subtract(model.cordinate, g_cord)), 4))
    cord_loss = tf.reduce_sum(cord_diff)
    return obj_loss, obj_loss, cord_loss, g_map, g_cord

def loss_func2(model):
    g_map = tf.placeholder(tf.float32, model.cordinate.get_shape().as_list()[:4])
    obj_loss = tf.reduce_sum(tf.square(tf.subtract(model.objectness[:, :, :, :, 0], g_map)))

    g_cord = tf.placeholder(tf.float32, model.cordinate.get_shape().as_list())
    cord_diff = tf.multiply(g_map, tf.reduce_sum(tf.square(tf.subtract(model.cordinate, g_cord)), 4))
    cord_loss = tf.reduce_sum(cord_diff) * 0.1
    return tf.add(obj_loss, cord_loss), g_map, g_cord

def loss_func3(model):
    g_map = tf.placeholder(tf.float32, model.cordinate.get_shape().as_list()[:4])
    g_cord = tf.placeholder(tf.float32, model.cordinate.get_shape().as_list())
    non_gmap = tf.subtract(tf.ones_like(g_map, dtype=tf.float32), g_map)

    epsilon = 0.00001  # small constant to keep tf.log finite
    y = model.y
    is_obj_loss = -tf.reduce_sum(tf.multiply(g_map, tf.log(y[:, :, :, :, 0] + epsilon)))
    non_obj_loss = tf.multiply(-tf.reduce_sum(tf.multiply(non_gmap, tf.log(y[:, :, :, :, 1] + epsilon))), 0.0008)
    cross_entropy = tf.add(is_obj_loss, non_obj_loss)
    obj_loss = cross_entropy

    cord_diff = tf.multiply(g_map, tf.reduce_sum(tf.square(tf.subtract(model.cordinate, g_cord)), 4))
    cord_loss = tf.multiply(tf.reduce_sum(cord_diff), 0.02)
    return tf.add(obj_loss, cord_loss), obj_loss, cord_loss, is_obj_loss, non_obj_loss, g_map, g_cord, y

def create_optimizer(all_loss, lr=0.001):
    opt = tf.train.AdamOptimizer(lr)
    optimizer = opt.minimize(all_loss)
    return optimizer

def train(batch_num,
velodyne_path, label_path=None, calib_path=None, resolution=0.2, \ 156 | dataformat="pcd", label_type="txt", is_velo_cam=False, scale=4, lr=0.01, \ 157 | voxel_shape=(800, 800, 40), x=(0, 80), y=(-40, 40), z=(-2.5, 1.5), epoch=101): 158 | # tf Graph input 159 | batch_size = batch_num 160 | training_epochs = epoch 161 | 162 | with tf.Session() as sess: 163 | model, voxel, phase_train = ssd_model(sess, voxel_shape=voxel_shape, activation=tf.nn.relu, is_training=True) 164 | saver = tf.train.Saver() 165 | total_loss, obj_loss, cord_loss, is_obj_loss, non_obj_loss, g_map, g_cord, y_pred = loss_func3(model) 166 | optimizer = create_optimizer(total_loss, lr=lr) 167 | init = tf.global_variables_initializer() 168 | sess.run(init) 169 | 170 | for epoch in range(training_epochs): 171 | for (batch_x, batch_g_map, batch_g_cord) in lidar_generator(batch_num, velodyne_path, label_path=label_path, \ 172 | calib_path=calib_path,resolution=resolution, dataformat=dataformat, label_type=label_type, is_velo_cam=is_velo_cam, \ 173 | scale=scale, x=x, y=y, z=z): 174 | # print batch_x.shape, batch_g_map.shape, batch_g_cord.shape, batch_num 175 | # print batch_x.shape 176 | # print batch_g_map.shape 177 | # print batch_g_cord.shape 178 | sess.run(optimizer, feed_dict={voxel: batch_x, g_map: batch_g_map, g_cord: batch_g_cord, phase_train:True}) 179 | # ct = sess.run(total_loss, feed_dict={voxel: batch_x, g_map: batch_g_map, g_cord: batch_g_cord, phase_train:True}) 180 | # co = sess.run(obj_loss, feed_dict={voxel: batch_x, g_map: batch_g_map, g_cord: batch_g_cord, phase_train:True}) 181 | cc = sess.run(cord_loss, feed_dict={voxel: batch_x, g_map: batch_g_map, g_cord: batch_g_cord, phase_train:True}) 182 | iol = sess.run(is_obj_loss, feed_dict={voxel: batch_x, g_map: batch_g_map, g_cord: batch_g_cord, phase_train:True}) 183 | nol = sess.run(non_obj_loss, feed_dict={voxel: batch_x, g_map: batch_g_map, g_cord: batch_g_cord, phase_train:True}) 184 | # print("Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(ct)) 185 | # print("Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(co)) 186 | print("Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(cc)) 187 | print("Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(iol)) 188 | print("Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(nol)) 189 | if (epoch != 0) and (epoch % 10 == 0): 190 | print "Save epoch " + str(epoch) 191 | saver.save(sess, "velodyne_025_deconv_norm_valid" + str(epoch) + ".ckpt") 192 | print("Optimization Finished!") 193 | 194 | def train_test(batch_num, velodyne_path, label_path=None, calib_path=None, resolution=0.2, dataformat="pcd", label_type="txt", is_velo_cam=False, \ 195 | scale=4, voxel_shape=(800, 800, 40), x=(0, 80), y=(-40, 40), z=(-2.5, 1.5)): 196 | batch_size = batch_num 197 | p = [] 198 | pc = None 199 | bounding_boxes = None 200 | places = None 201 | rotates = None 202 | size = None 203 | proj_velo = None 204 | 205 | if dataformat == "bin": 206 | pc = load_pc_from_bin(velodyne_path) 207 | elif dataformat == "pcd": 208 | pc = load_pc_from_pcd(velodyne_path) 209 | 210 | if calib_path: 211 | calib = read_calib_file(calib_path) 212 | proj_velo = proj_to_velo(calib)[:, :3] 213 | 214 | if label_path: 215 | places, rotates, size = read_labels(label_path, label_type, calib_path=calib_path, is_velo_cam=is_velo_cam, proj_velo=proj_velo) 216 | 217 | corners = get_boxcorners(places, rotates, size) 218 | filter_car_data(corners) 219 | pc = filter_camera_angle(pc) 220 | 221 | voxel = raw_to_voxel(pc, resolution=resolution, x=x, y=y, 
z=z) 222 | center_sphere = center_to_sphere(places, size, resolution=resolution) 223 | corner_label = corner_to_train(corners, center_sphere, resolution=resolution) 224 | g_map = create_objectness_label(center_sphere, resolution=resolution) 225 | g_cord = corner_label.reshape(corner_label.shape[0], -1) 226 | 227 | voxel_x = voxel.reshape(1, voxel.shape[0], voxel.shape[1], voxel.shape[2], 1) 228 | 229 | with tf.Session() as sess: 230 | is_training=None 231 | model, voxel, phase_train = ssd_model(sess, voxel_shape=voxel_shape, activation=tf.nn.relu, is_training=is_training) 232 | saver = tf.train.Saver() 233 | new_saver = tf.train.import_meta_graph("velodyne_025_deconv_norm_valid40.ckpt.meta") 234 | last_model = "./velodyne_025_deconv_norm_valid40.ckpt" 235 | saver.restore(sess, last_model) 236 | 237 | objectness = model.objectness 238 | cordinate = model.cordinate 239 | y_pred = model.y 240 | objectness = sess.run(objectness, feed_dict={voxel: voxel_x})[0, :, :, :, 0] 241 | cordinate = sess.run(cordinate, feed_dict={voxel: voxel_x})[0] 242 | y_pred = sess.run(y_pred, feed_dict={voxel: voxel_x})[0, :, :, :, 0] 243 | print objectness.shape, objectness.max(), objectness.min() 244 | print y_pred.shape, y_pred.max(), y_pred.min() 245 | 246 | # print np.where(objectness >= 0.55) 247 | index = np.where(y_pred >= 0.995) 248 | print np.vstack((index[0], np.vstack((index[1], index[2])))).transpose() 249 | print np.vstack((index[0], np.vstack((index[1], index[2])))).transpose().shape 250 | 251 | a = center_to_sphere(places, size, resolution=resolution, x=x, y=y, z=z, \ 252 | scale=scale, min_value=np.array([x[0], y[0], z[0]])) 253 | label_center = sphere_to_center(a, resolution=resolution, \ 254 | scale=scale, min_value=np.array([x[0], y[0], z[0]])) 255 | label_corners = get_boxcorners(label_center, rotates, size) 256 | print a[a[:, 0].argsort()] 257 | # center = np.array([20, 57, 3]) 258 | # 259 | # pred_center = sphere_to_center(center, resolution=resolution) 260 | # print pred_center 261 | # print cordinate.shape 262 | # corners = cordinate[center[0], center[1], center[2]].reshape(-1, 3) 263 | centers = np.vstack((index[0], np.vstack((index[1], index[2])))).transpose() 264 | centers = sphere_to_center(centers, resolution=resolution, \ 265 | scale=scale, min_value=np.array([x[0], y[0], z[0]])) 266 | corners = cordinate[index].reshape(-1, 8, 3) + centers[:, np.newaxis] 267 | print corners.shape 268 | print voxel.shape 269 | # publish_pc2(pc, corners.reshape(-1, 3)) 270 | publish_pc2(pc, corners.reshape(-1, 3)) 271 | # pred_corners = corners + pred_center 272 | # print pred_corners 273 | 274 | def test(batch_num, velodyne_path, label_path=None, calib_path=None, resolution=0.2, dataformat="pcd", label_type="txt", is_velo_cam=False, \ 275 | scale=4, voxel_shape=(800, 800, 40), x=(0, 80), y=(-40, 40), z=(-2.5, 1.5)): 276 | batch_size = batch_num 277 | p = [] 278 | pc = None 279 | bounding_boxes = None 280 | places = None 281 | rotates = None 282 | size = None 283 | proj_velo = None 284 | 285 | if dataformat == "bin": 286 | pc = load_pc_from_bin(velodyne_path) 287 | elif dataformat == "pcd": 288 | pc = load_pc_from_pcd(velodyne_path) 289 | 290 | pc = filter_camera_angle(pc) 291 | voxel = raw_to_voxel(pc, resolution=resolution, x=x, y=y, z=z) 292 | voxel_x = voxel.reshape(1, voxel.shape[0], voxel.shape[1], voxel.shape[2], 1) 293 | 294 | with tf.Session() as sess: 295 | is_training=None 296 | model, voxel, phase_train = ssd_model(sess, voxel_shape=voxel_shape, activation=tf.nn.relu, is_training=is_training) 
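        # Illustrative note (not part of the original file): the tf.train.Saver()
        # created below restores the model variables by itself; the following
        # import_meta_graph call additionally re-imports the saved graph
        # definition, and its returned saver (new_saver) is never used, so it is
        # redundant here.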
297 | saver = tf.train.Saver() 298 | new_saver = tf.train.import_meta_graph("velodyne_025_deconv_norm_valid40.ckpt.meta") 299 | last_model = "./velodyne_025_deconv_norm_valid40.ckpt" 300 | saver.restore(sess, last_model) 301 | 302 | objectness = model.objectness 303 | cordinate = model.cordinate 304 | y_pred = model.y 305 | objectness = sess.run(objectness, feed_dict={voxel: voxel_x})[0, :, :, :, 0] 306 | cordinate = sess.run(cordinate, feed_dict={voxel: voxel_x})[0] 307 | y_pred = sess.run(y_pred, feed_dict={voxel: voxel_x})[0, :, :, :, 0] 308 | print objectness.shape, objectness.max(), objectness.min() 309 | print y_pred.shape, y_pred.max(), y_pred.min() 310 | 311 | index = np.where(y_pred >= 0.995) 312 | print np.vstack((index[0], np.vstack((index[1], index[2])))).transpose() 313 | print np.vstack((index[0], np.vstack((index[1], index[2])))).transpose().shape 314 | 315 | centers = np.vstack((index[0], np.vstack((index[1], index[2])))).transpose() 316 | centers = sphere_to_center(centers, resolution=resolution, \ 317 | scale=scale, min_value=np.array([x[0], y[0], z[0]])) 318 | corners = cordinate[index].reshape(-1, 8, 3) + centers[:, np.newaxis] 319 | print corners.shape 320 | print voxel.shape 321 | # publish_pc2(pc, corners.reshape(-1, 3)) 322 | publish_pc2(pc, corners.reshape(-1, 3)) 323 | # pred_corners = corners + pred_center 324 | # print pred_corners 325 | 326 | def lidar_generator(batch_num, velodyne_path, label_path=None, calib_path=None, resolution=0.2, dataformat="pcd", label_type="txt", is_velo_cam=False, \ 327 | scale=4, x=(0, 80), y=(-40, 40), z=(-2.5, 1.5)): 328 | velodynes_path = glob.glob(velodyne_path) 329 | labels_path = glob.glob(label_path) 330 | calibs_path = glob.glob(calib_path) 331 | velodynes_path.sort() 332 | labels_path.sort() 333 | calibs_path.sort() 334 | iter_num = len(velodynes_path) // batch_num 335 | 336 | for itn in range(iter_num): 337 | batch_voxel = [] 338 | batch_g_map = [] 339 | batch_g_cord = [] 340 | 341 | for velodynes, labels, calibs in zip(velodynes_path[itn*batch_num:(itn+1)*batch_num], \ 342 | labels_path[itn*batch_num:(itn+1)*batch_num], calibs_path[itn*batch_num:(itn+1)*batch_num]): 343 | p = [] 344 | pc = None 345 | bounding_boxes = None 346 | places = None 347 | rotates = None 348 | size = None 349 | proj_velo = None 350 | 351 | if dataformat == "bin": 352 | pc = load_pc_from_bin(velodynes) 353 | elif dataformat == "pcd": 354 | pc = load_pc_from_pcd(velodynes) 355 | 356 | if calib_path: 357 | calib = read_calib_file(calibs) 358 | proj_velo = proj_to_velo(calib)[:, :3] 359 | 360 | if label_path: 361 | places, rotates, size = read_labels(labels, label_type, calib_path=calib_path, is_velo_cam=is_velo_cam, proj_velo=proj_velo) 362 | if places is None: 363 | continue 364 | 365 | corners = get_boxcorners(places, rotates, size) 366 | pc = filter_camera_angle(pc) 367 | 368 | voxel = raw_to_voxel(pc, resolution=resolution, x=x, y=y, z=z) 369 | center_sphere, corner_label = create_label(places, size, corners, resolution=resolution, x=x, y=y, z=z, \ 370 | scale=scale, min_value=np.array([x[0], y[0], z[0]])) 371 | 372 | if not center_sphere.shape[0]: 373 | print 1 374 | continue 375 | g_map = create_objectness_label(center_sphere, resolution=resolution, x=(x[1] - x[0]), y=(y[1] - y[0]), z=(z[1] - z[0]), scale=scale) 376 | g_cord = corner_label.reshape(corner_label.shape[0], -1) 377 | g_cord = corner_to_voxel(voxel.shape, g_cord, center_sphere, scale=scale) 378 | 379 | batch_voxel.append(voxel) 380 | batch_g_map.append(g_map) 381 | 
batch_g_cord.append(g_cord) 382 | yield np.array(batch_voxel, dtype=np.float32)[:, :, :, :, np.newaxis], np.array(batch_g_map, dtype=np.float32), np.array(batch_g_cord, dtype=np.float32) 383 | 384 | 385 | if __name__ == '__main__': 386 | # pcd_path = "../data/training/velodyne/*.bin" 387 | # label_path = "../data/training/label_2/*.txt" 388 | # calib_path = "../data/training/calib/*.txt" 389 | # train(5, pcd_path, label_path=label_path, resolution=0.1, calib_path=calib_path, dataformat="bin", is_velo_cam=True, \ 390 | # scale=8, voxel_shape=(800, 800, 40), x=(0, 80), y=(-40, 40), z=(-2.5, 1.5)) 391 | # 392 | # pcd_path = "../data/training/velodyne/005000.bin" 393 | # label_path = "../data/training/label_2/005000.txt" 394 | # calib_path = "../data/training/calib/005000.txt" 395 | # pcd_path = "../data/testing/velodyne/005000.bin" 396 | # label_path = "../data/testing/label_2/005000.txt" 397 | # calib_path = "../data/testing/calib/005000.txt" 398 | # train_test(1, pcd_path, label_path=label_path, resolution=0.1, calib_path=calib_path, dataformat="bin", is_velo_cam=True, \ 399 | # scale=8, voxel_shape=(800, 800, 40), x=(0, 80), y=(-40, 40), z=(-2.5, 1.5)) 400 | 401 | pcd_path = "/home/katou01/download/testing/velodyne/002397.bin" 402 | calib_path = "/home/katou01/download/testing/calib/002397.txt" 403 | test(1, pcd_path, label_path=None, resolution=0.1, calib_path=calib_path, dataformat="bin", is_velo_cam=True, \ 404 | scale=8, voxel_shape=(800, 800, 40), x=(0, 80), y=(-40, 40), z=(-2.5, 1.5)) 405 | -------------------------------------------------------------------------------- /model_025_deconv_norm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import sys 3 | import numpy as np 4 | import tensorflow as tf 5 | from input_velodyne import * 6 | import glob 7 | 8 | def batch_norm(inputs, is_training, decay=0.9, eps=1e-5): 9 | """Batch Normalization 10 | 11 | Args: 12 | inputs: input data(Batch size) from last layer 13 | is_training: when you test, please set is_training "None" 14 | Returns: 15 | output for next layer 16 | """ 17 | gamma = tf.Variable(tf.ones(inputs.get_shape()[1:]), name="gamma") 18 | beta = tf.Variable(tf.zeros(inputs.get_shape()[1:]), name="beta") 19 | pop_mean = tf.Variable(tf.zeros(inputs.get_shape()[1:]), trainable=False, name="pop_mean") 20 | pop_var = tf.Variable(tf.ones(inputs.get_shape()[1:]), trainable=False, name="pop_var") 21 | 22 | 23 | def is_true(): 24 | batch_mean, batch_var = tf.nn.moments(inputs, [0]) 25 | train_mean = tf.assign(pop_mean, pop_mean * decay + batch_mean*(1 - decay)) 26 | train_var = tf.assign(pop_var, pop_var * decay + batch_var * (1 - decay)) 27 | with tf.control_dependencies([train_mean, train_var]): 28 | return pop_mean, pop_var 29 | # return tf.nn.batch_normalization(inputs, batch_mean, batch_var, beta, gamma, eps) 30 | 31 | def is_false(): 32 | return pop_mean, pop_var 33 | # return tf.nn.batch_normalization(inputs, pop_mean, pop_var, beta, gamma, eps) 34 | 35 | mean, var = tf.cond(is_training, is_true, is_false) 36 | normed = tf.nn.batch_normalization(inputs, mean, var, beta, gamma, eps) 37 | # normed = tf.cond(is_training, is_true, is_false) 38 | return normed 39 | 40 | def conv3DLayer(input_layer, input_dim, output_dim, height, width, length, stride, activation=tf.nn.relu, padding="SAME", name="", is_training=True): 41 | with tf.variable_scope("conv3D" + name) as c3: 42 | kernel = tf.get_variable("weights", shape=[length, height, width, input_dim, output_dim], \ 43 | 
dtype=tf.float32, initializer=tf.truncated_normal_initializer(stddev=0.01)) 44 | b = tf.get_variable("bias", shape=[output_dim], dtype=tf.float32, initializer=tf.constant_initializer(0.0)) 45 | conv = tf.nn.conv3d(input_layer, kernel, stride, padding=padding) 46 | bias = tf.nn.bias_add(conv, b) 47 | if activation: 48 | bias = activation(bias, name="activation") 49 | bias = batch_norm(bias, is_training) 50 | return bias 51 | 52 | def conv3D_to_output(input_layer, input_dim, output_dim, height, width, length, stride, activation=tf.nn.relu, padding="SAME", name=""): 53 | with tf.variable_scope("conv3D" + name): 54 | kernel = tf.get_variable("weights", shape=[length, height, width, input_dim, output_dim], \ 55 | dtype=tf.float32, initializer=tf.constant_initializer(0.01)) 56 | conv = tf.nn.conv3d(input_layer, kernel, stride, padding=padding) 57 | return conv 58 | 59 | def deconv3D_to_output(input_layer, input_dim, output_dim, height, width, length, stride, output_shape, activation=tf.nn.relu, padding="SAME", name=""): 60 | with tf.variable_scope("deconv3D"+name): 61 | kernel = tf.get_variable("weights", shape=[length, height, width, output_dim, input_dim], \ 62 | dtype=tf.float32, initializer=tf.constant_initializer(0.01)) 63 | deconv = tf.nn.conv3d_transpose(input_layer, kernel, output_shape, stride, padding="SAME") 64 | return deconv 65 | 66 | def fully_connected(input_layer, shape, name="", is_training=True): 67 | with tf.variable_scope("fully" + name): 68 | kernel = tf.get_variable("weights", shape=shape, \ 69 | dtype=tf.float32, initializer=tf.truncated_normal_initializer(stddev=0.01)) 70 | fully = tf.matmul(input_layer, kernel) 71 | fully = tf.nn.relu(fully) 72 | fully = batch_norm(fully, is_training) 73 | return fully 74 | 75 | class BNBLayer(object): 76 | def __init__(self): 77 | pass 78 | 79 | def build_graph(self, voxel, activation=tf.nn.relu, is_training=True): 80 | self.layer1 = conv3DLayer(voxel, 1, 10, 5, 5, 5, [1, 2, 2, 2, 1], name="layer1", activation=activation, is_training=is_training) 81 | self.layer2 = conv3DLayer(self.layer1, 10, 15, 5, 5, 5, [1, 2, 2, 2, 1], name="layer2", activation=activation, is_training=is_training) 82 | # self.layer3 = conv3DLayer(self.layer2, 15, 30, 3, 3, 3, [1, 1, 1, 1, 1], name="layer3", activation=activation, is_training=is_training) 83 | # self.layer4 = conv3DLayer(self.layer3, 32, 32, 3, 3, 3, [1, 1, 1, 1, 1], name="layer4", activation=activation, is_training=is_training) 84 | # self.layer4 = conv3DLayer(self.layer3, 32, 32, 3, 3, 3, [1, 2, 2, 2, 1], name="layer4", activation=activation, is_training=is_training) 85 | # base_shape = self.layer3.get_shape().as_list() 86 | # obj_output_shape = [tf.shape(self.layer4)[0], base_shape[1], base_shape[2], base_shape[3], 2] 87 | # cord_output_shape = [tf.shape(self.layer4)[0], base_shape[1], base_shape[2], base_shape[3], 24] 88 | self.objectness = conv3D_to_output(self.layer2, 15, 2, 3, 3, 3, [1, 1, 1, 1, 1], name="objectness", activation=None) 89 | self.cordinate = conv3D_to_output(self.layer2, 15, 24, 3, 3, 3, [1, 1, 1, 1, 1], name="cordinate", activation=None) 90 | # self.objectness = deconv3D_to_output(self.layer4, 32, 2, 3, 3, 3, [1, 2, 2, 2, 1], obj_output_shape, name="objectness", activation=None) 91 | # self.cordinate = deconv3D_to_output(self.layer4, 32, 24, 3, 3, 3, [1, 2, 2, 2, 1], cord_output_shape, name="cordinate", activation=None) 92 | self.y = tf.nn.softmax(self.objectness, dim=-1) 93 | 94 | #original 95 | # def build_graph(self, voxel, activation=tf.nn.relu, is_training=True): 96 | # 
self.layer1 = conv3DLayer(voxel, 1, 10, 5, 5, 5, [1, 2, 2, 2, 1], name="layer1", activation=activation, is_training=is_training) 97 | # self.layer2 = conv3DLayer(self.layer1, 10, 16, 5, 5, 5, [1, 2, 2, 2, 1], name="layer2", activation=activation, is_training=is_training) 98 | # self.layer3 = conv3DLayer(self.layer2, 16, 30, 3, 3, 3, [1, 2, 2, 2, 1], name="layer3", activation=activation, is_training=is_training) 99 | # base_shape = self.layer2.get_shape().as_list() 100 | # obj_output_shape = [tf.shape(self.layer3)[0], base_shape[1], base_shape[2], base_shape[3], 2] 101 | # cord_output_shape = [tf.shape(self.layer3)[0], base_shape[1], base_shape[2], base_shape[3], 24] 102 | # self.objectness = deconv3D_to_output(self.layer3, 30, 2, 3, 3, 3, [1, 2, 2, 2, 1], obj_output_shape, name="objectness", activation=None) 103 | # self.cordinate = deconv3D_to_output(self.layer3, 30, 24, 3, 3, 3, [1, 2, 2, 2, 1], cord_output_shape, name="cordinate", activation=None) 104 | # self.y = tf.nn.softmax(self.objectness, dim=-1) 105 | 106 | def ssd_model(sess, voxel_shape=(300, 300, 300),activation=tf.nn.relu): 107 | voxel = tf.placeholder(tf.float32, [None, voxel_shape[0], voxel_shape[1], voxel_shape[2], 1]) 108 | phase_train = tf.placeholder(tf.bool, name='phase_train') 109 | with tf.variable_scope("3D_CNN_model") as scope: 110 | bnb_model = BNBLayer() 111 | bnb_model.build_graph(voxel, activation=activation, is_training=phase_train) 112 | 113 | initialized_var = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="3D_CNN_model") 114 | sess.run(tf.variables_initializer(initialized_var)) 115 | return bnb_model, voxel, phase_train 116 | 117 | def loss_func(model): 118 | g_map = tf.placeholder(tf.float32, model.cordinate.get_shape().as_list()[:4]) 119 | g_cord = tf.placeholder(tf.float32, model.cordinate.get_shape().as_list()) 120 | object_loss = tf.multiply(g_map, model.objectness[:, :, :, :, 0]) 121 | non_gmap = tf.subtract(tf.ones_like(g_map, dtype=tf.float32), g_map) 122 | nonobject_loss = tf.multiply(non_gmap, model.objectness[:, :, :, :, 1]) 123 | # sum_object_loss = tf.add(tf.exp(object_loss), tf.exp(nonobject_loss)) 124 | sum_object_loss = tf.exp(-tf.add(object_loss, nonobject_loss)) 125 | # sum_object_loss = tf.exp(-nonobject_loss) 126 | bunbo = tf.add(tf.exp(-model.objectness[:, :, :, :, 0]), tf.exp(-model.objectness[:, :, :, :, 1])) 127 | obj_loss = 0.005 * tf.reduce_sum(-tf.log(tf.div(sum_object_loss, bunbo))) 128 | 129 | cord_diff = tf.multiply(g_map, tf.reduce_sum(tf.square(tf.subtract(model.cordinate, g_cord)), 4)) 130 | cord_loss = tf.reduce_sum(cord_diff) 131 | return obj_loss, obj_loss, cord_loss, g_map, g_cord 132 | 133 | def loss_func2(model): 134 | g_map = tf.placeholder(tf.float32, model.cordinate.get_shape().as_list()[:4]) 135 | obj_loss = tf.reduce_sum(tf.square(tf.subtract(model.objectness[:, :, :, :, 0], g_map))) 136 | 137 | g_cord = tf.placeholder(tf.float32, model.cordinate.get_shape().as_list()) 138 | cord_diff = tf.multiply(g_map, tf.reduce_sum(tf.square(tf.subtract(model.cordinate, g_cord)), 4)) 139 | cord_loss = tf.reduce_sum(cord_diff) * 0.1 140 | return tf.add(obj_loss, cord_loss), g_map, g_cord 141 | 142 | def loss_func3(model): 143 | g_map = tf.placeholder(tf.float32, model.cordinate.get_shape().as_list()[:4]) 144 | g_cord = tf.placeholder(tf.float32, model.cordinate.get_shape().as_list()) 145 | non_gmap = tf.subtract(tf.ones_like(g_map, dtype=tf.float32), g_map) 146 | 147 | elosion = 0.00001 148 | y = model.y 149 | is_obj_loss = -tf.reduce_sum(tf.multiply(g_map, tf.log(y[:, 
:, :, :, 0] + elosion))) 150 | non_obj_loss = tf.multiply(-tf.reduce_sum(tf.multiply(non_gmap, tf.log(y[:, :, :, :, 1] + elosion))), 0.0008) 151 | cross_entropy = tf.add(is_obj_loss, non_obj_loss) 152 | obj_loss = cross_entropy 153 | 154 | g_cord = tf.placeholder(tf.float32, model.cordinate.get_shape().as_list()) 155 | cord_diff = tf.multiply(g_map, tf.reduce_sum(tf.square(tf.subtract(model.cordinate, g_cord)), 4)) 156 | cord_loss = tf.multiply(tf.reduce_sum(cord_diff), 0.02) 157 | return tf.add(obj_loss, cord_loss), obj_loss, cord_loss, is_obj_loss, non_obj_loss, g_map, g_cord, y 158 | 159 | def create_optimizer(all_loss, lr=0.001): 160 | opt = tf.train.AdamOptimizer(lr) 161 | optimizer = opt.minimize(all_loss) 162 | return optimizer 163 | 164 | def train(batch_num, velodyne_path, label_path=None, calib_path=None, resolution=0.2, dataformat="pcd", label_type="txt", is_velo_cam=False, \ 165 | scale=4, voxel_shape=(800, 800, 40), x=(0, 80), y=(-40, 40), z=(-2.5, 1.5)): 166 | # tf Graph input 167 | batch_size = batch_num 168 | training_epochs = 101 169 | 170 | with tf.Session() as sess: 171 | model, voxel, phase_train = ssd_model(sess, voxel_shape=voxel_shape, activation=tf.nn.relu) 172 | saver = tf.train.Saver() 173 | total_loss, obj_loss, cord_loss, is_obj_loss, non_obj_loss, g_map, g_cord, y_pred = loss_func3(model) 174 | optimizer = create_optimizer(total_loss, lr=0.01) 175 | init = tf.global_variables_initializer() 176 | sess.run(init) 177 | 178 | for epoch in range(training_epochs): 179 | for (batch_x, batch_g_map, batch_g_cord) in lidar_generator(batch_num, velodyne_path, label_path=label_path, \ 180 | calib_path=calib_path,resolution=resolution, dataformat=dataformat, label_type=label_type, is_velo_cam=is_velo_cam, \ 181 | scale=scale, x=x, y=y, z=z): 182 | # print batch_x.shape, batch_g_map.shape, batch_g_cord.shape, batch_num 183 | # print batch_x.shape 184 | # print batch_g_map.shape 185 | # print batch_g_cord.shape 186 | sess.run(optimizer, feed_dict={voxel: batch_x, g_map: batch_g_map, g_cord: batch_g_cord, phase_train:True}) 187 | 188 | # ct = sess.run(total_loss, feed_dict={voxel: batch_x, g_map: batch_g_map, g_cord: batch_g_cord, phase_train:True}) 189 | # co = sess.run(obj_loss, feed_dict={voxel: batch_x, g_map: batch_g_map, g_cord: batch_g_cord, phase_train:True}) 190 | cc = sess.run(cord_loss, feed_dict={voxel: batch_x, g_map: batch_g_map, g_cord: batch_g_cord, phase_train:True}) 191 | iol = sess.run(is_obj_loss, feed_dict={voxel: batch_x, g_map: batch_g_map, g_cord: batch_g_cord, phase_train:True}) 192 | nol = sess.run(non_obj_loss, feed_dict={voxel: batch_x, g_map: batch_g_map, g_cord: batch_g_cord, phase_train:True}) 193 | # soft = sess.run(y, feed_dict={voxel: batch_x, g_map: batch_g_map, g_cord: batch_g_cord}) 194 | # print soft[0, 0, 0, 0, :] 195 | # print("Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(ct)) 196 | # print("Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(co)) 197 | print("Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(cc)) 198 | print("Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(iol)) 199 | print("Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(nol)) 200 | # print "" 201 | 202 | if (epoch != 0) and (epoch % 1 == 0): 203 | print "Save epoch " + str(epoch) 204 | saver.save(sess, "velodyne_025_deconv_norm" + str(epoch) + ".ckpt") 205 | print("Optimization Finished!") 206 | 207 | def test(batch_num, velodyne_path, label_path=None, calib_path=None, resolution=0.2, dataformat="pcd", label_type="txt", is_velo_cam=False, \ 
208 | scale=4, voxel_shape=(800, 800, 40), x=(0, 80), y=(-40, 40), z=(-2.5, 1.5)): 209 | # tf Graph input 210 | batch_size = batch_num # 1 211 | training_epochs = 5 212 | p = [] 213 | pc = None 214 | bounding_boxes = None 215 | places = None 216 | rotates = None 217 | size = None 218 | proj_velo = None 219 | 220 | if dataformat == "bin": 221 | pc = load_pc_from_bin(velodyne_path) 222 | elif dataformat == "pcd": 223 | pc = load_pc_from_pcd(velodyne_path) 224 | 225 | if calib_path: 226 | calib = read_calib_file(calib_path) 227 | proj_velo = proj_to_velo(calib)[:, :3] 228 | 229 | if label_path: 230 | places, rotates, size = read_labels(label_path, label_type, calib_path=calib_path, is_velo_cam=is_velo_cam, proj_velo=proj_velo) 231 | 232 | corners = get_boxcorners(places, rotates, size) 233 | filter_car_data(corners) 234 | pc = filter_camera_angle(pc) 235 | 236 | voxel = raw_to_voxel(pc, resolution=resolution, x=x, y=y, z=z) 237 | center_sphere = center_to_sphere(places, size, resolution=resolution) 238 | corner_label = corner_to_train(corners, center_sphere, resolution=resolution) 239 | g_map = create_objectness_label(center_sphere, resolution=resolution) 240 | g_cord = corner_label.reshape(corner_label.shape[0], -1) 241 | 242 | voxel_x = voxel.reshape(1, voxel.shape[0], voxel.shape[1], voxel.shape[2], 1) 243 | 244 | with tf.Session() as sess: 245 | model, voxel, phase_train = ssd_model(sess, voxel_shape=voxel_shape, activation=tf.nn.relu) 246 | # optimizer = create_optimizer(total_loss) 247 | saver = tf.train.Saver() 248 | new_saver = tf.train.import_meta_graph("velodyne_025_deconv_norm1.ckpt.meta") 249 | # last_model = tf.train.latest_checkpoint('./velodyne_10th_try_900.ckpt') 250 | last_model = "./velodyne_025_deconv_norm1.ckpt" 251 | saver.restore(sess, last_model) 252 | 253 | objectness = model.objectness 254 | cordinate = model.cordinate 255 | y_pred = model.y 256 | objectness = sess.run(objectness, feed_dict={voxel: voxel_x, phase_train:False})[0, :, :, :, 0] 257 | cordinate = sess.run(cordinate, feed_dict={voxel: voxel_x, phase_train:False})[0] 258 | y_pred = sess.run(y_pred, feed_dict={voxel: voxel_x, phase_train:False})[0, :, :, :, 0] 259 | print objectness.shape, objectness.max(), objectness.min() 260 | print y_pred.shape, y_pred.max(), y_pred.min() 261 | 262 | # print np.where(objectness >= 0.55) 263 | index = np.where(y_pred >= 0.995) 264 | print np.vstack((index[0], np.vstack((index[1], index[2])))).transpose() 265 | print np.vstack((index[0], np.vstack((index[1], index[2])))).transpose().shape 266 | 267 | a = center_to_sphere(places, size, resolution=resolution, x=x, y=y, z=z, \ 268 | scale=scale, min_value=np.array([x[0], y[0], z[0]])) 269 | label_center = sphere_to_center(a, resolution=resolution, \ 270 | scale=scale, min_value=np.array([x[0], y[0], z[0]])) 271 | label_corners = get_boxcorners(label_center, rotates, size) 272 | print a[a[:, 0].argsort()] 273 | # center = np.array([20, 57, 3]) 274 | # 275 | # pred_center = sphere_to_center(center, resolution=resolution) 276 | # print pred_center 277 | # print cordinate.shape 278 | # corners = cordinate[center[0], center[1], center[2]].reshape(-1, 3) 279 | centers = np.vstack((index[0], np.vstack((index[1], index[2])))).transpose() 280 | centers = sphere_to_center(centers, resolution=resolution, \ 281 | scale=scale, min_value=np.array([x[0], y[0], z[0]])) 282 | corners = cordinate[index].reshape(-1, 8, 3) + centers[:, np.newaxis] 283 | print corners.shape 284 | print voxel.shape 285 | # publish_pc2(pc, corners.reshape(-1, 3)) 
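        # Illustrative note (not part of the original file): unlike test() in
        # model_01_deconv.py, this variant publishes the ground-truth label
        # corners below rather than the predicted corners computed above.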
286 | publish_pc2(pc, label_corners.reshape(-1, 3)) 287 | # pred_corners = corners + pred_center 288 | # print pred_corners 289 | 290 | def lidar_generator(batch_num, velodyne_path, label_path=None, calib_path=None, resolution=0.2, dataformat="pcd", label_type="txt", is_velo_cam=False, \ 291 | scale=4, x=(0, 80), y=(-40, 40), z=(-2.5, 1.5)): 292 | velodynes_path = glob.glob(velodyne_path) 293 | labels_path = glob.glob(label_path) 294 | calibs_path = glob.glob(calib_path) 295 | velodynes_path.sort() 296 | labels_path.sort() 297 | calibs_path.sort() 298 | iter_num = len(velodynes_path) // batch_num 299 | 300 | for itn in range(iter_num): 301 | batch_voxel = [] 302 | batch_g_map = [] 303 | batch_g_cord = [] 304 | 305 | for velodynes, labels, calibs in zip(velodynes_path[itn*batch_num:(itn+1)*batch_num], \ 306 | labels_path[itn*batch_num:(itn+1)*batch_num], calibs_path[itn*batch_num:(itn+1)*batch_num]): 307 | p = [] 308 | pc = None 309 | bounding_boxes = None 310 | places = None 311 | rotates = None 312 | size = None 313 | proj_velo = None 314 | 315 | if dataformat == "bin": 316 | pc = load_pc_from_bin(velodynes) 317 | elif dataformat == "pcd": 318 | pc = load_pc_from_pcd(velodynes) 319 | 320 | if calib_path: 321 | calib = read_calib_file(calibs) 322 | proj_velo = proj_to_velo(calib)[:, :3] 323 | 324 | if label_path: 325 | places, rotates, size = read_labels(labels, label_type, calib_path=calib_path, is_velo_cam=is_velo_cam, proj_velo=proj_velo) 326 | if places is None: 327 | continue 328 | 329 | corners = get_boxcorners(places, rotates, size) 330 | filter_car_data(corners) 331 | pc = filter_camera_angle(pc) 332 | 333 | voxel = raw_to_voxel(pc, resolution=resolution, x=x, y=y, z=z) 334 | # center_sphere = center_to_sphere(places, size, resolution=resolution, min_value=np.array([0., -40, -2.5]), scale=scale, x=x, y=y, z=(-2.5, 2.3)) 335 | # corner_label = corner_to_train(corners, center_sphere, resolution=resolution, x=x, y=y, z=(-2.5, 2.3), scale=scale, min_value=np.array([0., -40, -2.5])) 336 | center_sphere, corner_label = create_label(places, size, corners, resolution=resolution, x=x, y=y, z=z, \ 337 | scale=scale, min_value=np.array([x[0], y[0], z[0]])) 338 | 339 | # print center_sphere 340 | if not center_sphere.shape[0]: 341 | print 1 342 | continue 343 | g_map = create_objectness_label(center_sphere, resolution=resolution, x=(x[1] - x[0]), y=(y[1] - y[0]), z=(z[1] - z[0]), scale=scale) 344 | g_cord = corner_label.reshape(corner_label.shape[0], -1) 345 | g_cord = corner_to_voxel(voxel.shape, g_cord, center_sphere, scale=scale) 346 | 347 | batch_voxel.append(voxel) 348 | batch_g_map.append(g_map) 349 | batch_g_cord.append(g_cord) 350 | yield np.array(batch_voxel, dtype=np.float32)[:, :, :, :, np.newaxis], np.array(batch_g_map, dtype=np.float32), np.array(batch_g_cord, dtype=np.float32) 351 | 352 | 353 | if __name__ == '__main__': 354 | # pcd_path = "../data/training/velodyne/*.bin" 355 | # label_path = "../data/training/label_2/*.txt" 356 | # calib_path = "../data/training/calib/*.txt" 357 | # train(5, pcd_path, label_path=label_path, resolution=0.25, calib_path=calib_path, dataformat="bin", is_velo_cam=True, \ 358 | # scale=4, voxel_shape=(360, 400, 40), x=(0, 90), y=(-50, 50), z=(-5.5, 4.5)) 359 | # # 360 | pcd_path = "../data/training/velodyne/004000.bin" 361 | label_path = "../data/training/label_2/004000.txt" 362 | calib_path = "../data/training/calib/004000.txt" 363 | test(1, pcd_path, label_path=label_path, resolution=0.25, calib_path=calib_path, dataformat="bin", is_velo_cam=True, 
\
364 |          scale=4, voxel_shape=(360, 400, 40), x=(0, 90), y=(-50, 50), z=(-5.5, 4.5))
365 |     # test(1, pcd_path, label_path=label_path, resolution=0.1, calib_path=calib_path, dataformat="bin", is_velo_cam=True, scale=8, voxel_shape=(800, 800, 40))
366 | 
--------------------------------------------------------------------------------
/model_025_deconv_norm_validate.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import sys
3 | import numpy as np
4 | import tensorflow as tf
5 | from input_velodyne import *
6 | import glob
7 | 
8 | #original
9 | def batch_norm(inputs, phase_train, decay=0.9, eps=1e-5):
10 |     """Batch Normalization
11 | 
12 |     Args:
13 |         inputs: input batch from the previous layer
14 |         phase_train: boolean placeholder while training; pass None at test time
15 |     Returns:
16 |         output for next layer
17 |     """
18 |     gamma = tf.get_variable("gamma", shape=inputs.get_shape()[-1], dtype=tf.float32, initializer=tf.constant_initializer(1.0))
19 |     beta = tf.get_variable("beta", shape=inputs.get_shape()[-1], dtype=tf.float32, initializer=tf.constant_initializer(0.0))
20 |     pop_mean = tf.get_variable("pop_mean", trainable=False, shape=inputs.get_shape()[-1], dtype=tf.float32, initializer=tf.constant_initializer(0.0))
21 |     pop_var = tf.get_variable("pop_var", trainable=False, shape=inputs.get_shape()[-1], dtype=tf.float32, initializer=tf.constant_initializer(1.0))
22 | 
23 |     # gamma = tf.Variable(tf.ones(inputs.get_shape()[1:]), name="gamma")
24 |     # beta = tf.Variable(tf.zeros(inputs.get_shape()[1:]), name="beta")
25 |     # pop_mean = tf.Variable(tf.zeros(inputs.get_shape()[1:]), trainable=False, name="pop_mean")
26 |     # pop_var = tf.Variable(tf.ones(inputs.get_shape()[1:]), trainable=False, name="pop_var")
27 |     axes = range(len(inputs.get_shape()) - 1)
28 | 
29 |     if phase_train is not None:
30 |         batch_mean, batch_var = tf.nn.moments(inputs, axes)
31 |         train_mean = tf.assign(pop_mean, pop_mean * decay + batch_mean * (1 - decay))
32 |         train_var = tf.assign(pop_var, pop_var * decay + batch_var * (1 - decay))
33 |         with tf.control_dependencies([train_mean, train_var]):
34 |             return tf.nn.batch_normalization(inputs, batch_mean, batch_var, beta, gamma, eps)
35 |     else:
36 |         return tf.nn.batch_normalization(inputs, pop_mean, pop_var, beta, gamma, eps)
37 | 
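batch_norm() above keeps two sets of statistics: the current batch's moments, used while training (with an exponential moving average folded into pop_mean/pop_var as a side effect), and the accumulated population moments, used when phase_train is None at test time. A minimal NumPy sketch of that moving-average bookkeeping, assuming decay=0.9 as in the signature above (illustrative only, not part of the repo):

import numpy as np

decay = 0.9
pop_mean, pop_var = 0.0, 1.0                      # population statistics
for _ in range(100):                              # one update per training batch
    batch = np.random.randn(32)
    pop_mean = pop_mean * decay + batch.mean() * (1 - decay)
    pop_var = pop_var * decay + batch.var() * (1 - decay)

# test time: normalize with the accumulated statistics, not the batch's own
x = np.random.randn(32)
x_hat = (x - pop_mean) / np.sqrt(pop_var + 1e-5)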
38 | # def batch_norm(inputs, phase_train, decay=0.9, eps=1e-5):
39 | #     with tf.variable_scope("bn"):
40 | #         gamma = tf.get_variable("gamma", shape=inputs.get_shape()[-1], dtype=tf.float32, initializer=tf.constant_initializer(1.0))
41 | #         beta = tf.get_variable("beta", shape=inputs.get_shape()[-1], dtype=tf.float32, initializer=tf.constant_initializer(0.0))
42 | #         pop_mean = tf.get_variable("pop_mean", trainable=True, shape=inputs.get_shape()[-1], dtype=tf.float32, initializer=tf.constant_initializer(0.0))
43 | #         pop_var = tf.get_variable("pop_var", trainable=True, shape=inputs.get_shape()[-1], dtype=tf.float32, initializer=tf.constant_initializer(1.0))
44 | #
45 | #         # if phase_train == None:
46 | #         #     print 21
47 | #         #     return tf.nn.batch_normalization(inputs, pop_mean, pop_var, beta, gamma, eps)
48 | #
49 | #         rank = len(inputs.get_shape())
50 | #         axes = range(rank - 1)
51 | #         batch_mean, batch_var = tf.nn.moments(inputs, axes)
52 | #         ema = tf.train.ExponentialMovingAverage(decay=decay)
53 | #
54 | #         def update():
55 | #             ema_apply_op = ema.apply([batch_mean, batch_var])
56 | #             train_mean = pop_mean.assign(ema.average(batch_mean))
57 | #             train_var = pop_var.assign(ema.average(batch_var))
58 | #             with tf.control_dependencies([ema_apply_op]):
59 | #                 return tf.nn.batch_normalization(inputs, tf.identity(batch_mean), tf.identity(batch_var), \
60 | #                     beta, gamma, eps)
61 | #
62 | #         def average():
63 | #             train_mean = pop_mean.assign(ema.average(batch_mean))
64 | #             train_var = pop_var.assign(ema.average(batch_var))
65 | #             with tf.control_dependencies([train_mean, train_var]):
66 | #                 return tf.nn.batch_normalization(inputs, train_mean, train_var, beta, gamma, eps)
67 | #
68 | #         return tf.cond(phase_train, update, average)
69 | 
70 | # def batch_norm(inputs, phase_train, decay=0.9, eps=1e-5):
71 | #     """Batch Normalization
72 | #
73 | #     Args:
74 | #         inputs: input batch from the previous layer
75 | #         phase_train: boolean placeholder while training; pass None at test time
76 | #     Returns:
77 | #         output for next layer
78 | #     """
79 | #     bn = tf.contrib.layers.batch_norm(inputs, center=True, scale=True, is_training=False, scope="bn")
80 | #     return bn
81 | #     with tf.variable_scope("bn", reuse=False):
82 | #         bn = tf.contrib.layers.batch_norm(inputs, center=True, scale=True, is_training=False, reuse=False, scope="bn")
83 | #         return bn
84 | 
85 | def conv3DLayer(input_layer, input_dim, output_dim, height, width, length, stride, activation=tf.nn.relu, padding="SAME", name="", is_training=True):
86 |     with tf.variable_scope("conv3D" + name):
87 |         kernel = tf.get_variable("weights", shape=[length, height, width, input_dim, output_dim], \
88 |             dtype=tf.float32, initializer=tf.truncated_normal_initializer(stddev=0.01))
89 |         b = tf.get_variable("bias", shape=[output_dim], dtype=tf.float32, initializer=tf.constant_initializer(0.0))
90 |         conv = tf.nn.conv3d(input_layer, kernel, stride, padding=padding)
91 |         bias = tf.nn.bias_add(conv, b)
92 |         if activation:
93 |             bias = activation(bias, name="activation")
94 |         bias = batch_norm(bias, is_training)
95 |     return bias
96 | 
97 | def conv3D_to_output(input_layer, input_dim, output_dim, height, width, length, stride, activation=tf.nn.relu, padding="SAME", name=""):
98 |     with tf.variable_scope("conv3D" + name):
99 |         kernel = tf.get_variable("weights", shape=[length, height, width, input_dim, output_dim], \
100 |             dtype=tf.float32, initializer=tf.constant_initializer(0.01))
101 |         conv = tf.nn.conv3d(input_layer, kernel, stride, padding=padding)
102 |     return conv
103 | 
104 | def deconv3D_to_output(input_layer, input_dim, output_dim, height, width, length, stride, output_shape, activation=tf.nn.relu, padding="SAME", name=""):
105 |     with tf.variable_scope("deconv3D" + name):
106 |         kernel = tf.get_variable("weights", shape=[length, height, width, output_dim, input_dim], \
107 |             dtype=tf.float32, initializer=tf.constant_initializer(0.01))
108 |         deconv = tf.nn.conv3d_transpose(input_layer, kernel, output_shape, stride, padding=padding)
109 |     return deconv
110 | 
111 | def fully_connected(input_layer, shape, name="", is_training=True):
112 |     with tf.variable_scope("fully" + name):
113 |         kernel = tf.get_variable("weights", shape=shape, \
114 |             dtype=tf.float32, initializer=tf.truncated_normal_initializer(stddev=0.01))
115 |         fully = tf.matmul(input_layer, kernel)
116 |         fully = tf.nn.relu(fully)
117 |         fully = batch_norm(fully, is_training)
118 |     return fully
119 | 
120 | class BNBLayer(object):
121 |     def __init__(self):
122 |         pass
123 | 
124 |     def build_graph(self, voxel, activation=tf.nn.relu, is_training=None):
125 |         self.layer1 = conv3DLayer(voxel, 1, 10, 5, 5, 5, [1, 2, 2, 2, 1], name="layer1", activation=activation, is_training=is_training)
126 |         self.layer2 = conv3DLayer(self.layer1, 10, 15, 5, 5, 5, [1, 2, 2, 2, 1], name="layer2", activation=activation, is_training=is_training)
127 |         # self.layer3 = conv3DLayer(self.layer2, 15, 30, 3, 3, 3, [1, 1, 1, 1, 1], name="layer3", activation=activation, is_training=is_training)
128 |         # self.layer4 = conv3DLayer(self.layer3, 32, 32, 3, 3, 3, [1, 1, 1, 1, 1], name="layer4", activation=activation, is_training=is_training)
129 |         # self.layer4 = conv3DLayer(self.layer3, 32, 32, 3, 3, 3, [1, 2, 2, 2, 1], name="layer4", activation=activation, is_training=is_training)
130 |         # base_shape = self.layer3.get_shape().as_list()
131 |         # obj_output_shape = [tf.shape(self.layer4)[0], base_shape[1], base_shape[2], base_shape[3], 2]
132 |         # cord_output_shape = [tf.shape(self.layer4)[0], base_shape[1], base_shape[2], base_shape[3], 24]
133 |         self.objectness = conv3D_to_output(self.layer2, 15, 2, 3, 3, 3, [1, 1, 1, 1, 1], name="objectness", activation=None)
134 |         self.cordinate = conv3D_to_output(self.layer2, 15, 24, 3, 3, 3, [1, 1, 1, 1, 1], name="cordinate", activation=None)
135 |         # self.objectness = deconv3D_to_output(self.layer4, 32, 2, 3, 3, 3, [1, 2, 2, 2, 1], obj_output_shape, name="objectness", activation=None)
136 |         # self.cordinate = deconv3D_to_output(self.layer4, 32, 24, 3, 3, 3, [1, 2, 2, 2, 1], cord_output_shape, name="cordinate", activation=None)
137 |         self.y = tf.nn.softmax(self.objectness, dim=-1)
138 | 
139 |     #original
140 |     # def build_graph(self, voxel, activation=tf.nn.relu, is_training=True):
141 |     #     self.layer1 = conv3DLayer(voxel, 1, 10, 5, 5, 5, [1, 2, 2, 2, 1], name="layer1", activation=activation, is_training=is_training)
142 |     #     self.layer2 = conv3DLayer(self.layer1, 10, 16, 5, 5, 5, [1, 2, 2, 2, 1], name="layer2", activation=activation, is_training=is_training)
143 |     #     self.layer3 = conv3DLayer(self.layer2, 16, 30, 3, 3, 3, [1, 2, 2, 2, 1], name="layer3", activation=activation, is_training=is_training)
144 |     #     base_shape = self.layer2.get_shape().as_list()
145 |     #     obj_output_shape = [tf.shape(self.layer3)[0], base_shape[1], base_shape[2], base_shape[3], 2]
146 |     #     cord_output_shape = [tf.shape(self.layer3)[0], base_shape[1], base_shape[2], base_shape[3], 24]
147 |     #     self.objectness = deconv3D_to_output(self.layer3, 30, 2, 3, 3, 3, [1, 2, 2, 2, 1], obj_output_shape, name="objectness", activation=None)
148 |     #     self.cordinate = deconv3D_to_output(self.layer3, 30, 24, 3, 3, 3, [1, 2, 2, 2, 1], cord_output_shape, name="cordinate", activation=None)
149 |     #     self.y = tf.nn.softmax(self.objectness, dim=-1)
150 | 
151 | def ssd_model(sess, voxel_shape=(300, 300, 300), activation=tf.nn.relu, is_training=True):
152 |     voxel = tf.placeholder(tf.float32, [None, voxel_shape[0], voxel_shape[1], voxel_shape[2], 1])
153 |     phase_train = tf.placeholder(tf.bool, name='phase_train') if is_training else None
154 |     with tf.variable_scope("3D_CNN_model") as scope:
155 |         bnb_model = BNBLayer()
156 |         bnb_model.build_graph(voxel, activation=activation, is_training=phase_train)
157 | 
158 |     if is_training:
159 |         initialized_var = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="3D_CNN_model")
160 |         sess.run(tf.variables_initializer(initialized_var))
161 |     return bnb_model, voxel, phase_train
162 | 
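For the shapes used later in this file (voxel_shape=(360, 400, 40)), the two stride-2 convolutions in build_graph reduce each spatial axis by a factor of 4, which is why the label generation uses scale=4. A quick sanity check of the output grid (sketch, not part of the repo):

import math

shape = (360, 400, 40)
for _ in range(2):                     # layer1 and layer2, stride [1, 2, 2, 2, 1]
    shape = tuple(int(math.ceil(s / 2.0)) for s in shape)
print(shape)                           # (90, 100, 10)
# objectness head adds 2 channels -> (90, 100, 10, 2)
# cordinate head adds 24 channels -> (90, 100, 10, 24), i.e. 8 corners x 3 coords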
163 | def loss_func(model):
164 |     g_map = tf.placeholder(tf.float32, model.cordinate.get_shape().as_list()[:4])
165 |     g_cord = tf.placeholder(tf.float32, model.cordinate.get_shape().as_list())
166 |     object_loss = tf.multiply(g_map, model.objectness[:, :, :, :, 0])
167 |     non_gmap = tf.subtract(tf.ones_like(g_map, dtype=tf.float32), g_map)
168 |     nonobject_loss = tf.multiply(non_gmap, model.objectness[:, :, :, :, 1])
169 |     # sum_object_loss = tf.add(tf.exp(object_loss), tf.exp(nonobject_loss))
170 |     sum_object_loss = tf.exp(-tf.add(object_loss, nonobject_loss))
171 |     # sum_object_loss = tf.exp(-nonobject_loss)
172 |     bunbo = tf.add(tf.exp(-model.objectness[:, :, :, :, 0]), tf.exp(-model.objectness[:, :, :, :, 1]))  # "bunbo" = denominator of the softmax
173 |     obj_loss = 0.005 * tf.reduce_sum(-tf.log(tf.div(sum_object_loss, bunbo)))
174 | 
175 |     cord_diff = tf.multiply(g_map, tf.reduce_sum(tf.square(tf.subtract(model.cordinate, g_cord)), 4))
176 |     cord_loss = tf.reduce_sum(cord_diff)
177 |     return tf.add(obj_loss, cord_loss), obj_loss, cord_loss, g_map, g_cord
178 | 
179 | def loss_func2(model):
180 |     g_map = tf.placeholder(tf.float32, model.cordinate.get_shape().as_list()[:4])
181 |     obj_loss = tf.reduce_sum(tf.square(tf.subtract(model.objectness[:, :, :, :, 0], g_map)))
182 | 
183 |     g_cord = tf.placeholder(tf.float32, model.cordinate.get_shape().as_list())
184 |     cord_diff = tf.multiply(g_map, tf.reduce_sum(tf.square(tf.subtract(model.cordinate, g_cord)), 4))
185 |     cord_loss = tf.reduce_sum(cord_diff) * 0.1
186 |     return tf.add(obj_loss, cord_loss), g_map, g_cord
187 | 
188 | def loss_func3(model):
189 |     g_map = tf.placeholder(tf.float32, model.cordinate.get_shape().as_list()[:4])
190 |     g_cord = tf.placeholder(tf.float32, model.cordinate.get_shape().as_list())
191 |     non_gmap = tf.subtract(tf.ones_like(g_map, dtype=tf.float32), g_map)
192 | 
193 |     elosion = 0.00001  # epsilon to keep log() away from zero
194 |     y = model.y
195 |     is_obj_loss = -tf.reduce_sum(tf.multiply(g_map, tf.log(y[:, :, :, :, 0] + elosion)))
196 |     non_obj_loss = tf.multiply(-tf.reduce_sum(tf.multiply(non_gmap, tf.log(y[:, :, :, :, 1] + elosion))), 0.0008)
197 |     cross_entropy = tf.add(is_obj_loss, non_obj_loss)
198 |     obj_loss = cross_entropy
199 | 
200 |     # g_cord placeholder is already defined above
201 |     cord_diff = tf.multiply(g_map, tf.reduce_sum(tf.square(tf.subtract(model.cordinate, g_cord)), 4))
202 |     cord_loss = tf.multiply(tf.reduce_sum(cord_diff), 0.02)
203 |     return tf.add(obj_loss, cord_loss), obj_loss, cord_loss, is_obj_loss, non_obj_loss, g_map, g_cord, y
204 | 
205 | def create_optimizer(all_loss, lr=0.001):
206 |     opt = tf.train.AdamOptimizer(lr)
207 |     optimizer = opt.minimize(all_loss)
208 |     return optimizer
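The 0.0008 factor in loss_func3 compensates for the heavy class imbalance between background voxels and the few voxels that hold a labelled car center. A rough back-of-the-envelope check (the object count below is an assumed illustrative figure, not measured from KITTI):

total_voxels = 90 * 100 * 10              # output grid for voxel_shape=(360, 400, 40)
object_voxels = 30                        # assumed: a handful of labelled car centers
non_object_voxels = total_voxels - object_voxels
print(non_object_voxels * 0.0008)         # ~72: keeps the weighted background term on
                                          # the same order as the object term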
209 | 
210 | def train(batch_num, velodyne_path, label_path=None, calib_path=None, resolution=0.2, dataformat="pcd", label_type="txt", is_velo_cam=False, \
211 |           scale=4, voxel_shape=(800, 800, 40), x=(0, 80), y=(-40, 40), z=(-2.5, 1.5)):
212 |     # tf Graph input
213 |     batch_size = batch_num
214 |     training_epochs = 101
215 | 
216 |     with tf.Session() as sess:
217 |         model, voxel, phase_train = ssd_model(sess, voxel_shape=voxel_shape, activation=tf.nn.relu, is_training=True)
218 |         saver = tf.train.Saver()
219 |         total_loss, obj_loss, cord_loss, is_obj_loss, non_obj_loss, g_map, g_cord, y_pred = loss_func3(model)
220 |         optimizer = create_optimizer(total_loss, lr=0.01)
221 |         init = tf.global_variables_initializer()
222 |         sess.run(init)
223 | 
224 |         for epoch in range(training_epochs):
225 |             for (batch_x, batch_g_map, batch_g_cord) in lidar_generator(batch_num, velodyne_path, label_path=label_path, \
226 |                     calib_path=calib_path, resolution=resolution, dataformat=dataformat, label_type=label_type, is_velo_cam=is_velo_cam, \
227 |                     scale=scale, x=x, y=y, z=z):
228 |                 # print batch_x.shape, batch_g_map.shape, batch_g_cord.shape, batch_num
229 |                 # print batch_x.shape
230 |                 # print batch_g_map.shape
231 |                 # print batch_g_cord.shape
232 |                 sess.run(optimizer, feed_dict={voxel: batch_x, g_map: batch_g_map, g_cord: batch_g_cord, phase_train: True})
233 | 
234 |                 # ct = sess.run(total_loss, feed_dict={voxel: batch_x, g_map: batch_g_map, g_cord: batch_g_cord, phase_train: True})
235 |                 # co = sess.run(obj_loss, feed_dict={voxel: batch_x, g_map: batch_g_map, g_cord: batch_g_cord, phase_train: True})
236 |                 cc = sess.run(cord_loss, feed_dict={voxel: batch_x, g_map: batch_g_map, g_cord: batch_g_cord, phase_train: True})
237 |                 iol = sess.run(is_obj_loss, feed_dict={voxel: batch_x, g_map: batch_g_map, g_cord: batch_g_cord, phase_train: True})
238 |                 nol = sess.run(non_obj_loss, feed_dict={voxel: batch_x, g_map: batch_g_map, g_cord: batch_g_cord, phase_train: True})
239 |                 # soft = sess.run(y, feed_dict={voxel: batch_x, g_map: batch_g_map, g_cord: batch_g_cord})
240 |                 # print soft[0, 0, 0, 0, :]
241 |                 # print("Epoch:", '%04d' % (epoch+1), "total_loss=", "{:.9f}".format(ct))
242 |                 # print("Epoch:", '%04d' % (epoch+1), "obj_loss=", "{:.9f}".format(co))
243 |                 print("Epoch:", '%04d' % (epoch+1), "cord_loss=", "{:.9f}".format(cc))
244 |                 print("Epoch:", '%04d' % (epoch+1), "is_obj_loss=", "{:.9f}".format(iol))
245 |                 print("Epoch:", '%04d' % (epoch+1), "non_obj_loss=", "{:.9f}".format(nol))
246 |             if (epoch != 0) and (epoch % 10 == 0):
247 |                 print "Save epoch " + str(epoch)
248 |                 saver.save(sess, "velodyne_025_deconv_norm_valid" + str(epoch) + ".ckpt")
249 |         print("Optimization Finished!")
250 | 
251 | def test(batch_num, velodyne_path, label_path=None, calib_path=None, resolution=0.2, dataformat="pcd", label_type="txt", is_velo_cam=False, \
252 |          scale=4, voxel_shape=(800, 800, 40), x=(0, 80), y=(-40, 40), z=(-2.5, 1.5)):
253 |     # tf Graph input
254 |     batch_size = batch_num  # 1
255 |     training_epochs = 5
256 |     p = []
257 |     pc = None
258 |     bounding_boxes = None
259 |     places = None
260 |     rotates = None
261 |     size = None
262 |     proj_velo = None
263 | 
264 |     if dataformat == "bin":
265 |         pc = load_pc_from_bin(velodyne_path)
266 |     elif dataformat == "pcd":
267 |         pc = load_pc_from_pcd(velodyne_path)
268 | 
269 |     if calib_path:
270 |         calib = read_calib_file(calib_path)
271 |         proj_velo = proj_to_velo(calib)[:, :3]
272 | 
273 |     if label_path:
274 |         places, rotates, size = read_labels(label_path, label_type, calib_path=calib_path, is_velo_cam=is_velo_cam, proj_velo=proj_velo)
275 | 
276 |     corners = get_boxcorners(places, rotates, size)
277 |     filter_car_data(corners)
278 |     pc = filter_camera_angle(pc)
279 | 
280 |     voxel = raw_to_voxel(pc, resolution=resolution, x=x, y=y, z=z)
281 |     center_sphere = center_to_sphere(places, size, resolution=resolution)
282 |     corner_label = corner_to_train(corners, center_sphere, resolution=resolution)
283 |     g_map = create_objectness_label(center_sphere, resolution=resolution)
284 |     g_cord = corner_label.reshape(corner_label.shape[0], -1)
285 | 
286 |     voxel_x = voxel.reshape(1, voxel.shape[0], voxel.shape[1], voxel.shape[2], 1)
287 | 
288 |     with tf.Session() as sess:
289 |         is_training = None
290 |         model, voxel, phase_train = ssd_model(sess, voxel_shape=voxel_shape, activation=tf.nn.relu, is_training=is_training)
291 |         saver = tf.train.Saver()
292 |         new_saver = tf.train.import_meta_graph("velodyne_025_deconv_norm_valid0.ckpt.meta")
293 |         last_model = "./velodyne_025_deconv_norm_valid0.ckpt"
294 |         saver.restore(sess, last_model)
295 | 
296 |         objectness = model.objectness
297 |         cordinate = model.cordinate
298 |         y_pred = model.y
299 |         objectness = sess.run(objectness, feed_dict={voxel: voxel_x})[0, :, :, :, 0]
300 |         cordinate = sess.run(cordinate, feed_dict={voxel: voxel_x})[0]
301 |         y_pred = 
sess.run(y_pred, feed_dict={voxel: voxel_x})[0, :, :, :, 0] 302 | print objectness.shape, objectness.max(), objectness.min() 303 | print y_pred.shape, y_pred.max(), y_pred.min() 304 | 305 | # print np.where(objectness >= 0.55) 306 | index = np.where(y_pred >= 0.995) 307 | print np.vstack((index[0], np.vstack((index[1], index[2])))).transpose() 308 | print np.vstack((index[0], np.vstack((index[1], index[2])))).transpose().shape 309 | 310 | a = center_to_sphere(places, size, resolution=resolution, x=x, y=y, z=z, \ 311 | scale=scale, min_value=np.array([x[0], y[0], z[0]])) 312 | label_center = sphere_to_center(a, resolution=resolution, \ 313 | scale=scale, min_value=np.array([x[0], y[0], z[0]])) 314 | label_corners = get_boxcorners(label_center, rotates, size) 315 | print a[a[:, 0].argsort()] 316 | # center = np.array([20, 57, 3]) 317 | # 318 | # pred_center = sphere_to_center(center, resolution=resolution) 319 | # print pred_center 320 | # print cordinate.shape 321 | # corners = cordinate[center[0], center[1], center[2]].reshape(-1, 3) 322 | centers = np.vstack((index[0], np.vstack((index[1], index[2])))).transpose() 323 | centers = sphere_to_center(centers, resolution=resolution, \ 324 | scale=scale, min_value=np.array([x[0], y[0], z[0]])) 325 | corners = cordinate[index].reshape(-1, 8, 3) + centers[:, np.newaxis] 326 | print corners.shape 327 | print voxel.shape 328 | # publish_pc2(pc, corners.reshape(-1, 3)) 329 | publish_pc2(pc, label_corners.reshape(-1, 3)) 330 | # pred_corners = corners + pred_center 331 | # print pred_corners 332 | 333 | def lidar_generator(batch_num, velodyne_path, label_path=None, calib_path=None, resolution=0.2, dataformat="pcd", label_type="txt", is_velo_cam=False, \ 334 | scale=4, x=(0, 80), y=(-40, 40), z=(-2.5, 1.5)): 335 | velodynes_path = glob.glob(velodyne_path) 336 | labels_path = glob.glob(label_path) 337 | calibs_path = glob.glob(calib_path) 338 | velodynes_path.sort() 339 | labels_path.sort() 340 | calibs_path.sort() 341 | iter_num = len(velodynes_path) // batch_num 342 | 343 | for itn in range(iter_num): 344 | batch_voxel = [] 345 | batch_g_map = [] 346 | batch_g_cord = [] 347 | 348 | for velodynes, labels, calibs in zip(velodynes_path[itn*batch_num:(itn+1)*batch_num], \ 349 | labels_path[itn*batch_num:(itn+1)*batch_num], calibs_path[itn*batch_num:(itn+1)*batch_num]): 350 | p = [] 351 | pc = None 352 | bounding_boxes = None 353 | places = None 354 | rotates = None 355 | size = None 356 | proj_velo = None 357 | 358 | if dataformat == "bin": 359 | pc = load_pc_from_bin(velodynes) 360 | elif dataformat == "pcd": 361 | pc = load_pc_from_pcd(velodynes) 362 | 363 | if calib_path: 364 | calib = read_calib_file(calibs) 365 | proj_velo = proj_to_velo(calib)[:, :3] 366 | 367 | if label_path: 368 | places, rotates, size = read_labels(labels, label_type, calib_path=calib_path, is_velo_cam=is_velo_cam, proj_velo=proj_velo) 369 | if places is None: 370 | continue 371 | 372 | corners = get_boxcorners(places, rotates, size) 373 | filter_car_data(corners) 374 | pc = filter_camera_angle(pc) 375 | 376 | voxel = raw_to_voxel(pc, resolution=resolution, x=x, y=y, z=z) 377 | # center_sphere = center_to_sphere(places, size, resolution=resolution, min_value=np.array([0., -40, -2.5]), scale=scale, x=x, y=y, z=(-2.5, 2.3)) 378 | # corner_label = corner_to_train(corners, center_sphere, resolution=resolution, x=x, y=y, z=(-2.5, 2.3), scale=scale, min_value=np.array([0., -40, -2.5])) 379 | center_sphere, corner_label = create_label(places, size, corners, resolution=resolution, x=x, 
y=y, z=z, \
380 |                 scale=scale, min_value=np.array([x[0], y[0], z[0]]))
381 | 
382 |             # print center_sphere
383 |             if not center_sphere.shape[0]:
384 |                 print 1
385 |                 continue
386 |             g_map = create_objectness_label(center_sphere, resolution=resolution, x=(x[1] - x[0]), y=(y[1] - y[0]), z=(z[1] - z[0]), scale=scale)
387 |             g_cord = corner_label.reshape(corner_label.shape[0], -1)
388 |             g_cord = corner_to_voxel(voxel.shape, g_cord, center_sphere, scale=scale)
389 | 
390 |             batch_voxel.append(voxel)
391 |             batch_g_map.append(g_map)
392 |             batch_g_cord.append(g_cord)
393 |         yield np.array(batch_voxel, dtype=np.float32)[:, :, :, :, np.newaxis], np.array(batch_g_map, dtype=np.float32), np.array(batch_g_cord, dtype=np.float32)
394 | 
395 | 
396 | if __name__ == '__main__':
397 |     pcd_path = "../data/training/velodyne/*.bin"
398 |     label_path = "../data/training/label_2/*.txt"
399 |     calib_path = "../data/training/calib/*.txt"
400 |     train(5, pcd_path, label_path=label_path, resolution=0.25, calib_path=calib_path, dataformat="bin", is_velo_cam=True, \
401 |           scale=4, voxel_shape=(360, 400, 40), x=(0, 90), y=(-50, 50), z=(-5.5, 4.5))
402 |     # #
403 |     # pcd_path = "../data/training/velodyne/005000.bin"
404 |     # label_path = "../data/training/label_2/005000.txt"
405 |     # calib_path = "../data/training/calib/005000.txt"
406 |     # test(1, pcd_path, label_path=label_path, resolution=0.25, calib_path=calib_path, dataformat="bin", is_velo_cam=True, \
407 |     #      scale=4, voxel_shape=(360, 400, 40), x=(0, 90), y=(-50, 50), z=(-5.5, 4.5))
408 |     # test(1, pcd_path, label_path=label_path, resolution=0.1, calib_path=calib_path, dataformat="bin", is_velo_cam=True, scale=8, voxel_shape=(800, 800, 40))
409 | 
--------------------------------------------------------------------------------
/multiview_2d.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | import sys
4 | import os
5 | import rospy
6 | import numpy as np
7 | import cv2
8 | import pcl
9 | import glob
10 | import std_msgs.msg
11 | import sensor_msgs.point_cloud2 as pc2
12 | from sensor_msgs.msg import PointCloud2
13 | from parse_xml import parseXML
14 | from input_velodyne import *
15 | import matplotlib.pyplot as plt
16 | 
17 | def convert_xyz_to_2d(places):
18 |     theta = np.arctan2(places[:, 1], places[:, 0])
19 |     ave_theta = np.average(theta)
20 |     phi = np.arctan2(places[:, 2], np.sqrt(places[:, 0]**2 + places[:, 1]**2))
21 |     ave_phi = np.average(phi)
22 |     r = (theta / ave_theta).astype(np.int32)
23 |     c = (phi / ave_phi).astype(np.int32)
24 |     print "places", places.shape
25 |     print np.max(places, axis=0)
26 |     print np.min(places, axis=0)
27 |     print "theta", theta.shape
28 |     print theta.max(axis=0)
29 |     print theta.min(axis=0)
30 |     print ave_theta
31 |     print "phi", phi.shape
32 |     print phi.min()
33 |     print phi.max()
34 |     print ave_phi
35 |     print r.max(), r.min(), c.max(), c.min()
36 |     plt.hist(phi)
37 |     plt.show()
38 | 
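Note that convert_xyz_to_2d divides θ and φ by their averages, so the resulting r and c indices do not follow the fixed-angular-resolution discretization described in thesis.txt (θ/⊿θ, φ/⊿φ). A corrected sketch using the resolutions quoted there for the HDL-64E (assumed to be degrees; the helper name is hypothetical, not part of the repo):

import numpy as np

def project_to_2d(places, d_theta=np.radians(0.08), d_phi=np.radians(0.4)):
    x, y, z = places[:, 0], places[:, 1], places[:, 2]
    theta = np.arctan2(y, x)                             # azimuth
    phi = np.arcsin(z / np.sqrt(x**2 + y**2 + z**2))     # elevation
    r = (theta / d_theta).astype(np.int32)               # 2D map indices
    c = (phi / d_phi).astype(np.int32)
    d = np.sqrt(x**2 + y**2)                             # the (d, z) channel data
    return r, c, d, z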
39 | def bird_view(places):
40 |     pass
41 | 
42 | def process(velodyne_path, label_path=None, calib_path=None, dataformat="pcd", label_type="txt", is_velo_cam=False):
43 |     p = []
44 |     pc = None
45 |     bounding_boxes = None
46 |     places = None
47 |     rotates = None
48 |     size = None
49 |     proj_velo = None
50 | 
51 |     if dataformat == "bin":
52 |         pc = load_pc_from_bin(velodyne_path)
53 |     elif dataformat == "pcd":
54 |         pc = load_pc_from_pcd(velodyne_path)
55 | 
56 |     if calib_path:
57 |         calib = read_calib_file(calib_path)
58 |         proj_velo = proj_to_velo(calib)[:, :3]
59 | 
60 |     if label_path:
61 |         places, rotates, size = read_labels(label_path, label_type, calib_path=calib_path, is_velo_cam=is_velo_cam, proj_velo=proj_velo)
62 | 
63 |     convert_xyz_to_2d(pc)
64 | 
65 |     # corners = get_boxcorners(places, rotates, size)
66 |     # filter_car_data(corners)
67 |     #
68 |     # pc = filter_camera_angle(pc)
69 |     # # obj = []
70 |     # # obj = create_publish_obj(obj, places, rotates, size)
71 |     #
72 |     # p.append((0, 0, 0))
73 |     # print 1
74 |     # # publish_pc2(pc, obj)
75 |     # publish_pc2(pc, corners.reshape(-1, 3))
76 | 
77 | if __name__ == "__main__":
78 |     # pcd_path = "../data/training/velodyne/000012.pcd"
79 |     # label_path = "../data/training/label_2/000012.txt"
80 |     # calib_path = "../data/training/calib/000012.txt"
81 |     # process(pcd_path, label_path, calib_path=calib_path, dataformat="pcd")
82 | 
83 |     # bin_path = "../data/2011_09_26/2011_09_26_drive_0001_sync/velodyne_points/data/0000000030.bin"
84 |     # xml_path = "../data/2011_09_26/2011_09_26_drive_0001_sync/tracklet_labels.xml"
85 |     # process(bin_path, xml_path, dataformat="bin", label_type="xml")
86 | 
87 | 
88 |     pcd_path = "/home/katou01/download/training/velodyne/005080.bin"
89 |     label_path = "/home/katou01/download/training/label_2/005080.txt"
90 |     calib_path = "/home/katou01/download/training/calib/005080.txt"
91 |     process(pcd_path, label_path, calib_path=calib_path, dataformat="bin", is_velo_cam=True)
--------------------------------------------------------------------------------
/parse_xml.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """
3 | parse XML files containing tracklet info for the KITTI dataset (raw data section)
4 | (http://cvlibs.net/datasets/kitti/raw_data.php)
5 | 
6 | No guarantees that this code is correct, usage is at your own risk!
7 | 
8 | created by Christian Herdtweck, Max Planck Institute for Biological Cybernetics
9 |   (christian.herdtweck@tuebingen.mpg.de)
10 | 
11 | requires numpy!
12 | 
13 | example usage:
14 |   import parseTrackletXML as xmlParser
15 |   kittiDir = '/path/to/kitti/data'
16 |   drive = '2011_09_26_drive_0001'
17 |   xmlParser.example(kittiDir, drive)
18 | or simply on command line:
19 |   python parseTrackletXML.py
20 | """
21 | 
22 | # Version History:
23 | # 4/7/12 Christian Herdtweck: seems to work with a few random test xml tracklet files;
24 | #   converts file contents to ElementTree and then to list of Tracklet objects;
25 | #   Tracklet objects have str and iter functions
26 | # 5/7/12 ch: added constants for state, occlusion, truncation and added consistency checks
27 | # 30/1/14 ch: create example function from example code
28 | 
29 | from sys import argv as cmdLineArgs
30 | from xml.etree.ElementTree import ElementTree
31 | import numpy as np
32 | import itertools
33 | from warnings import warn
34 | 
35 | STATE_UNSET = 0
36 | STATE_INTERP = 1
37 | STATE_LABELED = 2
38 | stateFromText = {'0': STATE_UNSET, '1': STATE_INTERP, '2': STATE_LABELED}
39 | 
40 | OCC_UNSET = 255  # -1 as uint8
41 | OCC_VISIBLE = 0
42 | OCC_PARTLY = 1
43 | OCC_FULLY = 2
44 | occFromText = {'-1': OCC_UNSET, '0': OCC_VISIBLE, '1': OCC_PARTLY, '2': OCC_FULLY}
45 | 
46 | TRUNC_UNSET = 255  # -1 as uint8, but in xml files the value '99' is used!
47 | TRUNC_IN_IMAGE = 0
48 | TRUNC_TRUNCATED = 1
49 | TRUNC_OUT_IMAGE = 2
50 | TRUNC_BEHIND_IMAGE = 3
51 | truncFromText = {'99': TRUNC_UNSET, '0': TRUNC_IN_IMAGE, '1': TRUNC_TRUNCATED, \
52 |     '2': TRUNC_OUT_IMAGE, '3': TRUNC_BEHIND_IMAGE}
53 | 
54 | 
55 | class Tracklet(object):
56 |     """ representation of an annotated object track
57 | 
58 |     Tracklets are created in function parseXML and can most conveniently be used as follows:
59 | 
60 |     for trackletObj in parseXML(trackletFile):
61 |         for translation, rotation, state, occlusion, truncation, amtOcclusion, amtBorders, absoluteFrameNumber in trackletObj:
62 |             your code here
63 |         #end: for all frames
64 |     #end: for all tracklets
65 | 
66 |     absoluteFrameNumber is in range [firstFrame, firstFrame+nFrames[
67 |     amtOcclusion and amtBorders could be None
68 | 
69 |     You can of course also directly access the fields objectType (string), size (len-3 ndarray), firstFrame/nFrames (int),
70 |     trans/rots (nFrames x 3 float ndarrays), states/truncs (len-nFrames uint8 ndarrays), occs (nFrames x 2 uint8 ndarray),
71 |     and for some tracklets amtOccs (nFrames x 2 float ndarray) and amtBorders (nFrames x 3 float ndarray). The last two
72 |     can be None if the xml file did not include these fields in poses
73 |     """
74 | 
75 |     objectType = None
76 |     size = None  # len-3 float array: (height, width, length)
77 |     firstFrame = None
78 |     trans = None  # n x 3 float array (x,y,z)
79 |     rots = None  # n x 3 float array (x,y,z)
80 |     states = None  # len-n uint8 array of states
81 |     occs = None  # n x 2 uint8 array (occlusion, occlusion_kf)
82 |     truncs = None  # len-n uint8 array of truncation
83 |     amtOccs = None  # None or (n x 2) float array (amt_occlusion, amt_occlusion_kf)
84 |     amtBorders = None  # None or (n x 3) float array (amt_border_l / _r / _kf)
85 |     nFrames = None
86 | 
87 |     def __init__(self):
88 |         """ create Tracklet with no info set """
89 |         self.size = np.nan * np.ones(3, dtype=float)
90 | 
91 |     def __str__(self):
92 |         """ return human-readable string representation of tracklet object
93 | 
94 |         called implicitly in
95 |             print trackletObj
96 |         or in
97 |             text = str(trackletObj)
98 |         """
99 |         return '[Tracklet over {0} frames for {1}]'.format(self.nFrames, self.objectType)
100 | 
101 |     def __iter__(self):
102 |         """ returns an iterator that yields tuple of all the available data for each frame
103 | 
104 |         called whenever code iterates over a tracklet object, e.g. in
105 |             for translation, rotation, state, occlusion, truncation, amtOcclusion, amtBorders, absoluteFrameNumber in trackletObj:
106 |                 ...do something ...
107 | or 108 | trackDataIter = iter(trackletObj) 109 | """ 110 | if self.amtOccs is None: 111 | return itertools.izip(self.trans, self.rots, self.states, self.occs, self.truncs, \ 112 | itertools.repeat(None), itertools.repeat(None), xrange(self.firstFrame, self.firstFrame+self.nFrames)) 113 | else: 114 | return itertools.izip(self.trans, self.rots, self.states, self.occs, self.truncs, \ 115 | self.amtOccs, self.amtBorders, xrange(self.firstFrame, self.firstFrame+self.nFrames)) 116 | #end: class Tracklet 117 | 118 | 119 | def parseXML(trackletFile): 120 | r""" parse tracklet xml file and convert results to list of Tracklet objects 121 | 122 | :param trackletFile: name of a tracklet xml file 123 | :returns: list of Tracklet objects read from xml file 124 | """ 125 | 126 | # convert tracklet XML data to a tree structure 127 | eTree = ElementTree() 128 | print 'parsing tracklet file', trackletFile 129 | with open(trackletFile) as f: 130 | eTree.parse(f) 131 | 132 | # now convert output to list of Tracklet objects 133 | trackletsElem = eTree.find('tracklets') 134 | tracklets = [] 135 | trackletIdx = 0 136 | nTracklets = None 137 | for trackletElem in trackletsElem: 138 | #print 'track:', trackletElem.tag 139 | if trackletElem.tag == 'count': 140 | nTracklets = int(trackletElem.text) 141 | print 'file contains', nTracklets, 'tracklets' 142 | elif trackletElem.tag == 'item_version': 143 | pass 144 | elif trackletElem.tag == 'item': 145 | #print 'tracklet {0} of {1}'.format(trackletIdx, nTracklets) 146 | # a tracklet 147 | newTrack = Tracklet() 148 | isFinished = False 149 | hasAmt = False 150 | frameIdx = None 151 | for info in trackletElem: 152 | #print 'trackInfo:', info.tag 153 | if isFinished: 154 | raise ValueError('more info on element after finished!') 155 | if info.tag == 'objectType': 156 | newTrack.objectType = info.text 157 | elif info.tag == 'h': 158 | newTrack.size[0] = float(info.text) 159 | elif info.tag == 'w': 160 | newTrack.size[1] = float(info.text) 161 | elif info.tag == 'l': 162 | newTrack.size[2] = float(info.text) 163 | elif info.tag == 'first_frame': 164 | newTrack.firstFrame = int(info.text) 165 | elif info.tag == 'poses': 166 | # this info is the possibly long list of poses 167 | for pose in info: 168 | #print 'trackInfoPose:', pose.tag 169 | if pose.tag == 'count': # this should come before the others 170 | if newTrack.nFrames is not None: 171 | raise ValueError('there are several pose lists for a single track!') 172 | elif frameIdx is not None: 173 | raise ValueError('?!') 174 | newTrack.nFrames = int(pose.text) 175 | newTrack.trans = np.nan * np.ones((newTrack.nFrames, 3), dtype=float) 176 | newTrack.rots = np.nan * np.ones((newTrack.nFrames, 3), dtype=float) 177 | newTrack.states = np.nan * np.ones(newTrack.nFrames, dtype='uint8') 178 | newTrack.occs = np.nan * np.ones((newTrack.nFrames, 2), dtype='uint8') 179 | newTrack.truncs = np.nan * np.ones(newTrack.nFrames, dtype='uint8') 180 | newTrack.amtOccs = np.nan * np.ones((newTrack.nFrames, 2), dtype=float) 181 | newTrack.amtBorders = np.nan * np.ones((newTrack.nFrames, 3), dtype=float) 182 | frameIdx = 0 183 | elif pose.tag == 'item_version': 184 | pass 185 | elif pose.tag == 'item': 186 | # pose in one frame 187 | if frameIdx is None: 188 | raise ValueError('pose item came before number of poses!') 189 | for poseInfo in pose: 190 | #print 'trackInfoPoseInfo:', poseInfo.tag 191 | if poseInfo.tag == 'tx': 192 | newTrack.trans[frameIdx, 0] = float(poseInfo.text) 193 | elif poseInfo.tag == 'ty': 194 | 
newTrack.trans[frameIdx, 1] = float(poseInfo.text)
195 |                                 elif poseInfo.tag == 'tz':
196 |                                     newTrack.trans[frameIdx, 2] = float(poseInfo.text)
197 |                                 elif poseInfo.tag == 'rx':
198 |                                     newTrack.rots[frameIdx, 0] = float(poseInfo.text)
199 |                                 elif poseInfo.tag == 'ry':
200 |                                     newTrack.rots[frameIdx, 1] = float(poseInfo.text)
201 |                                 elif poseInfo.tag == 'rz':
202 |                                     newTrack.rots[frameIdx, 2] = float(poseInfo.text)
203 |                                 elif poseInfo.tag == 'state':
204 |                                     newTrack.states[frameIdx] = stateFromText[poseInfo.text]
205 |                                 elif poseInfo.tag == 'occlusion':
206 |                                     newTrack.occs[frameIdx, 0] = occFromText[poseInfo.text]
207 |                                 elif poseInfo.tag == 'occlusion_kf':
208 |                                     newTrack.occs[frameIdx, 1] = occFromText[poseInfo.text]
209 |                                 elif poseInfo.tag == 'truncation':
210 |                                     newTrack.truncs[frameIdx] = truncFromText[poseInfo.text]
211 |                                 elif poseInfo.tag == 'amt_occlusion':
212 |                                     newTrack.amtOccs[frameIdx, 0] = float(poseInfo.text)
213 |                                     hasAmt = True
214 |                                 elif poseInfo.tag == 'amt_occlusion_kf':
215 |                                     newTrack.amtOccs[frameIdx, 1] = float(poseInfo.text)
216 |                                     hasAmt = True
217 |                                 elif poseInfo.tag == 'amt_border_l':
218 |                                     newTrack.amtBorders[frameIdx, 0] = float(poseInfo.text)
219 |                                     hasAmt = True
220 |                                 elif poseInfo.tag == 'amt_border_r':
221 |                                     newTrack.amtBorders[frameIdx, 1] = float(poseInfo.text)
222 |                                     hasAmt = True
223 |                                 elif poseInfo.tag == 'amt_border_kf':
224 |                                     newTrack.amtBorders[frameIdx, 2] = float(poseInfo.text)
225 |                                     hasAmt = True
226 |                                 else:
227 |                                     raise ValueError('unexpected tag in poses item: {0}!'.format(poseInfo.tag))
228 |                             frameIdx += 1
229 |                         else:
230 |                             raise ValueError('unexpected pose info: {0}!'.format(pose.tag))
231 |                 elif info.tag == 'finished':
232 |                     isFinished = True
233 |                 else:
234 |                     raise ValueError('unexpected tag in tracklets: {0}!'.format(info.tag))
235 |             #end: for all fields in current tracklet
236 | 
237 |             # some final consistency checks on new tracklet
238 |             if not isFinished:
239 |                 warn('tracklet {0} was not finished!'.format(trackletIdx))
240 |             if newTrack.nFrames is None:
241 |                 warn('tracklet {0} contains no information!'.format(trackletIdx))
242 |             elif frameIdx != newTrack.nFrames:
243 |                 warn('tracklet {0} is supposed to have {1} frames, but parser found {2}!'.format(\
244 |                     trackletIdx, newTrack.nFrames, frameIdx))
245 |             if np.abs(newTrack.rots[:, :2]).sum() > 1e-16:
246 |                 warn('track contains rotation other than yaw!')
247 | 
248 |             # if amtOccs / amtBorders are not set, set them to None
249 |             if not hasAmt:
250 |                 newTrack.amtOccs = None
251 |                 newTrack.amtBorders = None
252 | 
253 |             # add new tracklet to list
254 |             tracklets.append(newTrack)
255 |             trackletIdx += 1
256 | 
257 |         else:
258 |             raise ValueError('unexpected tracklet info')
259 |     #end: for tracklet list items
260 | 
261 |     print 'loaded', trackletIdx, 'tracklets'
262 | 
263 |     # final consistency check
264 |     if trackletIdx != nTracklets:
265 |         warn('according to xml information the file has {0} tracklets, but parser found {1}!'.format(nTracklets, trackletIdx))
266 | 
267 |     return tracklets
268 | #end: function parseXML
269 | 
270 | 
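In this repository the same parser is imported as `from parse_xml import parseXML`; the iteration pattern from the Tracklet docstring then looks like this (sketch; the file path mirrors the commented example in multiview_2d.py):

from parse_xml import parseXML

xml_path = "../data/2011_09_26/2011_09_26_drive_0001_sync/tracklet_labels.xml"
for tracklet in parseXML(xml_path):
    print(str(tracklet))  # e.g. "[Tracklet over 50 frames for Car]"
    for translation, rotation, state, occlusion, truncation, \
            amtOcclusion, amtBorders, absoluteFrameNumber in tracklet:
        x, y, z = translation       # velodyne coordinates
        yaw = rotation[2]           # only yaw is non-zero in practice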
271 | def example(kittiDir=None, drive=None):
272 | 
273 |     from os.path import join, expanduser
274 |     import readline  # makes raw_input behave more nicely
275 |     # from xmlParser import parseXML, TRUNC_IN_IMAGE, TRUNC_TRUNCATED
276 | 
277 |     DEFAULT_DRIVE = '2011_09_26_drive_0001'
278 |     twoPi = 2. * np.pi
279 | 
280 |     # get dir names
281 |     if kittiDir is None:
282 |         kittiDir = expanduser(raw_input('please enter kitti base dir (e.g. ~/path/to/kitti): ').strip())
283 |     if drive is None:
284 |         drive = raw_input('please enter drive name (default {0}): '.format(DEFAULT_DRIVE)).strip()
285 |         if len(drive) == 0:
286 |             drive = DEFAULT_DRIVE
287 | 
288 |     # read tracklets from file
289 |     myTrackletFile = join(kittiDir, drive, 'tracklet_labels.xml')
290 |     tracklets = parseXML(myTrackletFile)
291 | 
292 |     # loop over tracklets
293 |     for iTracklet, tracklet in enumerate(tracklets):
294 |         print 'tracklet {0: 3d}: {1}'.format(iTracklet, tracklet)
295 | 
296 |         # this part is inspired by kitti object development kit matlab code: computeBox3D
297 |         h, w, l = tracklet.size
298 |         trackletBox = np.array([  # in velodyne coordinates around zero point and without orientation yet
299 |             [-l/2, -l/2,  l/2,  l/2, -l/2, -l/2,  l/2,  l/2], \
300 |             [ w/2, -w/2, -w/2,  w/2,  w/2, -w/2, -w/2,  w/2], \
301 |             [ 0.0,  0.0,  0.0,  0.0,    h,    h,    h,    h]])
302 | 
303 |         # loop over all data in tracklet
304 |         for translation, rotation, state, occlusion, truncation, amtOcclusion, amtBorders, absoluteFrameNumber \
305 |                 in tracklet:
306 | 
307 |             # determine if object is in the image; otherwise continue
308 |             if truncation not in (TRUNC_IN_IMAGE, TRUNC_TRUNCATED):
309 |                 continue
310 | 
311 |             # re-create 3D bounding box in velodyne coordinate system
312 |             yaw = rotation[2]  # other rotations are 0 in all xml files I checked
313 |             assert np.abs(rotation[:2]).sum() == 0, 'object rotations other than yaw given!'
314 |             rotMat = np.array([\
315 |                 [np.cos(yaw), -np.sin(yaw), 0.0], \
316 |                 [np.sin(yaw),  np.cos(yaw), 0.0], \
317 |                 [        0.0,          0.0, 1.0]])
318 |             cornerPosInVelo = np.dot(rotMat, trackletBox) + np.tile(translation, (8, 1)).T
319 | 
320 |             # calc yaw as seen from the camera (i.e. 0 degree = facing away from cam), as opposed to
321 |             # car-centered yaw (i.e. 0 degree = same orientation as car).
322 |             # makes quite a difference for objects in periphery!
323 |             # Result is in [0, 2pi]
324 |             x, y, z = translation
325 |             yawVisual = ( yaw - np.arctan2(y, x) ) % twoPi
326 | 
327 |         #end: for all frames in track
328 |     #end: for all tracks
329 | #end: function example
330 | 
331 | # when somebody runs this file as a script:
332 | #   run example if no arg or only 'example' was given as arg
333 | #   otherwise run parseXML
334 | if __name__ == "__main__":
335 |     # cmdLineArgs[0] is 'parseTrackletXML.py'
336 |     if len(cmdLineArgs) < 2:
337 |         example()
338 |     elif (len(cmdLineArgs) == 2) and (cmdLineArgs[1] == 'example'):
339 |         example()
340 |     else:
341 |         parseXML(*cmdLineArgs[1:])
342 | 
--------------------------------------------------------------------------------
/thesis.txt:
--------------------------------------------------------------------------------
1 | 1. Vehicle Detection from 3D Lidar Using Fully Convolutional Network
2 | 
3 | Lidar Processing
4 | 1. project range scans as 2D maps similar to the depth map of RGBD data
5 | 1. velodyne scan can be roughly projected and discretized into a 2D point map.
6 | θ = atan2(y, x) # azimuth angle
7 | φ = arcsin(z / √(x**2 + y**2 + z**2)) # elevation angle
8 | r = int(θ / ⊿θ) # 2D map row
9 | c = int(φ / ⊿φ) # 2D map column
10 | ⊿θ and ⊿φ are the average horizontal and vertical angle resolutions between consecutive beam emitters
11 | ⊿θ = 0.08° http://velodynelidar.com/hdl-64e.html
12 | ⊿φ = 0.4°
13 | 
14 | each (r, c) holds 2-channel data (d, z), where d = √(x**2 + y**2)
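The (r, c) indices and (d, z) channels above assemble into an actual 2D point map; a minimal NumPy sketch (function name hypothetical; shifting the indices to start at zero is one of several reasonable conventions):

import numpy as np

def to_point_map(r, c, d, z):
    r = r - r.min()                  # shift indices so the map starts at (0, 0)
    c = c - c.min()
    pm = np.zeros((r.max() + 1, c.max() + 1, 2), dtype=np.float32)
    pm[r, c, 0] = d                  # horizontal range channel
    pm[r, c, 1] = z                  # height channel
    return pm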
15 | 
16 | 
17 | 
18 | KITTI
19 | 
20 | Label
21 | #Values    Name      Description
22 | ----------------------------------------------------------------------------
23 |    1    type         Describes the type of object: 'Car', 'Van', 'Truck',
24 |                      'Pedestrian', 'Person_sitting', 'Cyclist', 'Tram',
25 |                      'Misc' or 'DontCare'
26 |    1    truncated    Float from 0 (non-truncated) to 1 (truncated), where
27 |                      truncated refers to the object leaving image boundaries
28 |    1    occluded     Integer (0,1,2,3) indicating occlusion state:
29 |                      0 = fully visible, 1 = partly occluded
30 |                      2 = largely occluded, 3 = unknown
31 |    1    alpha        Observation angle of object, ranging [-pi..pi]
32 |    4    bbox         2D bounding box of object in the image (0-based index):
33 |                      contains left, top, right, bottom pixel coordinates
34 |    3    dimensions   3D object dimensions: height, width, length (in meters)
35 |    3    location     3D object location x,y,z in camera coordinates (in meters)
36 |    1    rotation_y   Rotation ry around Y-axis in camera coordinates [-pi..pi]
37 |    1    score        Only for results: Float, indicating confidence in
38 |                      detection, needed for p/r curves, higher is better.
39 | type trun occl alpha bbox bbox   bbox   bbox   dimen dimen dimen loc   loc  loc  rotate
40 | Car  0.89 0    2.29  0.00 194.70 414.71 373.00 1.57  1.67  4.14  -2.75 1.70 4.10 1.72
41 | 
--------------------------------------------------------------------------------
/velodyne_025_deconv_norm_valid40.ckpt.data-00000-of-00001:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yukitsuji/3D_CNN_tensorflow/5104ecf3f75a4e51a2170b72244e3f2a8993ce25/velodyne_025_deconv_norm_valid40.ckpt.data-00000-of-00001
--------------------------------------------------------------------------------
/velodyne_025_deconv_norm_valid40.ckpt.index:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yukitsuji/3D_CNN_tensorflow/5104ecf3f75a4e51a2170b72244e3f2a8993ce25/velodyne_025_deconv_norm_valid40.ckpt.index
--------------------------------------------------------------------------------
/velodyne_025_deconv_norm_valid40.ckpt.meta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yukitsuji/3D_CNN_tensorflow/5104ecf3f75a4e51a2170b72244e3f2a8993ce25/velodyne_025_deconv_norm_valid40.ckpt.meta
--------------------------------------------------------------------------------
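As a closing note, the KITTI label row reproduced in thesis.txt above can be parsed directly by column position; a sketch against the example 'Car' line (helper name hypothetical, not part of the repo):

def parse_kitti_label(line):
    f = line.split()
    return {
        'type': f[0],
        'truncated': float(f[1]),
        'occluded': int(f[2]),
        'alpha': float(f[3]),
        'bbox': [float(v) for v in f[4:8]],         # left, top, right, bottom
        'dimensions': [float(v) for v in f[8:11]],  # height, width, length (m)
        'location': [float(v) for v in f[11:14]],   # x, y, z in camera coords (m)
        'rotation_y': float(f[14]),
    }

row = "Car 0.89 0 2.29 0.00 194.70 414.71 373.00 1.57 1.67 4.14 -2.75 1.70 4.10 1.72"
print(parse_kitti_label(row)['dimensions'])         # [1.57, 1.67, 4.14]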