├── LICENSE
├── README.md
├── cmd
├── intro.JPG
├── ops.py
├── pc_util.py
├── pointnet2_sem_seg.py
├── pointnet2_sem_seg_voxel.py
├── pointnet_util.py
├── provider.py
├── result.JPG
├── scannet_dataset.py
├── scannet_dataset_multi.py
├── scene_util.py
├── suncg_dataset_multi.py
├── tf_util.py
├── train_pc_joint_multi_combinesample_queue.py
├── train_voxel_joint_multi_v1.py
└── train_voxel_joint_multi_v2.py

/LICENSE:
--------------------------------------------------------------------------------
Copyright (c) 2018 Zaiwei Zhang

Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.

3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Path Invariance Network in Tensorflow

Tensorflow implementation of one application in [Path Invariance Map Networks](https://arxiv.org/abs/1812.11647): 3D Scene Semantic Segmentation.
Since we build on the [pointnet2](https://github.com/charlesq34/pointnet2) codebase, some of the code is borrowed from there.

![alt tag](intro.JPG)

## Prerequisites

- Two GPUs (GTX 1080 or better)
- Python 2.7
- [Tensorflow 0.12.1](https://github.com/tensorflow/tensorflow/tree/r0.12)
- [SciPy](http://www.scipy.org/install.html)
- [Pointnet2 Customized TF Operators](https://github.com/charlesq34/pointnet2/tree/master/tf_ops) Please make sure the tf_ops folder is placed in the same directory.

## Usage

First, download the dataset [here](https://drive.google.com/open?id=1tOFn5rcs-9KsmfBOO9au3jeJiGTX7ikn). (Point clouds are collected using Pointnet++.)
The pre-trained models can be downloaded [here](https://drive.google.com/open?id=1hZYRfFNsx9b0KMI5YwX_ZmbKRRQJto-Z).
(Point cloud models are trained using Pointnet++, and voxel models are trained using 3D U-Net. The training code for voxel models will be released soon.)

To train models with the downloaded dataset:

    $ ./cmd

All training commands are included in the cmd file.
Testing results are logged during the training stage.

## Results
![alt tag](result.JPG)

## Author

Zaiwei Zhang

## License
Our code is released under the BSD License (see the LICENSE file for details).
--------------------------------------------------------------------------------
/cmd:
--------------------------------------------------------------------------------
CUDA_VISIBLE_DEVICES=1 nohup python train_pc_joint_multi_combinesample_queue.py > trainlog_pc &
CUDA_VISIBLE_DEVICES=0 nohup python train_voxel_joint_multi_v1.py > trainlog_voxel_v1 &
CUDA_VISIBLE_DEVICES=0 nohup python train_voxel_joint_multi_v2.py > trainlog_voxel_v2 &
--------------------------------------------------------------------------------
/intro.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zaiweizhang/path_invariance_map_network/0b561ec97d01331f5b61e4e3b405316edbb9219b/intro.JPG
--------------------------------------------------------------------------------
/ops.py:
--------------------------------------------------------------------------------
import tensorflow as tf
import numpy as np

def conv(inputs, out_num, kernel_size, scope, data_type='3D', norm=True, is_training=True):
    if data_type == '2D':
        outs = tf.layers.conv2d(
            inputs, out_num, kernel_size, padding='same', name=scope+'/conv',
            kernel_initializer=tf.truncated_normal_initializer())  # instantiate the initializer
    else:
        shape = list(kernel_size) + [inputs.shape[-1].value, out_num]
        weights = tf.get_variable(
            scope+'/conv/weights', shape,
            initializer=tf.truncated_normal_initializer())
        outs = tf.nn.conv3d(
            inputs, weights, (1, 1, 1, 1, 1), padding='SAME',
            name=scope+'/conv')
    if norm:
        return tf.contrib.layers.batch_norm(
            outs, decay=0.9, epsilon=1e-5, activation_fn=tf.nn.relu,
            updates_collections=None, scope=scope+'/batch_norm', is_training=is_training)
    else:
        return outs


def deconv(inputs, out_num, kernel_size, scope, data_type='3D', is_training=True, bn_decay=None, **kws):
    if data_type == '2D':
        outs = tf.layers.conv2d_transpose(
            inputs, out_num, kernel_size, (2, 2), padding='same', name=scope,
            kernel_initializer=tf.truncated_normal_initializer())  # instantiate the initializer
    else:
        shape = list(kernel_size) + [out_num, out_num]
        input_shape = inputs.shape.as_list()
        out_shape = [input_shape[0]] + \
            list(map(lambda x: x*2, input_shape[1:-1])) + [out_num]
        with tf.device("/cpu:0"):
            weights = tf.get_variable(
                scope+'/deconv/weights', shape,
                initializer=tf.truncated_normal_initializer())
        outs = tf.nn.conv3d_transpose(
            inputs, weights, out_shape, (1, 2, 2, 2, 1), name=scope+'/deconv')
    return tf.contrib.layers.batch_norm(
        outs, decay=bn_decay, epsilon=1e-5, activation_fn=tf.nn.relu,
        updates_collections=None, scope=scope+'/batch_norm', is_training=is_training)


def pool(inputs, kernel_size, scope, data_type='3D'):
    if data_type == '2D':
        return tf.layers.max_pooling2d(inputs, kernel_size, (2, 2), name=scope)
    return tf.layers.max_pooling3d(inputs, kernel_size, (2, 2, 2), name=scope)

def bias_variable(shape):
    initial = tf.zeros(shape)
    return tf.Variable(initial)

def weight_variable(shape):
    initial = tf.random_normal(shape, mean=0.0, stddev=0.01)
    return tf.Variable(initial)
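
# A minimal usage sketch (not part of the original file; names and shapes are
# illustrative assumptions) showing how the helpers above compose into one
# encoder/decoder stage of the kind used by the 3D U-Net voxel branch:
def _demo_ops():
    vox = tf.placeholder(tf.float32, shape=(4, 32, 32, 32, 1))
    down = conv(vox, 32, (3, 3, 3), 'demo_down')    # (4, 32, 32, 32, 32) after conv+BN+ReLU
    pooled = pool(down, (2, 2, 2), 'demo_pool')     # (4, 16, 16, 16, 32) after 2x max pooling
    up = deconv(pooled, 32, (3, 3, 3), 'demo_up', bn_decay=0.9)  # back to 32^3 cells
    return up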
--------------------------------------------------------------------------------
/pc_util.py:
--------------------------------------------------------------------------------
""" Utility functions for processing point clouds.

Heavily borrowed from pointnet2
Author: Charles R. Qi, Hao Su
Date: November 2016
"""

import os
import sys
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(BASE_DIR)

# Point cloud IO
import numpy as np
from plyfile import PlyData, PlyElement
import operator

import matplotlib.pyplot as pyplot

from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.neighbors import KDTree
from multiprocessing import Process, Manager

def write_ply_color_multic(points, labels, out_filename):
    """ Color (N,3) points with labels (N) within range 0 ~ num_classes-1 and write them as a PLY file """
    N = points.shape[0]
    fout = open(out_filename, 'w')
    ### Write header here
    fout.write("ply\n")
    fout.write("format ascii 1.0\n")
    fout.write("element vertex %d\n" % N)
    fout.write("property float x\n")
    fout.write("property float y\n")
    fout.write("property float z\n")
    fout.write("property uchar red\n")
    fout.write("property uchar green\n")
    fout.write("property uchar blue\n")
    fout.write("end_header\n")
    for i in range(N):
        c = pyplot.cm.hsv(labels[i])
        c = [int(x*255) for x in c]
        fout.write('%f %f %f %d %d %d\n' % (points[i,0],points[i,1],points[i,2],c[0],c[1],c[2]))
    fout.close()

def surface_variant_para(stored, pcndex, pc):
    num_neighbour = 10
    pca = PCA()
    kdt = KDTree(pc, leaf_size=100, metric='euclidean')
    ### For each point we get the surface variant
    hm = np.zeros(pc.shape[0])
    idx = kdt.query(pc,k=num_neighbour)[1]
    for i in range(len(idx)):
        data = pc[idx[i],:]
        pca.fit(data)
        lambdas = pca.singular_values_
        hm[i] = lambdas[2]/float(sum(lambdas))
        if np.isnan(hm[i]):
            hm[i] = 0
    ### Normalize the surface variant here
    minv = np.min(hm)
    maxv = np.max(hm)
    if float(maxv - minv) == 0:
        stored[pcndex] = np.ones(hm.shape)
    else:
        stored[pcndex] = (hm-minv)/float(maxv - minv)*0.9+0.1

def sample_multi(pc):
    ### Do multi-processing here to reduce time
    numP = 16
    result = []
    proc = []
    stored = Manager().dict()

    chunk = len(pc)//numP
    for i in range(numP):
        # The last worker takes the remainder so that no points are silently
        # dropped when len(pc) is not divisible by numP.
        newbdata = pc[i*chunk:,...] if i == numP-1 else pc[i*chunk:(i+1)*chunk,...]
        p = Process(target=surface_variant_para, args=(stored, i, newbdata))
        p.start()
        proc.append(p)

    for p in proc:
        p.join()

    for ndex in sorted(stored.keys()):
        result.append(stored[ndex])

    result = np.concatenate(result, 0)
    return result
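
# Hypothetical usage (not in the original file): rank the points of an (N, 3)
# cloud by local surface variation; high scores mark geometrically salient
# regions (edges, corners), and surface_variant_para above normalizes the
# scores into [0.1, 1.0].
#
#   pc = np.random.rand(16000, 3)
#   saliency = sample_multi(pc)   # (N,) score per input point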

### Multi-view to point cloud conversion
def DepthToPointCloud(image, label, pred, intrinsic):
    # depth image to point cloud
    h, w = image.shape[0], image.shape[1]
    ys, xs = np.meshgrid(range(h),range(w),indexing='ij')
    vals = image[ys, xs]
    labels = label[ys, xs]
    preds = pred[ys, xs]
    valid = (vals != 0)
    ys, xs, vals, labels, preds = ys[valid], xs[valid], vals[valid], labels[valid], preds[valid]
    points = np.zeros([len(ys), 3])
    points[:,0] = (xs-w/2.0) / intrinsic[0] * vals
    points[:,1] = (ys-h/2.0) / intrinsic[0] * vals
    points[:,2] = vals
    return points, labels, preds

def mv_to_pc(batch_image, batch_label, pred_label, batch_pose, intrinsic):
    pcall = []
    labelall = []
    predall = []
    R_base = np.linalg.inv(batch_pose[0])
    for i in range(len(batch_image)):
        pc, label, pred = DepthToPointCloud(np.squeeze(batch_image[i][:,:,0]), np.squeeze(batch_label[i]), np.squeeze(pred_label[i]), intrinsic)
        R = np.matmul(R_base, batch_pose[i])
        pc = np.matmul(R[:3,:3], pc.T)+R[:3,3:4]
        pcall.append(pc.copy())
        labelall.append(label)
        predall.append(pred)
    pcall = (np.concatenate(pcall, 1)).T
    labelall = np.concatenate(labelall, 0)
    predall = np.concatenate(predall, 0)
    pc2obj(pcall[::100,:].T)
    return pcall[::100,:], labelall[::100], predall[::100]

def voting_pc(pc, pred):
    pc_smaller = pc[::10,:]
    pred_smaller = pred[::10]
    pc_reduced = pc[::100,:]
    newpc_dict = {i:{} for i in range(pc_reduced.shape[0])}
    for i in range(len(pc_smaller)):
        if i % 1000 == 0:
            print ("done with pc:", i)
        dist2 = np.sum((pc_reduced - pc_smaller[i,:])**2, axis=1)
        idx = np.argmin(dist2)
        if pred_smaller[i] in newpc_dict[idx].keys():
            newpc_dict[idx][pred_smaller[i]] += 1
        else:
            newpc_dict[idx][pred_smaller[i]] = 1
    newpred = []
    for i in range(len(pc_reduced)):
        newpred.append(max(newpc_dict[i].iteritems(), key=operator.itemgetter(1))[0])
    return np.array(newpred)

def mv_to_pc_voting(batch_image, batch_label, pred_label, batch_pose, intrinsic):
    pcall = []
    labelall = []
    predall = []
    R_base = np.linalg.inv(batch_pose[0])
    for i in range(len(batch_image)):
        pc, label, pred = DepthToPointCloud(np.squeeze(batch_image[i][:,:,0]), np.squeeze(batch_label[i]), np.squeeze(pred_label[i]), intrinsic)
        R = np.matmul(R_base, batch_pose[i])
        pc = np.matmul(R[:3,:3], pc.T)+R[:3,3:4]
        pcall.append(pc.copy())
        labelall.append(label)
        predall.append(pred)
    pcall = (np.concatenate(pcall, 1)).T
    labelall = np.concatenate(labelall, 0)
    predall = np.concatenate(predall, 0)
    pred_reduced = voting_pc(pcall, predall)
    pc2obj(pcall[::100,:].T)
    return pcall[::100,:], labelall[::100], pred_reduced

def pc2obj(pc, filepath='test.obj'):
    nverts = pc.shape[1]
    with open(filepath, 'w') as f:
        f.write("# OBJ file\n")
        for v in range(nverts):
            f.write("v %.4f %.4f %.4f\n" % (pc[0,v],pc[1,v],pc[2,v]))
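
# Back-projection used by DepthToPointCloud above (pinhole camera model with
# focal length f = intrinsic[0]): a pixel (u, v) with depth z maps to
#   X = (u - w/2) * z / f,  Y = (v - h/2) * z / f,  Z = z,
# and mv_to_pc then maps every frame into the first camera's coordinate
# system through R = inv(pose_0) * pose_i.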

# ----------------------------------------
# Point Cloud/Volume Conversions
# ----------------------------------------
def point_cloud_label_to_surface_voxel_label(point_cloud, label, res=0.0484):
    coordmax = np.max(point_cloud,axis=0)
    coordmin = np.min(point_cloud,axis=0)
    nvox = np.ceil((coordmax-coordmin)/res)
    vidx = np.ceil((point_cloud-coordmin)/res)
    vidx = vidx[:,0]+vidx[:,1]*nvox[0]+vidx[:,2]*nvox[0]*nvox[1]
    uvidx = np.unique(vidx)
    if label.ndim==1:
        uvlabel = [np.argmax(np.bincount(label[vidx==uv].astype(np.uint32))) for uv in uvidx]
    else:
        assert(label.ndim==2)
        uvlabel = np.zeros((len(uvidx),label.shape[1]))  # shape must be passed as a tuple
        for i in range(label.shape[1]):
            uvlabel[:,i] = np.array([np.argmax(np.bincount(label[vidx==uv,i].astype(np.uint32))) for uv in uvidx])
    return uvidx, uvlabel, nvox

def point_cloud_label_to_surface_voxel_label_fast(point_cloud, label, res=0.0484):
    coordmax = np.max(point_cloud,axis=0)
    coordmin = np.min(point_cloud,axis=0)
    nvox = np.ceil((coordmax-coordmin)/res)
    vidx = np.ceil((point_cloud-coordmin)/res)
    vidx = vidx[:,0]+vidx[:,1]*nvox[0]+vidx[:,2]*nvox[0]*nvox[1]
    uvidx, vpidx = np.unique(vidx,return_index=True)
    if label.ndim==1:
        uvlabel = label[vpidx]
    else:
        assert(label.ndim==2)
        uvlabel = label[vpidx,:]
    return uvidx, uvlabel, nvox

def point_cloud_to_volume_batch(point_clouds, vsize=12, radius=1.1, flatten=True):
    """ Input is BxNx3 batch of point cloud
        Output is Bx(vsize^3) if flatten, else BxVxVxVx1
    """
    vol_list = []
    for b in range(point_clouds.shape[0]):
        vol = point_cloud_to_volume(np.squeeze(point_clouds[b,:,:]), vsize, radius)
        if flatten:
            vol_list.append(vol.flatten())
        else:
            vol_list.append(np.expand_dims(np.expand_dims(vol, -1), 0))
    if flatten:
        return np.vstack(vol_list)
    else:
        return np.concatenate(vol_list, 0)

def point_cloud_label_to_volume_batch(point_clouds, labels, weights, vsize=12, radius=1.1, flatten=True):
    """ Input is a BxNx3 batch of point clouds with per-point labels and weights
        Output is three volumes: BxVxVxVx1 occupancy, BxVxVxV labels and BxVxVxV weights
    """
    vol_list = []
    label_list = []
    weight_list = []
    for b in range(point_clouds.shape[0]):
        vol, label, weight = point_cloud_label_to_volume(np.squeeze(point_clouds[b,:,:]), np.squeeze(labels[b,:]), np.squeeze(weights[b,:]), vsize, radius)
        vol_list.append(np.expand_dims(np.expand_dims(vol, -1), 0))
        label_list.append(np.expand_dims(label, 0))
        weight_list.append(np.expand_dims(weight, 0))

    return np.concatenate(vol_list, 0), np.concatenate(label_list, 0), np.concatenate(weight_list, 0)

def point_cloud_label_to_volume_batch_exact(point_clouds, vsize=12, radius=1.1, flatten=True):
    """ Input is BxNx3 batch of point cloud
        Output is BxVxVxVx1 occupancy volumes
    """
    vol_list = []
    for b in range(point_clouds.shape[0]):
        vol = np.zeros((vsize,vsize,vsize))
        voxel = 2*radius/float(vsize)
        locations = (np.squeeze(point_clouds[b,:,:]) + radius)/voxel
        locations = locations.astype(int)
        vol[locations[:,0],locations[:,1],locations[:,2]] = 1.0
        vol_list.append(np.expand_dims(np.expand_dims(vol, -1), 0))
    return np.concatenate(vol_list, 0)
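
# A hypothetical round-trip sketch (not part of the original file) for the
# converters in this section; the points must already lie inside
# [-radius, radius], as all of these converters assume:
def _volume_roundtrip_demo():
    pts = np.random.uniform(-0.95, 0.95, (2048, 3))
    vol = point_cloud_to_volume(pts, vsize=32, radius=1.0)  # 32x32x32 {0,1} occupancy grid
    occupied = volume_to_point_cloud(vol)                   # Mx3 integer voxel indices
    return vol, occupied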

def point_cloud_label_to_volume(points, label, weight, vsize, radius=1.1):
    """ input is Nx3 points.
        output is vsize*vsize*vsize
        assumes points are in range [-radius, radius]
    """
    vol = np.zeros((vsize,vsize,vsize))
    la = np.zeros((vsize,vsize,vsize))
    we = np.zeros((vsize,vsize,vsize))
    voxel = 2*radius/float(vsize)
    locations = (points + radius)/voxel
    locations = locations.astype(int)
    vol[locations[:,0],locations[:,1],locations[:,2]] = 1.0
    la[locations[:,0],locations[:,1],locations[:,2]] = label
    we[locations[:,0],locations[:,1],locations[:,2]] = weight
    return vol, la, we

def point_cloud_to_volume(points, vsize, radius=1.0):
    """ input is Nx3 points.
        output is vsize*vsize*vsize
        assumes points are in range [-radius, radius]
    """
    vol = np.zeros((vsize,vsize,vsize))
    voxel = 2*radius/float(vsize)
    locations = (points + radius)/voxel
    locations = locations.astype(int)
    vol[locations[:,0],locations[:,1],locations[:,2]] = 1.0
    return vol

def volume_topc_batch(pred_val, batch_label_vol, batch_smpw_vol):
    bsize = pred_val.shape[0]
    pred_pc = []
    label_pc = []
    smpw_pc = []
    other_pc = []
    aug_data = []
    vsize = pred_val.shape[1]
    for i in range(bsize):
        points = []
        points_label = []
        points_smpw = []
        points_other = []
        points_aug = []
        for a in range(vsize):
            for b in range(vsize):
                for c in range(vsize):
                    if batch_label_vol[i,a,b,c] > 0:
                        points.append(pred_val[i,a,b,c])
                        points_label.append(batch_label_vol[i,a,b,c])
                        points_smpw.append(batch_smpw_vol[i,a,b,c])
                        points_aug.append(np.array([a,b,c]))
                    elif batch_label_vol[i,a,b,c] == 0 and batch_smpw_vol[i,a,b,c] > 0:
                        points_other.append(pred_val[i,a,b,c])
        if len(points) == 0:
            continue

        pred_pc.append(np.array(points))
        label_pc.append(np.array(points_label))
        smpw_pc.append(np.array(points_smpw))
        other_pc.append(np.array(points_other))
        aug_data.append(np.array(points_aug))
    return pred_pc, label_pc, smpw_pc, other_pc, aug_data

def volume_topc_batch_exact(pred_val, batch_data, radius=1.1, vsize=32):
    bsize = pred_val.shape[0]
    pred_pc = []
    label_pc = []
    for i in range(bsize):
        cur_data = batch_data[i,:,:]
        cur_val = pred_val[i,:,:,:]

        voxel = 2*radius/float(vsize)
        cur_data = (np.squeeze(cur_data) + radius)/voxel
        cur_data = cur_data.astype(int)

        points_label = cur_val[cur_data[:,0], cur_data[:,1], cur_data[:,2]]
        pred_pc.append(points_label)
    return np.array(pred_pc)

def volume_to_point_cloud(vol):
    """ vol is occupancy grid (value = 0 or 1) of size vsize*vsize*vsize
        return Nx3 numpy array.
329 | """ 330 | vsize = vol.shape[0] 331 | assert(vol.shape[1] == vsize and vol.shape[1] == vsize) 332 | points = [] 333 | for a in range(vsize): 334 | for b in range(vsize): 335 | for c in range(vsize): 336 | if vol[a,b,c] == 1: 337 | points.append(np.array([a,b,c])) 338 | if len(points) == 0: 339 | return np.zeros((0,3)) 340 | points = np.vstack(points) 341 | return points 342 | 343 | def point_cloud_to_volume_v2_batch(point_clouds, vsize=12, radius=1.0, num_sample=128): 344 | """ Input is BxNx3 a batch of point cloud 345 | Output is BxVxVxVxnum_samplex3 346 | Added on Feb 19 347 | """ 348 | vol_list = [] 349 | for b in range(point_clouds.shape[0]): 350 | vol = point_cloud_to_volume_v2(point_clouds[b,:,:], vsize, radius, num_sample) 351 | vol_list.append(np.expand_dims(vol, 0)) 352 | return np.concatenate(vol_list, 0) 353 | 354 | def point_cloud_to_volume_v2(points, vsize, radius=1.0, num_sample=128): 355 | """ input is Nx3 points 356 | output is vsize*vsize*vsize*num_sample*3 357 | assumes points are in range [-radius, radius] 358 | samples num_sample points in each voxel, if there are less than 359 | num_sample points, replicate the points 360 | Added on Feb 19 361 | """ 362 | vol = np.zeros((vsize,vsize,vsize,num_sample,3)) 363 | voxel = 2*radius/float(vsize) 364 | locations = (points + radius)/voxel 365 | locations = locations.astype(int) 366 | loc2pc = {} 367 | for n in range(points.shape[0]): 368 | loc = tuple(locations[n,:]) 369 | if loc not in loc2pc: 370 | loc2pc[loc] = [] 371 | loc2pc[loc].append(points[n,:]) 372 | 373 | for i in range(vsize): 374 | for j in range(vsize): 375 | for k in range(vsize): 376 | if (i,j,k) not in loc2pc: 377 | vol[i,j,k,:,:] = np.zeros((num_sample,3)) 378 | else: 379 | pc = loc2pc[(i,j,k)] # a list of (3,) arrays 380 | pc = np.vstack(pc) # kx3 381 | # Sample/pad to num_sample points 382 | if pc.shape[0]>num_sample: 383 | choices = np.random.choice(pc.shape[0], num_sample, replace=False) 384 | pc = pc[choices,:] 385 | elif pc.shape[0]num_sample: 433 | choices = np.random.choice(pc.shape[0], num_sample, replace=False) 434 | pc = pc[choices,:] 435 | elif pc.shape[0] 0) 497 | dx = mask[:, 0] 498 | dy = mask[:, 1] 499 | dv = disk[disk > 0] 500 | 501 | # Order points by z-buffer 502 | zorder = np.argsort(points[:, 2]) 503 | points = points[zorder, :] 504 | points[:, 2] = (points[:, 2] - np.min(points[:, 2])) / (np.max(points[:, 2] - np.min(points[:, 2]))) 505 | max_depth = np.max(points[:, 2]) 506 | 507 | for i in range(points.shape[0]): 508 | j = points.shape[0] - i - 1 509 | x = points[j, 0] 510 | y = points[j, 1] 511 | xc = canvasSize/2 + (x*space) 512 | yc = canvasSize/2 + (y*space) 513 | xc = int(np.round(xc)) 514 | yc = int(np.round(yc)) 515 | 516 | px = dx + xc 517 | py = dy + yc 518 | 519 | image[px, py] = image[px, py] * 0.7 + dv * (max_depth - points[j, 2]) * 0.3 520 | 521 | image = image / np.max(image) 522 | return image 523 | 524 | def point_cloud_three_views(points): 525 | """ input points Nx3 numpy array (+y is up direction). 526 | return an numpy array gray image of size 500x1500. 
""" 527 | # +y is up direction 528 | # xrot is azimuth 529 | # yrot is in-plane 530 | # zrot is elevation 531 | img1 = draw_point_cloud(points, zrot=110/180.0*np.pi, xrot=45/180.0*np.pi, yrot=0/180.0*np.pi) 532 | img2 = draw_point_cloud(points, zrot=70/180.0*np.pi, xrot=135/180.0*np.pi, yrot=0/180.0*np.pi) 533 | img3 = draw_point_cloud(points, zrot=180.0/180.0*np.pi, xrot=90/180.0*np.pi, yrot=0/180.0*np.pi) 534 | image_large = np.concatenate([img1, img2, img3], 1) 535 | return image_large 536 | 537 | 538 | def point_cloud_three_views_demo(): 539 | """ Demo for draw_point_cloud function """ 540 | from PIL import Image 541 | points = read_ply('../third_party/mesh_sampling/piano.ply') 542 | im_array = point_cloud_three_views(points) 543 | img = Image.fromarray(np.uint8(im_array*255.0)) 544 | img.save('piano.jpg') 545 | 546 | if __name__=="__main__": 547 | point_cloud_three_views_demo() 548 | 549 | 550 | def pyplot_draw_point_cloud(points, output_filename): 551 | """ points is a Nx3 numpy array """ 552 | import matplotlib.pyplot as plt 553 | fig = plt.figure() 554 | ax = fig.add_subplot(111, projection='3d') 555 | ax.scatter(points[:,0], points[:,1], points[:,2]) 556 | ax.set_xlabel('x') 557 | ax.set_ylabel('y') 558 | ax.set_zlabel('z') 559 | #savefig(output_filename) 560 | 561 | def pyplot_draw_volume(vol, output_filename): 562 | """ vol is of size vsize*vsize*vsize 563 | output an image to output_filename 564 | """ 565 | points = volume_to_point_cloud(vol) 566 | pyplot_draw_point_cloud(points, output_filename) 567 | 568 | def write_ply_color(points, labels, out_filename, num_classes=None): 569 | """ Color (N,3) points with labels (N) within range 0 ~ num_classes-1 as OBJ file """ 570 | import matplotlib.pyplot as pyplot 571 | labels = labels.astype(int) 572 | N = points.shape[0] 573 | if num_classes is None: 574 | num_classes = np.max(labels)+1 575 | else: 576 | assert(num_classes>np.max(labels)) 577 | fout = open(out_filename, 'w') 578 | colors = [pyplot.cm.hsv(i/float(num_classes)) for i in range(num_classes)] 579 | for i in range(N): 580 | c = colors[labels[i]] 581 | c = [int(x*255) for x in c] 582 | fout.write('v %f %f %f %d %d %d\n' % (points[i,0],points[i,1],points[i,2],c[0],c[1],c[2])) 583 | fout.close() 584 | -------------------------------------------------------------------------------- /pointnet2_sem_seg.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | BASE_DIR = os.path.dirname(__file__) 4 | sys.path.append(BASE_DIR) 5 | import tensorflow as tf 6 | import numpy as np 7 | import tf_util 8 | from pointnet_util import pointnet_sa_module, pointnet_fp_module 9 | 10 | def placeholder_inputs(batch_size, num_point, name=""): 11 | pointclouds_pl = tf.placeholder(tf.float32, shape=(batch_size, num_point, 3), name=name+"pc") 12 | labels_pl = tf.placeholder(tf.int32, shape=(batch_size, num_point), name=name+"label") 13 | smpws_pl = tf.placeholder(tf.float32, shape=(batch_size, num_point), name=name+"smpw") 14 | return pointclouds_pl, labels_pl, smpws_pl 15 | 16 | 17 | def get_model(point_cloud, is_training, num_class, bn_decay=None): 18 | """ Semantic segmentation PointNet, input is BxNx3, output Bxnum_class """ 19 | batch_size = point_cloud.get_shape()[0].value 20 | num_point = point_cloud.get_shape()[1].value 21 | end_points = {} 22 | l0_xyz = point_cloud 23 | l0_points = None 24 | end_points['l0_xyz'] = l0_xyz 25 | 26 | # Layer 1 27 | l1_xyz, l1_points, l1_indices = pointnet_sa_module(l0_xyz, l0_points, npoint=1024, 
/pointnet2_sem_seg.py:
--------------------------------------------------------------------------------
import os
import sys
BASE_DIR = os.path.dirname(__file__)
sys.path.append(BASE_DIR)
import tensorflow as tf
import numpy as np
import tf_util
from pointnet_util import pointnet_sa_module, pointnet_fp_module

def placeholder_inputs(batch_size, num_point, name=""):
    pointclouds_pl = tf.placeholder(tf.float32, shape=(batch_size, num_point, 3), name=name+"pc")
    labels_pl = tf.placeholder(tf.int32, shape=(batch_size, num_point), name=name+"label")
    smpws_pl = tf.placeholder(tf.float32, shape=(batch_size, num_point), name=name+"smpw")
    return pointclouds_pl, labels_pl, smpws_pl


def get_model(point_cloud, is_training, num_class, bn_decay=None):
    """ Semantic segmentation PointNet, input is BxNx3, output is BxNxnum_class """
    batch_size = point_cloud.get_shape()[0].value
    num_point = point_cloud.get_shape()[1].value
    end_points = {}
    l0_xyz = point_cloud
    l0_points = None
    end_points['l0_xyz'] = l0_xyz

    # Layer 1
    l1_xyz, l1_points, l1_indices = pointnet_sa_module(l0_xyz, l0_points, npoint=1024, radius=0.1, nsample=32, mlp=[32,32,64], mlp2=None, group_all=False, is_training=is_training, bn_decay=bn_decay, scope='layer1')
    l2_xyz, l2_points, l2_indices = pointnet_sa_module(l1_xyz, l1_points, npoint=256, radius=0.2, nsample=32, mlp=[64,64,128], mlp2=None, group_all=False, is_training=is_training, bn_decay=bn_decay, scope='layer2')
    l3_xyz, l3_points, l3_indices = pointnet_sa_module(l2_xyz, l2_points, npoint=64, radius=0.4, nsample=32, mlp=[128,128,256], mlp2=None, group_all=False, is_training=is_training, bn_decay=bn_decay, scope='layer3')
    l4_xyz, l4_points, l4_indices = pointnet_sa_module(l3_xyz, l3_points, npoint=16, radius=0.8, nsample=32, mlp=[256,256,512], mlp2=None, group_all=False, is_training=is_training, bn_decay=bn_decay, scope='layer4')

    # Feature Propagation layers
    l3_points = pointnet_fp_module(l3_xyz, l4_xyz, l3_points, l4_points, [256,256], is_training, bn_decay, scope='fa_layer1')
    l2_points = pointnet_fp_module(l2_xyz, l3_xyz, l2_points, l3_points, [256,256], is_training, bn_decay, scope='fa_layer2')
    l1_points = pointnet_fp_module(l1_xyz, l2_xyz, l1_points, l2_points, [256,128], is_training, bn_decay, scope='fa_layer3')
    l0_points = pointnet_fp_module(l0_xyz, l1_xyz, l0_points, l1_points, [128,128,128], is_training, bn_decay, scope='fa_layer4')

    # FC layers
    net = tf_util.conv1d(l0_points, 128, 1, padding='VALID', bn=True, is_training=is_training, scope='fc1', bn_decay=bn_decay)
    end_points['feats'] = net
    net = tf_util.dropout(net, keep_prob=0.5, is_training=is_training, scope='dp1')
    net = tf_util.conv1d(net, num_class, 1, padding='VALID', activation_fn=None, scope='fc2')

    return net, end_points


def get_loss(pred, label, smpw):
    """ pred: BxNxC,
        label: BxN,
        smpw: BxN """
    bsize = pred.get_shape()[0]
    classify_loss1 = tf.losses.sparse_softmax_cross_entropy(labels=label[0:bsize//2,...], logits=pred[0:bsize//2,...], weights=smpw[0:bsize//2,...])
    classify_loss2 = tf.losses.sparse_softmax_cross_entropy(labels=label[bsize//2:bsize,...], logits=pred[bsize//2:bsize,...], weights=smpw[bsize//2:bsize,...])
    classify_loss = classify_loss1 + 0.75*classify_loss2
    tf.summary.scalar('classify loss', classify_loss)
    tf.add_to_collection('losses', classify_loss)
    return classify_loss, classify_loss1, classify_loss2

if __name__=='__main__':
    with tf.Graph().as_default():
        inputs = tf.zeros((32,2048,3))
        net, _ = get_model(inputs, tf.constant(True), 10)
        print(net)
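
# Usage sketch for training (hypothetical batch/class sizes, not part of the
# original file): get_loss splits each batch in half and down-weights the
# second half's cross-entropy by 0.75, which suggests callers pack two sample
# sources into one batch.
#
#   pc_pl, label_pl, smpw_pl = placeholder_inputs(8, 8192)
#   pred, end_points = get_model(pc_pl, tf.constant(True), num_class=21)
#   loss, loss1, loss2 = get_loss(pred, label_pl, smpw_pl)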
--------------------------------------------------------------------------------
/pointnet2_sem_seg_voxel.py:
--------------------------------------------------------------------------------
import os
import sys
BASE_DIR = os.path.dirname(__file__)
sys.path.append(BASE_DIR)
import tensorflow as tf
import numpy as np
import tf_util
import ops
from pointnet_util import pointnet_sa_module, pointnet_fp_module

## Hyper parameters for voxel
#voxel_grid = 32
voxel_channel = 1
voxel_network_depth = 4
voxel_start_channel_num = 32
voxel_channel_axis = 4
voxel_conv_size = (3, 3, 3)
voxel_pool_size = (2, 2, 2)
voxel_action = 'concat'
voxel_class_num = 21+1

def placeholder_inputs(batch_size, v_size):
    pointclouds_pl = tf.placeholder(tf.float32, shape=(batch_size, v_size, v_size, v_size, 1))
    labels_pl = tf.placeholder(tf.int32, shape=(batch_size, v_size, v_size, v_size))
    smpws_pl = tf.placeholder(tf.float32, shape=(batch_size, v_size, v_size, v_size))
    return pointclouds_pl, labels_pl, smpws_pl

def build_down_block(inputs, name, down_outputs, first=False, TPS=False, is_training=True, bn_decay=None):
    out_num = voxel_start_channel_num if first else 2 * \
        inputs.shape[voxel_channel_axis].value

    conv1 = tf_util.conv3d(inputs, out_num, voxel_conv_size, name+'/conv1', bn=True, is_training=is_training, bn_decay=bn_decay)
    #if TPS == True:
    #    conv1 = self.transform.Encoder(conv1,conv1)
    conv2 = tf_util.conv3d(conv1, out_num, voxel_conv_size, name+'/conv2', bn=True, is_training=is_training, bn_decay=bn_decay)
    down_outputs.append(conv2)
    pool = ops.pool(conv2, voxel_pool_size, name + '/pool')
    return pool

def build_bottom_block(inputs, name, is_training=True, bn_decay=None):
    out_num = inputs.shape[voxel_channel_axis].value
    conv1 = tf_util.conv3d(inputs, 2*out_num, voxel_conv_size, name+'/conv1', bn=True, is_training=is_training, bn_decay=bn_decay)
    conv2 = tf_util.conv3d(conv1, out_num, voxel_conv_size, name+'/conv2', bn=True, is_training=is_training, bn_decay=bn_decay)
    return conv2

def deconv_func():
    return getattr(ops, "deconv")

def conv_func():
    return getattr(ops, "conv")

def build_up_block(inputs, down_inputs, name, final=False, Decoder=False, is_training=True, bn_decay=None):
    out_num = inputs.shape[voxel_channel_axis].value
    conv1 = deconv_func()(
        inputs, out_num, voxel_conv_size, name+'/conv1',
        action=voxel_action, is_training=is_training, bn_decay=bn_decay)
    conv1 = tf.concat(
        [conv1, down_inputs], voxel_channel_axis, name=name+'/concat')
    conv2 = tf_util.conv3d(conv1, out_num, voxel_conv_size, name+'/conv2', bn=True, is_training=is_training, bn_decay=bn_decay)
    #if Decoder == True:
    #    conv2 = self.transform.Decoder(conv2,conv2)
    out_num = voxel_class_num if final else out_num/2
    conv3 = tf_util.conv3d(conv2, out_num, voxel_conv_size, name+'/conv3', bn=(not final), is_training=is_training, bn_decay=bn_decay)
    return conv3

def get_model(inputs, num_class, is_training=True, bn_decay=None):
    outputs = inputs
    down_outputs = []
    for layer_index in range(voxel_network_depth-1):
        is_first = True if not layer_index else False
        name = 'down%s' % layer_index
        outputs = build_down_block(outputs, name, down_outputs, first=is_first, TPS=False, is_training=is_training, bn_decay=bn_decay)
        print("down ", layer_index, " shape ", outputs.get_shape())
    outputs = build_bottom_block(outputs, 'bottom', is_training=is_training, bn_decay=bn_decay)
    for layer_index in range(voxel_network_depth-2, -1, -1):
        is_final = True if layer_index == 0 else False
        name = 'up%s' % layer_index
        down_inputs = down_outputs[layer_index]
        outputs = build_up_block(outputs, down_inputs, name, final=is_final, Decoder=False, is_training=is_training, bn_decay=bn_decay)
        print("up ", layer_index, " shape ", outputs.get_shape())
    return outputs
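
# Shape walk-through (illustrative): with voxel_network_depth=4 and
# voxel_start_channel_num=32, a (B, 32, 32, 32, 1) input is downsampled three
# times (32 -> 16 -> 8 -> 4 cells per side) while channels grow
# (32 -> 64 -> 128), passes the bottom block, and is decoded back up with
# skip concatenations to per-voxel logits of shape
# (B, 32, 32, 32, voxel_class_num).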
def get_loss(pred, label, smpw):
    """ pred: BxVxVxVxC voxel logits,
        label: BxVxVxV,
        smpw: BxVxVxV """
    bsize = pred.get_shape()[0]
    classify_loss1 = tf.losses.sparse_softmax_cross_entropy(labels=label[0:bsize//2,...], logits=pred[0:bsize//2,...], weights=smpw[0:bsize//2,...])
    classify_loss2 = tf.losses.sparse_softmax_cross_entropy(labels=label[bsize//2:bsize,...], logits=pred[bsize//2:bsize,...], weights=smpw[bsize//2:bsize,...])
    classify_loss = classify_loss1 + 0.75*classify_loss2
    tf.summary.scalar('classify loss', classify_loss)
    tf.add_to_collection('losses', classify_loss)
    return classify_loss, classify_loss1, classify_loss2

if __name__=='__main__':
    with tf.Graph().as_default():
        # Smoke test: this module's get_model takes (inputs, num_class) with a
        # 5D voxel tensor and returns a single logits tensor.
        inputs = tf.zeros((32,32,32,32,1))
        net = get_model(inputs, voxel_class_num)
        print(net)
--------------------------------------------------------------------------------
/pointnet_util.py:
--------------------------------------------------------------------------------
""" PointNet++ Layers

Author: Charles R. Qi
Date: November 2017
"""

import os
import sys
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.join(ROOT_DIR, 'tf_ops/sampling'))
sys.path.append(os.path.join(ROOT_DIR, 'tf_ops/grouping'))
sys.path.append(os.path.join(ROOT_DIR, 'tf_ops/3d_interpolation'))
from tf_sampling import farthest_point_sample, gather_point
from tf_grouping import query_ball_point, group_point, knn_point
from tf_interpolate import three_nn, three_interpolate
import tensorflow as tf
import numpy as np
import tf_util

def sample_and_group(npoint, radius, nsample, xyz, points, knn=False, use_xyz=True):
    '''
    Input:
        npoint: int32
        radius: float32
        nsample: int32
        xyz: (batch_size, ndataset, 3) TF tensor
        points: (batch_size, ndataset, channel) TF tensor, if None will just use xyz as points
        knn: bool, if True use kNN instead of radius search
        use_xyz: bool, if True concat XYZ with local point features, otherwise just use point features
    Output:
        new_xyz: (batch_size, npoint, 3) TF tensor
        new_points: (batch_size, npoint, nsample, 3+channel) TF tensor
        idx: (batch_size, npoint, nsample) TF tensor, indices of local points as in ndataset points
        grouped_xyz: (batch_size, npoint, nsample, 3) TF tensor, normalized point XYZs
            (subtracted by seed point XYZ) in local regions
    '''

    new_xyz = gather_point(xyz, farthest_point_sample(npoint, xyz)) # (batch_size, npoint, 3)
    if knn:
        _,idx = knn_point(nsample, xyz, new_xyz)
    else:
        idx, pts_cnt = query_ball_point(radius, nsample, xyz, new_xyz)
    grouped_xyz = group_point(xyz, idx) # (batch_size, npoint, nsample, 3)
    grouped_xyz -= tf.tile(tf.expand_dims(new_xyz, 2), [1,1,nsample,1]) # translation normalization
    if points is not None:
        grouped_points = group_point(points, idx) # (batch_size, npoint, nsample, channel)
        if use_xyz:
            new_points = tf.concat([grouped_xyz, grouped_points], axis=-1) # (batch_size, npoint, nsample, 3+channel)
        else:
            new_points = grouped_points
    else:
        new_points = grouped_xyz

    return new_xyz, new_points, idx, grouped_xyz
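
# Shape walk-through for sample_and_group (illustrative numbers): with xyz of
# shape (B, N, 3), npoint=1024 and nsample=32, farthest point sampling keeps
# 1024 seeds, ball query gathers 32 neighbours per seed, and new_points comes
# back as (B, 1024, 32, 3+channel) when use_xyz concatenates the normalized
# local XYZ offsets with the per-point features.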

def sample_and_group_all(xyz, points, use_xyz=True):
    '''
    Inputs:
        xyz: (batch_size, ndataset, 3) TF tensor
        points: (batch_size, ndataset, channel) TF tensor, if None will just use xyz as points
        use_xyz: bool, if True concat XYZ with local point features, otherwise just use point features
    Outputs:
        new_xyz: (batch_size, 1, 3) as (0,0,0)
        new_points: (batch_size, 1, ndataset, 3+channel) TF tensor
    Note:
        Equivalent to sample_and_group with npoint=1, radius=inf, use (0,0,0) as the centroid
    '''
    batch_size = xyz.get_shape()[0].value
    nsample = xyz.get_shape()[1].value
    new_xyz = tf.constant(np.tile(np.array([0,0,0]).reshape((1,1,3)), (batch_size,1,1)),dtype=tf.float32) # (batch_size, 1, 3)
    idx = tf.constant(np.tile(np.array(range(nsample)).reshape((1,1,nsample)), (batch_size,1,1)))
    grouped_xyz = tf.reshape(xyz, (batch_size, 1, nsample, 3)) # (batch_size, npoint=1, nsample, 3)
    if points is not None:
        if use_xyz:
            new_points = tf.concat([xyz, points], axis=2) # (batch_size, 16, 259)
        else:
            new_points = points
        new_points = tf.expand_dims(new_points, 1) # (batch_size, 1, 16, 259)
    else:
        new_points = grouped_xyz
    return new_xyz, new_points, idx, grouped_xyz


def pointnet_sa_module(xyz, points, npoint, radius, nsample, mlp, mlp2, group_all, is_training, bn_decay, scope, bn=True, pooling='max', knn=False, use_xyz=True, use_nchw=False):
    ''' PointNet Set Abstraction (SA) Module
        Input:
            xyz: (batch_size, ndataset, 3) TF tensor
            points: (batch_size, ndataset, channel) TF tensor
            npoint: int32 -- #points sampled in farthest point sampling
            radius: float32 -- search radius in local region
            nsample: int32 -- how many points in each local region
            mlp: list of int32 -- output size for MLP on each point
            mlp2: list of int32 -- output size for MLP on each region
            group_all: bool -- group all points into one PC if set true, OVERRIDES npoint, radius and nsample settings
            use_xyz: bool, if True concat XYZ with local point features, otherwise just use point features
            use_nchw: bool, if True, use NCHW data format for conv2d, which is usually faster than NHWC format
        Return:
            new_xyz: (batch_size, npoint, 3) TF tensor
            new_points: (batch_size, npoint, mlp[-1] or mlp2[-1]) TF tensor
            idx: (batch_size, npoint, nsample) int32 -- indices for local regions
    '''
    data_format = 'NCHW' if use_nchw else 'NHWC'
    with tf.variable_scope(scope) as sc:
        # Sample and Grouping
        if group_all:
            nsample = xyz.get_shape()[1].value
            new_xyz, new_points, idx, grouped_xyz = sample_and_group_all(xyz, points, use_xyz)
        else:
            new_xyz, new_points, idx, grouped_xyz = sample_and_group(npoint, radius, nsample, xyz, points, knn, use_xyz)

        # Point Feature Embedding
        if use_nchw: new_points = tf.transpose(new_points, [0,3,1,2])
        for i, num_out_channel in enumerate(mlp):
            new_points = tf_util.conv2d(new_points, num_out_channel, [1,1],
                                        padding='VALID', stride=[1,1],
                                        bn=bn, is_training=is_training,
                                        scope='conv%d'%(i), bn_decay=bn_decay,
                                        data_format=data_format)
        if use_nchw: new_points = tf.transpose(new_points, [0,2,3,1])

        # Pooling in Local Regions
        if pooling=='max':
            new_points = tf.reduce_max(new_points, axis=[2], keep_dims=True, name='maxpool')
        elif pooling=='avg':
            new_points = tf.reduce_mean(new_points, axis=[2], keep_dims=True, name='avgpool')
        elif pooling=='weighted_avg':
            with tf.variable_scope('weighted_avg'):
                dists = tf.norm(grouped_xyz,axis=-1,ord=2,keep_dims=True)
                exp_dists = tf.exp(-dists * 5)
                weights = exp_dists/tf.reduce_sum(exp_dists,axis=2,keep_dims=True) # (batch_size, npoint, nsample, 1)
                new_points *= weights # (batch_size, npoint, nsample, mlp[-1])
                new_points = tf.reduce_sum(new_points, axis=2, keep_dims=True)
        elif pooling=='max_and_avg':
            max_points = tf.reduce_max(new_points, axis=[2], keep_dims=True, name='maxpool')
            avg_points = tf.reduce_mean(new_points, axis=[2], keep_dims=True, name='avgpool')
            new_points = tf.concat([avg_points, max_points], axis=-1)

        # [Optional] Further Processing
        if mlp2 is not None:
            if use_nchw: new_points = tf.transpose(new_points, [0,3,1,2])
            for i, num_out_channel in enumerate(mlp2):
                new_points = tf_util.conv2d(new_points, num_out_channel, [1,1],
                                            padding='VALID', stride=[1,1],
                                            bn=bn, is_training=is_training,
                                            scope='conv_post_%d'%(i), bn_decay=bn_decay,
                                            data_format=data_format)
            if use_nchw: new_points = tf.transpose(new_points, [0,2,3,1])

        new_points = tf.squeeze(new_points, [2]) # (batch_size, npoints, mlp2[-1])
        return new_xyz, new_points, idx

def pointnet_sa_module_msg(xyz, points, npoint, radius_list, nsample_list, mlp_list, is_training, bn_decay, scope, bn=True, use_xyz=True, use_nchw=False):
    ''' PointNet Set Abstraction (SA) module with Multi-Scale Grouping (MSG)
        Input:
            xyz: (batch_size, ndataset, 3) TF tensor
            points: (batch_size, ndataset, channel) TF tensor
            npoint: int32 -- #points sampled in farthest point sampling
            radius_list: list of float32 -- search radius in local region
            nsample_list: list of int32 -- how many points in each local region
            mlp_list: list of list of int32 -- output size for MLP on each point
            use_xyz: bool, if True concat XYZ with local point features, otherwise just use point features
            use_nchw: bool, if True, use NCHW data format for conv2d, which is usually faster than NHWC format
        Return:
            new_xyz: (batch_size, npoint, 3) TF tensor
            new_points: (batch_size, npoint, \sum_k{mlp[k][-1]}) TF tensor
    '''
    data_format = 'NCHW' if use_nchw else 'NHWC'
    with tf.variable_scope(scope) as sc:
        new_xyz = gather_point(xyz, farthest_point_sample(npoint, xyz))
        new_points_list = []
        for i in range(len(radius_list)):
            radius = radius_list[i]
            nsample = nsample_list[i]
            idx, pts_cnt = query_ball_point(radius, nsample, xyz, new_xyz)
            grouped_xyz = group_point(xyz, idx)
            grouped_xyz -= tf.tile(tf.expand_dims(new_xyz, 2), [1,1,nsample,1])
            if points is not None:
                grouped_points = group_point(points, idx)
                if use_xyz:
                    grouped_points = tf.concat([grouped_points, grouped_xyz], axis=-1)
            else:
                grouped_points = grouped_xyz
            if use_nchw: grouped_points = tf.transpose(grouped_points, [0,3,1,2])
            for j,num_out_channel in enumerate(mlp_list[i]):
                grouped_points = tf_util.conv2d(grouped_points, num_out_channel, [1,1],
                                                padding='VALID', stride=[1,1], bn=bn, is_training=is_training,
                                                scope='conv%d_%d'%(i,j), bn_decay=bn_decay)
            if use_nchw: grouped_points = tf.transpose(grouped_points, [0,2,3,1])
            new_points = tf.reduce_max(grouped_points, axis=[2])
            new_points_list.append(new_points)
        new_points_concat = tf.concat(new_points_list, axis=-1)
        return new_xyz, new_points_concat


def pointnet_fp_module(xyz1, xyz2, points1, points2, mlp, is_training, bn_decay, scope, bn=True):
    ''' PointNet Feature Propagation (FP) Module
        Input:
            xyz1: (batch_size, ndataset1, 3) TF tensor
            xyz2: (batch_size, ndataset2, 3) TF tensor, sparser than xyz1
            points1: (batch_size, ndataset1, nchannel1) TF tensor
            points2: (batch_size, ndataset2, nchannel2) TF tensor
            mlp: list of int32 -- output size for MLP on each point
        Return:
            new_points: (batch_size, ndataset1, mlp[-1]) TF tensor
    '''
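    # Interpolation summary: for every point in xyz1, three_nn returns its
    # three nearest neighbours in the sparser xyz2, and their features are
    # blended with inverse-distance weights w_i = (1/d_i) / sum_j (1/d_j), so
    # closer neighbours dominate; the 1e-10 floor below guards against
    # division by zero when a point coincides with a neighbour.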
    with tf.variable_scope(scope) as sc:
        dist, idx = three_nn(xyz1, xyz2)
        dist = tf.maximum(dist, 1e-10)
        norm = tf.reduce_sum((1.0/dist),axis=2,keep_dims=True)
        norm = tf.tile(norm,[1,1,3])
        weight = (1.0/dist) / norm
        interpolated_points = three_interpolate(points2, idx, weight)

        if points1 is not None:
            new_points1 = tf.concat(axis=2, values=[interpolated_points, points1]) # B,ndataset1,nchannel1+nchannel2
        else:
            new_points1 = interpolated_points
        new_points1 = tf.expand_dims(new_points1, 2)
        for i, num_out_channel in enumerate(mlp):
            new_points1 = tf_util.conv2d(new_points1, num_out_channel, [1,1],
                                         padding='VALID', stride=[1,1],
                                         bn=bn, is_training=is_training,
                                         scope='conv_%d'%(i), bn_decay=bn_decay)
        new_points1 = tf.squeeze(new_points1, [2]) # B,ndataset1,mlp[-1]
        return new_points1
--------------------------------------------------------------------------------
/provider.py:
--------------------------------------------------------------------------------
import os
import sys
import numpy as np
import h5py
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(BASE_DIR)

def shuffle_data(data, labels):
    """ Shuffle data and labels.
        Input:
            data: B,N,... numpy array
            label: B,... numpy array
        Return:
            shuffled data, label and shuffle indices
    """
    idx = np.arange(len(labels))
    np.random.shuffle(idx)
    return data[idx, ...], labels[idx], idx

def shuffle_points(batch_data):
    """ Shuffle orders of points in each point cloud -- changes FPS behavior.
        Use the same shuffling idx for the entire batch.
        Input:
            BxNxC array
        Output:
            BxNxC array
    """
    idx = np.arange(batch_data.shape[1])
    np.random.shuffle(idx)
    return batch_data[:,idx,:]

def rotate_point_cloud(batch_data):
    """ Randomly rotate the point clouds to augment the dataset
        rotation is per shape based along up direction
        Input:
            BxNx3 array, original batch of point clouds
        Return:
            BxNx3 array, rotated batch of point clouds
    """
    rotated_data = np.zeros(batch_data.shape, dtype=np.float32)
    for k in xrange(batch_data.shape[0]):
        rotation_angle = np.random.uniform() * 2 * np.pi
        cosval = np.cos(rotation_angle)
        sinval = np.sin(rotation_angle)
        rotation_matrix = np.array([[cosval, 0, sinval],
                                    [0, 1, 0],
                                    [-sinval, 0, cosval]])
        shape_pc = batch_data[k, ...]
        rotated_data[k, ...] = np.dot(shape_pc.reshape((-1, 3)), rotation_matrix)
    return rotated_data

def rotate_point_cloud_z(batch_data):
    """ Randomly rotate the point clouds to augment the dataset
        rotation is per shape based along up direction
        Input:
            BxNx3 array, original batch of point clouds
        Return:
            BxNx3 array, rotated batch of point clouds
    """
    rotated_data = np.zeros(batch_data.shape, dtype=np.float32)
    for k in xrange(batch_data.shape[0]):
        rotation_angle = np.random.uniform() * 2 * np.pi
        cosval = np.cos(rotation_angle)
        sinval = np.sin(rotation_angle)
        rotation_matrix = np.array([[cosval, sinval, 0],
                                    [-sinval, cosval, 0],
                                    [0, 0, 1]])
        shape_pc = batch_data[k, ...]
        rotated_data[k, ...] = np.dot(shape_pc.reshape((-1, 3)), rotation_matrix)
    return rotated_data

def rotate_point_cloud_with_normal(batch_xyz_normal):
    ''' Randomly rotate XYZ, normal point cloud.
        Input:
            batch_xyz_normal: B,N,6, first three channels are XYZ, last 3 all normal
        Output:
            B,N,6, rotated XYZ, normal point cloud
    '''
    for k in xrange(batch_xyz_normal.shape[0]):
        rotation_angle = np.random.uniform() * 2 * np.pi
        cosval = np.cos(rotation_angle)
        sinval = np.sin(rotation_angle)
        rotation_matrix = np.array([[cosval, 0, sinval],
                                    [0, 1, 0],
                                    [-sinval, 0, cosval]])
        shape_pc = batch_xyz_normal[k,:,0:3]
        shape_normal = batch_xyz_normal[k,:,3:6]
        batch_xyz_normal[k,:,0:3] = np.dot(shape_pc.reshape((-1, 3)), rotation_matrix)
        batch_xyz_normal[k,:,3:6] = np.dot(shape_normal.reshape((-1, 3)), rotation_matrix)
    return batch_xyz_normal

def rotate_perturbation_point_cloud_with_normal(batch_data, angle_sigma=0.06, angle_clip=0.18):
    """ Randomly perturb the point clouds by small rotations
        Input:
            BxNx6 array, original batch of point clouds and point normals
        Return:
            BxNx6 array, rotated batch of point clouds
    """
    rotated_data = np.zeros(batch_data.shape, dtype=np.float32)
    for k in xrange(batch_data.shape[0]):
        angles = np.clip(angle_sigma*np.random.randn(3), -angle_clip, angle_clip)
        Rx = np.array([[1,0,0],
                       [0,np.cos(angles[0]),-np.sin(angles[0])],
                       [0,np.sin(angles[0]),np.cos(angles[0])]])
        Ry = np.array([[np.cos(angles[1]),0,np.sin(angles[1])],
                       [0,1,0],
                       [-np.sin(angles[1]),0,np.cos(angles[1])]])
        Rz = np.array([[np.cos(angles[2]),-np.sin(angles[2]),0],
                       [np.sin(angles[2]),np.cos(angles[2]),0],
                       [0,0,1]])
        R = np.dot(Rz, np.dot(Ry,Rx))
        shape_pc = batch_data[k,:,0:3]
        shape_normal = batch_data[k,:,3:6]
        rotated_data[k,:,0:3] = np.dot(shape_pc.reshape((-1, 3)), R)
        rotated_data[k,:,3:6] = np.dot(shape_normal.reshape((-1, 3)), R)
    return rotated_data


def rotate_point_cloud_by_angle(batch_data, rotation_angle):
    """ Rotate the point cloud along up direction with certain angle.
        Input:
            BxNx3 array, original batch of point clouds
        Return:
            BxNx3 array, rotated batch of point clouds
    """
    rotated_data = np.zeros(batch_data.shape, dtype=np.float32)
    for k in xrange(batch_data.shape[0]):
        #rotation_angle = np.random.uniform() * 2 * np.pi
        cosval = np.cos(rotation_angle)
        sinval = np.sin(rotation_angle)
        rotation_matrix = np.array([[cosval, 0, sinval],
                                    [0, 1, 0],
                                    [-sinval, 0, cosval]])
        shape_pc = batch_data[k,:,0:3]
        rotated_data[k,:,0:3] = np.dot(shape_pc.reshape((-1, 3)), rotation_matrix)
    return rotated_data

def rotate_point_cloud_by_angle_with_normal(batch_data, rotation_angle):
    """ Rotate the point cloud along up direction with certain angle.
        Input:
            BxNx6 array, original batch of point clouds with normal
            scalar, angle of rotation
        Return:
            BxNx6 array, rotated batch of point clouds with normal
    """
    rotated_data = np.zeros(batch_data.shape, dtype=np.float32)
    for k in xrange(batch_data.shape[0]):
        #rotation_angle = np.random.uniform() * 2 * np.pi
        cosval = np.cos(rotation_angle)
        sinval = np.sin(rotation_angle)
        rotation_matrix = np.array([[cosval, 0, sinval],
                                    [0, 1, 0],
                                    [-sinval, 0, cosval]])
        shape_pc = batch_data[k,:,0:3]
        shape_normal = batch_data[k,:,3:6]
        rotated_data[k,:,0:3] = np.dot(shape_pc.reshape((-1, 3)), rotation_matrix)
        rotated_data[k,:,3:6] = np.dot(shape_normal.reshape((-1,3)), rotation_matrix)
    return rotated_data



def rotate_perturbation_point_cloud(batch_data, angle_sigma=0.06, angle_clip=0.18):
    """ Randomly perturb the point clouds by small rotations
        Input:
            BxNx3 array, original batch of point clouds
        Return:
            BxNx3 array, rotated batch of point clouds
    """
    rotated_data = np.zeros(batch_data.shape, dtype=np.float32)
    for k in xrange(batch_data.shape[0]):
        angles = np.clip(angle_sigma*np.random.randn(3), -angle_clip, angle_clip)
        Rx = np.array([[1,0,0],
                       [0,np.cos(angles[0]),-np.sin(angles[0])],
                       [0,np.sin(angles[0]),np.cos(angles[0])]])
        Ry = np.array([[np.cos(angles[1]),0,np.sin(angles[1])],
                       [0,1,0],
                       [-np.sin(angles[1]),0,np.cos(angles[1])]])
        Rz = np.array([[np.cos(angles[2]),-np.sin(angles[2]),0],
                       [np.sin(angles[2]),np.cos(angles[2]),0],
                       [0,0,1]])
        R = np.dot(Rz, np.dot(Ry,Rx))
        shape_pc = batch_data[k, ...]
        rotated_data[k, ...] = np.dot(shape_pc.reshape((-1, 3)), R)
    return rotated_data


def jitter_point_cloud(batch_data, sigma=0.01, clip=0.05):
    """ Randomly jitter points. jittering is per point.
        Input:
            BxNx3 array, original batch of point clouds
        Return:
            BxNx3 array, jittered batch of point clouds
    """
    B, N, C = batch_data.shape
    assert(clip > 0)
    jittered_data = np.clip(sigma * np.random.randn(B, N, C), -1*clip, clip)
    jittered_data += batch_data
    return jittered_data

def shift_point_cloud(batch_data, shift_range=0.1):
    """ Randomly shift point cloud. Shift is per point cloud.
        Input:
            BxNx3 array, original batch of point clouds
        Return:
            BxNx3 array, shifted batch of point clouds
    """
    B, N, C = batch_data.shape
    shifts = np.random.uniform(-shift_range, shift_range, (B,3))
    for batch_index in range(B):
        batch_data[batch_index,:,:] += shifts[batch_index,:]
    return batch_data


def random_scale_point_cloud(batch_data, scale_low=0.8, scale_high=1.25):
    """ Randomly scale the point cloud. Scale is per point cloud.
        Input:
            BxNx3 array, original batch of point clouds
        Return:
            BxNx3 array, scaled batch of point clouds
    """
    B, N, C = batch_data.shape
    scales = np.random.uniform(scale_low, scale_high, B)
    for batch_index in range(B):
        batch_data[batch_index,:,:] *= scales[batch_index]
    return batch_data

def random_point_dropout(batch_pc, max_dropout_ratio=0.875):
    ''' batch_pc: BxNx3 '''
    for b in range(batch_pc.shape[0]):
        dropout_ratio = np.random.random()*max_dropout_ratio # 0~0.875
        drop_idx = np.where(np.random.random((batch_pc.shape[1]))<=dropout_ratio)[0]
        if len(drop_idx)>0:
            batch_pc[b,drop_idx,:] = batch_pc[b,0,:] # set to the first point
    return batch_pc


def getDataFiles(list_filename):
    return [line.rstrip() for line in open(list_filename)]

def load_h5(h5_filename):
    f = h5py.File(h5_filename, 'r')  # open read-only
    data = f['data'][:]
    label = f['label'][:]
    return (data, label)

def loadDataFile(filename):
    return load_h5(filename)
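
# A minimal sketch (an assumption, not part of the original file) of how the
# augmentations above might be chained on a BxNx3 training batch:
def _augmentation_demo(batch_data):
    out = rotate_point_cloud_z(batch_data)  # random rotation about the z axis
    out = random_scale_point_cloud(out)     # per-cloud scale in [0.8, 1.25]
    out = shift_point_cloud(out)            # per-cloud shift in [-0.1, 0.1]
    out = jitter_point_cloud(out)           # per-point Gaussian noise
    return out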
--------------------------------------------------------------------------------
/result.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zaiweizhang/path_invariance_map_network/0b561ec97d01331f5b61e4e3b405316edbb9219b/result.JPG
--------------------------------------------------------------------------------
/scannet_dataset.py:
--------------------------------------------------------------------------------
import pickle
import os
import sys
import numpy as np
import pc_util
import scene_util

class ScannetDataset():
    def __init__(self, root, npoints=8192, split='train'):
        self.npoints = npoints
        self.root = root
        self.split = split
        if split == 'train':
            self.data_filename = os.path.join(self.root, 'scannet_%s_100.pickle'%(split))
        else:
            self.data_filename = os.path.join(self.root, 'scannet_%s.pickle'%(split))
        with open(self.data_filename,'rb') as fp:
            self.scene_points_list = pickle.load(fp)
            self.semantic_labels_list = pickle.load(fp)
        if split=='train':
            labelweights = np.zeros(21)
            for seg in self.semantic_labels_list:
                tmp,_ = np.histogram(seg,range(22))
                labelweights += tmp
            labelweights = labelweights.astype(np.float32)
            labelweights = labelweights/np.sum(labelweights)
            self.labelweights = 1/np.log(1.2+labelweights)
        elif split=='test':
            self.labelweights = np.ones(21)
    def __getitem__(self, index):
        point_set = self.scene_points_list[index]
        semantic_seg = self.semantic_labels_list[index].astype(np.int32)
        coordmax = np.max(point_set,axis=0)
        coordmin = np.min(point_set,axis=0)
        smpmin = np.maximum(coordmax-[1.5,1.5,3.0], coordmin)
        smpmin[2] = coordmin[2]
        smpsz = np.minimum(coordmax-smpmin,[1.5,1.5,3.0])
        smpsz[2] = coordmax[2]-coordmin[2]
        isvalid = False
        for i in range(10):
            curcenter = point_set[np.random.choice(len(semantic_seg),1)[0],:]
            curmin = curcenter-[0.75,0.75,1.5]
            curmax = curcenter+[0.75,0.75,1.5]
            curmin[2] = coordmin[2]
            curmax[2] = coordmax[2]
            curchoice = np.sum((point_set>=(curmin-0.2))*(point_set<=(curmax+0.2)),axis=1)==3
            cur_point_set = point_set[curchoice,:]
            cur_semantic_seg = semantic_seg[curchoice]
            if len(cur_semantic_seg)==0:
                continue
            mask = np.sum((cur_point_set>=(curmin-0.01))*(cur_point_set<=(curmax+0.01)),axis=1)==3
            vidx = np.ceil((cur_point_set[mask,:]-curmin)/(curmax-curmin)*[31.0,31.0,62.0])
            vidx = np.unique(vidx[:,0]*31.0*62.0+vidx[:,1]*62.0+vidx[:,2])
            isvalid = np.sum(cur_semantic_seg>0)/len(cur_semantic_seg)>=0.7 and len(vidx)/31.0/31.0/62.0>=0.02
            if isvalid:
                break
        choice = np.random.choice(len(cur_semantic_seg), self.npoints, replace=True)
        point_set = cur_point_set[choice,:]
        semantic_seg = cur_semantic_seg[choice]
        mask = mask[choice]
        sample_weight = self.labelweights[semantic_seg]
        sample_weight *= mask
        return point_set, semantic_seg, sample_weight
    def __len__(self):
        return len(self.scene_points_list)

class ScannetDatasetWholeScene():
    def __init__(self, root, npoints=8192, split='train'):
        self.npoints = npoints
        self.root = root
        self.split = split
        self.data_filename = os.path.join(self.root, 'scannet_%s.pickle'%(split))
        with open(self.data_filename,'rb') as fp:
            self.scene_points_list = pickle.load(fp)
            self.semantic_labels_list = pickle.load(fp)
        if split=='train':
            labelweights = np.zeros(21)
            for seg in self.semantic_labels_list:
                tmp,_ = np.histogram(seg,range(22))
                labelweights += tmp
            labelweights = labelweights.astype(np.float32)
            labelweights = labelweights/np.sum(labelweights)
            self.labelweights = 1/np.log(1.2+labelweights)
        elif split=='test':
            self.labelweights = np.ones(21)
    def __getitem__(self, index):
        point_set_ini = self.scene_points_list[index]
        semantic_seg_ini = self.semantic_labels_list[index].astype(np.int32)
        coordmax = np.max(point_set_ini,axis=0)
        coordmin = np.min(point_set_ini,axis=0)
        nsubvolume_x = np.ceil((coordmax[0]-coordmin[0])/1.5).astype(np.int32)
        nsubvolume_y = np.ceil((coordmax[1]-coordmin[1])/1.5).astype(np.int32)
        point_sets = list()
        semantic_segs = list()
        sample_weights = list()
        isvalid = False
        for i in range(nsubvolume_x):
            for j in range(nsubvolume_y):
                curmin = coordmin+[i*1.5,j*1.5,0]
                curmax = coordmin+[(i+1)*1.5,(j+1)*1.5,coordmax[2]-coordmin[2]]
                curchoice = np.sum((point_set_ini>=(curmin-0.2))*(point_set_ini<=(curmax+0.2)),axis=1)==3
                cur_point_set = point_set_ini[curchoice,:]
                cur_semantic_seg = semantic_seg_ini[curchoice]
                if len(cur_semantic_seg)==0:
                    continue
                mask = np.sum((cur_point_set>=(curmin-0.001))*(cur_point_set<=(curmax+0.001)),axis=1)==3
                choice = np.random.choice(len(cur_semantic_seg), self.npoints, replace=True)
                point_set = cur_point_set[choice,:] # Nx3
                semantic_seg = cur_semantic_seg[choice] # N
                mask = mask[choice]
                if sum(mask)/float(len(mask))<0.01:
                    continue
                sample_weight = self.labelweights[semantic_seg]
                sample_weight *= mask # N
                point_sets.append(np.expand_dims(point_set,0)) # 1xNx3
                semantic_segs.append(np.expand_dims(semantic_seg,0)) # 1xN
                sample_weights.append(np.expand_dims(sample_weight,0)) # 1xN
        point_sets = np.concatenate(tuple(point_sets),axis=0)
        semantic_segs = np.concatenate(tuple(semantic_segs),axis=0)
        sample_weights = np.concatenate(tuple(sample_weights),axis=0)
        return point_sets, semantic_segs, sample_weights
    def __len__(self):
        return len(self.scene_points_list)
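
# Usage sketch (hypothetical path; not part of the original file): both
# classes yield (point_set, semantic_seg, sample_weight) triples, where
# ScannetDataset crops one random 1.5 x 1.5 column per call and
# ScannetDatasetWholeScene tiles the whole scene into such columns.
#
#   d = ScannetDataset(root='./data', npoints=8192, split='train')
#   ps, seg, smpw = d[0]   # (8192, 3), (8192,), (8192,)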
labelweights_vox = np.zeros(21) 128 | for ii in xrange(len(d)): 129 | print ii 130 | ps,seg,smpw = d[ii] 131 | for b in xrange(ps.shape[0]): 132 | _, uvlabel, _ = pc_util.point_cloud_label_to_surface_voxel_label_fast(ps[b,smpw[b,:]>0,:], seg[b,smpw[b,:]>0], res=0.02) 133 | tmp,_ = np.histogram(uvlabel,range(22)) 134 | labelweights_vox += tmp 135 | print labelweights_vox[1:].astype(np.float32)/np.sum(labelweights_vox[1:].astype(np.float32)) 136 | exit() 137 | 138 | 139 | -------------------------------------------------------------------------------- /scannet_dataset_multi.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import os 3 | import sys 4 | import numpy as np 5 | import pc_util 6 | import scene_util 7 | from sklearn.cluster import KMeans 8 | 9 | class ScannetDataset(): 10 | def __init__(self, root, npoints=[4096, 8192, 12288], split='train'): 11 | self.npoints = npoints 12 | self.root = root 13 | self.split = split 14 | if split == 'train': 15 | self.data_filename = os.path.join(self.root, 'scannet_%s_100.pickle'%(split)) 16 | else: 17 | self.data_filename = os.path.join(self.root, 'scannet_%s.pickle'%(split)) 18 | with open(self.data_filename,'rb') as fp: 19 | self.scene_points_list = pickle.load(fp) 20 | self.semantic_labels_list = pickle.load(fp) 21 | if split=='train': 22 | labelweights = np.zeros(21) 23 | for seg in self.semantic_labels_list: 24 | tmp,_ = np.histogram(seg,range(22)) 25 | labelweights += tmp 26 | labelweights = labelweights.astype(np.float32) 27 | labelweights = labelweights/np.sum(labelweights) 28 | self.labelweights = 1/np.log(1.2+labelweights) 29 | elif split=='test': 30 | self.labelweights = np.ones(21) 31 | 32 | def __getitem__(self, index): 33 | point_set = self.scene_points_list[index] 34 | semantic_seg = self.semantic_labels_list[index].astype(np.int32) 35 | coordmax = np.max(point_set,axis=0) 36 | coordmin = np.min(point_set,axis=0) 37 | smpmin = np.maximum(coordmax-[1.5,1.5,3.0], coordmin) 38 | smpmin[2] = coordmin[2] 39 | smpsz = np.minimum(coordmax-smpmin,[1.5,1.5,3.0]) 40 | smpsz[2] = coordmax[2]-coordmin[2] 41 | isvalid = False 42 | for i in range(10): 43 | curcenter = point_set[np.random.choice(len(semantic_seg),1)[0],:] 44 | curmin = curcenter-[0.75,0.75,1.5] 45 | curmax = curcenter+[0.75,0.75,1.5] 46 | curmin[2] = coordmin[2] 47 | curmax[2] = coordmax[2] 48 | curchoice = np.sum((point_set>=(curmin-0.2))*(point_set<=(curmax+0.2)),axis=1)==3 49 | cur_point_set = point_set[curchoice,:] 50 | cur_semantic_seg = semantic_seg[curchoice] 51 | if len(cur_semantic_seg)==0: 52 | continue 53 | mask = np.sum((cur_point_set>=(curmin-0.01))*(cur_point_set<=(curmax+0.01)),axis=1)==3 54 | vidx = np.ceil((cur_point_set[mask,:]-curmin)/(curmax-curmin)*[31.0,31.0,62.0]) 55 | vidx = np.unique(vidx[:,0]*31.0*62.0+vidx[:,1]*62.0+vidx[:,2]) 56 | isvalid = np.sum(cur_semantic_seg>0)/len(cur_semantic_seg)>=0.7 and len(vidx)/31.0/31.0/62.0>=0.02 57 | if isvalid: 58 | break 59 | 60 | point_set = [] 61 | semantic_seg = [] 62 | masks = [] 63 | choice1 = np.random.choice(len(cur_point_set), self.npoints[0], replace=True) 64 | choice2 = np.random.choice(len(cur_point_set), self.npoints[1], replace=True) 65 | choice3 = np.random.choice(len(cur_point_set), self.npoints[2], replace=True) 66 | pc1 = cur_point_set[choice1,:].copy() 67 | sem1 = cur_semantic_seg[choice1].copy() 68 | mask1 = mask[choice1].copy() 69 | pc2 = cur_point_set[choice2,:].copy() 70 | sem2 = cur_semantic_seg[choice2].copy() 71 | mask2 = 
mask[choice2].copy()
72 | pc3 = cur_point_set[choice3,:].copy()
73 | sem3 = cur_semantic_seg[choice3].copy()
74 | mask3 = mask[choice3].copy()
75 | 
76 | ### getting samples at the other point-cloud densities
77 | ### saliency is estimated from surface variation (https://lgg.epfl.ch/publications/2003/pauly_2003_MFE.pdf)
78 | hm = pc_util.sample_multi(np.squeeze(pc3))
79 | #pc_util.write_ply_color_multic(np.squeeze(pc3), (hm-0.1)/0.9*0.7, "test.ply") ### can be used for visualization
80 | idx = np.argsort(hm)
81 | ### Take the 20 most salient points
82 | sal_points_frompc3 = pc3[idx[-20:], ...]
83 | kmeans = KMeans(n_clusters=3, random_state=0).fit(sal_points_frompc3)
84 | maxlabel = np.argmax(np.bincount(kmeans.labels_))
85 | curcenter = kmeans.cluster_centers_[maxlabel,:]
86 | curmin = curcenter-[0.75*0.88,0.75*0.88,1.5*0.88]
87 | curmax = curcenter+[0.75*0.88,0.75*0.88,1.5*0.88]
88 | curmin[2] = coordmin[2]
89 | curmax[2] = coordmax[2]
90 | curchoicepc3 = np.sum((pc3>=(curmin-0.1))*(pc3<=(curmax+0.1)),axis=1)==3
91 | pc3_selected = pc3[curchoicepc3,...].copy()
92 | sem3_selected = sem3[curchoicepc3,...].copy()
93 | mask3_selected = mask3[curchoicepc3,...].copy()
94 | 
95 | curmin = curcenter-[0.75*0.70,0.75*0.70,1.5*0.70]
96 | curmax = curcenter+[0.75*0.70,0.75*0.70,1.5*0.70]
97 | curmin[2] = coordmin[2]
98 | curmax[2] = coordmax[2]
99 | curchoicepc3 = np.sum((pc3>=(curmin-0.1))*(pc3<=(curmax+0.1)),axis=1)==3
100 | pc3_selected_f = pc3[curchoicepc3,...].copy()
101 | sem3_selected_f = sem3[curchoicepc3,...].copy()
102 | mask3_selected_f = mask3[curchoicepc3,...].copy()
103 | 
104 | data_idx1 = np.random.choice(len(np.squeeze(pc3_selected_f)), self.npoints[0], replace=True)
105 | data_idx2 = np.random.choice(len(np.squeeze(pc3_selected)), self.npoints[1], replace=True)
106 | pc1_fromPC3 = pc3_selected_f[data_idx1,:].copy()
107 | sem1_fromPC3 = sem3_selected_f[data_idx1].copy()
108 | mask1_fromPC3 = mask3_selected_f[data_idx1].copy()
109 | 
110 | pc2_fromPC3 = pc3_selected[data_idx2,:].copy()
111 | sem2_fromPC3 = sem3_selected[data_idx2].copy()
112 | mask2_fromPC3 = mask3_selected[data_idx2].copy()
113 | 
114 | ### pcII to pcIII (sample a 4096-point crop out of the 8192-point cloud)
115 | hm = pc_util.sample_multi(np.squeeze(pc2))
116 | idx = np.argsort(hm)
117 | ### Take the 20 most salient points
118 | sal_points_frompc2 = pc2[idx[-20:], ...]
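  | ### Note on the crop sizes (a reading of the constants above, not stated by the authors):
  | ### the 20 most salient points are clustered with KMeans(k=3) and the center of the largest
  | ### cluster anchors the re-crop, so the denser sub-sample stays on a geometrically
  | ### interesting region. The linear shrink factors appear chosen to keep point density
  | ### roughly constant across representations: 0.88^3 ~ 8192/12288 and 0.70^3 ~ 4096/12288
  | ### for the crops out of pc3, and 0.79^3 ~ 4096/8192 for the crop out of pc2 below.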
119 | kmeans = KMeans(n_clusters=3, random_state=0).fit(sal_points_frompc2) 120 | maxlabel = np.argmax(np.bincount(kmeans.labels_)) 121 | curcenter = kmeans.cluster_centers_[maxlabel,:] 122 | curmin = curcenter-[0.75*0.79,0.75*0.79,1.5*0.79] 123 | curmax = curcenter+[0.75*0.79,0.75*0.79,1.5*0.79] 124 | curmin[2] = coordmin[2] 125 | curmax[2] = coordmax[2] 126 | curchoicepc2 = np.sum((pc2>=(curmin-0.1))*(pc2<=(curmax+0.1)),axis=1)==3 127 | pc2_selected = pc2[curchoicepc2,...].copy() 128 | sem2_selected = sem2[curchoicepc2,...].copy() 129 | mask2_selected = mask2[curchoicepc2,...].copy() 130 | 131 | data_idx = np.random.choice(len(np.squeeze(pc2_selected)), self.npoints[0], replace=True) 132 | pc1_fromPC2 = pc2_selected[data_idx,:].copy() 133 | sem1_fromPC2 = sem2_selected[data_idx].copy() 134 | mask1_fromPC2 = mask2_selected[data_idx].copy() 135 | 136 | point_set = [pc1_fromPC2, pc1_fromPC3, pc1, pc2_fromPC3, pc2, pc3] 137 | sems = [sem1_fromPC2, sem1_fromPC3, sem1, sem2_fromPC3, sem2, sem3] 138 | masks = [self.labelweights[sems[0]]*mask1_fromPC2, self.labelweights[sems[1]]*mask1_fromPC3, self.labelweights[sems[2]]*mask1, self.labelweights[sems[3]]*mask2_fromPC3, self.labelweights[sems[4]]*mask2, self.labelweights[sems[5]]*mask3] 139 | return point_set, sems, masks 140 | 141 | def __len__(self): 142 | return len(self.scene_points_list) 143 | 144 | class ScannetDatasetWholeScene(): 145 | def __init__(self, root, npoints=8192, split='train'): 146 | self.npoints = npoints 147 | self.root = root 148 | self.split = split 149 | self.data_filename = os.path.join(self.root, 'scannet_%s.pickle'%(split)) 150 | with open(self.data_filename,'rb') as fp: 151 | self.scene_points_list = pickle.load(fp) 152 | self.semantic_labels_list = pickle.load(fp) 153 | if split=='train': 154 | labelweights = np.zeros(21) 155 | for seg in self.semantic_labels_list: 156 | tmp,_ = np.histogram(seg,range(22)) 157 | labelweights += tmp 158 | labelweights = labelweights.astype(np.float32) 159 | labelweights = labelweights/np.sum(labelweights) 160 | self.labelweights = 1/np.log(1.2+labelweights) 161 | elif split=='test': 162 | self.labelweights = np.ones(21) 163 | def __getitem__(self, index): 164 | point_set_ini = self.scene_points_list[index] 165 | semantic_seg_ini = self.semantic_labels_list[index].astype(np.int32) 166 | coordmax = np.max(point_set_ini,axis=0) 167 | coordmin = np.min(point_set_ini,axis=0) 168 | nsubvolume_x = np.ceil((coordmax[0]-coordmin[0])/1.5).astype(np.int32) 169 | nsubvolume_y = np.ceil((coordmax[1]-coordmin[1])/1.5).astype(np.int32) 170 | point_sets = list() 171 | semantic_segs = list() 172 | sample_weights = list() 173 | isvalid = False 174 | for i in range(nsubvolume_x): 175 | for j in range(nsubvolume_y): 176 | curmin = coordmin+[i*1.5,j*1.5,0] 177 | curmax = coordmin+[(i+1)*1.5,(j+1)*1.5,coordmax[2]-coordmin[2]] 178 | curchoice = np.sum((point_set_ini>=(curmin-0.2))*(point_set_ini<=(curmax+0.2)),axis=1)==3 179 | cur_point_set = point_set_ini[curchoice,:] 180 | cur_semantic_seg = semantic_seg_ini[curchoice] 181 | if len(cur_semantic_seg)==0: 182 | continue 183 | mask = np.sum((cur_point_set>=(curmin-0.001))*(cur_point_set<=(curmax+0.001)),axis=1)==3 184 | choice = np.random.choice(len(cur_semantic_seg), self.npoints, replace=True) 185 | point_set = cur_point_set[choice,:] # Nx3 186 | semantic_seg = cur_semantic_seg[choice] # N 187 | mask = mask[choice] 188 | if sum(mask)/float(len(mask))<0.01: 189 | continue 190 | sample_weight = self.labelweights[semantic_seg] 191 | sample_weight *= mask # N 
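  | ### Each surviving 1.5m x 1.5m column contributes one fixed-size sample; stacking the
  | ### expanded arrays below yields (num_blocks, npoints, ...) batches that tile the whole
  | ### scene for evaluation.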
192 | point_sets.append(np.expand_dims(point_set,0)) # 1xNx3
193 | semantic_segs.append(np.expand_dims(semantic_seg,0)) # 1xN
194 | sample_weights.append(np.expand_dims(sample_weight,0)) # 1xN
195 | point_sets = np.concatenate(tuple(point_sets),axis=0)
196 | semantic_segs = np.concatenate(tuple(semantic_segs),axis=0)
197 | sample_weights = np.concatenate(tuple(sample_weights),axis=0)
198 | return point_sets, semantic_segs, sample_weights
199 | def __len__(self):
200 | return len(self.scene_points_list)
201 | 
202 | if __name__=='__main__':
203 | d = ScannetDatasetWholeScene(root = './data', split='test', npoints=8192)
204 | labelweights_vox = np.zeros(21)
205 | for ii in xrange(len(d)):
206 | print ii
207 | ps,seg,smpw = d[ii]
208 | for b in xrange(ps.shape[0]):
209 | _, uvlabel, _ = pc_util.point_cloud_label_to_surface_voxel_label_fast(ps[b,smpw[b,:]>0,:], seg[b,smpw[b,:]>0], res=0.02)
210 | tmp,_ = np.histogram(uvlabel,range(22))
211 | labelweights_vox += tmp
212 | print labelweights_vox[1:].astype(np.float32)/np.sum(labelweights_vox[1:].astype(np.float32))
213 | exit()
214 | 
215 | 
216 | 
--------------------------------------------------------------------------------
/scene_util.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | BASE_DIR = os.path.dirname(os.path.abspath(__file__))
4 | sys.path.append(BASE_DIR)
5 | 
6 | import numpy as np
7 | from sklearn.neighbors import NearestNeighbors
8 | from numpy import linalg as la
9 | import scipy.io as sio
10 | 
11 | def cart2sph(xyz):
12 | xy = xyz[:,0]**2+xyz[:,1]**2
13 | aer = np.zeros(xyz.shape)
14 | aer[:,2] = np.sqrt(xy+xyz[:,2]**2)
15 | aer[:,1] = np.arctan2(xyz[:,2],np.sqrt(xy))
16 | aer[:,0] = np.arctan2(xyz[:,1],xyz[:,0])
17 | return aer
18 | 
19 | # generate virtual scan of a scene by subsampling the point cloud
20 | def virtual_scan(xyz, mode=-1):
21 | camloc = np.mean(xyz,axis=0)
22 | camloc[2] = 1.5 # human height
23 | if mode==-1:
24 | view_dr = np.array([2*np.pi*np.random.random(), np.pi/10*(np.random.random()-0.75)])
25 | camloc[:2] -= (0.8+0.7*np.random.random())*np.array([np.cos(view_dr[0]),np.sin(view_dr[0])])
26 | else:
27 | view_dr = np.array([np.pi/4*mode, 0])
28 | camloc[:2] -= np.array([np.cos(view_dr[0]),np.sin(view_dr[0])])
29 | ct_ray_dr = np.array([np.cos(view_dr[1])*np.cos(view_dr[0]), np.cos(view_dr[1])*np.sin(view_dr[0]), np.sin(view_dr[1])])
30 | hr_dr = np.cross(ct_ray_dr, np.array([0,0,1]))
31 | hr_dr /= la.norm(hr_dr)
32 | vt_dr = np.cross(hr_dr, ct_ray_dr)
33 | vt_dr /= la.norm(vt_dr)
34 | xx = np.linspace(-0.6,0.6,200) #200
35 | yy = np.linspace(-0.45,0.45,150) #150
36 | xx, yy = np.meshgrid(xx,yy)
37 | xx = xx.reshape(-1,1)
38 | yy = yy.reshape(-1,1)
39 | rays = xx*hr_dr.reshape(1,-1)+yy*vt_dr.reshape(1,-1)+ct_ray_dr.reshape(1,-1)
40 | rays_aer = cart2sph(rays)
41 | local_xyz = xyz-camloc.reshape(1,-1)
42 | local_aer = cart2sph(local_xyz)
43 | nbrs = NearestNeighbors(n_neighbors=1, algorithm='kd_tree').fit(rays_aer[:,:2])
44 | mindd, minidx = nbrs.kneighbors(local_aer[:,:2])
45 | mindd = mindd.reshape(-1)
46 | minidx = minidx.reshape(-1)
47 | 
48 | sub_idx = mindd<0.01
49 | if sum(sub_idx)<100:
50 | return np.ones(0)
51 | sub_r = local_aer[sub_idx,2]
52 | sub_minidx = minidx[sub_idx]
53 | min_r = float('inf')*np.ones(np.max(sub_minidx)+1)
54 | for i in xrange(len(sub_r)):
55 | if sub_r[i]<min_r[sub_minidx[i]]: # track the nearest hit along each ray
56 | min_r[sub_minidx[i]] = sub_r[i]
57 | sub_smpidx = np.ones(len(sub_r))
58 | for i in xrange(len(sub_r)):
59 | if sub_r[i]>min_r[sub_minidx[i]]+0.04: # drop points more than 4cm behind the nearest hit (occluded)
60 | sub_smpidx[i] = 0
61 | smpidx = np.where(sub_idx)[0]
62 | smpidx = smpidx[sub_smpidx==1]
63 | return smpidx
64 | 
65 | if __name__=='__main__':
66 | pc = np.load('scannet_dataset/scannet_scenes/scene0015_00.npy')
67 | print pc.shape
68 | xyz = pc[:,:3]
69 | seg = pc[:,7]
70 | smpidx = virtual_scan(xyz,mode=2)
71 | xyz = xyz[smpidx,:]
72 | seg = seg[smpidx]
73 | sio.savemat('tmp.mat',{'pc':xyz,'seg':seg})
74 | 
--------------------------------------------------------------------------------
/suncg_dataset_multi.py:
--------------------------------------------------------------------------------
1 | import pickle
2 | import os
3 | import os.path
4 | import sys
5 | import numpy as np
6 | import pc_util
7 | import scene_util
8 | import scipy.io as sio
9 | import time
10 | from multiprocessing import Process, Manager
11 | from sklearn.neighbors import KDTree
12 | from sklearn.cluster import KMeans
13 | 
14 | NUM_REPE = 6
15 | SUBSET = [0, 1, 3]
16 | 
17 | ### Get the consistent labels across the different representations
18 | def get_vote_parallel(data, pcndex, batch_data, pred_val, voxeldata1, voxeldata2):
19 | newpred = []
20 | for i in range(NUM_REPE): ## one slot per point-cloud representation (NUM_REPE = 6)
21 | newpred.append(np.zeros(pred_val[i].shape))
22 | ### 8 predictions per point in total: 6 point-cloud representations + 2 voxel models
23 | for i in range(batch_data[0].shape[0]):
24 | result_pred = np.zeros([batch_data[-1].shape[1], NUM_REPE+2])
25 | result_pred[:,-3] = pred_val[-1][i,:] ## 12288 preds
26 | result_pred[:,-2] = voxeldata1[i,:] ## voxel preds
27 | result_pred[:,-1] = voxeldata2[i,:] ## voxel preds
28 | pc_maps = []
29 | pc12288_tree = KDTree(batch_data[-1][i,:,:], leaf_size=100)
30 | for j in range(NUM_REPE-1):
31 | if j in SUBSET:
32 | ### Sub-sampled pc based on surface variation
33 | result_pred[:,j] = (j+22) # fill with an out-of-range class id so unmatched points do not affect the vote
34 | idx = np.squeeze(pc12288_tree.query(batch_data[j][i,:,:], k=1)[1])
35 | pc_map = {i:idx[i] for i in range(len(batch_data[j][i,:,:]))} # rep-j point index -> nearest 12288-cloud index
36 | result_pred[idx,j] = pred_val[j][i,:]
37 | else:
38 | pc_tree = KDTree(batch_data[j][i,:,:], leaf_size=100)
39 | idx = np.squeeze(pc_tree.query(batch_data[-1][i,:,:],k=1)[1])
40 | result_pred[:,j] = pred_val[j][i,idx]
41 | idx = np.squeeze(pc12288_tree.query(batch_data[j][i,:,:], k=1)[1])
42 | pc_map = {i:idx[i] for i in range(len(batch_data[j][i,:,:]))}
43 | pc_maps.append(pc_map)
44 | 
45 | ### Get the popular vote here
46 | axis = 1
47 | u, indices = np.unique(result_pred, return_inverse=True)
48 | voted_pred = u[np.argmax(np.apply_along_axis(np.bincount, axis, indices.reshape(result_pred.shape), None, np.max(indices) + 1), axis=axis)]
49 | 
50 | newpred[-1][i,:] = voted_pred
51 | for j in range(NUM_REPE-1):
52 | for k in range(len(batch_data[j][i,:,:])):
53 | newpred[j][i,k] = voted_pred[pc_maps[j][k]]
54 | data[pcndex] = newpred
55 | 
56 | class SuncgDataset():
57 | def __init__(self, root, batch_size=8, npoints=[4096, 8192, 12288], split='train', rep="pc"):
58 | self.npoints = npoints
59 | self.root = root
60 | self.split = split
61 | self.rep = rep
62 | self.nrep = [3, 2, 1]
63 | self.batch_size = batch_size
64 | self.data_filename = os.path.join(self.root, 'scannet_train_unlabel.pickle')
65 | with open(self.data_filename,'rb') as fp:
66 | self.scene_list = pickle.load(fp)
67 | 
68 | self.train_idxs = np.arange(0, len(self.scene_list))
69 | np.random.shuffle(self.train_idxs)
70 | self.num_batches = len(self.scene_list)/self.batch_size
71 | self.batch_idx = 0
72 | self.epoch_idx = 0
73 | if split=='train':
74 | self.labelweights = np.ones(21)
75 | 
76 | def get_next(self):
77 | if self.batch_idx >= self.num_batches:
78 | self.batch_idx = 0
79 | np.random.shuffle(self.train_idxs)
80 | 
81 | batch_data = []
82 | mask = []
83 | for i in range(len(self.npoints)):
84 | for j in range(self.nrep[i]):
85 | batch_data.append(np.zeros((self.batch_size, self.npoints[i], 3)))
86 | mask.append(np.zeros((self.batch_size, self.npoints[i])))
87 | start_idx = self.batch_idx * self.batch_size
88 | for i in range(self.batch_size):
89 | ps, smpw = self.getitem(self.train_idxs[i+start_idx])
90 | counter = 0
91 | for j in range(len(self.npoints)):
92 | for k in range(self.nrep[j]):
93 | batch_data[counter][i,...] = ps[counter]
94 | mask[counter][i,:] = smpw[counter]
95 | 
96 | ### Random point dropout augmentation: dropped points collapse onto the first point and their weights are zeroed
97 | dropout_ratio = np.random.random()*0.875 # 0-0.875
98 | drop_idx = np.where(np.random.random((ps[counter].shape[0]))<=dropout_ratio)[0]
99 | batch_data[counter][i,drop_idx,:] = batch_data[counter][i,0,:]
100 | mask[counter][i,drop_idx] *= 0
101 | counter += 1
102 | self.batch_idx += 1
103 | return batch_data, mask
104 | 
105 | def getitem(self, index):
106 | point_set = self.scene_list[index]
107 | coordmax = np.max(point_set,axis=0)
108 | coordmin = np.min(point_set,axis=0)
109 | isvalid = False
110 | curcenter = point_set[np.random.choice(len(point_set),1)[0],:]
111 | curmin = curcenter-[0.75,0.75,1.5]
112 | curmax = curcenter+[0.75,0.75,1.5]
113 | curmin[2] = coordmin[2]
114 | curmax[2] = coordmax[2]
115 | curchoice = np.sum((point_set>=(curmin-0.2))*(point_set<=(curmax+0.2)),axis=1)==3
116 | cur_point_set = point_set[curchoice,:]
117 | mask = np.sum((cur_point_set>=(curmin-0.01))*(cur_point_set<=(curmax+0.01)),axis=1)==3
118 | point_set = []
119 | masks = []
120 | choice1 = np.random.choice(len(cur_point_set), self.npoints[0], replace=True)
121 | choice2 = np.random.choice(len(cur_point_set), self.npoints[1], replace=True)
122 | choice3 = np.random.choice(len(cur_point_set), self.npoints[2], replace=True)
123 | pc1 = cur_point_set[choice1,:].copy()
124 | mask1 = mask[choice1].copy()
125 | pc2 = cur_point_set[choice2,:].copy()
126 | mask2 = mask[choice2].copy()
127 | pc3 = cur_point_set[choice3,:].copy()
128 | mask3 = mask[choice3].copy()
129 | 
130 | ### getting samples at the other point-cloud densities
131 | ### saliency is estimated from surface variation (https://lgg.epfl.ch/publications/2003/pauly_2003_MFE.pdf)
132 | hm = pc_util.sample_multi(np.squeeze(pc3))
133 | idx = np.argsort(hm)
134 | ### Take the 20 most salient points
135 | sal_points_frompc3 = pc3[idx[-20:], ...]
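  | ### Same saliency-guided re-cropping as in scannet_dataset_multi.py, but on unlabeled
  | ### scenes: only coordinates and validity masks are produced here; per-point labels are
  | ### filled in later by the cross-representation vote (see ready() and get_vote_multi() below).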
136 | kmeans = KMeans(n_clusters=3, random_state=0).fit(sal_points_frompc3)
137 | maxlabel = np.argmax(np.bincount(kmeans.labels_))
138 | curcenter = kmeans.cluster_centers_[maxlabel,:]
139 | curmin = curcenter-[0.75*0.88,0.75*0.88,1.5*0.88]
140 | curmax = curcenter+[0.75*0.88,0.75*0.88,1.5*0.88]
141 | curmin[2] = coordmin[2]
142 | curmax[2] = coordmax[2]
143 | curchoicepc3 = np.sum((pc3>=(curmin-0.1))*(pc3<=(curmax+0.1)),axis=1)==3
144 | pc3_selected = pc3[curchoicepc3,...].copy()
145 | mask3_selected = mask3[curchoicepc3,...].copy()
146 | 
147 | curmin = curcenter-[0.75*0.70,0.75*0.70,1.5*0.70]
148 | curmax = curcenter+[0.75*0.70,0.75*0.70,1.5*0.70]
149 | curmin[2] = coordmin[2]
150 | curmax[2] = coordmax[2]
151 | curchoicepc3 = np.sum((pc3>=(curmin-0.1))*(pc3<=(curmax+0.1)),axis=1)==3
152 | pc3_selected_f = pc3[curchoicepc3,...].copy()
153 | mask3_selected_f = mask3[curchoicepc3,...].copy()
154 | 
155 | data_idx1 = np.random.choice(len(np.squeeze(pc3_selected_f)), self.npoints[0], replace=True)
156 | data_idx2 = np.random.choice(len(np.squeeze(pc3_selected)), self.npoints[1], replace=True)
157 | pc1_fromPC3 = pc3_selected_f[data_idx1,:].copy()
158 | mask1_fromPC3 = mask3_selected_f[data_idx1].copy()
159 | 
160 | pc2_fromPC3 = pc3_selected[data_idx2,:].copy()
161 | mask2_fromPC3 = mask3_selected[data_idx2].copy()
162 | 
163 | hm = pc_util.sample_multi(np.squeeze(pc2))
164 | idx = np.argsort(hm)
165 | ### Take the 20 most salient points
166 | sal_points_frompc2 = pc2[idx[-20:], ...]
167 | kmeans = KMeans(n_clusters=3, random_state=0).fit(sal_points_frompc2)
168 | maxlabel = np.argmax(np.bincount(kmeans.labels_))
169 | curcenter = kmeans.cluster_centers_[maxlabel,:]
170 | curmin = curcenter-[0.75*0.79,0.75*0.79,1.5*0.79]
171 | curmax = curcenter+[0.75*0.79,0.75*0.79,1.5*0.79]
172 | curmin[2] = coordmin[2]
173 | curmax[2] = coordmax[2]
174 | curchoicepc2 = np.sum((pc2>=(curmin-0.1))*(pc2<=(curmax+0.1)),axis=1)==3
175 | pc2_selected = pc2[curchoicepc2,...].copy()
176 | mask2_selected = mask2[curchoicepc2,...].copy()
177 | 
178 | data_idx = np.random.choice(len(np.squeeze(pc2_selected)), self.npoints[0], replace=True)
179 | pc1_fromPC2 = pc2_selected[data_idx,:].copy()
180 | mask1_fromPC2 = mask2_selected[data_idx].copy()
181 | 
182 | point_set = [pc1_fromPC2, pc1_fromPC3, pc1, pc2_fromPC3, pc2, pc3]
183 | masks = [mask1_fromPC2, mask1_fromPC3, mask1, mask2_fromPC3, mask2, mask3]
184 | return point_set, masks
185 | 
186 | def __len__(self):
187 | return len(self.scene_list)
188 | 
189 | def wait_other(self):
190 | sid = self.rep[5]
191 | while (not os.path.exists("pc_data"+sid+".mat")):
192 | pass
193 | time.sleep(1) ## Wait for data to be written
194 | inmat = sio.loadmat("pc_data"+sid+".mat")
195 | data = inmat['batch_data']
196 | smpw = inmat['batch_smpw']
197 | os.remove("pc_data"+sid+".mat")
198 | return data, smpw
199 | 
200 | def check_gone(self, batch_data, batch_smpw):
201 | sio.savemat("pc_data1.mat", {"batch_data":batch_data, "batch_smpw": batch_smpw})
202 | sio.savemat("pc_data2.mat", {"batch_data":batch_data, "batch_smpw": batch_smpw})
203 | while (os.path.exists("pc_data1.mat")) or (os.path.exists("pc_data2.mat")):
204 | pass
205 | return
206 | 
207 | def get_vote_multi(self, bdata, bpred, vdata, vdata2):
208 | ### Parallelize the per-sample voting across processes to reduce time
209 | numP = bdata[0].shape[0]
210 | result = []
211 | proc = []
212 | stored = Manager().dict()
213 | 
214 | for i in range(numP):
215 | newbdata = [np.expand_dims(bdata[j][i,...], 0) for j in range(NUM_REPE)]
216 | newbpred = 
[np.expand_dims(bpred[j][i,...], 0) for j in range(NUM_REPE)] 217 | newvdata = np.expand_dims(vdata[i,...], 0) 218 | newvdata2 = np.expand_dims(vdata2[i,...], 0) 219 | p = Process(target=get_vote_parallel, args=(stored, i, newbdata, newbpred, newvdata, newvdata2)) 220 | p.start() 221 | proc.append(p) 222 | 223 | for p in proc: 224 | p.join() 225 | 226 | for ndex in sorted(stored.keys()): 227 | result.append(stored[ndex]) 228 | 229 | reps = [] 230 | for i in range(NUM_REPE): 231 | reps.append([]) 232 | for i in range(numP): 233 | for j in range(NUM_REPE): 234 | reps[j].append(result[i][j]) 235 | result = [np.concatenate(reps[i], 0) for i in range(NUM_REPE)] 236 | return result 237 | 238 | def ready(self, batch_data, pred_val, mask, label_weights): 239 | if "voxel" in self.rep: 240 | sid = self.rep[5] 241 | sio.savemat(self.rep+".mat", {"batch_data":batch_data, "pred_val":pred_val}) 242 | while (not os.path.exists("pc"+sid+".mat")): 243 | pass 244 | time.sleep(1) ## Wait for data to be written 245 | newdata = sio.loadmat("pc"+sid+".mat") 246 | os.remove("pc"+sid+".mat") 247 | return newdata["batch_data"], newdata["pred_val"], label_weights[newdata["pred_val"].astype(np.int32)]*mask 248 | elif self.rep == "pc": 249 | while (not os.path.exists("voxel1.mat")) or (not os.path.exists("voxel2.mat")): 250 | pass 251 | time.sleep(1) ## Wait for data to be written 252 | voxeldata1 = sio.loadmat("voxel1.mat") 253 | os.remove("voxel1.mat") 254 | voxeldata2 = sio.loadmat("voxel2.mat") 255 | os.remove("voxel2.mat") 256 | newpred = self.get_vote_multi(batch_data, pred_val, voxeldata1["pred_val"], voxeldata2["pred_val"]) 257 | 258 | ## Save voted data to file 259 | sio.savemat(self.rep+"1.mat", {"batch_data":voxeldata1["batch_data"], "pred_val":newpred[-1]}) 260 | sio.savemat(self.rep+"2.mat", {"batch_data":voxeldata2["batch_data"], "pred_val":newpred[-1]}) 261 | smpws = [] 262 | counter = 0 263 | for i in range(len(self.npoints)): 264 | for j in range(self.nrep[i]): 265 | smpws.append(label_weights[newpred[counter].astype(np.int32)]*mask[counter]) 266 | counter += 1 267 | return batch_data, newpred, smpws 268 | else: 269 | print ("only support voxel or pc right now") 270 | sys.exit(0) 271 | 272 | -------------------------------------------------------------------------------- /tf_util.py: -------------------------------------------------------------------------------- 1 | """ Wrapper functions for TensorFlow layers. 2 | 3 | Author: Charles R. Qi 4 | Date: November 2017 5 | """ 6 | 7 | import numpy as np 8 | import tensorflow as tf 9 | 10 | def _variable_on_cpu(name, shape, initializer, use_fp16=False): 11 | """Helper to create a Variable stored on CPU memory. 12 | Args: 13 | name: name of the variable 14 | shape: list of ints 15 | initializer: initializer for Variable 16 | Returns: 17 | Variable Tensor 18 | """ 19 | with tf.device("/cpu:0"): 20 | dtype = tf.float16 if use_fp16 else tf.float32 21 | var = tf.get_variable(name, shape, initializer=initializer, dtype=dtype) 22 | return var 23 | 24 | def _variable_with_weight_decay(name, shape, stddev, wd, use_xavier=True): 25 | """Helper to create an initialized Variable with weight decay. 26 | 27 | Note that the Variable is initialized with a truncated normal distribution. 28 | A weight decay is added only if one is specified. 29 | 30 | Args: 31 | name: name of the variable 32 | shape: list of ints 33 | stddev: standard deviation of a truncated Gaussian 34 | wd: add L2Loss weight decay multiplied by this float. 
If None, weight 35 | decay is not added for this Variable. 36 | use_xavier: bool, whether to use xavier initializer 37 | 38 | Returns: 39 | Variable Tensor 40 | """ 41 | if use_xavier: 42 | initializer = tf.contrib.layers.xavier_initializer() 43 | else: 44 | initializer = tf.truncated_normal_initializer(stddev=stddev) 45 | var = _variable_on_cpu(name, shape, initializer) 46 | if wd is not None: 47 | weight_decay = tf.multiply(tf.nn.l2_loss(var), wd, name='weight_loss') 48 | tf.add_to_collection('losses', weight_decay) 49 | return var 50 | 51 | 52 | def conv1d(inputs, 53 | num_output_channels, 54 | kernel_size, 55 | scope, 56 | stride=1, 57 | padding='SAME', 58 | data_format='NHWC', 59 | use_xavier=True, 60 | stddev=1e-3, 61 | weight_decay=None, 62 | activation_fn=tf.nn.relu, 63 | bn=False, 64 | bn_decay=None, 65 | is_training=None): 66 | """ 1D convolution with non-linear operation. 67 | 68 | Args: 69 | inputs: 3-D tensor variable BxLxC 70 | num_output_channels: int 71 | kernel_size: int 72 | scope: string 73 | stride: int 74 | padding: 'SAME' or 'VALID' 75 | data_format: 'NHWC' or 'NCHW' 76 | use_xavier: bool, use xavier_initializer if true 77 | stddev: float, stddev for truncated_normal init 78 | weight_decay: float 79 | activation_fn: function 80 | bn: bool, whether to use batch norm 81 | bn_decay: float or float tensor variable in [0,1] 82 | is_training: bool Tensor variable 83 | 84 | Returns: 85 | Variable tensor 86 | """ 87 | with tf.variable_scope(scope) as sc: 88 | assert(data_format=='NHWC' or data_format=='NCHW') 89 | if data_format == 'NHWC': 90 | num_in_channels = inputs.get_shape()[-1].value 91 | elif data_format=='NCHW': 92 | num_in_channels = inputs.get_shape()[1].value 93 | kernel_shape = [kernel_size, 94 | num_in_channels, num_output_channels] 95 | kernel = _variable_with_weight_decay('weights', 96 | shape=kernel_shape, 97 | use_xavier=use_xavier, 98 | stddev=stddev, 99 | wd=weight_decay) 100 | outputs = tf.nn.conv1d(inputs, kernel, 101 | stride=stride, 102 | padding=padding, 103 | data_format=data_format) 104 | biases = _variable_on_cpu('biases', [num_output_channels], 105 | tf.constant_initializer(0.0)) 106 | outputs = tf.nn.bias_add(outputs, biases, data_format=data_format) 107 | 108 | if bn: 109 | outputs = batch_norm_for_conv1d(outputs, is_training, 110 | bn_decay=bn_decay, scope='bn', 111 | data_format=data_format) 112 | 113 | if activation_fn is not None: 114 | outputs = activation_fn(outputs) 115 | return outputs 116 | 117 | 118 | 119 | 120 | def conv2d(inputs, 121 | num_output_channels, 122 | kernel_size, 123 | scope, 124 | stride=[1, 1], 125 | padding='SAME', 126 | data_format='NHWC', 127 | use_xavier=True, 128 | stddev=1e-3, 129 | weight_decay=None, 130 | activation_fn=tf.nn.relu, 131 | bn=False, 132 | bn_decay=None, 133 | is_training=None): 134 | """ 2D convolution with non-linear operation. 
135 | 136 | Args: 137 | inputs: 4-D tensor variable BxHxWxC 138 | num_output_channels: int 139 | kernel_size: a list of 2 ints 140 | scope: string 141 | stride: a list of 2 ints 142 | padding: 'SAME' or 'VALID' 143 | data_format: 'NHWC' or 'NCHW' 144 | use_xavier: bool, use xavier_initializer if true 145 | stddev: float, stddev for truncated_normal init 146 | weight_decay: float 147 | activation_fn: function 148 | bn: bool, whether to use batch norm 149 | bn_decay: float or float tensor variable in [0,1] 150 | is_training: bool Tensor variable 151 | 152 | Returns: 153 | Variable tensor 154 | """ 155 | with tf.variable_scope(scope) as sc: 156 | kernel_h, kernel_w = kernel_size 157 | assert(data_format=='NHWC' or data_format=='NCHW') 158 | if data_format == 'NHWC': 159 | num_in_channels = inputs.get_shape()[-1].value 160 | elif data_format=='NCHW': 161 | num_in_channels = inputs.get_shape()[1].value 162 | kernel_shape = [kernel_h, kernel_w, 163 | num_in_channels, num_output_channels] 164 | kernel = _variable_with_weight_decay('weights', 165 | shape=kernel_shape, 166 | use_xavier=use_xavier, 167 | stddev=stddev, 168 | wd=weight_decay) 169 | stride_h, stride_w = stride 170 | outputs = tf.nn.conv2d(inputs, kernel, 171 | [1, stride_h, stride_w, 1], 172 | padding=padding, 173 | data_format=data_format) 174 | biases = _variable_on_cpu('biases', [num_output_channels], 175 | tf.constant_initializer(0.0)) 176 | outputs = tf.nn.bias_add(outputs, biases, data_format=data_format) 177 | 178 | if bn: 179 | outputs = batch_norm_for_conv2d(outputs, is_training, 180 | bn_decay=bn_decay, scope='bn', 181 | data_format=data_format) 182 | 183 | if activation_fn is not None: 184 | outputs = activation_fn(outputs) 185 | return outputs 186 | 187 | 188 | def conv2d_transpose(inputs, 189 | num_output_channels, 190 | kernel_size, 191 | scope, 192 | stride=[1, 1], 193 | padding='SAME', 194 | use_xavier=True, 195 | stddev=1e-3, 196 | weight_decay=None, 197 | activation_fn=tf.nn.relu, 198 | bn=False, 199 | bn_decay=None, 200 | is_training=None): 201 | """ 2D convolution transpose with non-linear operation. 
202 | 
203 | Args:
204 | inputs: 4-D tensor variable BxHxWxC
205 | num_output_channels: int
206 | kernel_size: a list of 2 ints
207 | scope: string
208 | stride: a list of 2 ints
209 | padding: 'SAME' or 'VALID'
210 | use_xavier: bool, use xavier_initializer if true
211 | stddev: float, stddev for truncated_normal init
212 | weight_decay: float
213 | activation_fn: function
214 | bn: bool, whether to use batch norm
215 | bn_decay: float or float tensor variable in [0,1]
216 | is_training: bool Tensor variable
217 | 
218 | Returns:
219 | Variable tensor
220 | 
221 | Note: conv2d(conv2d_transpose(a, num_out, ksize, stride), a.shape[-1], ksize, stride) == a
222 | """
223 | with tf.variable_scope(scope) as sc:
224 | kernel_h, kernel_w = kernel_size
225 | num_in_channels = inputs.get_shape()[-1].value
226 | kernel_shape = [kernel_h, kernel_w,
227 | num_output_channels, num_in_channels] # reversed compared to conv2d
228 | kernel = _variable_with_weight_decay('weights',
229 | shape=kernel_shape,
230 | use_xavier=use_xavier,
231 | stddev=stddev,
232 | wd=weight_decay)
233 | stride_h, stride_w = stride
234 | 
235 | # from slim.convolution2d_transpose
236 | def get_deconv_dim(dim_size, stride_size, kernel_size, padding):
237 | dim_size *= stride_size
238 | 
239 | if padding == 'VALID' and dim_size is not None:
240 | dim_size += max(kernel_size - stride_size, 0)
241 | return dim_size
242 | 
243 | # calculate output shape
244 | batch_size = inputs.get_shape()[0].value
245 | height = inputs.get_shape()[1].value
246 | width = inputs.get_shape()[2].value
247 | out_height = get_deconv_dim(height, stride_h, kernel_h, padding)
248 | out_width = get_deconv_dim(width, stride_w, kernel_w, padding)
249 | output_shape = [batch_size, out_height, out_width, num_output_channels]
250 | 
251 | outputs = tf.nn.conv2d_transpose(inputs, kernel, output_shape,
252 | [1, stride_h, stride_w, 1],
253 | padding=padding)
254 | biases = _variable_on_cpu('biases', [num_output_channels],
255 | tf.constant_initializer(0.0))
256 | outputs = tf.nn.bias_add(outputs, biases)
257 | 
258 | if bn:
259 | outputs = batch_norm_for_conv2d(outputs, is_training,
260 | bn_decay=bn_decay, scope='bn')
261 | 
262 | if activation_fn is not None:
263 | outputs = activation_fn(outputs)
264 | return outputs
265 | 
266 | 
267 | 
268 | def conv3d(inputs,
269 | num_output_channels,
270 | kernel_size,
271 | scope,
272 | stride=[1, 1, 1],
273 | padding='SAME',
274 | use_xavier=True,
275 | stddev=1e-3,
276 | weight_decay=None,
277 | activation_fn=tf.nn.relu,
278 | bn=False,
279 | bn_decay=None,
280 | is_training=None):
281 | """ 3D convolution with non-linear operation.
282 | 283 | Args: 284 | inputs: 5-D tensor variable BxDxHxWxC 285 | num_output_channels: int 286 | kernel_size: a list of 3 ints 287 | scope: string 288 | stride: a list of 3 ints 289 | padding: 'SAME' or 'VALID' 290 | use_xavier: bool, use xavier_initializer if true 291 | stddev: float, stddev for truncated_normal init 292 | weight_decay: float 293 | activation_fn: function 294 | bn: bool, whether to use batch norm 295 | bn_decay: float or float tensor variable in [0,1] 296 | is_training: bool Tensor variable 297 | 298 | Returns: 299 | Variable tensor 300 | """ 301 | with tf.variable_scope(scope) as sc: 302 | kernel_d, kernel_h, kernel_w = kernel_size 303 | num_in_channels = inputs.get_shape()[-1].value 304 | kernel_shape = [kernel_d, kernel_h, kernel_w, 305 | num_in_channels, num_output_channels] 306 | kernel = _variable_with_weight_decay('weights', 307 | shape=kernel_shape, 308 | use_xavier=use_xavier, 309 | stddev=stddev, 310 | wd=weight_decay) 311 | stride_d, stride_h, stride_w = stride 312 | outputs = tf.nn.conv3d(inputs, kernel, 313 | [1, stride_d, stride_h, stride_w, 1], 314 | padding=padding) 315 | biases = _variable_on_cpu('biases', [num_output_channels], 316 | tf.constant_initializer(0.0)) 317 | outputs = tf.nn.bias_add(outputs, biases) 318 | 319 | if bn: 320 | outputs = batch_norm_for_conv3d(outputs, is_training, 321 | bn_decay=bn_decay, scope='bn') 322 | 323 | if activation_fn is not None: 324 | outputs = activation_fn(outputs) 325 | return outputs 326 | 327 | def fully_connected(inputs, 328 | num_outputs, 329 | scope, 330 | use_xavier=True, 331 | stddev=1e-3, 332 | weight_decay=None, 333 | activation_fn=tf.nn.relu, 334 | bn=False, 335 | bn_decay=None, 336 | is_training=None): 337 | """ Fully connected layer with non-linear operation. 338 | 339 | Args: 340 | inputs: 2-D tensor BxN 341 | num_outputs: int 342 | 343 | Returns: 344 | Variable tensor of size B x num_outputs. 345 | """ 346 | with tf.variable_scope(scope) as sc: 347 | num_input_units = inputs.get_shape()[-1].value 348 | weights = _variable_with_weight_decay('weights', 349 | shape=[num_input_units, num_outputs], 350 | use_xavier=use_xavier, 351 | stddev=stddev, 352 | wd=weight_decay) 353 | outputs = tf.matmul(inputs, weights) 354 | biases = _variable_on_cpu('biases', [num_outputs], 355 | tf.constant_initializer(0.0)) 356 | outputs = tf.nn.bias_add(outputs, biases) 357 | 358 | if bn: 359 | outputs = batch_norm_for_fc(outputs, is_training, bn_decay, 'bn') 360 | 361 | if activation_fn is not None: 362 | outputs = activation_fn(outputs) 363 | return outputs 364 | 365 | 366 | def max_pool2d(inputs, 367 | kernel_size, 368 | scope, 369 | stride=[2, 2], 370 | padding='VALID'): 371 | """ 2D max pooling. 372 | 373 | Args: 374 | inputs: 4-D tensor BxHxWxC 375 | kernel_size: a list of 2 ints 376 | stride: a list of 2 ints 377 | 378 | Returns: 379 | Variable tensor 380 | """ 381 | with tf.variable_scope(scope) as sc: 382 | kernel_h, kernel_w = kernel_size 383 | stride_h, stride_w = stride 384 | outputs = tf.nn.max_pool(inputs, 385 | ksize=[1, kernel_h, kernel_w, 1], 386 | strides=[1, stride_h, stride_w, 1], 387 | padding=padding, 388 | name=sc.name) 389 | return outputs 390 | 391 | def avg_pool2d(inputs, 392 | kernel_size, 393 | scope, 394 | stride=[2, 2], 395 | padding='VALID'): 396 | """ 2D avg pooling. 
397 | 
398 | Args:
399 | inputs: 4-D tensor BxHxWxC
400 | kernel_size: a list of 2 ints
401 | stride: a list of 2 ints
402 | 
403 | Returns:
404 | Variable tensor
405 | """
406 | with tf.variable_scope(scope) as sc:
407 | kernel_h, kernel_w = kernel_size
408 | stride_h, stride_w = stride
409 | outputs = tf.nn.avg_pool(inputs,
410 | ksize=[1, kernel_h, kernel_w, 1],
411 | strides=[1, stride_h, stride_w, 1],
412 | padding=padding,
413 | name=sc.name)
414 | return outputs
415 | 
416 | 
417 | def max_pool3d(inputs,
418 | kernel_size,
419 | scope,
420 | stride=[2, 2, 2],
421 | padding='VALID'):
422 | """ 3D max pooling.
423 | 
424 | Args:
425 | inputs: 5-D tensor BxDxHxWxC
426 | kernel_size: a list of 3 ints
427 | stride: a list of 3 ints
428 | 
429 | Returns:
430 | Variable tensor
431 | """
432 | with tf.variable_scope(scope) as sc:
433 | kernel_d, kernel_h, kernel_w = kernel_size
434 | stride_d, stride_h, stride_w = stride
435 | outputs = tf.nn.max_pool3d(inputs,
436 | ksize=[1, kernel_d, kernel_h, kernel_w, 1],
437 | strides=[1, stride_d, stride_h, stride_w, 1],
438 | padding=padding,
439 | name=sc.name)
440 | return outputs
441 | 
442 | def avg_pool3d(inputs,
443 | kernel_size,
444 | scope,
445 | stride=[2, 2, 2],
446 | padding='VALID'):
447 | """ 3D avg pooling.
448 | 
449 | Args:
450 | inputs: 5-D tensor BxDxHxWxC
451 | kernel_size: a list of 3 ints
452 | stride: a list of 3 ints
453 | 
454 | Returns:
455 | Variable tensor
456 | """
457 | with tf.variable_scope(scope) as sc:
458 | kernel_d, kernel_h, kernel_w = kernel_size
459 | stride_d, stride_h, stride_w = stride
460 | outputs = tf.nn.avg_pool3d(inputs,
461 | ksize=[1, kernel_d, kernel_h, kernel_w, 1],
462 | strides=[1, stride_d, stride_h, stride_w, 1],
463 | padding=padding,
464 | name=sc.name)
465 | return outputs
466 | 
467 | 
468 | def batch_norm_template_unused(inputs, is_training, scope, moments_dims, bn_decay):
469 | """ NOTE: this is an older version of the util func; it is deprecated.
470 | Batch normalization on convolutional maps and beyond...
471 | Ref.: http://stackoverflow.com/questions/33949786/how-could-i-use-batch-normalization-in-tensorflow
472 | 
473 | Args:
474 | inputs: Tensor, k-D input ... x C could be BC or BHWC or BDHWC
475 | is_training: boolean tf.Variable, true indicates training phase
476 | scope: string, variable scope
477 | moments_dims: a list of ints, indicating dimensions for moments calculation
478 | bn_decay: float or float tensor variable, controlling moving average weight
479 | Return:
480 | normed: batch-normalized maps
481 | """
482 | with tf.variable_scope(scope) as sc:
483 | num_channels = inputs.get_shape()[-1].value
484 | beta = _variable_on_cpu(name='beta',shape=[num_channels],
485 | initializer=tf.constant_initializer(0))
486 | gamma = _variable_on_cpu(name='gamma',shape=[num_channels],
487 | initializer=tf.constant_initializer(1.0))
488 | batch_mean, batch_var = tf.nn.moments(inputs, moments_dims, name='moments')
489 | decay = bn_decay if bn_decay is not None else 0.9
490 | ema = tf.train.ExponentialMovingAverage(decay=decay)
491 | # Operator that maintains moving averages of variables.
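  | # (ema.apply() creates one shadow variable per tracked tensor and updates it as
  | # shadow = decay*shadow + (1-decay)*value; ema.average() reads that shadow at test time.)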
492 | # Need to set reuse=False, otherwise if reuse, will see moments_1/mean/ExponentialMovingAverage/ does not exist
493 | # https://github.com/shekkizh/WassersteinGAN.tensorflow/issues/3
494 | with tf.variable_scope(tf.get_variable_scope(), reuse=False):
495 | ema_apply_op = tf.cond(is_training,
496 | lambda: ema.apply([batch_mean, batch_var]),
497 | lambda: tf.no_op())
498 | 
499 | # Update moving average and return current batch's avg and var.
500 | def mean_var_with_update():
501 | with tf.control_dependencies([ema_apply_op]):
502 | return tf.identity(batch_mean), tf.identity(batch_var)
503 | 
504 | # ema.average returns the Variable holding the average of var.
505 | mean, var = tf.cond(is_training,
506 | mean_var_with_update,
507 | lambda: (ema.average(batch_mean), ema.average(batch_var)))
508 | normed = tf.nn.batch_normalization(inputs, mean, var, beta, gamma, 1e-3)
509 | return normed
510 | 
511 | 
512 | def batch_norm_template(inputs, is_training, scope, moments_dims_unused, bn_decay, data_format='NHWC'):
513 | """ Batch normalization on convolutional maps and beyond...
514 | Ref.: http://stackoverflow.com/questions/33949786/how-could-i-use-batch-normalization-in-tensorflow
515 | 
516 | Args:
517 | inputs: Tensor, k-D input ... x C could be BC or BHWC or BDHWC
518 | is_training: boolean tf.Variable, true indicates training phase
519 | scope: string, variable scope
520 | moments_dims: a list of ints, indicating dimensions for moments calculation
521 | bn_decay: float or float tensor variable, controlling moving average weight
522 | data_format: 'NHWC' or 'NCHW'
523 | Return:
524 | normed: batch-normalized maps
525 | """
526 | bn_decay = bn_decay if bn_decay is not None else 0.9
527 | return tf.contrib.layers.batch_norm(inputs,
528 | center=True, scale=True,
529 | is_training=is_training, decay=bn_decay, updates_collections=None,
530 | scope=scope,
531 | data_format=data_format)
532 | 
533 | 
534 | def batch_norm_for_fc(inputs, is_training, bn_decay, scope):
535 | """ Batch normalization on FC data.
536 | 
537 | Args:
538 | inputs: Tensor, 2D BxC input
539 | is_training: boolean tf.Variable, true indicates training phase
540 | bn_decay: float or float tensor variable, controlling moving average weight
541 | scope: string, variable scope
542 | Return:
543 | normed: batch-normalized maps
544 | """
545 | return batch_norm_template(inputs, is_training, scope, [0,], bn_decay)
546 | 
547 | 
548 | def batch_norm_for_conv1d(inputs, is_training, bn_decay, scope, data_format):
549 | """ Batch normalization on 1D convolutional maps.
550 | 
551 | Args:
552 | inputs: Tensor, 3D BLC input maps
553 | is_training: boolean tf.Variable, true indicates training phase
554 | bn_decay: float or float tensor variable, controlling moving average weight
555 | scope: string, variable scope
556 | data_format: 'NHWC' or 'NCHW'
557 | Return:
558 | normed: batch-normalized maps
559 | """
560 | return batch_norm_template(inputs, is_training, scope, [0,1], bn_decay, data_format)
561 | 
562 | 
563 | 
564 | 
565 | def batch_norm_for_conv2d(inputs, is_training, bn_decay, scope, data_format):
566 | """ Batch normalization on 2D convolutional maps.
567 | 
568 | Args:
569 | inputs: Tensor, 4D BHWC input maps
570 | is_training: boolean tf.Variable, true indicates training phase
571 | bn_decay: float or float tensor variable, controlling moving average weight
572 | scope: string, variable scope
573 | data_format: 'NHWC' or 'NCHW'
574 | Return:
575 | normed: batch-normalized maps
576 | """
577 | return batch_norm_template(inputs, is_training, scope, [0,1,2], bn_decay, data_format)
578 | 
579 | 
580 | def batch_norm_for_conv3d(inputs, is_training, bn_decay, scope):
581 | """ Batch normalization on 3D convolutional maps.
582 | 
583 | Args:
584 | inputs: Tensor, 5D BDHWC input maps
585 | is_training: boolean tf.Variable, true indicates training phase
586 | bn_decay: float or float tensor variable, controlling moving average weight
587 | scope: string, variable scope
588 | Return:
589 | normed: batch-normalized maps
590 | """
591 | return batch_norm_template(inputs, is_training, scope, [0,1,2,3], bn_decay)
592 | 
593 | 
594 | def dropout(inputs,
595 | is_training,
596 | scope,
597 | keep_prob=0.5,
598 | noise_shape=None):
599 | """ Dropout layer.
600 | 
601 | Args:
602 | inputs: tensor
603 | is_training: boolean tf.Variable
604 | scope: string
605 | keep_prob: float in [0,1]
606 | noise_shape: list of ints
607 | 
608 | Returns:
609 | tensor variable
610 | """
611 | with tf.variable_scope(scope) as sc:
612 | outputs = tf.cond(is_training,
613 | lambda: tf.nn.dropout(inputs, keep_prob, noise_shape),
614 | lambda: inputs)
615 | return outputs
616 | 
--------------------------------------------------------------------------------
/train_pc_joint_multi_combinesample_queue.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import math
3 | from datetime import datetime
4 | #import h5pyprovider
5 | import numpy as np
6 | import tensorflow as tf
7 | import socket
8 | import importlib
9 | import os
10 | import sys
11 | ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
12 | sys.path.append(ROOT_DIR) # model
13 | import provider
14 | import tf_util
15 | import pc_util
16 | import scannet_dataset
17 | import scannet_dataset_multi
18 | #import suncg_dataset_multi
19 | import suncg_dataset_multi
20 | import math
21 | from multiprocessing import Process, Queue
22 | 
23 | parser = argparse.ArgumentParser()
24 | parser.add_argument('--gpu', type=int, default=0, help='GPU to use [default: GPU 0]')
25 | parser.add_argument('--model', default='pointnet2_sem_seg', help='Model name [default: pointnet2_sem_seg]')
26 | parser.add_argument('--log_dir', default='log_pc_joint/', help='Log dir [default: log_pc_joint/]')
27 | ### Start from the independently trained (isolated) models
28 | parser.add_argument('--restore_dir1', default='models/PCIII/best_model.ckpt', help='Restore dir for PC III (4096 points) [default: models/PCIII/best_model.ckpt]')
29 | parser.add_argument('--restore_dir2', default='models/PCII/best_model.ckpt', help='Restore dir for PC II (8192 points) [default: models/PCII/best_model.ckpt]')
30 | parser.add_argument('--restore_dir3', default='models/PCI/best_model.ckpt', help='Restore dir for PC I (12288 points) [default: models/PCI/best_model.ckpt]')
31 | parser.add_argument('--max_epoch', type=int, default=201*3, help='Epochs to run [default: 603]')
32 | parser.add_argument('--batch_size', type=int, default=16, help='Batch Size during training [default: 16]')
33 | parser.add_argument('--learning_rate', type=float, default=0.001, help='Initial learning rate [default: 0.001]')
34 | parser.add_argument('--momentum', type=float, default=0.9, help='Momentum for momentum optimizer [default: 0.9]')
35 | parser.add_argument('--optimizer', default='adam', help='adam or momentum [default: adam]')
36 | parser.add_argument('--decay_step', type=int, default=200000, help='Decay step for lr decay [default: 200000]')
37 | parser.add_argument('--decay_rate', type=float, default=0.7, help='Decay rate for lr decay [default: 0.7]')
38 | FLAGS = parser.parse_args()
39 | 
40 | EPOCH_CNT = 0
41 | BATCH_SIZE = FLAGS.batch_size
42 | NUM_POINT = [4096, 8192, 12288]
43 | NUM_POINT_MORE = [4096, 4096, 4096, 8192, 8192, 12288]
44 | NUM_REP = [3, 2, 1]
45 | NUM_PATH = 3
46 | MAX_EPOCH = FLAGS.max_epoch
47 | BASE_LEARNING_RATE = FLAGS.learning_rate
48 | GPU_INDEX = FLAGS.gpu
49 | MOMENTUM = FLAGS.momentum
50 | OPTIMIZER = FLAGS.optimizer
51 | DECAY_STEP = FLAGS.decay_step
52 | DECAY_RATE = FLAGS.decay_rate
53 | restore_dir = [FLAGS.restore_dir1, FLAGS.restore_dir2, FLAGS.restore_dir3]
54 | 
55 | MODEL = importlib.import_module(FLAGS.model) # import network module
56 | MODEL_FILE = FLAGS.model+'.py'
57 | LOG_DIR = FLAGS.log_dir
58 | if not os.path.exists(LOG_DIR): os.mkdir(LOG_DIR)
59 | os.system('cp %s %s' % (MODEL_FILE, LOG_DIR)) # bkp of model def
60 | LOG_FOUT = open(os.path.join(LOG_DIR, 'log_train.txt'), 'w')
61 | LOG_FOUT.write(str(FLAGS)+'\n')
62 | 
63 | BN_INIT_DECAY = 0.5
64 | BN_DECAY_DECAY_RATE = 0.5
65 | BN_DECAY_DECAY_STEP = float(DECAY_STEP)
66 | BN_DECAY_CLIP = 0.99
67 | 
68 | HOSTNAME = socket.gethostname()
69 | 
70 | NUM_CLASSES = 21
71 | 
72 | # ScanNet train/test split
73 | DATA_PATH = os.path.join(ROOT_DIR,'data','scannet_data_pointnet2')
74 | TRAIN_DATASET = scannet_dataset_multi.ScannetDataset(root=DATA_PATH, npoints=NUM_POINT, split='train')
75 | TEST_DATASET_WHOLE_SCENE = []
76 | for i in range(len(NUM_POINT)):
77 | TEST_DATASET_WHOLE_SCENE.append(scannet_dataset.ScannetDatasetWholeScene(root=DATA_PATH, npoints=NUM_POINT[i], split='test'))
78 | SUNCG_DATASET = suncg_dataset_multi.SuncgDataset(root=DATA_PATH, npoints=NUM_POINT, split='train', batch_size=BATCH_SIZE//2)
79 | 
80 | DATA_QUEUE = Queue(maxsize=100)
81 | DATA_QUEUE_SUN = Queue(maxsize=100)
82 | 
83 | def data_producer_suncg():
84 | while True:
85 | if DATA_QUEUE_SUN.qsize() < 100:
86 | data = SUNCG_DATASET.get_next()
87 | DATA_QUEUE_SUN.put(data)
88 | 
89 | def data_producer():
90 | num_batches = len(TRAIN_DATASET)/(BATCH_SIZE // 2)
91 | train_idxs = np.arange(0, len(TRAIN_DATASET))
92 | np.random.shuffle(train_idxs)
93 | batch_idx = 0
94 | 
95 | while True:
96 | if DATA_QUEUE.qsize() < 100:
97 | if batch_idx >= num_batches:
98 | batch_idx = 0
99 | train_idxs = np.arange(0, len(TRAIN_DATASET))
100 | np.random.shuffle(train_idxs)
101 | start_idx = batch_idx * (BATCH_SIZE // 2)
102 | end_idx = (batch_idx+1) * (BATCH_SIZE // 2)
103 | data = get_batch_wdp(TRAIN_DATASET, train_idxs, start_idx, end_idx)
104 | DATA_QUEUE.put(data)
105 | batch_idx += 1
106 | 
107 | def pc_normalize_batch(pc):
108 | bsize = pc.shape[0]
109 | newpc = []
110 | for i in range(bsize):
111 | curpc = pc[i]
112 | centroid = np.mean(curpc, axis=0)
113 | curpc = curpc - centroid
114 | m = np.max(np.sqrt(np.sum(curpc**2, axis=1)))
115 | curpc = curpc / m
116 | newpc.append(curpc)
117 | return np.array(newpc)
118 | 
119 | def log_string(out_str):
120 | LOG_FOUT.write(out_str+'\n')
121 | LOG_FOUT.flush()
122 | print(out_str)
123 | 
124 | def get_learning_rate(batch):
125 | learning_rate = tf.train.exponential_decay(
126 | BASE_LEARNING_RATE, # Base learning rate.
127 | batch * BATCH_SIZE, # Current index into the dataset.
128 | DECAY_STEP, # Decay step.
129 | DECAY_RATE, # Decay rate.
130 | staircase=True)
131 | learning_rate = tf.maximum(learning_rate, 0.00001) # CLIP THE LEARNING RATE!
132 | return learning_rate
133 | 
134 | def get_bn_decay(batch):
135 | bn_momentum = tf.train.exponential_decay(
136 | BN_INIT_DECAY,
137 | batch*BATCH_SIZE,
138 | BN_DECAY_DECAY_STEP,
139 | BN_DECAY_DECAY_RATE,
140 | staircase=True)
141 | bn_decay = tf.minimum(BN_DECAY_CLIP, 1 - bn_momentum)
142 | return bn_decay
143 | 
144 | def train():
145 | ### Start the data processing queue here
146 | scannet_p = Process(target=data_producer)
147 | scannet_p.start()
148 | print ("started scannet data processing process")
149 | suncg_p = Process(target=data_producer_suncg)
150 | suncg_p.start()
151 | print ("started suncg data processing process")
152 | 
153 | with tf.Graph().as_default():
154 | with tf.device('/cpu:0'):
155 | pointclouds_pl_4096, labels_pl_4096, smpws_pl_4096 = MODEL.placeholder_inputs(BATCH_SIZE, NUM_POINT[0], "4096")
156 | pointclouds_pl_8192, labels_pl_8192, smpws_pl_8192 = MODEL.placeholder_inputs(BATCH_SIZE, NUM_POINT[1], "8192")
157 | pointclouds_pl_12288, labels_pl_12288, smpws_pl_12288 = MODEL.placeholder_inputs(BATCH_SIZE, NUM_POINT[2], "12288")
158 | is_training_pl = tf.placeholder(tf.bool, shape=())
159 | data_select = tf.placeholder(tf.int32, shape=())
160 | pt = tf.cond(tf.less(data_select, 1), lambda: pointclouds_pl_4096, lambda: tf.cond(tf.less(data_select, 2), lambda: pointclouds_pl_8192, lambda: pointclouds_pl_12288))
161 | label = tf.cond(tf.less(data_select, 1), lambda: labels_pl_4096, lambda: tf.cond(tf.less(data_select, 2), lambda: labels_pl_8192, lambda: labels_pl_12288))
162 | smpw = tf.cond(tf.less(data_select, 1), lambda: smpws_pl_4096, lambda: tf.cond(tf.less(data_select, 2), lambda: smpws_pl_8192, lambda: smpws_pl_12288))
163 | 
164 | # Note the global_step=batch parameter to minimize.
165 | # That tells the optimizer to helpfully increment the 'batch' parameter
166 | # for you every time it trains.
167 | batch = tf.get_variable('batch', [],
168 | initializer=tf.constant_initializer(0), trainable=False)
169 | bn_decay = get_bn_decay(batch)
170 | tf.summary.scalar('bn_decay', bn_decay)
171 | 
172 | # Set learning rate and optimizer
173 | learning_rate = get_learning_rate(batch)
174 | tf.summary.scalar('learning_rate', learning_rate)
175 | if OPTIMIZER == 'momentum':
176 | optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=MOMENTUM)
177 | elif OPTIMIZER == 'adam':
178 | optimizer = tf.train.AdamOptimizer(learning_rate)
179 | 
180 | # -------------------------------------------
181 | # Get model and loss on multiple GPU devices
182 | # -------------------------------------------
183 | # Allocating variables on CPU first will greatly accelerate multi-gpu training.
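  | # (Creating the variables once under /cpu:0 keeps a single master copy that the GPU tower
  | # and the per-path shadow copies below all read from, instead of duplicating weights per device.)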
184 | # Ref: https://github.com/kuza55/keras-extras/issues/21
185 | print "--- Get model and loss"
186 | # Get model and loss
187 | pred, end_points = MODEL.get_model(pt, is_training_pl, NUM_CLASSES, bn_decay=bn_decay) # build the shared variables once
188 | 
189 | with tf.variable_scope(tf.get_variable_scope(), reuse=True):
190 | with tf.device('/gpu:%d'%(0)), tf.name_scope('gpu_%d'%(0)) as scope:
191 | pred, end_points = MODEL.get_model(pt, is_training_pl, NUM_CLASSES, bn_decay=bn_decay)
192 | MODEL.get_loss(pred, label, smpw)
193 | losses = tf.get_collection('losses', scope)
194 | total_loss = tf.add_n(losses, name='total_loss')
195 | 
196 | # Get training operator
197 | grads = optimizer.compute_gradients(total_loss)
198 | 
199 | train_op = optimizer.apply_gradients(grads, global_step=batch)
200 | 
201 | # Add ops to save and restore all the variables.
202 | saver = tf.train.Saver(max_to_keep=20)
203 | 
204 | # Create a session
205 | config = tf.ConfigProto()
206 | config.gpu_options.allow_growth = True
207 | config.allow_soft_placement = True
208 | config.log_device_placement = False
209 | sess = tf.Session(config=config)
210 | 
211 | # Add summary writers
212 | merged = tf.summary.merge_all()
213 | train_writer = tf.summary.FileWriter(os.path.join(LOG_DIR, 'train'), sess.graph)
214 | test_writer = tf.summary.FileWriter(os.path.join(LOG_DIR, 'test'), sess.graph)
215 | 
216 | # Add the multi-model part:
217 | # create one non-trainable shadow copy of every variable per path, plus copy ops in both directions
218 | copy_to_model0_op = []
219 | copy_from_model0_op = []
220 | all_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
221 | print ("Shared variables")
222 | for i in range(1, NUM_PATH+1): ## 3 different pcs
223 | copy_from_model0_op_i = []
224 | copy_to_model0_op_i = []
225 | for var in all_vars:
226 | #new_var_name = var.name.replace('model0', 'model%d' % i)
227 | new_var_name = ("pc%d/" % i) + var.name
228 | if var in tf.trainable_variables():
229 | trainable = True
230 | else:
231 | trainable = False
232 | new_var = tf.get_variable(new_var_name.split(':')[0], shape=var.shape, dtype=var.dtype, trainable=trainable)
233 | copy_from_model0_op_i.append(new_var.assign(var))
234 | copy_to_model0_op_i.append(var.assign(new_var))
235 | copy_to_model0_op.append(copy_to_model0_op_i)
236 | copy_from_model0_op.append(copy_from_model0_op_i)
237 | # Init variables
238 | init = tf.global_variables_initializer()
239 | sess.run(init)
240 | for i in range(len(NUM_POINT)):
241 | saver.restore(sess, restore_dir[i])
242 | sess.run(copy_from_model0_op[i])
243 | 
244 | ops = {'pointclouds_pl1': pointclouds_pl_4096,
245 | 'labels_pl1': labels_pl_4096,
246 | 'smpws_pl1': smpws_pl_4096,
247 | 'pointclouds_pl2': pointclouds_pl_8192,
248 | 'labels_pl2': labels_pl_8192,
249 | 'smpws_pl2': smpws_pl_8192,
250 | 'pointclouds_pl3': pointclouds_pl_12288,
251 | 'labels_pl3': labels_pl_12288,
252 | 'smpws_pl3': smpws_pl_12288,
253 | 'data_select': data_select,
254 | 'is_training_pl': is_training_pl,
255 | 'pred': pred,
256 | 'loss': total_loss,
257 | 'train_op': train_op,
258 | 'merged': merged,
259 | 'step': batch,
260 | 'end_points': end_points}
261 | 
262 | ### Evaluate the restored models once so each path starts
263 | ### from its previous best accuracy
264 | best_acc = []
265 | for i in range(len(NUM_POINT)):
266 | print ("Testing pc with "+str(NUM_POINT[i]))
267 | sess.run(copy_to_model0_op[i])
268 | best_acc.append(eval_whole_scene_one_epoch(sess, ops, test_writer, TEST_DATASET_WHOLE_SCENE[i], NUM_POINT[i], i))
269 | 
270 | for epoch in range(MAX_EPOCH):
271 | log_string('**** EPOCH %03d ****' % (epoch))
272 | sys.stdout.flush()
273 | 
274 | train_one_epoch(sess, ops, train_writer, copy_from_model0_op, copy_to_model0_op)
275 | 
276 | ### Testing takes some time, start testing after 200 epochs
277 | if epoch > 200:
278 | if (epoch+1)%5==0:
279 | counter = 0
280 | for i in range(len(NUM_POINT)):
281 | print ("Testing pc with "+str(NUM_POINT[i]))
282 | sess.run(copy_to_model0_op[i])
283 | acc = eval_whole_scene_one_epoch(sess, ops, test_writer, TEST_DATASET_WHOLE_SCENE[i], NUM_POINT[i], i)
284 | if acc > best_acc[i]:
285 | best_acc[i] = acc
286 | save_path = saver.save(sess, os.path.join(LOG_DIR+'model'+str(i+1), "best_model_for%01d_epoch_%03d.ckpt"%(i, epoch)))
287 | log_string("Model saved in file: %s" % save_path)
288 | else:
289 | if (epoch+1)%20==0:
290 | counter = 0
291 | for i in range(len(NUM_POINT)):
292 | print ("Testing pc with "+str(NUM_POINT[i]))
293 | sess.run(copy_to_model0_op[i])
294 | acc = eval_whole_scene_one_epoch(sess, ops, test_writer, TEST_DATASET_WHOLE_SCENE[i], NUM_POINT[i], i)
295 | if acc > best_acc[i]:
296 | best_acc[i] = acc
297 | save_path = saver.save(sess, os.path.join(LOG_DIR+'model'+str(i+1), "best_model_for%01d_epoch_%03d.ckpt"%(i, epoch)))
298 | log_string("Model saved in file: %s" % save_path)
299 | 
300 | # Save the variables to disk.
301 | if epoch % 10 == 0:
302 | for i in range(len(NUM_POINT)):
303 | sess.run(copy_to_model0_op[i])
304 | save_path = saver.save(sess, os.path.join(LOG_DIR+'model'+str(i+1), "model_for%01d.ckpt"%i))
305 | log_string("Model saved in file: %s" % save_path)
306 | 
307 | ### Terminate the data preprocessing here
308 | scannet_p.terminate()
309 | suncg_p.terminate()
310 | 
311 | def get_batch_wdp(dataset, idxs, start_idx, end_idx):
312 | bsize = end_idx-start_idx
313 | batch_data = []
314 | batch_label = []
315 | batch_smpw = []
316 | 
317 | for i in range(len(NUM_POINT)):
318 | for j in range(NUM_REP[i]):
319 | batch_data.append(np.zeros((bsize, NUM_POINT[i], 3)))
320 | batch_label.append(np.zeros((bsize, NUM_POINT[i]), dtype=np.int32))
321 | batch_smpw.append(np.zeros((bsize, NUM_POINT[i]), dtype=np.float32))
322 | for i in range(bsize):
323 | ps,seg,smpw = dataset[idxs[i+start_idx]]
324 | counter = 0
325 | for j in range(len(NUM_POINT)):
326 | for k in range(NUM_REP[j]):
327 | batch_data[counter][i,...] = ps[counter]
328 | batch_label[counter][i,:] = seg[counter]
329 | batch_smpw[counter][i,:] = smpw[counter]
330 | 
331 | dropout_ratio = np.random.random()*0.875 # 0-0.875
332 | drop_idx = np.where(np.random.random((ps[counter].shape[0]))<=dropout_ratio)[0]
333 | batch_data[counter][i,drop_idx,:] = batch_data[counter][i,0,:]
334 | batch_label[counter][i,drop_idx] = batch_label[counter][i,0]
335 | batch_smpw[counter][i,drop_idx] *= 0
336 | counter += 1
337 | return batch_data, batch_label, batch_smpw
338 | 
339 | def get_batch(dataset, idxs, start_idx, end_idx, num_point):
340 | bsize = end_idx-start_idx
341 | batch_data = np.zeros((bsize, num_point, 3))
342 | batch_label = np.zeros((bsize, num_point), dtype=np.int32)
343 | batch_smpw = np.zeros((bsize, num_point), dtype=np.float32)
344 | for i in range(bsize):
345 | ps,seg,smpw = dataset[idxs[i+start_idx]]
339 | def get_batch(dataset, idxs, start_idx, end_idx, num_point): 340 | bsize = end_idx-start_idx 341 | batch_data = np.zeros((bsize, num_point, 3)) 342 | batch_label = np.zeros((bsize, num_point), dtype=np.int32) 343 | batch_smpw = np.zeros((bsize, num_point), dtype=np.float32) 344 | for i in range(bsize): 345 | ps,seg,smpw = dataset[idxs[i+start_idx]] 346 | batch_data[i,...] = ps 347 | batch_label[i,:] = seg 348 | batch_smpw[i,:] = smpw 349 | return batch_data, batch_label, batch_smpw 350 | 351 | def add_empty(feed_dict, data_mode, ops): 352 | for i in range(len(NUM_POINT)): 353 | if i != data_mode: 354 | feed_dict[ops['pointclouds_pl'+str(i+1)]] = np.zeros([BATCH_SIZE, NUM_POINT[i], 3]) 355 | feed_dict[ops['labels_pl'+str(i+1)]] = np.zeros([BATCH_SIZE, NUM_POINT[i]]) 356 | feed_dict[ops['smpws_pl'+str(i+1)]] = np.zeros([BATCH_SIZE, NUM_POINT[i]]) 357 | return feed_dict 358 | 359 | def shuffle_batch(batch_data, idx): 360 | batch_data_temp = [] 361 | batch = [] 362 | for i in range(len(batch_data)): 363 | batch_data_temp.append(batch_data[i].copy()) 364 | for i in range(len(idx)): 365 | batch.append(batch_data_temp[idx[i]][i,...]) 366 | return np.stack(batch,0) 367 | 368 | def shuffle_data(batch_data): 369 | ### Shuffle among batch copies 0, 1 and 2; copies 3 and 4 are shuffled below 370 | chunk = len(batch_data[0][0])//3+1 371 | idx = np.array([0]*chunk + [1]*chunk + [2]*(len(batch_data[0][0]) - 2*chunk)) 372 | np.random.shuffle(idx) 373 | 374 | for i in range(len(batch_data)): 375 | batch_data[i][0] = shuffle_batch([batch_data[i][0], batch_data[i][1], batch_data[i][2]], idx) 376 | 377 | #idx = np.arange(len(batch_data[0][0])*2) 378 | chunk = len(batch_data[0][0])//2 379 | idx = np.array([0]*chunk + [1]*chunk) 380 | np.random.shuffle(idx) 381 | 382 | for i in range(len(batch_data)): 383 | batch_data[i][3] = shuffle_batch([batch_data[i][3], batch_data[i][4]], idx) 384 | 385 | return batch_data 386 | 387 | def train_one_epoch(sess, ops, train_writer, copy_from_model0_op, copy_to_model0_op): 388 | """ ops: dict mapping from string to tf ops """ 389 | is_training = True 390 | 391 | # Shuffle train samples 392 | train_idxs = np.arange(0, len(TRAIN_DATASET)) 393 | np.random.shuffle(train_idxs) 394 | num_batches = len(TRAIN_DATASET)/(BATCH_SIZE // 2) 395 | 396 | log_string(str(datetime.now())) 397 | 398 | total_correct = 0 399 | total_seen = 0 400 | loss_sum = 0 401 | batch_idx_suncg = 0 402 | for batch_idx in range(num_batches): 403 | start_idx = batch_idx * (BATCH_SIZE // 2) 404 | end_idx = (batch_idx+1) * (BATCH_SIZE // 2) 405 | while DATA_QUEUE_SUN.empty(): 406 | pass 407 | temp_batch_data, batch_smpw = DATA_QUEUE_SUN.get() 408 | 409 | SUNCG_DATASET.check_gone(temp_batch_data[-1], batch_smpw[-1]) ## Only give 12288 points to voxel 410 | batch_data = [] 411 | for i in range(len(temp_batch_data)): 412 | batch_data.append(np.zeros((BATCH_SIZE, NUM_POINT_MORE[i], 3))) 413 | batch_data[i][0:BATCH_SIZE//2,:,:] = temp_batch_data[i] 414 | 415 | pred_val = [] 416 | counter = 0 417 | for i in range(len(NUM_POINT)): 418 | for j in range(NUM_REP[i]): 419 | sess.run(copy_to_model0_op[i]) 420 | feed_dict = {} 421 | feed_dict[ops['is_training_pl']] = False 422 | feed_dict[ops['data_select']] = i 423 | feed_dict[ops['pointclouds_pl'+str(i+1)]] = batch_data[counter] 424 | temp_pred_val = sess.run(ops['pred'], feed_dict=add_empty(feed_dict, i, ops)) 425 | pred_val.append(np.squeeze(np.argmax(temp_pred_val[0:BATCH_SIZE//2,...], 2))) 426 | counter += 1 427 | 428 | ### Combine with other sources here 429 | batch_data_extra, batch_label_extra, batch_smpw_extra = SUNCG_DATASET.ready(temp_batch_data, pred_val, batch_smpw, TRAIN_DATASET.labelweights) 430 | 431 | while DATA_QUEUE.empty(): 432 | pass 433 | batch_data, batch_label, batch_smpw = DATA_QUEUE.get() 434 | shuffled_data = shuffle_data([batch_data, batch_label, batch_smpw]) 435 | batch_data, batch_label, batch_smpw = shuffled_data[0], shuffled_data[1], shuffled_data[2] 436 |
shuffled_data = shuffle_data([batch_data_extra, batch_label_extra, batch_smpw_extra]) 437 | batch_data_extra, batch_label_extra, batch_smpw_extra = shuffled_data[0], shuffled_data[1], shuffled_data[2] 438 | ### Combine data 439 | counter = 0 440 | for i in range(len(NUM_POINT)): 441 | for j in range(NUM_REP[i]): 442 | if j == 0: 443 | sess.run(copy_to_model0_op[i]) 444 | batch_data_temp = np.concatenate([batch_data[counter], batch_data_extra[counter]], 0) 445 | batch_label_temp = np.concatenate([batch_label[counter], batch_label_extra[counter]], 0) 446 | batch_smpw_temp = np.concatenate([batch_smpw[counter], batch_smpw_extra[counter]], 0) 447 | 448 | aug_data = provider.rotate_point_cloud_z(batch_data_temp) 449 | feed_dict = {ops['pointclouds_pl'+str(i+1)]: aug_data, 450 | ops['labels_pl'+str(i+1)]: batch_label_temp, 451 | ops['smpws_pl'+str(i+1)]:batch_smpw_temp, 452 | ops['data_select']:i, 453 | ops['is_training_pl']: is_training,} 454 | summary, step, _, loss_val, pred_val = sess.run([ops['merged'], ops['step'], ops['train_op'], ops['loss'], ops['pred']], feed_dict=add_empty(feed_dict, i, ops)) 455 | sess.run(copy_from_model0_op[i]) 456 | train_writer.add_summary(summary, step) 457 | pred_val = np.argmax(pred_val, 2) 458 | correct = np.sum(pred_val == batch_label_temp) 459 | total_correct += correct 460 | total_seen += (BATCH_SIZE*NUM_POINT[i]) 461 | loss_sum += loss_val 462 | counter += 1 463 | 464 | if (batch_idx+1)%10 == 0: 465 | log_string(' -- %03d / %03d --' % (batch_idx+1, num_batches)) 466 | log_string('mean loss: %f' % (loss_sum / 10.0 / float(len(NUM_POINT)))) 467 | log_string('accuracy: %f' % (total_correct / float(total_seen))) 468 | total_correct = 0 469 | total_seen = 0 470 | loss_sum = 0 471 | 472 | # evaluate on whole scenes to generate numbers provided in the paper (same evaluation with pointnet2) 473 | def eval_whole_scene_one_epoch(sess, ops, test_writer, test_dataset, num_point, mode): 474 | """ ops: dict mapping from string to tf ops """ 475 | global EPOCH_CNT 476 | is_training = False 477 | test_idxs = np.arange(0, len(test_dataset)) 478 | num_batches = len(test_dataset) 479 | 480 | total_correct = 0 481 | total_seen = 0 482 | loss_sum = 0 483 | total_seen_class = [0 for _ in range(NUM_CLASSES)] 484 | total_correct_class = [0 for _ in range(NUM_CLASSES)] 485 | 486 | total_correct_vox = 0 487 | total_seen_vox = 0 488 | total_seen_class_vox = [0 for _ in range(NUM_CLASSES)] 489 | total_correct_class_vox = [0 for _ in range(NUM_CLASSES)] 490 | 491 | log_string(str(datetime.now())) 492 | log_string('---- EPOCH %03d EVALUATION WHOLE SCENE----'%(EPOCH_CNT)) 493 | 494 | labelweights = np.zeros(21) 495 | labelweights_vox = np.zeros(21) 496 | is_continue_batch = False 497 | 498 | extra_batch_data = np.zeros((0,num_point,3)) 499 | extra_batch_label = np.zeros((0,num_point)) 500 | extra_batch_smpw = np.zeros((0,num_point)) 501 | for batch_idx in range(num_batches): 502 | if not is_continue_batch: 503 | batch_data, batch_label, batch_smpw = test_dataset[batch_idx] 504 | batch_data = np.concatenate((batch_data,extra_batch_data),axis=0) 505 | batch_label = np.concatenate((batch_label,extra_batch_label),axis=0) 506 | batch_smpw = np.concatenate((batch_smpw,extra_batch_smpw),axis=0) 507 | else: 508 | batch_data_tmp, batch_label_tmp, batch_smpw_tmp = test_dataset[batch_idx] 509 | batch_data = np.concatenate((batch_data,batch_data_tmp),axis=0) 510 | batch_label = np.concatenate((batch_label,batch_label_tmp),axis=0) 511 | batch_smpw = 
np.concatenate((batch_smpw,batch_smpw_tmp),axis=0) 512 | if batch_data.shape[0]<BATCH_SIZE: 513 | is_continue_batch = True 514 | continue 515 | elif batch_data.shape[0]==BATCH_SIZE: 516 | is_continue_batch = False 517 | extra_batch_data = np.zeros((0,num_point,3)) 518 | extra_batch_label = np.zeros((0,num_point)) 519 | extra_batch_smpw = np.zeros((0,num_point)) 520 | else: 521 | is_continue_batch = False 522 | extra_batch_data = batch_data[BATCH_SIZE:,:,:] 523 | extra_batch_label = batch_label[BATCH_SIZE:,:] 524 | extra_batch_smpw = batch_smpw[BATCH_SIZE:,:] 525 | batch_data = batch_data[:BATCH_SIZE,:,:] 526 | batch_label = batch_label[:BATCH_SIZE,:] 527 | batch_smpw = batch_smpw[:BATCH_SIZE,:] 528 | 529 | aug_data = provider.rotate_point_cloud_z(batch_data) 530 | feed_dict = {ops['pointclouds_pl'+str(mode+1)]: aug_data, 531 | ops['labels_pl'+str(mode+1)]: batch_label, 532 | ops['smpws_pl'+str(mode+1)]: batch_smpw, 533 | ops['data_select']: mode, 534 | ops['is_training_pl']: is_training,} 535 | summary, step, loss_val, pred_val = sess.run([ops['merged'], ops['step'], 536 | ops['loss'], ops['pred']], feed_dict=add_empty(feed_dict, mode, ops)) 537 | test_writer.add_summary(summary, step) 538 | pred_val = np.argmax(pred_val, 2) 539 | correct = np.sum((pred_val == batch_label) & (batch_label>0) & (batch_smpw>0)) # evaluate only on 20 categories but not unknown 540 | total_correct += correct 541 | total_seen += np.sum((batch_label>0) & (batch_smpw>0)) 542 | loss_sum += loss_val 543 | tmp,_ = np.histogram(batch_label,range(22)) 544 | labelweights += tmp 545 | for l in range(NUM_CLASSES): 546 | total_seen_class[l] += np.sum((batch_label==l) & (batch_smpw>0)) 547 | total_correct_class[l] += np.sum((pred_val==l) & (batch_label==l) & (batch_smpw>0)) 548 | 549 | for b in xrange(batch_label.shape[0]): 550 | _, uvlabel, _ = pc_util.point_cloud_label_to_surface_voxel_label_fast(aug_data[b,batch_smpw[b,:]>0,:], np.concatenate((np.expand_dims(batch_label[b,batch_smpw[b,:]>0],1),np.expand_dims(pred_val[b,batch_smpw[b,:]>0],1)),axis=1), res=0.02) 551 | total_correct_vox += np.sum((uvlabel[:,0]==uvlabel[:,1])&(uvlabel[:,0]>0)) 552 | total_seen_vox += np.sum(uvlabel[:,0]>0) 553 | tmp,_ = np.histogram(uvlabel[:,0],range(22)) 554 | labelweights_vox += tmp 555 | for l in range(NUM_CLASSES): 556 | total_seen_class_vox[l] += np.sum(uvlabel[:,0]==l) 557 | total_correct_class_vox[l] += np.sum((uvlabel[:,0]==l) & (uvlabel[:,1]==l)) 558 | 559 | log_string('eval whole scene mean loss: %f' % (loss_sum / float(num_batches))) 560 | log_string('eval whole scene point accuracy vox: %f'% (total_correct_vox / float(total_seen_vox))) 561 | log_string('eval whole scene point avg class acc vox: %f' % (np.mean(np.array(total_correct_class_vox[1:])/(np.array(total_seen_class_vox[1:],dtype=np.float)+1e-6)))) 562 | log_string('eval whole scene point accuracy: %f'% (total_correct / float(total_seen))) 563 | log_string('eval whole scene point avg class acc: %f' % (np.mean(np.array(total_correct_class[1:])/(np.array(total_seen_class[1:],dtype=np.float)+1e-6)))) 564 | labelweights = labelweights[1:].astype(np.float32)/np.sum(labelweights[1:].astype(np.float32)) 565 | labelweights_vox = labelweights_vox[1:].astype(np.float32)/np.sum(labelweights_vox[1:].astype(np.float32)) 566 | caliweights = np.array([0.388,0.357,0.038,0.033,0.017,0.02,0.016,0.025,0.002,0.002,0.002,0.007,0.006,0.022,0.004,0.0004,0.003,0.002,0.024,0.029]) 567 | caliacc = np.average(np.array(total_correct_class_vox[1:])/(np.array(total_seen_class_vox[1:],dtype=np.float)+1e-6),weights=caliweights) 568 | log_string('eval whole scene point calibrated average acc vox: %f' % caliacc) 569 | 570 | per_class_str = 'vox based --------' 571 | for l in range(1,NUM_CLASSES): 572 | per_class_str += 'class %d weight: %f, acc: %f; ' % (l,labelweights_vox[l-1],total_correct_class_vox[l]/float(total_seen_class_vox[l])) 573 | log_string(per_class_str) 574 | EPOCH_CNT += 1 575 | return caliacc 576 | 577 | if __name__ == "__main__": 578 | log_string('pid: %s'%(str(os.getpid()))) 579 | train() 580 | LOG_FOUT.close() 581 | -------------------------------------------------------------------------------- /train_voxel_joint_multi_v1.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import math 3 | from datetime import datetime 4 | import numpy as np 5 | import tensorflow as tf 6 | import socket 7 | import importlib 8 | import os 9 | import sys 10 | ROOT_DIR = os.path.dirname(os.path.abspath(__file__)) 11 | sys.path.append(ROOT_DIR) # provider 12 | import provider 13 | import tf_util 14 | import pc_util 15 | import scannet_dataset 16 | import suncg_dataset_multi 17 | import math 18 | 19 | parser = argparse.ArgumentParser() 20 |
parser.add_argument('--gpu', type=int, default=0, help='GPU to use [default: GPU 0]') 21 | parser.add_argument('--model', default='pointnet2_sem_seg_voxel', help='Model name [default: model]') 22 | parser.add_argument('--log_dir', default='log_vol32_joint/', help='Log dir [default: log]') 23 | ### Start with isolated trained model 24 | parser.add_argument('--restore_dir', default='models/VOLI/best_model.ckpt', help='Restore dir [default: models/VOLI/best_model.ckpt]') 25 | parser.add_argument('--num_point', type=int, default=12288, help='Point Number [default: 12288]') 26 | parser.add_argument('--max_epoch', type=int, default=201*3, help='Epoch to run [default: 603]') 27 | parser.add_argument('--batch_size', type=int, default=16, help='Batch Size during training [default: 16]') 28 | parser.add_argument('--learning_rate', type=float, default=0.001, help='Initial learning rate [default: 0.001]') 29 | parser.add_argument('--momentum', type=float, default=0.9, help='Momentum for momentum optimizer [default: 0.9]') 30 | parser.add_argument('--optimizer', default='adam', help='adam or momentum [default: adam]') 31 | parser.add_argument('--decay_step', type=int, default=200000, help='Decay step for lr decay [default: 200000]') 32 | parser.add_argument('--decay_rate', type=float, default=0.7, help='Decay rate for lr decay [default: 0.7]') 33 | FLAGS = parser.parse_args() 34 | 35 | EPOCH_CNT = 0 36 | BATCH_SIZE = FLAGS.batch_size 37 | NUM_POINT = FLAGS.num_point 38 | MAX_EPOCH = FLAGS.max_epoch 39 | BASE_LEARNING_RATE = FLAGS.learning_rate 40 | GPU_INDEX = FLAGS.gpu 41 | MOMENTUM = FLAGS.momentum 42 | OPTIMIZER = FLAGS.optimizer 43 | DECAY_STEP = FLAGS.decay_step 44 | DECAY_RATE = FLAGS.decay_rate 45 | restore_dir = FLAGS.restore_dir 46 | 47 | MODEL = importlib.import_module(FLAGS.model) # import network module 48 | MODEL_FILE = FLAGS.model+'.py' 49 | LOG_DIR = FLAGS.log_dir 50 | if not os.path.exists(LOG_DIR): os.mkdir(LOG_DIR) 51 | os.system('cp %s %s' % (MODEL_FILE, LOG_DIR)) # bkp of model def 52 | LOG_FOUT = open(os.path.join(LOG_DIR, 'log_train.txt'), 'w') 53 | LOG_FOUT.write(str(FLAGS)+'\n') 54 | 55 | BN_INIT_DECAY = 0.5 56 | BN_DECAY_DECAY_RATE = 0.5 57 | BN_DECAY_DECAY_STEP = float(DECAY_STEP) 58 | BN_DECAY_CLIP = 0.99 59 | 60 | HOSTNAME = socket.gethostname() 61 | 62 | NUM_CLASSES = 21 63 | V_SIZE = 32 64 | 65 | # ScanNet official train/test split 66 | DATA_PATH = os.path.join(ROOT_DIR,'data','scannet_data_pointnet2') 67 | TRAIN_DATASET = scannet_dataset.ScannetDataset(root=DATA_PATH, npoints=NUM_POINT, split='train') 68 | TEST_DATASET = scannet_dataset.ScannetDataset(root=DATA_PATH, npoints=NUM_POINT, split='test') 69 | TEST_DATASET_WHOLE_SCENE = scannet_dataset.ScannetDatasetWholeScene(root=DATA_PATH, npoints=NUM_POINT, split='test') 70 | SUNCG_DATASET = suncg_dataset_multi.SuncgDataset(root=DATA_PATH, npoints=NUM_POINT, split='train', rep='voxel1') 71 | 72 | def pc_normalize_batch(pc): 73 | bsize = pc.shape[0] 74 | newpc = [] 75 | for i in range(bsize): 76 | curpc = pc[i] 77 | centroid = np.mean(curpc, axis=0) 78 | curpc = curpc - centroid 79 | m = np.max(np.sqrt(np.sum(curpc**2, axis=1))) 80 | curpc = curpc / m 81 | newpc.append(curpc) 82 | return np.array(newpc) 83 | 84 | def log_string(out_str): 85 | LOG_FOUT.write(out_str+'\n') 86 | LOG_FOUT.flush() 87 | print(out_str) 88 |
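# Editor's note: get_learning_rate below builds a staircase exponential-decay
# schedule with a hard floor. A plain-Python equivalent of the same arithmetic
# (illustration only, using this file's FLAGS defaults):
def _demo_lr_at(global_step, base=0.001, batch_size=16, decay_step=200000, decay_rate=0.7, floor=1e-5):
    decayed = base * decay_rate ** ((global_step * batch_size) // decay_step)  # staircase decay
    return max(decayed, floor)                                                 # clip at the floor
# e.g. _demo_lr_at(0) -> 0.001, _demo_lr_at(12500) -> 0.0007, and for very large
# steps the rate bottoms out at the 1e-5 floor.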
89 | def get_learning_rate(batch): 90 | learning_rate = tf.train.exponential_decay( 91 | BASE_LEARNING_RATE, # Base learning rate. 92 | batch * BATCH_SIZE, # Current index into the dataset. 93 | DECAY_STEP, # Decay step. 94 | DECAY_RATE, # Decay rate. 95 | staircase=True) 96 | learning_rate = tf.maximum(learning_rate, 0.00001) # CLIP THE LEARNING RATE! 97 | return learning_rate 98 | 99 | def get_bn_decay(batch): 100 | bn_momentum = tf.train.exponential_decay( 101 | BN_INIT_DECAY, 102 | batch*BATCH_SIZE, 103 | BN_DECAY_DECAY_STEP, 104 | BN_DECAY_DECAY_RATE, 105 | staircase=True) 106 | bn_decay = tf.minimum(BN_DECAY_CLIP, 1 - bn_momentum) 107 | return bn_decay 108 | 109 | def train(): 110 | with tf.Graph().as_default(): 111 | with tf.device('/cpu:0'): 112 | pointclouds_pl, labels_pl, smpws_pl = MODEL.placeholder_inputs(BATCH_SIZE, V_SIZE) 113 | is_training_pl = tf.placeholder(tf.bool, shape=()) 114 | 115 | # Note the global_step=batch parameter to minimize. 116 | # That tells the optimizer to helpfully increment the 'batch' parameter 117 | # for you every time it trains. 118 | batch = tf.get_variable('batch', [], 119 | initializer=tf.constant_initializer(0), trainable=False) 120 | bn_decay = get_bn_decay(batch) 121 | tf.summary.scalar('bn_decay', bn_decay) 122 | 123 | # Set learning rate and optimizer 124 | learning_rate = get_learning_rate(batch) 125 | tf.summary.scalar('learning_rate', learning_rate) 126 | if OPTIMIZER == 'momentum': 127 | optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=MOMENTUM) 128 | elif OPTIMIZER == 'adam': 129 | optimizer = tf.train.AdamOptimizer(learning_rate) 130 | 131 | # ------------------------------------------- 132 | # Get model and loss on multiple GPU devices 133 | # ------------------------------------------- 134 | # Allocating variables on CPU first will greatly accelerate multi-gpu training. 135 | # Ref: https://github.com/kuza55/keras-extras/issues/21 136 | print "--- Get model and loss" 137 | # Get model and loss 138 | pred = MODEL.get_model(pointclouds_pl, NUM_CLASSES, is_training_pl, bn_decay=bn_decay) 139 | 140 | with tf.variable_scope(tf.get_variable_scope(), reuse=True): 141 | with tf.device('/gpu:%d'%(0)), tf.name_scope('gpu_%d'%(0)) as scope: 142 | pred = MODEL.get_model(pointclouds_pl, NUM_CLASSES, is_training_pl, bn_decay=bn_decay) 143 | _, loss1, loss2 = MODEL.get_loss(pred, labels_pl, smpws_pl) 144 | losses = tf.get_collection('losses', scope) 145 | total_loss = tf.add_n(losses, name='total_loss') 146 | 147 | grads = optimizer.compute_gradients(total_loss) 148 | 149 | # Get training operator 150 | train_op = optimizer.apply_gradients(grads, global_step=batch) 151 | 152 | correct = tf.equal(tf.argmax(pred, 4), tf.to_int64(labels_pl)) 153 | accuracy = tf.reduce_sum(tf.cast(correct, tf.float32)) / float(BATCH_SIZE*NUM_POINT) 154 | tf.summary.scalar('accuracy', accuracy) 155 | 156 | # Add ops to save and restore all the variables. 157 | saver = tf.train.Saver() 158 | 159 | # Create a session 160 | config = tf.ConfigProto() 161 | config.gpu_options.allow_growth = True 162 | config.allow_soft_placement = True 163 | config.log_device_placement = False 164 | sess = tf.Session(config=config) 165 | 166 | # Add summary writers 167 | merged = tf.summary.merge_all() 168 | train_writer = tf.summary.FileWriter(os.path.join(LOG_DIR, 'train'), sess.graph) 169 | test_writer = tf.summary.FileWriter(os.path.join(LOG_DIR, 'test'), sess.graph) 170 | 171 | # Init variables 172 | init = tf.global_variables_initializer() 173 | sess.run(init) 174 | 175 | if restore_dir != 'None': 176 | saver.restore(sess, restore_dir) 177 | else: 178 | print ("issue here!
Must have a pretrained model") 179 | sys.exit(0) 180 | 181 | ops = {'pointclouds_pl': pointclouds_pl, 182 | 'labels_pl': labels_pl, 183 | 'smpws_pl': smpws_pl, 184 | 'is_training_pl': is_training_pl, 185 | 'pred': pred, 186 | 'loss': total_loss, 187 | 'loss1': loss1, 188 | 'loss2': loss2, 189 | 'train_op': train_op, 190 | 'merged': merged, 191 | 'step': batch} 192 | 193 | ### Evaluate first 194 | best_acc = eval_whole_scene_one_epoch(sess, ops, test_writer) 195 | 196 | for epoch in range(MAX_EPOCH): 197 | log_string('**** EPOCH %03d ****' % (epoch)) 198 | sys.stdout.flush() 199 | 200 | train_one_epoch(sess, ops, train_writer) 201 | if (epoch+1)%5==0: 202 | acc = eval_whole_scene_one_epoch(sess, ops, test_writer) 203 | if acc > best_acc: 204 | best_acc = acc 205 | save_path = saver.save(sess, os.path.join(LOG_DIR, "best_model_epoch_%03d.ckpt"%(epoch))) 206 | log_string("Model saved in file: %s" % save_path) 207 | 208 | # Save the variables to disk. 209 | if epoch % 10 == 0: 210 | save_path = saver.save(sess, os.path.join(LOG_DIR, "model.ckpt")) 211 | log_string("Model saved in file: %s" % save_path) 212 | 213 | def get_batch_wdp(dataset, idxs, start_idx, end_idx): 214 | bsize = end_idx-start_idx 215 | batch_data = np.zeros((bsize, NUM_POINT, 3)) 216 | batch_label = np.zeros((bsize, NUM_POINT), dtype=np.int32) 217 | batch_smpw = np.zeros((bsize, NUM_POINT), dtype=np.float32) 218 | for i in range(bsize): 219 | ps,seg,smpw = dataset[idxs[i+start_idx]] 220 | batch_data[i,...] = ps 221 | batch_label[i,:] = seg 222 | batch_smpw[i,:] = smpw 223 | 224 | dropout_ratio = np.random.random()*0.875 # 0-0.875 225 | drop_idx = np.where(np.random.random((ps.shape[0]))<=dropout_ratio)[0] 226 | batch_data[i,drop_idx,:] = batch_data[i,0,:] 227 | batch_label[i,drop_idx] = batch_label[i,0] 228 | batch_smpw[i,drop_idx] *= 0 229 | return batch_data, batch_label, batch_smpw 230 | 231 | def train_one_epoch(sess, ops, train_writer): 232 | """ ops: dict mapping from string to tf ops """ 233 | is_training = True 234 | 235 | # Shuffle train samples 236 | train_idxs = np.arange(0, len(TRAIN_DATASET)) 237 | np.random.shuffle(train_idxs) 238 | num_batches = len(TRAIN_DATASET)/(BATCH_SIZE // 2) 239 | 240 | log_string(str(datetime.now())) 241 | 242 | total_correct = 0 243 | total_seen = 0 244 | loss_sum = 0 245 | loss_sum1 = 0 246 | loss_sum2 = 0 247 | for batch_idx in range(num_batches): 248 | start_idx = batch_idx * (BATCH_SIZE // 2) 249 | end_idx = (batch_idx+1) * (BATCH_SIZE // 2) 250 | 251 | ### Get input from other process 252 | batch_data, batch_smpw = SUNCG_DATASET.wait_other() 253 | ###Convert it to voxel 254 | batch_data_norm = pc_normalize_batch(batch_data) 255 | 256 | batch_data_temp = pc_util.point_cloud_label_to_volume_batch_exact(batch_data_norm, vsize=V_SIZE, flatten=True) 257 | batch_data_vol = np.zeros((BATCH_SIZE, V_SIZE, V_SIZE, V_SIZE, 1)) 258 | batch_data_vol[0:BATCH_SIZE//2,:,:,:,:] = batch_data_temp 259 | feed_dict = {ops['pointclouds_pl']: batch_data_vol, 260 | ops['is_training_pl']: False} 261 | pred_val = sess.run(ops['pred'], feed_dict=feed_dict) 262 | pred_val = np.expand_dims(np.argmax(pred_val, 4), -1) 263 | pred_val = pred_val[0:BATCH_SIZE//2,:,:,:,:] 264 | ###Convert it back to pc 265 | pred_val = np.clip(pc_util.volume_topc_batch_exact(pred_val, batch_data_norm) - 1, a_min=0, a_max=None) ### Clip the label in case of NaN in training 266 | batch_data_extra, batch_label_extra, batch_smpw_extra = SUNCG_DATASET.ready(batch_data, np.squeeze(pred_val), batch_smpw, TRAIN_DATASET.labelweights) 267 |
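# Editor's note (hedged): the +1 / -1 shifts around every voxel round trip in this
# file reserve label 0 for "empty voxel". Conceptually:
#
#     vol_label = point_label + 1                             # classes move to 1..21, 0 = empty
#     ... the 3D network predicts vol_label ...
#     point_label = np.clip(vol_label - 1, a_min=0, a_max=None)
#
# The clip keeps an empty-voxel prediction (0 - 1 = -1) from injecting a negative
# class id, which would otherwise produce NaNs in the weighted loss, as the
# comment above notes.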
268 | batch_data, batch_label, batch_smpw = get_batch_wdp(TRAIN_DATASET, train_idxs, start_idx, end_idx) 269 | batch_data = np.concatenate([batch_data, batch_data_extra], 0) 270 | batch_label = np.concatenate([batch_label, batch_label_extra], 0) 271 | batch_smpw = np.concatenate([batch_smpw, batch_smpw_extra], 0) 272 | 273 | # Augment batched point clouds by rotation 274 | aug_data = provider.rotate_point_cloud_z(batch_data) 275 | ###Convert it to voxel 276 | aug_data_vol, batch_label_vol, batch_smpw_vol = pc_util.point_cloud_label_to_volume_batch(pc_normalize_batch(aug_data), batch_label+1, batch_smpw, vsize=V_SIZE, flatten=True) 277 | 278 | feed_dict = {ops['pointclouds_pl']: aug_data_vol, 279 | ops['labels_pl']: batch_label_vol, 280 | ops['smpws_pl']:batch_smpw_vol, 281 | ops['is_training_pl']: is_training,} 282 | 283 | summary, step, _, loss_val, loss_val1, loss_val2, pred_val = sess.run([ops['merged'], ops['step'], 284 | ops['train_op'], ops['loss'], ops['loss1'], ops['loss2'], ops['pred']], feed_dict=feed_dict) 285 | train_writer.add_summary(summary, step) 286 | ### Change the voxel back to pc 287 | pred_val = np.argmax(pred_val, 4) 288 | pred_val, batch_label, batch_smpw, _, _ = pc_util.volume_topc_batch(pred_val, batch_label_vol, batch_smpw_vol) 289 | for i in range(len(pred_val)): 290 | pred_val[i] -= 1 291 | for i in range(len(batch_label)): 292 | batch_label[i] -= 1 293 | for i in range(len(pred_val)): 294 | correct = np.sum(pred_val[i] == batch_label[i]) 295 | total_correct += correct 296 | total_seen += pred_val[i].shape[0] 297 | loss_sum += loss_val 298 | loss_sum1 += loss_val1 299 | loss_sum2 += loss_val2 300 | if (batch_idx+1)%10 == 0: 301 | log_string(' -- %03d / %03d --' % (batch_idx+1, num_batches)) 302 | log_string('mean loss: %f' % (loss_sum / 10)) 303 | log_string('mean loss1: %f' % (loss_sum1 / 10)) 304 | log_string('mean loss2: %f' % (loss_sum2 / 10)) 305 | log_string('accuracy: %f' % (total_correct / float(total_seen))) 306 | total_correct = 0 307 | total_seen = 0 308 | loss_sum = loss_sum1 = loss_sum2 = 0 309 | 310 | # evaluate on whole scenes to generate numbers provided in the paper 311 | # For consistency, predictions are converted back to point clouds and evaluated with the code provided in pointnet2 312 | def eval_whole_scene_one_epoch(sess, ops, test_writer): 313 | """ ops: dict mapping from string to tf ops """ 314 | global EPOCH_CNT 315 | is_training = False 316 | test_idxs = np.arange(0, len(TEST_DATASET_WHOLE_SCENE)) 317 | num_batches = len(TEST_DATASET_WHOLE_SCENE) 318 | 319 | total_correct = 0 320 | total_seen = 0 321 | loss_sum = 0 322 | total_seen_class = [0 for _ in range(NUM_CLASSES)] 323 | total_correct_class = [0 for _ in range(NUM_CLASSES)] 324 | 325 | total_correct_vox = 0 326 | total_seen_vox = 0 327 | total_seen_class_vox = [0 for _ in range(NUM_CLASSES)] 328 | total_correct_class_vox = [0 for _ in range(NUM_CLASSES)] 329 | 330 | log_string(str(datetime.now())) 331 | log_string('---- EPOCH %03d EVALUATION WHOLE SCENE----'%(EPOCH_CNT)) 332 | 333 | labelweights = np.zeros(21) 334 | labelweights_vox = np.zeros(21) 335 | is_continue_batch = False 336 | 337 | extra_batch_data = np.zeros((0,NUM_POINT,3)) 338 | extra_batch_label = np.zeros((0,NUM_POINT)) 339 | extra_batch_smpw = np.zeros((0,NUM_POINT))
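# Editor's note (hedged sketch): whole scenes yield a variable number of
# point-cloud chunks, so the loop below accumulates chunks across scenes until at
# least BATCH_SIZE are queued, evaluates exactly BATCH_SIZE of them, and carries
# any remainder over in extra_batch_* for the next iteration:
#
#     if batch_data.shape[0] < BATCH_SIZE:    keep queueing (continue)
#     elif batch_data.shape[0] == BATCH_SIZE: run it; reset extra_batch_* to empty
#     else:                                   run the first BATCH_SIZE rows and
#                                             keep batch_data[BATCH_SIZE:] as carry-over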
340 | for batch_idx in range(num_batches): 341 | if not is_continue_batch: 342 | batch_data, batch_label, batch_smpw = TEST_DATASET_WHOLE_SCENE[batch_idx] 343 | batch_data = np.concatenate((batch_data,extra_batch_data),axis=0) 344 | batch_label = np.concatenate((batch_label,extra_batch_label),axis=0) 345 | batch_smpw = np.concatenate((batch_smpw,extra_batch_smpw),axis=0) 346 | else: 347 | batch_data_tmp, batch_label_tmp, batch_smpw_tmp = TEST_DATASET_WHOLE_SCENE[batch_idx] 348 | batch_data = np.concatenate((batch_data,batch_data_tmp),axis=0) 349 | batch_label = np.concatenate((batch_label,batch_label_tmp),axis=0) 350 | batch_smpw = np.concatenate((batch_smpw,batch_smpw_tmp),axis=0) 351 | if batch_data.shape[0]<BATCH_SIZE: 352 | is_continue_batch = True 353 | continue 354 | elif batch_data.shape[0]==BATCH_SIZE: 355 | is_continue_batch = False 356 | extra_batch_data = np.zeros((0,NUM_POINT,3)) 357 | extra_batch_label = np.zeros((0,NUM_POINT)) 358 | extra_batch_smpw = np.zeros((0,NUM_POINT)) 359 | else: 360 | is_continue_batch = False 361 | extra_batch_data = batch_data[BATCH_SIZE:,:,:] 362 | extra_batch_label = batch_label[BATCH_SIZE:,:] 363 | extra_batch_smpw = batch_smpw[BATCH_SIZE:,:] 364 | batch_data = batch_data[:BATCH_SIZE,:,:] 365 | batch_label = batch_label[:BATCH_SIZE,:] 366 | batch_smpw = batch_smpw[:BATCH_SIZE,:] 367 | 368 | aug_data = provider.rotate_point_cloud_z(batch_data) 369 | ###Convert it to voxel 370 | aug_data_vol, batch_label_vol, batch_smpw_vol = pc_util.point_cloud_label_to_volume_batch(pc_normalize_batch(aug_data), batch_label+1, batch_smpw, vsize=V_SIZE, flatten=True) 371 | feed_dict = {ops['pointclouds_pl']: aug_data_vol, 372 | ops['labels_pl']: batch_label_vol, 373 | ops['smpws_pl']: batch_smpw_vol, 374 | ops['is_training_pl']: is_training,} 375 | summary, step, loss_val, pred_val = sess.run([ops['merged'], ops['step'], 376 | ops['loss'], ops['pred']], feed_dict=feed_dict) 377 | test_writer.add_summary(summary, step) 378 | ### Change the voxel back to pc 379 | pred_val = np.argmax(pred_val, 4) 380 | pred_val, batch_label, batch_smpw, aug_data, _ = pc_util.volume_topc_batch(pred_val, batch_label_vol, batch_smpw_vol) 381 | for i in range(len(pred_val)): 382 | pred_val[i] -= 1 383 | batch_label[i] -= 1 384 | for i in range(len(pred_val)): 385 | correct = np.sum((pred_val[i] == batch_label[i]) & (batch_label[i]>0) & (batch_smpw[i]>0)) # evaluate only on 20 categories but not unknown 386 | total_correct += correct 387 | total_seen += np.sum((batch_label[i]>0) & (batch_smpw[i]>0)) 388 | loss_sum += loss_val 389 | for l in range(NUM_CLASSES): 390 | total_seen_class[l] += np.sum((batch_label[i]==l) & (batch_smpw[i]>0)) 391 | total_correct_class[l] += np.sum((pred_val[i]==l) & (batch_label[i]==l) & (batch_smpw[i]>0)) 392 | 393 | for b in range(len(batch_label)): 394 | if (aug_data[b][batch_smpw[b]>0,:].shape)[0] == 0: 395 | continue 396 | _, uvlabel, _ = pc_util.point_cloud_label_to_surface_voxel_label_fast(aug_data[b][batch_smpw[b]>0,:], np.concatenate((np.expand_dims(batch_label[b][batch_smpw[b]>0],1),np.expand_dims(pred_val[b][batch_smpw[b]>0],1)),axis=1), res=0.02) 397 | total_correct_vox += np.sum((uvlabel[:,0]==uvlabel[:,1])&(uvlabel[:,0]>0)) 398 | total_seen_vox += np.sum(uvlabel[:,0]>0) 399 | tmp,_ = np.histogram(uvlabel[:,0],range(22)) 400 | labelweights_vox += tmp 401 | for l in range(NUM_CLASSES): 402 | total_seen_class_vox[l] += np.sum(uvlabel[:,0]==l) 403 | total_correct_class_vox[l] += np.sum((uvlabel[:,0]==l) & (uvlabel[:,1]==l)) 404 | 405 | log_string('eval whole scene mean loss: %f' % (loss_sum / float(num_batches))) 406 | log_string('eval whole scene point accuracy vox: %f'% (total_correct_vox / float(total_seen_vox))) 407 | log_string('eval whole scene point avg class acc vox: %f' % (np.mean(np.array(total_correct_class_vox[1:])/(np.array(total_seen_class_vox[1:],dtype=np.float)+1e-6)))) 408 | log_string('eval whole scene point accuracy: %f'% (total_correct / float(total_seen))) 409 | log_string('eval whole scene point avg class acc: %f' % (np.mean(np.array(total_correct_class[1:])/(np.array(total_seen_class[1:],dtype=np.float)+1e-6)))) 410 | labelweights = labelweights[1:].astype(np.float32)/np.sum(labelweights[1:].astype(np.float32)) 411 | labelweights_vox = labelweights_vox[1:].astype(np.float32)/np.sum(labelweights_vox[1:].astype(np.float32)) 412 | caliweights = np.array([0.388,0.357,0.038,0.033,0.017,0.02,0.016,0.025,0.002,0.002,0.002,0.007,0.006,0.022,0.004,0.0004,0.003,0.002,0.024,0.029]) 413 | caliacc = np.average(np.array(total_correct_class_vox[1:])/(np.array(total_seen_class_vox[1:],dtype=np.float)+1e-6),weights=caliweights) 414 | log_string('eval whole scene point calibrated average acc vox: %f' % caliacc) 415 | 416 | per_class_str = 'vox based --------' 417 | for l in range(1,NUM_CLASSES): 418 | per_class_str += 'class %d weight: %f, acc: %f; ' % (l,labelweights_vox[l-1],total_correct_class_vox[l]/float(total_seen_class_vox[l])) 419 | log_string(per_class_str) 420 | EPOCH_CNT += 1 421 | return caliacc 422 | 423 | if __name__ == "__main__": 424 | log_string('pid: %s'%(str(os.getpid()))) 425 | train() 426 | LOG_FOUT.close() 427 |
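# Editor's note: a self-contained illustration of the calibrated accuracy
# ("caliacc") reported above -- per-class voxel accuracies averaged under the fixed
# ScanNet class-frequency weights in caliweights, so each class counts in
# proportion to how often it actually occurs. Illustration only; not called by the
# training code.
def _demo_calibrated_acc(correct_per_class, seen_per_class, weights):
    acc = np.array(correct_per_class, dtype=np.float) / (np.array(seen_per_class, dtype=np.float) + 1e-6)
    return np.average(acc, weights=weights)  # same reduction as caliacc above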
-------------------------------------------------------------------------------- /train_voxel_joint_multi_v2.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import math 3 | from datetime import datetime 4 | import numpy as np 5 | import tensorflow as tf 6 | import socket 7 | import importlib 8 | import os 9 | import sys 10 | ROOT_DIR = os.path.dirname(os.path.abspath(__file__)) 11 | sys.path.append(ROOT_DIR) # provider 12 | import provider 13 | import tf_util 14 | import pc_util 15 | import scannet_dataset 16 | import suncg_dataset_multi 17 | import math 18 | 19 | parser = argparse.ArgumentParser() 20 | parser.add_argument('--gpu', type=int, default=0, help='GPU to use [default: GPU 0]') 21 | parser.add_argument('--model', default='pointnet2_sem_seg_voxel', help='Model name [default: model]') 22 | parser.add_argument('--log_dir', default='log_vol24_joint/', help='Log dir [default: log]') 23 | ### Start with isolated trained model 24 | parser.add_argument('--restore_dir', default='models/VOLII/best_model.ckpt', help='Restore dir [default: models/VOLII/best_model.ckpt]') 25 | parser.add_argument('--num_point', type=int, default=12288, help='Point Number [default: 12288]') 26 | parser.add_argument('--max_epoch', type=int, default=201*3, help='Epoch to run [default: 603]') 27 | parser.add_argument('--batch_size', type=int, default=16, help='Batch Size during training [default: 16]') 28 | parser.add_argument('--learning_rate', type=float, default=0.001, help='Initial learning rate [default: 0.001]') 29 | parser.add_argument('--momentum', type=float, default=0.9, help='Momentum for momentum optimizer [default: 0.9]') 30 | parser.add_argument('--optimizer', default='adam', help='adam or momentum [default: adam]') 31 | parser.add_argument('--decay_step', type=int, default=200000, help='Decay step for lr decay [default: 200000]') 32 | parser.add_argument('--decay_rate', type=float, default=0.7, help='Decay rate for lr decay [default: 0.7]') 33 | FLAGS = parser.parse_args() 34 | 35 | EPOCH_CNT = 0 36 | BATCH_SIZE = FLAGS.batch_size 37 | NUM_POINT = FLAGS.num_point 38 | MAX_EPOCH = FLAGS.max_epoch 39 | BASE_LEARNING_RATE = FLAGS.learning_rate 40 | GPU_INDEX = FLAGS.gpu 41 | MOMENTUM = FLAGS.momentum 42 | OPTIMIZER = FLAGS.optimizer 43 | DECAY_STEP = FLAGS.decay_step 44 | DECAY_RATE = FLAGS.decay_rate 45 | restore_dir = FLAGS.restore_dir 46 | 47 | MODEL = importlib.import_module(FLAGS.model) # import network module 48 | MODEL_FILE = FLAGS.model+'.py' 49 | LOG_DIR = FLAGS.log_dir 50 | if not os.path.exists(LOG_DIR): os.mkdir(LOG_DIR) 51 | os.system('cp %s %s' % (MODEL_FILE, LOG_DIR)) # bkp of model def 52 | LOG_FOUT = open(os.path.join(LOG_DIR, 'log_train.txt'), 'w') 53 | LOG_FOUT.write(str(FLAGS)+'\n') 54 | 55 | BN_INIT_DECAY = 0.5 56 | BN_DECAY_DECAY_RATE = 0.5 57 | BN_DECAY_DECAY_STEP = float(DECAY_STEP) 58 | BN_DECAY_CLIP = 0.99 59 | 60 | HOSTNAME = socket.gethostname() 61 | 62 | NUM_CLASSES = 21 63 | V_SIZE = 24 64 | 65 | # ScanNet official train/test split 66 | DATA_PATH = os.path.join(ROOT_DIR,'data','scannet_data_pointnet2') 67 | TRAIN_DATASET = scannet_dataset.ScannetDataset(root=DATA_PATH, npoints=NUM_POINT, split='train') 68 | TEST_DATASET_WHOLE_SCENE = scannet_dataset.ScannetDatasetWholeScene(root=DATA_PATH, npoints=NUM_POINT, split='test') 69 | SUNCG_DATASET = suncg_dataset_multi.SuncgDataset(root=DATA_PATH, npoints=NUM_POINT, split='train', rep='voxel2') 70 | 71 | def pc_normalize_batch(pc): 72 | bsize = pc.shape[0] 73 | newpc = [] 74 | for i in range(bsize): 75 | curpc = pc[i] 76 | centroid = np.mean(curpc, axis=0) 77 | curpc = curpc - centroid 78 | m = np.max(np.sqrt(np.sum(curpc**2, axis=1))) 79 | curpc = curpc / m 80 | newpc.append(curpc) 81 | return np.array(newpc) 82 |
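# Editor's note: pc_normalize_batch above maps each cloud into the unit sphere
# (subtract the centroid, divide by the largest point radius) so that the fixed
# V_SIZE voxel grid covers every scene. A quick self-check sketch (illustration
# only; not called by the training code):
def _demo_check_normalized(pc_batch):
    normed = pc_normalize_batch(pc_batch)
    radii = np.sqrt((normed ** 2).sum(axis=2)).max(axis=1)  # per-cloud max distance from origin
    assert np.allclose(radii, 1.0)                          # every normalized cloud touches the unit sphere
    return normed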
83 | def log_string(out_str): 84 | LOG_FOUT.write(out_str+'\n') 85 | LOG_FOUT.flush() 86 | print(out_str) 87 | 88 | def get_learning_rate(batch): 89 | learning_rate = tf.train.exponential_decay( 90 | BASE_LEARNING_RATE, # Base learning rate. 91 | batch * BATCH_SIZE, # Current index into the dataset. 92 | DECAY_STEP, # Decay step. 93 | DECAY_RATE, # Decay rate. 94 | staircase=True) 95 | learning_rate = tf.maximum(learning_rate, 0.00001) # CLIP THE LEARNING RATE! 96 | return learning_rate 97 | 98 | def get_bn_decay(batch): 99 | bn_momentum = tf.train.exponential_decay( 100 | BN_INIT_DECAY, 101 | batch*BATCH_SIZE, 102 | BN_DECAY_DECAY_STEP, 103 | BN_DECAY_DECAY_RATE, 104 | staircase=True) 105 | bn_decay = tf.minimum(BN_DECAY_CLIP, 1 - bn_momentum) 106 | return bn_decay 107 | 108 | def train(): 109 | with tf.Graph().as_default(): 110 | with tf.device('/cpu:0'): 111 | pointclouds_pl, labels_pl, smpws_pl = MODEL.placeholder_inputs(BATCH_SIZE, V_SIZE) 112 | is_training_pl = tf.placeholder(tf.bool, shape=()) 113 | 114 | # Note the global_step=batch parameter to minimize. 115 | # That tells the optimizer to helpfully increment the 'batch' parameter 116 | # for you every time it trains. 117 | batch = tf.get_variable('batch', [], 118 | initializer=tf.constant_initializer(0), trainable=False) 119 | bn_decay = get_bn_decay(batch) 120 | tf.summary.scalar('bn_decay', bn_decay) 121 | 122 | # Set learning rate and optimizer 123 | learning_rate = get_learning_rate(batch) 124 | tf.summary.scalar('learning_rate', learning_rate) 125 | if OPTIMIZER == 'momentum': 126 | optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=MOMENTUM) 127 | elif OPTIMIZER == 'adam': 128 | optimizer = tf.train.AdamOptimizer(learning_rate) 129 | 130 | # ------------------------------------------- 131 | # Get model and loss on multiple GPU devices 132 | # ------------------------------------------- 133 | # Allocating variables on CPU first will greatly accelerate multi-gpu training. 134 | # Ref: https://github.com/kuza55/keras-extras/issues/21 135 | print "--- Get model and loss" 136 | # Get model and loss 137 | pred = MODEL.get_model(pointclouds_pl, NUM_CLASSES, is_training_pl, bn_decay=bn_decay) 138 | 139 | with tf.variable_scope(tf.get_variable_scope(), reuse=True): 140 | with tf.device('/gpu:%d'%(0)), tf.name_scope('gpu_%d'%(0)) as scope: 141 | pred = MODEL.get_model(pointclouds_pl, NUM_CLASSES, is_training_pl, bn_decay=bn_decay) 142 | _, loss1, loss2 = MODEL.get_loss(pred, labels_pl, smpws_pl) 143 | losses = tf.get_collection('losses', scope) 144 | total_loss = tf.add_n(losses, name='total_loss') 145 | 146 | grads = optimizer.compute_gradients(total_loss) 147 | 148 | # Get training operator 149 | train_op = optimizer.apply_gradients(grads, global_step=batch) 150 | 151 | correct = tf.equal(tf.argmax(pred, 4), tf.to_int64(labels_pl)) 152 | accuracy = tf.reduce_sum(tf.cast(correct, tf.float32)) / float(BATCH_SIZE*NUM_POINT) 153 | tf.summary.scalar('accuracy', accuracy) 154 | 155 | # Add ops to save and restore all the variables.
156 | saver = tf.train.Saver() 157 | 158 | # Create a session 159 | config = tf.ConfigProto() 160 | config.gpu_options.allow_growth = True 161 | config.allow_soft_placement = True 162 | config.log_device_placement = False 163 | sess = tf.Session(config=config) 164 | 165 | # Add summary writers 166 | merged = tf.summary.merge_all() 167 | train_writer = tf.summary.FileWriter(os.path.join(LOG_DIR, 'train'), sess.graph) 168 | test_writer = tf.summary.FileWriter(os.path.join(LOG_DIR, 'test'), sess.graph) 169 | 170 | # Init variables 171 | init = tf.global_variables_initializer() 172 | sess.run(init) 173 | 174 | if restore_dir != 'None': 175 | saver.restore(sess, restore_dir) 176 | else: 177 | print ("issue here! Must have a pretrained model") 178 | sys.exit(0) 179 | 180 | ops = {'pointclouds_pl': pointclouds_pl, 181 | 'labels_pl': labels_pl, 182 | 'smpws_pl': smpws_pl, 183 | 'is_training_pl': is_training_pl, 184 | 'pred': pred, 185 | 'loss': total_loss, 186 | 'loss1': loss1, 187 | 'loss2': loss2, 188 | 'train_op': train_op, 189 | 'merged': merged, 190 | 'step': batch} 191 | ### Evaluate first 192 | best_acc = eval_whole_scene_one_epoch(sess, ops, test_writer) 193 | for epoch in range(MAX_EPOCH): 194 | log_string('**** EPOCH %03d ****' % (epoch)) 195 | sys.stdout.flush() 196 | 197 | train_one_epoch(sess, ops, train_writer) 198 | if (epoch+1)%5==0: 199 | acc = eval_whole_scene_one_epoch(sess, ops, test_writer) 200 | if acc > best_acc: 201 | best_acc = acc 202 | save_path = saver.save(sess, os.path.join(LOG_DIR, "best_model_epoch_%03d.ckpt"%(epoch))) 203 | log_string("Model saved in file: %s" % save_path) 204 | 205 | # Save the variables to disk. 206 | if epoch % 10 == 0: 207 | save_path = saver.save(sess, os.path.join(LOG_DIR, "model.ckpt")) 208 | log_string("Model saved in file: %s" % save_path) 209 | 210 | def get_batch_wdp(dataset, idxs, start_idx, end_idx): 211 | bsize = end_idx-start_idx 212 | batch_data = np.zeros((bsize, NUM_POINT, 3)) 213 | batch_label = np.zeros((bsize, NUM_POINT), dtype=np.int32) 214 | batch_smpw = np.zeros((bsize, NUM_POINT), dtype=np.float32) 215 | for i in range(bsize): 216 | ps,seg,smpw = dataset[idxs[i+start_idx]] 217 | batch_data[i,...] 
= ps 218 | batch_label[i,:] = seg 219 | batch_smpw[i,:] = smpw 220 | 221 | dropout_ratio = np.random.random()*0.875 # 0-0.875 222 | drop_idx = np.where(np.random.random((ps.shape[0]))<=dropout_ratio)[0] 223 | batch_data[i,drop_idx,:] = batch_data[i,0,:] 224 | batch_label[i,drop_idx] = batch_label[i,0] 225 | batch_smpw[i,drop_idx] *= 0 226 | return batch_data, batch_label, batch_smpw 227 | 228 | def train_one_epoch(sess, ops, train_writer): 229 | """ ops: dict mapping from string to tf ops """ 230 | is_training = True 231 | 232 | # Shuffle train samples 233 | train_idxs = np.arange(0, len(TRAIN_DATASET)) 234 | np.random.shuffle(train_idxs) 235 | num_batches = len(TRAIN_DATASET)/(BATCH_SIZE // 2) 236 | 237 | log_string(str(datetime.now())) 238 | 239 | total_correct = 0 240 | total_seen = 0 241 | loss_sum = 0 242 | loss_sum1 = 0 243 | loss_sum2 = 0 244 | for batch_idx in range(num_batches): 245 | start_idx = batch_idx * (BATCH_SIZE // 2) 246 | end_idx = (batch_idx+1) * (BATCH_SIZE // 2) 247 | 248 | ### Get data from pc process 249 | batch_data, batch_smpw = SUNCG_DATASET.wait_other() 250 | ###Convert it to voxel 251 | batch_data_norm = pc_normalize_batch(batch_data) 252 | batch_data_temp = pc_util.point_cloud_label_to_volume_batch_exact(batch_data_norm, vsize=V_SIZE, flatten=True) 253 | 254 | batch_data_vol = np.zeros((BATCH_SIZE, V_SIZE, V_SIZE, V_SIZE, 1)) 255 | batch_data_vol[0:BATCH_SIZE//2,:,:,:,:] = batch_data_temp 256 | feed_dict = {ops['pointclouds_pl']: batch_data_vol, 257 | ops['is_training_pl']: False} 258 | pred_val = sess.run(ops['pred'], feed_dict=feed_dict) 259 | pred_val = np.expand_dims(np.argmax(pred_val, 4), -1) 260 | pred_val = pred_val[0:BATCH_SIZE//2,:,:,:,:] 261 | ###Convert it back to pc 262 | pred_val = np.clip(pc_util.volume_topc_batch_exact(pred_val, batch_data_norm, vsize=V_SIZE) - 1, a_min=0, a_max=None) ### Clip the label in case of Nan in training 263 | batch_data_extra, batch_label_extra, batch_smpw_extra = SUNCG_DATASET.ready(batch_data, np.squeeze(pred_val), batch_smpw, TRAIN_DATASET.labelweights) 264 | 265 | batch_data, batch_label, batch_smpw = get_batch_wdp(TRAIN_DATASET, train_idxs, start_idx, end_idx) 266 | 267 | batch_data = np.concatenate([batch_data, batch_data_extra], 0) 268 | batch_label = np.concatenate([batch_label, batch_label_extra], 0) 269 | batch_smpw = np.concatenate([batch_smpw, batch_smpw_extra], 0) 270 | 271 | # Augment batched point clouds by rotation 272 | aug_data = provider.rotate_point_cloud_z(batch_data) 273 | ###Convert it to voxel 274 | aug_data_vol, batch_label_vol, batch_smpw_vol = pc_util.point_cloud_label_to_volume_batch(pc_normalize_batch(aug_data), batch_label+1, batch_smpw, vsize=V_SIZE, flatten=True) 275 | 276 | feed_dict = {ops['pointclouds_pl']: aug_data_vol, 277 | ops['labels_pl']: batch_label_vol, 278 | ops['smpws_pl']:batch_smpw_vol, 279 | ops['is_training_pl']: is_training,} 280 | 281 | summary, step, _, loss_val, loss_val1, loss_val2, pred_val = sess.run([ops['merged'], ops['step'], 282 | ops['train_op'], ops['loss'], ops['loss1'], ops['loss2'], ops['pred']], feed_dict=feed_dict) 283 | train_writer.add_summary(summary, step) 284 | 285 | ### Change the voxel back to pc 286 | pred_val = np.argmax(pred_val, 4) 287 | pred_val, batch_label, batch_smpw, _, _ = pc_util.volume_topc_batch(pred_val, batch_label_vol, batch_smpw_vol) 288 | for i in range(len(pred_val)): 289 | pred_val[i] -= 1 290 | for i in range(len(batch_label)): 291 | batch_label[i] -= 1 292 | 293 | for i in range(len(pred_val)): 294 | correct = 
np.sum(pred_val[i] == batch_label[i]) 295 | total_correct += correct 296 | total_seen += pred_val[i].shape[0] 297 | loss_sum += loss_val 298 | loss_sum1 += loss_val1 299 | loss_sum2 += loss_val2 300 | if (batch_idx+1)%10 == 0: 301 | log_string(' -- %03d / %03d --' % (batch_idx+1, num_batches)) 302 | log_string('mean loss: %f' % (loss_sum / 10)) 303 | log_string('mean loss1: %f' % (loss_sum1 / 10)) 304 | log_string('mean loss2: %f' % (loss_sum2 / 10)) 305 | log_string('accuracy: %f' % (total_correct / float(total_seen))) 306 | total_correct = 0 307 | total_seen = 0 308 | loss_sum = loss_sum1 = loss_sum2 = 0 309 | 310 | # evaluate on whole scenes to generate numbers provided in the paper 311 | # For consistency, predictions are converted back to point clouds and evaluated with the code provided in pointnet2 312 | def eval_whole_scene_one_epoch(sess, ops, test_writer): 313 | """ ops: dict mapping from string to tf ops """ 314 | global EPOCH_CNT 315 | is_training = False 316 | test_idxs = np.arange(0, len(TEST_DATASET_WHOLE_SCENE)) 317 | num_batches = len(TEST_DATASET_WHOLE_SCENE) 318 | 319 | total_correct = 0 320 | total_seen = 0 321 | loss_sum = 0 322 | total_seen_class = [0 for _ in range(NUM_CLASSES)] 323 | total_correct_class = [0 for _ in range(NUM_CLASSES)] 324 | 325 | total_correct_vox = 0 326 | total_seen_vox = 0 327 | total_seen_class_vox = [0 for _ in range(NUM_CLASSES)] 328 | total_correct_class_vox = [0 for _ in range(NUM_CLASSES)] 329 | 330 | log_string(str(datetime.now())) 331 | log_string('---- EPOCH %03d EVALUATION WHOLE SCENE----'%(EPOCH_CNT)) 332 | 333 | labelweights = np.zeros(21) 334 | labelweights_vox = np.zeros(21) 335 | is_continue_batch = False 336 | 337 | extra_batch_data = np.zeros((0,NUM_POINT,3)) 338 | extra_batch_label = np.zeros((0,NUM_POINT)) 339 | extra_batch_smpw = np.zeros((0,NUM_POINT)) 340 | for batch_idx in range(num_batches): 341 | if not is_continue_batch: 342 | batch_data, batch_label, batch_smpw = TEST_DATASET_WHOLE_SCENE[batch_idx] 343 | batch_data = np.concatenate((batch_data,extra_batch_data),axis=0) 344 | batch_label = np.concatenate((batch_label,extra_batch_label),axis=0) 345 | batch_smpw = np.concatenate((batch_smpw,extra_batch_smpw),axis=0) 346 | else: 347 | batch_data_tmp, batch_label_tmp, batch_smpw_tmp = TEST_DATASET_WHOLE_SCENE[batch_idx] 348 | batch_data = np.concatenate((batch_data,batch_data_tmp),axis=0) 349 | batch_label = np.concatenate((batch_label,batch_label_tmp),axis=0) 350 | batch_smpw = np.concatenate((batch_smpw,batch_smpw_tmp),axis=0) 351 | if batch_data.shape[0]<BATCH_SIZE: 352 | is_continue_batch = True 353 | continue 354 | elif batch_data.shape[0]==BATCH_SIZE: 355 | is_continue_batch = False 356 | extra_batch_data = np.zeros((0,NUM_POINT,3)) 357 | extra_batch_label = np.zeros((0,NUM_POINT)) 358 | extra_batch_smpw = np.zeros((0,NUM_POINT)) 359 | else: 360 | is_continue_batch = False 361 | extra_batch_data = batch_data[BATCH_SIZE:,:,:] 362 | extra_batch_label = batch_label[BATCH_SIZE:,:] 363 | extra_batch_smpw = batch_smpw[BATCH_SIZE:,:] 364 | batch_data = batch_data[:BATCH_SIZE,:,:] 365 | batch_label = batch_label[:BATCH_SIZE,:] 366 | batch_smpw = batch_smpw[:BATCH_SIZE,:] 367 | 368 | aug_data = provider.rotate_point_cloud_z(batch_data) 369 | ###Convert it to voxel 370 | aug_data_vol, batch_label_vol, batch_smpw_vol = pc_util.point_cloud_label_to_volume_batch(pc_normalize_batch(aug_data), batch_label+1, batch_smpw, vsize=V_SIZE, flatten=True) 371 | feed_dict = {ops['pointclouds_pl']: aug_data_vol, 372 | ops['labels_pl']: batch_label_vol, 373 | ops['smpws_pl']: batch_smpw_vol, 374 | ops['is_training_pl']: is_training,} 375 | summary, step, loss_val, pred_val = sess.run([ops['merged'], ops['step'], 376 | ops['loss'], ops['pred']], feed_dict=feed_dict) 377 | test_writer.add_summary(summary, step) 378 | ### Change the voxel back to pc 379 | pred_val = np.argmax(pred_val, 4) 380 | pred_val, batch_label, batch_smpw, aug_data, _ = pc_util.volume_topc_batch(pred_val, batch_label_vol, batch_smpw_vol) 381 | for i in range(len(pred_val)): 382 | pred_val[i] -= 1 383 | batch_label[i] -= 1 384 | for i in range(len(pred_val)): 385 | correct = np.sum((pred_val[i] == batch_label[i]) & (batch_label[i]>0) & (batch_smpw[i]>0)) # evaluate only on 20 categories but not unknown 386 | total_correct += correct 387 | total_seen += np.sum((batch_label[i]>0) & (batch_smpw[i]>0)) 388 | loss_sum += loss_val 389 | for l in range(NUM_CLASSES): 390 | total_seen_class[l] += np.sum((batch_label[i]==l) & (batch_smpw[i]>0)) 391 | total_correct_class[l] += np.sum((pred_val[i]==l) & (batch_label[i]==l) & (batch_smpw[i]>0)) 392 | for b in range(len(batch_label)): 393 | if (aug_data[b][batch_smpw[b]>0,:].shape)[0] == 0: 394 | continue 395 | _, uvlabel, _ = pc_util.point_cloud_label_to_surface_voxel_label_fast(aug_data[b][batch_smpw[b]>0,:], np.concatenate((np.expand_dims(batch_label[b][batch_smpw[b]>0],1),np.expand_dims(pred_val[b][batch_smpw[b]>0],1)),axis=1), res=0.02) 396 | total_correct_vox += np.sum((uvlabel[:,0]==uvlabel[:,1])&(uvlabel[:,0]>0)) 397 | total_seen_vox += np.sum(uvlabel[:,0]>0) 398 | tmp,_ = np.histogram(uvlabel[:,0],range(22)) 399 | labelweights_vox += tmp 400 | for l in range(NUM_CLASSES): 401 |
total_seen_class_vox[l] += np.sum(uvlabel[:,0]==l) 402 | total_correct_class_vox[l] += np.sum((uvlabel[:,0]==l) & (uvlabel[:,1]==l)) 403 | 404 | log_string('eval whole scene mean loss: %f' % (loss_sum / float(num_batches))) 405 | log_string('eval whole scene point accuracy vox: %f'% (total_correct_vox / float(total_seen_vox))) 406 | log_string('eval whole scene point avg class acc vox: %f' % (np.mean(np.array(total_correct_class_vox[1:])/(np.array(total_seen_class_vox[1:],dtype=np.float)+1e-6)))) 407 | log_string('eval whole scene point accuracy: %f'% (total_correct / float(total_seen))) 408 | log_string('eval whole scene point avg class acc: %f' % (np.mean(np.array(total_correct_class[1:])/(np.array(total_seen_class[1:],dtype=np.float)+1e-6)))) 409 | labelweights = labelweights[1:].astype(np.float32)/np.sum(labelweights[1:].astype(np.float32)) 410 | labelweights_vox = labelweights_vox[1:].astype(np.float32)/np.sum(labelweights_vox[1:].astype(np.float32)) 411 | caliweights = np.array([0.388,0.357,0.038,0.033,0.017,0.02,0.016,0.025,0.002,0.002,0.002,0.007,0.006,0.022,0.004,0.0004,0.003,0.002,0.024,0.029]) 412 | caliacc = np.average(np.array(total_correct_class_vox[1:])/(np.array(total_seen_class_vox[1:],dtype=np.float)+1e-6),weights=caliweights) 413 | log_string('eval whole scene point calibrated average acc vox: %f' % caliacc) 414 | 415 | per_class_str = 'vox based --------' 416 | for l in range(1,NUM_CLASSES): 417 | per_class_str += 'class %d weight: %f, acc: %f; ' % (l,labelweights_vox[l-1],total_correct_class_vox[l]/float(total_seen_class_vox[l])) 418 | log_string(per_class_str) 419 | EPOCH_CNT += 1 420 | return caliacc 421 | 422 | if __name__ == "__main__": 423 | log_string('pid: %s'%(str(os.getpid()))) 424 | train() 425 | LOG_FOUT.close() 426 | --------------------------------------------------------------------------------