├── .gitignore ├── sphere.npy ├── gaussian.npy ├── image ├── DGCNN.jpg ├── acc1.png ├── acc2.png ├── acc3.png ├── acc4.png ├── acc5.png ├── shapenetpartpart_train0_chair.png ├── shapenetpartpart_train58_airplane.png └── shapenetpartpart_train38_skateboard.png ├── tensorboard ├── Segment_dgcnn_seg_k40_1024_b32 │ └── events.out.tfevents.1579308861.server231 ├── Classify_dgcnn_cls_k40_1024_b32 │ └── events.out.tfevents.1579684482.server231 ├── Classify_dgcnn_seg_k40_1024_b32 │ └── events.out.tfevents.1579565090.server231 ├── Reconstruct_dgcnn_cls_k20_1024_b16 │ └── events.out.tfevents.1579401032.server231 ├── Reconstruct_dgcnn_seg_k40_1024_b32 │ └── events.out.tfevents.1579272092.server231 ├── Classify_dgcnn_cls_k40_1024_b32_part │ └── events.out.tfevents.1579698812.server231 ├── Segment_dgcnn_seg_k40_1024_b32_nolabel │ └── events.out.tfevents.1579336965.server231 └── Reconstruct_dgcnn_cls_k20_1024_b16_part │ └── events.out.tfevents.1579685815.server231 ├── LICENSE ├── utils.py ├── svm.py ├── main.py ├── dataset.py ├── loss.py ├── inference.py ├── reconstruction.py ├── README.md ├── classification.py ├── visualization.py ├── segmentation.py └── model.py /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | */.DS_Store 3 | -------------------------------------------------------------------------------- /sphere.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/antao97/PointCloudSegmentation/HEAD/sphere.npy -------------------------------------------------------------------------------- /gaussian.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/antao97/PointCloudSegmentation/HEAD/gaussian.npy -------------------------------------------------------------------------------- /image/DGCNN.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/antao97/PointCloudSegmentation/HEAD/image/DGCNN.jpg -------------------------------------------------------------------------------- /image/acc1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/antao97/PointCloudSegmentation/HEAD/image/acc1.png -------------------------------------------------------------------------------- /image/acc2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/antao97/PointCloudSegmentation/HEAD/image/acc2.png -------------------------------------------------------------------------------- /image/acc3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/antao97/PointCloudSegmentation/HEAD/image/acc3.png -------------------------------------------------------------------------------- /image/acc4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/antao97/PointCloudSegmentation/HEAD/image/acc4.png -------------------------------------------------------------------------------- /image/acc5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/antao97/PointCloudSegmentation/HEAD/image/acc5.png -------------------------------------------------------------------------------- /image/shapenetpartpart_train0_chair.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/antao97/PointCloudSegmentation/HEAD/image/shapenetpartpart_train0_chair.png -------------------------------------------------------------------------------- /image/shapenetpartpart_train58_airplane.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/antao97/PointCloudSegmentation/HEAD/image/shapenetpartpart_train58_airplane.png -------------------------------------------------------------------------------- /image/shapenetpartpart_train38_skateboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/antao97/PointCloudSegmentation/HEAD/image/shapenetpartpart_train38_skateboard.png -------------------------------------------------------------------------------- /tensorboard/Segment_dgcnn_seg_k40_1024_b32/events.out.tfevents.1579308861.server231: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/antao97/PointCloudSegmentation/HEAD/tensorboard/Segment_dgcnn_seg_k40_1024_b32/events.out.tfevents.1579308861.server231 -------------------------------------------------------------------------------- /tensorboard/Classify_dgcnn_cls_k40_1024_b32/events.out.tfevents.1579684482.server231: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/antao97/PointCloudSegmentation/HEAD/tensorboard/Classify_dgcnn_cls_k40_1024_b32/events.out.tfevents.1579684482.server231 -------------------------------------------------------------------------------- /tensorboard/Classify_dgcnn_seg_k40_1024_b32/events.out.tfevents.1579565090.server231: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/antao97/PointCloudSegmentation/HEAD/tensorboard/Classify_dgcnn_seg_k40_1024_b32/events.out.tfevents.1579565090.server231 -------------------------------------------------------------------------------- /tensorboard/Reconstruct_dgcnn_cls_k20_1024_b16/events.out.tfevents.1579401032.server231: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/antao97/PointCloudSegmentation/HEAD/tensorboard/Reconstruct_dgcnn_cls_k20_1024_b16/events.out.tfevents.1579401032.server231 -------------------------------------------------------------------------------- /tensorboard/Reconstruct_dgcnn_seg_k40_1024_b32/events.out.tfevents.1579272092.server231: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/antao97/PointCloudSegmentation/HEAD/tensorboard/Reconstruct_dgcnn_seg_k40_1024_b32/events.out.tfevents.1579272092.server231 -------------------------------------------------------------------------------- /tensorboard/Classify_dgcnn_cls_k40_1024_b32_part/events.out.tfevents.1579698812.server231: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/antao97/PointCloudSegmentation/HEAD/tensorboard/Classify_dgcnn_cls_k40_1024_b32_part/events.out.tfevents.1579698812.server231 -------------------------------------------------------------------------------- /tensorboard/Segment_dgcnn_seg_k40_1024_b32_nolabel/events.out.tfevents.1579336965.server231: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/antao97/PointCloudSegmentation/HEAD/tensorboard/Segment_dgcnn_seg_k40_1024_b32_nolabel/events.out.tfevents.1579336965.server231 -------------------------------------------------------------------------------- /tensorboard/Reconstruct_dgcnn_cls_k20_1024_b16_part/events.out.tfevents.1579685815.server231: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/antao97/PointCloudSegmentation/HEAD/tensorboard/Reconstruct_dgcnn_cls_k20_1024_b16_part/events.out.tfevents.1579685815.server231 -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 An Tao 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | @Author: An Tao 5 | @Contact: ta19@mails.tsinghua.edu.cn 6 | @File: utils.py 7 | @Time: 2020/1/2 10:26 AM 8 | """ 9 | 10 | import os 11 | import sys 12 | 13 | 14 | class AverageMeter(object): 15 | """Computes and stores the average and current value""" 16 | def __init__(self): 17 | self.reset() 18 | 19 | def reset(self): 20 | self.val = 0 21 | self.avg = 0 22 | self.sum = 0 23 | self.count = 0 24 | 25 | def update(self, val, n=1): 26 | self.val = val 27 | self.sum += val * n 28 | self.count += n 29 | self.avg = self.sum / self.count 30 | 31 | 32 | def learning_rate_decay(optimizer, t, lr_0): 33 | for param_group in optimizer.param_groups: 34 | lr = lr_0 / np.sqrt(1 + lr_0 * param_group['weight_decay'] * t) 35 | param_group['lr'] = lr 36 | 37 | 38 | class Logger(object): 39 | def __init__(self, fpath=None): 40 | self.console = sys.stdout 41 | self.file = None 42 | if fpath is not None: 43 | self.file = open(fpath, 'w') 44 | 45 | def __del__(self): 46 | self.close() 47 | 48 | def __enter__(self): 49 | pass 50 | 51 | def __exit__(self, *args): 52 | self.close() 53 | 54 | def write(self, msg): 55 | self.console.write(msg) 56 | if self.file is not None: 57 | self.file.write(msg) 58 | 59 | def flush(self): 60 | self.console.flush() 61 | if self.file is not None: 62 | self.file.flush() 63 | os.fsync(self.file.fileno()) 64 | 65 | def close(self): 66 | self.console.close() 67 | if self.file is not None: 68 | self.file.close() -------------------------------------------------------------------------------- /svm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | @Author: An Tao 5 | @Contact: ta19@mails.tsinghua.edu.cn 6 | @File: svm.py 7 | @Time: 2020/1/2 10:26 AM 8 | """ 9 | 10 | import os 11 | import h5py 12 | import numpy as np 13 | from glob import glob 14 | from sklearn.svm import LinearSVC 15 | 16 | 17 | class SVM(object): 18 | def __init__(self, feature_dir): 19 | self.feature_dir = feature_dir 20 | 21 | self.train_path = glob(os.path.join(self.feature_dir, 'train*.h5')) 22 | self.test_path = glob(os.path.join(self.feature_dir, 'test*.h5')) 23 | 24 | print("Loading feature dataset...") 25 | train_data = [] 26 | train_label = [] 27 | for path in self.train_path: 28 | f = h5py.File(path, 'r+') 29 | data = f['data'][:].astype('float32') 30 | label = f['label'][:].astype('int64') 31 | f.close() 32 | train_data.append(data) 33 | train_label.append(label) 34 | self.train_data = np.concatenate(train_data, axis=0) 35 | self.train_label = np.concatenate(train_label, axis=0) 36 | print("Training set size:", np.size(self.train_data, 0)) 37 | 38 | test_data = [] 39 | test_label = [] 40 | for path in self.test_path: 41 | f = h5py.File(path, 'r+') 42 | data = f['data'][:].astype('float32') 43 | label = f['label'][:].astype('int64') 44 | f.close() 45 | test_data.append(data) 46 | test_label.append(label) 47 | self.test_data = np.concatenate(test_data, axis=0) 48 | self.test_label = np.concatenate(test_label, axis=0) 49 | print("Testing set size:", np.size(self.test_data, 0)) 50 | 51 | def run(self): 52 | clf = LinearSVC(random_state=0) 53 | clf.fit(self.train_data, self.train_label) 54 | result = clf.predict(self.test_data) 55 | accuracy = np.sum(result==self.test_label).astype(float) / np.size(self.test_label) 56 | print("Transfer linear SVM accuracy: {:.2f}%".format(accuracy*100)) 57 | 58 | 59 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | @Author: An Tao 5 | @Contact: ta19@mails.tsinghua.edu.cn 6 | @File: main.py 7 | @Time: 2020/1/2 10:26 AM 8 | """ 9 | 10 | import argparse 11 | 12 | from reconstruction import Reconstruction 13 | from classification import Classification 14 | from segmentation import Segmentation 15 | from inference import Inference 16 | from svm import SVM 17 | 18 | 19 | def get_parser(): 20 | parser = argparse.ArgumentParser(description='Unsupervised Point Cloud Feature Learning') 21 | parser.add_argument('--exp_name', type=str, default=None, metavar='N', 22 | help='Name of the experiment') 23 | parser.add_argument('--task', type=str, default='reconstruct', metavar='N', 24 | choices=['reconstruct', 'classify', 'segment'], 25 | help='Experiment task, [reconstruct, classify, segment]') 26 | parser.add_argument('--seg_no_class_label', action='store_true', 27 | help='Do not use class labels in segmentation') 28 | parser.add_argument('--loss', type=str, default='softmax', metavar='N', 29 | choices=['softmax', 'triplet'], 30 | help='Loss to use, [softmax, triplet]') 31 | parser.add_argument('--margin', type=float, default=None, 32 | help='Margin for triplet loss') 33 | parser.add_argument('--encoder', type=str, default='foldingnet', metavar='N', 34 | choices=['foldnet', 'dgcnn_cls', 'dgcnn_seg'], 35 | help='Encoder to use, [foldingnet, dgcnn_cls, dgcnn_seg]') 36 | parser.add_argument('--dropout', type=float, default=0.5, 37 | help='Dropout rate') 38 | parser.add_argument('--feat_dims', type=int, default=512, metavar='N', 39 | help='Number of dims for feature ') 40 | parser.add_argument('--k', type=int, default=None, metavar='N', 41 | help='Num of nearest neighbors to use for KNN') 42 | parser.add_argument('--shape', type=str, default='sphere', metavar='N', 43 | choices=['plane', 'sphere', 'gaussian'], 44 | help='Shape of points to input decoder, [plane, sphere, gaussian]') 45 | parser.add_argument('--dataset', type=str, default='shapenetcorev2', metavar='N', 46 | choices=['shapenetpart', 'modelnet40', 'modelnet10', 'shapenetpartpart'], 47 | help='Dataset to use, [shapenetpart, modelnet40, modelnet10, shapenetpartpart]') 48 | parser.add_argument('--class_choice', type=str, default=None, metavar='N', 49 | choices=['airplane', 'bag', 'cap', 'car', 'chair', 50 | 'earphone', 'guitar', 'knife', 'lamp', 'laptop', 51 | 'motorbike', 'mug', 'pistol', 'rocket', 'skateboard', 'table']) 52 | parser.add_argument('--no_scheduler', action='store_true', 53 | help='Do not use scheduler in training') 54 | parser.add_argument('--use_rotate', action='store_true', 55 | help='Rotate the pointcloud before training') 56 | parser.add_argument('--use_translate', action='store_true', 57 | help='Translate the pointcloud before training') 58 | parser.add_argument('--use_jitter', action='store_true', 59 | help='Jitter the pointcloud before training') 60 | parser.add_argument('--dataset_root', type=str, default='../dataset', help="Dataset root path") 61 | parser.add_argument('--gpu', type=str, help='Id of gpu device to be used', default='0') 62 | parser.add_argument('--batch_size', type=int, default=16, metavar='batch_size', 63 | help='Size of batch)') 64 | parser.add_argument('--workers', type=int, help='Number of data loading workers', default=16) 65 | parser.add_argument('--epochs', type=int, default=None, metavar='N', 66 | help='Number of episode to train ') 67 | parser.add_argument('--snapshot_interval', type=int, default=10, metavar='N', 68 | help='Save snapshot interval ') 69 | parser.add_argument('--no_cuda', action='store_true', 70 | help='Enables CUDA training') 71 | parser.add_argument('--eval', action='store_true', 72 | help='Evaluate the model') 73 | parser.add_argument('--num_points', type=int, default=2048, 74 | help='Num of points to use') 75 | parser.add_argument('--model_path', type=str, default='', metavar='N', 76 | help='Path to load model') 77 | args = parser.parse_args() 78 | return args 79 | 80 | 81 | if __name__ == '__main__': 82 | args = get_parser() 83 | if args.eval == False: 84 | if args.task == 'reconstruct': 85 | reconstruction = Reconstruction(args) 86 | reconstruction.run() 87 | elif args.task == 'classify': 88 | classification = Classification(args) 89 | classification.run() 90 | elif args.task == 'segment': 91 | segmentation = Segmentation(args) 92 | segmentation.run() 93 | else: 94 | inference = Inference(args) 95 | feature_dir = inference.run() 96 | svm = SVM(feature_dir) 97 | svm.run() 98 | -------------------------------------------------------------------------------- /dataset.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | @Author: An Tao 5 | @Contact: ta19@mails.tsinghua.edu.cn 6 | @File: dataset.py 7 | @Time: 2020/1/2 10:26 AM 8 | """ 9 | 10 | import os 11 | import torch 12 | import json 13 | import h5py 14 | from glob import glob 15 | import numpy as np 16 | import torch.utils.data as data 17 | 18 | 19 | shapenetpart_seg_num = [4, 2, 2, 4, 4, 3, 3, 2, 4, 2, 6, 2, 3, 3, 3, 3] 20 | shapenetpart_seg_start_index = [0, 4, 6, 8, 12, 16, 19, 22, 24, 28, 30, 36, 38, 41, 44, 47] 21 | 22 | 23 | def translate_pointcloud(pointcloud): 24 | xyz1 = np.random.uniform(low=2./3., high=3./2., size=[3]) 25 | xyz2 = np.random.uniform(low=-0.2, high=0.2, size=[3]) 26 | 27 | translated_pointcloud = np.add(np.multiply(pointcloud, xyz1), xyz2).astype('float32') 28 | return translated_pointcloud 29 | 30 | 31 | def jitter_pointcloud(pointcloud, sigma=0.01, clip=0.02): 32 | N, C = pointcloud.shape 33 | pointcloud += np.clip(sigma * np.random.randn(N, C), -1*clip, clip) 34 | return pointcloud 35 | 36 | 37 | def rotate_pointcloud(pointcloud): 38 | theta = np.pi*2 * np.random.choice(24) / 24 39 | rotation_matrix = np.array([[np.cos(theta), -np.sin(theta)],[np.sin(theta), np.cos(theta)]]) 40 | pointcloud[:,[0,2]] = pointcloud[:,[0,2]].dot(rotation_matrix) # random rotation (x,z) 41 | return pointcloud 42 | 43 | 44 | class Dataset(data.Dataset): 45 | def __init__(self, root, dataset_name='modelnet40', class_choice=None, 46 | num_points=2048, split='train', load_name=False, 47 | segmentation=False, random_rotate=False, random_jitter=False, 48 | random_translate=False): 49 | 50 | assert dataset_name.lower() in ['shapenetcorev2', 'shapenetpart', 51 | 'modelnet10', 'modelnet40', 'shapenetpartpart'] 52 | assert num_points <= 2048 53 | 54 | if dataset_name in ['shapenetcorev2', 'shapenetpart', 'shapenetpartpart']: 55 | assert split.lower() in ['train', 'test', 'val', 'trainval', 'all'] 56 | else: 57 | assert split.lower() in ['train', 'test', 'all'] 58 | 59 | if dataset_name not in ['shapenetcorev2', 'shapenetpart', 'shapenetpartpart'] and segmentation == True: 60 | raise AssertionError 61 | 62 | self.root = os.path.join(root, dataset_name + '_' + '*hdf5_2048') 63 | self.dataset_name = dataset_name 64 | self.class_choice = class_choice 65 | self.num_points = num_points 66 | self.split = split 67 | self.load_name = load_name 68 | if self.dataset_name == 'shapenetpartpart': 69 | self.segmentation = True 70 | else: 71 | self.segmentation = segmentation 72 | self.random_rotate = random_rotate 73 | self.random_jitter = random_jitter 74 | self.random_translate = random_translate 75 | 76 | self.path_h5py_all = [] 77 | self.path_name_all = [] 78 | self.path_file_all = [] 79 | 80 | if self.split in ['train','trainval','all']: 81 | self.get_path('train') 82 | if self.dataset_name in ['shapenetcorev2', 'shapenetpart', 'shapenetpartpart']: 83 | if self.split in ['val','trainval','all']: 84 | self.get_path('val') 85 | if self.split in ['test', 'all']: 86 | self.get_path('test') 87 | 88 | self.path_h5py_all.sort() 89 | data, label, seg = self.load_h5py(self.path_h5py_all) 90 | 91 | if self.load_name or self.class_choice != None: 92 | self.path_name_all.sort() 93 | self.name = self.load_json(self.path_name_all) # load label name 94 | 95 | self.data = np.concatenate(data, axis=0) 96 | self.label = np.concatenate(label, axis=0) 97 | if self.segmentation: 98 | self.seg = np.concatenate(seg, axis=0) 99 | 100 | if self.class_choice != None: 101 | indices = (self.name == class_choice).squeeze() 102 | self.data = self.data[indices] 103 | self.label = self.label[indices] 104 | if self.segmentation: 105 | self.seg = self.seg[indices] 106 | self.seg_num_all = shapenetpart_seg_num[id_choice] 107 | self.seg_start_index = shapenetpart_seg_start_index[id_choice] 108 | elif self.segmentation: 109 | self.seg_num_all = 50 110 | self.seg_start_index = 0 111 | 112 | def get_path(self, type): 113 | path_h5py = os.path.join(self.root, '*%s*.h5'%type) 114 | self.path_h5py_all += glob(path_h5py) 115 | if self.load_name: 116 | path_json = os.path.join(self.root, '%s*_id2name.json'%type) 117 | self.path_name_all += glob(path_json) 118 | return 119 | 120 | def load_h5py(self, path): 121 | all_data = [] 122 | all_label = [] 123 | all_seg = [] 124 | for h5_name in path: 125 | f = h5py.File(h5_name, 'r+') 126 | data = f['data'][:].astype('float32') 127 | label = f['label'][:].astype('int64') 128 | if self.segmentation: 129 | seg = f['seg'][:].astype('int64') 130 | f.close() 131 | all_data.append(data) 132 | all_label.append(label) 133 | if self.segmentation: 134 | all_seg.append(seg) 135 | return all_data, all_label, all_seg 136 | 137 | def load_json(self, path): 138 | all_data = [] 139 | for json_name in path: 140 | j = open(json_name, 'r+') 141 | data = json.load(j) 142 | all_data += data 143 | return all_data 144 | 145 | def __getitem__(self, item): 146 | point_set = self.data[item][:self.num_points] 147 | label = self.label[item] 148 | if self.load_name: 149 | name = self.name[item] # get label name 150 | 151 | if self.random_rotate: 152 | point_set = rotate_pointcloud(point_set) 153 | if self.random_jitter: 154 | point_set = jitter_pointcloud(point_set) 155 | if self.random_translate: 156 | point_set = translate_pointcloud(point_set) 157 | 158 | # convert numpy array to pytorch Tensor 159 | point_set = torch.from_numpy(point_set) 160 | label = torch.from_numpy(np.array([label]).astype(np.int64)) 161 | label = label.squeeze(0) 162 | 163 | if self.segmentation: 164 | seg = self.seg[item] 165 | seg = torch.from_numpy(seg) 166 | if self.dataset_name == 'shapenetpartpart': 167 | return point_set, seg.unsqueeze(1)[0] 168 | else: 169 | return point_set, label, seg 170 | else: 171 | return point_set, label 172 | 173 | def __len__(self): 174 | return self.data.shape[0] -------------------------------------------------------------------------------- /loss.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | @Author: An Tao 5 | @Contact: ta19@mails.tsinghua.edu.cn 6 | @File: loss.py 7 | @Time: 2020/1/2 10:26 AM 8 | """ 9 | 10 | import copy 11 | import torch 12 | import torch.nn as nn 13 | import torch.nn.functional as F 14 | from torch.autograd import Variable 15 | 16 | 17 | def batch_pairwise_dist(x, y): 18 | bs, num_points_x, points_dim = x.size() 19 | _, num_points_y, _ = y.size() 20 | xx = torch.bmm(x, x.transpose(2, 1)) 21 | yy = torch.bmm(y, y.transpose(2, 1)) 22 | zz = torch.bmm(x, y.transpose(2, 1)) 23 | diag_ind_x = torch.arange(0, num_points_x) 24 | diag_ind_y = torch.arange(0, num_points_y) 25 | if x.get_device() != -1: 26 | diag_ind_x = diag_ind_x.cuda(x.get_device()) 27 | diag_ind_y = diag_ind_y.cuda(x.get_device()) 28 | rx = xx[:, diag_ind_x, diag_ind_x].unsqueeze(1).expand_as(zz.transpose(2, 1)) 29 | ry = yy[:, diag_ind_y, diag_ind_y].unsqueeze(1).expand_as(zz) 30 | P = (rx.transpose(2, 1) + ry - 2 * zz) 31 | return P 32 | 33 | 34 | class ChamferLoss(nn.Module): 35 | def __init__(self): 36 | super(ChamferLoss, self).__init__() 37 | self.use_cuda = torch.cuda.is_available() 38 | 39 | def forward(self, preds, gts): 40 | P = batch_pairwise_dist(gts, preds) 41 | mins, _ = torch.min(P, 1) 42 | loss_1 = torch.sum(mins) 43 | mins, _ = torch.min(P, 2) 44 | loss_2 = torch.sum(mins) 45 | return loss_1 + loss_2 46 | 47 | 48 | class CrossEntropyLoss(nn.Module): 49 | def __init__(self, smoothing=True): 50 | super(CrossEntropyLoss, self).__init__() 51 | self.smoothing = smoothing 52 | 53 | def forward(self, preds, gts): 54 | gts = gts.contiguous().view(-1) 55 | 56 | if self.smoothing: 57 | eps = 0.2 58 | n_class = preds.size(1) 59 | 60 | one_hot = torch.zeros_like(preds).scatter(1, gts.view(-1, 1), 1) 61 | one_hot = one_hot * (1 - eps) + (1 - one_hot) * eps / (n_class - 1) 62 | log_prb = F.log_softmax(preds, dim=1) 63 | 64 | loss = -(one_hot * log_prb).sum(dim=1).mean() 65 | else: 66 | loss = F.cross_entropy(preds, gts, reduction='mean') 67 | 68 | return loss 69 | 70 | 71 | class TripletLoss(nn.Module): 72 | def __init__(self, margin=None, normalize_feature=True): 73 | super(TripletLoss, self).__init__() 74 | self.margin = margin 75 | self.normalize_feature = normalize_feature 76 | if self.margin is not None: 77 | self.ranking_loss = nn.MarginRankingLoss(margin=margin) 78 | else: 79 | self.ranking_loss = nn.SoftMarginLoss() 80 | 81 | def normalize(self, x, axis=-1): 82 | """Normalizing to unit length along the specified dimension. 83 | Args: 84 | x: pytorch Variable 85 | Returns: 86 | x: pytorch Variable, same shape as input 87 | """ 88 | x = 1. * x / (torch.norm(x, 2, axis, keepdim=True).expand_as(x) + 1e-12) 89 | return x 90 | 91 | def euclidean_dist(self, x, y): 92 | """ 93 | Args: 94 | x: pytorch Variable, with shape [m, d] 95 | y: pytorch Variable, with shape [n, d] 96 | Returns: 97 | dist: pytorch Variable, with shape [m, n] 98 | """ 99 | m, n = x.size(0), y.size(0) 100 | xx = torch.pow(x, 2).sum(1, keepdim=True).expand(m, n) 101 | yy = torch.pow(y, 2).sum(1, keepdim=True).expand(n, m).t() 102 | dist = xx + yy 103 | dist.addmm_(1, -2, x, y.t()) 104 | dist = dist.clamp(min=1e-12).sqrt() # for numerical stability 105 | return dist 106 | 107 | def hard_example_mining(self, dist_mat, labels, return_inds=False): 108 | """For each anchor, find the hardest positive and negative sample. 109 | Args: 110 | dist_mat: pytorch Variable, pair wise distance between samples, shape [B, N, N] 111 | labels: pytorch LongTensor, with shape [B, N] 112 | return_inds: whether to return the indices. Save time if `False`(?) 113 | Returns: 114 | dist_ap: pytorch Variable, distance(anchor, positive); shape [N] 115 | dist_an: pytorch Variable, distance(anchor, negative); shape [N] 116 | p_inds: pytorch LongTensor, with shape [N]; 117 | indices of selected hard positive samples; 0 <= p_inds[i] <= N - 1 118 | n_inds: pytorch LongTensor, with shape [N]; 119 | indices of selected hard negative samples; 0 <= n_inds[i] <= N - 1 120 | NOTE: Only consider the case in which all labels have same num of samples, 121 | thus we can cope with all anchors in parallel. 122 | """ 123 | 124 | assert len(dist_mat.size()) == 3 125 | assert dist_mat.size(1) == dist_mat.size(2) 126 | B = dist_mat.size(0) 127 | N = dist_mat.size(1) 128 | 129 | # shape [B, N, N] 130 | is_pos = labels.unsqueeze(2).expand(B, N, N).eq(labels.unsqueeze(2).expand(B, N, N).transpose(2,1)) 131 | is_neg = labels.unsqueeze(2).expand(B, N, N).ne(labels.unsqueeze(2).expand(B, N, N).transpose(2,1)) 132 | 133 | # `dist_ap` means distance(anchor, positive) 134 | # both `dist_ap` and `relative_p_inds` with shape [B, N, 1] 135 | dist_mat_pos = torch.zeros(dist_mat.size(), dtype=torch.float32, device=dist_mat.device) 136 | dist_mat_pos[is_pos] = dist_mat[is_pos] 137 | dist_ap, relative_p_inds = torch.max(dist_mat_pos, 2, keepdim=True) 138 | # `dist_an` means distance(anchor, negative) 139 | # both `dist_an` and `relative_n_inds` with shape [B, N, 1] 140 | dist_mat_neg = torch.zeros(dist_mat.size(), dtype=torch.float32, device=dist_mat.device).fill_(1000) 141 | dist_mat_neg[is_neg] = dist_mat[is_neg] 142 | dist_an, relative_n_inds = torch.min(dist_mat_neg, 2, keepdim=True) 143 | 144 | # shape [B, N] 145 | dist_ap = dist_ap.squeeze(2) 146 | dist_an = dist_an.squeeze(2) 147 | 148 | return dist_ap, dist_an 149 | 150 | def forward(self, preds, gts, new_device): 151 | """ 152 | Args: 153 | preds: pytorch Variable, shape [B, N, C] 154 | gts: pytorch LongTensor, with shape [B, N] 155 | Returns: 156 | loss: pytorch Variable, with shape [1] 157 | p_inds: pytorch LongTensor, with shape [N]; 158 | indices of selected hard positive samples; 0 <= p_inds[i] <= N - 1 159 | n_inds: pytorch LongTensor, with shape [N]; 160 | indices of selected hard negative samples; 0 <= n_inds[i] <= N - 1 161 | ================== 162 | For Debugging, etc 163 | ================== 164 | dist_ap: pytorch Variable, distance(anchor, positive); shape [N] 165 | dist_an: pytorch Variable, distance(anchor, negative); shape [N] 166 | dist_mat: pytorch Variable, pairwise euclidean distance; shape [N, N] 167 | """ 168 | if self.normalize_feature: 169 | preds = self.normalize(preds, axis=-1) 170 | preds = preds.cuda(new_device) 171 | gts = gts.cuda(new_device) 172 | # shape [B, N, N] 173 | dist_mat = batch_pairwise_dist(preds, preds) 174 | dist_mat = dist_mat.clamp(min=1e-12).sqrt() # for numerical stability 175 | dist_ap, dist_an = self.hard_example_mining(dist_mat, gts) 176 | y = Variable(dist_an.data.new().resize_as_(dist_an.data).fill_(1)) 177 | if self.margin is not None: 178 | loss = self.ranking_loss(dist_an, dist_ap, y) 179 | else: 180 | loss = self.ranking_loss(dist_an - dist_ap, y) 181 | return loss, dist_ap, dist_an 182 | 183 | -------------------------------------------------------------------------------- /inference.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | @Author: An Tao 5 | @Contact: ta19@mails.tsinghua.edu.cn 6 | @File: inference.py 7 | @Time: 2020/1/2 10:26 AM 8 | """ 9 | 10 | import os 11 | import sys 12 | import time 13 | import shutil 14 | import torch 15 | import numpy as np 16 | import h5py 17 | 18 | from tensorboardX import SummaryWriter 19 | 20 | from model import ReconstructionNet, ClassificationNet, SegmentationNet 21 | from dataset import Dataset 22 | from utils import Logger 23 | 24 | 25 | class Inference(object): 26 | def __init__(self, args): 27 | self.batch_size = args.batch_size 28 | self.no_cuda = args.no_cuda 29 | self.task = args.task 30 | 31 | # create exp directory 32 | file = [f for f in args.model_path.split('/')] 33 | if args.exp_name != None: 34 | self.experiment_id = args.exp_name 35 | else: 36 | self.experiment_id = time.strftime('%m%d%H%M%S') 37 | cache_root = 'cache/%s' % self.experiment_id 38 | os.makedirs(cache_root, exist_ok=True) 39 | self.feature_dir = os.path.join(cache_root, 'features/') 40 | sys.stdout = Logger(os.path.join(cache_root, 'log.txt')) 41 | 42 | # check directory 43 | if not os.path.exists(self.feature_dir): 44 | os.makedirs(self.feature_dir) 45 | else: 46 | shutil.rmtree(self.feature_dir) 47 | os.makedirs(self.feature_dir) 48 | 49 | # print args 50 | print(str(args)) 51 | 52 | # get gpu id 53 | gids = ''.join(args.gpu.split()) 54 | self.gpu_ids = [int(gid) for gid in gids.split(',')] 55 | self.first_gpu = self.gpu_ids[0] 56 | 57 | # generate dataset 58 | self.infer_dataset_train = Dataset( 59 | root=args.dataset_root, 60 | dataset_name=args.dataset, 61 | split='train', 62 | num_points=args.num_points, 63 | ) 64 | self.infer_dataset_test = Dataset( 65 | root=args.dataset_root, 66 | dataset_name=args.dataset, 67 | split='test', 68 | num_points=args.num_points, 69 | ) 70 | self.infer_loader_train = torch.utils.data.DataLoader( 71 | self.infer_dataset_train, 72 | batch_size=args.batch_size, 73 | shuffle=False, 74 | num_workers=args.workers 75 | ) 76 | self.infer_loader_test = torch.utils.data.DataLoader( 77 | self.infer_dataset_test, 78 | batch_size=args.batch_size, 79 | shuffle=False, 80 | num_workers=args.workers 81 | ) 82 | print("Inference set size (train):", self.infer_loader_train.dataset.__len__()) 83 | print("Inference set size (test):", self.infer_loader_test.dataset.__len__()) 84 | 85 | # initialize model 86 | if args.task == "reconstruct": 87 | self.model = ReconstructionNet(args) 88 | elif args.task == "classify": 89 | self.model = ClassificationNet(args) 90 | elif args.task == "segment": 91 | self.model = SegmentationNet(args) 92 | if args.model_path != '': 93 | self._load_pretrain(args.model_path) 94 | 95 | # load model to gpu 96 | if not args.no_cuda: 97 | if len(self.gpu_ids) != 1: # multiple gpus 98 | self.model = torch.nn.DataParallel(self.model.cuda(self.first_gpu), self.gpu_ids) 99 | else: 100 | self.model = self.model.cuda(self.gpu_ids[0]) 101 | 102 | def run(self): 103 | self.model.eval() 104 | 105 | # generate train set for SVM 106 | loss_buf = [] 107 | feature_train = [] 108 | lbs_train = [] 109 | n = 0 110 | for iter, (pts, lbs) in enumerate(self.infer_loader_train): 111 | if not self.no_cuda: 112 | pts = pts.cuda(self.first_gpu) 113 | lbs = lbs.cuda(self.first_gpu) 114 | if self.task == "reconstruct": 115 | output, feature = self.model(pts) 116 | elif self.task in ["classify", "segment"]: 117 | feature = self.model(pts) 118 | feature_train.append(feature.detach().cpu().numpy().squeeze(1)) 119 | lbs_train.append(lbs.cpu().numpy().squeeze(1)) 120 | if ((iter+1) * self.batch_size % 2048) == 0 \ 121 | or (iter+1) == len(self.infer_loader_train): 122 | feature_train = np.concatenate(feature_train, axis=0) 123 | lbs_train = np.concatenate(lbs_train, axis=0) 124 | f = h5py.File(os.path.join(self.feature_dir, 'train' + str(n) + '.h5'),'w') 125 | f['data'] = feature_train 126 | f['label'] = lbs_train 127 | f.close() 128 | print("Train set {} for SVM saved.".format(n)) 129 | feature_train = [] 130 | lbs_train = [] 131 | n += 1 132 | if self.task == "reconstruct": 133 | if len(self.gpu_ids) != 1: # multiple gpus 134 | loss = self.model.module.get_loss(pts, output) 135 | else: 136 | loss = self.model.get_loss(pts, output) 137 | loss_buf.append(loss.detach().cpu().numpy()) 138 | if self.task == "reconstruct": 139 | print(f'Avg loss {np.mean(loss_buf)}') 140 | print("Finish generating train set for SVM.") 141 | 142 | # generate test set for SVM 143 | loss_buf = [] 144 | feature_test = [] 145 | lbs_test = [] 146 | n = 0 147 | for iter, (pts, lbs) in enumerate(self.infer_loader_test): 148 | if not self.no_cuda: 149 | pts = pts.cuda(self.first_gpu) 150 | lbs = lbs.cuda(self.first_gpu) 151 | if self.task == "reconstruct": 152 | output, feature = self.model(pts) 153 | elif self.task in ["classify", "segment"]: 154 | feature = self.model(pts) 155 | feature_test.append(feature.detach().cpu().numpy().squeeze(1)) 156 | lbs_test.append(lbs.cpu().numpy().squeeze(1)) 157 | if ((iter+1) * self.batch_size % 2048) == 0 \ 158 | or (iter+1) == len(self.infer_loader_test): 159 | feature_test = np.concatenate(feature_test, axis=0) 160 | lbs_test = np.concatenate(lbs_test, axis=0) 161 | f = h5py.File(os.path.join(self.feature_dir, 'test' + str(n) + '.h5'),'w') 162 | f['data'] = feature_test 163 | f['label'] = lbs_test 164 | f.close() 165 | print("Test set {} for SVM saved.".format(n)) 166 | feature_test = [] 167 | lbs_test = [] 168 | n += 1 169 | if self.task == "reconstruct": 170 | if len(self.gpu_ids) != 1: # multiple gpus 171 | loss = self.model.module.get_loss(pts, output) 172 | else: 173 | loss = self.model.get_loss(pts, output) 174 | loss_buf.append(loss.detach().cpu().numpy()) 175 | if self.task == "reconstruct": 176 | print(f'Avg loss {np.mean(loss_buf)}') 177 | print("Finish generating test set for SVM.") 178 | 179 | return self.feature_dir 180 | 181 | 182 | def _load_pretrain(self, pretrain): 183 | state_dict = torch.load(pretrain, map_location='cpu') 184 | from collections import OrderedDict 185 | new_state_dict = OrderedDict() 186 | for key, val in state_dict.items(): 187 | if key[:6] == 'module': 188 | name = key[7:] # remove 'module.' 189 | else: 190 | name = key 191 | if key[:10] == 'classifier': 192 | continue 193 | if key[:9] == 'segmenter': 194 | continue 195 | new_state_dict[name] = val 196 | self.model.load_state_dict(new_state_dict) 197 | print(f"Load model from {pretrain}") 198 | -------------------------------------------------------------------------------- /reconstruction.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | @Author: An Tao 5 | @Contact: ta19@mails.tsinghua.edu.cn 6 | @File: reconstruction.py 7 | @Time: 2020/1/2 10:26 AM 8 | """ 9 | 10 | import os 11 | import sys 12 | import time 13 | import shutil 14 | import torch 15 | import torch.optim as optim 16 | import numpy as np 17 | 18 | from tensorboardX import SummaryWriter 19 | 20 | from model import ReconstructionNet 21 | from dataset import Dataset 22 | from utils import Logger 23 | 24 | 25 | class Reconstruction(object): 26 | def __init__(self, args): 27 | self.dataset_name = args.dataset 28 | if args.epochs != None: 29 | self.epochs = args.epochs 30 | elif args.encoder == 'foldnet': 31 | self.epochs = 278 32 | elif args.encoder == 'dgcnn_cls': 33 | self.epochs = 250 34 | elif args.encoder == 'dgcnn_seg': 35 | self.epochs = 290 36 | self.batch_size = args.batch_size 37 | self.snapshot_interval = args.snapshot_interval 38 | self.no_cuda = args.no_cuda 39 | self.model_path = args.model_path 40 | 41 | # create exp directory 42 | file = [f for f in args.model_path.split('/')] 43 | if args.exp_name != None: 44 | self.experiment_id = "Reconstruct_" + args.exp_name 45 | elif file[-2] == 'models': 46 | self.experiment_id = file[-3] 47 | else: 48 | self.experiment_id = "Reconstruct" + time.strftime('%m%d%H%M%S') 49 | snapshot_root = 'snapshot/%s' % self.experiment_id 50 | tensorboard_root = 'tensorboard/%s' % self.experiment_id 51 | self.save_dir = os.path.join(snapshot_root, 'models/') 52 | self.tboard_dir = tensorboard_root 53 | 54 | # check arguments 55 | if self.model_path == '': 56 | if not os.path.exists(self.save_dir): 57 | os.makedirs(self.save_dir) 58 | else: 59 | choose = input("Remove " + self.save_dir + " ? (y/n)") 60 | if choose == "y": 61 | shutil.rmtree(self.save_dir) 62 | os.makedirs(self.save_dir) 63 | else: 64 | sys.exit(0) 65 | if not os.path.exists(self.tboard_dir): 66 | os.makedirs(self.tboard_dir) 67 | else: 68 | shutil.rmtree(self.tboard_dir) 69 | os.makedirs(self.tboard_dir) 70 | sys.stdout = Logger(os.path.join(snapshot_root, 'log.txt')) 71 | self.writer = SummaryWriter(log_dir=self.tboard_dir) 72 | 73 | # print args 74 | print(str(args)) 75 | 76 | # get gpu id 77 | gids = ''.join(args.gpu.split()) 78 | self.gpu_ids = [int(gid) for gid in gids.split(',')] 79 | self.first_gpu = self.gpu_ids[0] 80 | 81 | # generate dataset 82 | self.train_dataset = Dataset( 83 | root=args.dataset_root, 84 | dataset_name=args.dataset, 85 | split='all', 86 | num_points=args.num_points, 87 | random_translate=args.use_translate, 88 | random_rotate=True, 89 | random_jitter=args.use_jitter 90 | ) 91 | self.train_loader = torch.utils.data.DataLoader( 92 | self.train_dataset, 93 | batch_size=args.batch_size, 94 | shuffle=True, 95 | num_workers=args.workers 96 | ) 97 | print("Training set size:", self.train_loader.dataset.__len__()) 98 | 99 | # initialize model 100 | self.model = ReconstructionNet(args) 101 | if self.model_path != '': 102 | self._load_pretrain(args.model_path) 103 | 104 | # load model to gpu 105 | if not self.no_cuda: 106 | if len(self.gpu_ids) != 1: # multiple gpus 107 | self.model = torch.nn.DataParallel(self.model.cuda(self.first_gpu), self.gpu_ids) 108 | else: 109 | self.model = self.model.cuda(self.gpu_ids[0]) 110 | 111 | # initialize optimizer 112 | self.parameter = self.model.parameters() 113 | self.optimizer = optim.Adam(self.parameter, lr=0.0001*16/args.batch_size, betas=(0.9, 0.999), weight_decay=1e-6) 114 | 115 | 116 | def run(self): 117 | self.train_hist = { 118 | 'loss': [], 119 | 'per_epoch_time': [], 120 | 'total_time': [] 121 | } 122 | best_loss = 1000000000 123 | print('Training start!!') 124 | start_time = time.time() 125 | self.model.train() 126 | if self.model_path != '': 127 | start_epoch = self.model_path[-7:-4] 128 | if start_epoch[0] == '_': 129 | start_epoch = start_epoch[1:] 130 | start_epoch = int(start_epoch) 131 | else: 132 | start_epoch = 0 133 | for epoch in range(start_epoch, self.epochs): 134 | loss = self.train_epoch(epoch) 135 | 136 | # save snapeshot 137 | if (epoch + 1) % self.snapshot_interval == 0: 138 | self._snapshot(epoch + 1) 139 | if loss < best_loss: 140 | best_loss = loss 141 | self._snapshot('best') 142 | 143 | # save tensorboard 144 | if self.writer: 145 | self.writer.add_scalar('Train Loss', self.train_hist['loss'][-1], epoch) 146 | self.writer.add_scalar('Learning Rate', self._get_lr(), epoch) 147 | 148 | # finish all epoch 149 | self._snapshot(epoch + 1) 150 | if loss < best_loss: 151 | best_loss = loss 152 | self._snapshot('best') 153 | self.train_hist['total_time'].append(time.time() - start_time) 154 | print("Avg one epoch time: %.2f, total %d epochs time: %.2f" % (np.mean(self.train_hist['per_epoch_time']), 155 | self.epochs, self.train_hist['total_time'][0])) 156 | print("Training finish!... save training results") 157 | 158 | 159 | def train_epoch(self, epoch): 160 | epoch_start_time = time.time() 161 | loss_buf = [] 162 | num_batch = int(len(self.train_loader.dataset) / self.batch_size) 163 | for iter, (pts, _) in enumerate(self.train_loader): 164 | if pts.size(0) == 1: 165 | continue 166 | 167 | if not self.no_cuda: 168 | pts = pts.cuda(self.first_gpu) 169 | 170 | # forward 171 | self.optimizer.zero_grad() 172 | output, _ = self.model(pts) 173 | 174 | # loss 175 | if len(self.gpu_ids) != 1: # multiple gpus 176 | loss = self.model.module.get_loss(pts, output) 177 | else: 178 | loss = self.model.get_loss(pts, output) 179 | 180 | # backward 181 | loss.backward() 182 | self.optimizer.step() 183 | loss_buf.append(loss.detach().cpu().numpy()) 184 | 185 | # finish one epoch 186 | epoch_time = time.time() - epoch_start_time 187 | self.train_hist['per_epoch_time'].append(epoch_time) 188 | self.train_hist['loss'].append(np.mean(loss_buf)) 189 | print(f'Epoch {epoch+1}: Loss {np.mean(loss_buf)}, time {epoch_time:.4f}s') 190 | return np.mean(loss_buf) 191 | 192 | 193 | def _snapshot(self, epoch): 194 | state_dict = self.model.state_dict() 195 | from collections import OrderedDict 196 | new_state_dict = OrderedDict() 197 | for key, val in state_dict.items(): 198 | if key[:6] == 'module': 199 | name = key[7:] # remove 'module.' 200 | else: 201 | name = key 202 | new_state_dict[name] = val 203 | save_dir = os.path.join(self.save_dir, self.dataset_name) 204 | torch.save(new_state_dict, save_dir + "_" + str(epoch) + '.pkl') 205 | print(f"Save model to {save_dir}_{str(epoch)}.pkl") 206 | 207 | 208 | def _load_pretrain(self, pretrain): 209 | state_dict = torch.load(pretrain, map_location='cpu') 210 | from collections import OrderedDict 211 | new_state_dict = OrderedDict() 212 | for key, val in state_dict.items(): 213 | if key[:6] == 'module': 214 | name = key[7:] # remove 'module.' 215 | else: 216 | name = key 217 | new_state_dict[name] = val 218 | self.model.load_state_dict(new_state_dict) 219 | print(f"Load model from {pretrain}") 220 | 221 | 222 | def _get_lr(self, group=0): 223 | return self.optimizer.param_groups[group]['lr'] 224 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Point Cloud Segmentation for Classific Feature Learning 2 | ## Introduction 3 | This work aims to show whether learning a point cloud segmentation task is able to extract features performing well in classification. We do all experiments under the framework of DGCNN. 4 | 5 | Details for DGCNN see **Dynamic Graph CNN for Learning on Point Clouds** (https://arxiv.xilesou.top/pdf/1801.07829). We provide a PyTorch reimplementation for DGCNN in [antao97/dgcnn.pytorch](https://github.com/antao97/dgcnn.pytorch). 6 | 7 | DGCNN provides two type of networks, one for classification and one for segmentation. We use "DGCNN_Cls" to denote network for classification and "DGCNN_Seg" for segmentation. The network sturcture of DGCNN is 8 |

9 | 10 |

11 | 12 | We also do experiments to see whether learning segmentation on meaningful point clouds assembled by some base point clouds can help to learning better features for base point clouds. To run the experments, we first seperate segmentation parts of each shape from ShapeNetPart dataset as new shapes and make this new dataset as ShapeNetPart Part dataset. Then, we train network on ShapeNetPart dataset as normal and test it on ShapeNetPart Part dataset. 13 | 14 | Some visualized point clouds in our ShapeNetPart Part dataset: 15 |

16 | 17 | 18 | 19 |

20 |       chair           skateboard           airplane 21 | 22 | Experimental results show that learning point cloud segmentation does help to extract features suitable for classification. 23 | 24 | **The key contributions of this work are as follows:** 25 | 26 | - Since the network provided by DGCNN for segmentation is supervised, we provide an revised DGCNN segmentation network with no category label. 27 | - When segmentation is trained and tested on intact point clouds, the trained model can help to extract better features. 28 | - When segmentation is trained on intact point clouds and tested on base point clouds, the trained model also can help to extract better features. 29 | 30 | If you find this work useful, please cite: 31 | ``` 32 | @article{tao2020, 33 | Author = {An Tao}, 34 | Title = {Point Cloud Segmentation for Classific Feature Learning}, 35 | Journal = {https://github.com/antao97/PointCloudSegmentation}, 36 | Year = {2020} 37 | } 38 | ``` 39 | 40 |   41 | ## Requirements 42 | - Python 3.7 43 | - PyTorch 1.2 44 | - CUDA 10.0 45 | - Package: glob, h5py, tensorflow, tensorboard, tensorboardX and sklearn 46 | 47 |   48 | ## Download datasets 49 | Download the HDF5 format datasets (where each shape is sampled 2,048 points uniformly): 50 | 51 | - ShapeNetPart (338M) [[TsinghuaCloud]](https://cloud.tsinghua.edu.cn/f/c25d94e163454196a26b/) [[BaiduDisk]](https://pan.baidu.com/s/1yi4bMVBE2mV8NqVRtNLoqw) 52 | - ShapeNetPart Part (450M) [[TsinghuaCloud]](https://cloud.tsinghua.edu.cn/f/b6ee6e3b345744889e6b/) [[BaiduDisk]](https://pan.baidu.com/s/1uCotFveZ5R5ztRPIm1hfJQ) 53 | - ModelNet40 (194M) [[TsinghuaCloud]](https://cloud.tsinghua.edu.cn/f/b3d9fe3e2a514def8097/) [[BaiduDisk]](https://pan.baidu.com/s/1NQZgN8tvHVqQntxefcdVAg) 54 | 55 | You can find more details about the above datasets in this [repo](https://github.com/antao97/PointCloudDatasets). 56 | 57 |   58 | ## Experiment settings 59 | To evaluate the quality of extracted features, we use ShapeNetPart dataset to both train DGCNN and a linear SVM classifier. Specifically, we train the linear SVM classifier on ShapeNetPart dataset using the features (latent representations) obtained from the trained feature encoder. 60 | 61 | For transfer performance, we train the linear SVM classifier on ModelNet 40 dataset using the features (latent representations) obtained from the same network trained from the ShapeNetPart dataset. 62 | 63 | In this work we compare the performance for adopted training task among supervised segmentation, supervised segmentation without category label, supervised classification and unsupervised reconstruction. For supervised segmentation without category label, we discard the adding of categorical vector (mlp {64}) and directly repeat the 1024 dim feature into n x 1024. We do unsupervised reconstruction following the framework in this [repo](https://github.com/antao97/UnsupervisedPointCloudReconstruction) and use source points from sphere surface for decoder. We also change feature dimension into 1024. Except unsupervised reconstruction, we do all experiments under the framework of DGCNN. 64 | 65 | To train the network, run 66 | ``` 67 | python main.py --exp_name --task --dataset_root --encoder --k <20 | 40> --feat_dims 1024 --batch_size <16 | 32> --dataset shapenetpart --gpu 68 | ``` 69 | Use `--seg_no_label` if you want to run segmentation task without category label. 70 | 71 | You can download our already trained models from [[TsinghuaCloud]](https://cloud.tsinghua.edu.cn/d/d9e7a899582d432cbc11/) or [[BaiduDisk]](https://pan.baidu.com/s/1UigbY4jNts8LMZ6fqJXvxQ) and place them under `snapshot/`. 72 | 73 | Because this work is done before our PyTorch reimplementation for DGCNN, the training setting in this repo is slightly different with [antao97/dgcnn.pytorch](https://github.com/antao97/dgcnn.pytorch). You can set `self.epochs = 200` and `random_translate=False` in line 61 and 120 in `segmentation.py` to follow the settings in [antao97/dgcnn.pytorch](https://github.com/antao97/dgcnn.pytorch). 74 | 75 | Besides ShapeNetPart dataset, we also test the performace of linear SVM classifier on ShapeNetPart Part dataset, using the model trained on ShapeNetPart dataset with segmentation task. 76 | 77 | To evaluate the performance of a given trained model, run 78 | ``` 79 | python main.py --eval --model_path --task --dataset_root --encoder --k <20 | 40> --feat_dims 1024 --dataset --gpu 80 | ``` 81 | 82 | Use `--no_cuda` if you want to run in CPU. 83 | 84 | To use Tensorboard, run 85 | ``` 86 | tensorboard --logdir tensorboard --bind_all 87 | ``` 88 | You can find the Tensorboard records under `tensorboard/`. 89 | 90 |   91 | ## Classification accuracy of linear SVM classifier 92 | ### Results with best settings 93 | | Task | Info | Encoder | K | Batch Size | Epochs | ShapeNetPart | ModelNet40 | 94 | | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | 95 | | Segmentation | Supervised | DGCNN_Seg | 40 | 32 | 250 | 98.9% | 89.4% | 96 | | Segmentation | Supervised (no category label) | DGCNN_Seg | 40 | 32 | 250 | **99.9%** | 89.2% | 97 | | Classification | Supervised | DGCNN_Cls | 40 | 32 | 250 | 99.8% | 89.6% | 98 | | Reconstruction | Unsupervised | DGCNN_Cls | 20 | 16 | 250 | 98.7% | **89.8%** | 99 | 100 |   101 |

102 | 103 | 104 |

105 | 106 |   107 | ### Results with settings for segmentation task 108 | | Task | Info | Encoder | K | Batch Size | Epochs | ShapeNetPart | ModelNet40 | 109 | | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | 110 | | Segmentation | Supervised | DGCNN_Seg | 40 | 32 | 250 | 98.9% | **89.4%** | 111 | | Segmentation | Supervised (no category label) | DGCNN_Seg | 40 | 32 | 250 | **99.9%** | 89.2% | 112 | | Classification | Supervised | DGCNN_Seg | 40 | 32 | 250 | **99.9%** | 86.8% | 113 | | Reconstruction | Unsupervised | DGCNN_Seg | 40 | 32 | 290 | 98.8% | 89.2% | 114 | 115 |   116 |

117 | 118 | 119 |

120 | 121 |   122 | ### Results evaluated on ShapeNetPart Part dataset 123 | | Task | Info | Encoder | K | Batch Size | Epochs | Training Dataset | Eval Acc 124 | | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | 125 | | Segmentation | Supervised | DGCNN_Seg | 40 | 32 | 250 | ShapeNetPart | 85.0% | 126 | | Segmentation | Supervised (no category label) | DGCNN_Seg | 40 | 32 | 250 | ShapeNetPart | 84.0% | 127 | | Classification | Supervised | DGCNN_Cls | 40 | 32 | 250 | ShapeNetPart Part | **99.0%** | 4 128 | | Reconstruction | Unsupervised | DGCNN_Cls | 20 | 16 | 250 | ShapeNetPart Part | 87.5% | 129 | 130 |   131 |

132 | 133 |

134 | 135 |   136 | ## Performance analysis 137 | Experimental results show that learning point cloud segmentation does help to extract features suitable for classification. However, simply adopting the training scheme from DGCNN for segmentation task is not suitable for transfer learning. We believe better results will get if using better training scheme. 138 | 139 | -------------------------------------------------------------------------------- /classification.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | @Author: An Tao 5 | @Contact: ta19@mails.tsinghua.edu.cn 6 | @File: reconstruction.py 7 | @Time: 2020/1/2 10:26 AM 8 | """ 9 | 10 | import os 11 | import sys 12 | import time 13 | import shutil 14 | import numpy as np 15 | import torch 16 | import torch.optim as optim 17 | from torch.optim.lr_scheduler import CosineAnnealingLR 18 | import sklearn.metrics as metrics 19 | 20 | from tensorboardX import SummaryWriter 21 | 22 | from model import ClassificationNet 23 | from dataset import Dataset 24 | from utils import Logger 25 | 26 | 27 | class Classification(object): 28 | def __init__(self, args): 29 | self.dataset_name = args.dataset 30 | if args.epochs != None: 31 | self.epochs = args.epochs 32 | else: 33 | self.epochs = 250 34 | self.batch_size = args.batch_size 35 | self.snapshot_interval = args.snapshot_interval 36 | self.no_cuda = args.no_cuda 37 | self.model_path = args.model_path 38 | self.no_scheduler = args.no_scheduler 39 | 40 | # create exp directory 41 | file = [f for f in args.model_path.split('/')] 42 | if args.exp_name != None: 43 | self.experiment_id = "Classify_" + args.exp_name 44 | elif file[-2] == 'models': 45 | self.experiment_id = file[-3] 46 | else: 47 | self.experiment_id = "Classify" + time.strftime('%m%d%H%M%S') 48 | snapshot_root = 'snapshot/%s' % self.experiment_id 49 | tensorboard_root = 'tensorboard/%s' % self.experiment_id 50 | self.save_dir = os.path.join(snapshot_root, 'models/') 51 | self.tboard_dir = tensorboard_root 52 | 53 | # check arguments 54 | if self.model_path == '': 55 | if not os.path.exists(self.save_dir): 56 | os.makedirs(self.save_dir) 57 | else: 58 | choose = input("Remove " + self.save_dir + " ? (y/n)") 59 | if choose == "y": 60 | shutil.rmtree(self.save_dir) 61 | os.makedirs(self.save_dir) 62 | else: 63 | sys.exit(0) 64 | if not os.path.exists(self.tboard_dir): 65 | os.makedirs(self.tboard_dir) 66 | else: 67 | shutil.rmtree(self.tboard_dir) 68 | os.makedirs(self.tboard_dir) 69 | sys.stdout = Logger(os.path.join(snapshot_root, 'log.txt')) 70 | self.writer = SummaryWriter(log_dir=self.tboard_dir) 71 | 72 | # print args 73 | print(str(args)) 74 | 75 | # get gpu id 76 | gids = ''.join(args.gpu.split()) 77 | self.gpu_ids = [int(gid) for gid in gids.split(',')] 78 | self.first_gpu = self.gpu_ids[0] 79 | 80 | # generate dataset 81 | self.train_dataset = Dataset( 82 | root=args.dataset_root, 83 | dataset_name=args.dataset, 84 | split='all', 85 | num_points=args.num_points, 86 | random_translate=True, 87 | random_rotate=args.use_rotate, 88 | random_jitter=args.use_jitter 89 | ) 90 | self.train_loader = torch.utils.data.DataLoader( 91 | self.train_dataset, 92 | batch_size=args.batch_size, 93 | shuffle=True, 94 | num_workers=args.workers 95 | ) 96 | print("Training set size:", self.train_loader.dataset.__len__()) 97 | 98 | # initialize model 99 | self.model = ClassificationNet(args) 100 | if self.model_path != '': 101 | self._load_pretrain(args.model_path) 102 | 103 | # load model to gpu 104 | if not self.no_cuda: 105 | if len(self.gpu_ids) != 1: # multiple gpus 106 | self.model = torch.nn.DataParallel(self.model.cuda(self.first_gpu), self.gpu_ids) 107 | else: 108 | self.model = self.model.cuda(self.gpu_ids[0]) 109 | 110 | # initialize optimizer 111 | self.parameter = self.model.parameters() 112 | if self.no_scheduler == False: 113 | self.optimizer = optim.SGD(self.parameter, lr=0.1, weight_decay=1e-4) 114 | self.scheduler = CosineAnnealingLR(self.optimizer, self.epochs, eta_min=1e-3) 115 | else: 116 | self.optimizer = optim.SGD(self.parameter, lr=0.01, weight_decay=1e-4) 117 | 118 | 119 | def run(self): 120 | self.train_hist = { 121 | 'loss': [], 122 | 'per_epoch_time': [], 123 | 'total_time': [] 124 | } 125 | best_loss = 1000000000 126 | print('Training start!!') 127 | start_time = time.time() 128 | self.model.train() 129 | if self.model_path != '': 130 | start_epoch = self.model_path[-7:-4] 131 | if start_epoch[0] == '_': 132 | start_epoch = start_epoch[1:] 133 | start_epoch = int(start_epoch) 134 | else: 135 | start_epoch = 0 136 | for epoch in range(start_epoch, self.epochs): 137 | loss = self.train_epoch(epoch) 138 | 139 | # save snapeshot 140 | if (epoch + 1) % self.snapshot_interval == 0: 141 | self._snapshot(epoch + 1) 142 | if loss < best_loss: 143 | best_loss = loss 144 | self._snapshot('best') 145 | 146 | # save tensorboard 147 | if self.writer: 148 | self.writer.add_scalar('Train Loss', self.train_hist['loss'][-1], epoch) 149 | self.writer.add_scalar('Learning Rate', self._get_lr(), epoch) 150 | 151 | # finish all epoch 152 | self._snapshot(epoch + 1) 153 | if loss < best_loss: 154 | best_loss = loss 155 | self._snapshot('best') 156 | self.train_hist['total_time'].append(time.time() - start_time) 157 | print("Avg one epoch time: %.2f, total %d epochs time: %.2f" % (np.mean(self.train_hist['per_epoch_time']), 158 | self.epochs, self.train_hist['total_time'][0])) 159 | print("Training finish!... save training results") 160 | 161 | 162 | def train_epoch(self, epoch): 163 | epoch_start_time = time.time() 164 | loss_buf = [] 165 | train_pred = [] 166 | train_true = [] 167 | num_batch = int(len(self.train_loader.dataset) / self.batch_size) 168 | for iter, (pts, label) in enumerate(self.train_loader): 169 | if pts.size(0) == 1: 170 | continue 171 | if not self.no_cuda: 172 | pts = pts.cuda(self.first_gpu) 173 | label = label.cuda(self.first_gpu) 174 | 175 | # forward 176 | self.optimizer.zero_grad() 177 | output, _ = self.model(pts) 178 | 179 | # loss 180 | if len(self.gpu_ids) != 1: # multiple gpus 181 | loss = self.model.module.get_loss(output, label) 182 | else: 183 | loss = self.model.get_loss(output, label) 184 | 185 | # backward 186 | loss.backward() 187 | self.optimizer.step() 188 | loss_buf.append(loss.detach().cpu().numpy()) 189 | 190 | preds = output.max(dim=1)[1] 191 | train_true.append(label.view(-1).cpu().numpy()) 192 | train_pred.append(preds.detach().cpu().numpy()) 193 | 194 | # finish one epoch 195 | if self.no_scheduler == False: 196 | self.scheduler.step() 197 | epoch_time = time.time() - epoch_start_time 198 | self.train_hist['per_epoch_time'].append(epoch_time) 199 | self.train_hist['loss'].append(np.mean(loss_buf)) 200 | train_true = np.concatenate(train_true) 201 | train_pred = np.concatenate(train_pred) 202 | print("Epoch %d: Loss %.6f, train acc %.6f, train avg acc %.6f, time %.4fs" % (epoch+1, 203 | np.mean(loss_buf), 204 | metrics.accuracy_score( 205 | train_true, train_pred), 206 | metrics.balanced_accuracy_score( 207 | train_true, train_pred), 208 | epoch_time)) 209 | return np.mean(loss_buf) 210 | 211 | 212 | def _snapshot(self, epoch): 213 | state_dict = self.model.state_dict() 214 | from collections import OrderedDict 215 | new_state_dict = OrderedDict() 216 | for key, val in state_dict.items(): 217 | if key[:6] == 'module': 218 | name = key[7:] # remove 'module.' 219 | else: 220 | name = key 221 | new_state_dict[name] = val 222 | save_dir = os.path.join(self.save_dir, self.dataset_name) 223 | torch.save(new_state_dict, save_dir + "_" + str(epoch) + '.pkl') 224 | print(f"Save model to {save_dir}_{str(epoch)}.pkl") 225 | 226 | 227 | def _load_pretrain(self, pretrain): 228 | state_dict = torch.load(pretrain, map_location='cpu') 229 | from collections import OrderedDict 230 | new_state_dict = OrderedDict() 231 | for key, val in state_dict.items(): 232 | if key[:6] == 'module': 233 | name = key[7:] # remove 'module.' 234 | else: 235 | name = key 236 | new_state_dict[name] = val 237 | self.model.load_state_dict(new_state_dict) 238 | print(f"Load model from {pretrain}") 239 | 240 | 241 | def _get_lr(self, group=0): 242 | return self.optimizer.param_groups[group]['lr'] 243 | -------------------------------------------------------------------------------- /visualization.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | @Author: An Tao 5 | @Contact: ta19@mails.tsinghua.edu.cn 6 | @File: visualization.py 7 | @Time: 2020/1/2 10:26 AM 8 | """ 9 | 10 | import os 11 | import time 12 | import numpy as np 13 | import torch 14 | import itertools 15 | import argparse 16 | from glob import glob 17 | 18 | from model import ReconstructionNet 19 | 20 | def standardize_bbox(pcl, points_per_object): 21 | pt_indices = np.random.choice(pcl.shape[0], points_per_object, replace=False) 22 | np.random.shuffle(pt_indices) 23 | pcl = pcl[pt_indices] # n by 3 24 | mins = np.amin(pcl, axis=0) 25 | maxs = np.amax(pcl, axis=0) 26 | center = ( mins + maxs ) / 2. 27 | scale = np.amax(maxs-mins) 28 | print("Center: {}, Scale: {}".format(center, scale)) 29 | result = ((pcl - center)/scale).astype(np.float32) # [-0.5, 0.5] 30 | return result 31 | 32 | xml_head = \ 33 | """ 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | """ 65 | 66 | xml_ball_segment = \ 67 | """ 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | """ 79 | 80 | xml_tail = \ 81 | """ 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | """ 101 | 102 | def colormap(x,y,z): 103 | vec = np.array([x,y,z]) 104 | vec = np.clip(vec, 0.001,1.0) 105 | norm = np.sqrt(np.sum(vec**2)) 106 | vec /= norm 107 | return [vec[0], vec[1], vec[2]] 108 | 109 | def mitsuba(pcl, path, clr=None): 110 | xml_segments = [xml_head] 111 | 112 | # pcl = standardize_bbox(pcl, 2048) 113 | # pcl = pcl - np.expand_dims(np.mean(pcl, axis=0), 0) # center 114 | # dist = np.max(np.sqrt(np.sum(pcl ** 2, axis=1)), 0) 115 | # pcl = pcl / dist # scale 116 | 117 | pcl = pcl[:,[2,0,1]] 118 | pcl[:,0] *= -1 119 | h = np.min(pcl[:,2]) 120 | 121 | if clr == "plane": 122 | clrgrid = [[0, 1, 45], [1, 0, 45]] 123 | b = np.linspace(*clrgrid[0]) 124 | c = np.linspace(*clrgrid[1]) 125 | color_all = np.array(list(itertools.product(b, c))) 126 | color_all = np.concatenate((np.linspace(1, 0, 2025)[..., np.newaxis], color_all), axis=1) 127 | elif clr == "sphere": 128 | color_all = np.load("sphere.npy") 129 | color_all = (color_all + 0.3) / 0.6 130 | elif clr == "gaussian": 131 | color_all = np.load("gaussian.npy") 132 | color_all = (color_all + 0.3) / 0.6 133 | 134 | for i in range(pcl.shape[0]): 135 | if clr == None: 136 | color = colormap(pcl[i,0]+0.5,pcl[i,1]+0.5,pcl[i,2]+0.5) 137 | elif clr in ["plane", "sphere", "gaussian"]: 138 | color = color_all[i] 139 | else: 140 | color = clr 141 | if h < -0.25: 142 | xml_segments.append(xml_ball_segment.format(pcl[i,0],pcl[i,1],pcl[i,2]-h-0.6875, *color)) 143 | else: 144 | xml_segments.append(xml_ball_segment.format(pcl[i,0],pcl[i,1],pcl[i,2], *color)) 145 | xml_segments.append(xml_tail) 146 | 147 | xml_content = str.join('', xml_segments) 148 | 149 | with open(path, 'w') as f: 150 | f.write(xml_content) 151 | 152 | def load_pretrain(model, pretrain): 153 | state_dict = torch.load(pretrain, map_location='cpu') 154 | from collections import OrderedDict 155 | new_state_dict = OrderedDict() 156 | for key, val in state_dict.items(): 157 | if key[:6] == 'module': 158 | name = key[7:] # remove 'module.' 159 | else: 160 | name = key 161 | new_state_dict[name] = val 162 | model.load_state_dict(new_state_dict) 163 | print(f"Load model from {pretrain}") 164 | return model 165 | 166 | 167 | def visualize(args): 168 | # create exp directory 169 | file = [f for f in args.model_path.split('/')] 170 | if args.exp_name != None: 171 | experiment_id = args.exp_name 172 | elif file[-1] == '': 173 | experiment_id = time.strftime('%m%d%H%M%S') 174 | one_model = True 175 | elif file[-1][-4:] == '.pkl': 176 | experiment_id = file[-3] 177 | one_model = True 178 | elif file[-1] == 'models': 179 | experiment_id = file[-2] 180 | one_model = False 181 | else: 182 | experiment_id = time.strftime('%m%d%H%M%S') 183 | save_root = os.path.join('mitsuba', experiment_id, args.dataset, args.split + str(args.item)) 184 | os.makedirs(save_root, exist_ok=True) 185 | 186 | # initialize dataset 187 | from dataset import Dataset 188 | dataset = Dataset(root=args.dataset_root, dataset_name=args.dataset, 189 | num_points=args.num_points, split=args.split, load_name=True) 190 | 191 | # load data from dataset 192 | pts, lb, n = dataset[args.item] 193 | print(f"Dataset: {args.dataset}, split: {args.split}, item: {args.item}, category: {n}") 194 | 195 | # generate XML file for original point cloud 196 | if args.draw_original: 197 | save_path = os.path.join(save_root, args.dataset + '_' + args.split + str(args.item) + '_' + str(n) + '_origin.xml') 198 | color = [0.4, 0.4, 0.6] 199 | mitsuba(pts.numpy(), save_path, color) 200 | 201 | # generate XML file for decoder souce point 202 | if args.draw_source_points: 203 | if args.shape == 'plane': 204 | meshgrid = [[-0.3, 0.3, 45], [-0.3, 0.3, 45]] 205 | x = np.linspace(*meshgrid[0]) 206 | y = np.linspace(*meshgrid[1]) 207 | points = np.array(list(itertools.product(x, y))) 208 | points = np.concatenate((points,np.zeros(2025)[..., np.newaxis]), axis=1) 209 | elif args.shape == 'sphere': 210 | points = np.load("sphere.npy") 211 | elif args.shape == 'gaussian': 212 | points = np.load("gaussian.npy") 213 | save_path = os.path.join(save_root, args.dataset + '_' + args.split + str(args.item) + '_' + str(n) + '_epoch0.xml') 214 | mitsuba(points, save_path, clr=args.shape) 215 | 216 | # initialize model 217 | model = ReconstructionNet(args) 218 | 219 | if one_model: 220 | if file[0] != '': 221 | model = load_pretrain(model, args.model_path) 222 | model.eval() 223 | reconstructed_pl, _ = model(pts.view(1, 2048, 3)) 224 | save_path = os.path.join(save_root, file[-1][:-4] + args.split + str(args.item) + '_' + str(n) + '.xml') 225 | mitsuba(reconstructed_pl[0].detach().numpy(), save_path, clr=args.shape) 226 | else: 227 | load_path = glob(os.path.join(args.model_path, '*.pkl')) 228 | load_path.sort() 229 | for path in load_path: 230 | model_name = [p for p in path.split('/')][-1] 231 | model = load_pretrain(model, path) 232 | model.eval() 233 | reconstructed_pl, _ = model(pts.view(1, 2048, 3)) 234 | save_path = os.path.join(save_root, model_name[:-4] + '_' + args.dataset + '_' + args.split + str(args.item) + '_' + str(n) + '.xml') 235 | mitsuba(reconstructed_pl[0].detach().numpy(), save_path, clr=args.shape) 236 | 237 | 238 | if __name__ == '__main__': 239 | parser = argparse.ArgumentParser(description='Unsupervised Point Cloud Feature Learning') 240 | parser.add_argument('--exp_name', type=str, default=None, metavar='N', 241 | help='Name of the experiment') 242 | parser.add_argument('--item', type=int, default=0, metavar='N', 243 | help='Item of point cloud to load') 244 | parser.add_argument('--split', type=str, default='train', metavar='N', 245 | choices=['train','test', 'val', 'trainval', 'all'], 246 | help='Split to use, [foldingnet, dgcnn_cls, dgcnn_seg]') 247 | parser.add_argument('--encoder', type=str, default='foldingnet', metavar='N', 248 | choices=['foldnet', 'dgcnn_cls', 'dgcnn_seg'], 249 | help='Encoder to use, [foldingnet, dgcnn_cls, dgcnn_seg]') 250 | parser.add_argument('--feat_dims', type=int, default=512, metavar='N', 251 | help='Number of dims for feature ') 252 | parser.add_argument('--k', type=int, default=None, metavar='N', 253 | help='Num of nearest neighbors to use for KNN') 254 | parser.add_argument('--shape', type=str, default='plane', metavar='N', 255 | choices=['plane', 'sphere', 'gaussian'], 256 | help='Shape of points to input decoder, [plane, sphere, gaussian]') 257 | parser.add_argument('--dataset', type=str, default='shapenetcorev2', metavar='N', 258 | choices=['shapenetcorev2','modelnet40', 'modelnet10', 'shapenetpartpart'], 259 | help='Dataset to use, [shapenetcorev2, modelnet40, modelnet10, shapenetpartpart]') 260 | parser.add_argument('--dataset_root', type=str, default='../dataset', help="Dataset root path") 261 | parser.add_argument('--num_points', type=int, default=2048, 262 | help='Num of points to use') 263 | parser.add_argument('--model_path', type=str, default='', metavar='N', 264 | help='Path to load model') 265 | parser.add_argument('--draw_original', action='store_true', 266 | help='Draw original point cloud') 267 | parser.add_argument('--draw_source_points', action='store_true', 268 | help='Draw source points for decoder') 269 | args = parser.parse_args() 270 | 271 | print(str(args)) 272 | 273 | visualize(args) -------------------------------------------------------------------------------- /segmentation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | @Author: An Tao 5 | @Contact: ta19@mails.tsinghua.edu.cn 6 | @File: reconstruction.py 7 | @Time: 2020/1/2 10:26 AM 8 | """ 9 | 10 | import os 11 | import sys 12 | import time 13 | import shutil 14 | import numpy as np 15 | import torch 16 | import torch.optim as optim 17 | from torch.optim.lr_scheduler import CosineAnnealingLR 18 | import sklearn.metrics as metrics 19 | 20 | from tensorboardX import SummaryWriter 21 | 22 | from model import SegmentationNet 23 | from dataset import Dataset 24 | from utils import Logger 25 | 26 | torch.multiprocessing.set_sharing_strategy('file_system') 27 | 28 | seg_num = [4, 2, 2, 4, 4, 3, 3, 2, 4, 2, 6, 2, 3, 3, 3, 3] 29 | index_start = [0, 4, 6, 8, 12, 16, 19, 22, 24, 28, 30, 36, 38, 41, 44, 47] 30 | 31 | 32 | def calculate_shape_IoU(pred_np, seg_np, label, class_choice): 33 | label = label.squeeze() 34 | shape_ious = [] 35 | for shape_idx in range(seg_np.shape[0]): 36 | if not class_choice: 37 | start_index = index_start[label[shape_idx]] 38 | num = seg_num[label[shape_idx]] 39 | parts = range(start_index, start_index + num) 40 | else: 41 | parts = range(seg_num[label[0]]) 42 | part_ious = [] 43 | for part in parts: 44 | I = np.sum(np.logical_and(pred_np[shape_idx] == part, seg_np[shape_idx] == part)) 45 | U = np.sum(np.logical_or(pred_np[shape_idx] == part, seg_np[shape_idx] == part)) 46 | if U == 0: 47 | iou = 1 # If the union of groundtruth and prediction points is empty, then count part IoU as 1 48 | else: 49 | iou = I / float(U) 50 | part_ious.append(iou) 51 | shape_ious.append(np.mean(part_ious)) 52 | return shape_ious 53 | 54 | 55 | class Segmentation(object): 56 | def __init__(self, args): 57 | self.dataset_name = args.dataset 58 | if args.epochs != None: 59 | self.epochs = args.epochs 60 | else: 61 | self.epochs = 250 62 | self.batch_size = args.batch_size 63 | self.snapshot_interval = args.snapshot_interval 64 | self.no_cuda = args.no_cuda 65 | self.model_path = args.model_path 66 | self.class_choice = args.class_choice 67 | self.no_scheduler = args.no_scheduler 68 | self.loss = args.loss 69 | 70 | # create exp directory 71 | file = [f for f in args.model_path.split('/')] 72 | if args.exp_name != None: 73 | self.experiment_id = "Segment_" + args.exp_name 74 | elif file[-2] == 'models': 75 | self.experiment_id = file[-3] 76 | else: 77 | self.experiment_id = "Segment" + time.strftime('%m%d%H%M%S') 78 | snapshot_root = 'snapshot/%s' % self.experiment_id 79 | tensorboard_root = 'tensorboard/%s' % self.experiment_id 80 | self.save_dir = os.path.join(snapshot_root, 'models/') 81 | self.tboard_dir = tensorboard_root 82 | 83 | # check arguments 84 | if self.model_path == '': 85 | if not os.path.exists(self.save_dir): 86 | os.makedirs(self.save_dir) 87 | else: 88 | choose = input("Remove " + self.save_dir + " ? (y/n)") 89 | if choose == "y": 90 | shutil.rmtree(self.save_dir) 91 | os.makedirs(self.save_dir) 92 | else: 93 | sys.exit(0) 94 | if not os.path.exists(self.tboard_dir): 95 | os.makedirs(self.tboard_dir) 96 | else: 97 | shutil.rmtree(self.tboard_dir) 98 | os.makedirs(self.tboard_dir) 99 | sys.stdout = Logger(os.path.join(snapshot_root, 'log.txt')) 100 | self.writer = SummaryWriter(log_dir=self.tboard_dir) 101 | 102 | # print args 103 | print(str(args)) 104 | 105 | # get gpu id 106 | gids = ''.join(args.gpu.split()) 107 | self.gpu_ids = [int(gid) for gid in gids.split(',')] 108 | self.first_gpu = self.gpu_ids[1] 109 | self.loss_gpu = self.gpu_ids[0] 110 | self.gpu_ids = self.gpu_ids[1:] 111 | # self.device = torch.device("cpu" if args.no_cuda else "cuda") 112 | 113 | # generate dataset 114 | self.train_dataset = Dataset( 115 | root=args.dataset_root, 116 | dataset_name=args.dataset, 117 | split='all', 118 | num_points=args.num_points, 119 | segmentation=True, 120 | random_translate=True, 121 | random_rotate=args.use_rotate, 122 | random_jitter=args.use_jitter 123 | ) 124 | self.train_loader = torch.utils.data.DataLoader( 125 | self.train_dataset, 126 | batch_size=args.batch_size, 127 | shuffle=True, 128 | num_workers=args.workers 129 | ) 130 | print("Training set size:", self.train_loader.dataset.__len__()) 131 | 132 | # initialize model 133 | self.seg_num_all = self.train_dataset.seg_num_all 134 | self.seg_start_index = self.train_dataset.seg_start_index 135 | self.model = SegmentationNet(args, self.seg_num_all) 136 | if self.model_path != '': 137 | self._load_pretrain(args.model_path) 138 | 139 | # load model to gpu 140 | if not self.no_cuda: 141 | if len(self.gpu_ids) != 1: # multiple gpus 142 | self.model = torch.nn.DataParallel(self.model.cuda(self.first_gpu), self.gpu_ids) 143 | else: 144 | self.model = self.model.cuda(self.gpu_ids[0]) 145 | # self.model = self.model.to(self.device) 146 | 147 | 148 | # initialize optimizer 149 | self.parameter = self.model.parameters() 150 | if self.no_scheduler == False: 151 | self.optimizer = optim.SGD(self.parameter, lr=0.1, weight_decay=1e-4) 152 | self.scheduler = CosineAnnealingLR(self.optimizer, self.epochs, eta_min=1e-3) 153 | else: 154 | self.optimizer = optim.SGD(self.parameter, lr=0.01, weight_decay=1e-4) 155 | 156 | 157 | def run(self): 158 | self.train_hist = { 159 | 'loss': [], 160 | 'per_epoch_time': [], 161 | 'total_time': [] 162 | } 163 | best_loss = 1000000000 164 | print('Training start!!') 165 | start_time = time.time() 166 | self.model.train() 167 | if self.model_path != '': 168 | start_epoch = self.model_path[-7:-4] 169 | if start_epoch[0] == '_': 170 | start_epoch = start_epoch[1:] 171 | start_epoch = int(start_epoch) 172 | else: 173 | start_epoch = 0 174 | for epoch in range(start_epoch, self.epochs): 175 | loss = self.train_epoch(epoch) 176 | 177 | # save snapeshot 178 | if (epoch + 1) % self.snapshot_interval == 0: 179 | self._snapshot(epoch + 1) 180 | if loss < best_loss: 181 | best_loss = loss 182 | self._snapshot('best') 183 | 184 | # save tensorboard 185 | if self.writer: 186 | self.writer.add_scalar('Train Loss', self.train_hist['loss'][-1], epoch) 187 | self.writer.add_scalar('Learning Rate', self._get_lr(), epoch) 188 | 189 | # finish all epoch 190 | self._snapshot(epoch + 1) 191 | if loss < best_loss: 192 | best_loss = loss 193 | self._snapshot('best') 194 | self.train_hist['total_time'].append(time.time() - start_time) 195 | print("Avg one epoch time: %.2f, total %d epochs time: %.2f" % (np.mean(self.train_hist['per_epoch_time']), 196 | self.epochs, self.train_hist['total_time'][0])) 197 | print("Training finish!... save training results") 198 | 199 | 200 | def train_epoch(self, epoch): 201 | epoch_start_time = time.time() 202 | loss_buf = [] 203 | train_true_cls = [] 204 | train_pred_cls = [] 205 | train_true_seg = [] 206 | train_pred_seg = [] 207 | train_label_seg = [] 208 | dist_ap_buf = [] 209 | dist_an_buf = [] 210 | num_batch = int(len(self.train_loader.dataset) / self.batch_size) 211 | for iter, (pts, label, seg) in enumerate(self.train_loader): 212 | num_seg = seg.max(1)[0] - seg.min(1)[0] 213 | if 0 in num_seg: 214 | pts = pts[num_seg != 0] 215 | label = label[num_seg != 0] 216 | seg = seg[num_seg != 0] 217 | 218 | if pts.size(0) == 1: 219 | continue 220 | seg = seg - self.seg_start_index 221 | label_one_hot = torch.zeros((label.size(0), 16)) 222 | for idx in range(label.size(0)): 223 | label_one_hot[idx, label[idx]] = 1 224 | 225 | if not self.no_cuda: 226 | pts = pts.cuda(self.first_gpu) 227 | label_one_hot = label_one_hot.cuda(self.first_gpu) 228 | seg = seg.cuda(self.first_gpu) 229 | 230 | # forward 231 | self.optimizer.zero_grad() 232 | output, _ = self.model(pts, label_one_hot) 233 | output = output.permute(0, 2, 1).contiguous() 234 | 235 | # loss 236 | if self.loss == 'softmax': 237 | if len(self.gpu_ids) != 1: # multiple gpus 238 | loss = self.model.module.get_loss(output.view(-1, self.seg_num_all), seg.view(-1)) 239 | else: 240 | loss = self.model.get_loss(output.view(-1, self.seg_num_all), seg.view(-1)) 241 | 242 | # backward 243 | loss.backward() 244 | self.optimizer.step() 245 | loss_buf.append(loss.detach().cpu().numpy()) 246 | 247 | pred = output.max(dim=2)[1] 248 | seg_np = seg.cpu().numpy() # (batch_size, num_points) 249 | pred_np = pred.detach().cpu().numpy() # (batch_size, num_points) 250 | train_true_cls.append(seg_np.reshape(-1)) # (batch_size * num_points) 251 | train_pred_cls.append(pred_np.reshape(-1)) # (batch_size * num_points) 252 | train_true_seg.append(seg_np) 253 | train_pred_seg.append(pred_np) 254 | train_label_seg.append(label.reshape(-1)) 255 | 256 | elif self.loss == 'triplet': 257 | if len(self.gpu_ids) != 1: # multiple gpus 258 | loss, dist_ap, dist_an = self.model.module.get_loss(output, seg, new_device=self.loss_gpu) 259 | else: 260 | loss, dist_ap, dist_an = self.model.get_loss(output, seg, new_device=self.loss_gpu) 261 | 262 | # backward 263 | loss.backward() 264 | self.optimizer.step() 265 | loss_buf.append(loss.detach().cpu().numpy()) 266 | dist_ap_buf.append(np.mean(dist_ap.detach().cpu().numpy())) 267 | dist_an_buf.append(np.mean(dist_an.detach().cpu().numpy())) 268 | 269 | # print(iter, loss, time.time() - epoch_start_time, np.mean(dist_an.detach().cpu().numpy()), np.mean(dist_ap.detach().cpu().numpy())) 270 | 271 | # finish one epoch 272 | if self.no_scheduler == False: 273 | self.scheduler.step() 274 | epoch_time = time.time() - epoch_start_time 275 | self.train_hist['per_epoch_time'].append(epoch_time) 276 | self.train_hist['loss'].append(np.mean(loss_buf)) 277 | if self.loss == 'softmax': 278 | train_true_cls = np.concatenate(train_true_cls) 279 | train_pred_cls = np.concatenate(train_pred_cls) 280 | train_acc = metrics.accuracy_score(train_true_cls, train_pred_cls) 281 | avg_per_class_acc = metrics.balanced_accuracy_score(train_true_cls, train_pred_cls) 282 | train_true_seg = np.concatenate(train_true_seg, axis=0) 283 | train_pred_seg = np.concatenate(train_pred_seg, axis=0) 284 | train_label_seg = np.concatenate(train_label_seg) 285 | train_ious = calculate_shape_IoU(train_pred_seg, train_true_seg, train_label_seg, self.class_choice) 286 | print("Epoch %d: Loss %.6f, train acc %.6f, train avg acc %.6f, train iou: %.6f, time %.4fs" % (epoch+1, 287 | np.mean(loss_buf), 288 | train_acc, 289 | avg_per_class_acc, 290 | np.mean(train_ious), 291 | epoch_time)) 292 | elif self.loss == 'triplet': 293 | print("Epoch %d: Loss %.6f, dist an %.6f, dist ap %.6f, time %.4fs" % (epoch+1, 294 | np.mean(loss_buf), 295 | np.mean(dist_an_buf), 296 | np.mean(dist_ap_buf), 297 | epoch_time)) 298 | return np.mean(loss_buf) 299 | 300 | 301 | def _snapshot(self, epoch): 302 | state_dict = self.model.state_dict() 303 | from collections import OrderedDict 304 | new_state_dict = OrderedDict() 305 | for key, val in state_dict.items(): 306 | if key[:6] == 'module': 307 | name = key[7:] # remove 'module.' 308 | else: 309 | name = key 310 | new_state_dict[name] = val 311 | save_dir = os.path.join(self.save_dir, self.dataset_name) 312 | torch.save(new_state_dict, save_dir + "_" + str(epoch) + '.pkl') 313 | print(f"Save model to {save_dir}_{str(epoch)}.pkl") 314 | 315 | 316 | def _load_pretrain(self, pretrain): 317 | state_dict = torch.load(pretrain, map_location='cpu') 318 | from collections import OrderedDict 319 | new_state_dict = OrderedDict() 320 | for key, val in state_dict.items(): 321 | if key[:6] == 'module': 322 | name = key[7:] # remove 'module.' 323 | else: 324 | name = key 325 | new_state_dict[name] = val 326 | self.model.load_state_dict(new_state_dict) 327 | print(f"Load model from {pretrain}") 328 | 329 | 330 | def _get_lr(self, group=0): 331 | return self.optimizer.param_groups[group]['lr'] 332 | -------------------------------------------------------------------------------- /model.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | @Author: An Tao 5 | @Contact: ta19@mails.tsinghua.edu.cn 6 | @File: model.py 7 | @Time: 2020/1/2 10:26 AM 8 | """ 9 | 10 | import torch 11 | import torch.nn as nn 12 | import torch.nn.init as init 13 | import torch.nn.functional as F 14 | import numpy as np 15 | import itertools 16 | from loss import ChamferLoss, CrossEntropyLoss, TripletLoss 17 | 18 | 19 | def knn(x, k): 20 | batch_size = x.size(0) 21 | num_points = x.size(2) 22 | 23 | inner = -2*torch.matmul(x.transpose(2, 1), x) 24 | xx = torch.sum(x**2, dim=1, keepdim=True) 25 | pairwise_distance = -xx - inner - xx.transpose(2, 1) 26 | 27 | idx = pairwise_distance.topk(k=k, dim=-1)[1] # (batch_size, num_points, k) 28 | 29 | if idx.get_device() == -1: 30 | idx_base = torch.arange(0, batch_size).view(-1, 1, 1)*num_points 31 | else: 32 | idx_base = torch.arange(0, batch_size, device=idx.get_device()).view(-1, 1, 1)*num_points 33 | idx = idx + idx_base 34 | idx = idx.view(-1) 35 | 36 | return idx 37 | 38 | 39 | def local_cov(pts, idx): 40 | batch_size = pts.size(0) 41 | num_points = pts.size(2) 42 | pts = pts.view(batch_size, -1, num_points) # (batch_size, 3, num_points) 43 | 44 | _, num_dims, _ = pts.size() 45 | 46 | x = pts.transpose(2, 1).contiguous() # (batch_size, num_points, 3) 47 | x = x.view(batch_size*num_points, -1)[idx, :] # (batch_size*num_points*2, 3) 48 | x = x.view(batch_size, num_points, -1, num_dims) # (batch_size, num_points, k, 3) 49 | 50 | x = torch.matmul(x[:,:,0].unsqueeze(3), x[:,:,1].unsqueeze(2)) # (batch_size, num_points, 3, 1) * (batch_size, num_points, 1, 3) -> (batch_size, num_points, 3, 3) 51 | x = x.view(batch_size, num_points, 9).transpose(2, 1) # (batch_size, 9, num_points) 52 | x = torch.cat((pts, x), dim=1) # (batch_size, 12, num_points) 53 | 54 | return x 55 | 56 | 57 | def local_maxpool(x, idx): 58 | batch_size = x.size(0) 59 | num_points = x.size(2) 60 | x = x.view(batch_size, -1, num_points) 61 | 62 | _, num_dims, _ = x.size() 63 | 64 | x = x.transpose(2, 1).contiguous() # (batch_size, num_points, num_dims) 65 | x = x.view(batch_size*num_points, -1)[idx, :] # (batch_size*n, num_dims) -> (batch_size*n*k, num_dims) 66 | x = x.view(batch_size, num_points, -1, num_dims) # (batch_size, num_points, k, num_dims) 67 | x, _ = torch.max(x, dim=2) # (batch_size, num_points, num_dims) 68 | 69 | return x 70 | 71 | 72 | def get_graph_feature(x, k=20, idx=None): 73 | batch_size = x.size(0) 74 | num_points = x.size(2) 75 | x = x.view(batch_size, -1, num_points) # (batch_size, num_dims, num_points) 76 | if idx is None: 77 | idx = knn(x, k=k) # (batch_size, num_points, k) 78 | 79 | _, num_dims, _ = x.size() 80 | 81 | x = x.transpose(2, 1).contiguous() # (batch_size, num_points, num_dims) 82 | feature = x.view(batch_size*num_points, -1)[idx, :] # (batch_size*n, num_dims) -> (batch_size*n*k, num_dims) 83 | feature = feature.view(batch_size, num_points, k, num_dims) # (batch_size, num_points, k, num_dims) 84 | x = x.view(batch_size, num_points, 1, num_dims).repeat(1, 1, k, 1) # (batch_size, num_points, k, num_dims) 85 | 86 | feature = torch.cat((feature-x, x), dim=3).permute(0, 3, 1, 2) # (batch_size, num_points, k, 2*num_dims) -> (batch_size, 2*num_dims, num_points, k) 87 | 88 | return feature # (batch_size, 2*num_dims, num_points, k) 89 | 90 | 91 | class DGCNN_Cls_Encoder(nn.Module): 92 | def __init__(self, args): 93 | super(DGCNN_Cls_Encoder, self).__init__() 94 | if args.k == None: 95 | self.k = 40 96 | else: 97 | self.k = args.k 98 | self.task = args.task 99 | 100 | self.bn1 = nn.BatchNorm2d(64) 101 | self.bn2 = nn.BatchNorm2d(64) 102 | self.bn3 = nn.BatchNorm2d(128) 103 | self.bn4 = nn.BatchNorm2d(256) 104 | self.bn5 = nn.BatchNorm1d(args.feat_dims) 105 | 106 | self.conv1 = nn.Sequential(nn.Conv2d(3*2, 64, kernel_size=1, bias=False), 107 | self.bn1, 108 | nn.LeakyReLU(negative_slope=0.2)) 109 | self.conv2 = nn.Sequential(nn.Conv2d(64*2, 64, kernel_size=1, bias=False), 110 | self.bn2, 111 | nn.LeakyReLU(negative_slope=0.2)) 112 | self.conv3 = nn.Sequential(nn.Conv2d(64*2, 128, kernel_size=1, bias=False), 113 | self.bn3, 114 | nn.LeakyReLU(negative_slope=0.2)) 115 | self.conv4 = nn.Sequential(nn.Conv2d(128*2, 256, kernel_size=1, bias=False), 116 | self.bn4, 117 | nn.LeakyReLU(negative_slope=0.2)) 118 | self.conv5 = nn.Sequential(nn.Conv1d(512, args.feat_dims, kernel_size=1, bias=False), 119 | self.bn5, 120 | nn.LeakyReLU(negative_slope=0.2)) 121 | 122 | def forward(self, x): 123 | x = x.transpose(2, 1) 124 | 125 | batch_size = x.size(0) 126 | x = get_graph_feature(x, k=self.k) # (batch_size, 3, num_points) -> (batch_size, 3*2, num_points, k) 127 | x = self.conv1(x) # (batch_size, 3*2, num_points, k) -> (batch_size, 64, num_points, k) 128 | x1 = x.max(dim=-1, keepdim=False)[0] # (batch_size, 64, num_points, k) -> (batch_size, 64, num_points) 129 | 130 | x = get_graph_feature(x1, k=self.k) # (batch_size, 64, num_points) -> (batch_size, 64*2, num_points, k) 131 | x = self.conv2(x) # (batch_size, 64*2, num_points, k) -> (batch_size, 64, num_points, k) 132 | x2 = x.max(dim=-1, keepdim=False)[0] # (batch_size, 64, num_points, k) -> (batch_size, 64, num_points) 133 | 134 | x = get_graph_feature(x2, k=self.k) # (batch_size, 64, num_points) -> (batch_size, 64*2, num_points, k) 135 | x = self.conv3(x) # (batch_size, 64*2, num_points, k) -> (batch_size, 128, num_points, k) 136 | x3 = x.max(dim=-1, keepdim=False)[0] # (batch_size, 128, num_points, k) -> (batch_size, 128, num_points) 137 | 138 | x = get_graph_feature(x3, k=self.k) # (batch_size, 128, num_points) -> (batch_size, 128*2, num_points, k) 139 | x = self.conv4(x) # (batch_size, 128*2, num_points, k) -> (batch_size, 256, num_points, k) 140 | x4 = x.max(dim=-1, keepdim=False)[0] # (batch_size, 256, num_points, k) -> (batch_size, 256, num_points) 141 | 142 | x = torch.cat((x1, x2, x3, x4), dim=1) # (batch_size, 512, num_points) 143 | 144 | x0 = self.conv5(x) # (batch_size, 512, num_points) -> (batch_size, feat_dims, num_points) 145 | x = x0.max(dim=-1, keepdim=False)[0] # (batch_size, feat_dims, num_points) -> (batch_size, feat_dims) 146 | feat = x.unsqueeze(1) # (batch_size, feat_dims) -> (batch_size, 1, feat_dims) 147 | 148 | if self.task == 'reconstruct': 149 | return feat # (batch_size, 1, feat_dims) 150 | else: 151 | return feat, x0 152 | 153 | 154 | class Point_Transform_Net(nn.Module): 155 | def __init__(self): 156 | super(Point_Transform_Net, self).__init__() 157 | self.k = 3 158 | 159 | self.bn1 = nn.BatchNorm2d(64) 160 | self.bn2 = nn.BatchNorm2d(128) 161 | self.bn3 = nn.BatchNorm1d(1024) 162 | 163 | self.conv1 = nn.Sequential(nn.Conv2d(6, 64, kernel_size=1, bias=False), 164 | self.bn1, 165 | nn.LeakyReLU(negative_slope=0.2)) 166 | self.conv2 = nn.Sequential(nn.Conv2d(64, 128, kernel_size=1, bias=False), 167 | self.bn2, 168 | nn.LeakyReLU(negative_slope=0.2)) 169 | self.conv3 = nn.Sequential(nn.Conv1d(128, 1024, kernel_size=1, bias=False), 170 | self.bn3, 171 | nn.LeakyReLU(negative_slope=0.2)) 172 | 173 | self.linear1 = nn.Linear(1024, 512, bias=False) 174 | self.bn3 = nn.BatchNorm1d(512) 175 | self.linear2 = nn.Linear(512, 256, bias=False) 176 | self.bn4 = nn.BatchNorm1d(256) 177 | 178 | self.transform = nn.Linear(256, 3*3) 179 | init.constant_(self.transform.weight, 0) 180 | init.eye_(self.transform.bias.view(3, 3)) 181 | 182 | def forward(self, x): 183 | batch_size = x.size(0) 184 | 185 | x = self.conv1(x) # (batch_size, 3*2, num_points, k) -> (batch_size, 64, num_points, k) 186 | x = self.conv2(x) # (batch_size, 64, num_points, k) -> (batch_size, 128, num_points, k) 187 | x = x.max(dim=-1, keepdim=False)[0] # (batch_size, 128, num_points, k) -> (batch_size, 128, num_points) 188 | 189 | x = self.conv3(x) # (batch_size, 128, num_points) -> (batch_size, 1024, num_points) 190 | x = x.max(dim=-1, keepdim=False)[0] # (batch_size, 1024, num_points) -> (batch_size, 1024) 191 | 192 | x = F.leaky_relu(self.bn3(self.linear1(x)), negative_slope=0.2) # (batch_size, 1024) -> (batch_size, 512) 193 | x = F.leaky_relu(self.bn4(self.linear2(x)), negative_slope=0.2) # (batch_size, 512) -> (batch_size, 256) 194 | 195 | x = self.transform(x) # (batch_size, 256) -> (batch_size, 3*3) 196 | x = x.view(batch_size, 3, 3) # (batch_size, 3*3) -> (batch_size, 3, 3) 197 | 198 | return x # (batch_size, 3, 3) 199 | 200 | 201 | class DGCNN_Seg_Encoder(nn.Module): 202 | def __init__(self, args): 203 | super(DGCNN_Seg_Encoder, self).__init__() 204 | if args.k == None: 205 | self.k = 40 206 | else: 207 | self.k = args.k 208 | self.task = args.task 209 | self.transform_net = Point_Transform_Net() 210 | 211 | self.bn1 = nn.BatchNorm2d(64) 212 | self.bn2 = nn.BatchNorm2d(64) 213 | self.bn3 = nn.BatchNorm2d(64) 214 | self.bn4 = nn.BatchNorm2d(64) 215 | self.bn5 = nn.BatchNorm2d(64) 216 | self.bn6 = nn.BatchNorm1d(args.feat_dims) 217 | 218 | self.conv1 = nn.Sequential(nn.Conv2d(6, 64, kernel_size=1, bias=False), 219 | self.bn1, 220 | nn.LeakyReLU(negative_slope=0.2)) 221 | self.conv2 = nn.Sequential(nn.Conv2d(64, 64, kernel_size=1, bias=False), 222 | self.bn2, 223 | nn.LeakyReLU(negative_slope=0.2)) 224 | self.conv3 = nn.Sequential(nn.Conv2d(64*2, 64, kernel_size=1, bias=False), 225 | self.bn3, 226 | nn.LeakyReLU(negative_slope=0.2)) 227 | self.conv4 = nn.Sequential(nn.Conv2d(64, 64, kernel_size=1, bias=False), 228 | self.bn4, 229 | nn.LeakyReLU(negative_slope=0.2)) 230 | self.conv5 = nn.Sequential(nn.Conv2d(64*2, 64, kernel_size=1, bias=False), 231 | self.bn5, 232 | nn.LeakyReLU(negative_slope=0.2)) 233 | self.conv6 = nn.Sequential(nn.Conv1d(192, args.feat_dims, kernel_size=1, bias=False), 234 | self.bn6, 235 | nn.LeakyReLU(negative_slope=0.2)) 236 | 237 | 238 | def forward(self, x): 239 | x = x.transpose(2, 1) 240 | 241 | batch_size = x.size(0) 242 | num_points = x.size(2) 243 | 244 | x0 = get_graph_feature(x, k=self.k) # (batch_size, 3, num_points) -> (batch_size, 3*2, num_points, k) 245 | t = self.transform_net(x0) # (batch_size, 3, 3) 246 | x = x.transpose(2, 1) # (batch_size, 3, num_points) -> (batch_size, num_points, 3) 247 | x = torch.bmm(x, t) # (batch_size, num_points, 3) * (batch_size, 3, 3) -> (batch_size, num_points, 3) 248 | x = x.transpose(2, 1) # (batch_size, num_points, 3) -> (batch_size, 3, num_points) 249 | 250 | x = get_graph_feature(x, k=self.k) # (batch_size, 3, num_points) -> (batch_size, 3*2, num_points, k) 251 | x = self.conv1(x) # (batch_size, 3*2, num_points, k) -> (batch_size, 64, num_points, k) 252 | x = self.conv2(x) # (batch_size, 64, num_points, k) -> (batch_size, 64, num_points, k) 253 | x1 = x.max(dim=-1, keepdim=False)[0] # (batch_size, 64, num_points, k) -> (batch_size, 64, num_points) 254 | 255 | x = get_graph_feature(x1, k=self.k) # (batch_size, 64, num_points) -> (batch_size, 64*2, num_points, k) 256 | x = self.conv3(x) # (batch_size, 64*2, num_points, k) -> (batch_size, 64, num_points, k) 257 | x = self.conv4(x) # (batch_size, 64, num_points, k) -> (batch_size, 64, num_points, k) 258 | x2 = x.max(dim=-1, keepdim=False)[0] # (batch_size, 64, num_points, k) -> (batch_size, 64, num_points) 259 | 260 | x = get_graph_feature(x2, k=self.k) # (batch_size, 64, num_points) -> (batch_size, 64*2, num_points, k) 261 | x = self.conv5(x) # (batch_size, 64*2, num_points, k) -> (batch_size, 64, num_points, k) 262 | x3 = x.max(dim=-1, keepdim=False)[0] # (batch_size, 64, num_points, k) -> (batch_size, 64, num_points) 263 | 264 | x = torch.cat((x1, x2, x3), dim=1) # (batch_size, 64*3, num_points) 265 | 266 | x0 = self.conv6(x) # (batch_size, 64*3, num_points) -> (batch_size, emb_dims, num_points) 267 | 268 | x = x0.max(dim=-1, keepdim=False)[0] # (batch_size, emb_dims, num_points) -> (batch_size, emb_dims) 269 | feat = x.unsqueeze(1) # (batch_size, num_points) -> (batch_size, 1, emb_dims) 270 | 271 | if self.task == 'reconstruct': 272 | return feat # (batch_size, 1, emb_dims) 273 | elif self.task == 'classify': 274 | return feat, x0 275 | elif self.task == 'segment': 276 | return feat, x0, x1, x2, x3 277 | 278 | 279 | class FoldNet_Encoder(nn.Module): 280 | def __init__(self, args): 281 | super(FoldNet_Encoder, self).__init__() 282 | if args.k == None: 283 | self.k = 16 284 | else: 285 | self.k = args.k 286 | self.n = 2048 # input point cloud size 287 | self.mlp1 = nn.Sequential( 288 | nn.Conv1d(12, 64, 1), 289 | nn.ReLU(), 290 | nn.Conv1d(64, 64, 1), 291 | nn.ReLU(), 292 | nn.Conv1d(64, 64, 1), 293 | nn.ReLU(), 294 | ) 295 | self.linear1 = nn.Linear(64, 64) 296 | self.conv1 = nn.Conv1d(64, 128, 1) 297 | self.linear2 = nn.Linear(128, 128) 298 | self.conv2 = nn.Conv1d(128, 1024, 1) 299 | self.mlp2 = nn.Sequential( 300 | nn.Conv1d(1024, args.feat_dims, 1), 301 | nn.ReLU(), 302 | nn.Conv1d(args.feat_dims, args.feat_dims, 1), 303 | ) 304 | 305 | def graph_layer(self, x, idx): 306 | x = local_maxpool(x, idx) 307 | x = self.linear1(x) 308 | x = x.transpose(2, 1) 309 | x = F.relu(self.conv1(x)) 310 | x = local_maxpool(x, idx) 311 | x = self.linear2(x) 312 | x = x.transpose(2, 1) 313 | x = self.conv2(x) 314 | return x 315 | 316 | def forward(self, pts): 317 | pts = pts.transpose(2, 1) # (batch_size, 3, num_points) 318 | idx = knn(pts, k=self.k) 319 | x = local_cov(pts, idx) # (batch_size, 3, num_points) -> (batch_size, 12, num_points]) 320 | x = self.mlp1(x) # (batch_size, 12, num_points) -> (batch_size, 64, num_points]) 321 | x = self.graph_layer(x, idx) # (batch_size, 64, num_points) -> (batch_size, 1024, num_points) 322 | x = torch.max(x, 2, keepdim=True)[0] # (batch_size, 1024, num_points) -> (batch_size, 1024, 1) 323 | x = self.mlp2(x) # (batch_size, 1024, 1) -> (batch_size, feat_dims, 1) 324 | feat = x.transpose(2,1) # (batch_size, feat_dims, 1) -> (batch_size, 1, feat_dims) 325 | return feat # (batch_size, 1, feat_dims) 326 | 327 | 328 | class FoldNet_Decoder(nn.Module): 329 | def __init__(self, args): 330 | super(FoldNet_Decoder, self).__init__() 331 | self.m = 2025 # 45 * 45. 332 | self.shape = args.shape 333 | self.meshgrid = [[-0.3, 0.3, 45], [-0.3, 0.3, 45]] 334 | self.sphere = np.load("sphere.npy") 335 | self.gaussian = np.load("gaussian.npy") 336 | if self.shape == 'plane': 337 | self.folding1 = nn.Sequential( 338 | nn.Conv1d(args.feat_dims+2, args.feat_dims, 1), 339 | nn.ReLU(), 340 | nn.Conv1d(args.feat_dims, args.feat_dims, 1), 341 | nn.ReLU(), 342 | nn.Conv1d(args.feat_dims, 3, 1), 343 | ) 344 | else: 345 | self.folding1 = nn.Sequential( 346 | nn.Conv1d(args.feat_dims+3, args.feat_dims, 1), 347 | nn.ReLU(), 348 | nn.Conv1d(args.feat_dims, args.feat_dims, 1), 349 | nn.ReLU(), 350 | nn.Conv1d(args.feat_dims, 3, 1), 351 | ) 352 | self.folding2 = nn.Sequential( 353 | nn.Conv1d(args.feat_dims+3, args.feat_dims, 1), 354 | nn.ReLU(), 355 | nn.Conv1d(args.feat_dims, args.feat_dims, 1), 356 | nn.ReLU(), 357 | nn.Conv1d(args.feat_dims, 3, 1), 358 | ) 359 | 360 | def build_grid(self, batch_size): 361 | if self.shape == 'plane': 362 | x = np.linspace(*self.meshgrid[0]) 363 | y = np.linspace(*self.meshgrid[1]) 364 | points = np.array(list(itertools.product(x, y))) 365 | elif self.shape == 'sphere': 366 | points = self.sphere 367 | elif self.shape == 'gaussian': 368 | points = self.gaussian 369 | points = np.repeat(points[np.newaxis, ...], repeats=batch_size, axis=0) 370 | points = torch.tensor(points) 371 | return points.float() 372 | 373 | def forward(self, x): 374 | x = x.transpose(1, 2).repeat(1, 1, self.m) # (batch_size, feat_dims, num_points) 375 | points = self.build_grid(x.shape[0]).transpose(1, 2) # (batch_size, 2, num_points) or (batch_size, 3, num_points) 376 | if x.get_device() != -1: 377 | points = points.cuda(x.get_device()) 378 | cat1 = torch.cat((x, points), dim=1) # (batch_size, feat_dims+2, num_points) or (batch_size, feat_dims+3, num_points) 379 | folding_result1 = self.folding1(cat1) # (batch_size, 3, num_points) 380 | cat2 = torch.cat((x, folding_result1), dim=1) # (batch_size, 515, num_points) 381 | folding_result2 = self.folding2(cat2) # (batch_size, 3, num_points) 382 | return folding_result2.transpose(1, 2) # (batch_size, num_points ,3) 383 | 384 | 385 | class DGCNN_Cls_Classifier(nn.Module): 386 | def __init__(self, args): 387 | super(DGCNN_Cls_Classifier, self).__init__() 388 | if args.dataset == 'modelnet40': 389 | output_channels = 40 390 | elif args.dataset == 'modelnet10': 391 | output_channels = 10 392 | elif args.dataset == 'shapenetcorev2': 393 | output_channels = 55 394 | elif args.dataset == 'shapenetpart': 395 | output_channels = 16 396 | elif args.dataset == 'shapenetpartpart': 397 | output_channels = 50 398 | 399 | self.linear1 = nn.Linear(args.feat_dims*2, 512, bias=False) 400 | self.bn6 = nn.BatchNorm1d(512) 401 | self.dp1 = nn.Dropout(p=args.dropout) 402 | self.linear2 = nn.Linear(512, 256) 403 | self.bn7 = nn.BatchNorm1d(256) 404 | self.dp2 = nn.Dropout(p=args.dropout) 405 | self.linear3 = nn.Linear(256, output_channels) 406 | 407 | def forward(self, x): 408 | batch_size = x.size(0) 409 | 410 | x1 = F.adaptive_max_pool1d(x, 1).view(batch_size, -1) 411 | x2 = F.adaptive_avg_pool1d(x, 1).view(batch_size, -1) 412 | x = torch.cat((x1, x2), 1) 413 | 414 | x = F.leaky_relu(self.bn6(self.linear1(x)), negative_slope=0.2) 415 | x = self.dp1(x) 416 | x = F.leaky_relu(self.bn7(self.linear2(x)), negative_slope=0.2) 417 | x = self.dp2(x) 418 | x = self.linear3(x) 419 | return x 420 | 421 | 422 | class DGCNN_Seg_Segmenter(nn.Module): 423 | def __init__(self, args, seg_num_all): 424 | super(DGCNN_Seg_Segmenter, self).__init__() 425 | self.seg_num_all = seg_num_all 426 | self.seg_no_class_label = args.seg_no_class_label 427 | self.k = args.k 428 | self.feat_dims = args.feat_dims 429 | self.loss = args.loss 430 | 431 | self.bn7 = nn.BatchNorm1d(64) 432 | self.bn8 = nn.BatchNorm1d(256) 433 | self.bn9 = nn.BatchNorm1d(256) 434 | self.bn10 = nn.BatchNorm1d(128) 435 | 436 | self.conv7 = nn.Sequential(nn.Conv1d(16, 64, kernel_size=1, bias=False), 437 | self.bn7, 438 | nn.LeakyReLU(negative_slope=0.2)) 439 | if not self.seg_no_class_label: 440 | self.conv8 = nn.Sequential(nn.Conv1d(self.feat_dims+64+64*3, 256, kernel_size=1, bias=False), 441 | self.bn8, 442 | nn.LeakyReLU(negative_slope=0.2)) 443 | else: 444 | self.conv8 = nn.Sequential(nn.Conv1d(self.feat_dims+64*3, 256, kernel_size=1, bias=False), 445 | self.bn8, 446 | nn.LeakyReLU(negative_slope=0.2)) 447 | self.dp1 = nn.Dropout(p=args.dropout) 448 | self.conv9 = nn.Sequential(nn.Conv1d(256, 256, kernel_size=1, bias=False), 449 | self.bn9, 450 | nn.LeakyReLU(negative_slope=0.2)) 451 | self.dp2 = nn.Dropout(p=args.dropout) 452 | self.conv10 = nn.Sequential(nn.Conv1d(256, 128, kernel_size=1, bias=False), 453 | self.bn10, 454 | nn.LeakyReLU(negative_slope=0.2)) 455 | self.conv11 = nn.Conv1d(128, self.seg_num_all, kernel_size=1, bias=False) 456 | 457 | 458 | def forward(self, x, l, x1, x2, x3): 459 | batch_size = x.size(0) 460 | num_points = x.size(2) 461 | 462 | x = x.max(dim=-1, keepdim=True)[0] # (batch_size, emb_dims, num_points) -> (batch_size, emb_dims, 1) 463 | 464 | if not self.seg_no_class_label: 465 | l = l.view(batch_size, -1, 1) # (batch_size, num_categoties, 1) 466 | l = self.conv7(l) # (batch_size, num_categoties, 1) -> (batch_size, 64, 1) 467 | 468 | x = torch.cat((x, l), dim=1) # (batch_size, emb_dims+64, 1) 469 | x = x.repeat(1, 1, num_points) # (batch_size, emb_dims+64, num_points) 470 | x = torch.cat((x, x1, x2, x3), dim=1) # (batch_size, emb_dims+64+64*3, num_points) 471 | 472 | else: 473 | x = x.repeat(1, 1, num_points) # (batch_size, emb_dims, num_points) 474 | x = torch.cat((x, x1, x2, x3), dim=1) # (batch_size, emb_dims+64*3, num_points) 475 | 476 | x = self.conv8(x) # (batch_size, emb_dims+64+64*3 or emb_dims+64*3, num_points) -> (batch_size, 256, num_points) 477 | x = self.dp1(x) 478 | x = self.conv9(x) # (batch_size, 256, num_points) -> (batch_size, 256, num_points) 479 | x = self.dp2(x) 480 | x = self.conv10(x) # (batch_size, 256, num_points) -> (batch_size, 128, num_points) 481 | 482 | if self.loss == 'softmax': 483 | x = self.conv11(x) # (batch_size, 128, num_points) -> (batch_size, seg_num_all, num_points) 484 | 485 | return x 486 | 487 | 488 | class ReconstructionNet(nn.Module): 489 | def __init__(self, args): 490 | super(ReconstructionNet, self).__init__() 491 | if args.encoder == 'foldnet': 492 | self.encoder = FoldNet_Encoder(args) 493 | elif args.encoder == 'dgcnn_cls': 494 | self.encoder = DGCNN_Cls_Encoder(args) 495 | elif args.encoder == 'dgcnn_seg': 496 | self.encoder = DGCNN_Seg_Encoder(args) 497 | self.decoder = FoldNet_Decoder(args) 498 | self.loss = ChamferLoss() 499 | 500 | def forward(self, input): 501 | feature = self.encoder(input) 502 | output = self.decoder(feature) 503 | return output, feature 504 | 505 | def get_parameter(self): 506 | return list(self.encoder.parameters()) + list(self.decoder.parameters()) 507 | 508 | def get_loss(self, input, output): 509 | # input shape (batch_size, 2048, 3) 510 | # output shape (batch_size, 2025, 3) 511 | return self.loss(input, output) 512 | 513 | 514 | class ClassificationNet(nn.Module): 515 | def __init__(self, args): 516 | super(ClassificationNet, self).__init__() 517 | self.is_eval = args.eval 518 | if args.encoder == 'foldnet': 519 | self.encoder = FoldNet_Encoder(args) 520 | elif args.encoder == 'dgcnn_cls': 521 | self.encoder = DGCNN_Cls_Encoder(args) 522 | elif args.encoder == 'dgcnn_seg': 523 | self.encoder = DGCNN_Seg_Encoder(args) 524 | if not self.is_eval: 525 | self.classifier = DGCNN_Cls_Classifier(args) 526 | self.loss = CrossEntropyLoss() 527 | 528 | def forward(self, input): 529 | feature, latent = self.encoder(input) 530 | if not self.is_eval: 531 | output = self.classifier(latent) 532 | return output, feature 533 | else: 534 | return feature 535 | 536 | def get_parameter(self): 537 | return list(self.encoder.parameters()) + list(self.classifier.parameters()) 538 | 539 | def get_loss(self, preds, gts): 540 | # preds shape (batch_size, feat_dims) 541 | # gts shape (batch_size) 542 | return self.loss(preds, gts) 543 | 544 | 545 | class SegmentationNet(nn.Module): 546 | def __init__(self, args, seg_num_all=50): 547 | super(SegmentationNet, self).__init__() 548 | self.is_eval = args.eval 549 | self.loss_type = args.loss 550 | if args.encoder == 'foldnet': 551 | self.encoder = FoldNet_Encoder(args) 552 | elif args.encoder == 'dgcnn_cls': 553 | self.encoder = DGCNN_Cls_Encoder(args) 554 | elif args.encoder == 'dgcnn_seg': 555 | self.encoder = DGCNN_Seg_Encoder(args) 556 | if not self.is_eval: 557 | self.segmenter = DGCNN_Seg_Segmenter(args, seg_num_all) 558 | if self.loss_type == 'softmax': 559 | self.loss = CrossEntropyLoss() 560 | elif self.loss_type == 'triplet': 561 | self.loss = TripletLoss(margin=args.margin) 562 | 563 | def forward(self, input, label=None): 564 | feature, latent, x1, x2, x3 = self.encoder(input) 565 | if not self.is_eval: 566 | output = self.segmenter(latent, label, x1, x2, x3) 567 | return output, feature 568 | else: 569 | return feature 570 | 571 | def get_parameter(self): 572 | return list(self.encoder.parameters()) + list(self.segmenter.parameters()) 573 | 574 | def get_loss(self, preds, gts, new_device=None): 575 | # preds shape (batch_size, feat_dims) 576 | # gts shape (batch_size) 577 | if self.loss_type == 'softmax': 578 | return self.loss(preds, gts) 579 | elif self.loss_type == 'triplet': 580 | return self.loss(preds, gts, new_device) 581 | --------------------------------------------------------------------------------