├── .gitignore
├── sphere.npy
├── gaussian.npy
├── image
├── DGCNN.jpg
├── acc1.png
├── acc2.png
├── acc3.png
├── acc4.png
├── acc5.png
├── shapenetpartpart_train0_chair.png
├── shapenetpartpart_train58_airplane.png
└── shapenetpartpart_train38_skateboard.png
├── tensorboard
├── Segment_dgcnn_seg_k40_1024_b32
│ └── events.out.tfevents.1579308861.server231
├── Classify_dgcnn_cls_k40_1024_b32
│ └── events.out.tfevents.1579684482.server231
├── Classify_dgcnn_seg_k40_1024_b32
│ └── events.out.tfevents.1579565090.server231
├── Reconstruct_dgcnn_cls_k20_1024_b16
│ └── events.out.tfevents.1579401032.server231
├── Reconstruct_dgcnn_seg_k40_1024_b32
│ └── events.out.tfevents.1579272092.server231
├── Classify_dgcnn_cls_k40_1024_b32_part
│ └── events.out.tfevents.1579698812.server231
├── Segment_dgcnn_seg_k40_1024_b32_nolabel
│ └── events.out.tfevents.1579336965.server231
└── Reconstruct_dgcnn_cls_k20_1024_b16_part
│ └── events.out.tfevents.1579685815.server231
├── LICENSE
├── utils.py
├── svm.py
├── main.py
├── dataset.py
├── loss.py
├── inference.py
├── reconstruction.py
├── README.md
├── classification.py
├── visualization.py
├── segmentation.py
└── model.py
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | */.DS_Store
3 |
--------------------------------------------------------------------------------
/sphere.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antao97/PointCloudSegmentation/HEAD/sphere.npy
--------------------------------------------------------------------------------
/gaussian.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antao97/PointCloudSegmentation/HEAD/gaussian.npy
--------------------------------------------------------------------------------
/image/DGCNN.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antao97/PointCloudSegmentation/HEAD/image/DGCNN.jpg
--------------------------------------------------------------------------------
/image/acc1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antao97/PointCloudSegmentation/HEAD/image/acc1.png
--------------------------------------------------------------------------------
/image/acc2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antao97/PointCloudSegmentation/HEAD/image/acc2.png
--------------------------------------------------------------------------------
/image/acc3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antao97/PointCloudSegmentation/HEAD/image/acc3.png
--------------------------------------------------------------------------------
/image/acc4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antao97/PointCloudSegmentation/HEAD/image/acc4.png
--------------------------------------------------------------------------------
/image/acc5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antao97/PointCloudSegmentation/HEAD/image/acc5.png
--------------------------------------------------------------------------------
/image/shapenetpartpart_train0_chair.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antao97/PointCloudSegmentation/HEAD/image/shapenetpartpart_train0_chair.png
--------------------------------------------------------------------------------
/image/shapenetpartpart_train58_airplane.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antao97/PointCloudSegmentation/HEAD/image/shapenetpartpart_train58_airplane.png
--------------------------------------------------------------------------------
/image/shapenetpartpart_train38_skateboard.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antao97/PointCloudSegmentation/HEAD/image/shapenetpartpart_train38_skateboard.png
--------------------------------------------------------------------------------
/tensorboard/Segment_dgcnn_seg_k40_1024_b32/events.out.tfevents.1579308861.server231:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antao97/PointCloudSegmentation/HEAD/tensorboard/Segment_dgcnn_seg_k40_1024_b32/events.out.tfevents.1579308861.server231
--------------------------------------------------------------------------------
/tensorboard/Classify_dgcnn_cls_k40_1024_b32/events.out.tfevents.1579684482.server231:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antao97/PointCloudSegmentation/HEAD/tensorboard/Classify_dgcnn_cls_k40_1024_b32/events.out.tfevents.1579684482.server231
--------------------------------------------------------------------------------
/tensorboard/Classify_dgcnn_seg_k40_1024_b32/events.out.tfevents.1579565090.server231:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antao97/PointCloudSegmentation/HEAD/tensorboard/Classify_dgcnn_seg_k40_1024_b32/events.out.tfevents.1579565090.server231
--------------------------------------------------------------------------------
/tensorboard/Reconstruct_dgcnn_cls_k20_1024_b16/events.out.tfevents.1579401032.server231:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antao97/PointCloudSegmentation/HEAD/tensorboard/Reconstruct_dgcnn_cls_k20_1024_b16/events.out.tfevents.1579401032.server231
--------------------------------------------------------------------------------
/tensorboard/Reconstruct_dgcnn_seg_k40_1024_b32/events.out.tfevents.1579272092.server231:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antao97/PointCloudSegmentation/HEAD/tensorboard/Reconstruct_dgcnn_seg_k40_1024_b32/events.out.tfevents.1579272092.server231
--------------------------------------------------------------------------------
/tensorboard/Classify_dgcnn_cls_k40_1024_b32_part/events.out.tfevents.1579698812.server231:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antao97/PointCloudSegmentation/HEAD/tensorboard/Classify_dgcnn_cls_k40_1024_b32_part/events.out.tfevents.1579698812.server231
--------------------------------------------------------------------------------
/tensorboard/Segment_dgcnn_seg_k40_1024_b32_nolabel/events.out.tfevents.1579336965.server231:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antao97/PointCloudSegmentation/HEAD/tensorboard/Segment_dgcnn_seg_k40_1024_b32_nolabel/events.out.tfevents.1579336965.server231
--------------------------------------------------------------------------------
/tensorboard/Reconstruct_dgcnn_cls_k20_1024_b16_part/events.out.tfevents.1579685815.server231:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/antao97/PointCloudSegmentation/HEAD/tensorboard/Reconstruct_dgcnn_cls_k20_1024_b16_part/events.out.tfevents.1579685815.server231
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 An Tao
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | """
4 | @Author: An Tao
5 | @Contact: ta19@mails.tsinghua.edu.cn
6 | @File: utils.py
7 | @Time: 2020/1/2 10:26 AM
8 | """
9 |
10 | import os
11 | import sys
12 |
13 |
class AverageMeter(object):
    """Keep the most recent value of a metric and its running weighted mean."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Set every tracked statistic back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as observed ``n`` times and refresh the mean.

        Args:
            val: latest measurement (stored as ``self.val``).
            n: weight of the measurement, e.g. the batch size.
        """
        weighted = val * n
        self.val = val
        self.sum += weighted
        self.count += n
        self.avg = self.sum / self.count
30 |
31 |
def learning_rate_decay(optimizer, t, lr_0):
    """Set each parameter group's learning rate using an inverse-sqrt schedule.

    For every group the new rate is
    ``lr_0 / sqrt(1 + lr_0 * weight_decay * t)``, using that group's own
    ``'weight_decay'`` entry.

    Args:
        optimizer: object exposing ``param_groups``, an iterable of dicts
            that contain a ``'weight_decay'`` key; ``'lr'`` is overwritten.
        t: step counter fed into the schedule.
        lr_0: initial learning rate.
    """
    # The original relied on ``np``, which this module never imports, so the
    # function raised NameError on first use. ``math`` is imported locally to
    # keep the block self-contained.
    import math

    for param_group in optimizer.param_groups:
        decay = 1 + lr_0 * param_group['weight_decay'] * t
        param_group['lr'] = lr_0 / math.sqrt(decay)
36 |
37 |
class Logger(object):
    """Tee text to the console stream and, optionally, to a log file.

    The stream that ``sys.stdout`` points to at construction time is kept as
    ``self.console``. When ``fpath`` is given, the file is opened for writing
    (truncating it) and every message is written to both destinations.

    Args:
        fpath: path of the log file, or ``None`` for console-only output.
    """

    def __init__(self, fpath=None):
        self.console = sys.stdout
        self.file = None
        if fpath is not None:
            self.file = open(fpath, 'w')

    def __del__(self):
        self.close()

    def __enter__(self):
        # Return the logger so ``with Logger(path) as log:`` binds a usable
        # object (the original returned None).
        return self

    def __exit__(self, *args):
        self.close()

    def write(self, msg):
        """Write ``msg`` to the console and to the log file if one is open."""
        self.console.write(msg)
        if self.file is not None:
            self.file.write(msg)

    def flush(self):
        """Flush the console and force the log file contents to disk."""
        self.console.flush()
        if self.file is not None:
            self.file.flush()
            os.fsync(self.file.fileno())

    def close(self):
        """Close the log file; safe to call more than once.

        The console stream is deliberately left open: it is normally the
        process-wide ``sys.stdout``, which this object does not own, and
        closing it (also triggered from ``__del__``) would break every later
        ``print`` in the program.
        """
        # getattr guards the case where __init__ failed before setting .file
        # and __del__ still runs.
        log_file = getattr(self, 'file', None)
        if log_file is not None:
            log_file.close()
            self.file = None
--------------------------------------------------------------------------------
/svm.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | """
4 | @Author: An Tao
5 | @Contact: ta19@mails.tsinghua.edu.cn
6 | @File: svm.py
7 | @Time: 2020/1/2 10:26 AM
8 | """
9 |
10 | import os
11 | import h5py
12 | import numpy as np
13 | from glob import glob
14 | from sklearn.svm import LinearSVC
15 |
16 |
class SVM(object):
    """Linear SVM evaluation on features previously dumped to HDF5 files.

    Files matching ``train*.h5`` and ``test*.h5`` inside ``feature_dir`` are
    loaded; each must provide ``'data'`` and ``'label'`` datasets.

    Args:
        feature_dir: directory containing the feature files.

    Raises:
        ValueError: if no train or no test file is found in ``feature_dir``.
    """

    def __init__(self, feature_dir):
        self.feature_dir = feature_dir

        self.train_path = glob(os.path.join(self.feature_dir, 'train*.h5'))
        self.test_path = glob(os.path.join(self.feature_dir, 'test*.h5'))

        print("Loading feature dataset...")
        self.train_data, self.train_label = self._load_split(self.train_path, 'train')
        print("Training set size:", np.size(self.train_data, 0))

        self.test_data, self.test_label = self._load_split(self.test_path, 'test')
        print("Testing set size:", np.size(self.test_data, 0))

    def _load_split(self, paths, split):
        """Read and concatenate ``data``/``label`` from every file in ``paths``."""
        if not paths:
            # np.concatenate([]) would raise the same exception type with a
            # message that does not say which directory was empty.
            raise ValueError(
                "No '%s*.h5' feature files found in %s" % (split, self.feature_dir))
        all_data = []
        all_label = []
        for path in paths:
            # Read-only access is enough; 'r+' needs write permission and
            # fails on read-only feature caches.
            with h5py.File(path, 'r') as f:
                all_data.append(f['data'][:].astype('float32'))
                all_label.append(f['label'][:].astype('int64'))
        return np.concatenate(all_data, axis=0), np.concatenate(all_label, axis=0)

    def run(self):
        """Fit a ``LinearSVC`` on the train features and print test accuracy."""
        clf = LinearSVC(random_state=0)
        clf.fit(self.train_data, self.train_label)
        result = clf.predict(self.test_data)
        # Flatten labels so an (N, 1) label array does not broadcast against
        # the (N,) predictions into an N x N comparison.
        hits = np.sum(result == np.ravel(self.test_label)).astype(float)
        accuracy = hits / np.size(self.test_label)
        print("Transfer linear SVM accuracy: {:.2f}%".format(accuracy*100))
57 |
58 |
59 |
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | """
4 | @Author: An Tao
5 | @Contact: ta19@mails.tsinghua.edu.cn
6 | @File: main.py
7 | @Time: 2020/1/2 10:26 AM
8 | """
9 |
10 | import argparse
11 |
12 | from reconstruction import Reconstruction
13 | from classification import Classification
14 | from segmentation import Segmentation
15 | from inference import Inference
16 | from svm import SVM
17 |
18 |
def get_parser(argv=None):
    """Build the command-line parser and return the parsed arguments.

    Args:
        argv: optional list of argument strings; ``None`` (the default) makes
            argparse read ``sys.argv[1:]`` exactly as before.

    Returns:
        argparse.Namespace with one attribute per option defined below.
    """
    parser = argparse.ArgumentParser(description='Unsupervised Point Cloud Feature Learning')
    parser.add_argument('--exp_name', type=str, default=None, metavar='N',
                        help='Name of the experiment')
    parser.add_argument('--task', type=str, default='reconstruct', metavar='N',
                        choices=['reconstruct', 'classify', 'segment'],
                        help='Experiment task, [reconstruct, classify, segment]')
    parser.add_argument('--seg_no_class_label', action='store_true',
                        help='Do not use class labels in segmentation')
    parser.add_argument('--loss', type=str, default='softmax', metavar='N',
                        choices=['softmax', 'triplet'],
                        help='Loss to use, [softmax, triplet]')
    parser.add_argument('--margin', type=float, default=None,
                        help='Margin for triplet loss')
    # NOTE(review): the default 'foldingnet' is not one of the accepted
    # choices (argparse does not validate string defaults against `choices`).
    # It is left unchanged because the model code that consumes this value is
    # not visible here -- confirm which spelling the model expects.
    parser.add_argument('--encoder', type=str, default='foldingnet', metavar='N',
                        choices=['foldnet', 'dgcnn_cls', 'dgcnn_seg'],
                        help='Encoder to use, [foldnet, dgcnn_cls, dgcnn_seg]')
    parser.add_argument('--dropout', type=float, default=0.5,
                        help='Dropout rate')
    parser.add_argument('--feat_dims', type=int, default=512, metavar='N',
                        help='Number of dims for feature ')
    parser.add_argument('--k', type=int, default=None, metavar='N',
                        help='Num of nearest neighbors to use for KNN')
    parser.add_argument('--shape', type=str, default='sphere', metavar='N',
                        choices=['plane', 'sphere', 'gaussian'],
                        help='Shape of points to input decoder, [plane, sphere, gaussian]')
    # 'shapenetcorev2' is the default and is accepted by Dataset, so it must
    # also be selectable explicitly.
    parser.add_argument('--dataset', type=str, default='shapenetcorev2', metavar='N',
                        choices=['shapenetcorev2', 'shapenetpart', 'modelnet40',
                                 'modelnet10', 'shapenetpartpart'],
                        help='Dataset to use, [shapenetcorev2, shapenetpart, modelnet40, '
                             'modelnet10, shapenetpartpart]')
    parser.add_argument('--class_choice', type=str, default=None, metavar='N',
                        choices=['airplane', 'bag', 'cap', 'car', 'chair',
                                 'earphone', 'guitar', 'knife', 'lamp', 'laptop',
                                 'motorbike', 'mug', 'pistol', 'rocket', 'skateboard', 'table'])
    parser.add_argument('--no_scheduler', action='store_true',
                        help='Do not use scheduler in training')
    parser.add_argument('--use_rotate', action='store_true',
                        help='Rotate the pointcloud before training')
    parser.add_argument('--use_translate', action='store_true',
                        help='Translate the pointcloud before training')
    parser.add_argument('--use_jitter', action='store_true',
                        help='Jitter the pointcloud before training')
    parser.add_argument('--dataset_root', type=str, default='../dataset', help="Dataset root path")
    parser.add_argument('--gpu', type=str, help='Id of gpu device to be used', default='0')
    parser.add_argument('--batch_size', type=int, default=16, metavar='batch_size',
                        help='Size of batch')
    parser.add_argument('--workers', type=int, help='Number of data loading workers', default=16)
    parser.add_argument('--epochs', type=int, default=None, metavar='N',
                        help='Number of episode to train ')
    parser.add_argument('--snapshot_interval', type=int, default=10, metavar='N',
                        help='Save snapshot interval ')
    # The flag turns CUDA off; the old help text claimed the opposite.
    parser.add_argument('--no_cuda', action='store_true',
                        help='Disables CUDA training')
    parser.add_argument('--eval', action='store_true',
                        help='Evaluate the model')
    parser.add_argument('--num_points', type=int, default=2048,
                        help='Num of points to use')
    parser.add_argument('--model_path', type=str, default='', metavar='N',
                        help='Path to load model')
    args = parser.parse_args(argv)
    return args
79 |
80 |
if __name__ == '__main__':
    args = get_parser()
    if args.eval:
        # Evaluation: dump features with the trained model, then score them
        # with a linear SVM.
        inference = Inference(args)
        feature_dir = inference.run()
        svm = SVM(feature_dir)
        svm.run()
    else:
        # Training: pick the runner matching the requested task.
        task_runners = {
            'reconstruct': Reconstruction,
            'classify': Classification,
            'segment': Segmentation,
        }
        runner_cls = task_runners.get(args.task)
        if runner_cls is not None:
            runner_cls(args).run()
98 |
--------------------------------------------------------------------------------
/dataset.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | """
4 | @Author: An Tao
5 | @Contact: ta19@mails.tsinghua.edu.cn
6 | @File: dataset.py
7 | @Time: 2020/1/2 10:26 AM
8 | """
9 |
10 | import os
11 | import torch
12 | import json
13 | import h5py
14 | from glob import glob
15 | import numpy as np
16 | import torch.utils.data as data
17 |
18 |
# Number of part labels for each of the 16 categories.
shapenetpart_seg_num = [4, 2, 2, 4, 4, 3, 3, 2, 4, 2, 6, 2, 3, 3, 3, 3]
# Offset of each category's first part label: the running total of the part
# counts that precede it (0, 4, 6, 8, ..., 47).
shapenetpart_seg_start_index = [sum(shapenetpart_seg_num[:i])
                                for i in range(len(shapenetpart_seg_num))]
21 |
22 |
def translate_pointcloud(pointcloud):
    """Randomly rescale and shift a point cloud along each of three axes.

    A scale factor in [2/3, 3/2) and an offset in [-0.2, 0.2) are drawn per
    axis and broadcast over the points.

    Returns:
        A new float32 array; the input is not modified.
    """
    scale = np.random.uniform(low=2./3., high=3./2., size=[3])
    shift = np.random.uniform(low=-0.2, high=0.2, size=[3])
    scaled = np.multiply(pointcloud, scale)
    return (scaled + shift).astype('float32')
29 |
30 |
def jitter_pointcloud(pointcloud, sigma=0.01, clip=0.02):
    """Return a copy of ``pointcloud`` with clipped Gaussian noise added.

    Args:
        pointcloud: 2-D array of shape (N, C).
        sigma: standard deviation of the noise before clipping.
        clip: noise is limited to the interval [-clip, clip].

    Returns:
        A new array with the input's shape and dtype.
    """
    N, C = pointcloud.shape
    noise = np.clip(sigma * np.random.randn(N, C), -1*clip, clip)
    # Build a new array rather than using ``+=``: callers pass views into the
    # cached dataset, so in-place noise would accumulate in the stored samples
    # every time they are fetched.
    return pointcloud + noise.astype(pointcloud.dtype)
35 |
36 |
def rotate_pointcloud(pointcloud):
    """Return a copy of ``pointcloud`` rotated in the (x, z) plane.

    The angle is a random multiple of 2*pi/24. Columns 0 and 2 are rotated;
    every other column is copied through unchanged.

    Args:
        pointcloud: 2-D array with at least three columns.

    Returns:
        A new array with the input's shape and dtype.
    """
    theta = np.pi*2 * np.random.choice(24) / 24
    rotation_matrix = np.array([[np.cos(theta), -np.sin(theta)],
                                [np.sin(theta), np.cos(theta)]])
    # Work on a copy: the argument is usually a view into the cached dataset,
    # and rotating it in place would permanently alter the stored sample.
    rotated = pointcloud.copy()
    rotated[:, [0, 2]] = pointcloud[:, [0, 2]].dot(rotation_matrix)  # random rotation (x,z)
    return rotated
42 |
43 |
class Dataset(data.Dataset):
    """Point-cloud dataset read from HDF5 shards.

    Shards are looked up with the glob ``<root>/<dataset_name>_*hdf5_2048``
    and must provide ``'data'`` and ``'label'`` datasets (plus ``'seg'`` when
    segmentation is enabled). All matching shards are loaded into memory at
    construction time.

    Args:
        root: directory that contains the dataset folders.
        dataset_name: one of shapenetcorev2, shapenetpart, modelnet10,
            modelnet40, shapenetpartpart.
        class_choice: if given, keep only samples whose name equals it.
        num_points: number of leading points returned per sample (<= 2048).
        split: 'train', 'test' or 'all'; ShapeNet-style datasets also accept
            'val' and 'trainval'.
        load_name: also load the per-sample names from ``*_id2name.json``.
        segmentation: also load per-point part labels. Forced to True for
            'shapenetpartpart'.
        random_rotate, random_jitter, random_translate: augmentations applied
            in ``__getitem__``.

    Raises:
        AssertionError: on an unsupported dataset/split/num_points
            combination, or segmentation on a dataset without part labels.
        ValueError: if no HDF5 shard matches, or ``class_choice`` is not a
            known category when segmentation is enabled.
    """

    # Category names in the order used to index shapenetpart_seg_num and
    # shapenetpart_seg_start_index.
    # NOTE(review): the order is taken from the --class_choice list in
    # main.py and is assumed to match those tables -- confirm.
    _SHAPENETPART_CATEGORIES = (
        'airplane', 'bag', 'cap', 'car', 'chair', 'earphone', 'guitar', 'knife',
        'lamp', 'laptop', 'motorbike', 'mug', 'pistol', 'rocket', 'skateboard',
        'table',
    )

    def __init__(self, root, dataset_name='modelnet40', class_choice=None,
                 num_points=2048, split='train', load_name=False,
                 segmentation=False, random_rotate=False, random_jitter=False,
                 random_translate=False):

        assert dataset_name.lower() in ['shapenetcorev2', 'shapenetpart',
                                        'modelnet10', 'modelnet40', 'shapenetpartpart']
        assert num_points <= 2048

        if dataset_name in ['shapenetcorev2', 'shapenetpart', 'shapenetpartpart']:
            assert split.lower() in ['train', 'test', 'val', 'trainval', 'all']
        else:
            assert split.lower() in ['train', 'test', 'all']

        if dataset_name not in ['shapenetcorev2', 'shapenetpart', 'shapenetpartpart'] and segmentation:
            raise AssertionError

        self.root = os.path.join(root, dataset_name + '_' + '*hdf5_2048')
        self.dataset_name = dataset_name
        self.class_choice = class_choice
        self.num_points = num_points
        self.split = split
        self.load_name = load_name
        if self.dataset_name == 'shapenetpartpart':
            self.segmentation = True
        else:
            self.segmentation = segmentation
        self.random_rotate = random_rotate
        self.random_jitter = random_jitter
        self.random_translate = random_translate

        self.path_h5py_all = []
        self.path_name_all = []
        self.path_file_all = []

        if self.split in ['train', 'trainval', 'all']:
            self.get_path('train')
        if self.dataset_name in ['shapenetcorev2', 'shapenetpart', 'shapenetpartpart']:
            if self.split in ['val', 'trainval', 'all']:
                self.get_path('val')
        if self.split in ['test', 'all']:
            self.get_path('test')

        if not self.path_h5py_all:
            # np.concatenate([]) below would raise the same exception type
            # without saying which path was searched.
            raise ValueError("No HDF5 files found for split '%s' under %s"
                             % (self.split, self.root))

        self.path_h5py_all.sort()
        all_data, all_label, all_seg = self.load_h5py(self.path_h5py_all)

        if self.load_name or self.class_choice is not None:
            self.path_name_all.sort()
            # Kept as an array so it can be compared element-wise and indexed
            # with a boolean mask when filtering by class.
            self.name = np.array(self.load_json(self.path_name_all))  # load label name

        self.data = np.concatenate(all_data, axis=0)
        self.label = np.concatenate(all_label, axis=0)
        if self.segmentation:
            self.seg = np.concatenate(all_seg, axis=0)

        if self.class_choice is not None:
            self._filter_by_class()
        elif self.segmentation:
            self.seg_num_all = 50
            self.seg_start_index = 0

    def _filter_by_class(self):
        """Keep only samples named ``class_choice`` and set its part-label range."""
        indices = (np.asarray(self.name) == self.class_choice).reshape(-1)
        self.data = self.data[indices]
        self.label = self.label[indices]
        self.name = np.asarray(self.name)[indices]
        if self.segmentation:
            self.seg = self.seg[indices]
            if self.class_choice not in self._SHAPENETPART_CATEGORIES:
                raise ValueError("Unknown class_choice: %r" % (self.class_choice,))
            # The original referenced an undefined ``id_choice`` here.
            id_choice = self._SHAPENETPART_CATEGORIES.index(self.class_choice)
            self.seg_num_all = shapenetpart_seg_num[id_choice]
            self.seg_start_index = shapenetpart_seg_start_index[id_choice]

    def get_path(self, type):
        """Collect the shard (and, when needed, name-file) paths of one split."""
        path_h5py = os.path.join(self.root, '*%s*.h5' % type)
        self.path_h5py_all += glob(path_h5py)
        # Names are needed for class filtering too, not only when load_name
        # is set.
        if self.load_name or self.class_choice is not None:
            path_json = os.path.join(self.root, '%s*_id2name.json' % type)
            self.path_name_all += glob(path_json)
        return

    def load_h5py(self, path):
        """Read 'data', 'label' (and 'seg') arrays from each shard in ``path``.

        Returns:
            Three lists with one array per shard; the third is empty unless
            segmentation is enabled.
        """
        all_data = []
        all_label = []
        all_seg = []
        for h5_name in path:
            # Read-only access: 'r+' needs write permission on the dataset.
            with h5py.File(h5_name, 'r') as f:
                all_data.append(f['data'][:].astype('float32'))
                all_label.append(f['label'][:].astype('int64'))
                if self.segmentation:
                    all_seg.append(f['seg'][:].astype('int64'))
        return all_data, all_label, all_seg

    def load_json(self, path):
        """Concatenate the contents of every JSON file listed in ``path``."""
        all_data = []
        for json_name in path:
            with open(json_name, 'r') as j:
                all_data += json.load(j)
        return all_data

    def __getitem__(self, item):
        point_set = self.data[item][:self.num_points]
        label = self.label[item]

        if self.random_rotate or self.random_jitter or self.random_translate:
            # Augment a private copy so the cached array is never modified,
            # whatever the augmentation helpers do internally.
            point_set = point_set.copy()
        if self.random_rotate:
            point_set = rotate_pointcloud(point_set)
        if self.random_jitter:
            point_set = jitter_pointcloud(point_set)
        if self.random_translate:
            point_set = translate_pointcloud(point_set)

        # convert numpy array to pytorch Tensor
        point_set = torch.from_numpy(point_set)
        label = torch.from_numpy(np.array([label]).astype(np.int64))
        label = label.squeeze(0)

        if not self.segmentation:
            return point_set, label

        # Truncate like the points so both stay aligned when num_points is
        # smaller than the stored cloud.
        seg = torch.from_numpy(self.seg[item][:self.num_points])
        if self.dataset_name == 'shapenetpartpart':
            # Only the first point's part label is returned, as a 1-element
            # tensor.
            return point_set, seg.unsqueeze(1)[0]
        return point_set, label, seg

    def __len__(self):
        return self.data.shape[0]
--------------------------------------------------------------------------------
/loss.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | """
4 | @Author: An Tao
5 | @Contact: ta19@mails.tsinghua.edu.cn
6 | @File: loss.py
7 | @Time: 2020/1/2 10:26 AM
8 | """
9 |
10 | import copy
11 | import torch
12 | import torch.nn as nn
13 | import torch.nn.functional as F
14 | from torch.autograd import Variable
15 |
16 |
def batch_pairwise_dist(x, y):
    """Squared Euclidean distance between every pair of points in two batches.

    Uses ``|a - b|^2 = |a|^2 + |b|^2 - 2 a.b``.

    Args:
        x: tensor of shape [bs, num_points_x, dim].
        y: tensor of shape [bs, num_points_y, dim].

    Returns:
        Tensor of shape [bs, num_points_x, num_points_y] where entry
        ``[b, i, j]`` is the squared distance between ``x[b, i]`` and
        ``y[b, j]``. Values are not clamped, so round-off can make them
        slightly negative.
    """
    # Squared norms computed directly: the earlier version built the full
    # x.x^T and y.y^T matrices only to read their diagonals.
    x_sq = (x * x).sum(dim=2).unsqueeze(2)   # [bs, num_points_x, 1]
    y_sq = (y * y).sum(dim=2).unsqueeze(1)   # [bs, 1, num_points_y]
    cross = torch.bmm(x, y.transpose(2, 1))  # [bs, num_points_x, num_points_y]
    return x_sq + y_sq - 2 * cross
32 |
33 |
class ChamferLoss(nn.Module):
    """Sum of nearest-neighbour distances between two point sets, both ways.

    Distances come from ``batch_pairwise_dist``; the result is summed over
    all points and over the batch.
    """

    def __init__(self):
        super().__init__()
        self.use_cuda = torch.cuda.is_available()

    def forward(self, preds, gts):
        """Return the scalar loss for predicted and ground-truth clouds."""
        pairwise = batch_pairwise_dist(gts, preds)
        # For every predicted point: distance to its closest ground-truth point.
        closest_gt = pairwise.min(dim=1)[0]
        # For every ground-truth point: distance to its closest predicted point.
        closest_pred = pairwise.min(dim=2)[0]
        return torch.sum(closest_gt) + torch.sum(closest_pred)
46 |
47 |
class CrossEntropyLoss(nn.Module):
    """Cross-entropy with optional label smoothing (epsilon fixed at 0.2).

    Args:
        smoothing: when True the target class gets probability 0.8 and the
            remaining 0.2 is spread evenly over the other classes; when False
            plain ``F.cross_entropy`` is used.
    """

    def __init__(self, smoothing=True):
        super().__init__()
        self.smoothing = smoothing

    def forward(self, preds, gts):
        """Return the mean loss for logits ``preds`` and labels ``gts``.

        ``gts`` is flattened first, so any label shape with one entry per row
        of ``preds`` is accepted.
        """
        targets = gts.contiguous().view(-1)

        if not self.smoothing:
            return F.cross_entropy(preds, targets, reduction='mean')

        eps = 0.2
        num_classes = preds.size(1)

        hot = torch.zeros_like(preds).scatter(1, targets.view(-1, 1), 1)
        soft_targets = hot * (1 - eps) + (1 - hot) * eps / (num_classes - 1)
        log_probs = F.log_softmax(preds, dim=1)

        per_sample = -(soft_targets * log_probs).sum(dim=1)
        return per_sample.mean()
69 |
70 |
class TripletLoss(nn.Module):
    """Batch-hard triplet loss over per-point embeddings.

    Args:
        margin: margin passed to ``nn.MarginRankingLoss``; when ``None`` a
            ``nn.SoftMarginLoss`` is used instead.
        normalize_feature: L2-normalise the embeddings along the last axis
            before computing distances.
    """

    def __init__(self, margin=None, normalize_feature=True):
        super(TripletLoss, self).__init__()
        self.margin = margin
        self.normalize_feature = normalize_feature
        if self.margin is not None:
            self.ranking_loss = nn.MarginRankingLoss(margin=margin)
        else:
            self.ranking_loss = nn.SoftMarginLoss()

    def normalize(self, x, axis=-1):
        """Normalizing to unit length along the specified dimension.
        Args:
          x: pytorch tensor
        Returns:
          x: pytorch tensor, same shape as input
        """
        x = 1. * x / (torch.norm(x, 2, axis, keepdim=True).expand_as(x) + 1e-12)
        return x

    def euclidean_dist(self, x, y):
        """
        Args:
          x: pytorch tensor, with shape [m, d]
          y: pytorch tensor, with shape [n, d]
        Returns:
          dist: pytorch tensor, with shape [m, n]
        """
        xx = torch.pow(x, 2).sum(1, keepdim=True)      # [m, 1]
        yy = torch.pow(y, 2).sum(1, keepdim=True).t()  # [1, n]
        # Written as a plain expression: the positional
        # ``addmm_(beta, alpha, mat1, mat2)`` overload used previously is no
        # longer accepted by current PyTorch.
        dist = xx + yy - 2 * torch.mm(x, y.t())
        dist = dist.clamp(min=1e-12).sqrt()  # for numerical stability
        return dist

    def hard_example_mining(self, dist_mat, labels, return_inds=False):
        """For each anchor, find the hardest positive and negative sample.
        Args:
          dist_mat: pytorch tensor, pair wise distance between samples, shape [B, N, N]
          labels: pytorch LongTensor, with shape [B, N]
          return_inds: also return the indices of the selected samples
        Returns:
          dist_ap: pytorch tensor, distance(anchor, positive); shape [B, N]
          dist_an: pytorch tensor, distance(anchor, negative); shape [B, N]
          p_inds: (only if `return_inds`) LongTensor with shape [B, N];
            indices of selected hard positive samples; 0 <= p_inds[b, i] <= N - 1
          n_inds: (only if `return_inds`) LongTensor with shape [B, N];
            indices of selected hard negative samples; 0 <= n_inds[b, i] <= N - 1
        NOTE: an anchor without any negative in its cloud gets dist_an = 1000
          (the fill value used for masked-out entries).
        """

        assert len(dist_mat.size()) == 3
        assert dist_mat.size(1) == dist_mat.size(2)

        # shape [B, N, N]; entry [b, i, j] is True when points i and j of
        # cloud b share a label
        is_pos = labels.unsqueeze(2).eq(labels.unsqueeze(1))
        is_neg = ~is_pos

        # `dist_ap` means distance(anchor, positive): the farthest same-label
        # point. Negatives are masked to 0 so they never win the max.
        # masked_fill keeps dist_mat's dtype/device (the old float32 scratch
        # buffers failed for any other dtype).
        dist_ap, p_inds = torch.max(dist_mat.masked_fill(is_neg, 0), 2)
        # `dist_an` means distance(anchor, negative): the closest
        # different-label point. Positives are masked to a large value.
        dist_an, n_inds = torch.min(dist_mat.masked_fill(is_pos, 1000), 2)

        if return_inds:
            return dist_ap, dist_an, p_inds, n_inds
        return dist_ap, dist_an

    @staticmethod
    def _to_device(tensor, device):
        """Move ``tensor`` to ``device``.

        Integer ids and ``None`` keep the original ``.cuda(device)``
        behaviour; strings and ``torch.device`` objects (e.g. ``'cpu'``) go
        through ``.to`` so the loss can also run without a GPU.
        """
        if isinstance(device, (str, torch.device)):
            return tensor.to(device)
        return tensor.cuda(device)

    def forward(self, preds, gts, new_device):
        """
        Args:
          preds: pytorch tensor, shape [B, N, C]
          gts: pytorch LongTensor, with shape [B, N]
          new_device: device on which the loss is computed; a CUDA device id,
            ``None`` (current CUDA device), a device string or a
            ``torch.device``
        Returns:
          loss: scalar pytorch tensor
          dist_ap: pytorch tensor, distance(anchor, positive); shape [B, N]
          dist_an: pytorch tensor, distance(anchor, negative); shape [B, N]
        """
        if self.normalize_feature:
            preds = self.normalize(preds, axis=-1)
        preds = self._to_device(preds, new_device)
        gts = self._to_device(gts, new_device)
        # shape [B, N, N]
        dist_mat = batch_pairwise_dist(preds, preds)
        dist_mat = dist_mat.clamp(min=1e-12).sqrt()  # for numerical stability
        dist_ap, dist_an = self.hard_example_mining(dist_mat, gts)
        # Target +1 everywhere: the negative distance should rank above the
        # positive one.
        y = torch.ones_like(dist_an)
        if self.margin is not None:
            loss = self.ranking_loss(dist_an, dist_ap, y)
        else:
            loss = self.ranking_loss(dist_an - dist_ap, y)
        return loss, dist_ap, dist_an
182 |
183 |
--------------------------------------------------------------------------------
/inference.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | """
4 | @Author: An Tao
5 | @Contact: ta19@mails.tsinghua.edu.cn
6 | @File: inference.py
7 | @Time: 2020/1/2 10:26 AM
8 | """
9 |
10 | import os
11 | import sys
12 | import time
13 | import shutil
14 | import torch
15 | import numpy as np
16 | import h5py
17 |
18 | from tensorboardX import SummaryWriter
19 |
20 | from model import ReconstructionNet, ClassificationNet, SegmentationNet
21 | from dataset import Dataset
22 | from utils import Logger
23 |
24 |
25 | class Inference(object):
def __init__(self, args):
    """Prepare the cache directory, data loaders and model for feature dumping.

    Side effects: creates ``cache/<experiment_id>/``, empties its
    ``features/`` sub-directory and replaces ``sys.stdout`` with a ``Logger``
    that also writes to ``cache/<experiment_id>/log.txt``.

    Args:
        args: parsed command-line namespace; reads batch_size, no_cuda, task,
            exp_name, gpu, dataset_root, dataset, num_points, workers and
            model_path.

    Raises:
        ValueError: if ``args.task`` is not reconstruct, classify or segment.
    """
    self.batch_size = args.batch_size
    self.no_cuda = args.no_cuda
    self.task = args.task

    # create exp directory (a timestamp names the run when no name is given)
    if args.exp_name is not None:
        self.experiment_id = args.exp_name
    else:
        self.experiment_id = time.strftime('%m%d%H%M%S')
    cache_root = 'cache/%s' % self.experiment_id
    os.makedirs(cache_root, exist_ok=True)
    self.feature_dir = os.path.join(cache_root, 'features/')
    sys.stdout = Logger(os.path.join(cache_root, 'log.txt'))

    # always start from an empty feature directory
    if os.path.exists(self.feature_dir):
        shutil.rmtree(self.feature_dir)
    os.makedirs(self.feature_dir)

    # print args
    print(str(args))

    # get gpu id: a comma-separated list, whitespace ignored
    gids = ''.join(args.gpu.split())
    self.gpu_ids = [int(gid) for gid in gids.split(',')]
    self.first_gpu = self.gpu_ids[0]

    # generate dataset
    self.infer_dataset_train = Dataset(
        root=args.dataset_root,
        dataset_name=args.dataset,
        split='train',
        num_points=args.num_points,
    )
    self.infer_dataset_test = Dataset(
        root=args.dataset_root,
        dataset_name=args.dataset,
        split='test',
        num_points=args.num_points,
    )
    # shuffle is off so the sample order is the same on every run
    self.infer_loader_train = torch.utils.data.DataLoader(
        self.infer_dataset_train,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers
    )
    self.infer_loader_test = torch.utils.data.DataLoader(
        self.infer_dataset_test,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers
    )
    print("Inference set size (train):", len(self.infer_loader_train.dataset))
    print("Inference set size (test):", len(self.infer_loader_test.dataset))

    # initialize model
    if args.task == "reconstruct":
        self.model = ReconstructionNet(args)
    elif args.task == "classify":
        self.model = ClassificationNet(args)
    elif args.task == "segment":
        self.model = SegmentationNet(args)
    else:
        # Fail here rather than with an AttributeError on self.model later.
        raise ValueError("Unknown task: %r" % (args.task,))
    if args.model_path != '':
        # _load_pretrain is defined elsewhere in this class (not shown here).
        self._load_pretrain(args.model_path)

    # load model to gpu
    if not args.no_cuda:
        if len(self.gpu_ids) != 1:  # multiple gpus
            self.model = torch.nn.DataParallel(self.model.cuda(self.first_gpu), self.gpu_ids)
        else:
            self.model = self.model.cuda(self.gpu_ids[0])
101 |
102 | def run(self):
103 | self.model.eval()
104 |
105 | # generate train set for SVM
106 | loss_buf = []
107 | feature_train = []
108 | lbs_train = []
109 | n = 0
110 | for iter, (pts, lbs) in enumerate(self.infer_loader_train):
111 | if not self.no_cuda:
112 | pts = pts.cuda(self.first_gpu)
113 | lbs = lbs.cuda(self.first_gpu)
114 | if self.task == "reconstruct":
115 | output, feature = self.model(pts)
116 | elif self.task in ["classify", "segment"]:
117 | feature = self.model(pts)
118 | feature_train.append(feature.detach().cpu().numpy().squeeze(1))
119 | lbs_train.append(lbs.cpu().numpy().squeeze(1))
120 | if ((iter+1) * self.batch_size % 2048) == 0 \
121 | or (iter+1) == len(self.infer_loader_train):
122 | feature_train = np.concatenate(feature_train, axis=0)
123 | lbs_train = np.concatenate(lbs_train, axis=0)
124 | f = h5py.File(os.path.join(self.feature_dir, 'train' + str(n) + '.h5'),'w')
125 | f['data'] = feature_train
126 | f['label'] = lbs_train
127 | f.close()
128 | print("Train set {} for SVM saved.".format(n))
129 | feature_train = []
130 | lbs_train = []
131 | n += 1
132 | if self.task == "reconstruct":
133 | if len(self.gpu_ids) != 1: # multiple gpus
134 | loss = self.model.module.get_loss(pts, output)
135 | else:
136 | loss = self.model.get_loss(pts, output)
137 | loss_buf.append(loss.detach().cpu().numpy())
138 | if self.task == "reconstruct":
139 | print(f'Avg loss {np.mean(loss_buf)}')
140 | print("Finish generating train set for SVM.")
141 |
142 | # generate test set for SVM
143 | loss_buf = []
144 | feature_test = []
145 | lbs_test = []
146 | n = 0
147 | for iter, (pts, lbs) in enumerate(self.infer_loader_test):
148 | if not self.no_cuda:
149 | pts = pts.cuda(self.first_gpu)
150 | lbs = lbs.cuda(self.first_gpu)
151 | if self.task == "reconstruct":
152 | output, feature = self.model(pts)
153 | elif self.task in ["classify", "segment"]:
154 | feature = self.model(pts)
155 | feature_test.append(feature.detach().cpu().numpy().squeeze(1))
156 | lbs_test.append(lbs.cpu().numpy().squeeze(1))
157 | if ((iter+1) * self.batch_size % 2048) == 0 \
158 | or (iter+1) == len(self.infer_loader_test):
159 | feature_test = np.concatenate(feature_test, axis=0)
160 | lbs_test = np.concatenate(lbs_test, axis=0)
161 | f = h5py.File(os.path.join(self.feature_dir, 'test' + str(n) + '.h5'),'w')
162 | f['data'] = feature_test
163 | f['label'] = lbs_test
164 | f.close()
165 | print("Test set {} for SVM saved.".format(n))
166 | feature_test = []
167 | lbs_test = []
168 | n += 1
169 | if self.task == "reconstruct":
170 | if len(self.gpu_ids) != 1: # multiple gpus
171 | loss = self.model.module.get_loss(pts, output)
172 | else:
173 | loss = self.model.get_loss(pts, output)
174 | loss_buf.append(loss.detach().cpu().numpy())
175 | if self.task == "reconstruct":
176 | print(f'Avg loss {np.mean(loss_buf)}')
177 | print("Finish generating test set for SVM.")
178 |
179 | return self.feature_dir
180 |
181 |
182 | def _load_pretrain(self, pretrain):
183 | state_dict = torch.load(pretrain, map_location='cpu')
184 | from collections import OrderedDict
185 | new_state_dict = OrderedDict()
186 | for key, val in state_dict.items():
187 | if key[:6] == 'module':
188 | name = key[7:] # remove 'module.'
189 | else:
190 | name = key
191 | if key[:10] == 'classifier':
192 | continue
193 | if key[:9] == 'segmenter':
194 | continue
195 | new_state_dict[name] = val
196 | self.model.load_state_dict(new_state_dict)
197 | print(f"Load model from {pretrain}")
198 |
--------------------------------------------------------------------------------
/reconstruction.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | """
4 | @Author: An Tao
5 | @Contact: ta19@mails.tsinghua.edu.cn
6 | @File: reconstruction.py
7 | @Time: 2020/1/2 10:26 AM
8 | """
9 |
10 | import os
11 | import sys
12 | import time
13 | import shutil
14 | import torch
15 | import torch.optim as optim
16 | import numpy as np
17 |
18 | from tensorboardX import SummaryWriter
19 |
20 | from model import ReconstructionNet
21 | from dataset import Dataset
22 | from utils import Logger
23 |
24 |
class Reconstruction(object):
    """Trainer for ``ReconstructionNet`` (unsupervised reconstruction task)."""

    # number of epochs used per encoder when ``args.epochs`` is not given
    _DEFAULT_EPOCHS = {'foldnet': 278, 'dgcnn_cls': 250, 'dgcnn_seg': 290}

    def __init__(self, args):
        """Set up directories, data, model and optimizer from ``args``.

        Side effects: creates (or, after confirmation, recreates) the snapshot
        directory, recreates the tensorboard directory for a fresh run and
        replaces ``sys.stdout`` with a ``Logger``.

        Raises:
            ValueError: If ``args.epochs`` is ``None`` and ``args.encoder``
                has no default number of epochs.
        """
        self.dataset_name = args.dataset
        if args.epochs is not None:
            self.epochs = args.epochs
        elif args.encoder in self._DEFAULT_EPOCHS:
            self.epochs = self._DEFAULT_EPOCHS[args.encoder]
        else:
            raise ValueError(
                "No default number of epochs for encoder %r; pass --epochs" % (args.encoder,))
        self.batch_size = args.batch_size
        self.snapshot_interval = args.snapshot_interval
        self.no_cuda = args.no_cuda
        self.model_path = args.model_path

        # create exp directory
        self.experiment_id = self._experiment_id_for(args.exp_name, args.model_path)
        snapshot_root = 'snapshot/%s' % self.experiment_id
        tensorboard_root = 'tensorboard/%s' % self.experiment_id
        self.save_dir = os.path.join(snapshot_root, 'models/')
        self.tboard_dir = tensorboard_root

        # check arguments
        if self.model_path == '':
            if not os.path.exists(self.save_dir):
                os.makedirs(self.save_dir)
            else:
                choose = input("Remove " + self.save_dir + " ? (y/n)")
                if choose == "y":
                    shutil.rmtree(self.save_dir)
                    os.makedirs(self.save_dir)
                else:
                    sys.exit(0)
            # a fresh run always starts with an empty tensorboard directory
            if os.path.exists(self.tboard_dir):
                shutil.rmtree(self.tboard_dir)
            os.makedirs(self.tboard_dir)
        else:
            # resuming: keep what is there, but make sure the log file and
            # the snapshots have a directory to go to
            os.makedirs(self.save_dir, exist_ok=True)
        sys.stdout = Logger(os.path.join(snapshot_root, 'log.txt'))
        self.writer = SummaryWriter(log_dir=self.tboard_dir)

        # print args
        print(str(args))

        # get gpu id
        gids = ''.join(args.gpu.split())
        self.gpu_ids = [int(gid) for gid in gids.split(',')]
        self.first_gpu = self.gpu_ids[0]

        # generate dataset
        self.train_dataset = Dataset(
            root=args.dataset_root,
            dataset_name=args.dataset,
            split='all',
            num_points=args.num_points,
            random_translate=args.use_translate,
            random_rotate=True,
            random_jitter=args.use_jitter
        )
        self.train_loader = torch.utils.data.DataLoader(
            self.train_dataset,
            batch_size=args.batch_size,
            shuffle=True,
            num_workers=args.workers
        )
        print("Training set size:", len(self.train_loader.dataset))

        # initialize model
        self.model = ReconstructionNet(args)
        if self.model_path != '':
            self._load_pretrain(args.model_path)

        # load model to gpu
        if not self.no_cuda:
            if len(self.gpu_ids) != 1:  # multiple gpus
                self.model = torch.nn.DataParallel(self.model.cuda(self.first_gpu), self.gpu_ids)
            else:
                self.model = self.model.cuda(self.gpu_ids[0])

        # initialize optimizer (learning rate scaled relative to batch size 16)
        self.parameter = self.model.parameters()
        self.optimizer = optim.Adam(self.parameter, lr=0.0001*16/args.batch_size, betas=(0.9, 0.999), weight_decay=1e-6)

    @staticmethod
    def _experiment_id_for(exp_name, model_path):
        """Return the experiment id for the given name / checkpoint path."""
        if exp_name is not None:
            return "Reconstruct_" + exp_name
        parts = model_path.split('/')
        # checkpoints live in <root>/<experiment_id>/models/<file>; the length
        # check keeps an empty or short path from raising IndexError
        if len(parts) >= 3 and parts[-2] == 'models':
            return parts[-3]
        return "Reconstruct" + time.strftime('%m%d%H%M%S')

    @staticmethod
    def _parse_start_epoch(model_path):
        """Return the epoch encoded in ``<name>_<epoch>.pkl``.

        Returns 0 for an empty path or when the file name has no numeric
        suffix (for example ``<name>_best.pkl``).
        """
        if model_path == '':
            return 0
        stem = os.path.splitext(os.path.basename(model_path))[0]
        suffix = stem.rsplit('_', 1)[-1]
        return int(suffix) if suffix.isdecimal() else 0

    def _unwrapped_model(self):
        """Return the network itself, without a ``DataParallel`` wrapper."""
        if isinstance(self.model, torch.nn.DataParallel):
            return self.model.module
        return self.model

    def run(self):
        """Train until ``self.epochs``, saving periodic and best snapshots."""
        self.train_hist = {
            'loss': [],
            'per_epoch_time': [],
            'total_time': []
        }
        best_loss = 1000000000
        print('Training start!!')
        start_time = time.time()
        self.model.train()
        start_epoch = self._parse_start_epoch(self.model_path)
        trained = False
        for epoch in range(start_epoch, self.epochs):
            loss = self.train_epoch(epoch)
            trained = True

            # save snapshot
            if (epoch + 1) % self.snapshot_interval == 0:
                self._snapshot(epoch + 1)
            if loss < best_loss:
                best_loss = loss
                self._snapshot('best')

            # save tensorboard
            if self.writer:
                self.writer.add_scalar('Train Loss', self.train_hist['loss'][-1], epoch)
                self.writer.add_scalar('Learning Rate', self._get_lr(), epoch)

        if not trained:
            # the checkpoint already reached the requested number of epochs
            print("Nothing to train: start epoch %d >= total epochs %d" % (start_epoch, self.epochs))
            return

        # finish all epoch
        self._snapshot(self.epochs)
        self.train_hist['total_time'].append(time.time() - start_time)
        print("Avg one epoch time: %.2f, total %d epochs time: %.2f" % (np.mean(self.train_hist['per_epoch_time']),
                                                                        self.epochs, self.train_hist['total_time'][0]))
        print("Training finish!... save training results")

    def train_epoch(self, epoch):
        """Run one pass over the training loader and return the mean loss."""
        epoch_start_time = time.time()
        loss_buf = []
        net = self._unwrapped_model()
        for pts, _ in self.train_loader:
            # single-sample batches are skipped
            if pts.size(0) == 1:
                continue

            if not self.no_cuda:
                pts = pts.cuda(self.first_gpu)

            # forward
            self.optimizer.zero_grad()
            output, _ = self.model(pts)

            # loss
            loss = net.get_loss(pts, output)

            # backward
            loss.backward()
            self.optimizer.step()
            loss_buf.append(loss.detach().cpu().numpy())

        # finish one epoch
        epoch_time = time.time() - epoch_start_time
        mean_loss = np.mean(loss_buf)
        self.train_hist['per_epoch_time'].append(epoch_time)
        self.train_hist['loss'].append(mean_loss)
        print(f'Epoch {epoch+1}: Loss {mean_loss}, time {epoch_time:.4f}s')
        return mean_loss

    def _snapshot(self, epoch):
        """Save the weights to ``<save_dir>/<dataset>_<epoch>.pkl``."""
        state_dict = self.model.state_dict()
        from collections import OrderedDict
        new_state_dict = OrderedDict()
        for key, val in state_dict.items():
            if key[:6] == 'module':
                name = key[7:]  # remove 'module.'
            else:
                name = key
            new_state_dict[name] = val
        save_dir = os.path.join(self.save_dir, self.dataset_name)
        torch.save(new_state_dict, save_dir + "_" + str(epoch) + '.pkl')
        print(f"Save model to {save_dir}_{str(epoch)}.pkl")

    def _load_pretrain(self, pretrain):
        """Load weights from ``pretrain``, dropping any ``module.`` prefix."""
        state_dict = torch.load(pretrain, map_location='cpu')
        from collections import OrderedDict
        new_state_dict = OrderedDict()
        for key, val in state_dict.items():
            if key[:6] == 'module':
                name = key[7:]  # remove 'module.'
            else:
                name = key
            new_state_dict[name] = val
        self.model.load_state_dict(new_state_dict)
        print(f"Load model from {pretrain}")

    def _get_lr(self, group=0):
        """Return the current learning rate of optimizer group ``group``."""
        return self.optimizer.param_groups[group]['lr']
224 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Point Cloud Segmentation for Classific Feature Learning
2 | ## Introduction
3 | This work aims to show whether learning a point cloud segmentation task is able to extract features performing well in classification. We do all experiments under the framework of DGCNN.
4 |
5 | Details for DGCNN see **Dynamic Graph CNN for Learning on Point Clouds** (https://arxiv.xilesou.top/pdf/1801.07829). We provide a PyTorch reimplementation for DGCNN in [antao97/dgcnn.pytorch](https://github.com/antao97/dgcnn.pytorch).
6 |
7 | DGCNN provides two types of networks, one for classification and one for segmentation. We use "DGCNN_Cls" to denote the network for classification and "DGCNN_Seg" the network for segmentation. The network structure of DGCNN is shown below:
8 |
9 |
10 |
11 |
12 | We also run experiments to see whether learning segmentation on meaningful point clouds assembled from base point clouds helps to learn better features for the base point clouds themselves. To run these experiments, we first separate the segmentation parts of each shape in the ShapeNetPart dataset into new shapes, which together form the ShapeNetPart Part dataset. Then, we train the network on the ShapeNetPart dataset as usual and test it on the ShapeNetPart Part dataset.
13 |
14 | Some visualized point clouds in our ShapeNetPart Part dataset:
15 |
16 |
17 |
18 |
19 |
20 | chair skateboard airplane
21 |
22 | Experimental results show that learning point cloud segmentation does help to extract features suitable for classification.
23 |
24 | **The key contributions of this work are as follows:**
25 |
26 | - Since the network provided by DGCNN for segmentation is supervised, we provide a revised DGCNN segmentation network with no category label.
27 | - When segmentation is trained and tested on intact point clouds, the trained model can help to extract better features.
28 | - When segmentation is trained on intact point clouds and tested on base point clouds, the trained model also can help to extract better features.
29 |
30 | If you find this work useful, please cite:
31 | ```
32 | @article{tao2020,
33 | Author = {An Tao},
34 | Title = {Point Cloud Segmentation for Classific Feature Learning},
35 | Journal = {https://github.com/antao97/PointCloudSegmentation},
36 | Year = {2020}
37 | }
38 | ```
39 |
40 |
41 | ## Requirements
42 | - Python 3.7
43 | - PyTorch 1.2
44 | - CUDA 10.0
45 | - Package: glob, h5py, tensorflow, tensorboard, tensorboardX and sklearn
46 |
47 |
48 | ## Download datasets
49 | Download the HDF5 format datasets (where each shape is sampled 2,048 points uniformly):
50 |
51 | - ShapeNetPart (338M) [[TsinghuaCloud]](https://cloud.tsinghua.edu.cn/f/c25d94e163454196a26b/) [[BaiduDisk]](https://pan.baidu.com/s/1yi4bMVBE2mV8NqVRtNLoqw)
52 | - ShapeNetPart Part (450M) [[TsinghuaCloud]](https://cloud.tsinghua.edu.cn/f/b6ee6e3b345744889e6b/) [[BaiduDisk]](https://pan.baidu.com/s/1uCotFveZ5R5ztRPIm1hfJQ)
53 | - ModelNet40 (194M) [[TsinghuaCloud]](https://cloud.tsinghua.edu.cn/f/b3d9fe3e2a514def8097/) [[BaiduDisk]](https://pan.baidu.com/s/1NQZgN8tvHVqQntxefcdVAg)
54 |
55 | You can find more details about the above datasets in this [repo](https://github.com/antao97/PointCloudDatasets).
56 |
57 |
58 | ## Experiment settings
59 | To evaluate the quality of extracted features, we use ShapeNetPart dataset to both train DGCNN and a linear SVM classifier. Specifically, we train the linear SVM classifier on ShapeNetPart dataset using the features (latent representations) obtained from the trained feature encoder.
60 |
61 | For transfer performance, we train the linear SVM classifier on ModelNet 40 dataset using the features (latent representations) obtained from the same network trained from the ShapeNetPart dataset.
62 |
63 | In this work we compare the performance for adopted training task among supervised segmentation, supervised segmentation without category label, supervised classification and unsupervised reconstruction. For supervised segmentation without category label, we discard the adding of categorical vector (mlp {64}) and directly repeat the 1024 dim feature into n x 1024. We do unsupervised reconstruction following the framework in this [repo](https://github.com/antao97/UnsupervisedPointCloudReconstruction) and use source points from sphere surface for decoder. We also change feature dimension into 1024. Except unsupervised reconstruction, we do all experiments under the framework of DGCNN.
64 |
65 | To train the network, run
66 | ```
67 | python main.py --exp_name --task --dataset_root --encoder --k <20 | 40> --feat_dims 1024 --batch_size <16 | 32> --dataset shapenetpart --gpu
68 | ```
69 | Use `--seg_no_label` if you want to run segmentation task without category label.
70 |
71 | You can download our already trained models from [[TsinghuaCloud]](https://cloud.tsinghua.edu.cn/d/d9e7a899582d432cbc11/) or [[BaiduDisk]](https://pan.baidu.com/s/1UigbY4jNts8LMZ6fqJXvxQ) and place them under `snapshot/`.
72 |
73 | Because this work was done before our PyTorch reimplementation of DGCNN, the training settings in this repo are slightly different from those in [antao97/dgcnn.pytorch](https://github.com/antao97/dgcnn.pytorch). You can set `self.epochs = 200` and `random_translate=False` in lines 61 and 120 of `segmentation.py` to follow the settings in [antao97/dgcnn.pytorch](https://github.com/antao97/dgcnn.pytorch).
74 |
75 | Besides the ShapeNetPart dataset, we also test the performance of the linear SVM classifier on the ShapeNetPart Part dataset, using the model trained on the ShapeNetPart dataset with the segmentation task.
76 |
77 | To evaluate the performance of a given trained model, run
78 | ```
79 | python main.py --eval --model_path --task --dataset_root --encoder --k <20 | 40> --feat_dims 1024 --dataset --gpu
80 | ```
81 |
82 | Use `--no_cuda` if you want to run on CPU.
83 |
84 | To use Tensorboard, run
85 | ```
86 | tensorboard --logdir tensorboard --bind_all
87 | ```
88 | You can find the Tensorboard records under `tensorboard/`.
89 |
90 |
91 | ## Classification accuracy of linear SVM classifier
92 | ### Results with best settings
93 | | Task | Info | Encoder | K | Batch Size | Epochs | ShapeNetPart | ModelNet40 |
94 | | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: |
95 | | Segmentation | Supervised | DGCNN_Seg | 40 | 32 | 250 | 98.9% | 89.4% |
96 | | Segmentation | Supervised (no category label) | DGCNN_Seg | 40 | 32 | 250 | **99.9%** | 89.2% |
97 | | Classification | Supervised | DGCNN_Cls | 40 | 32 | 250 | 99.8% | 89.6% |
98 | | Reconstruction | Unsupervised | DGCNN_Cls | 20 | 16 | 250 | 98.7% | **89.8%** |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 | ### Results with settings for segmentation task
108 | | Task | Info | Encoder | K | Batch Size | Epochs | ShapeNetPart | ModelNet40 |
109 | | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: |
110 | | Segmentation | Supervised | DGCNN_Seg | 40 | 32 | 250 | 98.9% | **89.4%** |
111 | | Segmentation | Supervised (no category label) | DGCNN_Seg | 40 | 32 | 250 | **99.9%** | 89.2% |
112 | | Classification | Supervised | DGCNN_Seg | 40 | 32 | 250 | **99.9%** | 86.8% |
113 | | Reconstruction | Unsupervised | DGCNN_Seg | 40 | 32 | 290 | 98.8% | 89.2% |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 | ### Results evaluated on ShapeNetPart Part dataset
123 | | Task | Info | Encoder | K | Batch Size | Epochs | Training Dataset | Eval Acc |
124 | | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: |
125 | | Segmentation | Supervised | DGCNN_Seg | 40 | 32 | 250 | ShapeNetPart | 85.0% |
126 | | Segmentation | Supervised (no category label) | DGCNN_Seg | 40 | 32 | 250 | ShapeNetPart | 84.0% |
127 | | Classification | Supervised | DGCNN_Cls | 40 | 32 | 250 | ShapeNetPart Part | **99.0%** |
128 | | Reconstruction | Unsupervised | DGCNN_Cls | 20 | 16 | 250 | ShapeNetPart Part | 87.5% |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 | ## Performance analysis
137 | Experimental results show that learning point cloud segmentation does help to extract features suitable for classification. However, simply adopting the training scheme from DGCNN for the segmentation task is not suitable for transfer learning. We believe better results can be obtained with a better training scheme.
138 |
139 |
--------------------------------------------------------------------------------
/classification.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | """
4 | @Author: An Tao
5 | @Contact: ta19@mails.tsinghua.edu.cn
6 | @File: classification.py
7 | @Time: 2020/1/2 10:26 AM
8 | """
9 |
10 | import os
11 | import sys
12 | import time
13 | import shutil
14 | import numpy as np
15 | import torch
16 | import torch.optim as optim
17 | from torch.optim.lr_scheduler import CosineAnnealingLR
18 | import sklearn.metrics as metrics
19 |
20 | from tensorboardX import SummaryWriter
21 |
22 | from model import ClassificationNet
23 | from dataset import Dataset
24 | from utils import Logger
25 |
26 |
class Classification(object):
    """Trainer for ``ClassificationNet`` (supervised classification task)."""

    def __init__(self, args):
        """Set up directories, data, model, optimizer and scheduler.

        Side effects: creates (or, after confirmation, recreates) the snapshot
        directory, recreates the tensorboard directory for a fresh run and
        replaces ``sys.stdout`` with a ``Logger``.
        """
        self.dataset_name = args.dataset
        if args.epochs is not None:
            self.epochs = args.epochs
        else:
            self.epochs = 250
        self.batch_size = args.batch_size
        self.snapshot_interval = args.snapshot_interval
        self.no_cuda = args.no_cuda
        self.model_path = args.model_path
        self.no_scheduler = args.no_scheduler

        # create exp directory
        self.experiment_id = self._experiment_id_for(args.exp_name, args.model_path)
        snapshot_root = 'snapshot/%s' % self.experiment_id
        tensorboard_root = 'tensorboard/%s' % self.experiment_id
        self.save_dir = os.path.join(snapshot_root, 'models/')
        self.tboard_dir = tensorboard_root

        # check arguments
        if self.model_path == '':
            if not os.path.exists(self.save_dir):
                os.makedirs(self.save_dir)
            else:
                choose = input("Remove " + self.save_dir + " ? (y/n)")
                if choose == "y":
                    shutil.rmtree(self.save_dir)
                    os.makedirs(self.save_dir)
                else:
                    sys.exit(0)
            # a fresh run always starts with an empty tensorboard directory
            if os.path.exists(self.tboard_dir):
                shutil.rmtree(self.tboard_dir)
            os.makedirs(self.tboard_dir)
        else:
            # resuming: keep what is there, but make sure the log file and
            # the snapshots have a directory to go to
            os.makedirs(self.save_dir, exist_ok=True)
        sys.stdout = Logger(os.path.join(snapshot_root, 'log.txt'))
        self.writer = SummaryWriter(log_dir=self.tboard_dir)

        # print args
        print(str(args))

        # get gpu id
        gids = ''.join(args.gpu.split())
        self.gpu_ids = [int(gid) for gid in gids.split(',')]
        self.first_gpu = self.gpu_ids[0]

        # generate dataset
        self.train_dataset = Dataset(
            root=args.dataset_root,
            dataset_name=args.dataset,
            split='all',
            num_points=args.num_points,
            random_translate=True,
            random_rotate=args.use_rotate,
            random_jitter=args.use_jitter
        )
        self.train_loader = torch.utils.data.DataLoader(
            self.train_dataset,
            batch_size=args.batch_size,
            shuffle=True,
            num_workers=args.workers
        )
        print("Training set size:", len(self.train_loader.dataset))

        # initialize model
        self.model = ClassificationNet(args)
        if self.model_path != '':
            self._load_pretrain(args.model_path)

        # load model to gpu
        if not self.no_cuda:
            if len(self.gpu_ids) != 1:  # multiple gpus
                self.model = torch.nn.DataParallel(self.model.cuda(self.first_gpu), self.gpu_ids)
            else:
                self.model = self.model.cuda(self.gpu_ids[0])

        # initialize optimizer; the cosine schedule starts from a larger
        # learning rate than the constant-rate setting
        self.parameter = self.model.parameters()
        if not self.no_scheduler:
            self.optimizer = optim.SGD(self.parameter, lr=0.1, weight_decay=1e-4)
            self.scheduler = CosineAnnealingLR(self.optimizer, self.epochs, eta_min=1e-3)
        else:
            self.optimizer = optim.SGD(self.parameter, lr=0.01, weight_decay=1e-4)

    @staticmethod
    def _experiment_id_for(exp_name, model_path):
        """Return the experiment id for the given name / checkpoint path."""
        if exp_name is not None:
            return "Classify_" + exp_name
        parts = model_path.split('/')
        # checkpoints live in <root>/<experiment_id>/models/<file>; the length
        # check keeps an empty or short path from raising IndexError
        if len(parts) >= 3 and parts[-2] == 'models':
            return parts[-3]
        return "Classify" + time.strftime('%m%d%H%M%S')

    @staticmethod
    def _parse_start_epoch(model_path):
        """Return the epoch encoded in ``<name>_<epoch>.pkl``.

        Returns 0 for an empty path or when the file name has no numeric
        suffix (for example ``<name>_best.pkl``).
        """
        if model_path == '':
            return 0
        stem = os.path.splitext(os.path.basename(model_path))[0]
        suffix = stem.rsplit('_', 1)[-1]
        return int(suffix) if suffix.isdecimal() else 0

    def _unwrapped_model(self):
        """Return the network itself, without a ``DataParallel`` wrapper."""
        if isinstance(self.model, torch.nn.DataParallel):
            return self.model.module
        return self.model

    def run(self):
        """Train until ``self.epochs``, saving periodic and best snapshots."""
        self.train_hist = {
            'loss': [],
            'per_epoch_time': [],
            'total_time': []
        }
        best_loss = 1000000000
        print('Training start!!')
        start_time = time.time()
        self.model.train()
        start_epoch = self._parse_start_epoch(self.model_path)
        trained = False
        for epoch in range(start_epoch, self.epochs):
            loss = self.train_epoch(epoch)
            trained = True

            # save snapshot
            if (epoch + 1) % self.snapshot_interval == 0:
                self._snapshot(epoch + 1)
            if loss < best_loss:
                best_loss = loss
                self._snapshot('best')

            # save tensorboard
            if self.writer:
                self.writer.add_scalar('Train Loss', self.train_hist['loss'][-1], epoch)
                self.writer.add_scalar('Learning Rate', self._get_lr(), epoch)

        if not trained:
            # the checkpoint already reached the requested number of epochs
            print("Nothing to train: start epoch %d >= total epochs %d" % (start_epoch, self.epochs))
            return

        # finish all epoch
        self._snapshot(self.epochs)
        self.train_hist['total_time'].append(time.time() - start_time)
        print("Avg one epoch time: %.2f, total %d epochs time: %.2f" % (np.mean(self.train_hist['per_epoch_time']),
                                                                        self.epochs, self.train_hist['total_time'][0]))
        print("Training finish!... save training results")

    def train_epoch(self, epoch):
        """Run one pass over the training loader and return the mean loss."""
        epoch_start_time = time.time()
        loss_buf = []
        train_pred = []
        train_true = []
        net = self._unwrapped_model()
        for pts, label in self.train_loader:
            # single-sample batches are skipped
            if pts.size(0) == 1:
                continue
            if not self.no_cuda:
                pts = pts.cuda(self.first_gpu)
                label = label.cuda(self.first_gpu)

            # forward
            self.optimizer.zero_grad()
            output, _ = self.model(pts)

            # loss
            loss = net.get_loss(output, label)

            # backward
            loss.backward()
            self.optimizer.step()
            loss_buf.append(loss.detach().cpu().numpy())

            # predicted class = index of the largest output along dim 1
            preds = output.max(dim=1)[1]
            train_true.append(label.view(-1).cpu().numpy())
            train_pred.append(preds.detach().cpu().numpy())

        # finish one epoch
        if not self.no_scheduler:
            self.scheduler.step()
        epoch_time = time.time() - epoch_start_time
        mean_loss = np.mean(loss_buf)
        self.train_hist['per_epoch_time'].append(epoch_time)
        self.train_hist['loss'].append(mean_loss)
        train_true = np.concatenate(train_true)
        train_pred = np.concatenate(train_pred)
        print("Epoch %d: Loss %.6f, train acc %.6f, train avg acc %.6f, time %.4fs" % (epoch+1,
                                                                                       mean_loss,
                                                                                       metrics.accuracy_score(
                                                                                           train_true, train_pred),
                                                                                       metrics.balanced_accuracy_score(
                                                                                           train_true, train_pred),
                                                                                       epoch_time))
        return mean_loss

    def _snapshot(self, epoch):
        """Save the weights to ``<save_dir>/<dataset>_<epoch>.pkl``."""
        state_dict = self.model.state_dict()
        from collections import OrderedDict
        new_state_dict = OrderedDict()
        for key, val in state_dict.items():
            if key[:6] == 'module':
                name = key[7:]  # remove 'module.'
            else:
                name = key
            new_state_dict[name] = val
        save_dir = os.path.join(self.save_dir, self.dataset_name)
        torch.save(new_state_dict, save_dir + "_" + str(epoch) + '.pkl')
        print(f"Save model to {save_dir}_{str(epoch)}.pkl")

    def _load_pretrain(self, pretrain):
        """Load weights from ``pretrain``, dropping any ``module.`` prefix."""
        state_dict = torch.load(pretrain, map_location='cpu')
        from collections import OrderedDict
        new_state_dict = OrderedDict()
        for key, val in state_dict.items():
            if key[:6] == 'module':
                name = key[7:]  # remove 'module.'
            else:
                name = key
            new_state_dict[name] = val
        self.model.load_state_dict(new_state_dict)
        print(f"Load model from {pretrain}")

    def _get_lr(self, group=0):
        """Return the current learning rate of optimizer group ``group``."""
        return self.optimizer.param_groups[group]['lr']
243 |
--------------------------------------------------------------------------------
/visualization.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | """
4 | @Author: An Tao
5 | @Contact: ta19@mails.tsinghua.edu.cn
6 | @File: visualization.py
7 | @Time: 2020/1/2 10:26 AM
8 | """
9 |
10 | import os
11 | import time
12 | import numpy as np
13 | import torch
14 | import itertools
15 | import argparse
16 | from glob import glob
17 |
18 | from model import ReconstructionNet
19 |
def standardize_bbox(pcl, points_per_object):
    """Sample points and rescale them into a centred unit bounding box.

    ``points_per_object`` distinct rows of ``pcl`` are drawn at random, moved
    so that the centre of their bounding box is the origin, and divided by the
    longest bounding-box edge. The centre and scale are printed.

    Returns:
        The sampled, normalised points as a ``float32`` array.
    """
    chosen = np.random.choice(pcl.shape[0], points_per_object, replace=False)
    np.random.shuffle(chosen)
    sample = pcl[chosen]  # n by 3
    lower = np.min(sample, axis=0)
    upper = np.max(sample, axis=0)
    center = (lower + upper) / 2.
    scale = np.max(upper - lower)
    print("Center: {}, Scale: {}".format(center, scale))
    # every coordinate now lies in [-0.5, 0.5]
    return ((sample - center) / scale).astype(np.float32)
31 |
# Opening part of the scene file; it is the first segment written by mitsuba().
# NOTE(review): the template body is only blank lines in this view —
# presumably the XML markup was stripped during extraction; verify against
# the repository before relying on it.
xml_head = \
"""






























"""

# Per-point template; mitsuba() fills it with str.format(x, y, z, *color).
# NOTE(review): body is blank in this view as well — TODO confirm the
# placeholders against the repository.
xml_ball_segment = \
"""










"""

# Closing part of the scene file; it is the last segment written by mitsuba().
# NOTE(review): body is blank in this view as well — TODO confirm.
xml_tail = \
"""


















"""
101 |
def colormap(x, y, z):
    """Turn a position into a colour of unit Euclidean length.

    Each component is clipped to ``[0.001, 1.0]`` and the resulting vector is
    divided by its norm.

    Returns:
        A list with the three normalised components.
    """
    rgb = np.clip(np.array([x, y, z]), 0.001, 1.0)
    rgb /= np.sqrt(np.sum(rgb ** 2))
    return [rgb[0], rgb[1], rgb[2]]
108 |
def mitsuba(pcl, path, clr=None):
    """Write an XML scene file that draws every point of ``pcl`` as a ball.

    Args:
        pcl: Array of points, indexed as ``pcl[:, [2, 0, 1]]`` (so at least
            three columns). The caller's array is not modified.
        path: Destination of the XML file.
        clr: Colouring mode. ``None`` colours each point by its position via
            ``colormap``; the strings ``"plane"``, ``"sphere"`` and
            ``"gaussian"`` select a per-point palette (the last two are
            loaded from ``sphere.npy`` / ``gaussian.npy`` in the working
            directory); any other value is unpacked as one colour shared by
            all points.
    """
    xml_segments = [xml_head]

    # The points are used as given; no centring or rescaling is applied here.

    # reorder the axes and mirror the first one; fancy indexing returns a
    # copy, so the in-place negation does not touch the caller's data
    pcl = pcl[:, [2, 0, 1]]
    pcl[:, 0] *= -1
    h = np.min(pcl[:, 2])

    # Only a string can name a palette. Checking the type first keeps list or
    # NumPy-array colours away from ``==``, which would compare element-wise
    # and make the ``if`` ambiguous.
    color_all = None
    if isinstance(clr, str):
        if clr == "plane":
            clrgrid = [[0, 1, 45], [1, 0, 45]]
            b = np.linspace(*clrgrid[0])
            c = np.linspace(*clrgrid[1])
            color_all = np.array(list(itertools.product(b, c)))
            # 45 x 45 grid -> 2025 colours
            color_all = np.concatenate((np.linspace(1, 0, 2025)[..., np.newaxis], color_all), axis=1)
        elif clr == "sphere":
            color_all = np.load("sphere.npy")
            color_all = (color_all + 0.3) / 0.6
        elif clr == "gaussian":
            color_all = np.load("gaussian.npy")
            color_all = (color_all + 0.3) / 0.6

    for i in range(pcl.shape[0]):
        if clr is None:
            color = colormap(pcl[i,0]+0.5,pcl[i,1]+0.5,pcl[i,2]+0.5)
        elif color_all is not None:
            color = color_all[i]
        else:
            color = clr
        if h < -0.25:
            # shift the cloud vertically when its lowest point is below -0.25
            xml_segments.append(xml_ball_segment.format(pcl[i,0],pcl[i,1],pcl[i,2]-h-0.6875, *color))
        else:
            xml_segments.append(xml_ball_segment.format(pcl[i,0],pcl[i,1],pcl[i,2], *color))
    xml_segments.append(xml_tail)

    xml_content = ''.join(xml_segments)

    with open(path, 'w') as f:
        f.write(xml_content)
151 |
def load_pretrain(model, pretrain):
    """Load the weights stored at ``pretrain`` into ``model`` and return it.

    Keys that begin with ``module`` lose their first seven characters (the
    ``module.`` prefix) before the state dict is applied.
    """
    from collections import OrderedDict
    checkpoint = torch.load(pretrain, map_location='cpu')
    cleaned = OrderedDict(
        (key[7:] if key[:6] == 'module' else key, tensor)  # remove 'module.'
        for key, tensor in checkpoint.items()
    )
    model.load_state_dict(cleaned)
    print(f"Load model from {pretrain}")
    return model
165 |
166 |
def visualize(args):
    """Generate Mitsuba XML scenes for one dataset item and its reconstructions.

    Reads ``model_path``, ``exp_name``, ``dataset``, ``dataset_root``,
    ``split``, ``item``, ``num_points``, ``shape``, ``draw_original`` and
    ``draw_source_points`` from ``args``. Files are written below
    ``mitsuba/<experiment_id>/<dataset>/<split><item>/``.

    ``args.model_path`` is interpreted as follows:
        * last path component is ``models``: every ``*.pkl`` inside that
          directory is loaded in sorted order and rendered;
        * empty string: the freshly initialised network is rendered;
        * anything else: treated as a single checkpoint file.
    """
    # create exp directory
    file = args.model_path.split('/')
    # Decide single-checkpoint vs. directory mode independently of the
    # experiment name, so it is always defined (e.g. when --exp_name is set).
    one_model = file[-1] != 'models'
    if args.exp_name is not None:
        experiment_id = args.exp_name
    elif file[-1].endswith('.pkl') and len(file) >= 3:
        # .../<experiment>/models/<name>.pkl
        experiment_id = file[-3]
    elif file[-1] == 'models' and len(file) >= 2:
        # .../<experiment>/models
        experiment_id = file[-2]
    else:
        experiment_id = time.strftime('%m%d%H%M%S')
    save_root = os.path.join('mitsuba', experiment_id, args.dataset, args.split + str(args.item))
    os.makedirs(save_root, exist_ok=True)

    # initialize dataset
    from dataset import Dataset
    dataset = Dataset(root=args.dataset_root, dataset_name=args.dataset,
                      num_points=args.num_points, split=args.split, load_name=True)

    # load data from dataset
    pts, _, n = dataset[args.item]
    print(f"Dataset: {args.dataset}, split: {args.split}, item: {args.item}, category: {n}")

    item_tag = args.split + str(args.item) + '_' + str(n)

    # generate XML file for original point cloud
    if args.draw_original:
        save_path = os.path.join(save_root, args.dataset + '_' + item_tag + '_origin.xml')
        color = [0.4, 0.4, 0.6]
        mitsuba(pts.numpy(), save_path, color)

    # generate XML file for decoder source points
    if args.draw_source_points:
        if args.shape == 'plane':
            meshgrid = [[-0.3, 0.3, 45], [-0.3, 0.3, 45]]
            x = np.linspace(*meshgrid[0])
            y = np.linspace(*meshgrid[1])
            points = np.array(list(itertools.product(x, y)))
            # lift the 2D grid into 3D with a zero third coordinate
            points = np.concatenate((points, np.zeros(2025)[..., np.newaxis]), axis=1)
        elif args.shape == 'sphere':
            points = np.load("sphere.npy")
        elif args.shape == 'gaussian':
            points = np.load("gaussian.npy")
        save_path = os.path.join(save_root, args.dataset + '_' + item_tag + '_epoch0.xml')
        mitsuba(points, save_path, clr=args.shape)

    # initialize model
    model = ReconstructionNet(args)

    def render(net, save_path):
        # Run one forward pass on the selected item and dump the output cloud.
        net.eval()
        reconstructed_pl, _ = net(pts.view(1, -1, 3))
        mitsuba(reconstructed_pl[0].detach().numpy(), save_path, clr=args.shape)

    if one_model:
        # Test the whole path, not its first component: an absolute path
        # starts with '/' and would otherwise never be loaded.
        if args.model_path != '':
            model = load_pretrain(model, args.model_path)
        render(model, os.path.join(save_root, file[-1][:-4] + item_tag + '.xml'))
    else:
        load_path = glob(os.path.join(args.model_path, '*.pkl'))
        load_path.sort()
        for path in load_path:
            model_name = os.path.basename(path)
            model = load_pretrain(model, path)
            render(model, os.path.join(save_root, model_name[:-4] + '_' + args.dataset + '_' + item_tag + '.xml'))
236 |
237 |
if __name__ == '__main__':
    # Command-line entry point: parse the options and render the requested item.
    parser = argparse.ArgumentParser(description='Unsupervised Point Cloud Feature Learning')
    parser.add_argument('--exp_name', type=str, default=None, metavar='N',
                        help='Name of the experiment')
    parser.add_argument('--item', type=int, default=0, metavar='N',
                        help='Item of point cloud to load')
    parser.add_argument('--split', type=str, default='train', metavar='N',
                        choices=['train', 'test', 'val', 'trainval', 'all'],
                        help='Split to use, [train, test, val, trainval, all]')
    # The default must be one of the choices: argparse does not validate
    # defaults, so a misspelt default would reach the model unchecked.
    parser.add_argument('--encoder', type=str, default='foldnet', metavar='N',
                        choices=['foldnet', 'dgcnn_cls', 'dgcnn_seg'],
                        help='Encoder to use, [foldnet, dgcnn_cls, dgcnn_seg]')
    parser.add_argument('--feat_dims', type=int, default=512, metavar='N',
                        help='Number of dims for feature ')
    parser.add_argument('--k', type=int, default=None, metavar='N',
                        help='Num of nearest neighbors to use for KNN')
    parser.add_argument('--shape', type=str, default='plane', metavar='N',
                        choices=['plane', 'sphere', 'gaussian'],
                        help='Shape of points to input decoder, [plane, sphere, gaussian]')
    parser.add_argument('--dataset', type=str, default='shapenetcorev2', metavar='N',
                        choices=['shapenetcorev2', 'modelnet40', 'modelnet10', 'shapenetpartpart'],
                        help='Dataset to use, [shapenetcorev2, modelnet40, modelnet10, shapenetpartpart]')
    parser.add_argument('--dataset_root', type=str, default='../dataset', help="Dataset root path")
    parser.add_argument('--num_points', type=int, default=2048,
                        help='Num of points to use')
    parser.add_argument('--model_path', type=str, default='', metavar='N',
                        help='Path to load model')
    parser.add_argument('--draw_original', action='store_true',
                        help='Draw original point cloud')
    parser.add_argument('--draw_source_points', action='store_true',
                        help='Draw source points for decoder')
    args = parser.parse_args()

    print(str(args))

    visualize(args)
--------------------------------------------------------------------------------
/segmentation.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | """
4 | @Author: An Tao
5 | @Contact: ta19@mails.tsinghua.edu.cn
6 | @File: segmentation.py
7 | @Time: 2020/1/2 10:26 AM
8 | """
9 |
10 | import os
11 | import sys
12 | import time
13 | import shutil
14 | import numpy as np
15 | import torch
16 | import torch.optim as optim
17 | from torch.optim.lr_scheduler import CosineAnnealingLR
18 | import sklearn.metrics as metrics
19 |
20 | from tensorboardX import SummaryWriter
21 |
22 | from model import SegmentationNet
23 | from dataset import Dataset
24 | from utils import Logger
25 |
26 | torch.multiprocessing.set_sharing_strategy('file_system')
27 |
# seg_num[c] is the number of part labels of shape category c and
# index_start[c] is the first global part id of that category (the running
# sum of seg_num), as used by calculate_shape_IoU below.
seg_num = [4, 2, 2, 4, 4, 3, 3, 2, 4, 2, 6, 2, 3, 3, 3, 3]
index_start = [0, 4, 6, 8, 12, 16, 19, 22, 24, 28, 30, 36, 38, 41, 44, 47]


def calculate_shape_IoU(pred_np, seg_np, label, class_choice):
    """Return the mean part IoU of every shape in a batch.

    Args:
        pred_np: predicted part id per point, indexed ``[shape, point]``.
        seg_np: ground-truth part id per point, same layout as ``pred_np``.
        label: category index of every shape; any shape that flattens to one
            entry per shape (e.g. ``(B,)`` or ``(B, 1)``).
        class_choice: when falsy, the parts of a shape are the global ids
            ``index_start[c] .. index_start[c] + seg_num[c] - 1`` of its own
            category ``c``; when truthy, the parts are ``0 .. seg_num[c0] - 1``
            where ``c0`` is the category of the first shape.

    Returns:
        A list with one mean IoU per shape. A part that appears in neither
        prediction nor ground truth counts as IoU 1.
    """
    # reshape(-1) instead of squeeze(): squeeze() turns a single-shape batch
    # into a 0-d array that can no longer be indexed per shape.
    label = label.reshape(-1)
    shape_ious = []
    for shape_idx in range(seg_np.shape[0]):
        if not class_choice:
            start_index = index_start[label[shape_idx]]
            num = seg_num[label[shape_idx]]
            parts = range(start_index, start_index + num)
        else:
            parts = range(seg_num[label[0]])
        pred_row = pred_np[shape_idx]
        seg_row = seg_np[shape_idx]
        part_ious = []
        for part in parts:
            pred_mask = pred_row == part
            seg_mask = seg_row == part
            I = np.sum(np.logical_and(pred_mask, seg_mask))
            U = np.sum(np.logical_or(pred_mask, seg_mask))
            if U == 0:
                iou = 1  # If the union of groundtruth and prediction points is empty, then count part IoU as 1
            else:
                iou = I / float(U)
            part_ious.append(iou)
        shape_ious.append(np.mean(part_ious))
    return shape_ious
53 |
54 |
class Segmentation(object):
    """Training driver for ``SegmentationNet``.

    Builds the output directories, dataset, data loader, model and optimizer
    from the parsed command-line ``args``; ``run()`` then trains for
    ``self.epochs`` epochs, writing snapshots below ``snapshot/<id>/models/``
    and scalars to ``tensorboard/<id>``.
    """

    def __init__(self, args):
        """Set up directories, logging, data, model and optimizer from ``args``."""
        self.dataset_name = args.dataset
        self.epochs = args.epochs if args.epochs is not None else 250
        self.batch_size = args.batch_size
        self.snapshot_interval = args.snapshot_interval
        self.no_cuda = args.no_cuda
        self.model_path = args.model_path
        self.class_choice = args.class_choice
        self.no_scheduler = args.no_scheduler
        self.loss = args.loss

        # create exp directory
        file = args.model_path.split('/')
        if args.exp_name is not None:
            self.experiment_id = "Segment_" + args.exp_name
        elif len(file) >= 3 and file[-2] == 'models':
            # resuming from .../<experiment>/models/<name>.pkl
            self.experiment_id = file[-3]
        else:
            # also covers an empty model_path, whose split has one component
            self.experiment_id = "Segment" + time.strftime('%m%d%H%M%S')
        snapshot_root = 'snapshot/%s' % self.experiment_id
        tensorboard_root = 'tensorboard/%s' % self.experiment_id
        self.save_dir = os.path.join(snapshot_root, 'models/')
        self.tboard_dir = tensorboard_root

        # check arguments
        # NOTE(review): nesting reconstructed from a listing without
        # indentation; the tensorboard reset is assumed to belong to the
        # fresh-run branch so that resuming keeps earlier logs -- confirm.
        if self.model_path == '':
            if not os.path.exists(self.save_dir):
                os.makedirs(self.save_dir)
            else:
                choose = input("Remove " + self.save_dir + " ? (y/n)")
                if choose == "y":
                    shutil.rmtree(self.save_dir)
                    os.makedirs(self.save_dir)
                else:
                    sys.exit(0)
            if not os.path.exists(self.tboard_dir):
                os.makedirs(self.tboard_dir)
            else:
                shutil.rmtree(self.tboard_dir)
                os.makedirs(self.tboard_dir)
        sys.stdout = Logger(os.path.join(snapshot_root, 'log.txt'))
        self.writer = SummaryWriter(log_dir=self.tboard_dir)

        # print args
        print(str(args))

        # get gpu id
        # The first id hosts the loss computation; the remaining ids host the
        # model. With a single id the same device serves both purposes.
        gids = ''.join(args.gpu.split())
        self.gpu_ids = [int(gid) for gid in gids.split(',')]
        self.loss_gpu = self.gpu_ids[0]
        if len(self.gpu_ids) > 1:
            self.first_gpu = self.gpu_ids[1]
            self.gpu_ids = self.gpu_ids[1:]
        else:
            self.first_gpu = self.gpu_ids[0]

        # generate dataset
        self.train_dataset = Dataset(
            root=args.dataset_root,
            dataset_name=args.dataset,
            split='all',
            num_points=args.num_points,
            segmentation=True,
            random_translate=True,
            random_rotate=args.use_rotate,
            random_jitter=args.use_jitter
        )
        self.train_loader = torch.utils.data.DataLoader(
            self.train_dataset,
            batch_size=args.batch_size,
            shuffle=True,
            num_workers=args.workers
        )
        print("Training set size:", len(self.train_loader.dataset))

        # initialize model
        self.seg_num_all = self.train_dataset.seg_num_all
        self.seg_start_index = self.train_dataset.seg_start_index
        self.model = SegmentationNet(args, self.seg_num_all)
        if self.model_path != '':
            # must happen before any DataParallel wrapping: the checkpoint
            # keys carry no 'module.' prefix
            self._load_pretrain(args.model_path)

        # load model to gpu
        if not self.no_cuda:
            if len(self.gpu_ids) != 1:  # multiple gpus
                self.model = torch.nn.DataParallel(self.model.cuda(self.first_gpu), self.gpu_ids)
            else:
                self.model = self.model.cuda(self.gpu_ids[0])

        # initialize optimizer
        self.parameter = self.model.parameters()
        if not self.no_scheduler:
            self.optimizer = optim.SGD(self.parameter, lr=0.1, weight_decay=1e-4)
            self.scheduler = CosineAnnealingLR(self.optimizer, self.epochs, eta_min=1e-3)
        else:
            self.optimizer = optim.SGD(self.parameter, lr=0.01, weight_decay=1e-4)

    @staticmethod
    def _parse_start_epoch(model_path):
        """Return the epoch encoded as trailing digits of the checkpoint stem.

        ``.../shapenetpart_250.pkl`` gives 250; a stem without trailing
        digits (e.g. ``..._best.pkl``) gives 0.
        """
        stem = os.path.splitext(os.path.basename(model_path))[0]
        digits = stem[len(stem.rstrip('0123456789')):]
        return int(digits) if digits else 0

    @staticmethod
    def _strip_module_prefix(state_dict):
        """Return a copy of ``state_dict`` without the DataParallel ``module.`` key prefix."""
        from collections import OrderedDict
        prefix = 'module.'
        new_state_dict = OrderedDict()
        for key, val in state_dict.items():
            name = key[len(prefix):] if key.startswith(prefix) else key
            new_state_dict[name] = val
        return new_state_dict

    def _unwrapped_model(self):
        """Return the underlying network, looking through a DataParallel wrapper if present."""
        if isinstance(self.model, torch.nn.DataParallel):
            return self.model.module
        return self.model

    def run(self):
        """Train from the start epoch up to ``self.epochs``, snapshotting along the way."""
        self.train_hist = {
            'loss': [],
            'per_epoch_time': [],
            'total_time': []
        }
        best_loss = 1000000000
        print('Training start!!')
        start_time = time.time()
        self.model.train()
        if self.model_path != '':
            start_epoch = self._parse_start_epoch(self.model_path)
        else:
            start_epoch = 0
        finished_epochs = start_epoch
        for epoch in range(start_epoch, self.epochs):
            loss = self.train_epoch(epoch)
            finished_epochs = epoch + 1

            # save snapshot
            if (epoch + 1) % self.snapshot_interval == 0:
                self._snapshot(epoch + 1)
            if loss < best_loss:
                best_loss = loss
                self._snapshot('best')

            # save tensorboard
            if self.writer:
                self.writer.add_scalar('Train Loss', self.train_hist['loss'][-1], epoch)
                self.writer.add_scalar('Learning Rate', self._get_lr(), epoch)

        # finish all epoch
        # Only snapshot when at least one epoch ran; otherwise there is no
        # new state (and previously `epoch` would have been undefined here).
        if finished_epochs > start_epoch:
            self._snapshot(finished_epochs)
        self.train_hist['total_time'].append(time.time() - start_time)
        epoch_times = self.train_hist['per_epoch_time']
        avg_epoch_time = np.mean(epoch_times) if epoch_times else 0.0
        print("Avg one epoch time: %.2f, total %d epochs time: %.2f" % (avg_epoch_time,
                                                                        self.epochs, self.train_hist['total_time'][0]))
        print("Training finish!... save training results")

    def train_epoch(self, epoch):
        """Run one pass over the training loader and return the mean batch loss.

        ``epoch`` is only used for the progress message (printed 1-based).
        """
        epoch_start_time = time.time()
        loss_buf = []
        train_true_cls = []
        train_pred_cls = []
        train_true_seg = []
        train_pred_seg = []
        train_label_seg = []
        dist_ap_buf = []
        dist_an_buf = []
        for pts, label, seg in self.train_loader:
            # Drop shapes whose points all carry one part label
            # (max label == min label).
            num_seg = seg.max(1)[0] - seg.min(1)[0]
            if 0 in num_seg:
                keep = num_seg != 0
                pts = pts[keep]
                label = label[keep]
                seg = seg[keep]

            # Skip batches left with fewer than two shapes.
            # NOTE(review): presumably because the batch-norm layers cannot
            # train on a single sample -- confirm.
            if pts.size(0) < 2:
                continue
            seg = seg - self.seg_start_index
            # one-hot category vector; 16 columns match the 16 entries of seg_num
            label_one_hot = torch.zeros((label.size(0), 16))
            label_one_hot.scatter_(1, label.view(-1, 1).long(), 1)

            if not self.no_cuda:
                pts = pts.cuda(self.first_gpu)
                label_one_hot = label_one_hot.cuda(self.first_gpu)
                seg = seg.cuda(self.first_gpu)

            # forward
            self.optimizer.zero_grad()
            output, _ = self.model(pts, label_one_hot)
            output = output.permute(0, 2, 1).contiguous()
            net = self._unwrapped_model()

            # loss
            if self.loss == 'softmax':
                loss = net.get_loss(output.view(-1, self.seg_num_all), seg.view(-1))

                # backward
                loss.backward()
                self.optimizer.step()
                loss_buf.append(loss.detach().cpu().numpy())

                pred = output.max(dim=2)[1]
                seg_np = seg.cpu().numpy()  # (batch_size, num_points)
                pred_np = pred.detach().cpu().numpy()  # (batch_size, num_points)
                train_true_cls.append(seg_np.reshape(-1))  # (batch_size * num_points)
                train_pred_cls.append(pred_np.reshape(-1))  # (batch_size * num_points)
                train_true_seg.append(seg_np)
                train_pred_seg.append(pred_np)
                train_label_seg.append(label.reshape(-1))

            elif self.loss == 'triplet':
                loss, dist_ap, dist_an = net.get_loss(output, seg, new_device=self.loss_gpu)

                # backward
                loss.backward()
                self.optimizer.step()
                loss_buf.append(loss.detach().cpu().numpy())
                dist_ap_buf.append(np.mean(dist_ap.detach().cpu().numpy()))
                dist_an_buf.append(np.mean(dist_an.detach().cpu().numpy()))

            else:
                raise ValueError("Unknown loss: %r" % (self.loss,))

        # finish one epoch
        if not self.no_scheduler:
            self.scheduler.step()
        epoch_time = time.time() - epoch_start_time
        self.train_hist['per_epoch_time'].append(epoch_time)
        self.train_hist['loss'].append(np.mean(loss_buf))
        if self.loss == 'softmax':
            train_true_cls = np.concatenate(train_true_cls)
            train_pred_cls = np.concatenate(train_pred_cls)
            train_acc = metrics.accuracy_score(train_true_cls, train_pred_cls)
            avg_per_class_acc = metrics.balanced_accuracy_score(train_true_cls, train_pred_cls)
            train_true_seg = np.concatenate(train_true_seg, axis=0)
            train_pred_seg = np.concatenate(train_pred_seg, axis=0)
            train_label_seg = np.concatenate(train_label_seg)
            train_ious = calculate_shape_IoU(train_pred_seg, train_true_seg, train_label_seg, self.class_choice)
            print("Epoch %d: Loss %.6f, train acc %.6f, train avg acc %.6f, train iou: %.6f, time %.4fs" % (epoch+1,
                                                                                                          np.mean(loss_buf),
                                                                                                          train_acc,
                                                                                                          avg_per_class_acc,
                                                                                                          np.mean(train_ious),
                                                                                                          epoch_time))
        elif self.loss == 'triplet':
            print("Epoch %d: Loss %.6f, dist an %.6f, dist ap %.6f, time %.4fs" % (epoch+1,
                                                                                  np.mean(loss_buf),
                                                                                  np.mean(dist_an_buf),
                                                                                  np.mean(dist_ap_buf),
                                                                                  epoch_time))
        return np.mean(loss_buf)

    def _snapshot(self, epoch):
        """Save the model weights as ``<save_dir>/<dataset_name>_<epoch>.pkl``.

        ``epoch`` may be a number or a tag such as ``'best'``. Keys are stored
        without the DataParallel ``module.`` prefix.
        """
        new_state_dict = self._strip_module_prefix(self.model.state_dict())
        save_dir = os.path.join(self.save_dir, self.dataset_name)
        torch.save(new_state_dict, save_dir + "_" + str(epoch) + '.pkl')
        print(f"Save model to {save_dir}_{str(epoch)}.pkl")

    def _load_pretrain(self, pretrain):
        """Load the checkpoint at ``pretrain`` (mapped to CPU) into ``self.model``."""
        state_dict = torch.load(pretrain, map_location='cpu')
        self.model.load_state_dict(self._strip_module_prefix(state_dict))
        print(f"Load model from {pretrain}")

    def _get_lr(self, group=0):
        """Return the current learning rate of optimizer parameter group ``group``."""
        return self.optimizer.param_groups[group]['lr']
332 |
--------------------------------------------------------------------------------
/model.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | """
4 | @Author: An Tao
5 | @Contact: ta19@mails.tsinghua.edu.cn
6 | @File: model.py
7 | @Time: 2020/1/2 10:26 AM
8 | """
9 |
10 | import torch
11 | import torch.nn as nn
12 | import torch.nn.init as init
13 | import torch.nn.functional as F
14 | import numpy as np
15 | import itertools
16 | from loss import ChamferLoss, CrossEntropyLoss, TripletLoss
17 |
18 |
def knn(x, k):
    """Return flattened indices of the k nearest neighbours of every point.

    Args:
        x: tensor indexed as (batch_size, num_dims, num_points).
        k: number of neighbours per point.

    Returns:
        1-D index tensor of length batch_size * num_points * k. Every index is
        offset by ``batch * num_points`` so it addresses rows of the features
        flattened to (batch_size * num_points, num_dims).
    """
    n_batch, n_pts = x.size(0), x.size(2)

    # Negative squared Euclidean distance: -|a|^2 + 2 a.b - |b|^2.
    inner = -2 * torch.matmul(x.transpose(2, 1), x)
    sq_norm = torch.sum(x ** 2, dim=1, keepdim=True)
    neg_sq_dist = -sq_norm - inner - sq_norm.transpose(2, 1)

    # The largest negative distances belong to the closest points.
    neighbours = neg_sq_dist.topk(k=k, dim=-1)[1]  # (batch_size, num_points, k)

    offsets = torch.arange(0, n_batch, device=neighbours.device).view(-1, 1, 1) * n_pts
    return (neighbours + offsets).view(-1)
37 |
38 |
def local_cov(pts, idx):
    """Append a 9-value neighbour outer product to every point.

    For each point, the first two gathered neighbours are multiplied as an
    outer product (3 x 3), flattened to 9 values and concatenated behind the
    point coordinates.

    Args:
        pts: tensor indexed as (batch_size, 3, num_points).
        idx: flattened neighbour indices as returned by ``knn``.

    Returns:
        Tensor of shape (batch_size, 12, num_points).
    """
    n_batch, n_pts = pts.size(0), pts.size(2)
    pts = pts.view(n_batch, -1, n_pts)  # (batch_size, 3, num_points)
    n_dims = pts.size(1)

    rows = pts.transpose(2, 1).contiguous().view(n_batch * n_pts, -1)
    nbrs = rows[idx, :].view(n_batch, n_pts, -1, n_dims)  # (batch_size, num_points, k, 3)

    first = nbrs[:, :, 0].unsqueeze(3)   # (batch_size, num_points, 3, 1)
    second = nbrs[:, :, 1].unsqueeze(2)  # (batch_size, num_points, 1, 3)
    outer = torch.matmul(first, second)  # (batch_size, num_points, 3, 3)
    outer = outer.view(n_batch, n_pts, 9).transpose(2, 1)  # (batch_size, 9, num_points)

    return torch.cat((pts, outer), dim=1)  # (batch_size, 12, num_points)
55 |
56 |
def local_maxpool(x, idx):
    """Max-pool every feature channel over each point's neighbourhood.

    Args:
        x: tensor indexed as (batch_size, num_dims, num_points).
        idx: flattened neighbour indices as returned by ``knn``.

    Returns:
        Tensor of shape (batch_size, num_points, num_dims); note that the
        point axis comes before the feature axis in the result.
    """
    n_batch, n_pts = x.size(0), x.size(2)
    x = x.view(n_batch, -1, n_pts)
    n_dims = x.size(1)

    rows = x.transpose(2, 1).contiguous().view(n_batch * n_pts, -1)  # (batch_size*n, num_dims)
    gathered = rows[idx, :].view(n_batch, n_pts, -1, n_dims)         # (batch_size, num_points, k, num_dims)
    pooled = torch.max(gathered, dim=2)[0]                           # (batch_size, num_points, num_dims)

    return pooled
70 |
71 |
def get_graph_feature(x, k=20, idx=None):
    """Build edge features pairing every point with its k neighbours.

    For each point p and neighbour q the feature is ``(q - p, p)``.

    Args:
        x: tensor indexed as (batch_size, num_dims, num_points).
        k: number of neighbours; must match ``idx`` when ``idx`` is given.
        idx: optional flattened neighbour indices; computed with ``knn`` when
            omitted.

    Returns:
        Tensor of shape (batch_size, 2*num_dims, num_points, k).
    """
    n_batch, n_pts = x.size(0), x.size(2)
    x = x.view(n_batch, -1, n_pts)  # (batch_size, num_dims, num_points)
    if idx is None:
        idx = knn(x, k=k)
    n_dims = x.size(1)

    points = x.transpose(2, 1).contiguous()  # (batch_size, num_points, num_dims)
    neighbours = points.view(n_batch * n_pts, -1)[idx, :]
    neighbours = neighbours.view(n_batch, n_pts, k, n_dims)  # (batch_size, num_points, k, num_dims)
    centres = points.view(n_batch, n_pts, 1, n_dims).expand(-1, -1, k, -1)

    edge = torch.cat((neighbours - centres, centres), dim=3)  # (batch_size, num_points, k, 2*num_dims)
    return edge.permute(0, 3, 1, 2)  # (batch_size, 2*num_dims, num_points, k)
89 |
90 |
class DGCNN_Cls_Encoder(nn.Module):
    """Encoder made of four edge-convolution stages and a global max-pool.

    Reads ``args.k`` (neighbour count, 40 when ``None``), ``args.task`` and
    ``args.feat_dims`` (width of the output feature).
    """

    def __init__(self, args):
        super().__init__()
        self.k = 40 if args.k is None else args.k
        self.task = args.task

        self.bn1 = nn.BatchNorm2d(64)
        self.bn2 = nn.BatchNorm2d(64)
        self.bn3 = nn.BatchNorm2d(128)
        self.bn4 = nn.BatchNorm2d(256)
        self.bn5 = nn.BatchNorm1d(args.feat_dims)

        def edge_conv(in_channels, out_channels, norm):
            # 1x1 conv over edge features -> batch norm -> LeakyReLU(0.2)
            return nn.Sequential(nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False),
                                 norm,
                                 nn.LeakyReLU(negative_slope=0.2))

        self.conv1 = edge_conv(3 * 2, 64, self.bn1)
        self.conv2 = edge_conv(64 * 2, 64, self.bn2)
        self.conv3 = edge_conv(64 * 2, 128, self.bn3)
        self.conv4 = edge_conv(128 * 2, 256, self.bn4)
        self.conv5 = nn.Sequential(nn.Conv1d(512, args.feat_dims, kernel_size=1, bias=False),
                                   self.bn5,
                                   nn.LeakyReLU(negative_slope=0.2))

    def forward(self, x):
        """Encode a batch of point clouds given as (batch_size, num_points, 3).

        Returns ``feat`` of shape (batch_size, 1, feat_dims) when the task is
        ``'reconstruct'``; otherwise ``(feat, x0)`` where ``x0`` holds the
        per-point features (batch_size, feat_dims, num_points).
        """
        current = x.transpose(2, 1)  # (batch_size, 3, num_points)

        stage_outputs = []
        for stage in (self.conv1, self.conv2, self.conv3, self.conv4):
            edges = get_graph_feature(current, k=self.k)        # (batch_size, 2*C_in, num_points, k)
            edges = stage(edges)                                # (batch_size, C_out, num_points, k)
            current = edges.max(dim=-1, keepdim=False)[0]       # (batch_size, C_out, num_points)
            stage_outputs.append(current)

        stacked = torch.cat(stage_outputs, dim=1)               # (batch_size, 512, num_points)

        x0 = self.conv5(stacked)                                # (batch_size, feat_dims, num_points)
        pooled = x0.max(dim=-1, keepdim=False)[0]               # (batch_size, feat_dims)
        feat = pooled.unsqueeze(1)                              # (batch_size, 1, feat_dims)

        if self.task == 'reconstruct':
            return feat
        return feat, x0
152 |
153 |
class Point_Transform_Net(nn.Module):
    """Predict one 3x3 matrix per sample from edge features.

    The final linear layer starts with zero weights and an identity bias, so
    the network initially outputs the identity matrix for every sample.
    """

    def __init__(self):
        super(Point_Transform_Net, self).__init__()
        self.k = 3

        self.bn1 = nn.BatchNorm2d(64)
        self.bn2 = nn.BatchNorm2d(128)
        self.bn3 = nn.BatchNorm1d(1024)

        self.conv1 = nn.Sequential(nn.Conv2d(6, 64, kernel_size=1, bias=False),
                                   self.bn1,
                                   nn.LeakyReLU(negative_slope=0.2))
        self.conv2 = nn.Sequential(nn.Conv2d(64, 128, kernel_size=1, bias=False),
                                   self.bn2,
                                   nn.LeakyReLU(negative_slope=0.2))
        self.conv3 = nn.Sequential(nn.Conv1d(128, 1024, kernel_size=1, bias=False),
                                   self.bn3,
                                   nn.LeakyReLU(negative_slope=0.2))

        self.linear1 = nn.Linear(1024, 512, bias=False)
        # NOTE: this rebinds self.bn3. The 1024-channel norm created above
        # stays alive inside self.conv3; from here on self.bn3 is the
        # 512-feature norm applied after linear1. Renaming either one would
        # change the state-dict keys of saved checkpoints.
        self.bn3 = nn.BatchNorm1d(512)
        self.linear2 = nn.Linear(512, 256, bias=False)
        self.bn4 = nn.BatchNorm1d(256)

        self.transform = nn.Linear(256, 3*3)
        # zero weights + identity bias: the initial prediction is the identity matrix
        init.constant_(self.transform.weight, 0)
        init.eye_(self.transform.bias.view(3, 3))

    def forward(self, x):
        """Map edge features (batch_size, 6, num_points, k) to matrices (batch_size, 3, 3)."""
        batch_size = x.size(0)

        x = self.conv1(x)                       # (batch_size, 3*2, num_points, k) -> (batch_size, 64, num_points, k)
        x = self.conv2(x)                       # (batch_size, 64, num_points, k) -> (batch_size, 128, num_points, k)
        x = x.max(dim=-1, keepdim=False)[0]     # (batch_size, 128, num_points, k) -> (batch_size, 128, num_points)

        x = self.conv3(x)                       # (batch_size, 128, num_points) -> (batch_size, 1024, num_points)
        x = x.max(dim=-1, keepdim=False)[0]     # (batch_size, 1024, num_points) -> (batch_size, 1024)

        x = F.leaky_relu(self.bn3(self.linear1(x)), negative_slope=0.2)     # (batch_size, 1024) -> (batch_size, 512)
        x = F.leaky_relu(self.bn4(self.linear2(x)), negative_slope=0.2)     # (batch_size, 512) -> (batch_size, 256)

        x = self.transform(x)                   # (batch_size, 256) -> (batch_size, 3*3)
        x = x.view(batch_size, 3, 3)            # (batch_size, 3*3) -> (batch_size, 3, 3)

        return x                                # (batch_size, 3, 3)
199 |
200 |
class DGCNN_Seg_Encoder(nn.Module):
    """Encoder with a learned input transform and three edge-convolution stages.

    Reads ``args.k`` (neighbour count, 40 when ``None``), ``args.task`` and
    ``args.feat_dims`` (width of the output feature).
    """

    def __init__(self, args):
        super().__init__()
        self.k = 40 if args.k is None else args.k
        self.task = args.task
        self.transform_net = Point_Transform_Net()

        self.bn1 = nn.BatchNorm2d(64)
        self.bn2 = nn.BatchNorm2d(64)
        self.bn3 = nn.BatchNorm2d(64)
        self.bn4 = nn.BatchNorm2d(64)
        self.bn5 = nn.BatchNorm2d(64)
        self.bn6 = nn.BatchNorm1d(args.feat_dims)

        def edge_conv(in_channels, norm):
            # 1x1 conv to 64 channels -> batch norm -> LeakyReLU(0.2)
            return nn.Sequential(nn.Conv2d(in_channels, 64, kernel_size=1, bias=False),
                                 norm,
                                 nn.LeakyReLU(negative_slope=0.2))

        self.conv1 = edge_conv(6, self.bn1)
        self.conv2 = edge_conv(64, self.bn2)
        self.conv3 = edge_conv(64 * 2, self.bn3)
        self.conv4 = edge_conv(64, self.bn4)
        self.conv5 = edge_conv(64 * 2, self.bn5)
        self.conv6 = nn.Sequential(nn.Conv1d(192, args.feat_dims, kernel_size=1, bias=False),
                                   self.bn6,
                                   nn.LeakyReLU(negative_slope=0.2))

    def forward(self, x):
        """Encode a batch of point clouds given as (batch_size, num_points, 3).

        Returns, depending on ``self.task``:
            * ``'reconstruct'``: ``feat`` (batch_size, 1, feat_dims);
            * ``'classify'``: ``(feat, x0)``;
            * ``'segment'``: ``(feat, x0, x1, x2, x3)``;
        where ``x0`` is (batch_size, feat_dims, num_points) and ``x1..x3`` are
        the (batch_size, 64, num_points) stage outputs. Any other task value
        falls through and yields ``None``.
        """
        x = x.transpose(2, 1)                                   # (batch_size, 3, num_points)

        # Align the input with the predicted 3x3 matrix.
        t = self.transform_net(get_graph_feature(x, k=self.k))  # (batch_size, 3, 3)
        x = torch.bmm(x.transpose(2, 1), t).transpose(2, 1)     # (batch_size, 3, num_points)

        edges = get_graph_feature(x, k=self.k)                  # (batch_size, 3*2, num_points, k)
        edges = self.conv2(self.conv1(edges))                   # (batch_size, 64, num_points, k)
        x1 = edges.max(dim=-1, keepdim=False)[0]                # (batch_size, 64, num_points)

        edges = get_graph_feature(x1, k=self.k)                 # (batch_size, 64*2, num_points, k)
        edges = self.conv4(self.conv3(edges))                   # (batch_size, 64, num_points, k)
        x2 = edges.max(dim=-1, keepdim=False)[0]                # (batch_size, 64, num_points)

        edges = get_graph_feature(x2, k=self.k)                 # (batch_size, 64*2, num_points, k)
        edges = self.conv5(edges)                               # (batch_size, 64, num_points, k)
        x3 = edges.max(dim=-1, keepdim=False)[0]                # (batch_size, 64, num_points)

        stacked = torch.cat((x1, x2, x3), dim=1)                # (batch_size, 64*3, num_points)
        x0 = self.conv6(stacked)                                # (batch_size, feat_dims, num_points)

        pooled = x0.max(dim=-1, keepdim=False)[0]               # (batch_size, feat_dims)
        feat = pooled.unsqueeze(1)                              # (batch_size, 1, feat_dims)

        if self.task == 'reconstruct':
            return feat
        elif self.task == 'classify':
            return feat, x0
        elif self.task == 'segment':
            return feat, x0, x1, x2, x3
277 |
278 |
class FoldNet_Encoder(nn.Module):
    """Graph-based encoder producing one feature vector per point cloud.

    Reads ``args.k`` (neighbour count, 16 when ``None``) and
    ``args.feat_dims`` (width of the output feature).
    """

    def __init__(self, args):
        super().__init__()
        self.k = 16 if args.k is None else args.k
        self.n = 2048   # input point cloud size
        self.mlp1 = nn.Sequential(
            nn.Conv1d(12, 64, 1),
            nn.ReLU(),
            nn.Conv1d(64, 64, 1),
            nn.ReLU(),
            nn.Conv1d(64, 64, 1),
            nn.ReLU(),
        )
        self.linear1 = nn.Linear(64, 64)
        self.conv1 = nn.Conv1d(64, 128, 1)
        self.linear2 = nn.Linear(128, 128)
        self.conv2 = nn.Conv1d(128, 1024, 1)
        self.mlp2 = nn.Sequential(
            nn.Conv1d(1024, args.feat_dims, 1),
            nn.ReLU(),
            nn.Conv1d(args.feat_dims, args.feat_dims, 1),
        )

    def graph_layer(self, x, idx):
        """Apply two neighbourhood max-pool + linear + conv stages.

        Maps (batch_size, 64, num_points) to (batch_size, 1024, num_points);
        only the first stage is followed by a ReLU.
        """
        pooled = local_maxpool(x, idx)
        hidden = self.linear1(pooled).transpose(2, 1)
        hidden = F.relu(self.conv1(hidden))
        pooled = local_maxpool(hidden, idx)
        hidden = self.linear2(pooled).transpose(2, 1)
        return self.conv2(hidden)

    def forward(self, pts):
        """Encode point clouds (batch_size, num_points, 3) into (batch_size, 1, feat_dims)."""
        pts = pts.transpose(2, 1)                        # (batch_size, 3, num_points)
        idx = knn(pts, k=self.k)
        local = local_cov(pts, idx)                      # (batch_size, 12, num_points)
        local = self.mlp1(local)                         # (batch_size, 64, num_points)
        local = self.graph_layer(local, idx)             # (batch_size, 1024, num_points)
        pooled = torch.max(local, 2, keepdim=True)[0]    # (batch_size, 1024, 1)
        code = self.mlp2(pooled)                         # (batch_size, feat_dims, 1)
        return code.transpose(2, 1)                      # (batch_size, 1, feat_dims)
326 |
327 |
class FoldNet_Decoder(nn.Module):
    """Decoder that deforms a fixed set of source points in two folding steps.

    Reads ``args.shape`` (``'plane'``, ``'sphere'`` or ``'gaussian'``) and
    ``args.feat_dims``. The plane source is a 45 x 45 grid on [-0.3, 0.3]^2;
    the other sources are read from ``sphere.npy`` / ``gaussian.npy`` in the
    working directory. Only the file required by the selected shape is
    loaded, so a plane decoder does not need either file; the attribute of an
    unused source is ``None``.
    """

    def __init__(self, args):
        super(FoldNet_Decoder, self).__init__()
        self.m = 2025  # 45 * 45.
        self.shape = args.shape
        self.meshgrid = [[-0.3, 0.3, 45], [-0.3, 0.3, 45]]
        self.sphere = np.load("sphere.npy") if self.shape == 'sphere' else None
        self.gaussian = np.load("gaussian.npy") if self.shape == 'gaussian' else None
        # The plane grid is 2D, every other source is 3D.
        source_dims = 2 if self.shape == 'plane' else 3
        self.folding1 = nn.Sequential(
            nn.Conv1d(args.feat_dims+source_dims, args.feat_dims, 1),
            nn.ReLU(),
            nn.Conv1d(args.feat_dims, args.feat_dims, 1),
            nn.ReLU(),
            nn.Conv1d(args.feat_dims, 3, 1),
        )
        self.folding2 = nn.Sequential(
            nn.Conv1d(args.feat_dims+3, args.feat_dims, 1),
            nn.ReLU(),
            nn.Conv1d(args.feat_dims, args.feat_dims, 1),
            nn.ReLU(),
            nn.Conv1d(args.feat_dims, 3, 1),
        )

    def build_grid(self, batch_size):
        """Return the source points repeated for every sample as a float tensor.

        Shape is (batch_size, num_points, 2) for the plane and
        (batch_size, num_points, 3) otherwise.

        Raises:
            ValueError: if ``self.shape`` is not a supported source shape.
        """
        if self.shape == 'plane':
            x = np.linspace(*self.meshgrid[0])
            y = np.linspace(*self.meshgrid[1])
            points = np.array(list(itertools.product(x, y)))
        elif self.shape == 'sphere':
            points = self.sphere
        elif self.shape == 'gaussian':
            points = self.gaussian
        else:
            raise ValueError("Unknown source shape: %r" % (self.shape,))
        # Convert once, then replicate along a new batch axis.
        base = torch.tensor(points, dtype=torch.float32)
        return base.unsqueeze(0).repeat(batch_size, 1, 1)

    def forward(self, x):
        """Decode features (batch_size, 1, feat_dims) into points (batch_size, num_points, 3)."""
        x = x.transpose(1, 2).repeat(1, 1, self.m)      # (batch_size, feat_dims, num_points)
        points = self.build_grid(x.shape[0]).transpose(1, 2)  # (batch_size, 2, num_points) or (batch_size, 3, num_points)
        points = points.to(x.device)
        cat1 = torch.cat((x, points), dim=1)            # (batch_size, feat_dims+2, num_points) or (batch_size, feat_dims+3, num_points)
        folding_result1 = self.folding1(cat1)           # (batch_size, 3, num_points)
        cat2 = torch.cat((x, folding_result1), dim=1)   # (batch_size, feat_dims+3, num_points)
        folding_result2 = self.folding2(cat2)           # (batch_size, 3, num_points)
        return folding_result2.transpose(1, 2)          # (batch_size, num_points ,3)
383 |
384 |
class DGCNN_Cls_Classifier(nn.Module):
    """Classification head: max+avg pooling followed by a 3-layer MLP."""

    # Number of output classes for each supported ``args.dataset`` value.
    _NUM_CLASSES = {
        'modelnet40': 40,
        'modelnet10': 10,
        'shapenetcorev2': 55,
        'shapenetpart': 16,
        'shapenetpartpart': 50,
    }

    def __init__(self, args):
        """Build the head.

        Args:
            args: namespace providing ``dataset``, ``feat_dims`` and ``dropout``.

        Raises:
            ValueError: if ``args.dataset`` is not a supported dataset name.
        """
        super(DGCNN_Cls_Classifier, self).__init__()
        # Validate up front; previously an unknown dataset only surfaced as an
        # UnboundLocalError when the last layer was created.
        if args.dataset not in self._NUM_CLASSES:
            raise ValueError(
                "Unsupported dataset %r; expected one of %s"
                % (args.dataset, sorted(self._NUM_CLASSES))
            )
        output_channels = self._NUM_CLASSES[args.dataset]

        # Input is feat_dims*2 because forward() concatenates max- and avg-pooled features.
        self.linear1 = nn.Linear(args.feat_dims*2, 512, bias=False)
        self.bn6 = nn.BatchNorm1d(512)
        self.dp1 = nn.Dropout(p=args.dropout)
        self.linear2 = nn.Linear(512, 256)
        self.bn7 = nn.BatchNorm1d(256)
        self.dp2 = nn.Dropout(p=args.dropout)
        self.linear3 = nn.Linear(256, output_channels)

    def forward(self, x):
        """Return class logits of shape (batch_size, num_classes).

        Args:
            x: features of shape (batch_size, feat_dims, length); the last
               axis is pooled away.
        """
        batch_size = x.size(0)

        # Pool over the last axis in two ways and concatenate: (batch_size, feat_dims*2)
        x1 = F.adaptive_max_pool1d(x, 1).view(batch_size, -1)
        x2 = F.adaptive_avg_pool1d(x, 1).view(batch_size, -1)
        x = torch.cat((x1, x2), 1)

        x = F.leaky_relu(self.bn6(self.linear1(x)), negative_slope=0.2)
        x = self.dp1(x)
        x = F.leaky_relu(self.bn7(self.linear2(x)), negative_slope=0.2)
        x = self.dp2(x)
        x = self.linear3(x)
        return x
420 |
421 |
class DGCNN_Seg_Segmenter(nn.Module):
    """Per-point segmentation head.

    Combines a global feature (max-pooled over points), an optional embedding
    of the object-category label, and three per-point feature maps
    (``x1``, ``x2``, ``x3``, 64 channels each) into per-point outputs.
    """

    def __init__(self, args, seg_num_all):
        """Build the head.

        Args:
            args: namespace providing ``seg_no_class_label``, ``k``,
                ``feat_dims``, ``loss`` and ``dropout``.
            seg_num_all: number of output channels of the final layer.
        """
        super(DGCNN_Seg_Segmenter, self).__init__()
        self.seg_num_all = seg_num_all
        self.seg_no_class_label = args.seg_no_class_label
        self.k = args.k
        self.feat_dims = args.feat_dims
        self.loss = args.loss

        # The batch-norm layers are registered both as attributes and inside
        # the Sequentials below; this is kept so state_dict keys are unchanged.
        self.bn7 = nn.BatchNorm1d(64)
        self.bn8 = nn.BatchNorm1d(256)
        self.bn9 = nn.BatchNorm1d(256)
        self.bn10 = nn.BatchNorm1d(128)

        # Embeds the category label; the 16 input channels presumably match
        # the 16 'shapenetpart' object categories - TODO confirm.
        self.conv7 = nn.Sequential(nn.Conv1d(16, 64, kernel_size=1, bias=False),
                                   self.bn7,
                                   nn.LeakyReLU(negative_slope=0.2))
        if not self.seg_no_class_label:
            # global feature + 64-d label embedding + three 64-d per-point maps
            self.conv8 = nn.Sequential(nn.Conv1d(self.feat_dims+64+64*3, 256, kernel_size=1, bias=False),
                                       self.bn8,
                                       nn.LeakyReLU(negative_slope=0.2))
        else:
            # global feature + three 64-d per-point maps (no label embedding)
            self.conv8 = nn.Sequential(nn.Conv1d(self.feat_dims+64*3, 256, kernel_size=1, bias=False),
                                       self.bn8,
                                       nn.LeakyReLU(negative_slope=0.2))
        self.dp1 = nn.Dropout(p=args.dropout)
        self.conv9 = nn.Sequential(nn.Conv1d(256, 256, kernel_size=1, bias=False),
                                   self.bn9,
                                   nn.LeakyReLU(negative_slope=0.2))
        self.dp2 = nn.Dropout(p=args.dropout)
        self.conv10 = nn.Sequential(nn.Conv1d(256, 128, kernel_size=1, bias=False),
                                    self.bn10,
                                    nn.LeakyReLU(negative_slope=0.2))
        self.conv11 = nn.Conv1d(128, self.seg_num_all, kernel_size=1, bias=False)

    def forward(self, x, l, x1, x2, x3):
        """Compute per-point outputs.

        Args:
            x: features of shape (batch_size, emb_dims, num_points).
            l: category label, reshaped here to (batch_size, num_categories, 1);
               required unless ``seg_no_class_label`` is set, otherwise ignored.
            x1, x2, x3: per-point feature maps, (batch_size, 64, num_points) each.

        Returns:
            (batch_size, seg_num_all, num_points) when ``loss == 'softmax'``;
            otherwise the 128-channel features (batch_size, 128, num_points).

        Raises:
            ValueError: if class labels are enabled but ``l`` is None.
        """
        batch_size = x.size(0)
        num_points = x.size(2)

        # (batch_size, emb_dims, num_points) -> (batch_size, emb_dims, 1)
        x = x.max(dim=-1, keepdim=True)[0]

        if not self.seg_no_class_label:
            if l is None:
                # Previously this failed with an AttributeError on None.view().
                raise ValueError(
                    "A category label `l` is required unless seg_no_class_label is set"
                )
            l = l.view(batch_size, -1, 1)   # (batch_size, num_categories, 1)
            l = self.conv7(l)               # (batch_size, num_categories, 1) -> (batch_size, 64, 1)
            x = torch.cat((x, l), dim=1)    # (batch_size, emb_dims+64, 1)

        # Broadcast the global vector to every point, then append per-point maps.
        x = x.repeat(1, 1, num_points)          # (batch_size, emb_dims[+64], num_points)
        x = torch.cat((x, x1, x2, x3), dim=1)   # (batch_size, emb_dims[+64]+64*3, num_points)

        x = self.conv8(x)    # -> (batch_size, 256, num_points)
        x = self.dp1(x)
        x = self.conv9(x)    # (batch_size, 256, num_points) -> (batch_size, 256, num_points)
        x = self.dp2(x)
        x = self.conv10(x)   # (batch_size, 256, num_points) -> (batch_size, 128, num_points)

        # Only the softmax loss needs class scores; other losses use the 128-d features.
        if self.loss == 'softmax':
            x = self.conv11(x)   # (batch_size, 128, num_points) -> (batch_size, seg_num_all, num_points)

        return x
486 |
487 |
class ReconstructionNet(nn.Module):
    """Auto-encoder: a selectable encoder followed by the folding decoder."""

    def __init__(self, args):
        """Build the network.

        Args:
            args: namespace; ``args.encoder`` selects the encoder
                ('foldnet', 'dgcnn_cls' or 'dgcnn_seg'). It is also passed on
                to the encoder and decoder constructors.

        Raises:
            ValueError: if ``args.encoder`` is not a supported encoder name.
        """
        super(ReconstructionNet, self).__init__()
        if args.encoder == 'foldnet':
            self.encoder = FoldNet_Encoder(args)
        elif args.encoder == 'dgcnn_cls':
            self.encoder = DGCNN_Cls_Encoder(args)
        elif args.encoder == 'dgcnn_seg':
            self.encoder = DGCNN_Seg_Encoder(args)
        else:
            # Fail at construction instead of leaving self.encoder undefined
            # and crashing later inside forward().
            raise ValueError(
                "Unsupported encoder %r; expected 'foldnet', 'dgcnn_cls' or 'dgcnn_seg'"
                % (args.encoder,)
            )
        self.decoder = FoldNet_Decoder(args)
        self.loss = ChamferLoss()

    def forward(self, input):
        """Encode ``input`` and decode it again; returns ``(output, feature)``."""
        feature = self.encoder(input)
        output = self.decoder(feature)
        return output, feature

    def get_parameter(self):
        """Return the encoder and decoder parameters as one list."""
        return list(self.encoder.parameters()) + list(self.decoder.parameters())

    def get_loss(self, input, output):
        """Return the Chamfer loss between ``input`` and ``output``."""
        # input shape  (batch_size, 2048, 3)
        # output shape (batch_size, 2025, 3)
        return self.loss(input, output)
512 |
513 |
class ClassificationNet(nn.Module):
    """Selectable encoder plus, outside eval mode, a classification head."""

    def __init__(self, args):
        """Build the network.

        Args:
            args: namespace; ``args.eval`` disables the classifier head and
                ``args.encoder`` selects the encoder
                ('foldnet', 'dgcnn_cls' or 'dgcnn_seg').

        Raises:
            ValueError: if ``args.encoder`` is not a supported encoder name.
        """
        super(ClassificationNet, self).__init__()
        self.is_eval = args.eval
        if args.encoder == 'foldnet':
            self.encoder = FoldNet_Encoder(args)
        elif args.encoder == 'dgcnn_cls':
            self.encoder = DGCNN_Cls_Encoder(args)
        elif args.encoder == 'dgcnn_seg':
            self.encoder = DGCNN_Seg_Encoder(args)
        else:
            # Fail at construction instead of leaving self.encoder undefined
            # and crashing later inside forward().
            raise ValueError(
                "Unsupported encoder %r; expected 'foldnet', 'dgcnn_cls' or 'dgcnn_seg'"
                % (args.encoder,)
            )
        if not self.is_eval:
            self.classifier = DGCNN_Cls_Classifier(args)
        # Created in both modes so get_loss() is always usable.
        self.loss = CrossEntropyLoss()

    def forward(self, input):
        """Run the encoder and, outside eval mode, the classifier.

        Returns:
            ``(output, feature)`` when training, ``feature`` alone in eval mode.
        """
        # The encoder is expected to return a (feature, latent) pair.
        feature, latent = self.encoder(input)
        if self.is_eval:
            return feature
        output = self.classifier(latent)
        return output, feature

    def get_parameter(self):
        """Return the trainable parameters as one list.

        In eval mode no classifier exists, so only the encoder parameters are
        returned (this used to raise AttributeError).
        """
        params = list(self.encoder.parameters())
        if not self.is_eval:
            params += list(self.classifier.parameters())
        return params

    def get_loss(self, preds, gts):
        """Return the cross-entropy loss of ``preds`` against ``gts``."""
        # preds: classifier outputs, one row per sample
        # gts shape (batch_size)
        return self.loss(preds, gts)
543 |
544 |
class SegmentationNet(nn.Module):
    """Selectable encoder plus, outside eval mode, a per-point segmentation head."""

    def __init__(self, args, seg_num_all=50):
        """Build the network.

        Args:
            args: namespace; ``args.eval`` disables the segmenter head,
                ``args.loss`` selects the loss ('softmax' or 'triplet') and
                ``args.encoder`` selects the encoder
                ('foldnet', 'dgcnn_cls' or 'dgcnn_seg').
            seg_num_all: number of output channels passed to the segmenter.

        Raises:
            ValueError: if ``args.encoder`` is not a supported encoder name.
        """
        super(SegmentationNet, self).__init__()
        self.is_eval = args.eval
        self.loss_type = args.loss
        if args.encoder == 'foldnet':
            self.encoder = FoldNet_Encoder(args)
        elif args.encoder == 'dgcnn_cls':
            self.encoder = DGCNN_Cls_Encoder(args)
        elif args.encoder == 'dgcnn_seg':
            self.encoder = DGCNN_Seg_Encoder(args)
        else:
            # Fail at construction instead of leaving self.encoder undefined
            # and crashing later inside forward().
            raise ValueError(
                "Unsupported encoder %r; expected 'foldnet', 'dgcnn_cls' or 'dgcnn_seg'"
                % (args.encoder,)
            )
        if not self.is_eval:
            self.segmenter = DGCNN_Seg_Segmenter(args, seg_num_all)
        # Created in both modes; an unsupported loss type is reported by get_loss().
        if self.loss_type == 'softmax':
            self.loss = CrossEntropyLoss()
        elif self.loss_type == 'triplet':
            self.loss = TripletLoss(margin=args.margin)

    def forward(self, input, label=None):
        """Run the encoder and, outside eval mode, the segmenter.

        Returns:
            ``(output, feature)`` when training, ``feature`` alone in eval mode.
        """
        # The encoder is expected to return (feature, latent, x1, x2, x3).
        feature, latent, x1, x2, x3 = self.encoder(input)
        if self.is_eval:
            return feature
        output = self.segmenter(latent, label, x1, x2, x3)
        return output, feature

    def get_parameter(self):
        """Return the trainable parameters as one list.

        In eval mode no segmenter exists, so only the encoder parameters are
        returned (this used to raise AttributeError).
        """
        params = list(self.encoder.parameters())
        if not self.is_eval:
            params += list(self.segmenter.parameters())
        return params

    def get_loss(self, preds, gts, new_device=None):
        """Return the configured loss of ``preds`` against ``gts``.

        ``new_device`` is forwarded to the triplet loss only.

        Raises:
            ValueError: if the configured loss type is neither 'softmax' nor
                'triplet' (this used to return None silently).
        """
        if self.loss_type == 'softmax':
            return self.loss(preds, gts)
        if self.loss_type == 'triplet':
            return self.loss(preds, gts, new_device)
        raise ValueError(
            "Unsupported loss type %r; expected 'softmax' or 'triplet'" % (self.loss_type,)
        )
581 |
--------------------------------------------------------------------------------