├── README.md
├── modelnet_data.py
├── pointhop_spark.py
├── pointhop2_spark.py
└── point_utils_spark.py

/README.md:
--------------------------------------------------------------------------------
1 | # A fast and low-memory version of [PointHop](https://github.com/minzhang-1/PointHop) and [PointHop++](https://github.com/minzhang-1/PointHop2)
2 | Created by [Min Zhang](https://github.com/minzhang-1)
3 | 
4 | ### Introduction
5 | This work is an improved implementation of our [PointHop method](https://arxiv.org/abs/1907.12766) and [PointHop++ method](https://arxiv.org/abs/2002.03281), which is built upon Apache Spark. With 12 cores (Intel® Core™ i7-5930K CPU @ 3.50GHz), PointHop finishes in 20 minutes using less than 12GB of memory, and PointHop++ finishes in 40 minutes using less than 14GB of memory.
6 | 
7 | In this repository, we release code and data for training baseline PointHop and PointHop++ classification models on point clouds sampled from 3D shapes.
8 | 
9 | ### Citation
10 | If you find our work useful in your research, please consider citing:
11 | 
12 |     @article{zhang2020pointhop,
13 |       title={PointHop: An Explainable Machine Learning Method for Point Cloud Classification},
14 |       author={Zhang, Min and You, Haoxuan and Kadam, Pranav and Liu, Shan and Kuo, C-C Jay},
15 |       journal={IEEE Transactions on Multimedia},
16 |       year={2020},
17 |       publisher={IEEE}
18 |     }
19 | 
20 |     @article{zhang2020pointhop++,
21 |       title={PointHop++: A Lightweight Learning Model on Point Sets for 3D Classification},
22 |       author={Zhang, Min and Wang, Yifan and Kadam, Pranav and Liu, Shan and Kuo, C-C Jay},
23 |       journal={arXiv preprint arXiv:2002.03281},
24 |       year={2020}
25 |     }
26 | 
27 | ### Installation
28 | 
29 | The code has been tested with Python 2.7 and 3.5, and Java 8. You may need to install the h5py, scikit-learn and pyspark packages (pickle ships with Python).
30 | 
31 | To check your Java version:
32 | ```bash
33 | java -version
34 | ```
35 | 
36 | To install pyspark for Python:
37 | ```bash
38 | sudo pip install pyspark
39 | ```
40 | 
41 | If you are using Python 3, you may need to set the following environment variables so that PySpark uses the right interpreter:
42 | ```bash
43 | export PYSPARK_PYTHON=/usr/bin/python3
44 | export PYSPARK_DRIVER_PYTHON=/usr/bin/python3
45 | ```
46 | 
47 | ### Usage
48 | To train and test a single PointHop model (without ensembles) for classifying point clouds sampled from 3D shapes:
49 | 
50 |     python3 pointhop_spark.py
51 | 
52 | To train and test a single PointHop++ model (without feature selection and ensembles):
53 | 
54 |     python3 pointhop2_spark.py
55 | 
56 | Point clouds of ModelNet40 models in HDF5 files are automatically downloaded (416MB) and unzipped into `modelnet40_ply_hdf5_2048/` in the working directory on the first run. Each point cloud contains 2048 points uniformly sampled from a shape surface; each cloud is zero-mean and normalized to the unit sphere. There are also text files in `modelnet40_ply_hdf5_2048` specifying the ids of shapes in the h5 files.
57 | 
58 | 
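Both scripts load ModelNet40 through `modelnet_data.data_load`. As a minimal sketch of inspecting the data yourself (assuming the `modelnet40_ply_hdf5_2048/` folder already sits next to the scripts; the 1024-point setting simply mirrors the defaults in `pointhop_spark.py`):

```python
import os
import modelnet_data

# Folder that data_load expects; if it is missing, data_load tries to
# download and unzip the 416MB archive via wget into the working directory.
data_dir = os.path.join(os.getcwd(), 'modelnet40_ply_hdf5_2048')

# Keep only the first 1024 points of each 2048-point cloud, as the training scripts do.
train_data, train_label = modelnet_data.data_load(num_point=1024, data_dir=data_dir, train=True)
print(train_data.shape, train_label.shape)  # (num_train_samples, 1024, 3) (num_train_samples, 1)
```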
--------------------------------------------------------------------------------
/modelnet_data.py:
--------------------------------------------------------------------------------
1 | import os
2 | import h5py
3 | import numpy as np
4 | from sklearn.model_selection import train_test_split
5 | 
6 | 
7 | def load_dir(data_dir, name='train_files.txt'):
8 |     with open(os.path.join(data_dir, name), 'r') as f:
9 |         lines = f.readlines()
10 |     return [os.path.join(data_dir, line.rstrip().split('/')[-1]) for line in lines]
11 | 
12 | 
13 | def shuffle_data(data):
14 |     """ Shuffle data order.
15 |         Input:
16 |             data: B,N,... numpy array
17 |         Return:
18 |             shuffled data, shuffle indices
19 |     """
20 |     idx = np.arange(data.shape[0])
21 |     np.random.shuffle(idx)
22 |     return data[idx, ...], idx
23 | 
24 | 
25 | def shuffle_points(data):
26 |     """ Shuffle orders of points in each point cloud -- changes FPS behavior.
27 |         Input:
28 |             BxNxC array
29 |         Output:
30 |             BxNxC array
31 |     """
32 |     idx = np.arange(data.shape[1])
33 |     np.random.shuffle(idx)
34 |     return data[:, idx, :], idx
35 | 
36 | 
37 | def xyz2sphere(data):
38 |     """
39 |     Input: data(B,N,3) xyz_coordinates
40 |     Return: data(B,N,3) sphere_coordinates
41 |     """
42 |     r = np.sqrt(np.sum(data**2, axis=2, keepdims=False))
43 |     theta = np.arccos(data[..., 2]*1.0/r)
44 |     phi = np.arctan2(data[..., 1], data[..., 0])  # arctan2 keeps the quadrant and avoids division by zero at x = 0
45 | 
46 |     if len(r.shape) == 2:
47 |         r = np.expand_dims(r, 2)
48 |     if len(theta.shape) == 2:
49 |         theta = np.expand_dims(theta, 2)
50 |     if len(phi.shape) == 2:
51 |         phi = np.expand_dims(phi, 2)
52 | 
53 |     data_sphere = np.concatenate([r, theta, phi], axis=2)
54 |     return data_sphere
55 | 
56 | 
57 | def xyz2cylind(data):
58 |     """
59 |     Input: data(B,N,3) xyz_coordinates
60 |     Return: data(B,N,3) cylindrical_coordinates
61 |     """
62 |     r = np.sqrt(np.sum(data[..., :2]**2, axis=2, keepdims=False))
63 |     phi = np.arctan2(data[..., 1], data[..., 0])  # arctan2 keeps the quadrant and avoids division by zero at x = 0
64 |     z = data[..., 2]
65 | 
66 |     if len(r.shape) == 2:
67 |         r = np.expand_dims(r, 2)
68 |     if len(z.shape) == 2:
69 |         z = np.expand_dims(z, 2)
70 |     if len(phi.shape) == 2:
71 |         phi = np.expand_dims(phi, 2)
72 | 
73 |     data_cylind = np.concatenate([r, z, phi], axis=2)
74 |     return data_cylind
75 | 
76 | 
77 | def data_load(num_point=None, data_dir=None, train=True):
78 |     if not os.path.exists(data_dir):  # download and unzip ModelNet40 into the working directory on first use
79 |         www = 'https://shapenet.cs.stanford.edu/media/modelnet40_ply_hdf5_2048.zip'
80 |         zipfile = os.path.basename(www)
81 |         os.system('wget --no-check-certificate %s; unzip %s' % (www, zipfile))
82 |         os.system('rm %s' % (zipfile))
83 | 
84 |     if train:
85 |         data_pth = load_dir(data_dir, name='train_files.txt')
86 |     else:
87 |         data_pth = load_dir(data_dir, name='test_files.txt')
88 | 
89 |     point_list = []
90 |     label_list = []
91 |     for pth in data_pth:
92 |         data_file = h5py.File(pth, 'r')
93 |         point = data_file['data'][:]
94 |         label = data_file['label'][:]
95 |         point_list.append(point)
96 |         label_list.append(label)
97 |     data = np.concatenate(point_list, axis=0)
98 |     label = np.concatenate(label_list, axis=0)
99 |     # data, idx = shuffle_data(data)
100 |     # data, ind = shuffle_points(data)
101 | 
102 |     if not num_point:
103 |         return data[:, :, :], label
104 |     else:
105 |         return data[:, :num_point, :], label
106 | 
107 | 
108 | def data_separate(data, label):
109 |     seed = 7
110 |     np.random.seed(seed)
111 |     train_data, valid_data, train_label, valid_label = train_test_split(data, label, test_size=0.1, random_state=seed)
112 | 
113 |     return train_data, train_label, valid_data, valid_label
114 | 
115 | 
--------------------------------------------------------------------------------
/pointhop_spark.py:
--------------------------------------------------------------------------------
1 | import os
2 | import time
3 | import numpy as np
4 | import modelnet_data
5 | from pyspark import SparkConf
6 | from numpy import linalg as LA
7 | import point_utils_spark as pus
8 | from pyspark import SparkContext
9 | from sklearn.metrics import accuracy_score
10 | 
11 | config = SparkConf().setAll(
12 |     [('spark.driver.memory', '12g'),
13 |      ('spark.executor.memory', '6g'),
14 |      ('spark.driver.maxResultSize', '12g')]).setAppName('POINTHOP').setMaster('local[*]')
15 | sc = 
SparkContext(conf=config) 16 | sc.setLogLevel("ERROR") 17 | 18 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 19 | 20 | 21 | def pointhop_train(data, n_newpoint, n_sample, n_kernel, num_partition): 22 | ''' 23 | Train based on the provided samples. 24 | :param data: [num_samples, num_point, feature_dimension] 25 | :param n_newpoint: point numbers used in every stage 26 | :param n_sample: k nearest neighbors 27 | :param n_kernel: num kernels to be preserved 28 | :param num_partition: partition num for rdd 29 | :return: pca_params, feature 30 | ''' 31 | 32 | point_data = data 33 | pca_params = {} 34 | fea = [] 35 | pointRDD = sc.parallelize(point_data, num_partition) 36 | 37 | for i in range(len(n_sample)): 38 | if (i == 0 and point_data.shape[1] == n_newpoint[i]) or (i > 0 and n_newpoint[i-1] == n_newpoint[i]): 39 | fpsRDD = pointRDD 40 | else: 41 | fpsRDD = pointRDD.map(lambda x: pus.fps(x, n_newpoint[i])) 42 | fpsRDD.persist() 43 | 44 | knnRDD = fpsRDD.zip(pointRDD).map(lambda x: pus.knn(x[0], x[1], n_sample[i])) 45 | 46 | if i == 0: 47 | sgRDD = pointRDD.zip(knnRDD).flatMap(lambda x: pus.sg(x[0], x[0], x[1])) 48 | else: 49 | sgRDD = pointRDD.zip(knnRDD).zip(pcaRDD).flatMap(lambda x: pus.sg(x[0][0], x[1], x[0][1])) 50 | pcaRDD.unpersist() 51 | sgRDD.persist() 52 | 53 | kernels, energy = pus.pca(sgRDD) 54 | kernels = kernels[:n_kernel[i]] 55 | pca_params['Layer_{:d}/kernel'.format(i)] = kernels 56 | 57 | if i == 0: 58 | pcaRDD = sgRDD.map(lambda x: np.dot(x, kernels.T)) 59 | else: 60 | bias = sgRDD.map(lambda x: LA.norm(x)).max() 61 | pca_params['Layer_{:d}/bias'.format(i)] = bias 62 | e = np.zeros((1, kernels.shape[0])) 63 | e[0, 0] = 1 64 | pcaRDD = sgRDD.map(lambda x: x + bias).map(lambda x: np.dot(x, kernels.T)).map(lambda x: x - bias * e) 65 | 66 | pca_fea = np.array(pcaRDD.collect()) 67 | pca_fea = pca_fea.reshape((-1, n_newpoint[i], pca_fea.shape[-1])) 68 | print('Hop ', i, ': ', pca_fea.shape) 69 | pcaRDD = sc.parallelize(pca_fea, num_partition) 70 | fea.append(pus.extract_single(pca_fea)) 71 | pointRDD = fpsRDD 72 | sgRDD.unpersist() 73 | fpsRDD.unpersist() 74 | pcaRDD.unpersist() 75 | pointRDD.unpersist() 76 | fea = np.concatenate(fea, axis=-1) 77 | return pca_params, fea 78 | 79 | 80 | def pointhop_pred(data, n_newpoint, n_sample, pca_params, num_partition): 81 | ''' 82 | Test based on the provided samples. 
83 | :param data: [num_samples, num_point, feature_dimension] 84 | :param n_newpoint: point numbers used in every stage 85 | :param n_sample: k nearest neighbors 86 | :param pca_params: model to be used 87 | :param num_partition: partition num for rdd 88 | :return: feature 89 | ''' 90 | 91 | point_data = data 92 | fea = [] 93 | pointRDD = sc.parallelize(point_data, num_partition) 94 | 95 | for i in range(len(n_sample)): 96 | if (i == 0 and point_data.shape[1] == n_newpoint[i]) or (i > 0 and n_newpoint[i-1] == n_newpoint[i]): 97 | fpsRDD = pointRDD 98 | else: 99 | fpsRDD = pointRDD.map(lambda x: pus.fps(x, n_newpoint[i])) 100 | fpsRDD.persist() 101 | 102 | knnRDD = fpsRDD.zip(pointRDD).map(lambda x: pus.knn(x[0], x[1], n_sample[i])) 103 | 104 | if i == 0: 105 | sgRDD = pointRDD.zip(knnRDD).flatMap(lambda x: pus.sg(x[0], x[0], x[1])) 106 | else: 107 | sgRDD = pointRDD.zip(knnRDD).zip(pcaRDD).flatMap(lambda x: pus.sg(x[0][0], x[1], x[0][1])) 108 | pcaRDD.unpersist() 109 | sgRDD.persist() 110 | 111 | kernels = pca_params['Layer_{:d}/kernel'.format(i)] 112 | 113 | if i == 0: 114 | pcaRDD = sgRDD.map(lambda x: np.dot(x, kernels.T)) 115 | else: 116 | bias = pca_params['Layer_{:d}/bias'.format(i)] 117 | e = np.zeros((1, kernels.shape[0])) 118 | e[0, 0] = 1 119 | pcaRDD = sgRDD.map(lambda x: x + bias).map(lambda x: np.dot(x, kernels.T)).map(lambda x: x - bias * e) 120 | 121 | pca_fea = np.array(pcaRDD.collect()) 122 | pca_fea = pca_fea.reshape((-1, n_newpoint[i], pca_fea.shape[-1])) 123 | print('Hop ', i, ': ', pca_fea.shape) 124 | pcaRDD = sc.parallelize(pca_fea, num_partition) 125 | fea.append(pus.extract_single(pca_fea)) 126 | pointRDD = fpsRDD 127 | sgRDD.unpersist() 128 | fpsRDD.unpersist() 129 | pcaRDD.unpersist() 130 | pointRDD.unpersist() 131 | fea = np.concatenate(fea, axis=-1) 132 | return fea 133 | 134 | 135 | if __name__ == '__main__': 136 | time_start = time.time() 137 | 138 | initial_point = 1024 139 | n_newpoint = [1024, 128, 128, 64] 140 | n_sample = [64, 64, 64, 64] 141 | n_kernel = [15, 25, 40, 80] 142 | 143 | train_data, train_label = modelnet_data.data_load(initial_point, os.path.join(BASE_DIR, 'modelnet40_ply_hdf5_2048'), True) 144 | test_data, test_label = modelnet_data.data_load(initial_point, os.path.join(BASE_DIR, 'modelnet40_ply_hdf5_2048'), False) 145 | train_data = train_data 146 | train_label = train_label 147 | test_data = test_data 148 | test_label = test_label 149 | print('Train data loaded!') 150 | 151 | pca_params, feature_train = pointhop_train(train_data, n_newpoint, n_sample, n_kernel, num_partition=1000) 152 | print(feature_train.shape) 153 | 154 | feature_test = pointhop_pred(test_data, n_newpoint, n_sample, pca_params, num_partition=400) 155 | print(feature_test.shape) 156 | 157 | clf = pus.rf_classifier(feature_train, np.squeeze(train_label)) 158 | pred_train = clf.predict(feature_train) 159 | acc_train = accuracy_score(train_label, pred_train) 160 | print('RF Classification train accuracy: ', acc_train) 161 | 162 | pred_test = clf.predict(feature_test) 163 | acc_test = accuracy_score(test_label, pred_test) 164 | print('RF Classification test accuracy: ', acc_test) 165 | 166 | weight = pus.llsr_train(feature_train, train_label, 40) 167 | prob_train, pred_train = pus.llsr_pred(feature_train, weight) 168 | acc_train = accuracy_score(train_label, pred_train) 169 | print('LLSR Classification train accuracy: ', acc_train) 170 | 171 | prob_test, pred_test = pus.llsr_pred(feature_test, weight) 172 | acc_test = accuracy_score(test_label, pred_test) 173 | print('LLSR 
Classification test accuracy: ', acc_test) 174 | 175 | weight = pus.llsr_train_weighted(feature_train, train_label, 40, epsilon=0.2) 176 | prob_train, pred_train = pus.llsr_pred(feature_train, weight) 177 | acc_train = accuracy_score(train_label, pred_train) 178 | print('WLLSR Classification train accuracy: ', acc_train) 179 | 180 | prob_test, pred_test = pus.llsr_pred(feature_test, weight) 181 | acc_test = accuracy_score(test_label, pred_test) 182 | print('WLLSR Classification test accuracy: ', acc_test) 183 | 184 | sc.stop() 185 | time_end = time.time() 186 | print('Duration:', (time_end - time_start) / 60.0, 'mins') 187 | -------------------------------------------------------------------------------- /pointhop2_spark.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import numpy as np 4 | import modelnet_data 5 | from pyspark import SparkConf 6 | from numpy import linalg as LA 7 | import point_utils_spark as pus 8 | from pyspark import SparkContext 9 | from sklearn.metrics import accuracy_score 10 | 11 | config = SparkConf().setAll( 12 | [('spark.driver.memory', '14g'), 13 | ('spark.executor.memory', '8g'), 14 | ('spark.driver.maxResultSize', '14g')]).setAppName('POINTHOP2').setMaster('local[*]') 15 | sc = SparkContext(conf=config) 16 | sc.setLogLevel("ERROR") 17 | 18 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 19 | 20 | 21 | def pointhop_train(data, n_newpoint, n_sample, threshold, num_partition): 22 | ''' 23 | Train based on the provided samples. 24 | :param data: [num_samples, num_point, feature_dimension] 25 | :param n_newpoint: point numbers used in every stage 26 | :param n_sample: k nearest neighbors 27 | :param threshold 28 | :param num_partition: partition num for rdd 29 | :return: pca_params, feature 30 | ''' 31 | 32 | point_data = data 33 | fea = [] 34 | pca_params = {} 35 | pointRDD = sc.parallelize(point_data, num_partition) 36 | 37 | for i in range(len(n_sample)): 38 | if (i == 0 and point_data.shape[1] == n_newpoint[i]) or (i > 0 and n_newpoint[i-1] == n_newpoint[i]): 39 | fpsRDD = pointRDD 40 | else: 41 | fpsRDD = pointRDD.map(lambda x: pus.fps(x, n_newpoint[i])) 42 | fpsRDD.persist() 43 | 44 | knnRDD = fpsRDD.zip(pointRDD).map(lambda x: pus.knn(x[0], x[1], n_sample[i])) 45 | knnRDD.persist() 46 | 47 | if i == 0: 48 | sgRDD = pointRDD.zip(knnRDD).flatMap(lambda x: pus.sg(x[0], x[0], x[1])) 49 | sgRDD.persist() 50 | kernels, energy = pus.pca(sgRDD) 51 | pcaRDD = sgRDD.map(lambda x: np.dot(x, kernels.T)) 52 | num_node = np.sum(energy > threshold) 53 | pre_energy = energy[:num_node] 54 | pca_fea = np.array(pcaRDD.collect()) 55 | pca_fea = pca_fea.reshape((-1, n_newpoint[i], pca_fea.shape[-1])) 56 | pcaRDD = sc.parallelize(pca_fea[:, :, :num_node], num_partition) 57 | pca_leaf_fea = pca_fea[:, :, num_node:] 58 | print('Hop ', i, ': ', pca_fea[:, :, num_node:].shape) 59 | pca_params['Layer_{:d}/num_node'.format(i)] = num_node 60 | else: 61 | sgRDD = pointRDD.zip(knnRDD).zip(pcaRDD).flatMap(lambda x: pus.sg_cw(x[0][0], x[1], x[0][1])) 62 | sgRDD.persist() 63 | kernels, energy, num_node_next = pus.pca_cw(sgRDD, pre_energy, threshold) 64 | if i == len(n_sample) - 1: 65 | num_node_next = [0 for j in range(num_node)] 66 | bias = np.max(np.array(sgRDD.map(lambda x: LA.norm(x, axis=0)).collect()), axis=0) 67 | e = np.zeros((kernels.shape[0], kernels.shape[-1])) 68 | e[:, 0] = bias 69 | pcaRDD = sgRDD.map(lambda x: x + bias).map( 70 | lambda x: np.array([np.dot(x[:, j], kernels[j].T) for j in 
range(kernels.shape[0])])).map(lambda x: x - e) 71 | 72 | pca_fea = np.array(pcaRDD.collect()) 73 | pca_fea = pca_fea.reshape((-1, n_newpoint[i], pca_fea.shape[1], pca_fea.shape[2])) 74 | pca_leaf_fea = np.concatenate([pca_fea[:, :, j, num_node_next[j]:] for j in range(num_node)], axis=-1) 75 | print('Hop ', i, ': ', pca_leaf_fea.shape) 76 | if i != len(n_sample) - 1: 77 | pca_nleaf_fea = np.concatenate([pca_fea[:, :, j, :num_node_next[j]] for j in range(num_node)], axis=-1) 78 | pcaRDD = sc.parallelize(pca_nleaf_fea, num_partition) 79 | 80 | pre_energy = np.concatenate([energy[j][:num_node_next[j]] for j in range(num_node)], axis=-1) 81 | num_node = np.sum(num_node_next) 82 | pca_params['Layer_{:d}/num_node'.format(i)] = num_node 83 | pca_params['Layer_{:d}/num_node_next'.format(i)] = num_node_next 84 | pca_params['Layer_{:d}/bias'.format(i)] = bias 85 | pca_params['Layer_{:d}/kernel'.format(i)] = kernels 86 | fea.append(pus.extract_single(pca_leaf_fea)) 87 | pointRDD = fpsRDD 88 | sgRDD.unpersist() 89 | knnRDD.unpersist() 90 | fpsRDD.unpersist() 91 | pcaRDD.unpersist() 92 | pointRDD.unpersist() 93 | fea = np.concatenate(fea, axis=-1) 94 | return pca_params, fea 95 | 96 | 97 | def pointhop_pred(data, n_newpoint, n_sample, pca_params, num_partition): 98 | ''' 99 | Test based on the provided samples. 100 | :param data: [num_samples, num_point, feature_dimension] 101 | :param n_newpoint: point numbers used in every stage 102 | :param n_sample: k nearest neighbors 103 | :param pca_params: model to be used 104 | :param num_partition: partition num for rdd 105 | :return: feature 106 | ''' 107 | point_data = data 108 | pcaRDD = None 109 | fea = [] 110 | pointRDD = sc.parallelize(point_data, num_partition) 111 | for i in range(len(n_sample)): 112 | if len(point_data) == n_newpoint: 113 | fpsRDD = pointRDD 114 | else: 115 | fpsRDD = pointRDD.map(lambda x: pus.fps(x, n_newpoint[i])) 116 | fpsRDD.persist() 117 | 118 | knnRDD = fpsRDD.zip(pointRDD).map(lambda x: pus.knn(x[0], x[1], n_sample[i])) 119 | knnRDD.persist() 120 | kernels = pca_params['Layer_{:d}/kernel'.format(i)] 121 | 122 | if i == 0: 123 | num_node = pca_params['Layer_{:d}/num_node'.format(i)] 124 | sgRDD = pointRDD.zip(knnRDD).flatMap(lambda x: pus.sg(x[0], x[0], x[1])) 125 | sgRDD.persist() 126 | pcaRDD = sgRDD.map(lambda x: np.dot(x, kernels.T)) 127 | pca_fea = np.array(pcaRDD.collect()) 128 | pca_fea = pca_fea.reshape((-1, n_newpoint[i], pca_fea.shape[-1])) 129 | pcaRDD = sc.parallelize(pca_fea[:, :, :num_node], num_partition) 130 | pca_leaf_fea = pca_fea[:, :, num_node:] 131 | print('Hop ', i, ': ', pca_fea[:, :, num_node:].shape) 132 | else: 133 | num_node_next = pca_params['Layer_{:d}/num_node_next'.format(i)] 134 | sgRDD = pointRDD.zip(knnRDD).zip(pcaRDD).flatMap(lambda x: pus.sg_cw(x[0][0], x[1], x[0][1])) 135 | sgRDD.persist() 136 | bias = pca_params['Layer_{:d}/bias'.format(i)] 137 | e = np.zeros((kernels.shape[0], kernels.shape[-1])) 138 | e[:, 0] = bias 139 | pcaRDD = sgRDD.map(lambda x: x + bias).map( 140 | lambda x: np.array([np.dot(x[:, j], kernels[j].T) for j in range(kernels.shape[0])])).map(lambda x: x - e) 141 | 142 | pca_fea = np.array(pcaRDD.collect()) 143 | pca_fea = pca_fea.reshape((-1, n_newpoint[i], pca_fea.shape[1], pca_fea.shape[2])) 144 | pca_leaf_fea = np.concatenate([pca_fea[:, :, j, num_node_next[j]:] for j in range(num_node)], axis=-1) 145 | print('Hop ', i, ': ', pca_leaf_fea.shape) 146 | if i != len(n_sample) - 1: 147 | pca_nleaf_fea = np.concatenate([pca_fea[:, :, j, :num_node_next[j]] for j in 
range(num_node)], axis=-1) 148 | pcaRDD = sc.parallelize(pca_nleaf_fea, num_partition) 149 | 150 | num_node = pca_params['Layer_{:d}/num_node'.format(i)] 151 | fea.append(pus.extract_single(pca_leaf_fea)) 152 | pointRDD = fpsRDD 153 | sgRDD.unpersist() 154 | knnRDD.unpersist() 155 | fpsRDD.unpersist() 156 | pcaRDD.unpersist() 157 | pointRDD.unpersist() 158 | fea = np.concatenate(fea, axis=-1) 159 | return fea 160 | 161 | 162 | if __name__ == '__main__': 163 | time_start = time.time() 164 | 165 | initial_point = 1024 166 | n_newpoint = [1024, 128, 128, 64] 167 | n_sample = [64, 64, 64, 64] 168 | threshold = 0.0001 169 | 170 | train_data, train_label = modelnet_data.data_load(initial_point, os.path.join(BASE_DIR, 'modelnet40_ply_hdf5_2048'), True) 171 | test_data, test_label = modelnet_data.data_load(initial_point, os.path.join(BASE_DIR, 'modelnet40_ply_hdf5_2048'), False) 172 | train_data = train_data 173 | train_label = train_label 174 | test_data = test_data 175 | test_label = test_label 176 | print('Train data loaded!') 177 | 178 | pca_params, feature_train = pointhop_train(train_data, n_newpoint, n_sample, threshold, num_partition=1000) 179 | print(feature_train.shape) 180 | 181 | feature_test = pointhop_pred(test_data, n_newpoint, n_sample, pca_params, num_partition=200) 182 | print(feature_test.shape) 183 | 184 | clf = pus.rf_classifier(feature_train, np.squeeze(train_label)) 185 | pred_train = clf.predict(feature_train) 186 | acc_train = accuracy_score(train_label, pred_train) 187 | print('RF Classification train accuracy: ', acc_train) 188 | 189 | pred_test = clf.predict(feature_test) 190 | acc_test = accuracy_score(test_label, pred_test) 191 | print('RF Classification test accuracy: ', acc_test) 192 | 193 | weight = pus.llsr_train(feature_train, train_label, 40) 194 | prob_train, pred_train = pus.llsr_pred(feature_train, weight) 195 | acc_train = accuracy_score(train_label, pred_train) 196 | print('LLSR Classification train accuracy: ', acc_train) 197 | 198 | prob_test, pred_test = pus.llsr_pred(feature_test, weight) 199 | acc_test = accuracy_score(test_label, pred_test) 200 | print('LLSR Classification test accuracy: ', acc_test) 201 | 202 | weight = pus.llsr_train_weighted(feature_train, train_label, 40, epsilon=0.2) 203 | prob_train, pred_train = pus.llsr_pred(feature_train, weight) 204 | acc_train = accuracy_score(train_label, pred_train) 205 | print('WLLSR Classification train accuracy: ', acc_train) 206 | 207 | prob_test, pred_test = pus.llsr_pred(feature_test, weight) 208 | acc_test = accuracy_score(test_label, pred_test) 209 | print('WLLSR Classification test accuracy: ', acc_test) 210 | 211 | sc.stop() 212 | time_end = time.time() 213 | print('Duration:', (time_end - time_start) / 60.0, 'mins') 214 | 215 | 216 | 217 | -------------------------------------------------------------------------------- /point_utils_spark.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import numpy as np 4 | import modelnet_data 5 | from sklearn import ensemble 6 | from numpy.linalg import eigh 7 | from pyspark import SparkConf 8 | from numpy import linalg as LA 9 | from pyspark import SparkContext 10 | 11 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 12 | 13 | 14 | def fps_knn_sg(sample, fea, n_newpoint, n_sample): 15 | ''' 16 | :param sample:(N, 3) 17 | :param fea:(N, dim) 18 | :param n_newpoint: K 19 | :param n_sample: M 20 | :return:(K, 8, dim) 21 | ''' 22 | if len(sample) == n_newpoint: 23 | fps_sample = sample 
24 | else: 25 | fps_sample = fps(sample, n_newpoint) 26 | nn_idx = knn(fps_sample, sample, n_sample) 27 | sg_fea = sg(sample, fea, nn_idx) 28 | return sg_fea 29 | 30 | 31 | def fps(sample, n_newpoint): 32 | ''' 33 | :param sample:(N, 3) 34 | :param n_newpoint: K 35 | :return:(K, 3) 36 | ''' 37 | fps_sample = [] 38 | farthest = np.random.randint(len(sample)) 39 | distance = np.ones((len(sample),), dtype=int) * 1e10 40 | for k in range(n_newpoint): 41 | fps_sample.append(sample[farthest]) 42 | dist = np.sum((sample - sample[farthest, :]) ** 2, axis=-1) 43 | idx = dist < distance 44 | distance[idx] = dist[idx] 45 | farthest = np.argmax(distance, axis=-1) 46 | return np.array(fps_sample) 47 | 48 | 49 | def calc_distances(new_pts, pts): 50 | ''' 51 | :param new_pts:(K, 3) 52 | :param pts:(N, 3) 53 | :return:(N, K) 54 | ''' 55 | tmp_trans = np.transpose(np.array(new_pts), [1, 0]) 56 | pts = np.array(pts) 57 | xy = np.matmul(pts, tmp_trans) 58 | pts_square = (pts**2).sum(axis=1, keepdims=True) 59 | tmp_square_trans = (tmp_trans**2).sum(axis=0, keepdims=True) 60 | return np.squeeze(pts_square + tmp_square_trans - 2 * xy) 61 | 62 | 63 | def knn(new_pts, pts, n_sample): 64 | ''' 65 | :param new_pts:(K, 3) 66 | :param pts:(N, 3) 67 | :param n_sample:int 68 | :return: nn_idx (K, n_sample) 69 | ''' 70 | distance_matrix = calc_distances(new_pts, pts) 71 | nn_idx = np.argpartition(distance_matrix, (0, n_sample), axis=0)[:n_sample, :] 72 | nn_idx = np.transpose(nn_idx, [1, 0]) 73 | return nn_idx 74 | 75 | 76 | def sg(sample, fea, nn_idx): 77 | ''' 78 | :param sample:(N, 3) 79 | :param fea:(N, n_sample, dim) 80 | :return: nn_idx (K, 8, dim) 81 | ''' 82 | pts_fea = np.concatenate([sample, fea], axis=-1) 83 | nn_fea = [] 84 | for i in range(nn_idx.shape[0]): 85 | nn_fea.append(pts_fea[nn_idx[i], :]) 86 | nn_fea = np.array(nn_fea) 87 | pc_n = nn_fea[..., :3] 88 | pc_fea = nn_fea[..., 3:] 89 | pc = np.expand_dims(pc_n[:, 0, :], axis=1) 90 | pc_c = pc_n - pc 91 | pc_idx = [] 92 | pc_idx.append(pc_c[:, :, 0] >= 0) 93 | pc_idx.append(pc_c[:, :, 0] <= 0) 94 | pc_idx.append(pc_c[:, :, 1] >= 0) 95 | pc_idx.append(pc_c[:, :, 1] <= 0) 96 | pc_idx.append(pc_c[:, :, 2] >= 0) 97 | pc_idx.append(pc_c[:, :, 2] <= 0) 98 | 99 | pc_bin = [] 100 | pc_bin.append(np.expand_dims((pc_idx[0] * pc_idx[2] * pc_idx[4])*1.0, axis=2)) 101 | pc_bin.append(np.expand_dims((pc_idx[0] * pc_idx[2] * pc_idx[5])*1.0, axis=2)) 102 | pc_bin.append(np.expand_dims((pc_idx[0] * pc_idx[3] * pc_idx[4])*1.0, axis=2)) 103 | pc_bin.append(np.expand_dims((pc_idx[0] * pc_idx[3] * pc_idx[5])*1.0, axis=2)) 104 | pc_bin.append(np.expand_dims((pc_idx[1] * pc_idx[2] * pc_idx[4])*1.0, axis=2)) 105 | pc_bin.append(np.expand_dims((pc_idx[1] * pc_idx[2] * pc_idx[5])*1.0, axis=2)) 106 | pc_bin.append(np.expand_dims((pc_idx[1] * pc_idx[3] * pc_idx[4])*1.0, axis=2)) 107 | pc_bin.append(np.expand_dims((pc_idx[1] * pc_idx[3] * pc_idx[5])*1.0, axis=2)) 108 | 109 | value = np.multiply(pc_fea, pc_bin) 110 | value = np.sum(value, axis=2, keepdims=True) 111 | num = np.sum(pc_bin, axis=2, keepdims=True) 112 | sg_fea = np.squeeze(value/num, axis=(2,)) 113 | sg_fea = np.transpose(sg_fea, [1, 0, 2]) 114 | sg_fea = sg_fea.reshape((sg_fea.shape[0], -1)) 115 | return sg_fea 116 | 117 | 118 | def sg_cw(sample, fea, nn_idx): 119 | ''' 120 | :param sample:(N, 3) 121 | :param fea:(N, n_sample, dim) 122 | :return: nn_idx (K, 8, dim) 123 | ''' 124 | sg_fea = [] 125 | for i in range(fea.shape[-1]): 126 | fea_cw = fea[:, i].reshape((fea.shape[0], 1)) 127 | pts_fea = np.concatenate([sample, 
fea_cw], axis=-1) 128 | nn_fea = [] 129 | for i in range(nn_idx.shape[0]): 130 | nn_fea.append(pts_fea[nn_idx[i], :]) 131 | nn_fea = np.array(nn_fea) 132 | pc_n = nn_fea[..., :3] 133 | pc_fea = nn_fea[..., 3:] 134 | pc = np.expand_dims(pc_n[:, 0, :], axis=1) 135 | pc_c = pc_n - pc 136 | pc_idx = [] 137 | pc_idx.append(pc_c[:, :, 0] >= 0) 138 | pc_idx.append(pc_c[:, :, 0] <= 0) 139 | pc_idx.append(pc_c[:, :, 1] >= 0) 140 | pc_idx.append(pc_c[:, :, 1] <= 0) 141 | pc_idx.append(pc_c[:, :, 2] >= 0) 142 | pc_idx.append(pc_c[:, :, 2] <= 0) 143 | 144 | pc_bin = [] 145 | pc_bin.append(np.expand_dims((pc_idx[0] * pc_idx[2] * pc_idx[4])*1.0, axis=2)) 146 | pc_bin.append(np.expand_dims((pc_idx[0] * pc_idx[2] * pc_idx[5])*1.0, axis=2)) 147 | pc_bin.append(np.expand_dims((pc_idx[0] * pc_idx[3] * pc_idx[4])*1.0, axis=2)) 148 | pc_bin.append(np.expand_dims((pc_idx[0] * pc_idx[3] * pc_idx[5])*1.0, axis=2)) 149 | pc_bin.append(np.expand_dims((pc_idx[1] * pc_idx[2] * pc_idx[4])*1.0, axis=2)) 150 | pc_bin.append(np.expand_dims((pc_idx[1] * pc_idx[2] * pc_idx[5])*1.0, axis=2)) 151 | pc_bin.append(np.expand_dims((pc_idx[1] * pc_idx[3] * pc_idx[4])*1.0, axis=2)) 152 | pc_bin.append(np.expand_dims((pc_idx[1] * pc_idx[3] * pc_idx[5])*1.0, axis=2)) 153 | 154 | value = np.multiply(pc_fea, pc_bin) 155 | value = np.sum(value, axis=2, keepdims=True) 156 | num = np.sum(pc_bin, axis=2, keepdims=True) 157 | sg_fea_cw = np.squeeze(value/num, axis=(2,)) 158 | sg_fea_cw = np.transpose(sg_fea_cw, [1, 0, 2]) 159 | sg_fea_cw = sg_fea_cw.reshape((sg_fea_cw.shape[0], -1)) 160 | sg_fea.append(sg_fea_cw) 161 | sg_fea = np.transpose(sg_fea, [1, 2, 0]) 162 | return sg_fea 163 | 164 | 165 | def pca_cw(sgRDD, pre_energy, threshold): 166 | ''' 167 | :param sgRDD: (M*K, dim, channel) 168 | :param pre_energy: (channel, ) 169 | :param threshold: float 170 | :return: kernels (channel, dim, dim) 171 | :return: energy (channel, dim) 172 | ''' 173 | kernels = [] 174 | energies = [] 175 | num_node_next = [] 176 | dc = np.array(sgRDD.map(lambda x: np.mean(x, axis=0)).collect()) 177 | sgRDD = sgRDD.map(lambda x: x - np.mean(x, axis=0)) 178 | fe = np.squeeze(sgRDD.map(lambda x: (1, (x, 1))).reduceByKey(lambda x, y: (x[0] + y[0], x[1] + y[1])) 179 | .map(lambda x: x[1][0] / float(x[1][1])).collect()) 180 | sgRDD = sgRDD.map(lambda x: x - fe) 181 | 182 | num_channels = fe.shape[0] 183 | largest_eva = np.var(dc, axis=0) * num_channels 184 | dc_kernel = [1 / np.sqrt(num_channels) * np.ones((1, num_channels)) / np.sqrt(largest_eva[i]) for i in range(len(largest_eva))] 185 | 186 | cov = sgRDD.map(lambda x: np.array([np.outer(x[:, i], x[:, i]) for i in range(x.shape[-1])])).sum() / dc.shape[0] 187 | col = cov.shape[-1] 188 | for i in range(cov.shape[0]): 189 | eva, eve = eigh(cov[i]) 190 | inds = np.argsort(eva) 191 | kernel = eve.T[inds[-1:-(col + 1):-1]] 192 | eva = eva[inds[-1:-(col + 1):-1]] 193 | kernel = np.concatenate((dc_kernel[i], kernel), axis=0)[:num_channels] 194 | eva = np.concatenate(([largest_eva[i]], eva), axis=0)[:num_channels] 195 | energy = np.array([i / sum(eva) for i in eva]) * pre_energy[i] 196 | num_node_next += [np.sum(energy > threshold)] 197 | kernels.append(kernel) 198 | energies.append(energy) 199 | kernels = np.array(kernels) 200 | energies = np.array(energies) 201 | return kernels, energies, num_node_next 202 | 203 | 204 | def pca(sgRDD): 205 | ''' 206 | :param sgRDD:(M*K, dim) 207 | :return: kernels (dim, dim) 208 | :return: energy (dim) 209 | ''' 210 | dc = np.array(sgRDD.map(lambda x: np.mean(x)).collect()) 211 | sgRDD = 
sgRDD.map(lambda x: x - np.mean(x)) 212 | fe = np.squeeze(sgRDD.map(lambda x: (1, (x, 1))).reduceByKey(lambda x, y: (x[0] + y[0], x[1] + y[1])) 213 | .map(lambda x: x[1][0]/float(x[1][1])).collect()) 214 | sgRDD = sgRDD.map(lambda x: x - fe) 215 | 216 | num_channels = fe.shape[0] 217 | largest_eva = [np.var(dc) * num_channels] 218 | dc_kernel = 1 / np.sqrt(num_channels) * np.ones((1, num_channels)) / np.sqrt(largest_eva) 219 | 220 | cov = sgRDD.map(lambda x: np.outer(x, x)).sum()/dc.shape[0] 221 | col = cov.shape[1] 222 | eva, eve = eigh(cov) 223 | inds = np.argsort(eva) 224 | kernels = eve.T[inds[-1:-(col + 1):-1]] 225 | eva = eva[inds[-1:-(col + 1):-1]] 226 | kernels = np.concatenate((dc_kernel, kernels), axis=0)[:num_channels] 227 | eva = np.concatenate((largest_eva, eva), axis=0)[:num_channels] 228 | energy = np.array([i / sum(eva) for i in eva]) 229 | return kernels, energy 230 | 231 | 232 | def extract(feat): 233 | ''' 234 | Do feature extraction based on the provided feature. 235 | :param feat: [num_layer, num_samples, num_points, feature_dimension] 236 | :return: feature 237 | ''' 238 | mean = [] 239 | maxi = [] 240 | l1 = [] 241 | l2 = [] 242 | 243 | for i in range(len(feat)): 244 | mean.append(feat[i].mean(axis=1, keepdims=False)) 245 | maxi.append(feat[i].max(axis=1, keepdims=False)) 246 | l1.append(np.linalg.norm(feat[i], ord=1, axis=1, keepdims=False)) 247 | l2.append(np.linalg.norm(feat[i], ord=2, axis=1, keepdims=False)) 248 | mean = np.concatenate(mean, axis=-1) 249 | maxi = np.concatenate(maxi, axis=-1) 250 | l1 = np.concatenate(l1, axis=-1) 251 | l2 = np.concatenate(l2, axis=-1) 252 | 253 | return [mean, maxi, l1, l2] 254 | 255 | 256 | def extract_single(feat): 257 | ''' 258 | Do feature extraction based on the provided feature. 259 | :param feat: [num_samples, num_points, feature_dimension] 260 | :return: feature 261 | ''' 262 | feature = [] 263 | feature.append(feat.mean(axis=1, keepdims=False)) 264 | feature.append(feat.max(axis=1, keepdims=False)) 265 | feature.append(np.linalg.norm(feat, ord=1, axis=1, keepdims=False)) 266 | feature.append(np.linalg.norm(feat, ord=2, axis=1, keepdims=False)) 267 | feature = np.concatenate(feature, axis=-1) 268 | return feature 269 | 270 | 271 | def average_acc(label, pred_label): 272 | classes = np.arange(40) 273 | acc = np.zeros(len(classes)) 274 | for i in range(len(classes)): 275 | ind = np.where(label == classes[i])[0] 276 | pred_test_special = pred_label[ind] 277 | acc[i] = len(np.where(pred_test_special == classes[i])[0]) / float(len(ind)) 278 | return acc 279 | 280 | 281 | def onehot_encoding(n_class, labels): 282 | targets = labels.reshape(-1) 283 | one_hot_targets = np.eye(n_class)[targets] 284 | return one_hot_targets 285 | 286 | 287 | def llsr_train(feature, label, num_class): 288 | A = np.ones((feature.shape[0], 1)) 289 | feature = np.concatenate((A, feature), axis=1) 290 | if num_class is not None: 291 | y = onehot_encoding(num_class, label) 292 | else: 293 | y = label 294 | weight = np.matmul(LA.pinv(feature), y) 295 | return weight 296 | 297 | 298 | def llsr_train_weighted(feature, label, num_class, epsilon): 299 | w = np.zeros((label.shape[0], label.shape[0])) 300 | f = [] 301 | for i in range(num_class): 302 | idx = np.where(label == i)[0] 303 | f.append(1/(float(len(idx))/label.shape[0] + epsilon)) 304 | for i in range(feature.shape[0]): 305 | w[i, i] = f[label[i][0]] 306 | 307 | A = np.ones((feature.shape[0], 1)) 308 | feature = np.concatenate((A, feature), axis=1) 309 | if num_class is not None: 310 | y = 
onehot_encoding(num_class, label) 311 | else: 312 | y = label 313 | weight = np.matmul(LA.pinv(np.matmul(w, feature)), np.matmul(w, y)) 314 | return weight 315 | 316 | 317 | def llsr_pred(feature, weight): 318 | A = np.ones((feature.shape[0], 1)) 319 | feature = np.concatenate((A, feature), axis=1) 320 | feature = np.matmul(feature, weight) 321 | pred = np.argmax(feature, axis=1) 322 | return feature, pred 323 | 324 | 325 | def rf_classifier(feat, y): 326 | ''' 327 | Train random forest based on the provided feature. 328 | :param feat: [num_samples, feature_dimension] 329 | :param y: label provided 330 | :return: classifer 331 | ''' 332 | clf = ensemble.RandomForestClassifier(n_estimators=128, bootstrap=False, 333 | n_jobs=-1) 334 | clf.fit(feat, y) 335 | return clf 336 | 337 | 338 | if __name__ == '__main__': 339 | time_start = time.time() 340 | config = SparkConf().setAll( 341 | [('spark.driver.memory', '4g'), 342 | ('spark.executor.memory', '4g'), 343 | ('spark.driver.maxResultSize', '2g')]).setAppName('PCSEG').setMaster('local[*]') 344 | sc = SparkContext(conf=config) 345 | sc.setLogLevel("ERROR") 346 | 347 | train_data, train_label = modelnet_data.data_load(1024, os.path.join(BASE_DIR, 'modelnet40_ply_hdf5_2048'), True) 348 | test_data, test_label = modelnet_data.data_load(1024, os.path.join(BASE_DIR, 'modelnet40_ply_hdf5_2048'), False) 349 | train_data = train_data[:200] 350 | train_label = train_label[:200] 351 | test_data = test_data[:200] 352 | test_label = test_label[:200] 353 | print('Train data loaded!') 354 | 355 | pointRDD = sc.parallelize(train_data, 5) 356 | fpsRDD = pointRDD.map(lambda x: fps(x, 128)) 357 | knnRDD = fpsRDD.zip(pointRDD).map(lambda x: knn(x[0], x[1], 64)) 358 | sgRDD = pointRDD.zip(knnRDD).flatMap(lambda x: sg(x[0], x[0], x[1])) 359 | kernels, energy = pca(sgRDD) 360 | pca_fea = np.array(sgRDD.map(lambda x: np.dot(x, kernels[:5].T)).collect()) 361 | pca_fea = pca_fea.reshape((train_data.shape[0], 128, -1)) 362 | print('PointHop Unit Finish! Feature shape: ', pca_fea.shape) 363 | 364 | sc.stop() 365 | time_end = time.time() 366 | print('Duration:', time_end - time_start) 367 | 368 | --------------------------------------------------------------------------------
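Note: `llsr_train` and `llsr_pred` above implement a one-hot least-squares classifier: append a bias column, regress one-hot labels with the pseudo-inverse, and take the argmax of the regression outputs at prediction time. A tiny self-contained sketch of the same formulation (the synthetic feature matrix, sample count and class count below are made up purely for illustration):

```python
import numpy as np
from numpy import linalg as LA

# Synthetic stand-in for PointHop features: 6 samples, 4 feature dimensions, 3 classes.
feature = np.random.randn(6, 4)
label = np.array([[0], [1], [2], [0], [1], [2]])

# Training (mirrors llsr_train): bias column + one-hot targets, solved with the pseudo-inverse.
X = np.concatenate((np.ones((feature.shape[0], 1)), feature), axis=1)
Y = np.eye(3)[label.reshape(-1)]
W = np.matmul(LA.pinv(X), Y)

# Prediction (mirrors llsr_pred): the regression outputs act as class scores, argmax picks the class.
scores = np.matmul(X, W)
pred = np.argmax(scores, axis=1)
print(pred)
```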