├── Data_generation ├── MSR_Point_cloud_frames_generation(2048-FPS512_24).py └── NTU_Point_cloud_frames_generation(2048-FPS512_20).py ├── MSR_Net ├── MSR-Action3D(深度.mat).zip ├── channelattention │ ├── Channelattention.py │ ├── __init__.py │ └── __pycache__ │ │ ├── Channelattention.cpython-39.pyc │ │ └── __init__.cpython-39.pyc ├── dataset │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-39.pyc │ │ └── dataset.cpython-39.pyc │ └── dataset.py ├── model │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-39.pyc │ │ └── model.cpython-39.pyc │ └── model.py ├── positionencoding │ ├── Positionencoding.py │ ├── __init__.py │ └── __pycache__ │ │ ├── Positionencoding.cpython-39.pyc │ │ └── __init__.cpython-39.pyc ├── test.py ├── train.py └── utils │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-39.pyc │ └── utils_3d.cpython-39.pyc │ └── utils_3d.py ├── NTU60_Net ├── channelattention │ ├── Channelattention.py │ ├── __init__.py │ └── __pycache__ │ │ ├── Channelattention.cpython-39.pyc │ │ └── __init__.cpython-39.pyc ├── dataset │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-39.pyc │ │ └── dataset.cpython-39.pyc │ └── dataset.py ├── model │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-39.pyc │ │ └── model.cpython-39.pyc │ └── model.py ├── positionencoding │ ├── Positionencoding.py │ ├── __init__.py │ └── __pycache__ │ │ ├── Positionencoding.cpython-39.pyc │ │ └── __init__.cpython-39.pyc ├── test.py ├── train.py └── utils │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-39.pyc │ └── utils_3d.cpython-39.pyc │ └── utils_3d.py └── README.txt /Data_generation/MSR_Point_cloud_frames_generation(2048-FPS512_24).py: -------------------------------------------------------------------------------- 1 | import os 2 | import tqdm 3 | import imageio 4 | import numpy as np 5 | import time 6 | import random 7 | import math 8 | import scipy.io as sio 9 | from PIL import Image# 10 | ''' 11 | due to the ntu120 full depth maps data is not avialable, the action proposal is used by skeleton-based action proposal. 
12 | ''' 13 | 14 | 15 | fx = 260.0-20 16 | fy = 240 17 | cx = (20.0+260)/2 18 | cy = (0+240)/2 19 | 20 | 21 | SAMPLE_NUM = 2048 22 | fps_sample_num=512 23 | K = 24 # max frame limit for temporal rank 24 | sample_num_level1=512 25 | 26 | save_path = 'C:\\Users\\Administrator\\Desktop\\LX\\paper\\dataset\\Prosessed_dataset\\01_MSR3D\\T' 27 | 28 | try: 29 | os.makedirs(save_path) 30 | except OSError: 31 | pass 32 | 33 | 34 | 35 | def main(): 36 | data_path='C:\\Users\\Administrator\\Desktop\\LX\\paper\\dataset\\Raw_dataset\\01_MSR3D\\MSR-Action3D(深度.mat)' 37 | files= os.listdir(data_path) 38 | for file in files: # 39 | if(file[-3:]=='mat'): 40 | if(1==1):#从(a20_s06_e02_sdepth.mat) 41 | action=int(file[1:3]) 42 | people=int(file[5:7]) 43 | order=int(file[9:11]) 44 | # 45 | if(action<10): 46 | sign_a='A00' 47 | else: 48 | sign_a='A0' 49 | 50 | if(people<10): 51 | sign_s='P00' 52 | else: 53 | sign_s='P0' 54 | 55 | if(order<10): 56 | sign_e='R00' 57 | else: 58 | sign_e='R0' 59 | # 60 | filename='S001C001'+sign_s+str(people)+sign_e+str(order)+sign_a+str(action)+'.npy'#文件 61 | print(filename) 62 | if(1==1):# 63 | data=sio.loadmat(data_path+"\\"+file) 64 | depth=data['depth'] 65 | h, w ,f = depth.shape 66 | if(1==1):#lx 67 | ## ------ select a fixed number K of images 68 | # 69 | n_frame = f# 70 | all_sam = np.arange(n_frame) 71 | if(1==0):# 72 | if n_frame > K: 73 | frame_index = random.sample(list(all_sam),K) 74 | #frame_index = np.array(frame_index) 75 | n_frame = K 76 | else: 77 | if n_frame < K/2: 78 | frame_index = random.sample(list(all_sam),K-n_frame-n_frame) 79 | frame_index.extend(list(all_sam)) 80 | frame_index.extend(list(all_sam)) 81 | n_frame = K 82 | else: 83 | frame_index = random.sample(list(all_sam),K-n_frame) 84 | frame_index.extend(list(all_sam)) 85 | n_frame = K 86 | frame_index = all_sam.tolist() 87 | if(1==0):# 88 | if n_frame > K: 89 | frame_index=[] 90 | for jj in range(K): 91 | iii = int(np.random.randint(int(n_frame*jj/K), int(n_frame*(jj+1)/K)), size=1) 92 | frame_index.append(iii) 93 | n_frame=K 94 | else: 95 | # frame_index = all_sam.tolist() 96 | frame_index = random.sample(list(all_sam),K-n_frame) 97 | frame_index.extend(list(all_sam)) 98 | n_frame = K 99 | if(1==1):# 100 | if n_frame > K: 101 | frame_index=[] 102 | for jj in range(K): 103 | iii = int((int(n_frame*jj/K)+int(n_frame*(jj+1)/K))/2) 104 | frame_index.append(iii) 105 | n_frame=K 106 | else: 107 | # frame_index = all_sam.tolist() 108 | frame_index = random.sample(list(all_sam),K-n_frame) 109 | frame_index.extend(list(all_sam)) 110 | n_frame = K 111 | frame_index.sort() 112 | print(f,K,frame_index) 113 | if(1==1):# 114 | all_frame_points_list = [] 115 | depth_Kframe=depth[:,:,frame_index] 116 | # print(depth_Kframe.shape) 117 | for i in range(n_frame): 118 | # print(depth_Kframe[:,:,i].shape) 119 | # if(i==0): 120 | # Image.fromarray(depth_Kframe[:,:,i]).show() 121 | cloud_im = depth_to_pointcloud(depth_Kframe[:,:,i]) 122 | # print(cloud_im.shape)#(3, 5356) 123 | #lx#print(type(cloud_im))# 124 | all_frame_points_list.append(cloud_im) #all frame points in 1 list 125 | #lx#print(all_frame_points_list.shape) 126 | if(1==1):# 127 | all_frame_3Dpoints_array = np.zeros(shape =[n_frame, SAMPLE_NUM, 3]) 128 | for i in range(n_frame): 129 | each_frame_points=all_frame_points_list[i].T#n*3 130 | # print('000',each_frame_points.shape) 131 | 132 | if len(each_frame_points)< SAMPLE_NUM:#lx# 133 | if len(each_frame_points)< SAMPLE_NUM/2: 134 | if len(each_frame_points)< SAMPLE_NUM/4: 135 | # print('01') 136 | 
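# This branch handles frames with fewer than SAMPLE_NUM/4 valid points: the frame's points are
# tiled four times and the remainder is filled with randomly repeated points, so every frame
# reaches exactly SAMPLE_NUM (2048) points before FPS ordering.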
rand_points_index = np.random.randint(0, each_frame_points.shape[0], size=SAMPLE_NUM-len(each_frame_points)-len(each_frame_points)-len(each_frame_points)-len(each_frame_points)) 137 | # print(len(rand_points_index)) 138 | each_frame_points = np.concatenate((each_frame_points, each_frame_points,each_frame_points, each_frame_points,each_frame_points[rand_points_index,:]), axis = 0) 139 | # print(each_frame_points.shape) 140 | else: 141 | 142 | # print('11') 143 | rand_points_index = np.random.randint(0, each_frame_points.shape[0], size=SAMPLE_NUM-len(each_frame_points)-len(each_frame_points)) 144 | each_frame_points = np.concatenate((each_frame_points, each_frame_points,each_frame_points[rand_points_index,:]), axis = 0) 145 | 146 | else: 147 | rand_points_index = np.random.randint(0, each_frame_points.shape[0], size=SAMPLE_NUM-len(each_frame_points)) 148 | each_frame_points = np.concatenate((each_frame_points, each_frame_points[rand_points_index,:]), axis = 0) 149 | # print('22') 150 | else:#lx# 151 | rand_points_index = np.random.randint(0, each_frame_points.shape[0], size=SAMPLE_NUM) 152 | each_frame_points = each_frame_points[rand_points_index,:] 153 | # print('33') 154 | # print(each_frame_points.shape) 155 | # all_frame_4Dpoints_array[i]=each_frame_points 156 | if(1==1):#PFS 157 | sampled_idx_l1 = farthest_point_sampling_fast(each_frame_points, sample_num_level1) 158 | # print(len(sampled_idx_l1)) 159 | other_idx = np.setdiff1d(np.arange(SAMPLE_NUM), sampled_idx_l1.ravel()) 160 | # print(len(other_idx)) 161 | new_idx = np.concatenate((sampled_idx_l1.ravel(), other_idx)) 162 | # print(len(new_idx)) 163 | each_frame_points = each_frame_points[new_idx,:] 164 | all_frame_3Dpoints_array[i]=each_frame_points #i:2048*3 165 | # break 166 | if(1==1):# 167 | max_x = all_frame_3Dpoints_array[:,:,0].max() 168 | max_y = all_frame_3Dpoints_array[:,:,1].max() 169 | max_z = all_frame_3Dpoints_array[:,:,2].max() 170 | min_x = all_frame_3Dpoints_array[:,:,0].min() 171 | min_y = all_frame_3Dpoints_array[:,:,1].min() 172 | min_z = all_frame_3Dpoints_array[:,:,2].min() 173 | 174 | x_len = max_x - min_x 175 | y_len = max_y - min_y 176 | z_len = max_z - min_z 177 | 178 | x_center = (max_x + min_x)/2 179 | y_center = (max_y + min_y)/2 180 | z_center = (max_z + min_z)/2 181 | 182 | all_frame_3Dpoints_array[:,:,0]=(all_frame_3Dpoints_array[:,:,0]-x_center)/y_len 183 | all_frame_3Dpoints_array[:,:,1]=(all_frame_3Dpoints_array[:,:,1]-y_center)/y_len 184 | all_frame_3Dpoints_array[:,:,2]=(all_frame_3Dpoints_array[:,:,2]-z_center)/y_len 185 | 186 | save_npy(all_frame_3Dpoints_array, filename) 187 | # print(all_frame_3Dpoints_array.shape) 188 | # print(all_frame_3Dpoints_array[0]) 189 | # print(all_frame_4Dpoints_array[1]) 190 | # break 191 | def save_npy(data, filename): 192 | file = os.path.join(save_path, filename) 193 | if not os.path.isfile(file): 194 | np.save(file, data) 195 | 196 | def farthest_point_sampling_fast(pc, sample_num): 197 | pc_num = pc.shape[0] 198 | #lx#print(pc_num) 199 | sample_idx = np.zeros(shape = [sample_num,1], dtype = np.int32) 200 | sample_idx[0] = np.random.randint(0,pc_num) 201 | #lx#print(sample_idx.shape) 202 | #lx#print(sample_idx[0]) 203 | #lx#print(pc[sample_idx[0],:]) 204 | cur_sample = np.tile(pc[sample_idx[0],:], (pc_num,1)) 205 | #lx#print(cur_sample.shape)#lx#(2048, 3) 206 | diff = pc-cur_sample 207 | #lx#print(diff.shape)#lx#(2048, 3) 208 | 209 | min_dist = (diff*diff).sum(axis = 1)# 210 | 211 | #lx#print(min_dist.shape)#lx#(2048,) 212 | 
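# min_dist holds each point's squared distance to its nearest already-selected sample.
# Each iteration picks the point with the largest min_dist (the farthest point), then
# updates min_dist as the element-wise minimum with the distances to that new sample.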
#lx#print(min_dist.reshape(pc_num,1).shape)#lx#(2048,1) 213 | for cur_sample_idx in range(1,sample_num):#lx#sample_num=512 214 | ## find the farthest point 215 | 216 | sample_idx[cur_sample_idx] = np.argmax(min_dist) 217 | # print(sample_idx[cur_sample_idx]) 218 | if cur_sample_idx < sample_num-1: 219 | diff = pc - np.tile(pc[sample_idx[cur_sample_idx],:], (pc_num,1)) 220 | min_dist = np.concatenate((min_dist.reshape(pc_num,1), (diff*diff).sum(axis = 1).reshape(pc_num,1)), axis = 1).min(axis = 1) ##? 221 | #print(min_dist) 222 | return sample_idx# 223 | 224 | def load_depth_from_img(depth_path): 225 | depth_im = imageio.imread(depth_path) #im is a numpy array 226 | return depth_im 227 | 228 | def depth_to_pointcloud(depth_im):# 229 | # fx = 2 230 | # fy = 1 231 | # cx = 0 232 | # cy = 0 233 | #lx#例子输入np.array([[0,2,3],[6,5,3]]) 234 | rows,cols = depth_im.shape#lx#(424, 512) 235 | #lx#print(depth_im.shape)#lx#(2, 3) 236 | xx,yy = np.meshgrid(range(0,cols), range(0,rows)) 237 | #lx#print(xx)#lx#[[0 1 2],[0 1 2]] 238 | #lx#print(yy)#lx#[[0 0 0],[1 1 1]] 239 | valid = depth_im > 0 240 | #lx#print(valid)#lx#[[False True True],[ True True True]] 241 | xx = xx[valid]#lx# 242 | yy = yy[valid]#lx# 243 | #lx#print(xx)#lx#[1 2 0 1 2] 244 | #lx#print(yy)#lx#[0 0 1 1 1] 245 | depth_im = depth_im[valid] 246 | #lx#print(depth_im)#lx#[2 3 6 5 3] 247 | X = (xx - cx) * depth_im / fx# 248 | Y = (yy - cy) * depth_im / fy# 249 | #lx#print(X)#lx#[ -6. -6. -24. -15. -6. ] 250 | #lx#print(Y)#lx#[ -5. -7.5 -12. -10. -6. ] 251 | #lx#print((2-4)*8/1)#lx#-6.0 252 | Z = depth_im 253 | points3d = np.array([X.flatten(), Y.flatten(), Z.flatten()]) 254 | #lx#print(points3d)#lx# 255 | #lx#[[ -6. -6. -24. -15. -6. ] 256 | #lx# [ -5. -7.5 -12. -10. -6. ] 257 | #lx# [ 2. 3. 6. 5. 3. ]] 258 | return points3d 259 | 260 | if __name__ == '__main__': 261 | main() 262 | 263 | -------------------------------------------------------------------------------- /Data_generation/NTU_Point_cloud_frames_generation(2048-FPS512_20).py: -------------------------------------------------------------------------------- 1 | import os 2 | import tqdm 3 | import imageio 4 | import numpy as np 5 | import time 6 | import random 7 | import math 8 | import scipy.io as sio 9 | from PIL import Image# 10 | 11 | fx = 365.481 12 | fy = 365.481 13 | cx = 257.346 14 | cy = 210.347 15 | 16 | 17 | SAMPLE_NUM = 2048 18 | fps_sample_num=512 19 | K = 20 # max frame limit for temporal rank 20 | 21 | save_path = 'C:\\Users\\Administrator\\Desktop\\LX\\paper\\dataset\\Prosessed_dataset\\4DV(去地面)\\T120' 22 | 23 | try: 24 | os.makedirs(save_path) 25 | except OSError: 26 | pass 27 | 28 | def main(): 29 | data_path = 'C:\\Users\\Administrator\\Desktop\\LX\\paper\\dataset\\Raw_dataset\\ntu60dataset' 30 | sub_Files = os.listdir(data_path) 31 | sub_Files.sort() 32 | index17=0 33 | 34 | for s_fileName in sub_Files:#lx#Traverse 17 settings folders 35 | index17=index17+1 36 | if(index17>0): 37 | videoPath = os.path.join(data_path, s_fileName, 'nturgb+d_depth_masked') 38 | if os.path.isdir(videoPath):#lx#Determine whether it is a directory 39 | print(s_fileName) 40 | video_Files = os.listdir(videoPath) 41 | #print(video_Files) 42 | video_Files.sort() 43 | video_index=0 44 | video_num=len(video_Files) 45 | print(time.time()) 46 | for video_FileName in video_Files:#lx#Traverse the sample folder 47 | video_index=video_index+1 48 | # if(video_num*3/4 K: 69 | frame_index = random.sample(list(all_sam),K) 70 | #frame_index = np.array(frame_index) 71 | n_frame = K 72 | else: 73 | if 
n_frame < K/2: 74 | frame_index = random.sample(list(all_sam),K-n_frame-n_frame) 75 | frame_index.extend(list(all_sam)) 76 | frame_index.extend(list(all_sam)) 77 | n_frame = K 78 | else: 79 | frame_index = random.sample(list(all_sam),K-n_frame) 80 | frame_index.extend(list(all_sam)) 81 | n_frame = K 82 | frame_index = all_sam.tolist() 83 | if(1==0):#Randomly sample frames at equal intervals 84 | if n_frame > K: 85 | frame_index=[] 86 | for jj in range(K): 87 | iii = int(np.random.randint(int(n_frame*jj/K), int(n_frame*(jj+1)/K)), size=1) 88 | frame_index.append(iii) 89 | n_frame=K 90 | else: 91 | # frame_index = all_sam.tolist() 92 | frame_index = random.sample(list(all_sam),K-n_frame) 93 | frame_index.extend(list(all_sam)) 94 | n_frame = K 95 | if(1==1):#Sampling frames at equal intervals 96 | if n_frame > K: 97 | frame_index=[] 98 | for jj in range(K): 99 | iii = int((int(n_frame*jj/K)+int(n_frame*(jj+1)/K))/2) 100 | frame_index.append(iii) 101 | n_frame=K 102 | else: 103 | # frame_index = all_sam.tolist() 104 | frame_index = random.sample(list(all_sam),K-n_frame) 105 | frame_index.extend(list(all_sam)) 106 | n_frame = K 107 | frame_index.sort() 108 | # print(frame_index) 109 | 110 | ### ------convert the depth sequence to points data 111 | # print(n_frame,K) 112 | if(1==1):#Read the point cloud of each frame and sample 113 | all_frame_3Dpoints_array = np.zeros(shape =[n_frame, fps_sample_num, 3]) 114 | i=0 115 | for i_frame in frame_index:#lx#Traverse frames 116 | depthName = imgNames[i_frame] 117 | img_path = os.path.join(pngPath,depthName) 118 | depth_im = load_depth_from_img(img_path) 119 | #lx#print(depth_im.shape)#(424, 512) 120 | each_frame_points = depth_to_pointcloud(depth_im).T 121 | #lx#print(cloud_im.shape)#(3, 5356) 122 | #lx#print(type(cloud_im))# 123 | if(1==1):#Randomly sample 2048 points 124 | if len(each_frame_points)< SAMPLE_NUM:#lx#If the number of points is less than 2048, it will be repeated randomly to 2048 125 | if len(each_frame_points)< SAMPLE_NUM/2: 126 | if len(each_frame_points)< SAMPLE_NUM/4: 127 | # print('01') 128 | rand_points_index = np.random.randint(0, each_frame_points.shape[0], size=SAMPLE_NUM-len(each_frame_points)-len(each_frame_points)-len(each_frame_points)-len(each_frame_points)) 129 | # print(len(rand_points_index)) 130 | each_frame_points = np.concatenate((each_frame_points, each_frame_points,each_frame_points, each_frame_points,each_frame_points[rand_points_index,:]), axis = 0) 131 | # print(each_frame_points.shape) 132 | else: 133 | rand_points_index = np.random.randint(0, each_frame_points.shape[0], size=SAMPLE_NUM-len(each_frame_points)-len(each_frame_points)) 134 | each_frame_points = np.concatenate((each_frame_points, each_frame_points,each_frame_points[rand_points_index,:]), axis = 0) 135 | 136 | else: 137 | rand_points_index = np.random.randint(0, each_frame_points.shape[0], size=SAMPLE_NUM-len(each_frame_points)) 138 | each_frame_points = np.concatenate((each_frame_points, each_frame_points[rand_points_index,:]), axis = 0) 139 | # print('22') 140 | else:#lx#If it exceeds 2048, randomly sample 2048 points 141 | rand_points_index = np.random.randint(0, each_frame_points.shape[0], size=SAMPLE_NUM) 142 | each_frame_points = each_frame_points[rand_points_index,:] 143 | # print('33') 144 | # print(each_frame_points.shape) 145 | # all_frame_4Dpoints_array[i]=each_frame_points 146 | if(1==1):#PFS 147 | sampled_idx_l1 = farthest_point_sampling_fast(each_frame_points, fps_sample_num) 148 | # print(len(sampled_idx_l1)) 149 | other_idx = 
np.setdiff1d(np.arange(SAMPLE_NUM), sampled_idx_l1.ravel()) 150 | # print(len(other_idx)) 151 | new_idx = np.concatenate((sampled_idx_l1.ravel(), other_idx)) 152 | # print(len(new_idx)) 153 | each_frame_points = each_frame_points[new_idx,:] 154 | all_frame_3Dpoints_array[i]=each_frame_points[:fps_sample_num,:] #i:512*3 155 | i=i+1 156 | if(1==1):#Normalized 157 | max_x = all_frame_3Dpoints_array[:,:,0].max() 158 | max_y = all_frame_3Dpoints_array[:,:,1].max() 159 | max_z = all_frame_3Dpoints_array[:,:,2].max() 160 | min_x = all_frame_3Dpoints_array[:,:,0].min() 161 | min_y = all_frame_3Dpoints_array[:,:,1].min() 162 | min_z = all_frame_3Dpoints_array[:,:,2].min() 163 | 164 | x_len = max_x - min_x 165 | y_len = max_y - min_y 166 | z_len = max_z - min_z 167 | 168 | x_center = (max_x + min_x)/2 169 | y_center = (max_y + min_y)/2 170 | z_center = (max_z + min_z)/2 171 | 172 | all_frame_3Dpoints_array[:,:,0]=(all_frame_3Dpoints_array[:,:,0]-x_center)/y_len 173 | all_frame_3Dpoints_array[:,:,1]=(all_frame_3Dpoints_array[:,:,1]-y_center)/y_len 174 | all_frame_3Dpoints_array[:,:,2]=(all_frame_3Dpoints_array[:,:,2]-z_center)/y_len 175 | if(video_index==100): 176 | print(time.time()) 177 | # save_npy(all_frame_3Dpoints_array, filename) 178 | 179 | 180 | 181 | def save_npy(data, filename): 182 | file = os.path.join(save_path, filename) 183 | if not os.path.isfile(file): 184 | np.save(file, data) 185 | 186 | def farthest_point_sampling_fast(pc, sample_num): 187 | pc_num = pc.shape[0] 188 | #lx#print(pc_num) 189 | sample_idx = np.zeros(shape = [sample_num,1], dtype = np.int32) 190 | sample_idx[0] = np.random.randint(0,pc_num) 191 | #lx#print(sample_idx.shape) 192 | #lx#print(sample_idx[0]) 193 | #lx#print(pc[sample_idx[0],:]) 194 | cur_sample = np.tile(pc[sample_idx[0],:], (pc_num,1)) 195 | #lx#print(cur_sample.shape)#lx#(2048, 3) 196 | diff = pc-cur_sample 197 | #lx#print(diff.shape)#lx#(2048, 3) 198 | #lx#print((diff*diff).shape)#lx#(2048, 3)(diff*diff) 199 | min_dist = (diff*diff).sum(axis = 1)#lx# 200 | 201 | #lx#print(min_dist.shape)#lx#(2048,) 202 | #lx#print(min_dist.reshape(pc_num,1).shape)#lx#(2048,1) 203 | for cur_sample_idx in range(1,sample_num):#lx#sample_num=512 204 | ## find the farthest point 205 | 206 | sample_idx[cur_sample_idx] = np.argmax(min_dist) 207 | # print(sample_idx[cur_sample_idx]) 208 | if cur_sample_idx < sample_num-1: 209 | diff = pc - np.tile(pc[sample_idx[cur_sample_idx],:], (pc_num,1)) 210 | min_dist = np.concatenate((min_dist.reshape(pc_num,1), (diff*diff).sum(axis = 1).reshape(pc_num,1)), axis = 1).min(axis = 1) ##? 211 | #print(min_dist) 212 | return sample_idx#lx# 213 | 214 | def load_depth_from_img(depth_path): 215 | depth_im = imageio.imread(depth_path) #im is a numpy array 216 | return depth_im 217 | 218 | def depth_to_pointcloud(depth_im):#lx# 219 | # fx = 2 220 | # fy = 1 221 | # cx = 0 222 | # cy = 0 223 | #lx#np.array([[0,2,3],[6,5,3]]) 224 | rows,cols = depth_im.shape#lx#(424, 512) 225 | #lx#print(depth_im.shape)#lx#(2, 3) 226 | xx,yy = np.meshgrid(range(0,cols), range(0,rows)) 227 | #lx#print(xx)#lx#[[0 1 2],[0 1 2]] 228 | #lx#print(yy)#lx#[[0 0 0],[1 1 1]] 229 | valid = depth_im > 0 230 | #lx#print(valid)#lx#[[False True True],[ True True True]] 231 | xx = xx[valid]#lx# 232 | yy = yy[valid]#lx# 233 | #lx#print(xx)#lx#[1 2 0 1 2] 234 | #lx#print(yy)#lx#[0 0 1 1 1] 235 | depth_im = depth_im[valid] 236 | #lx#print(depth_im)#lx#[2 3 6 5 3] 237 | X = (xx - cx) * depth_im / fx#LX# 238 | Y = (yy - cy) * depth_im / fy#LX# 239 | #lx#print(X)#lx#[ -6. -6. -24. -15. 
-6. ] 240 | #lx#print(Y)#lx#[ -5. -7.5 -12. -10. -6. ] 241 | #lx#print((2-4)*8/1)#lx#-6.0 242 | Z = depth_im 243 | points3d = np.array([X.flatten(), Y.flatten(), Z.flatten()]) 244 | #lx#print(points3d)#lx# 245 | #lx#[[ -6. -6. -24. -15. -6. ] 246 | #lx# [ -5. -7.5 -12. -10. -6. ] 247 | #lx# [ 2. 3. 6. 5. 3. ]] 248 | return points3d 249 | 250 | if __name__ == '__main__': 251 | main() 252 | 253 | -------------------------------------------------------------------------------- /MSR_Net/MSR-Action3D(深度.mat).zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XingLi1012/SequentialPointNet/88daef1d540ce849e431130acbf910556e59b0bb/MSR_Net/MSR-Action3D(深度.mat).zip -------------------------------------------------------------------------------- /MSR_Net/channelattention/Channelattention.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import math 3 | try: 4 | from torch.hub import load_state_dict_from_url 5 | except ImportError: 6 | from torch.utils.model_zoo import load_url as load_state_dict_from_url 7 | import torch 8 | 9 | class ChannelAttention(nn.Module): 10 | def __init__(self, in_planes, ratio=16): 11 | super(ChannelAttention, self).__init__() 12 | self.avg_pool = nn.AdaptiveAvgPool2d(1)#B*C*H*W->B*C*1*1.....->B*C*1*1 13 | self.max_pool = nn.AdaptiveMaxPool2d(1) 14 | # print(in_planes,in_planes // 2) 15 | self.fc1 = nn.Conv2d(in_planes, in_planes // 16, 1, bias=False) 16 | self.relu1 = nn.ReLU() 17 | self.fc2 = nn.Conv2d(in_planes // 16, in_planes, 1, bias=False) 18 | 19 | self.sigmoid = nn.Sigmoid() 20 | 21 | def forward(self, x): 22 | 23 | # print('x:',x.shape) 24 | # print('self.avg_pool(x):',self.avg_pool(x).shape) 25 | # print('elf.fc1(self.avg_pool(x)):',self.fc1(self.avg_pool(x)).shape) 26 | avg_out = self.fc2(self.relu1(self.fc1(self.avg_pool(x)))) 27 | # print('avg_out:',avg_out.shape) 28 | max_out = self.fc2(self.relu1(self.fc1(self.max_pool(x)))) 29 | out = avg_out + max_out 30 | return self.sigmoid(out) 31 | class ChannelAttention0(nn.Module): 32 | def __init__(self, in_planes, ratio=16): 33 | super(ChannelAttention0, self).__init__() 34 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 35 | self.max_pool = nn.AdaptiveMaxPool2d(1) 36 | # print(in_planes,in_planes // 2) 37 | self.fc1 = nn.Conv2d(in_planes, in_planes, 1, bias=False) 38 | self.relu1 = nn.ReLU() 39 | self.fc2 = nn.Conv2d(in_planes, in_planes, 1, bias=False) 40 | 41 | self.sigmoid = nn.Sigmoid() 42 | 43 | def forward(self, x): 44 | 45 | # print('x:',x.shape) 46 | # print('self.avg_pool(x):',self.avg_pool(x).shape) 47 | # print('elf.fc1(self.avg_pool(x)):',self.fc1(self.avg_pool(x)).shape) 48 | avg_out = self.fc2(self.relu1(self.fc1(self.avg_pool(x)))) 49 | # print('avg_out:',avg_out.shape) 50 | max_out = self.fc2(self.relu1(self.fc1(self.max_pool(x)))) 51 | out = avg_out + max_out 52 | return self.sigmoid(out) -------------------------------------------------------------------------------- /MSR_Net/channelattention/__init__.py: -------------------------------------------------------------------------------- 1 | from .Channelattention import * -------------------------------------------------------------------------------- /MSR_Net/channelattention/__pycache__/Channelattention.cpython-39.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/XingLi1012/SequentialPointNet/88daef1d540ce849e431130acbf910556e59b0bb/MSR_Net/channelattention/__pycache__/Channelattention.cpython-39.pyc -------------------------------------------------------------------------------- /MSR_Net/channelattention/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XingLi1012/SequentialPointNet/88daef1d540ce849e431130acbf910556e59b0bb/MSR_Net/channelattention/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /MSR_Net/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | from .dataset import NTU_RGBD -------------------------------------------------------------------------------- /MSR_Net/dataset/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XingLi1012/SequentialPointNet/88daef1d540ce849e431130acbf910556e59b0bb/MSR_Net/dataset/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /MSR_Net/dataset/__pycache__/dataset.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XingLi1012/SequentialPointNet/88daef1d540ce849e431130acbf910556e59b0bb/MSR_Net/dataset/__pycache__/dataset.cpython-39.pyc -------------------------------------------------------------------------------- /MSR_Net/dataset/dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tqdm 3 | import torch 4 | import re 5 | import collections 6 | import imageio 7 | import random 8 | 9 | from tqdm import tqdm 10 | from torch.utils.data import Dataset 11 | 12 | import pandas as pd 13 | import numpy as np 14 | import scipy.io as sio 15 | 16 | 17 | #rose@ntu.edu.sg 18 | sample_num_level1 = 512 19 | sample_num_level2 = 128 20 | 21 | 22 | TRAIN_IDS = [1,3,5,7,9] 23 | TEST_IDS= [2,4,6,8,10] 24 | TRAIN_VALID_IDS=([],[]) 25 | compiled_regex = re.compile('.*S(\d{3})C(\d{3})P(\d{3})R(\d{3})A(\d{3}).*') 26 | # SAMPLE_NUM = 2048 27 | 28 | class NTU_RGBD(Dataset): 29 | """NTU depth human masked datasets""" 30 | def __init__(self, root_path, opt, 31 | full_train = True, 32 | test=False, 33 | validation=False, 34 | DATA_CROSS_VIEW = True, 35 | Transform = True): 36 | 37 | self.DATA_CROSS_VIEW = DATA_CROSS_VIEW 38 | self.root_path = root_path 39 | self.SAMPLE_NUM = opt.SAMPLE_NUM 40 | self.INPUT_FEATURE_NUM = opt.INPUT_FEATURE_NUM 41 | self.EACH_FRAME_SAMPLE_NUM = opt.EACH_FRAME_SAMPLE_NUM 42 | self.T_sample_num_level1 = opt.T_sample_num_level1 43 | 44 | self.all_framenum = opt.all_framenum 45 | self.framenum= opt.framenum 46 | self.transform = Transform 47 | # self.depth_path = opt.depth_path 48 | 49 | self.point_vids = os.listdir(self.root_path+'\\T')#.sort() 50 | self.point_vids.sort() 51 | #print(self.point_vids) 52 | self.TRAIN_IDS = TRAIN_IDS 53 | self.TEST_IDS = TEST_IDS 54 | 55 | self.num_clouds = len(self.point_vids) 56 | print(self.num_clouds) 57 | self.point_data = self.load_data() 58 | 59 | self.set_splits() 60 | 61 | self.id_to_action = list(pd.DataFrame(self.point_data)['action'] - 1) 62 | self.id_to_vidName = list(pd.DataFrame(self.point_data)['video_cloud_name']) 63 | 64 | self.train = (test == False) and (validation == False) 65 | if DATA_CROSS_VIEW == 
False: 66 | if test: self.vid_ids = self.test_split_subject.copy() 67 | elif validation: self.vid_ids = self.validation_split_subject.copy() 68 | elif full_train: self.vid_ids = self.train_split_subject.copy() 69 | else: self.vid_ids = self.train_split_subject_with_validation.copy() 70 | else: 71 | if test: self.vid_ids = self.test_split_camera.copy() 72 | else: self.vid_ids = self.train_split_camera.copy() 73 | 74 | print('num_data:',len(self.vid_ids)) 75 | 76 | 77 | # self.SAMPLE_NUM = opt.SAMPLE_NUM 78 | # self.INPUT_FEATURE_NUM = opt.INPUT_FEATURE_NUM 79 | 80 | # self.point_clouds = np.empty(shape=[self.SAMPLE_NUM, self.INPUT_FEATURE_NUM],dtype=np.float32) 81 | 82 | def __getitem__(self, idx): 83 | vid_id = self.vid_ids[idx] 84 | vid_name = self.id_to_vidName[vid_id] 85 | S_idx = vid_name[1:4] 86 | #print(vid_name) 87 | v_name = vid_name[:-4] 88 | 89 | ## 4DV-T motion point data 90 | path_T=self.root_path+'\\T' 91 | path_cloud_npy_T = os.path.join(path_T,self.id_to_vidName[vid_id]) 92 | 93 | all_sam = np.arange(self.all_framenum) 94 | 95 | if(1==1):# 96 | frame_index=[] 97 | for jj in range(self.framenum): 98 | iii =int(np.random.randint(int(self.all_framenum*jj/self.framenum), int(self.all_framenum*(jj+1)/self.framenum))) 99 | frame_index.append(iii) 100 | if(1==0):# 101 | frame_index = random.sample(list(all_sam),self.framenum) 102 | points4DV_T= np.load(path_cloud_npy_T)[frame_index,0:self.EACH_FRAME_SAMPLE_NUM,:self.INPUT_FEATURE_NUM]#60*512*4 103 | ## 4DV-S motion point data 104 | # path_S=self.root_path+'\\S' 105 | # path_cloud_npy_S = os.path.join(path_S,self.id_to_vidName[vid_id]) 106 | # matlab data(.mat) OR python data(.npy) 107 | # XYZ_C = sio.loadmat(path_cloud_npy) 108 | #print(self.id_to_vidName[vid_id]) 109 | # points_c = XYZ_C['pc'].astype(np.float32) 110 | #print(path_cloud_npy) 111 | # points4DV_S= np.load(path_cloud_npy_S)[:,0:4]#2048*4 112 | # points4DV_S = np.expand_dims(points4DV_S, axis=0)#1*2048*4 113 | #print(points_c.shape, points_2048_f.shape) 114 | label = self.id_to_action[vid_id] 115 | 116 | # random angle rotate for data augment 117 | theta = np.random.rand()*1.4-0.7 118 | 119 | if self.transform: 120 | ## point data augment 121 | points4DV_T = self.point_transform(points4DV_T,theta) 122 | points4DV_T = torch.tensor(points4DV_T,dtype=torch.float) 123 | # points4DV_S = torch.tensor(points4DV_S,dtype=torch.float) 124 | label = torch.tensor(label) 125 | return points4DV_T,label,vid_name 126 | 127 | def __len__(self): 128 | return len(self.vid_ids) 129 | 130 | 131 | def load_data(self): 132 | self.point_data = [] 133 | for cloud_idx in tqdm(range(self.num_clouds), "Getting video info"): 134 | self.point_data.append(self.get_pointdata(cloud_idx)) 135 | 136 | return self.point_data 137 | 138 | def get_pointdata(self, vid_id): 139 | 140 | vid_name = self.point_vids[vid_id] 141 | match = re.match(compiled_regex, vid_name) 142 | setup, camera, performer, replication, action = [*map(int, match.groups())] 143 | return { 144 | 'video_cloud_name': vid_name, 145 | 'video_index': vid_id, 146 | 'video_set': (setup, camera), 147 | 'setup': setup, 148 | 'camera': camera, 149 | 'performer': performer, 150 | 'replication': replication, 151 | 'action': action, 152 | } 153 | 154 | 155 | def set_splits(self): 156 | ''' 157 | Sets the train/test splits 158 | Cross-Subject Evaluation: 159 | Train ids = 1, 2, 4, 5, 8, 9, 13, 14, 15, 16, 17, 18, 19, 25, 27, 160 | 28, 31, 34, 35, 38 161 | Cross-View Evaluation: 162 | Train camera views: 2, 3 163 | ''' 164 | # Save the dataset as a 
dataframe 165 | dataset = pd.DataFrame(self.point_data) 166 | 167 | # Get the train split ids 168 | train_ids_camera = [2, 3] 169 | 170 | # Cross-Subject splits 171 | self.train_split_subject = list( 172 | dataset[dataset.performer.isin(self.TRAIN_IDS)]['video_index']) 173 | self.train_split_subject_with_validation = list( 174 | dataset[dataset.performer.isin(TRAIN_VALID_IDS[0])]['video_index']) 175 | self.validation_split_subject = list( 176 | dataset[dataset.performer.isin(TRAIN_VALID_IDS[1])]['video_index']) 177 | self.test_split_subject = list( 178 | dataset[dataset.performer.isin(self.TEST_IDS)]['video_index']) 179 | 180 | # Cross-View splits 181 | self.train_split_camera = list( 182 | dataset[dataset.camera.isin(train_ids_camera)]['video_index']) 183 | self.test_split_camera = list( 184 | dataset[~dataset.camera.isin(train_ids_camera)]['video_index']) 185 | 186 | 187 | def point_transform(self,points_xyz,y): 188 | 189 | anglesX = (np.random.uniform()-0.5) * (1/9) * np.pi 190 | R_y = np.array([[[np.cos(y),0.0,np.sin(y)], 191 | [0.0,1.0,0.0], 192 | [-np.sin(y),0.0,np.cos(y)]]]) 193 | R_x = np.array([[[1, 0, 0], 194 | [0, np.cos(anglesX), -np.sin(anglesX)], 195 | [0, np.sin(anglesX), np.cos(anglesX)]]]) 196 | #print(R_y.shape) 197 | 198 | # points_c[:,:,0:3] = self.jitter_point_cloud(points_c[:,:,0:3],sigma=0.007, clip=0.04) 199 | points_xyz[:,:,0:3] = self.jitter_point_cloud(points_xyz[:,:,0:3],sigma=0.007, clip=0.04)# 200 | 201 | # points_c[:,-1536:,:] = self.random_dropout_point_cloud(points_c[:,-1536:,:]) 202 | points_xyz[:,-(self.EACH_FRAME_SAMPLE_NUM-self.T_sample_num_level1):,:] = self.random_dropout_point_cloud(points_xyz[:,-(self.EACH_FRAME_SAMPLE_NUM-self.T_sample_num_level1):,:]) 203 | 204 | R = np.matmul(R_y, R_x) 205 | 206 | # points_c[:,:,0:3] = np.matmul(points_c[:,:,0:3],R) 207 | points_xyz[:,:,0:3] = np.matmul(points_xyz[:,:,0:3],R) 208 | 209 | #if np.random.rand()>0.6: 210 | # for i in range(3): 211 | # points_c[:,i] = points_c[:,i]+(np.random.rand()-0.5)/6 212 | # points_xyz[:,i] = points_xyz[:,i]+(np.random.rand()-0.5)/6 213 | 214 | #print(points.shape) 215 | return points_xyz 216 | 217 | 218 | # def load_depth_from_img(self,depth_path): 219 | # depth_im = imageio.imread(depth_path) #im is a numpy array 220 | # return depth_im 221 | 222 | 223 | def jitter_point_cloud(self, data, sigma=0.01, clip=0.05): 224 | """ 225 | 226 | :param data: Nx3 array 227 | :return: jittered_data: Nx3 array 228 | """ 229 | M,N, C = data.shape 230 | # print(np.random.randn(M, N, C))# 231 | jittered_data = np.clip(sigma * np.random.randn(M, N, C), -1 * clip, clip).astype(np.float32)# 232 | 233 | jittered_data = data+jittered_data 234 | 235 | return jittered_data 236 | 237 | def random_dropout_point_cloud(self, data): 238 | """ 239 | :param data: Nx3 array 240 | :return: dropout_data: Nx3 array 241 | """ 242 | M, N, C = data.shape##60*300*4 243 | dropout_ratio = 0.7+ np.random.random()/2#n 244 | #dropout_ratio = np.random.random() * p 245 | drop_idx = np.where(np.random.random(N) <= dropout_ratio)[0] 246 | dropout_data = np.zeros_like(data) 247 | if len(drop_idx) > 0: 248 | dropout_data[:, drop_idx, :] = data[:, drop_idx, :] 249 | 250 | 251 | # xyz_center = np.random.random(3) 252 | # xyz_d = 0.1+np.random.random(3)/10 253 | 254 | # func_x = lambda d: d>xyz_center[0] and d<(xyz_center[0]+xyz_d[0]) 255 | # func_y = lambda d: d>xyz_center[1] and d<(xyz_center[1]+xyz_d[1]) 256 | # func_z = lambda d: d>xyz_center[2] and d<(xyz_center[2]+xyz_d[2]) 257 | # c_x = np.vectorize(func_x)(data[:,0]) 258 
| # c_y = np.vectorize(func_x)(data[:,0]) 259 | # c_z = np.vectorize(func_x)(data[:,0]) 260 | # c = c_x*c_z*c_y 261 | # erase_index = np.where(c) 262 | # dropout_data[erase_index,:] =0 263 | return dropout_data 264 | -------------------------------------------------------------------------------- /MSR_Net/model/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import PointNet_Plus -------------------------------------------------------------------------------- /MSR_Net/model/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XingLi1012/SequentialPointNet/88daef1d540ce849e431130acbf910556e59b0bb/MSR_Net/model/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /MSR_Net/model/__pycache__/model.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XingLi1012/SequentialPointNet/88daef1d540ce849e431130acbf910556e59b0bb/MSR_Net/model/__pycache__/model.cpython-39.pyc -------------------------------------------------------------------------------- /MSR_Net/model/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import math 3 | 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | from utils import group_points_4DV_T_S,group_points_4DV_T_S2 8 | from channelattention import ChannelAttention,ChannelAttention0 9 | from positionencoding import get_positional_encoding 10 | nstates_plus_1 = [64,64,128] 11 | nstates_plus_2 = [128,128,256] 12 | nstates_plus_3 = [256,512,1024,1024,256] 13 | 14 | S_nstates_plus_1 = [64,64,128] 15 | S_nstates_plus_2 = [128,128,256] 16 | T_nstates_plus_2 = [256,512,1024] 17 | T_nstates_plus_3 = [1024] 18 | vlad_dim_out = 128*8 19 | 20 | dim_out=1024 21 | 22 | class PointNet_Plus(nn.Module): 23 | def __init__(self,opt,num_clusters=8,gost=1,dim=128,normalize_input=True): 24 | super(PointNet_Plus, self).__init__() 25 | self.temperal_num = opt.temperal_num 26 | self.knn_K = opt.knn_K 27 | self.ball_radius2 = opt.ball_radius2 28 | self.sample_num_level1 = opt.sample_num_level1 29 | self.sample_num_level2 = opt.sample_num_level2 30 | self.INPUT_FEATURE_NUM = opt.INPUT_FEATURE_NUM # x,y,x,c : 4 31 | self.num_outputs = opt.Num_Class 32 | ####SAMPLE_NUM 33 | self.Seg_size = opt.Seg_size 34 | self.stride=opt.stride 35 | self.EACH_FRAME_SAMPLE_NUM=opt.EACH_FRAME_SAMPLE_NUM 36 | self.T_knn_K = opt.T_knn_K 37 | self.T_knn_K2= opt.T_knn_K2 38 | self.T_sample_num_level1 = opt.T_sample_num_level1 39 | self.T_sample_num_level2 = opt.T_sample_num_level2 40 | self.framenum=opt.framenum 41 | self.T_group_num=int((self.framenum-self.Seg_size)/self.stride)+1 42 | 43 | self.opt=opt 44 | self.dim=dim 45 | 46 | self.normalize_input=normalize_input 47 | 48 | self.pooling = opt.pooling 49 | #self._init_params() 50 | 51 | 52 | self.netR_T_S1 = nn.Sequential( 53 | # B*INPUT_FEATURE_NUM*sample_num_level1*knn_K 54 | nn.Conv2d(self.INPUT_FEATURE_NUM+1, S_nstates_plus_1[0], kernel_size=(1, 1)), 55 | nn.BatchNorm2d(S_nstates_plus_1[0]), 56 | nn.ReLU(inplace=True), 57 | # B*64*sample_num_level1*knn_K 58 | nn.Conv2d(S_nstates_plus_1[0], S_nstates_plus_1[1], kernel_size=(1, 1)), 59 | nn.BatchNorm2d(S_nstates_plus_1[1]), 60 | nn.ReLU(inplace=True), 61 | # B*64*sample_num_level1*knn_K 62 | nn.Conv2d(S_nstates_plus_1[1], S_nstates_plus_1[2], 
kernel_size=(1, 1)), 63 | nn.BatchNorm2d(S_nstates_plus_1[2]), 64 | nn.ReLU(inplace=True), 65 | # B*128*sample_num_level1*knn_K 66 | nn.MaxPool2d((1,self.T_knn_K),stride=1) 67 | ) 68 | self.ca_S2 = ChannelAttention(self.INPUT_FEATURE_NUM+1+S_nstates_plus_1[2]) 69 | self.netR_T_S2 = nn.Sequential( 70 | # B*INPUT_FEATURE_NUM*sample_num_level1*knn_K 71 | nn.Conv2d(self.INPUT_FEATURE_NUM+1+S_nstates_plus_1[2], S_nstates_plus_2[0], kernel_size=(1, 1)), 72 | nn.BatchNorm2d(S_nstates_plus_2[0]), 73 | nn.ReLU(inplace=True), 74 | # B*64*sample_num_level1*knn_K 75 | nn.Conv2d(S_nstates_plus_2[0], S_nstates_plus_2[1], kernel_size=(1, 1)), 76 | nn.BatchNorm2d(S_nstates_plus_2[1]), 77 | nn.ReLU(inplace=True), 78 | # B*64*sample_num_level1*knn_K 79 | nn.Conv2d(S_nstates_plus_2[1], S_nstates_plus_2[2], kernel_size=(1, 1)), 80 | nn.BatchNorm2d(S_nstates_plus_2[2]), 81 | nn.ReLU(inplace=True), 82 | # B*128*sample_num_level1*knn_K 83 | nn.MaxPool2d((1,self.T_knn_K2),stride=1) 84 | ) 85 | self.ca_T1 = ChannelAttention(self.INPUT_FEATURE_NUM+S_nstates_plus_2[2]) 86 | self.net4DV_T1 = nn.Sequential( 87 | # B*INPUT_FEATURE_NUM*sample_num_level1*knn_K(B*10*28*2048) 88 | nn.Conv2d(self.INPUT_FEATURE_NUM+S_nstates_plus_2[2], T_nstates_plus_2[0], kernel_size=(1, 1)),#10->64 89 | nn.BatchNorm2d(T_nstates_plus_2[0]), 90 | nn.ReLU(inplace=True), 91 | # B*64*sample_num_level1*knn_K 92 | nn.Conv2d(T_nstates_plus_2[0], T_nstates_plus_2[1], kernel_size=(1, 1)),#64->64 93 | nn.BatchNorm2d(T_nstates_plus_2[1]), 94 | nn.ReLU(inplace=True), 95 | # B*64*sample_num_level1*knn_K 96 | nn.Conv2d(T_nstates_plus_2[1], T_nstates_plus_2[2], kernel_size=(1, 1)),#64->128 97 | nn.BatchNorm2d(T_nstates_plus_2[2]), 98 | nn.ReLU(inplace=True), 99 | # B*128*sample_num_level1*knn_K 100 | # nn.Conv2d(T_nstates_plus_2[2], T_nstates_plus_2[3], kernel_size=(1, 1)),#64->128 101 | # nn.BatchNorm2d(T_nstates_plus_2[3]), 102 | # nn.ReLU(inplace=True), 103 | nn.MaxPool2d((1,self.T_sample_num_level2*self.Seg_size),stride=1)#1*(t*512)#B*C*G*1 104 | ) 105 | self.net4DV_T2 = nn.Sequential( 106 | # B*259*sample_num_level2*1 107 | nn.Conv2d(T_nstates_plus_2[2], T_nstates_plus_3[0], kernel_size=(1, 1)), 108 | nn.BatchNorm2d(T_nstates_plus_3[0]), 109 | nn.ReLU(inplace=True), 110 | # B*256*sample_num_level2*1 111 | # nn.Conv2d(T_nstates_plus_3[0], T_nstates_plus_3[1], kernel_size=(1, 1)), 112 | # nn.BatchNorm2d(T_nstates_plus_3[1]), 113 | # nn.ReLU(inplace=True), 114 | # B*512*sample_num_level2*1 115 | # nn.Conv2d(T_nstates_plus_3[1], T_nstates_plus_3[2], kernel_size=(1, 1)), 116 | # nn.BatchNorm2d(T_nstates_plus_3[2]), 117 | # nn.ReLU(inplace=True), 118 | # B*1024*sample_num_level2*1 119 | # nn.MaxPool2d((self.T_group_num,1),stride=1), 120 | # B*1024*1*1 121 | ) 122 | 123 | KerStr=[(24,24),(12,12)] 124 | self.maxpoolings = nn.ModuleList([nn.MaxPool2d((K[0],1 ),(K[1],1)) for K in KerStr]) 125 | self.PE=get_positional_encoding(self.framenum,T_nstates_plus_2[2]) 126 | 127 | self.netR_FC = nn.Sequential( 128 | # B*1024 129 | #nn.Linear(nstates_plus_3[2], nstates_plus_3[3]), 130 | #nn.BatchNorm1d(nstates_plus_3[3]), 131 | #nn.ReLU(inplace=True), 132 | # B*1024 133 | nn.Linear(dim_out*4+256, nstates_plus_3[4]), 134 | nn.BatchNorm1d(nstates_plus_3[4]), 135 | nn.ReLU(inplace=True), 136 | # B*512 137 | nn.Linear(nstates_plus_3[4], self.num_outputs), 138 | nn.BatchNorm1d(self.num_outputs), 139 | nn.ReLU(inplace=True), 140 | # B*num_outputs 141 | ) 142 | 143 | def forward(self, xt, yt): 144 | 145 | 146 | B,f,d,N,k = xt.shape#B*F*4*Cen*K 147 | # print('xt:',xt.shape) 148 
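# Merge the batch and frame dimensions (B*f) so every frame is processed independently by the
# shared intra-frame layers (netR_T_S1 / netR_T_S2); the frame dimension is restored afterwards
# for the positional encoding and the temporal pooling stages.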
| # print('yt:',yt.shape) 149 | yt=yt.view(B*f,yt.size(2), self.opt.T_sample_num_level1, 1)#(B*F)*4*Cen*1 150 | xt=xt.view(B*f,d, self.opt.T_sample_num_level1, k)#(B*F)*4+1*Cen*K 151 | # print('xtt:',xt.shape) 152 | # xt = self.ca_S1(xt) * xt 153 | xt = self.netR_T_S1(xt)#(B*F)*128*Cen*1 154 | # print('xttt:',xt.shape) 155 | xt = torch.cat((yt, xt),1).squeeze(-1)#(B*F)*(4+128)*Cen 156 | xt=xt.view(B,f,xt.size(1), self.opt.T_sample_num_level1).transpose(2,3)#(B*F)*(4+128)*Cen->B*F*Cen1*(4+128) 157 | # print('xtttt:',xt.shape) 158 | S_inputs_level2,inputs_level1_center_s2 =group_points_4DV_T_S2(xt,self.opt)##B*F*5+128*Cen2*K2 B*F*4*Cen2*1 159 | # print('S_inputs_level2:',S_inputs_level2.shape) 160 | B2,f2,d2,N2,k2 = S_inputs_level2.shape#B*F*4*Cen*K 161 | inputs_level1_center_s2=inputs_level1_center_s2.view(B2*f2,inputs_level1_center_s2.size(2), self.opt.T_sample_num_level2, 1)#(B*F)*4*C2en*1 162 | S_inputs_level2=S_inputs_level2.view(B2*f2,d2, self.opt.T_sample_num_level2, k2)#(B*F)*5+128*C2en*K2 163 | S_inputs_level2 = self.ca_S2(S_inputs_level2) * S_inputs_level2 164 | xt = self.netR_T_S2(S_inputs_level2)#(B*F)*128*Cen2*1 165 | 166 | ###res s2 167 | xt_resS2=xt.squeeze(-1).view(B,f,xt.size(1), self.opt.T_sample_num_level2).transpose(1,2)#B*256*F*Cen2 168 | xt_resS2=F.max_pool2d(xt_resS2,kernel_size=(f,self.opt.T_sample_num_level2)).squeeze(-1).squeeze(-1)#B*256 169 | 170 | # print('xxt:',xt.shape) 171 | xt = torch.cat((inputs_level1_center_s2, xt),1).squeeze(-1)#(B*F)*4+128*Cen2 172 | xt =xt.view(-1,self.framenum,xt.size(1),self.opt.T_sample_num_level2).transpose(2,3)##(B*F)*(4+128)*C2en-》B*F*(4+128)*C2en->B*F*C2en*(4+128) 173 | 174 | 175 | # print(-1,self.framenum,4+128,self.opt.T_sample_num_level1,xt.shape) 176 | # print('xtttt:',xt.shape) 177 | T_inputs_level2 =xt.transpose(1,3).transpose(2,3) 178 | T_inputs_level2 = self.ca_T1(T_inputs_level2) * T_inputs_level2 179 | xt = self.net4DV_T1(T_inputs_level2)# 180 | 181 | ###resT1 182 | xt_resT1=F.max_pool2d(xt,kernel_size=(f,1)).squeeze(-1).squeeze(-1)#B*256 183 | # 184 | xt=xt.squeeze(-1)+self.PE.transpose(0,1) 185 | xt=xt.unsqueeze(-1) 186 | ### 187 | 188 | 189 | # xt = torch.cat((inputs_level1_center_t, xt),1) 190 | # xt = self.ca_T2(xt) * xt 191 | xt = self.net4DV_T2(xt)# 192 | 193 | xt = [maxpooling(xt) for maxpooling in self.maxpoolings]#B*(2048)*[G]*1 194 | xt = torch.cat(xt,2).squeeze(-1) 195 | # print('xttttt:',xt.shape) 196 | # print(xt.size(0),xt.size(1)*xt.size(2)) 197 | xt = xt.contiguous().view(xt.size(0),-1) 198 | 199 | ####### 200 | xt = torch.cat((xt, xt_resS2,xt_resT1),1) 201 | # print('xttt:',xt.shape) 202 | x = self.netR_FC(xt) 203 | # print('x:',x.shape) 204 | 205 | return x 206 | 207 | 208 | 209 | -------------------------------------------------------------------------------- /MSR_Net/positionencoding/Positionencoding.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | def get_positional_encoding(max_seq_len, embed_dim): 5 | 6 | positional_encoding=torch.zeros(max_seq_len, embed_dim).cuda() 7 | for pos in range(max_seq_len): 8 | for i in range(embed_dim): 9 | if(i%2==0): 10 | positional_encoding[pos,i]=torch.sin(pos / torch.tensor(10000**(2 * i / embed_dim))) 11 | else: 12 | positional_encoding[pos,i]=torch.cos(pos / torch.tensor(10000**(2 * i /embed_dim))) 13 | return positional_encoding 14 | -------------------------------------------------------------------------------- /MSR_Net/positionencoding/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .Positionencoding import * -------------------------------------------------------------------------------- /MSR_Net/positionencoding/__pycache__/Positionencoding.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XingLi1012/SequentialPointNet/88daef1d540ce849e431130acbf910556e59b0bb/MSR_Net/positionencoding/__pycache__/Positionencoding.cpython-39.pyc -------------------------------------------------------------------------------- /MSR_Net/positionencoding/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XingLi1012/SequentialPointNet/88daef1d540ce849e431130acbf910556e59b0bb/MSR_Net/positionencoding/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /MSR_Net/test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import torch 3 | import os 4 | import tqdm 5 | import shutil 6 | import collections 7 | import argparse 8 | import random 9 | import time 10 | #import gpu_utils as g 11 | import numpy as np 12 | 13 | from model import PointNet_Plus#,Attension_Point,TVLAD 14 | from dataset import NTU_RGBD 15 | from utils import group_points_4DV_T_S 16 | 17 | from PIL import Image 18 | from torch.utils.data import Dataset 19 | from torchvision import transforms 20 | from torch.utils.data import DataLoader 21 | from tqdm import tqdm 22 | import logging 23 | def main(args=None): 24 | parser = argparse.ArgumentParser(description = "Training") 25 | 26 | parser.add_argument('--batchSize', type=int, default=32, help='input batch size') 27 | parser.add_argument('--nepoch', type=int, default=150, help='number of epochs to train for') 28 | parser.add_argument('--INPUT_FEATURE_NUM', type=int, default = 3, help='number of input point features') 29 | parser.add_argument('--temperal_num', type=int, default = 3, help='number of input point features') 30 | parser.add_argument('--pooling', type=str, default='concatenation', help='how to aggregate temporal split features: vlad | concatenation | bilinear') 31 | parser.add_argument('--dataset', type=str, default='ntu60', help='how to aggregate temporal split features: ntu120 | ntu60') 32 | 33 | parser.add_argument('--weight_decay', type=float, default=0.0008, help='weight decay (SGD only)') 34 | parser.add_argument('--learning_rate', type=float, default=0.001, help='learning rate at t=0') 35 | parser.add_argument('--momentum', type=float, default=0.9, help='momentum (SGD only)') 36 | parser.add_argument('--workers', type=int, default=0, help='number of data loading workers') 37 | 38 | parser.add_argument('--root_path', type=str, default='C:\\Users\\Administrator\\Desktop\\LX\\paper\\dataset\\Prosessed_dataset\\01_3DV-Action-master(base-run-version)(FPS512)(单时序流4维特征)', help='preprocess folder') 39 | parser.add_argument('--depth_path', type=str, default='C:\\Users\\Administrator\\Desktop\\LX\\paper\\dataset\\Prosessed_dataset\\3DV-Action-master\\ntu60dataset\\', help='raw_depth_png') 40 | #################改1############# 41 | # parser.add_argument('--save_root_dir', type=str, default='C:\\Users\\Administrator\\Desktop\\LX\\paper\\code\\3DV-Action-master\\models\\ntu60\\xsub', help='output folder') 42 | parser.add_argument('--save_root_dir', type=str, 
default='C:\\Users\\Administrator\\Desktop\\LX\\paper\\code\\Models_Parameter\\03_3DV-Action-master(base-run-version)(FPS512-64-K32)(2层局部+时序池化3层简单)(t=6_stride=2_KC=64)(单时序流4维特征)(单流读取)\\models\\ntu60\\xsub', help='output folder') 43 | parser.add_argument('--model', type=str, default = '', help='model name for training resume') 44 | parser.add_argument('--optimizer', type=str, default = '', help='optimizer name for training resume') 45 | 46 | parser.add_argument('--ngpu', type=int, default=1, help='# GPUs') 47 | parser.add_argument('--main_gpu', type=int, default=0, help='main GPU id') # CUDA_VISIBLE_DEVICES=0 python train.py 48 | 49 | ########时序 50 | parser.add_argument('--Seg_size', type=int, default =1, help='number of frame in seg') 51 | parser.add_argument('--stride', type=int, default = 1, help='stride of seg') 52 | parser.add_argument('--all_framenum', type=int, default = 30, help='number of action frame') 53 | parser.add_argument('--framenum', type=int, default = 20, help='number of action frame') 54 | parser.add_argument('--EACH_FRAME_SAMPLE_NUM', type=int, default = 512, help='number of sample points in each frame') 55 | parser.add_argument('--T_knn_K', type=int, default = 48, help='K for knn search of temperal stream') 56 | parser.add_argument('--T_knn_K2', type=int, default = 16, help='K for knn search of temperal stream') 57 | parser.add_argument('--T_sample_num_level1', type=int, default = 128, help='number of first layer groups') 58 | parser.add_argument('--T_sample_num_level2', type=int, default = 32, help='number of first layer groups') 59 | parser.add_argument('--T_ball_radius', type=float, default=0.2, help='square of radius for ball query of temperal stream') 60 | 61 | parser.add_argument('--learning_rate_decay', type=float, default=1e-7, help='learning rate decay') 62 | 63 | parser.add_argument('--size', type=str, default='full', help='how many samples do we load: small | full') 64 | parser.add_argument('--SAMPLE_NUM', type=int, default = 2048, help='number of sample points') 65 | 66 | parser.add_argument('--Num_Class', type=int, default = 60, help='number of outputs') 67 | parser.add_argument('--knn_K', type=int, default = 64, help='K for knn search') 68 | parser.add_argument('--sample_num_level1', type=int, default = 512, help='number of first layer groups') 69 | parser.add_argument('--sample_num_level2', type=int, default = 128, help='number of second layer groups') 70 | parser.add_argument('--ball_radius', type=float, default=0.1, help='square of radius for ball query in level 1')#0.025 -> 0.05 for detph 71 | parser.add_argument('--ball_radius2', type=float, default=0.2, help='square of radius for ball query in level 2')# 0.08 -> 0.01 for depth 72 | 73 | opt = parser.parse_args() 74 | print (opt) 75 | logging.basicConfig(format='%(asctime)s %(message)s', datefmt='%Y/%m/%d %H:%M:%S', filename=os.path.join(opt.save_root_dir, '(单3dp)(去2)(2局+3简运)(位编)(cs2t1)(多池1212-66)(中2)T11F20-51212832k4816test.log'), level=logging.INFO) 76 | # torch.cuda.set_device(opt.main_gpu) 77 | 78 | opt.manualSeed = 1 79 | random.seed(opt.manualSeed) 80 | torch.manual_seed(opt.manualSeed) 81 | 82 | try: 83 | os.makedirs(opt.save_root_dir) 84 | except OSError: 85 | pass 86 | 87 | os.environ['CUDA_VISIBLE_DEVICES'] = '0' 88 | 89 | torch.backends.cudnn.benchmark = True 90 | #torch.backends.cudnn.deterministic = True 91 | torch.cuda.empty_cache() 92 | 93 | #################改2############# 94 | data_val = NTU_RGBD(root_path = opt.root_path, opt=opt, 95 | DATA_CROSS_VIEW = False, 96 | full_train = 
False, 97 | validation = False, 98 | test = True, 99 | Transform = False 100 | ) 101 | val_loader = DataLoader(dataset = data_val, batch_size = 8,num_workers = 8) 102 | 103 | #net = 104 | 105 | netR = PointNet_Plus(opt) 106 | #################改3############# 107 | netR.load_state_dict(torch.load("C:\\Users\\Administrator\\Desktop\\pointnet_para_89.pth")) 108 | 109 | netR = torch.nn.DataParallel(netR).cuda() 110 | netR.cuda() 111 | print(netR) 112 | 113 | 114 | # evaluate mode 115 | torch.cuda.synchronize() 116 | netR.eval() 117 | conf_mat = np.zeros([opt.Num_Class, opt.Num_Class]) 118 | conf_mat60 = np.zeros([60, 60]) 119 | acc = 0.0 120 | loss_sigma = 0.0 121 | 122 | with torch.no_grad(): 123 | for i, data in enumerate(tqdm(val_loader)): 124 | #print(i) 125 | torch.cuda.synchronize() 126 | group_time_start = time.time() 127 | points4DV_T,label,vid_name = data 128 | points4DV_T,label = points4DV_T.cuda(),label.cuda() 129 | # print('points4DV_T:',points4DV_T.shape) 130 | xt, yt = group_points_4DV_T_S(points4DV_T, opt)#B*F*4*Cen*K B*F*4*Cen*1 131 | # print('xt:',xt.shape) 132 | xt = xt.type(torch.FloatTensor) 133 | yt = yt.type(torch.FloatTensor) 134 | forward_time_start= time.time() 135 | prediction = netR(xt,yt) 136 | forward_time_end = time.time() 137 | 138 | 139 | print('forward time:',forward_time_end-forward_time_start) 140 | _, predicted60 = torch.max(prediction.data[:,0:60], 1) 141 | _, predicted = torch.max(prediction.data, 1) 142 | #print(prediction.data) 143 | 144 | for j in range(len(label)): 145 | cate_i = label[j].cpu().numpy() 146 | pre_i = predicted[j].cpu().numpy() 147 | if pre_i != cate_i: 148 | logging.info('Video Name:{} -- correct label {} predicted to {}'.format(vid_name[j],cate_i,pre_i)) 149 | conf_mat[cate_i, pre_i] += 1.0 150 | if cate_i<60: 151 | pre_i60 = predicted60[j].cpu().numpy() 152 | conf_mat60[cate_i, pre_i60] += 1.0 153 | 154 | print('NTU120:{:.2%} NTU60:{:.2%}--correct number {}--all number {}===Average loss:{:.6%}'.format(conf_mat.trace() / conf_mat.sum(),conf_mat60.trace() / conf_mat60.sum(),conf_mat60.trace(),conf_mat60.sum(),loss_sigma/(i+1)/2)) 155 | logging.info('{} --nepoch{} set Accuracy:{:.2%}--correct number {}--all number {}===Average loss:{}'.format('Valid', opt.nepoch, conf_mat.trace() / conf_mat.sum(),conf_mat60.trace(),conf_mat60.sum(), loss_sigma/(i+1))) 156 | 157 | #torch.save(netR.module.state_dict(), '%s/pointnet_para_%d.pth' % (opt.save_root_dir, epoch)) 158 | if __name__ == '__main__': 159 | main() 160 | 161 | -------------------------------------------------------------------------------- /MSR_Net/train.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import torch 3 | import os 4 | import tqdm 5 | import shutil 6 | import collections 7 | import argparse 8 | import random 9 | import time 10 | #import gpu_utils as g 11 | import numpy as np 12 | 13 | from model import PointNet_Plus#,Attension_Point,TVLAD 14 | from dataset import NTU_RGBD 15 | from utils import group_points_4DV_T_S 16 | 17 | from PIL import Image 18 | from torch.utils.data import Dataset 19 | from torchvision import transforms 20 | from torch.utils.data import DataLoader 21 | from tqdm import tqdm 22 | import logging 23 | def main(args=None): 24 | parser = argparse.ArgumentParser(description = "Training") 25 | 26 | parser.add_argument('--batchSize', type=int, default=16, help='input batch size')#¥¥¥¥ 27 | parser.add_argument('--nepoch', type=int, default=150, help='number of epochs to train for') 28 | 
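# INPUT_FEATURE_NUM defaults to 3: only the x, y, z coordinates of each sampled point are fed to the network.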
parser.add_argument('--INPUT_FEATURE_NUM', type=int, default = 3, help='number of input point features') 29 | parser.add_argument('--temperal_num', type=int, default = 3, help='number of input point features') 30 | parser.add_argument('--pooling', type=str, default='concatenation', help='how to aggregate temporal split features: vlad | concatenation | bilinear') 31 | parser.add_argument('--dataset', type=str, default='ntu60', help='how to aggregate temporal split features: ntu120 | ntu60') 32 | 33 | parser.add_argument('--weight_decay', type=float, default=0.0008, help='weight decay (SGD only)') 34 | parser.add_argument('--learning_rate', type=float, default=0.001, help='learning rate at t=0')#¥¥¥¥ 35 | parser.add_argument('--gamma', type=float, default=0.5, help='')#¥¥¥¥ 36 | parser.add_argument('--momentum', type=float, default=0.9, help='momentum (SGD only)') 37 | parser.add_argument('--workers', type=int, default=0, help='number of data loading workers') 38 | 39 | parser.add_argument('--root_path', type=str, default='C:\\Users\\Administrator\\Desktop\\LX\\paper\\dataset\\Prosessed_dataset\\01_MSR3D', help='preprocess folder') 40 | # parser.add_argument('--depth_path', type=str, default='C:\\Users\\Administrator\\Desktop\\LX\paper\\dataset\\Prosessed_dataset\\01_MSR3D\\', help='raw_depth_png') 41 | ################ 42 | # parser.add_argument('--save_root_dir', type=str, default='C:\\Users\\Administrator\\Desktop\\LX\\paper\\code\\3DV-Action-master\\models\\ntu60\\xsub', help='output folder') 43 | parser.add_argument('--save_root_dir', type=str, default='C:\\Users\\Administrator\\Desktop\\LX\\paper\\code\\Models_Parameter\\03_3DV-Action-master(base-run-version)(FPS512-64-K32)(2层局部+时序池化3层简单)(t=6_stride=2_KC=64)(单时序流4维特征)(单流读取)\\models\\msr\\xsub', help='output folder') 44 | parser.add_argument('--model', type=str, default = '', help='model name for training resume') 45 | parser.add_argument('--optimizer', type=str, default = '', help='optimizer name for training resume') 46 | 47 | parser.add_argument('--ngpu', type=int, default=1, help='# GPUs') 48 | parser.add_argument('--main_gpu', type=int, default=0, help='main GPU id') # CUDA_VISIBLE_DEVICES=0 python train.py 49 | 50 | ######## 51 | parser.add_argument('--Seg_size', type=int, default =1, help='number of frame in seg') 52 | parser.add_argument('--stride', type=int, default = 1, help='stride of seg') 53 | parser.add_argument('--all_framenum', type=int, default = 24, help='number of action frame') 54 | parser.add_argument('--framenum', type=int, default = 24, help='number of action frame') 55 | parser.add_argument('--EACH_FRAME_SAMPLE_NUM', type=int, default = 512, help='number of sample points in each frame') 56 | parser.add_argument('--T_knn_K', type=int, default = 48, help='K for knn search of temperal stream') 57 | parser.add_argument('--T_knn_K2', type=int, default = 16, help='K for knn search of temperal stream') 58 | parser.add_argument('--T_sample_num_level1', type=int, default = 128, help='number of first layer groups') 59 | parser.add_argument('--T_sample_num_level2', type=int, default = 32, help='number of first layer groups') 60 | parser.add_argument('--T_ball_radius', type=float, default=0.2, help='square of radius for ball query of temperal stream') 61 | 62 | parser.add_argument('--learning_rate_decay', type=float, default=1e-7, help='learning rate decay') 63 | 64 | parser.add_argument('--size', type=str, default='full', help='how many samples do we load: small | full') 65 | parser.add_argument('--SAMPLE_NUM', type=int, 
default = 2048, help='number of sample points') 66 | 67 | parser.add_argument('--Num_Class', type=int, default = 20, help='number of outputs') 68 | parser.add_argument('--knn_K', type=int, default = 64, help='K for knn search') 69 | parser.add_argument('--sample_num_level1', type=int, default = 512, help='number of first layer groups') 70 | parser.add_argument('--sample_num_level2', type=int, default = 128, help='number of second layer groups') 71 | parser.add_argument('--ball_radius', type=float, default=0.1, help='square of radius for ball query in level 1')#0.025 -> 0.05 for detph 72 | parser.add_argument('--ball_radius2', type=float, default=0.2, help='square of radius for ball query in level 2')# 0.08 -> 0.01 for depth 73 | 74 | 75 | opt = parser.parse_args() 76 | print (opt) 77 | logging.basicConfig(format='%(asctime)s %(message)s', datefmt='%Y/%m/%d %H:%M:%S', filename=os.path.join(opt.save_root_dir, 'train00.log'), level=logging.INFO) 78 | # torch.cuda.set_device(opt.main_gpu) 79 | 80 | opt.manualSeed = 1 81 | random.seed(opt.manualSeed) 82 | torch.manual_seed(opt.manualSeed) 83 | 84 | try: 85 | os.makedirs(opt.save_root_dir) 86 | except OSError: 87 | pass 88 | 89 | os.environ['CUDA_VISIBLE_DEVICES'] = '1' 90 | 91 | torch.backends.cudnn.benchmark = True 92 | #torch.backends.cudnn.deterministic = True 93 | torch.cuda.empty_cache() 94 | ############################## 95 | data_train = NTU_RGBD(root_path = opt.root_path,opt=opt, 96 | DATA_CROSS_VIEW = False, 97 | full_train = True, 98 | validation = False, 99 | test = False, 100 | Transform = True 101 | ) 102 | train_loader = DataLoader(dataset = data_train, batch_size = opt.batchSize, shuffle = True, drop_last = True,num_workers = 8) 103 | data_val = NTU_RGBD(root_path = opt.root_path, opt=opt, 104 | DATA_CROSS_VIEW = False, 105 | full_train = False, 106 | validation = False, 107 | test = True, 108 | Transform = False 109 | ) 110 | val_loader = DataLoader(dataset = data_val, batch_size = 24,num_workers = 8) 111 | 112 | netR = PointNet_Plus(opt) 113 | 114 | netR = torch.nn.DataParallel(netR).cuda() 115 | netR.cuda() 116 | print(netR) 117 | 118 | criterion = torch.nn.CrossEntropyLoss().cuda() 119 | optimizer = torch.optim.Adam(netR.parameters(), lr=opt.learning_rate, betas = (0.5, 0.999), eps=1e-06) 120 | scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=opt.gamma) 121 | 122 | for epoch in range(opt.nepoch): 123 | scheduler.step(epoch) 124 | 125 | # switch to train mode 126 | torch.cuda.synchronize() 127 | netR.train() 128 | acc = 0.0 129 | loss_sigma = 0.0 130 | total1 = 0.0 131 | timer = time.time() 132 | 133 | for i, data in enumerate(tqdm(train_loader, 0)): 134 | if len(data[0])==1: 135 | continue 136 | torch.cuda.synchronize() 137 | # 1 load imputs and target 138 | ## 3DV points and 3 temporal segment appearance points 139 | ## points_xyzc: B*2048*8;points_1xyz:B*2048*3 target: B*1 140 | points4DV_T,label,v_name = data 141 | points4DV_T,label = points4DV_T.cuda(),label.cuda() 142 | # print('points4DV_T:',points4DV_T.shape) 143 | xt, yt = group_points_4DV_T_S(points4DV_T, opt)#B*F*4*Cen*K B*F*4*Cen*1 144 | # print('xt:',xt.shape) 145 | xt = xt.type(torch.FloatTensor) 146 | yt = yt.type(torch.FloatTensor) 147 | 148 | prediction = netR(xt,yt) 149 | 150 | loss = criterion(prediction,label) 151 | optimizer.zero_grad() 152 | 153 | loss.backward() 154 | optimizer.step() 155 | torch.cuda.synchronize() 156 | # update training error 157 | loss_sigma += loss.item() 158 | #_, predicted60 = 
torch.max(prediction.data[:,0:60], 1) 159 | _, predicted = torch.max(prediction.data, 1) 160 | # print(predicted.data) 161 | acc += (predicted==label).cpu().sum().numpy() 162 | total1 += label.size(0) 163 | 164 | 165 | acc_avg = acc/total1 166 | loss_avg = loss_sigma/total1 167 | print('======>>>>> Online epoch: #%d, lr=%.10f,Acc=%f,correctnum=%f,allnum=%f,avg_loss=%f <<<<<======' %(epoch, scheduler.get_lr()[0],acc_avg,acc,total1,loss_avg)) 168 | print("Epoch: " + str(epoch) + " Iter: " + str(i) + " Acc: " + ("%.2f" % acc_avg) +" Classification Loss: " + str(loss_avg)) 169 | logging.info('======>>>>> Online epoch: #%d, lr=%.10f,Acc=%f,correctnum=%f,allnum=%f,avg_loss=%f <<<<<======' %(epoch, scheduler.get_lr()[0],acc_avg,acc,total1,loss_avg)) 170 | logging.info("Epoch: " + str(epoch) + " Iter: " + str(i) + " Acc: " + ("%.2f" % acc_avg) +" Classification Loss: " + str(loss_avg)) 171 | if ((epoch+1)%1==0 or epoch==opt.nepoch-1): 172 | # evaluate mode 173 | torch.cuda.synchronize() 174 | netR.eval() 175 | conf_mat = np.zeros([opt.Num_Class, opt.Num_Class]) 176 | conf_mat60 = np.zeros([20, 20]) 177 | acc = 0.0 178 | loss_sigma = 0.0 179 | 180 | with torch.no_grad(): 181 | for i, data in enumerate(tqdm(val_loader)): 182 | torch.cuda.synchronize() 183 | 184 | points4DV_T,label,v_name = data 185 | # print(v_name) 186 | points4DV_T,label = points4DV_T.cuda(),label.cuda() 187 | 188 | xt, yt = group_points_4DV_T_S(points4DV_T, opt)#(B*F)*4*Cen*K (B*F)*4*Cen*1 189 | 190 | xt = xt.type(torch.FloatTensor) 191 | yt = yt.type(torch.FloatTensor) 192 | 193 | prediction = netR(xt,yt) 194 | 195 | loss = criterion(prediction,label) 196 | # print(label,prediction) 197 | _, predicted60 = torch.max(prediction.data[:,0:20], 1) 198 | _, predicted = torch.max(prediction.data, 1) 199 | # print(predicted60.data) 200 | loss_sigma += loss.item() 201 | 202 | for j in range(len(label)): 203 | cate_i = label[j].cpu().numpy() 204 | pre_i = predicted[j].cpu().numpy() 205 | conf_mat[cate_i, pre_i] += 1.0 206 | 207 | if cate_i<20: 208 | pre_i60 = predicted60[j].cpu().numpy() 209 | conf_mat60[cate_i, pre_i60] += 1.0 210 | # print(conf_mat) 211 | print('MSR120:{:.2%} MSR60:{:.2%}--correct number {}--all number {}===Average loss:{:.6%}'.format(conf_mat.trace() / conf_mat.sum(),conf_mat60.trace() / conf_mat60.sum(),conf_mat60.trace(),conf_mat60.sum(),loss_sigma/(i+1)/2)) 212 | logging.info('#################{} --epoch{} set Accuracy:{:.2%}--correct number {}--all number {}===Average loss:{}'.format('Valid', epoch, conf_mat.trace() / conf_mat.sum(),conf_mat60.trace(),conf_mat60.sum(), loss_sigma/(i+1))) 213 | 214 | torch.save(netR.module.state_dict(), '%s/pointnet_para_%d.pth' % (opt.save_root_dir, epoch)) 215 | if __name__ == '__main__': 216 | main() 217 | 218 | -------------------------------------------------------------------------------- /MSR_Net/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .utils_3d import * -------------------------------------------------------------------------------- /MSR_Net/utils/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XingLi1012/SequentialPointNet/88daef1d540ce849e431130acbf910556e59b0bb/MSR_Net/utils/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /MSR_Net/utils/__pycache__/utils_3d.cpython-39.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/XingLi1012/SequentialPointNet/88daef1d540ce849e431130acbf910556e59b0bb/MSR_Net/utils/__pycache__/utils_3d.cpython-39.pyc -------------------------------------------------------------------------------- /MSR_Net/utils/utils_3d.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Variable 4 | import pdb 5 | def group_points_4DV_T_S(points, opt): 6 | #B*F*512*4 7 | T_ball_radius = torch.tensor(0.06) 8 | cur_train_size = points.shape[0]# 9 | INPUT_FEATURE_NUM = points.shape[-1]#3 10 | 11 | points = points.view(cur_train_size*opt.framenum, opt.EACH_FRAME_SAMPLE_NUM, -1)#(B*F)*512*4 12 | # print('1points:',points.shape,cur_train_size,opt.framenum, opt.EACH_FRAME_SAMPLE_NUM, -1) 13 | inputs1_diff = points[:,:,0:3].transpose(1,2).unsqueeze(1).expand(cur_train_size*opt.framenum,opt.T_sample_num_level1,3,opt.EACH_FRAME_SAMPLE_NUM) \ 14 | - points[:,0:opt.T_sample_num_level1,0:3].unsqueeze(-1).expand(cur_train_size*opt.framenum,opt.T_sample_num_level1,3,opt.EACH_FRAME_SAMPLE_NUM)# (B*F )* 64 * 3 * 512 15 | inputs1_diff = torch.mul(inputs1_diff, inputs1_diff) # B * 512 * 3 * 1024 16 | inputs1_diff = inputs1_diff.sum(2) # B * 512 * 1024 distance 17 | dists, inputs1_idx = torch.topk(inputs1_diff, opt.T_knn_K, 2, largest=False, sorted=False) # dists: B * 512 * 32; inputs1_idx: B * 512 * 32 18 | 19 | # ball query 20 | invalid_map = dists.gt(T_ball_radius) # B * 512 * 64 value: binary 21 | 22 | for jj in range(opt.T_sample_num_level1): 23 | inputs1_idx[:,jj,:][invalid_map[:,jj,:]] = jj 24 | 25 | idx_group_l1_long = inputs1_idx.view(cur_train_size*opt.framenum,opt.T_sample_num_level1*opt.T_knn_K,1).expand(cur_train_size*opt.framenum,opt.T_sample_num_level1*opt.T_knn_K,INPUT_FEATURE_NUM) 26 | 27 | inputs_level1 = points.gather(1,idx_group_l1_long).view(cur_train_size*opt.framenum,opt.T_sample_num_level1,opt.T_knn_K,INPUT_FEATURE_NUM) # (B*F)*64*32*4 28 | 29 | inputs_level1_center = points[:,0:opt.T_sample_num_level1,0:INPUT_FEATURE_NUM ].unsqueeze(2) # (B*F)*64*1*4 30 | inputs_level1[:,:,:,0:3] = inputs_level1[:,:,:,0:3] - inputs_level1_center[:,:,:,0:3].expand(cur_train_size*opt.framenum,opt.T_sample_num_level1,opt.T_knn_K,3)# (B*F)*64*32*3 31 | if(1==1): 32 | dis_l=torch.mul(inputs_level1[:,:,:,0:3], inputs_level1[:,:,:,0:3]) 33 | 34 | dis_l=dis_l.sum(3).unsqueeze(3)#lx# 35 | 36 | inputs_level1 = torch.cat((inputs_level1,dis_l),3).unsqueeze(1).transpose(1,4).squeeze(4) # (B*F)*4*64*32 37 | 38 | inputs_level1_center = inputs_level1_center.contiguous().view(cur_train_size,opt.framenum,opt.T_sample_num_level1,1,INPUT_FEATURE_NUM).transpose(2,3).transpose(2,4) # (B*F)*4*64*1 39 | FEATURE_NUM = inputs_level1.shape[-3]#4 40 | inputs_level1=inputs_level1.view(cur_train_size,opt.framenum,FEATURE_NUM, opt.T_sample_num_level1, opt.T_knn_K)#B*F*4*Cen*K 41 | return inputs_level1, inputs_level1_center 42 | def group_points_4DV_T_S2(points, opt): 43 | #B*F*Cen1*(3+128) 44 | T_ball_radius = torch.tensor(0.11) 45 | cur_train_size = points.shape[0]# 46 | INPUT_FEATURE_NUM = points.shape[-1]#4 47 | 48 | points = points.view(cur_train_size*opt.framenum, opt.T_sample_num_level1, -1)#(B*F)*512*4 49 | # print('1points:',points.shape,cur_train_size,opt.framenum, opt.T_sample_num_level1, -1) 50 | inputs1_diff = 
points[:,:,0:3].transpose(1,2).unsqueeze(1).expand(cur_train_size*opt.framenum,opt.T_sample_num_level2,3,opt.T_sample_num_level1) \ 51 | - points[:,0:opt.T_sample_num_level2,0:3].unsqueeze(-1).expand(cur_train_size*opt.framenum,opt.T_sample_num_level2,3,opt.T_sample_num_level1)# (B*F )* 64 * 3 * 512 52 | inputs1_diff = torch.mul(inputs1_diff, inputs1_diff) # B * 512 * 3 * 1024 53 | inputs1_diff = inputs1_diff.sum(2) # B * 512 * 1024 distance 54 | # print('inputs1_diff:',inputs1_diff.shape) 55 | dists, inputs1_idx = torch.topk(inputs1_diff, opt.T_knn_K2, 2, largest=False, sorted=False) # dists: B * 512 * 32; inputs1_idx: B * 512 * 32 56 | # print('inputs1_idx:',inputs1_idx.shape) 57 | # ball query 58 | invalid_map = dists.gt(T_ball_radius) # B * 512 * 64 value: binary 59 | 60 | for jj in range(opt.T_sample_num_level2): 61 | inputs1_idx[:,jj,:][invalid_map[:,jj,:]] = jj 62 | 63 | idx_group_l1_long = inputs1_idx.view(cur_train_size*opt.framenum,opt.T_sample_num_level2*opt.T_knn_K2,1).expand(cur_train_size*opt.framenum,opt.T_sample_num_level2*opt.T_knn_K2,points.shape[-1]) 64 | # print('points:',points.shape) 65 | # print('pointsg:',points.gather(1,idx_group_l1_long).shape) 66 | # print(cur_train_size*opt.framenum,opt.T_sample_num_level2,opt.T_knn_K2,points.shape[-1]) 67 | inputs_level1 = points.gather(1,idx_group_l1_long).view(cur_train_size*opt.framenum,opt.T_sample_num_level2,opt.T_knn_K2,points.shape[-1]) # (B*F)*64*32*4 68 | 69 | inputs_level1_center = points[:,0:opt.T_sample_num_level2,0:opt.INPUT_FEATURE_NUM].unsqueeze(2) # (B*F)*64*1*4 70 | inputs_level1[:,:,:,0:3] = inputs_level1[:,:,:,0:3] - inputs_level1_center[:,:,:,0:3].expand(cur_train_size*opt.framenum,opt.T_sample_num_level2,opt.T_knn_K2,3)# (B*F)*64*32*3 71 | if(1==1): 72 | dis_l=torch.mul(inputs_level1[:,:,:,0:3], inputs_level1[:,:,:,0:3]) 73 | 74 | dis_l=dis_l.sum(3).unsqueeze(3)#lx# 75 | 76 | inputs_level1 = torch.cat((inputs_level1,dis_l),3).unsqueeze(1).transpose(1,4).squeeze(4) # (B*F)*4*C2en*32 77 | 78 | inputs_level1_center = inputs_level1_center.contiguous().view(cur_train_size,opt.framenum,opt.T_sample_num_level2,1,opt.INPUT_FEATURE_NUM).transpose(2,3).transpose(2,4) # (B*F)*4*64*1 79 | FEATURE_NUM = inputs_level1.shape[-3]#4 80 | inputs_level1=inputs_level1.view(cur_train_size,opt.framenum,FEATURE_NUM, opt.T_sample_num_level2, opt.T_knn_K2)#B*F*4*Cen*K 81 | return inputs_level1, inputs_level1_center 82 | -------------------------------------------------------------------------------- /NTU60_Net/channelattention/Channelattention.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import math 3 | try: 4 | from torch.hub import load_state_dict_from_url 5 | except ImportError: 6 | from torch.utils.model_zoo import load_url as load_state_dict_from_url 7 | import torch 8 | 9 | class ChannelAttention(nn.Module): 10 | def __init__(self, in_planes, ratio=16): 11 | super(ChannelAttention, self).__init__() 12 | self.avg_pool = nn.AdaptiveAvgPool2d(1)#B*C*H*W->B*C*1*1.....->B*C*1*1 13 | self.max_pool = nn.AdaptiveMaxPool2d(1) 14 | # print(in_planes,in_planes // 2) 15 | self.fc1 = nn.Conv2d(in_planes, in_planes // 16, 1, bias=False) 16 | self.relu1 = nn.ReLU() 17 | self.fc2 = nn.Conv2d(in_planes // 16, in_planes, 1, bias=False) 18 | 19 | self.sigmoid = nn.Sigmoid() 20 | 21 | def forward(self, x): 22 | 23 | # print('x:',x.shape) 24 | # print('self.avg_pool(x):',self.avg_pool(x).shape) 25 | # print('elf.fc1(self.avg_pool(x)):',self.fc1(self.avg_pool(x)).shape) 26 | 
avg_out = self.fc2(self.relu1(self.fc1(self.avg_pool(x)))) 27 | # print('avg_out:',avg_out.shape) 28 | max_out = self.fc2(self.relu1(self.fc1(self.max_pool(x)))) 29 | out = avg_out + max_out 30 | return self.sigmoid(out) 31 | class ChannelAttention0(nn.Module): 32 | def __init__(self, in_planes, ratio=16): 33 | super(ChannelAttention0, self).__init__() 34 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 35 | self.max_pool = nn.AdaptiveMaxPool2d(1) 36 | # print(in_planes,in_planes // 2) 37 | self.fc1 = nn.Conv2d(in_planes, in_planes, 1, bias=False) 38 | self.relu1 = nn.ReLU() 39 | self.fc2 = nn.Conv2d(in_planes, in_planes, 1, bias=False) 40 | 41 | self.sigmoid = nn.Sigmoid() 42 | 43 | def forward(self, x): 44 | 45 | # print('x:',x.shape) 46 | # print('self.avg_pool(x):',self.avg_pool(x).shape) 47 | # print('elf.fc1(self.avg_pool(x)):',self.fc1(self.avg_pool(x)).shape) 48 | avg_out = self.fc2(self.relu1(self.fc1(self.avg_pool(x)))) 49 | # print('avg_out:',avg_out.shape) 50 | max_out = self.fc2(self.relu1(self.fc1(self.max_pool(x)))) 51 | out = avg_out + max_out 52 | return self.sigmoid(out) -------------------------------------------------------------------------------- /NTU60_Net/channelattention/__init__.py: -------------------------------------------------------------------------------- 1 | from .Channelattention import * -------------------------------------------------------------------------------- /NTU60_Net/channelattention/__pycache__/Channelattention.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XingLi1012/SequentialPointNet/88daef1d540ce849e431130acbf910556e59b0bb/NTU60_Net/channelattention/__pycache__/Channelattention.cpython-39.pyc -------------------------------------------------------------------------------- /NTU60_Net/channelattention/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XingLi1012/SequentialPointNet/88daef1d540ce849e431130acbf910556e59b0bb/NTU60_Net/channelattention/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /NTU60_Net/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | from .dataset import NTU_RGBD -------------------------------------------------------------------------------- /NTU60_Net/dataset/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XingLi1012/SequentialPointNet/88daef1d540ce849e431130acbf910556e59b0bb/NTU60_Net/dataset/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /NTU60_Net/dataset/__pycache__/dataset.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XingLi1012/SequentialPointNet/88daef1d540ce849e431130acbf910556e59b0bb/NTU60_Net/dataset/__pycache__/dataset.cpython-39.pyc -------------------------------------------------------------------------------- /NTU60_Net/dataset/dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tqdm 3 | import torch 4 | import re 5 | import collections 6 | import imageio 7 | import random 8 | 9 | from tqdm import tqdm 10 | from torch.utils.data import Dataset 11 | 12 | import pandas as pd 13 | import 
numpy as np 14 | import scipy.io as sio 15 | 16 | fx = 365.481 17 | fy = 365.481 18 | cx = 257.346 19 | cy = 210.347 20 | #rose@ntu.edu.sg 21 | sample_num_level1 = 512 22 | sample_num_level2 = 128 23 | 24 | TRAIN_IDS_60 = [1, 2, 4, 5, 8, 9, 13, 14, 15, 16, 17, 18, 19, 25, 27, 28, 31, 34, 35, 38] 25 | 26 | TRAIN_IDS = [1, 2, 4, 5, 8, 9, 13, 14, 15, 16, 17, 18, 19, 25, 27, 28, 31, 34, 35, 38,45,46,47,49, 27 | 50,52,53,54,55,56,57,58,59,70,74,78,80,81,82,83,84,85,86,89,91,92,93,94,95,97,98,100,103] 28 | TRAIN_VALID_IDS = ([1, 2, 5, 8, 9, 13, 14, 15, 16, 18, 19, 27, 28, 31, 34, 38], [4, 17, 25, 35]) 29 | compiled_regex = re.compile('.*S(\d{3})C(\d{3})P(\d{3})R(\d{3})A(\d{3}).*') 30 | # SAMPLE_NUM = 2048 31 | 32 | class NTU_RGBD(Dataset): 33 | """NTU depth human masked datasets""" 34 | def __init__(self, root_path, opt, 35 | full_train = True, 36 | test=False, 37 | validation=False, 38 | DATA_CROSS_VIEW = True, 39 | Transform = True): 40 | 41 | self.DATA_CROSS_VIEW = DATA_CROSS_VIEW 42 | self.root_path = root_path 43 | self.SAMPLE_NUM = opt.SAMPLE_NUM 44 | self.INPUT_FEATURE_NUM = opt.INPUT_FEATURE_NUM 45 | self.EACH_FRAME_SAMPLE_NUM = opt.EACH_FRAME_SAMPLE_NUM 46 | self.T_sample_num_level1 = opt.T_sample_num_level1 47 | 48 | self.all_framenum = opt.all_framenum 49 | self.framenum= opt.framenum 50 | self.transform = Transform 51 | #self.depth_path = opt.depth_path 52 | 53 | self.point_vids = os.listdir(self.root_path+'\\T(ground)')#.sort() 54 | self.point_vids.sort() 55 | #print(self.point_vids) 56 | self.TRAIN_IDS = TRAIN_IDS 57 | if opt.dataset == 'ntu60': 58 | indx = self.point_vids.index('S017C003P020R002A060.npy')#('S016C003P040R002A060_xyzC.mat')#('S017C003P020R002A060.npy') 59 | self.point_vids = self.point_vids[0:indx] 60 | self.TRAIN_IDS = TRAIN_IDS_60 61 | 62 | self.num_clouds = len(self.point_vids) 63 | print(self.num_clouds) 64 | self.point_data = self.load_data() 65 | 66 | self.set_splits() 67 | 68 | self.id_to_action = list(pd.DataFrame(self.point_data)['action'] - 1) 69 | self.id_to_vidName = list(pd.DataFrame(self.point_data)['video_cloud_name']) 70 | 71 | self.train = (test == False) and (validation == False) 72 | if DATA_CROSS_VIEW == False: 73 | if test: self.vid_ids = self.test_split_subject.copy() 74 | elif validation: self.vid_ids = self.validation_split_subject.copy() 75 | elif full_train: self.vid_ids = self.train_split_subject.copy() 76 | else: self.vid_ids = self.train_split_subject_with_validation.copy() 77 | else: 78 | if test: self.vid_ids = self.test_split_camera.copy() 79 | else: self.vid_ids = self.train_split_camera.copy() 80 | 81 | print('num_data:',len(self.vid_ids)) 82 | 83 | 84 | # self.SAMPLE_NUM = opt.SAMPLE_NUM 85 | # self.INPUT_FEATURE_NUM = opt.INPUT_FEATURE_NUM 86 | 87 | # self.point_clouds = np.empty(shape=[self.SAMPLE_NUM, self.INPUT_FEATURE_NUM],dtype=np.float32) 88 | 89 | def __getitem__(self, idx): 90 | vid_id = self.vid_ids[idx] 91 | vid_name = self.id_to_vidName[vid_id] 92 | S_idx = vid_name[1:4] 93 | #print(vid_name) 94 | v_name = vid_name[:-4] 95 | 96 | ## 4DV-T motion point data 97 | path_T=self.root_path+'\\T(ground)' 98 | path_cloud_npy_T = os.path.join(path_T,self.id_to_vidName[vid_id]) 99 | 100 | all_sam = np.arange(self.all_framenum) 101 | 102 | if(1==1):#Randomly sample frames at equal intervals 103 | frame_index=[] 104 | for jj in range(self.framenum): 105 | iii =int(np.random.randint(int(self.all_framenum*jj/self.framenum), int(self.all_framenum*(jj+1)/self.framenum))) 106 | frame_index.append(iii) 107 | if(1==0):#Randomly sampled 
frames 108 | frame_index = random.sample(list(all_sam),self.framenum) 109 | points4DV_T= np.load(path_cloud_npy_T)[frame_index,0:self.EACH_FRAME_SAMPLE_NUM,:self.INPUT_FEATURE_NUM]#60*512*4 110 | ## 4DV-S motion point data 111 | # path_S=self.root_path+'\\S' 112 | # path_cloud_npy_S = os.path.join(path_S,self.id_to_vidName[vid_id]) 113 | # matlab data(.mat) OR python data(.npy) 114 | # XYZ_C = sio.loadmat(path_cloud_npy) 115 | #print(self.id_to_vidName[vid_id]) 116 | # points_c = XYZ_C['pc'].astype(np.float32) 117 | #print(path_cloud_npy) 118 | # points4DV_S= np.load(path_cloud_npy_S)[:,0:4]#2048*4 119 | # points4DV_S = np.expand_dims(points4DV_S, axis=0)#1*2048*4 120 | #print(points_c.shape, points_2048_f.shape) 121 | label = self.id_to_action[vid_id] 122 | 123 | # random angle rotate for data augment 124 | theta = np.random.rand()*1.4-0.7 125 | 126 | if self.transform: 127 | ## point data augment 128 | points4DV_T = self.point_transform(points4DV_T,theta) 129 | points4DV_T = torch.tensor(points4DV_T,dtype=torch.float) 130 | # points4DV_S = torch.tensor(points4DV_S,dtype=torch.float) 131 | label = torch.tensor(label) 132 | return points4DV_T,label,vid_name 133 | 134 | def __len__(self): 135 | return len(self.vid_ids) 136 | 137 | 138 | def load_data(self): 139 | self.point_data = [] 140 | for cloud_idx in tqdm(range(self.num_clouds), "Getting video info"): 141 | self.point_data.append(self.get_pointdata(cloud_idx)) 142 | 143 | return self.point_data 144 | 145 | def get_pointdata(self, vid_id): 146 | 147 | vid_name = self.point_vids[vid_id] 148 | match = re.match(compiled_regex, vid_name) 149 | setup, camera, performer, replication, action = [*map(int, match.groups())] 150 | return { 151 | 'video_cloud_name': vid_name, 152 | 'video_index': vid_id, 153 | 'video_set': (setup, camera), 154 | 'setup': setup, 155 | 'camera': camera, 156 | 'performer': performer, 157 | 'replication': replication, 158 | 'action': action, 159 | } 160 | 161 | 162 | def set_splits(self): 163 | ''' 164 | Sets the train/test splits 165 | Cross-Subject Evaluation: 166 | Train ids = 1, 2, 4, 5, 8, 9, 13, 14, 15, 16, 17, 18, 19, 25, 27, 167 | 28, 31, 34, 35, 38 168 | Cross-View Evaluation: 169 | Train camera views: 2, 3 170 | ''' 171 | # Save the dataset as a dataframe 172 | dataset = pd.DataFrame(self.point_data) 173 | 174 | # Get the train split ids 175 | train_ids_camera = [2, 3] 176 | 177 | # Cross-Subject splits 178 | self.train_split_subject = list( 179 | dataset[dataset.performer.isin(self.TRAIN_IDS)]['video_index']) 180 | self.train_split_subject_with_validation = list( 181 | dataset[dataset.performer.isin(TRAIN_VALID_IDS[0])]['video_index']) 182 | self.validation_split_subject = list( 183 | dataset[dataset.performer.isin(TRAIN_VALID_IDS[1])]['video_index']) 184 | self.test_split_subject = list( 185 | dataset[~dataset.performer.isin(self.TRAIN_IDS)]['video_index']) 186 | 187 | # Cross-View splits 188 | self.train_split_camera = list( 189 | dataset[dataset.camera.isin(train_ids_camera)]['video_index']) 190 | self.test_split_camera = list( 191 | dataset[~dataset.camera.isin(train_ids_camera)]['video_index']) 192 | 193 | 194 | def point_transform(self,points_xyz,y): 195 | 196 | anglesX = (np.random.uniform()-0.5) * (1/9) * np.pi 197 | R_y = np.array([[[np.cos(y),0.0,np.sin(y)], 198 | [0.0,1.0,0.0], 199 | [-np.sin(y),0.0,np.cos(y)]]]) 200 | R_x = np.array([[[1, 0, 0], 201 | [0, np.cos(anglesX), -np.sin(anglesX)], 202 | [0, np.sin(anglesX), np.cos(anglesX)]]]) 203 | #print(R_y.shape) 204 | 205 | # points_c[:,:,0:3] = 
self.jitter_point_cloud(points_c[:,:,0:3],sigma=0.007, clip=0.04) 206 | points_xyz[:,:,0:3] = self.jitter_point_cloud(points_xyz[:,:,0:3],sigma=0.007, clip=0.04)# 207 | 208 | # points_c[:,-1536:,:] = self.random_dropout_point_cloud(points_c[:,-1536:,:]) 209 | points_xyz[:,-(self.EACH_FRAME_SAMPLE_NUM-self.T_sample_num_level1):,:] = self.random_dropout_point_cloud(points_xyz[:,-(self.EACH_FRAME_SAMPLE_NUM-self.T_sample_num_level1):,:]) 210 | 211 | R = np.matmul(R_y, R_x) 212 | 213 | # points_c[:,:,0:3] = np.matmul(points_c[:,:,0:3],R) 214 | points_xyz[:,:,0:3] = np.matmul(points_xyz[:,:,0:3],R) 215 | 216 | #if np.random.rand()>0.6: 217 | # for i in range(3): 218 | # points_c[:,i] = points_c[:,i]+(np.random.rand()-0.5)/6 219 | # points_xyz[:,i] = points_xyz[:,i]+(np.random.rand()-0.5)/6 220 | 221 | #print(points.shape) 222 | return points_xyz 223 | 224 | 225 | # def load_depth_from_img(self,depth_path): 226 | # depth_im = imageio.imread(depth_path) #im is a numpy array 227 | # return depth_im 228 | 229 | def depth_to_pointcloud(self,depth_im): 230 | rows,cols = depth_im.shape 231 | xx,yy = np.meshgrid(range(0,cols), range(0,rows)) 232 | 233 | valid = depth_im > 0 234 | xx = xx[valid] 235 | yy = yy[valid] 236 | depth_im = depth_im[valid] 237 | 238 | X = (xx - cx) * depth_im / fx 239 | Y = (yy - cy) * depth_im / fy 240 | Z = depth_im 241 | 242 | points3d = np.array([X.flatten(), Y.flatten(), Z.flatten()]) 243 | 244 | return points3d 245 | 246 | def jitter_point_cloud(self, data, sigma=0.01, clip=0.05): 247 | """ 248 | 249 | :param data: Nx3 array 250 | :return: jittered_data: Nx3 array 251 | """ 252 | M,N, C = data.shape 253 | # print(np.random.randn(M, N, C))# 254 | jittered_data = np.clip(sigma * np.random.randn(M, N, C), -1 * clip, clip).astype(np.float32) 255 | 256 | jittered_data = data+jittered_data 257 | 258 | return jittered_data 259 | 260 | def random_dropout_point_cloud(self, data): 261 | """ 262 | :param data: Nx3 array 263 | :return: dropout_data: Nx3 array 264 | """ 265 | M, N, C = data.shape##60*300*4 266 | dropout_ratio = 0.7+ np.random.random()/2 267 | #dropout_ratio = np.random.random() * p 268 | drop_idx = np.where(np.random.random(N) <= dropout_ratio)[0] 269 | dropout_data = np.zeros_like(data) 270 | if len(drop_idx) > 0: 271 | dropout_data[:, drop_idx, :] = data[:, drop_idx, :] 272 | 273 | 274 | # xyz_center = np.random.random(3) 275 | # xyz_d = 0.1+np.random.random(3)/10 276 | 277 | # func_x = lambda d: d>xyz_center[0] and d<(xyz_center[0]+xyz_d[0]) 278 | # func_y = lambda d: d>xyz_center[1] and d<(xyz_center[1]+xyz_d[1]) 279 | # func_z = lambda d: d>xyz_center[2] and d<(xyz_center[2]+xyz_d[2]) 280 | # c_x = np.vectorize(func_x)(data[:,0]) 281 | # c_y = np.vectorize(func_x)(data[:,0]) 282 | # c_z = np.vectorize(func_x)(data[:,0]) 283 | # c = c_x*c_z*c_y 284 | # erase_index = np.where(c) 285 | # dropout_data[erase_index,:] =0 286 | return dropout_data 287 | -------------------------------------------------------------------------------- /NTU60_Net/model/__init__.py: -------------------------------------------------------------------------------- 1 | from .model import PointNet_Plus -------------------------------------------------------------------------------- /NTU60_Net/model/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XingLi1012/SequentialPointNet/88daef1d540ce849e431130acbf910556e59b0bb/NTU60_Net/model/__pycache__/__init__.cpython-39.pyc 
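A note on the data augmentation in dataset.py above: point_transform applies a random rotation about the Y axis (plus a small random X-axis tilt), Gaussian jitter on the xyz channels, and random point dropout to every clip of shape frames x points x channels (in dataset.py the dropout touches only the last EACH_FRAME_SAMPLE_NUM - T_sample_num_level1 points of each frame). Below is a minimal standalone sketch of the jitter and dropout steps on a dummy clip; the function names, the 24x512x4 dummy shape, and applying dropout to the whole clip are illustrative assumptions, not code from the repository.

import numpy as np

def jitter(clip, sigma=0.007, clip_val=0.04):
    # add clipped Gaussian noise to the xyz channels, as in NTU_RGBD.jitter_point_cloud
    noise = np.clip(sigma * np.random.randn(*clip[..., :3].shape), -clip_val, clip_val)
    out = clip.copy()
    out[..., :3] += noise.astype(clip.dtype)
    return out

def random_dropout(clip):
    # keep a random subset of point columns and zero the rest,
    # mirroring NTU_RGBD.random_dropout_point_cloud (keep ratio 0.7-1.2, so most points survive)
    frames, num_points, channels = clip.shape
    keep_ratio = 0.7 + np.random.random() / 2
    keep_idx = np.where(np.random.random(num_points) <= keep_ratio)[0]
    out = np.zeros_like(clip)
    out[:, keep_idx, :] = clip[:, keep_idx, :]
    return out

dummy_clip = np.random.rand(24, 512, 4).astype(np.float32)  # frames x points x (x, y, z, extra feature)
augmented = random_dropout(jitter(dummy_clip))
print(dummy_clip.shape, augmented.shape)  # both (24, 512, 4)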
-------------------------------------------------------------------------------- /NTU60_Net/model/__pycache__/model.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XingLi1012/SequentialPointNet/88daef1d540ce849e431130acbf910556e59b0bb/NTU60_Net/model/__pycache__/model.cpython-39.pyc -------------------------------------------------------------------------------- /NTU60_Net/model/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import math 3 | 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | from utils import group_points_4DV_T_S,group_points_4DV_T_S2 8 | from channelattention import ChannelAttention,ChannelAttention0 9 | from positionencoding import get_positional_encoding 10 | nstates_plus_1 = [64,64,128] 11 | nstates_plus_2 = [128,128,256] 12 | nstates_plus_3 = [256,512,1024,1024,256] 13 | 14 | S_nstates_plus_1 = [64,64,128] 15 | S_nstates_plus_2 = [128,128,256] 16 | T_nstates_plus_2 = [256,512,1024] 17 | T_nstates_plus_3 = [1024] 18 | vlad_dim_out = 128*8 19 | 20 | dim_out=1024 21 | 22 | class PointNet_Plus(nn.Module): 23 | def __init__(self,opt,num_clusters=8,gost=1,dim=128,normalize_input=True): 24 | super(PointNet_Plus, self).__init__() 25 | self.temperal_num = opt.temperal_num 26 | self.knn_K = opt.knn_K 27 | self.ball_radius2 = opt.ball_radius2 28 | self.sample_num_level1 = opt.sample_num_level1 29 | self.sample_num_level2 = opt.sample_num_level2 30 | self.INPUT_FEATURE_NUM = opt.INPUT_FEATURE_NUM # x,y,x,c : 4 31 | self.num_outputs = opt.Num_Class 32 | ####SAMPLE_NUM 33 | self.Seg_size = opt.Seg_size 34 | self.stride=opt.stride 35 | self.EACH_FRAME_SAMPLE_NUM=opt.EACH_FRAME_SAMPLE_NUM 36 | self.T_knn_K = opt.T_knn_K 37 | self.T_knn_K2= opt.T_knn_K2 38 | self.T_sample_num_level1 = opt.T_sample_num_level1 39 | self.T_sample_num_level2 = opt.T_sample_num_level2 40 | self.framenum=opt.framenum 41 | self.T_group_num=int((self.framenum-self.Seg_size)/self.stride)+1 42 | 43 | self.opt=opt 44 | self.dim=dim 45 | 46 | self.normalize_input=normalize_input 47 | 48 | self.pooling = opt.pooling 49 | #self._init_params() 50 | 51 | 52 | self.netR_T_S1 = nn.Sequential( 53 | # B*INPUT_FEATURE_NUM*sample_num_level1*knn_K 54 | nn.Conv2d(self.INPUT_FEATURE_NUM+1, S_nstates_plus_1[0], kernel_size=(1, 1)), 55 | nn.BatchNorm2d(S_nstates_plus_1[0]), 56 | nn.ReLU(inplace=True), 57 | # B*64*sample_num_level1*knn_K 58 | nn.Conv2d(S_nstates_plus_1[0], S_nstates_plus_1[1], kernel_size=(1, 1)), 59 | nn.BatchNorm2d(S_nstates_plus_1[1]), 60 | nn.ReLU(inplace=True), 61 | # B*64*sample_num_level1*knn_K 62 | nn.Conv2d(S_nstates_plus_1[1], S_nstates_plus_1[2], kernel_size=(1, 1)), 63 | nn.BatchNorm2d(S_nstates_plus_1[2]), 64 | nn.ReLU(inplace=True), 65 | # B*128*sample_num_level1*knn_K 66 | nn.MaxPool2d((1,self.T_knn_K),stride=1) 67 | ) 68 | self.ca_S2 = ChannelAttention(self.INPUT_FEATURE_NUM+1+S_nstates_plus_1[2]) 69 | self.netR_T_S2 = nn.Sequential( 70 | # B*INPUT_FEATURE_NUM*sample_num_level1*knn_K 71 | nn.Conv2d(self.INPUT_FEATURE_NUM+1+S_nstates_plus_1[2], S_nstates_plus_2[0], kernel_size=(1, 1)), 72 | nn.BatchNorm2d(S_nstates_plus_2[0]), 73 | nn.ReLU(inplace=True), 74 | # B*64*sample_num_level1*knn_K 75 | nn.Conv2d(S_nstates_plus_2[0], S_nstates_plus_2[1], kernel_size=(1, 1)), 76 | nn.BatchNorm2d(S_nstates_plus_2[1]), 77 | nn.ReLU(inplace=True), 78 | # B*64*sample_num_level1*knn_K 79 | nn.Conv2d(S_nstates_plus_2[1], S_nstates_plus_2[2], 
kernel_size=(1, 1)), 80 | nn.BatchNorm2d(S_nstates_plus_2[2]), 81 | nn.ReLU(inplace=True), 82 | # B*128*sample_num_level1*knn_K 83 | nn.MaxPool2d((1,self.T_knn_K2),stride=1) 84 | ) 85 | # self.ca_T1 = ChannelAttention(self.INPUT_FEATURE_NUM+S_nstates_plus_2[2]) 86 | self.net4DV_T1 = nn.Sequential( 87 | # B*INPUT_FEATURE_NUM*sample_num_level1*knn_K(B*10*28*2048) 88 | nn.Conv2d(self.INPUT_FEATURE_NUM+S_nstates_plus_2[2], T_nstates_plus_2[0], kernel_size=(1, 1)),#10->64 89 | nn.BatchNorm2d(T_nstates_plus_2[0]), 90 | nn.ReLU(inplace=True), 91 | # B*64*sample_num_level1*knn_K 92 | nn.Conv2d(T_nstates_plus_2[0], T_nstates_plus_2[1], kernel_size=(1, 1)),#64->64 93 | nn.BatchNorm2d(T_nstates_plus_2[1]), 94 | nn.ReLU(inplace=True), 95 | # B*64*sample_num_level1*knn_K 96 | nn.Conv2d(T_nstates_plus_2[1], T_nstates_plus_2[2], kernel_size=(1, 1)),#64->128 97 | nn.BatchNorm2d(T_nstates_plus_2[2]), 98 | nn.ReLU(inplace=True), 99 | # B*128*sample_num_level1*knn_K 100 | # nn.Conv2d(T_nstates_plus_2[2], T_nstates_plus_2[3], kernel_size=(1, 1)),#64->128 101 | # nn.BatchNorm2d(T_nstates_plus_2[3]), 102 | # nn.ReLU(inplace=True), 103 | nn.MaxPool2d((1,self.T_sample_num_level2*self.Seg_size),stride=1)#1*(t*512)#B*C*G*1 104 | ) 105 | self.net4DV_T2 = nn.Sequential( 106 | # B*259*sample_num_level2*1 107 | nn.Conv2d(T_nstates_plus_2[2], T_nstates_plus_3[0], kernel_size=(1, 1)), 108 | nn.BatchNorm2d(T_nstates_plus_3[0]), 109 | nn.ReLU(inplace=True), 110 | # B*256*sample_num_level2*1 111 | # nn.Conv2d(T_nstates_plus_3[0], T_nstates_plus_3[1], kernel_size=(1, 1)), 112 | # nn.BatchNorm2d(T_nstates_plus_3[1]), 113 | # nn.ReLU(inplace=True), 114 | # B*512*sample_num_level2*1 115 | # nn.Conv2d(T_nstates_plus_3[1], T_nstates_plus_3[2], kernel_size=(1, 1)), 116 | # nn.BatchNorm2d(T_nstates_plus_3[2]), 117 | # nn.ReLU(inplace=True), 118 | # B*1024*sample_num_level2*1 119 | # nn.MaxPool2d((self.T_group_num,1),stride=1), 120 | # B*1024*1*1 121 | ) 122 | KerStr=[(20,20),(10,10)] 123 | self.maxpoolings = nn.ModuleList([nn.MaxPool2d((K[0],1 ),(K[1],1)) for K in KerStr]) 124 | self.PE=get_positional_encoding(self.framenum,T_nstates_plus_2[2]) 125 | 126 | self.netR_FC = nn.Sequential( 127 | # B*1024 128 | #nn.Linear(nstates_plus_3[2], nstates_plus_3[3]), 129 | #nn.BatchNorm1d(nstates_plus_3[3]), 130 | #nn.ReLU(inplace=True), 131 | # B*1024 132 | nn.Linear(dim_out*4+256, nstates_plus_3[4]), 133 | nn.BatchNorm1d(nstates_plus_3[4]), 134 | nn.ReLU(inplace=True), 135 | # B*512 136 | nn.Linear(nstates_plus_3[4], self.num_outputs), 137 | nn.BatchNorm1d(self.num_outputs), 138 | nn.ReLU(inplace=True), 139 | # B*num_outputs 140 | ) 141 | def forward(self, xt, yt): 142 | #Intra-frame appearance encoding module 143 | #set abstract 1-improved pointnet 144 | B,f,d,N,k = xt.shape#B*F*4*Cen*K 145 | yt=yt.view(B*f,yt.size(2), self.opt.T_sample_num_level1, 1)#(B*F)*4*Cen*1 146 | xt=xt.view(B*f,d, self.opt.T_sample_num_level1, k)#(B*F)*4+1*Cen*K 147 | xt = self.netR_T_S1(xt)#(B*F)*128*Cen*1 148 | xt = torch.cat((yt, xt),1).squeeze(-1)#(B*F)*(4+128)*Cen 149 | xt=xt.view(B,f,xt.size(1), self.opt.T_sample_num_level1).transpose(2,3)#(B*F)*(4+128)*Cen->B*F*Cen1*(4+128) 150 | 151 | #set abstract 2-grouping 152 | S_inputs_level2,inputs_level1_center_s2 =group_points_4DV_T_S2(xt,self.opt)##B*F*5+128*Cen2*K2 B*F*4*Cen2*1 153 | #set abstract 2-improved pointnet 154 | B2,f2,d2,N2,k2 = S_inputs_level2.shape#B*F*4*Cen*K 155 | inputs_level1_center_s2=inputs_level1_center_s2.view(B2*f2,inputs_level1_center_s2.size(2), self.opt.T_sample_num_level2, 
1)#(B*F)*4*C2en*1 156 | S_inputs_level2=S_inputs_level2.view(B2*f2,d2, self.opt.T_sample_num_level2, k2)#(B*F)*5+128*C2en*K2 157 | S_inputs_level2 = self.ca_S2(S_inputs_level2) * S_inputs_level2 158 | xt = self.netR_T_S2(S_inputs_level2)#(B*F)*128*Cen2*1 159 | ###partition-level features 160 | xt_resS2=xt.squeeze(-1).view(B,f,xt.size(1), self.opt.T_sample_num_level2).transpose(1,2)#B*256*F*Cen2 161 | xt_resS2=F.max_pool2d(xt_resS2,kernel_size=(f,self.opt.T_sample_num_level2)).squeeze(-1).squeeze(-1)#B*256 162 | xt = torch.cat((inputs_level1_center_s2, xt),1).squeeze(-1)#(B*F)*4+128*Cen2 163 | xt =xt.view(-1,self.framenum,xt.size(1),self.opt.T_sample_num_level2).transpose(2,3)##(B*F)*(4+128)*C2en-》B*F*(4+128)*C2en->B*F*C2en*(4+128) 164 | 165 | T_inputs_level2 =xt.transpose(1,3).transpose(2,3) 166 | # T_inputs_level2 = self.ca_T1(T_inputs_level2) * T_inputs_level2 167 | xt = self.net4DV_T1(T_inputs_level2)# 168 | 169 | 170 | 171 | ###frame-level features 172 | xt_resT1=F.max_pool2d(xt,kernel_size=(f,1)).squeeze(-1).squeeze(-1)#B*256 173 | #Inter-frame motion encoding module 174 | #Temporal position embedding layer 175 | xt=xt.squeeze(-1)+self.PE.transpose(0,1) 176 | xt=xt.unsqueeze(-1) 177 | ### 178 | 179 | #Shared MLP layer 180 | xt = self.net4DV_T2(xt)# 181 | ####Hierarchical pyramid max pooling layer 182 | xt = [maxpooling(xt) for maxpooling in self.maxpoolings]#B*(2048)*[G]*1 183 | xt = torch.cat(xt,2).squeeze(-1) 184 | xt = xt.contiguous().view(xt.size(0),-1) 185 | 186 | ####### 187 | xt = torch.cat((xt, xt_resS2,xt_resT1),1) 188 | x = self.netR_FC(xt) 189 | 190 | 191 | return x 192 | 193 | 194 | 195 | -------------------------------------------------------------------------------- /NTU60_Net/positionencoding/Positionencoding.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | def get_positional_encoding(max_seq_len, embed_dim): 5 | # Initialize one positional encoding 6 | # embed_dim: Word embedding dimension 7 | # max_seq_len: Large sequence length 8 | positional_encoding=torch.zeros(max_seq_len, embed_dim).cuda() 9 | for pos in range(max_seq_len): 10 | for i in range(embed_dim): 11 | if(i%2==0): 12 | positional_encoding[pos,i]=torch.sin(pos / torch.tensor(10000**(2 * i / embed_dim))) 13 | else: 14 | positional_encoding[pos,i]=torch.cos(pos / torch.tensor(10000**(2 * i /embed_dim))) 15 | return positional_encoding 16 | -------------------------------------------------------------------------------- /NTU60_Net/positionencoding/__init__.py: -------------------------------------------------------------------------------- 1 | from .Positionencoding import * -------------------------------------------------------------------------------- /NTU60_Net/positionencoding/__pycache__/Positionencoding.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XingLi1012/SequentialPointNet/88daef1d540ce849e431130acbf910556e59b0bb/NTU60_Net/positionencoding/__pycache__/Positionencoding.cpython-39.pyc -------------------------------------------------------------------------------- /NTU60_Net/positionencoding/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XingLi1012/SequentialPointNet/88daef1d540ce849e431130acbf910556e59b0bb/NTU60_Net/positionencoding/__pycache__/__init__.cpython-39.pyc 
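Positionencoding.py above builds a fixed sine/cosine table of shape (max_seq_len, embed_dim): even embedding dimensions use sin, odd ones use cos. In PointNet_Plus it is created once as (framenum, T_nstates_plus_2[2]) — 20 x 1024 with the defaults — and broadcast-added to the frame-level features before the shared MLP and the hierarchical pyramid max pooling. A minimal usage sketch, assuming the NTU60_Net directory is on the import path and a CUDA device is available (the helper allocates its table with .cuda()); the batch size and the dummy feature tensor are illustrative only:

import torch
from positionencoding import get_positional_encoding

frames, channels = 20, 1024                               # framenum and T_nstates_plus_2[2] in model.py
pe = get_positional_encoding(frames, channels)            # (frames, channels) table on the GPU
frame_features = torch.rand(8, channels, frames).cuda()   # dummy B*C*F frame-level features
with_position = frame_features + pe.transpose(0, 1)       # broadcast add, as in PointNet_Plus.forward
print(with_position.shape)                                # torch.Size([8, 1024, 20])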
-------------------------------------------------------------------------------- /NTU60_Net/test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import torch 3 | import os 4 | import tqdm 5 | import shutil 6 | import collections 7 | import argparse 8 | import random 9 | import time 10 | #import gpu_utils as g 11 | import numpy as np 12 | 13 | from model import PointNet_Plus#,Attension_Point,TVLAD 14 | from dataset import NTU_RGBD 15 | from utils import group_points_4DV_T_S 16 | 17 | from PIL import Image 18 | from torch.utils.data import Dataset 19 | from torchvision import transforms 20 | from torch.utils.data import DataLoader 21 | from tqdm import tqdm 22 | import logging 23 | def main(args=None): 24 | parser = argparse.ArgumentParser(description = "Training") 25 | 26 | parser.add_argument('--batchSize', type=int, default=32, help='input batch size') 27 | parser.add_argument('--nepoch', type=int, default=150, help='number of epochs to train for') 28 | parser.add_argument('--INPUT_FEATURE_NUM', type=int, default = 3, help='number of input point features') 29 | parser.add_argument('--temperal_num', type=int, default = 3, help='number of input point features') 30 | parser.add_argument('--pooling', type=str, default='concatenation', help='how to aggregate temporal split features: vlad | concatenation | bilinear') 31 | parser.add_argument('--dataset', type=str, default='ntu60', help='how to aggregate temporal split features: ntu120 | ntu60') 32 | 33 | parser.add_argument('--weight_decay', type=float, default=0.0008, help='weight decay (SGD only)') 34 | parser.add_argument('--learning_rate', type=float, default=0.001, help='learning rate at t=0') 35 | parser.add_argument('--momentum', type=float, default=0.9, help='momentum (SGD only)') 36 | parser.add_argument('--workers', type=int, default=0, help='number of data loading workers') 37 | 38 | parser.add_argument('--root_path', type=str, default='C:\\Users\\Administrator\\Desktop\\LX\\paper\\dataset\\Prosessed_dataset\\01_3DV-Action-master(base-run-version)(FPS512)(单时序流4维特征)', help='preprocess folder') 39 | #parser.add_argument('--depth_path', type=str, default='C:\\Users\\Administrator\\Desktop\\LX\\paper\\dataset\\Prosessed_dataset\\3DV-Action-master\\ntu60dataset\\', help='raw_depth_png') 40 | ############################## 41 | # parser.add_argument('--save_root_dir', type=str, default='C:\\Users\\Administrator\\Desktop\\LX\\paper\\code\\3DV-Action-master\\models\\ntu60\\xsub', help='output folder') 42 | parser.add_argument('--save_root_dir', type=str, default='C:\\Users\\Administrator\\Desktop\\LX\\paper\\code\\Models_Parameter\\03_3DV-Action-master(base-run-version)(FPS512-64-K32)(2层局部+时序池化3层简单)(t=6_stride=2_KC=64)(单时序流4维特征)(单流读取)\\models\\ntu60\\xview', help='output folder') 43 | parser.add_argument('--model', type=str, default = '', help='model name for training resume') 44 | parser.add_argument('--optimizer', type=str, default = '', help='optimizer name for training resume') 45 | 46 | parser.add_argument('--ngpu', type=int, default=1, help='# GPUs') 47 | parser.add_argument('--main_gpu', type=int, default=0, help='main GPU id') # CUDA_VISIBLE_DEVICES=0 python train.py 48 | 49 | ######## 50 | parser.add_argument('--Seg_size', type=int, default =1, help='number of frame in seg') 51 | parser.add_argument('--stride', type=int, default = 1, help='stride of seg') 52 | parser.add_argument('--all_framenum', type=int, default = 20, help='number of action frame') 53 | 
parser.add_argument('--framenum', type=int, default = 20, help='number of action frame') 54 | parser.add_argument('--EACH_FRAME_SAMPLE_NUM', type=int, default = 512, help='number of sample points in each frame') 55 | parser.add_argument('--T_knn_K', type=int, default = 48, help='K for knn search of temperal stream') 56 | parser.add_argument('--T_knn_K2', type=int, default = 16, help='K for knn search of temperal stream') 57 | parser.add_argument('--T_sample_num_level1', type=int, default = 128, help='number of first layer groups') 58 | parser.add_argument('--T_sample_num_level2', type=int, default = 32, help='number of first layer groups') 59 | parser.add_argument('--T_ball_radius', type=float, default=0.2, help='square of radius for ball query of temperal stream') 60 | 61 | parser.add_argument('--learning_rate_decay', type=float, default=1e-7, help='learning rate decay') 62 | 63 | parser.add_argument('--size', type=str, default='full', help='how many samples do we load: small | full') 64 | parser.add_argument('--SAMPLE_NUM', type=int, default = 2048, help='number of sample points') 65 | 66 | parser.add_argument('--Num_Class', type=int, default = 60, help='number of outputs') 67 | parser.add_argument('--knn_K', type=int, default = 64, help='K for knn search') 68 | parser.add_argument('--sample_num_level1', type=int, default = 512, help='number of first layer groups') 69 | parser.add_argument('--sample_num_level2', type=int, default = 128, help='number of second layer groups') 70 | parser.add_argument('--ball_radius', type=float, default=0.06, help='square of radius for ball query in level 1')#0.025 -> 0.05 for detph 71 | parser.add_argument('--ball_radius2', type=float, default=0.1, help='square of radius for ball query in level 2')# 0.08 -> 0.01 for depth 72 | 73 | opt = parser.parse_args() 74 | print (opt) 75 | # logging.basicConfig(format='%(asctime)s %(message)s', datefmt='%Y/%m/%d %H:%M:%S', filename=os.path.join(opt.save_root_dir, '(单3dp)(去2)(2局+3简运)(位编)(cs2t1)(多池1212-66)(中2)T11F20-51212832k4816test.log'), level=logging.INFO) 76 | # torch.cuda.set_device(opt.main_gpu) 77 | 78 | opt.manualSeed = 1 79 | random.seed(opt.manualSeed) 80 | torch.manual_seed(opt.manualSeed) 81 | 82 | try: 83 | os.makedirs(opt.save_root_dir) 84 | except OSError: 85 | pass 86 | 87 | os.environ['CUDA_VISIBLE_DEVICES'] = '1' 88 | 89 | torch.backends.cudnn.benchmark = True 90 | #torch.backends.cudnn.deterministic = True 91 | torch.cuda.empty_cache() 92 | 93 | ################2############## 94 | data_val = NTU_RGBD(root_path = opt.root_path, opt=opt, 95 | DATA_CROSS_VIEW = True, 96 | full_train = False, 97 | validation = False, 98 | test = True, 99 | Transform = False 100 | ) 101 | val_loader = DataLoader(dataset = data_val, batch_size = 8,num_workers = 8) 102 | 103 | #net = 104 | 105 | netR = PointNet_Plus(opt) 106 | #################3############ 107 | netR.load_state_dict(torch.load("C:\\Users\\Administrator\\Desktop\\pointnet_para_82.pth")) 108 | 109 | netR = torch.nn.DataParallel(netR).cuda() 110 | netR.cuda() 111 | print(netR) 112 | 113 | 114 | # evaluate mode 115 | torch.cuda.synchronize() 116 | netR.eval() 117 | conf_mat = np.zeros([opt.Num_Class, opt.Num_Class]) 118 | conf_mat60 = np.zeros([60, 60]) 119 | acc = 0.0 120 | loss_sigma = 0.0 121 | 122 | with torch.no_grad(): 123 | for i, data in enumerate(tqdm(val_loader)): 124 | #print(i) 125 | torch.cuda.synchronize() 126 | group_time_start = time.time() 127 | points4DV_T,label,vid_name = data 128 | points4DV_T,label = points4DV_T.cuda(),label.cuda() 129 
| # print('points4DV_T:',points4DV_T.shape) 130 | xt, yt = group_points_4DV_T_S(points4DV_T, opt)#B*F*4*Cen*K B*F*4*Cen*1 131 | # print('xt:',xt.shape) 132 | xt = xt.type(torch.FloatTensor) 133 | yt = yt.type(torch.FloatTensor) 134 | forward_time_start= time.time() 135 | prediction = netR(xt,yt) 136 | forward_time_end = time.time() 137 | 138 | 139 | print('forward time:',forward_time_end-forward_time_start) 140 | _, predicted60 = torch.max(prediction.data[:,0:60], 1) 141 | _, predicted = torch.max(prediction.data, 1) 142 | #print(prediction.data) 143 | 144 | for j in range(len(label)): 145 | cate_i = label[j].cpu().numpy() 146 | pre_i = predicted[j].cpu().numpy() 147 | if pre_i != cate_i: 148 | logging.info('Video Name:{} -- correct label {} predicted to {}'.format(vid_name[j],cate_i,pre_i)) 149 | conf_mat[cate_i, pre_i] += 1.0 150 | if cate_i<60: 151 | pre_i60 = predicted60[j].cpu().numpy() 152 | conf_mat60[cate_i, pre_i60] += 1.0 153 | 154 | print('NTU120:{:.2%} NTU60:{:.2%}--correct number {}--all number {}===Average loss:{:.6%}'.format(conf_mat.trace() / conf_mat.sum(),conf_mat60.trace() / conf_mat60.sum(),conf_mat60.trace(),conf_mat60.sum(),loss_sigma/(i+1)/2)) 155 | # logging.info('{} --nepoch{} set Accuracy:{:.2%}--correct number {}--all number {}===Average loss:{}'.format('Valid', opt.nepoch, conf_mat.trace() / conf_mat.sum(),conf_mat60.trace(),conf_mat60.sum(), loss_sigma/(i+1))) 156 | 157 | #torch.save(netR.module.state_dict(), '%s/pointnet_para_%d.pth' % (opt.save_root_dir, epoch)) 158 | if __name__ == '__main__': 159 | main() 160 | 161 | -------------------------------------------------------------------------------- /NTU60_Net/train.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import torch 3 | import os 4 | import tqdm 5 | import shutil 6 | import collections 7 | import argparse 8 | import random 9 | import time 10 | #import gpu_utils as g 11 | import numpy as np 12 | 13 | from model import PointNet_Plus#,Attension_Point,TVLAD 14 | from dataset import NTU_RGBD 15 | from utils import group_points_4DV_T_S 16 | 17 | from PIL import Image 18 | from torch.utils.data import Dataset 19 | from torchvision import transforms 20 | from torch.utils.data import DataLoader 21 | from tqdm import tqdm 22 | import logging 23 | def main(args=None): 24 | parser = argparse.ArgumentParser(description = "Training") 25 | 26 | parser.add_argument('--batchSize', type=int, default=32, help='input batch size') 27 | parser.add_argument('--nepoch', type=int, default=150, help='number of epochs to train for') 28 | parser.add_argument('--INPUT_FEATURE_NUM', type=int, default = 3, help='number of input point features') 29 | parser.add_argument('--temperal_num', type=int, default = 3, help='number of input point features') 30 | parser.add_argument('--pooling', type=str, default='concatenation', help='how to aggregate temporal split features: vlad | concatenation | bilinear') 31 | parser.add_argument('--dataset', type=str, default='ntu60', help='how to aggregate temporal split features: ntu120 | ntu60') 32 | 33 | parser.add_argument('--weight_decay', type=float, default=0.0008, help='weight decay (SGD only)') 34 | parser.add_argument('--learning_rate', type=float, default=0.001, help='learning rate at t=0') 35 | parser.add_argument('--momentum', type=float, default=0.9, help='momentum (SGD only)') 36 | parser.add_argument('--workers', type=int, default=0, help='number of data loading workers') 37 | 38 | parser.add_argument('--root_path', 
type=str, default='C:\\Users\\Administrator\\Desktop\\LX\\paper\\dataset\\Prosessed_dataset\\01_3DV-Action-master(base-run-version)(FPS512)(单时序流4维特征)', help='preprocess folder') 39 | #parser.add_argument('--depth_path', type=str, default='C:\\Users\\Administrator\\Desktop\\LX\\paper\\dataset\\Prosessed_dataset\\3DV-Action-master\\ntu60dataset\\', help='raw_depth_png') 40 | ############################## 41 | # parser.add_argument('--save_root_dir', type=str, default='C:\\Users\\Administrator\\Desktop\\LX\\paper\\code\\3DV-Action-master\\models\\ntu60\\xsub', help='output folder') 42 | parser.add_argument('--save_root_dir', type=str, default='C:\\Users\\Administrator\\Desktop\\LX\\paper\\code\\Models_Parameter\\03_3DV-Action-master(base-run-version)(FPS512-64-K32)(2层局部+时序池化3层简单)(t=6_stride=2_KC=64)(单时序流4维特征)(单流读取)\\models\\ntu60\\xview', help='output folder') 43 | parser.add_argument('--model', type=str, default = '', help='model name for training resume') 44 | parser.add_argument('--optimizer', type=str, default = '', help='optimizer name for training resume') 45 | 46 | parser.add_argument('--ngpu', type=int, default=1, help='# GPUs') 47 | parser.add_argument('--main_gpu', type=int, default=0, help='main GPU id') # CUDA_VISIBLE_DEVICES=0 python train.py 48 | 49 | ######## 50 | parser.add_argument('--Seg_size', type=int, default =1, help='number of frame in seg') 51 | parser.add_argument('--stride', type=int, default = 1, help='stride of seg') 52 | parser.add_argument('--all_framenum', type=int, default = 20, help='number of action frame') 53 | parser.add_argument('--framenum', type=int, default = 20, help='number of action frame') 54 | parser.add_argument('--EACH_FRAME_SAMPLE_NUM', type=int, default = 512, help='number of sample points in each frame') 55 | parser.add_argument('--T_knn_K', type=int, default = 48, help='K for knn search of temperal stream') 56 | parser.add_argument('--T_knn_K2', type=int, default = 16, help='K for knn search of temperal stream') 57 | parser.add_argument('--T_sample_num_level1', type=int, default = 128, help='number of first layer groups') 58 | parser.add_argument('--T_sample_num_level2', type=int, default = 32, help='number of first layer groups') 59 | parser.add_argument('--T_ball_radius', type=float, default=0.2, help='square of radius for ball query of temperal stream') 60 | 61 | parser.add_argument('--learning_rate_decay', type=float, default=1e-7, help='learning rate decay') 62 | 63 | parser.add_argument('--size', type=str, default='full', help='how many samples do we load: small | full') 64 | parser.add_argument('--SAMPLE_NUM', type=int, default = 2048, help='number of sample points') 65 | 66 | parser.add_argument('--Num_Class', type=int, default = 60, help='number of outputs') 67 | parser.add_argument('--knn_K', type=int, default = 64, help='K for knn search') 68 | parser.add_argument('--sample_num_level1', type=int, default = 512, help='number of first layer groups') 69 | parser.add_argument('--sample_num_level2', type=int, default = 128, help='number of second layer groups') 70 | parser.add_argument('--ball_radius', type=float, default=0.06, help='square of radius for ball query in level 1')#0.025 -> 0.05 for detph 71 | parser.add_argument('--ball_radius2', type=float, default=0.1, help='square of radius for ball query in level 2')# 0.08 -> 0.01 for depth 72 | 73 | 74 | opt = parser.parse_args() 75 | print (opt) 76 | logging.basicConfig(format='%(asctime)s %(message)s', datefmt='%Y/%m/%d %H:%M:%S', filename=os.path.join(opt.save_root_dir, 
'train.log'), level=logging.INFO) 77 | # torch.cuda.set_device(opt.main_gpu) 78 | 79 | opt.manualSeed = 1 80 | random.seed(opt.manualSeed) 81 | torch.manual_seed(opt.manualSeed) 82 | 83 | try: 84 | os.makedirs(opt.save_root_dir) 85 | except OSError: 86 | pass 87 | 88 | os.environ['CUDA_VISIBLE_DEVICES'] = '1' 89 | 90 | torch.backends.cudnn.benchmark = True 91 | #torch.backends.cudnn.deterministic = True 92 | torch.cuda.empty_cache() 93 | ############################# 94 | data_train = NTU_RGBD(root_path = opt.root_path,opt=opt, 95 | DATA_CROSS_VIEW = True, 96 | full_train = True, 97 | validation = False, 98 | test = False, 99 | Transform = True 100 | ) 101 | train_loader = DataLoader(dataset = data_train, batch_size = opt.batchSize, shuffle = True, drop_last = True,num_workers = 8) 102 | data_val = NTU_RGBD(root_path = opt.root_path, opt=opt, 103 | DATA_CROSS_VIEW = True, 104 | full_train = False, 105 | validation = False, 106 | test = True, 107 | Transform = False 108 | ) 109 | val_loader = DataLoader(dataset = data_val, batch_size = 24,num_workers = 8) 110 | 111 | netR = PointNet_Plus(opt) 112 | 113 | netR = torch.nn.DataParallel(netR).cuda() 114 | netR.cuda() 115 | print(netR) 116 | 117 | criterion = torch.nn.CrossEntropyLoss().cuda() 118 | optimizer = torch.optim.Adam(netR.parameters(), lr=opt.learning_rate, betas = (0.5, 0.999), eps=1e-06) 119 | scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5) 120 | 121 | for epoch in range(opt.nepoch): 122 | scheduler.step(epoch) 123 | 124 | # switch to train mode 125 | torch.cuda.synchronize() 126 | netR.train() 127 | acc = 0.0 128 | loss_sigma = 0.0 129 | total1 = 0.0 130 | timer = time.time() 131 | 132 | for i, data in enumerate(tqdm(train_loader, 0)): 133 | if len(data[0])==1: 134 | continue 135 | torch.cuda.synchronize() 136 | # 1 load imputs and target 137 | ## 3DV points and 3 temporal segment appearance points 138 | ## points_xyzc: B*2048*8;points_1xyz:B*2048*3 target: B*1 139 | points4DV_T,label,v_name = data 140 | points4DV_T,label = points4DV_T.cuda(),label.cuda() 141 | #set abstract 1-grouping 142 | xt, yt = group_points_4DV_T_S(points4DV_T, opt)#B*F*4*Cen*K B*F*4*Cen*1 143 | # print('xt:',xt.shape) 144 | xt = xt.type(torch.FloatTensor) 145 | yt = yt.type(torch.FloatTensor) 146 | 147 | prediction = netR(xt,yt) 148 | 149 | loss = criterion(prediction,label) 150 | optimizer.zero_grad() 151 | 152 | loss.backward() 153 | optimizer.step() 154 | torch.cuda.synchronize() 155 | # update training error 156 | loss_sigma += loss.item() 157 | #_, predicted60 = torch.max(prediction.data[:,0:60], 1) 158 | _, predicted = torch.max(prediction.data, 1) 159 | 160 | acc += (predicted==label).cpu().sum().numpy() 161 | total1 += label.size(0) 162 | 163 | 164 | acc_avg = acc/total1 165 | loss_avg = loss_sigma/total1 166 | print('======>>>>> Online epoch: #%d, lr=%f,Acc=%f,correctnum=%f,allnum=%f,avg_loss=%f <<<<<======' %(epoch, scheduler.get_lr()[0],acc_avg,acc,total1,loss_avg)) 167 | print("Epoch: " + str(epoch) + " Iter: " + str(i) + " Acc: " + ("%.2f" % acc_avg) +" Classification Loss: " + str(loss_avg)) 168 | logging.info('======>>>>> Online epoch: #%d, lr=%f,Acc=%f,correctnum=%f,allnum=%f,avg_loss=%f <<<<<======' %(epoch, scheduler.get_lr()[0],acc_avg,acc,total1,loss_avg)) 169 | logging.info("Epoch: " + str(epoch) + " Iter: " + str(i) + " Acc: " + ("%.2f" % acc_avg) +" Classification Loss: " + str(loss_avg)) 170 | if ((epoch+1)%1==0 or epoch==opt.nepoch-1): 171 | # evaluate mode 172 | torch.cuda.synchronize() 173 | 
netR.eval() 174 | conf_mat = np.zeros([opt.Num_Class, opt.Num_Class]) 175 | conf_mat60 = np.zeros([60, 60]) 176 | acc = 0.0 177 | loss_sigma = 0.0 178 | 179 | with torch.no_grad(): 180 | for i, data in enumerate(tqdm(val_loader)): 181 | torch.cuda.synchronize() 182 | 183 | points4DV_T,label,v_name = data 184 | points4DV_T,label = points4DV_T.cuda(),label.cuda() 185 | #set abstract 1-grouping 186 | xt, yt = group_points_4DV_T_S(points4DV_T, opt)#(B*F)*4*Cen*K (B*F)*4*Cen*1 187 | 188 | xt = xt.type(torch.FloatTensor) 189 | yt = yt.type(torch.FloatTensor) 190 | 191 | prediction = netR(xt,yt) 192 | 193 | loss = criterion(prediction,label) 194 | _, predicted60 = torch.max(prediction.data[:,0:60], 1) 195 | _, predicted = torch.max(prediction.data, 1) 196 | #print(prediction.data) 197 | loss_sigma += loss.item() 198 | 199 | for j in range(len(label)): 200 | cate_i = label[j].cpu().numpy() 201 | pre_i = predicted[j].cpu().numpy() 202 | conf_mat[cate_i, pre_i] += 1.0 203 | if cate_i<60: 204 | pre_i60 = predicted60[j].cpu().numpy() 205 | conf_mat60[cate_i, pre_i60] += 1.0 206 | 207 | print('NTU120:{:.2%} NTU60:{:.2%}--correct number {}--all number {}===Average loss:{:.6%}'.format(conf_mat.trace() / conf_mat.sum(),conf_mat60.trace() / conf_mat60.sum(),conf_mat60.trace(),conf_mat60.sum(),loss_sigma/(i+1)/2)) 208 | logging.info('#################{} --epoch{} set Accuracy:{:.2%}--correct number {}--all number {}===Average loss:{}'.format('Valid', epoch, conf_mat.trace() / conf_mat.sum(),conf_mat60.trace(),conf_mat60.sum(), loss_sigma/(i+1))) 209 | 210 | torch.save(netR.module.state_dict(), '%s/pointnet_para_%d.pth' % (opt.save_root_dir, epoch)) 211 | if __name__ == '__main__': 212 | main() 213 | 214 | -------------------------------------------------------------------------------- /NTU60_Net/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .utils_3d import * -------------------------------------------------------------------------------- /NTU60_Net/utils/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XingLi1012/SequentialPointNet/88daef1d540ce849e431130acbf910556e59b0bb/NTU60_Net/utils/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /NTU60_Net/utils/__pycache__/utils_3d.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XingLi1012/SequentialPointNet/88daef1d540ce849e431130acbf910556e59b0bb/NTU60_Net/utils/__pycache__/utils_3d.cpython-39.pyc -------------------------------------------------------------------------------- /NTU60_Net/utils/utils_3d.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Variable 4 | import pdb 5 | def group_points_4DV_T_S(points, opt): 6 | #B*F*512*4 7 | T_ball_radius = torch.tensor(0.06) 8 | cur_train_size = points.shape[0]# 9 | INPUT_FEATURE_NUM = points.shape[-1]#3 10 | 11 | points = points.view(cur_train_size*opt.framenum, opt.EACH_FRAME_SAMPLE_NUM, -1)#(B*F)*512*4 12 | # print('1points:',points.shape,cur_train_size,opt.framenum, opt.EACH_FRAME_SAMPLE_NUM, -1) 13 | inputs1_diff = points[:,:,0:3].transpose(1,2).unsqueeze(1).expand(cur_train_size*opt.framenum,opt.T_sample_num_level1,3,opt.EACH_FRAME_SAMPLE_NUM) \ 14 | - 
15 |     inputs1_diff = torch.mul(inputs1_diff, inputs1_diff) # (B*F) * 64 * 3 * 512
16 |     inputs1_diff = inputs1_diff.sum(2) # (B*F) * 64 * 512, squared distance
17 |     dists, inputs1_idx = torch.topk(inputs1_diff, opt.T_knn_K, 2, largest=False, sorted=False) # dists: (B*F) * 64 * 32; inputs1_idx: (B*F) * 64 * 32
18 | 
19 |     # ball query
20 |     invalid_map = dists.gt(T_ball_radius) # (B*F) * 64 * 32, binary
21 | 
22 |     for jj in range(opt.T_sample_num_level1):
23 |         inputs1_idx[:,jj,:][invalid_map[:,jj,:]] = jj
24 | 
25 |     idx_group_l1_long = inputs1_idx.view(cur_train_size*opt.framenum,opt.T_sample_num_level1*opt.T_knn_K,1).expand(cur_train_size*opt.framenum,opt.T_sample_num_level1*opt.T_knn_K,INPUT_FEATURE_NUM)
26 | 
27 |     inputs_level1 = points.gather(1,idx_group_l1_long).view(cur_train_size*opt.framenum,opt.T_sample_num_level1,opt.T_knn_K,INPUT_FEATURE_NUM) # (B*F)*64*32*4
28 | 
29 |     inputs_level1_center = points[:,0:opt.T_sample_num_level1,0:INPUT_FEATURE_NUM].unsqueeze(2) # (B*F)*64*1*4
30 |     inputs_level1[:,:,:,0:3] = inputs_level1[:,:,:,0:3] - inputs_level1_center[:,:,:,0:3].expand(cur_train_size*opt.framenum,opt.T_sample_num_level1,opt.T_knn_K,3)# (B*F)*64*32*3
31 |     if(1==1):
32 |         dis_l=torch.mul(inputs_level1[:,:,:,0:3], inputs_level1[:,:,:,0:3])
33 | 
34 |         dis_l=dis_l.sum(3).unsqueeze(3)#lx# squared distance of each point from its group centre in local coordinates (B*F)*64*32*1
35 | 
36 |         inputs_level1 = torch.cat((inputs_level1,dis_l),3).unsqueeze(1).transpose(1,4).squeeze(4) # (B*F)*(4+1)*64*32
37 | 
38 |     inputs_level1_center = inputs_level1_center.contiguous().view(cur_train_size,opt.framenum,opt.T_sample_num_level1,1,INPUT_FEATURE_NUM).transpose(2,3).transpose(2,4) # B*F*4*64*1
39 |     FEATURE_NUM = inputs_level1.shape[-3]# 4+1
40 |     inputs_level1=inputs_level1.view(cur_train_size,opt.framenum,FEATURE_NUM, opt.T_sample_num_level1, opt.T_knn_K)# B*F*(4+1)*Cen*K
41 |     return inputs_level1, inputs_level1_center
42 | def group_points_4DV_T_S2(points, opt):
43 |     # input: B*F*Cen1*(3+128)
44 |     T_ball_radius = torch.tensor(0.11)
45 |     cur_train_size = points.shape[0]# batch size B
46 |     INPUT_FEATURE_NUM = points.shape[-1]# 3+128
47 | 
48 |     points = points.view(cur_train_size*opt.framenum, opt.T_sample_num_level1, -1)# (B*F)*Cen1*(3+128)
49 |     # print('1points:',points.shape,cur_train_size,opt.framenum, opt.T_sample_num_level1, -1)
50 |     inputs1_diff = points[:,:,0:3].transpose(1,2).unsqueeze(1).expand(cur_train_size*opt.framenum,opt.T_sample_num_level2,3,opt.T_sample_num_level1) \
51 |                    - points[:,0:opt.T_sample_num_level2,0:3].unsqueeze(-1).expand(cur_train_size*opt.framenum,opt.T_sample_num_level2,3,opt.T_sample_num_level1)# (B*F) * Cen2 * 3 * Cen1
52 |     inputs1_diff = torch.mul(inputs1_diff, inputs1_diff) # (B*F) * Cen2 * 3 * Cen1
53 |     inputs1_diff = inputs1_diff.sum(2) # (B*F) * Cen2 * Cen1, squared distance
54 |     # print('inputs1_diff:',inputs1_diff.shape)
55 |     dists, inputs1_idx = torch.topk(inputs1_diff, opt.T_knn_K2, 2, largest=False, sorted=False) # dists: (B*F) * Cen2 * K2; inputs1_idx: (B*F) * Cen2 * K2
56 |     # print('inputs1_idx:',inputs1_idx.shape)
57 |     # ball query
58 |     invalid_map = dists.gt(T_ball_radius) # (B*F) * Cen2 * K2, binary
59 | 
60 |     for jj in range(opt.T_sample_num_level2):
61 |         inputs1_idx[:,jj,:][invalid_map[:,jj,:]] = jj
62 | 
63 |     idx_group_l1_long = inputs1_idx.view(cur_train_size*opt.framenum,opt.T_sample_num_level2*opt.T_knn_K2,1).expand(cur_train_size*opt.framenum,opt.T_sample_num_level2*opt.T_knn_K2,points.shape[-1])
64 |     # print('points:',points.shape)
65 |     # print('pointsg:',points.gather(1,idx_group_l1_long).shape)
66 |     # print(cur_train_size*opt.framenum,opt.T_sample_num_level2,opt.T_knn_K2,points.shape[-1])
67 |     inputs_level1 = points.gather(1,idx_group_l1_long).view(cur_train_size*opt.framenum,opt.T_sample_num_level2,opt.T_knn_K2,points.shape[-1]) # (B*F)*Cen2*K2*(3+128)
68 | 
69 |     inputs_level1_center = points[:,0:opt.T_sample_num_level2,0:opt.INPUT_FEATURE_NUM].unsqueeze(2) # (B*F)*Cen2*1*4
70 |     inputs_level1[:,:,:,0:3] = inputs_level1[:,:,:,0:3] - inputs_level1_center[:,:,:,0:3].expand(cur_train_size*opt.framenum,opt.T_sample_num_level2,opt.T_knn_K2,3)# (B*F)*Cen2*K2*3
71 |     if(1==1):
72 |         dis_l=torch.mul(inputs_level1[:,:,:,0:3], inputs_level1[:,:,:,0:3])
73 | 
74 |         dis_l=dis_l.sum(3).unsqueeze(3)#lx# squared distance of each point from its group centre in local coordinates (B*F)*Cen2*K2*1
75 | 
76 |         inputs_level1 = torch.cat((inputs_level1,dis_l),3).unsqueeze(1).transpose(1,4).squeeze(4) # (B*F)*(3+128+1)*Cen2*K2
77 | 
78 |     inputs_level1_center = inputs_level1_center.contiguous().view(cur_train_size,opt.framenum,opt.T_sample_num_level2,1,opt.INPUT_FEATURE_NUM).transpose(2,3).transpose(2,4) # B*F*4*Cen2*1
79 |     FEATURE_NUM = inputs_level1.shape[-3]# 3+128+1
80 |     inputs_level1=inputs_level1.view(cur_train_size,opt.framenum,FEATURE_NUM, opt.T_sample_num_level2, opt.T_knn_K2)# B*F*(3+128+1)*Cen2*K2
81 |     return inputs_level1, inputs_level1_center
--------------------------------------------------------------------------------
/README.txt:
--------------------------------------------------------------------------------
1 | The paper corresponding to the code is SequentialPointNet.
2 | 
--------------------------------------------------------------------------------
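
Usage sketch: a minimal example of calling the first-level grouping utility from /NTU60_Net/utils/utils_3d.py on dummy tensors, mirroring how train.py feeds it a B*F*512*4 batch. The option values (framenum, EACH_FRAME_SAMPLE_NUM, T_sample_num_level1, T_knn_K), the batch size, and the import path are illustrative assumptions rather than the released training configuration.

    import argparse
    import torch
    from utils.utils_3d import group_points_4DV_T_S  # assumes the working directory is NTU60_Net

    # Illustrative hyper-parameters (assumptions, not the released config).
    opt = argparse.Namespace(
        framenum=24,                 # frames sampled per sequence
        EACH_FRAME_SAMPLE_NUM=512,   # points kept per frame
        T_sample_num_level1=64,      # first-level group centres per frame
        T_knn_K=32,                  # neighbours gathered around each centre
    )

    # Dummy input: B * F * 512 * 4 (xyz plus one extra channel), as the function expects.
    points4DV_T = torch.rand(2, opt.framenum, opt.EACH_FRAME_SAMPLE_NUM, 4)

    xt, yt = group_points_4DV_T_S(points4DV_T, opt)
    print(xt.shape)  # torch.Size([2, 24, 5, 64, 32])  -> B * F * (4+1) * Cen * K
    print(yt.shape)  # torch.Size([2, 24, 4, 64, 1])   -> B * F * 4 * Cen * 1

Under these assumptions the grouped tensor xt carries the 4 input channels plus the appended squared local distance, and yt holds the group-centre features, matching the shape comments in train.py.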