├── moco ├── utils │ ├── __init__.py │ ├── tgcn.py │ └── graph.py ├── GRU.py └── builder_cmd.py ├── feeder ├── __init__.py ├── feeder_downstream.py ├── feeder_downstream_semi_supervised.py ├── feeder_pretraining_intra.py ├── augmentations.py └── feeder_pretraining_inter.py ├── .gitignore ├── images └── cmd.jpg ├── graph ├── __init__.py ├── tools.py ├── ntu_rgb_d.py └── kinetics.py ├── script_action_classification_cmd_transfer.sh ├── dataset.py ├── script_action_classification_cmd.sh ├── script_action_retrieval_cmd.sh ├── script_pretrain_moco_cmd.sh ├── data_gen ├── rotation.py ├── preprocess.py ├── pku_gendata.py ├── ntu_gendata.py └── resource │ ├── NTU_RGBD60_samples_with_missing_skeletons.txt │ └── NTU_RGBD120_samples_with_missing_skeletons.txt ├── README.md ├── script_action_classification_cmd_semi.sh ├── options ├── options_pretraining.py ├── options_retrieval.py └── options_classification.py ├── pretrain_moco_cmd.py ├── action_retrieval_cmd.py ├── action_classification_cmd.py └── action_classification_cmd_semi.py /moco/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /feeder/__init__.py: -------------------------------------------------------------------------------- 1 | from . import augmentations 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | *.pyc 3 | 4 | data/ 5 | checkpoints/ -------------------------------------------------------------------------------- /images/cmd.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maoyunyao/CMD/HEAD/images/cmd.jpg -------------------------------------------------------------------------------- /graph/__init__.py: -------------------------------------------------------------------------------- 1 | from . import tools 2 | from . import ntu_rgb_d 3 | from . 
import kinetics 4 | -------------------------------------------------------------------------------- /graph/tools.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def edge2mat(link, num_node): 5 | A = np.zeros((num_node, num_node)) 6 | for i, j in link: 7 | A[j, i] = 1 8 | return A 9 | 10 | 11 | def normalize_digraph(A): 12 | Dl = np.sum(A, 0) 13 | h, w = A.shape 14 | Dn = np.zeros((w, w)) 15 | for i in range(w): 16 | if Dl[i] > 0: 17 | Dn[i, i] = Dl[i] ** (-1) 18 | AD = np.dot(A, Dn) 19 | return AD 20 | 21 | 22 | def get_spatial_graph(num_node, self_link, inward, outward): 23 | I = edge2mat(self_link, num_node) 24 | In = normalize_digraph(edge2mat(inward, num_node)) 25 | Out = normalize_digraph(edge2mat(outward, num_node)) 26 | A = np.stack((I, In, Out)) 27 | return A 28 | -------------------------------------------------------------------------------- /script_action_classification_cmd_transfer.sh: -------------------------------------------------------------------------------- 1 | # Transfer learning: semi-supervised finetuning on PKU-MMD II with data ratio = 1.0 2 | CUDA_VISIBLE_DEVICES=0 python action_classification_cmd_semi.py \ 3 | --lr 0.01 \ 4 | --batch-size 64 \ 5 | --pretrained /data/user/ACTION/CMD/checkpoints/pretrain_moco_cmd/ntu60_cross_subject/checkpoint_0450.pth.tar \ 6 | --finetune-dataset pku_v2 \ 7 | --protocol cross_subject_semi \ 8 | --data-ratio 1.0 \ 9 | --finetune-skeleton-representation graph-based 10 | 11 | 12 | CUDA_VISIBLE_DEVICES=0 python action_classification_cmd_semi.py \ 13 | --lr 0.01 \ 14 | --batch-size 64 \ 15 | --pretrained /data/user/ACTION/CMD/checkpoints/pretrain_moco_cmd/ntu120_cross_subject/checkpoint_0450.pth.tar \ 16 | --finetune-dataset pku_v2 \ 17 | --protocol cross_subject_semi \ 18 | --data-ratio 1.0 \ 19 | --finetune-skeleton-representation graph-based -------------------------------------------------------------------------------- /moco/utils/tgcn.py: -------------------------------------------------------------------------------- 1 | # The basic unit of graph convolutional networks.
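# ConvTemporalGraphical below is the spatial graph convolution used by ST-GCN-style backbones:
# a 1x1 temporal convolution expands the (N, C, T, V) input to kernel_size * out_channels channels,
# the result is reshaped to (N, K, C_out, T, V), and einsum('nkctv,kvw->nctw') contracts it with
# the K stacked adjacency matrices A of shape (K, V, V), one per spatial partition.
# Illustrative shape check (hypothetical values, not part of the original file):
#   gcn = ConvTemporalGraphical(in_channels=3, out_channels=64, kernel_size=3)
#   x, A = gcn(torch.randn(8, 3, 64, 25), torch.randn(3, 25, 25))  # x: (8, 64, 64, 25)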
2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | class ConvTemporalGraphical(nn.Module): 7 | def __init__(self, 8 | in_channels, 9 | out_channels, 10 | kernel_size, 11 | t_kernel_size=1, 12 | t_stride=1, 13 | t_padding=0, 14 | t_dilation=1, 15 | bias=True): 16 | super().__init__() 17 | 18 | self.kernel_size = kernel_size 19 | self.conv = nn.Conv2d( 20 | in_channels, 21 | out_channels * kernel_size, 22 | kernel_size=(t_kernel_size, 1), 23 | padding=(t_padding, 0), 24 | stride=(t_stride, 1), 25 | dilation=(t_dilation, 1), 26 | bias=bias) 27 | 28 | def forward(self, x, A): 29 | assert A.size(0) == self.kernel_size 30 | 31 | x = self.conv(x) 32 | 33 | n, kc, t, v = x.size() 34 | x = x.view(n, self.kernel_size, kc//self.kernel_size, t, v) 35 | x = torch.einsum('nkctv,kvw->nctw', (x, A)) 36 | 37 | return x.contiguous(), A 38 | -------------------------------------------------------------------------------- /dataset.py: -------------------------------------------------------------------------------- 1 | 2 | def get_pretraining_set_intra(opts): 3 | 4 | from feeder.feeder_pretraining_intra import Feeder 5 | training_data = Feeder(**opts.train_feeder_args) 6 | 7 | return training_data 8 | 9 | def get_pretraining_set_inter(opts): 10 | 11 | from feeder.feeder_pretraining_inter import Feeder 12 | training_data = Feeder(**opts.train_feeder_args) 13 | 14 | return training_data 15 | 16 | def get_finetune_training_set(opts): 17 | 18 | from feeder.feeder_downstream import Feeder 19 | 20 | data = Feeder(**opts.train_feeder_args) 21 | 22 | return data 23 | 24 | def get_finetune_validation_set(opts): 25 | 26 | from feeder.feeder_downstream import Feeder 27 | data = Feeder(**opts.test_feeder_args) 28 | 29 | return data 30 | 31 | def get_finetune_training_set_semi_supervised(opts): 32 | 33 | from feeder.feeder_downstream_semi_supervised import Feeder 34 | 35 | data = Feeder(**opts.train_feeder_args) 36 | 37 | return data 38 | 39 | def get_finetune_validation_set_semi_supervised(opts): 40 | 41 | from feeder.feeder_downstream_semi_supervised import Feeder 42 | data = Feeder(**opts.test_feeder_args) 43 | 44 | return data 45 | -------------------------------------------------------------------------------- /script_action_classification_cmd.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0 python action_classification_cmd.py \ 2 | --lr 0.1 \ 3 | --batch-size 64 \ 4 | --pretrained /data/user/ACTION/CMD/checkpoints/pretrain_moco_cmd/ntu60_cross_subject/checkpoint_0450.pth.tar \ 5 | --finetune-dataset ntu60 --protocol cross_subject --finetune-skeleton-representation graph-based 6 | 7 | CUDA_VISIBLE_DEVICES=0 python action_classification_cmd.py \ 8 | --lr 0.1 \ 9 | --batch-size 64 \ 10 | --pretrained /data/user/ACTION/CMD/checkpoints/pretrain_moco_cmd/ntu60_cross_view/checkpoint_0450.pth.tar \ 11 | --finetune-dataset ntu60 --protocol cross_view --finetune-skeleton-representation graph-based 12 | 13 | CUDA_VISIBLE_DEVICES=0 python action_classification_cmd.py \ 14 | --lr 0.1 \ 15 | --batch-size 64 \ 16 | --pretrained /data/user/ACTION/CMD/checkpoints/pretrain_moco_cmd/ntu120_cross_subject/checkpoint_0450.pth.tar \ 17 | --finetune-dataset ntu120 --protocol cross_subject --finetune-skeleton-representation graph-based 18 | 19 | CUDA_VISIBLE_DEVICES=0 python action_classification_cmd.py \ 20 | --lr 0.1 \ 21 | --batch-size 64 \ 22 | --pretrained /data/user/ACTION/CMD/checkpoints/pretrain_moco_cmd/ntu120_cross_setup/checkpoint_0450.pth.tar \ 23 | 
--finetune-dataset ntu120 --protocol cross_setup --finetune-skeleton-representation graph-based 24 | -------------------------------------------------------------------------------- /script_action_retrieval_cmd.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0 python action_retrieval_cmd.py \ 2 | --lr 0.1 \ 3 | --batch-size 64 \ 4 | --knn-neighbours 1 \ 5 | --pretrained /data/user/ACTION/CMD/checkpoints/pretrain_moco_cmd/ntu60_cross_view/checkpoint_0450.pth.tar \ 6 | --finetune-dataset ntu60 --protocol cross_view --finetune-skeleton-representation graph-based 7 | 8 | 9 | CUDA_VISIBLE_DEVICES=0 python action_retrieval_cmd.py \ 10 | --lr 0.1 \ 11 | --batch-size 64 \ 12 | --knn-neighbours 1 \ 13 | --pretrained /data/user/ACTION/CMD/checkpoints/pretrain_moco_cmd/ntu60_cross_subject/checkpoint_0450.pth.tar \ 14 | --finetune-dataset ntu60 --protocol cross_subject --finetune-skeleton-representation graph-based 15 | 16 | 17 | CUDA_VISIBLE_DEVICES=0 python action_retrieval_cmd.py \ 18 | --lr 0.1 \ 19 | --batch-size 64 \ 20 | --knn-neighbours 1 \ 21 | --pretrained /data/user/ACTION/CMD/checkpoints/pretrain_moco_cmd/ntu120_cross_subject/checkpoint_0450.pth.tar \ 22 | --finetune-dataset ntu120 --protocol cross_subject --finetune-skeleton-representation graph-based 23 | 24 | 25 | CUDA_VISIBLE_DEVICES=0 python action_retrieval_cmd.py \ 26 | --lr 0.1 \ 27 | --batch-size 64 \ 28 | --knn-neighbours 1 \ 29 | --pretrained /data/user/ACTION/CMD/checkpoints/pretrain_moco_cmd/ntu120_cross_setup/checkpoint_0450.pth.tar \ 30 | --finetune-dataset ntu120 --protocol cross_setup --finetune-skeleton-representation graph-based 31 | -------------------------------------------------------------------------------- /graph/ntu_rgb_d.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.extend(['../']) 4 | from graph import tools 5 | 6 | num_node = 25 7 | self_link = [(i, i) for i in range(num_node)] 8 | inward_ori_index = [(1, 2), (2, 21), (3, 21), (4, 3), (5, 21), (6, 5), (7, 6), 9 | (8, 7), (9, 21), (10, 9), (11, 10), (12, 11), (13, 1), 10 | (14, 13), (15, 14), (16, 15), (17, 1), (18, 17), (19, 18), 11 | (20, 19), (22, 23), (23, 8), (24, 25), (25, 12)] 12 | inward = [(i - 1, j - 1) for (i, j) in inward_ori_index] 13 | outward = [(j, i) for (i, j) in inward] 14 | neighbor = inward + outward 15 | 16 | 17 | class Graph: 18 | def __init__(self, labeling_mode='spatial'): 19 | self.A = self.get_adjacency_matrix(labeling_mode) 20 | self.num_node = num_node 21 | self.self_link = self_link 22 | self.inward = inward 23 | self.outward = outward 24 | self.neighbor = neighbor 25 | 26 | def get_adjacency_matrix(self, labeling_mode=None): 27 | if labeling_mode is None: 28 | return self.A 29 | if labeling_mode == 'spatial': 30 | A = tools.get_spatial_graph(num_node, self_link, inward, outward) 31 | else: 32 | raise ValueError() 33 | return A 34 | 35 | 36 | if __name__ == '__main__': 37 | import matplotlib.pyplot as plt 38 | import os 39 | 40 | # os.environ['DISPLAY'] = 'localhost:11.0' 41 | A = Graph('spatial').get_adjacency_matrix() 42 | for i in A: 43 | plt.imshow(i, cmap='gray') 44 | plt.show() 45 | print(A) 46 | -------------------------------------------------------------------------------- /graph/kinetics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sys 3 | 4 | sys.path.extend(['../']) 5 | from graph import tools 6 | import networkx as 
nx 7 | 8 | # Joint index: 9 | # {0, "Nose"} 10 | # {1, "Neck"}, 11 | # {2, "RShoulder"}, 12 | # {3, "RElbow"}, 13 | # {4, "RWrist"}, 14 | # {5, "LShoulder"}, 15 | # {6, "LElbow"}, 16 | # {7, "LWrist"}, 17 | # {8, "RHip"}, 18 | # {9, "RKnee"}, 19 | # {10, "RAnkle"}, 20 | # {11, "LHip"}, 21 | # {12, "LKnee"}, 22 | # {13, "LAnkle"}, 23 | # {14, "REye"}, 24 | # {15, "LEye"}, 25 | # {16, "REar"}, 26 | # {17, "LEar"}, 27 | 28 | # Edge format: (origin, neighbor) 29 | num_node = 18 30 | self_link = [(i, i) for i in range(num_node)] 31 | inward = [(4, 3), (3, 2), (7, 6), (6, 5), (13, 12), (12, 11), (10, 9), (9, 8), 32 | (11, 5), (8, 2), (5, 1), (2, 1), (0, 1), (15, 0), (14, 0), (17, 15), 33 | (16, 14)] 34 | outward = [(j, i) for (i, j) in inward] 35 | neighbor = inward + outward 36 | 37 | 38 | class Graph: 39 | def __init__(self, labeling_mode='spatial'): 40 | self.A = self.get_adjacency_matrix(labeling_mode) 41 | self.num_node = num_node 42 | self.self_link = self_link 43 | self.inward = inward 44 | self.outward = outward 45 | self.neighbor = neighbor 46 | 47 | def get_adjacency_matrix(self, labeling_mode=None): 48 | if labeling_mode is None: 49 | return self.A 50 | if labeling_mode == 'spatial': 51 | A = tools.get_spatial_graph(num_node, self_link, inward, outward) 52 | else: 53 | raise ValueError() 54 | return A 55 | 56 | 57 | if __name__ == '__main__': 58 | A = Graph('spatial').get_adjacency_matrix() 59 | print('') 60 | -------------------------------------------------------------------------------- /script_pretrain_moco_cmd.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | CUDA_VISIBLE_DEVICES=0 python pretrain_moco_cmd.py \ 4 | --lr 0.01 \ 5 | --batch-size 64 \ 6 | --teacher-t 0.05 \ 7 | --student-t 0.1 \ 8 | --topk 8192 \ 9 | --mlp \ 10 | --contrast-t 0.07 \ 11 | --contrast-k 16384 \ 12 | --checkpoint-path ./checkpoints/pretrain_moco_cmd/ntu60_cross_subject \ 13 | --schedule 351 \ 14 | --epochs 451 \ 15 | --pre-dataset ntu60 \ 16 | --skeleton-representation graph-based \ 17 | --protocol cross_subject 18 | 19 | 20 | CUDA_VISIBLE_DEVICES=0 python pretrain_moco_cmd.py \ 21 | --lr 0.01 \ 22 | --batch-size 64 \ 23 | --teacher-t 0.05 \ 24 | --student-t 0.1 \ 25 | --topk 8192 \ 26 | --mlp \ 27 | --contrast-t 0.07 \ 28 | --contrast-k 16384 \ 29 | --checkpoint-path ./checkpoints/pretrain_moco_cmd/ntu60_cross_view \ 30 | --schedule 351 \ 31 | --epochs 451 \ 32 | --pre-dataset ntu60 \ 33 | --skeleton-representation graph-based \ 34 | --protocol cross_view 35 | 36 | 37 | CUDA_VISIBLE_DEVICES=0 python pretrain_moco_cmd.py \ 38 | --lr 0.01 \ 39 | --batch-size 64 \ 40 | --teacher-t 0.05 \ 41 | --student-t 0.1 \ 42 | --topk 8192 \ 43 | --mlp \ 44 | --contrast-t 0.07 \ 45 | --contrast-k 16384 \ 46 | --checkpoint-path ./checkpoints/pretrain_moco_cmd/ntu120_cross_subject \ 47 | --schedule 351 \ 48 | --epochs 451 \ 49 | --pre-dataset ntu120 \ 50 | --skeleton-representation graph-based \ 51 | --protocol cross_subject 52 | 53 | 54 | CUDA_VISIBLE_DEVICES=0 python pretrain_moco_cmd.py \ 55 | --lr 0.01 \ 56 | --batch-size 64 \ 57 | --teacher-t 0.05 \ 58 | --student-t 0.1 \ 59 | --topk 8192 \ 60 | --mlp \ 61 | --contrast-t 0.07 \ 62 | --contrast-k 16384 \ 63 | --checkpoint-path ./checkpoints/pretrain_moco_cmd/ntu120_cross_setup \ 64 | --schedule 351 \ 65 | --epochs 451 \ 66 | --pre-dataset ntu120 \ 67 | --skeleton-representation graph-based \ 68 | --protocol cross_setup 69 | 70 | 71 | CUDA_VISIBLE_DEVICES=0 python pretrain_moco_cmd.py \ 72 | --lr 0.01 \ 73 |
--batch-size 64 \ 74 | --teacher-t 0.05 \ 75 | --student-t 0.1 \ 76 | --topk 8192 \ 77 | --mlp \ 78 | --contrast-t 0.07 \ 79 | --contrast-k 16384 \ 80 | --checkpoint-path ./checkpoints/pretrain_moco_cmd/pku_v2_cross_subject \ 81 | --schedule 801 \ 82 | --epochs 1001 \ 83 | --pre-dataset pku_v2 \ 84 | --skeleton-representation graph-based \ 85 | --protocol cross_subject -------------------------------------------------------------------------------- /data_gen/rotation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math 3 | 4 | 5 | def rotation_matrix(axis, theta): 6 | """ 7 | Return the rotation matrix associated with counterclockwise rotation about 8 | the given axis by theta radians. 9 | """ 10 | if np.abs(axis).sum() < 1e-6 or np.abs(theta) < 1e-6: 11 | return np.eye(3) 12 | axis = np.asarray(axis) 13 | axis = axis / math.sqrt(np.dot(axis, axis)) 14 | a = math.cos(theta / 2.0) 15 | b, c, d = -axis * math.sin(theta / 2.0) 16 | aa, bb, cc, dd = a * a, b * b, c * c, d * d 17 | bc, ad, ac, ab, bd, cd = b * c, a * d, a * c, a * b, b * d, c * d 18 | return np.array([[aa + bb - cc - dd, 2 * (bc + ad), 2 * (bd - ac)], 19 | [2 * (bc - ad), aa + cc - bb - dd, 2 * (cd + ab)], 20 | [2 * (bd + ac), 2 * (cd - ab), aa + dd - bb - cc]]) 21 | 22 | 23 | def unit_vector(vector): 24 | """ Returns the unit vector of the vector. """ 25 | return vector / np.linalg.norm(vector) 26 | 27 | 28 | def angle_between(v1, v2): 29 | """ Returns the angle in radians between vectors 'v1' and 'v2':: 30 | 31 | >>> angle_between((1, 0, 0), (0, 1, 0)) 32 | 1.5707963267948966 33 | >>> angle_between((1, 0, 0), (1, 0, 0)) 34 | 0.0 35 | >>> angle_between((1, 0, 0), (-1, 0, 0)) 36 | 3.141592653589793 37 | """ 38 | if np.abs(v1).sum() < 1e-6 or np.abs(v2).sum() < 1e-6: 39 | return 0 40 | v1_u = unit_vector(v1) 41 | v2_u = unit_vector(v2) 42 | return np.arccos(np.clip(np.dot(v1_u, v2_u), -1.0, 1.0)) 43 | 44 | 45 | def x_rotation(vector, theta): 46 | """Rotates 3-D vector around x-axis""" 47 | R = np.array([[1, 0, 0], [0, np.cos(theta), -np.sin(theta)], [0, np.sin(theta), np.cos(theta)]]) 48 | return np.dot(R, vector) 49 | 50 | 51 | def y_rotation(vector, theta): 52 | """Rotates 3-D vector around y-axis""" 53 | R = np.array([[np.cos(theta), 0, np.sin(theta)], [0, 1, 0], [-np.sin(theta), 0, np.cos(theta)]]) 54 | return np.dot(R, vector) 55 | 56 | 57 | def z_rotation(vector, theta): 58 | """Rotates 3-D vector around z-axis""" 59 | R = np.array([[np.cos(theta), -np.sin(theta), 0], [np.sin(theta), np.cos(theta), 0], [0, 0, 1]]) 60 | return np.dot(R, vector) 61 | -------------------------------------------------------------------------------- /moco/GRU.py: -------------------------------------------------------------------------------- 1 | from torch.utils.data import Dataset, DataLoader,SubsetRandomSampler 2 | from torch.nn.utils import clip_grad_norm_ 3 | from torch.nn.utils.rnn import pad_packed_sequence, pad_sequence, pack_padded_sequence 4 | from io import open 5 | import unicodedata 6 | import string 7 | import re 8 | import random 9 | import torch 10 | import torch.nn as nn 11 | 12 | from torch import optim 13 | import torch.nn.functional as F 14 | 15 | import numpy as np 16 | import math 17 | from torch.utils.data import random_split 18 | import torchvision 19 | 20 | 21 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 22 | 23 | class EncoderRNN(nn.Module): 24 | def __init__(self, input_size, hidden_size, num_layers): 25 | 
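# Bidirectional GRU encoder (batch_first): each time step yields a feature of size 2 * hidden_size.
# forward() below unrolls the GRU one frame at a time and, for every sequence in the batch, keeps
# the output at its last valid frame (index seq_len - 1) as the clip-level representation.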
super(EncoderRNN, self).__init__() 26 | self.hidden_size = hidden_size 27 | self.gru = nn.GRU(input_size, hidden_size, num_layers=num_layers, bidirectional=True, batch_first=True) 28 | self.num_layers = num_layers 29 | 30 | def forward(self, input_tensor, seq_len): 31 | 32 | self.gru.flatten_parameters() 33 | 34 | encoder_hidden = torch.Tensor().to(device) 35 | 36 | for it in range(max(seq_len)): 37 | if it == 0: 38 | enout_tmp, hidden_tmp = self.gru(input_tensor[:, it:it+1, :]) 39 | else: 40 | enout_tmp, hidden_tmp = self.gru(input_tensor[:, it:it+1, :], hidden_tmp) 41 | encoder_hidden = torch.cat((encoder_hidden, enout_tmp),1) 42 | 43 | hidden = torch.empty((1, len(seq_len), encoder_hidden.shape[-1])).to(device) 44 | count = 0 45 | for ith_len in seq_len: 46 | hidden[0, count, :] = encoder_hidden[count, ith_len - 1, :] 47 | count += 1 48 | 49 | return hidden 50 | 51 | 52 | class BIGRU(nn.Module): 53 | def __init__(self, en_input_size, en_hidden_size, en_num_layers=3, num_class=60): 54 | super(BIGRU, self).__init__() 55 | self.en_num_layers = en_num_layers 56 | self.encoder = EncoderRNN(en_input_size, en_hidden_size, en_num_layers).to(device) 57 | self.fc = nn.Linear(2*en_hidden_size,num_class) 58 | 59 | self.input_norm = nn.BatchNorm1d(en_input_size) # batch-norm over per-frame input features 60 | 61 | self.en_input_size = en_input_size 62 | 63 | def forward(self, input_tensor, knn_eval=False): 64 | 65 | input_tensor = self.input_norm(input_tensor.permute(0,2,1).contiguous()).permute(0,2,1).contiguous() # BN 66 | 67 | seq_len = torch.zeros(input_tensor.size(0),dtype=int) + input_tensor.size(1) # list of input sequence lengths (all full length here) 68 | 69 | encoder_hidden = self.encoder( 70 | input_tensor, seq_len) 71 | if knn_eval: # return last layer features during KNN evaluation (action retrieval) 72 | 73 | return encoder_hidden[0] 74 | else: 75 | out = self.fc(encoder_hidden[0]) 76 | return out -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CMD: Self-supervised 3D Action Representation Learning with Cross-modal Mutual Distillation 2 | [Yunyao Mao](http://home.ustc.edu.cn/~myy2016), [Wengang Zhou](http://staff.ustc.edu.cn/~zhwg/index.html), Zhenbo Lu, Jiajun Deng, and [Houqiang Li](http://staff.ustc.edu.cn/~lihq) 3 | 4 | ### Accepted by **ECCV 2022 (Oral)**. [[Paper Link]](https://arxiv.org/pdf/2208.12448.pdf) 5 | 6 | This repository includes a Python (PyTorch) implementation of CMD. 7 | 8 | ![](./images/cmd.jpg) 9 | 10 | ## Abstract 11 | In 3D action recognition, there exists rich complementary information between skeleton modalities. Nevertheless, how to model and 12 | utilize this information remains a challenging problem for self-supervised 13 | 3D action representation learning. In this work, we formulate the cross-modal interaction as a bidirectional knowledge distillation problem. Different from classic distillation solutions that transfer the knowledge of a fixed and pre-trained teacher to the student, in this work, the knowledge is continuously updated and bidirectionally distilled between modalities. To this end, we propose a new Cross-modal Mutual Distillation (CMD) framework with the following designs. On the one hand, the neighboring similarity distribution is introduced to model the knowledge learned in each modality, where the relational information is naturally suitable for the contrastive frameworks.
On the other hand, asymmetrical configurations are used for teacher and student to stabilize the distillation process and to transfer high-confidence information between modalities. By derivation, we find that the cross-modal positive mining in previous works can be regarded as a degenerated version of our CMD. We perform extensive experiments on NTU RGB+D 60, NTU RGB+D 120, and PKU-MMD II datasets. Our approach outperforms existing self-supervised methods and sets a series of new records. 14 | 15 | ## Requirements 16 | 17 | ```bash 18 | python==3.8.13 19 | torch==1.8.1+cu111 20 | torchvision==0.9.1+cu111 21 | tensorboard==2.9.0 22 | scikit-learn==1.1.1 23 | tqdm==4.64.0 24 | numpy==1.22.4 25 | ``` 26 | 27 | ## Data Preprocessing 28 | Please refer to [skeleton-contrast](https://github.com/fmthoker/skeleton-contrast). 29 | 30 | ## Training and Testing 31 | Please refer to the bash scripts in the repository root. 32 | 33 | ## Pretrained Models 34 | NTU-60 and NTU-120: [pretrained_models](https://rec.ustc.edu.cn/share/5f6a5ee0-01dd-11ed-b9ae-8301ca6d3d37) 35 | 36 | ## Citation 37 | If you find this work useful for your research, please consider citing our work: 38 | ``` 39 | @inproceedings{Mao_2022_CMD, 40 | title={CMD: Self-supervised 3D Action Representation Learning with Cross-modal Mutual Distillation}, 41 | author={Mao, Yunyao and Zhou, Wengang and Lu, Zhenbo and Deng, Jiajun and Li, Houqiang}, 42 | booktitle={European Conference on Computer Vision (ECCV)}, 43 | year={2022} 44 | } 45 | ``` 46 | 47 | ## Acknowledgment 48 | The framework of our code is based on [skeleton-contrast](https://github.com/fmthoker/skeleton-contrast). 49 | -------------------------------------------------------------------------------- /feeder/feeder_downstream.py: -------------------------------------------------------------------------------- 1 | # sys 2 | import pickle 3 | 4 | # torch 5 | import torch 6 | from torch.autograd import Variable 7 | from torchvision import transforms 8 | import numpy as np 9 | np.set_printoptions(threshold=np.inf) 10 | 11 | try: 12 | from feeder import augmentations 13 | except: 14 | import augmentations 15 | 16 | 17 | class Feeder(torch.utils.data.Dataset): 18 | """ 19 | Arguments: 20 | data_path: the path to '.npy' data, the shape of data should be (N, C, T, V, M) 21 | """ 22 | 23 | def __init__(self, 24 | data_path, 25 | label_path, 26 | num_frame_path, 27 | l_ratio, 28 | input_size, 29 | input_representation, 30 | mmap=True): 31 | 32 | self.data_path = data_path 33 | self.label_path = label_path 34 | self.num_frame_path= num_frame_path 35 | self.input_size=input_size 36 | self.input_representation=input_representation 37 | self.l_ratio = l_ratio 38 | 39 | 40 | self.load_data(mmap) 41 | self.N, self.C, self.T, self.V, self.M = self.data.shape 42 | print(self.data.shape,len(self.number_of_frames),len(self.label)) 43 | print("l_ratio",self.l_ratio) 44 | 45 | def load_data(self, mmap): 46 | # data: N C T V M 47 | 48 | # load data 49 | if mmap: 50 | self.data = np.load(self.data_path, mmap_mode='r') 51 | else: 52 | self.data = np.load(self.data_path) 53 | 54 | # load num of valid frame length 55 | self.number_of_frames= np.load(self.num_frame_path) 56 | 57 | # load label 58 | if '.pkl' in self.label_path: 59 | with open(self.label_path, 'rb') as f: 60 | self.sample_name, self.label = pickle.load(f) 61 | elif '.npy' in self.label_path: 62 | self.label = np.load(self.label_path).tolist() 63 | 64 | def __len__(self): 65 | return self.N 66 | 67 | def __iter__(self): 68 | return self 69 | 70 | def 
__getitem__(self, index): 71 | 72 | # get raw input 73 | 74 | # input: C, T, V, M 75 | data_numpy = np.array(self.data[index]) 76 | 77 | # number_of_frames = self.number_of_frames[index] 78 | number_of_frames = min(self.number_of_frames[index], 300) # 300 is max_len, for pku-mmd 79 | 80 | label = self.label[index] 81 | 82 | # crop a sub-sequnce 83 | data_numpy = augmentations.crop_subsequence(data_numpy, number_of_frames, self.l_ratio, self.input_size) 84 | 85 | #input 86 | if self.input_representation == "seq-based": 87 | 88 | #sequence-based 89 | 90 | input_data = data_numpy.transpose(1,2,0,3) 91 | input_data = input_data.reshape(-1,150).astype('float32') 92 | return input_data, label 93 | 94 | elif self.input_representation == "graph-based" or self.input_representation == "image-based" : 95 | #graph-based or image-based 96 | 97 | input_data = data_numpy 98 | return input_data, label 99 | -------------------------------------------------------------------------------- /script_action_classification_cmd_semi.sh: -------------------------------------------------------------------------------- 1 | # Cross-view 2 | for((i=1;i<=5;i++)); 3 | do 4 | CUDA_VISIBLE_DEVICES=0 python action_classification_cmd_semi.py \ 5 | --lr 0.01 \ 6 | --batch-size 64 \ 7 | --pretrained /data/user/ACTION/CMD/checkpoints/pretrain_moco_cmd/ntu60_cross_view/checkpoint_0450.pth.tar \ 8 | --finetune-dataset ntu60 \ 9 | --protocol cross_view_semi \ 10 | --data-ratio 0.01 \ 11 | --finetune-skeleton-representation graph-based >> cmd_ntu60_cview_semi_0.01.txt 12 | done 13 | 14 | for((i=1;i<=5;i++)); 15 | do 16 | CUDA_VISIBLE_DEVICES=0 python action_classification_cmd_semi.py \ 17 | --lr 0.01 \ 18 | --batch-size 64 \ 19 | --pretrained /data/user/ACTION/CMD/checkpoints/pretrain_moco_cmd/ntu60_cross_view/checkpoint_0450.pth.tar \ 20 | --finetune-dataset ntu60 \ 21 | --protocol cross_view_semi \ 22 | --data-ratio 0.05 \ 23 | --finetune-skeleton-representation graph-based >> cmd_ntu60_cview_semi_0.05.txt 24 | done 25 | 26 | for((i=1;i<=5;i++)); 27 | do 28 | CUDA_VISIBLE_DEVICES=0 python action_classification_cmd_semi.py \ 29 | --lr 0.01 \ 30 | --batch-size 64 \ 31 | --pretrained /data/user/ACTION/CMD/checkpoints/pretrain_moco_cmd/ntu60_cross_view/checkpoint_0450.pth.tar \ 32 | --finetune-dataset ntu60 \ 33 | --protocol cross_view_semi \ 34 | --data-ratio 0.1 \ 35 | --finetune-skeleton-representation graph-based >> cmd_ntu60_cview_semi_0.1.txt 36 | done 37 | 38 | for((i=1;i<=5;i++)); 39 | do 40 | CUDA_VISIBLE_DEVICES=0 python action_classification_cmd_semi.py \ 41 | --lr 0.01 \ 42 | --batch-size 64 \ 43 | --pretrained /data/user/ACTION/CMD/checkpoints/pretrain_moco_cmd/ntu60_cross_view/checkpoint_0450.pth.tar \ 44 | --finetune-dataset ntu60 \ 45 | --protocol cross_view_semi \ 46 | --data-ratio 0.2 \ 47 | --finetune-skeleton-representation graph-based >> cmd_ntu60_cview_semi_0.2.txt 48 | done 49 | 50 | 51 | # Cross-subject 52 | for((i=1;i<=5;i++)); 53 | do 54 | CUDA_VISIBLE_DEVICES=0 python action_classification_cmd_semi.py \ 55 | --lr 0.01 \ 56 | --batch-size 64 \ 57 | --pretrained /data/user/ACTION/CMD/checkpoints/pretrain_moco_cmd/ntu60_cross_subject/checkpoint_0450.pth.tar \ 58 | --finetune-dataset ntu60 \ 59 | --protocol cross_subject_semi \ 60 | --data-ratio 0.01 \ 61 | --finetune-skeleton-representation graph-based >> cmd_ntu60_csub_semi_0.01.txt 62 | done 63 | 64 | for((i=1;i<=5;i++)); 65 | do 66 | CUDA_VISIBLE_DEVICES=0 python action_classification_cmd_semi.py \ 67 | --lr 0.01 \ 68 | --batch-size 64 \ 69 | --pretrained 
/data/user/ACTION/CMD/checkpoints/pretrain_moco_cmd/ntu60_cross_subject/checkpoint_0450.pth.tar \ 70 | --finetune-dataset ntu60 \ 71 | --protocol cross_subject_semi \ 72 | --data-ratio 0.05 \ 73 | --finetune-skeleton-representation graph-based >> cmd_ntu60_csub_semi_0.05.txt 74 | done 75 | 76 | for((i=1;i<=5;i++)); 77 | do 78 | CUDA_VISIBLE_DEVICES=0 python action_classification_cmd_semi.py \ 79 | --lr 0.01 \ 80 | --batch-size 64 \ 81 | --pretrained /data/user/ACTION/CMD/checkpoints/pretrain_moco_cmd/ntu60_cross_subject/checkpoint_0450.pth.tar \ 82 | --finetune-dataset ntu60 \ 83 | --protocol cross_subject_semi \ 84 | --data-ratio 0.1 \ 85 | --finetune-skeleton-representation graph-based >> cmd_ntu60_csub_semi_0.1.txt 86 | done 87 | 88 | for((i=1;i<=5;i++)); 89 | do 90 | CUDA_VISIBLE_DEVICES=0 python action_classification_cmd_semi.py \ 91 | --lr 0.01 \ 92 | --batch-size 64 \ 93 | --pretrained /data/user/ACTION/CMD/checkpoints/pretrain_moco_cmd/ntu60_cross_subject/checkpoint_0450.pth.tar \ 94 | --finetune-dataset ntu60 \ 95 | --protocol cross_subject_semi \ 96 | --data-ratio 0.2 \ 97 | --finetune-skeleton-representation graph-based >> cmd_ntu60_csub_semi_0.2.txt 98 | done -------------------------------------------------------------------------------- /data_gen/preprocess.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.extend(['../']) 4 | from data_gen.rotation import * 5 | from tqdm import tqdm 6 | 7 | 8 | def pre_normalization(data, zaxis=[0, 1], xaxis=[8, 4]): 9 | N, C, T, V, M = data.shape 10 | s = np.transpose(data, [0, 4, 2, 3, 1]) # N, C, T, V, M to N, M, T, V, C 11 | 12 | print('pad the null frames with the previous frames') 13 | for i_s, skeleton in enumerate(tqdm(s)): # pad 14 | if skeleton.sum() == 0: 15 | print(i_s, ' has no skeleton') 16 | for i_p, person in enumerate(skeleton): 17 | if person.sum() == 0: 18 | continue 19 | if person[0].sum() == 0: 20 | index = (person.sum(-1).sum(-1) != 0) 21 | tmp = person[index].copy() 22 | person *= 0 23 | person[:len(tmp)] = tmp 24 | for i_f, frame in enumerate(person): 25 | if frame.sum() == 0: 26 | if person[i_f:].sum() == 0: 27 | rest = len(person) - i_f 28 | num = int(np.ceil(rest / i_f)) 29 | pad = np.concatenate([person[0:i_f] for _ in range(num)], 0)[:rest] 30 | s[i_s, i_p, i_f:] = pad 31 | break 32 | 33 | print('sub the center joint #1 (spine joint in ntu and neck joint in kinetics)') 34 | for i_s, skeleton in enumerate(tqdm(s)): 35 | if skeleton.sum() == 0: 36 | continue 37 | main_body_center = skeleton[0][:, 1:2, :].copy() 38 | for i_p, person in enumerate(skeleton): 39 | if person.sum() == 0: 40 | continue 41 | mask = (person.sum(-1) != 0).reshape(T, V, 1) 42 | s[i_s, i_p] = (s[i_s, i_p] - main_body_center) * mask 43 | 44 | print('parallel the bone between hip(jpt 0) and spine(jpt 1) of the first person to the z axis') 45 | for i_s, skeleton in enumerate(tqdm(s)): 46 | if skeleton.sum() == 0: 47 | continue 48 | joint_bottom = skeleton[0, 0, zaxis[0]] 49 | joint_top = skeleton[0, 0, zaxis[1]] 50 | axis = np.cross(joint_top - joint_bottom, [0, 0, 1]) 51 | angle = angle_between(joint_top - joint_bottom, [0, 0, 1]) 52 | matrix_z = rotation_matrix(axis, angle) 53 | for i_p, person in enumerate(skeleton): 54 | if person.sum() == 0: 55 | continue 56 | for i_f, frame in enumerate(person): 57 | if frame.sum() == 0: 58 | continue 59 | for i_j, joint in enumerate(frame): 60 | s[i_s, i_p, i_f, i_j] = np.dot(matrix_z, joint) 61 | 62 | print( 63 | 'parallel the bone between 
right shoulder(jpt 8) and left shoulder(jpt 4) of the first person to the x axis') 64 | for i_s, skeleton in enumerate(tqdm(s)): 65 | if skeleton.sum() == 0: 66 | continue 67 | joint_rshoulder = skeleton[0, 0, xaxis[0]] 68 | joint_lshoulder = skeleton[0, 0, xaxis[1]] 69 | axis = np.cross(joint_rshoulder - joint_lshoulder, [1, 0, 0]) 70 | angle = angle_between(joint_rshoulder - joint_lshoulder, [1, 0, 0]) 71 | matrix_x = rotation_matrix(axis, angle) 72 | for i_p, person in enumerate(skeleton): 73 | if person.sum() == 0: 74 | continue 75 | for i_f, frame in enumerate(person): 76 | if frame.sum() == 0: 77 | continue 78 | for i_j, joint in enumerate(frame): 79 | s[i_s, i_p, i_f, i_j] = np.dot(matrix_x, joint) 80 | 81 | data = np.transpose(s, [0, 4, 2, 3, 1]) 82 | return data 83 | 84 | 85 | if __name__ == '__main__': 86 | data = np.load('../data/ntu/xview/val_data.npy') 87 | pre_normalization(data) 88 | np.save('../data/ntu/xview/data_val_pre.npy', data) 89 | -------------------------------------------------------------------------------- /feeder/feeder_downstream_semi_supervised.py: -------------------------------------------------------------------------------- 1 | # sys 2 | import pickle 3 | 4 | # torch 5 | import torch 6 | from torch.autograd import Variable 7 | from torchvision import transforms 8 | import numpy as np 9 | np.set_printoptions(threshold=np.inf) 10 | 11 | try: 12 | from feeder import augmentations 13 | except: 14 | import augmentations 15 | 16 | 17 | class Feeder(torch.utils.data.Dataset): 18 | """ 19 | Arguments: 20 | data_path: the path to '.npy' data, the shape of data should be (N, C, T, V, M) 21 | """ 22 | 23 | def __init__(self, 24 | data_path, 25 | label_path, 26 | num_frame_path, 27 | l_ratio, 28 | input_size, 29 | input_representation, 30 | data_ratio=None, 31 | mmap=True): 32 | 33 | self.data_path = data_path 34 | self.label_path = label_path 35 | self.num_frame_path = num_frame_path 36 | self.input_size = input_size 37 | self.input_representation = input_representation 38 | self.l_ratio = l_ratio 39 | 40 | self.load_data(mmap) 41 | self.N, self.C, self.T, self.V, self.M = self.data.shape 42 | 43 | if data_ratio is not None: 44 | self.random_select_data(data_ratio) 45 | 46 | print(self.data.shape, len(self.number_of_frames), len(self.label)) 47 | print("l_ratio", self.l_ratio) 48 | 49 | def load_data(self, mmap): 50 | # data: N C V T M 51 | 52 | # load data 53 | if mmap: 54 | self.data = np.load(self.data_path, mmap_mode='r') 55 | else: 56 | self.data = np.load(self.data_path) 57 | 58 | # load num of valid frame length 59 | self.number_of_frames = np.load(self.num_frame_path) 60 | 61 | # load label 62 | if '.pkl' in self.label_path: 63 | with open(self.label_path, 'rb') as f: 64 | self.sample_name, self.label = pickle.load(f) 65 | elif '.npy' in self.label_path: 66 | self.label = np.load(self.label_path).tolist() 67 | 68 | self.label = np.array(self.label) 69 | 70 | def random_select_data(self, data_ratio): 71 | idx = np.arange(self.N) 72 | np.random.shuffle(idx) 73 | 74 | N_used = int(self.N * data_ratio) 75 | idx_used = idx[ :N_used] 76 | 77 | self.N = N_used 78 | self.data = self.data[idx_used] 79 | self.label = self.label[idx_used] 80 | self.number_of_frames = self.number_of_frames[idx_used] 81 | 82 | def __len__(self): 83 | return self.N 84 | 85 | def __iter__(self): 86 | return self 87 | 88 | def __getitem__(self, index): 89 | 90 | # get raw input 91 | 92 | # input: C, T, V, M 93 | data_numpy = np.array(self.data[index]) 94 | 95 | # number_of_frames = 
self.number_of_frames[index] 96 | # 300 is max_len, for pku-mmd 97 | number_of_frames = min(self.number_of_frames[index], 300) 98 | 99 | label = self.label[index] 100 | 101 | # crop a sub-sequnce 102 | data_numpy = augmentations.crop_subsequence( 103 | data_numpy, number_of_frames, self.l_ratio, self.input_size) 104 | 105 | # input 106 | if self.input_representation == "seq-based": 107 | 108 | # sequence-based 109 | 110 | input_data = data_numpy.transpose(1, 2, 0, 3) 111 | input_data = input_data.reshape(-1, 150).astype('float32') 112 | return input_data, label 113 | 114 | elif self.input_representation == "graph-based" or self.input_representation == "image-based": 115 | #graph-based or image-based 116 | 117 | input_data = data_numpy 118 | return input_data, label 119 | -------------------------------------------------------------------------------- /options/options_pretraining.py: -------------------------------------------------------------------------------- 1 | # graph based model arguments 2 | agcn_model_arguments = { 3 | "num_class": 128, 4 | "num_point": 25, 5 | "num_person": 2, 6 | 'graph_args': { 7 | 'labeling_mode': 'spatial'} 8 | } 9 | 10 | #image based model arguments 11 | hcn_model_arguments = { 12 | "in_channel":3, 13 | "out_channel":64, 14 | "window_size":64, 15 | "num_joint":25, 16 | "num_person":2, 17 | "num_class":128 18 | } 19 | 20 | #Sequence based model arguments 21 | bi_gru_model_arguments = { 22 | "en_input_size":150, 23 | "en_hidden_size":1024, 24 | "en_num_layers":3, 25 | "num_class":128 26 | } 27 | 28 | 29 | class opts_ntu_60_cross_view(): 30 | 31 | def __init__(self): 32 | 33 | self.agcn_model_args = agcn_model_arguments 34 | 35 | self.hcn_model_args = hcn_model_arguments 36 | 37 | self.bi_gru_model_args = bi_gru_model_arguments 38 | 39 | # feeder 40 | self.train_feeder_args = { 41 | 'data_path': './data/NTU-RGB-D-60-AGCN/xview/train_data_joint.npy', 42 | 'num_frame_path': './data/NTU-RGB-D-60-AGCN/xview/train_num_frame.npy', 43 | 'l_ratio': [0.1,1], 44 | 'input_size': 64 45 | } 46 | 47 | class opts_ntu_60_cross_subject(): 48 | 49 | def __init__(self): 50 | 51 | self.agcn_model_args = agcn_model_arguments 52 | 53 | self.hcn_model_args = hcn_model_arguments 54 | 55 | self.bi_gru_model_args = bi_gru_model_arguments 56 | 57 | # feeder 58 | self.train_feeder_args = { 59 | 'data_path': './data/NTU-RGB-D-60-AGCN/xsub/train_data_joint.npy', 60 | 'num_frame_path': './data/NTU-RGB-D-60-AGCN/xsub/train_num_frame.npy', 61 | 'l_ratio': [0.1,1], 62 | 'input_size': 64 63 | } 64 | 65 | class opts_ntu_120_cross_subject(): 66 | 67 | def __init__(self): 68 | 69 | self.agcn_model_args = agcn_model_arguments 70 | 71 | self.hcn_model_args = hcn_model_arguments 72 | 73 | self.bi_gru_model_args = bi_gru_model_arguments 74 | 75 | # feeder 76 | self.train_feeder_args = { 77 | 'data_path': './data/NTU-RGB-D-120-AGCN/xsub/train_data_joint.npy', 78 | 'num_frame_path': './data/NTU-RGB-D-120-AGCN/xsub/train_num_frame.npy', 79 | 'l_ratio': [0.1,1], 80 | 'input_size': 64 81 | } 82 | 83 | class opts_ntu_120_cross_setup(): 84 | 85 | def __init__(self): 86 | 87 | self.agcn_model_args = agcn_model_arguments 88 | 89 | self.hcn_model_args = hcn_model_arguments 90 | 91 | self.bi_gru_model_args = bi_gru_model_arguments 92 | 93 | # feeder 94 | self.train_feeder_args = { 95 | 'data_path': './data/NTU-RGB-D-120-AGCN/xsetup/train_data_joint.npy', 96 | 'num_frame_path': './data/NTU-RGB-D-120-AGCN/xsetup/train_num_frame.npy', 97 | 'l_ratio': [0.1,1], 98 | 'input_size': 64 99 | } 100 | 101 | 102 | # PKU-MMD 
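# The two PKU-MMD v2 option classes below mirror the NTU configurations above: they reuse the same
# model arguments (agcn / hcn / bi-gru) and feeder settings (l_ratio [0.1, 1], input_size 64);
# only data_path and num_frame_path point to the PKU-MMD-v2-AGCN splits.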
103 | class opts_pku_v2_cross_view(): 104 | 105 | def __init__(self): 106 | 107 | self.agcn_model_args = agcn_model_arguments 108 | 109 | self.hcn_model_args = hcn_model_arguments 110 | 111 | self.bi_gru_model_args = bi_gru_model_arguments 112 | 113 | # feeder 114 | self.train_feeder_args = { 115 | 'data_path': './data/PKU-MMD-v2-AGCN/xview/train_data_joint.npy', 116 | 'num_frame_path': './data/PKU-MMD-v2-AGCN/xview/train_num_frame.npy', 117 | 'l_ratio': [0.1,1], 118 | 'input_size': 64 119 | } 120 | 121 | 122 | class opts_pku_v2_cross_subject(): 123 | 124 | def __init__(self): 125 | 126 | self.agcn_model_args = agcn_model_arguments 127 | 128 | self.hcn_model_args = hcn_model_arguments 129 | 130 | self.bi_gru_model_args = bi_gru_model_arguments 131 | 132 | # feeder 133 | self.train_feeder_args = { 134 | 'data_path': './data/PKU-MMD-v2-AGCN/xsub/train_data_joint.npy', 135 | 'num_frame_path': './data/PKU-MMD-v2-AGCN/xsub/train_num_frame.npy', 136 | 'l_ratio': [0.1,1], 137 | 'input_size': 64 138 | } -------------------------------------------------------------------------------- /data_gen/pku_gendata.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import pickle 3 | from tqdm import tqdm 4 | import sys 5 | from numpy.lib.format import open_memmap 6 | 7 | sys.path.extend(['../']) 8 | from data_gen.preprocess import pre_normalization 9 | 10 | max_body_true = 2 11 | max_body_kinect = 4 12 | num_joint = 25 13 | max_frame = 300 14 | 15 | import numpy as np 16 | import os 17 | 18 | def read_data(data_path, name, max_body=4, num_joint=25): # top 2 body 19 | filename, action_idx = name.split('_') 20 | action_idx = int(action_idx) 21 | seq_data = np.loadtxt('{}/skeleton/{}'.format(data_path, filename)) 22 | label = np.loadtxt('{}/label/{}'.format(data_path, filename), delimiter=',') 23 | start, end = int(label[action_idx][1]), int(label[action_idx][2]) 24 | 25 | data = seq_data[start: end, :] # num_frames * 150 26 | data = data.reshape(data.shape[0], 2, 25, 3) # num_frame, num_body, num_joint, xyz 27 | data = data.transpose(3, 0, 2, 1) # xyz, num_frame, num_joint, num_body 28 | return data 29 | 30 | def gendata(data_path, out_path, benchmark='xview', part='eval'): 31 | # Read cross_subject_v2.txt and cross_view_v2.txt to obtain training_views training_subjects 32 | with open('{}/cross_view_v2.txt'.format(data_path), 'r') as f: 33 | lines = f.readlines() 34 | training_views = lines[1].strip('\n').split(', ') 35 | with open('{}/cross_subject_v2.txt'.format(data_path), 'r') as f: 36 | lines = f.readlines() 37 | training_subjects = lines[1].strip('\n').split(', ') 38 | 39 | 40 | sample_name = [] 41 | sample_label = [] 42 | for filename in os.listdir('{}/skeleton'.format(data_path)): 43 | if benchmark == 'xview': 44 | istraining = (filename[:-4] in training_views) 45 | elif benchmark == 'xsub': 46 | istraining = (filename[:-4] in training_subjects) 47 | else: 48 | raise ValueError() 49 | 50 | if part == 'train': 51 | issample = istraining 52 | elif part == 'val': 53 | issample = not (istraining) 54 | else: 55 | raise ValueError() 56 | 57 | if issample: 58 | label = np.loadtxt('{}/label/{}'.format(data_path, filename), delimiter=',') 59 | for idx in range(label.shape[0]): 60 | sample_name.append('{}_{}'.format(filename, str(idx))) 61 | sample_label.append(label[idx][0] - 1) 62 | 63 | with open('{}/{}_label.pkl'.format(out_path, part), 'wb') as f: 64 | pickle.dump((sample_name, list(sample_label)), f) 65 | 66 | fl = open_memmap( 67 | 
'{}/{}_num_frame.npy'.format(out_path, part), 68 | dtype='int', 69 | mode='w+', 70 | shape=(len(sample_label),)) 71 | 72 | fp = np.zeros((len(sample_label), 3, max_frame, num_joint, max_body_true), dtype=np.float32) 73 | 74 | for i, s in enumerate(tqdm(sample_name)): 75 | data = read_data(data_path, s, max_body=max_body_kinect, num_joint=num_joint) 76 | fp[i, :, 0:min(data.shape[1], max_frame), :, :] = data[:, 0:min(data.shape[1], max_frame), :, :] # sequences longer than max_frame are truncated! 77 | fl[i] = data.shape[1] # num_frame 78 | 79 | fp = pre_normalization(fp) 80 | np.save('{}/{}_data_joint.npy'.format(out_path, part), fp) 81 | 82 | 83 | if __name__ == '__main__': 84 | parser = argparse.ArgumentParser(description='PKU-MMD-v2 Data Converter.') 85 | 86 | parser.add_argument('--data_path', default='/data/user/dataset/PKU-MMD/v2/') 87 | parser.add_argument('--out_folder', default='../data/PKU-MMD-v2-AGCN/') 88 | benchmark = ['xsub','xview', ] 89 | 90 | part = ['train', 'val'] 91 | arg = parser.parse_args() 92 | 93 | for b in benchmark: 94 | for p in part: 95 | out_path = os.path.join(arg.out_folder, b) 96 | if not os.path.exists(out_path): 97 | os.makedirs(out_path) 98 | print(b, p) 99 | gendata( 100 | arg.data_path, 101 | out_path, 102 | benchmark=b, 103 | part=p) 104 | -------------------------------------------------------------------------------- /feeder/feeder_pretraining_intra.py: -------------------------------------------------------------------------------- 1 | # sys 2 | import pickle 3 | 4 | # torch 5 | import torch 6 | from torch.autograd import Variable 7 | from torchvision import transforms 8 | import numpy as np 9 | np.set_printoptions(threshold=np.inf) 10 | import random 11 | 12 | try: 13 | from feeder import augmentations 14 | except: 15 | import augmentations 16 | 17 | 18 | class Feeder(torch.utils.data.Dataset): 19 | """ 20 | Arguments: 21 | data_path: the path to '.npy' data, the shape of data should be (N, C, T, V, M) 22 | """ 23 | 24 | def __init__(self, 25 | data_path, 26 | num_frame_path, 27 | l_ratio, 28 | input_size, 29 | input_representation, 30 | mmap=True): 31 | 32 | self.data_path = data_path 33 | self.num_frame_path= num_frame_path 34 | self.input_size=input_size 35 | self.input_representation=input_representation 36 | self.crop_resize =True 37 | self.l_ratio = l_ratio 38 | 39 | 40 | self.load_data(mmap) 41 | self.N, self.C, self.T, self.V, self.M = self.data.shape 42 | print(self.data.shape,len(self.number_of_frames)) 43 | print("l_ratio",self.l_ratio) 44 | 45 | def load_data(self, mmap): 46 | # data: N C T V M 47 | 48 | # load data 49 | if mmap: 50 | self.data = np.load(self.data_path, mmap_mode='r') 51 | else: 52 | self.data = np.load(self.data_path) 53 | 54 | # load num of valid frame length 55 | self.number_of_frames= np.load(self.num_frame_path) 56 | 57 | def __len__(self): 58 | return self.N 59 | 60 | def __iter__(self): 61 | return self 62 | 63 | def __getitem__(self, index): 64 | 65 | # get raw input 66 | 67 | # input: C, T, V, M 68 | data_numpy = np.array(self.data[index]) 69 | number_of_frames = min(self.number_of_frames[index], 300) # 300 is max_len, for pku-mmd 70 | 71 | # apply spatio-temporal augmentations to generate view 1 72 | 73 | # temporal crop-resize 74 | data_numpy_v1_crop = augmentations.temporal_cropresize(data_numpy, number_of_frames, self.l_ratio, self.input_size) 75 | 76 | 77 | # randomly select one of the spatial augmentations 78 | flip_prob = random.random() 79 | if flip_prob < 0.5: 80 | data_numpy_v1 = 
augmentations.joint_courruption(data_numpy_v1_crop) 81 | else: 82 | data_numpy_v1 = augmentations.pose_augmentation(data_numpy_v1_crop) 83 | 84 | 85 | # apply spatio-temporal augmentations to generate view 2 86 | 87 | # temporal crop-resize 88 | data_numpy_v2_crop = augmentations.temporal_cropresize(data_numpy,number_of_frames, self.l_ratio, self.input_size) 89 | 90 | # randomly select one of the spatial augmentations 91 | flip_prob = random.random() 92 | if flip_prob < 0.5: 93 | data_numpy_v2 = augmentations.joint_courruption(data_numpy_v2_crop) 94 | else: 95 | data_numpy_v2 = augmentations.pose_augmentation(data_numpy_v2_crop) 96 | 97 | 98 | # convert augmented views into input formats based on skeleton-representations 99 | if self.input_representation == "seq-based" or self.input_representation == "trans-based": 100 | 101 | #Input for sequence-based representation 102 | # two person input ---> shpae (64 X 150) 103 | 104 | #View 1 105 | input_v1 = data_numpy_v1.transpose(1,2,0,3) 106 | input_v1 = input_v1.reshape(-1,150).astype('float32') 107 | 108 | #View 2 109 | input_v2 = data_numpy_v2.transpose(1,2,0,3) 110 | input_v2 = input_v2.reshape(-1,150).astype('float32') 111 | 112 | return input_v1, input_v2 113 | 114 | elif self.input_representation == "graph-based" or self.input_representation == "image-based": 115 | 116 | #input for graph-based or image-based representation 117 | # two person input ---> shape (3, 64, 25, 2) 118 | 119 | #View 1 120 | input_v1 = data_numpy_v1.astype('float32') 121 | #View 2 122 | input_v2 = data_numpy_v2.astype('float32') 123 | 124 | return input_v1, input_v2 125 | -------------------------------------------------------------------------------- /feeder/augmentations.py: -------------------------------------------------------------------------------- 1 | import torch.nn.functional as F 2 | import torch 3 | import random 4 | import numpy as np 5 | 6 | 7 | def joint_courruption(input_data): 8 | 9 | out = input_data.copy() 10 | 11 | flip_prob = random.random() 12 | 13 | if flip_prob < 0.5: 14 | 15 | #joint_indicies = np.random.choice(25, random.randint(5, 10), replace=False) 16 | joint_indicies = np.random.choice(25, 15,replace=False) 17 | out[:,:,joint_indicies,:] = 0 18 | return out 19 | 20 | else: 21 | #joint_indicies = np.random.choice(25, random.randint(5, 10), replace=False) 22 | joint_indicies = np.random.choice(25, 15,replace=False) 23 | 24 | temp = out[:,:,joint_indicies,:] 25 | Corruption = np.array([ 26 | [random.uniform(-1, 1), random.uniform(-1, 1), random.uniform(-1, 1)], 27 | [random.uniform(-1, 1), random.uniform(-1, 1), random.uniform(-1, 1)], 28 | [random.uniform(-1, 1), random.uniform(-1, 1), random.uniform(-1, 1)] ]) 29 | temp = np.dot(temp.transpose([1, 2, 3, 0]), Corruption) 30 | temp = temp.transpose(3, 0, 1, 2) 31 | out[:,:,joint_indicies,:] = temp 32 | return out 33 | 34 | 35 | 36 | def pose_augmentation(input_data): 37 | 38 | 39 | Shear = np.array([ 40 | [1, random.uniform(-1, 1), random.uniform(-1, 1)], 41 | [random.uniform(-1, 1), 1, random.uniform(-1, 1)], 42 | [random.uniform(-1, 1), random.uniform(-1, 1), 1] 43 | ]) 44 | 45 | temp_data = input_data.copy() 46 | result = np.dot(temp_data.transpose([1, 2, 3, 0]),Shear.transpose()) 47 | output = result.transpose(3, 0, 1, 2) 48 | 49 | return output 50 | 51 | def temporal_cropresize(input_data,num_of_frames,l_ratio,output_size): 52 | 53 | 54 | C, T, V, M =input_data.shape 55 | 56 | # Temporal crop 57 | min_crop_length = 64 58 | 59 | scale = 
np.random.rand(1)*(l_ratio[1]-l_ratio[0])+l_ratio[0] 60 | temporal_crop_length = np.minimum(np.maximum(int(np.floor(num_of_frames*scale)),min_crop_length),num_of_frames) 61 | 62 | start = np.random.randint(0,num_of_frames-temporal_crop_length+1) 63 | temporal_context = input_data[:,start:start+temporal_crop_length, :, :] 64 | 65 | # interpolate 66 | temporal_context = torch.tensor(temporal_context,dtype=torch.float) 67 | temporal_context=temporal_context.permute(0, 2, 3, 1).contiguous().view(C * V * M,temporal_crop_length) 68 | temporal_context=temporal_context[None, :, :, None] 69 | temporal_context= F.interpolate(temporal_context, size=(output_size, 1), mode='bilinear',align_corners=False) 70 | temporal_context = temporal_context.squeeze(dim=3).squeeze(dim=0) 71 | temporal_context=temporal_context.contiguous().view(C, V, M, output_size).permute(0, 3, 1, 2).contiguous().numpy() 72 | 73 | return temporal_context 74 | 75 | def crop_subsequence(input_data,num_of_frames,l_ratio,output_size): 76 | 77 | 78 | C, T, V, M =input_data.shape 79 | 80 | if l_ratio[0] == 0.5: 81 | # if training , sample a random crop 82 | 83 | min_crop_length = 64 84 | scale = np.random.rand(1)*(l_ratio[1]-l_ratio[0])+l_ratio[0] 85 | temporal_crop_length = np.minimum(np.maximum(int(np.floor(num_of_frames*scale)),min_crop_length),num_of_frames) 86 | 87 | start = np.random.randint(0,num_of_frames-temporal_crop_length+1) 88 | temporal_crop = input_data[:,start:start+temporal_crop_length, :, :] 89 | 90 | temporal_crop= torch.tensor(temporal_crop,dtype=torch.float) 91 | temporal_crop=temporal_crop.permute(0, 2, 3, 1).contiguous().view(C * V * M,temporal_crop_length) 92 | temporal_crop=temporal_crop[None, :, :, None] 93 | temporal_crop= F.interpolate(temporal_crop, size=(output_size, 1), mode='bilinear',align_corners=False) 94 | temporal_crop=temporal_crop.squeeze(dim=3).squeeze(dim=0) 95 | temporal_crop=temporal_crop.contiguous().view(C, V, M, output_size).permute(0, 3, 1, 2).contiguous().numpy() 96 | 97 | return temporal_crop 98 | 99 | else: 100 | # if testing , sample a center crop 101 | 102 | start = int((1-l_ratio[0]) * num_of_frames/2) 103 | data =input_data[:,start:num_of_frames-start, :, :] 104 | temporal_crop_length = data.shape[1] 105 | 106 | temporal_crop= torch.tensor(data,dtype=torch.float) 107 | temporal_crop=temporal_crop.permute(0, 2, 3, 1).contiguous().view(C * V * M,temporal_crop_length) 108 | temporal_crop=temporal_crop[None, :, :, None] 109 | temporal_crop= F.interpolate(temporal_crop, size=(output_size, 1), mode='bilinear',align_corners=False) 110 | temporal_crop=temporal_crop.squeeze(dim=3).squeeze(dim=0) 111 | temporal_crop=temporal_crop.contiguous().view(C, V, M, output_size).permute(0, 3, 1, 2).contiguous().numpy() 112 | 113 | return temporal_crop 114 | -------------------------------------------------------------------------------- /options/options_retrieval.py: -------------------------------------------------------------------------------- 1 | class opts_ntu_60_cross_view(): 2 | 3 | def __init__(self): 4 | 5 | # graph based model 6 | self.agcn_model_args = { 7 | "num_class": 60, 8 | "num_point": 25, 9 | "num_person": 2, 10 | 'graph_args': { 11 | 'labeling_mode': 'spatial'} 12 | } 13 | 14 | #image based model 15 | self.hcn_model_args = { 16 | "in_channel":3, 17 | "out_channel":64, 18 | "window_size":64, 19 | "num_joint":25, 20 | "num_person":2, 21 | "num_class":60 22 | } 23 | 24 | #Sequence based model 25 | self.bi_gru_model_args = { 26 | "en_input_size":150, 27 | "en_hidden_size":1024, 28 | 
"en_num_layers":3, 29 | "num_class":60 30 | } 31 | 32 | # feeder 33 | self.train_feeder_args = { 34 | 'data_path': './data/NTU-RGB-D-60-AGCN/xview/train_data_joint.npy', 35 | 'label_path': './data/NTU-RGB-D-60-AGCN/xview/train_label.pkl', 36 | 'num_frame_path': './data/NTU-RGB-D-60-AGCN/xview/train_num_frame.npy', 37 | 'l_ratio': [0.95], 38 | 'input_size': 64 39 | } 40 | 41 | self.test_feeder_args = { 42 | 43 | 'data_path': './data/NTU-RGB-D-60-AGCN/xview/val_data_joint.npy', 44 | 'label_path': './data/NTU-RGB-D-60-AGCN/xview/val_label.pkl', 45 | 'num_frame_path': './data/NTU-RGB-D-60-AGCN/xview/val_num_frame.npy', 46 | 'l_ratio': [0.95], 47 | 'input_size': 64 48 | } 49 | 50 | class opts_ntu_60_cross_subject(): 51 | 52 | def __init__(self): 53 | 54 | # graph based model 55 | self.agcn_model_args = { 56 | "num_class": 60, 57 | "num_point": 25, 58 | "num_person": 2, 59 | 'graph_args': { 60 | 'labeling_mode': 'spatial'} 61 | } 62 | 63 | #image based model 64 | self.hcn_model_args = { 65 | "in_channel":3, 66 | "out_channel":64, 67 | "window_size":64, 68 | "num_joint":25, 69 | "num_person":2, 70 | "num_class":60 71 | } 72 | 73 | #Sequence based model 74 | self.bi_gru_model_args = { 75 | "en_input_size":150, 76 | "en_hidden_size":1024, 77 | "en_num_layers":3, 78 | "num_class":60 79 | } 80 | 81 | # feeder 82 | self.train_feeder_args = { 83 | 'data_path': './data/NTU-RGB-D-60-AGCN/xsub/train_data_joint.npy', 84 | 'label_path': './data/NTU-RGB-D-60-AGCN/xsub/train_label.pkl', 85 | 'num_frame_path': './data/NTU-RGB-D-60-AGCN/xsub/train_num_frame.npy', 86 | 'l_ratio': [0.95], 87 | 'input_size': 64 88 | } 89 | 90 | self.test_feeder_args = { 91 | 92 | 'data_path': './data/NTU-RGB-D-60-AGCN/xsub/val_data_joint.npy', 93 | 'label_path': './data/NTU-RGB-D-60-AGCN/xsub/val_label.pkl', 94 | 'num_frame_path': './data/NTU-RGB-D-60-AGCN/xsub/val_num_frame.npy', 95 | 'l_ratio': [0.95], 96 | 'input_size': 64 97 | } 98 | 99 | class opts_ntu_120_cross_subject(): 100 | def __init__(self): 101 | 102 | # graph based model 103 | self.agcn_model_args = { 104 | "num_class": 120, 105 | "num_point": 25, 106 | "num_person": 2, 107 | 'graph_args': { 108 | 'labeling_mode': 'spatial'} 109 | } 110 | 111 | #image based model 112 | self.hcn_model_args = { 113 | "in_channel":3, 114 | "out_channel":64, 115 | "window_size":64, 116 | "num_joint":25, 117 | "num_person":2, 118 | "num_class":120 119 | } 120 | 121 | #Sequence based model 122 | self.bi_gru_model_args = { 123 | "en_input_size":150, 124 | "en_hidden_size":1024, 125 | "en_num_layers":3, 126 | "num_class":120 127 | } 128 | 129 | # feeder 130 | self.train_feeder_args = { 131 | 'data_path': './data/NTU-RGB-D-120-AGCN/xsub/train_data_joint.npy', 132 | 'label_path': './data/NTU-RGB-D-120-AGCN/xsub/train_label.pkl', 133 | 'num_frame_path': './data/NTU-RGB-D-120-AGCN/xsub/train_num_frame.npy', 134 | 'l_ratio': [0.95], 135 | 'input_size': 64 136 | } 137 | 138 | self.test_feeder_args = { 139 | 140 | 'data_path': './data/NTU-RGB-D-120-AGCN/xsub/val_data_joint.npy', 141 | 'label_path': './data/NTU-RGB-D-120-AGCN/xsub/val_label.pkl', 142 | 'num_frame_path': './data/NTU-RGB-D-120-AGCN/xsub/val_num_frame.npy', 143 | 'l_ratio': [0.95], 144 | 'input_size': 64 145 | } 146 | 147 | class opts_ntu_120_cross_setup(): 148 | 149 | def __init__(self): 150 | 151 | # graph based model 152 | self.agcn_model_args = { 153 | "num_class": 120, 154 | "num_point": 25, 155 | "num_person": 2, 156 | 'graph_args': { 157 | 'labeling_mode': 'spatial'} 158 | } 159 | 160 | #image based model 161 | 
self.hcn_model_args = { 162 | "in_channel":3, 163 | "out_channel":64, 164 | "window_size":64, 165 | "num_joint":25, 166 | "num_person":2, 167 | "num_class":120 168 | } 169 | 170 | #Sequence based model 171 | self.bi_gru_model_args = { 172 | "en_input_size":150, 173 | "en_hidden_size":1024, 174 | "en_num_layers":3, 175 | "num_class":120 176 | } 177 | 178 | # feeder 179 | self.train_feeder_args = { 180 | 'data_path': './data/NTU-RGB-D-120-AGCN/xsetup/train_data_joint.npy', 181 | 'label_path': './data/NTU-RGB-D-120-AGCN/xsetup/train_label.pkl', 182 | 'num_frame_path': './data/NTU-RGB-D-120-AGCN/xsetup/train_num_frame.npy', 183 | 'l_ratio': [0.95], 184 | 'input_size': 64 185 | } 186 | 187 | self.test_feeder_args = { 188 | 189 | 'data_path': './data/NTU-RGB-D-120-AGCN/xsetup/val_data_joint.npy', 190 | 'label_path': './data/NTU-RGB-D-120-AGCN/xsetup/val_label.pkl', 191 | 'num_frame_path': './data/NTU-RGB-D-120-AGCN/xsetup/val_num_frame.npy', 192 | 'l_ratio': [0.95], 193 | 'input_size': 64 194 | } 195 | -------------------------------------------------------------------------------- /feeder/feeder_pretraining_inter.py: -------------------------------------------------------------------------------- 1 | # sys 2 | import pickle 3 | 4 | # torch 5 | import torch 6 | from torch.autograd import Variable 7 | from torchvision import transforms 8 | import numpy as np 9 | np.set_printoptions(threshold=np.inf) 10 | import random 11 | 12 | try: 13 | from feeder import augmentations 14 | except: 15 | import augmentations 16 | 17 | 18 | class Feeder(torch.utils.data.Dataset): 19 | """ 20 | Arguments: 21 | data_path: the path to '.npy' data, the shape of data should be (N, C, T, V, M) 22 | """ 23 | 24 | def __init__(self, 25 | data_path, 26 | num_frame_path, 27 | l_ratio, 28 | input_size, 29 | input_representations, 30 | mmap=True): 31 | 32 | self.data_path = data_path 33 | self.num_frame_path= num_frame_path 34 | self.input_size=input_size 35 | self.input_representations=input_representations 36 | self.crop_resize =True 37 | self.l_ratio = l_ratio 38 | 39 | 40 | self.load_data(mmap) 41 | self.N, self.C, self.T, self.V, self.M = self.data.shape 42 | print(self.data.shape,len(self.number_of_frames)) 43 | print("l_ratio",self.l_ratio) 44 | 45 | def load_data(self, mmap): 46 | # data: N C V T M 47 | 48 | # load data 49 | if mmap: 50 | self.data = np.load(self.data_path, mmap_mode='r') 51 | else: 52 | self.data = np.load(self.data_path) 53 | 54 | # load num of valid frame length 55 | self.number_of_frames= np.load(self.num_frame_path) 56 | 57 | def __len__(self): 58 | return self.N 59 | 60 | def __iter__(self): 61 | return self 62 | 63 | def __getitem__(self, index): 64 | 65 | # get raw input 66 | 67 | # input: C, T, V, M 68 | data_numpy = np.array(self.data[index]) 69 | number_of_frames = self.number_of_frames[index] 70 | 71 | # apply spatio-temporal augmentations to generate view 1 72 | 73 | # temporal crop-resize 74 | data_numpy_v1_crop = augmentations.temporal_cropresize(data_numpy, number_of_frames, self.l_ratio, self.input_size) 75 | 76 | # randomly select one of the spatial augmentations 77 | flip_prob = random.random() 78 | if flip_prob < 0.5: 79 | data_numpy_v1 = augmentations.pose_augmentation(data_numpy_v1_crop) 80 | else: 81 | data_numpy_v1 = augmentations.joint_courruption(data_numpy_v1_crop) 82 | 83 | 84 | # apply spatio-temporal augmentations to generate view 2 85 | 86 | # temporal crop-resize 87 | data_numpy_v2_crop = augmentations.temporal_cropresize(data_numpy,number_of_frames, self.l_ratio, 
self.input_size) 88 | 89 | # randomly select one of the spatial augmentations 90 | flip_prob = random.random() 91 | if flip_prob < 0.5: 92 | data_numpy_v2 = augmentations.pose_augmentation(data_numpy_v2_crop) 93 | else: 94 | data_numpy_v2 = augmentations.joint_courruption(data_numpy_v2_crop) 95 | 96 | 97 | # convert augmented views into input formats based on skeleton-representations 98 | 99 | if self.input_representations == "seq-based_and_graph-based" or self.input_representations == "seq-based_and_image-based" : 100 | 101 | # Input View 1 102 | #sequence-based input of view 1 ---> shpae (64 X 150) 103 | input_s1_v1 = data_numpy_v1.transpose(1,2,0,3) 104 | input_s1_v1 = input_s1_v1.reshape(-1,150).astype('float32') 105 | #graph-based / image-based input of view 1 ---> shape (3, 64, 25, 2) 106 | input_s2_v1 = data_numpy_v1.astype('float32') 107 | 108 | # Input View 2 109 | #sequence-based input of view 2 ---> shpae (64 X 150) 110 | input_s1_v2 = data_numpy_v2.transpose(1,2,0,3) 111 | input_s1_v2 = input_s1_v2.reshape(-1,150).astype('float32') 112 | #graph-based / image-based input of view 2 ---> shape (3, 64, 25, 2) 113 | input_s2_v2 = data_numpy_v2.astype('float32') 114 | 115 | elif self.input_representations == "graph-based_and_image-based": 116 | 117 | # Input View 1 118 | #graph-based and image-based inputs of view 1 ---> shape (3, 64, 25, 2) 119 | input_s1_v1 = data_numpy_v1.astype('float32') 120 | input_s2_v1 = data_numpy_v1.astype('float32') 121 | 122 | # Input View 2 123 | #graph-based and image-based inputs of view 2 ---> shape (3, 64, 25, 2) 124 | input_s1_v2 = data_numpy_v2.astype('float32') 125 | input_s2_v2 = data_numpy_v2.astype('float32') 126 | 127 | elif self.input_representations == "seq-based_and_trans-based": 128 | # Input View 1 129 | #sequence-based input of view 1 ---> shpae (64 X 150) 130 | input_s1_v1 = data_numpy_v1.transpose(1,2,0,3) 131 | input_s1_v1 = input_s1_v1.reshape(-1,150).astype('float32') 132 | #sequence-based input of view 1 ---> shpae (64 X 150) 133 | input_s2_v1 = data_numpy_v1.transpose(1,2,0,3) 134 | input_s2_v1 = input_s2_v1.reshape(-1,150).astype('float32') 135 | 136 | # Input View 2 137 | #sequence-based input of view 2 ---> shpae (64 X 150) 138 | input_s1_v2 = data_numpy_v2.transpose(1,2,0,3) 139 | input_s1_v2 = input_s1_v2.reshape(-1,150).astype('float32') 140 | #sequence-based input of view 1 ---> shpae (64 X 150) 141 | input_s2_v2 = data_numpy_v2.transpose(1,2,0,3) 142 | input_s2_v2 = input_s2_v2.reshape(-1,150).astype('float32') 143 | 144 | return input_s1_v1, input_s2_v1, input_s1_v2, input_s2_v2 145 | -------------------------------------------------------------------------------- /moco/utils/graph.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class Graph(): 4 | def __init__(self, 5 | layout='openpose', 6 | strategy='uniform', 7 | max_hop=1, 8 | dilation=1): 9 | self.max_hop = max_hop 10 | self.dilation = dilation 11 | 12 | self.get_edge(layout) 13 | self.hop_dis = get_hop_distance( 14 | self.num_node, self.edge, max_hop=max_hop) 15 | self.get_adjacency(strategy) 16 | 17 | def __str__(self): 18 | return self.A 19 | 20 | def get_edge(self, layout): 21 | if layout == 'openpose': 22 | self.num_node = 18 23 | self_link = [(i, i) for i in range(self.num_node)] 24 | neighbor_link = [(4, 3), (3, 2), (7, 6), (6, 5), (13, 12), (12, 25 | 11), 26 | (10, 9), (9, 8), (11, 5), (8, 2), (5, 1), (2, 1), 27 | (0, 1), (15, 0), (14, 0), (17, 15), (16, 14)] 28 | self.edge = self_link + 
neighbor_link 29 | self.center = 1 30 | elif layout == 'ntu-rgb+d': 31 | self.num_node = 25 32 | self_link = [(i, i) for i in range(self.num_node)] 33 | neighbor_1base = [(1, 2), (2, 21), (3, 21), (4, 3), (5, 21), 34 | (6, 5), (7, 6), (8, 7), (9, 21), (10, 9), 35 | (11, 10), (12, 11), (13, 1), (14, 13), (15, 14), 36 | (16, 15), (17, 1), (18, 17), (19, 18), (20, 19), 37 | (22, 23), (23, 8), (24, 25), (25, 12)] 38 | neighbor_link = [(i - 1, j - 1) for (i, j) in neighbor_1base] 39 | self.edge = self_link + neighbor_link 40 | self.center = 21 - 1 41 | elif layout == 'ntu_edge': 42 | self.num_node = 24 43 | self_link = [(i, i) for i in range(self.num_node)] 44 | neighbor_1base = [(1, 2), (3, 2), (4, 3), (5, 2), (6, 5), (7, 6), 45 | (8, 7), (9, 2), (10, 9), (11, 10), (12, 11), 46 | (13, 1), (14, 13), (15, 14), (16, 15), (17, 1), 47 | (18, 17), (19, 18), (20, 19), (21, 22), (22, 8), 48 | (23, 24), (24, 12)] 49 | neighbor_link = [(i - 1, j - 1) for (i, j) in neighbor_1base] 50 | self.edge = self_link + neighbor_link 51 | self.center = 2 52 | # elif layout=='customer settings' 53 | # pass 54 | else: 55 | raise ValueError("Do Not Exist This Layout.") 56 | 57 | def get_adjacency(self, strategy): 58 | valid_hop = range(0, self.max_hop + 1, self.dilation) 59 | adjacency = np.zeros((self.num_node, self.num_node)) 60 | for hop in valid_hop: 61 | adjacency[self.hop_dis == hop] = 1 62 | normalize_adjacency = normalize_digraph(adjacency) 63 | 64 | if strategy == 'uniform': 65 | A = np.zeros((1, self.num_node, self.num_node)) 66 | A[0] = normalize_adjacency 67 | self.A = A 68 | elif strategy == 'distance': 69 | A = np.zeros((len(valid_hop), self.num_node, self.num_node)) 70 | for i, hop in enumerate(valid_hop): 71 | A[i][self.hop_dis == hop] = normalize_adjacency[self.hop_dis == 72 | hop] 73 | self.A = A 74 | elif strategy == 'spatial': 75 | A = [] 76 | for hop in valid_hop: 77 | a_root = np.zeros((self.num_node, self.num_node)) 78 | a_close = np.zeros((self.num_node, self.num_node)) 79 | a_further = np.zeros((self.num_node, self.num_node)) 80 | for i in range(self.num_node): 81 | for j in range(self.num_node): 82 | if self.hop_dis[j, i] == hop: 83 | if self.hop_dis[j, self.center] == self.hop_dis[ 84 | i, self.center]: 85 | a_root[j, i] = normalize_adjacency[j, i] 86 | elif self.hop_dis[j, self. 87 | center] > self.hop_dis[i, self. 
88 | center]: 89 | a_close[j, i] = normalize_adjacency[j, i] 90 | else: 91 | a_further[j, i] = normalize_adjacency[j, i] 92 | if hop == 0: 93 | A.append(a_root) 94 | else: 95 | A.append(a_root + a_close) 96 | A.append(a_further) 97 | A = np.stack(A) 98 | self.A = A 99 | else: 100 | raise ValueError("Do Not Exist This Strategy") 101 | 102 | 103 | def get_hop_distance(num_node, edge, max_hop=1): 104 | A = np.zeros((num_node, num_node)) 105 | for i, j in edge: 106 | A[j, i] = 1 107 | A[i, j] = 1 108 | 109 | # compute hop steps 110 | hop_dis = np.zeros((num_node, num_node)) + np.inf 111 | transfer_mat = [np.linalg.matrix_power(A, d) for d in range(max_hop + 1)] 112 | arrive_mat = (np.stack(transfer_mat) > 0) 113 | for d in range(max_hop, -1, -1): 114 | hop_dis[arrive_mat[d]] = d 115 | return hop_dis 116 | 117 | 118 | def normalize_digraph(A): 119 | Dl = np.sum(A, 0) 120 | num_node = A.shape[0] 121 | Dn = np.zeros((num_node, num_node)) 122 | for i in range(num_node): 123 | if Dl[i] > 0: 124 | Dn[i, i] = Dl[i]**(-1) 125 | AD = np.dot(A, Dn) 126 | return AD 127 | 128 | 129 | def normalize_undigraph(A): 130 | Dl = np.sum(A, 0) 131 | num_node = A.shape[0] 132 | Dn = np.zeros((num_node, num_node)) 133 | for i in range(num_node): 134 | if Dl[i] > 0: 135 | Dn[i, i] = Dl[i]**(-0.5) 136 | DAD = np.dot(np.dot(Dn, A), Dn) 137 | return DAD 138 | -------------------------------------------------------------------------------- /data_gen/ntu_gendata.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import pickle 3 | from tqdm import tqdm 4 | import sys 5 | from numpy.lib.format import open_memmap 6 | 7 | sys.path.extend(['../']) 8 | from data_gen.preprocess import pre_normalization 9 | 10 | # # ntu 60 11 | # training_subjects = [ 12 | # 1, 2, 4, 5, 8, 9, 13, 14, 15, 16, 17, 18, 19, 25, 27, 28, 31, 34, 35, 38 13 | # ] 14 | 15 | training_cameras = [2, 3] 16 | 17 | # ntu 120 18 | training_subjects = [ 19 | 1, 2, 4, 5, 8, 9, 13, 14, 15, 16, 17, 18, 19, 25, 27, 28, 31, 34, 35, 38, 20 | 45, 46, 47, 49, 50, 52, 53, 54, 55, 56, 57, 58, 59, 70, 74, 78,80, 81, 82, 21 | 83, 84, 85, 86, 89, 91, 92, 93, 94, 95, 97, 98, 100, 103 22 | ] 23 | training_setups = [ 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32] 24 | 25 | max_body_true = 2 26 | max_body_kinect = 4 27 | num_joint = 25 28 | max_frame = 300 29 | 30 | import numpy as np 31 | import os 32 | 33 | 34 | def read_skeleton_filter(file): 35 | with open(file, 'r') as f: 36 | skeleton_sequence = {} 37 | skeleton_sequence['numFrame'] = int(f.readline()) 38 | skeleton_sequence['frameInfo'] = [] 39 | # num_body = 0 40 | for t in range(skeleton_sequence['numFrame']): 41 | frame_info = {} 42 | frame_info['numBody'] = int(f.readline()) 43 | frame_info['bodyInfo'] = [] 44 | 45 | for m in range(frame_info['numBody']): 46 | body_info = {} 47 | body_info_key = [ 48 | 'bodyID', 'clipedEdges', 'handLeftConfidence', 49 | 'handLeftState', 'handRightConfidence', 'handRightState', 50 | 'isResticted', 'leanX', 'leanY', 'trackingState' 51 | ] 52 | body_info = { 53 | k: float(v) 54 | for k, v in zip(body_info_key, f.readline().split()) 55 | } 56 | body_info['numJoint'] = int(f.readline()) 57 | body_info['jointInfo'] = [] 58 | for v in range(body_info['numJoint']): 59 | joint_info_key = [ 60 | 'x', 'y', 'z', 'depthX', 'depthY', 'colorX', 'colorY', 61 | 'orientationW', 'orientationX', 'orientationY', 62 | 'orientationZ', 'trackingState' 63 | ] 64 | joint_info = { 65 | k: float(v) 66 | for k, v in zip(joint_info_key, 
f.readline().split()) 67 | } 68 | body_info['jointInfo'].append(joint_info) 69 | frame_info['bodyInfo'].append(body_info) 70 | skeleton_sequence['frameInfo'].append(frame_info) 71 | 72 | return skeleton_sequence 73 | 74 | 75 | def get_nonzero_std(s): # tvc 76 | index = s.sum(-1).sum(-1) != 0 # select valid frames 77 | s = s[index] 78 | if len(s) != 0: 79 | s = s[:, :, 0].std() + s[:, :, 1].std() + s[:, :, 2].std() # three channels 80 | else: 81 | s = 0 82 | return s 83 | 84 | 85 | def read_xyz(file, max_body=4, num_joint=25): # 取了前两个body 86 | seq_info = read_skeleton_filter(file) 87 | data = np.zeros((max_body, seq_info['numFrame'], num_joint, 3)) 88 | for n, f in enumerate(seq_info['frameInfo']): 89 | for m, b in enumerate(f['bodyInfo']): 90 | for j, v in enumerate(b['jointInfo']): 91 | if m < max_body and j < num_joint: 92 | data[m, n, j, :] = [v['x'], v['y'], v['z']] 93 | else: 94 | pass 95 | 96 | # select two max energy body 97 | energy = np.array([get_nonzero_std(x) for x in data]) 98 | index = energy.argsort()[::-1][0:max_body_true] 99 | data = data[index] 100 | 101 | data = data.transpose(3, 1, 2, 0) 102 | return data 103 | 104 | 105 | def gendata(data_path, out_path, ignored_sample_path=None, benchmark='xview', part='eval'): 106 | if ignored_sample_path != None: 107 | with open(ignored_sample_path, 'r') as f: 108 | ignored_samples = [ 109 | line.strip() + '.skeleton' for line in f.readlines() 110 | ] 111 | else: 112 | ignored_samples = [] 113 | sample_name = [] 114 | sample_label = [] 115 | for filename in os.listdir(data_path): 116 | if filename in ignored_samples: 117 | continue 118 | action_class = int( 119 | filename[filename.find('A') + 1:filename.find('A') + 4]) 120 | subject_id = int( 121 | filename[filename.find('P') + 1:filename.find('P') + 4]) 122 | camera_id = int( 123 | filename[filename.find('C') + 1:filename.find('C') + 4]) 124 | setup_id = int( 125 | filename[filename.find('S') + 1:filename.find('S') + 4]) 126 | 127 | if benchmark == 'xview': 128 | istraining = (camera_id in training_cameras) 129 | elif benchmark == 'xsub': 130 | istraining = (subject_id in training_subjects) 131 | elif benchmark == 'xsetup': 132 | istraining = (setup_id in training_setups) 133 | else: 134 | raise ValueError() 135 | 136 | if part == 'train': 137 | issample = istraining 138 | elif part == 'val': 139 | issample = not (istraining) 140 | else: 141 | raise ValueError() 142 | 143 | if issample: 144 | sample_name.append(filename) 145 | sample_label.append(action_class - 1) 146 | 147 | with open('{}/{}_label.pkl'.format(out_path, part), 'wb') as f: 148 | pickle.dump((sample_name, list(sample_label)), f) 149 | 150 | fl = open_memmap( 151 | '{}/{}_num_frame.npy'.format(out_path, part), 152 | dtype='int', 153 | mode='w+', 154 | shape=(len(sample_label),)) 155 | 156 | fp = np.zeros((len(sample_label), 3, max_frame, num_joint, max_body_true), dtype=np.float32) 157 | 158 | for i, s in enumerate(tqdm(sample_name)): 159 | data = read_xyz(os.path.join(data_path, s), max_body=max_body_kinect, num_joint=num_joint) 160 | fp[i, :, 0:data.shape[1], :, :] = data 161 | fl[i] = data.shape[1] # num_frame 162 | 163 | fp = pre_normalization(fp) 164 | np.save('{}/{}_data_joint.npy'.format(out_path, part), fp) 165 | 166 | 167 | if __name__ == '__main__': 168 | parser = argparse.ArgumentParser(description='NTU-RGB-D Data Converter.') 169 | # parser.add_argument('--data_path', default='/data/user/dataset/NTU/nturgb+d_skeletons_60/') 170 | # parser.add_argument('--ignored_sample_path', 171 | # 
default='resource/NTU_RGBD60_samples_with_missing_skeletons.txt') 172 | # parser.add_argument('--out_folder', default='../data/NTU-RGB-D-60-AGCN/') 173 | # benchmark = ['xsub', 'xview'] 174 | 175 | parser.add_argument('--data_path', default='/data/user/dataset/NTU/nturgb+d_skeletons_120/') 176 | parser.add_argument('--ignored_sample_path', 177 | default='resource/NTU_RGBD120_samples_with_missing_skeletons.txt') 178 | parser.add_argument('--out_folder', default='../data/NTU-RGB-D-120-AGCN/') 179 | benchmark = ['xsub','xsetup', ] 180 | 181 | part = ['train', 'val'] 182 | arg = parser.parse_args() 183 | 184 | for b in benchmark: 185 | for p in part: 186 | out_path = os.path.join(arg.out_folder, b) 187 | if not os.path.exists(out_path): 188 | os.makedirs(out_path) 189 | print(b, p) 190 | gendata( 191 | arg.data_path, 192 | out_path, 193 | arg.ignored_sample_path, 194 | benchmark=b, 195 | part=p) 196 | -------------------------------------------------------------------------------- /data_gen/resource/NTU_RGBD60_samples_with_missing_skeletons.txt: -------------------------------------------------------------------------------- 1 | S001C002P005R002A008 2 | S001C002P006R001A008 3 | S001C003P002R001A055 4 | S001C003P002R002A012 5 | S001C003P005R002A004 6 | S001C003P005R002A005 7 | S001C003P005R002A006 8 | S001C003P006R002A008 9 | S002C002P011R002A030 10 | S002C003P008R001A020 11 | S002C003P010R002A010 12 | S002C003P011R002A007 13 | S002C003P011R002A011 14 | S002C003P014R002A007 15 | S003C001P019R001A055 16 | S003C002P002R002A055 17 | S003C002P018R002A055 18 | S003C003P002R001A055 19 | S003C003P016R001A055 20 | S003C003P018R002A024 21 | S004C002P003R001A013 22 | S004C002P008R001A009 23 | S004C002P020R001A003 24 | S004C002P020R001A004 25 | S004C002P020R001A012 26 | S004C002P020R001A020 27 | S004C002P020R001A021 28 | S004C002P020R001A036 29 | S005C002P004R001A001 30 | S005C002P004R001A003 31 | S005C002P010R001A016 32 | S005C002P010R001A017 33 | S005C002P010R001A048 34 | S005C002P010R001A049 35 | S005C002P016R001A009 36 | S005C002P016R001A010 37 | S005C002P018R001A003 38 | S005C002P018R001A028 39 | S005C002P018R001A029 40 | S005C003P016R002A009 41 | S005C003P018R002A013 42 | S005C003P021R002A057 43 | S006C001P001R002A055 44 | S006C002P007R001A005 45 | S006C002P007R001A006 46 | S006C002P016R001A043 47 | S006C002P016R001A051 48 | S006C002P016R001A052 49 | S006C002P022R001A012 50 | S006C002P023R001A020 51 | S006C002P023R001A021 52 | S006C002P023R001A022 53 | S006C002P023R001A023 54 | S006C002P024R001A018 55 | S006C002P024R001A019 56 | S006C003P001R002A013 57 | S006C003P007R002A009 58 | S006C003P007R002A010 59 | S006C003P007R002A025 60 | S006C003P016R001A060 61 | S006C003P017R001A055 62 | S006C003P017R002A013 63 | S006C003P017R002A014 64 | S006C003P017R002A015 65 | S006C003P022R002A013 66 | S007C001P018R002A050 67 | S007C001P025R002A051 68 | S007C001P028R001A050 69 | S007C001P028R001A051 70 | S007C001P028R001A052 71 | S007C002P008R002A008 72 | S007C002P015R002A055 73 | S007C002P026R001A008 74 | S007C002P026R001A009 75 | S007C002P026R001A010 76 | S007C002P026R001A011 77 | S007C002P026R001A012 78 | S007C002P026R001A050 79 | S007C002P027R001A011 80 | S007C002P027R001A013 81 | S007C002P028R002A055 82 | S007C003P007R001A002 83 | S007C003P007R001A004 84 | S007C003P019R001A060 85 | S007C003P027R002A001 86 | S007C003P027R002A002 87 | S007C003P027R002A003 88 | S007C003P027R002A004 89 | S007C003P027R002A005 90 | S007C003P027R002A006 91 | S007C003P027R002A007 92 | S007C003P027R002A008 93 | S007C003P027R002A009 94 | 
S007C003P027R002A010 95 | S007C003P027R002A011 96 | S007C003P027R002A012 97 | S007C003P027R002A013 98 | S008C002P001R001A009 99 | S008C002P001R001A010 100 | S008C002P001R001A014 101 | S008C002P001R001A015 102 | S008C002P001R001A016 103 | S008C002P001R001A018 104 | S008C002P001R001A019 105 | S008C002P008R002A059 106 | S008C002P025R001A060 107 | S008C002P029R001A004 108 | S008C002P031R001A005 109 | S008C002P031R001A006 110 | S008C002P032R001A018 111 | S008C002P034R001A018 112 | S008C002P034R001A019 113 | S008C002P035R001A059 114 | S008C002P035R002A002 115 | S008C002P035R002A005 116 | S008C003P007R001A009 117 | S008C003P007R001A016 118 | S008C003P007R001A017 119 | S008C003P007R001A018 120 | S008C003P007R001A019 121 | S008C003P007R001A020 122 | S008C003P007R001A021 123 | S008C003P007R001A022 124 | S008C003P007R001A023 125 | S008C003P007R001A025 126 | S008C003P007R001A026 127 | S008C003P007R001A028 128 | S008C003P007R001A029 129 | S008C003P007R002A003 130 | S008C003P008R002A050 131 | S008C003P025R002A002 132 | S008C003P025R002A011 133 | S008C003P025R002A012 134 | S008C003P025R002A016 135 | S008C003P025R002A020 136 | S008C003P025R002A022 137 | S008C003P025R002A023 138 | S008C003P025R002A030 139 | S008C003P025R002A031 140 | S008C003P025R002A032 141 | S008C003P025R002A033 142 | S008C003P025R002A049 143 | S008C003P025R002A060 144 | S008C003P031R001A001 145 | S008C003P031R002A004 146 | S008C003P031R002A014 147 | S008C003P031R002A015 148 | S008C003P031R002A016 149 | S008C003P031R002A017 150 | S008C003P032R002A013 151 | S008C003P033R002A001 152 | S008C003P033R002A011 153 | S008C003P033R002A012 154 | S008C003P034R002A001 155 | S008C003P034R002A012 156 | S008C003P034R002A022 157 | S008C003P034R002A023 158 | S008C003P034R002A024 159 | S008C003P034R002A044 160 | S008C003P034R002A045 161 | S008C003P035R002A016 162 | S008C003P035R002A017 163 | S008C003P035R002A018 164 | S008C003P035R002A019 165 | S008C003P035R002A020 166 | S008C003P035R002A021 167 | S009C002P007R001A001 168 | S009C002P007R001A003 169 | S009C002P007R001A014 170 | S009C002P008R001A014 171 | S009C002P015R002A050 172 | S009C002P016R001A002 173 | S009C002P017R001A028 174 | S009C002P017R001A029 175 | S009C003P017R002A030 176 | S009C003P025R002A054 177 | S010C001P007R002A020 178 | S010C002P016R002A055 179 | S010C002P017R001A005 180 | S010C002P017R001A018 181 | S010C002P017R001A019 182 | S010C002P019R001A001 183 | S010C002P025R001A012 184 | S010C003P007R002A043 185 | S010C003P008R002A003 186 | S010C003P016R001A055 187 | S010C003P017R002A055 188 | S011C001P002R001A008 189 | S011C001P018R002A050 190 | S011C002P008R002A059 191 | S011C002P016R002A055 192 | S011C002P017R001A020 193 | S011C002P017R001A021 194 | S011C002P018R002A055 195 | S011C002P027R001A009 196 | S011C002P027R001A010 197 | S011C002P027R001A037 198 | S011C003P001R001A055 199 | S011C003P002R001A055 200 | S011C003P008R002A012 201 | S011C003P015R001A055 202 | S011C003P016R001A055 203 | S011C003P019R001A055 204 | S011C003P025R001A055 205 | S011C003P028R002A055 206 | S012C001P019R001A060 207 | S012C001P019R002A060 208 | S012C002P015R001A055 209 | S012C002P017R002A012 210 | S012C002P025R001A060 211 | S012C003P008R001A057 212 | S012C003P015R001A055 213 | S012C003P015R002A055 214 | S012C003P016R001A055 215 | S012C003P017R002A055 216 | S012C003P018R001A055 217 | S012C003P018R001A057 218 | S012C003P019R002A011 219 | S012C003P019R002A012 220 | S012C003P025R001A055 221 | S012C003P027R001A055 222 | S012C003P027R002A009 223 | S012C003P028R001A035 224 | S012C003P028R002A055 225 | S013C001P015R001A054 
226 | S013C001P017R002A054 227 | S013C001P018R001A016 228 | S013C001P028R001A040 229 | S013C002P015R001A054 230 | S013C002P017R002A054 231 | S013C002P028R001A040 232 | S013C003P008R002A059 233 | S013C003P015R001A054 234 | S013C003P017R002A054 235 | S013C003P025R002A022 236 | S013C003P027R001A055 237 | S013C003P028R001A040 238 | S014C001P027R002A040 239 | S014C002P015R001A003 240 | S014C002P019R001A029 241 | S014C002P025R002A059 242 | S014C002P027R002A040 243 | S014C002P039R001A050 244 | S014C003P007R002A059 245 | S014C003P015R002A055 246 | S014C003P019R002A055 247 | S014C003P025R001A048 248 | S014C003P027R002A040 249 | S015C001P008R002A040 250 | S015C001P016R001A055 251 | S015C001P017R001A055 252 | S015C001P017R002A055 253 | S015C002P007R001A059 254 | S015C002P008R001A003 255 | S015C002P008R001A004 256 | S015C002P008R002A040 257 | S015C002P015R001A002 258 | S015C002P016R001A001 259 | S015C002P016R002A055 260 | S015C003P008R002A007 261 | S015C003P008R002A011 262 | S015C003P008R002A012 263 | S015C003P008R002A028 264 | S015C003P008R002A040 265 | S015C003P025R002A012 266 | S015C003P025R002A017 267 | S015C003P025R002A020 268 | S015C003P025R002A021 269 | S015C003P025R002A030 270 | S015C003P025R002A033 271 | S015C003P025R002A034 272 | S015C003P025R002A036 273 | S015C003P025R002A037 274 | S015C003P025R002A044 275 | S016C001P019R002A040 276 | S016C001P025R001A011 277 | S016C001P025R001A012 278 | S016C001P025R001A060 279 | S016C001P040R001A055 280 | S016C001P040R002A055 281 | S016C002P008R001A011 282 | S016C002P019R002A040 283 | S016C002P025R002A012 284 | S016C003P008R001A011 285 | S016C003P008R002A002 286 | S016C003P008R002A003 287 | S016C003P008R002A004 288 | S016C003P008R002A006 289 | S016C003P008R002A009 290 | S016C003P019R002A040 291 | S016C003P039R002A016 292 | S017C001P016R002A031 293 | S017C002P007R001A013 294 | S017C002P008R001A009 295 | S017C002P015R001A042 296 | S017C002P016R002A031 297 | S017C002P016R002A055 298 | S017C003P007R002A013 299 | S017C003P008R001A059 300 | S017C003P016R002A031 301 | S017C003P017R001A055 302 | S017C003P020R001A059 303 | -------------------------------------------------------------------------------- /options/options_classification.py: -------------------------------------------------------------------------------- 1 | class opts_ntu_60_cross_view(): 2 | 3 | def __init__(self): 4 | 5 | # graph based model 6 | self.agcn_model_args = { 7 | "num_class": 60, 8 | "num_point": 25, 9 | "num_person": 2, 10 | 'graph_args': { 11 | 'labeling_mode': 'spatial'} 12 | } 13 | 14 | #image based model 15 | self.hcn_model_args = { 16 | "in_channel":3, 17 | "out_channel":64, 18 | "window_size":64, 19 | "num_joint":25, 20 | "num_person":2, 21 | "num_class":60 22 | } 23 | 24 | #Sequence based model 25 | self.bi_gru_model_args = { 26 | "en_input_size":150, 27 | "en_hidden_size":1024, 28 | "en_num_layers":3, 29 | "num_class":60 30 | } 31 | 32 | # feeder 33 | self.train_feeder_args = { 34 | 'data_path': './data/NTU-RGB-D-60-AGCN/xview/train_data_joint.npy', 35 | 'label_path': './data/NTU-RGB-D-60-AGCN/xview/train_label.pkl', 36 | 'num_frame_path': './data/NTU-RGB-D-60-AGCN/xview/train_num_frame.npy', 37 | 'l_ratio': [0.5,1.0], 38 | 'input_size': 64 39 | } 40 | 41 | self.test_feeder_args = { 42 | 43 | 'data_path': './data/NTU-RGB-D-60-AGCN/xview/val_data_joint.npy', 44 | 'label_path': './data/NTU-RGB-D-60-AGCN/xview/val_label.pkl', 45 | 'num_frame_path': './data/NTU-RGB-D-60-AGCN/xview/val_num_frame.npy', 46 | 'l_ratio': [0.95], 47 | 'input_size': 64 48 | } 49 | 50 | class 
opts_ntu_60_cross_subject(): 51 | 52 | def __init__(self): 53 | 54 | # graph based model 55 | self.agcn_model_args = { 56 | "num_class": 60, 57 | "num_point": 25, 58 | "num_person": 2, 59 | 'graph_args': { 60 | 'labeling_mode': 'spatial'} 61 | } 62 | 63 | #image based model 64 | self.hcn_model_args = { 65 | "in_channel":3, 66 | "out_channel":64, 67 | "window_size":64, 68 | "num_joint":25, 69 | "num_person":2, 70 | "num_class":60 71 | } 72 | 73 | #Sequence based model 74 | self.bi_gru_model_args = { 75 | "en_input_size":150, 76 | "en_hidden_size":1024, 77 | "en_num_layers":3, 78 | "num_class":60 79 | } 80 | 81 | # feeder 82 | self.train_feeder_args = { 83 | 'data_path': './data/NTU-RGB-D-60-AGCN/xsub/train_data_joint.npy', 84 | 'label_path': './data/NTU-RGB-D-60-AGCN/xsub/train_label.pkl', 85 | 'num_frame_path': './data/NTU-RGB-D-60-AGCN/xsub/train_num_frame.npy', 86 | 'l_ratio': [0.5,1.0], 87 | 'input_size': 64 88 | } 89 | 90 | self.test_feeder_args = { 91 | 92 | 'data_path': './data/NTU-RGB-D-60-AGCN/xsub/val_data_joint.npy', 93 | 'label_path': './data/NTU-RGB-D-60-AGCN/xsub/val_label.pkl', 94 | 'num_frame_path': './data/NTU-RGB-D-60-AGCN/xsub/val_num_frame.npy', 95 | 'l_ratio': [0.95], 96 | 'input_size': 64 97 | } 98 | 99 | class opts_ntu_120_cross_subject(): 100 | def __init__(self): 101 | 102 | # graph based model 103 | self.agcn_model_args = { 104 | "num_class": 120, 105 | "num_point": 25, 106 | "num_person": 2, 107 | 'graph_args': { 108 | 'labeling_mode': 'spatial'} 109 | } 110 | 111 | #image based model 112 | self.hcn_model_args = { 113 | "in_channel":3, 114 | "out_channel":64, 115 | "window_size":64, 116 | "num_joint":25, 117 | "num_person":2, 118 | "num_class":120 119 | } 120 | 121 | #Sequence based model 122 | self.bi_gru_model_args = { 123 | "en_input_size":150, 124 | "en_hidden_size":1024, 125 | "en_num_layers":3, 126 | "num_class":120 127 | } 128 | 129 | # feeder 130 | self.train_feeder_args = { 131 | 'data_path': './data/NTU-RGB-D-120-AGCN/xsub/train_data_joint.npy', 132 | 'label_path': './data/NTU-RGB-D-120-AGCN/xsub/train_label.pkl', 133 | 'num_frame_path': './data/NTU-RGB-D-120-AGCN/xsub/train_num_frame.npy', 134 | 'l_ratio': [0.5,1.0], 135 | 'input_size': 64 136 | } 137 | 138 | self.test_feeder_args = { 139 | 140 | 'data_path': './data/NTU-RGB-D-120-AGCN/xsub/val_data_joint.npy', 141 | 'label_path': './data/NTU-RGB-D-120-AGCN/xsub/val_label.pkl', 142 | 'num_frame_path': './data/NTU-RGB-D-120-AGCN/xsub/val_num_frame.npy', 143 | 'l_ratio': [0.95], 144 | 'input_size': 64 145 | } 146 | 147 | class opts_ntu_120_cross_setup(): 148 | 149 | def __init__(self): 150 | 151 | # graph based model 152 | self.agcn_model_args = { 153 | "num_class": 120, 154 | "num_point": 25, 155 | "num_person": 2, 156 | 'graph_args': { 157 | 'labeling_mode': 'spatial'} 158 | } 159 | 160 | #image based model 161 | self.hcn_model_args = { 162 | "in_channel":3, 163 | "out_channel":64, 164 | "window_size":64, 165 | "num_joint":25, 166 | "num_person":2, 167 | "num_class":120 168 | } 169 | 170 | #Sequence based model 171 | self.bi_gru_model_args = { 172 | "en_input_size":150, 173 | "en_hidden_size":1024, 174 | "en_num_layers":3, 175 | "num_class":120 176 | } 177 | 178 | # feeder 179 | self.train_feeder_args = { 180 | 'data_path': './data/NTU-RGB-D-120-AGCN/xsetup/train_data_joint.npy', 181 | 'label_path': './data/NTU-RGB-D-120-AGCN/xsetup/train_label.pkl', 182 | 'num_frame_path': './data/NTU-RGB-D-120-AGCN/xsetup/train_num_frame.npy', 183 | 'l_ratio': [0.5,1.0], 184 | 'input_size': 64 185 | } 186 | 187 | 
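# Note on 'l_ratio': the classification feeders use [0.5, 1.0] for training and [0.95] for testing.
# In feeder/augmentations.py, crop_subsequence() branches on l_ratio[0] == 0.5: training draws a random
# temporal crop covering 50-100% of the valid frames (with a 64-frame floor), while testing takes a fixed
# center crop spanning 95% of them, before resizing to 'input_size' frames.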
self.test_feeder_args = { 188 | 189 | 'data_path': './data/NTU-RGB-D-120-AGCN/xsetup/val_data_joint.npy', 190 | 'label_path': './data/NTU-RGB-D-120-AGCN/xsetup/val_label.pkl', 191 | 'num_frame_path': './data/NTU-RGB-D-120-AGCN/xsetup/val_num_frame.npy', 192 | 'l_ratio': [0.95], 193 | 'input_size': 64 194 | } 195 | 196 | 197 | class opts_pku_v2_cross_view(): 198 | 199 | def __init__(self): 200 | 201 | # graph based model 202 | self.agcn_model_args = { 203 | "num_class": 120, 204 | "num_point": 25, 205 | "num_person": 2, 206 | 'graph_args': { 207 | 'labeling_mode': 'spatial'} 208 | } 209 | 210 | #image based model 211 | self.hcn_model_args = { 212 | "in_channel":3, 213 | "out_channel":64, 214 | "window_size":64, 215 | "num_joint":25, 216 | "num_person":2, 217 | "num_class":120 218 | } 219 | 220 | #Sequence based model 221 | self.bi_gru_model_args = { 222 | "en_input_size":150, 223 | "en_hidden_size":1024, 224 | "en_num_layers":3, 225 | "num_class":120 226 | } 227 | 228 | # feeder 229 | self.train_feeder_args = { 230 | 'data_path': './data/PKU-MMD-v2-AGCN/xview/train_data_joint.npy', 231 | 'label_path': './data/PKU-MMD-v2-AGCN/xview/train_label.pkl', 232 | 'num_frame_path': './data/PKU-MMD-v2-AGCN/xview/train_num_frame.npy', 233 | 'l_ratio': [0.5,1.0], 234 | 'input_size': 64 235 | } 236 | 237 | self.test_feeder_args = { 238 | 239 | 'data_path': './data/PKU-MMD-v2-AGCN/xview/val_data_joint.npy', 240 | 'label_path': './data/PKU-MMD-v2-AGCN/xview/val_label.pkl', 241 | 'num_frame_path': './data/PKU-MMD-v2-AGCN/xview/val_num_frame.npy', 242 | 'l_ratio': [0.95], 243 | 'input_size': 64 244 | } 245 | 246 | class opts_pku_v2_cross_subject(): 247 | 248 | def __init__(self): 249 | 250 | # graph based model 251 | self.agcn_model_args = { 252 | "num_class": 120, 253 | "num_point": 25, 254 | "num_person": 2, 255 | 'graph_args': { 256 | 'labeling_mode': 'spatial'} 257 | } 258 | 259 | #image based model 260 | self.hcn_model_args = { 261 | "in_channel":3, 262 | "out_channel":64, 263 | "window_size":64, 264 | "num_joint":25, 265 | "num_person":2, 266 | "num_class":51 267 | } 268 | 269 | #Sequence based model 270 | self.bi_gru_model_args = { 271 | "en_input_size":150, 272 | "en_hidden_size":1024, 273 | "en_num_layers":3, 274 | "num_class":51 275 | } 276 | 277 | # feeder 278 | self.train_feeder_args = { 279 | 'data_path': './data/PKU-MMD-v2-AGCN/xsub/train_data_joint.npy', 280 | 'label_path': './data/PKU-MMD-v2-AGCN/xsub/train_label.pkl', 281 | 'num_frame_path': './data/PKU-MMD-v2-AGCN/xsub/train_num_frame.npy', 282 | 'l_ratio': [0.5,1.0], 283 | 'input_size': 64 284 | } 285 | 286 | self.test_feeder_args = { 287 | 288 | 'data_path': './data/PKU-MMD-v2-AGCN/xsub/val_data_joint.npy', 289 | 'label_path': './data/PKU-MMD-v2-AGCN/xsub/val_label.pkl', 290 | 'num_frame_path': './data/PKU-MMD-v2-AGCN/xsub/val_num_frame.npy', 291 | 'l_ratio': [0.95], 292 | 'input_size': 64 293 | } 294 | -------------------------------------------------------------------------------- /moco/builder_cmd.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from .GRU import BIGRU 6 | 7 | def loss_kld(inputs, targets): 8 | inputs = F.log_softmax(inputs, dim=1) 9 | targets = F.softmax(targets, dim=1) 10 | return F.kl_div(inputs, targets, reduction='batchmean') 11 | 12 | # initilize weight 13 | def weights_init_gru(model): 14 | with torch.no_grad(): 15 | for child in list(model.children()): 16 | print(child) 17 | for param in 
list(child.parameters()): 18 | if param.dim() == 2: 19 | nn.init.xavier_uniform_(param) 20 | print('GRU weights initialization finished!') 21 | 22 | class MoCo(nn.Module): 23 | def __init__(self, skeleton_representation, args_bi_gru, dim=128, K=65536, m=0.999, T=0.07, 24 | teacher_T=0.05, student_T=0.1, cmd_weight=1.0, topk=1024, mlp=False, pretrain=True): 25 | super(MoCo, self).__init__() 26 | self.pretrain = pretrain 27 | self.Bone = [(1, 2), (2, 21), (3, 21), (4, 3), (5, 21), (6, 5), (7, 6), (8, 7), (9, 21), 28 | (10, 9), (11, 10), (12, 11), (13, 1), (14, 13), (15, 14), (16, 15), (17, 1), 29 | (18, 17), (19, 18), (20, 19), (21, 21), (22, 23), (23, 8), (24, 25), (25, 12)] 30 | 31 | if not self.pretrain: 32 | self.encoder_q = BIGRU(**args_bi_gru) 33 | self.encoder_q_motion = BIGRU(**args_bi_gru) 34 | self.encoder_q_bone = BIGRU(**args_bi_gru) 35 | weights_init_gru(self.encoder_q) 36 | weights_init_gru(self.encoder_q_motion) 37 | weights_init_gru(self.encoder_q_bone) 38 | else: 39 | self.K = K 40 | self.m = m 41 | self.T = T 42 | self.teacher_T = teacher_T 43 | self.student_T = student_T 44 | self.cmd_weight = cmd_weight 45 | self.topk = topk 46 | mlp=mlp 47 | print(" MoCo parameters",K,m,T,mlp) 48 | print(" CMD parameters: teacher-T %.2f, student-T %.2f, cmd-weight: %.2f, topk: %d"%(teacher_T,student_T,cmd_weight,topk)) 49 | print(skeleton_representation) 50 | 51 | 52 | self.encoder_q = BIGRU(**args_bi_gru) 53 | self.encoder_k = BIGRU(**args_bi_gru) 54 | self.encoder_q_motion = BIGRU(**args_bi_gru) 55 | self.encoder_k_motion = BIGRU(**args_bi_gru) 56 | self.encoder_q_bone = BIGRU(**args_bi_gru) 57 | self.encoder_k_bone = BIGRU(**args_bi_gru) 58 | weights_init_gru(self.encoder_q) 59 | weights_init_gru(self.encoder_q_motion) 60 | weights_init_gru(self.encoder_q_bone) 61 | weights_init_gru(self.encoder_k) 62 | weights_init_gru(self.encoder_k_motion) 63 | weights_init_gru(self.encoder_k_bone) 64 | 65 | #projection heads 66 | if mlp: # hack: brute-force replacement 67 | dim_mlp = self.encoder_q.fc.weight.shape[1] 68 | self.encoder_q.fc = nn.Sequential(nn.Linear(dim_mlp, dim_mlp), 69 | nn.ReLU(), 70 | self.encoder_q.fc) 71 | self.encoder_k.fc = nn.Sequential(nn.Linear(dim_mlp, dim_mlp), 72 | nn.ReLU(), 73 | self.encoder_k.fc) 74 | self.encoder_q_motion.fc = nn.Sequential(nn.Linear(dim_mlp, dim_mlp), 75 | nn.ReLU(), 76 | self.encoder_q_motion.fc) 77 | self.encoder_k_motion.fc = nn.Sequential(nn.Linear(dim_mlp, dim_mlp), 78 | nn.ReLU(), 79 | self.encoder_k_motion.fc) 80 | self.encoder_q_bone.fc = nn.Sequential(nn.Linear(dim_mlp, dim_mlp), 81 | nn.ReLU(), 82 | self.encoder_q_bone.fc) 83 | self.encoder_k_bone.fc = nn.Sequential(nn.Linear(dim_mlp, dim_mlp), 84 | nn.ReLU(), 85 | self.encoder_k_bone.fc) 86 | 87 | for param_q, param_k in zip(self.encoder_q.parameters(), self.encoder_k.parameters()): 88 | param_k.data.copy_(param_q.data) # initialize 89 | param_k.requires_grad = False # not update by gradient 90 | for param_q, param_k in zip(self.encoder_q_motion.parameters(), self.encoder_k_motion.parameters()): 91 | param_k.data.copy_(param_q.data) 92 | param_k.requires_grad = False 93 | for param_q, param_k in zip(self.encoder_q_bone.parameters(), self.encoder_k_bone.parameters()): 94 | param_k.data.copy_(param_q.data) 95 | param_k.requires_grad = False 96 | 97 | # create the queue 98 | self.register_buffer("queue", torch.randn(dim, self.K)) 99 | self.queue = F.normalize(self.queue, dim=0) 100 | self.register_buffer("queue_ptr", torch.zeros(1, dtype=torch.long)) 101 | 102 | 
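# The motion and bone streams below get their own negative-sample queues, mirroring the joint queue above:
# each is a (dim, K) feature bank of normalized random vectors plus a ring-buffer pointer, refreshed
# batch-by-batch by the corresponding _dequeue_and_enqueue_* method further down.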
self.register_buffer("queue_motion", torch.randn(dim, self.K)) 103 | self.queue_motion = F.normalize(self.queue_motion, dim=0) 104 | self.register_buffer("queue_ptr_motion", torch.zeros(1, dtype=torch.long)) 105 | 106 | self.register_buffer("queue_bone", torch.randn(dim, self.K)) 107 | self.queue_bone = F.normalize(self.queue_bone, dim=0) 108 | self.register_buffer("queue_ptr_bone", torch.zeros(1, dtype=torch.long)) 109 | 110 | @torch.no_grad() 111 | def _momentum_update_key_encoder(self): 112 | """ 113 | Momentum update of the key encoder 114 | """ 115 | for param_q, param_k in zip(self.encoder_q.parameters(), self.encoder_k.parameters()): 116 | param_k.data = param_k.data * self.m + param_q.data * (1. - self.m) 117 | 118 | @torch.no_grad() 119 | def _momentum_update_key_encoder_motion(self): 120 | for param_q, param_k in zip(self.encoder_q_motion.parameters(), self.encoder_k_motion.parameters()): 121 | param_k.data = param_k.data * self.m + param_q.data * (1. - self.m) 122 | 123 | @torch.no_grad() 124 | def _momentum_update_key_encoder_bone(self): 125 | for param_q, param_k in zip(self.encoder_q_bone.parameters(), self.encoder_k_bone.parameters()): 126 | param_k.data = param_k.data * self.m + param_q.data * (1. - self.m) 127 | 128 | 129 | @torch.no_grad() 130 | def _dequeue_and_enqueue(self, keys): 131 | batch_size = keys.shape[0] 132 | ptr = int(self.queue_ptr) 133 | self.queue[:, ptr:ptr + batch_size] = keys.T 134 | ptr = (ptr + batch_size) % self.K # move pointer 135 | self.queue_ptr[0] = ptr 136 | 137 | @torch.no_grad() 138 | def _dequeue_and_enqueue_motion(self, keys): 139 | batch_size = keys.shape[0] 140 | ptr = int(self.queue_ptr_motion) 141 | self.queue_motion[:, ptr:ptr + batch_size] = keys.T 142 | ptr = (ptr + batch_size) % self.K # move pointer 143 | self.queue_ptr_motion[0] = ptr 144 | 145 | @torch.no_grad() 146 | def _dequeue_and_enqueue_bone(self, keys): 147 | batch_size = keys.shape[0] 148 | ptr = int(self.queue_ptr_bone) 149 | self.queue_bone[:, ptr:ptr + batch_size] = keys.T 150 | ptr = (ptr + batch_size) % self.K # move pointer 151 | self.queue_ptr_bone[0] = ptr 152 | 153 | 154 | def forward(self, im_q, im_k=None, view='joint', knn_eval=False): 155 | im_q_motion = torch.zeros_like(im_q) 156 | im_q_motion[:, :, :-1, :, :] = im_q[:, :, 1:, :, :] - im_q[:, :, :-1, :, :] 157 | 158 | im_q_bone = torch.zeros_like(im_q) 159 | for v1, v2 in self.Bone: 160 | im_q_bone[:, :, :, v1 - 1, :] = im_q[:, :, :, v1 - 1, :] - im_q[:, :, :, v2 - 1, :] 161 | 162 | # Permute and Reshape 163 | N, C, T, V, M = im_q.size() 164 | im_q = im_q.permute(0,2,3,1,4).reshape(N,T,-1) 165 | im_q_motion = im_q_motion.permute(0,2,3,1,4).reshape(N,T,-1) 166 | im_q_bone = im_q_bone.permute(0,2,3,1,4).reshape(N,T,-1) 167 | 168 | if not self.pretrain: 169 | if view == 'joint': 170 | return self.encoder_q(im_q, knn_eval) 171 | elif view == 'motion': 172 | return self.encoder_q_motion(im_q_motion, knn_eval) 173 | elif view == 'bone': 174 | return self.encoder_q_bone(im_q_bone, knn_eval) 175 | elif view == 'all': 176 | return (self.encoder_q(im_q, knn_eval) + \ 177 | self.encoder_q_motion(im_q_motion, knn_eval) + \ 178 | self.encoder_q_bone(im_q_bone, knn_eval)) / 3. 
179 | else: 180 | raise ValueError 181 | 182 | im_k_motion = torch.zeros_like(im_k) 183 | im_k_motion[:, :, :-1, :, :] = im_k[:, :, 1:, :, :] - im_k[:, :, :-1, :, :] 184 | 185 | im_k_bone = torch.zeros_like(im_k) 186 | for v1, v2 in self.Bone: 187 | im_k_bone[:, :, :, v1 - 1, :] = im_k[:, :, :, v1 - 1, :] - im_k[:, :, :, v2 - 1, :] 188 | 189 | # Permute and Reshape 190 | im_k = im_k.permute(0,2,3,1,4).reshape(N,T,-1) 191 | im_k_motion = im_k_motion.permute(0,2,3,1,4).reshape(N,T,-1) 192 | im_k_bone = im_k_bone.permute(0,2,3,1,4).reshape(N,T,-1) 193 | 194 | # compute query features 195 | q = self.encoder_q(im_q) # queries: NxC 196 | q = F.normalize(q, dim=1) 197 | 198 | q_motion = self.encoder_q_motion(im_q_motion) 199 | q_motion = F.normalize(q_motion, dim=1) 200 | 201 | q_bone = self.encoder_q_bone(im_q_bone) 202 | q_bone = F.normalize(q_bone, dim=1) 203 | 204 | # compute key features for s1 and s2 skeleton representations 205 | with torch.no_grad(): # no gradient to keys 206 | self._momentum_update_key_encoder() # update the key encoder 207 | self._momentum_update_key_encoder_motion() 208 | self._momentum_update_key_encoder_bone() 209 | 210 | k = self.encoder_k(im_k) # keys: NxC 211 | k = F.normalize(k, dim=1) 212 | 213 | k_motion = self.encoder_k_motion(im_k_motion) 214 | k_motion = F.normalize(k_motion, dim=1) 215 | 216 | k_bone = self.encoder_k_bone(im_k_bone) 217 | k_bone = F.normalize(k_bone, dim=1) 218 | 219 | # MOCO 220 | # compute logits 221 | # Einstein sum is more intuitive 222 | # positive logits: Nx1 223 | l_pos = torch.einsum('nc,nc->n', [q, k]).unsqueeze(-1) 224 | # negative logits: NxK 225 | l_neg = torch.einsum('nc,ck->nk', [q, self.queue.clone().detach()]) 226 | 227 | l_pos_motion = torch.einsum('nc,nc->n', [q_motion, k_motion]).unsqueeze(-1) 228 | l_neg_motion = torch.einsum('nc,ck->nk', [q_motion, self.queue_motion.clone().detach()]) 229 | 230 | l_pos_bone = torch.einsum('nc,nc->n', [q_bone, k_bone]).unsqueeze(-1) 231 | l_neg_bone = torch.einsum('nc,ck->nk', [q_bone, self.queue_bone.clone().detach()]) 232 | 233 | # CMD loss 234 | lk_neg = torch.einsum('nc,ck->nk', [k, self.queue.clone().detach()]) 235 | lk_neg_motion = torch.einsum('nc,ck->nk', [k_motion, self.queue_motion.clone().detach()]) 236 | lk_neg_bone = torch.einsum('nc,ck->nk', [k_bone, self.queue_bone.clone().detach()]) 237 | 238 | # Top-k 239 | lk_neg_topk, topk_idx = torch.topk(lk_neg, self.topk, dim=-1) 240 | lk_neg_motion_topk, motion_topk_idx = torch.topk(lk_neg_motion, self.topk, dim=-1) 241 | lk_neg_bone_topk, bone_topk_idx = torch.topk(lk_neg_bone, self.topk, dim=-1) 242 | 243 | loss_cmd = loss_kld(torch.gather(l_neg_motion, -1, topk_idx) / self.student_T, lk_neg_topk / self.teacher_T) + \ 244 | loss_kld(torch.gather(l_neg_bone, -1, topk_idx) / self.student_T, lk_neg_topk / self.teacher_T) + \ 245 | loss_kld(torch.gather(l_neg, -1, motion_topk_idx) / self.student_T, lk_neg_motion_topk / self.teacher_T) + \ 246 | loss_kld(torch.gather(l_neg_bone, -1, motion_topk_idx) / self.student_T, lk_neg_motion_topk / self.teacher_T) + \ 247 | loss_kld(torch.gather(l_neg, -1, bone_topk_idx) / self.student_T, lk_neg_bone_topk / self.teacher_T) + \ 248 | loss_kld(torch.gather(l_neg_motion, -1, bone_topk_idx) / self.student_T, lk_neg_bone_topk / self.teacher_T) 249 | 250 | # logits: Nx(1+K) 251 | logits = torch.cat([l_pos, l_neg], dim=1) 252 | logits_motion = torch.cat([l_pos_motion, l_neg_motion], dim=1) 253 | logits_bone = torch.cat([l_pos_bone, l_neg_bone], dim=1) 254 | 255 | # apply temperature 256 | logits /= 
self.T 257 | logits_motion /= self.T 258 | logits_bone /= self.T 259 | 260 | # labels: positive key indicators 261 | labels = torch.zeros(logits.shape[0], dtype=torch.long).cuda() 262 | 263 | # dequeue and enqueue 264 | self._dequeue_and_enqueue(k) 265 | self._dequeue_and_enqueue_motion(k_motion) 266 | self._dequeue_and_enqueue_bone(k_bone) 267 | 268 | return logits, logits_motion, logits_bone, labels, loss_cmd * self.cmd_weight -------------------------------------------------------------------------------- /data_gen/resource/NTU_RGBD120_samples_with_missing_skeletons.txt: -------------------------------------------------------------------------------- 1 | S001C002P005R002A008 2 | S001C002P006R001A008 3 | S001C003P002R001A055 4 | S001C003P002R002A012 5 | S001C003P005R002A004 6 | S001C003P005R002A005 7 | S001C003P005R002A006 8 | S001C003P006R002A008 9 | S002C002P011R002A030 10 | S002C003P008R001A020 11 | S002C003P010R002A010 12 | S002C003P011R002A007 13 | S002C003P011R002A011 14 | S002C003P014R002A007 15 | S003C001P019R001A055 16 | S003C002P002R002A055 17 | S003C002P018R002A055 18 | S003C003P002R001A055 19 | S003C003P016R001A055 20 | S003C003P018R002A024 21 | S004C002P003R001A013 22 | S004C002P008R001A009 23 | S004C002P020R001A003 24 | S004C002P020R001A004 25 | S004C002P020R001A012 26 | S004C002P020R001A020 27 | S004C002P020R001A021 28 | S004C002P020R001A036 29 | S005C002P004R001A001 30 | S005C002P004R001A003 31 | S005C002P010R001A016 32 | S005C002P010R001A017 33 | S005C002P010R001A048 34 | S005C002P010R001A049 35 | S005C002P016R001A009 36 | S005C002P016R001A010 37 | S005C002P018R001A003 38 | S005C002P018R001A028 39 | S005C002P018R001A029 40 | S005C003P016R002A009 41 | S005C003P018R002A013 42 | S005C003P021R002A057 43 | S006C001P001R002A055 44 | S006C002P007R001A005 45 | S006C002P007R001A006 46 | S006C002P016R001A043 47 | S006C002P016R001A051 48 | S006C002P016R001A052 49 | S006C002P022R001A012 50 | S006C002P023R001A020 51 | S006C002P023R001A021 52 | S006C002P023R001A022 53 | S006C002P023R001A023 54 | S006C002P024R001A018 55 | S006C002P024R001A019 56 | S006C003P001R002A013 57 | S006C003P007R002A009 58 | S006C003P007R002A010 59 | S006C003P007R002A025 60 | S006C003P016R001A060 61 | S006C003P017R001A055 62 | S006C003P017R002A013 63 | S006C003P017R002A014 64 | S006C003P017R002A015 65 | S006C003P022R002A013 66 | S007C001P018R002A050 67 | S007C001P025R002A051 68 | S007C001P028R001A050 69 | S007C001P028R001A051 70 | S007C001P028R001A052 71 | S007C002P008R002A008 72 | S007C002P015R002A055 73 | S007C002P026R001A008 74 | S007C002P026R001A009 75 | S007C002P026R001A010 76 | S007C002P026R001A011 77 | S007C002P026R001A012 78 | S007C002P026R001A050 79 | S007C002P027R001A011 80 | S007C002P027R001A013 81 | S007C002P028R002A055 82 | S007C003P007R001A002 83 | S007C003P007R001A004 84 | S007C003P019R001A060 85 | S007C003P027R002A001 86 | S007C003P027R002A002 87 | S007C003P027R002A003 88 | S007C003P027R002A004 89 | S007C003P027R002A005 90 | S007C003P027R002A006 91 | S007C003P027R002A007 92 | S007C003P027R002A008 93 | S007C003P027R002A009 94 | S007C003P027R002A010 95 | S007C003P027R002A011 96 | S007C003P027R002A012 97 | S007C003P027R002A013 98 | S008C002P001R001A009 99 | S008C002P001R001A010 100 | S008C002P001R001A014 101 | S008C002P001R001A015 102 | S008C002P001R001A016 103 | S008C002P001R001A018 104 | S008C002P001R001A019 105 | S008C002P008R002A059 106 | S008C002P025R001A060 107 | S008C002P029R001A004 108 | S008C002P031R001A005 109 | S008C002P031R001A006 110 | S008C002P032R001A018 111 | S008C002P034R001A018 112 
| S008C002P034R001A019 113 | S008C002P035R001A059 114 | S008C002P035R002A002 115 | S008C002P035R002A005 116 | S008C003P007R001A009 117 | S008C003P007R001A016 118 | S008C003P007R001A017 119 | S008C003P007R001A018 120 | S008C003P007R001A019 121 | S008C003P007R001A020 122 | S008C003P007R001A021 123 | S008C003P007R001A022 124 | S008C003P007R001A023 125 | S008C003P007R001A025 126 | S008C003P007R001A026 127 | S008C003P007R001A028 128 | S008C003P007R001A029 129 | S008C003P007R002A003 130 | S008C003P008R002A050 131 | S008C003P025R002A002 132 | S008C003P025R002A011 133 | S008C003P025R002A012 134 | S008C003P025R002A016 135 | S008C003P025R002A020 136 | S008C003P025R002A022 137 | S008C003P025R002A023 138 | S008C003P025R002A030 139 | S008C003P025R002A031 140 | S008C003P025R002A032 141 | S008C003P025R002A033 142 | S008C003P025R002A049 143 | S008C003P025R002A060 144 | S008C003P031R001A001 145 | S008C003P031R002A004 146 | S008C003P031R002A014 147 | S008C003P031R002A015 148 | S008C003P031R002A016 149 | S008C003P031R002A017 150 | S008C003P032R002A013 151 | S008C003P033R002A001 152 | S008C003P033R002A011 153 | S008C003P033R002A012 154 | S008C003P034R002A001 155 | S008C003P034R002A012 156 | S008C003P034R002A022 157 | S008C003P034R002A023 158 | S008C003P034R002A024 159 | S008C003P034R002A044 160 | S008C003P034R002A045 161 | S008C003P035R002A016 162 | S008C003P035R002A017 163 | S008C003P035R002A018 164 | S008C003P035R002A019 165 | S008C003P035R002A020 166 | S008C003P035R002A021 167 | S009C002P007R001A001 168 | S009C002P007R001A003 169 | S009C002P007R001A014 170 | S009C002P008R001A014 171 | S009C002P015R002A050 172 | S009C002P016R001A002 173 | S009C002P017R001A028 174 | S009C002P017R001A029 175 | S009C003P017R002A030 176 | S009C003P025R002A054 177 | S010C001P007R002A020 178 | S010C002P016R002A055 179 | S010C002P017R001A005 180 | S010C002P017R001A018 181 | S010C002P017R001A019 182 | S010C002P019R001A001 183 | S010C002P025R001A012 184 | S010C003P007R002A043 185 | S010C003P008R002A003 186 | S010C003P016R001A055 187 | S010C003P017R002A055 188 | S011C001P002R001A008 189 | S011C001P018R002A050 190 | S011C002P008R002A059 191 | S011C002P016R002A055 192 | S011C002P017R001A020 193 | S011C002P017R001A021 194 | S011C002P018R002A055 195 | S011C002P027R001A009 196 | S011C002P027R001A010 197 | S011C002P027R001A037 198 | S011C003P001R001A055 199 | S011C003P002R001A055 200 | S011C003P008R002A012 201 | S011C003P015R001A055 202 | S011C003P016R001A055 203 | S011C003P019R001A055 204 | S011C003P025R001A055 205 | S011C003P028R002A055 206 | S012C001P019R001A060 207 | S012C001P019R002A060 208 | S012C002P015R001A055 209 | S012C002P017R002A012 210 | S012C002P025R001A060 211 | S012C003P008R001A057 212 | S012C003P015R001A055 213 | S012C003P015R002A055 214 | S012C003P016R001A055 215 | S012C003P017R002A055 216 | S012C003P018R001A055 217 | S012C003P018R001A057 218 | S012C003P019R002A011 219 | S012C003P019R002A012 220 | S012C003P025R001A055 221 | S012C003P027R001A055 222 | S012C003P027R002A009 223 | S012C003P028R001A035 224 | S012C003P028R002A055 225 | S013C001P015R001A054 226 | S013C001P017R002A054 227 | S013C001P018R001A016 228 | S013C001P028R001A040 229 | S013C002P015R001A054 230 | S013C002P017R002A054 231 | S013C002P028R001A040 232 | S013C003P008R002A059 233 | S013C003P015R001A054 234 | S013C003P017R002A054 235 | S013C003P025R002A022 236 | S013C003P027R001A055 237 | S013C003P028R001A040 238 | S014C001P027R002A040 239 | S014C002P015R001A003 240 | S014C002P019R001A029 241 | S014C002P025R002A059 242 | S014C002P027R002A040 243 | 
S014C002P039R001A050 244 | S014C003P007R002A059 245 | S014C003P015R002A055 246 | S014C003P019R002A055 247 | S014C003P025R001A048 248 | S014C003P027R002A040 249 | S015C001P008R002A040 250 | S015C001P016R001A055 251 | S015C001P017R001A055 252 | S015C001P017R002A055 253 | S015C002P007R001A059 254 | S015C002P008R001A003 255 | S015C002P008R001A004 256 | S015C002P008R002A040 257 | S015C002P015R001A002 258 | S015C002P016R001A001 259 | S015C002P016R002A055 260 | S015C003P008R002A007 261 | S015C003P008R002A011 262 | S015C003P008R002A012 263 | S015C003P008R002A028 264 | S015C003P008R002A040 265 | S015C003P025R002A012 266 | S015C003P025R002A017 267 | S015C003P025R002A020 268 | S015C003P025R002A021 269 | S015C003P025R002A030 270 | S015C003P025R002A033 271 | S015C003P025R002A034 272 | S015C003P025R002A036 273 | S015C003P025R002A037 274 | S015C003P025R002A044 275 | S016C001P019R002A040 276 | S016C001P025R001A011 277 | S016C001P025R001A012 278 | S016C001P025R001A060 279 | S016C001P040R001A055 280 | S016C001P040R002A055 281 | S016C002P008R001A011 282 | S016C002P019R002A040 283 | S016C002P025R002A012 284 | S016C003P008R001A011 285 | S016C003P008R002A002 286 | S016C003P008R002A003 287 | S016C003P008R002A004 288 | S016C003P008R002A006 289 | S016C003P008R002A009 290 | S016C003P019R002A040 291 | S016C003P039R002A016 292 | S017C001P016R002A031 293 | S017C002P007R001A013 294 | S017C002P008R001A009 295 | S017C002P015R001A042 296 | S017C002P016R002A031 297 | S017C002P016R002A055 298 | S017C003P007R002A013 299 | S017C003P008R001A059 300 | S017C003P016R002A031 301 | S017C003P017R001A055 302 | S017C003P020R001A059 303 | S019C001P046R001A075 304 | S019C002P042R001A094 305 | S019C002P042R001A095 306 | S019C002P042R001A096 307 | S019C002P042R001A097 308 | S019C002P042R001A098 309 | S019C002P042R001A099 310 | S019C002P042R001A100 311 | S019C002P042R001A101 312 | S019C002P042R001A102 313 | S019C002P049R002A074 314 | S019C002P049R002A079 315 | S019C002P051R001A061 316 | S019C003P046R001A061 317 | S019C003P046R002A061 318 | S019C003P046R002A062 319 | S020C002P041R001A063 320 | S020C002P041R001A064 321 | S020C002P044R001A063 322 | S020C002P044R001A064 323 | S020C002P044R001A066 324 | S020C002P044R001A084 325 | S020C002P054R001A081 326 | S021C001P059R001A108 327 | S021C002P055R001A065 328 | S021C002P055R001A092 329 | S021C002P055R001A093 330 | S021C002P057R001A064 331 | S021C002P058R001A063 332 | S021C002P058R001A064 333 | S021C002P059R001A074 334 | S021C002P059R001A075 335 | S021C002P059R001A076 336 | S021C002P059R001A077 337 | S021C002P059R001A078 338 | S021C002P059R001A079 339 | S021C003P057R002A078 340 | S021C003P057R002A079 341 | S021C003P057R002A094 342 | S022C002P061R001A113 343 | S022C003P061R002A061 344 | S022C003P061R002A062 345 | S022C003P063R002A061 346 | S022C003P063R002A062 347 | S022C003P063R002A063 348 | S022C003P063R002A064 349 | S022C003P063R002A078 350 | S022C003P064R002A061 351 | S022C003P064R002A062 352 | S022C003P065R002A061 353 | S022C003P065R002A062 354 | S022C003P065R002A119 355 | S022C003P067R002A064 356 | S023C002P055R001A114 357 | S023C002P055R002A092 358 | S023C002P059R001A075 359 | S023C002P063R001A075 360 | S023C003P055R002A093 361 | S023C003P055R002A094 362 | S023C003P061R002A061 363 | S023C003P064R001A092 364 | S024C001P063R001A109 365 | S024C002P062R002A074 366 | S024C002P067R001A100 367 | S024C002P067R001A101 368 | S024C002P067R001A102 369 | S024C002P067R001A103 370 | S024C003P062R002A074 371 | S024C003P063R002A061 372 | S024C003P063R002A062 373 | S025C001P055R002A119 374 | 
S025C003P056R002A119 375 | S025C003P059R002A115 376 | S026C002P044R001A061 377 | S026C002P044R001A062 378 | S026C002P070R001A092 379 | S026C003P069R002A075 380 | S026C003P074R002A061 381 | S026C003P074R002A062 382 | S026C003P075R001A117 383 | S026C003P075R001A118 384 | S027C001P082R001A063 385 | S027C002P044R002A092 386 | S027C002P079R001A061 387 | S027C002P079R001A062 388 | S027C002P079R001A063 389 | S027C002P079R001A064 390 | S027C002P082R001A092 391 | S027C002P084R001A061 392 | S027C002P084R001A062 393 | S027C002P086R001A061 394 | S027C003P041R002A087 395 | S027C003P080R002A061 396 | S027C003P082R002A061 397 | S027C003P082R002A062 398 | S027C003P086R002A061 399 | S027C003P086R002A062 400 | S028C001P087R001A061 401 | S028C002P041R001A091 402 | S028C002P087R001A061 403 | S028C003P042R002A064 404 | S028C003P046R002A063 405 | S028C003P046R002A066 406 | S028C003P046R002A067 407 | S028C003P046R002A068 408 | S028C003P046R002A069 409 | S028C003P046R002A070 410 | S028C003P046R002A071 411 | S028C003P046R002A072 412 | S028C003P046R002A074 413 | S028C003P046R002A075 414 | S028C003P046R002A077 415 | S028C003P046R002A081 416 | S028C003P046R002A082 417 | S028C003P046R002A083 418 | S028C003P046R002A084 419 | S028C003P048R002A061 420 | S028C003P048R002A062 421 | S028C003P048R002A073 422 | S028C003P073R002A073 423 | S028C003P087R001A061 424 | S028C003P087R002A061 425 | S028C003P087R002A062 426 | S029C001P043R002A092 427 | S029C001P044R002A092 428 | S029C001P048R001A073 429 | S029C001P089R001A063 430 | S029C002P041R001A074 431 | S029C002P041R001A084 432 | S029C002P044R001A091 433 | S029C002P048R001A075 434 | S029C002P048R001A081 435 | S029C002P074R001A081 436 | S029C002P074R001A095 437 | S029C002P074R001A096 438 | S029C002P080R001A091 439 | S029C002P088R001A066 440 | S029C002P089R001A065 441 | S029C002P090R001A067 442 | S029C003P008R002A065 443 | S029C003P008R002A067 444 | S029C003P041R001A089 445 | S029C003P043R001A080 446 | S029C003P043R001A092 447 | S029C003P043R001A105 448 | S029C003P043R002A085 449 | S029C003P043R002A086 450 | S029C003P044R002A106 451 | S029C003P048R001A065 452 | S029C003P048R002A073 453 | S029C003P048R002A074 454 | S029C003P048R002A075 455 | S029C003P048R002A076 456 | S029C003P048R002A092 457 | S029C003P048R002A094 458 | S029C003P051R002A073 459 | S029C003P051R002A074 460 | S029C003P051R002A075 461 | S029C003P051R002A076 462 | S029C003P051R002A077 463 | S029C003P051R002A078 464 | S029C003P051R002A079 465 | S029C003P051R002A080 466 | S029C003P051R002A081 467 | S029C003P051R002A082 468 | S029C003P051R002A083 469 | S029C003P051R002A084 470 | S029C003P051R002A085 471 | S029C003P051R002A086 472 | S029C003P051R002A110 473 | S029C003P067R001A098 474 | S029C003P074R002A110 475 | S029C003P080R002A066 476 | S029C003P088R002A078 477 | S029C003P089R001A075 478 | S029C003P089R002A061 479 | S029C003P089R002A062 480 | S029C003P089R002A063 481 | S029C003P090R002A092 482 | S029C003P090R002A095 483 | S030C002P091R002A091 484 | S030C002P091R002A092 485 | S030C002P091R002A093 486 | S030C002P091R002A094 487 | S030C002P091R002A095 488 | S030C002P091R002A096 489 | S030C002P091R002A097 490 | S030C002P091R002A098 491 | S030C002P091R002A099 492 | S030C002P091R002A100 493 | S030C002P091R002A101 494 | S030C002P091R002A102 495 | S030C002P091R002A103 496 | S030C002P091R002A104 497 | S030C002P091R002A105 498 | S030C003P044R002A065 499 | S030C003P044R002A081 500 | S030C003P044R002A084 501 | S031C002P042R001A111 502 | S031C002P051R001A061 503 | S031C002P051R001A062 504 | S031C002P067R001A067 505 | 
S031C002P067R001A068 506 | S031C002P067R001A069 507 | S031C002P067R001A070 508 | S031C002P067R001A071 509 | S031C002P067R001A072 510 | S031C002P082R001A075 511 | S031C002P082R002A117 512 | S031C002P097R001A061 513 | S031C002P097R001A062 514 | S031C003P043R002A074 515 | S031C003P043R002A075 516 | S031C003P044R002A094 517 | S031C003P082R002A067 518 | S031C003P082R002A068 519 | S031C003P082R002A069 520 | S031C003P082R002A070 521 | S031C003P082R002A071 522 | S031C003P082R002A072 523 | S031C003P082R002A073 524 | S031C003P082R002A075 525 | S031C003P082R002A076 526 | S031C003P082R002A077 527 | S031C003P082R002A084 528 | S031C003P082R002A085 529 | S031C003P082R002A086 530 | S032C002P067R001A092 531 | S032C003P067R002A066 532 | S032C003P067R002A067 533 | S032C003P067R002A075 534 | S032C003P067R002A076 535 | S032C003P067R002A077 536 | -------------------------------------------------------------------------------- /pretrain_moco_cmd.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import math 3 | import os 4 | import random 5 | import shutil 6 | import time 7 | import numpy as np 8 | 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.parallel 12 | import torch.backends.cudnn as cudnn 13 | import torch.optim 14 | import torch.utils.data 15 | 16 | import torch.distributed as dist 17 | 18 | 19 | import moco.builder_cmd 20 | from torch.utils.tensorboard import SummaryWriter 21 | from dataset import get_pretraining_set_intra 22 | 23 | 24 | parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') 25 | parser.add_argument('-j', '--workers', default=32, type=int, metavar='N', 26 | help='number of data loading workers (default: 32)') 27 | parser.add_argument('--epochs', default=200, type=int, metavar='N', 28 | help='number of total epochs to run') 29 | parser.add_argument('--start-epoch', default=0, type=int, metavar='N', 30 | help='manual epoch number (useful on restarts)') 31 | parser.add_argument('-b', '--batch-size', default=256, type=int, 32 | metavar='N', 33 | help='mini-batch size (default: 256), this is the total ' 34 | 'batch size of all GPUs on the current node when ' 35 | 'using Data Parallel or Distributed Data Parallel') 36 | parser.add_argument('--lr', '--learning-rate', default=0.001, type=float, 37 | metavar='LR', help='initial learning rate', dest='lr') 38 | parser.add_argument('--schedule', default=[100, 160], nargs='*', type=int, 39 | help='learning rate schedule (when to drop lr by 10x)') 40 | parser.add_argument('--momentum', default=0.9, type=float, metavar='M', 41 | help='momentum of SGD solver') 42 | parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float, 43 | metavar='W', help='weight decay (default: 1e-4)', 44 | dest='weight_decay') 45 | parser.add_argument('-p', '--print-freq', default=10, type=int, 46 | metavar='N', help='print frequency (default: 10)') 47 | parser.add_argument('--resume', default='', type=str, metavar='PATH', 48 | help='path to latest checkpoint (default: none)') 49 | 50 | # Distributed 51 | parser.add_argument('--local_rank', default=-1, type=int, 52 | help='node rank for distributed training') 53 | 54 | parser.add_argument('--seed', default=42, type=int, 55 | help='seed for initializing training. 
') 56 | parser.add_argument('--checkpoint-path', default='./checkpoints', type=str) 57 | parser.add_argument('--skeleton-representation', type=str, 58 | help='input skeleton-representation for self supervised training (image-based or graph-based or seq-based)') 59 | parser.add_argument('--pre-dataset', default='ntu60', type=str, 60 | help='which dataset to use for self supervised training (ntu60 or ntu120)') 61 | parser.add_argument('--protocol', default='cross_subject', type=str, 62 | help='training protocol cross_view/cross_subject/cross_setup') 63 | 64 | # contrast specific configs: 65 | parser.add_argument('--contrast-dim', default=128, type=int, 66 | help='feature dimension (default: 128)') 67 | parser.add_argument('--contrast-k', default=32768, type=int, 68 | help='queue size; number of negative keys (default: 32768)') 69 | parser.add_argument('--contrast-m', default=0.999, type=float, 70 | help='contrast momentum of updating key encoder (default: 0.999)') 71 | parser.add_argument('--contrast-t', default=0.07, type=float, 72 | help='softmax temperature (default: 0.07)') 73 | parser.add_argument('--teacher-t', default=0.05, type=float, 74 | help='softmax temperature (default: 0.05)') 75 | parser.add_argument('--student-t', default=0.1, type=float, 76 | help='softmax temperature (default: 0.1)') 77 | parser.add_argument('--cmd-weight', default=1.0, type=float, 78 | help='weight of sim loss (default: 1.0)') 79 | parser.add_argument('--topk', default=1024, type=int, 80 | help='number of contrastive context') 81 | parser.add_argument('--mlp', action='store_true', 82 | help='use mlp head') 83 | parser.add_argument('--cos', action='store_true', 84 | help='use cosine lr schedule') 85 | 86 | def init_seeds(seed): 87 | random.seed(seed) 88 | torch.manual_seed(seed) 89 | torch.cuda.manual_seed(seed) 90 | torch.cuda.manual_seed_all(seed) # if you are using multi-GPU. 
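# Editor's note: a minimal, self-contained sketch of what the contrast/teacher/student flags
# defined above typically configure in a MoCo-style cross-modal setup. This is NOT the actual
# moco/builder_cmd.py implementation; the function names and the exact form of the distillation
# term are illustrative assumptions, included only to make the hyperparameters concrete.
import torch
import torch.nn.functional as F

@torch.no_grad()
def momentum_update(encoder_q, encoder_k, m=0.999):
    # the key encoder tracks the query encoder via an exponential moving average (--contrast-m)
    for p_q, p_k in zip(encoder_q.parameters(), encoder_k.parameters()):
        p_k.data.mul_(m).add_(p_q.data, alpha=1.0 - m)

def infonce_logits(q, k, queue, t=0.07):
    # q, k: (N, C) normalized query/key features; queue: (C, K) negatives (K = --contrast-k)
    l_pos = torch.einsum('nc,nc->n', q, k).unsqueeze(-1)      # (N, 1) positive similarity
    l_neg = torch.einsum('nc,ck->nk', q, queue)               # (N, K) negative similarities
    logits = torch.cat([l_pos, l_neg], dim=1) / t             # temperature = --contrast-t
    labels = torch.zeros(logits.size(0), dtype=torch.long)    # the positive sits at index 0
    return logits, labels

def cross_modal_distillation(sim_teacher, sim_student, teacher_t=0.05, student_t=0.1, topk=1024):
    # one plausible shape for the "sim" loss weighted by --cmd-weight: align the student
    # modality's similarity distribution over the teacher's top-k queue neighbours (--topk),
    # sharpening the teacher with --teacher-t and smoothing the student with --student-t
    idx = sim_teacher.topk(topk, dim=1).indices
    p_teacher = F.softmax(torch.gather(sim_teacher, 1, idx) / teacher_t, dim=1)
    log_p_student = F.log_softmax(torch.gather(sim_student, 1, idx) / student_t, dim=1)
    return F.kl_div(log_p_student, p_teacher, reduction='batchmean')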
91 | cudnn.deterministic = True 92 | cudnn.benchmark = True 93 | 94 | def main(): 95 | args = parser.parse_args() 96 | 97 | if args.local_rank != -1: 98 | dist.init_process_group(backend='nccl') 99 | torch.cuda.set_device(args.local_rank) 100 | else: 101 | torch.cuda.set_device(0) 102 | 103 | 104 | # Simply call main_worker function 105 | main_worker(args) 106 | 107 | def main_worker(args): 108 | if args.local_rank != -1: 109 | init_seeds(args.seed + args.local_rank) 110 | else: 111 | init_seeds(args.seed) 112 | 113 | # pretraining dataset and protocol 114 | from options import options_pretraining as options 115 | if args.pre_dataset == 'ntu60' and args.protocol == 'cross_view': 116 | opts = options.opts_ntu_60_cross_view() 117 | elif args.pre_dataset == 'ntu60' and args.protocol == 'cross_subject': 118 | opts = options.opts_ntu_60_cross_subject() 119 | elif args.pre_dataset == 'ntu120' and args.protocol == 'cross_setup': 120 | opts = options.opts_ntu_120_cross_setup() 121 | elif args.pre_dataset == 'ntu120' and args.protocol == 'cross_subject': 122 | opts = options.opts_ntu_120_cross_subject() 123 | elif args.pre_dataset == 'pku_v2' and args.protocol == 'cross_view': 124 | opts = options.opts_pku_v2_cross_view() 125 | elif args.pre_dataset == 'pku_v2' and args.protocol == 'cross_subject': 126 | opts = options.opts_pku_v2_cross_subject() 127 | 128 | opts.train_feeder_args['input_representation'] = args.skeleton_representation 129 | 130 | # create model 131 | print("=> creating model") 132 | 133 | model = moco.builder_cmd.MoCo(args.skeleton_representation, opts.bi_gru_model_args, 134 | args.contrast_dim, args.contrast_k, args.contrast_m, args.contrast_t, 135 | args.teacher_t, args.student_t, args.cmd_weight, args.topk, args.mlp) 136 | print("options",opts.train_feeder_args) 137 | print(model) 138 | 139 | model.cuda() 140 | if args.local_rank != -1: 141 | model = nn.SyncBatchNorm.convert_sync_batchnorm(model) 142 | model = nn.parallel.distributed.DistributedDataParallel(model, device_ids=[args.local_rank], find_unused_parameters=True) 143 | print('Distributed data parallel model used') 144 | 145 | # define loss function (criterion) and optimizer 146 | criterion = nn.CrossEntropyLoss().cuda() 147 | 148 | optimizer = torch.optim.SGD(model.parameters(), args.lr, 149 | momentum=args.momentum, 150 | weight_decay=args.weight_decay) 151 | 152 | # optionally resume from a checkpoint 153 | if args.resume: 154 | if os.path.isfile(args.resume): 155 | print("=> loading checkpoint '{}'".format(args.resume)) 156 | # Map model to be loaded to specified single gpu. 
157 | checkpoint = torch.load(args.resume, map_location='cpu') 158 | args.start_epoch = checkpoint['epoch'] 159 | model.load_state_dict(checkpoint['state_dict']) 160 | optimizer.load_state_dict(checkpoint['optimizer']) 161 | print("=> loaded checkpoint '{}' (epoch {})" 162 | .format(args.resume, checkpoint['epoch'])) 163 | else: 164 | print("=> no checkpoint found at '{}'".format(args.resume)) 165 | 166 | ## Data loading code 167 | train_dataset = get_pretraining_set_intra(opts) 168 | 169 | if args.local_rank != -1: 170 | train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset) 171 | else: 172 | train_sampler = None 173 | 174 | def worker_init_fn(worker_id): 175 | return np.random.seed(torch.initial_seed()%(2**31) + worker_id) # for single gpu 176 | train_loader = torch.utils.data.DataLoader( 177 | train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None), num_workers=args.workers, 178 | worker_init_fn=worker_init_fn, pin_memory=True, sampler=train_sampler, drop_last=True) 179 | 180 | writer = SummaryWriter(args.checkpoint_path) 181 | 182 | for epoch in range(args.start_epoch, args.epochs): 183 | if args.local_rank != -1: 184 | train_sampler.set_epoch(epoch) 185 | 186 | adjust_learning_rate(optimizer, epoch, args) 187 | 188 | # train for one epoch 189 | loss_joint, loss_motion, loss_bone, loss_sim, top1_joint, top1_motion, top1_bone = train(train_loader, model, criterion, optimizer, epoch, args) 190 | 191 | if args.local_rank in [-1, 0]: 192 | writer.add_scalar('loss_joint', loss_joint.avg, global_step=epoch) 193 | writer.add_scalar('loss_motion', loss_motion.avg, global_step=epoch) 194 | writer.add_scalar('loss_bone', loss_bone.avg, global_step=epoch) 195 | writer.add_scalar('loss_sim', loss_sim.avg, global_step=epoch) 196 | writer.add_scalar('top1_joint',top1_joint.avg, global_step=epoch) 197 | writer.add_scalar('top1_motion',top1_motion.avg, global_step=epoch) 198 | writer.add_scalar('top1_bone',top1_bone.avg, global_step=epoch) 199 | 200 | if epoch % 10 == 0: 201 | save_checkpoint({ 202 | 'epoch': epoch + 1, 203 | 'state_dict': model.state_dict(), 204 | 'optimizer' : optimizer.state_dict(), 205 | }, is_best=False, filename=args.checkpoint_path+'/checkpoint_{:04d}.pth.tar'.format(epoch)) 206 | 207 | 208 | def train(train_loader, model, criterion, optimizer, epoch, args): 209 | batch_time = AverageMeter('Time', ':6.3f') 210 | data_time = AverageMeter('Data', ':6.3f') 211 | losses = AverageMeter('Loss', ':6.3f') 212 | losses_joint = AverageMeter('Loss Joint', ':6.3f') 213 | losses_motion = AverageMeter('Loss Motion', ':6.3f') 214 | losses_bone = AverageMeter('Loss Bone', ':6.3f') 215 | losses_sim = AverageMeter('Loss Sim', ':6.3f') 216 | top1_joint = AverageMeter('Acc Joint@1', ':6.2f') 217 | top1_motion = AverageMeter('Acc Motion@1', ':6.2f') 218 | top1_bone = AverageMeter('Acc Bone@1', ':6.2f') 219 | progress = ProgressMeter( 220 | len(train_loader), 221 | [batch_time, losses_joint, losses_motion, losses_bone, losses_sim, top1_joint, top1_motion, top1_bone], 222 | prefix="Epoch: [{}] Lr_rate [{}]".format(epoch,optimizer.param_groups[0]['lr'])) 223 | 224 | # switch to train mode 225 | model.train() 226 | 227 | end = time.time() 228 | for i, (input_v1, input_v2) in enumerate(train_loader): 229 | # measure data loading time 230 | data_time.update(time.time() - end) 231 | 232 | inputs= [input_v1,input_v2] 233 | inputs[0] =inputs[0].float().cuda(non_blocking=True) 234 | inputs[1] =inputs[1].float().cuda(non_blocking=True) 235 | 236 | # compute output 
237 | output, output_motion, output_bone, target, loss_sim = model(inputs[0], inputs[1]) 238 | 239 | batch_size = output.size(0) 240 | 241 | # compute loss 242 | loss_joint = criterion(output, target) 243 | loss_motion = criterion(output_motion, target) 244 | loss_bone = criterion(output_bone, target) 245 | 246 | loss = loss_joint + loss_motion + loss_bone + loss_sim 247 | 248 | losses.update(loss.item(), batch_size) 249 | losses_joint.update(loss_joint.item(), batch_size) 250 | losses_motion.update(loss_motion.item(), batch_size) 251 | losses_bone.update(loss_bone.item(), batch_size) 252 | losses_sim.update(loss_sim.item(), batch_size) 253 | 254 | # measure accuracy of model m1 and m2 individually 255 | # acc1/acc5 are (K+1)-way contrast classifier accuracy 256 | # measure accuracy and record loss 257 | acc1_joint, _ = accuracy(output, target, topk=(1, 5)) 258 | acc1_motion, _ = accuracy(output_motion, target, topk=(1, 5)) 259 | acc1_bone, _ = accuracy(output_bone, target, topk=(1, 5)) 260 | top1_joint.update(acc1_joint[0], batch_size) 261 | top1_motion.update(acc1_motion[0], batch_size) 262 | top1_bone.update(acc1_bone[0], batch_size) 263 | 264 | #print("input output size",output.size(),images[0].size(),half_size) 265 | 266 | # compute gradient and do SGD step 267 | optimizer.zero_grad() 268 | loss.backward() 269 | optimizer.step() 270 | 271 | # measure elapsed time 272 | batch_time.update(time.time() - end) 273 | end = time.time() 274 | 275 | if i % args.print_freq == 0: 276 | progress.display(i) 277 | 278 | return losses_joint, losses_motion, losses_bone, losses_sim, top1_joint, top1_motion, top1_bone 279 | 280 | def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'): 281 | torch.save(state, filename) 282 | if is_best: 283 | shutil.copyfile(filename, 'model_best.pth.tar') 284 | 285 | 286 | class AverageMeter(object): 287 | """Computes and stores the average and current value""" 288 | def __init__(self, name, fmt=':f'): 289 | self.name = name 290 | self.fmt = fmt 291 | self.reset() 292 | 293 | def reset(self): 294 | self.val = 0 295 | self.avg = 0 296 | self.sum = 0 297 | self.count = 0 298 | 299 | def update(self, val, n=1): 300 | self.val = val 301 | self.sum += val * n 302 | self.count += n 303 | self.avg = self.sum / self.count 304 | 305 | def __str__(self): 306 | fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})' 307 | return fmtstr.format(**self.__dict__) 308 | 309 | 310 | class ProgressMeter(object): 311 | def __init__(self, num_batches, meters, prefix=""): 312 | self.batch_fmtstr = self._get_batch_fmtstr(num_batches) 313 | self.meters = meters 314 | self.prefix = prefix 315 | 316 | def display(self, batch): 317 | entries = [self.prefix + self.batch_fmtstr.format(batch)] 318 | entries += [str(meter) for meter in self.meters] 319 | print('\t'.join(entries)) 320 | 321 | def _get_batch_fmtstr(self, num_batches): 322 | num_digits = len(str(num_batches // 1)) 323 | fmt = '{:' + str(num_digits) + 'd}' 324 | return '[' + fmt + '/' + fmt.format(num_batches) + ']' 325 | 326 | 327 | def adjust_learning_rate(optimizer, epoch, args): 328 | """Decay the learning rate based on schedule""" 329 | lr = args.lr 330 | if args.cos: # cosine lr schedule 331 | lr *= 0.5 * (1. + math.cos(math.pi * epoch / args.epochs)) 332 | else: # stepwise lr schedule 333 | for milestone in args.schedule: 334 | lr *= 0.1 if epoch >= milestone else 1. 
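# Editor's note: a quick, self-contained illustration of the two schedules implemented by
# adjust_learning_rate() here (cosine when --cos is set, otherwise step decay at --schedule);
# the numbers below use the pretraining defaults (lr=0.001, epochs=200, schedule=[100, 160]).
import math

def lr_at(epoch, base_lr=0.001, epochs=200, milestones=(100, 160), cos=False):
    if cos:
        return base_lr * 0.5 * (1. + math.cos(math.pi * epoch / epochs))
    lr = base_lr
    for milestone in milestones:
        lr *= 0.1 if epoch >= milestone else 1.
    return lr

# step decay: 1e-3 until epoch 100, 1e-4 until epoch 160, then 1e-5;
# cosine: decays smoothly towards 0, reaching half the base lr at epoch 100
print(lr_at(50), lr_at(120), lr_at(170), lr_at(100, cos=True))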
335 | for param_group in optimizer.param_groups: 336 | param_group['lr'] = lr 337 | 338 | 339 | def accuracy(output, target, topk=(1,)): 340 | """Computes the accuracy over the k top predictions for the specified values of k""" 341 | with torch.no_grad(): 342 | maxk = max(topk) 343 | batch_size = target.size(0) 344 | 345 | _, pred = output.topk(maxk, 1, True, True) 346 | pred = pred.t() 347 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 348 | 349 | res = [] 350 | for k in topk: 351 | correct_k = correct[:k].contiguous().view(-1).float().sum(0, keepdim=True) 352 | res.append(correct_k.mul_(100.0 / batch_size)) 353 | return res 354 | 355 | 356 | if __name__ == '__main__': 357 | main() 358 | -------------------------------------------------------------------------------- /action_retrieval_cmd.py: -------------------------------------------------------------------------------- 1 | from dataset import get_finetune_training_set, get_finetune_validation_set 2 | import argparse 3 | import os 4 | import random 5 | import warnings 6 | 7 | from tqdm import tqdm 8 | 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.parallel 12 | import torch.backends.cudnn as cudnn 13 | import torch.optim as optim 14 | import torch.utils.data 15 | from torch.utils.data import Dataset, DataLoader 16 | import torch.utils.data.distributed 17 | import numpy as np 18 | from sklearn.neighbors import KNeighborsClassifier 19 | from sklearn import preprocessing 20 | from sklearn.metrics import accuracy_score 21 | 22 | import moco.builder_cmd 23 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 24 | 25 | # change for action recogniton 26 | 27 | 28 | parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') 29 | parser.add_argument('-j', '--workers', default=32, type=int, metavar='N', 30 | help='number of data loading workers (default: 32)') 31 | parser.add_argument('--epochs', default=80, type=int, metavar='N', 32 | help='number of total epochs to run') 33 | parser.add_argument('--start-epoch', default=0, type=int, metavar='N', 34 | help='manual epoch number (useful on restarts)') 35 | parser.add_argument('-b', '--batch-size', default=256, type=int, 36 | metavar='N', 37 | help='mini-batch size (default: 256), this is the total ' 38 | 'batch size of all GPUs on the current node when ' 39 | 'using Data Parallel or Distributed Data Parallel') 40 | parser.add_argument('--lr', '--learning-rate', default=30., type=float, 41 | metavar='LR', help='initial learning rate', dest='lr') 42 | parser.add_argument('--schedule', default=[50, 70, ], nargs='*', type=int, 43 | help='learning rate schedule (when to drop lr by a ratio)') 44 | parser.add_argument('--momentum', default=0.9, type=float, metavar='M', 45 | help='momentum') 46 | parser.add_argument('--wd', '--weight-decay', default=0., type=float, 47 | metavar='W', help='weight decay (default: 0.)', 48 | dest='weight_decay') 49 | parser.add_argument('-p', '--print-freq', default=10, type=int, 50 | metavar='N', help='print frequency (default: 10)') 51 | parser.add_argument('--resume', default='', type=str, metavar='PATH', 52 | help='path to latest checkpoint (default: none)') 53 | parser.add_argument('--seed', default=None, type=int, 54 | help='seed for initializing training. 
') 55 | parser.add_argument('--gpu', default=None, type=int, 56 | help='GPU id to use.') 57 | 58 | parser.add_argument('--pretrained', default='', type=str, 59 | help='path to moco pretrained checkpoint') 60 | parser.add_argument('--finetune-dataset', default='ntu60', type=str, 61 | help='which dataset to use for finetuning') 62 | 63 | parser.add_argument('--protocol', default='cross_view', type=str, 64 | help='traiining protocol of ntu') 65 | 66 | parser.add_argument('--finetune-skeleton-representation', default='seq-based', type=str, 67 | help='which skeleton-representation to use for downstream training') 68 | parser.add_argument('--pretrain-skeleton-representation', default='seq-based', type=str, 69 | help='which skeleton-representation where used for pre-training') 70 | parser.add_argument('--knn-neighbours', default=None, type=int, 71 | help='number of neighbours used for KNN.') 72 | 73 | best_acc1 = 0 74 | 75 | # initilize weight 76 | def weights_init_gru(model): 77 | with torch.no_grad(): 78 | for child in list(model.children()): 79 | print("init ", child) 80 | for param in list(child.parameters()): 81 | if param.dim() == 2: 82 | nn.init.xavier_uniform_(param) 83 | print('PC weight initial finished!') 84 | 85 | 86 | def load_pretrained(model, pretrained): 87 | if os.path.isfile(pretrained): 88 | print("=> loading checkpoint '{}'".format(pretrained)) 89 | checkpoint = torch.load(pretrained, map_location="cpu") 90 | 91 | # rename moco pre-trained keys 92 | state_dict = checkpoint['state_dict'] 93 | for k in list(state_dict.keys()): 94 | # retain only encoder_q up to before the embedding layer 95 | if not k.startswith('encoder_q'): 96 | del state_dict[k] 97 | elif '.fc' in k: 98 | del state_dict[k] 99 | else: 100 | pass 101 | 102 | msg = model.load_state_dict(state_dict, strict=False) 103 | print("message", msg) 104 | assert set(msg.missing_keys) == {"encoder_q.fc.weight", "encoder_q.fc.bias", 105 | "encoder_q_motion.fc.weight", "encoder_q_motion.fc.bias", 106 | "encoder_q_bone.fc.weight", "encoder_q_bone.fc.bias"} 107 | 108 | print("=> loaded pre-trained model '{}'".format(pretrained)) 109 | else: 110 | print("=> no checkpoint found at '{}'".format(pretrained)) 111 | 112 | 113 | def knn(data_train, data_test, label_train, label_test, nn=9): 114 | label_train = np.asarray(label_train) 115 | label_test = np.asarray(label_test) 116 | print("Number of KNN Neighbours = ", nn) 117 | print("training feature and labels", data_train.shape, len(label_train)) 118 | print("test feature and labels", data_test.shape, len(label_test)) 119 | 120 | Xtr_Norm = preprocessing.normalize(data_train) 121 | Xte_Norm = preprocessing.normalize(data_test) 122 | 123 | knn = KNeighborsClassifier(n_neighbors=nn, 124 | metric='cosine') # , metric='cosine'#'mahalanobis', metric_params={'V': np.cov(data_train)}) 125 | knn.fit(Xtr_Norm, label_train) 126 | pred = knn.predict(Xte_Norm) 127 | acc = accuracy_score(pred, label_test) 128 | 129 | return acc 130 | 131 | 132 | def test_extract_hidden(model, data_train, data_eval): 133 | model.eval() 134 | print("Extracting training features") 135 | label_train_list = [] 136 | hidden_array_train_list = [] 137 | for ith, (ith_data, label) in enumerate(tqdm(data_train)): 138 | input_tensor = ith_data.to(device) 139 | 140 | en_hi = model(input_tensor, view='joint', knn_eval=True) 141 | en_hi = en_hi.squeeze() 142 | #print("encoder size",en_hi.size()) 143 | 144 | label_train_list.append(label) 145 | hidden_array_train_list.append(en_hi[:, :].detach().cpu().numpy()) 146 | 
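# Editor's note: the retrieval protocol that the knn() helper above implements, shown in
# isolation on synthetic features: L2-normalize the frozen encoder outputs and classify with
# a cosine k-NN (k = --knn-neighbours, typically 1). Shapes and data here are illustrative only.
import numpy as np
from sklearn import preprocessing
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier

rng = np.random.default_rng(0)                                   # stand-in for extracted features
x_train, y_train = rng.normal(size=(100, 128)), rng.integers(0, 5, size=100)
x_test, y_test = rng.normal(size=(20, 128)), rng.integers(0, 5, size=20)

clf = KNeighborsClassifier(n_neighbors=1, metric='cosine')
clf.fit(preprocessing.normalize(x_train), y_train)
print('toy 1-NN accuracy:', accuracy_score(y_test, clf.predict(preprocessing.normalize(x_test))))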
label_train = np.hstack(label_train_list) 147 | hidden_array_train = np.vstack(hidden_array_train_list) 148 | 149 | print("Extracting validation features") 150 | label_eval_list = [] 151 | hidden_array_eval_list = [] 152 | for ith, (ith_data, label) in enumerate(tqdm(data_eval)): 153 | 154 | input_tensor = ith_data.to(device) 155 | 156 | en_hi = model(input_tensor, view='joint', knn_eval=True) 157 | en_hi = en_hi.squeeze() 158 | 159 | label_eval_list.append(label) 160 | hidden_array_eval_list.append(en_hi[:, :].detach().cpu().numpy()) 161 | label_eval = np.hstack(label_eval_list) 162 | hidden_array_eval = np.vstack(hidden_array_eval_list) 163 | 164 | return hidden_array_train, hidden_array_eval, label_train, label_eval 165 | 166 | 167 | class MyAutoDataset(Dataset): 168 | def __init__(self, data, label): 169 | 170 | self.data = data 171 | self.label = label 172 | #self.xy = zip(self.data, self.label) 173 | 174 | def __getitem__(self, index): 175 | sequence = self.data[index, :] 176 | label = self.label[index] 177 | 178 | return sequence, label 179 | 180 | def __len__(self): 181 | return len(self.label) 182 | 183 | 184 | def train_autoencoder(hidden_train, hidden_eval, label_train, 185 | label_eval, middle_size, criterion, lambda1, num_epoches): 186 | batch_size = 64 187 | #auto = autoencoder(hidden_train.shape[1], middle_size).to(device) 188 | auto = autoencoder(hidden_train.shape[1], middle_size).cuda() 189 | auto_optimizer = optim.Adam(auto.parameters(), lr=0.001) 190 | auto_scheduler = optim.lr_scheduler.LambdaLR(auto_optimizer, lr_lambda=lambda1) 191 | criterion_auto = nn.MSELoss() 192 | 193 | autodataset = MyAutoDataset(hidden_train, label_train) 194 | trainloader = DataLoader(autodataset, batch_size=batch_size, shuffle=True) 195 | 196 | autodataset = MyAutoDataset(hidden_eval, label_eval) 197 | evalloader = DataLoader(autodataset, batch_size=batch_size, shuffle=True) 198 | 199 | print("Training autoencoder") 200 | for epoch in tqdm(range(num_epoches)): 201 | for (data, label) in trainloader: 202 | # img, _ = data 203 | # img = img.view(img.size(0), -1) 204 | # img = Variable(img).cuda() 205 | #data = torch.tensor(data.clone().detach(), dtype=torch.float).to(device) 206 | # ===================forward===================== 207 | data = data.cuda() 208 | output, _ = auto(data) 209 | loss = criterion(output, data) 210 | # ===================backward==================== 211 | auto_optimizer.zero_grad() 212 | loss.backward() 213 | auto_optimizer.step() 214 | auto_scheduler.step() 215 | for (data, label) in evalloader: 216 | data = data.cuda() 217 | # ===================forward===================== 218 | output, _ = auto(data) 219 | loss_eval = criterion(output, data) 220 | # ===================log======================== 221 | # if epoch % 200 == 0: 222 | # print('epoch [{}/{}], train loss:{:.4f} eval loass:{:.4f}' 223 | # .format(epoch + 1, num_epoches, loss.item(), loss_eval.item())) 224 | 225 | # extract hidden train 226 | count = 0 227 | for (data, label) in trainloader: 228 | data = data.cuda() 229 | _, encoder_output = auto(data) 230 | 231 | if count == 0: 232 | np_out_train = encoder_output.detach().cpu().numpy() 233 | label_train = label 234 | else: 235 | label_train = np.hstack((label_train, label)) 236 | np_out_train = np.vstack((np_out_train, encoder_output.detach().cpu().numpy())) 237 | count += 1 238 | 239 | # extract hidden eval 240 | count = 0 241 | for (data, label) in evalloader: 242 | data = data.cuda() 243 | _, encoder_output = auto(data) 244 | 245 | if count == 0: 246 | 
np_out_eval = encoder_output.detach().cpu().numpy() 247 | label_eval = label 248 | 249 | else: 250 | label_eval = np.hstack((label_eval, label)) 251 | np_out_eval = np.vstack((np_out_eval, encoder_output.detach().cpu().numpy())) 252 | count += 1 253 | 254 | return np_out_train, np_out_eval, label_train, label_eval 255 | 256 | 257 | class autoencoder(nn.Module): 258 | def __init__(self, input_size, middle_size): 259 | super(autoencoder, self).__init__() 260 | self.encoder = nn.Sequential( 261 | nn.Linear(input_size, 1024), 262 | nn.Tanh(), 263 | nn.Linear(1024, 512), 264 | nn.Tanh(), 265 | nn.Linear(512, middle_size), 266 | nn.Tanh() 267 | ) 268 | 269 | self.decoder = nn.Sequential( 270 | nn.Linear(middle_size, 512), 271 | nn.Tanh(), 272 | nn.Linear(512, 1024), 273 | nn.Tanh(), 274 | nn.Linear(1024, input_size), 275 | ) 276 | 277 | def forward(self, x): 278 | middle_x = self.encoder(x) 279 | x = self.decoder(middle_x) 280 | return x, middle_x 281 | 282 | 283 | def clustering_knn_acc(model, train_loader, eval_loader, criterion, num_epoches=400, middle_size=125, knn_neighbours=1): 284 | hi_train, hi_eval, label_train, label_eval = test_extract_hidden(model, train_loader, eval_loader) 285 | # print(hi_train.shape) 286 | 287 | train_ae = False 288 | if train_ae: 289 | def lambda1(ith_epoch): return 0.95 ** (ith_epoch // 50) 290 | np_out_train, np_out_eval, au_l_train, au_l_eval = train_autoencoder(hi_train, hi_eval, label_train, 291 | label_eval, middle_size, criterion, lambda1, num_epoches) 292 | 293 | # print(hi_train.shape) 294 | knn_acc_1 = knn(hi_train, hi_eval, label_train, label_eval, nn=knn_neighbours) 295 | knn_acc_au = knn(np_out_train, np_out_eval, au_l_train, au_l_eval, nn=knn_neighbours) 296 | else: 297 | knn_acc_1 = knn(hi_train, hi_eval, label_train, label_eval, nn=knn_neighbours) 298 | knn_acc_au = knn_acc_1 299 | 300 | return knn_acc_1, knn_acc_au 301 | 302 | 303 | def main(): 304 | args = parser.parse_args() 305 | 306 | if args.seed is not None: 307 | random.seed(args.seed) 308 | torch.manual_seed(args.seed) 309 | cudnn.deterministic = True 310 | warnings.warn('You have chosen to seed training. ' 311 | 'This will turn on the CUDNN deterministic setting, ' 312 | 'which can slow down your training considerably! ' 313 | 'You may see unexpected behavior when restarting ' 314 | 'from checkpoints.') 315 | 316 | if args.gpu is not None: 317 | warnings.warn('You have chosen a specific GPU. 
This will completely ' 318 | 'disable data parallelism.') 319 | 320 | ngpus_per_node = torch.cuda.device_count() 321 | # Simply call main_worker function 322 | main_worker(0, ngpus_per_node, args) 323 | 324 | 325 | def main_worker(gpu, ngpus_per_node, args): 326 | global best_acc1 327 | args.gpu = gpu 328 | 329 | if args.gpu is not None: 330 | print("Use GPU: {} for training".format(args.gpu)) 331 | 332 | # training dataset 333 | from options import options_retrieval as options 334 | if args.finetune_dataset == 'ntu60' and args.protocol == 'cross_view': 335 | opts = options.opts_ntu_60_cross_view() 336 | elif args.finetune_dataset == 'ntu60' and args.protocol == 'cross_subject': 337 | opts = options.opts_ntu_60_cross_subject() 338 | elif args.finetune_dataset == 'ntu120' and args.protocol == 'cross_setup': 339 | opts = options.opts_ntu_120_cross_setup() 340 | elif args.finetune_dataset == 'ntu120' and args.protocol == 'cross_subject': 341 | opts = options.opts_ntu_120_cross_subject() 342 | elif args.finetune_dataset == 'pku_v2' and args.protocol == 'cross_view': 343 | opts = options.opts_pku_v2_cross_view() 344 | elif args.finetune_dataset == 'pku_v2' and args.protocol == 'cross_subject': 345 | opts = options.opts_pku_v2_cross_subject() 346 | 347 | opts.train_feeder_args['input_representation'] = args.finetune_skeleton_representation 348 | opts.test_feeder_args['input_representation'] = args.finetune_skeleton_representation 349 | 350 | # create model 351 | print("=> creating model") 352 | 353 | model = moco.builder_cmd.MoCo(args.finetune_skeleton_representation, opts.bi_gru_model_args, pretrain=False) 354 | print("options", opts.agcn_model_args, 355 | opts.train_feeder_args, opts.test_feeder_args) 356 | 357 | if args.pretrained: 358 | # freeze all layers 359 | for name, param in model.encoder_q.named_parameters(): 360 | param.requires_grad = False 361 | for name, param in model.encoder_q_motion.named_parameters(): 362 | param.requires_grad = False 363 | for name, param in model.encoder_q_bone.named_parameters(): 364 | param.requires_grad = False 365 | 366 | # load from pre-trained model 367 | load_pretrained(model, args.pretrained) 368 | 369 | if args.gpu is not None: 370 | model = model.cuda() 371 | model = nn.DataParallel(model, device_ids=None) 372 | 373 | cudnn.benchmark = True 374 | 375 | # Data loading code 376 | 377 | train_dataset = get_finetune_training_set(opts) 378 | val_dataset = get_finetune_validation_set(opts) 379 | 380 | train_sampler = None 381 | train_loader = torch.utils.data.DataLoader( 382 | train_dataset, batch_size=args.batch_size, shuffle=( 383 | train_sampler is None), 384 | num_workers=args.workers, pin_memory=True, sampler=train_sampler, drop_last=False) 385 | 386 | val_loader = torch.utils.data.DataLoader( 387 | val_dataset, 388 | batch_size=args.batch_size, shuffle=False, 389 | num_workers=args.workers, pin_memory=True, drop_last=False) 390 | 391 | auto_criterion = nn.MSELoss() 392 | # Extract frozen features of the pre-trained query encoder 393 | # train and evaluate a KNN classifier on extracted features 394 | acc1, acc_au = clustering_knn_acc(model, train_loader, val_loader, 395 | criterion=auto_criterion, 396 | knn_neighbours=args.knn_neighbours) 397 | 398 | print(" Knn Without AE= ", acc1, " Knn With AE=", acc_au) 399 | 400 | 401 | if __name__ == '__main__': 402 | main() 403 | -------------------------------------------------------------------------------- /action_classification_cmd.py: 
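The script below performs linear evaluation: the pre-trained query encoders are kept frozen, their final fc layers are re-initialized, and only those classifiers are trained. A minimal standalone sketch of that recipe follows, using a toy stand-in backbone rather than the repository's MoCo encoders (the input dimension and the 60-class head are illustrative):

import torch
import torch.nn as nn

backbone = nn.Sequential(nn.Linear(150, 256), nn.ReLU(), nn.Linear(256, 256))  # toy encoder stand-in
fc = nn.Linear(256, 60)                                      # new linear head (e.g. 60 NTU-60 classes)

for p in backbone.parameters():                              # freeze everything except the head
    p.requires_grad = False
fc.weight.data.normal_(mean=0.0, std=0.01)                   # same head init the script uses
fc.bias.data.zero_()

trainable = [p for p in list(backbone.parameters()) + list(fc.parameters()) if p.requires_grad]
assert len(trainable) == 2                                   # only fc.weight and fc.bias are optimized
optimizer = torch.optim.SGD(trainable, lr=30., momentum=0.9, weight_decay=0.)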
-------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import random 4 | import shutil 5 | import time 6 | import warnings 7 | 8 | import torch 9 | import torch.nn as nn 10 | import torch.nn.parallel 11 | import torch.backends.cudnn as cudnn 12 | import torch.optim 13 | import torch.utils.data 14 | import torch.utils.data.distributed 15 | 16 | import moco.builder_cmd 17 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 18 | 19 | # change for action recogniton 20 | from dataset import get_finetune_training_set,get_finetune_validation_set 21 | 22 | 23 | parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') 24 | parser.add_argument('-j', '--workers', default=32, type=int, metavar='N', 25 | help='number of data loading workers (default: 32)') 26 | parser.add_argument('--epochs', default=80, type=int, metavar='N', 27 | help='number of total epochs to run') 28 | parser.add_argument('--start-epoch', default=0, type=int, metavar='N', 29 | help='manual epoch number (useful on restarts)') 30 | parser.add_argument('-b', '--batch-size', default=256, type=int, 31 | metavar='N', 32 | help='mini-batch size (default: 256), this is the total ' 33 | 'batch size of all GPUs on the current node when ' 34 | 'using Data Parallel or Distributed Data Parallel') 35 | parser.add_argument('--lr', '--learning-rate', default=30., type=float, 36 | metavar='LR', help='initial learning rate', dest='lr') 37 | parser.add_argument('--schedule', default=[50, 70,], nargs='*', type=int, 38 | help='learning rate schedule (when to drop lr by a ratio)') 39 | parser.add_argument('--momentum', default=0.9, type=float, metavar='M', 40 | help='momentum') 41 | parser.add_argument('--wd', '--weight-decay', default=0., type=float, 42 | metavar='W', help='weight decay (default: 0.)', 43 | dest='weight_decay') 44 | parser.add_argument('-p', '--print-freq', default=10, type=int, 45 | metavar='N', help='print frequency (default: 10)') 46 | parser.add_argument('--resume', default='', type=str, metavar='PATH', 47 | help='path to latest checkpoint (default: none)') 48 | parser.add_argument('--seed', default=None, type=int, 49 | help='seed for initializing training. 
') 50 | parser.add_argument('--gpu', default=None, type=int, 51 | help='GPU id to use.') 52 | 53 | parser.add_argument('--pretrained', default='', type=str, 54 | help='path to moco pretrained checkpoint') 55 | parser.add_argument('--finetune-dataset', default='ntu60', type=str, 56 | help='which dataset to use for finetuning') 57 | 58 | parser.add_argument('--protocol', default='cross_view', type=str, 59 | help='traiining protocol of ntu') 60 | 61 | parser.add_argument('--finetune-skeleton-representation', default='graph-based', type=str, 62 | help='which skeleton-representation to use for downstream training') 63 | parser.add_argument('--pretrain-skeleton-representation', default='graph-based', type=str, 64 | help='which skeleton-representation where used for pre-training') 65 | 66 | best_acc1 = 0 67 | 68 | # initilize weight 69 | def weights_init_gru(model): 70 | with torch.no_grad(): 71 | for child in list(model.children()): 72 | print("init ",child) 73 | for param in list(child.parameters()): 74 | if param.dim() == 2: 75 | nn.init.xavier_uniform_(param) 76 | print('PC weight initial finished!') 77 | 78 | def load_pretrained(model, pretrained): 79 | if os.path.isfile(pretrained): 80 | print("=> loading checkpoint '{}'".format(pretrained)) 81 | checkpoint = torch.load(pretrained, map_location="cpu") 82 | 83 | # rename moco pre-trained keys 84 | state_dict = checkpoint['state_dict'] 85 | for k in list(state_dict.keys()): 86 | # retain only encoder_q up to before the embedding layer 87 | if not k.startswith('encoder_q'): 88 | del state_dict[k] 89 | elif '.fc' in k: 90 | del state_dict[k] 91 | else: 92 | pass 93 | 94 | msg = model.load_state_dict(state_dict, strict=False) 95 | print("message",msg) 96 | assert set(msg.missing_keys) == {"encoder_q.fc.weight", "encoder_q.fc.bias", 97 | "encoder_q_motion.fc.weight", "encoder_q_motion.fc.bias", 98 | "encoder_q_bone.fc.weight", "encoder_q_bone.fc.bias"} 99 | 100 | print("=> loaded pre-trained model '{}'".format(pretrained)) 101 | else: 102 | print("=> no checkpoint found at '{}'".format(pretrained)) 103 | 104 | 105 | def main(): 106 | args = parser.parse_args() 107 | 108 | if args.seed is not None: 109 | random.seed(args.seed) 110 | torch.manual_seed(args.seed) 111 | cudnn.deterministic = True 112 | warnings.warn('You have chosen to seed training. ' 113 | 'This will turn on the CUDNN deterministic setting, ' 114 | 'which can slow down your training considerably! ' 115 | 'You may see unexpected behavior when restarting ' 116 | 'from checkpoints.') 117 | 118 | if args.gpu is not None: 119 | warnings.warn('You have chosen a specific GPU. 
This will completely ' 120 | 'disable data parallelism.') 121 | ngpus_per_node = torch.cuda.device_count() 122 | # Simply call main_worker function 123 | main_worker(0, ngpus_per_node, args) 124 | 125 | 126 | def main_worker(gpu, ngpus_per_node, args): 127 | global best_acc1 128 | args.gpu = gpu 129 | 130 | if args.gpu is not None: 131 | print("Use GPU: {} for training".format(args.gpu)) 132 | 133 | # create model 134 | 135 | # training dataset 136 | from options import options_classification as options 137 | if args.finetune_dataset== 'ntu60' and args.protocol == 'cross_view': 138 | opts = options.opts_ntu_60_cross_view() 139 | elif args.finetune_dataset== 'ntu60' and args.protocol == 'cross_subject': 140 | opts = options.opts_ntu_60_cross_subject() 141 | elif args.finetune_dataset== 'ntu120' and args.protocol == 'cross_setup': 142 | opts = options.opts_ntu_120_cross_setup() 143 | elif args.finetune_dataset== 'ntu120' and args.protocol == 'cross_subject': 144 | opts = options.opts_ntu_120_cross_subject() 145 | elif args.finetune_dataset== 'pku_v2' and args.protocol == 'cross_view': 146 | opts = options.opts_pku_v2_cross_view() 147 | elif args.finetune_dataset== 'pku_v2' and args.protocol == 'cross_subject': 148 | opts = options.opts_pku_v2_cross_subject() 149 | 150 | opts.train_feeder_args['input_representation'] = args.finetune_skeleton_representation 151 | opts.test_feeder_args['input_representation'] = args.finetune_skeleton_representation 152 | 153 | # create model 154 | print("=> creating model") 155 | 156 | model = moco.builder_cmd.MoCo(args.finetune_skeleton_representation, opts.bi_gru_model_args, pretrain=False) 157 | print("options", opts.agcn_model_args, opts.train_feeder_args, opts.test_feeder_args) 158 | 159 | if args.pretrained: 160 | # freeze all layers but the last fc 161 | for name, param in model.encoder_q.named_parameters(): 162 | if name not in ['fc.weight', 'fc.bias']: 163 | param.requires_grad = False 164 | else: 165 | print('params',name) 166 | for name, param in model.encoder_q_motion.named_parameters(): 167 | if name not in ['fc.weight', 'fc.bias']: 168 | param.requires_grad = False 169 | else: 170 | print('params',name) 171 | for name, param in model.encoder_q_bone.named_parameters(): 172 | if name not in ['fc.weight', 'fc.bias']: 173 | param.requires_grad = False 174 | else: 175 | print('params',name) 176 | 177 | # init the fc layer 178 | model.encoder_q.fc.weight.data.normal_(mean=0.0, std=0.01) 179 | model.encoder_q.fc.bias.data.zero_() 180 | model.encoder_q_motion.fc.weight.data.normal_(mean=0.0, std=0.01) 181 | model.encoder_q_motion.fc.bias.data.zero_() 182 | model.encoder_q_bone.fc.weight.data.normal_(mean=0.0, std=0.01) 183 | model.encoder_q_bone.fc.bias.data.zero_() 184 | 185 | # load from pre-trained model 186 | load_pretrained(model, args.pretrained) 187 | 188 | if args.gpu is not None: 189 | model = model.cuda() 190 | model = nn.DataParallel(model, device_ids=None) 191 | 192 | # define loss function (criterion) and optimizer 193 | criterion = nn.CrossEntropyLoss().cuda(args.gpu) 194 | 195 | # optimize only the linear classifier 196 | parameters = list(filter(lambda p: p.requires_grad, model.parameters())) 197 | if args.pretrained: 198 | assert len(parameters) == 6 # (fc.weight, fc.bias) * 3 199 | optimizer = torch.optim.SGD(parameters, args.lr, 200 | momentum=args.momentum, 201 | weight_decay=args.weight_decay) 202 | if True: 203 | for parm in optimizer.param_groups: 204 | print ("optimize parameters lr",parm['lr']) 205 | 206 | # optionally resume from 
a checkpoint 207 | if args.resume: 208 | if os.path.isfile(args.resume): 209 | print("=> loading checkpoint '{}'".format(args.resume)) 210 | if args.gpu is None: 211 | checkpoint = torch.load(args.resume) 212 | else: 213 | # Map model to be loaded to specified single gpu. 214 | loc = 'cuda:{}'.format(args.gpu) 215 | checkpoint = torch.load(args.resume, map_location=loc) 216 | args.start_epoch = checkpoint['epoch'] 217 | best_acc1 = checkpoint['best_acc1'] 218 | if args.gpu is not None: 219 | # best_acc1 may be from a checkpoint from a different GPU 220 | best_acc1 = best_acc1.to(args.gpu) 221 | model.load_state_dict(checkpoint['state_dict']) 222 | optimizer.load_state_dict(checkpoint['optimizer']) 223 | print("=> loaded checkpoint '{}' (epoch {})" 224 | .format(args.resume, checkpoint['epoch'])) 225 | else: 226 | print("=> no checkpoint found at '{}'".format(args.resume)) 227 | 228 | cudnn.benchmark = True 229 | 230 | ## Data loading code 231 | 232 | train_dataset = get_finetune_training_set(opts) 233 | val_dataset = get_finetune_validation_set(opts) 234 | 235 | train_sampler = None 236 | 237 | train_loader = torch.utils.data.DataLoader( 238 | train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None), 239 | num_workers=args.workers, pin_memory=True, sampler=train_sampler,drop_last=False) 240 | 241 | 242 | val_loader = torch.utils.data.DataLoader( 243 | val_dataset, 244 | batch_size=args.batch_size, shuffle=False, 245 | num_workers=args.workers, pin_memory=True,drop_last=False) 246 | 247 | 248 | for epoch in range(args.start_epoch, args.epochs): 249 | 250 | adjust_learning_rate(optimizer, epoch, args) 251 | 252 | # train for one epoch 253 | train(train_loader, model, criterion, optimizer, epoch, args) 254 | 255 | # evaluate on validation set 256 | if (epoch+1) % 5 == 0: 257 | acc1 = validate(val_loader, model, criterion, args) 258 | else: 259 | acc1 = 0 260 | 261 | 262 | # remember best acc@1 and save checkpoint 263 | is_best = acc1 > best_acc1 264 | if is_best: 265 | print("found new best accuracy:= ",acc1) 266 | best_acc1 = max(acc1, best_acc1) 267 | 268 | save_checkpoint({ 269 | 'epoch': epoch + 1, 270 | 'state_dict': model.state_dict(), 271 | 'best_acc1': best_acc1, 272 | 'optimizer' : optimizer.state_dict(), 273 | }, is_best,filename = args.finetune_skeleton_representation + '_checkpoint.pth.tar' ) 274 | 275 | # sanity check 276 | if epoch == args.start_epoch: 277 | sanity_check(model.state_dict(), args.pretrained) 278 | print("Final best accuracy",best_acc1) 279 | 280 | 281 | def train(train_loader, model, criterion, optimizer, epoch, args): 282 | batch_time = AverageMeter('Time', ':6.3f') 283 | data_time = AverageMeter('Data', ':6.3f') 284 | losses = AverageMeter('Loss', ':.4e') 285 | top1 = AverageMeter('Acc@1', ':6.2f') 286 | top5 = AverageMeter('Acc@5', ':6.2f') 287 | progress = ProgressMeter( 288 | len(train_loader), 289 | [batch_time, data_time, losses, top1, top5], 290 | prefix="Epoch: [{}]".format(epoch)) 291 | 292 | """ 293 | Switch to eval mode: 294 | Under the protocol of linear classification on frozen features/models, 295 | it is not legitimate to change any part of the pre-trained model. 296 | BatchNorm in train mode may revise running mean/std (even if it receives 297 | no gradient), which are part of the model parameters too. 
298 | """ 299 | model.eval() 300 | 301 | end = time.time() 302 | for i, (images, target) in enumerate(train_loader): 303 | # measure data loading time 304 | data_time.update(time.time() - end) 305 | 306 | if args.gpu is not None: 307 | images = images.cuda(args.gpu, non_blocking=True) 308 | target = target.cuda(args.gpu, non_blocking=True) 309 | 310 | 311 | # compute output 312 | output = model(images) 313 | loss = criterion(output, target) 314 | 315 | # measure accuracy and record loss 316 | acc1, acc5 = accuracy(output, target, topk=(1, 5)) 317 | losses.update(loss.item(), images.size(0)) 318 | top1.update(acc1[0], images.size(0)) 319 | top5.update(acc5[0], images.size(0)) 320 | 321 | # compute gradient and do SGD step 322 | optimizer.zero_grad() 323 | loss.backward() 324 | optimizer.step() 325 | 326 | # measure elapsed time 327 | batch_time.update(time.time() - end) 328 | end = time.time() 329 | 330 | if i % args.print_freq == 0: 331 | progress.display(i) 332 | 333 | 334 | def validate(val_loader, model, criterion, args): 335 | batch_time = AverageMeter('Time', ':6.3f') 336 | losses = AverageMeter('Loss', ':.4e') 337 | top1 = AverageMeter('Acc@1', ':6.2f') 338 | top5 = AverageMeter('Acc@5', ':6.2f') 339 | progress = ProgressMeter( 340 | len(val_loader), 341 | [batch_time, losses, top1, top5], 342 | prefix='Test: ') 343 | 344 | # switch to evaluate mode 345 | model.eval() 346 | 347 | with torch.no_grad(): 348 | end = time.time() 349 | for i, (images, target) in enumerate(val_loader): 350 | if args.gpu is not None: 351 | images = images.cuda(args.gpu, non_blocking=True) 352 | target = target.cuda(args.gpu, non_blocking=True) 353 | 354 | # compute output 355 | output = model(images) 356 | loss = criterion(output, target) 357 | 358 | # measure accuracy and record loss 359 | acc1, acc5 = accuracy(output, target, topk=(1, 5)) 360 | losses.update(loss.item(), images.size(0)) 361 | top1.update(acc1[0], images.size(0)) 362 | top5.update(acc5[0], images.size(0)) 363 | 364 | # measure elapsed time 365 | batch_time.update(time.time() - end) 366 | end = time.time() 367 | 368 | if i % args.print_freq == 0: 369 | progress.display(i) 370 | 371 | # TODO: this should also be done with the ProgressMeter 372 | print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}' 373 | .format(top1=top1, top5=top5)) 374 | 375 | return top1.avg 376 | 377 | 378 | def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'): 379 | torch.save(state, filename) 380 | if is_best: 381 | shutil.copyfile(filename, filename+'model_best.pth.tar') 382 | 383 | 384 | def sanity_check(state_dict, pretrained_weights): 385 | """ 386 | Linear classifier should not change any weights other than the linear layer. 387 | This sanity check asserts nothing wrong happens (e.g., BN stats updated). 
388 | """ 389 | print("=> loading '{}' for sanity check".format(pretrained_weights)) 390 | checkpoint = torch.load(pretrained_weights, map_location="cpu") 391 | state_dict_pre = checkpoint['state_dict'] 392 | 393 | for k in list(state_dict.keys()): 394 | # only ignore fc layer 395 | if 'fc.weight' in k or 'fc.bias' in k: 396 | continue 397 | 398 | # name in pretrained model 399 | k_pre = k[len('module.'):] if k.startswith('module.') else k 400 | 401 | assert ((state_dict[k].cpu() == state_dict_pre[k_pre]).all()), \ 402 | '{} is changed in linear classifier training.'.format(k) 403 | 404 | print("=> sanity check passed.") 405 | 406 | 407 | class AverageMeter(object): 408 | """Computes and stores the average and current value""" 409 | def __init__(self, name, fmt=':f'): 410 | self.name = name 411 | self.fmt = fmt 412 | self.reset() 413 | 414 | def reset(self): 415 | self.val = 0 416 | self.avg = 0 417 | self.sum = 0 418 | self.count = 0 419 | 420 | def update(self, val, n=1): 421 | self.val = val 422 | self.sum += val * n 423 | self.count += n 424 | self.avg = self.sum / self.count 425 | 426 | def __str__(self): 427 | fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})' 428 | return fmtstr.format(**self.__dict__) 429 | 430 | 431 | class ProgressMeter(object): 432 | def __init__(self, num_batches, meters, prefix=""): 433 | self.batch_fmtstr = self._get_batch_fmtstr(num_batches) 434 | self.meters = meters 435 | self.prefix = prefix 436 | 437 | def display(self, batch): 438 | entries = [self.prefix + self.batch_fmtstr.format(batch)] 439 | entries += [str(meter) for meter in self.meters] 440 | print('\t'.join(entries)) 441 | 442 | def _get_batch_fmtstr(self, num_batches): 443 | num_digits = len(str(num_batches // 1)) 444 | fmt = '{:' + str(num_digits) + 'd}' 445 | return '[' + fmt + '/' + fmt.format(num_batches) + ']' 446 | 447 | 448 | def adjust_learning_rate(optimizer, epoch, args): 449 | """Decay the learning rate based on schedule""" 450 | lr = args.lr 451 | for milestone in args.schedule: 452 | lr *= 0.1 if epoch >= milestone else 1. 
453 | for param_group in optimizer.param_groups: 454 | param_group['lr'] = lr 455 | 456 | 457 | def accuracy(output, target, topk=(1,)): 458 | """Computes the accuracy over the k top predictions for the specified values of k""" 459 | with torch.no_grad(): 460 | maxk = max(topk) 461 | batch_size = target.size(0) 462 | 463 | _, pred = output.topk(maxk, 1, True, True) 464 | pred = pred.t() 465 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 466 | 467 | res = [] 468 | for k in topk: 469 | correct_k = correct[:k].contiguous().view(-1).float().sum(0, keepdim=True) 470 | res.append(correct_k.mul_(100.0 / batch_size)) 471 | return res 472 | 473 | 474 | if __name__ == '__main__': 475 | main() 476 | -------------------------------------------------------------------------------- /action_classification_cmd_semi.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import random 4 | import shutil 5 | import time 6 | import warnings 7 | 8 | import torch 9 | import torch.nn as nn 10 | import torch.nn.parallel 11 | import torch.backends.cudnn as cudnn 12 | import torch.optim 13 | import torch.utils.data 14 | import torch.utils.data.distributed 15 | 16 | import moco.builder_cmd 17 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 18 | 19 | # change for action recogniton 20 | from dataset import get_finetune_training_set_semi_supervised, get_finetune_validation_set_semi_supervised 21 | 22 | 23 | parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') 24 | parser.add_argument('-j', '--workers', default=32, type=int, metavar='N', 25 | help='number of data loading workers (default: 32)') 26 | parser.add_argument('--epochs', default=80, type=int, metavar='N', 27 | help='number of total epochs to run') 28 | parser.add_argument('--start-epoch', default=0, type=int, metavar='N', 29 | help='manual epoch number (useful on restarts)') 30 | parser.add_argument('-b', '--batch-size', default=256, type=int, 31 | metavar='N', 32 | help='mini-batch size (default: 256), this is the total ' 33 | 'batch size of all GPUs on the current node when ' 34 | 'using Data Parallel or Distributed Data Parallel') 35 | parser.add_argument('--lr', '--learning-rate', default=30., type=float, 36 | metavar='LR', help='initial learning rate', dest='lr') 37 | parser.add_argument('--schedule', default=[50, 70,], nargs='*', type=int, 38 | help='learning rate schedule (when to drop lr by a ratio)') 39 | parser.add_argument('--momentum', default=0.9, type=float, metavar='M', 40 | help='momentum') 41 | parser.add_argument('--wd', '--weight-decay', default=0., type=float, 42 | metavar='W', help='weight decay (default: 0.)', 43 | dest='weight_decay') 44 | parser.add_argument('-p', '--print-freq', default=10, type=int, 45 | metavar='N', help='print frequency (default: 10)') 46 | parser.add_argument('--resume', default='', type=str, metavar='PATH', 47 | help='path to latest checkpoint (default: none)') 48 | parser.add_argument('--seed', default=None, type=int, 49 | help='seed for initializing training. 
') 50 | parser.add_argument('--gpu', default=None, type=int, 51 | help='GPU id to use.') 52 | 53 | parser.add_argument('--pretrained', default='', type=str, 54 | help='path to moco pretrained checkpoint') 55 | parser.add_argument('--finetune-dataset', default='ntu60', type=str, 56 | help='which dataset to use for finetuning') 57 | 58 | parser.add_argument('--protocol', default='cross_view', type=str, 59 | help='training protocol') 60 | 61 | parser.add_argument('--data-ratio', default=0.2, type=float, 62 | help='ratio of training data used in semi-supervised setting') 63 | 64 | parser.add_argument('--finetune-skeleton-representation', default='graph-based', type=str, 65 | help='which skeleton-representation to use for downstream training') 66 | parser.add_argument('--pretrain-skeleton-representation', default='graph-based', type=str, 67 | help='which skeleton-representation where used for pre-training') 68 | 69 | best_acc1 = 0 70 | 71 | # initilize weight 72 | def weights_init_gru(model): 73 | with torch.no_grad(): 74 | for child in list(model.children()): 75 | print("init ",child) 76 | for param in list(child.parameters()): 77 | if param.dim() == 2: 78 | nn.init.xavier_uniform_(param) 79 | print('PC weight initial finished!') 80 | 81 | def load_pretrained(model, pretrained): 82 | if os.path.isfile(pretrained): 83 | print("=> loading checkpoint '{}'".format(pretrained)) 84 | checkpoint = torch.load(pretrained, map_location="cpu") 85 | 86 | # rename moco pre-trained keys 87 | state_dict = checkpoint['state_dict'] 88 | for k in list(state_dict.keys()): 89 | # retain only encoder_q up to before the embedding layer 90 | if not k.startswith('encoder_q'): 91 | del state_dict[k] 92 | elif '.fc' in k: 93 | del state_dict[k] 94 | else: 95 | pass 96 | 97 | msg = model.load_state_dict(state_dict, strict=False) 98 | print("message",msg) 99 | assert set(msg.missing_keys) == {"encoder_q.fc.weight", "encoder_q.fc.bias", 100 | "encoder_q_motion.fc.weight", "encoder_q_motion.fc.bias", 101 | "encoder_q_bone.fc.weight", "encoder_q_bone.fc.bias"} 102 | 103 | print("=> loaded pre-trained model '{}'".format(pretrained)) 104 | else: 105 | print("=> no checkpoint found at '{}'".format(pretrained)) 106 | 107 | 108 | def main(): 109 | args = parser.parse_args() 110 | 111 | if args.seed is not None: 112 | random.seed(args.seed) 113 | torch.manual_seed(args.seed) 114 | cudnn.deterministic = True 115 | warnings.warn('You have chosen to seed training. ' 116 | 'This will turn on the CUDNN deterministic setting, ' 117 | 'which can slow down your training considerably! ' 118 | 'You may see unexpected behavior when restarting ' 119 | 'from checkpoints.') 120 | 121 | if args.gpu is not None: 122 | warnings.warn('You have chosen a specific GPU. 
This will completely ' 123 | 'disable data parallelism.') 124 | ngpus_per_node = torch.cuda.device_count() 125 | # Simply call main_worker function 126 | main_worker(0, ngpus_per_node, args) 127 | 128 | 129 | def main_worker(gpu, ngpus_per_node, args): 130 | global best_acc1 131 | args.gpu = gpu 132 | 133 | if args.gpu is not None: 134 | print("Use GPU: {} for training".format(args.gpu)) 135 | 136 | # create model 137 | 138 | # training dataset 139 | from options import options_classification as options 140 | if args.finetune_dataset== 'ntu60' and args.protocol == 'cross_view': 141 | opts = options.opts_ntu_60_cross_view() 142 | elif args.finetune_dataset== 'ntu60' and args.protocol == 'cross_subject': 143 | opts = options.opts_ntu_60_cross_subject() 144 | elif args.finetune_dataset== 'ntu120' and args.protocol == 'cross_setup': 145 | opts = options.opts_ntu_120_cross_setup() 146 | elif args.finetune_dataset== 'ntu120' and args.protocol == 'cross_subject': 147 | opts = options.opts_ntu_120_cross_subject() 148 | elif args.finetune_dataset== 'pku_v2' and args.protocol == 'cross_view': 149 | opts = options.opts_pku_v2_cross_view() 150 | elif args.finetune_dataset== 'pku_v2' and args.protocol == 'cross_subject': 151 | opts = options.opts_pku_v2_cross_subject() 152 | elif args.finetune_dataset== 'ntu60' and args.protocol == 'cross_view_semi': 153 | opts = options.opts_ntu_60_cross_view() 154 | elif args.finetune_dataset== 'ntu60' and args.protocol == 'cross_subject_semi': 155 | opts = options.opts_ntu_60_cross_subject() 156 | elif args.finetune_dataset== 'pku_v2' and args.protocol == 'cross_subject_semi': 157 | opts = options.opts_pku_v2_cross_subject() 158 | 159 | opts.train_feeder_args['input_representation'] = args.finetune_skeleton_representation 160 | opts.test_feeder_args['input_representation'] = args.finetune_skeleton_representation 161 | 162 | if 'semi' in args.protocol: 163 | opts.train_feeder_args['data_ratio'] = args.data_ratio 164 | # create model 165 | print("=> creating model") 166 | 167 | model = moco.builder_cmd.MoCo(args.finetune_skeleton_representation, opts.bi_gru_model_args, pretrain=False) 168 | print("options", opts.agcn_model_args, opts.train_feeder_args, opts.test_feeder_args) 169 | 170 | if args.pretrained: 171 | # init the fc layer 172 | model.encoder_q.fc.weight.data.normal_(mean=0.0, std=0.01) 173 | model.encoder_q.fc.bias.data.zero_() 174 | model.encoder_q_motion.fc.weight.data.normal_(mean=0.0, std=0.01) 175 | model.encoder_q_motion.fc.bias.data.zero_() 176 | model.encoder_q_bone.fc.weight.data.normal_(mean=0.0, std=0.01) 177 | model.encoder_q_bone.fc.bias.data.zero_() 178 | 179 | # load from pre-trained model 180 | load_pretrained(model, args.pretrained) 181 | 182 | if args.gpu is not None: 183 | model = model.cuda() 184 | 185 | # define loss function (criterion) and optimizer 186 | criterion = nn.CrossEntropyLoss().cuda(args.gpu) 187 | 188 | optimizer = torch.optim.SGD(model.parameters(), 189 | lr=args.lr, 190 | momentum=args.momentum, 191 | weight_decay=args.weight_decay) 192 | 193 | if True: 194 | for parm in optimizer.param_groups: 195 | print ("optimize parameters lr",parm['lr']) 196 | 197 | # optionally resume from a checkpoint 198 | if args.resume: 199 | if os.path.isfile(args.resume): 200 | print("=> loading checkpoint '{}'".format(args.resume)) 201 | if args.gpu is None: 202 | checkpoint = torch.load(args.resume) 203 | else: 204 | # Map model to be loaded to specified single gpu. 
197 |     # optionally resume from a checkpoint
198 |     if args.resume:
199 |         if os.path.isfile(args.resume):
200 |             print("=> loading checkpoint '{}'".format(args.resume))
201 |             if args.gpu is None:
202 |                 checkpoint = torch.load(args.resume)
203 |             else:
204 |                 # Map model to be loaded to specified single gpu.
205 |                 loc = 'cuda:{}'.format(args.gpu)
206 |                 checkpoint = torch.load(args.resume, map_location=loc)
207 |             args.start_epoch = checkpoint['epoch']
208 |             best_acc1 = checkpoint['best_acc1']
209 |             if args.gpu is not None:
210 |                 # best_acc1 may be from a checkpoint from a different GPU
211 |                 best_acc1 = best_acc1.to(args.gpu)
212 |             model.load_state_dict(checkpoint['state_dict'])
213 |             optimizer.load_state_dict(checkpoint['optimizer'])
214 |             print("=> loaded checkpoint '{}' (epoch {})"
215 |                   .format(args.resume, checkpoint['epoch']))
216 |         else:
217 |             print("=> no checkpoint found at '{}'".format(args.resume))
218 | 
219 |     cudnn.benchmark = True
220 | 
221 |     ## Data loading code
222 | 
223 |     train_dataset = get_finetune_training_set_semi_supervised(opts)
224 |     val_dataset = get_finetune_validation_set_semi_supervised(opts)
225 | 
226 |     train_sampler = None
227 | 
228 |     train_loader = torch.utils.data.DataLoader(
229 |         train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None),
230 |         num_workers=args.workers, pin_memory=True, sampler=train_sampler, drop_last=False)
231 | 
232 | 
233 |     val_loader = torch.utils.data.DataLoader(
234 |         val_dataset,
235 |         batch_size=args.batch_size, shuffle=False,
236 |         num_workers=args.workers, pin_memory=True, drop_last=False)
237 | 
238 | 
239 |     for epoch in range(args.start_epoch, args.epochs):
240 | 
241 |         adjust_learning_rate(optimizer, epoch, args)
242 | 
243 |         # train for one epoch
244 |         train(train_loader, model, criterion, optimizer, epoch, args)
245 | 
246 |         # evaluate on validation set
247 |         if (epoch + 1) % 5 == 0:
248 |             acc1 = validate(val_loader, model, criterion, args)
249 |         else:
250 |             acc1 = 0
251 | 
252 | 
253 |         # remember best acc@1 and save checkpoint
254 |         is_best = acc1 > best_acc1
255 |         if is_best:
256 |             print("found new best accuracy:= ", acc1)
257 |         best_acc1 = max(acc1, best_acc1)
258 | 
259 |         save_checkpoint({
260 |             'epoch': epoch + 1,
261 |             'state_dict': model.state_dict(),
262 |             'best_acc1': best_acc1,
263 |             'optimizer': optimizer.state_dict(),
264 |         }, is_best, filename=args.finetune_skeleton_representation + '_checkpoint.pth.tar')
265 | 
266 |     print("Final best accuracy", best_acc1)
267 | 
268 | 
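# Note: validate() is only called every 5 epochs; on the remaining epochs acc1 is set
# to 0, so best_acc1 (and the *model_best* copy) can only be updated on evaluation epochs.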
286 | """ 287 | model.train() 288 | 289 | end = time.time() 290 | for i, (images, target) in enumerate(train_loader): 291 | # measure data loading time 292 | data_time.update(time.time() - end) 293 | 294 | if args.gpu is not None: 295 | images = images.cuda(args.gpu, non_blocking=True) 296 | target = target.cuda(args.gpu, non_blocking=True).long() 297 | 298 | 299 | # compute output 300 | output = model(images, view='joint') 301 | loss = criterion(output, target) 302 | 303 | # measure accuracy and record loss 304 | acc1, acc5 = accuracy(output, target, topk=(1, 5)) 305 | losses.update(loss.item(), images.size(0)) 306 | top1.update(acc1[0], images.size(0)) 307 | top5.update(acc5[0], images.size(0)) 308 | 309 | # compute gradient and do SGD step 310 | optimizer.zero_grad() 311 | loss.backward() 312 | optimizer.step() 313 | 314 | # measure elapsed time 315 | batch_time.update(time.time() - end) 316 | end = time.time() 317 | 318 | if i % args.print_freq == 0: 319 | progress.display(i) 320 | 321 | 322 | def validate(val_loader, model, criterion, args): 323 | batch_time = AverageMeter('Time', ':6.3f') 324 | losses = AverageMeter('Loss', ':.4e') 325 | top1 = AverageMeter('Acc@1', ':6.2f') 326 | top5 = AverageMeter('Acc@5', ':6.2f') 327 | progress = ProgressMeter( 328 | len(val_loader), 329 | [batch_time, losses, top1, top5], 330 | prefix='Test: ') 331 | 332 | # switch to evaluate mode 333 | model.eval() 334 | 335 | with torch.no_grad(): 336 | end = time.time() 337 | for i, (images, target) in enumerate(val_loader): 338 | if args.gpu is not None: 339 | images = images.cuda(args.gpu, non_blocking=True) 340 | target = target.cuda(args.gpu, non_blocking=True).long() 341 | 342 | # compute output 343 | output = model(images, view='joint') 344 | loss = criterion(output, target) 345 | 346 | # measure accuracy and record loss 347 | acc1, acc5 = accuracy(output, target, topk=(1, 5)) 348 | losses.update(loss.item(), images.size(0)) 349 | top1.update(acc1[0], images.size(0)) 350 | top5.update(acc5[0], images.size(0)) 351 | 352 | # measure elapsed time 353 | batch_time.update(time.time() - end) 354 | end = time.time() 355 | 356 | if i % args.print_freq == 0: 357 | progress.display(i) 358 | 359 | # TODO: this should also be done with the ProgressMeter 360 | print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}' 361 | .format(top1=top1, top5=top5)) 362 | 363 | return top1.avg 364 | 365 | 366 | def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'): 367 | torch.save(state, filename) 368 | if is_best: 369 | shutil.copyfile(filename, filename+'model_best.pth.tar') 370 | 371 | 372 | def sanity_check(state_dict, pretrained_weights): 373 | """ 374 | Linear classifier should not change any weights other than the linear layer. 375 | This sanity check asserts nothing wrong happens (e.g., BN stats updated). 
376 | """ 377 | print("=> loading '{}' for sanity check".format(pretrained_weights)) 378 | checkpoint = torch.load(pretrained_weights, map_location="cpu") 379 | state_dict_pre = checkpoint['state_dict'] 380 | 381 | for k in list(state_dict.keys()): 382 | # only ignore fc layer 383 | if 'fc.weight' in k or 'fc.bias' in k: 384 | continue 385 | 386 | # name in pretrained model 387 | k_pre = k[len('module.'):] if k.startswith('module.') else k 388 | 389 | assert ((state_dict[k].cpu() == state_dict_pre[k_pre]).all()), \ 390 | '{} is changed in linear classifier training.'.format(k) 391 | 392 | print("=> sanity check passed.") 393 | 394 | 395 | class AverageMeter(object): 396 | """Computes and stores the average and current value""" 397 | def __init__(self, name, fmt=':f'): 398 | self.name = name 399 | self.fmt = fmt 400 | self.reset() 401 | 402 | def reset(self): 403 | self.val = 0 404 | self.avg = 0 405 | self.sum = 0 406 | self.count = 0 407 | 408 | def update(self, val, n=1): 409 | self.val = val 410 | self.sum += val * n 411 | self.count += n 412 | self.avg = self.sum / self.count 413 | 414 | def __str__(self): 415 | fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})' 416 | return fmtstr.format(**self.__dict__) 417 | 418 | 419 | class ProgressMeter(object): 420 | def __init__(self, num_batches, meters, prefix=""): 421 | self.batch_fmtstr = self._get_batch_fmtstr(num_batches) 422 | self.meters = meters 423 | self.prefix = prefix 424 | 425 | def display(self, batch): 426 | entries = [self.prefix + self.batch_fmtstr.format(batch)] 427 | entries += [str(meter) for meter in self.meters] 428 | print('\t'.join(entries)) 429 | 430 | def _get_batch_fmtstr(self, num_batches): 431 | num_digits = len(str(num_batches // 1)) 432 | fmt = '{:' + str(num_digits) + 'd}' 433 | return '[' + fmt + '/' + fmt.format(num_batches) + ']' 434 | 435 | 436 | def adjust_learning_rate(optimizer, epoch, args): 437 | """Decay the learning rate based on schedule""" 438 | lr = args.lr 439 | for milestone in args.schedule: 440 | lr *= 0.1 if epoch >= milestone else 1. 441 | for index, param_group in enumerate(optimizer.param_groups): 442 | param_group['lr'] = lr 443 | 444 | 445 | 446 | 447 | def accuracy(output, target, topk=(1,)): 448 | """Computes the accuracy over the k top predictions for the specified values of k""" 449 | with torch.no_grad(): 450 | maxk = max(topk) 451 | batch_size = target.size(0) 452 | 453 | _, pred = output.topk(maxk, 1, True, True) 454 | pred = pred.t() 455 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 456 | 457 | res = [] 458 | for k in topk: 459 | correct_k = correct[:k].contiguous().view(-1).float().sum(0, keepdim=True) 460 | res.append(correct_k.mul_(100.0 / batch_size)) 461 | return res 462 | 463 | 464 | if __name__ == '__main__': 465 | main() 466 | --------------------------------------------------------------------------------