├── moco ├── utils │ ├── __init__.py │ ├── tgcn.py │ └── graph.py ├── GRU.py └── builder_cmd.py ├── feeder ├── __init__.py ├── feeder_downstream.py ├── feeder_downstream_semi_supervised.py ├── feeder_pretraining_intra.py ├── augmentations.py └── feeder_pretraining_inter.py ├── .gitignore ├── images └── cmd.jpg ├── graph ├── __init__.py ├── tools.py ├── ntu_rgb_d.py └── kinetics.py ├── script_action_classification_cmd_transfer.sh ├── dataset.py ├── script_action_classification_cmd.sh ├── script_action_retrieval_cmd.sh ├── script_pretrain_moco_cmd.sh ├── data_gen ├── rotation.py ├── preprocess.py ├── pku_gendata.py ├── ntu_gendata.py └── resource │ ├── NTU_RGBD60_samples_with_missing_skeletons.txt │ └── NTU_RGBD120_samples_with_missing_skeletons.txt ├── README.md ├── script_action_classification_cmd_semi.sh ├── options ├── options_pretraining.py ├── options_retrieval.py └── options_classification.py ├── pretrain_moco_cmd.py ├── action_retrieval_cmd.py ├── action_classification_cmd.py └── action_classification_cmd_semi.py /moco/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /feeder/__init__.py: -------------------------------------------------------------------------------- 1 | from . import augmentations 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | *.pyc 3 | 4 | data/ 5 | checkpoints/ -------------------------------------------------------------------------------- /images/cmd.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maoyunyao/CMD/HEAD/images/cmd.jpg -------------------------------------------------------------------------------- /graph/__init__.py: -------------------------------------------------------------------------------- 1 | from . import tools 2 | from . import ntu_rgb_d 3 | from . 
import kinetics 4 | -------------------------------------------------------------------------------- /graph/tools.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def edge2mat(link, num_node): 5 | A = np.zeros((num_node, num_node)) 6 | for i, j in link: 7 | A[j, i] = 1 8 | return A 9 | 10 | 11 | def normalize_digraph(A): 12 | Dl = np.sum(A, 0) 13 | h, w = A.shape 14 | Dn = np.zeros((w, w)) 15 | for i in range(w): 16 | if Dl[i] > 0: 17 | Dn[i, i] = Dl[i] ** (-1) 18 | AD = np.dot(A, Dn) 19 | return AD 20 | 21 | 22 | def get_spatial_graph(num_node, self_link, inward, outward): 23 | I = edge2mat(self_link, num_node) 24 | In = normalize_digraph(edge2mat(inward, num_node)) 25 | Out = normalize_digraph(edge2mat(outward, num_node)) 26 | A = np.stack((I, In, Out)) 27 | return A 28 | -------------------------------------------------------------------------------- /script_action_classification_cmd_transfer.sh: -------------------------------------------------------------------------------- 1 | # Transfer learning: semi-supervised finetuning on PKU-MMD II with data ratio = 1.0 2 | CUDA_VISIBLE_DEVICES=0 python action_classification_cmd_semi.py \ 3 | --lr 0.01 \ 4 | --batch-size 64 \ 5 | --pretrained /data/user/ACTION/CMD/checkpoints/pretrain_moco_cmd/ntu60_cross_subject/checkpoint_0450.pth.tar \ 6 | --finetune-dataset pku_v2 \ 7 | --protocol cross_subject_semi \ 8 | --data-ratio 1.0 \ 9 | --finetune-skeleton-representation graph-based 10 | 11 | 12 | CUDA_VISIBLE_DEVICES=0 python action_classification_cmd_semi.py \ 13 | --lr 0.01 \ 14 | --batch-size 64 \ 15 | --pretrained /data/user/ACTION/CMD/checkpoints/pretrain_moco_cmd/ntu120_cross_subject/checkpoint_0450.pth.tar \ 16 | --finetune-dataset pku_v2 \ 17 | --protocol cross_subject_semi \ 18 | --data-ratio 1.0 \ 19 | --finetune-skeleton-representation graph-based -------------------------------------------------------------------------------- /moco/utils/tgcn.py: -------------------------------------------------------------------------------- 1 | # The basic unit of graph convolutional networks.
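# ConvTemporalGraphical below is the spatial graph convolution used by ST-GCN-style backbones:
# a 1x1 temporal convolution expands the (N, C, T, V) input to kernel_size * out_channels channels,
# the result is reshaped to (N, K, C_out, T, V), and einsum('nkctv,kvw->nctw') contracts it with
# the K stacked adjacency matrices A of shape (K, V, V), one per spatial partition.
# Illustrative shape check (hypothetical values, not part of the original file):
#   gcn = ConvTemporalGraphical(in_channels=3, out_channels=64, kernel_size=3)
#   x, A = gcn(torch.randn(8, 3, 64, 25), torch.randn(3, 25, 25))  # x: (8, 64, 64, 25)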
2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | class ConvTemporalGraphical(nn.Module): 7 | def __init__(self, 8 | in_channels, 9 | out_channels, 10 | kernel_size, 11 | t_kernel_size=1, 12 | t_stride=1, 13 | t_padding=0, 14 | t_dilation=1, 15 | bias=True): 16 | super().__init__() 17 | 18 | self.kernel_size = kernel_size 19 | self.conv = nn.Conv2d( 20 | in_channels, 21 | out_channels * kernel_size, 22 | kernel_size=(t_kernel_size, 1), 23 | padding=(t_padding, 0), 24 | stride=(t_stride, 1), 25 | dilation=(t_dilation, 1), 26 | bias=bias) 27 | 28 | def forward(self, x, A): 29 | assert A.size(0) == self.kernel_size 30 | 31 | x = self.conv(x) 32 | 33 | n, kc, t, v = x.size() 34 | x = x.view(n, self.kernel_size, kc//self.kernel_size, t, v) 35 | x = torch.einsum('nkctv,kvw->nctw', (x, A)) 36 | 37 | return x.contiguous(), A 38 | -------------------------------------------------------------------------------- /dataset.py: -------------------------------------------------------------------------------- 1 | 2 | def get_pretraining_set_intra(opts): 3 | 4 | from feeder.feeder_pretraining_intra import Feeder 5 | training_data = Feeder(**opts.train_feeder_args) 6 | 7 | return training_data 8 | 9 | def get_pretraining_set_inter(opts): 10 | 11 | from feeder.feeder_pretraining_inter import Feeder 12 | training_data = Feeder(**opts.train_feeder_args) 13 | 14 | return training_data 15 | 16 | def get_finetune_training_set(opts): 17 | 18 | from feeder.feeder_downstream import Feeder 19 | 20 | data = Feeder(**opts.train_feeder_args) 21 | 22 | return data 23 | 24 | def get_finetune_validation_set(opts): 25 | 26 | from feeder.feeder_downstream import Feeder 27 | data = Feeder(**opts.test_feeder_args) 28 | 29 | return data 30 | 31 | def get_finetune_training_set_semi_supervised(opts): 32 | 33 | from feeder.feeder_downstream_semi_supervised import Feeder 34 | 35 | data = Feeder(**opts.train_feeder_args) 36 | 37 | return data 38 | 39 | def get_finetune_validation_set_semi_supervised(opts): 40 | 41 | from feeder.feeder_downstream_semi_supervised import Feeder 42 | data = Feeder(**opts.test_feeder_args) 43 | 44 | return data 45 | -------------------------------------------------------------------------------- /script_action_classification_cmd.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0 python action_classification_cmd.py \ 2 | --lr 0.1 \ 3 | --batch-size 64 \ 4 | --pretrained /data/user/ACTION/CMD/checkpoints/pretrain_moco_cmd/ntu60_cross_subject/checkpoint_0450.pth.tar \ 5 | --finetune-dataset ntu60 --protocol cross_subject --finetune-skeleton-representation graph-based 6 | 7 | CUDA_VISIBLE_DEVICES=0 python action_classification_cmd.py \ 8 | --lr 0.1 \ 9 | --batch-size 64 \ 10 | --pretrained /data/user/ACTION/CMD/checkpoints/pretrain_moco_cmd/ntu60_cross_view/checkpoint_0450.pth.tar \ 11 | --finetune-dataset ntu60 --protocol cross_view --finetune-skeleton-representation graph-based 12 | 13 | CUDA_VISIBLE_DEVICES=0 python action_classification_cmd.py \ 14 | --lr 0.1 \ 15 | --batch-size 64 \ 16 | --pretrained /data/user/ACTION/CMD/checkpoints/pretrain_moco_cmd/ntu120_cross_subject/checkpoint_0450.pth.tar \ 17 | --finetune-dataset ntu120 --protocol cross_subject --finetune-skeleton-representation graph-based 18 | 19 | CUDA_VISIBLE_DEVICES=0 python action_classification_cmd.py \ 20 | --lr 0.1 \ 21 | --batch-size 64 \ 22 | --pretrained /data/user/ACTION/CMD/checkpoints/pretrain_moco_cmd/ntu120_cross_setup/checkpoint_0450.pth.tar \ 23 | 
--finetune-dataset ntu120 --protocol cross_setup --finetune-skeleton-representation graph-based 24 | -------------------------------------------------------------------------------- /script_action_retrieval_cmd.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0 python action_retrieval_cmd.py \ 2 | --lr 0.1 \ 3 | --batch-size 64 \ 4 | --knn-neighbours 1 \ 5 | --pretrained /data/user/ACTION/CMD/checkpoints/pretrain_moco_cmd/ntu60_cross_view/checkpoint_0450.pth.tar \ 6 | --finetune-dataset ntu60 --protocol cross_view --finetune-skeleton-representation graph-based 7 | 8 | 9 | CUDA_VISIBLE_DEVICES=0 python action_retrieval_cmd.py \ 10 | --lr 0.1 \ 11 | --batch-size 64 \ 12 | --knn-neighbours 1 \ 13 | --pretrained /data/user/ACTION/CMD/checkpoints/pretrain_moco_cmd/ntu60_cross_subject/checkpoint_0450.pth.tar \ 14 | --finetune-dataset ntu60 --protocol cross_subject --finetune-skeleton-representation graph-based 15 | 16 | 17 | CUDA_VISIBLE_DEVICES=0 python action_retrieval_cmd.py \ 18 | --lr 0.1 \ 19 | --batch-size 64 \ 20 | --knn-neighbours 1 \ 21 | --pretrained /data/user/ACTION/CMD/checkpoints/pretrain_moco_cmd/ntu120_cross_subject/checkpoint_0450.pth.tar \ 22 | --finetune-dataset ntu120 --protocol cross_subject --finetune-skeleton-representation graph-based 23 | 24 | 25 | CUDA_VISIBLE_DEVICES=0 python action_retrieval_cmd.py \ 26 | --lr 0.1 \ 27 | --batch-size 64 \ 28 | --knn-neighbours 1 \ 29 | --pretrained /data/user/ACTION/CMD/checkpoints/pretrain_moco_cmd/ntu120_cross_setup/checkpoint_0450.pth.tar \ 30 | --finetune-dataset ntu120 --protocol cross_setup --finetune-skeleton-representation graph-based 31 | -------------------------------------------------------------------------------- /graph/ntu_rgb_d.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.extend(['../']) 4 | from graph import tools 5 | 6 | num_node = 25 7 | self_link = [(i, i) for i in range(num_node)] 8 | inward_ori_index = [(1, 2), (2, 21), (3, 21), (4, 3), (5, 21), (6, 5), (7, 6), 9 | (8, 7), (9, 21), (10, 9), (11, 10), (12, 11), (13, 1), 10 | (14, 13), (15, 14), (16, 15), (17, 1), (18, 17), (19, 18), 11 | (20, 19), (22, 23), (23, 8), (24, 25), (25, 12)] 12 | inward = [(i - 1, j - 1) for (i, j) in inward_ori_index] 13 | outward = [(j, i) for (i, j) in inward] 14 | neighbor = inward + outward 15 | 16 | 17 | class Graph: 18 | def __init__(self, labeling_mode='spatial'): 19 | self.A = self.get_adjacency_matrix(labeling_mode) 20 | self.num_node = num_node 21 | self.self_link = self_link 22 | self.inward = inward 23 | self.outward = outward 24 | self.neighbor = neighbor 25 | 26 | def get_adjacency_matrix(self, labeling_mode=None): 27 | if labeling_mode is None: 28 | return self.A 29 | if labeling_mode == 'spatial': 30 | A = tools.get_spatial_graph(num_node, self_link, inward, outward) 31 | else: 32 | raise ValueError() 33 | return A 34 | 35 | 36 | if __name__ == '__main__': 37 | import matplotlib.pyplot as plt 38 | import os 39 | 40 | # os.environ['DISPLAY'] = 'localhost:11.0' 41 | A = Graph('spatial').get_adjacency_matrix() 42 | for i in A: 43 | plt.imshow(i, cmap='gray') 44 | plt.show() 45 | print(A) 46 | -------------------------------------------------------------------------------- /graph/kinetics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sys 3 | 4 | sys.path.extend(['../']) 5 | from graph import tools 6 | import networkx as 
nx 7 | 8 | # Joint index: 9 | # {0, "Nose"} 10 | # {1, "Neck"}, 11 | # {2, "RShoulder"}, 12 | # {3, "RElbow"}, 13 | # {4, "RWrist"}, 14 | # {5, "LShoulder"}, 15 | # {6, "LElbow"}, 16 | # {7, "LWrist"}, 17 | # {8, "RHip"}, 18 | # {9, "RKnee"}, 19 | # {10, "RAnkle"}, 20 | # {11, "LHip"}, 21 | # {12, "LKnee"}, 22 | # {13, "LAnkle"}, 23 | # {14, "REye"}, 24 | # {15, "LEye"}, 25 | # {16, "REar"}, 26 | # {17, "LEar"}, 27 | 28 | # Edge format: (origin, neighbor) 29 | num_node = 18 30 | self_link = [(i, i) for i in range(num_node)] 31 | inward = [(4, 3), (3, 2), (7, 6), (6, 5), (13, 12), (12, 11), (10, 9), (9, 8), 32 | (11, 5), (8, 2), (5, 1), (2, 1), (0, 1), (15, 0), (14, 0), (17, 15), 33 | (16, 14)] 34 | outward = [(j, i) for (i, j) in inward] 35 | neighbor = inward + outward 36 | 37 | 38 | class Graph: 39 | def __init__(self, labeling_mode='spatial'): 40 | self.A = self.get_adjacency_matrix(labeling_mode) 41 | self.num_node = num_node 42 | self.self_link = self_link 43 | self.inward = inward 44 | self.outward = outward 45 | self.neighbor = neighbor 46 | 47 | def get_adjacency_matrix(self, labeling_mode=None): 48 | if labeling_mode is None: 49 | return self.A 50 | if labeling_mode == 'spatial': 51 | A = tools.get_spatial_graph(num_node, self_link, inward, outward) 52 | else: 53 | raise ValueError() 54 | return A 55 | 56 | 57 | if __name__ == '__main__': 58 | A = Graph('spatial').get_adjacency_matrix() 59 | print('') 60 | -------------------------------------------------------------------------------- /script_pretrain_moco_cmd.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | CUDA_VISIBLE_DEVICES=0 python pretrain_moco_cmd.py \ 4 | --lr 0.01 \ 5 | --batch-size 64 \ 6 | --teacher-t 0.05 \ 7 | --student-t 0.1 \ 8 | --topk 8192 \ 9 | --mlp \ 10 | --contrast-t 0.07 \ 11 | --contrast-k 16384 \ 12 | --checkpoint-path ./checkpoints/pretrain_moco_cmd/ntu60_cross_subject \ 13 | --schedule 351 \ 14 | --epochs 451 \ 15 | --pre-dataset ntu60 \ 16 | --skeleton-representation graph-based \ 17 | --protocol cross_subject 18 | 19 | 20 | CUDA_VISIBLE_DEVICES=0 python pretrain_moco_cmd.py \ 21 | --lr 0.01 \ 22 | --batch-size 64 \ 23 | --teacher-t 0.05 \ 24 | --student-t 0.1 \ 25 | --topk 8192 \ 26 | --mlp \ 27 | --contrast-t 0.07 \ 28 | --contrast-k 16384 \ 29 | --checkpoint-path ./checkpoints/pretrain_moco_cmd/ntu60_cross_view \ 30 | --schedule 351 \ 31 | --epochs 451 \ 32 | --pre-dataset ntu60 \ 33 | --skeleton-representation graph-based \ 34 | --protocol cross_view 35 | 36 | 37 | CUDA_VISIBLE_DEVICES=0 python pretrain_moco_cmd.py \ 38 | --lr 0.01 \ 39 | --batch-size 64 \ 40 | --teacher-t 0.05 \ 41 | --student-t 0.1 \ 42 | --topk 8192 \ 43 | --mlp \ 44 | --contrast-t 0.07 \ 45 | --contrast-k 16384 \ 46 | --checkpoint-path ./checkpoints/pretrain_moco_cmd/ntu120_cross_subject \ 47 | --schedule 351 \ 48 | --epochs 451 \ 49 | --pre-dataset ntu120 \ 50 | --skeleton-representation graph-based \ 51 | --protocol cross_subject 52 | 53 | 54 | CUDA_VISIBLE_DEVICES=0 python pretrain_moco_cmd.py \ 55 | --lr 0.01 \ 56 | --batch-size 64 \ 57 | --teacher-t 0.05 \ 58 | --student-t 0.1 \ 59 | --topk 8192 \ 60 | --mlp \ 61 | --contrast-t 0.07 \ 62 | --contrast-k 16384 \ 63 | --checkpoint-path ./checkpoints/pretrain_moco_cmd/ntu120_cross_setup \ 64 | --schedule 351 \ 65 | --epochs 451 \ 66 | --pre-dataset ntu120 \ 67 | --skeleton-representation graph-based \ 68 | --protocol cross_setup 69 | 70 | 71 | CUDA_VISIBLE_DEVICES=0 python pretrain_moco_cmd.py \ 72 | --lr 0.01 \ 73 |
--batch-size 64 \ 74 | --teacher-t 0.05 \ 75 | --student-t 0.1 \ 76 | --topk 8192 \ 77 | --mlp \ 78 | --contrast-t 0.07 \ 79 | --contrast-k 16384 \ 80 | --checkpoint-path ./checkpoints/pretrain_moco_cmd/pku_v2_cross_subject \ 81 | --schedule 801 \ 82 | --epochs 1001 \ 83 | --pre-dataset pku_v2 \ 84 | --skeleton-representation graph-based \ 85 | --protocol cross_subject -------------------------------------------------------------------------------- /data_gen/rotation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math 3 | 4 | 5 | def rotation_matrix(axis, theta): 6 | """ 7 | Return the rotation matrix associated with counterclockwise rotation about 8 | the given axis by theta radians. 9 | """ 10 | if np.abs(axis).sum() < 1e-6 or np.abs(theta) < 1e-6: 11 | return np.eye(3) 12 | axis = np.asarray(axis) 13 | axis = axis / math.sqrt(np.dot(axis, axis)) 14 | a = math.cos(theta / 2.0) 15 | b, c, d = -axis * math.sin(theta / 2.0) 16 | aa, bb, cc, dd = a * a, b * b, c * c, d * d 17 | bc, ad, ac, ab, bd, cd = b * c, a * d, a * c, a * b, b * d, c * d 18 | return np.array([[aa + bb - cc - dd, 2 * (bc + ad), 2 * (bd - ac)], 19 | [2 * (bc - ad), aa + cc - bb - dd, 2 * (cd + ab)], 20 | [2 * (bd + ac), 2 * (cd - ab), aa + dd - bb - cc]]) 21 | 22 | 23 | def unit_vector(vector): 24 | """ Returns the unit vector of the vector. """ 25 | return vector / np.linalg.norm(vector) 26 | 27 | 28 | def angle_between(v1, v2): 29 | """ Returns the angle in radians between vectors 'v1' and 'v2':: 30 | 31 | >>> angle_between((1, 0, 0), (0, 1, 0)) 32 | 1.5707963267948966 33 | >>> angle_between((1, 0, 0), (1, 0, 0)) 34 | 0.0 35 | >>> angle_between((1, 0, 0), (-1, 0, 0)) 36 | 3.141592653589793 37 | """ 38 | if np.abs(v1).sum() < 1e-6 or np.abs(v2).sum() < 1e-6: 39 | return 0 40 | v1_u = unit_vector(v1) 41 | v2_u = unit_vector(v2) 42 | return np.arccos(np.clip(np.dot(v1_u, v2_u), -1.0, 1.0)) 43 | 44 | 45 | def x_rotation(vector, theta): 46 | """Rotates 3-D vector around x-axis""" 47 | R = np.array([[1, 0, 0], [0, np.cos(theta), -np.sin(theta)], [0, np.sin(theta), np.cos(theta)]]) 48 | return np.dot(R, vector) 49 | 50 | 51 | def y_rotation(vector, theta): 52 | """Rotates 3-D vector around y-axis""" 53 | R = np.array([[np.cos(theta), 0, np.sin(theta)], [0, 1, 0], [-np.sin(theta), 0, np.cos(theta)]]) 54 | return np.dot(R, vector) 55 | 56 | 57 | def z_rotation(vector, theta): 58 | """Rotates 3-D vector around z-axis""" 59 | R = np.array([[np.cos(theta), -np.sin(theta), 0], [np.sin(theta), np.cos(theta), 0], [0, 0, 1]]) 60 | return np.dot(R, vector) 61 | -------------------------------------------------------------------------------- /moco/GRU.py: -------------------------------------------------------------------------------- 1 | from torch.utils.data import Dataset, DataLoader,SubsetRandomSampler 2 | from torch.nn.utils import clip_grad_norm_ 3 | from torch.nn.utils.rnn import pad_packed_sequence, pad_sequence, pack_padded_sequence 4 | from io import open 5 | import unicodedata 6 | import string 7 | import re 8 | import random 9 | import torch 10 | import torch.nn as nn 11 | 12 | from torch import optim 13 | import torch.nn.functional as F 14 | 15 | import numpy as np 16 | import math 17 | from torch.utils.data import random_split 18 | import torchvision 19 | 20 | 21 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 22 | 23 | class EncoderRNN(nn.Module): 24 | def __init__(self, input_size, hidden_size, num_layers): 25 | 
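# Bidirectional GRU encoder (batch_first): each time step yields a feature of size 2 * hidden_size.
# forward() below unrolls the GRU one frame at a time and, for every sequence in the batch, keeps
# the output at its last valid frame (index seq_len - 1) as the clip-level representation.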
super(EncoderRNN, self).__init__() 26 | self.hidden_size = hidden_size 27 | self.gru = nn.GRU(input_size, hidden_size, num_layers=num_layers, bidirectional=True, batch_first=True) 28 | self.num_layers = num_layers 29 | 30 | def forward(self, input_tensor, seq_len): 31 | 32 | self.gru.flatten_parameters() 33 | 34 | encoder_hidden = torch.Tensor().to(device) 35 | 36 | for it in range(max(seq_len)): 37 | if it == 0: 38 | enout_tmp, hidden_tmp = self.gru(input_tensor[:, it:it+1, :]) 39 | else: 40 | enout_tmp, hidden_tmp = self.gru(input_tensor[:, it:it+1, :], hidden_tmp) 41 | encoder_hidden = torch.cat((encoder_hidden, enout_tmp),1) 42 | 43 | hidden = torch.empty((1, len(seq_len), encoder_hidden.shape[-1])).to(device) 44 | count = 0 45 | for ith_len in seq_len: 46 | hidden[0, count, :] = encoder_hidden[count, ith_len - 1, :] 47 | count += 1 48 | 49 | return hidden 50 | 51 | 52 | class BIGRU(nn.Module): 53 | def __init__(self, en_input_size, en_hidden_size, en_num_layers=3, num_class=60): 54 | super(BIGRU, self).__init__() 55 | self.en_num_layers = en_num_layers 56 | self.encoder = EncoderRNN(en_input_size, en_hidden_size, en_num_layers).to(device) 57 | self.fc = nn.Linear(2*en_hidden_size,num_class) 58 | 59 | self.input_norm = nn.BatchNorm1d(en_input_size) # batch-norm over per-frame input features 60 | 61 | self.en_input_size = en_input_size 62 | 63 | def forward(self, input_tensor, knn_eval=False): 64 | 65 | input_tensor = self.input_norm(input_tensor.permute(0,2,1).contiguous()).permute(0,2,1).contiguous() # BN 66 | 67 | seq_len = torch.zeros(input_tensor.size(0),dtype=int) + input_tensor.size(1) # list of input sequence lengths (all full length here) 68 | 69 | encoder_hidden = self.encoder( 70 | input_tensor, seq_len) 71 | if knn_eval: # return last layer features during KNN evaluation (action retrieval) 72 | 73 | return encoder_hidden[0] 74 | else: 75 | out = self.fc(encoder_hidden[0]) 76 | return out -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CMD: Self-supervised 3D Action Representation Learning with Cross-modal Mutual Distillation 2 | [Yunyao Mao](http://home.ustc.edu.cn/~myy2016), [Wengang Zhou](http://staff.ustc.edu.cn/~zhwg/index.html), Zhenbo Lu, Jiajun Deng, and [Houqiang Li](http://staff.ustc.edu.cn/~lihq) 3 | 4 | ### Accepted by **ECCV 2022 (Oral)**. [[Paper Link]](https://arxiv.org/pdf/2208.12448.pdf) 5 | 6 | This repository includes a Python (PyTorch) implementation of CMD. 7 | 8 | ![](./images/cmd.jpg) 9 | 10 | ## Abstract 11 | In 3D action recognition, there exists rich complementary information between skeleton modalities. Nevertheless, how to model and 12 | utilize this information remains a challenging problem for self-supervised 13 | 3D action representation learning. In this work, we formulate the cross-modal interaction as a bidirectional knowledge distillation problem. Different from classic distillation solutions that transfer the knowledge of a fixed and pre-trained teacher to the student, in this work, the knowledge is continuously updated and bidirectionally distilled between modalities. To this end, we propose a new Cross-modal Mutual Distillation (CMD) framework with the following designs. On the one hand, the neighboring similarity distribution is introduced to model the knowledge learned in each modality, where the relational information is naturally suitable for the contrastive frameworks.
On the other hand, asymmetrical configurations are used for teacher and student to stabilize the distillation process and to transfer high-confidence information between modalities. By derivation, we find that the cross-modal positive mining in previous works can be regarded as a degenerated version of our CMD. We perform extensive experiments on NTU RGB+D 60, NTU RGB+D 120, and PKU-MMD II datasets. Our approach outperforms existing self-supervised methods and sets a series of new records. 14 | 15 | ## Requirements 16 | 17 | ```bash 18 | python==3.8.13 19 | torch==1.8.1+cu111 20 | torchvision==0.9.1+cu111 21 | tensorboard==2.9.0 22 | scikit-learn==1.1.1 23 | tqdm==4.64.0 24 | numpy==1.22.4 25 | ``` 26 | 27 | ## Data Preprocessing 28 | Please refer to [skeleton-contrast](https://github.com/fmthoker/skeleton-contrast). 29 | 30 | ## Training and Testing 31 | Please refer to the bash scripts in the repository root. 32 | 33 | ## Pretrained Models 34 | NTU-60 and NTU-120: [pretrained_models](https://rec.ustc.edu.cn/share/5f6a5ee0-01dd-11ed-b9ae-8301ca6d3d37) 35 | 36 | ## Citation 37 | If you find this work useful for your research, please consider citing our work: 38 | ``` 39 | @inproceedings{Mao_2022_CMD, 40 | title={CMD: Self-supervised 3D Action Representation Learning with Cross-modal Mutual Distillation}, 41 | author={Mao, Yunyao and Zhou, Wengang and Lu, Zhenbo and Deng, Jiajun and Li, Houqiang}, 42 | booktitle={European Conference on Computer Vision (ECCV)}, 43 | year={2022} 44 | } 45 | ``` 46 | 47 | ## Acknowledgment 48 | The framework of our code is based on [skeleton-contrast](https://github.com/fmthoker/skeleton-contrast). 49 | -------------------------------------------------------------------------------- /feeder/feeder_downstream.py: -------------------------------------------------------------------------------- 1 | # sys 2 | import pickle 3 | 4 | # torch 5 | import torch 6 | from torch.autograd import Variable 7 | from torchvision import transforms 8 | import numpy as np 9 | np.set_printoptions(threshold=np.inf) 10 | 11 | try: 12 | from feeder import augmentations 13 | except: 14 | import augmentations 15 | 16 | 17 | class Feeder(torch.utils.data.Dataset): 18 | """ 19 | Arguments: 20 | data_path: the path to '.npy' data, the shape of data should be (N, C, T, V, M) 21 | """ 22 | 23 | def __init__(self, 24 | data_path, 25 | label_path, 26 | num_frame_path, 27 | l_ratio, 28 | input_size, 29 | input_representation, 30 | mmap=True): 31 | 32 | self.data_path = data_path 33 | self.label_path = label_path 34 | self.num_frame_path= num_frame_path 35 | self.input_size=input_size 36 | self.input_representation=input_representation 37 | self.l_ratio = l_ratio 38 | 39 | 40 | self.load_data(mmap) 41 | self.N, self.C, self.T, self.V, self.M = self.data.shape 42 | print(self.data.shape,len(self.number_of_frames),len(self.label)) 43 | print("l_ratio",self.l_ratio) 44 | 45 | def load_data(self, mmap): 46 | # data: N C T V M 47 | 48 | # load data 49 | if mmap: 50 | self.data = np.load(self.data_path, mmap_mode='r') 51 | else: 52 | self.data = np.load(self.data_path) 53 | 54 | # load num of valid frame length 55 | self.number_of_frames= np.load(self.num_frame_path) 56 | 57 | # load label 58 | if '.pkl' in self.label_path: 59 | with open(self.label_path, 'rb') as f: 60 | self.sample_name, self.label = pickle.load(f) 61 | elif '.npy' in self.label_path: 62 | self.label = np.load(self.label_path).tolist() 63 | 64 | def __len__(self): 65 | return self.N 66 | 67 | def __iter__(self): 68 | return self 69 | 70 | def 
__getitem__(self, index): 71 | 72 | # get raw input 73 | 74 | # input: C, T, V, M 75 | data_numpy = np.array(self.data[index]) 76 | 77 | # number_of_frames = self.number_of_frames[index] 78 | number_of_frames = min(self.number_of_frames[index], 300) # 300 is max_len, for pku-mmd 79 | 80 | label = self.label[index] 81 | 82 | # crop a sub-sequnce 83 | data_numpy = augmentations.crop_subsequence(data_numpy, number_of_frames, self.l_ratio, self.input_size) 84 | 85 | #input 86 | if self.input_representation == "seq-based": 87 | 88 | #sequence-based 89 | 90 | input_data = data_numpy.transpose(1,2,0,3) 91 | input_data = input_data.reshape(-1,150).astype('float32') 92 | return input_data, label 93 | 94 | elif self.input_representation == "graph-based" or self.input_representation == "image-based" : 95 | #graph-based or image-based 96 | 97 | input_data = data_numpy 98 | return input_data, label 99 | -------------------------------------------------------------------------------- /script_action_classification_cmd_semi.sh: -------------------------------------------------------------------------------- 1 | # Cross-view 2 | for((i=1;i<=5;i++)); 3 | do 4 | CUDA_VISIBLE_DEVICES=0 python action_classification_cmd_semi.py \ 5 | --lr 0.01 \ 6 | --batch-size 64 \ 7 | --pretrained /data/user/ACTION/CMD/checkpoints/pretrain_moco_cmd/ntu60_cross_view/checkpoint_0450.pth.tar \ 8 | --finetune-dataset ntu60 \ 9 | --protocol cross_view_semi \ 10 | --data-ratio 0.01 \ 11 | --finetune-skeleton-representation graph-based >> cmd_ntu60_cview_semi_0.01.txt 12 | done 13 | 14 | for((i=1;i<=5;i++)); 15 | do 16 | CUDA_VISIBLE_DEVICES=0 python action_classification_cmd_semi.py \ 17 | --lr 0.01 \ 18 | --batch-size 64 \ 19 | --pretrained /data/user/ACTION/CMD/checkpoints/pretrain_moco_cmd/ntu60_cross_view/checkpoint_0450.pth.tar \ 20 | --finetune-dataset ntu60 \ 21 | --protocol cross_view_semi \ 22 | --data-ratio 0.05 \ 23 | --finetune-skeleton-representation graph-based >> cmd_ntu60_cview_semi_0.05.txt 24 | done 25 | 26 | for((i=1;i<=5;i++)); 27 | do 28 | CUDA_VISIBLE_DEVICES=0 python action_classification_cmd_semi.py \ 29 | --lr 0.01 \ 30 | --batch-size 64 \ 31 | --pretrained /data/user/ACTION/CMD/checkpoints/pretrain_moco_cmd/ntu60_cross_view/checkpoint_0450.pth.tar \ 32 | --finetune-dataset ntu60 \ 33 | --protocol cross_view_semi \ 34 | --data-ratio 0.1 \ 35 | --finetune-skeleton-representation graph-based >> cmd_ntu60_cview_semi_0.1.txt 36 | done 37 | 38 | for((i=1;i<=5;i++)); 39 | do 40 | CUDA_VISIBLE_DEVICES=0 python action_classification_cmd_semi.py \ 41 | --lr 0.01 \ 42 | --batch-size 64 \ 43 | --pretrained /data/user/ACTION/CMD/checkpoints/pretrain_moco_cmd/ntu60_cross_view/checkpoint_0450.pth.tar \ 44 | --finetune-dataset ntu60 \ 45 | --protocol cross_view_semi \ 46 | --data-ratio 0.2 \ 47 | --finetune-skeleton-representation graph-based >> cmd_ntu60_cview_semi_0.2.txt 48 | done 49 | 50 | 51 | # Cross-subject 52 | for((i=1;i<=5;i++)); 53 | do 54 | CUDA_VISIBLE_DEVICES=0 python action_classification_cmd_semi.py \ 55 | --lr 0.01 \ 56 | --batch-size 64 \ 57 | --pretrained /data/user/ACTION/CMD/checkpoints/pretrain_moco_cmd/ntu60_cross_subject/checkpoint_0450.pth.tar \ 58 | --finetune-dataset ntu60 \ 59 | --protocol cross_subject_semi \ 60 | --data-ratio 0.01 \ 61 | --finetune-skeleton-representation graph-based >> cmd_ntu60_csub_semi_0.01.txt 62 | done 63 | 64 | for((i=1;i<=5;i++)); 65 | do 66 | CUDA_VISIBLE_DEVICES=0 python action_classification_cmd_semi.py \ 67 | --lr 0.01 \ 68 | --batch-size 64 \ 69 | --pretrained 
/data/user/ACTION/CMD/checkpoints/pretrain_moco_cmd/ntu60_cross_subject/checkpoint_0450.pth.tar \ 70 | --finetune-dataset ntu60 \ 71 | --protocol cross_subject_semi \ 72 | --data-ratio 0.05 \ 73 | --finetune-skeleton-representation graph-based >> cmd_ntu60_csub_semi_0.05.txt 74 | done 75 | 76 | for((i=1;i<=5;i++)); 77 | do 78 | CUDA_VISIBLE_DEVICES=0 python action_classification_cmd_semi.py \ 79 | --lr 0.01 \ 80 | --batch-size 64 \ 81 | --pretrained /data/user/ACTION/CMD/checkpoints/pretrain_moco_cmd/ntu60_cross_subject/checkpoint_0450.pth.tar \ 82 | --finetune-dataset ntu60 \ 83 | --protocol cross_subject_semi \ 84 | --data-ratio 0.1 \ 85 | --finetune-skeleton-representation graph-based >> cmd_ntu60_csub_semi_0.1.txt 86 | done 87 | 88 | for((i=1;i<=5;i++)); 89 | do 90 | CUDA_VISIBLE_DEVICES=0 python action_classification_cmd_semi.py \ 91 | --lr 0.01 \ 92 | --batch-size 64 \ 93 | --pretrained /data/user/ACTION/CMD/checkpoints/pretrain_moco_cmd/ntu60_cross_subject/checkpoint_0450.pth.tar \ 94 | --finetune-dataset ntu60 \ 95 | --protocol cross_subject_semi \ 96 | --data-ratio 0.2 \ 97 | --finetune-skeleton-representation graph-based >> cmd_ntu60_csub_semi_0.2.txt 98 | done -------------------------------------------------------------------------------- /data_gen/preprocess.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.extend(['../']) 4 | from data_gen.rotation import * 5 | from tqdm import tqdm 6 | 7 | 8 | def pre_normalization(data, zaxis=[0, 1], xaxis=[8, 4]): 9 | N, C, T, V, M = data.shape 10 | s = np.transpose(data, [0, 4, 2, 3, 1]) # N, C, T, V, M to N, M, T, V, C 11 | 12 | print('pad the null frames with the previous frames') 13 | for i_s, skeleton in enumerate(tqdm(s)): # pad 14 | if skeleton.sum() == 0: 15 | print(i_s, ' has no skeleton') 16 | for i_p, person in enumerate(skeleton): 17 | if person.sum() == 0: 18 | continue 19 | if person[0].sum() == 0: 20 | index = (person.sum(-1).sum(-1) != 0) 21 | tmp = person[index].copy() 22 | person *= 0 23 | person[:len(tmp)] = tmp 24 | for i_f, frame in enumerate(person): 25 | if frame.sum() == 0: 26 | if person[i_f:].sum() == 0: 27 | rest = len(person) - i_f 28 | num = int(np.ceil(rest / i_f)) 29 | pad = np.concatenate([person[0:i_f] for _ in range(num)], 0)[:rest] 30 | s[i_s, i_p, i_f:] = pad 31 | break 32 | 33 | print('sub the center joint #1 (spine joint in ntu and neck joint in kinetics)') 34 | for i_s, skeleton in enumerate(tqdm(s)): 35 | if skeleton.sum() == 0: 36 | continue 37 | main_body_center = skeleton[0][:, 1:2, :].copy() 38 | for i_p, person in enumerate(skeleton): 39 | if person.sum() == 0: 40 | continue 41 | mask = (person.sum(-1) != 0).reshape(T, V, 1) 42 | s[i_s, i_p] = (s[i_s, i_p] - main_body_center) * mask 43 | 44 | print('parallel the bone between hip(jpt 0) and spine(jpt 1) of the first person to the z axis') 45 | for i_s, skeleton in enumerate(tqdm(s)): 46 | if skeleton.sum() == 0: 47 | continue 48 | joint_bottom = skeleton[0, 0, zaxis[0]] 49 | joint_top = skeleton[0, 0, zaxis[1]] 50 | axis = np.cross(joint_top - joint_bottom, [0, 0, 1]) 51 | angle = angle_between(joint_top - joint_bottom, [0, 0, 1]) 52 | matrix_z = rotation_matrix(axis, angle) 53 | for i_p, person in enumerate(skeleton): 54 | if person.sum() == 0: 55 | continue 56 | for i_f, frame in enumerate(person): 57 | if frame.sum() == 0: 58 | continue 59 | for i_j, joint in enumerate(frame): 60 | s[i_s, i_p, i_f, i_j] = np.dot(matrix_z, joint) 61 | 62 | print( 63 | 'parallel the bone between 
right shoulder(jpt 8) and left shoulder(jpt 4) of the first person to the x axis') 64 | for i_s, skeleton in enumerate(tqdm(s)): 65 | if skeleton.sum() == 0: 66 | continue 67 | joint_rshoulder = skeleton[0, 0, xaxis[0]] 68 | joint_lshoulder = skeleton[0, 0, xaxis[1]] 69 | axis = np.cross(joint_rshoulder - joint_lshoulder, [1, 0, 0]) 70 | angle = angle_between(joint_rshoulder - joint_lshoulder, [1, 0, 0]) 71 | matrix_x = rotation_matrix(axis, angle) 72 | for i_p, person in enumerate(skeleton): 73 | if person.sum() == 0: 74 | continue 75 | for i_f, frame in enumerate(person): 76 | if frame.sum() == 0: 77 | continue 78 | for i_j, joint in enumerate(frame): 79 | s[i_s, i_p, i_f, i_j] = np.dot(matrix_x, joint) 80 | 81 | data = np.transpose(s, [0, 4, 2, 3, 1]) 82 | return data 83 | 84 | 85 | if __name__ == '__main__': 86 | data = np.load('../data/ntu/xview/val_data.npy') 87 | pre_normalization(data) 88 | np.save('../data/ntu/xview/data_val_pre.npy', data) 89 | -------------------------------------------------------------------------------- /feeder/feeder_downstream_semi_supervised.py: -------------------------------------------------------------------------------- 1 | # sys 2 | import pickle 3 | 4 | # torch 5 | import torch 6 | from torch.autograd import Variable 7 | from torchvision import transforms 8 | import numpy as np 9 | np.set_printoptions(threshold=np.inf) 10 | 11 | try: 12 | from feeder import augmentations 13 | except: 14 | import augmentations 15 | 16 | 17 | class Feeder(torch.utils.data.Dataset): 18 | """ 19 | Arguments: 20 | data_path: the path to '.npy' data, the shape of data should be (N, C, T, V, M) 21 | """ 22 | 23 | def __init__(self, 24 | data_path, 25 | label_path, 26 | num_frame_path, 27 | l_ratio, 28 | input_size, 29 | input_representation, 30 | data_ratio=None, 31 | mmap=True): 32 | 33 | self.data_path = data_path 34 | self.label_path = label_path 35 | self.num_frame_path = num_frame_path 36 | self.input_size = input_size 37 | self.input_representation = input_representation 38 | self.l_ratio = l_ratio 39 | 40 | self.load_data(mmap) 41 | self.N, self.C, self.T, self.V, self.M = self.data.shape 42 | 43 | if data_ratio is not None: 44 | self.random_select_data(data_ratio) 45 | 46 | print(self.data.shape, len(self.number_of_frames), len(self.label)) 47 | print("l_ratio", self.l_ratio) 48 | 49 | def load_data(self, mmap): 50 | # data: N C V T M 51 | 52 | # load data 53 | if mmap: 54 | self.data = np.load(self.data_path, mmap_mode='r') 55 | else: 56 | self.data = np.load(self.data_path) 57 | 58 | # load num of valid frame length 59 | self.number_of_frames = np.load(self.num_frame_path) 60 | 61 | # load label 62 | if '.pkl' in self.label_path: 63 | with open(self.label_path, 'rb') as f: 64 | self.sample_name, self.label = pickle.load(f) 65 | elif '.npy' in self.label_path: 66 | self.label = np.load(self.label_path).tolist() 67 | 68 | self.label = np.array(self.label) 69 | 70 | def random_select_data(self, data_ratio): 71 | idx = np.arange(self.N) 72 | np.random.shuffle(idx) 73 | 74 | N_used = int(self.N * data_ratio) 75 | idx_used = idx[ :N_used] 76 | 77 | self.N = N_used 78 | self.data = self.data[idx_used] 79 | self.label = self.label[idx_used] 80 | self.number_of_frames = self.number_of_frames[idx_used] 81 | 82 | def __len__(self): 83 | return self.N 84 | 85 | def __iter__(self): 86 | return self 87 | 88 | def __getitem__(self, index): 89 | 90 | # get raw input 91 | 92 | # input: C, T, V, M 93 | data_numpy = np.array(self.data[index]) 94 | 95 | # number_of_frames = 
self.number_of_frames[index] 96 | # 300 is max_len, for pku-mmd 97 | number_of_frames = min(self.number_of_frames[index], 300) 98 | 99 | label = self.label[index] 100 | 101 | # crop a sub-sequnce 102 | data_numpy = augmentations.crop_subsequence( 103 | data_numpy, number_of_frames, self.l_ratio, self.input_size) 104 | 105 | # input 106 | if self.input_representation == "seq-based": 107 | 108 | # sequence-based 109 | 110 | input_data = data_numpy.transpose(1, 2, 0, 3) 111 | input_data = input_data.reshape(-1, 150).astype('float32') 112 | return input_data, label 113 | 114 | elif self.input_representation == "graph-based" or self.input_representation == "image-based": 115 | #graph-based or image-based 116 | 117 | input_data = data_numpy 118 | return input_data, label 119 | -------------------------------------------------------------------------------- /options/options_pretraining.py: -------------------------------------------------------------------------------- 1 | # graph based model arguments 2 | agcn_model_arguments = { 3 | "num_class": 128, 4 | "num_point": 25, 5 | "num_person": 2, 6 | 'graph_args': { 7 | 'labeling_mode': 'spatial'} 8 | } 9 | 10 | #image based model arguments 11 | hcn_model_arguments = { 12 | "in_channel":3, 13 | "out_channel":64, 14 | "window_size":64, 15 | "num_joint":25, 16 | "num_person":2, 17 | "num_class":128 18 | } 19 | 20 | #Sequence based model arguments 21 | bi_gru_model_arguments = { 22 | "en_input_size":150, 23 | "en_hidden_size":1024, 24 | "en_num_layers":3, 25 | "num_class":128 26 | } 27 | 28 | 29 | class opts_ntu_60_cross_view(): 30 | 31 | def __init__(self): 32 | 33 | self.agcn_model_args = agcn_model_arguments 34 | 35 | self.hcn_model_args = hcn_model_arguments 36 | 37 | self.bi_gru_model_args = bi_gru_model_arguments 38 | 39 | # feeder 40 | self.train_feeder_args = { 41 | 'data_path': './data/NTU-RGB-D-60-AGCN/xview/train_data_joint.npy', 42 | 'num_frame_path': './data/NTU-RGB-D-60-AGCN/xview/train_num_frame.npy', 43 | 'l_ratio': [0.1,1], 44 | 'input_size': 64 45 | } 46 | 47 | class opts_ntu_60_cross_subject(): 48 | 49 | def __init__(self): 50 | 51 | self.agcn_model_args = agcn_model_arguments 52 | 53 | self.hcn_model_args = hcn_model_arguments 54 | 55 | self.bi_gru_model_args = bi_gru_model_arguments 56 | 57 | # feeder 58 | self.train_feeder_args = { 59 | 'data_path': './data/NTU-RGB-D-60-AGCN/xsub/train_data_joint.npy', 60 | 'num_frame_path': './data/NTU-RGB-D-60-AGCN/xsub/train_num_frame.npy', 61 | 'l_ratio': [0.1,1], 62 | 'input_size': 64 63 | } 64 | 65 | class opts_ntu_120_cross_subject(): 66 | 67 | def __init__(self): 68 | 69 | self.agcn_model_args = agcn_model_arguments 70 | 71 | self.hcn_model_args = hcn_model_arguments 72 | 73 | self.bi_gru_model_args = bi_gru_model_arguments 74 | 75 | # feeder 76 | self.train_feeder_args = { 77 | 'data_path': './data/NTU-RGB-D-120-AGCN/xsub/train_data_joint.npy', 78 | 'num_frame_path': './data/NTU-RGB-D-120-AGCN/xsub/train_num_frame.npy', 79 | 'l_ratio': [0.1,1], 80 | 'input_size': 64 81 | } 82 | 83 | class opts_ntu_120_cross_setup(): 84 | 85 | def __init__(self): 86 | 87 | self.agcn_model_args = agcn_model_arguments 88 | 89 | self.hcn_model_args = hcn_model_arguments 90 | 91 | self.bi_gru_model_args = bi_gru_model_arguments 92 | 93 | # feeder 94 | self.train_feeder_args = { 95 | 'data_path': './data/NTU-RGB-D-120-AGCN/xsetup/train_data_joint.npy', 96 | 'num_frame_path': './data/NTU-RGB-D-120-AGCN/xsetup/train_num_frame.npy', 97 | 'l_ratio': [0.1,1], 98 | 'input_size': 64 99 | } 100 | 101 | 102 | # PKU-MMD 
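# The two PKU-MMD v2 option classes below mirror the NTU configurations above: they reuse the same
# model arguments (agcn / hcn / bi-gru) and feeder settings (l_ratio [0.1, 1], input_size 64);
# only data_path and num_frame_path point to the PKU-MMD-v2-AGCN splits.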
103 | class opts_pku_v2_cross_view(): 104 | 105 | def __init__(self): 106 | 107 | self.agcn_model_args = agcn_model_arguments 108 | 109 | self.hcn_model_args = hcn_model_arguments 110 | 111 | self.bi_gru_model_args = bi_gru_model_arguments 112 | 113 | # feeder 114 | self.train_feeder_args = { 115 | 'data_path': './data/PKU-MMD-v2-AGCN/xview/train_data_joint.npy', 116 | 'num_frame_path': './data/PKU-MMD-v2-AGCN/xview/train_num_frame.npy', 117 | 'l_ratio': [0.1,1], 118 | 'input_size': 64 119 | } 120 | 121 | 122 | class opts_pku_v2_cross_subject(): 123 | 124 | def __init__(self): 125 | 126 | self.agcn_model_args = agcn_model_arguments 127 | 128 | self.hcn_model_args = hcn_model_arguments 129 | 130 | self.bi_gru_model_args = bi_gru_model_arguments 131 | 132 | # feeder 133 | self.train_feeder_args = { 134 | 'data_path': './data/PKU-MMD-v2-AGCN/xsub/train_data_joint.npy', 135 | 'num_frame_path': './data/PKU-MMD-v2-AGCN/xsub/train_num_frame.npy', 136 | 'l_ratio': [0.1,1], 137 | 'input_size': 64 138 | } -------------------------------------------------------------------------------- /data_gen/pku_gendata.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import pickle 3 | from tqdm import tqdm 4 | import sys 5 | from numpy.lib.format import open_memmap 6 | 7 | sys.path.extend(['../']) 8 | from data_gen.preprocess import pre_normalization 9 | 10 | max_body_true = 2 11 | max_body_kinect = 4 12 | num_joint = 25 13 | max_frame = 300 14 | 15 | import numpy as np 16 | import os 17 | 18 | def read_data(data_path, name, max_body=4, num_joint=25): # top 2 body 19 | filename, action_idx = name.split('_') 20 | action_idx = int(action_idx) 21 | seq_data = np.loadtxt('{}/skeleton/{}'.format(data_path, filename)) 22 | label = np.loadtxt('{}/label/{}'.format(data_path, filename), delimiter=',') 23 | start, end = int(label[action_idx][1]), int(label[action_idx][2]) 24 | 25 | data = seq_data[start: end, :] # num_frames * 150 26 | data = data.reshape(data.shape[0], 2, 25, 3) # num_frame, num_body, num_joint, xyz 27 | data = data.transpose(3, 0, 2, 1) # xyz, num_frame, num_joint, num_body 28 | return data 29 | 30 | def gendata(data_path, out_path, benchmark='xview', part='eval'): 31 | # Read cross_subject_v2.txt and cross_view_v2.txt to obtain training_views training_subjects 32 | with open('{}/cross_view_v2.txt'.format(data_path), 'r') as f: 33 | lines = f.readlines() 34 | training_views = lines[1].strip('\n').split(', ') 35 | with open('{}/cross_subject_v2.txt'.format(data_path), 'r') as f: 36 | lines = f.readlines() 37 | training_subjects = lines[1].strip('\n').split(', ') 38 | 39 | 40 | sample_name = [] 41 | sample_label = [] 42 | for filename in os.listdir('{}/skeleton'.format(data_path)): 43 | if benchmark == 'xview': 44 | istraining = (filename[:-4] in training_views) 45 | elif benchmark == 'xsub': 46 | istraining = (filename[:-4] in training_subjects) 47 | else: 48 | raise ValueError() 49 | 50 | if part == 'train': 51 | issample = istraining 52 | elif part == 'val': 53 | issample = not (istraining) 54 | else: 55 | raise ValueError() 56 | 57 | if issample: 58 | label = np.loadtxt('{}/label/{}'.format(data_path, filename), delimiter=',') 59 | for idx in range(label.shape[0]): 60 | sample_name.append('{}_{}'.format(filename, str(idx))) 61 | sample_label.append(label[idx][0] - 1) 62 | 63 | with open('{}/{}_label.pkl'.format(out_path, part), 'wb') as f: 64 | pickle.dump((sample_name, list(sample_label)), f) 65 | 66 | fl = open_memmap( 67 | 
'{}/{}_num_frame.npy'.format(out_path, part), 68 | dtype='int', 69 | mode='w+', 70 | shape=(len(sample_label),)) 71 | 72 | fp = np.zeros((len(sample_label), 3, max_frame, num_joint, max_body_true), dtype=np.float32) 73 | 74 | for i, s in enumerate(tqdm(sample_name)): 75 | data = read_data(data_path, s, max_body=max_body_kinect, num_joint=num_joint) 76 | fp[i, :, 0:min(data.shape[1], max_frame), :, :] = data[:, 0:min(data.shape[1], max_frame), :, :] # sequences longer than max_frame are truncated! 77 | fl[i] = data.shape[1] # num_frame 78 | 79 | fp = pre_normalization(fp) 80 | np.save('{}/{}_data_joint.npy'.format(out_path, part), fp) 81 | 82 | 83 | if __name__ == '__main__': 84 | parser = argparse.ArgumentParser(description='PKU-MMD-v2 Data Converter.') 85 | 86 | parser.add_argument('--data_path', default='/data/user/dataset/PKU-MMD/v2/') 87 | parser.add_argument('--out_folder', default='../data/PKU-MMD-v2-AGCN/') 88 | benchmark = ['xsub','xview', ] 89 | 90 | part = ['train', 'val'] 91 | arg = parser.parse_args() 92 | 93 | for b in benchmark: 94 | for p in part: 95 | out_path = os.path.join(arg.out_folder, b) 96 | if not os.path.exists(out_path): 97 | os.makedirs(out_path) 98 | print(b, p) 99 | gendata( 100 | arg.data_path, 101 | out_path, 102 | benchmark=b, 103 | part=p) 104 | -------------------------------------------------------------------------------- /feeder/feeder_pretraining_intra.py: -------------------------------------------------------------------------------- 1 | # sys 2 | import pickle 3 | 4 | # torch 5 | import torch 6 | from torch.autograd import Variable 7 | from torchvision import transforms 8 | import numpy as np 9 | np.set_printoptions(threshold=np.inf) 10 | import random 11 | 12 | try: 13 | from feeder import augmentations 14 | except: 15 | import augmentations 16 | 17 | 18 | class Feeder(torch.utils.data.Dataset): 19 | """ 20 | Arguments: 21 | data_path: the path to '.npy' data, the shape of data should be (N, C, T, V, M) 22 | """ 23 | 24 | def __init__(self, 25 | data_path, 26 | num_frame_path, 27 | l_ratio, 28 | input_size, 29 | input_representation, 30 | mmap=True): 31 | 32 | self.data_path = data_path 33 | self.num_frame_path= num_frame_path 34 | self.input_size=input_size 35 | self.input_representation=input_representation 36 | self.crop_resize =True 37 | self.l_ratio = l_ratio 38 | 39 | 40 | self.load_data(mmap) 41 | self.N, self.C, self.T, self.V, self.M = self.data.shape 42 | print(self.data.shape,len(self.number_of_frames)) 43 | print("l_ratio",self.l_ratio) 44 | 45 | def load_data(self, mmap): 46 | # data: N C T V M 47 | 48 | # load data 49 | if mmap: 50 | self.data = np.load(self.data_path, mmap_mode='r') 51 | else: 52 | self.data = np.load(self.data_path) 53 | 54 | # load num of valid frame length 55 | self.number_of_frames= np.load(self.num_frame_path) 56 | 57 | def __len__(self): 58 | return self.N 59 | 60 | def __iter__(self): 61 | return self 62 | 63 | def __getitem__(self, index): 64 | 65 | # get raw input 66 | 67 | # input: C, T, V, M 68 | data_numpy = np.array(self.data[index]) 69 | number_of_frames = min(self.number_of_frames[index], 300) # 300 is max_len, for pku-mmd 70 | 71 | # apply spatio-temporal augmentations to generate view 1 72 | 73 | # temporal crop-resize 74 | data_numpy_v1_crop = augmentations.temporal_cropresize(data_numpy, number_of_frames, self.l_ratio, self.input_size) 75 | 76 | 77 | # randomly select one of the spatial augmentations 78 | flip_prob = random.random() 79 | if flip_prob < 0.5: 80 | data_numpy_v1 = 
augmentations.joint_courruption(data_numpy_v1_crop) 81 | else: 82 | data_numpy_v1 = augmentations.pose_augmentation(data_numpy_v1_crop) 83 | 84 | 85 | # apply spatio-temporal augmentations to generate view 2 86 | 87 | # temporal crop-resize 88 | data_numpy_v2_crop = augmentations.temporal_cropresize(data_numpy,number_of_frames, self.l_ratio, self.input_size) 89 | 90 | # randomly select one of the spatial augmentations 91 | flip_prob = random.random() 92 | if flip_prob < 0.5: 93 | data_numpy_v2 = augmentations.joint_courruption(data_numpy_v2_crop) 94 | else: 95 | data_numpy_v2 = augmentations.pose_augmentation(data_numpy_v2_crop) 96 | 97 | 98 | # convert augmented views into input formats based on skeleton-representations 99 | if self.input_representation == "seq-based" or self.input_representation == "trans-based": 100 | 101 | #Input for sequence-based representation 102 | # two person input ---> shpae (64 X 150) 103 | 104 | #View 1 105 | input_v1 = data_numpy_v1.transpose(1,2,0,3) 106 | input_v1 = input_v1.reshape(-1,150).astype('float32') 107 | 108 | #View 2 109 | input_v2 = data_numpy_v2.transpose(1,2,0,3) 110 | input_v2 = input_v2.reshape(-1,150).astype('float32') 111 | 112 | return input_v1, input_v2 113 | 114 | elif self.input_representation == "graph-based" or self.input_representation == "image-based": 115 | 116 | #input for graph-based or image-based representation 117 | # two person input ---> shape (3, 64, 25, 2) 118 | 119 | #View 1 120 | input_v1 = data_numpy_v1.astype('float32') 121 | #View 2 122 | input_v2 = data_numpy_v2.astype('float32') 123 | 124 | return input_v1, input_v2 125 | -------------------------------------------------------------------------------- /feeder/augmentations.py: -------------------------------------------------------------------------------- 1 | import torch.nn.functional as F 2 | import torch 3 | import random 4 | import numpy as np 5 | 6 | 7 | def joint_courruption(input_data): 8 | 9 | out = input_data.copy() 10 | 11 | flip_prob = random.random() 12 | 13 | if flip_prob < 0.5: 14 | 15 | #joint_indicies = np.random.choice(25, random.randint(5, 10), replace=False) 16 | joint_indicies = np.random.choice(25, 15,replace=False) 17 | out[:,:,joint_indicies,:] = 0 18 | return out 19 | 20 | else: 21 | #joint_indicies = np.random.choice(25, random.randint(5, 10), replace=False) 22 | joint_indicies = np.random.choice(25, 15,replace=False) 23 | 24 | temp = out[:,:,joint_indicies,:] 25 | Corruption = np.array([ 26 | [random.uniform(-1, 1), random.uniform(-1, 1), random.uniform(-1, 1)], 27 | [random.uniform(-1, 1), random.uniform(-1, 1), random.uniform(-1, 1)], 28 | [random.uniform(-1, 1), random.uniform(-1, 1), random.uniform(-1, 1)] ]) 29 | temp = np.dot(temp.transpose([1, 2, 3, 0]), Corruption) 30 | temp = temp.transpose(3, 0, 1, 2) 31 | out[:,:,joint_indicies,:] = temp 32 | return out 33 | 34 | 35 | 36 | def pose_augmentation(input_data): 37 | 38 | 39 | Shear = np.array([ 40 | [1, random.uniform(-1, 1), random.uniform(-1, 1)], 41 | [random.uniform(-1, 1), 1, random.uniform(-1, 1)], 42 | [random.uniform(-1, 1), random.uniform(-1, 1), 1] 43 | ]) 44 | 45 | temp_data = input_data.copy() 46 | result = np.dot(temp_data.transpose([1, 2, 3, 0]),Shear.transpose()) 47 | output = result.transpose(3, 0, 1, 2) 48 | 49 | return output 50 | 51 | def temporal_cropresize(input_data,num_of_frames,l_ratio,output_size): 52 | 53 | 54 | C, T, V, M =input_data.shape 55 | 56 | # Temporal crop 57 | min_crop_length = 64 58 | 59 | scale = 
np.random.rand(1)*(l_ratio[1]-l_ratio[0])+l_ratio[0] 60 | temporal_crop_length = np.minimum(np.maximum(int(np.floor(num_of_frames*scale)),min_crop_length),num_of_frames) 61 | 62 | start = np.random.randint(0,num_of_frames-temporal_crop_length+1) 63 | temporal_context = input_data[:,start:start+temporal_crop_length, :, :] 64 | 65 | # interpolate 66 | temporal_context = torch.tensor(temporal_context,dtype=torch.float) 67 | temporal_context=temporal_context.permute(0, 2, 3, 1).contiguous().view(C * V * M,temporal_crop_length) 68 | temporal_context=temporal_context[None, :, :, None] 69 | temporal_context= F.interpolate(temporal_context, size=(output_size, 1), mode='bilinear',align_corners=False) 70 | temporal_context = temporal_context.squeeze(dim=3).squeeze(dim=0) 71 | temporal_context=temporal_context.contiguous().view(C, V, M, output_size).permute(0, 3, 1, 2).contiguous().numpy() 72 | 73 | return temporal_context 74 | 75 | def crop_subsequence(input_data,num_of_frames,l_ratio,output_size): 76 | 77 | 78 | C, T, V, M =input_data.shape 79 | 80 | if l_ratio[0] == 0.5: 81 | # if training , sample a random crop 82 | 83 | min_crop_length = 64 84 | scale = np.random.rand(1)*(l_ratio[1]-l_ratio[0])+l_ratio[0] 85 | temporal_crop_length = np.minimum(np.maximum(int(np.floor(num_of_frames*scale)),min_crop_length),num_of_frames) 86 | 87 | start = np.random.randint(0,num_of_frames-temporal_crop_length+1) 88 | temporal_crop = input_data[:,start:start+temporal_crop_length, :, :] 89 | 90 | temporal_crop= torch.tensor(temporal_crop,dtype=torch.float) 91 | temporal_crop=temporal_crop.permute(0, 2, 3, 1).contiguous().view(C * V * M,temporal_crop_length) 92 | temporal_crop=temporal_crop[None, :, :, None] 93 | temporal_crop= F.interpolate(temporal_crop, size=(output_size, 1), mode='bilinear',align_corners=False) 94 | temporal_crop=temporal_crop.squeeze(dim=3).squeeze(dim=0) 95 | temporal_crop=temporal_crop.contiguous().view(C, V, M, output_size).permute(0, 3, 1, 2).contiguous().numpy() 96 | 97 | return temporal_crop 98 | 99 | else: 100 | # if testing , sample a center crop 101 | 102 | start = int((1-l_ratio[0]) * num_of_frames/2) 103 | data =input_data[:,start:num_of_frames-start, :, :] 104 | temporal_crop_length = data.shape[1] 105 | 106 | temporal_crop= torch.tensor(data,dtype=torch.float) 107 | temporal_crop=temporal_crop.permute(0, 2, 3, 1).contiguous().view(C * V * M,temporal_crop_length) 108 | temporal_crop=temporal_crop[None, :, :, None] 109 | temporal_crop= F.interpolate(temporal_crop, size=(output_size, 1), mode='bilinear',align_corners=False) 110 | temporal_crop=temporal_crop.squeeze(dim=3).squeeze(dim=0) 111 | temporal_crop=temporal_crop.contiguous().view(C, V, M, output_size).permute(0, 3, 1, 2).contiguous().numpy() 112 | 113 | return temporal_crop 114 | -------------------------------------------------------------------------------- /options/options_retrieval.py: -------------------------------------------------------------------------------- 1 | class opts_ntu_60_cross_view(): 2 | 3 | def __init__(self): 4 | 5 | # graph based model 6 | self.agcn_model_args = { 7 | "num_class": 60, 8 | "num_point": 25, 9 | "num_person": 2, 10 | 'graph_args': { 11 | 'labeling_mode': 'spatial'} 12 | } 13 | 14 | #image based model 15 | self.hcn_model_args = { 16 | "in_channel":3, 17 | "out_channel":64, 18 | "window_size":64, 19 | "num_joint":25, 20 | "num_person":2, 21 | "num_class":60 22 | } 23 | 24 | #Sequence based model 25 | self.bi_gru_model_args = { 26 | "en_input_size":150, 27 | "en_hidden_size":1024, 28 | 
"en_num_layers":3, 29 | "num_class":60 30 | } 31 | 32 | # feeder 33 | self.train_feeder_args = { 34 | 'data_path': './data/NTU-RGB-D-60-AGCN/xview/train_data_joint.npy', 35 | 'label_path': './data/NTU-RGB-D-60-AGCN/xview/train_label.pkl', 36 | 'num_frame_path': './data/NTU-RGB-D-60-AGCN/xview/train_num_frame.npy', 37 | 'l_ratio': [0.95], 38 | 'input_size': 64 39 | } 40 | 41 | self.test_feeder_args = { 42 | 43 | 'data_path': './data/NTU-RGB-D-60-AGCN/xview/val_data_joint.npy', 44 | 'label_path': './data/NTU-RGB-D-60-AGCN/xview/val_label.pkl', 45 | 'num_frame_path': './data/NTU-RGB-D-60-AGCN/xview/val_num_frame.npy', 46 | 'l_ratio': [0.95], 47 | 'input_size': 64 48 | } 49 | 50 | class opts_ntu_60_cross_subject(): 51 | 52 | def __init__(self): 53 | 54 | # graph based model 55 | self.agcn_model_args = { 56 | "num_class": 60, 57 | "num_point": 25, 58 | "num_person": 2, 59 | 'graph_args': { 60 | 'labeling_mode': 'spatial'} 61 | } 62 | 63 | #image based model 64 | self.hcn_model_args = { 65 | "in_channel":3, 66 | "out_channel":64, 67 | "window_size":64, 68 | "num_joint":25, 69 | "num_person":2, 70 | "num_class":60 71 | } 72 | 73 | #Sequence based model 74 | self.bi_gru_model_args = { 75 | "en_input_size":150, 76 | "en_hidden_size":1024, 77 | "en_num_layers":3, 78 | "num_class":60 79 | } 80 | 81 | # feeder 82 | self.train_feeder_args = { 83 | 'data_path': './data/NTU-RGB-D-60-AGCN/xsub/train_data_joint.npy', 84 | 'label_path': './data/NTU-RGB-D-60-AGCN/xsub/train_label.pkl', 85 | 'num_frame_path': './data/NTU-RGB-D-60-AGCN/xsub/train_num_frame.npy', 86 | 'l_ratio': [0.95], 87 | 'input_size': 64 88 | } 89 | 90 | self.test_feeder_args = { 91 | 92 | 'data_path': './data/NTU-RGB-D-60-AGCN/xsub/val_data_joint.npy', 93 | 'label_path': './data/NTU-RGB-D-60-AGCN/xsub/val_label.pkl', 94 | 'num_frame_path': './data/NTU-RGB-D-60-AGCN/xsub/val_num_frame.npy', 95 | 'l_ratio': [0.95], 96 | 'input_size': 64 97 | } 98 | 99 | class opts_ntu_120_cross_subject(): 100 | def __init__(self): 101 | 102 | # graph based model 103 | self.agcn_model_args = { 104 | "num_class": 120, 105 | "num_point": 25, 106 | "num_person": 2, 107 | 'graph_args': { 108 | 'labeling_mode': 'spatial'} 109 | } 110 | 111 | #image based model 112 | self.hcn_model_args = { 113 | "in_channel":3, 114 | "out_channel":64, 115 | "window_size":64, 116 | "num_joint":25, 117 | "num_person":2, 118 | "num_class":120 119 | } 120 | 121 | #Sequence based model 122 | self.bi_gru_model_args = { 123 | "en_input_size":150, 124 | "en_hidden_size":1024, 125 | "en_num_layers":3, 126 | "num_class":120 127 | } 128 | 129 | # feeder 130 | self.train_feeder_args = { 131 | 'data_path': './data/NTU-RGB-D-120-AGCN/xsub/train_data_joint.npy', 132 | 'label_path': './data/NTU-RGB-D-120-AGCN/xsub/train_label.pkl', 133 | 'num_frame_path': './data/NTU-RGB-D-120-AGCN/xsub/train_num_frame.npy', 134 | 'l_ratio': [0.95], 135 | 'input_size': 64 136 | } 137 | 138 | self.test_feeder_args = { 139 | 140 | 'data_path': './data/NTU-RGB-D-120-AGCN/xsub/val_data_joint.npy', 141 | 'label_path': './data/NTU-RGB-D-120-AGCN/xsub/val_label.pkl', 142 | 'num_frame_path': './data/NTU-RGB-D-120-AGCN/xsub/val_num_frame.npy', 143 | 'l_ratio': [0.95], 144 | 'input_size': 64 145 | } 146 | 147 | class opts_ntu_120_cross_setup(): 148 | 149 | def __init__(self): 150 | 151 | # graph based model 152 | self.agcn_model_args = { 153 | "num_class": 120, 154 | "num_point": 25, 155 | "num_person": 2, 156 | 'graph_args': { 157 | 'labeling_mode': 'spatial'} 158 | } 159 | 160 | #image based model 161 | 
self.hcn_model_args = { 162 | "in_channel":3, 163 | "out_channel":64, 164 | "window_size":64, 165 | "num_joint":25, 166 | "num_person":2, 167 | "num_class":120 168 | } 169 | 170 | #Sequence based model 171 | self.bi_gru_model_args = { 172 | "en_input_size":150, 173 | "en_hidden_size":1024, 174 | "en_num_layers":3, 175 | "num_class":120 176 | } 177 | 178 | # feeder 179 | self.train_feeder_args = { 180 | 'data_path': './data/NTU-RGB-D-120-AGCN/xsetup/train_data_joint.npy', 181 | 'label_path': './data/NTU-RGB-D-120-AGCN/xsetup/train_label.pkl', 182 | 'num_frame_path': './data/NTU-RGB-D-120-AGCN/xsetup/train_num_frame.npy', 183 | 'l_ratio': [0.95], 184 | 'input_size': 64 185 | } 186 | 187 | self.test_feeder_args = { 188 | 189 | 'data_path': './data/NTU-RGB-D-120-AGCN/xsetup/val_data_joint.npy', 190 | 'label_path': './data/NTU-RGB-D-120-AGCN/xsetup/val_label.pkl', 191 | 'num_frame_path': './data/NTU-RGB-D-120-AGCN/xsetup/val_num_frame.npy', 192 | 'l_ratio': [0.95], 193 | 'input_size': 64 194 | } 195 | -------------------------------------------------------------------------------- /feeder/feeder_pretraining_inter.py: -------------------------------------------------------------------------------- 1 | # sys 2 | import pickle 3 | 4 | # torch 5 | import torch 6 | from torch.autograd import Variable 7 | from torchvision import transforms 8 | import numpy as np 9 | np.set_printoptions(threshold=np.inf) 10 | import random 11 | 12 | try: 13 | from feeder import augmentations 14 | except: 15 | import augmentations 16 | 17 | 18 | class Feeder(torch.utils.data.Dataset): 19 | """ 20 | Arguments: 21 | data_path: the path to '.npy' data, the shape of data should be (N, C, T, V, M) 22 | """ 23 | 24 | def __init__(self, 25 | data_path, 26 | num_frame_path, 27 | l_ratio, 28 | input_size, 29 | input_representations, 30 | mmap=True): 31 | 32 | self.data_path = data_path 33 | self.num_frame_path= num_frame_path 34 | self.input_size=input_size 35 | self.input_representations=input_representations 36 | self.crop_resize =True 37 | self.l_ratio = l_ratio 38 | 39 | 40 | self.load_data(mmap) 41 | self.N, self.C, self.T, self.V, self.M = self.data.shape 42 | print(self.data.shape,len(self.number_of_frames)) 43 | print("l_ratio",self.l_ratio) 44 | 45 | def load_data(self, mmap): 46 | # data: N C V T M 47 | 48 | # load data 49 | if mmap: 50 | self.data = np.load(self.data_path, mmap_mode='r') 51 | else: 52 | self.data = np.load(self.data_path) 53 | 54 | # load num of valid frame length 55 | self.number_of_frames= np.load(self.num_frame_path) 56 | 57 | def __len__(self): 58 | return self.N 59 | 60 | def __iter__(self): 61 | return self 62 | 63 | def __getitem__(self, index): 64 | 65 | # get raw input 66 | 67 | # input: C, T, V, M 68 | data_numpy = np.array(self.data[index]) 69 | number_of_frames = self.number_of_frames[index] 70 | 71 | # apply spatio-temporal augmentations to generate view 1 72 | 73 | # temporal crop-resize 74 | data_numpy_v1_crop = augmentations.temporal_cropresize(data_numpy, number_of_frames, self.l_ratio, self.input_size) 75 | 76 | # randomly select one of the spatial augmentations 77 | flip_prob = random.random() 78 | if flip_prob < 0.5: 79 | data_numpy_v1 = augmentations.pose_augmentation(data_numpy_v1_crop) 80 | else: 81 | data_numpy_v1 = augmentations.joint_courruption(data_numpy_v1_crop) 82 | 83 | 84 | # apply spatio-temporal augmentations to generate view 2 85 | 86 | # temporal crop-resize 87 | data_numpy_v2_crop = augmentations.temporal_cropresize(data_numpy,number_of_frames, self.l_ratio, 
self.input_size) 88 | 89 | # randomly select one of the spatial augmentations 90 | flip_prob = random.random() 91 | if flip_prob < 0.5: 92 | data_numpy_v2 = augmentations.pose_augmentation(data_numpy_v2_crop) 93 | else: 94 | data_numpy_v2 = augmentations.joint_courruption(data_numpy_v2_crop) 95 | 96 | 97 | # convert augmented views into input formats based on skeleton-representations 98 | 99 | if self.input_representations == "seq-based_and_graph-based" or self.input_representations == "seq-based_and_image-based" : 100 | 101 | # Input View 1 102 | #sequence-based input of view 1 ---> shpae (64 X 150) 103 | input_s1_v1 = data_numpy_v1.transpose(1,2,0,3) 104 | input_s1_v1 = input_s1_v1.reshape(-1,150).astype('float32') 105 | #graph-based / image-based input of view 1 ---> shape (3, 64, 25, 2) 106 | input_s2_v1 = data_numpy_v1.astype('float32') 107 | 108 | # Input View 2 109 | #sequence-based input of view 2 ---> shpae (64 X 150) 110 | input_s1_v2 = data_numpy_v2.transpose(1,2,0,3) 111 | input_s1_v2 = input_s1_v2.reshape(-1,150).astype('float32') 112 | #graph-based / image-based input of view 2 ---> shape (3, 64, 25, 2) 113 | input_s2_v2 = data_numpy_v2.astype('float32') 114 | 115 | elif self.input_representations == "graph-based_and_image-based": 116 | 117 | # Input View 1 118 | #graph-based and image-based inputs of view 1 ---> shape (3, 64, 25, 2) 119 | input_s1_v1 = data_numpy_v1.astype('float32') 120 | input_s2_v1 = data_numpy_v1.astype('float32') 121 | 122 | # Input View 2 123 | #graph-based and image-based inputs of view 2 ---> shape (3, 64, 25, 2) 124 | input_s1_v2 = data_numpy_v2.astype('float32') 125 | input_s2_v2 = data_numpy_v2.astype('float32') 126 | 127 | elif self.input_representations == "seq-based_and_trans-based": 128 | # Input View 1 129 | #sequence-based input of view 1 ---> shpae (64 X 150) 130 | input_s1_v1 = data_numpy_v1.transpose(1,2,0,3) 131 | input_s1_v1 = input_s1_v1.reshape(-1,150).astype('float32') 132 | #sequence-based input of view 1 ---> shpae (64 X 150) 133 | input_s2_v1 = data_numpy_v1.transpose(1,2,0,3) 134 | input_s2_v1 = input_s2_v1.reshape(-1,150).astype('float32') 135 | 136 | # Input View 2 137 | #sequence-based input of view 2 ---> shpae (64 X 150) 138 | input_s1_v2 = data_numpy_v2.transpose(1,2,0,3) 139 | input_s1_v2 = input_s1_v2.reshape(-1,150).astype('float32') 140 | #sequence-based input of view 1 ---> shpae (64 X 150) 141 | input_s2_v2 = data_numpy_v2.transpose(1,2,0,3) 142 | input_s2_v2 = input_s2_v2.reshape(-1,150).astype('float32') 143 | 144 | return input_s1_v1, input_s2_v1, input_s1_v2, input_s2_v2 145 | -------------------------------------------------------------------------------- /moco/utils/graph.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class Graph(): 4 | def __init__(self, 5 | layout='openpose', 6 | strategy='uniform', 7 | max_hop=1, 8 | dilation=1): 9 | self.max_hop = max_hop 10 | self.dilation = dilation 11 | 12 | self.get_edge(layout) 13 | self.hop_dis = get_hop_distance( 14 | self.num_node, self.edge, max_hop=max_hop) 15 | self.get_adjacency(strategy) 16 | 17 | def __str__(self): 18 | return self.A 19 | 20 | def get_edge(self, layout): 21 | if layout == 'openpose': 22 | self.num_node = 18 23 | self_link = [(i, i) for i in range(self.num_node)] 24 | neighbor_link = [(4, 3), (3, 2), (7, 6), (6, 5), (13, 12), (12, 25 | 11), 26 | (10, 9), (9, 8), (11, 5), (8, 2), (5, 1), (2, 1), 27 | (0, 1), (15, 0), (14, 0), (17, 15), (16, 14)] 28 | self.edge = self_link + 
neighbor_link 29 | self.center = 1 30 | elif layout == 'ntu-rgb+d': 31 | self.num_node = 25 32 | self_link = [(i, i) for i in range(self.num_node)] 33 | neighbor_1base = [(1, 2), (2, 21), (3, 21), (4, 3), (5, 21), 34 | (6, 5), (7, 6), (8, 7), (9, 21), (10, 9), 35 | (11, 10), (12, 11), (13, 1), (14, 13), (15, 14), 36 | (16, 15), (17, 1), (18, 17), (19, 18), (20, 19), 37 | (22, 23), (23, 8), (24, 25), (25, 12)] 38 | neighbor_link = [(i - 1, j - 1) for (i, j) in neighbor_1base] 39 | self.edge = self_link + neighbor_link 40 | self.center = 21 - 1 41 | elif layout == 'ntu_edge': 42 | self.num_node = 24 43 | self_link = [(i, i) for i in range(self.num_node)] 44 | neighbor_1base = [(1, 2), (3, 2), (4, 3), (5, 2), (6, 5), (7, 6), 45 | (8, 7), (9, 2), (10, 9), (11, 10), (12, 11), 46 | (13, 1), (14, 13), (15, 14), (16, 15), (17, 1), 47 | (18, 17), (19, 18), (20, 19), (21, 22), (22, 8), 48 | (23, 24), (24, 12)] 49 | neighbor_link = [(i - 1, j - 1) for (i, j) in neighbor_1base] 50 | self.edge = self_link + neighbor_link 51 | self.center = 2 52 | # elif layout=='customer settings' 53 | # pass 54 | else: 55 | raise ValueError("Do Not Exist This Layout.") 56 | 57 | def get_adjacency(self, strategy): 58 | valid_hop = range(0, self.max_hop + 1, self.dilation) 59 | adjacency = np.zeros((self.num_node, self.num_node)) 60 | for hop in valid_hop: 61 | adjacency[self.hop_dis == hop] = 1 62 | normalize_adjacency = normalize_digraph(adjacency) 63 | 64 | if strategy == 'uniform': 65 | A = np.zeros((1, self.num_node, self.num_node)) 66 | A[0] = normalize_adjacency 67 | self.A = A 68 | elif strategy == 'distance': 69 | A = np.zeros((len(valid_hop), self.num_node, self.num_node)) 70 | for i, hop in enumerate(valid_hop): 71 | A[i][self.hop_dis == hop] = normalize_adjacency[self.hop_dis == 72 | hop] 73 | self.A = A 74 | elif strategy == 'spatial': 75 | A = [] 76 | for hop in valid_hop: 77 | a_root = np.zeros((self.num_node, self.num_node)) 78 | a_close = np.zeros((self.num_node, self.num_node)) 79 | a_further = np.zeros((self.num_node, self.num_node)) 80 | for i in range(self.num_node): 81 | for j in range(self.num_node): 82 | if self.hop_dis[j, i] == hop: 83 | if self.hop_dis[j, self.center] == self.hop_dis[ 84 | i, self.center]: 85 | a_root[j, i] = normalize_adjacency[j, i] 86 | elif self.hop_dis[j, self. 87 | center] > self.hop_dis[i, self. 
88 | center]: 89 | a_close[j, i] = normalize_adjacency[j, i] 90 | else: 91 | a_further[j, i] = normalize_adjacency[j, i] 92 | if hop == 0: 93 | A.append(a_root) 94 | else: 95 | A.append(a_root + a_close) 96 | A.append(a_further) 97 | A = np.stack(A) 98 | self.A = A 99 | else: 100 | raise ValueError("Do Not Exist This Strategy") 101 | 102 | 103 | def get_hop_distance(num_node, edge, max_hop=1): 104 | A = np.zeros((num_node, num_node)) 105 | for i, j in edge: 106 | A[j, i] = 1 107 | A[i, j] = 1 108 | 109 | # compute hop steps 110 | hop_dis = np.zeros((num_node, num_node)) + np.inf 111 | transfer_mat = [np.linalg.matrix_power(A, d) for d in range(max_hop + 1)] 112 | arrive_mat = (np.stack(transfer_mat) > 0) 113 | for d in range(max_hop, -1, -1): 114 | hop_dis[arrive_mat[d]] = d 115 | return hop_dis 116 | 117 | 118 | def normalize_digraph(A): 119 | Dl = np.sum(A, 0) 120 | num_node = A.shape[0] 121 | Dn = np.zeros((num_node, num_node)) 122 | for i in range(num_node): 123 | if Dl[i] > 0: 124 | Dn[i, i] = Dl[i]**(-1) 125 | AD = np.dot(A, Dn) 126 | return AD 127 | 128 | 129 | def normalize_undigraph(A): 130 | Dl = np.sum(A, 0) 131 | num_node = A.shape[0] 132 | Dn = np.zeros((num_node, num_node)) 133 | for i in range(num_node): 134 | if Dl[i] > 0: 135 | Dn[i, i] = Dl[i]**(-0.5) 136 | DAD = np.dot(np.dot(Dn, A), Dn) 137 | return DAD 138 | -------------------------------------------------------------------------------- /data_gen/ntu_gendata.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import pickle 3 | from tqdm import tqdm 4 | import sys 5 | from numpy.lib.format import open_memmap 6 | 7 | sys.path.extend(['../']) 8 | from data_gen.preprocess import pre_normalization 9 | 10 | # # ntu 60 11 | # training_subjects = [ 12 | # 1, 2, 4, 5, 8, 9, 13, 14, 15, 16, 17, 18, 19, 25, 27, 28, 31, 34, 35, 38 13 | # ] 14 | 15 | training_cameras = [2, 3] 16 | 17 | # ntu 120 18 | training_subjects = [ 19 | 1, 2, 4, 5, 8, 9, 13, 14, 15, 16, 17, 18, 19, 25, 27, 28, 31, 34, 35, 38, 20 | 45, 46, 47, 49, 50, 52, 53, 54, 55, 56, 57, 58, 59, 70, 74, 78,80, 81, 82, 21 | 83, 84, 85, 86, 89, 91, 92, 93, 94, 95, 97, 98, 100, 103 22 | ] 23 | training_setups = [ 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32] 24 | 25 | max_body_true = 2 26 | max_body_kinect = 4 27 | num_joint = 25 28 | max_frame = 300 29 | 30 | import numpy as np 31 | import os 32 | 33 | 34 | def read_skeleton_filter(file): 35 | with open(file, 'r') as f: 36 | skeleton_sequence = {} 37 | skeleton_sequence['numFrame'] = int(f.readline()) 38 | skeleton_sequence['frameInfo'] = [] 39 | # num_body = 0 40 | for t in range(skeleton_sequence['numFrame']): 41 | frame_info = {} 42 | frame_info['numBody'] = int(f.readline()) 43 | frame_info['bodyInfo'] = [] 44 | 45 | for m in range(frame_info['numBody']): 46 | body_info = {} 47 | body_info_key = [ 48 | 'bodyID', 'clipedEdges', 'handLeftConfidence', 49 | 'handLeftState', 'handRightConfidence', 'handRightState', 50 | 'isResticted', 'leanX', 'leanY', 'trackingState' 51 | ] 52 | body_info = { 53 | k: float(v) 54 | for k, v in zip(body_info_key, f.readline().split()) 55 | } 56 | body_info['numJoint'] = int(f.readline()) 57 | body_info['jointInfo'] = [] 58 | for v in range(body_info['numJoint']): 59 | joint_info_key = [ 60 | 'x', 'y', 'z', 'depthX', 'depthY', 'colorX', 'colorY', 61 | 'orientationW', 'orientationX', 'orientationY', 62 | 'orientationZ', 'trackingState' 63 | ] 64 | joint_info = { 65 | k: float(v) 66 | for k, v in zip(joint_info_key, 
f.readline().split()) 67 | } 68 | body_info['jointInfo'].append(joint_info) 69 | frame_info['bodyInfo'].append(body_info) 70 | skeleton_sequence['frameInfo'].append(frame_info) 71 | 72 | return skeleton_sequence 73 | 74 | 75 | def get_nonzero_std(s): # tvc 76 | index = s.sum(-1).sum(-1) != 0 # select valid frames 77 | s = s[index] 78 | if len(s) != 0: 79 | s = s[:, :, 0].std() + s[:, :, 1].std() + s[:, :, 2].std() # three channels 80 | else: 81 | s = 0 82 | return s 83 | 84 | 85 | def read_xyz(file, max_body=4, num_joint=25): # 取了前两个body 86 | seq_info = read_skeleton_filter(file) 87 | data = np.zeros((max_body, seq_info['numFrame'], num_joint, 3)) 88 | for n, f in enumerate(seq_info['frameInfo']): 89 | for m, b in enumerate(f['bodyInfo']): 90 | for j, v in enumerate(b['jointInfo']): 91 | if m < max_body and j < num_joint: 92 | data[m, n, j, :] = [v['x'], v['y'], v['z']] 93 | else: 94 | pass 95 | 96 | # select two max energy body 97 | energy = np.array([get_nonzero_std(x) for x in data]) 98 | index = energy.argsort()[::-1][0:max_body_true] 99 | data = data[index] 100 | 101 | data = data.transpose(3, 1, 2, 0) 102 | return data 103 | 104 | 105 | def gendata(data_path, out_path, ignored_sample_path=None, benchmark='xview', part='eval'): 106 | if ignored_sample_path != None: 107 | with open(ignored_sample_path, 'r') as f: 108 | ignored_samples = [ 109 | line.strip() + '.skeleton' for line in f.readlines() 110 | ] 111 | else: 112 | ignored_samples = [] 113 | sample_name = [] 114 | sample_label = [] 115 | for filename in os.listdir(data_path): 116 | if filename in ignored_samples: 117 | continue 118 | action_class = int( 119 | filename[filename.find('A') + 1:filename.find('A') + 4]) 120 | subject_id = int( 121 | filename[filename.find('P') + 1:filename.find('P') + 4]) 122 | camera_id = int( 123 | filename[filename.find('C') + 1:filename.find('C') + 4]) 124 | setup_id = int( 125 | filename[filename.find('S') + 1:filename.find('S') + 4]) 126 | 127 | if benchmark == 'xview': 128 | istraining = (camera_id in training_cameras) 129 | elif benchmark == 'xsub': 130 | istraining = (subject_id in training_subjects) 131 | elif benchmark == 'xsetup': 132 | istraining = (setup_id in training_setups) 133 | else: 134 | raise ValueError() 135 | 136 | if part == 'train': 137 | issample = istraining 138 | elif part == 'val': 139 | issample = not (istraining) 140 | else: 141 | raise ValueError() 142 | 143 | if issample: 144 | sample_name.append(filename) 145 | sample_label.append(action_class - 1) 146 | 147 | with open('{}/{}_label.pkl'.format(out_path, part), 'wb') as f: 148 | pickle.dump((sample_name, list(sample_label)), f) 149 | 150 | fl = open_memmap( 151 | '{}/{}_num_frame.npy'.format(out_path, part), 152 | dtype='int', 153 | mode='w+', 154 | shape=(len(sample_label),)) 155 | 156 | fp = np.zeros((len(sample_label), 3, max_frame, num_joint, max_body_true), dtype=np.float32) 157 | 158 | for i, s in enumerate(tqdm(sample_name)): 159 | data = read_xyz(os.path.join(data_path, s), max_body=max_body_kinect, num_joint=num_joint) 160 | fp[i, :, 0:data.shape[1], :, :] = data 161 | fl[i] = data.shape[1] # num_frame 162 | 163 | fp = pre_normalization(fp) 164 | np.save('{}/{}_data_joint.npy'.format(out_path, part), fp) 165 | 166 | 167 | if __name__ == '__main__': 168 | parser = argparse.ArgumentParser(description='NTU-RGB-D Data Converter.') 169 | # parser.add_argument('--data_path', default='/data/user/dataset/NTU/nturgb+d_skeletons_60/') 170 | # parser.add_argument('--ignored_sample_path', 171 | # 
default='resource/NTU_RGBD60_samples_with_missing_skeletons.txt') 172 | # parser.add_argument('--out_folder', default='../data/NTU-RGB-D-60-AGCN/') 173 | # benchmark = ['xsub', 'xview'] 174 | 175 | parser.add_argument('--data_path', default='/data/user/dataset/NTU/nturgb+d_skeletons_120/') 176 | parser.add_argument('--ignored_sample_path', 177 | default='resource/NTU_RGBD120_samples_with_missing_skeletons.txt') 178 | parser.add_argument('--out_folder', default='../data/NTU-RGB-D-120-AGCN/') 179 | benchmark = ['xsub','xsetup', ] 180 | 181 | part = ['train', 'val'] 182 | arg = parser.parse_args() 183 | 184 | for b in benchmark: 185 | for p in part: 186 | out_path = os.path.join(arg.out_folder, b) 187 | if not os.path.exists(out_path): 188 | os.makedirs(out_path) 189 | print(b, p) 190 | gendata( 191 | arg.data_path, 192 | out_path, 193 | arg.ignored_sample_path, 194 | benchmark=b, 195 | part=p) 196 | -------------------------------------------------------------------------------- /data_gen/resource/NTU_RGBD60_samples_with_missing_skeletons.txt: -------------------------------------------------------------------------------- 1 | S001C002P005R002A008 2 | S001C002P006R001A008 3 | S001C003P002R001A055 4 | S001C003P002R002A012 5 | S001C003P005R002A004 6 | S001C003P005R002A005 7 | S001C003P005R002A006 8 | S001C003P006R002A008 9 | S002C002P011R002A030 10 | S002C003P008R001A020 11 | S002C003P010R002A010 12 | S002C003P011R002A007 13 | S002C003P011R002A011 14 | S002C003P014R002A007 15 | S003C001P019R001A055 16 | S003C002P002R002A055 17 | S003C002P018R002A055 18 | S003C003P002R001A055 19 | S003C003P016R001A055 20 | S003C003P018R002A024 21 | S004C002P003R001A013 22 | S004C002P008R001A009 23 | S004C002P020R001A003 24 | S004C002P020R001A004 25 | S004C002P020R001A012 26 | S004C002P020R001A020 27 | S004C002P020R001A021 28 | S004C002P020R001A036 29 | S005C002P004R001A001 30 | S005C002P004R001A003 31 | S005C002P010R001A016 32 | S005C002P010R001A017 33 | S005C002P010R001A048 34 | S005C002P010R001A049 35 | S005C002P016R001A009 36 | S005C002P016R001A010 37 | S005C002P018R001A003 38 | S005C002P018R001A028 39 | S005C002P018R001A029 40 | S005C003P016R002A009 41 | S005C003P018R002A013 42 | S005C003P021R002A057 43 | S006C001P001R002A055 44 | S006C002P007R001A005 45 | S006C002P007R001A006 46 | S006C002P016R001A043 47 | S006C002P016R001A051 48 | S006C002P016R001A052 49 | S006C002P022R001A012 50 | S006C002P023R001A020 51 | S006C002P023R001A021 52 | S006C002P023R001A022 53 | S006C002P023R001A023 54 | S006C002P024R001A018 55 | S006C002P024R001A019 56 | S006C003P001R002A013 57 | S006C003P007R002A009 58 | S006C003P007R002A010 59 | S006C003P007R002A025 60 | S006C003P016R001A060 61 | S006C003P017R001A055 62 | S006C003P017R002A013 63 | S006C003P017R002A014 64 | S006C003P017R002A015 65 | S006C003P022R002A013 66 | S007C001P018R002A050 67 | S007C001P025R002A051 68 | S007C001P028R001A050 69 | S007C001P028R001A051 70 | S007C001P028R001A052 71 | S007C002P008R002A008 72 | S007C002P015R002A055 73 | S007C002P026R001A008 74 | S007C002P026R001A009 75 | S007C002P026R001A010 76 | S007C002P026R001A011 77 | S007C002P026R001A012 78 | S007C002P026R001A050 79 | S007C002P027R001A011 80 | S007C002P027R001A013 81 | S007C002P028R002A055 82 | S007C003P007R001A002 83 | S007C003P007R001A004 84 | S007C003P019R001A060 85 | S007C003P027R002A001 86 | S007C003P027R002A002 87 | S007C003P027R002A003 88 | S007C003P027R002A004 89 | S007C003P027R002A005 90 | S007C003P027R002A006 91 | S007C003P027R002A007 92 | S007C003P027R002A008 93 | S007C003P027R002A009 94 | 
S007C003P027R002A010 95 | S007C003P027R002A011 96 | S007C003P027R002A012 97 | S007C003P027R002A013 98 | S008C002P001R001A009 99 | S008C002P001R001A010 100 | S008C002P001R001A014 101 | S008C002P001R001A015 102 | S008C002P001R001A016 103 | S008C002P001R001A018 104 | S008C002P001R001A019 105 | S008C002P008R002A059 106 | S008C002P025R001A060 107 | S008C002P029R001A004 108 | S008C002P031R001A005 109 | S008C002P031R001A006 110 | S008C002P032R001A018 111 | S008C002P034R001A018 112 | S008C002P034R001A019 113 | S008C002P035R001A059 114 | S008C002P035R002A002 115 | S008C002P035R002A005 116 | S008C003P007R001A009 117 | S008C003P007R001A016 118 | S008C003P007R001A017 119 | S008C003P007R001A018 120 | S008C003P007R001A019 121 | S008C003P007R001A020 122 | S008C003P007R001A021 123 | S008C003P007R001A022 124 | S008C003P007R001A023 125 | S008C003P007R001A025 126 | S008C003P007R001A026 127 | S008C003P007R001A028 128 | S008C003P007R001A029 129 | S008C003P007R002A003 130 | S008C003P008R002A050 131 | S008C003P025R002A002 132 | S008C003P025R002A011 133 | S008C003P025R002A012 134 | S008C003P025R002A016 135 | S008C003P025R002A020 136 | S008C003P025R002A022 137 | S008C003P025R002A023 138 | S008C003P025R002A030 139 | S008C003P025R002A031 140 | S008C003P025R002A032 141 | S008C003P025R002A033 142 | S008C003P025R002A049 143 | S008C003P025R002A060 144 | S008C003P031R001A001 145 | S008C003P031R002A004 146 | S008C003P031R002A014 147 | S008C003P031R002A015 148 | S008C003P031R002A016 149 | S008C003P031R002A017 150 | S008C003P032R002A013 151 | S008C003P033R002A001 152 | S008C003P033R002A011 153 | S008C003P033R002A012 154 | S008C003P034R002A001 155 | S008C003P034R002A012 156 | S008C003P034R002A022 157 | S008C003P034R002A023 158 | S008C003P034R002A024 159 | S008C003P034R002A044 160 | S008C003P034R002A045 161 | S008C003P035R002A016 162 | S008C003P035R002A017 163 | S008C003P035R002A018 164 | S008C003P035R002A019 165 | S008C003P035R002A020 166 | S008C003P035R002A021 167 | S009C002P007R001A001 168 | S009C002P007R001A003 169 | S009C002P007R001A014 170 | S009C002P008R001A014 171 | S009C002P015R002A050 172 | S009C002P016R001A002 173 | S009C002P017R001A028 174 | S009C002P017R001A029 175 | S009C003P017R002A030 176 | S009C003P025R002A054 177 | S010C001P007R002A020 178 | S010C002P016R002A055 179 | S010C002P017R001A005 180 | S010C002P017R001A018 181 | S010C002P017R001A019 182 | S010C002P019R001A001 183 | S010C002P025R001A012 184 | S010C003P007R002A043 185 | S010C003P008R002A003 186 | S010C003P016R001A055 187 | S010C003P017R002A055 188 | S011C001P002R001A008 189 | S011C001P018R002A050 190 | S011C002P008R002A059 191 | S011C002P016R002A055 192 | S011C002P017R001A020 193 | S011C002P017R001A021 194 | S011C002P018R002A055 195 | S011C002P027R001A009 196 | S011C002P027R001A010 197 | S011C002P027R001A037 198 | S011C003P001R001A055 199 | S011C003P002R001A055 200 | S011C003P008R002A012 201 | S011C003P015R001A055 202 | S011C003P016R001A055 203 | S011C003P019R001A055 204 | S011C003P025R001A055 205 | S011C003P028R002A055 206 | S012C001P019R001A060 207 | S012C001P019R002A060 208 | S012C002P015R001A055 209 | S012C002P017R002A012 210 | S012C002P025R001A060 211 | S012C003P008R001A057 212 | S012C003P015R001A055 213 | S012C003P015R002A055 214 | S012C003P016R001A055 215 | S012C003P017R002A055 216 | S012C003P018R001A055 217 | S012C003P018R001A057 218 | S012C003P019R002A011 219 | S012C003P019R002A012 220 | S012C003P025R001A055 221 | S012C003P027R001A055 222 | S012C003P027R002A009 223 | S012C003P028R001A035 224 | S012C003P028R002A055 225 | S013C001P015R001A054 
226 | S013C001P017R002A054 227 | S013C001P018R001A016 228 | S013C001P028R001A040 229 | S013C002P015R001A054 230 | S013C002P017R002A054 231 | S013C002P028R001A040 232 | S013C003P008R002A059 233 | S013C003P015R001A054 234 | S013C003P017R002A054 235 | S013C003P025R002A022 236 | S013C003P027R001A055 237 | S013C003P028R001A040 238 | S014C001P027R002A040 239 | S014C002P015R001A003 240 | S014C002P019R001A029 241 | S014C002P025R002A059 242 | S014C002P027R002A040 243 | S014C002P039R001A050 244 | S014C003P007R002A059 245 | S014C003P015R002A055 246 | S014C003P019R002A055 247 | S014C003P025R001A048 248 | S014C003P027R002A040 249 | S015C001P008R002A040 250 | S015C001P016R001A055 251 | S015C001P017R001A055 252 | S015C001P017R002A055 253 | S015C002P007R001A059 254 | S015C002P008R001A003 255 | S015C002P008R001A004 256 | S015C002P008R002A040 257 | S015C002P015R001A002 258 | S015C002P016R001A001 259 | S015C002P016R002A055 260 | S015C003P008R002A007 261 | S015C003P008R002A011 262 | S015C003P008R002A012 263 | S015C003P008R002A028 264 | S015C003P008R002A040 265 | S015C003P025R002A012 266 | S015C003P025R002A017 267 | S015C003P025R002A020 268 | S015C003P025R002A021 269 | S015C003P025R002A030 270 | S015C003P025R002A033 271 | S015C003P025R002A034 272 | S015C003P025R002A036 273 | S015C003P025R002A037 274 | S015C003P025R002A044 275 | S016C001P019R002A040 276 | S016C001P025R001A011 277 | S016C001P025R001A012 278 | S016C001P025R001A060 279 | S016C001P040R001A055 280 | S016C001P040R002A055 281 | S016C002P008R001A011 282 | S016C002P019R002A040 283 | S016C002P025R002A012 284 | S016C003P008R001A011 285 | S016C003P008R002A002 286 | S016C003P008R002A003 287 | S016C003P008R002A004 288 | S016C003P008R002A006 289 | S016C003P008R002A009 290 | S016C003P019R002A040 291 | S016C003P039R002A016 292 | S017C001P016R002A031 293 | S017C002P007R001A013 294 | S017C002P008R001A009 295 | S017C002P015R001A042 296 | S017C002P016R002A031 297 | S017C002P016R002A055 298 | S017C003P007R002A013 299 | S017C003P008R001A059 300 | S017C003P016R002A031 301 | S017C003P017R001A055 302 | S017C003P020R001A059 303 | -------------------------------------------------------------------------------- /options/options_classification.py: -------------------------------------------------------------------------------- 1 | class opts_ntu_60_cross_view(): 2 | 3 | def __init__(self): 4 | 5 | # graph based model 6 | self.agcn_model_args = { 7 | "num_class": 60, 8 | "num_point": 25, 9 | "num_person": 2, 10 | 'graph_args': { 11 | 'labeling_mode': 'spatial'} 12 | } 13 | 14 | #image based model 15 | self.hcn_model_args = { 16 | "in_channel":3, 17 | "out_channel":64, 18 | "window_size":64, 19 | "num_joint":25, 20 | "num_person":2, 21 | "num_class":60 22 | } 23 | 24 | #Sequence based model 25 | self.bi_gru_model_args = { 26 | "en_input_size":150, 27 | "en_hidden_size":1024, 28 | "en_num_layers":3, 29 | "num_class":60 30 | } 31 | 32 | # feeder 33 | self.train_feeder_args = { 34 | 'data_path': './data/NTU-RGB-D-60-AGCN/xview/train_data_joint.npy', 35 | 'label_path': './data/NTU-RGB-D-60-AGCN/xview/train_label.pkl', 36 | 'num_frame_path': './data/NTU-RGB-D-60-AGCN/xview/train_num_frame.npy', 37 | 'l_ratio': [0.5,1.0], 38 | 'input_size': 64 39 | } 40 | 41 | self.test_feeder_args = { 42 | 43 | 'data_path': './data/NTU-RGB-D-60-AGCN/xview/val_data_joint.npy', 44 | 'label_path': './data/NTU-RGB-D-60-AGCN/xview/val_label.pkl', 45 | 'num_frame_path': './data/NTU-RGB-D-60-AGCN/xview/val_num_frame.npy', 46 | 'l_ratio': [0.95], 47 | 'input_size': 64 48 | } 49 | 50 | class 
opts_ntu_60_cross_subject(): 51 | 52 | def __init__(self): 53 | 54 | # graph based model 55 | self.agcn_model_args = { 56 | "num_class": 60, 57 | "num_point": 25, 58 | "num_person": 2, 59 | 'graph_args': { 60 | 'labeling_mode': 'spatial'} 61 | } 62 | 63 | #image based model 64 | self.hcn_model_args = { 65 | "in_channel":3, 66 | "out_channel":64, 67 | "window_size":64, 68 | "num_joint":25, 69 | "num_person":2, 70 | "num_class":60 71 | } 72 | 73 | #Sequence based model 74 | self.bi_gru_model_args = { 75 | "en_input_size":150, 76 | "en_hidden_size":1024, 77 | "en_num_layers":3, 78 | "num_class":60 79 | } 80 | 81 | # feeder 82 | self.train_feeder_args = { 83 | 'data_path': './data/NTU-RGB-D-60-AGCN/xsub/train_data_joint.npy', 84 | 'label_path': './data/NTU-RGB-D-60-AGCN/xsub/train_label.pkl', 85 | 'num_frame_path': './data/NTU-RGB-D-60-AGCN/xsub/train_num_frame.npy', 86 | 'l_ratio': [0.5,1.0], 87 | 'input_size': 64 88 | } 89 | 90 | self.test_feeder_args = { 91 | 92 | 'data_path': './data/NTU-RGB-D-60-AGCN/xsub/val_data_joint.npy', 93 | 'label_path': './data/NTU-RGB-D-60-AGCN/xsub/val_label.pkl', 94 | 'num_frame_path': './data/NTU-RGB-D-60-AGCN/xsub/val_num_frame.npy', 95 | 'l_ratio': [0.95], 96 | 'input_size': 64 97 | } 98 | 99 | class opts_ntu_120_cross_subject(): 100 | def __init__(self): 101 | 102 | # graph based model 103 | self.agcn_model_args = { 104 | "num_class": 120, 105 | "num_point": 25, 106 | "num_person": 2, 107 | 'graph_args': { 108 | 'labeling_mode': 'spatial'} 109 | } 110 | 111 | #image based model 112 | self.hcn_model_args = { 113 | "in_channel":3, 114 | "out_channel":64, 115 | "window_size":64, 116 | "num_joint":25, 117 | "num_person":2, 118 | "num_class":120 119 | } 120 | 121 | #Sequence based model 122 | self.bi_gru_model_args = { 123 | "en_input_size":150, 124 | "en_hidden_size":1024, 125 | "en_num_layers":3, 126 | "num_class":120 127 | } 128 | 129 | # feeder 130 | self.train_feeder_args = { 131 | 'data_path': './data/NTU-RGB-D-120-AGCN/xsub/train_data_joint.npy', 132 | 'label_path': './data/NTU-RGB-D-120-AGCN/xsub/train_label.pkl', 133 | 'num_frame_path': './data/NTU-RGB-D-120-AGCN/xsub/train_num_frame.npy', 134 | 'l_ratio': [0.5,1.0], 135 | 'input_size': 64 136 | } 137 | 138 | self.test_feeder_args = { 139 | 140 | 'data_path': './data/NTU-RGB-D-120-AGCN/xsub/val_data_joint.npy', 141 | 'label_path': './data/NTU-RGB-D-120-AGCN/xsub/val_label.pkl', 142 | 'num_frame_path': './data/NTU-RGB-D-120-AGCN/xsub/val_num_frame.npy', 143 | 'l_ratio': [0.95], 144 | 'input_size': 64 145 | } 146 | 147 | class opts_ntu_120_cross_setup(): 148 | 149 | def __init__(self): 150 | 151 | # graph based model 152 | self.agcn_model_args = { 153 | "num_class": 120, 154 | "num_point": 25, 155 | "num_person": 2, 156 | 'graph_args': { 157 | 'labeling_mode': 'spatial'} 158 | } 159 | 160 | #image based model 161 | self.hcn_model_args = { 162 | "in_channel":3, 163 | "out_channel":64, 164 | "window_size":64, 165 | "num_joint":25, 166 | "num_person":2, 167 | "num_class":120 168 | } 169 | 170 | #Sequence based model 171 | self.bi_gru_model_args = { 172 | "en_input_size":150, 173 | "en_hidden_size":1024, 174 | "en_num_layers":3, 175 | "num_class":120 176 | } 177 | 178 | # feeder 179 | self.train_feeder_args = { 180 | 'data_path': './data/NTU-RGB-D-120-AGCN/xsetup/train_data_joint.npy', 181 | 'label_path': './data/NTU-RGB-D-120-AGCN/xsetup/train_label.pkl', 182 | 'num_frame_path': './data/NTU-RGB-D-120-AGCN/xsetup/train_num_frame.npy', 183 | 'l_ratio': [0.5,1.0], 184 | 'input_size': 64 185 | } 186 | 187 | 
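# Note on 'l_ratio': the classification feeders use [0.5, 1.0] for training and [0.95] for testing.
# In feeder/augmentations.py, crop_subsequence() branches on l_ratio[0] == 0.5: training draws a random
# temporal crop covering 50-100% of the valid frames (with a 64-frame floor), while testing takes a fixed
# center crop spanning 95% of them, before resizing to 'input_size' frames.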
self.test_feeder_args = { 188 | 189 | 'data_path': './data/NTU-RGB-D-120-AGCN/xsetup/val_data_joint.npy', 190 | 'label_path': './data/NTU-RGB-D-120-AGCN/xsetup/val_label.pkl', 191 | 'num_frame_path': './data/NTU-RGB-D-120-AGCN/xsetup/val_num_frame.npy', 192 | 'l_ratio': [0.95], 193 | 'input_size': 64 194 | } 195 | 196 | 197 | class opts_pku_v2_cross_view(): 198 | 199 | def __init__(self): 200 | 201 | # graph based model 202 | self.agcn_model_args = { 203 | "num_class": 120, 204 | "num_point": 25, 205 | "num_person": 2, 206 | 'graph_args': { 207 | 'labeling_mode': 'spatial'} 208 | } 209 | 210 | #image based model 211 | self.hcn_model_args = { 212 | "in_channel":3, 213 | "out_channel":64, 214 | "window_size":64, 215 | "num_joint":25, 216 | "num_person":2, 217 | "num_class":120 218 | } 219 | 220 | #Sequence based model 221 | self.bi_gru_model_args = { 222 | "en_input_size":150, 223 | "en_hidden_size":1024, 224 | "en_num_layers":3, 225 | "num_class":120 226 | } 227 | 228 | # feeder 229 | self.train_feeder_args = { 230 | 'data_path': './data/PKU-MMD-v2-AGCN/xview/train_data_joint.npy', 231 | 'label_path': './data/PKU-MMD-v2-AGCN/xview/train_label.pkl', 232 | 'num_frame_path': './data/PKU-MMD-v2-AGCN/xview/train_num_frame.npy', 233 | 'l_ratio': [0.5,1.0], 234 | 'input_size': 64 235 | } 236 | 237 | self.test_feeder_args = { 238 | 239 | 'data_path': './data/PKU-MMD-v2-AGCN/xview/val_data_joint.npy', 240 | 'label_path': './data/PKU-MMD-v2-AGCN/xview/val_label.pkl', 241 | 'num_frame_path': './data/PKU-MMD-v2-AGCN/xview/val_num_frame.npy', 242 | 'l_ratio': [0.95], 243 | 'input_size': 64 244 | } 245 | 246 | class opts_pku_v2_cross_subject(): 247 | 248 | def __init__(self): 249 | 250 | # graph based model 251 | self.agcn_model_args = { 252 | "num_class": 120, 253 | "num_point": 25, 254 | "num_person": 2, 255 | 'graph_args': { 256 | 'labeling_mode': 'spatial'} 257 | } 258 | 259 | #image based model 260 | self.hcn_model_args = { 261 | "in_channel":3, 262 | "out_channel":64, 263 | "window_size":64, 264 | "num_joint":25, 265 | "num_person":2, 266 | "num_class":51 267 | } 268 | 269 | #Sequence based model 270 | self.bi_gru_model_args = { 271 | "en_input_size":150, 272 | "en_hidden_size":1024, 273 | "en_num_layers":3, 274 | "num_class":51 275 | } 276 | 277 | # feeder 278 | self.train_feeder_args = { 279 | 'data_path': './data/PKU-MMD-v2-AGCN/xsub/train_data_joint.npy', 280 | 'label_path': './data/PKU-MMD-v2-AGCN/xsub/train_label.pkl', 281 | 'num_frame_path': './data/PKU-MMD-v2-AGCN/xsub/train_num_frame.npy', 282 | 'l_ratio': [0.5,1.0], 283 | 'input_size': 64 284 | } 285 | 286 | self.test_feeder_args = { 287 | 288 | 'data_path': './data/PKU-MMD-v2-AGCN/xsub/val_data_joint.npy', 289 | 'label_path': './data/PKU-MMD-v2-AGCN/xsub/val_label.pkl', 290 | 'num_frame_path': './data/PKU-MMD-v2-AGCN/xsub/val_num_frame.npy', 291 | 'l_ratio': [0.95], 292 | 'input_size': 64 293 | } 294 | -------------------------------------------------------------------------------- /moco/builder_cmd.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from .GRU import BIGRU 6 | 7 | def loss_kld(inputs, targets): 8 | inputs = F.log_softmax(inputs, dim=1) 9 | targets = F.softmax(targets, dim=1) 10 | return F.kl_div(inputs, targets, reduction='batchmean') 11 | 12 | # initilize weight 13 | def weights_init_gru(model): 14 | with torch.no_grad(): 15 | for child in list(model.children()): 16 | print(child) 17 | for param in 
list(child.parameters()): 18 | if param.dim() == 2: 19 | nn.init.xavier_uniform_(param) 20 | print('GRU weights initialization finished!') 21 | 22 | class MoCo(nn.Module): 23 | def __init__(self, skeleton_representation, args_bi_gru, dim=128, K=65536, m=0.999, T=0.07, 24 | teacher_T=0.05, student_T=0.1, cmd_weight=1.0, topk=1024, mlp=False, pretrain=True): 25 | super(MoCo, self).__init__() 26 | self.pretrain = pretrain 27 | self.Bone = [(1, 2), (2, 21), (3, 21), (4, 3), (5, 21), (6, 5), (7, 6), (8, 7), (9, 21), 28 | (10, 9), (11, 10), (12, 11), (13, 1), (14, 13), (15, 14), (16, 15), (17, 1), 29 | (18, 17), (19, 18), (20, 19), (21, 21), (22, 23), (23, 8), (24, 25), (25, 12)] 30 | 31 | if not self.pretrain: 32 | self.encoder_q = BIGRU(**args_bi_gru) 33 | self.encoder_q_motion = BIGRU(**args_bi_gru) 34 | self.encoder_q_bone = BIGRU(**args_bi_gru) 35 | weights_init_gru(self.encoder_q) 36 | weights_init_gru(self.encoder_q_motion) 37 | weights_init_gru(self.encoder_q_bone) 38 | else: 39 | self.K = K 40 | self.m = m 41 | self.T = T 42 | self.teacher_T = teacher_T 43 | self.student_T = student_T 44 | self.cmd_weight = cmd_weight 45 | self.topk = topk 46 | mlp=mlp 47 | print(" MoCo parameters",K,m,T,mlp) 48 | print(" CMD parameters: teacher-T %.2f, student-T %.2f, cmd-weight: %.2f, topk: %d"%(teacher_T,student_T,cmd_weight,topk)) 49 | print(skeleton_representation) 50 | 51 | 52 | self.encoder_q = BIGRU(**args_bi_gru) 53 | self.encoder_k = BIGRU(**args_bi_gru) 54 | self.encoder_q_motion = BIGRU(**args_bi_gru) 55 | self.encoder_k_motion = BIGRU(**args_bi_gru) 56 | self.encoder_q_bone = BIGRU(**args_bi_gru) 57 | self.encoder_k_bone = BIGRU(**args_bi_gru) 58 | weights_init_gru(self.encoder_q) 59 | weights_init_gru(self.encoder_q_motion) 60 | weights_init_gru(self.encoder_q_bone) 61 | weights_init_gru(self.encoder_k) 62 | weights_init_gru(self.encoder_k_motion) 63 | weights_init_gru(self.encoder_k_bone) 64 | 65 | #projection heads 66 | if mlp: # hack: brute-force replacement 67 | dim_mlp = self.encoder_q.fc.weight.shape[1] 68 | self.encoder_q.fc = nn.Sequential(nn.Linear(dim_mlp, dim_mlp), 69 | nn.ReLU(), 70 | self.encoder_q.fc) 71 | self.encoder_k.fc = nn.Sequential(nn.Linear(dim_mlp, dim_mlp), 72 | nn.ReLU(), 73 | self.encoder_k.fc) 74 | self.encoder_q_motion.fc = nn.Sequential(nn.Linear(dim_mlp, dim_mlp), 75 | nn.ReLU(), 76 | self.encoder_q_motion.fc) 77 | self.encoder_k_motion.fc = nn.Sequential(nn.Linear(dim_mlp, dim_mlp), 78 | nn.ReLU(), 79 | self.encoder_k_motion.fc) 80 | self.encoder_q_bone.fc = nn.Sequential(nn.Linear(dim_mlp, dim_mlp), 81 | nn.ReLU(), 82 | self.encoder_q_bone.fc) 83 | self.encoder_k_bone.fc = nn.Sequential(nn.Linear(dim_mlp, dim_mlp), 84 | nn.ReLU(), 85 | self.encoder_k_bone.fc) 86 | 87 | for param_q, param_k in zip(self.encoder_q.parameters(), self.encoder_k.parameters()): 88 | param_k.data.copy_(param_q.data) # initialize 89 | param_k.requires_grad = False # not update by gradient 90 | for param_q, param_k in zip(self.encoder_q_motion.parameters(), self.encoder_k_motion.parameters()): 91 | param_k.data.copy_(param_q.data) 92 | param_k.requires_grad = False 93 | for param_q, param_k in zip(self.encoder_q_bone.parameters(), self.encoder_k_bone.parameters()): 94 | param_k.data.copy_(param_q.data) 95 | param_k.requires_grad = False 96 | 97 | # create the queue 98 | self.register_buffer("queue", torch.randn(dim, self.K)) 99 | self.queue = F.normalize(self.queue, dim=0) 100 | self.register_buffer("queue_ptr", torch.zeros(1, dtype=torch.long)) 101 | 102 | 
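# The motion and bone streams below get their own negative-sample queues, mirroring the joint queue above:
# each is a (dim, K) feature bank of normalized random vectors plus a ring-buffer pointer, refreshed
# batch-by-batch by the corresponding _dequeue_and_enqueue_* method further down.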
self.register_buffer("queue_motion", torch.randn(dim, self.K)) 103 | self.queue_motion = F.normalize(self.queue_motion, dim=0) 104 | self.register_buffer("queue_ptr_motion", torch.zeros(1, dtype=torch.long)) 105 | 106 | self.register_buffer("queue_bone", torch.randn(dim, self.K)) 107 | self.queue_bone = F.normalize(self.queue_bone, dim=0) 108 | self.register_buffer("queue_ptr_bone", torch.zeros(1, dtype=torch.long)) 109 | 110 | @torch.no_grad() 111 | def _momentum_update_key_encoder(self): 112 | """ 113 | Momentum update of the key encoder 114 | """ 115 | for param_q, param_k in zip(self.encoder_q.parameters(), self.encoder_k.parameters()): 116 | param_k.data = param_k.data * self.m + param_q.data * (1. - self.m) 117 | 118 | @torch.no_grad() 119 | def _momentum_update_key_encoder_motion(self): 120 | for param_q, param_k in zip(self.encoder_q_motion.parameters(), self.encoder_k_motion.parameters()): 121 | param_k.data = param_k.data * self.m + param_q.data * (1. - self.m) 122 | 123 | @torch.no_grad() 124 | def _momentum_update_key_encoder_bone(self): 125 | for param_q, param_k in zip(self.encoder_q_bone.parameters(), self.encoder_k_bone.parameters()): 126 | param_k.data = param_k.data * self.m + param_q.data * (1. - self.m) 127 | 128 | 129 | @torch.no_grad() 130 | def _dequeue_and_enqueue(self, keys): 131 | batch_size = keys.shape[0] 132 | ptr = int(self.queue_ptr) 133 | self.queue[:, ptr:ptr + batch_size] = keys.T 134 | ptr = (ptr + batch_size) % self.K # move pointer 135 | self.queue_ptr[0] = ptr 136 | 137 | @torch.no_grad() 138 | def _dequeue_and_enqueue_motion(self, keys): 139 | batch_size = keys.shape[0] 140 | ptr = int(self.queue_ptr_motion) 141 | self.queue_motion[:, ptr:ptr + batch_size] = keys.T 142 | ptr = (ptr + batch_size) % self.K # move pointer 143 | self.queue_ptr_motion[0] = ptr 144 | 145 | @torch.no_grad() 146 | def _dequeue_and_enqueue_bone(self, keys): 147 | batch_size = keys.shape[0] 148 | ptr = int(self.queue_ptr_bone) 149 | self.queue_bone[:, ptr:ptr + batch_size] = keys.T 150 | ptr = (ptr + batch_size) % self.K # move pointer 151 | self.queue_ptr_bone[0] = ptr 152 | 153 | 154 | def forward(self, im_q, im_k=None, view='joint', knn_eval=False): 155 | im_q_motion = torch.zeros_like(im_q) 156 | im_q_motion[:, :, :-1, :, :] = im_q[:, :, 1:, :, :] - im_q[:, :, :-1, :, :] 157 | 158 | im_q_bone = torch.zeros_like(im_q) 159 | for v1, v2 in self.Bone: 160 | im_q_bone[:, :, :, v1 - 1, :] = im_q[:, :, :, v1 - 1, :] - im_q[:, :, :, v2 - 1, :] 161 | 162 | # Permute and Reshape 163 | N, C, T, V, M = im_q.size() 164 | im_q = im_q.permute(0,2,3,1,4).reshape(N,T,-1) 165 | im_q_motion = im_q_motion.permute(0,2,3,1,4).reshape(N,T,-1) 166 | im_q_bone = im_q_bone.permute(0,2,3,1,4).reshape(N,T,-1) 167 | 168 | if not self.pretrain: 169 | if view == 'joint': 170 | return self.encoder_q(im_q, knn_eval) 171 | elif view == 'motion': 172 | return self.encoder_q_motion(im_q_motion, knn_eval) 173 | elif view == 'bone': 174 | return self.encoder_q_bone(im_q_bone, knn_eval) 175 | elif view == 'all': 176 | return (self.encoder_q(im_q, knn_eval) + \ 177 | self.encoder_q_motion(im_q_motion, knn_eval) + \ 178 | self.encoder_q_bone(im_q_bone, knn_eval)) / 3. 
179 | else: 180 | raise ValueError 181 | 182 | im_k_motion = torch.zeros_like(im_k) 183 | im_k_motion[:, :, :-1, :, :] = im_k[:, :, 1:, :, :] - im_k[:, :, :-1, :, :] 184 | 185 | im_k_bone = torch.zeros_like(im_k) 186 | for v1, v2 in self.Bone: 187 | im_k_bone[:, :, :, v1 - 1, :] = im_k[:, :, :, v1 - 1, :] - im_k[:, :, :, v2 - 1, :] 188 | 189 | # Permute and Reshape 190 | im_k = im_k.permute(0,2,3,1,4).reshape(N,T,-1) 191 | im_k_motion = im_k_motion.permute(0,2,3,1,4).reshape(N,T,-1) 192 | im_k_bone = im_k_bone.permute(0,2,3,1,4).reshape(N,T,-1) 193 | 194 | # compute query features 195 | q = self.encoder_q(im_q) # queries: NxC 196 | q = F.normalize(q, dim=1) 197 | 198 | q_motion = self.encoder_q_motion(im_q_motion) 199 | q_motion = F.normalize(q_motion, dim=1) 200 | 201 | q_bone = self.encoder_q_bone(im_q_bone) 202 | q_bone = F.normalize(q_bone, dim=1) 203 | 204 | # compute key features for s1 and s2 skeleton representations 205 | with torch.no_grad(): # no gradient to keys 206 | self._momentum_update_key_encoder() # update the key encoder 207 | self._momentum_update_key_encoder_motion() 208 | self._momentum_update_key_encoder_bone() 209 | 210 | k = self.encoder_k(im_k) # keys: NxC 211 | k = F.normalize(k, dim=1) 212 | 213 | k_motion = self.encoder_k_motion(im_k_motion) 214 | k_motion = F.normalize(k_motion, dim=1) 215 | 216 | k_bone = self.encoder_k_bone(im_k_bone) 217 | k_bone = F.normalize(k_bone, dim=1) 218 | 219 | # MOCO 220 | # compute logits 221 | # Einstein sum is more intuitive 222 | # positive logits: Nx1 223 | l_pos = torch.einsum('nc,nc->n', [q, k]).unsqueeze(-1) 224 | # negative logits: NxK 225 | l_neg = torch.einsum('nc,ck->nk', [q, self.queue.clone().detach()]) 226 | 227 | l_pos_motion = torch.einsum('nc,nc->n', [q_motion, k_motion]).unsqueeze(-1) 228 | l_neg_motion = torch.einsum('nc,ck->nk', [q_motion, self.queue_motion.clone().detach()]) 229 | 230 | l_pos_bone = torch.einsum('nc,nc->n', [q_bone, k_bone]).unsqueeze(-1) 231 | l_neg_bone = torch.einsum('nc,ck->nk', [q_bone, self.queue_bone.clone().detach()]) 232 | 233 | # CMD loss 234 | lk_neg = torch.einsum('nc,ck->nk', [k, self.queue.clone().detach()]) 235 | lk_neg_motion = torch.einsum('nc,ck->nk', [k_motion, self.queue_motion.clone().detach()]) 236 | lk_neg_bone = torch.einsum('nc,ck->nk', [k_bone, self.queue_bone.clone().detach()]) 237 | 238 | # Top-k 239 | lk_neg_topk, topk_idx = torch.topk(lk_neg, self.topk, dim=-1) 240 | lk_neg_motion_topk, motion_topk_idx = torch.topk(lk_neg_motion, self.topk, dim=-1) 241 | lk_neg_bone_topk, bone_topk_idx = torch.topk(lk_neg_bone, self.topk, dim=-1) 242 | 243 | loss_cmd = loss_kld(torch.gather(l_neg_motion, -1, topk_idx) / self.student_T, lk_neg_topk / self.teacher_T) + \ 244 | loss_kld(torch.gather(l_neg_bone, -1, topk_idx) / self.student_T, lk_neg_topk / self.teacher_T) + \ 245 | loss_kld(torch.gather(l_neg, -1, motion_topk_idx) / self.student_T, lk_neg_motion_topk / self.teacher_T) + \ 246 | loss_kld(torch.gather(l_neg_bone, -1, motion_topk_idx) / self.student_T, lk_neg_motion_topk / self.teacher_T) + \ 247 | loss_kld(torch.gather(l_neg, -1, bone_topk_idx) / self.student_T, lk_neg_bone_topk / self.teacher_T) + \ 248 | loss_kld(torch.gather(l_neg_motion, -1, bone_topk_idx) / self.student_T, lk_neg_bone_topk / self.teacher_T) 249 | 250 | # logits: Nx(1+K) 251 | logits = torch.cat([l_pos, l_neg], dim=1) 252 | logits_motion = torch.cat([l_pos_motion, l_neg_motion], dim=1) 253 | logits_bone = torch.cat([l_pos_bone, l_neg_bone], dim=1) 254 | 255 | # apply temperature 256 | logits /= 
self.T 257 | logits_motion /= self.T 258 | logits_bone /= self.T 259 | 260 | # labels: positive key indicators 261 | labels = torch.zeros(logits.shape[0], dtype=torch.long).cuda() 262 | 263 | # dequeue and enqueue 264 | self._dequeue_and_enqueue(k) 265 | self._dequeue_and_enqueue_motion(k_motion) 266 | self._dequeue_and_enqueue_bone(k_bone) 267 | 268 | return logits, logits_motion, logits_bone, labels, loss_cmd * self.cmd_weight -------------------------------------------------------------------------------- /data_gen/resource/NTU_RGBD120_samples_with_missing_skeletons.txt: -------------------------------------------------------------------------------- 1 | S001C002P005R002A008 2 | S001C002P006R001A008 3 | S001C003P002R001A055 4 | S001C003P002R002A012 5 | S001C003P005R002A004 6 | S001C003P005R002A005 7 | S001C003P005R002A006 8 | S001C003P006R002A008 9 | S002C002P011R002A030 10 | S002C003P008R001A020 11 | S002C003P010R002A010 12 | S002C003P011R002A007 13 | S002C003P011R002A011 14 | S002C003P014R002A007 15 | S003C001P019R001A055 16 | S003C002P002R002A055 17 | S003C002P018R002A055 18 | S003C003P002R001A055 19 | S003C003P016R001A055 20 | S003C003P018R002A024 21 | S004C002P003R001A013 22 | S004C002P008R001A009 23 | S004C002P020R001A003 24 | S004C002P020R001A004 25 | S004C002P020R001A012 26 | S004C002P020R001A020 27 | S004C002P020R001A021 28 | S004C002P020R001A036 29 | S005C002P004R001A001 30 | S005C002P004R001A003 31 | S005C002P010R001A016 32 | S005C002P010R001A017 33 | S005C002P010R001A048 34 | S005C002P010R001A049 35 | S005C002P016R001A009 36 | S005C002P016R001A010 37 | S005C002P018R001A003 38 | S005C002P018R001A028 39 | S005C002P018R001A029 40 | S005C003P016R002A009 41 | S005C003P018R002A013 42 | S005C003P021R002A057 43 | S006C001P001R002A055 44 | S006C002P007R001A005 45 | S006C002P007R001A006 46 | S006C002P016R001A043 47 | S006C002P016R001A051 48 | S006C002P016R001A052 49 | S006C002P022R001A012 50 | S006C002P023R001A020 51 | S006C002P023R001A021 52 | S006C002P023R001A022 53 | S006C002P023R001A023 54 | S006C002P024R001A018 55 | S006C002P024R001A019 56 | S006C003P001R002A013 57 | S006C003P007R002A009 58 | S006C003P007R002A010 59 | S006C003P007R002A025 60 | S006C003P016R001A060 61 | S006C003P017R001A055 62 | S006C003P017R002A013 63 | S006C003P017R002A014 64 | S006C003P017R002A015 65 | S006C003P022R002A013 66 | S007C001P018R002A050 67 | S007C001P025R002A051 68 | S007C001P028R001A050 69 | S007C001P028R001A051 70 | S007C001P028R001A052 71 | S007C002P008R002A008 72 | S007C002P015R002A055 73 | S007C002P026R001A008 74 | S007C002P026R001A009 75 | S007C002P026R001A010 76 | S007C002P026R001A011 77 | S007C002P026R001A012 78 | S007C002P026R001A050 79 | S007C002P027R001A011 80 | S007C002P027R001A013 81 | S007C002P028R002A055 82 | S007C003P007R001A002 83 | S007C003P007R001A004 84 | S007C003P019R001A060 85 | S007C003P027R002A001 86 | S007C003P027R002A002 87 | S007C003P027R002A003 88 | S007C003P027R002A004 89 | S007C003P027R002A005 90 | S007C003P027R002A006 91 | S007C003P027R002A007 92 | S007C003P027R002A008 93 | S007C003P027R002A009 94 | S007C003P027R002A010 95 | S007C003P027R002A011 96 | S007C003P027R002A012 97 | S007C003P027R002A013 98 | S008C002P001R001A009 99 | S008C002P001R001A010 100 | S008C002P001R001A014 101 | S008C002P001R001A015 102 | S008C002P001R001A016 103 | S008C002P001R001A018 104 | S008C002P001R001A019 105 | S008C002P008R002A059 106 | S008C002P025R001A060 107 | S008C002P029R001A004 108 | S008C002P031R001A005 109 | S008C002P031R001A006 110 | S008C002P032R001A018 111 | S008C002P034R001A018 112 
| S008C002P034R001A019 113 | S008C002P035R001A059 114 | S008C002P035R002A002 115 | S008C002P035R002A005 116 | S008C003P007R001A009 117 | S008C003P007R001A016 118 | S008C003P007R001A017 119 | S008C003P007R001A018 120 | S008C003P007R001A019 121 | S008C003P007R001A020 122 | S008C003P007R001A021 123 | S008C003P007R001A022 124 | S008C003P007R001A023 125 | S008C003P007R001A025 126 | S008C003P007R001A026 127 | S008C003P007R001A028 128 | S008C003P007R001A029 129 | S008C003P007R002A003 130 | S008C003P008R002A050 131 | S008C003P025R002A002 132 | S008C003P025R002A011 133 | S008C003P025R002A012 134 | S008C003P025R002A016 135 | S008C003P025R002A020 136 | S008C003P025R002A022 137 | S008C003P025R002A023 138 | S008C003P025R002A030 139 | S008C003P025R002A031 140 | S008C003P025R002A032 141 | S008C003P025R002A033 142 | S008C003P025R002A049 143 | S008C003P025R002A060 144 | S008C003P031R001A001 145 | S008C003P031R002A004 146 | S008C003P031R002A014 147 | S008C003P031R002A015 148 | S008C003P031R002A016 149 | S008C003P031R002A017 150 | S008C003P032R002A013 151 | S008C003P033R002A001 152 | S008C003P033R002A011 153 | S008C003P033R002A012 154 | S008C003P034R002A001 155 | S008C003P034R002A012 156 | S008C003P034R002A022 157 | S008C003P034R002A023 158 | S008C003P034R002A024 159 | S008C003P034R002A044 160 | S008C003P034R002A045 161 | S008C003P035R002A016 162 | S008C003P035R002A017 163 | S008C003P035R002A018 164 | S008C003P035R002A019 165 | S008C003P035R002A020 166 | S008C003P035R002A021 167 | S009C002P007R001A001 168 | S009C002P007R001A003 169 | S009C002P007R001A014 170 | S009C002P008R001A014 171 | S009C002P015R002A050 172 | S009C002P016R001A002 173 | S009C002P017R001A028 174 | S009C002P017R001A029 175 | S009C003P017R002A030 176 | S009C003P025R002A054 177 | S010C001P007R002A020 178 | S010C002P016R002A055 179 | S010C002P017R001A005 180 | S010C002P017R001A018 181 | S010C002P017R001A019 182 | S010C002P019R001A001 183 | S010C002P025R001A012 184 | S010C003P007R002A043 185 | S010C003P008R002A003 186 | S010C003P016R001A055 187 | S010C003P017R002A055 188 | S011C001P002R001A008 189 | S011C001P018R002A050 190 | S011C002P008R002A059 191 | S011C002P016R002A055 192 | S011C002P017R001A020 193 | S011C002P017R001A021 194 | S011C002P018R002A055 195 | S011C002P027R001A009 196 | S011C002P027R001A010 197 | S011C002P027R001A037 198 | S011C003P001R001A055 199 | S011C003P002R001A055 200 | S011C003P008R002A012 201 | S011C003P015R001A055 202 | S011C003P016R001A055 203 | S011C003P019R001A055 204 | S011C003P025R001A055 205 | S011C003P028R002A055 206 | S012C001P019R001A060 207 | S012C001P019R002A060 208 | S012C002P015R001A055 209 | S012C002P017R002A012 210 | S012C002P025R001A060 211 | S012C003P008R001A057 212 | S012C003P015R001A055 213 | S012C003P015R002A055 214 | S012C003P016R001A055 215 | S012C003P017R002A055 216 | S012C003P018R001A055 217 | S012C003P018R001A057 218 | S012C003P019R002A011 219 | S012C003P019R002A012 220 | S012C003P025R001A055 221 | S012C003P027R001A055 222 | S012C003P027R002A009 223 | S012C003P028R001A035 224 | S012C003P028R002A055 225 | S013C001P015R001A054 226 | S013C001P017R002A054 227 | S013C001P018R001A016 228 | S013C001P028R001A040 229 | S013C002P015R001A054 230 | S013C002P017R002A054 231 | S013C002P028R001A040 232 | S013C003P008R002A059 233 | S013C003P015R001A054 234 | S013C003P017R002A054 235 | S013C003P025R002A022 236 | S013C003P027R001A055 237 | S013C003P028R001A040 238 | S014C001P027R002A040 239 | S014C002P015R001A003 240 | S014C002P019R001A029 241 | S014C002P025R002A059 242 | S014C002P027R002A040 243 | 
S014C002P039R001A050 244 | S014C003P007R002A059 245 | S014C003P015R002A055 246 | S014C003P019R002A055 247 | S014C003P025R001A048 248 | S014C003P027R002A040 249 | S015C001P008R002A040 250 | S015C001P016R001A055 251 | S015C001P017R001A055 252 | S015C001P017R002A055 253 | S015C002P007R001A059 254 | S015C002P008R001A003 255 | S015C002P008R001A004 256 | S015C002P008R002A040 257 | S015C002P015R001A002 258 | S015C002P016R001A001 259 | S015C002P016R002A055 260 | S015C003P008R002A007 261 | S015C003P008R002A011 262 | S015C003P008R002A012 263 | S015C003P008R002A028 264 | S015C003P008R002A040 265 | S015C003P025R002A012 266 | S015C003P025R002A017 267 | S015C003P025R002A020 268 | S015C003P025R002A021 269 | S015C003P025R002A030 270 | S015C003P025R002A033 271 | S015C003P025R002A034 272 | S015C003P025R002A036 273 | S015C003P025R002A037 274 | S015C003P025R002A044 275 | S016C001P019R002A040 276 | S016C001P025R001A011 277 | S016C001P025R001A012 278 | S016C001P025R001A060 279 | S016C001P040R001A055 280 | S016C001P040R002A055 281 | S016C002P008R001A011 282 | S016C002P019R002A040 283 | S016C002P025R002A012 284 | S016C003P008R001A011 285 | S016C003P008R002A002 286 | S016C003P008R002A003 287 | S016C003P008R002A004 288 | S016C003P008R002A006 289 | S016C003P008R002A009 290 | S016C003P019R002A040 291 | S016C003P039R002A016 292 | S017C001P016R002A031 293 | S017C002P007R001A013 294 | S017C002P008R001A009 295 | S017C002P015R001A042 296 | S017C002P016R002A031 297 | S017C002P016R002A055 298 | S017C003P007R002A013 299 | S017C003P008R001A059 300 | S017C003P016R002A031 301 | S017C003P017R001A055 302 | S017C003P020R001A059 303 | S019C001P046R001A075 304 | S019C002P042R001A094 305 | S019C002P042R001A095 306 | S019C002P042R001A096 307 | S019C002P042R001A097 308 | S019C002P042R001A098 309 | S019C002P042R001A099 310 | S019C002P042R001A100 311 | S019C002P042R001A101 312 | S019C002P042R001A102 313 | S019C002P049R002A074 314 | S019C002P049R002A079 315 | S019C002P051R001A061 316 | S019C003P046R001A061 317 | S019C003P046R002A061 318 | S019C003P046R002A062 319 | S020C002P041R001A063 320 | S020C002P041R001A064 321 | S020C002P044R001A063 322 | S020C002P044R001A064 323 | S020C002P044R001A066 324 | S020C002P044R001A084 325 | S020C002P054R001A081 326 | S021C001P059R001A108 327 | S021C002P055R001A065 328 | S021C002P055R001A092 329 | S021C002P055R001A093 330 | S021C002P057R001A064 331 | S021C002P058R001A063 332 | S021C002P058R001A064 333 | S021C002P059R001A074 334 | S021C002P059R001A075 335 | S021C002P059R001A076 336 | S021C002P059R001A077 337 | S021C002P059R001A078 338 | S021C002P059R001A079 339 | S021C003P057R002A078 340 | S021C003P057R002A079 341 | S021C003P057R002A094 342 | S022C002P061R001A113 343 | S022C003P061R002A061 344 | S022C003P061R002A062 345 | S022C003P063R002A061 346 | S022C003P063R002A062 347 | S022C003P063R002A063 348 | S022C003P063R002A064 349 | S022C003P063R002A078 350 | S022C003P064R002A061 351 | S022C003P064R002A062 352 | S022C003P065R002A061 353 | S022C003P065R002A062 354 | S022C003P065R002A119 355 | S022C003P067R002A064 356 | S023C002P055R001A114 357 | S023C002P055R002A092 358 | S023C002P059R001A075 359 | S023C002P063R001A075 360 | S023C003P055R002A093 361 | S023C003P055R002A094 362 | S023C003P061R002A061 363 | S023C003P064R001A092 364 | S024C001P063R001A109 365 | S024C002P062R002A074 366 | S024C002P067R001A100 367 | S024C002P067R001A101 368 | S024C002P067R001A102 369 | S024C002P067R001A103 370 | S024C003P062R002A074 371 | S024C003P063R002A061 372 | S024C003P063R002A062 373 | S025C001P055R002A119 374 | 
S025C003P056R002A119 375 | S025C003P059R002A115 376 | S026C002P044R001A061 377 | S026C002P044R001A062 378 | S026C002P070R001A092 379 | S026C003P069R002A075 380 | S026C003P074R002A061 381 | S026C003P074R002A062 382 | S026C003P075R001A117 383 | S026C003P075R001A118 384 | S027C001P082R001A063 385 | S027C002P044R002A092 386 | S027C002P079R001A061 387 | S027C002P079R001A062 388 | S027C002P079R001A063 389 | S027C002P079R001A064 390 | S027C002P082R001A092 391 | S027C002P084R001A061 392 | S027C002P084R001A062 393 | S027C002P086R001A061 394 | S027C003P041R002A087 395 | S027C003P080R002A061 396 | S027C003P082R002A061 397 | S027C003P082R002A062 398 | S027C003P086R002A061 399 | S027C003P086R002A062 400 | S028C001P087R001A061 401 | S028C002P041R001A091 402 | S028C002P087R001A061 403 | S028C003P042R002A064 404 | S028C003P046R002A063 405 | S028C003P046R002A066 406 | S028C003P046R002A067 407 | S028C003P046R002A068 408 | S028C003P046R002A069 409 | S028C003P046R002A070 410 | S028C003P046R002A071 411 | S028C003P046R002A072 412 | S028C003P046R002A074 413 | S028C003P046R002A075 414 | S028C003P046R002A077 415 | S028C003P046R002A081 416 | S028C003P046R002A082 417 | S028C003P046R002A083 418 | S028C003P046R002A084 419 | S028C003P048R002A061 420 | S028C003P048R002A062 421 | S028C003P048R002A073 422 | S028C003P073R002A073 423 | S028C003P087R001A061 424 | S028C003P087R002A061 425 | S028C003P087R002A062 426 | S029C001P043R002A092 427 | S029C001P044R002A092 428 | S029C001P048R001A073 429 | S029C001P089R001A063 430 | S029C002P041R001A074 431 | S029C002P041R001A084 432 | S029C002P044R001A091 433 | S029C002P048R001A075 434 | S029C002P048R001A081 435 | S029C002P074R001A081 436 | S029C002P074R001A095 437 | S029C002P074R001A096 438 | S029C002P080R001A091 439 | S029C002P088R001A066 440 | S029C002P089R001A065 441 | S029C002P090R001A067 442 | S029C003P008R002A065 443 | S029C003P008R002A067 444 | S029C003P041R001A089 445 | S029C003P043R001A080 446 | S029C003P043R001A092 447 | S029C003P043R001A105 448 | S029C003P043R002A085 449 | S029C003P043R002A086 450 | S029C003P044R002A106 451 | S029C003P048R001A065 452 | S029C003P048R002A073 453 | S029C003P048R002A074 454 | S029C003P048R002A075 455 | S029C003P048R002A076 456 | S029C003P048R002A092 457 | S029C003P048R002A094 458 | S029C003P051R002A073 459 | S029C003P051R002A074 460 | S029C003P051R002A075 461 | S029C003P051R002A076 462 | S029C003P051R002A077 463 | S029C003P051R002A078 464 | S029C003P051R002A079 465 | S029C003P051R002A080 466 | S029C003P051R002A081 467 | S029C003P051R002A082 468 | S029C003P051R002A083 469 | S029C003P051R002A084 470 | S029C003P051R002A085 471 | S029C003P051R002A086 472 | S029C003P051R002A110 473 | S029C003P067R001A098 474 | S029C003P074R002A110 475 | S029C003P080R002A066 476 | S029C003P088R002A078 477 | S029C003P089R001A075 478 | S029C003P089R002A061 479 | S029C003P089R002A062 480 | S029C003P089R002A063 481 | S029C003P090R002A092 482 | S029C003P090R002A095 483 | S030C002P091R002A091 484 | S030C002P091R002A092 485 | S030C002P091R002A093 486 | S030C002P091R002A094 487 | S030C002P091R002A095 488 | S030C002P091R002A096 489 | S030C002P091R002A097 490 | S030C002P091R002A098 491 | S030C002P091R002A099 492 | S030C002P091R002A100 493 | S030C002P091R002A101 494 | S030C002P091R002A102 495 | S030C002P091R002A103 496 | S030C002P091R002A104 497 | S030C002P091R002A105 498 | S030C003P044R002A065 499 | S030C003P044R002A081 500 | S030C003P044R002A084 501 | S031C002P042R001A111 502 | S031C002P051R001A061 503 | S031C002P051R001A062 504 | S031C002P067R001A067 505 | 
S031C002P067R001A068 506 | S031C002P067R001A069 507 | S031C002P067R001A070 508 | S031C002P067R001A071 509 | S031C002P067R001A072 510 | S031C002P082R001A075 511 | S031C002P082R002A117 512 | S031C002P097R001A061 513 | S031C002P097R001A062 514 | S031C003P043R002A074 515 | S031C003P043R002A075 516 | S031C003P044R002A094 517 | S031C003P082R002A067 518 | S031C003P082R002A068 519 | S031C003P082R002A069 520 | S031C003P082R002A070 521 | S031C003P082R002A071 522 | S031C003P082R002A072 523 | S031C003P082R002A073 524 | S031C003P082R002A075 525 | S031C003P082R002A076 526 | S031C003P082R002A077 527 | S031C003P082R002A084 528 | S031C003P082R002A085 529 | S031C003P082R002A086 530 | S032C002P067R001A092 531 | S032C003P067R002A066 532 | S032C003P067R002A067 533 | S032C003P067R002A075 534 | S032C003P067R002A076 535 | S032C003P067R002A077 536 | -------------------------------------------------------------------------------- /pretrain_moco_cmd.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import math 3 | import os 4 | import random 5 | import shutil 6 | import time 7 | import numpy as np 8 | 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.parallel 12 | import torch.backends.cudnn as cudnn 13 | import torch.optim 14 | import torch.utils.data 15 | 16 | import torch.distributed as dist 17 | 18 | 19 | import moco.builder_cmd 20 | from torch.utils.tensorboard import SummaryWriter 21 | from dataset import get_pretraining_set_intra 22 | 23 | 24 | parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') 25 | parser.add_argument('-j', '--workers', default=32, type=int, metavar='N', 26 | help='number of data loading workers (default: 32)') 27 | parser.add_argument('--epochs', default=200, type=int, metavar='N', 28 | help='number of total epochs to run') 29 | parser.add_argument('--start-epoch', default=0, type=int, metavar='N', 30 | help='manual epoch number (useful on restarts)') 31 | parser.add_argument('-b', '--batch-size', default=256, type=int, 32 | metavar='N', 33 | help='mini-batch size (default: 256), this is the total ' 34 | 'batch size of all GPUs on the current node when ' 35 | 'using Data Parallel or Distributed Data Parallel') 36 | parser.add_argument('--lr', '--learning-rate', default=0.001, type=float, 37 | metavar='LR', help='initial learning rate', dest='lr') 38 | parser.add_argument('--schedule', default=[100, 160], nargs='*', type=int, 39 | help='learning rate schedule (when to drop lr by 10x)') 40 | parser.add_argument('--momentum', default=0.9, type=float, metavar='M', 41 | help='momentum of SGD solver') 42 | parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float, 43 | metavar='W', help='weight decay (default: 1e-4)', 44 | dest='weight_decay') 45 | parser.add_argument('-p', '--print-freq', default=10, type=int, 46 | metavar='N', help='print frequency (default: 10)') 47 | parser.add_argument('--resume', default='', type=str, metavar='PATH', 48 | help='path to latest checkpoint (default: none)') 49 | 50 | # Distributed 51 | parser.add_argument('--local_rank', default=-1, type=int, 52 | help='node rank for distributed training') 53 | 54 | parser.add_argument('--seed', default=42, type=int, 55 | help='seed for initializing training. 
') 56 | parser.add_argument('--checkpoint-path', default='./checkpoints', type=str) 57 | parser.add_argument('--skeleton-representation', type=str, 58 | help='input skeleton-representation for self supervised training (image-based or graph-based or seq-based)') 59 | parser.add_argument('--pre-dataset', default='ntu60', type=str, 60 | help='which dataset to use for self supervised training (ntu60 or ntu120)') 61 | parser.add_argument('--protocol', default='cross_subject', type=str, 62 | help='training protocol cross_view/cross_subject/cross_setup') 63 | 64 | # contrast specific configs: 65 | parser.add_argument('--contrast-dim', default=128, type=int, 66 | help='feature dimension (default: 128)') 67 | parser.add_argument('--contrast-k', default=32768, type=int, 68 | help='queue size; number of negative keys (default: 32768)') 69 | parser.add_argument('--contrast-m', default=0.999, type=float, 70 | help='contrast momentum of updating key encoder (default: 0.999)') 71 | parser.add_argument('--contrast-t', default=0.07, type=float, 72 | help='softmax temperature (default: 0.07)') 73 | parser.add_argument('--teacher-t', default=0.05, type=float, 74 | help='softmax temperature (default: 0.05)') 75 | parser.add_argument('--student-t', default=0.1, type=float, 76 | help='softmax temperature (default: 0.1)') 77 | parser.add_argument('--cmd-weight', default=1.0, type=float, 78 | help='weight of sim loss (default: 1.0)') 79 | parser.add_argument('--topk', default=1024, type=int, 80 | help='number of contrastive context') 81 | parser.add_argument('--mlp', action='store_true', 82 | help='use mlp head') 83 | parser.add_argument('--cos', action='store_true', 84 | help='use cosine lr schedule') 85 | 86 | def init_seeds(seed): 87 | random.seed(seed) 88 | torch.manual_seed(seed) 89 | torch.cuda.manual_seed(seed) 90 | torch.cuda.manual_seed_all(seed) # if you are using multi-GPU. 
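# Editor's note: a minimal, self-contained sketch of what the contrast/teacher/student flags
# defined above typically configure in a MoCo-style cross-modal setup. This is NOT the actual
# moco/builder_cmd.py implementation; the function names and the exact form of the distillation
# term are illustrative assumptions, included only to make the hyperparameters concrete.
import torch
import torch.nn.functional as F

@torch.no_grad()
def momentum_update(encoder_q, encoder_k, m=0.999):
    # the key encoder tracks the query encoder via an exponential moving average (--contrast-m)
    for p_q, p_k in zip(encoder_q.parameters(), encoder_k.parameters()):
        p_k.data.mul_(m).add_(p_q.data, alpha=1.0 - m)

def infonce_logits(q, k, queue, t=0.07):
    # q, k: (N, C) normalized query/key features; queue: (C, K) negatives (K = --contrast-k)
    l_pos = torch.einsum('nc,nc->n', q, k).unsqueeze(-1)      # (N, 1) positive similarity
    l_neg = torch.einsum('nc,ck->nk', q, queue)               # (N, K) negative similarities
    logits = torch.cat([l_pos, l_neg], dim=1) / t             # temperature = --contrast-t
    labels = torch.zeros(logits.size(0), dtype=torch.long)    # the positive sits at index 0
    return logits, labels

def cross_modal_distillation(sim_teacher, sim_student, teacher_t=0.05, student_t=0.1, topk=1024):
    # one plausible shape for the "sim" loss weighted by --cmd-weight: align the student
    # modality's similarity distribution over the teacher's top-k queue neighbours (--topk),
    # sharpening the teacher with --teacher-t and smoothing the student with --student-t
    idx = sim_teacher.topk(topk, dim=1).indices
    p_teacher = F.softmax(torch.gather(sim_teacher, 1, idx) / teacher_t, dim=1)
    log_p_student = F.log_softmax(torch.gather(sim_student, 1, idx) / student_t, dim=1)
    return F.kl_div(log_p_student, p_teacher, reduction='batchmean')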
91 | cudnn.deterministic = True 92 | cudnn.benchmark = True 93 | 94 | def main(): 95 | args = parser.parse_args() 96 | 97 | if args.local_rank != -1: 98 | dist.init_process_group(backend='nccl') 99 | torch.cuda.set_device(args.local_rank) 100 | else: 101 | torch.cuda.set_device(0) 102 | 103 | 104 | # Simply call main_worker function 105 | main_worker(args) 106 | 107 | def main_worker(args): 108 | if args.local_rank != -1: 109 | init_seeds(args.seed + args.local_rank) 110 | else: 111 | init_seeds(args.seed) 112 | 113 | # pretraining dataset and protocol 114 | from options import options_pretraining as options 115 | if args.pre_dataset == 'ntu60' and args.protocol == 'cross_view': 116 | opts = options.opts_ntu_60_cross_view() 117 | elif args.pre_dataset == 'ntu60' and args.protocol == 'cross_subject': 118 | opts = options.opts_ntu_60_cross_subject() 119 | elif args.pre_dataset == 'ntu120' and args.protocol == 'cross_setup': 120 | opts = options.opts_ntu_120_cross_setup() 121 | elif args.pre_dataset == 'ntu120' and args.protocol == 'cross_subject': 122 | opts = options.opts_ntu_120_cross_subject() 123 | elif args.pre_dataset == 'pku_v2' and args.protocol == 'cross_view': 124 | opts = options.opts_pku_v2_cross_view() 125 | elif args.pre_dataset == 'pku_v2' and args.protocol == 'cross_subject': 126 | opts = options.opts_pku_v2_cross_subject() 127 | 128 | opts.train_feeder_args['input_representation'] = args.skeleton_representation 129 | 130 | # create model 131 | print("=> creating model") 132 | 133 | model = moco.builder_cmd.MoCo(args.skeleton_representation, opts.bi_gru_model_args, 134 | args.contrast_dim, args.contrast_k, args.contrast_m, args.contrast_t, 135 | args.teacher_t, args.student_t, args.cmd_weight, args.topk, args.mlp) 136 | print("options",opts.train_feeder_args) 137 | print(model) 138 | 139 | model.cuda() 140 | if args.local_rank != -1: 141 | model = nn.SyncBatchNorm.convert_sync_batchnorm(model) 142 | model = nn.parallel.distributed.DistributedDataParallel(model, device_ids=[args.local_rank], find_unused_parameters=True) 143 | print('Distributed data parallel model used') 144 | 145 | # define loss function (criterion) and optimizer 146 | criterion = nn.CrossEntropyLoss().cuda() 147 | 148 | optimizer = torch.optim.SGD(model.parameters(), args.lr, 149 | momentum=args.momentum, 150 | weight_decay=args.weight_decay) 151 | 152 | # optionally resume from a checkpoint 153 | if args.resume: 154 | if os.path.isfile(args.resume): 155 | print("=> loading checkpoint '{}'".format(args.resume)) 156 | # Map model to be loaded to specified single gpu. 
157 | checkpoint = torch.load(args.resume, map_location='cpu') 158 | args.start_epoch = checkpoint['epoch'] 159 | model.load_state_dict(checkpoint['state_dict']) 160 | optimizer.load_state_dict(checkpoint['optimizer']) 161 | print("=> loaded checkpoint '{}' (epoch {})" 162 | .format(args.resume, checkpoint['epoch'])) 163 | else: 164 | print("=> no checkpoint found at '{}'".format(args.resume)) 165 | 166 | ## Data loading code 167 | train_dataset = get_pretraining_set_intra(opts) 168 | 169 | if args.local_rank != -1: 170 | train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset) 171 | else: 172 | train_sampler = None 173 | 174 | def worker_init_fn(worker_id): 175 | return np.random.seed(torch.initial_seed()%(2**31) + worker_id) # for single gpu 176 | train_loader = torch.utils.data.DataLoader( 177 | train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None), num_workers=args.workers, 178 | worker_init_fn=worker_init_fn, pin_memory=True, sampler=train_sampler, drop_last=True) 179 | 180 | writer = SummaryWriter(args.checkpoint_path) 181 | 182 | for epoch in range(args.start_epoch, args.epochs): 183 | if args.local_rank != -1: 184 | train_sampler.set_epoch(epoch) 185 | 186 | adjust_learning_rate(optimizer, epoch, args) 187 | 188 | # train for one epoch 189 | loss_joint, loss_motion, loss_bone, loss_sim, top1_joint, top1_motion, top1_bone = train(train_loader, model, criterion, optimizer, epoch, args) 190 | 191 | if args.local_rank in [-1, 0]: 192 | writer.add_scalar('loss_joint', loss_joint.avg, global_step=epoch) 193 | writer.add_scalar('loss_motion', loss_motion.avg, global_step=epoch) 194 | writer.add_scalar('loss_bone', loss_bone.avg, global_step=epoch) 195 | writer.add_scalar('loss_sim', loss_sim.avg, global_step=epoch) 196 | writer.add_scalar('top1_joint',top1_joint.avg, global_step=epoch) 197 | writer.add_scalar('top1_motion',top1_motion.avg, global_step=epoch) 198 | writer.add_scalar('top1_bone',top1_bone.avg, global_step=epoch) 199 | 200 | if epoch % 10 == 0: 201 | save_checkpoint({ 202 | 'epoch': epoch + 1, 203 | 'state_dict': model.state_dict(), 204 | 'optimizer' : optimizer.state_dict(), 205 | }, is_best=False, filename=args.checkpoint_path+'/checkpoint_{:04d}.pth.tar'.format(epoch)) 206 | 207 | 208 | def train(train_loader, model, criterion, optimizer, epoch, args): 209 | batch_time = AverageMeter('Time', ':6.3f') 210 | data_time = AverageMeter('Data', ':6.3f') 211 | losses = AverageMeter('Loss', ':6.3f') 212 | losses_joint = AverageMeter('Loss Joint', ':6.3f') 213 | losses_motion = AverageMeter('Loss Motion', ':6.3f') 214 | losses_bone = AverageMeter('Loss Bone', ':6.3f') 215 | losses_sim = AverageMeter('Loss Sim', ':6.3f') 216 | top1_joint = AverageMeter('Acc Joint@1', ':6.2f') 217 | top1_motion = AverageMeter('Acc Motion@1', ':6.2f') 218 | top1_bone = AverageMeter('Acc Bone@1', ':6.2f') 219 | progress = ProgressMeter( 220 | len(train_loader), 221 | [batch_time, losses_joint, losses_motion, losses_bone, losses_sim, top1_joint, top1_motion, top1_bone], 222 | prefix="Epoch: [{}] Lr_rate [{}]".format(epoch,optimizer.param_groups[0]['lr'])) 223 | 224 | # switch to train mode 225 | model.train() 226 | 227 | end = time.time() 228 | for i, (input_v1, input_v2) in enumerate(train_loader): 229 | # measure data loading time 230 | data_time.update(time.time() - end) 231 | 232 | inputs= [input_v1,input_v2] 233 | inputs[0] =inputs[0].float().cuda(non_blocking=True) 234 | inputs[1] =inputs[1].float().cuda(non_blocking=True) 235 | 236 | # compute output 
237 | output, output_motion, output_bone, target, loss_sim = model(inputs[0], inputs[1]) 238 | 239 | batch_size = output.size(0) 240 | 241 | # compute loss 242 | loss_joint = criterion(output, target) 243 | loss_motion = criterion(output_motion, target) 244 | loss_bone = criterion(output_bone, target) 245 | 246 | loss = loss_joint + loss_motion + loss_bone + loss_sim 247 | 248 | losses.update(loss.item(), batch_size) 249 | losses_joint.update(loss_joint.item(), batch_size) 250 | losses_motion.update(loss_motion.item(), batch_size) 251 | losses_bone.update(loss_bone.item(), batch_size) 252 | losses_sim.update(loss_sim.item(), batch_size) 253 | 254 | # measure accuracy of model m1 and m2 individually 255 | # acc1/acc5 are (K+1)-way contrast classifier accuracy 256 | # measure accuracy and record loss 257 | acc1_joint, _ = accuracy(output, target, topk=(1, 5)) 258 | acc1_motion, _ = accuracy(output_motion, target, topk=(1, 5)) 259 | acc1_bone, _ = accuracy(output_bone, target, topk=(1, 5)) 260 | top1_joint.update(acc1_joint[0], batch_size) 261 | top1_motion.update(acc1_motion[0], batch_size) 262 | top1_bone.update(acc1_bone[0], batch_size) 263 | 264 | #print("input output size",output.size(),images[0].size(),half_size) 265 | 266 | # compute gradient and do SGD step 267 | optimizer.zero_grad() 268 | loss.backward() 269 | optimizer.step() 270 | 271 | # measure elapsed time 272 | batch_time.update(time.time() - end) 273 | end = time.time() 274 | 275 | if i % args.print_freq == 0: 276 | progress.display(i) 277 | 278 | return losses_joint, losses_motion, losses_bone, losses_sim, top1_joint, top1_motion, top1_bone 279 | 280 | def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'): 281 | torch.save(state, filename) 282 | if is_best: 283 | shutil.copyfile(filename, 'model_best.pth.tar') 284 | 285 | 286 | class AverageMeter(object): 287 | """Computes and stores the average and current value""" 288 | def __init__(self, name, fmt=':f'): 289 | self.name = name 290 | self.fmt = fmt 291 | self.reset() 292 | 293 | def reset(self): 294 | self.val = 0 295 | self.avg = 0 296 | self.sum = 0 297 | self.count = 0 298 | 299 | def update(self, val, n=1): 300 | self.val = val 301 | self.sum += val * n 302 | self.count += n 303 | self.avg = self.sum / self.count 304 | 305 | def __str__(self): 306 | fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})' 307 | return fmtstr.format(**self.__dict__) 308 | 309 | 310 | class ProgressMeter(object): 311 | def __init__(self, num_batches, meters, prefix=""): 312 | self.batch_fmtstr = self._get_batch_fmtstr(num_batches) 313 | self.meters = meters 314 | self.prefix = prefix 315 | 316 | def display(self, batch): 317 | entries = [self.prefix + self.batch_fmtstr.format(batch)] 318 | entries += [str(meter) for meter in self.meters] 319 | print('\t'.join(entries)) 320 | 321 | def _get_batch_fmtstr(self, num_batches): 322 | num_digits = len(str(num_batches // 1)) 323 | fmt = '{:' + str(num_digits) + 'd}' 324 | return '[' + fmt + '/' + fmt.format(num_batches) + ']' 325 | 326 | 327 | def adjust_learning_rate(optimizer, epoch, args): 328 | """Decay the learning rate based on schedule""" 329 | lr = args.lr 330 | if args.cos: # cosine lr schedule 331 | lr *= 0.5 * (1. + math.cos(math.pi * epoch / args.epochs)) 332 | else: # stepwise lr schedule 333 | for milestone in args.schedule: 334 | lr *= 0.1 if epoch >= milestone else 1. 
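# Editor's note: a quick, self-contained illustration of the two schedules implemented by
# adjust_learning_rate() here (cosine when --cos is set, otherwise step decay at --schedule);
# the numbers below use the pretraining defaults (lr=0.001, epochs=200, schedule=[100, 160]).
import math

def lr_at(epoch, base_lr=0.001, epochs=200, milestones=(100, 160), cos=False):
    if cos:
        return base_lr * 0.5 * (1. + math.cos(math.pi * epoch / epochs))
    lr = base_lr
    for milestone in milestones:
        lr *= 0.1 if epoch >= milestone else 1.
    return lr

# step decay: 1e-3 until epoch 100, 1e-4 until epoch 160, then 1e-5;
# cosine: decays smoothly towards 0, reaching half the base lr at epoch 100
print(lr_at(50), lr_at(120), lr_at(170), lr_at(100, cos=True))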
335 | for param_group in optimizer.param_groups: 336 | param_group['lr'] = lr 337 | 338 | 339 | def accuracy(output, target, topk=(1,)): 340 | """Computes the accuracy over the k top predictions for the specified values of k""" 341 | with torch.no_grad(): 342 | maxk = max(topk) 343 | batch_size = target.size(0) 344 | 345 | _, pred = output.topk(maxk, 1, True, True) 346 | pred = pred.t() 347 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 348 | 349 | res = [] 350 | for k in topk: 351 | correct_k = correct[:k].contiguous().view(-1).float().sum(0, keepdim=True) 352 | res.append(correct_k.mul_(100.0 / batch_size)) 353 | return res 354 | 355 | 356 | if __name__ == '__main__': 357 | main() 358 | -------------------------------------------------------------------------------- /action_retrieval_cmd.py: -------------------------------------------------------------------------------- 1 | from dataset import get_finetune_training_set, get_finetune_validation_set 2 | import argparse 3 | import os 4 | import random 5 | import warnings 6 | 7 | from tqdm import tqdm 8 | 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.parallel 12 | import torch.backends.cudnn as cudnn 13 | import torch.optim as optim 14 | import torch.utils.data 15 | from torch.utils.data import Dataset, DataLoader 16 | import torch.utils.data.distributed 17 | import numpy as np 18 | from sklearn.neighbors import KNeighborsClassifier 19 | from sklearn import preprocessing 20 | from sklearn.metrics import accuracy_score 21 | 22 | import moco.builder_cmd 23 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 24 | 25 | # change for action recogniton 26 | 27 | 28 | parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') 29 | parser.add_argument('-j', '--workers', default=32, type=int, metavar='N', 30 | help='number of data loading workers (default: 32)') 31 | parser.add_argument('--epochs', default=80, type=int, metavar='N', 32 | help='number of total epochs to run') 33 | parser.add_argument('--start-epoch', default=0, type=int, metavar='N', 34 | help='manual epoch number (useful on restarts)') 35 | parser.add_argument('-b', '--batch-size', default=256, type=int, 36 | metavar='N', 37 | help='mini-batch size (default: 256), this is the total ' 38 | 'batch size of all GPUs on the current node when ' 39 | 'using Data Parallel or Distributed Data Parallel') 40 | parser.add_argument('--lr', '--learning-rate', default=30., type=float, 41 | metavar='LR', help='initial learning rate', dest='lr') 42 | parser.add_argument('--schedule', default=[50, 70, ], nargs='*', type=int, 43 | help='learning rate schedule (when to drop lr by a ratio)') 44 | parser.add_argument('--momentum', default=0.9, type=float, metavar='M', 45 | help='momentum') 46 | parser.add_argument('--wd', '--weight-decay', default=0., type=float, 47 | metavar='W', help='weight decay (default: 0.)', 48 | dest='weight_decay') 49 | parser.add_argument('-p', '--print-freq', default=10, type=int, 50 | metavar='N', help='print frequency (default: 10)') 51 | parser.add_argument('--resume', default='', type=str, metavar='PATH', 52 | help='path to latest checkpoint (default: none)') 53 | parser.add_argument('--seed', default=None, type=int, 54 | help='seed for initializing training. 
') 55 | parser.add_argument('--gpu', default=None, type=int, 56 | help='GPU id to use.') 57 | 58 | parser.add_argument('--pretrained', default='', type=str, 59 | help='path to moco pretrained checkpoint') 60 | parser.add_argument('--finetune-dataset', default='ntu60', type=str, 61 | help='which dataset to use for finetuning') 62 | 63 | parser.add_argument('--protocol', default='cross_view', type=str, 64 | help='traiining protocol of ntu') 65 | 66 | parser.add_argument('--finetune-skeleton-representation', default='seq-based', type=str, 67 | help='which skeleton-representation to use for downstream training') 68 | parser.add_argument('--pretrain-skeleton-representation', default='seq-based', type=str, 69 | help='which skeleton-representation where used for pre-training') 70 | parser.add_argument('--knn-neighbours', default=None, type=int, 71 | help='number of neighbours used for KNN.') 72 | 73 | best_acc1 = 0 74 | 75 | # initilize weight 76 | def weights_init_gru(model): 77 | with torch.no_grad(): 78 | for child in list(model.children()): 79 | print("init ", child) 80 | for param in list(child.parameters()): 81 | if param.dim() == 2: 82 | nn.init.xavier_uniform_(param) 83 | print('PC weight initial finished!') 84 | 85 | 86 | def load_pretrained(model, pretrained): 87 | if os.path.isfile(pretrained): 88 | print("=> loading checkpoint '{}'".format(pretrained)) 89 | checkpoint = torch.load(pretrained, map_location="cpu") 90 | 91 | # rename moco pre-trained keys 92 | state_dict = checkpoint['state_dict'] 93 | for k in list(state_dict.keys()): 94 | # retain only encoder_q up to before the embedding layer 95 | if not k.startswith('encoder_q'): 96 | del state_dict[k] 97 | elif '.fc' in k: 98 | del state_dict[k] 99 | else: 100 | pass 101 | 102 | msg = model.load_state_dict(state_dict, strict=False) 103 | print("message", msg) 104 | assert set(msg.missing_keys) == {"encoder_q.fc.weight", "encoder_q.fc.bias", 105 | "encoder_q_motion.fc.weight", "encoder_q_motion.fc.bias", 106 | "encoder_q_bone.fc.weight", "encoder_q_bone.fc.bias"} 107 | 108 | print("=> loaded pre-trained model '{}'".format(pretrained)) 109 | else: 110 | print("=> no checkpoint found at '{}'".format(pretrained)) 111 | 112 | 113 | def knn(data_train, data_test, label_train, label_test, nn=9): 114 | label_train = np.asarray(label_train) 115 | label_test = np.asarray(label_test) 116 | print("Number of KNN Neighbours = ", nn) 117 | print("training feature and labels", data_train.shape, len(label_train)) 118 | print("test feature and labels", data_test.shape, len(label_test)) 119 | 120 | Xtr_Norm = preprocessing.normalize(data_train) 121 | Xte_Norm = preprocessing.normalize(data_test) 122 | 123 | knn = KNeighborsClassifier(n_neighbors=nn, 124 | metric='cosine') # , metric='cosine'#'mahalanobis', metric_params={'V': np.cov(data_train)}) 125 | knn.fit(Xtr_Norm, label_train) 126 | pred = knn.predict(Xte_Norm) 127 | acc = accuracy_score(pred, label_test) 128 | 129 | return acc 130 | 131 | 132 | def test_extract_hidden(model, data_train, data_eval): 133 | model.eval() 134 | print("Extracting training features") 135 | label_train_list = [] 136 | hidden_array_train_list = [] 137 | for ith, (ith_data, label) in enumerate(tqdm(data_train)): 138 | input_tensor = ith_data.to(device) 139 | 140 | en_hi = model(input_tensor, view='joint', knn_eval=True) 141 | en_hi = en_hi.squeeze() 142 | #print("encoder size",en_hi.size()) 143 | 144 | label_train_list.append(label) 145 | hidden_array_train_list.append(en_hi[:, :].detach().cpu().numpy()) 146 | 
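# Editor's note: the retrieval protocol that the knn() helper above implements, shown in
# isolation on synthetic features: L2-normalize the frozen encoder outputs and classify with
# a cosine k-NN (k = --knn-neighbours, typically 1). Shapes and data here are illustrative only.
import numpy as np
from sklearn import preprocessing
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier

rng = np.random.default_rng(0)                                   # stand-in for extracted features
x_train, y_train = rng.normal(size=(100, 128)), rng.integers(0, 5, size=100)
x_test, y_test = rng.normal(size=(20, 128)), rng.integers(0, 5, size=20)

clf = KNeighborsClassifier(n_neighbors=1, metric='cosine')
clf.fit(preprocessing.normalize(x_train), y_train)
print('toy 1-NN accuracy:', accuracy_score(y_test, clf.predict(preprocessing.normalize(x_test))))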
label_train = np.hstack(label_train_list) 147 | hidden_array_train = np.vstack(hidden_array_train_list) 148 | 149 | print("Extracting validation features") 150 | label_eval_list = [] 151 | hidden_array_eval_list = [] 152 | for ith, (ith_data, label) in enumerate(tqdm(data_eval)): 153 | 154 | input_tensor = ith_data.to(device) 155 | 156 | en_hi = model(input_tensor, view='joint', knn_eval=True) 157 | en_hi = en_hi.squeeze() 158 | 159 | label_eval_list.append(label) 160 | hidden_array_eval_list.append(en_hi[:, :].detach().cpu().numpy()) 161 | label_eval = np.hstack(label_eval_list) 162 | hidden_array_eval = np.vstack(hidden_array_eval_list) 163 | 164 | return hidden_array_train, hidden_array_eval, label_train, label_eval 165 | 166 | 167 | class MyAutoDataset(Dataset): 168 | def __init__(self, data, label): 169 | 170 | self.data = data 171 | self.label = label 172 | #self.xy = zip(self.data, self.label) 173 | 174 | def __getitem__(self, index): 175 | sequence = self.data[index, :] 176 | label = self.label[index] 177 | 178 | return sequence, label 179 | 180 | def __len__(self): 181 | return len(self.label) 182 | 183 | 184 | def train_autoencoder(hidden_train, hidden_eval, label_train, 185 | label_eval, middle_size, criterion, lambda1, num_epoches): 186 | batch_size = 64 187 | #auto = autoencoder(hidden_train.shape[1], middle_size).to(device) 188 | auto = autoencoder(hidden_train.shape[1], middle_size).cuda() 189 | auto_optimizer = optim.Adam(auto.parameters(), lr=0.001) 190 | auto_scheduler = optim.lr_scheduler.LambdaLR(auto_optimizer, lr_lambda=lambda1) 191 | criterion_auto = nn.MSELoss() 192 | 193 | autodataset = MyAutoDataset(hidden_train, label_train) 194 | trainloader = DataLoader(autodataset, batch_size=batch_size, shuffle=True) 195 | 196 | autodataset = MyAutoDataset(hidden_eval, label_eval) 197 | evalloader = DataLoader(autodataset, batch_size=batch_size, shuffle=True) 198 | 199 | print("Training autoencoder") 200 | for epoch in tqdm(range(num_epoches)): 201 | for (data, label) in trainloader: 202 | # img, _ = data 203 | # img = img.view(img.size(0), -1) 204 | # img = Variable(img).cuda() 205 | #data = torch.tensor(data.clone().detach(), dtype=torch.float).to(device) 206 | # ===================forward===================== 207 | data = data.cuda() 208 | output, _ = auto(data) 209 | loss = criterion(output, data) 210 | # ===================backward==================== 211 | auto_optimizer.zero_grad() 212 | loss.backward() 213 | auto_optimizer.step() 214 | auto_scheduler.step() 215 | for (data, label) in evalloader: 216 | data = data.cuda() 217 | # ===================forward===================== 218 | output, _ = auto(data) 219 | loss_eval = criterion(output, data) 220 | # ===================log======================== 221 | # if epoch % 200 == 0: 222 | # print('epoch [{}/{}], train loss:{:.4f} eval loass:{:.4f}' 223 | # .format(epoch + 1, num_epoches, loss.item(), loss_eval.item())) 224 | 225 | # extract hidden train 226 | count = 0 227 | for (data, label) in trainloader: 228 | data = data.cuda() 229 | _, encoder_output = auto(data) 230 | 231 | if count == 0: 232 | np_out_train = encoder_output.detach().cpu().numpy() 233 | label_train = label 234 | else: 235 | label_train = np.hstack((label_train, label)) 236 | np_out_train = np.vstack((np_out_train, encoder_output.detach().cpu().numpy())) 237 | count += 1 238 | 239 | # extract hidden eval 240 | count = 0 241 | for (data, label) in evalloader: 242 | data = data.cuda() 243 | _, encoder_output = auto(data) 244 | 245 | if count == 0: 246 | 
np_out_eval = encoder_output.detach().cpu().numpy() 247 | label_eval = label 248 | 249 | else: 250 | label_eval = np.hstack((label_eval, label)) 251 | np_out_eval = np.vstack((np_out_eval, encoder_output.detach().cpu().numpy())) 252 | count += 1 253 | 254 | return np_out_train, np_out_eval, label_train, label_eval 255 | 256 | 257 | class autoencoder(nn.Module): 258 | def __init__(self, input_size, middle_size): 259 | super(autoencoder, self).__init__() 260 | self.encoder = nn.Sequential( 261 | nn.Linear(input_size, 1024), 262 | nn.Tanh(), 263 | nn.Linear(1024, 512), 264 | nn.Tanh(), 265 | nn.Linear(512, middle_size), 266 | nn.Tanh() 267 | ) 268 | 269 | self.decoder = nn.Sequential( 270 | nn.Linear(middle_size, 512), 271 | nn.Tanh(), 272 | nn.Linear(512, 1024), 273 | nn.Tanh(), 274 | nn.Linear(1024, input_size), 275 | ) 276 | 277 | def forward(self, x): 278 | middle_x = self.encoder(x) 279 | x = self.decoder(middle_x) 280 | return x, middle_x 281 | 282 | 283 | def clustering_knn_acc(model, train_loader, eval_loader, criterion, num_epoches=400, middle_size=125, knn_neighbours=1): 284 | hi_train, hi_eval, label_train, label_eval = test_extract_hidden(model, train_loader, eval_loader) 285 | # print(hi_train.shape) 286 | 287 | train_ae = False 288 | if train_ae: 289 | def lambda1(ith_epoch): return 0.95 ** (ith_epoch // 50) 290 | np_out_train, np_out_eval, au_l_train, au_l_eval = train_autoencoder(hi_train, hi_eval, label_train, 291 | label_eval, middle_size, criterion, lambda1, num_epoches) 292 | 293 | # print(hi_train.shape) 294 | knn_acc_1 = knn(hi_train, hi_eval, label_train, label_eval, nn=knn_neighbours) 295 | knn_acc_au = knn(np_out_train, np_out_eval, au_l_train, au_l_eval, nn=knn_neighbours) 296 | else: 297 | knn_acc_1 = knn(hi_train, hi_eval, label_train, label_eval, nn=knn_neighbours) 298 | knn_acc_au = knn_acc_1 299 | 300 | return knn_acc_1, knn_acc_au 301 | 302 | 303 | def main(): 304 | args = parser.parse_args() 305 | 306 | if args.seed is not None: 307 | random.seed(args.seed) 308 | torch.manual_seed(args.seed) 309 | cudnn.deterministic = True 310 | warnings.warn('You have chosen to seed training. ' 311 | 'This will turn on the CUDNN deterministic setting, ' 312 | 'which can slow down your training considerably! ' 313 | 'You may see unexpected behavior when restarting ' 314 | 'from checkpoints.') 315 | 316 | if args.gpu is not None: 317 | warnings.warn('You have chosen a specific GPU. 
This will completely ' 318 | 'disable data parallelism.') 319 | 320 | ngpus_per_node = torch.cuda.device_count() 321 | # Simply call main_worker function 322 | main_worker(0, ngpus_per_node, args) 323 | 324 | 325 | def main_worker(gpu, ngpus_per_node, args): 326 | global best_acc1 327 | args.gpu = gpu 328 | 329 | if args.gpu is not None: 330 | print("Use GPU: {} for training".format(args.gpu)) 331 | 332 | # training dataset 333 | from options import options_retrieval as options 334 | if args.finetune_dataset == 'ntu60' and args.protocol == 'cross_view': 335 | opts = options.opts_ntu_60_cross_view() 336 | elif args.finetune_dataset == 'ntu60' and args.protocol == 'cross_subject': 337 | opts = options.opts_ntu_60_cross_subject() 338 | elif args.finetune_dataset == 'ntu120' and args.protocol == 'cross_setup': 339 | opts = options.opts_ntu_120_cross_setup() 340 | elif args.finetune_dataset == 'ntu120' and args.protocol == 'cross_subject': 341 | opts = options.opts_ntu_120_cross_subject() 342 | elif args.finetune_dataset == 'pku_v2' and args.protocol == 'cross_view': 343 | opts = options.opts_pku_v2_cross_view() 344 | elif args.finetune_dataset == 'pku_v2' and args.protocol == 'cross_subject': 345 | opts = options.opts_pku_v2_cross_subject() 346 | 347 | opts.train_feeder_args['input_representation'] = args.finetune_skeleton_representation 348 | opts.test_feeder_args['input_representation'] = args.finetune_skeleton_representation 349 | 350 | # create model 351 | print("=> creating model") 352 | 353 | model = moco.builder_cmd.MoCo(args.finetune_skeleton_representation, opts.bi_gru_model_args, pretrain=False) 354 | print("options", opts.agcn_model_args, 355 | opts.train_feeder_args, opts.test_feeder_args) 356 | 357 | if args.pretrained: 358 | # freeze all layers 359 | for name, param in model.encoder_q.named_parameters(): 360 | param.requires_grad = False 361 | for name, param in model.encoder_q_motion.named_parameters(): 362 | param.requires_grad = False 363 | for name, param in model.encoder_q_bone.named_parameters(): 364 | param.requires_grad = False 365 | 366 | # load from pre-trained model 367 | load_pretrained(model, args.pretrained) 368 | 369 | if args.gpu is not None: 370 | model = model.cuda() 371 | model = nn.DataParallel(model, device_ids=None) 372 | 373 | cudnn.benchmark = True 374 | 375 | # Data loading code 376 | 377 | train_dataset = get_finetune_training_set(opts) 378 | val_dataset = get_finetune_validation_set(opts) 379 | 380 | train_sampler = None 381 | train_loader = torch.utils.data.DataLoader( 382 | train_dataset, batch_size=args.batch_size, shuffle=( 383 | train_sampler is None), 384 | num_workers=args.workers, pin_memory=True, sampler=train_sampler, drop_last=False) 385 | 386 | val_loader = torch.utils.data.DataLoader( 387 | val_dataset, 388 | batch_size=args.batch_size, shuffle=False, 389 | num_workers=args.workers, pin_memory=True, drop_last=False) 390 | 391 | auto_criterion = nn.MSELoss() 392 | # Extract frozen features of the pre-trained query encoder 393 | # train and evaluate a KNN classifier on extracted features 394 | acc1, acc_au = clustering_knn_acc(model, train_loader, val_loader, 395 | criterion=auto_criterion, 396 | knn_neighbours=args.knn_neighbours) 397 | 398 | print(" Knn Without AE= ", acc1, " Knn With AE=", acc_au) 399 | 400 | 401 | if __name__ == '__main__': 402 | main() 403 | -------------------------------------------------------------------------------- /action_classification_cmd.py: 
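The script below performs linear evaluation: the pre-trained query encoders are kept frozen, their final fc layers are re-initialized, and only those classifiers are trained. A minimal standalone sketch of that recipe follows, using a toy stand-in backbone rather than the repository's MoCo encoders (the input dimension and the 60-class head are illustrative):

import torch
import torch.nn as nn

backbone = nn.Sequential(nn.Linear(150, 256), nn.ReLU(), nn.Linear(256, 256))  # toy encoder stand-in
fc = nn.Linear(256, 60)                                      # new linear head (e.g. 60 NTU-60 classes)

for p in backbone.parameters():                              # freeze everything except the head
    p.requires_grad = False
fc.weight.data.normal_(mean=0.0, std=0.01)                   # same head init the script uses
fc.bias.data.zero_()

trainable = [p for p in list(backbone.parameters()) + list(fc.parameters()) if p.requires_grad]
assert len(trainable) == 2                                   # only fc.weight and fc.bias are optimized
optimizer = torch.optim.SGD(trainable, lr=30., momentum=0.9, weight_decay=0.)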
-------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import random 4 | import shutil 5 | import time 6 | import warnings 7 | 8 | import torch 9 | import torch.nn as nn 10 | import torch.nn.parallel 11 | import torch.backends.cudnn as cudnn 12 | import torch.optim 13 | import torch.utils.data 14 | import torch.utils.data.distributed 15 | 16 | import moco.builder_cmd 17 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 18 | 19 | # change for action recogniton 20 | from dataset import get_finetune_training_set,get_finetune_validation_set 21 | 22 | 23 | parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') 24 | parser.add_argument('-j', '--workers', default=32, type=int, metavar='N', 25 | help='number of data loading workers (default: 32)') 26 | parser.add_argument('--epochs', default=80, type=int, metavar='N', 27 | help='number of total epochs to run') 28 | parser.add_argument('--start-epoch', default=0, type=int, metavar='N', 29 | help='manual epoch number (useful on restarts)') 30 | parser.add_argument('-b', '--batch-size', default=256, type=int, 31 | metavar='N', 32 | help='mini-batch size (default: 256), this is the total ' 33 | 'batch size of all GPUs on the current node when ' 34 | 'using Data Parallel or Distributed Data Parallel') 35 | parser.add_argument('--lr', '--learning-rate', default=30., type=float, 36 | metavar='LR', help='initial learning rate', dest='lr') 37 | parser.add_argument('--schedule', default=[50, 70,], nargs='*', type=int, 38 | help='learning rate schedule (when to drop lr by a ratio)') 39 | parser.add_argument('--momentum', default=0.9, type=float, metavar='M', 40 | help='momentum') 41 | parser.add_argument('--wd', '--weight-decay', default=0., type=float, 42 | metavar='W', help='weight decay (default: 0.)', 43 | dest='weight_decay') 44 | parser.add_argument('-p', '--print-freq', default=10, type=int, 45 | metavar='N', help='print frequency (default: 10)') 46 | parser.add_argument('--resume', default='', type=str, metavar='PATH', 47 | help='path to latest checkpoint (default: none)') 48 | parser.add_argument('--seed', default=None, type=int, 49 | help='seed for initializing training. 
') 50 | parser.add_argument('--gpu', default=None, type=int, 51 | help='GPU id to use.') 52 | 53 | parser.add_argument('--pretrained', default='', type=str, 54 | help='path to moco pretrained checkpoint') 55 | parser.add_argument('--finetune-dataset', default='ntu60', type=str, 56 | help='which dataset to use for finetuning') 57 | 58 | parser.add_argument('--protocol', default='cross_view', type=str, 59 | help='traiining protocol of ntu') 60 | 61 | parser.add_argument('--finetune-skeleton-representation', default='graph-based', type=str, 62 | help='which skeleton-representation to use for downstream training') 63 | parser.add_argument('--pretrain-skeleton-representation', default='graph-based', type=str, 64 | help='which skeleton-representation where used for pre-training') 65 | 66 | best_acc1 = 0 67 | 68 | # initilize weight 69 | def weights_init_gru(model): 70 | with torch.no_grad(): 71 | for child in list(model.children()): 72 | print("init ",child) 73 | for param in list(child.parameters()): 74 | if param.dim() == 2: 75 | nn.init.xavier_uniform_(param) 76 | print('PC weight initial finished!') 77 | 78 | def load_pretrained(model, pretrained): 79 | if os.path.isfile(pretrained): 80 | print("=> loading checkpoint '{}'".format(pretrained)) 81 | checkpoint = torch.load(pretrained, map_location="cpu") 82 | 83 | # rename moco pre-trained keys 84 | state_dict = checkpoint['state_dict'] 85 | for k in list(state_dict.keys()): 86 | # retain only encoder_q up to before the embedding layer 87 | if not k.startswith('encoder_q'): 88 | del state_dict[k] 89 | elif '.fc' in k: 90 | del state_dict[k] 91 | else: 92 | pass 93 | 94 | msg = model.load_state_dict(state_dict, strict=False) 95 | print("message",msg) 96 | assert set(msg.missing_keys) == {"encoder_q.fc.weight", "encoder_q.fc.bias", 97 | "encoder_q_motion.fc.weight", "encoder_q_motion.fc.bias", 98 | "encoder_q_bone.fc.weight", "encoder_q_bone.fc.bias"} 99 | 100 | print("=> loaded pre-trained model '{}'".format(pretrained)) 101 | else: 102 | print("=> no checkpoint found at '{}'".format(pretrained)) 103 | 104 | 105 | def main(): 106 | args = parser.parse_args() 107 | 108 | if args.seed is not None: 109 | random.seed(args.seed) 110 | torch.manual_seed(args.seed) 111 | cudnn.deterministic = True 112 | warnings.warn('You have chosen to seed training. ' 113 | 'This will turn on the CUDNN deterministic setting, ' 114 | 'which can slow down your training considerably! ' 115 | 'You may see unexpected behavior when restarting ' 116 | 'from checkpoints.') 117 | 118 | if args.gpu is not None: 119 | warnings.warn('You have chosen a specific GPU. 
This will completely ' 120 | 'disable data parallelism.') 121 | ngpus_per_node = torch.cuda.device_count() 122 | # Simply call main_worker function 123 | main_worker(0, ngpus_per_node, args) 124 | 125 | 126 | def main_worker(gpu, ngpus_per_node, args): 127 | global best_acc1 128 | args.gpu = gpu 129 | 130 | if args.gpu is not None: 131 | print("Use GPU: {} for training".format(args.gpu)) 132 | 133 | # create model 134 | 135 | # training dataset 136 | from options import options_classification as options 137 | if args.finetune_dataset== 'ntu60' and args.protocol == 'cross_view': 138 | opts = options.opts_ntu_60_cross_view() 139 | elif args.finetune_dataset== 'ntu60' and args.protocol == 'cross_subject': 140 | opts = options.opts_ntu_60_cross_subject() 141 | elif args.finetune_dataset== 'ntu120' and args.protocol == 'cross_setup': 142 | opts = options.opts_ntu_120_cross_setup() 143 | elif args.finetune_dataset== 'ntu120' and args.protocol == 'cross_subject': 144 | opts = options.opts_ntu_120_cross_subject() 145 | elif args.finetune_dataset== 'pku_v2' and args.protocol == 'cross_view': 146 | opts = options.opts_pku_v2_cross_view() 147 | elif args.finetune_dataset== 'pku_v2' and args.protocol == 'cross_subject': 148 | opts = options.opts_pku_v2_cross_subject() 149 | 150 | opts.train_feeder_args['input_representation'] = args.finetune_skeleton_representation 151 | opts.test_feeder_args['input_representation'] = args.finetune_skeleton_representation 152 | 153 | # create model 154 | print("=> creating model") 155 | 156 | model = moco.builder_cmd.MoCo(args.finetune_skeleton_representation, opts.bi_gru_model_args, pretrain=False) 157 | print("options", opts.agcn_model_args, opts.train_feeder_args, opts.test_feeder_args) 158 | 159 | if args.pretrained: 160 | # freeze all layers but the last fc 161 | for name, param in model.encoder_q.named_parameters(): 162 | if name not in ['fc.weight', 'fc.bias']: 163 | param.requires_grad = False 164 | else: 165 | print('params',name) 166 | for name, param in model.encoder_q_motion.named_parameters(): 167 | if name not in ['fc.weight', 'fc.bias']: 168 | param.requires_grad = False 169 | else: 170 | print('params',name) 171 | for name, param in model.encoder_q_bone.named_parameters(): 172 | if name not in ['fc.weight', 'fc.bias']: 173 | param.requires_grad = False 174 | else: 175 | print('params',name) 176 | 177 | # init the fc layer 178 | model.encoder_q.fc.weight.data.normal_(mean=0.0, std=0.01) 179 | model.encoder_q.fc.bias.data.zero_() 180 | model.encoder_q_motion.fc.weight.data.normal_(mean=0.0, std=0.01) 181 | model.encoder_q_motion.fc.bias.data.zero_() 182 | model.encoder_q_bone.fc.weight.data.normal_(mean=0.0, std=0.01) 183 | model.encoder_q_bone.fc.bias.data.zero_() 184 | 185 | # load from pre-trained model 186 | load_pretrained(model, args.pretrained) 187 | 188 | if args.gpu is not None: 189 | model = model.cuda() 190 | model = nn.DataParallel(model, device_ids=None) 191 | 192 | # define loss function (criterion) and optimizer 193 | criterion = nn.CrossEntropyLoss().cuda(args.gpu) 194 | 195 | # optimize only the linear classifier 196 | parameters = list(filter(lambda p: p.requires_grad, model.parameters())) 197 | if args.pretrained: 198 | assert len(parameters) == 6 # (fc.weight, fc.bias) * 3 199 | optimizer = torch.optim.SGD(parameters, args.lr, 200 | momentum=args.momentum, 201 | weight_decay=args.weight_decay) 202 | if True: 203 | for parm in optimizer.param_groups: 204 | print ("optimize parameters lr",parm['lr']) 205 | 206 | # optionally resume from 
a checkpoint 207 | if args.resume: 208 | if os.path.isfile(args.resume): 209 | print("=> loading checkpoint '{}'".format(args.resume)) 210 | if args.gpu is None: 211 | checkpoint = torch.load(args.resume) 212 | else: 213 | # Map model to be loaded to specified single gpu. 214 | loc = 'cuda:{}'.format(args.gpu) 215 | checkpoint = torch.load(args.resume, map_location=loc) 216 | args.start_epoch = checkpoint['epoch'] 217 | best_acc1 = checkpoint['best_acc1'] 218 | if args.gpu is not None: 219 | # best_acc1 may be from a checkpoint from a different GPU 220 | best_acc1 = best_acc1.to(args.gpu) 221 | model.load_state_dict(checkpoint['state_dict']) 222 | optimizer.load_state_dict(checkpoint['optimizer']) 223 | print("=> loaded checkpoint '{}' (epoch {})" 224 | .format(args.resume, checkpoint['epoch'])) 225 | else: 226 | print("=> no checkpoint found at '{}'".format(args.resume)) 227 | 228 | cudnn.benchmark = True 229 | 230 | ## Data loading code 231 | 232 | train_dataset = get_finetune_training_set(opts) 233 | val_dataset = get_finetune_validation_set(opts) 234 | 235 | train_sampler = None 236 | 237 | train_loader = torch.utils.data.DataLoader( 238 | train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None), 239 | num_workers=args.workers, pin_memory=True, sampler=train_sampler,drop_last=False) 240 | 241 | 242 | val_loader = torch.utils.data.DataLoader( 243 | val_dataset, 244 | batch_size=args.batch_size, shuffle=False, 245 | num_workers=args.workers, pin_memory=True,drop_last=False) 246 | 247 | 248 | for epoch in range(args.start_epoch, args.epochs): 249 | 250 | adjust_learning_rate(optimizer, epoch, args) 251 | 252 | # train for one epoch 253 | train(train_loader, model, criterion, optimizer, epoch, args) 254 | 255 | # evaluate on validation set 256 | if (epoch+1) % 5 == 0: 257 | acc1 = validate(val_loader, model, criterion, args) 258 | else: 259 | acc1 = 0 260 | 261 | 262 | # remember best acc@1 and save checkpoint 263 | is_best = acc1 > best_acc1 264 | if is_best: 265 | print("found new best accuracy:= ",acc1) 266 | best_acc1 = max(acc1, best_acc1) 267 | 268 | save_checkpoint({ 269 | 'epoch': epoch + 1, 270 | 'state_dict': model.state_dict(), 271 | 'best_acc1': best_acc1, 272 | 'optimizer' : optimizer.state_dict(), 273 | }, is_best,filename = args.finetune_skeleton_representation + '_checkpoint.pth.tar' ) 274 | 275 | # sanity check 276 | if epoch == args.start_epoch: 277 | sanity_check(model.state_dict(), args.pretrained) 278 | print("Final best accuracy",best_acc1) 279 | 280 | 281 | def train(train_loader, model, criterion, optimizer, epoch, args): 282 | batch_time = AverageMeter('Time', ':6.3f') 283 | data_time = AverageMeter('Data', ':6.3f') 284 | losses = AverageMeter('Loss', ':.4e') 285 | top1 = AverageMeter('Acc@1', ':6.2f') 286 | top5 = AverageMeter('Acc@5', ':6.2f') 287 | progress = ProgressMeter( 288 | len(train_loader), 289 | [batch_time, data_time, losses, top1, top5], 290 | prefix="Epoch: [{}]".format(epoch)) 291 | 292 | """ 293 | Switch to eval mode: 294 | Under the protocol of linear classification on frozen features/models, 295 | it is not legitimate to change any part of the pre-trained model. 296 | BatchNorm in train mode may revise running mean/std (even if it receives 297 | no gradient), which are part of the model parameters too. 
298 | """ 299 | model.eval() 300 | 301 | end = time.time() 302 | for i, (images, target) in enumerate(train_loader): 303 | # measure data loading time 304 | data_time.update(time.time() - end) 305 | 306 | if args.gpu is not None: 307 | images = images.cuda(args.gpu, non_blocking=True) 308 | target = target.cuda(args.gpu, non_blocking=True) 309 | 310 | 311 | # compute output 312 | output = model(images) 313 | loss = criterion(output, target) 314 | 315 | # measure accuracy and record loss 316 | acc1, acc5 = accuracy(output, target, topk=(1, 5)) 317 | losses.update(loss.item(), images.size(0)) 318 | top1.update(acc1[0], images.size(0)) 319 | top5.update(acc5[0], images.size(0)) 320 | 321 | # compute gradient and do SGD step 322 | optimizer.zero_grad() 323 | loss.backward() 324 | optimizer.step() 325 | 326 | # measure elapsed time 327 | batch_time.update(time.time() - end) 328 | end = time.time() 329 | 330 | if i % args.print_freq == 0: 331 | progress.display(i) 332 | 333 | 334 | def validate(val_loader, model, criterion, args): 335 | batch_time = AverageMeter('Time', ':6.3f') 336 | losses = AverageMeter('Loss', ':.4e') 337 | top1 = AverageMeter('Acc@1', ':6.2f') 338 | top5 = AverageMeter('Acc@5', ':6.2f') 339 | progress = ProgressMeter( 340 | len(val_loader), 341 | [batch_time, losses, top1, top5], 342 | prefix='Test: ') 343 | 344 | # switch to evaluate mode 345 | model.eval() 346 | 347 | with torch.no_grad(): 348 | end = time.time() 349 | for i, (images, target) in enumerate(val_loader): 350 | if args.gpu is not None: 351 | images = images.cuda(args.gpu, non_blocking=True) 352 | target = target.cuda(args.gpu, non_blocking=True) 353 | 354 | # compute output 355 | output = model(images) 356 | loss = criterion(output, target) 357 | 358 | # measure accuracy and record loss 359 | acc1, acc5 = accuracy(output, target, topk=(1, 5)) 360 | losses.update(loss.item(), images.size(0)) 361 | top1.update(acc1[0], images.size(0)) 362 | top5.update(acc5[0], images.size(0)) 363 | 364 | # measure elapsed time 365 | batch_time.update(time.time() - end) 366 | end = time.time() 367 | 368 | if i % args.print_freq == 0: 369 | progress.display(i) 370 | 371 | # TODO: this should also be done with the ProgressMeter 372 | print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}' 373 | .format(top1=top1, top5=top5)) 374 | 375 | return top1.avg 376 | 377 | 378 | def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'): 379 | torch.save(state, filename) 380 | if is_best: 381 | shutil.copyfile(filename, filename+'model_best.pth.tar') 382 | 383 | 384 | def sanity_check(state_dict, pretrained_weights): 385 | """ 386 | Linear classifier should not change any weights other than the linear layer. 387 | This sanity check asserts nothing wrong happens (e.g., BN stats updated). 
388 | """ 389 | print("=> loading '{}' for sanity check".format(pretrained_weights)) 390 | checkpoint = torch.load(pretrained_weights, map_location="cpu") 391 | state_dict_pre = checkpoint['state_dict'] 392 | 393 | for k in list(state_dict.keys()): 394 | # only ignore fc layer 395 | if 'fc.weight' in k or 'fc.bias' in k: 396 | continue 397 | 398 | # name in pretrained model 399 | k_pre = k[len('module.'):] if k.startswith('module.') else k 400 | 401 | assert ((state_dict[k].cpu() == state_dict_pre[k_pre]).all()), \ 402 | '{} is changed in linear classifier training.'.format(k) 403 | 404 | print("=> sanity check passed.") 405 | 406 | 407 | class AverageMeter(object): 408 | """Computes and stores the average and current value""" 409 | def __init__(self, name, fmt=':f'): 410 | self.name = name 411 | self.fmt = fmt 412 | self.reset() 413 | 414 | def reset(self): 415 | self.val = 0 416 | self.avg = 0 417 | self.sum = 0 418 | self.count = 0 419 | 420 | def update(self, val, n=1): 421 | self.val = val 422 | self.sum += val * n 423 | self.count += n 424 | self.avg = self.sum / self.count 425 | 426 | def __str__(self): 427 | fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})' 428 | return fmtstr.format(**self.__dict__) 429 | 430 | 431 | class ProgressMeter(object): 432 | def __init__(self, num_batches, meters, prefix=""): 433 | self.batch_fmtstr = self._get_batch_fmtstr(num_batches) 434 | self.meters = meters 435 | self.prefix = prefix 436 | 437 | def display(self, batch): 438 | entries = [self.prefix + self.batch_fmtstr.format(batch)] 439 | entries += [str(meter) for meter in self.meters] 440 | print('\t'.join(entries)) 441 | 442 | def _get_batch_fmtstr(self, num_batches): 443 | num_digits = len(str(num_batches // 1)) 444 | fmt = '{:' + str(num_digits) + 'd}' 445 | return '[' + fmt + '/' + fmt.format(num_batches) + ']' 446 | 447 | 448 | def adjust_learning_rate(optimizer, epoch, args): 449 | """Decay the learning rate based on schedule""" 450 | lr = args.lr 451 | for milestone in args.schedule: 452 | lr *= 0.1 if epoch >= milestone else 1. 
453 | for param_group in optimizer.param_groups: 454 | param_group['lr'] = lr 455 | 456 | 457 | def accuracy(output, target, topk=(1,)): 458 | """Computes the accuracy over the k top predictions for the specified values of k""" 459 | with torch.no_grad(): 460 | maxk = max(topk) 461 | batch_size = target.size(0) 462 | 463 | _, pred = output.topk(maxk, 1, True, True) 464 | pred = pred.t() 465 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 466 | 467 | res = [] 468 | for k in topk: 469 | correct_k = correct[:k].contiguous().view(-1).float().sum(0, keepdim=True) 470 | res.append(correct_k.mul_(100.0 / batch_size)) 471 | return res 472 | 473 | 474 | if __name__ == '__main__': 475 | main() 476 | -------------------------------------------------------------------------------- /action_classification_cmd_semi.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import random 4 | import shutil 5 | import time 6 | import warnings 7 | 8 | import torch 9 | import torch.nn as nn 10 | import torch.nn.parallel 11 | import torch.backends.cudnn as cudnn 12 | import torch.optim 13 | import torch.utils.data 14 | import torch.utils.data.distributed 15 | 16 | import moco.builder_cmd 17 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 18 | 19 | # change for action recogniton 20 | from dataset import get_finetune_training_set_semi_supervised, get_finetune_validation_set_semi_supervised 21 | 22 | 23 | parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') 24 | parser.add_argument('-j', '--workers', default=32, type=int, metavar='N', 25 | help='number of data loading workers (default: 32)') 26 | parser.add_argument('--epochs', default=80, type=int, metavar='N', 27 | help='number of total epochs to run') 28 | parser.add_argument('--start-epoch', default=0, type=int, metavar='N', 29 | help='manual epoch number (useful on restarts)') 30 | parser.add_argument('-b', '--batch-size', default=256, type=int, 31 | metavar='N', 32 | help='mini-batch size (default: 256), this is the total ' 33 | 'batch size of all GPUs on the current node when ' 34 | 'using Data Parallel or Distributed Data Parallel') 35 | parser.add_argument('--lr', '--learning-rate', default=30., type=float, 36 | metavar='LR', help='initial learning rate', dest='lr') 37 | parser.add_argument('--schedule', default=[50, 70,], nargs='*', type=int, 38 | help='learning rate schedule (when to drop lr by a ratio)') 39 | parser.add_argument('--momentum', default=0.9, type=float, metavar='M', 40 | help='momentum') 41 | parser.add_argument('--wd', '--weight-decay', default=0., type=float, 42 | metavar='W', help='weight decay (default: 0.)', 43 | dest='weight_decay') 44 | parser.add_argument('-p', '--print-freq', default=10, type=int, 45 | metavar='N', help='print frequency (default: 10)') 46 | parser.add_argument('--resume', default='', type=str, metavar='PATH', 47 | help='path to latest checkpoint (default: none)') 48 | parser.add_argument('--seed', default=None, type=int, 49 | help='seed for initializing training. 
') 50 | parser.add_argument('--gpu', default=None, type=int, 51 | help='GPU id to use.') 52 | 53 | parser.add_argument('--pretrained', default='', type=str, 54 | help='path to moco pretrained checkpoint') 55 | parser.add_argument('--finetune-dataset', default='ntu60', type=str, 56 | help='which dataset to use for finetuning') 57 | 58 | parser.add_argument('--protocol', default='cross_view', type=str, 59 | help='training protocol') 60 | 61 | parser.add_argument('--data-ratio', default=0.2, type=float, 62 | help='ratio of training data used in semi-supervised setting') 63 | 64 | parser.add_argument('--finetune-skeleton-representation', default='graph-based', type=str, 65 | help='which skeleton-representation to use for downstream training') 66 | parser.add_argument('--pretrain-skeleton-representation', default='graph-based', type=str, 67 | help='which skeleton-representation where used for pre-training') 68 | 69 | best_acc1 = 0 70 | 71 | # initilize weight 72 | def weights_init_gru(model): 73 | with torch.no_grad(): 74 | for child in list(model.children()): 75 | print("init ",child) 76 | for param in list(child.parameters()): 77 | if param.dim() == 2: 78 | nn.init.xavier_uniform_(param) 79 | print('PC weight initial finished!') 80 | 81 | def load_pretrained(model, pretrained): 82 | if os.path.isfile(pretrained): 83 | print("=> loading checkpoint '{}'".format(pretrained)) 84 | checkpoint = torch.load(pretrained, map_location="cpu") 85 | 86 | # rename moco pre-trained keys 87 | state_dict = checkpoint['state_dict'] 88 | for k in list(state_dict.keys()): 89 | # retain only encoder_q up to before the embedding layer 90 | if not k.startswith('encoder_q'): 91 | del state_dict[k] 92 | elif '.fc' in k: 93 | del state_dict[k] 94 | else: 95 | pass 96 | 97 | msg = model.load_state_dict(state_dict, strict=False) 98 | print("message",msg) 99 | assert set(msg.missing_keys) == {"encoder_q.fc.weight", "encoder_q.fc.bias", 100 | "encoder_q_motion.fc.weight", "encoder_q_motion.fc.bias", 101 | "encoder_q_bone.fc.weight", "encoder_q_bone.fc.bias"} 102 | 103 | print("=> loaded pre-trained model '{}'".format(pretrained)) 104 | else: 105 | print("=> no checkpoint found at '{}'".format(pretrained)) 106 | 107 | 108 | def main(): 109 | args = parser.parse_args() 110 | 111 | if args.seed is not None: 112 | random.seed(args.seed) 113 | torch.manual_seed(args.seed) 114 | cudnn.deterministic = True 115 | warnings.warn('You have chosen to seed training. ' 116 | 'This will turn on the CUDNN deterministic setting, ' 117 | 'which can slow down your training considerably! ' 118 | 'You may see unexpected behavior when restarting ' 119 | 'from checkpoints.') 120 | 121 | if args.gpu is not None: 122 | warnings.warn('You have chosen a specific GPU. 
This will completely ' 123 | 'disable data parallelism.') 124 | ngpus_per_node = torch.cuda.device_count() 125 | # Simply call main_worker function 126 | main_worker(0, ngpus_per_node, args) 127 | 128 | 129 | def main_worker(gpu, ngpus_per_node, args): 130 | global best_acc1 131 | args.gpu = gpu 132 | 133 | if args.gpu is not None: 134 | print("Use GPU: {} for training".format(args.gpu)) 135 | 136 | # create model 137 | 138 | # training dataset 139 | from options import options_classification as options 140 | if args.finetune_dataset== 'ntu60' and args.protocol == 'cross_view': 141 | opts = options.opts_ntu_60_cross_view() 142 | elif args.finetune_dataset== 'ntu60' and args.protocol == 'cross_subject': 143 | opts = options.opts_ntu_60_cross_subject() 144 | elif args.finetune_dataset== 'ntu120' and args.protocol == 'cross_setup': 145 | opts = options.opts_ntu_120_cross_setup() 146 | elif args.finetune_dataset== 'ntu120' and args.protocol == 'cross_subject': 147 | opts = options.opts_ntu_120_cross_subject() 148 | elif args.finetune_dataset== 'pku_v2' and args.protocol == 'cross_view': 149 | opts = options.opts_pku_v2_cross_view() 150 | elif args.finetune_dataset== 'pku_v2' and args.protocol == 'cross_subject': 151 | opts = options.opts_pku_v2_cross_subject() 152 | elif args.finetune_dataset== 'ntu60' and args.protocol == 'cross_view_semi': 153 | opts = options.opts_ntu_60_cross_view() 154 | elif args.finetune_dataset== 'ntu60' and args.protocol == 'cross_subject_semi': 155 | opts = options.opts_ntu_60_cross_subject() 156 | elif args.finetune_dataset== 'pku_v2' and args.protocol == 'cross_subject_semi': 157 | opts = options.opts_pku_v2_cross_subject() 158 | 159 | opts.train_feeder_args['input_representation'] = args.finetune_skeleton_representation 160 | opts.test_feeder_args['input_representation'] = args.finetune_skeleton_representation 161 | 162 | if 'semi' in args.protocol: 163 | opts.train_feeder_args['data_ratio'] = args.data_ratio 164 | # create model 165 | print("=> creating model") 166 | 167 | model = moco.builder_cmd.MoCo(args.finetune_skeleton_representation, opts.bi_gru_model_args, pretrain=False) 168 | print("options", opts.agcn_model_args, opts.train_feeder_args, opts.test_feeder_args) 169 | 170 | if args.pretrained: 171 | # init the fc layer 172 | model.encoder_q.fc.weight.data.normal_(mean=0.0, std=0.01) 173 | model.encoder_q.fc.bias.data.zero_() 174 | model.encoder_q_motion.fc.weight.data.normal_(mean=0.0, std=0.01) 175 | model.encoder_q_motion.fc.bias.data.zero_() 176 | model.encoder_q_bone.fc.weight.data.normal_(mean=0.0, std=0.01) 177 | model.encoder_q_bone.fc.bias.data.zero_() 178 | 179 | # load from pre-trained model 180 | load_pretrained(model, args.pretrained) 181 | 182 | if args.gpu is not None: 183 | model = model.cuda() 184 | 185 | # define loss function (criterion) and optimizer 186 | criterion = nn.CrossEntropyLoss().cuda(args.gpu) 187 | 188 | optimizer = torch.optim.SGD(model.parameters(), 189 | lr=args.lr, 190 | momentum=args.momentum, 191 | weight_decay=args.weight_decay) 192 | 193 | if True: 194 | for parm in optimizer.param_groups: 195 | print ("optimize parameters lr",parm['lr']) 196 | 197 | # optionally resume from a checkpoint 198 | if args.resume: 199 | if os.path.isfile(args.resume): 200 | print("=> loading checkpoint '{}'".format(args.resume)) 201 | if args.gpu is None: 202 | checkpoint = torch.load(args.resume) 203 | else: 204 | # Map model to be loaded to specified single gpu. 
197 |     # optionally resume from a checkpoint
198 |     if args.resume:
199 |         if os.path.isfile(args.resume):
200 |             print("=> loading checkpoint '{}'".format(args.resume))
201 |             if args.gpu is None:
202 |                 checkpoint = torch.load(args.resume)
203 |             else:
204 |                 # Map model to be loaded to specified single gpu.
205 |                 loc = 'cuda:{}'.format(args.gpu)
206 |                 checkpoint = torch.load(args.resume, map_location=loc)
207 |             args.start_epoch = checkpoint['epoch']
208 |             best_acc1 = checkpoint['best_acc1']
209 |             if args.gpu is not None:
210 |                 # best_acc1 may be from a checkpoint from a different GPU
211 |                 best_acc1 = best_acc1.to(args.gpu)
212 |             model.load_state_dict(checkpoint['state_dict'])
213 |             optimizer.load_state_dict(checkpoint['optimizer'])
214 |             print("=> loaded checkpoint '{}' (epoch {})"
215 |                   .format(args.resume, checkpoint['epoch']))
216 |         else:
217 |             print("=> no checkpoint found at '{}'".format(args.resume))
218 | 
219 |     cudnn.benchmark = True
220 | 
221 |     ## Data loading code
222 | 
223 |     train_dataset = get_finetune_training_set_semi_supervised(opts)
224 |     val_dataset = get_finetune_validation_set_semi_supervised(opts)
225 | 
226 |     train_sampler = None
227 | 
228 |     train_loader = torch.utils.data.DataLoader(
229 |         train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None),
230 |         num_workers=args.workers, pin_memory=True, sampler=train_sampler, drop_last=False)
231 | 
232 | 
233 |     val_loader = torch.utils.data.DataLoader(
234 |         val_dataset,
235 |         batch_size=args.batch_size, shuffle=False,
236 |         num_workers=args.workers, pin_memory=True, drop_last=False)
237 | 
238 | 
239 |     for epoch in range(args.start_epoch, args.epochs):
240 | 
241 |         adjust_learning_rate(optimizer, epoch, args)
242 | 
243 |         # train for one epoch
244 |         train(train_loader, model, criterion, optimizer, epoch, args)
245 | 
246 |         # evaluate on validation set
247 |         if (epoch + 1) % 5 == 0:
248 |             acc1 = validate(val_loader, model, criterion, args)
249 |         else:
250 |             acc1 = 0
251 | 
252 | 
253 |         # remember best acc@1 and save checkpoint
254 |         is_best = acc1 > best_acc1
255 |         if is_best:
256 |             print("found new best accuracy:= ", acc1)
257 |         best_acc1 = max(acc1, best_acc1)
258 | 
259 |         save_checkpoint({
260 |             'epoch': epoch + 1,
261 |             'state_dict': model.state_dict(),
262 |             'best_acc1': best_acc1,
263 |             'optimizer': optimizer.state_dict(),
264 |         }, is_best, filename=args.finetune_skeleton_representation + '_checkpoint.pth.tar')
265 | 
266 |     print("Final best accuracy", best_acc1)
267 | 
268 | 
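# Note: validate() is only called every 5 epochs; on the remaining epochs acc1 is set
# to 0, so best_acc1 (and the *model_best* copy) can only be updated on evaluation epochs.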
286 | """ 287 | model.train() 288 | 289 | end = time.time() 290 | for i, (images, target) in enumerate(train_loader): 291 | # measure data loading time 292 | data_time.update(time.time() - end) 293 | 294 | if args.gpu is not None: 295 | images = images.cuda(args.gpu, non_blocking=True) 296 | target = target.cuda(args.gpu, non_blocking=True).long() 297 | 298 | 299 | # compute output 300 | output = model(images, view='joint') 301 | loss = criterion(output, target) 302 | 303 | # measure accuracy and record loss 304 | acc1, acc5 = accuracy(output, target, topk=(1, 5)) 305 | losses.update(loss.item(), images.size(0)) 306 | top1.update(acc1[0], images.size(0)) 307 | top5.update(acc5[0], images.size(0)) 308 | 309 | # compute gradient and do SGD step 310 | optimizer.zero_grad() 311 | loss.backward() 312 | optimizer.step() 313 | 314 | # measure elapsed time 315 | batch_time.update(time.time() - end) 316 | end = time.time() 317 | 318 | if i % args.print_freq == 0: 319 | progress.display(i) 320 | 321 | 322 | def validate(val_loader, model, criterion, args): 323 | batch_time = AverageMeter('Time', ':6.3f') 324 | losses = AverageMeter('Loss', ':.4e') 325 | top1 = AverageMeter('Acc@1', ':6.2f') 326 | top5 = AverageMeter('Acc@5', ':6.2f') 327 | progress = ProgressMeter( 328 | len(val_loader), 329 | [batch_time, losses, top1, top5], 330 | prefix='Test: ') 331 | 332 | # switch to evaluate mode 333 | model.eval() 334 | 335 | with torch.no_grad(): 336 | end = time.time() 337 | for i, (images, target) in enumerate(val_loader): 338 | if args.gpu is not None: 339 | images = images.cuda(args.gpu, non_blocking=True) 340 | target = target.cuda(args.gpu, non_blocking=True).long() 341 | 342 | # compute output 343 | output = model(images, view='joint') 344 | loss = criterion(output, target) 345 | 346 | # measure accuracy and record loss 347 | acc1, acc5 = accuracy(output, target, topk=(1, 5)) 348 | losses.update(loss.item(), images.size(0)) 349 | top1.update(acc1[0], images.size(0)) 350 | top5.update(acc5[0], images.size(0)) 351 | 352 | # measure elapsed time 353 | batch_time.update(time.time() - end) 354 | end = time.time() 355 | 356 | if i % args.print_freq == 0: 357 | progress.display(i) 358 | 359 | # TODO: this should also be done with the ProgressMeter 360 | print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}' 361 | .format(top1=top1, top5=top5)) 362 | 363 | return top1.avg 364 | 365 | 366 | def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'): 367 | torch.save(state, filename) 368 | if is_best: 369 | shutil.copyfile(filename, filename+'model_best.pth.tar') 370 | 371 | 372 | def sanity_check(state_dict, pretrained_weights): 373 | """ 374 | Linear classifier should not change any weights other than the linear layer. 375 | This sanity check asserts nothing wrong happens (e.g., BN stats updated). 
376 | """ 377 | print("=> loading '{}' for sanity check".format(pretrained_weights)) 378 | checkpoint = torch.load(pretrained_weights, map_location="cpu") 379 | state_dict_pre = checkpoint['state_dict'] 380 | 381 | for k in list(state_dict.keys()): 382 | # only ignore fc layer 383 | if 'fc.weight' in k or 'fc.bias' in k: 384 | continue 385 | 386 | # name in pretrained model 387 | k_pre = k[len('module.'):] if k.startswith('module.') else k 388 | 389 | assert ((state_dict[k].cpu() == state_dict_pre[k_pre]).all()), \ 390 | '{} is changed in linear classifier training.'.format(k) 391 | 392 | print("=> sanity check passed.") 393 | 394 | 395 | class AverageMeter(object): 396 | """Computes and stores the average and current value""" 397 | def __init__(self, name, fmt=':f'): 398 | self.name = name 399 | self.fmt = fmt 400 | self.reset() 401 | 402 | def reset(self): 403 | self.val = 0 404 | self.avg = 0 405 | self.sum = 0 406 | self.count = 0 407 | 408 | def update(self, val, n=1): 409 | self.val = val 410 | self.sum += val * n 411 | self.count += n 412 | self.avg = self.sum / self.count 413 | 414 | def __str__(self): 415 | fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})' 416 | return fmtstr.format(**self.__dict__) 417 | 418 | 419 | class ProgressMeter(object): 420 | def __init__(self, num_batches, meters, prefix=""): 421 | self.batch_fmtstr = self._get_batch_fmtstr(num_batches) 422 | self.meters = meters 423 | self.prefix = prefix 424 | 425 | def display(self, batch): 426 | entries = [self.prefix + self.batch_fmtstr.format(batch)] 427 | entries += [str(meter) for meter in self.meters] 428 | print('\t'.join(entries)) 429 | 430 | def _get_batch_fmtstr(self, num_batches): 431 | num_digits = len(str(num_batches // 1)) 432 | fmt = '{:' + str(num_digits) + 'd}' 433 | return '[' + fmt + '/' + fmt.format(num_batches) + ']' 434 | 435 | 436 | def adjust_learning_rate(optimizer, epoch, args): 437 | """Decay the learning rate based on schedule""" 438 | lr = args.lr 439 | for milestone in args.schedule: 440 | lr *= 0.1 if epoch >= milestone else 1. 441 | for index, param_group in enumerate(optimizer.param_groups): 442 | param_group['lr'] = lr 443 | 444 | 445 | 446 | 447 | def accuracy(output, target, topk=(1,)): 448 | """Computes the accuracy over the k top predictions for the specified values of k""" 449 | with torch.no_grad(): 450 | maxk = max(topk) 451 | batch_size = target.size(0) 452 | 453 | _, pred = output.topk(maxk, 1, True, True) 454 | pred = pred.t() 455 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 456 | 457 | res = [] 458 | for k in topk: 459 | correct_k = correct[:k].contiguous().view(-1).float().sum(0, keepdim=True) 460 | res.append(correct_k.mul_(100.0 / batch_size)) 461 | return res 462 | 463 | 464 | if __name__ == '__main__': 465 | main() 466 | --------------------------------------------------------------------------------