├── action_recognition ├── model │ ├── __init__.py │ ├── agcn.py │ ├── agcn_mult.py │ └── aagcn.py ├── feeders │ ├── __init__.py │ ├── tools.py │ └── feeder.py ├── graph │ ├── __init__.py │ ├── tools.py │ ├── ntu_rgb_d.py │ └── kinetics.py ├── config │ └── babel_v1.0 │ │ ├── test_60.yaml │ │ ├── test_120.yaml │ │ ├── test_60_wfl.yaml │ │ ├── test_120_wfl.yaml │ │ ├── train_60.yaml │ │ ├── train_120.yaml │ │ ├── train_60_wfl.yaml │ │ └── train_120_wfl.yaml ├── data_gen │ ├── rotation.py │ ├── preprocess.py │ ├── dutils.py │ ├── viz.py │ └── create_dataset.py ├── data │ └── action_label_2_idx.json ├── class_balanced_loss.py ├── challenge │ └── create_submission.py ├── Readme.md └── train_test.py ├── .gitignore ├── notebooks ├── Readme.md ├── BABEL_explore.ipynb └── BABEL_visualization.ipynb ├── requirements.txt └── Readme.md /action_recognition/model/__init__.py: -------------------------------------------------------------------------------- 1 | from . import agcn, aagcn 2 | -------------------------------------------------------------------------------- /action_recognition/feeders/__init__.py: -------------------------------------------------------------------------------- 1 | from . import tools 2 | from . import feeder 3 | -------------------------------------------------------------------------------- /action_recognition/graph/__init__.py: -------------------------------------------------------------------------------- 1 | from . import tools 2 | from . import ntu_rgb_d 3 | from . import kinetics 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # System 2 | babel-env 3 | 4 | # Temp files 5 | *.DS_Store 6 | *.swp 7 | __pycache__ 8 | notebooks/.ipynb_checkpoints 9 | 10 | # Data 11 | data/babel_v1.0_release 12 | action_recognition/data/release 13 | 14 | # Predictions 15 | action_recognition/challenge/*.pkl 16 | action_recognition/challenge/*.npz 17 | ckpts 18 | 19 | # Logging 20 | wandb 21 | runs 22 | work_dir 23 | -------------------------------------------------------------------------------- /notebooks/Readme.md: -------------------------------------------------------------------------------- 1 | ### Load and visualize BABEL 2 | 3 | [`BABEL_visualization.ipynb`](BABEL_visualization.ipynb) contains code that demonstrates how to: 4 | - Load the BABEL dataset 5 | - Visualize rendered videos of mocap sequences 6 | - Visualize their action labels 7 | 8 | 9 | ### Explore BABEL 10 | 11 | [`BABEL_explore.ipynb`](BABEL_explore.ipynb) contains code that shows how to: 12 | - Compute stats. from BABEL (e.g., duration of labeled mocap) 13 | - Search BABEL for mocap sequences containing a specific action, and retrieve their annotations. 
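As a quick orientation before opening the notebooks, here is a minimal sketch of the typical access pattern: load the released label files and filter sequences by an action category. Treat it as illustrative only; the download folder (`data/babel_v1.0_release/`, the path listed in `.gitignore`) and the JSON field names (`frame_ann`, `labels`, `act_cat`) are assumptions about the release format, and `BABEL_explore.ipynb` remains the authoritative reference.

```python
# Minimal sketch (not the notebook code): load BABEL label files and list the
# sequences whose frame-level annotations contain a given action category.
# Paths and field names ('frame_ann', 'labels', 'act_cat', and the val.json
# file name) are assumptions about the v1.0 release format.
import json
from os.path import join as ospj

d_folder = 'data/babel_v1.0_release'
babel = {}
for spl in ('train', 'val'):
    with open(ospj(d_folder, f'{spl}.json')) as f:
        babel.update(json.load(f))

def seqs_with_action(anns, query='walk'):
    """Return the IDs of sequences containing `query` in their frame labels."""
    matches = []
    for sid, ann in anns.items():
        frame_ann = ann.get('frame_ann') or {}
        for label in frame_ann.get('labels', []):
            if query in (label.get('act_cat') or []):
                matches.append(sid)
                break
    return matches

print(len(seqs_with_action(babel, 'walk')), 'sequences contain "walk"')
```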
-------------------------------------------------------------------------------- /action_recognition/config/babel_v1.0/test_60.yaml: -------------------------------------------------------------------------------- 1 | # feeder 2 | feeder: feeders.feeder.Feeder 3 | test_feeder_args: 4 | data_path: ./data/release/val_ntu_sk_60.npy 5 | label_path: ./data/release/val_label_60.pkl 6 | debug: False 7 | 8 | # model 9 | model: model.agcn.Model 10 | model_args: 11 | num_class: 60 12 | num_point: 25 13 | num_person: 1 14 | graph: graph.ntu_rgb_d.Graph 15 | graph_args: 16 | labeling_mode: 'spatial' 17 | 18 | # test 19 | phase: test 20 | device: [0] 21 | test_batch_size: 128 22 | weights: ./ckpts/ntu_sk_60_agcn_joint_const_lr_1e-3-17-6390.pt 23 | 24 | work_dir: ./work_dir/babel_v1.0/test_runs/test_ntu_sk_60_agcn_joint_const_lr_1e-3 25 | save_score: True 26 | -------------------------------------------------------------------------------- /action_recognition/config/babel_v1.0/test_120.yaml: -------------------------------------------------------------------------------- 1 | # feeder 2 | feeder: feeders.feeder.Feeder 3 | test_feeder_args: 4 | data_path: ./data/release/val_ntu_sk_120.npy 5 | label_path: ./data/release/val_label_120.pkl 6 | debug: False 7 | 8 | # model 9 | model: model.agcn.Model 10 | model_args: 11 | num_class: 120 12 | num_point: 25 13 | num_person: 1 14 | graph: graph.ntu_rgb_d.Graph 15 | graph_args: 16 | labeling_mode: 'spatial' 17 | 18 | # test 19 | phase: test 20 | device: [0] 21 | test_batch_size: 128 22 | weights: ./ckpts/ntu_sk_120_agcn_joint_const_lr_1e-3-15-12240.pt 23 | 24 | work_dir: ./work_dir/babel_v1.0/test_runs/test_ntu_sk_120_agcn_joint_const_lr_1e-3 25 | save_score: True 26 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | backcall==0.2.0 2 | certifi==2020.12.5 3 | decorator==4.4.2 4 | ipdb==0.13.4 5 | ipython==7.19.0 6 | ipython-genutils==0.2.0 7 | jedi==0.18.0 8 | joblib==1.0.0 9 | networkx==2.5 10 | numpy==1.19.5 11 | parso==0.8.1 12 | pexpect==4.8.0 13 | pickleshare==0.7.5 14 | Pillow==8.1.0 15 | prompt-toolkit==3.0.10 16 | protobuf==3.14.0 17 | ptyprocess==0.7.0 18 | Pygments==2.7.4 19 | PyYAML==5.4 20 | scikit-learn==0.24.1 21 | scipy==1.6.0 22 | six==1.15.0 23 | tensorboardX==2.1 24 | threadpoolctl==2.1.0 25 | torch==1.7.1 26 | torchvision==0.8.2 27 | tqdm==4.56.0 28 | traitlets==5.0.5 29 | typing-extensions==3.7.4.3 30 | wcwidth==0.2.5 31 | pandas==1.3.4 32 | smplx==0.1.13 33 | matplotlib==3.1.3 34 | opencv-python==4.4.0.42 35 | -------------------------------------------------------------------------------- /action_recognition/graph/tools.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def edge2mat(link, num_node): 5 | A = np.zeros((num_node, num_node)) 6 | for i, j in link: 7 | A[j, i] = 1 8 | return A 9 | 10 | 11 | def normalize_digraph(A): # 除以每列的和 12 | Dl = np.sum(A, 0) 13 | h, w = A.shape 14 | Dn = np.zeros((w, w)) 15 | for i in range(w): 16 | if Dl[i] > 0: 17 | Dn[i, i] = Dl[i] ** (-1) 18 | AD = np.dot(A, Dn) 19 | return AD 20 | 21 | 22 | def get_spatial_graph(num_node, self_link, inward, outward): 23 | I = edge2mat(self_link, num_node) 24 | In = normalize_digraph(edge2mat(inward, num_node)) 25 | Out = normalize_digraph(edge2mat(outward, num_node)) 26 | A = np.stack((I, In, Out)) 27 | return A 28 | 
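For a concrete sense of what `get_spatial_graph` returns, the toy snippet below builds the three adjacency subsets for a 3-joint chain. It is only an illustration, and it assumes the script is run from `action_recognition/` so that `graph.tools` is importable.

```python
# Toy usage of graph.tools (illustration only; assumes the working directory is
# action_recognition/ so that `graph.tools` resolves).
from graph import tools

num_node = 3                                   # a 3-joint chain: 0 - 1 - 2
self_link = [(i, i) for i in range(num_node)]
inward = [(1, 0), (2, 1)]                      # child -> parent edges
outward = [(j, i) for (i, j) in inward]

A = tools.get_spatial_graph(num_node, self_link, inward, outward)
print(A.shape)   # (3, 3, 3): identity, column-normalized inward, outward
print(A[1])      # every non-zero column of the normalized subsets sums to 1
```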
-------------------------------------------------------------------------------- /action_recognition/config/babel_v1.0/test_60_wfl.yaml: -------------------------------------------------------------------------------- 1 | # feeder 2 | feeder: feeders.feeder.Feeder 3 | test_feeder_args: 4 | data_path: ./data/release/val_ntu_sk_60.npy 5 | label_path: ./data/release/val_label_60.pkl 6 | debug: False 7 | 8 | label_count_path: ./data/release/train_label_60_count.pkl 9 | 10 | # model 11 | model: model.agcn.Model 12 | model_args: 13 | num_class: 60 14 | num_point: 25 15 | num_person: 1 16 | graph: graph.ntu_rgb_d.Graph 17 | graph_args: 18 | labeling_mode: 'spatial' 19 | 20 | # test 21 | phase: test 22 | device: [0] 23 | test_batch_size: 32 24 | weights: ./ckpts/wfl_ntu_sk_60_agcn_joint_const_lr_1e-3-93-33370.pt 25 | 26 | work_dir: ./work_dir/babel_v1.0/test_runs/test_wfl_ntu_sk_60_agcn_joint_const_lr_1e-3 27 | save_score: True 28 | -------------------------------------------------------------------------------- /action_recognition/config/babel_v1.0/test_120_wfl.yaml: -------------------------------------------------------------------------------- 1 | # feeder 2 | feeder: feeders.feeder.Feeder 3 | test_feeder_args: 4 | data_path: ./data/release/val_ntu_sk_120.npy 5 | label_path: ./data/release/val_label_120.pkl 6 | debug: False 7 | 8 | label_count_path: ./data/release/train_label_120_count.pkl 9 | 10 | # model 11 | model: model.agcn.Model 12 | model_args: 13 | num_class: 120 14 | num_point: 25 15 | num_person: 1 16 | graph: graph.ntu_rgb_d.Graph 17 | graph_args: 18 | labeling_mode: 'spatial' 19 | 20 | # test 21 | phase: test 22 | device: [0] 23 | test_batch_size: 128 24 | weights: ./ckpts/wfl_ntu_sk_120_agcn_joint_const_lr_1e-3-157-60356.pt 25 | 26 | work_dir: ./work_dir/babel_v1.0/test_runs/test_wfl_ntu_sk_120_agcn_joint_const_lr_1e-3 27 | save_score: True 28 | -------------------------------------------------------------------------------- /action_recognition/config/babel_v1.0/train_60.yaml: -------------------------------------------------------------------------------- 1 | work_dir: ./work_dir/babel_v1.0/ntu_sk_60_agcn_joint_const_lr_1e-3 2 | model_saved_name: ./runs/babel_v1.0/ntu_sk_60_agcn_joint_const_lr_1e-3 3 | 4 | # feeder 5 | feeder: feeders.feeder.Feeder 6 | train_feeder_args: 7 | data_path: ./data/release/train_ntu_sk_60.npy 8 | label_path: ./data/release/train_label_60.pkl 9 | debug: False 10 | random_choose: False 11 | random_shift: False 12 | random_move: False 13 | window_size: -1 14 | normalization: False 15 | 16 | test_feeder_args: 17 | data_path: ./data/release/val_ntu_sk_60.npy 18 | label_path: ./data/release/val_label_60.pkl 19 | 20 | # model 21 | model: model.agcn.Model 22 | model_args: 23 | num_class: 60 24 | num_person: 1 25 | num_point: 25 26 | graph: graph.ntu_rgb_d.Graph 27 | graph_args: 28 | labeling_mode: 'spatial' 29 | 30 | #optim 31 | weight_decay: 0.0001 32 | base_lr: 0.001 33 | step: [] 34 | 35 | # training 36 | device: [0] 37 | optimizer: 'Adam' 38 | loss: 'CE' 39 | batch_size: 64 40 | test_batch_size: 64 41 | num_epoch: 250 42 | nesterov: True 43 | 44 | # weights: /ps/project/conditional_action_gen/2s_agcn/runs/babel_v1.0/ntu_sk_60_agcn_joint_const_lr_1e-3-49-23450.pt 45 | -------------------------------------------------------------------------------- /action_recognition/config/babel_v1.0/train_120.yaml: -------------------------------------------------------------------------------- 1 | work_dir: ./work_dir/babel_v1.0/ntu_sk_120_agcn_joint_const_lr_1e-3 2 | 
model_saved_name: ./runs/babel_v1.0/ntu_sk_120_agcn_joint_const_lr_1e-3 3 | 4 | # feeder 5 | feeder: feeders.feeder.Feeder 6 | train_feeder_args: 7 | data_path: ./data/release/train_ntu_sk_120.npy 8 | label_path: ./data/release/train_label_120.pkl 9 | debug: False 10 | random_choose: False 11 | random_shift: False 12 | random_move: False 13 | window_size: -1 14 | normalization: False 15 | 16 | test_feeder_args: 17 | data_path: ./data/release/val_ntu_sk_120.npy 18 | label_path: ./data/release/val_label_120.pkl 19 | 20 | # model 21 | model: model.agcn.Model 22 | model_args: 23 | num_class: 120 24 | num_person: 1 25 | num_point: 25 26 | graph: graph.ntu_rgb_d.Graph 27 | graph_args: 28 | labeling_mode: 'spatial' 29 | 30 | #optim 31 | weight_decay: 0.0001 32 | base_lr: 0.001 33 | step: [] 34 | 35 | # training 36 | device: [0] 37 | optimizer: 'Adam' 38 | loss: 'CE' 39 | batch_size: 64 40 | test_batch_size: 64 41 | num_epoch: 250 42 | nesterov: True 43 | 44 | # weights: /ps/project/conditional_action_gen/2s_agcn/runs/babel_v1.0/ntu_sk_120_agcn_joint_const_lr_1e-3-49-23450.pt 45 | -------------------------------------------------------------------------------- /action_recognition/config/babel_v1.0/train_60_wfl.yaml: -------------------------------------------------------------------------------- 1 | work_dir: ./work_dir/babel_v1.0/wfl_ntu_sk_60_agcn_joint_const_lr_1e-3 2 | model_saved_name: ./runs/babel_v1.0/wfl_ntu_sk_60_agcn_joint_const_lr_1e-3 3 | 4 | # feeder 5 | feeder: feeders.feeder.Feeder 6 | train_feeder_args: 7 | data_path: ./data/release/train_ntu_sk_60.npy 8 | label_path: ./data/release/train_label_60.pkl 9 | debug: False 10 | random_choose: False 11 | random_shift: False 12 | random_move: False 13 | window_size: -1 14 | normalization: False 15 | 16 | test_feeder_args: 17 | data_path: ./data/release/val_ntu_sk_60.npy 18 | label_path: ./data/release/val_label_60.pkl 19 | 20 | # model 21 | model: model.agcn.Model 22 | model_args: 23 | num_class: 60 24 | num_person: 1 25 | num_point: 25 26 | graph: graph.ntu_rgb_d.Graph 27 | graph_args: 28 | labeling_mode: 'spatial' 29 | 30 | #optim 31 | weight_decay: 0.0001 32 | base_lr: 0.001 33 | step: [] 34 | 35 | # training 36 | device: [0] 37 | optimizer: 'Adam' 38 | loss: 'focal' 39 | beta: 0.9999 40 | gamma: 1.0 41 | label_count_path: ./data/release/train_label_60_count.pkl 42 | batch_size: 64 43 | test_batch_size: 64 44 | num_epoch: 200 45 | nesterov: True 46 | 47 | # weights: /ps/project/conditional_action_gen/2s_agcn/runs/babel_v1.0/wfl_ntu_sk_60_agcn_joint_const_lr_1e-3-19-8760.pt 48 | -------------------------------------------------------------------------------- /action_recognition/config/babel_v1.0/train_120_wfl.yaml: -------------------------------------------------------------------------------- 1 | work_dir: ./work_dir/babel_v1.0/wfl_ntu_sk_120_agcn_joint_const_lr_1e-3 2 | model_saved_name: ./runs/babel_v1.0/wfl_ntu_sk_120_agcn_joint_const_lr_1e-3 3 | 4 | # feeder 5 | feeder: feeders.feeder.Feeder 6 | train_feeder_args: 7 | data_path: ./data/release/train_ntu_sk_120.npy 8 | label_path: ./data/release/train_label_120.pkl 9 | debug: False 10 | random_choose: False 11 | random_shift: False 12 | random_move: False 13 | window_size: -1 14 | normalization: False 15 | 16 | test_feeder_args: 17 | data_path: ./data/release/val_ntu_sk_120.npy 18 | label_path: ./data/release/val_label_120.pkl 19 | 20 | # model 21 | model: model.agcn.Model 22 | model_args: 23 | num_class: 120 24 | num_person: 1 25 | num_point: 25 26 | graph: graph.ntu_rgb_d.Graph 
27 | graph_args: 28 | labeling_mode: 'spatial' 29 | 30 | #optim 31 | weight_decay: 0.0001 32 | base_lr: 0.001 33 | step: [] 34 | 35 | # training 36 | device: [0] 37 | optimizer: 'Adam' 38 | loss: 'focal' 39 | beta: 0.9999 40 | gamma: 1.0 41 | label_count_path: ./data/release/train_label_120_count.pkl 42 | batch_size: 64 43 | test_batch_size: 64 44 | num_epoch: 200 45 | nesterov: True 46 | 47 | # weights: /ps/project/conditional_action_gen/2s_agcn/runs/babel_v1.0/wfl_ntu_sk_120_agcn_joint_const_lr_1e-3-19-8760.pt 48 | -------------------------------------------------------------------------------- /action_recognition/graph/ntu_rgb_d.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.extend(['../']) 4 | from graph import tools 5 | 6 | num_node = 25 7 | self_link = [(i, i) for i in range(num_node)] 8 | inward_ori_index = [(1, 2), (2, 21), (3, 21), (4, 3), (5, 21), (6, 5), (7, 6), 9 | (8, 7), (9, 21), (10, 9), (11, 10), (12, 11), (13, 1), 10 | (14, 13), (15, 14), (16, 15), (17, 1), (18, 17), (19, 18), 11 | (20, 19), (22, 23), (23, 8), (24, 25), (25, 12)] 12 | inward = [(i - 1, j - 1) for (i, j) in inward_ori_index] 13 | outward = [(j, i) for (i, j) in inward] 14 | neighbor = inward + outward 15 | 16 | 17 | class Graph: 18 | def __init__(self, labeling_mode='spatial'): 19 | self.A = self.get_adjacency_matrix(labeling_mode) 20 | self.num_node = num_node 21 | self.self_link = self_link 22 | self.inward = inward 23 | self.outward = outward 24 | self.neighbor = neighbor 25 | 26 | def get_adjacency_matrix(self, labeling_mode=None): 27 | if labeling_mode is None: 28 | return self.A 29 | if labeling_mode == 'spatial': 30 | A = tools.get_spatial_graph(num_node, self_link, inward, outward) 31 | else: 32 | raise ValueError() 33 | return A 34 | 35 | 36 | if __name__ == '__main__': 37 | import matplotlib.pyplot as plt 38 | import os 39 | 40 | # os.environ['DISPLAY'] = 'localhost:11.0' 41 | A = Graph('spatial').get_adjacency_matrix() 42 | for i in A: 43 | plt.imshow(i, cmap='gray') 44 | plt.show() 45 | print(A) 46 | -------------------------------------------------------------------------------- /action_recognition/graph/kinetics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sys 3 | 4 | sys.path.extend(['../']) 5 | from graph import tools 6 | import networkx as nx 7 | 8 | # Joint index: 9 | # {0, "Nose"} 10 | # {1, "Neck"}, 11 | # {2, "RShoulder"}, 12 | # {3, "RElbow"}, 13 | # {4, "RWrist"}, 14 | # {5, "LShoulder"}, 15 | # {6, "LElbow"}, 16 | # {7, "LWrist"}, 17 | # {8, "RHip"}, 18 | # {9, "RKnee"}, 19 | # {10, "RAnkle"}, 20 | # {11, "LHip"}, 21 | # {12, "LKnee"}, 22 | # {13, "LAnkle"}, 23 | # {14, "REye"}, 24 | # {15, "LEye"}, 25 | # {16, "REar"}, 26 | # {17, "LEar"}, 27 | 28 | # Edge format: (origin, neighbor) 29 | num_node = 18 30 | self_link = [(i, i) for i in range(num_node)] 31 | inward = [(4, 3), (3, 2), (7, 6), (6, 5), (13, 12), (12, 11), (10, 9), (9, 8), 32 | (11, 5), (8, 2), (5, 1), (2, 1), (0, 1), (15, 0), (14, 0), (17, 15), 33 | (16, 14)] 34 | outward = [(j, i) for (i, j) in inward] 35 | neighbor = inward + outward 36 | 37 | 38 | class Graph: 39 | def __init__(self, labeling_mode='spatial'): 40 | self.A = self.get_adjacency_matrix(labeling_mode) 41 | self.num_node = num_node 42 | self.self_link = self_link 43 | self.inward = inward 44 | self.outward = outward 45 | self.neighbor = neighbor 46 | 47 | def get_adjacency_matrix(self, labeling_mode=None): 48 | if 
labeling_mode is None: 49 | return self.A 50 | if labeling_mode == 'spatial': 51 | A = tools.get_spatial_graph(num_node, self_link, inward, outward) 52 | else: 53 | raise ValueError() 54 | return A 55 | 56 | 57 | if __name__ == '__main__': 58 | A = Graph('spatial').get_adjacency_matrix() 59 | print('') 60 | -------------------------------------------------------------------------------- /action_recognition/data_gen/rotation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math 3 | 4 | 5 | def rotation_matrix(axis, theta): 6 | """ 7 | Return the rotation matrix associated with counterclockwise rotation about 8 | the given axis by theta radians. 9 | """ 10 | if np.abs(axis).sum() < 1e-6 or np.abs(theta) < 1e-6: 11 | return np.eye(3) 12 | axis = np.asarray(axis) 13 | axis = axis / math.sqrt(np.dot(axis, axis)) 14 | a = math.cos(theta / 2.0) 15 | b, c, d = -axis * math.sin(theta / 2.0) 16 | aa, bb, cc, dd = a * a, b * b, c * c, d * d 17 | bc, ad, ac, ab, bd, cd = b * c, a * d, a * c, a * b, b * d, c * d 18 | return np.array([[aa + bb - cc - dd, 2 * (bc + ad), 2 * (bd - ac)], 19 | [2 * (bc - ad), aa + cc - bb - dd, 2 * (cd + ab)], 20 | [2 * (bd + ac), 2 * (cd - ab), aa + dd - bb - cc]]) 21 | 22 | 23 | def unit_vector(vector): 24 | """ Returns the unit vector of the vector. """ 25 | return vector / np.linalg.norm(vector) 26 | 27 | 28 | def angle_between(v1, v2): 29 | """ Returns the angle in radians between vectors 'v1' and 'v2':: 30 | 31 | >>> angle_between((1, 0, 0), (0, 1, 0)) 32 | 1.5707963267948966 33 | >>> angle_between((1, 0, 0), (1, 0, 0)) 34 | 0.0 35 | >>> angle_between((1, 0, 0), (-1, 0, 0)) 36 | 3.141592653589793 37 | """ 38 | if np.abs(v1).sum() < 1e-6 or np.abs(v2).sum() < 1e-6: 39 | return 0 40 | v1_u = unit_vector(v1) 41 | v2_u = unit_vector(v2) 42 | return np.arccos(np.clip(np.dot(v1_u, v2_u), -1.0, 1.0)) 43 | 44 | 45 | def x_rotation(vector, theta): 46 | """Rotates 3-D vector around x-axis""" 47 | R = np.array([[1, 0, 0], [0, np.cos(theta), -np.sin(theta)], [0, np.sin(theta), np.cos(theta)]]) 48 | return np.dot(R, vector) 49 | 50 | 51 | def y_rotation(vector, theta): 52 | """Rotates 3-D vector around y-axis""" 53 | R = np.array([[np.cos(theta), 0, np.sin(theta)], [0, 1, 0], [-np.sin(theta), 0, np.cos(theta)]]) 54 | return np.dot(R, vector) 55 | 56 | 57 | def z_rotation(vector, theta): 58 | """Rotates 3-D vector around z-axis""" 59 | R = np.array([[np.cos(theta), -np.sin(theta), 0], [np.sin(theta), np.cos(theta), 0], [0, 0, 1]]) 60 | return np.dot(R, vector) 61 | -------------------------------------------------------------------------------- /action_recognition/data/action_label_2_idx.json: -------------------------------------------------------------------------------- 1 | { 2 | "walk": 0, 3 | "stand": 1, 4 | "hand movements": 2, 5 | "turn": 3, 6 | "interact with/use object": 4, 7 | "arm movements": 5, 8 | "t pose": 6, 9 | "step": 7, 10 | "backwards movement": 8, 11 | "raising body part": 9, 12 | "look": 10, 13 | "touch object": 11, 14 | "leg movements": 12, 15 | "forward movement": 13, 16 | "circular movement": 14, 17 | "stretch": 15, 18 | "jump": 16, 19 | "touching body part": 17, 20 | "sit": 18, 21 | "place something": 19, 22 | "take/pick something up": 20, 23 | "run": 21, 24 | "bend": 22, 25 | "throw": 23, 26 | "foot movements": 24, 27 | "a pose": 25, 28 | "stand up": 26, 29 | "lowering body part": 27, 30 | "sideways movement": 28, 31 | "move up/down incline": 29, 32 | "action with ball": 30, 33 | "kick": 
31, 34 | "gesture": 32, 35 | "head movements": 33, 36 | "jog": 34, 37 | "grasp object": 35, 38 | "waist movements": 36, 39 | "lift something": 37, 40 | "knee movement": 38, 41 | "wave": 39, 42 | "move something": 40, 43 | "swing body part": 41, 44 | "catch": 42, 45 | "dance": 43, 46 | "lean": 44, 47 | "greet": 45, 48 | "poses": 46, 49 | "touching face": 47, 50 | "sports move": 48, 51 | "exercise/training": 49, 52 | "clean something": 50, 53 | "punch": 51, 54 | "squat": 52, 55 | "scratch": 53, 56 | "hop": 54, 57 | "play sport": 55, 58 | "stumble": 56, 59 | "crossing limbs": 57, 60 | "perform": 58, 61 | "martial art": 59, 62 | "balance": 60, 63 | "kneel": 61, 64 | "shake": 62, 65 | "grab body part": 63, 66 | "clap": 64, 67 | "crouch": 65, 68 | "spin": 66, 69 | "upper body movements": 67, 70 | "knock": 68, 71 | "adjust": 69, 72 | "crawl": 70, 73 | "twist": 71, 74 | "move back to original position": 72, 75 | "bow": 73, 76 | "hit": 74, 77 | "touch ground": 75, 78 | "shoulder movements": 76, 79 | "telephone call": 77, 80 | "grab person": 78, 81 | "play instrument": 79, 82 | "tap": 80, 83 | "spread": 81, 84 | "skip": 82, 85 | "rolling movement": 83, 86 | "jump rope": 84, 87 | "play catch": 85, 88 | "drink": 86, 89 | "evade": 87, 90 | "support": 88, 91 | "point": 89, 92 | "side to side movement": 90, 93 | "stop": 91, 94 | "protect": 92, 95 | "wrist movements": 93, 96 | "stances": 94, 97 | "wait": 95, 98 | "shuffle": 96, 99 | "lunge": 97, 100 | "communicate (vocalise)": 98, 101 | "jumping jacks": 99, 102 | "rub": 100, 103 | "dribble": 101, 104 | "swim": 102, 105 | "sneak": 103, 106 | "to lower a body part": 104, 107 | "misc. abstract action": 105, 108 | "mix": 106, 109 | "limp": 107, 110 | "sway": 108, 111 | "slide": 109, 112 | "cartwheel": 110, 113 | "press something": 111, 114 | "shrug": 112, 115 | "open something": 113, 116 | "leap": 114, 117 | "trip": 115, 118 | "golf": 116, 119 | "move misc. 
body part": 117, 120 | "get injured": 118, 121 | "sudden movement": 119, 122 | "duck": 120, 123 | "flap": 121, 124 | "salute": 122, 125 | "stagger": 123, 126 | "draw": 124, 127 | "tie": 125, 128 | "eat": 126, 129 | "style hair": 127, 130 | "relax": 128, 131 | "pray": 129, 132 | "flip": 130, 133 | "shivering": 131, 134 | "interact with rope": 132, 135 | "march": 133, 136 | "zombie": 134, 137 | "check": 135, 138 | "wiggle": 136, 139 | "bump": 137, 140 | "give something": 138, 141 | "yoga": 139, 142 | "mime": 140, 143 | "wobble": 141, 144 | "release": 142, 145 | "wash": 143, 146 | "stroke": 144, 147 | "rocking movement": 145, 148 | "swipe": 146, 149 | "strafe": 147, 150 | "hang": 148, 151 | "flail arms": 149 152 | } -------------------------------------------------------------------------------- /action_recognition/data_gen/preprocess.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.extend(['../']) 4 | from data_gen.rotation import * 5 | #from rotation import * 6 | from tqdm import tqdm 7 | 8 | 9 | def pre_normalization(data, zaxis=[0, 1], xaxis=[8, 4]): 10 | N, C, T, V, M = data.shape 11 | s = np.transpose(data, [0, 4, 2, 3, 1]) # N, C, T, V, M to N, M, T, V, C 12 | l_m_sk = [] # List idxs of missing skeletons 13 | 14 | print('pad the null frames with the previous frames') 15 | for i_s, skeleton in enumerate(tqdm(s)): # pad 16 | if skeleton.sum() == 0: 17 | print(i_s, ' has no skeleton') 18 | l_m_sk.append(i_s) 19 | for i_p, person in enumerate(skeleton): 20 | if person.sum() == 0: 21 | continue 22 | if person[0].sum() == 0: 23 | index = (person.sum(-1).sum(-1) != 0) 24 | tmp = person[index].copy() 25 | person *= 0 26 | person[:len(tmp)] = tmp 27 | for i_f, frame in enumerate(person): 28 | if frame.sum() == 0: 29 | if person[i_f:].sum() == 0: 30 | rest = len(person) - i_f 31 | num = int(np.ceil(rest / i_f)) 32 | pad = np.concatenate([person[0:i_f] for _ in range(num)], 0)[:rest] 33 | s[i_s, i_p, i_f:] = pad 34 | break 35 | 36 | print('sub the center joint #1 (spine joint in ntu and neck joint in kinetics)') 37 | for i_s, skeleton in enumerate(tqdm(s)): 38 | if skeleton.sum() == 0: 39 | continue 40 | main_body_center = skeleton[0][:, 1:2, :].copy() 41 | for i_p, person in enumerate(skeleton): 42 | if person.sum() == 0: 43 | continue 44 | mask = (person.sum(-1) != 0).reshape(T, V, 1) 45 | s[i_s, i_p] = (s[i_s, i_p] - main_body_center) * mask 46 | 47 | print('parallel the bone between hip(jpt 0) and spine(jpt 1) of the first person to the z axis') 48 | for i_s, skeleton in enumerate(tqdm(s)): 49 | if skeleton.sum() == 0: 50 | continue 51 | joint_bottom = skeleton[0, 0, zaxis[0]] 52 | joint_top = skeleton[0, 0, zaxis[1]] 53 | axis = np.cross(joint_top - joint_bottom, [0, 0, 1]) 54 | angle = angle_between(joint_top - joint_bottom, [0, 0, 1]) 55 | matrix_z = rotation_matrix(axis, angle) 56 | for i_p, person in enumerate(skeleton): 57 | if person.sum() == 0: 58 | continue 59 | for i_f, frame in enumerate(person): 60 | if frame.sum() == 0: 61 | continue 62 | for i_j, joint in enumerate(frame): 63 | s[i_s, i_p, i_f, i_j] = np.dot(matrix_z, joint) 64 | 65 | print( 66 | 'parallel the bone between right shoulder(jpt 8) and left shoulder(jpt 4) of the first person to the x axis') 67 | for i_s, skeleton in enumerate(tqdm(s)): 68 | if skeleton.sum() == 0: 69 | continue 70 | joint_rshoulder = skeleton[0, 0, xaxis[0]] 71 | joint_lshoulder = skeleton[0, 0, xaxis[1]] 72 | axis = np.cross(joint_rshoulder - joint_lshoulder, [1, 0, 0]) 73 | angle 
= angle_between(joint_rshoulder - joint_lshoulder, [1, 0, 0]) 74 | matrix_x = rotation_matrix(axis, angle) 75 | for i_p, person in enumerate(skeleton): 76 | if person.sum() == 0: 77 | continue 78 | for i_f, frame in enumerate(person): 79 | if frame.sum() == 0: 80 | continue 81 | for i_j, joint in enumerate(frame): 82 | s[i_s, i_p, i_f, i_j] = np.dot(matrix_x, joint) 83 | 84 | data = np.transpose(s, [0, 4, 2, 3, 1]) 85 | return data, l_m_sk 86 | 87 | 88 | if __name__ == '__main__': 89 | data = np.load('../data/ntu/xview/val_data.npy') 90 | pre_normalization(data) 91 | np.save('../data/ntu/xview/data_val_pre.npy', data) 92 | -------------------------------------------------------------------------------- /Readme.md: -------------------------------------------------------------------------------- 1 | 2 | # BABEL: Bodies, Action and Behavior with English Labels [[CVPR 2021](http://cvpr2021.thecvf.com/)] 3 | 4 |

5 | 6 |

7 | 8 | > [Abhinanda R. Punnakkal\*](https://ps.is.tuebingen.mpg.de/person/apunnakkal), [Arjun Chandrasekaran\*](https://ps.is.tuebingen.mpg.de/person/achandrasekaran), [Nikos Athanasiou](https://ps.is.tuebingen.mpg.de/person/nathanasiou), [Alejandra Quiros-Ramirez](https://ps.is.tuebingen.mpg.de/person/aquiros), [Michael J. Black](https://ps.is.tuebingen.mpg.de/person/black). 9 | > \* denotes equal contribution 10 | 11 | [Project Website](https://babel.is.tue.mpg.de) | [Paper](https://arxiv.org/pdf/2106.09696.pdf) | [Video](https://www.youtube.com/watch?v=BYWxvjKpCqA) | [Poster](https://babel.is.tue.mpg.de/media/upload/CVPR_2021_BABEL_poster.pdf) 12 | 13 | --- 14 | 15 | BABEL is a large dataset with language labels describing the actions being performed in mocap sequences. BABEL labels about 43 hours of mocap sequences from [AMASS](https://amass.is.tue.mpg.de/) [1] with action labels. 16 | Sequences have action labels at two possible levels of abstraction: 17 | - **Sequence labels** which describe the overall action in the sequence 18 | - **Frame labels** which describe all actions in every frame of the sequence. Each frame label is precisely aligned with the duration of the corresponding action in the mocap sequence, and multiple actions can overlap. 19 | 20 | To download the BABEL action labels, visit our ['Data' page](https://babel.is.tue.mpg.de/data.html). You can download the mocap sequences from [AMASS](https://amass.is.tue.mpg.de/). 21 | 22 | 23 | ### Tutorials 24 | 25 | We release some helper code in Jupyter notebooks to load the BABEL dataset, visualize mocap sequences and their action labels, search BABEL for sequences containing specific actions, etc. 26 | 27 | See [`notebooks/`](notebooks/) for more details. 28 | 29 | 30 | ### Action Recognition 31 | 32 | We provide features, training and inference code, and pre-trained checkpoints for 3D skeleton-based action recognition. 33 | 34 | Please see [`action_recognition/`](action_recognition/) for more details. 35 | 36 | 37 | ### Acknowledgements 38 | 39 | We thank the [Software Workshop](https://is.mpg.de/en/software-workshop) at MPI for building the action recognition test set evaluation web server. 40 | The notebooks in this repo are inspired by the those provided by [AMASS](https://github.com/nghorbani/amass). 41 | The Action Recognition code is based on the [2s-AGCN](https://github.com/lshiwjx/2s-AGCN) [2] implementation. 42 | 43 | 44 | ### References 45 | 46 | [1] Mahmood, Naureen, et al. "AMASS: Archive of motion capture as surface shapes." Proceedings of the IEEE/CVF International Conference on Computer Vision. 2019.
47 | [2] Shi, Lei, et al. "Two-stream adaptive graph convolutional networks for skeleton-based action recognition." Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition. 2019. 48 | 49 | ### License 50 | 51 | Software Copyright License for non-commercial scientific research purposes. Please read carefully the terms and conditions and any accompanying documentation before you download and/or use the BABEL dataset, and software, (the "Model & Software"). By downloading and/or using the Model & Software (including downloading, cloning, installing, and any other use of this GitHub repository), you acknowledge that you have read these terms and conditions, understand them, and agree to be bound by them. If you do not agree with these terms and conditions, you must not download and/or use the Model & Software. Any infringement of the terms of this agreement will automatically terminate your rights under this License. 52 | 53 | ### Contact 54 | 55 | The code in this repository is developed by [Abhinanda Punnakkal](https://www.is.mpg.de/person/apunnakkal) and [Arjun Chandrasekaran](https://www.is.mpg.de/person/achandrasekaran), and tested by [Nikos Athanasiou](https://www.is.mpg.de/person/nathanasiou). 56 | 57 | If you have any questions you can contact us at babel@tue.mpg.de. 58 | 59 | -------------------------------------------------------------------------------- /action_recognition/class_balanced_loss.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # vim:fenc=utf-8 4 | # 5 | 6 | """ 7 | Code from: 8 | https://raw.githubusercontent.com/vandit15/Class-balanced-loss-pytorch/master/class_balanced_loss.py 9 | 10 | Pytorch implementation of Class-Balanced-Loss 11 | Reference: "Class-Balanced Loss Based on Effective Number of Samples" 12 | Authors: Yin Cui and 13 | Menglin Jia and 14 | Tsung Yi Lin and 15 | Yang Song and 16 | Serge J. Belongie 17 | https://arxiv.org/abs/1901.05555, CVPR'19. 18 | """ 19 | 20 | 21 | import numpy as np 22 | import torch 23 | import torch.nn.functional as F 24 | 25 | 26 | 27 | def focal_loss(labels, logits, alpha, gamma): 28 | """Compute the focal loss between `logits` and the ground truth `labels`. 29 | 30 | Focal loss = -alpha_t * (1-pt)^gamma * log(pt) 31 | where pt is the probability of being classified to the true class. 32 | pt = p (if true class), otherwise pt = 1 - p. p = sigmoid(logit). 33 | 34 | Args: 35 | labels: A float tensor of size [batch, num_classes]. 36 | logits: A float tensor of size [batch, num_classes]. 37 | alpha: A float tensor of size [batch_size] 38 | specifying per-example weight for balanced cross entropy. 39 | gamma: A float scalar modulating loss from hard and easy examples. 40 | 41 | Returns: 42 | focal_loss: A float32 scalar representing normalized total loss. 43 | """ 44 | BCLoss = F.binary_cross_entropy_with_logits(input = logits, target = labels,reduction = "none") 45 | 46 | if gamma == 0.0: 47 | modulator = 1.0 48 | else: 49 | modulator = torch.exp(-gamma * labels * logits - gamma * torch.log(1 + 50 | torch.exp(-1.0 * logits))) 51 | 52 | loss = modulator * BCLoss 53 | 54 | weighted_loss = alpha * loss 55 | focal_loss = torch.sum(weighted_loss) 56 | 57 | focal_loss /= torch.sum(labels) 58 | return focal_loss 59 | 60 | 61 | def CB_loss(labels, logits, samples_per_cls, no_of_classes, loss_type, beta, gamma, device): 62 | """Compute the Class Balanced Loss between `logits` and the ground truth `labels`. 
63 | 64 | Class Balanced Loss: ((1-beta)/(1-beta^n))*Loss(labels, logits) 65 | where Loss is one of the standard losses used for Neural Networks. 66 | 67 | Args: 68 | labels: A int tensor of size [batch]. 69 | logits: A float tensor of size [batch, no_of_classes]. 70 | samples_per_cls: A python list of size [no_of_classes]. 71 | no_of_classes: total number of classes. int 72 | loss_type: string. One of "sigmoid", "focal", "softmax". 73 | beta: float. Hyperparameter for Class balanced loss. 74 | gamma: float. Hyperparameter for Focal loss. 75 | 76 | Returns: 77 | cb_loss: A float tensor representing class balanced loss 78 | """ 79 | effective_num = 1.0 - np.power(beta, samples_per_cls) 80 | weights = (1.0 - beta) / np.array(effective_num) 81 | weights = weights / np.sum(weights) * no_of_classes 82 | 83 | labels_one_hot = F.one_hot(labels, no_of_classes).float().cuda(device) 84 | 85 | weights = torch.tensor(weights).float().cuda(device) 86 | weights = weights.unsqueeze(0) 87 | weights = weights.repeat(labels_one_hot.shape[0],1) * labels_one_hot 88 | weights = weights.sum(1) 89 | weights = weights.unsqueeze(1) 90 | weights = weights.repeat(1,no_of_classes) 91 | 92 | if loss_type == "focal": 93 | cb_loss = focal_loss(labels_one_hot, logits, weights, gamma) 94 | elif loss_type == "sigmoid": 95 | cb_loss = F.binary_cross_entropy_with_logits(input = logits,target = labels_one_hot, weight = weights) 96 | elif loss_type == "softmax": 97 | pred = logits.softmax(dim = 1) 98 | cb_loss = F.binary_cross_entropy(input = pred, target = labels_one_hot, weight = weights) 99 | return cb_loss 100 | 101 | 102 | def test(): 103 | no_of_classes = 5 104 | logits = torch.rand(10,no_of_classes).float() 105 | labels = torch.randint(0,no_of_classes, size = (10,)) 106 | beta = 0.9999 107 | gamma = 2.0 108 | samples_per_cls = [2,3,1,2,2] 109 | loss_type = "focal" 110 | cb_loss = CB_loss(labels, logits, samples_per_cls, no_of_classes,loss_type, beta, gamma) 111 | print(cb_loss) 112 | -------------------------------------------------------------------------------- /action_recognition/challenge/create_submission.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # vim:fenc=utf-8 4 | # 5 | # Copyright © 2021 achandrasekaran 6 | # 7 | # Distributed under terms of the MIT license. 8 | 9 | import sys, os, pdb, glob 10 | import uuid 11 | from os.path import join as ospj 12 | from os.path import dirname as ospd 13 | import json, pickle 14 | import argparse 15 | from tqdm import tqdm 16 | from collections import * 17 | 18 | import numpy as np 19 | import pandas as pd 20 | from pandas.core.common import flatten 21 | from fnmatch import fnmatch 22 | import re 23 | 24 | 25 | def load_test_scores(test_scores_fp): 26 | ''' 27 | Load the score prediction file, validate. 
28 | 29 | Format of data structure stored in prediction file: 30 | score_dict = list(zip( 31 | self.data_loader[ln].dataset.label[1], # sid 32 | self.data_loader[ln].dataset.sample_name, # seg_id 33 | self.data_loader[ln].dataset.label[2], # chunk_id 34 | score)) 35 | ''' 36 | # load test set predictions from model 37 | test_scores = pickle.load(open(test_scores_fp, 'rb')) 38 | 39 | # GT labels (-1 for test set), seg_id, chunk_id, score 40 | _, seg_ids, chunk_ids, scores = zip(*test_scores) 41 | 42 | # Validate the shape of predictions 43 | scores = np.array(scores) 44 | n_samples, n_classes = scores.shape 45 | assert n_classes in (60, 120) 46 | 47 | return list(zip(seg_ids, chunk_ids, scores)), n_classes 48 | 49 | 50 | def load_test_samples(n_classes): 51 | '''Load the GT samples corresponding to the BABEL subset (# classes) used. 52 | 53 | GT labels data structure format: 54 | List of seg_id, (label, sid, chunk_n, anntr_id) 55 | 56 | Arguments: 57 | scores: np.array (n_samples, n_classes) contains predicted scores for samples. 58 | ''' 59 | # load test set samples 60 | samples_filename = f'test_label_{n_classes}.pkl' 61 | test_samples = pickle.load(open(f'../data/release/{samples_filename}', 'rb')) 62 | 63 | # GT labels (-1 for test set), sid, chunk_id, anntr_id 64 | seg_ids, (_, _, chunk_ids, _) = test_samples 65 | 66 | return list(zip(seg_ids, chunk_ids)) 67 | 68 | 69 | def create_submission(test_samples, test_pred_scores, n_classes): 70 | '''Create a submission with the same ordering of samples 71 | as provided in the `test_label_{60, 120}.pkl` file. 72 | ''' 73 | submission = [] 74 | perfect_map = True 75 | 76 | # Ideal scenario -- 1:1 map between samples in two files 77 | for i, ((seg_id, chunk_id), (pred_seg_id, pred_chunk_id, _)) in \ 78 | enumerate(zip(test_samples, test_pred_scores)): 79 | if seg_id != pred_seg_id or chunk_id != pred_chunk_id: 80 | perfect_map = False 81 | 82 | if True == perfect_map: 83 | submission = np.array(list(zip(*test_pred_scores))[2]) 84 | else: 85 | # For each sample, find its predicted score 86 | for i, (seg_id, chunk_id) in enumerate(test_samples): 87 | for pred_seg_id, pred_chunk_id, score in test_pred_scores: 88 | if pred_seg_id == seg_id and pred_chunk_id == chunk_id: 89 | submission.append(score) 90 | break 91 | submission = np.array(submission) 92 | if 60 == n_classes: 93 | assert 15647 == submission.shape[0] 94 | elif 120 == n_classes: 95 | assert 16839 == submission.shape[0] 96 | 97 | return submission 98 | 99 | 100 | def save_submission(submission, filepath): 101 | '''Save predicted scores for test samples in .npz format for 102 | submission to BABEL Action Recognition Challenge. 
103 | ''' 104 | np.savez(filepath, submission) 105 | print(f'Successfully saved submission in: {filepath}') 106 | 107 | return None 108 | 109 | 110 | if __name__ == '__main__': 111 | # Add args 112 | parser = argparse.ArgumentParser( 113 | description='Predicted test scores --> Submission to server') 114 | parser.add_argument( 115 | '--pred_path', 116 | default='./epoch1_test_score.pkl', 117 | help='Path to file containing model predictions (saved to disk by train_test.py.') 118 | parser.add_argument( 119 | '--sub_path', 120 | default='./test_sub.npz', 121 | help='Path to write submission file.') 122 | 123 | # Parse args 124 | args = parser.parse_args() 125 | 126 | # Process scores into submission file 127 | test_pred_scores, n_classes = load_test_scores(args.pred_path) 128 | test_samples = load_test_samples(n_classes) 129 | submission = create_submission(test_samples, test_pred_scores, n_classes) 130 | save_submission(submission, args.sub_path) 131 | -------------------------------------------------------------------------------- /action_recognition/feeders/tools.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | 4 | def downsample(data_numpy, step, random_sample=True): 5 | # input: C,T,V,M 6 | begin = np.random.randint(step) if random_sample else 0 7 | return data_numpy[:, begin::step, :, :] 8 | 9 | 10 | def temporal_slice(data_numpy, step): 11 | # input: C,T,V,M 12 | C, T, V, M = data_numpy.shape 13 | return data_numpy.reshape(C, T / step, step, V, M).transpose( 14 | (0, 1, 3, 2, 4)).reshape(C, T / step, V, step * M) 15 | 16 | 17 | def mean_subtractor(data_numpy, mean): 18 | # input: C,T,V,M 19 | # naive version 20 | if mean == 0: 21 | return 22 | C, T, V, M = data_numpy.shape 23 | valid_frame = (data_numpy != 0).sum(axis=3).sum(axis=2).sum(axis=0) > 0 24 | begin = valid_frame.argmax() 25 | end = len(valid_frame) - valid_frame[::-1].argmax() 26 | data_numpy[:, :end, :, :] = data_numpy[:, :end, :, :] - mean 27 | return data_numpy 28 | 29 | 30 | def auto_pading(data_numpy, size, random_pad=False): 31 | C, T, V, M = data_numpy.shape 32 | if T < size: 33 | begin = random.randint(0, size - T) if random_pad else 0 34 | data_numpy_paded = np.zeros((C, size, V, M)) 35 | data_numpy_paded[:, begin:begin + T, :, :] = data_numpy 36 | return data_numpy_paded 37 | else: 38 | return data_numpy 39 | 40 | 41 | def random_choose(data_numpy, size, auto_pad=True): 42 | # input: C,T,V,M 随机选择其中一段,不是很合理。因为有0 43 | C, T, V, M = data_numpy.shape 44 | if T == size: 45 | return data_numpy 46 | elif T < size: 47 | if auto_pad: 48 | return auto_pading(data_numpy, size, random_pad=True) 49 | else: 50 | return data_numpy 51 | else: 52 | begin = random.randint(0, T - size) 53 | return data_numpy[:, begin:begin + size, :, :] 54 | 55 | 56 | def random_move(data_numpy, 57 | angle_candidate=[-10., -5., 0., 5., 10.], 58 | scale_candidate=[0.9, 1.0, 1.1], 59 | transform_candidate=[-0.2, -0.1, 0.0, 0.1, 0.2], 60 | move_time_candidate=[1]): 61 | # input: C,T,V,M 62 | C, T, V, M = data_numpy.shape 63 | move_time = random.choice(move_time_candidate) 64 | node = np.arange(0, T, T * 1.0 / move_time).round().astype(int) 65 | node = np.append(node, T) 66 | num_node = len(node) 67 | 68 | A = np.random.choice(angle_candidate, num_node) 69 | S = np.random.choice(scale_candidate, num_node) 70 | T_x = np.random.choice(transform_candidate, num_node) 71 | T_y = np.random.choice(transform_candidate, num_node) 72 | 73 | a = np.zeros(T) 74 | s = np.zeros(T) 75 | 
t_x = np.zeros(T) 76 | t_y = np.zeros(T) 77 | 78 | # linspace 79 | for i in range(num_node - 1): 80 | a[node[i]:node[i + 1]] = np.linspace( 81 | A[i], A[i + 1], node[i + 1] - node[i]) * np.pi / 180 82 | s[node[i]:node[i + 1]] = np.linspace(S[i], S[i + 1], 83 | node[i + 1] - node[i]) 84 | t_x[node[i]:node[i + 1]] = np.linspace(T_x[i], T_x[i + 1], 85 | node[i + 1] - node[i]) 86 | t_y[node[i]:node[i + 1]] = np.linspace(T_y[i], T_y[i + 1], 87 | node[i + 1] - node[i]) 88 | 89 | theta = np.array([[np.cos(a) * s, -np.sin(a) * s], 90 | [np.sin(a) * s, np.cos(a) * s]]) # xuanzhuan juzhen 91 | 92 | # perform transformation 93 | for i_frame in range(T): 94 | xy = data_numpy[0:2, i_frame, :, :] 95 | new_xy = np.dot(theta[:, :, i_frame], xy.reshape(2, -1)) 96 | new_xy[0] += t_x[i_frame] 97 | new_xy[1] += t_y[i_frame] # pingyi bianhuan 98 | data_numpy[0:2, i_frame, :, :] = new_xy.reshape(2, V, M) 99 | 100 | return data_numpy 101 | 102 | 103 | def random_shift(data_numpy): 104 | # input: C,T,V,M 偏移其中一段 105 | C, T, V, M = data_numpy.shape 106 | data_shift = np.zeros(data_numpy.shape) 107 | valid_frame = (data_numpy != 0).sum(axis=3).sum(axis=2).sum(axis=0) > 0 108 | begin = valid_frame.argmax() 109 | end = len(valid_frame) - valid_frame[::-1].argmax() 110 | 111 | size = end - begin 112 | bias = random.randint(0, T - size) 113 | data_shift[:, bias:bias + size, :, :] = data_numpy[:, begin:end, :, :] 114 | 115 | return data_shift 116 | 117 | 118 | def openpose_match(data_numpy): 119 | C, T, V, M = data_numpy.shape 120 | assert (C == 3) 121 | score = data_numpy[2, :, :, :].sum(axis=1) 122 | # the rank of body confidence in each frame (shape: T-1, M) 123 | rank = (-score[0:T - 1]).argsort(axis=1).reshape(T - 1, M) 124 | 125 | # data of frame 1 126 | xy1 = data_numpy[0:2, 0:T - 1, :, :].reshape(2, T - 1, V, M, 1) 127 | # data of frame 2 128 | xy2 = data_numpy[0:2, 1:T, :, :].reshape(2, T - 1, V, 1, M) 129 | # square of distance between frame 1&2 (shape: T-1, M, M) 130 | distance = ((xy2 - xy1) ** 2).sum(axis=2).sum(axis=0) 131 | 132 | # match pose 133 | forward_map = np.zeros((T, M), dtype=int) - 1 134 | forward_map[0] = range(M) 135 | for m in range(M): 136 | choose = (rank == m) 137 | forward = distance[choose].argmin(axis=1) 138 | for t in range(T - 1): 139 | distance[t, :, forward[t]] = np.inf 140 | forward_map[1:][choose] = forward 141 | assert (np.all(forward_map >= 0)) 142 | 143 | # string data 144 | for t in range(T - 1): 145 | forward_map[t + 1] = forward_map[t + 1][forward_map[t]] 146 | 147 | # generate data 148 | new_data_numpy = np.zeros(data_numpy.shape) 149 | for t in range(T): 150 | new_data_numpy[:, t, :, :] = data_numpy[:, t, :, forward_map[ 151 | t]].transpose(1, 2, 0) 152 | data_numpy = new_data_numpy 153 | 154 | # score sort 155 | trace_score = data_numpy[2, :, :, :].sum(axis=1).sum(axis=0) 156 | rank = (-trace_score).argsort() 157 | data_numpy = data_numpy[:, :, :, rank] 158 | 159 | return data_numpy 160 | -------------------------------------------------------------------------------- /action_recognition/model/agcn.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import numpy as np 4 | import torch 5 | import torch.nn as nn 6 | from torch.autograd import Variable 7 | 8 | def import_class(name): 9 | components = name.split('.') 10 | mod = __import__(components[0]) 11 | for comp in components[1:]: 12 | mod = getattr(mod, comp) 13 | return mod 14 | 15 | 16 | def conv_branch_init(conv, branches): 17 | weight = conv.weight 18 | n = 
weight.size(0) 19 | k1 = weight.size(1) 20 | k2 = weight.size(2) 21 | nn.init.normal_(weight, 0, math.sqrt(2. / (n * k1 * k2 * branches))) 22 | nn.init.constant_(conv.bias, 0) 23 | 24 | 25 | def conv_init(conv): 26 | nn.init.kaiming_normal_(conv.weight, mode='fan_out') 27 | nn.init.constant_(conv.bias, 0) 28 | 29 | 30 | def bn_init(bn, scale): 31 | nn.init.constant_(bn.weight, scale) 32 | nn.init.constant_(bn.bias, 0) 33 | 34 | 35 | class unit_tcn(nn.Module): 36 | def __init__(self, in_channels, out_channels, kernel_size=9, stride=1): 37 | super(unit_tcn, self).__init__() 38 | pad = int((kernel_size - 1) / 2) 39 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=(kernel_size, 1), padding=(pad, 0), 40 | stride=(stride, 1)) 41 | 42 | self.bn = nn.BatchNorm2d(out_channels) 43 | self.relu = nn.ReLU() 44 | conv_init(self.conv) 45 | bn_init(self.bn, 1) 46 | 47 | def forward(self, x): 48 | x = self.bn(self.conv(x)) 49 | return x 50 | 51 | 52 | class unit_gcn(nn.Module): 53 | def __init__(self, in_channels, out_channels, A, coff_embedding=4, num_subset=3): 54 | super(unit_gcn, self).__init__() 55 | inter_channels = out_channels // coff_embedding 56 | self.inter_c = inter_channels 57 | self.PA = nn.Parameter(torch.from_numpy(A.astype(np.float32))) 58 | nn.init.constant_(self.PA, 1e-6) 59 | self.A = Variable(torch.from_numpy(A.astype(np.float32)), requires_grad=False) 60 | self.num_subset = num_subset 61 | 62 | self.conv_a = nn.ModuleList() 63 | self.conv_b = nn.ModuleList() 64 | self.conv_d = nn.ModuleList() 65 | for i in range(self.num_subset): 66 | self.conv_a.append(nn.Conv2d(in_channels, inter_channels, 1)) 67 | self.conv_b.append(nn.Conv2d(in_channels, inter_channels, 1)) 68 | self.conv_d.append(nn.Conv2d(in_channels, out_channels, 1)) 69 | 70 | if in_channels != out_channels: 71 | self.down = nn.Sequential( 72 | nn.Conv2d(in_channels, out_channels, 1), 73 | nn.BatchNorm2d(out_channels) 74 | ) 75 | else: 76 | self.down = lambda x: x 77 | 78 | self.bn = nn.BatchNorm2d(out_channels) 79 | self.soft = nn.Softmax(-2) 80 | self.relu = nn.ReLU() 81 | 82 | for m in self.modules(): 83 | if isinstance(m, nn.Conv2d): 84 | conv_init(m) 85 | elif isinstance(m, nn.BatchNorm2d): 86 | bn_init(m, 1) 87 | bn_init(self.bn, 1e-6) 88 | for i in range(self.num_subset): 89 | conv_branch_init(self.conv_d[i], self.num_subset) 90 | 91 | def forward(self, x): 92 | N, C, T, V = x.size() 93 | A = self.A 94 | if -1 != x.get_device(): 95 | A = A.cuda(x.get_device()) 96 | A = A + self.PA 97 | 98 | y = None 99 | for i in range(self.num_subset): 100 | A1 = self.conv_a[i](x).permute(0, 3, 1, 2).contiguous().view(N, V, self.inter_c * T) 101 | A2 = self.conv_b[i](x).view(N, self.inter_c * T, V) 102 | A1 = self.soft(torch.matmul(A1, A2) / A1.size(-1)) # N V V 103 | A1 = A1 + A[i] 104 | A2 = x.view(N, C * T, V) 105 | z = self.conv_d[i](torch.matmul(A2, A1).view(N, C, T, V)) 106 | y = z + y if y is not None else z 107 | 108 | y = self.bn(y) 109 | y += self.down(x) 110 | return self.relu(y) 111 | 112 | 113 | class TCN_GCN_unit(nn.Module): 114 | def __init__(self, in_channels, out_channels, A, stride=1, residual=True): 115 | super(TCN_GCN_unit, self).__init__() 116 | self.gcn1 = unit_gcn(in_channels, out_channels, A) 117 | self.tcn1 = unit_tcn(out_channels, out_channels, stride=stride) 118 | self.relu = nn.ReLU() 119 | if not residual: 120 | self.residual = lambda x: 0 121 | 122 | elif (in_channels == out_channels) and (stride == 1): 123 | self.residual = lambda x: x 124 | 125 | else: 126 | self.residual = 
unit_tcn(in_channels, out_channels, kernel_size=1, stride=stride) 127 | 128 | def forward(self, x): 129 | x = self.tcn1(self.gcn1(x)) + self.residual(x) 130 | return self.relu(x) 131 | 132 | 133 | class Model(nn.Module): 134 | def __init__(self, num_class=60, num_point=25, num_person=2, graph=None, graph_args=dict(), in_channels=3): 135 | super(Model, self).__init__() 136 | 137 | if graph is None: 138 | raise ValueError() 139 | else: 140 | Graph = import_class(graph) 141 | self.graph = Graph(**graph_args) 142 | 143 | A = self.graph.A 144 | self.data_bn = nn.BatchNorm1d(num_person * in_channels * num_point) 145 | 146 | self.l1 = TCN_GCN_unit(3, 64, A, residual=False) 147 | self.l2 = TCN_GCN_unit(64, 64, A) 148 | self.l3 = TCN_GCN_unit(64, 64, A) 149 | self.l4 = TCN_GCN_unit(64, 64, A) 150 | self.l5 = TCN_GCN_unit(64, 128, A, stride=2) 151 | self.l6 = TCN_GCN_unit(128, 128, A) 152 | self.l7 = TCN_GCN_unit(128, 128, A) 153 | self.l8 = TCN_GCN_unit(128, 256, A, stride=2) 154 | self.l9 = TCN_GCN_unit(256, 256, A) 155 | self.l10 = TCN_GCN_unit(256, 256, A) 156 | 157 | self.fc = nn.Linear(256, num_class) 158 | nn.init.normal_(self.fc.weight, 0, math.sqrt(2. / num_class)) 159 | bn_init(self.data_bn, 1) 160 | 161 | def forward(self, x): 162 | N, C, T, V, M = x.size() 163 | 164 | x = x.permute(0, 4, 3, 1, 2).contiguous().view(N, M * V * C, T) 165 | x = self.data_bn(x) 166 | x = x.view(N, M, V, C, T).permute(0, 1, 3, 4, 2).contiguous().view(N * M, C, T, V) 167 | 168 | x = self.l1(x) 169 | x = self.l2(x) 170 | x = self.l3(x) 171 | x = self.l4(x) 172 | x = self.l5(x) 173 | x = self.l6(x) 174 | x = self.l7(x) 175 | x = self.l8(x) 176 | x = self.l9(x) 177 | x = self.l10(x) 178 | 179 | # N*M,C,T,V 180 | c_new = x.size(1) 181 | x = x.view(N, M, c_new, -1) 182 | x = x.mean(3).mean(1) 183 | 184 | return self.fc(x) 185 | -------------------------------------------------------------------------------- /action_recognition/model/agcn_mult.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import numpy as np 4 | import torch 5 | import torch.nn as nn 6 | from torch.autograd import Variable 7 | 8 | 9 | def import_class(name): 10 | components = name.split('.') 11 | mod = __import__(components[0]) 12 | for comp in components[1:]: 13 | mod = getattr(mod, comp) 14 | return mod 15 | 16 | 17 | def conv_branch_init(conv, branches): 18 | weight = conv.weight 19 | n = weight.size(0) 20 | k1 = weight.size(1) 21 | k2 = weight.size(2) 22 | nn.init.normal_(weight, 0, math.sqrt(2. 
/ (n * k1 * k2 * branches))) 23 | nn.init.constant_(conv.bias, 0) 24 | 25 | 26 | def conv_init(conv): 27 | nn.init.kaiming_normal_(conv.weight, mode='fan_out') 28 | nn.init.constant_(conv.bias, 0) 29 | 30 | 31 | def bn_init(bn, scale): 32 | nn.init.constant_(bn.weight, scale) 33 | nn.init.constant_(bn.bias, 0) 34 | 35 | 36 | class unit_tcn(nn.Module): 37 | def __init__(self, in_channels, out_channels, kernel_size=9, stride=1): 38 | super(unit_tcn, self).__init__() 39 | pad = int((kernel_size - 1) / 2) 40 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=(kernel_size, 1), padding=(pad, 0), 41 | stride=(stride, 1)) 42 | 43 | self.bn = nn.BatchNorm2d(out_channels) 44 | self.relu = nn.ReLU() 45 | conv_init(self.conv) 46 | bn_init(self.bn, 1) 47 | 48 | def forward(self, x): 49 | x = self.bn(self.conv(x)) 50 | return x 51 | 52 | 53 | class unit_gcn(nn.Module): 54 | def __init__(self, in_channels, out_channels, A, coff_embedding=4, num_subset=3): 55 | super(unit_gcn, self).__init__() 56 | inter_channels = out_channels // coff_embedding 57 | self.inter_c = inter_channels 58 | self.PA = nn.Parameter(torch.from_numpy(A.astype(np.float32))) 59 | nn.init.constant_(self.PA, 1e-6) 60 | self.A = Variable(torch.from_numpy(A.astype(np.float32)), requires_grad=False) 61 | self.num_subset = num_subset 62 | 63 | self.conv_a = nn.ModuleList() 64 | self.conv_b = nn.ModuleList() 65 | self.conv_d = nn.ModuleList() 66 | for i in range(self.num_subset): 67 | self.conv_a.append(nn.Conv2d(in_channels, inter_channels, 1)) 68 | self.conv_b.append(nn.Conv2d(in_channels, inter_channels, 1)) 69 | self.conv_d.append(nn.Conv2d(in_channels, out_channels, 1)) 70 | 71 | if in_channels != out_channels: 72 | self.down = nn.Sequential( 73 | nn.Conv2d(in_channels, out_channels, 1), 74 | nn.BatchNorm2d(out_channels) 75 | ) 76 | else: 77 | self.down = lambda x: x 78 | 79 | self.bn = nn.BatchNorm2d(out_channels) 80 | self.soft = nn.Softmax(-2) 81 | self.relu = nn.ReLU() 82 | 83 | for m in self.modules(): 84 | if isinstance(m, nn.Conv2d): 85 | conv_init(m) 86 | elif isinstance(m, nn.BatchNorm2d): 87 | bn_init(m, 1) 88 | bn_init(self.bn, 1e-6) 89 | for i in range(self.num_subset): 90 | conv_branch_init(self.conv_d[i], self.num_subset) 91 | 92 | def forward(self, x): 93 | N, C, T, V = x.size() 94 | A = self.A.cuda(x.get_device()) 95 | A = A + self.PA 96 | 97 | y = None 98 | for i in range(self.num_subset): 99 | A1 = self.conv_a[i](x).permute(0, 3, 1, 2).contiguous().view(N, V, self.inter_c * T) 100 | A2 = self.conv_b[i](x).view(N, self.inter_c * T, V) 101 | A1 = self.soft(torch.matmul(A1, A2) / A1.size(-1)) # N V V 102 | A1 = A1 + A[i] 103 | A2 = x.view(N, C * T, V) 104 | z = self.conv_d[i](torch.matmul(A2, A1).view(N, C, T, V)) 105 | y = z + y if y is not None else z 106 | 107 | y = self.bn(y) 108 | y += self.down(x) 109 | return self.relu(y) 110 | 111 | 112 | class TCN_GCN_unit(nn.Module): 113 | def __init__(self, in_channels, out_channels, A, stride=1, residual=True): 114 | super(TCN_GCN_unit, self).__init__() 115 | self.gcn1 = unit_gcn(in_channels, out_channels, A) 116 | self.tcn1 = unit_tcn(out_channels, out_channels, stride=stride) 117 | self.relu = nn.ReLU() 118 | if not residual: 119 | self.residual = lambda x: 0 120 | 121 | elif (in_channels == out_channels) and (stride == 1): 122 | self.residual = lambda x: x 123 | 124 | else: 125 | self.residual = unit_tcn(in_channels, out_channels, kernel_size=1, stride=stride) 126 | 127 | def forward(self, x): 128 | x = self.tcn1(self.gcn1(x)) + self.residual(x) 129 | return 
self.relu(x) 130 | 131 | 132 | class Model(nn.Module): 133 | def __init__(self, num_class=60, num_point=25, loss_type='softmax', num_person=2, graph=None, graph_args=dict(), in_channels=3): 134 | super(Model, self).__init__() 135 | 136 | if graph is None: 137 | raise ValueError() 138 | else: 139 | Graph = import_class(graph) 140 | self.graph = Graph(**graph_args) 141 | 142 | A = self.graph.A 143 | self.data_bn = nn.BatchNorm1d(num_person * in_channels * num_point) 144 | 145 | self.l1 = TCN_GCN_unit(3, 64, A, residual=False) 146 | self.l2 = TCN_GCN_unit(64, 64, A) 147 | self.l3 = TCN_GCN_unit(64, 64, A) 148 | self.l4 = TCN_GCN_unit(64, 64, A) 149 | self.l5 = TCN_GCN_unit(64, 128, A, stride=2) 150 | self.l6 = TCN_GCN_unit(128, 128, A) 151 | self.l7 = TCN_GCN_unit(128, 128, A) 152 | self.l8 = TCN_GCN_unit(128, 256, A, stride=2) 153 | self.l9 = TCN_GCN_unit(256, 256, A) 154 | self.l10 = TCN_GCN_unit(256, 256, A) 155 | 156 | 157 | self.fc = nn.Linear(256, num_class) 158 | self.sig = nn.Sigmoid() 159 | nn.init.normal_(self.fc.weight, 0, math.sqrt(2. / num_class)) 160 | if loss_type == 'sigmoid' or loss_type == 'focal' or loss_type=='focal2': 161 | nn.init.constant(self.fc.bias, -np.log(num_class - 1)) 162 | # self.sof = nn.Softmax(-1) 163 | bn_init(self.data_bn, 1) 164 | 165 | def forward(self, x): 166 | N, C, T, V, M = x.size() 167 | 168 | x = x.permute(0, 4, 3, 1, 2).contiguous().view(N, M * V * C, T) 169 | x = self.data_bn(x) 170 | x = x.view(N, M, V, C, T).permute(0, 1, 3, 4, 2).contiguous().view(N * M, C, T, V) 171 | 172 | x = self.l1(x) 173 | x = self.l2(x) 174 | x = self.l3(x) 175 | x = self.l4(x) 176 | x = self.l5(x) 177 | x = self.l6(x) 178 | x = self.l7(x) 179 | x = self.l8(x) 180 | x = self.l9(x) 181 | x = self.l10(x) 182 | 183 | # N*M,C,T,V 184 | c_new = x.size(1) 185 | x = x.view(N, M, c_new, -1) 186 | x = x.mean(3).mean(1) 187 | 188 | x = self.fc(x) 189 | # import pdb 190 | # pdb.set_trace() 191 | x = self.sig(x) 192 | return x 193 | -------------------------------------------------------------------------------- /action_recognition/Readme.md: -------------------------------------------------------------------------------- 1 | ## Action Recognition 2 | 3 | We follow the 3D skeleton-based action recognition setup and [implementation](https://github.com/lshiwjx/2s-AGCN) from Shi et al. [2] 4 | 5 | ### Task 6 | 7 | **Sample** `(n_frames, feat_dim)`: Each action segment (start-end span) from BABEL is divided into contiguous 5-second chunks. See the [paper](https://arxiv.org/pdf/2106.09696.pdf) for more details. 8 | **Label** ``: Index of the ground-truth action label of the segment that the current chunk belongs to. 9 | 10 | 11 | ### Features 12 | 13 | We extract the joint positions (in `x, y, z` co-ordinates) from the AMASS mocap sequences in NTU RGB+D [1] skeleton format. There are 25 joints, resulting in `feat_dim=25*3=75`. 14 | 15 | Each sample is a 5-second chunk @ 30fps, resulting in `n_frames=150`. 16 | 17 | Pre-preprocessing of the skeleton joints follows Shi et al. [2]. Download the pre-processed sample features and corresponding labels: 18 | 19 | ``` 20 | # BABEL Dense 21 | cd data/ 22 | wget https://human-movement.is.tue.mpg.de/babel_feats_labels.tar.gz 23 | tar -xzvf babel_feats_labels.tar.gz -C ./ 24 | 25 | # BABEL Dense+Extra 26 | wget https://human-movement.is.tue.mpg.de/babel_dense_and_extra_feats_labels.tar.gz 27 | tar -xzvf babel_dense_and_extra_feats_labels.tar.gz -C ./ 28 | ``` 29 | 30 | Note: We only train and test with Dense annotations. 
For details regarding Dense and Extra annotations, please see BABEL's [Data page](https://babel.is.tue.mpg.de/data.html). 31 |  32 |  33 | ### Training and Inference 34 |  35 | Set up and activate a virtual environment: 36 |  37 | ``` 38 | python3 -m venv babel-env 39 | source $PWD/babel-env/bin/activate 40 | $PWD/babel-env/bin/pip install --upgrade pip setuptools 41 | $PWD/babel-env/bin/pip install -r requirements.txt 42 | ``` 43 |  44 | #### Model 45 |  46 | We use [this](https://github.com/lshiwjx/2s-AGCN) implementation of the 2S-AGCN [2] model for 3D skeleton-based action recognition. Note that we use only the Joint stream. 47 |  48 |  49 | #### Training 50 |  51 | To train a model with the Cross-Entropy (CE) loss: 52 |  53 | From the top directory `babel/`, enter the following: 54 |  55 | ```python action_recognition/train_test.py --config action_recognition/config/babel_v1.0/train_60.yaml``` 56 |  57 | To train a model with the Focal loss [3] and class-balancing [4]: 58 |  59 | ```python action_recognition/train_test.py --config action_recognition/config/babel_v1.0/train_60_wfl.yaml``` 60 |  61 | Use the respective configuration files inside `config/babel_v1.0` to train models on `120` classes with either loss. 62 |  63 |  64 | #### Inference 65 |  66 | Provide the path to the trained model in the `weights` key of the respective config file. 67 |  68 | To perform inference, use the same command as for training, and pass the test config file as an argument. E.g.: 69 |  70 | ```python action_recognition/train_test.py --config action_recognition/config/babel_v1.0/test_60.yaml``` 71 |  72 | or 73 |  74 | ```python action_recognition/train_test.py --config action_recognition/config/babel_v1.0/test_60_wfl.yaml``` 75 |  76 | To save the predicted scores to disk, set `save_score: True` in the config file. 77 |  78 | ### Pre-trained models 79 |  80 | Download the checkpoints from the links below and place them in `action_recognition/ckpts/`. 81 |  82 | Performing inference on the validation set with these checkpoints should result in the following performance. 83 |  84 | | \# Classes | Loss type | Ckpt | Top-5 | Top-1 | Top-1-norm | 85 | |---|---|---|---|---|---| 86 | | BABEL-60 | CE | [ntu_sk_60_agcn_joint_const_lr_1e-3-17-6390.pt](https://human-movement.is.tue.mpg.de/release/ckpts/ntu_sk_60_agcn_joint_const_lr_1e-3-17-6390.pt) | 0.74 | 0.42 | 0.24 | 87 | | BABEL-60 | Focal | [wfl_ntu_sk_60_agcn_joint_const_lr_1e-3-93-33370.pt](https://human-movement.is.tue.mpg.de/release/ckpts/wfl_ntu_sk_60_agcn_joint_const_lr_1e-3-93-33370.pt) | 0.69 | 0.34 | 0.30 | 88 | | BABEL-120 | CE | [ntu_sk_120_agcn_joint_const_lr_1e-3-15-12240.pt](https://human-movement.is.tue.mpg.de/release/ckpts/ntu_sk_120_agcn_joint_const_lr_1e-3-15-12240.pt) | 0.72 | 0.4 | 0.16 | 89 | | BABEL-120 | Focal | [wfl_ntu_sk_120_agcn_joint_const_lr_1e-3-157-60356.pt](https://human-movement.is.tue.mpg.de/release/ckpts/wfl_ntu_sk_120_agcn_joint_const_lr_1e-3-157-60356.pt) | 0.59 | 0.29 | 0.23 | 90 |  91 | **Note:** The models are *only* trained with dense labels from `train.json` (see the [project webpage](https://babel.is.tue.mpg.de/data.html) for more details about the data). 92 |  93 |  94 | ### Metrics 95 |  96 | **Description** 97 |  98 | 1. **Top-1** measures the accuracy of the highest-scoring prediction. 99 | 2. **Top-5** evaluates whether the ground-truth category is present among the top 5 highest-scoring predictions. 100 | 1. It accounts for labeling noise and inherent label ambiguity. 101 | 2. 
It also accounts for the possible association of multiple action categories with a single input movement sequence. For instance, a person `walking in a circle` is mapped to the two action categories `walk` and `circular movement`. 102 | Ideal models will predict high scores for all the categories relevant to the movement sample. 103 | 3. **Top-1-norm** is the mean `Top-1` across categories. The magnitude of the difference between `Top-1` and `Top-1-norm` illustrates the class-specific bias in the model's performance. In BABEL, it reflects the impact of class imbalance on learning. (A minimal sketch of how these metrics can be computed from a score matrix is included after the references below.) 104 |  105 |  106 | ### Challenge 107 |  108 | To make a submission: 109 |  110 | 1. Store the predictions (variable `pred_scores` in [L591](https://github.com/abhinanda-punnakkal/BABEL/blob/6454163e196fc6400e1b8232dffb651341ed7c14/action_recognition/train_test.py#L591) of `train_test.py`) as a Python pickle. 111 | - `pred_scores` is a list of tuples, each containing the following 4 elements: (sequence ID, segment ID, chunk ID, score). Here, `score` is an `np.array` of size `(N, C)`, where `N` is the # samples in the test set and `C` is the # classes. 112 | - By default, `train_test.py` stores this pickle file as `<work_dir>/epoch1_test_score.pkl` (see [L604](https://github.com/abhinanda-punnakkal/BABEL/blob/6454163e196fc6400e1b8232dffb651341ed7c14/action_recognition/train_test.py#L606)). 113 | 2. In the command line, type the following commands: 114 | 1. `cd action_recognition/challenge/` 115 | 2. `python create_submission.py --pred_path <work_dir>/epoch1_test_score.pkl --sub_path <path to output submission file>` 116 | - Note: This code assumes that the GT test samples (`test_label_{60, 120}.pkl`) are present in the following path: `action_recognition/data/release/` 117 | 3. Submit the `.npz` submission file to the BABEL Action Recognition Challenge [evaluation server](https://babel-evaluation.is.tuebingen.mpg.de/). 118 |  119 |  120 | ### References 121 |  122 | [1] Shahroudy, Amir, et al. "NTU RGB+D: A large scale dataset for 3D human activity analysis." Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2016.
123 | [2] Shi, Lei, et al. "Two-stream adaptive graph convolutional networks for skeleton-based action recognition." Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition. 2019.
124 | [3] Lin, Tsung-Yi, et al. "Focal loss for dense object detection." Proceedings of the IEEE international conference on computer vision. 2017.
125 | [4] Cui, Yin, et al. "Class-balanced loss based on effective number of samples." Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition. 2019.
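**Metric computation (sketch):** The snippet below is a minimal sketch of how the metrics above can be computed; it is not part of the released code. It assumes `scores` is an `np.array` of predicted scores with shape `(N, C)` and `labels` is a length-`N` array of ground-truth class indices (both names are illustrative only).

```
import numpy as np

def compute_metrics(scores, labels, k=5):
    '''Return (Top-1, Top-k, Top-1-norm) given (N, C) scores and N ground-truth labels.'''
    preds = scores.argmax(axis=1)                    # highest-scoring class per sample
    top1 = float((preds == labels).mean())
    # Top-k: is the ground-truth class among the k highest-scoring classes?
    topk_classes = np.argsort(scores, axis=1)[:, -k:]
    topk = float(np.mean([labels[i] in topk_classes[i] for i in range(len(labels))]))
    # Top-1-norm: mean of the per-class Top-1 accuracies
    per_class = [float((preds[labels == c] == c).mean()) for c in np.unique(labels)]
    top1_norm = float(np.mean(per_class))
    return top1, topk, top1_norm
```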
126 | -------------------------------------------------------------------------------- /action_recognition/data_gen/dutils.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # vim:fenc=utf-8 4 | # 5 | # Copyright © 2021 achandrasekaran 6 | # 7 | # Distributed under terms of the MIT license. 8 | 9 | import sys, os, pdb 10 | import os.path as osp 11 | from os.path import join as ospj 12 | from os.path import basename as ospb 13 | from os.path import dirname as ospd 14 | 15 | import numpy as np 16 | import torch 17 | 18 | import json, pickle, csv 19 | from collections import Counter 20 | from tqdm import tqdm 21 | 22 | from smplx import SMPLH 23 | 24 | import viz 25 | 26 | 27 | def read_json(json_filename): 28 | '''Return contents of JSON file''' 29 | jc = None 30 | with open(json_filename) as infile: 31 | jc = json.load(infile) 32 | return jc 33 | 34 | def read_pkl(pkl_filename): 35 | '''Return contents of pikcle file''' 36 | pklc = None 37 | with open(pkl_filename, 'rb') as infile: 38 | pklc = pickle.load(infile) 39 | return pklc 40 | 41 | def write_json(contents, filename): 42 | with open(filename, 'w') as outfile: 43 | json.dump(contents, outfile, indent=2) 44 | 45 | def write_pkl(contents, filename): 46 | with open(filename, 'wb') as outfile: 47 | pickle.dump(contents, outfile) 48 | 49 | def smpl_to_nturgbd(model_type='smplh', out_format='nturgbd'): 50 | ''' Borrowed from https://gitlab.tuebingen.mpg.de/apunnakkal/2s_agcn/-/blob/master/data_gen/smpl_data_utils.py 51 | NTU mapping 52 | ----------- 53 | 0 --> ? 54 | 1-base of the spine 55 | 2-middle of the spine 56 | 3-neck 57 | 4-head 58 | 5-left shoulder 59 | 6-left elbow 60 | 7-left wrist 61 | 8-left hand 62 | 9-right shoulder 63 | 10-right elbow 64 | 11-right wrist 65 | 12-right hand 66 | 13-left hip 67 | 14-left knee 68 | 15-left ankle 69 | 16-left foot 70 | 17-right hip 71 | 18-right knee 72 | 19-right ankle 73 | 20-right foot 74 | 21-spine 75 | 22-tip of the left hand 76 | 23-left thumb 77 | 24-tip of the right hand 78 | 25-right thumb 79 | 80 | :param model_type: 81 | :param out_format: 82 | :return: 83 | ''' 84 | if model_type == 'smplh' and out_format == 'nturgbd': 85 | '22 and 37 are approximation for hand (base of index finger)' 86 | return np.array([0, 3, 12, 15, 87 | 16, 18, 20, 22, #left hand 88 | 17, 19, 21, 37, # right hand 89 | 1, 4, 7, 10, #left leg 90 | 2, 5, 8, 11, #right hand 91 | 9, 92 | 63, 64 , 68, 69 93 | ], 94 | dtype=np.int32) 95 | 96 | class dotdict(dict): 97 | """dot.notation access to dictionary attributes""" 98 | __getattr__ = dict.get 99 | __setattr__ = dict.__setitem__ 100 | __delattr__ = dict.__delitem__ 101 | 102 | def store_counts(label_fp): 103 | """Compute # samples per class, from stored labels 104 | 105 | Args: 106 | label_fp : Path to label file 107 | 108 | Writes (to same path as label file): 109 | out_fp : # samples per class = {: , ...} 110 | """ 111 | Y_tup = read_pkl(label_fp) 112 | Y_idxs = Y_tup[1][0] 113 | print('# Samples in set = ', len(Y_idxs)) 114 | 115 | label_count = Counter(Y_idxs) 116 | print('File ', label_fp, 'len',len(label_count)) 117 | 118 | out_fp = label_fp.replace('.pkl', '_count.pkl') 119 | write_pkl(label_count, out_fp) 120 | 121 | def load_babel_dataset(d_folder='../../data/babel_v1.0_release'): 122 | '''Load the BABEL dataset''' 123 | # Data folder 124 | l_babel_dense_files = ['train', 'val', 'test'] 125 | l_babel_extra_files = ['extra_train', 'extra_val'] 126 | 127 | # BABEL 
Dataset 128 | babel = {} 129 | for fn in l_babel_dense_files + l_babel_extra_files: 130 | babel[fn] = json.load(open(ospj(d_folder, fn+'.json'))) 131 | 132 | return babel 133 | 134 | def store_seq_fps(amass_p): 135 | '''Get fps for each seq. in BABEL 136 | Arguments: 137 | --------- 138 | amass_p : Path where you download AMASS to. 139 | Save: 140 | ----- 141 | featp_2_fps.json : Key: feat path , value: orig. fps 142 | in AMASS . E.g.,: {'KIT/KIT/4/RightTurn01_poses.npz': 100.0, ...} 143 | ''' 144 | # Get BABEL dataset 145 | babel = load_babel_dataset() 146 | 147 | # Loop over each BABEL seq, store frame-rate 148 | ft_p_2_fps = {} 149 | for fn in babel: 150 | for sid in tqdm(babel[fn]): 151 | ann = babel[fn][sid] 152 | if ann['feat_p'] not in ft_p_2_fps: 153 | fps = np.load(ospj(amass_p, ann['feat_p']))['mocap_framerate'] 154 | ft_p_2_fps[ann['feat_p']] = float(fps) 155 | dest_fp = '../data/featp_2_fps.json' 156 | write_json(ft_p_2_fps, dest_fp) 157 | return None 158 | 159 | def store_ntu_jpos(smplh_model_p, dest_jpos_p, amass_p): 160 | '''Store joint positions of kfor NTU-RGBD skeleton 161 | ''' 162 | # Model to forward-pass through, to store joint positions 163 | smplh = SMPLH(smplh_model_p, create_transl=False, ext='pkl', 164 | gender='male', use_pca=False, batch_size=1) 165 | 166 | # Load paths to all BABEL features 167 | featp_2_fps = read_json('../data/featp_2_fps.json') 168 | 169 | # Loop over all BABEL data, verify that joint positions are stored on disk 170 | l_m_ft_p = [] 171 | for ft_p in featp_2_fps: 172 | 173 | # Get the correct dataset folder name 174 | ddir_n = ospb(ospd(ospd(ft_p))) 175 | ddir_map = {'BioMotionLab_NTroje': 'BMLrub', 'DFaust_67': 'DFaust'} 176 | ddir_n = ddir_map[ddir_n] if ddir_n in ddir_map else ddir_n 177 | # Get the subject folder name 178 | sub_fol_n = ospb(ospd(ft_p)) 179 | 180 | # Sanity check 181 | fft_p = ospj(dest_jpos_p, ddir_n, sub_fol_n, ospb(ft_p)) 182 | if not os.path.exists(fft_p): 183 | l_m_ft_p.append((ft_p, fft_p)) 184 | print('Total # missing NTU RGBD skeleton features = ', len(l_m_ft_p)) 185 | 186 | # Loop over missing joint positions and store them on disk 187 | for i, (ft_p, ntu_jpos_p) in enumerate(tqdm(l_m_ft_p)): 188 | jrot_smplh = np.load(ospj(amass_p, ft_p))['poses'] 189 | # Break joints down into body parts 190 | smpl_body_jrot = jrot_smplh[:, 3:66] 191 | left_hand_jrot = jrot_smplh[:, 66:111] 192 | right_hand_jrot = jrot_smplh[:, 111:] 193 | root_orient = jrot_smplh[:, 0:3].reshape(-1, 3) 194 | 195 | # Forward through model to get a superset of required joints 196 | T = jrot_smplh.shape[0] 197 | ntu_jpos = np.zeros((T, 219)) 198 | for t in range(T): 199 | res = smplh(body_pose=torch.Tensor(smpl_body_jrot[t:t+1, :]), 200 | global_orient=torch.Tensor(root_orient[t: t+1, :]), 201 | left_hand_pose = torch.Tensor(left_hand_jrot[t: t+1, :]), 202 | right_hand_pose=torch.Tensor(right_hand_jrot[t: t+1, :]), 203 | # transl=torch.Tensor(transl) 204 | ) 205 | jpos = res.joints.detach().cpu().numpy()[:, :, :].reshape(-1) 206 | ntu_jpos[t, :] = jpos 207 | 208 | # Save to disk 209 | if not os.path.exists(ospd(ntu_jpos_p)): 210 | os.makedirs(ospd(ntu_jpos_p)) 211 | np.savez(ntu_jpos_p, joint_pos=ntu_jpos, allow_pickle=True) 212 | 213 | return 214 | 215 | def viz_ntu_jpos(jpos_p, l_ft_p): 216 | '''Visualize sequences of NTU-skeleton joint positions''' 217 | # Load paths to all BABEL features 218 | featp_2_fps = read_json('../data/featp_2_fps.json') 219 | # Indices that are in the NTU RGBD skeleton 220 | smpl2nturgbd = smpl_to_nturgbd() 221 | # 
Iterate over each 222 | for ft_p in l_ft_p: 223 | x = np.load(ospj(jpos_p, ft_p))['joint_pos'] 224 | T, ft_sz = x.shape 225 | x = x.reshape(T, ft_sz//3, 3) 226 | # print('Data shape = {0}'.format(x.shape)) 227 | x = x[:, smpl2nturgbd, :] 228 | # print('Data shape = {0}'.format(x.shape)) 229 | # x = x[:,:,:, 0].transpose(1, 2, 0) # (3, 150, 22, 1) --> (150, 22, 3) 230 | print('Data shape = {0}'.format(x.shape)) 231 | viz.viz_seq(seq=x, folder_p='test_viz/test_ntu_w_axis', sk_type='nturgbd', debug=True) 232 | print('-'*50) 233 | 234 | 235 | def main(): 236 | '''Store preliminary stuff''' 237 | amass_p= '/ps/project/conditional_action_gen/data/AMASS_March2021/' 238 | 239 | # Save feature paths --> fps (released in babel/action_recognition/data/) 240 | # store_seq_fps(amass_p) 241 | 242 | # Save joint positions in NTU-RGBD skeleton format 243 | smplh_model_p = '/ps/project/conditional_action_gen/body_models/mano_v1_2/models_cleaned_merged/SMPLH_male.pkl' 244 | jpos_p = '/ps/project/conditional_action_gen/amass/babel_joint_pos' 245 | # store_ntu_jpos(smplh_model_p, jpos_p, amass_p) 246 | 247 | # Viz. saved seqs. 248 | # l_ft_p = ['KIT/917/Experiment3a_09_poses.npz'] 249 | # viz_ntu_jpos(jpos_p, l_ft_p) 250 | 251 | if __name__ == '__main__': 252 | main() 253 | 254 | -------------------------------------------------------------------------------- /action_recognition/feeders/feeder.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # -*- coding: utf-8 -*- 4 | # 5 | # Adapted from https://github.com/lshiwjx/2s-AGCN for BABEL (https://babel.is.tue.mpg.de/) 6 | 7 | import numpy as np 8 | import pickle 9 | import torch 10 | from torch.utils.data import Dataset 11 | import sys 12 | import pdb 13 | 14 | sys.path.extend(['../']) 15 | from feeders import tools 16 | 17 | 18 | class Feeder(Dataset): 19 | def __init__(self, data_path, label_path, 20 | random_choose=False, random_shift=False, random_move=False, 21 | window_size=-1, normalization=False, debug=False, use_mmap=True): 22 | """ 23 | 24 | :param data_path: 25 | :param label_path: 26 | :param random_choose: If true, randomly choose a portion of the input sequence 27 | :param random_shift: If true, randomly pad zeros at the begining or end of sequence 28 | :param random_move: 29 | :param window_size: The length of the output sequence 30 | :param normalization: If true, normalize input sequence 31 | :param debug: If true, only use the first 100 samples 32 | :param use_mmap: If true, use mmap mode to load data, which can save the running memory 33 | """ 34 | 35 | self.debug = debug 36 | self.data_path = data_path 37 | self.label_path = label_path 38 | self.random_choose = random_choose 39 | self.random_shift = random_shift 40 | self.random_move = random_move 41 | self.window_size = window_size 42 | self.normalization = normalization 43 | self.use_mmap = use_mmap 44 | self.load_data() 45 | if normalization: 46 | self.get_mean_map() 47 | 48 | def load_data(self): 49 | # data: N C V T M 50 | try: 51 | with open(self.label_path) as f: 52 | self.sample_name, self.label = pickle.load(f) 53 | except: 54 | # for pickle file from python2 55 | with open(self.label_path, 'rb') as f: 56 | self.sample_name, self.label = pickle.load(f, encoding='latin1') 57 | 58 | # load data 59 | if self.use_mmap: 60 | self.data = np.load(self.data_path, mmap_mode='r') 61 | else: 62 | self.data = np.load(self.data_path) 63 | if self.debug: 64 | self.label = self.label[0:1000] 65 | self.data = self.data[0:1000] 66 | 
self.sample_name = self.sample_name[0:1000] 67 | 68 | 69 | def get_mean_map(self): 70 | data = self.data 71 | N, C, T, V, M = data.shape 72 | self.mean_map = data.mean(axis=2, keepdims=True).mean(axis=4, keepdims=True).mean(axis=0) 73 | self.std_map = data.transpose((0, 2, 4, 1, 3)).reshape((N * T * M, C * V)).std(axis=0).reshape((C, 1, V, 1)) 74 | 75 | def __len__(self): 76 | return len(self.sample_name) 77 | 78 | def __iter__(self): 79 | return self 80 | 81 | def __getitem__(self, index): 82 | data_numpy = self.data[index] 83 | data_numpy = np.array(data_numpy) 84 | 85 | seg_id = self.sample_name[index] 86 | label = self.label[0][index] 87 | sid = self.label[1][index] 88 | chunk_n = self.label[2][index] 89 | anntr_id = self.label[3][index] 90 | 91 | if self.normalization: 92 | data_numpy = (data_numpy - self.mean_map) / self.std_map 93 | if self.random_shift: 94 | data_numpy = tools.random_shift(data_numpy) 95 | if self.random_choose: 96 | data_numpy = tools.random_choose(data_numpy, self.window_size) 97 | elif self.window_size > 0: 98 | data_numpy = tools.auto_pading(data_numpy, self.window_size) 99 | if self.random_move: 100 | data_numpy = tools.random_move(data_numpy) 101 | 102 | return data_numpy, label, sid, seg_id, chunk_n, anntr_id, index 103 | 104 | def top_k(self, score, top_k): 105 | rank = score.argsort() 106 | hit_top_k = [l in rank[i, -top_k:] for i, l in enumerate(self.label[0])] 107 | return sum(hit_top_k) * 1.0 / len(hit_top_k) 108 | 109 | 110 | def import_class(name): 111 | components = name.split('.') 112 | mod = __import__(components[0]) 113 | for comp in components[1:]: 114 | mod = getattr(mod, comp) 115 | return mod 116 | 117 | 118 | def test(data_path, label_path, vid=None, graph=None, is_3d=False): 119 | ''' 120 | vis the samples using matplotlib 121 | :param data_path: 122 | :param label_path: 123 | :param vid: the id of sample 124 | :param graph: 125 | :param is_3d: when vis NTU, set it True 126 | :return: 127 | ''' 128 | import matplotlib.pyplot as plt 129 | loader = torch.utils.data.DataLoader( 130 | dataset=Feeder(data_path, label_path), 131 | batch_size=64, 132 | shuffle=False, 133 | num_workers=2) 134 | 135 | if vid is not None: 136 | sample_name = loader.dataset.sample_name 137 | sample_id = [name.split('.')[0] for name in sample_name] 138 | index = sample_id.index(vid) 139 | data, label, index = loader.dataset[index] 140 | data = data.reshape((1,) + data.shape) 141 | 142 | # for batch_idx, (data, label) in enumerate(loader): 143 | N, C, T, V, M = data.shape 144 | 145 | plt.ion() 146 | fig = plt.figure() 147 | if is_3d: 148 | from mpl_toolkits.mplot3d import Axes3D 149 | ax = fig.add_subplot(111, projection='3d') 150 | else: 151 | ax = fig.add_subplot(111) 152 | 153 | if graph is None: 154 | p_type = ['b.', 'g.', 'r.', 'c.', 'm.', 'y.', 'k.', 'k.', 'k.', 'k.'] 155 | pose = [ 156 | ax.plot(np.zeros(V), np.zeros(V), p_type[m])[0] for m in range(M) 157 | ] 158 | ax.axis([-1, 1, -1, 1]) 159 | for t in range(T): 160 | for m in range(M): 161 | pose[m].set_xdata(data[0, 0, t, :, m]) 162 | pose[m].set_ydata(data[0, 1, t, :, m]) 163 | fig.canvas.draw() 164 | plt.pause(0.001) 165 | else: 166 | p_type = ['b-', 'g-', 'r-', 'c-', 'm-', 'y-', 'k-', 'k-', 'k-', 'k-'] 167 | import sys 168 | from os import path 169 | sys.path.append( 170 | path.dirname(path.dirname(path.dirname(path.abspath(__file__))))) 171 | G = import_class(graph)() 172 | edge = G.inward 173 | pose = [] 174 | for m in range(M): 175 | a = [] 176 | for i in range(len(edge)): 177 | if is_3d: 178 | 
a.append(ax.plot(np.zeros(3), np.zeros(3), p_type[m])[0]) 179 | else: 180 | a.append(ax.plot(np.zeros(2), np.zeros(2), p_type[m])[0]) 181 | pose.append(a) 182 | ax.axis([-1, 1, -1, 1]) 183 | if is_3d: 184 | ax.set_zlim3d(-1, 1) 185 | for t in range(T): 186 | for m in range(M): 187 | for i, (v1, v2) in enumerate(edge): 188 | x1 = data[0, :2, t, v1, m] 189 | x2 = data[0, :2, t, v2, m] 190 | if (x1.sum() != 0 and x2.sum() != 0) or v1 == 1 or v2 == 1: 191 | pose[m][i].set_xdata(data[0, 0, t, [v1, v2], m]) 192 | pose[m][i].set_ydata(data[0, 1, t, [v1, v2], m]) 193 | if is_3d: 194 | pose[m][i].set_3d_properties(data[0, 2, t, [v1, v2], m]) 195 | fig.canvas.draw() 196 | # plt.savefig('/home/lshi/Desktop/skeleton_sequence/' + str(t) + '.jpg') 197 | plt.pause(0.01) 198 | 199 | 200 | if __name__ == '__main__': 201 | import os 202 | 203 | os.environ['DISPLAY'] = 'localhost:10.0' 204 | data_path = "../data/ntu/xview/val_data_joint.npy" 205 | label_path = "../data/ntu/xview/val_label.pkl" 206 | graph = 'graph.ntu_rgb_d.Graph' 207 | test(data_path, label_path, vid='S004C001P003R001A032', graph=graph, is_3d=True) 208 | # data_path = "../data/kinetics/val_data.npy" 209 | # label_path = "../data/kinetics/val_label.pkl" 210 | # graph = 'graph.Kinetics' 211 | # test(data_path, label_path, vid='UOD7oll3Kqo', graph=graph) 212 | -------------------------------------------------------------------------------- /notebooks/BABEL_explore.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Explore BABEL\n", 8 | "\n", 9 | "We present some code to explore BABEL by computing stats., and searching for specific actions." 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "# Preparing the environment\n", 19 | "%load_ext autoreload\n", 20 | "%autoreload 2\n", 21 | "%matplotlib notebook\n", 22 | "%matplotlib inline\n", 23 | "\n", 24 | "import sys, os, pdb\n", 25 | "from os.path import join as ospj\n", 26 | "import json\n", 27 | "from collections import *\n", 28 | "\n", 29 | "import numpy as np\n", 30 | "import pandas as pd\n", 31 | "from pandas.core.common import flatten\n", 32 | "\n", 33 | "import pprint\n", 34 | "pp = pprint.PrettyPrinter()" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "### Load BABEL \n", 42 | "Note that we are not loading the test set " 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 2, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "d_folder = '../data/babel_v1.0_release' # Data folder\n", 52 | "l_babel_dense_files = ['train', 'val'] \n", 53 | "l_babel_extra_files = ['extra_train', 'extra_val']\n", 54 | "\n", 55 | "# BABEL Dataset \n", 56 | "babel = {}\n", 57 | "for file in l_babel_dense_files:\n", 58 | " babel[file] = json.load(open(ospj(d_folder, file+'.json')))\n", 59 | " \n", 60 | "for file in l_babel_extra_files:\n", 61 | " babel[file] = json.load(open(ospj(d_folder, file+'.json'))) " 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "### Duration of mocap for which BABEL action labels are available" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 3, 74 | "metadata": {}, 75 | "outputs": [ 76 | { 77 | "name": "stdout", 78 | "output_type": "stream", 79 | "text": [ 80 | "Total duration = 30.0 hours 2.0 min. 
32 sec.\n", 81 | "Total # seqs. = 8808\n", 82 | "------------------------------\n", 83 | "Total duration = 34.0 hours 43.0 min. 39 sec.\n", 84 | "Total # seqs. = 10576\n", 85 | "------------------------------\n" 86 | ] 87 | } 88 | ], 89 | "source": [ 90 | "for babel_set in [l_babel_dense_files, l_babel_dense_files+l_babel_extra_files]:\n", 91 | " dur = 0.0\n", 92 | " list_sids = [] \n", 93 | " for spl in babel_set:\n", 94 | " for sid in babel[spl]:\n", 95 | " if sid not in list_sids:\n", 96 | " list_sids.append(sid)\n", 97 | " dur += babel[spl][sid]['dur'] \n", 98 | " \n", 99 | " # Duration of each set\n", 100 | " minutes = dur//60\n", 101 | " print('Total duration = {0} hours {1} min. {2:.0f} sec.'.format(\n", 102 | " minutes//60, minutes%60, dur%60))\n", 103 | " print('Total # seqs. = ', len(list_sids))\n", 104 | " print('-'*30)" 105 | ] 106 | }, 107 | { 108 | "cell_type": "markdown", 109 | "metadata": {}, 110 | "source": [ 111 | "### Search BABEL for action" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": 4, 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "def get_cats(ann, file):\n", 121 | " # Get sequence labels and frame labels if they exist\n", 122 | " seq_l, frame_l = [], []\n", 123 | " if 'extra' not in file:\n", 124 | " if ann['seq_ann'] is not None:\n", 125 | " seq_l = flatten([seg['act_cat'] for seg in ann['seq_ann']['labels']])\n", 126 | " if ann['frame_ann'] is not None:\n", 127 | " frame_l = flatten([seg['act_cat'] for seg in ann['frame_ann']['labels']])\n", 128 | " else:\n", 129 | " # Load all labels from (possibly) multiple annotators\n", 130 | " if ann['seq_anns'] is not None:\n", 131 | " seq_l = flatten([seg['act_cat'] for seq_ann in ann['seq_anns'] for seg in seq_ann['labels']])\n", 132 | " if ann['frame_anns'] is not None: \n", 133 | " frame_l = flatten([seg['act_cat'] for frame_ann in ann['frame_anns'] for seg in frame_ann['labels']])\n", 134 | " \n", 135 | " return list(seq_l), list(frame_l)" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 5, 141 | "metadata": {}, 142 | "outputs": [ 143 | { 144 | "name": "stdout", 145 | "output_type": "stream", 146 | "text": [ 147 | "# Seqs. containing action jump = 746\n", 148 | "# Segments containing action jump = 1597\n" 149 | ] 150 | } 151 | ], 152 | "source": [ 153 | "action = 'jump'\n", 154 | "act_anns = defaultdict(list) # { seq_id_1: [ann_1_1, ann_1_2], seq_id_2: [ann_2_1], ...} \n", 155 | "n_act_spans = 0\n", 156 | "\n", 157 | "for spl in babel:\n", 158 | " for sid in babel[spl]:\n", 159 | " \n", 160 | " seq_l, frame_l = get_cats(babel[spl][sid], spl)\n", 161 | " # print(seq_l + frame_l)\n", 162 | " \n", 163 | " if action in seq_l + frame_l:\n", 164 | " \n", 165 | " # Store all relevant mocap sequence annotations\n", 166 | " act_anns[sid].append(babel[spl][sid])\n", 167 | " \n", 168 | " # # Individual spans of the action in the sequence\n", 169 | " n_act_spans += Counter(seq_l+frame_l)[action]\n", 170 | " \n", 171 | "print('# Seqs. 
containing action {0} = {1}'.format(action, len(act_anns)))\n", 172 | "print('# Segments containing action {0} = {1}'.format(action, n_act_spans))" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": 6, 178 | "metadata": {}, 179 | "outputs": [ 180 | { 181 | "name": "stdout", 182 | "output_type": "stream", 183 | "text": [ 184 | "[{'babel_sid': 7692,\n", 185 | " 'dur': 3.83,\n", 186 | " 'feat_p': 'CMU/CMU/141/141_05_poses.npz',\n", 187 | " 'frame_ann': {'anntr_id': 'eab5b72f-7399-43a7-a752-e4ee2807faaf',\n", 188 | " 'babel_lid': '59ad905d-f378-4d2b-90a7-4e3222bbc1f7',\n", 189 | " 'labels': [{'act_cat': ['hop'],\n", 190 | " 'end_t': 2,\n", 191 | " 'proc_label': 'hop left',\n", 192 | " 'raw_label': 'hopping left',\n", 193 | " 'seg_id': 'daf942ad-7cbe-4387-b6a0-0fc391c702ea',\n", 194 | " 'start_t': 1},\n", 195 | " {'act_cat': ['hop'],\n", 196 | " 'end_t': 3,\n", 197 | " 'proc_label': 'hop right',\n", 198 | " 'raw_label': 'hopping right',\n", 199 | " 'seg_id': '7b17f75e-3da9-4e56-aca1-9bbb6b8d5dd9',\n", 200 | " 'start_t': 2},\n", 201 | " {'act_cat': ['stand'],\n", 202 | " 'end_t': 1,\n", 203 | " 'proc_label': 'stand',\n", 204 | " 'raw_label': 'standing',\n", 205 | " 'seg_id': '70687891-613e-42f7-87f4-5760f18a3548',\n", 206 | " 'start_t': 0},\n", 207 | " {'act_cat': ['stand'],\n", 208 | " 'end_t': 3.834,\n", 209 | " 'proc_label': 'stand',\n", 210 | " 'raw_label': 'standing',\n", 211 | " 'seg_id': 'f0cdfd79-5dad-43f3-b2d1-8a0ce8668010',\n", 212 | " 'start_t': 3}],\n", 213 | " 'mul_act': True},\n", 214 | " 'seq_ann': {'anntr_id': '30bf91ac-e0c1-4298-814f-7811fe634bac',\n", 215 | " 'babel_lid': 'da9d959f-f5b6-434f-a927-35effc7b5afe',\n", 216 | " 'labels': [{'act_cat': ['jump'],\n", 217 | " 'proc_label': 'jump',\n", 218 | " 'raw_label': 'jump',\n", 219 | " 'seg_id': '082c172b-3883-4231-9c81-fcee4cf1a999'}],\n", 220 | " 'mul_act': True},\n", 221 | " 'url': 'https://babel-renders.s3.eu-central-1.amazonaws.com/007692.mp4'}]\n" 222 | ] 223 | } 224 | ], 225 | "source": [ 226 | "# View a random annotation \n", 227 | "key = np.random.choice(list(act_anns.keys()))\n", 228 | "pp.pprint(act_anns[key])" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": null, 234 | "metadata": {}, 235 | "outputs": [], 236 | "source": [] 237 | } 238 | ], 239 | "metadata": { 240 | "kernelspec": { 241 | "display_name": "Python 3", 242 | "language": "python", 243 | "name": "python3" 244 | }, 245 | "language_info": { 246 | "codemirror_mode": { 247 | "name": "ipython", 248 | "version": 3 249 | }, 250 | "file_extension": ".py", 251 | "mimetype": "text/x-python", 252 | "name": "python", 253 | "nbconvert_exporter": "python", 254 | "pygments_lexer": "ipython3", 255 | "version": "3.8.3" 256 | } 257 | }, 258 | "nbformat": 4, 259 | "nbformat_minor": 5 260 | } 261 | -------------------------------------------------------------------------------- /action_recognition/model/aagcn.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import numpy as np 4 | import torch 5 | import torch.nn as nn 6 | from torch.autograd import Variable 7 | 8 | 9 | def import_class(name): 10 | components = name.split('.') 11 | mod = __import__(components[0]) 12 | for comp in components[1:]: 13 | mod = getattr(mod, comp) 14 | return mod 15 | 16 | 17 | def conv_branch_init(conv, branches): 18 | weight = conv.weight 19 | n = weight.size(0) 20 | k1 = weight.size(1) 21 | k2 = weight.size(2) 22 | nn.init.normal_(weight, 0, math.sqrt(2. 
/ (n * k1 * k2 * branches))) 23 | nn.init.constant_(conv.bias, 0) 24 | 25 | 26 | def conv_init(conv): 27 | nn.init.kaiming_normal_(conv.weight, mode='fan_out') 28 | nn.init.constant_(conv.bias, 0) 29 | 30 | 31 | def bn_init(bn, scale): 32 | nn.init.constant_(bn.weight, scale) 33 | nn.init.constant_(bn.bias, 0) 34 | 35 | 36 | class unit_tcn(nn.Module): 37 | def __init__(self, in_channels, out_channels, kernel_size=9, stride=1): 38 | super(unit_tcn, self).__init__() 39 | pad = int((kernel_size - 1) / 2) 40 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=(kernel_size, 1), padding=(pad, 0), 41 | stride=(stride, 1)) 42 | 43 | self.bn = nn.BatchNorm2d(out_channels) 44 | self.relu = nn.ReLU(inplace=True) 45 | conv_init(self.conv) 46 | bn_init(self.bn, 1) 47 | 48 | def forward(self, x): 49 | x = self.bn(self.conv(x)) 50 | return x 51 | 52 | 53 | class unit_gcn(nn.Module): 54 | def __init__(self, in_channels, out_channels, A, coff_embedding=4, num_subset=3, adaptive=True, attention=True): 55 | super(unit_gcn, self).__init__() 56 | inter_channels = out_channels // coff_embedding 57 | self.inter_c = inter_channels 58 | self.out_c = out_channels 59 | self.in_c = in_channels 60 | self.num_subset = num_subset 61 | num_jpts = A.shape[-1] 62 | 63 | self.conv_d = nn.ModuleList() 64 | for i in range(self.num_subset): 65 | self.conv_d.append(nn.Conv2d(in_channels, out_channels, 1)) 66 | 67 | if adaptive: 68 | self.PA = nn.Parameter(torch.from_numpy(A.astype(np.float32))) 69 | self.alpha = nn.Parameter(torch.zeros(1)) 70 | # self.beta = nn.Parameter(torch.ones(1)) 71 | # nn.init.constant_(self.PA, 1e-6) 72 | # self.A = Variable(torch.from_numpy(A.astype(np.float32)), requires_grad=False) 73 | # self.A = self.PA 74 | self.conv_a = nn.ModuleList() 75 | self.conv_b = nn.ModuleList() 76 | for i in range(self.num_subset): 77 | self.conv_a.append(nn.Conv2d(in_channels, inter_channels, 1)) 78 | self.conv_b.append(nn.Conv2d(in_channels, inter_channels, 1)) 79 | else: 80 | self.A = Variable(torch.from_numpy(A.astype(np.float32)), requires_grad=False) 81 | self.adaptive = adaptive 82 | 83 | if attention: 84 | # self.beta = nn.Parameter(torch.zeros(1)) 85 | # self.gamma = nn.Parameter(torch.zeros(1)) 86 | # unified attention 87 | # self.Attention = nn.Parameter(torch.ones(num_jpts)) 88 | 89 | # temporal attention 90 | self.conv_ta = nn.Conv1d(out_channels, 1, 9, padding=4) 91 | nn.init.constant_(self.conv_ta.weight, 0) 92 | nn.init.constant_(self.conv_ta.bias, 0) 93 | 94 | # s attention 95 | ker_jpt = num_jpts - 1 if not num_jpts % 2 else num_jpts 96 | pad = (ker_jpt - 1) // 2 97 | self.conv_sa = nn.Conv1d(out_channels, 1, ker_jpt, padding=pad) 98 | nn.init.xavier_normal_(self.conv_sa.weight) 99 | nn.init.constant_(self.conv_sa.bias, 0) 100 | 101 | # channel attention 102 | rr = 2 103 | self.fc1c = nn.Linear(out_channels, out_channels // rr) 104 | self.fc2c = nn.Linear(out_channels // rr, out_channels) 105 | nn.init.kaiming_normal_(self.fc1c.weight) 106 | nn.init.constant_(self.fc1c.bias, 0) 107 | nn.init.constant_(self.fc2c.weight, 0) 108 | nn.init.constant_(self.fc2c.bias, 0) 109 | 110 | # self.bn = nn.BatchNorm2d(out_channels) 111 | # bn_init(self.bn, 1) 112 | self.attention = attention 113 | 114 | if in_channels != out_channels: 115 | self.down = nn.Sequential( 116 | nn.Conv2d(in_channels, out_channels, 1), 117 | nn.BatchNorm2d(out_channels) 118 | ) 119 | else: 120 | self.down = lambda x: x 121 | 122 | self.bn = nn.BatchNorm2d(out_channels) 123 | self.soft = nn.Softmax(-2) 124 | self.tan = nn.Tanh() 
125 | self.sigmoid = nn.Sigmoid() 126 | self.relu = nn.ReLU(inplace=True) 127 | 128 | for m in self.modules(): 129 | if isinstance(m, nn.Conv2d): 130 | conv_init(m) 131 | elif isinstance(m, nn.BatchNorm2d): 132 | bn_init(m, 1) 133 | bn_init(self.bn, 1e-6) 134 | for i in range(self.num_subset): 135 | conv_branch_init(self.conv_d[i], self.num_subset) 136 | 137 | def forward(self, x): 138 | N, C, T, V = x.size() 139 | 140 | y = None 141 | if self.adaptive: 142 | A = self.PA 143 | # A = A + self.PA 144 | for i in range(self.num_subset): 145 | A1 = self.conv_a[i](x).permute(0, 3, 1, 2).contiguous().view(N, V, self.inter_c * T) 146 | A2 = self.conv_b[i](x).view(N, self.inter_c * T, V) 147 | A1 = self.tan(torch.matmul(A1, A2) / A1.size(-1)) # N V V 148 | A1 = A[i] + A1 * self.alpha 149 | A2 = x.view(N, C * T, V) 150 | z = self.conv_d[i](torch.matmul(A2, A1).view(N, C, T, V)) 151 | y = z + y if y is not None else z 152 | else: 153 | A = self.A.cuda(x.get_device()) * self.mask 154 | for i in range(self.num_subset): 155 | A1 = A[i] 156 | A2 = x.view(N, C * T, V) 157 | z = self.conv_d[i](torch.matmul(A2, A1).view(N, C, T, V)) 158 | y = z + y if y is not None else z 159 | 160 | y = self.bn(y) 161 | y += self.down(x) 162 | y = self.relu(y) 163 | 164 | if self.attention: 165 | # spatial attention 166 | se = y.mean(-2) # N C V 167 | se1 = self.sigmoid(self.conv_sa(se)) 168 | y = y * se1.unsqueeze(-2) + y 169 | # a1 = se1.unsqueeze(-2) 170 | 171 | # temporal attention 172 | se = y.mean(-1) 173 | se1 = self.sigmoid(self.conv_ta(se)) 174 | y = y * se1.unsqueeze(-1) + y 175 | # a2 = se1.unsqueeze(-1) 176 | 177 | # channel attention 178 | se = y.mean(-1).mean(-1) 179 | se1 = self.relu(self.fc1c(se)) 180 | se2 = self.sigmoid(self.fc2c(se1)) 181 | y = y * se2.unsqueeze(-1).unsqueeze(-1) + y 182 | # a3 = se2.unsqueeze(-1).unsqueeze(-1) 183 | 184 | # unified attention 185 | # y = y * self.Attention + y 186 | # y = y + y * ((a2 + a3) / 2) 187 | # y = self.bn(y) 188 | return y 189 | 190 | 191 | class TCN_GCN_unit(nn.Module): 192 | def __init__(self, in_channels, out_channels, A, stride=1, residual=True, adaptive=True, attention=True): 193 | super(TCN_GCN_unit, self).__init__() 194 | self.gcn1 = unit_gcn(in_channels, out_channels, A, adaptive=adaptive, attention=attention) 195 | self.tcn1 = unit_tcn(out_channels, out_channels, stride=stride) 196 | self.relu = nn.ReLU(inplace=True) 197 | # if attention: 198 | # self.alpha = nn.Parameter(torch.zeros(1)) 199 | # self.beta = nn.Parameter(torch.ones(1)) 200 | # temporal attention 201 | # self.conv_ta1 = nn.Conv1d(out_channels, out_channels//rt, 9, padding=4) 202 | # self.bn = nn.BatchNorm2d(out_channels) 203 | # bn_init(self.bn, 1) 204 | # self.conv_ta2 = nn.Conv1d(out_channels, 1, 9, padding=4) 205 | # nn.init.kaiming_normal_(self.conv_ta1.weight) 206 | # nn.init.constant_(self.conv_ta1.bias, 0) 207 | # nn.init.constant_(self.conv_ta2.weight, 0) 208 | # nn.init.constant_(self.conv_ta2.bias, 0) 209 | 210 | # rt = 4 211 | # self.inter_c = out_channels // rt 212 | # self.conv_ta1 = nn.Conv2d(out_channels, out_channels // rt, 1) 213 | # self.conv_ta2 = nn.Conv2d(out_channels, out_channels // rt, 1) 214 | # nn.init.constant_(self.conv_ta1.weight, 0) 215 | # nn.init.constant_(self.conv_ta1.bias, 0) 216 | # nn.init.constant_(self.conv_ta2.weight, 0) 217 | # nn.init.constant_(self.conv_ta2.bias, 0) 218 | # s attention 219 | # num_jpts = A.shape[-1] 220 | # ker_jpt = num_jpts - 1 if not num_jpts % 2 else num_jpts 221 | # pad = (ker_jpt - 1) // 2 222 | # self.conv_sa = 
nn.Conv1d(out_channels, 1, ker_jpt, padding=pad) 223 | # nn.init.constant_(self.conv_sa.weight, 0) 224 | # nn.init.constant_(self.conv_sa.bias, 0) 225 | 226 | # channel attention 227 | # rr = 16 228 | # self.fc1c = nn.Linear(out_channels, out_channels // rr) 229 | # self.fc2c = nn.Linear(out_channels // rr, out_channels) 230 | # nn.init.kaiming_normal_(self.fc1c.weight) 231 | # nn.init.constant_(self.fc1c.bias, 0) 232 | # nn.init.constant_(self.fc2c.weight, 0) 233 | # nn.init.constant_(self.fc2c.bias, 0) 234 | # 235 | # self.softmax = nn.Softmax(-2) 236 | # self.sigmoid = nn.Sigmoid() 237 | self.attention = attention 238 | 239 | if not residual: 240 | self.residual = lambda x: 0 241 | 242 | elif (in_channels == out_channels) and (stride == 1): 243 | self.residual = lambda x: x 244 | 245 | else: 246 | self.residual = unit_tcn(in_channels, out_channels, kernel_size=1, stride=stride) 247 | 248 | def forward(self, x): 249 | if self.attention: 250 | y = self.relu(self.tcn1(self.gcn1(x)) + self.residual(x)) 251 | 252 | # spatial attention 253 | # se = y.mean(-2) # N C V 254 | # se1 = self.sigmoid(self.conv_sa(se)) 255 | # y = y * se1.unsqueeze(-2) + y 256 | # a1 = se1.unsqueeze(-2) 257 | 258 | # temporal attention 259 | # se = y.mean(-1) # N C T 260 | # # se1 = self.relu(self.bn(self.conv_ta1(se))) 261 | # se2 = self.sigmoid(self.conv_ta2(se)) 262 | # # y = y * se1.unsqueeze(-1) + y 263 | # a2 = se2.unsqueeze(-1) 264 | 265 | # se = y # NCTV 266 | # N, C, T, V = y.shape 267 | # se1 = self.conv_ta1(se).permute(0, 2, 1, 3).contiguous().view(N, T, self.inter_c * V) # NTCV 268 | # se2 = self.conv_ta2(se).permute(0, 1, 3, 2).contiguous().view(N, self.inter_c * V, T) # NCVT 269 | # a2 = self.softmax(torch.matmul(se1, se2) / np.sqrt(se1.size(-1))) # N T T 270 | # y = torch.matmul(y.permute(0, 1, 3, 2).contiguous().view(N, C * V, T), a2) \ 271 | # .view(N, C, V, T).permute(0, 1, 3, 2) * self.alpha + y 272 | 273 | # channel attention 274 | # se = y.mean(-1).mean(-1) 275 | # se1 = self.relu(self.fc1c(se)) 276 | # se2 = self.sigmoid(self.fc2c(se1)) 277 | # # y = y * se2.unsqueeze(-1).unsqueeze(-1) + y 278 | # a3 = se2.unsqueeze(-1).unsqueeze(-1) 279 | # 280 | # y = y * ((a2 + a3) / 2) + y 281 | # y = self.bn(y) 282 | else: 283 | y = self.relu(self.tcn1(self.gcn1(x)) + self.residual(x)) 284 | return y 285 | 286 | 287 | class Model(nn.Module): 288 | def __init__(self, num_class=60, num_point=25, num_person=2, graph=None, graph_args=dict(), in_channels=3, 289 | drop_out=0, adaptive=True, attention=True): 290 | super(Model, self).__init__() 291 | 292 | if graph is None: 293 | raise ValueError() 294 | else: 295 | Graph = import_class(graph) 296 | self.graph = Graph(**graph_args) 297 | 298 | A = self.graph.A 299 | self.num_class = num_class 300 | 301 | self.data_bn = nn.BatchNorm1d(num_person * in_channels * num_point) 302 | 303 | self.l1 = TCN_GCN_unit(3, 64, A, residual=False, adaptive=adaptive, attention=attention) 304 | self.l2 = TCN_GCN_unit(64, 64, A, adaptive=adaptive, attention=attention) 305 | self.l3 = TCN_GCN_unit(64, 64, A, adaptive=adaptive, attention=attention) 306 | self.l4 = TCN_GCN_unit(64, 64, A, adaptive=adaptive, attention=attention) 307 | self.l5 = TCN_GCN_unit(64, 128, A, stride=2, adaptive=adaptive, attention=attention) 308 | self.l6 = TCN_GCN_unit(128, 128, A, adaptive=adaptive, attention=attention) 309 | self.l7 = TCN_GCN_unit(128, 128, A, adaptive=adaptive, attention=attention) 310 | self.l8 = TCN_GCN_unit(128, 256, A, stride=2, adaptive=adaptive, attention=attention) 311 | self.l9 = 
TCN_GCN_unit(256, 256, A, adaptive=adaptive, attention=attention) 312 | self.l10 = TCN_GCN_unit(256, 256, A, adaptive=adaptive, attention=attention) 313 | 314 | self.fc = nn.Linear(256, num_class) 315 | nn.init.normal_(self.fc.weight, 0, math.sqrt(2. / num_class)) 316 | bn_init(self.data_bn, 1) 317 | if drop_out: 318 | self.drop_out = nn.Dropout(drop_out) 319 | else: 320 | self.drop_out = lambda x: x 321 | 322 | def forward(self, x): 323 | N, C, T, V, M = x.size() 324 | 325 | x = x.permute(0, 4, 3, 1, 2).contiguous().view(N, M * V * C, T) 326 | x = self.data_bn(x) 327 | x = x.view(N, M, V, C, T).permute(0, 1, 3, 4, 2).contiguous().view(N * M, C, T, V) 328 | 329 | x = self.l1(x) 330 | x = self.l2(x) 331 | x = self.l3(x) 332 | x = self.l4(x) 333 | x = self.l5(x) 334 | x = self.l6(x) 335 | x = self.l7(x) 336 | x = self.l8(x) 337 | x = self.l9(x) 338 | x = self.l10(x) 339 | 340 | # N*M,C,T,V 341 | c_new = x.size(1) 342 | x = x.view(N, M, c_new, -1) 343 | x = x.mean(3).mean(1) 344 | x = self.drop_out(x) 345 | 346 | return self.fc(x) 347 | -------------------------------------------------------------------------------- /action_recognition/data_gen/viz.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # vim:fenc=utf-8 4 | # 5 | # Copyright © 2020 achandrasekaran 6 | # 7 | # Distributed under terms of the MIT license. 8 | 9 | import os, sys 10 | import os.path as osp 11 | 12 | import random 13 | import numpy as np 14 | import math 15 | import torch 16 | from torch.nn.functional import interpolate as intrp 17 | 18 | import subprocess 19 | import shutil 20 | import uuid 21 | import cv2 22 | from matplotlib import pyplot as plt 23 | from mpl_toolkits.mplot3d import Axes3D 24 | 25 | import pdb 26 | 27 | import dutils 28 | 29 | 30 | """ 31 | Visualize input and output motion sequences and labels 32 | """ 33 | 34 | def get_smpl_skeleton(): 35 | '''Skeleton ordering so that you traverse joints in this order: 36 | Left lower, Left upper, Spine, Neck, Head, Right lower, Right upper. 
37 | ''' 38 | return np.array( 39 | [ 40 | # Left lower 41 | [ 0, 1 ], 42 | [ 1, 4 ], 43 | [ 4, 7 ], 44 | [ 7, 10], 45 | 46 | # Left upper 47 | [ 9, 13], 48 | [13, 16], 49 | [16, 18], 50 | [18, 20], 51 | # [20, 22], 52 | 53 | # Spinal column 54 | [ 0, 3 ], 55 | [ 3, 6 ], 56 | [ 6, 9 ], 57 | [ 9, 12], 58 | [12, 15], 59 | 60 | # Right lower 61 | [ 0, 2 ], 62 | [ 2, 5 ], 63 | [ 5, 8 ], 64 | [ 8, 11], 65 | 66 | # Right upper 67 | [ 9, 14], 68 | [14, 17], 69 | [17, 19], 70 | [19, 21], 71 | # [21, 23], 72 | ]) 73 | 74 | def get_nturgbd_joint_names(): 75 | '''From paper: 76 | 1-base of the spine 2-middle of the spine 3-neck 4-head 5-left shoulder 6-left elbow 7-left wrist 8- left hand 9-right shoulder 10-right elbow 11-right wrist 12- right hand 13-left hip 14-left knee 15-left ankle 16-left foot 17- right hip 18-right knee 19-right ankle 20-right foot 21-spine 22- tip of the left hand 23-left thumb 24-tip of the right hand 25- right thumb 77 | ''' 78 | # Joint names by AC, based on SMPL names 79 | joint_names_map = { 80 | 0: 'Pelvis', 81 | 82 | 12: 'L_Hip', 83 | 13: 'L_Knee', 84 | 14: 'L_Ankle', 85 | 15: 'L_Foot', 86 | 87 | 16: 'R_Hip', 88 | 17: 'R_Knee', 89 | 18: 'R_Ankle', 90 | 19: 'R_Foot', 91 | 92 | 1: 'Spine1', 93 | # 'Spine2', 94 | 20: 'Spine3', 95 | 2: 'Neck', 96 | 3: 'Head', 97 | 98 | # 'L_Collar', 99 | 4: 'L_Shoulder', 100 | 5: 'L_Elbow', 101 | 6: 'L_Wrist', 102 | 7: 'L_Hand', 103 | 21: 'L_HandTip', # Not in SMPL 104 | 22: 'L_Thumb', # Not in SMPL 105 | 106 | # 'R_Collar', 107 | 8: 'R_Shoulder', 108 | 9: 'R_Elbow', 109 | 10: 'R_Wrist', 110 | 11: 'R_Hand', 111 | 23: 'R_HandTip', # Not in SMPL 112 | 24: 'R_Thumb', # Not in SMPL 113 | } 114 | 115 | return [joint_names_map[idx] for idx in range(len(joint_names_map))] 116 | 117 | def get_smpl_joint_names(): 118 | # Joint names from SMPL Wiki 119 | joint_names_map = { 120 | 0: 'Pelvis', 121 | 122 | 1: 'L_Hip', 123 | 4: 'L_Knee', 124 | 7: 'L_Ankle', 125 | 10: 'L_Foot', 126 | 127 | 2: 'R_Hip', 128 | 5: 'R_Knee', 129 | 8: 'R_Ankle', 130 | 11: 'R_Foot', 131 | 132 | 3: 'Spine1', 133 | 6: 'Spine2', 134 | 9: 'Spine3', 135 | 12: 'Neck', 136 | 15: 'Head', 137 | 138 | 13: 'L_Collar', 139 | 16: 'L_Shoulder', 140 | 18: 'L_Elbow', 141 | 20: 'L_Wrist', 142 | 22: 'L_Hand', 143 | 14: 'R_Collar', 144 | 17: 'R_Shoulder', 145 | 19: 'R_Elbow', 146 | 21: 'R_Wrist', 147 | 23: 'R_Hand'} 148 | 149 | # Return all joints except indices 22 (L_Hand), 23 (R_Hand) 150 | return [joint_names_map[idx] for idx in range(len(joint_names_map)-2)] 151 | 152 | def get_nturgbd_skeleton(): 153 | ''' Skeleton ordering such that you traverse joints in this order: 154 | Left lower, Left upper, Spine, Neck, Head, Right lower, Right upper. 155 | ''' 156 | return np.array( 157 | [ 158 | # Left lower 159 | [0, 12], 160 | [12, 13], 161 | [13, 14], 162 | [14, 15], 163 | 164 | # Left upper 165 | [4, 20], 166 | [4, 5], 167 | [5, 6], 168 | [6, 7], 169 | [7, 21], 170 | [7, 22], # --> L Thumb 171 | 172 | # Spinal column 173 | [0, 1], 174 | [1, 20], 175 | [20, 2], 176 | [2, 3], 177 | 178 | # Right lower 179 | [0, 16], 180 | [16, 17], 181 | [17, 18], 182 | [18, 19], 183 | 184 | # Right upper 185 | [20, 8], 186 | [8, 9], 187 | [9, 10], 188 | [10, 11], 189 | [11, 24], 190 | # [24, 11] --> R Thumb 191 | 192 | [21, 22], 193 | 194 | [23, 24], 195 | 196 | ] 197 | ) 198 | 199 | def get_joint_colors(joint_names): 200 | '''Return joints based on a color spectrum. Also, joints on 201 | L and R should have distinctly different colors. 
202 | ''' 203 | # Convert from plt 0-1 RGBA colors to 0-255 BGR colors for opencv. 204 | cmap = plt.get_cmap('rainbow') 205 | colors = [cmap(i) for i in np.linspace(0, 1, len(joint_names))] 206 | colors = [np.array((c[2], c[1], c[0])) for c in colors] 207 | return colors 208 | 209 | def calc_angle_from_x(sk): 210 | '''Given skeleton, calc. angle from x-axis''' 211 | # Hip bone 212 | id_l_hip = get_smpl_joint_names().index('L_Hip') 213 | id_r_hip = get_smpl_joint_names().index('R_Hip') 214 | pl, pr = sk[id_l_hip], sk[id_r_hip] 215 | bone = np.array(pr-pl) 216 | unit_v = bone / np.linalg.norm(bone) 217 | # Angle with x-axis 218 | pdb.set_trace() 219 | x_ax = np.array([1, 0, 0]) 220 | x_angle = math.degrees(np.arccos(np.dot(x_ax, unit_v))) 221 | 222 | ''' 223 | l_hip_z = seq[0, joint_names.index('L_Hip'), 2] 224 | r_hip_z = seq[0, joint_names.index('R_Hip'), 2] 225 | az = 0 if (l_hip_z > zroot and zroot > r_hip_z) else 180 226 | ''' 227 | if bone[1] > 0: 228 | x_angle = - x_angle 229 | 230 | return x_angle 231 | 232 | def calc_angle_from_y(sk): 233 | '''Given skeleton, calc. angle from x-axis''' 234 | # Hip bone 235 | id_l_hip = get_smpl_joint_names().index('L_Hip') 236 | id_r_hip = get_smpl_joint_names().index('R_Hip') 237 | pl, pr = sk[id_l_hip], sk[id_r_hip] 238 | bone = np.array(pl-pr) 239 | unit_v = bone / np.linalg.norm(bone) 240 | print(unit_v) 241 | # Angle with x-axis 242 | pdb.set_trace() 243 | y_ax = np.array([0, 1, 0]) 244 | y_angle = math.degrees(np.arccos(np.dot(y_ax, unit_v))) 245 | 246 | ''' 247 | l_hip_z = seq[0, joint_names.index('L_Hip'), 2] 248 | r_hip_z = seq[0, joint_names.index('R_Hip'), 2] 249 | az = 0 if (l_hip_z > zroot and zroot > r_hip_z) else 180 250 | ''' 251 | # if bone[1] > 0: 252 | # y_angle = - y_angle 253 | seq_y_proj = bone * np.cos(np.deg2rad(y_angle)) 254 | print('Bone projected onto y-axis: ', seq_y_proj) 255 | 256 | return y_angle 257 | 258 | def viz_skeleton(seq, folder_p, sk_type='smpl', radius=1, lcolor='#ff0000', rcolor='#0000ff', action='', debug=False): 259 | ''' Visualize skeletons for given sequence and store as images. 260 | 261 | Args: 262 | seq (np.array): Array (frames) of joint positions. 263 | Size depends on sk_type (see below). 264 | if sk_type is 'smpl' then assume: 265 | 1. first 3 dims = translation. 266 | 2. Size = (# frames, 69) 267 | elif sk_type is 'nturgbd', then assume: 268 | 1. no translation. 269 | 2. Size = (# frames, 25, 3) 270 | folder_p (str): Path to root folder containing visualized frames. 271 | Frames are dumped to the path: folder_p/frames/*.jpg 272 | radius (float): Space around the subject? 273 | 274 | Returns: 275 | Stores skeleton sequence as jpg frames. 276 | ''' 277 | joint_names = get_nturgbd_joint_names() if 'nturgbd' == sk_type \ 278 | else get_smpl_joint_names() 279 | n_j = n_j = len(joint_names) 280 | 281 | az = 90 282 | if 'smpl' == sk_type: 283 | # SMPL kinematic chain, joint list. 284 | # NOTE that hands are skipped. 
285 | kin_chain = get_smpl_skeleton() 286 | # Reshape flat pose features into (frames, joints, (x,y,z)) (skip trans) 287 | seq = seq[:, 3:].reshape(-1, n_j, 3).cpu().detach().numpy() 288 | 289 | elif 'nturgbd' == sk_type: 290 | kin_chain = get_nturgbd_skeleton() 291 | az = 0 292 | 293 | # Get color-spectrum for skeleton 294 | colors = get_joint_colors(joint_names) 295 | labels = [(joint_names[jidx[0]], joint_names[jidx[1]]) for jidx in kin_chain] 296 | 297 | # xroot, yroot, zroot = 0.0, 0.0, 0.0 298 | xroot, yroot, zroot = seq[0, 0, 0], seq[0, 0, 1], seq[0, 0, 2] 299 | # seq = seq - seq[0, :, :] 300 | 301 | # Change viewing angle so that first frame is in frontal pose 302 | # az = calc_angle_from_x(seq[0]-np.array([xroot, yroot, zroot])) 303 | # az = calc_angle_from_y(seq[0]-np.array([xroot, yroot, zroot])) 304 | 305 | # Viz. skeleton for each frame 306 | for t in range(seq.shape[0]): 307 | 308 | # Fig. settings 309 | fig = plt.figure(figsize=(7, 6)) if debug else \ 310 | plt.figure(figsize=(5, 5)) 311 | ax = fig.add_subplot(111, projection='3d') 312 | 313 | for i, (j1, j2) in enumerate(kin_chain): 314 | # Store bones 315 | x = np.array([seq[t, j1, 0], seq[t, j2, 0]]) 316 | y = np.array([seq[t, j1, 1], seq[t, j2, 1]]) 317 | z = np.array([seq[t, j1, 2], seq[t, j2, 2]]) 318 | # Plot bones in skeleton 319 | ax.plot(x, y, z, c=colors[i], marker='o', linewidth=2, label=labels[i]) 320 | 321 | # More figure settings 322 | ax.set_title(action) 323 | ax.set_xlabel('X') 324 | ax.set_ylabel('Y') 325 | ax.set_zlabel('Z') 326 | # xroot, yroot, zroot = seq[t, 0, 0], seq[t, 0, 1], seq[t, 0, 2] 327 | 328 | # pdb.set_trace() 329 | ax.set_xlim3d(-radius + xroot, radius + xroot) 330 | ax.set_ylim3d([-radius + yroot, radius + yroot]) 331 | ax.set_zlim3d([-radius + zroot, radius + zroot]) 332 | 333 | if True==debug: 334 | ax.axis('on') 335 | ax.grid(b=True) 336 | else: 337 | ax.axis('off') 338 | ax.grid(b=None) 339 | # Turn off tick labels 340 | ax.set_yticklabels([]) 341 | ax.set_xticklabels([]) 342 | ax.set_zticklabels([]) 343 | 344 | cv2.waitKey(0) 345 | 346 | # ax.view_init(-75, 90) 347 | # ax.view_init(elev=20, azim=90+az) 348 | ax.view_init(elev=20, azim=az) 349 | 350 | if True==debug: 351 | ax.legend(bbox_to_anchor=(1.1, 1), loc='upper right') 352 | pass 353 | 354 | fig.savefig(osp.join(folder_p, 'frames', '{0}.jpg'.format(t))) 355 | plt.close(fig) 356 | 357 | # break 358 | 359 | def write_vid_from_imgs(folder_p, fps): 360 | '''Collate frames into a video sequence. 361 | 362 | Args: 363 | folder_p (str): Frame images are in the path: folder_p/frames/.jpg 364 | fps (float): Output frame rate. 365 | 366 | Returns: 367 | Output video is stored in the path: folder_p/video.mp4 368 | ''' 369 | vid_p = osp.join(folder_p, 'video.mp4') 370 | cmd = ['ffmpeg', '-r', str(int(fps)), '-i', 371 | osp.join(folder_p, 'frames', '%d.jpg'), '-y', vid_p] 372 | FNULL = open(os.devnull, 'w') 373 | retcode = subprocess.call(cmd, stdout=FNULL, stderr=subprocess.STDOUT) 374 | if not 0 == retcode: 375 | print('*******ValueError(Error {0} executing command: {1}*********'.format(retcode, ' '.join(cmd))) 376 | shutil.rmtree(osp.join(folder_p, 'frames')) 377 | 378 | def viz_seq(seq, folder_p, sk_type, orig_fps=30.0, debug=False): 379 | '''1. Dumps sequence of skeleton images for the given sequence of joints. 380 | 2. Collates the sequence of images into an mp4 video. 381 | 382 | Args: 383 | seq (np.array): Array of joint positions. 384 | folder_p (str): Path to root folder that will contain frames folder. 
385 | sk_type (str): {'smpl', 'nturgbd'} 386 | 387 | Return: 388 | None. Path of mp4 video: folder_p/video.mp4 389 | ''' 390 | # Delete folder if exists 391 | if osp.exists(folder_p): 392 | print('Deleting existing folder ', folder_p) 393 | shutil.rmtree(folder_p) 394 | 395 | # Create folder for frames 396 | os.makedirs(osp.join(folder_p, 'frames')) 397 | 398 | # Dump frames into folder. Args: (data, radius, frames path) 399 | viz_skeleton(seq, folder_p=folder_p, sk_type=sk_type, radius=1.2, debug=debug) 400 | write_vid_from_imgs(folder_p, orig_fps) 401 | 402 | return None 403 | 404 | def viz_rand_seq(X, Y, dtype, epoch, wb, urls=None, 405 | k=3, pred_labels=None): 406 | ''' 407 | Args: 408 | X (np.array): Array (frames) of SMPL joint positions. 409 | Y (np.array): Multiple labels for each frame in x \in X. 410 | dtype (str): {'input', 'pred'} 411 | k (int): # samples to viz. 412 | urls (tuple): Tuple of URLs of the rendered videos from original mocap. 413 | wb (dict): Wandb log dict. 414 | Returns: 415 | viz_ds (dict): Data structure containing all viz. info so far. 416 | ''' 417 | import wandb 418 | # `idx2al`: idx --> action label string 419 | al2idx = dutils.read_json('data/action_label_to_idx.json') 420 | idx2al = {al2idx[k]: k for k in al2idx} 421 | 422 | # Sample k random seqs. to viz. 423 | for s_idx in random.sample(list(range(X.shape[0])), k): 424 | # Visualize a single seq. in path `folder_p` 425 | folder_p = osp.join('viz', str(uuid.uuid4())) 426 | viz_seq(seq=X[s_idx], folder_p=folder_p) 427 | title='{0} seq. {1}: '.format(dtype, s_idx) 428 | acts_str = ', '.join([idx2al[l] for l in torch.unique(Y[s_idx])]) 429 | wb[title+urls[s_idx]] = wandb.Video(osp.join(folder_p, 'video.mp4'), 430 | caption='Actions: '+acts_str) 431 | 432 | if 'pred' == dtype or 'preds'==dtype: 433 | raise NotImplementedError 434 | 435 | print('Done viz. {0} seqs.'.format(k)) 436 | return wb 437 | -------------------------------------------------------------------------------- /notebooks/BABEL_visualization.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Visualizing BABEL labels\n", 8 | "[BABEL](https://babel.is.tue.mpg.de/) labels mocap sequences from [AMASS](https://amass.is.tue.mpg.de) with action labels. \n", 9 | "A single sequence in BABEL can have multiple action labels associated with it, from multiple annotators. \n", 10 | "Here, we present code to load data from BABEL, visualize the mocap sequence rendered as a 2D video, and view the action labels corresponding to the sequence. 
" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 1, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "# Preparing the environment\n", 20 | "%load_ext autoreload\n", 21 | "%autoreload 2\n", 22 | "%matplotlib notebook\n", 23 | "%matplotlib inline" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 2, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "import json\n", 33 | "from os.path import join as ospj\n", 34 | "\n", 35 | "import numpy as np\n", 36 | "\n", 37 | "import pprint\n", 38 | "pp = pprint.PrettyPrinter()\n", 39 | "\n", 40 | "from IPython.display import HTML" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "### Load BABEL\n", 48 | "We assume that you have downloaded BABEL annotations from the [website](https://babel.is.tue.mpg.de/data.html) and placed the downloaded `babel_v1.0_release` folder in `data/`. The BABEL data is provided as two sets -- BABEL dense and BABEL extra. " 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 3, 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "d_folder = '../data/babel_v1.0_release' # Data folder\n", 58 | "l_babel_dense_files = ['train', 'val', 'test']\n", 59 | "l_babel_extra_files = ['extra_train', 'extra_val']\n", 60 | "\n", 61 | "# BABEL Dataset \n", 62 | "babel = {}\n", 63 | "for file in l_babel_dense_files:\n", 64 | " babel[file] = json.load(open(ospj(d_folder, file+'.json')))\n", 65 | " \n", 66 | "for file in l_babel_extra_files:\n", 67 | " babel[file] = json.load(open(ospj(d_folder, file+'.json'))) " 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "### View random annotation" 75 | ] 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "metadata": {}, 80 | "source": [ 81 | "Now, let us view an annotation data structure from the BABEL. \n", 82 | "The overall data structure is a dictionary, with a unique sequence ID as key and the annotation as value. 
" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 4, 88 | "metadata": {}, 89 | "outputs": [], 90 | "source": [ 91 | "def get_random_babel_ann():\n", 92 | " '''Get annotation from random sequence from a random file'''\n", 93 | " file = np.random.choice(l_babel_dense_files + l_babel_extra_files)\n", 94 | " seq_id = np.random.choice(list(babel[file].keys()))\n", 95 | " print('We are visualizing annotations for seq ID: {0} in \"{1}.json\"'.format(seq_id, file))\n", 96 | " ann = babel[file][seq_id]\n", 97 | " return ann, file" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 5, 103 | "metadata": {}, 104 | "outputs": [ 105 | { 106 | "name": "stdout", 107 | "output_type": "stream", 108 | "text": [ 109 | "We are visualizing annotations for seq ID: 3312 in \"test.json\"\n", 110 | "{'babel_sid': 3312,\n", 111 | " 'dur': 76.73,\n", 112 | " 'feat_p': 'CMU/CMU/86/86_08_poses.npz',\n", 113 | " 'frame_ann': {'anntr_id': 'c6065e9c-1652-46df-a45f-fe8b8158428f',\n", 114 | " 'babel_lid': 'a642048f-7fa9-402f-a4c1-d7e9e7f696d1',\n", 115 | " 'labels': [{'act_cat': None,\n", 116 | " 'end_t': 68.093,\n", 117 | " 'proc_label': None,\n", 118 | " 'raw_label': None,\n", 119 | " 'seg_id': 'ad703788-bd17-42d4-854b-2b64cb58ee16',\n", 120 | " 'start_t': 59.51},\n", 121 | " {'act_cat': None,\n", 122 | " 'end_t': 32.82,\n", 123 | " 'proc_label': None,\n", 124 | " 'raw_label': None,\n", 125 | " 'seg_id': '1785aeca-53ce-4a33-a249-8a5d3466ea95',\n", 126 | " 'start_t': 27.445},\n", 127 | " {'act_cat': None,\n", 128 | " 'end_t': 52.426,\n", 129 | " 'proc_label': None,\n", 130 | " 'raw_label': None,\n", 131 | " 'seg_id': '12768b82-b342-46ee-ae60-e158f8b1dd47',\n", 132 | " 'start_t': 47.843},\n", 133 | " {'act_cat': None,\n", 134 | " 'end_t': 59.51,\n", 135 | " 'proc_label': None,\n", 136 | " 'raw_label': None,\n", 137 | " 'seg_id': '435bd5a6-01e9-4fc4-abee-642954466832',\n", 138 | " 'start_t': 53.26},\n", 139 | " {'act_cat': None,\n", 140 | " 'end_t': 40.007,\n", 141 | " 'proc_label': None,\n", 142 | " 'raw_label': None,\n", 143 | " 'seg_id': 'd3911406-ad83-4438-941c-919bf296d5e1',\n", 144 | " 'start_t': 33.382},\n", 145 | " {'act_cat': None,\n", 146 | " 'end_t': 76.733,\n", 147 | " 'proc_label': None,\n", 148 | " 'raw_label': None,\n", 149 | " 'seg_id': 'f222a4d9-a8d5-4002-893b-4df102e1e0fa',\n", 150 | " 'start_t': 70.593},\n", 151 | " {'act_cat': None,\n", 152 | " 'end_t': 2.252,\n", 153 | " 'proc_label': None,\n", 154 | " 'raw_label': None,\n", 155 | " 'seg_id': '35e605ec-c9f8-4c9d-8320-680de71837ce',\n", 156 | " 'start_t': 0.294},\n", 157 | " {'act_cat': None,\n", 158 | " 'end_t': 6.961,\n", 159 | " 'proc_label': None,\n", 160 | " 'raw_label': None,\n", 161 | " 'seg_id': 'fdaead4c-0a37-4579-a42a-4a94145570b9',\n", 162 | " 'start_t': 4.232},\n", 163 | " {'act_cat': None,\n", 164 | " 'end_t': 70.593,\n", 165 | " 'proc_label': None,\n", 166 | " 'raw_label': None,\n", 167 | " 'seg_id': '52d3c3e9-102b-4cf0-b082-cd416a7b5f64',\n", 168 | " 'start_t': 68.093},\n", 169 | " {'act_cat': None,\n", 170 | " 'end_t': 4.232,\n", 171 | " 'proc_label': None,\n", 172 | " 'raw_label': None,\n", 173 | " 'seg_id': 'f524e2df-36e2-45ce-a54e-892fdb7353d0',\n", 174 | " 'start_t': 2.252},\n", 175 | " {'act_cat': None,\n", 176 | " 'end_t': 9.336,\n", 177 | " 'proc_label': None,\n", 178 | " 'raw_label': None,\n", 179 | " 'seg_id': '7f265bed-f445-4b6b-a41f-c62106d7be3b',\n", 180 | " 'start_t': 6.961},\n", 181 | " {'act_cat': None,\n", 182 | " 'end_t': 47.843,\n", 183 | " 'proc_label': None,\n", 184 | " 
'raw_label': None,\n", 185 | " 'seg_id': '1aa33355-a669-45a6-86a9-19ae862a47e9',\n", 186 | " 'start_t': 40.007},\n", 187 | " {'act_cat': None,\n", 188 | " 'end_t': 15.523,\n", 189 | " 'proc_label': None,\n", 190 | " 'raw_label': None,\n", 191 | " 'seg_id': 'd9c310f5-fc1e-47d8-b2f7-075c31a2eb6d',\n", 192 | " 'start_t': 9.523},\n", 193 | " {'act_cat': None,\n", 194 | " 'end_t': 22.507,\n", 195 | " 'proc_label': None,\n", 196 | " 'raw_label': None,\n", 197 | " 'seg_id': 'f7a71a16-2807-49f7-8a66-7df3e678e161',\n", 198 | " 'start_t': 15.523},\n", 199 | " {'act_cat': None,\n", 200 | " 'end_t': 0.294,\n", 201 | " 'proc_label': None,\n", 202 | " 'raw_label': None,\n", 203 | " 'seg_id': '3f57a657-2c8f-4995-87a4-965bcf8ea2a6',\n", 204 | " 'start_t': 0},\n", 205 | " {'act_cat': None,\n", 206 | " 'end_t': 9.523,\n", 207 | " 'proc_label': None,\n", 208 | " 'raw_label': None,\n", 209 | " 'seg_id': 'c9f97199-97eb-463c-a04e-a511413ad5ba',\n", 210 | " 'start_t': 9.336},\n", 211 | " {'act_cat': None,\n", 212 | " 'end_t': 33.382,\n", 213 | " 'proc_label': None,\n", 214 | " 'raw_label': None,\n", 215 | " 'seg_id': 'dac4fabe-e96c-411c-ad2e-29211e8c212a',\n", 216 | " 'start_t': 32.82},\n", 217 | " {'act_cat': None,\n", 218 | " 'end_t': 53.26,\n", 219 | " 'proc_label': None,\n", 220 | " 'raw_label': None,\n", 221 | " 'seg_id': 'ed99bf22-3ea5-45a6-9df3-17e67e49f119',\n", 222 | " 'start_t': 52.426},\n", 223 | " {'act_cat': None,\n", 224 | " 'end_t': 27.445,\n", 225 | " 'proc_label': None,\n", 226 | " 'raw_label': None,\n", 227 | " 'seg_id': '5c459b13-35e6-4c36-8ec4-9eb1536bfe95',\n", 228 | " 'start_t': 22.507}],\n", 229 | " 'mul_act': True},\n", 230 | " 'seq_ann': {'anntr_id': 'a217bb6b-93ae-4611-8e53-d4318ed5be00',\n", 231 | " 'babel_lid': '037dc092-28d5-4537-9632-9a91fc9f7fb9',\n", 232 | " 'labels': [{'act_cat': None,\n", 233 | " 'proc_label': None,\n", 234 | " 'raw_label': None,\n", 235 | " 'seg_id': 'f7d4b8fa-de77-487f-a08c-84bbc05c3148'}],\n", 236 | " 'mul_act': True},\n", 237 | " 'url': 'https://babel-renders.s3.eu-central-1.amazonaws.com/003312.mp4'}\n" 238 | ] 239 | } 240 | ], 241 | "source": [ 242 | "ann, _ = get_random_babel_ann()\n", 243 | "pp.pprint(ann)" 244 | ] 245 | }, 246 | { 247 | "cell_type": "markdown", 248 | "metadata": {}, 249 | "source": [ 250 | "Note that the action labels from `test.json` are not available publicly. \n", 251 | "Also note that the internal data structures of BABEL dense and BABEL extra differ slightly. \n", 252 | "For a detailed description of the annotation, see [BABEL's data page](https://babel.is.tue.mpg.de/data.html)." 253 | ] 254 | }, 255 | { 256 | "cell_type": "markdown", 257 | "metadata": {}, 258 | "source": [ 259 | "### Visualize a mocap seq. and its action labels " 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": 6, 265 | "metadata": {}, 266 | "outputs": [], 267 | "source": [ 268 | "def get_vid_html(url):\n", 269 | " '''Helper code to embed a URL in a notebook'''\n", 270 | " html_code = '
'\n", 273 | " return html_code" 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": 7, 279 | "metadata": {}, 280 | "outputs": [], 281 | "source": [ 282 | "def get_labels(ann, file):\n", 283 | " # Get sequence labels and frame labels if they exist\n", 284 | " seq_l, frame_l = None, None\n", 285 | " if 'extra' not in file:\n", 286 | " if ann['seq_ann'] is not None:\n", 287 | " seq_l = [seg['raw_label'] for seg in ann['seq_ann']['labels']]\n", 288 | " if ann['frame_ann'] is not None:\n", 289 | " frame_l = [(seg['raw_label'], seg['start_t'], seg['end_t']) for seg in ann['frame_ann']['labels']]\n", 290 | " else:\n", 291 | " # Load labels from 1st annotator (random) if there are multiple annotators\n", 292 | " if ann['seq_anns'] is not None:\n", 293 | " seq_l = [seg['raw_label'] for seg in ann['seq_anns'][0]['labels']]\n", 294 | " if ann['frame_anns'] is not None:\n", 295 | " frame_l = [(seg['raw_label'], seg['start_t'], seg['end_t']) for seg in ann['frame_anns'][0]['labels']]\n", 296 | " return seq_l, frame_l" 297 | ] 298 | }, 299 | { 300 | "cell_type": "markdown", 301 | "metadata": {}, 302 | "source": [ 303 | "#### Visualize a random mocap and its annotation from BABEL, by running the cell below. " 304 | ] 305 | }, 306 | { 307 | "cell_type": "code", 308 | "execution_count": 8, 309 | "metadata": {}, 310 | "outputs": [ 311 | { 312 | "name": "stdout", 313 | "output_type": "stream", 314 | "text": [ 315 | "We are visualizing annotations for seq ID: 7536 in \"train.json\"\n", 316 | "Sequence labels: ['pace and shake hand']\n", 317 | "Frame labels: (action label, start time, end time)\n", 318 | "[('walk', 0, 2.106),\n", 319 | " ('transition', 2.106, 2.845),\n", 320 | " ('make a knocking gesture', 2.845, 3.507),\n", 321 | " ('transition', 3.466, 4.6),\n", 322 | " ('turn around', 4.519, 5.519),\n", 323 | " ('walk back', 5.424, 7.734)]\n" 324 | ] 325 | }, 326 | { 327 | "data": { 328 | "text/html": [ 329 | "
" 330 | ], 331 | "text/plain": [ 332 | "" 333 | ] 334 | }, 335 | "execution_count": 8, 336 | "metadata": {}, 337 | "output_type": "execute_result" 338 | } 339 | ], 340 | "source": [ 341 | "ann, file = get_random_babel_ann()\n", 342 | "seq_l, frame_l = get_labels(ann, file)\n", 343 | "print('Sequence labels: ', seq_l)\n", 344 | "print('Frame labels: (action label, start time, end time)')\n", 345 | "pp.pprint(frame_l) \n", 346 | "HTML(get_vid_html(ann['url']))" 347 | ] 348 | }, 349 | { 350 | "cell_type": "markdown", 351 | "metadata": {}, 352 | "source": [ 353 | "- If you are interested in loading the mocap sequence in 3D, please refer to the tutorials in [AMASS](https://github.com/nghorbani/amass/tree/master/notebooks)" 354 | ] 355 | } 356 | ], 357 | "metadata": { 358 | "kernelspec": { 359 | "display_name": "Python 3", 360 | "language": "python", 361 | "name": "python3" 362 | }, 363 | "language_info": { 364 | "codemirror_mode": { 365 | "name": "ipython", 366 | "version": 3 367 | }, 368 | "file_extension": ".py", 369 | "mimetype": "text/x-python", 370 | "name": "python", 371 | "nbconvert_exporter": "python", 372 | "pygments_lexer": "ipython3", 373 | "version": "3.8.3" 374 | } 375 | }, 376 | "nbformat": 4, 377 | "nbformat_minor": 4 378 | } 379 | -------------------------------------------------------------------------------- /action_recognition/data_gen/create_dataset.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # vim:fenc=utf-8 4 | # 5 | # Copyright © 2021 achandrasekaran 6 | # 7 | # Distributed under terms of the MIT license. 8 | 9 | 10 | import sys, os, pdb 11 | from os.path import join as ospj 12 | from os.path import basename as ospb 13 | from os.path import dirname as ospd 14 | import numpy as np 15 | import torch 16 | from collections import * 17 | from itertools import * 18 | import pandas as pd 19 | import pickle, json, csv 20 | from tqdm import tqdm 21 | from pandas.core.common import flatten 22 | import ipdb 23 | import pickle 24 | 25 | # Custom 26 | import preprocess 27 | import dutils 28 | import viz 29 | 30 | """ 31 | Script to load BABEL segments with NTU skeleton format and pre-process. 32 | """ 33 | 34 | 35 | def ntu_style_preprocessing(b_dset_path): 36 | ''' 37 | ''' 38 | pdb.set_trace() 39 | print('Load BABEL v1.0 dataset subset', b_dset_path) 40 | b_dset = dutils.read_pkl(b_dset_path) 41 | # Get unnormalized 5-sec. samples 42 | X = np.array(b_dset['X']) 43 | print('X (old) = ', np.shape(X)) # N, T, V, C 44 | 45 | # Prep. data for normalization 46 | X = X.transpose(0, 3, 1, 2) # N, C, T, V 47 | X = X[:, :, :, :, np.newaxis] # N, C, T, V, M 48 | print('Shape of prepped X: ', X.shape) 49 | 50 | # Normalize (pre-process) in NTU RGBD-style 51 | ntu_sk_spine_bone = np.array([0, 1]) 52 | ntu_sk_shoulder_bone = np.array([8, 4]) 53 | X, l_m_sk = preprocess.pre_normalization(X, zaxis=ntu_sk_spine_bone, 54 | xaxis=ntu_sk_shoulder_bone) 55 | print('Shape of normalized X: ', X.shape) 56 | print('Skipping {0} samples because "skeleton is missing"'.format(len(l_m_sk))) 57 | print('Skipped idxs = ', l_m_sk) 58 | 59 | # Dataset w/ processed seg. chunks. 
(Skip samples w/ missing skeletons) 60 | b_AR_dset = {k: np.delete(b_dset[k], l_m_sk) for k in b_dset if k!='X'} 61 | b_AR_dset['X'] = np.delete(X, l_m_sk, axis=0) 62 | print('Shape of dataset = ', b_AR_dset['X'].shape) 63 | 64 | fp = b_dset_path.replace('samples', 'ntu_sk_ntu-style_preprocessed' ) 65 | # fp = '../data/babel_v1.0/babel_v1.0_ntu_sk_ntu-style_preprocessed.pkl' 66 | # dutils.write_pkl(b_AR_dset, fp) 67 | with open(fp, 'wb') as of: 68 | pickle.dump(b_AR_dset, of, protocol=4) 69 | 70 | def get_act_idx(y, act2idx, n_classes): 71 | ''' 72 | ''' 73 | if y in act2idx: 74 | return act2idx[y] 75 | else: 76 | return n_classes 77 | 78 | def store_splits_subsets(n_classes, spl, plus_extra = True, w_folder = '../data/babel_v1.0/'): 79 | ''' 80 | ''' 81 | # Get splits 82 | splits = dutils.read_json('../data/amass_splits.json') 83 | sid2split = {int(ospb(u).replace('.mp4', '')): spl for spl in splits \ 84 | for u in splits[spl] } 85 | 86 | # In labels, act. cat. --> idx 87 | act2idx_150 = dutils.read_json('../data/action_label_2_idx.json') 88 | act2idx = {k: act2idx_150[k] for k in act2idx_150 if act2idx_150[k] < n_classes} 89 | print('{0} actions in label set: {1}'.format(len(act2idx), act2idx)) 90 | 91 | if plus_extra : 92 | fp = w_folder + 'babel_v1.0_'+spl+'_extra_ntu_sk_ntu-style_preprocessed.pkl' 93 | else: 94 | fp = w_folder + 'babel_v1.0_'+spl+'_ntu_sk_ntu-style_preprocessed.pkl' 95 | 96 | # Get full dataset 97 | b_AR_dset = dutils.read_pkl(fp) 98 | 99 | # Store idxs of samples to include in learning 100 | split_idxs = defaultdict(list) 101 | for i, y1 in enumerate(b_AR_dset['Y1']): 102 | 103 | # Check if action category in list of classes 104 | if y1 not in act2idx: 105 | continue 106 | 107 | sid = b_AR_dset['sid'][i] 108 | split_idxs[sid2split[sid]].append(i) # Include idx in dataset 109 | 110 | # Save features that'll be loaded by dataloader 111 | ar_idxs = np.array(split_idxs[spl]) 112 | X = b_AR_dset['X'][ar_idxs] 113 | if plus_extra: 114 | fn = w_folder + f'{spl}_extra_ntu_sk_{n_classes}.npy' 115 | else: 116 | fn = w_folder + f'{spl}_ntu_sk_{n_classes}.npy' 117 | np.save(fn, X) 118 | 119 | # labels 120 | labels = {k: np.array(b_AR_dset[k])[ar_idxs] for k in b_AR_dset if k!='X'} 121 | 122 | # Create, save label data structure that'll be loaded by dataloader 123 | label_idxs = defaultdict(list) 124 | for i, y1 in enumerate(labels['Y1']): 125 | # y1 126 | label_idxs['Y1'].append(act2idx[y1]) 127 | # yk 128 | yk = [get_act_idx(y, act2idx, n_classes) for y in labels['Yk'][i]] 129 | label_idxs['Yk'].append(yk) 130 | # yov 131 | yov_o = labels['Yov'][i] 132 | yov = {get_act_idx(y, act2idx, n_classes): yov_o[y] for y in yov_o} 133 | label_idxs['Yov'].append(yov) 134 | # 135 | label_idxs['seg_id'].append(labels['seg_id'][i]) 136 | label_idxs['sid'].append(labels['sid'][i]) 137 | label_idxs['chunk_n'].append(labels['chunk_n'][i]) 138 | label_idxs['anntr_id'].append(labels['anntr_id'][i]) 139 | 140 | if plus_extra: 141 | wr_f = w_folder + f'{spl}_extra_label_{n_classes}.pkl' 142 | else: 143 | wr_f = w_folder + f'{spl}_label_{n_classes}.pkl' 144 | dutils.write_pkl(\ 145 | (label_idxs['seg_id'], (label_idxs['Y1'], label_idxs['sid'], 146 | label_idxs['chunk_n'], label_idxs['anntr_id'])), \ 147 | wr_f) 148 | 149 | class Babel_AR: 150 | '''Object containing data, methods for Action Recognition. 151 | 152 | Task 153 | ----- 154 | Given: x (Segment from Babel) 155 | Predict: \hat{p}(x) (Distribution over action categories) 156 | 157 | GT 158 | --- 159 | How to compute GT for a given segment? 
160 | - yk: All action categories that are labeled for the entirety of segment 161 | - y1: One of yk 162 | - yov: Any y that belongs to part of a segment is considered to be GT. 163 | Fraction of segment covered by an action: {'walk': 1.0, 'wave': 0.5} 164 | 165 | ''' 166 | def __init__(self, dataset, dense=True, seq_dense_ann_type={}): 167 | '''Dataset with (samples, different GTs) 168 | ''' 169 | # Load dataset 170 | self.babel = dataset 171 | self.dense = dense 172 | self.seq_dense_ann_type = seq_dense_ann_type 173 | self.jpos_p = '../../../../../amass/' 174 | 175 | # Get frame-rate for each seq. in AMASS 176 | f_p = '../data/featp_2_fps.json' 177 | self.ft_p_2_fps = dutils.read_json(f_p) 178 | 179 | # Dataset w/ keys = {'X', 'Y1', 'Yk', 'Yov', 'seg_id', 'sid', 180 | # 'seg_dur'} 181 | self.d = defaultdict(list) 182 | for ann in tqdm(self.babel): 183 | self._update_dataset(ann) 184 | 185 | def _subsample_to_30fps(self, orig_ft, orig_fps): 186 | '''Get features at 30fps frame-rate 187 | Args: 188 | orig_ft (T, 25*3): Feats. @ `orig_fps` frame-rate 189 | orig_fps : Frame-rate in original (ft) seq. 190 | Return: 191 | ft (T', 25*3): Feats. @ 30fps 192 | ''' 193 | T, n_j, _ = orig_ft.shape 194 | out_fps = 30.0 195 | # Matching the sub-sampling used for rendering 196 | if int(orig_fps)%int(out_fps): 197 | sel_fr = np.floor(orig_fps / out_fps * np.arange(int(out_fps))).astype(int) 198 | n_duration = int(T/int(orig_fps)) 199 | t_idxs = [] 200 | for i in range(n_duration): 201 | t_idxs += list(i * int(orig_fps) + sel_fr) 202 | if int(T % int(orig_fps)): 203 | last_sec_frame_idx = n_duration*int(orig_fps) 204 | t_idxs += [x+ last_sec_frame_idx for x in sel_fr if x + last_sec_frame_idx < T ] 205 | else: 206 | t_idxs = np.arange(0, T, orig_fps/out_fps, dtype=int) 207 | 208 | ft = orig_ft[t_idxs, :, :] 209 | return ft 210 | 211 | def _viz_x(self, ft, fn='test_sample'): 212 | '''Wraper to Viz. the given sample (w/ NTU RGBD skeleton)''' 213 | viz.viz_seq(seq=ft, folder_p=f'test_viz/{fn}', sk_type='nturgbd', 214 | debug=True) 215 | return None 216 | 217 | def _load_seq_feats(self, ft_p, sk_type): 218 | '''Given path to joint position features, return them in 30fps''' 219 | # Identify appropriate feature directory path on disk 220 | if 'smpl_wo_hands' == sk_type: # SMPL w/o hands (T, 22*3) 221 | jpos_p = ospj(self.jpos_p, 'joint_pos') 222 | if 'nturgbd' == sk_type: # NTU (T, 219) 223 | jpos_p = ospj(self.jpos_p, 'babel_joint_pos') 224 | 225 | # Get the correct dataset folder name 226 | ddir_n = ospb(ospd(ospd(ft_p))) 227 | ddir_map = {'BioMotionLab_NTroje': 'BMLrub', 'DFaust_67': 'DFaust'} 228 | ddir_n = ddir_map[ddir_n] if ddir_n in ddir_map else ddir_n 229 | # Get the subject folder name 230 | sub_fol_n = ospb(ospd(ft_p)) 231 | 232 | # Sanity check 233 | fft_p = ospj(jpos_p, ddir_n, sub_fol_n, ospb(ft_p)) 234 | assert os.path.exists(fft_p) 235 | 236 | # Load seq. fts. 237 | ft = np.load(fft_p)['joint_pos'] 238 | T, ft_sz = ft.shape 239 | 240 | # Get NTU skeleton joints 241 | ntu_js = dutils.smpl_to_nturgbd(model_type='smplh', out_format='nturgbd') 242 | ft = ft.reshape(T, -1, 3) 243 | ft = ft[:, ntu_js, :] 244 | 245 | # Sub-sample to 30fps 246 | orig_fps = self.ft_p_2_fps[ft_p] 247 | ft = self._subsample_to_30fps(ft, orig_fps) 248 | # print(f'Feat. shape = {ft.shape}, fps = {orig_fps}') 249 | # if orig_fps != 30.0: 250 | # self._viz_x(ft) 251 | return ft 252 | 253 | def _get_per_f_labels(self, ann, ann_type, seq_dur): 254 | ''' ''' 255 | # Per-frame labels: {0: ['walk'], 1: ['walk', 'wave'], ... 
T: ['stand']} 256 | yf = defaultdict(list) 257 | T = int(30.0*seq_dur) 258 | for n_f in range(T): 259 | cur_t = float(n_f/30.0) 260 | for seg in ann['labels']: 261 | 262 | if seg['act_cat'] is None: 263 | continue 264 | 265 | if 'seq_ann' == ann_type: 266 | seg['start_t'] = 0.0 267 | seg['end_t'] = seq_dur 268 | 269 | if cur_t >= float(seg['start_t']) and cur_t < float(seg['end_t']): 270 | yf[n_f] += seg['act_cat'] 271 | return yf 272 | 273 | def _compute_dur_samples(self, ann, ann_type, seq_ft, seq_dur, dur=5.0): 274 | '''Return each GT action, corresponding to the fraction of the 275 | segment that it overlaps with. 276 | There are 2 conditions that we need to handle: 277 | 1. Multiple action categories in 'act_cat' 278 | 2. Simultaneous (overlapping action segments). 279 | 280 | Example Input: 281 | Seq. => frames [0, 1, 2, 3, 4, 5] 282 | GT acts. => [[2,3], [2,3], [2], [0], [0,1], [0,1]] 283 | 284 | Segs, GT: 285 | 1. seg_x = seq[0: 3], y1 = 2, yall = {2: 1.0, 3: 0.66} 286 | 2. seg_x = seq[0: 2], y1 = 3, yall = {2: 1.0, 3: 1.0} 287 | 3. seg_x = seq[3: ], y1 = 0, yall = {0: 1.0, 1: 0.66} 288 | 4. seg_x = seq[4: ], y1 = 1, yall = {0: 1.0, 1: 1.0} 289 | 290 | - Note that we should do the above for each chunk in a segment, 291 | each of duration = seconds. 292 | 293 | Return: 294 | [ { 'x': [st_t, end_t], 295 | 'y1': , 296 | 'yall': { : , ...}}, 297 | { ... }, ... 298 | ] 299 | ''' 300 | # 301 | yf = self._get_per_f_labels(ann, ann_type, seq_dur) 302 | 303 | # Compute, store all samples for each segment 304 | seq_samples = [] 305 | for seg in ann['labels']: 306 | 307 | # If no labeled act. cats. for current seg., skip it 308 | if seg['act_cat'] is None or 0 == len(seg['act_cat']): 309 | continue 310 | 311 | # Handle stage 1 missing durs. 312 | if 'seq_ann' == ann_type: 313 | seg['start_t'] = 0.0 314 | seg['end_t'] = seq_dur 315 | 316 | # Get segment feats. 317 | seg_st_f, seg_end_f = int(30.0*seg['start_t']), int(30.0*seg['end_t']) 318 | seg_x = seq_ft[seg_st_f: seg_end_f, :, :] 319 | 320 | # Split segment into -second chunks 321 | n_f_pc = 30.0 * dur 322 | n_chunks = int(np.ceil(seg_x.shape[0]/n_f_pc)) 323 | for n_ch in range(n_chunks): 324 | 325 | # Single -sec. chunk in segment 326 | ch_st_f = int(n_f_pc * n_ch) 327 | ch_end_f = int(min(ch_st_f + n_f_pc, seg_x.shape[0])) 328 | x = seg_x[ch_st_f: ch_end_f, :, :] 329 | 330 | # Handle case where chunk_T < n_f_pc 331 | x_T, nj, xyz = x.shape 332 | x_ch = np.concatenate((x, np.zeros((int(n_f_pc)- x_T, nj, xyz))), axis=0) 333 | 334 | # Labels for this chunk 335 | yov = Counter(flatten([yf[seg_st_f + n_f] for n_f in range(ch_st_f, ch_end_f)])) 336 | 337 | # Sanity check -- is segment smaller than 1 frame? 338 | if seg['act_cat'][0] not in yov: 339 | # print('Skipping seg:', seg) 340 | # print(f'Chunk # {n_ch}, Yov: ', yov) 341 | continue 342 | 343 | yov = {k: round(yov[k]/x_T, 3) for k in yov} 344 | 345 | # For each act_cat in segment, create a separate sample 346 | for cat in seg['act_cat']: 347 | # Add to samples GTs 348 | seq_samples.append({'seg_id': seg['seg_id'], 349 | 'chunk_n': n_ch, 350 | 'chunk_dur': round(x_T/n_f_pc, 3), 351 | 'x': x_ch, 352 | 'y1': cat, 353 | 'yk': seg['act_cat'], 354 | 'yov': yov, 355 | 'anntr_id': ann['anntr_id'] 356 | }) 357 | return seq_samples 358 | 359 | def _sample_at_seg_chunk_level(self, ann, seq_samples): 360 | # Samples at segment-chunk-level 361 | for i, sample in enumerate(seq_samples): 362 | 363 | self.d['sid'].append(ann['babel_sid']) # Seq. info 364 | self.d['seg_id'].append(sample['seg_id']) # Seg. 
info 365 | self.d['chunk_n'].append(sample['chunk_n']) # Seg. chunk info 366 | self.d['anntr_id'].append(sample['anntr_id']) # Annotator id (useful in rebuttal exp.) 367 | self.d['chunk_dur'].append(sample['chunk_dur']) # Seg. chunk info 368 | self.d['X'].append(sample['x']) # Seg. chunk feats. 369 | self.d['Y1'].append(sample['y1']) # 1 out of k GT act. cats. 370 | self.d['Yk'].append(sample['yk']) # List of k GT act. cats. 371 | # : fractions of overlapping act. cats. 372 | self.d['Yov'].append(sample['yov']) 373 | return 374 | 375 | def _update_dataset(self, ann): 376 | '''Return one sample (one segment) = (X, Y1, Yall)''' 377 | 378 | # Get feats. for seq. 379 | seq_ft = self._load_seq_feats(ann['feat_p'], 'nturgbd') 380 | 381 | # To keep track of type of annotation for loading 'extra' 382 | # Compute all GT labels for this seq. 383 | seq_samples = None 384 | if self.dense: 385 | if ann['frame_ann'] is not None: 386 | ann_ar = ann['frame_ann'] 387 | self.seq_dense_ann_type[ann['babel_sid']] = 'frame_ann' 388 | seq_samples = self._compute_dur_samples(ann_ar, 'frame_ann', seq_ft, ann['dur']) 389 | else: 390 | ann_ar = ann['seq_ann'] 391 | self.seq_dense_ann_type[ann['babel_sid']] = 'seq_ann' 392 | seq_samples = self._compute_dur_samples(ann_ar, 'seq_ann', seq_ft, ann['dur']) 393 | self._sample_at_seg_chunk_level(ann, seq_samples) 394 | else: 395 | # check if extra exists 396 | if 'frame_anns' in ann.keys() or 'seq_anns' in ann.keys(): 397 | ann_type = None 398 | if ann['babel_sid'] in self.seq_dense_ann_type: 399 | ann_type = self.seq_dense_ann_type[ann['babel_sid']] 400 | else: 401 | if ann['frame_anns'] is not None: 402 | ann_type = 'frame_ann' 403 | elif ann['seq_anns'] is not None: 404 | ann_type = 'seq_ann' 405 | else: 406 | ipdb.set_trace() 407 | self.seq_dense_ann_type['babel_sid'] = ann_type 408 | ann_ar = None 409 | if ann_type == 'frame_ann': 410 | if ann['frame_anns'] is not None: 411 | ann_ar = ann['frame_anns'] 412 | elif ann_type == 'seq_ann': 413 | if ann['seq_anns'] is not None: 414 | ann_ar = ann['seq_anns'] 415 | else: 416 | ipdb.set_trace() 417 | if ann_ar: 418 | for an in ann_ar: 419 | seq_samples = self._compute_dur_samples(an, ann_type, \ 420 | seq_ft, ann['dur']) 421 | self._sample_at_seg_chunk_level(ann, seq_samples) 422 | else: 423 | print('Unexpected format for extra!') 424 | return 425 | 426 | 427 | # Create dataset 428 | # -------------------------- 429 | d_folder = '../../data/babel_v1.0_release/' 430 | w_folder = '../data/babel_v1.0/' 431 | for spl in ['train', 'val']: 432 | 433 | # Load Dense BABEL 434 | data = dutils.read_json(ospj(d_folder, f'{spl}.json')) 435 | dataset = [data[sid] for sid in data] 436 | dense_babel = Babel_AR(dataset, dense=True) 437 | # Store Dense BABEL 438 | d_filename = w_folder + 'babel_v1.0_'+ spl + '_samples.pkl' 439 | dutils.write_pkl(dense_babel.d, d_filename) 440 | 441 | # Load Extra BABEL 442 | data = dutils.read_json(ospj(d_folder, f'extra_{spl}.json')) 443 | dataset = [data[sid] for sid in data] 444 | extra_babel = Babel_AR(dataset, dense=False, 445 | seq_dense_ann_type=dense_babel.seq_dense_ann_type) 446 | # Store Dense + Extra 447 | de = {} 448 | for k in dense_babel.d.keys(): 449 | de[k] = dense_babel.d[k] + extra_babel.d[k] 450 | ex_filename = w_folder + 'babel_v1.0_' + spl + '_extra_samples.pkl' 451 | dutils.write_pkl(de, ex_filename) 452 | 453 | # Pre-process, Store data in dataset 454 | print('NTU-style preprocessing') 455 | babel_dataset_AR = ntu_style_preprocessing(d_filename) 456 | babel_dataset_AR = 
ntu_style_preprocessing(ex_filename) 457 | 458 | for ex, C in product(('', '_extra'), (120, 60)): 459 | 460 | # Split, store data in npy file, labels in pkl 461 | store_splits_subsets(n_classes=C, spl=spl, plus_extra=True) 462 | store_splits_subsets(n_classes=C, spl=spl, plus_extra=False) 463 | 464 | # Store counts of samples for training with class-balanced focal loss 465 | label_fp = ospj(w_folder, f'{spl}{ex}_label_{C}.pkl') 466 | dutils.store_counts(label_fp) 467 | 468 | -------------------------------------------------------------------------------- /action_recognition/train_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # -*- coding: utf-8 -*- 4 | # 5 | # Adapted from https://github.com/lshiwjx/2s-AGCN for BABEL (https://babel.is.tue.mpg.de/) 6 | 7 | from __future__ import print_function 8 | 9 | import argparse 10 | import inspect 11 | import os 12 | import pickle 13 | import random 14 | import shutil 15 | import time 16 | from collections import * 17 | import numpy as np 18 | 19 | # torch 20 | import torch 21 | import torch.backends.cudnn as cudnn 22 | import torch.nn as nn 23 | import torch.optim as optim 24 | import torch.nn.functional as F 25 | 26 | import yaml 27 | from tensorboardX import SummaryWriter 28 | from torch.autograd import Variable 29 | from torch.optim.lr_scheduler import _LRScheduler 30 | from tqdm import tqdm 31 | 32 | import pdb 33 | import ipdb 34 | 35 | # Custom 36 | from class_balanced_loss import CB_loss 37 | 38 | 39 | # class GradualWarmupScheduler(_LRScheduler): 40 | # def __init__(self, optimizer, total_epoch, after_scheduler=None): 41 | # self.total_epoch = total_epoch 42 | # self.after_scheduler = after_scheduler 43 | # self.finished = False 44 | # self.last_epoch = -1 45 | # super().__init__(optimizer) 46 | 47 | # def get_lr(self): 48 | # return [base_lr * (self.last_epoch + 1) / self.total_epoch for base_lr in self.base_lrs] 49 | 50 | # def step(self, epoch=None, metric=None): 51 | # if self.last_epoch >= self.total_epoch - 1: 52 | # if metric is None: 53 | # return self.after_scheduler.step(epoch) 54 | # else: 55 | # return self.after_scheduler.step(metric, epoch) 56 | # else: 57 | # return super(GradualWarmupScheduler, self).step(epoch) 58 | 59 | 60 | def init_seed(_): 61 | torch.cuda.manual_seed_all(1) 62 | torch.manual_seed(1) 63 | np.random.seed(1) 64 | random.seed(1) 65 | # torch.backends.cudnn.enabled = False 66 | torch.backends.cudnn.deterministic = True 67 | torch.backends.cudnn.benchmark = False 68 | 69 | 70 | def get_parser(): 71 | # parameter priority: command line > config > default 72 | parser = argparse.ArgumentParser( 73 | description='Spatial Temporal Graph Convolution Network') 74 | parser.add_argument( 75 | '--work-dir', 76 | default='./work_dir/temp', 77 | help='the work folder for storing results') 78 | 79 | parser.add_argument('-model_saved_name', default='') 80 | parser.add_argument( 81 | '--config', 82 | default='./config/nturgbd-cross-view/test_bone.yaml', 83 | help='path to the configuration file') 84 | 85 | # processor 86 | parser.add_argument( 87 | '--phase', default='train', help='must be train or test') 88 | parser.add_argument( 89 | '--save-score', 90 | type=str2bool, 91 | default=True, 92 | help='if ture, the classification score will be stored') 93 | 94 | # visulize and debug 95 | parser.add_argument( 96 | '--seed', type=int, default=1, help='random seed for pytorch') 97 | parser.add_argument( 98 | '--log-interval', 99 | type=int, 100 | 
default=100, 101 | help='the interval for printing messages (#iteration)') 102 | parser.add_argument( 103 | '--save-interval', 104 | type=int, 105 | default=2, 106 | help='the interval for storing models (#iteration)') 107 | parser.add_argument( 108 | '--eval-interval', 109 | type=int, 110 | default=5, 111 | help='the interval for evaluating models (#iteration)') 112 | parser.add_argument( 113 | '--print-log', 114 | type=str2bool, 115 | default=True, 116 | help='print logging or not') 117 | parser.add_argument( 118 | '--show-topk', 119 | type=int, 120 | default=[1, 5], 121 | nargs='+', 122 | help='which Top K accuracy will be shown') 123 | 124 | # feeder 125 | parser.add_argument( 126 | '--feeder', default='feeder.feeder', help='data loader will be used') 127 | parser.add_argument( 128 | '--num-worker', 129 | type=int, 130 | default=32, 131 | help='the number of worker for data loader') 132 | parser.add_argument( 133 | '--train-feeder-args', 134 | default=dict(), 135 | help='the arguments of data loader for training') 136 | parser.add_argument( 137 | '--test-feeder-args', 138 | default=dict(), 139 | help='the arguments of data loader for test') 140 | 141 | # model 142 | parser.add_argument('--model', default=None, help='the model will be used') 143 | parser.add_argument( 144 | '--model-args', 145 | type=dict, 146 | default=dict(), 147 | help='the arguments of model') 148 | parser.add_argument( 149 | '--weights', 150 | default=None, 151 | help='the weights for network initialization') 152 | parser.add_argument( 153 | '--ignore-weights', 154 | type=str, 155 | default=[], 156 | nargs='+', 157 | help='the name of weights which will be ignored in the initialization') 158 | 159 | # optim 160 | parser.add_argument( 161 | '--base-lr', type=float, default=0.01, help='initial learning rate') 162 | parser.add_argument( 163 | '--step', 164 | type=int, 165 | default=[20, 40, 60], 166 | nargs='+', 167 | help='the epoch where optimizer reduce the learning rate') 168 | 169 | #training 170 | parser.add_argument( 171 | '--device', 172 | type=int, 173 | default=0, 174 | nargs='+', 175 | help='the indexes of GPUs for training or testing') 176 | parser.add_argument('--optimizer', default='SGD', help='type of optimizer') 177 | parser.add_argument( 178 | '--nesterov', type=str2bool, default=False, help='use nesterov or not') 179 | parser.add_argument( 180 | '--batch-size', type=int, default=256, help='training batch size') 181 | parser.add_argument( 182 | '--test-batch-size', type=int, default=256, help='test batch size') 183 | parser.add_argument( 184 | '--start-epoch', 185 | type=int, 186 | default=0, 187 | help='start training from which epoch') 188 | parser.add_argument( 189 | '--num-epoch', 190 | type=int, 191 | default=80, 192 | help='stop training in which epoch') 193 | parser.add_argument( 194 | '--weight-decay', 195 | type=float, 196 | default=0.0005, 197 | help='weight decay for optimizer') 198 | # loss 199 | parser.add_argument( 200 | '--loss', 201 | type=str, 202 | default='CE', 203 | help='loss type(CE or focal)') 204 | parser.add_argument( 205 | '--label_count_path', 206 | default=None, 207 | type=str, 208 | help='Path to label counts (used in loss weighting)') 209 | parser.add_argument( 210 | '---beta', 211 | type=float, 212 | default=0.9999, 213 | help='Hyperparameter for Class balanced loss') 214 | parser.add_argument( 215 | '--gamma', 216 | type=float, 217 | default=2.0, 218 | help='Hyperparameter for Focal loss') 219 | 220 | parser.add_argument('--only_train_part', default=False) 221 | 
parser.add_argument('--only_train_epoch', default=0) 222 | parser.add_argument('--warm_up_epoch', default=0) 223 | return parser 224 | 225 | 226 | class Processor(): 227 | """ 228 | Processor for Skeleton-based Action Recgnition 229 | """ 230 | def __init__(self, arg): 231 | self.arg = arg 232 | self.save_arg() 233 | if arg.phase == 'train': 234 | if not arg.train_feeder_args['debug']: 235 | if os.path.isdir(arg.model_saved_name): 236 | print('log_dir: ', arg.model_saved_name, 'already exist') 237 | # answer = input('delete it? y/n:') 238 | answer = 'y' 239 | if answer == 'y': 240 | print('Deleting dir...') 241 | shutil.rmtree(arg.model_saved_name) 242 | print('Dir removed: ', arg.model_saved_name) 243 | # input('Refresh the website of tensorboard by pressing any keys') 244 | else: 245 | print('Dir not removed: ', arg.model_saved_name) 246 | self.train_writer = SummaryWriter(os.path.join(arg.model_saved_name, 'train'), 'train') 247 | self.val_writer = SummaryWriter(os.path.join(arg.model_saved_name, 'val'), 'val') 248 | else: 249 | self.train_writer = self.val_writer = SummaryWriter(os.path.join(arg.model_saved_name, 'test'), 'test') 250 | self.global_step = 0 251 | self.load_model() 252 | self.load_optimizer() 253 | self.load_data() 254 | self.lr = self.arg.base_lr 255 | self.best_acc = 0 256 | self.best_per_class_acc = 0 257 | 258 | def load_data(self): 259 | Feeder = import_class(self.arg.feeder) 260 | self.data_loader = dict() 261 | if self.arg.phase == 'train': 262 | self.data_loader['train'] = torch.utils.data.DataLoader( 263 | dataset=Feeder(**self.arg.train_feeder_args), 264 | batch_size=self.arg.batch_size, 265 | shuffle=True, 266 | num_workers=self.arg.num_worker, 267 | drop_last=True, 268 | worker_init_fn=init_seed) 269 | self.data_loader['test'] = torch.utils.data.DataLoader( 270 | dataset=Feeder(**self.arg.test_feeder_args), 271 | batch_size=self.arg.test_batch_size, 272 | shuffle=False, 273 | num_workers=self.arg.num_worker, 274 | drop_last=False, 275 | worker_init_fn=init_seed) 276 | 277 | def load_class_weights(self): 278 | if arg.label_count_path == None: 279 | raise Exception('No label count path..!!!') 280 | with open(arg.label_count_path, 'rb') as f: 281 | label_count = pickle.load(f) 282 | img_num_per_cls = [] 283 | # ipdb.set_trace() 284 | for cls_idx in range(len(label_count)): 285 | img_num_per_cls.append(int(label_count[cls_idx])) 286 | self.samples_per_class = img_num_per_cls 287 | 288 | def load_model(self): 289 | output_device = self.arg.device[0] if type(self.arg.device) is list else self.arg.device 290 | self.output_device = output_device 291 | Model = import_class(self.arg.model) 292 | shutil.copy2(inspect.getfile(Model), self.arg.work_dir) 293 | print(Model) 294 | self.model = Model(**self.arg.model_args).cuda(output_device) 295 | print(self.model) 296 | self.loss_type = arg.loss 297 | if self.loss_type != 'CE': 298 | self.load_class_weights() 299 | 300 | if self.arg.weights: 301 | self.global_step = int(arg.weights[:-3].split('-')[-1]) 302 | self.print_log('Load weights from {}.'.format(self.arg.weights)) 303 | if '.pkl' in self.arg.weights: 304 | with open(self.arg.weights, 'r') as f: 305 | weights = pickle.load(f) 306 | else: 307 | weights = torch.load(self.arg.weights) 308 | 309 | weights = OrderedDict( 310 | [[k.split('module.')[-1], 311 | v.cuda(output_device)] for k, v in weights.items()]) 312 | 313 | keys = list(weights.keys()) 314 | for w in self.arg.ignore_weights: 315 | for key in keys: 316 | if w in key: 317 | if weights.pop(key, None) is not 
None: 318 | self.print_log('Sucessfully Remove Weights: {}.'.format(key)) 319 | else: 320 | self.print_log('Can Not Remove Weights: {}.'.format(key)) 321 | 322 | try: 323 | self.model.load_state_dict(weights) 324 | except: 325 | state = self.model.state_dict() 326 | diff = list(set(state.keys()).difference(set(weights.keys()))) 327 | print('Can not find these weights:') 328 | for d in diff: 329 | print(' ' + d) 330 | state.update(weights) 331 | self.model.load_state_dict(state) 332 | 333 | if type(self.arg.device) is list: 334 | if len(self.arg.device) > 1: 335 | self.model = nn.DataParallel( 336 | self.model, 337 | device_ids=self.arg.device, 338 | output_device=output_device) 339 | 340 | def load_optimizer(self): 341 | if self.arg.optimizer == 'SGD': 342 | self.optimizer = optim.SGD( 343 | self.model.parameters(), 344 | lr=self.arg.base_lr, 345 | momentum=0.9, 346 | nesterov=self.arg.nesterov, 347 | weight_decay=self.arg.weight_decay) 348 | elif self.arg.optimizer == 'Adam': 349 | self.optimizer = optim.Adam( 350 | self.model.parameters(), 351 | lr=self.arg.base_lr, 352 | weight_decay=self.arg.weight_decay) 353 | else: 354 | raise ValueError() 355 | 356 | def save_arg(self): 357 | # save arg 358 | arg_dict = vars(self.arg) 359 | if not os.path.exists(self.arg.work_dir): 360 | os.makedirs(self.arg.work_dir) 361 | with open('{}/config.yaml'.format(self.arg.work_dir), 'w') as f: 362 | yaml.dump(arg_dict, f) 363 | 364 | def adjust_learning_rate(self, epoch): 365 | if self.arg.optimizer == 'SGD' or self.arg.optimizer == 'Adam': 366 | if epoch < self.arg.warm_up_epoch: 367 | lr = self.arg.base_lr * (epoch + 1) / self.arg.warm_up_epoch 368 | else: 369 | lr = self.arg.base_lr * ( 370 | 0.1 ** np.sum(epoch >= np.array(self.arg.step))) 371 | for param_group in self.optimizer.param_groups: 372 | param_group['lr'] = lr 373 | 374 | return lr 375 | else: 376 | raise ValueError() 377 | 378 | def print_time(self): 379 | localtime = time.asctime(time.localtime(time.time())) 380 | self.print_log("Local current time : " + localtime) 381 | 382 | def print_log(self, str, print_time=True): 383 | if print_time: 384 | localtime = time.asctime(time.localtime(time.time())) 385 | str = "[ " + localtime + ' ] ' + str 386 | print(str) 387 | if self.arg.print_log: 388 | with open('{}/log.txt'.format(self.arg.work_dir), 'a') as f: 389 | print(str, file=f) 390 | 391 | def record_time(self): 392 | self.cur_time = time.time() 393 | return self.cur_time 394 | 395 | def split_time(self): 396 | split_time = time.time() - self.cur_time 397 | self.record_time() 398 | return split_time 399 | 400 | def train(self, epoch, wb_dict, save_model=False): 401 | self.model.train() 402 | self.print_log('Training epoch: {}'.format(epoch + 1)) 403 | loader = self.data_loader['train'] 404 | self.adjust_learning_rate(epoch) 405 | 406 | loss_value, batch_acc, batch_per_class_acc = [], [], [] 407 | self.train_writer.add_scalar('epoch', epoch, self.global_step) 408 | self.record_time() 409 | timer = dict(dataloader=0.001, model=0.001, statistics=0.001) 410 | process = tqdm(loader) 411 | if self.arg.only_train_part: 412 | if epoch > self.arg.only_train_epoch: 413 | print('only train part, require grad') 414 | for key, value in self.model.named_parameters(): 415 | if 'PA' in key: 416 | value.requires_grad = True 417 | else: 418 | print('only train part, do not require grad') 419 | for key, value in self.model.named_parameters(): 420 | if 'PA' in key: 421 | value.requires_grad = False 422 | 423 | nb_classes = self.arg.model_args['num_class'] 424 
| confusion_matrix = torch.zeros(nb_classes, nb_classes) 425 | for batch_idx, (data, label, sid, seg_id, chunk_n, anntr_id, index) in enumerate(process): 426 | 427 | self.global_step += 1 428 | # get data 429 | data = Variable(data.float().cuda(self.output_device), requires_grad=False) 430 | label = Variable(label.long().cuda(self.output_device), requires_grad=False) 431 | timer['dataloader'] += self.split_time() 432 | 433 | # forward 434 | output = self.model(data) 435 | 436 | if self.loss_type == "CE": 437 | l_type = nn.CrossEntropyLoss() 438 | loss = l_type(output, label) 439 | else: 440 | loss = CB_loss(label, output, 441 | self.samples_per_class, 442 | nb_classes, self.loss_type, 443 | self.arg.beta, 444 | self.arg.gamma, 445 | self.arg.device[0] 446 | ) 447 | 448 | # backward 449 | self.optimizer.zero_grad() 450 | loss.backward() 451 | self.optimizer.step() 452 | loss_value.append(loss.data.item()) 453 | timer['model'] += self.split_time() 454 | 455 | # Compute per-class acc. 456 | value, predict_label = torch.max(output.data, 1) 457 | for t, p in zip(label.view(-1), predict_label.view(-1)): 458 | confusion_matrix[t.long(), p.long()] += 1 459 | 460 | # Acc. 461 | acc = torch.mean((predict_label == label.data).float()) 462 | batch_acc.append(acc.item()) 463 | self.train_writer.add_scalar('acc', acc, self.global_step) 464 | self.train_writer.add_scalar('loss', loss.data.item(), self.global_step) 465 | 466 | # statistics 467 | self.lr = self.optimizer.param_groups[0]['lr'] 468 | self.train_writer.add_scalar('lr', self.lr, self.global_step) 469 | # if self.global_step % self.arg.log_interval == 0: 470 | # self.print_log( 471 | # '\tBatch({}/{}) done. Loss: {:.4f} lr:{:.6f}'.format( 472 | # batch_idx, len(loader), loss.data[0], lr)) 473 | timer['statistics'] += self.split_time() 474 | 475 | per_class_acc_vals = confusion_matrix.diag()/confusion_matrix.sum(1) 476 | per_class_acc = torch.mean(per_class_acc_vals).float() 477 | 478 | # statistics of time consumption and loss 479 | proportion = { 480 | k: '{:02d}%'.format(int(round(v * 100 / sum(timer.values())))) 481 | for k, v in timer.items() 482 | } 483 | self.print_log( 484 | '\tMean training loss: {:.4f}.'.format(np.mean(loss_value))) 485 | self.print_log('\tTop-1-norm: {:.3f}%'.format(100*per_class_acc)) 486 | 487 | # Log 488 | wb_dict['train loss'] = np.mean(loss_value) 489 | wb_dict['train acc'] = np.mean(batch_acc) 490 | 491 | if save_model: 492 | state_dict = self.model.state_dict() 493 | weights = OrderedDict([[k.split('module.')[-1], 494 | v.cpu()] for k, v in state_dict.items()]) 495 | 496 | torch.save(weights, self.arg.model_saved_name + '-' + str(epoch) + '-' + str(int(self.global_step)) + '.pt') 497 | 498 | return wb_dict 499 | 500 | @torch.no_grad() 501 | def eval(self, epoch, 502 | wb_dict, 503 | save_score=True, 504 | loader_name=['test'], 505 | wrong_file=None, 506 | result_file=None 507 | ): 508 | if wrong_file is not None: 509 | f_w = open(wrong_file, 'w') 510 | if result_file is not None: 511 | f_r = open(result_file, 'w') 512 | self.model.eval() 513 | self.print_log('Eval epoch: {}'.format(epoch + 1)) 514 | for ln in loader_name: 515 | loss_value = [] 516 | score_frag = [] 517 | pred_label_list = [] 518 | step = 0 519 | nb_classes = self.arg.model_args['num_class'] 520 | confusion_matrix = torch.zeros(nb_classes, nb_classes) 521 | process = tqdm(self.data_loader[ln]) 522 | for batch_idx, (data, label, sid, seg_id, chunk_n, anntr_id, index) in enumerate(process): 523 | data = Variable( 524 | 
data.float().cuda(self.output_device), 525 | requires_grad=False) 526 | # volatile=True) 527 | label = Variable( 528 | label.long().cuda(self.output_device), 529 | requires_grad=False) 530 | # volatile=True) 531 | output = self.model(data) 532 | 533 | if self.loss_type == "CE": 534 | l_type = nn.CrossEntropyLoss() 535 | loss = l_type(output, label) 536 | else: 537 | loss = CB_loss(label, output, 538 | self.samples_per_class, 539 | nb_classes, self.loss_type, 540 | self.arg.beta, 541 | self.arg.gamma, 542 | self.arg.device[0] 543 | ) 544 | # Store outputs 545 | logits = output.data.cpu().numpy() 546 | score_frag.append(logits) 547 | loss_value.append(loss.data.item()) 548 | 549 | _, predict_label = torch.max(output.data, 1) 550 | pred_label_list.append(predict_label) 551 | 552 | step += 1 553 | 554 | # Compute per-class acc. 555 | for t, p in zip(label.view(-1), predict_label.view(-1)): 556 | confusion_matrix[t.long(), p.long()] += 1 557 | if wrong_file is not None or result_file is not None: 558 | predict = list(predict_label.cpu().numpy()) 559 | true = list(label.data.cpu().numpy()) 560 | for i, x in enumerate(predict): 561 | if result_file is not None: 562 | f_r.write(str(x) + ',' + str(true[i]) + '\n') 563 | if x != true[i] and wrong_file is not None: 564 | f_w.write(str(index[i]) + ',' + str(x) + ',' + str(true[i]) + '\n') 565 | per_class_acc_vals = confusion_matrix.diag()/confusion_matrix.sum(1) 566 | per_class_acc = torch.mean(per_class_acc_vals).float() 567 | score = np.concatenate(score_frag) 568 | loss = np.mean(loss_value) 569 | 570 | accuracy = self.data_loader[ln].dataset.top_k(score, 1) 571 | topk_scores = { k: self.data_loader[ln].dataset.top_k(score, k) \ 572 | for k in self.arg.show_topk } 573 | 574 | wb_dict['val loss'] = loss 575 | wb_dict['val acc'] = accuracy 576 | wb_dict['val per class acc'] = per_class_acc 577 | for k in topk_scores: 578 | wb_dict['val top{0} score'.format(k)] = topk_scores[k] 579 | 580 | if accuracy > self.best_acc: 581 | self.best_acc = accuracy 582 | if per_class_acc > self.best_per_class_acc: 583 | self.best_per_class_acc = per_class_acc 584 | 585 | print('Accuracy: ', accuracy, ' model: ', self.arg.model_saved_name) 586 | if self.arg.phase == 'train': 587 | self.val_writer.add_scalar('loss', loss, self.global_step) 588 | self.val_writer.add_scalar('acc', accuracy, self.global_step) 589 | self.val_writer.add_scalar('per_class_acc', per_class_acc , self.global_step) 590 | 591 | pred_scores = list(zip( 592 | self.data_loader[ln].dataset.label[1], # sid 593 | self.data_loader[ln].dataset.sample_name, # seg_id 594 | self.data_loader[ln].dataset.label[2], # chunk_id 595 | score)) 596 | 597 | self.print_log('\tMean {} loss of {} batches: {}.'.format( 598 | ln, len(self.data_loader[ln]), np.mean(loss_value))) 599 | self.print_log('\tTop-1-norm: {:.3f}%'.format(100*per_class_acc)) 600 | for k in topk_scores: 601 | self.print_log('\tTop{}: {:.3f}%'.format(k, 100*topk_scores[k])) 602 | 603 | if save_score: 604 | with open('{}/epoch{}_{}_score.pkl'.format( 605 | self.arg.work_dir, epoch + 1, ln), 'wb') as f: 606 | pickle.dump(pred_scores, f) 607 | return wb_dict 608 | 609 | def start(self): 610 | wb_dict = {} 611 | if self.arg.phase == 'train': 612 | self.print_log('Parameters:\n{}\n'.format(str(vars(self.arg)))) 613 | self.global_step = self.arg.start_epoch * len(self.data_loader['train']) / self.arg.batch_size 614 | 615 | for epoch in range(self.arg.start_epoch, self.arg.num_epoch): 616 | 617 | save_model = ((epoch + 1) % self.arg.save_interval == 0) or ( 
618 | epoch + 1 == self.arg.num_epoch) 619 | 620 | # Wandb logging 621 | wb_dict = {'lr': self.lr} 622 | 623 | # Train 624 | wb_dict = self.train(epoch, wb_dict, save_model=save_model) 625 | 626 | # Eval. on val set 627 | wb_dict = self.eval( 628 | epoch, 629 | wb_dict, 630 | save_score=self.arg.save_score, 631 | loader_name=['test']) 632 | # Log stats. for this epoch 633 | print('Epoch: {0}\nMetrics: {1}'.format(epoch, wb_dict)) 634 | 635 | print('best accuracy: ', self.best_acc, ' model_name: ', self.arg.model_saved_name) 636 | 637 | elif self.arg.phase == 'test': 638 | if not self.arg.test_feeder_args['debug']: 639 | wf = self.arg.model_saved_name + '_wrong.txt' 640 | rf = self.arg.model_saved_name + '_right.txt' 641 | else: 642 | wf = rf = None 643 | if self.arg.weights is None: 644 | raise ValueError('Please appoint --weights.') 645 | self.arg.print_log = False 646 | self.print_log('Model: {}.'.format(self.arg.model)) 647 | self.print_log('Weights: {}.'.format(self.arg.weights)) 648 | 649 | wb_dict = self.eval(epoch=0, wb_dict=wb_dict, 650 | save_score=self.arg.save_score, 651 | loader_name=['test'], 652 | wrong_file=wf, 653 | result_file=rf 654 | ) 655 | print('Inference metrics: ', wb_dict) 656 | self.print_log('Done.\n') 657 | 658 | 659 | def str2bool(v): 660 | if v.lower() in ('yes', 'true', 't', 'y', '1'): 661 | return True 662 | elif v.lower() in ('no', 'false', 'f', 'n', '0'): 663 | return False 664 | else: 665 | raise argparse.ArgumentTypeError('Boolean value expected.') 666 | 667 | 668 | def import_class(name): 669 | components = name.split('.') 670 | mod = __import__(components[0]) 671 | for comp in components[1:]: 672 | mod = getattr(mod, comp) 673 | return mod 674 | 675 | 676 | if __name__ == '__main__': 677 | parser = get_parser() 678 | 679 | # load arg form config file 680 | p = parser.parse_args() 681 | if p.config is not None: 682 | with open(p.config, 'r') as f: 683 | default_arg = yaml.load(f) 684 | key = vars(p).keys() 685 | for k in default_arg.keys(): 686 | if k not in key: 687 | print('WRONG ARG: {}'.format(k)) 688 | assert (k in key) 689 | parser.set_defaults(**default_arg) 690 | 691 | arg = parser.parse_args() 692 | print('BABEL Action Recognition') 693 | print('Config: ', arg) 694 | init_seed(0) 695 | processor = Processor(arg) 696 | processor.start() 697 | --------------------------------------------------------------------------------
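For reference, a minimal sketch of how the files written by store_splits_subsets() in create_dataset.py above are laid out, assuming the default w_folder ('../data/babel_v1.0/') and the 60-class validation split. The .npy file holds the pre-processed joint features with shape (N, C, T, V, M); the companion .pkl holds a tuple (seg_ids, (Y1, sids, chunk_ns, anntr_ids)) whose lists are indexed consistently with the rows of the .npy array, which is also the ordering that Processor.eval() in train_test.py relies on via dataset.sample_name and dataset.label:

import pickle
import numpy as np

# Paths follow the default w_folder in create_dataset.py; adjust to your setup.
X = np.load('../data/babel_v1.0/val_ntu_sk_60.npy')           # (N, C, T, V, M)
with open('../data/babel_v1.0/val_label_60.pkl', 'rb') as f:
    seg_ids, (Y1, sids, chunk_ns, anntr_ids) = pickle.load(f)

assert X.shape[0] == len(seg_ids) == len(Y1)
print('Sample 0: seg_id={0}, class idx={1}, babel_sid={2}, chunk={3}'.format(
    seg_ids[0], Y1[0], sids[0], chunk_ns[0]))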
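The --beta and --gamma arguments in train_test.py above parameterize the class-balanced / focal loss computed by CB_loss. Since class_balanced_loss.py is not reproduced here, the snippet below is only an illustration of the standard class-balanced weighting of Cui et al. (CVPR 2019) that such a loss is typically built on: each class is weighted by the inverse of its "effective number" of samples, so rare classes contribute more to the loss. The per-class counts are made up, and the exact normalization in the repo's implementation may differ.

import numpy as np

beta = 0.9999                                    # --beta in train_test.py
samples_per_class = np.array([5000, 500, 50])    # hypothetical label counts
effective_num = (1.0 - np.power(beta, samples_per_class)) / (1.0 - beta)
weights = 1.0 / effective_num
weights = weights / weights.sum() * len(samples_per_class)   # common normalization
print(weights)   # the 50-sample class receives the largest weight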
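The "Top-1-norm" metric logged by train() and eval() above is the mean per-class accuracy computed from a confusion matrix (rows = ground truth, columns = predictions). A small self-contained example with a hypothetical 3-class matrix shows how it differs from overall accuracy on class-imbalanced data:

import torch

confusion_matrix = torch.tensor([[90., 10., 0.],    # class 0: 100 samples
                                 [ 5.,  5., 0.],    # class 1:  10 samples
                                 [ 2.,  0., 8.]])   # class 2:  10 samples

overall_acc = confusion_matrix.diag().sum() / confusion_matrix.sum()
per_class_acc = torch.mean(confusion_matrix.diag() / confusion_matrix.sum(1))
print('Overall accuracy: {:.3f}'.format(overall_acc.item()))     # 0.858
print('Top-1-norm      : {:.3f}'.format(per_class_acc.item()))   # 0.733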
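When save_score is enabled, Processor.eval() above pickles, for every evaluated sample, the tuple (babel_sid, seg_id, chunk_n, class score vector) to work_dir/epoch{N}_test_score.pkl. A minimal sketch of reading such a file back for downstream analysis; the path assumes the default --work-dir ('./work_dir/temp') and an evaluation run at epoch 0, so adjust it to your own run:

import pickle
import numpy as np

score_path = './work_dir/temp/epoch1_test_score.pkl'   # hypothetical run output
with open(score_path, 'rb') as f:
    pred_scores = pickle.load(f)

# Each entry: (babel_sid, seg_id, chunk_n, class-score vector)
for sid, seg_id, chunk_n, scores in pred_scores[:5]:
    print(sid, seg_id, chunk_n, 'predicted class idx:', int(np.argmax(scores)))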