├── ActionsEstLoader.py
├── Actionsrecognition
│   ├── Models.py
│   ├── Utils.py
│   └── train.py
├── App.py
├── CameraLoader.py
├── Data
│   ├── create_dataset_1.py
│   ├── create_dataset_2.py
│   └── create_dataset_3.py
├── Detection
│   ├── Models.py
│   └── Utils.py
├── DetectorLoader.py
├── Models
│   ├── TSSTG
│   │   └── _.txt
│   ├── sppe
│   │   └── _.txt
│   └── yolo-tiny-onecls
│       └── _.txt
├── PoseEstimateLoader.py
├── README.md
├── SPPE
│   ├── LICENSE
│   ├── README.md
│   └── src
│       ├── main_fast_inference.py
│       ├── models
│       │   ├── FastPose.py
│       │   ├── __init__.py
│       │   ├── hg-prm.py
│       │   ├── hgPRM.py
│       │   └── layers
│       │       ├── DUC.py
│       │       ├── PRM.py
│       │       ├── Residual.py
│       │       ├── Resnet.py
│       │       ├── SE_Resnet.py
│       │       ├── SE_module.py
│       │       ├── __init__.py
│       │       └── util_models.py
│       ├── opt.py
│       └── utils
│           ├── __init__.py
│           ├── dataset
│           │   ├── .coco.py.swp
│           │   ├── __init__.py
│           │   ├── coco.py
│           │   ├── fuse.py
│           │   └── mpii.py
│           ├── eval.py
│           ├── img.py
│           └── pose.py
├── Track
│   ├── Tracker.py
│   ├── iou_matching.py
│   ├── kalman_filter.py
│   └── linear_assignment.py
├── Visualizer.py
├── fn.py
├── main.py
├── pPose_nms.py
├── pose_utils.py
└── sample1.gif
/ActionsEstLoader.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | import numpy as np
4 |
5 | from Actionsrecognition.Models import TwoStreamSpatialTemporalGraph
6 | from pose_utils import normalize_points_with_size, scale_pose
7 |
8 |
9 | class TSSTG(object):
10 | """Two-Stream Spatial Temporal Graph Model Loader.
11 | Args:
12 | weight_file: (str) Path to trained weights file.
13 | device: (str) Device to load the model on 'cpu' or 'cuda'.
14 | """
15 | def __init__(self,
16 | weight_file='./Models/TSSTG/tsstg-model.pth',
17 | device='cuda'):
18 | self.graph_args = {'strategy': 'spatial'}
19 | self.class_names = ['Standing', 'Walking', 'Sitting', 'Lying Down',
20 | 'Stand up', 'Sit down', 'Fall Down']
21 | self.num_class = len(self.class_names)
22 | self.device = device
23 |
24 | self.model = TwoStreamSpatialTemporalGraph(self.graph_args, self.num_class).to(self.device)
25 |         self.model.load_state_dict(torch.load(weight_file, map_location=self.device))
26 | self.model.eval()
27 |
28 | def predict(self, pts, image_size):
29 |         """Predict actions from a single person's skeleton points and scores over a time sequence.
30 |         Args:
31 |             pts: (numpy array) points and scores in shape `(t, v, c)` where
32 |                 t : input sequence length (time steps),
33 |                 v : number of graph nodes (body parts),
34 |                 c : channels (x, y, score),
35 |             image_size: (tuple of int) width, height of the image frame.
36 |         Returns:
37 |             (numpy array) Probability of each action class, shape `(1, num_class)`.
38 | """
39 | pts[:, :, :2] = normalize_points_with_size(pts[:, :, :2], image_size[0], image_size[1])
40 | pts[:, :, :2] = scale_pose(pts[:, :, :2])
41 | pts = np.concatenate((pts, np.expand_dims((pts[:, 1, :] + pts[:, 2, :]) / 2, 1)), axis=1)
42 |
43 | pts = torch.tensor(pts, dtype=torch.float32)
44 | pts = pts.permute(2, 0, 1)[None, :]
45 |
46 | mot = pts[:, :2, 1:, :] - pts[:, :2, :-1, :]
47 | mot = mot.to(self.device)
48 | pts = pts.to(self.device)
49 |
50 | out = self.model((pts, mot))
51 |
52 | return out.detach().cpu().numpy()
53 |
--------------------------------------------------------------------------------
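A minimal usage sketch for the TSSTG loader above (illustrative only; it assumes a CUDA device, the pretrained weights at the default path, and uses random keypoints in place of real pose-tracker output):

    import numpy as np
    from ActionsEstLoader import TSSTG

    # Dummy 30-frame sequence of 13 joints, each (x, y, score); a real sequence
    # would come from the pose tracker. The class appends a 14th center node itself.
    pts = np.random.rand(30, 13, 3).astype(np.float32)
    pts[:, :, :2] *= np.array([640, 480], dtype=np.float32)   # pretend pixel coordinates

    action_model = TSSTG(device='cuda')
    out = action_model.predict(pts, (640, 480))               # shape (1, 7)
    print(action_model.class_names[out[0].argmax()], out[0].max())
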
/Actionsrecognition/Models.py:
--------------------------------------------------------------------------------
1 | ### Reference from: https://github.com/yysijie/st-gcn/tree/master/net
2 |
3 | import torch
4 | import torch.nn as nn
5 | import torch.nn.functional as F
6 | import numpy as np
7 |
8 | from Actionsrecognition.Utils import Graph
9 |
10 |
11 | class GraphConvolution(nn.Module):
12 | """The basic module for applying a graph convolution.
13 | Args:
14 |     - in_channels: (int) Number of channels in the input sequence data.
15 | - out_channels: (int) Number of channels produced by the convolution.
16 | - kernel_size: (int) Size of the graph convolving kernel.
17 | - t_kernel_size: (int) Size of the temporal convolving kernel.
18 | - t_stride: (int, optional) Stride of the temporal convolution. Default: 1
19 | - t_padding: (int, optional) Temporal zero-padding added to both sides of
20 | the input. Default: 0
21 | - t_dilation: (int, optional) Spacing between temporal kernel elements. Default: 1
22 | - bias: (bool, optional) If `True`, adds a learnable bias to the output.
23 | Default: `True`
24 | Shape:
25 | - Inputs x: Graph sequence in :math:`(N, in_channels, T_{in}, V)`,
26 | A: Graph adjacency matrix in :math:`(K, V, V)`,
27 | - Output: Graph sequence out in :math:`(N, out_channels, T_{out}, V)`
28 |
29 | where
30 | :math:`N` is a batch size,
31 |         :math:`K` is the spatial kernel size (:math:`K == kernel_size`),
32 | :math:`T_{in}/T_{out}` is a length of input/output sequence,
33 | :math:`V` is the number of graph nodes.
34 |
35 | """
36 | def __init__(self, in_channels, out_channels, kernel_size,
37 | t_kernel_size=1,
38 | t_stride=1,
39 | t_padding=0,
40 | t_dilation=1,
41 | bias=True):
42 | super().__init__()
43 |
44 | self.kernel_size = kernel_size
45 | self.conv = nn.Conv2d(in_channels,
46 | out_channels * kernel_size,
47 | kernel_size=(t_kernel_size, 1),
48 | padding=(t_padding, 0),
49 | stride=(t_stride, 1),
50 | dilation=(t_dilation, 1),
51 | bias=bias)
52 |
53 | def forward(self, x, A):
54 | x = self.conv(x)
55 | n, kc, t, v = x.size()
56 | x = x.view(n, self.kernel_size, kc//self.kernel_size, t, v)
57 | x = torch.einsum('nkctv,kvw->nctw', (x, A))
58 |
59 | return x.contiguous()
60 |
61 |
62 | class st_gcn(nn.Module):
63 | """Applies a spatial temporal graph convolution over an input graph sequence.
64 | Args:
65 | - in_channels: (int) Number of channels in the input sequence data.
66 | - out_channels: (int) Number of channels produced by the convolution.
67 | - kernel_size: (tuple) Size of the temporal convolving kernel and
68 | graph convolving kernel.
69 | - stride: (int, optional) Stride of the temporal convolution. Default: 1
70 | - dropout: (int, optional) Dropout rate of the final output. Default: 0
71 | - residual: (bool, optional) If `True`, applies a residual mechanism.
72 | Default: `True`
73 | Shape:
74 |     - Inputs x: Graph sequence in :math:`(N, in_channels, T_{in}, V)`,
75 |                 A: Graph adjacency matrix in :math:`(K, V, V)`,
76 |     - Output: Graph sequence out in :math:`(N, out_channels, T_{out}, V)`
77 | where
78 | :math:`N` is a batch size,
79 | :math:`K` is the spatial kernel size, as :math:`K == kernel_size[1]`,
80 | :math:`T_{in}/T_{out}` is a length of input/output sequence,
81 | :math:`V` is the number of graph nodes.
82 | """
83 | def __init__(self, in_channels, out_channels, kernel_size,
84 | stride=1,
85 | dropout=0,
86 | residual=True):
87 | super().__init__()
88 | assert len(kernel_size) == 2
89 | assert kernel_size[0] % 2 == 1
90 |
91 | padding = ((kernel_size[0] - 1) // 2, 0)
92 |
93 | self.gcn = GraphConvolution(in_channels, out_channels, kernel_size[1])
94 | self.tcn = nn.Sequential(nn.BatchNorm2d(out_channels),
95 | nn.ReLU(inplace=True),
96 | nn.Conv2d(out_channels,
97 | out_channels,
98 | (kernel_size[0], 1),
99 | (stride, 1),
100 | padding),
101 | nn.BatchNorm2d(out_channels),
102 | nn.Dropout(dropout, inplace=True)
103 | )
104 |
105 | if not residual:
106 | self.residual = lambda x: 0
107 | elif (in_channels == out_channels) and (stride == 1):
108 | self.residual = lambda x: x
109 | else:
110 | self.residual = nn.Sequential(nn.Conv2d(in_channels,
111 | out_channels,
112 | kernel_size=1,
113 | stride=(stride, 1)),
114 | nn.BatchNorm2d(out_channels)
115 | )
116 | self.relu = nn.ReLU(inplace=True)
117 |
118 | def forward(self, x, A):
119 | res = self.residual(x)
120 | x = self.gcn(x, A)
121 | x = self.tcn(x) + res
122 |
123 | return self.relu(x)
124 |
125 |
126 | class StreamSpatialTemporalGraph(nn.Module):
127 | """Spatial temporal graph convolutional networks.
128 | Args:
129 | - in_channels: (int) Number of input channels.
130 | - graph_args: (dict) Args map of `Actionsrecognition.Utils.Graph` Class.
131 | - num_class: (int) Number of class outputs. If `None` return pooling features of
132 | the last st-gcn layer instead.
133 | - edge_importance_weighting: (bool) If `True`, adds a learnable importance
134 | weighting to the edges of the graph.
135 | - **kwargs: (optional) Other parameters for graph convolution units.
136 | Shape:
137 | - Input: :math:`(N, in_channels, T_{in}, V_{in})`
138 | - Output: :math:`(N, num_class)` where
139 | :math:`N` is a batch size,
140 | :math:`T_{in}` is a length of input sequence,
141 | :math:`V_{in}` is the number of graph nodes,
142 |             or, if `num_class` is `None`: :math:`(N, out_channels)` where
143 |             :math:`out_channels` is the number of output channels of the last layer.
144 | """
145 | def __init__(self, in_channels, graph_args, num_class=None,
146 | edge_importance_weighting=True, **kwargs):
147 | super().__init__()
148 | # Load graph.
149 | graph = Graph(**graph_args)
150 | A = torch.tensor(graph.A, dtype=torch.float32, requires_grad=False)
151 | self.register_buffer('A', A)
152 |
153 | # Networks.
154 | spatial_kernel_size = A.size(0)
155 | temporal_kernel_size = 9
156 | kernel_size = (temporal_kernel_size, spatial_kernel_size)
157 | kwargs0 = {k: v for k, v in kwargs.items() if k != 'dropout'}
158 |
159 | self.data_bn = nn.BatchNorm1d(in_channels * A.size(1))
160 | self.st_gcn_networks = nn.ModuleList((
161 | st_gcn(in_channels, 64, kernel_size, 1, residual=False, **kwargs0),
162 | st_gcn(64, 64, kernel_size, 1, **kwargs),
163 | st_gcn(64, 64, kernel_size, 1, **kwargs),
164 | st_gcn(64, 64, kernel_size, 1, **kwargs),
165 | st_gcn(64, 128, kernel_size, 2, **kwargs),
166 | st_gcn(128, 128, kernel_size, 1, **kwargs),
167 | st_gcn(128, 128, kernel_size, 1, **kwargs),
168 | st_gcn(128, 256, kernel_size, 2, **kwargs),
169 | st_gcn(256, 256, kernel_size, 1, **kwargs),
170 | st_gcn(256, 256, kernel_size, 1, **kwargs)
171 | ))
172 |
173 | # initialize parameters for edge importance weighting.
174 | if edge_importance_weighting:
175 | self.edge_importance = nn.ParameterList([
176 | nn.Parameter(torch.ones(A.size()))
177 | for i in self.st_gcn_networks
178 | ])
179 | else:
180 | self.edge_importance = [1] * len(self.st_gcn_networks)
181 |
182 | if num_class is not None:
183 | self.cls = nn.Conv2d(256, num_class, kernel_size=1)
184 | else:
185 | self.cls = lambda x: x
186 |
187 | def forward(self, x):
188 | # data normalization.
189 | N, C, T, V = x.size()
190 | x = x.permute(0, 3, 1, 2).contiguous() # (N, V, C, T)
191 | x = x.view(N, V * C, T)
192 | x = self.data_bn(x)
193 | x = x.view(N, V, C, T)
194 | x = x.permute(0, 2, 3, 1).contiguous()
195 | x = x.view(N, C, T, V)
196 |
197 | # forward.
198 | for gcn, importance in zip(self.st_gcn_networks, self.edge_importance):
199 | x = gcn(x, self.A * importance)
200 |
201 | x = F.avg_pool2d(x, x.size()[2:])
202 | x = self.cls(x)
203 | x = x.view(x.size(0), -1)
204 |
205 | return x
206 |
207 |
208 | class TwoStreamSpatialTemporalGraph(nn.Module):
209 | """Two inputs spatial temporal graph convolutional networks.
210 | Args:
211 | - graph_args: (dict) Args map of `Actionsrecognition.Utils.Graph` Class.
212 | - num_class: (int) Number of class outputs.
213 | - edge_importance_weighting: (bool) If `True`, adds a learnable importance
214 | weighting to the edges of the graph.
215 | - **kwargs: (optional) Other parameters for graph convolution units.
216 | Shape:
217 |         - Input: (tuple) of :math:`((N, 3, T, V), (N, 2, T, V))`
218 |                  for the points and motion streams, where
219 | :math:`N` is a batch size,
220 |         the channel dimension is 3 for (x, y, score) in the points stream and 2 for (mot_x, mot_y) in the motion stream,
221 | :math:`T` is a length of input sequence,
222 | :math:`V` is the number of graph nodes,
223 | - Output: :math:`(N, num_class)`
224 | """
225 | def __init__(self, graph_args, num_class, edge_importance_weighting=True,
226 | **kwargs):
227 | super().__init__()
228 | self.pts_stream = StreamSpatialTemporalGraph(3, graph_args, None,
229 | edge_importance_weighting,
230 | **kwargs)
231 | self.mot_stream = StreamSpatialTemporalGraph(2, graph_args, None,
232 | edge_importance_weighting,
233 | **kwargs)
234 |
235 | self.fcn = nn.Linear(256 * 2, num_class)
236 |
237 | def forward(self, inputs):
238 | out1 = self.pts_stream(inputs[0])
239 | out2 = self.mot_stream(inputs[1])
240 |
241 | concat = torch.cat([out1, out2], dim=-1)
242 | out = self.fcn(concat)
243 |
244 | return torch.sigmoid(out)
245 |
--------------------------------------------------------------------------------
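A quick smoke test of the two-stream model above with random tensors, just to make the expected input and output shapes concrete (illustrative only; assumes the repository root is on PYTHONPATH):

    import torch
    from Actionsrecognition.Models import TwoStreamSpatialTemporalGraph

    graph_args = {'strategy': 'spatial'}
    model = TwoStreamSpatialTemporalGraph(graph_args, num_class=7)

    pts = torch.randn(8, 3, 30, 14)                # (N, channels = x, y, score, T, V)
    mot = pts[:, :2, 1:, :] - pts[:, :2, :-1, :]   # (N, 2, T-1, V) motion stream

    out = model((pts, mot))
    print(out.shape)                               # torch.Size([8, 7]), sigmoid scores per class
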
/Actionsrecognition/Utils.py:
--------------------------------------------------------------------------------
1 | ### Reference from: https://github.com/yysijie/st-gcn/blob/master/net/utils/graph.py
2 |
3 | import os
4 | import torch
5 | import numpy as np
6 |
7 |
8 | class Graph:
9 | """The Graph to model the skeletons extracted by the Alpha-Pose.
10 | Args:
11 |         - strategy: (string) must be one of the following candidates
12 |             - uniform: Uniform Labeling,
13 |             - distance: Distance Partitioning,
14 |             - spatial: Spatial Configuration,
15 |         For more information, please refer to the section 'Partition Strategies'
16 |         in the ST-GCN paper (https://arxiv.org/abs/1801.07455).
17 |         - layout: (string) must be one of the following candidates
18 |             - coco_cut: COCO keypoint format with 4 joints (L/R ears, L/R eyes) cut out.
19 | - max_hop: (int) the maximal distance between two connected nodes.
20 | - dilation: (int) controls the spacing between the kernel points.
21 | """
22 | def __init__(self,
23 | layout='coco_cut',
24 | strategy='uniform',
25 | max_hop=1,
26 | dilation=1):
27 | self.max_hop = max_hop
28 | self.dilation = dilation
29 |
30 | self.get_edge(layout)
31 | self.hop_dis = get_hop_distance(self.num_node, self.edge, max_hop)
32 | self.get_adjacency(strategy)
33 |
34 | def get_edge(self, layout):
35 | if layout == 'coco_cut':
36 | self.num_node = 14
37 | self_link = [(i, i) for i in range(self.num_node)]
38 | neighbor_link = [(6, 4), (4, 2), (2, 13), (13, 1), (5, 3), (3, 1), (12, 10),
39 | (10, 8), (8, 2), (11, 9), (9, 7), (7, 1), (13, 0)]
40 | self.edge = self_link + neighbor_link
41 | self.center = 13
42 | else:
43 | raise ValueError('This layout is not supported!')
44 |
45 | def get_adjacency(self, strategy):
46 | valid_hop = range(0, self.max_hop + 1, self.dilation)
47 | adjacency = np.zeros((self.num_node, self.num_node))
48 | for hop in valid_hop:
49 | adjacency[self.hop_dis == hop] = 1
50 | normalize_adjacency = normalize_digraph(adjacency)
51 |
52 | if strategy == 'uniform':
53 | A = np.zeros((1, self.num_node, self.num_node))
54 | A[0] = normalize_adjacency
55 | self.A = A
56 | elif strategy == 'distance':
57 | A = np.zeros((len(valid_hop), self.num_node, self.num_node))
58 | for i, hop in enumerate(valid_hop):
59 | A[i][self.hop_dis == hop] = normalize_adjacency[self.hop_dis ==
60 | hop]
61 | self.A = A
62 | elif strategy == 'spatial':
63 | A = []
64 | for hop in valid_hop:
65 | a_root = np.zeros((self.num_node, self.num_node))
66 | a_close = np.zeros((self.num_node, self.num_node))
67 | a_further = np.zeros((self.num_node, self.num_node))
68 | for i in range(self.num_node):
69 | for j in range(self.num_node):
70 | if self.hop_dis[j, i] == hop:
71 | if self.hop_dis[j, self.center] == self.hop_dis[i, self.center]:
72 | a_root[j, i] = normalize_adjacency[j, i]
73 | elif self.hop_dis[j, self.center] > self.hop_dis[i, self.center]:
74 | a_close[j, i] = normalize_adjacency[j, i]
75 | else:
76 | a_further[j, i] = normalize_adjacency[j, i]
77 | if hop == 0:
78 | A.append(a_root)
79 | else:
80 | A.append(a_root + a_close)
81 | A.append(a_further)
82 | A = np.stack(A)
83 | self.A = A
84 | #self.A = np.swapaxes(np.swapaxes(A, 0, 1), 1, 2)
85 | else:
86 | raise ValueError("This strategy is not supported!")
87 |
88 |
89 | def get_hop_distance(num_node, edge, max_hop=1):
90 | A = np.zeros((num_node, num_node))
91 | for i, j in edge:
92 | A[j, i] = 1
93 | A[i, j] = 1
94 |
95 | # compute hop steps
96 | hop_dis = np.zeros((num_node, num_node)) + np.inf
97 | transfer_mat = [np.linalg.matrix_power(A, d) for d in range(max_hop + 1)]
98 | arrive_mat = (np.stack(transfer_mat) > 0)
99 | for d in range(max_hop, -1, -1):
100 | hop_dis[arrive_mat[d]] = d
101 | return hop_dis
102 |
103 |
104 | def normalize_digraph(A):
105 | Dl = np.sum(A, 0)
106 | num_node = A.shape[0]
107 | Dn = np.zeros((num_node, num_node))
108 | for i in range(num_node):
109 | if Dl[i] > 0:
110 | Dn[i, i] = Dl[i]**(-1)
111 | AD = np.dot(A, Dn)
112 | return AD
113 |
114 |
115 | def normalize_undigraph(A):
116 | Dl = np.sum(A, 0)
117 | num_node = A.shape[0]
118 | Dn = np.zeros((num_node, num_node))
119 | for i in range(num_node):
120 | if Dl[i] > 0:
121 | Dn[i, i] = Dl[i]**(-0.5)
122 | DAD = np.dot(np.dot(Dn, A), Dn)
123 | return DAD
124 |
--------------------------------------------------------------------------------
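The partitioning logic above is easier to picture from the adjacency tensors it produces; a small illustrative check of the 14-node 'coco_cut' layout under each strategy:

    from Actionsrecognition.Utils import Graph

    for strategy in ('uniform', 'distance', 'spatial'):
        g = Graph(layout='coco_cut', strategy=strategy, max_hop=1)
        print(strategy, g.A.shape)
    # Expected (with max_hop=1):
    #   uniform  (1, 14, 14)  -- single normalized adjacency
    #   distance (2, 14, 14)  -- one matrix per hop distance (0 and 1)
    #   spatial  (3, 14, 14)  -- root, centripetal and centrifugal subsets
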
/Actionsrecognition/train.py:
--------------------------------------------------------------------------------
1 | import os
2 | import time
3 | import torch
4 | import pickle
5 | import numpy as np
6 | import torch.nn.functional as F
7 | from shutil import copyfile
8 | from tqdm import tqdm
9 | from torch.utils import data
10 | from torch.optim.adadelta import Adadelta
11 | from sklearn.model_selection import train_test_split
12 |
13 | from Actionsrecognition.Models import *
14 | from Visualizer import plot_graphs, plot_confusion_metrix
15 |
16 |
17 | save_folder = 'saved/TSSTG(pts+mot)-01(cf+hm-hm)'
18 |
19 | device = 'cuda'
20 | epochs = 30
21 | batch_size = 32
22 |
23 | # DATA FILES.
24 | # Should be in the format of
25 | #   inputs: (N_samples, time_steps, graph_node, channels),
26 | #   labels: (N_samples, num_class)
27 | # with normalization already applied. The default data are created by:
28 | #   Data/create_dataset_(1-3).py
29 | # where
30 | # time_steps: Number of frame input sequence, Default: 30
31 | # graph_node: Number of node in skeleton, Default: 14
32 | # channels: Inputs data (x, y and scores), Default: 3
33 | # num_class: Number of pose class to train, Default: 7
34 |
35 | data_files = ['../Data/Coffee_room_new-set(labelXscrw).pkl',
36 | '../Data/Home_new-set(labelXscrw).pkl']
37 | class_names = ['Standing', 'Walking', 'Sitting', 'Lying Down',
38 | 'Stand up', 'Sit down', 'Fall Down']
39 | num_class = len(class_names)
40 |
41 |
42 | def load_dataset(data_files, batch_size, split_size=0):
43 |     """Load data files into torch DataLoaders, with or without splitting into train/test sets.
44 | """
45 | features, labels = [], []
46 | for fil in data_files:
47 | with open(fil, 'rb') as f:
48 | fts, lbs = pickle.load(f)
49 | features.append(fts)
50 | labels.append(lbs)
51 | del fts, lbs
52 | features = np.concatenate(features, axis=0)
53 | labels = np.concatenate(labels, axis=0)
54 |
55 | if split_size > 0:
56 | x_train, x_valid, y_train, y_valid = train_test_split(features, labels, test_size=split_size,
57 | random_state=9)
58 | train_set = data.TensorDataset(torch.tensor(x_train, dtype=torch.float32).permute(0, 3, 1, 2),
59 | torch.tensor(y_train, dtype=torch.float32))
60 | valid_set = data.TensorDataset(torch.tensor(x_valid, dtype=torch.float32).permute(0, 3, 1, 2),
61 | torch.tensor(y_valid, dtype=torch.float32))
62 | train_loader = data.DataLoader(train_set, batch_size, shuffle=True)
63 | valid_loader = data.DataLoader(valid_set, batch_size)
64 | else:
65 | train_set = data.TensorDataset(torch.tensor(features, dtype=torch.float32).permute(0, 3, 1, 2),
66 | torch.tensor(labels, dtype=torch.float32))
67 | train_loader = data.DataLoader(train_set, batch_size, shuffle=True)
68 | valid_loader = None
69 | return train_loader, valid_loader
70 |
71 |
72 | def accuracy_batch(y_pred, y_true):
73 | return (y_pred.argmax(1) == y_true.argmax(1)).mean()
74 |
75 |
76 | def set_training(model, mode=True):
77 | for p in model.parameters():
78 | p.requires_grad = mode
79 | model.train(mode)
80 | return model
81 |
82 |
83 | if __name__ == '__main__':
84 | save_folder = os.path.join(os.path.dirname(__file__), save_folder)
85 | if not os.path.exists(save_folder):
86 | os.makedirs(save_folder)
87 |
88 | # DATA.
89 | train_loader, _ = load_dataset(data_files[0:1], batch_size)
90 | valid_loader, train_loader_ = load_dataset(data_files[1:2], batch_size, 0.2)
91 |
92 | train_loader = data.DataLoader(data.ConcatDataset([train_loader.dataset, train_loader_.dataset]),
93 | batch_size, shuffle=True)
94 | dataloader = {'train': train_loader, 'valid': valid_loader}
95 | del train_loader_
96 |
97 | # MODEL.
98 | graph_args = {'strategy': 'spatial'}
99 | model = TwoStreamSpatialTemporalGraph(graph_args, num_class).to(device)
100 |
101 | #optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
102 | optimizer = Adadelta(model.parameters())
103 |
104 | losser = torch.nn.BCELoss()
105 |
106 | # TRAINING.
107 | loss_list = {'train': [], 'valid': []}
108 | accu_list = {'train': [], 'valid': []}
109 | for e in range(epochs):
110 | print('Epoch {}/{}'.format(e, epochs - 1))
111 | for phase in ['train', 'valid']:
112 | if phase == 'train':
113 | model = set_training(model, True)
114 | else:
115 | model = set_training(model, False)
116 |
117 | run_loss = 0.0
118 | run_accu = 0.0
119 | with tqdm(dataloader[phase], desc=phase) as iterator:
120 | for pts, lbs in iterator:
121 | # Create motion input by distance of points (x, y) of the same node
122 | # in two frames.
123 | mot = pts[:, :2, 1:, :] - pts[:, :2, :-1, :]
124 |
125 | mot = mot.to(device)
126 | pts = pts.to(device)
127 | lbs = lbs.to(device)
128 |
129 | # Forward.
130 | out = model((pts, mot))
131 | loss = losser(out, lbs)
132 |
133 | if phase == 'train':
134 | # Backward.
135 | model.zero_grad()
136 | loss.backward()
137 | optimizer.step()
138 |
139 | run_loss += loss.item()
140 | accu = accuracy_batch(out.detach().cpu().numpy(),
141 | lbs.detach().cpu().numpy())
142 | run_accu += accu
143 |
144 | iterator.set_postfix_str(' loss: {:.4f}, accu: {:.4f}'.format(
145 | loss.item(), accu))
146 | iterator.update()
147 | #break
148 | loss_list[phase].append(run_loss / len(iterator))
149 | accu_list[phase].append(run_accu / len(iterator))
150 | #break
151 |
152 | print('Summary epoch:\n - Train loss: {:.4f}, accu: {:.4f}\n - Valid loss:'
153 | ' {:.4f}, accu: {:.4f}'.format(loss_list['train'][-1], accu_list['train'][-1],
154 | loss_list['valid'][-1], accu_list['valid'][-1]))
155 |
156 | # SAVE.
157 | torch.save(model.state_dict(), os.path.join(save_folder, 'tsstg-model.pth'))
158 |
159 | plot_graphs(list(loss_list.values()), list(loss_list.keys()),
160 | 'Last Train: {:.2f}, Valid: {:.2f}'.format(
161 | loss_list['train'][-1], loss_list['valid'][-1]
162 | ), 'Loss', xlim=[0, epochs],
163 | save=os.path.join(save_folder, 'loss_graph.png'))
164 | plot_graphs(list(accu_list.values()), list(accu_list.keys()),
165 | 'Last Train: {:.2f}, Valid: {:.2f}'.format(
166 | accu_list['train'][-1], accu_list['valid'][-1]
167 | ), 'Accu', xlim=[0, epochs],
168 | save=os.path.join(save_folder, 'accu_graph.png'))
169 |
170 | #break
171 |
172 | del train_loader, valid_loader
173 |
174 | model.load_state_dict(torch.load(os.path.join(save_folder, 'tsstg-model.pth')))
175 |
176 | # EVALUATION.
177 | model = set_training(model, False)
178 | data_file = data_files[1]
179 | eval_loader, _ = load_dataset([data_file], 32)
180 |
181 | print('Evaluation.')
182 | run_loss = 0.0
183 | run_accu = 0.0
184 | y_preds = []
185 | y_trues = []
186 | with tqdm(eval_loader, desc='eval') as iterator:
187 | for pts, lbs in iterator:
188 | mot = pts[:, :2, 1:, :] - pts[:, :2, :-1, :]
189 | mot = mot.to(device)
190 | pts = pts.to(device)
191 | lbs = lbs.to(device)
192 |
193 | out = model((pts, mot))
194 | loss = losser(out, lbs)
195 |
196 | run_loss += loss.item()
197 | accu = accuracy_batch(out.detach().cpu().numpy(),
198 | lbs.detach().cpu().numpy())
199 | run_accu += accu
200 |
201 | y_preds.extend(out.argmax(1).detach().cpu().numpy())
202 | y_trues.extend(lbs.argmax(1).cpu().numpy())
203 |
204 | iterator.set_postfix_str(' loss: {:.4f}, accu: {:.4f}'.format(
205 | loss.item(), accu))
206 | iterator.update()
207 |
208 | run_loss = run_loss / len(iterator)
209 | run_accu = run_accu / len(iterator)
210 |
211 | plot_confusion_metrix(y_trues, y_preds, class_names, 'Eval on: {}\nLoss: {:.4f}, Accu{:.4f}'.format(
212 | os.path.basename(data_file), run_loss, run_accu
213 | ), 'true', save=os.path.join(save_folder, '{}-confusion_matrix.png'.format(
214 | os.path.basename(data_file).split('.')[0])))
215 |
216 | print('Eval Loss: {:.4f}, Accu: {:.4f}'.format(run_loss, run_accu))
217 |
--------------------------------------------------------------------------------
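A sketch of a dummy pickle file in the layout the comment block near the top of train.py describes (illustrative only; 'dummy-set.pkl' and the random data are placeholders, not part of the repository):

    import pickle
    import numpy as np

    # features: (N_samples, time_steps=30, graph_node=14, channels=3)
    # labels:   (N_samples, num_class=7), here random one-hot vectors
    n_samples = 64
    features = np.random.rand(n_samples, 30, 14, 3).astype(np.float32)
    labels = np.eye(7, dtype=np.float32)[np.random.randint(7, size=n_samples)]

    with open('dummy-set.pkl', 'wb') as f:
        pickle.dump((features, labels), f)
    # Pointing data_files at files like this lets load_dataset() run end to end.
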
/App.py:
--------------------------------------------------------------------------------
1 | import os
2 | import cv2
3 | import time
4 | import torch
5 | import screeninfo
6 | import numpy as np
7 | import tkinter as tk
8 | import matplotlib.pyplot as plt
9 | from PIL import Image, ImageTk
10 |
11 | from Detection.Utils import ResizePadding
12 | from CameraLoader import CamLoader, CamLoader_Q
13 | from DetectorLoader import TinyYOLOv3_onecls
14 |
15 | from PoseEstimateLoader import SPPE_FastPose
16 | from fn import draw_single
17 |
18 | from Track.Tracker import Detection, Tracker
19 | from ActionsEstLoader import TSSTG
20 |
21 | import matplotlib
22 | matplotlib.use('TkAgg')
23 | import matplotlib.pyplot as plt
24 | from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg, NavigationToolbar2Tk
25 |
26 |
27 | def get_monitor_from_coord(x, y): # multiple monitor dealing.
28 | monitors = screeninfo.get_monitors()
29 | for m in reversed(monitors):
30 | if m.x <= x <= m.width + m.x and m.y <= y <= m.height + m.y:
31 | return m
32 | return monitors[0]
33 |
34 |
35 | class Models:
36 | def __init__(self):
37 | self.inp_dets = 416
38 | self.inp_pose = (256, 192)
39 | self.pose_backbone = 'resnet50'
40 | self.show_detected = True
41 | self.show_skeleton = True
42 | self.device = 'cuda'
43 |
44 | self.load_models()
45 |
46 | def load_models(self):
47 | self.detect_model = TinyYOLOv3_onecls(self.inp_dets, device=self.device)
48 | self.pose_model = SPPE_FastPose(self.pose_backbone, self.inp_pose[0], self.inp_pose[1],
49 | device=self.device)
50 | self.tracker = Tracker(30, n_init=3)
51 | self.action_model = TSSTG(device=self.device)
52 |
53 | def kpt2bbox(self, kpt, ex=20):
54 | return np.array((kpt[:, 0].min() - ex, kpt[:, 1].min() - ex,
55 | kpt[:, 0].max() + ex, kpt[:, 1].max() + ex))
56 |
57 | def process_frame(self, frame):
58 | detected = self.detect_model.detect(frame, need_resize=False, expand_bb=10)
59 |
60 | self.tracker.predict()
61 | for track in self.tracker.tracks:
62 | det = torch.tensor([track.to_tlbr().tolist() + [1.0, 1.0, 0.0]], dtype=torch.float32)
63 | detected = torch.cat([detected, det], dim=0) if detected is not None else det
64 |
65 | detections = []
66 | if detected is not None:
67 | poses = self.pose_model.predict(frame, detected[:, 0:4], detected[:, 4])
68 | detections = [Detection(self.kpt2bbox(ps['keypoints'].numpy()),
69 | np.concatenate((ps['keypoints'].numpy(),
70 | ps['kp_score'].numpy()), axis=1),
71 | ps['kp_score'].mean().numpy()) for ps in poses]
72 | if self.show_detected:
73 | for bb in detected[:, 0:5]:
74 | frame = cv2.rectangle(frame, (bb[0], bb[1]), (bb[2], bb[3]), (0, 0, 255), 1)
75 |
76 | self.tracker.update(detections)
77 | for i, track in enumerate(self.tracker.tracks):
78 | if not track.is_confirmed():
79 | continue
80 | track_id = track.track_id
81 | bbox = track.to_tlbr().astype(int)
82 | center = track.get_center().astype(int)
83 |
84 | action = 'pending..'
85 | clr = (0, 255, 0)
86 | if len(track.keypoints_list) == 30:
87 | pts = np.array(track.keypoints_list, dtype=np.float32)
88 | out = self.action_model.predict(pts, frame.shape[:2])
89 | action_name = self.action_model.class_names[out[0].argmax()]
90 | action = '{}: {:.2f}%'.format(action_name, out[0].max() * 100)
91 | if action_name == 'Fall Down':
92 | clr = (255, 0, 0)
93 | elif action_name == 'Lying Down':
94 | clr = (255, 200, 0)
95 |
96 | track.actions = out
97 |
98 | if track.time_since_update == 0:
99 | if self.show_skeleton:
100 | frame = draw_single(frame, track.keypoints_list[-1])
101 | frame = cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 255, 0), 1)
102 | frame = cv2.putText(frame, str(track_id), (center[0], center[1]), cv2.FONT_HERSHEY_DUPLEX,
103 | 0.4, (255, 0, 0), 2)
104 | frame = cv2.putText(frame, action, (bbox[0] + 5, bbox[1] + 15), cv2.FONT_HERSHEY_COMPLEX,
105 | 0.4, clr, 1)
106 |
107 | return frame
108 |
109 |
110 | class main:
111 | def __init__(self, master: tk.Tk):
112 | self.master = master
113 | self.master.title('Human Falling Detection')
114 | self.master.protocol('WM_DELETE_WINDOW', self._on_closing)
115 | self.main_screen = get_monitor_from_coord(master.winfo_x(), master.winfo_y())
116 |
117 | self.width = int(self.main_screen.width * .85)
118 | self.height = int(self.main_screen.height * .85)
119 | self.master.geometry('{}x{}'.format(self.width, self.height + 15))
120 |
121 | self.cam = None
122 | self.canvas = tk.Canvas(master, width=int(self.width * .65), height=self.height)
123 | self.canvas.grid(row=0, column=0, padx=5, pady=5, sticky=tk.NSEW)
124 |
125 | fig = plt.Figure(figsize=(6, 8), dpi=100)
126 | fig.suptitle('Actions')
127 | self.ax = fig.add_subplot(111)
128 | self.fig_canvas = FigureCanvasTkAgg(fig, self.master)
129 | self.fig_canvas.get_tk_widget().grid(row=0, column=1, padx=5, pady=5, sticky=tk.NSEW)
130 |
131 | # Load Models
132 | self.resize_fn = ResizePadding(416, 416)
133 | self.models = Models()
134 |
135 | self.actions_graph()
136 |
137 | self.delay = 15
138 | self.load_cam('../Data/falldata/Home/Videos/video (1).avi')
139 | self.update()
140 |
141 | def preproc(self, image):
142 | image = self.resize_fn(image)
143 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
144 | return image
145 |
146 | def load_cam(self, source):
147 | if self.cam:
148 | self.cam.__del__()
149 |
150 | if type(source) is str and os.path.isfile(source):
151 | self.cam = CamLoader_Q(source, queue_size=1000, preprocess=self.preproc).start()
152 | else:
153 | self.cam = CamLoader(source, preprocess=self.preproc).start()
154 |
155 | def actions_graph(self):
156 | if len(self.models.tracker.tracks) == 0:
157 | return
158 | track = self.models.tracker.tracks[0]
159 | if hasattr(track, 'actions'):
160 | y_labels = self.models.action_model.class_names
161 |             self.ax.barh(np.arange(len(y_labels)), track.actions[0])
162 | self.fig_canvas.draw()
163 |
164 | def update(self):
165 | if self.cam is None:
166 | return
167 | if self.cam.grabbed():
168 | frame = self.cam.getitem()
169 |
170 | frame = self.models.process_frame(frame)
171 |
172 | frame = cv2.resize(frame, (self.canvas.winfo_width(), self.canvas.winfo_height()),
173 | interpolation=cv2.INTER_CUBIC)
174 | self.photo = ImageTk.PhotoImage(image=Image.fromarray(frame))
175 | self.canvas.create_image(0, 0, image=self.photo, anchor=tk.NW)
176 | else:
177 | self.cam.stop()
178 |
179 | self._cam = self.master.after(self.delay, self.update)
180 |
181 | def _on_closing(self):
182 | self.master.after_cancel(self._cam)
183 | if self.cam:
184 | self.cam.stop()
185 | self.cam.__del__()
186 | self.master.destroy()
187 |
188 |
189 | root = tk.Tk()
190 | app = main(root)
191 | root.mainloop()
192 |
--------------------------------------------------------------------------------
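The keypoint-to-box expansion in Models.kpt2bbox above is simple enough to verify by hand; a toy check, kept standalone since importing App.py would start the Tk window (values are made up):

    import numpy as np

    kpt = np.array([[100., 50.], [140., 60.], [120., 200.]])   # three (x, y) joints
    ex = 20
    bbox = np.array((kpt[:, 0].min() - ex, kpt[:, 1].min() - ex,
                     kpt[:, 0].max() + ex, kpt[:, 1].max() + ex))
    print(bbox)   # [ 80.  30. 160. 220.] -> padded (xmin, ymin, xmax, ymax)
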
/CameraLoader.py:
--------------------------------------------------------------------------------
1 | import os
2 | import cv2
3 | import time
4 | import torch
5 | import numpy as np
6 |
7 | from queue import Queue
8 | from threading import Thread, Lock
9 |
10 |
11 | class CamLoader:
12 |     """Use threading to capture frames from a camera for faster frame loading.
13 |     Recommended for a camera or webcam.
14 |
15 | Args:
16 | camera: (int, str) Source of camera or video.,
17 | preprocess: (Callable function) to process the frame before return.
18 | """
19 | def __init__(self, camera, preprocess=None, ori_return=False):
20 | self.stream = cv2.VideoCapture(camera)
21 | assert self.stream.isOpened(), 'Cannot read camera source!'
22 | self.fps = self.stream.get(cv2.CAP_PROP_FPS)
23 | self.frame_size = (int(self.stream.get(cv2.CAP_PROP_FRAME_WIDTH)),
24 | int(self.stream.get(cv2.CAP_PROP_FRAME_HEIGHT)))
25 |
26 | self.stopped = False
27 | self.ret = False
28 | self.frame = None
29 | self.ori_frame = None
30 | self.read_lock = Lock()
31 | self.ori = ori_return
32 |
33 | self.preprocess_fn = preprocess
34 |
35 | def start(self):
36 | self.t = Thread(target=self.update, args=()) # , daemon=True)
37 | self.t.start()
38 | c = 0
39 | while not self.ret:
40 | time.sleep(0.1)
41 | c += 1
42 | if c > 20:
43 | self.stop()
44 | raise TimeoutError('Can not get a frame from camera!!!')
45 | return self
46 |
47 | def update(self):
48 | while not self.stopped:
49 |             ret, frame = self.stream.read()
50 |             self.read_lock.acquire()
51 |             if ret:  # guard: frame is None once the stream ends.
52 |                 self.ori_frame = frame.copy()
53 |                 if self.preprocess_fn is not None:
54 |                     frame = self.preprocess_fn(frame)
55 |             self.ret, self.frame = ret, frame
56 |             self.read_lock.release()
57 |
58 | def grabbed(self):
59 | """Return `True` if can read a frame."""
60 | return self.ret
61 |
62 | def getitem(self):
63 | self.read_lock.acquire()
64 | frame = self.frame.copy()
65 | ori_frame = self.ori_frame.copy()
66 | self.read_lock.release()
67 | if self.ori:
68 | return frame, ori_frame
69 | else:
70 | return frame
71 |
72 | def stop(self):
73 | if self.stopped:
74 | return
75 | self.stopped = True
76 | if self.t.is_alive():
77 | self.t.join()
78 | self.stream.release()
79 |
80 | def __del__(self):
81 | if self.stream.isOpened():
82 | self.stream.release()
83 |
84 | def __exit__(self, exc_type, exc_val, exc_tb):
85 | if self.stream.isOpened():
86 | self.stream.release()
87 |
88 |
89 | class CamLoader_Q:
90 |     """Use threading and a queue to capture frames and store them for pickup in sequence.
91 |     Recommended for video files.
92 |
93 | Args:
94 | camera: (int, str) Source of camera or video.,
95 | batch_size: (int) Number of batch frame to store in queue. Default: 1,
96 | queue_size: (int) Maximum queue size. Default: 256,
97 | preprocess: (Callable function) to process the frame before return.
98 | """
99 | def __init__(self, camera, batch_size=1, queue_size=256, preprocess=None):
100 | self.stream = cv2.VideoCapture(camera)
101 | assert self.stream.isOpened(), 'Cannot read camera source!'
102 | self.fps = self.stream.get(cv2.CAP_PROP_FPS)
103 | self.frame_size = (int(self.stream.get(cv2.CAP_PROP_FRAME_WIDTH)),
104 | int(self.stream.get(cv2.CAP_PROP_FRAME_HEIGHT)))
105 |
106 |         # Queue for storing frames.
107 |
108 | self.stopped = False
109 | self.batch_size = batch_size
110 | self.Q = Queue(maxsize=queue_size)
111 |
112 | self.preprocess_fn = preprocess
113 |
114 | def start(self):
115 |         Thread(target=self.update, args=(), daemon=True).start()
116 | c = 0
117 | while not self.grabbed():
118 | time.sleep(0.1)
119 | c += 1
120 | if c > 20:
121 | self.stop()
122 | raise TimeoutError('Can not get a frame from camera!!!')
123 | return self
124 |
125 | def update(self):
126 | while not self.stopped:
127 | if not self.Q.full():
128 | frames = []
129 | for k in range(self.batch_size):
130 | ret, frame = self.stream.read()
131 | if not ret:
132 | self.stop()
133 | return
134 |
135 | if self.preprocess_fn is not None:
136 | frame = self.preprocess_fn(frame)
137 |
138 | frames.append(frame)
139 | frames = np.stack(frames)
140 | self.Q.put(frames)
141 | else:
142 | with self.Q.mutex:
143 | self.Q.queue.clear()
144 | # time.sleep(0.05)
145 |
146 | def grabbed(self):
147 | """Return `True` if can read a frame."""
148 | return self.Q.qsize() > 0
149 |
150 | def getitem(self):
151 | return self.Q.get().squeeze()
152 |
153 | def stop(self):
154 | if self.stopped:
155 | return
156 | self.stopped = True
157 | self.stream.release()
158 |
159 | def __len__(self):
160 | return self.Q.qsize()
161 |
162 | def __del__(self):
163 | if self.stream.isOpened():
164 | self.stream.release()
165 |
166 | def __exit__(self, exc_type, exc_val, exc_tb):
167 | if self.stream.isOpened():
168 | self.stream.release()
169 |
170 |
171 | if __name__ == '__main__':
172 | fps_time = 0
173 |
174 | # Using threading.
175 | cam = CamLoader(0).start()
176 | while cam.grabbed():
177 | frames = cam.getitem()
178 |
179 | frames = cv2.putText(frames, 'FPS: %f' % (1.0 / (time.time() - fps_time)),
180 | (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
181 | fps_time = time.time()
182 | cv2.imshow('frame', frames)
183 |
184 | if cv2.waitKey(1) & 0xFF == ord('q'):
185 | break
186 | cam.stop()
187 | cv2.destroyAllWindows()
188 |
189 | # Normal video capture.
190 | """cam = cv2.VideoCapture(0)
191 | while True:
192 | ret, frame = cam.read()
193 | if ret:
194 | #time.sleep(0.05)
195 | #frame = (cv2.flip(frame, 1) / 255.).astype(np.float)
196 |
197 | frame = cv2.putText(frame, 'FPS: %f' % (1.0 / (time.time() - fps_time)),
198 | (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
199 | fps_time = time.time()
200 | cv2.imshow('frame', frame)
201 | if cv2.waitKey(1) & 0xFF == ord('q'):
202 | break
203 | cam.release()
204 | cv2.destroyAllWindows()"""
205 |
--------------------------------------------------------------------------------
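A minimal sketch of reading a video file through the queued loader above ('some_video.avi' is a placeholder path); the webcam path with CamLoader is already shown in the __main__ block:

    import cv2
    from CameraLoader import CamLoader_Q

    cam = CamLoader_Q('some_video.avi', queue_size=256).start()
    while cam.grabbed():
        frame = cam.getitem()
        cv2.imshow('frame', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cam.stop()
    cv2.destroyAllWindows()
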
/Data/create_dataset_1.py:
--------------------------------------------------------------------------------
1 | """
2 | This script creates a .csv frame-by-frame action annotation file for videos.
3 |
4 | - It plays a video frame by frame; control the flow with [a] and [d]
5 | to step to the previous or next frame.
6 | - Open the annot_file (.csv) and label each frame of the video with the
7 | number of its action class.
8 | """
9 |
10 | import os
11 | import cv2
12 | import time
13 | import numpy as np
14 | import pandas as pd
15 | import matplotlib.pyplot as plt
16 |
17 | class_names = ['Standing', 'Walking', 'Sitting', 'Lying Down',
18 | 'Stand up', 'Sit down', 'Fall Down'] # label.
19 |
20 | video_folder = '../Data/falldata/Home/Videos'
21 | annot_file = '../Data/Home_new.csv'
22 |
23 | index_video_to_play = 0 # Choose video to play.
24 |
25 |
26 | def create_csv(folder):
27 | list_file = sorted(os.listdir(folder))
28 | cols = ['video', 'frame', 'label']
29 | df = pd.DataFrame(columns=cols)
30 | for fil in list_file:
31 | cap = cv2.VideoCapture(os.path.join(folder, fil))
32 | frames_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
33 | video = np.array([fil] * frames_count)
34 | frame = np.arange(1, frames_count + 1)
35 | label = np.array([0] * frames_count)
36 | rows = np.stack([video, frame, label], axis=1)
37 | df = df.append(pd.DataFrame(rows, columns=cols),
38 | ignore_index=True)
39 | cap.release()
40 | df.to_csv(annot_file, index=False)
41 |
42 |
43 | if not os.path.exists(annot_file):
44 | create_csv(video_folder)
45 |
46 | annot = pd.read_csv(annot_file)
47 | video_list = annot.iloc[:, 0].unique()
48 | video_file = os.path.join(video_folder, video_list[index_video_to_play])
49 | print(os.path.basename(video_file))
50 |
51 | annot = annot[annot['video'] == video_list[index_video_to_play]].reset_index(drop=True)
52 | frames_idx = annot.iloc[:, 1].tolist()
53 |
54 | cap = cv2.VideoCapture(video_file)
55 | frames_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
56 |
57 | assert frames_count == len(frames_idx), 'frame count not equal! {} and {}'.format(
58 | len(frames_idx), frames_count
59 | )
60 |
61 | i = 0
62 | while True:
63 | cap.set(cv2.CAP_PROP_POS_FRAMES, i)
64 | ret, frame = cap.read()
65 | if ret:
66 | cls_name = class_names[int(annot.iloc[i, -1]) - 1]
67 | frame = cv2.resize(frame, (0, 0), fx=1.5, fy=1.5)
68 | frame = cv2.putText(frame, 'Frame: {} Pose: {}'.format(i+1, cls_name),
69 | (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
70 | cv2.imshow('frame', frame)
71 |
72 | key = cv2.waitKey(0) & 0xFF
73 | if key == ord('q'):
74 | break
75 | elif key == ord('d'):
76 | i += 1
77 | continue
78 | elif key == ord('a'):
79 | i -= 1
80 | continue
81 | else:
82 | break
83 |
84 | cap.release()
85 | cv2.destroyAllWindows()
86 |
--------------------------------------------------------------------------------
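For reference, the annotation file written by create_csv() above has one row per frame; a sketch of the expected layout (the values below are made up):

    import pandas as pd

    # 'label' is filled in by hand: 1-7 index into class_names (class_names[label - 1]),
    # 0 means not yet labeled.
    df = pd.DataFrame({'video': ['video (1).avi'] * 3,
                       'frame': [1, 2, 3],
                       'label': [0, 0, 7]})
    print(df.to_string(index=False))
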
/Data/create_dataset_2.py:
--------------------------------------------------------------------------------
1 | """
2 | This script extracts skeleton joint positions and scores.
3 |
4 | - 'annot_folder' holds the action class and bounding box for each frame, as shipped with the dataset.
5 |     It should be in the format [frame_idx, action_cls, xmin, ymin, xmax, ymax]
6 |     and is used to crop a person for the pose estimation model.
7 | - If there is no annotation file, leave annot_folder = '' to use the Detector model to get the
8 |     bounding box.
9 | """
10 |
11 | import os
12 | import cv2
13 | import time
14 | import torch
15 | import pandas as pd
16 | import numpy as np
17 | import torchvision.transforms as transforms
18 |
19 | from DetectorLoader import TinyYOLOv3_onecls
20 | from PoseEstimateLoader import SPPE_FastPose
21 | from fn import vis_frame_fast
22 |
23 | save_path = '../../Data/Home_new-pose+score.csv'
24 |
25 | annot_file = '../../Data/Home_new.csv' # from create_dataset_1.py
26 | video_folder = '../Data/falldata/Home/Videos'
27 | annot_folder = '../Data/falldata/Home/Annotation_files' # bounding box annotation for each frame.
28 |
29 | # DETECTION MODEL.
30 | detector = TinyYOLOv3_onecls()
31 |
32 | # POSE MODEL.
33 | inp_h = 320
34 | inp_w = 256
35 | pose_estimator = SPPE_FastPose('resnet50', inp_h, inp_w)  # backbone first, as in App.py
36 |
37 | # with score.
38 | columns = ['video', 'frame', 'Nose_x', 'Nose_y', 'Nose_s', 'LShoulder_x', 'LShoulder_y', 'LShoulder_s',
39 | 'RShoulder_x', 'RShoulder_y', 'RShoulder_s', 'LElbow_x', 'LElbow_y', 'LElbow_s', 'RElbow_x',
40 | 'RElbow_y', 'RElbow_s', 'LWrist_x', 'LWrist_y', 'LWrist_s', 'RWrist_x', 'RWrist_y', 'RWrist_s',
41 | 'LHip_x', 'LHip_y', 'LHip_s', 'RHip_x', 'RHip_y', 'RHip_s', 'LKnee_x', 'LKnee_y', 'LKnee_s',
42 | 'RKnee_x', 'RKnee_y', 'RKnee_s', 'LAnkle_x', 'LAnkle_y', 'LAnkle_s', 'RAnkle_x', 'RAnkle_y',
43 | 'RAnkle_s', 'label']
44 |
45 |
46 | def normalize_points_with_size(points_xy, width, height, flip=False):
47 | points_xy[:, 0] /= width
48 | points_xy[:, 1] /= height
49 | if flip:
50 | points_xy[:, 0] = 1 - points_xy[:, 0]
51 | return points_xy
52 |
53 |
54 | annot = pd.read_csv(annot_file)
55 | vid_list = annot['video'].unique()
56 | for vid in vid_list:
57 | print(f'Process on: {vid}')
58 | df = pd.DataFrame(columns=columns)
59 | cur_row = 0
60 |
61 | # Pose Labels.
62 | frames_label = annot[annot['video'] == vid].reset_index(drop=True)
63 |
64 | cap = cv2.VideoCapture(os.path.join(video_folder, vid))
65 | frames_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
66 | frame_size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
67 | int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
68 |
69 |     # Bounding Box Labels (kept in a separate variable so the pose annotation DataFrame is not overwritten).
70 |     bb_file = os.path.join(annot_folder, vid.split('.')[0] + '.txt')
71 |     bb_annot = None
72 |     if os.path.exists(bb_file):
73 |         bb_annot = pd.read_csv(bb_file, header=None,
74 |                                names=['frame_idx', 'class', 'xmin', 'ymin', 'xmax', 'ymax'])
75 |         bb_annot = bb_annot.dropna().reset_index(drop=True)
76 |
77 |         assert frames_count == len(bb_annot), 'frame count not equal! {} and {}'.format(frames_count, len(bb_annot))
78 |
79 | fps_time = 0
80 | i = 1
81 | while True:
82 | ret, frame = cap.read()
83 | if ret:
84 | frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
85 | cls_idx = int(frames_label[frames_label['frame'] == i]['label'])
86 |
87 |             if bb_annot is not None:
88 |                 bb = np.array(bb_annot.iloc[i-1, 2:].astype(int))
89 | else:
90 | bb = detector.detect(frame)[0, :4].numpy().astype(int)
91 | bb[:2] = np.maximum(0, bb[:2] - 5)
92 | bb[2:] = np.minimum(frame_size, bb[2:] + 5) if bb[2:].any() != 0 else bb[2:]
93 |
94 | result = []
95 | if bb.any() != 0:
96 | result = pose_estimator.predict(frame, torch.tensor(bb[None, ...]),
97 | torch.tensor([[1.0]]))
98 |
99 | if len(result) > 0:
100 | pt_norm = normalize_points_with_size(result[0]['keypoints'].numpy().copy(),
101 | frame_size[0], frame_size[1])
102 | pt_norm = np.concatenate((pt_norm, result[0]['kp_score']), axis=1)
103 |
104 | #idx = result[0]['kp_score'] <= 0.05
105 | #pt_norm[idx.squeeze()] = np.nan
106 | row = [vid, i, *pt_norm.flatten().tolist(), cls_idx]
107 | scr = result[0]['kp_score'].mean()
108 | else:
109 | row = [vid, i, *[np.nan] * (13 * 3), cls_idx]
110 | scr = 0.0
111 |
112 | df.loc[cur_row] = row
113 | cur_row += 1
114 |
115 | # VISUALIZE.
116 | frame = vis_frame_fast(frame, result)
117 | frame = cv2.rectangle(frame, (bb[0], bb[1]), (bb[2], bb[3]), (0, 255, 0), 2)
118 | frame = cv2.putText(frame, 'Frame: {}, Pose: {}, Score: {:.4f}'.format(i, cls_idx, scr),
119 | (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
120 | frame = frame[:, :, ::-1]
121 | fps_time = time.time()
122 | i += 1
123 |
124 | cv2.imshow('frame', frame)
125 | if cv2.waitKey(1) & 0xFF == ord('q'):
126 | break
127 | else:
128 | break
129 |
130 | cap.release()
131 | cv2.destroyAllWindows()
132 |
133 | if os.path.exists(save_path):
134 | df.to_csv(save_path, mode='a', header=False, index=False)
135 | else:
136 | df.to_csv(save_path, mode='w', index=False)
137 |
138 |
--------------------------------------------------------------------------------
/Data/create_dataset_3.py:
--------------------------------------------------------------------------------
1 | """
2 | This script creates the dataset and labels by cleaning off some NaN rows, normalizing,
3 | smoothing labels, and weighting labels by keypoint scores.
4 |
5 | """
6 | import os
7 | import pickle
8 | import numpy as np
9 | import pandas as pd
10 |
11 |
12 | class_names = ['Standing', 'Walking', 'Sitting', 'Lying Down',
13 | 'Stand up', 'Sit down', 'Fall Down']
14 | main_parts = ['LShoulder_x', 'LShoulder_y', 'RShoulder_x', 'RShoulder_y', 'LHip_x', 'LHip_y',
15 | 'RHip_x', 'RHip_y']
16 | main_idx_parts = [1, 2, 7, 8, -1] # 1.5
17 |
18 | csv_pose_file = '../Data/Coffee_room_new-pose+score.csv'
19 | save_path = '../../Data/Coffee_room_new-set(labelXscrw).pkl'
20 |
21 | # Params.
22 | smooth_labels_step = 8
23 | n_frames = 30
24 | skip_frame = 1
25 |
26 | annot = pd.read_csv(csv_pose_file)
27 |
28 | # Remove NaN.
29 | idx = annot.iloc[:, 2:-1][main_parts].isna().sum(1) > 0
30 | idx = np.where(idx)[0]
31 | annot = annot.drop(idx)
32 | # One-Hot Labels.
33 | label_onehot = pd.get_dummies(annot['label'])
34 | annot = annot.drop('label', axis=1).join(label_onehot)
35 | cols = label_onehot.columns.values
36 |
37 |
38 | def scale_pose(xy):
39 | """
40 | Normalize pose points by scale with max/min value of each pose.
41 | xy : (frames, parts, xy) or (parts, xy)
42 | """
43 | if xy.ndim == 2:
44 | xy = np.expand_dims(xy, 0)
45 | xy_min = np.nanmin(xy, axis=1)
46 | xy_max = np.nanmax(xy, axis=1)
47 | for i in range(xy.shape[0]):
48 | xy[i] = ((xy[i] - xy_min[i]) / (xy_max[i] - xy_min[i])) * 2 - 1
49 | return xy.squeeze()
50 |
51 |
52 | def seq_label_smoothing(labels, max_step=10):
53 | steps = 0
54 | remain_step = 0
55 | target_label = 0
56 | active_label = 0
57 | start_change = 0
58 | max_val = np.max(labels)
59 | min_val = np.min(labels)
60 | for i in range(labels.shape[0]):
61 | if remain_step > 0:
62 | if i >= start_change:
63 | labels[i][active_label] = max_val * remain_step / steps
64 | labels[i][target_label] = max_val * (steps - remain_step) / steps \
65 | if max_val * (steps - remain_step) / steps else min_val
66 | remain_step -= 1
67 | continue
68 |
69 | diff_index = np.where(np.argmax(labels[i:i+max_step], axis=1) - np.argmax(labels[i]) != 0)[0]
70 | if len(diff_index) > 0:
71 | start_change = i + remain_step // 2
72 | steps = diff_index[0]
73 | remain_step = steps
74 | target_label = np.argmax(labels[i + remain_step])
75 | active_label = np.argmax(labels[i])
76 | return labels
77 |
78 |
79 | feature_set = np.empty((0, n_frames, 14, 3))
80 | labels_set = np.empty((0, len(cols)))
81 | vid_list = annot['video'].unique()
82 | for vid in vid_list:
83 | print(f'Process on: {vid}')
84 | data = annot[annot['video'] == vid].reset_index(drop=True).drop(columns='video')
85 |
86 | # Label Smoothing.
87 | esp = 0.1
88 | data[cols] = data[cols] * (1 - esp) + (1 - data[cols]) * esp / (len(cols) - 1)
89 | data[cols] = seq_label_smoothing(data[cols].values, smooth_labels_step)
90 |
91 | # Separate continuous frames.
92 | frames = data['frame'].values
93 | frames_set = []
94 | fs = [0]
95 | for i in range(1, len(frames)):
96 | if frames[i] < frames[i-1] + 10:
97 | fs.append(i)
98 | else:
99 | frames_set.append(fs)
100 | fs = [i]
101 | frames_set.append(fs)
102 |
103 | for fs in frames_set:
104 | xys = data.iloc[fs, 1:-len(cols)].values.reshape(-1, 13, 3)
105 | # Scale pose normalize.
106 | xys[:, :, :2] = scale_pose(xys[:, :, :2])
107 | # Add center point.
108 | xys = np.concatenate((xys, np.expand_dims((xys[:, 1, :] + xys[:, 2, :]) / 2, 1)), axis=1)
109 |
110 | # Weighting main parts score.
111 | scr = xys[:, :, -1].copy()
112 | scr[:, main_idx_parts] = np.minimum(scr[:, main_idx_parts] * 1.5, 1.0)
113 | # Mean score.
114 | scr = scr.mean(1)
115 |
116 | # Targets.
117 | lb = data.iloc[fs, -len(cols):].values
118 | # Apply points score mean to all labels.
119 | lb = lb * scr[:, None]
120 |
121 | for i in range(xys.shape[0] - n_frames):
122 | feature_set = np.append(feature_set, xys[i:i+n_frames][None, ...], axis=0)
123 | labels_set = np.append(labels_set, lb[i:i+n_frames].mean(0)[None, ...], axis=0)
124 |
125 |
126 | """with open(save_path, 'wb') as f:
127 | pickle.dump((feature_set, labels_set), f)"""
128 |
--------------------------------------------------------------------------------
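The per-frame rescaling done by scale_pose() above maps each coordinate axis to [-1, 1]; re-deriving the same arithmetic inline on a toy array makes that concrete (illustrative only):

    import numpy as np

    xy = np.array([[[0., 0.], [2., 4.], [4., 8.]]])       # (frames=1, parts=3, xy)
    xy_min = np.nanmin(xy, axis=1)
    xy_max = np.nanmax(xy, axis=1)
    scaled = (xy - xy_min[:, None]) / (xy_max - xy_min)[:, None] * 2 - 1
    print(scaled.squeeze())
    # [[-1. -1.]
    #  [ 0.  0.]
    #  [ 1.  1.]]
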
/Detection/Utils.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import math
3 | import time
4 | import tqdm
5 | import torch
6 | import torch.nn as nn
7 | import torch.nn.functional as F
8 | import numpy as np
9 | from torch.utils.data import DataLoader
10 |
11 |
12 | def to_cpu(tensor):
13 | return tensor.detach().cpu()
14 |
15 |
16 | def load_classes(path):
17 | """
18 | Loads class labels at 'path'
19 | """
20 |     with open(path, "r") as fp:
21 |         names = fp.read().split("\n")[:-1]
22 |     return names
23 |
24 |
25 | def weights_init_normal(m):
26 | classname = m.__class__.__name__
27 | if classname.find("Conv") != -1:
28 | torch.nn.init.normal_(m.weight.data, 0.0, 0.02)
29 | elif classname.find("BatchNorm2d") != -1:
30 | torch.nn.init.normal_(m.weight.data, 1.0, 0.02)
31 | torch.nn.init.constant_(m.bias.data, 0.0)
32 |
33 |
34 | def rescale_boxes(boxes, current_dim, original_shape):
35 | """ Rescales bounding boxes to the original shape """
36 | orig_h, orig_w = original_shape
37 | # The amount of padding that was added
38 | pad_x = max(orig_h - orig_w, 0) * (current_dim / max(original_shape))
39 | pad_y = max(orig_w - orig_h, 0) * (current_dim / max(original_shape))
40 | # Image height and width after padding is removed
41 | unpad_h = current_dim - pad_y
42 | unpad_w = current_dim - pad_x
43 | # Rescale bounding boxes to dimension of original image
44 | boxes[:, 0] = ((boxes[:, 0] - pad_x // 2) / unpad_w) * orig_w
45 | boxes[:, 1] = ((boxes[:, 1] - pad_y // 2) / unpad_h) * orig_h
46 | boxes[:, 2] = ((boxes[:, 2] - pad_x // 2) / unpad_w) * orig_w
47 | boxes[:, 3] = ((boxes[:, 3] - pad_y // 2) / unpad_h) * orig_h
48 | return boxes
49 |
50 |
51 | def xywh2xyxy(x):
52 | y = x.new(x.shape)
53 | y[..., 0] = x[..., 0] - x[..., 2] / 2
54 | y[..., 1] = x[..., 1] - x[..., 3] / 2
55 | y[..., 2] = x[..., 0] + x[..., 2] / 2
56 | y[..., 3] = x[..., 1] + x[..., 3] / 2
57 | return y
58 |
59 |
60 | def ap_per_class(tp, conf, pred_cls, target_cls):
61 | """ Compute the average precision, given the recall and precision curves.
62 | Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.
63 | # Arguments
64 | tp: True positives (list).
65 | conf: Objectness value from 0-1 (list).
66 | pred_cls: Predicted object classes (list).
67 | target_cls: True object classes (list).
68 | # Returns
69 | The average precision as computed in py-faster-rcnn.
70 | """
71 | # Sort by objectness
72 | i = np.argsort(-conf)
73 | tp, conf, pred_cls = tp[i], conf[i], pred_cls[i]
74 |
75 | # Find unique classes
76 | unique_classes = np.unique(target_cls)
77 |
78 | # Create Precision-Recall curve and compute AP for each class
79 | ap, p, r = [], [], []
80 | for c in tqdm.tqdm(unique_classes, desc="Computing AP"):
81 | i = pred_cls == c
82 | n_gt = (target_cls == c).sum() # Number of ground truth objects
83 | n_p = i.sum() # Number of predicted objects
84 |
85 | if n_p == 0 and n_gt == 0:
86 | continue
87 | elif n_p == 0 or n_gt == 0:
88 | ap.append(0)
89 | r.append(0)
90 | p.append(0)
91 | else:
92 | # Accumulate FPs and TPs
93 | fpc = (1 - tp[i]).cumsum()
94 | tpc = (tp[i]).cumsum()
95 |
96 | # Recall
97 | recall_curve = tpc / (n_gt + 1e-16)
98 | r.append(recall_curve[-1])
99 |
100 | # Precision
101 | precision_curve = tpc / (tpc + fpc)
102 | p.append(precision_curve[-1])
103 |
104 | # AP from recall-precision curve
105 | ap.append(compute_ap(recall_curve, precision_curve))
106 |
107 | # Compute F1 score (harmonic mean of precision and recall)
108 | p, r, ap = np.array(p), np.array(r), np.array(ap)
109 | f1 = 2 * p * r / (p + r + 1e-16)
110 |
111 | return p, r, ap, f1, unique_classes.astype("int32")
112 |
113 |
114 | def compute_ap(recall, precision):
115 | """ Compute the average precision, given the recall and precision curves.
116 | Code originally from https://github.com/rbgirshick/py-faster-rcnn.
117 | # Arguments
118 | recall: The recall curve (list).
119 | precision: The precision curve (list).
120 | # Returns
121 | The average precision as computed in py-faster-rcnn.
122 | """
123 | # correct AP calculation
124 | # first append sentinel values at the end
125 | mrec = np.concatenate(([0.0], recall, [1.0]))
126 | mpre = np.concatenate(([0.0], precision, [0.0]))
127 |
128 | # compute the precision envelope
129 | for i in range(mpre.size - 1, 0, -1):
130 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
131 |
132 | # to calculate area under PR curve, look for points
133 | # where X axis (recall) changes value
134 | i = np.where(mrec[1:] != mrec[:-1])[0]
135 |
136 | # and sum (\Delta recall) * prec
137 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
138 | return ap
139 |
140 |
141 | def get_batch_statistics(outputs, targets, iou_threshold):
142 | """ Compute true positives, predicted scores and predicted labels per sample """
143 | batch_metrics = []
144 | for sample_i in range(len(outputs)):
145 |
146 | if outputs[sample_i] is None:
147 | continue
148 |
149 | output = outputs[sample_i]
150 | pred_boxes = output[:, :4]
151 | pred_scores = output[:, 4]
152 | pred_labels = output[:, -1]
153 |
154 | true_positives = np.zeros(pred_boxes.shape[0])
155 |
156 | annotations = targets[targets[:, 0] == sample_i][:, 1:]
157 | target_labels = annotations[:, 0] if len(annotations) else []
158 | if len(annotations):
159 | detected_boxes = []
160 | target_boxes = annotations[:, 1:]
161 |
162 | for pred_i, (pred_box, pred_label) in enumerate(zip(pred_boxes, pred_labels)):
163 |
164 | # If targets are found break
165 | if len(detected_boxes) == len(annotations):
166 | break
167 |
168 | # Ignore if label is not one of the target labels
169 | if pred_label not in target_labels:
170 | continue
171 |
172 | iou, box_index = bbox_iou(pred_box.unsqueeze(0), target_boxes).max(0)
173 | if iou >= iou_threshold and box_index not in detected_boxes:
174 | true_positives[pred_i] = 1
175 | detected_boxes += [box_index]
176 | batch_metrics.append([true_positives, pred_scores, pred_labels])
177 | return batch_metrics
178 |
179 |
180 | def bbox_wh_iou(wh1, wh2):
181 | wh2 = wh2.t()
182 | w1, h1 = wh1[0], wh1[1]
183 | w2, h2 = wh2[0], wh2[1]
184 | inter_area = torch.min(w1, w2) * torch.min(h1, h2)
185 | union_area = (w1 * h1 + 1e-16) + w2 * h2 - inter_area
186 | return inter_area / union_area
187 |
188 |
189 | def bbox_iou(box1, box2, x1y1x2y2=True):
190 | """
191 | Returns the IoU of two bounding boxes
192 | """
193 | if not x1y1x2y2:
194 | # Transform from center and width to exact coordinates
195 | b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2
196 | b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2
197 | b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2
198 | b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2
199 | else:
200 | # Get the coordinates of bounding boxes
201 | b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3]
202 | b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3]
203 |
204 |     # get the coordinates of the intersection rectangle
205 | inter_rect_x1 = torch.max(b1_x1, b2_x1)
206 | inter_rect_y1 = torch.max(b1_y1, b2_y1)
207 | inter_rect_x2 = torch.min(b1_x2, b2_x2)
208 | inter_rect_y2 = torch.min(b1_y2, b2_y2)
209 | # Intersection area
210 | inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1 + 1, min=0) * torch.clamp(
211 | inter_rect_y2 - inter_rect_y1 + 1, min=0
212 | )
213 | # Union Area
214 | b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1)
215 | b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1)
216 |
217 | iou = inter_area / (b1_area + b2_area - inter_area + 1e-16)
218 |
219 | return iou
220 |
221 |
222 | def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4):
223 | """
224 | Removes detections with lower object confidence score than 'conf_thres' and performs
225 | Non-Maximum Suppression to further filter detections.
226 | Returns detections with shape:
227 | (x1, y1, x2, y2, object_conf, class_score, class_pred)
228 | """
229 | # From (center x, center y, width, height) to (x1, y1, x2, y2)
230 | prediction[..., :4] = xywh2xyxy(prediction[..., :4])
231 | output = [None for _ in range(len(prediction))]
232 | for image_i, image_pred in enumerate(prediction):
233 | # Filter out confidence scores below threshold
234 | image_pred = image_pred[image_pred[:, 4] >= conf_thres]
235 | # If none are remaining => process next image
236 | if not image_pred.size(0):
237 | continue
238 | # Object confidence times class confidence
239 | score = image_pred[:, 4] * image_pred[:, 5:].max(1)[0]
240 | # Sort by it
241 | image_pred = image_pred[(-score).argsort()]
242 | class_confs, class_preds = image_pred[:, 5:].max(1, keepdim=True)
243 | detections = torch.cat((image_pred[:, :5], class_confs.float(), class_preds.float()), 1)
244 | # Perform non-maximum suppression
245 | keep_boxes = []
246 | while detections.size(0):
247 | large_overlap = bbox_iou(detections[0, :4].unsqueeze(0), detections[:, :4]) > nms_thres
248 | label_match = detections[0, -1] == detections[:, -1]
249 | # Indices of boxes with lower confidence scores, large IOUs and matching labels
250 | invalid = large_overlap & label_match
251 | weights = detections[invalid, 4:5]
252 | # Merge overlapping bboxes by order of confidence
253 | detections[0, :4] = (weights * detections[invalid, :4]).sum(0) / weights.sum()
254 | keep_boxes += [detections[0]]
255 | detections = detections[~invalid]
256 | if keep_boxes:
257 | output[image_i] = torch.stack(keep_boxes)
258 |
259 | return output
260 |
261 |
262 | def build_targets(pred_boxes, pred_cls, target, anchors, ignore_thres):
263 | ByteTensor = torch.cuda.ByteTensor if pred_boxes.is_cuda else torch.ByteTensor
264 | FloatTensor = torch.cuda.FloatTensor if pred_boxes.is_cuda else torch.FloatTensor
265 |
266 | nB = pred_boxes.size(0)
267 | nA = pred_boxes.size(1)
268 | nC = pred_cls.size(-1)
269 | nG = pred_boxes.size(2)
270 |
271 | # Output tensors
272 | obj_mask = ByteTensor(nB, nA, nG, nG).fill_(0)
273 | noobj_mask = ByteTensor(nB, nA, nG, nG).fill_(1)
274 | class_mask = FloatTensor(nB, nA, nG, nG).fill_(0)
275 | iou_scores = FloatTensor(nB, nA, nG, nG).fill_(0)
276 | tx = FloatTensor(nB, nA, nG, nG).fill_(0)
277 | ty = FloatTensor(nB, nA, nG, nG).fill_(0)
278 | tw = FloatTensor(nB, nA, nG, nG).fill_(0)
279 | th = FloatTensor(nB, nA, nG, nG).fill_(0)
280 | tcls = FloatTensor(nB, nA, nG, nG, nC).fill_(0)
281 |
282 | # Convert to position relative to box
283 | target_boxes = target[:, 2:6] * nG
284 | gxy = target_boxes[:, :2]
285 | gwh = target_boxes[:, 2:]
286 | # Get anchors with best iou
287 | ious = torch.stack([bbox_wh_iou(anchor, gwh) for anchor in anchors])
288 | best_ious, best_n = ious.max(0)
289 | # Separate target values
290 | b, target_labels = target[:, :2].long().t()
291 | gx, gy = gxy.t()
292 | gw, gh = gwh.t()
293 | gi, gj = gxy.long().t()
294 | # Set masks
295 | obj_mask[b, best_n, gj, gi] = 1
296 | noobj_mask[b, best_n, gj, gi] = 0
297 |
298 | # Set noobj mask to zero where iou exceeds ignore threshold
299 | for i, anchor_ious in enumerate(ious.t()):
300 | noobj_mask[b[i], anchor_ious > ignore_thres, gj[i], gi[i]] = 0
301 |
302 | # Coordinates
303 | tx[b, best_n, gj, gi] = gx - gx.floor()
304 | ty[b, best_n, gj, gi] = gy - gy.floor()
305 | # Width and height
306 | tw[b, best_n, gj, gi] = torch.log(gw / anchors[best_n][:, 0] + 1e-16)
307 | th[b, best_n, gj, gi] = torch.log(gh / anchors[best_n][:, 1] + 1e-16)
308 | # One-hot encoding of label
309 | tcls[b, best_n, gj, gi, target_labels] = 1
310 | # Compute label correctness and iou at best anchor
311 | class_mask[b, best_n, gj, gi] = (pred_cls[b, best_n, gj, gi].argmax(-1) == target_labels).float()
312 | iou_scores[b, best_n, gj, gi] = bbox_iou(pred_boxes[b, best_n, gj, gi], target_boxes, x1y1x2y2=False)
313 |
314 | tconf = obj_mask.float()
315 | return iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf
316 |
317 |
318 | def parse_model_config(path):
319 | """Parses the yolo-v3 layer configuration file and returns module definitions"""
320 |     with open(path, 'r') as file:
321 |         lines = file.read().split('\n')
322 | lines = [x for x in lines if x and not x.startswith('#')]
323 | lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces
324 | module_defs = []
325 | for line in lines:
326 | if line.startswith('['): # This marks the start of a new block
327 | module_defs.append({})
328 | module_defs[-1]['type'] = line[1:-1].rstrip()
329 | if module_defs[-1]['type'] == 'convolutional':
330 | module_defs[-1]['batch_normalize'] = 0
331 | else:
332 | key, value = line.split("=")
333 | value = value.strip()
334 | module_defs[-1][key.rstrip()] = value.strip()
335 |
336 | return module_defs
337 |
338 |
339 | def parse_data_config(path):
340 | """Parses the data configuration file"""
341 | options = dict()
342 | options['gpus'] = '0,1,2,3'
343 | options['num_workers'] = '10'
344 | with open(path, 'r') as fp:
345 | lines = fp.readlines()
346 | for line in lines:
347 | line = line.strip()
348 | if line == '' or line.startswith('#'):
349 | continue
350 | key, value = line.split('=')
351 | options[key.strip()] = value.strip()
352 | return options
353 |
354 |
355 | def ResizePadding(height, width):
356 |     desired_size = (height, width)
357 | 
358 |     def resizePadding(image, **kwargs):
359 |         old_size = image.shape[:2]
360 |         max_size_idx = old_size.index(max(old_size))
361 |         ratio = float(desired_size[max_size_idx]) / max(old_size)
362 |         new_size = tuple([int(x * ratio) for x in old_size])
363 | 
364 |         if new_size > desired_size:
365 |             min_size_idx = old_size.index(min(old_size))
366 |             ratio = float(desired_size[min_size_idx]) / min(old_size)
367 |             new_size = tuple([int(x * ratio) for x in old_size])
368 | 
369 |         image = cv2.resize(image, (new_size[1], new_size[0]))
370 |         delta_w = desired_size[1] - new_size[1]
371 |         delta_h = desired_size[0] - new_size[0]
372 |         top, bottom = delta_h // 2, delta_h - (delta_h // 2)
373 |         left, right = delta_w // 2, delta_w - (delta_w // 2)
374 | 
375 |         image = cv2.copyMakeBorder(image, top, bottom, left, right, cv2.BORDER_CONSTANT)
376 |         return image
377 |     return resizePadding
378 |
379 |
380 | class AverageValueMeter(object):
381 | def __init__(self):
382 | self.reset()
383 | self.val = 0
384 |
385 | def add(self, value, n=1):
386 | self.val = value
387 | self.sum += value
388 | self.var += value * value
389 | self.n += n
390 |
391 | if self.n == 0:
392 | self.mean, self.std = np.nan, np.nan
393 | elif self.n == 1:
394 | self.mean = 0.0 + self.sum # This is to force a copy in torch/numpy
395 | self.std = np.inf
396 | self.mean_old = self.mean
397 | self.m_s = 0.0
398 | else:
399 | self.mean = self.mean_old + (value - n * self.mean_old) / float(self.n)
400 | self.m_s += (value - self.mean_old) * (value - self.mean)
401 | self.mean_old = self.mean
402 | self.std = np.sqrt(self.m_s / (self.n - 1.0))
403 |
404 | def value(self):
405 | return self.mean, self.std
406 |
407 | def reset(self):
408 | self.n = 0
409 | self.sum = 0.0
410 | self.var = 0.0
411 | self.val = 0.0
412 | self.mean = np.nan
413 | self.mean_old = 0.0
414 | self.m_s = 0.0
415 | self.std = np.nan
416 |
--------------------------------------------------------------------------------
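A minimal sketch (not part of the repository) of how the IoU and resize helpers in Detection/Utils.py above behave. It assumes the repository root is on PYTHONPATH so `Detection.Utils` is importable (the same module DetectorLoader.py below imports from), and that `torch`, `numpy` and `cv2` are installed.

```
import numpy as np
import torch

from Detection.Utils import bbox_iou, ResizePadding

# IoU between one predicted box and two ground-truth boxes, all in (x1, y1, x2, y2).
pred = torch.tensor([[50., 50., 150., 150.]])
gts = torch.tensor([[60., 60., 160., 160.],      # large overlap with pred
                    [200., 200., 300., 300.]])   # no overlap with pred
print(bbox_iou(pred, gts))        # roughly [0.68, 0.00]

# ResizePadding keeps the aspect ratio and pads the short side to the target size.
resize_fn = ResizePadding(416, 416)
frame = np.zeros((480, 640, 3), dtype=np.uint8)  # dummy 640x480 RGB frame
print(resize_fn(frame).shape)     # (416, 416, 3)
```
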
/DetectorLoader.py:
--------------------------------------------------------------------------------
1 | import time
2 | import torch
3 | import numpy as np
4 | import torchvision.transforms as transforms
5 |
6 | from queue import Queue
7 | from threading import Thread
8 |
9 | from Detection.Models import Darknet
10 | from Detection.Utils import non_max_suppression, ResizePadding
11 |
12 |
13 | class TinyYOLOv3_onecls(object):
14 | """Load trained Tiny-YOLOv3 one class (person) detection model.
15 | Args:
16 |         input_size: (int) Size of the input image; must be divisible by 32. Default: 416,
17 | config_file: (str) Path to Yolo model structure config file.,
18 | weight_file: (str) Path to trained weights file.,
19 | nms: (float) Non-Maximum Suppression overlap threshold.,
20 |         conf_thres: (float) Minimum confidence threshold a predicted bbox must reach to be kept.,
21 | device: (str) Device to load the model on 'cpu' or 'cuda'.
22 | """
23 | def __init__(self,
24 | input_size=416,
25 | config_file='Models/yolo-tiny-onecls/yolov3-tiny-onecls.cfg',
26 | weight_file='Models/yolo-tiny-onecls/best-model.pth',
27 | nms=0.2,
28 | conf_thres=0.45,
29 | device='cuda'):
30 | self.input_size = input_size
31 | self.model = Darknet(config_file).to(device)
32 | self.model.load_state_dict(torch.load(weight_file))
33 | self.model.eval()
34 | self.device = device
35 |
36 | self.nms = nms
37 | self.conf_thres = conf_thres
38 |
39 | self.resize_fn = ResizePadding(input_size, input_size)
40 | self.transf_fn = transforms.ToTensor()
41 |
42 | def detect(self, image, need_resize=True, expand_bb=5):
43 | """Feed forward to the model.
44 | Args:
45 | image: (numpy array) Single RGB image to detect.,
46 |             need_resize: (bool) Resize to input_size before feeding; returned bboxs
47 |                 are scaled back to the original image size.,
48 |             expand_bb: (int) Number of pixels to expand each box boundary by.
49 |         Returns:
50 |             (torch.float32) Tensor with one row per detected object:
51 |                 [top, left, bottom, right, bbox_score, class_score, class].
52 |                 Returns `None` if nothing is detected.
53 | """
54 | image_size = (self.input_size, self.input_size)
55 | if need_resize:
56 | image_size = image.shape[:2]
57 | image = self.resize_fn(image)
58 |
59 | image = self.transf_fn(image)[None, ...]
60 | scf = torch.min(self.input_size / torch.FloatTensor([image_size]), 1)[0]
61 |
62 | detected = self.model(image.to(self.device))
63 | detected = non_max_suppression(detected, self.conf_thres, self.nms)[0]
64 | if detected is not None:
65 | detected[:, [0, 2]] -= (self.input_size - scf * image_size[1]) / 2
66 | detected[:, [1, 3]] -= (self.input_size - scf * image_size[0]) / 2
67 | detected[:, 0:4] /= scf
68 |
69 | detected[:, 0:2] = np.maximum(0, detected[:, 0:2] - expand_bb)
70 | detected[:, 2:4] = np.minimum(image_size[::-1], detected[:, 2:4] + expand_bb)
71 |
72 | return detected
73 |
74 |
75 | class ThreadDetection(object):
76 | def __init__(self,
77 | dataloader,
78 | model,
79 | queue_size=256):
80 | self.model = model
81 |
82 | self.dataloader = dataloader
83 | self.stopped = False
84 | self.Q = Queue(maxsize=queue_size)
85 |
86 | def start(self):
87 |         Thread(target=self.update, args=(), daemon=True).start()
88 | return self
89 |
90 | def update(self):
91 | while True:
92 | if self.stopped:
93 | return
94 |
95 | images = self.dataloader.getitem()
96 |
97 | outputs = self.model.detect(images)
98 |
99 | if self.Q.full():
100 | time.sleep(2)
101 | self.Q.put((images, outputs))
102 |
103 | def getitem(self):
104 | return self.Q.get()
105 |
106 | def stop(self):
107 | self.stopped = True
108 |
109 | def __len__(self):
110 | return self.Q.qsize()
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
--------------------------------------------------------------------------------
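A minimal sketch (not part of the repository) of running TinyYOLOv3_onecls above on a single frame. It assumes the pre-trained .cfg and .pth files listed under Models/yolo-tiny-onecls are present, a CUDA device is available (the default `device='cuda'`), and `person.jpg` is a placeholder test image.

```
import cv2
from DetectorLoader import TinyYOLOv3_onecls

detector = TinyYOLOv3_onecls(input_size=416, device='cuda')

# detect() expects a single RGB image (OpenCV loads BGR, so convert first).
frame = cv2.cvtColor(cv2.imread('person.jpg'), cv2.COLOR_BGR2RGB)

detections = detector.detect(frame)      # tensor of shape (N, 7), or None
if detections is None:
    print('no person detected')
else:
    for det in detections:
        bbox = det[:4].tolist()          # box corners in original-image coordinates
        print('person, bbox_score={:.2f}, bbox={}'.format(float(det[4]), bbox))
```
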
/Models/TSSTG/_.txt:
--------------------------------------------------------------------------------
1 | tsstg-model.pth
--------------------------------------------------------------------------------
/Models/sppe/_.txt:
--------------------------------------------------------------------------------
1 | fast_res50_256x192.pth
2 | fast_res101_320x256.pth
--------------------------------------------------------------------------------
/Models/yolo-tiny-onecls/_.txt:
--------------------------------------------------------------------------------
1 | best-model.pth
2 | yolov3-tiny-onecls.cfg
--------------------------------------------------------------------------------
/PoseEstimateLoader.py:
--------------------------------------------------------------------------------
1 | import os
2 | import cv2
3 | import torch
4 |
5 | from SPPE.src.main_fast_inference import InferenNet_fast, InferenNet_fastRes50
6 | from SPPE.src.utils.img import crop_dets
7 | from pPose_nms import pose_nms
8 | from SPPE.src.utils.eval import getPrediction
9 |
10 |
11 | class SPPE_FastPose(object):
12 | def __init__(self,
13 | backbone,
14 | input_height=320,
15 | input_width=256,
16 | device='cuda'):
17 |         assert backbone in ['resnet50', 'resnet101'], '{} backbone is not supported yet!'.format(backbone)
18 |
19 | self.inp_h = input_height
20 | self.inp_w = input_width
21 | self.device = device
22 |
23 | if backbone == 'resnet101':
24 | self.model = InferenNet_fast().to(device)
25 | else:
26 | self.model = InferenNet_fastRes50().to(device)
27 | self.model.eval()
28 |
29 | def predict(self, image, bboxs, bboxs_scores):
30 | inps, pt1, pt2 = crop_dets(image, bboxs, self.inp_h, self.inp_w)
31 | pose_hm = self.model(inps.to(self.device)).cpu().data
32 |
33 | # Cut eyes and ears.
34 | pose_hm = torch.cat([pose_hm[:, :1, ...], pose_hm[:, 5:, ...]], dim=1)
35 |
36 | xy_hm, xy_img, scores = getPrediction(pose_hm, pt1, pt2, self.inp_h, self.inp_w,
37 | pose_hm.shape[-2], pose_hm.shape[-1])
38 | result = pose_nms(bboxs, bboxs_scores, xy_img, scores)
39 | return result
--------------------------------------------------------------------------------
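A minimal sketch (not part of the repository) of chaining the detector with SPPE_FastPose above, mirroring the pipeline described in the README. It assumes the Models/sppe weights exist, CUDA is available, and `person.jpg` is a placeholder image; the exact keys of each returned pose dict come from pose_nms, so they are printed rather than assumed.

```
import cv2
from DetectorLoader import TinyYOLOv3_onecls
from PoseEstimateLoader import SPPE_FastPose

detector = TinyYOLOv3_onecls(device='cuda')
pose_model = SPPE_FastPose('resnet50', device='cuda')

frame = cv2.cvtColor(cv2.imread('person.jpg'), cv2.COLOR_BGR2RGB)

detected = detector.detect(frame)
if detected is not None:
    # predict() takes the frame, the bbox corners and the bbox scores.
    poses = pose_model.predict(frame, detected[:, 0:4], detected[:, 4])
    for ps in poses:
        print(ps.keys())   # one dict per detected person with keypoints and scores
```
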
/README.md:
--------------------------------------------------------------------------------
1 | # Human Falling Detection and Tracking
2 | 
3 | This project uses Tiny-YOLO oneclass to detect each person in the frame,
4 | [AlphaPose](https://github.com/MVIG-SJTU/AlphaPose) to get the skeleton-pose, and then the
5 | [ST-GCN](https://github.com/yysijie/st-gcn) model to predict the action from every 30 frames
6 | of each person's track.
7 | 
8 | It currently supports 7 actions: Standing, Walking, Sitting, Lying Down, Stand up, Sit down, Fall Down.
9 |
10 |
11 | 
12 | 
13 |
14 | ## Prerequisites
15 |
16 | - Python > 3.6
17 | - PyTorch > 1.3.1
18 |
19 | Original test run on: i7-8750H CPU @ 2.20GHz x12, GeForce RTX 2070 8GB, CUDA 10.2
20 |
21 | ## Data
22 |
23 | This project trains a new Tiny-YOLO oneclass model to detect only person objects and to reduce the
24 | model size. It is trained on a rotation-augmented [COCO](http://cocodataset.org/#home) person keypoints dataset
25 | for more robust person detection over a wider range of pose angles.
26 |
27 | For action recognition, data from the [Le2i](http://le2i.cnrs.fr/Fall-detection-Dataset?lang=fr)
28 | Fall Detection Dataset (Coffee room, Home) was used: skeleton-poses were extracted with AlphaPose and each
29 | action frame was labeled by hand for training the ST-GCN model.
30 |
31 | ## Pre-Trained Models
32 |
33 | - Tiny-YOLO oneclass - [.pth](https://drive.google.com/file/d/1obEbWBSm9bXeg10FriJ7R2cGLRsg-AfP/view?usp=sharing),
34 | [.cfg](https://drive.google.com/file/d/19sPzBZjAjuJQ3emRteHybm2SG25w9Wn5/view?usp=sharing)
35 | - SPPE FastPose (AlphaPose) - [resnet101](https://drive.google.com/file/d/1N2MgE1Esq6CKYA6FyZVKpPwHRyOCrzA0/view?usp=sharing),
36 | [resnet50](https://drive.google.com/file/d/1IPfCDRwCmQDnQy94nT1V-_NVtTEi4VmU/view?usp=sharing)
37 | - ST-GCN action recognition - [tsstg](https://drive.google.com/file/d/1mQQ4JHe58ylKbBqTjuKzpwN2nwKOWJ9u/view?usp=sharing)
38 |
39 | ## Basic Use
40 |
41 | 1. Download all pre-trained models into the ./Models folder.
42 | 2. Run main.py
43 | ```
44 | python main.py ${video file or camera source}
45 | ```
46 |
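For example (the video path below is a placeholder; a camera source would typically be a
device index, assuming it is forwarded to OpenCV's VideoCapture):
```
python main.py ./path/to/video.mp4
python main.py 0
```
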
47 | ## Reference
48 |
49 | - AlphaPose : https://github.com/Amanbhandula/AlphaPose
50 | - ST-GCN : https://github.com/yysijie/st-gcn
--------------------------------------------------------------------------------
/SPPE/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018 Jeff-sjtu
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/SPPE/README.md:
--------------------------------------------------------------------------------
1 | # pytorch-AlphaPose from: https://github.com/Amanbhandula/AlphaPose
2 |
--------------------------------------------------------------------------------
/SPPE/src/main_fast_inference.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.utils.data
4 | import torch.utils.data.distributed
5 | import torch.nn.functional as F
6 | import numpy as np
7 | from SPPE.src.utils.img import flip, shuffleLR
8 | from SPPE.src.utils.eval import getPrediction
9 | from SPPE.src.models.FastPose import FastPose
10 |
11 | import time
12 | import sys
13 |
14 | import torch._utils
15 | try:
16 | torch._utils._rebuild_tensor_v2
17 | except AttributeError:
18 | def _rebuild_tensor_v2(storage, storage_offset, size, stride, requires_grad, backward_hooks):
19 | tensor = torch._utils._rebuild_tensor(storage, storage_offset, size, stride)
20 | tensor.requires_grad = requires_grad
21 | tensor._backward_hooks = backward_hooks
22 | return tensor
23 | torch._utils._rebuild_tensor_v2 = _rebuild_tensor_v2
24 |
25 |
26 | class InferenNet(nn.Module):
27 | def __init__(self, dataset, weights_file='./Models/sppe/fast_res101_320x256.pth'):
28 | super().__init__()
29 |
30 | self.pyranet = FastPose('resnet101').cuda()
31 | print('Loading pose model from {}'.format(weights_file))
32 | sys.stdout.flush()
33 | self.pyranet.load_state_dict(torch.load(weights_file))
34 |         self.pyranet.eval()
36 |
37 | self.dataset = dataset
38 |
39 | def forward(self, x):
40 | out = self.pyranet(x)
41 | out = out.narrow(1, 0, 17)
42 |
43 | flip_out = self.pyranet(flip(x))
44 | flip_out = flip_out.narrow(1, 0, 17)
45 |
46 | flip_out = flip(shuffleLR(
47 | flip_out, self.dataset))
48 |
49 | out = (flip_out + out) / 2
50 |
51 | return out
52 |
53 |
54 | class InferenNet_fast(nn.Module):
55 | def __init__(self, weights_file='./Models/sppe/fast_res101_320x256.pth'):
56 | super().__init__()
57 |
58 | self.pyranet = FastPose('resnet101').cuda()
59 | print('Loading pose model from {}'.format(weights_file))
60 | self.pyranet.load_state_dict(torch.load(weights_file))
61 | self.pyranet.eval()
62 |
63 | def forward(self, x):
64 | out = self.pyranet(x)
65 | out = out.narrow(1, 0, 17)
66 |
67 | return out
68 |
69 |
70 | class InferenNet_fastRes50(nn.Module):
71 | def __init__(self, weights_file='./Models/sppe/fast_res50_256x192.pth'):
72 | super().__init__()
73 |
74 | self.pyranet = FastPose('resnet50', 17).cuda()
75 | print('Loading pose model from {}'.format(weights_file))
76 | self.pyranet.load_state_dict(torch.load(weights_file))
77 | self.pyranet.eval()
78 |
79 | def forward(self, x):
80 | out = self.pyranet(x)
81 |
82 | return out
83 |
--------------------------------------------------------------------------------
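A minimal sketch (not part of the repository) of calling InferenNet_fastRes50 above directly on a batch of person crops. It assumes Models/sppe/fast_res50_256x192.pth is present and CUDA is available; in the real pipeline the crops come from SPPE.src.utils.img.crop_dets (see PoseEstimateLoader.py).

```
import torch
from SPPE.src.main_fast_inference import InferenNet_fastRes50

model = InferenNet_fastRes50()                    # loads the weights and moves the backbone to CUDA
with torch.no_grad():
    crops = torch.randn(2, 3, 256, 192).cuda()    # two dummy 256x192 person crops
    heatmaps = model(crops)
print(heatmaps.shape)                             # torch.Size([2, 17, 64, 48]) - one heatmap per COCO keypoint
```
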
/SPPE/src/models/FastPose.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | from torch.autograd import Variable
3 |
4 | from .layers.SE_Resnet import SEResnet
5 | from .layers.DUC import DUC
6 | from SPPE.src.opt import opt
7 |
8 |
9 | class FastPose(nn.Module):
10 | DIM = 128
11 |
12 | def __init__(self, backbone='resnet101', num_join=opt.nClasses):
13 | super(FastPose, self).__init__()
14 | assert backbone in ['resnet50', 'resnet101']
15 |
16 | self.preact = SEResnet(backbone)
17 |
18 | self.suffle1 = nn.PixelShuffle(2)
19 | self.duc1 = DUC(512, 1024, upscale_factor=2)
20 | self.duc2 = DUC(256, 512, upscale_factor=2)
21 |
22 | self.conv_out = nn.Conv2d(
23 | self.DIM, num_join, kernel_size=3, stride=1, padding=1)
24 |
25 | def forward(self, x: Variable):
26 | out = self.preact(x)
27 | out = self.suffle1(out)
28 | out = self.duc1(out)
29 | out = self.duc2(out)
30 |
31 | out = self.conv_out(out)
32 | return out
33 |
--------------------------------------------------------------------------------
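A weight-free, CPU-only shape check (not part of the repository) of FastPose above: the SE-ResNet backbone downsamples by 32, and the PixelShuffle plus two DUC blocks each upsample by 2, so the heatmaps come out at 1/4 of the input resolution.

```
import torch
from SPPE.src.models.FastPose import FastPose

net = FastPose('resnet50', 17).eval()    # randomly initialised, no checkpoint needed
with torch.no_grad():
    y = net(torch.randn(1, 3, 256, 192))
print(y.shape)                           # torch.Size([1, 17, 64, 48])
```
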
/SPPE/src/models/__init__.py:
--------------------------------------------------------------------------------
1 | from . import *
--------------------------------------------------------------------------------
/SPPE/src/models/hg-prm.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | from .layers.PRM import Residual as ResidualPyramid
3 | from .layers.Residual import Residual as Residual
4 | from torch.autograd import Variable
5 | from SPPE.src.opt import opt
6 | from collections import defaultdict
7 |
8 |
9 | class Hourglass(nn.Module):
10 | def __init__(self, n, nFeats, nModules, inputResH, inputResW, net_type, B, C):
11 | super(Hourglass, self).__init__()
12 |
13 | self.ResidualUp = ResidualPyramid if n >= 2 else Residual
14 | self.ResidualDown = ResidualPyramid if n >= 3 else Residual
15 |
16 | self.depth = n
17 | self.nModules = nModules
18 | self.nFeats = nFeats
19 | self.net_type = net_type
20 | self.B = B
21 | self.C = C
22 | self.inputResH = inputResH
23 | self.inputResW = inputResW
24 |
25 | self.up1 = self._make_residual(self.ResidualUp, False, inputResH, inputResW)
26 | self.low1 = nn.Sequential(
27 | nn.MaxPool2d(2),
28 | self._make_residual(self.ResidualDown, False, inputResH / 2, inputResW / 2)
29 | )
30 | if n > 1:
31 | self.low2 = Hourglass(n - 1, nFeats, nModules, inputResH / 2, inputResW / 2, net_type, B, C)
32 | else:
33 | self.low2 = self._make_residual(self.ResidualDown, False, inputResH / 2, inputResW / 2)
34 |
35 | self.low3 = self._make_residual(self.ResidualDown, True, inputResH / 2, inputResW / 2)
36 | self.up2 = nn.UpsamplingNearest2d(scale_factor=2)
37 |
38 | self.upperBranch = self.up1
39 | self.lowerBranch = nn.Sequential(
40 | self.low1,
41 | self.low2,
42 | self.low3,
43 | self.up2
44 | )
45 |
46 | def _make_residual(self, resBlock, useConv, inputResH, inputResW):
47 | layer_list = []
48 | for i in range(self.nModules):
49 | layer_list.append(resBlock(self.nFeats, self.nFeats, inputResH, inputResW,
50 | stride=1, net_type=self.net_type, useConv=useConv,
51 | baseWidth=self.B, cardinality=self.C))
52 | return nn.Sequential(*layer_list)
53 |
54 | def forward(self, x: Variable):
55 | up1 = self.upperBranch(x)
56 | up2 = self.lowerBranch(x)
57 | out = up1 + up2
58 | return out
59 |
60 |
61 | class PyraNet(nn.Module):
62 | def __init__(self):
63 | super(PyraNet, self).__init__()
64 |
65 | B, C = opt.baseWidth, opt.cardinality
66 | self.inputResH = opt.inputResH / 4
67 | self.inputResW = opt.inputResW / 4
68 | self.nStack = opt.nStack
69 |
70 | self.cnv1 = nn.Sequential(
71 | nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3),
72 | nn.BatchNorm2d(64),
73 | nn.ReLU(True)
74 | )
75 | self.r1 = nn.Sequential(
76 | ResidualPyramid(64, 128, opt.inputResH / 2, opt.inputResW / 2,
77 | stride=1, net_type='no_preact', useConv=False, baseWidth=B, cardinality=C),
78 | nn.MaxPool2d(2)
79 | )
80 | self.r4 = ResidualPyramid(128, 128, self.inputResH, self.inputResW,
81 | stride=1, net_type='preact', useConv=False, baseWidth=B, cardinality=C)
82 | self.r5 = ResidualPyramid(128, opt.nFeats, self.inputResH, self.inputResW,
83 | stride=1, net_type='preact', useConv=False, baseWidth=B, cardinality=C)
84 | self.preact = nn.Sequential(
85 | self.cnv1,
86 | self.r1,
87 | self.r4,
88 | self.r5
89 | )
90 | self.stack_layers = defaultdict(list)
91 | for i in range(self.nStack):
92 | hg = Hourglass(4, opt.nFeats, opt.nResidual, self.inputResH, self.inputResW, 'preact', B, C)
93 | lin = nn.Sequential(
94 | hg,
95 | nn.BatchNorm2d(opt.nFeats),
96 | nn.ReLU(True),
97 | nn.Conv2d(opt.nFeats, opt.nFeats, kernel_size=1, stride=1, padding=0),
98 | nn.BatchNorm2d(opt.nFeats),
99 | nn.ReLU(True)
100 | )
101 | tmpOut = nn.Conv2d(opt.nFeats, opt.nClasses, kernel_size=1, stride=1, padding=0)
102 | self.stack_layers['lin'].append(lin)
103 | self.stack_layers['out'].append(tmpOut)
104 | if i < self.nStack - 1:
105 | lin_ = nn.Conv2d(opt.nFeats, opt.nFeats, kernel_size=1, stride=1, padding=0)
106 | tmpOut_ = nn.Conv2d(opt.nClasses, opt.nFeats, kernel_size=1, stride=1, padding=0)
107 | self.stack_layers['lin_'].append(lin_)
108 | self.stack_layers['out_'].append(tmpOut_)
109 |
110 | def forward(self, x: Variable):
111 | out = []
112 | inter = self.preact(x)
113 | for i in range(self.nStack):
114 | lin = self.stack_layers['lin'][i](inter)
115 | tmpOut = self.stack_layers['out'][i](lin)
116 | out.append(tmpOut)
117 | if i < self.nStack - 1:
118 | lin_ = self.stack_layers['lin_'][i](lin)
119 | tmpOut_ = self.stack_layers['out_'][i](tmpOut)
120 | inter = inter + lin_ + tmpOut_
121 | return out
122 |
123 |
124 | def createModel(**kw):
125 | model = PyraNet()
126 | return model
127 |
--------------------------------------------------------------------------------
/SPPE/src/models/hgPRM.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | from .layers.PRM import Residual as ResidualPyramid
3 | from .layers.Residual import Residual as Residual
4 | from torch.autograd import Variable
5 | import torch
6 | from SPPE.src.opt import opt
7 | import math
8 |
9 |
10 | class Hourglass(nn.Module):
11 | def __init__(self, n, nFeats, nModules, inputResH, inputResW, net_type, B, C):
12 | super(Hourglass, self).__init__()
13 |
14 | self.ResidualUp = ResidualPyramid if n >= 2 else Residual
15 | self.ResidualDown = ResidualPyramid if n >= 3 else Residual
16 |
17 | self.depth = n
18 | self.nModules = nModules
19 | self.nFeats = nFeats
20 | self.net_type = net_type
21 | self.B = B
22 | self.C = C
23 | self.inputResH = inputResH
24 | self.inputResW = inputResW
25 |
26 | up1 = self._make_residual(self.ResidualUp, False, inputResH, inputResW)
27 | low1 = nn.Sequential(
28 | nn.MaxPool2d(2),
29 | self._make_residual(self.ResidualDown, False, inputResH / 2, inputResW / 2)
30 | )
31 | if n > 1:
32 | low2 = Hourglass(n - 1, nFeats, nModules, inputResH / 2, inputResW / 2, net_type, B, C)
33 | else:
34 | low2 = self._make_residual(self.ResidualDown, False, inputResH / 2, inputResW / 2)
35 |
36 | low3 = self._make_residual(self.ResidualDown, True, inputResH / 2, inputResW / 2)
37 | up2 = nn.UpsamplingNearest2d(scale_factor=2)
38 |
39 | self.upperBranch = up1
40 | self.lowerBranch = nn.Sequential(
41 | low1,
42 | low2,
43 | low3,
44 | up2
45 | )
46 |
47 | def _make_residual(self, resBlock, useConv, inputResH, inputResW):
48 | layer_list = []
49 | for i in range(self.nModules):
50 | layer_list.append(resBlock(self.nFeats, self.nFeats, inputResH, inputResW,
51 | stride=1, net_type=self.net_type, useConv=useConv,
52 | baseWidth=self.B, cardinality=self.C))
53 | return nn.Sequential(*layer_list)
54 |
55 | def forward(self, x: Variable):
56 | up1 = self.upperBranch(x)
57 | up2 = self.lowerBranch(x)
58 | # out = up1 + up2
59 | out = torch.add(up1, up2)
60 | return out
61 |
62 |
63 | class PyraNet(nn.Module):
64 | def __init__(self):
65 | super(PyraNet, self).__init__()
66 |
67 | B, C = opt.baseWidth, opt.cardinality
68 | self.inputResH = opt.inputResH / 4
69 | self.inputResW = opt.inputResW / 4
70 | self.nStack = opt.nStack
71 |
72 | conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3)
73 | if opt.init:
74 | nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / 3))
75 |
76 | cnv1 = nn.Sequential(
77 | conv1,
78 | nn.BatchNorm2d(64),
79 | nn.ReLU(True)
80 | )
81 |
82 | r1 = nn.Sequential(
83 | ResidualPyramid(64, 128, opt.inputResH / 2, opt.inputResW / 2,
84 | stride=1, net_type='no_preact', useConv=False, baseWidth=B, cardinality=C),
85 | nn.MaxPool2d(2)
86 | )
87 | r4 = ResidualPyramid(128, 128, self.inputResH, self.inputResW,
88 | stride=1, net_type='preact', useConv=False, baseWidth=B, cardinality=C)
89 | r5 = ResidualPyramid(128, opt.nFeats, self.inputResH, self.inputResW,
90 | stride=1, net_type='preact', useConv=False, baseWidth=B, cardinality=C)
91 | self.preact = nn.Sequential(
92 | cnv1,
93 | r1,
94 | r4,
95 | r5
96 | )
97 |
98 | self.stack_lin = nn.ModuleList()
99 | self.stack_out = nn.ModuleList()
100 | self.stack_lin_ = nn.ModuleList()
101 | self.stack_out_ = nn.ModuleList()
102 |
103 | for i in range(self.nStack):
104 | hg = Hourglass(4, opt.nFeats, opt.nResidual, self.inputResH, self.inputResW, 'preact', B, C)
105 | conv1 = nn.Conv2d(opt.nFeats, opt.nFeats, kernel_size=1, stride=1, padding=0)
106 | if opt.init:
107 | nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / 2))
108 | lin = nn.Sequential(
109 | hg,
110 | nn.BatchNorm2d(opt.nFeats),
111 | nn.ReLU(True),
112 | conv1,
113 | nn.BatchNorm2d(opt.nFeats),
114 | nn.ReLU(True)
115 | )
116 | tmpOut = nn.Conv2d(opt.nFeats, opt.nClasses, kernel_size=1, stride=1, padding=0)
117 | if opt.init:
118 | nn.init.xavier_normal(tmpOut.weight)
119 | self.stack_lin.append(lin)
120 | self.stack_out.append(tmpOut)
121 | if i < self.nStack - 1:
122 | lin_ = nn.Conv2d(opt.nFeats, opt.nFeats, kernel_size=1, stride=1, padding=0)
123 | tmpOut_ = nn.Conv2d(opt.nClasses, opt.nFeats, kernel_size=1, stride=1, padding=0)
124 | if opt.init:
125 | nn.init.xavier_normal(lin_.weight)
126 | nn.init.xavier_normal(tmpOut_.weight)
127 | self.stack_lin_.append(lin_)
128 | self.stack_out_.append(tmpOut_)
129 |
130 | def forward(self, x: Variable):
131 | out = []
132 | inter = self.preact(x)
133 | for i in range(self.nStack):
134 | lin = self.stack_lin[i](inter)
135 | tmpOut = self.stack_out[i](lin)
136 | out.append(tmpOut)
137 | if i < self.nStack - 1:
138 | lin_ = self.stack_lin_[i](lin)
139 | tmpOut_ = self.stack_out_[i](tmpOut)
140 | inter = inter + lin_ + tmpOut_
141 | return out
142 |
143 |
144 | class PyraNet_Inference(nn.Module):
145 | def __init__(self):
146 | super(PyraNet_Inference, self).__init__()
147 |
148 | B, C = opt.baseWidth, opt.cardinality
149 | self.inputResH = opt.inputResH / 4
150 | self.inputResW = opt.inputResW / 4
151 | self.nStack = opt.nStack
152 |
153 | conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3)
154 | if opt.init:
155 | nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / 3))
156 |
157 | cnv1 = nn.Sequential(
158 | conv1,
159 | nn.BatchNorm2d(64),
160 | nn.ReLU(True)
161 | )
162 |
163 | r1 = nn.Sequential(
164 | ResidualPyramid(64, 128, opt.inputResH / 2, opt.inputResW / 2,
165 | stride=1, net_type='no_preact', useConv=False, baseWidth=B, cardinality=C),
166 | nn.MaxPool2d(2)
167 | )
168 | r4 = ResidualPyramid(128, 128, self.inputResH, self.inputResW,
169 | stride=1, net_type='preact', useConv=False, baseWidth=B, cardinality=C)
170 | r5 = ResidualPyramid(128, opt.nFeats, self.inputResH, self.inputResW,
171 | stride=1, net_type='preact', useConv=False, baseWidth=B, cardinality=C)
172 | self.preact = nn.Sequential(
173 | cnv1,
174 | r1,
175 | r4,
176 | r5
177 | )
178 |
179 | self.stack_lin = nn.ModuleList()
180 | self.stack_out = nn.ModuleList()
181 | self.stack_lin_ = nn.ModuleList()
182 | self.stack_out_ = nn.ModuleList()
183 |
184 | for i in range(self.nStack):
185 | hg = Hourglass(4, opt.nFeats, opt.nResidual,
186 | self.inputResH, self.inputResW, 'preact', B, C)
187 | conv1 = nn.Conv2d(opt.nFeats, opt.nFeats,
188 | kernel_size=1, stride=1, padding=0)
189 | if opt.init:
190 | nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / 2))
191 | lin = nn.Sequential(
192 | hg,
193 | nn.BatchNorm2d(opt.nFeats),
194 | nn.ReLU(True),
195 | conv1,
196 | nn.BatchNorm2d(opt.nFeats),
197 | nn.ReLU(True)
198 | )
199 | tmpOut = nn.Conv2d(opt.nFeats, opt.nClasses,
200 | kernel_size=1, stride=1, padding=0)
201 | if opt.init:
202 | nn.init.xavier_normal(tmpOut.weight)
203 | self.stack_lin.append(lin)
204 | self.stack_out.append(tmpOut)
205 | if i < self.nStack - 1:
206 | lin_ = nn.Conv2d(opt.nFeats, opt.nFeats,
207 | kernel_size=1, stride=1, padding=0)
208 | tmpOut_ = nn.Conv2d(opt.nClasses, opt.nFeats,
209 | kernel_size=1, stride=1, padding=0)
210 | if opt.init:
211 | nn.init.xavier_normal(lin_.weight)
212 | nn.init.xavier_normal(tmpOut_.weight)
213 | self.stack_lin_.append(lin_)
214 | self.stack_out_.append(tmpOut_)
215 |
216 | def forward(self, x: Variable):
217 | inter = self.preact(x)
218 | for i in range(self.nStack):
219 | lin = self.stack_lin[i](inter)
220 | tmpOut = self.stack_out[i](lin)
221 | out = tmpOut
222 | if i < self.nStack - 1:
223 | lin_ = self.stack_lin_[i](lin)
224 | tmpOut_ = self.stack_out_[i](tmpOut)
225 | inter = inter + lin_ + tmpOut_
226 | return out
227 |
228 |
229 | def createModel(**kw):
230 | model = PyraNet()
231 | return model
232 |
233 |
234 | def createModel_Inference(**kw):
235 | model = PyraNet_Inference()
236 | return model
237 |
--------------------------------------------------------------------------------
/SPPE/src/models/layers/DUC.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch.nn.functional as F
3 |
4 |
5 | class DUC(nn.Module):
6 | """
7 | INPUT: inplanes, planes, upscale_factor
8 | OUTPUT: (planes // 4)* ht * wd
9 | """
10 | def __init__(self, inplanes, planes, upscale_factor=2):
11 | super(DUC, self).__init__()
12 | self.conv = nn.Conv2d(inplanes, planes, kernel_size=3, padding=1, bias=False)
13 | self.bn = nn.BatchNorm2d(planes)
14 | self.relu = nn.ReLU()
15 |
16 | self.pixel_shuffle = nn.PixelShuffle(upscale_factor)
17 |
18 | def forward(self, x):
19 | x = self.conv(x)
20 | x = self.bn(x)
21 | x = self.relu(x)
22 | x = self.pixel_shuffle(x)
23 | return x
24 |
--------------------------------------------------------------------------------
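A small sketch (not part of the repository) of what the DUC block above does to a tensor: a 3x3 conv to `planes` channels followed by PixelShuffle, which trades channels for resolution (planes // upscale_factor**2 output channels at twice the spatial size for upscale_factor=2).

```
import torch
from SPPE.src.models.layers.DUC import DUC

duc = DUC(inplanes=512, planes=1024, upscale_factor=2)
x = torch.randn(1, 512, 16, 12)
print(duc(x).shape)      # torch.Size([1, 256, 32, 24])
```
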
/SPPE/src/models/layers/PRM.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | from .util_models import ConcatTable, CaddTable, Identity
3 | import math
4 | from SPPE.src.opt import opt
5 |
6 |
7 | class Residual(nn.Module):
8 | def __init__(self, numIn, numOut, inputResH, inputResW, stride=1,
9 | net_type='preact', useConv=False, baseWidth=9, cardinality=4):
10 | super(Residual, self).__init__()
11 |
12 | self.con = ConcatTable([convBlock(numIn, numOut, inputResH,
13 | inputResW, net_type, baseWidth, cardinality, stride),
14 | skipLayer(numIn, numOut, stride, useConv)])
15 | self.cadd = CaddTable(True)
16 |
17 | def forward(self, x):
18 | out = self.con(x)
19 | out = self.cadd(out)
20 | return out
21 |
22 |
23 | def convBlock(numIn, numOut, inputResH, inputResW, net_type, baseWidth, cardinality, stride):
24 | numIn = int(numIn)
25 | numOut = int(numOut)
26 |
27 | addTable = ConcatTable()
28 | s_list = []
29 | if net_type != 'no_preact':
30 | s_list.append(nn.BatchNorm2d(numIn))
31 | s_list.append(nn.ReLU(True))
32 |
33 | conv1 = nn.Conv2d(numIn, numOut // 2, kernel_size=1)
34 | if opt.init:
35 | nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / 2))
36 | s_list.append(conv1)
37 |
38 | s_list.append(nn.BatchNorm2d(numOut // 2))
39 | s_list.append(nn.ReLU(True))
40 |
41 | conv2 = nn.Conv2d(numOut // 2, numOut // 2,
42 | kernel_size=3, stride=stride, padding=1)
43 | if opt.init:
44 | nn.init.xavier_normal(conv2.weight)
45 | s_list.append(conv2)
46 |
47 | s = nn.Sequential(*s_list)
48 | addTable.add(s)
49 |
50 | D = math.floor(numOut // baseWidth)
51 | C = cardinality
52 | s_list = []
53 |
54 | if net_type != 'no_preact':
55 | s_list.append(nn.BatchNorm2d(numIn))
56 | s_list.append(nn.ReLU(True))
57 |
58 | conv1 = nn.Conv2d(numIn, D, kernel_size=1, stride=stride)
59 | if opt.init:
60 | nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / C))
61 |
62 | s_list.append(conv1)
63 | s_list.append(nn.BatchNorm2d(D))
64 | s_list.append(nn.ReLU(True))
65 | s_list.append(pyramid(D, C, inputResH, inputResW))
66 | s_list.append(nn.BatchNorm2d(D))
67 | s_list.append(nn.ReLU(True))
68 |
69 | a = nn.Conv2d(D, numOut // 2, kernel_size=1)
70 | a.nBranchIn = C
71 | if opt.init:
72 | nn.init.xavier_normal(a.weight, gain=math.sqrt(1 / C))
73 | s_list.append(a)
74 |
75 | s = nn.Sequential(*s_list)
76 | addTable.add(s)
77 |
78 | elewiswAdd = nn.Sequential(
79 | addTable,
80 | CaddTable(False)
81 | )
82 | conv2 = nn.Conv2d(numOut // 2, numOut, kernel_size=1)
83 | if opt.init:
84 | nn.init.xavier_normal(conv2.weight, gain=math.sqrt(1 / 2))
85 | model = nn.Sequential(
86 | elewiswAdd,
87 | nn.BatchNorm2d(numOut // 2),
88 | nn.ReLU(True),
89 | conv2
90 | )
91 | return model
92 |
93 |
94 | def pyramid(D, C, inputResH, inputResW):
95 | pyraTable = ConcatTable()
96 | sc = math.pow(2, 1 / C)
97 | for i in range(C):
98 | scaled = 1 / math.pow(sc, i + 1)
99 | conv1 = nn.Conv2d(D, D, kernel_size=3, stride=1, padding=1)
100 | if opt.init:
101 | nn.init.xavier_normal(conv1.weight)
102 | s = nn.Sequential(
103 | nn.FractionalMaxPool2d(2, output_ratio=(scaled, scaled)),
104 | conv1,
105 | nn.UpsamplingBilinear2d(size=(int(inputResH), int(inputResW))))
106 | pyraTable.add(s)
107 | pyra = nn.Sequential(
108 | pyraTable,
109 | CaddTable(False)
110 | )
111 | return pyra
112 |
113 |
114 | class skipLayer(nn.Module):
115 | def __init__(self, numIn, numOut, stride, useConv):
116 | super(skipLayer, self).__init__()
117 | self.identity = False
118 |
119 | if numIn == numOut and stride == 1 and not useConv:
120 | self.identity = True
121 | else:
122 | conv1 = nn.Conv2d(numIn, numOut, kernel_size=1, stride=stride)
123 | if opt.init:
124 | nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / 2))
125 | self.m = nn.Sequential(
126 | nn.BatchNorm2d(numIn),
127 | nn.ReLU(True),
128 | conv1
129 | )
130 |
131 | def forward(self, x):
132 | if self.identity:
133 | return x
134 | else:
135 | return self.m(x)
136 |
--------------------------------------------------------------------------------
/SPPE/src/models/layers/Residual.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import math
3 | from .util_models import ConcatTable, CaddTable, Identity
4 | from SPPE.src.opt import opt
5 |
6 |
7 | def Residual(numIn, numOut, *arg, stride=1, net_type='preact', useConv=False, **kw):
8 | con = ConcatTable([convBlock(numIn, numOut, stride, net_type),
9 | skipLayer(numIn, numOut, stride, useConv)])
10 | cadd = CaddTable(True)
11 | return nn.Sequential(con, cadd)
12 |
13 |
14 | def convBlock(numIn, numOut, stride, net_type):
15 | s_list = []
16 | if net_type != 'no_preact':
17 | s_list.append(nn.BatchNorm2d(numIn))
18 | s_list.append(nn.ReLU(True))
19 |
20 | conv1 = nn.Conv2d(numIn, numOut // 2, kernel_size=1)
21 | if opt.init:
22 | nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / 2))
23 | s_list.append(conv1)
24 |
25 | s_list.append(nn.BatchNorm2d(numOut // 2))
26 | s_list.append(nn.ReLU(True))
27 |
28 | conv2 = nn.Conv2d(numOut // 2, numOut // 2, kernel_size=3, stride=stride, padding=1)
29 | if opt.init:
30 | nn.init.xavier_normal(conv2.weight)
31 | s_list.append(conv2)
32 | s_list.append(nn.BatchNorm2d(numOut // 2))
33 | s_list.append(nn.ReLU(True))
34 |
35 | conv3 = nn.Conv2d(numOut // 2, numOut, kernel_size=1)
36 | if opt.init:
37 | nn.init.xavier_normal(conv3.weight)
38 | s_list.append(conv3)
39 |
40 | return nn.Sequential(*s_list)
41 |
42 |
43 | def skipLayer(numIn, numOut, stride, useConv):
44 | if numIn == numOut and stride == 1 and not useConv:
45 | return Identity()
46 | else:
47 | conv1 = nn.Conv2d(numIn, numOut, kernel_size=1, stride=stride)
48 | if opt.init:
49 | nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / 2))
50 | return nn.Sequential(
51 | nn.BatchNorm2d(numIn),
52 | nn.ReLU(True),
53 | conv1
54 | )
55 |
--------------------------------------------------------------------------------
/SPPE/src/models/layers/Resnet.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch.nn.functional as F
3 |
4 |
5 | class Bottleneck(nn.Module):
6 | expansion = 4
7 |
8 | def __init__(self, inplanes, planes, stride=1, downsample=None):
9 | super(Bottleneck, self).__init__()
10 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=1, bias=False)
11 | self.bn1 = nn.BatchNorm2d(planes)
12 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
13 | self.bn2 = nn.BatchNorm2d(planes)
14 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, stride=1, bias=False)
15 | self.bn3 = nn.BatchNorm2d(planes * 4)
16 | self.downsample = downsample
17 | self.stride = stride
18 |
19 | def forward(self, x):
20 | residual = x
21 |
22 | out = F.relu(self.bn1(self.conv1(x)), inplace=True)
23 | out = F.relu(self.bn2(self.conv2(out)), inplace=True)
24 | out = self.bn3(self.conv3(out))
25 |
26 | if self.downsample is not None:
27 | residual = self.downsample(x)
28 |
29 | out += residual
30 | out = F.relu(out, inplace=True)
31 |
32 | return out
33 |
34 |
35 | class ResNet(nn.Module):
36 | """ Resnet """
37 | def __init__(self, architecture):
38 | super(ResNet, self).__init__()
39 | assert architecture in ["resnet50", "resnet101"]
40 | self.inplanes = 64
41 | self.layers = [3, 4, {"resnet50": 6, "resnet101": 23}[architecture], 3]
42 | self.block = Bottleneck
43 |
44 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
45 | self.bn1 = nn.BatchNorm2d(64, eps=1e-5, momentum=0.01, affine=True)
46 | self.relu = nn.ReLU(inplace=True)
47 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2)
48 |
49 | self.layer1 = self.make_layer(self.block, 64, self.layers[0])
50 | self.layer2 = self.make_layer(self.block, 128, self.layers[1], stride=2)
51 | self.layer3 = self.make_layer(self.block, 256, self.layers[2], stride=2)
52 |
53 | self.layer4 = self.make_layer(
54 | self.block, 512, self.layers[3], stride=2)
55 |
56 | def forward(self, x):
57 | x = self.maxpool(self.relu(self.bn1(self.conv1(x))))
58 | x = self.layer1(x)
59 | x = self.layer2(x)
60 | x = self.layer3(x)
61 | x = self.layer4(x)
62 | return x
63 |
64 | def stages(self):
65 | return [self.layer1, self.layer2, self.layer3, self.layer4]
66 |
67 | def make_layer(self, block, planes, blocks, stride=1):
68 | downsample = None
69 | if stride != 1 or self.inplanes != planes * block.expansion:
70 | downsample = nn.Sequential(
71 | nn.Conv2d(self.inplanes, planes * block.expansion,
72 | kernel_size=1, stride=stride, bias=False),
73 | nn.BatchNorm2d(planes * block.expansion),
74 | )
75 |
76 | layers = []
77 | layers.append(block(self.inplanes, planes, stride, downsample))
78 | self.inplanes = planes * block.expansion
79 | for i in range(1, blocks):
80 | layers.append(block(self.inplanes, planes))
81 |
82 | return nn.Sequential(*layers)
83 |
--------------------------------------------------------------------------------
/SPPE/src/models/layers/SE_Resnet.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | from .SE_module import SELayer
3 | import torch.nn.functional as F
4 |
5 |
6 | class Bottleneck(nn.Module):
7 | expansion = 4
8 |
9 | def __init__(self, inplanes, planes, stride=1, downsample=None, reduction=False):
10 | super(Bottleneck, self).__init__()
11 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
12 | self.bn1 = nn.BatchNorm2d(planes)
13 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
14 | padding=1, bias=False)
15 | self.bn2 = nn.BatchNorm2d(planes)
16 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
17 | self.bn3 = nn.BatchNorm2d(planes * 4)
18 | if reduction:
19 | self.se = SELayer(planes * 4)
20 |
21 | self.reduc = reduction
22 | self.downsample = downsample
23 | self.stride = stride
24 |
25 | def forward(self, x):
26 | residual = x
27 |
28 | out = F.relu(self.bn1(self.conv1(x)), inplace=True)
29 | out = F.relu(self.bn2(self.conv2(out)), inplace=True)
30 |
31 | out = self.conv3(out)
32 | out = self.bn3(out)
33 | if self.reduc:
34 | out = self.se(out)
35 |
36 | if self.downsample is not None:
37 | residual = self.downsample(x)
38 |
39 | out += residual
40 | out = F.relu(out)
41 |
42 | return out
43 |
44 |
45 | class SEResnet(nn.Module):
46 | """ SEResnet """
47 |
48 | def __init__(self, architecture):
49 | super(SEResnet, self).__init__()
50 | assert architecture in ["resnet50", "resnet101"]
51 | self.inplanes = 64
52 | self.layers = [3, 4, {"resnet50": 6, "resnet101": 23}[architecture], 3]
53 | self.block = Bottleneck
54 |
55 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7,
56 | stride=2, padding=3, bias=False)
57 | self.bn1 = nn.BatchNorm2d(64, eps=1e-5, momentum=0.01, affine=True)
58 | self.relu = nn.ReLU(inplace=True)
59 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
60 |
61 | self.layer1 = self.make_layer(self.block, 64, self.layers[0])
62 | self.layer2 = self.make_layer(
63 | self.block, 128, self.layers[1], stride=2)
64 | self.layer3 = self.make_layer(
65 | self.block, 256, self.layers[2], stride=2)
66 |
67 | self.layer4 = self.make_layer(
68 | self.block, 512, self.layers[3], stride=2)
69 |
70 | def forward(self, x):
71 | x = self.maxpool(self.relu(self.bn1(self.conv1(x)))) # 64 * h/4 * w/4
72 | x = self.layer1(x) # 256 * h/4 * w/4
73 | x = self.layer2(x) # 512 * h/8 * w/8
74 | x = self.layer3(x) # 1024 * h/16 * w/16
75 | x = self.layer4(x) # 2048 * h/32 * w/32
76 | return x
77 |
78 | def stages(self):
79 | return [self.layer1, self.layer2, self.layer3, self.layer4]
80 |
81 | def make_layer(self, block, planes, blocks, stride=1):
82 | downsample = None
83 | if stride != 1 or self.inplanes != planes * block.expansion:
84 | downsample = nn.Sequential(
85 | nn.Conv2d(self.inplanes, planes * block.expansion,
86 | kernel_size=1, stride=stride, bias=False),
87 | nn.BatchNorm2d(planes * block.expansion),
88 | )
89 |
90 | layers = []
91 | if downsample is not None:
92 | layers.append(block(self.inplanes, planes, stride, downsample, reduction=True))
93 | else:
94 | layers.append(block(self.inplanes, planes, stride, downsample))
95 | self.inplanes = planes * block.expansion
96 | for i in range(1, blocks):
97 | layers.append(block(self.inplanes, planes))
98 |
99 | return nn.Sequential(*layers)
100 |
--------------------------------------------------------------------------------
/SPPE/src/models/layers/SE_module.py:
--------------------------------------------------------------------------------
1 | from torch import nn
2 |
3 |
4 | class SELayer(nn.Module):
5 | def __init__(self, channel, reduction=1):
6 | super(SELayer, self).__init__()
7 | self.avg_pool = nn.AdaptiveAvgPool2d(1)
8 | self.fc = nn.Sequential(
9 | nn.Linear(channel, channel // reduction),
10 | nn.ReLU(inplace=True),
11 | nn.Linear(channel // reduction, channel),
12 | nn.Sigmoid()
13 | )
14 |
15 | def forward(self, x):
16 | b, c, _, _ = x.size()
17 | y = self.avg_pool(x).view(b, c)
18 | y = self.fc(y).view(b, c, 1, 1)
19 | return x * y
20 |
--------------------------------------------------------------------------------
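A small sketch (not part of the repository) of the SELayer above: global average pooling gives one statistic per channel, the two-layer MLP with a sigmoid produces per-channel gates, and the input is rescaled channel-wise, so the output shape is unchanged.

```
import torch
from SPPE.src.models.layers.SE_module import SELayer

se = SELayer(channel=64, reduction=4)    # the default reduction=1 above keeps the MLP full width
x = torch.randn(2, 64, 8, 8)
print(se(x).shape)                       # torch.Size([2, 64, 8, 8]) - same shape, rescaled channels
```
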
/SPPE/src/models/layers/__init__.py:
--------------------------------------------------------------------------------
1 | from . import *
2 |
--------------------------------------------------------------------------------
/SPPE/src/models/layers/util_models.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from torch.autograd import Variable
4 |
5 |
6 | class ConcatTable(nn.Module):
7 | def __init__(self, module_list=None):
8 | super(ConcatTable, self).__init__()
9 |
10 | self.modules_list = nn.ModuleList(module_list)
11 |
12 | def forward(self, x: Variable):
13 | y = []
14 | for i in range(len(self.modules_list)):
15 | y.append(self.modules_list[i](x))
16 | return y
17 |
18 | def add(self, module):
19 | self.modules_list.append(module)
20 |
21 |
22 | class CaddTable(nn.Module):
23 | def __init__(self, inplace=False):
24 | super(CaddTable, self).__init__()
25 | self.inplace = inplace
26 |
27 | def forward(self, x: Variable or list):
28 | return torch.stack(x, 0).sum(0)
29 |
30 |
31 | class Identity(nn.Module):
32 | def __init__(self, params=None):
33 | super(Identity, self).__init__()
34 | self.params = nn.ParameterList(params)
35 |
36 | def forward(self, x: Variable or list):
37 | return x
38 |
--------------------------------------------------------------------------------
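A small sketch (not part of the repository) of the Torch7-style table modules above: ConcatTable feeds the same input through several branches and returns a list, and CaddTable sums that list element-wise (these are the building blocks of the PRM residual branches).

```
import torch
import torch.nn as nn
from SPPE.src.models.layers.util_models import ConcatTable, CaddTable

branches = ConcatTable([nn.Identity(), nn.Linear(3, 3)])
add = CaddTable()

x = torch.ones(2, 3)
ys = branches(x)          # [x, Linear(x)] - one output per branch
print(add(ys).shape)      # torch.Size([2, 3]) - element-wise sum of the branch outputs
```
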
/SPPE/src/opt.py:
--------------------------------------------------------------------------------
1 | """import argparse
2 | import torch
3 |
4 | parser = argparse.ArgumentParser(description='PyTorch AlphaPose Training')
5 | parser.add_argument("--return_counts", type=bool, default=True)
6 | parser.add_argument("--mode", default='client')
7 | parser.add_argument("--port", default=52162)
8 |
9 | "----------------------------- General options -----------------------------"
10 | parser.add_argument('--expID', default='default', type=str,
11 | help='Experiment ID')
12 | parser.add_argument('--dataset', default='coco', type=str,
13 | help='Dataset choice: mpii | coco')
14 | parser.add_argument('--nThreads', default=30, type=int,
15 | help='Number of data loading threads')
16 | parser.add_argument('--debug', default=False, type=bool,
17 | help='Print the debug information')
18 | parser.add_argument('--snapshot', default=1, type=int,
19 | help='How often to take a snapshot of the model (0 = never)')
20 |
21 | "----------------------------- AlphaPose options -----------------------------"
22 | parser.add_argument('--addDPG', default=False, type=bool,
23 | help='Train with data augmentation')
24 |
25 | "----------------------------- Model options -----------------------------"
26 | parser.add_argument('--netType', default='hgPRM', type=str,
27 | help='Options: hgPRM | resnext')
28 | parser.add_argument('--loadModel', default=None, type=str,
29 | help='Provide full path to a previously trained model')
30 | parser.add_argument('--Continue', default=False, type=bool,
31 | help='Pick up where an experiment left off')
32 | parser.add_argument('--nFeats', default=256, type=int,
33 | help='Number of features in the hourglass')
34 | parser.add_argument('--nClasses', default=33, type=int,
35 | help='Number of output channel')
36 | parser.add_argument('--nStack', default=8, type=int,
37 | help='Number of hourglasses to stack')
38 |
39 | "----------------------------- Hyperparameter options -----------------------------"
40 | parser.add_argument('--LR', default=2.5e-4, type=float,
41 | help='Learning rate')
42 | parser.add_argument('--momentum', default=0, type=float,
43 | help='Momentum')
44 | parser.add_argument('--weightDecay', default=0, type=float,
45 | help='Weight decay')
46 | parser.add_argument('--crit', default='MSE', type=str,
47 | help='Criterion type')
48 | parser.add_argument('--optMethod', default='rmsprop', type=str,
49 | help='Optimization method: rmsprop | sgd | nag | adadelta')
50 |
51 |
52 | "----------------------------- Training options -----------------------------"
53 | parser.add_argument('--nEpochs', default=50, type=int,
54 | help='Number of hourglasses to stack')
55 | parser.add_argument('--epoch', default=0, type=int,
56 | help='Current epoch')
57 | parser.add_argument('--trainBatch', default=40, type=int,
58 | help='Train-batch size')
59 | parser.add_argument('--validBatch', default=20, type=int,
60 | help='Valid-batch size')
61 | parser.add_argument('--trainIters', default=0, type=int,
62 | help='Total train iters')
63 | parser.add_argument('--valIters', default=0, type=int,
64 | help='Total valid iters')
65 | parser.add_argument('--init', default=None, type=str,
66 | help='Initialization')
67 | "----------------------------- Data options -----------------------------"
68 | parser.add_argument('--inputResH', default=384, type=int,
69 | help='Input image height')
70 | parser.add_argument('--inputResW', default=320, type=int,
71 | help='Input image width')
72 | parser.add_argument('--outputResH', default=96, type=int,
73 | help='Output heatmap height')
74 | parser.add_argument('--outputResW', default=80, type=int,
75 | help='Output heatmap width')
76 | parser.add_argument('--scale', default=0.25, type=float,
77 | help='Degree of scale augmentation')
78 | parser.add_argument('--rotate', default=30, type=float,
79 | help='Degree of rotation augmentation')
80 | parser.add_argument('--hmGauss', default=1, type=int,
81 | help='Heatmap gaussian size')
82 |
83 | "----------------------------- PyraNet options -----------------------------"
84 | parser.add_argument('--baseWidth', default=9, type=int,
85 | help='Heatmap gaussian size')
86 | parser.add_argument('--cardinality', default=5, type=int,
87 | help='Heatmap gaussian size')
88 | parser.add_argument('--nResidual', default=1, type=int,
89 | help='Number of residual modules at each location in the pyranet')
90 |
91 | "----------------------------- Distribution options -----------------------------"
92 | parser.add_argument('--dist', dest='dist', type=int, default=1,
93 | help='distributed training or not')
94 | parser.add_argument('--backend', dest='backend', type=str, default='gloo',
95 | help='backend for distributed training')
96 | parser.add_argument('--port', dest='port',
97 | help='port of server')
98 | opt = parser.parse_args()"""
99 |
100 | """if opt.Continue:
101 | opt = torch.load("../exp/{}/{}/option.pkl".format(opt.dataset, opt.expID))
102 | opt.Continue = True
103 | opt.nEpochs = 50
104 | print("--- Continue ---")"""
105 |
106 |
107 | class opt:
108 | nClasses = 33
109 | inputResH = 384
110 | inputResW = 320
111 | outputResH = 96
112 | outputResW = 80
113 | scale = 0.25
114 | rotate = 30
115 | hmGauss = 1
116 |
--------------------------------------------------------------------------------
/SPPE/src/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from . import *
2 |
--------------------------------------------------------------------------------
/SPPE/src/utils/dataset/.coco.py.swp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GajuuzZ/Human-Falling-Detect-Tracks/7ed2faa4d6147dfd576f58869b6c25545208af35/SPPE/src/utils/dataset/.coco.py.swp
--------------------------------------------------------------------------------
/SPPE/src/utils/dataset/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GajuuzZ/Human-Falling-Detect-Tracks/7ed2faa4d6147dfd576f58869b6c25545208af35/SPPE/src/utils/dataset/__init__.py
--------------------------------------------------------------------------------
/SPPE/src/utils/dataset/coco.py:
--------------------------------------------------------------------------------
1 | import os
2 | import h5py
3 | from functools import reduce
4 |
5 | import torch.utils.data as data
6 | from ..pose import generateSampleBox
7 | from opt import opt
8 |
9 |
10 | class Mscoco(data.Dataset):
11 | def __init__(self, train=True, sigma=1,
12 | scale_factor=(0.2, 0.3), rot_factor=40, label_type='Gaussian'):
13 | self.img_folder = '../data/coco/images' # root image folders
14 | self.is_train = train # training set or test set
15 | self.inputResH = opt.inputResH
16 | self.inputResW = opt.inputResW
17 | self.outputResH = opt.outputResH
18 | self.outputResW = opt.outputResW
19 | self.sigma = sigma
20 | self.scale_factor = scale_factor
21 | self.rot_factor = rot_factor
22 | self.label_type = label_type
23 |
24 | self.nJoints_coco = 17
25 | self.nJoints_mpii = 16
26 | self.nJoints = 33
27 |
28 | self.accIdxs = (1, 2, 3, 4, 5, 6, 7, 8,
29 | 9, 10, 11, 12, 13, 14, 15, 16, 17)
30 | self.flipRef = ((2, 3), (4, 5), (6, 7),
31 | (8, 9), (10, 11), (12, 13),
32 | (14, 15), (16, 17))
33 |
34 | # create train/val split
35 | with h5py.File('../data/coco/annot_clean.h5', 'r') as annot:
36 | # train
37 | self.imgname_coco_train = annot['imgname'][:-5887]
38 | self.bndbox_coco_train = annot['bndbox'][:-5887]
39 | self.part_coco_train = annot['part'][:-5887]
40 | # val
41 | self.imgname_coco_val = annot['imgname'][-5887:]
42 | self.bndbox_coco_val = annot['bndbox'][-5887:]
43 | self.part_coco_val = annot['part'][-5887:]
44 |
45 | self.size_train = self.imgname_coco_train.shape[0]
46 | self.size_val = self.imgname_coco_val.shape[0]
47 |
48 | def __getitem__(self, index):
49 | sf = self.scale_factor
50 |
51 | if self.is_train:
52 | part = self.part_coco_train[index]
53 | bndbox = self.bndbox_coco_train[index]
54 | imgname = self.imgname_coco_train[index]
55 | else:
56 | part = self.part_coco_val[index]
57 | bndbox = self.bndbox_coco_val[index]
58 | imgname = self.imgname_coco_val[index]
59 |
60 | imgname = reduce(lambda x, y: x + y, map(lambda x: chr(int(x)), imgname))
61 | img_path = os.path.join(self.img_folder, imgname)
62 |
63 | metaData = generateSampleBox(img_path, bndbox, part, self.nJoints,
64 | 'coco', sf, self, train=self.is_train)
65 |
66 | inp, out_bigcircle, out_smallcircle, out, setMask = metaData
67 |
68 | label = []
69 | for i in range(opt.nStack):
70 | if i < 2:
71 | # label.append(out_bigcircle.clone())
72 | label.append(out.clone())
73 | elif i < 4:
74 | # label.append(out_smallcircle.clone())
75 | label.append(out.clone())
76 | else:
77 | label.append(out.clone())
78 |
79 | return inp, label, setMask, 'coco'
80 |
81 | def __len__(self):
82 | if self.is_train:
83 | return self.size_train
84 | else:
85 | return self.size_val
86 |
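A minimal sketch of how this dataset is typically consumed during SPPE training; it assumes the annotation file and image folder referenced above exist and that `opt.nStack` is defined (it is not part of the inference-only `opt` class in this repository):

import torch.utils.data

train_set = Mscoco(train=True)
train_loader = torch.utils.data.DataLoader(train_set, batch_size=32,
                                           shuffle=True, num_workers=4)

for inp, label, setMask, imgset in train_loader:
    # inp:     (B, 3, inputResH, inputResW) cropped person images
    # label:   list of nStack heatmap targets, each (B, nJoints, outputResH, outputResW)
    # setMask: (B, nJoints, outputResH, outputResW) mask of annotated joints
    break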
--------------------------------------------------------------------------------
/SPPE/src/utils/dataset/fuse.py:
--------------------------------------------------------------------------------
1 | import os
2 | import h5py
3 | from functools import reduce
4 |
5 | import torch.utils.data as data
6 | from ..pose import generateSampleBox
7 | from opt import opt
8 |
9 |
10 | class Mscoco(data.Dataset):
11 | def __init__(self, train=True, sigma=1,
12 | scale_factor=0.25, rot_factor=30, label_type='Gaussian'):
13 | self.img_folder = '../data/' # root image folders
14 | self.is_train = train # training set or test set
15 | self.inputResH = 320
16 | self.inputResW = 256
17 | self.outputResH = 80
18 | self.outputResW = 64
19 | self.sigma = sigma
20 | self.scale_factor = (0.2, 0.3)
21 | self.rot_factor = rot_factor
22 | self.label_type = label_type
23 |
24 | self.nJoints_coco = 17
25 | self.nJoints_mpii = 16
26 | self.nJoints = 33
27 |
28 | self.accIdxs = (1, 2, 3, 4, 5, 6, 7, 8, # COCO
29 | 9, 10, 11, 12, 13, 14, 15, 16, 17,
30 | 18, 19, 20, 21, 22, 23, # MPII
31 | 28, 29, 32, 33)
32 |
33 | self.flipRef = ((2, 3), (4, 5), (6, 7), # COCO
34 | (8, 9), (10, 11), (12, 13),
35 | (14, 15), (16, 17),
36 | (18, 23), (19, 22), (20, 21), # MPII
37 | (28, 33), (29, 32), (30, 31))
38 |
39 | '''
40 | Create train/val split
41 | '''
42 | # COCO
43 | with h5py.File('../data/coco/annot_clean.h5', 'r') as annot:
44 | # train
45 | self.imgname_coco_train = annot['imgname'][:-5887]
46 | self.bndbox_coco_train = annot['bndbox'][:-5887]
47 | self.part_coco_train = annot['part'][:-5887]
48 | # val
49 | self.imgname_coco_val = annot['imgname'][-5887:]
50 | self.bndbox_coco_val = annot['bndbox'][-5887:]
51 | self.part_coco_val = annot['part'][-5887:]
52 | # MPII
53 | with h5py.File('../data/mpii/annot_mpii.h5', 'r') as annot:
54 | # train
55 | self.imgname_mpii_train = annot['imgname'][:-1358]
56 | self.bndbox_mpii_train = annot['bndbox'][:-1358]
57 | self.part_mpii_train = annot['part'][:-1358]
58 | # val
59 | self.imgname_mpii_val = annot['imgname'][-1358:]
60 | self.bndbox_mpii_val = annot['bndbox'][-1358:]
61 | self.part_mpii_val = annot['part'][-1358:]
62 |
63 | self.size_coco_train = self.imgname_coco_train.shape[0]
64 | self.size_coco_val = self.imgname_coco_val.shape[0]
65 | self.size_train = self.imgname_coco_train.shape[0] + self.imgname_mpii_train.shape[0]
66 | self.size_val = self.imgname_coco_val.shape[0] + self.imgname_mpii_val.shape[0]
67 | self.train, self.valid = [], []
68 |
69 | def __getitem__(self, index):
70 | sf = self.scale_factor
71 |
72 | if self.is_train and index < self.size_coco_train: # COCO
73 | part = self.part_coco_train[index]
74 | bndbox = self.bndbox_coco_train[index]
75 | imgname = self.imgname_coco_train[index]
76 | imgset = 'coco'
77 | elif self.is_train: # MPII
78 | part = self.part_mpii_train[index - self.size_coco_train]
79 | bndbox = self.bndbox_mpii_train[index - self.size_coco_train]
80 | imgname = self.imgname_mpii_train[index - self.size_coco_train]
81 | imgset = 'mpii'
82 | elif index < self.size_coco_val:
83 | part = self.part_coco_val[index]
84 | bndbox = self.bndbox_coco_val[index]
85 | imgname = self.imgname_coco_val[index]
86 | imgset = 'coco'
87 | else:
88 | part = self.part_mpii_val[index - self.size_coco_val]
89 | bndbox = self.bndbox_mpii_val[index - self.size_coco_val]
90 | imgname = self.imgname_mpii_val[index - self.size_coco_val]
91 | imgset = 'mpii'
92 |
93 | if imgset == 'coco':
94 | imgname = reduce(lambda x, y: x + y, map(lambda x: chr(int(x)), imgname))
95 | else:
96 | imgname = reduce(lambda x, y: x + y, map(lambda x: chr(int(x)), imgname))[:13]
97 |
98 | img_path = os.path.join(self.img_folder, imgset, 'images', imgname)
99 |
100 | metaData = generateSampleBox(img_path, bndbox, part, self.nJoints,
101 | imgset, sf, self, train=self.is_train)
102 |
103 | inp, out_bigcircle, out_smallcircle, out, setMask = metaData
104 |
105 | label = []
106 | for i in range(opt.nStack):
107 | if i < 2:
108 | # label.append(out_bigcircle.clone())
109 | label.append(out.clone())
110 | elif i < 4:
111 | # label.append(out_smallcircle.clone())
112 | label.append(out.clone())
113 | else:
114 | label.append(out.clone())
115 |
116 | return inp, label, setMask, imgset
117 |
118 | def __len__(self):
119 | if self.is_train:
120 | return self.size_train
121 | else:
122 | return self.size_val
123 |
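The fused dataset concatenates COCO and MPII: a global index below `size_coco_train` (or `size_coco_val`) reads from COCO, anything above is shifted back to index into MPII. A standalone sketch of that index arithmetic (sizes are made up for illustration):

def split_index(index, size_coco):
    # Mirrors the branching in __getitem__ above: COCO samples first, then MPII.
    if index < size_coco:
        return 'coco', index
    return 'mpii', index - size_coco

size_coco_train = 100000  # illustrative value only
print(split_index(99999, size_coco_train))    # ('coco', 99999)
print(split_index(100005, size_coco_train))   # ('mpii', 5)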
--------------------------------------------------------------------------------
/SPPE/src/utils/dataset/mpii.py:
--------------------------------------------------------------------------------
1 | import os
2 | import h5py
3 | from functools import reduce
4 |
5 | import torch.utils.data as data
6 | from ..pose import generateSampleBox
7 | from opt import opt
8 |
9 |
10 | class Mpii(data.Dataset):
11 | def __init__(self, train=True, sigma=1,
12 | scale_factor=0.25, rot_factor=30, label_type='Gaussian'):
13 | self.img_folder = '../data/mpii/images' # root image folders
14 | self.is_train = train # training set or test set
15 | self.inputResH = 320
16 | self.inputResW = 256
17 | self.outputResH = 80
18 | self.outputResW = 64
19 | self.sigma = sigma
20 | self.scale_factor = (0.2, 0.3)
21 | self.rot_factor = rot_factor
22 | self.label_type = label_type
23 |
24 | self.nJoints_mpii = 16
25 | self.nJoints = 16
26 |
27 | self.accIdxs = (1, 2, 3, 4, 5, 6,
28 | 11, 12, 15, 16)
29 | self.flipRef = ((1, 6), (2, 5), (3, 4),
30 | (11, 16), (12, 15), (13, 14))
31 |
32 | # create train/val split
33 | with h5py.File('../data/mpii/annot_mpii.h5', 'r') as annot:
34 | # train
35 | self.imgname_mpii_train = annot['imgname'][:-1358]
36 | self.bndbox_mpii_train = annot['bndbox'][:-1358]
37 | self.part_mpii_train = annot['part'][:-1358]
38 | # val
39 | self.imgname_mpii_val = annot['imgname'][-1358:]
40 | self.bndbox_mpii_val = annot['bndbox'][-1358:]
41 | self.part_mpii_val = annot['part'][-1358:]
42 |
43 | self.size_train = self.imgname_mpii_train.shape[0]
44 | self.size_val = self.imgname_mpii_val.shape[0]
45 | self.train, self.valid = [], []
46 |
47 | def __getitem__(self, index):
48 | sf = self.scale_factor
49 |
50 | if self.is_train:
51 | part = self.part_mpii_train[index]
52 | bndbox = self.bndbox_mpii_train[index]
53 | imgname = self.imgname_mpii_train[index]
54 | else:
55 | part = self.part_mpii_val[index]
56 | bndbox = self.bndbox_mpii_val[index]
57 | imgname = self.imgname_mpii_val[index]
58 |
59 | imgname = reduce(lambda x, y: x + y, map(lambda x: chr(int(x)), imgname))[:13]
60 | img_path = os.path.join(self.img_folder, imgname)
61 |
62 | metaData = generateSampleBox(img_path, bndbox, part, self.nJoints,
63 | 'mpii', sf, self, train=self.is_train)
64 |
65 | inp, out_bigcircle, out_smallcircle, out, setMask = metaData
66 |
67 | label = []
68 | for i in range(opt.nStack):
69 | if i < 2:
70 | #label.append(out_bigcircle.clone())
71 | label.append(out.clone())
72 | elif i < 4:
73 | #label.append(out_smallcircle.clone())
74 | label.append(out.clone())
75 | else:
76 | label.append(out.clone())
77 |
78 | return inp, label, setMask
79 |
80 | def __len__(self):
81 | if self.is_train:
82 | return self.size_train
83 | else:
84 | return self.size_val
85 |
--------------------------------------------------------------------------------
/SPPE/src/utils/eval.py:
--------------------------------------------------------------------------------
1 | from SPPE.src.opt import opt
2 | try:
3 | from utils import transformBoxInvert, transformBoxInvert_batch, findPeak, processPeaks
4 | except ImportError:
5 | from SPPE.src.utils.img import transformBoxInvert, transformBoxInvert_batch, findPeak, processPeaks
6 | import torch
7 |
8 |
9 | class DataLogger(object):
10 | def __init__(self):
11 | self.clear()
12 |
13 | def clear(self):
14 | self.value = 0
15 | self.sum = 0
16 | self.cnt = 0
17 | self.avg = 0
18 |
19 | def update(self, value, n=1):
20 | self.value = value
21 | self.sum += value * n
22 | self.cnt += n
23 | self._cal_avg()
24 |
25 | def _cal_avg(self):
26 | self.avg = self.sum / self.cnt
27 |
28 |
29 | def accuracy(output, label, dataset):
30 | if type(output) == list:
31 | return accuracy(output[opt.nStack - 1], label[opt.nStack - 1], dataset)
32 | else:
33 | return heatmapAccuracy(output.cpu().data, label.cpu().data, dataset.accIdxs)
34 |
35 |
36 | def heatmapAccuracy(output, label, idxs):
37 | preds = getPreds(output)
38 | gt = getPreds(label)
39 |
40 | norm = torch.ones(preds.size(0)) * opt.outputResH / 10
41 | dists = calc_dists(preds, gt, norm)
42 | #print(dists)
43 | acc = torch.zeros(len(idxs) + 1)
44 | avg_acc = 0
45 | cnt = 0
46 | for i in range(len(idxs)):
47 | acc[i + 1] = dist_acc(dists[idxs[i] - 1])
48 | if acc[i + 1] >= 0:
49 | avg_acc = avg_acc + acc[i + 1]
50 | cnt += 1
51 | if cnt != 0:
52 | acc[0] = avg_acc / cnt
53 | return acc
54 |
55 |
56 | def getPreds(hm):
57 | """ get predictions from score maps in torch Tensor
58 | return type: torch.LongTensor
59 | """
60 | assert hm.dim() == 4, 'Score maps should be 4-dim'
61 | maxval, idx = torch.max(hm.view(hm.size(0), hm.size(1), -1), 2)
62 |
63 | maxval = maxval.view(hm.size(0), hm.size(1), 1)
64 | idx = idx.view(hm.size(0), hm.size(1), 1) + 1
65 |
66 | preds = idx.repeat(1, 1, 2).float()
67 |
68 | preds[:, :, 0] = (preds[:, :, 0] - 1) % hm.size(3)
69 | preds[:, :, 1] = torch.floor((preds[:, :, 1] - 1) / hm.size(3))
70 |
71 | # pred_mask = maxval.gt(0).repeat(1, 1, 2).float()
72 | # preds *= pred_mask
73 | return preds
74 |
75 |
76 | def calc_dists(preds, target, normalize):
77 | preds = preds.float().clone()
78 | target = target.float().clone()
79 | dists = torch.zeros(preds.size(1), preds.size(0))
80 | for n in range(preds.size(0)):
81 | for c in range(preds.size(1)):
82 | if target[n, c, 0] > 0 and target[n, c, 1] > 0:
83 | dists[c, n] = torch.dist(
84 | preds[n, c, :], target[n, c, :]) / normalize[n]
85 | else:
86 | dists[c, n] = -1
87 | return dists
88 |
89 |
90 | def dist_acc(dists, thr=0.5):
91 | """ Return percentage below threshold while ignoring values with a -1 """
92 | if dists.ne(-1).sum() > 0:
93 | return dists.le(thr).eq(dists.ne(-1)).float().sum() * 1.0 / dists.ne(-1).float().sum()
94 | else:
95 | return - 1
96 |
97 |
98 | def postprocess(output):
99 | p = getPreds(output)
100 |
101 | for i in range(p.size(0)):
102 | for j in range(p.size(1)):
103 | hm = output[i][j]
104 |             pX, pY = int(round(float(p[i][j][0]))), int(round(float(p[i][j][1])))
105 | if 0 < pX < opt.outputResW - 1 and 0 < pY < opt.outputResH - 1:
106 | diff = torch.Tensor((hm[pY][pX + 1] - hm[pY][pX - 1], hm[pY + 1][pX] - hm[pY - 1][pX]))
107 | p[i][j] += diff.sign() * 0.25
108 | p -= 0.5
109 |
110 | return p
111 |
112 |
113 | def getPrediction(hms, pt1, pt2, inpH, inpW, resH, resW):
114 | """
115 | Get keypoint location from heatmaps
116 | """
117 | assert hms.dim() == 4, 'Score maps should be 4-dim'
118 | maxval, idx = torch.max(hms.view(hms.size(0), hms.size(1), -1), 2)
119 |
120 | maxval = maxval.view(hms.size(0), hms.size(1), 1)
121 | idx = idx.view(hms.size(0), hms.size(1), 1) + 1
122 |
123 | preds = idx.repeat(1, 1, 2).float()
124 |
125 | preds[:, :, 0] = (preds[:, :, 0] - 1) % hms.size(3)
126 | preds[:, :, 1] = torch.floor((preds[:, :, 1] - 1) / hms.size(3))
127 |
128 | pred_mask = maxval.gt(0).repeat(1, 1, 2).float()
129 | preds *= pred_mask
130 |
131 | # Very simple post-processing step to improve performance at tight PCK thresholds
132 | """for i in range(preds.size(0)):
133 | for j in range(preds.size(1)):
134 | hm = hms[i][j]
135 | pX, pY = int(round(float(preds[i][j][0]))), int(round(float(preds[i][j][1])))
136 | if 0 < pX < opt.outputResW - 1 and 0 < pY < opt.outputResH - 1:
137 | diff = torch.Tensor(
138 | (hm[pY][pX + 1] - hm[pY][pX - 1], hm[pY + 1][pX] - hm[pY - 1][pX]))
139 | preds[i][j] += diff.sign() * 0.25
140 | preds += 0.2"""
141 |
142 | preds_tf = torch.zeros(preds.size())
143 | preds_tf = transformBoxInvert_batch(preds, pt1, pt2, inpH, inpW, resH, resW)
144 | return preds, preds_tf, maxval
145 |
146 |
147 | def getMultiPeakPrediction(hms, pt1, pt2, inpH, inpW, resH, resW):
148 |
149 | assert hms.dim() == 4, 'Score maps should be 4-dim'
150 |
151 | preds_img = {}
152 | hms = hms.numpy()
153 | for n in range(hms.shape[0]): # Number of samples
154 | preds_img[n] = {} # Result of sample: n
155 | for k in range(hms.shape[1]): # Number of keypoints
156 | preds_img[n][k] = [] # Result of keypoint: k
157 | hm = hms[n][k]
158 |
159 | candidate_points = findPeak(hm)
160 |
161 | res_pt = processPeaks(candidate_points, hm,
162 | pt1[n], pt2[n], inpH, inpW, resH, resW)
163 |
164 | preds_img[n][k] = res_pt
165 |
166 | return preds_img
167 |
168 |
169 | def getPrediction_batch(hms, pt1, pt2, inpH, inpW, resH, resW):
170 | """
171 | Get keypoint location from heatmaps
172 | pt1, pt2: [n, 2]
173 | OUTPUT:
174 | preds: [n, 17, 2]
175 | """
176 |
177 | assert hms.dim() == 4, 'Score maps should be 4-dim'
178 | flat_hms = hms.view(hms.size(0), hms.size(1), -1)
179 | maxval, idx = torch.max(flat_hms, 2)
180 |
181 | maxval = maxval.view(hms.size(0), hms.size(1), 1)
182 | idx = idx.view(hms.size(0), hms.size(1), 1) + 1
183 |
184 | preds = idx.repeat(1, 1, 2).float()
185 |
186 | preds[:, :, 0] = (preds[:, :, 0] - 1) % hms.size(3)
187 | preds[:, :, 1] = torch.floor((preds[:, :, 1] - 1) / hms.size(3))
188 |
189 | pred_mask = maxval.gt(0).repeat(1, 1, 2).float()
190 | preds *= pred_mask
191 |
192 | # Very simple post-processing step to improve performance at tight PCK thresholds
193 | idx_up = (idx - hms.size(3)).clamp(0, flat_hms.size(2) - 1)
194 | idx_down = (idx + hms.size(3)).clamp(0, flat_hms.size(2) - 1)
195 | idx_left = (idx - 1).clamp(0, flat_hms.size(2) - 1)
196 | idx_right = (idx + 1).clamp(0, flat_hms.size(2) - 1)
197 |
198 | maxval_up = flat_hms.gather(2, idx_up)
199 | maxval_down = flat_hms.gather(2, idx_down)
200 | maxval_left = flat_hms.gather(2, idx_left)
201 | maxval_right = flat_hms.gather(2, idx_right)
202 |
203 | diff1 = (maxval_right - maxval_left).sign() * 0.25
204 | diff2 = (maxval_down - maxval_up).sign() * 0.25
205 | diff1[idx_up <= hms.size(3)] = 0
206 | diff1[idx_down / hms.size(3) >= (hms.size(3) - 1)] = 0
207 | diff2[(idx_left % hms.size(3)) == 0] = 0
208 | diff2[(idx_left % hms.size(3)) == (hms.size(3) - 1)] = 0
209 |
210 | preds[:, :, 0] += diff1.squeeze(-1)
211 | preds[:, :, 1] += diff2.squeeze(-1)
212 |
213 | preds_tf = torch.zeros(preds.size())
214 | preds_tf = transformBoxInvert_batch(preds, pt1, pt2, inpH, inpW, resH, resW)
215 |
216 | return preds, preds_tf, maxval
217 |
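`getPreds` decodes the per-joint argmax of a heatmap batch into (x, y) grid coordinates, where x is the column and y the row. A small self-contained check, assuming this module imports cleanly:

import torch

hm = torch.zeros(1, 2, 80, 96)   # (batch, joints, H, W)
hm[0, 0, 10, 20] = 1.0           # joint 0: peak at row 10, column 20
hm[0, 1, 30, 40] = 1.0           # joint 1: peak at row 30, column 40

preds = getPreds(hm)
print(preds[0])   # [[20., 10.], [40., 30.]] -> (x, y) per joint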
--------------------------------------------------------------------------------
/SPPE/src/utils/pose.py:
--------------------------------------------------------------------------------
1 | from utils import (load_image, drawGaussian, drawBigCircle, drawSmallCircle, cv_rotate,
2 | cropBox, transformBox, flip, shuffleLR, drawCOCO)
3 | from utils import getPrediction
4 | import torch
5 | import numpy as np
6 | import random
7 | from SPPE.src.opt import opt
8 |
9 |
10 | def rnd(x):
11 | return max(-2 * x, min(2 * x, np.random.randn(1)[0] * x))
12 |
13 |
14 | def generateSampleBox(img_path, bndbox, part, nJoints, imgset, scale_factor, dataset, train=True):
15 |
16 | nJoints_coco = 17
17 | nJoints_mpii = 16
18 | img = load_image(img_path)
19 | if train:
20 | img[0].mul_(random.uniform(0.7, 1.3)).clamp_(0, 1)
21 | img[1].mul_(random.uniform(0.7, 1.3)).clamp_(0, 1)
22 | img[2].mul_(random.uniform(0.7, 1.3)).clamp_(0, 1)
23 |
24 | ori_img = img.clone()
25 | img[0].add_(-0.406)
26 | img[1].add_(-0.457)
27 | img[2].add_(-0.480)
28 |
29 | upLeft = torch.Tensor((int(bndbox[0][0]), int(bndbox[0][1])))
30 | bottomRight = torch.Tensor((int(bndbox[0][2]), int(bndbox[0][3])))
31 | ht = bottomRight[1] - upLeft[1]
32 | width = bottomRight[0] - upLeft[0]
33 | imght = img.shape[1]
34 | imgwidth = img.shape[2]
35 | scaleRate = random.uniform(*scale_factor)
36 |
37 | upLeft[0] = max(0, upLeft[0] - width * scaleRate / 2)
38 | upLeft[1] = max(0, upLeft[1] - ht * scaleRate / 2)
39 | bottomRight[0] = min(imgwidth - 1, bottomRight[0] + width * scaleRate / 2)
40 | bottomRight[1] = min(imght - 1, bottomRight[1] + ht * scaleRate / 2)
41 |
42 |     # Randomly sample the crop box (DPG augmentation)
43 | if opt.addDPG:
44 | PatchScale = random.uniform(0, 1)
45 | if PatchScale > 0.85:
46 | ratio = ht / width
47 | if width < ht:
48 | patchWidth = PatchScale * width
49 | patchHt = patchWidth * ratio
50 | else:
51 | patchHt = PatchScale * ht
52 | patchWidth = patchHt / ratio
53 |
54 | xmin = upLeft[0] + random.uniform(0, 1) * (width - patchWidth)
55 | ymin = upLeft[1] + random.uniform(0, 1) * (ht - patchHt)
56 |
57 | xmax = xmin + patchWidth + 1
58 | ymax = ymin + patchHt + 1
59 | else:
60 | xmin = max(1, min(upLeft[0] + np.random.normal(-0.0142, 0.1158) * width, imgwidth - 3))
61 | ymin = max(1, min(upLeft[1] + np.random.normal(0.0043, 0.068) * ht, imght - 3))
62 | xmax = min(max(xmin + 2, bottomRight[0] + np.random.normal(0.0154, 0.1337) * width), imgwidth - 3)
63 | ymax = min(max(ymin + 2, bottomRight[1] + np.random.normal(-0.0013, 0.0711) * ht), imght - 3)
64 |
65 | upLeft[0] = xmin
66 | upLeft[1] = ymin
67 | bottomRight[0] = xmax
68 | bottomRight[1] = ymax
69 |
70 |     # Count the joints that fall inside the box
71 | jointNum = 0
72 | if imgset == 'coco':
73 | for i in range(17):
74 | if part[i][0] > 0 and part[i][0] > upLeft[0] and part[i][1] > upLeft[1] \
75 | and part[i][0] < bottomRight[0] and part[i][1] < bottomRight[1]:
76 | jointNum += 1
77 | else:
78 | for i in range(16):
79 | if part[i][0] > 0 and part[i][0] > upLeft[0] and part[i][1] > upLeft[1] \
80 | and part[i][0] < bottomRight[0] and part[i][1] < bottomRight[1]:
81 | jointNum += 1
82 |
83 |     # Randomly crop the box (DPG augmentation)
84 | if opt.addDPG:
85 | if jointNum > 13 and train:
86 | switch = random.uniform(0, 1)
87 | if switch > 0.96:
88 | bottomRight[0] = (upLeft[0] + bottomRight[0]) / 2
89 | bottomRight[1] = (upLeft[1] + bottomRight[1]) / 2
90 | elif switch > 0.92:
91 | upLeft[0] = (upLeft[0] + bottomRight[0]) / 2
92 | bottomRight[1] = (upLeft[1] + bottomRight[1]) / 2
93 | elif switch > 0.88:
94 | upLeft[1] = (upLeft[1] + bottomRight[1]) / 2
95 | bottomRight[0] = (upLeft[0] + bottomRight[0]) / 2
96 | elif switch > 0.84:
97 | upLeft[0] = (upLeft[0] + bottomRight[0]) / 2
98 | upLeft[1] = (upLeft[1] + bottomRight[1]) / 2
99 | elif switch > 0.80:
100 | bottomRight[0] = (upLeft[0] + bottomRight[0]) / 2
101 | elif switch > 0.76:
102 | upLeft[0] = (upLeft[0] + bottomRight[0]) / 2
103 | elif switch > 0.72:
104 | bottomRight[1] = (upLeft[1] + bottomRight[1]) / 2
105 | elif switch > 0.68:
106 | upLeft[1] = (upLeft[1] + bottomRight[1]) / 2
107 |
108 | ori_inp = cropBox(ori_img, upLeft, bottomRight, opt.inputResH, opt.inputResW)
109 | inp = cropBox(img, upLeft, bottomRight, opt.inputResH, opt.inputResW)
110 | if jointNum == 0:
111 | inp = torch.zeros(3, opt.inputResH, opt.inputResW)
112 |
113 | out_bigcircle = torch.zeros(nJoints, opt.outputResH, opt.outputResW)
114 | out_smallcircle = torch.zeros(nJoints, opt.outputResH, opt.outputResW)
115 | out = torch.zeros(nJoints, opt.outputResH, opt.outputResW)
116 | setMask = torch.zeros(nJoints, opt.outputResH, opt.outputResW)
117 |
118 | # Draw Label
119 | if imgset == 'coco':
120 | for i in range(nJoints_coco):
121 | if part[i][0] > 0 and part[i][0] > upLeft[0] and part[i][1] > upLeft[1] \
122 | and part[i][0] < bottomRight[0] and part[i][1] < bottomRight[1]:
123 | out_bigcircle[i] = drawBigCircle(out_bigcircle[i], transformBox(part[i], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss * 2)
124 | out_smallcircle[i] = drawSmallCircle(out_smallcircle[i], transformBox(part[i], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss)
125 | out[i] = drawGaussian(out[i], transformBox(part[i], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss)
126 | setMask[i].add_(1)
127 | elif imgset == 'mpii':
128 | for i in range(nJoints_coco, nJoints_coco + nJoints_mpii):
129 | if part[i - nJoints_coco][0] > 0 and part[i - nJoints_coco][0] > upLeft[0] and part[i - nJoints_coco][1] > upLeft[1] \
130 | and part[i - nJoints_coco][0] < bottomRight[0] and part[i - nJoints_coco][1] < bottomRight[1]:
131 | out_bigcircle[i] = drawBigCircle(out_bigcircle[i], transformBox(part[i - nJoints_coco], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss * 2)
132 | out_smallcircle[i] = drawSmallCircle(out_smallcircle[i], transformBox(part[i - nJoints_coco], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss)
133 | out[i] = drawGaussian(out[i], transformBox(part[i - nJoints_coco], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss)
134 | setMask[i].add_(1)
135 | else:
136 | for i in range(nJoints_coco, nJoints_coco + nJoints_mpii):
137 | if part[i - nJoints_coco][0] > 0 and part[i - nJoints_coco][0] > upLeft[0] and part[i - nJoints_coco][1] > upLeft[1] \
138 | and part[i - nJoints_coco][0] < bottomRight[0] and part[i - nJoints_coco][1] < bottomRight[1]:
139 | out_bigcircle[i] = drawBigCircle(out_bigcircle[i], transformBox(part[i - nJoints_coco], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss * 2)
140 | out_smallcircle[i] = drawSmallCircle(out_smallcircle[i], transformBox(part[i - nJoints_coco], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss)
141 | out[i] = drawGaussian(out[i], transformBox(part[i - nJoints_coco], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss)
142 | if i != 6 + nJoints_coco and i != 7 + nJoints_coco:
143 | setMask[i].add_(1)
144 |
145 | if opt.debug:
146 | preds_hm, preds_img, preds_scores = getPrediction(out.unsqueeze(0), upLeft.unsqueeze(0), bottomRight.unsqueeze(0), opt.inputResH,
147 | opt.inputResW, opt.outputResH, opt.outputResW)
148 | tmp_preds = preds_hm.mul(opt.inputResH / opt.outputResH)
149 | drawCOCO(ori_inp.unsqueeze(0), tmp_preds, preds_scores)
150 |
151 | if train:
152 | # Flip
153 | if random.uniform(0, 1) < 0.5:
154 | inp = flip(inp)
155 | ori_inp = flip(ori_inp)
156 | out_bigcircle = shuffleLR(flip(out_bigcircle), dataset)
157 | out_smallcircle = shuffleLR(flip(out_smallcircle), dataset)
158 | out = shuffleLR(flip(out), dataset)
159 | # Rotate
160 | r = rnd(opt.rotate)
161 | if random.uniform(0, 1) < 0.6:
162 | r = 0
163 | if r != 0:
164 | inp = cv_rotate(inp, r, opt.inputResW, opt.inputResH)
165 | out_bigcircle = cv_rotate(out_bigcircle, r, opt.outputResW, opt.outputResH)
166 | out_smallcircle = cv_rotate(out_smallcircle, r, opt.outputResW, opt.outputResH)
167 | out = cv_rotate(out, r, opt.outputResW, opt.outputResH)
168 |
169 | return inp, out_bigcircle, out_smallcircle, out, setMask
170 |
--------------------------------------------------------------------------------
/Track/Tracker.py:
--------------------------------------------------------------------------------
1 | import time
2 | import numpy as np
3 | from collections import deque
4 |
5 | from .linear_assignment import min_cost_matching, matching_cascade
6 | from .kalman_filter import KalmanFilter
7 | from .iou_matching import iou_cost
8 |
9 |
10 | class TrackState:
11 | """Enumeration type for the single target track state. Newly created tracks are
12 | classified as `tentative` until enough evidence has been collected. Then,
13 | the track state is changed to `confirmed`. Tracks that are no longer alive
14 | are classified as `deleted` to mark them for removal from the set of active
15 | tracks.
16 | """
17 | Tentative = 1
18 | Confirmed = 2
19 | Deleted = 3
20 |
21 |
22 | class Detection(object):
23 | """This class represents a bounding box, keypoints, score of person detected
24 | in a single image.
25 |
26 | Args:
27 | tlbr: (float array) Of shape [top, left, bottom, right].,
28 | keypoints: (float array) Of shape [node, pts].,
29 | confidence: (float) Confidence score of detection.
30 | """
31 | def __init__(self, tlbr, keypoints, confidence):
32 | self.tlbr = tlbr
33 | self.keypoints = keypoints
34 | self.confidence = confidence
35 |
36 | def to_tlwh(self):
37 | """Get (top, left, width, height).
38 | """
39 | ret = self.tlbr.copy()
40 | ret[2:] = ret[2:] - ret[:2]
41 | return ret
42 |
43 | def to_xyah(self):
44 | """Get (x_center, y_center, aspect ratio, height).
45 | """
46 | ret = self.to_tlwh()
47 | ret[:2] += ret[2:] / 2
48 | ret[2] /= ret[3]
49 | return ret
50 |
51 |
52 | class Track:
53 | def __init__(self, mean, covariance, track_id, n_init, max_age=30, buffer=30):
54 | self.mean = mean
55 | self.covariance = covariance
56 | self.track_id = track_id
57 | self.hist = 1
58 | self.age = 1
59 | self.time_since_update = 0
60 | self.n_init = n_init
61 | self.max_age = max_age
62 |
63 | # keypoints list for use in Actions prediction.
64 | self.keypoints_list = deque(maxlen=buffer)
65 |
66 | self.state = TrackState.Tentative
67 |
68 | def to_tlwh(self):
69 | ret = self.mean[:4].copy()
70 | ret[2] *= ret[3]
71 | ret[:2] -= ret[2:] / 2
72 | return ret
73 |
74 | def to_tlbr(self):
75 | ret = self.to_tlwh()
76 | ret[2:] = ret[:2] + ret[2:]
77 | return ret
78 |
79 | def get_center(self):
80 | return self.mean[:2].copy()
81 |
82 | def predict(self, kf):
83 | """Propagate the state distribution to the current time step using a
84 | Kalman filter prediction step.
85 | """
86 | self.mean, self.covariance = kf.predict(self.mean, self.covariance)
87 | self.age += 1
88 | self.time_since_update += 1
89 |
90 | def update(self, kf, detection):
91 | """Perform Kalman filter measurement update step.
92 | """
93 | self.mean, self.covariance = kf.update(self.mean, self.covariance,
94 | detection.to_xyah())
95 | self.keypoints_list.append(detection.keypoints)
96 |
97 | self.hist += 1
98 | self.time_since_update = 0
99 | if self.state == TrackState.Tentative and self.hist >= self.n_init:
100 | self.state = TrackState.Confirmed
101 |
102 | def mark_missed(self):
103 | """Mark this track as missed (no association at the current time step).
104 | """
105 | if self.state == TrackState.Tentative:
106 | self.state = TrackState.Deleted
107 | elif self.time_since_update > self.max_age:
108 | self.state = TrackState.Deleted
109 |
110 | def is_tentative(self):
111 | return self.state == TrackState.Tentative
112 |
113 | def is_confirmed(self):
114 | return self.state == TrackState.Confirmed
115 |
116 | def is_deleted(self):
117 | return self.state == TrackState.Deleted
118 |
119 |
120 | class Tracker:
121 | def __init__(self, max_iou_distance=0.7, max_age=30, n_init=5):
122 | self.max_iou_dist = max_iou_distance
123 | self.max_age = max_age
124 | self.n_init = n_init
125 |
126 | self.kf = KalmanFilter()
127 | self.tracks = []
128 | self._next_id = 1
129 |
130 | def predict(self):
131 | """Propagate track state distributions one time step forward.
132 | This function should be called once every time step, before `update`.
133 | """
134 | for track in self.tracks:
135 | track.predict(self.kf)
136 |
137 | def update(self, detections):
138 | """Perform measurement update and track management.
139 | Parameters
140 | ----------
141 | detections : List[deep_sort.detection.Detection]
142 | A list of detections at the current time step.
143 | """
144 | # Run matching cascade.
145 | matches, unmatched_tracks, unmatched_detections = self._match(detections)
146 |
147 |         # Update matched tracks with their assigned detections.
148 |         for track_idx, detection_idx in matches:
149 |             self.tracks[track_idx].update(self.kf, detections[detection_idx])
150 |         # Mark unmatched tracks as missed.
151 |         for track_idx in unmatched_tracks:
152 |             self.tracks[track_idx].mark_missed()
153 |         # Initiate new tracks for unmatched detections.
154 | for detection_idx in unmatched_detections:
155 | self._initiate_track(detections[detection_idx])
156 |
157 | # Remove deleted tracks.
158 | self.tracks = [t for t in self.tracks if not t.is_deleted()]
159 |
160 | def _match(self, detections):
161 | confirmed_tracks, unconfirmed_tracks = [], []
162 | for i, t in enumerate(self.tracks):
163 | if t.is_confirmed():
164 | confirmed_tracks.append(i)
165 | else:
166 | unconfirmed_tracks.append(i)
167 |
168 | matches_a, unmatched_tracks_a, unmatched_detections = matching_cascade(
169 | iou_cost, self.max_iou_dist, self.max_age, self.tracks, detections, confirmed_tracks
170 | )
171 |
172 | track_candidates = unconfirmed_tracks + [
173 | k for k in unmatched_tracks_a if self.tracks[k].time_since_update == 1]
174 | unmatched_tracks_a = [
175 | k for k in unmatched_tracks_a if self.tracks[k].time_since_update != 1]
176 |
177 | matches_b, unmatched_tracks_b, unmatched_detections = min_cost_matching(
178 | iou_cost, self.max_iou_dist, self.tracks, detections, track_candidates, unmatched_detections
179 | )
180 |
181 | matches = matches_a + matches_b
182 | unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b))
183 | return matches, unmatched_tracks, unmatched_detections
184 |
185 | def _initiate_track(self, detection):
186 | if detection.confidence < 0.4:
187 | return
188 | mean, covariance = self.kf.initiate(detection.to_xyah())
189 | self.tracks.append(Track(mean, covariance, self._next_id, self.n_init, self.max_age))
190 | self._next_id += 1
191 |
192 |
193 |
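A sketch of the per-frame loop around this tracker as used in this project: call `predict()` first, then wrap the detector/pose outputs into `Detection` objects and call `update()`. The detector outputs below are placeholders, and the 30-frame keypoint buffer is what feeds the action classifier in ActionsEstLoader.py:

import numpy as np

tracker = Tracker(max_iou_distance=0.7, max_age=30, n_init=5)

def process_frame(boxes, keypoints, scores):
    # boxes: (N, 4) tlbr, keypoints: (N, V, 3), scores: (N,) - placeholder shapes.
    tracker.predict()
    detections = [Detection(b, k, s) for b, k, s in zip(boxes, keypoints, scores)]
    tracker.update(detections)

    for track in tracker.tracks:
        if track.is_confirmed() and len(track.keypoints_list) == 30:
            pts = np.array(track.keypoints_list, dtype=np.float32)  # (30, V, 3)
            # pts is the time sequence expected by the TSSTG action model.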
--------------------------------------------------------------------------------
/Track/iou_matching.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | INFTY_COST = 1e+5
4 |
5 |
6 | def iou(bbox, candidates):
7 | """Compute intersection over union.
8 | Parameters
9 | ----------
10 | bbox : ndarray
11 | A bounding box in format `(xmin, ymin, xmax, ymax)`.
12 | candidates : ndarray
13 | A matrix of candidate bounding boxes (one per row) in the same format
14 | as `bbox`.
15 |
16 | Returns
17 | -------
18 | ndarray
19 | The intersection over union in [0, 1] between the `bbox` and each
20 | candidate. A higher score means a larger fraction of the `bbox` is
21 | occluded by the candidate.
22 | """
23 | #bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:]
24 | bbox_tl, bbox_br = bbox[:2], bbox[2:]
25 | candidates_tl = candidates[:, :2]
26 | candidates_br = candidates[:, 2:] # + candidates[:, :2]
27 |
28 | tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis],
29 | np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]]
30 | br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis],
31 | np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]]
32 | wh = np.maximum(0., br - tl)
33 |
34 | area_intersection = wh.prod(axis=1)
35 | area_bbox = (bbox[2:] - bbox[:2]).prod()
36 | area_candidates = (candidates[:, 2:] - candidates[:, :2]).prod(axis=1)
37 | return area_intersection / (area_bbox + area_candidates - area_intersection)
38 |
39 |
40 | def iou_cost(tracks, detections, track_indices=None, detection_indices=None):
41 | """An intersection over union distance metric.
42 | Parameters
43 | ----------
44 | tracks : List[Track]
45 | A list of tracks.
46 | detections : List[Detection]
47 | A list of detections.
48 | track_indices : Optional[List[int]]
49 | A list of indices to tracks that should be matched. Defaults to
50 | all `tracks`.
51 | detection_indices : Optional[List[int]]
52 | A list of indices to detections that should be matched. Defaults
53 | to all `detections`.
54 |
55 | Returns
56 | -------
57 | ndarray
58 | Returns a cost matrix of shape
59 | len(track_indices), len(detection_indices) where entry (i, j) is
60 | `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`.
61 |
62 | """
63 | if track_indices is None:
64 | track_indices = np.arange(len(tracks))
65 | if detection_indices is None:
66 | detection_indices = np.arange(len(detections))
67 |
68 | cost_matrix = np.zeros((len(track_indices), len(detection_indices)))
69 | for row, track_idx in enumerate(track_indices):
70 | #if tracks[track_idx].time_since_update > 1:
71 | # cost_matrix[row, :] = INFTY_COST
72 | # continue
73 |
74 | bbox = tracks[track_idx].to_tlbr()
75 | candidates = np.asarray([detections[i].tlbr for i in detection_indices])
76 | cost_matrix[row, :] = 1. - iou(bbox, candidates)
77 |
78 | return cost_matrix
79 |
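A quick numeric check of `iou()` with boxes already in (xmin, ymin, xmax, ymax) form, as this variant expects:

import numpy as np

bbox = np.array([0., 0., 2., 2.])
candidates = np.array([[1., 0., 3., 2.],    # overlaps half of bbox
                       [2., 2., 4., 4.]])   # only touches the corner
print(iou(bbox, candidates))                # approximately [0.3333, 0.0]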
--------------------------------------------------------------------------------
/Track/kalman_filter.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 | import numpy as np
3 | import scipy.linalg
4 |
5 |
6 | class KalmanFilter(object):
7 | """A simple Kalman filter for tracking bounding boxes in image space.
8 |
9 | The 8-dimensional state space
10 | x, y, a, h, vx, vy, va, vh
11 |
12 | contains the bounding box center position (x, y), aspect ratio a, height h,
13 | and their respective velocities.
14 |
15 | Object motion follows a constant velocity model. The bounding box location
16 | (x, y, a, h) is taken as direct observation of the state space (linear
17 | observation model).
18 | """
19 | def __init__(self):
20 | ndim, dt = 4, 1.
21 |
22 | # Create Kalman filter model matrices.
23 | self._motion_mat = np.eye(2 * ndim, 2 * ndim)
24 | for i in range(ndim):
25 | self._motion_mat[i, ndim + i] = dt
26 | self._update_mat = np.eye(ndim, 2 * ndim)
27 |
28 | # Motion and observation uncertainty are chosen relative to the current
29 | # state estimate. These weights control the amount of uncertainty in
30 | # the model. This is a bit hacky.
31 | self._std_weight_position = 1. / 20
32 | self._std_weight_velocity = 1. / 160
33 |
34 | def initiate(self, measurement):
35 | """Create track from unassociated measurement.
36 | Parameters
37 | ----------
38 | measurement : ndarray
39 | Bounding box coordinates (x, y, a, h) with center position (x, y),
40 | aspect ratio a, and height h.
41 |
42 | Returns
43 | -------
44 | (ndarray, ndarray)
45 | Returns the mean vector (8 dimensional) and covariance matrix (8x8
46 | dimensional) of the new track. Unobserved velocities are initialized
47 | to 0 mean.
48 | """
49 | mean_pos = measurement
50 | mean_vel = np.zeros_like(mean_pos)
51 | mean = np.r_[mean_pos, mean_vel]
52 |
53 | std = [
54 | 2 * self._std_weight_position * measurement[3],
55 | 2 * self._std_weight_position * measurement[3],
56 | 1e-2,
57 | 2 * self._std_weight_position * measurement[3],
58 | 10 * self._std_weight_velocity * measurement[3],
59 | 10 * self._std_weight_velocity * measurement[3],
60 | 1e-5,
61 | 10 * self._std_weight_velocity * measurement[3]]
62 | covariance = np.diag(np.square(std))
63 | return mean, covariance
64 |
65 | def predict(self, mean, covariance):
66 | """Run Kalman filter prediction step.
67 | Parameters
68 | ----------
69 | mean : ndarray
70 | The 8 dimensional mean vector of the object state at the previous
71 | time step.
72 | covariance : ndarray
73 | The 8x8 dimensional covariance matrix of the object state at the
74 | previous time step.
75 |
76 | Returns
77 | -------
78 | (ndarray, ndarray)
79 | Returns the mean vector and covariance matrix of the predicted
80 | state. Unobserved velocities are initialized to 0 mean.
81 | """
82 | std_pos = [
83 | self._std_weight_position * mean[3],
84 | self._std_weight_position * mean[3],
85 | 1e-2,
86 | self._std_weight_position * mean[3]]
87 | std_vel = [
88 | self._std_weight_velocity * mean[3],
89 | self._std_weight_velocity * mean[3],
90 | 1e-5,
91 | self._std_weight_velocity * mean[3]]
92 | motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))
93 |
94 | mean = np.dot(self._motion_mat, mean)
95 | covariance = np.linalg.multi_dot((
96 | self._motion_mat, covariance, self._motion_mat.T)) + motion_cov
97 |
98 | return mean, covariance
99 |
100 | def project(self, mean, covariance):
101 | """Project state distribution to measurement space.
102 | Parameters
103 | ----------
104 | mean : ndarray
105 | The state's mean vector (8 dimensional array).
106 | covariance : ndarray
107 | The state's covariance matrix (8x8 dimensional).
108 |
109 | Returns
110 | -------
111 | (ndarray, ndarray)
112 | Returns the projected mean and covariance matrix of the given state
113 | estimate.
114 | """
115 | std = [
116 | self._std_weight_position * mean[3],
117 | self._std_weight_position * mean[3],
118 | 1e-1,
119 | self._std_weight_position * mean[3]]
120 | innovation_cov = np.diag(np.square(std))
121 |
122 | mean = np.dot(self._update_mat, mean)
123 | covariance = np.linalg.multi_dot((
124 | self._update_mat, covariance, self._update_mat.T))
125 | return mean, covariance + innovation_cov
126 |
127 | def update(self, mean, covariance, measurement):
128 | """Run Kalman filter correction step.
129 | Parameters
130 | ----------
131 | mean : ndarray
132 | The predicted state's mean vector (8 dimensional).
133 | covariance : ndarray
134 | The state's covariance matrix (8x8 dimensional).
135 | measurement : ndarray
136 | The 4 dimensional measurement vector (x, y, a, h), where (x, y)
137 | is the center position, a the aspect ratio, and h the height of the
138 | bounding box.
139 |
140 | Returns
141 | -------
142 | (ndarray, ndarray)
143 | Returns the measurement-corrected state distribution.
144 | """
145 | projected_mean, projected_cov = self.project(mean, covariance)
146 |
147 | chol_factor, lower = scipy.linalg.cho_factor(
148 | projected_cov, lower=True, check_finite=False)
149 | kalman_gain = scipy.linalg.cho_solve(
150 | (chol_factor, lower), np.dot(covariance, self._update_mat.T).T,
151 | check_finite=False).T
152 | innovation = measurement - projected_mean
153 |
154 | new_mean = mean + np.dot(innovation, kalman_gain.T)
155 | new_covariance = covariance - np.linalg.multi_dot((
156 | kalman_gain, projected_cov, kalman_gain.T))
157 | return new_mean, new_covariance
158 |
159 | def gating_distance(self, mean, covariance, measurements,
160 | only_position=False):
161 | """Compute gating distance between state distribution and measurements.
162 | A suitable distance threshold can be obtained from `chi2inv95`. If
163 | `only_position` is False, the chi-square distribution has 4 degrees of
164 | freedom, otherwise 2.
165 |
166 | Parameters
167 | ----------
168 | mean : ndarray
169 | Mean vector over the state distribution (8 dimensional).
170 | covariance : ndarray
171 | Covariance of the state distribution (8x8 dimensional).
172 | measurements : ndarray
173 | An Nx4 dimensional matrix of N measurements, each in
174 | format (x, y, a, h) where (x, y) is the bounding box center
175 | position, a the aspect ratio, and h the height.
176 | only_position : Optional[bool]
177 | If True, distance computation is done with respect to the bounding
178 | box center position only.
179 |
180 | Returns
181 | -------
182 | ndarray
183 | Returns an array of length N, where the i-th element contains the
184 | squared Mahalanobis distance between (mean, covariance) and
185 | `measurements[i]`.
186 | """
187 | mean, covariance = self.project(mean, covariance)
188 | if only_position:
189 | mean, covariance = mean[:2], covariance[:2, :2]
190 | measurements = measurements[:, :2]
191 |
192 | cholesky_factor = np.linalg.cholesky(covariance)
193 | d = measurements - mean
194 | z = scipy.linalg.solve_triangular(
195 | cholesky_factor, d.T, lower=True, check_finite=False,
196 | overwrite_b=True)
197 | squared_maha = np.sum(z * z, axis=0)
198 | return squared_maha
199 |
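A minimal initiate/predict/update cycle on a single (x, y, a, h) measurement; the numbers are arbitrary and only illustrate the call order the tracker uses:

import numpy as np

kf = KalmanFilter()
z0 = np.array([320., 240., 0.5, 200.])      # center x, center y, aspect ratio, height
mean, cov = kf.initiate(z0)                  # 8-dim state; velocities start at zero

mean, cov = kf.predict(mean, cov)            # constant-velocity prediction
z1 = np.array([324., 238., 0.5, 202.])       # measurement in the next frame
mean, cov = kf.update(mean, cov, z1)         # corrected estimate

print(mean[:4])                                        # filtered (x, y, a, h)
print(kf.gating_distance(mean, cov, z1[None, :]))      # squared Mahalanobis distance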
--------------------------------------------------------------------------------
/Track/linear_assignment.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | #from sklearn.utils.linear_assignment_ import linear_assignment
3 | from scipy.optimize import linear_sum_assignment
4 |
5 | """
6 | Table for the 0.95 quantile of the chi-square distribution with N degrees of
7 | freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv
8 | function and used as Mahalanobis gating threshold.
9 | """
10 | chi2inv95 = {
11 | 1: 3.8415,
12 | 2: 5.9915,
13 | 3: 7.8147,
14 | 4: 9.4877,
15 | 5: 11.070,
16 | 6: 12.592,
17 | 7: 14.067,
18 | 8: 15.507,
19 | 9: 16.919}
20 | INFTY_COST = 1e+5
21 |
22 |
23 | def min_cost_matching(distance_metric, max_distance, tracks, detections,
24 | track_indices=None, detection_indices=None):
25 | """Solve linear assignment problem.
26 | Parameters
27 | ----------
28 |     distance_metric : Callable[[List[Track], List[Detection], List[int], List[int]], ndarray]
29 | The distance metric is given a list of tracks and detections as well as
30 | a list of N track indices and M detection indices. The metric should
31 | return the NxM dimensional cost matrix, where element (i, j) is the
32 | association cost between the i-th track in the given track indices and
33 | the j-th detection in the given detection_indices.
34 | max_distance : float
35 | Gating threshold. Associations with cost larger than this value are
36 | disregarded.
37 | tracks : List[Track]
38 | A list of predicted tracks at the current time step.
39 | detections : List[Detection]
40 | A list of detections at the current time step.
41 | track_indices : List[int]
42 | List of track indices that maps rows in `cost_matrix` to tracks in
43 | `tracks` (see description above).
44 | detection_indices : List[int]
45 | List of detection indices that maps columns in `cost_matrix` to
46 | detections in `detections` (see description above).
47 |
48 | Returns
49 | -------
50 | (List[(int, int)], List[int], List[int])
51 | Returns a tuple with the following three entries:
52 | * A list of matched track and detection indices.
53 | * A list of unmatched track indices.
54 | * A list of unmatched detection indices.
55 | """
56 | if track_indices is None:
57 | track_indices = np.arange(len(tracks))
58 | if detection_indices is None:
59 | detection_indices = np.arange(len(detections))
60 |
61 | if len(detection_indices) == 0 or len(track_indices) == 0:
62 | return [], track_indices, detection_indices # Nothing to match.
63 |
64 | cost_matrix = distance_metric(tracks, detections, track_indices, detection_indices)
65 | cost_matrix[cost_matrix > max_distance] = max_distance + 1e-5
66 | indices = linear_sum_assignment(cost_matrix)
67 | indices = np.array(indices).transpose()
68 |
69 | matches, unmatched_tracks, unmatched_detections = [], [], []
70 | for col, detection_idx in enumerate(detection_indices):
71 | if col not in indices[:, 1]:
72 | unmatched_detections.append(detection_idx)
73 | for row, track_idx in enumerate(track_indices):
74 | if row not in indices[:, 0]:
75 | unmatched_tracks.append(track_idx)
76 | for row, col in indices:
77 | track_idx = track_indices[row]
78 | detection_idx = detection_indices[col]
79 | if cost_matrix[row, col] > max_distance:
80 | unmatched_tracks.append(track_idx)
81 | unmatched_detections.append(detection_idx)
82 | else:
83 | matches.append((track_idx, detection_idx))
84 |
85 | return matches, unmatched_tracks, unmatched_detections
86 |
87 |
88 | def matching_cascade(distance_metric, max_distance, cascade_depth, tracks, detections,
89 | track_indices=None, detection_indices=None):
90 | """Run matching cascade.
91 | Parameters
92 | ----------
93 |     distance_metric : Callable[[List[Track], List[Detection], List[int], List[int]], ndarray]
94 | The distance metric is given a list of tracks and detections as well as
95 | a list of N track indices and M detection indices. The metric should
96 | return the NxM dimensional cost matrix, where element (i, j) is the
97 | association cost between the i-th track in the given track indices and
98 | the j-th detection in the given detection indices.
99 | max_distance : float
100 | Gating threshold. Associations with cost larger than this value are
101 | disregarded.
102 | cascade_depth: int
103 |         The cascade depth; this should be set to the maximum track age.
104 | tracks : List[Track]
105 | A list of predicted tracks at the current time step.
106 | detections : List[Detection]
107 | A list of detections at the current time step.
108 | track_indices : Optional[List[int]]
109 | List of track indices that maps rows in `cost_matrix` to tracks in
110 | `tracks` (see description above). Defaults to all tracks.
111 | detection_indices : Optional[List[int]]
112 | List of detection indices that maps columns in `cost_matrix` to
113 | detections in `detections` (see description above). Defaults to all
114 | detections.
115 |
116 | Returns
117 | -------
118 | (List[(int, int)], List[int], List[int])
119 | Returns a tuple with the following three entries:
120 | * A list of matched track and detection indices.
121 | * A list of unmatched track indices.
122 | * A list of unmatched detection indices.
123 | """
124 | if track_indices is None:
125 | track_indices = list(range(len(tracks)))
126 | if detection_indices is None:
127 | detection_indices = list(range(len(detections)))
128 |
129 | unmatched_detections = detection_indices
130 | matches = []
131 | for level in range(cascade_depth):
132 | if len(unmatched_detections) == 0: # No detections left
133 | break
134 |
135 | track_indices_l = [k for k in track_indices
136 | if tracks[k].time_since_update == 1 + level]
137 | if len(track_indices_l) == 0: # Nothing to match at this level
138 | continue
139 |
140 | matches_l, _, unmatched_detections = min_cost_matching(
141 | distance_metric, max_distance, tracks, detections, track_indices_l, unmatched_detections)
142 | matches += matches_l
143 |
144 | unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches))
145 | return matches, unmatched_tracks, unmatched_detections
146 |
147 |
148 | def gate_cost_matrix(kf, cost_matrix, tracks, detections, track_indices, detection_indices,
149 | gated_cost=INFTY_COST, only_position=False):
150 | """Invalidate infeasible entries in cost matrix based on the state
151 | distributions obtained by Kalman filtering.
152 | Parameters
153 | ----------
154 | kf : The Kalman filter.
155 | cost_matrix : ndarray
156 | The NxM dimensional cost matrix, where N is the number of track indices
157 | and M is the number of detection indices, such that entry (i, j) is the
158 | association cost between `tracks[track_indices[i]]` and
159 | `detections[detection_indices[j]]`.
160 | tracks : List[Track]
161 | A list of predicted tracks at the current time step.
162 | detections : List[Detection]
163 | A list of detections at the current time step.
164 | track_indices : List[int]
165 | List of track indices that maps rows in `cost_matrix` to tracks in
166 | `tracks` (see description above).
167 | detection_indices : List[int]
168 | List of detection indices that maps columns in `cost_matrix` to
169 | detections in `detections` (see description above).
170 | gated_cost : Optional[float]
171 | Entries in the cost matrix corresponding to infeasible associations are
172 |         set to this value. Defaults to a very large value.
173 | only_position : Optional[bool]
174 | If True, only the x, y position of the state distribution is considered
175 | during gating. Defaults to False.
176 |
177 | Returns
178 | -------
179 | ndarray
180 | Returns the modified cost matrix.
181 | """
182 | gating_dim = 2 if only_position else 4
183 | gating_threshold = chi2inv95[gating_dim]
184 | measurements = np.asarray([detections[i].to_xyah() for i in detection_indices])
185 | for row, track_idx in enumerate(track_indices):
186 | track = tracks[track_idx]
187 | gating_distance = kf.gating_distance(track.mean, track.covariance,
188 | measurements, only_position)
189 | cost_matrix[row, gating_distance > gating_threshold] = gated_cost
190 |
191 | return cost_matrix
192 |
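A toy illustration of the gating in `min_cost_matching`: entries above `max_distance` are clipped before the Hungarian solve and rejected afterwards, so only cheap pairings survive. The metric below ignores the track/detection objects and just returns a fixed cost matrix:

import numpy as np

def toy_metric(tracks, detections, track_indices, detection_indices):
    return np.array([[0.2, 0.9],
                     [0.8, 0.3]])

dummy_tracks, dummy_detections = [object(), object()], [object(), object()]
matches, unmatched_tracks, unmatched_detections = min_cost_matching(
    toy_metric, 0.5, dummy_tracks, dummy_detections)
print(matches)                                   # track 0 <-> detection 0, track 1 <-> detection 1
print(unmatched_tracks, unmatched_detections)    # both empty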
--------------------------------------------------------------------------------
/Visualizer.py:
--------------------------------------------------------------------------------
1 | # import matplotlib.gridspec as gridspec
2 | import matplotlib.pyplot as plt
3 | import numpy as np
4 | import os
5 | import cv2
6 | import torch
7 | import imageio
8 | from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
9 | from matplotlib.font_manager import FontProperties
10 |
11 | fp = FontProperties(family='Tlwg Typo', size=10)
12 |
13 |
14 | def plot_piechart(x, labels, title='', fig_size=(10, 5), save=None):
15 | fig = plt.figure(figsize=fig_size)
16 |
17 | ax1 = fig.add_subplot(121)
18 | wedges, texts = ax1.pie(x, labels=labels, startangle=90)
19 |
20 | percents = x / sum(x) * 100.
21 | annots = ['{} - {:.2f}% ({:d})'.format(c, p, n) for c, p, n
22 | in zip(labels, percents, x)]
23 |
24 | ax2 = fig.add_subplot(122)
25 | ax2.axis('off')
26 | ax2.legend(wedges, annots, loc='center', fontsize=10)
27 |
28 | fig.suptitle(title)
29 |
30 | if save is not None:
31 | fig.savefig(save)
32 | plt.close()
33 | else:
34 | return fig
35 |
36 |
37 | def plot_x(x, title='', fig_size=(12, 10)):
38 | fig = plt.figure(figsize=fig_size)
39 | x = np.squeeze(x)
40 |
41 | if len(x.shape) == 1:
42 | plt.plot(x)
43 |
44 | elif len(x.shape) == 2:
45 | plt.imshow(x, cmap='gray')
46 | plt.axis('off')
47 |
48 | elif len(x.shape) == 3:
49 | if x.shape[-1] == 3:
50 | plt.imshow(x)
51 | plt.axis('off')
52 | else:
53 | fig = plot_multiImage(x.transpose(2, 0, 1), fig_size=fig_size)
54 |
55 | elif len(x.shape) == 4:
56 | fig = plot_multiImage(x.transpose(3, 0, 1, 2), fig_size=fig_size)
57 |
58 | fig.suptitle(title)
59 | return fig
60 |
61 |
62 | def plot_bars(x, y, title='', ylim=None, save=None):
63 | fig = plt.figure()
64 | bars = plt.bar(x, y)
65 | plt.ylim(ylim)
66 | plt.title(title)
67 | for b in bars:
68 | plt.annotate('{:.2f}'.format(b.get_height()),
69 | xy=(b.get_x(), b.get_height()))
70 |
71 | if save is not None:
72 | plt.savefig(save)
73 | plt.close()
74 | else:
75 | return fig
76 |
77 |
78 | def plot_graphs(x_list, legends, title, ylabel, xlabel='epoch', xlim=None, save=None):
79 | fig = plt.figure()
80 | for x in x_list:
81 | plt.plot(x)
82 |
83 | plt.legend(legends)
84 | plt.xlabel(xlabel)
85 | plt.ylabel(ylabel)
86 | plt.title(title)
87 | plt.xlim(xlim)
88 |
89 | if save is not None:
90 | plt.savefig(save)
91 | plt.close()
92 | else:
93 | return fig
94 |
95 |
96 | # images in shape (amount, h, w, c).
97 | def plot_multiImage(images, labels=None, pred=None, title=None, fig_size=(12, 10), tight_layout=False, save=None):
98 | n = int(np.ceil(np.sqrt(images.shape[0])))
99 | fig = plt.figure(figsize=fig_size)
100 |
101 | for i in range(images.shape[0]):
102 | ax = fig.add_subplot(n, n, i + 1)
103 |
104 | if len(images[i].shape) == 2 or images[i].shape[-1] == 1:
105 | ax.imshow(images[i], cmap='gray')
106 | else:
107 | ax.imshow(images[i])
108 |
109 | if labels is not None:
110 | ax.set_xlabel(labels[i], color='g', fontproperties=fp)
111 | if labels is not None and pred is not None:
112 | if labels[i] == pred[i]:
113 | clr = 'g'
114 | else:
115 | if len(labels[i]) == len(pred[i]):
116 | clr = 'm'
117 | else:
118 | clr = 'r'
119 |
120 | ax.set_xlabel('True: {}\nPred : {}'.format(u'' + labels[i], u'' + pred[i]),
121 | color=clr, fontproperties=fp)
122 |
123 | if title is not None:
124 | fig.suptitle(title)
125 |
126 |     if tight_layout:  # This makes the process slow if there are too many images.
127 | fig.tight_layout()
128 |
129 | if save is not None:
130 | plt.savefig(save)
131 | plt.close()
132 | else:
133 | return fig
134 |
135 |
136 | def plot_confusion_metrix(y_true, y_pred, labels=None, title='', normalize=None,
137 | fig_size=(10, 10), save=None):
138 | cm = confusion_matrix(y_true, y_pred, normalize=normalize)
139 | if labels is None:
140 |         labels = sorted(set(y_true))  # sorted, to match confusion_matrix's label order
141 |
142 |     disp = ConfusionMatrixDisplay(cm, display_labels=labels)
143 | disp.plot(xticks_rotation=45)
144 | disp.figure_.set_size_inches(fig_size)
145 | disp.figure_.suptitle(title)
146 | disp.figure_.tight_layout()
147 |
148 | if save is not None:
149 | disp.figure_.savefig(save)
150 | plt.close()
151 | else:
152 | return disp.figure_
153 |
154 |
155 | def get_fig_image(fig): # figure to array of image.
156 | fig.canvas.draw()
157 | img = np.array(fig.canvas.renderer._renderer)
158 | return img
159 |
160 |
161 | def vid2gif(video_file, output_file, delay=0.05):
162 | with imageio.get_writer(output_file, mode='I', duration=delay) as writer:
163 | cap = cv2.VideoCapture(video_file)
164 | while True:
165 | ret, frame = cap.read()
166 | if ret:
167 | #frame = cv2.resize(frame, (0, 0), fx=0.5, fy=0.5)
168 | frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
169 | writer.append_data(frame)
170 | else:
171 | break
172 |
173 | #==========================================================================================#
174 | # For Fall_AlphaPose.
175 |
176 |
177 | PARTS_PAIR = [(0, 13), (1, 2), (1, 3), (3, 5), (2, 4), (4, 6), (13, 7), (13, 8),
178 | (7, 9), (8, 10), (9, 11), (10, 12)]
179 | CLASS_NAMES = ['Standing', 'Walking', 'Sitting', 'Lying Down',
180 | 'Stand up', 'Sit down', 'Fall Down']
181 |
182 |
183 | def plot_poseframes(data, labels=None, frames_stamp=None, delay=0.2, fig_size=(10, 5)):
184 | """
185 | data : (frames, parts, xy).
186 | labels : (frames, label) or (frames, labels).
187 | frames_stamp : (frames, number of frame).
188 | """
189 | fig_cols = 1
190 | if labels is not None and labels.shape[1] > 1:
191 | fig_cols = 2
192 | x_bar = CLASS_NAMES if labels.shape[1] == len(CLASS_NAMES) else np.arange(labels.shape[1])
193 |
194 | fig = plt.figure(figsize=fig_size)
195 | for i in range(data.shape[0]):
196 | xy = data[i]
197 | #xy = np.concatenate((xy, np.expand_dims((xy[1, :] + xy[2, :]) / 2, 0)))
198 |
199 | fig.clear()
200 |
201 | ax1 = fig.add_subplot(1, fig_cols, 1)
202 | for (sp, ep) in PARTS_PAIR:
203 | ax1.plot(xy[[sp, ep], 0], xy[[sp, ep], 1])
204 | if xy.shape[1] == 3:
205 | for pts in xy:
206 | ax1.scatter(pts[0], pts[1], 200 * pts[2])
207 | ax1.invert_yaxis()
208 |
209 | if fig_cols == 2:
210 | ax2 = fig.add_subplot(1, fig_cols, 2)
211 | ax2.bar(x_bar, labels[i])
212 | ax2.set_ylim([0, 1.0])
213 |
214 | frame = frames_stamp[i] if frames_stamp is not None else i
215 | idx = 0
216 | if labels is not None:
217 | idx = labels[i].argmax() if labels.shape[1] > 1 else labels[i][0]
218 | fig.suptitle('Frame : {}, Pose : {}'.format(frame, CLASS_NAMES[idx]))
219 |
220 | plt.pause(delay)
221 | plt.show()
222 |
223 |
224 |
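A small usage sketch for the evaluation helper above (labels are made up; they are passed pre-sorted because `confusion_matrix` orders its rows and columns by sorted label):

y_true = ['Fall Down', 'Standing', 'Walking', 'Walking']
y_pred = ['Fall Down', 'Standing', 'Standing', 'Walking']

plot_confusion_metrix(y_true, y_pred,
                      labels=['Fall Down', 'Standing', 'Walking'],
                      title='Action classification', normalize='true',
                      save='confusion_matrix.png')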
--------------------------------------------------------------------------------
/fn.py:
--------------------------------------------------------------------------------
1 | import re
2 | import cv2
3 | import time
4 | import math
5 | import torch
6 | import numpy as np
7 | import collections.abc  # needed for the Mapping/Sequence checks in collate_fn
8 | RED = (0, 0, 255)
9 | GREEN = (0, 255, 0)
10 | BLUE = (255, 0, 0)
11 | CYAN = (255, 255, 0)
12 | YELLOW = (0, 255, 255)
13 | ORANGE = (0, 165, 255)
14 | PURPLE = (255, 0, 255)
15 |
16 | """COCO_PAIR = [(0, 1), (0, 2), (1, 3), (2, 4), # Head
17 | (5, 6), (5, 7), (7, 9), (6, 8), (8, 10),
18 | (17, 11), (17, 12), # Body
19 | (11, 13), (12, 14), (13, 15), (14, 16)]"""
20 | COCO_PAIR = [(0, 13), (1, 2), (1, 3), (3, 5), (2, 4), (4, 6), (13, 7), (13, 8), # Body
21 | (7, 9), (8, 10), (9, 11), (10, 12)]
22 | POINT_COLORS = [(0, 255, 255), (0, 191, 255), (0, 255, 102), (0, 77, 255), (0, 255, 0), # Nose, LEye, REye, LEar, REar
23 | (77, 255, 255), (77, 255, 204), (77, 204, 255), (191, 255, 77), (77, 191, 255), (191, 255, 77), # LShoulder, RShoulder, LElbow, RElbow, LWrist, RWrist
24 | (204, 77, 255), (77, 255, 204), (191, 77, 255), (77, 255, 191), (127, 77, 255), (77, 255, 127), (0, 255, 255)] # LHip, RHip, LKnee, Rknee, LAnkle, RAnkle, Neck
25 | LINE_COLORS = [(0, 215, 255), (0, 255, 204), (0, 134, 255), (0, 255, 50), (77, 255, 222),
26 | (77, 196, 255), (77, 135, 255), (191, 255, 77), (77, 255, 77), (77, 222, 255),
27 | (255, 156, 127), (0, 127, 255), (255, 127, 77), (0, 77, 255), (255, 77, 36)]
28 |
29 | MPII_PAIR = [(8, 9), (11, 12), (11, 10), (2, 1), (1, 0), (13, 14), (14, 15), (3, 4), (4, 5),
30 | (8, 7), (7, 6), (6, 2), (6, 3), (8, 12), (8, 13)]
31 |
32 | numpy_type_map = {
33 | 'float64': torch.DoubleTensor,
34 | 'float32': torch.FloatTensor,
35 | 'float16': torch.HalfTensor,
36 | 'int64': torch.LongTensor,
37 | 'int32': torch.IntTensor,
38 | 'int16': torch.ShortTensor,
39 | 'int8': torch.CharTensor,
40 | 'uint8': torch.ByteTensor,
41 | }
42 |
43 | _use_shared_memory = True
44 |
45 |
46 | def collate_fn(batch):
47 | r"""Puts each data field into a tensor with outer dimension batch size"""
48 |
49 | error_msg = "batch must contain tensors, numbers, dicts or lists; found {}"
50 | elem_type = type(batch[0])
51 |
52 | if isinstance(batch[0], torch.Tensor):
53 | out = None
54 | if _use_shared_memory:
55 | # If we're in a background process, concatenate directly into a
56 | # shared memory tensor to avoid an extra copy
57 | numel = sum([x.numel() for x in batch])
58 | storage = batch[0].storage()._new_shared(numel)
59 | out = batch[0].new(storage)
60 | return torch.stack(batch, 0, out=out)
61 | elif elem_type.__module__ == 'numpy' and elem_type.__name__ != 'str_' \
62 | and elem_type.__name__ != 'string_':
63 | elem = batch[0]
64 | if elem_type.__name__ == 'ndarray':
65 | # array of string classes and object
66 | if re.search('[SaUO]', elem.dtype.str) is not None:
67 | raise TypeError(error_msg.format(elem.dtype))
68 |
69 | return torch.stack([torch.from_numpy(b) for b in batch], 0)
70 | if elem.shape == (): # scalars
71 | py_type = float if elem.dtype.name.startswith('float') else int
72 | return numpy_type_map[elem.dtype.name](list(map(py_type, batch)))
73 | elif isinstance(batch[0], int):
74 | return torch.LongTensor(batch)
75 | elif isinstance(batch[0], float):
76 | return torch.DoubleTensor(batch)
77 | elif isinstance(batch[0], (str, bytes)):
78 | return batch
79 |     elif isinstance(batch[0], collections.abc.Mapping):
80 |         return {key: collate_fn([d[key] for d in batch]) for key in batch[0]}
81 |     elif isinstance(batch[0], collections.abc.Sequence):
82 | transposed = zip(*batch)
83 | return [collate_fn(samples) for samples in transposed]
84 |
85 | raise TypeError((error_msg.format(type(batch[0]))))
86 |
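# Illustrative sketch (not part of the original file): collate_fn mirrors
# PyTorch's default_collate and stacks per-sample fields into one batch tensor.
# The array shape below is a made-up example.
def _collate_fn_demo(batch_size=4):
    batch = [np.zeros((3, 224, 160), dtype=np.float32) for _ in range(batch_size)]
    return collate_fn(batch)  # -> FloatTensor of shape (batch_size, 3, 224, 160)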
87 |
88 | def collate_fn_list(batch):
89 | img, inp, im_name = zip(*batch)
90 | img = collate_fn(img)
91 | im_name = collate_fn(im_name)
92 |
93 | return img, inp, im_name
94 |
95 |
96 | def draw_single(frame, pts, joint_format='coco'):
97 | if joint_format == 'coco':
98 | l_pair = COCO_PAIR
99 | p_color = POINT_COLORS
100 | line_color = LINE_COLORS
101 | elif joint_format == 'mpii':
102 | l_pair = MPII_PAIR
103 | p_color = [PURPLE, BLUE, BLUE, RED, RED, BLUE, BLUE, RED, RED, PURPLE, PURPLE, PURPLE, RED, RED,BLUE,BLUE]
104 | else:
105 |         raise NotImplementedError
106 |
107 | part_line = {}
108 | pts = np.concatenate((pts, np.expand_dims((pts[1, :] + pts[2, :]) / 2, 0)), axis=0)
109 | for n in range(pts.shape[0]):
110 | if pts[n, 2] <= 0.05:
111 | continue
112 | cor_x, cor_y = int(pts[n, 0]), int(pts[n, 1])
113 | part_line[n] = (cor_x, cor_y)
114 | cv2.circle(frame, (cor_x, cor_y), 3, p_color[n], -1)
115 |
116 | for i, (start_p, end_p) in enumerate(l_pair):
117 | if start_p in part_line and end_p in part_line:
118 | start_xy = part_line[start_p]
119 | end_xy = part_line[end_p]
120 | cv2.line(frame, start_xy, end_xy, line_color[i], int(1*(pts[start_p, 2] + pts[end_p, 2]) + 1))
121 | return frame
122 |
123 |
124 | def vis_frame_fast(frame, im_res, joint_format='coco'):
125 | """
126 |     frame: frame image (numpy array)
127 |     im_res: list of per-person prediction dicts with 'keypoints' and 'kp_score'
128 |     joint_format: 'coco' or 'mpii'
129 |
130 |     return: rendered image
131 | """
132 | if joint_format == 'coco':
133 | l_pair = COCO_PAIR
134 | p_color = POINT_COLORS
135 | line_color = LINE_COLORS
136 | elif joint_format == 'mpii':
137 | l_pair = MPII_PAIR
138 | p_color = [PURPLE, BLUE, BLUE, RED, RED, BLUE, BLUE, RED, RED, PURPLE, PURPLE, PURPLE, RED, RED,BLUE,BLUE]
139 | else:
140 |         raise NotImplementedError
141 |
142 | #im_name = im_res['imgname'].split('/')[-1]
143 | img = frame
144 | for human in im_res: # ['result']:
145 | part_line = {}
146 | kp_preds = human['keypoints']
147 | kp_scores = human['kp_score']
148 | kp_preds = torch.cat((kp_preds, torch.unsqueeze((kp_preds[1, :]+kp_preds[2, :]) / 2, 0)))
149 | kp_scores = torch.cat((kp_scores, torch.unsqueeze((kp_scores[1, :]+kp_scores[2, :]) / 2, 0)))
150 | # Draw keypoints
151 | for n in range(kp_scores.shape[0]):
152 | if kp_scores[n] <= 0.05:
153 | continue
154 | cor_x, cor_y = int(kp_preds[n, 0]), int(kp_preds[n, 1])
155 | part_line[n] = (cor_x, cor_y)
156 | cv2.circle(img, (cor_x, cor_y), 4, p_color[n], -1)
157 | # Draw limbs
158 | for i, (start_p, end_p) in enumerate(l_pair):
159 | if start_p in part_line and end_p in part_line:
160 | start_xy = part_line[start_p]
161 | end_xy = part_line[end_p]
162 |                 cv2.line(img, start_xy, end_xy, line_color[i], int(2 * (kp_scores[start_p] + kp_scores[end_p]) + 1))
163 | return img
164 |
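# Illustrative sketch (not part of the original file): vis_frame_fast expects an
# iterable of per-person dicts with 13-point 'keypoints' and 'kp_score' tensors
# (the neck point is appended internally), using the same dict keys as the pose
# results in main.py. The random values below are hypothetical placeholders.
def _vis_frame_fast_demo():
    canvas = np.zeros((384, 384, 3), dtype=np.uint8)
    person = {'keypoints': torch.rand(13, 2) * 384,
              'kp_score': torch.rand(13, 1)}
    return vis_frame_fast(canvas, [person], joint_format='coco')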
165 |
166 | def vis_frame(frame, im_res, joint_format='coco'):
167 | """
168 |     frame: frame image (numpy array)
169 |     im_res: prediction dict with 'imgname' and 'result' (list of per-person dicts)
170 |     joint_format: 'coco' or 'mpii'
171 |
172 |     return: rendered image
173 | """
174 | if joint_format == 'coco':
175 | l_pair = COCO_PAIR
176 | p_color = POINT_COLORS
177 | line_color = LINE_COLORS
178 | elif joint_format == 'mpii':
179 | l_pair = MPII_PAIR
180 | p_color = [PURPLE, BLUE, BLUE, RED, RED, BLUE, BLUE, RED, RED, PURPLE, PURPLE, PURPLE, RED, RED, BLUE, BLUE]
181 | line_color = [PURPLE, BLUE, BLUE, RED, RED, BLUE, BLUE, RED, RED, PURPLE, PURPLE, RED, RED, BLUE, BLUE]
182 | else:
183 | raise NotImplementedError
184 |
185 | im_name = im_res['imgname'].split('/')[-1]
186 | img = frame
187 | height, width = img.shape[:2]
188 | img = cv2.resize(img, (int(width/2), int(height/2)))
189 | for human in im_res['result']:
190 | part_line = {}
191 | kp_preds = human['keypoints']
192 | kp_scores = human['kp_score']
193 | kp_preds = torch.cat((kp_preds, torch.unsqueeze((kp_preds[5, :]+kp_preds[6, :]) / 2, 0)))
194 | kp_scores = torch.cat((kp_scores, torch.unsqueeze((kp_scores[5, :]+kp_scores[6, :]) / 2, 0)))
195 | # Draw keypoints
196 | for n in range(kp_scores.shape[0]):
197 | if kp_scores[n] <= 0.05:
198 | continue
199 | cor_x, cor_y = int(kp_preds[n, 0]), int(kp_preds[n, 1])
200 | part_line[n] = (int(cor_x/2), int(cor_y/2))
201 | bg = img.copy()
202 | cv2.circle(bg, (int(cor_x/2), int(cor_y/2)), 2, p_color[n], -1)
203 |             # Blend the drawn keypoint onto the image with a score-weighted alpha.
204 |             transparency = float(max(0, min(1, kp_scores[n])))
205 |             img = cv2.addWeighted(bg, transparency, img, 1 - transparency, 0)
206 | # Draw limbs
207 | for i, (start_p, end_p) in enumerate(l_pair):
208 | if start_p in part_line and end_p in part_line:
209 | start_xy = part_line[start_p]
210 | end_xy = part_line[end_p]
211 | bg = img.copy()
212 |
213 | X = (start_xy[0], end_xy[0])
214 | Y = (start_xy[1], end_xy[1])
215 | mX = np.mean(X)
216 | mY = np.mean(Y)
217 | length = ((Y[0] - Y[1]) ** 2 + (X[0] - X[1]) ** 2) ** 0.5
218 | angle = math.degrees(math.atan2(Y[0] - Y[1], X[0] - X[1]))
219 |                 stickwidth = int(kp_scores[start_p] + kp_scores[end_p]) + 1
220 |                 polygon = cv2.ellipse2Poly((int(mX), int(mY)), (int(length / 2), stickwidth), int(angle), 0, 360, 1)
221 | cv2.fillConvexPoly(bg, polygon, line_color[i])
222 | #cv2.line(bg, start_xy, end_xy, line_color[i], (2 * (kp_scores[start_p] + kp_scores[end_p])) + 1)
223 |                 transparency = float(max(0, min(1, 0.5 * (kp_scores[start_p] + kp_scores[end_p]))))
224 | img = cv2.addWeighted(bg, transparency, img, 1-transparency, 0)
225 | img = cv2.resize(img, (width, height), interpolation=cv2.INTER_CUBIC)
226 | return img
227 |
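# Note (added for clarity): unlike vis_frame_fast, vis_frame renders on a
# half-resolution copy, draws each limb as a filled ellipse, and alpha-blends
# every primitive with a weight derived from the keypoint scores
# (img = transparency * bg + (1 - transparency) * img). It also still assumes
# the 17-keypoint COCO layout (neck built from shoulders 5 and 6) and a dict
# input {'imgname': ..., 'result': [per-person dicts]} rather than a bare list.
# A hypothetical call sketch:
def _vis_frame_demo():
    canvas = np.zeros((384, 384, 3), dtype=np.uint8)
    person = {'keypoints': torch.rand(17, 2) * 384,
              'kp_score': torch.rand(17, 1)}
    return vis_frame(canvas, {'imgname': 'demo.jpg', 'result': [person]})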
228 |
229 | def getTime(time1=0):
230 | if not time1:
231 | return time.time()
232 | else:
233 | interval = time.time() - time1
234 | return time.time(), interval
235 |
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | import os
2 | import cv2
3 | import time
4 | import torch
5 | import argparse
6 | import numpy as np
7 |
8 | from Detection.Utils import ResizePadding
9 | from CameraLoader import CamLoader, CamLoader_Q
10 | from DetectorLoader import TinyYOLOv3_onecls
11 |
12 | from PoseEstimateLoader import SPPE_FastPose
13 | from fn import draw_single
14 |
15 | from Track.Tracker import Detection, Tracker
16 | from ActionsEstLoader import TSSTG
17 |
18 | #source = '../Data/test_video/test7.mp4'
19 | #source = '../Data/falldata/Home/Videos/video (2).avi' # hard detect
20 | source = '../Data/falldata/Home/Videos/video (1).avi'
21 | #source = 2
22 |
23 |
24 | def preproc(image):
25 | """preprocess function for CameraLoader.
26 | """
27 | image = resize_fn(image)
28 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
29 | return image
30 |
31 |
32 | def kpt2bbox(kpt, ex=20):
33 |     """Get a bbox that covers all of the keypoints (x, y).
34 |     kpt: array of keypoints in shape `(N, 2)`,
35 |     ex: (int) number of pixels to expand the bounding box by,
36 | """
37 | return np.array((kpt[:, 0].min() - ex, kpt[:, 1].min() - ex,
38 | kpt[:, 0].max() + ex, kpt[:, 1].max() + ex))
39 |
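# Illustrative example (not part of the original script): kpt2bbox turns a set
# of keypoints into an expanded (xmin, ymin, xmax, ymax) box. Values are made up.
#   kpt = np.array([[100., 50.], [120., 80.], [110., 200.]])
#   kpt2bbox(kpt, ex=20)   # -> array([ 80.,  30., 140., 220.])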
40 |
41 | if __name__ == '__main__':
42 | par = argparse.ArgumentParser(description='Human Fall Detection Demo.')
43 | par.add_argument('-C', '--camera', default=source, # required=True, # default=2,
44 | help='Source of camera or video file path.')
45 | par.add_argument('--detection_input_size', type=int, default=384,
46 |                      help='Square input size of the detection model; must be divisible by 32 (int).')
47 |     par.add_argument('--pose_input_size', type=str, default='224x160',
48 |                      help='Input size of the pose model as HxW; both must be divisible by 32.')
49 | par.add_argument('--pose_backbone', type=str, default='resnet50',
50 | help='Backbone model for SPPE FastPose model.')
51 | par.add_argument('--show_detected', default=False, action='store_true',
52 | help='Show all bounding box from detection.')
53 | par.add_argument('--show_skeleton', default=True, action='store_true',
54 | help='Show skeleton pose.')
55 | par.add_argument('--save_out', type=str, default='',
56 | help='Save display to video file.')
57 | par.add_argument('--device', type=str, default='cuda',
58 | help='Device to run model on cpu or cuda.')
59 | args = par.parse_args()
60 |
61 | device = args.device
62 |
63 | # DETECTION MODEL.
64 | inp_dets = args.detection_input_size
65 | detect_model = TinyYOLOv3_onecls(inp_dets, device=device)
66 |
67 | # POSE MODEL.
68 | inp_pose = args.pose_input_size.split('x')
69 | inp_pose = (int(inp_pose[0]), int(inp_pose[1]))
70 | pose_model = SPPE_FastPose(args.pose_backbone, inp_pose[0], inp_pose[1], device=device)
71 |
72 | # Tracker.
73 | max_age = 30
74 | tracker = Tracker(max_age=max_age, n_init=3)
75 |
76 | # Actions Estimate.
77 | action_model = TSSTG()
78 |
79 | resize_fn = ResizePadding(inp_dets, inp_dets)
80 |
81 | cam_source = args.camera
82 | if type(cam_source) is str and os.path.isfile(cam_source):
83 | # Use loader thread with Q for video file.
84 | cam = CamLoader_Q(cam_source, queue_size=1000, preprocess=preproc).start()
85 | else:
86 | # Use normal thread loader for webcam.
87 | cam = CamLoader(int(cam_source) if cam_source.isdigit() else cam_source,
88 | preprocess=preproc).start()
89 |
90 | #frame_size = cam.frame_size
91 | #scf = torch.min(inp_size / torch.FloatTensor([frame_size]), 1)[0]
92 |
93 | outvid = False
94 | if args.save_out != '':
95 | outvid = True
96 | codec = cv2.VideoWriter_fourcc(*'MJPG')
97 | writer = cv2.VideoWriter(args.save_out, codec, 30, (inp_dets * 2, inp_dets * 2))
98 |
99 | fps_time = 0
100 | f = 0
101 | while cam.grabbed():
102 | f += 1
103 | frame = cam.getitem()
104 | image = frame.copy()
105 |
106 |         # Detect human bounding boxes in the frame with the detector model.
107 | detected = detect_model.detect(frame, need_resize=False, expand_bb=10)
108 |
109 |         # Predict each track's bbox in the current frame from previous frames with the Kalman filter.
110 |         tracker.predict()
111 |         # Merge the two sources of predicted bboxes (detector and tracker) together.
112 | for track in tracker.tracks:
113 | det = torch.tensor([track.to_tlbr().tolist() + [0.5, 1.0, 0.0]], dtype=torch.float32)
114 | detected = torch.cat([detected, det], dim=0) if detected is not None else det
115 |
116 | detections = [] # List of Detections object for tracking.
117 | if detected is not None:
118 | #detected = non_max_suppression(detected[None, :], 0.45, 0.2)[0]
119 |             # Predict the skeleton pose inside each bbox.
120 | poses = pose_model.predict(frame, detected[:, 0:4], detected[:, 4])
121 |
122 | # Create Detections object.
123 | detections = [Detection(kpt2bbox(ps['keypoints'].numpy()),
124 | np.concatenate((ps['keypoints'].numpy(),
125 | ps['kp_score'].numpy()), axis=1),
126 | ps['kp_score'].mean().numpy()) for ps in poses]
127 |
128 | # VISUALIZE.
129 | if args.show_detected:
130 | for bb in detected[:, 0:5]:
131 |                     frame = cv2.rectangle(frame, (int(bb[0]), int(bb[1])), (int(bb[2]), int(bb[3])), (0, 0, 255), 1)
132 |
133 |         # Update tracks by matching detections in the current frame to existing tracks,
134 |         # or create a new track if none matches.
135 | tracker.update(detections)
136 |
137 | # Predict Actions of each track.
138 | for i, track in enumerate(tracker.tracks):
139 | if not track.is_confirmed():
140 | continue
141 |
142 | track_id = track.track_id
143 | bbox = track.to_tlbr().astype(int)
144 | center = track.get_center().astype(int)
145 |
146 | action = 'pending..'
147 | clr = (0, 255, 0)
148 |             # Use a 30-frame window of keypoints for the action prediction.
149 | if len(track.keypoints_list) == 30:
150 | pts = np.array(track.keypoints_list, dtype=np.float32)
151 | out = action_model.predict(pts, frame.shape[:2])
152 | action_name = action_model.class_names[out[0].argmax()]
153 | action = '{}: {:.2f}%'.format(action_name, out[0].max() * 100)
154 | if action_name == 'Fall Down':
155 | clr = (255, 0, 0)
156 | elif action_name == 'Lying Down':
157 | clr = (255, 200, 0)
158 |
159 | # VISUALIZE.
160 | if track.time_since_update == 0:
161 | if args.show_skeleton:
162 | frame = draw_single(frame, track.keypoints_list[-1])
163 | frame = cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 255, 0), 1)
164 | frame = cv2.putText(frame, str(track_id), (center[0], center[1]), cv2.FONT_HERSHEY_COMPLEX,
165 | 0.4, (255, 0, 0), 2)
166 | frame = cv2.putText(frame, action, (bbox[0] + 5, bbox[1] + 15), cv2.FONT_HERSHEY_COMPLEX,
167 | 0.4, clr, 1)
168 |
169 | # Show Frame.
170 | frame = cv2.resize(frame, (0, 0), fx=2., fy=2.)
171 | frame = cv2.putText(frame, '%d, FPS: %f' % (f, 1.0 / (time.time() - fps_time)),
172 | (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
173 | frame = frame[:, :, ::-1]
174 | fps_time = time.time()
175 |
176 | if outvid:
177 | writer.write(frame)
178 |
179 | cv2.imshow('frame', frame)
180 | if cv2.waitKey(1) & 0xFF == ord('q'):
181 | break
182 |
183 |     # Release resources.
184 | cam.stop()
185 | if outvid:
186 | writer.release()
187 | cv2.destroyAllWindows()
188 |
--------------------------------------------------------------------------------
/pPose_nms.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import torch
3 | import json
4 | import os
5 | import zipfile
6 | import time
7 | from multiprocessing.dummy import Pool as ThreadPool
8 | import numpy as np
9 |
10 | ''' Constant Configuration '''
11 | delta1 = 1
12 | mu = 1.7
13 | delta2 = 2.65
14 | gamma = 22.48
15 | scoreThreds = 0.3
16 | matchThreds = 5
17 | areaThres = 0 # 40 * 40.5
18 | alpha = 0.1
19 | #pool = ThreadPool(4)
20 |
21 |
22 | def pose_nms(bboxes, bbox_scores, pose_preds, pose_scores):
23 | """
24 | Parametric Pose NMS algorithm
25 | bboxes: bbox locations list (n, 4)
26 | bbox_scores: bbox scores list (n,)
27 | pose_preds: pose locations list (n, 17, 2)
28 | pose_scores: pose scores list (n, 17, 1)
29 | """
30 | global ori_pose_preds, ori_pose_scores, ref_dists
31 |
32 | pose_scores[pose_scores == 0] = 1e-5
33 |
34 | final_result = []
35 |
36 | ori_bboxes = bboxes.clone()
37 | ori_bbox_scores = bbox_scores.clone()
38 | ori_pose_preds = pose_preds.clone()
39 | ori_pose_scores = pose_scores.clone()
40 |
41 | xmax = bboxes[:, 2]
42 | xmin = bboxes[:, 0]
43 | ymax = bboxes[:, 3]
44 | ymin = bboxes[:, 1]
45 |
46 | widths = xmax - xmin
47 | heights = ymax - ymin
48 | ref_dists = alpha * np.maximum(widths, heights)
49 |
50 | nsamples = bboxes.shape[0]
51 | human_scores = pose_scores.mean(dim=1)
52 |
53 | human_ids = np.arange(nsamples)
54 | # Do pPose-NMS
55 | pick = []
56 | merge_ids = []
57 | while human_scores.shape[0] != 0:
58 | # Pick the one with highest score
59 | pick_id = torch.argmax(human_scores)
60 | pick.append(human_ids[pick_id])
61 | # num_visPart = torch.sum(pose_scores[pick_id] > 0.2)
62 |
63 | # Get numbers of match keypoints by calling PCK_match
64 | ref_dist = ref_dists[human_ids[pick_id]]
65 | simi = get_parametric_distance(pick_id, pose_preds, pose_scores, ref_dist)
66 | num_match_keypoints = PCK_match(pose_preds[pick_id], pose_preds, ref_dist)
67 |
68 |         # Delete candidates with at least matchThreds matched keypoints or a high similarity to the picked pose
69 | delete_ids = torch.from_numpy(np.arange(human_scores.shape[0]))[
70 | (simi > gamma) | (num_match_keypoints >= matchThreds)]
71 |
72 | if delete_ids.shape[0] == 0:
73 | delete_ids = pick_id
74 | #else:
75 | # delete_ids = torch.from_numpy(delete_ids)
76 |
77 | merge_ids.append(human_ids[delete_ids])
78 | pose_preds = np.delete(pose_preds, delete_ids, axis=0)
79 | pose_scores = np.delete(pose_scores, delete_ids, axis=0)
80 | human_ids = np.delete(human_ids, delete_ids)
81 | human_scores = np.delete(human_scores, delete_ids, axis=0)
82 | bbox_scores = np.delete(bbox_scores, delete_ids, axis=0)
83 |
84 | assert len(merge_ids) == len(pick)
85 | bboxs_pick = ori_bboxes[pick]
86 | preds_pick = ori_pose_preds[pick]
87 | scores_pick = ori_pose_scores[pick]
88 | bbox_scores_pick = ori_bbox_scores[pick]
89 | #final_result = pool.map(filter_result, zip(scores_pick, merge_ids, preds_pick, pick, bbox_scores_pick))
90 | #final_result = [item for item in final_result if item is not None]
91 |
92 | for j in range(len(pick)):
93 | ids = np.arange(pose_preds.shape[1])
94 | max_score = torch.max(scores_pick[j, ids, 0])
95 |
96 | if max_score < scoreThreds:
97 | continue
98 |
99 | # Merge poses
100 | merge_id = merge_ids[j]
101 | merge_pose, merge_score = p_merge_fast(
102 | preds_pick[j], ori_pose_preds[merge_id], ori_pose_scores[merge_id], ref_dists[pick[j]])
103 |
104 | max_score = torch.max(merge_score[ids])
105 | if max_score < scoreThreds:
106 | continue
107 |
108 | xmax = max(merge_pose[:, 0])
109 | xmin = min(merge_pose[:, 0])
110 | ymax = max(merge_pose[:, 1])
111 | ymin = min(merge_pose[:, 1])
112 |
113 | if 1.5 ** 2 * (xmax - xmin) * (ymax - ymin) < areaThres:
114 | continue
115 |
116 | final_result.append({
117 | 'bbox': bboxs_pick[j],
118 | 'bbox_score': bbox_scores_pick[j],
119 | 'keypoints': merge_pose - 0.3,
120 | 'kp_score': merge_score,
121 | 'proposal_score': torch.mean(merge_score) + bbox_scores_pick[j] + 1.25 * max(merge_score)
122 | })
123 |
124 | return final_result
125 |
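# --- Usage sketch (added for illustration; not part of the original file) ---
# A hypothetical call with two near-duplicate detections of the same person;
# pose_nms keeps one entry and merges the redundant poses. Shapes follow the
# docstring above; note that pose_scores is modified in place.
def _pose_nms_demo():
    base = torch.rand(17, 2) * 100
    pose_preds = torch.stack([base, base + 1.0])        # (2, 17, 2) near-duplicates
    pose_scores = torch.rand(2, 17, 1) * 0.5 + 0.5      # (2, 17, 1) confident scores
    bboxes = torch.tensor([[0., 0., 100., 100.],
                           [2., 2., 102., 102.]])       # (2, 4)
    bbox_scores = torch.tensor([0.9, 0.8])              # (2,)
    return pose_nms(bboxes, bbox_scores, pose_preds, pose_scores)  # list with 1 dict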
126 |
127 | def filter_result(args):
128 | score_pick, merge_id, pred_pick, pick, bbox_score_pick = args
129 | global ori_pose_preds, ori_pose_scores, ref_dists
130 | ids = np.arange(17)
131 | max_score = torch.max(score_pick[ids, 0])
132 |
133 | if max_score < scoreThreds:
134 | return None
135 |
136 | # Merge poses
137 | merge_pose, merge_score = p_merge_fast(
138 | pred_pick, ori_pose_preds[merge_id], ori_pose_scores[merge_id], ref_dists[pick])
139 |
140 | max_score = torch.max(merge_score[ids])
141 | if max_score < scoreThreds:
142 | return None
143 |
144 | xmax = max(merge_pose[:, 0])
145 | xmin = min(merge_pose[:, 0])
146 | ymax = max(merge_pose[:, 1])
147 | ymin = min(merge_pose[:, 1])
148 |
149 | if 1.5 ** 2 * (xmax - xmin) * (ymax - ymin) < 40 * 40.5:
150 | return None
151 |
152 | return {
153 | 'keypoints': merge_pose - 0.3,
154 | 'kp_score': merge_score,
155 | 'proposal_score': torch.mean(merge_score) + bbox_score_pick + 1.25 * max(merge_score)
156 | }
157 |
158 |
159 | def p_merge(ref_pose, cluster_preds, cluster_scores, ref_dist):
160 | """
161 | Score-weighted pose merging
162 | INPUT:
163 | ref_pose: reference pose -- [17, 2]
164 | cluster_preds: redundant poses -- [n, 17, 2]
165 | cluster_scores: redundant poses score -- [n, 17, 1]
166 | ref_dist: reference scale -- Constant
167 | OUTPUT:
168 | final_pose: merged pose -- [17, 2]
169 | final_score: merged score -- [17]
170 | """
171 | dist = torch.sqrt(torch.sum(
172 | torch.pow(ref_pose[np.newaxis, :] - cluster_preds, 2),
173 | dim=2
174 | )) # [n, 17]
175 |
176 | kp_num = 17
177 | ref_dist = min(ref_dist, 15)
178 |
179 | mask = (dist <= ref_dist)
180 | final_pose = torch.zeros(kp_num, 2)
181 | final_score = torch.zeros(kp_num)
182 |
183 | if cluster_preds.dim() == 2:
184 | cluster_preds.unsqueeze_(0)
185 | cluster_scores.unsqueeze_(0)
186 | if mask.dim() == 1:
187 | mask.unsqueeze_(0)
188 |
189 | for i in range(kp_num):
190 | cluster_joint_scores = cluster_scores[:, i][mask[:, i]] # [k, 1]
191 | cluster_joint_location = cluster_preds[:, i, :][mask[:, i].unsqueeze(
192 | -1).repeat(1, 2)].view((torch.sum(mask[:, i]), -1))
193 |
194 |         # Get a normalized score
195 | normed_scores = cluster_joint_scores / torch.sum(cluster_joint_scores)
196 |
197 | # Merge poses by a weighted sum
198 | final_pose[i, 0] = torch.dot(cluster_joint_location[:, 0], normed_scores.squeeze(-1))
199 | final_pose[i, 1] = torch.dot(cluster_joint_location[:, 1], normed_scores.squeeze(-1))
200 |
201 | final_score[i] = torch.dot(cluster_joint_scores.transpose(0, 1).squeeze(0), normed_scores.squeeze(-1))
202 |
203 | return final_pose, final_score
204 |
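# Worked example (added for clarity, with made-up numbers): for one joint with
# two cluster candidates inside ref_dist, located at (10, 10) with score 0.6 and
# (12, 10) with score 0.2, the normalized weights are 0.75 and 0.25, so
#   final_pose  = 0.75 * (10, 10) + 0.25 * (12, 10) = (10.5, 10.0)
#   final_score = 0.75 * 0.6 + 0.25 * 0.2 = 0.5
# i.e. each joint is a score-weighted average of the candidates that fall
# within ref_dist of the reference pose.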
205 |
206 | def p_merge_fast(ref_pose, cluster_preds, cluster_scores, ref_dist):
207 | """
208 | Score-weighted pose merging
209 | INPUT:
210 | ref_pose: reference pose -- [17, 2]
211 | cluster_preds: redundant poses -- [n, 17, 2]
212 | cluster_scores: redundant poses score -- [n, 17, 1]
213 | ref_dist: reference scale -- Constant
214 | OUTPUT:
215 | final_pose: merged pose -- [17, 2]
216 | final_score: merged score -- [17]
217 | """
218 | dist = torch.sqrt(torch.sum(
219 | torch.pow(ref_pose[np.newaxis, :] - cluster_preds, 2),
220 | dim=2
221 | ))
222 |
223 | kp_num = 17
224 | ref_dist = min(ref_dist, 15)
225 |
226 | mask = (dist <= ref_dist)
227 | final_pose = torch.zeros(kp_num, 2)
228 | final_score = torch.zeros(kp_num)
229 |
230 | if cluster_preds.dim() == 2:
231 | cluster_preds.unsqueeze_(0)
232 | cluster_scores.unsqueeze_(0)
233 | if mask.dim() == 1:
234 | mask.unsqueeze_(0)
235 |
236 | # Weighted Merge
237 | masked_scores = cluster_scores.mul(mask.float().unsqueeze(-1))
238 | normed_scores = masked_scores / torch.sum(masked_scores, dim=0)
239 |
240 | final_pose = torch.mul(cluster_preds, normed_scores.repeat(1, 1, 2)).sum(dim=0)
241 | final_score = torch.mul(masked_scores, normed_scores).sum(dim=0)
242 | return final_pose, final_score
243 |
244 |
245 | def get_parametric_distance(i, all_preds, keypoint_scores, ref_dist):
246 | pick_preds = all_preds[i]
247 | pred_scores = keypoint_scores[i]
248 | dist = torch.sqrt(torch.sum(
249 | torch.pow(pick_preds[np.newaxis, :] - all_preds, 2),
250 | dim=2
251 | ))
252 | mask = (dist <= 1)
253 |
254 | # Define a keypoints distance
255 | score_dists = torch.zeros(all_preds.shape[0], all_preds.shape[1])
256 | keypoint_scores.squeeze_()
257 | if keypoint_scores.dim() == 1:
258 | keypoint_scores.unsqueeze_(0)
259 | if pred_scores.dim() == 1:
260 | pred_scores.unsqueeze_(1)
261 |     # Repeat the picked pose's scores so they broadcast against all candidate poses
262 | pred_scores = pred_scores.repeat(1, all_preds.shape[0]).transpose(0, 1)
263 |
264 | score_dists[mask] = torch.tanh(pred_scores[mask] / delta1) *\
265 | torch.tanh(keypoint_scores[mask] / delta1)
266 |
267 | point_dist = torch.exp((-1) * dist / delta2)
268 | final_dist = torch.sum(score_dists, dim=1) + mu * torch.sum(point_dist, dim=1)
269 |
270 | return final_dist
271 |
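# Note (added for clarity): written out, the similarity that pose_nms compares
# against gamma is, for each candidate pose j relative to the picked pose i,
#   final_dist_j = sum_n [dist_n <= 1] * tanh(c_i_n / delta1) * tanh(c_j_n / delta1)
#                  + mu * sum_n exp(-dist_n / delta2)
# where dist_n is the pixel distance between keypoint n of the two poses and
# c_i_n, c_j_n are the keypoint scores. Larger values mean more similar poses;
# candidates whose value exceeds gamma are suppressed in pose_nms. (The ref_dist
# argument is accepted but not used here.)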
272 |
273 | def PCK_match(pick_pred, all_preds, ref_dist):
274 | dist = torch.sqrt(torch.sum(
275 | torch.pow(pick_pred[np.newaxis, :] - all_preds, 2),
276 | dim=2
277 | ))
278 | ref_dist = min(ref_dist, 7)
279 | num_match_keypoints = torch.sum(
280 | dist / ref_dist <= 1,
281 | dim=1
282 | )
283 |
284 | return num_match_keypoints
285 |
--------------------------------------------------------------------------------
/pose_utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | def normalize_points_with_size(xy, width, height, flip=False):
5 |     """Normalize point coordinates to the range (0-1) using the image size.
6 | xy : (frames, parts, xy) or (parts, xy)
7 | """
8 | if xy.ndim == 2:
9 | xy = np.expand_dims(xy, 0)
10 | xy[:, :, 0] /= width
11 | xy[:, :, 1] /= height
12 | if flip:
13 | xy[:, :, 0] = 1 - xy[:, :, 0]
14 | return xy
15 |
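# Illustrative example (hypothetical values): a point at pixel (160, 240) in a
# 640x480 frame maps to (0.25, 0.5); with flip=True the x value becomes 0.75.
# Note the division happens in place, so pass a float copy if the original
# array should be kept:
#   pts = np.array([[[160., 240.]]])                  # (1 frame, 1 part, xy)
#   normalize_points_with_size(pts.copy(), 640, 480)  # -> [[[0.25, 0.5]]]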
16 |
17 | def scale_pose(xy):
18 |     """Rescale pose points to the range (-1, 1) using the min/max of each pose.
19 | xy : (frames, parts, xy) or (parts, xy)
20 | """
21 | if xy.ndim == 2:
22 | xy = np.expand_dims(xy, 0)
23 | xy_min = np.nanmin(xy, axis=1)
24 | xy_max = np.nanmax(xy, axis=1)
25 | for i in range(xy.shape[0]):
26 | xy[i] = ((xy[i] - xy_min[i]) / (xy_max[i] - xy_min[i])) * 2 - 1
27 | return xy.squeeze()
28 |
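# Illustrative example (hypothetical values): each axis is rescaled to [-1, 1]
# using that pose's own min/max, making the result translation- and scale-invariant:
#   xy = np.array([[[0., 0.], [2., 4.], [1., 2.]]])   # (1 frame, 3 parts, xy)
#   scale_pose(xy)   # -> [[-1., -1.], [ 1.,  1.], [ 0.,  0.]]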
--------------------------------------------------------------------------------
/sample1.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GajuuzZ/Human-Falling-Detect-Tracks/7ed2faa4d6147dfd576f58869b6c25545208af35/sample1.gif
--------------------------------------------------------------------------------