├── ActionsEstLoader.py ├── Actionsrecognition ├── Models.py ├── Utils.py └── train.py ├── App.py ├── CameraLoader.py ├── Data ├── create_dataset_1.py ├── create_dataset_2.py └── create_dataset_3.py ├── Detection ├── Models.py └── Utils.py ├── DetectorLoader.py ├── Models ├── TSSTG │ └── _.txt ├── sppe │ └── _.txt └── yolo-tiny-onecls │ └── _.txt ├── PoseEstimateLoader.py ├── README.md ├── SPPE ├── LICENSE ├── README.md └── src │ ├── main_fast_inference.py │ ├── models │ ├── FastPose.py │ ├── __init__.py │ ├── hg-prm.py │ ├── hgPRM.py │ └── layers │ │ ├── DUC.py │ │ ├── PRM.py │ │ ├── Residual.py │ │ ├── Resnet.py │ │ ├── SE_Resnet.py │ │ ├── SE_module.py │ │ ├── __init__.py │ │ └── util_models.py │ ├── opt.py │ └── utils │ ├── __init__.py │ ├── dataset │ ├── .coco.py.swp │ ├── __init__.py │ ├── coco.py │ ├── fuse.py │ └── mpii.py │ ├── eval.py │ ├── img.py │ └── pose.py ├── Track ├── Tracker.py ├── iou_matching.py ├── kalman_filter.py └── linear_assignment.py ├── Visualizer.py ├── fn.py ├── main.py ├── pPose_nms.py ├── pose_utils.py └── sample1.gif /ActionsEstLoader.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import numpy as np 4 | 5 | from Actionsrecognition.Models import TwoStreamSpatialTemporalGraph 6 | from pose_utils import normalize_points_with_size, scale_pose 7 | 8 | 9 | class TSSTG(object): 10 | """Two-Stream Spatial Temporal Graph Model Loader. 11 | Args: 12 | weight_file: (str) Path to trained weights file. 13 | device: (str) Device to load the model on 'cpu' or 'cuda'. 14 | """ 15 | def __init__(self, 16 | weight_file='./Models/TSSTG/tsstg-model.pth', 17 | device='cuda'): 18 | self.graph_args = {'strategy': 'spatial'} 19 | self.class_names = ['Standing', 'Walking', 'Sitting', 'Lying Down', 20 | 'Stand up', 'Sit down', 'Fall Down'] 21 | self.num_class = len(self.class_names) 22 | self.device = device 23 | 24 | self.model = TwoStreamSpatialTemporalGraph(self.graph_args, self.num_class).to(self.device) 25 | self.model.load_state_dict(torch.load(weight_file)) 26 | self.model.eval() 27 | 28 | def predict(self, pts, image_size): 29 | """Predict actions from single person skeleton points and score in time sequence. 30 | Args: 31 | pts: (numpy array) points and score in shape `(t, v, c)` where 32 | t : inputs sequence (time steps)., 33 | v : number of graph node (body parts)., 34 | c : channel (x, y, score)., 35 | image_size: (tuple of int) width, height of image frame. 36 | Returns: 37 | (numpy array) Probability of each class actions. 
38 | """ 39 | pts[:, :, :2] = normalize_points_with_size(pts[:, :, :2], image_size[0], image_size[1]) 40 | pts[:, :, :2] = scale_pose(pts[:, :, :2]) 41 | pts = np.concatenate((pts, np.expand_dims((pts[:, 1, :] + pts[:, 2, :]) / 2, 1)), axis=1) 42 | 43 | pts = torch.tensor(pts, dtype=torch.float32) 44 | pts = pts.permute(2, 0, 1)[None, :] 45 | 46 | mot = pts[:, :2, 1:, :] - pts[:, :2, :-1, :] 47 | mot = mot.to(self.device) 48 | pts = pts.to(self.device) 49 | 50 | out = self.model((pts, mot)) 51 | 52 | return out.detach().cpu().numpy() 53 | -------------------------------------------------------------------------------- /Actionsrecognition/Models.py: -------------------------------------------------------------------------------- 1 | ### Reference from: https://github.com/yysijie/st-gcn/tree/master/net 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | import numpy as np 7 | 8 | from Actionsrecognition.Utils import Graph 9 | 10 | 11 | class GraphConvolution(nn.Module): 12 | """The basic module for applying a graph convolution. 13 | Args: 14 | - in_channel: (int) Number of channels in the input sequence data. 15 | - out_channels: (int) Number of channels produced by the convolution. 16 | - kernel_size: (int) Size of the graph convolving kernel. 17 | - t_kernel_size: (int) Size of the temporal convolving kernel. 18 | - t_stride: (int, optional) Stride of the temporal convolution. Default: 1 19 | - t_padding: (int, optional) Temporal zero-padding added to both sides of 20 | the input. Default: 0 21 | - t_dilation: (int, optional) Spacing between temporal kernel elements. Default: 1 22 | - bias: (bool, optional) If `True`, adds a learnable bias to the output. 23 | Default: `True` 24 | Shape: 25 | - Inputs x: Graph sequence in :math:`(N, in_channels, T_{in}, V)`, 26 | A: Graph adjacency matrix in :math:`(K, V, V)`, 27 | - Output: Graph sequence out in :math:`(N, out_channels, T_{out}, V)` 28 | 29 | where 30 | :math:`N` is a batch size, 31 | :math:`K` is the spatial kernel size, as :math:`K == kernel_size[1]`, 32 | :math:`T_{in}/T_{out}` is a length of input/output sequence, 33 | :math:`V` is the number of graph nodes. 34 | 35 | """ 36 | def __init__(self, in_channels, out_channels, kernel_size, 37 | t_kernel_size=1, 38 | t_stride=1, 39 | t_padding=0, 40 | t_dilation=1, 41 | bias=True): 42 | super().__init__() 43 | 44 | self.kernel_size = kernel_size 45 | self.conv = nn.Conv2d(in_channels, 46 | out_channels * kernel_size, 47 | kernel_size=(t_kernel_size, 1), 48 | padding=(t_padding, 0), 49 | stride=(t_stride, 1), 50 | dilation=(t_dilation, 1), 51 | bias=bias) 52 | 53 | def forward(self, x, A): 54 | x = self.conv(x) 55 | n, kc, t, v = x.size() 56 | x = x.view(n, self.kernel_size, kc//self.kernel_size, t, v) 57 | x = torch.einsum('nkctv,kvw->nctw', (x, A)) 58 | 59 | return x.contiguous() 60 | 61 | 62 | class st_gcn(nn.Module): 63 | """Applies a spatial temporal graph convolution over an input graph sequence. 64 | Args: 65 | - in_channels: (int) Number of channels in the input sequence data. 66 | - out_channels: (int) Number of channels produced by the convolution. 67 | - kernel_size: (tuple) Size of the temporal convolving kernel and 68 | graph convolving kernel. 69 | - stride: (int, optional) Stride of the temporal convolution. Default: 1 70 | - dropout: (int, optional) Dropout rate of the final output. Default: 0 71 | - residual: (bool, optional) If `True`, applies a residual mechanism. 
72 | Default: `True` 73 | Shape: 74 | - Inputs x: Graph sequence in :math: `(N, in_channels, T_{in}, V)`, 75 | A: Graph Adjecency matrix in :math: `(K, V, V)`, 76 | - Output: Graph sequence out in :math: `(N, out_channels, T_{out}, V)` 77 | where 78 | :math:`N` is a batch size, 79 | :math:`K` is the spatial kernel size, as :math:`K == kernel_size[1]`, 80 | :math:`T_{in}/T_{out}` is a length of input/output sequence, 81 | :math:`V` is the number of graph nodes. 82 | """ 83 | def __init__(self, in_channels, out_channels, kernel_size, 84 | stride=1, 85 | dropout=0, 86 | residual=True): 87 | super().__init__() 88 | assert len(kernel_size) == 2 89 | assert kernel_size[0] % 2 == 1 90 | 91 | padding = ((kernel_size[0] - 1) // 2, 0) 92 | 93 | self.gcn = GraphConvolution(in_channels, out_channels, kernel_size[1]) 94 | self.tcn = nn.Sequential(nn.BatchNorm2d(out_channels), 95 | nn.ReLU(inplace=True), 96 | nn.Conv2d(out_channels, 97 | out_channels, 98 | (kernel_size[0], 1), 99 | (stride, 1), 100 | padding), 101 | nn.BatchNorm2d(out_channels), 102 | nn.Dropout(dropout, inplace=True) 103 | ) 104 | 105 | if not residual: 106 | self.residual = lambda x: 0 107 | elif (in_channels == out_channels) and (stride == 1): 108 | self.residual = lambda x: x 109 | else: 110 | self.residual = nn.Sequential(nn.Conv2d(in_channels, 111 | out_channels, 112 | kernel_size=1, 113 | stride=(stride, 1)), 114 | nn.BatchNorm2d(out_channels) 115 | ) 116 | self.relu = nn.ReLU(inplace=True) 117 | 118 | def forward(self, x, A): 119 | res = self.residual(x) 120 | x = self.gcn(x, A) 121 | x = self.tcn(x) + res 122 | 123 | return self.relu(x) 124 | 125 | 126 | class StreamSpatialTemporalGraph(nn.Module): 127 | """Spatial temporal graph convolutional networks. 128 | Args: 129 | - in_channels: (int) Number of input channels. 130 | - graph_args: (dict) Args map of `Actionsrecognition.Utils.Graph` Class. 131 | - num_class: (int) Number of class outputs. If `None` return pooling features of 132 | the last st-gcn layer instead. 133 | - edge_importance_weighting: (bool) If `True`, adds a learnable importance 134 | weighting to the edges of the graph. 135 | - **kwargs: (optional) Other parameters for graph convolution units. 136 | Shape: 137 | - Input: :math:`(N, in_channels, T_{in}, V_{in})` 138 | - Output: :math:`(N, num_class)` where 139 | :math:`N` is a batch size, 140 | :math:`T_{in}` is a length of input sequence, 141 | :math:`V_{in}` is the number of graph nodes, 142 | or If num_class is `None`: `(N, out_channels)` 143 | :math:`out_channels` is number of out_channels of the last layer. 144 | """ 145 | def __init__(self, in_channels, graph_args, num_class=None, 146 | edge_importance_weighting=True, **kwargs): 147 | super().__init__() 148 | # Load graph. 149 | graph = Graph(**graph_args) 150 | A = torch.tensor(graph.A, dtype=torch.float32, requires_grad=False) 151 | self.register_buffer('A', A) 152 | 153 | # Networks. 
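        # The stack below follows the original ST-GCN design: ten st_gcn blocks
        # growing from 64 to 128 to 256 channels, a temporal kernel of 9 frames,
        # and a spatial kernel equal to the number of adjacency subsets A.size(0)
        # (3 for the 'spatial' partition strategy). The two stride-2 blocks
        # downsample the temporal dimension by a factor of 2 each.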
154 | spatial_kernel_size = A.size(0) 155 | temporal_kernel_size = 9 156 | kernel_size = (temporal_kernel_size, spatial_kernel_size) 157 | kwargs0 = {k: v for k, v in kwargs.items() if k != 'dropout'} 158 | 159 | self.data_bn = nn.BatchNorm1d(in_channels * A.size(1)) 160 | self.st_gcn_networks = nn.ModuleList(( 161 | st_gcn(in_channels, 64, kernel_size, 1, residual=False, **kwargs0), 162 | st_gcn(64, 64, kernel_size, 1, **kwargs), 163 | st_gcn(64, 64, kernel_size, 1, **kwargs), 164 | st_gcn(64, 64, kernel_size, 1, **kwargs), 165 | st_gcn(64, 128, kernel_size, 2, **kwargs), 166 | st_gcn(128, 128, kernel_size, 1, **kwargs), 167 | st_gcn(128, 128, kernel_size, 1, **kwargs), 168 | st_gcn(128, 256, kernel_size, 2, **kwargs), 169 | st_gcn(256, 256, kernel_size, 1, **kwargs), 170 | st_gcn(256, 256, kernel_size, 1, **kwargs) 171 | )) 172 | 173 | # initialize parameters for edge importance weighting. 174 | if edge_importance_weighting: 175 | self.edge_importance = nn.ParameterList([ 176 | nn.Parameter(torch.ones(A.size())) 177 | for i in self.st_gcn_networks 178 | ]) 179 | else: 180 | self.edge_importance = [1] * len(self.st_gcn_networks) 181 | 182 | if num_class is not None: 183 | self.cls = nn.Conv2d(256, num_class, kernel_size=1) 184 | else: 185 | self.cls = lambda x: x 186 | 187 | def forward(self, x): 188 | # data normalization. 189 | N, C, T, V = x.size() 190 | x = x.permute(0, 3, 1, 2).contiguous() # (N, V, C, T) 191 | x = x.view(N, V * C, T) 192 | x = self.data_bn(x) 193 | x = x.view(N, V, C, T) 194 | x = x.permute(0, 2, 3, 1).contiguous() 195 | x = x.view(N, C, T, V) 196 | 197 | # forward. 198 | for gcn, importance in zip(self.st_gcn_networks, self.edge_importance): 199 | x = gcn(x, self.A * importance) 200 | 201 | x = F.avg_pool2d(x, x.size()[2:]) 202 | x = self.cls(x) 203 | x = x.view(x.size(0), -1) 204 | 205 | return x 206 | 207 | 208 | class TwoStreamSpatialTemporalGraph(nn.Module): 209 | """Two inputs spatial temporal graph convolutional networks. 210 | Args: 211 | - graph_args: (dict) Args map of `Actionsrecognition.Utils.Graph` Class. 212 | - num_class: (int) Number of class outputs. 213 | - edge_importance_weighting: (bool) If `True`, adds a learnable importance 214 | weighting to the edges of the graph. 215 | - **kwargs: (optional) Other parameters for graph convolution units. 216 | Shape: 217 | - Input: :tuple of math:`((N, 3, T, V), (N, 2, T, V))` 218 | for points and motions stream where. 
219 |           :math:`N` is a batch size,
220 |           :math:`in_channels` is the data channels: 3 (x, y, score) for the points stream, 2 (mot_x, mot_y) for the motion stream,
221 |           :math:`T` is the length of the input sequence,
222 |           :math:`V` is the number of graph nodes,
223 |         - Output: :math:`(N, num_class)`
224 |     """
225 |     def __init__(self, graph_args, num_class, edge_importance_weighting=True,
226 |                  **kwargs):
227 |         super().__init__()
228 |         self.pts_stream = StreamSpatialTemporalGraph(3, graph_args, None,
229 |                                                      edge_importance_weighting,
230 |                                                      **kwargs)
231 |         self.mot_stream = StreamSpatialTemporalGraph(2, graph_args, None,
232 |                                                      edge_importance_weighting,
233 |                                                      **kwargs)
234 | 
235 |         self.fcn = nn.Linear(256 * 2, num_class)
236 | 
237 |     def forward(self, inputs):
238 |         out1 = self.pts_stream(inputs[0])
239 |         out2 = self.mot_stream(inputs[1])
240 | 
241 |         concat = torch.cat([out1, out2], dim=-1)
242 |         out = self.fcn(concat)
243 | 
244 |         return torch.sigmoid(out)
245 | 
--------------------------------------------------------------------------------
/Actionsrecognition/Utils.py:
--------------------------------------------------------------------------------
1 | ### Reference from: https://github.com/yysijie/st-gcn/blob/master/net/utils/graph.py
2 | 
3 | import os
4 | import torch
5 | import numpy as np
6 | 
7 | 
8 | class Graph:
9 |     """The graph used to model the skeletons extracted by Alpha-Pose.
10 |     Args:
11 |         - strategy: (string) must be one of the following candidates:
12 |             - uniform: Uniform Labeling,
13 |             - distance: Distance Partitioning,
14 |             - spatial: Spatial Configuration,
15 |             For more information, please refer to the section 'Partition Strategies'
16 |             in the ST-GCN paper (https://arxiv.org/abs/1801.07455).
17 |         - layout: (string) must be one of the following candidates:
18 |             - coco_cut: COCO keypoint format with 4 joints (L-R ears, L-R eyes) cut out.
19 |         - max_hop: (int) the maximal distance between two connected nodes.
20 |         - dilation: (int) controls the spacing between the kernel points.
21 | """ 22 | def __init__(self, 23 | layout='coco_cut', 24 | strategy='uniform', 25 | max_hop=1, 26 | dilation=1): 27 | self.max_hop = max_hop 28 | self.dilation = dilation 29 | 30 | self.get_edge(layout) 31 | self.hop_dis = get_hop_distance(self.num_node, self.edge, max_hop) 32 | self.get_adjacency(strategy) 33 | 34 | def get_edge(self, layout): 35 | if layout == 'coco_cut': 36 | self.num_node = 14 37 | self_link = [(i, i) for i in range(self.num_node)] 38 | neighbor_link = [(6, 4), (4, 2), (2, 13), (13, 1), (5, 3), (3, 1), (12, 10), 39 | (10, 8), (8, 2), (11, 9), (9, 7), (7, 1), (13, 0)] 40 | self.edge = self_link + neighbor_link 41 | self.center = 13 42 | else: 43 | raise ValueError('This layout is not supported!') 44 | 45 | def get_adjacency(self, strategy): 46 | valid_hop = range(0, self.max_hop + 1, self.dilation) 47 | adjacency = np.zeros((self.num_node, self.num_node)) 48 | for hop in valid_hop: 49 | adjacency[self.hop_dis == hop] = 1 50 | normalize_adjacency = normalize_digraph(adjacency) 51 | 52 | if strategy == 'uniform': 53 | A = np.zeros((1, self.num_node, self.num_node)) 54 | A[0] = normalize_adjacency 55 | self.A = A 56 | elif strategy == 'distance': 57 | A = np.zeros((len(valid_hop), self.num_node, self.num_node)) 58 | for i, hop in enumerate(valid_hop): 59 | A[i][self.hop_dis == hop] = normalize_adjacency[self.hop_dis == 60 | hop] 61 | self.A = A 62 | elif strategy == 'spatial': 63 | A = [] 64 | for hop in valid_hop: 65 | a_root = np.zeros((self.num_node, self.num_node)) 66 | a_close = np.zeros((self.num_node, self.num_node)) 67 | a_further = np.zeros((self.num_node, self.num_node)) 68 | for i in range(self.num_node): 69 | for j in range(self.num_node): 70 | if self.hop_dis[j, i] == hop: 71 | if self.hop_dis[j, self.center] == self.hop_dis[i, self.center]: 72 | a_root[j, i] = normalize_adjacency[j, i] 73 | elif self.hop_dis[j, self.center] > self.hop_dis[i, self.center]: 74 | a_close[j, i] = normalize_adjacency[j, i] 75 | else: 76 | a_further[j, i] = normalize_adjacency[j, i] 77 | if hop == 0: 78 | A.append(a_root) 79 | else: 80 | A.append(a_root + a_close) 81 | A.append(a_further) 82 | A = np.stack(A) 83 | self.A = A 84 | #self.A = np.swapaxes(np.swapaxes(A, 0, 1), 1, 2) 85 | else: 86 | raise ValueError("This strategy is not supported!") 87 | 88 | 89 | def get_hop_distance(num_node, edge, max_hop=1): 90 | A = np.zeros((num_node, num_node)) 91 | for i, j in edge: 92 | A[j, i] = 1 93 | A[i, j] = 1 94 | 95 | # compute hop steps 96 | hop_dis = np.zeros((num_node, num_node)) + np.inf 97 | transfer_mat = [np.linalg.matrix_power(A, d) for d in range(max_hop + 1)] 98 | arrive_mat = (np.stack(transfer_mat) > 0) 99 | for d in range(max_hop, -1, -1): 100 | hop_dis[arrive_mat[d]] = d 101 | return hop_dis 102 | 103 | 104 | def normalize_digraph(A): 105 | Dl = np.sum(A, 0) 106 | num_node = A.shape[0] 107 | Dn = np.zeros((num_node, num_node)) 108 | for i in range(num_node): 109 | if Dl[i] > 0: 110 | Dn[i, i] = Dl[i]**(-1) 111 | AD = np.dot(A, Dn) 112 | return AD 113 | 114 | 115 | def normalize_undigraph(A): 116 | Dl = np.sum(A, 0) 117 | num_node = A.shape[0] 118 | Dn = np.zeros((num_node, num_node)) 119 | for i in range(num_node): 120 | if Dl[i] > 0: 121 | Dn[i, i] = Dl[i]**(-0.5) 122 | DAD = np.dot(np.dot(Dn, A), Dn) 123 | return DAD 124 | -------------------------------------------------------------------------------- /Actionsrecognition/train.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import torch 4 | import 
pickle 5 | import numpy as np 6 | import torch.nn.functional as F 7 | from shutil import copyfile 8 | from tqdm import tqdm 9 | from torch.utils import data 10 | from torch.optim.adadelta import Adadelta 11 | from sklearn.model_selection import train_test_split 12 | 13 | from Actionsrecognition.Models import * 14 | from Visualizer import plot_graphs, plot_confusion_metrix 15 | 16 | 17 | save_folder = 'saved/TSSTG(pts+mot)-01(cf+hm-hm)' 18 | 19 | device = 'cuda' 20 | epochs = 30 21 | batch_size = 32 22 | 23 | # DATA FILES. 24 | # Should be in format of 25 | # inputs: (N_samples, time_steps, graph_node, channels), 26 | # labels: (N_samples, num_class) 27 | # and do some of normalizations on it. Default data create from: 28 | # Data.create_dataset_(1-3).py 29 | # where 30 | # time_steps: Number of frame input sequence, Default: 30 31 | # graph_node: Number of node in skeleton, Default: 14 32 | # channels: Inputs data (x, y and scores), Default: 3 33 | # num_class: Number of pose class to train, Default: 7 34 | 35 | data_files = ['../Data/Coffee_room_new-set(labelXscrw).pkl', 36 | '../Data/Home_new-set(labelXscrw).pkl'] 37 | class_names = ['Standing', 'Walking', 'Sitting', 'Lying Down', 38 | 'Stand up', 'Sit down', 'Fall Down'] 39 | num_class = len(class_names) 40 | 41 | 42 | def load_dataset(data_files, batch_size, split_size=0): 43 | """Load data files into torch DataLoader with/without spliting train-test. 44 | """ 45 | features, labels = [], [] 46 | for fil in data_files: 47 | with open(fil, 'rb') as f: 48 | fts, lbs = pickle.load(f) 49 | features.append(fts) 50 | labels.append(lbs) 51 | del fts, lbs 52 | features = np.concatenate(features, axis=0) 53 | labels = np.concatenate(labels, axis=0) 54 | 55 | if split_size > 0: 56 | x_train, x_valid, y_train, y_valid = train_test_split(features, labels, test_size=split_size, 57 | random_state=9) 58 | train_set = data.TensorDataset(torch.tensor(x_train, dtype=torch.float32).permute(0, 3, 1, 2), 59 | torch.tensor(y_train, dtype=torch.float32)) 60 | valid_set = data.TensorDataset(torch.tensor(x_valid, dtype=torch.float32).permute(0, 3, 1, 2), 61 | torch.tensor(y_valid, dtype=torch.float32)) 62 | train_loader = data.DataLoader(train_set, batch_size, shuffle=True) 63 | valid_loader = data.DataLoader(valid_set, batch_size) 64 | else: 65 | train_set = data.TensorDataset(torch.tensor(features, dtype=torch.float32).permute(0, 3, 1, 2), 66 | torch.tensor(labels, dtype=torch.float32)) 67 | train_loader = data.DataLoader(train_set, batch_size, shuffle=True) 68 | valid_loader = None 69 | return train_loader, valid_loader 70 | 71 | 72 | def accuracy_batch(y_pred, y_true): 73 | return (y_pred.argmax(1) == y_true.argmax(1)).mean() 74 | 75 | 76 | def set_training(model, mode=True): 77 | for p in model.parameters(): 78 | p.requires_grad = mode 79 | model.train(mode) 80 | return model 81 | 82 | 83 | if __name__ == '__main__': 84 | save_folder = os.path.join(os.path.dirname(__file__), save_folder) 85 | if not os.path.exists(save_folder): 86 | os.makedirs(save_folder) 87 | 88 | # DATA. 89 | train_loader, _ = load_dataset(data_files[0:1], batch_size) 90 | valid_loader, train_loader_ = load_dataset(data_files[1:2], batch_size, 0.2) 91 | 92 | train_loader = data.DataLoader(data.ConcatDataset([train_loader.dataset, train_loader_.dataset]), 93 | batch_size, shuffle=True) 94 | dataloader = {'train': train_loader, 'valid': valid_loader} 95 | del train_loader_ 96 | 97 | # MODEL. 
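    # The two-stream model takes the skeleton sequence (N, 3, T, V) in one stream
    # and the frame-to-frame motion (N, 2, T-1, V) in the other, and ends with a
    # sigmoid over num_class outputs, which is why BCELoss is used as the criterion.
    # A quick shape check (a sketch, assuming a batch of 30-frame, 14-node skeletons):
    #   pts = torch.randn(8, 3, 30, 14)
    #   mot = pts[:, :2, 1:, :] - pts[:, :2, :-1, :]
    #   out = TwoStreamSpatialTemporalGraph({'strategy': 'spatial'}, 7)((pts, mot))
    #   out.shape  # -> torch.Size([8, 7])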
98 | graph_args = {'strategy': 'spatial'} 99 | model = TwoStreamSpatialTemporalGraph(graph_args, num_class).to(device) 100 | 101 | #optimizer = torch.optim.Adam(model.parameters(), lr=0.001) 102 | optimizer = Adadelta(model.parameters()) 103 | 104 | losser = torch.nn.BCELoss() 105 | 106 | # TRAINING. 107 | loss_list = {'train': [], 'valid': []} 108 | accu_list = {'train': [], 'valid': []} 109 | for e in range(epochs): 110 | print('Epoch {}/{}'.format(e, epochs - 1)) 111 | for phase in ['train', 'valid']: 112 | if phase == 'train': 113 | model = set_training(model, True) 114 | else: 115 | model = set_training(model, False) 116 | 117 | run_loss = 0.0 118 | run_accu = 0.0 119 | with tqdm(dataloader[phase], desc=phase) as iterator: 120 | for pts, lbs in iterator: 121 | # Create motion input by distance of points (x, y) of the same node 122 | # in two frames. 123 | mot = pts[:, :2, 1:, :] - pts[:, :2, :-1, :] 124 | 125 | mot = mot.to(device) 126 | pts = pts.to(device) 127 | lbs = lbs.to(device) 128 | 129 | # Forward. 130 | out = model((pts, mot)) 131 | loss = losser(out, lbs) 132 | 133 | if phase == 'train': 134 | # Backward. 135 | model.zero_grad() 136 | loss.backward() 137 | optimizer.step() 138 | 139 | run_loss += loss.item() 140 | accu = accuracy_batch(out.detach().cpu().numpy(), 141 | lbs.detach().cpu().numpy()) 142 | run_accu += accu 143 | 144 | iterator.set_postfix_str(' loss: {:.4f}, accu: {:.4f}'.format( 145 | loss.item(), accu)) 146 | iterator.update() 147 | #break 148 | loss_list[phase].append(run_loss / len(iterator)) 149 | accu_list[phase].append(run_accu / len(iterator)) 150 | #break 151 | 152 | print('Summary epoch:\n - Train loss: {:.4f}, accu: {:.4f}\n - Valid loss:' 153 | ' {:.4f}, accu: {:.4f}'.format(loss_list['train'][-1], accu_list['train'][-1], 154 | loss_list['valid'][-1], accu_list['valid'][-1])) 155 | 156 | # SAVE. 157 | torch.save(model.state_dict(), os.path.join(save_folder, 'tsstg-model.pth')) 158 | 159 | plot_graphs(list(loss_list.values()), list(loss_list.keys()), 160 | 'Last Train: {:.2f}, Valid: {:.2f}'.format( 161 | loss_list['train'][-1], loss_list['valid'][-1] 162 | ), 'Loss', xlim=[0, epochs], 163 | save=os.path.join(save_folder, 'loss_graph.png')) 164 | plot_graphs(list(accu_list.values()), list(accu_list.keys()), 165 | 'Last Train: {:.2f}, Valid: {:.2f}'.format( 166 | accu_list['train'][-1], accu_list['valid'][-1] 167 | ), 'Accu', xlim=[0, epochs], 168 | save=os.path.join(save_folder, 'accu_graph.png')) 169 | 170 | #break 171 | 172 | del train_loader, valid_loader 173 | 174 | model.load_state_dict(torch.load(os.path.join(save_folder, 'tsstg-model.pth'))) 175 | 176 | # EVALUATION. 
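    # Reload the weights saved after the last epoch and score them on the whole of
    # data_files[1]. Samples from this file were also used for training and
    # validation above, so the confusion matrix below is not a held-out test result.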
177 | model = set_training(model, False) 178 | data_file = data_files[1] 179 | eval_loader, _ = load_dataset([data_file], 32) 180 | 181 | print('Evaluation.') 182 | run_loss = 0.0 183 | run_accu = 0.0 184 | y_preds = [] 185 | y_trues = [] 186 | with tqdm(eval_loader, desc='eval') as iterator: 187 | for pts, lbs in iterator: 188 | mot = pts[:, :2, 1:, :] - pts[:, :2, :-1, :] 189 | mot = mot.to(device) 190 | pts = pts.to(device) 191 | lbs = lbs.to(device) 192 | 193 | out = model((pts, mot)) 194 | loss = losser(out, lbs) 195 | 196 | run_loss += loss.item() 197 | accu = accuracy_batch(out.detach().cpu().numpy(), 198 | lbs.detach().cpu().numpy()) 199 | run_accu += accu 200 | 201 | y_preds.extend(out.argmax(1).detach().cpu().numpy()) 202 | y_trues.extend(lbs.argmax(1).cpu().numpy()) 203 | 204 | iterator.set_postfix_str(' loss: {:.4f}, accu: {:.4f}'.format( 205 | loss.item(), accu)) 206 | iterator.update() 207 | 208 | run_loss = run_loss / len(iterator) 209 | run_accu = run_accu / len(iterator) 210 | 211 | plot_confusion_metrix(y_trues, y_preds, class_names, 'Eval on: {}\nLoss: {:.4f}, Accu{:.4f}'.format( 212 | os.path.basename(data_file), run_loss, run_accu 213 | ), 'true', save=os.path.join(save_folder, '{}-confusion_matrix.png'.format( 214 | os.path.basename(data_file).split('.')[0]))) 215 | 216 | print('Eval Loss: {:.4f}, Accu: {:.4f}'.format(run_loss, run_accu)) 217 | -------------------------------------------------------------------------------- /App.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import time 4 | import torch 5 | import screeninfo 6 | import numpy as np 7 | import tkinter as tk 8 | import matplotlib.pyplot as plt 9 | from PIL import Image, ImageTk 10 | 11 | from Detection.Utils import ResizePadding 12 | from CameraLoader import CamLoader, CamLoader_Q 13 | from DetectorLoader import TinyYOLOv3_onecls 14 | 15 | from PoseEstimateLoader import SPPE_FastPose 16 | from fn import draw_single 17 | 18 | from Track.Tracker import Detection, Tracker 19 | from ActionsEstLoader import TSSTG 20 | 21 | import matplotlib 22 | matplotlib.use('TkAgg') 23 | import matplotlib.pyplot as plt 24 | from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg, NavigationToolbar2Tk 25 | 26 | 27 | def get_monitor_from_coord(x, y): # multiple monitor dealing. 
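    # Return the monitor whose bounds contain the point (x, y); if none matches,
    # fall back to the first monitor reported by screeninfo.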
28 | monitors = screeninfo.get_monitors() 29 | for m in reversed(monitors): 30 | if m.x <= x <= m.width + m.x and m.y <= y <= m.height + m.y: 31 | return m 32 | return monitors[0] 33 | 34 | 35 | class Models: 36 | def __init__(self): 37 | self.inp_dets = 416 38 | self.inp_pose = (256, 192) 39 | self.pose_backbone = 'resnet50' 40 | self.show_detected = True 41 | self.show_skeleton = True 42 | self.device = 'cuda' 43 | 44 | self.load_models() 45 | 46 | def load_models(self): 47 | self.detect_model = TinyYOLOv3_onecls(self.inp_dets, device=self.device) 48 | self.pose_model = SPPE_FastPose(self.pose_backbone, self.inp_pose[0], self.inp_pose[1], 49 | device=self.device) 50 | self.tracker = Tracker(30, n_init=3) 51 | self.action_model = TSSTG(device=self.device) 52 | 53 | def kpt2bbox(self, kpt, ex=20): 54 | return np.array((kpt[:, 0].min() - ex, kpt[:, 1].min() - ex, 55 | kpt[:, 0].max() + ex, kpt[:, 1].max() + ex)) 56 | 57 | def process_frame(self, frame): 58 | detected = self.detect_model.detect(frame, need_resize=False, expand_bb=10) 59 | 60 | self.tracker.predict() 61 | for track in self.tracker.tracks: 62 | det = torch.tensor([track.to_tlbr().tolist() + [1.0, 1.0, 0.0]], dtype=torch.float32) 63 | detected = torch.cat([detected, det], dim=0) if detected is not None else det 64 | 65 | detections = [] 66 | if detected is not None: 67 | poses = self.pose_model.predict(frame, detected[:, 0:4], detected[:, 4]) 68 | detections = [Detection(self.kpt2bbox(ps['keypoints'].numpy()), 69 | np.concatenate((ps['keypoints'].numpy(), 70 | ps['kp_score'].numpy()), axis=1), 71 | ps['kp_score'].mean().numpy()) for ps in poses] 72 | if self.show_detected: 73 | for bb in detected[:, 0:5]: 74 | frame = cv2.rectangle(frame, (bb[0], bb[1]), (bb[2], bb[3]), (0, 0, 255), 1) 75 | 76 | self.tracker.update(detections) 77 | for i, track in enumerate(self.tracker.tracks): 78 | if not track.is_confirmed(): 79 | continue 80 | track_id = track.track_id 81 | bbox = track.to_tlbr().astype(int) 82 | center = track.get_center().astype(int) 83 | 84 | action = 'pending..' 
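            # The label stays 'pending..' until the tracker has buffered a full
            # 30-frame keypoint sequence, the time_steps length the TSSTG model
            # was trained on.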
85 | clr = (0, 255, 0) 86 | if len(track.keypoints_list) == 30: 87 | pts = np.array(track.keypoints_list, dtype=np.float32) 88 | out = self.action_model.predict(pts, frame.shape[:2]) 89 | action_name = self.action_model.class_names[out[0].argmax()] 90 | action = '{}: {:.2f}%'.format(action_name, out[0].max() * 100) 91 | if action_name == 'Fall Down': 92 | clr = (255, 0, 0) 93 | elif action_name == 'Lying Down': 94 | clr = (255, 200, 0) 95 | 96 | track.actions = out 97 | 98 | if track.time_since_update == 0: 99 | if self.show_skeleton: 100 | frame = draw_single(frame, track.keypoints_list[-1]) 101 | frame = cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 255, 0), 1) 102 | frame = cv2.putText(frame, str(track_id), (center[0], center[1]), cv2.FONT_HERSHEY_DUPLEX, 103 | 0.4, (255, 0, 0), 2) 104 | frame = cv2.putText(frame, action, (bbox[0] + 5, bbox[1] + 15), cv2.FONT_HERSHEY_COMPLEX, 105 | 0.4, clr, 1) 106 | 107 | return frame 108 | 109 | 110 | class main: 111 | def __init__(self, master: tk.Tk): 112 | self.master = master 113 | self.master.title('Human Falling Detection') 114 | self.master.protocol('WM_DELETE_WINDOW', self._on_closing) 115 | self.main_screen = get_monitor_from_coord(master.winfo_x(), master.winfo_y()) 116 | 117 | self.width = int(self.main_screen.width * .85) 118 | self.height = int(self.main_screen.height * .85) 119 | self.master.geometry('{}x{}'.format(self.width, self.height + 15)) 120 | 121 | self.cam = None 122 | self.canvas = tk.Canvas(master, width=int(self.width * .65), height=self.height) 123 | self.canvas.grid(row=0, column=0, padx=5, pady=5, sticky=tk.NSEW) 124 | 125 | fig = plt.Figure(figsize=(6, 8), dpi=100) 126 | fig.suptitle('Actions') 127 | self.ax = fig.add_subplot(111) 128 | self.fig_canvas = FigureCanvasTkAgg(fig, self.master) 129 | self.fig_canvas.get_tk_widget().grid(row=0, column=1, padx=5, pady=5, sticky=tk.NSEW) 130 | 131 | # Load Models 132 | self.resize_fn = ResizePadding(416, 416) 133 | self.models = Models() 134 | 135 | self.actions_graph() 136 | 137 | self.delay = 15 138 | self.load_cam('../Data/falldata/Home/Videos/video (1).avi') 139 | self.update() 140 | 141 | def preproc(self, image): 142 | image = self.resize_fn(image) 143 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 144 | return image 145 | 146 | def load_cam(self, source): 147 | if self.cam: 148 | self.cam.__del__() 149 | 150 | if type(source) is str and os.path.isfile(source): 151 | self.cam = CamLoader_Q(source, queue_size=1000, preprocess=self.preproc).start() 152 | else: 153 | self.cam = CamLoader(source, preprocess=self.preproc).start() 154 | 155 | def actions_graph(self): 156 | if len(self.models.tracker.tracks) == 0: 157 | return 158 | track = self.models.tracker.tracks[0] 159 | if hasattr(track, 'actions'): 160 | y_labels = self.models.action_model.class_names 161 | self.ax.barh(np.arange(len(y_labels)), track.actions) 162 | self.fig_canvas.draw() 163 | 164 | def update(self): 165 | if self.cam is None: 166 | return 167 | if self.cam.grabbed(): 168 | frame = self.cam.getitem() 169 | 170 | frame = self.models.process_frame(frame) 171 | 172 | frame = cv2.resize(frame, (self.canvas.winfo_width(), self.canvas.winfo_height()), 173 | interpolation=cv2.INTER_CUBIC) 174 | self.photo = ImageTk.PhotoImage(image=Image.fromarray(frame)) 175 | self.canvas.create_image(0, 0, image=self.photo, anchor=tk.NW) 176 | else: 177 | self.cam.stop() 178 | 179 | self._cam = self.master.after(self.delay, self.update) 180 | 181 | def _on_closing(self): 182 | 
self.master.after_cancel(self._cam) 183 | if self.cam: 184 | self.cam.stop() 185 | self.cam.__del__() 186 | self.master.destroy() 187 | 188 | 189 | root = tk.Tk() 190 | app = main(root) 191 | root.mainloop() 192 | -------------------------------------------------------------------------------- /CameraLoader.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import time 4 | import torch 5 | import numpy as np 6 | 7 | from queue import Queue 8 | from threading import Thread, Lock 9 | 10 | 11 | class CamLoader: 12 | """Use threading to capture a frame from camera for faster frame load. 13 | Recommend for camera or webcam. 14 | 15 | Args: 16 | camera: (int, str) Source of camera or video., 17 | preprocess: (Callable function) to process the frame before return. 18 | """ 19 | def __init__(self, camera, preprocess=None, ori_return=False): 20 | self.stream = cv2.VideoCapture(camera) 21 | assert self.stream.isOpened(), 'Cannot read camera source!' 22 | self.fps = self.stream.get(cv2.CAP_PROP_FPS) 23 | self.frame_size = (int(self.stream.get(cv2.CAP_PROP_FRAME_WIDTH)), 24 | int(self.stream.get(cv2.CAP_PROP_FRAME_HEIGHT))) 25 | 26 | self.stopped = False 27 | self.ret = False 28 | self.frame = None 29 | self.ori_frame = None 30 | self.read_lock = Lock() 31 | self.ori = ori_return 32 | 33 | self.preprocess_fn = preprocess 34 | 35 | def start(self): 36 | self.t = Thread(target=self.update, args=()) # , daemon=True) 37 | self.t.start() 38 | c = 0 39 | while not self.ret: 40 | time.sleep(0.1) 41 | c += 1 42 | if c > 20: 43 | self.stop() 44 | raise TimeoutError('Can not get a frame from camera!!!') 45 | return self 46 | 47 | def update(self): 48 | while not self.stopped: 49 | ret, frame = self.stream.read() 50 | self.read_lock.acquire() 51 | self.ori_frame = frame.copy() 52 | if ret and self.preprocess_fn is not None: 53 | frame = self.preprocess_fn(frame) 54 | 55 | self.ret, self.frame = ret, frame 56 | self.read_lock.release() 57 | 58 | def grabbed(self): 59 | """Return `True` if can read a frame.""" 60 | return self.ret 61 | 62 | def getitem(self): 63 | self.read_lock.acquire() 64 | frame = self.frame.copy() 65 | ori_frame = self.ori_frame.copy() 66 | self.read_lock.release() 67 | if self.ori: 68 | return frame, ori_frame 69 | else: 70 | return frame 71 | 72 | def stop(self): 73 | if self.stopped: 74 | return 75 | self.stopped = True 76 | if self.t.is_alive(): 77 | self.t.join() 78 | self.stream.release() 79 | 80 | def __del__(self): 81 | if self.stream.isOpened(): 82 | self.stream.release() 83 | 84 | def __exit__(self, exc_type, exc_val, exc_tb): 85 | if self.stream.isOpened(): 86 | self.stream.release() 87 | 88 | 89 | class CamLoader_Q: 90 | """Use threading and queue to capture a frame and store to queue for pickup in sequence. 91 | Recommend for video file. 92 | 93 | Args: 94 | camera: (int, str) Source of camera or video., 95 | batch_size: (int) Number of batch frame to store in queue. Default: 1, 96 | queue_size: (int) Maximum queue size. Default: 256, 97 | preprocess: (Callable function) to process the frame before return. 98 | """ 99 | def __init__(self, camera, batch_size=1, queue_size=256, preprocess=None): 100 | self.stream = cv2.VideoCapture(camera) 101 | assert self.stream.isOpened(), 'Cannot read camera source!' 
102 | self.fps = self.stream.get(cv2.CAP_PROP_FPS) 103 | self.frame_size = (int(self.stream.get(cv2.CAP_PROP_FRAME_WIDTH)), 104 | int(self.stream.get(cv2.CAP_PROP_FRAME_HEIGHT))) 105 | 106 | # Queue for storing each frames. 107 | 108 | self.stopped = False 109 | self.batch_size = batch_size 110 | self.Q = Queue(maxsize=queue_size) 111 | 112 | self.preprocess_fn = preprocess 113 | 114 | def start(self): 115 | t = Thread(target=self.update, args=(), daemon=True).start() 116 | c = 0 117 | while not self.grabbed(): 118 | time.sleep(0.1) 119 | c += 1 120 | if c > 20: 121 | self.stop() 122 | raise TimeoutError('Can not get a frame from camera!!!') 123 | return self 124 | 125 | def update(self): 126 | while not self.stopped: 127 | if not self.Q.full(): 128 | frames = [] 129 | for k in range(self.batch_size): 130 | ret, frame = self.stream.read() 131 | if not ret: 132 | self.stop() 133 | return 134 | 135 | if self.preprocess_fn is not None: 136 | frame = self.preprocess_fn(frame) 137 | 138 | frames.append(frame) 139 | frames = np.stack(frames) 140 | self.Q.put(frames) 141 | else: 142 | with self.Q.mutex: 143 | self.Q.queue.clear() 144 | # time.sleep(0.05) 145 | 146 | def grabbed(self): 147 | """Return `True` if can read a frame.""" 148 | return self.Q.qsize() > 0 149 | 150 | def getitem(self): 151 | return self.Q.get().squeeze() 152 | 153 | def stop(self): 154 | if self.stopped: 155 | return 156 | self.stopped = True 157 | self.stream.release() 158 | 159 | def __len__(self): 160 | return self.Q.qsize() 161 | 162 | def __del__(self): 163 | if self.stream.isOpened(): 164 | self.stream.release() 165 | 166 | def __exit__(self, exc_type, exc_val, exc_tb): 167 | if self.stream.isOpened(): 168 | self.stream.release() 169 | 170 | 171 | if __name__ == '__main__': 172 | fps_time = 0 173 | 174 | # Using threading. 175 | cam = CamLoader(0).start() 176 | while cam.grabbed(): 177 | frames = cam.getitem() 178 | 179 | frames = cv2.putText(frames, 'FPS: %f' % (1.0 / (time.time() - fps_time)), 180 | (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2) 181 | fps_time = time.time() 182 | cv2.imshow('frame', frames) 183 | 184 | if cv2.waitKey(1) & 0xFF == ord('q'): 185 | break 186 | cam.stop() 187 | cv2.destroyAllWindows() 188 | 189 | # Normal video capture. 190 | """cam = cv2.VideoCapture(0) 191 | while True: 192 | ret, frame = cam.read() 193 | if ret: 194 | #time.sleep(0.05) 195 | #frame = (cv2.flip(frame, 1) / 255.).astype(np.float) 196 | 197 | frame = cv2.putText(frame, 'FPS: %f' % (1.0 / (time.time() - fps_time)), 198 | (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2) 199 | fps_time = time.time() 200 | cv2.imshow('frame', frame) 201 | if cv2.waitKey(1) & 0xFF == ord('q'): 202 | break 203 | cam.release() 204 | cv2.destroyAllWindows()""" 205 | -------------------------------------------------------------------------------- /Data/create_dataset_1.py: -------------------------------------------------------------------------------- 1 | """ 2 | This script to create .csv videos frames action annotation file. 3 | 4 | - It will play a video frame by frame control the flow by [a] and [d] 5 | to play previos or next frame. 6 | - Open the annot_file (.csv) and label each frame of video with number 7 | of action class. 8 | """ 9 | 10 | import os 11 | import cv2 12 | import time 13 | import numpy as np 14 | import pandas as pd 15 | import matplotlib.pyplot as plt 16 | 17 | class_names = ['Standing', 'Walking', 'Sitting', 'Lying Down', 18 | 'Stand up', 'Sit down', 'Fall Down'] # label. 
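# Labels written to annot_file are 1-based indices into class_names (the playback
# loop below looks up class_names[label - 1]); create_csv() initializes every
# frame's label to 0, i.e. not yet annotated.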
19 | 20 | video_folder = '../Data/falldata/Home/Videos' 21 | annot_file = '../Data/Home_new.csv' 22 | 23 | index_video_to_play = 0 # Choose video to play. 24 | 25 | 26 | def create_csv(folder): 27 | list_file = sorted(os.listdir(folder)) 28 | cols = ['video', 'frame', 'label'] 29 | df = pd.DataFrame(columns=cols) 30 | for fil in list_file: 31 | cap = cv2.VideoCapture(os.path.join(folder, fil)) 32 | frames_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) 33 | video = np.array([fil] * frames_count) 34 | frame = np.arange(1, frames_count + 1) 35 | label = np.array([0] * frames_count) 36 | rows = np.stack([video, frame, label], axis=1) 37 | df = df.append(pd.DataFrame(rows, columns=cols), 38 | ignore_index=True) 39 | cap.release() 40 | df.to_csv(annot_file, index=False) 41 | 42 | 43 | if not os.path.exists(annot_file): 44 | create_csv(video_folder) 45 | 46 | annot = pd.read_csv(annot_file) 47 | video_list = annot.iloc[:, 0].unique() 48 | video_file = os.path.join(video_folder, video_list[index_video_to_play]) 49 | print(os.path.basename(video_file)) 50 | 51 | annot = annot[annot['video'] == video_list[index_video_to_play]].reset_index(drop=True) 52 | frames_idx = annot.iloc[:, 1].tolist() 53 | 54 | cap = cv2.VideoCapture(video_file) 55 | frames_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) 56 | 57 | assert frames_count == len(frames_idx), 'frame count not equal! {} and {}'.format( 58 | len(frames_idx), frames_count 59 | ) 60 | 61 | i = 0 62 | while True: 63 | cap.set(cv2.CAP_PROP_POS_FRAMES, i) 64 | ret, frame = cap.read() 65 | if ret: 66 | cls_name = class_names[int(annot.iloc[i, -1]) - 1] 67 | frame = cv2.resize(frame, (0, 0), fx=1.5, fy=1.5) 68 | frame = cv2.putText(frame, 'Frame: {} Pose: {}'.format(i+1, cls_name), 69 | (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2) 70 | cv2.imshow('frame', frame) 71 | 72 | key = cv2.waitKey(0) & 0xFF 73 | if key == ord('q'): 74 | break 75 | elif key == ord('d'): 76 | i += 1 77 | continue 78 | elif key == ord('a'): 79 | i -= 1 80 | continue 81 | else: 82 | break 83 | 84 | cap.release() 85 | cv2.destroyAllWindows() 86 | -------------------------------------------------------------------------------- /Data/create_dataset_2.py: -------------------------------------------------------------------------------- 1 | """ 2 | This script to extract skeleton joints position and score. 3 | 4 | - This 'annot_folder' is a action class and bounding box for each frames that came with dataset. 5 | Should be in format of [frame_idx, action_cls, xmin, ymin, xmax, ymax] 6 | Use for crop a person to use in pose estimation model. 7 | - If have no annotation file you can leave annot_folder = '' for use Detector model to get the 8 | bounding box. 9 | """ 10 | 11 | import os 12 | import cv2 13 | import time 14 | import torch 15 | import pandas as pd 16 | import numpy as np 17 | import torchvision.transforms as transforms 18 | 19 | from DetectorLoader import TinyYOLOv3_onecls 20 | from PoseEstimateLoader import SPPE_FastPose 21 | from fn import vis_frame_fast 22 | 23 | save_path = '../../Data/Home_new-pose+score.csv' 24 | 25 | annot_file = '../../Data/Home_new.csv' # from create_dataset_1.py 26 | video_folder = '../Data/falldata/Home/Videos' 27 | annot_folder = '../Data/falldata/Home/Annotation_files' # bounding box annotation for each frame. 28 | 29 | # DETECTION MODEL. 30 | detector = TinyYOLOv3_onecls() 31 | 32 | # POSE MODEL. 33 | inp_h = 320 34 | inp_w = 256 35 | pose_estimator = SPPE_FastPose(inp_h, inp_w) 36 | 37 | # with score. 
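# Each of the 13 keypoints (nose, shoulders, elbows, wrists, hips, knees, ankles)
# is stored as _x, _y and _s (confidence score) columns, so a row has
# 13 * 3 = 39 pose values plus 'video', 'frame' and 'label' (42 columns in total).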
38 | columns = ['video', 'frame', 'Nose_x', 'Nose_y', 'Nose_s', 'LShoulder_x', 'LShoulder_y', 'LShoulder_s', 39 | 'RShoulder_x', 'RShoulder_y', 'RShoulder_s', 'LElbow_x', 'LElbow_y', 'LElbow_s', 'RElbow_x', 40 | 'RElbow_y', 'RElbow_s', 'LWrist_x', 'LWrist_y', 'LWrist_s', 'RWrist_x', 'RWrist_y', 'RWrist_s', 41 | 'LHip_x', 'LHip_y', 'LHip_s', 'RHip_x', 'RHip_y', 'RHip_s', 'LKnee_x', 'LKnee_y', 'LKnee_s', 42 | 'RKnee_x', 'RKnee_y', 'RKnee_s', 'LAnkle_x', 'LAnkle_y', 'LAnkle_s', 'RAnkle_x', 'RAnkle_y', 43 | 'RAnkle_s', 'label'] 44 | 45 | 46 | def normalize_points_with_size(points_xy, width, height, flip=False): 47 | points_xy[:, 0] /= width 48 | points_xy[:, 1] /= height 49 | if flip: 50 | points_xy[:, 0] = 1 - points_xy[:, 0] 51 | return points_xy 52 | 53 | 54 | annot = pd.read_csv(annot_file) 55 | vid_list = annot['video'].unique() 56 | for vid in vid_list: 57 | print(f'Process on: {vid}') 58 | df = pd.DataFrame(columns=columns) 59 | cur_row = 0 60 | 61 | # Pose Labels. 62 | frames_label = annot[annot['video'] == vid].reset_index(drop=True) 63 | 64 | cap = cv2.VideoCapture(os.path.join(video_folder, vid)) 65 | frames_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) 66 | frame_size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), 67 | int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))) 68 | 69 | # Bounding Boxs Labels. 70 | annot_file = os.path.join(annot_folder, vid.split('.')[0], '.txt') 71 | annot = None 72 | if os.path.exists(annot_file): 73 | annot = pd.read_csv(annot_file, header=None, 74 | names=['frame_idx', 'class', 'xmin', 'ymin', 'xmax', 'ymax']) 75 | annot = annot.dropna().reset_index(drop=True) 76 | 77 | assert frames_count == len(annot), 'frame count not equal! {} and {}'.format(frames_count, len(annot)) 78 | 79 | fps_time = 0 80 | i = 1 81 | while True: 82 | ret, frame = cap.read() 83 | if ret: 84 | frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) 85 | cls_idx = int(frames_label[frames_label['frame'] == i]['label']) 86 | 87 | if annot: 88 | bb = np.array(annot.iloc[i-1, 2:].astype(int)) 89 | else: 90 | bb = detector.detect(frame)[0, :4].numpy().astype(int) 91 | bb[:2] = np.maximum(0, bb[:2] - 5) 92 | bb[2:] = np.minimum(frame_size, bb[2:] + 5) if bb[2:].any() != 0 else bb[2:] 93 | 94 | result = [] 95 | if bb.any() != 0: 96 | result = pose_estimator.predict(frame, torch.tensor(bb[None, ...]), 97 | torch.tensor([[1.0]])) 98 | 99 | if len(result) > 0: 100 | pt_norm = normalize_points_with_size(result[0]['keypoints'].numpy().copy(), 101 | frame_size[0], frame_size[1]) 102 | pt_norm = np.concatenate((pt_norm, result[0]['kp_score']), axis=1) 103 | 104 | #idx = result[0]['kp_score'] <= 0.05 105 | #pt_norm[idx.squeeze()] = np.nan 106 | row = [vid, i, *pt_norm.flatten().tolist(), cls_idx] 107 | scr = result[0]['kp_score'].mean() 108 | else: 109 | row = [vid, i, *[np.nan] * (13 * 3), cls_idx] 110 | scr = 0.0 111 | 112 | df.loc[cur_row] = row 113 | cur_row += 1 114 | 115 | # VISUALIZE. 
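            # Drawing happens on the RGB frame used by the models; it is flipped
            # back to BGR with frame[:, :, ::-1] below before cv2.imshow.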
116 | frame = vis_frame_fast(frame, result) 117 | frame = cv2.rectangle(frame, (bb[0], bb[1]), (bb[2], bb[3]), (0, 255, 0), 2) 118 | frame = cv2.putText(frame, 'Frame: {}, Pose: {}, Score: {:.4f}'.format(i, cls_idx, scr), 119 | (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2) 120 | frame = frame[:, :, ::-1] 121 | fps_time = time.time() 122 | i += 1 123 | 124 | cv2.imshow('frame', frame) 125 | if cv2.waitKey(1) & 0xFF == ord('q'): 126 | break 127 | else: 128 | break 129 | 130 | cap.release() 131 | cv2.destroyAllWindows() 132 | 133 | if os.path.exists(save_path): 134 | df.to_csv(save_path, mode='a', header=False, index=False) 135 | else: 136 | df.to_csv(save_path, mode='w', index=False) 137 | 138 | -------------------------------------------------------------------------------- /Data/create_dataset_3.py: -------------------------------------------------------------------------------- 1 | """ 2 | This script to create dataset and labels by clean off some NaN, do a normalization, 3 | label smoothing and label weights by scores. 4 | 5 | """ 6 | import os 7 | import pickle 8 | import numpy as np 9 | import pandas as pd 10 | 11 | 12 | class_names = ['Standing', 'Walking', 'Sitting', 'Lying Down', 13 | 'Stand up', 'Sit down', 'Fall Down'] 14 | main_parts = ['LShoulder_x', 'LShoulder_y', 'RShoulder_x', 'RShoulder_y', 'LHip_x', 'LHip_y', 15 | 'RHip_x', 'RHip_y'] 16 | main_idx_parts = [1, 2, 7, 8, -1] # 1.5 17 | 18 | csv_pose_file = '../Data/Coffee_room_new-pose+score.csv' 19 | save_path = '../../Data/Coffee_room_new-set(labelXscrw).pkl' 20 | 21 | # Params. 22 | smooth_labels_step = 8 23 | n_frames = 30 24 | skip_frame = 1 25 | 26 | annot = pd.read_csv(csv_pose_file) 27 | 28 | # Remove NaN. 29 | idx = annot.iloc[:, 2:-1][main_parts].isna().sum(1) > 0 30 | idx = np.where(idx)[0] 31 | annot = annot.drop(idx) 32 | # One-Hot Labels. 33 | label_onehot = pd.get_dummies(annot['label']) 34 | annot = annot.drop('label', axis=1).join(label_onehot) 35 | cols = label_onehot.columns.values 36 | 37 | 38 | def scale_pose(xy): 39 | """ 40 | Normalize pose points by scale with max/min value of each pose. 
41 | xy : (frames, parts, xy) or (parts, xy) 42 | """ 43 | if xy.ndim == 2: 44 | xy = np.expand_dims(xy, 0) 45 | xy_min = np.nanmin(xy, axis=1) 46 | xy_max = np.nanmax(xy, axis=1) 47 | for i in range(xy.shape[0]): 48 | xy[i] = ((xy[i] - xy_min[i]) / (xy_max[i] - xy_min[i])) * 2 - 1 49 | return xy.squeeze() 50 | 51 | 52 | def seq_label_smoothing(labels, max_step=10): 53 | steps = 0 54 | remain_step = 0 55 | target_label = 0 56 | active_label = 0 57 | start_change = 0 58 | max_val = np.max(labels) 59 | min_val = np.min(labels) 60 | for i in range(labels.shape[0]): 61 | if remain_step > 0: 62 | if i >= start_change: 63 | labels[i][active_label] = max_val * remain_step / steps 64 | labels[i][target_label] = max_val * (steps - remain_step) / steps \ 65 | if max_val * (steps - remain_step) / steps else min_val 66 | remain_step -= 1 67 | continue 68 | 69 | diff_index = np.where(np.argmax(labels[i:i+max_step], axis=1) - np.argmax(labels[i]) != 0)[0] 70 | if len(diff_index) > 0: 71 | start_change = i + remain_step // 2 72 | steps = diff_index[0] 73 | remain_step = steps 74 | target_label = np.argmax(labels[i + remain_step]) 75 | active_label = np.argmax(labels[i]) 76 | return labels 77 | 78 | 79 | feature_set = np.empty((0, n_frames, 14, 3)) 80 | labels_set = np.empty((0, len(cols))) 81 | vid_list = annot['video'].unique() 82 | for vid in vid_list: 83 | print(f'Process on: {vid}') 84 | data = annot[annot['video'] == vid].reset_index(drop=True).drop(columns='video') 85 | 86 | # Label Smoothing. 87 | esp = 0.1 88 | data[cols] = data[cols] * (1 - esp) + (1 - data[cols]) * esp / (len(cols) - 1) 89 | data[cols] = seq_label_smoothing(data[cols].values, smooth_labels_step) 90 | 91 | # Separate continuous frames. 92 | frames = data['frame'].values 93 | frames_set = [] 94 | fs = [0] 95 | for i in range(1, len(frames)): 96 | if frames[i] < frames[i-1] + 10: 97 | fs.append(i) 98 | else: 99 | frames_set.append(fs) 100 | fs = [i] 101 | frames_set.append(fs) 102 | 103 | for fs in frames_set: 104 | xys = data.iloc[fs, 1:-len(cols)].values.reshape(-1, 13, 3) 105 | # Scale pose normalize. 106 | xys[:, :, :2] = scale_pose(xys[:, :, :2]) 107 | # Add center point. 108 | xys = np.concatenate((xys, np.expand_dims((xys[:, 1, :] + xys[:, 2, :]) / 2, 1)), axis=1) 109 | 110 | # Weighting main parts score. 111 | scr = xys[:, :, -1].copy() 112 | scr[:, main_idx_parts] = np.minimum(scr[:, main_idx_parts] * 1.5, 1.0) 113 | # Mean score. 114 | scr = scr.mean(1) 115 | 116 | # Targets. 117 | lb = data.iloc[fs, -len(cols):].values 118 | # Apply points score mean to all labels. 
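        # Scaling by the mean keypoint confidence turns the smoothed one-hot targets
        # into score-weighted soft labels, so low-quality skeletons contribute weaker
        # supervision; the '(labelXscrw)' suffix in save_path presumably refers to
        # this label-times-score weighting.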
119 | lb = lb * scr[:, None] 120 | 121 | for i in range(xys.shape[0] - n_frames): 122 | feature_set = np.append(feature_set, xys[i:i+n_frames][None, ...], axis=0) 123 | labels_set = np.append(labels_set, lb[i:i+n_frames].mean(0)[None, ...], axis=0) 124 | 125 | 126 | """with open(save_path, 'wb') as f: 127 | pickle.dump((feature_set, labels_set), f)""" 128 | -------------------------------------------------------------------------------- /Detection/Utils.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import math 3 | import time 4 | import tqdm 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | import numpy as np 9 | from torch.utils.data import DataLoader 10 | 11 | 12 | def to_cpu(tensor): 13 | return tensor.detach().cpu() 14 | 15 | 16 | def load_classes(path): 17 | """ 18 | Loads class labels at 'path' 19 | """ 20 | fp = open(path, "r") 21 | names = fp.read().split("\n")[:-1] 22 | return names 23 | 24 | 25 | def weights_init_normal(m): 26 | classname = m.__class__.__name__ 27 | if classname.find("Conv") != -1: 28 | torch.nn.init.normal_(m.weight.data, 0.0, 0.02) 29 | elif classname.find("BatchNorm2d") != -1: 30 | torch.nn.init.normal_(m.weight.data, 1.0, 0.02) 31 | torch.nn.init.constant_(m.bias.data, 0.0) 32 | 33 | 34 | def rescale_boxes(boxes, current_dim, original_shape): 35 | """ Rescales bounding boxes to the original shape """ 36 | orig_h, orig_w = original_shape 37 | # The amount of padding that was added 38 | pad_x = max(orig_h - orig_w, 0) * (current_dim / max(original_shape)) 39 | pad_y = max(orig_w - orig_h, 0) * (current_dim / max(original_shape)) 40 | # Image height and width after padding is removed 41 | unpad_h = current_dim - pad_y 42 | unpad_w = current_dim - pad_x 43 | # Rescale bounding boxes to dimension of original image 44 | boxes[:, 0] = ((boxes[:, 0] - pad_x // 2) / unpad_w) * orig_w 45 | boxes[:, 1] = ((boxes[:, 1] - pad_y // 2) / unpad_h) * orig_h 46 | boxes[:, 2] = ((boxes[:, 2] - pad_x // 2) / unpad_w) * orig_w 47 | boxes[:, 3] = ((boxes[:, 3] - pad_y // 2) / unpad_h) * orig_h 48 | return boxes 49 | 50 | 51 | def xywh2xyxy(x): 52 | y = x.new(x.shape) 53 | y[..., 0] = x[..., 0] - x[..., 2] / 2 54 | y[..., 1] = x[..., 1] - x[..., 3] / 2 55 | y[..., 2] = x[..., 0] + x[..., 2] / 2 56 | y[..., 3] = x[..., 1] + x[..., 3] / 2 57 | return y 58 | 59 | 60 | def ap_per_class(tp, conf, pred_cls, target_cls): 61 | """ Compute the average precision, given the recall and precision curves. 62 | Source: https://github.com/rafaelpadilla/Object-Detection-Metrics. 63 | # Arguments 64 | tp: True positives (list). 65 | conf: Objectness value from 0-1 (list). 66 | pred_cls: Predicted object classes (list). 67 | target_cls: True object classes (list). 68 | # Returns 69 | The average precision as computed in py-faster-rcnn. 
70 | """ 71 | # Sort by objectness 72 | i = np.argsort(-conf) 73 | tp, conf, pred_cls = tp[i], conf[i], pred_cls[i] 74 | 75 | # Find unique classes 76 | unique_classes = np.unique(target_cls) 77 | 78 | # Create Precision-Recall curve and compute AP for each class 79 | ap, p, r = [], [], [] 80 | for c in tqdm.tqdm(unique_classes, desc="Computing AP"): 81 | i = pred_cls == c 82 | n_gt = (target_cls == c).sum() # Number of ground truth objects 83 | n_p = i.sum() # Number of predicted objects 84 | 85 | if n_p == 0 and n_gt == 0: 86 | continue 87 | elif n_p == 0 or n_gt == 0: 88 | ap.append(0) 89 | r.append(0) 90 | p.append(0) 91 | else: 92 | # Accumulate FPs and TPs 93 | fpc = (1 - tp[i]).cumsum() 94 | tpc = (tp[i]).cumsum() 95 | 96 | # Recall 97 | recall_curve = tpc / (n_gt + 1e-16) 98 | r.append(recall_curve[-1]) 99 | 100 | # Precision 101 | precision_curve = tpc / (tpc + fpc) 102 | p.append(precision_curve[-1]) 103 | 104 | # AP from recall-precision curve 105 | ap.append(compute_ap(recall_curve, precision_curve)) 106 | 107 | # Compute F1 score (harmonic mean of precision and recall) 108 | p, r, ap = np.array(p), np.array(r), np.array(ap) 109 | f1 = 2 * p * r / (p + r + 1e-16) 110 | 111 | return p, r, ap, f1, unique_classes.astype("int32") 112 | 113 | 114 | def compute_ap(recall, precision): 115 | """ Compute the average precision, given the recall and precision curves. 116 | Code originally from https://github.com/rbgirshick/py-faster-rcnn. 117 | # Arguments 118 | recall: The recall curve (list). 119 | precision: The precision curve (list). 120 | # Returns 121 | The average precision as computed in py-faster-rcnn. 122 | """ 123 | # correct AP calculation 124 | # first append sentinel values at the end 125 | mrec = np.concatenate(([0.0], recall, [1.0])) 126 | mpre = np.concatenate(([0.0], precision, [0.0])) 127 | 128 | # compute the precision envelope 129 | for i in range(mpre.size - 1, 0, -1): 130 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 131 | 132 | # to calculate area under PR curve, look for points 133 | # where X axis (recall) changes value 134 | i = np.where(mrec[1:] != mrec[:-1])[0] 135 | 136 | # and sum (\Delta recall) * prec 137 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 138 | return ap 139 | 140 | 141 | def get_batch_statistics(outputs, targets, iou_threshold): 142 | """ Compute true positives, predicted scores and predicted labels per sample """ 143 | batch_metrics = [] 144 | for sample_i in range(len(outputs)): 145 | 146 | if outputs[sample_i] is None: 147 | continue 148 | 149 | output = outputs[sample_i] 150 | pred_boxes = output[:, :4] 151 | pred_scores = output[:, 4] 152 | pred_labels = output[:, -1] 153 | 154 | true_positives = np.zeros(pred_boxes.shape[0]) 155 | 156 | annotations = targets[targets[:, 0] == sample_i][:, 1:] 157 | target_labels = annotations[:, 0] if len(annotations) else [] 158 | if len(annotations): 159 | detected_boxes = [] 160 | target_boxes = annotations[:, 1:] 161 | 162 | for pred_i, (pred_box, pred_label) in enumerate(zip(pred_boxes, pred_labels)): 163 | 164 | # If targets are found break 165 | if len(detected_boxes) == len(annotations): 166 | break 167 | 168 | # Ignore if label is not one of the target labels 169 | if pred_label not in target_labels: 170 | continue 171 | 172 | iou, box_index = bbox_iou(pred_box.unsqueeze(0), target_boxes).max(0) 173 | if iou >= iou_threshold and box_index not in detected_boxes: 174 | true_positives[pred_i] = 1 175 | detected_boxes += [box_index] 176 | batch_metrics.append([true_positives, pred_scores, 
pred_labels]) 177 | return batch_metrics 178 | 179 | 180 | def bbox_wh_iou(wh1, wh2): 181 | wh2 = wh2.t() 182 | w1, h1 = wh1[0], wh1[1] 183 | w2, h2 = wh2[0], wh2[1] 184 | inter_area = torch.min(w1, w2) * torch.min(h1, h2) 185 | union_area = (w1 * h1 + 1e-16) + w2 * h2 - inter_area 186 | return inter_area / union_area 187 | 188 | 189 | def bbox_iou(box1, box2, x1y1x2y2=True): 190 | """ 191 | Returns the IoU of two bounding boxes 192 | """ 193 | if not x1y1x2y2: 194 | # Transform from center and width to exact coordinates 195 | b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2 196 | b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2 197 | b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2 198 | b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2 199 | else: 200 | # Get the coordinates of bounding boxes 201 | b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3] 202 | b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3] 203 | 204 | # get the corrdinates of the intersection rectangle 205 | inter_rect_x1 = torch.max(b1_x1, b2_x1) 206 | inter_rect_y1 = torch.max(b1_y1, b2_y1) 207 | inter_rect_x2 = torch.min(b1_x2, b2_x2) 208 | inter_rect_y2 = torch.min(b1_y2, b2_y2) 209 | # Intersection area 210 | inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1 + 1, min=0) * torch.clamp( 211 | inter_rect_y2 - inter_rect_y1 + 1, min=0 212 | ) 213 | # Union Area 214 | b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1) 215 | b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1) 216 | 217 | iou = inter_area / (b1_area + b2_area - inter_area + 1e-16) 218 | 219 | return iou 220 | 221 | 222 | def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4): 223 | """ 224 | Removes detections with lower object confidence score than 'conf_thres' and performs 225 | Non-Maximum Suppression to further filter detections. 
226 | Returns detections with shape: 227 | (x1, y1, x2, y2, object_conf, class_score, class_pred) 228 | """ 229 | # From (center x, center y, width, height) to (x1, y1, x2, y2) 230 | prediction[..., :4] = xywh2xyxy(prediction[..., :4]) 231 | output = [None for _ in range(len(prediction))] 232 | for image_i, image_pred in enumerate(prediction): 233 | # Filter out confidence scores below threshold 234 | image_pred = image_pred[image_pred[:, 4] >= conf_thres] 235 | # If none are remaining => process next image 236 | if not image_pred.size(0): 237 | continue 238 | # Object confidence times class confidence 239 | score = image_pred[:, 4] * image_pred[:, 5:].max(1)[0] 240 | # Sort by it 241 | image_pred = image_pred[(-score).argsort()] 242 | class_confs, class_preds = image_pred[:, 5:].max(1, keepdim=True) 243 | detections = torch.cat((image_pred[:, :5], class_confs.float(), class_preds.float()), 1) 244 | # Perform non-maximum suppression 245 | keep_boxes = [] 246 | while detections.size(0): 247 | large_overlap = bbox_iou(detections[0, :4].unsqueeze(0), detections[:, :4]) > nms_thres 248 | label_match = detections[0, -1] == detections[:, -1] 249 | # Indices of boxes with lower confidence scores, large IOUs and matching labels 250 | invalid = large_overlap & label_match 251 | weights = detections[invalid, 4:5] 252 | # Merge overlapping bboxes by order of confidence 253 | detections[0, :4] = (weights * detections[invalid, :4]).sum(0) / weights.sum() 254 | keep_boxes += [detections[0]] 255 | detections = detections[~invalid] 256 | if keep_boxes: 257 | output[image_i] = torch.stack(keep_boxes) 258 | 259 | return output 260 | 261 | 262 | def build_targets(pred_boxes, pred_cls, target, anchors, ignore_thres): 263 | ByteTensor = torch.cuda.ByteTensor if pred_boxes.is_cuda else torch.ByteTensor 264 | FloatTensor = torch.cuda.FloatTensor if pred_boxes.is_cuda else torch.FloatTensor 265 | 266 | nB = pred_boxes.size(0) 267 | nA = pred_boxes.size(1) 268 | nC = pred_cls.size(-1) 269 | nG = pred_boxes.size(2) 270 | 271 | # Output tensors 272 | obj_mask = ByteTensor(nB, nA, nG, nG).fill_(0) 273 | noobj_mask = ByteTensor(nB, nA, nG, nG).fill_(1) 274 | class_mask = FloatTensor(nB, nA, nG, nG).fill_(0) 275 | iou_scores = FloatTensor(nB, nA, nG, nG).fill_(0) 276 | tx = FloatTensor(nB, nA, nG, nG).fill_(0) 277 | ty = FloatTensor(nB, nA, nG, nG).fill_(0) 278 | tw = FloatTensor(nB, nA, nG, nG).fill_(0) 279 | th = FloatTensor(nB, nA, nG, nG).fill_(0) 280 | tcls = FloatTensor(nB, nA, nG, nG, nC).fill_(0) 281 | 282 | # Convert to position relative to box 283 | target_boxes = target[:, 2:6] * nG 284 | gxy = target_boxes[:, :2] 285 | gwh = target_boxes[:, 2:] 286 | # Get anchors with best iou 287 | ious = torch.stack([bbox_wh_iou(anchor, gwh) for anchor in anchors]) 288 | best_ious, best_n = ious.max(0) 289 | # Separate target values 290 | b, target_labels = target[:, :2].long().t() 291 | gx, gy = gxy.t() 292 | gw, gh = gwh.t() 293 | gi, gj = gxy.long().t() 294 | # Set masks 295 | obj_mask[b, best_n, gj, gi] = 1 296 | noobj_mask[b, best_n, gj, gi] = 0 297 | 298 | # Set noobj mask to zero where iou exceeds ignore threshold 299 | for i, anchor_ious in enumerate(ious.t()): 300 | noobj_mask[b[i], anchor_ious > ignore_thres, gj[i], gi[i]] = 0 301 | 302 | # Coordinates 303 | tx[b, best_n, gj, gi] = gx - gx.floor() 304 | ty[b, best_n, gj, gi] = gy - gy.floor() 305 | # Width and height 306 | tw[b, best_n, gj, gi] = torch.log(gw / anchors[best_n][:, 0] + 1e-16) 307 | th[b, best_n, gj, gi] = torch.log(gh / anchors[best_n][:, 1] + 
1e-16) 308 | # One-hot encoding of label 309 | tcls[b, best_n, gj, gi, target_labels] = 1 310 | # Compute label correctness and iou at best anchor 311 | class_mask[b, best_n, gj, gi] = (pred_cls[b, best_n, gj, gi].argmax(-1) == target_labels).float() 312 | iou_scores[b, best_n, gj, gi] = bbox_iou(pred_boxes[b, best_n, gj, gi], target_boxes, x1y1x2y2=False) 313 | 314 | tconf = obj_mask.float() 315 | return iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf 316 | 317 | 318 | def parse_model_config(path): 319 | """Parses the yolo-v3 layer configuration file and returns module definitions""" 320 | file = open(path, 'r') 321 | lines = file.read().split('\n') 322 | lines = [x for x in lines if x and not x.startswith('#')] 323 | lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces 324 | module_defs = [] 325 | for line in lines: 326 | if line.startswith('['): # This marks the start of a new block 327 | module_defs.append({}) 328 | module_defs[-1]['type'] = line[1:-1].rstrip() 329 | if module_defs[-1]['type'] == 'convolutional': 330 | module_defs[-1]['batch_normalize'] = 0 331 | else: 332 | key, value = line.split("=") 333 | value = value.strip() 334 | module_defs[-1][key.rstrip()] = value.strip() 335 | 336 | return module_defs 337 | 338 | 339 | def parse_data_config(path): 340 | """Parses the data configuration file""" 341 | options = dict() 342 | options['gpus'] = '0,1,2,3' 343 | options['num_workers'] = '10' 344 | with open(path, 'r') as fp: 345 | lines = fp.readlines() 346 | for line in lines: 347 | line = line.strip() 348 | if line == '' or line.startswith('#'): 349 | continue 350 | key, value = line.split('=') 351 | options[key.strip()] = value.strip() 352 | return options 353 | 354 | 355 | def ResizePadding(height, width): 356 | desized_size = (height, width) 357 | 358 | def resizePadding(image, **kwargs): 359 | old_size = image.shape[:2] 360 | max_size_idx = old_size.index(max(old_size)) 361 | ratio = float(desized_size[max_size_idx]) / max(old_size) 362 | new_size = tuple([int(x * ratio) for x in old_size]) 363 | 364 | if new_size > desized_size: 365 | min_size_idx = old_size.index(min(old_size)) 366 | ratio = float(desized_size[min_size_idx]) / min(old_size) 367 | new_size = tuple([int(x * ratio) for x in old_size]) 368 | 369 | image = cv2.resize(image, (new_size[1], new_size[0])) 370 | delta_w = desized_size[1] - new_size[1] 371 | delta_h = desized_size[0] - new_size[0] 372 | top, bottom = delta_h // 2, delta_h - (delta_h // 2) 373 | left, right = delta_w // 2, delta_w - (delta_w // 2) 374 | 375 | image = cv2.copyMakeBorder(image, top, bottom, left, right, cv2.BORDER_CONSTANT) 376 | return image 377 | return resizePadding 378 | 379 | 380 | class AverageValueMeter(object): 381 | def __init__(self): 382 | self.reset() 383 | self.val = 0 384 | 385 | def add(self, value, n=1): 386 | self.val = value 387 | self.sum += value 388 | self.var += value * value 389 | self.n += n 390 | 391 | if self.n == 0: 392 | self.mean, self.std = np.nan, np.nan 393 | elif self.n == 1: 394 | self.mean = 0.0 + self.sum # This is to force a copy in torch/numpy 395 | self.std = np.inf 396 | self.mean_old = self.mean 397 | self.m_s = 0.0 398 | else: 399 | self.mean = self.mean_old + (value - n * self.mean_old) / float(self.n) 400 | self.m_s += (value - self.mean_old) * (value - self.mean) 401 | self.mean_old = self.mean 402 | self.std = np.sqrt(self.m_s / (self.n - 1.0)) 403 | 404 | def value(self): 405 | return self.mean, self.std 406 | 407 | def reset(self): 408 | 
self.n = 0 409 | self.sum = 0.0 410 | self.var = 0.0 411 | self.val = 0.0 412 | self.mean = np.nan 413 | self.mean_old = 0.0 414 | self.m_s = 0.0 415 | self.std = np.nan 416 | -------------------------------------------------------------------------------- /DetectorLoader.py: -------------------------------------------------------------------------------- 1 | import time 2 | import torch 3 | import numpy as np 4 | import torchvision.transforms as transforms 5 | 6 | from queue import Queue 7 | from threading import Thread 8 | 9 | from Detection.Models import Darknet 10 | from Detection.Utils import non_max_suppression, ResizePadding 11 | 12 | 13 | class TinyYOLOv3_onecls(object): 14 | """Load trained Tiny-YOLOv3 one class (person) detection model. 15 | Args: 16 | input_size: (int) Size of input image must be divisible by 32. Default: 416, 17 | config_file: (str) Path to Yolo model structure config file., 18 | weight_file: (str) Path to trained weights file., 19 | nms: (float) Non-Maximum Suppression overlap threshold., 20 | conf_thres: (float) Minimum Confidence threshold of predicted bboxs to cut off., 21 | device: (str) Device to load the model on 'cpu' or 'cuda'. 22 | """ 23 | def __init__(self, 24 | input_size=416, 25 | config_file='Models/yolo-tiny-onecls/yolov3-tiny-onecls.cfg', 26 | weight_file='Models/yolo-tiny-onecls/best-model.pth', 27 | nms=0.2, 28 | conf_thres=0.45, 29 | device='cuda'): 30 | self.input_size = input_size 31 | self.model = Darknet(config_file).to(device) 32 | self.model.load_state_dict(torch.load(weight_file)) 33 | self.model.eval() 34 | self.device = device 35 | 36 | self.nms = nms 37 | self.conf_thres = conf_thres 38 | 39 | self.resize_fn = ResizePadding(input_size, input_size) 40 | self.transf_fn = transforms.ToTensor() 41 | 42 | def detect(self, image, need_resize=True, expand_bb=5): 43 | """Feed forward to the model. 44 | Args: 45 | image: (numpy array) Single RGB image to detect., 46 | need_resize: (bool) Resize to input_size before feed and will return bboxs 47 | with scale to image original size., 48 | expand_bb: (int) Expand boundary of the boxs. 49 | Returns: 50 | (torch.float32) Of each detected object contain a 51 | [top, left, bottom, right, bbox_score, class_score, class] 52 | return `None` if no detected. 53 | """ 54 | image_size = (self.input_size, self.input_size) 55 | if need_resize: 56 | image_size = image.shape[:2] 57 | image = self.resize_fn(image) 58 | 59 | image = self.transf_fn(image)[None, ...] 
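        # `scf` below is the letterbox scale factor used by ResizePadding
        # (the smaller of input_size / original_height and input_size / original_width).
        # The detections are later shifted by the padding offset and divided by
        # `scf` to map the boxes back to the original frame coordinates.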
60 | scf = torch.min(self.input_size / torch.FloatTensor([image_size]), 1)[0] 61 | 62 | detected = self.model(image.to(self.device)) 63 | detected = non_max_suppression(detected, self.conf_thres, self.nms)[0] 64 | if detected is not None: 65 | detected[:, [0, 2]] -= (self.input_size - scf * image_size[1]) / 2 66 | detected[:, [1, 3]] -= (self.input_size - scf * image_size[0]) / 2 67 | detected[:, 0:4] /= scf 68 | 69 | detected[:, 0:2] = np.maximum(0, detected[:, 0:2] - expand_bb) 70 | detected[:, 2:4] = np.minimum(image_size[::-1], detected[:, 2:4] + expand_bb) 71 | 72 | return detected 73 | 74 | 75 | class ThreadDetection(object): 76 | def __init__(self, 77 | dataloader, 78 | model, 79 | queue_size=256): 80 | self.model = model 81 | 82 | self.dataloader = dataloader 83 | self.stopped = False 84 | self.Q = Queue(maxsize=queue_size) 85 | 86 | def start(self): 87 | t = Thread(target=self.update, args=(), daemon=True).start() 88 | return self 89 | 90 | def update(self): 91 | while True: 92 | if self.stopped: 93 | return 94 | 95 | images = self.dataloader.getitem() 96 | 97 | outputs = self.model.detect(images) 98 | 99 | if self.Q.full(): 100 | time.sleep(2) 101 | self.Q.put((images, outputs)) 102 | 103 | def getitem(self): 104 | return self.Q.get() 105 | 106 | def stop(self): 107 | self.stopped = True 108 | 109 | def __len__(self): 110 | return self.Q.qsize() 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | -------------------------------------------------------------------------------- /Models/TSSTG/_.txt: -------------------------------------------------------------------------------- 1 | tsstg-model.pth -------------------------------------------------------------------------------- /Models/sppe/_.txt: -------------------------------------------------------------------------------- 1 | fast_res50_256x192.pth 2 | fast_res101_320x256.pth -------------------------------------------------------------------------------- /Models/yolo-tiny-onecls/_.txt: -------------------------------------------------------------------------------- 1 | best-model.pth 2 | yolov3-tiny-onecls.cfg -------------------------------------------------------------------------------- /PoseEstimateLoader.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import torch 4 | 5 | from SPPE.src.main_fast_inference import InferenNet_fast, InferenNet_fastRes50 6 | from SPPE.src.utils.img import crop_dets 7 | from pPose_nms import pose_nms 8 | from SPPE.src.utils.eval import getPrediction 9 | 10 | 11 | class SPPE_FastPose(object): 12 | def __init__(self, 13 | backbone, 14 | input_height=320, 15 | input_width=256, 16 | device='cuda'): 17 | assert backbone in ['resnet50', 'resnet101'], '{} backbone is not support yet!'.format(backbone) 18 | 19 | self.inp_h = input_height 20 | self.inp_w = input_width 21 | self.device = device 22 | 23 | if backbone == 'resnet101': 24 | self.model = InferenNet_fast().to(device) 25 | else: 26 | self.model = InferenNet_fastRes50().to(device) 27 | self.model.eval() 28 | 29 | def predict(self, image, bboxs, bboxs_scores): 30 | inps, pt1, pt2 = crop_dets(image, bboxs, self.inp_h, self.inp_w) 31 | pose_hm = self.model(inps.to(self.device)).cpu().data 32 | 33 | # Cut eyes and ears. 
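        # The slice below keeps heatmap channel 0 (nose) plus channels 5 onward
        # (shoulders, elbows, wrists, hips, knees, ankles), dropping the four
        # eye/ear channels, so 13 of the 17 COCO keypoints remain.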
34 | pose_hm = torch.cat([pose_hm[:, :1, ...], pose_hm[:, 5:, ...]], dim=1) 35 | 36 | xy_hm, xy_img, scores = getPrediction(pose_hm, pt1, pt2, self.inp_h, self.inp_w, 37 | pose_hm.shape[-2], pose_hm.shape[-1]) 38 | result = pose_nms(bboxs, bboxs_scores, xy_img, scores) 39 | return result -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

# Human Falling Detection and Tracking

2 | 3 | This project uses Tiny-YOLO oneclass to detect each person in the frame, 4 | [AlphaPose](https://github.com/MVIG-SJTU/AlphaPose) to extract the skeleton pose, and the 5 | [ST-GCN](https://github.com/yysijie/st-gcn) model to predict the action from every 30 frames 6 | of each person's track. 7 | 8 | It currently supports 7 actions: Standing, Walking, Sitting, Lying Down, Stand up, Sit down, Fall Down. 9 | 10 |
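A minimal per-frame usage sketch (not part of the repository) of how the detector, pose estimator, and action classifier defined in this project fit together; the image path and the `cuda` device are placeholders, and the pre-trained weights listed below must already be in `./Models`:

```
import cv2
from DetectorLoader import TinyYOLOv3_onecls
from PoseEstimateLoader import SPPE_FastPose

detector = TinyYOLOv3_onecls(device='cuda')             # Tiny-YOLO oneclass person detector
pose_model = SPPE_FastPose('resnet50', device='cuda')   # SPPE FastPose (AlphaPose) skeleton estimator

frame = cv2.cvtColor(cv2.imread('frame.jpg'), cv2.COLOR_BGR2RGB)  # placeholder frame
detected = detector.detect(frame)   # tensor of [bbox, bbox_score, class_score, class] rows, or None
if detected is not None:
    # Skeleton keypoints and scores for every detected person in this frame.
    poses = pose_model.predict(frame, detected[:, 0:4], detected[:, 4])
    # 30 consecutive skeletons per tracked person are then fed to the TSSTG
    # (ST-GCN) action model to predict one of the 7 actions above.
```

main.py ties these stages together with the tracker in Track/; the sketch only illustrates the per-frame data flow.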
11 | 12 |
13 | 14 | ## Prerequisites 15 | 16 | - Python > 3.6 17 | - Pytorch > 1.3.1 18 | 19 | Originally tested on: i7-8750H CPU @ 2.20GHz x12, GeForce RTX 2070 8GB, CUDA 10.2 20 | 21 | ## Data 22 | 23 | This project trained a new Tiny-YOLO oneclass model to detect only person objects and to reduce 24 | model size. It was trained on a rotation-augmented [COCO](http://cocodataset.org/#home) person keypoints dataset 25 | for more robust person detection across a variety of body angles and poses. 26 | 27 | For action recognition, data from the [Le2i](http://le2i.cnrs.fr/Fall-detection-Dataset?lang=fr) 28 | Fall Detection Dataset (Coffee room, Home) was used: skeleton poses were extracted with AlphaPose and each action 29 | frame was labeled by hand for training the ST-GCN model. 30 | 31 | ## Pre-Trained Models 32 | 33 | - Tiny-YOLO oneclass - [.pth](https://drive.google.com/file/d/1obEbWBSm9bXeg10FriJ7R2cGLRsg-AfP/view?usp=sharing), 34 | [.cfg](https://drive.google.com/file/d/19sPzBZjAjuJQ3emRteHybm2SG25w9Wn5/view?usp=sharing) 35 | - SPPE FastPose (AlphaPose) - [resnet101](https://drive.google.com/file/d/1N2MgE1Esq6CKYA6FyZVKpPwHRyOCrzA0/view?usp=sharing), 36 | [resnet50](https://drive.google.com/file/d/1IPfCDRwCmQDnQy94nT1V-_NVtTEi4VmU/view?usp=sharing) 37 | - ST-GCN action recognition - [tsstg](https://drive.google.com/file/d/1mQQ4JHe58ylKbBqTjuKzpwN2nwKOWJ9u/view?usp=sharing) 38 | 39 | ## Basic Use 40 | 41 | 1. Download all pre-trained models into the ./Models folder. 42 | 2. Run main.py 43 | ``` 44 | python main.py ${video file or camera source} 45 | ``` 46 | 47 | ## Reference 48 | 49 | - AlphaPose : https://github.com/Amanbhandula/AlphaPose 50 | - ST-GCN : https://github.com/yysijie/st-gcn -------------------------------------------------------------------------------- /SPPE/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Jeff-sjtu 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
-------------------------------------------------------------------------------- /SPPE/README.md: -------------------------------------------------------------------------------- 1 | # pytorch-AlphaPose from: https://github.com/Amanbhandula/AlphaPose 2 | -------------------------------------------------------------------------------- /SPPE/src/main_fast_inference.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.utils.data 4 | import torch.utils.data.distributed 5 | import torch.nn.functional as F 6 | import numpy as np 7 | from SPPE.src.utils.img import flip, shuffleLR 8 | from SPPE.src.utils.eval import getPrediction 9 | from SPPE.src.models.FastPose import FastPose 10 | 11 | import time 12 | import sys 13 | 14 | import torch._utils 15 | try: 16 | torch._utils._rebuild_tensor_v2 17 | except AttributeError: 18 | def _rebuild_tensor_v2(storage, storage_offset, size, stride, requires_grad, backward_hooks): 19 | tensor = torch._utils._rebuild_tensor(storage, storage_offset, size, stride) 20 | tensor.requires_grad = requires_grad 21 | tensor._backward_hooks = backward_hooks 22 | return tensor 23 | torch._utils._rebuild_tensor_v2 = _rebuild_tensor_v2 24 | 25 | 26 | class InferenNet(nn.Module): 27 | def __init__(self, dataset, weights_file='./Models/sppe/fast_res101_320x256.pth'): 28 | super().__init__() 29 | 30 | self.pyranet = FastPose('resnet101').cuda() 31 | print('Loading pose model from {}'.format(weights_file)) 32 | sys.stdout.flush() 33 | self.pyranet.load_state_dict(torch.load(weights_file)) 34 | self.pyranet.eval() 35 | self.pyranet = model 36 | 37 | self.dataset = dataset 38 | 39 | def forward(self, x): 40 | out = self.pyranet(x) 41 | out = out.narrow(1, 0, 17) 42 | 43 | flip_out = self.pyranet(flip(x)) 44 | flip_out = flip_out.narrow(1, 0, 17) 45 | 46 | flip_out = flip(shuffleLR( 47 | flip_out, self.dataset)) 48 | 49 | out = (flip_out + out) / 2 50 | 51 | return out 52 | 53 | 54 | class InferenNet_fast(nn.Module): 55 | def __init__(self, weights_file='./Models/sppe/fast_res101_320x256.pth'): 56 | super().__init__() 57 | 58 | self.pyranet = FastPose('resnet101').cuda() 59 | print('Loading pose model from {}'.format(weights_file)) 60 | self.pyranet.load_state_dict(torch.load(weights_file)) 61 | self.pyranet.eval() 62 | 63 | def forward(self, x): 64 | out = self.pyranet(x) 65 | out = out.narrow(1, 0, 17) 66 | 67 | return out 68 | 69 | 70 | class InferenNet_fastRes50(nn.Module): 71 | def __init__(self, weights_file='./Models/sppe/fast_res50_256x192.pth'): 72 | super().__init__() 73 | 74 | self.pyranet = FastPose('resnet50', 17).cuda() 75 | print('Loading pose model from {}'.format(weights_file)) 76 | self.pyranet.load_state_dict(torch.load(weights_file)) 77 | self.pyranet.eval() 78 | 79 | def forward(self, x): 80 | out = self.pyranet(x) 81 | 82 | return out 83 | -------------------------------------------------------------------------------- /SPPE/src/models/FastPose.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from torch.autograd import Variable 3 | 4 | from .layers.SE_Resnet import SEResnet 5 | from .layers.DUC import DUC 6 | from SPPE.src.opt import opt 7 | 8 | 9 | class FastPose(nn.Module): 10 | DIM = 128 11 | 12 | def __init__(self, backbone='resnet101', num_join=opt.nClasses): 13 | super(FastPose, self).__init__() 14 | assert backbone in ['resnet50', 'resnet101'] 15 | 16 | self.preact = SEResnet(backbone) 17 | 18 | 
self.suffle1 = nn.PixelShuffle(2) 19 | self.duc1 = DUC(512, 1024, upscale_factor=2) 20 | self.duc2 = DUC(256, 512, upscale_factor=2) 21 | 22 | self.conv_out = nn.Conv2d( 23 | self.DIM, num_join, kernel_size=3, stride=1, padding=1) 24 | 25 | def forward(self, x: Variable): 26 | out = self.preact(x) 27 | out = self.suffle1(out) 28 | out = self.duc1(out) 29 | out = self.duc2(out) 30 | 31 | out = self.conv_out(out) 32 | return out 33 | -------------------------------------------------------------------------------- /SPPE/src/models/__init__.py: -------------------------------------------------------------------------------- 1 | from . import * -------------------------------------------------------------------------------- /SPPE/src/models/hg-prm.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from .layers.PRM import Residual as ResidualPyramid 3 | from .layers.Residual import Residual as Residual 4 | from torch.autograd import Variable 5 | from SPPE.src.opt import opt 6 | from collections import defaultdict 7 | 8 | 9 | class Hourglass(nn.Module): 10 | def __init__(self, n, nFeats, nModules, inputResH, inputResW, net_type, B, C): 11 | super(Hourglass, self).__init__() 12 | 13 | self.ResidualUp = ResidualPyramid if n >= 2 else Residual 14 | self.ResidualDown = ResidualPyramid if n >= 3 else Residual 15 | 16 | self.depth = n 17 | self.nModules = nModules 18 | self.nFeats = nFeats 19 | self.net_type = net_type 20 | self.B = B 21 | self.C = C 22 | self.inputResH = inputResH 23 | self.inputResW = inputResW 24 | 25 | self.up1 = self._make_residual(self.ResidualUp, False, inputResH, inputResW) 26 | self.low1 = nn.Sequential( 27 | nn.MaxPool2d(2), 28 | self._make_residual(self.ResidualDown, False, inputResH / 2, inputResW / 2) 29 | ) 30 | if n > 1: 31 | self.low2 = Hourglass(n - 1, nFeats, nModules, inputResH / 2, inputResW / 2, net_type, B, C) 32 | else: 33 | self.low2 = self._make_residual(self.ResidualDown, False, inputResH / 2, inputResW / 2) 34 | 35 | self.low3 = self._make_residual(self.ResidualDown, True, inputResH / 2, inputResW / 2) 36 | self.up2 = nn.UpsamplingNearest2d(scale_factor=2) 37 | 38 | self.upperBranch = self.up1 39 | self.lowerBranch = nn.Sequential( 40 | self.low1, 41 | self.low2, 42 | self.low3, 43 | self.up2 44 | ) 45 | 46 | def _make_residual(self, resBlock, useConv, inputResH, inputResW): 47 | layer_list = [] 48 | for i in range(self.nModules): 49 | layer_list.append(resBlock(self.nFeats, self.nFeats, inputResH, inputResW, 50 | stride=1, net_type=self.net_type, useConv=useConv, 51 | baseWidth=self.B, cardinality=self.C)) 52 | return nn.Sequential(*layer_list) 53 | 54 | def forward(self, x: Variable): 55 | up1 = self.upperBranch(x) 56 | up2 = self.lowerBranch(x) 57 | out = up1 + up2 58 | return out 59 | 60 | 61 | class PyraNet(nn.Module): 62 | def __init__(self): 63 | super(PyraNet, self).__init__() 64 | 65 | B, C = opt.baseWidth, opt.cardinality 66 | self.inputResH = opt.inputResH / 4 67 | self.inputResW = opt.inputResW / 4 68 | self.nStack = opt.nStack 69 | 70 | self.cnv1 = nn.Sequential( 71 | nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3), 72 | nn.BatchNorm2d(64), 73 | nn.ReLU(True) 74 | ) 75 | self.r1 = nn.Sequential( 76 | ResidualPyramid(64, 128, opt.inputResH / 2, opt.inputResW / 2, 77 | stride=1, net_type='no_preact', useConv=False, baseWidth=B, cardinality=C), 78 | nn.MaxPool2d(2) 79 | ) 80 | self.r4 = ResidualPyramid(128, 128, self.inputResH, self.inputResW, 81 | stride=1, net_type='preact', 
useConv=False, baseWidth=B, cardinality=C) 82 | self.r5 = ResidualPyramid(128, opt.nFeats, self.inputResH, self.inputResW, 83 | stride=1, net_type='preact', useConv=False, baseWidth=B, cardinality=C) 84 | self.preact = nn.Sequential( 85 | self.cnv1, 86 | self.r1, 87 | self.r4, 88 | self.r5 89 | ) 90 | self.stack_layers = defaultdict(list) 91 | for i in range(self.nStack): 92 | hg = Hourglass(4, opt.nFeats, opt.nResidual, self.inputResH, self.inputResW, 'preact', B, C) 93 | lin = nn.Sequential( 94 | hg, 95 | nn.BatchNorm2d(opt.nFeats), 96 | nn.ReLU(True), 97 | nn.Conv2d(opt.nFeats, opt.nFeats, kernel_size=1, stride=1, padding=0), 98 | nn.BatchNorm2d(opt.nFeats), 99 | nn.ReLU(True) 100 | ) 101 | tmpOut = nn.Conv2d(opt.nFeats, opt.nClasses, kernel_size=1, stride=1, padding=0) 102 | self.stack_layers['lin'].append(lin) 103 | self.stack_layers['out'].append(tmpOut) 104 | if i < self.nStack - 1: 105 | lin_ = nn.Conv2d(opt.nFeats, opt.nFeats, kernel_size=1, stride=1, padding=0) 106 | tmpOut_ = nn.Conv2d(opt.nClasses, opt.nFeats, kernel_size=1, stride=1, padding=0) 107 | self.stack_layers['lin_'].append(lin_) 108 | self.stack_layers['out_'].append(tmpOut_) 109 | 110 | def forward(self, x: Variable): 111 | out = [] 112 | inter = self.preact(x) 113 | for i in range(self.nStack): 114 | lin = self.stack_layers['lin'][i](inter) 115 | tmpOut = self.stack_layers['out'][i](lin) 116 | out.append(tmpOut) 117 | if i < self.nStack - 1: 118 | lin_ = self.stack_layers['lin_'][i](lin) 119 | tmpOut_ = self.stack_layers['out_'][i](tmpOut) 120 | inter = inter + lin_ + tmpOut_ 121 | return out 122 | 123 | 124 | def createModel(**kw): 125 | model = PyraNet() 126 | return model 127 | -------------------------------------------------------------------------------- /SPPE/src/models/hgPRM.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from .layers.PRM import Residual as ResidualPyramid 3 | from .layers.Residual import Residual as Residual 4 | from torch.autograd import Variable 5 | import torch 6 | from SPPE.src.opt import opt 7 | import math 8 | 9 | 10 | class Hourglass(nn.Module): 11 | def __init__(self, n, nFeats, nModules, inputResH, inputResW, net_type, B, C): 12 | super(Hourglass, self).__init__() 13 | 14 | self.ResidualUp = ResidualPyramid if n >= 2 else Residual 15 | self.ResidualDown = ResidualPyramid if n >= 3 else Residual 16 | 17 | self.depth = n 18 | self.nModules = nModules 19 | self.nFeats = nFeats 20 | self.net_type = net_type 21 | self.B = B 22 | self.C = C 23 | self.inputResH = inputResH 24 | self.inputResW = inputResW 25 | 26 | up1 = self._make_residual(self.ResidualUp, False, inputResH, inputResW) 27 | low1 = nn.Sequential( 28 | nn.MaxPool2d(2), 29 | self._make_residual(self.ResidualDown, False, inputResH / 2, inputResW / 2) 30 | ) 31 | if n > 1: 32 | low2 = Hourglass(n - 1, nFeats, nModules, inputResH / 2, inputResW / 2, net_type, B, C) 33 | else: 34 | low2 = self._make_residual(self.ResidualDown, False, inputResH / 2, inputResW / 2) 35 | 36 | low3 = self._make_residual(self.ResidualDown, True, inputResH / 2, inputResW / 2) 37 | up2 = nn.UpsamplingNearest2d(scale_factor=2) 38 | 39 | self.upperBranch = up1 40 | self.lowerBranch = nn.Sequential( 41 | low1, 42 | low2, 43 | low3, 44 | up2 45 | ) 46 | 47 | def _make_residual(self, resBlock, useConv, inputResH, inputResW): 48 | layer_list = [] 49 | for i in range(self.nModules): 50 | layer_list.append(resBlock(self.nFeats, self.nFeats, inputResH, inputResW, 51 | stride=1, 
net_type=self.net_type, useConv=useConv, 52 | baseWidth=self.B, cardinality=self.C)) 53 | return nn.Sequential(*layer_list) 54 | 55 | def forward(self, x: Variable): 56 | up1 = self.upperBranch(x) 57 | up2 = self.lowerBranch(x) 58 | # out = up1 + up2 59 | out = torch.add(up1, up2) 60 | return out 61 | 62 | 63 | class PyraNet(nn.Module): 64 | def __init__(self): 65 | super(PyraNet, self).__init__() 66 | 67 | B, C = opt.baseWidth, opt.cardinality 68 | self.inputResH = opt.inputResH / 4 69 | self.inputResW = opt.inputResW / 4 70 | self.nStack = opt.nStack 71 | 72 | conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3) 73 | if opt.init: 74 | nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / 3)) 75 | 76 | cnv1 = nn.Sequential( 77 | conv1, 78 | nn.BatchNorm2d(64), 79 | nn.ReLU(True) 80 | ) 81 | 82 | r1 = nn.Sequential( 83 | ResidualPyramid(64, 128, opt.inputResH / 2, opt.inputResW / 2, 84 | stride=1, net_type='no_preact', useConv=False, baseWidth=B, cardinality=C), 85 | nn.MaxPool2d(2) 86 | ) 87 | r4 = ResidualPyramid(128, 128, self.inputResH, self.inputResW, 88 | stride=1, net_type='preact', useConv=False, baseWidth=B, cardinality=C) 89 | r5 = ResidualPyramid(128, opt.nFeats, self.inputResH, self.inputResW, 90 | stride=1, net_type='preact', useConv=False, baseWidth=B, cardinality=C) 91 | self.preact = nn.Sequential( 92 | cnv1, 93 | r1, 94 | r4, 95 | r5 96 | ) 97 | 98 | self.stack_lin = nn.ModuleList() 99 | self.stack_out = nn.ModuleList() 100 | self.stack_lin_ = nn.ModuleList() 101 | self.stack_out_ = nn.ModuleList() 102 | 103 | for i in range(self.nStack): 104 | hg = Hourglass(4, opt.nFeats, opt.nResidual, self.inputResH, self.inputResW, 'preact', B, C) 105 | conv1 = nn.Conv2d(opt.nFeats, opt.nFeats, kernel_size=1, stride=1, padding=0) 106 | if opt.init: 107 | nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / 2)) 108 | lin = nn.Sequential( 109 | hg, 110 | nn.BatchNorm2d(opt.nFeats), 111 | nn.ReLU(True), 112 | conv1, 113 | nn.BatchNorm2d(opt.nFeats), 114 | nn.ReLU(True) 115 | ) 116 | tmpOut = nn.Conv2d(opt.nFeats, opt.nClasses, kernel_size=1, stride=1, padding=0) 117 | if opt.init: 118 | nn.init.xavier_normal(tmpOut.weight) 119 | self.stack_lin.append(lin) 120 | self.stack_out.append(tmpOut) 121 | if i < self.nStack - 1: 122 | lin_ = nn.Conv2d(opt.nFeats, opt.nFeats, kernel_size=1, stride=1, padding=0) 123 | tmpOut_ = nn.Conv2d(opt.nClasses, opt.nFeats, kernel_size=1, stride=1, padding=0) 124 | if opt.init: 125 | nn.init.xavier_normal(lin_.weight) 126 | nn.init.xavier_normal(tmpOut_.weight) 127 | self.stack_lin_.append(lin_) 128 | self.stack_out_.append(tmpOut_) 129 | 130 | def forward(self, x: Variable): 131 | out = [] 132 | inter = self.preact(x) 133 | for i in range(self.nStack): 134 | lin = self.stack_lin[i](inter) 135 | tmpOut = self.stack_out[i](lin) 136 | out.append(tmpOut) 137 | if i < self.nStack - 1: 138 | lin_ = self.stack_lin_[i](lin) 139 | tmpOut_ = self.stack_out_[i](tmpOut) 140 | inter = inter + lin_ + tmpOut_ 141 | return out 142 | 143 | 144 | class PyraNet_Inference(nn.Module): 145 | def __init__(self): 146 | super(PyraNet_Inference, self).__init__() 147 | 148 | B, C = opt.baseWidth, opt.cardinality 149 | self.inputResH = opt.inputResH / 4 150 | self.inputResW = opt.inputResW / 4 151 | self.nStack = opt.nStack 152 | 153 | conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3) 154 | if opt.init: 155 | nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / 3)) 156 | 157 | cnv1 = nn.Sequential( 158 | conv1, 159 | nn.BatchNorm2d(64), 160 | nn.ReLU(True) 161 | 
) 162 | 163 | r1 = nn.Sequential( 164 | ResidualPyramid(64, 128, opt.inputResH / 2, opt.inputResW / 2, 165 | stride=1, net_type='no_preact', useConv=False, baseWidth=B, cardinality=C), 166 | nn.MaxPool2d(2) 167 | ) 168 | r4 = ResidualPyramid(128, 128, self.inputResH, self.inputResW, 169 | stride=1, net_type='preact', useConv=False, baseWidth=B, cardinality=C) 170 | r5 = ResidualPyramid(128, opt.nFeats, self.inputResH, self.inputResW, 171 | stride=1, net_type='preact', useConv=False, baseWidth=B, cardinality=C) 172 | self.preact = nn.Sequential( 173 | cnv1, 174 | r1, 175 | r4, 176 | r5 177 | ) 178 | 179 | self.stack_lin = nn.ModuleList() 180 | self.stack_out = nn.ModuleList() 181 | self.stack_lin_ = nn.ModuleList() 182 | self.stack_out_ = nn.ModuleList() 183 | 184 | for i in range(self.nStack): 185 | hg = Hourglass(4, opt.nFeats, opt.nResidual, 186 | self.inputResH, self.inputResW, 'preact', B, C) 187 | conv1 = nn.Conv2d(opt.nFeats, opt.nFeats, 188 | kernel_size=1, stride=1, padding=0) 189 | if opt.init: 190 | nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / 2)) 191 | lin = nn.Sequential( 192 | hg, 193 | nn.BatchNorm2d(opt.nFeats), 194 | nn.ReLU(True), 195 | conv1, 196 | nn.BatchNorm2d(opt.nFeats), 197 | nn.ReLU(True) 198 | ) 199 | tmpOut = nn.Conv2d(opt.nFeats, opt.nClasses, 200 | kernel_size=1, stride=1, padding=0) 201 | if opt.init: 202 | nn.init.xavier_normal(tmpOut.weight) 203 | self.stack_lin.append(lin) 204 | self.stack_out.append(tmpOut) 205 | if i < self.nStack - 1: 206 | lin_ = nn.Conv2d(opt.nFeats, opt.nFeats, 207 | kernel_size=1, stride=1, padding=0) 208 | tmpOut_ = nn.Conv2d(opt.nClasses, opt.nFeats, 209 | kernel_size=1, stride=1, padding=0) 210 | if opt.init: 211 | nn.init.xavier_normal(lin_.weight) 212 | nn.init.xavier_normal(tmpOut_.weight) 213 | self.stack_lin_.append(lin_) 214 | self.stack_out_.append(tmpOut_) 215 | 216 | def forward(self, x: Variable): 217 | inter = self.preact(x) 218 | for i in range(self.nStack): 219 | lin = self.stack_lin[i](inter) 220 | tmpOut = self.stack_out[i](lin) 221 | out = tmpOut 222 | if i < self.nStack - 1: 223 | lin_ = self.stack_lin_[i](lin) 224 | tmpOut_ = self.stack_out_[i](tmpOut) 225 | inter = inter + lin_ + tmpOut_ 226 | return out 227 | 228 | 229 | def createModel(**kw): 230 | model = PyraNet() 231 | return model 232 | 233 | 234 | def createModel_Inference(**kw): 235 | model = PyraNet_Inference() 236 | return model 237 | -------------------------------------------------------------------------------- /SPPE/src/models/layers/DUC.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | 5 | class DUC(nn.Module): 6 | """ 7 | INPUT: inplanes, planes, upscale_factor 8 | OUTPUT: (planes // 4)* ht * wd 9 | """ 10 | def __init__(self, inplanes, planes, upscale_factor=2): 11 | super(DUC, self).__init__() 12 | self.conv = nn.Conv2d(inplanes, planes, kernel_size=3, padding=1, bias=False) 13 | self.bn = nn.BatchNorm2d(planes) 14 | self.relu = nn.ReLU() 15 | 16 | self.pixel_shuffle = nn.PixelShuffle(upscale_factor) 17 | 18 | def forward(self, x): 19 | x = self.conv(x) 20 | x = self.bn(x) 21 | x = self.relu(x) 22 | x = self.pixel_shuffle(x) 23 | return x 24 | -------------------------------------------------------------------------------- /SPPE/src/models/layers/PRM.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from .util_models import ConcatTable, CaddTable, Identity 3 | import 
math 4 | from opt import opt 5 | 6 | 7 | class Residual(nn.Module): 8 | def __init__(self, numIn, numOut, inputResH, inputResW, stride=1, 9 | net_type='preact', useConv=False, baseWidth=9, cardinality=4): 10 | super(Residual, self).__init__() 11 | 12 | self.con = ConcatTable([convBlock(numIn, numOut, inputResH, 13 | inputResW, net_type, baseWidth, cardinality, stride), 14 | skipLayer(numIn, numOut, stride, useConv)]) 15 | self.cadd = CaddTable(True) 16 | 17 | def forward(self, x): 18 | out = self.con(x) 19 | out = self.cadd(out) 20 | return out 21 | 22 | 23 | def convBlock(numIn, numOut, inputResH, inputResW, net_type, baseWidth, cardinality, stride): 24 | numIn = int(numIn) 25 | numOut = int(numOut) 26 | 27 | addTable = ConcatTable() 28 | s_list = [] 29 | if net_type != 'no_preact': 30 | s_list.append(nn.BatchNorm2d(numIn)) 31 | s_list.append(nn.ReLU(True)) 32 | 33 | conv1 = nn.Conv2d(numIn, numOut // 2, kernel_size=1) 34 | if opt.init: 35 | nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / 2)) 36 | s_list.append(conv1) 37 | 38 | s_list.append(nn.BatchNorm2d(numOut // 2)) 39 | s_list.append(nn.ReLU(True)) 40 | 41 | conv2 = nn.Conv2d(numOut // 2, numOut // 2, 42 | kernel_size=3, stride=stride, padding=1) 43 | if opt.init: 44 | nn.init.xavier_normal(conv2.weight) 45 | s_list.append(conv2) 46 | 47 | s = nn.Sequential(*s_list) 48 | addTable.add(s) 49 | 50 | D = math.floor(numOut // baseWidth) 51 | C = cardinality 52 | s_list = [] 53 | 54 | if net_type != 'no_preact': 55 | s_list.append(nn.BatchNorm2d(numIn)) 56 | s_list.append(nn.ReLU(True)) 57 | 58 | conv1 = nn.Conv2d(numIn, D, kernel_size=1, stride=stride) 59 | if opt.init: 60 | nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / C)) 61 | 62 | s_list.append(conv1) 63 | s_list.append(nn.BatchNorm2d(D)) 64 | s_list.append(nn.ReLU(True)) 65 | s_list.append(pyramid(D, C, inputResH, inputResW)) 66 | s_list.append(nn.BatchNorm2d(D)) 67 | s_list.append(nn.ReLU(True)) 68 | 69 | a = nn.Conv2d(D, numOut // 2, kernel_size=1) 70 | a.nBranchIn = C 71 | if opt.init: 72 | nn.init.xavier_normal(a.weight, gain=math.sqrt(1 / C)) 73 | s_list.append(a) 74 | 75 | s = nn.Sequential(*s_list) 76 | addTable.add(s) 77 | 78 | elewiswAdd = nn.Sequential( 79 | addTable, 80 | CaddTable(False) 81 | ) 82 | conv2 = nn.Conv2d(numOut // 2, numOut, kernel_size=1) 83 | if opt.init: 84 | nn.init.xavier_normal(conv2.weight, gain=math.sqrt(1 / 2)) 85 | model = nn.Sequential( 86 | elewiswAdd, 87 | nn.BatchNorm2d(numOut // 2), 88 | nn.ReLU(True), 89 | conv2 90 | ) 91 | return model 92 | 93 | 94 | def pyramid(D, C, inputResH, inputResW): 95 | pyraTable = ConcatTable() 96 | sc = math.pow(2, 1 / C) 97 | for i in range(C): 98 | scaled = 1 / math.pow(sc, i + 1) 99 | conv1 = nn.Conv2d(D, D, kernel_size=3, stride=1, padding=1) 100 | if opt.init: 101 | nn.init.xavier_normal(conv1.weight) 102 | s = nn.Sequential( 103 | nn.FractionalMaxPool2d(2, output_ratio=(scaled, scaled)), 104 | conv1, 105 | nn.UpsamplingBilinear2d(size=(int(inputResH), int(inputResW)))) 106 | pyraTable.add(s) 107 | pyra = nn.Sequential( 108 | pyraTable, 109 | CaddTable(False) 110 | ) 111 | return pyra 112 | 113 | 114 | class skipLayer(nn.Module): 115 | def __init__(self, numIn, numOut, stride, useConv): 116 | super(skipLayer, self).__init__() 117 | self.identity = False 118 | 119 | if numIn == numOut and stride == 1 and not useConv: 120 | self.identity = True 121 | else: 122 | conv1 = nn.Conv2d(numIn, numOut, kernel_size=1, stride=stride) 123 | if opt.init: 124 | nn.init.xavier_normal(conv1.weight, 
gain=math.sqrt(1 / 2)) 125 | self.m = nn.Sequential( 126 | nn.BatchNorm2d(numIn), 127 | nn.ReLU(True), 128 | conv1 129 | ) 130 | 131 | def forward(self, x): 132 | if self.identity: 133 | return x 134 | else: 135 | return self.m(x) 136 | -------------------------------------------------------------------------------- /SPPE/src/models/layers/Residual.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import math 3 | from .util_models import ConcatTable, CaddTable, Identity 4 | from opt import opt 5 | 6 | 7 | def Residual(numIn, numOut, *arg, stride=1, net_type='preact', useConv=False, **kw): 8 | con = ConcatTable([convBlock(numIn, numOut, stride, net_type), 9 | skipLayer(numIn, numOut, stride, useConv)]) 10 | cadd = CaddTable(True) 11 | return nn.Sequential(con, cadd) 12 | 13 | 14 | def convBlock(numIn, numOut, stride, net_type): 15 | s_list = [] 16 | if net_type != 'no_preact': 17 | s_list.append(nn.BatchNorm2d(numIn)) 18 | s_list.append(nn.ReLU(True)) 19 | 20 | conv1 = nn.Conv2d(numIn, numOut // 2, kernel_size=1) 21 | if opt.init: 22 | nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / 2)) 23 | s_list.append(conv1) 24 | 25 | s_list.append(nn.BatchNorm2d(numOut // 2)) 26 | s_list.append(nn.ReLU(True)) 27 | 28 | conv2 = nn.Conv2d(numOut // 2, numOut // 2, kernel_size=3, stride=stride, padding=1) 29 | if opt.init: 30 | nn.init.xavier_normal(conv2.weight) 31 | s_list.append(conv2) 32 | s_list.append(nn.BatchNorm2d(numOut // 2)) 33 | s_list.append(nn.ReLU(True)) 34 | 35 | conv3 = nn.Conv2d(numOut // 2, numOut, kernel_size=1) 36 | if opt.init: 37 | nn.init.xavier_normal(conv3.weight) 38 | s_list.append(conv3) 39 | 40 | return nn.Sequential(*s_list) 41 | 42 | 43 | def skipLayer(numIn, numOut, stride, useConv): 44 | if numIn == numOut and stride == 1 and not useConv: 45 | return Identity() 46 | else: 47 | conv1 = nn.Conv2d(numIn, numOut, kernel_size=1, stride=stride) 48 | if opt.init: 49 | nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / 2)) 50 | return nn.Sequential( 51 | nn.BatchNorm2d(numIn), 52 | nn.ReLU(True), 53 | conv1 54 | ) 55 | -------------------------------------------------------------------------------- /SPPE/src/models/layers/Resnet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | 5 | class Bottleneck(nn.Module): 6 | expansion = 4 7 | 8 | def __init__(self, inplanes, planes, stride=1, downsample=None): 9 | super(Bottleneck, self).__init__() 10 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=1, bias=False) 11 | self.bn1 = nn.BatchNorm2d(planes) 12 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 13 | self.bn2 = nn.BatchNorm2d(planes) 14 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, stride=1, bias=False) 15 | self.bn3 = nn.BatchNorm2d(planes * 4) 16 | self.downsample = downsample 17 | self.stride = stride 18 | 19 | def forward(self, x): 20 | residual = x 21 | 22 | out = F.relu(self.bn1(self.conv1(x)), inplace=True) 23 | out = F.relu(self.bn2(self.conv2(out)), inplace=True) 24 | out = self.bn3(self.conv3(out)) 25 | 26 | if self.downsample is not None: 27 | residual = self.downsample(x) 28 | 29 | out += residual 30 | out = F.relu(out, inplace=True) 31 | 32 | return out 33 | 34 | 35 | class ResNet(nn.Module): 36 | """ Resnet """ 37 | def __init__(self, architecture): 38 | super(ResNet, self).__init__() 39 | assert architecture in 
["resnet50", "resnet101"] 40 | self.inplanes = 64 41 | self.layers = [3, 4, {"resnet50": 6, "resnet101": 23}[architecture], 3] 42 | self.block = Bottleneck 43 | 44 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) 45 | self.bn1 = nn.BatchNorm2d(64, eps=1e-5, momentum=0.01, affine=True) 46 | self.relu = nn.ReLU(inplace=True) 47 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2) 48 | 49 | self.layer1 = self.make_layer(self.block, 64, self.layers[0]) 50 | self.layer2 = self.make_layer(self.block, 128, self.layers[1], stride=2) 51 | self.layer3 = self.make_layer(self.block, 256, self.layers[2], stride=2) 52 | 53 | self.layer4 = self.make_layer( 54 | self.block, 512, self.layers[3], stride=2) 55 | 56 | def forward(self, x): 57 | x = self.maxpool(self.relu(self.bn1(self.conv1(x)))) 58 | x = self.layer1(x) 59 | x = self.layer2(x) 60 | x = self.layer3(x) 61 | x = self.layer4(x) 62 | return x 63 | 64 | def stages(self): 65 | return [self.layer1, self.layer2, self.layer3, self.layer4] 66 | 67 | def make_layer(self, block, planes, blocks, stride=1): 68 | downsample = None 69 | if stride != 1 or self.inplanes != planes * block.expansion: 70 | downsample = nn.Sequential( 71 | nn.Conv2d(self.inplanes, planes * block.expansion, 72 | kernel_size=1, stride=stride, bias=False), 73 | nn.BatchNorm2d(planes * block.expansion), 74 | ) 75 | 76 | layers = [] 77 | layers.append(block(self.inplanes, planes, stride, downsample)) 78 | self.inplanes = planes * block.expansion 79 | for i in range(1, blocks): 80 | layers.append(block(self.inplanes, planes)) 81 | 82 | return nn.Sequential(*layers) 83 | -------------------------------------------------------------------------------- /SPPE/src/models/layers/SE_Resnet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from .SE_module import SELayer 3 | import torch.nn.functional as F 4 | 5 | 6 | class Bottleneck(nn.Module): 7 | expansion = 4 8 | 9 | def __init__(self, inplanes, planes, stride=1, downsample=None, reduction=False): 10 | super(Bottleneck, self).__init__() 11 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 12 | self.bn1 = nn.BatchNorm2d(planes) 13 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 14 | padding=1, bias=False) 15 | self.bn2 = nn.BatchNorm2d(planes) 16 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 17 | self.bn3 = nn.BatchNorm2d(planes * 4) 18 | if reduction: 19 | self.se = SELayer(planes * 4) 20 | 21 | self.reduc = reduction 22 | self.downsample = downsample 23 | self.stride = stride 24 | 25 | def forward(self, x): 26 | residual = x 27 | 28 | out = F.relu(self.bn1(self.conv1(x)), inplace=True) 29 | out = F.relu(self.bn2(self.conv2(out)), inplace=True) 30 | 31 | out = self.conv3(out) 32 | out = self.bn3(out) 33 | if self.reduc: 34 | out = self.se(out) 35 | 36 | if self.downsample is not None: 37 | residual = self.downsample(x) 38 | 39 | out += residual 40 | out = F.relu(out) 41 | 42 | return out 43 | 44 | 45 | class SEResnet(nn.Module): 46 | """ SEResnet """ 47 | 48 | def __init__(self, architecture): 49 | super(SEResnet, self).__init__() 50 | assert architecture in ["resnet50", "resnet101"] 51 | self.inplanes = 64 52 | self.layers = [3, 4, {"resnet50": 6, "resnet101": 23}[architecture], 3] 53 | self.block = Bottleneck 54 | 55 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, 56 | stride=2, padding=3, bias=False) 57 | self.bn1 = nn.BatchNorm2d(64, eps=1e-5, momentum=0.01, affine=True) 58 
| self.relu = nn.ReLU(inplace=True) 59 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 60 | 61 | self.layer1 = self.make_layer(self.block, 64, self.layers[0]) 62 | self.layer2 = self.make_layer( 63 | self.block, 128, self.layers[1], stride=2) 64 | self.layer3 = self.make_layer( 65 | self.block, 256, self.layers[2], stride=2) 66 | 67 | self.layer4 = self.make_layer( 68 | self.block, 512, self.layers[3], stride=2) 69 | 70 | def forward(self, x): 71 | x = self.maxpool(self.relu(self.bn1(self.conv1(x)))) # 64 * h/4 * w/4 72 | x = self.layer1(x) # 256 * h/4 * w/4 73 | x = self.layer2(x) # 512 * h/8 * w/8 74 | x = self.layer3(x) # 1024 * h/16 * w/16 75 | x = self.layer4(x) # 2048 * h/32 * w/32 76 | return x 77 | 78 | def stages(self): 79 | return [self.layer1, self.layer2, self.layer3, self.layer4] 80 | 81 | def make_layer(self, block, planes, blocks, stride=1): 82 | downsample = None 83 | if stride != 1 or self.inplanes != planes * block.expansion: 84 | downsample = nn.Sequential( 85 | nn.Conv2d(self.inplanes, planes * block.expansion, 86 | kernel_size=1, stride=stride, bias=False), 87 | nn.BatchNorm2d(planes * block.expansion), 88 | ) 89 | 90 | layers = [] 91 | if downsample is not None: 92 | layers.append(block(self.inplanes, planes, stride, downsample, reduction=True)) 93 | else: 94 | layers.append(block(self.inplanes, planes, stride, downsample)) 95 | self.inplanes = planes * block.expansion 96 | for i in range(1, blocks): 97 | layers.append(block(self.inplanes, planes)) 98 | 99 | return nn.Sequential(*layers) 100 | -------------------------------------------------------------------------------- /SPPE/src/models/layers/SE_module.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | 4 | class SELayer(nn.Module): 5 | def __init__(self, channel, reduction=1): 6 | super(SELayer, self).__init__() 7 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 8 | self.fc = nn.Sequential( 9 | nn.Linear(channel, channel // reduction), 10 | nn.ReLU(inplace=True), 11 | nn.Linear(channel // reduction, channel), 12 | nn.Sigmoid() 13 | ) 14 | 15 | def forward(self, x): 16 | b, c, _, _ = x.size() 17 | y = self.avg_pool(x).view(b, c) 18 | y = self.fc(y).view(b, c, 1, 1) 19 | return x * y 20 | -------------------------------------------------------------------------------- /SPPE/src/models/layers/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import * 2 | -------------------------------------------------------------------------------- /SPPE/src/models/layers/util_models.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Variable 4 | 5 | 6 | class ConcatTable(nn.Module): 7 | def __init__(self, module_list=None): 8 | super(ConcatTable, self).__init__() 9 | 10 | self.modules_list = nn.ModuleList(module_list) 11 | 12 | def forward(self, x: Variable): 13 | y = [] 14 | for i in range(len(self.modules_list)): 15 | y.append(self.modules_list[i](x)) 16 | return y 17 | 18 | def add(self, module): 19 | self.modules_list.append(module) 20 | 21 | 22 | class CaddTable(nn.Module): 23 | def __init__(self, inplace=False): 24 | super(CaddTable, self).__init__() 25 | self.inplace = inplace 26 | 27 | def forward(self, x: Variable or list): 28 | return torch.stack(x, 0).sum(0) 29 | 30 | 31 | class Identity(nn.Module): 32 | def __init__(self, params=None): 33 | super(Identity, self).__init__() 34 | self.params = nn.ParameterList(params) 35 | 36 | def forward(self, x: Variable or list): 37 | return x 38 | -------------------------------------------------------------------------------- /SPPE/src/opt.py: -------------------------------------------------------------------------------- 1 | """import argparse 2 | import torch 3 | 4 | parser = argparse.ArgumentParser(description='PyTorch AlphaPose Training') 5 | parser.add_argument("--return_counts", type=bool, default=True) 6 | parser.add_argument("--mode", default='client') 7 | parser.add_argument("--port", default=52162) 8 | 9 | "----------------------------- General options -----------------------------" 10 | parser.add_argument('--expID', default='default', type=str, 11 | help='Experiment ID') 12 | parser.add_argument('--dataset', default='coco', type=str, 13 | help='Dataset choice: mpii | coco') 14 | parser.add_argument('--nThreads', default=30, type=int, 15 | help='Number of data loading threads') 16 | parser.add_argument('--debug', default=False, type=bool, 17 | help='Print the debug information') 18 | parser.add_argument('--snapshot', default=1, type=int, 19 | help='How often to take a snapshot of the model (0 = never)') 20 | 21 | "----------------------------- AlphaPose options -----------------------------" 22 | parser.add_argument('--addDPG', default=False, type=bool, 23 | help='Train with data augmentation') 24 | 25 | "----------------------------- Model options -----------------------------" 26 | parser.add_argument('--netType', default='hgPRM', type=str, 27 | help='Options: hgPRM | resnext') 28 | parser.add_argument('--loadModel', default=None, type=str, 29 | help='Provide full path to a previously trained model') 30 | parser.add_argument('--Continue', default=False, type=bool, 31 | help='Pick up where an experiment left off') 32 | parser.add_argument('--nFeats', default=256, type=int, 33 | help='Number of features in the hourglass') 34 | parser.add_argument('--nClasses', default=33, type=int, 35 | help='Number of output channel') 36 | parser.add_argument('--nStack', default=8, type=int, 37 | help='Number of hourglasses to stack') 38 | 39 | "----------------------------- Hyperparameter options -----------------------------" 40 | parser.add_argument('--LR', default=2.5e-4, type=float, 41 | help='Learning rate') 42 | parser.add_argument('--momentum', default=0, type=float, 43 | help='Momentum') 44 | parser.add_argument('--weightDecay', default=0, type=float, 45 | help='Weight decay') 46 
| parser.add_argument('--crit', default='MSE', type=str, 47 | help='Criterion type') 48 | parser.add_argument('--optMethod', default='rmsprop', type=str, 49 | help='Optimization method: rmsprop | sgd | nag | adadelta') 50 | 51 | 52 | "----------------------------- Training options -----------------------------" 53 | parser.add_argument('--nEpochs', default=50, type=int, 54 | help='Number of hourglasses to stack') 55 | parser.add_argument('--epoch', default=0, type=int, 56 | help='Current epoch') 57 | parser.add_argument('--trainBatch', default=40, type=int, 58 | help='Train-batch size') 59 | parser.add_argument('--validBatch', default=20, type=int, 60 | help='Valid-batch size') 61 | parser.add_argument('--trainIters', default=0, type=int, 62 | help='Total train iters') 63 | parser.add_argument('--valIters', default=0, type=int, 64 | help='Total valid iters') 65 | parser.add_argument('--init', default=None, type=str, 66 | help='Initialization') 67 | "----------------------------- Data options -----------------------------" 68 | parser.add_argument('--inputResH', default=384, type=int, 69 | help='Input image height') 70 | parser.add_argument('--inputResW', default=320, type=int, 71 | help='Input image width') 72 | parser.add_argument('--outputResH', default=96, type=int, 73 | help='Output heatmap height') 74 | parser.add_argument('--outputResW', default=80, type=int, 75 | help='Output heatmap width') 76 | parser.add_argument('--scale', default=0.25, type=float, 77 | help='Degree of scale augmentation') 78 | parser.add_argument('--rotate', default=30, type=float, 79 | help='Degree of rotation augmentation') 80 | parser.add_argument('--hmGauss', default=1, type=int, 81 | help='Heatmap gaussian size') 82 | 83 | "----------------------------- PyraNet options -----------------------------" 84 | parser.add_argument('--baseWidth', default=9, type=int, 85 | help='Heatmap gaussian size') 86 | parser.add_argument('--cardinality', default=5, type=int, 87 | help='Heatmap gaussian size') 88 | parser.add_argument('--nResidual', default=1, type=int, 89 | help='Number of residual modules at each location in the pyranet') 90 | 91 | "----------------------------- Distribution options -----------------------------" 92 | parser.add_argument('--dist', dest='dist', type=int, default=1, 93 | help='distributed training or not') 94 | parser.add_argument('--backend', dest='backend', type=str, default='gloo', 95 | help='backend for distributed training') 96 | parser.add_argument('--port', dest='port', 97 | help='port of server') 98 | opt = parser.parse_args()""" 99 | 100 | """if opt.Continue: 101 | opt = torch.load("../exp/{}/{}/option.pkl".format(opt.dataset, opt.expID)) 102 | opt.Continue = True 103 | opt.nEpochs = 50 104 | print("--- Continue ---")""" 105 | 106 | 107 | class opt: 108 | nClasses = 33 109 | inputResH = 384 110 | inputResW = 320 111 | outputResH = 96 112 | outputResW = 80 113 | scale = 0.25 114 | rotate = 30 115 | hmGauss = 1 116 | -------------------------------------------------------------------------------- /SPPE/src/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import * 2 | -------------------------------------------------------------------------------- /SPPE/src/utils/dataset/.coco.py.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GajuuzZ/Human-Falling-Detect-Tracks/7ed2faa4d6147dfd576f58869b6c25545208af35/SPPE/src/utils/dataset/.coco.py.swp -------------------------------------------------------------------------------- /SPPE/src/utils/dataset/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GajuuzZ/Human-Falling-Detect-Tracks/7ed2faa4d6147dfd576f58869b6c25545208af35/SPPE/src/utils/dataset/__init__.py -------------------------------------------------------------------------------- /SPPE/src/utils/dataset/coco.py: -------------------------------------------------------------------------------- 1 | import os 2 | import h5py 3 | from functools import reduce 4 | 5 | import torch.utils.data as data 6 | from ..pose import generateSampleBox 7 | from opt import opt 8 | 9 | 10 | class Mscoco(data.Dataset): 11 | def __init__(self, train=True, sigma=1, 12 | scale_factor=(0.2, 0.3), rot_factor=40, label_type='Gaussian'): 13 | self.img_folder = '../data/coco/images' # root image folders 14 | self.is_train = train # training set or test set 15 | self.inputResH = opt.inputResH 16 | self.inputResW = opt.inputResW 17 | self.outputResH = opt.outputResH 18 | self.outputResW = opt.outputResW 19 | self.sigma = sigma 20 | self.scale_factor = scale_factor 21 | self.rot_factor = rot_factor 22 | self.label_type = label_type 23 | 24 | self.nJoints_coco = 17 25 | self.nJoints_mpii = 16 26 | self.nJoints = 33 27 | 28 | self.accIdxs = (1, 2, 3, 4, 5, 6, 7, 8, 29 | 9, 10, 11, 12, 13, 14, 15, 16, 17) 30 | self.flipRef = ((2, 3), (4, 5), (6, 7), 31 | (8, 9), (10, 11), (12, 13), 32 | (14, 15), (16, 17)) 33 | 34 | # create train/val split 35 | with h5py.File('../data/coco/annot_clean.h5', 'r') as annot: 36 | # train 37 | self.imgname_coco_train = annot['imgname'][:-5887] 38 | self.bndbox_coco_train = annot['bndbox'][:-5887] 39 | self.part_coco_train = annot['part'][:-5887] 40 | # val 41 | self.imgname_coco_val = annot['imgname'][-5887:] 42 | self.bndbox_coco_val = annot['bndbox'][-5887:] 43 | self.part_coco_val = annot['part'][-5887:] 44 | 45 | self.size_train = self.imgname_coco_train.shape[0] 46 | self.size_val = self.imgname_coco_val.shape[0] 47 | 48 | def __getitem__(self, index): 49 | sf = self.scale_factor 50 | 51 | if self.is_train: 52 | part = self.part_coco_train[index] 53 | bndbox = self.bndbox_coco_train[index] 54 | imgname = self.imgname_coco_train[index] 55 | else: 56 | part = self.part_coco_val[index] 57 | bndbox = self.bndbox_coco_val[index] 58 | imgname = self.imgname_coco_val[index] 59 | 60 | imgname = reduce(lambda x, y: x + y, map(lambda x: chr(int(x)), imgname)) 61 | img_path = os.path.join(self.img_folder, imgname) 62 | 63 | metaData = generateSampleBox(img_path, bndbox, part, self.nJoints, 64 | 'coco', sf, self, train=self.is_train) 65 | 66 | inp, out_bigcircle, out_smallcircle, out, setMask = metaData 67 | 68 | label = [] 69 | for i in range(opt.nStack): 70 | if i < 2: 71 | # label.append(out_bigcircle.clone()) 72 | label.append(out.clone()) 73 | elif i < 4: 74 | # label.append(out_smallcircle.clone()) 75 | label.append(out.clone()) 76 | else: 77 | label.append(out.clone()) 78 | 79 | return inp, label, setMask, 'coco' 80 | 81 | def __len__(self): 82 | if self.is_train: 83 | return self.size_train 84 | 
else: 85 | return self.size_val 86 | -------------------------------------------------------------------------------- /SPPE/src/utils/dataset/fuse.py: -------------------------------------------------------------------------------- 1 | import os 2 | import h5py 3 | from functools import reduce 4 | 5 | import torch.utils.data as data 6 | from ..pose import generateSampleBox 7 | from opt import opt 8 | 9 | 10 | class Mscoco(data.Dataset): 11 | def __init__(self, train=True, sigma=1, 12 | scale_factor=0.25, rot_factor=30, label_type='Gaussian'): 13 | self.img_folder = '../data/' # root image folders 14 | self.is_train = train # training set or test set 15 | self.inputResH = 320 16 | self.inputResW = 256 17 | self.outputResH = 80 18 | self.outputResW = 64 19 | self.sigma = sigma 20 | self.scale_factor = (0.2, 0.3) 21 | self.rot_factor = rot_factor 22 | self.label_type = label_type 23 | 24 | self.nJoints_coco = 17 25 | self.nJoints_mpii = 16 26 | self.nJoints = 33 27 | 28 | self.accIdxs = (1, 2, 3, 4, 5, 6, 7, 8, # COCO 29 | 9, 10, 11, 12, 13, 14, 15, 16, 17, 30 | 18, 19, 20, 21, 22, 23, # MPII 31 | 28, 29, 32, 33) 32 | 33 | self.flipRef = ((2, 3), (4, 5), (6, 7), # COCO 34 | (8, 9), (10, 11), (12, 13), 35 | (14, 15), (16, 17), 36 | (18, 23), (19, 22), (20, 21), # MPII 37 | (28, 33), (29, 32), (30, 31)) 38 | 39 | ''' 40 | Create train/val split 41 | ''' 42 | # COCO 43 | with h5py.File('../data/coco/annot_clean.h5', 'r') as annot: 44 | # train 45 | self.imgname_coco_train = annot['imgname'][:-5887] 46 | self.bndbox_coco_train = annot['bndbox'][:-5887] 47 | self.part_coco_train = annot['part'][:-5887] 48 | # val 49 | self.imgname_coco_val = annot['imgname'][-5887:] 50 | self.bndbox_coco_val = annot['bndbox'][-5887:] 51 | self.part_coco_val = annot['part'][-5887:] 52 | # MPII 53 | with h5py.File('../data/mpii/annot_mpii.h5', 'r') as annot: 54 | # train 55 | self.imgname_mpii_train = annot['imgname'][:-1358] 56 | self.bndbox_mpii_train = annot['bndbox'][:-1358] 57 | self.part_mpii_train = annot['part'][:-1358] 58 | # val 59 | self.imgname_mpii_val = annot['imgname'][-1358:] 60 | self.bndbox_mpii_val = annot['bndbox'][-1358:] 61 | self.part_mpii_val = annot['part'][-1358:] 62 | 63 | self.size_coco_train = self.imgname_coco_train.shape[0] 64 | self.size_coco_val = self.imgname_coco_val.shape[0] 65 | self.size_train = self.imgname_coco_train.shape[0] + self.imgname_mpii_train.shape[0] 66 | self.size_val = self.imgname_coco_val.shape[0] + self.imgname_mpii_val.shape[0] 67 | self.train, self.valid = [], [] 68 | 69 | def __getitem__(self, index): 70 | sf = self.scale_factor 71 | 72 | if self.is_train and index < self.size_coco_train: # COCO 73 | part = self.part_coco_train[index] 74 | bndbox = self.bndbox_coco_train[index] 75 | imgname = self.imgname_coco_train[index] 76 | imgset = 'coco' 77 | elif self.is_train: # MPII 78 | part = self.part_mpii_train[index - self.size_coco_train] 79 | bndbox = self.bndbox_mpii_train[index - self.size_coco_train] 80 | imgname = self.imgname_mpii_train[index - self.size_coco_train] 81 | imgset = 'mpii' 82 | elif index < self.size_coco_val: 83 | part = self.part_coco_val[index] 84 | bndbox = self.bndbox_coco_val[index] 85 | imgname = self.imgname_coco_val[index] 86 | imgset = 'coco' 87 | else: 88 | part = self.part_mpii_val[index - self.size_coco_val] 89 | bndbox = self.bndbox_mpii_val[index - self.size_coco_val] 90 | imgname = self.imgname_mpii_val[index - self.size_coco_val] 91 | imgset = 'mpii' 92 | 93 | if imgset == 'coco': 94 | imgname = reduce(lambda x, y: x + y, 
map(lambda x: chr(int(x)), imgname)) 95 | else: 96 | imgname = reduce(lambda x, y: x + y, map(lambda x: chr(int(x)), imgname))[:13] 97 | 98 | img_path = os.path.join(self.img_folder, imgset, 'images', imgname) 99 | 100 | metaData = generateSampleBox(img_path, bndbox, part, self.nJoints, 101 | imgset, sf, self, train=self.is_train) 102 | 103 | inp, out_bigcircle, out_smallcircle, out, setMask = metaData 104 | 105 | label = [] 106 | for i in range(opt.nStack): 107 | if i < 2: 108 | # label.append(out_bigcircle.clone()) 109 | label.append(out.clone()) 110 | elif i < 4: 111 | # label.append(out_smallcircle.clone()) 112 | label.append(out.clone()) 113 | else: 114 | label.append(out.clone()) 115 | 116 | return inp, label, setMask, imgset 117 | 118 | def __len__(self): 119 | if self.is_train: 120 | return self.size_train 121 | else: 122 | return self.size_val 123 | -------------------------------------------------------------------------------- /SPPE/src/utils/dataset/mpii.py: -------------------------------------------------------------------------------- 1 | import os 2 | import h5py 3 | from functools import reduce 4 | 5 | import torch.utils.data as data 6 | from ..pose import generateSampleBox 7 | from opt import opt 8 | 9 | 10 | class Mpii(data.Dataset): 11 | def __init__(self, train=True, sigma=1, 12 | scale_factor=0.25, rot_factor=30, label_type='Gaussian'): 13 | self.img_folder = '../data/mpii/images' # root image folders 14 | self.is_train = train # training set or test set 15 | self.inputResH = 320 16 | self.inputResW = 256 17 | self.outputResH = 80 18 | self.outputResW = 64 19 | self.sigma = sigma 20 | self.scale_factor = (0.2, 0.3) 21 | self.rot_factor = rot_factor 22 | self.label_type = label_type 23 | 24 | self.nJoints_mpii = 16 25 | self.nJoints = 16 26 | 27 | self.accIdxs = (1, 2, 3, 4, 5, 6, 28 | 11, 12, 15, 16) 29 | self.flipRef = ((1, 6), (2, 5), (3, 4), 30 | (11, 16), (12, 15), (13, 14)) 31 | 32 | # create train/val split 33 | with h5py.File('../data/mpii/annot_mpii.h5', 'r') as annot: 34 | # train 35 | self.imgname_mpii_train = annot['imgname'][:-1358] 36 | self.bndbox_mpii_train = annot['bndbox'][:-1358] 37 | self.part_mpii_train = annot['part'][:-1358] 38 | # val 39 | self.imgname_mpii_val = annot['imgname'][-1358:] 40 | self.bndbox_mpii_val = annot['bndbox'][-1358:] 41 | self.part_mpii_val = annot['part'][-1358:] 42 | 43 | self.size_train = self.imgname_mpii_train.shape[0] 44 | self.size_val = self.imgname_mpii_val.shape[0] 45 | self.train, self.valid = [], [] 46 | 47 | def __getitem__(self, index): 48 | sf = self.scale_factor 49 | 50 | if self.is_train: 51 | part = self.part_mpii_train[index] 52 | bndbox = self.bndbox_mpii_train[index] 53 | imgname = self.imgname_mpii_train[index] 54 | else: 55 | part = self.part_mpii_val[index] 56 | bndbox = self.bndbox_mpii_val[index] 57 | imgname = self.imgname_mpii_val[index] 58 | 59 | imgname = reduce(lambda x, y: x + y, map(lambda x: chr(int(x)), imgname))[:13] 60 | img_path = os.path.join(self.img_folder, imgname) 61 | 62 | metaData = generateSampleBox(img_path, bndbox, part, self.nJoints, 63 | 'mpii', sf, self, train=self.is_train) 64 | 65 | inp, out_bigcircle, out_smallcircle, out, setMask = metaData 66 | 67 | label = [] 68 | for i in range(opt.nStack): 69 | if i < 2: 70 | #label.append(out_bigcircle.clone()) 71 | label.append(out.clone()) 72 | elif i < 4: 73 | #label.append(out_smallcircle.clone()) 74 | label.append(out.clone()) 75 | else: 76 | label.append(out.clone()) 77 | 78 | return inp, label, setMask 79 | 80 | def 
__len__(self): 81 | if self.is_train: 82 | return self.size_train 83 | else: 84 | return self.size_val 85 | -------------------------------------------------------------------------------- /SPPE/src/utils/eval.py: -------------------------------------------------------------------------------- 1 | from SPPE.src.opt import opt 2 | try: 3 | from utils import transformBoxInvert, transformBoxInvert_batch, findPeak, processPeaks 4 | except ImportError: 5 | from SPPE.src.utils.img import transformBoxInvert, transformBoxInvert_batch, findPeak, processPeaks 6 | import torch 7 | 8 | 9 | class DataLogger(object): 10 | def __init__(self): 11 | self.clear() 12 | 13 | def clear(self): 14 | self.value = 0 15 | self.sum = 0 16 | self.cnt = 0 17 | self.avg = 0 18 | 19 | def update(self, value, n=1): 20 | self.value = value 21 | self.sum += value * n 22 | self.cnt += n 23 | self._cal_avg() 24 | 25 | def _cal_avg(self): 26 | self.avg = self.sum / self.cnt 27 | 28 | 29 | def accuracy(output, label, dataset): 30 | if type(output) == list: 31 | return accuracy(output[opt.nStack - 1], label[opt.nStack - 1], dataset) 32 | else: 33 | return heatmapAccuracy(output.cpu().data, label.cpu().data, dataset.accIdxs) 34 | 35 | 36 | def heatmapAccuracy(output, label, idxs): 37 | preds = getPreds(output) 38 | gt = getPreds(label) 39 | 40 | norm = torch.ones(preds.size(0)) * opt.outputResH / 10 41 | dists = calc_dists(preds, gt, norm) 42 | #print(dists) 43 | acc = torch.zeros(len(idxs) + 1) 44 | avg_acc = 0 45 | cnt = 0 46 | for i in range(len(idxs)): 47 | acc[i + 1] = dist_acc(dists[idxs[i] - 1]) 48 | if acc[i + 1] >= 0: 49 | avg_acc = avg_acc + acc[i + 1] 50 | cnt += 1 51 | if cnt != 0: 52 | acc[0] = avg_acc / cnt 53 | return acc 54 | 55 | 56 | def getPreds(hm): 57 | """ get predictions from score maps in torch Tensor 58 | return type: torch.LongTensor 59 | """ 60 | assert hm.dim() == 4, 'Score maps should be 4-dim' 61 | maxval, idx = torch.max(hm.view(hm.size(0), hm.size(1), -1), 2) 62 | 63 | maxval = maxval.view(hm.size(0), hm.size(1), 1) 64 | idx = idx.view(hm.size(0), hm.size(1), 1) + 1 65 | 66 | preds = idx.repeat(1, 1, 2).float() 67 | 68 | preds[:, :, 0] = (preds[:, :, 0] - 1) % hm.size(3) 69 | preds[:, :, 1] = torch.floor((preds[:, :, 1] - 1) / hm.size(3)) 70 | 71 | # pred_mask = maxval.gt(0).repeat(1, 1, 2).float() 72 | # preds *= pred_mask 73 | return preds 74 | 75 | 76 | def calc_dists(preds, target, normalize): 77 | preds = preds.float().clone() 78 | target = target.float().clone() 79 | dists = torch.zeros(preds.size(1), preds.size(0)) 80 | for n in range(preds.size(0)): 81 | for c in range(preds.size(1)): 82 | if target[n, c, 0] > 0 and target[n, c, 1] > 0: 83 | dists[c, n] = torch.dist( 84 | preds[n, c, :], target[n, c, :]) / normalize[n] 85 | else: 86 | dists[c, n] = -1 87 | return dists 88 | 89 | 90 | def dist_acc(dists, thr=0.5): 91 | """ Return percentage below threshold while ignoring values with a -1 """ 92 | if dists.ne(-1).sum() > 0: 93 | return dists.le(thr).eq(dists.ne(-1)).float().sum() * 1.0 / dists.ne(-1).float().sum() 94 | else: 95 | return - 1 96 | 97 | 98 | def postprocess(output): 99 | p = getPreds(output) 100 | 101 | for i in range(p.size(0)): 102 | for j in range(p.size(1)): 103 | hm = output[i][j] 104 | pX, pY = int(round(p[i][j][0])), int(round(p[i][j][1])) 105 | if 0 < pX < opt.outputResW - 1 and 0 < pY < opt.outputResH - 1: 106 | diff = torch.Tensor((hm[pY][pX + 1] - hm[pY][pX - 1], hm[pY + 1][pX] - hm[pY - 1][pX])) 107 | p[i][j] += diff.sign() * 0.25 108 | p -= 0.5 109 | 110 | return p 
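`getPreds` above turns the flattened argmax of each heatmap back into `(x, y)` grid coordinates, and `postprocess` then nudges the peak a quarter pixel toward the larger neighbour. A self-contained sketch of the same arithmetic (written 0-based) on a tiny synthetic heatmap, with invented values:

```python
import numpy as np

H, W = 4, 5
hm = np.zeros((H, W), dtype=np.float32)
hm[2, 3] = 1.0       # peak at row 2, column 3
hm[2, 4] = 0.4       # right neighbour hotter than the left one (0.0)

idx = int(hm.reshape(-1).argmax())   # flattened index
x, y = idx % W, idx // W             # column, row
assert (x, y) == (3, 2)

# Quarter-pixel refinement: step 0.25 toward the larger of the two neighbours.
dx = 0.25 * np.sign(hm[y, x + 1] - hm[y, x - 1])
dy = 0.25 * np.sign(hm[y + 1, x] - hm[y - 1, x])
print(x + dx, y + dy)                # 3.25 2.0
```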
111 | 112 | 113 | def getPrediction(hms, pt1, pt2, inpH, inpW, resH, resW): 114 | """ 115 | Get keypoint location from heatmaps 116 | """ 117 | assert hms.dim() == 4, 'Score maps should be 4-dim' 118 | maxval, idx = torch.max(hms.view(hms.size(0), hms.size(1), -1), 2) 119 | 120 | maxval = maxval.view(hms.size(0), hms.size(1), 1) 121 | idx = idx.view(hms.size(0), hms.size(1), 1) + 1 122 | 123 | preds = idx.repeat(1, 1, 2).float() 124 | 125 | preds[:, :, 0] = (preds[:, :, 0] - 1) % hms.size(3) 126 | preds[:, :, 1] = torch.floor((preds[:, :, 1] - 1) / hms.size(3)) 127 | 128 | pred_mask = maxval.gt(0).repeat(1, 1, 2).float() 129 | preds *= pred_mask 130 | 131 | # Very simple post-processing step to improve performance at tight PCK thresholds 132 | """for i in range(preds.size(0)): 133 | for j in range(preds.size(1)): 134 | hm = hms[i][j] 135 | pX, pY = int(round(float(preds[i][j][0]))), int(round(float(preds[i][j][1]))) 136 | if 0 < pX < opt.outputResW - 1 and 0 < pY < opt.outputResH - 1: 137 | diff = torch.Tensor( 138 | (hm[pY][pX + 1] - hm[pY][pX - 1], hm[pY + 1][pX] - hm[pY - 1][pX])) 139 | preds[i][j] += diff.sign() * 0.25 140 | preds += 0.2""" 141 | 142 | preds_tf = torch.zeros(preds.size()) 143 | preds_tf = transformBoxInvert_batch(preds, pt1, pt2, inpH, inpW, resH, resW) 144 | return preds, preds_tf, maxval 145 | 146 | 147 | def getMultiPeakPrediction(hms, pt1, pt2, inpH, inpW, resH, resW): 148 | 149 | assert hms.dim() == 4, 'Score maps should be 4-dim' 150 | 151 | preds_img = {} 152 | hms = hms.numpy() 153 | for n in range(hms.shape[0]): # Number of samples 154 | preds_img[n] = {} # Result of sample: n 155 | for k in range(hms.shape[1]): # Number of keypoints 156 | preds_img[n][k] = [] # Result of keypoint: k 157 | hm = hms[n][k] 158 | 159 | candidate_points = findPeak(hm) 160 | 161 | res_pt = processPeaks(candidate_points, hm, 162 | pt1[n], pt2[n], inpH, inpW, resH, resW) 163 | 164 | preds_img[n][k] = res_pt 165 | 166 | return preds_img 167 | 168 | 169 | def getPrediction_batch(hms, pt1, pt2, inpH, inpW, resH, resW): 170 | """ 171 | Get keypoint location from heatmaps 172 | pt1, pt2: [n, 2] 173 | OUTPUT: 174 | preds: [n, 17, 2] 175 | """ 176 | 177 | assert hms.dim() == 4, 'Score maps should be 4-dim' 178 | flat_hms = hms.view(hms.size(0), hms.size(1), -1) 179 | maxval, idx = torch.max(flat_hms, 2) 180 | 181 | maxval = maxval.view(hms.size(0), hms.size(1), 1) 182 | idx = idx.view(hms.size(0), hms.size(1), 1) + 1 183 | 184 | preds = idx.repeat(1, 1, 2).float() 185 | 186 | preds[:, :, 0] = (preds[:, :, 0] - 1) % hms.size(3) 187 | preds[:, :, 1] = torch.floor((preds[:, :, 1] - 1) / hms.size(3)) 188 | 189 | pred_mask = maxval.gt(0).repeat(1, 1, 2).float() 190 | preds *= pred_mask 191 | 192 | # Very simple post-processing step to improve performance at tight PCK thresholds 193 | idx_up = (idx - hms.size(3)).clamp(0, flat_hms.size(2) - 1) 194 | idx_down = (idx + hms.size(3)).clamp(0, flat_hms.size(2) - 1) 195 | idx_left = (idx - 1).clamp(0, flat_hms.size(2) - 1) 196 | idx_right = (idx + 1).clamp(0, flat_hms.size(2) - 1) 197 | 198 | maxval_up = flat_hms.gather(2, idx_up) 199 | maxval_down = flat_hms.gather(2, idx_down) 200 | maxval_left = flat_hms.gather(2, idx_left) 201 | maxval_right = flat_hms.gather(2, idx_right) 202 | 203 | diff1 = (maxval_right - maxval_left).sign() * 0.25 204 | diff2 = (maxval_down - maxval_up).sign() * 0.25 205 | diff1[idx_up <= hms.size(3)] = 0 206 | diff1[idx_down / hms.size(3) >= (hms.size(3) - 1)] = 0 207 | diff2[(idx_left % hms.size(3)) == 0] = 0 208 | 
diff2[(idx_left % hms.size(3)) == (hms.size(3) - 1)] = 0 209 | 210 | preds[:, :, 0] += diff1.squeeze(-1) 211 | preds[:, :, 1] += diff2.squeeze(-1) 212 | 213 | preds_tf = torch.zeros(preds.size()) 214 | preds_tf = transformBoxInvert_batch(preds, pt1, pt2, inpH, inpW, resH, resW) 215 | 216 | return preds, preds_tf, maxval 217 | -------------------------------------------------------------------------------- /SPPE/src/utils/pose.py: -------------------------------------------------------------------------------- 1 | from utils import (load_image, drawGaussian, drawBigCircle, drawSmallCircle, cv_rotate, 2 | cropBox, transformBox, flip, shuffleLR, drawCOCO) 3 | from utils import getPrediction 4 | import torch 5 | import numpy as np 6 | import random 7 | from SPPE.src.opt import opt 8 | 9 | 10 | def rnd(x): 11 | return max(-2 * x, min(2 * x, np.random.randn(1)[0] * x)) 12 | 13 | 14 | def generateSampleBox(img_path, bndbox, part, nJoints, imgset, scale_factor, dataset, train=True): 15 | 16 | nJoints_coco = 17 17 | nJoints_mpii = 16 18 | img = load_image(img_path) 19 | if train: 20 | img[0].mul_(random.uniform(0.7, 1.3)).clamp_(0, 1) 21 | img[1].mul_(random.uniform(0.7, 1.3)).clamp_(0, 1) 22 | img[2].mul_(random.uniform(0.7, 1.3)).clamp_(0, 1) 23 | 24 | ori_img = img.clone() 25 | img[0].add_(-0.406) 26 | img[1].add_(-0.457) 27 | img[2].add_(-0.480) 28 | 29 | upLeft = torch.Tensor((int(bndbox[0][0]), int(bndbox[0][1]))) 30 | bottomRight = torch.Tensor((int(bndbox[0][2]), int(bndbox[0][3]))) 31 | ht = bottomRight[1] - upLeft[1] 32 | width = bottomRight[0] - upLeft[0] 33 | imght = img.shape[1] 34 | imgwidth = img.shape[2] 35 | scaleRate = random.uniform(*scale_factor) 36 | 37 | upLeft[0] = max(0, upLeft[0] - width * scaleRate / 2) 38 | upLeft[1] = max(0, upLeft[1] - ht * scaleRate / 2) 39 | bottomRight[0] = min(imgwidth - 1, bottomRight[0] + width * scaleRate / 2) 40 | bottomRight[1] = min(imght - 1, bottomRight[1] + ht * scaleRate / 2) 41 | 42 | # Doing Random Sample 43 | if opt.addDPG: 44 | PatchScale = random.uniform(0, 1) 45 | if PatchScale > 0.85: 46 | ratio = ht / width 47 | if width < ht: 48 | patchWidth = PatchScale * width 49 | patchHt = patchWidth * ratio 50 | else: 51 | patchHt = PatchScale * ht 52 | patchWidth = patchHt / ratio 53 | 54 | xmin = upLeft[0] + random.uniform(0, 1) * (width - patchWidth) 55 | ymin = upLeft[1] + random.uniform(0, 1) * (ht - patchHt) 56 | 57 | xmax = xmin + patchWidth + 1 58 | ymax = ymin + patchHt + 1 59 | else: 60 | xmin = max(1, min(upLeft[0] + np.random.normal(-0.0142, 0.1158) * width, imgwidth - 3)) 61 | ymin = max(1, min(upLeft[1] + np.random.normal(0.0043, 0.068) * ht, imght - 3)) 62 | xmax = min(max(xmin + 2, bottomRight[0] + np.random.normal(0.0154, 0.1337) * width), imgwidth - 3) 63 | ymax = min(max(ymin + 2, bottomRight[1] + np.random.normal(-0.0013, 0.0711) * ht), imght - 3) 64 | 65 | upLeft[0] = xmin 66 | upLeft[1] = ymin 67 | bottomRight[0] = xmax 68 | bottomRight[1] = ymax 69 | 70 | # Counting Joints number 71 | jointNum = 0 72 | if imgset == 'coco': 73 | for i in range(17): 74 | if part[i][0] > 0 and part[i][0] > upLeft[0] and part[i][1] > upLeft[1] \ 75 | and part[i][0] < bottomRight[0] and part[i][1] < bottomRight[1]: 76 | jointNum += 1 77 | else: 78 | for i in range(16): 79 | if part[i][0] > 0 and part[i][0] > upLeft[0] and part[i][1] > upLeft[1] \ 80 | and part[i][0] < bottomRight[0] and part[i][1] < bottomRight[1]: 81 | jointNum += 1 82 | 83 | # Doing Random Crop 84 | if opt.addDPG: 85 | if jointNum > 13 and train: 86 | switch = 
random.uniform(0, 1) 87 | if switch > 0.96: 88 | bottomRight[0] = (upLeft[0] + bottomRight[0]) / 2 89 | bottomRight[1] = (upLeft[1] + bottomRight[1]) / 2 90 | elif switch > 0.92: 91 | upLeft[0] = (upLeft[0] + bottomRight[0]) / 2 92 | bottomRight[1] = (upLeft[1] + bottomRight[1]) / 2 93 | elif switch > 0.88: 94 | upLeft[1] = (upLeft[1] + bottomRight[1]) / 2 95 | bottomRight[0] = (upLeft[0] + bottomRight[0]) / 2 96 | elif switch > 0.84: 97 | upLeft[0] = (upLeft[0] + bottomRight[0]) / 2 98 | upLeft[1] = (upLeft[1] + bottomRight[1]) / 2 99 | elif switch > 0.80: 100 | bottomRight[0] = (upLeft[0] + bottomRight[0]) / 2 101 | elif switch > 0.76: 102 | upLeft[0] = (upLeft[0] + bottomRight[0]) / 2 103 | elif switch > 0.72: 104 | bottomRight[1] = (upLeft[1] + bottomRight[1]) / 2 105 | elif switch > 0.68: 106 | upLeft[1] = (upLeft[1] + bottomRight[1]) / 2 107 | 108 | ori_inp = cropBox(ori_img, upLeft, bottomRight, opt.inputResH, opt.inputResW) 109 | inp = cropBox(img, upLeft, bottomRight, opt.inputResH, opt.inputResW) 110 | if jointNum == 0: 111 | inp = torch.zeros(3, opt.inputResH, opt.inputResW) 112 | 113 | out_bigcircle = torch.zeros(nJoints, opt.outputResH, opt.outputResW) 114 | out_smallcircle = torch.zeros(nJoints, opt.outputResH, opt.outputResW) 115 | out = torch.zeros(nJoints, opt.outputResH, opt.outputResW) 116 | setMask = torch.zeros(nJoints, opt.outputResH, opt.outputResW) 117 | 118 | # Draw Label 119 | if imgset == 'coco': 120 | for i in range(nJoints_coco): 121 | if part[i][0] > 0 and part[i][0] > upLeft[0] and part[i][1] > upLeft[1] \ 122 | and part[i][0] < bottomRight[0] and part[i][1] < bottomRight[1]: 123 | out_bigcircle[i] = drawBigCircle(out_bigcircle[i], transformBox(part[i], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss * 2) 124 | out_smallcircle[i] = drawSmallCircle(out_smallcircle[i], transformBox(part[i], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss) 125 | out[i] = drawGaussian(out[i], transformBox(part[i], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss) 126 | setMask[i].add_(1) 127 | elif imgset == 'mpii': 128 | for i in range(nJoints_coco, nJoints_coco + nJoints_mpii): 129 | if part[i - nJoints_coco][0] > 0 and part[i - nJoints_coco][0] > upLeft[0] and part[i - nJoints_coco][1] > upLeft[1] \ 130 | and part[i - nJoints_coco][0] < bottomRight[0] and part[i - nJoints_coco][1] < bottomRight[1]: 131 | out_bigcircle[i] = drawBigCircle(out_bigcircle[i], transformBox(part[i - nJoints_coco], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss * 2) 132 | out_smallcircle[i] = drawSmallCircle(out_smallcircle[i], transformBox(part[i - nJoints_coco], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss) 133 | out[i] = drawGaussian(out[i], transformBox(part[i - nJoints_coco], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss) 134 | setMask[i].add_(1) 135 | else: 136 | for i in range(nJoints_coco, nJoints_coco + nJoints_mpii): 137 | if part[i - nJoints_coco][0] > 0 and part[i - nJoints_coco][0] > upLeft[0] and part[i - nJoints_coco][1] > upLeft[1] \ 138 | and part[i - nJoints_coco][0] < bottomRight[0] and part[i - nJoints_coco][1] < bottomRight[1]: 139 | out_bigcircle[i] = drawBigCircle(out_bigcircle[i], transformBox(part[i - nJoints_coco], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, 
opt.outputResW), opt.hmGauss * 2) 140 | out_smallcircle[i] = drawSmallCircle(out_smallcircle[i], transformBox(part[i - nJoints_coco], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss) 141 | out[i] = drawGaussian(out[i], transformBox(part[i - nJoints_coco], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss) 142 | if i != 6 + nJoints_coco and i != 7 + nJoints_coco: 143 | setMask[i].add_(1) 144 | 145 | if opt.debug: 146 | preds_hm, preds_img, preds_scores = getPrediction(out.unsqueeze(0), upLeft.unsqueeze(0), bottomRight.unsqueeze(0), opt.inputResH, 147 | opt.inputResW, opt.outputResH, opt.outputResW) 148 | tmp_preds = preds_hm.mul(opt.inputResH / opt.outputResH) 149 | drawCOCO(ori_inp.unsqueeze(0), tmp_preds, preds_scores) 150 | 151 | if train: 152 | # Flip 153 | if random.uniform(0, 1) < 0.5: 154 | inp = flip(inp) 155 | ori_inp = flip(ori_inp) 156 | out_bigcircle = shuffleLR(flip(out_bigcircle), dataset) 157 | out_smallcircle = shuffleLR(flip(out_smallcircle), dataset) 158 | out = shuffleLR(flip(out), dataset) 159 | # Rotate 160 | r = rnd(opt.rotate) 161 | if random.uniform(0, 1) < 0.6: 162 | r = 0 163 | if r != 0: 164 | inp = cv_rotate(inp, r, opt.inputResW, opt.inputResH) 165 | out_bigcircle = cv_rotate(out_bigcircle, r, opt.outputResW, opt.outputResH) 166 | out_smallcircle = cv_rotate(out_smallcircle, r, opt.outputResW, opt.outputResH) 167 | out = cv_rotate(out, r, opt.outputResW, opt.outputResH) 168 | 169 | return inp, out_bigcircle, out_smallcircle, out, setMask 170 | -------------------------------------------------------------------------------- /Track/Tracker.py: -------------------------------------------------------------------------------- 1 | import time 2 | import numpy as np 3 | from collections import deque 4 | 5 | from .linear_assignment import min_cost_matching, matching_cascade 6 | from .kalman_filter import KalmanFilter 7 | from .iou_matching import iou_cost 8 | 9 | 10 | class TrackState: 11 | """Enumeration type for the single target track state. Newly created tracks are 12 | classified as `tentative` until enough evidence has been collected. Then, 13 | the track state is changed to `confirmed`. Tracks that are no longer alive 14 | are classified as `deleted` to mark them for removal from the set of active 15 | tracks. 16 | """ 17 | Tentative = 1 18 | Confirmed = 2 19 | Deleted = 3 20 | 21 | 22 | class Detection(object): 23 | """This class represents a bounding box, keypoints, score of person detected 24 | in a single image. 25 | 26 | Args: 27 | tlbr: (float array) Of shape [top, left, bottom, right]., 28 | keypoints: (float array) Of shape [node, pts]., 29 | confidence: (float) Confidence score of detection. 30 | """ 31 | def __init__(self, tlbr, keypoints, confidence): 32 | self.tlbr = tlbr 33 | self.keypoints = keypoints 34 | self.confidence = confidence 35 | 36 | def to_tlwh(self): 37 | """Get (top, left, width, height). 38 | """ 39 | ret = self.tlbr.copy() 40 | ret[2:] = ret[2:] - ret[:2] 41 | return ret 42 | 43 | def to_xyah(self): 44 | """Get (x_center, y_center, aspect ratio, height). 
45 | """ 46 | ret = self.to_tlwh() 47 | ret[:2] += ret[2:] / 2 48 | ret[2] /= ret[3] 49 | return ret 50 | 51 | 52 | class Track: 53 | def __init__(self, mean, covariance, track_id, n_init, max_age=30, buffer=30): 54 | self.mean = mean 55 | self.covariance = covariance 56 | self.track_id = track_id 57 | self.hist = 1 58 | self.age = 1 59 | self.time_since_update = 0 60 | self.n_init = n_init 61 | self.max_age = max_age 62 | 63 | # keypoints list for use in Actions prediction. 64 | self.keypoints_list = deque(maxlen=buffer) 65 | 66 | self.state = TrackState.Tentative 67 | 68 | def to_tlwh(self): 69 | ret = self.mean[:4].copy() 70 | ret[2] *= ret[3] 71 | ret[:2] -= ret[2:] / 2 72 | return ret 73 | 74 | def to_tlbr(self): 75 | ret = self.to_tlwh() 76 | ret[2:] = ret[:2] + ret[2:] 77 | return ret 78 | 79 | def get_center(self): 80 | return self.mean[:2].copy() 81 | 82 | def predict(self, kf): 83 | """Propagate the state distribution to the current time step using a 84 | Kalman filter prediction step. 85 | """ 86 | self.mean, self.covariance = kf.predict(self.mean, self.covariance) 87 | self.age += 1 88 | self.time_since_update += 1 89 | 90 | def update(self, kf, detection): 91 | """Perform Kalman filter measurement update step. 92 | """ 93 | self.mean, self.covariance = kf.update(self.mean, self.covariance, 94 | detection.to_xyah()) 95 | self.keypoints_list.append(detection.keypoints) 96 | 97 | self.hist += 1 98 | self.time_since_update = 0 99 | if self.state == TrackState.Tentative and self.hist >= self.n_init: 100 | self.state = TrackState.Confirmed 101 | 102 | def mark_missed(self): 103 | """Mark this track as missed (no association at the current time step). 104 | """ 105 | if self.state == TrackState.Tentative: 106 | self.state = TrackState.Deleted 107 | elif self.time_since_update > self.max_age: 108 | self.state = TrackState.Deleted 109 | 110 | def is_tentative(self): 111 | return self.state == TrackState.Tentative 112 | 113 | def is_confirmed(self): 114 | return self.state == TrackState.Confirmed 115 | 116 | def is_deleted(self): 117 | return self.state == TrackState.Deleted 118 | 119 | 120 | class Tracker: 121 | def __init__(self, max_iou_distance=0.7, max_age=30, n_init=5): 122 | self.max_iou_dist = max_iou_distance 123 | self.max_age = max_age 124 | self.n_init = n_init 125 | 126 | self.kf = KalmanFilter() 127 | self.tracks = [] 128 | self._next_id = 1 129 | 130 | def predict(self): 131 | """Propagate track state distributions one time step forward. 132 | This function should be called once every time step, before `update`. 133 | """ 134 | for track in self.tracks: 135 | track.predict(self.kf) 136 | 137 | def update(self, detections): 138 | """Perform measurement update and track management. 139 | Parameters 140 | ---------- 141 | detections : List[deep_sort.detection.Detection] 142 | A list of detections at the current time step. 143 | """ 144 | # Run matching cascade. 145 | matches, unmatched_tracks, unmatched_detections = self._match(detections) 146 | 147 | # Update matched tracks set. 148 | for track_idx, detection_idx in matches: 149 | self.tracks[track_idx].update(self.kf, detections[detection_idx]) 150 | # Update tracks that missing. 151 | for track_idx in unmatched_tracks: 152 | self.tracks[track_idx].mark_missed() 153 | # Create new detections track. 154 | for detection_idx in unmatched_detections: 155 | self._initiate_track(detections[detection_idx]) 156 | 157 | # Remove deleted tracks. 
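The `Detection` and `Track` classes above move between three box encodings: corner form `tlbr`, top-left-plus-size `tlwh`, and the `xyah` (centre, aspect ratio, height) form fed to the Kalman filter. A short NumPy round-trip sketch of those conversions, using a made-up box:

```python
import numpy as np

tlbr = np.array([100., 50., 180., 250.])   # xmin, ymin, xmax, ymax (illustrative)

# tlbr -> tlwh: size = bottom-right minus top-left.
tlwh = tlbr.copy()
tlwh[2:] -= tlwh[:2]                       # [100, 50, 80, 200]

# tlwh -> xyah: centre, aspect ratio (w / h), height.
xyah = tlwh.copy()
xyah[:2] += xyah[2:] / 2
xyah[2] /= xyah[3]                         # [140, 150, 0.4, 200]

# xyah -> tlbr again, mirroring Track.to_tlwh / Track.to_tlbr.
back = xyah.copy()
back[2] *= back[3]                         # w = a * h
back[:2] -= back[2:] / 2
back[2:] += back[:2]
assert np.allclose(back, tlbr)
```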
158 | self.tracks = [t for t in self.tracks if not t.is_deleted()] 159 | 160 | def _match(self, detections): 161 | confirmed_tracks, unconfirmed_tracks = [], [] 162 | for i, t in enumerate(self.tracks): 163 | if t.is_confirmed(): 164 | confirmed_tracks.append(i) 165 | else: 166 | unconfirmed_tracks.append(i) 167 | 168 | matches_a, unmatched_tracks_a, unmatched_detections = matching_cascade( 169 | iou_cost, self.max_iou_dist, self.max_age, self.tracks, detections, confirmed_tracks 170 | ) 171 | 172 | track_candidates = unconfirmed_tracks + [ 173 | k for k in unmatched_tracks_a if self.tracks[k].time_since_update == 1] 174 | unmatched_tracks_a = [ 175 | k for k in unmatched_tracks_a if self.tracks[k].time_since_update != 1] 176 | 177 | matches_b, unmatched_tracks_b, unmatched_detections = min_cost_matching( 178 | iou_cost, self.max_iou_dist, self.tracks, detections, track_candidates, unmatched_detections 179 | ) 180 | 181 | matches = matches_a + matches_b 182 | unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b)) 183 | return matches, unmatched_tracks, unmatched_detections 184 | 185 | def _initiate_track(self, detection): 186 | if detection.confidence < 0.4: 187 | return 188 | mean, covariance = self.kf.initiate(detection.to_xyah()) 189 | self.tracks.append(Track(mean, covariance, self._next_id, self.n_init, self.max_age)) 190 | self._next_id += 1 191 | 192 | 193 | -------------------------------------------------------------------------------- /Track/iou_matching.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | INFTY_COST = 1e+5 4 | 5 | 6 | def iou(bbox, candidates): 7 | """Compute intersection over union. 8 | Parameters 9 | ---------- 10 | bbox : ndarray 11 | A bounding box in format `(xmin, ymin, xmax, ymax)`. 12 | candidates : ndarray 13 | A matrix of candidate bounding boxes (one per row) in the same format 14 | as `bbox`. 15 | 16 | Returns 17 | ------- 18 | ndarray 19 | The intersection over union in [0, 1] between the `bbox` and each 20 | candidate. A higher score means a larger fraction of the `bbox` is 21 | occluded by the candidate. 22 | """ 23 | #bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:] 24 | bbox_tl, bbox_br = bbox[:2], bbox[2:] 25 | candidates_tl = candidates[:, :2] 26 | candidates_br = candidates[:, 2:] # + candidates[:, :2] 27 | 28 | tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis], 29 | np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]] 30 | br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis], 31 | np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]] 32 | wh = np.maximum(0., br - tl) 33 | 34 | area_intersection = wh.prod(axis=1) 35 | area_bbox = (bbox[2:] - bbox[:2]).prod() 36 | area_candidates = (candidates[:, 2:] - candidates[:, :2]).prod(axis=1) 37 | return area_intersection / (area_bbox + area_candidates - area_intersection) 38 | 39 | 40 | def iou_cost(tracks, detections, track_indices=None, detection_indices=None): 41 | """An intersection over union distance metric. 42 | Parameters 43 | ---------- 44 | tracks : List[Track] 45 | A list of tracks. 46 | detections : List[Detection] 47 | A list of detections. 48 | track_indices : Optional[List[int]] 49 | A list of indices to tracks that should be matched. Defaults to 50 | all `tracks`. 51 | detection_indices : Optional[List[int]] 52 | A list of indices to detections that should be matched. Defaults 53 | to all `detections`. 
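`iou` above expects one box and a matrix of candidate boxes, all in `(xmin, ymin, xmax, ymax)` form. A quick sanity check with toy boxes, assuming the repository root is on `PYTHONPATH` so `Track` is importable:

```python
import numpy as np
from Track.iou_matching import iou

bbox = np.array([0., 0., 10., 10.])
candidates = np.array([[0., 0., 10., 10.],     # identical        -> 1.0
                       [5., 5., 15., 15.],     # partial overlap  -> 25 / 175
                       [20., 20., 30., 30.]])  # disjoint         -> 0.0
print(iou(bbox, candidates))
```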
54 | 55 | Returns 56 | ------- 57 | ndarray 58 | Returns a cost matrix of shape 59 | len(track_indices), len(detection_indices) where entry (i, j) is 60 | `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`. 61 | 62 | """ 63 | if track_indices is None: 64 | track_indices = np.arange(len(tracks)) 65 | if detection_indices is None: 66 | detection_indices = np.arange(len(detections)) 67 | 68 | cost_matrix = np.zeros((len(track_indices), len(detection_indices))) 69 | for row, track_idx in enumerate(track_indices): 70 | #if tracks[track_idx].time_since_update > 1: 71 | # cost_matrix[row, :] = INFTY_COST 72 | # continue 73 | 74 | bbox = tracks[track_idx].to_tlbr() 75 | candidates = np.asarray([detections[i].tlbr for i in detection_indices]) 76 | cost_matrix[row, :] = 1. - iou(bbox, candidates) 77 | 78 | return cost_matrix 79 | -------------------------------------------------------------------------------- /Track/kalman_filter.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | import scipy.linalg 4 | 5 | 6 | class KalmanFilter(object): 7 | """A simple Kalman filter for tracking bounding boxes in image space. 8 | 9 | The 8-dimensional state space 10 | x, y, a, h, vx, vy, va, vh 11 | 12 | contains the bounding box center position (x, y), aspect ratio a, height h, 13 | and their respective velocities. 14 | 15 | Object motion follows a constant velocity model. The bounding box location 16 | (x, y, a, h) is taken as direct observation of the state space (linear 17 | observation model). 18 | """ 19 | def __init__(self): 20 | ndim, dt = 4, 1. 21 | 22 | # Create Kalman filter model matrices. 23 | self._motion_mat = np.eye(2 * ndim, 2 * ndim) 24 | for i in range(ndim): 25 | self._motion_mat[i, ndim + i] = dt 26 | self._update_mat = np.eye(ndim, 2 * ndim) 27 | 28 | # Motion and observation uncertainty are chosen relative to the current 29 | # state estimate. These weights control the amount of uncertainty in 30 | # the model. This is a bit hacky. 31 | self._std_weight_position = 1. / 20 32 | self._std_weight_velocity = 1. / 160 33 | 34 | def initiate(self, measurement): 35 | """Create track from unassociated measurement. 36 | Parameters 37 | ---------- 38 | measurement : ndarray 39 | Bounding box coordinates (x, y, a, h) with center position (x, y), 40 | aspect ratio a, and height h. 41 | 42 | Returns 43 | ------- 44 | (ndarray, ndarray) 45 | Returns the mean vector (8 dimensional) and covariance matrix (8x8 46 | dimensional) of the new track. Unobserved velocities are initialized 47 | to 0 mean. 48 | """ 49 | mean_pos = measurement 50 | mean_vel = np.zeros_like(mean_pos) 51 | mean = np.r_[mean_pos, mean_vel] 52 | 53 | std = [ 54 | 2 * self._std_weight_position * measurement[3], 55 | 2 * self._std_weight_position * measurement[3], 56 | 1e-2, 57 | 2 * self._std_weight_position * measurement[3], 58 | 10 * self._std_weight_velocity * measurement[3], 59 | 10 * self._std_weight_velocity * measurement[3], 60 | 1e-5, 61 | 10 * self._std_weight_velocity * measurement[3]] 62 | covariance = np.diag(np.square(std)) 63 | return mean, covariance 64 | 65 | def predict(self, mean, covariance): 66 | """Run Kalman filter prediction step. 67 | Parameters 68 | ---------- 69 | mean : ndarray 70 | The 8 dimensional mean vector of the object state at the previous 71 | time step. 72 | covariance : ndarray 73 | The 8x8 dimensional covariance matrix of the object state at the 74 | previous time step. 
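The constructor above builds an 8x8 constant-velocity transition matrix: each of the four observed components `(x, y, a, h)` is advanced by its own velocity once per step. A tiny sketch of that matrix and of what one prediction step does to a state vector (the velocities are invented):

```python
import numpy as np

ndim, dt = 4, 1.0
F = np.eye(2 * ndim)
for i in range(ndim):
    F[i, ndim + i] = dt        # position_i <- position_i + dt * velocity_i

state = np.array([320., 240., 0.5, 160.,   # x, y, a, h
                    2.,   1., 0.0,   0.])  # vx, vy, va, vh (made up)
print(F @ state)               # [322. 241. 0.5 160. 2. 1. 0. 0.]
```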
75 | 76 | Returns 77 | ------- 78 | (ndarray, ndarray) 79 | Returns the mean vector and covariance matrix of the predicted 80 | state. Unobserved velocities are initialized to 0 mean. 81 | """ 82 | std_pos = [ 83 | self._std_weight_position * mean[3], 84 | self._std_weight_position * mean[3], 85 | 1e-2, 86 | self._std_weight_position * mean[3]] 87 | std_vel = [ 88 | self._std_weight_velocity * mean[3], 89 | self._std_weight_velocity * mean[3], 90 | 1e-5, 91 | self._std_weight_velocity * mean[3]] 92 | motion_cov = np.diag(np.square(np.r_[std_pos, std_vel])) 93 | 94 | mean = np.dot(self._motion_mat, mean) 95 | covariance = np.linalg.multi_dot(( 96 | self._motion_mat, covariance, self._motion_mat.T)) + motion_cov 97 | 98 | return mean, covariance 99 | 100 | def project(self, mean, covariance): 101 | """Project state distribution to measurement space. 102 | Parameters 103 | ---------- 104 | mean : ndarray 105 | The state's mean vector (8 dimensional array). 106 | covariance : ndarray 107 | The state's covariance matrix (8x8 dimensional). 108 | 109 | Returns 110 | ------- 111 | (ndarray, ndarray) 112 | Returns the projected mean and covariance matrix of the given state 113 | estimate. 114 | """ 115 | std = [ 116 | self._std_weight_position * mean[3], 117 | self._std_weight_position * mean[3], 118 | 1e-1, 119 | self._std_weight_position * mean[3]] 120 | innovation_cov = np.diag(np.square(std)) 121 | 122 | mean = np.dot(self._update_mat, mean) 123 | covariance = np.linalg.multi_dot(( 124 | self._update_mat, covariance, self._update_mat.T)) 125 | return mean, covariance + innovation_cov 126 | 127 | def update(self, mean, covariance, measurement): 128 | """Run Kalman filter correction step. 129 | Parameters 130 | ---------- 131 | mean : ndarray 132 | The predicted state's mean vector (8 dimensional). 133 | covariance : ndarray 134 | The state's covariance matrix (8x8 dimensional). 135 | measurement : ndarray 136 | The 4 dimensional measurement vector (x, y, a, h), where (x, y) 137 | is the center position, a the aspect ratio, and h the height of the 138 | bounding box. 139 | 140 | Returns 141 | ------- 142 | (ndarray, ndarray) 143 | Returns the measurement-corrected state distribution. 144 | """ 145 | projected_mean, projected_cov = self.project(mean, covariance) 146 | 147 | chol_factor, lower = scipy.linalg.cho_factor( 148 | projected_cov, lower=True, check_finite=False) 149 | kalman_gain = scipy.linalg.cho_solve( 150 | (chol_factor, lower), np.dot(covariance, self._update_mat.T).T, 151 | check_finite=False).T 152 | innovation = measurement - projected_mean 153 | 154 | new_mean = mean + np.dot(innovation, kalman_gain.T) 155 | new_covariance = covariance - np.linalg.multi_dot(( 156 | kalman_gain, projected_cov, kalman_gain.T)) 157 | return new_mean, new_covariance 158 | 159 | def gating_distance(self, mean, covariance, measurements, 160 | only_position=False): 161 | """Compute gating distance between state distribution and measurements. 162 | A suitable distance threshold can be obtained from `chi2inv95`. If 163 | `only_position` is False, the chi-square distribution has 4 degrees of 164 | freedom, otherwise 2. 165 | 166 | Parameters 167 | ---------- 168 | mean : ndarray 169 | Mean vector over the state distribution (8 dimensional). 170 | covariance : ndarray 171 | Covariance of the state distribution (8x8 dimensional). 
172 | measurements : ndarray 173 | An Nx4 dimensional matrix of N measurements, each in 174 | format (x, y, a, h) where (x, y) is the bounding box center 175 | position, a the aspect ratio, and h the height. 176 | only_position : Optional[bool] 177 | If True, distance computation is done with respect to the bounding 178 | box center position only. 179 | 180 | Returns 181 | ------- 182 | ndarray 183 | Returns an array of length N, where the i-th element contains the 184 | squared Mahalanobis distance between (mean, covariance) and 185 | `measurements[i]`. 186 | """ 187 | mean, covariance = self.project(mean, covariance) 188 | if only_position: 189 | mean, covariance = mean[:2], covariance[:2, :2] 190 | measurements = measurements[:, :2] 191 | 192 | cholesky_factor = np.linalg.cholesky(covariance) 193 | d = measurements - mean 194 | z = scipy.linalg.solve_triangular( 195 | cholesky_factor, d.T, lower=True, check_finite=False, 196 | overwrite_b=True) 197 | squared_maha = np.sum(z * z, axis=0) 198 | return squared_maha 199 | -------------------------------------------------------------------------------- /Track/linear_assignment.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | #from sklearn.utils.linear_assignment_ import linear_assignment 3 | from scipy.optimize import linear_sum_assignment 4 | 5 | """ 6 | Table for the 0.95 quantile of the chi-square distribution with N degrees of 7 | freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv 8 | function and used as Mahalanobis gating threshold. 9 | """ 10 | chi2inv95 = { 11 | 1: 3.8415, 12 | 2: 5.9915, 13 | 3: 7.8147, 14 | 4: 9.4877, 15 | 5: 11.070, 16 | 6: 12.592, 17 | 7: 14.067, 18 | 8: 15.507, 19 | 9: 16.919} 20 | INFTY_COST = 1e+5 21 | 22 | 23 | def min_cost_matching(distance_metric, max_distance, tracks, detections, 24 | track_indices=None, detection_indices=None): 25 | """Solve linear assignment problem. 26 | Parameters 27 | ---------- 28 | distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray 29 | The distance metric is given a list of tracks and detections as well as 30 | a list of N track indices and M detection indices. The metric should 31 | return the NxM dimensional cost matrix, where element (i, j) is the 32 | association cost between the i-th track in the given track indices and 33 | the j-th detection in the given detection_indices. 34 | max_distance : float 35 | Gating threshold. Associations with cost larger than this value are 36 | disregarded. 37 | tracks : List[Track] 38 | A list of predicted tracks at the current time step. 39 | detections : List[Detection] 40 | A list of detections at the current time step. 41 | track_indices : List[int] 42 | List of track indices that maps rows in `cost_matrix` to tracks in 43 | `tracks` (see description above). 44 | detection_indices : List[int] 45 | List of detection indices that maps columns in `cost_matrix` to 46 | detections in `detections` (see description above). 47 | 48 | Returns 49 | ------- 50 | (List[(int, int)], List[int], List[int]) 51 | Returns a tuple with the following three entries: 52 | * A list of matched track and detection indices. 53 | * A list of unmatched track indices. 54 | * A list of unmatched detection indices. 
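With `gating_distance` and the `chi2inv95` table both in place, a minimal usage sketch of how they combine during association (an assumed example with invented box values; the repo root must be on `PYTHONPATH`):

```python
import numpy as np
from Track.kalman_filter import KalmanFilter
from Track.linear_assignment import chi2inv95

kf = KalmanFilter()
# Start a track from one (x, y, a, h) measurement, then predict one step ahead.
mean, cov = kf.initiate(np.array([320., 240., 0.5, 160.]))
mean, cov = kf.predict(mean, cov)

measurements = np.array([[322., 241., 0.5, 158.],    # plausible continuation
                         [600., 100., 0.4, 120.]])   # far-away detection
d2 = kf.gating_distance(mean, cov, measurements)
print(d2 <= chi2inv95[4])   # 4 DoF for the full (x, y, a, h) state -> [ True False]
```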
55 | """ 56 | if track_indices is None: 57 | track_indices = np.arange(len(tracks)) 58 | if detection_indices is None: 59 | detection_indices = np.arange(len(detections)) 60 | 61 | if len(detection_indices) == 0 or len(track_indices) == 0: 62 | return [], track_indices, detection_indices # Nothing to match. 63 | 64 | cost_matrix = distance_metric(tracks, detections, track_indices, detection_indices) 65 | cost_matrix[cost_matrix > max_distance] = max_distance + 1e-5 66 | indices = linear_sum_assignment(cost_matrix) 67 | indices = np.array(indices).transpose() 68 | 69 | matches, unmatched_tracks, unmatched_detections = [], [], [] 70 | for col, detection_idx in enumerate(detection_indices): 71 | if col not in indices[:, 1]: 72 | unmatched_detections.append(detection_idx) 73 | for row, track_idx in enumerate(track_indices): 74 | if row not in indices[:, 0]: 75 | unmatched_tracks.append(track_idx) 76 | for row, col in indices: 77 | track_idx = track_indices[row] 78 | detection_idx = detection_indices[col] 79 | if cost_matrix[row, col] > max_distance: 80 | unmatched_tracks.append(track_idx) 81 | unmatched_detections.append(detection_idx) 82 | else: 83 | matches.append((track_idx, detection_idx)) 84 | 85 | return matches, unmatched_tracks, unmatched_detections 86 | 87 | 88 | def matching_cascade(distance_metric, max_distance, cascade_depth, tracks, detections, 89 | track_indices=None, detection_indices=None): 90 | """Run matching cascade. 91 | Parameters 92 | ---------- 93 | distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray 94 | The distance metric is given a list of tracks and detections as well as 95 | a list of N track indices and M detection indices. The metric should 96 | return the NxM dimensional cost matrix, where element (i, j) is the 97 | association cost between the i-th track in the given track indices and 98 | the j-th detection in the given detection indices. 99 | max_distance : float 100 | Gating threshold. Associations with cost larger than this value are 101 | disregarded. 102 | cascade_depth: int 103 | The cascade depth, should be se to the maximum track age. 104 | tracks : List[Track] 105 | A list of predicted tracks at the current time step. 106 | detections : List[Detection] 107 | A list of detections at the current time step. 108 | track_indices : Optional[List[int]] 109 | List of track indices that maps rows in `cost_matrix` to tracks in 110 | `tracks` (see description above). Defaults to all tracks. 111 | detection_indices : Optional[List[int]] 112 | List of detection indices that maps columns in `cost_matrix` to 113 | detections in `detections` (see description above). Defaults to all 114 | detections. 115 | 116 | Returns 117 | ------- 118 | (List[(int, int)], List[int], List[int]) 119 | Returns a tuple with the following three entries: 120 | * A list of matched track and detection indices. 121 | * A list of unmatched track indices. 122 | * A list of unmatched detection indices. 
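`min_cost_matching` above wraps `scipy.optimize.linear_sum_assignment` with a gating step: costs above `max_distance` are clipped just over the threshold, assigned anyway, then discarded. The same pattern in isolation, on a hand-written cost matrix:

```python
import numpy as np
from scipy.optimize import linear_sum_assignment

max_distance = 0.7
cost = np.array([[0.1, 0.9, 0.8],    # track 0 matches detection 0 well
                 [0.8, 0.3, 0.9]])   # track 1 matches detection 1 well
cost[cost > max_distance] = max_distance + 1e-5

rows, cols = linear_sum_assignment(cost)
matches = [(r, c) for r, c in zip(rows, cols) if cost[r, c] <= max_distance]
matched_d = {c for _, c in matches}
unmatched_dets = [c for c in range(cost.shape[1]) if c not in matched_d]
print(matches)          # [(0, 0), (1, 1)]
print(unmatched_dets)   # [2] -- would start a new track in Tracker.update
```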
123 | """ 124 | if track_indices is None: 125 | track_indices = list(range(len(tracks))) 126 | if detection_indices is None: 127 | detection_indices = list(range(len(detections))) 128 | 129 | unmatched_detections = detection_indices 130 | matches = [] 131 | for level in range(cascade_depth): 132 | if len(unmatched_detections) == 0: # No detections left 133 | break 134 | 135 | track_indices_l = [k for k in track_indices 136 | if tracks[k].time_since_update == 1 + level] 137 | if len(track_indices_l) == 0: # Nothing to match at this level 138 | continue 139 | 140 | matches_l, _, unmatched_detections = min_cost_matching( 141 | distance_metric, max_distance, tracks, detections, track_indices_l, unmatched_detections) 142 | matches += matches_l 143 | 144 | unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches)) 145 | return matches, unmatched_tracks, unmatched_detections 146 | 147 | 148 | def gate_cost_matrix(kf, cost_matrix, tracks, detections, track_indices, detection_indices, 149 | gated_cost=INFTY_COST, only_position=False): 150 | """Invalidate infeasible entries in cost matrix based on the state 151 | distributions obtained by Kalman filtering. 152 | Parameters 153 | ---------- 154 | kf : The Kalman filter. 155 | cost_matrix : ndarray 156 | The NxM dimensional cost matrix, where N is the number of track indices 157 | and M is the number of detection indices, such that entry (i, j) is the 158 | association cost between `tracks[track_indices[i]]` and 159 | `detections[detection_indices[j]]`. 160 | tracks : List[Track] 161 | A list of predicted tracks at the current time step. 162 | detections : List[Detection] 163 | A list of detections at the current time step. 164 | track_indices : List[int] 165 | List of track indices that maps rows in `cost_matrix` to tracks in 166 | `tracks` (see description above). 167 | detection_indices : List[int] 168 | List of detection indices that maps columns in `cost_matrix` to 169 | detections in `detections` (see description above). 170 | gated_cost : Optional[float] 171 | Entries in the cost matrix corresponding to infeasible associations are 172 | set this value. Defaults to a very large value. 173 | only_position : Optional[bool] 174 | If True, only the x, y position of the state distribution is considered 175 | during gating. Defaults to False. 176 | 177 | Returns 178 | ------- 179 | ndarray 180 | Returns the modified cost matrix. 
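`matching_cascade` above walks the tracks level by level, so tracks that were updated more recently (smaller `time_since_update`) get to claim detections first. A toy illustration of that ordering, with hypothetical track ages:

```python
# Hypothetical time_since_update per track id.
time_since_update = {0: 1, 1: 3, 2: 1}
cascade_depth = 30

for level in range(cascade_depth):
    level_tracks = [k for k, t in time_since_update.items() if t == 1 + level]
    if level_tracks:
        print('level', level, '-> tracks', level_tracks, 'matched first')
# level 0 -> tracks [0, 2] matched first
# level 2 -> tracks [1] matched first
```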
181 | """ 182 | gating_dim = 2 if only_position else 4 183 | gating_threshold = chi2inv95[gating_dim] 184 | measurements = np.asarray([detections[i].to_xyah() for i in detection_indices]) 185 | for row, track_idx in enumerate(track_indices): 186 | track = tracks[track_idx] 187 | gating_distance = kf.gating_distance(track.mean, track.covariance, 188 | measurements, only_position) 189 | cost_matrix[row, gating_distance > gating_threshold] = gated_cost 190 | 191 | return cost_matrix 192 | -------------------------------------------------------------------------------- /Visualizer.py: -------------------------------------------------------------------------------- 1 | # import matplotlib.gridspec as gridspec 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | import os 5 | import cv2 6 | import torch 7 | import imageio 8 | from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay 9 | from matplotlib.font_manager import FontProperties 10 | 11 | fp = FontProperties(family='Tlwg Typo', size=10) 12 | 13 | 14 | def plot_piechart(x, labels, title='', fig_size=(10, 5), save=None): 15 | fig = plt.figure(figsize=fig_size) 16 | 17 | ax1 = fig.add_subplot(121) 18 | wedges, texts = ax1.pie(x, labels=labels, startangle=90) 19 | 20 | percents = x / sum(x) * 100. 21 | annots = ['{} - {:.2f}% ({:d})'.format(c, p, n) for c, p, n 22 | in zip(labels, percents, x)] 23 | 24 | ax2 = fig.add_subplot(122) 25 | ax2.axis('off') 26 | ax2.legend(wedges, annots, loc='center', fontsize=10) 27 | 28 | fig.suptitle(title) 29 | 30 | if save is not None: 31 | fig.savefig(save) 32 | plt.close() 33 | else: 34 | return fig 35 | 36 | 37 | def plot_x(x, title='', fig_size=(12, 10)): 38 | fig = plt.figure(figsize=fig_size) 39 | x = np.squeeze(x) 40 | 41 | if len(x.shape) == 1: 42 | plt.plot(x) 43 | 44 | elif len(x.shape) == 2: 45 | plt.imshow(x, cmap='gray') 46 | plt.axis('off') 47 | 48 | elif len(x.shape) == 3: 49 | if x.shape[-1] == 3: 50 | plt.imshow(x) 51 | plt.axis('off') 52 | else: 53 | fig = plot_multiImage(x.transpose(2, 0, 1), fig_size=fig_size) 54 | 55 | elif len(x.shape) == 4: 56 | fig = plot_multiImage(x.transpose(3, 0, 1, 2), fig_size=fig_size) 57 | 58 | fig.suptitle(title) 59 | return fig 60 | 61 | 62 | def plot_bars(x, y, title='', ylim=None, save=None): 63 | fig = plt.figure() 64 | bars = plt.bar(x, y) 65 | plt.ylim(ylim) 66 | plt.title(title) 67 | for b in bars: 68 | plt.annotate('{:.2f}'.format(b.get_height()), 69 | xy=(b.get_x(), b.get_height())) 70 | 71 | if save is not None: 72 | plt.savefig(save) 73 | plt.close() 74 | else: 75 | return fig 76 | 77 | 78 | def plot_graphs(x_list, legends, title, ylabel, xlabel='epoch', xlim=None, save=None): 79 | fig = plt.figure() 80 | for x in x_list: 81 | plt.plot(x) 82 | 83 | plt.legend(legends) 84 | plt.xlabel(xlabel) 85 | plt.ylabel(ylabel) 86 | plt.title(title) 87 | plt.xlim(xlim) 88 | 89 | if save is not None: 90 | plt.savefig(save) 91 | plt.close() 92 | else: 93 | return fig 94 | 95 | 96 | # images in shape (amount, h, w, c). 
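The plotting helpers above either return a matplotlib figure or write it straight to disk when `save` is given. A small usage sketch for training curves with `plot_graphs` (loss values invented; importing `Visualizer` assumes matplotlib, scikit-learn and imageio are installed):

```python
from Visualizer import plot_graphs

train_loss = [0.92, 0.61, 0.48, 0.41, 0.38]   # hypothetical per-epoch values
valid_loss = [0.95, 0.70, 0.58, 0.55, 0.54]

# With `save` set, the figure is written to disk and closed; omit it to get the figure back.
plot_graphs([train_loss, valid_loss], legends=['train', 'valid'],
            title='Training vs. validation loss', ylabel='loss',
            save='loss_curve.png')
```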
97 | def plot_multiImage(images, labels=None, pred=None, title=None, fig_size=(12, 10), tight_layout=False, save=None): 98 | n = int(np.ceil(np.sqrt(images.shape[0]))) 99 | fig = plt.figure(figsize=fig_size) 100 | 101 | for i in range(images.shape[0]): 102 | ax = fig.add_subplot(n, n, i + 1) 103 | 104 | if len(images[i].shape) == 2 or images[i].shape[-1] == 1: 105 | ax.imshow(images[i], cmap='gray') 106 | else: 107 | ax.imshow(images[i]) 108 | 109 | if labels is not None: 110 | ax.set_xlabel(labels[i], color='g', fontproperties=fp) 111 | if labels is not None and pred is not None: 112 | if labels[i] == pred[i]: 113 | clr = 'g' 114 | else: 115 | if len(labels[i]) == len(pred[i]): 116 | clr = 'm' 117 | else: 118 | clr = 'r' 119 | 120 | ax.set_xlabel('True: {}\nPred : {}'.format(u'' + labels[i], u'' + pred[i]), 121 | color=clr, fontproperties=fp) 122 | 123 | if title is not None: 124 | fig.suptitle(title) 125 | 126 | if tight_layout: # This make process slow if too many images. 127 | fig.tight_layout() 128 | 129 | if save is not None: 130 | plt.savefig(save) 131 | plt.close() 132 | else: 133 | return fig 134 | 135 | 136 | def plot_confusion_metrix(y_true, y_pred, labels=None, title='', normalize=None, 137 | fig_size=(10, 10), save=None): 138 | cm = confusion_matrix(y_true, y_pred, normalize=normalize) 139 | if labels is None: 140 | labels = list(set(y_trues)) 141 | 142 | disp = ConfusionMatrixDisplay(cm, labels) 143 | disp.plot(xticks_rotation=45) 144 | disp.figure_.set_size_inches(fig_size) 145 | disp.figure_.suptitle(title) 146 | disp.figure_.tight_layout() 147 | 148 | if save is not None: 149 | disp.figure_.savefig(save) 150 | plt.close() 151 | else: 152 | return disp.figure_ 153 | 154 | 155 | def get_fig_image(fig): # figure to array of image. 156 | fig.canvas.draw() 157 | img = np.array(fig.canvas.renderer._renderer) 158 | return img 159 | 160 | 161 | def vid2gif(video_file, output_file, delay=0.05): 162 | with imageio.get_writer(output_file, mode='I', duration=delay) as writer: 163 | cap = cv2.VideoCapture(video_file) 164 | while True: 165 | ret, frame = cap.read() 166 | if ret: 167 | #frame = cv2.resize(frame, (0, 0), fx=0.5, fy=0.5) 168 | frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) 169 | writer.append_data(frame) 170 | else: 171 | break 172 | 173 | #==========================================================================================# 174 | # For Fall_AlphaPose. 175 | 176 | 177 | PARTS_PAIR = [(0, 13), (1, 2), (1, 3), (3, 5), (2, 4), (4, 6), (13, 7), (13, 8), 178 | (7, 9), (8, 10), (9, 11), (10, 12)] 179 | CLASS_NAMES = ['Standing', 'Walking', 'Sitting', 'Lying Down', 180 | 'Stand up', 'Sit down', 'Fall Down'] 181 | 182 | 183 | def plot_poseframes(data, labels=None, frames_stamp=None, delay=0.2, fig_size=(10, 5)): 184 | """ 185 | data : (frames, parts, xy). 186 | labels : (frames, label) or (frames, labels). 187 | frames_stamp : (frames, number of frame). 
188 | """ 189 | fig_cols = 1 190 | if labels is not None and labels.shape[1] > 1: 191 | fig_cols = 2 192 | x_bar = CLASS_NAMES if labels.shape[1] == len(CLASS_NAMES) else np.arange(labels.shape[1]) 193 | 194 | fig = plt.figure(figsize=fig_size) 195 | for i in range(data.shape[0]): 196 | xy = data[i] 197 | #xy = np.concatenate((xy, np.expand_dims((xy[1, :] + xy[2, :]) / 2, 0))) 198 | 199 | fig.clear() 200 | 201 | ax1 = fig.add_subplot(1, fig_cols, 1) 202 | for (sp, ep) in PARTS_PAIR: 203 | ax1.plot(xy[[sp, ep], 0], xy[[sp, ep], 1]) 204 | if xy.shape[1] == 3: 205 | for pts in xy: 206 | ax1.scatter(pts[0], pts[1], 200 * pts[2]) 207 | ax1.invert_yaxis() 208 | 209 | if fig_cols == 2: 210 | ax2 = fig.add_subplot(1, fig_cols, 2) 211 | ax2.bar(x_bar, labels[i]) 212 | ax2.set_ylim([0, 1.0]) 213 | 214 | frame = frames_stamp[i] if frames_stamp is not None else i 215 | idx = 0 216 | if labels is not None: 217 | idx = labels[i].argmax() if labels.shape[1] > 1 else labels[i][0] 218 | fig.suptitle('Frame : {}, Pose : {}'.format(frame, CLASS_NAMES[idx])) 219 | 220 | plt.pause(delay) 221 | plt.show() 222 | 223 | 224 | -------------------------------------------------------------------------------- /fn.py: -------------------------------------------------------------------------------- 1 | import re 2 | import cv2 3 | import time 4 | import math 5 | import torch 6 | import numpy as np 7 | 8 | RED = (0, 0, 255) 9 | GREEN = (0, 255, 0) 10 | BLUE = (255, 0, 0) 11 | CYAN = (255, 255, 0) 12 | YELLOW = (0, 255, 255) 13 | ORANGE = (0, 165, 255) 14 | PURPLE = (255, 0, 255) 15 | 16 | """COCO_PAIR = [(0, 1), (0, 2), (1, 3), (2, 4), # Head 17 | (5, 6), (5, 7), (7, 9), (6, 8), (8, 10), 18 | (17, 11), (17, 12), # Body 19 | (11, 13), (12, 14), (13, 15), (14, 16)]""" 20 | COCO_PAIR = [(0, 13), (1, 2), (1, 3), (3, 5), (2, 4), (4, 6), (13, 7), (13, 8), # Body 21 | (7, 9), (8, 10), (9, 11), (10, 12)] 22 | POINT_COLORS = [(0, 255, 255), (0, 191, 255), (0, 255, 102), (0, 77, 255), (0, 255, 0), # Nose, LEye, REye, LEar, REar 23 | (77, 255, 255), (77, 255, 204), (77, 204, 255), (191, 255, 77), (77, 191, 255), (191, 255, 77), # LShoulder, RShoulder, LElbow, RElbow, LWrist, RWrist 24 | (204, 77, 255), (77, 255, 204), (191, 77, 255), (77, 255, 191), (127, 77, 255), (77, 255, 127), (0, 255, 255)] # LHip, RHip, LKnee, Rknee, LAnkle, RAnkle, Neck 25 | LINE_COLORS = [(0, 215, 255), (0, 255, 204), (0, 134, 255), (0, 255, 50), (77, 255, 222), 26 | (77, 196, 255), (77, 135, 255), (191, 255, 77), (77, 255, 77), (77, 222, 255), 27 | (255, 156, 127), (0, 127, 255), (255, 127, 77), (0, 77, 255), (255, 77, 36)] 28 | 29 | MPII_PAIR = [(8, 9), (11, 12), (11, 10), (2, 1), (1, 0), (13, 14), (14, 15), (3, 4), (4, 5), 30 | (8, 7), (7, 6), (6, 2), (6, 3), (8, 12), (8, 13)] 31 | 32 | numpy_type_map = { 33 | 'float64': torch.DoubleTensor, 34 | 'float32': torch.FloatTensor, 35 | 'float16': torch.HalfTensor, 36 | 'int64': torch.LongTensor, 37 | 'int32': torch.IntTensor, 38 | 'int16': torch.ShortTensor, 39 | 'int8': torch.CharTensor, 40 | 'uint8': torch.ByteTensor, 41 | } 42 | 43 | _use_shared_memory = True 44 | 45 | 46 | def collate_fn(batch): 47 | r"""Puts each data field into a tensor with outer dimension batch size""" 48 | 49 | error_msg = "batch must contain tensors, numbers, dicts or lists; found {}" 50 | elem_type = type(batch[0]) 51 | 52 | if isinstance(batch[0], torch.Tensor): 53 | out = None 54 | if _use_shared_memory: 55 | # If we're in a background process, concatenate directly into a 56 | # shared memory tensor to avoid an extra copy 57 | 
numel = sum([x.numel() for x in batch]) 58 | storage = batch[0].storage()._new_shared(numel) 59 | out = batch[0].new(storage) 60 | return torch.stack(batch, 0, out=out) 61 | elif elem_type.__module__ == 'numpy' and elem_type.__name__ != 'str_' \ 62 | and elem_type.__name__ != 'string_': 63 | elem = batch[0] 64 | if elem_type.__name__ == 'ndarray': 65 | # array of string classes and object 66 | if re.search('[SaUO]', elem.dtype.str) is not None: 67 | raise TypeError(error_msg.format(elem.dtype)) 68 | 69 | return torch.stack([torch.from_numpy(b) for b in batch], 0) 70 | if elem.shape == (): # scalars 71 | py_type = float if elem.dtype.name.startswith('float') else int 72 | return numpy_type_map[elem.dtype.name](list(map(py_type, batch))) 73 | elif isinstance(batch[0], int): 74 | return torch.LongTensor(batch) 75 | elif isinstance(batch[0], float): 76 | return torch.DoubleTensor(batch) 77 | elif isinstance(batch[0], (str, bytes)): 78 | return batch 79 | elif isinstance(batch[0], collections.Mapping): 80 | return {key: collate_fn([d[key] for d in batch]) for key in batch[0]} 81 | elif isinstance(batch[0], collections.Sequence): 82 | transposed = zip(*batch) 83 | return [collate_fn(samples) for samples in transposed] 84 | 85 | raise TypeError((error_msg.format(type(batch[0])))) 86 | 87 | 88 | def collate_fn_list(batch): 89 | img, inp, im_name = zip(*batch) 90 | img = collate_fn(img) 91 | im_name = collate_fn(im_name) 92 | 93 | return img, inp, im_name 94 | 95 | 96 | def draw_single(frame, pts, joint_format='coco'): 97 | if joint_format == 'coco': 98 | l_pair = COCO_PAIR 99 | p_color = POINT_COLORS 100 | line_color = LINE_COLORS 101 | elif joint_format == 'mpii': 102 | l_pair = MPII_PAIR 103 | p_color = [PURPLE, BLUE, BLUE, RED, RED, BLUE, BLUE, RED, RED, PURPLE, PURPLE, PURPLE, RED, RED,BLUE,BLUE] 104 | else: 105 | NotImplementedError 106 | 107 | part_line = {} 108 | pts = np.concatenate((pts, np.expand_dims((pts[1, :] + pts[2, :]) / 2, 0)), axis=0) 109 | for n in range(pts.shape[0]): 110 | if pts[n, 2] <= 0.05: 111 | continue 112 | cor_x, cor_y = int(pts[n, 0]), int(pts[n, 1]) 113 | part_line[n] = (cor_x, cor_y) 114 | cv2.circle(frame, (cor_x, cor_y), 3, p_color[n], -1) 115 | 116 | for i, (start_p, end_p) in enumerate(l_pair): 117 | if start_p in part_line and end_p in part_line: 118 | start_xy = part_line[start_p] 119 | end_xy = part_line[end_p] 120 | cv2.line(frame, start_xy, end_xy, line_color[i], int(1*(pts[start_p, 2] + pts[end_p, 2]) + 1)) 121 | return frame 122 | 123 | 124 | def vis_frame_fast(frame, im_res, joint_format='coco'): 125 | """ 126 | frame: frame image 127 | im_res: im_res of predictions 128 | format: coco or mpii 129 | 130 | return rendered image 131 | """ 132 | if joint_format == 'coco': 133 | l_pair = COCO_PAIR 134 | p_color = POINT_COLORS 135 | line_color = LINE_COLORS 136 | elif joint_format == 'mpii': 137 | l_pair = MPII_PAIR 138 | p_color = [PURPLE, BLUE, BLUE, RED, RED, BLUE, BLUE, RED, RED, PURPLE, PURPLE, PURPLE, RED, RED,BLUE,BLUE] 139 | else: 140 | NotImplementedError 141 | 142 | #im_name = im_res['imgname'].split('/')[-1] 143 | img = frame 144 | for human in im_res: # ['result']: 145 | part_line = {} 146 | kp_preds = human['keypoints'] 147 | kp_scores = human['kp_score'] 148 | kp_preds = torch.cat((kp_preds, torch.unsqueeze((kp_preds[1, :]+kp_preds[2, :]) / 2, 0))) 149 | kp_scores = torch.cat((kp_scores, torch.unsqueeze((kp_scores[1, :]+kp_scores[2, :]) / 2, 0))) 150 | # Draw keypoints 151 | for n in range(kp_scores.shape[0]): 152 | if kp_scores[n] <= 0.05: 153 
| continue 154 | cor_x, cor_y = int(kp_preds[n, 0]), int(kp_preds[n, 1]) 155 | part_line[n] = (cor_x, cor_y) 156 | cv2.circle(img, (cor_x, cor_y), 4, p_color[n], -1) 157 | # Draw limbs 158 | for i, (start_p, end_p) in enumerate(l_pair): 159 | if start_p in part_line and end_p in part_line: 160 | start_xy = part_line[start_p] 161 | end_xy = part_line[end_p] 162 | cv2.line(img, start_xy, end_xy, line_color[i], 2*(kp_scores[start_p] + kp_scores[end_p]) + 1) 163 | return img 164 | 165 | 166 | def vis_frame(frame, im_res, joint_format='coco'): 167 | """ 168 | frame: frame image 169 | im_res: im_res of predictions 170 | format: coco or mpii 171 | 172 | return rendered image 173 | """ 174 | if joint_format == 'coco': 175 | l_pair = COCO_PAIR 176 | p_color = POINT_COLORS 177 | line_color = LINE_COLORS 178 | elif joint_format == 'mpii': 179 | l_pair = MPII_PAIR 180 | p_color = [PURPLE, BLUE, BLUE, RED, RED, BLUE, BLUE, RED, RED, PURPLE, PURPLE, PURPLE, RED, RED, BLUE, BLUE] 181 | line_color = [PURPLE, BLUE, BLUE, RED, RED, BLUE, BLUE, RED, RED, PURPLE, PURPLE, RED, RED, BLUE, BLUE] 182 | else: 183 | raise NotImplementedError 184 | 185 | im_name = im_res['imgname'].split('/')[-1] 186 | img = frame 187 | height, width = img.shape[:2] 188 | img = cv2.resize(img, (int(width/2), int(height/2))) 189 | for human in im_res['result']: 190 | part_line = {} 191 | kp_preds = human['keypoints'] 192 | kp_scores = human['kp_score'] 193 | kp_preds = torch.cat((kp_preds, torch.unsqueeze((kp_preds[5, :]+kp_preds[6, :]) / 2, 0))) 194 | kp_scores = torch.cat((kp_scores, torch.unsqueeze((kp_scores[5, :]+kp_scores[6, :]) / 2, 0))) 195 | # Draw keypoints 196 | for n in range(kp_scores.shape[0]): 197 | if kp_scores[n] <= 0.05: 198 | continue 199 | cor_x, cor_y = int(kp_preds[n, 0]), int(kp_preds[n, 1]) 200 | part_line[n] = (int(cor_x/2), int(cor_y/2)) 201 | bg = img.copy() 202 | cv2.circle(bg, (int(cor_x/2), int(cor_y/2)), 2, p_color[n], -1) 203 | # Now create a mask of logo and create its inverse mask also 204 | transparency = max(0, min(1, kp_scores[n])) 205 | img = cv2.addWeighted(bg, transparency, img, 1-transparency, 0) 206 | # Draw limbs 207 | for i, (start_p, end_p) in enumerate(l_pair): 208 | if start_p in part_line and end_p in part_line: 209 | start_xy = part_line[start_p] 210 | end_xy = part_line[end_p] 211 | bg = img.copy() 212 | 213 | X = (start_xy[0], end_xy[0]) 214 | Y = (start_xy[1], end_xy[1]) 215 | mX = np.mean(X) 216 | mY = np.mean(Y) 217 | length = ((Y[0] - Y[1]) ** 2 + (X[0] - X[1]) ** 2) ** 0.5 218 | angle = math.degrees(math.atan2(Y[0] - Y[1], X[0] - X[1])) 219 | stickwidth = (kp_scores[start_p] + kp_scores[end_p]) + 1 220 | polygon = cv2.ellipse2Poly((int(mX),int(mY)), (int(length/2), stickwidth), int(angle), 0, 360, 1) 221 | cv2.fillConvexPoly(bg, polygon, line_color[i]) 222 | #cv2.line(bg, start_xy, end_xy, line_color[i], (2 * (kp_scores[start_p] + kp_scores[end_p])) + 1) 223 | transparency = max(0, min(1, 0.5*(kp_scores[start_p] + kp_scores[end_p]))) 224 | img = cv2.addWeighted(bg, transparency, img, 1-transparency, 0) 225 | img = cv2.resize(img, (width, height), interpolation=cv2.INTER_CUBIC) 226 | return img 227 | 228 | 229 | def getTime(time1=0): 230 | if not time1: 231 | return time.time() 232 | else: 233 | interval = time.time() - time1 234 | return time.time(), interval 235 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import time 
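# Per-frame pipeline of this demo: TinyYOLOv3_onecls detects person boxes,
# SPPE_FastPose estimates a skeleton inside each box, Tracker associates the
# skeletons across frames with a Kalman filter, and TSSTG classifies the action
# once a track has accumulated a 30-frame keypoint window.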
4 | import torch 5 | import argparse 6 | import numpy as np 7 | 8 | from Detection.Utils import ResizePadding 9 | from CameraLoader import CamLoader, CamLoader_Q 10 | from DetectorLoader import TinyYOLOv3_onecls 11 | 12 | from PoseEstimateLoader import SPPE_FastPose 13 | from fn import draw_single 14 | 15 | from Track.Tracker import Detection, Tracker 16 | from ActionsEstLoader import TSSTG 17 | 18 | #source = '../Data/test_video/test7.mp4' 19 | #source = '../Data/falldata/Home/Videos/video (2).avi' # hard detect 20 | source = '../Data/falldata/Home/Videos/video (1).avi' 21 | #source = 2 22 | 23 | 24 | def preproc(image): 25 | """preprocess function for CameraLoader. 26 | """ 27 | image = resize_fn(image) 28 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 29 | return image 30 | 31 | 32 | def kpt2bbox(kpt, ex=20): 33 | """Get bbox that hold on all of the keypoints (x,y) 34 | kpt: array of shape `(N, 2)`, 35 | ex: (int) expand bounding box, 36 | """ 37 | return np.array((kpt[:, 0].min() - ex, kpt[:, 1].min() - ex, 38 | kpt[:, 0].max() + ex, kpt[:, 1].max() + ex)) 39 | 40 | 41 | if __name__ == '__main__': 42 | par = argparse.ArgumentParser(description='Human Fall Detection Demo.') 43 | par.add_argument('-C', '--camera', default=source, # required=True, # default=2, 44 | help='Source of camera or video file path.') 45 | par.add_argument('--detection_input_size', type=int, default=384, 46 | help='Size of input in detection model in square must be divisible by 32 (int).') 47 | par.add_argument('--pose_input_size', type=str, default='224x160', 48 | help='Size of input in pose model must be divisible by 32 (h, w)') 49 | par.add_argument('--pose_backbone', type=str, default='resnet50', 50 | help='Backbone model for SPPE FastPose model.') 51 | par.add_argument('--show_detected', default=False, action='store_true', 52 | help='Show all bounding box from detection.') 53 | par.add_argument('--show_skeleton', default=True, action='store_true', 54 | help='Show skeleton pose.') 55 | par.add_argument('--save_out', type=str, default='', 56 | help='Save display to video file.') 57 | par.add_argument('--device', type=str, default='cuda', 58 | help='Device to run model on cpu or cuda.') 59 | args = par.parse_args() 60 | 61 | device = args.device 62 | 63 | # DETECTION MODEL. 64 | inp_dets = args.detection_input_size 65 | detect_model = TinyYOLOv3_onecls(inp_dets, device=device) 66 | 67 | # POSE MODEL. 68 | inp_pose = args.pose_input_size.split('x') 69 | inp_pose = (int(inp_pose[0]), int(inp_pose[1])) 70 | pose_model = SPPE_FastPose(args.pose_backbone, inp_pose[0], inp_pose[1], device=device) 71 | 72 | # Tracker. 73 | max_age = 30 74 | tracker = Tracker(max_age=max_age, n_init=3) 75 | 76 | # Actions Estimate. 77 | action_model = TSSTG() 78 | 79 | resize_fn = ResizePadding(inp_dets, inp_dets) 80 | 81 | cam_source = args.camera 82 | if type(cam_source) is str and os.path.isfile(cam_source): 83 | # Use loader thread with Q for video file. 84 | cam = CamLoader_Q(cam_source, queue_size=1000, preprocess=preproc).start() 85 | else: 86 | # Use normal thread loader for webcam. 
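        # A numeric string (e.g. '-C 0') is treated as a webcam index and cast
        # to int below; any other non-file string is handed to the loader
        # unchanged, so a stream URL may also work if the underlying capture
        # supports it.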
87 | cam = CamLoader(int(cam_source) if cam_source.isdigit() else cam_source, 88 | preprocess=preproc).start() 89 | 90 | #frame_size = cam.frame_size 91 | #scf = torch.min(inp_size / torch.FloatTensor([frame_size]), 1)[0] 92 | 93 | outvid = False 94 | if args.save_out != '': 95 | outvid = True 96 | codec = cv2.VideoWriter_fourcc(*'MJPG') 97 | writer = cv2.VideoWriter(args.save_out, codec, 30, (inp_dets * 2, inp_dets * 2)) 98 | 99 | fps_time = 0 100 | f = 0 101 | while cam.grabbed(): 102 | f += 1 103 | frame = cam.getitem() 104 | image = frame.copy() 105 | 106 | # Detect humans bbox in the frame with detector model. 107 | detected = detect_model.detect(frame, need_resize=False, expand_bb=10) 108 | 109 | # Predict each tracks bbox of current frame from previous frames information with Kalman filter. 110 | tracker.predict() 111 | # Merge two source of predicted bbox together. 112 | for track in tracker.tracks: 113 | det = torch.tensor([track.to_tlbr().tolist() + [0.5, 1.0, 0.0]], dtype=torch.float32) 114 | detected = torch.cat([detected, det], dim=0) if detected is not None else det 115 | 116 | detections = [] # List of Detections object for tracking. 117 | if detected is not None: 118 | #detected = non_max_suppression(detected[None, :], 0.45, 0.2)[0] 119 | # Predict skeleton pose of each bboxs. 120 | poses = pose_model.predict(frame, detected[:, 0:4], detected[:, 4]) 121 | 122 | # Create Detections object. 123 | detections = [Detection(kpt2bbox(ps['keypoints'].numpy()), 124 | np.concatenate((ps['keypoints'].numpy(), 125 | ps['kp_score'].numpy()), axis=1), 126 | ps['kp_score'].mean().numpy()) for ps in poses] 127 | 128 | # VISUALIZE. 129 | if args.show_detected: 130 | for bb in detected[:, 0:5]: 131 | frame = cv2.rectangle(frame, (bb[0], bb[1]), (bb[2], bb[3]), (0, 0, 255), 1) 132 | 133 | # Update tracks by matching each track information of current and previous frame or 134 | # create a new track if no matched. 135 | tracker.update(detections) 136 | 137 | # Predict Actions of each track. 138 | for i, track in enumerate(tracker.tracks): 139 | if not track.is_confirmed(): 140 | continue 141 | 142 | track_id = track.track_id 143 | bbox = track.to_tlbr().astype(int) 144 | center = track.get_center().astype(int) 145 | 146 | action = 'pending..' 147 | clr = (0, 255, 0) 148 | # Use 30 frames time-steps to prediction. 149 | if len(track.keypoints_list) == 30: 150 | pts = np.array(track.keypoints_list, dtype=np.float32) 151 | out = action_model.predict(pts, frame.shape[:2]) 152 | action_name = action_model.class_names[out[0].argmax()] 153 | action = '{}: {:.2f}%'.format(action_name, out[0].max() * 100) 154 | if action_name == 'Fall Down': 155 | clr = (255, 0, 0) 156 | elif action_name == 'Lying Down': 157 | clr = (255, 200, 0) 158 | 159 | # VISUALIZE. 160 | if track.time_since_update == 0: 161 | if args.show_skeleton: 162 | frame = draw_single(frame, track.keypoints_list[-1]) 163 | frame = cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 255, 0), 1) 164 | frame = cv2.putText(frame, str(track_id), (center[0], center[1]), cv2.FONT_HERSHEY_COMPLEX, 165 | 0.4, (255, 0, 0), 2) 166 | frame = cv2.putText(frame, action, (bbox[0] + 5, bbox[1] + 15), cv2.FONT_HERSHEY_COMPLEX, 167 | 0.4, clr, 1) 168 | 169 | # Show Frame. 170 | frame = cv2.resize(frame, (0, 0), fx=2., fy=2.) 
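        # The displayed (and optionally saved) frame is 2x the model input size,
        # matching the (inp_dets * 2, inp_dets * 2) resolution the VideoWriter
        # above was opened with; the overlay below reports the frame index and
        # FPS computed from the wall-clock time since the previous iteration.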
171 | frame = cv2.putText(frame, '%d, FPS: %f' % (f, 1.0 / (time.time() - fps_time)), 172 | (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1) 173 | frame = frame[:, :, ::-1] 174 | fps_time = time.time() 175 | 176 | if outvid: 177 | writer.write(frame) 178 | 179 | cv2.imshow('frame', frame) 180 | if cv2.waitKey(1) & 0xFF == ord('q'): 181 | break 182 | 183 | # Clear resource. 184 | cam.stop() 185 | if outvid: 186 | writer.release() 187 | cv2.destroyAllWindows() 188 | -------------------------------------------------------------------------------- /pPose_nms.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import torch 3 | import json 4 | import os 5 | import zipfile 6 | import time 7 | from multiprocessing.dummy import Pool as ThreadPool 8 | import numpy as np 9 | 10 | ''' Constant Configuration ''' 11 | delta1 = 1 12 | mu = 1.7 13 | delta2 = 2.65 14 | gamma = 22.48 15 | scoreThreds = 0.3 16 | matchThreds = 5 17 | areaThres = 0 # 40 * 40.5 18 | alpha = 0.1 19 | #pool = ThreadPool(4) 20 | 21 | 22 | def pose_nms(bboxes, bbox_scores, pose_preds, pose_scores): 23 | """ 24 | Parametric Pose NMS algorithm 25 | bboxes: bbox locations list (n, 4) 26 | bbox_scores: bbox scores list (n,) 27 | pose_preds: pose locations list (n, 17, 2) 28 | pose_scores: pose scores list (n, 17, 1) 29 | """ 30 | global ori_pose_preds, ori_pose_scores, ref_dists 31 | 32 | pose_scores[pose_scores == 0] = 1e-5 33 | 34 | final_result = [] 35 | 36 | ori_bboxes = bboxes.clone() 37 | ori_bbox_scores = bbox_scores.clone() 38 | ori_pose_preds = pose_preds.clone() 39 | ori_pose_scores = pose_scores.clone() 40 | 41 | xmax = bboxes[:, 2] 42 | xmin = bboxes[:, 0] 43 | ymax = bboxes[:, 3] 44 | ymin = bboxes[:, 1] 45 | 46 | widths = xmax - xmin 47 | heights = ymax - ymin 48 | ref_dists = alpha * np.maximum(widths, heights) 49 | 50 | nsamples = bboxes.shape[0] 51 | human_scores = pose_scores.mean(dim=1) 52 | 53 | human_ids = np.arange(nsamples) 54 | # Do pPose-NMS 55 | pick = [] 56 | merge_ids = [] 57 | while human_scores.shape[0] != 0: 58 | # Pick the one with highest score 59 | pick_id = torch.argmax(human_scores) 60 | pick.append(human_ids[pick_id]) 61 | # num_visPart = torch.sum(pose_scores[pick_id] > 0.2) 62 | 63 | # Get numbers of match keypoints by calling PCK_match 64 | ref_dist = ref_dists[human_ids[pick_id]] 65 | simi = get_parametric_distance(pick_id, pose_preds, pose_scores, ref_dist) 66 | num_match_keypoints = PCK_match(pose_preds[pick_id], pose_preds, ref_dist) 67 | 68 | # Delete humans who have more than matchThreds keypoints overlap and high similarity 69 | delete_ids = torch.from_numpy(np.arange(human_scores.shape[0]))[ 70 | (simi > gamma) | (num_match_keypoints >= matchThreds)] 71 | 72 | if delete_ids.shape[0] == 0: 73 | delete_ids = pick_id 74 | #else: 75 | # delete_ids = torch.from_numpy(delete_ids) 76 | 77 | merge_ids.append(human_ids[delete_ids]) 78 | pose_preds = np.delete(pose_preds, delete_ids, axis=0) 79 | pose_scores = np.delete(pose_scores, delete_ids, axis=0) 80 | human_ids = np.delete(human_ids, delete_ids) 81 | human_scores = np.delete(human_scores, delete_ids, axis=0) 82 | bbox_scores = np.delete(bbox_scores, delete_ids, axis=0) 83 | 84 | assert len(merge_ids) == len(pick) 85 | bboxs_pick = ori_bboxes[pick] 86 | preds_pick = ori_pose_preds[pick] 87 | scores_pick = ori_pose_scores[pick] 88 | bbox_scores_pick = ori_bbox_scores[pick] 89 | #final_result = pool.map(filter_result, zip(scores_pick, merge_ids, preds_pick, pick, 
bbox_scores_pick)) 90 | #final_result = [item for item in final_result if item is not None] 91 | 92 | for j in range(len(pick)): 93 | ids = np.arange(pose_preds.shape[1]) 94 | max_score = torch.max(scores_pick[j, ids, 0]) 95 | 96 | if max_score < scoreThreds: 97 | continue 98 | 99 | # Merge poses 100 | merge_id = merge_ids[j] 101 | merge_pose, merge_score = p_merge_fast( 102 | preds_pick[j], ori_pose_preds[merge_id], ori_pose_scores[merge_id], ref_dists[pick[j]]) 103 | 104 | max_score = torch.max(merge_score[ids]) 105 | if max_score < scoreThreds: 106 | continue 107 | 108 | xmax = max(merge_pose[:, 0]) 109 | xmin = min(merge_pose[:, 0]) 110 | ymax = max(merge_pose[:, 1]) 111 | ymin = min(merge_pose[:, 1]) 112 | 113 | if 1.5 ** 2 * (xmax - xmin) * (ymax - ymin) < areaThres: 114 | continue 115 | 116 | final_result.append({ 117 | 'bbox': bboxs_pick[j], 118 | 'bbox_score': bbox_scores_pick[j], 119 | 'keypoints': merge_pose - 0.3, 120 | 'kp_score': merge_score, 121 | 'proposal_score': torch.mean(merge_score) + bbox_scores_pick[j] + 1.25 * max(merge_score) 122 | }) 123 | 124 | return final_result 125 | 126 | 127 | def filter_result(args): 128 | score_pick, merge_id, pred_pick, pick, bbox_score_pick = args 129 | global ori_pose_preds, ori_pose_scores, ref_dists 130 | ids = np.arange(17) 131 | max_score = torch.max(score_pick[ids, 0]) 132 | 133 | if max_score < scoreThreds: 134 | return None 135 | 136 | # Merge poses 137 | merge_pose, merge_score = p_merge_fast( 138 | pred_pick, ori_pose_preds[merge_id], ori_pose_scores[merge_id], ref_dists[pick]) 139 | 140 | max_score = torch.max(merge_score[ids]) 141 | if max_score < scoreThreds: 142 | return None 143 | 144 | xmax = max(merge_pose[:, 0]) 145 | xmin = min(merge_pose[:, 0]) 146 | ymax = max(merge_pose[:, 1]) 147 | ymin = min(merge_pose[:, 1]) 148 | 149 | if 1.5 ** 2 * (xmax - xmin) * (ymax - ymin) < 40 * 40.5: 150 | return None 151 | 152 | return { 153 | 'keypoints': merge_pose - 0.3, 154 | 'kp_score': merge_score, 155 | 'proposal_score': torch.mean(merge_score) + bbox_score_pick + 1.25 * max(merge_score) 156 | } 157 | 158 | 159 | def p_merge(ref_pose, cluster_preds, cluster_scores, ref_dist): 160 | """ 161 | Score-weighted pose merging 162 | INPUT: 163 | ref_pose: reference pose -- [17, 2] 164 | cluster_preds: redundant poses -- [n, 17, 2] 165 | cluster_scores: redundant poses score -- [n, 17, 1] 166 | ref_dist: reference scale -- Constant 167 | OUTPUT: 168 | final_pose: merged pose -- [17, 2] 169 | final_score: merged score -- [17] 170 | """ 171 | dist = torch.sqrt(torch.sum( 172 | torch.pow(ref_pose[np.newaxis, :] - cluster_preds, 2), 173 | dim=2 174 | )) # [n, 17] 175 | 176 | kp_num = 17 177 | ref_dist = min(ref_dist, 15) 178 | 179 | mask = (dist <= ref_dist) 180 | final_pose = torch.zeros(kp_num, 2) 181 | final_score = torch.zeros(kp_num) 182 | 183 | if cluster_preds.dim() == 2: 184 | cluster_preds.unsqueeze_(0) 185 | cluster_scores.unsqueeze_(0) 186 | if mask.dim() == 1: 187 | mask.unsqueeze_(0) 188 | 189 | for i in range(kp_num): 190 | cluster_joint_scores = cluster_scores[:, i][mask[:, i]] # [k, 1] 191 | cluster_joint_location = cluster_preds[:, i, :][mask[:, i].unsqueeze( 192 | -1).repeat(1, 2)].view((torch.sum(mask[:, i]), -1)) 193 | 194 | # Get an normalized score 195 | normed_scores = cluster_joint_scores / torch.sum(cluster_joint_scores) 196 | 197 | # Merge poses by a weighted sum 198 | final_pose[i, 0] = torch.dot(cluster_joint_location[:, 0], normed_scores.squeeze(-1)) 199 | final_pose[i, 1] = torch.dot(cluster_joint_location[:, 1], 
normed_scores.squeeze(-1)) 200 | 201 | final_score[i] = torch.dot(cluster_joint_scores.transpose(0, 1).squeeze(0), normed_scores.squeeze(-1)) 202 | 203 | return final_pose, final_score 204 | 205 | 206 | def p_merge_fast(ref_pose, cluster_preds, cluster_scores, ref_dist): 207 | """ 208 | Score-weighted pose merging 209 | INPUT: 210 | ref_pose: reference pose -- [17, 2] 211 | cluster_preds: redundant poses -- [n, 17, 2] 212 | cluster_scores: redundant poses score -- [n, 17, 1] 213 | ref_dist: reference scale -- Constant 214 | OUTPUT: 215 | final_pose: merged pose -- [17, 2] 216 | final_score: merged score -- [17] 217 | """ 218 | dist = torch.sqrt(torch.sum( 219 | torch.pow(ref_pose[np.newaxis, :] - cluster_preds, 2), 220 | dim=2 221 | )) 222 | 223 | kp_num = 17 224 | ref_dist = min(ref_dist, 15) 225 | 226 | mask = (dist <= ref_dist) 227 | final_pose = torch.zeros(kp_num, 2) 228 | final_score = torch.zeros(kp_num) 229 | 230 | if cluster_preds.dim() == 2: 231 | cluster_preds.unsqueeze_(0) 232 | cluster_scores.unsqueeze_(0) 233 | if mask.dim() == 1: 234 | mask.unsqueeze_(0) 235 | 236 | # Weighted Merge 237 | masked_scores = cluster_scores.mul(mask.float().unsqueeze(-1)) 238 | normed_scores = masked_scores / torch.sum(masked_scores, dim=0) 239 | 240 | final_pose = torch.mul(cluster_preds, normed_scores.repeat(1, 1, 2)).sum(dim=0) 241 | final_score = torch.mul(masked_scores, normed_scores).sum(dim=0) 242 | return final_pose, final_score 243 | 244 | 245 | def get_parametric_distance(i, all_preds, keypoint_scores, ref_dist): 246 | pick_preds = all_preds[i] 247 | pred_scores = keypoint_scores[i] 248 | dist = torch.sqrt(torch.sum( 249 | torch.pow(pick_preds[np.newaxis, :] - all_preds, 2), 250 | dim=2 251 | )) 252 | mask = (dist <= 1) 253 | 254 | # Define a keypoints distance 255 | score_dists = torch.zeros(all_preds.shape[0], all_preds.shape[1]) 256 | keypoint_scores.squeeze_() 257 | if keypoint_scores.dim() == 1: 258 | keypoint_scores.unsqueeze_(0) 259 | if pred_scores.dim() == 1: 260 | pred_scores.unsqueeze_(1) 261 | # The predicted scores are repeated up to do broadcast 262 | pred_scores = pred_scores.repeat(1, all_preds.shape[0]).transpose(0, 1) 263 | 264 | score_dists[mask] = torch.tanh(pred_scores[mask] / delta1) *\ 265 | torch.tanh(keypoint_scores[mask] / delta1) 266 | 267 | point_dist = torch.exp((-1) * dist / delta2) 268 | final_dist = torch.sum(score_dists, dim=1) + mu * torch.sum(point_dist, dim=1) 269 | 270 | return final_dist 271 | 272 | 273 | def PCK_match(pick_pred, all_preds, ref_dist): 274 | dist = torch.sqrt(torch.sum( 275 | torch.pow(pick_pred[np.newaxis, :] - all_preds, 2), 276 | dim=2 277 | )) 278 | ref_dist = min(ref_dist, 7) 279 | num_match_keypoints = torch.sum( 280 | dist / ref_dist <= 1, 281 | dim=1 282 | ) 283 | 284 | return num_match_keypoints 285 | -------------------------------------------------------------------------------- /pose_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def normalize_points_with_size(xy, width, height, flip=False): 5 | """Normalize scale points in image with size of image to (0-1). 6 | xy : (frames, parts, xy) or (parts, xy) 7 | """ 8 | if xy.ndim == 2: 9 | xy = np.expand_dims(xy, 0) 10 | xy[:, :, 0] /= width 11 | xy[:, :, 1] /= height 12 | if flip: 13 | xy[:, :, 0] = 1 - xy[:, :, 0] 14 | return xy 15 | 16 | 17 | def scale_pose(xy): 18 | """Normalize pose points by scale with max/min value of each pose. 
19 | xy : (frames, parts, xy) or (parts, xy) 20 | """ 21 | if xy.ndim == 2: 22 | xy = np.expand_dims(xy, 0) 23 | xy_min = np.nanmin(xy, axis=1) 24 | xy_max = np.nanmax(xy, axis=1) 25 | for i in range(xy.shape[0]): 26 | xy[i] = ((xy[i] - xy_min[i]) / (xy_max[i] - xy_min[i])) * 2 - 1 27 | return xy.squeeze() 28 | -------------------------------------------------------------------------------- /sample1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GajuuzZ/Human-Falling-Detect-Tracks/7ed2faa4d6147dfd576f58869b6c25545208af35/sample1.gif --------------------------------------------------------------------------------
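The two helpers in pose_utils.py are small but shape-sensitive, so a minimal sketch of how they chain together may help. The dummy data below (30 frames, 13 keypoints, a 384x384 frame) is an illustrative assumption only, not something taken from the repository.

import numpy as np
from pose_utils import normalize_points_with_size, scale_pose

# Illustrative dummy input: 30 time steps of 13 (x, y) keypoints in a 384x384 frame.
frame_w, frame_h = 384, 384
pts = np.random.rand(30, 13, 2).astype(np.float32) * frame_w

pts = normalize_points_with_size(pts, frame_w, frame_h)  # x, y scaled into [0, 1]
pts = scale_pose(pts)                                    # each frame rescaled to [-1, 1]
print(pts.shape)  # (30, 13, 2)

Scaling each frame independently by its own min/max removes absolute position and body size from the keypoints, which is what scale_pose's [-1, 1] output provides.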