├── ActionsEstLoader.py ├── Actionsrecognition ├── Models.py ├── Utils.py └── train.py ├── App.py ├── CameraLoader.py ├── Data ├── create_dataset_1.py ├── create_dataset_2.py └── create_dataset_3.py ├── Detection ├── Models.py └── Utils.py ├── DetectorLoader.py ├── Models ├── TSSTG │ └── _.txt ├── sppe │ └── _.txt └── yolo-tiny-onecls │ └── _.txt ├── PoseEstimateLoader.py ├── README.md ├── SPPE ├── LICENSE ├── README.md └── src │ ├── main_fast_inference.py │ ├── models │ ├── FastPose.py │ ├── __init__.py │ ├── hg-prm.py │ ├── hgPRM.py │ └── layers │ │ ├── DUC.py │ │ ├── PRM.py │ │ ├── Residual.py │ │ ├── Resnet.py │ │ ├── SE_Resnet.py │ │ ├── SE_module.py │ │ ├── __init__.py │ │ └── util_models.py │ ├── opt.py │ └── utils │ ├── __init__.py │ ├── dataset │ ├── .coco.py.swp │ ├── __init__.py │ ├── coco.py │ ├── fuse.py │ └── mpii.py │ ├── eval.py │ ├── img.py │ └── pose.py ├── Track ├── Tracker.py ├── iou_matching.py ├── kalman_filter.py └── linear_assignment.py ├── Visualizer.py ├── fn.py ├── main.py ├── pPose_nms.py ├── pose_utils.py └── sample1.gif /ActionsEstLoader.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import numpy as np 4 | 5 | from Actionsrecognition.Models import TwoStreamSpatialTemporalGraph 6 | from pose_utils import normalize_points_with_size, scale_pose 7 | 8 | 9 | class TSSTG(object): 10 | """Two-Stream Spatial Temporal Graph Model Loader. 11 | Args: 12 | weight_file: (str) Path to trained weights file. 13 | device: (str) Device to load the model on 'cpu' or 'cuda'. 14 | """ 15 | def __init__(self, 16 | weight_file='./Models/TSSTG/tsstg-model.pth', 17 | device='cuda'): 18 | self.graph_args = {'strategy': 'spatial'} 19 | self.class_names = ['Standing', 'Walking', 'Sitting', 'Lying Down', 20 | 'Stand up', 'Sit down', 'Fall Down'] 21 | self.num_class = len(self.class_names) 22 | self.device = device 23 | 24 | self.model = TwoStreamSpatialTemporalGraph(self.graph_args, self.num_class).to(self.device) 25 | self.model.load_state_dict(torch.load(weight_file)) 26 | self.model.eval() 27 | 28 | def predict(self, pts, image_size): 29 | """Predict actions from single person skeleton points and score in time sequence. 30 | Args: 31 | pts: (numpy array) points and score in shape `(t, v, c)` where 32 | t : inputs sequence (time steps)., 33 | v : number of graph node (body parts)., 34 | c : channel (x, y, score)., 35 | image_size: (tuple of int) width, height of image frame. 36 | Returns: 37 | (numpy array) Probability of each class actions. 
38 | """ 39 | pts[:, :, :2] = normalize_points_with_size(pts[:, :, :2], image_size[0], image_size[1]) 40 | pts[:, :, :2] = scale_pose(pts[:, :, :2]) 41 | pts = np.concatenate((pts, np.expand_dims((pts[:, 1, :] + pts[:, 2, :]) / 2, 1)), axis=1) 42 | 43 | pts = torch.tensor(pts, dtype=torch.float32) 44 | pts = pts.permute(2, 0, 1)[None, :] 45 | 46 | mot = pts[:, :2, 1:, :] - pts[:, :2, :-1, :] 47 | mot = mot.to(self.device) 48 | pts = pts.to(self.device) 49 | 50 | out = self.model((pts, mot)) 51 | 52 | return out.detach().cpu().numpy() 53 | -------------------------------------------------------------------------------- /Actionsrecognition/Models.py: -------------------------------------------------------------------------------- 1 | ### Reference from: https://github.com/yysijie/st-gcn/tree/master/net 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | import numpy as np 7 | 8 | from Actionsrecognition.Utils import Graph 9 | 10 | 11 | class GraphConvolution(nn.Module): 12 | """The basic module for applying a graph convolution. 13 | Args: 14 | - in_channel: (int) Number of channels in the input sequence data. 15 | - out_channels: (int) Number of channels produced by the convolution. 16 | - kernel_size: (int) Size of the graph convolving kernel. 17 | - t_kernel_size: (int) Size of the temporal convolving kernel. 18 | - t_stride: (int, optional) Stride of the temporal convolution. Default: 1 19 | - t_padding: (int, optional) Temporal zero-padding added to both sides of 20 | the input. Default: 0 21 | - t_dilation: (int, optional) Spacing between temporal kernel elements. Default: 1 22 | - bias: (bool, optional) If `True`, adds a learnable bias to the output. 23 | Default: `True` 24 | Shape: 25 | - Inputs x: Graph sequence in :math:`(N, in_channels, T_{in}, V)`, 26 | A: Graph adjacency matrix in :math:`(K, V, V)`, 27 | - Output: Graph sequence out in :math:`(N, out_channels, T_{out}, V)` 28 | 29 | where 30 | :math:`N` is a batch size, 31 | :math:`K` is the spatial kernel size, as :math:`K == kernel_size[1]`, 32 | :math:`T_{in}/T_{out}` is a length of input/output sequence, 33 | :math:`V` is the number of graph nodes. 34 | 35 | """ 36 | def __init__(self, in_channels, out_channels, kernel_size, 37 | t_kernel_size=1, 38 | t_stride=1, 39 | t_padding=0, 40 | t_dilation=1, 41 | bias=True): 42 | super().__init__() 43 | 44 | self.kernel_size = kernel_size 45 | self.conv = nn.Conv2d(in_channels, 46 | out_channels * kernel_size, 47 | kernel_size=(t_kernel_size, 1), 48 | padding=(t_padding, 0), 49 | stride=(t_stride, 1), 50 | dilation=(t_dilation, 1), 51 | bias=bias) 52 | 53 | def forward(self, x, A): 54 | x = self.conv(x) 55 | n, kc, t, v = x.size() 56 | x = x.view(n, self.kernel_size, kc//self.kernel_size, t, v) 57 | x = torch.einsum('nkctv,kvw->nctw', (x, A)) 58 | 59 | return x.contiguous() 60 | 61 | 62 | class st_gcn(nn.Module): 63 | """Applies a spatial temporal graph convolution over an input graph sequence. 64 | Args: 65 | - in_channels: (int) Number of channels in the input sequence data. 66 | - out_channels: (int) Number of channels produced by the convolution. 67 | - kernel_size: (tuple) Size of the temporal convolving kernel and 68 | graph convolving kernel. 69 | - stride: (int, optional) Stride of the temporal convolution. Default: 1 70 | - dropout: (int, optional) Dropout rate of the final output. Default: 0 71 | - residual: (bool, optional) If `True`, applies a residual mechanism. 
72 | Default: `True` 73 | Shape: 74 | - Inputs x: Graph sequence in :math: `(N, in_channels, T_{in}, V)`, 75 | A: Graph Adjecency matrix in :math: `(K, V, V)`, 76 | - Output: Graph sequence out in :math: `(N, out_channels, T_{out}, V)` 77 | where 78 | :math:`N` is a batch size, 79 | :math:`K` is the spatial kernel size, as :math:`K == kernel_size[1]`, 80 | :math:`T_{in}/T_{out}` is a length of input/output sequence, 81 | :math:`V` is the number of graph nodes. 82 | """ 83 | def __init__(self, in_channels, out_channels, kernel_size, 84 | stride=1, 85 | dropout=0, 86 | residual=True): 87 | super().__init__() 88 | assert len(kernel_size) == 2 89 | assert kernel_size[0] % 2 == 1 90 | 91 | padding = ((kernel_size[0] - 1) // 2, 0) 92 | 93 | self.gcn = GraphConvolution(in_channels, out_channels, kernel_size[1]) 94 | self.tcn = nn.Sequential(nn.BatchNorm2d(out_channels), 95 | nn.ReLU(inplace=True), 96 | nn.Conv2d(out_channels, 97 | out_channels, 98 | (kernel_size[0], 1), 99 | (stride, 1), 100 | padding), 101 | nn.BatchNorm2d(out_channels), 102 | nn.Dropout(dropout, inplace=True) 103 | ) 104 | 105 | if not residual: 106 | self.residual = lambda x: 0 107 | elif (in_channels == out_channels) and (stride == 1): 108 | self.residual = lambda x: x 109 | else: 110 | self.residual = nn.Sequential(nn.Conv2d(in_channels, 111 | out_channels, 112 | kernel_size=1, 113 | stride=(stride, 1)), 114 | nn.BatchNorm2d(out_channels) 115 | ) 116 | self.relu = nn.ReLU(inplace=True) 117 | 118 | def forward(self, x, A): 119 | res = self.residual(x) 120 | x = self.gcn(x, A) 121 | x = self.tcn(x) + res 122 | 123 | return self.relu(x) 124 | 125 | 126 | class StreamSpatialTemporalGraph(nn.Module): 127 | """Spatial temporal graph convolutional networks. 128 | Args: 129 | - in_channels: (int) Number of input channels. 130 | - graph_args: (dict) Args map of `Actionsrecognition.Utils.Graph` Class. 131 | - num_class: (int) Number of class outputs. If `None` return pooling features of 132 | the last st-gcn layer instead. 133 | - edge_importance_weighting: (bool) If `True`, adds a learnable importance 134 | weighting to the edges of the graph. 135 | - **kwargs: (optional) Other parameters for graph convolution units. 136 | Shape: 137 | - Input: :math:`(N, in_channels, T_{in}, V_{in})` 138 | - Output: :math:`(N, num_class)` where 139 | :math:`N` is a batch size, 140 | :math:`T_{in}` is a length of input sequence, 141 | :math:`V_{in}` is the number of graph nodes, 142 | or If num_class is `None`: `(N, out_channels)` 143 | :math:`out_channels` is number of out_channels of the last layer. 144 | """ 145 | def __init__(self, in_channels, graph_args, num_class=None, 146 | edge_importance_weighting=True, **kwargs): 147 | super().__init__() 148 | # Load graph. 149 | graph = Graph(**graph_args) 150 | A = torch.tensor(graph.A, dtype=torch.float32, requires_grad=False) 151 | self.register_buffer('A', A) 152 | 153 | # Networks. 
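        # The stack below follows the original ST-GCN design: ten st_gcn blocks
        # growing from 64 to 128 to 256 channels, a temporal kernel of 9 frames,
        # and a spatial kernel equal to the number of adjacency subsets A.size(0)
        # (3 for the 'spatial' partition strategy). The two stride-2 blocks
        # downsample the temporal dimension by a factor of 2 each.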
154 | spatial_kernel_size = A.size(0) 155 | temporal_kernel_size = 9 156 | kernel_size = (temporal_kernel_size, spatial_kernel_size) 157 | kwargs0 = {k: v for k, v in kwargs.items() if k != 'dropout'} 158 | 159 | self.data_bn = nn.BatchNorm1d(in_channels * A.size(1)) 160 | self.st_gcn_networks = nn.ModuleList(( 161 | st_gcn(in_channels, 64, kernel_size, 1, residual=False, **kwargs0), 162 | st_gcn(64, 64, kernel_size, 1, **kwargs), 163 | st_gcn(64, 64, kernel_size, 1, **kwargs), 164 | st_gcn(64, 64, kernel_size, 1, **kwargs), 165 | st_gcn(64, 128, kernel_size, 2, **kwargs), 166 | st_gcn(128, 128, kernel_size, 1, **kwargs), 167 | st_gcn(128, 128, kernel_size, 1, **kwargs), 168 | st_gcn(128, 256, kernel_size, 2, **kwargs), 169 | st_gcn(256, 256, kernel_size, 1, **kwargs), 170 | st_gcn(256, 256, kernel_size, 1, **kwargs) 171 | )) 172 | 173 | # initialize parameters for edge importance weighting. 174 | if edge_importance_weighting: 175 | self.edge_importance = nn.ParameterList([ 176 | nn.Parameter(torch.ones(A.size())) 177 | for i in self.st_gcn_networks 178 | ]) 179 | else: 180 | self.edge_importance = [1] * len(self.st_gcn_networks) 181 | 182 | if num_class is not None: 183 | self.cls = nn.Conv2d(256, num_class, kernel_size=1) 184 | else: 185 | self.cls = lambda x: x 186 | 187 | def forward(self, x): 188 | # data normalization. 189 | N, C, T, V = x.size() 190 | x = x.permute(0, 3, 1, 2).contiguous() # (N, V, C, T) 191 | x = x.view(N, V * C, T) 192 | x = self.data_bn(x) 193 | x = x.view(N, V, C, T) 194 | x = x.permute(0, 2, 3, 1).contiguous() 195 | x = x.view(N, C, T, V) 196 | 197 | # forward. 198 | for gcn, importance in zip(self.st_gcn_networks, self.edge_importance): 199 | x = gcn(x, self.A * importance) 200 | 201 | x = F.avg_pool2d(x, x.size()[2:]) 202 | x = self.cls(x) 203 | x = x.view(x.size(0), -1) 204 | 205 | return x 206 | 207 | 208 | class TwoStreamSpatialTemporalGraph(nn.Module): 209 | """Two inputs spatial temporal graph convolutional networks. 210 | Args: 211 | - graph_args: (dict) Args map of `Actionsrecognition.Utils.Graph` Class. 212 | - num_class: (int) Number of class outputs. 213 | - edge_importance_weighting: (bool) If `True`, adds a learnable importance 214 | weighting to the edges of the graph. 215 | - **kwargs: (optional) Other parameters for graph convolution units. 216 | Shape: 217 | - Input: :tuple of math:`((N, 3, T, V), (N, 2, T, V))` 218 | for points and motions stream where. 
219 |           :math:`N` is a batch size,
220 |           :math:`in_channels` is the data channels: 3 (x, y, score) for the points stream, 2 (mot_x, mot_y) for the motion stream,
221 |           :math:`T` is the length of the input sequence,
222 |           :math:`V` is the number of graph nodes,
223 |         - Output: :math:`(N, num_class)`
224 |     """
225 |     def __init__(self, graph_args, num_class, edge_importance_weighting=True,
226 |                  **kwargs):
227 |         super().__init__()
228 |         self.pts_stream = StreamSpatialTemporalGraph(3, graph_args, None,
229 |                                                      edge_importance_weighting,
230 |                                                      **kwargs)
231 |         self.mot_stream = StreamSpatialTemporalGraph(2, graph_args, None,
232 |                                                      edge_importance_weighting,
233 |                                                      **kwargs)
234 | 
235 |         self.fcn = nn.Linear(256 * 2, num_class)
236 | 
237 |     def forward(self, inputs):
238 |         out1 = self.pts_stream(inputs[0])
239 |         out2 = self.mot_stream(inputs[1])
240 | 
241 |         concat = torch.cat([out1, out2], dim=-1)
242 |         out = self.fcn(concat)
243 | 
244 |         return torch.sigmoid(out)
245 | 
--------------------------------------------------------------------------------
/Actionsrecognition/Utils.py:
--------------------------------------------------------------------------------
1 | ### Reference from: https://github.com/yysijie/st-gcn/blob/master/net/utils/graph.py
2 | 
3 | import os
4 | import torch
5 | import numpy as np
6 | 
7 | 
8 | class Graph:
9 |     """The graph used to model the skeletons extracted by Alpha-Pose.
10 |     Args:
11 |         - strategy: (string) must be one of the following candidates:
12 |             - uniform: Uniform Labeling,
13 |             - distance: Distance Partitioning,
14 |             - spatial: Spatial Configuration,
15 |             For more information, please refer to the section 'Partition Strategies'
16 |             in the ST-GCN paper (https://arxiv.org/abs/1801.07455).
17 |         - layout: (string) must be one of the following candidates:
18 |             - coco_cut: COCO keypoint format with 4 joints (L-R ears, L-R eyes) cut out.
19 |         - max_hop: (int) the maximal distance between two connected nodes.
20 |         - dilation: (int) controls the spacing between the kernel points.
21 | """ 22 | def __init__(self, 23 | layout='coco_cut', 24 | strategy='uniform', 25 | max_hop=1, 26 | dilation=1): 27 | self.max_hop = max_hop 28 | self.dilation = dilation 29 | 30 | self.get_edge(layout) 31 | self.hop_dis = get_hop_distance(self.num_node, self.edge, max_hop) 32 | self.get_adjacency(strategy) 33 | 34 | def get_edge(self, layout): 35 | if layout == 'coco_cut': 36 | self.num_node = 14 37 | self_link = [(i, i) for i in range(self.num_node)] 38 | neighbor_link = [(6, 4), (4, 2), (2, 13), (13, 1), (5, 3), (3, 1), (12, 10), 39 | (10, 8), (8, 2), (11, 9), (9, 7), (7, 1), (13, 0)] 40 | self.edge = self_link + neighbor_link 41 | self.center = 13 42 | else: 43 | raise ValueError('This layout is not supported!') 44 | 45 | def get_adjacency(self, strategy): 46 | valid_hop = range(0, self.max_hop + 1, self.dilation) 47 | adjacency = np.zeros((self.num_node, self.num_node)) 48 | for hop in valid_hop: 49 | adjacency[self.hop_dis == hop] = 1 50 | normalize_adjacency = normalize_digraph(adjacency) 51 | 52 | if strategy == 'uniform': 53 | A = np.zeros((1, self.num_node, self.num_node)) 54 | A[0] = normalize_adjacency 55 | self.A = A 56 | elif strategy == 'distance': 57 | A = np.zeros((len(valid_hop), self.num_node, self.num_node)) 58 | for i, hop in enumerate(valid_hop): 59 | A[i][self.hop_dis == hop] = normalize_adjacency[self.hop_dis == 60 | hop] 61 | self.A = A 62 | elif strategy == 'spatial': 63 | A = [] 64 | for hop in valid_hop: 65 | a_root = np.zeros((self.num_node, self.num_node)) 66 | a_close = np.zeros((self.num_node, self.num_node)) 67 | a_further = np.zeros((self.num_node, self.num_node)) 68 | for i in range(self.num_node): 69 | for j in range(self.num_node): 70 | if self.hop_dis[j, i] == hop: 71 | if self.hop_dis[j, self.center] == self.hop_dis[i, self.center]: 72 | a_root[j, i] = normalize_adjacency[j, i] 73 | elif self.hop_dis[j, self.center] > self.hop_dis[i, self.center]: 74 | a_close[j, i] = normalize_adjacency[j, i] 75 | else: 76 | a_further[j, i] = normalize_adjacency[j, i] 77 | if hop == 0: 78 | A.append(a_root) 79 | else: 80 | A.append(a_root + a_close) 81 | A.append(a_further) 82 | A = np.stack(A) 83 | self.A = A 84 | #self.A = np.swapaxes(np.swapaxes(A, 0, 1), 1, 2) 85 | else: 86 | raise ValueError("This strategy is not supported!") 87 | 88 | 89 | def get_hop_distance(num_node, edge, max_hop=1): 90 | A = np.zeros((num_node, num_node)) 91 | for i, j in edge: 92 | A[j, i] = 1 93 | A[i, j] = 1 94 | 95 | # compute hop steps 96 | hop_dis = np.zeros((num_node, num_node)) + np.inf 97 | transfer_mat = [np.linalg.matrix_power(A, d) for d in range(max_hop + 1)] 98 | arrive_mat = (np.stack(transfer_mat) > 0) 99 | for d in range(max_hop, -1, -1): 100 | hop_dis[arrive_mat[d]] = d 101 | return hop_dis 102 | 103 | 104 | def normalize_digraph(A): 105 | Dl = np.sum(A, 0) 106 | num_node = A.shape[0] 107 | Dn = np.zeros((num_node, num_node)) 108 | for i in range(num_node): 109 | if Dl[i] > 0: 110 | Dn[i, i] = Dl[i]**(-1) 111 | AD = np.dot(A, Dn) 112 | return AD 113 | 114 | 115 | def normalize_undigraph(A): 116 | Dl = np.sum(A, 0) 117 | num_node = A.shape[0] 118 | Dn = np.zeros((num_node, num_node)) 119 | for i in range(num_node): 120 | if Dl[i] > 0: 121 | Dn[i, i] = Dl[i]**(-0.5) 122 | DAD = np.dot(np.dot(Dn, A), Dn) 123 | return DAD 124 | -------------------------------------------------------------------------------- /Actionsrecognition/train.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import torch 4 | import 
pickle 5 | import numpy as np 6 | import torch.nn.functional as F 7 | from shutil import copyfile 8 | from tqdm import tqdm 9 | from torch.utils import data 10 | from torch.optim.adadelta import Adadelta 11 | from sklearn.model_selection import train_test_split 12 | 13 | from Actionsrecognition.Models import * 14 | from Visualizer import plot_graphs, plot_confusion_metrix 15 | 16 | 17 | save_folder = 'saved/TSSTG(pts+mot)-01(cf+hm-hm)' 18 | 19 | device = 'cuda' 20 | epochs = 30 21 | batch_size = 32 22 | 23 | # DATA FILES. 24 | # Should be in format of 25 | # inputs: (N_samples, time_steps, graph_node, channels), 26 | # labels: (N_samples, num_class) 27 | # and do some of normalizations on it. Default data create from: 28 | # Data.create_dataset_(1-3).py 29 | # where 30 | # time_steps: Number of frame input sequence, Default: 30 31 | # graph_node: Number of node in skeleton, Default: 14 32 | # channels: Inputs data (x, y and scores), Default: 3 33 | # num_class: Number of pose class to train, Default: 7 34 | 35 | data_files = ['../Data/Coffee_room_new-set(labelXscrw).pkl', 36 | '../Data/Home_new-set(labelXscrw).pkl'] 37 | class_names = ['Standing', 'Walking', 'Sitting', 'Lying Down', 38 | 'Stand up', 'Sit down', 'Fall Down'] 39 | num_class = len(class_names) 40 | 41 | 42 | def load_dataset(data_files, batch_size, split_size=0): 43 | """Load data files into torch DataLoader with/without spliting train-test. 44 | """ 45 | features, labels = [], [] 46 | for fil in data_files: 47 | with open(fil, 'rb') as f: 48 | fts, lbs = pickle.load(f) 49 | features.append(fts) 50 | labels.append(lbs) 51 | del fts, lbs 52 | features = np.concatenate(features, axis=0) 53 | labels = np.concatenate(labels, axis=0) 54 | 55 | if split_size > 0: 56 | x_train, x_valid, y_train, y_valid = train_test_split(features, labels, test_size=split_size, 57 | random_state=9) 58 | train_set = data.TensorDataset(torch.tensor(x_train, dtype=torch.float32).permute(0, 3, 1, 2), 59 | torch.tensor(y_train, dtype=torch.float32)) 60 | valid_set = data.TensorDataset(torch.tensor(x_valid, dtype=torch.float32).permute(0, 3, 1, 2), 61 | torch.tensor(y_valid, dtype=torch.float32)) 62 | train_loader = data.DataLoader(train_set, batch_size, shuffle=True) 63 | valid_loader = data.DataLoader(valid_set, batch_size) 64 | else: 65 | train_set = data.TensorDataset(torch.tensor(features, dtype=torch.float32).permute(0, 3, 1, 2), 66 | torch.tensor(labels, dtype=torch.float32)) 67 | train_loader = data.DataLoader(train_set, batch_size, shuffle=True) 68 | valid_loader = None 69 | return train_loader, valid_loader 70 | 71 | 72 | def accuracy_batch(y_pred, y_true): 73 | return (y_pred.argmax(1) == y_true.argmax(1)).mean() 74 | 75 | 76 | def set_training(model, mode=True): 77 | for p in model.parameters(): 78 | p.requires_grad = mode 79 | model.train(mode) 80 | return model 81 | 82 | 83 | if __name__ == '__main__': 84 | save_folder = os.path.join(os.path.dirname(__file__), save_folder) 85 | if not os.path.exists(save_folder): 86 | os.makedirs(save_folder) 87 | 88 | # DATA. 89 | train_loader, _ = load_dataset(data_files[0:1], batch_size) 90 | valid_loader, train_loader_ = load_dataset(data_files[1:2], batch_size, 0.2) 91 | 92 | train_loader = data.DataLoader(data.ConcatDataset([train_loader.dataset, train_loader_.dataset]), 93 | batch_size, shuffle=True) 94 | dataloader = {'train': train_loader, 'valid': valid_loader} 95 | del train_loader_ 96 | 97 | # MODEL. 
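    # The two-stream model takes the skeleton sequence (N, 3, T, V) in one stream
    # and the frame-to-frame motion (N, 2, T-1, V) in the other, and ends with a
    # sigmoid over num_class outputs, which is why BCELoss is used as the criterion.
    # A quick shape check (a sketch, assuming a batch of 30-frame, 14-node skeletons):
    #   pts = torch.randn(8, 3, 30, 14)
    #   mot = pts[:, :2, 1:, :] - pts[:, :2, :-1, :]
    #   out = TwoStreamSpatialTemporalGraph({'strategy': 'spatial'}, 7)((pts, mot))
    #   out.shape  # -> torch.Size([8, 7])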
98 | graph_args = {'strategy': 'spatial'} 99 | model = TwoStreamSpatialTemporalGraph(graph_args, num_class).to(device) 100 | 101 | #optimizer = torch.optim.Adam(model.parameters(), lr=0.001) 102 | optimizer = Adadelta(model.parameters()) 103 | 104 | losser = torch.nn.BCELoss() 105 | 106 | # TRAINING. 107 | loss_list = {'train': [], 'valid': []} 108 | accu_list = {'train': [], 'valid': []} 109 | for e in range(epochs): 110 | print('Epoch {}/{}'.format(e, epochs - 1)) 111 | for phase in ['train', 'valid']: 112 | if phase == 'train': 113 | model = set_training(model, True) 114 | else: 115 | model = set_training(model, False) 116 | 117 | run_loss = 0.0 118 | run_accu = 0.0 119 | with tqdm(dataloader[phase], desc=phase) as iterator: 120 | for pts, lbs in iterator: 121 | # Create motion input by distance of points (x, y) of the same node 122 | # in two frames. 123 | mot = pts[:, :2, 1:, :] - pts[:, :2, :-1, :] 124 | 125 | mot = mot.to(device) 126 | pts = pts.to(device) 127 | lbs = lbs.to(device) 128 | 129 | # Forward. 130 | out = model((pts, mot)) 131 | loss = losser(out, lbs) 132 | 133 | if phase == 'train': 134 | # Backward. 135 | model.zero_grad() 136 | loss.backward() 137 | optimizer.step() 138 | 139 | run_loss += loss.item() 140 | accu = accuracy_batch(out.detach().cpu().numpy(), 141 | lbs.detach().cpu().numpy()) 142 | run_accu += accu 143 | 144 | iterator.set_postfix_str(' loss: {:.4f}, accu: {:.4f}'.format( 145 | loss.item(), accu)) 146 | iterator.update() 147 | #break 148 | loss_list[phase].append(run_loss / len(iterator)) 149 | accu_list[phase].append(run_accu / len(iterator)) 150 | #break 151 | 152 | print('Summary epoch:\n - Train loss: {:.4f}, accu: {:.4f}\n - Valid loss:' 153 | ' {:.4f}, accu: {:.4f}'.format(loss_list['train'][-1], accu_list['train'][-1], 154 | loss_list['valid'][-1], accu_list['valid'][-1])) 155 | 156 | # SAVE. 157 | torch.save(model.state_dict(), os.path.join(save_folder, 'tsstg-model.pth')) 158 | 159 | plot_graphs(list(loss_list.values()), list(loss_list.keys()), 160 | 'Last Train: {:.2f}, Valid: {:.2f}'.format( 161 | loss_list['train'][-1], loss_list['valid'][-1] 162 | ), 'Loss', xlim=[0, epochs], 163 | save=os.path.join(save_folder, 'loss_graph.png')) 164 | plot_graphs(list(accu_list.values()), list(accu_list.keys()), 165 | 'Last Train: {:.2f}, Valid: {:.2f}'.format( 166 | accu_list['train'][-1], accu_list['valid'][-1] 167 | ), 'Accu', xlim=[0, epochs], 168 | save=os.path.join(save_folder, 'accu_graph.png')) 169 | 170 | #break 171 | 172 | del train_loader, valid_loader 173 | 174 | model.load_state_dict(torch.load(os.path.join(save_folder, 'tsstg-model.pth'))) 175 | 176 | # EVALUATION. 
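    # Reload the weights saved after the last epoch and score them on the whole of
    # data_files[1]. Samples from this file were also used for training and
    # validation above, so the confusion matrix below is not a held-out test result.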
177 | model = set_training(model, False) 178 | data_file = data_files[1] 179 | eval_loader, _ = load_dataset([data_file], 32) 180 | 181 | print('Evaluation.') 182 | run_loss = 0.0 183 | run_accu = 0.0 184 | y_preds = [] 185 | y_trues = [] 186 | with tqdm(eval_loader, desc='eval') as iterator: 187 | for pts, lbs in iterator: 188 | mot = pts[:, :2, 1:, :] - pts[:, :2, :-1, :] 189 | mot = mot.to(device) 190 | pts = pts.to(device) 191 | lbs = lbs.to(device) 192 | 193 | out = model((pts, mot)) 194 | loss = losser(out, lbs) 195 | 196 | run_loss += loss.item() 197 | accu = accuracy_batch(out.detach().cpu().numpy(), 198 | lbs.detach().cpu().numpy()) 199 | run_accu += accu 200 | 201 | y_preds.extend(out.argmax(1).detach().cpu().numpy()) 202 | y_trues.extend(lbs.argmax(1).cpu().numpy()) 203 | 204 | iterator.set_postfix_str(' loss: {:.4f}, accu: {:.4f}'.format( 205 | loss.item(), accu)) 206 | iterator.update() 207 | 208 | run_loss = run_loss / len(iterator) 209 | run_accu = run_accu / len(iterator) 210 | 211 | plot_confusion_metrix(y_trues, y_preds, class_names, 'Eval on: {}\nLoss: {:.4f}, Accu{:.4f}'.format( 212 | os.path.basename(data_file), run_loss, run_accu 213 | ), 'true', save=os.path.join(save_folder, '{}-confusion_matrix.png'.format( 214 | os.path.basename(data_file).split('.')[0]))) 215 | 216 | print('Eval Loss: {:.4f}, Accu: {:.4f}'.format(run_loss, run_accu)) 217 | -------------------------------------------------------------------------------- /App.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import time 4 | import torch 5 | import screeninfo 6 | import numpy as np 7 | import tkinter as tk 8 | import matplotlib.pyplot as plt 9 | from PIL import Image, ImageTk 10 | 11 | from Detection.Utils import ResizePadding 12 | from CameraLoader import CamLoader, CamLoader_Q 13 | from DetectorLoader import TinyYOLOv3_onecls 14 | 15 | from PoseEstimateLoader import SPPE_FastPose 16 | from fn import draw_single 17 | 18 | from Track.Tracker import Detection, Tracker 19 | from ActionsEstLoader import TSSTG 20 | 21 | import matplotlib 22 | matplotlib.use('TkAgg') 23 | import matplotlib.pyplot as plt 24 | from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg, NavigationToolbar2Tk 25 | 26 | 27 | def get_monitor_from_coord(x, y): # multiple monitor dealing. 
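    # Return the monitor whose bounds contain the point (x, y); if none matches,
    # fall back to the first monitor reported by screeninfo.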
28 | monitors = screeninfo.get_monitors() 29 | for m in reversed(monitors): 30 | if m.x <= x <= m.width + m.x and m.y <= y <= m.height + m.y: 31 | return m 32 | return monitors[0] 33 | 34 | 35 | class Models: 36 | def __init__(self): 37 | self.inp_dets = 416 38 | self.inp_pose = (256, 192) 39 | self.pose_backbone = 'resnet50' 40 | self.show_detected = True 41 | self.show_skeleton = True 42 | self.device = 'cuda' 43 | 44 | self.load_models() 45 | 46 | def load_models(self): 47 | self.detect_model = TinyYOLOv3_onecls(self.inp_dets, device=self.device) 48 | self.pose_model = SPPE_FastPose(self.pose_backbone, self.inp_pose[0], self.inp_pose[1], 49 | device=self.device) 50 | self.tracker = Tracker(30, n_init=3) 51 | self.action_model = TSSTG(device=self.device) 52 | 53 | def kpt2bbox(self, kpt, ex=20): 54 | return np.array((kpt[:, 0].min() - ex, kpt[:, 1].min() - ex, 55 | kpt[:, 0].max() + ex, kpt[:, 1].max() + ex)) 56 | 57 | def process_frame(self, frame): 58 | detected = self.detect_model.detect(frame, need_resize=False, expand_bb=10) 59 | 60 | self.tracker.predict() 61 | for track in self.tracker.tracks: 62 | det = torch.tensor([track.to_tlbr().tolist() + [1.0, 1.0, 0.0]], dtype=torch.float32) 63 | detected = torch.cat([detected, det], dim=0) if detected is not None else det 64 | 65 | detections = [] 66 | if detected is not None: 67 | poses = self.pose_model.predict(frame, detected[:, 0:4], detected[:, 4]) 68 | detections = [Detection(self.kpt2bbox(ps['keypoints'].numpy()), 69 | np.concatenate((ps['keypoints'].numpy(), 70 | ps['kp_score'].numpy()), axis=1), 71 | ps['kp_score'].mean().numpy()) for ps in poses] 72 | if self.show_detected: 73 | for bb in detected[:, 0:5]: 74 | frame = cv2.rectangle(frame, (bb[0], bb[1]), (bb[2], bb[3]), (0, 0, 255), 1) 75 | 76 | self.tracker.update(detections) 77 | for i, track in enumerate(self.tracker.tracks): 78 | if not track.is_confirmed(): 79 | continue 80 | track_id = track.track_id 81 | bbox = track.to_tlbr().astype(int) 82 | center = track.get_center().astype(int) 83 | 84 | action = 'pending..' 
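            # The label stays 'pending..' until the tracker has buffered a full
            # 30-frame keypoint sequence, the time_steps length the TSSTG model
            # was trained on.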
85 | clr = (0, 255, 0) 86 | if len(track.keypoints_list) == 30: 87 | pts = np.array(track.keypoints_list, dtype=np.float32) 88 | out = self.action_model.predict(pts, frame.shape[:2]) 89 | action_name = self.action_model.class_names[out[0].argmax()] 90 | action = '{}: {:.2f}%'.format(action_name, out[0].max() * 100) 91 | if action_name == 'Fall Down': 92 | clr = (255, 0, 0) 93 | elif action_name == 'Lying Down': 94 | clr = (255, 200, 0) 95 | 96 | track.actions = out 97 | 98 | if track.time_since_update == 0: 99 | if self.show_skeleton: 100 | frame = draw_single(frame, track.keypoints_list[-1]) 101 | frame = cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 255, 0), 1) 102 | frame = cv2.putText(frame, str(track_id), (center[0], center[1]), cv2.FONT_HERSHEY_DUPLEX, 103 | 0.4, (255, 0, 0), 2) 104 | frame = cv2.putText(frame, action, (bbox[0] + 5, bbox[1] + 15), cv2.FONT_HERSHEY_COMPLEX, 105 | 0.4, clr, 1) 106 | 107 | return frame 108 | 109 | 110 | class main: 111 | def __init__(self, master: tk.Tk): 112 | self.master = master 113 | self.master.title('Human Falling Detection') 114 | self.master.protocol('WM_DELETE_WINDOW', self._on_closing) 115 | self.main_screen = get_monitor_from_coord(master.winfo_x(), master.winfo_y()) 116 | 117 | self.width = int(self.main_screen.width * .85) 118 | self.height = int(self.main_screen.height * .85) 119 | self.master.geometry('{}x{}'.format(self.width, self.height + 15)) 120 | 121 | self.cam = None 122 | self.canvas = tk.Canvas(master, width=int(self.width * .65), height=self.height) 123 | self.canvas.grid(row=0, column=0, padx=5, pady=5, sticky=tk.NSEW) 124 | 125 | fig = plt.Figure(figsize=(6, 8), dpi=100) 126 | fig.suptitle('Actions') 127 | self.ax = fig.add_subplot(111) 128 | self.fig_canvas = FigureCanvasTkAgg(fig, self.master) 129 | self.fig_canvas.get_tk_widget().grid(row=0, column=1, padx=5, pady=5, sticky=tk.NSEW) 130 | 131 | # Load Models 132 | self.resize_fn = ResizePadding(416, 416) 133 | self.models = Models() 134 | 135 | self.actions_graph() 136 | 137 | self.delay = 15 138 | self.load_cam('../Data/falldata/Home/Videos/video (1).avi') 139 | self.update() 140 | 141 | def preproc(self, image): 142 | image = self.resize_fn(image) 143 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 144 | return image 145 | 146 | def load_cam(self, source): 147 | if self.cam: 148 | self.cam.__del__() 149 | 150 | if type(source) is str and os.path.isfile(source): 151 | self.cam = CamLoader_Q(source, queue_size=1000, preprocess=self.preproc).start() 152 | else: 153 | self.cam = CamLoader(source, preprocess=self.preproc).start() 154 | 155 | def actions_graph(self): 156 | if len(self.models.tracker.tracks) == 0: 157 | return 158 | track = self.models.tracker.tracks[0] 159 | if hasattr(track, 'actions'): 160 | y_labels = self.models.action_model.class_names 161 | self.ax.barh(np.arange(len(y_labels)), track.actions) 162 | self.fig_canvas.draw() 163 | 164 | def update(self): 165 | if self.cam is None: 166 | return 167 | if self.cam.grabbed(): 168 | frame = self.cam.getitem() 169 | 170 | frame = self.models.process_frame(frame) 171 | 172 | frame = cv2.resize(frame, (self.canvas.winfo_width(), self.canvas.winfo_height()), 173 | interpolation=cv2.INTER_CUBIC) 174 | self.photo = ImageTk.PhotoImage(image=Image.fromarray(frame)) 175 | self.canvas.create_image(0, 0, image=self.photo, anchor=tk.NW) 176 | else: 177 | self.cam.stop() 178 | 179 | self._cam = self.master.after(self.delay, self.update) 180 | 181 | def _on_closing(self): 182 | 
self.master.after_cancel(self._cam) 183 | if self.cam: 184 | self.cam.stop() 185 | self.cam.__del__() 186 | self.master.destroy() 187 | 188 | 189 | root = tk.Tk() 190 | app = main(root) 191 | root.mainloop() 192 | -------------------------------------------------------------------------------- /CameraLoader.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import time 4 | import torch 5 | import numpy as np 6 | 7 | from queue import Queue 8 | from threading import Thread, Lock 9 | 10 | 11 | class CamLoader: 12 | """Use threading to capture a frame from camera for faster frame load. 13 | Recommend for camera or webcam. 14 | 15 | Args: 16 | camera: (int, str) Source of camera or video., 17 | preprocess: (Callable function) to process the frame before return. 18 | """ 19 | def __init__(self, camera, preprocess=None, ori_return=False): 20 | self.stream = cv2.VideoCapture(camera) 21 | assert self.stream.isOpened(), 'Cannot read camera source!' 22 | self.fps = self.stream.get(cv2.CAP_PROP_FPS) 23 | self.frame_size = (int(self.stream.get(cv2.CAP_PROP_FRAME_WIDTH)), 24 | int(self.stream.get(cv2.CAP_PROP_FRAME_HEIGHT))) 25 | 26 | self.stopped = False 27 | self.ret = False 28 | self.frame = None 29 | self.ori_frame = None 30 | self.read_lock = Lock() 31 | self.ori = ori_return 32 | 33 | self.preprocess_fn = preprocess 34 | 35 | def start(self): 36 | self.t = Thread(target=self.update, args=()) # , daemon=True) 37 | self.t.start() 38 | c = 0 39 | while not self.ret: 40 | time.sleep(0.1) 41 | c += 1 42 | if c > 20: 43 | self.stop() 44 | raise TimeoutError('Can not get a frame from camera!!!') 45 | return self 46 | 47 | def update(self): 48 | while not self.stopped: 49 | ret, frame = self.stream.read() 50 | self.read_lock.acquire() 51 | self.ori_frame = frame.copy() 52 | if ret and self.preprocess_fn is not None: 53 | frame = self.preprocess_fn(frame) 54 | 55 | self.ret, self.frame = ret, frame 56 | self.read_lock.release() 57 | 58 | def grabbed(self): 59 | """Return `True` if can read a frame.""" 60 | return self.ret 61 | 62 | def getitem(self): 63 | self.read_lock.acquire() 64 | frame = self.frame.copy() 65 | ori_frame = self.ori_frame.copy() 66 | self.read_lock.release() 67 | if self.ori: 68 | return frame, ori_frame 69 | else: 70 | return frame 71 | 72 | def stop(self): 73 | if self.stopped: 74 | return 75 | self.stopped = True 76 | if self.t.is_alive(): 77 | self.t.join() 78 | self.stream.release() 79 | 80 | def __del__(self): 81 | if self.stream.isOpened(): 82 | self.stream.release() 83 | 84 | def __exit__(self, exc_type, exc_val, exc_tb): 85 | if self.stream.isOpened(): 86 | self.stream.release() 87 | 88 | 89 | class CamLoader_Q: 90 | """Use threading and queue to capture a frame and store to queue for pickup in sequence. 91 | Recommend for video file. 92 | 93 | Args: 94 | camera: (int, str) Source of camera or video., 95 | batch_size: (int) Number of batch frame to store in queue. Default: 1, 96 | queue_size: (int) Maximum queue size. Default: 256, 97 | preprocess: (Callable function) to process the frame before return. 98 | """ 99 | def __init__(self, camera, batch_size=1, queue_size=256, preprocess=None): 100 | self.stream = cv2.VideoCapture(camera) 101 | assert self.stream.isOpened(), 'Cannot read camera source!' 
102 | self.fps = self.stream.get(cv2.CAP_PROP_FPS) 103 | self.frame_size = (int(self.stream.get(cv2.CAP_PROP_FRAME_WIDTH)), 104 | int(self.stream.get(cv2.CAP_PROP_FRAME_HEIGHT))) 105 | 106 | # Queue for storing each frames. 107 | 108 | self.stopped = False 109 | self.batch_size = batch_size 110 | self.Q = Queue(maxsize=queue_size) 111 | 112 | self.preprocess_fn = preprocess 113 | 114 | def start(self): 115 | t = Thread(target=self.update, args=(), daemon=True).start() 116 | c = 0 117 | while not self.grabbed(): 118 | time.sleep(0.1) 119 | c += 1 120 | if c > 20: 121 | self.stop() 122 | raise TimeoutError('Can not get a frame from camera!!!') 123 | return self 124 | 125 | def update(self): 126 | while not self.stopped: 127 | if not self.Q.full(): 128 | frames = [] 129 | for k in range(self.batch_size): 130 | ret, frame = self.stream.read() 131 | if not ret: 132 | self.stop() 133 | return 134 | 135 | if self.preprocess_fn is not None: 136 | frame = self.preprocess_fn(frame) 137 | 138 | frames.append(frame) 139 | frames = np.stack(frames) 140 | self.Q.put(frames) 141 | else: 142 | with self.Q.mutex: 143 | self.Q.queue.clear() 144 | # time.sleep(0.05) 145 | 146 | def grabbed(self): 147 | """Return `True` if can read a frame.""" 148 | return self.Q.qsize() > 0 149 | 150 | def getitem(self): 151 | return self.Q.get().squeeze() 152 | 153 | def stop(self): 154 | if self.stopped: 155 | return 156 | self.stopped = True 157 | self.stream.release() 158 | 159 | def __len__(self): 160 | return self.Q.qsize() 161 | 162 | def __del__(self): 163 | if self.stream.isOpened(): 164 | self.stream.release() 165 | 166 | def __exit__(self, exc_type, exc_val, exc_tb): 167 | if self.stream.isOpened(): 168 | self.stream.release() 169 | 170 | 171 | if __name__ == '__main__': 172 | fps_time = 0 173 | 174 | # Using threading. 175 | cam = CamLoader(0).start() 176 | while cam.grabbed(): 177 | frames = cam.getitem() 178 | 179 | frames = cv2.putText(frames, 'FPS: %f' % (1.0 / (time.time() - fps_time)), 180 | (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2) 181 | fps_time = time.time() 182 | cv2.imshow('frame', frames) 183 | 184 | if cv2.waitKey(1) & 0xFF == ord('q'): 185 | break 186 | cam.stop() 187 | cv2.destroyAllWindows() 188 | 189 | # Normal video capture. 190 | """cam = cv2.VideoCapture(0) 191 | while True: 192 | ret, frame = cam.read() 193 | if ret: 194 | #time.sleep(0.05) 195 | #frame = (cv2.flip(frame, 1) / 255.).astype(np.float) 196 | 197 | frame = cv2.putText(frame, 'FPS: %f' % (1.0 / (time.time() - fps_time)), 198 | (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2) 199 | fps_time = time.time() 200 | cv2.imshow('frame', frame) 201 | if cv2.waitKey(1) & 0xFF == ord('q'): 202 | break 203 | cam.release() 204 | cv2.destroyAllWindows()""" 205 | -------------------------------------------------------------------------------- /Data/create_dataset_1.py: -------------------------------------------------------------------------------- 1 | """ 2 | This script to create .csv videos frames action annotation file. 3 | 4 | - It will play a video frame by frame control the flow by [a] and [d] 5 | to play previos or next frame. 6 | - Open the annot_file (.csv) and label each frame of video with number 7 | of action class. 8 | """ 9 | 10 | import os 11 | import cv2 12 | import time 13 | import numpy as np 14 | import pandas as pd 15 | import matplotlib.pyplot as plt 16 | 17 | class_names = ['Standing', 'Walking', 'Sitting', 'Lying Down', 18 | 'Stand up', 'Sit down', 'Fall Down'] # label. 
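# Labels written to annot_file are 1-based indices into class_names (the playback
# loop below looks up class_names[label - 1]); create_csv() initializes every
# frame's label to 0, i.e. not yet annotated.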
19 | 20 | video_folder = '../Data/falldata/Home/Videos' 21 | annot_file = '../Data/Home_new.csv' 22 | 23 | index_video_to_play = 0 # Choose video to play. 24 | 25 | 26 | def create_csv(folder): 27 | list_file = sorted(os.listdir(folder)) 28 | cols = ['video', 'frame', 'label'] 29 | df = pd.DataFrame(columns=cols) 30 | for fil in list_file: 31 | cap = cv2.VideoCapture(os.path.join(folder, fil)) 32 | frames_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) 33 | video = np.array([fil] * frames_count) 34 | frame = np.arange(1, frames_count + 1) 35 | label = np.array([0] * frames_count) 36 | rows = np.stack([video, frame, label], axis=1) 37 | df = df.append(pd.DataFrame(rows, columns=cols), 38 | ignore_index=True) 39 | cap.release() 40 | df.to_csv(annot_file, index=False) 41 | 42 | 43 | if not os.path.exists(annot_file): 44 | create_csv(video_folder) 45 | 46 | annot = pd.read_csv(annot_file) 47 | video_list = annot.iloc[:, 0].unique() 48 | video_file = os.path.join(video_folder, video_list[index_video_to_play]) 49 | print(os.path.basename(video_file)) 50 | 51 | annot = annot[annot['video'] == video_list[index_video_to_play]].reset_index(drop=True) 52 | frames_idx = annot.iloc[:, 1].tolist() 53 | 54 | cap = cv2.VideoCapture(video_file) 55 | frames_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) 56 | 57 | assert frames_count == len(frames_idx), 'frame count not equal! {} and {}'.format( 58 | len(frames_idx), frames_count 59 | ) 60 | 61 | i = 0 62 | while True: 63 | cap.set(cv2.CAP_PROP_POS_FRAMES, i) 64 | ret, frame = cap.read() 65 | if ret: 66 | cls_name = class_names[int(annot.iloc[i, -1]) - 1] 67 | frame = cv2.resize(frame, (0, 0), fx=1.5, fy=1.5) 68 | frame = cv2.putText(frame, 'Frame: {} Pose: {}'.format(i+1, cls_name), 69 | (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2) 70 | cv2.imshow('frame', frame) 71 | 72 | key = cv2.waitKey(0) & 0xFF 73 | if key == ord('q'): 74 | break 75 | elif key == ord('d'): 76 | i += 1 77 | continue 78 | elif key == ord('a'): 79 | i -= 1 80 | continue 81 | else: 82 | break 83 | 84 | cap.release() 85 | cv2.destroyAllWindows() 86 | -------------------------------------------------------------------------------- /Data/create_dataset_2.py: -------------------------------------------------------------------------------- 1 | """ 2 | This script to extract skeleton joints position and score. 3 | 4 | - This 'annot_folder' is a action class and bounding box for each frames that came with dataset. 5 | Should be in format of [frame_idx, action_cls, xmin, ymin, xmax, ymax] 6 | Use for crop a person to use in pose estimation model. 7 | - If have no annotation file you can leave annot_folder = '' for use Detector model to get the 8 | bounding box. 9 | """ 10 | 11 | import os 12 | import cv2 13 | import time 14 | import torch 15 | import pandas as pd 16 | import numpy as np 17 | import torchvision.transforms as transforms 18 | 19 | from DetectorLoader import TinyYOLOv3_onecls 20 | from PoseEstimateLoader import SPPE_FastPose 21 | from fn import vis_frame_fast 22 | 23 | save_path = '../../Data/Home_new-pose+score.csv' 24 | 25 | annot_file = '../../Data/Home_new.csv' # from create_dataset_1.py 26 | video_folder = '../Data/falldata/Home/Videos' 27 | annot_folder = '../Data/falldata/Home/Annotation_files' # bounding box annotation for each frame. 28 | 29 | # DETECTION MODEL. 30 | detector = TinyYOLOv3_onecls() 31 | 32 | # POSE MODEL. 33 | inp_h = 320 34 | inp_w = 256 35 | pose_estimator = SPPE_FastPose(inp_h, inp_w) 36 | 37 | # with score. 
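# Each of the 13 keypoints (nose, shoulders, elbows, wrists, hips, knees, ankles)
# is stored as _x, _y and _s (confidence score) columns, so a row has
# 13 * 3 = 39 pose values plus 'video', 'frame' and 'label' (42 columns in total).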
38 | columns = ['video', 'frame', 'Nose_x', 'Nose_y', 'Nose_s', 'LShoulder_x', 'LShoulder_y', 'LShoulder_s', 39 | 'RShoulder_x', 'RShoulder_y', 'RShoulder_s', 'LElbow_x', 'LElbow_y', 'LElbow_s', 'RElbow_x', 40 | 'RElbow_y', 'RElbow_s', 'LWrist_x', 'LWrist_y', 'LWrist_s', 'RWrist_x', 'RWrist_y', 'RWrist_s', 41 | 'LHip_x', 'LHip_y', 'LHip_s', 'RHip_x', 'RHip_y', 'RHip_s', 'LKnee_x', 'LKnee_y', 'LKnee_s', 42 | 'RKnee_x', 'RKnee_y', 'RKnee_s', 'LAnkle_x', 'LAnkle_y', 'LAnkle_s', 'RAnkle_x', 'RAnkle_y', 43 | 'RAnkle_s', 'label'] 44 | 45 | 46 | def normalize_points_with_size(points_xy, width, height, flip=False): 47 | points_xy[:, 0] /= width 48 | points_xy[:, 1] /= height 49 | if flip: 50 | points_xy[:, 0] = 1 - points_xy[:, 0] 51 | return points_xy 52 | 53 | 54 | annot = pd.read_csv(annot_file) 55 | vid_list = annot['video'].unique() 56 | for vid in vid_list: 57 | print(f'Process on: {vid}') 58 | df = pd.DataFrame(columns=columns) 59 | cur_row = 0 60 | 61 | # Pose Labels. 62 | frames_label = annot[annot['video'] == vid].reset_index(drop=True) 63 | 64 | cap = cv2.VideoCapture(os.path.join(video_folder, vid)) 65 | frames_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) 66 | frame_size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), 67 | int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))) 68 | 69 | # Bounding Boxs Labels. 70 | annot_file = os.path.join(annot_folder, vid.split('.')[0], '.txt') 71 | annot = None 72 | if os.path.exists(annot_file): 73 | annot = pd.read_csv(annot_file, header=None, 74 | names=['frame_idx', 'class', 'xmin', 'ymin', 'xmax', 'ymax']) 75 | annot = annot.dropna().reset_index(drop=True) 76 | 77 | assert frames_count == len(annot), 'frame count not equal! {} and {}'.format(frames_count, len(annot)) 78 | 79 | fps_time = 0 80 | i = 1 81 | while True: 82 | ret, frame = cap.read() 83 | if ret: 84 | frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) 85 | cls_idx = int(frames_label[frames_label['frame'] == i]['label']) 86 | 87 | if annot: 88 | bb = np.array(annot.iloc[i-1, 2:].astype(int)) 89 | else: 90 | bb = detector.detect(frame)[0, :4].numpy().astype(int) 91 | bb[:2] = np.maximum(0, bb[:2] - 5) 92 | bb[2:] = np.minimum(frame_size, bb[2:] + 5) if bb[2:].any() != 0 else bb[2:] 93 | 94 | result = [] 95 | if bb.any() != 0: 96 | result = pose_estimator.predict(frame, torch.tensor(bb[None, ...]), 97 | torch.tensor([[1.0]])) 98 | 99 | if len(result) > 0: 100 | pt_norm = normalize_points_with_size(result[0]['keypoints'].numpy().copy(), 101 | frame_size[0], frame_size[1]) 102 | pt_norm = np.concatenate((pt_norm, result[0]['kp_score']), axis=1) 103 | 104 | #idx = result[0]['kp_score'] <= 0.05 105 | #pt_norm[idx.squeeze()] = np.nan 106 | row = [vid, i, *pt_norm.flatten().tolist(), cls_idx] 107 | scr = result[0]['kp_score'].mean() 108 | else: 109 | row = [vid, i, *[np.nan] * (13 * 3), cls_idx] 110 | scr = 0.0 111 | 112 | df.loc[cur_row] = row 113 | cur_row += 1 114 | 115 | # VISUALIZE. 
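            # Drawing happens on the RGB frame used by the models; it is flipped
            # back to BGR with frame[:, :, ::-1] below before cv2.imshow.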
116 | frame = vis_frame_fast(frame, result) 117 | frame = cv2.rectangle(frame, (bb[0], bb[1]), (bb[2], bb[3]), (0, 255, 0), 2) 118 | frame = cv2.putText(frame, 'Frame: {}, Pose: {}, Score: {:.4f}'.format(i, cls_idx, scr), 119 | (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2) 120 | frame = frame[:, :, ::-1] 121 | fps_time = time.time() 122 | i += 1 123 | 124 | cv2.imshow('frame', frame) 125 | if cv2.waitKey(1) & 0xFF == ord('q'): 126 | break 127 | else: 128 | break 129 | 130 | cap.release() 131 | cv2.destroyAllWindows() 132 | 133 | if os.path.exists(save_path): 134 | df.to_csv(save_path, mode='a', header=False, index=False) 135 | else: 136 | df.to_csv(save_path, mode='w', index=False) 137 | 138 | -------------------------------------------------------------------------------- /Data/create_dataset_3.py: -------------------------------------------------------------------------------- 1 | """ 2 | This script to create dataset and labels by clean off some NaN, do a normalization, 3 | label smoothing and label weights by scores. 4 | 5 | """ 6 | import os 7 | import pickle 8 | import numpy as np 9 | import pandas as pd 10 | 11 | 12 | class_names = ['Standing', 'Walking', 'Sitting', 'Lying Down', 13 | 'Stand up', 'Sit down', 'Fall Down'] 14 | main_parts = ['LShoulder_x', 'LShoulder_y', 'RShoulder_x', 'RShoulder_y', 'LHip_x', 'LHip_y', 15 | 'RHip_x', 'RHip_y'] 16 | main_idx_parts = [1, 2, 7, 8, -1] # 1.5 17 | 18 | csv_pose_file = '../Data/Coffee_room_new-pose+score.csv' 19 | save_path = '../../Data/Coffee_room_new-set(labelXscrw).pkl' 20 | 21 | # Params. 22 | smooth_labels_step = 8 23 | n_frames = 30 24 | skip_frame = 1 25 | 26 | annot = pd.read_csv(csv_pose_file) 27 | 28 | # Remove NaN. 29 | idx = annot.iloc[:, 2:-1][main_parts].isna().sum(1) > 0 30 | idx = np.where(idx)[0] 31 | annot = annot.drop(idx) 32 | # One-Hot Labels. 33 | label_onehot = pd.get_dummies(annot['label']) 34 | annot = annot.drop('label', axis=1).join(label_onehot) 35 | cols = label_onehot.columns.values 36 | 37 | 38 | def scale_pose(xy): 39 | """ 40 | Normalize pose points by scale with max/min value of each pose. 
41 | xy : (frames, parts, xy) or (parts, xy) 42 | """ 43 | if xy.ndim == 2: 44 | xy = np.expand_dims(xy, 0) 45 | xy_min = np.nanmin(xy, axis=1) 46 | xy_max = np.nanmax(xy, axis=1) 47 | for i in range(xy.shape[0]): 48 | xy[i] = ((xy[i] - xy_min[i]) / (xy_max[i] - xy_min[i])) * 2 - 1 49 | return xy.squeeze() 50 | 51 | 52 | def seq_label_smoothing(labels, max_step=10): 53 | steps = 0 54 | remain_step = 0 55 | target_label = 0 56 | active_label = 0 57 | start_change = 0 58 | max_val = np.max(labels) 59 | min_val = np.min(labels) 60 | for i in range(labels.shape[0]): 61 | if remain_step > 0: 62 | if i >= start_change: 63 | labels[i][active_label] = max_val * remain_step / steps 64 | labels[i][target_label] = max_val * (steps - remain_step) / steps \ 65 | if max_val * (steps - remain_step) / steps else min_val 66 | remain_step -= 1 67 | continue 68 | 69 | diff_index = np.where(np.argmax(labels[i:i+max_step], axis=1) - np.argmax(labels[i]) != 0)[0] 70 | if len(diff_index) > 0: 71 | start_change = i + remain_step // 2 72 | steps = diff_index[0] 73 | remain_step = steps 74 | target_label = np.argmax(labels[i + remain_step]) 75 | active_label = np.argmax(labels[i]) 76 | return labels 77 | 78 | 79 | feature_set = np.empty((0, n_frames, 14, 3)) 80 | labels_set = np.empty((0, len(cols))) 81 | vid_list = annot['video'].unique() 82 | for vid in vid_list: 83 | print(f'Process on: {vid}') 84 | data = annot[annot['video'] == vid].reset_index(drop=True).drop(columns='video') 85 | 86 | # Label Smoothing. 87 | esp = 0.1 88 | data[cols] = data[cols] * (1 - esp) + (1 - data[cols]) * esp / (len(cols) - 1) 89 | data[cols] = seq_label_smoothing(data[cols].values, smooth_labels_step) 90 | 91 | # Separate continuous frames. 92 | frames = data['frame'].values 93 | frames_set = [] 94 | fs = [0] 95 | for i in range(1, len(frames)): 96 | if frames[i] < frames[i-1] + 10: 97 | fs.append(i) 98 | else: 99 | frames_set.append(fs) 100 | fs = [i] 101 | frames_set.append(fs) 102 | 103 | for fs in frames_set: 104 | xys = data.iloc[fs, 1:-len(cols)].values.reshape(-1, 13, 3) 105 | # Scale pose normalize. 106 | xys[:, :, :2] = scale_pose(xys[:, :, :2]) 107 | # Add center point. 108 | xys = np.concatenate((xys, np.expand_dims((xys[:, 1, :] + xys[:, 2, :]) / 2, 1)), axis=1) 109 | 110 | # Weighting main parts score. 111 | scr = xys[:, :, -1].copy() 112 | scr[:, main_idx_parts] = np.minimum(scr[:, main_idx_parts] * 1.5, 1.0) 113 | # Mean score. 114 | scr = scr.mean(1) 115 | 116 | # Targets. 117 | lb = data.iloc[fs, -len(cols):].values 118 | # Apply points score mean to all labels. 
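        # Scaling by the mean keypoint confidence turns the smoothed one-hot targets
        # into score-weighted soft labels, so low-quality skeletons contribute weaker
        # supervision; the '(labelXscrw)' suffix in save_path presumably refers to
        # this label-times-score weighting.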
119 | lb = lb * scr[:, None] 120 | 121 | for i in range(xys.shape[0] - n_frames): 122 | feature_set = np.append(feature_set, xys[i:i+n_frames][None, ...], axis=0) 123 | labels_set = np.append(labels_set, lb[i:i+n_frames].mean(0)[None, ...], axis=0) 124 | 125 | 126 | """with open(save_path, 'wb') as f: 127 | pickle.dump((feature_set, labels_set), f)""" 128 | -------------------------------------------------------------------------------- /Detection/Utils.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import math 3 | import time 4 | import tqdm 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | import numpy as np 9 | from torch.utils.data import DataLoader 10 | 11 | 12 | def to_cpu(tensor): 13 | return tensor.detach().cpu() 14 | 15 | 16 | def load_classes(path): 17 | """ 18 | Loads class labels at 'path' 19 | """ 20 | fp = open(path, "r") 21 | names = fp.read().split("\n")[:-1] 22 | return names 23 | 24 | 25 | def weights_init_normal(m): 26 | classname = m.__class__.__name__ 27 | if classname.find("Conv") != -1: 28 | torch.nn.init.normal_(m.weight.data, 0.0, 0.02) 29 | elif classname.find("BatchNorm2d") != -1: 30 | torch.nn.init.normal_(m.weight.data, 1.0, 0.02) 31 | torch.nn.init.constant_(m.bias.data, 0.0) 32 | 33 | 34 | def rescale_boxes(boxes, current_dim, original_shape): 35 | """ Rescales bounding boxes to the original shape """ 36 | orig_h, orig_w = original_shape 37 | # The amount of padding that was added 38 | pad_x = max(orig_h - orig_w, 0) * (current_dim / max(original_shape)) 39 | pad_y = max(orig_w - orig_h, 0) * (current_dim / max(original_shape)) 40 | # Image height and width after padding is removed 41 | unpad_h = current_dim - pad_y 42 | unpad_w = current_dim - pad_x 43 | # Rescale bounding boxes to dimension of original image 44 | boxes[:, 0] = ((boxes[:, 0] - pad_x // 2) / unpad_w) * orig_w 45 | boxes[:, 1] = ((boxes[:, 1] - pad_y // 2) / unpad_h) * orig_h 46 | boxes[:, 2] = ((boxes[:, 2] - pad_x // 2) / unpad_w) * orig_w 47 | boxes[:, 3] = ((boxes[:, 3] - pad_y // 2) / unpad_h) * orig_h 48 | return boxes 49 | 50 | 51 | def xywh2xyxy(x): 52 | y = x.new(x.shape) 53 | y[..., 0] = x[..., 0] - x[..., 2] / 2 54 | y[..., 1] = x[..., 1] - x[..., 3] / 2 55 | y[..., 2] = x[..., 0] + x[..., 2] / 2 56 | y[..., 3] = x[..., 1] + x[..., 3] / 2 57 | return y 58 | 59 | 60 | def ap_per_class(tp, conf, pred_cls, target_cls): 61 | """ Compute the average precision, given the recall and precision curves. 62 | Source: https://github.com/rafaelpadilla/Object-Detection-Metrics. 63 | # Arguments 64 | tp: True positives (list). 65 | conf: Objectness value from 0-1 (list). 66 | pred_cls: Predicted object classes (list). 67 | target_cls: True object classes (list). 68 | # Returns 69 | The average precision as computed in py-faster-rcnn. 
70 | """ 71 | # Sort by objectness 72 | i = np.argsort(-conf) 73 | tp, conf, pred_cls = tp[i], conf[i], pred_cls[i] 74 | 75 | # Find unique classes 76 | unique_classes = np.unique(target_cls) 77 | 78 | # Create Precision-Recall curve and compute AP for each class 79 | ap, p, r = [], [], [] 80 | for c in tqdm.tqdm(unique_classes, desc="Computing AP"): 81 | i = pred_cls == c 82 | n_gt = (target_cls == c).sum() # Number of ground truth objects 83 | n_p = i.sum() # Number of predicted objects 84 | 85 | if n_p == 0 and n_gt == 0: 86 | continue 87 | elif n_p == 0 or n_gt == 0: 88 | ap.append(0) 89 | r.append(0) 90 | p.append(0) 91 | else: 92 | # Accumulate FPs and TPs 93 | fpc = (1 - tp[i]).cumsum() 94 | tpc = (tp[i]).cumsum() 95 | 96 | # Recall 97 | recall_curve = tpc / (n_gt + 1e-16) 98 | r.append(recall_curve[-1]) 99 | 100 | # Precision 101 | precision_curve = tpc / (tpc + fpc) 102 | p.append(precision_curve[-1]) 103 | 104 | # AP from recall-precision curve 105 | ap.append(compute_ap(recall_curve, precision_curve)) 106 | 107 | # Compute F1 score (harmonic mean of precision and recall) 108 | p, r, ap = np.array(p), np.array(r), np.array(ap) 109 | f1 = 2 * p * r / (p + r + 1e-16) 110 | 111 | return p, r, ap, f1, unique_classes.astype("int32") 112 | 113 | 114 | def compute_ap(recall, precision): 115 | """ Compute the average precision, given the recall and precision curves. 116 | Code originally from https://github.com/rbgirshick/py-faster-rcnn. 117 | # Arguments 118 | recall: The recall curve (list). 119 | precision: The precision curve (list). 120 | # Returns 121 | The average precision as computed in py-faster-rcnn. 122 | """ 123 | # correct AP calculation 124 | # first append sentinel values at the end 125 | mrec = np.concatenate(([0.0], recall, [1.0])) 126 | mpre = np.concatenate(([0.0], precision, [0.0])) 127 | 128 | # compute the precision envelope 129 | for i in range(mpre.size - 1, 0, -1): 130 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 131 | 132 | # to calculate area under PR curve, look for points 133 | # where X axis (recall) changes value 134 | i = np.where(mrec[1:] != mrec[:-1])[0] 135 | 136 | # and sum (\Delta recall) * prec 137 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 138 | return ap 139 | 140 | 141 | def get_batch_statistics(outputs, targets, iou_threshold): 142 | """ Compute true positives, predicted scores and predicted labels per sample """ 143 | batch_metrics = [] 144 | for sample_i in range(len(outputs)): 145 | 146 | if outputs[sample_i] is None: 147 | continue 148 | 149 | output = outputs[sample_i] 150 | pred_boxes = output[:, :4] 151 | pred_scores = output[:, 4] 152 | pred_labels = output[:, -1] 153 | 154 | true_positives = np.zeros(pred_boxes.shape[0]) 155 | 156 | annotations = targets[targets[:, 0] == sample_i][:, 1:] 157 | target_labels = annotations[:, 0] if len(annotations) else [] 158 | if len(annotations): 159 | detected_boxes = [] 160 | target_boxes = annotations[:, 1:] 161 | 162 | for pred_i, (pred_box, pred_label) in enumerate(zip(pred_boxes, pred_labels)): 163 | 164 | # If targets are found break 165 | if len(detected_boxes) == len(annotations): 166 | break 167 | 168 | # Ignore if label is not one of the target labels 169 | if pred_label not in target_labels: 170 | continue 171 | 172 | iou, box_index = bbox_iou(pred_box.unsqueeze(0), target_boxes).max(0) 173 | if iou >= iou_threshold and box_index not in detected_boxes: 174 | true_positives[pred_i] = 1 175 | detected_boxes += [box_index] 176 | batch_metrics.append([true_positives, pred_scores, 
pred_labels]) 177 | return batch_metrics 178 | 179 | 180 | def bbox_wh_iou(wh1, wh2): 181 | wh2 = wh2.t() 182 | w1, h1 = wh1[0], wh1[1] 183 | w2, h2 = wh2[0], wh2[1] 184 | inter_area = torch.min(w1, w2) * torch.min(h1, h2) 185 | union_area = (w1 * h1 + 1e-16) + w2 * h2 - inter_area 186 | return inter_area / union_area 187 | 188 | 189 | def bbox_iou(box1, box2, x1y1x2y2=True): 190 | """ 191 | Returns the IoU of two bounding boxes 192 | """ 193 | if not x1y1x2y2: 194 | # Transform from center and width to exact coordinates 195 | b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2 196 | b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2 197 | b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2 198 | b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2 199 | else: 200 | # Get the coordinates of bounding boxes 201 | b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3] 202 | b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3] 203 | 204 | # get the corrdinates of the intersection rectangle 205 | inter_rect_x1 = torch.max(b1_x1, b2_x1) 206 | inter_rect_y1 = torch.max(b1_y1, b2_y1) 207 | inter_rect_x2 = torch.min(b1_x2, b2_x2) 208 | inter_rect_y2 = torch.min(b1_y2, b2_y2) 209 | # Intersection area 210 | inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1 + 1, min=0) * torch.clamp( 211 | inter_rect_y2 - inter_rect_y1 + 1, min=0 212 | ) 213 | # Union Area 214 | b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1) 215 | b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1) 216 | 217 | iou = inter_area / (b1_area + b2_area - inter_area + 1e-16) 218 | 219 | return iou 220 | 221 | 222 | def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4): 223 | """ 224 | Removes detections with lower object confidence score than 'conf_thres' and performs 225 | Non-Maximum Suppression to further filter detections. 
226 | Returns detections with shape: 227 | (x1, y1, x2, y2, object_conf, class_score, class_pred) 228 | """ 229 | # From (center x, center y, width, height) to (x1, y1, x2, y2) 230 | prediction[..., :4] = xywh2xyxy(prediction[..., :4]) 231 | output = [None for _ in range(len(prediction))] 232 | for image_i, image_pred in enumerate(prediction): 233 | # Filter out confidence scores below threshold 234 | image_pred = image_pred[image_pred[:, 4] >= conf_thres] 235 | # If none are remaining => process next image 236 | if not image_pred.size(0): 237 | continue 238 | # Object confidence times class confidence 239 | score = image_pred[:, 4] * image_pred[:, 5:].max(1)[0] 240 | # Sort by it 241 | image_pred = image_pred[(-score).argsort()] 242 | class_confs, class_preds = image_pred[:, 5:].max(1, keepdim=True) 243 | detections = torch.cat((image_pred[:, :5], class_confs.float(), class_preds.float()), 1) 244 | # Perform non-maximum suppression 245 | keep_boxes = [] 246 | while detections.size(0): 247 | large_overlap = bbox_iou(detections[0, :4].unsqueeze(0), detections[:, :4]) > nms_thres 248 | label_match = detections[0, -1] == detections[:, -1] 249 | # Indices of boxes with lower confidence scores, large IOUs and matching labels 250 | invalid = large_overlap & label_match 251 | weights = detections[invalid, 4:5] 252 | # Merge overlapping bboxes by order of confidence 253 | detections[0, :4] = (weights * detections[invalid, :4]).sum(0) / weights.sum() 254 | keep_boxes += [detections[0]] 255 | detections = detections[~invalid] 256 | if keep_boxes: 257 | output[image_i] = torch.stack(keep_boxes) 258 | 259 | return output 260 | 261 | 262 | def build_targets(pred_boxes, pred_cls, target, anchors, ignore_thres): 263 | ByteTensor = torch.cuda.ByteTensor if pred_boxes.is_cuda else torch.ByteTensor 264 | FloatTensor = torch.cuda.FloatTensor if pred_boxes.is_cuda else torch.FloatTensor 265 | 266 | nB = pred_boxes.size(0) 267 | nA = pred_boxes.size(1) 268 | nC = pred_cls.size(-1) 269 | nG = pred_boxes.size(2) 270 | 271 | # Output tensors 272 | obj_mask = ByteTensor(nB, nA, nG, nG).fill_(0) 273 | noobj_mask = ByteTensor(nB, nA, nG, nG).fill_(1) 274 | class_mask = FloatTensor(nB, nA, nG, nG).fill_(0) 275 | iou_scores = FloatTensor(nB, nA, nG, nG).fill_(0) 276 | tx = FloatTensor(nB, nA, nG, nG).fill_(0) 277 | ty = FloatTensor(nB, nA, nG, nG).fill_(0) 278 | tw = FloatTensor(nB, nA, nG, nG).fill_(0) 279 | th = FloatTensor(nB, nA, nG, nG).fill_(0) 280 | tcls = FloatTensor(nB, nA, nG, nG, nC).fill_(0) 281 | 282 | # Convert to position relative to box 283 | target_boxes = target[:, 2:6] * nG 284 | gxy = target_boxes[:, :2] 285 | gwh = target_boxes[:, 2:] 286 | # Get anchors with best iou 287 | ious = torch.stack([bbox_wh_iou(anchor, gwh) for anchor in anchors]) 288 | best_ious, best_n = ious.max(0) 289 | # Separate target values 290 | b, target_labels = target[:, :2].long().t() 291 | gx, gy = gxy.t() 292 | gw, gh = gwh.t() 293 | gi, gj = gxy.long().t() 294 | # Set masks 295 | obj_mask[b, best_n, gj, gi] = 1 296 | noobj_mask[b, best_n, gj, gi] = 0 297 | 298 | # Set noobj mask to zero where iou exceeds ignore threshold 299 | for i, anchor_ious in enumerate(ious.t()): 300 | noobj_mask[b[i], anchor_ious > ignore_thres, gj[i], gi[i]] = 0 301 | 302 | # Coordinates 303 | tx[b, best_n, gj, gi] = gx - gx.floor() 304 | ty[b, best_n, gj, gi] = gy - gy.floor() 305 | # Width and height 306 | tw[b, best_n, gj, gi] = torch.log(gw / anchors[best_n][:, 0] + 1e-16) 307 | th[b, best_n, gj, gi] = torch.log(gh / anchors[best_n][:, 1] + 
1e-16) 308 | # One-hot encoding of label 309 | tcls[b, best_n, gj, gi, target_labels] = 1 310 | # Compute label correctness and iou at best anchor 311 | class_mask[b, best_n, gj, gi] = (pred_cls[b, best_n, gj, gi].argmax(-1) == target_labels).float() 312 | iou_scores[b, best_n, gj, gi] = bbox_iou(pred_boxes[b, best_n, gj, gi], target_boxes, x1y1x2y2=False) 313 | 314 | tconf = obj_mask.float() 315 | return iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf 316 | 317 | 318 | def parse_model_config(path): 319 | """Parses the yolo-v3 layer configuration file and returns module definitions""" 320 | file = open(path, 'r') 321 | lines = file.read().split('\n') 322 | lines = [x for x in lines if x and not x.startswith('#')] 323 | lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces 324 | module_defs = [] 325 | for line in lines: 326 | if line.startswith('['): # This marks the start of a new block 327 | module_defs.append({}) 328 | module_defs[-1]['type'] = line[1:-1].rstrip() 329 | if module_defs[-1]['type'] == 'convolutional': 330 | module_defs[-1]['batch_normalize'] = 0 331 | else: 332 | key, value = line.split("=") 333 | value = value.strip() 334 | module_defs[-1][key.rstrip()] = value.strip() 335 | 336 | return module_defs 337 | 338 | 339 | def parse_data_config(path): 340 | """Parses the data configuration file""" 341 | options = dict() 342 | options['gpus'] = '0,1,2,3' 343 | options['num_workers'] = '10' 344 | with open(path, 'r') as fp: 345 | lines = fp.readlines() 346 | for line in lines: 347 | line = line.strip() 348 | if line == '' or line.startswith('#'): 349 | continue 350 | key, value = line.split('=') 351 | options[key.strip()] = value.strip() 352 | return options 353 | 354 | 355 | def ResizePadding(height, width): 356 | desized_size = (height, width) 357 | 358 | def resizePadding(image, **kwargs): 359 | old_size = image.shape[:2] 360 | max_size_idx = old_size.index(max(old_size)) 361 | ratio = float(desized_size[max_size_idx]) / max(old_size) 362 | new_size = tuple([int(x * ratio) for x in old_size]) 363 | 364 | if new_size > desized_size: 365 | min_size_idx = old_size.index(min(old_size)) 366 | ratio = float(desized_size[min_size_idx]) / min(old_size) 367 | new_size = tuple([int(x * ratio) for x in old_size]) 368 | 369 | image = cv2.resize(image, (new_size[1], new_size[0])) 370 | delta_w = desized_size[1] - new_size[1] 371 | delta_h = desized_size[0] - new_size[0] 372 | top, bottom = delta_h // 2, delta_h - (delta_h // 2) 373 | left, right = delta_w // 2, delta_w - (delta_w // 2) 374 | 375 | image = cv2.copyMakeBorder(image, top, bottom, left, right, cv2.BORDER_CONSTANT) 376 | return image 377 | return resizePadding 378 | 379 | 380 | class AverageValueMeter(object): 381 | def __init__(self): 382 | self.reset() 383 | self.val = 0 384 | 385 | def add(self, value, n=1): 386 | self.val = value 387 | self.sum += value 388 | self.var += value * value 389 | self.n += n 390 | 391 | if self.n == 0: 392 | self.mean, self.std = np.nan, np.nan 393 | elif self.n == 1: 394 | self.mean = 0.0 + self.sum # This is to force a copy in torch/numpy 395 | self.std = np.inf 396 | self.mean_old = self.mean 397 | self.m_s = 0.0 398 | else: 399 | self.mean = self.mean_old + (value - n * self.mean_old) / float(self.n) 400 | self.m_s += (value - self.mean_old) * (value - self.mean) 401 | self.mean_old = self.mean 402 | self.std = np.sqrt(self.m_s / (self.n - 1.0)) 403 | 404 | def value(self): 405 | return self.mean, self.std 406 | 407 | def reset(self): 408 | 
self.n = 0 409 | self.sum = 0.0 410 | self.var = 0.0 411 | self.val = 0.0 412 | self.mean = np.nan 413 | self.mean_old = 0.0 414 | self.m_s = 0.0 415 | self.std = np.nan 416 | -------------------------------------------------------------------------------- /DetectorLoader.py: -------------------------------------------------------------------------------- 1 | import time 2 | import torch 3 | import numpy as np 4 | import torchvision.transforms as transforms 5 | 6 | from queue import Queue 7 | from threading import Thread 8 | 9 | from Detection.Models import Darknet 10 | from Detection.Utils import non_max_suppression, ResizePadding 11 | 12 | 13 | class TinyYOLOv3_onecls(object): 14 | """Load trained Tiny-YOLOv3 one class (person) detection model. 15 | Args: 16 | input_size: (int) Size of input image must be divisible by 32. Default: 416, 17 | config_file: (str) Path to Yolo model structure config file., 18 | weight_file: (str) Path to trained weights file., 19 | nms: (float) Non-Maximum Suppression overlap threshold., 20 | conf_thres: (float) Minimum Confidence threshold of predicted bboxs to cut off., 21 | device: (str) Device to load the model on 'cpu' or 'cuda'. 22 | """ 23 | def __init__(self, 24 | input_size=416, 25 | config_file='Models/yolo-tiny-onecls/yolov3-tiny-onecls.cfg', 26 | weight_file='Models/yolo-tiny-onecls/best-model.pth', 27 | nms=0.2, 28 | conf_thres=0.45, 29 | device='cuda'): 30 | self.input_size = input_size 31 | self.model = Darknet(config_file).to(device) 32 | self.model.load_state_dict(torch.load(weight_file)) 33 | self.model.eval() 34 | self.device = device 35 | 36 | self.nms = nms 37 | self.conf_thres = conf_thres 38 | 39 | self.resize_fn = ResizePadding(input_size, input_size) 40 | self.transf_fn = transforms.ToTensor() 41 | 42 | def detect(self, image, need_resize=True, expand_bb=5): 43 | """Feed forward to the model. 44 | Args: 45 | image: (numpy array) Single RGB image to detect., 46 | need_resize: (bool) Resize to input_size before feed and will return bboxs 47 | with scale to image original size., 48 | expand_bb: (int) Expand boundary of the boxs. 49 | Returns: 50 | (torch.float32) Of each detected object contain a 51 | [top, left, bottom, right, bbox_score, class_score, class] 52 | return `None` if no detected. 53 | """ 54 | image_size = (self.input_size, self.input_size) 55 | if need_resize: 56 | image_size = image.shape[:2] 57 | image = self.resize_fn(image) 58 | 59 | image = self.transf_fn(image)[None, ...] 
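        # `scf` below is the letterbox scale factor used by ResizePadding
        # (the smaller of input_size / original_height and input_size / original_width).
        # The detections are later shifted by the padding offset and divided by
        # `scf` to map the boxes back to the original frame coordinates.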
60 | scf = torch.min(self.input_size / torch.FloatTensor([image_size]), 1)[0] 61 | 62 | detected = self.model(image.to(self.device)) 63 | detected = non_max_suppression(detected, self.conf_thres, self.nms)[0] 64 | if detected is not None: 65 | detected[:, [0, 2]] -= (self.input_size - scf * image_size[1]) / 2 66 | detected[:, [1, 3]] -= (self.input_size - scf * image_size[0]) / 2 67 | detected[:, 0:4] /= scf 68 | 69 | detected[:, 0:2] = np.maximum(0, detected[:, 0:2] - expand_bb) 70 | detected[:, 2:4] = np.minimum(image_size[::-1], detected[:, 2:4] + expand_bb) 71 | 72 | return detected 73 | 74 | 75 | class ThreadDetection(object): 76 | def __init__(self, 77 | dataloader, 78 | model, 79 | queue_size=256): 80 | self.model = model 81 | 82 | self.dataloader = dataloader 83 | self.stopped = False 84 | self.Q = Queue(maxsize=queue_size) 85 | 86 | def start(self): 87 | t = Thread(target=self.update, args=(), daemon=True).start() 88 | return self 89 | 90 | def update(self): 91 | while True: 92 | if self.stopped: 93 | return 94 | 95 | images = self.dataloader.getitem() 96 | 97 | outputs = self.model.detect(images) 98 | 99 | if self.Q.full(): 100 | time.sleep(2) 101 | self.Q.put((images, outputs)) 102 | 103 | def getitem(self): 104 | return self.Q.get() 105 | 106 | def stop(self): 107 | self.stopped = True 108 | 109 | def __len__(self): 110 | return self.Q.qsize() 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | -------------------------------------------------------------------------------- /Models/TSSTG/_.txt: -------------------------------------------------------------------------------- 1 | tsstg-model.pth -------------------------------------------------------------------------------- /Models/sppe/_.txt: -------------------------------------------------------------------------------- 1 | fast_res50_256x192.pth 2 | fast_res101_320x256.pth -------------------------------------------------------------------------------- /Models/yolo-tiny-onecls/_.txt: -------------------------------------------------------------------------------- 1 | best-model.pth 2 | yolov3-tiny-onecls.cfg -------------------------------------------------------------------------------- /PoseEstimateLoader.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import torch 4 | 5 | from SPPE.src.main_fast_inference import InferenNet_fast, InferenNet_fastRes50 6 | from SPPE.src.utils.img import crop_dets 7 | from pPose_nms import pose_nms 8 | from SPPE.src.utils.eval import getPrediction 9 | 10 | 11 | class SPPE_FastPose(object): 12 | def __init__(self, 13 | backbone, 14 | input_height=320, 15 | input_width=256, 16 | device='cuda'): 17 | assert backbone in ['resnet50', 'resnet101'], '{} backbone is not support yet!'.format(backbone) 18 | 19 | self.inp_h = input_height 20 | self.inp_w = input_width 21 | self.device = device 22 | 23 | if backbone == 'resnet101': 24 | self.model = InferenNet_fast().to(device) 25 | else: 26 | self.model = InferenNet_fastRes50().to(device) 27 | self.model.eval() 28 | 29 | def predict(self, image, bboxs, bboxs_scores): 30 | inps, pt1, pt2 = crop_dets(image, bboxs, self.inp_h, self.inp_w) 31 | pose_hm = self.model(inps.to(self.device)).cpu().data 32 | 33 | # Cut eyes and ears. 
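        # The slice below keeps heatmap channel 0 (nose) plus channels 5 onward
        # (shoulders, elbows, wrists, hips, knees, ankles), dropping the four
        # eye/ear channels, so 13 of the 17 COCO keypoints remain.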
34 | pose_hm = torch.cat([pose_hm[:, :1, ...], pose_hm[:, 5:, ...]], dim=1) 35 | 36 | xy_hm, xy_img, scores = getPrediction(pose_hm, pt1, pt2, self.inp_h, self.inp_w, 37 | pose_hm.shape[-2], pose_hm.shape[-1]) 38 | result = pose_nms(bboxs, bboxs_scores, xy_img, scores) 39 | return result -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

# Human Falling Detection and Tracking

2 | 3 | This project uses Tiny-YOLO oneclass to detect each person in the frame, 4 | [AlphaPose](https://github.com/MVIG-SJTU/AlphaPose) to extract the skeleton pose, and the 5 | [ST-GCN](https://github.com/yysijie/st-gcn) model to predict the action from every 30 frames 6 | of each person's track. 7 | 8 | It currently supports 7 actions: Standing, Walking, Sitting, Lying Down, Stand up, Sit down, Fall Down. 9 | 10 |
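A minimal per-frame usage sketch (not part of the repository) of how the detector, pose estimator, and action classifier defined in this project fit together; the image path and the `cuda` device are placeholders, and the pre-trained weights listed below must already be in `./Models`:

```
import cv2
from DetectorLoader import TinyYOLOv3_onecls
from PoseEstimateLoader import SPPE_FastPose

detector = TinyYOLOv3_onecls(device='cuda')             # Tiny-YOLO oneclass person detector
pose_model = SPPE_FastPose('resnet50', device='cuda')   # SPPE FastPose (AlphaPose) skeleton estimator

frame = cv2.cvtColor(cv2.imread('frame.jpg'), cv2.COLOR_BGR2RGB)  # placeholder frame
detected = detector.detect(frame)   # tensor of [bbox, bbox_score, class_score, class] rows, or None
if detected is not None:
    # Skeleton keypoints and scores for every detected person in this frame.
    poses = pose_model.predict(frame, detected[:, 0:4], detected[:, 4])
    # 30 consecutive skeletons per tracked person are then fed to the TSSTG
    # (ST-GCN) action model to predict one of the 7 actions above.
```

main.py ties these stages together with the tracker in Track/; the sketch only illustrates the per-frame data flow.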
11 | 12 |
13 | 14 | ## Prerequisites 15 | 16 | - Python > 3.6 17 | - Pytorch > 1.3.1 18 | 19 | Originally tested on: i7-8750H CPU @ 2.20GHz x12, GeForce RTX 2070 8GB, CUDA 10.2 20 | 21 | ## Data 22 | 23 | This project trained a new Tiny-YOLO oneclass model to detect only person objects and to reduce 24 | model size. It was trained on a rotation-augmented [COCO](http://cocodataset.org/#home) person keypoints dataset 25 | for more robust person detection across a variety of body angles and poses. 26 | 27 | For action recognition, data from the [Le2i](http://le2i.cnrs.fr/Fall-detection-Dataset?lang=fr) 28 | Fall Detection Dataset (Coffee room, Home) was used: skeleton poses were extracted with AlphaPose and each action 29 | frame was labeled by hand for training the ST-GCN model. 30 | 31 | ## Pre-Trained Models 32 | 33 | - Tiny-YOLO oneclass - [.pth](https://drive.google.com/file/d/1obEbWBSm9bXeg10FriJ7R2cGLRsg-AfP/view?usp=sharing), 34 | [.cfg](https://drive.google.com/file/d/19sPzBZjAjuJQ3emRteHybm2SG25w9Wn5/view?usp=sharing) 35 | - SPPE FastPose (AlphaPose) - [resnet101](https://drive.google.com/file/d/1N2MgE1Esq6CKYA6FyZVKpPwHRyOCrzA0/view?usp=sharing), 36 | [resnet50](https://drive.google.com/file/d/1IPfCDRwCmQDnQy94nT1V-_NVtTEi4VmU/view?usp=sharing) 37 | - ST-GCN action recognition - [tsstg](https://drive.google.com/file/d/1mQQ4JHe58ylKbBqTjuKzpwN2nwKOWJ9u/view?usp=sharing) 38 | 39 | ## Basic Use 40 | 41 | 1. Download all pre-trained models into the ./Models folder. 42 | 2. Run main.py 43 | ``` 44 | python main.py ${video file or camera source} 45 | ``` 46 | 47 | ## Reference 48 | 49 | - AlphaPose : https://github.com/Amanbhandula/AlphaPose 50 | - ST-GCN : https://github.com/yysijie/st-gcn -------------------------------------------------------------------------------- /SPPE/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Jeff-sjtu 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
-------------------------------------------------------------------------------- /SPPE/README.md: -------------------------------------------------------------------------------- 1 | # pytorch-AlphaPose from: https://github.com/Amanbhandula/AlphaPose 2 | -------------------------------------------------------------------------------- /SPPE/src/main_fast_inference.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.utils.data 4 | import torch.utils.data.distributed 5 | import torch.nn.functional as F 6 | import numpy as np 7 | from SPPE.src.utils.img import flip, shuffleLR 8 | from SPPE.src.utils.eval import getPrediction 9 | from SPPE.src.models.FastPose import FastPose 10 | 11 | import time 12 | import sys 13 | 14 | import torch._utils 15 | try: 16 | torch._utils._rebuild_tensor_v2 17 | except AttributeError: 18 | def _rebuild_tensor_v2(storage, storage_offset, size, stride, requires_grad, backward_hooks): 19 | tensor = torch._utils._rebuild_tensor(storage, storage_offset, size, stride) 20 | tensor.requires_grad = requires_grad 21 | tensor._backward_hooks = backward_hooks 22 | return tensor 23 | torch._utils._rebuild_tensor_v2 = _rebuild_tensor_v2 24 | 25 | 26 | class InferenNet(nn.Module): 27 | def __init__(self, dataset, weights_file='./Models/sppe/fast_res101_320x256.pth'): 28 | super().__init__() 29 | 30 | self.pyranet = FastPose('resnet101').cuda() 31 | print('Loading pose model from {}'.format(weights_file)) 32 | sys.stdout.flush() 33 | self.pyranet.load_state_dict(torch.load(weights_file)) 34 | self.pyranet.eval() 35 | self.pyranet = model 36 | 37 | self.dataset = dataset 38 | 39 | def forward(self, x): 40 | out = self.pyranet(x) 41 | out = out.narrow(1, 0, 17) 42 | 43 | flip_out = self.pyranet(flip(x)) 44 | flip_out = flip_out.narrow(1, 0, 17) 45 | 46 | flip_out = flip(shuffleLR( 47 | flip_out, self.dataset)) 48 | 49 | out = (flip_out + out) / 2 50 | 51 | return out 52 | 53 | 54 | class InferenNet_fast(nn.Module): 55 | def __init__(self, weights_file='./Models/sppe/fast_res101_320x256.pth'): 56 | super().__init__() 57 | 58 | self.pyranet = FastPose('resnet101').cuda() 59 | print('Loading pose model from {}'.format(weights_file)) 60 | self.pyranet.load_state_dict(torch.load(weights_file)) 61 | self.pyranet.eval() 62 | 63 | def forward(self, x): 64 | out = self.pyranet(x) 65 | out = out.narrow(1, 0, 17) 66 | 67 | return out 68 | 69 | 70 | class InferenNet_fastRes50(nn.Module): 71 | def __init__(self, weights_file='./Models/sppe/fast_res50_256x192.pth'): 72 | super().__init__() 73 | 74 | self.pyranet = FastPose('resnet50', 17).cuda() 75 | print('Loading pose model from {}'.format(weights_file)) 76 | self.pyranet.load_state_dict(torch.load(weights_file)) 77 | self.pyranet.eval() 78 | 79 | def forward(self, x): 80 | out = self.pyranet(x) 81 | 82 | return out 83 | -------------------------------------------------------------------------------- /SPPE/src/models/FastPose.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from torch.autograd import Variable 3 | 4 | from .layers.SE_Resnet import SEResnet 5 | from .layers.DUC import DUC 6 | from SPPE.src.opt import opt 7 | 8 | 9 | class FastPose(nn.Module): 10 | DIM = 128 11 | 12 | def __init__(self, backbone='resnet101', num_join=opt.nClasses): 13 | super(FastPose, self).__init__() 14 | assert backbone in ['resnet50', 'resnet101'] 15 | 16 | self.preact = SEResnet(backbone) 17 | 18 | 
self.suffle1 = nn.PixelShuffle(2) 19 | self.duc1 = DUC(512, 1024, upscale_factor=2) 20 | self.duc2 = DUC(256, 512, upscale_factor=2) 21 | 22 | self.conv_out = nn.Conv2d( 23 | self.DIM, num_join, kernel_size=3, stride=1, padding=1) 24 | 25 | def forward(self, x: Variable): 26 | out = self.preact(x) 27 | out = self.suffle1(out) 28 | out = self.duc1(out) 29 | out = self.duc2(out) 30 | 31 | out = self.conv_out(out) 32 | return out 33 | -------------------------------------------------------------------------------- /SPPE/src/models/__init__.py: -------------------------------------------------------------------------------- 1 | from . import * -------------------------------------------------------------------------------- /SPPE/src/models/hg-prm.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from .layers.PRM import Residual as ResidualPyramid 3 | from .layers.Residual import Residual as Residual 4 | from torch.autograd import Variable 5 | from SPPE.src.opt import opt 6 | from collections import defaultdict 7 | 8 | 9 | class Hourglass(nn.Module): 10 | def __init__(self, n, nFeats, nModules, inputResH, inputResW, net_type, B, C): 11 | super(Hourglass, self).__init__() 12 | 13 | self.ResidualUp = ResidualPyramid if n >= 2 else Residual 14 | self.ResidualDown = ResidualPyramid if n >= 3 else Residual 15 | 16 | self.depth = n 17 | self.nModules = nModules 18 | self.nFeats = nFeats 19 | self.net_type = net_type 20 | self.B = B 21 | self.C = C 22 | self.inputResH = inputResH 23 | self.inputResW = inputResW 24 | 25 | self.up1 = self._make_residual(self.ResidualUp, False, inputResH, inputResW) 26 | self.low1 = nn.Sequential( 27 | nn.MaxPool2d(2), 28 | self._make_residual(self.ResidualDown, False, inputResH / 2, inputResW / 2) 29 | ) 30 | if n > 1: 31 | self.low2 = Hourglass(n - 1, nFeats, nModules, inputResH / 2, inputResW / 2, net_type, B, C) 32 | else: 33 | self.low2 = self._make_residual(self.ResidualDown, False, inputResH / 2, inputResW / 2) 34 | 35 | self.low3 = self._make_residual(self.ResidualDown, True, inputResH / 2, inputResW / 2) 36 | self.up2 = nn.UpsamplingNearest2d(scale_factor=2) 37 | 38 | self.upperBranch = self.up1 39 | self.lowerBranch = nn.Sequential( 40 | self.low1, 41 | self.low2, 42 | self.low3, 43 | self.up2 44 | ) 45 | 46 | def _make_residual(self, resBlock, useConv, inputResH, inputResW): 47 | layer_list = [] 48 | for i in range(self.nModules): 49 | layer_list.append(resBlock(self.nFeats, self.nFeats, inputResH, inputResW, 50 | stride=1, net_type=self.net_type, useConv=useConv, 51 | baseWidth=self.B, cardinality=self.C)) 52 | return nn.Sequential(*layer_list) 53 | 54 | def forward(self, x: Variable): 55 | up1 = self.upperBranch(x) 56 | up2 = self.lowerBranch(x) 57 | out = up1 + up2 58 | return out 59 | 60 | 61 | class PyraNet(nn.Module): 62 | def __init__(self): 63 | super(PyraNet, self).__init__() 64 | 65 | B, C = opt.baseWidth, opt.cardinality 66 | self.inputResH = opt.inputResH / 4 67 | self.inputResW = opt.inputResW / 4 68 | self.nStack = opt.nStack 69 | 70 | self.cnv1 = nn.Sequential( 71 | nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3), 72 | nn.BatchNorm2d(64), 73 | nn.ReLU(True) 74 | ) 75 | self.r1 = nn.Sequential( 76 | ResidualPyramid(64, 128, opt.inputResH / 2, opt.inputResW / 2, 77 | stride=1, net_type='no_preact', useConv=False, baseWidth=B, cardinality=C), 78 | nn.MaxPool2d(2) 79 | ) 80 | self.r4 = ResidualPyramid(128, 128, self.inputResH, self.inputResW, 81 | stride=1, net_type='preact', 
useConv=False, baseWidth=B, cardinality=C) 82 | self.r5 = ResidualPyramid(128, opt.nFeats, self.inputResH, self.inputResW, 83 | stride=1, net_type='preact', useConv=False, baseWidth=B, cardinality=C) 84 | self.preact = nn.Sequential( 85 | self.cnv1, 86 | self.r1, 87 | self.r4, 88 | self.r5 89 | ) 90 | self.stack_layers = defaultdict(list) 91 | for i in range(self.nStack): 92 | hg = Hourglass(4, opt.nFeats, opt.nResidual, self.inputResH, self.inputResW, 'preact', B, C) 93 | lin = nn.Sequential( 94 | hg, 95 | nn.BatchNorm2d(opt.nFeats), 96 | nn.ReLU(True), 97 | nn.Conv2d(opt.nFeats, opt.nFeats, kernel_size=1, stride=1, padding=0), 98 | nn.BatchNorm2d(opt.nFeats), 99 | nn.ReLU(True) 100 | ) 101 | tmpOut = nn.Conv2d(opt.nFeats, opt.nClasses, kernel_size=1, stride=1, padding=0) 102 | self.stack_layers['lin'].append(lin) 103 | self.stack_layers['out'].append(tmpOut) 104 | if i < self.nStack - 1: 105 | lin_ = nn.Conv2d(opt.nFeats, opt.nFeats, kernel_size=1, stride=1, padding=0) 106 | tmpOut_ = nn.Conv2d(opt.nClasses, opt.nFeats, kernel_size=1, stride=1, padding=0) 107 | self.stack_layers['lin_'].append(lin_) 108 | self.stack_layers['out_'].append(tmpOut_) 109 | 110 | def forward(self, x: Variable): 111 | out = [] 112 | inter = self.preact(x) 113 | for i in range(self.nStack): 114 | lin = self.stack_layers['lin'][i](inter) 115 | tmpOut = self.stack_layers['out'][i](lin) 116 | out.append(tmpOut) 117 | if i < self.nStack - 1: 118 | lin_ = self.stack_layers['lin_'][i](lin) 119 | tmpOut_ = self.stack_layers['out_'][i](tmpOut) 120 | inter = inter + lin_ + tmpOut_ 121 | return out 122 | 123 | 124 | def createModel(**kw): 125 | model = PyraNet() 126 | return model 127 | -------------------------------------------------------------------------------- /SPPE/src/models/hgPRM.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from .layers.PRM import Residual as ResidualPyramid 3 | from .layers.Residual import Residual as Residual 4 | from torch.autograd import Variable 5 | import torch 6 | from SPPE.src.opt import opt 7 | import math 8 | 9 | 10 | class Hourglass(nn.Module): 11 | def __init__(self, n, nFeats, nModules, inputResH, inputResW, net_type, B, C): 12 | super(Hourglass, self).__init__() 13 | 14 | self.ResidualUp = ResidualPyramid if n >= 2 else Residual 15 | self.ResidualDown = ResidualPyramid if n >= 3 else Residual 16 | 17 | self.depth = n 18 | self.nModules = nModules 19 | self.nFeats = nFeats 20 | self.net_type = net_type 21 | self.B = B 22 | self.C = C 23 | self.inputResH = inputResH 24 | self.inputResW = inputResW 25 | 26 | up1 = self._make_residual(self.ResidualUp, False, inputResH, inputResW) 27 | low1 = nn.Sequential( 28 | nn.MaxPool2d(2), 29 | self._make_residual(self.ResidualDown, False, inputResH / 2, inputResW / 2) 30 | ) 31 | if n > 1: 32 | low2 = Hourglass(n - 1, nFeats, nModules, inputResH / 2, inputResW / 2, net_type, B, C) 33 | else: 34 | low2 = self._make_residual(self.ResidualDown, False, inputResH / 2, inputResW / 2) 35 | 36 | low3 = self._make_residual(self.ResidualDown, True, inputResH / 2, inputResW / 2) 37 | up2 = nn.UpsamplingNearest2d(scale_factor=2) 38 | 39 | self.upperBranch = up1 40 | self.lowerBranch = nn.Sequential( 41 | low1, 42 | low2, 43 | low3, 44 | up2 45 | ) 46 | 47 | def _make_residual(self, resBlock, useConv, inputResH, inputResW): 48 | layer_list = [] 49 | for i in range(self.nModules): 50 | layer_list.append(resBlock(self.nFeats, self.nFeats, inputResH, inputResW, 51 | stride=1, 
net_type=self.net_type, useConv=useConv, 52 | baseWidth=self.B, cardinality=self.C)) 53 | return nn.Sequential(*layer_list) 54 | 55 | def forward(self, x: Variable): 56 | up1 = self.upperBranch(x) 57 | up2 = self.lowerBranch(x) 58 | # out = up1 + up2 59 | out = torch.add(up1, up2) 60 | return out 61 | 62 | 63 | class PyraNet(nn.Module): 64 | def __init__(self): 65 | super(PyraNet, self).__init__() 66 | 67 | B, C = opt.baseWidth, opt.cardinality 68 | self.inputResH = opt.inputResH / 4 69 | self.inputResW = opt.inputResW / 4 70 | self.nStack = opt.nStack 71 | 72 | conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3) 73 | if opt.init: 74 | nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / 3)) 75 | 76 | cnv1 = nn.Sequential( 77 | conv1, 78 | nn.BatchNorm2d(64), 79 | nn.ReLU(True) 80 | ) 81 | 82 | r1 = nn.Sequential( 83 | ResidualPyramid(64, 128, opt.inputResH / 2, opt.inputResW / 2, 84 | stride=1, net_type='no_preact', useConv=False, baseWidth=B, cardinality=C), 85 | nn.MaxPool2d(2) 86 | ) 87 | r4 = ResidualPyramid(128, 128, self.inputResH, self.inputResW, 88 | stride=1, net_type='preact', useConv=False, baseWidth=B, cardinality=C) 89 | r5 = ResidualPyramid(128, opt.nFeats, self.inputResH, self.inputResW, 90 | stride=1, net_type='preact', useConv=False, baseWidth=B, cardinality=C) 91 | self.preact = nn.Sequential( 92 | cnv1, 93 | r1, 94 | r4, 95 | r5 96 | ) 97 | 98 | self.stack_lin = nn.ModuleList() 99 | self.stack_out = nn.ModuleList() 100 | self.stack_lin_ = nn.ModuleList() 101 | self.stack_out_ = nn.ModuleList() 102 | 103 | for i in range(self.nStack): 104 | hg = Hourglass(4, opt.nFeats, opt.nResidual, self.inputResH, self.inputResW, 'preact', B, C) 105 | conv1 = nn.Conv2d(opt.nFeats, opt.nFeats, kernel_size=1, stride=1, padding=0) 106 | if opt.init: 107 | nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / 2)) 108 | lin = nn.Sequential( 109 | hg, 110 | nn.BatchNorm2d(opt.nFeats), 111 | nn.ReLU(True), 112 | conv1, 113 | nn.BatchNorm2d(opt.nFeats), 114 | nn.ReLU(True) 115 | ) 116 | tmpOut = nn.Conv2d(opt.nFeats, opt.nClasses, kernel_size=1, stride=1, padding=0) 117 | if opt.init: 118 | nn.init.xavier_normal(tmpOut.weight) 119 | self.stack_lin.append(lin) 120 | self.stack_out.append(tmpOut) 121 | if i < self.nStack - 1: 122 | lin_ = nn.Conv2d(opt.nFeats, opt.nFeats, kernel_size=1, stride=1, padding=0) 123 | tmpOut_ = nn.Conv2d(opt.nClasses, opt.nFeats, kernel_size=1, stride=1, padding=0) 124 | if opt.init: 125 | nn.init.xavier_normal(lin_.weight) 126 | nn.init.xavier_normal(tmpOut_.weight) 127 | self.stack_lin_.append(lin_) 128 | self.stack_out_.append(tmpOut_) 129 | 130 | def forward(self, x: Variable): 131 | out = [] 132 | inter = self.preact(x) 133 | for i in range(self.nStack): 134 | lin = self.stack_lin[i](inter) 135 | tmpOut = self.stack_out[i](lin) 136 | out.append(tmpOut) 137 | if i < self.nStack - 1: 138 | lin_ = self.stack_lin_[i](lin) 139 | tmpOut_ = self.stack_out_[i](tmpOut) 140 | inter = inter + lin_ + tmpOut_ 141 | return out 142 | 143 | 144 | class PyraNet_Inference(nn.Module): 145 | def __init__(self): 146 | super(PyraNet_Inference, self).__init__() 147 | 148 | B, C = opt.baseWidth, opt.cardinality 149 | self.inputResH = opt.inputResH / 4 150 | self.inputResW = opt.inputResW / 4 151 | self.nStack = opt.nStack 152 | 153 | conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3) 154 | if opt.init: 155 | nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / 3)) 156 | 157 | cnv1 = nn.Sequential( 158 | conv1, 159 | nn.BatchNorm2d(64), 160 | nn.ReLU(True) 161 | 
) 162 | 163 | r1 = nn.Sequential( 164 | ResidualPyramid(64, 128, opt.inputResH / 2, opt.inputResW / 2, 165 | stride=1, net_type='no_preact', useConv=False, baseWidth=B, cardinality=C), 166 | nn.MaxPool2d(2) 167 | ) 168 | r4 = ResidualPyramid(128, 128, self.inputResH, self.inputResW, 169 | stride=1, net_type='preact', useConv=False, baseWidth=B, cardinality=C) 170 | r5 = ResidualPyramid(128, opt.nFeats, self.inputResH, self.inputResW, 171 | stride=1, net_type='preact', useConv=False, baseWidth=B, cardinality=C) 172 | self.preact = nn.Sequential( 173 | cnv1, 174 | r1, 175 | r4, 176 | r5 177 | ) 178 | 179 | self.stack_lin = nn.ModuleList() 180 | self.stack_out = nn.ModuleList() 181 | self.stack_lin_ = nn.ModuleList() 182 | self.stack_out_ = nn.ModuleList() 183 | 184 | for i in range(self.nStack): 185 | hg = Hourglass(4, opt.nFeats, opt.nResidual, 186 | self.inputResH, self.inputResW, 'preact', B, C) 187 | conv1 = nn.Conv2d(opt.nFeats, opt.nFeats, 188 | kernel_size=1, stride=1, padding=0) 189 | if opt.init: 190 | nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / 2)) 191 | lin = nn.Sequential( 192 | hg, 193 | nn.BatchNorm2d(opt.nFeats), 194 | nn.ReLU(True), 195 | conv1, 196 | nn.BatchNorm2d(opt.nFeats), 197 | nn.ReLU(True) 198 | ) 199 | tmpOut = nn.Conv2d(opt.nFeats, opt.nClasses, 200 | kernel_size=1, stride=1, padding=0) 201 | if opt.init: 202 | nn.init.xavier_normal(tmpOut.weight) 203 | self.stack_lin.append(lin) 204 | self.stack_out.append(tmpOut) 205 | if i < self.nStack - 1: 206 | lin_ = nn.Conv2d(opt.nFeats, opt.nFeats, 207 | kernel_size=1, stride=1, padding=0) 208 | tmpOut_ = nn.Conv2d(opt.nClasses, opt.nFeats, 209 | kernel_size=1, stride=1, padding=0) 210 | if opt.init: 211 | nn.init.xavier_normal(lin_.weight) 212 | nn.init.xavier_normal(tmpOut_.weight) 213 | self.stack_lin_.append(lin_) 214 | self.stack_out_.append(tmpOut_) 215 | 216 | def forward(self, x: Variable): 217 | inter = self.preact(x) 218 | for i in range(self.nStack): 219 | lin = self.stack_lin[i](inter) 220 | tmpOut = self.stack_out[i](lin) 221 | out = tmpOut 222 | if i < self.nStack - 1: 223 | lin_ = self.stack_lin_[i](lin) 224 | tmpOut_ = self.stack_out_[i](tmpOut) 225 | inter = inter + lin_ + tmpOut_ 226 | return out 227 | 228 | 229 | def createModel(**kw): 230 | model = PyraNet() 231 | return model 232 | 233 | 234 | def createModel_Inference(**kw): 235 | model = PyraNet_Inference() 236 | return model 237 | -------------------------------------------------------------------------------- /SPPE/src/models/layers/DUC.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | 5 | class DUC(nn.Module): 6 | """ 7 | INPUT: inplanes, planes, upscale_factor 8 | OUTPUT: (planes // 4)* ht * wd 9 | """ 10 | def __init__(self, inplanes, planes, upscale_factor=2): 11 | super(DUC, self).__init__() 12 | self.conv = nn.Conv2d(inplanes, planes, kernel_size=3, padding=1, bias=False) 13 | self.bn = nn.BatchNorm2d(planes) 14 | self.relu = nn.ReLU() 15 | 16 | self.pixel_shuffle = nn.PixelShuffle(upscale_factor) 17 | 18 | def forward(self, x): 19 | x = self.conv(x) 20 | x = self.bn(x) 21 | x = self.relu(x) 22 | x = self.pixel_shuffle(x) 23 | return x 24 | -------------------------------------------------------------------------------- /SPPE/src/models/layers/PRM.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from .util_models import ConcatTable, CaddTable, Identity 3 | import 
math 4 | from opt import opt 5 | 6 | 7 | class Residual(nn.Module): 8 | def __init__(self, numIn, numOut, inputResH, inputResW, stride=1, 9 | net_type='preact', useConv=False, baseWidth=9, cardinality=4): 10 | super(Residual, self).__init__() 11 | 12 | self.con = ConcatTable([convBlock(numIn, numOut, inputResH, 13 | inputResW, net_type, baseWidth, cardinality, stride), 14 | skipLayer(numIn, numOut, stride, useConv)]) 15 | self.cadd = CaddTable(True) 16 | 17 | def forward(self, x): 18 | out = self.con(x) 19 | out = self.cadd(out) 20 | return out 21 | 22 | 23 | def convBlock(numIn, numOut, inputResH, inputResW, net_type, baseWidth, cardinality, stride): 24 | numIn = int(numIn) 25 | numOut = int(numOut) 26 | 27 | addTable = ConcatTable() 28 | s_list = [] 29 | if net_type != 'no_preact': 30 | s_list.append(nn.BatchNorm2d(numIn)) 31 | s_list.append(nn.ReLU(True)) 32 | 33 | conv1 = nn.Conv2d(numIn, numOut // 2, kernel_size=1) 34 | if opt.init: 35 | nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / 2)) 36 | s_list.append(conv1) 37 | 38 | s_list.append(nn.BatchNorm2d(numOut // 2)) 39 | s_list.append(nn.ReLU(True)) 40 | 41 | conv2 = nn.Conv2d(numOut // 2, numOut // 2, 42 | kernel_size=3, stride=stride, padding=1) 43 | if opt.init: 44 | nn.init.xavier_normal(conv2.weight) 45 | s_list.append(conv2) 46 | 47 | s = nn.Sequential(*s_list) 48 | addTable.add(s) 49 | 50 | D = math.floor(numOut // baseWidth) 51 | C = cardinality 52 | s_list = [] 53 | 54 | if net_type != 'no_preact': 55 | s_list.append(nn.BatchNorm2d(numIn)) 56 | s_list.append(nn.ReLU(True)) 57 | 58 | conv1 = nn.Conv2d(numIn, D, kernel_size=1, stride=stride) 59 | if opt.init: 60 | nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / C)) 61 | 62 | s_list.append(conv1) 63 | s_list.append(nn.BatchNorm2d(D)) 64 | s_list.append(nn.ReLU(True)) 65 | s_list.append(pyramid(D, C, inputResH, inputResW)) 66 | s_list.append(nn.BatchNorm2d(D)) 67 | s_list.append(nn.ReLU(True)) 68 | 69 | a = nn.Conv2d(D, numOut // 2, kernel_size=1) 70 | a.nBranchIn = C 71 | if opt.init: 72 | nn.init.xavier_normal(a.weight, gain=math.sqrt(1 / C)) 73 | s_list.append(a) 74 | 75 | s = nn.Sequential(*s_list) 76 | addTable.add(s) 77 | 78 | elewiswAdd = nn.Sequential( 79 | addTable, 80 | CaddTable(False) 81 | ) 82 | conv2 = nn.Conv2d(numOut // 2, numOut, kernel_size=1) 83 | if opt.init: 84 | nn.init.xavier_normal(conv2.weight, gain=math.sqrt(1 / 2)) 85 | model = nn.Sequential( 86 | elewiswAdd, 87 | nn.BatchNorm2d(numOut // 2), 88 | nn.ReLU(True), 89 | conv2 90 | ) 91 | return model 92 | 93 | 94 | def pyramid(D, C, inputResH, inputResW): 95 | pyraTable = ConcatTable() 96 | sc = math.pow(2, 1 / C) 97 | for i in range(C): 98 | scaled = 1 / math.pow(sc, i + 1) 99 | conv1 = nn.Conv2d(D, D, kernel_size=3, stride=1, padding=1) 100 | if opt.init: 101 | nn.init.xavier_normal(conv1.weight) 102 | s = nn.Sequential( 103 | nn.FractionalMaxPool2d(2, output_ratio=(scaled, scaled)), 104 | conv1, 105 | nn.UpsamplingBilinear2d(size=(int(inputResH), int(inputResW)))) 106 | pyraTable.add(s) 107 | pyra = nn.Sequential( 108 | pyraTable, 109 | CaddTable(False) 110 | ) 111 | return pyra 112 | 113 | 114 | class skipLayer(nn.Module): 115 | def __init__(self, numIn, numOut, stride, useConv): 116 | super(skipLayer, self).__init__() 117 | self.identity = False 118 | 119 | if numIn == numOut and stride == 1 and not useConv: 120 | self.identity = True 121 | else: 122 | conv1 = nn.Conv2d(numIn, numOut, kernel_size=1, stride=stride) 123 | if opt.init: 124 | nn.init.xavier_normal(conv1.weight, 
gain=math.sqrt(1 / 2)) 125 | self.m = nn.Sequential( 126 | nn.BatchNorm2d(numIn), 127 | nn.ReLU(True), 128 | conv1 129 | ) 130 | 131 | def forward(self, x): 132 | if self.identity: 133 | return x 134 | else: 135 | return self.m(x) 136 | -------------------------------------------------------------------------------- /SPPE/src/models/layers/Residual.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import math 3 | from .util_models import ConcatTable, CaddTable, Identity 4 | from opt import opt 5 | 6 | 7 | def Residual(numIn, numOut, *arg, stride=1, net_type='preact', useConv=False, **kw): 8 | con = ConcatTable([convBlock(numIn, numOut, stride, net_type), 9 | skipLayer(numIn, numOut, stride, useConv)]) 10 | cadd = CaddTable(True) 11 | return nn.Sequential(con, cadd) 12 | 13 | 14 | def convBlock(numIn, numOut, stride, net_type): 15 | s_list = [] 16 | if net_type != 'no_preact': 17 | s_list.append(nn.BatchNorm2d(numIn)) 18 | s_list.append(nn.ReLU(True)) 19 | 20 | conv1 = nn.Conv2d(numIn, numOut // 2, kernel_size=1) 21 | if opt.init: 22 | nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / 2)) 23 | s_list.append(conv1) 24 | 25 | s_list.append(nn.BatchNorm2d(numOut // 2)) 26 | s_list.append(nn.ReLU(True)) 27 | 28 | conv2 = nn.Conv2d(numOut // 2, numOut // 2, kernel_size=3, stride=stride, padding=1) 29 | if opt.init: 30 | nn.init.xavier_normal(conv2.weight) 31 | s_list.append(conv2) 32 | s_list.append(nn.BatchNorm2d(numOut // 2)) 33 | s_list.append(nn.ReLU(True)) 34 | 35 | conv3 = nn.Conv2d(numOut // 2, numOut, kernel_size=1) 36 | if opt.init: 37 | nn.init.xavier_normal(conv3.weight) 38 | s_list.append(conv3) 39 | 40 | return nn.Sequential(*s_list) 41 | 42 | 43 | def skipLayer(numIn, numOut, stride, useConv): 44 | if numIn == numOut and stride == 1 and not useConv: 45 | return Identity() 46 | else: 47 | conv1 = nn.Conv2d(numIn, numOut, kernel_size=1, stride=stride) 48 | if opt.init: 49 | nn.init.xavier_normal(conv1.weight, gain=math.sqrt(1 / 2)) 50 | return nn.Sequential( 51 | nn.BatchNorm2d(numIn), 52 | nn.ReLU(True), 53 | conv1 54 | ) 55 | -------------------------------------------------------------------------------- /SPPE/src/models/layers/Resnet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | 5 | class Bottleneck(nn.Module): 6 | expansion = 4 7 | 8 | def __init__(self, inplanes, planes, stride=1, downsample=None): 9 | super(Bottleneck, self).__init__() 10 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=1, bias=False) 11 | self.bn1 = nn.BatchNorm2d(planes) 12 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 13 | self.bn2 = nn.BatchNorm2d(planes) 14 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, stride=1, bias=False) 15 | self.bn3 = nn.BatchNorm2d(planes * 4) 16 | self.downsample = downsample 17 | self.stride = stride 18 | 19 | def forward(self, x): 20 | residual = x 21 | 22 | out = F.relu(self.bn1(self.conv1(x)), inplace=True) 23 | out = F.relu(self.bn2(self.conv2(out)), inplace=True) 24 | out = self.bn3(self.conv3(out)) 25 | 26 | if self.downsample is not None: 27 | residual = self.downsample(x) 28 | 29 | out += residual 30 | out = F.relu(out, inplace=True) 31 | 32 | return out 33 | 34 | 35 | class ResNet(nn.Module): 36 | """ Resnet """ 37 | def __init__(self, architecture): 38 | super(ResNet, self).__init__() 39 | assert architecture in 
["resnet50", "resnet101"] 40 | self.inplanes = 64 41 | self.layers = [3, 4, {"resnet50": 6, "resnet101": 23}[architecture], 3] 42 | self.block = Bottleneck 43 | 44 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) 45 | self.bn1 = nn.BatchNorm2d(64, eps=1e-5, momentum=0.01, affine=True) 46 | self.relu = nn.ReLU(inplace=True) 47 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2) 48 | 49 | self.layer1 = self.make_layer(self.block, 64, self.layers[0]) 50 | self.layer2 = self.make_layer(self.block, 128, self.layers[1], stride=2) 51 | self.layer3 = self.make_layer(self.block, 256, self.layers[2], stride=2) 52 | 53 | self.layer4 = self.make_layer( 54 | self.block, 512, self.layers[3], stride=2) 55 | 56 | def forward(self, x): 57 | x = self.maxpool(self.relu(self.bn1(self.conv1(x)))) 58 | x = self.layer1(x) 59 | x = self.layer2(x) 60 | x = self.layer3(x) 61 | x = self.layer4(x) 62 | return x 63 | 64 | def stages(self): 65 | return [self.layer1, self.layer2, self.layer3, self.layer4] 66 | 67 | def make_layer(self, block, planes, blocks, stride=1): 68 | downsample = None 69 | if stride != 1 or self.inplanes != planes * block.expansion: 70 | downsample = nn.Sequential( 71 | nn.Conv2d(self.inplanes, planes * block.expansion, 72 | kernel_size=1, stride=stride, bias=False), 73 | nn.BatchNorm2d(planes * block.expansion), 74 | ) 75 | 76 | layers = [] 77 | layers.append(block(self.inplanes, planes, stride, downsample)) 78 | self.inplanes = planes * block.expansion 79 | for i in range(1, blocks): 80 | layers.append(block(self.inplanes, planes)) 81 | 82 | return nn.Sequential(*layers) 83 | -------------------------------------------------------------------------------- /SPPE/src/models/layers/SE_Resnet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from .SE_module import SELayer 3 | import torch.nn.functional as F 4 | 5 | 6 | class Bottleneck(nn.Module): 7 | expansion = 4 8 | 9 | def __init__(self, inplanes, planes, stride=1, downsample=None, reduction=False): 10 | super(Bottleneck, self).__init__() 11 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 12 | self.bn1 = nn.BatchNorm2d(planes) 13 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 14 | padding=1, bias=False) 15 | self.bn2 = nn.BatchNorm2d(planes) 16 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 17 | self.bn3 = nn.BatchNorm2d(planes * 4) 18 | if reduction: 19 | self.se = SELayer(planes * 4) 20 | 21 | self.reduc = reduction 22 | self.downsample = downsample 23 | self.stride = stride 24 | 25 | def forward(self, x): 26 | residual = x 27 | 28 | out = F.relu(self.bn1(self.conv1(x)), inplace=True) 29 | out = F.relu(self.bn2(self.conv2(out)), inplace=True) 30 | 31 | out = self.conv3(out) 32 | out = self.bn3(out) 33 | if self.reduc: 34 | out = self.se(out) 35 | 36 | if self.downsample is not None: 37 | residual = self.downsample(x) 38 | 39 | out += residual 40 | out = F.relu(out) 41 | 42 | return out 43 | 44 | 45 | class SEResnet(nn.Module): 46 | """ SEResnet """ 47 | 48 | def __init__(self, architecture): 49 | super(SEResnet, self).__init__() 50 | assert architecture in ["resnet50", "resnet101"] 51 | self.inplanes = 64 52 | self.layers = [3, 4, {"resnet50": 6, "resnet101": 23}[architecture], 3] 53 | self.block = Bottleneck 54 | 55 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, 56 | stride=2, padding=3, bias=False) 57 | self.bn1 = nn.BatchNorm2d(64, eps=1e-5, momentum=0.01, affine=True) 58 
| self.relu = nn.ReLU(inplace=True) 59 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 60 | 61 | self.layer1 = self.make_layer(self.block, 64, self.layers[0]) 62 | self.layer2 = self.make_layer( 63 | self.block, 128, self.layers[1], stride=2) 64 | self.layer3 = self.make_layer( 65 | self.block, 256, self.layers[2], stride=2) 66 | 67 | self.layer4 = self.make_layer( 68 | self.block, 512, self.layers[3], stride=2) 69 | 70 | def forward(self, x): 71 | x = self.maxpool(self.relu(self.bn1(self.conv1(x)))) # 64 * h/4 * w/4 72 | x = self.layer1(x) # 256 * h/4 * w/4 73 | x = self.layer2(x) # 512 * h/8 * w/8 74 | x = self.layer3(x) # 1024 * h/16 * w/16 75 | x = self.layer4(x) # 2048 * h/32 * w/32 76 | return x 77 | 78 | def stages(self): 79 | return [self.layer1, self.layer2, self.layer3, self.layer4] 80 | 81 | def make_layer(self, block, planes, blocks, stride=1): 82 | downsample = None 83 | if stride != 1 or self.inplanes != planes * block.expansion: 84 | downsample = nn.Sequential( 85 | nn.Conv2d(self.inplanes, planes * block.expansion, 86 | kernel_size=1, stride=stride, bias=False), 87 | nn.BatchNorm2d(planes * block.expansion), 88 | ) 89 | 90 | layers = [] 91 | if downsample is not None: 92 | layers.append(block(self.inplanes, planes, stride, downsample, reduction=True)) 93 | else: 94 | layers.append(block(self.inplanes, planes, stride, downsample)) 95 | self.inplanes = planes * block.expansion 96 | for i in range(1, blocks): 97 | layers.append(block(self.inplanes, planes)) 98 | 99 | return nn.Sequential(*layers) 100 | -------------------------------------------------------------------------------- /SPPE/src/models/layers/SE_module.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | 4 | class SELayer(nn.Module): 5 | def __init__(self, channel, reduction=1): 6 | super(SELayer, self).__init__() 7 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 8 | self.fc = nn.Sequential( 9 | nn.Linear(channel, channel // reduction), 10 | nn.ReLU(inplace=True), 11 | nn.Linear(channel // reduction, channel), 12 | nn.Sigmoid() 13 | ) 14 | 15 | def forward(self, x): 16 | b, c, _, _ = x.size() 17 | y = self.avg_pool(x).view(b, c) 18 | y = self.fc(y).view(b, c, 1, 1) 19 | return x * y 20 | -------------------------------------------------------------------------------- /SPPE/src/models/layers/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import * 2 | -------------------------------------------------------------------------------- /SPPE/src/models/layers/util_models.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Variable 4 | 5 | 6 | class ConcatTable(nn.Module): 7 | def __init__(self, module_list=None): 8 | super(ConcatTable, self).__init__() 9 | 10 | self.modules_list = nn.ModuleList(module_list) 11 | 12 | def forward(self, x: Variable): 13 | y = [] 14 | for i in range(len(self.modules_list)): 15 | y.append(self.modules_list[i](x)) 16 | return y 17 | 18 | def add(self, module): 19 | self.modules_list.append(module) 20 | 21 | 22 | class CaddTable(nn.Module): 23 | def __init__(self, inplace=False): 24 | super(CaddTable, self).__init__() 25 | self.inplace = inplace 26 | 27 | def forward(self, x: Variable or list): 28 | return torch.stack(x, 0).sum(0) 29 | 30 | 31 | class Identity(nn.Module): 32 | def __init__(self, params=None): 33 | super(Identity, self).__init__() 34 | self.params = nn.ParameterList(params) 35 | 36 | def forward(self, x: Variable or list): 37 | return x 38 | -------------------------------------------------------------------------------- /SPPE/src/opt.py: -------------------------------------------------------------------------------- 1 | """import argparse 2 | import torch 3 | 4 | parser = argparse.ArgumentParser(description='PyTorch AlphaPose Training') 5 | parser.add_argument("--return_counts", type=bool, default=True) 6 | parser.add_argument("--mode", default='client') 7 | parser.add_argument("--port", default=52162) 8 | 9 | "----------------------------- General options -----------------------------" 10 | parser.add_argument('--expID', default='default', type=str, 11 | help='Experiment ID') 12 | parser.add_argument('--dataset', default='coco', type=str, 13 | help='Dataset choice: mpii | coco') 14 | parser.add_argument('--nThreads', default=30, type=int, 15 | help='Number of data loading threads') 16 | parser.add_argument('--debug', default=False, type=bool, 17 | help='Print the debug information') 18 | parser.add_argument('--snapshot', default=1, type=int, 19 | help='How often to take a snapshot of the model (0 = never)') 20 | 21 | "----------------------------- AlphaPose options -----------------------------" 22 | parser.add_argument('--addDPG', default=False, type=bool, 23 | help='Train with data augmentation') 24 | 25 | "----------------------------- Model options -----------------------------" 26 | parser.add_argument('--netType', default='hgPRM', type=str, 27 | help='Options: hgPRM | resnext') 28 | parser.add_argument('--loadModel', default=None, type=str, 29 | help='Provide full path to a previously trained model') 30 | parser.add_argument('--Continue', default=False, type=bool, 31 | help='Pick up where an experiment left off') 32 | parser.add_argument('--nFeats', default=256, type=int, 33 | help='Number of features in the hourglass') 34 | parser.add_argument('--nClasses', default=33, type=int, 35 | help='Number of output channel') 36 | parser.add_argument('--nStack', default=8, type=int, 37 | help='Number of hourglasses to stack') 38 | 39 | "----------------------------- Hyperparameter options -----------------------------" 40 | parser.add_argument('--LR', default=2.5e-4, type=float, 41 | help='Learning rate') 42 | parser.add_argument('--momentum', default=0, type=float, 43 | help='Momentum') 44 | parser.add_argument('--weightDecay', default=0, type=float, 45 | help='Weight decay') 46 
| parser.add_argument('--crit', default='MSE', type=str, 47 | help='Criterion type') 48 | parser.add_argument('--optMethod', default='rmsprop', type=str, 49 | help='Optimization method: rmsprop | sgd | nag | adadelta') 50 | 51 | 52 | "----------------------------- Training options -----------------------------" 53 | parser.add_argument('--nEpochs', default=50, type=int, 54 | help='Number of hourglasses to stack') 55 | parser.add_argument('--epoch', default=0, type=int, 56 | help='Current epoch') 57 | parser.add_argument('--trainBatch', default=40, type=int, 58 | help='Train-batch size') 59 | parser.add_argument('--validBatch', default=20, type=int, 60 | help='Valid-batch size') 61 | parser.add_argument('--trainIters', default=0, type=int, 62 | help='Total train iters') 63 | parser.add_argument('--valIters', default=0, type=int, 64 | help='Total valid iters') 65 | parser.add_argument('--init', default=None, type=str, 66 | help='Initialization') 67 | "----------------------------- Data options -----------------------------" 68 | parser.add_argument('--inputResH', default=384, type=int, 69 | help='Input image height') 70 | parser.add_argument('--inputResW', default=320, type=int, 71 | help='Input image width') 72 | parser.add_argument('--outputResH', default=96, type=int, 73 | help='Output heatmap height') 74 | parser.add_argument('--outputResW', default=80, type=int, 75 | help='Output heatmap width') 76 | parser.add_argument('--scale', default=0.25, type=float, 77 | help='Degree of scale augmentation') 78 | parser.add_argument('--rotate', default=30, type=float, 79 | help='Degree of rotation augmentation') 80 | parser.add_argument('--hmGauss', default=1, type=int, 81 | help='Heatmap gaussian size') 82 | 83 | "----------------------------- PyraNet options -----------------------------" 84 | parser.add_argument('--baseWidth', default=9, type=int, 85 | help='Heatmap gaussian size') 86 | parser.add_argument('--cardinality', default=5, type=int, 87 | help='Heatmap gaussian size') 88 | parser.add_argument('--nResidual', default=1, type=int, 89 | help='Number of residual modules at each location in the pyranet') 90 | 91 | "----------------------------- Distribution options -----------------------------" 92 | parser.add_argument('--dist', dest='dist', type=int, default=1, 93 | help='distributed training or not') 94 | parser.add_argument('--backend', dest='backend', type=str, default='gloo', 95 | help='backend for distributed training') 96 | parser.add_argument('--port', dest='port', 97 | help='port of server') 98 | opt = parser.parse_args()""" 99 | 100 | """if opt.Continue: 101 | opt = torch.load("../exp/{}/{}/option.pkl".format(opt.dataset, opt.expID)) 102 | opt.Continue = True 103 | opt.nEpochs = 50 104 | print("--- Continue ---")""" 105 | 106 | 107 | class opt: 108 | nClasses = 33 109 | inputResH = 384 110 | inputResW = 320 111 | outputResH = 96 112 | outputResW = 80 113 | scale = 0.25 114 | rotate = 30 115 | hmGauss = 1 116 | -------------------------------------------------------------------------------- /SPPE/src/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import * 2 | -------------------------------------------------------------------------------- /SPPE/src/utils/dataset/.coco.py.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GajuuzZ/Human-Falling-Detect-Tracks/7ed2faa4d6147dfd576f58869b6c25545208af35/SPPE/src/utils/dataset/.coco.py.swp -------------------------------------------------------------------------------- /SPPE/src/utils/dataset/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GajuuzZ/Human-Falling-Detect-Tracks/7ed2faa4d6147dfd576f58869b6c25545208af35/SPPE/src/utils/dataset/__init__.py -------------------------------------------------------------------------------- /SPPE/src/utils/dataset/coco.py: -------------------------------------------------------------------------------- 1 | import os 2 | import h5py 3 | from functools import reduce 4 | 5 | import torch.utils.data as data 6 | from ..pose import generateSampleBox 7 | from opt import opt 8 | 9 | 10 | class Mscoco(data.Dataset): 11 | def __init__(self, train=True, sigma=1, 12 | scale_factor=(0.2, 0.3), rot_factor=40, label_type='Gaussian'): 13 | self.img_folder = '../data/coco/images' # root image folders 14 | self.is_train = train # training set or test set 15 | self.inputResH = opt.inputResH 16 | self.inputResW = opt.inputResW 17 | self.outputResH = opt.outputResH 18 | self.outputResW = opt.outputResW 19 | self.sigma = sigma 20 | self.scale_factor = scale_factor 21 | self.rot_factor = rot_factor 22 | self.label_type = label_type 23 | 24 | self.nJoints_coco = 17 25 | self.nJoints_mpii = 16 26 | self.nJoints = 33 27 | 28 | self.accIdxs = (1, 2, 3, 4, 5, 6, 7, 8, 29 | 9, 10, 11, 12, 13, 14, 15, 16, 17) 30 | self.flipRef = ((2, 3), (4, 5), (6, 7), 31 | (8, 9), (10, 11), (12, 13), 32 | (14, 15), (16, 17)) 33 | 34 | # create train/val split 35 | with h5py.File('../data/coco/annot_clean.h5', 'r') as annot: 36 | # train 37 | self.imgname_coco_train = annot['imgname'][:-5887] 38 | self.bndbox_coco_train = annot['bndbox'][:-5887] 39 | self.part_coco_train = annot['part'][:-5887] 40 | # val 41 | self.imgname_coco_val = annot['imgname'][-5887:] 42 | self.bndbox_coco_val = annot['bndbox'][-5887:] 43 | self.part_coco_val = annot['part'][-5887:] 44 | 45 | self.size_train = self.imgname_coco_train.shape[0] 46 | self.size_val = self.imgname_coco_val.shape[0] 47 | 48 | def __getitem__(self, index): 49 | sf = self.scale_factor 50 | 51 | if self.is_train: 52 | part = self.part_coco_train[index] 53 | bndbox = self.bndbox_coco_train[index] 54 | imgname = self.imgname_coco_train[index] 55 | else: 56 | part = self.part_coco_val[index] 57 | bndbox = self.bndbox_coco_val[index] 58 | imgname = self.imgname_coco_val[index] 59 | 60 | imgname = reduce(lambda x, y: x + y, map(lambda x: chr(int(x)), imgname)) 61 | img_path = os.path.join(self.img_folder, imgname) 62 | 63 | metaData = generateSampleBox(img_path, bndbox, part, self.nJoints, 64 | 'coco', sf, self, train=self.is_train) 65 | 66 | inp, out_bigcircle, out_smallcircle, out, setMask = metaData 67 | 68 | label = [] 69 | for i in range(opt.nStack): 70 | if i < 2: 71 | # label.append(out_bigcircle.clone()) 72 | label.append(out.clone()) 73 | elif i < 4: 74 | # label.append(out_smallcircle.clone()) 75 | label.append(out.clone()) 76 | else: 77 | label.append(out.clone()) 78 | 79 | return inp, label, setMask, 'coco' 80 | 81 | def __len__(self): 82 | if self.is_train: 83 | return self.size_train 84 | 
else: 85 | return self.size_val 86 | -------------------------------------------------------------------------------- /SPPE/src/utils/dataset/fuse.py: -------------------------------------------------------------------------------- 1 | import os 2 | import h5py 3 | from functools import reduce 4 | 5 | import torch.utils.data as data 6 | from ..pose import generateSampleBox 7 | from opt import opt 8 | 9 | 10 | class Mscoco(data.Dataset): 11 | def __init__(self, train=True, sigma=1, 12 | scale_factor=0.25, rot_factor=30, label_type='Gaussian'): 13 | self.img_folder = '../data/' # root image folders 14 | self.is_train = train # training set or test set 15 | self.inputResH = 320 16 | self.inputResW = 256 17 | self.outputResH = 80 18 | self.outputResW = 64 19 | self.sigma = sigma 20 | self.scale_factor = (0.2, 0.3) 21 | self.rot_factor = rot_factor 22 | self.label_type = label_type 23 | 24 | self.nJoints_coco = 17 25 | self.nJoints_mpii = 16 26 | self.nJoints = 33 27 | 28 | self.accIdxs = (1, 2, 3, 4, 5, 6, 7, 8, # COCO 29 | 9, 10, 11, 12, 13, 14, 15, 16, 17, 30 | 18, 19, 20, 21, 22, 23, # MPII 31 | 28, 29, 32, 33) 32 | 33 | self.flipRef = ((2, 3), (4, 5), (6, 7), # COCO 34 | (8, 9), (10, 11), (12, 13), 35 | (14, 15), (16, 17), 36 | (18, 23), (19, 22), (20, 21), # MPII 37 | (28, 33), (29, 32), (30, 31)) 38 | 39 | ''' 40 | Create train/val split 41 | ''' 42 | # COCO 43 | with h5py.File('../data/coco/annot_clean.h5', 'r') as annot: 44 | # train 45 | self.imgname_coco_train = annot['imgname'][:-5887] 46 | self.bndbox_coco_train = annot['bndbox'][:-5887] 47 | self.part_coco_train = annot['part'][:-5887] 48 | # val 49 | self.imgname_coco_val = annot['imgname'][-5887:] 50 | self.bndbox_coco_val = annot['bndbox'][-5887:] 51 | self.part_coco_val = annot['part'][-5887:] 52 | # MPII 53 | with h5py.File('../data/mpii/annot_mpii.h5', 'r') as annot: 54 | # train 55 | self.imgname_mpii_train = annot['imgname'][:-1358] 56 | self.bndbox_mpii_train = annot['bndbox'][:-1358] 57 | self.part_mpii_train = annot['part'][:-1358] 58 | # val 59 | self.imgname_mpii_val = annot['imgname'][-1358:] 60 | self.bndbox_mpii_val = annot['bndbox'][-1358:] 61 | self.part_mpii_val = annot['part'][-1358:] 62 | 63 | self.size_coco_train = self.imgname_coco_train.shape[0] 64 | self.size_coco_val = self.imgname_coco_val.shape[0] 65 | self.size_train = self.imgname_coco_train.shape[0] + self.imgname_mpii_train.shape[0] 66 | self.size_val = self.imgname_coco_val.shape[0] + self.imgname_mpii_val.shape[0] 67 | self.train, self.valid = [], [] 68 | 69 | def __getitem__(self, index): 70 | sf = self.scale_factor 71 | 72 | if self.is_train and index < self.size_coco_train: # COCO 73 | part = self.part_coco_train[index] 74 | bndbox = self.bndbox_coco_train[index] 75 | imgname = self.imgname_coco_train[index] 76 | imgset = 'coco' 77 | elif self.is_train: # MPII 78 | part = self.part_mpii_train[index - self.size_coco_train] 79 | bndbox = self.bndbox_mpii_train[index - self.size_coco_train] 80 | imgname = self.imgname_mpii_train[index - self.size_coco_train] 81 | imgset = 'mpii' 82 | elif index < self.size_coco_val: 83 | part = self.part_coco_val[index] 84 | bndbox = self.bndbox_coco_val[index] 85 | imgname = self.imgname_coco_val[index] 86 | imgset = 'coco' 87 | else: 88 | part = self.part_mpii_val[index - self.size_coco_val] 89 | bndbox = self.bndbox_mpii_val[index - self.size_coco_val] 90 | imgname = self.imgname_mpii_val[index - self.size_coco_val] 91 | imgset = 'mpii' 92 | 93 | if imgset == 'coco': 94 | imgname = reduce(lambda x, y: x + y, 
map(lambda x: chr(int(x)), imgname)) 95 | else: 96 | imgname = reduce(lambda x, y: x + y, map(lambda x: chr(int(x)), imgname))[:13] 97 | 98 | img_path = os.path.join(self.img_folder, imgset, 'images', imgname) 99 | 100 | metaData = generateSampleBox(img_path, bndbox, part, self.nJoints, 101 | imgset, sf, self, train=self.is_train) 102 | 103 | inp, out_bigcircle, out_smallcircle, out, setMask = metaData 104 | 105 | label = [] 106 | for i in range(opt.nStack): 107 | if i < 2: 108 | # label.append(out_bigcircle.clone()) 109 | label.append(out.clone()) 110 | elif i < 4: 111 | # label.append(out_smallcircle.clone()) 112 | label.append(out.clone()) 113 | else: 114 | label.append(out.clone()) 115 | 116 | return inp, label, setMask, imgset 117 | 118 | def __len__(self): 119 | if self.is_train: 120 | return self.size_train 121 | else: 122 | return self.size_val 123 | -------------------------------------------------------------------------------- /SPPE/src/utils/dataset/mpii.py: -------------------------------------------------------------------------------- 1 | import os 2 | import h5py 3 | from functools import reduce 4 | 5 | import torch.utils.data as data 6 | from ..pose import generateSampleBox 7 | from opt import opt 8 | 9 | 10 | class Mpii(data.Dataset): 11 | def __init__(self, train=True, sigma=1, 12 | scale_factor=0.25, rot_factor=30, label_type='Gaussian'): 13 | self.img_folder = '../data/mpii/images' # root image folders 14 | self.is_train = train # training set or test set 15 | self.inputResH = 320 16 | self.inputResW = 256 17 | self.outputResH = 80 18 | self.outputResW = 64 19 | self.sigma = sigma 20 | self.scale_factor = (0.2, 0.3) 21 | self.rot_factor = rot_factor 22 | self.label_type = label_type 23 | 24 | self.nJoints_mpii = 16 25 | self.nJoints = 16 26 | 27 | self.accIdxs = (1, 2, 3, 4, 5, 6, 28 | 11, 12, 15, 16) 29 | self.flipRef = ((1, 6), (2, 5), (3, 4), 30 | (11, 16), (12, 15), (13, 14)) 31 | 32 | # create train/val split 33 | with h5py.File('../data/mpii/annot_mpii.h5', 'r') as annot: 34 | # train 35 | self.imgname_mpii_train = annot['imgname'][:-1358] 36 | self.bndbox_mpii_train = annot['bndbox'][:-1358] 37 | self.part_mpii_train = annot['part'][:-1358] 38 | # val 39 | self.imgname_mpii_val = annot['imgname'][-1358:] 40 | self.bndbox_mpii_val = annot['bndbox'][-1358:] 41 | self.part_mpii_val = annot['part'][-1358:] 42 | 43 | self.size_train = self.imgname_mpii_train.shape[0] 44 | self.size_val = self.imgname_mpii_val.shape[0] 45 | self.train, self.valid = [], [] 46 | 47 | def __getitem__(self, index): 48 | sf = self.scale_factor 49 | 50 | if self.is_train: 51 | part = self.part_mpii_train[index] 52 | bndbox = self.bndbox_mpii_train[index] 53 | imgname = self.imgname_mpii_train[index] 54 | else: 55 | part = self.part_mpii_val[index] 56 | bndbox = self.bndbox_mpii_val[index] 57 | imgname = self.imgname_mpii_val[index] 58 | 59 | imgname = reduce(lambda x, y: x + y, map(lambda x: chr(int(x)), imgname))[:13] 60 | img_path = os.path.join(self.img_folder, imgname) 61 | 62 | metaData = generateSampleBox(img_path, bndbox, part, self.nJoints, 63 | 'mpii', sf, self, train=self.is_train) 64 | 65 | inp, out_bigcircle, out_smallcircle, out, setMask = metaData 66 | 67 | label = [] 68 | for i in range(opt.nStack): 69 | if i < 2: 70 | #label.append(out_bigcircle.clone()) 71 | label.append(out.clone()) 72 | elif i < 4: 73 | #label.append(out_smallcircle.clone()) 74 | label.append(out.clone()) 75 | else: 76 | label.append(out.clone()) 77 | 78 | return inp, label, setMask 79 | 80 | def 
__len__(self): 81 | if self.is_train: 82 | return self.size_train 83 | else: 84 | return self.size_val 85 | -------------------------------------------------------------------------------- /SPPE/src/utils/eval.py: -------------------------------------------------------------------------------- 1 | from SPPE.src.opt import opt 2 | try: 3 | from utils import transformBoxInvert, transformBoxInvert_batch, findPeak, processPeaks 4 | except ImportError: 5 | from SPPE.src.utils.img import transformBoxInvert, transformBoxInvert_batch, findPeak, processPeaks 6 | import torch 7 | 8 | 9 | class DataLogger(object): 10 | def __init__(self): 11 | self.clear() 12 | 13 | def clear(self): 14 | self.value = 0 15 | self.sum = 0 16 | self.cnt = 0 17 | self.avg = 0 18 | 19 | def update(self, value, n=1): 20 | self.value = value 21 | self.sum += value * n 22 | self.cnt += n 23 | self._cal_avg() 24 | 25 | def _cal_avg(self): 26 | self.avg = self.sum / self.cnt 27 | 28 | 29 | def accuracy(output, label, dataset): 30 | if type(output) == list: 31 | return accuracy(output[opt.nStack - 1], label[opt.nStack - 1], dataset) 32 | else: 33 | return heatmapAccuracy(output.cpu().data, label.cpu().data, dataset.accIdxs) 34 | 35 | 36 | def heatmapAccuracy(output, label, idxs): 37 | preds = getPreds(output) 38 | gt = getPreds(label) 39 | 40 | norm = torch.ones(preds.size(0)) * opt.outputResH / 10 41 | dists = calc_dists(preds, gt, norm) 42 | #print(dists) 43 | acc = torch.zeros(len(idxs) + 1) 44 | avg_acc = 0 45 | cnt = 0 46 | for i in range(len(idxs)): 47 | acc[i + 1] = dist_acc(dists[idxs[i] - 1]) 48 | if acc[i + 1] >= 0: 49 | avg_acc = avg_acc + acc[i + 1] 50 | cnt += 1 51 | if cnt != 0: 52 | acc[0] = avg_acc / cnt 53 | return acc 54 | 55 | 56 | def getPreds(hm): 57 | """ get predictions from score maps in torch Tensor 58 | return type: torch.LongTensor 59 | """ 60 | assert hm.dim() == 4, 'Score maps should be 4-dim' 61 | maxval, idx = torch.max(hm.view(hm.size(0), hm.size(1), -1), 2) 62 | 63 | maxval = maxval.view(hm.size(0), hm.size(1), 1) 64 | idx = idx.view(hm.size(0), hm.size(1), 1) + 1 65 | 66 | preds = idx.repeat(1, 1, 2).float() 67 | 68 | preds[:, :, 0] = (preds[:, :, 0] - 1) % hm.size(3) 69 | preds[:, :, 1] = torch.floor((preds[:, :, 1] - 1) / hm.size(3)) 70 | 71 | # pred_mask = maxval.gt(0).repeat(1, 1, 2).float() 72 | # preds *= pred_mask 73 | return preds 74 | 75 | 76 | def calc_dists(preds, target, normalize): 77 | preds = preds.float().clone() 78 | target = target.float().clone() 79 | dists = torch.zeros(preds.size(1), preds.size(0)) 80 | for n in range(preds.size(0)): 81 | for c in range(preds.size(1)): 82 | if target[n, c, 0] > 0 and target[n, c, 1] > 0: 83 | dists[c, n] = torch.dist( 84 | preds[n, c, :], target[n, c, :]) / normalize[n] 85 | else: 86 | dists[c, n] = -1 87 | return dists 88 | 89 | 90 | def dist_acc(dists, thr=0.5): 91 | """ Return percentage below threshold while ignoring values with a -1 """ 92 | if dists.ne(-1).sum() > 0: 93 | return dists.le(thr).eq(dists.ne(-1)).float().sum() * 1.0 / dists.ne(-1).float().sum() 94 | else: 95 | return - 1 96 | 97 | 98 | def postprocess(output): 99 | p = getPreds(output) 100 | 101 | for i in range(p.size(0)): 102 | for j in range(p.size(1)): 103 | hm = output[i][j] 104 | pX, pY = int(round(p[i][j][0])), int(round(p[i][j][1])) 105 | if 0 < pX < opt.outputResW - 1 and 0 < pY < opt.outputResH - 1: 106 | diff = torch.Tensor((hm[pY][pX + 1] - hm[pY][pX - 1], hm[pY + 1][pX] - hm[pY - 1][pX])) 107 | p[i][j] += diff.sign() * 0.25 108 | p -= 0.5 109 | 110 | return p 
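`getPreds` above turns the flattened argmax of each heatmap back into `(x, y)` grid coordinates, and `postprocess` then nudges the peak a quarter pixel toward the larger neighbour. A self-contained sketch of the same arithmetic (written 0-based) on a tiny synthetic heatmap, with invented values:

```python
import numpy as np

H, W = 4, 5
hm = np.zeros((H, W), dtype=np.float32)
hm[2, 3] = 1.0       # peak at row 2, column 3
hm[2, 4] = 0.4       # right neighbour hotter than the left one (0.0)

idx = int(hm.reshape(-1).argmax())   # flattened index
x, y = idx % W, idx // W             # column, row
assert (x, y) == (3, 2)

# Quarter-pixel refinement: step 0.25 toward the larger of the two neighbours.
dx = 0.25 * np.sign(hm[y, x + 1] - hm[y, x - 1])
dy = 0.25 * np.sign(hm[y + 1, x] - hm[y - 1, x])
print(x + dx, y + dy)                # 3.25 2.0
```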
111 | 112 | 113 | def getPrediction(hms, pt1, pt2, inpH, inpW, resH, resW): 114 | """ 115 | Get keypoint location from heatmaps 116 | """ 117 | assert hms.dim() == 4, 'Score maps should be 4-dim' 118 | maxval, idx = torch.max(hms.view(hms.size(0), hms.size(1), -1), 2) 119 | 120 | maxval = maxval.view(hms.size(0), hms.size(1), 1) 121 | idx = idx.view(hms.size(0), hms.size(1), 1) + 1 122 | 123 | preds = idx.repeat(1, 1, 2).float() 124 | 125 | preds[:, :, 0] = (preds[:, :, 0] - 1) % hms.size(3) 126 | preds[:, :, 1] = torch.floor((preds[:, :, 1] - 1) / hms.size(3)) 127 | 128 | pred_mask = maxval.gt(0).repeat(1, 1, 2).float() 129 | preds *= pred_mask 130 | 131 | # Very simple post-processing step to improve performance at tight PCK thresholds 132 | """for i in range(preds.size(0)): 133 | for j in range(preds.size(1)): 134 | hm = hms[i][j] 135 | pX, pY = int(round(float(preds[i][j][0]))), int(round(float(preds[i][j][1]))) 136 | if 0 < pX < opt.outputResW - 1 and 0 < pY < opt.outputResH - 1: 137 | diff = torch.Tensor( 138 | (hm[pY][pX + 1] - hm[pY][pX - 1], hm[pY + 1][pX] - hm[pY - 1][pX])) 139 | preds[i][j] += diff.sign() * 0.25 140 | preds += 0.2""" 141 | 142 | preds_tf = torch.zeros(preds.size()) 143 | preds_tf = transformBoxInvert_batch(preds, pt1, pt2, inpH, inpW, resH, resW) 144 | return preds, preds_tf, maxval 145 | 146 | 147 | def getMultiPeakPrediction(hms, pt1, pt2, inpH, inpW, resH, resW): 148 | 149 | assert hms.dim() == 4, 'Score maps should be 4-dim' 150 | 151 | preds_img = {} 152 | hms = hms.numpy() 153 | for n in range(hms.shape[0]): # Number of samples 154 | preds_img[n] = {} # Result of sample: n 155 | for k in range(hms.shape[1]): # Number of keypoints 156 | preds_img[n][k] = [] # Result of keypoint: k 157 | hm = hms[n][k] 158 | 159 | candidate_points = findPeak(hm) 160 | 161 | res_pt = processPeaks(candidate_points, hm, 162 | pt1[n], pt2[n], inpH, inpW, resH, resW) 163 | 164 | preds_img[n][k] = res_pt 165 | 166 | return preds_img 167 | 168 | 169 | def getPrediction_batch(hms, pt1, pt2, inpH, inpW, resH, resW): 170 | """ 171 | Get keypoint location from heatmaps 172 | pt1, pt2: [n, 2] 173 | OUTPUT: 174 | preds: [n, 17, 2] 175 | """ 176 | 177 | assert hms.dim() == 4, 'Score maps should be 4-dim' 178 | flat_hms = hms.view(hms.size(0), hms.size(1), -1) 179 | maxval, idx = torch.max(flat_hms, 2) 180 | 181 | maxval = maxval.view(hms.size(0), hms.size(1), 1) 182 | idx = idx.view(hms.size(0), hms.size(1), 1) + 1 183 | 184 | preds = idx.repeat(1, 1, 2).float() 185 | 186 | preds[:, :, 0] = (preds[:, :, 0] - 1) % hms.size(3) 187 | preds[:, :, 1] = torch.floor((preds[:, :, 1] - 1) / hms.size(3)) 188 | 189 | pred_mask = maxval.gt(0).repeat(1, 1, 2).float() 190 | preds *= pred_mask 191 | 192 | # Very simple post-processing step to improve performance at tight PCK thresholds 193 | idx_up = (idx - hms.size(3)).clamp(0, flat_hms.size(2) - 1) 194 | idx_down = (idx + hms.size(3)).clamp(0, flat_hms.size(2) - 1) 195 | idx_left = (idx - 1).clamp(0, flat_hms.size(2) - 1) 196 | idx_right = (idx + 1).clamp(0, flat_hms.size(2) - 1) 197 | 198 | maxval_up = flat_hms.gather(2, idx_up) 199 | maxval_down = flat_hms.gather(2, idx_down) 200 | maxval_left = flat_hms.gather(2, idx_left) 201 | maxval_right = flat_hms.gather(2, idx_right) 202 | 203 | diff1 = (maxval_right - maxval_left).sign() * 0.25 204 | diff2 = (maxval_down - maxval_up).sign() * 0.25 205 | diff1[idx_up <= hms.size(3)] = 0 206 | diff1[idx_down / hms.size(3) >= (hms.size(3) - 1)] = 0 207 | diff2[(idx_left % hms.size(3)) == 0] = 0 208 | 
diff2[(idx_left % hms.size(3)) == (hms.size(3) - 1)] = 0 209 | 210 | preds[:, :, 0] += diff1.squeeze(-1) 211 | preds[:, :, 1] += diff2.squeeze(-1) 212 | 213 | preds_tf = torch.zeros(preds.size()) 214 | preds_tf = transformBoxInvert_batch(preds, pt1, pt2, inpH, inpW, resH, resW) 215 | 216 | return preds, preds_tf, maxval 217 | -------------------------------------------------------------------------------- /SPPE/src/utils/pose.py: -------------------------------------------------------------------------------- 1 | from utils import (load_image, drawGaussian, drawBigCircle, drawSmallCircle, cv_rotate, 2 | cropBox, transformBox, flip, shuffleLR, drawCOCO) 3 | from utils import getPrediction 4 | import torch 5 | import numpy as np 6 | import random 7 | from SPPE.src.opt import opt 8 | 9 | 10 | def rnd(x): 11 | return max(-2 * x, min(2 * x, np.random.randn(1)[0] * x)) 12 | 13 | 14 | def generateSampleBox(img_path, bndbox, part, nJoints, imgset, scale_factor, dataset, train=True): 15 | 16 | nJoints_coco = 17 17 | nJoints_mpii = 16 18 | img = load_image(img_path) 19 | if train: 20 | img[0].mul_(random.uniform(0.7, 1.3)).clamp_(0, 1) 21 | img[1].mul_(random.uniform(0.7, 1.3)).clamp_(0, 1) 22 | img[2].mul_(random.uniform(0.7, 1.3)).clamp_(0, 1) 23 | 24 | ori_img = img.clone() 25 | img[0].add_(-0.406) 26 | img[1].add_(-0.457) 27 | img[2].add_(-0.480) 28 | 29 | upLeft = torch.Tensor((int(bndbox[0][0]), int(bndbox[0][1]))) 30 | bottomRight = torch.Tensor((int(bndbox[0][2]), int(bndbox[0][3]))) 31 | ht = bottomRight[1] - upLeft[1] 32 | width = bottomRight[0] - upLeft[0] 33 | imght = img.shape[1] 34 | imgwidth = img.shape[2] 35 | scaleRate = random.uniform(*scale_factor) 36 | 37 | upLeft[0] = max(0, upLeft[0] - width * scaleRate / 2) 38 | upLeft[1] = max(0, upLeft[1] - ht * scaleRate / 2) 39 | bottomRight[0] = min(imgwidth - 1, bottomRight[0] + width * scaleRate / 2) 40 | bottomRight[1] = min(imght - 1, bottomRight[1] + ht * scaleRate / 2) 41 | 42 | # Doing Random Sample 43 | if opt.addDPG: 44 | PatchScale = random.uniform(0, 1) 45 | if PatchScale > 0.85: 46 | ratio = ht / width 47 | if width < ht: 48 | patchWidth = PatchScale * width 49 | patchHt = patchWidth * ratio 50 | else: 51 | patchHt = PatchScale * ht 52 | patchWidth = patchHt / ratio 53 | 54 | xmin = upLeft[0] + random.uniform(0, 1) * (width - patchWidth) 55 | ymin = upLeft[1] + random.uniform(0, 1) * (ht - patchHt) 56 | 57 | xmax = xmin + patchWidth + 1 58 | ymax = ymin + patchHt + 1 59 | else: 60 | xmin = max(1, min(upLeft[0] + np.random.normal(-0.0142, 0.1158) * width, imgwidth - 3)) 61 | ymin = max(1, min(upLeft[1] + np.random.normal(0.0043, 0.068) * ht, imght - 3)) 62 | xmax = min(max(xmin + 2, bottomRight[0] + np.random.normal(0.0154, 0.1337) * width), imgwidth - 3) 63 | ymax = min(max(ymin + 2, bottomRight[1] + np.random.normal(-0.0013, 0.0711) * ht), imght - 3) 64 | 65 | upLeft[0] = xmin 66 | upLeft[1] = ymin 67 | bottomRight[0] = xmax 68 | bottomRight[1] = ymax 69 | 70 | # Counting Joints number 71 | jointNum = 0 72 | if imgset == 'coco': 73 | for i in range(17): 74 | if part[i][0] > 0 and part[i][0] > upLeft[0] and part[i][1] > upLeft[1] \ 75 | and part[i][0] < bottomRight[0] and part[i][1] < bottomRight[1]: 76 | jointNum += 1 77 | else: 78 | for i in range(16): 79 | if part[i][0] > 0 and part[i][0] > upLeft[0] and part[i][1] > upLeft[1] \ 80 | and part[i][0] < bottomRight[0] and part[i][1] < bottomRight[1]: 81 | jointNum += 1 82 | 83 | # Doing Random Crop 84 | if opt.addDPG: 85 | if jointNum > 13 and train: 86 | switch = 
random.uniform(0, 1) 87 | if switch > 0.96: 88 | bottomRight[0] = (upLeft[0] + bottomRight[0]) / 2 89 | bottomRight[1] = (upLeft[1] + bottomRight[1]) / 2 90 | elif switch > 0.92: 91 | upLeft[0] = (upLeft[0] + bottomRight[0]) / 2 92 | bottomRight[1] = (upLeft[1] + bottomRight[1]) / 2 93 | elif switch > 0.88: 94 | upLeft[1] = (upLeft[1] + bottomRight[1]) / 2 95 | bottomRight[0] = (upLeft[0] + bottomRight[0]) / 2 96 | elif switch > 0.84: 97 | upLeft[0] = (upLeft[0] + bottomRight[0]) / 2 98 | upLeft[1] = (upLeft[1] + bottomRight[1]) / 2 99 | elif switch > 0.80: 100 | bottomRight[0] = (upLeft[0] + bottomRight[0]) / 2 101 | elif switch > 0.76: 102 | upLeft[0] = (upLeft[0] + bottomRight[0]) / 2 103 | elif switch > 0.72: 104 | bottomRight[1] = (upLeft[1] + bottomRight[1]) / 2 105 | elif switch > 0.68: 106 | upLeft[1] = (upLeft[1] + bottomRight[1]) / 2 107 | 108 | ori_inp = cropBox(ori_img, upLeft, bottomRight, opt.inputResH, opt.inputResW) 109 | inp = cropBox(img, upLeft, bottomRight, opt.inputResH, opt.inputResW) 110 | if jointNum == 0: 111 | inp = torch.zeros(3, opt.inputResH, opt.inputResW) 112 | 113 | out_bigcircle = torch.zeros(nJoints, opt.outputResH, opt.outputResW) 114 | out_smallcircle = torch.zeros(nJoints, opt.outputResH, opt.outputResW) 115 | out = torch.zeros(nJoints, opt.outputResH, opt.outputResW) 116 | setMask = torch.zeros(nJoints, opt.outputResH, opt.outputResW) 117 | 118 | # Draw Label 119 | if imgset == 'coco': 120 | for i in range(nJoints_coco): 121 | if part[i][0] > 0 and part[i][0] > upLeft[0] and part[i][1] > upLeft[1] \ 122 | and part[i][0] < bottomRight[0] and part[i][1] < bottomRight[1]: 123 | out_bigcircle[i] = drawBigCircle(out_bigcircle[i], transformBox(part[i], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss * 2) 124 | out_smallcircle[i] = drawSmallCircle(out_smallcircle[i], transformBox(part[i], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss) 125 | out[i] = drawGaussian(out[i], transformBox(part[i], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss) 126 | setMask[i].add_(1) 127 | elif imgset == 'mpii': 128 | for i in range(nJoints_coco, nJoints_coco + nJoints_mpii): 129 | if part[i - nJoints_coco][0] > 0 and part[i - nJoints_coco][0] > upLeft[0] and part[i - nJoints_coco][1] > upLeft[1] \ 130 | and part[i - nJoints_coco][0] < bottomRight[0] and part[i - nJoints_coco][1] < bottomRight[1]: 131 | out_bigcircle[i] = drawBigCircle(out_bigcircle[i], transformBox(part[i - nJoints_coco], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss * 2) 132 | out_smallcircle[i] = drawSmallCircle(out_smallcircle[i], transformBox(part[i - nJoints_coco], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss) 133 | out[i] = drawGaussian(out[i], transformBox(part[i - nJoints_coco], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss) 134 | setMask[i].add_(1) 135 | else: 136 | for i in range(nJoints_coco, nJoints_coco + nJoints_mpii): 137 | if part[i - nJoints_coco][0] > 0 and part[i - nJoints_coco][0] > upLeft[0] and part[i - nJoints_coco][1] > upLeft[1] \ 138 | and part[i - nJoints_coco][0] < bottomRight[0] and part[i - nJoints_coco][1] < bottomRight[1]: 139 | out_bigcircle[i] = drawBigCircle(out_bigcircle[i], transformBox(part[i - nJoints_coco], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, 
opt.outputResW), opt.hmGauss * 2) 140 | out_smallcircle[i] = drawSmallCircle(out_smallcircle[i], transformBox(part[i - nJoints_coco], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss) 141 | out[i] = drawGaussian(out[i], transformBox(part[i - nJoints_coco], upLeft, bottomRight, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW), opt.hmGauss) 142 | if i != 6 + nJoints_coco and i != 7 + nJoints_coco: 143 | setMask[i].add_(1) 144 | 145 | if opt.debug: 146 | preds_hm, preds_img, preds_scores = getPrediction(out.unsqueeze(0), upLeft.unsqueeze(0), bottomRight.unsqueeze(0), opt.inputResH, 147 | opt.inputResW, opt.outputResH, opt.outputResW) 148 | tmp_preds = preds_hm.mul(opt.inputResH / opt.outputResH) 149 | drawCOCO(ori_inp.unsqueeze(0), tmp_preds, preds_scores) 150 | 151 | if train: 152 | # Flip 153 | if random.uniform(0, 1) < 0.5: 154 | inp = flip(inp) 155 | ori_inp = flip(ori_inp) 156 | out_bigcircle = shuffleLR(flip(out_bigcircle), dataset) 157 | out_smallcircle = shuffleLR(flip(out_smallcircle), dataset) 158 | out = shuffleLR(flip(out), dataset) 159 | # Rotate 160 | r = rnd(opt.rotate) 161 | if random.uniform(0, 1) < 0.6: 162 | r = 0 163 | if r != 0: 164 | inp = cv_rotate(inp, r, opt.inputResW, opt.inputResH) 165 | out_bigcircle = cv_rotate(out_bigcircle, r, opt.outputResW, opt.outputResH) 166 | out_smallcircle = cv_rotate(out_smallcircle, r, opt.outputResW, opt.outputResH) 167 | out = cv_rotate(out, r, opt.outputResW, opt.outputResH) 168 | 169 | return inp, out_bigcircle, out_smallcircle, out, setMask 170 | -------------------------------------------------------------------------------- /Track/Tracker.py: -------------------------------------------------------------------------------- 1 | import time 2 | import numpy as np 3 | from collections import deque 4 | 5 | from .linear_assignment import min_cost_matching, matching_cascade 6 | from .kalman_filter import KalmanFilter 7 | from .iou_matching import iou_cost 8 | 9 | 10 | class TrackState: 11 | """Enumeration type for the single target track state. Newly created tracks are 12 | classified as `tentative` until enough evidence has been collected. Then, 13 | the track state is changed to `confirmed`. Tracks that are no longer alive 14 | are classified as `deleted` to mark them for removal from the set of active 15 | tracks. 16 | """ 17 | Tentative = 1 18 | Confirmed = 2 19 | Deleted = 3 20 | 21 | 22 | class Detection(object): 23 | """This class represents a bounding box, keypoints, score of person detected 24 | in a single image. 25 | 26 | Args: 27 | tlbr: (float array) Of shape [top, left, bottom, right]., 28 | keypoints: (float array) Of shape [node, pts]., 29 | confidence: (float) Confidence score of detection. 30 | """ 31 | def __init__(self, tlbr, keypoints, confidence): 32 | self.tlbr = tlbr 33 | self.keypoints = keypoints 34 | self.confidence = confidence 35 | 36 | def to_tlwh(self): 37 | """Get (top, left, width, height). 38 | """ 39 | ret = self.tlbr.copy() 40 | ret[2:] = ret[2:] - ret[:2] 41 | return ret 42 | 43 | def to_xyah(self): 44 | """Get (x_center, y_center, aspect ratio, height). 
45 | """ 46 | ret = self.to_tlwh() 47 | ret[:2] += ret[2:] / 2 48 | ret[2] /= ret[3] 49 | return ret 50 | 51 | 52 | class Track: 53 | def __init__(self, mean, covariance, track_id, n_init, max_age=30, buffer=30): 54 | self.mean = mean 55 | self.covariance = covariance 56 | self.track_id = track_id 57 | self.hist = 1 58 | self.age = 1 59 | self.time_since_update = 0 60 | self.n_init = n_init 61 | self.max_age = max_age 62 | 63 | # keypoints list for use in Actions prediction. 64 | self.keypoints_list = deque(maxlen=buffer) 65 | 66 | self.state = TrackState.Tentative 67 | 68 | def to_tlwh(self): 69 | ret = self.mean[:4].copy() 70 | ret[2] *= ret[3] 71 | ret[:2] -= ret[2:] / 2 72 | return ret 73 | 74 | def to_tlbr(self): 75 | ret = self.to_tlwh() 76 | ret[2:] = ret[:2] + ret[2:] 77 | return ret 78 | 79 | def get_center(self): 80 | return self.mean[:2].copy() 81 | 82 | def predict(self, kf): 83 | """Propagate the state distribution to the current time step using a 84 | Kalman filter prediction step. 85 | """ 86 | self.mean, self.covariance = kf.predict(self.mean, self.covariance) 87 | self.age += 1 88 | self.time_since_update += 1 89 | 90 | def update(self, kf, detection): 91 | """Perform Kalman filter measurement update step. 92 | """ 93 | self.mean, self.covariance = kf.update(self.mean, self.covariance, 94 | detection.to_xyah()) 95 | self.keypoints_list.append(detection.keypoints) 96 | 97 | self.hist += 1 98 | self.time_since_update = 0 99 | if self.state == TrackState.Tentative and self.hist >= self.n_init: 100 | self.state = TrackState.Confirmed 101 | 102 | def mark_missed(self): 103 | """Mark this track as missed (no association at the current time step). 104 | """ 105 | if self.state == TrackState.Tentative: 106 | self.state = TrackState.Deleted 107 | elif self.time_since_update > self.max_age: 108 | self.state = TrackState.Deleted 109 | 110 | def is_tentative(self): 111 | return self.state == TrackState.Tentative 112 | 113 | def is_confirmed(self): 114 | return self.state == TrackState.Confirmed 115 | 116 | def is_deleted(self): 117 | return self.state == TrackState.Deleted 118 | 119 | 120 | class Tracker: 121 | def __init__(self, max_iou_distance=0.7, max_age=30, n_init=5): 122 | self.max_iou_dist = max_iou_distance 123 | self.max_age = max_age 124 | self.n_init = n_init 125 | 126 | self.kf = KalmanFilter() 127 | self.tracks = [] 128 | self._next_id = 1 129 | 130 | def predict(self): 131 | """Propagate track state distributions one time step forward. 132 | This function should be called once every time step, before `update`. 133 | """ 134 | for track in self.tracks: 135 | track.predict(self.kf) 136 | 137 | def update(self, detections): 138 | """Perform measurement update and track management. 139 | Parameters 140 | ---------- 141 | detections : List[deep_sort.detection.Detection] 142 | A list of detections at the current time step. 143 | """ 144 | # Run matching cascade. 145 | matches, unmatched_tracks, unmatched_detections = self._match(detections) 146 | 147 | # Update matched tracks set. 148 | for track_idx, detection_idx in matches: 149 | self.tracks[track_idx].update(self.kf, detections[detection_idx]) 150 | # Update tracks that missing. 151 | for track_idx in unmatched_tracks: 152 | self.tracks[track_idx].mark_missed() 153 | # Create new detections track. 154 | for detection_idx in unmatched_detections: 155 | self._initiate_track(detections[detection_idx]) 156 | 157 | # Remove deleted tracks. 
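The `Detection` and `Track` classes above move between three box encodings: corner form `tlbr`, top-left-plus-size `tlwh`, and the `xyah` (centre, aspect ratio, height) form fed to the Kalman filter. A short NumPy round-trip sketch of those conversions, using a made-up box:

```python
import numpy as np

tlbr = np.array([100., 50., 180., 250.])   # xmin, ymin, xmax, ymax (illustrative)

# tlbr -> tlwh: size = bottom-right minus top-left.
tlwh = tlbr.copy()
tlwh[2:] -= tlwh[:2]                       # [100, 50, 80, 200]

# tlwh -> xyah: centre, aspect ratio (w / h), height.
xyah = tlwh.copy()
xyah[:2] += xyah[2:] / 2
xyah[2] /= xyah[3]                         # [140, 150, 0.4, 200]

# xyah -> tlbr again, mirroring Track.to_tlwh / Track.to_tlbr.
back = xyah.copy()
back[2] *= back[3]                         # w = a * h
back[:2] -= back[2:] / 2
back[2:] += back[:2]
assert np.allclose(back, tlbr)
```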
158 | self.tracks = [t for t in self.tracks if not t.is_deleted()] 159 | 160 | def _match(self, detections): 161 | confirmed_tracks, unconfirmed_tracks = [], [] 162 | for i, t in enumerate(self.tracks): 163 | if t.is_confirmed(): 164 | confirmed_tracks.append(i) 165 | else: 166 | unconfirmed_tracks.append(i) 167 | 168 | matches_a, unmatched_tracks_a, unmatched_detections = matching_cascade( 169 | iou_cost, self.max_iou_dist, self.max_age, self.tracks, detections, confirmed_tracks 170 | ) 171 | 172 | track_candidates = unconfirmed_tracks + [ 173 | k for k in unmatched_tracks_a if self.tracks[k].time_since_update == 1] 174 | unmatched_tracks_a = [ 175 | k for k in unmatched_tracks_a if self.tracks[k].time_since_update != 1] 176 | 177 | matches_b, unmatched_tracks_b, unmatched_detections = min_cost_matching( 178 | iou_cost, self.max_iou_dist, self.tracks, detections, track_candidates, unmatched_detections 179 | ) 180 | 181 | matches = matches_a + matches_b 182 | unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b)) 183 | return matches, unmatched_tracks, unmatched_detections 184 | 185 | def _initiate_track(self, detection): 186 | if detection.confidence < 0.4: 187 | return 188 | mean, covariance = self.kf.initiate(detection.to_xyah()) 189 | self.tracks.append(Track(mean, covariance, self._next_id, self.n_init, self.max_age)) 190 | self._next_id += 1 191 | 192 | 193 | -------------------------------------------------------------------------------- /Track/iou_matching.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | INFTY_COST = 1e+5 4 | 5 | 6 | def iou(bbox, candidates): 7 | """Compute intersection over union. 8 | Parameters 9 | ---------- 10 | bbox : ndarray 11 | A bounding box in format `(xmin, ymin, xmax, ymax)`. 12 | candidates : ndarray 13 | A matrix of candidate bounding boxes (one per row) in the same format 14 | as `bbox`. 15 | 16 | Returns 17 | ------- 18 | ndarray 19 | The intersection over union in [0, 1] between the `bbox` and each 20 | candidate. A higher score means a larger fraction of the `bbox` is 21 | occluded by the candidate. 22 | """ 23 | #bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:] 24 | bbox_tl, bbox_br = bbox[:2], bbox[2:] 25 | candidates_tl = candidates[:, :2] 26 | candidates_br = candidates[:, 2:] # + candidates[:, :2] 27 | 28 | tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis], 29 | np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]] 30 | br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis], 31 | np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]] 32 | wh = np.maximum(0., br - tl) 33 | 34 | area_intersection = wh.prod(axis=1) 35 | area_bbox = (bbox[2:] - bbox[:2]).prod() 36 | area_candidates = (candidates[:, 2:] - candidates[:, :2]).prod(axis=1) 37 | return area_intersection / (area_bbox + area_candidates - area_intersection) 38 | 39 | 40 | def iou_cost(tracks, detections, track_indices=None, detection_indices=None): 41 | """An intersection over union distance metric. 42 | Parameters 43 | ---------- 44 | tracks : List[Track] 45 | A list of tracks. 46 | detections : List[Detection] 47 | A list of detections. 48 | track_indices : Optional[List[int]] 49 | A list of indices to tracks that should be matched. Defaults to 50 | all `tracks`. 51 | detection_indices : Optional[List[int]] 52 | A list of indices to detections that should be matched. Defaults 53 | to all `detections`. 
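`iou` above expects one box and a matrix of candidate boxes, all in `(xmin, ymin, xmax, ymax)` form. A quick sanity check with toy boxes, assuming the repository root is on `PYTHONPATH` so `Track` is importable:

```python
import numpy as np
from Track.iou_matching import iou

bbox = np.array([0., 0., 10., 10.])
candidates = np.array([[0., 0., 10., 10.],     # identical        -> 1.0
                       [5., 5., 15., 15.],     # partial overlap  -> 25 / 175
                       [20., 20., 30., 30.]])  # disjoint         -> 0.0
print(iou(bbox, candidates))
```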
54 | 55 | Returns 56 | ------- 57 | ndarray 58 | Returns a cost matrix of shape 59 | len(track_indices), len(detection_indices) where entry (i, j) is 60 | `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`. 61 | 62 | """ 63 | if track_indices is None: 64 | track_indices = np.arange(len(tracks)) 65 | if detection_indices is None: 66 | detection_indices = np.arange(len(detections)) 67 | 68 | cost_matrix = np.zeros((len(track_indices), len(detection_indices))) 69 | for row, track_idx in enumerate(track_indices): 70 | #if tracks[track_idx].time_since_update > 1: 71 | # cost_matrix[row, :] = INFTY_COST 72 | # continue 73 | 74 | bbox = tracks[track_idx].to_tlbr() 75 | candidates = np.asarray([detections[i].tlbr for i in detection_indices]) 76 | cost_matrix[row, :] = 1. - iou(bbox, candidates) 77 | 78 | return cost_matrix 79 | -------------------------------------------------------------------------------- /Track/kalman_filter.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | import scipy.linalg 4 | 5 | 6 | class KalmanFilter(object): 7 | """A simple Kalman filter for tracking bounding boxes in image space. 8 | 9 | The 8-dimensional state space 10 | x, y, a, h, vx, vy, va, vh 11 | 12 | contains the bounding box center position (x, y), aspect ratio a, height h, 13 | and their respective velocities. 14 | 15 | Object motion follows a constant velocity model. The bounding box location 16 | (x, y, a, h) is taken as direct observation of the state space (linear 17 | observation model). 18 | """ 19 | def __init__(self): 20 | ndim, dt = 4, 1. 21 | 22 | # Create Kalman filter model matrices. 23 | self._motion_mat = np.eye(2 * ndim, 2 * ndim) 24 | for i in range(ndim): 25 | self._motion_mat[i, ndim + i] = dt 26 | self._update_mat = np.eye(ndim, 2 * ndim) 27 | 28 | # Motion and observation uncertainty are chosen relative to the current 29 | # state estimate. These weights control the amount of uncertainty in 30 | # the model. This is a bit hacky. 31 | self._std_weight_position = 1. / 20 32 | self._std_weight_velocity = 1. / 160 33 | 34 | def initiate(self, measurement): 35 | """Create track from unassociated measurement. 36 | Parameters 37 | ---------- 38 | measurement : ndarray 39 | Bounding box coordinates (x, y, a, h) with center position (x, y), 40 | aspect ratio a, and height h. 41 | 42 | Returns 43 | ------- 44 | (ndarray, ndarray) 45 | Returns the mean vector (8 dimensional) and covariance matrix (8x8 46 | dimensional) of the new track. Unobserved velocities are initialized 47 | to 0 mean. 48 | """ 49 | mean_pos = measurement 50 | mean_vel = np.zeros_like(mean_pos) 51 | mean = np.r_[mean_pos, mean_vel] 52 | 53 | std = [ 54 | 2 * self._std_weight_position * measurement[3], 55 | 2 * self._std_weight_position * measurement[3], 56 | 1e-2, 57 | 2 * self._std_weight_position * measurement[3], 58 | 10 * self._std_weight_velocity * measurement[3], 59 | 10 * self._std_weight_velocity * measurement[3], 60 | 1e-5, 61 | 10 * self._std_weight_velocity * measurement[3]] 62 | covariance = np.diag(np.square(std)) 63 | return mean, covariance 64 | 65 | def predict(self, mean, covariance): 66 | """Run Kalman filter prediction step. 67 | Parameters 68 | ---------- 69 | mean : ndarray 70 | The 8 dimensional mean vector of the object state at the previous 71 | time step. 72 | covariance : ndarray 73 | The 8x8 dimensional covariance matrix of the object state at the 74 | previous time step. 
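The constructor above builds an 8x8 constant-velocity transition matrix: each of the four observed components `(x, y, a, h)` is advanced by its own velocity once per step. A tiny sketch of that matrix and of what one prediction step does to a state vector (the velocities are invented):

```python
import numpy as np

ndim, dt = 4, 1.0
F = np.eye(2 * ndim)
for i in range(ndim):
    F[i, ndim + i] = dt        # position_i <- position_i + dt * velocity_i

state = np.array([320., 240., 0.5, 160.,   # x, y, a, h
                    2.,   1., 0.0,   0.])  # vx, vy, va, vh (made up)
print(F @ state)               # [322. 241. 0.5 160. 2. 1. 0. 0.]
```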
75 | 76 | Returns 77 | ------- 78 | (ndarray, ndarray) 79 | Returns the mean vector and covariance matrix of the predicted 80 | state. Unobserved velocities are initialized to 0 mean. 81 | """ 82 | std_pos = [ 83 | self._std_weight_position * mean[3], 84 | self._std_weight_position * mean[3], 85 | 1e-2, 86 | self._std_weight_position * mean[3]] 87 | std_vel = [ 88 | self._std_weight_velocity * mean[3], 89 | self._std_weight_velocity * mean[3], 90 | 1e-5, 91 | self._std_weight_velocity * mean[3]] 92 | motion_cov = np.diag(np.square(np.r_[std_pos, std_vel])) 93 | 94 | mean = np.dot(self._motion_mat, mean) 95 | covariance = np.linalg.multi_dot(( 96 | self._motion_mat, covariance, self._motion_mat.T)) + motion_cov 97 | 98 | return mean, covariance 99 | 100 | def project(self, mean, covariance): 101 | """Project state distribution to measurement space. 102 | Parameters 103 | ---------- 104 | mean : ndarray 105 | The state's mean vector (8 dimensional array). 106 | covariance : ndarray 107 | The state's covariance matrix (8x8 dimensional). 108 | 109 | Returns 110 | ------- 111 | (ndarray, ndarray) 112 | Returns the projected mean and covariance matrix of the given state 113 | estimate. 114 | """ 115 | std = [ 116 | self._std_weight_position * mean[3], 117 | self._std_weight_position * mean[3], 118 | 1e-1, 119 | self._std_weight_position * mean[3]] 120 | innovation_cov = np.diag(np.square(std)) 121 | 122 | mean = np.dot(self._update_mat, mean) 123 | covariance = np.linalg.multi_dot(( 124 | self._update_mat, covariance, self._update_mat.T)) 125 | return mean, covariance + innovation_cov 126 | 127 | def update(self, mean, covariance, measurement): 128 | """Run Kalman filter correction step. 129 | Parameters 130 | ---------- 131 | mean : ndarray 132 | The predicted state's mean vector (8 dimensional). 133 | covariance : ndarray 134 | The state's covariance matrix (8x8 dimensional). 135 | measurement : ndarray 136 | The 4 dimensional measurement vector (x, y, a, h), where (x, y) 137 | is the center position, a the aspect ratio, and h the height of the 138 | bounding box. 139 | 140 | Returns 141 | ------- 142 | (ndarray, ndarray) 143 | Returns the measurement-corrected state distribution. 144 | """ 145 | projected_mean, projected_cov = self.project(mean, covariance) 146 | 147 | chol_factor, lower = scipy.linalg.cho_factor( 148 | projected_cov, lower=True, check_finite=False) 149 | kalman_gain = scipy.linalg.cho_solve( 150 | (chol_factor, lower), np.dot(covariance, self._update_mat.T).T, 151 | check_finite=False).T 152 | innovation = measurement - projected_mean 153 | 154 | new_mean = mean + np.dot(innovation, kalman_gain.T) 155 | new_covariance = covariance - np.linalg.multi_dot(( 156 | kalman_gain, projected_cov, kalman_gain.T)) 157 | return new_mean, new_covariance 158 | 159 | def gating_distance(self, mean, covariance, measurements, 160 | only_position=False): 161 | """Compute gating distance between state distribution and measurements. 162 | A suitable distance threshold can be obtained from `chi2inv95`. If 163 | `only_position` is False, the chi-square distribution has 4 degrees of 164 | freedom, otherwise 2. 165 | 166 | Parameters 167 | ---------- 168 | mean : ndarray 169 | Mean vector over the state distribution (8 dimensional). 170 | covariance : ndarray 171 | Covariance of the state distribution (8x8 dimensional). 
172 | measurements : ndarray 173 | An Nx4 dimensional matrix of N measurements, each in 174 | format (x, y, a, h) where (x, y) is the bounding box center 175 | position, a the aspect ratio, and h the height. 176 | only_position : Optional[bool] 177 | If True, distance computation is done with respect to the bounding 178 | box center position only. 179 | 180 | Returns 181 | ------- 182 | ndarray 183 | Returns an array of length N, where the i-th element contains the 184 | squared Mahalanobis distance between (mean, covariance) and 185 | `measurements[i]`. 186 | """ 187 | mean, covariance = self.project(mean, covariance) 188 | if only_position: 189 | mean, covariance = mean[:2], covariance[:2, :2] 190 | measurements = measurements[:, :2] 191 | 192 | cholesky_factor = np.linalg.cholesky(covariance) 193 | d = measurements - mean 194 | z = scipy.linalg.solve_triangular( 195 | cholesky_factor, d.T, lower=True, check_finite=False, 196 | overwrite_b=True) 197 | squared_maha = np.sum(z * z, axis=0) 198 | return squared_maha 199 | -------------------------------------------------------------------------------- /Track/linear_assignment.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | #from sklearn.utils.linear_assignment_ import linear_assignment 3 | from scipy.optimize import linear_sum_assignment 4 | 5 | """ 6 | Table for the 0.95 quantile of the chi-square distribution with N degrees of 7 | freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv 8 | function and used as Mahalanobis gating threshold. 9 | """ 10 | chi2inv95 = { 11 | 1: 3.8415, 12 | 2: 5.9915, 13 | 3: 7.8147, 14 | 4: 9.4877, 15 | 5: 11.070, 16 | 6: 12.592, 17 | 7: 14.067, 18 | 8: 15.507, 19 | 9: 16.919} 20 | INFTY_COST = 1e+5 21 | 22 | 23 | def min_cost_matching(distance_metric, max_distance, tracks, detections, 24 | track_indices=None, detection_indices=None): 25 | """Solve linear assignment problem. 26 | Parameters 27 | ---------- 28 | distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray 29 | The distance metric is given a list of tracks and detections as well as 30 | a list of N track indices and M detection indices. The metric should 31 | return the NxM dimensional cost matrix, where element (i, j) is the 32 | association cost between the i-th track in the given track indices and 33 | the j-th detection in the given detection_indices. 34 | max_distance : float 35 | Gating threshold. Associations with cost larger than this value are 36 | disregarded. 37 | tracks : List[Track] 38 | A list of predicted tracks at the current time step. 39 | detections : List[Detection] 40 | A list of detections at the current time step. 41 | track_indices : List[int] 42 | List of track indices that maps rows in `cost_matrix` to tracks in 43 | `tracks` (see description above). 44 | detection_indices : List[int] 45 | List of detection indices that maps columns in `cost_matrix` to 46 | detections in `detections` (see description above). 47 | 48 | Returns 49 | ------- 50 | (List[(int, int)], List[int], List[int]) 51 | Returns a tuple with the following three entries: 52 | * A list of matched track and detection indices. 53 | * A list of unmatched track indices. 54 | * A list of unmatched detection indices. 
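With `gating_distance` and the `chi2inv95` table both in place, a minimal usage sketch of how they combine during association (an assumed example with invented box values; the repo root must be on `PYTHONPATH`):

```python
import numpy as np
from Track.kalman_filter import KalmanFilter
from Track.linear_assignment import chi2inv95

kf = KalmanFilter()
# Start a track from one (x, y, a, h) measurement, then predict one step ahead.
mean, cov = kf.initiate(np.array([320., 240., 0.5, 160.]))
mean, cov = kf.predict(mean, cov)

measurements = np.array([[322., 241., 0.5, 158.],    # plausible continuation
                         [600., 100., 0.4, 120.]])   # far-away detection
d2 = kf.gating_distance(mean, cov, measurements)
print(d2 <= chi2inv95[4])   # 4 DoF for the full (x, y, a, h) state -> [ True False]
```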
55 | """ 56 | if track_indices is None: 57 | track_indices = np.arange(len(tracks)) 58 | if detection_indices is None: 59 | detection_indices = np.arange(len(detections)) 60 | 61 | if len(detection_indices) == 0 or len(track_indices) == 0: 62 | return [], track_indices, detection_indices # Nothing to match. 63 | 64 | cost_matrix = distance_metric(tracks, detections, track_indices, detection_indices) 65 | cost_matrix[cost_matrix > max_distance] = max_distance + 1e-5 66 | indices = linear_sum_assignment(cost_matrix) 67 | indices = np.array(indices).transpose() 68 | 69 | matches, unmatched_tracks, unmatched_detections = [], [], [] 70 | for col, detection_idx in enumerate(detection_indices): 71 | if col not in indices[:, 1]: 72 | unmatched_detections.append(detection_idx) 73 | for row, track_idx in enumerate(track_indices): 74 | if row not in indices[:, 0]: 75 | unmatched_tracks.append(track_idx) 76 | for row, col in indices: 77 | track_idx = track_indices[row] 78 | detection_idx = detection_indices[col] 79 | if cost_matrix[row, col] > max_distance: 80 | unmatched_tracks.append(track_idx) 81 | unmatched_detections.append(detection_idx) 82 | else: 83 | matches.append((track_idx, detection_idx)) 84 | 85 | return matches, unmatched_tracks, unmatched_detections 86 | 87 | 88 | def matching_cascade(distance_metric, max_distance, cascade_depth, tracks, detections, 89 | track_indices=None, detection_indices=None): 90 | """Run matching cascade. 91 | Parameters 92 | ---------- 93 | distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray 94 | The distance metric is given a list of tracks and detections as well as 95 | a list of N track indices and M detection indices. The metric should 96 | return the NxM dimensional cost matrix, where element (i, j) is the 97 | association cost between the i-th track in the given track indices and 98 | the j-th detection in the given detection indices. 99 | max_distance : float 100 | Gating threshold. Associations with cost larger than this value are 101 | disregarded. 102 | cascade_depth: int 103 | The cascade depth, should be se to the maximum track age. 104 | tracks : List[Track] 105 | A list of predicted tracks at the current time step. 106 | detections : List[Detection] 107 | A list of detections at the current time step. 108 | track_indices : Optional[List[int]] 109 | List of track indices that maps rows in `cost_matrix` to tracks in 110 | `tracks` (see description above). Defaults to all tracks. 111 | detection_indices : Optional[List[int]] 112 | List of detection indices that maps columns in `cost_matrix` to 113 | detections in `detections` (see description above). Defaults to all 114 | detections. 115 | 116 | Returns 117 | ------- 118 | (List[(int, int)], List[int], List[int]) 119 | Returns a tuple with the following three entries: 120 | * A list of matched track and detection indices. 121 | * A list of unmatched track indices. 122 | * A list of unmatched detection indices. 
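`min_cost_matching` above wraps `scipy.optimize.linear_sum_assignment` with a gating step: costs above `max_distance` are clipped just over the threshold, assigned anyway, then discarded. The same pattern in isolation, on a hand-written cost matrix:

```python
import numpy as np
from scipy.optimize import linear_sum_assignment

max_distance = 0.7
cost = np.array([[0.1, 0.9, 0.8],    # track 0 matches detection 0 well
                 [0.8, 0.3, 0.9]])   # track 1 matches detection 1 well
cost[cost > max_distance] = max_distance + 1e-5

rows, cols = linear_sum_assignment(cost)
matches = [(r, c) for r, c in zip(rows, cols) if cost[r, c] <= max_distance]
matched_d = {c for _, c in matches}
unmatched_dets = [c for c in range(cost.shape[1]) if c not in matched_d]
print(matches)          # [(0, 0), (1, 1)]
print(unmatched_dets)   # [2] -- would start a new track in Tracker.update
```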
123 | """ 124 | if track_indices is None: 125 | track_indices = list(range(len(tracks))) 126 | if detection_indices is None: 127 | detection_indices = list(range(len(detections))) 128 | 129 | unmatched_detections = detection_indices 130 | matches = [] 131 | for level in range(cascade_depth): 132 | if len(unmatched_detections) == 0: # No detections left 133 | break 134 | 135 | track_indices_l = [k for k in track_indices 136 | if tracks[k].time_since_update == 1 + level] 137 | if len(track_indices_l) == 0: # Nothing to match at this level 138 | continue 139 | 140 | matches_l, _, unmatched_detections = min_cost_matching( 141 | distance_metric, max_distance, tracks, detections, track_indices_l, unmatched_detections) 142 | matches += matches_l 143 | 144 | unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches)) 145 | return matches, unmatched_tracks, unmatched_detections 146 | 147 | 148 | def gate_cost_matrix(kf, cost_matrix, tracks, detections, track_indices, detection_indices, 149 | gated_cost=INFTY_COST, only_position=False): 150 | """Invalidate infeasible entries in cost matrix based on the state 151 | distributions obtained by Kalman filtering. 152 | Parameters 153 | ---------- 154 | kf : The Kalman filter. 155 | cost_matrix : ndarray 156 | The NxM dimensional cost matrix, where N is the number of track indices 157 | and M is the number of detection indices, such that entry (i, j) is the 158 | association cost between `tracks[track_indices[i]]` and 159 | `detections[detection_indices[j]]`. 160 | tracks : List[Track] 161 | A list of predicted tracks at the current time step. 162 | detections : List[Detection] 163 | A list of detections at the current time step. 164 | track_indices : List[int] 165 | List of track indices that maps rows in `cost_matrix` to tracks in 166 | `tracks` (see description above). 167 | detection_indices : List[int] 168 | List of detection indices that maps columns in `cost_matrix` to 169 | detections in `detections` (see description above). 170 | gated_cost : Optional[float] 171 | Entries in the cost matrix corresponding to infeasible associations are 172 | set this value. Defaults to a very large value. 173 | only_position : Optional[bool] 174 | If True, only the x, y position of the state distribution is considered 175 | during gating. Defaults to False. 176 | 177 | Returns 178 | ------- 179 | ndarray 180 | Returns the modified cost matrix. 
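`matching_cascade` above walks the tracks level by level, so tracks that were updated more recently (smaller `time_since_update`) get to claim detections first. A toy illustration of that ordering, with hypothetical track ages:

```python
# Hypothetical time_since_update per track id.
time_since_update = {0: 1, 1: 3, 2: 1}
cascade_depth = 30

for level in range(cascade_depth):
    level_tracks = [k for k, t in time_since_update.items() if t == 1 + level]
    if level_tracks:
        print('level', level, '-> tracks', level_tracks, 'matched first')
# level 0 -> tracks [0, 2] matched first
# level 2 -> tracks [1] matched first
```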
181 | """ 182 | gating_dim = 2 if only_position else 4 183 | gating_threshold = chi2inv95[gating_dim] 184 | measurements = np.asarray([detections[i].to_xyah() for i in detection_indices]) 185 | for row, track_idx in enumerate(track_indices): 186 | track = tracks[track_idx] 187 | gating_distance = kf.gating_distance(track.mean, track.covariance, 188 | measurements, only_position) 189 | cost_matrix[row, gating_distance > gating_threshold] = gated_cost 190 | 191 | return cost_matrix 192 | -------------------------------------------------------------------------------- /Visualizer.py: -------------------------------------------------------------------------------- 1 | # import matplotlib.gridspec as gridspec 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | import os 5 | import cv2 6 | import torch 7 | import imageio 8 | from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay 9 | from matplotlib.font_manager import FontProperties 10 | 11 | fp = FontProperties(family='Tlwg Typo', size=10) 12 | 13 | 14 | def plot_piechart(x, labels, title='', fig_size=(10, 5), save=None): 15 | fig = plt.figure(figsize=fig_size) 16 | 17 | ax1 = fig.add_subplot(121) 18 | wedges, texts = ax1.pie(x, labels=labels, startangle=90) 19 | 20 | percents = x / sum(x) * 100. 21 | annots = ['{} - {:.2f}% ({:d})'.format(c, p, n) for c, p, n 22 | in zip(labels, percents, x)] 23 | 24 | ax2 = fig.add_subplot(122) 25 | ax2.axis('off') 26 | ax2.legend(wedges, annots, loc='center', fontsize=10) 27 | 28 | fig.suptitle(title) 29 | 30 | if save is not None: 31 | fig.savefig(save) 32 | plt.close() 33 | else: 34 | return fig 35 | 36 | 37 | def plot_x(x, title='', fig_size=(12, 10)): 38 | fig = plt.figure(figsize=fig_size) 39 | x = np.squeeze(x) 40 | 41 | if len(x.shape) == 1: 42 | plt.plot(x) 43 | 44 | elif len(x.shape) == 2: 45 | plt.imshow(x, cmap='gray') 46 | plt.axis('off') 47 | 48 | elif len(x.shape) == 3: 49 | if x.shape[-1] == 3: 50 | plt.imshow(x) 51 | plt.axis('off') 52 | else: 53 | fig = plot_multiImage(x.transpose(2, 0, 1), fig_size=fig_size) 54 | 55 | elif len(x.shape) == 4: 56 | fig = plot_multiImage(x.transpose(3, 0, 1, 2), fig_size=fig_size) 57 | 58 | fig.suptitle(title) 59 | return fig 60 | 61 | 62 | def plot_bars(x, y, title='', ylim=None, save=None): 63 | fig = plt.figure() 64 | bars = plt.bar(x, y) 65 | plt.ylim(ylim) 66 | plt.title(title) 67 | for b in bars: 68 | plt.annotate('{:.2f}'.format(b.get_height()), 69 | xy=(b.get_x(), b.get_height())) 70 | 71 | if save is not None: 72 | plt.savefig(save) 73 | plt.close() 74 | else: 75 | return fig 76 | 77 | 78 | def plot_graphs(x_list, legends, title, ylabel, xlabel='epoch', xlim=None, save=None): 79 | fig = plt.figure() 80 | for x in x_list: 81 | plt.plot(x) 82 | 83 | plt.legend(legends) 84 | plt.xlabel(xlabel) 85 | plt.ylabel(ylabel) 86 | plt.title(title) 87 | plt.xlim(xlim) 88 | 89 | if save is not None: 90 | plt.savefig(save) 91 | plt.close() 92 | else: 93 | return fig 94 | 95 | 96 | # images in shape (amount, h, w, c). 
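The plotting helpers above either return a matplotlib figure or write it straight to disk when `save` is given. A small usage sketch for training curves with `plot_graphs` (loss values invented; importing `Visualizer` assumes matplotlib, scikit-learn and imageio are installed):

```python
from Visualizer import plot_graphs

train_loss = [0.92, 0.61, 0.48, 0.41, 0.38]   # hypothetical per-epoch values
valid_loss = [0.95, 0.70, 0.58, 0.55, 0.54]

# With `save` set, the figure is written to disk and closed; omit it to get the figure back.
plot_graphs([train_loss, valid_loss], legends=['train', 'valid'],
            title='Training vs. validation loss', ylabel='loss',
            save='loss_curve.png')
```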
97 | def plot_multiImage(images, labels=None, pred=None, title=None, fig_size=(12, 10), tight_layout=False, save=None): 98 | n = int(np.ceil(np.sqrt(images.shape[0]))) 99 | fig = plt.figure(figsize=fig_size) 100 | 101 | for i in range(images.shape[0]): 102 | ax = fig.add_subplot(n, n, i + 1) 103 | 104 | if len(images[i].shape) == 2 or images[i].shape[-1] == 1: 105 | ax.imshow(images[i], cmap='gray') 106 | else: 107 | ax.imshow(images[i]) 108 | 109 | if labels is not None: 110 | ax.set_xlabel(labels[i], color='g', fontproperties=fp) 111 | if labels is not None and pred is not None: 112 | if labels[i] == pred[i]: 113 | clr = 'g' 114 | else: 115 | if len(labels[i]) == len(pred[i]): 116 | clr = 'm' 117 | else: 118 | clr = 'r' 119 | 120 | ax.set_xlabel('True: {}\nPred : {}'.format(u'' + labels[i], u'' + pred[i]), 121 | color=clr, fontproperties=fp) 122 | 123 | if title is not None: 124 | fig.suptitle(title) 125 | 126 | if tight_layout: # This make process slow if too many images. 127 | fig.tight_layout() 128 | 129 | if save is not None: 130 | plt.savefig(save) 131 | plt.close() 132 | else: 133 | return fig 134 | 135 | 136 | def plot_confusion_metrix(y_true, y_pred, labels=None, title='', normalize=None, 137 | fig_size=(10, 10), save=None): 138 | cm = confusion_matrix(y_true, y_pred, normalize=normalize) 139 | if labels is None: 140 | labels = list(set(y_trues)) 141 | 142 | disp = ConfusionMatrixDisplay(cm, labels) 143 | disp.plot(xticks_rotation=45) 144 | disp.figure_.set_size_inches(fig_size) 145 | disp.figure_.suptitle(title) 146 | disp.figure_.tight_layout() 147 | 148 | if save is not None: 149 | disp.figure_.savefig(save) 150 | plt.close() 151 | else: 152 | return disp.figure_ 153 | 154 | 155 | def get_fig_image(fig): # figure to array of image. 156 | fig.canvas.draw() 157 | img = np.array(fig.canvas.renderer._renderer) 158 | return img 159 | 160 | 161 | def vid2gif(video_file, output_file, delay=0.05): 162 | with imageio.get_writer(output_file, mode='I', duration=delay) as writer: 163 | cap = cv2.VideoCapture(video_file) 164 | while True: 165 | ret, frame = cap.read() 166 | if ret: 167 | #frame = cv2.resize(frame, (0, 0), fx=0.5, fy=0.5) 168 | frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) 169 | writer.append_data(frame) 170 | else: 171 | break 172 | 173 | #==========================================================================================# 174 | # For Fall_AlphaPose. 175 | 176 | 177 | PARTS_PAIR = [(0, 13), (1, 2), (1, 3), (3, 5), (2, 4), (4, 6), (13, 7), (13, 8), 178 | (7, 9), (8, 10), (9, 11), (10, 12)] 179 | CLASS_NAMES = ['Standing', 'Walking', 'Sitting', 'Lying Down', 180 | 'Stand up', 'Sit down', 'Fall Down'] 181 | 182 | 183 | def plot_poseframes(data, labels=None, frames_stamp=None, delay=0.2, fig_size=(10, 5)): 184 | """ 185 | data : (frames, parts, xy). 186 | labels : (frames, label) or (frames, labels). 187 | frames_stamp : (frames, number of frame). 
188 | """ 189 | fig_cols = 1 190 | if labels is not None and labels.shape[1] > 1: 191 | fig_cols = 2 192 | x_bar = CLASS_NAMES if labels.shape[1] == len(CLASS_NAMES) else np.arange(labels.shape[1]) 193 | 194 | fig = plt.figure(figsize=fig_size) 195 | for i in range(data.shape[0]): 196 | xy = data[i] 197 | #xy = np.concatenate((xy, np.expand_dims((xy[1, :] + xy[2, :]) / 2, 0))) 198 | 199 | fig.clear() 200 | 201 | ax1 = fig.add_subplot(1, fig_cols, 1) 202 | for (sp, ep) in PARTS_PAIR: 203 | ax1.plot(xy[[sp, ep], 0], xy[[sp, ep], 1]) 204 | if xy.shape[1] == 3: 205 | for pts in xy: 206 | ax1.scatter(pts[0], pts[1], 200 * pts[2]) 207 | ax1.invert_yaxis() 208 | 209 | if fig_cols == 2: 210 | ax2 = fig.add_subplot(1, fig_cols, 2) 211 | ax2.bar(x_bar, labels[i]) 212 | ax2.set_ylim([0, 1.0]) 213 | 214 | frame = frames_stamp[i] if frames_stamp is not None else i 215 | idx = 0 216 | if labels is not None: 217 | idx = labels[i].argmax() if labels.shape[1] > 1 else labels[i][0] 218 | fig.suptitle('Frame : {}, Pose : {}'.format(frame, CLASS_NAMES[idx])) 219 | 220 | plt.pause(delay) 221 | plt.show() 222 | 223 | 224 | -------------------------------------------------------------------------------- /fn.py: -------------------------------------------------------------------------------- 1 | import re 2 | import cv2 3 | import time 4 | import math 5 | import torch 6 | import numpy as np 7 | 8 | RED = (0, 0, 255) 9 | GREEN = (0, 255, 0) 10 | BLUE = (255, 0, 0) 11 | CYAN = (255, 255, 0) 12 | YELLOW = (0, 255, 255) 13 | ORANGE = (0, 165, 255) 14 | PURPLE = (255, 0, 255) 15 | 16 | """COCO_PAIR = [(0, 1), (0, 2), (1, 3), (2, 4), # Head 17 | (5, 6), (5, 7), (7, 9), (6, 8), (8, 10), 18 | (17, 11), (17, 12), # Body 19 | (11, 13), (12, 14), (13, 15), (14, 16)]""" 20 | COCO_PAIR = [(0, 13), (1, 2), (1, 3), (3, 5), (2, 4), (4, 6), (13, 7), (13, 8), # Body 21 | (7, 9), (8, 10), (9, 11), (10, 12)] 22 | POINT_COLORS = [(0, 255, 255), (0, 191, 255), (0, 255, 102), (0, 77, 255), (0, 255, 0), # Nose, LEye, REye, LEar, REar 23 | (77, 255, 255), (77, 255, 204), (77, 204, 255), (191, 255, 77), (77, 191, 255), (191, 255, 77), # LShoulder, RShoulder, LElbow, RElbow, LWrist, RWrist 24 | (204, 77, 255), (77, 255, 204), (191, 77, 255), (77, 255, 191), (127, 77, 255), (77, 255, 127), (0, 255, 255)] # LHip, RHip, LKnee, Rknee, LAnkle, RAnkle, Neck 25 | LINE_COLORS = [(0, 215, 255), (0, 255, 204), (0, 134, 255), (0, 255, 50), (77, 255, 222), 26 | (77, 196, 255), (77, 135, 255), (191, 255, 77), (77, 255, 77), (77, 222, 255), 27 | (255, 156, 127), (0, 127, 255), (255, 127, 77), (0, 77, 255), (255, 77, 36)] 28 | 29 | MPII_PAIR = [(8, 9), (11, 12), (11, 10), (2, 1), (1, 0), (13, 14), (14, 15), (3, 4), (4, 5), 30 | (8, 7), (7, 6), (6, 2), (6, 3), (8, 12), (8, 13)] 31 | 32 | numpy_type_map = { 33 | 'float64': torch.DoubleTensor, 34 | 'float32': torch.FloatTensor, 35 | 'float16': torch.HalfTensor, 36 | 'int64': torch.LongTensor, 37 | 'int32': torch.IntTensor, 38 | 'int16': torch.ShortTensor, 39 | 'int8': torch.CharTensor, 40 | 'uint8': torch.ByteTensor, 41 | } 42 | 43 | _use_shared_memory = True 44 | 45 | 46 | def collate_fn(batch): 47 | r"""Puts each data field into a tensor with outer dimension batch size""" 48 | 49 | error_msg = "batch must contain tensors, numbers, dicts or lists; found {}" 50 | elem_type = type(batch[0]) 51 | 52 | if isinstance(batch[0], torch.Tensor): 53 | out = None 54 | if _use_shared_memory: 55 | # If we're in a background process, concatenate directly into a 56 | # shared memory tensor to avoid an extra copy 57 | 
numel = sum([x.numel() for x in batch]) 58 | storage = batch[0].storage()._new_shared(numel) 59 | out = batch[0].new(storage) 60 | return torch.stack(batch, 0, out=out) 61 | elif elem_type.__module__ == 'numpy' and elem_type.__name__ != 'str_' \ 62 | and elem_type.__name__ != 'string_': 63 | elem = batch[0] 64 | if elem_type.__name__ == 'ndarray': 65 | # array of string classes and object 66 | if re.search('[SaUO]', elem.dtype.str) is not None: 67 | raise TypeError(error_msg.format(elem.dtype)) 68 | 69 | return torch.stack([torch.from_numpy(b) for b in batch], 0) 70 | if elem.shape == (): # scalars 71 | py_type = float if elem.dtype.name.startswith('float') else int 72 | return numpy_type_map[elem.dtype.name](list(map(py_type, batch))) 73 | elif isinstance(batch[0], int): 74 | return torch.LongTensor(batch) 75 | elif isinstance(batch[0], float): 76 | return torch.DoubleTensor(batch) 77 | elif isinstance(batch[0], (str, bytes)): 78 | return batch 79 | elif isinstance(batch[0], collections.Mapping): 80 | return {key: collate_fn([d[key] for d in batch]) for key in batch[0]} 81 | elif isinstance(batch[0], collections.Sequence): 82 | transposed = zip(*batch) 83 | return [collate_fn(samples) for samples in transposed] 84 | 85 | raise TypeError((error_msg.format(type(batch[0])))) 86 | 87 | 88 | def collate_fn_list(batch): 89 | img, inp, im_name = zip(*batch) 90 | img = collate_fn(img) 91 | im_name = collate_fn(im_name) 92 | 93 | return img, inp, im_name 94 | 95 | 96 | def draw_single(frame, pts, joint_format='coco'): 97 | if joint_format == 'coco': 98 | l_pair = COCO_PAIR 99 | p_color = POINT_COLORS 100 | line_color = LINE_COLORS 101 | elif joint_format == 'mpii': 102 | l_pair = MPII_PAIR 103 | p_color = [PURPLE, BLUE, BLUE, RED, RED, BLUE, BLUE, RED, RED, PURPLE, PURPLE, PURPLE, RED, RED,BLUE,BLUE] 104 | else: 105 | NotImplementedError 106 | 107 | part_line = {} 108 | pts = np.concatenate((pts, np.expand_dims((pts[1, :] + pts[2, :]) / 2, 0)), axis=0) 109 | for n in range(pts.shape[0]): 110 | if pts[n, 2] <= 0.05: 111 | continue 112 | cor_x, cor_y = int(pts[n, 0]), int(pts[n, 1]) 113 | part_line[n] = (cor_x, cor_y) 114 | cv2.circle(frame, (cor_x, cor_y), 3, p_color[n], -1) 115 | 116 | for i, (start_p, end_p) in enumerate(l_pair): 117 | if start_p in part_line and end_p in part_line: 118 | start_xy = part_line[start_p] 119 | end_xy = part_line[end_p] 120 | cv2.line(frame, start_xy, end_xy, line_color[i], int(1*(pts[start_p, 2] + pts[end_p, 2]) + 1)) 121 | return frame 122 | 123 | 124 | def vis_frame_fast(frame, im_res, joint_format='coco'): 125 | """ 126 | frame: frame image 127 | im_res: im_res of predictions 128 | format: coco or mpii 129 | 130 | return rendered image 131 | """ 132 | if joint_format == 'coco': 133 | l_pair = COCO_PAIR 134 | p_color = POINT_COLORS 135 | line_color = LINE_COLORS 136 | elif joint_format == 'mpii': 137 | l_pair = MPII_PAIR 138 | p_color = [PURPLE, BLUE, BLUE, RED, RED, BLUE, BLUE, RED, RED, PURPLE, PURPLE, PURPLE, RED, RED,BLUE,BLUE] 139 | else: 140 | NotImplementedError 141 | 142 | #im_name = im_res['imgname'].split('/')[-1] 143 | img = frame 144 | for human in im_res: # ['result']: 145 | part_line = {} 146 | kp_preds = human['keypoints'] 147 | kp_scores = human['kp_score'] 148 | kp_preds = torch.cat((kp_preds, torch.unsqueeze((kp_preds[1, :]+kp_preds[2, :]) / 2, 0))) 149 | kp_scores = torch.cat((kp_scores, torch.unsqueeze((kp_scores[1, :]+kp_scores[2, :]) / 2, 0))) 150 | # Draw keypoints 151 | for n in range(kp_scores.shape[0]): 152 | if kp_scores[n] <= 0.05: 153 
| continue 154 | cor_x, cor_y = int(kp_preds[n, 0]), int(kp_preds[n, 1]) 155 | part_line[n] = (cor_x, cor_y) 156 | cv2.circle(img, (cor_x, cor_y), 4, p_color[n], -1) 157 | # Draw limbs 158 | for i, (start_p, end_p) in enumerate(l_pair): 159 | if start_p in part_line and end_p in part_line: 160 | start_xy = part_line[start_p] 161 | end_xy = part_line[end_p] 162 | cv2.line(img, start_xy, end_xy, line_color[i], 2*(kp_scores[start_p] + kp_scores[end_p]) + 1) 163 | return img 164 | 165 | 166 | def vis_frame(frame, im_res, joint_format='coco'): 167 | """ 168 | frame: frame image 169 | im_res: im_res of predictions 170 | format: coco or mpii 171 | 172 | return rendered image 173 | """ 174 | if joint_format == 'coco': 175 | l_pair = COCO_PAIR 176 | p_color = POINT_COLORS 177 | line_color = LINE_COLORS 178 | elif joint_format == 'mpii': 179 | l_pair = MPII_PAIR 180 | p_color = [PURPLE, BLUE, BLUE, RED, RED, BLUE, BLUE, RED, RED, PURPLE, PURPLE, PURPLE, RED, RED, BLUE, BLUE] 181 | line_color = [PURPLE, BLUE, BLUE, RED, RED, BLUE, BLUE, RED, RED, PURPLE, PURPLE, RED, RED, BLUE, BLUE] 182 | else: 183 | raise NotImplementedError 184 | 185 | im_name = im_res['imgname'].split('/')[-1] 186 | img = frame 187 | height, width = img.shape[:2] 188 | img = cv2.resize(img, (int(width/2), int(height/2))) 189 | for human in im_res['result']: 190 | part_line = {} 191 | kp_preds = human['keypoints'] 192 | kp_scores = human['kp_score'] 193 | kp_preds = torch.cat((kp_preds, torch.unsqueeze((kp_preds[5, :]+kp_preds[6, :]) / 2, 0))) 194 | kp_scores = torch.cat((kp_scores, torch.unsqueeze((kp_scores[5, :]+kp_scores[6, :]) / 2, 0))) 195 | # Draw keypoints 196 | for n in range(kp_scores.shape[0]): 197 | if kp_scores[n] <= 0.05: 198 | continue 199 | cor_x, cor_y = int(kp_preds[n, 0]), int(kp_preds[n, 1]) 200 | part_line[n] = (int(cor_x/2), int(cor_y/2)) 201 | bg = img.copy() 202 | cv2.circle(bg, (int(cor_x/2), int(cor_y/2)), 2, p_color[n], -1) 203 | # Now create a mask of logo and create its inverse mask also 204 | transparency = max(0, min(1, kp_scores[n])) 205 | img = cv2.addWeighted(bg, transparency, img, 1-transparency, 0) 206 | # Draw limbs 207 | for i, (start_p, end_p) in enumerate(l_pair): 208 | if start_p in part_line and end_p in part_line: 209 | start_xy = part_line[start_p] 210 | end_xy = part_line[end_p] 211 | bg = img.copy() 212 | 213 | X = (start_xy[0], end_xy[0]) 214 | Y = (start_xy[1], end_xy[1]) 215 | mX = np.mean(X) 216 | mY = np.mean(Y) 217 | length = ((Y[0] - Y[1]) ** 2 + (X[0] - X[1]) ** 2) ** 0.5 218 | angle = math.degrees(math.atan2(Y[0] - Y[1], X[0] - X[1])) 219 | stickwidth = (kp_scores[start_p] + kp_scores[end_p]) + 1 220 | polygon = cv2.ellipse2Poly((int(mX),int(mY)), (int(length/2), stickwidth), int(angle), 0, 360, 1) 221 | cv2.fillConvexPoly(bg, polygon, line_color[i]) 222 | #cv2.line(bg, start_xy, end_xy, line_color[i], (2 * (kp_scores[start_p] + kp_scores[end_p])) + 1) 223 | transparency = max(0, min(1, 0.5*(kp_scores[start_p] + kp_scores[end_p]))) 224 | img = cv2.addWeighted(bg, transparency, img, 1-transparency, 0) 225 | img = cv2.resize(img, (width, height), interpolation=cv2.INTER_CUBIC) 226 | return img 227 | 228 | 229 | def getTime(time1=0): 230 | if not time1: 231 | return time.time() 232 | else: 233 | interval = time.time() - time1 234 | return time.time(), interval 235 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import time 
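# Per-frame pipeline of this demo: TinyYOLOv3_onecls detects person boxes,
# SPPE_FastPose estimates a skeleton inside each box, Tracker associates the
# skeletons across frames with a Kalman filter, and TSSTG classifies the action
# once a track has accumulated a 30-frame keypoint window.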
4 | import torch 5 | import argparse 6 | import numpy as np 7 | 8 | from Detection.Utils import ResizePadding 9 | from CameraLoader import CamLoader, CamLoader_Q 10 | from DetectorLoader import TinyYOLOv3_onecls 11 | 12 | from PoseEstimateLoader import SPPE_FastPose 13 | from fn import draw_single 14 | 15 | from Track.Tracker import Detection, Tracker 16 | from ActionsEstLoader import TSSTG 17 | 18 | #source = '../Data/test_video/test7.mp4' 19 | #source = '../Data/falldata/Home/Videos/video (2).avi' # hard detect 20 | source = '../Data/falldata/Home/Videos/video (1).avi' 21 | #source = 2 22 | 23 | 24 | def preproc(image): 25 | """preprocess function for CameraLoader. 26 | """ 27 | image = resize_fn(image) 28 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 29 | return image 30 | 31 | 32 | def kpt2bbox(kpt, ex=20): 33 | """Get bbox that hold on all of the keypoints (x,y) 34 | kpt: array of shape `(N, 2)`, 35 | ex: (int) expand bounding box, 36 | """ 37 | return np.array((kpt[:, 0].min() - ex, kpt[:, 1].min() - ex, 38 | kpt[:, 0].max() + ex, kpt[:, 1].max() + ex)) 39 | 40 | 41 | if __name__ == '__main__': 42 | par = argparse.ArgumentParser(description='Human Fall Detection Demo.') 43 | par.add_argument('-C', '--camera', default=source, # required=True, # default=2, 44 | help='Source of camera or video file path.') 45 | par.add_argument('--detection_input_size', type=int, default=384, 46 | help='Size of input in detection model in square must be divisible by 32 (int).') 47 | par.add_argument('--pose_input_size', type=str, default='224x160', 48 | help='Size of input in pose model must be divisible by 32 (h, w)') 49 | par.add_argument('--pose_backbone', type=str, default='resnet50', 50 | help='Backbone model for SPPE FastPose model.') 51 | par.add_argument('--show_detected', default=False, action='store_true', 52 | help='Show all bounding box from detection.') 53 | par.add_argument('--show_skeleton', default=True, action='store_true', 54 | help='Show skeleton pose.') 55 | par.add_argument('--save_out', type=str, default='', 56 | help='Save display to video file.') 57 | par.add_argument('--device', type=str, default='cuda', 58 | help='Device to run model on cpu or cuda.') 59 | args = par.parse_args() 60 | 61 | device = args.device 62 | 63 | # DETECTION MODEL. 64 | inp_dets = args.detection_input_size 65 | detect_model = TinyYOLOv3_onecls(inp_dets, device=device) 66 | 67 | # POSE MODEL. 68 | inp_pose = args.pose_input_size.split('x') 69 | inp_pose = (int(inp_pose[0]), int(inp_pose[1])) 70 | pose_model = SPPE_FastPose(args.pose_backbone, inp_pose[0], inp_pose[1], device=device) 71 | 72 | # Tracker. 73 | max_age = 30 74 | tracker = Tracker(max_age=max_age, n_init=3) 75 | 76 | # Actions Estimate. 77 | action_model = TSSTG() 78 | 79 | resize_fn = ResizePadding(inp_dets, inp_dets) 80 | 81 | cam_source = args.camera 82 | if type(cam_source) is str and os.path.isfile(cam_source): 83 | # Use loader thread with Q for video file. 84 | cam = CamLoader_Q(cam_source, queue_size=1000, preprocess=preproc).start() 85 | else: 86 | # Use normal thread loader for webcam. 
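        # A numeric string (e.g. '-C 0') is treated as a webcam index and cast
        # to int below; any other non-file string is handed to the loader
        # unchanged, so a stream URL may also work if the underlying capture
        # supports it.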
87 | cam = CamLoader(int(cam_source) if cam_source.isdigit() else cam_source, 88 | preprocess=preproc).start() 89 | 90 | #frame_size = cam.frame_size 91 | #scf = torch.min(inp_size / torch.FloatTensor([frame_size]), 1)[0] 92 | 93 | outvid = False 94 | if args.save_out != '': 95 | outvid = True 96 | codec = cv2.VideoWriter_fourcc(*'MJPG') 97 | writer = cv2.VideoWriter(args.save_out, codec, 30, (inp_dets * 2, inp_dets * 2)) 98 | 99 | fps_time = 0 100 | f = 0 101 | while cam.grabbed(): 102 | f += 1 103 | frame = cam.getitem() 104 | image = frame.copy() 105 | 106 | # Detect humans bbox in the frame with detector model. 107 | detected = detect_model.detect(frame, need_resize=False, expand_bb=10) 108 | 109 | # Predict each tracks bbox of current frame from previous frames information with Kalman filter. 110 | tracker.predict() 111 | # Merge two source of predicted bbox together. 112 | for track in tracker.tracks: 113 | det = torch.tensor([track.to_tlbr().tolist() + [0.5, 1.0, 0.0]], dtype=torch.float32) 114 | detected = torch.cat([detected, det], dim=0) if detected is not None else det 115 | 116 | detections = [] # List of Detections object for tracking. 117 | if detected is not None: 118 | #detected = non_max_suppression(detected[None, :], 0.45, 0.2)[0] 119 | # Predict skeleton pose of each bboxs. 120 | poses = pose_model.predict(frame, detected[:, 0:4], detected[:, 4]) 121 | 122 | # Create Detections object. 123 | detections = [Detection(kpt2bbox(ps['keypoints'].numpy()), 124 | np.concatenate((ps['keypoints'].numpy(), 125 | ps['kp_score'].numpy()), axis=1), 126 | ps['kp_score'].mean().numpy()) for ps in poses] 127 | 128 | # VISUALIZE. 129 | if args.show_detected: 130 | for bb in detected[:, 0:5]: 131 | frame = cv2.rectangle(frame, (bb[0], bb[1]), (bb[2], bb[3]), (0, 0, 255), 1) 132 | 133 | # Update tracks by matching each track information of current and previous frame or 134 | # create a new track if no matched. 135 | tracker.update(detections) 136 | 137 | # Predict Actions of each track. 138 | for i, track in enumerate(tracker.tracks): 139 | if not track.is_confirmed(): 140 | continue 141 | 142 | track_id = track.track_id 143 | bbox = track.to_tlbr().astype(int) 144 | center = track.get_center().astype(int) 145 | 146 | action = 'pending..' 147 | clr = (0, 255, 0) 148 | # Use 30 frames time-steps to prediction. 149 | if len(track.keypoints_list) == 30: 150 | pts = np.array(track.keypoints_list, dtype=np.float32) 151 | out = action_model.predict(pts, frame.shape[:2]) 152 | action_name = action_model.class_names[out[0].argmax()] 153 | action = '{}: {:.2f}%'.format(action_name, out[0].max() * 100) 154 | if action_name == 'Fall Down': 155 | clr = (255, 0, 0) 156 | elif action_name == 'Lying Down': 157 | clr = (255, 200, 0) 158 | 159 | # VISUALIZE. 160 | if track.time_since_update == 0: 161 | if args.show_skeleton: 162 | frame = draw_single(frame, track.keypoints_list[-1]) 163 | frame = cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 255, 0), 1) 164 | frame = cv2.putText(frame, str(track_id), (center[0], center[1]), cv2.FONT_HERSHEY_COMPLEX, 165 | 0.4, (255, 0, 0), 2) 166 | frame = cv2.putText(frame, action, (bbox[0] + 5, bbox[1] + 15), cv2.FONT_HERSHEY_COMPLEX, 167 | 0.4, clr, 1) 168 | 169 | # Show Frame. 170 | frame = cv2.resize(frame, (0, 0), fx=2., fy=2.) 
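        # The displayed (and optionally saved) frame is 2x the model input size,
        # matching the (inp_dets * 2, inp_dets * 2) resolution the VideoWriter
        # above was opened with; the overlay below reports the frame index and
        # FPS computed from the wall-clock time since the previous iteration.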
171 | frame = cv2.putText(frame, '%d, FPS: %f' % (f, 1.0 / (time.time() - fps_time)), 172 | (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1) 173 | frame = frame[:, :, ::-1] 174 | fps_time = time.time() 175 | 176 | if outvid: 177 | writer.write(frame) 178 | 179 | cv2.imshow('frame', frame) 180 | if cv2.waitKey(1) & 0xFF == ord('q'): 181 | break 182 | 183 | # Clear resource. 184 | cam.stop() 185 | if outvid: 186 | writer.release() 187 | cv2.destroyAllWindows() 188 | -------------------------------------------------------------------------------- /pPose_nms.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import torch 3 | import json 4 | import os 5 | import zipfile 6 | import time 7 | from multiprocessing.dummy import Pool as ThreadPool 8 | import numpy as np 9 | 10 | ''' Constant Configuration ''' 11 | delta1 = 1 12 | mu = 1.7 13 | delta2 = 2.65 14 | gamma = 22.48 15 | scoreThreds = 0.3 16 | matchThreds = 5 17 | areaThres = 0 # 40 * 40.5 18 | alpha = 0.1 19 | #pool = ThreadPool(4) 20 | 21 | 22 | def pose_nms(bboxes, bbox_scores, pose_preds, pose_scores): 23 | """ 24 | Parametric Pose NMS algorithm 25 | bboxes: bbox locations list (n, 4) 26 | bbox_scores: bbox scores list (n,) 27 | pose_preds: pose locations list (n, 17, 2) 28 | pose_scores: pose scores list (n, 17, 1) 29 | """ 30 | global ori_pose_preds, ori_pose_scores, ref_dists 31 | 32 | pose_scores[pose_scores == 0] = 1e-5 33 | 34 | final_result = [] 35 | 36 | ori_bboxes = bboxes.clone() 37 | ori_bbox_scores = bbox_scores.clone() 38 | ori_pose_preds = pose_preds.clone() 39 | ori_pose_scores = pose_scores.clone() 40 | 41 | xmax = bboxes[:, 2] 42 | xmin = bboxes[:, 0] 43 | ymax = bboxes[:, 3] 44 | ymin = bboxes[:, 1] 45 | 46 | widths = xmax - xmin 47 | heights = ymax - ymin 48 | ref_dists = alpha * np.maximum(widths, heights) 49 | 50 | nsamples = bboxes.shape[0] 51 | human_scores = pose_scores.mean(dim=1) 52 | 53 | human_ids = np.arange(nsamples) 54 | # Do pPose-NMS 55 | pick = [] 56 | merge_ids = [] 57 | while human_scores.shape[0] != 0: 58 | # Pick the one with highest score 59 | pick_id = torch.argmax(human_scores) 60 | pick.append(human_ids[pick_id]) 61 | # num_visPart = torch.sum(pose_scores[pick_id] > 0.2) 62 | 63 | # Get numbers of match keypoints by calling PCK_match 64 | ref_dist = ref_dists[human_ids[pick_id]] 65 | simi = get_parametric_distance(pick_id, pose_preds, pose_scores, ref_dist) 66 | num_match_keypoints = PCK_match(pose_preds[pick_id], pose_preds, ref_dist) 67 | 68 | # Delete humans who have more than matchThreds keypoints overlap and high similarity 69 | delete_ids = torch.from_numpy(np.arange(human_scores.shape[0]))[ 70 | (simi > gamma) | (num_match_keypoints >= matchThreds)] 71 | 72 | if delete_ids.shape[0] == 0: 73 | delete_ids = pick_id 74 | #else: 75 | # delete_ids = torch.from_numpy(delete_ids) 76 | 77 | merge_ids.append(human_ids[delete_ids]) 78 | pose_preds = np.delete(pose_preds, delete_ids, axis=0) 79 | pose_scores = np.delete(pose_scores, delete_ids, axis=0) 80 | human_ids = np.delete(human_ids, delete_ids) 81 | human_scores = np.delete(human_scores, delete_ids, axis=0) 82 | bbox_scores = np.delete(bbox_scores, delete_ids, axis=0) 83 | 84 | assert len(merge_ids) == len(pick) 85 | bboxs_pick = ori_bboxes[pick] 86 | preds_pick = ori_pose_preds[pick] 87 | scores_pick = ori_pose_scores[pick] 88 | bbox_scores_pick = ori_bbox_scores[pick] 89 | #final_result = pool.map(filter_result, zip(scores_pick, merge_ids, preds_pick, pick, 
bbox_scores_pick)) 90 | #final_result = [item for item in final_result if item is not None] 91 | 92 | for j in range(len(pick)): 93 | ids = np.arange(pose_preds.shape[1]) 94 | max_score = torch.max(scores_pick[j, ids, 0]) 95 | 96 | if max_score < scoreThreds: 97 | continue 98 | 99 | # Merge poses 100 | merge_id = merge_ids[j] 101 | merge_pose, merge_score = p_merge_fast( 102 | preds_pick[j], ori_pose_preds[merge_id], ori_pose_scores[merge_id], ref_dists[pick[j]]) 103 | 104 | max_score = torch.max(merge_score[ids]) 105 | if max_score < scoreThreds: 106 | continue 107 | 108 | xmax = max(merge_pose[:, 0]) 109 | xmin = min(merge_pose[:, 0]) 110 | ymax = max(merge_pose[:, 1]) 111 | ymin = min(merge_pose[:, 1]) 112 | 113 | if 1.5 ** 2 * (xmax - xmin) * (ymax - ymin) < areaThres: 114 | continue 115 | 116 | final_result.append({ 117 | 'bbox': bboxs_pick[j], 118 | 'bbox_score': bbox_scores_pick[j], 119 | 'keypoints': merge_pose - 0.3, 120 | 'kp_score': merge_score, 121 | 'proposal_score': torch.mean(merge_score) + bbox_scores_pick[j] + 1.25 * max(merge_score) 122 | }) 123 | 124 | return final_result 125 | 126 | 127 | def filter_result(args): 128 | score_pick, merge_id, pred_pick, pick, bbox_score_pick = args 129 | global ori_pose_preds, ori_pose_scores, ref_dists 130 | ids = np.arange(17) 131 | max_score = torch.max(score_pick[ids, 0]) 132 | 133 | if max_score < scoreThreds: 134 | return None 135 | 136 | # Merge poses 137 | merge_pose, merge_score = p_merge_fast( 138 | pred_pick, ori_pose_preds[merge_id], ori_pose_scores[merge_id], ref_dists[pick]) 139 | 140 | max_score = torch.max(merge_score[ids]) 141 | if max_score < scoreThreds: 142 | return None 143 | 144 | xmax = max(merge_pose[:, 0]) 145 | xmin = min(merge_pose[:, 0]) 146 | ymax = max(merge_pose[:, 1]) 147 | ymin = min(merge_pose[:, 1]) 148 | 149 | if 1.5 ** 2 * (xmax - xmin) * (ymax - ymin) < 40 * 40.5: 150 | return None 151 | 152 | return { 153 | 'keypoints': merge_pose - 0.3, 154 | 'kp_score': merge_score, 155 | 'proposal_score': torch.mean(merge_score) + bbox_score_pick + 1.25 * max(merge_score) 156 | } 157 | 158 | 159 | def p_merge(ref_pose, cluster_preds, cluster_scores, ref_dist): 160 | """ 161 | Score-weighted pose merging 162 | INPUT: 163 | ref_pose: reference pose -- [17, 2] 164 | cluster_preds: redundant poses -- [n, 17, 2] 165 | cluster_scores: redundant poses score -- [n, 17, 1] 166 | ref_dist: reference scale -- Constant 167 | OUTPUT: 168 | final_pose: merged pose -- [17, 2] 169 | final_score: merged score -- [17] 170 | """ 171 | dist = torch.sqrt(torch.sum( 172 | torch.pow(ref_pose[np.newaxis, :] - cluster_preds, 2), 173 | dim=2 174 | )) # [n, 17] 175 | 176 | kp_num = 17 177 | ref_dist = min(ref_dist, 15) 178 | 179 | mask = (dist <= ref_dist) 180 | final_pose = torch.zeros(kp_num, 2) 181 | final_score = torch.zeros(kp_num) 182 | 183 | if cluster_preds.dim() == 2: 184 | cluster_preds.unsqueeze_(0) 185 | cluster_scores.unsqueeze_(0) 186 | if mask.dim() == 1: 187 | mask.unsqueeze_(0) 188 | 189 | for i in range(kp_num): 190 | cluster_joint_scores = cluster_scores[:, i][mask[:, i]] # [k, 1] 191 | cluster_joint_location = cluster_preds[:, i, :][mask[:, i].unsqueeze( 192 | -1).repeat(1, 2)].view((torch.sum(mask[:, i]), -1)) 193 | 194 | # Get an normalized score 195 | normed_scores = cluster_joint_scores / torch.sum(cluster_joint_scores) 196 | 197 | # Merge poses by a weighted sum 198 | final_pose[i, 0] = torch.dot(cluster_joint_location[:, 0], normed_scores.squeeze(-1)) 199 | final_pose[i, 1] = torch.dot(cluster_joint_location[:, 1], 
normed_scores.squeeze(-1)) 200 | 201 | final_score[i] = torch.dot(cluster_joint_scores.transpose(0, 1).squeeze(0), normed_scores.squeeze(-1)) 202 | 203 | return final_pose, final_score 204 | 205 | 206 | def p_merge_fast(ref_pose, cluster_preds, cluster_scores, ref_dist): 207 | """ 208 | Score-weighted pose merging 209 | INPUT: 210 | ref_pose: reference pose -- [17, 2] 211 | cluster_preds: redundant poses -- [n, 17, 2] 212 | cluster_scores: redundant poses score -- [n, 17, 1] 213 | ref_dist: reference scale -- Constant 214 | OUTPUT: 215 | final_pose: merged pose -- [17, 2] 216 | final_score: merged score -- [17] 217 | """ 218 | dist = torch.sqrt(torch.sum( 219 | torch.pow(ref_pose[np.newaxis, :] - cluster_preds, 2), 220 | dim=2 221 | )) 222 | 223 | kp_num = 17 224 | ref_dist = min(ref_dist, 15) 225 | 226 | mask = (dist <= ref_dist) 227 | final_pose = torch.zeros(kp_num, 2) 228 | final_score = torch.zeros(kp_num) 229 | 230 | if cluster_preds.dim() == 2: 231 | cluster_preds.unsqueeze_(0) 232 | cluster_scores.unsqueeze_(0) 233 | if mask.dim() == 1: 234 | mask.unsqueeze_(0) 235 | 236 | # Weighted Merge 237 | masked_scores = cluster_scores.mul(mask.float().unsqueeze(-1)) 238 | normed_scores = masked_scores / torch.sum(masked_scores, dim=0) 239 | 240 | final_pose = torch.mul(cluster_preds, normed_scores.repeat(1, 1, 2)).sum(dim=0) 241 | final_score = torch.mul(masked_scores, normed_scores).sum(dim=0) 242 | return final_pose, final_score 243 | 244 | 245 | def get_parametric_distance(i, all_preds, keypoint_scores, ref_dist): 246 | pick_preds = all_preds[i] 247 | pred_scores = keypoint_scores[i] 248 | dist = torch.sqrt(torch.sum( 249 | torch.pow(pick_preds[np.newaxis, :] - all_preds, 2), 250 | dim=2 251 | )) 252 | mask = (dist <= 1) 253 | 254 | # Define a keypoints distance 255 | score_dists = torch.zeros(all_preds.shape[0], all_preds.shape[1]) 256 | keypoint_scores.squeeze_() 257 | if keypoint_scores.dim() == 1: 258 | keypoint_scores.unsqueeze_(0) 259 | if pred_scores.dim() == 1: 260 | pred_scores.unsqueeze_(1) 261 | # The predicted scores are repeated up to do broadcast 262 | pred_scores = pred_scores.repeat(1, all_preds.shape[0]).transpose(0, 1) 263 | 264 | score_dists[mask] = torch.tanh(pred_scores[mask] / delta1) *\ 265 | torch.tanh(keypoint_scores[mask] / delta1) 266 | 267 | point_dist = torch.exp((-1) * dist / delta2) 268 | final_dist = torch.sum(score_dists, dim=1) + mu * torch.sum(point_dist, dim=1) 269 | 270 | return final_dist 271 | 272 | 273 | def PCK_match(pick_pred, all_preds, ref_dist): 274 | dist = torch.sqrt(torch.sum( 275 | torch.pow(pick_pred[np.newaxis, :] - all_preds, 2), 276 | dim=2 277 | )) 278 | ref_dist = min(ref_dist, 7) 279 | num_match_keypoints = torch.sum( 280 | dist / ref_dist <= 1, 281 | dim=1 282 | ) 283 | 284 | return num_match_keypoints 285 | -------------------------------------------------------------------------------- /pose_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def normalize_points_with_size(xy, width, height, flip=False): 5 | """Normalize scale points in image with size of image to (0-1). 6 | xy : (frames, parts, xy) or (parts, xy) 7 | """ 8 | if xy.ndim == 2: 9 | xy = np.expand_dims(xy, 0) 10 | xy[:, :, 0] /= width 11 | xy[:, :, 1] /= height 12 | if flip: 13 | xy[:, :, 0] = 1 - xy[:, :, 0] 14 | return xy 15 | 16 | 17 | def scale_pose(xy): 18 | """Normalize pose points by scale with max/min value of each pose. 
19 | xy : (frames, parts, xy) or (parts, xy) 20 | """ 21 | if xy.ndim == 2: 22 | xy = np.expand_dims(xy, 0) 23 | xy_min = np.nanmin(xy, axis=1) 24 | xy_max = np.nanmax(xy, axis=1) 25 | for i in range(xy.shape[0]): 26 | xy[i] = ((xy[i] - xy_min[i]) / (xy_max[i] - xy_min[i])) * 2 - 1 27 | return xy.squeeze() 28 | -------------------------------------------------------------------------------- /sample1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GajuuzZ/Human-Falling-Detect-Tracks/7ed2faa4d6147dfd576f58869b6c25545208af35/sample1.gif --------------------------------------------------------------------------------
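The two helpers in pose_utils.py are small but shape-sensitive, so a minimal sketch of how they chain together may help. The dummy data below (30 frames, 13 keypoints, a 384x384 frame) is an illustrative assumption only, not something taken from the repository.

import numpy as np
from pose_utils import normalize_points_with_size, scale_pose

# Illustrative dummy input: 30 time steps of 13 (x, y) keypoints in a 384x384 frame.
frame_w, frame_h = 384, 384
pts = np.random.rand(30, 13, 2).astype(np.float32) * frame_w

pts = normalize_points_with_size(pts, frame_w, frame_h)  # x, y scaled into [0, 1]
pts = scale_pose(pts)                                    # each frame rescaled to [-1, 1]
print(pts.shape)  # (30, 13, 2)

Scaling each frame independently by its own min/max removes absolute position and body size from the keypoints, which is what scale_pose's [-1, 1] output provides.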