├── src
│   ├── datasets
│   │   ├── __init__.py
│   │   ├── .DS_Store
│   │   ├── dataset_path.py
│   │   ├── datasets.py
│   │   ├── Corridor.py
│   │   ├── ShanghaiTech_AlphaPose.py
│   │   └── UCF_crime.py
│   ├── models
│   │   ├── .DS_Store
│   │   └── moprl.py
│   ├── utils
│   │   ├── .DS_Store
│   │   ├── logger.py
│   │   ├── load_save.py
│   │   ├── normalize.py
│   │   ├── visualization.py
│   │   ├── metrics.py
│   │   └── losses.py
│   ├── train.sh
│   ├── opts.py
│   └── train_test.py
├── assets
│   ├── fig1.png
│   └── fig2.png
├── README.md
└── env.yaml
/src/datasets/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/assets/fig1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Yui010206/MoPRL/HEAD/assets/fig1.png
--------------------------------------------------------------------------------
/assets/fig2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Yui010206/MoPRL/HEAD/assets/fig2.png
--------------------------------------------------------------------------------
/src/train.sh:
--------------------------------------------------------------------------------
1 | python train_test.py --exp_name ShanghaiTech_AlphaPose_train --dataset ShanghaiTech_AlphaPose
2 | python train_test.py --exp_name ShanghaiTech_AlphaPose_test --dataset ShanghaiTech_AlphaPose --inference True --model_path your_path_to_model
--------------------------------------------------------------------------------
/src/datasets/dataset_path.py:
--------------------------------------------------------------------------------
1 | # set to your own path
2 | ShanghaiTech_AlphaPose_Dir = '/home/ps/codebase/gepc/data/pose/'
3 | ShanghaiTech_Lable_Dir = '/home/ps/codebase/gepc/data/test/test_frame_mask/'
4 | ShanghaiTech_Frame_Dir = '/home/ps/codebase/gepc/data/testing/frames/'
5 |
6 | Corridor_Pose_Dir = '/home/ps/codebase/Corridor/Corridor_Pose/'
7 | Corridor_Label_Dir = '/home/ps/codebase/Corridor/Corridor_Label/'
8 |
9 | UCF_crime_Dir = '/home/ps/codebase/code/other/moprl/data/ucf_pose/'
--------------------------------------------------------------------------------
/src/utils/logger.py:
--------------------------------------------------------------------------------
1 | import logging
2 |
3 | def get_logger(filename, verbosity=1, name=None):
4 |
5 | level_dict = {0: logging.DEBUG, 1: logging.INFO, 2: logging.WARNING}
6 | formatter = logging.Formatter(
7 | "[%(asctime)s][%(filename)s][line:%(lineno)d][%(levelname)s] %(message)s"
8 | )
9 | logger = logging.getLogger(name)
10 | logger.setLevel(level_dict[verbosity])
11 |
12 | fh = logging.FileHandler(filename, "w")
13 | fh.setFormatter(formatter)
14 | logger.addHandler(fh)
15 |
16 | sh = logging.StreamHandler()
17 | sh.setFormatter(formatter)
18 | logger.addHandler(sh)
19 |
20 | return logger
21 |
--------------------------------------------------------------------------------
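A minimal usage sketch for `get_logger` above (the log path and logger name are illustrative; it assumes the working directory is `src/`):

```python
from utils.logger import get_logger

# verbosity=1 maps to logging.INFO; messages go to both the file and stdout
logger = get_logger('exp_debug_log.txt', verbosity=1, name='moprl')
logger.info('training started')
logger.warning('this line is written to exp_debug_log.txt as well')
```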
/src/utils/load_save.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 | import zipfile
4 | import numpy as np
5 | import pickle
6 |
7 | def load_pickle(filename):
8 | with open(filename, "rb") as f:
9 | return pickle.load(f)
10 |
11 |
12 | def save_pickle(data, filename):
13 | with open(filename, "wb") as f:
14 | pickle.dump(data, f, protocol=pickle.HIGHEST_PROTOCOL)
15 |
16 |
17 | def load_json(filename):
18 | with open(filename, "r") as f:
19 | return json.load(f)
20 |
21 |
22 | def write_json(data, filename, save_pretty=False, sort_keys=False):
23 | with open(filename, "w") as f:
24 | if save_pretty:
25 | f.write(json.dumps(data, indent=4, sort_keys=sort_keys))
26 | else:
27 | json.dump(data, f)
28 |
29 | def concat_json_list(filepaths, save_path):
30 | json_lists = []
31 | for p in filepaths:
32 | json_lists += load_json(p)
33 | write_json(json_lists, save_path)
34 |
35 |
36 | def save_lines(list_of_str, filepath):
37 | with open(filepath, "w") as f:
38 | f.write("\n".join(list_of_str))
39 |
40 |
41 | def read_lines(filepath):
42 | with open(filepath, "r") as f:
43 | return [e.strip("\n") for e in f.readlines()]
44 |
45 |
46 | def get_rounded_percentage(float_number, n_floats=2):
47 | return round(float_number * 100, n_floats)
48 |
49 |
50 | def save_parameters(path,opt):
51 | '''Write parameters setting file'''
52 | with open(os.path.join(path, 'params.txt'), 'w') as file:
53 | file.write('Training Parameters: \n')
54 | file.write(str(opt) + '\n')
--------------------------------------------------------------------------------
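A short, self-contained round trip through the I/O helpers above (file names are placeholders):

```python
from utils.load_save import save_pickle, load_pickle, write_json, load_json

save_pickle({'auc': 0.85}, 'result.pkl')
print(load_pickle('result.pkl'))                 # {'auc': 0.85}

write_json([0.1, 0.2, 0.3], 'scores.json', save_pretty=True)
print(load_json('scores.json'))                  # [0.1, 0.2, 0.3]
```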
/src/utils/normalize.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | def normalize_anomaly_score(scores):
4 | max_score = max(scores)
5 | # scale scores into [0, 1] by dividing by the maximum score
6 | length = max_score
7 | if length==0:
8 | length=1
9 | scores = np.array(scores)
10 |
11 | return scores/length
12 |
13 | def normalize_score(score):
14 |
15 | return score/np.sum(score)
16 |
17 | def normalize_pose(pose_x, pose_y):
18 |
19 | x_max, y_max = np.max(pose_x,axis=1), np.max(pose_y,axis=1)
20 | x_min, y_min = np.min(pose_x,axis=1), np.min(pose_y,axis=1)
21 | x_c, y_c = (x_max+x_min)/2, (y_max+y_min)/2
22 | w, h = x_max-x_min, y_max - y_min
23 |
24 | x, y = [], []
25 |
26 | for i in range(len(w)):
27 | nor_x = ((pose_x[i] - x_c[i]) / w[i]).tolist()
28 | nor_y = ((pose_y[i] - y_c[i]) / h[i]).tolist()
29 | x.append(nor_x)
30 | y.append(nor_y)
31 |
32 | return x, y
33 |
34 | def center_pose(pose_x, pose_y):
35 |
36 | x_max, y_max = np.max(pose_x,axis=1), np.max(pose_y,axis=1)
37 | x_min, y_min = np.min(pose_x,axis=1), np.min(pose_y,axis=1)
38 | x_c, y_c = (x_max+x_min)/2, (y_max+y_min)/2
39 | w, h = x_max-x_min, y_max - y_min
40 |
41 | w[w<1e-5] = 1
42 | h[h<1e-5] = 1
43 |
44 | x, y = [], []
45 |
46 | for i in range(len(w)):
47 | nor_x = ((pose_x[i] - x_c[i])).tolist()
48 | nor_y = ((pose_y[i] - y_c[i])).tolist()
49 | x.append(nor_x)
50 | y.append(nor_y)
51 |
52 | return x, y
53 |
54 | def keypoints17_to_coco18(kps):
55 |
56 | kp_np = np.array(kps)#.reshape(-1,17,3)
57 | neck_kp_vec = 0.5 * (kp_np[..., 5, :] + kp_np[..., 6, :])
58 | kp_np = np.concatenate([kp_np, neck_kp_vec[..., None, :]], axis=-2)
59 | opp_order = [0, 17, 6, 8, 10, 5, 7, 9, 12, 14, 16, 11, 13, 15, 2, 1, 4, 3]
60 | opp_order = np.array(opp_order, dtype=int)  # np.int is deprecated in recent numpy
61 | kp_coco18 = kp_np[..., opp_order, :]
62 |
63 | return kp_coco18
--------------------------------------------------------------------------------
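A small shape check for the helpers above, using synthetic keypoints rather than real detections (assumes the working directory is `src/`):

```python
import numpy as np
from utils.normalize import keypoints17_to_coco18, normalize_pose

kps = np.random.rand(8, 17, 3)        # (T, 17, [x, y, score]) synthetic tracklet
kps18 = keypoints17_to_coco18(kps)
print(kps18.shape)                    # (8, 18, 3): neck joint inserted, joints reordered

x, y = normalize_pose(kps[..., 0], kps[..., 1])
print(np.array(x).shape)              # (8, 17): per-frame centered, box-normalized x coordinates
```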
/src/utils/visualization.py:
--------------------------------------------------------------------------------
1 | import os
2 | import cv2
3 | import numpy as np
4 | import matplotlib.pyplot as plt
5 | from PIL import Image
6 | import torch
7 |
8 | def visualize_local_tracklets(poses,joints_num=17,scale_factor=100):
9 |
10 | poses = poses.reshape((-1,int(joints_num),2))
11 | tracklet_img = visualize_tracklet(poses,scale_factor,joints_num)
12 |
13 | return tracklet_img
14 |
15 | def visualize_tracklet(tracklet,scale_factor,num_joints):
16 |
17 | imgs = []
18 | for pose in tracklet:
19 | img = visulize_single_pose(pose,scale_factor,num_joints)
20 | imgs.append(img)
21 | imgs = np.hstack(imgs)
22 |
23 | return imgs
24 |
25 | def visulize_single_pose(kpts,scale_factor,num_joints):
26 |
27 | if num_joints == 17:
28 | links = [(0, 1), (0, 2), (1, 3), (2, 4),
29 | (5, 7), (7, 9), (6, 8), (8, 10),
30 | (11, 13), (13, 15), (12, 14), (14, 16),
31 | (3, 5), (4, 6), (5, 6), (5, 11), (6, 12), (11, 12)]
32 |
33 | if num_joints == 25:
34 | links = [(17,15),(15,0),(0,16),(16,18),(0,1),(1,8),(1,2),(2,3),(3,4),(1,5),(5,6),
35 | (6,7),(8,9),(9,10),(10,11),(11,22),(22,23),(11,24),(8,12),(12,13),(13,14),(14,21),(14,19),(19,20)]
36 |
37 | kpts = np.array(kpts)
38 |
39 | x = kpts[:,0]
40 | y = kpts[:,1]
41 |
42 | img = np.zeros((100,100,3),np.uint8)
43 | cmap = plt.get_cmap('rainbow')
44 | colors = [cmap(i) for i in np.linspace(0, 1, len(links) + 2)]
45 | colors = [(c[2] * 255, c[1] * 255, c[0] * 255) for c in colors]
46 |
47 | for i in range(len(links)):
48 |
49 | order1, order2 = links[i][0], links[i][1]
50 | x1 =int(((np.float32(x[order1])))) + int(scale_factor/2)
51 | y1 =int(((np.float32(y[order1])))) + int(scale_factor/2)
52 | x2 =int(((np.float32(x[order2])))) + int(scale_factor/2)
53 | y2 =int(((np.float32(y[order2])))) + int(scale_factor/2)
54 | cv2.line(img,(x1,y1),(x2,y2),thickness=1,color=colors[i])
55 |
56 | return img
--------------------------------------------------------------------------------
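A minimal sketch that renders a synthetic tracklet with the functions above and writes it to disk (the output file name is illustrative):

```python
import cv2
import numpy as np
from utils.visualization import visualize_local_tracklets

# synthetic tracklet: 8 frames x 17 joints x (x, y), roughly centered around the origin
poses = np.random.uniform(-30, 30, size=(8 * 17, 2)).astype(np.float32)
strip = visualize_local_tracklets(poses, joints_num=17, scale_factor=100)
cv2.imwrite('tracklet_debug.jpg', strip)   # 100x800 image, one skeleton per frame
```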
/README.md:
--------------------------------------------------------------------------------
1 | # 💻 Code for Regularity Learning via Explicit Distribution Modeling for Skeletal Video Anomaly Detection
2 |
3 | * Authors: [Shoubin Yu](https://yui010206.github.io/), [Zhongyin Zhao](https://github.com/ZZhongYin),
4 | [Hao-shu Fang](https://fang-haoshu.github.io/), [Andong Deng](https://dengandong.github.io/),
5 | [Haisheng Su](https://scholar.google.com.hk/citations?user=OFrMZh8AAAAJ&hl=zh-CN), [Dongliang Wang](https://scholar.google.com.hk/citations?hl=zh-CN&user=gurERzcAAAAJ),
6 | [Weihao Gan](https://scholar.google.com.hk/citations?hl=zh-CN&user=QIC0rCYAAAAJ), [Cewu Lu](https://www.mvig.org/), and [Wei Wu](https://scholar.google.com/citations?user=9RBxtd8AAAAJ&hl=en&oi=ao)
7 |
8 | * [Paper](https://ieeexplore.ieee.org/abstract/document/10185076) ([IEEE Transactions on Circuits and Systems for Video Technology 2023](https://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=76))
9 |
10 | <img src="assets/fig1.png">
11 | 
12 | <img src="assets/fig2.png">
13 | 
14 | ## Data Preparation
15 | 
16 | We extract poses for ShanghaiTech in the same way as [GEPC](https://github.com/amirmk89/gepc). For Corridor, we adopt the method described [here](https://rodrigues-royston.github.io/Multi-timescale_Trajectory_Prediction/). Once the poses have been downloaded or extracted, update the data paths in ***src/datasets/dataset_path.py*** before training or testing.
17 |
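For example, the path variables in ***src/datasets/dataset_path.py*** look like this (the directories below are placeholders; the variable names, including the `Lable` spelling, match the repository):

```python
# src/datasets/dataset_path.py -- point these at your own data locations
ShanghaiTech_AlphaPose_Dir = '/your/path/to/ShanghaiTech/pose/'
ShanghaiTech_Lable_Dir = '/your/path/to/ShanghaiTech/test_frame_mask/'
ShanghaiTech_Frame_Dir = '/your/path/to/ShanghaiTech/testing/frames/'

Corridor_Pose_Dir = '/your/path/to/Corridor/Corridor_Pose/'
Corridor_Label_Dir = '/your/path/to/Corridor/Corridor_Label/'

UCF_crime_Dir = '/your/path/to/UCF_Crime/ucf_pose/'
```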
18 | ## Train model
19 | For ShanghaiTech
20 |
21 | ``` python train_test.py --exp_name ShanghaiTech_AlphaPose_train --dataset ShanghaiTech_AlphaPose```
22 |
23 | For Corridor
24 |
25 | ``` python train_test.py --exp_name Corridor_train --dataset Corridor```
26 |
27 | ## Test model
28 | For ShanghaiTech
29 |
30 | ``` python train_test.py --exp_name ShanghaiTech_AlphaPose_test --dataset ShanghaiTech_AlphaPose --inference True --model_path your_path_to_model ```
31 |
32 | For Corridor
33 |
34 | ``` python train_test.py --exp_name Corridor_test --dataset Corridor --inference True --model_path your_path_to_model ```
35 |
--------------------------------------------------------------------------------
/src/opts.py:
--------------------------------------------------------------------------------
1 | import argparse
2 |
3 | def parse_opts():
4 | parser = argparse.ArgumentParser()
5 |
6 | # basic config
7 | parser.add_argument('--seed',default=2021,type=int)
8 | parser.add_argument('--workers',default=2,type=int)
9 | parser.add_argument('--exp_name',default='debug',type=str)
10 | parser.add_argument('--inference',default=False,type=bool,help='turn on inference mode')
11 | parser.add_argument('--ano_score',default='max',type=str)
12 | parser.add_argument('--gpu',default=0,type=int)
13 |
14 | # show config
15 | parser.add_argument('--log_interval',default=100,type=int)
16 | parser.add_argument('--vis_interval',default=200,type=int)
17 | parser.add_argument('--eval_interval',default=2000,type=int)
18 |
19 | # test config
20 | parser.add_argument('--model_path',default='',type=str)
21 |
22 | # training config
23 | parser.add_argument('--batch_size',default=256,type=int,help='batch size')
24 | parser.add_argument('--lr_rate',default=5e-5,type=float)
25 | parser.add_argument('--epochs',default=50,type=int)
26 | parser.add_argument('--load_pretrain_model',default=False,type=bool)
27 | parser.add_argument('--iter_to_load',default=5000,type=int,help='load checkpoints')
28 |
29 | # dataset config
30 | parser.add_argument('--dataset',default='UCF_crime',type=str)
31 | parser.add_argument('--tracklet_len',default=8,type=int)
32 | parser.add_argument('--stride',default=1,type=int)
33 | parser.add_argument('--headless',default=False,type=bool)
34 | parser.add_argument('--mask_ratio',default=0.15,type=float)
35 | parser.add_argument('--motion_type',default='rayleigh',type=str) #rayleigh #gaussian #uniform #none
36 | parser.add_argument('--fusion_type',default='div',type=str) #div #add #mul
37 | parser.add_argument('--noise_factor',default=0,type=float)
38 | parser.add_argument('--pre_len',default=0,type=int)
39 |
40 | # model config
41 | parser.add_argument('--embed_dim',default=128,type=int)
42 | parser.add_argument('--spatial_depth',default=2,type=int)
43 | parser.add_argument('--temporal_depth',default=2,type=int)
44 |
45 | args = parser.parse_args()
46 |
47 | return args
--------------------------------------------------------------------------------
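One caveat about the boolean flags above (standard argparse behavior, not specific to this repo): with `type=bool` the parser calls `bool()` on the raw string, and `bool('False')` is `True`, so any non-empty value enables the flag. Passing `--inference True`, as train.sh does, works as intended. A common alternative is a small string-to-bool converter, sketched below (not part of the repository):

```python
# hypothetical helper; the repo itself uses type=bool as shown above
def str2bool(v):
    return str(v).lower() in ('yes', 'true', 't', '1')

# parser.add_argument('--inference', default=False, type=str2bool, help='turn on inference mode')
```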
/src/datasets/datasets.py:
--------------------------------------------------------------------------------
1 | from .dataset_path import *
2 | import os
3 |
4 | def get_training_set(opt):
5 | assert opt.dataset in ['ShanghaiTech_AlphaPose', 'Corridor', 'UCF_crime']
6 |
7 | if opt.dataset == 'ShanghaiTech_AlphaPose':
8 |
9 | from .ShanghaiTech_AlphaPose import ShanghaiTech_AlphaPose
10 |
11 | train_Dataset = ShanghaiTech_AlphaPose(pose_dir=ShanghaiTech_AlphaPose_Dir, split='train', mask_pro=opt.mask_ratio,
12 | tracklet_len=opt.tracklet_len ,stride=opt.stride,head_less=opt.headless,pre_len=opt.pre_len,embed_dim=opt.embed_dim, fusion_type=opt.fusion_type, motion_type=opt.motion_type, noise_factor = opt.noise_factor)
13 |
14 | elif opt.dataset == 'Corridor':
15 | from .Corridor import Corridor
16 |
17 | train_Dataset = Corridor(pose_dir=Corridor_Pose_Dir, split='train', mask_pro=opt.mask_ratio,
18 | tracklet_len=opt.tracklet_len ,stride=opt.stride,pre_len=opt.pre_len,embed_dim=opt.embed_dim, fusion_type=opt.fusion_type, motion_type=opt.motion_type)
19 |
20 | elif opt.dataset == 'UCF_crime':
21 |
22 | from .UCF_crime import UCF_crime
23 |
24 | train_Dataset = UCF_crime(pose_dir=UCF_crime_Dir, split='train', mask_pro=opt.mask_ratio,
25 | tracklet_len=opt.tracklet_len ,stride=opt.stride,head_less=opt.headless,pre_len=opt.pre_len,embed_dim=opt.embed_dim, fusion_type=opt.fusion_type, motion_type=opt.motion_type)
26 |
27 |
28 | return train_Dataset
29 |
30 |
31 | def get_test_set(opt):
32 | assert opt.dataset in ['ShanghaiTech_AlphaPose', 'Corridor', 'UCF_crime']
33 |
34 | if opt.dataset == 'ShanghaiTech_AlphaPose':
35 |
36 | from .ShanghaiTech_AlphaPose import ShanghaiTech_AlphaPose
37 |
38 | test_Dataset = ShanghaiTech_AlphaPose(pose_dir=ShanghaiTech_AlphaPose_Dir, split='test',
39 | tracklet_len=opt.tracklet_len ,stride=opt.stride,head_less=opt.headless,pre_len=opt.pre_len,embed_dim=opt.embed_dim, fusion_type=opt.fusion_type, motion_type=opt.motion_type, noise_factor = opt.noise_factor)
40 |
41 | elif opt.dataset == 'Corridor':
42 | from .Corridor import Corridor
43 |
44 | test_Dataset = Corridor(pose_dir=Corridor_Pose_Dir, split='test',
45 | tracklet_len=opt.tracklet_len ,stride=opt.stride,pre_len=opt.pre_len,embed_dim=opt.embed_dim, fusion_type=opt.fusion_type, motion_type=opt.motion_type)
46 |
47 | elif opt.dataset == 'UCF_crime':
48 |
49 | from .UCF_crime import UCF_crime
50 |
51 | test_Dataset = UCF_crime(pose_dir=UCF_crime_Dir, split='test',
52 | tracklet_len=opt.tracklet_len ,stride=opt.stride,head_less=opt.headless,pre_len=opt.pre_len,embed_dim=opt.embed_dim, fusion_type=opt.fusion_type, motion_type=opt.motion_type)
53 |
54 | else:
55 | raise ValueError ("Dataset Name Invalid!")
56 |
57 |
58 | return test_Dataset
59 |
--------------------------------------------------------------------------------
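A minimal sketch of how these factory functions are consumed (it mirrors train_test.py; it assumes the working directory is `src/` and that the paths in dataset_path.py point at prepared pose data):

```python
from opts import parse_opts
from datasets.datasets import get_training_set, get_test_set

# e.g. invoked as: python sketch.py --dataset ShanghaiTech_AlphaPose
opt = parse_opts()
train_set = get_training_set(opt)   # Dataset selected by --dataset
test_set = get_test_set(opt)
print(len(train_set), len(test_set))
```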
/env.yaml:
--------------------------------------------------------------------------------
1 | name: moprl
2 | channels:
3 | - defaults
4 | dependencies:
5 | - _libgcc_mutex=0.1=main
6 | - _pytorch_select=0.2=gpu_0
7 | - backcall=0.2.0=pyhd3eb1b0_0
8 | - blas=1.0=mkl
9 | - ca-certificates=2021.9.30=h06a4308_1
10 | - certifi=2021.5.30=py36h06a4308_0
11 | - cffi=1.14.6=py36h400218f_0
12 | - cudatoolkit=10.0.130=0
13 | - cudnn=7.6.5=cuda10.0_0
14 | - decorator=5.0.9=pyhd3eb1b0_0
15 | - freetype=2.10.4=h5ab3b9f_0
16 | - intel-openmp=2021.3.0=h06a4308_3350
17 | - ipykernel=5.3.4=py36h5ca1d4c_0
18 | - ipython=7.16.1=py36h5ca1d4c_0
19 | - ipython_genutils=0.2.0=pyhd3eb1b0_1
20 | - jedi=0.17.0=py36_0
21 | - jpeg=9d=h7f8727e_0
22 | - jupyter_client=6.1.12=pyhd3eb1b0_0
23 | - jupyter_core=4.7.1=py36h06a4308_0
24 | - lcms2=2.12=h3be6417_0
25 | - ld_impl_linux-64=2.33.1=h53a641e_7
26 | - libffi=3.3=he6710b0_2
27 | - libgcc-ng=9.1.0=hdf63c60_0
28 | - libpng=1.6.37=hbc83047_0
29 | - libsodium=1.0.18=h7b6447c_0
30 | - libstdcxx-ng=9.1.0=hdf63c60_0
31 | - libtiff=4.2.0=h85742a9_0
32 | - libwebp-base=1.2.0=h27cfd23_0
33 | - lz4-c=1.9.3=h295c915_1
34 | - mkl=2020.2=256
35 | - mkl-service=2.3.0=py36he8ac12f_0
36 | - mkl_fft=1.3.0=py36h54f3939_0
37 | - mkl_random=1.1.1=py36h0573a6f_0
38 | - ncurses=6.2=he6710b0_1
39 | - ninja=1.10.2=hff7bd54_1
40 | - numpy-base=1.19.2=py36hfa32c7d_0
41 | - olefile=0.46=py36_0
42 | - openjpeg=2.4.0=h3ad879b_0
43 | - openssl=1.1.1l=h7f8727e_0
44 | - parso=0.8.2=pyhd3eb1b0_0
45 | - pexpect=4.8.0=pyhd3eb1b0_3
46 | - pickleshare=0.7.5=pyhd3eb1b0_1003
47 | - pip=21.1.1=py36h06a4308_0
48 | - prompt-toolkit=3.0.17=pyh06a4308_0
49 | - ptyprocess=0.7.0=pyhd3eb1b0_2
50 | - pycparser=2.20=py_2
51 | - pygments=2.9.0=pyhd3eb1b0_0
52 | - python=3.6.13=hdb3f193_0
53 | - python-dateutil=2.8.1=pyhd3eb1b0_0
54 | - pytorch=1.3.1=cuda100py36h53c1284_0
55 | - pyzmq=20.0.0=py36h2531618_1
56 | - readline=8.1=h27cfd23_0
57 | - setuptools=52.0.0=py36h06a4308_0
58 | - six=1.15.0=py36h06a4308_0
59 | - sqlite=3.35.4=hdfb4753_0
60 | - tk=8.6.10=hbc83047_0
61 | - torchvision=0.4.2=cuda100py36hecfc37a_0
62 | - tornado=6.1=py36h27cfd23_0
63 | - traitlets=4.3.3=py36_0
64 | - wcwidth=0.2.5=py_0
65 | - wheel=0.36.2=pyhd3eb1b0_0
66 | - xz=5.2.5=h7b6447c_0
67 | - zeromq=4.3.4=h2531618_0
68 | - zlib=1.2.11=h7b6447c_3
69 | - zstd=1.4.9=haebb681_0
70 | - pip:
71 | - absl-py==0.12.0
72 | - argon2-cffi==21.1.0
73 | - astor==0.8.1
74 | - astunparse==1.6.3
75 | - async-generator==1.10
76 | - attrs==21.2.0
77 | - bleach==4.1.0
78 | - cached-property==1.5.2
79 | - cachetools==4.2.2
80 | - chardet==4.0.0
81 | - click==8.0.3
82 | - cycler==0.10.0
83 | - dataclasses==0.8
84 | - defusedxml==0.7.1
85 | - easydict==1.9
86 | - einops==0.3.2
87 | - entrypoints==0.3
88 | - filelock==3.3.1
89 | - flatbuffers==1.12
90 | - gast==0.3.3
91 | - google-auth==1.30.0
92 | - google-auth-oauthlib==0.4.4
93 | - google-pasta==0.2.0
94 | - grpcio==1.34.1
95 | - h5py==2.10.0
96 | - huggingface-hub==0.0.19
97 | - idna==2.10
98 | - imageio==2.5.0
99 | - importlib-metadata==4.0.1
100 | - ipyplot==1.1.0
101 | - ipywidgets==7.6.5
102 | - jinja2==3.0.2
103 | - joblib==1.0.1
104 | - jsonschema==3.2.0
105 | - jupyterlab-pygments==0.1.2
106 | - jupyterlab-widgets==1.0.2
107 | - keras-applications==1.0.8
108 | - keras-nightly==2.5.0.dev2021032900
109 | - keras-preprocessing==1.1.2
110 | - kiwisolver==1.3.1
111 | - markdown==3.3.4
112 | - markupsafe==2.0.1
113 | - matplotlib==3.1.2
114 | - mistune==0.8.4
115 | - nbclient==0.5.4
116 | - nbconvert==6.0.7
117 | - nbformat==5.1.3
118 | - nest-asyncio==1.5.1
119 | - notebook==6.4.5
120 | - numpy==1.19.5
121 | - oauthlib==3.1.0
122 | - opencv-python==4.1.2.30
123 | - opt-einsum==3.3.0
124 | - packaging==21.0
125 | - pandas==1.1.5
126 | - pandocfilters==1.5.0
127 | - pillow==8.2.0
128 | - prometheus-client==0.11.0
129 | - protobuf==3.17.0
130 | - pyasn1==0.4.8
131 | - pyasn1-modules==0.2.8
132 | - pyparsing==2.4.7
133 | - pyrsistent==0.18.0
134 | - pytz==2021.1
135 | - pyyaml==5.1.1
136 | - regex==2021.10.8
137 | - requests==2.25.1
138 | - requests-oauthlib==1.3.0
139 | - rsa==4.7.2
140 | - sacremoses==0.0.46
141 | - scikit-learn==0.24.2
142 | - scipy==1.4.1
143 | - seaborn==0.9.0
144 | - send2trash==1.8.0
145 | - shortuuid==1.0.1
146 | - sklearn==0.0
147 | - tensorboard==2.2.2
148 | - tensorboard-data-server==0.6.1
149 | - tensorboard-plugin-wit==1.8.0
150 | - tensorboardx==2.2
151 | - tensorflow==2.2.0
152 | - tensorflow-estimator==2.2.0
153 | - termcolor==1.1.0
154 | - terminado==0.12.1
155 | - testpath==0.5.0
156 | - threadpoolctl==2.1.0
157 | - timm==0.4.12
158 | - tokenizers==0.10.3
159 | - torch==1.9.1
160 | - torch-tb-profiler==0.1.0
161 | - tqdm==4.41.0
162 | - transformers==4.11.3
163 | - typing-extensions==3.7.4.3
164 | - urllib3==1.26.4
165 | - webencodings==0.5.1
166 | - werkzeug==2.0.1
167 | - widgetsnbextension==3.5.1
168 | - wrapt==1.12.1
169 | - zipp==3.4.1
170 |
171 |
--------------------------------------------------------------------------------
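Assuming a standard conda setup, the environment above can typically be recreated with `conda env create -f env.yaml`, which creates an environment named `moprl` (this is standard conda usage and is not spelled out in the README).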
/src/utils/metrics.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | import os
4 |
5 | from sklearn.metrics import roc_curve
6 | from sklearn.metrics import auc
7 | from scipy.ndimage import gaussian_filter1d
8 | from sklearn.metrics import roc_auc_score
9 | from utils.normalize import normalize_anomaly_score
10 | from datasets.dataset_path import *
11 | from utils.load_save import load_json
12 |
13 | def recover_poses(local_pose,global_box,dataset):
14 | # local_pose: (B,17*T,2)
15 | # global_box: (B,2*T,2)
16 | t = int(global_box.shape[1]/2)
17 | scale_factor = 100
18 | if dataset=='ShanghaiTech':
19 | frame_width = 856
20 | frame_height = 480
21 |
22 | local_pose_split = torch.chunk(local_pose,t,dim=1)
23 | global_box_split = torch.chunk(global_box,t,dim=1)
24 |
25 | assert len(local_pose_split) == len(global_box_split)
26 |
27 | recovered_poses = []
28 |
29 | for i in range(len(local_pose_split)):
30 | pose = local_pose_split[i] #(B,17,2)
31 | #print(pose.shape)
32 | b_min,b_max = global_box_split[i][:,0,:],global_box_split[i][:,1,:] #(B,2)
33 |
34 | x_min ,y_min = b_min[:,0]/scale_factor*frame_width, b_min[:,1]/scale_factor*frame_height
35 | x_max ,y_max = b_max[:,0]/scale_factor*frame_width, b_max[:,1]/scale_factor*frame_height
36 |
37 | w, h = x_max-x_min, y_max-y_min
38 | x_c , y_c = (x_max+x_min)/2, (y_max+y_min)/2
39 |
40 | x = ((pose[:,:,0]/scale_factor)*w[0] + x_c[0]).unsqueeze(-1)
41 | y = ((pose[:,:,1]/scale_factor)*h[0] + y_c[0]).unsqueeze(-1)
42 | recovered_pose = torch.cat([x,y],dim=-1)
43 | #print(recovered_pose.shape)
44 | recovered_poses.append(recovered_pose)
45 |
46 | recovered_poses = torch.cat(recovered_poses,dim=1)
47 |
48 | return recovered_poses
49 |
50 | def L1_err(gt,pred,weight=None):
51 |
52 | err = torch.norm((gt - pred), p=1, dim=-1)
53 |
54 | if weight is not None:
55 | err = err*weight
56 |
57 | err = err.mean(dim=-1)
58 |
59 | return err.tolist()
60 |
61 | def L2_err(gt,pred,weight=None):
62 |
63 | err = torch.norm(gt - pred, p=2, dim=-1)
64 |
65 | if weight is not None:
66 | err = err*weight
67 |
68 | err = err.mean(dim=-1)
69 |
70 | return err.tolist()
71 |
72 | def compute_auc(rec_errs,pred_errs,meta,duration,k,dataset,cal_type='sum'):
73 |
74 | if dataset == 'UCF_crime':
75 | fps_dic = load_json(os.path.join(UCF_crime_Dir, 'fps_stats.json'))
76 | frames_dic = load_json(os.path.join(UCF_crime_Dir, 'frame_stats.json'))
77 | label_dic = {}
78 | with open(os.path.join(UCF_crime_Dir, 'Temporal_Anomaly_Annotation_for_Testing_Videos.txt'), 'r') as fin:
79 | for line in fin.readlines():
80 | spl = line.strip().split(' ')
81 | label_dic[spl[0]] = [spl[2], spl[3], spl[4], spl[5]]
82 |
83 | compute_dict = {}
84 |
85 | for rec_err, pred_err, name in zip(rec_errs,pred_errs,meta):
86 |
87 | scene ,frame = name.split('.mp4_')
88 | scene = scene + '.mp4'
89 |
90 | if cal_type=='sum':
91 | err = (1-k/10)*rec_err+(k/10)*pred_err
92 | elif cal_type=='max':
93 | err = max(rec_err,pred_err)
94 |
95 | if scene not in compute_dict:
96 | compute_dict[scene] = {}
97 | if int(frame) not in compute_dict[scene]:
98 | compute_dict[scene][int(frame)] = [err]
99 | else:
100 | compute_dict[scene][int(frame)].append(err)
101 |
102 | max_err_dict = {}
103 | all_label = []
104 | all_score = []
105 | all_nor_score = []
106 |
107 | for scene in compute_dict:
108 | max_err_dict[scene] = []
109 | frames = compute_dict[scene].keys()
110 | sorted_frames = list(sorted(frames))
111 |
112 | label = np.zeros(int(frames_dic[scene]*5/fps_dic[scene]))
113 |
114 | if int(label_dic[scene][0]) != -1 and int(label_dic[scene][1]) != -1:
115 | s1 = int(float(label_dic[scene][0])*5/float(fps_dic[scene]))
116 | f1 = int(float(label_dic[scene][1])*5/float(fps_dic[scene]))
117 | label[s1: f1] = 1
118 |
119 | if int(label_dic[scene][2]) != -1 and int(label_dic[scene][3]) != -1:
120 | s2 = int(float(label_dic[scene][2])*5/float(fps_dic[scene]))
121 | f2 = int(float(label_dic[scene][3])*5/float(fps_dic[scene]))
122 | label[s2: f2] = 1
123 |
124 | label = label.tolist()
125 |
126 | num_frame = len(label)
127 | anchor = 0
128 | for i in range(num_frame):
129 | if i > sorted_frames[-1]:
130 | max_err_dict[scene].append(0)
131 | elif int(sorted_frames[anchor]) == i:
132 | max_rec = max(compute_dict[scene][sorted_frames[anchor]])
133 | max_err_dict[scene].append(max_rec)
134 | anchor += 1
135 | else:
136 | max_err_dict[scene].append(0)
137 |
138 | ano_score = max_err_dict[scene]
139 | all_label.extend(label[duration:])
140 | all_score.extend(ano_score[duration:])
141 | all_nor_score.extend(normalize_anomaly_score(ano_score)[duration:])
142 |
143 | else:
144 | compute_dict = {}
145 |
146 | for rec_err, pred_err, name in zip(rec_errs,pred_errs,meta):
147 | # main scene/ sub scene
148 | if dataset.split('_')[0] == 'ShanghaiTech' or dataset == 'Avenue':
149 | main, sub ,frame = name.split('_')
150 | scene = main + '_' + sub
151 | else:
152 | scene ,frame = name.split('_')
153 |
154 | if cal_type=='sum':
155 | err = (1-k/10)*rec_err+(k/10)*pred_err
156 | elif cal_type=='max':
157 | err = max(rec_err,pred_err)
158 |
159 | if scene not in compute_dict:
160 | compute_dict[scene] = {}
161 | if int(frame) not in compute_dict[scene]:
162 | compute_dict[scene][int(frame)] = [err]
163 | else:
164 | compute_dict[scene][int(frame)].append(err)
165 |
166 | max_err_dict = {}
167 | all_label = []
168 | all_score = []
169 | all_nor_score = []
170 |
171 | for scene in compute_dict:
172 | max_err_dict[scene] = []
173 | frames = compute_dict[scene].keys()
174 | sorted_frames = list(sorted(frames))
175 | if dataset.split('_')[0] == 'ShanghaiTech':
176 | Label_Dir = ShanghaiTech_Lable_Dir + scene
177 |
178 | elif dataset == 'Corridor':
179 |
180 | Label_Dir = Corridor_Label_Dir + scene + '/' +scene
181 |
182 | label = np.load(Label_Dir+'.npy').tolist()
183 | num_frame = len(label)
184 | anchor = 0
185 | for i in range(num_frame):
186 | if i > sorted_frames[-1]:
187 | max_err_dict[scene].append(0)
188 | elif int(sorted_frames[anchor]) == i:
189 | max_rec = max(compute_dict[scene][sorted_frames[anchor]])
190 | max_err_dict[scene].append(max_rec)
191 | anchor += 1
192 | else:
193 | max_err_dict[scene].append(0)
194 |
195 | ano_score = max_err_dict[scene]
196 | all_label.extend(label[duration:])
197 | all_score.extend(ano_score[duration:])
198 | all_nor_score.extend(normalize_anomaly_score(ano_score)[duration:])
199 |
200 | all_score = gaussian_filter1d(all_score, 20)
201 | all_nor_score = gaussian_filter1d(all_nor_score, 20)
202 | AUC = roc_auc_score(all_label, all_score)
203 | AUC_norm = roc_auc_score(all_label, all_nor_score)
204 |
205 | return AUC,AUC_norm
--------------------------------------------------------------------------------
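A self-contained sketch of the per-tracklet error helpers above on synthetic tensors (it assumes the working directory is `src/` so that the module imports resolve):

```python
import torch
from utils.metrics import L1_err, L2_err

# synthetic batch: 4 tracklets, 8 frames x 17 joints flattened, (x, y) per joint
gt = torch.rand(4, 8 * 17, 2)
pred = gt + 0.01 * torch.randn(4, 8 * 17, 2)
print(L1_err(gt, pred))   # list of 4 per-tracklet mean L1 joint errors
print(L2_err(gt, pred))   # list of 4 per-tracklet mean L2 joint errors
```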
/src/models/moprl.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import math
3 | import logging
4 | import torchvision.models
5 |
6 | import torch.nn as nn
7 | import torch.nn.functional as F
8 |
9 | from functools import partial
10 | from timm.models.layers import DropPath
11 | from einops import rearrange, repeat
12 |
13 |
14 | class Mlp(nn.Module):
15 | def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):
16 | super().__init__()
17 | out_features = out_features or in_features
18 | hidden_features = hidden_features or in_features
19 | self.fc1 = nn.Linear(in_features, hidden_features)
20 | self.act = act_layer()
21 | self.fc2 = nn.Linear(hidden_features, out_features)
22 | self.drop = nn.Dropout(drop)
23 |
24 | def forward(self, x):
25 | x = self.fc1(x)
26 | x = self.act(x)
27 | x = self.drop(x)
28 | x = self.fc2(x)
29 | x = self.drop(x)
30 | return x
31 |
32 | class Attention(nn.Module):
33 | def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0.):
34 | super().__init__()
35 | self.num_heads = num_heads
36 | head_dim = dim // num_heads
37 | # NOTE scale factor was wrong in my original version, can set manually to be compat with prev weights
38 | self.scale = qk_scale or head_dim ** -0.5
39 |
40 | self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
41 | self.attn_drop = nn.Dropout(attn_drop)
42 | self.proj = nn.Linear(dim, dim)
43 | self.proj_drop = nn.Dropout(proj_drop)
44 |
45 | def forward(self, x, mask=None):
46 | B, N, C = x.shape
47 | qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
48 | q, k, v = qkv[0], qkv[1], qkv[2] # make torchscript happy (cannot use tensor as tuple)
49 |
50 | attn = (q @ k.transpose(-2, -1)) * self.scale
51 |
52 | #print('mask',mask.shape)
53 | #print('attn',attn.shape)
54 |
55 | if mask is not None:
56 | attn = attn.masked_fill(mask == 0, -1e9)
57 |
58 | attn = attn.softmax(dim=-1)
59 | attn = self.attn_drop(attn)
60 |
61 | x = (attn @ v).transpose(1, 2).reshape(B, N, C)
62 | x = self.proj(x)
63 | x = self.proj_drop(x)
64 | return x
65 |
66 |
67 | class Block(nn.Module):
68 |
69 | def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0.,
70 | drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm):
71 | super().__init__()
72 | self.norm1 = norm_layer(dim)
73 | self.attn = Attention(
74 | dim, num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop, proj_drop=drop)
75 | # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
76 | self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
77 | self.norm2 = norm_layer(dim)
78 | mlp_hidden_dim = int(dim * mlp_ratio)
79 | self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop)
80 |
81 | def forward(self, x,mask=None):
82 | x = x + self.drop_path(self.attn(self.norm1(x),mask))
83 | x = x + self.drop_path(self.mlp(self.norm2(x)))
84 |
85 | return x
86 |
87 | class MoPRL(nn.Module):
88 | def __init__(self, tracklet_len=8, pre_len=1,headless=False, in_chans=2,embed_dim=128, spatial_depth=4, temporal_depth=4,
89 | num_heads=4, mlp_ratio=2., qkv_bias=True, qk_scale=None,
90 | drop_rate=0., attn_drop_rate=0., drop_path_rate=0.1, norm_layer=None, num_joints=17,
91 | ):
92 |
93 | """ ##########hybrid_backbone=None, representation_size=None,
94 | Args:
95 | num_frame (int, tuple): input frame number
96 | headless (bool): use head joints or not
97 | num_joints (int, tuple): joints number
98 | in_chans (int): number of input channels, 2D joints have 2 channels: (x,y)
99 | embed_dim_ratio (int): embedding dimension ratio
100 | depth (int): depth of transformer
101 | num_heads (int): number of attention heads
102 | mlp_ratio (int): ratio of mlp hidden dim to embedding dim
103 | qkv_bias (bool): enable bias for qkv if True
104 | qk_scale (float): override default qk scale of head_dim ** -0.5 if set
105 | drop_rate (float): dropout rate
106 | attn_drop_rate (float): attention dropout rate
107 | drop_path_rate (float): stochastic depth rate
108 | norm_layer: (nn.Module): normalization layer
109 | """
110 | super().__init__()
111 |
112 | # if headless:
113 | # self.num_joints = 14
114 | # else:
115 | # self.num_joints = 17
116 |
117 | self.num_joints = num_joints
118 | self.tracklet_len = tracklet_len
119 | self.pre_len = pre_len
120 |
121 | norm_layer = norm_layer or partial(nn.LayerNorm, eps=1e-6)
122 | self.embed_dim = embed_dim
123 | out_dim = 2
124 |
125 | ### spatial patch embedding
126 | self.pose_embedding = nn.Linear(in_chans, embed_dim)
127 | self.pos_drop = nn.Dropout(p=drop_rate)
128 |
129 |
130 | dpr_s = [x.item() for x in torch.linspace(0, drop_path_rate, spatial_depth)] # stochastic depth decay rule
131 | self.spatial_blocks = nn.ModuleList([
132 | Block(
133 | dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale,
134 | drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr_s[i], norm_layer=norm_layer)
135 | for i in range(spatial_depth)])
136 | self.spatial_norm = norm_layer(embed_dim)
137 |
138 | self.spatial_position_embedding = nn.Embedding(self.num_joints+1,embed_dim)
139 |
140 | dpr_t = [x.item() for x in torch.linspace(0, drop_path_rate, temporal_depth)] # stochastic depth decay rule
141 | self.temporal_blocks = nn.ModuleList([
142 | Block(
143 | dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale,
144 | drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr_t[i], norm_layer=norm_layer)
145 | for i in range(temporal_depth)])
146 | self.temporal_norm = norm_layer(embed_dim)
147 |
148 | self.temporal_postion_embedding = nn.Embedding(tracklet_len+1,embed_dim)
149 |
150 | self.head = nn.Sequential(
151 | nn.LayerNorm(embed_dim),
152 | nn.Linear(embed_dim , out_dim),
153 | )
154 |
155 | def Spatial_Attention(self, x, spatial_tokens):
156 |
157 | b,f,_,_ = x.shape
158 | x = rearrange(x, 'b f w c -> (b f) w c',)
159 |
160 | spatial_embedding = self.spatial_position_embedding(spatial_tokens)
161 | _,_,n,d= spatial_embedding.shape
162 | spatial_embedding = spatial_embedding.view(-1,n,d)
163 | x += spatial_embedding
164 |
165 | x = self.pos_drop(x)
166 |
167 | for blk in self.spatial_blocks:
168 | x = blk(x)
169 |
170 | x = self.spatial_norm(x)
171 | x = rearrange(x, '(b f) w c -> b f w c', f=f)
172 |
173 | return x
174 |
175 | def Temporal_Attention(self, x, temporal_tokens):
176 | # x: b, T, N, embed_dim
177 | temporal_tembedding = self.temporal_postion_embedding(temporal_tokens)
178 | x += temporal_tembedding
179 |
180 | features = self.pos_drop(x)
181 |
182 | _, t, n, _ = features.shape
183 | features = rearrange(features, 'b t n c -> (b n) t c', t=t)
184 |
185 | for blk in self.temporal_blocks:
186 | features = blk(features)
187 |
188 | features = self.temporal_norm(features)
189 |
190 | features = rearrange(features, '(b n) t c -> b t n c', n=n)
191 |
192 | return features
193 |
194 |
195 | def forward(self, pose, spatial_tokens, temporal_tokens):
196 | # pose: (B, T, num_joints, 2)
197 | # spatial_tokens: (B, T, num_joints) joint-index tokens
198 | # temporal_tokens: (B, T, num_joints) frame-index tokens
199 | pose = pose.permute(0, 3, 1, 2)
200 | b, _, f, p = pose.shape ##### b is batch size, f is number of frames, p is number of joints
201 | pose = rearrange(pose, 'b c f p -> (b f) p c', )
202 | pose = self.pose_embedding(pose)
203 | pose = rearrange(pose, '(b f) p c -> b f p c', b=b)
204 | pose = self.Spatial_Attention(pose,spatial_tokens)
205 | pose = self.Temporal_Attention(pose,temporal_tokens)
206 | rec_pose = self.head(pose).reshape(b,-1,2)
207 |
208 | return rec_pose
209 |
210 | if __name__ == '__main__':
211 | model = MoPRL(tracklet_len=8, pre_len=1,headless=False, in_chans=2,embed_dim=64, spatial_depth=4, temporal_depth=4,
212 | num_heads=4, mlp_ratio=2., qkv_bias=True, qk_scale=None,
213 | drop_rate=0., attn_drop_rate=0., drop_path_rate=0.1, norm_layer=None, num_joints=25)
214 |
215 | pose = torch.rand([4,8,25,2])
216 | box = torch.rand([4,8,2,2])
217 | spatial_tokens = torch.randint(0,3,(4,8,25))
218 | temporal_tokens = torch.randint(0,3,(4,8,25))
219 |
220 | output = model(pose,spatial_tokens,temporal_tokens)
221 |
222 | print('output',output.shape)
223 |
224 |
225 |
226 |
--------------------------------------------------------------------------------
/src/utils/losses.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import numpy as np
4 | import math
5 |
6 | class OKS_Loss(nn.Module):
7 | def __init__(self):
8 | super(OKS_Loss, self).__init__()
9 |
10 | def forward(self, predicted_pose, target_pose, weight=None):
11 | # predicted: B,N,2
12 | # mask: B, N
13 | # weitgt: B, N
14 |
15 | assert predicted_pose.shape == target_pose.shape
16 |
17 | norm_pose = torch.norm((predicted_pose - target_pose), p=2, dim=-1)
18 | if weight is not None:
19 | norm_pose = norm_pose.clone() * weight
20 |
21 | loss = norm_pose.mean()
22 | return loss
23 |
24 | class IOU_Loss(nn.Module):
25 | def __init__(self):
26 | super(IOU_Loss, self).__init__()
27 |
28 | def forward(self, predict_, target_, eps=1e-7):
29 | """`Implementation of Distance-IoU Loss: Faster and Better
30 | Learning for Bounding Box Regression, https://arxiv.org/abs/1911.08287`_.
31 |
32 | Code is modified from https://github.com/Zzh-tju/DIoU.
33 |
34 | Args:
35 | pred (Tensor): Predicted bboxes of format (x1, y1, x2, y2),
36 | shape (n, 4).
37 | target (Tensor): Corresponding gt bboxes, shape (n, 4).
38 | eps (float): Eps to avoid log(0).
39 | Return:
40 | Tensor: Loss tensor.
41 | """
42 | # overlap
43 | assert predict_.shape == target_.shape
44 |
45 | pre_xy_min = predict_[:,::2,:]
46 | pre_xy_max = predict_[:,1::2,:]
47 |
48 | gt_xy_min = target_[:,::2,:]
49 | gt_xy_max = target_[:,1::2,:]
50 |
51 | pred = torch.cat([pre_xy_min,pre_xy_max],dim=-1).reshape(-1,4)
52 | target = torch.cat([gt_xy_min,gt_xy_max],dim=-1).reshape(-1,4)
53 |
54 | lt = torch.max(pred[:, :2], target[:, :2])
55 | rb = torch.min(pred[:, 2:], target[:, 2:])
56 | wh = (rb - lt).clamp(min=0)
57 | overlap = wh[:, 0] * wh[:, 1]
58 |
59 | # union
60 | ap = (pred[:, 2] - pred[:, 0]) * (pred[:, 3] - pred[:, 1])
61 | ag = (target[:, 2] - target[:, 0]) * (target[:, 3] - target[:, 1])
62 | union = ap + ag - overlap + eps
63 |
64 | # IoU
65 | ious = overlap / union
66 |
67 | loss = 1 - ious
68 |
69 | loss = torch.where(torch.isnan(loss), torch.full_like(loss, 1.), loss)
70 | loss = torch.where(torch.isinf(loss), torch.full_like(loss, 1.), loss)
71 | loss = loss.mean()
72 |
73 | return loss
74 |
75 |
76 | class GIOU_Loss(nn.Module):
77 | def __init__(self):
78 | super(GIOU_Loss, self).__init__()
79 |
80 | def forward(self, predict_, target_, eps=1e-7):
81 | """`Implementation of Distance-IoU Loss: Faster and Better
82 | Learning for Bounding Box Regression, https://arxiv.org/abs/1911.08287`_.
83 |
84 | Code is modified from https://github.com/Zzh-tju/DIoU.
85 |
86 | Args:
87 | pred (Tensor): Predicted bboxes of format (x1, y1, x2, y2),
88 | shape (n, 4).
89 | target (Tensor): Corresponding gt bboxes, shape (n, 4).
90 | eps (float): Eps to avoid log(0).
91 | Return:
92 | Tensor: Loss tensor.
93 | """
94 | # overlap
95 | assert predict_.shape == target_.shape
96 |
97 | pre_xy_min = predict_[:,::2,:]
98 | pre_xy_max = predict_[:,1::2,:]
99 |
100 | gt_xy_min = target_[:,::2,:]
101 | gt_xy_max = target_[:,1::2,:]
102 |
103 | pred = torch.cat([pre_xy_min,pre_xy_max],dim=-1).reshape(-1,4)
104 | target = torch.cat([gt_xy_min,gt_xy_max],dim=-1).reshape(-1,4)
105 |
106 | lt = torch.max(pred[:, :2], target[:, :2])
107 | rb = torch.min(pred[:, 2:], target[:, 2:])
108 | wh = (rb - lt).clamp(min=0)
109 | overlap = wh[:, 0] * wh[:, 1]
110 |
111 | # union
112 | ap = (pred[:, 2] - pred[:, 0]) * (pred[:, 3] - pred[:, 1])
113 | ag = (target[:, 2] - target[:, 0]) * (target[:, 3] - target[:, 1])
114 | union = ap + ag - overlap + eps
115 |
116 | # IoU
117 | ious = overlap / union
118 |
119 | loss = 1 - ious
120 |
121 | loss = torch.where(torch.isnan(loss), torch.full_like(loss, 1.), loss)
122 | loss = torch.where(torch.isinf(loss), torch.full_like(loss, 1.), loss)
123 | loss = loss.mean()
124 |
125 | return loss
126 |
127 | class DIOU_Loss(nn.Module):
128 | def __init__(self):
129 | super(DIOU_Loss, self).__init__()
130 |
131 | def forward(self, predict_, target_, eps=1e-7):
132 | """`Implementation of Distance-IoU Loss: Faster and Better
133 | Learning for Bounding Box Regression, https://arxiv.org/abs/1911.08287`_.
134 |
135 | Code is modified from https://github.com/Zzh-tju/DIoU.
136 |
137 | Args:
138 | pred (Tensor): Predicted bboxes of format (x1, y1, x2, y2),
139 | shape (n, 4).
140 | target (Tensor): Corresponding gt bboxes, shape (n, 4).
141 | eps (float): Eps to avoid log(0).
142 | Return:
143 | Tensor: Loss tensor.
144 | """
145 | # overlap
146 | assert predict_.shape == target_.shape
147 |
148 | pre_xy_min = predict_[:,::2,:]
149 | pre_xy_max = predict_[:,1::2,:]
150 |
151 | gt_xy_min = target_[:,::2,:]
152 | gt_xy_max = target_[:,1::2,:]
153 |
154 | pred = torch.cat([pre_xy_min,pre_xy_max],dim=-1).reshape(-1,4)
155 | target = torch.cat([gt_xy_min,gt_xy_max],dim=-1).reshape(-1,4)
156 |
157 | lt = torch.max(pred[:, :2], target[:, :2])
158 | rb = torch.min(pred[:, 2:], target[:, 2:])
159 | wh = (rb - lt).clamp(min=0)
160 | overlap = wh[:, 0] * wh[:, 1]
161 |
162 | # union
163 | ap = (pred[:, 2] - pred[:, 0]) * (pred[:, 3] - pred[:, 1])
164 | ag = (target[:, 2] - target[:, 0]) * (target[:, 3] - target[:, 1])
165 | union = ap + ag - overlap + eps
166 |
167 | # IoU
168 | ious = overlap / union
169 |
170 | # enclose area
171 | enclose_x1y1 = torch.min(pred[:, :2], target[:, :2])
172 | enclose_x2y2 = torch.max(pred[:, 2:], target[:, 2:])
173 | enclose_wh = (enclose_x2y2 - enclose_x1y1).clamp(min=0)
174 |
175 | cw = enclose_wh[:, 0]
176 | ch = enclose_wh[:, 1]
177 |
178 | # squared diagonal length of the smallest enclosing box
179 | c2 = cw**2 + ch**2 + eps
180 |
181 | b1_x1, b1_y1 = pred[:, 0], pred[:, 1]
182 | b1_x2, b1_y2 = pred[:, 2], pred[:, 3]
183 | b2_x1, b2_y1 = target[:, 0], target[:, 1]
184 | b2_x2, b2_y2 = target[:, 2], target[:, 3]
185 |
186 | # squared distance between the box centers
187 | left = ((b2_x1 + b2_x2) - (b1_x1 + b1_x2))**2 / 4 #== ((b2_x1 + b2_x2)/2 - (b1_x1 + b1_x2)/2)**2
188 | right = ((b2_y1 + b2_y2) - (b1_y1 + b1_y2))**2 / 4
189 | rho2 = left + right
190 |
191 | # DIoU
192 | dious = ious - rho2 / c2
193 | loss = 1 - dious
194 |
195 | loss = torch.where(torch.isnan(loss), torch.full_like(loss, 1.), loss)
196 | loss = torch.where(torch.isinf(loss), torch.full_like(loss, 1.), loss)
197 | loss = loss.mean()
198 |
199 | return loss
200 |
201 | class CIOU_Loss(nn.Module):
202 | def __init__(self):
203 | super(CIOU_Loss, self).__init__()
204 |
205 | def forward(self, predict, target, eps=1e-7):
206 | predict_ = predict.clone()
207 | target_ = target.clone()
208 |
209 | assert predict_.shape == target_.shape
210 |
211 | pre_xy_min = predict_[:,::2,:]
212 | pre_xy_max = predict_[:,1::2,:]
213 |
214 | gt_xy_min = target_[:,::2,:]
215 | gt_xy_max = target_[:,1::2,:]
216 |
217 | pred = torch.cat([pre_xy_min,pre_xy_max],dim=-1).reshape(-1,4)
218 | target = torch.cat([gt_xy_min,gt_xy_max],dim=-1).reshape(-1,4)
219 |
220 | lt = torch.max(pred[:, :2], target[:, :2])
221 | rb = torch.min(pred[:, 2:], target[:, 2:])
222 | wh = (rb - lt).clamp(min=0)
223 | overlap = wh[:, 0] * wh[:, 1]
224 |
225 | # union
226 | ap = (pred[:, 2] - pred[:, 0]) * (pred[:, 3] - pred[:, 1])
227 | ag = (target[:, 2] - target[:, 0]) * (target[:, 3] - target[:, 1])
228 | union = ap + ag - overlap + eps
229 |
230 | # IoU
231 | ious = overlap / union
232 |
233 | # enclose area
234 | enclose_x1y1 = torch.min(pred[:, :2], target[:, :2])
235 | enclose_x2y2 = torch.max(pred[:, 2:], target[:, 2:])
236 | enclose_wh = (enclose_x2y2 - enclose_x1y1).clamp(min=0)
237 |
238 | cw = enclose_wh[:, 0]
239 | ch = enclose_wh[:, 1]
240 |
241 | c2 = cw**2 + ch**2 + eps
242 |
243 | b1_x1, b1_y1 = pred[:, 0], pred[:, 1]
244 | b1_x2, b1_y2 = pred[:, 2], pred[:, 3]
245 | b2_x1, b2_y1 = target[:, 0], target[:, 1]
246 | b2_x2, b2_y2 = target[:, 2], target[:, 3]
247 |
248 | w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps
249 | w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps
250 |
251 | left = ((b2_x1 + b2_x2) - (b1_x1 + b1_x2))**2 / 4
252 | right = ((b2_y1 + b2_y2) - (b1_y1 + b1_y2))**2 / 4
253 | rho2 = left + right
254 | # aspect-ratio consistency term v from the CIoU formula
255 | factor = 4 / math.pi**2
256 | v = factor * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2)
257 |
258 | # CIoU
259 | cious = ious - (rho2 / c2 + v**2 / (1 - ious + v))
260 | loss = 1 - cious
261 |
262 | loss = torch.where(torch.isnan(loss), torch.full_like(loss, 1.), loss)
263 | loss = torch.where(torch.isinf(loss), torch.full_like(loss, 1.), loss)
264 | loss = loss.mean()
265 |
266 | return loss
267 |
268 | if __name__ == '__main__':
269 | loss = DIOU_Loss()
270 |
271 | predicted = torch.randint(1,10,(2,16,2))
272 | target = torch.randint(1,10,(2,16,2))
273 |
274 | l = loss(predicted,target)
275 |
276 | print(l)
--------------------------------------------------------------------------------
/src/train_test.py:
--------------------------------------------------------------------------------
1 | import os, time, sys, cv2
2 | import torch
3 | import random
4 |
5 | import numpy as np
6 | import torch.nn as nn
7 |
8 | from torch.utils.data import DataLoader
9 | from tqdm import tqdm
10 | from transformers import AdamW,get_linear_schedule_with_warmup
11 |
12 | from utils.logger import get_logger
13 | from utils.load_save import save_parameters,write_json
14 | from utils.losses import OKS_Loss
15 | from utils.visualization import visualize_local_tracklets
16 | from utils.metrics import recover_poses,L1_err,L2_err,compute_auc
17 | from opts import parse_opts
18 | from datasets.datasets import get_training_set, get_test_set
19 | from models.moprl import MoPRL
20 | os.environ["TOKENIZERS_PARALLELISM"] = "false"
21 |
22 | class Train_Eval_Inference(object):
23 |
24 | def __init__(self, opt):
25 |
26 | self.opt = opt
27 | self.dataset_name = opt.dataset
28 | self.exp_name = opt.exp_name
29 |
30 | self.workspace = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../exps/')
31 | self.jobname = opt.dataset
32 | self.exp_dir = os.path.join(self.workspace, self.exp_name)
33 | self.model_save_dir = os.path.join(self.exp_dir, 'models')
34 | self.vis_sample_dir = os.path.join(self.exp_dir, 'vis_samples')
35 | self.test_result_dir = os.path.join(self.exp_dir, 'result')
36 |
37 | self.train_tasks = 'rec'
38 | self.test_tasks = 'rec'
39 | self.scale_factor = 100
40 |
41 | if not os.path.exists(self.model_save_dir):
42 | os.makedirs(self.model_save_dir)
43 | if not os.path.exists(self.vis_sample_dir):
44 | os.makedirs(self.vis_sample_dir)
45 | if not os.path.exists(self.test_result_dir):
46 | os.makedirs(self.test_result_dir)
47 |
48 | # whether to start training from an existing snapshot
49 | self.load_pretrain_model = opt.load_pretrain_model
50 | if self.load_pretrain_model:
51 | self.iter_to_load = opt.iter_to_load
52 |
53 | save_parameters(self.exp_dir,opt)
54 |
55 | train_Dataset = get_training_set(opt)
56 | self.train_loader = DataLoader(train_Dataset, batch_size=opt.batch_size, shuffle=True, num_workers=opt.workers,
57 | pin_memory=True, drop_last=True)
58 | test_Dataset = get_test_set(opt)
59 | self.test_loader = DataLoader(test_Dataset, batch_size=opt.batch_size, shuffle=False, num_workers=opt.workers,
60 | pin_memory=True, drop_last=False)
61 |
62 | self.logger = get_logger(self.exp_dir + '/log.txt')
63 |
64 | self.oks_loss = OKS_Loss()
65 |
66 | if self.opt.dataset == 'ShanghaiTech_AlphaPose' or self.opt.dataset == 'UCF_crime':
67 | self.num_joints = 17
68 | else:
69 | self.num_joints = 25
70 |
71 | def train_batch(self,model,optimizer,epoch,iteration,scheduler=None):
72 |
73 | for input_dict in iter(self.train_loader):
74 |
75 | pose = input_dict['pose'].float().cuda()
76 | weigths = input_dict['weigths'].float().cuda()
77 | gt = input_dict['gt'].float().cuda()
78 | spatial_token = input_dict['spatial_token'].long().cuda()
79 | temporal_token = input_dict['temporal_token'].long().cuda()
80 |
81 | model.zero_grad()
82 | rec_pose = model(pose,spatial_token,temporal_token)
83 |
84 | loss = self.oks_loss(rec_pose,gt,weigths)
85 | loss.backward()
86 | torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
87 | optimizer.step()
88 |
89 | if scheduler is not None:
90 | scheduler.step()
91 |
92 | iteration += 1
93 |
94 | if iteration % self.opt.log_interval == 0:
95 |
96 | self.logger.info("iter {} (epoch {}), loss = {:.6f}".format(iteration, epoch, loss.item()))
97 |
98 | if iteration % self.opt.vis_interval == 0:
99 |
100 | pred_pose = rec_pose[0].cpu().detach()
101 | gt_pose = gt[0].cpu().detach()
102 | pred_local_img = visualize_local_tracklets(pred_pose, self.num_joints)
103 | gt_local_img = visualize_local_tracklets(gt_pose, self.num_joints)
104 | local_imgs = np.vstack([gt_local_img,pred_local_img])
105 | cv2.imwrite(self.vis_sample_dir+'/{}_normal.jpg'.format(str(iteration)),local_imgs)
106 |
107 | if iteration % self.opt.eval_interval == 0 or iteration == 1:
108 |
109 | self.logger.info('Start evaluation!')
110 | model.eval()
111 | l1, l2, all_meta, vis_pose, vis_meta, vis_gt = self.eval_batch(model)
112 | write_json(l1,self.test_result_dir+'/iteration{}_L1.json'.format(str(iteration)))
113 | write_json(l2,self.test_result_dir+'/iteration{}_L2.json'.format(str(iteration)))
114 | write_json(all_meta,self.test_result_dir+'/iteration{}_meta.json'.format(str(iteration)))
115 |
116 | model.train()
117 | torch.save(model.state_dict(), self.model_save_dir+'/{:06d}_model.pth.tar'.format(iteration))
118 |
119 | return iteration
120 |
121 | def eval_batch(self,model):
122 | # Evaluate reconstruction errors over the full test loader
123 | all_err_l1 = []
124 | all_err_l2 = []
125 | all_meta = []
126 | vis_pose = []
127 | vis_meta = []
128 | vis_gt = []
129 |
130 | with torch.no_grad():
131 | for i,input_dict in enumerate(tqdm(self.test_loader)):
132 | #input = input_dict['input_sequence'].float().cuda()
133 | weigths = input_dict['weigths'].float().cuda()
134 | pose = input_dict['pose'].float().cuda()
135 | gt = input_dict['gt'].float()
136 | spatial_token = input_dict['spatial_token'].long().cuda()
137 | temporal_token = input_dict['temporal_token'].long().cuda()
138 | meta = input_dict['meta']
139 | output = model(pose,spatial_token,temporal_token)
140 | err_l1 = L1_err(output.cpu(),gt)
141 | err_l2 = L2_err(output.cpu(),gt)
142 |
143 | all_err_l1.extend(err_l1)
144 | all_err_l2.extend(err_l2)
145 | all_meta.extend(meta)
146 |
147 | L1_auc, L1_norm_auc = compute_auc(all_err_l1,all_err_l1,all_meta,0, 0,self.dataset_name)
148 | L2_auc, L2_norm_auc = compute_auc(all_err_l2,all_err_l2,all_meta,0, 0,self.dataset_name)
149 | self.logger.info('Best AUC under L1 Err: {}'.format(str(round(L1_auc,4)*100)))
150 | self.logger.info('Best AUC under L2 Err: {}'.format(str(round(L2_auc,4)*100)))
151 | self.logger.info('Best AUC under Norm L1 Err: {}'.format(str(round(L1_norm_auc,4)*100)))
152 | self.logger.info('Best AUC under Norm L2 Err: {}'.format(str(round(L2_norm_auc,4)*100)))
153 |
154 | vis_pose_np = np.array(vis_pose)
155 | vis_gt_np = np.array(vis_gt)
156 |
157 | return all_err_l1,all_err_l2,all_meta, vis_pose_np, vis_meta, vis_gt_np
158 |
159 |
160 | def inference(self):
161 |
162 | model = MoPRL(tracklet_len=self.opt.tracklet_len,headless=self.opt.headless,pre_len=self.opt.pre_len,embed_dim=self.opt.embed_dim,
163 | spatial_depth=self.opt.spatial_depth, temporal_depth=self.opt.temporal_depth, num_joints=self.num_joints).cuda()
164 | model.load_state_dict(torch.load(self.opt.model_path))
165 |
166 | # ToDo
167 | self.logger.info('Start evaluation!')
168 | model.eval()
169 |
170 | all_err_l1 = []
171 | all_err_l2 = []
172 | all_err_score = []
173 | all_meta = []
174 | out_pose = []
175 |
176 | with torch.no_grad():
177 | for i,input_dict in enumerate(tqdm(self.test_loader)):
178 |
179 | weigths = input_dict['weigths'].float().cuda()
180 | pose = input_dict['pose'].float().cuda()
181 | gt = input_dict['gt'].float()
182 | spatial_token = input_dict['spatial_token'].long().cuda()
183 | temporal_token = input_dict['temporal_token'].long().cuda()
184 | meta = input_dict['meta']
185 | output = model(pose,spatial_token,temporal_token)
186 |
187 | err_l1 = L1_err(output.cpu(),gt)
188 | err_l2 = L2_err(output.cpu(),gt)
189 | all_err_l1.extend(err_l1)
190 | all_err_l2.extend(err_l2)
191 | all_meta.extend(meta)
192 | out_pose.extend(output.cpu().numpy())
193 |
194 | L1_auc, L1_norm_auc = compute_auc(all_err_l1,all_err_l1,all_meta,0, 0,self.dataset_name)
195 | L2_auc, L2_norm_auc = compute_auc(all_err_l2,all_err_l2,all_meta,0, 0,self.dataset_name)
196 | self.logger.info('Best AUC under L1 Err: {}'.format(str(round(L1_auc,4)*100)))
197 | self.logger.info('Best AUC under L2 Err: {}'.format(str(round(L2_auc,4)*100)))
198 | self.logger.info('Best AUC under Norm L1 Err: {}'.format(str(round(L1_norm_auc,4)*100)))
199 | self.logger.info('Best AUC under Norm L2 Err: {}'.format(str(round(L2_norm_auc,4)*100)))
200 |
201 |
202 | def train_eval(self):
203 |
204 | gpu_ids = range(torch.cuda.device_count())
205 | self.logger.info('Number of GPUs in use {}'.format(gpu_ids))
206 |
207 |
208 | model = MoPRL(tracklet_len=self.opt.tracklet_len,headless=self.opt.headless,pre_len=self.opt.pre_len,embed_dim=self.opt.embed_dim,
209 | spatial_depth=self.opt.spatial_depth, temporal_depth=self.opt.temporal_depth, num_joints=self.num_joints).cuda()
210 |
211 | total_steps = len(self.train_loader)*self.opt.epochs
212 | optimizer = AdamW(model.parameters(), lr=self.opt.lr_rate, eps = 1e-8)
213 | scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps = 1000, num_training_steps = total_steps)
214 |
215 | self.logger.info(self.jobname)
216 |
217 | iteration = 0
218 | if self.load_pretrain_model:
219 | model_name = self.model_save_dir + '/{:06d}_model.pth.tar'.format(self.iter_to_load)
220 | self.logger.info("loading model from {}".format(model_name))
221 | state_dict = torch.load(model_name)  # expects a checkpoint with 'model'/'optimizer' keys; note train_batch saves model.state_dict() only
222 | model.load_state_dict(state_dict['model'])
223 | optimizer.load_state_dict(state_dict['optimizer'])
224 | iteration = self.iter_to_load + 1
225 |
226 | tmp = sum(p.numel() for p in model.parameters())
227 |
228 | self.logger.info('model paras sum: {}'.format(tmp))
229 |
230 | self.logger.info('Start Training!')
231 |
232 | for epoch in range(self.opt.epochs):
233 |
234 | model.train()
235 | iteration = self.train_batch(model,optimizer,epoch,iteration,scheduler=scheduler)
236 |
237 | self.logger.info('End Training!')
238 |
239 | if __name__ == '__main__':
240 |
241 | opt = parse_opts()
242 | print (opt)
243 |
244 | os.environ["CUDA_VISIBLE_DEVICES"] = str(opt.gpu)
245 |
246 | torch.manual_seed(opt.seed)
247 | torch.cuda.manual_seed(opt.seed)
248 | torch.cuda.manual_seed_all(opt.seed)
249 | np.random.seed(opt.seed)
250 | torch.backends.cudnn.benchmark = False
251 | torch.backends.cudnn.deterministic = True
252 | random.seed(opt.seed)
253 |
254 | pipeline = Train_Eval_Inference(opt)
255 |
256 | if opt.inference:
257 | pipeline.inference()
258 | else:
259 | pipeline.train_eval()
--------------------------------------------------------------------------------
/src/datasets/Corridor.py:
--------------------------------------------------------------------------------
1 | import os
2 | import logging
3 | import torch
4 | import copy
5 | import numpy as np
6 | from tqdm import tqdm
7 | from torch.utils.data import Dataset
8 | import glob
9 |
10 | import sys
11 | sys.path.append("../")
12 | from utils.load_save import load_json, write_json
13 | from utils.normalize import *
14 |
15 | POSE_META_FILE = 'pose_meta_{}_length{}_stride{}.json'
16 | POSE_DATA_FILE = 'pose_data_{}_length{}_stride{}.json'
17 |
18 | class Corridor(Dataset):
19 | def __init__(self, pose_dir, split='train', tracklet_len=8 , stride=2, pre_len=1,
20 | normalize_tracklet=True, normalize_score=True, normalize_pose=True,
21 | embed_dim=128,
22 | mask_rec=True,fusion_type='div',motion_type='rayleigh',mask_pro=0.15):
23 |
24 | self.pose_dir = pose_dir
25 | self.split = split
26 | self.tracklet_len = tracklet_len
27 | self.stride = stride
28 | self.frame_width = 1920
29 | self.frame_height = 1080
30 | self.scale_factor = 100
31 | self.mask_rec = mask_rec
32 | self.fusion_type = fusion_type #'none' #fusion_type
33 | self.motion_type = motion_type
34 | self.mask_pro = mask_pro
35 | self.pre_len = pre_len
36 |
37 | self.joints_num = 25
38 | self.type_token, self.spatial_token, self.temporal_token = self._gen_fixed_token_seq()
39 | self.meta_path = pose_dir + '/' + POSE_META_FILE.format(self.split,str(self.tracklet_len),str(self.stride))
40 | self.tracklet_path = pose_dir + '/' + POSE_DATA_FILE.format(self.split,str(self.tracklet_len),str(self.stride))
41 |
42 | self.normalize_tracklet = True # normalize_boxes
43 | self.normalize_score = normalize_score
44 | self.normalize_pose = True # False # normalize_pose
45 |
46 | self._load_tracklets()
47 |
48 | def __len__(self):
49 | return len(self.meta_data)
50 |
51 | def _gen_fixed_token_seq(self):
52 |
53 | type_token = []
54 | spatial_token = []
55 | temporal_token = []
56 | single_type_tok = [0,0] + [1 for n in range(self.joints_num)]
57 |
58 | for i in range(self.tracklet_len):
59 | type_token.extend(single_type_tok)
60 | for j in range(self.joints_num):
61 | spatial_token.append(j)
62 | temporal_token.append(i)
63 |
64 | return torch.tensor(type_token), torch.tensor(spatial_token), torch.tensor(temporal_token)
65 |
66 | def _load_tracklets(self):
67 |
68 | if os.path.exists(self.tracklet_path) and os.path.exists(self.meta_path):
69 | print('Load {} Tracklets from saved files, Tracklet Length {}, Stride {}'.format(self.split, self.tracklet_len, self.stride))
70 | self.meta_data, self.tracklet_data = self._lazy_load_tracklets()
71 | else:
72 | print('Load {} Tracklets from scratch, Tracklet Length {}, Stride {}'.format(self.split, self.tracklet_len, self.stride))
73 | self.meta_data, self.tracklet_data = self._scratch_load_tracklets()
74 |
75 | def _lazy_load_tracklets(self):
76 |
77 | return load_json(self.meta_path), load_json(self.tracklet_path)
78 |
79 | def _scratch_load_tracklets(self):
80 |
81 | meta_data = []
82 | tracklet_data = []
83 | base_dir = self.pose_dir+'/'+self.split+'/'
84 | all_npy = glob.glob(os.path.join(base_dir, "*.npy"))
85 | logging.info('Processing raw tracklets')
86 | filter_less_than = self.tracklet_len * self.stride
87 |
88 | for file in tqdm(all_npy):
89 |
90 | track = np.load(file)
91 | video_id, pid = file.split('/')[-1].split('_')
92 | if len(track) < filter_less_than:
93 | continue
145 | valid = np.logical_or(x>1e-10, y>1e-10)
146 | no_valid = np.logical_and(x<1e-10, y<1e-10)
147 | valid_sum = valid.sum(axis=1)
148 |
149 | valid_sum[valid_sum < 1] = 1
150 |
151 |
152 | x_mean = (x.sum(axis=1)/valid_sum)[:, np.newaxis]
153 | y_mean = (y.sum(axis=1)/valid_sum)[:, np.newaxis]
154 |
155 | tmp = np.zeros_like(x)
156 | tmp[no_valid] = 1
157 |
158 | x += tmp*x_mean
159 | y += tmp*y_mean
160 |
161 | if normalize:
162 | x, y = normalize_pose(x,y)
163 |
164 | if isinstance(x,list):
165 | x, y = np.array(x), np.array(y)
166 |
167 | x[no_valid] = 0
168 | y[no_valid] = 0
169 |
170 | x = np.expand_dims(x,-1)
171 | y = np.expand_dims(y,-1)
172 | pose = np.concatenate((x,y),axis=-1).tolist()
173 |
174 | # (T,17,2)
175 |
176 | return pose
177 |
178 | def _extract_poses_boxes(self,tracklet,normalize_boxes=True,normalize_poses=False):
179 |
180 | if isinstance(tracklet,list):
181 | tracklet = np.array(tracklet)
182 | x = tracklet[:, :25]
183 | y = tracklet[:, 25:50]
184 |
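    | # Joints detected at (0, 0) are treated as missing: they are temporarily filled with
    | # the per-frame mean of the valid joints so box/normalization statistics are not
    | # skewed, and are reset to zero after normalization.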
185 | valid = np.logical_or(x>1e-10, y>1e-10)
186 | no_valid = np.logical_and(x<1e-10, y<1e-10)
187 | valid_sum = valid.sum(axis=1)
188 |
189 | valid_sum[valid_sum < 1] = 1
190 |
191 |
192 | x_mean = (x.sum(axis=1)/valid_sum)[:, np.newaxis]
193 | y_mean = (y.sum(axis=1)/valid_sum)[:, np.newaxis]
194 |
195 | tmp = np.zeros_like(x)
196 | tmp[no_valid] = 1
197 |
198 | x += tmp*x_mean
199 | y += tmp*y_mean
200 |
201 | if normalize_boxes:
202 | box_xy_max = [[x[i, :].max()/self.frame_width,y[i, :].max()/self.frame_height] for i in range(len(x))]
203 | box_xy_min = [[x[i, :].min()/self.frame_width,y[i, :].min()/self.frame_height] for i in range(len(y))]
204 | else:
205 | box_xy_max = [[x[i, :].max(),y[i, :].max()] for i in range(len(x))]
206 | box_xy_min = [[x[i, :].min(),y[i, :].min()] for i in range(len(y))]
207 |
208 | if normalize_poses:
209 | x, y = normalize_pose(x,y)
210 | else:
211 | x, y = center_pose(x, y)
212 |
213 | if isinstance(x,list):
214 | x, y = np.array(x), np.array(y)
215 |
216 | x[no_valid] = 0
217 | y[no_valid] = 0
218 |
219 | # w = np.abs(np.array(box_xy_max)[:,0] - np.array(box_xy_min)[:,0]).max()
220 | # h = np.abs(np.array(box_xy_max)[:,1] - np.array(box_xy_min)[:,1]).max()
221 | # x = x*w
222 | # y = y*h
223 |
224 | x = np.expand_dims(x,-1)
225 | y = np.expand_dims(y,-1)
226 | pose = np.concatenate((x,y),axis=-1).tolist()
227 |
228 | # (T,17,2)
229 |
230 | return pose, box_xy_max, box_xy_min
231 |
232 | def _inters_factor(self, v):
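    | # Maps the normalized box-center displacement v (a proxy for motion speed) to a
    | # pose weighting factor; the Gaussian/Rayleigh constants appear to be fitted to the
    | # velocity distribution of this dataset's training tracklets.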
233 |
234 | if self.motion_type == 'gaussian':
235 |
236 | sigma = 0.18917838310469845
237 | mu = 0.09870275102403338
238 | factor = np.exp(-(np.linalg.norm(v-mu))**2/(2*(sigma**2)))
239 |
240 | if self.motion_type == 'rayleigh':
241 |
242 | sigma = 0.0202
243 | con = 0.0142
244 | factor = v * np.exp(-(v**2)/(2*(sigma**2))) / con
245 |
246 | if self.motion_type == 'uniform':
247 |
248 | factor = 0.5
249 |
250 | if self.motion_type == 'none':
251 |
252 | factor = 1
253 |
254 | return factor*0.7 + 0.3 # avoid zero
255 |
256 | def merge(self,pose,factor):
257 |
258 | if self.fusion_type == 'div':
259 | return pose / factor
260 | if self.fusion_type == 'add':
261 | return pose + factor
262 | if self.fusion_type == 'mul':
263 | return pose * factor
264 |
265 | def _gen_rec_mask(self,mask,prob=0.15):
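    | # BERT-style random masking: each position is independently dropped with
    | # probability `prob`; the returned 0/1 list marks which positions are kept.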
266 |
267 | ref = torch.ones_like(torch.tensor(mask))
268 | masked_indices = torch.bernoulli(torch.full(ref.shape, prob)).bool()
269 | ref[masked_indices] = 0
270 |
271 | return ref.tolist()
272 |
273 | def _flat_input(self,poses, boxes_max, boxes_min, scores):
274 |
275 | assert len(poses) == len(boxes_max)
276 | assert len(boxes_max) == len(boxes_min)
277 | assert len(poses) == len(scores)
278 |
279 | pose_fusion = []
280 |
281 | weights = []
282 | inters = []
283 | poses_np = np.array(poses)
284 | boxes_max_np = np.array(boxes_max)
285 | boxes_min_np = np.array(boxes_min)
286 |
287 | for i in range(len(poses_np)-1):
288 | v = np.linalg.norm((boxes_max_np[i] + boxes_min_np[i])/2 - (boxes_max_np[(i+1)] + boxes_min_np[(i+1)])/2)
289 | v_norm = v/((boxes_max_np[i] - boxes_min_np[i] + 1e-6).mean())
290 | inters.append(self._inters_factor(v_norm))
291 |
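    | # The last frame has no following frame to measure displacement against, so the
    | # factor of the previous interval is reused to give one factor per frame.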
292 | inters.append(inters[len(poses_np)-2])
293 |
294 | # inters = [max(inters)] * len(poses_np)
295 |
296 | pose_fusion = self.merge(poses_np[0],inters[0])[np.newaxis, :, :]
297 | weights.extend(scores[0])
298 | ## begin
299 | for i in range(len(poses)-1):
300 | pose_fusion = np.concatenate((pose_fusion, (self.merge(poses_np[i+1],inters[i+1]))[np.newaxis, :, :]), axis = 0)
301 | weights.extend(scores[i+1])
302 |
303 | return weights, pose_fusion.tolist()
304 |
305 | def __getitem__(self, idx):
306 |
307 | meta = self.meta_data[idx]
308 | tracklet = self.tracklet_data[idx]
309 | # boxes_max, boxes_min = self._extract_boxes(tracklet,self.normalize_tracklet)
310 | scores = self._extract_conf_score(tracklet,self.normalize_score)
311 | # poses = self._extract_poses(tracklet,self.normalize_pose)
312 | poses, boxes_max, boxes_min = self._extract_poses_boxes(tracklet,self.normalize_tracklet,self.normalize_pose)
313 |
314 | poses_gt = copy.deepcopy(poses)
315 | weights, pose_fusion = self._flat_input(poses, boxes_max, boxes_min,scores)
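    | # The fused coordinates below are scaled by scale_factor and cast to int32,
    | # presumably so they can serve as discrete indices for the embedding layers.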
316 | pose_fusion = (torch.tensor(pose_fusion)*self.scale_factor).to(torch.int32)
317 |
318 | weights = torch.tensor(weights)
319 | poses_gt = (torch.tensor(poses_gt)*self.scale_factor).to(torch.int32)
320 | gt = poses_gt.reshape(-1,2)
321 | #poses_gt = torch.chunk(poses_gt,self.tracklet_len,0)
322 | weights = torch.chunk(weights,self.tracklet_len,0)
323 |
324 | weights = torch.cat([weights[i] for i in range(len(weights))],dim=0)
325 | #gt = torch.cat([poses_gt[i] for i in range(len(poses_gt))],dim=0)
326 |
327 | spatial_token = self.spatial_token.reshape(self.tracklet_len,-1)
328 | temporal_token = self.temporal_token.reshape(self.tracklet_len,-1)
329 |
330 | if self.mask_rec:
331 | mask = [1 for i in range((self.tracklet_len)*(self.joints_num))]
332 | mask = self._gen_rec_mask(mask, self.mask_pro)
333 | mask = torch.tensor(mask)
334 | # print(mask)
335 | mask_ = torch.cat((mask.unsqueeze(0),mask.unsqueeze(0)),dim=0).permute(1,0)
336 | mask_index = mask_==0
337 | mask_index = mask_index.reshape(self.tracklet_len,self.joints_num,-1)
338 | pose_fusion[mask_index] = 0
339 |
340 | input_dict = {
341 | 'meta': meta,
342 | 'pose': pose_fusion,
343 | 'gt': gt,
344 | 'weigths': weights,
345 | 'spatial_token':spatial_token,
346 | 'temporal_token':temporal_token,
347 | 'frame_width':self.frame_width,
348 | 'frame_height':self.frame_height,
349 | 'scale_factor': self.scale_factor,
350 | 'joints_num':self.joints_num
351 | }
352 |
353 | return input_dict
354 |
355 | if __name__ == '__main__':
356 |
357 | from dataset_path import *
358 | import cv2
359 | from torch.utils.data import DataLoader
360 | sys.path.append(".")
361 | from utils.visualization import visualize_local_tracklets
362 | #from utils.metrics import recover_poses
363 |
364 | debug_Dataset = Corridor(pose_dir=Corridor_Pose_Dir,split='train',tracklet_len=8 , stride=2, pre_len=4)
365 | dataloader = DataLoader(debug_Dataset, batch_size=2, shuffle=True, num_workers=0)
366 | VIS = False
367 |
368 | for i, input_dict in enumerate(tqdm(dataloader)):
369 |
370 | print(input_dict['pose'].size())
371 | print(input_dict['weigths'].size())
372 | print(input_dict['gt'].size())
373 | print(input_dict['spatial_token'].size())
374 | print(input_dict['temporal_token'].size())
375 | print(input_dict['meta'])
376 |
377 | print("----------",i,"-------------")
378 |
379 | if i>10:
380 | break
381 |
--------------------------------------------------------------------------------
/src/datasets/ShanghaiTech_AlphaPose.py:
--------------------------------------------------------------------------------
1 | import os
2 | import logging
3 | from sklearn.utils import shuffle
4 | import torch
5 | import copy
6 | import numpy as np
7 | from tqdm import tqdm
8 | from torch.utils.data import Dataset
9 | import random
10 |
11 | import sys
12 | sys.path.append("../../src")
13 | from utils.load_save import load_json, write_json
14 | from utils.normalize import normalize_score,normalize_pose
15 | from datasets.dataset_path import ShanghaiTech_Lable_Dir
16 |
17 | POSE_META_FILE = 'pose_meta_{}_length{}_stride{}.json'
18 | POSE_DATA_FILE = 'pose_data_{}_length{}_stride{}.json'
19 | SHT_LABEL = ShanghaiTech_Lable_Dir
20 |
21 | class ShanghaiTech_AlphaPose(Dataset):
22 | def __init__(self, pose_dir, split='train', tracklet_len=8 , stride=2, pre_len=1,head_less=False,
23 | normalize_tracklet=True, normalize_score=True,
24 | normalize_pose=True,embed_dim=128,
25 | mask_rec=True,fusion_type='div',motion_type='rayleigh', mask_pro=0.15, noise_factor = 0.1):
26 |
27 | self.pose_dir = pose_dir
28 | self.split = split
29 | self.head_less = head_less
30 | self.tracklet_len = tracklet_len
31 | self.stride = stride
32 | self.frame_width = 856
33 | self.frame_height = 480
34 | self.scale_factor = 100
35 | self.mask_rec = mask_rec
36 | self.fusion_type = fusion_type
37 | self.motion_type = motion_type
38 | self.mask_pro = mask_pro
39 | self.noise_factor = noise_factor
40 |
41 | if self.head_less:
42 | self.joints_num =14
43 | else:
44 | self.joints_num =17
45 |
46 | self.pre_len = pre_len
47 |
48 | self.type_token, self.spatial_token, self.temporal_token = self._gen_fixed_token_seq()
49 | self.meta_path = pose_dir + '/' + POSE_META_FILE.format(self.split,str(self.tracklet_len),str(self.stride))
50 | self.tracklet_path = pose_dir + '/' + POSE_DATA_FILE.format(self.split,str(self.tracklet_len),str(self.stride))
51 |
52 | self.normalize_tracklet = normalize_tracklet
53 | self.normalize_score = normalize_score
54 | self.normalize_pose = normalize_pose
55 |
56 | self._load_tracklets()
57 |
58 | if self.split == 'train':
59 | self.add_noise(self.noise_factor)
60 |
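    | # Mixes a fraction (noise_factor) of abnormal tracklets from the test split into the
    | # training data, apparently to study robustness to label noise during training.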
61 | def add_noise(self, noise_factor):
62 |
63 | print('adding noise')
64 | test_meta_path = self.pose_dir + '/' + POSE_META_FILE.format('test',str(self.tracklet_len),str(self.stride))
65 | test_tracklet_path = self.pose_dir + '/' + POSE_DATA_FILE.format('test',str(self.tracklet_len),str(self.stride))
66 | test_meta, test_tracklet = load_json(test_meta_path), load_json(test_tracklet_path)
67 | label_dic = {}
68 | abnormal_list = []
69 |
70 | label_filenames = os.listdir(SHT_LABEL)
71 | for filename in label_filenames:
72 | filepath = os.path.join(SHT_LABEL, filename)
73 | label_dic[filename[:-4]] = np.load(filepath)
74 |
75 | for i, name in enumerate(test_meta):
76 | main, sub ,frame = name.split('_')
77 | scene = main + '_' + sub
78 | frame = int(frame)
79 | if label_dic[scene][frame] == 1:
80 | abnormal_list.append(i)
81 |
82 | print('number of normal training tracklets: {}'.format(len(self.meta_data)))
83 | print('number of abnormal test tracklets: {}'.format(len(abnormal_list)))
84 | add_noise_index = random.sample(abnormal_list, int(len(abnormal_list)*noise_factor))
85 | print('number of abnormal tracklets added as noise: {}'.format(len(add_noise_index)))
86 |
87 | # for i in range(1000):
88 | # main, sub ,frame = test_meta[add_noise_index[i]].split('_')
89 | # scene = main + '_' + sub
90 | # print(label_dic[scene][int(frame)])
91 |
92 | # sys.exit(0)
93 |
94 | for index in add_noise_index:
95 | self.meta_data.append(test_meta[index])
96 | self.tracklet_data.append(test_tracklet[index])
97 |
98 | del test_meta
99 | del test_tracklet
100 |
101 |
102 | def __len__(self):
103 | return len(self.meta_data)
104 |
105 | def _gen_fixed_token_seq(self):
106 |
107 | type_token = []
108 | spatial_token = []
109 | temporal_token = []
110 | single_type_tok = [0,0] + [1 for n in range(self.joints_num)]
111 |
112 | for i in range(self.tracklet_len):
113 | type_token.extend(single_type_tok)
114 | for j in range(self.joints_num):
115 | spatial_token.append(j)
116 | temporal_token.append(i)
117 |
118 | return torch.tensor(type_token), torch.tensor(spatial_token), torch.tensor(temporal_token)
119 |
120 | def _load_tracklets(self):
121 |
122 | if os.path.exists(self.tracklet_path) and os.path.exists(self.meta_path):
123 | print('Load {} Tracklets from saved files, Tracklet Length {}, Stride {}'.format(self.split, self.tracklet_len, self.stride))
124 | self.meta_data, self.tracklet_data = self._lazy_load_tracklets()
125 | else:
126 | print('Load {} Tracklets from scratch, Tracklet Length {}, Stride {}'.format(self.split, self.tracklet_len, self.stride))
127 | self.meta_data, self.tracklet_data = self._scratch_load_tracklets()
128 |
129 | def _lazy_load_tracklets(self):
130 |
131 | return load_json(self.meta_path), load_json(self.tracklet_path)
132 |
133 | def _scratch_load_tracklets(self):
134 |
135 | meta_data = []
136 | tracklet_data = []
137 | base_dir = self.pose_dir+'/'+self.split+'/tracked_person/'
138 | all_json = os.listdir(base_dir)
139 | logging.info('Processing raw tracklets')
140 | filter_less_than = self.tracklet_len * self.stride
141 |
142 | for file in tqdm(all_json):
143 | scene_tracks = load_json(base_dir+file)
144 | person_num = len(scene_tracks.keys())
145 | for p in scene_tracks.keys():
146 | tracks = scene_tracks[p]
147 | frame_num = len(tracks.keys())
148 | if frame_num < filter_less_than:
149 | continue
150 |
151 | if self.split == 'train':
152 | if frame_num < filter_less_than*2:
153 | continue
154 |
155 | frame_index = list(sorted(tracks.keys()))
156 | for i in range(len(frame_index)-self.tracklet_len*self.stride):
157 | select_frame = frame_index[i : i+self.tracklet_len*self.stride : self.stride]
158 | simple_pose = [ np.around(np.array(tracks[f]['keypoints']),2).tolist() for f in select_frame ]
159 | meta_data.append(file.split('_')[0]+'_'+file.split('_')[1]+'_'+select_frame[-1])
160 | tracklet_data.append(simple_pose)
161 |
162 | print('Process Done. Sample amount: ', len(meta_data))
163 | write_json(meta_data,self.meta_path)
164 | print('Save meta data Done')
165 | write_json(tracklet_data,self.tracklet_path)
166 | print('Save data Done')
167 |
168 | return meta_data,tracklet_data
169 |
170 | # tracklet[8,17*3] (x,y,c)
171 |
172 | def _extract_boxes(self,tracklet,normalize=True):
173 |
174 | if normalize:
175 | box_xy_max = [[max(pose[::3])/self.frame_width,max(pose[1::3])/self.frame_height] for pose in tracklet]
176 | box_xy_min = [[min(pose[::3])/self.frame_width,min(pose[1::3])/self.frame_height] for pose in tracklet]
177 | else:
178 | box_xy_max = [[max(pose[::3]),max(pose[1::3])] for pose in tracklet]
179 | box_xy_min = [[min(pose[::3]),min(pose[1::3])] for pose in tracklet]
180 |
181 | return box_xy_max , box_xy_min
182 |
183 | def _extract_conf_score(self,tracklet,normalize=True):
184 |
185 | scores = []
186 | for pose in tracklet:
187 | pose_score = np.array(pose[2::3])
188 | if normalize:
189 | pose_score = normalize_score(pose_score)
190 | scores.append(pose_score.tolist())
191 |
192 | return scores
193 |
194 | def _extract_poses(self,tracklet,normalize=True):
195 |
196 | if isinstance(tracklet,list):
197 | tracklet = np.array(tracklet)
198 | x = tracklet[:, ::3]
199 | y = tracklet[:, 1::3]
200 |
201 | if normalize:
202 | x, y = normalize_pose(x,y)
203 |
204 | if isinstance(x,list):
205 | x, y = np.array(x), np.array(y)
206 |
207 | x = np.expand_dims(x,-1)
208 | y = np.expand_dims(y,-1)
209 | pose = np.concatenate((x,y),axis=-1).tolist()
210 |
211 | # (T,17,2)
212 |
213 | return pose
214 |
215 | def _inters_factor(self, v):
216 |
217 | if self.motion_type == 'gaussian':
218 |
219 | sigma = 0.18917838310469845
220 | mu = 0.09870275102403338
221 | factor = np.exp(-(np.linalg.norm(v-mu))**2/(2*(sigma**2)))
222 |
223 | if self.motion_type == 'rayleigh':
224 |
225 | if self.tracklet_len == 8 and self.stride == 2:
226 |
227 | sigma = 0.0464
228 | con = 0.0299
229 |
230 | factor = v * np.exp(-(v**2)/(2*(sigma**2))) / con
231 |
232 | if self.motion_type == 'uniform':
233 |
234 | factor = 0.5
235 |
236 | if self.motion_type == 'none':
237 |
238 | factor = 1
239 |
240 | if self.motion_type == 'random':
241 |
242 | factor = 1 + 0.5*np.random.rand()
243 |
244 | return factor*0.7 + 0.3
245 |
246 | def merge(self,pose,factor):
247 |
248 | if self.fusion_type == 'div':
249 | return pose / factor
250 | if self.fusion_type == 'add':
251 | return pose + factor
252 | if self.fusion_type == 'mul':
253 | return pose * factor
254 |
255 | def _gen_rec_mask(self,mask,prob=0.15):
256 |
257 | ref = torch.ones_like(torch.tensor(mask))
258 | masked_indices = torch.bernoulli(torch.full(ref.shape, prob)).bool()
259 | ref[masked_indices] = 0
260 |
261 | return ref.tolist()
262 |
263 | def _flat_input(self,poses, boxes_max, boxes_min, scores):
264 |
265 | assert len(poses) == len(boxes_max)
266 | assert len(boxes_max) == len(boxes_min)
267 | assert len(poses) == len(scores)
268 |
269 | pose_fusion = []
270 |
271 | weights = []
272 | inters = []
273 | poses_np = np.array(poses)
274 | boxes_max_np = np.array(boxes_max)
275 | boxes_min_np = np.array(boxes_min)
276 |
277 | for i in range(len(poses_np)-1):
278 | v = np.linalg.norm((boxes_max_np[i] + boxes_min_np[i])/2 - (boxes_max_np[(i+1)] + boxes_min_np[(i+1)])/2)
279 | v_norm = v/((boxes_max_np[i] - boxes_min_np[i] + 1e-6).mean())  # epsilon avoids division by zero for degenerate boxes
280 | inters.append(self._inters_factor(v_norm))
281 |
282 | inters.append(inters[len(poses_np)-2])
283 | pose_fusion = self.merge(poses_np[0],inters[0])[np.newaxis, :, :]
284 | weights.extend(scores[0])
285 | ## begin
286 | for i in range(len(poses)-1):
287 | pose_fusion = np.concatenate((pose_fusion, (self.merge(poses_np[i+1],inters[i+1]))[np.newaxis, :, :]), axis = 0)
288 | weights.extend(scores[i+1])
289 |
290 | return weights, pose_fusion.tolist()
291 |
292 | def __getitem__(self, idx):
293 |
294 | meta = self.meta_data[idx]
295 | tracklet = self.tracklet_data[idx]
296 | boxes_max, boxes_min = self._extract_boxes(tracklet,self.normalize_tracklet)
297 | scores = self._extract_conf_score(tracklet,self.normalize_score)
298 | poses = self._extract_poses(tracklet,self.normalize_pose)
299 |
300 | poses_gt = copy.deepcopy(poses)
301 | weights, pose_fusion = self._flat_input(poses, boxes_max, boxes_min,scores)
302 | pose_fusion = (torch.tensor(pose_fusion)*self.scale_factor).to(torch.int32)
303 |
304 | weights = torch.tensor(weights)
305 | poses_gt = (torch.tensor(poses_gt)*self.scale_factor).to(torch.int32)
306 | gt = poses_gt.reshape(-1,2)
307 | #poses_gt = torch.chunk(poses_gt,self.tracklet_len,0)
308 | weights = torch.chunk(weights,self.tracklet_len,0)
309 |
310 | weights = torch.cat([weights[i] for i in range(len(weights))],dim=0)
311 | #gt = torch.cat([poses_gt[i] for i in range(len(poses_gt))],dim=0)
312 |
313 | spatial_token = self.spatial_token.reshape(self.tracklet_len,-1)
314 | temporal_token = self.temporal_token.reshape(self.tracklet_len,-1)
315 |
316 | if self.mask_rec and self.split=='train':
317 | mask = [1 for i in range((self.tracklet_len)*(self.joints_num))]
318 | mask = self._gen_rec_mask(mask,self.mask_pro)
319 | mask = torch.tensor(mask)
320 | # print(mask)
321 | mask_ = torch.cat((mask.unsqueeze(0),mask.unsqueeze(0)),dim=0).permute(1,0)
322 | mask_index = mask_==0
323 | mask_index = mask_index.reshape(self.tracklet_len,self.joints_num,-1)
324 | pose_fusion[mask_index] = 0
325 |
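    | # When pre_len > 0, the last pre_len frames of the input are zeroed so the model
    | # must predict them (a future-pose prediction objective).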
326 | if self.pre_len>0 :
327 | mask = torch.tensor([1 for i in range((self.tracklet_len-self.pre_len)*(self.joints_num))] + [0 for i in range(self.joints_num*self.pre_len)])
328 | mask_ = torch.cat((mask.unsqueeze(0),mask.unsqueeze(0)),dim=0).permute(1,0)
329 | mask_ = mask_.reshape(self.tracklet_len, self.joints_num, 2)
330 | mask_index = mask_==0
331 | pose_fusion[mask_index] = 0
332 |
333 | input_dict = {
334 | 'meta': meta,
335 | 'pose': pose_fusion,
336 | 'gt': gt,
337 | 'weigths': weights,
338 | 'spatial_token':spatial_token,
339 | 'temporal_token':temporal_token,
340 | 'frame_width':self.frame_width,
341 | 'frame_height':self.frame_height,
342 | 'scale_factor': self.scale_factor,
343 | 'joints_num':self.joints_num
344 | }
345 |
346 | return input_dict
347 |
348 | if __name__ == '__main__':
349 |
350 | from dataset_path import *
351 | # import cv2
352 | from torch.utils.data import DataLoader
353 | sys.path.append(".")
354 | from utils.visualization import visualize_local_tracklets
355 | #from utils.metrics import recover_poses
356 |
357 | debug_Dataset = ShanghaiTech_AlphaPose(pose_dir=ShanghaiTech_AlphaPose_Dir,split='test',tracklet_len=8 , stride=2, head_less=False,pre_len=4)
358 | dataloader = DataLoader(debug_Dataset, batch_size=2, shuffle=True, num_workers=0)
359 | VIS = False
360 |
361 | for i, input_dict in enumerate(tqdm(dataloader)):
362 |
363 | # print(input_dict['MPP_GT'].size())
364 | # print(input_dict['MPR_GT'].size())
365 | #print(input_dict['pose'])
366 | print(input_dict['spatial_token'].size())
367 | print(input_dict['temporal_token'].size())
368 | print(input_dict['meta'])
369 | #recovered_poses = recover_poses(input_dict['MPP_GT'],input_dict['MTP_GT'],'ShanghaiTech')
370 | #print('recovered_poses',recovered_poses.shape)
371 |
372 | print("----------",i,"-------------")
373 |
374 | if i>10:
375 | break
376 |
377 |
378 |
--------------------------------------------------------------------------------
/src/datasets/UCF_crime.py:
--------------------------------------------------------------------------------
1 | import os
2 | import logging
3 | from sklearn.utils import shuffle
4 | import torch
5 | import copy
6 | import numpy as np
7 | from tqdm import tqdm
8 | from torch.utils.data import Dataset
9 | import random
10 |
11 | import sys
12 | sys.path.append("../../src")
13 | from utils.load_save import load_json, write_json
14 | from utils.normalize import normalize_score,normalize_pose
15 |
16 | POSE_META_FILE = 'processed/pose_meta_{}_length{}_stride{}.json'
17 | POSE_DATA_FILE = 'processed/pose_data_{}_length{}_stride{}.json'
18 |
19 | class UCF_crime(Dataset):
20 | def __init__(self, pose_dir, split='train', tracklet_len=8, stride=1, pre_len=0, head_less=False,
21 | normalize_tracklet=True, normalize_score=True,
22 | normalize_pose=True, embed_dim=128,
23 | mask_rec=True,fusion_type='div',motion_type='rayleigh', mask_pro=0.15):
24 |
25 | self.pose_dir = pose_dir
26 | self.split = split
27 | self.head_less = head_less
28 | self.tracklet_len = tracklet_len
29 | self.stride = stride
30 | ## TO DO
31 | self.frame_width = 320
32 | self.frame_height = 240
33 | ## TO DO END
34 | self.scale_factor = 100
35 | self.mask_rec = mask_rec
36 | self.fusion_type = fusion_type
37 | self.motion_type = motion_type
38 | self.mask_pro = mask_pro
39 |
40 | if self.head_less:
41 | self.joints_num =14
42 | else:
43 | self.joints_num =17
44 |
45 | self.pre_len = pre_len
46 |
47 | self.type_token, self.spatial_token, self.temporal_token = self._gen_fixed_token_seq()
48 | self.meta_path = os.path.join(pose_dir, POSE_META_FILE.format(self.split,str(self.tracklet_len),str(self.stride)))
49 | self.tracklet_path = os.path.join(pose_dir, POSE_DATA_FILE.format(self.split,str(self.tracklet_len),str(self.stride)))
50 |
51 | self.processed_path = os.path.dirname(self.meta_path)
52 | os.makedirs(self.processed_path, exist_ok=True)
53 |
54 | self.normalize_tracklet = normalize_tracklet
55 | self.normalize_score = normalize_score
56 | self.normalize_pose = normalize_pose
57 |
58 | self._load_tracklets()
59 | print('dataset length: {}'.format(self.__len__()))
60 |
61 | def __len__(self):
62 | return len(self.meta_data)
63 |
64 | def _gen_fixed_token_seq(self):
65 |
66 | type_token = []
67 | spatial_token = []
68 | temporal_token = []
69 | single_type_tok = [0,0] + [1 for n in range(self.joints_num)]
70 |
71 | for i in range(self.tracklet_len):
72 | type_token.extend(single_type_tok)
73 | for j in range(self.joints_num):
74 | spatial_token.append(j)
75 | temporal_token.append(i)
76 |
77 | return torch.tensor(type_token), torch.tensor(spatial_token), torch.tensor(temporal_token)
78 |
79 | def _load_tracklets(self):
80 |
81 | if os.path.exists(self.tracklet_path) and os.path.exists(self.meta_path):
82 | print('Load {} Tracklets from saved files, Tracklet Length {}, Stride {}'.format(self.split, self.tracklet_len, self.stride))
83 | self.meta_data, self.tracklet_data = self._lazy_load_tracklets()
84 | else:
85 | print('Load {} Tracklets from scratch, Tracklet Length {}, Stride {}'.format(self.split, self.tracklet_len, self.stride))
86 | self.meta_data, self.tracklet_data = self._scratch_load_tracklets()
87 |
88 | def _lazy_load_tracklets(self):
89 |
90 | return load_json(self.meta_path), load_json(self.tracklet_path)
91 |
92 | ## TO DO
93 |
94 | def _scratch_load_tracklets(self):
95 |
96 | meta_data = []
97 | tracklet_data = []
98 |
99 | split_info = load_json(os.path.join(self.pose_dir, 'train_test_split.json'))
100 | name_list = split_info[self.split]
101 |
102 | base_dir = os.path.join(self.pose_dir, 'ucf_samples')
103 | logging.info('Processing raw tracklets')
104 | filter_less_than = self.tracklet_len * self.stride
105 |
106 | for name in tqdm(name_list):
107 | filepath = os.path.join(base_dir, name[:-4], 'alphapose-results.json')
108 | if not os.path.exists(filepath):
109 | continue
110 | origin_tracks = load_json(filepath)
111 | person_tracks_frame = {}
112 | person_tracks_pose = {}
113 | person_tracks_frame_exist = {}
114 | person_frame_for_search = {}
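    | # Per-person buffers are indexed directly by frame id and pre-allocated to a fixed
    | # size (1e6 frames), so strided windows can later be checked for contiguity via the
    | # boolean existence array.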
115 | for detected in origin_tracks:
116 | if detected['idx'] not in person_tracks_frame.keys():
117 | person_frame_for_search[detected['idx']] = []
118 | person_tracks_frame[detected['idx']] = [None]*1000000
119 | person_tracks_pose[detected['idx']] = [None]*1000000
120 | person_tracks_frame_exist[detected['idx']] = np.zeros(1000000, dtype=bool)
121 | else:
122 | person_frame_for_search[detected['idx']].append(int(detected['image_id'][:-4]))
123 | person_tracks_frame[detected['idx']][int(detected['image_id'][:-4])] = detected['image_id'][:-4].rjust(4, '0')
124 | person_tracks_pose[detected['idx']][int(detected['image_id'][:-4])] = detected['keypoints']
125 | person_tracks_frame_exist[detected['idx']][int(detected['image_id'][:-4])] = True
126 |
127 | # person_num = len(person_tracks.keys())
128 | for p in person_frame_for_search.keys():
129 | frame_num = len(person_frame_for_search[p])
130 | if frame_num < filter_less_than:
131 | continue
132 |
133 | if self.split == 'train':
134 | if frame_num < filter_less_than*2:
135 | continue
136 |
137 | ### version1
138 | # for i in range(frame_num-self.tracklet_len*self.stride):
139 | # simple_pose = person_tracks_pose[p][i : i+self.tracklet_len*self.stride : self.stride]
140 | # meta_data.append(name+'_'+person_tracks_frame[p][i+(self.tracklet_len-1)*self.stride])
141 | # tracklet_data.append(simple_pose)
142 |
143 | ### version2
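    | # A window is kept only if the person was detected in every strided frame, as
    | # verified through the per-frame existence flags.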
144 | for j in range(frame_num):
145 | i = int(person_frame_for_search[p][j])
146 | if np.all(person_tracks_frame_exist[p][i : i+self.tracklet_len*self.stride : self.stride]):
147 | simple_pose = person_tracks_pose[p][i : i+self.tracklet_len*self.stride : self.stride]
148 | meta_data.append(name+'_'+person_tracks_frame[p][i+(self.tracklet_len-1)*self.stride])
149 | tracklet_data.append(simple_pose)
150 |
151 | print('Process Done. Sample amount: ', len(meta_data))
152 | write_json(meta_data,self.meta_path)
153 | print('Save meta data Done')
154 | write_json(tracklet_data,self.tracklet_path)
155 | print('Save data Done')
156 |
157 | return meta_data,tracklet_data
158 |
159 | ## TO DO END
160 |
161 | # tracklet[8,17*3] (x,y,c)
162 |
163 | def _extract_boxes(self,tracklet,normalize=True):
164 |
165 | if normalize:
166 | box_xy_max = [[max(pose[::3])/self.frame_width,max(pose[1::3])/self.frame_height] for pose in tracklet]
167 | box_xy_min = [[min(pose[::3])/self.frame_width,min(pose[1::3])/self.frame_height] for pose in tracklet]
168 | else:
169 | box_xy_max = [[max(pose[::3]),max(pose[1::3])] for pose in tracklet]
170 | box_xy_min = [[min(pose[::3]),min(pose[1::3])] for pose in tracklet]
171 |
172 | return box_xy_max , box_xy_min
173 |
174 | def _extract_conf_score(self,tracklet,normalize=True):
175 |
176 | scores = []
177 | for pose in tracklet:
178 | pose_score = np.array(pose[2::3])
179 | if normalize:
180 | pose_score = normalize_score(pose_score)
181 | scores.append(pose_score.tolist())
182 |
183 | return scores
184 |
185 | def _extract_poses(self,tracklet,normalize=True):
186 |
187 | if isinstance(tracklet,list):
188 | tracklet = np.array(tracklet)
189 | x = tracklet[:, ::3]
190 | y = tracklet[:, 1::3]
191 |
192 | if normalize:
193 | x, y = normalize_pose(x,y)
194 |
195 | if isinstance(x,list):
196 | x, y = np.array(x), np.array(y)
197 |
198 | x = np.expand_dims(x,-1)
199 | y = np.expand_dims(y,-1)
200 | pose = np.concatenate((x,y),axis=-1).tolist()
201 |
202 | # (T,17,2)
203 |
204 | return pose
205 |
206 | ## TO DO
207 |
208 | def _inters_factor(self, v):
209 |
210 | if self.motion_type == 'gaussian':
211 |
212 | # sigma = 0.18917838310469845
213 | # mu = 0.09870275102403338
214 | # factor = np.exp(-(np.linalg.norm(v-mu))**2/(2*(sigma**2)))
215 | pass  # Gaussian parameters are not fitted for UCF-Crime; this branch leaves factor undefined
216 |
217 | if self.motion_type == 'rayleigh':
218 |
219 | if self.stride == 1:
220 |
221 | sigma = 0.008
222 | con = 0.0048
223 |
224 | if self.stride == 2:
225 |
226 | sigma = 0.009
227 | con = 0.0055
228 |
229 | factor = v * np.exp(-(v**2)/(2*(sigma**2))) / con
230 |
231 | if self.motion_type == 'uniform':
232 |
233 | factor = 0.5
234 |
235 | if self.motion_type == 'none':
236 |
237 | factor = 1
238 |
239 | if self.motion_type == 'random':
240 |
241 | factor = 1 + 0.5*np.random.rand()
242 |
243 | return factor*0.7 + 0.3
244 |
245 | ## TO DO END
246 |
247 | def merge(self,pose,factor):
248 |
249 | if self.fusion_type == 'div':
250 | return pose / factor
251 | if self.fusion_type == 'add':
252 | return pose + factor
253 | if self.fusion_type == 'mul':
254 | return pose * factor
255 |
256 | def _gen_rec_mask(self,mask,prob=0.15):
257 |
258 | ref = torch.ones_like(torch.tensor(mask))
259 | masked_indices = torch.bernoulli(torch.full(ref.shape, prob)).bool()
260 | ref[masked_indices] = 0
261 |
262 | return ref.tolist()
263 |
264 | def _flat_input(self,poses, boxes_max, boxes_min, scores):
265 |
266 | assert len(poses) == len(boxes_max)
267 | assert len(boxes_max) == len(boxes_min)
268 | assert len(poses) == len(scores)
269 |
270 | pose_fusion = []
271 |
272 | weights = []
273 | inters = []
274 | poses_np = np.array(poses)
275 | boxes_max_np = np.array(boxes_max)
276 | boxes_min_np = np.array(boxes_min)
277 |
278 | for i in range(len(poses_np)-1):
279 | v = np.linalg.norm((boxes_max_np[i] + boxes_min_np[i])/2 - (boxes_max_np[(i+1)] + boxes_min_np[(i+1)])/2)
280 | v_norm = v/((boxes_max_np[i] - boxes_min_np[i] + 1e-6).mean())  # epsilon avoids division by zero for degenerate boxes
281 | inters.append(self._inters_factor(v_norm))
282 |
283 | inters.append(inters[len(poses_np)-2])
284 | pose_fusion = self.merge(poses_np[0],inters[0])[np.newaxis, :, :]
285 | weights.extend(scores[0])
286 | ## begin
287 | for i in range(len(poses)-1):
288 | pose_fusion = np.concatenate((pose_fusion, (self.merge(poses_np[i+1],inters[i+1]))[np.newaxis, :, :]), axis = 0)
289 | weights.extend(scores[i+1])
290 |
291 | return weights, pose_fusion.tolist()
292 |
293 | def __getitem__(self, idx):
294 |
295 | meta = self.meta_data[idx]
296 | tracklet = self.tracklet_data[idx]
297 | boxes_max, boxes_min = self._extract_boxes(tracklet,self.normalize_tracklet)
298 | scores = self._extract_conf_score(tracklet,self.normalize_score)
299 | poses = self._extract_poses(tracklet,self.normalize_pose)
300 |
301 | poses_gt = copy.deepcopy(poses)
302 | weights, pose_fusion = self._flat_input(poses, boxes_max, boxes_min,scores)
303 | pose_fusion = (torch.tensor(pose_fusion)*self.scale_factor).to(torch.int32)
304 |
305 | weights = torch.tensor(weights)
306 | poses_gt = (torch.tensor(poses_gt)*self.scale_factor).to(torch.int32)
307 | gt = poses_gt.reshape(-1,2)
308 | #poses_gt = torch.chunk(poses_gt,self.tracklet_len,0)
309 | weights = torch.chunk(weights,self.tracklet_len,0)
310 |
311 | weights = torch.cat([weights[i] for i in range(len(weights))],dim=0)
312 | #gt = torch.cat([poses_gt[i] for i in range(len(poses_gt))],dim=0)
313 |
314 | spatial_token = self.spatial_token.reshape(self.tracklet_len,-1)
315 | temporal_token = self.temporal_token.reshape(self.tracklet_len,-1)
316 |
317 | if self.mask_rec and self.split=='train':
318 | mask = [1 for i in range((self.tracklet_len)*(self.joints_num))]
319 | mask = self._gen_rec_mask(mask,self.mask_pro)
320 | mask = torch.tensor(mask)
321 | # print(mask)
322 | mask_ = torch.cat((mask.unsqueeze(0),mask.unsqueeze(0)),dim=0).permute(1,0)
323 | mask_index = mask_==0
324 | mask_index = mask_index.reshape(self.tracklet_len,self.joints_num,-1)
325 | pose_fusion[mask_index] = 0
326 |
327 | if self.pre_len>0 :
328 | mask = torch.tensor([1 for i in range((self.tracklet_len-self.pre_len)*(self.joints_num))] + [0 for i in range(self.joints_num*self.pre_len)])
329 | mask_ = torch.cat((mask.unsqueeze(0),mask.unsqueeze(0)),dim=0).permute(1,0)
330 | mask_ = mask_.reshape(self.tracklet_len, self.joints_num, 2)
331 | mask_index = mask_==0
332 | pose_fusion[mask_index] = 0
333 |
334 | input_dict = {
335 | 'meta': meta,
336 | 'pose': pose_fusion,
337 | 'gt': gt,
338 | 'weigths': weights,
339 | 'spatial_token':spatial_token,
340 | 'temporal_token':temporal_token,
341 | 'frame_width':self.frame_width,
342 | 'frame_height':self.frame_height,
343 | 'scale_factor': self.scale_factor,
344 | 'joints_num':self.joints_num
345 | }
346 |
347 | return input_dict
348 |
349 | if __name__ == '__main__':
350 |
351 | from dataset_path import *
352 | # import cv2
353 | from torch.utils.data import DataLoader
354 | sys.path.append(".")
355 | from utils.visualization import visualize_local_tracklets
356 | #from utils.metrics import recover_poses
357 |
358 | debug_Dataset = UCF_crime(pose_dir=UCF_crime_Dir,split='test',tracklet_len=8 , stride=2, head_less=False,pre_len=4)
359 | dataloader = DataLoader(debug_Dataset, batch_size=2, shuffle=True, num_workers=0)
360 | VIS = False
361 |
362 | for i, input_dict in enumerate(tqdm(dataloader)):
363 |
364 | # print(input_dict['MPP_GT'].size())
365 | # print(input_dict['MPR_GT'].size())
366 | #print(input_dict['pose'])
367 | print(input_dict['spatial_token'].size())
368 | print(input_dict['temporal_token'].size())
369 | print(input_dict['meta'])
370 | #recovered_poses = recover_poses(input_dict['MPP_GT'],input_dict['MTP_GT'],'ShanghaiTech')
371 | #print('recovered_poses',recovered_poses.shape)
372 |
373 | print("----------",i,"-------------")
374 |
375 | if i>10:
376 | break
377 |
378 |
379 |
--------------------------------------------------------------------------------