├── src ├── datasets │ ├── __init__.py │ ├── .DS_Store │ ├── dataset_path.py │ ├── datasets.py │ ├── Corridor.py │ ├── ShanghaiTech_AlphaPose.py │ └── UCF_crime.py ├── models │ ├── .DS_Store │ └── moprl.py ├── utils │ ├── .DS_Store │ ├── logger.py │ ├── load_save.py │ ├── normalize.py │ ├── visualization.py │ ├── metrics.py │ └── losses.py ├── train.sh ├── opts.py └── train_test.py ├── assets ├── fig1.png └── fig2.png ├── README.md └── env.yaml /src/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /assets/fig1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yui010206/MoPRL/HEAD/assets/fig1.png -------------------------------------------------------------------------------- /assets/fig2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yui010206/MoPRL/HEAD/assets/fig2.png -------------------------------------------------------------------------------- /src/models/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yui010206/MoPRL/HEAD/src/models/.DS_Store -------------------------------------------------------------------------------- /src/utils/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yui010206/MoPRL/HEAD/src/utils/.DS_Store -------------------------------------------------------------------------------- /src/datasets/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yui010206/MoPRL/HEAD/src/datasets/.DS_Store -------------------------------------------------------------------------------- /src/train.sh: -------------------------------------------------------------------------------- 1 | python train_test.py --exp_name ShanghaiTech_AlphaPose_train --dataset ShanghaiTech_AlphaPose 2 | python train_test.py --exp_name ShanghaiTech_AlphaPose_test --dataset ShanghaiTech_AlphaPose --inference True --model_path your_path_to_model -------------------------------------------------------------------------------- /src/datasets/dataset_path.py: -------------------------------------------------------------------------------- 1 | # set to your own path 2 | ShanghaiTech_AlphaPose_Dir = '/home/ps/codebase/gepc/data/pose/' 3 | ShanghaiTech_Lable_Dir = '/home/ps/codebase/gepc/data/test/test_frame_mask/' 4 | ShanghaiTech_Frame_Dir = '/home/ps/codebase/gepc/data/testing/frames/' 5 | 6 | Corridor_Pose_Dir = '/home/ps/codebase/Corridor/Corridor_Pose/' 7 | Corridor_Label_Dir = '/home/ps/codebase/Corridor/Corridor_Label/' 8 | 9 | UCF_crime_Dir = '/home/ps/codebase/code/other/moprl/data/ucf_pose/' -------------------------------------------------------------------------------- /src/utils/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | def get_logger(filename, verbosity=1, name=None): 4 | 5 | level_dict = {0: logging.DEBUG, 1: logging.INFO, 2: logging.WARNING} 6 | formatter = logging.Formatter( 7 | "[%(asctime)s][%(filename)s][line:%(lineno)d][%(levelname)s] %(message)s" 8 | ) 9 | logger = logging.getLogger(name) 10 | logger.setLevel(level_dict[verbosity]) 11 | 12 | fh = logging.FileHandler(filename, "w") 13 | fh.setFormatter(formatter) 14 | logger.addHandler(fh) 15 | 16 | sh = logging.StreamHandler() 17 | sh.setFormatter(formatter) 18 | logger.addHandler(sh) 19 | 20 | return logger 21 | -------------------------------------------------------------------------------- /src/utils/load_save.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import zipfile 4 | import numpy as np 5 | import pickle 6 | 7 | def load_pickle(filename): 8 | with open(filename, "rb") as f: 9 | return pickle.load(f) 10 | 11 | 12 | def save_pickle(data, filename): 13 | with open(filename, "wb") as f: 14 | pickle.dump(data, f, protocol=pickle.HIGHEST_PROTOCOL) 15 | 16 | 17 | def load_json(filename): 18 | with open(filename, "r") as f: 19 | return json.load(f) 20 | 21 | 22 | def write_json(data, filename, save_pretty=False, sort_keys=False): 23 | with open(filename, "w") as f: 24 | if save_pretty: 25 | f.write(json.dumps(data, indent=4, sort_keys=sort_keys)) 26 | else: 27 | json.dump(data, f) 28 | 29 | def concat_json_list(filepaths, save_path): 30 | json_lists = [] 31 | for p in filepaths: 32 | json_lists += load_json(p) 33 | write_json(json_lists, save_path) 34 | 35 | 36 | def save_lines(list_of_str, filepath): 37 | with open(filepath, "w") as f: 38 | f.write("\n".join(list_of_str)) 39 | 40 | 41 | def read_lines(filepath): 42 | with open(filepath, "r") as f: 43 | return [e.strip("\n") for e in f.readlines()] 44 | 45 | 46 | def get_rounded_percentage(float_number, n_floats=2): 47 | return round(float_number * 100, n_floats) 48 | 49 | 50 | def save_parameters(path,opt): 51 | '''Write parameters setting file''' 52 | with open(os.path.join(path, 'params.txt'), 'w') as file: 53 | file.write('Training Parameters: \n') 54 | file.write(str(opt) + '\n') -------------------------------------------------------------------------------- /src/utils/normalize.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def normalize_anomaly_score(scores): 4 | max_score = max(scores) 5 | min_score = min(scores) 6 | length = max_score - 0 7 | if length==0: 8 | length=1 9 | scores = np.array(scores) 10 | 11 | return scores/length 12 | 13 | def normalize_score(score): 14 | 15 | return score/np.sum(score) 16 | 17 | def normalize_pose(pose_x, pose_y): 18 | 19 | x_max, y_max = np.max(pose_x,axis=1), np.max(pose_y,axis=1) 20 | x_min, y_min = np.min(pose_x,axis=1), np.min(pose_y,axis=1) 21 | x_c, y_c = (x_max+x_min)/2, (y_max+y_min)/2 22 | w, h = x_max-x_min, y_max - y_min 23 | 24 | x, y = [], [] 25 | 26 | for i in range(len(w)): 27 | nor_x = ((pose_x[i] - x_c[i]) / w[i]).tolist() 28 | nor_y = ((pose_y[i] - y_c[i]) / h[i]).tolist() 29 | x.append(nor_x) 30 | y.append(nor_y) 31 | 32 | return x, y 33 | 34 | def center_pose(pose_x, pose_y): 35 | 36 | x_max, y_max = np.max(pose_x,axis=1), np.max(pose_y,axis=1) 37 | x_min, y_min = np.min(pose_x,axis=1), np.min(pose_y,axis=1) 38 | x_c, y_c = (x_max+x_min)/2, (y_max+y_min)/2 39 | w, h = x_max-x_min, y_max - y_min 40 | 41 | w[w<1e-5] = 1 42 | h[h<1e-5] = 1 43 | 44 | x, y = [], [] 45 | 46 | for i in range(len(w)): 47 | nor_x = ((pose_x[i] - x_c[i])).tolist() 48 | nor_y = ((pose_y[i] - y_c[i])).tolist() 49 | x.append(nor_x) 50 | y.append(nor_y) 51 | 52 | return x, y 53 | 54 | def keypoints17_to_coco18(kps): 55 | 56 | kp_np = np.array(kps)#.reshape(-1,17,3) 57 | neck_kp_vec = 0.5 * (kp_np[..., 5, :] + kp_np[..., 6, :]) 58 | kp_np = np.concatenate([kp_np, neck_kp_vec[..., None, :]], axis=-2) 59 | opp_order = [0, 17, 6, 8, 10, 5, 7, 9, 12, 14, 16, 11, 13, 15, 2, 1, 4, 3] 60 | opp_order = np.array(opp_order, dtype=np.int) 61 | kp_coco18 = kp_np[..., opp_order, :] 62 | 63 | return kp_coco18 -------------------------------------------------------------------------------- /src/utils/visualization.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | from PIL import Image 6 | import torch 7 | 8 | def visualize_local_tracklets(poses,joints_num=17,scale_factor=100): 9 | 10 | poses = poses.reshape((-1,int(joints_num),2)) 11 | tracklet_img = visualize_tracklet(poses,scale_factor,joints_num) 12 | 13 | return tracklet_img 14 | 15 | def visualize_tracklet(tracklet,scale_factor,num_joints): 16 | 17 | imgs = [] 18 | for pose in tracklet: 19 | img = visulize_single_pose(pose,scale_factor,num_joints) 20 | imgs.append(img) 21 | imgs = np.hstack(imgs) 22 | 23 | return imgs 24 | 25 | def visulize_single_pose(kpts,scale_factor,num_joints): 26 | 27 | if num_joints == 17: 28 | links = [(0, 1), (0, 2), (1, 3), (2, 4), 29 | (5, 7), (7, 9), (6, 8), (8, 10), 30 | (11, 13), (13, 15), (12, 14), (14, 16), 31 | (3, 5), (4, 6), (5, 6), (5, 11), (6, 12), (11, 12)] 32 | 33 | if num_joints == 25: 34 | links = [(17,15),(15,0),(0,16),(16,18),(0,1),(1,8),(1,2),(2,3),(3,4),(1,5),(5,6), 35 | (6,7),(8,9),(9,10),(10,11),(11,22),(22,23),(11,24),(8,12),(12,13),(13,14),(14,21),(14,19),(19,20)] 36 | 37 | kpts = np.array(kpts) 38 | 39 | x = kpts[:,0] 40 | y = kpts[:,1] 41 | 42 | img = np.zeros((100,100,3),np.uint8) 43 | cmap = plt.get_cmap('rainbow') 44 | colors = [cmap(i) for i in np.linspace(0, 1, len(links) + 2)] 45 | colors = [(c[2] * 255, c[1] * 255, c[0] * 255) for c in colors] 46 | 47 | for i in range(len(links)): 48 | 49 | order1, order2 = links[i][0], links[i][1] 50 | x1 =int(((np.float32(x[order1])))) + int(scale_factor/2) 51 | y1 =int(((np.float32(y[order1])))) + int(scale_factor/2) 52 | x2 =int(((np.float32(x[order2])))) + int(scale_factor/2) 53 | y2 =int(((np.float32(y[order2])))) + int(scale_factor/2) 54 | cv2.line(img,(x1,y1),(x2,y2),thickness=1,color=colors[i]) 55 | 56 | return img -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 💻 Code for Regularity Learning via Explicit Distribution Modeling for Skeletal Video Anomaly Detection 2 | 3 | * Authors: [Shoubin Yu](https://yui010206.github.io/), [Zhongyin Zhao](https://github.com/ZZhongYin), 4 | [Hao-shu Fang](https://fang-haoshu.github.io/), [Andong Deng](https://dengandong.github.io/), 5 | [Haisheng Su](https://scholar.google.com.hk/citations?user=OFrMZh8AAAAJ&hl=zh-CN), [Dongliang Wang](https://scholar.google.com.hk/citations?hl=zh-CN&user=gurERzcAAAAJ), 6 | [Weihao Gan](https://scholar.google.com.hk/citations?hl=zh-CN&user=QIC0rCYAAAAJ), [Cewu Lu](https://www.mvig.org/), and [Wei Wu](https://scholar.google.com/citations?user=9RBxtd8AAAAJ&hl=en&oi=ao) 7 | 8 | * [Paper](https://ieeexplore.ieee.org/abstract/document/10185076) ([IEEE Transactions on Circuits and Systems for Video Technology 2023](https://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=76)) 9 | 10 | teaser image 11 | 12 | teaser image 13 | 14 | ## Data Preparetion 15 | 16 | We extract poses in ShanghaiTech in the same way as [GEPC](https://github.com/amirmk89/gepc). As for Corridor, we adopt the same methods mentioned [here](https://rodrigues-royston.github.io/Multi-timescale_Trajectory_Prediction/). Once poses have been downloaded or extracted, change the data path in ***src/datasets/dataset_path.py*** to train or test models. 17 | 18 | ## Train model 19 | For ShanghaiTech 20 | 21 | ``` python train_test.py --exp_name ShanghaiTech_AlphaPose_train --dataset ShanghaiTech_AlphaPose``` 22 | 23 | For Corridor 24 | 25 | ``` python train_test.py --exp_name Corridor_train --dataset Corridor``` 26 | 27 | ## Test model 28 | For ShanghaiTech 29 | 30 | ``` python train_test.py --exp_name ShanghaiTech_AlphaPose_test --dataset ShanghaiTech_AlphaPose --inference True --model_path your_path_to_model ``` 31 | 32 | For Corridor 33 | 34 | ``` python train_test.py --exp_name Corridor_test --dataset Corridor --inference True --model_path your_path_to_model ``` 35 | -------------------------------------------------------------------------------- /src/opts.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | def parse_opts(): 4 | parser = argparse.ArgumentParser() 5 | 6 | # basic config 7 | parser.add_argument('--seed',default=2021,type=int) 8 | parser.add_argument('--workers',default=2,type=int) 9 | parser.add_argument('--exp_name',default='debug',type=str) 10 | parser.add_argument('--inference',default=False,type=bool,help='turn on inference mode') 11 | parser.add_argument('--ano_score',default='max',type=str) 12 | parser.add_argument('--gpu',default=0,type=int) 13 | 14 | # show config 15 | parser.add_argument('--log_interval',default=100,type=int) 16 | parser.add_argument('--vis_interval',default=200,type=int) 17 | parser.add_argument('--eval_interval',default=2000,type=int) 18 | 19 | # test config 20 | parser.add_argument('--model_path',default='',type=str) 21 | 22 | # training config 23 | parser.add_argument('--batch_size',default=256,type=int,help='batch size') 24 | parser.add_argument('--lr_rate',default=5e-5,type=float) 25 | parser.add_argument('--epochs',default=50,type=int) 26 | parser.add_argument('--load_pretrain_model',default=False,type=bool) 27 | parser.add_argument('--iter_to_load',default=5000,type=int,help='load checkpoints') 28 | 29 | # dataset config 30 | parser.add_argument('--dataset',default='UCF_crime',type=str) 31 | parser.add_argument('--tracklet_len',default=8,type=int) 32 | parser.add_argument('--stride',default=1,type=int) 33 | parser.add_argument('--headless',default=False,type=bool) 34 | parser.add_argument('--mask_ratio',default=0.15,type=float) 35 | parser.add_argument('--motion_type',default='rayleigh',type=str) #rayleigh #gaussian #uniform #none 36 | parser.add_argument('--fusion_type',default='div',type=str) #div #add #mul 37 | parser.add_argument('--noise_factor',default=0,type=float) 38 | parser.add_argument('--pre_len',default=0,type=int) 39 | 40 | # model config 41 | parser.add_argument('--embed_dim',default=128,type=int) 42 | parser.add_argument('--spatial_depth',default=2,type=int) 43 | parser.add_argument('--temporal_depth',default=2,type=int) 44 | 45 | args = parser.parse_args() 46 | 47 | return args -------------------------------------------------------------------------------- /src/datasets/datasets.py: -------------------------------------------------------------------------------- 1 | from .dataset_path import * 2 | import os 3 | 4 | def get_training_set(opt): 5 | assert opt.dataset in ['ShanghaiTech_AlphaPose', 'Corridor', 'UCF_crime'] 6 | 7 | if opt.dataset == 'ShanghaiTech_AlphaPose': 8 | 9 | from .ShanghaiTech_AlphaPose import ShanghaiTech_AlphaPose 10 | 11 | train_Dataset = ShanghaiTech_AlphaPose(pose_dir=ShanghaiTech_AlphaPose_Dir, split='train', mask_pro=opt.mask_ratio, 12 | tracklet_len=opt.tracklet_len ,stride=opt.stride,head_less=opt.headless,pre_len=opt.pre_len,embed_dim=opt.embed_dim, fusion_type=opt.fusion_type, motion_type=opt.motion_type, noise_factor = opt.noise_factor) 13 | 14 | elif opt.dataset == 'Corridor': 15 | from .Corridor import Corridor 16 | 17 | train_Dataset = Corridor(pose_dir=Corridor_Pose_Dir, split='train', mask_pro=opt.mask_ratio, 18 | tracklet_len=opt.tracklet_len ,stride=opt.stride,pre_len=opt.pre_len,embed_dim=opt.embed_dim, fusion_type=opt.fusion_type, motion_type=opt.motion_type) 19 | 20 | elif opt.dataset == 'UCF_crime': 21 | 22 | from .UCF_crime import UCF_crime 23 | 24 | train_Dataset = UCF_crime(pose_dir=UCF_crime_Dir, split='train', mask_pro=opt.mask_ratio, 25 | tracklet_len=opt.tracklet_len ,stride=opt.stride,head_less=opt.headless,pre_len=opt.pre_len,embed_dim=opt.embed_dim, fusion_type=opt.fusion_type, motion_type=opt.motion_type) 26 | 27 | 28 | return train_Dataset 29 | 30 | 31 | def get_test_set(opt): 32 | assert opt.dataset in ['ShanghaiTech_AlphaPose', 'Corridor', 'UCF_crime'] 33 | 34 | if opt.dataset == 'ShanghaiTech_AlphaPose': 35 | 36 | from .ShanghaiTech_AlphaPose import ShanghaiTech_AlphaPose 37 | 38 | test_Dataset = ShanghaiTech_AlphaPose(pose_dir=ShanghaiTech_AlphaPose_Dir, split='test', 39 | tracklet_len=opt.tracklet_len ,stride=opt.stride,head_less=opt.headless,pre_len=opt.pre_len,embed_dim=opt.embed_dim, fusion_type=opt.fusion_type, motion_type=opt.motion_type, noise_factor = opt.noise_factor) 40 | 41 | elif opt.dataset == 'Corridor': 42 | from .Corridor import Corridor 43 | 44 | test_Dataset = Corridor(pose_dir=Corridor_Pose_Dir, split='test', 45 | tracklet_len=opt.tracklet_len ,stride=opt.stride,pre_len=opt.pre_len,embed_dim=opt.embed_dim, fusion_type=opt.fusion_type, motion_type=opt.motion_type) 46 | 47 | elif opt.dataset == 'UCF_crime': 48 | 49 | from .UCF_crime import UCF_crime 50 | 51 | test_Dataset = UCF_crime(pose_dir=UCF_crime_Dir, split='test', 52 | tracklet_len=opt.tracklet_len ,stride=opt.stride,head_less=opt.headless,pre_len=opt.pre_len,embed_dim=opt.embed_dim, fusion_type=opt.fusion_type, motion_type=opt.motion_type) 53 | 54 | else: 55 | raise ValueError ("Dataset Name Invalid!") 56 | 57 | 58 | return test_Dataset 59 | -------------------------------------------------------------------------------- /env.yaml: -------------------------------------------------------------------------------- 1 | name: moprl 2 | channels: 3 | - defaults 4 | dependencies: 5 | - _libgcc_mutex=0.1=main 6 | - _pytorch_select=0.2=gpu_0 7 | - backcall=0.2.0=pyhd3eb1b0_0 8 | - blas=1.0=mkl 9 | - ca-certificates=2021.9.30=h06a4308_1 10 | - certifi=2021.5.30=py36h06a4308_0 11 | - cffi=1.14.6=py36h400218f_0 12 | - cudatoolkit=10.0.130=0 13 | - cudnn=7.6.5=cuda10.0_0 14 | - decorator=5.0.9=pyhd3eb1b0_0 15 | - freetype=2.10.4=h5ab3b9f_0 16 | - intel-openmp=2021.3.0=h06a4308_3350 17 | - ipykernel=5.3.4=py36h5ca1d4c_0 18 | - ipython=7.16.1=py36h5ca1d4c_0 19 | - ipython_genutils=0.2.0=pyhd3eb1b0_1 20 | - jedi=0.17.0=py36_0 21 | - jpeg=9d=h7f8727e_0 22 | - jupyter_client=6.1.12=pyhd3eb1b0_0 23 | - jupyter_core=4.7.1=py36h06a4308_0 24 | - lcms2=2.12=h3be6417_0 25 | - ld_impl_linux-64=2.33.1=h53a641e_7 26 | - libffi=3.3=he6710b0_2 27 | - libgcc-ng=9.1.0=hdf63c60_0 28 | - libpng=1.6.37=hbc83047_0 29 | - libsodium=1.0.18=h7b6447c_0 30 | - libstdcxx-ng=9.1.0=hdf63c60_0 31 | - libtiff=4.2.0=h85742a9_0 32 | - libwebp-base=1.2.0=h27cfd23_0 33 | - lz4-c=1.9.3=h295c915_1 34 | - mkl=2020.2=256 35 | - mkl-service=2.3.0=py36he8ac12f_0 36 | - mkl_fft=1.3.0=py36h54f3939_0 37 | - mkl_random=1.1.1=py36h0573a6f_0 38 | - ncurses=6.2=he6710b0_1 39 | - ninja=1.10.2=hff7bd54_1 40 | - numpy-base=1.19.2=py36hfa32c7d_0 41 | - olefile=0.46=py36_0 42 | - openjpeg=2.4.0=h3ad879b_0 43 | - openssl=1.1.1l=h7f8727e_0 44 | - parso=0.8.2=pyhd3eb1b0_0 45 | - pexpect=4.8.0=pyhd3eb1b0_3 46 | - pickleshare=0.7.5=pyhd3eb1b0_1003 47 | - pip=21.1.1=py36h06a4308_0 48 | - prompt-toolkit=3.0.17=pyh06a4308_0 49 | - ptyprocess=0.7.0=pyhd3eb1b0_2 50 | - pycparser=2.20=py_2 51 | - pygments=2.9.0=pyhd3eb1b0_0 52 | - python=3.6.13=hdb3f193_0 53 | - python-dateutil=2.8.1=pyhd3eb1b0_0 54 | - pytorch=1.3.1=cuda100py36h53c1284_0 55 | - pyzmq=20.0.0=py36h2531618_1 56 | - readline=8.1=h27cfd23_0 57 | - setuptools=52.0.0=py36h06a4308_0 58 | - six=1.15.0=py36h06a4308_0 59 | - sqlite=3.35.4=hdfb4753_0 60 | - tk=8.6.10=hbc83047_0 61 | - torchvision=0.4.2=cuda100py36hecfc37a_0 62 | - tornado=6.1=py36h27cfd23_0 63 | - traitlets=4.3.3=py36_0 64 | - wcwidth=0.2.5=py_0 65 | - wheel=0.36.2=pyhd3eb1b0_0 66 | - xz=5.2.5=h7b6447c_0 67 | - zeromq=4.3.4=h2531618_0 68 | - zlib=1.2.11=h7b6447c_3 69 | - zstd=1.4.9=haebb681_0 70 | - pip: 71 | - absl-py==0.12.0 72 | - argon2-cffi==21.1.0 73 | - astor==0.8.1 74 | - astunparse==1.6.3 75 | - async-generator==1.10 76 | - attrs==21.2.0 77 | - bleach==4.1.0 78 | - cached-property==1.5.2 79 | - cachetools==4.2.2 80 | - chardet==4.0.0 81 | - click==8.0.3 82 | - cycler==0.10.0 83 | - dataclasses==0.8 84 | - defusedxml==0.7.1 85 | - easydict==1.9 86 | - einops==0.3.2 87 | - entrypoints==0.3 88 | - filelock==3.3.1 89 | - flatbuffers==1.12 90 | - gast==0.3.3 91 | - google-auth==1.30.0 92 | - google-auth-oauthlib==0.4.4 93 | - google-pasta==0.2.0 94 | - grpcio==1.34.1 95 | - h5py==2.10.0 96 | - huggingface-hub==0.0.19 97 | - idna==2.10 98 | - imageio==2.5.0 99 | - importlib-metadata==4.0.1 100 | - ipyplot==1.1.0 101 | - ipywidgets==7.6.5 102 | - jinja2==3.0.2 103 | - joblib==1.0.1 104 | - jsonschema==3.2.0 105 | - jupyterlab-pygments==0.1.2 106 | - jupyterlab-widgets==1.0.2 107 | - keras-applications==1.0.8 108 | - keras-nightly==2.5.0.dev2021032900 109 | - keras-preprocessing==1.1.2 110 | - kiwisolver==1.3.1 111 | - markdown==3.3.4 112 | - markupsafe==2.0.1 113 | - matplotlib==3.1.2 114 | - mistune==0.8.4 115 | - nbclient==0.5.4 116 | - nbconvert==6.0.7 117 | - nbformat==5.1.3 118 | - nest-asyncio==1.5.1 119 | - notebook==6.4.5 120 | - numpy==1.19.5 121 | - oauthlib==3.1.0 122 | - opencv-python==4.1.2.30 123 | - opt-einsum==3.3.0 124 | - packaging==21.0 125 | - pandas==1.1.5 126 | - pandocfilters==1.5.0 127 | - pillow==8.2.0 128 | - prometheus-client==0.11.0 129 | - protobuf==3.17.0 130 | - pyasn1==0.4.8 131 | - pyasn1-modules==0.2.8 132 | - pyparsing==2.4.7 133 | - pyrsistent==0.18.0 134 | - pytz==2021.1 135 | - pyyaml==5.1.1 136 | - regex==2021.10.8 137 | - requests==2.25.1 138 | - requests-oauthlib==1.3.0 139 | - rsa==4.7.2 140 | - sacremoses==0.0.46 141 | - scikit-learn==0.24.2 142 | - scipy==1.4.1 143 | - seaborn==0.9.0 144 | - send2trash==1.8.0 145 | - shortuuid==1.0.1 146 | - sklearn==0.0 147 | - tensorboard==2.2.2 148 | - tensorboard-data-server==0.6.1 149 | - tensorboard-plugin-wit==1.8.0 150 | - tensorboardx==2.2 151 | - tensorflow==2.2.0 152 | - tensorflow-estimator==2.2.0 153 | - termcolor==1.1.0 154 | - terminado==0.12.1 155 | - testpath==0.5.0 156 | - threadpoolctl==2.1.0 157 | - timm==0.4.12 158 | - tokenizers==0.10.3 159 | - torch==1.9.1 160 | - torch-tb-profiler==0.1.0 161 | - tqdm==4.41.0 162 | - transformers==4.11.3 163 | - typing-extensions==3.7.4.3 164 | - urllib3==1.26.4 165 | - webencodings==0.5.1 166 | - werkzeug==2.0.1 167 | - widgetsnbextension==3.5.1 168 | - wrapt==1.12.1 169 | - zipp==3.4.1 170 | 171 | -------------------------------------------------------------------------------- /src/utils/metrics.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import os 4 | 5 | from sklearn.metrics import roc_curve 6 | from sklearn.metrics import auc 7 | from scipy.ndimage import gaussian_filter1d 8 | from sklearn.metrics import roc_auc_score 9 | from utils.normalize import normalize_anomaly_score 10 | from datasets.dataset_path import * 11 | from utils.load_save import load_json 12 | 13 | def recover_poses(local_pose,global_box,dataset): 14 | # local_pose: (B,17*T,2) 15 | # global_box: (B,2*T,2) 16 | t = int(global_box.shape[1]/2) 17 | scale_factor = 100 18 | if dataset=='ShanghaiTech': 19 | frame_width = 856 20 | frame_height = 480 21 | 22 | local_pose_split = torch.chunk(local_pose,t,dim=1) 23 | global_box_split = torch.chunk(global_box,t,dim=1) 24 | 25 | assert len(local_pose_split) == len(global_box_split) 26 | 27 | recovered_poses = [] 28 | 29 | for i in range(len(local_pose_split)): 30 | pose = local_pose_split[i] #(B,17,2) 31 | #print(pose.shape) 32 | b_min,b_max = global_box_split[i][:,0,:],global_box_split[i][:,1,:] #(B,2) 33 | 34 | x_min ,y_min = b_min[:,0]/scale_factor*frame_width, b_min[:,1]/scale_factor*frame_height 35 | x_max ,y_max = b_max[:,0]/scale_factor*frame_width, b_max[:,1]/scale_factor*frame_height 36 | 37 | w, h = x_max-x_min, y_max-y_min 38 | x_c , y_c = (x_max+x_min)/2, (y_max+y_min)/2 39 | 40 | x = ((pose[:,:,0]/scale_factor)*w[0] + x_c[0]).unsqueeze(-1) 41 | y = ((pose[:,:,1]/scale_factor)*h[0] + y_c[0]).unsqueeze(-1) 42 | recovered_pose = torch.cat([x,y],dim=-1) 43 | #print(recovered_pose.shape) 44 | recovered_poses.append(recovered_pose) 45 | 46 | recovered_poses = torch.cat(recovered_poses,dim=1) 47 | 48 | return recovered_poses 49 | 50 | def L1_err(gt,pred,weight=None): 51 | 52 | err = torch.norm((gt - pred), p=1, dim=-1) 53 | 54 | if weight is not None: 55 | err = err*weight 56 | 57 | err = err.mean(dim=-1) 58 | 59 | return err.tolist() 60 | 61 | def L2_err(gt,pred,weight=None): 62 | 63 | err = torch.norm(gt - pred, p=2, dim=-1) 64 | 65 | if weight is not None: 66 | err = err*weight 67 | 68 | err = err.mean(dim=-1) 69 | 70 | return err.tolist() 71 | 72 | def compute_auc(rec_errs,pred_errs,meta,duration,k,dataset,cal_type='sum'): 73 | 74 | if dataset == 'UCF_crime': 75 | fps_dic = load_json(os.path.join(UCF_crime_Dir, 'fps_stats.json')) 76 | frames_dic = load_json(os.path.join(UCF_crime_Dir, 'frame_stats.json')) 77 | label_dic = {} 78 | with open(os.path.join(UCF_crime_Dir, 'Temporal_Anomaly_Annotation_for_Testing_Videos.txt'), 'r') as fin: 79 | for line in fin.readlines(): 80 | spl = line.strip().split(' ') 81 | label_dic[spl[0]] = [spl[2], spl[3], spl[4], spl[5]] 82 | 83 | compute_dict = {} 84 | 85 | for rec_err, pred_err, name in zip(rec_errs,pred_errs,meta): 86 | 87 | scene ,frame = name.split('.mp4_') 88 | scene = scene + '.mp4' 89 | 90 | if cal_type=='sum': 91 | err = (1-k/10)*rec_err+(k/10)*pred_err 92 | elif cal_type=='max': 93 | err = max(rec_err,pred_err) 94 | 95 | if scene not in compute_dict: 96 | compute_dict[scene] = {} 97 | if int(frame) not in compute_dict[scene]: 98 | compute_dict[scene][int(frame)] = [err] 99 | else: 100 | compute_dict[scene][int(frame)].append(err) 101 | 102 | max_err_dict = {} 103 | all_label = [] 104 | all_score = [] 105 | all_nor_score = [] 106 | 107 | for scene in compute_dict: 108 | max_err_dict[scene] = [] 109 | frames = compute_dict[scene].keys() 110 | sorted_frames = list(sorted(frames)) 111 | 112 | label = np.zeros(int(frames_dic[scene]*5/fps_dic[scene])) 113 | 114 | if int(label_dic[scene][0]) != -1 and int(label_dic[scene][1]) != -1: 115 | s1 = int(float(label_dic[scene][0])*5/float(fps_dic[scene])) 116 | f1 = int(float(label_dic[scene][1])*5/float(fps_dic[scene])) 117 | label[s1: f1] = 1 118 | 119 | if int(label_dic[scene][2]) != -1 and int(label_dic[scene][3]) != -1: 120 | s2 = int(float(label_dic[scene][2])*5/float(fps_dic[scene])) 121 | f2 = int(float(label_dic[scene][3])*5/float(fps_dic[scene])) 122 | label[s2: f2] = 1 123 | 124 | label = label.tolist() 125 | 126 | num_frame = len(label) 127 | anchor = 0 128 | for i in range(num_frame): 129 | if i > sorted_frames[-1]: 130 | max_err_dict[scene].append(0) 131 | elif int(sorted_frames[anchor]) == i: 132 | max_rec = max(compute_dict[scene][sorted_frames[anchor]]) 133 | max_err_dict[scene].append(max_rec) 134 | anchor += 1 135 | else: 136 | max_err_dict[scene].append(0) 137 | 138 | ano_score = max_err_dict[scene] 139 | all_label.extend(label[duration:]) 140 | all_score.extend(ano_score[duration:]) 141 | all_nor_score.extend(normalize_anomaly_score(ano_score)[duration:]) 142 | 143 | else: 144 | compute_dict = {} 145 | 146 | for rec_err, pred_err, name in zip(rec_errs,pred_errs,meta): 147 | # main scene/ sub scene 148 | if dataset.split('_')[0] == 'ShanghaiTech' or dataset == 'Avenue': 149 | main, sub ,frame = name.split('_') 150 | scene = main + '_' + sub 151 | else: 152 | scene ,frame = name.split('_') 153 | 154 | if cal_type=='sum': 155 | err = (1-k/10)*rec_err+(k/10)*pred_err 156 | elif cal_type=='max': 157 | err = max(rec_err,pred_err) 158 | 159 | if scene not in compute_dict: 160 | compute_dict[scene] = {} 161 | if int(frame) not in compute_dict[scene]: 162 | compute_dict[scene][int(frame)] = [err] 163 | else: 164 | compute_dict[scene][int(frame)].append(err) 165 | 166 | max_err_dict = {} 167 | all_label = [] 168 | all_score = [] 169 | all_nor_score = [] 170 | 171 | for scene in compute_dict: 172 | max_err_dict[scene] = [] 173 | frames = compute_dict[scene].keys() 174 | sorted_frames = list(sorted(frames)) 175 | if dataset.split('_')[0] == 'ShanghaiTech': 176 | Label_Dir = ShanghaiTech_Lable_Dir + scene 177 | 178 | elif dataset == 'Corridor': 179 | 180 | Label_Dir = Corridor_Label_Dir + scene + '/' +scene 181 | 182 | label = np.load(Label_Dir+'.npy').tolist() 183 | num_frame = len(label) 184 | anchor = 0 185 | for i in range(num_frame): 186 | if i > sorted_frames[-1]: 187 | max_err_dict[scene].append(0) 188 | elif int(sorted_frames[anchor]) == i: 189 | max_rec = max(compute_dict[scene][sorted_frames[anchor]]) 190 | max_err_dict[scene].append(max_rec) 191 | anchor += 1 192 | else: 193 | max_err_dict[scene].append(0) 194 | 195 | ano_score = max_err_dict[scene] 196 | all_label.extend(label[duration:]) 197 | all_score.extend(ano_score[duration:]) 198 | all_nor_score.extend(normalize_anomaly_score(ano_score)[duration:]) 199 | 200 | all_score = gaussian_filter1d(all_score, 20) 201 | all_nor_score = gaussian_filter1d(all_nor_score, 20) 202 | AUC = roc_auc_score(all_label, all_score) 203 | AUC_norm = roc_auc_score(all_label, all_nor_score) 204 | 205 | return AUC,AUC_norm -------------------------------------------------------------------------------- /src/models/moprl.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import math 3 | import logging 4 | import torchvision.models 5 | 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | from functools import partial 10 | from timm.models.layers import DropPath 11 | from einops import rearrange, repeat 12 | 13 | 14 | class Mlp(nn.Module): 15 | def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.): 16 | super().__init__() 17 | out_features = out_features or in_features 18 | hidden_features = hidden_features or in_features 19 | self.fc1 = nn.Linear(in_features, hidden_features) 20 | self.act = act_layer() 21 | self.fc2 = nn.Linear(hidden_features, out_features) 22 | self.drop = nn.Dropout(drop) 23 | 24 | def forward(self, x): 25 | x = self.fc1(x) 26 | x = self.act(x) 27 | x = self.drop(x) 28 | x = self.fc2(x) 29 | x = self.drop(x) 30 | return x 31 | 32 | class Attention(nn.Module): 33 | def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0.): 34 | super().__init__() 35 | self.num_heads = num_heads 36 | head_dim = dim // num_heads 37 | # NOTE scale factor was wrong in my original version, can set manually to be compat with prev weights 38 | self.scale = qk_scale or head_dim ** -0.5 39 | 40 | self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) 41 | self.attn_drop = nn.Dropout(attn_drop) 42 | self.proj = nn.Linear(dim, dim) 43 | self.proj_drop = nn.Dropout(proj_drop) 44 | 45 | def forward(self, x, mask=None): 46 | B, N, C = x.shape 47 | qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4) 48 | q, k, v = qkv[0], qkv[1], qkv[2] # make torchscript happy (cannot use tensor as tuple) 49 | 50 | attn = (q @ k.transpose(-2, -1)) * self.scale 51 | 52 | #print('mask',mask.shape) 53 | #print('attn',attn.shape) 54 | 55 | if mask is not None: 56 | attn = attn.masked_fill(mask == 0, -1e9) 57 | 58 | attn = attn.softmax(dim=-1) 59 | attn = self.attn_drop(attn) 60 | 61 | x = (attn @ v).transpose(1, 2).reshape(B, N, C) 62 | x = self.proj(x) 63 | x = self.proj_drop(x) 64 | return x 65 | 66 | 67 | class Block(nn.Module): 68 | 69 | def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0., 70 | drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm): 71 | super().__init__() 72 | self.norm1 = norm_layer(dim) 73 | self.attn = Attention( 74 | dim, num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop, proj_drop=drop) 75 | # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here 76 | self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() 77 | self.norm2 = norm_layer(dim) 78 | mlp_hidden_dim = int(dim * mlp_ratio) 79 | self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop) 80 | 81 | def forward(self, x,mask=None): 82 | x = x + self.drop_path(self.attn(self.norm1(x),mask)) 83 | x = x + self.drop_path(self.mlp(self.norm2(x))) 84 | 85 | return x 86 | 87 | class MoPRL(nn.Module): 88 | def __init__(self, tracklet_len=8, pre_len=1,headless=False, in_chans=2,embed_dim=128, spatial_depth=4, temporal_depth=4, 89 | num_heads=4, mlp_ratio=2., qkv_bias=True, qk_scale=None, 90 | drop_rate=0., attn_drop_rate=0., drop_path_rate=0.1, norm_layer=None, num_joints=17, 91 | ): 92 | 93 | """ ##########hybrid_backbone=None, representation_size=None, 94 | Args: 95 | num_frame (int, tuple): input frame number 96 | headless (bool): use head joints or not 97 | num_joints (int, tuple): joints number 98 | in_chans (int): number of input channels, 2D joints have 2 channels: (x,y) 99 | embed_dim_ratio (int): embedding dimension ratio 100 | depth (int): depth of transformer 101 | num_heads (int): number of attention heads 102 | mlp_ratio (int): ratio of mlp hidden dim to embedding dim 103 | qkv_bias (bool): enable bias for qkv if True 104 | qk_scale (float): override default qk scale of head_dim ** -0.5 if set 105 | drop_rate (float): dropout rate 106 | attn_drop_rate (float): attention dropout rate 107 | drop_path_rate (float): stochastic depth rate 108 | norm_layer: (nn.Module): normalization layer 109 | """ 110 | super().__init__() 111 | 112 | # if headless: 113 | # self.num_joints = 14 114 | # else: 115 | # self.num_joints = 17 116 | 117 | self.num_joints = num_joints 118 | self.tracklet_len = tracklet_len 119 | self.pre_len = pre_len 120 | 121 | norm_layer = norm_layer or partial(nn.LayerNorm, eps=1e-6) 122 | self.embed_dim = embed_dim 123 | out_dim = 2 124 | 125 | ### spatial patch embedding 126 | self.pose_embedding = nn.Linear(in_chans, embed_dim) 127 | self.pos_drop = nn.Dropout(p=drop_rate) 128 | 129 | 130 | dpr_s = [x.item() for x in torch.linspace(0, drop_path_rate, spatial_depth)] # stochastic depth decay rule 131 | self.spatial_blocks = nn.ModuleList([ 132 | Block( 133 | dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale, 134 | drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr_s[i], norm_layer=norm_layer) 135 | for i in range(spatial_depth)]) 136 | self.spatial_norm = norm_layer(embed_dim) 137 | 138 | self.spatial_position_embedding = nn.Embedding(self.num_joints+1,embed_dim) 139 | 140 | dpr_t = [x.item() for x in torch.linspace(0, drop_path_rate, temporal_depth)] # stochastic depth decay rule 141 | self.temporal_blocks = nn.ModuleList([ 142 | Block( 143 | dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale, 144 | drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr_t[i], norm_layer=norm_layer) 145 | for i in range(temporal_depth)]) 146 | self.temporal_norm = norm_layer(embed_dim) 147 | 148 | self.temporal_postion_embedding = nn.Embedding(tracklet_len+1,embed_dim) 149 | 150 | self.head = nn.Sequential( 151 | nn.LayerNorm(embed_dim), 152 | nn.Linear(embed_dim , out_dim), 153 | ) 154 | 155 | def Spatial_Attention(self, x, spatial_tokens): 156 | 157 | b,f,_,_ = x.shape 158 | x = rearrange(x, 'b f w c -> (b f) w c',) 159 | 160 | spatial_embedding = self.spatial_position_embedding(spatial_tokens) 161 | _,_,n,d= spatial_embedding.shape 162 | spatial_embedding = spatial_embedding.view(-1,n,d) 163 | x += spatial_embedding 164 | 165 | x = self.pos_drop(x) 166 | 167 | for blk in self.spatial_blocks: 168 | x = blk(x) 169 | 170 | x = self.spatial_norm(x) 171 | x = rearrange(x, '(b f) w c -> b f w c', f=f) 172 | 173 | return x 174 | 175 | def Temporal_Attention(self, x, temporal_tokens): 176 | # x: b, T, N, embed_dim 177 | temporal_tembedding = self.temporal_postion_embedding(temporal_tokens) 178 | x += temporal_tembedding 179 | 180 | features = self.pos_drop(x) 181 | 182 | _, t, n, _ = features.shape 183 | features = rearrange(features, 'b t n c -> (b n) t c', t=t) 184 | 185 | for blk in self.temporal_blocks: 186 | features = blk(features) 187 | 188 | features = self.temporal_norm(features) 189 | 190 | features = rearrange(features, '(b n) t c -> b t n c', n=n) 191 | 192 | return features 193 | 194 | 195 | def forward(self, pose, spatial_tokens, temporal_tokens): 196 | # pose: (B, T, 17, 2) 197 | # box: (B, T, 2, 2) 198 | # spatial_embedding: (B, T, 17) 199 | pose = pose.permute(0, 3, 1, 2) 200 | b, _, f, p = pose.shape ##### b is batch size, f is number of frames, p is number of joints 201 | pose = rearrange(pose, 'b c f p -> (b f) p c', ) 202 | pose = self.pose_embedding(pose) 203 | pose = rearrange(pose, '(b f) p c -> b f p c', b=b) 204 | pose = self.Spatial_Attention(pose,spatial_tokens) 205 | pose = self.Temporal_Attention(pose,temporal_tokens) 206 | rec_pose = self.head(pose).reshape(b,-1,2) 207 | 208 | return rec_pose 209 | 210 | if __name__ == '__main__': 211 | model = MoPRL(tracklet_len=8, pre_len=1,headless=False, in_chans=2,embed_dim=64, spatial_depth=4, temporal_depth=4, 212 | num_heads=4, mlp_ratio=2., qkv_bias=True, qk_scale=None, 213 | drop_rate=0., attn_drop_rate=0., drop_path_rate=0.1, norm_layer=None, num_joints=25) 214 | 215 | pose = torch.rand([4,8,25,2]) 216 | box = torch.rand([4,8,2,2]) 217 | spatial_tokens = torch.randint(0,3,(4,8,25)) 218 | temporal_tokens = torch.randint(0,3,(4,8,25)) 219 | 220 | output = model(pose,spatial_tokens,temporal_tokens) 221 | 222 | print('output',output.shape) 223 | 224 | 225 | 226 | -------------------------------------------------------------------------------- /src/utils/losses.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | import math 5 | 6 | class OKS_Loss(nn.Module): 7 | def __init__(self): 8 | super(OKS_Loss, self).__init__() 9 | 10 | def forward(self, predicted_pose, target_pose, weight=None): 11 | # predicted: B,N,2 12 | # mask: B, N 13 | # weitgt: B, N 14 | 15 | assert predicted_pose.shape == target_pose.shape 16 | 17 | norm_pose = torch.norm((predicted_pose - target_pose), p=2, dim=-1) 18 | if weight is not None: 19 | norm_pose = norm_pose.clone() * weight 20 | 21 | loss = norm_pose.mean() 22 | return loss 23 | 24 | class IOU_Loss(nn.Module): 25 | def __init__(self): 26 | super(IOU_Loss, self).__init__() 27 | 28 | def forward(self, predict_, target_, eps=1e-7): 29 | """`Implementation of Distance-IoU Loss: Faster and Better 30 | Learning for Bounding Box Regression, https://arxiv.org/abs/1911.08287`_. 31 | 32 | Code is modified from https://github.com/Zzh-tju/DIoU. 33 | 34 | Args: 35 | pred (Tensor): Predicted bboxes of format (x1, y1, x2, y2), 36 | shape (n, 4). 37 | target (Tensor): Corresponding gt bboxes, shape (n, 4). 38 | eps (float): Eps to avoid log(0). 39 | Return: 40 | Tensor: Loss tensor. 41 | """ 42 | # overlap 43 | assert predict_.shape == target_.shape 44 | 45 | pre_xy_min = predict_[:,::2,:] 46 | pre_xy_max = predict_[:,1::2,:] 47 | 48 | gt_xy_min = target_[:,::2,:] 49 | gt_xy_max = target_[:,1::2,:] 50 | 51 | pred = torch.cat([pre_xy_min,pre_xy_max],dim=-1).reshape(-1,4) 52 | target = torch.cat([gt_xy_min,gt_xy_max],dim=-1).reshape(-1,4) 53 | 54 | lt = torch.max(pred[:, :2], target[:, :2]) 55 | rb = torch.min(pred[:, 2:], target[:, 2:]) 56 | wh = (rb - lt).clamp(min=0) 57 | overlap = wh[:, 0] * wh[:, 1] 58 | 59 | # union 60 | ap = (pred[:, 2] - pred[:, 0]) * (pred[:, 3] - pred[:, 1]) 61 | ag = (target[:, 2] - target[:, 0]) * (target[:, 3] - target[:, 1]) 62 | union = ap + ag - overlap + eps 63 | 64 | # IoU 65 | ious = overlap / union 66 | 67 | loss = 1 - ious 68 | 69 | loss = torch.where(torch.isnan(loss), torch.full_like(loss, 1.), loss) 70 | loss = torch.where(torch.isinf(loss), torch.full_like(loss, 1.), loss) 71 | loss = loss.mean() 72 | 73 | return loss 74 | 75 | 76 | class GIOU_Loss(nn.Module): 77 | def __init__(self): 78 | super(GIOU_Loss, self).__init__() 79 | 80 | def forward(self, predict_, target_, eps=1e-7): 81 | """`Implementation of Distance-IoU Loss: Faster and Better 82 | Learning for Bounding Box Regression, https://arxiv.org/abs/1911.08287`_. 83 | 84 | Code is modified from https://github.com/Zzh-tju/DIoU. 85 | 86 | Args: 87 | pred (Tensor): Predicted bboxes of format (x1, y1, x2, y2), 88 | shape (n, 4). 89 | target (Tensor): Corresponding gt bboxes, shape (n, 4). 90 | eps (float): Eps to avoid log(0). 91 | Return: 92 | Tensor: Loss tensor. 93 | """ 94 | # overlap 95 | assert predict_.shape == target_.shape 96 | 97 | pre_xy_min = predict_[:,::2,:] 98 | pre_xy_max = predict_[:,1::2,:] 99 | 100 | gt_xy_min = target_[:,::2,:] 101 | gt_xy_max = target_[:,1::2,:] 102 | 103 | pred = torch.cat([pre_xy_min,pre_xy_max],dim=-1).reshape(-1,4) 104 | target = torch.cat([gt_xy_min,gt_xy_max],dim=-1).reshape(-1,4) 105 | 106 | lt = torch.max(pred[:, :2], target[:, :2]) 107 | rb = torch.min(pred[:, 2:], target[:, 2:]) 108 | wh = (rb - lt).clamp(min=0) 109 | overlap = wh[:, 0] * wh[:, 1] 110 | 111 | # union 112 | ap = (pred[:, 2] - pred[:, 0]) * (pred[:, 3] - pred[:, 1]) 113 | ag = (target[:, 2] - target[:, 0]) * (target[:, 3] - target[:, 1]) 114 | union = ap + ag - overlap + eps 115 | 116 | # IoU 117 | ious = overlap / union 118 | 119 | loss = 1 - ious 120 | 121 | loss = torch.where(torch.isnan(loss), torch.full_like(loss, 1.), loss) 122 | loss = torch.where(torch.isinf(loss), torch.full_like(loss, 1.), loss) 123 | loss = loss.mean() 124 | 125 | return loss 126 | 127 | class DIOU_Loss(nn.Module): 128 | def __init__(self): 129 | super(DIOU_Loss, self).__init__() 130 | 131 | def forward(self, predict_, target_, eps=1e-7): 132 | """`Implementation of Distance-IoU Loss: Faster and Better 133 | Learning for Bounding Box Regression, https://arxiv.org/abs/1911.08287`_. 134 | 135 | Code is modified from https://github.com/Zzh-tju/DIoU. 136 | 137 | Args: 138 | pred (Tensor): Predicted bboxes of format (x1, y1, x2, y2), 139 | shape (n, 4). 140 | target (Tensor): Corresponding gt bboxes, shape (n, 4). 141 | eps (float): Eps to avoid log(0). 142 | Return: 143 | Tensor: Loss tensor. 144 | """ 145 | # overlap 146 | assert predict_.shape == target_.shape 147 | 148 | pre_xy_min = predict_[:,::2,:] 149 | pre_xy_max = predict_[:,1::2,:] 150 | 151 | gt_xy_min = target_[:,::2,:] 152 | gt_xy_max = target_[:,1::2,:] 153 | 154 | pred = torch.cat([pre_xy_min,pre_xy_max],dim=-1).reshape(-1,4) 155 | target = torch.cat([gt_xy_min,gt_xy_max],dim=-1).reshape(-1,4) 156 | 157 | lt = torch.max(pred[:, :2], target[:, :2]) 158 | rb = torch.min(pred[:, 2:], target[:, 2:]) 159 | wh = (rb - lt).clamp(min=0) 160 | overlap = wh[:, 0] * wh[:, 1] 161 | 162 | # union 163 | ap = (pred[:, 2] - pred[:, 0]) * (pred[:, 3] - pred[:, 1]) 164 | ag = (target[:, 2] - target[:, 0]) * (target[:, 3] - target[:, 1]) 165 | union = ap + ag - overlap + eps 166 | 167 | # IoU 168 | ious = overlap / union 169 | 170 | # enclose area 171 | enclose_x1y1 = torch.min(pred[:, :2], target[:, :2]) 172 | enclose_x2y2 = torch.max(pred[:, 2:], target[:, 2:]) 173 | enclose_wh = (enclose_x2y2 - enclose_x1y1).clamp(min=0) 174 | 175 | cw = enclose_wh[:, 0] 176 | ch = enclose_wh[:, 1] 177 | 178 | # 最小包闭区域的对角线距离 179 | c2 = cw**2 + ch**2 + eps 180 | 181 | b1_x1, b1_y1 = pred[:, 0], pred[:, 1] 182 | b1_x2, b1_y2 = pred[:, 2], pred[:, 3] 183 | b2_x1, b2_y1 = target[:, 0], target[:, 1] 184 | b2_x2, b2_y2 = target[:, 2], target[:, 3] 185 | 186 | # 中心点距离 187 | left = ((b2_x1 + b2_x2) - (b1_x1 + b1_x2))**2 / 4 #== ((b2_x1 + b2_x2)/2 - (b1_x1 + b1_x2)/2)**2 188 | right = ((b2_y1 + b2_y2) - (b1_y1 + b1_y2))**2 / 4 189 | rho2 = left + right 190 | 191 | # DIoU 192 | dious = ious - rho2 / c2 193 | loss = 1 - dious 194 | 195 | loss = torch.where(torch.isnan(loss), torch.full_like(loss, 1.), loss) 196 | loss = torch.where(torch.isinf(loss), torch.full_like(loss, 1.), loss) 197 | loss = loss.mean() 198 | 199 | return loss 200 | 201 | class CIOU_Loss(nn.Module): 202 | def __init__(self): 203 | super(CIOU_Loss, self).__init__() 204 | 205 | def forward(self, predict, target, eps=1e-7): 206 | predict_ = predict.clone() 207 | target_ = target.clone() 208 | 209 | assert predict_.shape == target_.shape 210 | 211 | pre_xy_min = predict_[:,::2,:] 212 | pre_xy_max = predict_[:,1::2,:] 213 | 214 | gt_xy_min = target_[:,::2,:] 215 | gt_xy_max = target_[:,1::2,:] 216 | 217 | pred = torch.cat([pre_xy_min,pre_xy_max],dim=-1).reshape(-1,4) 218 | target = torch.cat([gt_xy_min,gt_xy_max],dim=-1).reshape(-1,4) 219 | 220 | lt = torch.max(pred[:, :2], target[:, :2]) 221 | rb = torch.min(pred[:, 2:], target[:, 2:]) 222 | wh = (rb - lt).clamp(min=0) 223 | overlap = wh[:, 0] * wh[:, 1] 224 | 225 | # union 226 | ap = (pred[:, 2] - pred[:, 0]) * (pred[:, 3] - pred[:, 1]) 227 | ag = (target[:, 2] - target[:, 0]) * (target[:, 3] - target[:, 1]) 228 | union = ap + ag - overlap + eps 229 | 230 | # IoU 231 | ious = overlap / union 232 | 233 | # enclose area 234 | enclose_x1y1 = torch.min(pred[:, :2], target[:, :2]) 235 | enclose_x2y2 = torch.max(pred[:, 2:], target[:, 2:]) 236 | enclose_wh = (enclose_x2y2 - enclose_x1y1).clamp(min=0) 237 | 238 | cw = enclose_wh[:, 0] 239 | ch = enclose_wh[:, 1] 240 | 241 | c2 = cw**2 + ch**2 + eps 242 | 243 | b1_x1, b1_y1 = pred[:, 0], pred[:, 1] 244 | b1_x2, b1_y2 = pred[:, 2], pred[:, 3] 245 | b2_x1, b2_y1 = target[:, 0], target[:, 1] 246 | b2_x2, b2_y2 = target[:, 2], target[:, 3] 247 | 248 | w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps 249 | w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps 250 | 251 | left = ((b2_x1 + b2_x2) - (b1_x1 + b1_x2))**2 / 4 252 | right = ((b2_y1 + b2_y2) - (b1_y1 + b1_y2))**2 / 4 253 | rho2 = left + right 254 | # 对应公式 255 | factor = 4 / math.pi**2 256 | v = factor * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2) 257 | 258 | # CIoU 259 | cious = ious - (rho2 / c2 + v**2 / (1 - ious + v)) 260 | loss = 1 - cious 261 | 262 | loss = torch.where(torch.isnan(loss), torch.full_like(loss, 1.), loss) 263 | loss = torch.where(torch.isinf(loss), torch.full_like(loss, 1.), loss) 264 | loss = loss.mean() 265 | 266 | return loss 267 | 268 | if __name__ == '__main__': 269 | loss = DIOU_Loss() 270 | 271 | predicted = torch.randint(1,10,(2,16,2)) 272 | target = torch.randint(1,10,(2,16,2)) 273 | 274 | l = loss(predicted,target) 275 | 276 | print(l) -------------------------------------------------------------------------------- /src/train_test.py: -------------------------------------------------------------------------------- 1 | import os, time, sys, cv2 2 | import torch 3 | import random 4 | 5 | import numpy as np 6 | import torch.nn as nn 7 | 8 | from torch.utils.data import DataLoader 9 | from tqdm import tqdm 10 | from transformers import AdamW,get_linear_schedule_with_warmup 11 | 12 | from utils.logger import get_logger 13 | from utils.load_save import save_parameters,write_json 14 | from utils.losses import OKS_Loss 15 | from utils.visualization import visualize_local_tracklets 16 | from utils.metrics import recover_poses,L1_err,L2_err,compute_auc 17 | from opts import parse_opts 18 | from datasets.datasets import get_training_set, get_test_set 19 | from models.moprl import MoPRL 20 | os.environ["TOKENIZERS_PARALLELISM"] = "false" 21 | 22 | class Train_Eval_Inference(object): 23 | 24 | def __init__(self, opt): 25 | 26 | self.opt = opt 27 | self.dataset_name = opt.dataset 28 | self.exp_name = opt.exp_name 29 | 30 | self.workspace = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../exps/') 31 | self.jobname = opt.dataset 32 | self.exp_dir = os.path.join(self.workspace, self.exp_name) 33 | self.model_save_dir = os.path.join(self.exp_dir, 'models') 34 | self.vis_sample_dir = os.path.join(self.exp_dir, 'vis_samples') 35 | self.test_result_dir = os.path.join(self.exp_dir, 'result') 36 | 37 | self.train_tasks = 'rec' 38 | self.test_tasks = 'rec' 39 | self.scale_factor = 100 40 | 41 | if not os.path.exists(self.model_save_dir): 42 | os.makedirs(self.model_save_dir) 43 | if not os.path.exists(self.vis_sample_dir): 44 | os.makedirs(self.vis_sample_dir) 45 | if not os.path.exists(self.test_result_dir): 46 | os.makedirs(self.test_result_dir) 47 | 48 | # whether to start training from an existing snapshot 49 | self.load_pretrain_model = opt.load_pretrain_model 50 | if self.load_pretrain_model: 51 | self.iter_to_load = opt.iter_to_load 52 | 53 | save_parameters(self.exp_dir,opt) 54 | 55 | train_Dataset = get_training_set(opt) 56 | self.train_loader = DataLoader(train_Dataset, batch_size=opt.batch_size, shuffle=True, num_workers=opt.workers, 57 | pin_memory=True, drop_last=True) 58 | test_Dataset = get_test_set(opt) 59 | self.test_loader = DataLoader(test_Dataset, batch_size=opt.batch_size, shuffle=False, num_workers=opt.workers, 60 | pin_memory=True, drop_last=False) 61 | 62 | self.logger = get_logger(self.exp_dir + '/log.txt') 63 | 64 | self.oks_loss = OKS_Loss() 65 | 66 | if self.opt.dataset == 'ShanghaiTech_AlphaPose' or self.opt.dataset == 'UCF_crime': 67 | self.num_joints = 17 68 | else: 69 | self.num_joints = 25 70 | 71 | def train_batch(self,model,optimizer,epoch,iteration,scheduler=None): 72 | 73 | for input_dict in iter(self.train_loader): 74 | 75 | pose = input_dict['pose'].float().cuda() 76 | weigths = input_dict['weigths'].float().cuda() 77 | gt = input_dict['gt'].float().cuda() 78 | spatial_token = input_dict['spatial_token'].long().cuda() 79 | temporal_token = input_dict['temporal_token'].long().cuda() 80 | 81 | model.zero_grad() 82 | rec_pose = model(pose,spatial_token,temporal_token) 83 | 84 | loss = self.oks_loss(rec_pose,gt,weigths) 85 | loss.backward() 86 | torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0) 87 | optimizer.step() 88 | 89 | if scheduler is not None: 90 | scheduler.step() 91 | 92 | iteration += 1 93 | 94 | if iteration % self.opt.log_interval == 0: 95 | 96 | self.logger.info("iter {} (epoch {}), loss = {:.6f}".format(iteration, epoch, loss.item())) 97 | 98 | if iteration % self.opt.vis_interval == 0: 99 | 100 | pred_pose = rec_pose[0].cpu().detach() 101 | gt_pose = gt[0].cpu().detach() 102 | pred_local_img = visualize_local_tracklets(pred_pose, self.num_joints) 103 | gt_local_img = visualize_local_tracklets(gt_pose, self.num_joints) 104 | local_imgs = np.vstack([gt_local_img,pred_local_img]) 105 | cv2.imwrite(self.vis_sample_dir+'/{}_normal.jpg'.format(str(iteration)),local_imgs) 106 | 107 | if iteration % self.opt.eval_interval == 0 or iteration == 1: 108 | 109 | self.logger.info('Start evaluation!') 110 | model.eval() 111 | l1, l2, all_meta, vis_pose, vis_meta, vis_gt = self.eval_batch(model) 112 | write_json(l1,self.test_result_dir+'/iteration{}_L1.json'.format(str(iteration))) 113 | write_json(l2,self.test_result_dir+'/iteration{}_L2.json'.format(str(iteration))) 114 | write_json(all_meta,self.test_result_dir+'/iteration{}_meta.json'.format(str(iteration))) 115 | 116 | model.train() 117 | torch.save(model.state_dict(), self.model_save_dir+'/{:06d}_model.pth.tar'.format(iteration)) 118 | 119 | return iteration 120 | 121 | def eval_batch(self,model): 122 | # Set to evaluation mode (randomly sample z from the whole distribution) 123 | all_err_l1 = [] 124 | all_err_l2 = [] 125 | all_meta = [] 126 | vis_pose = [] 127 | vis_meta = [] 128 | vis_gt = [] 129 | 130 | with torch.no_grad(): 131 | for i,input_dict in enumerate(tqdm(self.test_loader)): 132 | #input = input_dict['input_sequence'].float().cuda() 133 | weigths = input_dict['weigths'].float().cuda() 134 | pose = input_dict['pose'].float().cuda() 135 | gt = input_dict['gt'].float() 136 | spatial_token = input_dict['spatial_token'].long().cuda() 137 | temporal_token = input_dict['temporal_token'].long().cuda() 138 | meta = input_dict['meta'] 139 | output = model(pose,spatial_token,temporal_token) 140 | err_l1 = L1_err(output.cpu(),gt) 141 | err_l2 = L2_err(output.cpu(),gt) 142 | 143 | all_err_l1.extend(err_l1) 144 | all_err_l2.extend(err_l2) 145 | all_meta.extend(meta) 146 | 147 | L1_auc, L1_norm_auc = compute_auc(all_err_l1,all_err_l1,all_meta,0, 0,self.dataset_name) 148 | L2_auc, L2_norm_auc = compute_auc(all_err_l2,all_err_l2,all_meta,0, 0,self.dataset_name) 149 | self.logger.info('Best AUC under L1 Err: {}'.format(str(round(L1_auc,4)*100))) 150 | self.logger.info('Best AUC under L2 Err: {}'.format(str(round(L2_auc,4)*100))) 151 | self.logger.info('Best AUC under Norm L1 Err: {}'.format(str(round(L1_norm_auc,4)*100))) 152 | self.logger.info('Best AUC under Norm L2 Err: {}'.format(str(round(L2_norm_auc,4)*100))) 153 | 154 | vis_pose_np = np.array(vis_pose) 155 | vis_gt_np = np.array(vis_gt) 156 | 157 | return all_err_l1,all_err_l2,all_meta, vis_pose_np, vis_meta, vis_gt_np 158 | 159 | 160 | def inference(self): 161 | 162 | model = MoPRL(tracklet_len=self.opt.tracklet_len,headless=self.opt.headless,pre_len=opt.pre_len,embed_dim=self.opt.embed_dim, 163 | spatial_depth=self.opt.spatial_depth, temporal_depth=self.opt.temporal_depth, num_joints=self.num_joints).cuda() 164 | model.load_state_dict(torch.load(self.opt.model_path)) 165 | 166 | # ToDo 167 | self.logger.info('Start evaluation!') 168 | model.eval() 169 | 170 | all_err_l1 = [] 171 | all_err_l2 = [] 172 | all_err_score = [] 173 | all_meta = [] 174 | out_pose = [] 175 | 176 | with torch.no_grad(): 177 | for i,input_dict in enumerate(tqdm(self.test_loader)): 178 | 179 | weigths = input_dict['weigths'].float().cuda() 180 | pose = input_dict['pose'].float().cuda() 181 | gt = input_dict['gt'].float() 182 | spatial_token = input_dict['spatial_token'].long().cuda() 183 | temporal_token = input_dict['temporal_token'].long().cuda() 184 | meta = input_dict['meta'] 185 | output = model(pose,spatial_token,temporal_token) 186 | 187 | err_l1 = L1_err(output.cpu(),gt) 188 | err_l2 = L2_err(output.cpu(),gt) 189 | all_err_l1.extend(err_l1) 190 | all_err_l2.extend(err_l2) 191 | all_meta.extend(meta) 192 | out_pose.extend(output.cpu().numpy()) 193 | 194 | L1_auc, L1_norm_auc = compute_auc(all_err_l1,all_err_l1,all_meta,0, 0,self.dataset_name) 195 | L2_auc, L2_norm_auc = compute_auc(all_err_l2,all_err_l2,all_meta,0, 0,self.dataset_name) 196 | self.logger.info('Best AUC under L1 Err: {}'.format(str(round(L1_auc,4)*100))) 197 | self.logger.info('Best AUC under L2 Err: {}'.format(str(round(L2_auc,4)*100))) 198 | self.logger.info('Best AUC under Norm L1 Err: {}'.format(str(round(L1_norm_auc,4)*100))) 199 | self.logger.info('Best AUC under Norm L2 Err: {}'.format(str(round(L2_norm_auc,4)*100))) 200 | 201 | 202 | def train_eval(self): 203 | 204 | gpu_ids = range(torch.cuda.device_count()) 205 | self.logger.info('Number of GPUs in use {}'.format(gpu_ids)) 206 | 207 | 208 | model = MoPRL(tracklet_len=self.opt.tracklet_len,headless=self.opt.headless,pre_len=opt.pre_len,embed_dim=self.opt.embed_dim, 209 | spatial_depth=self.opt.spatial_depth, temporal_depth=self.opt.temporal_depth, num_joints=self.num_joints).cuda() 210 | 211 | total_steps = len(self.train_loader)*self.opt.epochs 212 | optimizer = AdamW(model.parameters(), lr=opt.lr_rate, eps = 1e-8) 213 | scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps = 1000, num_training_steps = total_steps) 214 | 215 | self.logger.info(self.jobname) 216 | 217 | iteration = 0 218 | if self.load_pretrain_model: 219 | model_name = self.model_save_dir + '/{:06d}_model.pth.tar'.format(self.iter_to_load) 220 | self.logger.info("loading model from {}".format(model_name)) 221 | state_dict = torch.load(model_name) 222 | model.load_state_dict(state_dict['model']) 223 | optimizer.load_state_dict(state_dict['optimizer']) 224 | iteration = self.iter_to_load + 1 225 | 226 | tmp = sum(p.numel() for p in model.parameters()) 227 | 228 | self.logger.info('model paras sum: {}'.format(tmp)) 229 | 230 | self.logger.info('Start Training!') 231 | 232 | for epoch in range(self.opt.epochs): 233 | 234 | model.train() 235 | iteration = self.train_batch(model,optimizer,epoch,iteration,scheduler=scheduler) 236 | 237 | self.logger.info('End Training!') 238 | 239 | if __name__ == '__main__': 240 | 241 | opt = parse_opts() 242 | print (opt) 243 | 244 | os.environ["CUDA_VISIBLE_DEVICES"] = str(opt.gpu) 245 | 246 | torch.manual_seed(opt.seed) 247 | torch.cuda.manual_seed(opt.seed) 248 | torch.cuda.manual_seed_all(opt.seed) 249 | np.random.seed(opt.seed) 250 | torch.backends.cudnn.benchmark = False 251 | torch.backends.cudnn.deterministic = True 252 | random.seed(opt.seed) 253 | 254 | pipeline = Train_Eval_Inference(opt) 255 | 256 | if opt.inference: 257 | pipeline.inference() 258 | else: 259 | pipeline.train_eval() -------------------------------------------------------------------------------- /src/datasets/Corridor.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | import torch 4 | import copy 5 | import numpy as np 6 | from tqdm import tqdm 7 | from torch.utils.data import Dataset 8 | import glob 9 | 10 | import sys 11 | sys.path.append("../") 12 | from utils.load_save import load_json, write_json 13 | from utils.normalize import * 14 | 15 | POSE_META_FILE = 'pose_meta_{}_length{}_stride{}.json' 16 | POSE_DATA_FILE = 'pose_data_{}_length{}_stride{}.json' 17 | 18 | class Corridor(Dataset): 19 | def __init__(self, pose_dir, split='train', tracklet_len=8 , stride=2, pre_len=1, 20 | normalize_tracklet=True, normalize_score=True, normalize_pose=True, 21 | embed_dim=128, 22 | mask_rec=True,fusion_type='div',motion_type='rayleigh',mask_pro=0.15): 23 | 24 | self.pose_dir = pose_dir 25 | self.split = split 26 | self.tracklet_len = tracklet_len 27 | self.stride = stride 28 | self.frame_width = 1920 29 | self.frame_height = 1080 30 | self.scale_factor = 100 31 | self.mask_rec = mask_rec 32 | self.fusion_type = fusion_type #'none' #fusion_type 33 | self.motion_type = motion_type 34 | self.mask_pro = mask_pro 35 | self.pre_len = pre_len 36 | 37 | self.joints_num = 25 38 | self.type_token, self.spatial_token, self.temporal_token = self._gen_fixed_token_seq() 39 | self.meta_path = pose_dir + '/' + POSE_META_FILE.format(self.split,str(self.tracklet_len),str(self.stride)) 40 | self.tracklet_path = pose_dir + '/' + POSE_DATA_FILE.format(self.split,str(self.tracklet_len),str(self.stride)) 41 | 42 | self.normalize_tracklet = True # normalize_boxes 43 | self.normalize_score = normalize_score 44 | self.normalize_pose = True # False # normalize_pose 45 | 46 | self._load_tracklets() 47 | 48 | def __len__(self): 49 | return len(self.meta_data) 50 | 51 | def _gen_fixed_token_seq(self): 52 | 53 | type_token = [] 54 | spatial_token = [] 55 | temporal_token = [] 56 | single_type_tok = [0,0] + [1 for n in range(self.joints_num)] 57 | 58 | for i in range(self.tracklet_len): 59 | type_token.extend(single_type_tok) 60 | for j in range(self.joints_num): 61 | spatial_token.append(j) 62 | temporal_token.append(i) 63 | 64 | return torch.tensor(type_token), torch.tensor(spatial_token), torch.tensor(temporal_token) 65 | 66 | def _load_tracklets(self): 67 | 68 | if os.path.exists(self.tracklet_path) and os.path.exists(self.meta_path): 69 | print('Load {} Traclets from saved files, Traclet Length {}, Stride {}'.format(self.split, self.tracklet_len, self.stride)) 70 | self.meta_data, self.tracklet_data = self._lazy_load_tracklets() 71 | else: 72 | print('Load {} Traclets from scratch, Traclet Length {}, Stride {}'.format(self.split, self.tracklet_len, self.stride)) 73 | self.meta_data, self.tracklet_data = self._scratch_load_tracklets() 74 | 75 | def _lazy_load_tracklets(self): 76 | 77 | return load_json(self.meta_path), load_json(self.tracklet_path) 78 | 79 | def _scratch_load_tracklets(self): 80 | 81 | meta_data = [] 82 | tracklet_data = [] 83 | base_dir = self.pose_dir+'/'+self.split+'/' 84 | all_npy = glob.glob(os.path.join(base_dir, "*.npy")) 85 | logging.info('Processing raw traclets') 86 | filter_less_than = self.tracklet_len * self.stride 87 | 88 | for file in tqdm(all_npy): 89 | 90 | track = np.load(file) 91 | video_id, pid = file.split('/')[-1].split('_') 92 | if len(track)1e-10, y>1e-10) 146 | no_valid = np.logical_and(x<1e-10, y<1e-10) 147 | valid_sum = valid.sum(axis=1) 148 | 149 | valid_sum[valid_sum < 1] = 1 150 | 151 | 152 | x_mean = (x.sum(axis=1)/valid_sum)[:, np.newaxis] 153 | y_mean = (y.sum(axis=1)/valid_sum)[:, np.newaxis] 154 | 155 | tmp = np.zeros_like(x) 156 | tmp[no_valid] = 1 157 | 158 | x += tmp*x_mean 159 | y += tmp*y_mean 160 | 161 | if normalize: 162 | x, y = normalize_pose(x,y) 163 | 164 | if isinstance(x,list): 165 | x, y = np.array(x), np.array(y) 166 | 167 | x[no_valid] = 0 168 | y[no_valid] = 0 169 | 170 | x = np.expand_dims(x,-1) 171 | y = np.expand_dims(y,-1) 172 | pose = np.concatenate((x,y),axis=-1).tolist() 173 | 174 | # (T,17,2) 175 | 176 | return pose 177 | 178 | def _extract_poses_boxes(self,tracklet,normalize_boxes=True,normalize_poses=False): 179 | 180 | if isinstance(tracklet,list): 181 | tracklet = np.array(tracklet) 182 | x = tracklet[:, :25] 183 | y = tracklet[:, 25:50] 184 | 185 | valid = np.logical_or(x>1e-10, y>1e-10) 186 | no_valid = np.logical_and(x<1e-10, y<1e-10) 187 | valid_sum = valid.sum(axis=1) 188 | 189 | valid_sum[valid_sum < 1] = 1 190 | 191 | 192 | x_mean = (x.sum(axis=1)/valid_sum)[:, np.newaxis] 193 | y_mean = (y.sum(axis=1)/valid_sum)[:, np.newaxis] 194 | 195 | tmp = np.zeros_like(x) 196 | tmp[no_valid] = 1 197 | 198 | x += tmp*x_mean 199 | y += tmp*y_mean 200 | 201 | if normalize_boxes: 202 | box_xy_max = [[x[i, :].max()/self.frame_width,y[i, :].max()/self.frame_height] for i in range(len(x))] 203 | box_xy_min = [[x[i, :].min()/self.frame_width,y[i, :].min()/self.frame_height] for i in range(len(y))] 204 | else: 205 | box_xy_max = [[x[i, :].max(),y[i, :].max()] for i in range(len(x))] 206 | box_xy_min = [[x[i, :].min(),y[i, :].min()] for i in range(len(y))] 207 | 208 | if normalize_poses: 209 | x, y = normalize_pose(x,y) 210 | else: 211 | x, y = center_pose(x, y) 212 | 213 | if isinstance(x,list): 214 | x, y = np.array(x), np.array(y) 215 | 216 | x[no_valid] = 0 217 | y[no_valid] = 0 218 | 219 | # w = np.abs(np.array(box_xy_max)[:,0] - np.array(box_xy_min)[:,0]).max() 220 | # h = np.abs(np.array(box_xy_max)[:,1] - np.array(box_xy_min)[:,1]).max() 221 | # x = x*w 222 | # y = y*h 223 | 224 | x = np.expand_dims(x,-1) 225 | y = np.expand_dims(y,-1) 226 | pose = np.concatenate((x,y),axis=-1).tolist() 227 | 228 | # (T,17,2) 229 | 230 | return pose, box_xy_max, box_xy_min 231 | 232 | def _inters_factor(self, v): 233 | 234 | if self.motion_type == 'gaussian': 235 | 236 | sigma = 0.18917838310469845 237 | mu = 0.09870275102403338 238 | factor = np.exp(-(np.linalg.norm(v-mu))**2/(2*(sigma**2))) 239 | 240 | if self.motion_type == 'rayleigh': 241 | 242 | sigma = 0.0202 243 | con = 0.0142 244 | factor = v * np.exp(-(v**2)/(2*(sigma**2))) / con 245 | 246 | if self.motion_type == 'uniform': 247 | 248 | factor = 0.5 249 | 250 | if self.motion_type == 'none': 251 | 252 | factor = 1 253 | 254 | return factor*0.7 + 0.3 # avoid zero 255 | 256 | def merge(self,pose,factor): 257 | 258 | if self.fusion_type == 'div': 259 | return pose / factor 260 | if self.fusion_type == 'add': 261 | return pose + factor 262 | if self.fusion_type == 'mul': 263 | return pose * factor 264 | 265 | def _gen_rec_mask(self,mask,prob=0.15): 266 | 267 | ref = torch.ones_like(torch.tensor(mask)) 268 | masked_indices = torch.bernoulli(torch.full(ref.shape, prob)).bool() 269 | ref[masked_indices] = 0 270 | 271 | return ref.tolist() 272 | 273 | def _flat_input(self,poses, boxes_max, boxes_min, scores): 274 | 275 | assert len(poses) == len(boxes_max) 276 | assert len(boxes_max) == len(boxes_min) 277 | assert len(poses) == len(scores) 278 | 279 | pose_fusion = [] 280 | 281 | weights = [] 282 | inters = [] 283 | poses_np = np.array(poses) 284 | boxes_max_np = np.array(boxes_max) 285 | boxes_min_np = np.array(boxes_min) 286 | 287 | for i in range(len(poses_np)-1): 288 | v = np.linalg.norm((boxes_max_np[i] + boxes_min_np[i])/2 - (boxes_max_np[(i+1)] + boxes_min_np[(i+1)])/2) 289 | v_norm = v/((boxes_max_np[i] - boxes_min_np[i] + 1e-6).mean()) 290 | inters.append(self._inters_factor(v_norm)) 291 | 292 | inters.append(inters[len(poses_np)-2]) 293 | 294 | # inters = [max(inters)] * len(poses_np) 295 | 296 | pose_fusion = self.merge(poses_np[0],inters[0])[np.newaxis, :, :] 297 | weights.extend(scores[0]) 298 | ## begin 299 | for i in range(len(poses)-1): 300 | pose_fusion = np.concatenate((pose_fusion, (self.merge(poses_np[i+1],inters[i+1]))[np.newaxis, :, :]), axis = 0) 301 | weights.extend(scores[i+1]) 302 | 303 | return weights, pose_fusion.tolist() 304 | 305 | def __getitem__(self, idx): 306 | 307 | meta = self.meta_data[idx] 308 | tracklet = self.tracklet_data[idx] 309 | # boxes_max, boxes_min = self._extract_boxes(tracklet,self.normalize_tracklet) 310 | scores = self._extract_conf_score(tracklet,self.normalize_score) 311 | # poses = self._extract_poses(tracklet,self.normalize_pose) 312 | poses, boxes_max, boxes_min = self._extract_poses_boxes(tracklet,self.normalize_tracklet,self.normalize_pose) 313 | 314 | poses_gt = copy.deepcopy(poses) 315 | weights, pose_fusion = self._flat_input(poses, boxes_max, boxes_min,scores) 316 | pose_fusion = (torch.tensor(pose_fusion)*self.scale_factor).to(torch.int32) 317 | 318 | weights = torch.tensor(weights) 319 | poses_gt = (torch.tensor(poses_gt)*self.scale_factor).to(torch.int32) 320 | gt = poses_gt.reshape(-1,2) 321 | #poses_gt = torch.chunk(poses_gt,self.tracklet_len,0) 322 | weights = torch.chunk(weights,self.tracklet_len,0) 323 | 324 | weights = torch.cat([weights[i] for i in range(len(weights))],dim=0) 325 | #gt = torch.cat([poses_gt[i] for i in range(len(poses_gt))],dim=0) 326 | 327 | spatial_token = self.spatial_token.reshape(self.tracklet_len,-1) 328 | temporal_token = self.temporal_token.reshape(self.tracklet_len,-1) 329 | 330 | if self.mask_rec: 331 | mask = [1 for i in range((self.tracklet_len)*(self.joints_num))] 332 | mask = self._gen_rec_mask(mask) 333 | mask = torch.tensor(mask) 334 | #rint(mask) 335 | mask_ = torch.cat((mask.unsqueeze(0),mask.unsqueeze(0)),dim=0).permute(1,0) 336 | mask_index = mask_==0 337 | mask_index = mask_index.reshape(self.tracklet_len,self.joints_num,-1) 338 | pose_fusion[mask_index] = 0 339 | 340 | input_dict = { 341 | 'meta': meta, 342 | 'pose': pose_fusion, 343 | 'gt': gt, 344 | 'weigths': weights, 345 | 'spatial_token':spatial_token, 346 | 'temporal_token':temporal_token, 347 | 'frame_width':self.frame_width, 348 | 'frame_height':self.frame_height, 349 | 'scale_factor': self.scale_factor, 350 | 'joints_num':self.joints_num 351 | } 352 | 353 | return input_dict 354 | 355 | if __name__ == '__main__': 356 | 357 | from dataset_path import * 358 | import cv2 359 | from torch.utils.data import DataLoader 360 | sys.path.append(".") 361 | from utils.visualization import visualize_local_tracklets 362 | #from utils.metrics import recover_poses 363 | 364 | debug_Dataset = Corridor(pose_dir=Corridor_Pose_Dir,split='train',tracklet_len=8 , stride=2, pre_len=4) 365 | dataloader = DataLoader(debug_Dataset, batch_size=2, shuffle=True, num_workers=0) 366 | VIS = False 367 | 368 | for i, input_dict in enumerate(tqdm(dataloader)): 369 | 370 | print(input_dict['pose'].size()) 371 | print(input_dict['weigths'].size()) 372 | print(input_dict['gt'].size()) 373 | print(input_dict['spatial_token'].size()) 374 | print(input_dict['temporal_token'].size()) 375 | print(input_dict['meta']) 376 | 377 | print("----------",i,"-------------") 378 | 379 | if i>10: 380 | break 381 | -------------------------------------------------------------------------------- /src/datasets/ShanghaiTech_AlphaPose.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | from sklearn.utils import shuffle 4 | import torch 5 | import copy 6 | import numpy as np 7 | from tqdm import tqdm 8 | from torch.utils.data import Dataset 9 | import random 10 | 11 | import sys 12 | sys.path.append("../../src") 13 | from utils.load_save import load_json, write_json 14 | from utils.normalize import normalize_score,normalize_pose 15 | from datasets.dataset_path import ShanghaiTech_Lable_Dir 16 | 17 | POSE_META_FILE = 'pose_meta_{}_length{}_stride{}.json' 18 | POSE_DATA_FILE = 'pose_data_{}_length{}_stride{}.json' 19 | SHT_LABEL = ShanghaiTech_Lable_Dir 20 | 21 | class ShanghaiTech_AlphaPose(Dataset): 22 | def __init__(self, pose_dir, split='train', tracklet_len=8 , stride=2, pre_len=1,head_less=False, 23 | normalize_tracklet=True, normalize_score=True, 24 | normalize_pose=True,embed_dim=128, 25 | mask_rec=True,fusion_type='div',motion_type='rayleigh', mask_pro=0.15, noise_factor = 0.1): 26 | 27 | self.pose_dir = pose_dir 28 | self.split = split 29 | self.head_less = head_less 30 | self.tracklet_len = tracklet_len 31 | self.stride = stride 32 | self.frame_width = 856 33 | self.frame_height = 480 34 | self.scale_factor = 100 35 | self.mask_rec = mask_rec 36 | self.fusion_type = fusion_type 37 | self.motion_type = motion_type 38 | self.mask_pro = mask_pro 39 | self.noise_factor = noise_factor 40 | 41 | if self.head_less: 42 | self.joints_num =14 43 | else: 44 | self.joints_num =17 45 | 46 | self.pre_len = pre_len 47 | 48 | self.type_token, self.spatial_token, self.temporal_token = self._gen_fixed_token_seq() 49 | self.meta_path = pose_dir + '/' + POSE_META_FILE.format(self.split,str(self.tracklet_len),str(self.stride)) 50 | self.tracklet_path = pose_dir + '/' + POSE_DATA_FILE.format(self.split,str(self.tracklet_len),str(self.stride)) 51 | 52 | self.normalize_tracklet = normalize_tracklet 53 | self.normalize_score = normalize_score 54 | self.normalize_pose = normalize_pose 55 | 56 | self._load_tracklets() 57 | 58 | if self.split == 'train': 59 | self.add_noise(self.noise_factor) 60 | 61 | def add_noise(self, noise_factor): 62 | 63 | print('adding noise') 64 | test_meta_path = self.pose_dir + '/' + POSE_META_FILE.format('test',str(self.tracklet_len),str(self.stride)) 65 | test_tracklet_path = self.pose_dir + '/' + POSE_DATA_FILE.format('test',str(self.tracklet_len),str(self.stride)) 66 | test_meta, test_tracklet = load_json(test_meta_path), load_json(test_tracklet_path) 67 | label_dic = {} 68 | abnormal_list = [] 69 | 70 | label_filenames = os.listdir(SHT_LABEL) 71 | for filename in label_filenames: 72 | filepath = os.path.join(SHT_LABEL, filename) 73 | label_dic[filename[:-4]] = np.load(filepath) 74 | 75 | for i, name in enumerate(test_meta): 76 | main, sub ,frame = name.split('_') 77 | scene = main + '_' + sub 78 | frame = int(frame) 79 | if label_dic[scene][frame] == 1: 80 | abnormal_list.append(i) 81 | 82 | print('total normal tracklet data is {}'.format(len(self.meta_data))) 83 | print('total abnormal tracklet data is {}'.format(len(abnormal_list))) 84 | add_noise_index = random.sample(abnormal_list, int(len(abnormal_list)*noise_factor)) 85 | print('adding abnormal tracklet data is {}'.format(len(add_noise_index))) 86 | 87 | # for i in range(1000): 88 | # main, sub ,frame = test_meta[add_noise_index[i]].split('_') 89 | # scene = main + '_' + sub 90 | # print(label_dic[scene][int(frame)]) 91 | 92 | # sys.exit(0) 93 | 94 | for index in add_noise_index: 95 | self.meta_data.append(test_meta[index]) 96 | self.tracklet_data.append(test_tracklet[index]) 97 | 98 | del test_meta 99 | del test_tracklet 100 | 101 | 102 | def __len__(self): 103 | return len(self.meta_data) 104 | 105 | def _gen_fixed_token_seq(self): 106 | 107 | type_token = [] 108 | spatial_token = [] 109 | temporal_token = [] 110 | single_type_tok = [0,0] + [1 for n in range(self.joints_num)] 111 | 112 | for i in range(self.tracklet_len): 113 | type_token.extend(single_type_tok) 114 | for j in range(self.joints_num): 115 | spatial_token.append(j) 116 | temporal_token.append(i) 117 | 118 | return torch.tensor(type_token), torch.tensor(spatial_token), torch.tensor(temporal_token) 119 | 120 | def _load_tracklets(self): 121 | 122 | if os.path.exists(self.tracklet_path) and os.path.exists(self.meta_path): 123 | print('Load {} Traclets from saved files, Traclet Length {}, Stride {}'.format(self.split, self.tracklet_len, self.stride)) 124 | self.meta_data, self.tracklet_data = self._lazy_load_tracklets() 125 | else: 126 | print('Load {} Traclets from scratch, Traclet Length {}, Stride {}'.format(self.split, self.tracklet_len, self.stride)) 127 | self.meta_data, self.tracklet_data = self._scratch_load_tracklets() 128 | 129 | def _lazy_load_tracklets(self): 130 | 131 | return load_json(self.meta_path), load_json(self.tracklet_path) 132 | 133 | def _scratch_load_tracklets(self): 134 | 135 | meta_data = [] 136 | tracklet_data = [] 137 | base_dir = self.pose_dir+'/'+self.split+'/tracked_person/' 138 | all_json = os.listdir(base_dir) 139 | logging.info('Processing raw traclets') 140 | filter_less_than = self.tracklet_len * self.stride 141 | 142 | for file in tqdm(all_json): 143 | scene_tracks = load_json(base_dir+file) 144 | person_num = len(scene_tracks.keys()) 145 | for p in scene_tracks.keys(): 146 | tracks = scene_tracks[p] 147 | frame_num = len(tracks.keys()) 148 | if frame_num < filter_less_than: 149 | continue 150 | 151 | if self.split == 'train': 152 | if frame_num < filter_less_than*2: 153 | continue 154 | 155 | frame_index = list(sorted(tracks.keys())) 156 | for i in range(len(frame_index)-self.tracklet_len*self.stride): 157 | select_frame = frame_index[i : i+self.tracklet_len*self.stride : self.stride] 158 | simple_pose = [ np.around(np.array(tracks[f]['keypoints']),2).tolist() for f in select_frame ] 159 | meta_data.append(file.split('_')[0]+'_'+file.split('_')[1]+'_'+select_frame[-1]) 160 | tracklet_data.append(simple_pose) 161 | 162 | print('Process Done. Sample amount: ', len(meta_data)) 163 | write_json(meta_data,self.meta_path) 164 | print('Save meta data Done') 165 | write_json(tracklet_data,self.tracklet_path) 166 | print('Save data Done') 167 | 168 | return meta_data,tracklet_data 169 | 170 | # tracklet[8,17*3] (x,y,c) 171 | 172 | def _extract_boxes(self,tracklet,normalize=True): 173 | 174 | if normalize: 175 | box_xy_max = [[max(pose[::3])/self.frame_width,max(pose[1::3])/self.frame_height] for pose in tracklet] 176 | box_xy_min = [[min(pose[::3])/self.frame_width,min(pose[1::3])/self.frame_height] for pose in tracklet] 177 | else: 178 | box_xy_max = [[max(pose[::3]),max(pose[1::3])] for pose in tracklet] 179 | box_xy_min = [[min(pose[::3]),min(pose[1::3])] for pose in tracklet] 180 | 181 | return box_xy_max , box_xy_min 182 | 183 | def _extract_conf_score(self,tracklet,normalize=True): 184 | 185 | scores = [] 186 | for pose in tracklet: 187 | pose_score = np.array(pose[2::3]) 188 | if normalize: 189 | pose_score = normalize_score(pose_score) 190 | scores.append(pose_score.tolist()) 191 | 192 | return scores 193 | 194 | def _extract_poses(self,tracklet,normalize=True): 195 | 196 | if isinstance(tracklet,list): 197 | tracklet = np.array(tracklet) 198 | x = tracklet[:, ::3] 199 | y = tracklet[:, 1::3] 200 | 201 | if normalize: 202 | x, y = normalize_pose(x,y) 203 | 204 | if isinstance(x,list): 205 | x, y = np.array(x), np.array(y) 206 | 207 | x = np.expand_dims(x,-1) 208 | y = np.expand_dims(y,-1) 209 | pose = np.concatenate((x,y),axis=-1).tolist() 210 | 211 | # (T,17,2) 212 | 213 | return pose 214 | 215 | def _inters_factor(self, v): 216 | 217 | if self.motion_type == 'gaussian': 218 | 219 | sigma = 0.18917838310469845 220 | mu = 0.09870275102403338 221 | factor = np.exp(-(np.linalg.norm(v-mu))**2/(2*(sigma**2))) 222 | 223 | if self.motion_type == 'rayleigh': 224 | 225 | if self.tracklet_len == 8 and self.stride == 2: 226 | 227 | sigma = 0.0464 228 | con = 0.0299 229 | 230 | factor = v * np.exp(-(v**2)/(2*(sigma**2))) / con 231 | 232 | if self.motion_type == 'uniform': 233 | 234 | factor = 0.5 235 | 236 | if self.motion_type == 'none': 237 | 238 | factor = 1 239 | 240 | if self.motion_type == 'random': 241 | 242 | factor = 1 + 0.5*np.random.rand() 243 | 244 | return factor*0.7 + 0.3 245 | 246 | def merge(self,pose,factor): 247 | 248 | if self.fusion_type == 'div': 249 | return pose / factor 250 | if self.fusion_type == 'add': 251 | return pose + factor 252 | if self.fusion_type == 'mul': 253 | return pose * factor 254 | 255 | def _gen_rec_mask(self,mask,prob=0.15): 256 | 257 | ref = torch.ones_like(torch.tensor(mask)) 258 | masked_indices = torch.bernoulli(torch.full(ref.shape, prob)).bool() 259 | ref[masked_indices] = 0 260 | 261 | return ref.tolist() 262 | 263 | def _flat_input(self,poses, boxes_max, boxes_min, scores): 264 | 265 | assert len(poses) == len(boxes_max) 266 | assert len(boxes_max) == len(boxes_min) 267 | assert len(poses) == len(scores) 268 | 269 | pose_fusion = [] 270 | 271 | weights = [] 272 | inters = [] 273 | poses_np = np.array(poses) 274 | boxes_max_np = np.array(boxes_max) 275 | boxes_min_np = np.array(boxes_min) 276 | 277 | for i in range(len(poses_np)-1): 278 | v = np.linalg.norm((boxes_max_np[i] + boxes_min_np[i])/2 - (boxes_max_np[(i+1)] + boxes_min_np[(i+1)])/2) 279 | v_norm = v/((boxes_max_np[i] - boxes_min_np[i]).mean()) 280 | inters.append(self._inters_factor(v_norm)) 281 | 282 | inters.append(inters[len(poses_np)-2]) 283 | pose_fusion = self.merge(poses_np[0],inters[0])[np.newaxis, :, :] 284 | weights.extend(scores[0]) 285 | ## begin 286 | for i in range(len(poses)-1): 287 | pose_fusion = np.concatenate((pose_fusion, (self.merge(poses_np[i+1],inters[i+1]))[np.newaxis, :, :]), axis = 0) 288 | weights.extend(scores[i+1]) 289 | 290 | return weights, pose_fusion.tolist() 291 | 292 | def __getitem__(self, idx): 293 | 294 | meta = self.meta_data[idx] 295 | tracklet = self.tracklet_data[idx] 296 | boxes_max, boxes_min = self._extract_boxes(tracklet,self.normalize_tracklet) 297 | scores = self._extract_conf_score(tracklet,self.normalize_score) 298 | poses = self._extract_poses(tracklet,self.normalize_pose) 299 | 300 | poses_gt = copy.deepcopy(poses) 301 | weights, pose_fusion = self._flat_input(poses, boxes_max, boxes_min,scores) 302 | pose_fusion = (torch.tensor(pose_fusion)*self.scale_factor).to(torch.int32) 303 | 304 | weights = torch.tensor(weights) 305 | poses_gt = (torch.tensor(poses_gt)*self.scale_factor).to(torch.int32) 306 | gt = poses_gt.reshape(-1,2) 307 | #poses_gt = torch.chunk(poses_gt,self.tracklet_len,0) 308 | weights = torch.chunk(weights,self.tracklet_len,0) 309 | 310 | weights = torch.cat([weights[i] for i in range(len(weights))],dim=0) 311 | #gt = torch.cat([poses_gt[i] for i in range(len(poses_gt))],dim=0) 312 | 313 | spatial_token = self.spatial_token.reshape(self.tracklet_len,-1) 314 | temporal_token = self.temporal_token.reshape(self.tracklet_len,-1) 315 | 316 | if self.mask_rec and self.split=='train': 317 | mask = [1 for i in range((self.tracklet_len)*(self.joints_num))] 318 | mask = self._gen_rec_mask(mask,self.mask_pro) 319 | mask = torch.tensor(mask) 320 | #rint(mask) 321 | mask_ = torch.cat((mask.unsqueeze(0),mask.unsqueeze(0)),dim=0).permute(1,0) 322 | mask_index = mask_==0 323 | mask_index = mask_index.reshape(self.tracklet_len,self.joints_num,-1) 324 | pose_fusion[mask_index] = 0 325 | 326 | if self.pre_len>0 : 327 | mask = torch.tensor([1 for i in range((self.tracklet_len-self.pre_len)*(self.joints_num))] + [0 for i in range(self.joints_num*self.pre_len)]) 328 | mask_ = torch.cat((mask.unsqueeze(0),mask.unsqueeze(0)),dim=0).permute(1,0) 329 | mask_ = mask_.reshape(self.tracklet_len, self.joints_num, 2) 330 | mask_index = mask_==0 331 | pose_fusion[mask_index] = 0 332 | 333 | input_dict = { 334 | 'meta': meta, 335 | 'pose': pose_fusion, 336 | 'gt': gt, 337 | 'weigths': weights, 338 | 'spatial_token':spatial_token, 339 | 'temporal_token':temporal_token, 340 | 'frame_width':self.frame_width, 341 | 'frame_height':self.frame_height, 342 | 'scale_factor': self.scale_factor, 343 | 'joints_num':self.joints_num 344 | } 345 | 346 | return input_dict 347 | 348 | if __name__ == '__main__': 349 | 350 | from dataset_path import * 351 | # import cv2 352 | from torch.utils.data import DataLoader 353 | sys.path.append(".") 354 | from utils.visualization import visualize_local_tracklets 355 | #from utils.metrics import recover_poses 356 | 357 | debug_Dataset = ShanghaiTech_AlphaPose(pose_dir=ShanghaiTech_AlphaPose_Dir,split='test',tracklet_len=8 , stride=2, head_less=False,pre_len=4) 358 | dataloader = DataLoader(debug_Dataset, batch_size=2, shuffle=True, num_workers=0) 359 | VIS = False 360 | 361 | for i, input_dict in enumerate(tqdm(dataloader)): 362 | 363 | # print(input_dict['MPP_GT'].size()) 364 | # print(input_dict['MPR_GT'].size()) 365 | #print(input_dict['pose']) 366 | print(input_dict['spatial_token'].size()) 367 | print(input_dict['temporal_token'].size()) 368 | print(input_dict['meta']) 369 | #recovered_poses = recover_poses(input_dict['MPP_GT'],input_dict['MTP_GT'],'ShanghaiTech') 370 | #print('recovered_poses',recovered_poses.shape) 371 | 372 | print("----------",i,"-------------") 373 | 374 | if i>10: 375 | break 376 | 377 | 378 | -------------------------------------------------------------------------------- /src/datasets/UCF_crime.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | from sklearn.utils import shuffle 4 | import torch 5 | import copy 6 | import numpy as np 7 | from tqdm import tqdm 8 | from torch.utils.data import Dataset 9 | import random 10 | 11 | import sys 12 | sys.path.append("../../src") 13 | from utils.load_save import load_json, write_json 14 | from utils.normalize import normalize_score,normalize_pose 15 | 16 | POSE_META_FILE = 'processed/pose_meta_{}_length{}_stride{}.json' 17 | POSE_DATA_FILE = 'processed/pose_data_{}_length{}_stride{}.json' 18 | 19 | class UCF_crime(Dataset): 20 | def __init__(self, pose_dir, split='train', tracklet_len=8, stride=1, pre_len=0, head_less=False, 21 | normalize_tracklet=True, normalize_score=True, 22 | normalize_pose=True, embed_dim=128, 23 | mask_rec=True,fusion_type='div',motion_type='rayleigh', mask_pro=0.15): 24 | 25 | self.pose_dir = pose_dir 26 | self.split = split 27 | self.head_less = head_less 28 | self.tracklet_len = tracklet_len 29 | self.stride = stride 30 | ## TO DO 31 | self.frame_width = 320 32 | self.frame_height = 240 33 | ## TO DO END 34 | self.scale_factor = 100 35 | self.mask_rec = mask_rec 36 | self.fusion_type = fusion_type 37 | self.motion_type = motion_type 38 | self.mask_pro = mask_pro 39 | 40 | if self.head_less: 41 | self.joints_num =14 42 | else: 43 | self.joints_num =17 44 | 45 | self.pre_len = pre_len 46 | 47 | self.type_token, self.spatial_token, self.temporal_token = self._gen_fixed_token_seq() 48 | self.meta_path = os.path.join(pose_dir, POSE_META_FILE.format(self.split,str(self.tracklet_len),str(self.stride))) 49 | self.tracklet_path = os.path.join(pose_dir, POSE_DATA_FILE.format(self.split,str(self.tracklet_len),str(self.stride))) 50 | 51 | self.propossed_path = os.path.dirname(self.meta_path) 52 | os.makedirs(self.propossed_path, exist_ok=True) 53 | 54 | self.normalize_tracklet = normalize_tracklet 55 | self.normalize_score = normalize_score 56 | self.normalize_pose = normalize_pose 57 | 58 | self._load_tracklets() 59 | print('dataset length: {}'.format(self.__len__())) 60 | 61 | def __len__(self): 62 | return len(self.meta_data) 63 | 64 | def _gen_fixed_token_seq(self): 65 | 66 | type_token = [] 67 | spatial_token = [] 68 | temporal_token = [] 69 | single_type_tok = [0,0] + [1 for n in range(self.joints_num)] 70 | 71 | for i in range(self.tracklet_len): 72 | type_token.extend(single_type_tok) 73 | for j in range(self.joints_num): 74 | spatial_token.append(j) 75 | temporal_token.append(i) 76 | 77 | return torch.tensor(type_token), torch.tensor(spatial_token), torch.tensor(temporal_token) 78 | 79 | def _load_tracklets(self): 80 | 81 | if os.path.exists(self.tracklet_path) and os.path.exists(self.meta_path): 82 | print('Load {} Traclets from saved files, Traclet Length {}, Stride {}'.format(self.split, self.tracklet_len, self.stride)) 83 | self.meta_data, self.tracklet_data = self._lazy_load_tracklets() 84 | else: 85 | print('Load {} Traclets from scratch, Traclet Length {}, Stride {}'.format(self.split, self.tracklet_len, self.stride)) 86 | self.meta_data, self.tracklet_data = self._scratch_load_tracklets() 87 | 88 | def _lazy_load_tracklets(self): 89 | 90 | return load_json(self.meta_path), load_json(self.tracklet_path) 91 | 92 | ## TO DO 93 | 94 | def _scratch_load_tracklets(self): 95 | 96 | meta_data = [] 97 | tracklet_data = [] 98 | 99 | split_info = load_json(os.path.join(self.pose_dir, 'train_test_split.json')) 100 | name_list = split_info[self.split] 101 | 102 | base_dir = os.path.join(self.pose_dir, 'ucf_samples') 103 | logging.info('Processing raw traclets') 104 | filter_less_than = self.tracklet_len * self.stride 105 | 106 | for name in tqdm(name_list): 107 | filepath = os.path.join(base_dir, name[:-4], 'alphapose-results.json') 108 | if not os.path.exists(filepath): 109 | continue 110 | origin_tracks = load_json(filepath) 111 | person_tracks_frame = {} 112 | person_tracks_pose = {} 113 | person_tracks_frame_exist = {} 114 | person_frame_for_search = {} 115 | for detected in origin_tracks: 116 | if detected['idx'] not in person_tracks_frame.keys(): 117 | person_frame_for_search[detected['idx']] = [] 118 | person_tracks_frame[detected['idx']] = [None]*1000000 119 | person_tracks_pose[detected['idx']] = [None]*1000000 120 | person_tracks_frame_exist[detected['idx']] = np.zeros(1000000, dtype=bool) 121 | else: 122 | person_frame_for_search[detected['idx']].append(int(detected['image_id'][:-4])) 123 | person_tracks_frame[detected['idx']][int(detected['image_id'][:-4])] = detected['image_id'][:-4].rjust(4, '0') 124 | person_tracks_pose[detected['idx']][int(detected['image_id'][:-4])] = detected['keypoints'] 125 | person_tracks_frame_exist[detected['idx']][int(detected['image_id'][:-4])] = True 126 | 127 | # person_num = len(person_tracks.keys()) 128 | for p in person_frame_for_search.keys(): 129 | frame_num = len(person_frame_for_search[p]) 130 | if frame_num < filter_less_than: 131 | continue 132 | 133 | if self.split == 'train': 134 | if frame_num < filter_less_than*2: 135 | continue 136 | 137 | ### version1 138 | # for i in range(frame_num-self.tracklet_len*self.stride): 139 | # simple_pose = person_tracks_pose[p][i : i+self.tracklet_len*self.stride : self.stride] 140 | # meta_data.append(name+'_'+person_tracks_frame[p][i+(self.tracklet_len-1)*self.stride]) 141 | # tracklet_data.append(simple_pose) 142 | 143 | ### version2 144 | for j in range(frame_num): 145 | i = int(person_frame_for_search[p][j]) 146 | if np.all(person_tracks_frame_exist[p][i : i+self.tracklet_len*self.stride : self.stride]): 147 | simple_pose = person_tracks_pose[p][i : i+self.tracklet_len*self.stride : self.stride] 148 | meta_data.append(name+'_'+person_tracks_frame[p][i+(self.tracklet_len-1)*self.stride]) 149 | tracklet_data.append(simple_pose) 150 | 151 | print('Process Done. Sample amount: ', len(meta_data)) 152 | write_json(meta_data,self.meta_path) 153 | print('Save meta data Done') 154 | write_json(tracklet_data,self.tracklet_path) 155 | print('Save data Done') 156 | 157 | return meta_data,tracklet_data 158 | 159 | ## TO DO END 160 | 161 | # tracklet[8,17*3] (x,y,c) 162 | 163 | def _extract_boxes(self,tracklet,normalize=True): 164 | 165 | if normalize: 166 | box_xy_max = [[max(pose[::3])/self.frame_width,max(pose[1::3])/self.frame_height] for pose in tracklet] 167 | box_xy_min = [[min(pose[::3])/self.frame_width,min(pose[1::3])/self.frame_height] for pose in tracklet] 168 | else: 169 | box_xy_max = [[max(pose[::3]),max(pose[1::3])] for pose in tracklet] 170 | box_xy_min = [[min(pose[::3]),min(pose[1::3])] for pose in tracklet] 171 | 172 | return box_xy_max , box_xy_min 173 | 174 | def _extract_conf_score(self,tracklet,normalize=True): 175 | 176 | scores = [] 177 | for pose in tracklet: 178 | pose_score = np.array(pose[2::3]) 179 | if normalize: 180 | pose_score = normalize_score(pose_score) 181 | scores.append(pose_score.tolist()) 182 | 183 | return scores 184 | 185 | def _extract_poses(self,tracklet,normalize=True): 186 | 187 | if isinstance(tracklet,list): 188 | tracklet = np.array(tracklet) 189 | x = tracklet[:, ::3] 190 | y = tracklet[:, 1::3] 191 | 192 | if normalize: 193 | x, y = normalize_pose(x,y) 194 | 195 | if isinstance(x,list): 196 | x, y = np.array(x), np.array(y) 197 | 198 | x = np.expand_dims(x,-1) 199 | y = np.expand_dims(y,-1) 200 | pose = np.concatenate((x,y),axis=-1).tolist() 201 | 202 | # (T,17,2) 203 | 204 | return pose 205 | 206 | ## TO DO 207 | 208 | def _inters_factor(self, v): 209 | 210 | if self.motion_type == 'gaussian': 211 | 212 | # sigma = 0.18917838310469845 213 | # mu = 0.09870275102403338 214 | # factor = np.exp(-(np.linalg.norm(v-mu))**2/(2*(sigma**2))) 215 | pass 216 | 217 | if self.motion_type == 'rayleigh': 218 | 219 | if self.stride == 1: 220 | 221 | sigma = 0.008 222 | con = 0.0048 223 | 224 | if self.stride == 2: 225 | 226 | sigma = 0.009 227 | con = 0.0055 228 | 229 | factor = v * np.exp(-(v**2)/(2*(sigma**2))) / con 230 | 231 | if self.motion_type == 'uniform': 232 | 233 | factor = 0.5 234 | 235 | if self.motion_type == 'none': 236 | 237 | factor = 1 238 | 239 | if self.motion_type == 'random': 240 | 241 | factor = 1 + 0.5*np.random.rand() 242 | 243 | return factor*0.7 + 0.3 244 | 245 | ## TO DO END 246 | 247 | def merge(self,pose,factor): 248 | 249 | if self.fusion_type == 'div': 250 | return pose / factor 251 | if self.fusion_type == 'add': 252 | return pose + factor 253 | if self.fusion_type == 'mul': 254 | return pose * factor 255 | 256 | def _gen_rec_mask(self,mask,prob=0.15): 257 | 258 | ref = torch.ones_like(torch.tensor(mask)) 259 | masked_indices = torch.bernoulli(torch.full(ref.shape, prob)).bool() 260 | ref[masked_indices] = 0 261 | 262 | return ref.tolist() 263 | 264 | def _flat_input(self,poses, boxes_max, boxes_min, scores): 265 | 266 | assert len(poses) == len(boxes_max) 267 | assert len(boxes_max) == len(boxes_min) 268 | assert len(poses) == len(scores) 269 | 270 | pose_fusion = [] 271 | 272 | weights = [] 273 | inters = [] 274 | poses_np = np.array(poses) 275 | boxes_max_np = np.array(boxes_max) 276 | boxes_min_np = np.array(boxes_min) 277 | 278 | for i in range(len(poses_np)-1): 279 | v = np.linalg.norm((boxes_max_np[i] + boxes_min_np[i])/2 - (boxes_max_np[(i+1)] + boxes_min_np[(i+1)])/2) 280 | v_norm = v/((boxes_max_np[i] - boxes_min_np[i]).mean()) 281 | inters.append(self._inters_factor(v_norm)) 282 | 283 | inters.append(inters[len(poses_np)-2]) 284 | pose_fusion = self.merge(poses_np[0],inters[0])[np.newaxis, :, :] 285 | weights.extend(scores[0]) 286 | ## begin 287 | for i in range(len(poses)-1): 288 | pose_fusion = np.concatenate((pose_fusion, (self.merge(poses_np[i+1],inters[i+1]))[np.newaxis, :, :]), axis = 0) 289 | weights.extend(scores[i+1]) 290 | 291 | return weights, pose_fusion.tolist() 292 | 293 | def __getitem__(self, idx): 294 | 295 | meta = self.meta_data[idx] 296 | tracklet = self.tracklet_data[idx] 297 | boxes_max, boxes_min = self._extract_boxes(tracklet,self.normalize_tracklet) 298 | scores = self._extract_conf_score(tracklet,self.normalize_score) 299 | poses = self._extract_poses(tracklet,self.normalize_pose) 300 | 301 | poses_gt = copy.deepcopy(poses) 302 | weights, pose_fusion = self._flat_input(poses, boxes_max, boxes_min,scores) 303 | pose_fusion = (torch.tensor(pose_fusion)*self.scale_factor).to(torch.int32) 304 | 305 | weights = torch.tensor(weights) 306 | poses_gt = (torch.tensor(poses_gt)*self.scale_factor).to(torch.int32) 307 | gt = poses_gt.reshape(-1,2) 308 | #poses_gt = torch.chunk(poses_gt,self.tracklet_len,0) 309 | weights = torch.chunk(weights,self.tracklet_len,0) 310 | 311 | weights = torch.cat([weights[i] for i in range(len(weights))],dim=0) 312 | #gt = torch.cat([poses_gt[i] for i in range(len(poses_gt))],dim=0) 313 | 314 | spatial_token = self.spatial_token.reshape(self.tracklet_len,-1) 315 | temporal_token = self.temporal_token.reshape(self.tracklet_len,-1) 316 | 317 | if self.mask_rec and self.split=='train': 318 | mask = [1 for i in range((self.tracklet_len)*(self.joints_num))] 319 | mask = self._gen_rec_mask(mask,self.mask_pro) 320 | mask = torch.tensor(mask) 321 | #rint(mask) 322 | mask_ = torch.cat((mask.unsqueeze(0),mask.unsqueeze(0)),dim=0).permute(1,0) 323 | mask_index = mask_==0 324 | mask_index = mask_index.reshape(self.tracklet_len,self.joints_num,-1) 325 | pose_fusion[mask_index] = 0 326 | 327 | if self.pre_len>0 : 328 | mask = torch.tensor([1 for i in range((self.tracklet_len-self.pre_len)*(self.joints_num))] + [0 for i in range(self.joints_num*self.pre_len)]) 329 | mask_ = torch.cat((mask.unsqueeze(0),mask.unsqueeze(0)),dim=0).permute(1,0) 330 | mask_ = mask_.reshape(self.tracklet_len, self.joints_num, 2) 331 | mask_index = mask_==0 332 | pose_fusion[mask_index] = 0 333 | 334 | input_dict = { 335 | 'meta': meta, 336 | 'pose': pose_fusion, 337 | 'gt': gt, 338 | 'weigths': weights, 339 | 'spatial_token':spatial_token, 340 | 'temporal_token':temporal_token, 341 | 'frame_width':self.frame_width, 342 | 'frame_height':self.frame_height, 343 | 'scale_factor': self.scale_factor, 344 | 'joints_num':self.joints_num 345 | } 346 | 347 | return input_dict 348 | 349 | if __name__ == '__main__': 350 | 351 | from dataset_path import * 352 | # import cv2 353 | from torch.utils.data import DataLoader 354 | sys.path.append(".") 355 | from utils.visualization import visualize_local_tracklets 356 | #from utils.metrics import recover_poses 357 | 358 | debug_Dataset = UCF_crime(pose_dir=UCF_crime_Dir,split='test',tracklet_len=8 , stride=2, head_less=False,pre_len=4) 359 | dataloader = DataLoader(debug_Dataset, batch_size=2, shuffle=True, num_workers=0) 360 | VIS = False 361 | 362 | for i, input_dict in enumerate(tqdm(dataloader)): 363 | 364 | # print(input_dict['MPP_GT'].size()) 365 | # print(input_dict['MPR_GT'].size()) 366 | #print(input_dict['pose']) 367 | print(input_dict['spatial_token'].size()) 368 | print(input_dict['temporal_token'].size()) 369 | print(input_dict['meta']) 370 | #recovered_poses = recover_poses(input_dict['MPP_GT'],input_dict['MTP_GT'],'ShanghaiTech') 371 | #print('recovered_poses',recovered_poses.shape) 372 | 373 | print("----------",i,"-------------") 374 | 375 | if i>10: 376 | break 377 | 378 | 379 | --------------------------------------------------------------------------------