├── Dataset.py ├── config.py ├── conv_utils.py ├── inference.py ├── loss.py ├── model ├── __pycache__ │ ├── conv3D.cpython-310.pyc │ └── layer.cpython-310.pyc ├── conv3D.py └── layer.py ├── train.py └── yolov3 ├── detec_loc.py └── utils └── general.py /Dataset.py: -------------------------------------------------------------------------------- 1 | import random 2 | import torch 3 | import numpy as np 4 | import cv2 5 | import glob 6 | import os 7 | import scipy.io as scio 8 | from torch.utils.data import Dataset 9 | from multiprocessing import Process, freeze_support 10 | 11 | 12 | # Load a single frame 13 | def np_load_frame(filename): 14 | img = cv2.imread(filename) 15 | return img 16 | 17 | class train_dataset(Dataset): 18 | 19 | def __init__(self, cfg): 20 | # Each list entry holds the images of one folder 21 | self.training_videos = [] 22 | self.all_frames_training = [] 23 | 24 | # cfg.train_data = 'data_root/' + dataset name + '/training/'; iterate over all folders under it 25 | all_folder = sorted(glob.glob(f'{cfg.train_data}/*')) 26 | all_folder_len = len(all_folder) 27 | for folder in all_folder[:int(all_folder_len*0.85 + 1)]: 28 | # Select every jpg file inside the folder 29 | all_imgs = glob.glob(f'{folder}/*.jpg') 30 | all_imgs.sort() 31 | self.training_videos.append(all_imgs) 32 | 33 | frames = list(range(3, len(all_imgs) - 3)) 34 | random.shuffle(frames) 35 | self.all_frames_training.append(frames) 36 | 37 | def __len__(self): 38 | return len(self.training_videos) 39 | 40 | def __getitem__(self, idx): 41 | folder = self.training_videos[idx] 42 | start = self.all_frames_training[idx][-1] 43 | i_path = folder[start] 44 | 45 | video_clip = [] 46 | for i in range(start-3, start + 4): 47 | video_clip.append(np_load_frame(folder[i])) 48 | 49 | random_clip = [np_load_frame(folder[start])] 50 | 51 | temp = start 52 | for i in range(3): 53 | f = random.randrange(1, 5) 54 | if temp - (2 - i) - f >= 0: 55 | random_clip.append(np_load_frame(folder[temp - f])) 56 | temp -= f 57 | else: 58 | random_clip.append(np_load_frame(folder[temp - 1])) 59 | temp -= 1 60 | 61 | random_clip.reverse() 62 | 63 | temp = start 64 | for i in range(3): 65 | f = random.randrange(1, 5) 66 | if temp + (2 - i) + f <= len(folder) - 1: 67 | random_clip.append(np_load_frame(folder[temp + f])) 68 | temp += f 69 | else: 70 | random_clip.append(np_load_frame(folder[temp + 1])) 71 | temp += 1 72 | 73 | video_clip = np.array(video_clip) 74 | random_clip = np.array(random_clip) 75 | 76 | return idx, video_clip, random_clip, i_path 77 | 78 | 79 | 80 | class val_dataset(Dataset): 81 | 82 | def __init__(self, video_folder): 83 | self.imgs = glob.glob(video_folder + '/*.jpg') 84 | self.imgs.sort() 85 | self.img_idx = range(3, len(self.imgs)-3) 86 | 87 | def __len__(self): 88 | return len(self.imgs) - 6 89 | 90 | def __getitem__(self, idx): 91 | video_clips = [] 92 | start = self.img_idx[idx] - 3 93 | for i in range(7): 94 | video_clips.append(np_load_frame(self.imgs[start + i])) 95 | 96 | i_path = self.imgs[self.img_idx[idx]] 97 | 98 | video_clips = torch.from_numpy(np.array(video_clips)) 99 | return video_clips, i_path 100 | 101 | 102 | class Label_loader: 103 | def __init__(self, cfg, video_folders): 104 | assert cfg.dataset in ('ped2', 'avenue', 'shanghaitech'), f'Did not find the related gt for \'{cfg.dataset}\'.'
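# Ground truth for ped2/avenue is read from '<data_root>/<dataset>/<dataset>.mat'; for shanghaitech it comes from the .npy frame masks under '<data_root>/shanghaitech/frame_masks/' (see load_ucsd_avenue / load_shanghaitech below).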
105 | self.cfg = cfg 106 | self.name = cfg.dataset 107 | self.frame_path = cfg.test_data 108 | self.mat_path = f'{cfg.data_root + self.name}/{self.name}.mat' 109 | self.video_folders = video_folders 110 | 111 | def __call__(self): 112 | if self.name == 'shanghaitech': 113 | gt = self.load_shanghaitech() 114 | else: 115 | gt = self.load_ucsd_avenue() 116 | return gt 117 | 118 | def load_ucsd_avenue(self): 119 | abnormal_events = scio.loadmat(self.mat_path, squeeze_me=True)['gt'] 120 | 121 | all_gt = [] 122 | for i in range(abnormal_events.shape[0]): 123 | length = len(os.listdir(self.video_folders[i])) 124 | sub_video_gt = np.zeros((length,), dtype=np.int8) 125 | 126 | one_abnormal = abnormal_events[i] 127 | if one_abnormal.ndim == 1: 128 | one_abnormal = one_abnormal.reshape((one_abnormal.shape[0], -1)) 129 | 130 | for j in range(one_abnormal.shape[1]): 131 | start = one_abnormal[0, j] - 1 132 | end = one_abnormal[1, j] 133 | 134 | sub_video_gt[start: end] = 1 135 | 136 | all_gt.append(sub_video_gt) 137 | 138 | return all_gt 139 | 140 | def load_shanghaitech(self): 141 | np_list = glob.glob(f'{self.cfg.data_root + self.name}/frame_masks/') 142 | np_list.sort() 143 | 144 | gt = [] 145 | for npy in np_list: 146 | gt.append(np.load(npy)) 147 | 148 | return gt -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | from glob import glob 2 | import os 3 | 4 | if not os.path.exists('tensorboard_log'): 5 | os.mkdir('tensorboard_log') 6 | if not os.path.exists('weights'): 7 | os.mkdir('weights') 8 | 9 | share_config = { 10 | 'mode': 'training', 11 | 'dataset': 'avenue', 12 | 'img_size': (64, 64), 13 | 'data_root': 'Data/' 14 | } 15 | 16 | 17 | class dict2class: 18 | def __init__(self, config): 19 | for k, v in config.items(): 20 | self.__setattr__(k, v) 21 | 22 | def print_cfg(self): 23 | print('\n' + '-' * 30 + f'{self.mode} cfg' + '-' * 30) 24 | for k, v in vars(self).items(): 25 | print(f'{k}: {v}') 26 | print() 27 | 28 | 29 | def update_config(args=None, mode=None): 30 | share_config['mode'] = mode 31 | assert args.dataset in ('ped2', 'avenue', 'shanghaitech'), 'Dataset error! 
Check Dataset argument' 32 | share_config['dataset'] = args.dataset 33 | 34 | if mode == 'train': 35 | share_config['batch_size'] = args.batch_size 36 | share_config['train_data'] = share_config['data_root'] + args.dataset + '/training' 37 | share_config['test_data'] = share_config['data_root'] + args.dataset + '/testing' 38 | share_config['lr'] = 0.001 39 | share_config['level'] = args.level 40 | share_config['width'] = args.width 41 | share_config['depth'] = args.depth 42 | share_config['iters'] = args.iters 43 | share_config['resume'] = glob(f'weights/{args.resume}*')[0] if args.resume else None 44 | share_config['save_interval'] = args.save_interval 45 | share_config['val_interval'] = args.val_interval 46 | 47 | elif mode == 'test': 48 | share_config['test_data'] = share_config['data_root'] + args.dataset + '/testing/' 49 | share_config['trained_model'] = args.trained_model 50 | share_config['level'] = args.level 51 | share_config['width'] = share_config['trained_model'].split('_')[1] 52 | share_config['depth'] = share_config['trained_model'].split('_')[2] 53 | 54 | return dict2class(share_config) -------------------------------------------------------------------------------- /conv_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import cv2 3 | import numpy as np 4 | import torch.nn as nn 5 | 6 | 7 | def weights_init_normal(self): 8 | for m in self.modules(): 9 | if isinstance(m, (nn.Conv2d, nn.Conv3d)): 10 | nn.init.kaiming_uniform_(m.weight) 11 | if m.bias is not None: 12 | nn.init.constant_(m.bias, 0) 13 | elif isinstance(m, (nn.BatchNorm2d, nn.BatchNorm3d)): 14 | nn.init.constant_(m.weight, 1) 15 | nn.init.constant_(m.bias, 0) 16 | 17 | elif isinstance(m, nn.Linear): 18 | nn.init.kaiming_uniform_(m.weight) 19 | nn.init.constant_(m.bias, 0) 20 | 21 | 22 | def img_crop(clips, pred): 23 | crop_img = [] 24 | for p in pred[0]: 25 | crop_flow_img = [] 26 | for c in clips: 27 | c = c.numpy() 28 | p = p.int() 29 | crop_flow = cv2.resize(c[p[1]:p[3], p[0]:p[2]], (64, 64)).astype('float32') 30 | crop_flow /= 255 31 | crop_flow_img.append(crop_flow) 32 | crop_img.append(crop_flow_img) 33 | return crop_img 34 | 35 | 36 | def res_prob(model, preprocess, input_img): 37 | cls_prob = [] 38 | for img in input_img: 39 | img_pre = preprocess(img).unsqueeze(0) 40 | pred = model(img_pre) 41 | cls_prob.append(pred) 42 | cls_prob = torch.cat(cls_prob, 0) 43 | return cls_prob -------------------------------------------------------------------------------- /inference.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import torch 4 | from pathlib import Path 5 | import sys 6 | from sklearn import metrics 7 | 8 | import Dataset 9 | from Dataset import Label_loader 10 | from conv_utils import * 11 | from config import update_config 12 | from model.conv3D import conv3D 13 | from model.layer import aothead, mihead, mbphead, mdhead 14 | from loss import * 15 | from yolov3.detect_loc import run 16 | 17 | 18 | FILE = Path(__file__).resolve() 19 | ROOT = FILE.parents[0] 20 | if str(ROOT) not in sys.path: 21 | sys.path.append(str(ROOT)) # add ROOT to PATH 22 | ROOT = Path(os.path.relpath(ROOT, Path.cwd())) 23 | 24 | parser = argparse.ArgumentParser(description='Anomaly Prediction') 25 | parser.add_argument('--dataset', default='avenue', type=str, help='The name of the dataset to train.') 26 | parser.add_argument('--trained_model', default=None, type=str, help='The pre-trained model 
to evaluate.') 27 | parser.add_argument('--level', default='object', type=str, help='Determine level of the Input') 28 | 29 | 30 | def inference(cfg): 31 | shared_conv = conv3D(cfg.width).cuda().eval() 32 | aot_head = aothead(cfg.width).cuda().eval() 33 | mi_head = mihead(cfg.width).cuda().eval() 34 | mbp_head = mbphead(cfg.width).cuda().eval() 35 | md_head = mdhead(cfg.width).cuda().eval() 36 | 37 | shared_conv.load_state_dict(torch.load('weights/' + cfg.trained_model)['shared']) 38 | aot_head.load_state_dict(torch.load('weights/' + cfg.trained_model)['aot']) 39 | mi_head.load_state_dict(torch.load('weights/' + cfg.trained_model)['mi']) 40 | mbp_head.load_state_dict(torch.load('weights/' + cfg.trained_model)['mbp']) 41 | md_head.load_state_dict(torch.load('weights/' + cfg.trained_model)['md']) 42 | 43 | mbp_loss = mbploss().cuda() 44 | val_mdloss = valmdloss().cuda() 45 | 46 | if cfg.level == 'object': 47 | conf_thres = 0.5 if cfg.dataset == 'ped2' else 0.8 48 | 49 | video_folders = os.listdir(cfg.test_data) 50 | video_folders.sort() 51 | video_folders = [os.path.join(cfg.test_data, aa) for aa in video_folders] 52 | 53 | anomaly_score = [] 54 | 55 | with torch.no_grad(): 56 | for i, folder in enumerate(video_folders): 57 | dataset = Dataset.val_dataset(folder) 58 | 59 | score = [] 60 | 61 | for j, (clips, i_path) in enumerate(dataset): 62 | 63 | pred, val_yolo_cls_prob = run(weights=ROOT / 'yolov3/yolov3.pt', source=i_path, imgsz=clips.shape[1:3], conf_thres=conf_thres) 64 | if pred == -1: 65 | print(pred) 66 | continue 67 | 68 | val_input_crop = img_crop(clips, pred[0]) 69 | val_input_crop = torch.from_numpy(np.array(val_input_crop)) 70 | 71 | val_aot_input = val_input_crop.clone().detach() 72 | val_aot_shape = val_aot_input.shape 73 | val_aot_input = val_aot_input.reshape(val_aot_shape[0], -1, val_aot_shape[1], val_aot_shape[2], val_aot_shape[3]).cuda() 74 | 75 | val_mi_input = val_input_crop.clone().detach() 76 | val_mi_shape = val_mi_input.shape 77 | val_mi_input = val_mi_input.reshape(val_mi_shape[0], -1, val_mi_shape[1], val_mi_shape[2], val_mi_shape[3]).cuda() 78 | 79 | val_mbp_input = torch.cat([val_input_crop[:, :3, :].clone().detach(), val_input_crop[:, 4:, :].clone().detach()], 1) 80 | val_mbp_shape = val_mbp_input.shape 81 | val_mbp_input = val_mbp_input.reshape(val_mbp_shape[0], -1, val_mbp_shape[1], val_mbp_shape[2], val_mbp_shape[3]).cuda() 82 | val_mbp_target = val_input_crop[:, 3, :].clone().detach() 83 | val_mbp_target_shape = val_mbp_target.shape 84 | val_mbp_target = val_mbp_target.reshape(val_mbp_target_shape[0], -1, val_mbp_target_shape[1], val_mbp_target_shape[2]).cuda() 85 | 86 | val_md_input = val_input_crop[:, 3, :].clone().detach().unsqueeze(dim=1) 87 | val_md_shape = val_md_input.shape 88 | val_md_input = val_md_input.reshape(val_md_shape[0], -1, val_md_shape[1], val_md_shape[2], val_md_shape[3]).cuda() 89 | 90 | val_aot_shared = shared_conv(val_aot_input, cfg.depth) 91 | val_mi_shared = shared_conv(val_mi_input, cfg.depth) 92 | val_mbp_shared = shared_conv(val_mbp_input, cfg.depth) 93 | val_md_shared = shared_conv(val_md_input, cfg.depth) 94 | 95 | val_aot_shared = val_aot_shared.squeeze(dim=2) 96 | val_mi_shared = val_mi_shared.squeeze(dim=2) 97 | val_mbp_shared = val_mbp_shared.squeeze(dim=2) 98 | val_md_shared = val_md_shared.squeeze(dim=2) 99 | 100 | val_aot_output = aot_head(val_aot_shared, cfg.depth) 101 | val_mi_output = mi_head(val_mi_shared, cfg.depth) 102 | val_mbp_output = mbp_head(val_mbp_shared, cfg.depth) 103 | val_md_output_res, 
val_md_output_yolo = md_head(val_md_shared, cfg.depth) 104 | 105 | aot_score = torch.sum(val_aot_output[:, 1]) 106 | mi_score = torch.sum(val_mi_output[:, 1]) 107 | mbp_score = mbp_loss(val_mbp_output, val_mbp_target) 108 | md_score = val_mdloss(val_md_output_yolo, val_yolo_cls_prob) 109 | 110 | total_score = ((aot_score + mi_score + mbp_score + md_score)/4).cpu().detach().numpy() 111 | 112 | score.append(float(total_score)) 113 | 114 | anomaly_score.append(np.array(score)) 115 | 116 | print('\nAll frames were detected, begin to compute AUC.') 117 | 118 | gt_loader = Label_loader(cfg, video_folders) # Get gt labels. 119 | gt = gt_loader() 120 | 121 | assert len(anomaly_score) == len(gt), f'Ground truth has {len(gt)} videos, but got {len(anomaly_score)} detected videos.' 122 | 123 | scores = np.array([], dtype=np.float32) 124 | labels = np.array([], dtype=np.int8) 125 | for i in range(len(anomaly_score)): 126 | scores = np.concatenate((scores, anomaly_score[i]), axis=0) 127 | labels = np.concatenate((labels, gt[i][3:-3]), axis=0) 128 | 129 | assert scores.shape == labels.shape, f'Ground truth has {labels.shape[0]} frames, but got {scores.shape[0]} detected frames.' 130 | 131 | fpr, tpr, thresholds = metrics.roc_curve(labels, scores, pos_label=0) 132 | auc = metrics.auc(fpr, tpr) 133 | print(f'AUC: {auc}\n') 134 | return auc 135 | 136 | 137 | if __name__ == '__main__': 138 | args = parser.parse_args() 139 | test_cfg = update_config(args, mode='test') 140 | test_cfg.print_cfg() 141 | inference(test_cfg) -------------------------------------------------------------------------------- /loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional 4 | import numpy as np 5 | 6 | 7 | class aotloss(nn.Module): 8 | def __init__(self): 9 | super().__init__() 10 | 11 | def forward(self, aot_output, target): 12 | loss = nn.CrossEntropyLoss() 13 | return loss(aot_output, target) 14 | 15 | 16 | class miloss(nn.Module): 17 | def __init__(self): 18 | super().__init__() 19 | 20 | def forward(self, mi_output, target): 21 | loss = nn.CrossEntropyLoss() 22 | return loss(mi_output, target) 23 | 24 | 25 | class mbploss(nn.Module): 26 | def __init__(self): 27 | super().__init__() 28 | 29 | def forward(self, mbp_output, target): 30 | loss = nn.L1Loss(reduction='mean') 31 | return loss(mbp_output, target) 32 | 33 | 34 | class mdloss(nn.Module): 35 | def __init__(self): 36 | super().__init__() 37 | 38 | def forward(self, md_res, md_yolo, target_res, target_yolo): 39 | softmax = nn.Softmax(dim=1) 40 | loss = nn.L1Loss(reduction='mean') 41 | md_yolo = softmax(md_yolo) 42 | md_res = softmax(md_res) 43 | target_yolo = softmax(target_yolo) 44 | target_res = softmax(target_res) 45 | 46 | md_merged_output = torch.cat((md_yolo, md_res), 1) 47 | target_merged = torch.cat((target_yolo, target_res), 1) 48 | return loss(md_merged_output, target_merged) 49 | 50 | 51 | class valmdloss(nn.Module): 52 | def __init__(self): 53 | super().__init__() 54 | 55 | def forward(self, md_yolo, target_yolo): 56 | softmax = nn.Softmax(dim=1) 57 | loss = nn.L1Loss(reduction='mean') 58 | md_yolo = softmax(md_yolo) 59 | target_yolo = softmax(target_yolo) 60 | return loss(md_yolo, target_yolo) -------------------------------------------------------------------------------- /model/__pycache__/conv3D.cpython-310.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/whduddhks/Anomaly-Detection-in-Video-via-Self-Supervised-and-Multi-Task-Learning/c9772f3226d59988583ec42b9e910cc17e40c61c/model/__pycache__/conv3D.cpython-310.pyc -------------------------------------------------------------------------------- /model/__pycache__/layer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whduddhks/Anomaly-Detection-in-Video-via-Self-Supervised-and-Multi-Task-Learning/c9772f3226d59988583ec42b9e910cc17e40c61c/model/__pycache__/layer.cpython-310.pyc -------------------------------------------------------------------------------- /model/conv3D.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class conv3D(nn.Module): 6 | def __init__(self, width): 7 | super().__init__() 8 | if width == 'narrow': 9 | channel = [16, 32] 10 | else: 11 | channel = [32, 64] 12 | self.conv3D1 = nn.Sequential( 13 | nn.Conv3d(3, channel[0], kernel_size=(3, 3, 3), padding='same', stride=1), 14 | nn.BatchNorm3d(channel[0]), 15 | nn.ReLU(inplace=True) 16 | ) 17 | 18 | self.conv3D2 = nn.Sequential( 19 | nn.Conv3d(channel[0], channel[1], kernel_size=(3, 3, 3), padding='same', stride=1), 20 | nn.BatchNorm3d(channel[1]), 21 | nn.ReLU(inplace=True) 22 | ) 23 | 24 | self.conv3D3 = nn.Sequential( 25 | nn.Conv3d(channel[1], channel[1], kernel_size=(3, 3, 3), padding='same', stride=1), 26 | nn.BatchNorm3d(channel[1]), 27 | nn.ReLU(inplace=True) 28 | ) 29 | 30 | self.conv3D1_1 = nn.Sequential( 31 | nn.Conv3d(channel[0], channel[0], kernel_size=(3, 3, 3), padding='same', stride=1), 32 | nn.BatchNorm3d(channel[0]), 33 | nn.ReLU(inplace=True) 34 | ) 35 | self.conv3D2_1 = nn.Sequential( 36 | nn.Conv3d(channel[1], channel[1], kernel_size=(3, 3, 3), padding='same', stride=1), 37 | nn.BatchNorm3d(channel[1]), 38 | nn.ReLU(inplace=True) 39 | ) 40 | self.conv3D3_1 = nn.Sequential( 41 | nn.Conv3d(channel[1], channel[1], kernel_size=(3, 3, 3), padding='same', stride=1), 42 | nn.BatchNorm3d(channel[1]), 43 | nn.ReLU(inplace=True) 44 | ) 45 | 46 | self.maxpool = nn.MaxPool3d((1, 2, 2), stride=(1, 2, 2)) 47 | 48 | def forward(self, x, depth): 49 | out = self.conv3D1(x) 50 | if depth == 'deep': 51 | out = self.conv3D1_1(out) 52 | out = self.maxpool(out) 53 | 54 | out = self.conv3D2(out) 55 | if depth == 'deep': 56 | out = self.conv3D2_1(out) 57 | out = self.maxpool(out) 58 | 59 | out = self.conv3D3(out) 60 | if depth == 'deep': 61 | out = self.maxpool(out) 62 | out = self.conv3D3_1(out) 63 | 64 | d = out.shape[2] 65 | out = F.max_pool3d(out, (d, 2, 2), stride=(1, 2, 2)) 66 | 67 | return out 68 | 69 | 70 | if __name__ == "__main__": 71 | rand = torch.ones([8, 3, 4, 64, 64]) 72 | t = conv3D('narrow') 73 | print(t) 74 | 75 | r = t(rand, 'shallow') 76 | print(r.shape) -------------------------------------------------------------------------------- /model/layer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class aothead(nn.Module): 6 | def __init__(self, width): 7 | super().__init__() 8 | in_channel = 32 if width == 'narrow' else 64 9 | self.aot = nn.Sequential( 10 | nn.Conv2d(in_channel, 32, kernel_size=(3, 3), padding='same', stride=1), 11 | nn.MaxPool2d(2), 12 | ) 13 | self.fc_s = nn.Linear(32*4*4, 2) 14 | self.fc_d = nn.Linear(32*2*2, 2) 15 | 16 | def 
forward(self, x, depth): 17 | out = self.aot(x) 18 | out = torch.flatten(out, 1) 19 | return self.fc_s(out) if depth == 'shallow' else self.fc_d(out) 20 | 21 | 22 | class mihead(nn.Module): 23 | def __init__(self, width): 24 | super().__init__() 25 | in_channel = 32 if width == 'narrow' else 64 26 | self.mi = nn.Sequential( 27 | nn.Conv2d(in_channel, 32, kernel_size=(3, 3), padding='same', stride=1), 28 | nn.MaxPool2d(2), 29 | ) 30 | self.fc_s = nn.Linear(32*4*4, 2) 31 | self.fc_d = nn.Linear(32*2*2, 2) 32 | 33 | def forward(self, x, depth): 34 | out = self.mi(x) 35 | out = torch.flatten(out, 1) 36 | return self.fc_s(out) if depth == 'shallow' else self.fc_d(out) 37 | 38 | 39 | class mbphead(nn.Module): 40 | def __init__(self, width): 41 | super().__init__() 42 | if width == 'narrow': 43 | channel = [32, 16] 44 | else: 45 | channel = [64, 32] 46 | 47 | self.conv2D1 = nn.Sequential( 48 | nn.Upsample(scale_factor=2, mode='nearest'), 49 | nn.Conv2d(channel[0], channel[0], kernel_size=(3, 3), padding='same', stride=1), 50 | nn.BatchNorm2d(channel[0]), 51 | nn.ReLU(inplace=True) 52 | ) 53 | self.conv2D2 = nn.Sequential( 54 | nn.Upsample(scale_factor=2, mode='nearest'), 55 | nn.Conv2d(channel[0], channel[1], kernel_size=(3, 3), padding='same', stride=1), 56 | nn.BatchNorm2d(channel[1]), 57 | nn.ReLU(inplace=True) 58 | ) 59 | self.conv2D3 = nn.Sequential( 60 | nn.Upsample(scale_factor=2, mode='nearest'), 61 | nn.Conv2d(channel[1], 3, kernel_size=(3, 3), padding='same', stride=1), 62 | ) 63 | self.conv2D1_1 = nn.Sequential( 64 | nn.Conv2d(channel[0], channel[0], kernel_size=(3, 3), padding='same', stride=1), 65 | nn.BatchNorm2d(channel[0]), 66 | nn.ReLU(inplace=True) 67 | ) 68 | self.conv2D2_1 = nn.Sequential( 69 | nn.Conv2d(channel[1], channel[1], kernel_size=(3, 3), padding='same', stride=1), 70 | nn.BatchNorm2d(channel[1]), 71 | nn.ReLU(inplace=True) 72 | ) 73 | self.conv2D3_0 = nn.Sequential( 74 | nn.Upsample(scale_factor=2, mode='nearest'), 75 | nn.Conv2d(channel[1], channel[1], kernel_size=(3, 3), padding='same', stride=1), 76 | nn.BatchNorm2d(channel[1]), 77 | nn.ReLU(inplace=True) 78 | ) 79 | 80 | def forward(self, x, depth): 81 | out = self.conv2D1(x) 82 | if depth == 'deep': 83 | out = self.conv2D1_1(out) 84 | 85 | out = self.conv2D2(out) 86 | if depth == 'deep': 87 | out = self.conv2D2_1(out) 88 | 89 | if depth == 'deep': 90 | out = self.conv2D3_0(out) 91 | out = self.conv2D3(out) 92 | 93 | return out 94 | 95 | 96 | class mdhead(nn.Module): 97 | def __init__(self, width): 98 | super().__init__() 99 | in_channel = 32 if width == 'narrow' else 64 100 | self.md = nn.Sequential( 101 | nn.Conv2d(in_channel, 32, kernel_size=(3, 3), padding='same', stride=1), 102 | nn.MaxPool2d(2), 103 | ) 104 | 105 | # ResNet output 106 | self.fc_s_r = nn.Linear(32*4*4, 1000) 107 | self.fc_d_r = nn.Linear(32*2*2, 1000) 108 | 109 | # yolo output 110 | self.fc_s_y = nn.Linear(32*4*4, 80) 111 | self.fc_d_y = nn.Linear(32*2*2, 80) 112 | 113 | def forward(self, x, depth): 114 | out = self.md(x) 115 | out = torch.flatten(out, 1) 116 | out_r = self.fc_s_r(out) if depth == 'shallow' else self.fc_d_r(out) 117 | out_y = self.fc_s_y(out) if depth == 'shallow' else self.fc_d_y(out) 118 | return out_r, out_y 119 | 120 | 121 | if __name__ == "__main__": 122 | rand = torch.ones([8, 64, 4, 4]) 123 | a = aothead(64) 124 | m = mihead(64) 125 | p = mbphead('wide') 126 | t = mdhead(64) 127 | 128 | r_1 = a(rand, 'deep') 129 | r_2 = m(rand, 'deep') 130 | r_3 = p(rand, 'deep') 131 | r_4 = t(rand, 'deep') 132 | print(r_1.shape) 133 
| print(r_2.shape) 134 | print(r_3.shape) 135 | print(r_4[0].shape, r_4[1].shape) -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import Dataset 3 | import torch 4 | import os 5 | import sys 6 | import random 7 | import time 8 | import datetime 9 | import glob 10 | 11 | from loss import * 12 | from conv_utils import * 13 | 14 | from pathlib import Path 15 | from config import update_config 16 | from tensorboardX import SummaryWriter 17 | from torch.utils.data import DataLoader 18 | import torch.nn as nn 19 | 20 | from model.conv3D import conv3D 21 | from model.layer import aothead, mihead, mbphead, mdhead 22 | from yolov3.detect_loc import run 23 | from torchvision.models import resnet50, ResNet50_Weights 24 | 25 | 26 | FILE = Path(__file__).resolve() 27 | ROOT = FILE.parents[0] 28 | if str(ROOT) not in sys.path: 29 | sys.path.append(str(ROOT)) # add ROOT to PATH 30 | ROOT = Path(os.path.relpath(ROOT, Path.cwd())) 31 | 32 | 33 | parser = argparse.ArgumentParser(description='baseline') 34 | parser.add_argument('--batch_size', default=8, type=int) 35 | parser.add_argument('--dataset', default='avenue', type=str, help='The name of the dataset to train.') 36 | parser.add_argument('--level', default='object', type=str, help='Determine level of the Input') 37 | parser.add_argument('--width', default='narrow', type=str, help='Model width [narrow, wide]') 38 | parser.add_argument('--depth', default='shallow', type=str, help='Model depth [shallow, deep]') 39 | parser.add_argument('--iters', default=40000, type=int, help='The total iteration number.') 40 | parser.add_argument('--resume', default=None, type=str, 41 | help='The pre-trained model to resume training with, pass \'latest\' or the model name.') 42 | parser.add_argument('--save_interval', default=1000, type=int, help='Save the model every [save_interval] iterations.') 43 | parser.add_argument('--val_interval', default=1000, type=int, 44 | help='Evaluate the model every [val_interval] iterations, pass -1 to disable.') 45 | 46 | args = parser.parse_args() 47 | train_cfg = update_config(args, mode='train') 48 | train_cfg.print_cfg() 49 | 50 | # Load or initialize the models 51 | shared_conv = conv3D(train_cfg.width).cuda() 52 | aot_head = aothead(train_cfg.width).cuda() 53 | mi_head = mihead(train_cfg.width).cuda() 54 | mbp_head = mbphead(train_cfg.width).cuda() 55 | md_head = mdhead(train_cfg.width).cuda() 56 | 57 | optimizer_shared = torch.optim.Adam(shared_conv.parameters(), lr=train_cfg.lr) 58 | optimizer_aot = torch.optim.Adam(aot_head.parameters(), lr=train_cfg.lr) 59 | optimizer_mi = torch.optim.Adam(mi_head.parameters(), lr=train_cfg.lr) 60 | optimizer_mbp = torch.optim.Adam(mbp_head.parameters(), lr=train_cfg.lr) 61 | optimizer_md = torch.optim.Adam(md_head.parameters(), lr=train_cfg.lr) 62 | 63 | if train_cfg.resume: 64 | shared_conv.load_state_dict(torch.load(train_cfg.resume)['shared']) 65 | aot_head.load_state_dict(torch.load(train_cfg.resume)['aot']) 66 | mi_head.load_state_dict(torch.load(train_cfg.resume)['mi']) 67 | mbp_head.load_state_dict(torch.load(train_cfg.resume)['mbp']) 68 | md_head.load_state_dict(torch.load(train_cfg.resume)['md']) 69 | 70 | optimizer_shared.load_state_dict(torch.load(train_cfg.resume)['opt_shared']) 71 | optimizer_aot.load_state_dict(torch.load(train_cfg.resume)['opt_aot']) 72 | optimizer_mi.load_state_dict(torch.load(train_cfg.resume)['opt_mi']) 73 | 
optimizer_mbp.load_state_dict(torch.load(train_cfg.resume)['opt_mbp']) 74 | optimizer_md.load_state_dict(torch.load(train_cfg.resume)['opt_md']) 75 | else: 76 | shared_conv.apply(weights_init_normal) 77 | aot_head.apply(weights_init_normal) 78 | mi_head.apply(weights_init_normal) 79 | mbp_head.apply(weights_init_normal) 80 | md_head.apply(weights_init_normal) 81 | 82 | # loss 83 | aot_loss = aotloss().cuda() 84 | mi_loss = miloss().cuda() 85 | mbp_loss = mbploss().cuda() 86 | md_loss = mdloss().cuda() 87 | val_mdloss = valmdloss().cuda() 88 | 89 | # Load the YOLOv3 model 90 | if train_cfg.level == 'object': 91 | conf_thres = 0.5 if train_cfg.dataset == 'ped2' else 0.8 92 | md_lambda = 0.5 if train_cfg.dataset == 'ped2' else 0.2 93 | 94 | weights_res = ResNet50_Weights.DEFAULT 95 | res_model = resnet50(weights=weights_res).cuda() 96 | res_model.eval() 97 | 98 | preprocess_res = weights_res.transforms().cuda() 99 | 100 | 101 | # Define the dataloaders 102 | train_dataset = Dataset.train_dataset(train_cfg) 103 | train_dataloader = DataLoader(dataset=train_dataset, batch_size=train_cfg.batch_size, 104 | shuffle=True, num_workers=4) 105 | 106 | val_dataset = sorted(glob.glob(f'{train_cfg.train_data}/*')) 107 | val_dataset_len = len(val_dataset) 108 | val_dataset = val_dataset[int(val_dataset_len*0.85+1):] 109 | 110 | writer = SummaryWriter(f'tensorboard_log/{train_cfg.dataset}_{train_cfg.width}_{train_cfg.depth}') 111 | start_iter = int(train_cfg.resume.split('_')[-1].split('.')[0]) if train_cfg.resume else 0 112 | training = True 113 | 114 | shared_conv = shared_conv.train() 115 | aot_head = aot_head.train() 116 | mi_head = mi_head.train() 117 | mbp_head = mbp_head.train() 118 | md_head = md_head.train() 119 | 120 | aot_save = (-1, -1) 121 | mi_save = (-1, -1) 122 | mbp_save = (-1, -1) 123 | md_save = (-1, -1) 124 | 125 | try: 126 | step = start_iter 127 | while training: 128 | for indice, video_clips, random_clips, path_list in train_dataloader: 129 | 130 | for index in indice: 131 | train_dataset.all_frames_training[index].pop() 132 | if len(train_dataset.all_frames_training[index]) == 0: 133 | train_dataset.all_frames_training[index] = list(range(3, len(train_dataset.training_videos[index]) - 3)) 134 | random.shuffle(train_dataset.all_frames_training[index]) 135 | 136 | pred, yolo_cls_prob = run(weights=ROOT / 'yolov3/yolov3.pt', source=path_list[0], imgsz=video_clips.shape[2:4], conf_thres=conf_thres) 137 | if pred == -1: 138 | continue 139 | 140 | video_input_crop = img_crop(video_clips[0], pred[0]) 141 | random_input_crop = img_crop(random_clips[0], pred[0]) 142 | 143 | video_input_crop = torch.from_numpy(np.array(video_input_crop)) 144 | random_input_crop = torch.from_numpy(np.array(random_input_crop)) 145 | 146 | aot_input = torch.cat([video_input_crop.clone().detach().requires_grad_(True), torch.flip(video_input_crop.clone().detach().requires_grad_(True), [0, 1])], 0) 147 | aot_shape = aot_input.shape 148 | aot_input = aot_input.reshape(aot_shape[0], -1, aot_shape[1], aot_shape[2], aot_shape[3]).cuda() 149 | aot_target = torch.cat([torch.zeros([video_input_crop.shape[0]]), torch.ones([video_input_crop.shape[0]])], 0).long().cuda() 150 | 151 | mi_input = torch.cat([video_input_crop.clone().detach().requires_grad_(True), random_input_crop.clone().detach().requires_grad_(True)], 0) 152 | mi_shape = mi_input.shape 153 | mi_input = mi_input.reshape(mi_shape[0], -1, mi_shape[1], mi_shape[2], mi_shape[3]).cuda() 154 | mi_target = torch.cat([torch.zeros([video_input_crop.shape[0]]), 
torch.ones([video_input_crop.shape[0]])], 0).long().cuda() 155 | 156 | mbp_input = torch.cat([video_input_crop[:, :3, :].clone().detach().requires_grad_(True), video_input_crop[:, 4:, :].clone().detach().requires_grad_(True)], 1) 157 | mbp_shape = mbp_input.shape 158 | mbp_input = mbp_input.reshape(mbp_shape[0], -1, mbp_shape[1], mbp_shape[2], mbp_shape[3]).cuda() 159 | mbp_target = video_input_crop[:, 3, :].clone().detach().requires_grad_(True) 160 | mbp_target_shape = mbp_target.shape 161 | mbp_target = mbp_target.reshape(mbp_target_shape[0], -1, mbp_target_shape[1], mbp_target_shape[2]).cuda() 162 | 163 | md_input = video_input_crop[:, 3, :].clone().detach().requires_grad_(True).unsqueeze(dim=1) 164 | md_shape = md_input.shape 165 | md_input = md_input.reshape(md_shape[0], -1, md_shape[1], md_shape[2], md_shape[3]).cuda() 166 | md_res_input = video_input_crop[:, 3, :].clone().detach().requires_grad_(True).cuda() 167 | md_res_input = md_input.reshape(md_shape[0], -1, md_shape[2], md_shape[3]) 168 | res_cls_prob = res_prob(res_model, preprocess_res, md_res_input) 169 | 170 | aot_shared = shared_conv(aot_input, train_cfg.depth) 171 | mi_shared = shared_conv(mi_input, train_cfg.depth) 172 | mbp_shared = shared_conv(mbp_input, train_cfg.depth) 173 | md_shared = shared_conv(md_input, train_cfg.depth) 174 | 175 | aot_shared = aot_shared.squeeze(dim=2) 176 | mi_shared = mi_shared.squeeze(dim=2) 177 | mbp_shared = mbp_shared.squeeze(dim=2) 178 | md_shared = md_shared.squeeze(dim=2) 179 | 180 | aot_output = aot_head(aot_shared, train_cfg.depth) 181 | mi_output = mi_head(mi_shared, train_cfg.depth) 182 | mbp_output = mbp_head(mbp_shared, train_cfg.depth) 183 | md_output_res, md_output_yolo = md_head(md_shared, train_cfg.depth) 184 | 185 | aot_l = aot_loss(aot_output, aot_target) 186 | mi_l = mi_loss(mi_output, mi_target) 187 | mbp_l = mbp_loss(mbp_output, mbp_target) 188 | md_l = md_loss(md_output_res, md_output_yolo, res_cls_prob, yolo_cls_prob) 189 | 190 | total_loss = aot_l + mi_l + mbp_l + md_lambda*md_l 191 | 192 | optimizer_aot.zero_grad() 193 | optimizer_mi.zero_grad() 194 | optimizer_mbp.zero_grad() 195 | optimizer_md.zero_grad() 196 | optimizer_shared.zero_grad() 197 | 198 | total_loss.backward() 199 | 200 | optimizer_aot.step() 201 | optimizer_mi.step() 202 | optimizer_mbp.step() 203 | optimizer_md.step() 204 | optimizer_shared.step() 205 | 206 | 207 | torch.cuda.synchronize() 208 | time_end = time.time() 209 | if step > start_iter: # This doesn't include the testing time during training. 
210 | iter_t = time_end - temp 211 | temp = time_end 212 | 213 | if step != start_iter: 214 | if step % 20 == 0: 215 | time_remain = (train_cfg.iters - step) * iter_t 216 | eta = str(datetime.timedelta(seconds=time_remain)).split('.')[0] 217 | 218 | lr_shared = optimizer_shared.param_groups[0]['lr'] 219 | lr_aot = optimizer_aot.param_groups[0]['lr'] 220 | lr_mi = optimizer_mi.param_groups[0]['lr'] 221 | lr_mbp = optimizer_mbp.param_groups[0]['lr'] 222 | lr_md = optimizer_md.param_groups[0]['lr'] 223 | 224 | print(f"{step} | aot_l: {aot_l:.3f} | mi_l: {mi_l:.3f} | mbp_l: {mbp_l:.3f} | md_l: {md_l:.3f} | total: {total_loss:.3f} |" 225 | f"iter: {iter_t:.3f}s | ETA: {eta} | lr_shared: {lr_shared} | lr_aot: {lr_aot} | lr_mi: {lr_mi} | lr_mbp: {lr_mbp} | lr_md: {lr_md}") 226 | 227 | if step % train_cfg.save_interval == 0: 228 | model_dict = { 'shared': shared_conv.state_dict(), 229 | 'aot': aot_head.state_dict(), 230 | 'mi': mi_head.state_dict(), 231 | 'mbp': mbp_head.state_dict(), 232 | 'md': md_head.state_dict(), 233 | 'opt_shared': optimizer_shared.state_dict(), 234 | 'opt_aot': optimizer_aot.state_dict(), 235 | 'opt_mi': optimizer_mi.state_dict(), 236 | 'opt_mbp': optimizer_mbp.state_dict(), 237 | 'opt_md': optimizer_md.state_dict()} 238 | torch.save(model_dict, f'weights/{train_cfg.dataset}_{train_cfg.width}_{train_cfg.depth}_{step}.pth') 239 | print(f'\nAlready saved: \'{train_cfg.dataset}_{train_cfg.width}_{train_cfg.depth}_{step}.pth\'.') 240 | 241 | if step % train_cfg.val_interval == 0: 242 | shared_conv.eval() 243 | aot_head.eval() 244 | mi_head.eval() 245 | mbp_head.eval() 246 | md_head.eval() 247 | 248 | fps = 0 249 | 250 | aot_score = 0 251 | mi_score = 0 252 | mbp_score = 0 253 | md_score = 0 254 | 255 | with torch.no_grad(): 256 | for i, folder in enumerate(val_dataset): 257 | val_data = Dataset.val_dataset(folder) 258 | 259 | for j, (clip, i_path) in enumerate(val_data): 260 | 261 | pred, val_yolo_cls_prob = run(weights=ROOT / 'yolov3/yolov3.pt', source=i_path, imgsz=clip.shape[1:3], conf_thres=conf_thres) 262 | if pred == -1: 263 | continue 264 | 265 | val_input_crop = img_crop(clip, pred[0]) 266 | val_input_crop = torch.from_numpy(np.array(val_input_crop)) 267 | 268 | val_aot_input = val_input_crop.clone().detach() 269 | val_aot_shape = val_aot_input.shape 270 | val_aot_input = val_aot_input.reshape(val_aot_shape[0], -1, val_aot_shape[1], val_aot_shape[2], val_aot_shape[3]).cuda() 271 | 272 | val_mi_input = val_input_crop.clone().detach() 273 | val_mi_shape = val_mi_input.shape 274 | val_mi_input = val_mi_input.reshape(val_mi_shape[0], -1, val_mi_shape[1], val_mi_shape[2], val_mi_shape[3]).cuda() 275 | 276 | val_mbp_input = torch.cat([val_input_crop[:, :3, :].clone().detach(), val_input_crop[:, 4:, :].clone().detach()], 1) 277 | val_mbp_shape = val_mbp_input.shape 278 | val_mbp_input = val_mbp_input.reshape(val_mbp_shape[0], -1, val_mbp_shape[1], val_mbp_shape[2], val_mbp_shape[3]).cuda() 279 | val_mbp_target = val_input_crop[:, 3, :].clone().detach() 280 | val_mbp_target_shape = val_mbp_target.shape 281 | val_mbp_target = val_mbp_target.reshape(val_mbp_target_shape[0], -1, val_mbp_target_shape[1], val_mbp_target_shape[2]).cuda() 282 | 283 | val_md_input = val_input_crop[:, 3, :].clone().detach().unsqueeze(dim=1) 284 | val_md_shape = val_md_input.shape 285 | val_md_input = val_md_input.reshape(val_md_shape[0], -1, val_md_shape[1], val_md_shape[2], val_md_shape[3]).cuda() 286 | 287 | val_aot_shared = shared_conv(val_aot_input, train_cfg.depth) 288 | val_mi_shared = 
shared_conv(val_mi_input, train_cfg.depth) 289 | val_mbp_shared = shared_conv(val_mbp_input, train_cfg.depth) 290 | val_md_shared = shared_conv(val_md_input, train_cfg.depth) 291 | 292 | val_aot_shared = val_aot_shared.squeeze(dim=2) 293 | val_mi_shared = val_mi_shared.squeeze(dim=2) 294 | val_mbp_shared = val_mbp_shared.squeeze(dim=2) 295 | val_md_shared = val_md_shared.squeeze(dim=2) 296 | 297 | softmax_loss = nn.Softmax(dim=1) 298 | val_aot_output = aot_head(val_aot_shared, train_cfg.depth) 299 | val_aot_output = softmax_loss(val_aot_output) 300 | val_mi_output = mi_head(val_mi_shared, train_cfg.depth) 301 | val_mi_output = softmax_loss(val_mi_output) 302 | val_mbp_output = mbp_head(val_mbp_shared, train_cfg.depth) 303 | val_md_output_res, val_md_output_yolo = md_head(val_md_shared, train_cfg.depth) 304 | 305 | aot_score += torch.sum(val_aot_output[:, 1]) 306 | mi_score += torch.sum(val_mi_output[:, 1]) 307 | mbp_score += mbp_loss(val_mbp_output, val_mbp_target) 308 | md_score += val_mdloss(val_md_output_yolo, val_yolo_cls_prob) 309 | 310 | print(f"val | aot score: {aot_score:.3f} | mi score: {mi_score:.3f} | mbp score: {mbp_score:.3f} | md score: {md_score:.3f}") 311 | 312 | if aot_save[0] == -1: 313 | aot_save = (aot_score, step) 314 | print(f'Save aot model | score: {aot_score}') 315 | else: 316 | if aot_save[0] < aot_score: 317 | aot_head.load_state_dict(torch.load(f'weights/{train_cfg.dataset}_{train_cfg.width}_{train_cfg.depth}_{aot_save[1]}.pth')['aot']) 318 | print(f'Load aot model from {aot_save[1]} step | save: {aot_save} | score: {aot_score}') 319 | else: 320 | print(f'Save aot model | save: {aot_save[0]} | score: {aot_score}') 321 | aot_save = (aot_score, step) 322 | 323 | if mi_save[0] == -1: 324 | mi_save = (mi_score, step) 325 | print(f'Save mi model | score: {mi_score}') 326 | else: 327 | if mi_save[0] < mi_score: 328 | mi_head.load_state_dict(torch.load(f'weights/{train_cfg.dataset}_{train_cfg.width}_{train_cfg.depth}_{mi_save[1]}.pth')['mi']) 329 | print(f'Load mi model from {mi_save[1]} step | save: {mi_save} | score: {mi_score}') 330 | else: 331 | print(f'Save mi model | save: {mi_save[0]} | score: {mi_score}') 332 | mi_save = (mi_score, step) 333 | 334 | if mbp_save[0] == -1: 335 | mbp_save = (mbp_score, step) 336 | print(f'Save mbp model | score: {mbp_score}') 337 | else: 338 | if mbp_save[0] < mbp_score: 339 | mbp_head.load_state_dict(torch.load(f'weights/{train_cfg.dataset}_{train_cfg.width}_{train_cfg.depth}_{mbp_save[1]}.pth')['mbp']) 340 | print(f'Load mbp model from {mbp_save[1]} step | save: {mbp_save} | score: {mbp_score}') 341 | else: 342 | print(f'Save mbp model | save: {mbp_save[0]} | score: {mbp_score}') 343 | mbp_save = (mbp_score, step) 344 | 345 | if md_save[0] == -1: 346 | md_save = (md_score, step) 347 | print(f'Save md model | score: {md_score}') 348 | else: 349 | if md_save[0] < md_score: 350 | md_head.load_state_dict(torch.load(f'weights/{train_cfg.dataset}_{train_cfg.width}_{train_cfg.depth}_{md_save[1]}.pth')['md']) 351 | print(f'Load md model from {md_save[1]} step | save: {md_save} | score: {md_score}') 352 | else: 353 | print(f'Save md model | save: {md_save[0]} | score: {md_score}') 354 | md_save = (md_score, step) 355 | 356 | 357 | shared_conv.train() 358 | aot_head.train() 359 | mi_head.train() 360 | mbp_head.train() 361 | md_head.train() 362 | print('') 363 | 364 | step += 1 365 | if step > train_cfg.iters: 366 | training = False 367 | model_dict = { 'shared': shared_conv.state_dict(), 368 | 'aot': aot_head.state_dict(), 369 | 'mi': 
mi_head.state_dict(), 370 | 'mbp': mbp_head.state_dict(), 371 | 'md': md_head.state_dict(), 372 | 'opt_shared': optimizer_shared.state_dict(), 373 | 'opt_aot': optimizer_aot.state_dict(), 374 | 'opt_mi': optimizer_mi.state_dict(), 375 | 'opt_mbp': optimizer_mbp.state_dict(), 376 | 'opt_md': optimizer_md.state_dict()} 377 | torch.save(model_dict, f'weights/{train_cfg.dataset}_{train_cfg.width}_{train_cfg.depth}_{step}.pth') 378 | break 379 | 380 | except KeyboardInterrupt: 381 | print(f'\nStop early, model saved: \'latest_{train_cfg.dataset}_{train_cfg.width}_{train_cfg.depth}_{step}.pth\'.\n') 382 | 383 | if glob.glob(f'weights/latest*'): 384 | os.remove(glob.glob(f'weights/latest*')[0]) 385 | 386 | model_dict = { 'shared': shared_conv.state_dict(), 387 | 'aot': aot_head.state_dict(), 388 | 'mi': mi_head.state_dict(), 389 | 'mbp': mbp_head.state_dict(), 390 | 'md': md_head.state_dict(), 391 | 'opt_shared': optimizer_shared.state_dict(), 392 | 'opt_aot': optimizer_aot.state_dict(), 393 | 'opt_mi': optimizer_mi.state_dict(), 394 | 'opt_mbp': optimizer_mbp.state_dict(), 395 | 'opt_md': optimizer_md.state_dict()} 396 | torch.save(model_dict, f'weights/latest_{train_cfg.dataset}_{train_cfg.width}_{train_cfg.depth}_{step}.pth') -------------------------------------------------------------------------------- /yolov3/detec_loc.py: -------------------------------------------------------------------------------- 1 | # YOLOv3 by Ultralytics, GPL-3.0 license 2 | 3 | import argparse 4 | import os 5 | import sys 6 | from pathlib import Path 7 | 8 | import cv2 9 | import torch 10 | import torch.backends.cudnn as cudnn 11 | import torch.nn as nn 12 | 13 | FILE = Path(__file__).resolve() 14 | ROOT = FILE.parents[0] # root directory 15 | if str(ROOT) not in sys.path: 16 | sys.path.append(str(ROOT)) # add ROOT to PATH 17 | ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative 18 | 19 | from models.common import DetectMultiBackend 20 | from utils.datasets import IMG_FORMATS, VID_FORMATS, LoadImages, LoadStreams 21 | from utils.general import (LOGGER, check_file, check_img_size, check_imshow, check_requirements, colorstr, 22 | increment_path, non_max_suppression, print_args, scale_coords, strip_optimizer, xyxy2xywh) 23 | from utils.plots import Annotator, colors, save_one_box 24 | from utils.torch_utils import select_device, time_sync 25 | 26 | 27 | @torch.no_grad() 28 | def run(weights=ROOT / 'yolov3.pt', # model.pt path(s) 29 | source=ROOT / 'data/images', # file/dir/URL/glob, 0 for webcam 30 | imgsz=640, # inference size (pixels) 31 | conf_thres=0.25, # confidence threshold 32 | iou_thres=0.45, # NMS IOU threshold 33 | max_det=1000, # maximum detections per image 34 | device='', # cuda device, i.e. 
0 or 0,1,2,3 or cpu 35 | view_img=False, # show results 36 | save_txt=False, # save results to *.txt 37 | save_conf=False, # save confidences in --save-txt labels 38 | save_crop=False, # save cropped prediction boxes 39 | nosave=False, # do not save images/videos 40 | classes=None, # filter by class: --class 0, or --class 0 2 3 41 | agnostic_nms=False, # class-agnostic NMS 42 | augment=False, # augmented inference 43 | visualize=False, # visualize features 44 | update=False, # update all models 45 | project=ROOT / 'runs/detect', # save results to project/name 46 | name='exp', # save results to project/name 47 | exist_ok=False, # existing project/name ok, do not increment 48 | line_thickness=3, # bounding box thickness (pixels) 49 | hide_labels=False, # hide labels 50 | hide_conf=False, # hide confidences 51 | half=False, # use FP16 half-precision inference 52 | dnn=False, # use OpenCV DNN for ONNX inference 53 | ): 54 | source = str(source) 55 | save_img = not nosave and not source.endswith('.txt') # save inference images 56 | is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS) 57 | is_url = source.lower().startswith(('rtsp://', 'rtmp://', 'http://', 'https://')) 58 | webcam = source.isnumeric() or source.endswith('.txt') or (is_url and not is_file) 59 | if is_url and is_file: 60 | source = check_file(source) # download 61 | 62 | # Directories 63 | save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run 64 | (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir 65 | 66 | # Load model 67 | device = select_device(device) 68 | model = DetectMultiBackend(weights, device=device, dnn=dnn) 69 | stride, names, pt, jit, onnx = model.stride, model.names, model.pt, model.jit, model.onnx 70 | imgsz = check_img_size(imgsz, s=stride) # check image size 71 | 72 | dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt and not jit) 73 | bs = 1 # batch_size 74 | 75 | # Half 76 | half &= pt and device.type != 'cpu' # half precision only supported by PyTorch on CUDA 77 | if pt: 78 | model.model.half() if half else model.model.float() 79 | 80 | output_coord = [] 81 | 82 | for path, im, im0s, vid_cap, s in dataset: 83 | im = torch.from_numpy(im).to(device) 84 | im = im.half() if half else im.float() # uint8 to fp16/32 85 | im /= 255 # 0 - 255 to 0.0 - 1.0 86 | if len(im.shape) == 3: 87 | im = im[None] # expand for batch dim 88 | 89 | # Inference 90 | pred = model(im, augment=augment, visualize=False) 91 | 92 | # NMS 93 | pred, cls_prob = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det) 94 | 95 | if pred == -1: 96 | return -1, -1 97 | 98 | tmp_coord = [] 99 | 100 | for i, det in enumerate(pred): 101 | p, im0, frame = path, im0s.copy(), getattr(dataset, 'frame', 0) 102 | 103 | gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] 104 | 105 | if len(det): 106 | det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round() 107 | 108 | tmp_coord.append(det) 109 | 110 | output_coord.append(tmp_coord) 111 | 112 | return output_coord, cls_prob 113 | 114 | 115 | if __name__ == "__main__": 116 | path = '../Data/avenue/training/frames/13/101.jpg' 117 | a = cv2.imread(path) 118 | size = a.shape[:2] 119 | pred, cls_prob = run(weights=ROOT / 'yolov3.pt', source=path, imgsz=size, conf_thres=0.8) 120 | 121 | print(pred) -------------------------------------------------------------------------------- /yolov3/utils/general.py: 
-------------------------------------------------------------------------------- 1 | # YOLOv3 ?? by Ultralytics, GPL-3.0 license 2 | """ 3 | General utils 4 | """ 5 | 6 | import contextlib 7 | import glob 8 | import logging 9 | import math 10 | import os 11 | import platform 12 | import random 13 | import re 14 | import shutil 15 | import signal 16 | import time 17 | import urllib 18 | from itertools import repeat 19 | from multiprocessing.pool import ThreadPool 20 | from pathlib import Path 21 | from subprocess import check_output 22 | from zipfile import ZipFile 23 | 24 | import cv2 25 | import numpy as np 26 | import pandas as pd 27 | import pkg_resources as pkg 28 | import torch 29 | import torchvision 30 | import yaml 31 | 32 | from utils.downloads import gsutil_getsize 33 | from utils.metrics import box_iou, fitness 34 | 35 | # Settings 36 | torch.set_printoptions(linewidth=320, precision=5, profile='long') 37 | np.set_printoptions(linewidth=320, formatter={'float_kind': '{:11.5g}'.format}) # format short g, %precision=5 38 | pd.options.display.max_columns = 10 39 | cv2.setNumThreads(0) # prevent OpenCV from multithreading (incompatible with PyTorch DataLoader) 40 | os.environ['NUMEXPR_MAX_THREADS'] = str(min(os.cpu_count(), 8)) # NumExpr max threads 41 | 42 | FILE = Path(__file__).resolve() 43 | ROOT = FILE.parents[1] # root directory 44 | 45 | 46 | def set_logging(name=None, verbose=True): 47 | # Sets level and returns logger 48 | rank = int(os.getenv('RANK', -1)) # rank in world for Multi-GPU trainings 49 | logging.basicConfig(format="%(message)s", level=logging.INFO if (verbose and rank in (-1, 0)) else logging.WARNING) 50 | return logging.getLogger(name) 51 | 52 | 53 | LOGGER = set_logging(__name__) # define globally (used in train.py, val.py, detect.py, etc.) 54 | 55 | 56 | class Profile(contextlib.ContextDecorator): 57 | # Usage: @Profile() decorator or 'with Profile():' context manager 58 | def __enter__(self): 59 | self.start = time.time() 60 | 61 | def __exit__(self, type, value, traceback): 62 | print(f'Profile results: {time.time() - self.start:.5f}s') 63 | 64 | 65 | class Timeout(contextlib.ContextDecorator): 66 | # Usage: @Timeout(seconds) decorator or 'with Timeout(seconds):' context manager 67 | def __init__(self, seconds, *, timeout_msg='', suppress_timeout_errors=True): 68 | self.seconds = int(seconds) 69 | self.timeout_message = timeout_msg 70 | self.suppress = bool(suppress_timeout_errors) 71 | 72 | def _timeout_handler(self, signum, frame): 73 | raise TimeoutError(self.timeout_message) 74 | 75 | def __enter__(self): 76 | signal.signal(signal.SIGALRM, self._timeout_handler) # Set handler for SIGALRM 77 | signal.alarm(self.seconds) # start countdown for SIGALRM to be raised 78 | 79 | def __exit__(self, exc_type, exc_val, exc_tb): 80 | signal.alarm(0) # Cancel SIGALRM if it's scheduled 81 | if self.suppress and exc_type is TimeoutError: # Suppress TimeoutError 82 | return True 83 | 84 | 85 | class WorkingDirectory(contextlib.ContextDecorator): 86 | # Usage: @WorkingDirectory(dir) decorator or 'with WorkingDirectory(dir):' context manager 87 | def __init__(self, new_dir): 88 | self.dir = new_dir # new dir 89 | self.cwd = Path.cwd().resolve() # current dir 90 | 91 | def __enter__(self): 92 | os.chdir(self.dir) 93 | 94 | def __exit__(self, exc_type, exc_val, exc_tb): 95 | os.chdir(self.cwd) 96 | 97 | 98 | def try_except(func): 99 | # try-except function. 
Usage: @try_except decorator 100 | def handler(*args, **kwargs): 101 | try: 102 | func(*args, **kwargs) 103 | except Exception as e: 104 | print(e) 105 | 106 | return handler 107 | 108 | 109 | def methods(instance): 110 | # Get class/instance methods 111 | return [f for f in dir(instance) if callable(getattr(instance, f)) and not f.startswith("__")] 112 | 113 | 114 | def print_args(name, opt): 115 | # Print argparser arguments 116 | LOGGER.info(colorstr(f'{name}: ') + ', '.join(f'{k}={v}' for k, v in vars(opt).items())) 117 | 118 | 119 | def init_seeds(seed=0): 120 | # Initialize random number generator (RNG) seeds https://pytorch.org/docs/stable/notes/randomness.html 121 | # cudnn seed 0 settings are slower and more reproducible, else faster and less reproducible 122 | import torch.backends.cudnn as cudnn 123 | random.seed(seed) 124 | np.random.seed(seed) 125 | torch.manual_seed(seed) 126 | cudnn.benchmark, cudnn.deterministic = (False, True) if seed == 0 else (True, False) 127 | 128 | 129 | def intersect_dicts(da, db, exclude=()): 130 | # Dictionary intersection of matching keys and shapes, omitting 'exclude' keys, using da values 131 | return {k: v for k, v in da.items() if k in db and not any(x in k for x in exclude) and v.shape == db[k].shape} 132 | 133 | 134 | def get_latest_run(search_dir='.'): 135 | # Return path to most recent 'last.pt' in /runs (i.e. to --resume from) 136 | last_list = glob.glob(f'{search_dir}/**/last*.pt', recursive=True) 137 | return max(last_list, key=os.path.getctime) if last_list else '' 138 | 139 | 140 | def user_config_dir(dir='Ultralytics', env_var='YOLOV3_CONFIG_DIR'): 141 | # Return path of user configuration directory. Prefer environment variable if exists. Make dir if required. 142 | env = os.getenv(env_var) 143 | if env: 144 | path = Path(env) # use environment variable 145 | else: 146 | cfg = {'Windows': 'AppData/Roaming', 'Linux': '.config', 'Darwin': 'Library/Application Support'} # 3 OS dirs 147 | path = Path.home() / cfg.get(platform.system(), '') # OS-specific config dir 148 | path = (path if is_writeable(path) else Path('/tmp')) / dir # GCP and AWS lambda fix, only /tmp is writeable 149 | path.mkdir(exist_ok=True) # make if required 150 | return path 151 | 152 | 153 | def is_writeable(dir, test=False): 154 | # Return True if directory has write permissions, test opening a file with write permissions if test=True 155 | if test: # method 1 156 | file = Path(dir) / 'tmp.txt' 157 | try: 158 | with open(file, 'w'): # open file with write permissions 159 | pass 160 | file.unlink() # remove file 161 | return True 162 | except OSError: 163 | return False 164 | else: # method 2 165 | return os.access(dir, os.R_OK) # possible issues on Windows 166 | 167 | 168 | def is_docker(): 169 | # Is environment a Docker container? 170 | return Path('/workspace').exists() # or Path('/.dockerenv').exists() 171 | 172 | 173 | def is_colab(): 174 | # Is environment a Google Colab instance? 175 | try: 176 | import google.colab 177 | return True 178 | except ImportError: 179 | return False 180 | 181 | 182 | def is_pip(): 183 | # Is file in a pip package? 184 | return 'site-packages' in Path(__file__).resolve().parts 185 | 186 | 187 | def is_ascii(s=''): 188 | # Is string composed of all ASCII (no UTF) characters? (note str().isascii() introduced in python 3.7) 189 | s = str(s) # convert list, tuple, None, etc. 
to str 190 | return len(s.encode().decode('ascii', 'ignore')) == len(s) 191 | 192 | 193 | def is_chinese(s=''): 194 | # Is string composed of any Chinese characters? 195 | return re.search('[\u4e00-\u9fff]', s) 196 | 197 | 198 | def emojis(str=''): 199 | # Return platform-dependent emoji-safe version of string 200 | return str.encode().decode('ascii', 'ignore') if platform.system() == 'Windows' else str 201 | 202 | 203 | def file_size(path): 204 | # Return file/dir size (MB) 205 | path = Path(path) 206 | if path.is_file(): 207 | return path.stat().st_size / 1E6 208 | elif path.is_dir(): 209 | return sum(f.stat().st_size for f in path.glob('**/*') if f.is_file()) / 1E6 210 | else: 211 | return 0.0 212 | 213 | 214 | def check_online(): 215 | # Check internet connectivity 216 | import socket 217 | try: 218 | socket.create_connection(("1.1.1.1", 443), 5) # check host accessibility 219 | return True 220 | except OSError: 221 | return False 222 | 223 | 224 | @try_except 225 | @WorkingDirectory(ROOT) 226 | def check_git_status(): 227 | # Recommend 'git pull' if code is out of date 228 | msg = ', for updates see https://github.com/ultralytics/yolov3' 229 | print(colorstr('github: '), end='') 230 | assert Path('.git').exists(), 'skipping check (not a git repository)' + msg 231 | assert not is_docker(), 'skipping check (Docker image)' + msg 232 | assert check_online(), 'skipping check (offline)' + msg 233 | 234 | cmd = 'git fetch && git config --get remote.origin.url' 235 | url = check_output(cmd, shell=True, timeout=5).decode().strip().rstrip('.git') # git fetch 236 | branch = check_output('git rev-parse --abbrev-ref HEAD', shell=True).decode().strip() # checked out 237 | n = int(check_output(f'git rev-list {branch}..origin/master --count', shell=True)) # commits behind 238 | if n > 0: 239 | s = f"YOLOv3 is out of date by {n} commit{'s' * (n > 1)}. Use `git pull` or `git clone {url}` to update." 240 | else: 241 | s = f'up to date with {url} ??' 242 | print(emojis(s)) # emoji-safe 243 | 244 | 245 | def check_python(minimum='3.6.2'): 246 | # Check current python version vs. required python version 247 | check_version(platform.python_version(), minimum, name='Python ', hard=True) 248 | 249 | 250 | def check_version(current='0.0.0', minimum='0.0.0', name='version ', pinned=False, hard=False): 251 | # Check version vs. required version 252 | current, minimum = (pkg.parse_version(x) for x in (current, minimum)) 253 | result = (current == minimum) if pinned else (current >= minimum) # bool 254 | if hard: # assert min requirements met 255 | assert result, f'{name}{minimum} required by YOLOv3, but {name}{current} is currently installed' 256 | else: 257 | return result 258 | 259 | 260 | @try_except 261 | def check_requirements(requirements=ROOT / 'requirements.txt', exclude=(), install=True): 262 | # Check installed dependencies meet requirements (pass *.txt file or list of packages) 263 | prefix = colorstr('red', 'bold', 'requirements:') 264 | check_python() # check python version 265 | if isinstance(requirements, (str, Path)): # requirements.txt file 266 | file = Path(requirements) 267 | assert file.exists(), f"{prefix} {file.resolve()} not found, check failed." 
268 | with file.open() as f: 269 | requirements = [f'{x.name}{x.specifier}' for x in pkg.parse_requirements(f) if x.name not in exclude] 270 | else: # list or tuple of packages 271 | requirements = [x for x in requirements if x not in exclude] 272 | 273 | n = 0 # number of packages updates 274 | for r in requirements: 275 | try: 276 | pkg.require(r) 277 | except Exception as e: # DistributionNotFound or VersionConflict if requirements not met 278 | s = f"{prefix} {r} not found and is required by YOLOv3" 279 | if install: 280 | print(f"{s}, attempting auto-update...") 281 | try: 282 | assert check_online(), f"'pip install {r}' skipped (offline)" 283 | print(check_output(f"pip install '{r}'", shell=True).decode()) 284 | n += 1 285 | except Exception as e: 286 | print(f'{prefix} {e}') 287 | else: 288 | print(f'{s}. Please install and rerun your command.') 289 | 290 | if n: # if packages updated 291 | source = file.resolve() if 'file' in locals() else requirements 292 | s = f"{prefix} {n} package{'s' * (n > 1)} updated per {source}\n" \ 293 | f"{prefix} {colorstr('bold', 'Restart runtime or rerun command for updates to take effect')}\n" 294 | print(emojis(s)) 295 | 296 | 297 | def check_img_size(imgsz, s=32, floor=0): 298 | # Verify image size is a multiple of stride s in each dimension 299 | if isinstance(imgsz, int): # integer i.e. img_size=640 300 | new_size = max(make_divisible(imgsz, int(s)), floor) 301 | else: # list i.e. img_size=[640, 480] 302 | new_size = [max(make_divisible(x, int(s)), floor) for x in imgsz] 303 | return new_size 304 | 305 | 306 | def check_imshow(): 307 | # Check if environment supports image displays 308 | try: 309 | assert not is_docker(), 'cv2.imshow() is disabled in Docker environments' 310 | assert not is_colab(), 'cv2.imshow() is disabled in Google Colab environments' 311 | cv2.imshow('test', np.zeros((1, 1, 3))) 312 | cv2.waitKey(1) 313 | cv2.destroyAllWindows() 314 | cv2.waitKey(1) 315 | return True 316 | except Exception as e: 317 | print(f'WARNING: Environment does not support cv2.imshow() or PIL Image.show() image displays\n{e}') 318 | return False 319 | 320 | 321 | def check_suffix(file='yolov3.pt', suffix=('.pt',), msg=''): 322 | # Check file(s) for acceptable suffix 323 | if file and suffix: 324 | if isinstance(suffix, str): 325 | suffix = [suffix] 326 | for f in file if isinstance(file, (list, tuple)) else [file]: 327 | s = Path(f).suffix.lower() # file suffix 328 | if len(s): 329 | assert s in suffix, f"{msg}{f} acceptable suffix is {suffix}" 330 | 331 | 332 | def check_yaml(file, suffix=('.yaml', '.yml')): 333 | # Search/download YAML file (if necessary) and return path, checking suffix 334 | return check_file(file, suffix) 335 | 336 | 337 | def check_file(file, suffix=''): 338 | # Search/download file (if necessary) and return path 339 | check_suffix(file, suffix) # optional 340 | file = str(file) # convert to str() 341 | if Path(file).is_file() or file == '': # exists 342 | return file 343 | elif file.startswith(('http:/', 'https:/')): # download 344 | url = str(Path(file).as_posix()).replace(':/', '://') # Pathlib turns :// -> :/ 345 | file = Path(urllib.parse.unquote(file).split('?')[0]).name # '%2F' to '/', split https://url.com/file.txt?auth 346 | if Path(file).is_file(): 347 | print(f'Found {url} locally at {file}') # file already exists 348 | else: 349 | print(f'Downloading {url} to {file}...') 350 | torch.hub.download_url_to_file(url, file) 351 | assert Path(file).exists() and Path(file).stat().st_size > 0, f'File download failed: {url}' # 
check 352 | return file 353 | else: # search 354 | files = [] 355 | for d in 'data', 'models', 'utils': # search directories 356 | files.extend(glob.glob(str(ROOT / d / '**' / file), recursive=True)) # find file 357 | assert len(files), f'File not found: {file}' # assert file was found 358 | assert len(files) == 1, f"Multiple files match '{file}', specify exact path: {files}" # assert unique 359 | return files[0] # return file 360 | 361 | 362 | def check_dataset(data, autodownload=True): 363 | # Download and/or unzip dataset if not found locally 364 | # Usage: https://github.com/ultralytics/yolov5/releases/download/v1.0/coco128_with_yaml.zip 365 | 366 | # Download (optional) 367 | extract_dir = '' 368 | if isinstance(data, (str, Path)) and str(data).endswith('.zip'): # i.e. gs://bucket/dir/coco128.zip 369 | download(data, dir='../datasets', unzip=True, delete=False, curl=False, threads=1) 370 | data = next((Path('../datasets') / Path(data).stem).rglob('*.yaml')) 371 | extract_dir, autodownload = data.parent, False 372 | 373 | # Read yaml (optional) 374 | if isinstance(data, (str, Path)): 375 | with open(data, errors='ignore') as f: 376 | data = yaml.safe_load(f) # dictionary 377 | 378 | # Parse yaml 379 | path = extract_dir or Path(data.get('path') or '') # optional 'path' default to '.' 380 | for k in 'train', 'val', 'test': 381 | if data.get(k): # prepend path 382 | data[k] = str(path / data[k]) if isinstance(data[k], str) else [str(path / x) for x in data[k]] 383 | 384 | assert 'nc' in data, "Dataset 'nc' key missing." 385 | if 'names' not in data: 386 | data['names'] = [f'class{i}' for i in range(data['nc'])] # assign class names if missing 387 | train, val, test, s = (data.get(x) for x in ('train', 'val', 'test', 'download')) 388 | if val: 389 | val = [Path(x).resolve() for x in (val if isinstance(val, list) else [val])] # val path 390 | if not all(x.exists() for x in val): 391 | print('\nWARNING: Dataset not found, nonexistent paths: %s' % [str(x) for x in val if not x.exists()]) 392 | if s and autodownload: # download script 393 | root = path.parent if 'path' in data else '..' # unzip directory i.e. '../' 394 | if s.startswith('http') and s.endswith('.zip'): # URL 395 | f = Path(s).name # filename 396 | print(f'Downloading {s} to {f}...') 397 | torch.hub.download_url_to_file(s, f) 398 | Path(root).mkdir(parents=True, exist_ok=True) # create root 399 | ZipFile(f).extractall(path=root) # unzip 400 | Path(f).unlink() # remove zip 401 | r = None # success 402 | elif s.startswith('bash '): # bash script 403 | print(f'Running {s} ...') 404 | r = os.system(s) 405 | else: # python script 406 | r = exec(s, {'yaml': data}) # return None 407 | print(f"Dataset autodownload {f'success, saved to {root}' if r in (0, None) else 'failure'}\n") 408 | else: 409 | raise Exception('Dataset not found.') 410 | 411 | return data # dictionary 412 | 413 | 414 | def url2file(url): 415 | # Convert URL to filename, i.e. 
https://url.com/file.txt?auth -> file.txt 416 | url = str(Path(url)).replace(':/', '://') # Pathlib turns :// -> :/ 417 | file = Path(urllib.parse.unquote(url)).name.split('?')[0] # '%2F' to '/', split https://url.com/file.txt?auth 418 | return file 419 | 420 | 421 | def download(url, dir='.', unzip=True, delete=True, curl=False, threads=1): 422 | # Multi-threaded file download and unzip function, used in data.yaml for autodownload 423 | def download_one(url, dir): 424 | # Download 1 file 425 | f = dir / Path(url).name # filename 426 | if Path(url).is_file(): # exists in current path 427 | Path(url).rename(f) # move to dir 428 | elif not f.exists(): 429 | print(f'Downloading {url} to {f}...') 430 | if curl: 431 | os.system(f"curl -L '{url}' -o '{f}' --retry 9 -C -") # curl download, retry and resume on fail 432 | else: 433 | torch.hub.download_url_to_file(url, f, progress=True) # torch download 434 | if unzip and f.suffix in ('.zip', '.gz'): 435 | print(f'Unzipping {f}...') 436 | if f.suffix == '.zip': 437 | ZipFile(f).extractall(path=dir) # unzip 438 | elif f.suffix == '.gz': 439 | os.system(f'tar xfz {f} --directory {f.parent}') # unzip 440 | if delete: 441 | f.unlink() # remove zip 442 | 443 | dir = Path(dir) 444 | dir.mkdir(parents=True, exist_ok=True) # make directory 445 | if threads > 1: 446 | pool = ThreadPool(threads) 447 | pool.imap(lambda x: download_one(*x), zip(url, repeat(dir))) # multi-threaded 448 | pool.close() 449 | pool.join() 450 | else: 451 | for u in [url] if isinstance(url, (str, Path)) else url: 452 | download_one(u, dir) 453 | 454 | 455 | def make_divisible(x, divisor): 456 | # Returns x evenly divisible by divisor 457 | return math.ceil(x / divisor) * divisor 458 | 459 | 460 | def clean_str(s): 461 | # Cleans a string by replacing special characters with underscore _ 462 | return re.sub(pattern="[|@#!¡·$€%&()=?¿^*;:,¨´><+]", repl="_", string=s) 463 | 464 | 465 | def one_cycle(y1=0.0, y2=1.0, steps=100): 466 | # lambda function for sinusoidal ramp from y1 to y2 https://arxiv.org/pdf/1812.01187.pdf 467 | return lambda x: ((1 - math.cos(x * math.pi / steps)) / 2) * (y2 - y1) + y1 468 | 469 | 470 | def colorstr(*input): 471 | # Colors a string https://en.wikipedia.org/wiki/ANSI_escape_code, i.e. 
colorstr('blue', 'hello world') 472 | *args, string = input if len(input) > 1 else ('blue', 'bold', input[0]) # color arguments, string 473 | colors = {'black': '\033[30m', # basic colors 474 | 'red': '\033[31m', 475 | 'green': '\033[32m', 476 | 'yellow': '\033[33m', 477 | 'blue': '\033[34m', 478 | 'magenta': '\033[35m', 479 | 'cyan': '\033[36m', 480 | 'white': '\033[37m', 481 | 'bright_black': '\033[90m', # bright colors 482 | 'bright_red': '\033[91m', 483 | 'bright_green': '\033[92m', 484 | 'bright_yellow': '\033[93m', 485 | 'bright_blue': '\033[94m', 486 | 'bright_magenta': '\033[95m', 487 | 'bright_cyan': '\033[96m', 488 | 'bright_white': '\033[97m', 489 | 'end': '\033[0m', # misc 490 | 'bold': '\033[1m', 491 | 'underline': '\033[4m'} 492 | return ''.join(colors[x] for x in args) + f'{string}' + colors['end'] 493 | 494 | 495 | def labels_to_class_weights(labels, nc=80): 496 | # Get class weights (inverse frequency) from training labels 497 | if labels[0] is None: # no labels loaded 498 | return torch.Tensor() 499 | 500 | labels = np.concatenate(labels, 0) # labels.shape = (866643, 5) for COCO 501 | classes = labels[:, 0].astype(int) # labels = [class xywh]; plain int avoids the removed np.int alias on NumPy >= 1.24 502 | weights = np.bincount(classes, minlength=nc) # occurrences per class 503 | 504 | # Prepend gridpoint count (for uCE training) 505 | # gpi = ((320 / 32 * np.array([1, 2, 4])) ** 2 * 3).sum() # gridpoints per image 506 | # weights = np.hstack([gpi * len(labels) - weights.sum() * 9, weights * 9]) ** 0.5 # prepend gridpoints to start 507 | 508 | weights[weights == 0] = 1 # replace empty bins with 1 509 | weights = 1 / weights # number of targets per class 510 | weights /= weights.sum() # normalize 511 | return torch.from_numpy(weights) 512 | 513 | 514 | def labels_to_image_weights(labels, nc=80, class_weights=np.ones(80)): 515 | # Produces image weights based on class_weights and image contents 516 | class_counts = np.array([np.bincount(x[:, 0].astype(int), minlength=nc) for x in labels]) 517 | image_weights = (class_weights.reshape(1, nc) * class_counts).sum(1) 518 | # index = random.choices(range(n), weights=image_weights, k=1) # weight image sample 519 | return image_weights 520 | 521 | 522 | def coco80_to_coco91_class(): # converts 80-index (val2014) to 91-index (paper) 523 | # https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/ 524 | # a = np.loadtxt('data/coco.names', dtype='str', delimiter='\n') 525 | # b = np.loadtxt('data/coco_paper.names', dtype='str', delimiter='\n') 526 | # x1 = [list(a[i] == b).index(True) + 1 for i in range(80)] # darknet to coco 527 | # x2 = [list(b[i] == a).index(True) if any(b[i] == a) else None for i in range(91)] # coco to darknet 528 | x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 529 | 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 530 | 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90] 531 | return x 532 | 533 | 534 | def xyxy2xywh(x): 535 | # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] where xy1=top-left, xy2=bottom-right 536 | y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) 537 | y[:, 0] = (x[:, 0] + x[:, 2]) / 2 # x center 538 | y[:, 1] = (x[:, 1] + x[:, 3]) / 2 # y center 539 | y[:, 2] = x[:, 2] - x[:, 0] # width 540 | y[:, 3] = x[:, 3] - x[:, 1] # height 541 | return y 542 | 543 | 544 | def xywh2xyxy(x): 545 | # Convert nx4 boxes from [x, y, w, h] to 
[x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right 546 | y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) 547 | y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x 548 | y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y 549 | y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x 550 | y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y 551 | return y 552 | 553 | 554 | def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0): 555 | # Convert nx4 boxes from [x, y, w, h] normalized to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right 556 | y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) 557 | y[:, 0] = w * (x[:, 0] - x[:, 2] / 2) + padw # top left x 558 | y[:, 1] = h * (x[:, 1] - x[:, 3] / 2) + padh # top left y 559 | y[:, 2] = w * (x[:, 0] + x[:, 2] / 2) + padw # bottom right x 560 | y[:, 3] = h * (x[:, 1] + x[:, 3] / 2) + padh # bottom right y 561 | return y 562 | 563 | 564 | def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0): 565 | # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] normalized where xy1=top-left, xy2=bottom-right 566 | if clip: 567 | clip_coords(x, (h - eps, w - eps)) # warning: inplace clip 568 | y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) 569 | y[:, 0] = ((x[:, 0] + x[:, 2]) / 2) / w # x center 570 | y[:, 1] = ((x[:, 1] + x[:, 3]) / 2) / h # y center 571 | y[:, 2] = (x[:, 2] - x[:, 0]) / w # width 572 | y[:, 3] = (x[:, 3] - x[:, 1]) / h # height 573 | return y 574 | 575 | 576 | def xyn2xy(x, w=640, h=640, padw=0, padh=0): 577 | # Convert normalized segments into pixel segments, shape (n,2) 578 | y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) 579 | y[:, 0] = w * x[:, 0] + padw # top left x 580 | y[:, 1] = h * x[:, 1] + padh # top left y 581 | return y 582 | 583 | 584 | def segment2box(segment, width=640, height=640): 585 | # Convert 1 segment label to 1 box label, applying inside-image constraint, i.e. (xy1, xy2, ...) to (xyxy) 586 | x, y = segment.T # segment xy 587 | inside = (x >= 0) & (y >= 0) & (x <= width) & (y <= height) 588 | x, y, = x[inside], y[inside] 589 | return np.array([x.min(), y.min(), x.max(), y.max()]) if any(x) else np.zeros((1, 4)) # xyxy 590 | 591 | 592 | def segments2boxes(segments): 593 | # Convert segment labels to box labels, i.e. (cls, xy1, xy2, ...) 
to (cls, xywh) 594 | boxes = [] 595 | for s in segments: 596 | x, y = s.T # segment xy 597 | boxes.append([x.min(), y.min(), x.max(), y.max()]) # cls, xyxy 598 | return xyxy2xywh(np.array(boxes)) # cls, xywh 599 | 600 | 601 | def resample_segments(segments, n=1000): 602 | # Up-sample an (n,2) segment 603 | for i, s in enumerate(segments): 604 | x = np.linspace(0, len(s) - 1, n) 605 | xp = np.arange(len(s)) 606 | segments[i] = np.concatenate([np.interp(x, xp, s[:, i]) for i in range(2)]).reshape(2, -1).T # segment xy 607 | return segments 608 | 609 | 610 | def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None): 611 | # Rescale coords (xyxy) from img1_shape to img0_shape 612 | if ratio_pad is None: # calculate from img0_shape 613 | gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new 614 | pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding 615 | else: 616 | gain = ratio_pad[0][0] 617 | pad = ratio_pad[1] 618 | 619 | coords[:, [0, 2]] -= pad[0] # x padding 620 | coords[:, [1, 3]] -= pad[1] # y padding 621 | coords[:, :4] /= gain 622 | clip_coords(coords, img0_shape) 623 | return coords 624 | 625 | 626 | def clip_coords(boxes, shape): 627 | # Clip bounding xyxy bounding boxes to image shape (height, width) 628 | if isinstance(boxes, torch.Tensor): # faster individually 629 | boxes[:, 0].clamp_(0, shape[1]) # x1 630 | boxes[:, 1].clamp_(0, shape[0]) # y1 631 | boxes[:, 2].clamp_(0, shape[1]) # x2 632 | boxes[:, 3].clamp_(0, shape[0]) # y2 633 | else: # np.array (faster grouped) 634 | boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1]) # x1, x2 635 | boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0]) # y1, y2 636 | 637 | 638 | def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, multi_label=False, 639 | labels=(), max_det=300): 640 | """Runs Non-Maximum Suppression (NMS) on inference results 641 | 642 | Returns: 643 | list of detections, on (n,6) tensor per image [xyxy, conf, cls] 644 | """ 645 | 646 | nc = prediction.shape[2] - 5 # number of classes 647 | xc = prediction[..., 4] > conf_thres # candidates 648 | 649 | # Checks 650 | assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0' 651 | assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0' 652 | 653 | # Settings 654 | min_wh, max_wh = 2, 4096 # (pixels) minimum and maximum box width and height 655 | max_nms = 30000 # maximum number of boxes into torchvision.ops.nms() 656 | time_limit = 10.0 # seconds to quit after 657 | redundant = True # require redundant detections 658 | multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img) 659 | merge = False # use merge-NMS 660 | 661 | t = time.time() 662 | output = [torch.zeros((0, 6), device=prediction.device)] * prediction.shape[0] 663 | for xi, x in enumerate(prediction): # image index, image inference 664 | # Apply constraints 665 | # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height 666 | x = x[xc[xi]] # confidence 667 | 668 | 669 | 670 | # Cat apriori labels if autolabelling 671 | if labels and len(labels[xi]): 672 | l = labels[xi] 673 | v = torch.zeros((len(l), nc + 5), device=x.device) 674 | v[:, :4] = l[:, 1:5] # box 675 | v[:, 4] = 1.0 # conf 676 | v[range(len(l)), l[:, 0].long() + 5] = 1.0 # cls 677 | x = torch.cat((x, v), 0) 678 | 679 | 680 | # If none remain process next image 681 | if not x.shape[0]: 
682 | return -1, -1 683 | 684 | # save cls_conf 685 | cc = x[:, 5:] 686 | 687 | # Compute conf 688 | x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf 689 | 690 | # Box (center x, center y, width, height) to (x1, y1, x2, y2) 691 | box = xywh2xyxy(x[:, :4]) 692 | 693 | # Detections matrix nx6 (xyxy, conf, cls) 694 | if multi_label: 695 | i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T 696 | x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1) 697 | else: # best class only 698 | conf, j = x[:, 5:].max(1, keepdim=True) 699 | x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres] 700 | 701 | # Filter by class 702 | if classes is not None: 703 | x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)] 704 | 705 | # Apply finite constraint 706 | # if not torch.isfinite(x).all(): 707 | # x = x[torch.isfinite(x).all(1)] 708 | 709 | # Check shape 710 | n = x.shape[0] # number of boxes 711 | if not n: # no boxes 712 | return -1, -1 713 | elif n > max_nms: # excess boxes 714 | x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence 715 | 716 | # Batched NMS 717 | c = x[:, 5:6] * (0 if agnostic else max_wh) # classes 718 | boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores 719 | i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS 720 | if i.shape[0] > max_det: # limit detections 721 | i = i[:max_det] 722 | if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean) 723 | # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4) 724 | iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix 725 | weights = iou * scores[None] # box weights 726 | x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes 727 | if redundant: 728 | i = i[iou.sum(1) > 1] # require redundancy 729 | 730 | output[xi] = x[i] 731 | if (time.time() - t) > time_limit: 732 | print(f'WARNING: NMS time limit {time_limit}s exceeded') 733 | break # time limit exceeded 734 | cc = cc[i] 735 | 736 | return output, cc 737 | 738 | 739 | 740 | def strip_optimizer(f='best.pt', s=''): # from utils.general import *; strip_optimizer() 741 | # Strip optimizer from 'f' to finalize training, optionally save as 's' 742 | x = torch.load(f, map_location=torch.device('cpu')) 743 | if x.get('ema'): 744 | x['model'] = x['ema'] # replace model with ema 745 | for k in 'optimizer', 'training_results', 'wandb_id', 'ema', 'updates': # keys 746 | x[k] = None 747 | x['epoch'] = -1 748 | x['model'].half() # to FP16 749 | for p in x['model'].parameters(): 750 | p.requires_grad = False 751 | torch.save(x, s or f) 752 | mb = os.path.getsize(s or f) / 1E6 # filesize 753 | print(f"Optimizer stripped from {f},{(' saved as %s,' % s) if s else ''} {mb:.1f}MB") 754 | 755 | 756 | def print_mutation(results, hyp, save_dir, bucket): 757 | evolve_csv, results_csv, evolve_yaml = save_dir / 'evolve.csv', save_dir / 'results.csv', save_dir / 'hyp_evolve.yaml' 758 | keys = ('metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95', 759 | 'val/box_loss', 'val/obj_loss', 'val/cls_loss') + tuple(hyp.keys()) # [results + hyps] 760 | keys = tuple(x.strip() for x in keys) 761 | vals = results + tuple(hyp.values()) 762 | n = len(keys) 763 | 764 | # Download (optional) 765 | if bucket: 766 | url = f'gs://{bucket}/evolve.csv' 767 | if gsutil_getsize(url) > (os.path.getsize(evolve_csv) if os.path.exists(evolve_csv) else 0): 768 | os.system(f'gsutil cp {url} {save_dir}') # download evolve.csv if larger than local 769 | 770 | # Log to 
evolve.csv 771 | s = '' if evolve_csv.exists() else (('%20s,' * n % keys).rstrip(',') + '\n') # add header 772 | with open(evolve_csv, 'a') as f: 773 | f.write(s + ('%20.5g,' * n % vals).rstrip(',') + '\n') 774 | 775 | # Print to screen 776 | print(colorstr('evolve: ') + ', '.join(f'{x.strip():>20s}' for x in keys)) 777 | print(colorstr('evolve: ') + ', '.join(f'{x:20.5g}' for x in vals), end='\n\n\n') 778 | 779 | # Save yaml 780 | with open(evolve_yaml, 'w') as f: 781 | data = pd.read_csv(evolve_csv) 782 | data = data.rename(columns=lambda x: x.strip()) # strip keys 783 | i = np.argmax(fitness(data.values[:, :7])) # 784 | f.write('# YOLOv3 Hyperparameter Evolution Results\n' + 785 | f'# Best generation: {i}\n' + 786 | f'# Last generation: {len(data)}\n' + 787 | '# ' + ', '.join(f'{x.strip():>20s}' for x in keys[:7]) + '\n' + 788 | '# ' + ', '.join(f'{x:>20.5g}' for x in data.values[i, :7]) + '\n\n') 789 | yaml.safe_dump(hyp, f, sort_keys=False) 790 | 791 | if bucket: 792 | os.system(f'gsutil cp {evolve_csv} {evolve_yaml} gs://{bucket}') # upload 793 | 794 | 795 | def apply_classifier(x, model, img, im0): 796 | # Apply a second stage classifier to YOLO outputs 797 | # Example model = torchvision.models.__dict__['efficientnet_b0'](pretrained=True).to(device).eval() 798 | im0 = [im0] if isinstance(im0, np.ndarray) else im0 799 | for i, d in enumerate(x): # per image 800 | if d is not None and len(d): 801 | d = d.clone() 802 | 803 | # Reshape and pad cutouts 804 | b = xyxy2xywh(d[:, :4]) # boxes 805 | b[:, 2:] = b[:, 2:].max(1)[0].unsqueeze(1) # rectangle to square 806 | b[:, 2:] = b[:, 2:] * 1.3 + 30 # pad 807 | d[:, :4] = xywh2xyxy(b).long() 808 | 809 | # Rescale boxes from img_size to im0 size 810 | scale_coords(img.shape[2:], d[:, :4], im0[i].shape) 811 | 812 | # Classes 813 | pred_cls1 = d[:, 5].long() 814 | ims = [] 815 | for j, a in enumerate(d): # per item 816 | cutout = im0[i][int(a[1]):int(a[3]), int(a[0]):int(a[2])] 817 | im = cv2.resize(cutout, (224, 224)) # BGR 818 | # cv2.imwrite('example%i.jpg' % j, cutout) 819 | 820 | im = im[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416 821 | im = np.ascontiguousarray(im, dtype=np.float32) # uint8 to float32 822 | im /= 255 # 0 - 255 to 0.0 - 1.0 823 | ims.append(im) 824 | 825 | pred_cls2 = model(torch.Tensor(ims).to(d.device)).argmax(1) # classifier prediction 826 | x[i] = x[i][pred_cls1 == pred_cls2] # retain matching class detections 827 | 828 | return x 829 | 830 | 831 | def increment_path(path, exist_ok=False, sep='', mkdir=False): 832 | # Increment file or directory path, i.e. runs/exp --> runs/exp{sep}2, runs/exp{sep}3, ... etc. 833 | path = Path(path) # os-agnostic 834 | if path.exists() and not exist_ok: 835 | path, suffix = (path.with_suffix(''), path.suffix) if path.is_file() else (path, '') 836 | dirs = glob.glob(f"{path}{sep}*") # similar paths 837 | matches = [re.search(rf"%s{sep}(\d+)" % path.stem, d) for d in dirs] 838 | i = [int(m.groups()[0]) for m in matches if m] # indices 839 | n = max(i) + 1 if i else 2 # increment number 840 | path = Path(f"{path}{sep}{n}{suffix}") # increment path 841 | if mkdir: 842 | path.mkdir(parents=True, exist_ok=True) # make directory 843 | return path 844 | 845 | 846 | # Variables 847 | NCOLS = 0 if is_docker() else shutil.get_terminal_size().columns # terminal window size 848 | --------------------------------------------------------------------------------
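A note on the customized non_max_suppression() in yolov3/utils/general.py above: unlike the upstream Ultralytics helper, this version returns a pair (detections, class-confidence rows) and short-circuits with (-1, -1) as soon as an image has no boxes left after the confidence filter, so callers must check for that sentinel before unpacking. The following is a minimal usage sketch under stated assumptions, not code from this repo: the import path mirrors the repo layout (it may need the repo root on sys.path), the batch holds a single image, and the prediction tensor is random dummy data rather than real model output.

import torch
from yolov3.utils.general import non_max_suppression  # assumed import path; add the repo root to sys.path if needed

# Dummy single-image prediction: (batch, boxes, 5 + num_classes) = (1, 100, 85), boxes as xywh in pixels
pred = torch.rand(1, 100, 85)
pred[..., :4] *= 640

result = non_max_suppression(pred, conf_thres=0.25, iou_thres=0.45)
if result == (-1, -1):  # sentinel: no box survived the confidence filter
    print('no detections')
else:
    det, cls_conf = result  # det: list with one (n, 6) tensor [xyxy, conf, cls]; cls_conf: saved class scores at the NMS keep indices
    print(det[0].shape, cls_conf.shape)

Returning the raw class scores alongside the detections presumably lets detec_loc.py reuse per-class confidences without a second forward pass; with a batch larger than one, only the last image's scores survive to the return statement, which is why the sketch assumes a single image.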