├── .gitignore ├── LICENSE ├── README.md ├── data ├── __init__.py ├── config.py └── ucf24.py ├── layers ├── __init__.py ├── box_utils.py ├── functions │ ├── __init__.py │ └── prior_box.py └── modules │ ├── __init__.py │ ├── l2norm.py │ └── multibox_loss.py ├── matlab-online-display ├── .gitignore ├── actionpath │ ├── actionPaths.m │ ├── fusedActionPaths.m │ ├── incremental_linking.m │ └── nms.m ├── display01.txt ├── eval │ ├── compute_spatio_temporal_iou.m │ ├── get_PR_curve.m │ └── xVOCap.m ├── frameAp.m ├── gentube │ ├── convert2eval.m │ ├── dpEM_max.m │ ├── mydpEM_max.m │ ├── parActionPathSmoother.m │ └── readALLactionPaths.m ├── myI01onlineTubes.m ├── myI02genFusedTubes.m └── utils │ ├── createdires.m │ ├── initDatasetOpts.m │ └── initDatasetOptsFused.m ├── online-tubes ├── .gitignore ├── I01onlineTubes.m ├── I02genFusedTubes.m ├── actionpath │ ├── actionPaths.m │ ├── fusedActionPaths.m │ ├── incremental_linking.m │ └── nms.m ├── eval │ ├── compute_spatio_temporal_iou.m │ ├── get_PR_curve.m │ └── xVOCap.m ├── frameAp.m ├── gentube │ ├── PARactionPathSmoother.m │ ├── convert2eval.m │ ├── dpEM_max.m │ └── readALLactionPaths.m └── utils │ ├── createdires.m │ ├── initDatasetOpts.m │ └── initDatasetOptsFused.m ├── ssd.py ├── test-ucf24.py ├── train-ucf24.py └── utils ├── __init__.py ├── augmentations.py └── evaluation.py /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | *.log 3 | *.pyc 4 | *.pyo 5 | __pycache__/ 6 | .idea/ 7 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Gurkirt Singh 4 | This is an adaption of Max deGroot, Ellis Brown originl code of SSD for VOC dataset 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the 
def base_transform(image, size, mean):
    """Resize an image to a square and subtract a mean from every pixel.

    Args:
        image: image array accepted by ``cv2.resize``.
        size (int): side length, in pixels, of the square output.
        mean: value(s) subtracted from the resized image; must broadcast
            against it.

    Returns:
        numpy.ndarray: the resized, mean-subtracted image as float32.
    """
    resized = cv2.resize(image, (size, size)).astype(np.float32)
    # In-place subtraction keeps the float32 buffer created above.
    resized -= mean
    return resized.astype(np.float32)


class BaseTransform:
    """Callable wrapper around :func:`base_transform` with fixed settings.

    The call signature accepts (and returns untouched) ``boxes`` and
    ``labels`` alongside the image.
    """

    def __init__(self, size, mean):
        # Store the mean as float32 so the subtraction never changes dtype.
        self.mean = np.array(mean, dtype=np.float32)
        self.size = size

    def __call__(self, image, boxes=None, labels=None):
        """Return ``(transformed_image, boxes, labels)``."""
        transformed = base_transform(image, self.size, self.mean)
        return transformed, boxes, labels
# SSD300 prior-box configurations (data/config.py).
#
# Each dict is read by PriorBox, which looks up the keys 'feature_maps',
# 'min_dim', 'steps', 'min_sizes', 'max_sizes', 'aspect_ratios', 'variance',
# 'clip' and 'name'.

# Newer version: uses the additional conv11_2 layer as the last layer before
# the multibox layers. PriorBox's 'v2' branch emits both ar and 1/ar for every
# listed aspect ratio, so only the ratios > 1 are listed here.
v2 = dict(
    feature_maps=[38, 19, 10, 5, 3, 1],
    min_dim=300,
    steps=[8, 16, 32, 64, 100, 300],
    min_sizes=[30, 60, 111, 162, 213, 264],
    max_sizes=[60, 111, 162, 213, 264, 315],
    aspect_ratios=[[2], [2, 3], [2, 3], [2, 3], [2], [2]],
    variance=[0.1, 0.2],
    clip=True,
    name='v2',
)

# Older version: uses an average pooling layer as the last layer before the
# multibox layers. A non-positive max size (the leading -1) makes PriorBox skip
# the second square box for that feature map.
v1 = dict(
    feature_maps=[38, 19, 10, 5, 3, 1],
    min_dim=300,
    steps=[8, 16, 32, 64, 100, 300],
    min_sizes=[30, 60, 114, 168, 222, 276],
    max_sizes=[-1, 114, 168, 222, 276, 330],
    aspect_ratios=(
        [[1, 1, 2, 1/2]]
        + [[1, 1, 2, 1/2, 3, 1/3] for _ in range(5)]
    ),
    variance=[0.1, 0.2],
    clip=True,
    name='v1',
)
class AnnotationTransform(object):
    """Turn per-frame boxes and labels into normalised target rows.

    Arguments:
        class_to_ind (dict, optional): lookup of class name -> index.
            Defaults to indexing ``CLASSES`` in order.
        keep_difficult (bool, optional): accepted for interface
            compatibility; it is not used by this transform.
    """

    def __init__(self, class_to_ind=None, keep_difficult=False):
        self.class_to_ind = class_to_ind or dict(
            zip(CLASSES, range(len(CLASSES))))
        # Derive the reverse lookup from the mapping actually in use so the
        # two tables cannot disagree when a custom mapping is supplied.
        self.ind_to_class = {
            ind: name for name, ind in self.class_to_ind.items()}

    def __call__(self, bboxs, labels, width, height):
        """Normalise pixel boxes to the [0, 1] range.

        Arguments:
            bboxs: indexable collection of ``[xmin, ymin, xmax, ymax]`` rows
                in pixels, one row per label.
            labels: sequence of class indices, one per box.
            width (int): image width in pixels.
            height (int): image height in pixels.

        Returns:
            list: ``[[xmin, ymin, xmax, ymax, label_ind], ...]`` with the
            coordinates divided by the image width (x) or height (y).
        """
        res = []
        for t, label in enumerate(labels):
            bbox = bboxs[t]
            bndbox = []
            for i in range(4):
                # Even positions are x coordinates, odd positions are y.
                scale = width if i % 2 == 0 else height
                # Clamp into the image on both sides. The original computed
                # the lower clamp and then overwrote it, so negative
                # coordinates produced negative normalised values.
                cur_pt = min(scale, max(0, int(bbox[i])))
                bndbox.append(float(cur_pt) / scale)
            bndbox.append(label)
            res.append(bndbox)
        return res
def readsplitfile(splitfile):
    """Return the lines of ``splitfile`` with trailing newlines removed."""
    with open(splitfile, 'r') as f:
        return [line.rstrip('\n') for line in f]


def make_lists(rootpath, imgtype, split=1, fulltest=False):
    """Build the per-frame train and test sample lists.

    Arguments:
        rootpath (str): dataset root containing ``splitfiles/``.
        imgtype (str): image sub-directory name. Kept for interface
            compatibility; it is not needed to build the lists.
        split (int): which ``trainlistNN.txt`` split file to read.
        fulltest (bool): if True, visit every frame of every video and add a
            placeholder sample for test frames without ground truth.

    Returns:
        tuple: ``(trainlist, testlist, video_list)``. Each list entry is
        ``[video_index, frame_number (1-based), labels, boxes]`` where boxes
        are ``[xmin, ymin, xmax, ymax]`` rows; ``video_list`` holds the
        sorted video names that ``video_index`` refers to.
    """
    split_dir = os.path.join(rootpath, 'splitfiles')
    splitfile = os.path.join(split_dir, 'trainlist{:02d}.txt'.format(split))
    # A set gives O(1) membership tests in the per-video loop below.
    trainvideos = set(readsplitfile(splitfile))
    trainlist = []
    testlist = []

    # NOTE(security): pickle.load can execute arbitrary code stored in the
    # file; only load annotation files that come from a trusted source.
    with open(os.path.join(split_dir, 'pyannot.pkl'), 'rb') as fff:
        database = pickle.load(fff)

    train_action_counts = np.zeros(len(CLASSES), dtype=np.int32)
    test_action_counts = np.zeros(len(CLASSES), dtype=np.int32)

    # Per-class sampling step, used to balance frames across classes.
    # TODO: set these to ones to recompute ratios from the counts printed at
    # the end (might be useful for JHMDB21).
    ratios = np.asarray([1.03, 0.75, 4.22, 1.32, 0.8, 2.36, 1.99, 2.66, 2.68,
                         4.51, 5.56, 2.46, 3.17, 2.76, 3.89, 2.28, 4.01, 3.08,
                         6.06, 3.28, 1.51, 3.05, 0.6, 3.84])
    video_list = []
    for vid, videoname in enumerate(sorted(database.keys())):
        video_list.append(videoname)
        video = database[videoname]
        actidx = video['label']
        numf = video['numf']
        istrain = videoname in trainvideos
        # Test videos are sampled three times more sparsely than train ones.
        step = ratios[actidx] if istrain else max(1, ratios[actidx]) * 3
        lastf = numf - 1
        if fulltest:
            step = 1
            lastf = numf

        annotations = video['annotations']
        num_tubes = len(annotations)

        # tube_labels[f, t] is 1 when tube t is annotated in frame f.
        tube_labels = np.zeros((numf, num_tubes), dtype=np.int16)
        tube_boxes = [[[] for _ in range(num_tubes)] for _ in range(numf)]
        for tubeid, tube in enumerate(annotations):
            # Walk from the start frame to the end frame of the tube.
            for frame_id, frame_num in enumerate(np.arange(tube['sf'], tube['ef'], 1)):
                label = tube['label']
                assert actidx == label, 'Tube label and video label should be same'
                # astype copies, so the stored annotation is left untouched.
                box = tube['boxes'][frame_id, :].astype(np.float32)
                box[2] += box[0]  # convert width to xmax
                box[3] += box[1]  # convert height to ymax
                tube_labels[frame_num, tubeid] = 1
                tube_boxes[frame_num][tubeid] = box

        for frame_num in np.arange(0, lastf, step):
            # step may be fractional; truncate to a frame index.
            frame_num = int(frame_num)
            present = [tubeid for tubeid in range(num_tubes)
                       if tube_labels[frame_num, tubeid] > 0]

            if present:
                labels = [annotations[tubeid]['label'] for tubeid in present]
                all_boxes = [np.asarray(tube_boxes[frame_num][tubeid])
                             for tubeid in present]
                sample = [vid, frame_num + 1, np.asarray(labels), np.asarray(all_boxes)]
                if istrain:
                    trainlist.append(sample)
                    train_action_counts[actidx] += 1
                else:
                    testlist.append(sample)
                    test_action_counts[actidx] += 1
            elif fulltest and not istrain:
                # Test frame without ground truth: placeholder label and box.
                testlist.append([vid, frame_num + 1, np.asarray([9999]), np.zeros((1, 4))])

    # Print the distribution of the train and test sets per action.
    for actidx, act_count in enumerate(train_action_counts):
        print('train {:05d} test {:05d} action {:02d} {:s}'.format(act_count, test_action_counts[actidx] , int(actidx), CLASSES[actidx]))

    newratios = train_action_counts/5000
    print('[' + ''.join('{:0.2f}, '.format(r) for r in newratios) + ']')
    print('Trainlistlen', len(trainlist), ' testlist ', len(testlist))

    return trainlist, testlist, video_list
annot_info[1] 200 | video_id = annot_info[0] 201 | videoname = self.video_list[video_id] 202 | img_name = self._imgpath + '/{:s}/{:05d}.jpg'.format(videoname, frame_num) 203 | # print(img_name) 204 | img = cv2.imread(img_name) 205 | height, width, channels = img.shape 206 | 207 | target = self.target_transform(annot_info[3], annot_info[2], width, height) 208 | 209 | if self.transform is not None: 210 | target = np.array(target) 211 | img, boxes, labels = self.transform(img, target[:, :4], target[:, 4]) 212 | img = img[:, :, (2, 1, 0)] 213 | # img = img.transpose(2, 0, 1) 214 | target = np.hstack((boxes, np.expand_dims(labels, axis=1))) 215 | # print(height, width,target) 216 | return torch.from_numpy(img).permute(2, 0, 1), target, index 217 | # return torch.from_numpy(img), target, height, width 218 | 219 | 220 | def detection_collate(batch): 221 | """Custom collate fn for dealing with batches of images that have a different 222 | number of associated object annotations (bounding boxes). 
def point_form(boxes):
    """Convert center-size boxes to (xmin, ymin, xmax, ymax) form.

    Args:
        boxes: (tensor) center-size boxes ``(cx, cy, w, h)``, Shape: [N,4].
    Return:
        boxes: (tensor) the same boxes as ``(xmin, ymin, xmax, ymax)``.
    """
    half_wh = boxes[:, 2:] / 2
    return torch.cat((boxes[:, :2] - half_wh,      # xmin, ymin
                      boxes[:, :2] + half_wh), 1)  # xmax, ymax


def center_size(boxes):
    """Convert point-form boxes to (cx, cy, w, h) form.

    Args:
        boxes: (tensor) point-form boxes ``(xmin, ymin, xmax, ymax)``,
            Shape: [N,4].
    Return:
        boxes: (tensor) the same boxes as ``(cx, cy, w, h)``.
    """
    # Both halves must be wrapped in ONE tuple for torch.cat. The original
    # closed the parenthesis early, passing a tensor where the sequence
    # belongs, so every call raised.
    return torch.cat(((boxes[:, 2:] + boxes[:, :2]) / 2,  # cx, cy
                      boxes[:, 2:] - boxes[:, :2]), 1)    # w, h


def intersect(box_a, box_b):
    """Compute the pairwise intersection area of two sets of boxes.

    Both inputs are broadcast to [A,B,2] views (no copy of the data):
        [A,2] -> [A,1,2] -> [A,B,2]
        [B,2] -> [1,B,2] -> [A,B,2]
    Args:
        box_a: (tensor) point-form bounding boxes, Shape: [A,4].
        box_b: (tensor) point-form bounding boxes, Shape: [B,4].
    Return:
        (tensor) intersection area, Shape: [A,B].
    """
    A = box_a.size(0)
    B = box_b.size(0)
    max_xy = torch.min(box_a[:, 2:].unsqueeze(1).expand(A, B, 2),
                       box_b[:, 2:].unsqueeze(0).expand(A, B, 2))
    min_xy = torch.max(box_a[:, :2].unsqueeze(1).expand(A, B, 2),
                       box_b[:, :2].unsqueeze(0).expand(A, B, 2))
    # Disjoint boxes give a negative extent; clamp it to zero area.
    inter = torch.clamp(max_xy - min_xy, min=0)
    return inter[:, :, 0] * inter[:, :, 1]
def match(threshold, truths, priors, variances, labels, loc_t, conf_t, idx):
    """Assign a ground truth box to every prior and store the targets.

    Each prior is matched to the ground truth box it overlaps most (jaccard
    overlap). Every ground truth additionally claims its single best prior.
    The encoded offsets and class targets are written into row ``idx`` of
    ``loc_t`` and ``conf_t``; nothing is returned.
    Args:
        threshold: (float) overlap below which a prior is labelled background.
        truths: (tensor) ground truth boxes in point form, Shape: [num_obj,4].
        priors: (tensor) prior boxes in center-size form, Shape: [n_priors,4].
        variances: (list[float]) variances of the prior boxes.
        labels: (tensor) class label of each ground truth, Shape: [num_obj].
        loc_t: (tensor) filled with the encoded location targets.
        conf_t: (tensor) filled with the class target of each prior
            (0 is background, otherwise label + 1).
        idx: (int) current batch index.
    """
    iou = jaccard(truths, point_form(priors))
    # Best prior for each ground truth, Shape: [num_obj].
    _, best_prior_idx = iou.max(1)
    # Best ground truth for each prior, Shape: [num_priors].
    best_truth_overlap, best_truth_idx = iou.max(0)
    # Give each ground truth's best prior an overlap of 2 so it always
    # survives the threshold below.
    best_truth_overlap.index_fill_(0, best_prior_idx, 2)
    # Ensure every ground truth is matched with its prior of max overlap.
    for gt_index in range(best_prior_idx.size(0)):
        best_truth_idx[best_prior_idx[gt_index]] = gt_index
    matched_boxes = truths[best_truth_idx]     # Shape: [num_priors,4]
    class_targets = labels[best_truth_idx] + 1  # Shape: [num_priors]
    class_targets[best_truth_overlap < threshold] = 0  # background
    loc_t[idx] = encode(matched_boxes, priors, variances)
    conf_t[idx] = class_targets


def encode(matched, priors, variances):
    """Encode matched ground truth boxes as offsets relative to the priors.

    Args:
        matched: (tensor) ground truth box for each prior, in point form,
            Shape: [num_priors,4].
        priors: (tensor) prior boxes in center-size form,
            Shape: [num_priors,4].
        variances: (list[float]) variances of the prior boxes.
    Return:
        encoded boxes (tensor), Shape: [num_priors,4]
    """
    prior_centers = priors[:, :2]
    prior_sizes = priors[:, 2:]
    # Distance between the matched center and the prior center, scaled by
    # the prior size and the first variance.
    center_offset = (matched[:, :2] + matched[:, 2:]) / 2 - prior_centers
    center_offset = center_offset / (variances[0] * prior_sizes)
    # Log of the matched/prior size ratio, scaled by the second variance.
    size_ratio = (matched[:, 2:] - matched[:, :2]) / prior_sizes
    size_offset = torch.log(size_ratio) / variances[1]
    # Target for smooth_l1_loss.
    return torch.cat([center_offset, size_offset], 1)


# Adapted from https://github.com/Hakuyume/chainer-ssd
def decode(loc, priors, variances):
    """Decode predicted offsets back into point-form boxes.

    This undoes the encoding applied by ``encode``.
    Args:
        loc (tensor): location predictions, Shape: [num_priors,4]
        priors (tensor): prior boxes in center-size form,
            Shape: [num_priors,4].
        variances: (list[float]) variances of the prior boxes.
    Return:
        decoded bounding box predictions as (xmin, ymin, xmax, ymax)
    """
    centers = priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:]
    sizes = priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])
    top_left = centers - sizes / 2
    bottom_right = sizes + top_left
    return torch.cat((top_left, bottom_right), 1)
# Original author: Francisco Massa:
# https://github.com/fmassa/object-detection.torch
# Ported to PyTorch by Max deGroot (02/01/2017)
def nms(boxes, scores, overlap=0.5, top_k=200):
    """Apply non-maximum suppression at test time to avoid detecting too many
    overlapping bounding boxes for a given object.

    Args:
        boxes: (tensor) The location preds for the img, Shape: [num_priors,4].
        scores: (tensor) The class predscores for the img, Shape:[num_priors].
        overlap: (float) The overlap thresh for suppressing unnecessary boxes.
        top_k: (int) The Maximum number of box preds to consider.
    Return:
        (keep, count): ``keep`` is a long tensor of length num_priors whose
        first ``count`` entries are the indices of the kept boxes, highest
        score first; the remaining entries are zero padding.
    """
    keep = scores.new(scores.size(0)).zero_().long()
    if boxes.numel() == 0:
        # Return the same (keep, count) pair as the non-empty path. The
        # original returned the bare tensor here, which callers that unpack
        # two values cannot handle.
        return keep, 0
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    area = torch.mul(x2 - x1, y2 - y1)
    _, idx = scores.sort(0)  # sort in ascending order
    idx = idx[-top_k:]  # indices of the top-k largest vals

    count = 0
    while idx.numel() > 0:
        i = idx[-1]  # index of current largest val
        keep[count] = i
        count += 1
        if idx.size(0) == 1:
            break
        idx = idx[:-1]  # remove kept element from view
        # Corners of the intersection between box i and each remaining box.
        # Plain index_select is used instead of out= buffers, which had to be
        # resized on every iteration.
        xx1 = torch.clamp(torch.index_select(x1, 0, idx), min=x1[i])
        yy1 = torch.clamp(torch.index_select(y1, 0, idx), min=y1[i])
        xx2 = torch.clamp(torch.index_select(x2, 0, idx), max=x2[i])
        yy2 = torch.clamp(torch.index_select(y2, 0, idx), max=y2[i])
        # Negative extents mean no overlap.
        w = torch.clamp(xx2 - xx1, min=0.0)
        h = torch.clamp(yy2 - yy1, min=0.0)
        inter = w * h
        # IoU = i / (area(a) + area(b) - i)
        rem_areas = torch.index_select(area, 0, idx)
        union = (rem_areas - inter) + area[i]
        IoU = inter / union
        # keep only elements with an IoU <= overlap
        idx = idx[IoU.le(overlap)]
    return keep, count
19 | 20 | """ 21 | def __init__(self, cfg): 22 | super(PriorBox, self).__init__() 23 | # self.type = cfg.name 24 | self.image_size = cfg['min_dim'] 25 | # number of priors for feature map location (either 4 or 6) 26 | self.num_priors = len(cfg['aspect_ratios']) 27 | self.variance = cfg['variance'] or [0.1] 28 | self.feature_maps = cfg['feature_maps'] 29 | self.min_sizes = cfg['min_sizes'] 30 | self.max_sizes = cfg['max_sizes'] 31 | self.steps = cfg['steps'] 32 | self.aspect_ratios = cfg['aspect_ratios'] 33 | self.clip = cfg['clip'] 34 | self.version = cfg['name'] 35 | for v in self.variance: 36 | if v <= 0: 37 | raise ValueError('Variances must be greater than 0') 38 | 39 | def forward(self): 40 | mean = [] 41 | # TODO merge these 42 | if self.version == 'v2': 43 | for k, f in enumerate(self.feature_maps): 44 | for i, j in product(range(f), repeat=2): 45 | f_k = self.image_size / self.steps[k] 46 | # unit center x,y 47 | cx = (j + 0.5) / f_k 48 | cy = (i + 0.5) / f_k 49 | 50 | # aspect_ratio: 1 51 | # rel size: min_size 52 | s_k = self.min_sizes[k]/self.image_size 53 | mean += [cx, cy, s_k, s_k] 54 | 55 | # aspect_ratio: 1 56 | # rel size: sqrt(s_k * s_(k+1)) 57 | s_k_prime = sqrt(s_k * (self.max_sizes[k]/self.image_size)) 58 | mean += [cx, cy, s_k_prime, s_k_prime] 59 | 60 | # rest of aspect ratios 61 | for ar in self.aspect_ratios[k]: 62 | mean += [cx, cy, s_k*sqrt(ar), s_k/sqrt(ar)] 63 | mean += [cx, cy, s_k/sqrt(ar), s_k*sqrt(ar)] 64 | 65 | else: 66 | # original version generation of prior (default) boxes 67 | for i, k in enumerate(self.feature_maps): 68 | step_x = step_y = self.image_size/k 69 | for h, w in product(range(k), repeat=2): 70 | c_x = ((w+0.5) * step_x) 71 | c_y = ((h+0.5) * step_y) 72 | c_w = c_h = self.min_sizes[i] / 2 73 | s_k = self.image_size # 300 74 | # aspect_ratio: 1, 75 | # size: min_size 76 | mean += [(c_x-c_w)/s_k, (c_y-c_h)/s_k, 77 | (c_x+c_w)/s_k, (c_y+c_h)/s_k] 78 | if self.max_sizes[i] > 0: 79 | # aspect_ratio: 1 80 | # size: 
sqrt(min_size * max_size)/2 81 | c_w = c_h = sqrt(self.min_sizes[i] * 82 | self.max_sizes[i])/2 83 | mean += [(c_x-c_w)/s_k, (c_y-c_h)/s_k, 84 | (c_x+c_w)/s_k, (c_y+c_h)/s_k] 85 | # rest of prior boxes 86 | for ar in self.aspect_ratios[i]: 87 | if not (abs(ar-1) < 1e-6): 88 | c_w = self.min_sizes[i] * sqrt(ar)/2 89 | c_h = self.min_sizes[i] / sqrt(ar)/2 90 | mean += [(c_x-c_w)/s_k, (c_y-c_h)/s_k, 91 | (c_x+c_w)/s_k, (c_y+c_h)/s_k] 92 | # back to torch land 93 | output = torch.Tensor(mean).view(-1, 4) 94 | if self.clip: 95 | output.clamp_(max=1, min=0) 96 | return output 97 | -------------------------------------------------------------------------------- /layers/modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .l2norm import L2Norm 2 | from .multibox_loss import MultiBoxLoss 3 | 4 | __all__ = ['L2Norm', 'MultiBoxLoss'] 5 | -------------------------------------------------------------------------------- /layers/modules/l2norm.py: -------------------------------------------------------------------------------- 1 | 2 | import torch.nn as nn 3 | import torch 4 | import torch.nn.functional as F 5 | 6 | # class L2Norm(nn.Module): 7 | # def __init__(self,n_channels, scale): 8 | # super(L2Norm,self).__init__() 9 | # self.n_channels = n_channels 10 | # self.gamma = scale or None 11 | # self.eps = 1e-10 12 | # self.weight = nn.Parameter(torch.Tensor(self.n_channels)) 13 | # self.reset_parameters() 14 | 15 | # def reset_parameters(self): 16 | # init.constant(self.weight,self.gamma) 17 | 18 | # def forward(self, x): 19 | # norm = x.pow(2).sum(dim=1, keepdim=True).sqrt()+self.eps 20 | # x /= norm 21 | # out = self.weight.unsqueeze(0).unsqueeze(2).unsqueeze(3).expand_as(x) * x 22 | # return out 23 | 24 | class L2Norm(nn.Module): 25 | def __init__(self, in_channels, initial_scale): 26 | super(L2Norm, self).__init__() 27 | self.in_channels = in_channels 28 | self.weight = nn.Parameter(torch.Tensor(in_channels)) 29 | 
class MultiBoxLoss(nn.Module):
    """SSD Weighted Loss Function
    Compute Targets:
        1) Produce Confidence Target Indices by matching ground truth boxes
           with (default) 'priorboxes' that have jaccard index > threshold
           parameter (default threshold: 0.5).
        2) Produce localization target by 'encoding' variance into offsets of
           ground truth boxes and their matched 'priorboxes'.
        3) Hard negative mining to filter the excessive number of negative
           examples that comes with using a large number of default bounding
           boxes. (default negative:positive ratio 3:1)
    Objective Loss:
        L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
        Where, Lconf is the CrossEntropy Loss and Lloc is the SmoothL1 Loss
        weighted by α which is set to 1 by cross val.
        Args:
            c: class confidences,
            l: predicted boxes,
            g: ground truth boxes
            N: number of matched default boxes
        See: https://arxiv.org/pdf/1512.02325.pdf for more details.
    """

    def __init__(self, num_classes, overlap_thresh, prior_for_matching,
                 bkg_label, neg_mining, neg_pos, neg_overlap, encode_target,
                 use_gpu=True):
        super(MultiBoxLoss, self).__init__()
        self.use_gpu = use_gpu
        self.num_classes = num_classes
        self.threshold = overlap_thresh
        self.background_label = bkg_label
        self.encode_target = encode_target
        self.use_prior_for_matching = prior_for_matching
        self.do_neg_mining = neg_mining
        self.negpos_ratio = neg_pos
        self.neg_overlap = neg_overlap
        self.variance = cfg['variance']

    def forward(self, predictions, targets):
        """Multibox Loss
        Args:
            predictions (tuple): A tuple containing loc preds, conf preds,
            and prior boxes from SSD net.
                conf shape: torch.size(batch_size,num_priors,num_classes)
                loc shape: torch.size(batch_size,num_priors,4)
                priors shape: torch.size(num_priors,4)

            targets (list of tensors): ground truth boxes and labels, one
                tensor of shape [num_objs,5] per image (last column is the
                label).
        Return:
            (loss_l, loss_c): localization and confidence losses, each
            divided by the number of matched priors in the batch.
        """
        loc_data, conf_data, priors = predictions
        num = loc_data.size(0)
        priors = priors[:loc_data.size(1), :]
        num_priors = priors.size(0)

        # Match priors (default boxes) and ground truth boxes. The targets
        # are constants for the loss, so no graph is recorded here.
        with torch.no_grad():
            device = torch.device('cuda' if self.use_gpu else 'cpu')
            # Every row is overwritten by match() below, so uninitialised
            # storage is fine.
            loc_t = torch.empty(num, num_priors, 4, device=device)
            conf_t = torch.empty(num, num_priors, dtype=torch.long,
                                 device=device)
            defaults = priors.data
            for idx in range(num):
                truths = targets[idx][:, :-1].data
                labels = targets[idx][:, -1].data
                match(self.threshold, truths, defaults, self.variance, labels,
                      loc_t, conf_t, idx)

        pos = conf_t > 0

        # Localization Loss (Smooth L1)
        # Shape: [batch,num_priors,4]
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1, 4)
        loc_t = loc_t[pos_idx].view(-1, 4)
        loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

        # Hard negative mining only selects indices, so it needs no gradient.
        with torch.no_grad():
            batch_conf = conf_data.view(-1, self.num_classes)
            # Per-prior cross entropy against the matched class.
            loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1))

            loss_c[pos.view(-1, 1)] = 0  # filter out pos boxes for now
            loss_c = loss_c.view(num, -1)
            # Rank of each prior when sorted by descending loss.
            _, loss_idx = loss_c.sort(1, descending=True)
            _, idx_rank = loss_idx.sort(1)
            num_pos = pos.long().sum(1, keepdim=True)
            num_neg = torch.clamp(self.negpos_ratio*num_pos, max=pos.size(1)-1)
            neg = idx_rank < num_neg.expand_as(idx_rank)

        # Confidence Loss Including Positive and Negative Examples
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)

        conf_p = conf_data[pos_idx | neg_idx].view(-1, self.num_classes)
        targets_weighted = conf_t[pos | neg]
        loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

        # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
        # With no positive match both sums are 0; dividing by N == 0 would
        # turn them into NaN and poison the optimiser step, so N is floored
        # at 1.
        N = max(float(num_pos.sum()), 1.0)
        loss_l /= N
        loss_c /= N
        return loss_l, loss_c
9 | % --------------------------------------------------------- 10 | 11 | detresultpath = dopts.detDir; 12 | costtype = dopts.costtype; 13 | gap = dopts.gap; 14 | videolist = dopts.vidList; 15 | actions = dopts.actions; 16 | saveName = dopts.actPathDir; 17 | iouth = dopts.iouThresh; 18 | numActions = length(actions); 19 | nms_thresh = 0.45; 20 | videos = getVideoNames(videolist); 21 | NumVideos = length(videos); 22 | 23 | for vid=1:2%NumVideos 24 | tic; 25 | videoID = videos{vid}; 26 | videoDetDir = [detresultpath,videoID,'/']; 27 | fprintf('computing tubes for vide [%d out of %d] video ID = %s\n',vid,NumVideos, videoID); 28 | %% loop over all the frames of the video 29 | fprintf('Reading detections '); 30 | frames = readDetections(videoDetDir); 31 | fprintf('\nDone reading detections\n'); 32 | fprintf('Gernrating action paths ...........\n'); 33 | %% parllel loop over all action class and genrate paths for each class 34 | threshold = [0.01,0.5,0.9]; 35 | for iiii = 2:2 36 | dis_thres = threshold(iiii); 37 | 38 | my_live_paths = cell(1); %% Stores live paths 39 | my_dead_paths = cell(1); %% Store the paths that has been terminated 40 | 41 | for i = 1:24 42 | my_live_paths{i} = struct(); 43 | my_dead_paths{i} = struct(); 44 | my_dead_paths{i}.dp_count = 0; 45 | end 46 | 47 | action_frames = struct(); 48 | for f=1:length(frames) 49 | for a=1:numActions 50 | %allpaths{a} = genActionPaths(frames, a, nms_thresh, iouth, costtype,gap,videoID, final_tubes); 51 | [boxes,scores,allscores] = dofilter(frames, a, f, nms_thresh); 52 | action_frames(f).boxes = boxes; 53 | action_frames(f).scores = scores; 54 | action_frames(f).allScores = allscores; 55 | [my_live_paths{a}, my_dead_paths{a}] = incremental_linking(f, action_frames, iouth, costtype, gap,... 
56 | my_live_paths{a}, my_dead_paths{a},a); 57 | end 58 | 59 | strr = strcat('/home/zhujiagang/realtime-action-detection/ucf24/rgb-images/', videoID, '/', num2str(f, '%05d'), '.jpg'); 60 | img = imread(strr); 61 | 62 | dis_boxes = []; 63 | for a=1:numActions 64 | %size(my_live_paths{a}, 2) 65 | if size(my_live_paths{a}, 2) > 0 66 | for ii = 1:size(my_live_paths{a}, 2) 67 | if isfield(my_live_paths{a}(ii),'scores') 68 | if my_live_paths{a}(ii).foundAT(end) == f 69 | if my_live_paths{a}(ii).scores(end) > dis_thres 70 | count = my_live_paths{a}(ii).count; 71 | dis_boxes = [dis_boxes;my_live_paths{a}(ii).boxes(count,:), my_live_paths{a}(ii).scores(end), a]; 72 | pt = round(my_live_paths{a}(ii).boxes(count,1:2)); 73 | wSize = round(my_live_paths{a}(ii).boxes(count,3:4) - my_live_paths{a}(ii).boxes(count,1:2)); 74 | 75 | %% adding boxes to images 76 | img = drawRect(img, pt, wSize); 77 | end 78 | end 79 | end 80 | end 81 | end 82 | end 83 | %% display images, scores and boxes 84 | if size(dis_boxes,1)>0 85 | strcell=cell(size(dis_boxes,1),1); 86 | for iii=1:size(dis_boxes,1) 87 | strcell(iii) = {strcat(actions{dis_boxes(iii,6)}, ': ', num2str(dis_boxes(iii,5),3))}; 88 | end 89 | RGB = insertText(img, double(dis_boxes(:,1:2)), strcell); 90 | imshow(RGB) 91 | str_save_dir = strcat('/home/zhujiagang/realtime-action-detection/online_save/',videoID,'_', num2str(dis_thres)); 92 | if ~exist(str_save_dir) 93 | mkdir(str_save_dir) 94 | end 95 | str_save = strcat(str_save_dir, '/', num2str(f, '%05d'), '.jpg'); 96 | imwrite(RGB, str_save); 97 | end 98 | end 99 | 100 | fprintf('All Done in %03d Seconds\n',round(toc)); 101 | end 102 | 103 | disp('done computing action paths'); 104 | 105 | end 106 | end 107 | 108 | function paths = genActionPaths(frames,a,nms_thresh,iouth,costtype,gap, video_id, final_tubes) 109 | action_frames = struct(); 110 | 111 | for f=1:length(frames) 112 | [boxes,scores,allscores] = dofilter(frames,a,f,nms_thresh); 113 | action_frames(f).boxes = boxes; 114 | 
% -------------------------------------------------------------------------
function [videos] = getVideoNames(split_file)
% -------------------------------------------------------------------------
% GETVIDEONAMES  Read the video identifiers listed in a split file.
%
%   videos = getVideoNames(split_file) opens split_file, reads every
%   whitespace-delimited token with textscan(fid, '%s') and returns the
%   tokens, in file order, as a 1xN cell array of character vectors.
%
%   If the file contains no tokens the result is cell(1) (a 1x1 cell holding
%   an empty array), which is what the previous implementation returned.
%
%   An error is raised when the file cannot be opened.
fprintf('Get both lis is %s\n',split_file);

fid = fopen(split_file,'r');
if fid < 0
    error('getVideoNames:cannotOpenFile', ...
          'Unable to open video list file: %s', split_file);
end
data = textscan(fid, '%s');
fclose(fid);   % release the handle; it used to stay open after every call

names = data{1};
if isempty(names)
    videos = cell(1);
else
    videos = reshape(names, 1, []);   % row cell array, one ID per element
end
end
function [ dest ] = drawRect( src, pt, wSize, lineSize, color )
% DRAWRECT  Paint a rectangle outline onto a copy of an image.
%
%   dest = drawRect(src, pt, wSize) draws the outline whose top-left corner
%   is pt = [x y] and whose extent is wSize = [width height].
%   dest = drawRect(src, pt, wSize, lineSize, color) also sets the number of
%   outline rings (default 1) and the per-channel colour (default
%   [255 255 0]).
%
%   The corner is clamped into the image and the extent is shortened so the
%   far edge does not pass the image border. A single-channel src is
%   replicated into three channels before drawing.

if nargin < 5
    color = [255 255 0];
end
if nargin < 4
    lineSize = 1;
end
if nargin < 3
    disp('inenough parameters')
    return;
end

[imgH, imgW, nChan] = size(src);

% --- horizontal placement: clamp the corner, then limit the width ---
left = pt(1);
boxW = wSize(1);
if left > imgW
    left = imgW;
end
if left < 1
    left = 1;
end
if (left + boxW) > imgW
    boxW = imgW - left;
end

% --- vertical placement: clamp the corner, then limit the height ---
top = pt(2);
boxH = wSize(2);
if top > imgH
    top = imgH;
end
if top < 1
    top = 1;
end
if (top + boxH) > imgH
    boxH = imgH - top;
end

% a far edge that would fall before the first pixel resets the extent to 1
if (left + boxW) < 1
    boxW = 1;
end
if (top + boxH) < 1
    boxH = 1;
end

% work on a three-channel copy
if 1 == nChan
    dest = cat(3, src, src, src);
else
    dest = src;
end

% each ring sits one pixel further out than the previous one
for ring = 1 : lineSize
    grow = ring - 1;
    rowTop = top - grow;
    rowBottom = top + boxH + grow;
    colLeft = left - grow;
    colRight = left + boxW + grow;
    for c = 1 : 3
        dest( rowTop           , colLeft:colRight , c ) = color(c);
        dest( rowBottom        , colLeft:colRight , c ) = color(c);
        dest( rowTop:rowBottom , colLeft          , c ) = color(c);
        dest( rowTop:rowBottom , colRight         , c ) = color(c);
    end
end

end
function fusedActionPaths(dopts)
% AUTORIGHTS
% ---------------------------------------------------------
% Copyright (c) 2016, Gurkirt Singh
%
% This code and is available
% under the terms of the Simplified BSD License provided in
% LICENSE. Please retain this notice and LICENSE if you use
% this file (or any portion of it) in your project.
% ---------------------------------------------------------
%
% FUSEDACTIONPATHS  Link fused detections frame by frame and save overlays.
%
%   For every video named in dopts.vidList the base and top detections are
%   read with readDetections. Then, frame by frame and class by class, the
%   boxes are fused with fuseboxes and handed to incremental_linking, which
%   updates the live/dead paths of that class. After each frame, every live
%   path that was extended in this frame and whose latest score exceeds the
%   display threshold (0.5) is drawn on the RGB frame, labelled with
%   insertText, shown with imshow and written as a JPG.
%
%   Fields read from dopts:
%     basedetDir, topdetDir  detection directories (video ID is appended)
%     vidList                text file listing the video IDs
%     actions                cell array of class names
%     iouThresh, costtype, gap   forwarded to incremental_linking
%     fuseiouth, fusiontype      forwarded to fuseboxes
%   Optional fields (fall back to the previously hard-coded locations):
%     displayImgDir   directory holding <videoID>/<%05d>.jpg frames
%     displaySaveDir  directory under which the overlays are written

detresultpathBase = dopts.basedetDir;
detresultpathTop  = dopts.topdetDir;
videolist  = dopts.vidList;
actions    = dopts.actions;
iouth      = dopts.iouThresh;
numActions = length(actions);
costtype   = dopts.costtype;
gap        = dopts.gap;
fuseiouth  = dopts.fuseiouth;
fusiontype = dopts.fusiontype;
nms_thresh = 0.45;

% Image source / overlay destination. The defaults are the paths that used
% to be hard-coded; callers can now override them without editing the file.
imgRoot = '/home/zhujiagang/realtime-action-detection/ucf24/rgb-images/';
if isfield(dopts, 'displayImgDir')
    imgRoot = dopts.displayImgDir;
end
saveRoot = '/home/zhujiagang/realtime-action-detection/online_save/';
if isfield(dopts, 'displaySaveDir')
    saveRoot = dopts.displaySaveDir;
end

videos = getVideoNames(videolist);
NumVideos = length(videos);

for vid=1:NumVideos
    tt = tic;
    videoID = videos{vid};
    videoDetDirBase = [detresultpathBase,videoID,'/'];
    videoTopDirBase = [detresultpathTop,videoID,'/'];
    frames = readDetections(videoDetDirBase,videoTopDirBase);
    fprintf('\nDone reading detection files \n');
    fprintf('Gernrating action paths ...........\n');

    threshold = [0.01,0.5,0.9];
    for iiii = 2:2   % only the 0.5 display threshold is used
        dis_thres = threshold(iiii);

        % One live/dead path container per action class. Sized from the
        % class list instead of the former literal 24.
        my_live_paths = cell(1, numActions); %% Stores live paths
        my_dead_paths = cell(1, numActions); %% Store the paths that has been terminated
        for i = 1:numActions
            my_live_paths{i} = struct();
            my_dead_paths{i} = struct();
            my_dead_paths{i}.dp_count = 0;
        end

        action_frames = struct();
        for f=1:length(frames)
            %% update the paths of every class with the detections of frame f
            for a=1:numActions
                baseBoxes = frames(f).baseBoxes;
                baseAllScores = frames(f).baseScores;
                topBoxes = frames(f).topBoxes;
                topAllScores = frames(f).topScores;
                meanScores = frames(f).meanScores;
                [boxes, allscores] = fuseboxes(baseBoxes,topBoxes,baseAllScores,topAllScores,meanScores,fuseiouth,fusiontype,a,nms_thresh);

                action_frames(f).allScores = allscores;
                action_frames(f).boxes = boxes(:,1:4);
                action_frames(f).scores = boxes(:,5);

                [my_live_paths{a}, my_dead_paths{a}] = incremental_linking(f, action_frames, iouth, costtype, gap,...
                    my_live_paths{a}, my_dead_paths{a},a);
            end

            strr = strcat(imgRoot, videoID, '/', num2str(f, '%05d'), '.jpg');
            img = imread(strr);

            %% collect and draw the boxes of paths extended in this frame
            dis_boxes = [];
            for a=1:numActions
                for ii = 1:size(my_live_paths{a}, 2)
                    if ~isfield(my_live_paths{a}(ii),'scores')
                        continue;   % placeholder struct, no path yet
                    end
                    livePath = my_live_paths{a}(ii);
                    if livePath.foundAT(end) == f && livePath.scores(end) > dis_thres
                        count = livePath.count;
                        dis_boxes = [dis_boxes;livePath.boxes(count,:), livePath.scores(end), a];
                        pt = round(livePath.boxes(count,1:2));
                        wSize = round(livePath.boxes(count,3:4) - livePath.boxes(count,1:2));

                        %% adding boxes to images
                        img = drawRect(img, pt, wSize);
                    end
                end
            end

            %% display images, scores and boxes
            if size(dis_boxes,1)>0
                strcell=cell(size(dis_boxes,1),1);
                for iii=1:size(dis_boxes,1)
                    strcell(iii) = {strcat(actions{dis_boxes(iii,6)}, ': ', num2str(dis_boxes(iii,5),3))};
                end
                RGB = insertText(img, double(dis_boxes(:,1:2)), strcell);
                imshow(RGB)
                str_save_dir = strcat(saveRoot,videoID,'_', num2str(dis_thres));
                % ask specifically for a folder: a bare exist() is also
                % non-zero for a variable, file or function of that name
                if ~exist(str_save_dir, 'dir')
                    mkdir(str_save_dir)
                end
                str_save = strcat(str_save_dir, '/', num2str(f, '%05d'), '.jpg');
                imwrite(RGB, str_save);
            end
        end

        % time since this video's own tic; a bare toc does not see tt
        fprintf('All Done in %03d Seconds\n',round(toc(tt)));
    end

    disp('done computing action paths');
end

disp('done computing action paths');
end
% ---------------------------------------------------------
function [sb,ss] = boost_fusion(sb, fb,ss,fs,fuseiouth,a) % bs - boxes_spatial bf-boxes_flow
% ---------------------------------------------------------
% BOOST_FUSION  Boost spatial detections with overlapping flow detections.
%
%   sb, fb : spatial / flow boxes, one row each, [x1 y1 x2 y2 score]
%   ss, fs : score rows belonging to the rows of sb / fb
%   fuseiouth : minimum overlap (as returned by inters_union) needed to fuse
%   a      : index of the class whose score is written into sb(:,5)
%
%   For each spatial box the flow box with the largest overlap is looked up.
%   If that overlap reaches fuseiouth and the flow box has not been used by
%   an earlier spatial box, the flow scores, weighted by the overlap, are
%   added to the spatial scores, the sum is renormalised to 1, and the class
%   score sb(i,5) is updated. Every flow box that was not used in a fusion
%   is appended to the output afterwards.
%
%   NOTE: the former membership tests were
%   isempty(ismember(coveredboxes,maxind)) and ~ismember(coveredboxes,i).
%   The first is true only while nothing has been fused, so at most one box
%   per call was boosted. The second is empty (false in an if) while nothing
%   has been fused, so unmatched flow boxes were dropped in that case.

numSpatial = size(sb,1);
numFlow = size(fb,1);

% [x y w h] form expected by inters_union/rectint
box_spatial = [sb(:,1:2) sb(:,3:4)-sb(:,1:2)+1];
box_flow = [fb(:,1:2) fb(:,3:4)-fb(:,1:2)+1];

covered = false(numFlow,1);   % covered(k): flow box k already fused

for i=1:numSpatial
    ovlp = inters_union(box_spatial(i,:), box_flow);
    if isempty(ovlp)
        continue;   % no flow boxes to compare against
    end
    [movlp, maxind] = max(ovlp);

    if movlp>=fuseiouth && ~covered(maxind)
        ms = ss(i,:) + fs(maxind,:)*movlp;
        ms = ms/sum(ms);
        sb(i,5) = ms(a);
        ss(i,:) = ms;
        covered(maxind) = true;
    end
end

% union-set behaviour: keep the flow boxes nobody fused with, in order
if any(~covered)
    sb = [sb;fb(~covered,:)];
    ss = [ss;fs(~covered,:)];
end
end
loc(loc(:,1)<0,1) = 0; 269 | loc(loc(:,2)<0,2) = 0; 270 | loc(loc(:,3)>319,3) = 319; 271 | loc(loc(:,4)>239,4) = 239; 272 | loc = loc + 1; 273 | frames(f).baseBoxes = loc; 274 | frames(f).baseScores = [scores(:,2:end),scores(:,1)]; 275 | 276 | filename = [top_detectionDir,detectionList{f}]; 277 | load(filename); % load loc and scores variable 278 | loc = [loc(:,1)*320, loc(:,2)*240, loc(:,3)*320, loc(:,4)*240]; 279 | loc(loc(:,1)<0,1) = 0; 280 | loc(loc(:,2)<0,2) = 0; 281 | loc(loc(:,3)>319,3) = 319; 282 | loc(loc(:,4)>239,4) = 239; 283 | loc = loc + 1; 284 | frames(f).topBoxes = loc; 285 | frames(f).topScores = [scores(:,2:end),scores(:,1)]; 286 | frames(f).meanScores = (frames(f).topScores + frames(f).baseScores)/2.0; 287 | end 288 | 289 | end 290 | 291 | 292 | function [ dest ] = drawRect( src, pt, wSize, lineSize, color ) 293 | flag = 2; 294 | 295 | if nargin < 5 296 | color = [255 255 0]; 297 | end 298 | 299 | if nargin < 4 300 | lineSize = 1; 301 | end 302 | 303 | if nargin < 3 304 | disp('inenough parameters') 305 | return; 306 | end 307 | 308 | [yA, xA, z] = size(src); 309 | x1 = pt(1); 310 | y1 = pt(2); 311 | 312 | wx = wSize(1); 313 | wy = wSize(2); 314 | 315 | if x1>xA 316 | x1 = xA; 317 | end 318 | if x1<1 319 | x1 = 1; 320 | end 321 | 322 | if y1>yA 323 | y1 = yA; 324 | end 325 | if y1<1 326 | y1 = 1; 327 | end 328 | 329 | if (x1+wx)>xA 330 | wx = xA - x1; 331 | end 332 | if (y1+wy)>yA 333 | wy = yA - y1; 334 | end 335 | 336 | if (x1+wx)<1 337 | wx = 1; 338 | end 339 | if (y1+wy)<1 340 | wy = 1; 341 | end 342 | 343 | if 1==z 344 | dest(:, : ,1) = src; 345 | dest(:, : ,2) = src; 346 | dest(:, : ,3) = src; 347 | else 348 | dest = src; 349 | end 350 | 351 | 352 | for c = 1 : 3 353 | for dl = 1 : lineSize 354 | d = dl - 1; 355 | if 1==flag 356 | dest( y1-d , x1:(x1+wx) , c ) = color(c); 357 | dest( y1+wy+d , x1:(x1+wx) , c ) = color(c); 358 | dest( y1:(y1+wy) , x1-d , c ) = color(c); 359 | dest( y1:(y1+wy) , x1+wx+d , c ) = color(c); 360 | elseif 2==flag 
function pick = nms(boxes, overlap)
% Non-maximum suppression.
%   pick = nms(boxes, overlap)
%
%   Greedily select high-scoring detections and skip detections that
%   overlap a previously selected detection too much.
%
% Return value
%   pick      Column vector of row indices into boxes, highest score first
%             ([] when boxes is empty)
%
% Arguments
%   boxes     One detection per row: [x1 y1 x2 y2 ... score]; the score is
%             read from the last column
%   overlap   Overlap threshold for suppression
%             For a selected box Bi, every remaining box Bj with
%             |Bi \cap Bj| / |Bi \cup Bj| > overlap is suppressed, i.e. the
%             measure is intersection over union. Widths and heights are
%             taken as x2-x1 and y2-y1 (no +1).

% AUTORIGHTS
% -------------------------------------------------------
% Copyright (C) 2011-2012 Ross Girshick
% Copyright (C) 2008, 2009, 2010 Pedro Felzenszwalb, Ross Girshick
% Copyright (C) 2007 Pedro Felzenszwalb, Deva Ramanan
%
% This file is part of the voc-releaseX code
% (http://people.cs.uchicago.edu/~rbg/latent/)
% and is available under the terms of an MIT-like license
% provided in COPYING. Please retain this notice and
% COPYING if you use this file (or a portion of it) in
% your project.
% -------------------------------------------------------

if isempty(boxes)
  pick = [];
else
  x1 = boxes(:,1);
  y1 = boxes(:,2);
  x2 = boxes(:,3);
  y2 = boxes(:,4);
  s = boxes(:,end);
  area = (x2-x1) .* (y2-y1);

  [~, I] = sort(s);   % ascending, so the best remaining box is I(end)
  pick = [];
  while ~isempty(I)
    last = length(I);
    i = I(last);
    pick = [pick; i];

    % overlap of the selected box with all remaining candidates at once
    rest = I(1:last-1);
    w = min(x2(i), x2(rest)) - max(x1(i), x1(rest));
    h = min(y2(i), y2(rest)) - max(y1(i), y1(rest));
    inter = w .* h;
    o = inter ./ (area(rest) + area(i) - inter);

    % only boxes that truly intersect (w > 0 and h > 0) can be suppressed
    keep = true(last, 1);
    keep(last) = false;                              % the selected box
    keep([(w > 0) & (h > 0) & (o > overlap); false]) = false;
    I = I(keep);
  end
end
6 | % 7 | % a) temporal iou = T_i / T_u 8 | % this is the intersection over union between the timing of the the tubes, 9 | % ie mathematically T_i / T_u with 10 | % the intersection T_i = max(0, max(t_g^b,t_d^b)-min(t_d^e,t_g^e) ) 11 | % and the union T_u = min(t_g^b,t_d^b)-max(t_d^e,t_g^e) 12 | % 13 | % b) for each t between max(tgb,tdb)-min(tde,tge), we compute the IoU between g_t and d_t, and average them 14 | % 15 | % Multiplying (a) and (b) is the same as computed the average of the spatial iou over all frames in T_u of the two tubes, with a spatial iou of 0 for frames where only one box exists. 16 | % c) as this is standard in detection problem, if there are multiple detections for the same groundtruth detection, the first one is counted as positive and the other ones as negatives 17 | % ###################################################################################################################################################################################### 18 | %{ 19 | gt_fnr = 1xn doube 20 | gt_bb = nx4 doubld - [x y w h] 21 | dt_fnr = 1xm double 22 | dt_bb = mx4 double - [x y w h] 23 | %} 24 | % ------------------------------------------------------------------------- 25 | function st_iou = compute_spatio_temporal_iou(gt_fnr, gt_bb, dt_fnr, dt_bb) 26 | % ------------------------------------------------------------------------- 27 | 28 | % time gt begin 29 | tgb = gt_fnr(1); 30 | % time gt end 31 | tge = gt_fnr(end); 32 | %time dt begin 33 | tdb = dt_fnr(1); 34 | tde = dt_fnr(end); 35 | % temporal intersection 36 | T_i = double(max(0, min(tge,tde)-max(tgb,tdb))); 37 | 38 | if T_i>0 39 | T_i = T_i +1; 40 | % temporal union 41 | T_u = double(max(tge,tde) - min(tgb,tdb)+1); 42 | %temporal IoU 43 | T_iou = T_i/T_u; 44 | % intersect frame numbers 45 | int_fnr = max(tgb,tdb):min(tge,tde); 46 | 47 | % find the ind of the intersected frames in the detected frames 48 | [~,int_find_dt] = ismember(int_fnr, dt_fnr); 49 | [~,int_find_gt] = ismember(int_fnr, 
% -------------------------------------------------------------------------
function iou = inters_union(bounds1,bounds2)
% -------------------------------------------------------------------------
% INTERS_UNION  Pairwise intersection-over-union of two rectangle sets.
%
%   bounds1 (n rows) and bounds2 (m rows) are passed to rectint as-is;
%   columns 3 and 4 of each row are multiplied to obtain its area.
%   iou(i,j) is the intersection area of bounds1(i,:) and bounds2(j,:)
%   divided by (area_i + area_j - intersection + eps); eps keeps the
%   division defined when the union is zero.

overlapArea = rectint(bounds1, bounds2);

% per-rectangle areas (column 3 times column 4)
areaFirst = prod(bounds1(:,3:4), 2);
areaSecond = prod(bounds2(:,3:4), 2);

% n-by-m table of area sums, minus the shared part
unionArea = bsxfun(@plus, areaFirst, areaSecond') - overlapArea;

iou = overlapArea ./ (unionArea + eps);

end
% STEP-5: Compute PR and AP for each class using scores, tp and fp in allscore
%##################################################################################################################################################

function [mAP,mAIoU,acc,AP] = get_PR_curve(annot, xmldata, testlist, actions, iou_th)
% GET_PR_CURVE  Score predicted action tubes against ground-truth tubes.
%
%   annot     struct array; .name and .tubes are read (each tube provides
%             .class, .sf, .ef and .boxes)
%   xmldata   struct array; .videoName, .class, .score, .framenr(k).fnr and
%             .boxes(k).bxs are read
%   testlist  cell array of video names of the form 'action/video'
%   actions   cell array of action names
%   iou_th    a tube is a true positive when its best spatio-temporal IoU
%             with a not-yet-matched GT tube of the same class is > iou_th
%
%   mAP, mAIoU  mean over classes of AP / average true-positive IoU
%               (NaN entries are counted as 0)
%   acc         fraction of videos whose highest-scoring tube has the label
%               stored for that video in gts
%   AP          per-class average precision

nVideos  = length(testlist);
nClasses = length(actions);
AP         = zeros(nClasses,1);
averageIoU = zeros(nClasses,1);

% allscore{a} holds one [score, is_true_positive] row per detection of
% class a; detCount(a) is the number of rows filled so far.
detCount = zeros(nClasses,1);
for a = 1:nClasses
    allscore{a} = zeros(10000,2,'single');
end

% number of ground-truth tubes per class over all test videos
gtTubesPerClass = zeros(nClasses,1);

preds = zeros(nVideos,1) - 1;   % -1 means "no detection in this video"
gts   = zeros(nVideos,1);
annotNames = {annot.name};
dtNames    = {xmldata.videoName};

for vid = 1:nVideos
    videoName = testlist{vid};
    bestScore = -10000;
    [action,~]    = getActionName(videoName);            % action the video belongs to
    [~,action_id] = find(strcmp(action, actions));
    [~,gtVidInd]  = find(strcmp(annotNames, videoName));
    [~,dtVidInd]  = find(strcmp(dtNames, videoName));

    dt_tubes = sort_detection(xmldata(dtVidInd));
    gt_tubes = annot(gtVidInd).tubes;

    nDet = length(dt_tubes.class);
    nGt  = length(gt_tubes);

    for g = 1:nGt
        action_id = gt_tubes(g).class;
        gtTubesPerClass(action_id) = gtTubesPerClass(action_id) + 1;
    end
    % class of the last GT tube, or the name-derived id when there is none
    gts(vid) = action_id;

    matched = zeros(nGt,1);
    for d = 1:nDet
        dtScore  = dt_tubes.score(d);
        dtLabel  = dt_tubes.class(d);
        dtFrames = dt_tubes.framenr(d).fnr;
        dtBoxes  = dt_tubes.boxes(d).bxs;

        if dtScore > bestScore
            preds(vid) = dtLabel;
            bestScore  = dtScore;
        end
        detCount(dtLabel) = detCount(dtLabel) + 1;

        % best-overlapping unmatched GT tube of the same class
        bestIoU = -inf; bestGt = 0;
        for g = 1:nGt
            if matched(g) || dtLabel ~= gt_tubes(g).class
                continue
            end
            gtFrames = gt_tubes(g).sf:gt_tubes(g).ef;
            iou = compute_spatio_temporal_iou(gtFrames, gt_tubes(g).boxes, dtFrames, dtBoxes);
            if iou > bestIoU
                bestIoU = iou;
                bestGt  = g;
            end
        end

        isTP = bestIoU > iou_th;
        if isTP
            matched(bestGt) = 1;
            averageIoU(dtLabel) = averageIoU(dtLabel) + bestIoU;
        end
        allscore{dtLabel}(detCount(dtLabel),:) = [dtScore, double(isTP)];
    end
end

for a = 1:nClasses
    allscore{a} = allscore{a}(1:detCount(a),:);
    scores = allscore{a}(:,1);
    labels = allscore{a}(:,2);
    [~, order] = sort(scores,'descend');
    labels = labels(order);
    fp = cumsum(labels==0);
    tp = cumsum(labels==1);
    cdet = 0;
    if ~isempty(tp)
        cdet = tp(end);
        averageIoU(a) = (averageIoU(a)+0.000001)/(tp(end)+0.00001);
    end

    recall    = tp/gtTubesPerClass(a);
    precision = tp./(fp+tp);
    AP(a) = xVOCap(recall,precision);
    draw = 0;
    if draw
        % plot precision/recall
        plot(recall,precision,'-');
        grid;
        xlabel 'recall'
        ylabel 'precision'
        title(sprintf('class: %s, AP = %.3f',actions{a},AP(a)));
    end
    % fprintf('Action %02d AP = %0.5f and AIOU %0.5f GT %03d total det %02d correct det %02d %s\n', a, AP(a),averageIoU(a),gtTubesPerClass(a),length(tp),cdet,actions{a});

end
acc = mean(preds==gts);
AP(isnan(AP)) = 0;
mAP = mean(AP);
averageIoU(isnan(averageIoU)) = 0;
mAIoU = mean(averageIoU);


%%
% ------------------------------------------------------------------------------------------------------------------------------------------------
function [action,vidID] = getActionName(str)
% Split an 'action/video' test-list entry at '/' into its first two parts.
%------------------------------------------------------------------------------------------------------------------------------------------------
parts  = strsplit(str, '/');
action = parts{1};
vidID  = parts{2};
%%
function sorted_tubes = sort_detection(dt_tubes)
% Reorder the tubes of one video by descending score.
% framenr, boxes, class and score are permuted together; nr is renumbered
% 1..N in the new order. A struct whose class field is empty is returned
% unchanged.

sorted_tubes = dt_tubes;

if isempty(dt_tubes.class)
    return
end

[~, order] = sort(dt_tubes.score, 'descend');
for rank = 1 : length(dt_tubes.class)
    src = order(rank);
    sorted_tubes.framenr(rank).fnr = dt_tubes.framenr(src).fnr;
    sorted_tubes.boxes(rank).bxs   = dt_tubes.boxes(src).bxs;
    sorted_tubes.class(rank)       = dt_tubes.class(src);
    sorted_tubes.score(rank)       = dt_tubes.score(src);
    sorted_tubes.nr(rank)          = rank;
end
%%
--------------------------------------------------------------------------------
/matlab-online-display/eval/xVOCap.m:
--------------------------------------------------------------------------------
function ap = xVOCap(rec,prec)
% From the PASCAL VOC 2011 devkit
% Area under the precision/recall curve after making precision
% monotonically non-increasing. rec and prec are column vectors.

recallPts = [0 ; rec ; 1];
precEnv   = [0 ; prec ; 0];
% running maximum from the right
for k = numel(precEnv)-1:-1:1
    precEnv(k) = max(precEnv(k), precEnv(k+1));
end
% sum rectangle areas at the points where recall changes
steps = find(recallPts(2:end) ~= recallPts(1:end-1)) + 1;
ap = sum((recallPts(steps) - recallPts(steps-1)) .* precEnv(steps));
--------------------------------------------------------------------------------
/matlab-online-display/frameAp.m:
--------------------------------------------------------------------------------
% ---------------------------------------------------------
% Copyright (c) 2017, Gurkirt Singh
% This code and is available
% under the terms of MIT License provided in LICENSE.
% Please retain this notice and LICENSE if you use
% this file (or any portion of it) in your project.
% ---------------------------------------------------------

%% This is main script to compute frame mean AP %%
%% this code is very new so hasn't been tested a lot
% Input: Detection directory; annotation file path; split file path
% Output: computes frame AP for all the detection directories
% It should produce results almost identical to test_ucf24.py

function frameAp()
% FRAMEAP  Per-class and mean frame-level AP for several detection sets.
%
% For every entry of `combinations` (one detection directory, or two
% directories plus a fusion type) the per-frame detections are read,
% matched against the ground-truth boxes of the same class at
% IoU >= iou_th, and per-class AP plus the mean AP are printed and written
% to frameAP.log.

addpath(genpath('eval/'));
addpath(genpath('utils/'));
addpath(genpath('actionpath/'));
data_root = '/home/zhujiagang/realtime-action-detection';
save_root = '/home/zhujiagang/realtime-action-detection/save';
iou_th = 0.5;
model_type = 'CONV';
dataset = 'ucf24';
list_id = '01';
split_file = sprintf('%s/%s/splitfiles/t%s.txt',data_root,dataset,list_id);
annotfile = sprintf('%s/%s/splitfiles/annots.mat',data_root,dataset);
annot = load(annotfile);
annot = annot.annot;
testlist = getVideoNames(split_file);
num_vid = length(testlist);
num_actions = 24;
init_rows = 10000; % initial per-class score buffer; doubled on demand

logfile = fopen('frameAP.log','w'); % open log file
if logfile < 0
    error('Could not open frameAP.log for writing');
end
closeLog = onCleanup(@() fclose(logfile)); % closed on return and on error

imgType = 'rgb'; iteration_num = 120000;
det_dirs1 = sprintf('%s/%s/detections/%s-%s-%s-%06d/',save_root,dataset,model_type,imgType,list_id,iteration_num);
imgType = 'brox'; iteration_num = 120000;
det_dirs2 = sprintf('%s/%s/detections/%s-%s-%s-%06d/',save_root,dataset,model_type,imgType,list_id,iteration_num);
imgType = 'fastOF'; iteration_num = 120000;
det_dirs3 = sprintf('%s/%s/detections/%s-%s-%s-%06d/',save_root,dataset,model_type,imgType,list_id,iteration_num);

combinations = {{det_dirs1},{det_dirs2},{det_dirs3},...
    {det_dirs1,det_dirs3,'boost'},{det_dirs1,det_dirs2,'boost'},...
    {det_dirs1,det_dirs3,'cat'},{det_dirs1,det_dirs2,'cat'},...
    {det_dirs1,det_dirs3,'mean'},{det_dirs1,det_dirs2,'mean'}};

annotNames = {annot.name};

for c=1:length(combinations)
    comb = combinations{c};
    if length(comb)>1
        fusion_type = comb{3};
        % Build the header with sprintf so the \n escapes become real
        % newlines; passed through '%s' they would be printed literally.
        line = sprintf('%s %s \n\n fusion type: %s\n\n',comb{1},comb{2},fusion_type);
    else
        fusion_type = 'none';
        line = comb{1};
    end

    line = sprintf('Evaluation for %s\n',line);
    fprintf('%s',line)
    fprintf(logfile,'%s',line);
    AP = zeros(num_actions,1);
    cc = zeros(num_actions,1);          % detections stored so far per class
    allscore = cell(num_actions,1);     % rows of [score, is_true_positive]
    for a=1:num_actions
        allscore{a} = zeros(init_rows,2,'single');
    end

    total_num_gt_boxes = zeros(num_actions,1);

    for vid=1:num_vid
        video_name = testlist{vid};
        [~,gtVidInd] = find(strcmp(annotNames, testlist{vid}));
        gt_tubes = annot(gtVidInd).tubes;
        numf = annot(gtVidInd).num_imgs;
        if mod(vid,5) == 0
            fprintf('Done procesing %d videos out of %d %s\n', vid, num_vid, video_name)
        end
        for nf = 1:numf
            gt_boxes = get_gt_boxes(gt_tubes,nf);
            dt_boxes = get_dt_boxes(comb, video_name, nf, num_actions, fusion_type);
            num_gt_boxes = size(gt_boxes,1);
            for g = 1:num_gt_boxes
                total_num_gt_boxes(gt_boxes(g,5)) = total_num_gt_boxes(gt_boxes(g,5)) + 1;
            end
            covered_gt_boxes = zeros(num_gt_boxes,1);
            for d = 1 : size(dt_boxes,1)
                dt_score = dt_boxes(d,5);
                dt_label = dt_boxes(d,6);
                cc(dt_label) = cc(dt_label) + 1;
                if cc(dt_label) > size(allscore{dt_label},1)
                    % grow geometrically instead of reserving a huge array up front
                    allscore{dt_label}(2*size(allscore{dt_label},1),2) = 0;
                end

                % best-overlapping uncovered GT box *of the same class*
                ioumax=-inf; maxgtind=0;
                for g = 1:num_gt_boxes
                    if ~covered_gt_boxes(g) && gt_boxes(g,5) == dt_label
                        iou = compute_spatial_iou(gt_boxes(g,1:4), dt_boxes(d,1:4));
                        if iou>ioumax
                            ioumax=iou;
                            maxgtind=g;
                        end
                    end
                end

                if ioumax>=iou_th
                    covered_gt_boxes(maxgtind) = 1;
                    allscore{dt_label}(cc(dt_label),:) = [dt_score,1]; % tp detection
                else
                    allscore{dt_label}(cc(dt_label),:) = [dt_score,0]; % fp detection
                end

            end

        end
    end
    % Sort scores and then reorder tp fp labels in result precision and recall for each action
    for a=1:num_actions
        allscore{a} = allscore{a}(1:cc(a),:);
        scores = allscore{a}(:,1);
        labels = allscore{a}(:,2);
        [~, si] = sort(scores,'descend');
        labels = labels(si);
        fp=cumsum(labels==0);
        tp=cumsum(labels==1);
        recall=tp/total_num_gt_boxes(a);
        precision=tp./(fp+tp);
        AP(a) = xVOCap(recall,precision);
        line = sprintf('Action %02d AP = %0.5f \n', a, AP(a));
        fprintf('%s',line);
        fprintf(logfile,'%s',line);
    end

    AP(isnan(AP)) = 0;
    mAP = mean(AP);
    line = sprintf('\nMean AP::=> %.5f\n\n',mAP);
    fprintf('%s',line);
    fprintf(logfile,'%s',line);
end
end


% -------------------------------------------------------------------------
function [videos] = getVideoNames(split_file)
% Read the whitespace-separated entries of split_file into a 1xN cell
% array of strings (1x0 when the file has no entries).
% -------------------------------------------------------------------------
fprintf('Get both lis is %s\n',split_file);
fid = fopen(split_file,'r');
if fid < 0
    error('Could not open split file %s', split_file);
end
data = textscan(fid, '%s');
fclose(fid);
videos = reshape(data{1},1,[]);
end

function gt_boxes = get_gt_boxes(gt_tubes,nf)
% Ground-truth boxes present in frame nf, one row [box, class] per tube
% whose [sf, ef] range contains nf; [] when there is none.
gt_boxes = [];
for t = 1:length(gt_tubes)
    if nf >= gt_tubes(t).sf && nf <= gt_tubes(t).ef
        b_ind = nf - gt_tubes(t).sf + 1;   % row of this frame inside the tube
        box = [gt_tubes(t).boxes(b_ind,:), gt_tubes(t).class];
        gt_boxes = [gt_boxes;box];
    end
end
end

function dt_boxes = get_dt_boxes(detection_dir, video_name, nf, num_actions, fusion_type)
% Detections of frame nf for all classes, rows of [x1 y1 x2 y2 score class].
dt_boxes = [];
%% apply nms per class
[boxes,scores] = read_detections(detection_dir, video_name, nf);
for a = 1 : num_actions
    cls_boxes = get_cls_detection(boxes,scores,a,fusion_type);
    dt_boxes = [dt_boxes; cls_boxes];
end
end

function cls_boxes = get_cls_detection(boxes,scores,a,fusion_type)
% Filtered detections of class a, optionally fused from two streams.
%   'none'  first stream only
%   'mean'  first stream's boxes with the two streams' scores averaged
%   'cat'   both streams filtered separately, concatenated, then NMS
%   'boost' first stream boosted by overlapping second-stream boxes, then NMS

if strcmp(fusion_type,'none')
    cls_boxes = dofilter(boxes(1).b,scores(1).s,a);
elseif strcmp(fusion_type,'mean')
    cls_boxes = dofilter(boxes(1).b,(scores(1).s+scores(2).s)/2.0,a);
elseif strcmp(fusion_type,'cat')
    cls_boxes_base = dofilter(boxes(1).b,scores(1).s,a);
    cls_boxes_top = dofilter(boxes(2).b,scores(2).s,a);
    all_boxes = [cls_boxes_base;cls_boxes_top];
    pick = nms(all_boxes(:,1:5),0.45);
    cls_boxes = all_boxes(pick,:);
elseif strcmp(fusion_type,'boost')
    cls_boxes_base = dofilter(boxes(1).b,scores(1).s,a);
    cls_boxes_top = dofilter(boxes(2).b,scores(2).s,a);
    all_boxes = boost_boxes(cls_boxes_base,cls_boxes_top);
    pick = nms(all_boxes(:,1:5),0.45);
    cls_boxes = all_boxes(pick,:);
else
    error('Spacify correct fusion technique');
end

end

function cls_boxes_base = boost_boxes(cls_boxes_base,cls_boxes_top)
% Boost base-stream scores with overlapping top-stream boxes.
% Each base box adds score*IoU of its best-overlapping top box when the
% IoU is >= 0.3 and that top box has not been used already. Top boxes
% that were never used are appended unchanged.

box_spatial = [cls_boxes_base(:,1:2) cls_boxes_base(:,3:4)-cls_boxes_base(:,1:2)+1];
box_flow = [cls_boxes_top(:,1:2) cls_boxes_top(:,3:4)-cls_boxes_top(:,1:2)+1];
covered = false(size(cls_boxes_top,1),1);   % top boxes already used for a boost
for i = 1:size(cls_boxes_base,1)
    ovlp = inters_union(box_spatial(i,:), box_flow); % 1 x num_top
    if ~isempty(ovlp)
        [movlp, maxind] = max(ovlp);
        if movlp>=0.3 && ~covered(maxind)
            cls_boxes_base(i,5) = cls_boxes_base(i,5) + cls_boxes_top(maxind,5)*movlp;
            covered(maxind) = true;
        end
    end
end

cls_boxes_base = [cls_boxes_base; cls_boxes_top(~covered,:)];

end

function [bxs, sc] = read_detections(detection_dir, video_name, nf)
% Load the detections of frame nf from the first directory in
% detection_dir and, when the cell has more than one entry, from the
% second one too. bxs(k).b holds the boxes and sc(k).s the scores.
num_streams = 1 + (length(detection_dir)>1);
bxs = struct();
sc = struct();
for k = 1:num_streams
    [bxs(k).b, sc(k).s] = load_frame_detection(detection_dir{k}, video_name, nf);
end

end

function [boxes, scores] = load_frame_detection(det_dir, video_name, nf)
% Read loc/scores from <det_dir><video_name>/<nf>.mat, scale the boxes to
% a 320x240 frame (1-based, clipped) and move the first score column to
% the end.
det_file = sprintf('%s%s/%05d.mat', det_dir, video_name, nf);
det = load(det_file, 'loc', 'scores'); % into a struct, not the workspace
loc = det.loc;
boxes = [loc(:,1)*320, loc(:,2)*240, loc(:,3)*320, loc(:,4)*240] + 1;
boxes(boxes(:,1)<1,1) = 1; boxes(boxes(:,2)<1,2) = 1;
boxes(boxes(:,3)>320,3) = 320; boxes(boxes(:,4)>240,4) = 240;
scores = [det.scores(:,2:end),det.scores(:,1)];
end


function boxes = dofilter(boxes,scores,a)
% Keep class-a detections scoring above 0.01: top 50 by score, NMS at
% 0.45, at most 20 survivors. Returns rows [box, score, class].
scores = scores(:,a);
pick = scores>0.01;
scores = scores(pick);
boxes = boxes(pick,:);
[~,pick] = sort(scores,'descend');
to_pick = min(50,size(pick,1));
pick = pick(1:to_pick);
scores = scores(pick);
boxes = boxes(pick,:);
pick = nms([boxes scores],0.45);
pick = pick(1:min(20,length(pick)));
boxes = boxes(pick,:);
scores = scores(pick);
cls = scores*0 + a;
boxes = [boxes,scores, cls];
end

function iou = inters_union(bounds1,bounds2)
% Pairwise IoU between [x y w h] rectangles (rows of bounds1 vs rows of
% bounds2); 0.001 in the denominator avoids division by zero.
% ------------------------------------------------------------------------
inters = rectint(bounds1,bounds2);
ar1 = bounds1(:,3).*bounds1(:,4);
ar2 = bounds2(:,3).*bounds2(:,4);
union = bsxfun(@plus,ar1,ar2')-inters;
iou = inters./(union+0.001);
end


function iou = compute_spatial_iou(gt_box, dt_box)
% IoU of one GT box, used as [x y w h], and one detection given as
% [x1 y1 x2 y2] (converted to [x y w h] here).
dt_box = [dt_box(1:2), dt_box(3:4)-dt_box(1:2)+1];
inter = rectint(gt_box,dt_box);
ar1 = gt_box(3)*gt_box(4);
ar2 = dt_box(3)*dt_box(4);
union = ar1 + ar2 - inter;
iou = inter/union;
end
--------------------------------------------------------------------------------
/matlab-online-display/gentube/convert2eval.m:
--------------------------------------------------------------------------------
% ---------------------------------------------------------
% Copyright (c) 2017, Gurkirt Singh
% This code and is available
% under the terms of MIT License provided in LICENSE.
% Please retain this notice and LICENSE if you use
% this file (or any portion of it) in your project.
% ---------------------------------------------------------
% Input: smoothed tubes
% Output: filtered out tubes with proper scoring

function xmld = convert2eval(final_tubes,min_num_frames,kthresh,topk,vids)
% CONVERT2EVAL  Turn smoothed tubes into per-video detection records.
%
%   final_tubes     struct with per-tube video_id, ts, te, starts, label,
%                   path_scores and path_boxes
%   min_num_frames  a tube is kept only when (te - ts) > min_num_frames
%   kthresh         per-class score threshold, indexed by the tube label
%   topk            the tube score is the mean of its topk highest
%                   per-frame scores
%   vids            cell array of video names; one output element each
%
%   xmld(v) has videoName, score, nr, class, framenr(k).fnr and
%   boxes(k).bxs with boxes as [x y w h]. Videos without a surviving tube
%   get empty fields.

xmld = struct([]);

for v = 1 : length(vids)
    action_indexes = find(strcmp(final_tubes.video_id,vids{v}));
    xmld(v).videoName = vids{v};
    % define every field up front so they exist even if no tube is kept
    xmld(v).score = [];
    xmld(v).nr = [];
    xmld(v).class = [];
    xmld(v).framenr = [];
    xmld(v).boxes = [];

    path_scores = final_tubes.path_scores(1,action_indexes);
    ts = final_tubes.ts(action_indexes);
    starts = final_tubes.starts(action_indexes);
    te = final_tubes.te(action_indexes);
    act_nr = 1;

    for a = 1 : length(ts)
        act_ts = ts(a);
        act_te = te(a);
        act_path_scores = path_scores{a};

        %-----------------------------------------------------------
        act_scores = sort(act_path_scores(act_ts:act_te),'descend');
        topk_mean = mean(act_scores(1:min(topk,length(act_scores))));

        bxs = final_tubes.path_boxes{action_indexes(a)}(act_ts:act_te,:);
        bxs = [bxs(:,1:2), bxs(:,3:4)-bxs(:,1:2)]; % [x1 y1 x2 y2] -> [x y w h]

        label = final_tubes.label(action_indexes(a));

        if topk_mean > kthresh(label) && (act_te-act_ts) > min_num_frames
            xmld(v).score(act_nr) = topk_mean;
            xmld(v).nr(act_nr) = act_nr;
            xmld(v).class(act_nr) = label;
            % path-relative frame range shifted by the path start frame
            xmld(v).framenr(act_nr).fnr = (act_ts:act_te) + starts(a)-1;
            xmld(v).boxes(act_nr).bxs = bxs;
            act_nr = act_nr+1;
        end
    end

end

--------------------------------------------------------------------------------
/matlab-online-display/gentube/dpEM_max.m:
--------------------------------------------------------------------------------
% ---------------------------------------------------------
% Original code comes from https://team.inria.fr/perception/research/skeletalquads/
% Copyright (c) 2014, Georgios Evangelidis and Gurkirt Singh,
% This code and is available
% under the terms of MIT License provided in LICENSE.
% Please retain this notice and LICENSE if you use
% this file (or any portion of it) in your project.
% ---------------------------------------------------------

% M = <10xnum_frames>
% r = 10 (action labels)
% c = frame indices in a video

function [p,q,D] = dpEM_max(M,alpha)
% DPEM_MAX  Best label path through a score matrix with a switching penalty.
%
%   M      r x c matrix, one row per label and one column per frame
%   alpha  scalar transition cost: V(L1,L2) = 0 if L1 == L2, alpha otherwise
%
%   p  1 x c label path obtained by backtracking from the best final label
%   q  1 x c frame indices 1..c
%   D  r x c accumulated scores; D(i,j) is the best score of any path
%      ending with label i at frame j
%
% Forward pass: for each frame and each label i, the best predecessor is
% the maximum over the previous column after subtracting alpha from every
% label other than i. phi stores that predecessor for the backward pass.

[r,c] = size(M);

% accumulated scores, with an extra all-zero column in front of frame 1
D = zeros(r, c+1);
D(:, 2:(c+1)) = M;

% switchCost(k,i) = alpha when moving from label k to a different label i
switchCost = alpha*(1 - eye(r));

phi = zeros(r,c);
for col = 2:c+1
    % column i holds the penalised predecessor scores for target label i
    candidates = bsxfun(@minus, D(:,col-1), switchCost);
    [best, from] = max(candidates, [], 1);
    D(:,col) = D(:,col) + best';
    phi(:,col-1) = from';
end

% drop the helper column again
D = D(:,2:(c+1));

% backtrack from the best label of the last frame
[~,last] = max(D(:,c));
p = zeros(1,c);
p(c) = last;
for frame = c:-1:2
    p(frame-1) = phi(p(frame), frame);
end
q = 1:c;

--------------------------------------------------------------------------------
/matlab-online-display/gentube/mydpEM_max.m:
--------------------------------------------------------------------------------
function [p,q,D] = mydpEM_max(M,alpha)
% MYDPEM_MAX  Same dynamic program as dpEM_max: best label path through
% the r x c score matrix M with cost alpha for every label change.
% Returns the label path p, the frame indices q and the accumulated
% score matrix D.

[r,c] = size(M);
% accumulated scores, with an extra all-zero column in front of frame 1
D = zeros(r, c+1);
D(:, 2:(c+1)) = M;

labels = (1:r)';
phi = zeros(r,c);

for col = 2:c+1
    prev = D(:, col-1);
    for lab = 1:r
        % staying on lab is free, coming from any other label costs alpha
        [best, from] = max(prev - alpha*(labels~=lab));
        D(lab,col) = D(lab,col) + best;
        phi(lab,col-1) = from;
    end
end

% Traceback from last frame
D = D(:,2:(c+1));

[~,last] = max(D(:,c));
p = zeros(1,c);
p(c) = last;
for frame = c:-1:2
    p(frame-1) = phi(p(frame), frame);
end
q = 1:c;
--------------------------------------------------------------------------------
/matlab-online-display/gentube/parActionPathSmoother.m:
--------------------------------------------------------------------------------
% ---------------------------------------------------------
% Copyright (c) 2017, Gurkirt Singh
% This code and is available
% under the terms of MIT License provided in LICENSE.
% Please retain this notice and LICENSE if you use
% this file (or any portion of it) in your project.
% ---------------------------------------------------------


function final_tubes = parActionPathSmoother(actionpaths,alpha,num_action)
% PARACTIONPATHSMOOTHER  Temporally trim the action paths of all videos.
%
%   actionpaths  struct array with fields video_id and paths
%   alpha        per-class switching cost, indexed by the class number
%   num_action   number of action classes
%
% Every video is processed by actionPathSmoother4oneVideo and the
% resulting tubes are concatenated, field by field, into one struct.

final_tubes = struct('starts',[],'ts',[],'te',[],'label',[],'path_total_score',[],...
    'dpActionScore',[],'dpPathScore',[],...
    'path_boxes',cell(1),'path_scores',cell(1),'video_id',cell(1));

nVideos = length(actionpaths);
alltubes = cell(nVideos,1);

for t = 1 : nVideos
    alltubes{t} = actionPathSmoother4oneVideo(actionpaths(t).paths,alpha,num_action,actionpaths(t).video_id) ;
end

% flatten the per-video results into one running list
n = 0;
for t = 1 : nVideos
    vid_tubes = alltubes{t};
    for k = 1:length(vid_tubes.ts)
        n = n + 1;
        final_tubes.starts(n) = vid_tubes.starts(k);
        final_tubes.ts(n) = vid_tubes.ts(k);
        final_tubes.te(n) = vid_tubes.te(k);
        final_tubes.label(n) = vid_tubes.label(k);
        final_tubes.video_id{n} = vid_tubes.video_id{k};
        final_tubes.dpActionScore(n) = vid_tubes.dpActionScore(k);
        final_tubes.dpPathScore(n) = vid_tubes.dpPathScore(k);
        final_tubes.path_total_score(n) = vid_tubes.path_total_score(k);
        final_tubes.path_boxes{n} = vid_tubes.path_boxes{k};
        final_tubes.path_scores{n} = vid_tubes.path_scores{k};
    end
end
end

function final_tubes = actionPathSmoother4oneVideo(video_paths,alpha,num_action,video_id)
% Trim every path of one video. For each class a and each of its paths,
% the class-score matrix (offset by 20) is labelled with dpEM_max using
% alpha(a), and every run of frames labelled a becomes one tube.
final_tubes = struct('starts',[],'ts',[],'te',[],'label',[],'path_total_score',[],...
    'dpActionScore',[],'dpPathScore',[],'vid',[],...
    'path_boxes',cell(1),'path_scores',cell(1),'video_id',cell(1));

if isempty(video_paths)
    return
end

n = 0;
for a = 1 : num_action
    action_paths = video_paths{a};
    for pth = 1 : getPathCount(action_paths)
        % rows = classes, columns = frames
        M = action_paths(pth).allScores(:,1:num_action)';
        M = M +20;

        [pred_path,time,D] = dpEM_max(M,alpha(a));
        [ Ts, Te, Scores, Label, DpPathScore] = extract_action(pred_path,time,D,a);
        for k = 1 : length(Ts)
            n = n + 1;
            final_tubes.starts(n) = action_paths(pth).start;
            final_tubes.ts(n) = Ts(k);
            final_tubes.te(n) = Te(k);
            final_tubes.label(n) = Label(k);
            final_tubes.video_id{n} = video_id;
            final_tubes.dpActionScore(n) = Scores(k);
            final_tubes.dpPathScore(n) = DpPathScore(k);
            final_tubes.path_total_score(n) = mean(action_paths(pth).scores);
            final_tubes.path_boxes{n} = action_paths(pth).boxes;
            final_tubes.path_scores{n} = action_paths(pth).scores;
        end
    end
end
end

function M = normM(M)
% Scale every column of M so that it sums to one.
for col = 1: size(M,2)
    M(:,col) = M(:,col)/sum(M(:,col));
end
end
function [ts,te,scores,label,total_score] = extract_action(p,q,D,action)
% Find the runs of consecutive frames labelled `action` in the path p.
%   ts, te       first and last path position of every run
%   scores       (D(action,te) - D(action,ts)) ./ (te - ts); a one-frame
%                run therefore divides by zero
%   label        column of `action`, one entry per run
%   total_score  column holding D(p(end),q(end))/length(p) for every run
% All outputs are [] when the label never occurs.
hits = find(p==action);

if isempty(hits)
    ts = []; te = []; scores = []; label = []; total_score = [];
    return
end

% gap to the previous hit; the first entry is forced to 2 so that the
% first hit always opens a run
gaps = [hits,hits(end)+1] - [hits(1)-2,hits];
runStart = find(gaps>1);
% each run ends just before the next one starts; the last run ends at the last hit
runEnd = [runStart(2:end)-1,length(hits)];

ts = hits(runStart);
te = hits(runEnd);
scores = (D(action,q(te)) - D(action,q(ts)))./(te-ts);
label = ones(length(ts),1)*action;
total_score = ones(length(ts),1)*D(p(end),q(end))/length(p);
end

% -------------------------------------------------------------------------
function lp_count = getPathCount(live_paths)
% Number of paths in live_paths, or 0 when it has no 'boxes' field.
% -------------------------------------------------------------------------

if isfield(live_paths,'boxes')
    lp_count = length(live_paths);
else
    lp_count = 0;
end
end
--------------------------------------------------------------------------------
/matlab-online-display/gentube/readALLactionPaths.m:
--------------------------------------------------------------------------------
% ---------------------------------------------------------
% Copyright (c) 2017, Gurkirt Singh
% This code and is available
% under the terms of MIT License provided in LICENSE.
% Please retain this notice and LICENSE if you use
% this file (or any portion of it) in your project.
% ---------------------------------------------------------

function actionpath = readALLactionPaths(videolist,actionPathDir,step)
% READALLACTIONPATHS  Load the saved action paths of the listed videos.
%
%   videolist      text file with one video id per whitespace-separated entry
%   actionPathDir  directory prefix; each video's paths are read from
%                  [actionPathDir, videoID, '-actionpaths.mat']
%   step           stride over the list (1 loads every video)
%
%   actionpath(k).video_id / .paths hold the id and the `allpaths`
%   variable stored in the k-th loaded file.
%
% Raises an error when a path file is missing or has no `allpaths`.

videos = getVideoNames(videolist);
NumVideos = length(videos);

actionpath = struct([]);
fprintf('Loading action paths of %d videos\n',NumVideos);
count = 1;
for vid=1:step:NumVideos

    videoID = videos(vid).video_id;
    pathsSaveName = [actionPathDir,videoID,'-actionpaths.mat'];

    if ~exist(pathsSaveName,'file')
        % '%s' added so the offending path appears in the message
        error('Action path does not exist please genrate actin path %s', pathsSaveName)
    end

    % load into a struct so the file cannot overwrite local variables
    saved = load(pathsSaveName);
    if ~isfield(saved,'allpaths')
        error('No variable allpaths found in %s', pathsSaveName)
    end
    actionpath(count).video_id = videoID;
    actionpath(count).paths = saved.allpaths;
    count = count+1;
end
end

function [videos] = getVideoNames(split_file)
% Read the whitespace-separated entries of split_file into a struct array
% with one video_id per entry (0x0 when the file has no entries).
% -------------------------------------------------------------------------
fid = fopen(split_file,'r');
if fid < 0
    error('Could not open video list %s', split_file);
end
data = textscan(fid, '%s');
fclose(fid);

videos = struct('video_id', {});
for i=1:length(data{1})
    videos(i).video_id = data{1}{i};
end

end
--------------------------------------------------------------------------------
/matlab-online-display/myI01onlineTubes.m:
--------------------------------------------------------------------------------
% ---------------------------------------------------------
% Copyright (c) 2017, Gurkirt Singh
% This code and is available
% under the terms of MIT License provided in LICENSE.
% Please retain this notice and LICENSE if you use
% this file (or any portion of it) in your project.
7 | % --------------------------------------------------------- 8 | %% This is main script to build tubes and evaluate them %% 9 | 10 | function myI01onlineTubes() 11 | close all 12 | data_root = '/home/zhujiagang/realtime-action-detection'; 13 | save_root = '/home/zhujiagang/realtime-action-detection/save'; 14 | iteration_num_rgb = [120000]; % you can also evaluate on multiple iertations 15 | iteration_num_flow = [120000]; % you can also evaluate on multiple iertations 16 | 17 | % add subfolder to matlab paths 18 | addpath(genpath('gentube/')); 19 | addpath(genpath('actionpath/')); 20 | addpath(genpath('eval/')); 21 | addpath(genpath('utils/')); 22 | model_type = 'CONV'; 23 | 24 | completeList = {... 25 | {'ucf24','01',{'rgb'},iteration_num_rgb,{'score'}},... 26 | {'ucf24','01',{'brox'},iteration_num_flow,{'score'}}... 27 | {'ucf24','01',{'fastOF'},iteration_num_flow,{'score'}}... 28 | }; 29 | 30 | alldopts = cell(2,1); 31 | count = 1; 32 | gap=3; 33 | 34 | for setind = 1:length(completeList) 35 | [dataset, listid, imtypes, iteration_nums, costTypes] = enumurateList(completeList{setind}); 36 | for ct = 1:length(costTypes) 37 | costtype = costTypes{ct}; 38 | for imtind = 1:length(imtypes) 39 | imgType = imtypes{imtind}; 40 | for iteration = iteration_nums 41 | for iouthresh=0.1 42 | %% generate directory sturcture based on the options 43 | dopts = initDatasetOpts(data_root,save_root,dataset,imgType,model_type,listid,iteration,iouthresh,costtype, gap); 44 | if exist(dopts.detDir,'dir') 45 | alldopts{count} = dopts; 46 | count = count+1; 47 | end 48 | end 49 | end 50 | end 51 | end 52 | end 53 | 54 | %% For each option type build tubes and evaluate them 55 | for index = 1:count-1 56 | opts = alldopts{index}; 57 | if exist(opts.detDir,'dir') 58 | fprintf('Video List %02d :: %s\nAnnotFile :: %s\nImage Dir :: %s\nDetection Dir:: %s\nActionpath Dir:: %s\nTube Dir:: %s\n',... 
59 | index, opts.vidList, opts.annotFile, opts.imgDir, opts.detDir, opts.actPathDir, opts.tubeDir); 60 | %% online bbx and prediction scores display given frame level detections 61 | actionPaths(opts); 62 | end 63 | end 64 | 65 | 66 | 67 | %% Function to enumrate options 68 | function [dataset,listnum,imtypes,weights,costTypes] = enumurateList(sublist) 69 | dataset = sublist{1}; listnum = sublist{2}; imtypes = sublist{3}; 70 | weights = sublist{4};costTypes = sublist{5}; 71 | 72 | %% Facade function for smoothing tubes and evaluating them 73 | function results = gettubes(dopts) 74 | 75 | numActions = length(dopts.actions); 76 | results = zeros(300,6); 77 | counter=1; 78 | class_aps = cell(2,1); 79 | 80 | annot = load(dopts.annotFile); 81 | annot = annot.annot; 82 | testvideos = getVideoNames(dopts.vidList); 83 | 84 | for alpha = 3 85 | fprintf('alpha %03d ', alpha); 86 | % read action paths 87 | actionpaths = readALLactionPaths(dopts.vidList,dopts.actPathDir,1); 88 | %% perform temporal trimming 89 | smoothedtubes = parActionPathSmoother(actionpaths,alpha*ones(numActions,1),numActions); 90 | fprintf('\n'); 91 | end 92 | 93 | results(counter:end,:) = []; 94 | result = cell(2,1); 95 | result{2} = class_aps; 96 | result{1} = results; 97 | results = result; 98 | 99 | 100 | function videos = getVideoNames(split_file) 101 | % ------------------------------------------------------------------------- 102 | fid = fopen(split_file,'r'); 103 | data = textscan(fid, '%s'); 104 | videos = cell(1); 105 | count = 0; 106 | for i=1:length(data{1}) 107 | filename = cell2mat(data{1}(i,1)); 108 | count = count +1; 109 | videos{count} = filename; 110 | end 111 | -------------------------------------------------------------------------------- /matlab-online-display/myI02genFusedTubes.m: -------------------------------------------------------------------------------- 1 | 2 | function myI02genFusedTubes() 3 | 4 | data_root = '/home/zhujiagang/realtime-action-detection'; 5 | save_root = 
'/home/zhujiagang/realtime-action-detection/save'; 6 | iteration_num_rgb = 90000; % you can also evaluate on multiple iertations 7 | iteration_num_flow = 120000; % you can also evaluate on multiple iertations 8 | 9 | addpath(genpath('actionpath/')); 10 | addpath(genpath('gentube/')); 11 | addpath(genpath('eval/')); 12 | addpath(genpath('utils/')); 13 | 14 | completeList = {... 15 | {'ucf24','01',{'rgb','brox'},[90000,120000],{'cat','nwsum-plus','mean'}, 0.25},... 16 | {'ucf24','01',{'rgb','brox'},[120000,120000],{'cat','nwsum-plus','mean'}, 0.25},... 17 | {'ucf24','01',{'rgb','fastOF'},[90000,120000],{'cat','nwsum-plus','mean'}, 0.25},... 18 | {'ucf24','01',{'rgb','fastOF'},[120000,120000],{'cat','nwsum-plus','mean'}, 0.25},... 19 | }; 20 | model_type = 'CONV'; 21 | costtype = 'score'; 22 | iouthresh = 0.1; 23 | gap = 3; 24 | alldopts = cell(2,1); 25 | count = 1; 26 | for setind = [2,4] %1:length(completeList) 27 | [dataset,listid,imtypes,iteration_nums,fusiontypes,fuseiouths] = enumurateList(completeList{setind}); 28 | for ff =1:length(fusiontypes) 29 | fusiontype = fusiontypes{ff}; 30 | if strcmp(fusiontype,'cat') || strcmp(fusiontype,'mean') 31 | tempfuseiouths = 0; 32 | else 33 | tempfuseiouths = fuseiouths; 34 | end 35 | for fuseiouth = tempfuseiouths 36 | for iouWeight = 1 37 | dopts = initDatasetOptsFused(data_root,save_root,dataset,imtypes,model_type, ... 
function [dataset,listnum,imtypes,weights,fusiontypes,fuseiouths] = enumurateList(sublist)
% ENUMURATELIST Unpack one fusion-experiment entry into separate outputs.
%
%   SUBLIST is a cell array with at least six elements; element k is
%   returned as the k-th output, in the order
%   dataset, listnum, imtypes, weights, fusiontypes, fuseiouths.
[dataset,listnum,imtypes,weights,fusiontypes,fuseiouths] = sublist{1:6};
parActionPathSmoother(actionpaths,alpha*ones(numActions,1),numActions); 96 | save(tubesSaveName,'smoothedtubes','-v7.3'); 97 | else 98 | load(tubesSaveName) 99 | end 100 | 101 | min_num_frames = 8; kthresh = 0.0; topk = 40; 102 | % strip off uncessary parts and remove very small actions less than 103 | % 8 frames; not really necessary but for speed at eval time 104 | xmldata = convert2eval(smoothedtubes, min_num_frames, kthresh*ones(numActions,1), topk,testvideos); 105 | 106 | %% Do the evaluation 107 | for iou_th =[0.2,[0.5:0.05:0.95]] 108 | [tmAP,tmIoU,tacc,AP] = get_PR_curve(annot, xmldata, testvideos, dopts.actions, iou_th); 109 | % pritn outs iou_threshold, meanAp, sm, classifcation accuracy 110 | fprintf('%.2f %0.3f %0.3f N ',iou_th,tmAP, tacc); 111 | results(counter,:) = [iou_th,alpha,alpha,tmIoU,tmAP,tacc]; 112 | class_aps{counter} = AP; 113 | counter = counter+1; 114 | end 115 | fprintf('\n'); 116 | end 117 | 118 | results(counter:end,:) = []; 119 | result = cell(2,1); 120 | result{2} = class_aps; 121 | result{1} = results; 122 | results = result; 123 | fprintf('results saved in %s\n',saveName); 124 | save(saveName,'results'); 125 | else 126 | load(saveName) 127 | end 128 | 129 | function videos = getVideoNames(split_file) 130 | % ------------------------------------------------------------------------- 131 | fid = fopen(split_file,'r'); 132 | data = textscan(fid, '%s'); 133 | videos = cell(1); 134 | count = 0; 135 | for i=1:length(data{1}) 136 | filename = cell2mat(data{1}(i,1)); 137 | count = count +1; 138 | videos{count} = filename; 139 | end -------------------------------------------------------------------------------- /matlab-online-display/utils/createdires.m: -------------------------------------------------------------------------------- 1 | % --------------------------------------------------------- 2 | % Copyright (c) 2017, Gurkirt Singh 3 | % This code and is available 4 | % under the terms of MIT License provided in LICENSE. 
function createdires(basedirs,actions)
% CREATEDIRES Create one sub-directory per action under each base directory.
%
%   createdires(basedirs,actions) forms, for every entry of the cell array
%   BASEDIRS and every entry of the cell array ACTIONS, the path
%   [basedir, action] by plain concatenation and creates it if it does not
%   exist yet. BASEDIRS entries therefore need to end with a file
%   separator for the action to become a sub-directory.
%
%   Both arguments may be row or column cell arrays.
for s = 1:numel(basedirs)
    savename = basedirs{s};
    % Index explicitly: "for action = actions" walks the columns of the
    % cell, so an N-by-1 cell produced a single iteration and only its
    % first action directory was ever created.
    for a = 1:numel(actions)
        saveNameaction = [savename,actions{a}];
        if ~isdir(saveNameaction)
            mkdir(saveNameaction);
        end
    end
end
end
30 | 'Skijet','SoccerJuggling','Surfing','TennisSwing','TrampolineJumping',... 31 | 'VolleyballSpiking','WalkingWithDog'}; 32 | elseif strcmp(dataset,'JHMDB') 33 | opts.actions = {'brush_hair','catch','clap','climb_stairs','golf','jump',... 34 | 'kick_ball','pick','pour','pullup','push','run','shoot_ball','shoot_bow',... 35 | 'shoot_gun','sit','stand','swing_baseball','throw','walk','wave'}; 36 | elseif strcmp(dataset,'LIRIS') 37 | opts.actions = {'discussion', 'give_object_to_person','put_take_obj_into_from_box_desk',... 38 | 'enter_leave_room_no_unlocking','try_enter_room_unsuccessfully','unlock_enter_leave_room',... 39 | 'leave_baggage_unattended','handshaking','typing_on_keyboard','telephone_conversation'}; 40 | end 41 | 42 | opts.imgDir = sprintf('%s/%s/%s-images/',data_root,dataset,imgType); 43 | 44 | opts.detDir = sprintf('%s/%s/detections/%s-%s-%s-%06d/',baseDir,dataset,model_type,imgType,listid,iteration_num); 45 | opts.annotFile = sprintf('%s/%s/splitfiles/annots.mat',data_root,dataset); 46 | 47 | opts.actPathDir = sprintf('%s/%s/actionPaths/%s-%s-%s-%06d-%s-%d-%04d/',baseDir,dataset,model_type,imgType,listid,iteration_num,costtype,gap,iouthresh*100); 48 | opts.tubeDir = sprintf('%s/%s/actionTubes/%s-%s-%s-%06d-%s-%d-%04d/',baseDir,dataset,model_type,imgType,listid,iteration_num,costtype,gap,iouthresh*100); 49 | 50 | if exist(opts.detDir,'dir') 51 | if ~isdir(opts.actPathDir) 52 | fprintf('Creating %s\n',opts.actPathDir); 53 | mkdir(opts.actPathDir) 54 | end 55 | if ~isdir(opts.tubeDir) 56 | mkdir(opts.tubeDir) 57 | end 58 | if strcmp(dataset,'ucf24') || strcmp(dataset,'JHMDB') 59 | createdires({opts.actPathDir},opts.actions) 60 | end 61 | end 62 | -------------------------------------------------------------------------------- /matlab-online-display/utils/initDatasetOptsFused.m: -------------------------------------------------------------------------------- 1 | % --------------------------------------------------------- 2 | % Copyright (c) 2017, Gurkirt 
function opts = initDatasetOptsFused(data_root,baseDir,dataset,imtypes,model_type, ...
    listid,iteration_nums,iouthresh,costtype,gap,fusiontype,fuseiouth)
% INITDATASETOPTSFUSED Build the option struct for a two-stream fusion run.
%
%   Inputs
%     data_root       root used for the image directory and annots.mat path
%     baseDir         root holding detections/, actionPaths/ and actionTubes/
%     dataset         'ucf24', 'JHMDB' or 'LIRIS' (selects opts.actions; any
%                     other value leaves opts.actions unset)
%     imtypes         two-element cell: {base stream, top stream}
%     model_type      string embedded in the directory names
%     listid          split id string, e.g. '01'
%     iteration_nums  [base, top] iteration numbers of the detections
%     iouthresh       linking IoU threshold (stored and encoded in dir names)
%     costtype, gap   stored in opts and encoded in dir names
%     fusiontype      fusion name; becomes a path component
%     fuseiouth       fusion IoU threshold (encoded as fiouNNN)
%
%   Output
%     opts            struct carrying the inputs plus vidList, actions,
%                     imgDir, basedetDir, topdetDir, annotFile, actPathDir
%                     and tubeDir.
%
%   Side effect: when opts.basedetDir exists, the action-path and tube
%   directories are created, plus one action-path sub-directory per action
%   for ucf24 and JHMDB.

opts = struct();
opts.imgType = [imtypes{1},'-',imtypes{2}];
opts.costtype = costtype;
opts.gap = gap;
opts.baseDir = baseDir;
opts.dataset = dataset;
opts.iouThresh = iouthresh;
opts.iteration_nums = iteration_nums;
opts.listid = listid;
opts.fusiontype = fusiontype;
opts.fuseiouth = fuseiouth;
testlist = ['display',listid];
opts.data_root = data_root;
% The display list is looked up relative to the current folder rather than
% under <data_root>/<dataset>/splitfiles/.
opts.vidList = sprintf('%s.txt',testlist);

if strcmp(dataset,'ucf24')
    opts.actions = {'Basketball','BasketballDunk','Biking','CliffDiving','CricketBowling',...
        'Diving','Fencing','FloorGymnastics','GolfSwing','HorseRiding','IceDancing',...
        'LongJump','PoleVault','RopeClimbing','SalsaSpin','SkateBoarding','Skiing',...
        'Skijet','SoccerJuggling','Surfing','TennisSwing','TrampolineJumping',...
        'VolleyballSpiking','WalkingWithDog'};
elseif strcmp(dataset,'JHMDB')
    opts.actions = {'brush_hair','catch','clap','climb_stairs','golf','jump',...
        'kick_ball','pick','pour','pullup','push','run','shoot_ball','shoot_bow',...
        'shoot_gun','sit','stand','swing_baseball','throw','walk','wave'};
elseif strcmp(dataset,'LIRIS')
    opts.actions = {'discussion', 'give_object_to_person','put_take_obj_into_from_box_desk',...
        'enter_leave_room_no_unlocking','try_enter_room_unsuccessfully','unlock_enter_leave_room',...
        'leave_baggage_unattended','handshaking','typing_on_keyboard','telephone_conversation'};
end

opts.imgDir = sprintf('%s/%s/%s-images/',data_root,dataset,imtypes{1});

opts.basedetDir = sprintf('%s/%s/detections/%s-%s-%s-%06d/',baseDir,dataset,model_type,imtypes{1},listid,iteration_nums(1));
opts.topdetDir = sprintf('%s/%s/detections/%s-%s-%s-%06d/',baseDir,dataset,model_type,imtypes{2},listid,iteration_nums(2));

opts.annotFile = sprintf('%s/%s/splitfiles/annots.mat',data_root,dataset);

% Round before formatting: sprintf silently switches a %d conversion to %e
% when the value is not an integer (0.07*100 is not exactly 7), which
% would put text like "7.000000e+00" into the directory name. Integer
% products such as 0.1*100 are unaffected.
iouTag = round(iouthresh*100);
fuseTag = uint16(fuseiouth*100);

opts.actPathDir = sprintf('%s/%s/actionPaths/%s/%s-%s-%s-%s-%d-%d-%s-%d-%04d-fiou%03d/',baseDir,dataset,fusiontype,model_type,imtypes{1},imtypes{2},...
    listid,iteration_nums(1),iteration_nums(2),costtype,gap,iouTag,fuseTag);
opts.tubeDir = sprintf('%s/%s/actionTubes/%s/%s-%s-%s-%s-%d-%d-%s-%d-%04d-fiou%03d/',baseDir,dataset,fusiontype,model_type,imtypes{1},imtypes{2},...
    listid,iteration_nums(1),iteration_nums(2),costtype,gap,iouTag,fuseTag);

% Only create output folders when there are base detections to process.
if exist(opts.basedetDir,'dir')
    if ~isdir(opts.actPathDir)
        fprintf('Creating %s\n',opts.actPathDir);
        mkdir(opts.actPathDir)
    end

    if ~isdir(opts.tubeDir)
        mkdir(opts.tubeDir)
    end

    if strcmp(dataset,'ucf24') || strcmp(dataset,'JHMDB')
        createdires({opts.actPathDir},opts.actions)
    end
end
75 | % opts.vidList,opts.imgDir,opts.detDir,opts.actPathDir,opts.tubeDir) 76 | -------------------------------------------------------------------------------- /online-tubes/.gitignore: -------------------------------------------------------------------------------- 1 | *.ods# 2 | *.m~ 3 | *.prototxt~ 4 | *.txt~ 5 | *.xml~ 6 | *.log 7 | *.txt 8 | *.txt~ 9 | *~ 10 | /results 11 | -------------------------------------------------------------------------------- /online-tubes/I01onlineTubes.m: -------------------------------------------------------------------------------- 1 | % --------------------------------------------------------- 2 | % Copyright (c) 2017, Gurkirt Singh 3 | % This code and is available 4 | % under the terms of MIT License provided in LICENSE. 5 | % Please retain this notice and LICENSE if you use 6 | % this file (or any portion of it) in your project. 7 | % --------------------------------------------------------- 8 | %% This is main script to build tubes and evaluate them %% 9 | 10 | function I01onlineTubes() 11 | 12 | data_root = '/mnt/sun-gamma/datasets'; 13 | save_root = '/mnt/sun-gamma/datasets'; 14 | iteration_nums = [70000,120000,50000,90000]; % you can also evaluate on multiple iterations 15 | 16 | % add subfolder to matlab paths 17 | addpath(genpath('gentube/')); 18 | addpath(genpath('actionpath/')); 19 | addpath(genpath('eval/')); 20 | addpath(genpath('utils/')); 21 | model_type = 'CONV'; 22 | 23 | completeList = {... 24 | {'ucf24','01', {'rgb'}, iteration_nums,{'score'}},... 25 | {'ucf24','01', {'brox'}, iteration_nums,{'score'}}... 26 | {'ucf24','01', {'fastOF'}, iteration_nums,{'score'}}... 
27 | }; 28 | 29 | alldopts = cell(2,1); 30 | count = 1; 31 | gap=3; 32 | 33 | for setind = 1 %:length(completeList) 34 | [dataset, listid, imtypes, iteration_nums, costTypes] = enumurateList(completeList{setind}); 35 | for ct = 1:length(costTypes) 36 | costtype = costTypes{ct}; 37 | for imtind = 1:length(imtypes) 38 | imgType = imtypes{imtind}; 39 | for iteration = iteration_nums 40 | for iouthresh=0.1 41 | %% generate directory sturcture based on the options 42 | dopts = initDatasetOpts(data_root,save_root,dataset,imgType,model_type,listid,iteration,iouthresh,costtype, gap); 43 | if exist(dopts.detDir,'dir') 44 | alldopts{count} = dopts; 45 | count = count+1; 46 | end 47 | end 48 | end 49 | end 50 | end 51 | end 52 | 53 | results = cell(2,1); 54 | 55 | %% For each option type build tubes and evaluate them 56 | for index = 1:count-1 57 | opts = alldopts{index}; 58 | if exist(opts.detDir,'dir') 59 | fprintf('Video List %02d :: %s\nAnnotFile :: %s\nImage Dir :: %s\nDetection Dir:: %s\nActionpath Dir:: %s\nTube Dir:: %s\n',... 60 | index, opts.vidList, opts.annotFile, opts.imgDir, opts.detDir, opts.actPathDir, opts.tubeDir); 61 | %% Build action paths given frame level detections 62 | actionPaths(opts); 63 | %% Perform temproal labelling and evaluate; results saved in results cell 64 | result_cell = gettubes(opts); 65 | results{index,1} = result_cell; 66 | results{index,2} = opts; 67 | rm = result_cell{1}; 68 | rm = rm(rm(:,2) == 5,:); 69 | fprintf('\nmAP@0.2:%0.4f mAP@0.5:%0.4f mAP@0.75:%0.4f AVGmAP:%0.4f clsAcc:%0.4f\n\n',... 
%% Facade function for smoothing tubes and evaluating them
function results = gettubes(dopts)
% GETTUBES Trim action paths into tubes and evaluate them for one option set.
%
%   results = gettubes(dopts) returns a 2-by-1 cell:
%     results{1}  one row per (alpha, IoU threshold) pair with columns
%                 [iou_th, alpha, alpha, tmIoU, tmAP, tacc]
%     results{2}  cell holding the AP output of get_PR_curve for each row
%
%   The final result is cached in <dopts.tubeDir>tubes-results.mat and the
%   smoothed tubes per alpha in <dopts.tubeDir>tubes-alphaNNNN.mat; when a
%   cache file exists it is loaded instead of being recomputed.

numActions = length(dopts.actions);
results = zeros(300,6);   % preallocated; unused rows are removed below
counter=1;
class_aps = cell(2,1);
% file that stores the evaluation result for this option set
saveName = sprintf('%stubes-results.mat',dopts.tubeDir);
if ~exist(saveName,'file')

    annot = load(dopts.annotFile);
    annot = annot.annot;
    testvideos = getVideoNames(dopts.vidList);
    % action paths are read once and reused for every alpha
    actionpaths = readALLactionPaths(dopts.vidList,dopts.actPathDir,1);
    for alpha = [3, 5]
        fprintf('alpha %03d ',alpha);
        tubesSaveName = sprintf('%stubes-alpha%04d.mat',dopts.tubeDir,uint16(alpha*100));
        if ~exist(tubesSaveName,'file')
            %% perform temporal trimming
            smoothedtubes = PARactionPathSmoother(actionpaths,alpha*ones(numActions,1),numActions);
            save(tubesSaveName,'smoothedtubes','-v7.3');
        else
            % bare load: injects the cached 'smoothedtubes' variable
            load(tubesSaveName)
        end

        min_num_frames = 8; kthresh = 0.0; topk = 40;
        xmldata = convert2eval(smoothedtubes, min_num_frames, kthresh*ones(numActions,1), topk,testvideos);

        %% Do the evaluation
        for iou_th =[0.2,[0.5:0.05:0.95]]
            [tmAP,tmIoU,tacc,AP] = get_PR_curve(annot, xmldata, testvideos, dopts.actions, iou_th);
            % prints IoU threshold, mean AP and classification accuracy
            fprintf('%.2f %0.3f %0.3f N ',iou_th,tmAP, tacc);
            results(counter,:) = [iou_th,alpha,alpha,tmIoU,tmAP,tacc];
            class_aps{counter} = AP;
            counter = counter+1;
        end
        fprintf('\n');
    end

    % drop the preallocated rows that were never filled
    results(counter:end,:) = [];
    result = cell(2,1);
    result{2} = class_aps;
    result{1} = results;
    results = result;
    fprintf('results saved in %s\n',saveName);
    save(saveName,'results');
else
    % bare load: overwrites 'results' with the cached 2-by-1 cell
    load(saveName)
end
15 | }; 16 | 17 | model_type = 'CONV'; 18 | costtype = 'score'; 19 | iouthresh = 0.1; 20 | gap = 3; 21 | alldopts = cell(2,1); 22 | count = 0; 23 | for setind = 1:length(completeList) 24 | [dataset,listid,imtypes,iteration_nums,fusiontypes,fuseiouths] = enumurateList(completeList{setind}); 25 | for ff =1:length(fusiontypes) 26 | fusiontype = fusiontypes{ff}; 27 | if strcmp(fusiontype,'cat') || strcmp(fusiontype,'mean') 28 | tempfuseiouths = 0; 29 | else 30 | tempfuseiouths = fuseiouths; 31 | end 32 | for fuseiouth = tempfuseiouths 33 | for iouWeight = 1 34 | dopts = initDatasetOptsFused(data_root,save_root,dataset,imtypes,model_type, ... 35 | listid,iteration_nums,iouthresh,costtype,gap,fusiontype,fuseiouth); 36 | if exist(dopts.basedetDir,'dir') && exist(dopts.topdetDir,'dir') 37 | count = count+1; 38 | alldopts{count} = dopts; 39 | end 40 | end 41 | end 42 | end 43 | end 44 | 45 | fprintf('\n\n\n\n Count is %d \n\n\n\n',count) 46 | 47 | results = cell(2,1); 48 | 49 | for index = 1:count 50 | opts = alldopts{index}; 51 | if exist(opts.basedetDir,'dir') && exist(opts.topdetDir,'dir') 52 | fprintf('Video List :: %s\n \nDetection basedetDir:: %s\nActionpath Dir:: %s\nTube Dir:: %s\n',... 53 | opts.vidList,opts.basedetDir,opts.actPathDir,opts.tubeDir); 54 | 55 | %% Build action paths given frame level detections 56 | fusedActionPaths(opts); 57 | %% Perform temproal labelling and evaluate; results saved in results cell 58 | result_cell = gettubes(opts); 59 | results{index,1} = result_cell; 60 | results{index,2} = opts; 61 | rm = result_cell{1}; 62 | rm = rm(rm(:,2) == 5,:); 63 | fprintf('\nmAP@0.2:%0.4f mAP@0.5:%0.4f mAP@0.75:%0.4f AVGmAP:%0.4f clsAcc:%0.4f\n\n',... 
64 | rm(1,5),rm(2,5),rm(7,5),mean(rm(2:end,5)),rm(1,6)); 65 | end 66 | end 67 | 68 | 69 | %% save results 70 | save_dir = [save_root,'/results/']; 71 | if ~isdir(save_dir) 72 | mkdir(save_dir) 73 | end 74 | 75 | save([save_dir,'online_fused_tubes_results.mat'],'results') 76 | 77 | 78 | function [dataset,listnum,imtypes,weights,fusiontypes,fuseiouths] = enumurateList(sublist) 79 | 80 | dataset = sublist{1}; listnum = sublist{2}; imtypes = sublist{3}; 81 | weights = sublist{4}; 82 | fusiontypes = sublist{5}; 83 | fuseiouths = sublist{6}; 84 | 85 | %% Facade function for smoothing tubes and evaluating them 86 | function results = gettubes(dopts) 87 | 88 | numActions = length(dopts.actions); 89 | results = zeros(400,6); 90 | counter=1; 91 | class_aps = cell(2,1); 92 | % save file name to save result for eah option type 93 | saveName = sprintf('%stubes-results.mat',dopts.tubeDir); 94 | if ~exist(saveName,'file') 95 | 96 | annot = load(dopts.annotFile); 97 | annot = annot.annot; 98 | testvideos = getVideoNames(dopts.vidList); 99 | for alpha = [3,5] 100 | fprintf('alpha %03d ',alpha); 101 | tubesSaveName = sprintf('%stubes-alpha%04d.mat',dopts.tubeDir,uint16(alpha*100)); 102 | if ~exist(tubesSaveName,'file') 103 | % read action paths 104 | actionpaths = readALLactionPaths(dopts.vidList,dopts.actPathDir,1); 105 | %% perform temporal trimming 106 | smoothedtubes = PARactionPathSmoother(actionpaths,alpha*ones(numActions,1),numActions); 107 | save(tubesSaveName,'smoothedtubes','-v7.3'); 108 | else 109 | load(tubesSaveName) 110 | end 111 | 112 | min_num_frames = 8; kthresh = 0.0; topk = 40; 113 | % strip off uncessary parts and remove very small actions less than 114 | % 8 frames; not really necessary but for speed at eval time 115 | xmldata = convert2eval(smoothedtubes, min_num_frames, kthresh*ones(numActions,1), topk,testvideos); 116 | 117 | %% Do the evaluation 118 | for iou_th =[0.2,[0.5:0.05:0.95]] 119 | [tmAP,tmIoU,tacc,AP] = get_PR_curve(annot, xmldata, testvideos, 
% ---------------------------------------------------------
function actionPaths(dopts)
% ---------------------------------------------------------
% Copyright (c) 2017, Gurkirt Singh
% This code and is available
% under the terms of MIT License provided in LICENSE.
% Please retain this notice and LICENSE if you use
% this file (or any portion of it) in your project.
% ---------------------------------------------------------
% ACTIONPATHS Build per-class action paths for every video in the list.
%
%   For each video named in dopts.vidList the frame-level detections under
%   <dopts.detDir><videoID>/ are read, one set of paths is generated per
%   action class, and the result is saved as the variable 'allpaths' in
%   <dopts.actPathDir><videoID>-actionpaths.mat. Videos whose output file
%   already exists are skipped.

overlapForNms = 0.45;
linkIou = dopts.iouThresh;
linkCost = dopts.costtype;
linkGap = dopts.gap;
classCount = length(dopts.actions);
videos = getVideoNames(dopts.vidList);
NumVideos = length(videos);

for vid = 1:NumVideos
    tic;
    videoID = videos{vid};
    pathsSaveName = [dopts.actPathDir,videoID,'-actionpaths.mat'];
    if exist(pathsSaveName,'file')
        continue;   % paths for this video were computed earlier
    end

    fprintf('computing tubes for vide [%d out of %d] video ID = %s\n',vid,NumVideos, videoID);

    %% read the detections of every frame of the video
    fprintf('Reading detections ');
    frames = readDetections([dopts.detDir,videoID,'/']);
    fprintf('\nDone reading detections\n');

    fprintf('Gernrating action paths ...........\n');

    %% parallel loop over the action classes, one path set per class
    allpaths = cell(1);
    parfor a = 1:classCount
        allpaths{a} = genActionPaths(frames, a, overlapForNms, linkIou, linkCost, linkGap);
    end

    fprintf('results are being saved in::: %s for %d classes\n',pathsSaveName,length(allpaths));
    save(pathsSaveName,'allpaths');
    fprintf('All Done in %03d Seconds\n',round(toc));
end

disp('done computing action paths');

end
% -------------------------------------------------------------------------
function [videos] = getVideoNames(split_file)
% -------------------------------------------------------------------------
% GETVIDEONAMES Read the video names listed in a split file.
%
%   Prints the file name, reads every whitespace-delimited token of
%   SPLIT_FILE with textscan('%s') and returns the tokens as a 1-by-N cell
%   array of character vectors, in file order. A file without tokens
%   yields an empty 1-by-0 cell.
%
%   An error is raised when SPLIT_FILE cannot be opened.
fprintf('Get both lis is %s\n',split_file);
[fid, msg] = fopen(split_file,'r');
if fid == -1
    error('getVideoNames:cannotOpen', ...
        'Cannot open video list "%s": %s', split_file, msg);
end
% Close the handle on every exit path; it used to stay open after each call.
closer = onCleanup(@() fclose(fid)); %#ok<NASGU>

data = textscan(fid, '%s');
% textscan returns an N-by-1 cell column; callers expect a row.
videos = reshape(data{1}, 1, []);
end
function fusedActionPaths(dopts)
% AUTORIGHTS
% ---------------------------------------------------------
% Copyright (c) 2016, Gurkirt Singh
%
% This code and is available
% under the terms of the Simplified BSD License provided in
% LICENSE. Please retain this notice and LICENSE if you use
% this file (or any portion of it) in your project.
% ---------------------------------------------------------
% FUSEDACTIONPATHS Build per-class action paths from two detection streams.
%
%   For each video named in dopts.vidList the detections under
%   <dopts.basedetDir><videoID>/ and <dopts.topdetDir><videoID>/ are read,
%   paths are generated class by class using dopts.fusiontype and
%   dopts.fuseiouth, and the result is saved as the variable 'allpaths' in
%   <dopts.actPathDir><videoID>-actionpaths.mat. Videos whose output file
%   already exists are skipped.

overlapForNms = 0.45;
linkIou = dopts.iouThresh;
linkCost = dopts.costtype;
linkGap = dopts.gap;
fuseIou = dopts.fuseiouth;
fuseKind = dopts.fusiontype;
classCount = length(dopts.actions);
videos = getVideoNames(dopts.vidList);

NumVideos = length(videos);
timimngs = zeros(NumVideos,1);   % linking time per video (kept for inspection only)

for vid = 1:NumVideos
    tt = tic;
    videoID = videos{vid};
    pathsSaveName = [dopts.actPathDir,videoID,'-actionpaths.mat'];
    if exist(pathsSaveName,'file')
        continue;   % paths for this video were computed earlier
    end

    fprintf('computing tubes for vide [%d out of %d] video ID = %s\n',vid,NumVideos, videoID);

    fprintf('Reading detection files searlially ');
    frames = readDetections([dopts.basedetDir,videoID,'/'],[dopts.topdetDir,videoID,'/']);
    fprintf('\nDone reading detection files \n');
    fprintf('Gernrating action paths ...........\n');

    %% serial loop over the action classes, one path set per class
    thpath = tic;
    allpaths = cell(1);
    for a = 1:classCount
        allpaths{a} = genActionPaths(frames,a,overlapForNms,fuseIou,fuseKind,linkIou,linkCost,linkGap);
    end
    timimngs(vid) = toc(thpath);

    fprintf('Completed linking \n');
    fprintf('results are being saved in::: %s\n',pathsSaveName);
    save(pathsSaveName,'allpaths');
    fprintf('All Done in %03d Seconds\n',round(toc(tt)));
end

disp('done computing action paths');
end
[boxes,allscores] = boost_fusion(baseBoxes,topBoxes,baseAllScores,topAllScores,fuseiouth,a); 92 | pick = nms(boxes,nms_thresh); 93 | boxes = boxes(pick(1:min(10,length(pick))),:); 94 | allscores = allscores(pick(1:min(10,length(pick))),:); 95 | 96 | else %% fusion type is cat // union-set fusion 97 | [baseBoxes,baseAllScores] = dofilter(baseBoxes,baseAllScores,a,nms_thresh); 98 | [topBoxes,topAllScores] = dofilter(topBoxes,topAllScores,a,nms_thresh); 99 | boxes = [baseBoxes;topBoxes]; 100 | allscores = [baseAllScores;topAllScores]; 101 | pick = nms(boxes,nms_thresh); 102 | boxes = boxes(pick(1:min(10,length(pick))),:); 103 | allscores = allscores(pick(1:min(10,length(pick))),:); 104 | end 105 | 106 | end 107 | 108 | 109 | function [boxes,allscores] = dofilter(boxes, allscores,a,nms_thresh) 110 | scores = allscores(:,a); 111 | pick = scores>0.001; 112 | scores = scores(pick); 113 | boxes = boxes(pick,:); 114 | allscores = allscores(pick,:); 115 | [~,pick] = sort(scores,'descend'); 116 | to_pick = min(50,size(pick,1)); 117 | pick = pick(1:to_pick); 118 | scores = scores(pick); 119 | boxes = boxes(pick,:); 120 | allscores = allscores(pick,:); 121 | pick = nms([boxes scores], nms_thresh); 122 | pick = pick(1:min(10,length(pick))); 123 | boxes = [boxes(pick,:),scores(pick,:)]; 124 | allscores = allscores(pick,:); 125 | end 126 | 127 | % --------------------------------------------------------- 128 | function [sb,ss] = boost_fusion(sb, fb,ss,fs,fuseiouth,a) % sb - spatial boxes, fb - flow boxes, ss/fs - their all-class score rows 129 | % --------------------------------------------------------- 130 | % Boost each spatial box with the scores of its best-overlapping flow box (IoU >= fuseiouth, a flow box is used at most once), renormalise the fused scores, then append the flow boxes that were never used. 131 | nb = size(sb,1); % num boxes 132 | box_spatial = [sb(:,1:2) sb(:,3:4)-sb(:,1:2)+1]; 133 | box_flow = [fb(:,1:2) fb(:,3:4)-fb(:,1:2)+1]; 134 | coveredboxes = []; 135 | 136 | for i=1:nb 137 | ovlp = inters_union(box_spatial(i,:), box_flow); % ovlp has 1x5 or 5x1 dim 138 | if ~isempty(ovlp) 139 | [movlp, maxind] = max(ovlp); 140 | % ismember(maxind,coveredboxes) is a scalar test; isempty(ismember(coveredboxes,maxind)) was true only while coveredboxes was still empty 141 | if movlp>=fuseiouth && ~ismember(maxind,coveredboxes) 142 | ms = 
ss(i,:) + fs(maxind,:)*movlp; 143 | ms = ms/sum(ms); 144 | sb(i,5) = ms(a); 145 | ss(i,:) = ms; 146 | coveredboxes = [coveredboxes;maxind]; 147 | end 148 | end 149 | end 150 | 151 | nb = size(fb,1); 152 | % scalar membership test: also true when coveredboxes is empty, so unmatched flow boxes are always kept 153 | for i=1:nb 154 | if ~ismember(i,coveredboxes) 155 | sb = [sb;fb(i,:)]; 156 | ss = [ss;fs(i,:)]; 157 | end 158 | end 159 | end 160 | 161 | 162 | function iou = inters_union(bounds1,bounds2) 163 | % ------------------------------------------------------------------------ 164 | inters = rectint(bounds1,bounds2); 165 | ar1 = bounds1(:,3).*bounds1(:,4); 166 | ar2 = bounds2(:,3).*bounds2(:,4); 167 | union = bsxfun(@plus,ar1,ar2')-inters; 168 | iou = inters./(union+0.001); 169 | end 170 | 171 | % ------------------------------------------------------------------------- 172 | function list = sortdirlist(dirname) 173 | list = dir(dirname); 174 | list = sort({list.name}); 175 | end 176 | 177 | % ------------------------------------------------------------------------- 178 | function [videos] = getVideoNames(split_file) 179 | % ------------------------------------------------------------------------- 180 | fprintf('Get both lis %s\n',split_file); 181 | fid = fopen(split_file,'r'); 182 | data = textscan(fid, '%s'); fclose(fid); % release the split-file handle once it has been read 183 | videos = cell(1); 184 | count = 0; 185 | 186 | for i=1:length(data{1}) 187 | filename = cell2mat(data{1}(i,1)); 188 | count = count +1; 189 | videos{count} = filename; 190 | % videos(i).vid = str2num(cell2mat(data{1}(i,1))); 191 | end 192 | 193 | end 194 | 195 | function frames = readDetections(detectionDir,top_detectionDir ) 196 | 197 | detectionList = sortdirlist([detectionDir,'*.mat']); 198 | frames = struct([]); 199 | numframes = length(detectionList); 200 | scores = 0; 201 | loc = 0; 202 | for f = 1 : numframes 203 | filename = [detectionDir,detectionList{f}]; 204 | load(filename); % load loc and scores variable 205 | loc = [loc(:,1)*320, loc(:,2)*240, loc(:,3)*320, loc(:,4)*240]; 206 | loc(loc(:,1)<0,1) = 0; 207 | loc(loc(:,2)<0,2) = 0; 208 | 
loc(loc(:,3)>319,3) = 319; 209 | loc(loc(:,4)>239,4) = 239; 210 | loc = loc + 1; 211 | frames(f).baseBoxes = loc; 212 | frames(f).baseScores = [scores(:,2:end),scores(:,1)]; 213 | 214 | filename = [top_detectionDir,detectionList{f}]; 215 | load(filename); % load loc and scores variable 216 | loc = [loc(:,1)*320, loc(:,2)*240, loc(:,3)*320, loc(:,4)*240]; 217 | loc(loc(:,1)<0,1) = 0; 218 | loc(loc(:,2)<0,2) = 0; 219 | loc(loc(:,3)>319,3) = 319; 220 | loc(loc(:,4)>239,4) = 239; 221 | loc = loc + 1; 222 | frames(f).topBoxes = loc; 223 | frames(f).topScores = [scores(:,2:end),scores(:,1)]; 224 | frames(f).meanScores = (frames(f).topScores + frames(f).baseScores)/2.0; 225 | end 226 | 227 | end 228 | 229 | 230 | -------------------------------------------------------------------------------- /online-tubes/actionpath/incremental_linking.m: -------------------------------------------------------------------------------- 1 | % ------------------------------------------------------------------------- 2 | function live_paths = incremental_linking(frames,iouth,costtype,jumpgap,threhgap) 3 | % ------------------------------------------------------------------------- 4 | 5 | num_frames = length(frames); 6 | 7 | %% online path building 8 | 9 | live_paths = struct(); %% Stores live paths 10 | dead_paths = struct(); %% Store the paths that has been terminated 11 | dp_count = 0; 12 | for t = 1:num_frames 13 | num_box = size(frames(t).boxes,1); 14 | if t==1 15 | for b = 1 : num_box 16 | live_paths(b).boxes = frames(t).boxes(b,:); 17 | live_paths(b).scores = frames(t).scores(b); 18 | live_paths(b).allScores(t,:) = frames(t).allScores(b,:); 19 | live_paths(b).pathScore = frames(t).scores(b); 20 | live_paths(b).foundAT(t) = 1; 21 | live_paths(b).count = 1; 22 | live_paths(b).lastfound = 0; %less than 5 mean yes 23 | end 24 | else 25 | lp_count = getPathCount(live_paths); 26 | 27 | % fprintf(' %d ', t); 28 | edge_scores = zeros(lp_count,num_box); 29 | 30 | for lp = 1 : lp_count 31 | 
edge_scores(lp,:) = score_of_edge(live_paths(lp),frames(t),iouth,costtype); 32 | end 33 | 34 | 35 | dead_count = 0 ; 36 | coverd_boxes = zeros(1,num_box); 37 | path_order_score = zeros(1,lp_count); 38 | for lp = 1 : lp_count 39 | if live_paths(lp).lastfound < jumpgap %less than 5 mean yes 40 | box_to_lp_score = edge_scores(lp,:); 41 | if sum(box_to_lp_score)>0 %%checking if atleast there is one match 42 | [m_score,maxInd] = max(box_to_lp_score); 43 | live_paths(lp).count = live_paths(lp).count + 1; 44 | lpc = live_paths(lp).count; 45 | live_paths(lp).boxes(lpc,:) = frames(t).boxes(maxInd,:); 46 | live_paths(lp).scores(lpc) = frames(t).scores(maxInd); 47 | live_paths(lp).allScores(lpc,:) = frames(t).allScores(maxInd,:); 48 | live_paths(lp).pathScore = live_paths(lp).pathScore + m_score; 49 | live_paths(lp).foundAT(lpc) = t; 50 | live_paths(lp).lastfound = 0; 51 | edge_scores(:,maxInd) = 0; 52 | coverd_boxes(maxInd) = 1; 53 | else 54 | live_paths(lp).lastfound = live_paths(lp).lastfound +1; 55 | end 56 | 57 | scores = sort(live_paths(lp).scores,'ascend'); 58 | num_sc = length(scores); 59 | path_order_score(lp) = mean(scores(max(1,num_sc-jumpgap):num_sc)); 60 | 61 | else 62 | dead_count = dead_count + 1; 63 | end 64 | end 65 | 66 | % Sort the path based on scoe of the boxes and terminate dead path 67 | 68 | [live_paths,dead_paths,dp_count] = sort_live_paths(live_paths,.... 
69 | path_order_score,dead_paths,dp_count,jumpgap); 70 | lp_count = getPathCount(live_paths); 71 | % start new paths using boxes that are not assigned 72 | if sum(coverd_boxes)0 118 | path_order_score = zeros(1,lp_count); 119 | 120 | for lp = 1 : length(live_paths) 121 | scores = sort(live_paths(lp).scores,'descend'); 122 | num_sc = length(scores); 123 | path_order_score(lp) = mean(scores(1:min(20,num_sc))); 124 | end 125 | 126 | [~,ind] = sort(path_order_score,'descend'); 127 | for lpc = 1 : length(live_paths) 128 | olp = ind(lpc); 129 | sorted_live_paths(lpc).start = live_paths(olp).start; 130 | sorted_live_paths(lpc).end = live_paths(olp).end; 131 | sorted_live_paths(lpc).boxes = live_paths(olp).boxes; 132 | sorted_live_paths(lpc).scores = live_paths(olp).scores; 133 | sorted_live_paths(lpc).allScores = live_paths(olp).allScores; 134 | sorted_live_paths(lpc).pathScore = live_paths(olp).pathScore; 135 | sorted_live_paths(lpc).foundAT = live_paths(olp).foundAT; 136 | sorted_live_paths(lpc).count = live_paths(olp).count; 137 | sorted_live_paths(lpc).lastfound = live_paths(olp).lastfound; 138 | end 139 | end 140 | 141 | % ------------------------------------------------------------------------- 142 | function gap_filled_paths = fill_gaps(paths,gap) 143 | % ------------------------------------------------------------------------- 144 | gap_filled_paths = struct(); 145 | if isfield(paths,'boxes') 146 | g_count = 1; 147 | 148 | for lp = 1 : getPathCount(paths) 149 | if length(paths(lp).foundAT)>gap 150 | gap_filled_paths(g_count).start = paths(lp).foundAT(1); 151 | gap_filled_paths(g_count).end = paths(lp).foundAT(end); 152 | gap_filled_paths(g_count).pathScore = paths(lp).pathScore; 153 | gap_filled_paths(g_count).foundAT = paths(lp).foundAT; 154 | gap_filled_paths(g_count).count = paths(lp).count; 155 | gap_filled_paths(g_count).lastfound = paths(lp).lastfound; 156 | count = 1; 157 | i = 1; 158 | while i <= length(paths(lp).scores) 159 | diff_found = 
paths(lp).foundAT(i)-paths(lp).foundAT(max(i-1,1)); 160 | if count == 1 || diff_found == 1 161 | gap_filled_paths(g_count).boxes(count,:) = paths(lp).boxes(i,:); 162 | gap_filled_paths(g_count).scores(count) = paths(lp).scores(i); 163 | gap_filled_paths(g_count).allScores(count,:) = paths(lp).allScores(i,:); 164 | i = i + 1; 165 | count = count + 1; 166 | else 167 | for d = 1 : diff_found 168 | gap_filled_paths(g_count).boxes(count,:) = paths(lp).boxes(i,:); 169 | gap_filled_paths(g_count).scores(count) = paths(lp).scores(i); 170 | gap_filled_paths(g_count).allScores(count,:) = paths(lp).allScores(i,:); 171 | count = count + 1; 172 | end 173 | i = i + 1; 174 | end 175 | end 176 | g_count = g_count + 1; 177 | end 178 | end 179 | end 180 | 181 | 182 | % ------------------------------------------------------------------------- 183 | function [sorted_live_paths,dead_paths,dp_count] = sort_live_paths(live_paths,... 184 | path_order_score,dead_paths,dp_count,gap) 185 | % ------------------------------------------------------------------------- 186 | 187 | sorted_live_paths = struct(); 188 | [~,ind] = sort(path_order_score,'descend'); 189 | lpc = 0; 190 | for lp = 1 : getPathCount(live_paths) 191 | olp = ind(lp); 192 | if live_paths(ind(lp)).lastfound < gap 193 | lpc = lpc + 1; 194 | sorted_live_paths(lpc).boxes = live_paths(olp).boxes; 195 | sorted_live_paths(lpc).scores = live_paths(olp).scores; 196 | sorted_live_paths(lpc).allScores = live_paths(olp).allScores; 197 | sorted_live_paths(lpc).pathScore = live_paths(olp).pathScore; 198 | sorted_live_paths(lpc).foundAT = live_paths(olp).foundAT; 199 | sorted_live_paths(lpc).count = live_paths(olp).count; 200 | sorted_live_paths(lpc).lastfound = live_paths(olp).lastfound; 201 | else 202 | dp_count = dp_count + 1; 203 | dead_paths(dp_count).boxes = live_paths(olp).boxes; 204 | dead_paths(dp_count).scores = live_paths(olp).scores; 205 | dead_paths(dp_count).allScores = live_paths(olp).allScores; 206 | 
dead_paths(dp_count).pathScore = live_paths(olp).pathScore; 207 | dead_paths(dp_count).foundAT = live_paths(olp).foundAT; 208 | dead_paths(dp_count).count = live_paths(olp).count; 209 | dead_paths(dp_count).lastfound = live_paths(olp).lastfound; 210 | 211 | end 212 | end 213 | 214 | 215 | 216 | 217 | % ------------------------------------------------------------------------- 218 | function score = score_of_edge(v1,v2,iouth,costtype) 219 | % ------------------------------------------------------------------------- 220 | 221 | N2 = size(v2.boxes,1); 222 | score = zeros(1,N2); 223 | 224 | % try 225 | bounds1 = [v1.boxes(end,1:2) v1.boxes(end,3:4)-v1.boxes(end,1:2)+1]; 226 | % catch 227 | % fprintf('catch here') 228 | % end 229 | bounds2 = [v2.boxes(:,1:2) v2.boxes(:,3:4)-v2.boxes(:,1:2)+1]; 230 | iou = inters_union(bounds1,bounds2); 231 | 232 | for i = 1 : N2 233 | 234 | if iou(i)>=iouth 235 | 236 | scores2 = v2.scores(i); 237 | scores1 = v1.scores(end); 238 | score_similarity = sqrt(sum((v1.allScores(end,:) - v2.allScores(i,:)).^2)); 239 | if strcmp(costtype, 'score') 240 | score(i) = scores2; 241 | elseif strcmp(costtype, 'scrSim') 242 | score(i) = 1-score_similarity; 243 | elseif strcmp(costtype, 'scrMinusSim') 244 | score(i) = scores2 + (1 - score_similarity); 245 | end 246 | 247 | end 248 | 249 | end 250 | 251 | % ------------------------------------------------------------------------- 252 | function lp_count = getPathCount(live_paths) 253 | % ------------------------------------------------------------------------- 254 | 255 | if isfield(live_paths,'boxes') 256 | lp_count = length(live_paths); 257 | else 258 | lp_count = 0; 259 | end 260 | 261 | % ------------------------------------------------------------------------- 262 | function iou = inters_union(bounds1,bounds2) 263 | % ------------------------------------------------------------------------- 264 | 265 | inters = rectint(bounds1,bounds2); 266 | ar1 = bounds1(:,3).*bounds1(:,4); 267 | ar2 = 
bounds2(:,3).*bounds2(:,4); 268 | union = bsxfun(@plus,ar1,ar2')-inters; 269 | 270 | iou = inters./(union+eps); 271 | -------------------------------------------------------------------------------- /online-tubes/actionpath/nms.m: -------------------------------------------------------------------------------- 1 | function pick = nms(boxes, overlap) 2 | % Non-maximum suppression. 3 | % pick = nms(boxes, overlap) 4 | % 5 | % Greedily select high-scoring detections and skip detections that 6 | % overlap a previously selected detection by more than the threshold. 7 | % 8 | % Return value 9 | % pick Indices of locally maximal detections 10 | % 11 | % Arguments 12 | % boxes Detection bounding boxes, one per row: [x1 y1 x2 y2 ... score] (the last column is the score) 13 | % overlap Overlap threshold for suppression 14 | % For a selected box Bi, all boxes Bj whose overlap with Bi 15 | % exceeds this threshold are suppressed. Overlap here is the 16 | % intersection over union, |Bi \cap Bj| / |Bi \cup Bj|, with 17 | % areas computed as (x2-x1).*(y2-y1) (no +1 pixel term). 18 | 19 | % AUTORIGHTS 20 | % ------------------------------------------------------- 21 | % Copyright (C) 2011-2012 Ross Girshick 22 | % Copyright (C) 2008, 2009, 2010 Pedro Felzenszwalb, Ross Girshick 23 | % Copyright (C) 2007 Pedro Felzenszwalb, Deva Ramanan 24 | % 25 | % This file is part of the voc-releaseX code 26 | % (http://people.cs.uchicago.edu/~rbg/latent/) 27 | % and is available under the terms of an MIT-like license 28 | % provided in COPYING. Please retain this notice and 29 | % COPYING if you use this file (or a portion of it) in 30 | % your project. 
31 | % ------------------------------------------------------- 32 | 33 | if isempty(boxes) 34 | pick = []; 35 | else 36 | x1 = boxes(:,1); 37 | y1 = boxes(:,2); 38 | x2 = boxes(:,3); 39 | y2 = boxes(:,4); 40 | s = boxes(:,end); 41 | area = (x2-x1) .* (y2-y1); 42 | %area = (x2-x1+1) .* (y2-y1+1); 43 | 44 | [vals, I] = sort(s); 45 | pick = []; 46 | while ~isempty(I) 47 | last = length(I); 48 | i = I(last); 49 | pick = [pick; i]; 50 | suppress = [last]; 51 | for pos = 1:last-1 52 | j = I(pos); 53 | xx1 = max(x1(i), x1(j)); 54 | yy1 = max(y1(i), y1(j)); 55 | xx2 = min(x2(i), x2(j)); 56 | yy2 = min(y2(i), y2(j)); 57 | w = xx2-xx1; 58 | h = yy2-yy1; 59 | 60 | % w = xx2-xx1+1; 61 | % h = yy2-yy1+1; 62 | 63 | if w > 0 && h > 0 64 | % compute overlap 65 | inter = w*h; 66 | o = inter / (area(j) + area(i) - inter); 67 | if o > overlap 68 | suppress = [suppress; pos]; 69 | end 70 | end 71 | end 72 | I(suppress) = []; 73 | end 74 | end 75 | -------------------------------------------------------------------------------- /online-tubes/eval/compute_spatio_temporal_iou.m: -------------------------------------------------------------------------------- 1 | 2 | % ###################################################################################################################################################################################### 3 | % We are here talking about spatio-temporal detections, i.e. a set of ground-truth bounding boxes that 4 | % I will denote by g_t, with t between t_g^b and t_g^e (beginning and end time of the ground-truth) 5 | % versus a detection which is also a set of bounding boxes, denoted by d_t, with t between t_d^e et t_d^e. 
6 | % 7 | % a) temporal iou = T_i / T_u 8 | % this is the intersection over union between the timing of the tubes, 9 | % ie mathematically T_i / T_u with 10 | % the intersection T_i = min(t_g^e,t_d^e)-max(t_g^b,t_d^b)+1 (taken as 0 unless min(t_g^e,t_d^e) > max(t_g^b,t_d^b)) 11 | % and the union T_u = max(t_g^e,t_d^e)-min(t_g^b,t_d^b)+1 12 | % 13 | % b) for each t from max(tgb,tdb) to min(tde,tge), we compute the IoU between g_t and d_t, and average them 14 | % 15 | % Multiplying (a) and (b) is the same as computing the average of the spatial iou over all frames in T_u of the two tubes, with a spatial iou of 0 for frames where only one box exists. 16 | % c) as this is standard in detection problem, if there are multiple detections for the same groundtruth detection, the first one is counted as positive and the other ones as negatives 17 | % ###################################################################################################################################################################################### 18 | %{ 19 | gt_fnr = 1xn double 20 | gt_bb = nx4 double - [x y w h] 21 | dt_fnr = 1xm double 22 | dt_bb = mx4 double - [x y w h] 23 | %} 24 | % ------------------------------------------------------------------------- 25 | function st_iou = compute_spatio_temporal_iou(gt_fnr, gt_bb, dt_fnr, dt_bb) 26 | % ------------------------------------------------------------------------- 27 | 28 | % time gt begin 29 | tgb = gt_fnr(1); 30 | % time gt end 31 | tge = gt_fnr(end); 32 | %time dt begin 33 | tdb = dt_fnr(1); 34 | tde = dt_fnr(end); 35 | % temporal intersection 36 | T_i = double(max(0, min(tge,tde)-max(tgb,tdb))); 37 | 38 | if T_i>0 39 | T_i = T_i +1; 40 | % temporal union 41 | T_u = double(max(tge,tde) - min(tgb,tdb)+1); 42 | %temporal IoU 43 | T_iou = T_i/T_u; 44 | % intersect frame numbers 45 | int_fnr = max(tgb,tdb):min(tge,tde); 46 | 47 | % find the ind of the intersected frames in the detected frames 48 | [~,int_find_dt] = ismember(int_fnr, dt_fnr); 49 | [~,int_find_gt] = ismember(int_fnr, 
gt_fnr); 50 | 51 | assert(length(int_find_dt)==length(int_find_gt)); 52 | pf = find(int_find_gt>0,1); % fall-back frame index so that 'pf = pf' below is defined even when the first intersected frame has no ground-truth entry 53 | iou = zeros(length(int_find_dt),1); 54 | for i=1:length(int_find_dt) 55 | if int_find_gt(i)<1 56 | % no ground-truth index for this frame: keep using the previously selected frame 57 | pf = pf; 58 | else 59 | pf = i; 60 | end 61 | 62 | gt_bound = gt_bb(int_find_gt(pf),:); 63 | dt_bound = dt_bb(int_find_dt(pf),:)+1; 64 | 65 | % gt_bound = [gt_bound(:,1:2) gt_bound(:,3:4)-gt_bound(:,1:2)]; 66 | % dt_bound = [dt_bound(:,1:2) dt_bound(:,3:4)-dt_bound(:,1:2)]; 67 | iou(i) = inters_union(double(gt_bound),double(dt_bound)); 68 | end 69 | % final spatio-temporal IoU: temporal IoU times the mean spatial IoU over the shared frames 70 | st_iou = T_iou*mean(iou); 71 | else 72 | st_iou =0; 73 | end 74 | % % iou_thresh = 0.2,...,0.6 % 'Learing to track paper' takes 0.2 for UCF101 and 0.5 for JHMDB 75 | % if delta >= iou_thresh 76 | % % consider this tube as valid detection 77 | % end 78 | 79 | end 80 | 81 | % ------------------------------------------------------------------------- 82 | function iou = inters_union(bounds1,bounds2) 83 | % ------------------------------------------------------------------------- 84 | 85 | inters = rectint(bounds1,bounds2); 86 | ar1 = bounds1(:,3).*bounds1(:,4); 87 | ar2 = bounds2(:,3).*bounds2(:,4); 88 | union = bsxfun(@plus,ar1,ar2')-inters; 89 | 90 | iou = inters./(union+eps); 91 | 92 | end 93 | -------------------------------------------------------------------------------- /online-tubes/eval/get_PR_curve.m: -------------------------------------------------------------------------------- 1 | %%################################################################################################################################################## 2 | 3 | %% Author: Gurkirt Singh 4 | %% Release date: 26th January 2017 5 | % STEP-1: loop over the videos present in the predicted Tubes 6 | % STEP-2: for each video get the GT Tubes 7 | % STEP-3: Compute the spatio-temporal overlap between GT tube and predicted 8 | % tubes 9 | % STEP-4: then label tp 1 or fp 0 to each predicted tube 10 | % 
STEP-5: Compute PR and AP for each class using scores, tp and fp in allscore 11 | %################################################################################################################################################## 12 | 13 | function [mAP,mAIoU,acc,AP] = get_PR_curve(annot, xmldata, testlist, actions, iou_th) 14 | % load(xmlfile) 15 | num_vid = length(testlist); 16 | num_actions = length(actions); 17 | AP = zeros(num_actions,1); 18 | averageIoU = zeros(num_actions,1); 19 | 20 | cc = zeros(num_actions,1); 21 | for a=1:num_actions 22 | allscore{a} = zeros(10000,2,'single'); 23 | end 24 | 25 | total_num_gt_tubes = zeros(num_actions,1); 26 | % count all the gt tubes from all the vidoes for label a 27 | % total_num_detection = zeros(num_actions,1); 28 | 29 | preds = zeros(num_vid,1) - 1; 30 | gts = zeros(num_vid,1); 31 | annotNames = {annot.name}; 32 | dtNames = {xmldata.videoName}; 33 | for vid=1:num_vid 34 | maxscore = -10000; 35 | [action,~] = getActionName(testlist{vid}); %%get action name to which this video belongs to 36 | [~,action_id] = find(strcmp(action, actions)); %% process only the videos from current action a 37 | [~,gtVidInd] = find(strcmp(annotNames,testlist{vid})); 38 | [~,dtVidInd] = find(strcmp(dtNames,testlist{vid})); 39 | 40 | dt_tubes = sort_detection(xmldata(dtVidInd)); 41 | gt_tubes = annot(gtVidInd).tubes; 42 | 43 | num_detection = length(dt_tubes.class); 44 | num_gt_tubes = length(gt_tubes); 45 | 46 | % total_num_detection = total_num_detection + num_detection; 47 | for gtind = 1:num_gt_tubes 48 | action_id = gt_tubes(gtind).class; 49 | total_num_gt_tubes(action_id) = total_num_gt_tubes(action_id) + 1; 50 | end 51 | gts(vid) = action_id; 52 | dt_labels = dt_tubes.class; 53 | covered_gt_tubes = zeros(num_gt_tubes,1); 54 | for dtind = 1:num_detection 55 | dt_fnr = dt_tubes.framenr(dtind).fnr; 56 | dt_bb = dt_tubes.boxes(dtind).bxs; 57 | dt_label = dt_labels(dtind); 58 | if dt_tubes.score(dtind)>maxscore 59 | preds(vid) = dt_label; 
60 | maxscore = dt_tubes.score(dtind); 61 | end 62 | cc(dt_label) = cc(dt_label) + 1; 63 | 64 | ioumax=-inf;maxgtind=0; 65 | for gtind = 1:num_gt_tubes 66 | action_id = gt_tubes(gtind).class; 67 | if ~covered_gt_tubes(gtind) && dt_label == action_id 68 | gt_fnr = gt_tubes(gtind).sf:gt_tubes(gtind).ef; 69 | % if isempty(gt_fnr) 70 | % continue 71 | % end 72 | gt_bb = gt_tubes(gtind).boxes; 73 | iou = compute_spatio_temporal_iou(gt_fnr, gt_bb, dt_fnr, dt_bb); 74 | if iou>ioumax 75 | ioumax=iou; 76 | maxgtind=gtind; 77 | end 78 | end 79 | end 80 | 81 | if ioumax>iou_th 82 | covered_gt_tubes(maxgtind) = 1; 83 | allscore{dt_label}(cc(dt_label),:) = [dt_tubes.score(dtind),1]; 84 | averageIoU(dt_label) = averageIoU(dt_label) + ioumax; 85 | else 86 | allscore{dt_label}(cc(dt_label),:) = [dt_tubes.score(dtind),0]; 87 | end 88 | 89 | end 90 | end 91 | 92 | for a=1:num_actions 93 | allscore{a} = allscore{a}(1:cc(a),:); 94 | scores = allscore{a}(:,1); 95 | labels = allscore{a}(:,2); 96 | [~, si] = sort(scores,'descend'); 97 | % scores = scores(si); 98 | labels = labels(si); 99 | fp=cumsum(labels==0); 100 | tp=cumsum(labels==1); 101 | cdet =0; 102 | if ~isempty(tp)>0 103 | cdet = tp(end); 104 | averageIoU(a) = (averageIoU(a)+0.000001)/(tp(end)+0.00001); 105 | end 106 | 107 | recall=tp/total_num_gt_tubes(a); 108 | precision=tp./(fp+tp); 109 | AP(a) = xVOCap(recall,precision); 110 | draw = 0; 111 | if draw 112 | % plot precision/recall 113 | plot(recall,precision,'-'); 114 | grid; 115 | xlabel 'recall' 116 | ylabel 'precision' 117 | title(sprintf('class: %s, AP = %.3f',actions{a},AP(a))); 118 | end 119 | % fprintf('Action %02d AP = %0.5f and AIOU %0.5f GT %03d total det %02d correct det %02d %s\n', a, AP(a),averageIoU(a),total_num_gt_tubes(a),length(tp),cdet,actions{a}); 120 | 121 | end 122 | acc = mean(preds==gts); 123 | AP(isnan(AP)) = 0; 124 | mAP = mean(AP); 125 | averageIoU(isnan(averageIoU)) = 0; 126 | mAIoU = mean(averageIoU); 127 | 128 | 129 | %% 
------------------------------------------------------------------------------------------------------------------------------------------------ 130 | function [action,vidID] = getActionName(str) 131 | %------------------------------------------------------------------------------------------------------------------------------------------------ 132 | indx = strsplit(str, '/'); 133 | action = indx{1}; 134 | vidID = indx{2}; 135 | %% 136 | function sorted_tubes = sort_detection(dt_tubes) 137 | 138 | sorted_tubes = dt_tubes; 139 | 140 | if ~isempty(dt_tubes.class) 141 | 142 | num_detection = length(dt_tubes.class); 143 | scores = dt_tubes.score; 144 | [~,indexs] = sort(scores,'descend'); 145 | for dt = 1 : num_detection 146 | dtind = indexs(dt); 147 | sorted_tubes.framenr(dt).fnr = dt_tubes.framenr(dtind).fnr; 148 | sorted_tubes.boxes(dt).bxs = dt_tubes.boxes(dtind).bxs; 149 | sorted_tubes.class(dt) = dt_tubes.class(dtind); 150 | sorted_tubes.score(dt) = dt_tubes.score(dtind); 151 | sorted_tubes.nr(dt) = dt; 152 | end 153 | end 154 | %% 155 | -------------------------------------------------------------------------------- /online-tubes/eval/xVOCap.m: -------------------------------------------------------------------------------- 1 | function ap = xVOCap(rec,prec) 2 | % From the PASCAL VOC 2011 devkit 3 | 4 | mrec=[0 ; rec ; 1]; 5 | mpre=[0 ; prec ; 0]; 6 | for i=numel(mpre)-1:-1:1 7 | mpre(i)=max(mpre(i),mpre(i+1)); 8 | end 9 | i=find(mrec(2:end)~=mrec(1:end-1))+1; 10 | ap=sum((mrec(i)-mrec(i-1)).*mpre(i)); -------------------------------------------------------------------------------- /online-tubes/frameAp.m: -------------------------------------------------------------------------------- 1 | % --------------------------------------------------------- 2 | % Copyright (c) 2017, Gurkirt Singh 3 | % This code and is available 4 | % under the terms of MIT License provided in LICENSE. 
5 | % Please retain this notice and LICENSE if you use 6 | % this file (or any portion of it) in your project. 7 | % --------------------------------------------------------- 8 | 9 | %% This is main script to compute frame mean AP %% 10 | %% this code is very new so hasn't been tested a lot 11 | % Input: Detection directory; annotation file path; split file path 12 | % Output: computes frame AP for all the detection directories 13 | % It should produce results almost identical to test_ucf24.py 14 | 15 | function frameAP() 16 | 17 | addpath(genpath('eval/')); 18 | addpath(genpath('utils/')); 19 | addpath(genpath('actionpath/')); 20 | data_root = '/mnt/mars-fast/datasets'; 21 | save_root = '/mnt/mars-gamma/datasets'; 22 | iou_th = 0.5; 23 | model_type = 'CONV'; 24 | dataset = 'ucf24'; 25 | list_id = '01'; 26 | split_file = sprintf('%s/%s/splitfiles/testlist%s.txt',data_root,dataset,list_id); 27 | annotfile = sprintf('%s/%s/splitfiles/annots.mat',data_root,dataset); 28 | annot = load(annotfile); 29 | annot = annot.annot; 30 | testlist = getVideoNames(split_file); 31 | num_vid = length(testlist); 32 | num_actions = 24; 33 | 34 | logfile = fopen('frameAP.log','w'); % open log file 35 | 36 | imgType = 'rgb'; iteration_num = 120000; 37 | det_dirs1 = sprintf('%s/%s/detections/%s-%s-%s-%06d/',save_root,dataset,model_type,imgType,list_id,iteration_num); 38 | imgType = 'brox'; iteration_num = 120000; 39 | det_dirs2 = sprintf('%s/%s/detections/%s-%s-%s-%06d/',save_root,dataset,model_type,imgType,list_id,iteration_num); 40 | imgType = 'fastOF'; iteration_num = 120000; 41 | det_dirs3 = sprintf('%s/%s/detections/%s-%s-%s-%06d/',save_root,dataset,model_type,imgType,list_id,iteration_num); 42 | 43 | combinations = {{det_dirs1},{det_dirs2},{det_dirs3},... 44 | {det_dirs1,det_dirs3,'boost'},{det_dirs1,det_dirs2,'boost'},... 45 | {det_dirs1,det_dirs3,'cat'},{det_dirs1,det_dirs2,'cat'},... 
46 | {det_dirs1,det_dirs3,'mean'},{det_dirs1,det_dirs2,'mean'}}; 47 | 48 | for c=1:length(combinations) 49 | comb = combinations{c}; 50 | line = comb{1}; 51 | if length(comb)>1 52 | fusion_type = comb{3}; 53 | line = [line,' ',comb{2},' \n\n fusion type: ',fusion_type,'\n\n']; 54 | 55 | else 56 | fusion_type = 'none'; 57 | end 58 | 59 | line = sprintf('Evaluation for %s\n',line); 60 | fprintf('%s',line) 61 | fprintf(logfile,'%s',line); 62 | AP = zeros(num_actions,1); 63 | cc = zeros(num_actions,1); 64 | for a=1:num_actions 65 | allscore{a} = zeros(24*20*160000,2,'single'); 66 | end 67 | 68 | total_num_gt_boxes = zeros(num_actions,1); 69 | annotNames = {annot.name}; 70 | % match detections to ground truth frame by frame; a ground-truth box can be claimed by at most one detection 71 | for vid=1:num_vid 72 | video_name = testlist{vid}; 73 | [~,gtVidInd] = find(strcmp(annotNames, testlist{vid})); 74 | gt_tubes = annot(gtVidInd).tubes; 75 | numf = annot(gtVidInd).num_imgs; 76 | num_gt_tubes = length(gt_tubes); 77 | if mod(vid,5) == 0 78 | fprintf('Done procesing %d videos out of %d %s\n', vid, num_vid, video_name) 79 | end 80 | for nf = 1:numf 81 | gt_boxes = get_gt_boxes(gt_tubes,nf); 82 | dt_boxes = get_dt_boxes(comb, video_name, nf, num_actions, fusion_type); 83 | num_gt_boxes = size(gt_boxes,1); 84 | for g = 1:num_gt_boxes 85 | total_num_gt_boxes(gt_boxes(g,5)) = total_num_gt_boxes(gt_boxes(g,5)) + 1; 86 | end 87 | covered_gt_boxes = zeros(num_gt_boxes,1); 88 | for d = 1 : size(dt_boxes,1) 89 | dt_score = dt_boxes(d,5); 90 | dt_label = dt_boxes(d,6); 91 | cc(dt_label) = cc(dt_label) + 1; 92 | ioumax=-inf; maxgtind=0; 93 | if num_gt_boxes>0 && any(gt_boxes(:,5) == dt_label) 94 | for g = 1:num_gt_boxes 95 | if ~covered_gt_boxes(g) && dt_label == gt_boxes(g,5) % only a ground-truth box of the detection's own class may be matched 96 | iou = compute_spatial_iou(gt_boxes(g,1:4), dt_boxes(d,1:4)); 97 | if iou>ioumax 98 | ioumax=iou; 99 | maxgtind=g; 100 | end 101 | end 102 | end 103 | end 104 | 105 | if ioumax>=iou_th 106 | covered_gt_boxes(maxgtind) = 1; 107 | allscore{dt_label}(cc(dt_label),:) = [dt_score,1]; % tp detection 108 | else 109 | 
allscore{dt_label}(cc(dt_label),:) = [dt_score,0]; % fp detection 110 | end 111 | 112 | end 113 | 114 | end 115 | end 116 | % Sort scores and then reorder tp fp labels in result precision and recall for each action 117 | for a=1:num_actions 118 | allscore{a} = allscore{a}(1:cc(a),:); 119 | scores = allscore{a}(:,1); 120 | labels = allscore{a}(:,2); 121 | [~, si] = sort(scores,'descend'); 122 | % scores = scores(si); 123 | labels = labels(si); 124 | fp=cumsum(labels==0); 125 | tp=cumsum(labels==1); 126 | recall=tp/total_num_gt_boxes(a); 127 | precision=tp./(fp+tp); 128 | AP(a) = xVOCap(recall,precision); 129 | line = sprintf('Action %02d AP = %0.5f \n', a, AP(a)); 130 | fprintf('%s',line); 131 | fprintf(logfile,'%s',line); 132 | end 133 | 134 | AP(isnan(AP)) = 0; 135 | mAP = mean(AP); 136 | line = sprintf('\nMean AP::=> %.5f\n\n',mAP); 137 | fprintf('%s',line); 138 | fprintf(logfile,'%s',line); 139 | end 140 | fclose(logfile); end % the log file is closed once every combination has been evaluated 141 | 142 | 143 | % ------------------------------------------------------------------------- 144 | function [videos] = getVideoNames(split_file) 145 | % ------------------------------------------------------------------------- 146 | fprintf('Get both lis is %s\n',split_file); 147 | fid = fopen(split_file,'r'); 148 | data = textscan(fid, '%s'); fclose(fid); % release the split-file handle once it has been read 149 | videos = cell(1); 150 | count = 0; 151 | 152 | for i=1:length(data{1}) 153 | filename = cell2mat(data{1}(i,1)); 154 | count = count +1; 155 | videos{count} = filename; 156 | % videos(i).vid = str2num(cell2mat(data{1}(i,1))); 157 | end 158 | end 159 | 160 | function gt_boxes = get_gt_boxes(gt_tubes,nf) 161 | gt_boxes = []; 162 | % one row [box, class] for every tube whose sf..ef range contains frame nf 163 | for t = 1:length(gt_tubes) 164 | if nf >= gt_tubes(t).sf && nf <= gt_tubes(t).ef 165 | b_ind = nf - gt_tubes(t).sf + 1; 166 | box = [gt_tubes(t).boxes(b_ind,:), gt_tubes(t).class]; 167 | gt_boxes = [gt_boxes;box]; 168 | end 169 | end 170 | end 171 | 172 | function dt_boxes = get_dt_boxes(detection_dir, video_name, nf, num_actions, fusion_type) 173 | dt_boxes = []; 174 | 
%% apply nms per class 175 | [boxes,scores] = read_detections(detection_dir, video_name, nf); 176 | for a = 1 : num_actions 177 | cls_boxes = get_cls_detection(boxes,scores,a,fusion_type); 178 | dt_boxes = [dt_boxes; cls_boxes]; 179 | end 180 | end 181 | 182 | function cls_boxes = get_cls_detection(boxes,scores,a,fusion_type) 183 | 184 | if strcmp(fusion_type,'none') 185 | cls_boxes = dofilter(boxes(1).b,scores(1).s,a); 186 | elseif strcmp(fusion_type,'mean') 187 | cls_boxes = dofilter(boxes(1).b,(scores(1).s+scores(2).s)/2.0,a); 188 | elseif strcmp(fusion_type,'cat') 189 | cls_boxes_base = dofilter(boxes(1).b,scores(1).s,a); 190 | cls_boxes_top = dofilter(boxes(2).b,scores(2).s,a); 191 | all_boxes = [cls_boxes_base;cls_boxes_top]; 192 | pick = nms(all_boxes(:,1:5),0.45); 193 | cls_boxes = all_boxes(pick,:); 194 | elseif strcmp(fusion_type,'boost') 195 | cls_boxes_base = dofilter(boxes(1).b,scores(1).s,a); 196 | cls_boxes_top = dofilter(boxes(2).b,scores(2).s,a); 197 | all_boxes = boost_boxes(cls_boxes_base,cls_boxes_top); 198 | pick = nms(all_boxes(:,1:5),0.45); 199 | cls_boxes = all_boxes(pick,:); 200 | else 201 | error('Spacify correct fusion technique'); 202 | end 203 | 204 | end 205 | 206 | function cls_boxes_base = boost_boxes(cls_boxes_base,cls_boxes_top) 207 | 208 | box_spatial = [cls_boxes_base(:,1:2) cls_boxes_base(:,3:4)-cls_boxes_base(:,1:2)+1]; 209 | box_flow = [cls_boxes_top(:,1:2) cls_boxes_top(:,3:4)-cls_boxes_top(:,1:2)+1]; 210 | coveredboxes = []; 211 | nb = size(cls_boxes_base,1); % num boxes 212 | for i=1:nb 213 | ovlp = inters_union(box_spatial(i,:), box_flow); % ovlp has 1x5 or 5x1 dim 214 | if ~isempty(ovlp) 215 | [movlp, maxind] = max(ovlp); 216 | if movlp>=0.3 && isempty(ismember(coveredboxes,maxind)) 217 | cls_boxes_base(i,5) = cls_boxes_base(i,5) + cls_boxes_top(maxind,5)*movlp; 218 | coveredboxes = [coveredboxes;maxind]; 219 | end 220 | end 221 | end 222 | 223 | nb = size(cls_boxes_top,1); 224 | for i=1:nb 225 | if 
~ismember(coveredboxes,i) 226 | cls_boxes_base = [cls_boxes_base; cls_boxes_top(i,:)]; 227 | end 228 | end 229 | 230 | end 231 | 232 | function [bxs, sc] = read_detections(detection_dir, video_name, nf) 233 | detection_dir1 = detection_dir{1}; 234 | det_file = sprintf('%s%s/%05d.mat', detection_dir1, video_name, nf); 235 | load(det_file); % loads loc and scores variable 236 | boxes = [loc(:,1)*320, loc(:,2)*240, loc(:,3)*320, loc(:,4)*240] + 1; 237 | boxes(boxes(:,1)<1,1) = 1; boxes(boxes(:,2)<1,2) = 1; 238 | boxes(boxes(:,3)>320,3) = 320; boxes(boxes(:,4)>240,4) = 240; 239 | scores = [scores(:,2:end),scores(:,1)]; 240 | bxs = struct(); 241 | sc = struct(); 242 | bxs(1).b = boxes; 243 | sc(1).s = scores; 244 | if length(detection_dir)>1 245 | detection_dir1 = detection_dir{2}; 246 | det_file = sprintf('%s%s/%05d.mat', detection_dir1, video_name, nf); 247 | load(det_file); % loads loc and scores variable 248 | boxes = [loc(:,1)*320, loc(:,2)*240, loc(:,3)*320, loc(:,4)*240] + 1; 249 | boxes(boxes(:,1)<1,1) = 1; boxes(boxes(:,2)<1,2) = 1; 250 | boxes(boxes(:,3)>320,3) = 320; boxes(boxes(:,4)>240,4) = 240; 251 | scores = [scores(:,2:end),scores(:,1)]; 252 | bxs(2).b = boxes; 253 | sc(2).s = scores; 254 | end 255 | 256 | end 257 | 258 | 259 | function boxes = dofilter(boxes,scores,a) 260 | scores = scores(:,a); 261 | pick = scores>0.01; 262 | scores = scores(pick); 263 | boxes = boxes(pick,:); 264 | [~,pick] = sort(scores,'descend'); 265 | to_pick = min(50,size(pick,1)); 266 | pick = pick(1:to_pick); 267 | scores = scores(pick); 268 | boxes = boxes(pick,:); 269 | pick = nms([boxes scores],0.45); 270 | pick = pick(1:min(20,length(pick))); 271 | boxes = boxes(pick,:); 272 | scores = scores(pick); 273 | cls = scores*0 + a; 274 | boxes = [boxes,scores, cls]; 275 | end 276 | 277 | function iou = inters_union(bounds1,bounds2) 278 | % ------------------------------------------------------------------------ 279 | inters = rectint(bounds1,bounds2); 280 | ar1 = 
bounds1(:,3).*bounds1(:,4); 281 | ar2 = bounds2(:,3).*bounds2(:,4); 282 | union = bsxfun(@plus,ar1,ar2')-inters; 283 | iou = inters./(union+0.001); 284 | end 285 | 286 | 287 | function iou = compute_spatial_iou(gt_box, dt_box) 288 | dt_box = [dt_box(1:2), dt_box(3:4)-dt_box(1:2)+1]; 289 | inter = rectint(gt_box,dt_box); 290 | ar1 = gt_box(3)*gt_box(4); 291 | ar2 = dt_box(3)*dt_box(4); 292 | union = ar1 + ar2 - inter; 293 | iou = inter/union; 294 | end -------------------------------------------------------------------------------- /online-tubes/gentube/PARactionPathSmoother.m: -------------------------------------------------------------------------------- 1 | % --------------------------------------------------------- 2 | % Copyright (c) 2017, Gurkirt Singh 3 | % This code and is available 4 | % under the terms of MIT License provided in LICENSE. 5 | % Please retain this notice and LICENSE if you use 6 | % this file (or any portion of it) in your project. 7 | % --------------------------------------------------------- 8 | 9 | 10 | function final_tubes = parActionPathSmoother(actionpaths,alpha,num_action) 11 | 12 | % load data 13 | % fprintf('Number of video intest set %d \n', actionpath,alpha,num_action,calpha,useNeg 14 | % alpha = 1; 15 | 16 | final_tubes = struct('starts',[],'ts',[],'te',[],'label',[],'path_total_score',[],... 17 | 'dpActionScore',[],'dpPathScore',[],... 
18 | 'path_boxes',cell(1),'path_scores',cell(1),'video_id',cell(1)); 19 | 20 | 21 | alltubes = cell(length(actionpaths),1); 22 | 23 | parfor t = 1 : length(actionpaths) 24 | % fprintf('[%03d/%03d] calpha %04d\n',t,length(tubes),uint16(calpha*100)); 25 | % fprintf('.'); 26 | video_id = actionpaths(t).video_id; 27 | % fprintf('[doing for %s %d out of %d]\n',video_id,t,length(tubes)); 28 | alltubes{t} = actionPathSmoother4oneVideo(actionpaths(t).paths,alpha,num_action,video_id) ; 29 | end 30 | 31 | action_count = 1; 32 | for t = 1 : length(actionpaths) 33 | vid_tubes = alltubes{t}; 34 | for k=1:length(vid_tubes.ts) 35 | final_tubes.starts(action_count) = vid_tubes.starts(k); 36 | final_tubes.ts(action_count) = vid_tubes.ts(k); 37 | final_tubes.video_id{action_count} = vid_tubes.video_id{k}; 38 | final_tubes.te(action_count) = vid_tubes.te(k); 39 | final_tubes.dpActionScore(action_count) = vid_tubes.dpActionScore(k); 40 | final_tubes.label(action_count) = vid_tubes.label(k); 41 | final_tubes.dpPathScore(action_count) = vid_tubes.dpPathScore(k); 42 | final_tubes.path_total_score(action_count) = vid_tubes.path_total_score(k); 43 | final_tubes.path_boxes{action_count} = vid_tubes.path_boxes{k}; 44 | final_tubes.path_scores{action_count} = vid_tubes.path_scores{k}; 45 | action_count = action_count + 1; 46 | end 47 | 48 | end 49 | end 50 | 51 | function final_tubes = actionPathSmoother4oneVideo(video_paths,alpha,num_action,video_id) 52 | action_count =1; 53 | final_tubes = struct('starts',[],'ts',[],'te',[],'label',[],'path_total_score',[],... 54 | 'dpActionScore',[],'dpPathScore',[],'vid',[],... 
55 | 'path_boxes',cell(1),'path_scores',cell(1),'video_id',cell(1)); 56 | 57 | if ~isempty(video_paths) 58 | %gt_ind = find(strcmp(video_id,annot.videoName)); 59 | %number_frames = length(video_paths{1}(1).idx); 60 | % alpha = alpha-3.2; 61 | for a = 1 : num_action 62 | action_paths = video_paths{a}; 63 | num_act_paths = getPathCount(action_paths); 64 | for p = 1 : num_act_paths 65 | M = action_paths(p).allScores(:,1:num_action)'; %(:,1:num_action)'; 66 | %M = normM(M); 67 | %M = [M(a,:),1-M(a,:)]; 68 | M = M +20; 69 | 70 | [pred_path,time,D] = dpEM_max(M,alpha(a)); 71 | [ Ts, Te, Scores, Label, DpPathScore] = extract_action(pred_path,time,D,a); 72 | for k = 1 : length(Ts) 73 | final_tubes.starts(action_count) = action_paths(p).start; 74 | final_tubes.ts(action_count) = Ts(k); 75 | final_tubes.video_id{action_count} = video_id; 76 | % final_tubes.vid(action_count) = vid_num; 77 | final_tubes.te(action_count) = Te(k); 78 | final_tubes.dpActionScore(action_count) = Scores(k); 79 | final_tubes.label(action_count) = Label(k); 80 | final_tubes.dpPathScore(action_count) = DpPathScore(k); 81 | final_tubes.path_total_score(action_count) = mean(action_paths(p).scores); 82 | final_tubes.path_boxes{action_count} = action_paths(p).boxes; 83 | final_tubes.path_scores{action_count} = action_paths(p).scores; 84 | action_count = action_count + 1; 85 | end 86 | 87 | end 88 | 89 | end 90 | end 91 | end 92 | 93 | function M = normM(M) 94 | for i = 1: size(M,2) 95 | M(:,i) = M(:,i)/sum(M(:,i)); 96 | end 97 | end 98 | function [ts,te,scores,label,total_score] = extract_action(p,q,D,action) 99 | % p(1:1) = 1; 100 | indexs = find(p==action); 101 | 102 | if isempty(indexs) 103 | ts = []; te = []; scores = []; label = []; total_score = []; 104 | 105 | else 106 | indexs_diff = [indexs,indexs(end)+1] - [indexs(1)-2,indexs]; 107 | ts = find(indexs_diff>1); 108 | 109 | if length(ts)>1 110 | te = [ts(2:end)-1,length(indexs)]; 111 | else 112 | te = length(indexs); 113 | end 114 | ts = 
indexs(ts); 115 | te = indexs(te); 116 | scores = (D(action,q(te)) - D(action,q(ts)))./(te-ts); 117 | label = ones(length(ts),1)*action; 118 | total_score = ones(length(ts),1)*D(p(end),q(end))/length(p); 119 | end 120 | end 121 | 122 | % ------------------------------------------------------------------------- 123 | function lp_count = getPathCount(live_paths) 124 | % ------------------------------------------------------------------------- 125 | 126 | if isfield(live_paths,'boxes') 127 | lp_count = length(live_paths); 128 | else 129 | lp_count = 0; 130 | end 131 | end 132 | -------------------------------------------------------------------------------- /online-tubes/gentube/convert2eval.m: -------------------------------------------------------------------------------- 1 | % --------------------------------------------------------- 2 | % Copyright (c) 2017, Gurkirt Singh 3 | % This code and is available 4 | % under the terms of MIT License provided in LICENSE. 5 | % Please retain this notice and LICENSE if you use 6 | % this file (or any portion of it) in your project. 
% ---------------------------------------------------------
% Input: smoothed tubes
% Output: filtered out tubes with proper scoring

function xmld = convert2eval(final_tubes,min_num_frames,kthresh,topk,vids)
% Turn the smoothed tubes into the per-video structure used for evaluation.
%
% For every video in vids an entry with its videoName is created. A tube is
% kept when the mean of its topk highest frame scores exceeds
% kthresh(label) and it spans more than min_num_frames frames. For every
% kept tube the score, running number, class, absolute frame numbers and
% boxes ([x y w h], converted from corner form) are stored.

xmld = struct([]);

for vid_idx = 1 : length(vids)
    tube_idx = find(strcmp(final_tubes.video_id,vids{vid_idx}));
    xmld(vid_idx).videoName = vids{vid_idx};

    % only referenced by the commented-out JHMDB scoring below
    actionscore = final_tubes.dpActionScore(tube_idx);
    tube_scores = final_tubes.path_scores(1,tube_idx);
    seg_starts = final_tubes.ts(tube_idx);
    path_starts = final_tubes.starts(tube_idx);
    seg_ends = final_tubes.te(tube_idx);

    kept = 0; % tubes accepted so far for this video
    for a = 1 : length(seg_starts)
        frames = seg_starts(a):seg_ends(a);
        %       act_dp_score = actionscore(a); %% only useful on JHMDB
        frame_scores = cell2mat(tube_scores(a));

        % mean of the topk best frame scores inside the segment
        ranked = sort(frame_scores(frames),'descend');
        topk_mean = mean(ranked(1:min(topk,length(ranked))));

        corners = final_tubes.path_boxes{tube_idx(a)}(frames,:);
        bxs = [corners(:,1:2), corners(:,3:4)-corners(:,1:2)];

        label = final_tubes.label(tube_idx(a));

        if topk_mean > kthresh(label) && (seg_ends(a)-seg_starts(a)) > min_num_frames
            kept = kept + 1;
            xmld(vid_idx).score(kept) = topk_mean;
            xmld(vid_idx).nr(kept) = kept;
            xmld(vid_idx).class(kept) = label;
            xmld(vid_idx).framenr(kept).fnr = frames + path_starts(a)-1;
            xmld(vid_idx).boxes(kept).bxs = bxs;
        end
    end

end

--------------------------------------------------------------------------------
/online-tubes/gentube/dpEM_max.m:
--------------------------------------------------------------------------------
% ---------------------------------------------------------
% Original code comes from
% https://team.inria.fr/perception/research/skeletalquads/
% Copyright (c) 2014, Georgios Evangelidis and Gurkirt Singh,
% This code and is available
% under the terms of MIT License provided in LICENSE.
% Please retain this notice and LICENSE if you use
% this file (or any portion of it) in your project.
% ---------------------------------------------------------

% M = <10xnum_frames>
% r = 10 (action labels)
% c = frame indices in a video

function [p,q,D] = dpEM_max(M,alpha)
% Label every frame by dynamic programming over the score matrix M.
%
% M(i,j) is the score of label i at frame j. Keeping the same label between
% consecutive frames costs nothing, changing it costs alpha:
%   V(L1,L2) = 0,     if L1=L2
%   V(L1,L2) = alpha, if L1~=L2
%
% Outputs: p - best label per frame, q - the frame indices 1..c,
%          D - accumulated scores, same size as M.

[num_labels, num_frames] = size(M);

% accumulated scores, with an extra all-zero column in front of frame 1
D = zeros(num_labels, num_frames+1);
D(:, 2:(num_frames+1)) = M;

% switch_cost(k,i): penalty for coming from label k when labelling i;
% alpha everywhere except on the diagonal
switch_cost = alpha * (1 - eye(num_labels));

% back_ptr(i,j): best label of the previous column when frame j gets label i
back_ptr = zeros(num_labels, num_frames);

% Forward pass, one frame at a time. Column i of the candidate matrix is the
% previous accumulated column minus the penalty for switching to label i,
% so the column-wise max yields, for all labels at once, what the original
% per-label loop computed.
for j = 2:num_frames+1
    candidates = bsxfun(@minus, D(:, j-1), switch_cost);
    [best_score, best_label] = max(candidates, [], 1);
    D(:, j) = D(:, j) + best_score';
    back_ptr(:, j-1) = best_label';
end

% drop the extra first column again
D = D(:, 2:(num_frames+1));

% start the traceback from the best label of the last frame
q = num_frames;
[~, p] = max(D(:, num_frames));

label = p;
frame = q;
while frame > 1
    % follow the stored pointer to the label of the previous frame
    label = back_ptr(label, frame);
    p = [label, p];
    q = [frame-1, q];
    frame = frame - 1;
end

%
% back_ptr stores all the max indices in the forward pass
% during the backward pass , a predicited path is constructed using these indices values

--------------------------------------------------------------------------------
/online-tubes/gentube/readALLactionPaths.m:
--------------------------------------------------------------------------------
% ---------------------------------------------------------
% Copyright (c) 2017, Gurkirt Singh
% This code and is available
% under the terms of MIT License provided in LICENSE.
% Please retain this notice and LICENSE if you use
% this file (or any portion of it) in your project.
% ---------------------------------------------------------

function actionpath = readALLactionPaths(videolist,actionPathDir,step)
% Load the saved action paths of every step-th video of a split file.
%
% videolist     - text file listing the video names
% actionPathDir - directory prefix; the paths of a video are read from
%                 [actionPathDir, videoID, '-actionpaths.mat'], which must
%                 contain the variable allpaths
% step          - stride over the video list
% actionpath    - struct array with fields video_id and paths (empty when
%                 the list is empty)
%
% Raises an error naming the missing file when a video has no saved paths.

videos = getVideoNames(videolist);
NumVideos = length(videos);

actionpath = struct([]);
fprintf('Loading action paths of %d videos\n',NumVideos);
count = 1;
for vid=1:step:NumVideos

    videoID = videos(vid).video_id;
    pathsSaveName = [actionPathDir,videoID,'-actionpaths.mat'];

    if ~exist(pathsSaveName,'file')
        % the path is now part of the message (the old format had no %s)
        error('Action path file %s does not exist; please generate the action paths first', pathsSaveName)
    end

    % fprintf('loading vid %d %s \n',vid,pathsSaveName);
    saved = load(pathsSaveName, 'allpaths'); % load into a struct, not the workspace
    actionpath(count).video_id = videoID;
    actionpath(count).paths = saved.allpaths;
    count = count+1;
end
end

function [videos] = getVideoNames(split_file)
% -------------------------------------------------------------------------
% Read the whitespace-separated video names of split_file into a 1xN struct
% array with the field video_id (1x0 when the file lists nothing).
fid = fopen(split_file,'r');
if fid == -1
    error('Could not open split file %s', split_file);
end
data = textscan(fid, '%s');
fclose(fid); % the handle was previously left open

% A cell-array value makes struct() build one element per name; an empty
% list therefore gives an empty struct array instead of a field-less 1x1.
videos = struct('video_id', reshape(data{1},1,[]));

end

--------------------------------------------------------------------------------
/online-tubes/utils/createdires.m:
--------------------------------------------------------------------------------
% ---------------------------------------------------------
% Copyright (c) 2017, Gurkirt Singh
% This code and is available
% under the terms of MIT License provided in LICENSE.
% Please retain this notice and LICENSE if you use
% this file (or any portion of it) in your project.
% ---------------------------------------------------------


function createdires(basedirs,actions)
% Create one sub-directory per action inside every base directory.
%
% basedirs - cell array of directory prefixes (action names are appended
%            directly, so each prefix should end with a file separator)
% actions  - cell array of action names (row or column)
for s = 1: length(basedirs)
    savename = basedirs{s};
    % a for-loop walks over columns, so force a row to visit every name
    for action = reshape(actions,1,[])
        saveNameaction = [savename,action{1}];
        if ~isdir(saveNameaction)
            mkdir(saveNameaction);
        end
    end
end
end
--------------------------------------------------------------------------------
/online-tubes/utils/initDatasetOpts.m:
--------------------------------------------------------------------------------
% ---------------------------------------------------------
% Copyright (c) 2017, Gurkirt Singh
% This code and is available
% under the terms of MIT License provided in LICENSE.
% Please retain this notice and LICENSE if you use
% this file (or any portion of it) in your project.
% ---------------------------------------------------------

function opts = initDatasetOpts(data_root,baseDir,dataset,imgType,model_type,listid,iteration_num,iouthresh,costtype,gap)
% Build the option struct (names, lists and directories) for one dataset and
% one detection model.
%
% data_root      - root of the datasets (images, split files, annotations)
% baseDir        - root under which detections, paths and tubes are stored
% dataset        - 'ucf24', 'JHMDB' or 'LIRIS'; opts.actions is only set for
%                  these three names
% imgType        - image type used in the directory names
% model_type     - model name used in the directory names
% listid         - split id appended to 'testlist'
% iteration_num  - training iteration of the model (stored as opts.weight)
% iouthresh      - IoU threshold, written into the directory names as a
%                  percentage
% costtype, gap  - linking settings, written into the directory names
%
% The action-path and tube directories are created only when the detection
% directory exists.

opts = struct();
opts.imgType = imgType;
opts.costtype = costtype;
opts.gap = gap;
opts.baseDir = baseDir;
opts.dataset = dataset;
opts.iouThresh = iouthresh;
opts.weight = iteration_num;
opts.listid = listid;

testlist = ['testlist',listid];
%%testlist = 'testlist01';
opts.vidList = sprintf('%s/%s/splitfiles/%s.txt',data_root,dataset,testlist);

if strcmp(dataset,'ucf24')
    opts.actions = {'Basketball','BasketballDunk','Biking','CliffDiving','CricketBowling',...
        'Diving','Fencing','FloorGymnastics','GolfSwing','HorseRiding','IceDancing',...
        'LongJump','PoleVault','RopeClimbing','SalsaSpin','SkateBoarding','Skiing',...
        'Skijet','SoccerJuggling','Surfing','TennisSwing','TrampolineJumping',...
        'VolleyballSpiking','WalkingWithDog'};
elseif strcmp(dataset,'JHMDB')
    opts.actions = {'brush_hair','catch','clap','climb_stairs','golf','jump',...
        'kick_ball','pick','pour','pullup','push','run','shoot_ball','shoot_bow',...
        'shoot_gun','sit','stand','swing_baseball','throw','walk','wave'};
elseif strcmp(dataset,'LIRIS')
    opts.actions = {'discussion', 'give_object_to_person','put_take_obj_into_from_box_desk',...
        'enter_leave_room_no_unlocking','try_enter_room_unsuccessfully','unlock_enter_leave_room',...
        'leave_baggage_unattended','handshaking','typing_on_keyboard','telephone_conversation'};
end

opts.imgDir = sprintf('%s/%s/%s-images/',data_root,dataset,imgType);

opts.detDir = sprintf('%s/%s/detections/%s-%s-%s-%06d/',baseDir,dataset,model_type,imgType,listid,iteration_num);
opts.annotFile = sprintf('%s/%s/splitfiles/finalAnnots.ma.mat',data_root,dataset);

% %d needs an integer: a product such as 0.29*100 is not exactly 29 and
% would otherwise be printed in exponent notation inside the directory name.
iou_percent = round(iouthresh*100);
opts.actPathDir = sprintf('%s/%s/actionPaths/%s-%s-%s-%06d-%s-%d-%04d/',baseDir,dataset,model_type,imgType,listid,iteration_num,costtype,gap,iou_percent);
opts.tubeDir = sprintf('%s/%s/actionTubes/%s-%s-%s-%06d-%s-%d-%04d/',baseDir,dataset,model_type,imgType,listid,iteration_num,costtype,gap,iou_percent);

if exist(opts.detDir,'dir')
    if ~isdir(opts.actPathDir)
        fprintf('Creating %s\n',opts.actPathDir);
        mkdir(opts.actPathDir)
    end
    if ~isdir(opts.tubeDir)
        mkdir(opts.tubeDir)
    end
    if strcmp(dataset,'ucf24') || strcmp(dataset,'JHMDB')
        createdires({opts.actPathDir},opts.actions)
    end
end

--------------------------------------------------------------------------------
/online-tubes/utils/initDatasetOptsFused.m:
--------------------------------------------------------------------------------
% ---------------------------------------------------------
% Copyright (c) 2017, Gurkirt Singh
% This code and is available
% under the terms of MIT License
% provided in LICENSE.
% Please retain this notice and LICENSE if you use
% this file (or any portion of it) in your project.
% ---------------------------------------------------------

function opts = initDatasetOptsFused(data_root,baseDir,dataset,imtypes,model_type, ...
    listid,iteration_nums,iouthresh,costtype,gap,fusiontype,fuseiouth)
%% data_root,baseDir,dataset,imgType,model_type,listid,iteration_num,iouthresh,costtype,gap
% Build the option struct for fusing the detections of two models.
%
% imtypes        - two image types {base, top}; opts.imgType is their names
%                  joined by '-'
% iteration_nums - training iterations of the base and the top model
% fusiontype     - fusion method, used as a directory level
% fuseiouth      - fusion IoU threshold, written into the directory names as
%                  a percentage
% The remaining arguments are stored in opts and used to compose the list,
% image, detection, action-path and tube locations below.
%
% The action-path and tube directories are created only when the base
% detection directory exists.

opts = struct();
imgType = [imtypes{1},'-',imtypes{2}];
opts.imgType = imgType;
opts.costtype = costtype;
opts.gap = gap;
opts.baseDir = baseDir;
opts.dataset = dataset;
opts.iouThresh = iouthresh;
opts.iteration_nums = iteration_nums;
opts.listid = listid;
opts.fusiontype = fusiontype;
opts.fuseiouth = fuseiouth;
testlist = ['testlist',listid];
opts.data_root = data_root;
opts.vidList = sprintf('%s/%s/splitfiles/%s.txt',data_root,dataset,testlist);

if strcmp(dataset,'ucf24')
    opts.actions = {'Basketball','BasketballDunk','Biking','CliffDiving','CricketBowling',...
        'Diving','Fencing','FloorGymnastics','GolfSwing','HorseRiding','IceDancing',...
        'LongJump','PoleVault','RopeClimbing','SalsaSpin','SkateBoarding','Skiing',...
        'Skijet','SoccerJuggling','Surfing','TennisSwing','TrampolineJumping',...
        'VolleyballSpiking','WalkingWithDog'};
elseif strcmp(dataset,'JHMDB')
    opts.actions = {'brush_hair','catch','clap','climb_stairs','golf','jump',...
        'kick_ball','pick','pour','pullup','push','run','shoot_ball','shoot_bow',...
        'shoot_gun','sit','stand','swing_baseball','throw','walk','wave'};
elseif strcmp(dataset,'LIRIS')
    opts.actions = {'discussion', 'give_object_to_person','put_take_obj_into_from_box_desk',...
        'enter_leave_room_no_unlocking','try_enter_room_unsuccessfully','unlock_enter_leave_room',...
        'leave_baggage_unattended','handshaking','typing_on_keyboard','telephone_conversation'};
end

opts.imgDir = sprintf('%s/%s/%s-images/',data_root,dataset,imtypes{1});

opts.basedetDir = sprintf('%s/%s/detections/%s-%s-%s-%06d/',baseDir,dataset,model_type,imtypes{1},listid,iteration_nums(1));
opts.topdetDir = sprintf('%s/%s/detections/%s-%s-%s-%06d/',baseDir,dataset,model_type,imtypes{2},listid,iteration_nums(2));

opts.annotFile = sprintf('%s/%s/splitfiles/annots.mat',data_root,dataset);

% %d needs an integer: a product such as 0.29*100 is not exactly 29 and
% would otherwise be printed in exponent notation inside the directory name.
iou_percent = round(iouthresh*100);
opts.actPathDir = sprintf('%s/%s/actionPaths/%s/%s-%s-%s-%s-%d-%d-%s-%d-%04d-fiou%03d/',baseDir,dataset,fusiontype,model_type,imtypes{1},imtypes{2},...
    listid,iteration_nums(1),iteration_nums(2),costtype,gap,iou_percent,uint16(fuseiouth*100));
opts.tubeDir = sprintf('%s/%s/actionTubes/%s/%s-%s-%s-%s-%d-%d-%s-%d-%04d-fiou%03d/',baseDir,dataset,fusiontype,model_type,imtypes{1},imtypes{2},...
    listid,iteration_nums(1),iteration_nums(2),costtype,gap,iou_percent,uint16(fuseiouth*100));

if exist(opts.basedetDir,'dir')
    if ~isdir(opts.actPathDir)
        fprintf('Creating %s\n',opts.actPathDir);
        mkdir(opts.actPathDir)
    end

    if ~isdir(opts.tubeDir)
        mkdir(opts.tubeDir)
    end

    if strcmp(dataset,'ucf24') || strcmp(dataset,'JHMDB')
        createdires({opts.actPathDir},opts.actions)
    end
end

%fprintf('Video List :: %s\nImage Dir :: %s\nDetection Dir:: %s\nActionpath Dir:: %s\nTube Dir:: %s\n',...
74 | % opts.vidList,opts.imgDir,opts.detDir,opts.actPathDir,opts.tubeDir) 75 | -------------------------------------------------------------------------------- /ssd.py: -------------------------------------------------------------------------------- 1 | 2 | """ SSD network Classes 3 | 4 | Original author: Ellis Brown, Max deGroot for VOC dataset 5 | https://github.com/amdegroot/ssd.pytorch 6 | 7 | Updated by Gurkirt Singh for ucf101-24 dataset 8 | """ 9 | 10 | import torch 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | from torch.autograd import Variable 14 | from layers import * 15 | from data import v2 16 | import os 17 | 18 | 19 | class SSD(nn.Module): 20 | """Single Shot Multibox Architecture 21 | The network is composed of a base VGG network followed by the 22 | added multibox conv layers. Each multibox layer branches into 23 | 1) conv2d for class conf scores 24 | 2) conv2d for localization predictions 25 | 3) associated priorbox layer to produce default bounding 26 | boxes specific to the layer's feature map size. 27 | See: https://arxiv.org/pdf/1512.02325.pdf for more details. 
28 | 29 | Args: 30 | base: VGG16 layers for input, size of either 300 or 500 31 | extras: extra layers that feed to multibox loc and conf layers 32 | head: "multibox head" consists of loc and conf conv layers 33 | """ 34 | 35 | def __init__(self, base, extras, head, num_classes): 36 | super(SSD, self).__init__() 37 | 38 | self.num_classes = num_classes 39 | # TODO: implement __call__ in PriorBox 40 | self.priorbox = PriorBox(v2) 41 | with torch.no_grad(): 42 | self.priors = self.priorbox.forward().cuda() 43 | self.num_priors = self.priors.size(0) 44 | self.size = 300 45 | 46 | # SSD network 47 | self.vgg = nn.ModuleList(base) 48 | # Layer learns to scale the l2 normalized features from conv4_3 49 | self.L2Norm = L2Norm(512, 20) 50 | self.extras = nn.ModuleList(extras) 51 | 52 | self.loc = nn.ModuleList(head[0]) 53 | self.conf = nn.ModuleList(head[1]) 54 | 55 | self.softmax = nn.Softmax(dim=1).cuda() 56 | # self.detect = Detect(num_classes, 0, 200, 0.001, 0.45) 57 | 58 | def forward(self, x): 59 | 60 | """Applies network layers and ops on input image(s) x. 61 | 62 | Args: 63 | x: input image or batch of images. Shape: [batch,3*batch,300,300]. 64 | 65 | Return: 66 | Depending on phase: 67 | test: 68 | Variable(tensor) of output class label predictions, 69 | confidence score, and corresponding location predictions for 70 | each object detected. 
Shape: [batch,topk,7] 71 | 72 | train: 73 | list of concat outputs from: 74 | 1: confidence layers, Shape: [batch*num_priors,num_classes] 75 | 2: localization layers, Shape: [batch,num_priors*4] 76 | 3: priorbox layers, Shape: [2,num_priors*4] 77 | """ 78 | 79 | sources = list() 80 | loc = list() 81 | conf = list() 82 | 83 | # apply vgg up to conv4_3 relu 84 | for k in range(23): 85 | x = self.vgg[k](x) 86 | 87 | s = self.L2Norm(x) 88 | sources.append(s) 89 | 90 | # apply vgg up to fc7 91 | for k in range(23, len(self.vgg)): 92 | x = self.vgg[k](x) 93 | sources.append(x) 94 | 95 | # apply extra layers and cache source layer outputs 96 | for k, v in enumerate(self.extras): 97 | x = F.relu(v(x), inplace=True) 98 | if k % 2 == 1: 99 | sources.append(x) 100 | 101 | # apply multibox head to source layers 102 | for (x, l, c) in zip(sources, self.loc, self.conf): 103 | loc.append(l(x).permute(0, 2, 3, 1).contiguous()) 104 | conf.append(c(x).permute(0, 2, 3, 1).contiguous()) 105 | 106 | loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1) 107 | conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1) 108 | output = (loc.view(loc.size(0), -1, 4), 109 | conf.view(conf.size(0), -1, self.num_classes), 110 | self.priors 111 | ) 112 | return output 113 | 114 | def load_weights(self, base_file): 115 | other, ext = os.path.splitext(base_file) 116 | if ext == '.pkl' or '.pth': 117 | print('Loading weights into state dict...') 118 | self.load_state_dict(torch.load(base_file, map_location=lambda storage, loc: storage)) 119 | print('Finished!') 120 | else: 121 | print('Sorry only .pth and .pkl files supported.') 122 | 123 | 124 | # This function is derived from torchvision VGG make_layers() 125 | # https://github.com/pytorch/vision/blob/master/torchvision/models/vgg.py 126 | def vgg(cfg, i, batch_norm=False): 127 | layers = [] 128 | in_channels = i 129 | for v in cfg: 130 | if v == 'M': 131 | layers += [nn.MaxPool2d(kernel_size=2, stride=2)] 132 | elif v == 'C': 133 | layers += 
[nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True)] 134 | else: 135 | conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1) 136 | if batch_norm: 137 | layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)] 138 | else: 139 | layers += [conv2d, nn.ReLU(inplace=True)] 140 | in_channels = v 141 | pool5 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1) 142 | conv6 = nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6) 143 | conv7 = nn.Conv2d(1024, 1024, kernel_size=1) 144 | layers += [pool5, conv6, 145 | nn.ReLU(inplace=True), conv7, nn.ReLU(inplace=True)] 146 | return layers 147 | 148 | 149 | def add_extras(cfg, i, batch_norm=False): 150 | # Extra layers added to VGG for feature scaling 151 | layers = [] 152 | in_channels = i 153 | flag = False 154 | for k, v in enumerate(cfg): 155 | if in_channels != 'S': 156 | if v == 'S': 157 | layers += [nn.Conv2d(in_channels, cfg[k + 1], 158 | kernel_size=(1, 3)[flag], stride=2, padding=1)] 159 | else: 160 | layers += [nn.Conv2d(in_channels, v, kernel_size=(1, 3)[flag])] 161 | flag = not flag 162 | in_channels = v 163 | return layers 164 | 165 | 166 | def multibox(vgg, extra_layers, cfg, num_classes): 167 | loc_layers = [] 168 | conf_layers = [] 169 | vgg_source = [24, -2] 170 | for k, v in enumerate(vgg_source): 171 | loc_layers += [nn.Conv2d(vgg[v].out_channels, 172 | cfg[k] * 4, kernel_size=3, padding=1)] 173 | conf_layers += [nn.Conv2d(vgg[v].out_channels, 174 | cfg[k] * num_classes, kernel_size=3, padding=1)] 175 | for k, v in enumerate(extra_layers[1::2], 2): 176 | loc_layers += [nn.Conv2d(v.out_channels, cfg[k] 177 | * 4, kernel_size=3, padding=1)] 178 | conf_layers += [nn.Conv2d(v.out_channels, cfg[k] 179 | * num_classes, kernel_size=3, padding=1)] 180 | return vgg, extra_layers, (loc_layers, conf_layers) 181 | 182 | 183 | base = { 184 | '300': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'C', 512, 512, 512, 'M', 185 | 512, 512, 512], 186 | '512': [], 187 | } 188 | extras = { 189 | '300': [256, 'S', 
512, 128, 'S', 256, 128, 256, 128, 256], 190 | '512': [], 191 | } 192 | mbox = { 193 | '300': [4, 6, 6, 6, 4, 4], # number of boxes per feature map location 194 | '512': [], 195 | } 196 | 197 | 198 | def build_ssd(size=300, num_classes=21): 199 | 200 | if size != 300: 201 | print("Error: Sorry only SSD300 is supported currently!") 202 | return 203 | 204 | return SSD(*multibox(vgg(base[str(size)], 3), 205 | add_extras(extras[str(size)], 1024), 206 | mbox[str(size)], num_classes), num_classes) 207 | -------------------------------------------------------------------------------- /test-ucf24.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2017, Gurkirt Singh 3 | 4 | This code and is available 5 | under the terms of MIT License provided in LICENSE. 6 | Please retain this notice and LICENSE if you use 7 | this file (or any portion of it) in your project. 8 | --------------------------------------------------------- 9 | """ 10 | 11 | import torch 12 | import torch.backends.cudnn as cudnn 13 | from torch.autograd import Variable 14 | from data import AnnotationTransform, UCF24Detection, BaseTransform, CLASSES, detection_collate, v2 15 | from ssd import build_ssd 16 | import torch.utils.data as data 17 | from layers.box_utils import decode, nms 18 | from utils.evaluation import evaluate_detections 19 | import os, time 20 | import argparse 21 | import numpy as np 22 | import pickle 23 | import scipy.io as sio # to save detection as mat files 24 | cfg = v2 25 | 26 | def str2bool(v): 27 | return v.lower() in ("yes", "true", "t", "1") 28 | 29 | parser = argparse.ArgumentParser(description='Single Shot MultiBox Detector Training') 30 | parser.add_argument('--version', default='v2', help='conv11_2(v2) or pool6(v1) as last layer') 31 | parser.add_argument('--basenet', default='vgg16_reducedfc.pth', help='pretrained base model') 32 | parser.add_argument('--dataset', default='ucf24', help='pretrained base model') 33 | 
parser.add_argument('--ssd_dim', default=300, type=int, help='Input Size for SSD') # only support 300 now 34 | parser.add_argument('--input_type', default='rgb', type=str, help='INput tyep default rgb can take flow as well') 35 | parser.add_argument('--jaccard_threshold', default=0.5, type=float, help='Min Jaccard index for matching') 36 | parser.add_argument('--batch_size', default=32, type=int, help='Batch size for training') 37 | parser.add_argument('--resume', default=None, type=str, help='Resume from checkpoint') 38 | parser.add_argument('--num_workers', default=0, type=int, help='Number of workers used in dataloading') 39 | parser.add_argument('--eval_iter', default='120000,', type=str, help='Number of training iterations') 40 | parser.add_argument('--cuda', default=True, type=str2bool, help='Use cuda to train model') 41 | parser.add_argument('--ngpu', default=1, type=str2bool, help='Use cuda to train model') 42 | parser.add_argument('--lr', '--learning-rate', default=1e-3, type=float, help='initial learning rate') 43 | parser.add_argument('--visdom', default=False, type=str2bool, help='Use visdom to for loss visualization') 44 | parser.add_argument('--data_root', default='/mnt/mars-fast/datasets/', help='Location of VOC root directory') 45 | parser.add_argument('--save_root', default='/mnt/mars-gamma/datasets/', help='Location to save checkpoint models') 46 | parser.add_argument('--iou_thresh', default=0.5, type=float, help='Evaluation threshold') 47 | parser.add_argument('--conf_thresh', default=0.01, type=float, help='Confidence threshold for evaluation') 48 | parser.add_argument('--nms_thresh', default=0.45, type=float, help='NMS threshold') 49 | parser.add_argument('--topk', default=20, type=int, help='topk for evaluation') 50 | 51 | args = parser.parse_args() 52 | 53 | if args.input_type != 'rgb': 54 | args.conf_thresh = 0.05 55 | 56 | if args.cuda and torch.cuda.is_available(): 57 | torch.set_default_tensor_type('torch.cuda.FloatTensor') 58 | else: 59 
def _sync_cuda():
    """Wait for pending CUDA work so wall-clock timings are meaningful.

    Does nothing when the run is on CPU; calling ``torch.cuda.synchronize``
    unconditionally would fail there.
    """
    if args.cuda and torch.cuda.is_available():
        torch.cuda.synchronize()


def test_net(net, save_root, exp_name, input_type, dataset, iteration, num_classes, thresh=0.5 ):
    """Test a SSD network on an Action image database.

    For every frame of `dataset` the network output is decoded and
      * the raw per-prior scores and decoded boxes are written to a ``.mat``
        file under ``<save_root>detections/CONV-<input_type>-<listid>-<iter>/``,
      * per-class detections above ``args.conf_thresh`` are filtered with NMS
        (``args.nms_thresh``, ``args.topk``), scaled to pixel units and
        clipped to the image.
    Ground truth, detections and the list of ``.mat`` paths are pickled to
    ``<save_root>cache/<exp_name>/detection-<iter>.pkl``.

    Args:
        net: network exposing ``softmax`` and returning
            ``(loc_data, conf_preds, prior_data)`` when called on a batch.
        save_root: output root; used by plain string concatenation, so it
            must end with a path separator.
        exp_name: experiment name used for the cache sub-directory.
        input_type: tag used in the detection directory name.
        dataset: dataset exposing ``ids`` and ``video_list``.
        iteration: training iteration of the evaluated checkpoint.
        num_classes: number of classes including background (index 0).
        thresh: IoU threshold handed to ``evaluate_detections``.

    Returns:
        Whatever ``evaluate_detections`` returns for the collected boxes.
    """

    val_data_loader = data.DataLoader(dataset, args.batch_size, num_workers=args.num_workers,
                                      shuffle=False, collate_fn=detection_collate, pin_memory=True)
    image_ids = dataset.ids
    save_ids = []
    val_step = 250  # emit timing information every `val_step` batches
    num_images = len(dataset)
    video_list = dataset.video_list
    det_boxes = [[] for _ in range(len(CLASSES))]
    gt_boxes = []
    print_time = True
    count = 0
    _sync_cuda()
    ts = time.perf_counter()
    num_batches = len(val_data_loader)
    det_file = save_root + 'cache/' + exp_name + '/detection-'+str(iteration).zfill(6)+'.pkl'
    print('Number of images ', len(dataset),' number of batchs', num_batches)
    frame_save_dir = save_root+'detections/CONV-'+input_type+'-'+args.listid+'-'+str(iteration).zfill(6)+'/'
    print('\n\n\nDetections will be store in ',frame_save_dir,'\n\n')
    with torch.no_grad():
        batch_iterator = iter(val_data_loader)
        for val_itr in range(num_batches):
            report = val_itr % val_step == 0

            _sync_cuda()
            t1 = time.perf_counter()  # started before loading, so it includes data time

            images, targets, img_indexs = next(batch_iterator)
            batch_size = images.size(0)
            height, width = images.size(2), images.size(3)

            if args.cuda:
                images = images.cuda()
            output = net(images)

            loc_data = output[0]
            conf_preds = output[1]
            prior_data = output[2]

            if print_time and report:
                _sync_cuda()
                tf = time.perf_counter()
                print('Forward Time {:0.3f}'.format(tf - t1))
            for b in range(batch_size):
                # Ground truth is scaled in place from relative to pixel units.
                gt = targets[b].numpy()
                gt[:, 0] *= width
                gt[:, 2] *= width
                gt[:, 1] *= height
                gt[:, 3] *= height
                gt_boxes.append(gt)
                decoded_boxes = decode(loc_data[b].data, prior_data.data, cfg['variance']).clone()
                conf_scores = net.softmax(conf_preds[b]).data.clone()
                index = img_indexs[b]
                annot_info = image_ids[index]

                video_id = annot_info[0]
                frame_num = annot_info[1]
                videoname = video_list[video_id]
                output_dir = frame_save_dir+videoname
                os.makedirs(output_dir, exist_ok=True)

                output_file_name = output_dir+'/{:05d}.mat'.format(int(frame_num))
                save_ids.append(output_file_name)
                sio.savemat(output_file_name, mdict={'scores':conf_scores.cpu().numpy(),'loc':decoded_boxes.cpu().numpy()})

                for cl_ind in range(1, num_classes):  # index 0 is background
                    class_scores = conf_scores[:, cl_ind]
                    c_mask = class_scores.gt(args.conf_thresh)  # greater than minimum threshold
                    scores = class_scores[c_mask]
                    # Test the element count, not the rank: a squeezed
                    # single-score tensor is 0-dim while an empty selection
                    # is still 1-D, so a rank check misclassifies both.
                    if scores.numel() == 0:
                        det_boxes[cl_ind - 1].append(np.asarray([]))
                        continue
                    l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
                    boxes = decoded_boxes[l_mask].view(-1, 4)
                    # idx of highest scoring and non-overlapping boxes per class
                    ids, counts = nms(boxes, scores, args.nms_thresh, args.topk)
                    scores = scores[ids[:counts]].cpu().numpy()
                    boxes = boxes[ids[:counts]].cpu().numpy()
                    boxes[:, 0] *= width
                    boxes[:, 2] *= width
                    boxes[:, 1] *= height
                    boxes[:, 3] *= height

                    # Clip every box to the image rectangle.
                    boxes[:, 0] = np.maximum(boxes[:, 0], 0)
                    boxes[:, 2] = np.minimum(boxes[:, 2], width)
                    boxes[:, 1] = np.maximum(boxes[:, 1], 0)
                    boxes[:, 3] = np.minimum(boxes[:, 3], height)

                    cls_dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=True)
                    det_boxes[cl_ind - 1].append(cls_dets)

                count += 1
            if report:
                _sync_cuda()
                te = time.perf_counter()
                print('im_detect: {:d}/{:d} time taken {:0.3f}'.format(count, num_images, te - ts))
                _sync_cuda()
                ts = time.perf_counter()
            if print_time and report:
                _sync_cuda()
                te = time.perf_counter()
                print('NMS stuff Time {:0.3f}'.format(te - tf))
    print('Evaluating detections for itration number ', iteration)

    # Save detection after NMS along with GT
    with open(det_file, 'wb') as f:
        pickle.dump([gt_boxes, det_boxes, save_ids], f, pickle.HIGHEST_PROTOCOL)

    return evaluate_detections(gt_boxes, det_boxes, CLASSES, iou_thresh=thresh)
class AverageMeter(object):
    """Computes and stores the average and current value.

    Attributes set by ``reset``/``update``:
        val: the most recent value passed to ``update``.
        sum: running sum of ``val * n``.
        count: running sum of ``n``.
        avg: ``sum / count`` (0 while ``count`` is 0).
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear all statistics back to zero."""
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        """Record `val`, weighted as if it had been observed `n` times."""
        self.val = val
        self.sum += val * n
        self.count += n
        # Guard the division: an update with n=0 on a fresh meter would
        # otherwise raise ZeroDivisionError.
        self.avg = self.sum / self.count if self.count else 0
def voc_ap(rec, prec, use_07_metric=False):
    """Compute VOC average precision from cumulative recall and precision.

    Args:
        rec: recall after each detection, in descending-score order.
        prec: precision after each detection, same order.
        use_07_metric: when True use the VOC 2007 11-point interpolation;
            otherwise (default) integrate the area under the
            monotonically-decreasing precision envelope.

    Returns:
        The average precision as a scalar.
    """
    if use_07_metric:
        # 11 point metric: mean of the best precision at recall >= t.
        ap = 0.
        for t in np.arange(0., 1.1, 0.1):
            reached = rec >= t
            p = np.max(prec[reached]) if np.sum(reached) else 0
            ap = ap + p / 11.
        return ap

    # Append sentinel values at both ends.
    mrec = np.concatenate(([0.], rec, [1.]))
    mpre = np.concatenate(([0.], prec, [0.]))

    # Precision envelope: running maximum taken from the right.
    mpre = np.maximum.accumulate(mpre[::-1])[::-1]

    # Area under the PR curve: sum (delta recall) * precision at the points
    # where recall changes value.
    i = np.where(mrec[1:] != mrec[:-1])[0]
    return np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])


def get_gt_of_cls(gt_boxes, cls):
    """Return the boxes of `gt_boxes` whose last column equals `cls`.

    The label column is stripped from the returned rows.  With no match the
    result is an empty 1-D array.
    """
    return np.asarray([row[:-1] for row in gt_boxes if row[-1] == cls])


def compute_iou(cls_gt_boxes, box):
    """Intersection-over-union of one box against each ground-truth box.

    Args:
        cls_gt_boxes: array of ``[xmin, ymin, xmax, ymax]`` rows.
        box: a single ``[xmin, ymin, xmax, ymax]`` box.

    Returns:
        Float array with one IoU per row of `cls_gt_boxes`.  A pair whose
        union has zero area gets IoU 0 instead of dividing by zero.
    """
    gt = np.asarray(cls_gt_boxes, dtype=np.float64)
    if gt.shape[0] == 0:
        return np.zeros(0)

    iw = np.maximum(np.minimum(gt[:, 2], box[2]) - np.maximum(gt[:, 0], box[0]), 0.)
    ih = np.maximum(np.minimum(gt[:, 3], box[3]) - np.maximum(gt[:, 1], box[1]), 0.)
    intsc = iw * ih
    union = ((gt[:, 2] - gt[:, 0]) * (gt[:, 3] - gt[:, 1])
             + (box[2] - box[0]) * (box[3] - box[1]) - intsc)
    return np.divide(intsc, union, out=np.zeros_like(intsc), where=union != 0)


def evaluate_detections(gt_boxes, det_boxes, CLASSES=(), iou_thresh=0.5):
    """Frame-level mean average precision following the VOC protocol.

    Args:
        gt_boxes: one array per frame with rows
            ``[xmin, ymin, xmax, ymax, class_index]``.
        det_boxes: ``det_boxes[c][f]`` holds the detections of class ``c`` in
            frame ``f`` as rows ``[xmin, ymin, xmax, ymax, score]`` (or an
            empty array).
        CLASSES: class names; position in the sequence is the class index.
        iou_thresh: minimum IoU for a detection to count as a true positive.

    Returns:
        ``(mean_ap, ap_all, ap_strs)``: the mean over classes, the per-class
        AP array (float32) and one summary string per class.
    """
    ap_strs = []
    num_frames = len(gt_boxes)
    print('Evaluating for ', num_frames, 'frames')
    ap_all = np.zeros(len(CLASSES), dtype=np.float32)
    for cls_ind, cls in enumerate(CLASSES):  # loop over each class 'cls'
        # Lists grow as needed; a fixed-size buffer would overflow when a
        # frame yields more detections than anticipated.
        scores = []
        istp = []
        num_postives = 0.0
        for nf in range(num_frames):  # loop over each frame 'nf'
            frame_det_boxes = np.copy(det_boxes[cls_ind][nf])
            cls_gt_boxes = get_gt_of_cls(np.copy(gt_boxes[nf]), cls_ind)
            num_postives += cls_gt_boxes.shape[0]
            if frame_det_boxes.shape[0] == 0:
                continue
            # Greedy matching, best-scoring detection first; every ground
            # truth box can be claimed by at most one detection.
            for k in np.argsort(-frame_det_boxes[:, -1]):
                box = frame_det_boxes[k, :-1]
                ispositive = False
                if cls_gt_boxes.shape[0] > 0:
                    iou = compute_iou(cls_gt_boxes, box)
                    maxid = np.argmax(iou)
                    if iou[maxid] >= iou_thresh:
                        ispositive = True
                        cls_gt_boxes = np.delete(cls_gt_boxes, maxid, 0)  # remove assigned gt box
                scores.append(frame_det_boxes[k, -1])
                istp.append(1.0 if ispositive else 0.0)
        det_count = len(scores)
        if num_postives < 1:
            num_postives = 1  # avoid dividing by zero for classes without ground truth
        scores = np.asarray(scores, dtype=np.float64)
        istp = np.asarray(istp, dtype=np.float64)
        istp = istp[np.argsort(-scores)]  # reorder by descending score over all frames
        fp = np.cumsum(istp == 0).astype(np.float64)
        tp = np.cumsum(istp == 1).astype(np.float64)
        recall = tp / float(num_postives)
        precision = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
        cls_ap = voc_ap(recall, precision)  # area-under-curve metric (not the 11-point one)
        ap_all[cls_ind] = cls_ap
        ap_str = str(CLASSES[cls_ind]) + ' : ' + str(num_postives) + ' : ' + str(det_count) + ' : ' + str(cls_ap)
        ap_strs.append(ap_str)

    return np.mean(ap_all), ap_all, ap_strs


def save_detection_framewise(det_boxes, image_ids, iteration, det_save_dir=None):
    """Write one text file of detections per frame.

    Each line is ``<class_index + 1> x1 y1 x2 y2 score``.

    Args:
        det_boxes: ``det_boxes[c][f]`` holds the detections of class ``c`` in
            frame ``f`` as rows of five numbers.
        image_ids: frame identifiers; the file is ``<dir><image_id>.txt`` and
            the identifier minus its last five characters is created as a
            directory first.
        iteration: used to build the default output directory.
        det_save_dir: output directory; defaults to the original hard-coded
            location for this iteration.
    """
    if det_save_dir is None:
        det_save_dir = '/mnt/mars-beta/gur-workspace/use-ssd-data/UCF101/detections/RGB-01-{:06d}/'.format(iteration)
    elif not det_save_dir.endswith('/'):
        det_save_dir += '/'  # paths below are built by concatenation
    print('Saving detections to', det_save_dir)
    for idx, img_id in enumerate(image_ids):
        # os.makedirs replaces a shell `mkdir -p`, so odd characters in the
        # path are never interpreted by a shell.
        os.makedirs(det_save_dir + img_id[:-5], exist_ok=True)
        lines = []
        for cls_ind, cls_dets in enumerate(det_boxes):
            for det in cls_dets[idx]:
                fields = ''.join(' {:f}'.format(det[k]) for k in range(5))
                lines.append(str(cls_ind + 1) + fields + '\n')
        with open(det_save_dir + img_id + '.txt', 'w') as fid:
            fid.writelines(lines)