├── .gitignore
├── LICENSE
├── README.md
├── data
    ├── __init__.py
    ├── config.py
    └── ucf24.py
├── layers
    ├── __init__.py
    ├── box_utils.py
    ├── functions
    │   ├── __init__.py
    │   └── prior_box.py
    └── modules
    │   ├── __init__.py
    │   ├── l2norm.py
    │   └── multibox_loss.py
├── matlab-online-display
    ├── .gitignore
    ├── actionpath
    │   ├── actionPaths.m
    │   ├── fusedActionPaths.m
    │   ├── incremental_linking.m
    │   └── nms.m
    ├── display01.txt
    ├── eval
    │   ├── compute_spatio_temporal_iou.m
    │   ├── get_PR_curve.m
    │   └── xVOCap.m
    ├── frameAp.m
    ├── gentube
    │   ├── convert2eval.m
    │   ├── dpEM_max.m
    │   ├── mydpEM_max.m
    │   ├── parActionPathSmoother.m
    │   └── readALLactionPaths.m
    ├── myI01onlineTubes.m
    ├── myI02genFusedTubes.m
    └── utils
    │   ├── createdires.m
    │   ├── initDatasetOpts.m
    │   └── initDatasetOptsFused.m
├── online-tubes
    ├── .gitignore
    ├── I01onlineTubes.m
    ├── I02genFusedTubes.m
    ├── actionpath
    │   ├── actionPaths.m
    │   ├── fusedActionPaths.m
    │   ├── incremental_linking.m
    │   └── nms.m
    ├── eval
    │   ├── compute_spatio_temporal_iou.m
    │   ├── get_PR_curve.m
    │   └── xVOCap.m
    ├── frameAp.m
    ├── gentube
    │   ├── PARactionPathSmoother.m
    │   ├── convert2eval.m
    │   ├── dpEM_max.m
    │   └── readALLactionPaths.m
    └── utils
    │   ├── createdires.m
    │   ├── initDatasetOpts.m
    │   └── initDatasetOptsFused.m
├── ssd.py
├── test-ucf24.py
├── train-ucf24.py
└── utils
    ├── __init__.py
    ├── augmentations.py
    └── evaluation.py


/.gitignore:
--------------------------------------------------------------------------------
1 | 
2 | *.log
3 | *.pyc
4 | *.pyo
5 | __pycache__/
6 | .idea/
7 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2017 Gurkirt Singh
 4 | This is an adaptation of the original SSD code for the VOC dataset by Max deGroot and Ellis Brown
 5 | 
 6 | Permission is hereby granted, free of charge, to any person obtaining a copy
 7 | of this software and associated documentation files (the "Software"), to deal
 8 | in the Software without restriction, including without limitation the rights
 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 | 
24 | 


--------------------------------------------------------------------------------
/data/__init__.py:
--------------------------------------------------------------------------------
 1 | #from .voc0712 import VOCDetection, AnnotationTransform, detection_collate, VOC_CLASSES
 2 | from .ucf24 import UCF24Detection, AnnotationTransform, detection_collate, CLASSES
 3 | from .config import *
 4 | import cv2
 5 | import numpy as np
 6 | 
 7 | 
 8 | def base_transform(image, size, mean):
 9 |     x = cv2.resize(image, (size, size)).astype(np.float32)
10 |     # x = cv2.resize(np.array(image), (size, size)).astype(np.float32)
11 |     x -= mean
12 |     x = x.astype(np.float32)
13 |     return x
14 | 
15 | 
class BaseTransform:
    """Callable that applies ``base_transform`` with a fixed size and mean.

    Boxes and labels are passed through untouched so the object can be used
    wherever an ``(image, boxes, labels)`` transform is expected.
    """

    def __init__(self, size, mean):
        self.size = size
        # Converted once to float32 so every call subtracts the same array.
        self.mean = np.array(mean, dtype=np.float32)

    def __call__(self, image, boxes=None, labels=None):
        transformed = base_transform(image, self.size, self.mean)
        return transformed, boxes, labels
23 | 


--------------------------------------------------------------------------------
/data/config.py:
--------------------------------------------------------------------------------
 1 | # config.py
 2 | """  SSD network configs
 3 | 
 4 | Original author: Ellis Brown, Max deGroot for VOC dataset
 5 | https://github.com/amdegroot/ssd.pytorch
 6 | 
 7 | """
 8 | 
 9 | #SSD300 CONFIGS
10 | # newer version: use additional conv11_2 layer as last layer before multibox layers
# SSD300 "v2" prior-box configuration. Every per-layer list has one entry
# per feature map, in the same order as 'feature_maps'.
v2 = dict(
    feature_maps=[38, 19, 10, 5, 3, 1],
    min_dim=300,
    steps=[8, 16, 32, 64, 100, 300],
    min_sizes=[30, 60, 111, 162, 213, 264],
    max_sizes=[60, 111, 162, 213, 264, 315],
    # Only ratios > 1 are listed; PriorBox emits both the ratio and its
    # reciprocal for each entry in the 'v2' branch.
    aspect_ratios=[[2], [2, 3], [2, 3], [2, 3], [2], [2]],
    variance=[0.1, 0.2],
    clip=True,
    name='v2',
)
32 | 
33 | # use average pooling layer as last layer before multibox layers
# SSD300 "v1" prior-box configuration. Every per-layer list has one entry
# per feature map, in the same order as 'feature_maps'.
v1 = dict(
    feature_maps=[38, 19, 10, 5, 3, 1],
    min_dim=300,
    steps=[8, 16, 32, 64, 100, 300],
    min_sizes=[30, 60, 114, 168, 222, 276],
    # A non-positive max size makes PriorBox skip the extra square prior
    # for that feature map.
    max_sizes=[-1, 114, 168, 222, 276, 330],
    aspect_ratios=[
        [1, 1, 2, 1/2],
        [1, 1, 2, 1/2, 3, 1/3],
        [1, 1, 2, 1/2, 3, 1/3],
        [1, 1, 2, 1/2, 3, 1/3],
        [1, 1, 2, 1/2, 3, 1/3],
        [1, 1, 2, 1/2, 3, 1/3],
    ],
    variance=[0.1, 0.2],
    clip=True,
    name='v1',
)
55 | 


--------------------------------------------------------------------------------
/data/ucf24.py:
--------------------------------------------------------------------------------
  1 | """UCF24 Dataset Classes
  2 | 
  3 | Author: Gurkirt Singh for ucf101-24 dataset
  4 | 
  5 | """
  6 | 
  7 | import os
  8 | import os.path
  9 | import torch
 10 | import torch.utils.data as data
 11 | import cv2, pickle
 12 | import numpy as np
 13 | 
 14 | CLASSES = (  # always index 0
 15 |         'Basketball', 'BasketballDunk', 'Biking', 'CliffDiving', 'CricketBowling', 'Diving', 'Fencing',
 16 |         'FloorGymnastics', 'GolfSwing', 'HorseRiding', 'IceDancing', 'LongJump', 'PoleVault', 'RopeClimbing',
 17 |         'SalsaSpin','SkateBoarding', 'Skiing', 'Skijet', 'SoccerJuggling',
 18 |         'Surfing', 'TennisSwing', 'TrampolineJumping', 'VolleyballSpiking', 'WalkingWithDog')
 19 | 
 20 | 
 21 | class AnnotationTransform(object):
 22 |     """
 23 |     Same as original
 24 |     Transforms a VOC annotation into a Tensor of bbox coords and label index
 25 |     Initilized with a dictionary lookup of classnames to indexes
 26 |     Arguments:
 27 |         class_to_ind (dict, optional): dictionary lookup of classnames -> indexes
 28 |             (default: alphabetic indexing of UCF24's 24 classes)
 29 |         keep_difficult (bool, optional): keep difficult instances or not
 30 |             (default: False)
 31 |         height (int): height
 32 |         width (int): width
 33 |     """
 34 | 
 35 |     def __init__(self, class_to_ind=None, keep_difficult=False):
 36 |         self.class_to_ind = class_to_ind or dict(
 37 |             zip(CLASSES, range(len(CLASSES))))
 38 |         self.ind_to_class = dict(zip(range(len(CLASSES)),CLASSES))
 39 | 
 40 |     def __call__(self, bboxs, labels, width, height):
 41 |         res = []
 42 |         for t in range(len(labels)):
 43 |             bbox = bboxs[t,:]
 44 |             label = labels[t]
 45 |             '''pts = ['xmin', 'ymin', 'xmax', 'ymax']'''
 46 |             bndbox = []
 47 |             for i in range(4):
 48 |                 cur_pt = max(0,int(bbox[i]) - 1)
 49 |                 scale =  width if i % 2 == 0 else height
 50 |                 cur_pt = min(scale, int(bbox[i]))
 51 |                 cur_pt = float(cur_pt) / scale
 52 |                 bndbox.append(cur_pt)
 53 |             bndbox.append(label)
 54 |             res += [bndbox]  # [xmin, ymin, xmax, ymax, label_ind]
 55 |             # img_id = target.find('filename').text[:-4]
 56 |         return res  # [[xmin, ymin, xmax, ymax, label_ind], ... ]
 57 | 
 58 | 
 59 | def readsplitfile(splitfile):
 60 |     with open(splitfile, 'r') as f:
 61 |         temptrainvideos = f.readlines()
 62 |     trainvideos = []
 63 |     for vid in temptrainvideos:
 64 |         vid = vid.rstrip('\n')
 65 |         trainvideos.append(vid)
 66 |     return trainvideos
 67 | 
 68 | 
 69 | def make_lists(rootpath, imgtype, split=1, fulltest=False):
 70 |     imagesDir = rootpath + imgtype + '/'
 71 |     splitfile = rootpath + 'splitfiles/trainlist{:02d}.txt'.format(split)
 72 |     trainvideos = readsplitfile(splitfile)
 73 |     trainlist = []
 74 |     testlist = []
 75 | 
 76 |     with open(rootpath + 'splitfiles/pyannot.pkl','rb') as fff:
 77 |         database = pickle.load(fff)
 78 | 
 79 |     train_action_counts = np.zeros(len(CLASSES), dtype=np.int32)
 80 |     test_action_counts = np.zeros(len(CLASSES), dtype=np.int32)
 81 | 
 82 |     #4500ratios = np.asarray([1.1, 0.8, 4.7, 1.4, 0.9, 2.6, 2.2, 3.0, 3.0, 5.0, 6.2, 2.7,
 83 |     #                     3.5, 3.1, 4.3, 2.5, 4.5, 3.4, 6.7, 3.6, 1.6, 3.4, 0.6, 4.3])
 84 |     ratios = np.asarray([1.03, 0.75, 4.22, 1.32, 0.8, 2.36, 1.99, 2.66, 2.68, 4.51, 5.56, 2.46, 3.17, 2.76, 3.89, 2.28, 4.01, 3.08, 6.06, 3.28, 1.51, 3.05, 0.6, 3.84])
 85 |     #ratios = np.ones_like(ratios) #TODO:uncomment this line and line 155, 156 to compute new ratios might be useful for JHMDB21
 86 |     video_list = []
 87 |     for vid, videoname in enumerate(sorted(database.keys())):
 88 |         video_list.append(videoname)
 89 |         actidx = database[videoname]['label']
 90 |         istrain = True
 91 |         step = ratios[actidx]
 92 |         numf = database[videoname]['numf']
 93 |         lastf = numf-1
 94 |         if videoname not in trainvideos:
 95 |             istrain = False
 96 |             step = max(1, ratios[actidx])*3
 97 |         if fulltest:
 98 |             step = 1
 99 |             lastf = numf
100 | 
101 |         annotations = database[videoname]['annotations']
102 |         num_tubes = len(annotations)
103 | 
104 |         tube_labels = np.zeros((numf,num_tubes),dtype=np.int16) # check for each tube if present in
105 |         tube_boxes = [[[] for _ in range(num_tubes)] for _ in range(numf)]
106 |         for tubeid, tube in enumerate(annotations):
107 |             # print('numf00', numf, tube['sf'], tube['ef'])
108 |             for frame_id, frame_num in enumerate(np.arange(tube['sf'], tube['ef'], 1)): # start of the tube to end frame of the tube
109 |                 label = tube['label']
110 |                 assert actidx == label, 'Tube label and video label should be same'
111 |                 box = tube['boxes'][frame_id, :]  # get the box as an array
112 |                 box = box.astype(np.float32)
113 |                 box[2] += box[0]  #convert width to xmax
114 |                 box[3] += box[1]  #converst height to ymax
115 |                 tube_labels[frame_num, tubeid] = 1 #label+1  # change label in tube_labels matrix to 1 form 0
116 |                 tube_boxes[frame_num][tubeid] = box  # put the box in matrix of lists
117 | 
118 |         possible_frame_nums = np.arange(0, lastf, step)
119 |         # print('numf',numf,possible_frame_nums[-1])
120 |         for frame_num in possible_frame_nums: # loop from start to last possible frame which can make a legit sequence
121 |             frame_num = int(frame_num)
122 |             check_tubes = tube_labels[frame_num,:]
123 | 
124 |             if np.sum(check_tubes)>0:  # check if there aren't any semi overlapping tubes
125 |                 all_boxes = []
126 |                 labels = []
127 |                 image_name = imagesDir + videoname+'/{:05d}.jpg'.format(frame_num+1)
128 |                 #label_name = rootpath + 'labels/' + videoname + '/{:05d}.txt'.format(frame_num + 1)
129 |                 # assert os.path.isfile(image_name), 'Image does not exist'+image_name
130 |                 for tubeid, tube in enumerate(annotations):
131 |                     label = tube['label']
132 |                     if tube_labels[frame_num, tubeid]>0:
133 |                         box = np.asarray(tube_boxes[frame_num][tubeid])
134 |                         all_boxes.append(box)
135 |                         labels.append(label)
136 | 
137 |                 if istrain: # if it is training video
138 |                     trainlist.append([vid, frame_num+1, np.asarray(labels), np.asarray(all_boxes)])
139 |                     train_action_counts[actidx] += 1 #len(labels)
140 |                 else: # if test video and has micro-tubes with GT
141 |                     testlist.append([vid, frame_num+1, np.asarray(labels), np.asarray(all_boxes)])
142 |                     test_action_counts[actidx] += 1 #len(labels)
143 |             elif fulltest and not istrain: # if test video with no ground truth and fulltest is trues
144 |                 testlist.append([vid, frame_num+1, np.asarray([9999]), np.zeros((1,4))])
145 | 
146 |     for actidx, act_count in enumerate(train_action_counts): # just to see the distribution of train and test sets
147 |         print('train {:05d} test {:05d} action {:02d} {:s}'.format(act_count, test_action_counts[actidx] , int(actidx), CLASSES[actidx]))
148 | 
149 |     newratios = train_action_counts/5000
150 |     #print('new   ratios', newratios)
151 |     line = '['
152 |     for r in newratios:
153 |         line +='{:0.2f}, '.format(r)
154 |     print(line+']')
155 |     print('Trainlistlen', len(trainlist), ' testlist ', len(testlist))
156 | 
157 |     return trainlist, testlist, video_list
158 | 
159 | 
class UCF24Detection(data.Dataset):
    """UCF24 action detection dataset.

    Each item is one sampled video frame: the image, its target rows and the
    index of the item in the dataset.

    Arguments:
        root (string): dataset root. ``make_lists`` concatenates it directly
            with sub-paths, so it should end with a path separator.
        image_set (string): 'train' or 'test'; any other value only prints a
            message and leaves the dataset empty.
        transform (callable, optional): called as
            ``transform(img, boxes, labels)`` and must return the same triple.
        target_transform (callable): called as
            ``target_transform(boxes, labels, width, height)``; it is invoked
            unconditionally, so it has to be supplied.
        dataset_name (string, optional): stored as ``self.name``.
        input_type (string, optional): prefix of the image folder
            ``<input_type>-images`` under ``root``.
        full_test (bool, optional): forwarded to ``make_lists`` as ``fulltest``.
    """

    def __init__(self, root, image_set, transform=None, target_transform=None,
                 dataset_name='ucf24', input_type='rgb', full_test=False):

        self.input_type = input_type
        input_type = input_type+'-images'
        self.root = root
        self.CLASSES = CLASSES
        self.image_set = image_set
        self.transform = transform
        self.target_transform = target_transform
        self.name = dataset_name
        self._annopath = os.path.join(root, 'labels/', '%s.txt')
        self._imgpath = os.path.join(root, input_type)
        self.ids = list()

        trainlist, testlist, video_list = make_lists(root, input_type, split=1, fulltest=full_test)
        self.video_list = video_list
        if self.image_set == 'train':
            self.ids = trainlist
        elif self.image_set == 'test':
            self.ids = testlist
        else:
            print('spacify correct subset ')

    def __getitem__(self, index):
        """Return ``(image tensor, target, index)`` for sample ``index``."""
        im, gt, img_index = self.pull_item(index)

        return im, gt, img_index

    def __len__(self):
        """Return the number of sampled frames."""
        return len(self.ids)

    def pull_item(self, index):
        """Load the image and targets of sample ``index``.

        Return:
            (image, target, index): the image as a tensor with the channel
            axis moved to the front, the target rows
            [xmin, ymin, xmax, ymax, label], and ``index`` itself.
        Raises:
            IOError: if the image file cannot be read.
        """
        annot_info = self.ids[index]
        video_id = annot_info[0]
        frame_num = annot_info[1]
        videoname = self.video_list[video_id]
        img_name = self._imgpath + '/{:s}/{:05d}.jpg'.format(videoname, frame_num)
        img = cv2.imread(img_name)
        if img is None:
            # cv2.imread signals failure by returning None; fail with the
            # offending path instead of an AttributeError on ``None.shape``.
            raise IOError('Could not read image: ' + img_name)
        height, width = img.shape[:2]

        target = self.target_transform(annot_info[3], annot_info[2], width, height)

        if self.transform is not None:
            target = np.array(target)
            img, boxes, labels = self.transform(img, target[:, :4], target[:, 4])
            # Reverse the order of the colour channels.
            img = img[:, :, (2, 1, 0)]
            target = np.hstack((boxes, np.expand_dims(labels, axis=1)))
        # (H, W, C) -> (C, H, W)
        return torch.from_numpy(img).permute(2, 0, 1), target, index
218 | 
219 | 
def detection_collate(batch):
    """Collate samples whose images carry a varying number of annotations.

    Arguments:
        batch: iterable of samples, each ``(image tensor, annotations, id)``.
    Return:
        A tuple containing:
            1) (tensor) the images stacked along a new 0 dim
            2) (list of tensors) one FloatTensor of annotations per image
            3) (list) the third element of every sample, in batch order
    """
    images = [sample[0] for sample in batch]
    annotations = [torch.FloatTensor(sample[1]) for sample in batch]
    sample_ids = [sample[2] for sample in batch]
    return torch.stack(images, 0), annotations, sample_ids
239 | 


--------------------------------------------------------------------------------
/layers/__init__.py:
--------------------------------------------------------------------------------
1 | from .functions import *
2 | from .modules import *
3 | 


--------------------------------------------------------------------------------
/layers/box_utils.py:
--------------------------------------------------------------------------------
  1 | """ Bounding box utilities
  2 | 
  3 | Original author: Ellis Brown, Max deGroot for VOC dataset
  4 | https://github.com/amdegroot/ssd.pytorch
  5 | 
  6 | """
  7 | 
  8 | import torch
  9 | 
 10 | def point_form(boxes):
 11 |     """ Convert prior_boxes to (xmin, ymin, xmax, ymax)
 12 |     representation for comparison to point form ground truth data.
 13 |     Args:
 14 |         boxes: (tensor) center-size default boxes from priorbox layers.
 15 |     Return:
 16 |         boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes.
 17 |     """
 18 |     return torch.cat((boxes[:, :2] - boxes[:, 2:]/2,     # xmin, ymin
 19 |                      boxes[:, :2] + boxes[:, 2:]/2), 1)  # xmax, ymax
 20 | 
 21 | 
 22 | def center_size(boxes):
 23 |     """ Convert prior_boxes to (cx, cy, w, h)
 24 |     representation for comparison to center-size form ground truth data.
 25 |     Args:
 26 |         boxes: (tensor) point_form boxes
 27 |     Return:
 28 |         boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes.
 29 |     """
 30 |     return torch.cat((boxes[:, 2:] + boxes[:, :2])/2,  # cx, cy
 31 |                      boxes[:, 2:] - boxes[:, :2], 1)  # w, h
 32 | 
 33 | 
 34 | def intersect(box_a, box_b):
 35 |     """ We resize both tensors to [A,B,2] without new malloc:
 36 |     [A,2] -> [A,1,2] -> [A,B,2]
 37 |     [B,2] -> [1,B,2] -> [A,B,2]
 38 |     Then we compute the area of intersect between box_a and box_b.
 39 |     Args:
 40 |       box_a: (tensor) bounding boxes, Shape: [A,4].
 41 |       box_b: (tensor) bounding boxes, Shape: [B,4].
 42 |     Return:
 43 |       (tensor) intersection area, Shape: [A,B].
 44 |     """
 45 |     A = box_a.size(0)
 46 |     B = box_b.size(0)
 47 |     max_xy = torch.min(box_a[:, 2:].unsqueeze(1).expand(A, B, 2),
 48 |                        box_b[:, 2:].unsqueeze(0).expand(A, B, 2))
 49 |     min_xy = torch.max(box_a[:, :2].unsqueeze(1).expand(A, B, 2),
 50 |                        box_b[:, :2].unsqueeze(0).expand(A, B, 2))
 51 |     inter = torch.clamp((max_xy - min_xy), min=0)
 52 |     return inter[:, :, 0] * inter[:, :, 1]
 53 | 
 54 | 
 55 | def jaccard(box_a, box_b):
 56 |     """Compute the jaccard overlap of two sets of boxes.  The jaccard overlap
 57 |     is simply the intersection over union of two boxes.  Here we operate on
 58 |     ground truth boxes and default boxes.
 59 |     E.g.:
 60 |         A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B)
 61 |     Args:
 62 |         box_a: (tensor) Ground truth bounding boxes, Shape: [num_objects,4]
 63 |         box_b: (tensor) Prior boxes from priorbox layers, Shape: [num_priors,4]
 64 |     Return:
 65 |         jaccard overlap: (tensor) Shape: [box_a.size(0), box_b.size(0)]
 66 |     """
 67 |     inter = intersect(box_a, box_b)
 68 |     area_a = ((box_a[:, 2]-box_a[:, 0]) *
 69 |               (box_a[:, 3]-box_a[:, 1])).unsqueeze(1).expand_as(inter)  # [A,B]
 70 |     area_b = ((box_b[:, 2]-box_b[:, 0]) *
 71 |               (box_b[:, 3]-box_b[:, 1])).unsqueeze(0).expand_as(inter)  # [A,B]
 72 |     union = area_a + area_b - inter
 73 |     return inter / union  # [A,B]
 74 | 
 75 | 
 76 | def match(threshold, truths, priors, variances, labels, loc_t, conf_t, idx):
 77 |     """Match each prior box with the ground truth box of the highest jaccard
 78 |     overlap, encode the bounding boxes, then return the matched indices
 79 |     corresponding to both confidence and location preds.
 80 |     Args:
 81 |         threshold: (float) The overlap threshold used when mathing boxes.
 82 |         truths: (tensor) Ground truth boxes, Shape: [num_obj, num_priors].
 83 |         priors: (tensor) Prior boxes from priorbox layers, Shape: [n_priors,4].
 84 |         variances: (tensor) Variances corresponding to each prior coord,
 85 |             Shape: [num_priors, 4].
 86 |         labels: (tensor) All the class labels for the image, Shape: [num_obj].
 87 |         loc_t: (tensor) Tensor to be filled w/ endcoded location targets.
 88 |         conf_t: (tensor) Tensor to be filled w/ matched indices for conf preds.
 89 |         idx: (int) current batch index
 90 |     Return:
 91 |         The matched indices corresponding to 1)location and 2)confidence preds.
 92 |     """
 93 |     # jaccard index
 94 |     overlaps = jaccard(
 95 |         truths,
 96 |         point_form(priors)
 97 |     )
 98 |     # (Bipartite Matching)
 99 |     # [1,num_objects] best prior for each ground truth
100 |     best_prior_overlap, best_prior_idx = overlaps.max(1, keepdim=True)
101 |     # [1,num_priors] best ground truth for each prior
102 |     best_truth_overlap, best_truth_idx = overlaps.max(0, keepdim=True)
103 |     best_truth_idx.squeeze_(0)
104 |     best_truth_overlap.squeeze_(0)
105 |     best_prior_idx.squeeze_(1)
106 |     best_prior_overlap.squeeze_(1)
107 |     best_truth_overlap.index_fill_(0, best_prior_idx, 2)  # ensure best prior
108 |     # TODO refactor: index  best_prior_idx with long tensor
109 |     # ensure every gt matches with its prior of max overlap
110 |     for j in range(best_prior_idx.size(0)):
111 |         best_truth_idx[best_prior_idx[j]] = j
112 |     matches = truths[best_truth_idx]          # Shape: [num_priors,4]
113 |     conf = labels[best_truth_idx] + 1         # Shape: [num_priors]
114 |     conf[best_truth_overlap < threshold] = 0  # label as background
115 |     loc = encode(matches, priors, variances)
116 |     loc_t[idx] = loc    # [num_priors,4] encoded offsets to learn
117 |     conf_t[idx] = conf  # [num_priors] top class label for each prior
118 | 
119 | 
def encode(matched, priors, variances):
    """Encode matched ground truth boxes as offsets relative to their priors.

    Args:
        matched: (tensor) Coords of ground truth for each prior in point-form
            Shape: [num_priors, 4].
        priors: (tensor) Prior boxes in center-offset form
            Shape: [num_priors,4].
        variances: (list[float]) Variances of priorboxes
    Return:
        encoded boxes (tensor), Shape: [num_priors, 4]
    """
    prior_centers = priors[:, :2]
    prior_sizes = priors[:, 2:]

    # centre offset, scaled by the prior size and the first variance
    center_offsets = (matched[:, :2] + matched[:, 2:])/2 - prior_centers
    center_offsets = center_offsets / (variances[0] * prior_sizes)
    # log of the size ratio, scaled by the second variance
    size_ratios = (matched[:, 2:] - matched[:, :2]) / prior_sizes
    log_ratios = torch.log(size_ratios) / variances[1]
    return torch.cat([center_offsets, log_ratios], 1)  # [num_priors,4]
142 | 
143 | 
144 | # Adapted from https://github.com/Hakuyume/chainer-ssd
def decode(loc, priors, variances):
    """Turn predicted offsets back into point-form boxes, undoing ``encode``.

    Args:
        loc (tensor): location predictions for loc layers,
            Shape: [num_priors,4]
        priors (tensor): Prior boxes in center-offset form.
            Shape: [num_priors,4].
        variances: (list[float]) Variances of priorboxes
    Return:
        decoded bounding box predictions as (xmin, ymin, xmax, ymax)
    """
    centers = priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:]
    sizes = priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])
    upper_left = centers - sizes / 2
    lower_right = sizes + upper_left
    return torch.cat((upper_left, lower_right), 1)
164 | 
165 | 
def log_sum_exp(x):
    """Compute log(sum(exp(x))) over dim 1, keeping that dim.

    The global maximum of ``x`` is subtracted before exponentiating and
    added back afterwards.
    Args:
        x (Variable(tensor)): conf_preds from conf layers
    """
    peak = x.data.max()
    exp_shifted = torch.exp(x-peak)
    return torch.log(exp_shifted.sum(1, keepdim=True)) + peak
175 | 
176 | 
177 | # Original author: Francisco Massa:
178 | # https://github.com/fmassa/object-detection.torch
179 | # Ported to PyTorch by Max deGroot (02/01/2017)
def nms(boxes, scores, overlap=0.5, top_k=200):
    """Apply non-maximum suppression at test time to avoid detecting too many
    overlapping bounding boxes for a given object.
    Args:
        boxes: (tensor) The location preds for the img, Shape: [num_priors,4].
        scores: (tensor) The class predscores for the img, Shape:[num_priors].
        overlap: (float) The overlap thresh for suppressing unnecessary boxes.
        top_k: (int) The Maximum number of box preds to consider.
    Return:
        (keep, count): ``keep`` is a long tensor with one slot per score whose
        first ``count`` entries are the indices of the kept boxes, highest
        score first; the remaining slots are zero. Empty input gives
        ``(keep, 0)``.
    """

    keep = scores.new(scores.size(0)).zero_().long()
    if boxes.numel() == 0:
        # Same (keep, count) shape as the normal path, so callers that
        # unpack two values also work on empty input.
        return keep, 0
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    area = torch.mul(x2 - x1, y2 - y1)
    _, idx = scores.sort(0)  # sort in ascending order
    idx = idx[-top_k:]  # indices of the top-k largest vals

    count = 0
    while idx.numel() > 0:
        i = idx[-1]  # index of current largest val
        keep[count] = i
        count += 1
        if idx.size(0) == 1:
            break
        idx = idx[:-1]  # remove kept element from view
        # corners of the intersection between box i and each remaining box
        xx1 = torch.clamp(torch.index_select(x1, 0, idx), min=x1[i])
        yy1 = torch.clamp(torch.index_select(y1, 0, idx), min=y1[i])
        xx2 = torch.clamp(torch.index_select(x2, 0, idx), max=x2[i])
        yy2 = torch.clamp(torch.index_select(y2, 0, idx), max=y2[i])
        # disjoint boxes give negative extents; clamp them to zero
        w = torch.clamp(xx2 - xx1, min=0.0)
        h = torch.clamp(yy2 - yy1, min=0.0)
        inter = w*h
        # IoU = i / (area(a) + area(b) - i)
        rem_areas = torch.index_select(area, 0, idx)  # load remaining areas
        union = (rem_areas - inter) + area[i]
        IoU = inter/union
        # keep only elements with an IoU <= overlap
        idx = idx[IoU.le(overlap)]
    return keep, count
245 | 


--------------------------------------------------------------------------------
/layers/functions/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | from .prior_box import PriorBox
3 | 
4 | 
5 | __all__ = ['PriorBox']
6 | 


--------------------------------------------------------------------------------
/layers/functions/prior_box.py:
--------------------------------------------------------------------------------
 1 | """ Generates prior boxes for SSD netowrk
 2 | 
 3 | Original author: Ellis Brown, Max deGroot for VOC dataset
 4 | https://github.com/amdegroot/ssd.pytorch
 5 | 
 6 | """
 7 | 
 8 | import torch
 9 | from math import sqrt as sqrt
10 | from itertools import product as product
11 | 
class PriorBox(object):
    """Generate the prior (default) boxes for every source feature map.

    Two generation schemes are supported and selected by ``cfg['name']``:
    'v2' emits boxes as (cx, cy, w, h); any other name uses the original
    scheme, which emits (xmin, ymin, xmax, ymax). All values are fractions
    of the image size.
    """
    def __init__(self, cfg):
        super(PriorBox, self).__init__()
        self.image_size = cfg['min_dim']
        # number of entries in the aspect ratio config
        self.num_priors = len(cfg['aspect_ratios'])
        self.variance = cfg['variance'] or [0.1]
        self.feature_maps = cfg['feature_maps']
        self.min_sizes = cfg['min_sizes']
        self.max_sizes = cfg['max_sizes']
        self.steps = cfg['steps']
        self.aspect_ratios = cfg['aspect_ratios']
        self.clip = cfg['clip']
        self.version = cfg['name']
        if any(v <= 0 for v in self.variance):
            raise ValueError('Variances must be greater than 0')

    def _boxes_v2(self):
        """Return the flat coordinate list for the 'v2' scheme."""
        coords = []
        for level, fmap in enumerate(self.feature_maps):
            for row, col in product(range(fmap), repeat=2):
                cells = self.image_size / self.steps[level]
                # unit center x,y
                cx = (col + 0.5) / cells
                cy = (row + 0.5) / cells

                # square box of relative size min_size
                small = self.min_sizes[level]/self.image_size
                coords.extend([cx, cy, small, small])

                # square box of size sqrt(min_size * max_size)
                big = sqrt(small * (self.max_sizes[level]/self.image_size))
                coords.extend([cx, cy, big, big])

                # each aspect ratio and its reciprocal
                for ratio in self.aspect_ratios[level]:
                    root = sqrt(ratio)
                    coords.extend([cx, cy, small*root, small/root])
                    coords.extend([cx, cy, small/root, small*root])
        return coords

    def _boxes_v1(self):
        """Return the flat coordinate list for the original scheme."""
        coords = []
        full = self.image_size

        def corners(c_x, c_y, half_w, half_h):
            return [(c_x-half_w)/full, (c_y-half_h)/full,
                    (c_x+half_w)/full, (c_y+half_h)/full]

        for level, fmap in enumerate(self.feature_maps):
            step_x = step_y = self.image_size/fmap
            for row, col in product(range(fmap), repeat=2):
                c_x = ((col+0.5) * step_x)
                c_y = ((row+0.5) * step_y)
                # square box of size min_size
                half = self.min_sizes[level] / 2
                coords.extend(corners(c_x, c_y, half, half))
                if self.max_sizes[level] > 0:
                    # square box of size sqrt(min_size * max_size)
                    half = sqrt(self.min_sizes[level] *
                                self.max_sizes[level])/2
                    coords.extend(corners(c_x, c_y, half, half))
                # remaining aspect ratios; ratio 1 is already covered
                for ratio in self.aspect_ratios[level]:
                    if abs(ratio-1) < 1e-6:
                        continue
                    half_w = self.min_sizes[level] * sqrt(ratio)/2
                    half_h = self.min_sizes[level] / sqrt(ratio)/2
                    coords.extend(corners(c_x, c_y, half_w, half_h))
        return coords

    def forward(self):
        """Return all prior boxes as a tensor of shape [num_boxes, 4],
        clamped to [0, 1] when ``clip`` is set."""
        if self.version == 'v2':
            flat = self._boxes_v2()
        else:
            flat = self._boxes_v1()
        # back to torch land
        output = torch.Tensor(flat).view(-1, 4)
        if self.clip:
            output.clamp_(max=1, min=0)
        return output
97 | 


--------------------------------------------------------------------------------
/layers/modules/__init__.py:
--------------------------------------------------------------------------------
1 | from .l2norm import L2Norm
2 | from .multibox_loss import MultiBoxLoss
3 | 
4 | __all__ = ['L2Norm', 'MultiBoxLoss']
5 | 


--------------------------------------------------------------------------------
/layers/modules/l2norm.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import torch.nn as nn
 3 | import torch
 4 | import torch.nn.functional as F
 5 | 
 6 | # class L2Norm(nn.Module):
 7 | #     def __init__(self,n_channels, scale):
 8 | #         super(L2Norm,self).__init__()
 9 | #         self.n_channels = n_channels
10 | #         self.gamma = scale or None
11 | #         self.eps = 1e-10
12 | #         self.weight = nn.Parameter(torch.Tensor(self.n_channels))
13 | #         self.reset_parameters()
14 | 
15 | #     def reset_parameters(self):
16 | #         init.constant(self.weight,self.gamma)
17 | 
18 | #     def forward(self, x):
19 | #         norm = x.pow(2).sum(dim=1, keepdim=True).sqrt()+self.eps
20 | #         x /= norm
21 | #         out = self.weight.unsqueeze(0).unsqueeze(2).unsqueeze(3).expand_as(x) * x
22 | #         return out
23 | 
class L2Norm(nn.Module):
    """L2-normalise the input along dim 1, then scale it per channel.

    Args:
        in_channels: length of the learnable ``weight`` vector.
        initial_scale: value every ``weight`` entry is filled with by
            ``reset_parameters``.
    """

    def __init__(self, in_channels, initial_scale):
        super(L2Norm, self).__init__()
        self.in_channels = in_channels
        self.initial_scale = initial_scale
        # allocated uninitialised; reset_parameters() fills it right away
        self.weight = nn.Parameter(torch.Tensor(in_channels))
        self.reset_parameters()

    def reset_parameters(self):
        """Fill every entry of ``weight`` with ``initial_scale``."""
        self.weight.data.fill_(self.initial_scale)

    def forward(self, x):
        """Return ``x`` normalised (p=2, dim=1) times ``weight``.

        ``weight`` is reshaped to (1, C, 1, 1) so it broadcasts against a
        4-D input whose dim 1 has length C.
        """
        unit = F.normalize(x, p=2, dim=1)
        per_channel = self.weight.view(1, -1, 1, 1)
        return unit * per_channel


--------------------------------------------------------------------------------
/layers/modules/multibox_loss.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | from data import v2 as cfg
  5 | from ..box_utils import match, log_sum_exp
  6 | 
  7 | class MultiBoxLoss(nn.Module):
  8 |     """SSD Weighted Loss Function
  9 |     Compute Targets:
 10 |         1) Produce Confidence Target Indices by matching  ground truth boxes
 11 |            with (default) 'priorboxes' that have jaccard index > threshold parameter
 12 |            (default threshold: 0.5).
 13 |         2) Produce localization target by 'encoding' variance into offsets of ground
 14 |            truth boxes and their matched  'priorboxes'.
 15 |         3) Hard negative mining to filter the excessive number of negative examples
 16 |            that comes with using a large number of default bounding boxes.
 17 |            (default negative:positive ratio 3:1)
 18 |     Objective Loss:
 19 |         L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
 20 |         Where, Lconf is the CrossEntropy Loss and Lloc is the SmoothL1 Loss
 21 |         weighted by α which is set to 1 by cross val.
 22 |         Args:
 23 |             c: class confidences,
 24 |             l: predicted boxes,
 25 |             g: ground truth boxes
 26 |             N: number of matched default boxes
 27 |         See: https://arxiv.org/pdf/1512.02325.pdf for more details.
 28 |     """
 29 | 
 30 |     def __init__(self, num_classes, overlap_thresh, prior_for_matching,
 31 |                  bkg_label, neg_mining, neg_pos, neg_overlap, encode_target,
 32 |                  use_gpu=True):
 33 |         super(MultiBoxLoss, self).__init__()
 34 |         self.use_gpu = use_gpu
 35 |         self.num_classes = num_classes
 36 |         self.threshold = overlap_thresh
 37 |         self.background_label = bkg_label
 38 |         self.encode_target = encode_target
 39 |         self.use_prior_for_matching = prior_for_matching
 40 |         self.do_neg_mining = neg_mining
 41 |         self.negpos_ratio = neg_pos
 42 |         self.neg_overlap = neg_overlap
 43 |         self.variance = cfg['variance']
 44 | 
 45 |     def forward(self, predictions, targets):
 46 |         """Multibox Loss
 47 |         Args:
 48 |             predictions (tuple): A tuple containing loc preds, conf preds,
 49 |             and prior boxes from SSD net.
 50 |                 conf shape: torch.size(batch_size,num_priors,num_classes)
 51 |                 loc shape: torch.size(batch_size,num_priors,4)
 52 |                 priors shape: torch.size(num_priors,4)
 53 | 
 54 |             ground_truth (tensor): Ground truth boxes and labels for a batch,
 55 |                 shape: [batch_size,num_objs,5] (last idx is the label).
 56 |         """
 57 |         loc_data, conf_data, priors = predictions
 58 |         num = loc_data.size(0)
 59 |         priors = priors[:loc_data.size(1), :]
 60 |         num_priors = (priors.size(0))
 61 |         num_classes = self.num_classes
 62 | 
 63 |         # match priors (default boxes) and ground truth boxes
 64 |         with torch.no_grad():
 65 |             if self.use_gpu:
 66 |                 loc_t = torch.cuda.FloatTensor(num, num_priors, 4)
 67 |                 conf_t = torch.cuda.LongTensor(num, num_priors)
 68 |             else:
 69 |                 loc_t = torch.Tensor(num, num_priors, 4)
 70 |                 conf_t = torch.LongTensor(num, num_priors)
 71 |             for idx in range(num):
 72 |                 truths = targets[idx][:, :-1].data
 73 |                 labels = targets[idx][:, -1].data
 74 |                 defaults = priors.data
 75 |                 match(self.threshold, truths, defaults, self.variance, labels,
 76 |                     loc_t, conf_t, idx)
 77 |             if self.use_gpu:
 78 |                 loc_t = loc_t.cuda()
 79 |                 conf_t = conf_t.cuda()
 80 |             # wrap targets
 81 |             # loc_t = Variable(loc_t, requires_grad=False)
 82 |             # conf_t = Variable(conf_t, requires_grad=False)
 83 | 
 84 |             pos = conf_t > 0
 85 |         #num_pos = pos.sum(keepdim=True)
 86 | 
 87 |         # Localization Loss (Smooth L1)
 88 |         # Shape: [batch,num_priors,4]
 89 |         pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
 90 |         loc_p = loc_data[pos_idx].view(-1, 4)
 91 |         loc_t = loc_t[pos_idx].view(-1, 4)
 92 |         loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')
 93 |         with torch.no_grad():
 94 |             # Compute max conf across batch for hard negative mining
 95 |             batch_conf = conf_data.view(-1, self.num_classes)
 96 | 
 97 |             loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1))
 98 | 
 99 |             # Hard Negative Mining
100 |             loss_c[pos.view(-1,1)] = 0  # filter out pos boxes for now
101 |             loss_c = loss_c.view(num, -1)
102 |             _, loss_idx = loss_c.sort(1, descending=True)
103 |             _, idx_rank = loss_idx.sort(1)
104 |             num_pos = pos.long().sum(1, keepdim=True)
105 |             num_neg = torch.clamp(self.negpos_ratio*num_pos, max=pos.size(1)-1)
106 |             neg = idx_rank < num_neg.expand_as(idx_rank)
107 | 
108 |             # Confidence Loss Including Positive and Negative Examples
109 |             pos_idx = pos.unsqueeze(2).expand_as(conf_data)
110 |             neg_idx = neg.unsqueeze(2).expand_as(conf_data)
111 | 
112 |         conf_p = conf_data[(pos_idx+neg_idx).gt(0)].view(-1, self.num_classes)
113 |         targets_weighted = conf_t[(pos+neg).gt(0)]
114 |         loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')
115 | 
116 |         # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
117 | 
118 |         N = float(num_pos.data.sum())
119 |         loss_l /= N
120 |         loss_c /= N
121 |         return loss_l, loss_c
122 | 


--------------------------------------------------------------------------------
/matlab-online-display/.gitignore:
--------------------------------------------------------------------------------
1 | *.ods#
2 | *.m~
3 | *.prototxt~
4 | *.xml~
5 | *.log
6 | *~
7 | /results
8 | 


--------------------------------------------------------------------------------
/matlab-online-display/actionpath/actionPaths.m:
--------------------------------------------------------------------------------
  1 | % ---------------------------------------------------------
  2 | function actionPaths(dopts)
  3 | % ---------------------------------------------------------
  4 | % Copyright (c) 2017, Gurkirt Singh
  5 | % This code and is available
  6 | % under the terms of MID License provided in LICENSE.
  7 | % Please retain this notice and LICENSE if you use
  8 | % this file (or any portion of it) in your project.
  9 | % ---------------------------------------------------------
 10 | 
 11 | detresultpath = dopts.detDir;
 12 | costtype = dopts.costtype;
 13 | gap = dopts.gap;
 14 | videolist = dopts.vidList;
 15 | actions = dopts.actions;
 16 | saveName = dopts.actPathDir;
 17 | iouth = dopts.iouThresh;
 18 | numActions = length(actions);
 19 | nms_thresh = 0.45;
 20 | videos = getVideoNames(videolist);
 21 | NumVideos = length(videos);
 22 | 
 23 | for vid=1:2%NumVideos
 24 |     tic;
 25 |     videoID  = videos{vid};
 26 |     videoDetDir = [detresultpath,videoID,'/'];    
 27 |     fprintf('computing tubes for vide [%d out of %d] video ID = %s\n',vid,NumVideos, videoID);
 28 |     %% loop over all the frames of the video
 29 |     fprintf('Reading detections ');        
 30 |     frames = readDetections(videoDetDir);        
 31 |     fprintf('\nDone reading detections\n');        
 32 |     fprintf('Gernrating action paths ...........\n');
 33 |     %% parllel loop over all action class and genrate paths for each class
 34 |     threshold = [0.01,0.5,0.9];
 35 |     for iiii = 2:2
 36 |         dis_thres = threshold(iiii);
 37 |         
 38 |     my_live_paths = cell(1); %% Stores live paths
 39 |     my_dead_paths = cell(1); %% Store the paths that has been terminated
 40 |     
 41 |     for i = 1:24
 42 |         my_live_paths{i} = struct();
 43 |         my_dead_paths{i} = struct();
 44 |         my_dead_paths{i}.dp_count = 0;
 45 |     end
 46 |     
 47 |     action_frames = struct();
 48 |     for f=1:length(frames)
 49 |         for a=1:numActions
 50 |             %allpaths{a} = genActionPaths(frames, a, nms_thresh, iouth, costtype,gap,videoID, final_tubes);
 51 |             [boxes,scores,allscores] = dofilter(frames, a, f, nms_thresh);
 52 |             action_frames(f).boxes = boxes;
 53 |             action_frames(f).scores = scores;
 54 |             action_frames(f).allScores = allscores;            
 55 |             [my_live_paths{a}, my_dead_paths{a}] = incremental_linking(f, action_frames, iouth, costtype, gap,...
 56 |                 my_live_paths{a}, my_dead_paths{a},a);
 57 |         end
 58 |         
 59 |         strr =  strcat('/home/zhujiagang/realtime-action-detection/ucf24/rgb-images/', videoID, '/', num2str(f, '%05d'), '.jpg');
 60 |         img = imread(strr);
 61 |         
 62 |         dis_boxes = [];
 63 |         for a=1:numActions
 64 |             %size(my_live_paths{a}, 2)
 65 |             if size(my_live_paths{a}, 2) > 0
 66 |                 for ii = 1:size(my_live_paths{a}, 2)
 67 |                     if isfield(my_live_paths{a}(ii),'scores')
 68 |                         if my_live_paths{a}(ii).foundAT(end) == f
 69 |                             if my_live_paths{a}(ii).scores(end) > dis_thres
 70 |                                 count = my_live_paths{a}(ii).count;
 71 |                                 dis_boxes = [dis_boxes;my_live_paths{a}(ii).boxes(count,:), my_live_paths{a}(ii).scores(end), a];
 72 |                                 pt = round(my_live_paths{a}(ii).boxes(count,1:2));
 73 |                                 wSize = round(my_live_paths{a}(ii).boxes(count,3:4) - my_live_paths{a}(ii).boxes(count,1:2));
 74 |                                 
 75 |                                 %% adding boxes to images
 76 |                                 img = drawRect(img, pt, wSize);                                
 77 |                             end
 78 |                         end
 79 |                     end
 80 |                 end
 81 |             end
 82 |         end
 83 |         %% display images, scores and boxes    
 84 |         if size(dis_boxes,1)>0
 85 |             strcell=cell(size(dis_boxes,1),1);        
 86 |             for iii=1:size(dis_boxes,1)
 87 |                 strcell(iii) = {strcat(actions{dis_boxes(iii,6)}, ': ', num2str(dis_boxes(iii,5),3))};
 88 |             end
 89 |             RGB = insertText(img, double(dis_boxes(:,1:2)), strcell);
 90 |             imshow(RGB)
 91 |             str_save_dir =  strcat('/home/zhujiagang/realtime-action-detection/online_save/',videoID,'_', num2str(dis_thres));
 92 |             if ~exist(str_save_dir) 
 93 |                 mkdir(str_save_dir)
 94 |             end
 95 |             str_save =  strcat(str_save_dir, '/', num2str(f, '%05d'), '.jpg');
 96 |             imwrite(RGB, str_save);
 97 |         end
 98 |     end
 99 | 
100 |         fprintf('All Done in %03d Seconds\n',round(toc));
101 |     end
102 | 
103 |     disp('done computing action paths');
104 | 
105 | end
106 | end
107 | 
function paths = genActionPaths(frames,a,nms_thresh,iouth,costtype,gap, video_id, final_tubes)
% Filter the detections of every frame for action a, then hand the whole
% sequence to incremental_linking in a single call.
% NOTE(review): the only call to this helper in actionPaths is commented
% out, and the argument list passed to incremental_linking here differs
% from the one actionPaths uses - confirm before reviving it.
action_frames = struct();
numFrames = length(frames);

for fr = 1:numFrames
    [frameBoxes,frameScores,frameAllScores] = dofilter(frames,a,fr,nms_thresh);
    action_frames(fr).boxes = frameBoxes;
    action_frames(fr).scores = frameScores;
    action_frames(fr).allScores = frameAllScores;
end

paths = incremental_linking(action_frames,iouth,costtype, gap, gap, a, video_id, final_tubes);

end
121 | 
%-- filter out the least likely detections for an action ---
function [boxes,scores,allscores] = dofilter(frames, a, f, nms_thresh)
    % Select the detections of frame f for action a in three stages:
    % score > 0.001, then the 50 best by score, then at most the first 10
    % indices returned by nms.
    frameDet = frames(f);

    % stage 1: drop near-zero scores for this action
    keep = frameDet.scores(:,a) > 0.001;
    scores = frameDet.scores(keep,a);
    boxes = frameDet.boxes(keep,:);
    allscores = frameDet.scores(keep,:);

    % stage 2: keep the 50 highest-scoring candidates
    [~,order] = sort(scores,'descend');
    order = order(1:min(50,size(order,1)));
    scores = scores(order);
    boxes = boxes(order,:);
    allscores = allscores(order,:);

    % stage 3: non-maximum suppression, then cap at 10
    survivors = nms([boxes scores], nms_thresh);
    survivors = survivors(1:min(10,length(survivors)));
    boxes = boxes(survivors,:);
    scores = scores(survivors);
    allscores = allscores(survivors,:);
end
141 | 
142 | %-- list the files in directory and sort them ----------
function list = sortdirlist(dirname)
% Return the names reported by dir(dirname) as a sorted cell array.
entries = dir(dirname);
names = {entries.name};
list = sort(names);
end
147 | 
148 | % -------------------------------------------------------------------------
function [videos] = getVideoNames(split_file)
% -------------------------------------------------------------------------
% Read the whitespace-separated tokens of split_file and return them as a
% 1xN cell array of names (a 1x1 cell holding [] when the file has none).
% Raises an error when the file cannot be opened.
fprintf('Get both lis is %s\n',split_file);
fid = fopen(split_file,'r');
if fid < 0
    error('getVideoNames:cannotOpen', 'Could not open video list file %s', split_file);
end
% close the handle on every exit path, including errors from textscan
closeFile = onCleanup(@() fclose(fid)); %#ok<NASGU>
data = textscan(fid, '%s');
names = data{1};
videos  = cell(1);

for i=1:length(names)
    videos{i} = names{i};
end
end
164 | 
165 | 
function frames = readDetections(detectionDir)
% Load every *.mat file of detectionDir (in sorted name order) into a struct
% array with fields boxes and scores, one element per file.
%
% Each file must provide the variables loc and scores. The columns of loc
% are scaled by 320 (columns 1 and 3) and 240 (columns 2 and 4), clamped to
% >= 0 (columns 1, 2) and <= 319 / 239 (columns 3, 4), then shifted by +1.
% The first column of scores is moved to the last position.

detectionList = sortdirlist([detectionDir,'*.mat']);
frames = struct([]);
numframes = length(detectionList);
for f = 1 : numframes
  filename = [detectionDir,detectionList{f}];
  % Load into a struct instead of the function workspace: with a bare
  % load(), a file lacking loc or scores would silently reuse (and
  % re-scale) the values left over from the previous frame.
  det = load(filename, 'loc', 'scores');
  if ~isfield(det,'loc') || ~isfield(det,'scores')
      error('readDetections:missingVariable', ...
            '%s must contain the variables loc and scores', filename);
  end
  loc = det.loc;
  scores = det.scores;

  loc = [loc(:,1)*320, loc(:,2)*240, loc(:,3)*320, loc(:,4)*240];
  loc(loc(:,1)<0,1) = 0;
  loc(loc(:,2)<0,2) = 0;
  loc(loc(:,3)>319,3) = 319;
  loc(loc(:,4)>239,4) = 239;
  loc = loc + 1;
  frames(f).boxes = loc;
  frames(f).scores = [scores(:,2:end),scores(:,1)];
end

end
187 | 
function [ dest ] = drawRect( src, pt, wSize,  lineSize, color )
% Return a copy of src with a rectangle outline painted on it.
%   src      - image; a single-channel input is replicated to 3 channels
%   pt       - [x y] of the top-left corner
%   wSize    - [w h] extent added to pt
%   lineSize - outline thickness, default 1; extra lines grow outwards
%   color    - three channel values, default [255 255 0]
% With fewer than three arguments a message is displayed and src is
% returned unchanged.

if nargin < 5
    color = [255 255 0];
end

if nargin < 4
    lineSize = 1;
end

if nargin < 3
    disp('inenough parameters')
    % Assign the output before leaving so that callers requesting a return
    % value do not hit an "output argument not assigned" error.
    if nargin >= 1
        dest = src;
    else
        dest = [];
    end
    return;
end

[yA, xA, z] = size(src);
x1 = pt(1);
y1 = pt(2);

wx = wSize(1);
wy = wSize(2);

% keep the corner inside the image
if x1>xA
   x1 = xA;
end
if x1<1
   x1 = 1;
end

if y1>yA
   y1 = yA;
end
if y1<1
   y1 = 1;
end

% keep the far edges inside the image
if (x1+wx)>xA
    wx = xA - x1;
end
if (y1+wy)>yA
    wy = yA - y1;
end

if (x1+wx)<1
    wx = 1;
end
if (y1+wy)<1
    wy = 1;
end

if 1==z
    dest(:, : ,1) = src;
    dest(:, : ,2) = src;
    dest(:, : ,3) = src;
else
    dest = src;
end

for dl = 1 : lineSize
    d = dl - 1;
    % Each extra line lies one pixel further out. Clamp to the image so a
    % thick outline near a border neither indexes row/column 0 nor enlarges
    % dest by assigning past its size.
    top    = max(1,  y1-d);
    bottom = min(yA, y1+wy+d);
    left   = max(1,  x1-d);
    right  = min(xA, x1+wx+d);
    for c = 1 : 3
        dest(  top ,         left:right ,  c  ) =  color(c);
        dest(  bottom ,      left:right ,  c  ) =  color(c);
        dest(  top:bottom ,  left ,        c  ) =  color(c);
        dest(  top:bottom ,  right ,       c  ) =  color(c);
    end
end

end


--------------------------------------------------------------------------------
/matlab-online-display/actionpath/fusedActionPaths.m:
--------------------------------------------------------------------------------
  1 | function fusedActionPaths(dopts)
  2 | % AUTORIGHTS
  3 | % ---------------------------------------------------------
  4 | % Copyright (c) 2016, Gurkirt Singh
  5 | %
  6 | % This code and is available
  7 | % under the terms of the Simplified BSD License provided in
  8 | % LICENSE. Please retain this notice and LICENSE if you use
  9 | % this file (or any portion of it) in your project.
 10 | % ---------------------------------------------------------
 11 | 
 12 | detresultpathBase = dopts.basedetDir;
 13 | detresultpathTop = dopts.topdetDir;
 14 | videolist = dopts.vidList;
 15 | actions = dopts.actions;
 16 | saveName = dopts.actPathDir;
 17 | iouth = dopts.iouThresh;
 18 | numActions = length(actions);
 19 | costtype = dopts.costtype;
 20 | gap = dopts.gap;
 21 | fuseiouth = dopts.fuseiouth;
 22 | fusiontype = dopts.fusiontype;
 23 | nms_thresh = 0.45;
 24 | videos = getVideoNames(videolist);
 25 | 
 26 | NumVideos = length(videos);
 27 | timimngs = zeros(NumVideos,1);
 28 | 
 29 | for vid=1:NumVideos
 30 |     tt = tic;
 31 |     videoID  = videos{vid};
 32 |     pathsSaveName = [saveName,videoID,'-actionpaths.mat'];   
 33 |     videoDetDirBase = [detresultpathBase,videoID,'/'];
 34 |     videoTopDirBase = [detresultpathTop,videoID,'/'];
 35 |     frames = readDetections(videoDetDirBase,videoTopDirBase);
 36 |     fprintf('\nDone reading detection files \n');
 37 |     fprintf('Gernrating action paths ...........\n');
 38 |     %% parllel loop over all action class and genrate paths for each class
 39 |     threshold = [0.01,0.5,0.9];
 40 |     for iiii = 2:2
 41 |         dis_thres = threshold(iiii);
 42 |         
 43 |     my_live_paths = cell(1); %% Stores live paths
 44 |     my_dead_paths = cell(1); %% Store the paths that has been terminated
 45 |     
 46 |     for i = 1:24
 47 |         my_live_paths{i} = struct();
 48 |         my_dead_paths{i} = struct();
 49 |         my_dead_paths{i}.dp_count = 0;
 50 |     end
 51 |     
 52 |     action_frames = struct();
 53 |     for f=1:length(frames)
 54 |         for a=1:numActions
 55 |             baseBoxes = frames(f).baseBoxes;
 56 |             baseAllScores = frames(f).baseScores;
 57 |             topBoxes = frames(f).topBoxes;
 58 |             topAllScores = frames(f).topScores;
 59 |             meanScores = frames(f).meanScores;
 60 |             [boxes, allscores] = fuseboxes(baseBoxes,topBoxes,baseAllScores,topAllScores,meanScores,fuseiouth,fusiontype,a,nms_thresh);
 61 | 
 62 |             action_frames(f).allScores = allscores;
 63 |             action_frames(f).boxes = boxes(:,1:4);
 64 |             action_frames(f).scores = boxes(:,5);
 65 |     
 66 |             [my_live_paths{a}, my_dead_paths{a}] = incremental_linking(f, action_frames, iouth, costtype, gap,...
 67 |                 my_live_paths{a}, my_dead_paths{a},a);
 68 |         end
 69 |         
 70 |         strr =  strcat('/home/zhujiagang/realtime-action-detection/ucf24/rgb-images/', videoID, '/', num2str(f, '%05d'), '.jpg');
 71 |         img = imread(strr);
 72 |         
 73 |         dis_boxes = [];
 74 |         for a=1:numActions
 75 |             %size(my_live_paths{a}, 2)
 76 |             if size(my_live_paths{a}, 2) > 0
 77 |                 for ii = 1:size(my_live_paths{a}, 2)
 78 |                     if isfield(my_live_paths{a}(ii),'scores')
 79 |                         if my_live_paths{a}(ii).foundAT(end) == f
 80 |                             if my_live_paths{a}(ii).scores(end) > dis_thres
 81 |                                 count = my_live_paths{a}(ii).count;
 82 |                                 dis_boxes = [dis_boxes;my_live_paths{a}(ii).boxes(count,:), my_live_paths{a}(ii).scores(end), a];
 83 |                                 pt = round(my_live_paths{a}(ii).boxes(count,1:2));
 84 |                                 wSize = round(my_live_paths{a}(ii).boxes(count,3:4) - my_live_paths{a}(ii).boxes(count,1:2));
 85 |                                 
 86 |                                 %% adding boxes to images
 87 |                                 img = drawRect(img, pt, wSize);                                
 88 |                             end
 89 |                         end
 90 |                     end
 91 |                 end
 92 |             end
 93 |         end
 94 |         %% display images, scores and boxes    
 95 |         if size(dis_boxes,1)>0
 96 |             strcell=cell(size(dis_boxes,1),1);        
 97 |             for iii=1:size(dis_boxes,1)
 98 |                 strcell(iii) = {strcat(actions{dis_boxes(iii,6)}, ': ', num2str(dis_boxes(iii,5),3))};
 99 |             end
100 |             RGB = insertText(img, double(dis_boxes(:,1:2)), strcell);
101 |             imshow(RGB)
102 |             str_save_dir =  strcat('/home/zhujiagang/realtime-action-detection/online_save/',videoID,'_', num2str(dis_thres));
103 |             if ~exist(str_save_dir) 
104 |                 mkdir(str_save_dir)
105 |             end
106 |             str_save =  strcat(str_save_dir, '/', num2str(f, '%05d'), '.jpg');
107 |             imwrite(RGB, str_save);
108 |         end
109 |     end
110 | 
111 |         fprintf('All Done in %03d Seconds\n',round(toc));
112 |     end
113 |     
114 |     disp('done computing action paths');
115 |     %%%%%%%%%%%%%%
116 | end
117 | 
118 | % save('ucf101timing.mat','numfs','timimngs')
119 | disp('done computing action paths');
120 | end
121 | 
122 | % ---------------------------------------------------------
123 | % function to gather the detection box and nms them and pass it to linking script
function paths = genActionPaths(frames,a,nms_thresh,fuseiouth,fusiontype,iouth,costtype,gap)
% ---------------------------------------------------------
% Fuse the base and top detections of every frame for action a, then hand
% the whole sequence to incremental_linking in a single call.
% NOTE(review): fusedActionPaths in this file does not call this helper and
% passes incremental_linking a different argument list - confirm before
% reviving it.
action_frames = struct();
numFrames = length(frames);
for fr = 1:numFrames
    frameDet = frames(fr);
    [fusedBoxes, fusedScores] = fuseboxes(frameDet.baseBoxes, frameDet.topBoxes, ...
        frameDet.baseScores, frameDet.topScores, frameDet.meanScores, ...
        fuseiouth, fusiontype, a, nms_thresh);

    % column 5 of the fused boxes carries the score for action a
    action_frames(fr).allScores = fusedScores;
    action_frames(fr).boxes = fusedBoxes(:,1:4);
    action_frames(fr).scores = fusedBoxes(:,5);
end

paths = incremental_linking(action_frames,iouth,costtype,gap, gap);
end
143 | 
144 | % ---------------------------------------------------------
function [boxes,allscores] = fuseboxes(baseBoxes,topBoxes,baseAllScores,topAllScores,meanScores,fuseiouth,fusiontype,a,nms_thresh)
% ---------------------------------------------------------
% Combine the base and top detections of one frame for action a.
%   'mean'       - filter the base boxes using the averaged scores
%   'nwsum-plus' - filter both streams, merge them with boost_fusion
%   otherwise    - filter both streams and concatenate them (union set)
% The two merging modes finish with nms and keep at most 10 boxes.

if strcmp(fusiontype,'mean')
    [boxes,allscores] = dofilter(baseBoxes,meanScores,a,nms_thresh);
    return;
end

% both remaining modes start from independently filtered streams
[baseBoxes,baseAllScores] = dofilter(baseBoxes,baseAllScores,a,nms_thresh);
[topBoxes,topAllScores] = dofilter(topBoxes,topAllScores,a,nms_thresh);

if strcmp(fusiontype,'nwsum-plus')
    [boxes,allscores] = boost_fusion(baseBoxes,topBoxes,baseAllScores,topAllScores,fuseiouth,a);
else %% fusion type is cat // union-set fusion
    boxes = [baseBoxes;topBoxes];
    allscores = [baseAllScores;topAllScores];
end

pick = nms(boxes,nms_thresh);
best = pick(1:min(10,length(pick)));
boxes = boxes(best,:);
allscores = allscores(best,:);

end
169 | 
170 | 
function [boxes,allscores] = dofilter(boxes, allscores,a,nms_thresh)
 % Select detections for action a in three stages: score > 0.001, then the
 % 50 best by score, then at most the first 10 indices returned by nms.
 % The returned boxes carry the action score as an extra last column.

 % stage 1: drop near-zero scores for this action
 keep = allscores(:,a) > 0.001;
 scores = allscores(keep,a);
 boxes = boxes(keep,:);
 allscores = allscores(keep,:);

 % stage 2: keep the 50 highest-scoring candidates
 [~,order] = sort(scores,'descend');
 order = order(1:min(50,size(order,1)));
 scores = scores(order);
 boxes = boxes(order,:);
 allscores = allscores(order,:);

 % stage 3: non-maximum suppression, then cap at 10
 survivors = nms([boxes scores], nms_thresh);
 survivors = survivors(1:min(10,length(survivors)));
 boxes = [boxes(survivors,:),scores(survivors,:)];
 allscores = allscores(survivors,:);
end
188 | 
189 | % ---------------------------------------------------------
function [sb,ss] = boost_fusion(sb, fb,ss,fs,fuseiouth,a) % sb/ss - spatial boxes/scores, fb/fs - flow boxes/scores
% ---------------------------------------------------------
% Boost each spatial box with its best-overlapping flow box, then append
% the flow boxes that were not used for boosting.
%   sb, fb    - boxes with the score in column 5; columns 1:4 are used as
%               [x1 y1 x2 y2]
%   ss, fs    - per-class score rows matching sb / fb
%   fuseiouth - minimum overlap required to boost
%   a         - class column whose boosted score is written to sb(:,5)

nb = size(sb,1); % num boxes
% convert corner form to [x y w h] for the overlap computation
box_spatial = [sb(:,1:2) sb(:,3:4)-sb(:,1:2)+1];
box_flow =    [fb(:,1:2) fb(:,3:4)-fb(:,1:2)+1];
coveredboxes = []; % indices of flow boxes already used for boosting

for i=1:nb
    ovlp = inters_union(box_spatial(i,:), box_flow);
    if isempty(ovlp)
        continue;
    end
    [movlp, maxind] = max(ovlp);

    % Each flow box may boost only one spatial box. The membership test
    % must ask whether maxind is in coveredboxes; the earlier
    % isempty(ismember(coveredboxes,maxind)) was true only while
    % coveredboxes was still empty, so fusion stopped after the first match.
    if movlp>=fuseiouth && ~ismember(maxind, coveredboxes)
        ms = ss(i,:) + fs(maxind,:)*movlp;
        ms = ms/sum(ms);
        sb(i,5) = ms(a);
        ss(i,:) = ms;
        coveredboxes = [coveredboxes;maxind]; %#ok<AGROW>
    end
end

nb = size(fb,1);

for i=1:nb
    % Scalar-first ismember returns a scalar even when coveredboxes is
    % empty; the reversed form gave [] there and the if never fired, so no
    % flow box was appended when nothing had been fused.
    if ~ismember(i, coveredboxes)
        sb = [sb;fb(i,:)]; %#ok<AGROW>
        ss = [ss;fs(i,:)]; %#ok<AGROW>
    end
end
end
222 | 
223 | 
function iou = inters_union(bounds1,bounds2)
% ------------------------------------------------------------------------
% Intersection over union between every rectangle of bounds1 and every
% rectangle of bounds2; rows are [x y w h]. 0.001 is added to the union
% before dividing.
areaA = bounds1(:,3).*bounds1(:,4);
areaB = bounds2(:,3).*bounds2(:,4);
overlapArea = rectint(bounds1,bounds2);
unionArea = bsxfun(@plus,areaA,areaB')-overlapArea;
iou = overlapArea./(unionArea+0.001);
end
232 | 
233 | % -------------------------------------------------------------------------
function list = sortdirlist(dirname)
% Return the names reported by dir(dirname) as a sorted cell array.
entries = dir(dirname);
names = {entries.name};
list = sort(names);
end
238 | 
239 | % -------------------------------------------------------------------------
function [videos] = getVideoNames(split_file)
% -------------------------------------------------------------------------
% Read the whitespace-separated tokens of split_file and return them as a
% 1xN cell array of names (a 1x1 cell holding [] when the file has none).
% Raises an error when the file cannot be opened.
fprintf('Get both lis  %s\n',split_file);
fid = fopen(split_file,'r');
if fid < 0
    error('getVideoNames:cannotOpen', 'Could not open video list file %s', split_file);
end
% close the handle on every exit path, including errors from textscan
closeFile = onCleanup(@() fclose(fid)); %#ok<NASGU>
data = textscan(fid, '%s');
names = data{1};
videos  = cell(1);

for i=1:length(names)
    videos{i} = names{i};
end

end
256 | 
function frames = readDetections(detectionDir,top_detectionDir )
% Load the base and top detections of every frame into a struct array with
% fields baseBoxes, baseScores, topBoxes, topScores and meanScores.
%
% The frame list is the sorted *.mat listing of detectionDir; the file of
% the same name is then read from top_detectionDir. meanScores is the
% element-wise average of the top and base score matrices.

detectionList = sortdirlist([detectionDir,'*.mat']);
frames = struct([]);
numframes = length(detectionList);
for f = 1 : numframes
    [boxes, scores] = loadFrameDetections([detectionDir,detectionList{f}]);
    frames(f).baseBoxes = boxes;
    frames(f).baseScores = scores;

    [boxes, scores] = loadFrameDetections([top_detectionDir,detectionList{f}]);
    frames(f).topBoxes = boxes;
    frames(f).topScores = scores;
    frames(f).meanScores = (frames(f).topScores + frames(f).baseScores)/2.0;
end

end

function [boxes, scores] = loadFrameDetections(filename)
% Read loc and scores from one detection file. The columns of loc are
% scaled by 320 (columns 1 and 3) and 240 (columns 2 and 4), clamped to
% >= 0 (columns 1, 2) and <= 319 / 239 (columns 3, 4), then shifted by +1.
% The first column of scores is moved to the last position.

% Load into a struct instead of the function workspace: with a bare load(),
% a file lacking loc or scores would silently reuse (and re-scale) the
% values left over from the previously read file.
det = load(filename, 'loc', 'scores');
if ~isfield(det,'loc') || ~isfield(det,'scores')
    error('readDetections:missingVariable', ...
          '%s must contain the variables loc and scores', filename);
end
loc = det.loc;
scores = det.scores;

loc = [loc(:,1)*320, loc(:,2)*240, loc(:,3)*320, loc(:,4)*240];
loc(loc(:,1)<0,1) = 0;
loc(loc(:,2)<0,2) = 0;
loc(loc(:,3)>319,3) = 319;
loc(loc(:,4)>239,4) = 239;
boxes = loc + 1;
scores = [scores(:,2:end),scores(:,1)];
end
290 | 
291 | 
function [ dest ] = drawRect( src, pt, wSize,  lineSize, color )
% DRAWRECT  Draw a rectangle outline on an image.
%   src      : image; a single-channel image is replicated to 3 channels
%   pt       : [x y] of the rectangle's top-left corner
%   wSize    : [width height] of the rectangle
%   lineSize : border thickness in pixels (default 1); the border grows
%              outwards from the rectangle
%   color    : 1x3 colour written into channels 1..3 (default [255 255 0])
%   dest     : image with the rectangle drawn
% With fewer than three arguments a message is displayed and the input
% image is returned unchanged (an empty array if no image was given).
% All border coordinates are clamped to the image, so a thick border near
% an edge neither indexes below 1 nor enlarges the image.
flag = 2; % 1: plain edges, 2: edges extended so thick corners are filled

if nargin < 5
    color = [255 255 0];
end

if nargin < 4
    lineSize = 1;
end

if nargin < 3
    disp('inenough parameters')
    % Always assign the output so the caller does not hit an
    % "output argument not assigned" error.
    if nargin >= 1
        dest = src;
    else
        dest = [];
    end
    return;
end

[yA, xA, z] = size(src);

% Clamp the anchor point into the image.
x1 = min(max(pt(1), 1), xA);
y1 = min(max(pt(2), 1), yA);

wx = wSize(1);
wy = wSize(2);

% Shrink the rectangle if it would leave the image on the right/bottom.
if (x1+wx)>xA
    wx = xA - x1;
end
if (y1+wy)>yA
    wy = yA - y1;
end

% Guard against sizes that would put the far edge before the image start.
if (x1+wx)<1
    wx = 1;
end
if (y1+wy)<1
    wy = 1;
end

if 1==z
    dest = cat(3, src, src, src);
else
    dest = src;
end

for c = 1 : 3
    for dl = 1 : lineSize
        d = dl - 1;
        % Outer border coordinates for this thickness step, clamped.
        top    = max(1,  y1 - d);
        bottom = min(yA, y1 + wy + d);
        left   = max(1,  x1 - d);
        right  = min(xA, x1 + wx + d);
        if  1==flag
            cols = x1:min(xA, x1+wx);
            rows = y1:min(yA, y1+wy);
            dest(top,    cols,  c) = color(c);
            dest(bottom, cols,  c) = color(c);
            dest(rows,   left,  c) = color(c);
            dest(rows,   right, c) = color(c);
        elseif 2==flag
            dest(top,        left:right, c) = color(c);
            dest(bottom,     left:right, c) = color(c);
            dest(top:bottom, left,       c) = color(c);
            dest(top:bottom, right,      c) = color(c);
        end
    end
end

end
370 | 


--------------------------------------------------------------------------------
/matlab-online-display/actionpath/nms.m:
--------------------------------------------------------------------------------
 1 | function pick = nms(boxes, overlap)
 2 | % Non-maximum suppression.
 3 | %   pick = nms(boxes, overlap) 
 4 | % 
 5 | %   Greedily select high-scoring detections and skip detections that are 
 6 | %   significantly covered by a previously selected detection.
 7 | %
 8 | % Return value
 9 | %   pick      Indices of locally maximal detections
10 | %
11 | % Arguments
12 | %   boxes     Detection bounding boxes (see pascal_test.m)
13 | %   overlap   Overlap threshold for suppression
14 | %             For a selected box Bi, all boxes Bj that are covered by 
15 | %             more than overlap are suppressed. Note that 'covered' is
16 | %             is |Bi \cap Bj| / |Bj|, not the PASCAL intersection over 
17 | %             union measure.
18 | 
19 | % AUTORIGHTS
20 | % -------------------------------------------------------
21 | % Copyright (C) 2011-2012 Ross Girshick
22 | % Copyright (C) 2008, 2009, 2010 Pedro Felzenszwalb, Ross Girshick
23 | % Copyright (C) 2007 Pedro Felzenszwalb, Deva Ramanan
24 | % 
25 | % This file is part of the voc-releaseX code
26 | % (http://people.cs.uchicago.edu/~rbg/latent/)
27 | % and is available under the terms of an MIT-like license
28 | % provided in COPYING. Please retain this notice and
29 | % COPYING if you use this file (or a portion of it) in
30 | % your project.
31 | % -------------------------------------------------------
32 | 
33 | if isempty(boxes)
34 |   pick = [];
35 | else
36 |   x1 = boxes(:,1);
37 |   y1 = boxes(:,2);
38 |   x2 = boxes(:,3);
39 |   y2 = boxes(:,4);
40 |   s = boxes(:,end);
41 |   area = (x2-x1) .* (y2-y1);
42 |   %area = (x2-x1+1) .* (y2-y1+1);
43 | 
44 |   [vals, I] = sort(s);
45 |   pick = [];
46 |   while ~isempty(I)
47 |     last = length(I);
48 |     i = I(last);
49 |     pick = [pick; i];
50 |     suppress = [last];
51 |     for pos = 1:last-1
52 |       j = I(pos);
53 |       xx1 = max(x1(i), x1(j));
54 |       yy1 = max(y1(i), y1(j));
55 |       xx2 = min(x2(i), x2(j));
56 |       yy2 = min(y2(i), y2(j));
57 |       w = xx2-xx1;
58 |       h = yy2-yy1;
59 |       
60 | %       w = xx2-xx1+1;
61 | %       h = yy2-yy1+1;
62 |       
63 |       if w > 0 && h > 0
64 |         % compute overlap
65 |         inter = w*h;
66 |         o = inter / (area(j) + area(i) - inter);
67 |         if o > overlap
68 |           suppress = [suppress; pos];
69 |         end
70 |       end
71 |     end
72 |     I(suppress) = [];
73 |   end  
74 | end
75 | 


--------------------------------------------------------------------------------
/matlab-online-display/display01.txt:
--------------------------------------------------------------------------------
1 | Basketball/v_Basketball_g08_c01
2 | Basketball/v_Basketball_g08_c02


--------------------------------------------------------------------------------
/matlab-online-display/eval/compute_spatio_temporal_iou.m:
--------------------------------------------------------------------------------
 1 | 
 2 | % ######################################################################################################################################################################################
 3 | % We are here talking about spatio-temporal detections, i.e. a set of ground-truth bounding boxes that
 4 | %  I will denote by g_t, with t between t_g^b and t_g^e (beginning and end time of the ground-truth)
 5 | % versus a detection which is also a set of bounding boxes, denoted by d_t, with t between t_d^e et t_d^e.
 6 | %
 7 | % a) temporal iou =  T_i / T_u
 8 | %  this is the intersection over union between the timing of the the tubes,
 9 | % ie mathematically T_i / T_u with
10 | % the intersection T_i = max(0,   max(t_g^b,t_d^b)-min(t_d^e,t_g^e) )
11 | % and the union T_u = min(t_g^b,t_d^b)-max(t_d^e,t_g^e)
12 | %
13 | % b) for each t between max(tgb,tdb)-min(tde,tge), we compute the IoU between g_t and d_t, and average them
14 | %
15 | % Multiplying (a) and (b) is the same as computed the average of the spatial iou over all frames in T_u of the two tubes, with a spatial iou of 0 for frames where only one box exists.
16 | % c) as this is standard in detection problem, if there are multiple detections for the same groundtruth detection, the first one is counted as positive and the other ones as negatives
17 | % ######################################################################################################################################################################################
18 | %{
19 | gt_fnr = 1xn doube
20 | gt_bb = nx4 doubld - [x y w h]
21 | dt_fnr = 1xm double
22 | dt_bb = mx4 double - [x y w h]
23 | %}
24 | % -------------------------------------------------------------------------
25 | function st_iou = compute_spatio_temporal_iou(gt_fnr, gt_bb, dt_fnr, dt_bb)
26 | % -------------------------------------------------------------------------
27 | 
28 | % time gt begin
29 | tgb = gt_fnr(1);
30 | % time gt end
31 | tge = gt_fnr(end);
32 | %time dt begin
33 | tdb = dt_fnr(1);
34 | tde = dt_fnr(end);
35 | % temporal intersection
36 | T_i = double(max(0, min(tge,tde)-max(tgb,tdb)));
37 | 
38 | if T_i>0
39 |     T_i = T_i +1;
40 |     % temporal union
41 |     T_u = double(max(tge,tde) - min(tgb,tdb)+1);
42 |     %temporal IoU
43 |     T_iou = T_i/T_u;
44 |     % intersect frame numbers
45 |     int_fnr = max(tgb,tdb):min(tge,tde);
46 |     
47 |     % find the ind of the intersected frames in the detected frames
48 |     [~,int_find_dt] = ismember(int_fnr, dt_fnr);
49 |     [~,int_find_gt] = ismember(int_fnr, gt_fnr);
50 |     
51 |     assert(length(int_find_dt)==length(int_find_gt));
52 |     
53 |     iou = zeros(length(int_find_dt),1);
54 |     for i=1:length(int_find_dt)
55 |         if int_find_gt(i)<1
56 | %             fprintf('error ')
57 |             pf = pf;
58 |         else
59 |             pf = i;
60 |         end
61 |         
62 |         gt_bound = gt_bb(int_find_gt(pf),:);
63 |         dt_bound = dt_bb(int_find_dt(pf),:)+1;
64 |         
65 |         % gt_bound = [gt_bound(:,1:2) gt_bound(:,3:4)-gt_bound(:,1:2)];
66 |         % dt_bound = [dt_bound(:,1:2) dt_bound(:,3:4)-dt_bound(:,1:2)];
67 |         iou(i) = inters_union(double(gt_bound),double(dt_bound));
68 |     end
69 |     % finalspatio-temporal IoU threshold
70 |     st_iou = T_iou*mean(iou);
71 | else
72 |     st_iou =0;
73 | end
74 | % % iou_thresh = 0.2,...,0.6 % 'Learing to track paper' takes 0.2 for UCF101 and 0.5 for JHMDB
75 | % if delta >= iou_thresh
76 | %     % consider this tube as valid detection
77 | % end
78 | 
79 | end
80 | 
81 | % -------------------------------------------------------------------------
function iou = inters_union(bounds1,bounds2)
% -------------------------------------------------------------------------
% Pairwise intersection-over-union of two sets of [x y w h] rectangles.
% Row i, column j of the result compares bounds1(i,:) with bounds2(j,:).
% eps in the denominator avoids a division by zero for degenerate boxes.

overlapArea = rectint(bounds1,bounds2);
area1 = bounds1(:,3).*bounds1(:,4);
area2 = bounds2(:,3).*bounds2(:,4);
% column of areas + row of areas -> full matrix of pairwise area sums
unionArea = bsxfun(@plus,area1,area2') - overlapArea;

iou = overlapArea./(unionArea+eps);

end
93 | 


--------------------------------------------------------------------------------
/matlab-online-display/eval/get_PR_curve.m:
--------------------------------------------------------------------------------
  1 | %%##################################################################################################################################################
  2 | 
  3 | %% Author: Gurkirt Singh 
  4 | %% Release date: 26th January 2017
  5 | % STEP-1: loop over the videos present in the predicited Tubes
  6 | % STEP-2: for each video get the GT Tubes
  7 | % STEP-3: Compute the spatio-temporal overlap bwtween GT tube and predicited
  8 | % tubes
  9 | % STEP-4: then label tp 1 or fp 0 to each predicted tube
 10 | % STEP-5: Compute PR and AP for each class using scores, tp and fp in allscore
 11 | %##################################################################################################################################################
 12 | 
 13 | function [mAP,mAIoU,acc,AP] = get_PR_curve(annot, xmldata, testlist, actions, iou_th)
 14 | % load(xmlfile)
 15 | num_vid = length(testlist);
 16 | num_actions = length(actions);
 17 | AP = zeros(num_actions,1);
 18 | averageIoU = zeros(num_actions,1);
 19 | 
 20 | cc = zeros(num_actions,1);
 21 | for a=1:num_actions
 22 |     allscore{a} = zeros(10000,2,'single');
 23 | end
 24 | 
 25 | total_num_gt_tubes = zeros(num_actions,1); 
 26 | % count all the gt tubes from all the vidoes for label a
 27 | % total_num_detection = zeros(num_actions,1);
 28 | 
 29 | preds = zeros(num_vid,1) - 1;
 30 | gts = zeros(num_vid,1);
 31 | annotNames = {annot.name};
 32 | dtNames = {xmldata.videoName};
 33 | for vid=1:num_vid
 34 |     maxscore = -10000;
 35 |     [action,~] = getActionName(testlist{vid}); %%get action name to which this video belongs to
 36 |     [~,action_id] =  find(strcmp(action, actions)); %% process only the videos from current  action a
 37 |     [~,gtVidInd] = find(strcmp(annotNames,testlist{vid}));
 38 |     [~,dtVidInd] = find(strcmp(dtNames,testlist{vid}));
 39 |     
 40 |     dt_tubes = sort_detection(xmldata(dtVidInd));
 41 |     gt_tubes = annot(gtVidInd).tubes;
 42 |         
 43 |     num_detection = length(dt_tubes.class);
 44 |     num_gt_tubes = length(gt_tubes);
 45 |     
 46 |     %     total_num_detection = total_num_detection + num_detection;
 47 |     for gtind = 1:num_gt_tubes
 48 |         action_id = gt_tubes(gtind).class;
 49 |         total_num_gt_tubes(action_id) = total_num_gt_tubes(action_id) + 1;
 50 |     end
 51 |     gts(vid) = action_id;
 52 |     dt_labels = dt_tubes.class;
 53 |     covered_gt_tubes = zeros(num_gt_tubes,1);
 54 |     for dtind = 1:num_detection
 55 |         dt_fnr = dt_tubes.framenr(dtind).fnr;
 56 |         dt_bb = dt_tubes.boxes(dtind).bxs;
 57 |         dt_label = dt_labels(dtind);
 58 |         if dt_tubes.score(dtind)>maxscore
 59 |             preds(vid) = dt_label;
 60 |             maxscore = dt_tubes.score(dtind);
 61 |         end
 62 |         cc(dt_label) = cc(dt_label) + 1;
 63 |         
 64 |         ioumax=-inf;maxgtind=0;
 65 |         for gtind = 1:num_gt_tubes
 66 |             action_id = gt_tubes(gtind).class;
 67 |             if ~covered_gt_tubes(gtind) && dt_label == action_id
 68 |                 gt_fnr = gt_tubes(gtind).sf:gt_tubes(gtind).ef;
 69 | %                 if isempty(gt_fnr)
 70 | %                     continue
 71 | %                 end
 72 |                 gt_bb = gt_tubes(gtind).boxes;
 73 |                 iou = compute_spatio_temporal_iou(gt_fnr, gt_bb, dt_fnr, dt_bb);
 74 |                 if iou>ioumax
 75 |                     ioumax=iou;
 76 |                     maxgtind=gtind;
 77 |                 end
 78 |             end
 79 |         end
 80 |         
 81 |         if ioumax>iou_th
 82 |             covered_gt_tubes(maxgtind) = 1;
 83 |             allscore{dt_label}(cc(dt_label),:) = [dt_tubes.score(dtind),1];
 84 |             averageIoU(dt_label) = averageIoU(dt_label) + ioumax;
 85 |         else
 86 |             allscore{dt_label}(cc(dt_label),:) = [dt_tubes.score(dtind),0];
 87 |         end
 88 |         
 89 |     end
 90 | end
 91 | 
 92 | for a=1:num_actions
 93 |     allscore{a} = allscore{a}(1:cc(a),:);
 94 |     scores = allscore{a}(:,1);
 95 |     labels = allscore{a}(:,2);
 96 |     [~, si] = sort(scores,'descend');
 97 |     %     scores = scores(si);
 98 |     labels = labels(si);
 99 |     fp=cumsum(labels==0);
100 |     tp=cumsum(labels==1);
101 |     cdet =0;
102 |     if ~isempty(tp)>0
103 |         cdet = tp(end);
104 |         averageIoU(a) = (averageIoU(a)+0.000001)/(tp(end)+0.00001);
105 |     end
106 |     
107 |     recall=tp/total_num_gt_tubes(a);
108 |     precision=tp./(fp+tp);
109 |     AP(a) = xVOCap(recall,precision);
110 |     draw = 0;
111 |     if draw
112 |         % plot precision/recall
113 |         plot(recall,precision,'-');
114 |         grid;
115 |         xlabel 'recall'
116 |         ylabel 'precision'
117 |         title(sprintf('class: %s, AP = %.3f',actions{a},AP(a)));
118 |     end
119 |     %     fprintf('Action %02d AP = %0.5f and AIOU %0.5f GT %03d total det %02d correct det %02d %s\n', a, AP(a),averageIoU(a),total_num_gt_tubes(a),length(tp),cdet,actions{a});
120 |     
121 | end
122 | acc = mean(preds==gts);
123 | AP(isnan(AP)) = 0;
124 | mAP  = mean(AP);
125 | averageIoU(isnan(averageIoU)) = 0;
126 | mAIoU = mean(averageIoU);
127 | 
128 | 
129 | %% ------------------------------------------------------------------------------------------------------------------------------------------------
130 | function [action,vidID] = getActionName(str)
131 | %------------------------------------------------------------------------------------------------------------------------------------------------
132 | indx = strsplit(str, '/');
133 | action = indx{1};
134 | vidID = indx{2};
135 | %%
function sorted_tubes = sort_detection(dt_tubes)
% Reorder the tubes of one video by descending score.
% framenr, boxes, class and score are permuted together; nr is rewritten
% as the rank 1..N. A video without detections is returned unchanged.

sorted_tubes = dt_tubes;
if isempty(dt_tubes.class)
    return;
end

[~,order] = sort(dt_tubes.score,'descend');
for rank = 1 : length(dt_tubes.class)
    src = order(rank);
    sorted_tubes.framenr(rank).fnr = dt_tubes.framenr(src).fnr;
    sorted_tubes.boxes(rank).bxs   = dt_tubes.boxes(src).bxs;
    sorted_tubes.class(rank)       = dt_tubes.class(src);
    sorted_tubes.score(rank)       = dt_tubes.score(src);
    sorted_tubes.nr(rank)          = rank;
end
154 | %% 
155 | 


--------------------------------------------------------------------------------
/matlab-online-display/eval/xVOCap.m:
--------------------------------------------------------------------------------
 1 | function ap = xVOCap(rec,prec)
 2 | % From the PASCAL VOC 2011 devkit
 3 | 
 4 | mrec=[0 ; rec ; 1];
 5 | mpre=[0 ; prec ; 0];
 6 | for i=numel(mpre)-1:-1:1
 7 |     mpre(i)=max(mpre(i),mpre(i+1));
 8 | end
 9 | i=find(mrec(2:end)~=mrec(1:end-1))+1;
10 | ap=sum((mrec(i)-mrec(i-1)).*mpre(i));


--------------------------------------------------------------------------------
/matlab-online-display/frameAp.m:
--------------------------------------------------------------------------------
  1 | % ---------------------------------------------------------
  2 | % Copyright (c) 2017, Gurkirt Singh
  3 | % This code and is available
  4 | % under the terms of MIT License provided in LICENSE.
  5 | % Please retain this notice and LICENSE if you use
  6 | % this file (or any portion of it) in your project.
  7 | % ---------------------------------------------------------
  8 | 
  9 | %% This is main script to compute frame mean AP %%
 10 | %% this code is very new so hasn't been tested a lot
 11 | % Input: Detection directory; annotation file path; split file path
 12 | % Output: computes frame AP for all the detection directories
 13 | % It should produce results almost identical to test_ucf24.py
 14 | 
 15 | function frameAp()
 16 | 
 17 | addpath(genpath('eval/'));
 18 | addpath(genpath('utils/'));
 19 | addpath(genpath('actionpath/'));
 20 | data_root = '/home/zhujiagang/realtime-action-detection';
 21 | save_root = '/home/zhujiagang/realtime-action-detection/save';
 22 | iou_th = 0.5;
 23 | model_type = 'CONV';
 24 | dataset = 'ucf24';
 25 | list_id = '01';
 26 | split_file = sprintf('%s/%s/splitfiles/t%s.txt',data_root,dataset,list_id);
 27 | annotfile = sprintf('%s/%s/splitfiles/annots.mat',data_root,dataset);
 28 | annot = load(annotfile);
 29 | annot = annot.annot;
 30 | testlist = getVideoNames(split_file);
 31 | num_vid = length(testlist);
 32 | num_actions = 24;
 33 | 
 34 | logfile = fopen('frameAP.log','w'); % open log file
 35 | 
 36 | imgType = 'rgb'; iteration_num = 120000;
 37 | det_dirs1 = sprintf('%s/%s/detections/%s-%s-%s-%06d/',save_root,dataset,model_type,imgType,list_id,iteration_num);
 38 | imgType = 'brox'; iteration_num = 120000;
 39 | det_dirs2 = sprintf('%s/%s/detections/%s-%s-%s-%06d/',save_root,dataset,model_type,imgType,list_id,iteration_num);
 40 | imgType = 'fastOF'; iteration_num = 120000;
 41 | det_dirs3 = sprintf('%s/%s/detections/%s-%s-%s-%06d/',save_root,dataset,model_type,imgType,list_id,iteration_num);
 42 | 
 43 | combinations = {{det_dirs1},{det_dirs2},{det_dirs3},...
 44 |     {det_dirs1,det_dirs3,'boost'},{det_dirs1,det_dirs2,'boost'},...
 45 |     {det_dirs1,det_dirs3,'cat'},{det_dirs1,det_dirs2,'cat'},...
 46 |     {det_dirs1,det_dirs3,'mean'},{det_dirs1,det_dirs2,'mean'}};
 47 | 
 48 | for c=1:length(combinations)
 49 |     comb = combinations{c};
 50 |     line = comb{1};
 51 |     if length(comb)>1
 52 |         fusion_type = comb{3};
 53 |         line = [line,' ',comb{2},' \n\n fusion type: ',fusion_type,'\n\n'];
 54 |         
 55 |     else
 56 |         fusion_type = 'none';
 57 |     end
 58 |     
 59 |     line = sprintf('Evaluation for %s\n',line);
 60 |     fprintf('%s',line)
 61 |     fprintf(logfile,'%s',line);
 62 |     AP = zeros(num_actions,1);
 63 |     cc = zeros(num_actions,1);
 64 |     for a=1:num_actions
 65 |         allscore{a} = zeros(24*20*160000,2,'single');
 66 |     end
 67 |     
 68 |     total_num_gt_boxes = zeros(num_actions,1);
 69 |     annotNames = {annot.name};
 70 |     
 71 |     for vid=1:num_vid
 72 |         video_name = testlist{vid};
 73 |         [~,gtVidInd] = find(strcmp(annotNames, testlist{vid}));
 74 |         gt_tubes = annot(gtVidInd).tubes;
 75 |         numf = annot(gtVidInd).num_imgs;
 76 |         num_gt_tubes = length(gt_tubes);
 77 |         if mod(vid,5) == 0
 78 |             fprintf('Done procesing %d videos out of %d %s\n', vid, num_vid, video_name)
 79 |         end
 80 |         for nf = 1:numf
 81 |             gt_boxes = get_gt_boxes(gt_tubes,nf);
 82 |             dt_boxes = get_dt_boxes(comb, video_name, nf, num_actions, fusion_type);
 83 |             num_gt_boxes = size(gt_boxes,1);
 84 |             for g = 1:num_gt_boxes
 85 |                 total_num_gt_boxes(gt_boxes(g,5)) = total_num_gt_boxes(gt_boxes(g,5)) + 1;
 86 |             end
 87 |             covered_gt_boxes = zeros(num_gt_boxes,1);
 88 |             for d = 1 : size(dt_boxes,1)
 89 |                 dt_score = dt_boxes(d,5);
 90 |                 dt_label = dt_boxes(d,6);
 91 |                 cc(dt_label) = cc(dt_label) + 1;
 92 |                 ioumax=-inf; maxgtind=0;
 93 |                 if num_gt_boxes>0  && any(gt_boxes(:,5) == dt_label)
 94 |                     for g = 1:num_gt_boxes
 95 |                         if ~covered_gt_boxes(g) && any(dt_label == gt_boxes(:,5))
 96 |                             iou = compute_spatial_iou(gt_boxes(g,1:4), dt_boxes(d,1:4));
 97 |                             if iou>ioumax
 98 |                                 ioumax=iou;
 99 |                                 maxgtind=g;
100 |                             end
101 |                         end
102 |                     end
103 |                 end
104 |                 
105 |                 if ioumax>=iou_th
106 |                     covered_gt_boxes(maxgtind) = 1;
107 |                     allscore{dt_label}(cc(dt_label),:) = [dt_score,1]; % tp detection
108 |                 else
109 |                     allscore{dt_label}(cc(dt_label),:) = [dt_score,0]; % fp detection
110 |                 end
111 |                 
112 |             end
113 |             
114 |         end
115 |     end
116 |     % Sort scores and then reorder tp fp labels in result precision and recall for each action
117 |     for a=1:num_actions
118 |         allscore{a} = allscore{a}(1:cc(a),:);
119 |         scores = allscore{a}(:,1);
120 |         labels = allscore{a}(:,2);
121 |         [~, si] = sort(scores,'descend');
122 |         %     scores = scores(si);
123 |         labels = labels(si);
124 |         fp=cumsum(labels==0);
125 |         tp=cumsum(labels==1);
126 |         recall=tp/total_num_gt_boxes(a);
127 |         precision=tp./(fp+tp);
128 |         AP(a) = xVOCap(recall,precision);
129 |         line = sprintf('Action %02d AP = %0.5f \n', a, AP(a));
130 |         fprintf('%s',line);
131 |         fprintf(logfile,'%s',line);
132 |     end
133 |     
134 |     AP(isnan(AP)) = 0;
135 |     mAP  = mean(AP);
136 |     line = sprintf('\nMean AP::=> %.5f\n\n',mAP);
137 |     fprintf('%s',line);
138 |     fprintf(logfile,'%s',line);
139 | end
140 | end
141 | 
142 | 
143 | % -------------------------------------------------------------------------
function [videos] = getVideoNames(split_file)
% -------------------------------------------------------------------------
% GETVIDEONAMES  Read the whitespace-separated entries of a split file.
%   split_file : path of a text file holding one video name per token
%   videos     : 1xN cell array of names in file order (1x0 when the file
%                contains no tokens)
% Raises an error when the file cannot be opened. The handle is released
% through onCleanup, so it is closed even if textscan fails.
fprintf('Get both lis is %s\n',split_file);
fid = fopen(split_file,'r');
if fid == -1
    error('getVideoNames:fileOpen', 'Cannot open split file: %s', split_file);
end
closeFile = onCleanup(@() fclose(fid));

data = textscan(fid, '%s');
% textscan returns an Nx1 cell of char vectors; callers expect a row.
videos = reshape(data{1}, 1, []);
end
159 | 
function gt_boxes = get_gt_boxes(gt_tubes,nf)
% Collect the ground-truth boxes present in frame nf.
% Every tube whose [sf, ef] range contains nf contributes one row made of
% its box for that frame followed by the tube's class.
gt_boxes = [];
for t = 1:length(gt_tubes)
    tube = gt_tubes(t);
    if ~(nf >= tube.sf && nf <= tube.ef)
        continue;
    end
    row = nf - tube.sf + 1; % position of frame nf inside the tube
    gt_boxes = [gt_boxes; [tube.boxes(row,:), tube.class]];
end
end
171 | 
function dt_boxes = get_dt_boxes(detection_dir, video_name, nf, num_actions, fusion_type)
% Gather the detections of one frame for all classes.
% The frame's boxes and scores are read once, then get_cls_detection is
% called per class and the resulting rows are stacked in class order.
[frame_boxes, frame_scores] = read_detections(detection_dir, video_name, nf);
dt_boxes = [];
for cls = 1 : num_actions
    dt_boxes = [dt_boxes; get_cls_detection(frame_boxes, frame_scores, cls, fusion_type)];
end
end
181 | 
function cls_boxes = get_cls_detection(boxes,scores,a,fusion_type)
% Detections of class a for one frame under the requested fusion type.
%   'none'  : filter stream 1 only
%   'mean'  : filter stream-1 boxes with the average of both score sets
%   'cat'   : filter both streams, stack them and run nms at 0.45
%   'boost' : filter both streams, merge with boost_boxes, nms at 0.45
% Any other fusion type raises an error.

if strcmp(fusion_type,'none')
    cls_boxes = dofilter(boxes(1).b,scores(1).s,a);
    return;
end

if strcmp(fusion_type,'mean')
    fused_scores = (scores(1).s+scores(2).s)/2.0;
    cls_boxes = dofilter(boxes(1).b,fused_scores,a);
    return;
end

use_cat = strcmp(fusion_type,'cat');
if ~use_cat && ~strcmp(fusion_type,'boost')
    error('Spacify correct fusion technique');
end

% 'cat' and 'boost' share everything except how the two sets are merged.
from_base = dofilter(boxes(1).b,scores(1).s,a);
from_top  = dofilter(boxes(2).b,scores(2).s,a);
if use_cat
    merged = [from_base;from_top];
else
    merged = boost_boxes(from_base,from_top);
end
keep = nms(merged(:,1:5),0.45);
cls_boxes = merged(keep,:);

end
205 | 
function cls_boxes_base = boost_boxes(cls_boxes_base,cls_boxes_top)
% BOOST_BOXES  Fuse two sets of detections of one class.
%   cls_boxes_base : rows [x1 y1 x2 y2 score ...] of the first stream
%   cls_boxes_top  : rows in the same layout from the second stream
% Each base box looks up the top box it overlaps most. If that IoU is at
% least 0.3 and the top box has not been used yet, the base score is
% increased by (top score * IoU) and the top box is marked as used. Top
% boxes that were never used are appended after the base boxes.
%
% The previous membership tests were written as
% isempty(ismember(covered,idx)) and ~ismember(covered,idx): the first is
% true only while nothing is covered (so at most one boost happened), and
% the second is empty - hence false - when nothing is covered (so no top
% box was appended in that case).

box_spatial = [cls_boxes_base(:,1:2) cls_boxes_base(:,3:4)-cls_boxes_base(:,1:2)+1];
box_flow =    [cls_boxes_top(:,1:2) cls_boxes_top(:,3:4)-cls_boxes_top(:,1:2)+1];

used_top = false(size(cls_boxes_top,1),1); % top boxes already consumed
nb = size(cls_boxes_base,1); % num boxes
for i=1:nb
    ovlp = inters_union(box_spatial(i,:), box_flow); % one IoU per top box
    if ~isempty(ovlp)
        [movlp, maxind] = max(ovlp);
        if movlp>=0.3 && ~used_top(maxind)
            cls_boxes_base(i,5) = cls_boxes_base(i,5) + cls_boxes_top(maxind,5)*movlp;
            used_top(maxind) = true;
        end
    end
end

% keep the unmatched top boxes, in their original order
cls_boxes_base = [cls_boxes_base; cls_boxes_top(~used_top,:)];

end
231 | 
function [bxs, sc] = read_detections(detection_dir, video_name, nf)
% READ_DETECTIONS  Load the detections of frame nf for one or two streams.
%   detection_dir : cell array; element 1 (and element 2, when the cell has
%                   more than one element) are detection root directories
%                   ending with a path separator
%   video_name    : sub-directory of the video below each root
%   nf            : frame number, formatted as %05d.mat
%   bxs, sc       : struct arrays; bxs(k).b holds the boxes and sc(k).s the
%                   scores of stream k
bxs = struct();
sc = struct();

det_file = sprintf('%s%s/%05d.mat', detection_dir{1}, video_name, nf);
[boxes, scores] = load_scaled_detections(det_file);
bxs(1).b = boxes;
sc(1).s = scores;

if length(detection_dir)>1
    det_file = sprintf('%s%s/%05d.mat', detection_dir{2}, video_name, nf);
    [boxes, scores] = load_scaled_detections(det_file);
    bxs(2).b = boxes;
    sc(2).s = scores;
end

end

function [boxes, scores] = load_scaled_detections(det_file)
% Read 'loc' and 'scores' from one detection file and convert them.
% Loading into a struct keeps the variables out of the caller's workspace
% and fails loudly when the file does not contain them.
det = load(det_file, 'loc', 'scores');
loc = det.loc;
% Scale by the hard-coded 320x240 frame size, shift to 1-based pixel
% coordinates and clamp to the frame.
boxes = [loc(:,1)*320, loc(:,2)*240, loc(:,3)*320, loc(:,4)*240] + 1;
boxes(boxes(:,1)<1,1) = 1;
boxes(boxes(:,2)<1,2) = 1;
boxes(boxes(:,3)>320,3) = 320;
boxes(boxes(:,4)>240,4) = 240;
% Move the first score column to the end.
scores = [det.scores(:,2:end), det.scores(:,1)];
end
257 | 
258 | 
function boxes = dofilter(boxes,scores,a)
% Select the detections of class a from one frame.
% Steps: keep boxes whose class-a score exceeds 0.01, keep the 50 best by
% score, run nms with threshold 0.45, keep at most the first 20 survivors.
% Output rows are [box, score, a].
cls_scores = scores(:,a);

confident = cls_scores>0.01;
cls_scores = cls_scores(confident);
boxes = boxes(confident,:);

[~,order] = sort(cls_scores,'descend');
order = order(1:min(50,size(order,1)));
cls_scores = cls_scores(order);
boxes = boxes(order,:);

kept = nms([boxes cls_scores],0.45);
kept = kept(1:min(20,length(kept)));
boxes = boxes(kept,:);
cls_scores = cls_scores(kept);

% class column has the same shape as the score column
cls_ids = cls_scores*0 + a;
boxes = [boxes,cls_scores, cls_ids];
end
276 | 
function iou = inters_union(bounds1,bounds2)
% ------------------------------------------------------------------------
% Pairwise intersection-over-union of two sets of [x y w h] rectangles.
% Row i, column j of the result compares bounds1(i,:) with bounds2(j,:).
% 0.001 is added to the union so the division is always defined.
overlapArea = rectint(bounds1,bounds2);
area1 = bounds1(:,3).*bounds1(:,4);
area2 = bounds2(:,3).*bounds2(:,4);
% column of areas + row of areas -> full matrix of pairwise area sums
unionArea = bsxfun(@plus,area1,area2') - overlapArea;
iou = overlapArea./(unionArea+0.001);
end
285 | 
286 | 
function iou = compute_spatial_iou(gt_box, dt_box)
% IoU between one ground-truth box and one detection box.
%   gt_box : [x y w h]
%   dt_box : [x1 y1 x2 y2]; converted to [x y w h] with w = x2-x1+1 and
%            h = y2-y1+1 before the comparison
dt_xywh = [dt_box(1:2), dt_box(3:4)-dt_box(1:2)+1];
overlapArea = rectint(gt_box,dt_xywh);
gtArea = gt_box(3)*gt_box(4);
dtArea = dt_xywh(3)*dt_xywh(4);
iou = overlapArea/(gtArea + dtArea - overlapArea);
end


--------------------------------------------------------------------------------
/matlab-online-display/gentube/convert2eval.m:
--------------------------------------------------------------------------------
 1 | % ---------------------------------------------------------
 2 | % Copyright (c) 2017, Gurkirt Singh
 3 | % This code and is available
 4 | % under the terms of MIT License provided in LICENSE.
 5 | % Please retain this notice and LICENSE if you use
 6 | % this file (or any portion of it) in your project.
 7 | % ---------------------------------------------------------
 8 | % Input: smoothed tubes
 9 | % Output: filtered out tubes with proper scoring
10 | 
function xmld = convert2eval(final_tubes,min_num_frames,kthresh,topk,vids)
% CONVERT2EVAL  Turn smoothed tubes into the per-video evaluation format.
%   final_tubes    : struct with per-tube fields video_id, path_scores,
%                    path_boxes, ts, te, starts and label
%   min_num_frames : a tube is kept only if (te - ts) exceeds this value
%   kthresh        : per-class threshold on the tube score
%   topk           : the tube score is the mean of its topk highest
%                    per-frame scores between ts and te
%   vids           : cell array of video names to export
%   xmld           : struct array, one element per video, with fields
%                    videoName, score, nr, class, framenr and boxes
% The output fields are declared up front, so they exist (empty) even when
% no tube of any video passes the filters; callers read xmld(v).class
% unconditionally.

xmld = struct('videoName', {}, 'score', {}, 'nr', {}, 'class', {}, ...
    'framenr', {}, 'boxes', {});

for v = 1 : length(vids)
    action_indexes = find(strcmp(final_tubes.video_id,vids{v}));
    xmld(v).videoName = vids{v};
    path_scores = final_tubes.path_scores(1,action_indexes);
    
    ts = final_tubes.ts(action_indexes);
    starts = final_tubes.starts(action_indexes);
    te = final_tubes.te(action_indexes);
    act_nr = 1; % next free slot among the tubes kept for this video
     
    for a = 1 : length(ts)
        act_ts = ts(a);
        act_te = te(a);
        act_path_scores = cell2mat(path_scores(a));
        
        % mean of the topk best frame scores inside the trimmed tube
        act_scores = sort(act_path_scores(act_ts:act_te),'descend');
        topk_mean = mean(act_scores(1:min(topk,length(act_scores))));
        
        % boxes of the trimmed tube, converted from corners to
        % [x y w h] with w = x2-x1 and h = y2-y1
        bxs = final_tubes.path_boxes{action_indexes(a)}(act_ts:act_te,:);
        bxs = [bxs(:,1:2), bxs(:,3:4)-bxs(:,1:2)];
        
        label = final_tubes.label(action_indexes(a));
        
        if topk_mean > kthresh(label) && (act_te-act_ts) > min_num_frames 
            xmld(v).score(act_nr) = topk_mean;
            xmld(v).nr(act_nr) = act_nr;
            xmld(v).class(act_nr) = label;
            % shift tube-relative frame indices by the tube's start offset
            xmld(v).framenr(act_nr).fnr = (act_ts:act_te) + starts(a)-1;
            xmld(v).boxes(act_nr).bxs = bxs;
            act_nr = act_nr+1;
        end
    end

end
58 | 


--------------------------------------------------------------------------------
/matlab-online-display/gentube/dpEM_max.m:
--------------------------------------------------------------------------------
 1 | % ---------------------------------------------------------
 2 | % Original code comes from  https://team.inria.fr/perception/research/skeletalquads/
 3 | % Copyright (c) 2014, Georgios Evangelidis and Gurkirt Singh,
 4 | % This code and is available
 5 | % under the terms of MIT License provided in LICENSE.
 6 | % Please retain this notice and LICENSE if you use
 7 | % this file (or any portion of it) in your project.
 8 | % ---------------------------------------------------------
 9 | 
10 | % M = <10xnum_frames>
11 | % r = 10 (action labels)
12 | % c = frame indices in a video
13 | 
function [p,q,D] = dpEM_max(M,alpha)
% DPEM_MAX  Best label path through a score matrix by dynamic programming.
%
%   [p,q,D] = dpEM_max(M,alpha) maximises the sum of the per-frame scores
%   in M minus a constant penalty for every label change between two
%   consecutive frames:
%       V(L1,L2) = 0      if L1 == L2
%       V(L1,L2) = alpha  if L1 ~= L2
%
%   Inputs
%     M     - r x c score matrix, one row per label and one column per frame.
%     alpha - scalar label-switching penalty.
%
%   Outputs
%     p - 1 x c row with the label (row index of M) chosen for each frame.
%     q - 1 x c row with the frame (column) indices, i.e. 1:c.
%     D - r x c accumulated scores; D(i,j) is the best total of any path
%         that ends with label i at frame j.
%
%   An empty M returns p = [], q = 1:c and D = zeros(r,c) instead of
%   failing on the out-of-range index D(:,0).

[r,c] = size(M);

if r == 0 || c == 0
    p = [];
    q = 1:c;
    D = zeros(r,c);
    return;
end

% Column 1 is an all-zero start state in front of the first frame, so the
% scores of frame j sit in column j+1 until the padding is dropped below.
D = zeros(r,c+1);
D(:,2:end) = M;

% penalty(k,i) is the cost of moving from label k to label i:
% zero on the diagonal, alpha everywhere else.
penalty = alpha * (~eye(r));

% phi(i,j) is the label at frame j-1 that best precedes label i at frame j.
% Column 1 points into the zero start column and is never read.
phi = zeros(r,c);

% Forward pass: visit the frames one by one. Column i of 'candidates'
% holds the previous accumulated scores as seen from label i, i.e. with
% alpha subtracted from every label other than i. max() returns the first
% maximum on ties, exactly like a per-label loop over i = 1:r would.
for j = 2:c+1
    candidates = bsxfun(@minus, D(:,j-1), penalty);
    [best, from] = max(candidates, [], 1);
    D(:,j) = D(:,j) + best(:);
    phi(:,j-1) = from(:);
end

% Drop the padding column so that D lines up with M again.
D = D(:,2:end);

% Backward pass: start from the best label in the last frame and follow
% the stored predecessors back to the first frame.
q = 1:c;
p = zeros(1,c);
[~, p(c)] = max(D(:,c));
for j = c:-1:2
    p(j-1) = phi(p(j), j);
end
94 | 


--------------------------------------------------------------------------------
/matlab-online-display/gentube/mydpEM_max.m:
--------------------------------------------------------------------------------
 1 | function [p,q,D] = mydpEM_max(M,alpha)
 2 | 
 3 | [r,c] = size(M);
 4 | % costs
 5 | D = zeros(r, c+1); % add an extra column
 6 | D(:,1) = 0; % put the maximum cost
 7 | D(:, 2:(c+1)) = M;
 8 | 
 9 | v = [1:r]';
10 | phi = zeros(r,c);
11 | 
12 | for j = 2:c+1; % c = 1230
13 |     for i = 1:r; % r = 10        
14 |         
15 |         [dmax, tb] = max([D(:, j-1)-alpha*(v~=i)]);
16 |         %keyboard;
17 |         D(i,j) = D(i,j)+dmax;
18 |         phi(i,j-1) = tb;
19 |     end
20 | end
21 | 
22 | % Traceback from last frame
23 | D = D(:,2:(c+1));
24 | 
25 | % best of the last column
26 | q = c; % frame inidces
27 | [~,p] = max(D(:,c));
28 | i = p; % index of max element in last column of D, 
29 | j = q; % frame indices
30 | 
31 | while j>1 % loop over frames in a video
32 |     tb = phi(i,j); % i -> index of max element in last column of D, j-> last frame index or last column of D
33 |     p = [tb,p];
34 |     q = [j-1,q];
35 |     j = j-1;
36 |     i = tb;
37 | end


--------------------------------------------------------------------------------
/matlab-online-display/gentube/parActionPathSmoother.m:
--------------------------------------------------------------------------------
  1 | % ---------------------------------------------------------
  2 | % Copyright (c) 2017, Gurkirt Singh
  3 | % This code and is available
  4 | % under the terms of MIT License provided in LICENSE.
  5 | % Please retain this notice and LICENSE if you use
  6 | % this file (or any portion of it) in your project.
  7 | % ---------------------------------------------------------
  8 | 
  9 | 
 10 | function final_tubes = parActionPathSmoother(actionpaths,alpha,num_action)
 11 | 
 12 | % load data
 13 | % fprintf('Number of video intest set %d \n', actionpath,alpha,num_action,calpha,useNeg
 14 | % alpha = 1;
 15 | 
 16 | final_tubes = struct('starts',[],'ts',[],'te',[],'label',[],'path_total_score',[],...
 17 |     'dpActionScore',[],'dpPathScore',[],...
 18 |     'path_boxes',cell(1),'path_scores',cell(1),'video_id',cell(1));
 19 | 
 20 | 
 21 | alltubes  = cell(length(actionpaths),1);
 22 | 
 23 | for t = 1 : length(actionpaths)
 24 |     %     fprintf('[%03d/%03d] calpha %04d\n',t,length(tubes),uint16(calpha*100));
 25 |     %     fprintf('.');
 26 |     video_id = actionpaths(t).video_id;
 27 |     %     fprintf('[doing for %s %d out of %d]\n',video_id,t,length(tubes));
 28 |     alltubes{t} = actionPathSmoother4oneVideo(actionpaths(t).paths,alpha,num_action,video_id) ;
 29 | end
 30 | 
 31 | action_count = 1;
 32 | for t = 1 : length(actionpaths)
 33 |     vid_tubes = alltubes{t};
 34 |     for  k=1:length(vid_tubes.ts)
 35 |         final_tubes.starts(action_count) = vid_tubes.starts(k);
 36 |         final_tubes.ts(action_count) = vid_tubes.ts(k);
 37 |         final_tubes.video_id{action_count} = vid_tubes.video_id{k};
 38 |         final_tubes.te(action_count) = vid_tubes.te(k);
 39 |         final_tubes.dpActionScore(action_count) = vid_tubes.dpActionScore(k);
 40 |         final_tubes.label(action_count) = vid_tubes.label(k);
 41 |         final_tubes.dpPathScore(action_count) = vid_tubes.dpPathScore(k);
 42 |         final_tubes.path_total_score(action_count) = vid_tubes.path_total_score(k);
 43 |         final_tubes.path_boxes{action_count} = vid_tubes.path_boxes{k};
 44 |         final_tubes.path_scores{action_count} = vid_tubes.path_scores{k};
 45 |         action_count = action_count + 1;
 46 |     end
 47 |     
 48 | end
 49 | end
 50 | 
 51 | function final_tubes = actionPathSmoother4oneVideo(video_paths,alpha,num_action,video_id)
 52 | action_count =1;
 53 | final_tubes = struct('starts',[],'ts',[],'te',[],'label',[],'path_total_score',[],...
 54 |     'dpActionScore',[],'dpPathScore',[],'vid',[],...
 55 |     'path_boxes',cell(1),'path_scores',cell(1),'video_id',cell(1));
 56 | 
 57 | if ~isempty(video_paths)
 58 |     %gt_ind = find(strcmp(video_id,annot.videoName));
 59 |     %number_frames = length(video_paths{1}(1).idx);
 60 | %     alpha = alpha-3.2; 
 61 |     for a = 1 : num_action
 62 |         action_paths = video_paths{a};
 63 |         num_act_paths = getPathCount(action_paths);
 64 |         for p = 1 : num_act_paths
 65 |             M = action_paths(p).allScores(:,1:num_action)'; %(:,1:num_action)';
 66 |             %M = normM(M);
 67 |             %M = [M(a,:),1-M(a,:)];
 68 |             M = M +20;
 69 |             
 70 |             [pred_path,time,D] = dpEM_max(M,alpha(a));
 71 |             [ Ts, Te, Scores, Label, DpPathScore] = extract_action(pred_path,time,D,a);
 72 |             for k = 1 : length(Ts)
 73 |                 final_tubes.starts(action_count) = action_paths(p).start;
 74 |                 final_tubes.ts(action_count) = Ts(k);
 75 |                 final_tubes.video_id{action_count} = video_id;
 76 |                 %     final_tubes.vid(action_count) = vid_num;
 77 |                 final_tubes.te(action_count) = Te(k);
 78 |                 final_tubes.dpActionScore(action_count) = Scores(k);
 79 |                 final_tubes.label(action_count) = Label(k);
 80 |                 final_tubes.dpPathScore(action_count) = DpPathScore(k);
 81 |                 final_tubes.path_total_score(action_count) = mean(action_paths(p).scores);
 82 |                 final_tubes.path_boxes{action_count} = action_paths(p).boxes;
 83 |                 final_tubes.path_scores{action_count} = action_paths(p).scores;
 84 |                 action_count = action_count + 1;
 85 |             end
 86 |             
 87 |         end
 88 |         
 89 |     end
 90 | end
 91 | end
 92 | 
 93 | function M = normM(M)
 94 | for i = 1: size(M,2)
 95 |     M(:,i) = M(:,i)/sum(M(:,i));
 96 | end
 97 | end
 98 | function [ts,te,scores,label,total_score] = extract_action(p,q,D,action)
 99 | % p(1:1) = 1;
100 | indexs = find(p==action);
101 | 
102 | if isempty(indexs)
103 |     ts = []; te = []; scores = []; label = []; total_score = [];
104 |     
105 | else
106 |     indexs_diff = [indexs,indexs(end)+1] - [indexs(1)-2,indexs];
107 |     ts = find(indexs_diff>1);
108 |     
109 |     if length(ts)>1
110 |         te = [ts(2:end)-1,length(indexs)];
111 |     else
112 |         te = length(indexs);
113 |     end
114 |     ts = indexs(ts);
115 |     te = indexs(te);
116 |     scores = (D(action,q(te)) - D(action,q(ts)))./(te-ts);
117 |     label = ones(length(ts),1)*action;
118 |     total_score = ones(length(ts),1)*D(p(end),q(end))/length(p);
119 | end
120 | end
121 | 
122 | % -------------------------------------------------------------------------
function lp_count = getPathCount(live_paths)
% -------------------------------------------------------------------------
% Number of paths in live_paths; zero when it has no 'boxes' field (for
% example when it is empty or not a struct).

lp_count = 0;
if isfield(live_paths,'boxes')
    lp_count = length(live_paths);
end
end
132 | 


--------------------------------------------------------------------------------
/matlab-online-display/gentube/readALLactionPaths.m:
--------------------------------------------------------------------------------
 1 | % ---------------------------------------------------------
 2 | % Copyright (c) 2017, Gurkirt Singh
 3 | % This code and is available
 4 | % under the terms of MIT License provided in LICENSE.
 5 | % Please retain this notice and LICENSE if you use
 6 | % this file (or any portion of it) in your project.
 7 | % ---------------------------------------------------------
 8 | 
 9 | function actionpath = readALLactionPaths(videolist,actionPathDir,step)
10 | 
11 | videos = getVideoNames(videolist);
12 | NumVideos = length(videos);
13 | 
14 | actionpath = struct([]);
15 | fprintf('Loading action paths of %d videos\n',NumVideos);
16 | count  = 1;
17 | for vid=1:step:NumVideos
18 |     
19 |     videoID  = videos(vid).video_id;
20 |     pathsSaveName = [actionPathDir,videoID,'-actionpaths.mat'];
21 |    
22 |     if ~exist(pathsSaveName,'file')
23 |         error('Action path does not exist please genrate actin path', pathsSaveName)
24 |     else
25 | %         fprintf('loading vid %d %s \n',vid,pathsSaveName);
26 |         load(pathsSaveName);
27 |         actionpath(count).video_id = videos(vid).video_id;
28 |         actionpath(count).paths = allpaths;
29 |         count = count+1;
30 |     end
31 | end
32 | end
33 | 
function [videos] = getVideoNames(split_file)
% -------------------------------------------------------------------------
% Read the whitespace-separated video ids listed in split_file.
%
%   videos is a 1 x N struct array with the single field video_id; it is
%   empty (1 x 0) when the file lists no videos.
%   Raises an error when split_file cannot be opened.

fid = fopen(split_file,'r');
if fid == -1
    error('Cannot open video list file %s', split_file);
end
% close the file on every exit path, including errors raised by textscan
closeFile = onCleanup(@() fclose(fid)); %#ok<NASGU>

data = textscan(fid, '%s');

% A cell array argument makes struct() build one element per cell; the
% reshape keeps the result a row.
videos = struct('video_id', reshape(data{1},1,[]));
end
48 | 


--------------------------------------------------------------------------------
/matlab-online-display/myI01onlineTubes.m:
--------------------------------------------------------------------------------
  1 | % ---------------------------------------------------------
  2 | % Copyright (c) 2017, Gurkirt Singh
  3 | % This code and is available
  4 | % under the terms of MIT License provided in LICENSE.
  5 | % Please retain this notice and LICENSE if you use
  6 | % this file (or any portion of it) in your project.
  7 | % ---------------------------------------------------------
  8 | %% This is main script to build tubes and evaluate them %%
  9 | 
 10 | function myI01onlineTubes()
 11 | close all
 12 | data_root = '/home/zhujiagang/realtime-action-detection';
 13 | save_root = '/home/zhujiagang/realtime-action-detection/save';
 14 | iteration_num_rgb = [120000]; % you can also evaluate on multiple iertations
 15 | iteration_num_flow = [120000]; % you can also evaluate on multiple iertations
 16 | 
 17 | % add subfolder to matlab paths
 18 | addpath(genpath('gentube/'));
 19 | addpath(genpath('actionpath/'));
 20 | addpath(genpath('eval/'));
 21 | addpath(genpath('utils/'));
 22 | model_type = 'CONV';
 23 | 
 24 | completeList = {...
 25 |     {'ucf24','01',{'rgb'},iteration_num_rgb,{'score'}},...
 26 |     {'ucf24','01',{'brox'},iteration_num_flow,{'score'}}...
 27 |     {'ucf24','01',{'fastOF'},iteration_num_flow,{'score'}}...
 28 |     };
 29 | 
 30 | alldopts = cell(2,1);
 31 | count = 1;
 32 | gap=3;
 33 | 
 34 | for setind = 1:length(completeList)
 35 |     [dataset, listid, imtypes, iteration_nums, costTypes] = enumurateList(completeList{setind});
 36 |     for ct = 1:length(costTypes)
 37 |         costtype = costTypes{ct};
 38 |         for imtind = 1:length(imtypes)
 39 |             imgType = imtypes{imtind};
 40 |             for iteration = iteration_nums
 41 |                 for iouthresh=0.1
 42 |                     %% generate directory sturcture based on the options
 43 |                     dopts = initDatasetOpts(data_root,save_root,dataset,imgType,model_type,listid,iteration,iouthresh,costtype, gap);
 44 |                     if exist(dopts.detDir,'dir')
 45 |                         alldopts{count} = dopts;
 46 |                         count = count+1;
 47 |                     end
 48 |                 end
 49 |             end
 50 |         end
 51 |     end
 52 | end
 53 | 
 54 | %% For each option type build tubes and evaluate them
 55 | for index = 1:count-1
 56 |     opts = alldopts{index};
 57 |     if exist(opts.detDir,'dir')
 58 |         fprintf('Video List %02d :: %s\nAnnotFile :: %s\nImage  Dir :: %s\nDetection Dir:: %s\nActionpath Dir:: %s\nTube Dir:: %s\n',...
 59 |             index, opts.vidList, opts.annotFile, opts.imgDir, opts.detDir, opts.actPathDir, opts.tubeDir);
 60 |         %% online bbx and prediction scores display given frame level detections
 61 |         actionPaths(opts);
 62 |     end
 63 | end
 64 | 
 65 | 
 66 | 
 67 | %% Function to enumrate options
 68 | function [dataset,listnum,imtypes,weights,costTypes] = enumurateList(sublist)
 69 | dataset = sublist{1}; listnum = sublist{2}; imtypes = sublist{3};
 70 | weights = sublist{4};costTypes = sublist{5};
 71 | 
 72 | %% Facade function for smoothing tubes and evaluating them
 73 | function results = gettubes(dopts)
 74 | 
 75 | numActions = length(dopts.actions);
 76 | results = zeros(300,6);
 77 | counter=1;
 78 | class_aps = cell(2,1);
 79 | 
 80 | annot = load(dopts.annotFile);
 81 | annot = annot.annot;
 82 | testvideos = getVideoNames(dopts.vidList);
 83 | 
 84 | for alpha = 3 
 85 |     fprintf('alpha %03d ', alpha);
 86 |     % read action paths
 87 |     actionpaths = readALLactionPaths(dopts.vidList,dopts.actPathDir,1);
 88 |     %% perform temporal trimming
 89 |     smoothedtubes = parActionPathSmoother(actionpaths,alpha*ones(numActions,1),numActions);
 90 |     fprintf('\n');
 91 | end
 92 | 
 93 | results(counter:end,:) = [];
 94 | result = cell(2,1);
 95 | result{2} = class_aps;
 96 | result{1} = results;
 97 | results = result;
 98 | 
 99 | 
function videos = getVideoNames(split_file)
% -------------------------------------------------------------------------
% Read the whitespace-separated video names listed in split_file.
%
%   videos is a 1 x N cell row of character vectors. For a file without
%   entries it is the 1 x 1 cell {[]}, as callers have always received.
%   Raises an error when split_file cannot be opened.

fid = fopen(split_file,'r');
if fid == -1
    error('Cannot open video list file %s', split_file);
end
% close the file on every exit path, including errors raised by textscan
closeFile = onCleanup(@() fclose(fid)); %#ok<NASGU>

data = textscan(fid, '%s');
names = data{1};

if isempty(names)
    videos = cell(1);
else
    videos = reshape(names,1,[]);
end
111 | 


--------------------------------------------------------------------------------
/matlab-online-display/myI02genFusedTubes.m:
--------------------------------------------------------------------------------
  1 | 
  2 | function myI02genFusedTubes()
  3 | 
  4 | data_root = '/home/zhujiagang/realtime-action-detection';
  5 | save_root = '/home/zhujiagang/realtime-action-detection/save';
  6 | iteration_num_rgb = 90000; % you can also evaluate on multiple iertations
  7 | iteration_num_flow = 120000; % you can also evaluate on multiple iertations
  8 | 
  9 | addpath(genpath('actionpath/'));
 10 | addpath(genpath('gentube/'));
 11 | addpath(genpath('eval/'));
 12 | addpath(genpath('utils/'));
 13 | 
 14 | completeList = {...
 15 |     {'ucf24','01',{'rgb','brox'},[90000,120000],{'cat','nwsum-plus','mean'}, 0.25},...
 16 |     {'ucf24','01',{'rgb','brox'},[120000,120000],{'cat','nwsum-plus','mean'}, 0.25},...
 17 |     {'ucf24','01',{'rgb','fastOF'},[90000,120000],{'cat','nwsum-plus','mean'}, 0.25},...
 18 |     {'ucf24','01',{'rgb','fastOF'},[120000,120000],{'cat','nwsum-plus','mean'}, 0.25},...
 19 |     };
 20 | model_type = 'CONV';
 21 | costtype = 'score';
 22 | iouthresh = 0.1;
 23 | gap = 3;
 24 | alldopts = cell(2,1);
 25 | count = 1;
 26 | for setind = [2,4] %1:length(completeList)
 27 |     [dataset,listid,imtypes,iteration_nums,fusiontypes,fuseiouths] = enumurateList(completeList{setind});
 28 |     for ff =1:length(fusiontypes)
 29 |         fusiontype = fusiontypes{ff};
 30 |         if strcmp(fusiontype,'cat') || strcmp(fusiontype,'mean')
 31 |             tempfuseiouths = 0;
 32 |         else
 33 |             tempfuseiouths = fuseiouths;
 34 |         end
 35 |         for fuseiouth = tempfuseiouths
 36 |             for iouWeight = 1
 37 |                 dopts = initDatasetOptsFused(data_root,save_root,dataset,imtypes,model_type, ...
 38 |                             listid,iteration_nums,iouthresh,costtype,gap,fusiontype,fuseiouth);
 39 |                 if exist(dopts.basedetDir,'dir') && exist(dopts.topdetDir,'dir')
 40 |                     alldopts{count} = dopts;
 41 |                     count = count+1;
 42 |                 end
 43 |             end
 44 |         end
 45 |     end
 46 | end
 47 | 
 48 | fprintf('\n\n\n\n %d \n\n\n\n',count)
 49 | 
 50 | % sets = {1:12,13:24,25:36,49:64};
 51 | % parpool('local',16); %length(set));
 52 | 
 53 | for setid = 1
 54 |     for index = 1:count-1
 55 |         opts = alldopts{index};
 56 |         if exist(opts.basedetDir,'dir') && exist(opts.topdetDir,'dir')
 57 |             fprintf('Video List :: %s\n \nDetection basedetDir:: %s\nActionpath Dir:: %s\nTube Dir:: %s\n',...
 58 |                 opts.vidList,opts.basedetDir,opts.actPathDir,opts.tubeDir);
 59 |             
 60 |             %% online bbx and prediction scores display given fused frame level detections
 61 |             fusedActionPaths(opts);
 62 |         end
 63 |     end
 64 | end
 65 | 
 66 | 
 67 | function [dataset,listnum,imtypes,weights,fusiontypes,fuseiouths] = enumurateList(sublist)
 68 | 
 69 | dataset = sublist{1}; listnum = sublist{2}; imtypes = sublist{3};
 70 | weights = sublist{4};
 71 | fusiontypes = sublist{5};
 72 | fuseiouths =  sublist{6};
 73 | 
 74 | %% Facade function for smoothing tubes and evaluating them
 75 | function results = gettubes(dopts)
 76 | 
 77 | numActions = length(dopts.actions);
 78 | results = zeros(300,6);
 79 | counter=1;
 80 | class_aps = cell(2,1);
 81 | % save file name to save result for eah option type
 82 | saveName = sprintf('%stubes-results.mat',dopts.tubeDir);
 83 | if ~exist(saveName,'file')
 84 |     
 85 |     annot = load(dopts.annotFile);
 86 |     annot = annot.annot;
 87 |     testvideos = getVideoNames(dopts.vidList);
 88 |     for  alpha = 3 
 89 |         fprintf('alpha %03d ',alpha);
 90 |         tubesSaveName = sprintf('%stubes-alpha%04d.mat',dopts.tubeDir,uint16(alpha*100));
 91 |         if ~exist(tubesSaveName,'file')
 92 |             % read action paths
 93 |             actionpaths = readALLactionPaths(dopts.vidList,dopts.actPathDir,1);
 94 |             %% perform temporal trimming
 95 |             smoothedtubes = parActionPathSmoother(actionpaths,alpha*ones(numActions,1),numActions);
 96 |             save(tubesSaveName,'smoothedtubes','-v7.3');
 97 |         else
 98 |             load(tubesSaveName)
 99 |         end
100 |         
101 |         min_num_frames = 8;    kthresh = 0.0;     topk = 40;
102 |         % strip off uncessary parts and remove very small actions less than
103 |         % 8 frames; not really necessary but for speed at eval time
104 |         xmldata = convert2eval(smoothedtubes, min_num_frames, kthresh*ones(numActions,1), topk,testvideos);
105 |         
106 |         %% Do the evaluation
107 |         for iou_th =[0.2,[0.5:0.05:0.95]]
108 |             [tmAP,tmIoU,tacc,AP] = get_PR_curve(annot, xmldata, testvideos, dopts.actions, iou_th);
109 |             % pritn outs iou_threshold, meanAp, sm, classifcation accuracy
110 |             fprintf('%.2f %0.3f %0.3f N ',iou_th,tmAP, tacc);
111 |             results(counter,:) = [iou_th,alpha,alpha,tmIoU,tmAP,tacc];
112 |             class_aps{counter} = AP;
113 |             counter = counter+1;
114 |         end
115 |         fprintf('\n');
116 |     end
117 |     
118 |     results(counter:end,:) = [];
119 |     result = cell(2,1);
120 |     result{2} = class_aps;
121 |     result{1} = results;
122 |     results = result;
123 |     fprintf('results saved in %s\n',saveName);
124 |     save(saveName,'results');
125 | else
126 |     load(saveName)
127 | end
128 | 
function videos = getVideoNames(split_file)
% -------------------------------------------------------------------------
% Read the whitespace-separated video names listed in split_file.
%
%   videos is a 1 x N cell row of character vectors. For a file without
%   entries it is the 1 x 1 cell {[]}, as callers have always received.
%   Raises an error when split_file cannot be opened.

fid = fopen(split_file,'r');
if fid == -1
    error('Cannot open video list file %s', split_file);
end
% close the file on every exit path, including errors raised by textscan
closeFile = onCleanup(@() fclose(fid)); %#ok<NASGU>

data = textscan(fid, '%s');
names = data{1};

if isempty(names)
    videos = cell(1);
else
    videos = reshape(names,1,[]);
end


--------------------------------------------------------------------------------
/matlab-online-display/utils/createdires.m:
--------------------------------------------------------------------------------
 1 | % ---------------------------------------------------------
 2 | % Copyright (c) 2017, Gurkirt Singh
 3 | % This code and is available
 4 | % under the terms of MIT License provided in LICENSE.
 5 | % Please retain this notice and LICENSE if you use
 6 | % this file (or any portion of it) in your project.
 7 | % ---------------------------------------------------------
 8 | 
 9 | 
function createdires(basedirs,actions)
% CREATEDIRES  Create one sub-directory per action in every base directory.
%
%   basedirs - cell of directory prefixes; the action name is appended
%              directly, so each prefix should end with a file separator.
%   actions  - cell row of action names.
%   Directories that already exist are left untouched.
for b = 1 : length(basedirs)
    prefix = basedirs{b};
    for action = actions
        target = [prefix,action{1}];
        if isdir(target)
            continue;
        end
        mkdir(target);
    end
end
end


--------------------------------------------------------------------------------
/matlab-online-display/utils/initDatasetOpts.m:
--------------------------------------------------------------------------------
 1 | % ---------------------------------------------------------
 2 | % Copyright (c) 2017, Gurkirt Singh
 3 | % This code and is available
 4 | % under the terms of MIT License provided in LICENSE.
 5 | % Please retain this notice and LICENSE if you use
 6 | % this file (or any portion of it) in your project.
 7 | % ---------------------------------------------------------
 8 | 
 9 | function opts = initDatasetOpts(data_root,baseDir,dataset,imgType,model_type,listid,iteration_num,iouthresh,costtype,gap)
10 | 
11 | opts = struct();
12 | opts.imgType = imgType;
13 | opts.costtype = costtype;
14 | opts.gap = gap;
15 | opts.baseDir = baseDir;
16 | opts.imgType = imgType;
17 | opts.dataset = dataset;
18 | opts.iouThresh = iouthresh;
19 | opts.weight = iteration_num;
20 | opts.listid = listid;
21 | 
22 | testlist = ['display',listid];
23 | % opts.vidList = sprintf('%s/%s/splitfiles/%s.txt',data_root,dataset,testlist);
24 | opts.vidList = sprintf('%s.txt',testlist);
25 | 
26 | if strcmp(dataset,'ucf24')
27 |     opts.actions = {'Basketball','BasketballDunk','Biking','CliffDiving','CricketBowling',...
28 |         'Diving','Fencing','FloorGymnastics','GolfSwing','HorseRiding','IceDancing',...
29 |         'LongJump','PoleVault','RopeClimbing','SalsaSpin','SkateBoarding','Skiing',...
30 |         'Skijet','SoccerJuggling','Surfing','TennisSwing','TrampolineJumping',...
31 |         'VolleyballSpiking','WalkingWithDog'};
32 | elseif strcmp(dataset,'JHMDB')
33 |     opts.actions = {'brush_hair','catch','clap','climb_stairs','golf','jump',...
34 |         'kick_ball','pick','pour','pullup','push','run','shoot_ball','shoot_bow',...
35 |         'shoot_gun','sit','stand','swing_baseball','throw','walk','wave'};
36 | elseif strcmp(dataset,'LIRIS')
37 |     opts.actions = {'discussion', 'give_object_to_person','put_take_obj_into_from_box_desk',...
38 |         'enter_leave_room_no_unlocking','try_enter_room_unsuccessfully','unlock_enter_leave_room',...
39 |         'leave_baggage_unattended','handshaking','typing_on_keyboard','telephone_conversation'};
40 | end
41 | 
42 | opts.imgDir = sprintf('%s/%s/%s-images/',data_root,dataset,imgType);
43 | 
44 | opts.detDir = sprintf('%s/%s/detections/%s-%s-%s-%06d/',baseDir,dataset,model_type,imgType,listid,iteration_num);
45 | opts.annotFile = sprintf('%s/%s/splitfiles/annots.mat',data_root,dataset);
46 | 
47 | opts.actPathDir = sprintf('%s/%s/actionPaths/%s-%s-%s-%06d-%s-%d-%04d/',baseDir,dataset,model_type,imgType,listid,iteration_num,costtype,gap,iouthresh*100);
48 | opts.tubeDir = sprintf('%s/%s/actionTubes/%s-%s-%s-%06d-%s-%d-%04d/',baseDir,dataset,model_type,imgType,listid,iteration_num,costtype,gap,iouthresh*100);
49 | 
50 | if exist(opts.detDir,'dir')
51 |     if ~isdir(opts.actPathDir)
52 |         fprintf('Creating %s\n',opts.actPathDir);
53 |         mkdir(opts.actPathDir)
54 |     end
55 |     if ~isdir(opts.tubeDir)
56 |         mkdir(opts.tubeDir)
57 |     end
58 |     if strcmp(dataset,'ucf24') || strcmp(dataset,'JHMDB')
59 |         createdires({opts.actPathDir},opts.actions)
60 |     end
61 | end
62 | 


--------------------------------------------------------------------------------
/matlab-online-display/utils/initDatasetOptsFused.m:
--------------------------------------------------------------------------------
 1 | % ---------------------------------------------------------
 2 | % Copyright (c) 2017, Gurkirt Singh
 3 | % This code and is available
 4 | % under the terms of MIT License provided in LICENSE.
 5 | % Please retain this notice and LICENSE if you use
 6 | % this file (or any portion of it) in your project.
 7 | % ---------------------------------------------------------
 8 | 
 9 | function opts = initDatasetOptsFused(data_root,baseDir,dataset,imtypes,model_type, ...
10 |     listid,iteration_nums,iouthresh,costtype,gap,fusiontype,fuseiouth)
11 | %% data_root,baseDir,dataset,imgType,model_type,listid,iteration_num,iouthresh,costtype,gap
12 | 
13 | opts = struct();
14 | imgType = [imtypes{1},'-',imtypes{2}];
15 | opts.imgType = imgType;
16 | opts.costtype = costtype;
17 | opts.gap = gap;
18 | opts.baseDir = baseDir;
19 | opts.imgType = imgType;
20 | opts.dataset = dataset;
21 | opts.iouThresh = iouthresh;
22 | opts.iteration_nums = iteration_nums;
23 | opts.listid = listid;
24 | opts.fusiontype = fusiontype;
25 | opts.fuseiouth = fuseiouth;
26 | testlist = ['display',listid];
27 | opts.data_root = data_root;
28 | % opts.vidList = sprintf('%s/%s/splitfiles/%s.txt',data_root,dataset,testlist);
29 | opts.vidList = sprintf('%s.txt',testlist);
30 | 
31 | if strcmp(dataset,'ucf24')
32 |     opts.actions = {'Basketball','BasketballDunk','Biking','CliffDiving','CricketBowling',...
33 |         'Diving','Fencing','FloorGymnastics','GolfSwing','HorseRiding','IceDancing',...
34 |         'LongJump','PoleVault','RopeClimbing','SalsaSpin','SkateBoarding','Skiing',...
35 |         'Skijet','SoccerJuggling','Surfing','TennisSwing','TrampolineJumping',...
36 |         'VolleyballSpiking','WalkingWithDog'};
37 | elseif strcmp(dataset,'JHMDB')
38 |     opts.actions = {'brush_hair','catch','clap','climb_stairs','golf','jump',...
39 |         'kick_ball','pick','pour','pullup','push','run','shoot_ball','shoot_bow',...
40 |         'shoot_gun','sit','stand','swing_baseball','throw','walk','wave'};
41 | elseif strcmp(dataset,'LIRIS')
42 |     opts.actions = {'discussion', 'give_object_to_person','put_take_obj_into_from_box_desk',...
43 |         'enter_leave_room_no_unlocking','try_enter_room_unsuccessfully','unlock_enter_leave_room',...
44 |         'leave_baggage_unattended','handshaking','typing_on_keyboard','telephone_conversation'};
45 | end
46 | 
47 | opts.imgDir = sprintf('%s/%s/%s-images/',data_root,dataset,imtypes{1});
48 | 
49 | opts.basedetDir = sprintf('%s/%s/detections/%s-%s-%s-%06d/',baseDir,dataset,model_type,imtypes{1},listid,iteration_nums(1));
50 | opts.topdetDir = sprintf('%s/%s/detections/%s-%s-%s-%06d/',baseDir,dataset,model_type,imtypes{2},listid,iteration_nums(2));
51 | 
52 | opts.annotFile = sprintf('%s/%s/splitfiles/annots.mat',data_root,dataset);
53 | 
54 | opts.actPathDir = sprintf('%s/%s/actionPaths/%s/%s-%s-%s-%s-%d-%d-%s-%d-%04d-fiou%03d/',baseDir,dataset,fusiontype,model_type,imtypes{1},imtypes{2},...
55 |                                         listid,iteration_nums(1),iteration_nums(2),costtype,gap,iouthresh*100,uint16(fuseiouth*100));
56 | opts.tubeDir = sprintf('%s/%s/actionTubes/%s/%s-%s-%s-%s-%d-%d-%s-%d-%04d-fiou%03d/',baseDir,dataset,fusiontype,model_type,imtypes{1},imtypes{2},...
57 |                                         listid,iteration_nums(1),iteration_nums(2),costtype,gap,iouthresh*100,uint16(fuseiouth*100));
58 | 
59 | if exist(opts.basedetDir,'dir')
60 |     if ~isdir(opts.actPathDir)
61 |         fprintf('Creating %s\n',opts.actPathDir);
62 |         mkdir(opts.actPathDir)
63 |     end
64 |     
65 |     if ~isdir(opts.tubeDir)
66 |         mkdir(opts.tubeDir)
67 |     end
68 |     
69 |     if strcmp(dataset,'ucf24') || strcmp(dataset,'JHMDB')
70 |         createdires({opts.actPathDir},opts.actions)
71 |     end
72 | end
73 | 
74 | %fprintf('Video List :: %s\nImage  Dir :: %s\nDetection Dir:: %s\nActionpath Dir:: %s\nTube Dir:: %s\n',...
75 |  %    opts.vidList,opts.imgDir,opts.detDir,opts.actPathDir,opts.tubeDir)
76 | 


--------------------------------------------------------------------------------
/online-tubes/.gitignore:
--------------------------------------------------------------------------------
 1 | *.ods#
 2 | *.m~
 3 | *.prototxt~
 4 | *.txt~
 5 | *.xml~
 6 | *.log
 7 | *.txt
 8 | *.txt~
 9 | *~
10 | /results
11 | 


--------------------------------------------------------------------------------
/online-tubes/I01onlineTubes.m:
--------------------------------------------------------------------------------
  1 | % ---------------------------------------------------------
  2 | % Copyright (c) 2017, Gurkirt Singh
  3 | % This code and is available
  4 | % under the terms of MIT License provided in LICENSE.
  5 | % Please retain this notice and LICENSE if you use
  6 | % this file (or any portion of it) in your project.
  7 | % ---------------------------------------------------------
  8 | %% This is main script to build tubes and evaluate them %%
  9 | 
 10 | function I01onlineTubes()
 11 | 
 12 | data_root = '/mnt/sun-gamma/datasets';
 13 | save_root = '/mnt/sun-gamma/datasets';
 14 | iteration_nums = [70000,120000,50000,90000]; % you can also evaluate on multiple iterations
 15 | 
 16 | % add subfolder to matlab paths
 17 | addpath(genpath('gentube/'));
 18 | addpath(genpath('actionpath/'));
 19 | addpath(genpath('eval/'));
 20 | addpath(genpath('utils/'));
 21 | model_type = 'CONV';
 22 | 
 23 | completeList = {...
 24 |     {'ucf24','01', {'rgb'}, iteration_nums,{'score'}},...
 25 |     {'ucf24','01', {'brox'}, iteration_nums,{'score'}}...
 26 |     {'ucf24','01', {'fastOF'}, iteration_nums,{'score'}}...
 27 |     };
 28 | 
 29 | alldopts = cell(2,1);
 30 | count = 1;
 31 | gap=3;
 32 | 
 33 | for setind = 1 %:length(completeList)
 34 |     [dataset, listid, imtypes, iteration_nums, costTypes] = enumurateList(completeList{setind});
 35 |     for ct = 1:length(costTypes)
 36 |         costtype = costTypes{ct};
 37 |         for imtind = 1:length(imtypes)
 38 |             imgType = imtypes{imtind};
 39 |             for iteration = iteration_nums
 40 |                 for iouthresh=0.1
 41 |                     %% generate directory sturcture based on the options
 42 |                     dopts = initDatasetOpts(data_root,save_root,dataset,imgType,model_type,listid,iteration,iouthresh,costtype, gap);
 43 |                     if exist(dopts.detDir,'dir')
 44 |                         alldopts{count} = dopts;
 45 |                         count = count+1;
 46 |                     end
 47 |                 end
 48 |             end
 49 |         end
 50 |     end
 51 | end
 52 | 
 53 | results = cell(2,1);
 54 | 
 55 | %% For each option type build tubes and evaluate them
 56 | for index = 1:count-1
 57 |     opts = alldopts{index};
 58 |     if exist(opts.detDir,'dir')
 59 |         fprintf('Video List %02d :: %s\nAnnotFile :: %s\nImage  Dir :: %s\nDetection Dir:: %s\nActionpath Dir:: %s\nTube Dir:: %s\n',...
 60 |             index, opts.vidList, opts.annotFile, opts.imgDir, opts.detDir, opts.actPathDir, opts.tubeDir);
 61 |         %% Build action paths given frame level detections
 62 |         actionPaths(opts);
 63 |         %% Perform temproal labelling and evaluate; results saved in results cell
 64 |         result_cell = gettubes(opts);
 65 |         results{index,1} = result_cell;
 66 |         results{index,2} = opts;
 67 |         rm = result_cell{1};
 68 |         rm = rm(rm(:,2) == 5,:);
 69 |         fprintf('\nmAP@0.2:%0.4f mAP@0.5:%0.4f mAP@0.75:%0.4f AVGmAP:%0.4f clsAcc:%0.4f\n\n',...
 70 |                     rm(1,5),rm(2,5),rm(7,5),mean(rm(2:end,5)),rm(1,6));
 71 |     end
 72 | end
 73 | 
 74 | %% save results
 75 | save_dir = [save_root,'/results/'];
 76 | if ~isdir(save_dir)
 77 |     mkdir(save_dir)
 78 | end
 79 | save_dir
 80 | save([save_dir,'online_tubes_results_CONV.mat'],'results')
 81 | 
 82 | %% Function to enumrate options
 83 | function [dataset,listnum,imtypes,weights,costTypes] = enumurateList(sublist)
 84 | dataset = sublist{1}; listnum = sublist{2}; imtypes = sublist{3};
 85 | weights = sublist{4};costTypes = sublist{5};
 86 | 
 87 | %% Facade function for smoothing tubes and evaluating them
 88 | function results = gettubes(dopts)
 89 | 
 90 | numActions = length(dopts.actions);
 91 | results = zeros(300,6);
 92 | counter=1;
 93 | class_aps = cell(2,1);
 94 | % save file name to save result for eah option type
 95 | saveName = sprintf('%stubes-results.mat',dopts.tubeDir);
 96 | if ~exist(saveName,'file')
 97 |     
 98 |     annot = load(dopts.annotFile);
 99 |     annot = annot.annot;
100 |     testvideos = getVideoNames(dopts.vidList);
101 |     actionpaths = readALLactionPaths(dopts.vidList,dopts.actPathDir,1);
102 |     for  alpha = [3, 5]
103 |         fprintf('alpha %03d ',alpha);
104 |         tubesSaveName = sprintf('%stubes-alpha%04d.mat',dopts.tubeDir,uint16(alpha*100));
105 |         if ~exist(tubesSaveName,'file')
106 |             % read action paths
107 |             %% perform temporal trimming
108 |             smoothedtubes = PARactionPathSmoother(actionpaths,alpha*ones(numActions,1),numActions);
109 |             save(tubesSaveName,'smoothedtubes','-v7.3');
110 |         else
111 |             load(tubesSaveName)
112 |         end
113 |         
114 |         min_num_frames = 8;    kthresh = 0.0;     topk = 40;
115 |         xmldata = convert2eval(smoothedtubes, min_num_frames, kthresh*ones(numActions,1), topk,testvideos);
116 |         
117 |         %% Do the evaluation
118 |         for iou_th =[0.2,[0.5:0.05:0.95]]
119 |             [tmAP,tmIoU,tacc,AP] = get_PR_curve(annot, xmldata, testvideos, dopts.actions, iou_th);
120 |             % pritn outs iou_threshold, meanAp, sm, classifcation accuracy
121 |             fprintf('%.2f %0.3f %0.3f N ',iou_th,tmAP, tacc);
122 |             results(counter,:) = [iou_th,alpha,alpha,tmIoU,tmAP,tacc];
123 |             class_aps{counter} = AP;
124 |             counter = counter+1;
125 |         end
126 |         fprintf('\n');
127 |     end
128 | 
129 | 
130 | 
131 |     results(counter:end,:) = [];
132 |     result = cell(2,1);
133 |     result{2} = class_aps;
134 |     result{1} = results;
135 |     results = result;
136 |     fprintf('results saved in %s\n',saveName);
137 |     save(saveName,'results');
138 | else
139 |     load(saveName)
140 | end
141 | 
function videos = getVideoNames(split_file)
% GETVIDEONAMES Read the whitespace-separated names listed in split_file.
%
% Input
%   split_file - path of a text file containing one name per token.
% Output
%   videos     - 1xN cell array of character vectors in file order
%                (1x0 when the file contains no tokens).
%
% Raises getVideoNames:cannotOpen when the file cannot be opened.
% -------------------------------------------------------------------------
[fid,msg] = fopen(split_file,'r');
if fid < 0
    error('getVideoNames:cannotOpen','Cannot open video list %s: %s',split_file,msg);
end
closer = onCleanup(@() fclose(fid)); %#ok<NASGU> closes the file even if textscan fails
data = textscan(fid, '%s');
videos = reshape(data{1},1,[]);
153 | 


--------------------------------------------------------------------------------
/online-tubes/I02genFusedTubes.m:
--------------------------------------------------------------------------------
  1 | 
  2 | function I02genFusedTubes()
  3 | 
  4 | data_root = '/mnt/mars-fast/datasets';
  5 | save_root = '/mnt/mars-gamma/datasets';
  6 | 
  7 | addpath(genpath('actionpath/'));
  8 | addpath(genpath('gentube/'));
  9 | addpath(genpath('eval/'));
 10 | addpath(genpath('utils/'));
 11 | 
 12 | completeList = {...
 13 |     {'ucf24','01',{'rgb','brox'},[120000,120000],{'nwsum-plus','cat','mean'}, 0.25},...
 14 |     {'ucf24','01',{'rgb','fastOF'},[120000,120000],{'nwsum-plus','cat','mean'}, 0.25},...
 15 |     };
 16 | 
 17 | model_type = 'CONV';
 18 | costtype = 'score';
 19 | iouthresh = 0.1;
 20 | gap = 3;
 21 | alldopts = cell(2,1);
 22 | count = 0;
 23 | for setind = 1:length(completeList)
 24 |     [dataset,listid,imtypes,iteration_nums,fusiontypes,fuseiouths] = enumurateList(completeList{setind});
 25 |     for ff =1:length(fusiontypes)
 26 |         fusiontype = fusiontypes{ff};
 27 |         if strcmp(fusiontype,'cat') || strcmp(fusiontype,'mean')
 28 |             tempfuseiouths = 0;
 29 |         else
 30 |             tempfuseiouths = fuseiouths;
 31 |         end
 32 |         for fuseiouth = tempfuseiouths
 33 |             for iouWeight = 1
 34 |                 dopts = initDatasetOptsFused(data_root,save_root,dataset,imtypes,model_type, ...
 35 |                             listid,iteration_nums,iouthresh,costtype,gap,fusiontype,fuseiouth);
 36 |                 if exist(dopts.basedetDir,'dir') && exist(dopts.topdetDir,'dir')
 37 |                     count = count+1;
 38 |                     alldopts{count} = dopts;
 39 |                 end
 40 |             end
 41 |         end
 42 |     end
 43 | end
 44 | 
 45 | fprintf('\n\n\n\n Count is %d \n\n\n\n',count)
 46 | 
 47 | results = cell(2,1);
 48 | 
 49 | for index = 1:count
 50 |     opts = alldopts{index};
 51 |     if exist(opts.basedetDir,'dir') && exist(opts.topdetDir,'dir')
 52 |         fprintf('Video List :: %s\n \nDetection basedetDir:: %s\nActionpath Dir:: %s\nTube Dir:: %s\n',...
 53 |             opts.vidList,opts.basedetDir,opts.actPathDir,opts.tubeDir);
 54 | 
 55 |         %% Build action paths given frame level detections
 56 |         fusedActionPaths(opts);
 57 |         %% Perform temproal labelling and evaluate; results saved in results cell
 58 |         result_cell = gettubes(opts);
 59 |         results{index,1} = result_cell;
 60 |         results{index,2} = opts;
 61 |         rm = result_cell{1};
 62 |         rm = rm(rm(:,2) == 5,:);
 63 |         fprintf('\nmAP@0.2:%0.4f mAP@0.5:%0.4f mAP@0.75:%0.4f AVGmAP:%0.4f clsAcc:%0.4f\n\n',...
 64 |                     rm(1,5),rm(2,5),rm(7,5),mean(rm(2:end,5)),rm(1,6));
 65 |     end
 66 | end
 67 | 
 68 | 
 69 | %% save results
 70 | save_dir = [save_root,'/results/'];
 71 | if ~isdir(save_dir)
 72 |     mkdir(save_dir)
 73 | end
 74 | 
 75 | save([save_dir,'online_fused_tubes_results.mat'],'results')
 76 | 
 77 | 
 78 | function [dataset,listnum,imtypes,weights,fusiontypes,fuseiouths] = enumurateList(sublist)
 79 | 
 80 | dataset = sublist{1}; listnum = sublist{2}; imtypes = sublist{3};
 81 | weights = sublist{4};
 82 | fusiontypes = sublist{5};
 83 | fuseiouths =  sublist{6};
 84 | 
 85 | %% Facade function for smoothing tubes and evaluating them
 86 | function results = gettubes(dopts)
 87 | 
 88 | numActions = length(dopts.actions);
 89 | results = zeros(400,6);
 90 | counter=1;
 91 | class_aps = cell(2,1);
 92 | % save file name to save result for eah option type
 93 | saveName = sprintf('%stubes-results.mat',dopts.tubeDir);
 94 | if ~exist(saveName,'file')
 95 |     
 96 |     annot = load(dopts.annotFile);
 97 |     annot = annot.annot;
 98 |     testvideos = getVideoNames(dopts.vidList);
 99 |     for  alpha = [3,5]
100 |         fprintf('alpha %03d ',alpha);
101 |         tubesSaveName = sprintf('%stubes-alpha%04d.mat',dopts.tubeDir,uint16(alpha*100));
102 |         if ~exist(tubesSaveName,'file')
103 |             % read action paths
104 |             actionpaths = readALLactionPaths(dopts.vidList,dopts.actPathDir,1);
105 |             %% perform temporal trimming
106 |             smoothedtubes = PARactionPathSmoother(actionpaths,alpha*ones(numActions,1),numActions);
107 |             save(tubesSaveName,'smoothedtubes','-v7.3');
108 |         else
109 |             load(tubesSaveName)
110 |         end
111 |         
112 |         min_num_frames = 8;    kthresh = 0.0;     topk = 40;
113 |         % strip off uncessary parts and remove very small actions less than
114 |         % 8 frames; not really necessary but for speed at eval time
115 |         xmldata = convert2eval(smoothedtubes, min_num_frames, kthresh*ones(numActions,1), topk,testvideos);
116 |         
117 |         %% Do the evaluation
118 |         for iou_th =[0.2,[0.5:0.05:0.95]]
119 |             [tmAP,tmIoU,tacc,AP] = get_PR_curve(annot, xmldata, testvideos, dopts.actions, iou_th);
120 |             % pritn outs iou_threshold, meanAp, sm, classifcation accuracy
121 |             fprintf('%.2f %0.3f %0.3f N ',iou_th,tmAP, tacc);
122 |             results(counter,:) = [iou_th,alpha,alpha,tmIoU,tmAP,tacc];
123 |             class_aps{counter} = AP;
124 |             counter = counter+1;
125 |         end
126 |         fprintf('\n');
127 |     end
128 |     
129 |     results(counter:end,:) = [];
130 |     result = cell(2,1);
131 |     result{2} = class_aps;
132 |     result{1} = results;
133 |     results = result;
134 |     fprintf('results saved in %s\n',saveName);
135 |     save(saveName,'results');
136 | else
137 |     load(saveName)
138 | end
139 | 
function videos = getVideoNames(split_file)
% GETVIDEONAMES Read the whitespace-separated names listed in split_file.
%
% Input
%   split_file - path of a text file containing one name per token.
% Output
%   videos     - 1xN cell array of character vectors in file order
%                (1x0 when the file contains no tokens).
%
% Raises getVideoNames:cannotOpen when the file cannot be opened.
% -------------------------------------------------------------------------
[fid,msg] = fopen(split_file,'r');
if fid < 0
    error('getVideoNames:cannotOpen','Cannot open video list %s: %s',split_file,msg);
end
closer = onCleanup(@() fclose(fid)); %#ok<NASGU> closes the file even if textscan fails
data = textscan(fid, '%s');
videos = reshape(data{1},1,[]);
151 | 


--------------------------------------------------------------------------------
/online-tubes/actionpath/actionPaths.m:
--------------------------------------------------------------------------------
  1 | % ---------------------------------------------------------
  2 | function actionPaths(dopts)
  3 | % ---------------------------------------------------------
  4 | % Copyright (c) 2017, Gurkirt Singh
  5 | % This code and is available
  6 | % under the terms of MID License provided in LICENSE.
  7 | % Please retain this notice and LICENSE if you use
  8 | % this file (or any portion of it) in your project.
  9 | % ---------------------------------------------------------
 10 | 
 11 | detresultpath = dopts.detDir;
 12 | costtype = dopts.costtype;
 13 | gap = dopts.gap;
 14 | videolist = dopts.vidList;
 15 | actions = dopts.actions;
 16 | saveName = dopts.actPathDir;
 17 | iouth = dopts.iouThresh;
 18 | numActions = length(actions);
 19 | nms_thresh = 0.45;
 20 | videos = getVideoNames(videolist);
 21 | NumVideos = length(videos);
 22 | 
 23 | for vid=1:NumVideos
 24 |     tic;
 25 |     videoID  = videos{vid};
 26 |     pathsSaveName = [saveName,videoID,'-actionpaths.mat'];
 27 |     
 28 |     videoDetDir = [detresultpath,videoID,'/'];
 29 | 
 30 |     if ~exist(pathsSaveName,'file')
 31 |         fprintf('computing tubes for vide [%d out of %d] video ID = %s\n',vid,NumVideos, videoID);
 32 |         
 33 |         %% loop over all the frames of the video
 34 |         fprintf('Reading detections ');
 35 |         
 36 |         frames = readDetections(videoDetDir);
 37 |         
 38 |         fprintf('\nDone reading detections\n');
 39 |         
 40 |         fprintf('Gernrating action paths ...........\n');
 41 |         
 42 |         %% parllel loop over all action class and genrate paths for each class
 43 |         allpaths = cell(1);
 44 |         parfor a=1:numActions
 45 |             allpaths{a} = genActionPaths(frames, a, nms_thresh, iouth, costtype,gap);
 46 |         end
 47 |         
 48 |         fprintf('results are being saved in::: %s for %d classes\n',pathsSaveName,length(allpaths));
 49 |         save(pathsSaveName,'allpaths');
 50 |         fprintf('All Done in %03d Seconds\n',round(toc));
 51 |     end
 52 | 
 53 | end
 54 | 
 55 | disp('done computing action paths');
 56 | 
 57 | end
 58 | 
 59 | function paths = genActionPaths(frames,a,nms_thresh,iouth,costtype,gap)
 60 | action_frames = struct();
 61 | 
 62 | for f=1:length(frames)
 63 |     [boxes,scores,allscores] = dofilter(frames,a,f,nms_thresh);
 64 |     action_frames(f).boxes = boxes;
 65 |     action_frames(f).scores = scores;
 66 |     action_frames(f).allScores = allscores;
 67 | end
 68 | 
 69 | paths = incremental_linking(action_frames,iouth,costtype, gap, gap);
 70 | 
 71 | end
 72 | 
 73 | %-- filter out least likkey detections for actions ---
 74 | function [boxes,scores,allscores] = dofilter(frames, a, f, nms_thresh)
 75 |     scores = frames(f).scores(:,a);
 76 |     pick = scores>0.001;
 77 |     scores = scores(pick);
 78 |     boxes = frames(f).boxes(pick,:);
 79 |     allscores = frames(f).scores(pick,:);
 80 |     [~,pick] = sort(scores,'descend');
 81 |     to_pick = min(50,size(pick,1));
 82 |     pick = pick(1:to_pick);
 83 |     scores = scores(pick);
 84 |     boxes = boxes(pick,:);
 85 |     allscores = allscores(pick,:);
 86 |     pick = nms([boxes scores], nms_thresh);
 87 |     pick = pick(1:min(10,length(pick)));
 88 |     boxes = boxes(pick,:);
 89 |     scores = scores(pick);
 90 |     allscores = allscores(pick,:);
 91 | end
 92 | 
 93 | %-- list the files in directory and sort them ----------
 94 | function list = sortdirlist(dirname)
 95 | list = dir(dirname);
 96 | list = sort({list.name});
 97 | end
 98 | 
 99 | % -------------------------------------------------------------------------
function [videos] = getVideoNames(split_file)
% -------------------------------------------------------------------------
% GETVIDEONAMES Read the whitespace-separated names listed in split_file.
%
% Input
%   split_file - path of a text file containing one name per token.
% Output
%   videos     - 1xN cell array of character vectors in file order
%                (1x0 when the file contains no tokens).
%
% Raises getVideoNames:cannotOpen when the file cannot be opened.
fprintf('Get both lis is %s\n',split_file);
[fid,msg] = fopen(split_file,'r');
if fid < 0
    error('getVideoNames:cannotOpen','Cannot open video list %s: %s',split_file,msg);
end
closer = onCleanup(@() fclose(fid)); %#ok<NASGU> closes the file even if textscan fails
data = textscan(fid, '%s');
videos = reshape(data{1},1,[]);
end
115 | 
116 | 
function frames = readDetections(detectionDir)
% READDETECTIONS Load the per-frame detections of one video.
%
% Input
%   detectionDir - directory (with trailing separator) holding one .mat
%                  file per frame; each file must define 'loc' and 'scores'.
% Output
%   frames       - struct array ordered by sorted file name with fields
%                    boxes  : loc scaled by 320 (columns 1,3) and 240
%                             (columns 2,4), clamped and shifted by +1
%                    scores : scores with its first column moved to the end
%
% NOTE(review): 320x240 is presumably the frame size of the dataset and
% loc presumably holds normalised [x1 y1 x2 y2] boxes - confirm.
%
% Raises readDetections:missingVariable when a file lacks 'loc' or
% 'scores' (a bare load would silently reuse the previous frame's data).

detectionList = sortdirlist([detectionDir,'*.mat']);
frames = struct([]);
numframes = length(detectionList);
for f = 1 : numframes
  filename = [detectionDir,detectionList{f}];
  det = load(filename,'loc','scores');
  if ~isfield(det,'loc') || ~isfield(det,'scores')
      error('readDetections:missingVariable', ...
          'Detection file %s must contain the variables loc and scores',filename);
  end
  loc = det.loc;
  scores = det.scores;
  loc = [loc(:,1)*320, loc(:,2)*240, loc(:,3)*320, loc(:,4)*240];
  % clamp: lower bound on columns 1-2, upper bound on columns 3-4
  loc(loc(:,1)<0,1) = 0;
  loc(loc(:,2)<0,2) = 0;
  loc(loc(:,3)>319,3) = 319;
  loc(loc(:,4)>239,4) = 239;
  loc = loc + 1;   % shift to 1-based coordinates
  frames(f).boxes = loc;
  frames(f).scores = [scores(:,2:end),scores(:,1)];
end

end
138 | 


--------------------------------------------------------------------------------
/online-tubes/actionpath/fusedActionPaths.m:
--------------------------------------------------------------------------------
  1 | function fusedActionPaths(dopts)
  2 | % AUTORIGHTS
  3 | % ---------------------------------------------------------
  4 | % Copyright (c) 2016, Gurkirt Singh
  5 | %
  6 | % This code and is available
  7 | % under the terms of the Simplified BSD License provided in
  8 | % LICENSE. Please retain this notice and LICENSE if you use
  9 | % this file (or any portion of it) in your project.
 10 | % ---------------------------------------------------------
 11 | 
 12 | detresultpathBase = dopts.basedetDir;
 13 | detresultpathTop = dopts.topdetDir;
 14 | videolist = dopts.vidList;
 15 | actions = dopts.actions;
 16 | saveName = dopts.actPathDir;
 17 | iouth = dopts.iouThresh;
 18 | numActions = length(actions);
 19 | costtype = dopts.costtype;
 20 | gap = dopts.gap;
 21 | nms_thresh = 0.45;
 22 | videos = getVideoNames(videolist);
 23 | 
 24 | NumVideos = length(videos);
 25 | timimngs = zeros(NumVideos,1);
 26 | 
 27 | for vid=1:NumVideos
 28 |     tt = tic;
 29 |     videoID  = videos{vid};
 30 |     pathsSaveName = [saveName,videoID,'-actionpaths.mat'];   
 31 |     videoDetDirBase = [detresultpathBase,videoID,'/'];
 32 |     videoTopDirBase = [detresultpathTop,videoID,'/'];
 33 |     if ~exist(pathsSaveName,'file')
 34 |         fprintf('computing tubes for vide [%d out of %d] video ID = %s\n',vid,NumVideos, videoID);
 35 | 
 36 |         fprintf('Reading detection files searlially ');
 37 |         frames = readDetections(videoDetDirBase,videoTopDirBase);
 38 |         fprintf('\nDone reading detection files \n');
 39 |         fprintf('Gernrating action paths ...........\n');
 40 |         
 41 |         %% parllel loop over all action class and genrate paths for each class
 42 |         thpath = tic;
 43 |         allpaths = cell(1);
 44 |         for a=1:numActions
 45 |             allpaths{a} = genActionPaths(frames,a,nms_thresh,dopts.fuseiouth,dopts.fusiontype,iouth,costtype,gap);
 46 |         end
 47 |         timimngs(vid) = toc(thpath);
 48 |         %%
 49 |         fprintf('Completed linking \n');
 50 |         fprintf('results are being saved in::: %s\n',pathsSaveName);
 51 |         save(pathsSaveName,'allpaths');
 52 |         fprintf('All Done in %03d Seconds\n',round(toc(tt)));
 53 |     end
 54 | end
 55 | 
 56 | % save('ucf101timing.mat','numfs','timimngs')
 57 | disp('done computing action paths');
 58 | end
 59 | 
 60 | % ---------------------------------------------------------
 61 | % function to gather the detection box and nms them and pass it to linking script
 62 | function paths = genActionPaths(frames,a,nms_thresh,fuseiouth,fusiontype,iouth,costtype,gap)
 63 | % ---------------------------------------------------------
 64 | action_frames = struct();
 65 | for f=1:length(frames)
 66 | 
 67 |     baseBoxes = frames(f).baseBoxes;
 68 |     baseAllScores = frames(f).baseScores;
 69 |     topBoxes = frames(f).topBoxes;
 70 |     topAllScores = frames(f).topScores;
 71 |     meanScores = frames(f).meanScores;
 72 |     [boxes, allscores] = fuseboxes(baseBoxes,topBoxes,baseAllScores,topAllScores,meanScores,fuseiouth,fusiontype,a,nms_thresh);
 73 |     
 74 |     action_frames(f).allScores = allscores;
 75 |     action_frames(f).boxes = boxes(:,1:4);
 76 |     action_frames(f).scores = boxes(:,5);
 77 | end
 78 | 
 79 | paths = incremental_linking(action_frames,iouth,costtype,gap, gap);
 80 | end
 81 | 
 82 | % ---------------------------------------------------------
 83 | function [boxes,allscores] = fuseboxes(baseBoxes,topBoxes,baseAllScores,topAllScores,meanScores,fuseiouth,fusiontype,a,nms_thresh)
 84 | % ---------------------------------------------------------
 85 | 
 86 | if strcmp(fusiontype,'mean')
 87 |     [boxes,allscores] = dofilter(baseBoxes,meanScores,a,nms_thresh);
 88 | elseif strcmp(fusiontype,'nwsum-plus')
 89 |     [baseBoxes,baseAllScores] = dofilter(baseBoxes,baseAllScores,a,nms_thresh);
 90 |     [topBoxes,topAllScores] = dofilter(topBoxes,topAllScores,a,nms_thresh);
 91 |     [boxes,allscores] = boost_fusion(baseBoxes,topBoxes,baseAllScores,topAllScores,fuseiouth,a);
 92 |     pick = nms(boxes,nms_thresh);
 93 |     boxes = boxes(pick(1:min(10,length(pick))),:);
 94 |     allscores = allscores(pick(1:min(10,length(pick))),:);
 95 | 
 96 | else %% fusion type is cat // union-set fusion
 97 |     [baseBoxes,baseAllScores] = dofilter(baseBoxes,baseAllScores,a,nms_thresh);
 98 |     [topBoxes,topAllScores] = dofilter(topBoxes,topAllScores,a,nms_thresh);
 99 |     boxes = [baseBoxes;topBoxes];
100 |     allscores = [baseAllScores;topAllScores];
101 |     pick = nms(boxes,nms_thresh);
102 |     boxes = boxes(pick(1:min(10,length(pick))),:);
103 |     allscores = allscores(pick(1:min(10,length(pick))),:);
104 | end
105 | 
106 | end
107 | 
108 | 
function [boxes,allscores] = dofilter(boxes, allscores,a,nms_thresh)
% Select the detections that are kept for class a:
%   1. drop detections whose class-a score is not above 0.001,
%   2. keep at most the 50 highest scoring ones,
%   3. run nms with nms_thresh and keep at most the first 10 indices it
%      returns.
% The returned boxes carry the class-a score as an extra last column;
% allscores stays row-aligned with boxes.
 classScores = allscores(:,a);

 % 1. score threshold
 confident = classScores>0.001;
 classScores = classScores(confident);
 boxes = boxes(confident,:);
 allscores = allscores(confident,:);

 % 2. top 50 by descending score
 [~,order] = sort(classScores,'descend');
 order = order(1:min(50,size(order,1)));
 classScores = classScores(order);
 boxes = boxes(order,:);
 allscores = allscores(order,:);

 % 3. non-maximum suppression, capped at 10 detections
 kept = nms([boxes classScores], nms_thresh);
 kept = kept(1:min(10,length(kept)));
 boxes = [boxes(kept,:),classScores(kept,:)];
 allscores = allscores(kept,:);
end
126 | 
127 | % ---------------------------------------------------------
function [sb,ss] = boost_fusion(sb, fb,ss,fs,fuseiouth,a) % sb - spatial boxes, fb - flow boxes
% ---------------------------------------------------------
% BOOST_FUSION Boost spatial detections with overlapping flow detections.
%
% Inputs
%   sb, fb    - Nx5 / Mx5 boxes; columns 1-4 are corner coordinates
%               [x1 y1 x2 y2], column 5 is the score of class a
%   ss, fs    - class-score rows aligned with sb / fb
%   fuseiouth - minimum IoU for a flow box to boost a spatial box
%   a         - index of the action class
% Outputs
%   sb, ss    - the spatial detections (boosted where matched) followed by
%               every flow detection that was not used for boosting
%
% Each spatial box is compared with its best-overlapping flow box. If the
% IoU reaches fuseiouth and that flow box has not been used yet, the flow
% scores, weighted by the IoU, are added to the spatial scores and the sum
% is renormalised to 1.
%
% The membership tests are written as ismember(element, set). The reversed
% argument order used previously allowed at most one fusion per frame and
% dropped all flow boxes whenever nothing had been fused.

% convert corner boxes to [x y width height] as expected by rectint
box_spatial = [sb(:,1:2) sb(:,3:4)-sb(:,1:2)+1];
box_flow =    [fb(:,1:2) fb(:,3:4)-fb(:,1:2)+1];
coveredboxes = [];   % indices of flow boxes already used for boosting

for i=1:size(sb,1)
    ovlp = inters_union(box_spatial(i,:), box_flow); % 1xM IoU values
    if isempty(ovlp)
        continue;   % no flow boxes in this frame
    end
    [movlp, maxind] = max(ovlp);

    if movlp>=fuseiouth && ~ismember(maxind,coveredboxes)
        ms = ss(i,:) + fs(maxind,:)*movlp;
        ms = ms/sum(ms);
        sb(i,5) = ms(a);
        ss(i,:) = ms;
        coveredboxes = [coveredboxes;maxind]; %#ok<AGROW>
    end
end

% append the flow detections that did not boost any spatial box
unmatched = ~ismember((1:size(fb,1))',coveredboxes);
sb = [sb;fb(unmatched,:)];
ss = [ss;fs(unmatched,:)];
end
160 | 
161 | 
function iou = inters_union(bounds1,bounds2)
% ------------------------------------------------------------------------
% Pairwise intersection-over-union of two sets of [x y width height]
% rectangles. Row i, column j of iou compares bounds1(i,:) with
% bounds2(j,:). 0.001 is added to the union so the division is defined
% for zero-area rectangles.
overlapArea = rectint(bounds1,bounds2);
area1 = bounds1(:,3).*bounds1(:,4);
area2 = bounds2(:,3).*bounds2(:,4);
unionArea = bsxfun(@plus,area1,area2') - overlapArea;
iou = overlapArea./(unionArea+0.001);
end
170 | 
171 | % -------------------------------------------------------------------------
function list = sortdirlist(dirname)
% Returns the names reported by dir(dirname) as a sorted cell row.
entries = dir(dirname);
names = {entries.name};
list = sort(names);
end
176 | 
177 | % -------------------------------------------------------------------------
function [videos] = getVideoNames(split_file)
% -------------------------------------------------------------------------
% GETVIDEONAMES Read the whitespace-separated names listed in split_file.
%
% Input
%   split_file - path of a text file containing one name per token.
% Output
%   videos     - 1xN cell array of character vectors in file order
%                (1x0 when the file contains no tokens).
%
% Raises getVideoNames:cannotOpen when the file cannot be opened.
fprintf('Get both lis  %s\n',split_file);
[fid,msg] = fopen(split_file,'r');
if fid < 0
    error('getVideoNames:cannotOpen','Cannot open video list %s: %s',split_file,msg);
end
closer = onCleanup(@() fclose(fid)); %#ok<NASGU> closes the file even if textscan fails
data = textscan(fid, '%s');
videos = reshape(data{1},1,[]);

end
194 | 
function frames = readDetections(detectionDir,top_detectionDir )
% READDETECTIONS Load the per-frame detections of both streams of a video.
%
% Inputs
%   detectionDir     - directory (with trailing separator) of the base
%                      stream; its sorted *.mat file names define the
%                      frame order
%   top_detectionDir - directory of the top stream; the same file names
%                      are read from it
% Output
%   frames - struct array with fields baseBoxes, baseScores, topBoxes,
%            topScores and meanScores (element-wise mean of topScores and
%            baseScores)
%
% Every file must define 'loc' and 'scores'; a missing variable raises
% readDetections:missingVariable (a bare load would silently reuse the
% data of the previously loaded file).

detectionList = sortdirlist([detectionDir,'*.mat']);
frames = struct([]);
numframes = length(detectionList);
for f = 1 : numframes
    [boxes,scores] = loadFrameDetections([detectionDir,detectionList{f}]);
    frames(f).baseBoxes = boxes;
    frames(f).baseScores = scores;

    [boxes,scores] = loadFrameDetections([top_detectionDir,detectionList{f}]);
    frames(f).topBoxes = boxes;
    frames(f).topScores = scores;
    frames(f).meanScores = (frames(f).topScores + frames(f).baseScores)/2.0;
end

end

function [boxes,scores] = loadFrameDetections(filename)
% Load 'loc' and 'scores' from one detection file. loc is scaled by 320
% (columns 1,3) and 240 (columns 2,4), clamped and shifted by +1; the first
% column of scores is moved to the end.
% NOTE(review): 320x240 is presumably the frame size of the dataset and
% loc presumably holds normalised [x1 y1 x2 y2] boxes - confirm.
det = load(filename,'loc','scores');
if ~isfield(det,'loc') || ~isfield(det,'scores')
    error('readDetections:missingVariable', ...
        'Detection file %s must contain the variables loc and scores',filename);
end
loc = det.loc;
loc = [loc(:,1)*320, loc(:,2)*240, loc(:,3)*320, loc(:,4)*240];
% clamp: lower bound on columns 1-2, upper bound on columns 3-4
loc(loc(:,1)<0,1) = 0;
loc(loc(:,2)<0,2) = 0;
loc(loc(:,3)>319,3) = 319;
loc(loc(:,4)>239,4) = 239;
boxes = loc + 1;   % shift to 1-based coordinates
scores = [det.scores(:,2:end),det.scores(:,1)];
end
228 | 
229 | 
230 | 


--------------------------------------------------------------------------------
/online-tubes/actionpath/incremental_linking.m:
--------------------------------------------------------------------------------
  1 | % -------------------------------------------------------------------------
function live_paths = incremental_linking(frames,iouth,costtype,jumpgap,threhgap)
% -------------------------------------------------------------------------
% INCREMENTAL_LINKING Greedily link per-frame detections into paths, one
% frame at a time.
%
% Inputs
%   frames   - struct array, one element per frame, with fields boxes,
%              scores and allScores (row-aligned per detection)
%   iouth    - forwarded to score_of_edge
%   costtype - forwarded to score_of_edge
%   jumpgap  - a path is only extended while its lastfound counter (number
%              of consecutive frames without a match) is below jumpgap
%   threhgap - forwarded to fill_gaps
% Output
%   live_paths - struct array with fields start, end, boxes, scores,
%                allScores, pathScore, foundAT, count and lastfound,
%                containing the gap-filled live paths followed by the
%                gap-filled terminated paths, ordered by sort_paths.

num_frames = length(frames);

%% online path building

live_paths = struct(); %% Stores live paths
dead_paths = struct(); %% Store the paths that has been terminated
dp_count = 0;
for  t = 1:num_frames
    num_box = size(frames(t).boxes,1);
    if t==1
        % first frame: every detection starts its own path
        for b = 1 : num_box
            live_paths(b).boxes = frames(t).boxes(b,:);
            live_paths(b).scores = frames(t).scores(b);
            live_paths(b).allScores(t,:) = frames(t).allScores(b,:);
            live_paths(b).pathScore = frames(t).scores(b);
            live_paths(b).foundAT(t) = 1;
            live_paths(b).count = 1;
            live_paths(b).lastfound = 0; % frames since the last match
        end
    else
        lp_count = getPathCount(live_paths);
        
        %         fprintf(' %d ', t);
        % edge_scores(lp,b): score of extending live path lp with box b
        edge_scores = zeros(lp_count,num_box);
        
        for lp = 1 : lp_count
            edge_scores(lp,:) = score_of_edge(live_paths(lp),frames(t),iouth,costtype);
        end
        
        
        dead_count = 0 ; % counted below but not used afterwards in this function
        coverd_boxes = zeros(1,num_box);
        path_order_score = zeros(1,lp_count);
        % paths are visited in their current order, so earlier paths get
        % first pick of the boxes
        for lp = 1 : lp_count
            if live_paths(lp).lastfound < jumpgap % path is still allowed to grow
                box_to_lp_score = edge_scores(lp,:);
                if sum(box_to_lp_score)>0 %%checking if atleast there is one match
                    % extend the path with its best scoring box
                    [m_score,maxInd] = max(box_to_lp_score);
                    live_paths(lp).count = live_paths(lp).count + 1;
                    lpc = live_paths(lp).count;
                    live_paths(lp).boxes(lpc,:) = frames(t).boxes(maxInd,:);
                    live_paths(lp).scores(lpc) = frames(t).scores(maxInd);
                    live_paths(lp).allScores(lpc,:) = frames(t).allScores(maxInd,:);
                    live_paths(lp).pathScore = live_paths(lp).pathScore + m_score;
                    live_paths(lp).foundAT(lpc) = t;
                    live_paths(lp).lastfound = 0;
                    % zero the column so no later path can take this box
                    edge_scores(:,maxInd) = 0;
                    coverd_boxes(maxInd) = 1;
                else
                    live_paths(lp).lastfound = live_paths(lp).lastfound +1;
                end
                
                % ordering score: mean of the (up to) jumpgap+1 highest
                % box scores of the path
                scores = sort(live_paths(lp).scores,'ascend');
                num_sc = length(scores);
                path_order_score(lp) = mean(scores(max(1,num_sc-jumpgap):num_sc));
                
            else
                dead_count = dead_count + 1;
            end
        end
        
        % Sort the path based on score of the boxes and terminate dead path
        % (sort_live_paths is defined elsewhere; presumably it moves the
        % terminated paths into dead_paths - verify there)
        
        [live_paths,dead_paths,dp_count] = sort_live_paths(live_paths,....
            path_order_score,dead_paths,dp_count,jumpgap);
        lp_count = getPathCount(live_paths);
        % start new paths using boxes that are not assigned
        if sum(coverd_boxes)<num_box
            for b = 1 : num_box
                if ~coverd_boxes(b)
                    lp_count = lp_count + 1;
                    live_paths(lp_count).boxes = frames(t).boxes(b,:);
                    live_paths(lp_count).scores = frames(t).scores(b);
                    live_paths(lp_count).allScores = frames(t).allScores(b,:);
                    live_paths(lp_count).pathScore = frames(t).scores(b);
                    live_paths(lp_count).foundAT = t;
                    live_paths(lp_count).count = 1;
                    live_paths(lp_count).lastfound = 0;
                end
            end
        end
    end
end

% post-process both sets with fill_gaps, then append the terminated paths
% to the live ones
live_paths = fill_gaps(live_paths,threhgap);
dead_paths = fill_gaps(dead_paths,threhgap);
lp_count = getPathCount(live_paths);
lp = lp_count+1;
% dead_paths only has a 'boxes' field when at least one path was kept
if isfield(dead_paths,'boxes')
    for dp = 1 : length(dead_paths)
        live_paths(lp).start = dead_paths(dp).start;
        live_paths(lp).end = dead_paths(dp).end;
        live_paths(lp).boxes = dead_paths(dp).boxes;
        live_paths(lp).scores = dead_paths(dp).scores;
        live_paths(lp).allScores = dead_paths(dp).allScores;
        live_paths(lp).pathScore = dead_paths(dp).pathScore;
        live_paths(lp).foundAT = dead_paths(dp).foundAT;
        live_paths(lp).count = dead_paths(dp).count;
        live_paths(lp).lastfound = dead_paths(dp).lastfound;
        lp = lp + 1;
    end
end

% final ordering of all paths
live_paths = sort_paths(live_paths);
109 | 
110 | 
111 | % -------------------------------------------------------------------------
function sorted_live_paths = sort_paths(live_paths)
% -------------------------------------------------------------------------
% Order paths by the mean of their (up to) 20 highest box scores, best
% first. Returns a field-less 1x1 struct when getPathCount reports no
% paths.
sorted_live_paths = struct();

lp_count = getPathCount(live_paths);
if ~(lp_count>0)
    return;
end

% ranking score of every path
numPaths = length(live_paths);
path_order_score = zeros(1,lp_count);
for p = 1 : numPaths
    ranked = sort(live_paths(p).scores,'descend');
    path_order_score(p) = mean(ranked(1:min(20,length(ranked))));
end

% copy the paths field by field in ranked order
[~,order] = sort(path_order_score,'descend');
copiedFields = {'start','end','boxes','scores','allScores', ...
                'pathScore','foundAT','count','lastfound'};
for dst = 1 : numPaths
    src = order(dst);
    for k = 1 : numel(copiedFields)
        name = copiedFields{k};
        sorted_live_paths(dst).(name) = live_paths(src).(name);
    end
end
140 | 
141 | % -------------------------------------------------------------------------
142 | function gap_filled_paths = fill_gaps(paths,gap)
143 | % -------------------------------------------------------------------------
144 | % Keep the paths found in more than `gap` frames and make each of them
145 | % dense in time: when two consecutive foundAT entries are d > 1 frames
146 | % apart, the later detection is written d times instead of once.
147 | gap_filled_paths = struct();
148 | if ~isfield(paths,'boxes')
149 |     return;
150 | end
151 | 
152 | kept = 0;
153 | for lp = 1 : getPathCount(paths)
154 |     src = paths(lp);
155 |     if ~(length(src.foundAT) > gap)
156 |         continue;
157 |     end
158 |     kept = kept + 1;
159 |     gap_filled_paths(kept).start = src.foundAT(1);
160 |     gap_filled_paths(kept).end = src.foundAT(end);
161 |     gap_filled_paths(kept).pathScore = src.pathScore;
162 |     gap_filled_paths(kept).foundAT = src.foundAT;
163 |     gap_filled_paths(kept).count = src.count;
164 |     gap_filled_paths(kept).lastfound = src.lastfound;
165 |     row = 1;                             % next output row to write
166 |     for i = 1 : length(src.scores)
167 |         jump = src.foundAT(i) - src.foundAT(max(i-1,1));
168 |         if row == 1 || jump == 1
169 |             copies = 1;
170 |         else
171 |             copies = jump;               % also covers the skipped frames
172 |         end
173 |         for c = 1 : copies
174 |             gap_filled_paths(kept).boxes(row,:) = src.boxes(i,:);
175 |             gap_filled_paths(kept).scores(row) = src.scores(i);
176 |             gap_filled_paths(kept).allScores(row,:) = src.allScores(i,:);
177 |             row = row + 1;
178 |         end
179 |     end
180 | end
181 | 
182 | % -------------------------------------------------------------------------
183 | function [sorted_live_paths,dead_paths,dp_count] = sort_live_paths(live_paths,...
184 |     path_order_score,dead_paths,dp_count,gap)
185 | % -------------------------------------------------------------------------
186 | % Split the live paths into survivors and newly dead paths.
187 | %   Paths are visited in descending path_order_score. A path whose
188 | %   lastfound is below `gap` is appended to sorted_live_paths; any other
189 | %   path is stored in dead_paths at index dp_count+1 and dp_count is
190 | %   advanced. Only the seven fields listed in path_fields are copied.
191 | %   sorted_live_paths is a field-less struct() when nothing survives.
192 | sorted_live_paths = struct();
193 | path_fields = {'boxes','scores','allScores','pathScore', ...
194 |     'foundAT','count','lastfound'};
195 | [~,order] = sort(path_order_score,'descend');
196 | 
197 | num_live = 0;
198 | for k = 1 : getPathCount(live_paths)
199 |     candidate = live_paths(order(k));
200 |     if candidate.lastfound < gap
201 |         num_live = num_live + 1;
202 |         for f = 1 : numel(path_fields)
203 |             sorted_live_paths(num_live).(path_fields{f}) = candidate.(path_fields{f});
204 |         end
205 |     else
206 |         dp_count = dp_count + 1;
207 |         for f = 1 : numel(path_fields)
208 |             dead_paths(dp_count).(path_fields{f}) = candidate.(path_fields{f});
209 |         end
210 |     end
211 | end
212 | 
213 | 
214 | 
215 | 
216 | 
217 | % -------------------------------------------------------------------------
218 | function score = score_of_edge(v1,v2,iouth,costtype)
219 | % -------------------------------------------------------------------------
220 | % Score of linking the last box of path v1 to each candidate box in v2.
221 | %   score(i) stays 0 unless the IoU between v1's last box and v2's i-th box
222 | %   is at least iouth. Otherwise, with d the Euclidean distance between
223 | %   v1.allScores(end,:) and v2.allScores(i,:), it is
224 | %     'score' -> v2.scores(i), 'scrSim' -> 1 - d,
225 | %     'scrMinusSim' -> v2.scores(i) + (1 - d); other costtypes leave it 0.
226 | num_cand = size(v2.boxes,1);
227 | score = zeros(1,num_cand);
228 | 
229 | % Convert corner boxes (presumably [x1 y1 x2 y2]) to [x y width height].
230 | last_box = [v1.boxes(end,1:2) v1.boxes(end,3:4)-v1.boxes(end,1:2)+1];
231 | cand_boxes = [v2.boxes(:,1:2) v2.boxes(:,3:4)-v2.boxes(:,1:2)+1];
232 | overlap = inters_union(last_box,cand_boxes);
233 | 
234 | for i = 1 : num_cand
235 |     if ~(overlap(i) >= iouth)
236 |         continue;
237 |     end
238 |     cand_score = v2.scores(i);
239 |     dist = sqrt(sum((v1.allScores(end,:) - v2.allScores(i,:)).^2));
240 |     switch costtype
241 |         case 'score'
242 |             score(i) =  cand_score;
243 |         case 'scrSim'
244 |             score(i) = 1-dist;
245 |         case 'scrMinusSim'
246 |             score(i) = cand_score + (1 - dist);
247 |     end
248 | end
249 | 
250 | 
251 | % -------------------------------------------------------------------------
252 | function lp_count = getPathCount(live_paths)
253 | % -------------------------------------------------------------------------
254 | % Number of paths in a path struct array; 0 when it has no 'boxes' field
255 | % (for example the field-less struct() used before any path exists).
256 | lp_count = 0;
257 | if isfield(live_paths,'boxes')
258 |     lp_count = length(live_paths);
259 | end
260 | 
261 | % -------------------------------------------------------------------------
262 | function iou = inters_union(bounds1,bounds2)
263 | % -------------------------------------------------------------------------
264 | % Pairwise IoU of rectangles given as [x y width height] rows; entry (i,j)
265 | % compares bounds1(i,:) with bounds2(j,:). eps avoids a division by zero.
266 | overlap_area = rectint(bounds1,bounds2);
267 | area1 = bounds1(:,3).*bounds1(:,4);
268 | area2 = bounds2(:,3).*bounds2(:,4);
269 | union_area = bsxfun(@plus,area1,area2')-overlap_area;
270 | iou = overlap_area./(union_area+eps);
271 | 


--------------------------------------------------------------------------------
/online-tubes/actionpath/nms.m:
--------------------------------------------------------------------------------
 1 | function pick = nms(boxes, overlap)
 2 | % Non-maximum suppression.
 3 | %   pick = nms(boxes, overlap)
 4 | %
 5 | %   Greedily select high-scoring detections and skip detections that
 6 | %   overlap a previously selected detection too much.
 7 | %
 8 | % Return value
 9 | %   pick      Indices of locally maximal detections, best score first
10 | %
11 | % Arguments
12 | %   boxes     Detections, one per row: [x1 y1 x2 y2] in columns 1-4 and
13 | %             the score in the last column
14 | %   overlap   Overlap threshold for suppression
15 | %             For a selected box Bi, all boxes Bj whose intersection over
16 | %             union with Bi, |Bi \cap Bj| / |Bi \cup Bj|, is larger than
17 | %             overlap are suppressed.
18 | %
19 | % Example
20 | %   % the second box has IoU 81/119 with the first one and is dropped
21 | %   pick = nms([0 0 10 10 0.9; 1 1 11 11 0.8; 20 20 30 30 0.7], 0.5);
22 | %   % pick is [1; 3]
23 | 
24 | % AUTORIGHTS
25 | % -------------------------------------------------------
26 | % Copyright (C) 2011-2012 Ross Girshick
27 | % Copyright (C) 2008, 2009, 2010 Pedro Felzenszwalb, Ross Girshick
28 | % Copyright (C) 2007 Pedro Felzenszwalb, Deva Ramanan
29 | % 
30 | % This file is part of the voc-releaseX code
31 | % (http://people.cs.uchicago.edu/~rbg/latent/)
32 | % and is available under the terms of an MIT-like license
33 | % provided in COPYING. Please retain this notice and
34 | % COPYING if you use this file (or a portion of it) in
35 | % your project.
36 | % -------------------------------------------------------
37 | 
38 | if isempty(boxes)
39 |   pick = [];
40 |   return;
41 | end
42 | 
43 | x1 = boxes(:,1);
44 | y1 = boxes(:,2);
45 | x2 = boxes(:,3);
46 | y2 = boxes(:,4);
47 | % Areas and overlaps use plain differences (no +1 pixel convention).
48 | area = (x2-x1) .* (y2-y1);
49 | 
50 | % I holds the candidate indices in ascending score order, so the best
51 | % remaining candidate is always the last entry.
52 | [~, I] = sort(boxes(:,end));
53 | pick = [];
54 | while ~isempty(I)
55 |   last = length(I);
56 |   i = I(last);
57 |   pick = [pick; i];
58 | 
59 |   % Mark the selected box and every remaining candidate whose IoU with it
60 |   % exceeds the threshold; all marked entries leave the candidate list.
61 |   drop = false(last,1);
62 |   drop(last) = true;
63 |   for pos = 1:last-1
64 |     j = I(pos);
65 |     w = min(x2(i), x2(j)) - max(x1(i), x1(j));
66 |     h = min(y2(i), y2(j)) - max(y1(i), y1(j));
67 |     if w > 0 && h > 0
68 |       inter = w*h;
69 |       drop(pos) = inter / (area(j) + area(i) - inter) > overlap;
70 |     end
71 |   end
72 |   I(drop) = [];
73 | end
74 | 
75 | 


--------------------------------------------------------------------------------
/online-tubes/eval/compute_spatio_temporal_iou.m:
--------------------------------------------------------------------------------
 1 | 
 2 | % ######################################################################################################################################################################################
 3 | % We are here talking about spatio-temporal detections, i.e. a set of ground-truth bounding boxes that
 4 | %  I will denote by g_t, with t between t_g^b and t_g^e (beginning and end time of the ground-truth)
 5 | % versus a detection which is also a set of bounding boxes, denoted by d_t, with t between t_d^b and t_d^e.
 6 | %
 7 | % a) temporal iou =  T_i / T_u
 8 | %  this is the intersection over union between the timing of the the tubes,
 9 | % ie mathematically T_i / T_u with
10 | % the intersection T_i = max(0,   min(t_g^e,t_d^e)-max(t_g^b,t_d^b) )
11 | % and the union T_u = max(t_g^e,t_d^e)-min(t_g^b,t_d^b)
12 | %
13 | % b) for each t between max(tgb,tdb) and min(tde,tge), we compute the IoU between g_t and d_t, and average them
14 | %
15 | % Multiplying (a) and (b) is the same as computing the average of the spatial iou over all frames in T_u of the two tubes, with a spatial iou of 0 for frames where only one box exists.
16 | % c) as this is standard in detection problem, if there are multiple detections for the same groundtruth detection, the first one is counted as positive and the other ones as negatives
17 | % ######################################################################################################################################################################################
18 | %{
19 | gt_fnr = 1xn double
20 | gt_bb = nx4 double - [x y w h]
21 | dt_fnr = 1xm double
22 | dt_bb = mx4 double - [x y w h]
23 | %}
24 | % -------------------------------------------------------------------------
25 | function st_iou = compute_spatio_temporal_iou(gt_fnr, gt_bb, dt_fnr, dt_bb)
26 | % -------------------------------------------------------------------------
27 | % Spatio-temporal IoU of a ground-truth tube and a detected tube: the
28 | % temporal IoU of their frame ranges times the mean spatial IoU over the
29 | % frames in the temporal intersection.
30 | %   gt_fnr, dt_fnr  frame numbers of the two tubes (first/last entry give
31 | %                   the temporal extent)
32 | %   gt_bb, dt_bb    one box per frame number, passed to rectint as
33 | %                   [x y w h]; 1 is added to every dt_bb entry first
34 | %   st_iou          0 when the ranges share fewer than two frames
35 | 
36 | tgb = gt_fnr(1);      % ground-truth begin / end
37 | tge = gt_fnr(end);
38 | tdb = dt_fnr(1);      % detection begin / end
39 | tde = dt_fnr(end);
40 | 
41 | % Temporal intersection before the inclusive +1 below.
42 | % NOTE(review): this is 0 when the tubes share exactly one frame, so such
43 | % pairs score 0; kept as is because reported numbers depend on it.
44 | T_i = double(max(0, min(tge,tde)-max(tgb,tdb)));
45 | if ~(T_i > 0)
46 |     st_iou = 0;
47 |     return;
48 | end
49 | 
50 | T_i = T_i + 1;
51 | T_u = double(max(tge,tde) - min(tgb,tdb)+1);
52 | T_iou = T_i/T_u;
53 | 
54 | % Position of every shared frame inside each tube (0 = frame missing).
55 | int_fnr = max(tgb,tdb):min(tge,tde);
56 | [~,int_find_dt] = ismember(int_fnr, dt_fnr);
57 | [~,int_find_gt] = ismember(int_fnr, gt_fnr);
58 | 
59 | iou = zeros(length(int_fnr),1);
60 | pf = 0;   % most recent shared frame that has a ground-truth box
61 | for i = 1:length(int_fnr)
62 |     if int_find_gt(i) >= 1
63 |         pf = i;
64 |     end
65 |     % A frame without ground truth reuses frame pf. When there is no such
66 |     % frame yet, or the detection has no box there, the IoU stays 0
67 |     % instead of indexing with an undefined or zero subscript.
68 |     if pf < 1 || int_find_dt(pf) < 1
69 |         continue;
70 |     end
71 |     gt_bound = gt_bb(int_find_gt(pf),:);
72 |     dt_bound = dt_bb(int_find_dt(pf),:)+1;
73 |     iou(i) = inters_union(double(gt_bound),double(dt_bound));
74 | end
75 | 
76 | st_iou = T_iou*mean(iou);
77 | 
78 | end
79 | 
80 | % -------------------------------------------------------------------------
81 | function iou = inters_union(bounds1,bounds2)
82 | % -------------------------------------------------------------------------
83 | % IoU of rectangles given as [x y width height]; eps avoids dividing by 0.
84 | 
85 | inters = rectint(bounds1,bounds2);
86 | ar1 = bounds1(:,3).*bounds1(:,4);
87 | ar2 = bounds2(:,3).*bounds2(:,4);
88 | union = bsxfun(@plus,ar1,ar2')-inters;
89 | 
90 | iou = inters./(union+eps);
91 | 
92 | end
93 | 


--------------------------------------------------------------------------------
/online-tubes/eval/get_PR_curve.m:
--------------------------------------------------------------------------------
  1 | %%##################################################################################################################################################
  2 | 
  3 | %% Author: Gurkirt Singh 
  4 | %% Release date: 26th January 2017
  5 | % STEP-1: loop over the videos present in the predicted tubes
  6 | % STEP-2: for each video get the GT tubes
  7 | % STEP-3: compute the spatio-temporal overlap between each GT tube and the predicted
  8 | % tubes
  9 | % STEP-4: then label each predicted tube as tp (1) or fp (0)
 10 | % STEP-5: compute PR and AP for each class using the scores, tp and fp in allscore
 11 | %##################################################################################################################################################
 12 | 
 13 | function [mAP,mAIoU,acc,AP] = get_PR_curve(annot, xmldata, testlist, actions, iou_th)
 14 | % Tube-level evaluation of detected action tubes against the annotation.
 15 | %   AP, mAP  per-class / mean average precision; a detection is a true
 16 | %            positive when its best spatio-temporal IoU exceeds iou_th
 17 | %   mAIoU    mean over classes of the average IoU of the true positives
 18 | %   acc      fraction of videos whose top-scoring tube has the gt class
 19 | num_vid = length(testlist);
 20 | num_actions = length(actions);
 21 | AP = zeros(num_actions,1);
 22 | averageIoU = zeros(num_actions,1);
 23 | % cc(a) counts the detections labelled a; allscore{a} keeps one
 24 | % [score, is_true_positive] row per such detection.
 25 | cc = zeros(num_actions,1);
 26 | for a=1:num_actions
 27 |     allscore{a} = zeros(10000,2,'single');
 28 | end
 29 | 
 30 | % number of gt tubes over all the videos, per class
 31 | total_num_gt_tubes = zeros(num_actions,1);
 32 | 
 33 | preds = zeros(num_vid,1) - 1;   % stays -1 for a video without detections
 34 | gts = zeros(num_vid,1);
 35 | annotNames = {annot.name};
 36 | dtNames = {xmldata.videoName};
 37 | for vid=1:num_vid
 38 |     maxscore = -10000;
 39 |     [action,~] = getActionName(testlist{vid}); % action = folder part of the name
 40 |     [~,action_id] =  find(strcmp(action, actions));
 41 |     [~,gtVidInd] = find(strcmp(annotNames,testlist{vid}));
 42 |     [~,dtVidInd] = find(strcmp(dtNames,testlist{vid}));
 43 | 
 44 |     % Detections are visited best score first, so every gt tube is
 45 |     % claimed by its highest-scoring match.
 46 |     dt_tubes = sort_detection(xmldata(dtVidInd));
 47 |     gt_tubes = annot(gtVidInd).tubes;
 48 |     num_detection = length(dt_tubes.class);
 49 |     num_gt_tubes = length(gt_tubes);
 50 | 
 51 |     for gtind = 1:num_gt_tubes
 52 |         action_id = gt_tubes(gtind).class;
 53 |         total_num_gt_tubes(action_id) = total_num_gt_tubes(action_id) + 1;
 54 |     end
 55 |     % class of the last gt tube, or of the folder name when there is none
 56 |     gts(vid) = action_id;
 57 |     dt_labels = dt_tubes.class;
 58 |     covered_gt_tubes = zeros(num_gt_tubes,1);
 59 |     for dtind = 1:num_detection
 60 |         dt_fnr = dt_tubes.framenr(dtind).fnr;
 61 |         dt_bb = dt_tubes.boxes(dtind).bxs;
 62 |         dt_label = dt_labels(dtind);
 63 |         if dt_tubes.score(dtind)>maxscore
 64 |             preds(vid) = dt_label;
 65 |             maxscore = dt_tubes.score(dtind);
 66 |         end
 67 |         cc(dt_label) = cc(dt_label) + 1;
 68 |         % best still-uncovered gt tube of the same class
 69 |         ioumax=-inf;maxgtind=0;
 70 |         for gtind = 1:num_gt_tubes
 71 |             action_id = gt_tubes(gtind).class;
 72 |             if ~covered_gt_tubes(gtind) && dt_label == action_id
 73 |                 gt_fnr = gt_tubes(gtind).sf:gt_tubes(gtind).ef;
 74 |                 gt_bb = gt_tubes(gtind).boxes;
 75 |                 iou = compute_spatio_temporal_iou(gt_fnr, gt_bb, dt_fnr, dt_bb);
 76 |                 if iou>ioumax
 77 |                     ioumax=iou;
 78 |                     maxgtind=gtind;
 79 |                 end
 80 |             end
 81 |         end
 82 |         if ioumax>iou_th
 83 |             covered_gt_tubes(maxgtind) = 1;
 84 |             allscore{dt_label}(cc(dt_label),:) = [dt_tubes.score(dtind),1];
 85 |             averageIoU(dt_label) = averageIoU(dt_label) + ioumax;
 86 |         else
 87 |             allscore{dt_label}(cc(dt_label),:) = [dt_tubes.score(dtind),0];
 88 |         end
 89 |     end
 90 | end
 91 | 
 92 | for a=1:num_actions
 93 |     allscore{a} = allscore{a}(1:cc(a),:);
 94 |     scores = allscore{a}(:,1);
 95 |     labels = allscore{a}(:,2);
 96 |     [~, si] = sort(scores,'descend');
 97 |     labels = labels(si);
 98 |     fp=cumsum(labels==0);
 99 |     tp=cumsum(labels==1);
100 |     cdet =0;
101 |     if ~isempty(tp)
102 |         cdet = tp(end);
103 |         % Mean IoU of the true positives. Only the denominator is padded,
104 |         % so a class without true positives scores 0; padding both with
105 |         % different constants used to turn that case into 0.1.
106 |         averageIoU(a) = averageIoU(a)/(tp(end)+0.00001);
107 |     end
108 | 
109 |     recall=tp/total_num_gt_tubes(a);
110 |     precision=tp./(fp+tp);
111 |     AP(a) = xVOCap(recall,precision);
112 |     draw = 0;   % set to 1 to plot the precision/recall curve
113 |     if draw
114 |         plot(recall,precision,'-');
115 |         grid;
116 |         xlabel 'recall'
117 |         ylabel 'precision'
118 |         title(sprintf('class: %s, AP = %.3f',actions{a},AP(a)));
119 |     end
120 |     %     fprintf('Action %02d AP = %0.5f and AIOU %0.5f GT %03d total det %02d correct det %02d %s\n', a, AP(a),averageIoU(a),total_num_gt_tubes(a),length(tp),cdet,actions{a});
121 | end
122 | acc = mean(preds==gts);
123 | AP(isnan(AP)) = 0;
124 | mAP  = mean(AP);
125 | averageIoU(isnan(averageIoU)) = 0;
126 | mAIoU = mean(averageIoU);
127 | 
128 | 
129 | %% ------------------------------------------------------------------------------------------------------------------------------------------------
130 | function [action,vidID] = getActionName(str)
131 | %------------------------------------------------------------------------------------------------------------------------------------------------
132 | indx = strsplit(str, '/');
133 | action = indx{1};
134 | vidID = indx{2};
135 | %%
136 | function sorted_tubes = sort_detection(dt_tubes)
137 | % Reorder the tubes of one video by descending score. framenr, boxes,
138 | % class and score are permuted together and nr is set to the new rank.
139 | % The input is returned unchanged when it has no detections.
140 | sorted_tubes = dt_tubes;
141 | if isempty(dt_tubes.class)
142 |     return;
143 | end
144 | 
145 | [~,order] = sort(dt_tubes.score,'descend');
146 | for rnk = 1 : length(dt_tubes.class)
147 |     src = order(rnk);
148 |     sorted_tubes.framenr(rnk).fnr = dt_tubes.framenr(src).fnr;
149 |     sorted_tubes.boxes(rnk).bxs = dt_tubes.boxes(src).bxs;
150 |     sorted_tubes.class(rnk) = dt_tubes.class(src);
151 |     sorted_tubes.score(rnk) = dt_tubes.score(src);
152 |     sorted_tubes.nr(rnk) = rnk;
153 | end
154 | %% 
155 | 


--------------------------------------------------------------------------------
/online-tubes/eval/xVOCap.m:
--------------------------------------------------------------------------------
 1 | function ap = xVOCap(rec,prec)
 2 | % From the PASCAL VOC 2011 devkit: area under the precision envelope.
 3 | %   rec and prec are concatenated vertically, so pass column vectors.
 4 | mrec = [0 ; rec ; 1];
 5 | mpre = [0 ; prec ; 0];
 6 | for k = numel(mpre)-1 : -1 : 1
 7 |     mpre(k) = max(mpre(k), mpre(k+1));   % precision made non-increasing
 8 | end
 9 | at = find(mrec(2:end) ~= mrec(1:end-1)) + 1;   % where recall changes
10 | ap = sum((mrec(at)-mrec(at-1)).*mpre(at));


--------------------------------------------------------------------------------
/online-tubes/frameAp.m:
--------------------------------------------------------------------------------
  1 | % ---------------------------------------------------------
  2 | % Copyright (c) 2017, Gurkirt Singh
  3 | % This code and is available
  4 | % under the terms of MIT License provided in LICENSE.
  5 | % Please retain this notice and LICENSE if you use
  6 | % this file (or any portion of it) in your project.
  7 | % ---------------------------------------------------------
  8 | 
  9 | %% This is main script to compute frame mean AP %%
 10 | %% this code is very new so hasn't been tested a lot
 11 | % Input: Detection directory; annotation file path; split file path
 12 | % Output: computes frame AP for all the detection directories
 13 | % It should produce results almost identical to test_ucf24.py
 14 | 
 15 | function frameAP()
 16 | % Frame-level mean AP on UCF24 for each entry of `combinations` (a single
 17 | % detection directory, or two directories plus a fusion type). Results are
 18 | % printed and written to frameAP.log. All paths/settings are hard-coded.
 19 | addpath(genpath('eval/'));
 20 | addpath(genpath('utils/'));
 21 | addpath(genpath('actionpath/'));
 22 | data_root = '/mnt/mars-fast/datasets';
 23 | save_root = '/mnt/mars-gamma/datasets';
 24 | iou_th = 0.5;
 25 | model_type = 'CONV';
 26 | dataset = 'ucf24';
 27 | list_id = '01';
 28 | split_file = sprintf('%s/%s/splitfiles/testlist%s.txt',data_root,dataset,list_id);
 29 | annotfile = sprintf('%s/%s/splitfiles/annots.mat',data_root,dataset);
 30 | annot = load(annotfile);
 31 | annot = annot.annot;
 32 | testlist = getVideoNames(split_file);
 33 | num_vid = length(testlist);
 34 | num_actions = 24;
 35 | 
 36 | logfile = fopen('frameAP.log','w'); % open log file
 37 | if logfile < 0
 38 |     error('Could not open frameAP.log for writing');
 39 | end
 40 | closeLog = onCleanup(@() fclose(logfile)); % closed on return or on error
 41 | 
 42 | imgType = 'rgb'; iteration_num = 120000;
 43 | det_dirs1 = sprintf('%s/%s/detections/%s-%s-%s-%06d/',save_root,dataset,model_type,imgType,list_id,iteration_num);
 44 | imgType = 'brox'; iteration_num = 120000;
 45 | det_dirs2 = sprintf('%s/%s/detections/%s-%s-%s-%06d/',save_root,dataset,model_type,imgType,list_id,iteration_num);
 46 | imgType = 'fastOF'; iteration_num = 120000;
 47 | det_dirs3 = sprintf('%s/%s/detections/%s-%s-%s-%06d/',save_root,dataset,model_type,imgType,list_id,iteration_num);
 48 | 
 49 | combinations = {{det_dirs1},{det_dirs2},{det_dirs3},...
 50 |     {det_dirs1,det_dirs3,'boost'},{det_dirs1,det_dirs2,'boost'},...
 51 |     {det_dirs1,det_dirs3,'cat'},{det_dirs1,det_dirs2,'cat'},...
 52 |     {det_dirs1,det_dirs3,'mean'},{det_dirs1,det_dirs2,'mean'}};
 53 | 
 54 | for c=1:length(combinations)
 55 |     comb = combinations{c};
 56 |     line = comb{1};
 57 |     if length(comb)>1
 58 |         fusion_type = comb{3};
 59 |         % escapes expand only in the format string, not in %s arguments
 60 |         line = sprintf('%s %s \n\n fusion type: %s\n\n',line,comb{2},fusion_type);
 61 |     else
 62 |         fusion_type = 'none';
 63 |     end
 64 |     line = sprintf('Evaluation for %s\n',line);
 65 |     fprintf('%s',line)
 66 |     fprintf(logfile,'%s',line);
 67 |     AP = zeros(num_actions,1);
 68 |     cc = zeros(num_actions,1);  % detections seen so far, per class
 69 |     for a=1:num_actions
 70 |         % one [score, is_true_positive] row per detection of class a
 71 |         allscore{a} = zeros(24*20*160000,2,'single');
 72 |     end
 73 |     
 74 |     total_num_gt_boxes = zeros(num_actions,1);
 75 |     annotNames = {annot.name};
 76 |     
 77 |     for vid=1:num_vid
 78 |         video_name = testlist{vid};
 79 |         [~,gtVidInd] = find(strcmp(annotNames, testlist{vid}));
 80 |         gt_tubes = annot(gtVidInd).tubes;
 81 |         numf = annot(gtVidInd).num_imgs;
 82 |         if mod(vid,5) == 0
 83 |             fprintf('Done procesing %d videos out of %d %s\n', vid, num_vid, video_name)
 84 |         end
 85 |         for nf = 1:numf
 86 |             gt_boxes = get_gt_boxes(gt_tubes,nf);
 87 |             dt_boxes = get_dt_boxes(comb, video_name, nf, num_actions, fusion_type);
 88 |             num_gt_boxes = size(gt_boxes,1);
 89 |             for g = 1:num_gt_boxes
 90 |                 total_num_gt_boxes(gt_boxes(g,5)) = total_num_gt_boxes(gt_boxes(g,5)) + 1;
 91 |             end
 92 |             covered_gt_boxes = zeros(num_gt_boxes,1);
 93 |             for d = 1 : size(dt_boxes,1)
 94 |                 dt_score = dt_boxes(d,5);
 95 |                 dt_label = dt_boxes(d,6);
 96 |                 cc(dt_label) = cc(dt_label) + 1;
 97 |                 % best still-uncovered gt box of the detection's own class (checked per box)
 98 |                 ioumax=-inf; maxgtind=0;
 99 |                 for g = 1:num_gt_boxes
100 |                     if ~covered_gt_boxes(g) && gt_boxes(g,5) == dt_label
101 |                         iou = compute_spatial_iou(gt_boxes(g,1:4), dt_boxes(d,1:4));
102 |                         if iou>ioumax
103 |                             ioumax=iou;
104 |                             maxgtind=g;
105 |                         end
106 |                     end
107 |                 end
108 |                 if ioumax>=iou_th
109 |                     covered_gt_boxes(maxgtind) = 1;
110 |                     allscore{dt_label}(cc(dt_label),:) = [dt_score,1]; % tp detection
111 |                 else
112 |                     allscore{dt_label}(cc(dt_label),:) = [dt_score,0]; % fp detection
113 |                 end
114 |             end
115 |         end
116 |     end
117 |     % Sort scores and then reorder tp fp labels in result precision and recall for each action
118 |     for a=1:num_actions
119 |         allscore{a} = allscore{a}(1:cc(a),:);
120 |         scores = allscore{a}(:,1);
121 |         labels = allscore{a}(:,2);
122 |         [~, si] = sort(scores,'descend');
123 |         labels = labels(si);
124 |         fp=cumsum(labels==0);
125 |         tp=cumsum(labels==1);
126 |         recall=tp/total_num_gt_boxes(a);
127 |         precision=tp./(fp+tp);
128 |         AP(a) = xVOCap(recall,precision);
129 |         line = sprintf('Action %02d AP = %0.5f \n', a, AP(a));
130 |         fprintf('%s',line);
131 |         fprintf(logfile,'%s',line);
132 |     end
133 |     
134 |     AP(isnan(AP)) = 0;
135 |     mAP  = mean(AP);
136 |     line = sprintf('\nMean AP::=> %.5f\n\n',mAP);
137 |     fprintf('%s',line);
138 |     fprintf(logfile,'%s',line);
139 | end
140 | end
141 | 
142 | 
143 | % -------------------------------------------------------------------------
144 | function [videos] = getVideoNames(split_file)
145 | % -------------------------------------------------------------------------
146 | % Read the whitespace-separated entries of split_file into a cell array.
147 | fprintf('Get both lis is %s\n',split_file);
148 | fid = fopen(split_file,'r');
149 | if fid < 0
150 |     error('Could not open split file %s', split_file);
151 | end
152 | data = textscan(fid, '%s');
153 | fclose(fid);   % the handle used to be left open
154 | videos  = cell(1);
155 | for i=1:length(data{1})
156 |     videos{i} = cell2mat(data{1}(i,1));
157 | end
158 | end
159 | 
160 | function gt_boxes = get_gt_boxes(gt_tubes,nf)
161 | % Rows [box, class] of the ground-truth tubes whose sf..ef range contains frame nf.
162 | gt_boxes = [];
163 | for t = 1:length(gt_tubes)
164 |     tube = gt_tubes(t);
165 |     if nf >= tube.sf && nf <= tube.ef
166 |         row = nf - tube.sf + 1;   % boxes(1,:) belongs to frame sf
167 |         gt_boxes = [gt_boxes; tube.boxes(row,:), tube.class];
168 |     end
169 | end
170 | end
171 | 
172 | function dt_boxes = get_dt_boxes(detection_dir, video_name, nf, num_actions, fusion_type)
173 | % Per-class detections of frame nf (see get_cls_detection), stacked for classes 1..num_actions.
174 | [boxes,scores] = read_detections(detection_dir, video_name, nf);
175 | per_class = cell(num_actions,1);
176 | for a = 1 : num_actions
177 |     per_class{a} = get_cls_detection(boxes,scores,a,fusion_type);
178 | end
179 | dt_boxes = vertcat(per_class{:});
180 | end
181 | 
182 | function cls_boxes = get_cls_detection(boxes,scores,a,fusion_type)
183 | % Rows [box, score, class] for class a, fused as selected by fusion_type:
184 | %   'none'  stream 1 only;   'mean'  stream 1 boxes with averaged scores;
185 | %   'cat'   both streams concatenated, then NMS at 0.45;
186 | %   'boost' stream 1 boosted by stream 2 (boost_boxes), then NMS at 0.45.
187 | switch fusion_type
188 |     case 'none'
189 |         cls_boxes = dofilter(boxes(1).b,scores(1).s,a);
190 |     case 'mean'
191 |         cls_boxes = dofilter(boxes(1).b,(scores(1).s+scores(2).s)/2.0,a);
192 |     case {'cat','boost'}
193 |         base = dofilter(boxes(1).b,scores(1).s,a);
194 |         top = dofilter(boxes(2).b,scores(2).s,a);
195 |         if strcmp(fusion_type,'cat')
196 |             all_boxes = [base;top];
197 |         else
198 |             all_boxes = boost_boxes(base,top);
199 |         end
200 |         cls_boxes = all_boxes(nms(all_boxes(:,1:5),0.45),:);
201 |     otherwise
202 |         error('Spacify correct fusion technique');
203 | end
204 | end
205 | 
206 | function cls_boxes_base = boost_boxes(cls_boxes_base,cls_boxes_top)
207 | % Boost-fuse two detection sets (corner boxes in columns 1-4, score in 5).
208 | %   Each base box adds to its score the score of the top box it overlaps
209 | %   most, weighted by that IoU, provided the IoU is at least 0.3 and the
210 | %   top box has not boosted another base box already. Top boxes that
211 | %   boosted nothing are appended unchanged.
212 | box_spatial = [cls_boxes_base(:,1:2) cls_boxes_base(:,3:4)-cls_boxes_base(:,1:2)+1];
213 | box_flow =    [cls_boxes_top(:,1:2) cls_boxes_top(:,3:4)-cls_boxes_top(:,1:2)+1];
214 | covered = false(size(cls_boxes_top,1),1); % top boxes already used
215 | for i = 1:size(cls_boxes_base,1)
216 |     ovlp = inters_union(box_spatial(i,:), box_flow);
217 |     if isempty(ovlp)
218 |         continue;
219 |     end
220 |     [movlp, maxind] = max(ovlp);
221 |     % was isempty(ismember(...)), which only let the first match through
222 |     if movlp>=0.3 && ~covered(maxind)
223 |         cls_boxes_base(i,5) = cls_boxes_base(i,5) + cls_boxes_top(maxind,5)*movlp;
224 |         covered(maxind) = true;
225 |     end
226 | end
227 | % was ~ismember(coveredboxes,i), which is false for an empty list and so
228 | % dropped every top box whenever nothing had been boosted
229 | cls_boxes_base = [cls_boxes_base; cls_boxes_top(~covered,:)];
230 | end
231 | 
232 | function [bxs, sc] = read_detections(detection_dir, video_name, nf)
233 | % Load the detections of frame nf of one video from the first directory
234 | % in detection_dir and, when present, from the second one as well.
235 | %   bxs(k).b  boxes of directory k;  sc(k).s  matching class scores
236 | bxs = struct();
237 | sc = struct();
238 | [bxs(1).b, sc(1).s] = load_frame_detections(detection_dir{1}, video_name, nf);
239 | if length(detection_dir)>1
240 |     [bxs(2).b, sc(2).s] = load_frame_detections(detection_dir{2}, video_name, nf);
241 | end
242 | end
243 | 
244 | function [boxes, scores] = load_frame_detections(det_dir, video_name, nf)
245 | % Read <det_dir><video_name>/<nf as %05d>.mat, which must hold `loc` and
246 | % `scores`. loc is scaled to a 320x240 frame, shifted by 1 and clipped;
247 | % the first score column is moved to the end.
248 | det_file = sprintf('%s%s/%05d.mat', det_dir, video_name, nf);
249 | det = load(det_file, 'loc', 'scores'); % into a struct, not the workspace
250 | loc = det.loc;
251 | boxes = [loc(:,1)*320, loc(:,2)*240, loc(:,3)*320, loc(:,4)*240] + 1;
252 | boxes(boxes(:,1)<1,1) = 1;   boxes(boxes(:,2)<1,2) = 1;
253 | boxes(boxes(:,3)>320,3) = 320;  boxes(boxes(:,4)>240,4) = 240;
254 | scores = [det.scores(:,2:end), det.scores(:,1)];
255 | end
256 | 
257 | 
258 | 
259 | function boxes = dofilter(boxes,scores,a)
260 | % Detections of class a as rows [box, score, a]: scores above 0.01, at
261 | % most the 50 best go through NMS (0.45), at most 20 survivors are kept.
262 | cls_scores = scores(:,a);
263 | confident = cls_scores>0.01;
264 | cls_scores = cls_scores(confident);
265 | boxes = boxes(confident,:);
266 | 
267 | [~,order] = sort(cls_scores,'descend');
268 | top = order(1:min(50,size(order,1)));
269 | cls_scores = cls_scores(top);
270 | boxes = boxes(top,:);
271 | 
272 | keep = nms([boxes cls_scores],0.45);
273 | keep = keep(1:min(20,length(keep)));
274 | boxes = [boxes(keep,:),cls_scores(keep), cls_scores(keep)*0 + a];
275 | end
276 | 
277 | function iou = inters_union(bounds1,bounds2)
278 | % ------------------------------------------------------------------------
279 | % Pairwise IoU of [x y width height] rectangles; 0.001 pads the union.
280 | overlap_area = rectint(bounds1,bounds2);
281 | area1 = bounds1(:,3).*bounds1(:,4);
282 | area2 = bounds2(:,3).*bounds2(:,4);
283 | iou = overlap_area./(bsxfun(@plus,area1,area2')-overlap_area+0.001);
284 | end
285 | 
286 | 
function iou = compute_spatial_iou(gt_box, dt_box)
% COMPUTE_SPATIAL_IOU Intersection-over-union of a single box pair.
%   gt_box : [x y w h]
%   dt_box : [x1 y1 x2 y2]; converted to [x y w h] with w = x2-x1+1 and
%            h = y2-y1+1 before the comparison
dt_rect = [dt_box(1:2), dt_box(3:4)-dt_box(1:2)+1];
overlap = rectint(gt_box,dt_rect);
gt_area = gt_box(3)*gt_box(4);
dt_area = dt_rect(3)*dt_rect(4);
iou = overlap/(gt_area + dt_area - overlap);
end


--------------------------------------------------------------------------------
/online-tubes/gentube/PARactionPathSmoother.m:
--------------------------------------------------------------------------------
  1 | % ---------------------------------------------------------
  2 | % Copyright (c) 2017, Gurkirt Singh
  3 | % This code and is available
  4 | % under the terms of MIT License provided in LICENSE.
  5 | % Please retain this notice and LICENSE if you use
  6 | % this file (or any portion of it) in your project.
  7 | % ---------------------------------------------------------
  8 | 
  9 | 
 10 | function final_tubes = parActionPathSmoother(actionpaths,alpha,num_action)
 11 | 
 12 | % load data
 13 | % fprintf('Number of video intest set %d \n', actionpath,alpha,num_action,calpha,useNeg
 14 | % alpha = 1;
 15 | 
 16 | final_tubes = struct('starts',[],'ts',[],'te',[],'label',[],'path_total_score',[],...
 17 |     'dpActionScore',[],'dpPathScore',[],...
 18 |     'path_boxes',cell(1),'path_scores',cell(1),'video_id',cell(1));
 19 | 
 20 | 
 21 | alltubes  = cell(length(actionpaths),1);
 22 | 
 23 | parfor t = 1 : length(actionpaths)
 24 |     %     fprintf('[%03d/%03d] calpha %04d\n',t,length(tubes),uint16(calpha*100));
 25 |     %     fprintf('.');
 26 |     video_id = actionpaths(t).video_id;
 27 |     %     fprintf('[doing for %s %d out of %d]\n',video_id,t,length(tubes));
 28 |     alltubes{t} = actionPathSmoother4oneVideo(actionpaths(t).paths,alpha,num_action,video_id) ;
 29 | end
 30 | 
 31 | action_count = 1;
 32 | for t = 1 : length(actionpaths)
 33 |     vid_tubes = alltubes{t};
 34 |     for  k=1:length(vid_tubes.ts)
 35 |         final_tubes.starts(action_count) = vid_tubes.starts(k);
 36 |         final_tubes.ts(action_count) = vid_tubes.ts(k);
 37 |         final_tubes.video_id{action_count} = vid_tubes.video_id{k};
 38 |         final_tubes.te(action_count) = vid_tubes.te(k);
 39 |         final_tubes.dpActionScore(action_count) = vid_tubes.dpActionScore(k);
 40 |         final_tubes.label(action_count) = vid_tubes.label(k);
 41 |         final_tubes.dpPathScore(action_count) = vid_tubes.dpPathScore(k);
 42 |         final_tubes.path_total_score(action_count) = vid_tubes.path_total_score(k);
 43 |         final_tubes.path_boxes{action_count} = vid_tubes.path_boxes{k};
 44 |         final_tubes.path_scores{action_count} = vid_tubes.path_scores{k};
 45 |         action_count = action_count + 1;
 46 |     end
 47 |     
 48 | end
 49 | end
 50 | 
 51 | function final_tubes = actionPathSmoother4oneVideo(video_paths,alpha,num_action,video_id)
 52 | action_count =1;
 53 | final_tubes = struct('starts',[],'ts',[],'te',[],'label',[],'path_total_score',[],...
 54 |     'dpActionScore',[],'dpPathScore',[],'vid',[],...
 55 |     'path_boxes',cell(1),'path_scores',cell(1),'video_id',cell(1));
 56 | 
 57 | if ~isempty(video_paths)
 58 |     %gt_ind = find(strcmp(video_id,annot.videoName));
 59 |     %number_frames = length(video_paths{1}(1).idx);
 60 | %     alpha = alpha-3.2; 
 61 |     for a = 1 : num_action
 62 |         action_paths = video_paths{a};
 63 |         num_act_paths = getPathCount(action_paths);
 64 |         for p = 1 : num_act_paths
 65 |             M = action_paths(p).allScores(:,1:num_action)'; %(:,1:num_action)';
 66 |             %M = normM(M);
 67 |             %M = [M(a,:),1-M(a,:)];
 68 |             M = M +20;
 69 |             
 70 |             [pred_path,time,D] = dpEM_max(M,alpha(a));
 71 |             [ Ts, Te, Scores, Label, DpPathScore] = extract_action(pred_path,time,D,a);
 72 |             for k = 1 : length(Ts)
 73 |                 final_tubes.starts(action_count) = action_paths(p).start;
 74 |                 final_tubes.ts(action_count) = Ts(k);
 75 |                 final_tubes.video_id{action_count} = video_id;
 76 |                 %     final_tubes.vid(action_count) = vid_num;
 77 |                 final_tubes.te(action_count) = Te(k);
 78 |                 final_tubes.dpActionScore(action_count) = Scores(k);
 79 |                 final_tubes.label(action_count) = Label(k);
 80 |                 final_tubes.dpPathScore(action_count) = DpPathScore(k);
 81 |                 final_tubes.path_total_score(action_count) = mean(action_paths(p).scores);
 82 |                 final_tubes.path_boxes{action_count} = action_paths(p).boxes;
 83 |                 final_tubes.path_scores{action_count} = action_paths(p).scores;
 84 |                 action_count = action_count + 1;
 85 |             end
 86 |             
 87 |         end
 88 |         
 89 |     end
 90 | end
 91 | end
 92 | 
 93 | function M = normM(M)
 94 | for i = 1: size(M,2)
 95 |     M(:,i) = M(:,i)/sum(M(:,i));
 96 | end
 97 | end
 98 | function [ts,te,scores,label,total_score] = extract_action(p,q,D,action)
 99 | % p(1:1) = 1;
100 | indexs = find(p==action);
101 | 
102 | if isempty(indexs)
103 |     ts = []; te = []; scores = []; label = []; total_score = [];
104 |     
105 | else
106 |     indexs_diff = [indexs,indexs(end)+1] - [indexs(1)-2,indexs];
107 |     ts = find(indexs_diff>1);
108 |     
109 |     if length(ts)>1
110 |         te = [ts(2:end)-1,length(indexs)];
111 |     else
112 |         te = length(indexs);
113 |     end
114 |     ts = indexs(ts);
115 |     te = indexs(te);
116 |     scores = (D(action,q(te)) - D(action,q(ts)))./(te-ts);
117 |     label = ones(length(ts),1)*action;
118 |     total_score = ones(length(ts),1)*D(p(end),q(end))/length(p);
119 | end
120 | end
121 | 
122 | % -------------------------------------------------------------------------
function lp_count = getPathCount(live_paths)
% -------------------------------------------------------------------------
% Number of paths in live_paths; anything without a 'boxes' field counts as
% zero paths.
lp_count = 0;
if isfield(live_paths,'boxes')
    lp_count = length(live_paths);
end
end
132 | 


--------------------------------------------------------------------------------
/online-tubes/gentube/convert2eval.m:
--------------------------------------------------------------------------------
 1 | % ---------------------------------------------------------
 2 | % Copyright (c) 2017, Gurkirt Singh
 3 | % This code and is available
 4 | % under the terms of MIT License provided in LICENSE.
 5 | % Please retain this notice and LICENSE if you use
 6 | % this file (or any portion of it) in your project.
 7 | % ---------------------------------------------------------
 8 | % Input: smoothed tubes
 9 | % Output: filtered out tubes with proper scoring
10 | 
function xmld = convert2eval(final_tubes,min_num_frames,kthresh,topk,vids)
% CONVERT2EVAL Filter the smoothed tubes and score them for evaluation.
%   final_tubes    : scalar struct of tubes (fields video_id, path_scores,
%                    path_boxes, ts, te, starts, label)
%   min_num_frames : a tube is kept only when (te - ts) exceeds this value
%   kthresh        : score threshold, indexed by the tube label
%   topk           : number of highest frame scores averaged into the tube score
%   vids           : cell array of video names; xmld(i) belongs to vids{i}
%   xmld           : struct array with videoName and, for every kept tube,
%                    score, nr, class, framenr(.).fnr and boxes(.).bxs

xmld = struct([]);

for v = 1 : length(vids)
    videoName = vids{v};
    action_indexes = find(strcmp(final_tubes.video_id,videoName));
    xmld(v).videoName = videoName;
    path_scores = final_tubes.path_scores(1,action_indexes);

    ts = final_tubes.ts(action_indexes);
    starts = final_tubes.starts(action_indexes);
    te = final_tubes.te(action_indexes);
    act_nr = 1;

    for a = 1 : length(ts)
        act_ts = ts(a);
        act_te = te(a);
        act_path_scores = path_scores{a};

        % tube score: mean of the topk highest frame scores inside [ts, te]
        act_scores = sort(act_path_scores(act_ts:act_te),'descend');
        topk_mean = mean(act_scores(1:min(topk,length(act_scores))));

        bxs = final_tubes.path_boxes{action_indexes(a)}(act_ts:act_te,:);
        % columns 3:4 become the difference to columns 1:2
        bxs = [bxs(:,1:2), bxs(:,3:4)-bxs(:,1:2)];

        label = final_tubes.label(action_indexes(a));

        if topk_mean > kthresh(label) && (act_te-act_ts) > min_num_frames
            xmld(v).score(act_nr) = topk_mean;
            xmld(v).nr(act_nr) = act_nr;
            xmld(v).class(act_nr) = label;
            % frame numbers are offset by the start of the underlying path
            xmld(v).framenr(act_nr).fnr = (act_ts:act_te) + starts(a)-1;
            xmld(v).boxes(act_nr).bxs = bxs;
            act_nr = act_nr+1;
        end
    end

end
58 | 


--------------------------------------------------------------------------------
/online-tubes/gentube/dpEM_max.m:
--------------------------------------------------------------------------------
 1 | % ---------------------------------------------------------
 2 | % Original code comes from  https://team.inria.fr/perception/research/skeletalquads/
 3 | % Copyright (c) 2014, Georgios Evangelidis and Gurkirt Singh,
 4 | % This code and is available
 5 | % under the terms of MIT License provided in LICENSE.
 6 | % Please retain this notice and LICENSE if you use
 7 | % this file (or any portion of it) in your project.
 8 | % ---------------------------------------------------------
 9 | 
10 | % M = <10xnum_frames>
11 | % r = 10 (action labels)
12 | % c = frame indices in a video
13 | 
function [p,q,D] = dpEM_max(M,alpha)
% DPEM_MAX Dynamic-programming labelling of a score matrix with a switching
% penalty.
%   M     : <labels x frames> scores
%   alpha : penalty subtracted whenever the label changes between two
%           consecutive frames:  V(L1,L2) = 0 if L1==L2, alpha otherwise
%   p     : best label per frame (row vector)
%   q     : frame indices 1..frames
%   D     : accumulated scores, <labels x frames>

[num_labels,num_frames] = size(M);

% accumulated scores; the extra first column is an all-zero start column
D = zeros(num_labels, num_frames+1);
D(:, 2:(num_frames+1)) = M;

label_ids = (1:num_labels)';

% phi(i,f): label chosen for the previous column when column f carries label i
phi = zeros(num_labels,num_frames);

% forward pass: columns are visited one by one; for every label the best
% predecessor is the maximum of the previous column after subtracting alpha
% from all entries that belong to a different label
for col = 2:num_frames+1
    prev_col = D(:, col-1);
    for lbl = 1:num_labels
        [best, arg_best] = max(prev_col-alpha*(label_ids~=lbl));
        D(lbl,col) = D(lbl,col)+best;
        phi(lbl,col-1) = arg_best;
    end
end

% drop the start column again
D = D(:,2:(num_frames+1));

% backward pass: start from the best label of the last frame and follow the
% stored predecessors
[~,p] = max(D(:,num_frames));
q = 1:num_frames;

cur = p;
for frame = num_frames:-1:2
    cur = phi(cur,frame);
    p = [cur,p];
end
90 | 
%
% phi(i,j) stores the indices of the maxima found during the forward pass;
% during the backward pass, the predicted path is constructed from these index values
94 | 


--------------------------------------------------------------------------------
/online-tubes/gentube/readALLactionPaths.m:
--------------------------------------------------------------------------------
 1 | % ---------------------------------------------------------
 2 | % Copyright (c) 2017, Gurkirt Singh
 3 | % This code and is available
 4 | % under the terms of MIT License provided in LICENSE.
 5 | % Please retain this notice and LICENSE if you use
 6 | % this file (or any portion of it) in your project.
 7 | % ---------------------------------------------------------
 8 | 
 9 | function actionpath = readALLactionPaths(videolist,actionPathDir,step)
10 | 
11 | videos = getVideoNames(videolist);
12 | NumVideos = length(videos);
13 | 
14 | actionpath = struct([]);
15 | fprintf('Loading action paths of %d videos\n',NumVideos);
16 | count  = 1;
17 | for vid=1:step:NumVideos
18 |     
19 |     videoID  = videos(vid).video_id;
20 |     pathsSaveName = [actionPathDir,videoID,'-actionpaths.mat'];
21 |    
22 |     if ~exist(pathsSaveName,'file')
23 |         error('Action path does not exist please genrate actin path', pathsSaveName)
24 |     else
25 | %         fprintf('loading vid %d %s \n',vid,pathsSaveName);
26 |         load(pathsSaveName);
27 |         actionpath(count).video_id = videos(vid).video_id;
28 |         actionpath(count).paths = allpaths;
29 |         count = count+1;
30 |     end
31 | end
32 | end
33 | 
function [videos] = getVideoNames(split_file)
% -------------------------------------------------------------------------
% Read the whitespace-separated video ids listed in split_file.
%   split_file : path of the text file
%   videos     : struct array with one field, video_id; for a file without
%                entries it is a 1x1 struct without fields
% Raises an error when the file cannot be opened.
fid = fopen(split_file,'r');
if fid == -1
    error('Could not open video list file %s', split_file);
end
% make sure the handle is released even if textscan fails
closer = onCleanup(@() fclose(fid));

data = textscan(fid, '%s');
names = data{1};
videos  = struct();
for i=1:length(names)
    videos(i).video_id = names{i};
end

end
48 | 


--------------------------------------------------------------------------------
/online-tubes/utils/createdires.m:
--------------------------------------------------------------------------------
 1 | % ---------------------------------------------------------
 2 | % Copyright (c) 2017, Gurkirt Singh
 3 | % This code and is available
 4 | % under the terms of MIT License provided in LICENSE.
 5 | % Please retain this notice and LICENSE if you use
 6 | % this file (or any portion of it) in your project.
 7 | % ---------------------------------------------------------
 8 | 
 9 | 
function createdires(basedirs,actions)
% CREATEDIRES Create one sub-directory per action below every base directory.
%   basedirs : cell array of directory prefixes (the action name is appended
%              directly, so each prefix should end with a separator)
%   actions  : cell row of action names
for b = 1 : length(basedirs)
    prefix = basedirs{b};
    for c = 1 : size(actions,2)
        target = [prefix,actions{1,c}];
        if ~isdir(target)
            mkdir(target);
        end
    end
end
end


--------------------------------------------------------------------------------
/online-tubes/utils/initDatasetOpts.m:
--------------------------------------------------------------------------------
 1 | % ---------------------------------------------------------
 2 | % Copyright (c) 2017, Gurkirt Singh
 3 | % This code and is available
 4 | % under the terms of MIT License provided in LICENSE.
 5 | % Please retain this notice and LICENSE if you use
 6 | % this file (or any portion of it) in your project.
 7 | % ---------------------------------------------------------
 8 | 
 9 | function opts = initDatasetOpts(data_root,baseDir,dataset,imgType,model_type,listid,iteration_num,iouthresh,costtype,gap)
10 | 
11 | opts = struct();
12 | opts.imgType = imgType;
13 | opts.costtype = costtype;
14 | opts.gap = gap;
15 | opts.baseDir = baseDir;
16 | opts.imgType = imgType;
17 | opts.dataset = dataset;
18 | opts.iouThresh = iouthresh;
19 | opts.weight = iteration_num;
20 | opts.listid = listid;
21 | 
22 | testlist = ['testlist',listid];
23 | %%testlist = 'testlist01';
24 | opts.vidList = sprintf('%s/%s/splitfiles/%s.txt',data_root,dataset,testlist);
25 | 
26 | if strcmp(dataset,'ucf24')
27 |     opts.actions = {'Basketball','BasketballDunk','Biking','CliffDiving','CricketBowling',...
28 |         'Diving','Fencing','FloorGymnastics','GolfSwing','HorseRiding','IceDancing',...
29 |         'LongJump','PoleVault','RopeClimbing','SalsaSpin','SkateBoarding','Skiing',...
30 |         'Skijet','SoccerJuggling','Surfing','TennisSwing','TrampolineJumping',...
31 |         'VolleyballSpiking','WalkingWithDog'};
32 | elseif strcmp(dataset,'JHMDB')
33 |     opts.actions = {'brush_hair','catch','clap','climb_stairs','golf','jump',...
34 |         'kick_ball','pick','pour','pullup','push','run','shoot_ball','shoot_bow',...
35 |         'shoot_gun','sit','stand','swing_baseball','throw','walk','wave'};
36 | elseif strcmp(dataset,'LIRIS')
37 |     opts.actions = {'discussion', 'give_object_to_person','put_take_obj_into_from_box_desk',...
38 |         'enter_leave_room_no_unlocking','try_enter_room_unsuccessfully','unlock_enter_leave_room',...
39 |         'leave_baggage_unattended','handshaking','typing_on_keyboard','telephone_conversation'};
40 | end
41 | 
42 | opts.imgDir = sprintf('%s/%s/%s-images/',data_root,dataset,imgType);
43 | 
44 | opts.detDir = sprintf('%s/%s/detections/%s-%s-%s-%06d/',baseDir,dataset,model_type,imgType,listid,iteration_num);
45 | opts.annotFile = sprintf('%s/%s/splitfiles/finalAnnots.ma.mat',data_root,dataset);
46 | 
47 | opts.actPathDir = sprintf('%s/%s/actionPaths/%s-%s-%s-%06d-%s-%d-%04d/',baseDir,dataset,model_type,imgType,listid,iteration_num,costtype,gap,iouthresh*100);
48 | opts.tubeDir = sprintf('%s/%s/actionTubes/%s-%s-%s-%06d-%s-%d-%04d/',baseDir,dataset,model_type,imgType,listid,iteration_num,costtype,gap,iouthresh*100);
49 | 
50 | if exist(opts.detDir,'dir')
51 |     if ~isdir(opts.actPathDir)
52 |         fprintf('Creating %s\n',opts.actPathDir);
53 |         mkdir(opts.actPathDir)
54 |     end
55 |     if ~isdir(opts.tubeDir)
56 |         mkdir(opts.tubeDir)
57 |     end
58 |     if strcmp(dataset,'ucf24') || strcmp(dataset,'JHMDB')
59 |         createdires({opts.actPathDir},opts.actions)
60 |     end
61 | end
62 | 


--------------------------------------------------------------------------------
/online-tubes/utils/initDatasetOptsFused.m:
--------------------------------------------------------------------------------
 1 | % ---------------------------------------------------------
 2 | % Copyright (c) 2017, Gurkirt Singh
 3 | % This code and is available
 4 | % under the terms of MIT License provided in LICENSE.
 5 | % Please retain this notice and LICENSE if you use
 6 | % this file (or any portion of it) in your project.
 7 | % ---------------------------------------------------------
 8 | 
 9 | function opts = initDatasetOptsFused(data_root,baseDir,dataset,imtypes,model_type, ...
10 |     listid,iteration_nums,iouthresh,costtype,gap,fusiontype,fuseiouth)
11 | %% data_root,baseDir,dataset,imgType,model_type,listid,iteration_num,iouthresh,costtype,gap
12 | 
13 | opts = struct();
14 | imgType = [imtypes{1},'-',imtypes{2}];
15 | opts.imgType = imgType;
16 | opts.costtype = costtype;
17 | opts.gap = gap;
18 | opts.baseDir = baseDir;
19 | opts.imgType = imgType;
20 | opts.dataset = dataset;
21 | opts.iouThresh = iouthresh;
22 | opts.iteration_nums = iteration_nums;
23 | opts.listid = listid;
24 | opts.fusiontype = fusiontype;
25 | opts.fuseiouth = fuseiouth;
26 | testlist = ['testlist',listid];
27 | opts.data_root = data_root;
28 | opts.vidList = sprintf('%s/%s/splitfiles/%s.txt',data_root,dataset,testlist);
29 | 
30 | if strcmp(dataset,'ucf24')
31 |     opts.actions = {'Basketball','BasketballDunk','Biking','CliffDiving','CricketBowling',...
32 |         'Diving','Fencing','FloorGymnastics','GolfSwing','HorseRiding','IceDancing',...
33 |         'LongJump','PoleVault','RopeClimbing','SalsaSpin','SkateBoarding','Skiing',...
34 |         'Skijet','SoccerJuggling','Surfing','TennisSwing','TrampolineJumping',...
35 |         'VolleyballSpiking','WalkingWithDog'};
36 | elseif strcmp(dataset,'JHMDB')
37 |     opts.actions = {'brush_hair','catch','clap','climb_stairs','golf','jump',...
38 |         'kick_ball','pick','pour','pullup','push','run','shoot_ball','shoot_bow',...
39 |         'shoot_gun','sit','stand','swing_baseball','throw','walk','wave'};
40 | elseif strcmp(dataset,'LIRIS')
41 |     opts.actions = {'discussion', 'give_object_to_person','put_take_obj_into_from_box_desk',...
42 |         'enter_leave_room_no_unlocking','try_enter_room_unsuccessfully','unlock_enter_leave_room',...
43 |         'leave_baggage_unattended','handshaking','typing_on_keyboard','telephone_conversation'};
44 | end
45 | 
46 | opts.imgDir = sprintf('%s/%s/%s-images/',data_root,dataset,imtypes{1});
47 | 
48 | opts.basedetDir = sprintf('%s/%s/detections/%s-%s-%s-%06d/',baseDir,dataset,model_type,imtypes{1},listid,iteration_nums(1));
49 | opts.topdetDir = sprintf('%s/%s/detections/%s-%s-%s-%06d/',baseDir,dataset,model_type,imtypes{2},listid,iteration_nums(2));
50 | 
51 | opts.annotFile = sprintf('%s/%s/splitfiles/annots.mat',data_root,dataset);
52 | 
53 | opts.actPathDir = sprintf('%s/%s/actionPaths/%s/%s-%s-%s-%s-%d-%d-%s-%d-%04d-fiou%03d/',baseDir,dataset,fusiontype,model_type,imtypes{1},imtypes{2},...
54 |                                         listid,iteration_nums(1),iteration_nums(2),costtype,gap,iouthresh*100,uint16(fuseiouth*100));
55 | opts.tubeDir = sprintf('%s/%s/actionTubes/%s/%s-%s-%s-%s-%d-%d-%s-%d-%04d-fiou%03d/',baseDir,dataset,fusiontype,model_type,imtypes{1},imtypes{2},...
56 |                                         listid,iteration_nums(1),iteration_nums(2),costtype,gap,iouthresh*100,uint16(fuseiouth*100));
57 | 
58 | if exist(opts.basedetDir,'dir')
59 |     if ~isdir(opts.actPathDir)
60 |         fprintf('Creating %s\n',opts.actPathDir);
61 |         mkdir(opts.actPathDir)
62 |     end
63 |     
64 |     if ~isdir(opts.tubeDir)
65 |         mkdir(opts.tubeDir)
66 |     end
67 |     
68 |     if strcmp(dataset,'ucf24') || strcmp(dataset,'JHMDB')
69 |         createdires({opts.actPathDir},opts.actions)
70 |     end
71 | end
72 | 
73 | %fprintf('Video List :: %s\nImage  Dir :: %s\nDetection Dir:: %s\nActionpath Dir:: %s\nTube Dir:: %s\n',...
74 |  %    opts.vidList,opts.imgDir,opts.detDir,opts.actPathDir,opts.tubeDir)
75 | 


--------------------------------------------------------------------------------
/ssd.py:
--------------------------------------------------------------------------------
  1 | 
  2 | """ SSD network Classes
  3 | 
  4 | Original author: Ellis Brown, Max deGroot for VOC dataset
  5 | https://github.com/amdegroot/ssd.pytorch
  6 | 
  7 | Updated by Gurkirt Singh for ucf101-24 dataset
  8 | """
  9 | 
 10 | import torch
 11 | import torch.nn as nn
 12 | import torch.nn.functional as F
 13 | from torch.autograd import Variable
 14 | from layers import *
 15 | from data import v2
 16 | import os
 17 | 
 18 | 
 19 | class SSD(nn.Module):
 20 |     """Single Shot Multibox Architecture
 21 |     The network is composed of a base VGG network followed by the
 22 |     added multibox conv layers.  Each multibox layer branches into
 23 |         1) conv2d for class conf scores
 24 |         2) conv2d for localization predictions
 25 |         3) associated priorbox layer to produce default bounding
 26 |            boxes specific to the layer's feature map size.
 27 |     See: https://arxiv.org/pdf/1512.02325.pdf for more details.
 28 | 
 29 |     Args:
 30 |         base: VGG16 layers for input, size of either 300 or 500
 31 |         extras: extra layers that feed to multibox loc and conf layers
 32 |         head: "multibox head" consists of loc and conf conv layers
 33 |     """
 34 | 
 35 |     def __init__(self, base, extras, head, num_classes):
 36 |         super(SSD, self).__init__()
 37 | 
 38 |         self.num_classes = num_classes
 39 |         # TODO: implement __call__ in PriorBox
 40 |         self.priorbox = PriorBox(v2)
 41 |         with torch.no_grad():
 42 |             self.priors = self.priorbox.forward().cuda()
 43 |             self.num_priors = self.priors.size(0)
 44 |             self.size = 300
 45 | 
 46 |         # SSD network
 47 |         self.vgg = nn.ModuleList(base)
 48 |         # Layer learns to scale the l2 normalized features from conv4_3
 49 |         self.L2Norm = L2Norm(512, 20)
 50 |         self.extras = nn.ModuleList(extras)
 51 | 
 52 |         self.loc = nn.ModuleList(head[0])
 53 |         self.conf = nn.ModuleList(head[1])
 54 | 
 55 |         self.softmax = nn.Softmax(dim=1).cuda()
 56 |         # self.detect = Detect(num_classes, 0, 200, 0.001, 0.45)
 57 | 
 58 |     def forward(self, x):
 59 | 
 60 |         """Applies network layers and ops on input image(s) x.
 61 | 
 62 |         Args:
 63 |             x: input image or batch of images. Shape: [batch,3*batch,300,300].
 64 | 
 65 |         Return:
 66 |             Depending on phase:
 67 |             test:
 68 |                 Variable(tensor) of output class label predictions,
 69 |                 confidence score, and corresponding location predictions for
 70 |                 each object detected. Shape: [batch,topk,7]
 71 | 
 72 |             train:
 73 |                 list of concat outputs from:
 74 |                     1: confidence layers, Shape: [batch*num_priors,num_classes]
 75 |                     2: localization layers, Shape: [batch,num_priors*4]
 76 |                     3: priorbox layers, Shape: [2,num_priors*4]
 77 |         """
 78 | 
 79 |         sources = list()
 80 |         loc = list()
 81 |         conf = list()
 82 | 
 83 |         # apply vgg up to conv4_3 relu
 84 |         for k in range(23):
 85 |             x = self.vgg[k](x)
 86 | 
 87 |         s = self.L2Norm(x)
 88 |         sources.append(s)
 89 | 
 90 |         # apply vgg up to fc7
 91 |         for k in range(23, len(self.vgg)):
 92 |             x = self.vgg[k](x)
 93 |         sources.append(x)
 94 | 
 95 |         # apply extra layers and cache source layer outputs
 96 |         for k, v in enumerate(self.extras):
 97 |             x = F.relu(v(x), inplace=True)
 98 |             if k % 2 == 1:
 99 |                 sources.append(x)
100 | 
101 |         # apply multibox head to source layers
102 |         for (x, l, c) in zip(sources, self.loc, self.conf):
103 |             loc.append(l(x).permute(0, 2, 3, 1).contiguous())
104 |             conf.append(c(x).permute(0, 2, 3, 1).contiguous())
105 | 
106 |         loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1)
107 |         conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1)
108 |         output = (loc.view(loc.size(0), -1, 4),
109 |                   conf.view(conf.size(0), -1, self.num_classes),
110 |                   self.priors
111 |                   )
112 |         return output
113 | 
114 |     def load_weights(self, base_file):
115 |         other, ext = os.path.splitext(base_file)
116 |         if ext == '.pkl' or '.pth':
117 |             print('Loading weights into state dict...')
118 |             self.load_state_dict(torch.load(base_file, map_location=lambda storage, loc: storage))
119 |             print('Finished!')
120 |         else:
121 |             print('Sorry only .pth and .pkl files supported.')
122 | 
123 | 
124 | # This function is derived from torchvision VGG make_layers()
125 | # https://github.com/pytorch/vision/blob/master/torchvision/models/vgg.py
def vgg(cfg, i, batch_norm=False):
    """Build the VGG base network as a flat list of layers.

    Args:
        cfg: layer specification; an int adds a 3x3 convolution with that
            many output channels followed by ReLU, 'M' adds a 2x2 max-pool
            and 'C' a 2x2 max-pool with ``ceil_mode=True``.
        i: number of input channels of the first convolution.
        batch_norm: insert a BatchNorm2d between every configured
            convolution and its ReLU.

    Returns:
        The configured layers followed by pool5, a dilated conv6, ReLU,
        a 1x1 conv7 and ReLU.
    """
    layers = []
    channels = i
    for spec in cfg:
        if spec in ('M', 'C'):
            layers.append(
                nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=(spec == 'C')))
            continue
        layers.append(nn.Conv2d(channels, spec, kernel_size=3, padding=1))
        if batch_norm:
            layers.append(nn.BatchNorm2d(spec))
        layers.append(nn.ReLU(inplace=True))
        channels = spec

    # fixed tail: pool5, conv6 (dilated), conv7
    layers.append(nn.MaxPool2d(kernel_size=3, stride=1, padding=1))
    layers.append(nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6))
    layers.append(nn.ReLU(inplace=True))
    layers.append(nn.Conv2d(1024, 1024, kernel_size=1))
    layers.append(nn.ReLU(inplace=True))
    return layers
147 | 
148 | 
def add_extras(cfg, i, batch_norm=False):
    """Build the extra convolutions appended after the VGG base.

    Kernel sizes alternate 1, 3, 1, 3, ... over the created layers. An 'S'
    entry in ``cfg`` creates a stride-2 convolution (padding 1) whose output
    width is the entry that follows it; that following entry creates no
    layer of its own.

    Args:
        cfg: list of output channel counts and 'S' markers.
        i: number of input channels of the first extra layer.
        batch_norm: accepted but not used by this function.

    Returns:
        List of ``nn.Conv2d`` layers.
    """
    layers = []
    prev = i
    use_3x3 = False
    for idx, spec in enumerate(cfg):
        # the entry right after an 'S' was already consumed as its output width
        if prev != 'S':
            ksize = 3 if use_3x3 else 1
            if spec == 'S':
                conv = nn.Conv2d(prev, cfg[idx + 1],
                                 kernel_size=ksize, stride=2, padding=1)
            else:
                conv = nn.Conv2d(prev, spec, kernel_size=ksize)
            layers.append(conv)
            use_3x3 = not use_3x3
        prev = spec
    return layers
164 | 
165 | 
def multibox(vgg, extra_layers, cfg, num_classes):
    """Create the localization and confidence heads for every source layer.

    The sources are ``vgg[24]``, ``vgg[-2]`` and every second extra layer
    (indices 1, 3, ...). Source ``k`` gets a 3x3 convolution predicting
    ``cfg[k] * 4`` box offsets and one predicting ``cfg[k] * num_classes``
    class scores.

    Args:
        vgg: list of base layers.
        extra_layers: list of extra layers.
        cfg: number of boxes per feature-map location, one entry per source.
        num_classes: number of classes scored per box.

    Returns:
        ``(vgg, extra_layers, (loc_layers, conf_layers))``.
    """
    loc_layers = []
    conf_layers = []

    def add_head(source, num_boxes):
        # loc is created before conf for every source, as the heads are paired
        width = source.out_channels
        loc_layers.append(
            nn.Conv2d(width, num_boxes * 4, kernel_size=3, padding=1))
        conf_layers.append(
            nn.Conv2d(width, num_boxes * num_classes, kernel_size=3, padding=1))

    for idx, layer_index in enumerate((24, -2)):
        add_head(vgg[layer_index], cfg[idx])
    for idx, layer in enumerate(extra_layers[1::2], 2):
        add_head(layer, cfg[idx])
    return vgg, extra_layers, (loc_layers, conf_layers)
181 | 
182 | 
# Layer specifications, keyed by the input size as a string. Only the '300'
# entries are filled in; the '512' entries are empty placeholders.

# VGG base passed to vgg(): an int is the output width of a 3x3 convolution,
# 'M' is a 2x2 max-pool and 'C' a 2x2 max-pool with ceil_mode=True.
base = {
    '300': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'C', 512, 512, 512, 'M',
            512, 512, 512],
    '512': [],
}
# Extra layers passed to add_extras(): an int is an output width, 'S' marks a
# stride-2 convolution whose output width is the entry that follows it.
extras = {
    '300': [256, 'S', 512, 128, 'S', 256, 128, 256, 128, 256],
    '512': [],
}
# Passed to multibox() as cfg, one entry per source layer.
mbox = {
    '300': [4, 6, 6, 6, 4, 4],  # number of boxes per feature map location
    '512': [],
}
196 | 
197 | 
def build_ssd(size=300, num_classes=21):
    """Assemble an SSD network for the given input size.

    Args:
        size: input size; only 300 is supported.
        num_classes: number of classes scored per prior box.

    Returns:
        The constructed ``SSD`` module, or ``None`` (after printing an error
        message) when ``size`` is not 300.
    """
    if size != 300:
        print("Error: Sorry only SSD300 is supported currently!")
        return None

    key = str(size)
    base_layers = vgg(base[key], 3)
    extra_layers = add_extras(extras[key], 1024)
    heads = multibox(base_layers, extra_layers, mbox[key], num_classes)
    return SSD(*heads, num_classes)
207 | 


--------------------------------------------------------------------------------
/test-ucf24.py:
--------------------------------------------------------------------------------
  1 | """
  2 |     Copyright (c) 2017, Gurkirt Singh
  3 | 
  4 |     This code and is available
  5 |     under the terms of MIT License provided in LICENSE.
  6 |     Please retain this notice and LICENSE if you use
  7 |     this file (or any portion of it) in your project.
  8 |     ---------------------------------------------------------
  9 | """
 10 | 
 11 | import torch
 12 | import torch.backends.cudnn as cudnn
 13 | from torch.autograd import Variable
 14 | from data import AnnotationTransform, UCF24Detection, BaseTransform, CLASSES, detection_collate, v2
 15 | from ssd import build_ssd
 16 | import torch.utils.data as data
 17 | from layers.box_utils import decode, nms
 18 | from utils.evaluation import evaluate_detections
 19 | import os, time
 20 | import argparse
 21 | import numpy as np
 22 | import pickle
 23 | import scipy.io as sio # to save detection as mat files
 24 | cfg = v2
 25 | 
 26 | def str2bool(v):
 27 |     return v.lower() in ("yes", "true", "t", "1")
 28 | 
 29 | parser = argparse.ArgumentParser(description='Single Shot MultiBox Detector Training')
 30 | parser.add_argument('--version', default='v2', help='conv11_2(v2) or pool6(v1) as last layer')
 31 | parser.add_argument('--basenet', default='vgg16_reducedfc.pth', help='pretrained base model')
 32 | parser.add_argument('--dataset', default='ucf24', help='pretrained base model')
 33 | parser.add_argument('--ssd_dim', default=300, type=int, help='Input Size for SSD') # only support 300 now
 34 | parser.add_argument('--input_type', default='rgb', type=str, help='INput tyep default rgb can take flow as well')
 35 | parser.add_argument('--jaccard_threshold', default=0.5, type=float, help='Min Jaccard index for matching')
 36 | parser.add_argument('--batch_size', default=32, type=int, help='Batch size for training')
 37 | parser.add_argument('--resume', default=None, type=str, help='Resume from checkpoint')
 38 | parser.add_argument('--num_workers', default=0, type=int, help='Number of workers used in dataloading')
 39 | parser.add_argument('--eval_iter', default='120000,', type=str, help='Number of training iterations')
 40 | parser.add_argument('--cuda', default=True, type=str2bool, help='Use cuda to train model')
 41 | parser.add_argument('--ngpu', default=1, type=str2bool, help='Use cuda to train model')
 42 | parser.add_argument('--lr', '--learning-rate', default=1e-3, type=float, help='initial learning rate')
 43 | parser.add_argument('--visdom', default=False, type=str2bool, help='Use visdom to for loss visualization')
 44 | parser.add_argument('--data_root', default='/mnt/mars-fast/datasets/', help='Location of VOC root directory')
 45 | parser.add_argument('--save_root', default='/mnt/mars-gamma/datasets/', help='Location to save checkpoint models')
 46 | parser.add_argument('--iou_thresh', default=0.5, type=float, help='Evaluation threshold')
 47 | parser.add_argument('--conf_thresh', default=0.01, type=float, help='Confidence threshold for evaluation')
 48 | parser.add_argument('--nms_thresh', default=0.45, type=float, help='NMS threshold')
 49 | parser.add_argument('--topk', default=20, type=int, help='topk for evaluation')
 50 | 
 51 | args = parser.parse_args()
 52 | 
 53 | if args.input_type != 'rgb':
 54 |     args.conf_thresh = 0.05
 55 | 
 56 | if args.cuda and torch.cuda.is_available():
 57 |     torch.set_default_tensor_type('torch.cuda.FloatTensor')
 58 | else:
 59 |     torch.set_default_tensor_type('torch.FloatTensor')
 60 | 
 61 | 
 62 | def test_net(net, save_root, exp_name, input_type, dataset, iteration, num_classes, thresh=0.5 ):
 63 |     """ Test a SSD network on an Action image database. """
 64 | 
 65 |     val_data_loader = data.DataLoader(dataset, args.batch_size, num_workers=args.num_workers,
 66 |                             shuffle=False, collate_fn=detection_collate, pin_memory=True)
 67 |     image_ids = dataset.ids
 68 |     save_ids = []
 69 |     val_step = 250
 70 |     num_images = len(dataset)
 71 |     video_list = dataset.video_list
 72 |     det_boxes = [[] for _ in range(len(CLASSES))]
 73 |     gt_boxes = []
 74 |     print_time = True
 75 |     batch_iterator = None
 76 |     count = 0
 77 |     torch.cuda.synchronize()
 78 |     ts = time.perf_counter()
 79 |     num_batches = len(val_data_loader)
 80 |     det_file = save_root + 'cache/' + exp_name + '/detection-'+str(iteration).zfill(6)+'.pkl'
 81 |     print('Number of images ', len(dataset),' number of batchs', num_batches)
 82 |     frame_save_dir = save_root+'detections/CONV-'+input_type+'-'+args.listid+'-'+str(iteration).zfill(6)+'/'
 83 |     print('\n\n\nDetections will be store in ',frame_save_dir,'\n\n')
 84 |     with torch.no_grad():
 85 |         for val_itr in range(len(val_data_loader)):
 86 |             if not batch_iterator:
 87 |                 batch_iterator = iter(val_data_loader)
 88 | 
 89 |             torch.cuda.synchronize()
 90 |             t1 = time.perf_counter()
 91 | 
 92 |             images, targets, img_indexs = next(batch_iterator)
 93 |             batch_size = images.size(0)
 94 |             height, width = images.size(2), images.size(3)
 95 | 
 96 |             if args.cuda:
 97 |                 images = images.cuda()
 98 |             output = net(images)
 99 | 
100 |             loc_data = output[0]
101 |             conf_preds = output[1]
102 |             prior_data = output[2]
103 | 
104 |             if print_time and val_itr%val_step == 0:
105 |                 torch.cuda.synchronize()
106 |                 tf = time.perf_counter()
107 |                 print('Forward Time {:0.3f}'.format(tf - t1))
108 |             for b in range(batch_size):
109 |                 gt = targets[b].numpy()
110 |                 gt[:, 0] *= width
111 |                 gt[:, 2] *= width
112 |                 gt[:, 1] *= height
113 |                 gt[:, 3] *= height
114 |                 gt_boxes.append(gt)
115 |                 decoded_boxes = decode(loc_data[b].data, prior_data.data, cfg['variance']).clone()
116 |                 conf_scores = net.softmax(conf_preds[b]).data.clone()
117 |                 index = img_indexs[b]
118 |                 annot_info = image_ids[index]
119 | 
120 |                 frame_num = annot_info[1]; video_id = annot_info[0]; videoname = video_list[video_id]
121 |                 output_dir = frame_save_dir+videoname
122 |                 if not os.path.isdir(output_dir):
123 |                     os.makedirs(output_dir)
124 | 
125 |                 output_file_name = output_dir+'/{:05d}.mat'.format(int(frame_num))
126 |                 save_ids.append(output_file_name)
127 |                 sio.savemat(output_file_name, mdict={'scores':conf_scores.cpu().numpy(),'loc':decoded_boxes.cpu().numpy()})
128 | 
129 |                 for cl_ind in range(1, num_classes):
130 |                     scores = conf_scores[:, cl_ind].squeeze()
131 |                     c_mask = scores.gt(args.conf_thresh)  # greater than minmum threshold
132 |                     scores = scores[c_mask].squeeze()
133 |                     # print('scores size',scores.size())
134 |                     if scores.dim() == 0:
135 |                         # print(len(''), ' dim ==0 ')
136 |                         det_boxes[cl_ind - 1].append(np.asarray([]))
137 |                         continue
138 |                     boxes = decoded_boxes.clone()
139 |                     l_mask = c_mask.unsqueeze(1).expand_as(boxes)
140 |                     boxes = boxes[l_mask].view(-1, 4)
141 |                     # idx of highest scoring and non-overlapping boxes per class
142 |                     ids, counts = nms(boxes, scores, args.nms_thresh, args.topk)  # idsn - ids after nms
143 |                     scores = scores[ids[:counts]].cpu().numpy()
144 |                     boxes = boxes[ids[:counts]].cpu().numpy()
145 |                     # print('boxes sahpe',boxes.shape)
146 |                     boxes[:, 0] *= width
147 |                     boxes[:, 2] *= width
148 |                     boxes[:, 1] *= height
149 |                     boxes[:, 3] *= height
150 | 
151 |                     for ik in range(boxes.shape[0]):
152 |                         boxes[ik, 0] = max(0, boxes[ik, 0])
153 |                         boxes[ik, 2] = min(width, boxes[ik, 2])
154 |                         boxes[ik, 1] = max(0, boxes[ik, 1])
155 |                         boxes[ik, 3] = min(height, boxes[ik, 3])
156 | 
157 |                     cls_dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=True)
158 |                     det_boxes[cl_ind - 1].append(cls_dets)
159 | 
160 |                 count += 1
161 |             if val_itr%val_step == 0:
162 |                 torch.cuda.synchronize()
163 |                 te = time.perf_counter()
164 |                 print('im_detect: {:d}/{:d} time taken {:0.3f}'.format(count, num_images, te - ts))
165 |                 torch.cuda.synchronize()
166 |                 ts = time.perf_counter()
167 |             if print_time and val_itr%val_step == 0:
168 |                 torch.cuda.synchronize()
169 |                 te = time.perf_counter()
170 |                 print('NMS stuff Time {:0.3f}'.format(te - tf))
171 |     print('Evaluating detections for itration number ', iteration)
172 | 
173 |     #Save detection after NMS along with GT
174 |     with open(det_file, 'wb') as f:
175 |         pickle.dump([gt_boxes, det_boxes, save_ids], f, pickle.HIGHEST_PROTOCOL)
176 | 
177 |     return evaluate_detections(gt_boxes, det_boxes, CLASSES, iou_thresh=thresh)
178 | 
179 | 
def main():
    """Evaluate every checkpoint listed in --eval_iter on the test split.

    For each iteration number the matching checkpoint is loaded from
    <save_root>/cache/<exp_name>/, run through test_net(), and the per-class
    AP strings plus the mean AP are printed and written to a log file in the
    same directory.
    """
    means = (104, 117, 123)  # only support voc now

    exp_name = 'CONV-SSD-{}-{}-bs-{}-{}-lr-{:05d}'.format(args.dataset, args.input_type,
                            args.batch_size, args.basenet[:-14], int(args.lr * 100000))

    args.save_root += args.dataset+'/'
    args.data_root += args.dataset+'/'
    args.listid = '01' ## would be useful in JHMDB-21
    print('Exp name', exp_name, args.listid)

    # Only synchronise with the GPU when one is actually in use.
    cuda_in_use = args.cuda and torch.cuda.is_available()

    # Skip empty tokens: the default '120000,' ends with a comma, and int('')
    # would raise ValueError.
    iterations = [int(itr) for itr in args.eval_iter.split(',') if itr.strip()]

    for iteration in iterations:
        log_path = args.save_root + 'cache/' + exp_name + "/testing-{:d}.log".format(iteration)
        # The context manager closes the log even if evaluation raises.
        with open(log_path, "w", 1) as log_file:
            log_file.write(exp_name + '\n')
            trained_model_path = args.save_root + 'cache/' + exp_name + '/ssd300_ucf24_' + repr(iteration) + '.pth'
            log_file.write(trained_model_path+'\n')
            num_classes = len(CLASSES) + 1  # +1 for background
            net = build_ssd(300, num_classes)  # initialize SSD
            net.load_state_dict(torch.load(trained_model_path))
            net.eval()
            if args.cuda:
                net = net.cuda()
                cudnn.benchmark = True
            print('Finished loading model %d !' % iteration)
            # Load dataset
            dataset = UCF24Detection(args.data_root, 'test', BaseTransform(args.ssd_dim, means), AnnotationTransform(),
                                     input_type=args.input_type, full_test=True)
            # evaluation
            if cuda_in_use:
                torch.cuda.synchronize()
            tt0 = time.perf_counter()
            log_file.write('Testing net \n')
            mAP, ap_all, ap_strs = test_net(net, args.save_root, exp_name, args.input_type, dataset, iteration, num_classes)
            for ap_str in ap_strs:
                print(ap_str)
                log_file.write(ap_str + '\n')
            ptr_str = '\nMEANAP:::=>' + str(mAP) + '\n'
            print(ptr_str)
            log_file.write(ptr_str)

            if cuda_in_use:
                torch.cuda.synchronize()
            print('Complete set time {:0.2f}'.format(time.perf_counter() - tt0))

if __name__ == '__main__':
    main()
225 | 


--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
 1 | class AverageMeter(object):
 2 |     """Computes and stores the average and current value"""
 3 |     def __init__(self):
 4 |         self.reset()
 5 | 
 6 |     def reset(self):
 7 |         self.val = 0
 8 |         self.avg = 0
 9 |         self.sum = 0
10 |         self.count = 0
11 | 
12 |     def update(self, val, n=1):
13 |         self.val = val
14 |         self.sum += val * n
15 |         self.count += n
16 |         self.avg = self.sum / self.count


--------------------------------------------------------------------------------
/utils/evaluation.py:
--------------------------------------------------------------------------------
  1 | 
  2 | """ Evaluation code based on VOC protocol
  3 | 
  4 | Original author: Ellis Brown, Max deGroot for VOC dataset
  5 | https://github.com/amdegroot/ssd.pytorch
  6 | 
  7 | Updated by Gurkirt Singh for ucf101-24 dataset
  8 | 
  9 | """
 10 | 
 11 | import os
 12 | import numpy as np
 13 | 
 14 | def voc_ap(rec, prec, use_07_metric=False):
 15 |     """ ap = voc_ap(rec, prec, [use_07_metric])
 16 |     Compute VOC AP given precision and recall.
 17 |     If use_07_metric is true, uses the
 18 |     VOC 07 11 point method (default:False).
 19 |     """
 20 |     # print('voc_ap() - use_07_metric:=' + str(use_07_metric))
 21 |     if use_07_metric:
 22 |         # 11 point metric
 23 |         ap = 0.
 24 |         for t in np.arange(0., 1.1, 0.1):
 25 |             if np.sum(rec >= t) == 0:
 26 |                 p = 0
 27 |             else:
 28 |                 p = np.max(prec[rec >= t])
 29 |             ap = ap + p / 11.
 30 |     else:
 31 |         # correct AP calculation
 32 |         # first append sentinel values at the end
 33 |         mrec = np.concatenate(([0.], rec, [1.]))
 34 |         mpre = np.concatenate(([0.], prec, [0.]))
 35 | 
 36 |         # compute the precision envelope
 37 |         for i in range(mpre.size - 1, 0, -1):
 38 |             mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
 39 | 
 40 |         # to calculate area under PR curve, look for points
 41 |         # where X axis (recall) changes value
 42 |         i = np.where(mrec[1:] != mrec[:-1])[0]
 43 | 
 44 |         # and sum (\Delta recall) * prec
 45 |         ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
 46 |     return ap
 47 | 
 48 | 
 49 | def get_gt_of_cls(gt_boxes, cls):
 50 |     cls_gt_boxes = []
 51 |     for i in range(len(gt_boxes)):
 52 |         if gt_boxes[i,-1] == cls:
 53 |             cls_gt_boxes.append(gt_boxes[i, :-1])
 54 |     return np.asarray(cls_gt_boxes)
 55 | 
 56 | 
 57 | def compute_iou(cls_gt_boxes, box):
 58 |     ious = np.zeros(cls_gt_boxes.shape[0])
 59 | 
 60 |     for m in range(ious.shape[0]):
 61 |         gtbox = cls_gt_boxes[m]
 62 | 
 63 |         xmin = max(gtbox[0],box[0])
 64 |         ymin = max(gtbox[1], box[1])
 65 |         xmax = min(gtbox[2], box[2])
 66 |         ymax = min(gtbox[3], box[3])
 67 |         iw = np.maximum(xmax - xmin, 0.)
 68 |         ih = np.maximum(ymax - ymin, 0.)
 69 |         if iw>0 and ih>0:
 70 |             intsc = iw*ih
 71 |         else:
 72 |             intsc = 0.0
 73 |         # print (intsc)
 74 |         union = (gtbox[2] - gtbox[0]) * (gtbox[3] - gtbox[1]) + (box[2] - box[0]) * (box[3] - box[1]) - intsc
 75 |         ious[m] = intsc/union
 76 | 
 77 |     return ious
 78 | 
 79 | def evaluate_detections(gt_boxes, det_boxes, CLASSES=[], iou_thresh=0.5):
 80 | 
 81 |     ap_strs = []
 82 |     num_frames = len(gt_boxes)
 83 |     print('Evaluating for ', num_frames, 'frames')
 84 |     ap_all = np.zeros(len(CLASSES), dtype=np.float32)
 85 |     for cls_ind, cls in enumerate(CLASSES): # loop over each class 'cls'
 86 |         scores = np.zeros(num_frames * 220)
 87 |         istp = np.zeros(num_frames * 220)
 88 |         det_count = 0
 89 |         num_postives = 0.0
 90 |         for nf in range(num_frames): # loop over each frame 'nf'
 91 |                 # if len(gt_boxes[nf])>0 and len(det_boxes[cls_ind][nf]):
 92 |                 frame_det_boxes = np.copy(det_boxes[cls_ind][nf]) # get frame detections for class cls in nf
 93 |                 cls_gt_boxes = get_gt_of_cls(np.copy(gt_boxes[nf]), cls_ind) # get gt boxes for class cls in nf frame
 94 |                 num_postives += cls_gt_boxes.shape[0]
 95 |                 if frame_det_boxes.shape[0]>0: # check if there are dection for class cls in nf frame
 96 |                     argsort_scores = np.argsort(-frame_det_boxes[:,-1]) # sort in descending order
 97 |                     for i, k in enumerate(argsort_scores): # start from best scoring detection of cls to end
 98 |                         box = frame_det_boxes[k, :-1] # detection bounfing box
 99 |                         score = frame_det_boxes[k,-1] # detection score
100 |                         ispositive = False # set ispostive to false every time
101 |                         if cls_gt_boxes.shape[0]>0: # we can only find a postive detection
102 |                             # if there is atleast one gt bounding for class cls is there in frame nf
103 |                             iou = compute_iou(cls_gt_boxes, box) # compute IOU between remaining gt boxes
104 |                             # and detection boxes
105 |                             maxid = np.argmax(iou)  # get the max IOU window gt index
106 |                             if iou[maxid] >= iou_thresh: # check is max IOU is greater than detection threshold
107 |                                 ispositive = True # if yes then this is ture positive detection
108 |                                 cls_gt_boxes = np.delete(cls_gt_boxes, maxid, 0) # remove assigned gt box
109 |                         scores[det_count] = score # fill score array with score of current detection
110 |                         if ispositive:
111 |                             istp[det_count] = 1 # set current detection index (det_count)
112 |                             #  to 1 if it is true postive example
113 |                         det_count += 1
114 |         if num_postives<1:
115 |             num_postives =1
116 |         scores = scores[:det_count]
117 |         istp = istp[:det_count]
118 |         argsort_scores = np.argsort(-scores) # sort in descending order
119 |         istp = istp[argsort_scores] # reorder istp's on score sorting
120 |         fp = np.cumsum(istp == 0) # get false positives
121 |         tp = np.cumsum(istp == 1) # get  true positives
122 |         fp = fp.astype(np.float64)
123 |         tp = tp.astype(np.float64)
124 |         recall = tp / float(num_postives) # compute recall
125 |         precision = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) # compute precision
126 |         cls_ap = voc_ap(recall, precision) # compute average precision using voc2007 metric
127 |         ap_all[cls_ind] = cls_ap
128 |         # print(cls_ind,CLASSES[cls_ind], cls_ap)
129 |         ap_str = str(CLASSES[cls_ind]) + ' : ' + str(num_postives) + ' : ' + str(det_count) + ' : ' + str(cls_ap)
130 |         ap_strs.append(ap_str)
131 | 
132 |     # print ('mean ap ', np.mean(ap_all))
133 |     return np.mean(ap_all), ap_all, ap_strs
134 | 
135 | 
def save_detection_framewise(det_boxes, image_ids, iteration, det_save_dir=None):
    """Write the detections of every frame to one text file per frame.

    Each line is '<class index + 1> v0 v1 v2 v3 v4', where v0..v4 are the
    first five values of a detection row, formatted with '{:f}'.

    Args:
        det_boxes: det_boxes[cls_ind][idx] holds the detections of class
            `cls_ind` for the idx-th entry of `image_ids`.
        image_ids: frame identifiers; `img_id + '.txt'` becomes the file name
            and `img_id[:-5]` the directory created for it (presumably the
            last five characters are the frame number -- confirm with caller).
        iteration: training iteration, used in the default directory name.
        det_save_dir: optional output directory. Defaults to the original
            hard-coded location when omitted.
    """
    if det_save_dir is None:
        det_save_dir = '/mnt/mars-beta/gur-workspace/use-ssd-data/UCF101/detections/RGB-01-{:06d}/'.format(iteration)
    else:
        # Paths below are built by concatenation, so ensure a trailing separator.
        det_save_dir = os.path.join(det_save_dir, '')
    print('Saving detections to', det_save_dir)
    for idx, img_id in enumerate(image_ids):
        save_path = det_save_dir+img_id[:-5]
        # os.makedirs avoids spawning a shell with an unescaped path.
        os.makedirs(save_path, exist_ok=True)
        with open(det_save_dir+img_id+'.txt','w') as fid:
            for cls_ind, cls_dets in enumerate(det_boxes):
                frame_det_boxes = cls_dets[idx]
                for d in range(len(frame_det_boxes)):
                    values = ''.join(' {:f}'.format(frame_det_boxes[d,k]) for k in range(5))
                    fid.write(str(cls_ind+1) + values + '\n')
155 | 
156 | 


--------------------------------------------------------------------------------