├── meter.py ├── run_PDAN.sh ├── README.md ├── charades_i3d_per_video.py ├── PDAN.py ├── apmeter.py └── train_PDAN.py /meter.py: -------------------------------------------------------------------------------- 1 | 2 | class Meter(object): 3 | def reset(self): 4 | pass 5 | 6 | def add(self): 7 | pass 8 | 9 | def value(self): 10 | pass 11 | -------------------------------------------------------------------------------- /run_PDAN.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | export PATH=/Your/ENV/bin:$PATH 4 | 5 | python train_PDAN.py \ 6 | -dataset charades \ 7 | -mode rgb \ 8 | -model PDAN \ 9 | -train True \ 10 | -num_channel 512 \ 11 | -lr 0.0001 \ 12 | -comp_info charades_PDAN \ 13 | -APtype map \ 14 | -epoch 100 \ 15 | -batch_size 32 # -run_mode debug 16 | 17 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # [WACV2021] PDAN 2 | Implementation for the paper ["PDAN: Pyramid Dilated Attention Network for Action Detection"](https://openaccess.thecvf.com/content/WACV2021/html/Dai_PDAN_Pyramid_Dilated_Attention_Network_for_Action_Detection_WACV_2021_paper.html). 3 | 4 | The code is tested in a Python 3.7 + PyTorch 1.2 environment with one Tesla V100 GPU, and the overall code framework is adapted from the [Super-Events repository](https://github.com/piergiaj/super-events-cvpr18). 5 | 6 | ## Toyota Smarthome Untrimmed 7 | The evaluation code and pre-trained model for PDAN on the [Toyota Smarthome Untrimmed (TSU) dataset](https://project.inria.fr/toyotasmarthome/) can be found in this [repository](https://github.com/dairui01/Toyota_Smarthome/blob/main/pipline/). With the pretrained model, the mAP should exceed 32.7% under the frame-level mAP (f-mAP) cross-subject (CS) protocol. 8 | 9 | ## Charades 10 | For training and testing this code on **Charades**, please download the dataset from this [link](https://prior.allenai.org/projects/charades) and follow this [repository](https://github.com/piergiaj/pytorch-i3d) to extract the snippet-level I3D features. The RGB-pretrained PDAN model can be downloaded via this [link](https://mybox.inria.fr/f/9fa53012b2684cb588b5/?dl=1). If the I3D features are extracted correctly, the pretrained RGB model should achieve ~23.8% per-frame mAP on Charades. Note that this mAP is the one reported in the paper: it is computed over all timesteps and is not the weighted mAP. When using the original Charades localization setting (25 sampled timesteps, without weighting), the mAP should be above 24%.
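To initialize training from the downloaded checkpoint, add `-load_model /path/to/the/checkpoint` (placeholder path) to the command in `run_PDAN.sh`; the validation mAP is printed after every epoch. Before running, edit `rgb_root` (and, if needed, `flow_root`) in `train_PDAN.py` so that they point to the extracted I3D features, and make sure the split file `./data/charades.json` is in place.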
11 | 12 | If you find this work useful for your research, please cite our [paper](https://openaccess.thecvf.com/content/WACV2021/html/Dai_PDAN_Pyramid_Dilated_Attention_Network_for_Action_Detection_WACV_2021_paper.html): 13 | ```bibtex 14 | @inproceedings{dai2021pdan, 15 | title={Pdan: Pyramid dilated attention network for action detection}, 16 | author={Dai, Rui and Das, Srijan and Minciullo, Luca and Garattoni, Lorenzo and Francesca, Gianpiero and Bremond, Francois}, 17 | booktitle={Proceedings of the IEEE/CVF Winter Conference on Applications of Computer Vision}, 18 | pages={2970--2979}, 19 | year={2021} 20 | } 21 | ``` 22 | Contact: rui.dai@inria.fr 23 | 24 | -------------------------------------------------------------------------------- /charades_i3d_per_video.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.utils.data as data_utl 3 | from torch.utils.data.dataloader import default_collate 4 | 5 | import numpy as np 6 | import json 7 | import csv 8 | # import h5py 9 | 10 | import os 11 | import os.path 12 | from tqdm import tqdm 13 | 14 | 15 | def video_to_tensor(pic): 16 | """Convert a ``numpy.ndarray`` to tensor. 17 | Converts a numpy.ndarray (T x H x W x C) 18 | to a torch.FloatTensor of shape (C x T x H x W) 19 | 20 | Args: 21 | pic (numpy.ndarray): Video to be converted to tensor. 22 | Returns: 23 | Tensor: Converted video. 24 | """ 25 | return torch.from_numpy(pic.transpose([3,0,1,2])) 26 | 27 | 28 | def make_dataset(split_file, split, root, num_classes=157): 29 | dataset = [] 30 | with open(split_file, 'r') as f: 31 | data = json.load(f) 32 | print('split!!!!',split) 33 | i = 0 34 | for vid in tqdm(data.keys()): 35 | if data[vid]['subset'] != split: 36 | continue 37 | 38 | if not os.path.exists(os.path.join(root, vid+'.npy')): 39 | continue 40 | fts = np.load(os.path.join(root, vid+'.npy')) 41 | num_feat = fts.shape[0] 42 | label = np.zeros((num_feat,num_classes), np.float32) 43 | 44 | fps = num_feat/data[vid]['duration'] 45 | for ann in data[vid]['actions']: 46 | for fr in range(0,num_feat,1): 47 | if fr/fps > ann[1] and fr/fps < ann[2]: 48 | label[fr, ann[0]] = 1 # binary classification 49 | dataset.append((vid, label, data[vid]['duration'])) 50 | i += 1 51 | 52 | return dataset 53 | 54 | # make_dataset('multithumos.json', 'training', '/ssd2/thumos/val_i3d_rgb') 55 | 56 | 57 | class MultiThumos(data_utl.Dataset): 58 | 59 | def __init__(self, split_file, split, root, batch_size, classes): 60 | 61 | self.data = make_dataset(split_file, split, root, classes) 62 | self.split_file = split_file 63 | self.batch_size = batch_size 64 | self.root = root 65 | self.in_mem = {} 66 | 67 | def __getitem__(self, index): 68 | """ 69 | Args: 70 | index (int): Index 71 | 72 | Returns: 73 | tuple: (image, target) where target is class_index of the target class. 
74 | """ 75 | entry = self.data[index] 76 | if entry[0] in self.in_mem: 77 | feat = self.in_mem[entry[0]] 78 | else: 79 | # print('here') 80 | feat = np.load(os.path.join(self.root, entry[0]+'.npy')) 81 | # print(feat.shape[-1]) 82 | feat = feat.reshape((feat.shape[0],1,1,feat.shape[-1])) 83 | feat = feat.astype(np.float32) 84 | 85 | 86 | label = entry[1] 87 | return feat, label, [entry[0], entry[2]] 88 | 89 | def __len__(self): 90 | return len(self.data) 91 | 92 | 93 | def mt_collate_fn(batch): 94 | "Pads data and puts it into a tensor of same dimensions" 95 | max_len = 0 96 | for b in batch: 97 | if b[0].shape[0] > max_len: 98 | max_len = b[0].shape[0] 99 | 100 | new_batch = [] 101 | for b in batch: 102 | f = np.zeros((max_len, b[0].shape[1], b[0].shape[2], b[0].shape[3]), np.float32) 103 | m = np.zeros((max_len), np.float32) 104 | l = np.zeros((max_len, b[1].shape[1]), np.float32) 105 | f[:b[0].shape[0]] = b[0] 106 | m[:b[0].shape[0]] = 1 107 | l[:b[0].shape[0], :] = b[1] 108 | new_batch.append([video_to_tensor(f), torch.from_numpy(m), torch.from_numpy(l), b[2]]) 109 | 110 | return default_collate(new_batch) 111 | 112 | -------------------------------------------------------------------------------- /PDAN.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import torch.nn.init as init 5 | import os 6 | import copy 7 | 8 | 9 | class PDAN(nn.Module): 10 | def __init__(self, num_stages=1, num_layers=5, num_f_maps=512, dim=1024, num_classes=157): 11 | super(PDAN, self).__init__() 12 | self.stage1 = SSPDAN(num_layers, num_f_maps, dim, num_classes) 13 | self.stages = nn.ModuleList([copy.deepcopy(SSPDAN(num_layers, num_f_maps, num_classes, num_classes)) for s in range(num_stages-1)]) 14 | 15 | def forward(self, x, mask): 16 | out = self.stage1(x, mask) 17 | outputs = out.unsqueeze(0) 18 | for s in self.stages: 19 | out = s(out * mask[:, 0:1, :], mask) 20 | outputs = torch.cat((outputs, out.unsqueeze(0)), dim=0) 21 | return outputs 22 | 23 | class SSPDAN(nn.Module): 24 | def __init__(self, num_layers, num_f_maps, dim, num_classes): 25 | super(SSPDAN, self).__init__() 26 | self.conv_1x1 = nn.Conv1d(dim, num_f_maps, 1) 27 | self.layers = nn.ModuleList([copy.deepcopy(PDAN_Block(2 ** i, num_f_maps, num_f_maps)) for i in range(num_layers)]) 28 | self.conv_out = nn.Conv1d(num_f_maps, num_classes, 1) 29 | 30 | def forward(self, x, mask): 31 | out = self.conv_1x1(x) 32 | for layer in self.layers: 33 | out = layer(out, mask) 34 | out = self.conv_out(out) * mask[:, 0:1, :] 35 | return out 36 | 37 | 38 | class PDAN_Block(nn.Module): 39 | def __init__(self, dilation, in_channels, out_channels): 40 | super(PDAN_Block, self).__init__() 41 | self.conv_attention=DAL(in_channels, out_channels, kernel_size=3, padding=dilation, dilated=dilation) 42 | self.conv_1x1 = nn.Conv1d(out_channels, out_channels, 1) 43 | self.dropout = nn.Dropout() 44 | 45 | def forward(self, x, mask): 46 | out = F.relu(self.conv_attention(x)) 47 | out = self.conv_1x1(out) 48 | out = self.dropout(out) 49 | return (x + out) * mask[:, 0:1, :] 50 | 51 | class DAL(nn.Module): 52 | def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=1, dilated=1, groups=1, bias=False): 53 | super(DAL, self).__init__() 54 | self.out_channels = out_channels 55 | self.kernel_size = kernel_size 56 | self.stride = stride 57 | self.padding = padding 58 | self.groups = groups 59 | self.dilated = dilated 60 | assert 
self.out_channels % self.groups == 0, "out_channels should be divisible by groups. (example: out_channels: 40, groups: 4)" 61 | self.rel_t = nn.Parameter(torch.randn(out_channels, 1, kernel_size), requires_grad=True) 62 | self.key_conv = nn.Conv1d(in_channels, out_channels, kernel_size=1, bias=bias) 63 | self.query_conv = nn.Conv1d(in_channels, out_channels, kernel_size=1, bias=bias) 64 | self.value_conv = nn.Conv1d(in_channels, out_channels, kernel_size=1, bias=bias) 65 | self.reset_parameters() 66 | 67 | 68 | def forward(self, x): 69 | batch, channels, time = x.size() 70 | padded_x = F.pad(x, (self.padding, self.padding)) 71 | q_out = self.query_conv(x) 72 | k_out = self.key_conv(padded_x) 73 | v_out = self.value_conv(padded_x) 74 | kernel_size = 2*self.dilated + 1 75 | k_out = k_out.unfold(2, kernel_size, self.stride) # unfold(dim, size, step) 76 | k_out=torch.cat((k_out[:,:,:,0].unsqueeze(3),k_out[:,:,:,0+self.dilated].unsqueeze(3),k_out[:,:,:,0+2*self.dilated].unsqueeze(3)),dim=3) #dilated 77 | v_out = v_out.unfold(2, kernel_size, self.stride) 78 | v_out=torch.cat((v_out[:,:,:,0].unsqueeze(3),v_out[:,:,:,0+self.dilated].unsqueeze(3),v_out[:,:,:,0+2*self.dilated].unsqueeze(3)),dim=3) #dilated 79 | v_out = v_out + self.rel_t 80 | k_out = k_out.contiguous().view(batch, self.groups, self.out_channels // self.groups, time, -1) 81 | v_out = v_out.contiguous().view(batch, self.groups, self.out_channels // self.groups, time, -1) 82 | q_out = q_out.view(batch, self.groups, self.out_channels // self.groups, time, 1) 83 | out = q_out * k_out 84 | out = F.softmax(out, dim=-1) 85 | out = torch.einsum('bnctk,bnctk -> bnct', out, v_out).view(batch, -1, time) 86 | return out 87 | 88 | def reset_parameters(self): 89 | init.kaiming_normal_(self.key_conv.weight, mode='fan_out') 90 | init.kaiming_normal_(self.value_conv.weight, mode='fan_out') 91 | init.kaiming_normal_(self.query_conv.weight, mode='fan_out') 92 | init.normal_(self.rel_t, 0, 1) 93 | 94 | 95 | -------------------------------------------------------------------------------- /apmeter.py: -------------------------------------------------------------------------------- 1 | import math 2 | import meter 3 | import numpy as np 4 | import torch 5 | 6 | 7 | class APMeter(meter.Meter): 8 | """ 9 | The APMeter measures the average precision per class. 10 | 11 | The APMeter is designed to operate on `NxK` Tensors `output` and 12 | `target`, and optionally a `Nx1` Tensor weight where (1) the `output` 13 | contains model output scores for `N` examples and `K` classes that ought to 14 | be higher when the model is more convinced that the example should be 15 | positively labeled, and smaller when the model believes the example should 16 | be negatively labeled (for instance, the output of a sigmoid function); (2) 17 | the `target` contains only values 0 (for negative examples) and 1 18 | (for positive examples); and (3) the `weight` ( > 0) represents weight for 19 | each sample.
20 | """ 21 | def __init__(self): 22 | super(APMeter, self).__init__() 23 | self.reset() 24 | 25 | def reset(self): 26 | """Resets the meter with empty member variables""" 27 | self.scores = torch.FloatTensor(torch.FloatStorage()) 28 | self.targets = torch.LongTensor(torch.LongStorage()) 29 | self.weights = torch.FloatTensor(torch.FloatStorage()) 30 | 31 | def add(self, output, target, weight=None): 32 | """ 33 | Args: 34 | output (Tensor): NxK tensor that for each of the N examples 35 | indicates the probability of the example belonging to each of 36 | the K classes, according to the model. The probabilities should 37 | sum to one over all classes 38 | target (Tensor): binary NxK tensort that encodes which of the K 39 | classes are associated with the N-th input 40 | (eg: a row [0, 1, 0, 1] indicates that the example is 41 | associated with classes 2 and 4) 42 | weight (optional, Tensor): Nx1 tensor representing the weight for 43 | each example (each weight > 0) 44 | """ 45 | if not torch.is_tensor(output): 46 | output = torch.from_numpy(output) 47 | if not torch.is_tensor(target): 48 | target = torch.from_numpy(target) 49 | 50 | if weight is not None: 51 | if not torch.is_tensor(weight): 52 | weight = torch.from_numpy(weight) 53 | weight = weight.squeeze() 54 | if output.dim() == 1: 55 | output = output.view(-1, 1) 56 | else: 57 | assert output.dim() == 2, \ 58 | 'wrong output size (should be 1D or 2D with one column \ 59 | per class)' 60 | if target.dim() == 1: 61 | target = target.view(-1, 1) 62 | else: 63 | assert target.dim() == 2, \ 64 | 'wrong target size (should be 1D or 2D with one column \ 65 | per class)' 66 | if weight is not None: 67 | assert weight.dim() == 1, 'Weight dimension should be 1' 68 | assert weight.numel() == target.size(0), \ 69 | 'Weight dimension 1 should be the same as that of target' 70 | assert torch.min(weight) >= 0, 'Weight should be non-negative only' 71 | assert torch.equal(target**2, target), \ 72 | 'targets should be binary (0 or 1)' 73 | if self.scores.numel() > 0: 74 | assert target.size(1) == self.targets.size(1), \ 75 | 'dimensions for output should match previously added examples.' 
76 | 77 | # make sure storage is of sufficient size 78 | if self.scores.storage().size() < self.scores.numel() + output.numel(): 79 | new_size = math.ceil(self.scores.storage().size() * 1.5) 80 | new_weight_size = math.ceil(self.weights.storage().size() * 1.5) 81 | self.scores.storage().resize_(int(new_size + output.numel())) 82 | self.targets.storage().resize_(int(new_size + output.numel())) 83 | if weight is not None: 84 | self.weights.storage().resize_(int(new_weight_size 85 | + output.size(0))) 86 | 87 | # store scores and targets 88 | offset = self.scores.size(0) if self.scores.dim() > 0 else 0 89 | self.scores.resize_(offset + output.size(0), output.size(1)) 90 | self.targets.resize_(offset + target.size(0), target.size(1)) 91 | self.scores.narrow(0, offset, output.size(0)).copy_(output) 92 | self.targets.narrow(0, offset, target.size(0)).copy_(target) 93 | 94 | if weight is not None: 95 | self.weights.resize_(offset + weight.size(0)) 96 | self.weights.narrow(0, offset, weight.size(0)).copy_(weight) 97 | 98 | def value(self): 99 | """Returns the model's average precision for each class 100 | 101 | Return: 102 | ap (FloatTensor): 1xK tensor, with avg precision for each class k 103 | """ 104 | 105 | if self.scores.numel() == 0: 106 | return 0 107 | ap = torch.zeros(self.scores.size(1)) 108 | rg = torch.range(1, self.scores.size(0)).float() 109 | if self.weights.numel() > 0: 110 | weight = self.weights.new(self.weights.size()) 111 | weighted_truth = self.weights.new(self.weights.size()) 112 | 113 | # print(self.scores) 114 | # compute average precision for each class 115 | for k in range(self.scores.size(1)): 116 | # sort scores 117 | scores = self.scores[:, k] 118 | targets = self.targets[:, k] 119 | _, sortind = torch.sort(scores, 0, True) 120 | truth = targets[sortind] 121 | if self.weights.numel() > 0: 122 | weight = self.weights[sortind] 123 | weighted_truth = truth.float() * weight 124 | rg = weight.cumsum(0) 125 | 126 | # compute true positive sums 127 | if self.weights.numel() > 0: 128 | tp = weighted_truth.cumsum(0) 129 | else: 130 | tp = truth.float().cumsum(0) 131 | 132 | # compute precision curve 133 | precision = tp.div(rg) 134 | 135 | # compute average precision 136 | ap[k] = precision[truth.byte()].sum() / max(truth.sum(), 1) 137 | return ap 138 | -------------------------------------------------------------------------------- /train_PDAN.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import time 3 | import os 4 | import argparse 5 | import sys 6 | 7 | import torchvision.models as models 8 | import torch 9 | 10 | 11 | 12 | def str2bool(v): 13 | if v.lower() in ('yes', 'true', 't', 'y', '1'): 14 | return True 15 | elif v.lower() in ('no', 'false', 'f', 'n', '0'): 16 | return False 17 | else: 18 | raise argparse.ArgumentTypeError('Boolean value expected.') 19 | 20 | 21 | parser = argparse.ArgumentParser() 22 | parser.add_argument('-mode', type=str, help='rgb or flow (or joint for eval)') 23 | parser.add_argument('-train', type=str2bool, default='True', help='train or eval') 24 | parser.add_argument('-comp_info', type=str) 25 | parser.add_argument('-rgb_model_file', type=str) 26 | parser.add_argument('-flow_model_file', type=str) 27 | parser.add_argument('-gpu', type=str, default='4') 28 | parser.add_argument('-dataset', type=str, default='charades') 29 | parser.add_argument('-rgb_root', type=str, default='no_root') 30 | parser.add_argument('-flow_root', type=str, default='no_root') 31 | 
parser.add_argument('-type', type=str, default='original') 32 | parser.add_argument('-lr', type=str, default='0.1') 33 | parser.add_argument('-epoch', type=str, default='50') 34 | parser.add_argument('-model', type=str, default='') 35 | parser.add_argument('-APtype', type=str, default='wap') 36 | parser.add_argument('-randomseed', type=str, default='False') 37 | parser.add_argument('-load_model', type=str, default='False') 38 | parser.add_argument('-batch_size', type=str, default='False') 39 | parser.add_argument('-num_channel', type=str, default='False') 40 | parser.add_argument('-run_mode', type=str, default='False') 41 | parser.add_argument('-feat', type=str, default='False') 42 | 43 | 44 | args = parser.parse_args() 45 | 46 | import torch 47 | import torch.nn as nn 48 | import torch.nn.functional as F 49 | import torch.optim as optim 50 | import numpy as np 51 | import random 52 | 53 | # set random seed 54 | if args.randomseed=="False": 55 | SEED = 0 56 | elif args.randomseed=="True": 57 | SEED = random.randint(1, 100000) 58 | else: 59 | SEED = int(args.randomseed) 60 | 61 | torch.manual_seed(SEED) 62 | torch.cuda.manual_seed(SEED) 63 | torch.manual_seed(SEED) 64 | np.random.seed(SEED) 65 | torch.cuda.manual_seed_all(SEED) 66 | random.seed(SEED) 67 | torch.backends.cudnn.deterministic = True 68 | torch.backends.cudnn.benchmark = False 69 | print('Random_SEED!!!:', SEED) 70 | 71 | from torch.optim import lr_scheduler 72 | from torch.autograd import Variable 73 | 74 | import json 75 | 76 | import pickle 77 | import math 78 | 79 | from apmeter import APMeter 80 | 81 | 82 | batch_size = int(args.batch_size) 83 | 84 | 85 | if args.dataset == 'charades': 86 | from charades_i3d_per_video import MultiThumos as Dataset 87 | from charades_i3d_per_video import mt_collate_fn as collate_fn 88 | if args.run_mode == 'debug': 89 | print('debug!!!') 90 | train_split = './data/charades_test.json' 91 | test_split = './data/charades_test.json' 92 | else: 93 | train_split = './data/charades.json' 94 | test_split = './data/charades.json' 95 | # print('load feature from:', args.rgb_root) 96 | rgb_root = '/Path/to/charades_feat_rgb' 97 | skeleton_root = '/Path/to/charades_feat_pose' 98 | flow_root = '/Path/to/charades_feat_flow' 99 | rgb_of=[rgb_root,flow_root] 100 | classes = 157 101 | 102 | 103 | def load_data(train_split, val_split, root): 104 | # Load Data 105 | print('load data', root) 106 | if len(train_split) > 0: 107 | if str(args.feat) == '2d': 108 | dataset = Dataset(train_split, 'training', root, batch_size, classes, int(args.pool_step)) 109 | else: 110 | dataset = Dataset(train_split, 'training', root, batch_size, classes) 111 | dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=8, 112 | pin_memory=True, collate_fn=collate_fn) 113 | dataloader.root = root 114 | else: 115 | 116 | dataset = None 117 | dataloader = None 118 | 119 | if str(args.feat) == '2d': 120 | val_dataset = Dataset(val_split, 'testing', root, batch_size, classes, int(args.pool_step)) 121 | else: 122 | val_dataset = Dataset(val_split, 'testing', root, batch_size, classes) 123 | val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=1, shuffle=True, num_workers=2, 124 | pin_memory=True, collate_fn=collate_fn) 125 | val_dataloader.root = root 126 | 127 | dataloaders = {'train': dataloader, 'val': val_dataloader} 128 | datasets = {'train': dataset, 'val': val_dataset} 129 | return dataloaders, datasets 130 | 131 | 132 | # train the model 133 | def run(models, criterion, 
num_epochs=50): 134 | since = time.time() 135 | 136 | for epoch in range(num_epochs): 137 | print('Epoch {}/{}'.format(epoch, num_epochs - 1)) 138 | print('-' * 10) 139 | 140 | probs = [] 141 | for model, gpu, dataloader, optimizer, sched, model_file in models: 142 | train_map, train_loss = train_step(model, gpu, optimizer, dataloader['train'], epoch) 143 | prob_val, val_loss, val_map = val_step(model, gpu, dataloader['val'], epoch) 144 | probs.append(prob_val) 145 | sched.step(val_loss) 146 | 147 | 148 | def eval_model(model, dataloader, baseline=False): 149 | results = {} 150 | for data in dataloader: 151 | other = data[3] 152 | outputs, loss, probs, _ = run_network(model, data, 0, baseline) 153 | fps = outputs.size()[1] / other[1][0] 154 | 155 | results[other[0][0]] = (outputs.data.cpu().numpy()[0], probs.data.cpu().numpy()[0], data[2].numpy()[0], fps) 156 | return results 157 | 158 | 159 | def run_network(model, data, gpu, epoch=0, baseline=False): 160 | inputs, mask, labels, other = data 161 | # wrap them in Variable 162 | inputs = Variable(inputs.cuda(gpu)) 163 | mask = Variable(mask.cuda(gpu)) 164 | labels = Variable(labels.cuda(gpu)) 165 | 166 | mask_list = torch.sum(mask, 1) 167 | mask_new = np.zeros((mask.size()[0], classes, mask.size()[1])) 168 | for i in range(mask.size()[0]): 169 | mask_new[i, :, :int(mask_list[i])] = np.ones((classes, int(mask_list[i]))) 170 | mask_new = torch.from_numpy(mask_new).float() 171 | mask_new = Variable(mask_new.cuda(gpu)) 172 | 173 | inputs = inputs.squeeze(3).squeeze(3) 174 | #print("inputs",inputs.size()) 175 | activation = model(inputs, mask_new) 176 | 177 | 178 | outputs_final = activation 179 | 180 | #print("outputs_final",outputs_final.size()) 181 | outputs_final = outputs_final[-1] 182 | #print("outputs_final",outputs_final.size()) 183 | outputs_final = outputs_final.permute(0, 2, 1) 184 | probs_f = F.sigmoid(outputs_final) * mask.unsqueeze(2) 185 | loss_f = F.binary_cross_entropy_with_logits(outputs_final, labels, size_average=False) 186 | loss_f = torch.sum(loss_f) / torch.sum(mask) 187 | 188 | loss = loss_f 189 | 190 | corr = torch.sum(mask) 191 | tot = torch.sum(mask) 192 | 193 | return outputs_final, loss, probs_f, corr / tot 194 | 195 | 196 | def train_step(model, gpu, optimizer, dataloader, epoch): 197 | model.train(True) 198 | tot_loss = 0.0 199 | error = 0.0 200 | num_iter = 0. 201 | apm = APMeter() 202 | for data in dataloader: 203 | optimizer.zero_grad() 204 | num_iter += 1 205 | 206 | outputs, loss, probs, err = run_network(model, data, gpu, epoch) 207 | apm.add(probs.data.cpu().numpy()[0], data[2].numpy()[0]) 208 | error += err.data 209 | tot_loss += loss.data 210 | 211 | loss.backward() 212 | optimizer.step() 213 | if args.APtype == 'wap': 214 | train_map = 100 * apm.value() 215 | else: 216 | train_map = 100 * apm.value().mean() 217 | print('train-map:', train_map) 218 | apm.reset() 219 | 220 | epoch_loss = tot_loss / num_iter 221 | 222 | return train_map, epoch_loss 223 | 224 | 225 | def val_step(model, gpu, dataloader, epoch): 226 | model.train(False) 227 | apm = APMeter() 228 | tot_loss = 0.0 229 | error = 0.0 230 | num_iter = 0. 231 | num_preds = 0 232 | 233 | full_probs = {} 234 | 235 | # Iterate over data. 
236 | for data in dataloader: 237 | num_iter += 1 238 | other = data[3] 239 | 240 | outputs, loss, probs, err = run_network(model, data, gpu, epoch) 241 | 242 | apm.add(probs.data.cpu().numpy()[0], data[2].numpy()[0]) 243 | 244 | error += err.data 245 | tot_loss += loss.data 246 | 247 | probs = probs.squeeze() 248 | 249 | full_probs[other[0][0]] = probs.data.cpu().numpy().T 250 | 251 | epoch_loss = tot_loss / num_iter 252 | 253 | 254 | val_map = torch.sum(100 * apm.value()) / torch.nonzero(100 * apm.value()).size()[0] 255 | print('val-map:', val_map) 256 | print(100 * apm.value()) 257 | apm.reset() 258 | 259 | return full_probs, epoch_loss, val_map 260 | 261 | 262 | if __name__ == '__main__': 263 | print(str(args.model)) 264 | print('batch_size:', batch_size) 265 | print('cuda_avail', torch.cuda.is_available()) 266 | 267 | if args.mode == 'flow': 268 | print('flow mode', flow_root) 269 | dataloaders, datasets = load_data(train_split, test_split, flow_root) 270 | elif args.mode == 'skeleton': 271 | print('Pose mode', skeleton_root) 272 | dataloaders, datasets = load_data(train_split, test_split, skeleton_root) 273 | elif args.mode == 'rgb': 274 | print('RGB mode', rgb_root) 275 | dataloaders, datasets = load_data(train_split, test_split, rgb_root) 276 | 277 | if args.train: 278 | num_channel = args.num_channel 279 | if args.mode == 'skeleton': 280 | input_channnel = 256 281 | else: 282 | input_channnel = 1024 283 | 284 | num_classes = classes 285 | mid_channel=int(args.num_channel) 286 | 287 | if args.model=="PDAN": 288 | print("you are processing PDAN_original") 289 | from PDAN import PDAN 290 | # rgb_model = Net(mid_channel, input_channnel, classes) 291 | stage=1 292 | block=5 293 | num_channel=512 294 | input_channnel=1024 295 | num_classes=classes 296 | rgb_model = PDAN(stage, block, num_channel, input_channnel, num_classes) 297 | pytorch_total_params = sum(p.numel() for p in rgb_model.parameters() if p.requires_grad) 298 | print('pytorch_total_params', pytorch_total_params) 299 | #exit() 300 | print ('stage:', stage, 'block:', block, 'num_channel:', num_channel, 'input_channnel:', input_channnel, 301 | 'num_classes:', num_classes) 302 | 303 | 304 | rgb_model=torch.nn.DataParallel(rgb_model) 305 | 306 | if args.load_model!= "False": 307 | rgb_model.load_state_dict(torch.load(str(args.load_model))) 308 | print("loaded",args.load_model) 309 | 310 | pytorch_total_params = sum(p.numel() for p in rgb_model.parameters() if p.requires_grad) 311 | print('pytorch_total_params', pytorch_total_params) 312 | print('num_channel:', num_channel, 'input_channnel:', input_channnel,'num_classes:', num_classes) 313 | rgb_model.cuda() 314 | 315 | criterion = nn.NLLLoss(reduce=False) 316 | lr = float(args.lr) 317 | print(lr) 318 | optimizer = optim.Adam(rgb_model.parameters(), lr=lr) 319 | lr_sched = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.5, patience=8, verbose=True) 320 | run([(rgb_model, 0, dataloaders, optimizer, lr_sched, args.comp_info)], criterion, num_epochs=int(args.epoch)) 321 | 322 | 323 | --------------------------------------------------------------------------------
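A minimal usage sketch, not part of the original repository, that wires the pieces above together: it builds PDAN with the default Charades configuration used in `train_PDAN.py`, runs a forward pass on random I3D-sized features, and scores the dummy predictions with `APMeter`. The tensor sizes are illustrative only; the shapes mirror what `run_network` in `train_PDAN.py` feeds to the model.

```python
import torch
from PDAN import PDAN
from apmeter import APMeter

# Illustrative sizes: batch of 2 videos, 128 temporal snippets,
# 1024-d I3D features, 157 Charades classes.
B, T, C_in, num_classes = 2, 128, 1024, 157

model = PDAN(num_stages=1, num_layers=5, num_f_maps=512, dim=C_in, num_classes=num_classes)

feats = torch.randn(B, C_in, T)       # (batch, feature_dim, time), as after inputs.squeeze(3).squeeze(3)
mask = torch.ones(B, num_classes, T)  # 1 for valid timesteps, 0 for padding (mask_new in run_network)

out = model(feats, mask)              # (num_stages, batch, num_classes, time)
print(out.shape)                      # torch.Size([1, 2, 157, 128])

# Per-frame probabilities of the last stage, shaped (batch, time, num_classes).
probs = torch.sigmoid(out[-1]).permute(0, 2, 1)

# Dummy binary labels; APMeter.add expects NxK scores and binary NxK targets.
labels = torch.randint(0, 2, (B, T, num_classes)).float()

apm = APMeter()
apm.add(probs[0].detach().numpy(), labels[0].numpy())  # one video at a time, as in train_step
print(100 * apm.value().mean())                        # mAP over the dummy labels
```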