├── .gitignore
├── README.md
├── __init__.py
├── demo.py
├── images
│   ├── frankfurt_000000_014480_leftImg8bit.png
│   ├── frankfurt_000000_019607_leftImg8bit.png
│   ├── frankfurt_000000_020321_leftImg8bit.png
│   └── frankfurt_000001_009504_leftImg8bit.png
└── peddla.py

/.gitignore:
--------------------------------------------------------------------------------
DCNv2
*.pth
*.txt
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# APD
[Attribute-aware Pedestrian Detection in a Crowd](https://arxiv.org/pdf/1910.09188.pdf)

## Installation

To run the demo, the following packages are required (`glob` and `argparse` ship with the Python standard library):
```
numpy
matplotlib
torch >= 0.4.1
```
plus [DCNv2](https://github.com/CharlesShang/DCNv2/tree/pytorch_0.4), built from its `pytorch_0.4` branch.

## Model
[final.pth](https://drive.google.com/file/d/1CqLsFCLzWaDPojwPlbepeqUhEP7n9nS0/view?usp=sharing) is a model trained on the [CityPersons dataset](https://bitbucket.org/shanshanzhang/citypersons/src/default/).

## Demo
The demo code and the trained model are only for the CityPersons dataset. The bundled images are PNG files, so pass a matching glob pattern:
```
python demo.py --img_list 'images/*.png'
```
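For reference, `demo.py` boils down to the following pipeline (a minimal sketch, assuming `final.pth` has been downloaded into the repository root and DCNv2 is built):
```python
import matplotlib.pyplot as plt
import numpy as np
import torch

from demo import load_model, parse_det, preprocess
from peddla import peddla_net

# ImageNet statistics, as used in demo.py
mean = np.array([0.485, 0.456, 0.406], dtype=np.float32).reshape(1, 1, 3)
std = np.array([0.229, 0.224, 0.225], dtype=np.float32).reshape(1, 1, 3)

# build the detector and load the CityPersons checkpoint
model = peddla_net(34, {'hm': 1, 'wh': 1, 'reg': 2, 'aed': 4},
                   head_conv=256, down_ratio=4).cuda().eval()
model = load_model(model, 'final.pth')

img = plt.imread('images/frankfurt_000000_014480_leftImg8bit.png').astype(np.float32)
with torch.no_grad():
    output = model(preprocess(img[:, :, ::-1], mean, std).cuda())[-1]
output['hm'].sigmoid_()

# split the attribute embedding into density (its norm) and diversity (its direction)
attr = output['aed']
density = attr.pow(2).sum(dim=1, keepdim=True).sqrt()
boxes = parse_det(output['hm'], output['wh'], output['reg'],
                  density=density, diversity=attr / density, score=0.5)
print(boxes)  # one [x1, y1, x2, y2, score] row per detection
```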

## Citation

If you find this project useful for your research, please use the following BibTeX entry.

    @article{DBLP:journals/corr/abs-1910-09188,
      author        = {Jialiang Zhang and
                       Lixiang Lin and
                       Yun{-}chen Chen and
                       Yao Hu and
                       Steven C. H. Hoi and
                       Jianke Zhu},
      title         = {{CSID:} Center, Scale, Identity and Density-aware Pedestrian Detection
                       in a Crowd},
      journal       = {CoRR},
      volume        = {abs/1910.09188},
      year          = {2019},
      url           = {http://arxiv.org/abs/1910.09188},
      archivePrefix = {arXiv},
      eprint        = {1910.09188},
      timestamp     = {Tue, 22 Oct 2019 18:17:16 +0200},
      biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1910-09188},
      bibsource     = {dblp computer science bibliography, https://dblp.org}
    }
--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kalyo-zjl/APD/3c182f34abc80bf2d037fa51484d2611b1f26349/__init__.py
--------------------------------------------------------------------------------
/demo.py:
--------------------------------------------------------------------------------
import argparse
import glob

import matplotlib.patches as patches
import matplotlib.pyplot as plt
import numpy as np
import torch

from peddla import peddla_net


def parse_args():
    parser = argparse.ArgumentParser(description='APD demo')
    parser.add_argument('--img_list', type=str, default='images/*.png',
                        help='glob pattern of the input images')
    args = parser.parse_args()
    return args


def preprocess(image, mean, std):
    # normalize and convert HxWxC -> 1xCxHxW
    img = (image - mean) / std
    return torch.from_numpy(img.transpose(2, 0, 1)[np.newaxis, ...])


def parse_det(hm, wh, reg, density=None, diversity=None, score=0.1, down=4):
    # hm = _nms(hm, kernel=2)
    seman = hm[0, 0].cpu().numpy()
    height = wh[0, 0].cpu().numpy()
    offset_y = reg[0, 0, :, :].cpu().numpy()
    offset_x = reg[0, 1, :, :].cpu().numpy()
    density = density[0, 0].cpu().numpy()
    diversity = diversity[0].cpu().numpy()
    y_c, x_c = np.where(seman > score)
    maxh = int(down * seman.shape[0])
    maxw = int(down * seman.shape[1])
    boxs = []
    dens = []
    divers = []
    if len(y_c) > 0:
        for i in range(len(y_c)):
            h = np.exp(height[y_c[i], x_c[i]]) * down
            w = 0.41 * h  # fixed pedestrian aspect ratio
            o_y = offset_y[y_c[i], x_c[i]]
            o_x = offset_x[y_c[i], x_c[i]]
            s = seman[y_c[i], x_c[i]]
            x1, y1 = max(0, (x_c[i] + o_x) * down - w / 2), max(0, (y_c[i] + o_y) * down - h / 2)
            boxs.append([x1, y1, min(x1 + w, maxw), min(y1 + h, maxh), s])
            dens.append(density[y_c[i], x_c[i]])
            divers.append(diversity[:, y_c[i], x_c[i]])
        boxs = np.asarray(boxs, dtype=np.float32)
        dens = np.asarray(dens, dtype=np.float32)
        divers = np.asarray(divers, dtype=np.float32)
        keep = a_nms(boxs, 0.5, dens, divers)
        boxs = boxs[keep, :]
    else:
        boxs = np.asarray(boxs, dtype=np.float32)
    return boxs


def a_nms(dets, thresh, density, diversity):
    x1 = dets[:, 0]
    y1 = dets[:, 1]
    x2 = dets[:, 2]
    y2 = dets[:, 3]
    scores = dets[:, 4]

    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])

        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[order[1:]] - inter)

        # raise the IoU threshold in dense regions, capped at 0.75
        thresh_update = min(max(thresh, density[i]), 0.75)

        temp_tag = diversity[i]
        temp_tags = diversity[order[1:]]
        diff = np.sqrt(np.power((temp_tag - temp_tags), 2).sum(1))
        Flag_4 = diff > 0.95

        thresh_ = np.ones_like(ovr) * 0.5
        thresh_[Flag_4] = thresh_update
        inds = np.where(ovr <= thresh_)[0]
        order = order[inds + 1]

    return keep
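
# Worked example of the adaptive rule above: with base thresh=0.5 and a
# predicted density of 0.68 for the kept box i, neighbours whose identity
# embeddings clearly differ (L2 distance > 0.95) are suppressed only above
# IoU min(max(0.5, 0.68), 0.75) = 0.68, so heavily-overlapped but distinct
# pedestrians survive; look-alike neighbours still use the plain 0.5 threshold.
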
def load_model(model, model_path):
    checkpoint = torch.load(model_path, map_location=lambda storage, loc: storage)
    print('loaded {}, epoch {}'.format(model_path, checkpoint['epoch']))
    state_dict_ = checkpoint['state_dict']
    state_dict = {}

    # convert a DataParallel state dict to a plain one
    for k in state_dict_:
        if k.startswith('module') and not k.startswith('module_list'):
            state_dict[k[7:]] = state_dict_[k]
        else:
            state_dict[k] = state_dict_[k]
    model_state_dict = model.state_dict()

    # check loaded parameters against the created model's parameters
    for k in state_dict:
        if k in model_state_dict:
            if state_dict[k].shape != model_state_dict[k].shape:
                print('Skip loading parameter {}, required shape {}, '
                      'loaded shape {}.'.format(
                          k, model_state_dict[k].shape, state_dict[k].shape))
                state_dict[k] = model_state_dict[k]
        else:
            print('Drop parameter {}.'.format(k))
    for k in model_state_dict:
        if not (k in state_dict):
            print('No param {}.'.format(k))
            state_dict[k] = model_state_dict[k]
    model.load_state_dict(state_dict, strict=False)

    return model
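
# The checkpoint is expected to be a dict with at least {'epoch': int,
# 'state_dict': OrderedDict}; keys saved under nn.DataParallel carry a
# 'module.' prefix, which the first loop above strips.
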
def main():
    # ImageNet statistics; the image is flipped to BGR before normalization
    mean = np.array([0.485, 0.456, 0.406],
                    dtype=np.float32).reshape(1, 1, 3)
    std = np.array([0.229, 0.224, 0.225],
                   dtype=np.float32).reshape(1, 1, 3)

    args = parse_args()
    num_layers = 34
    heads = {'hm': 1, 'wh': 1, 'reg': 2, 'aed': 4}
    model = peddla_net(num_layers, heads, head_conv=256, down_ratio=4).cuda().eval()

    # load model
    model = load_model(model, 'final.pth')
    # torch.cuda.empty_cache()

    file_lists = sorted(glob.glob(args.img_list))
    for file in file_lists:
        torch.cuda.synchronize()
        img = plt.imread(file).astype(np.float32)
        img_pre = preprocess(img[:, :, ::-1], mean, std)
        img_pre = img_pre.cuda()

        with torch.no_grad():
            output = model(img_pre)[-1]
        output['hm'].sigmoid_()
        hm, wh, reg, attr = output['hm'], output['wh'], output['reg'], output['aed']

        # density is the norm of the attribute embedding, diversity its direction
        density = attr.pow(2).sum(dim=1, keepdim=True).sqrt()
        diversity = torch.div(attr, density)
        boxes = parse_det(hm, wh, reg, density=density, diversity=diversity, score=0.5, down=4)

        if len(boxes) > 0:
            # convert [x1, y1, x2, y2] to [x, y, w, h] for plotting
            boxes[:, [2, 3]] -= boxes[:, [0, 1]]

            fig, ax = plt.subplots(1)
            ax.imshow(img)
            for i in range(len(boxes)):
                x, y, w, h, score = boxes[i]
                rect = patches.Rectangle((x, y), w, h, linewidth=1, edgecolor='r', facecolor='none')
                ax.add_patch(rect)
        else:
            plt.imshow(img)
        plt.show()


if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/images/frankfurt_000000_014480_leftImg8bit.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kalyo-zjl/APD/3c182f34abc80bf2d037fa51484d2611b1f26349/images/frankfurt_000000_014480_leftImg8bit.png
--------------------------------------------------------------------------------
/images/frankfurt_000000_019607_leftImg8bit.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kalyo-zjl/APD/3c182f34abc80bf2d037fa51484d2611b1f26349/images/frankfurt_000000_019607_leftImg8bit.png
--------------------------------------------------------------------------------
/images/frankfurt_000000_020321_leftImg8bit.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kalyo-zjl/APD/3c182f34abc80bf2d037fa51484d2611b1f26349/images/frankfurt_000000_020321_leftImg8bit.png
--------------------------------------------------------------------------------
/images/frankfurt_000001_009504_leftImg8bit.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kalyo-zjl/APD/3c182f34abc80bf2d037fa51484d2611b1f26349/images/frankfurt_000001_009504_leftImg8bit.png
--------------------------------------------------------------------------------
/peddla.py:
--------------------------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math
import logging
import numpy as np
from os.path import join

import torch
from torch import nn
import torch.nn.functional as F
import torch.utils.model_zoo as model_zoo

from DCNv2.dcn_v2 import DCN

BN_MOMENTUM = 0.1
Track = True
logger = logging.getLogger(__name__)


def get_model_url(data='imagenet', name='dla34', hash='ba72cf86'):
    return join('http://dl.yf.io/dla/models', data, '{}-{}.pth'.format(name, hash))


def conv3x3(in_planes, out_planes, stride=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=1, bias=False)


class BasicBlock(nn.Module):
    def __init__(self, inplanes, planes, stride=1, dilation=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=3,
                               stride=stride, padding=dilation,
                               bias=False, dilation=dilation)
        self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM, track_running_stats=Track)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=1, padding=dilation,
                               bias=False, dilation=dilation)
        self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM, track_running_stats=Track)
        self.stride = stride

    def forward(self, x, residual=None):
        if residual is None:
            residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        out += residual
        out = self.relu(out)

        return out


class Bottleneck(nn.Module):
    expansion = 2

    def __init__(self, inplanes, planes, stride=1, dilation=1):
        super(Bottleneck, self).__init__()
        expansion = Bottleneck.expansion
        bottle_planes = planes // expansion
        self.conv1 = nn.Conv2d(inplanes, bottle_planes,
                               kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(bottle_planes, momentum=BN_MOMENTUM, track_running_stats=Track)
        self.conv2 = nn.Conv2d(bottle_planes, bottle_planes, kernel_size=3,
                               stride=stride, padding=dilation,
                               bias=False, dilation=dilation)
        self.bn2 = nn.BatchNorm2d(bottle_planes, momentum=BN_MOMENTUM, track_running_stats=Track)
        self.conv3 = nn.Conv2d(bottle_planes, planes,
                               kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM, track_running_stats=Track)
        self.relu = nn.ReLU(inplace=True)
        self.stride = stride

    def forward(self, x, residual=None):
        if residual is None:
            residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        out += residual
        out = self.relu(out)

        return out


class BottleneckX(nn.Module):
    expansion = 2
    cardinality = 32

    def __init__(self, inplanes, planes, stride=1, dilation=1):
        super(BottleneckX, self).__init__()
        cardinality = BottleneckX.cardinality
        # dim = int(math.floor(planes * (BottleneckV5.expansion / 64.0)))
        # bottle_planes = dim * cardinality
        bottle_planes = planes * cardinality // 32
        self.conv1 = nn.Conv2d(inplanes, bottle_planes,
                               kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(bottle_planes, momentum=BN_MOMENTUM, track_running_stats=Track)
        self.conv2 = nn.Conv2d(bottle_planes, bottle_planes, kernel_size=3,
                               stride=stride, padding=dilation, bias=False,
                               dilation=dilation, groups=cardinality)
        self.bn2 = nn.BatchNorm2d(bottle_planes, momentum=BN_MOMENTUM, track_running_stats=Track)
        self.conv3 = nn.Conv2d(bottle_planes, planes,
                               kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM, track_running_stats=Track)
        self.relu = nn.ReLU(inplace=True)
        self.stride = stride

    def forward(self, x, residual=None):
        if residual is None:
            residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        out += residual
        out = self.relu(out)

        return out
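
# Bottleneck and BottleneckX are kept for parity with the original DLA code;
# the dla34 factory below only ever uses BasicBlock.
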
class Root(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, residual):
        super(Root, self).__init__()
        self.conv = nn.Conv2d(
            in_channels, out_channels, 1,
            stride=1, bias=False, padding=(kernel_size - 1) // 2)
        self.bn = nn.BatchNorm2d(out_channels, momentum=BN_MOMENTUM, track_running_stats=Track)
        self.relu = nn.ReLU(inplace=True)
        self.residual = residual

    def forward(self, *x):
        children = x
        x = self.conv(torch.cat(x, 1))
        x = self.bn(x)
        if self.residual:
            x += children[0]
        x = self.relu(x)

        return x


class Tree(nn.Module):
    def __init__(self, levels, block, in_channels, out_channels, stride=1,
                 level_root=False, root_dim=0, root_kernel_size=1,
                 dilation=1, root_residual=False):
        super(Tree, self).__init__()
        if root_dim == 0:
            root_dim = 2 * out_channels
        if level_root:
            root_dim += in_channels
        if levels == 1:
            self.tree1 = block(in_channels, out_channels, stride,
                               dilation=dilation)
            self.tree2 = block(out_channels, out_channels, 1,
                               dilation=dilation)
        else:
            self.tree1 = Tree(levels - 1, block, in_channels, out_channels,
                              stride, root_dim=0,
                              root_kernel_size=root_kernel_size,
                              dilation=dilation, root_residual=root_residual)
            self.tree2 = Tree(levels - 1, block, out_channels, out_channels,
                              root_dim=root_dim + out_channels,
                              root_kernel_size=root_kernel_size,
                              dilation=dilation, root_residual=root_residual)
        if levels == 1:
            self.root = Root(root_dim, out_channels, root_kernel_size,
                             root_residual)
        self.level_root = level_root
        self.root_dim = root_dim
        self.downsample = None
        self.project = None
        self.levels = levels
        if stride > 1:
            self.downsample = nn.MaxPool2d(stride, stride=stride)
        if in_channels != out_channels:
            self.project = nn.Sequential(
                nn.Conv2d(in_channels, out_channels,
                          kernel_size=1, stride=1, bias=False),
                nn.BatchNorm2d(out_channels, momentum=BN_MOMENTUM, track_running_stats=Track)
            )

    def forward(self, x, residual=None, children=None):
        children = [] if children is None else children
        bottom = self.downsample(x) if self.downsample else x
        residual = self.project(bottom) if self.project else bottom
        if self.level_root:
            children.append(bottom)
        x1 = self.tree1(x, residual)
        if self.levels == 1:
            x2 = self.tree2(x1)
            x = self.root(x2, x1, *children)
        else:
            children.append(x1)
            x = self.tree2(x1, children=children)
        return x
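
# Tree implements DLA's hierarchical deep aggregation: a Tree of depth `levels`
# recursively stacks two subtrees and, at the leaves, merges their outputs
# (plus any `level_root` skip input) through a 1x1-conv Root node.
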
class DLA(nn.Module):
    def __init__(self, levels, channels, num_classes=1000,
                 block=BasicBlock, residual_root=False, linear_root=False):
        super(DLA, self).__init__()
        self.channels = channels
        self.num_classes = num_classes
        self.base_layer = nn.Sequential(
            nn.Conv2d(3, channels[0], kernel_size=7, stride=1,
                      padding=3, bias=False),
            nn.BatchNorm2d(channels[0], momentum=BN_MOMENTUM, track_running_stats=Track),
            nn.ReLU(inplace=True))
        self.level0 = self._make_conv_level(
            channels[0], channels[0], levels[0])
        self.level1 = self._make_conv_level(
            channels[0], channels[1], levels[1], stride=2)
        self.level2 = Tree(levels[2], block, channels[1], channels[2], 2,
                           level_root=False,
                           root_residual=residual_root)
        self.level3 = Tree(levels[3], block, channels[2], channels[3], 2,
                           level_root=True, root_residual=residual_root)
        self.level4 = Tree(levels[4], block, channels[3], channels[4], 2,
                           level_root=True, root_residual=residual_root)
        # modified from the original DLA: level5 keeps stride 16 and uses
        # dilation 2 instead of a further stride-2 downsampling
        self.level5 = Tree(levels[5], block, channels[4], channels[5], 1, dilation=2,
                           level_root=True, root_residual=residual_root)
        # self.level5 = Tree(levels[5], block, channels[4], channels[5], 2,
        #                    level_root=True, root_residual=residual_root)

    def _make_level(self, block, inplanes, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or inplanes != planes:
            downsample = nn.Sequential(
                nn.MaxPool2d(stride, stride=stride),
                nn.Conv2d(inplanes, planes,
                          kernel_size=1, stride=1, bias=False),
                nn.BatchNorm2d(planes, momentum=BN_MOMENTUM, track_running_stats=Track),
            )

        layers = []
        layers.append(block(inplanes, planes, stride, downsample=downsample))
        for i in range(1, blocks):
            layers.append(block(inplanes, planes))

        return nn.Sequential(*layers)

    def _make_conv_level(self, inplanes, planes, convs, stride=1, dilation=1):
        modules = []
        for i in range(convs):
            modules.extend([
                nn.Conv2d(inplanes, planes, kernel_size=3,
                          stride=stride if i == 0 else 1,
                          padding=dilation, bias=False, dilation=dilation),
                nn.BatchNorm2d(planes, momentum=BN_MOMENTUM, track_running_stats=Track),
                nn.ReLU(inplace=True)])
            inplanes = planes
        return nn.Sequential(*modules)

    def forward(self, x):
        y = []
        x = self.base_layer(x)
        for i in range(6):
            x = getattr(self, 'level{}'.format(i))(x)
            y.append(x)

        return y

    def load_pretrained_model(self, data='imagenet', name='dla34', hash='ba72cf86'):
        if name.endswith('.pth'):
            model_weights = torch.load(data + name)
        else:
            model_url = get_model_url(data, name, hash)
            model_weights = model_zoo.load_url(model_url)
        num_classes = len(model_weights[list(model_weights.keys())[-1]])
        self.fc = nn.Conv2d(
            self.channels[-1], num_classes,
            kernel_size=1, stride=1, padding=0, bias=True)
        self.load_state_dict(model_weights, strict=False)


def dla34(pretrained=True, **kwargs):  # DLA-34
    model = DLA([1, 1, 1, 2, 2, 1],
                [16, 32, 64, 128, 256, 512],
                block=BasicBlock, **kwargs)
    if pretrained:
        model.load_pretrained_model(data='imagenet', name='dla34', hash='ba72cf86')
    return model
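
# Quick shape check for the dilated backbone (a sketch; plain DLA needs no
# compiled DCNv2 ops):
#   net = dla34(pretrained=False)
#   feats = net(torch.randn(1, 3, 64, 64))
#   [f.shape[-1] for f in feats]  # -> [64, 32, 16, 8, 4, 4]
# i.e. strides 1, 2, 4, 8, 16, 16; the dilated level5 stays at stride 16
# instead of 32, which is why DLASeg below sets scales[-1] = scales[-2].
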
class Identity(nn.Module):

    def __init__(self):
        super(Identity, self).__init__()

    def forward(self, x):
        return x


def fill_fc_weights(layers):
    for m in layers.modules():
        if isinstance(m, nn.Conv2d):
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)


def fill_up_weights(up):
    # initialize a ConvTranspose2d with a bilinear upsampling kernel
    w = up.weight.data
    f = math.ceil(w.size(2) / 2)
    c = (2 * f - 1 - f % 2) / (2. * f)
    for i in range(w.size(2)):
        for j in range(w.size(3)):
            w[0, 0, i, j] = \
                (1 - math.fabs(i / f - c)) * (1 - math.fabs(j / f - c))
    for c in range(1, w.size(0)):
        w[c, 0, :, :] = w[0, 0, :, :]


class DeformConv(nn.Module):
    def __init__(self, chi, cho):
        super(DeformConv, self).__init__()
        self.actf = nn.Sequential(
            nn.BatchNorm2d(cho, momentum=BN_MOMENTUM, track_running_stats=Track),
            nn.ReLU(inplace=True)
        )
        self.conv = DCN(chi, cho, kernel_size=(3, 3), stride=1, padding=1, dilation=1, deformable_groups=1)

    def forward(self, x):
        x = self.conv(x)
        x = self.actf(x)
        return x


class IDAUp(nn.Module):

    def __init__(self, o, channels, up_f):
        super(IDAUp, self).__init__()
        for i in range(1, len(channels)):
            c = channels[i]
            f = int(up_f[i])
            proj = DeformConv(c, o)
            node = DeformConv(o, o)
            if len(up_f) == 2 and up_f[0] == up_f[1]:
                # same resolution on both levels: a 1x1 conv instead of upsampling
                up = nn.Conv2d(o, o, kernel_size=1, stride=f, bias=False)
            else:
                up = nn.ConvTranspose2d(o, o, f * 2, stride=f,
                                        padding=f // 2, output_padding=0,
                                        groups=o, bias=False)
                fill_up_weights(up)

            setattr(self, 'proj_' + str(i), proj)
            setattr(self, 'up_' + str(i), up)
            setattr(self, 'node_' + str(i), node)

    def forward(self, layers, startp, endp):
        for i in range(startp + 1, endp):
            upsample = getattr(self, 'up_' + str(i - startp))
            project = getattr(self, 'proj_' + str(i - startp))
            layers[i] = upsample(project(layers[i]))
            node = getattr(self, 'node_' + str(i - startp))
            layers[i] = node(layers[i] + layers[i - 1])
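
# IDAUp.forward brings each of layers[startp+1:endp] to the resolution of its
# predecessor (project -> up) and fuses the pair with a deformable `node` conv,
# so layers[endp - 1] ends up aggregating every level in the range.
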
class DLAUp(nn.Module):
    def __init__(self, startp, channels, scales, in_channels=None):
        super(DLAUp, self).__init__()
        self.startp = startp
        if in_channels is None:
            in_channels = channels
        self.channels = channels
        channels = list(channels)
        scales = np.array(scales, dtype=int)
        for i in range(len(channels) - 1):
            j = -i - 2
            setattr(self, 'ida_{}'.format(i),
                    IDAUp(channels[j], in_channels[j:],
                          scales[j:] // scales[j]))
            scales[j + 1:] = scales[j]
            in_channels[j + 1:] = [channels[j] for _ in channels[j + 1:]]

    def forward(self, layers):
        out = [layers[-1]]  # start from the deepest level
        for i in range(len(layers) - self.startp - 1):
            ida = getattr(self, 'ida_{}'.format(i))
            ida(layers, len(layers) - i - 2, len(layers))
            out.insert(0, layers[-1])
        return out


class Interpolate(nn.Module):
    def __init__(self, scale, mode):
        super(Interpolate, self).__init__()
        self.scale = scale
        self.mode = mode

    def forward(self, x):
        x = F.interpolate(x, scale_factor=self.scale, mode=self.mode, align_corners=False)
        return x


class DLASeg(nn.Module):
    def __init__(self, base_name, heads, pretrained, down_ratio, final_kernel,
                 last_level, head_conv, out_channel=0):
        super(DLASeg, self).__init__()
        assert down_ratio in [2, 4, 8, 16]
        self.first_level = int(np.log2(down_ratio))
        self.last_level = last_level
        self.base = globals()[base_name](pretrained=pretrained)
        channels = self.base.channels
        scales = [2 ** i for i in range(len(channels[self.first_level:]))]
        # the dilated level5 has the same resolution as level4
        scales[-1] = scales[-2]
        self.dla_up = DLAUp(self.first_level, channels[self.first_level:], scales)

        if out_channel == 0:
            out_channel = channels[self.first_level]

        self.ida_up = IDAUp(out_channel, channels[self.first_level:self.last_level],
                            [2 ** i for i in range(self.last_level - self.first_level)])

        self.heads = heads
        for head in self.heads:
            classes = self.heads[head]
            fc = nn.Sequential(
                nn.Conv2d(channels[self.first_level], head_conv,
                          kernel_size=3, padding=1, bias=True),
                nn.ReLU(inplace=True),
                nn.Conv2d(head_conv, classes,
                          kernel_size=final_kernel, stride=1,
                          padding=final_kernel // 2, bias=True))
            self.__setattr__(head, fc)

    def forward(self, x):
        x = self.base(x)
        x = self.dla_up(x)

        y = []
        for i in range(self.last_level - self.first_level):
            y.append(x[i].clone())
        self.ida_up(y, 0, len(y))

        z = {}
        for head in self.heads:
            z[head] = self.__getattr__(head)(y[-1])
        return [z]


def peddla_net(num_layers, heads, head_conv=256, down_ratio=4):
    model = DLASeg('dla{}'.format(num_layers), heads,
                   pretrained=True,
                   down_ratio=down_ratio,
                   final_kernel=1,
                   last_level=5,
                   head_conv=head_conv)
    return model


class convolution(nn.Module):
    def __init__(self, k, inp_dim, out_dim, stride=1, with_bn=True):
        super(convolution, self).__init__()

        pad = (k - 1) // 2
        self.conv = nn.Conv2d(inp_dim, out_dim, (k, k), padding=(pad, pad), stride=(stride, stride), bias=not with_bn)
        self.bn = nn.BatchNorm2d(out_dim, track_running_stats=Track) if with_bn else nn.Sequential()
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        conv = self.conv(x)
        bn = self.bn(conv)
        relu = self.relu(bn)
        return relu
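
# A minimal smoke-test sketch (assumes a CUDA device and a compiled DCNv2;
# building the model downloads the ImageNet-pretrained DLA-34 weights):
if __name__ == '__main__':
    heads = {'hm': 1, 'wh': 1, 'reg': 2, 'aed': 4}
    net = peddla_net(34, heads, head_conv=256, down_ratio=4).cuda().eval()
    with torch.no_grad():
        out = net(torch.randn(1, 3, 256, 512).cuda())[-1]
    for name, tensor in out.items():
        # every head predicts at 1/4 resolution, e.g. hm -> (1, 1, 64, 128)
        print(name, tuple(tensor.shape))
--------------------------------------------------------------------------------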