├── out_models └── readme.md ├── PytorchCudaOpExtension ├── adaptive_sigmoid │ ├── adaptive_sigmoid.egg-info │ │ ├── dependency_links.txt │ │ ├── top_level.txt │ │ ├── PKG-INFO │ │ └── SOURCES.txt │ ├── dist │ │ └── adaptive_sigmoid-0.0.0-py3.7-linux-x86_64.egg │ ├── build │ │ └── lib.linux-x86_64-3.7 │ │ │ └── adaptive_sigmoid_gpu.cpython-37m-x86_64-linux-gnu.so │ ├── setup.py │ ├── adaptive_sigmoid.h │ ├── adaptive_sigmoid_wrapper.py │ ├── adaptive_sigmoid.cpp │ └── adaptive_sigmoid_cuda.cu └── perspective_aware_conv2d │ ├── pad_conv2d.egg-info │ ├── dependency_links.txt │ ├── top_level.txt │ ├── SOURCES.txt │ └── PKG-INFO │ ├── dist │ └── pad_conv2d-0.0.0-py3.7-linux-x86_64.egg │ ├── build │ └── lib.linux-x86_64-3.7 │ │ └── pad_conv2d_gpu.cpython-37m-x86_64-linux-gnu.so │ ├── setup.py │ ├── pad_conv2d.h │ ├── pad_conv2d_wrapper.py │ ├── pad_conv2d.cpp │ └── pad_conv2d_cuda.cu ├── data └── readme.md ├── download_models.sh ├── SHA_test.sh ├── options ├── train_options.py ├── test_options.py └── base_options.py ├── README.md ├── net ├── BasicConv2d.py ├── networks.py ├── CSRNet.py ├── CSRPersNet_onlyBack_crop.py ├── CSRPersNet.py └── CSRPersNet_crop.py ├── LICENSE ├── metrics.py ├── op_wrapper ├── adaptive_sigmoid_wrapper.py └── pad_conv2d_wrapper.py ├── config.py ├── test.py ├── utils.py ├── eval └── Estimator.py ├── Dataset └── DatasetConstructor.py └── generate_map.py /out_models/readme.md: -------------------------------------------------------------------------------- 1 | Put models here 2 | -------------------------------------------------------------------------------- /PytorchCudaOpExtension/adaptive_sigmoid/adaptive_sigmoid.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /PytorchCudaOpExtension/perspective_aware_conv2d/pad_conv2d.egg-info/dependency_links.txt: 
-------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /PytorchCudaOpExtension/perspective_aware_conv2d/pad_conv2d.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | pad_conv2d_gpu 2 | -------------------------------------------------------------------------------- /PytorchCudaOpExtension/adaptive_sigmoid/adaptive_sigmoid.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | adaptive_sigmoid_gpu 2 | -------------------------------------------------------------------------------- /data/readme.md: -------------------------------------------------------------------------------- 1 | Put data here, refer to `config.py` to make sure that data files are correctly arranged. 2 | -------------------------------------------------------------------------------- /download_models.sh: -------------------------------------------------------------------------------- 1 | wget -c https://drive.google.com/file/d/1GR0gmoJvNlv5a8o0D9ucfraiNYrQV8Ip/view?usp=sharing 2 | -------------------------------------------------------------------------------- /SHA_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | python test.py --dataset_name='SHA' --mode='crop' --nThreads=1 --gpu_ids='0' --batch_size=1 --net_name='csrpersp_crop' --test_model_name='model_path' 3 | -------------------------------------------------------------------------------- /PytorchCudaOpExtension/adaptive_sigmoid/dist/adaptive_sigmoid-0.0.0-py3.7-linux-x86_64.egg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zhaoyi-Yan/PFDNet/HEAD/PytorchCudaOpExtension/adaptive_sigmoid/dist/adaptive_sigmoid-0.0.0-py3.7-linux-x86_64.egg 
class TrainOptions(BaseOptions):
    """Option set used by training entry points.

    Registers exactly the arguments declared by ``BaseOptions`` and marks
    the run as a training run through ``self.isTrain``.
    """

    def initialize(self, parser):
        """Register the shared arguments on *parser* and flag training mode.

        Returns the parser handed back by ``BaseOptions.initialize``.
        """
        configured_parser = BaseOptions.initialize(self, parser)
        # Read later by BaseOptions.parse(), which copies it onto the options.
        self.isTrain = True
        return configured_parser
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zhaoyi-Yan/PFDNet/HEAD/PytorchCudaOpExtension/perspective_aware_conv2d/build/lib.linux-x86_64-3.7/pad_conv2d_gpu.cpython-37m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /PytorchCudaOpExtension/perspective_aware_conv2d/pad_conv2d.egg-info/PKG-INFO: -------------------------------------------------------------------------------- 1 | Metadata-Version: 1.0 2 | Name: pad-conv2d 3 | Version: 0.0.0 4 | Summary: UNKNOWN 5 | Home-page: UNKNOWN 6 | Author: UNKNOWN 7 | Author-email: UNKNOWN 8 | License: UNKNOWN 9 | Description: UNKNOWN 10 | Platform: UNKNOWN 11 | -------------------------------------------------------------------------------- /PytorchCudaOpExtension/adaptive_sigmoid/adaptive_sigmoid.egg-info/PKG-INFO: -------------------------------------------------------------------------------- 1 | Metadata-Version: 1.0 2 | Name: adaptive-sigmoid 3 | Version: 0.0.0 4 | Summary: UNKNOWN 5 | Home-page: UNKNOWN 6 | Author: UNKNOWN 7 | Author-email: UNKNOWN 8 | License: UNKNOWN 9 | Description: UNKNOWN 10 | Platform: UNKNOWN 11 | -------------------------------------------------------------------------------- /PytorchCudaOpExtension/adaptive_sigmoid/adaptive_sigmoid.egg-info/SOURCES.txt: -------------------------------------------------------------------------------- 1 | adaptive_sigmoid.cpp 2 | adaptive_sigmoid_cuda.cu 3 | setup.py 4 | adaptive_sigmoid.egg-info/PKG-INFO 5 | adaptive_sigmoid.egg-info/SOURCES.txt 6 | adaptive_sigmoid.egg-info/dependency_links.txt 7 | adaptive_sigmoid.egg-info/top_level.txt -------------------------------------------------------------------------------- /PytorchCudaOpExtension/adaptive_sigmoid/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import CppExtension, 
BuildExtension, CUDAExtension 3 | setup(name='adaptive_sigmoid', ext_modules=[CUDAExtension('adaptive_sigmoid_gpu',['adaptive_sigmoid.cpp', 'adaptive_sigmoid_cuda.cu']),], cmdclass={'build_ext': BuildExtension}) -------------------------------------------------------------------------------- /PytorchCudaOpExtension/perspective_aware_conv2d/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import CppExtension, BuildExtension, CUDAExtension 3 | setup(name='pad_conv2d', 4 | ext_modules=[CUDAExtension('pad_conv2d_gpu', ['pad_conv2d.cpp', 'pad_conv2d_cuda.cu']),], 5 | cmdclass={'build_ext': BuildExtension}) 6 | -------------------------------------------------------------------------------- /options/test_options.py: -------------------------------------------------------------------------------- 1 | 2 | from .base_options import BaseOptions 3 | 4 | 5 | class TestOptions(BaseOptions): 6 | def initialize(self, parser): 7 | parser = BaseOptions.initialize(self, parser) 8 | # TODO: implemented me 9 | parser.add_argument('--results_dir', type=str, default='./results/', help='saves results here.') 10 | self.isTrain = False 11 | 12 | return parser -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PFDNet 2 | 3 | # Data preparation: 4 | Download the ShanghaiTech dataset, then generate the density map files via `generate_map.py` and put them in `./data`. Next, download the perspective maps from the paper `Revisiting Perspective Information for Efficient Crowd Counting`. 5 | Put the corresponding files inside `./data` as well, arranging them according to the paths in `config.py`. Note that you need to convert the `mat` files of the perspective maps to v7.3 yourself. 6 | Then `h5py` can read the mat files correctly. 
class BasicConv2d(nn.Module):
    """A ``Conv2d`` optionally followed by ``BatchNorm2d`` and an activation.

    Args:
        in_channels: Number of input channels of the convolution.
        out_channels: Number of output channels of the convolution.
        kernel_size: Kernel size forwarded to ``nn.Conv2d``.
        stride: Stride forwarded to ``nn.Conv2d``.
        pad: Padding forwarded to ``nn.Conv2d``.
        if_Bn: When true, a ``BatchNorm2d(out_channels)`` is applied after
            the convolution.
        activation: Module (or callable) applied last; pass ``None`` to
            disable the activation entirely.
    """

    # NOTE: the default activation object is created once, when the class is
    # defined, so every instance built without an explicit ``activation``
    # holds the same ``nn.ReLU`` module object.
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride,
                 pad,
                 if_Bn=False,
                 activation=nn.ReLU(inplace=True)):
        super(BasicConv2d, self).__init__()
        self.conv2d = nn.Conv2d(in_channels, out_channels, kernel_size,
                                stride=stride, padding=pad)
        self.if_Bn = if_Bn
        if self.if_Bn:
            # Only registered when requested, so the state dict has no
            # ``Bn.*`` entries otherwise.
            self.Bn = nn.BatchNorm2d(out_channels)
        self.activation = activation

    def forward(self, x):
        """Apply convolution, then optional batch norm, then optional activation."""
        x = self.conv2d(x)
        if self.if_Bn:
            x = self.Bn(x)
        # Identity check: "no activation" is signalled by the None singleton.
        if self.activation is not None:
            x = self.activation(x)
        return x
NotImplementedError('Unrecognized model: '+net_name) 27 | return net 28 | -------------------------------------------------------------------------------- /PytorchCudaOpExtension/adaptive_sigmoid/adaptive_sigmoid.h: -------------------------------------------------------------------------------- 1 | #ifndef ADAPTIVE_SIGMOID 2 | #define ADAPTIVE_SIGMOID 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | extern THCState *state; 10 | typedef std::vector TShape; 11 | 12 | void adaptive_sigmoid_fucntion( 13 | cudaStream_t stream, 14 | const float* data_in, 15 | const float* params, 16 | float* output, 17 | int channels, int height, int width 18 | ); 19 | 20 | void adaptive_sigmoid_input_grad( 21 | cudaStream_t stream, 22 | const float* data_in, 23 | const float* grad_outputs, 24 | const float* params, 25 | float* grad_input, 26 | int channels, int height, int width 27 | ); 28 | 29 | void adaptive_sigmoid_params_grad( 30 | cudaStream_t stream, 31 | const float* data_in, 32 | const float* grad_outputs, 33 | const float* params, 34 | float* grad_params, 35 | int channels, int height, int width, 36 | bool alpha_update, 37 | bool beta_update, 38 | bool gamma_update, 39 | bool theta_update 40 | ); 41 | 42 | #endif -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Zhaoyi-Yan 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice 
class AdaptiveSigmoidFunction(Function):
    """Autograd bridge to the compiled ``adaptive_sigmoid_gpu`` extension.

    ``forward`` and ``backward`` delegate to the extension's ``forward`` and
    ``backward`` entry points (imported in this module as
    ``adaptive_sigmoid``).
    """

    @staticmethod
    def forward(ctx, *args):
        """Run the extension's forward pass.

        Args:
            ctx: Autograd context; the inputs are saved on it for backward.
            *args: Exactly two tensors, ``(input, params)``.

        Raises:
            TypeError: If the number of positional arguments is not two.
        """
        if len(args) != 2:
            # Fail loudly: returning None here would only surface later as
            # an unrelated error in the caller.
            raise TypeError(
                "AdaptiveSigmoidFunction.forward expects (input, params), "
                "got {} argument(s)".format(len(args)))
        data, params = args
        output = adaptive_sigmoid.forward(data, params)
        ctx.save_for_backward(data, params)
        return output

    @staticmethod
    def backward(ctx, *grad_outputs):
        """Return the gradients w.r.t. ``input`` and ``params``.

        Raises:
            TypeError: If more or fewer than one output gradient is given.
        """
        if len(grad_outputs) != 1:
            raise TypeError(
                "AdaptiveSigmoidFunction.backward expects a single output "
                "gradient, got {}".format(len(grad_outputs)))
        data, params = ctx.saved_tensors
        # NOTE(review): the C++ binding shipped next to this wrapper declares
        # ``backward`` with four extra bool update flags, while only three
        # arguments are passed here -- confirm which build of the extension
        # this wrapper is meant to be used with.
        grad_input, grad_weight = adaptive_sigmoid.backward(
            data, params, grad_outputs[0])
        return grad_input, grad_weight


class AdaptiveSigmoid(nn.Module):
    """Module wrapper holding the four learnable adaptive-sigmoid parameters.

    Args:
        alpha, beta, gamma, theta: Initial values, stored in this order in
            the float32 parameter vector ``self.params``.
    """

    def __init__(self, alpha, beta, gamma, theta):
        super(AdaptiveSigmoid, self).__init__()
        self.params = nn.Parameter(
            torch.tensor([alpha, beta, gamma, theta], dtype=torch.float32))

    def forward(self, x):
        """Apply the adaptive sigmoid to *x* using ``self.params``."""
        return AdaptiveSigmoidFunction.apply(x, self.params)
class JointLoss(nn.Module):
    """Weighted sum of a summed MSE term and a mean BCE term.

    Args:
        alpha: Weight of the MSE term (``reduction='sum'``).
        beta: Weight of the BCE term (``reduction='mean'``).
    """

    def __init__(self, alpha, beta):
        super(JointLoss, self).__init__()
        # ``reduction=`` replaces the deprecated ``size_average=`` flag:
        # size_average=False -> 'sum', size_average=True -> 'mean'.
        self.MSELoss = nn.MSELoss(reduction='sum')
        self.BCELoss = nn.BCELoss(reduction='mean')
        self.alpha = alpha
        self.beta = beta

    def forward(self, x, gt_map, target_map):
        """Return ``alpha * MSE(x, gt_map) + beta * BCE(x, target_map)``."""
        mse = self.MSELoss(x, gt_map) * self.alpha
        bce = self.BCELoss(x, target_map) * self.beta
        return mse + bce


class MSEScalarLoss(nn.Module):
    """Squared difference of the two map totals, divided by the size of ``x``."""

    def __init__(self):
        super(MSEScalarLoss, self).__init__()

    def forward(self, x, gt_map):
        """Return ``(x.sum() - gt_map.sum()) ** 2 / x.numel()``."""
        # numel() equals the product of the shape and, unlike a reduce over
        # the shape tuple, is also defined for 0-dimensional tensors.
        return torch.pow(x.sum() - gt_map.sum(), 2) / x.numel()


class AEBatch(nn.Module):
    """Per-sample absolute error between a summed map and a target value."""

    def __init__(self):
        super(AEBatch, self).__init__()

    def forward(self, estimated_density_map, gt_num):
        """Sum the map over dims 1-3 and return ``|sum - gt_num|`` per sample."""
        predicted = torch.sum(estimated_density_map, dim=(1, 2, 3))
        return torch.abs(predicted - gt_num)


class SEBatch(nn.Module):
    """Per-sample squared error between a summed map and a target value."""

    def __init__(self):
        super(SEBatch, self).__init__()

    def forward(self, estimated_density_map, gt_num):
        """Sum the map over dims 1-3 and return ``(sum - gt_num) ** 2`` per sample."""
        predicted = torch.sum(estimated_density_map, dim=(1, 2, 3))
        return torch.pow(predicted - gt_num, 2)
class config(object):
    """Run settings derived from the parsed command-line options.

    Copies a few option values, picks the torch device, builds the model
    save directory and fills in the dataset-specific sample counts and
    data directories.

    Args:
        opt: Parsed options; the attributes ``dataset_name``,
            ``batch_size``, ``gpu_ids``, ``checkpoints_dir``, ``name``,
            ``mode`` and ``prefix_path`` are read.

    Raises:
        NameError: If ``opt.dataset_name`` is anything other than ``"SHA"``.
    """

    def __init__(self, opt):
        self.opt = opt
        # Large initial values for the best-so-far trackers.
        self.min_mae = 10240000
        self.min_loss = 10240000
        self.dataset_name = opt.dataset_name
        self.batch_size = opt.batch_size

        # First listed GPU if any were requested, CPU otherwise.
        if opt.gpu_ids:
            self.device = torch.device('cuda:{}'.format(opt.gpu_ids[0]))
        else:
            self.device = torch.device('cpu')

        # Directory where models are saved.
        self.model_save_path = os.path.join(
            opt.checkpoints_dir, opt.name, opt.dataset_name)
        self.mode = opt.mode
        prefix_path = opt.prefix_path  # root folder of the dataset files

        if self.dataset_name != "SHA":
            raise NameError("Only SHA is released currently")

        self.eval_num = 182
        self.train_num = 300

        train_root = prefix_path + "/part_A_final/train_data"
        test_root = prefix_path + "/part_A_final/test_data"

        self.train_gt_map_path = train_root + "/gt_map_sigma=4_k=7"
        self.train_img_path = train_root + "/images"
        self.train_pers_path = train_root + "/perspective_gt"
        self.eval_gt_map_path = test_root + "/gt_map_sigma=4_k=7"
        self.eval_img_path = test_root + "/images"
        self.eval_gt_path = test_root + "/ground_truth"
        self.eval_pers_path = test_root + "/perspective_gt"
class CSRNet(nn.Module):
    """Density-map regressor: VGG16-BN front end plus a dilated-conv back end.

    The front end is the first 33 modules of torchvision's ``vgg16_bn``
    feature extractor, loaded with pretrained weights. The back end is
    built by :func:`make_layers` from ``self.backend_feat`` and is followed
    by a 1x1 convolution producing a single channel.
    """

    def __init__(self):
        super(CSRNet, self).__init__()
        # (out_channels, dilation) for each back-end convolution.
        self.backend_feat = [(512, 2), (512, 2), (512, 2), (256, 2), (128, 2), (64, 2)]
        vgg_features = list(models.vgg16_bn(True).children())[0]
        self.front_end = nn.Sequential(*list(vgg_features.children())[0:33])
        self.back_end = make_layers(self.backend_feat, in_channels=512, batch_norm=True)
        self.output_layer = nn.Conv2d(64, 1, kernel_size=1)

        # The output layer is a single Conv2d with a bias, so it is
        # initialised directly instead of by walking ``.modules()``.
        nn.init.normal_(self.output_layer.weight, std=0.01)
        nn.init.constant_(self.output_layer.bias, 0)

    def forward(self, x):
        """Return the predicted map, bilinearly upsampled by a factor of 4."""
        features = self.front_end(x)
        decoded = self.back_end(features)
        output = self.output_layer(decoded)
        return functional.interpolate(
            output, scale_factor=4, mode='bilinear', align_corners=False)


def make_layers(cfg, in_channels, batch_norm=False):
    """Build a ``nn.Sequential`` of 3x3 dilated convolutions from *cfg*.

    Args:
        cfg: Iterable of ``(v, atrous)`` pairs. ``v`` is the number of
            output channels, or the string ``'M'`` for a 2x2 max-pool (the
            second element is ignored in that case). ``atrous`` is used as
            both dilation and padding of the convolution.
        in_channels: Channel count feeding the first convolution.
        batch_norm: Insert ``BatchNorm2d`` between each conv and its ReLU.

    Returns:
        ``nn.Sequential`` containing the generated layers in order.
    """
    layers = []
    for v, atrous in cfg:
        if v == 'M':
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
            continue
        layers.append(
            nn.Conv2d(in_channels, v, kernel_size=3, padding=atrous, dilation=atrous))
        if batch_norm:
            layers.append(nn.BatchNorm2d(v))
        layers.append(nn.ReLU(inplace=True))
        in_channels = v
    return nn.Sequential(*layers)
class BaseOptions():
    """Command-line options shared by the train and test entry points.

    Subclasses extend :meth:`initialize` with their own arguments and are
    expected to set ``self.isTrain`` there; :meth:`parse` reads it.
    """

    def __init__(self):
        self.initialized = False
        # Built lazily by gather_options() and reused on later calls.
        self.parser = None

    def initialize(self, parser):
        """Register the shared arguments on *parser* and return it."""
        parser.add_argument('--dataset_name', default='SHA', help='SHA|SHB|QNRF')
        parser.add_argument('--test_model_name', default='', help='path of pretrained model')
        parser.add_argument('--batch_size', type=int, default=1, help='input batch size')
        parser.add_argument('--net_name', type=str, default='csrpersp', help='csrnet|csrpersp')
        parser.add_argument('--mode', type=str, default='whole', help='whole|crop')
        parser.add_argument('--prefix_path', type=str, default='./data', help='path of the dataset folder')
        parser.add_argument('--name', type=str, default='Csrnet_persp', help='name of the experiment.s')
        parser.add_argument('--gpu_ids', type=str, default='0', help='gpu ids: e.g. 0 0,1,2, 0,2')
        parser.add_argument('--alpha', type=float, default=1, help='alpha in adaptive sigmoid')
        parser.add_argument('--beta', type=float, default=1, help='beta in adaptive sigmoid')
        parser.add_argument('--gamma', type=float, default=1, help='gamma in adaptive sigmoid')
        parser.add_argument('--theta', type=float, default=2, help='theta in adaptive sigmoid')
        parser.add_argument('--nThreads', default=2, type=int, help='# threads for loading data')
        parser.add_argument('--checkpoints_dir', type=str, default='./output', help='models are saved here')
        self.initialized = True
        return parser

    def gather_options(self, options=None):
        """Parse *options* (or ``sys.argv`` when ``None``) into a namespace.

        The parser is created on first use and cached on ``self.parser``,
        so the method can be called repeatedly on the same instance.
        """
        parser = getattr(self, 'parser', None)
        if not self.initialized or parser is None:
            parser = argparse.ArgumentParser(
                formatter_class=argparse.ArgumentDefaultsHelpFormatter)
            parser = self.initialize(parser)
            self.parser = parser
        # parse_args(None) falls back to sys.argv, like a bare parse_args().
        return parser.parse_args(options)

    def print_options(self, opt):
        """Print all option values and save the same text to ``opt.txt``.

        The file is written under
        ``<checkpoints_dir>/<name>/<dataset_name>/``. Values that differ
        from the parser default are annotated with that default.
        """
        lines = ['----------------- Options ---------------']
        for k, v in sorted(vars(opt).items()):
            comment = ''
            default = self.parser.get_default(k)
            if v != default:
                comment = '\t[default: %s]' % str(default)
            lines.append('{:>25}: {:<30}{}'.format(str(k), str(v), comment))
        lines.append('----------------- End -------------------')
        message = '\n'.join(lines)
        print(message)

        # save to the disk
        expr_dir = os.path.join(opt.checkpoints_dir, opt.name, opt.dataset_name)
        util.mkdirs(expr_dir)
        file_name = os.path.join(expr_dir, 'opt.txt')
        with open(file_name, 'wt') as opt_file:
            opt_file.write(message)
            opt_file.write('\n')

    def parse(self, options=None):
        """Parse, print/save and post-process the options.

        Exports ``CUDA_VISIBLE_DEVICES`` from the raw ``--gpu_ids`` string,
        then replaces ``opt.gpu_ids`` with the indices ``0..k-1``, where
        ``k`` is the number of non-negative ids requested. When ``k > 0``
        the first index is made the current CUDA device.

        Returns:
            The processed options namespace (also stored on ``self.opt``).
        """
        opt = self.gather_options(options=options)
        opt.isTrain = self.isTrain   # train or test; set by the subclass

        self.print_options(opt)

        # set gpu ids
        os.environ["CUDA_VISIBLE_DEVICES"] = opt.gpu_ids
        # Blank tokens (e.g. from a trailing comma) are skipped.
        requested = [int(token) for token in opt.gpu_ids.split(',') if token.strip()]
        visible = [gpu_id for gpu_id in requested if gpu_id >= 0]
        # re-order gpu ids: the kept ids are renumbered from 0
        opt.gpu_ids = list(range(len(visible)))
        if len(opt.gpu_ids) > 0:
            torch.cuda.set_device(opt.gpu_ids[0])

        self.opt = opt
        return self.opt
def get_density_map_gaussian(N, M, points, adaptive_kernel=False, fixed_value=15):
    """Accumulate one normalised Gaussian per annotated point into a map.

    Args:
        N, M: Height and width of the returned ``float32`` array.
        points: Sequence of 2-element coordinates. The first component is
            used as the column and the second as the row, each
            integer-divided by 8 before placement.
        adaptive_kernel: When true, sigma is derived from the distances to
            the three nearest other points; otherwise ``fixed_value`` is
            used.
        fixed_value: Sigma used when the adaptive estimate is disabled or
            not available.

    Returns:
        The ``(N, M)`` density map. It is all zeros when ``points`` is empty.
    """
    density_map = np.zeros([N, M], dtype=np.float32)
    h, w = density_map.shape[:2]
    # NOTE(review): all writes below are confined to the top-left
    # (N // 8, M // 8) region of the N x M map -- confirm this matches what
    # the callers expect.
    h = h // 8
    w = w // 8
    # Count points along the first axis. Squeezing first would collapse a
    # single (1, 2) point to shape (2,) and report two points.
    num_gt = len(points)
    if num_gt == 0:
        return density_map

    if adaptive_kernel:
        # referred from https://github.com/vlad3996/computing-density-maps/blob/master/make_ShanghaiTech.ipynb
        # Imported explicitly: a plain ``import scipy`` does not guarantee
        # that the ``spatial`` submodule is loaded.
        from scipy.spatial import KDTree
        leafsize = 2048
        tree = KDTree(points.copy(), leafsize=leafsize)
        distances = tree.query(points, k=4)[0]

    for idx, p in enumerate(points):
        p = np.round(p).astype(int)
        # Swap to (row, col) on the reduced grid and clamp to its bounds.
        p[0], p[1] = min(h-1, p[1] // 8), min(w-1, p[0] // 8)

        sigma = fixed_value  # np.average([h, w]) / 2. / 2.
        if num_gt > 1 and adaptive_kernel:
            neighbour_sum = np.sum(distances[idx][1:4])
            # With fewer than four points the query pads with infinite
            # distances; keep the fixed sigma in that case.
            if np.isfinite(neighbour_sum):
                sigma = int(neighbour_sum // 3 * 0.3)
        sigma = max(1, sigma)

        gaussian_radius = sigma * 3
        # Separable kernel: the outer product of the 1-D kernel with itself.
        kernel_1d = cv2.getGaussianKernel(gaussian_radius*2+1, sigma)
        gaussian_map = np.multiply(kernel_1d, kernel_1d.T)
        x_left, x_right, y_up, y_down = 0, gaussian_map.shape[1], 0, gaussian_map.shape[0]
        # cut the gaussian kernel
        if p[1] < 0 or p[0] < 0:
            continue
        if p[1] < gaussian_radius:
            x_left = gaussian_radius - p[1]
        if p[0] < gaussian_radius:
            y_up = gaussian_radius - p[0]
        if p[1] + gaussian_radius >= w:
            x_right = gaussian_map.shape[1] - (gaussian_radius + p[1] - w) - 1
        if p[0] + gaussian_radius >= h:
            y_down = gaussian_map.shape[0] - (gaussian_radius + p[0] - h) - 1
        density_map[
            max(0, p[0]-gaussian_radius):min(density_map.shape[0] // 8, p[0]+gaussian_radius+1),
            max(0, p[1]-gaussian_radius):min(density_map.shape[1] // 8, p[1]+gaussian_radius+1)
        ] += gaussian_map[y_up:y_down, x_left:x_right]
    return density_map
class PerspectiveDilatedConv2dLayer(Module):
    """Holds the weight and bias of a perspective-aware dilated convolution.

    The actual computation is delegated to
    ``PerspectiveDilatedConv2dFunction``; this module only owns the
    parameters and the two stride values.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride_h, stride_w):
        super().__init__()
        self.stride_h, self.stride_w = stride_h, stride_w

        # Square kernel: (out_channels, in_channels, k, k).
        kernel_shape = (out_channels, in_channels, kernel_size, kernel_size)
        kernel = torch.zeros(*kernel_shape, dtype=torch.float32)
        offset = torch.zeros(out_channels, dtype=torch.float32)

        # Register weight before bias so the state_dict key order is kept.
        self.weight = nn.Parameter(kernel)
        self.bias = nn.Parameter(offset)

        # Bias stays at zero; only the weight is Xavier-initialised.
        nn.init.xavier_uniform_(self.weight, gain=1)

    def forward(self, inputs, rate_map):
        """Apply the custom op to ``inputs`` using ``rate_map``."""
        call_args = (
            inputs,
            self.weight,
            rate_map,
            self.bias,
            self.stride_h,
            self.stride_w,
        )
        return PerspectiveDilatedConv2dFunction.apply(*call_args)
class Estimator(object):
    """Run a model over an evaluation loader and report MAE, RMSE and loss."""

    def __init__(self, setting, eval_loader, criterion=torch.nn.MSELoss(reduction="sum")):
        """
        Args:
            setting: options object; this class reads ``device``, ``mode``
                and ``eval_gt_path`` from it.
            eval_loader: iterable yielding
                ``(img_path, img, gt_map, perspective_map)`` batches.
            criterion: loss applied to ``(prediction_map, gt_map)``.
        """
        self.setting = setting
        self.ae_batch = AEBatch().to(self.setting.device)
        self.se_batch = SEBatch().to(self.setting.device)
        self.criterion = criterion
        self.eval_loader = eval_loader

    def evaluate(self, model):
        """Evaluate ``model`` on every batch of ``eval_loader``.

        Returns:
            ``(MAE, RMSE, mean loss, accumulated wall-clock seconds)``.

        Raises:
            ValueError: if ``setting.mode`` is neither ``'crop'`` nor
                ``'whole'``.
        """
        mode = self.setting.mode
        # Fail early with a clear message; previously an unknown mode only
        # surfaced later as an unbound `prediction_map`.
        if mode not in ('crop', 'whole'):
            raise ValueError("unsupported evaluation mode: %r (expected 'crop' or 'whole')" % (mode,))

        device = self.setting.device
        net = model.eval()
        MAE_, MSE_, loss_ = [], [], []
        time_cost = 0
        for eval_img_path, eval_img, eval_gt, eval_pers in self.eval_loader:
            eval_img_path = eval_img_path[0]
            eval_img = eval_img.to(device)
            eval_gt = eval_gt.to(device)
            # Keep the perspective map on the same device as the image.
            eval_pers = eval_pers.to(device)

            start = time.time()
            with torch.no_grad():
                if mode == 'crop':
                    # test cropped patches: drop only the loader's batch
                    # dimension so other size-1 dimensions are kept.
                    eval_patchs = torch.squeeze(eval_img, dim=0)
                    eval_pers = torch.squeeze(eval_pers, dim=0)
                    eval_prediction = net(eval_patchs, eval_pers)
                    prediction_map = torch.zeros(eval_gt.shape).to(device)
                    self.test_crops(eval_prediction.shape, eval_prediction, prediction_map)
                else:
                    # test whole images
                    prediction_map = net(eval_img, eval_pers)
                gt_counts = self.get_gt_num(self.setting.eval_gt_path, eval_img_path)
                # calculate metrics
                batch_ae = self.ae_batch(prediction_map, gt_counts).data.cpu().numpy()
                batch_se = self.se_batch(prediction_map, gt_counts).data.cpu().numpy()
                loss = self.criterion(prediction_map, eval_gt)
                loss_.append(loss.data.item())
                MAE_.append(batch_ae)
                MSE_.append(batch_se)
            # Only synchronise when CUDA exists; the call raises on CPU-only
            # installations.
            if torch.cuda.is_available():
                torch.cuda.synchronize()
            end = time.time()
            time_cost += (end - start)

        # return the validate loss, validate MAE and validate RMSE
        MAE_, MSE_, loss_ = np.reshape(MAE_, [-1]), np.reshape(MSE_, [-1]), np.reshape(loss_, [-1])
        return np.mean(MAE_), np.sqrt(np.mean(MSE_)), np.mean(loss_), time_cost

    def get_gt_num(self, eval_gt_path, img_path):
        """Return the number of annotated points stored for ``img_path``.

        The ``.mat`` file name is derived from the image name by replacing
        ``IMG_`` with ``GT_IMG_`` and ``.jpg`` with ``.mat``.
        """
        tmp_mat_name = os.path.basename(img_path).replace('IMG_', 'GT_IMG_').replace('.jpg', '.mat')
        gt_path = os.path.join(eval_gt_path, os.path.basename(tmp_mat_name))
        gt_counts = len(scio.loadmat(gt_path)['image_info'][0][0][0][0][0])
        return gt_counts

    @staticmethod
    def _crop_span(size, idx):
        """Return ``(start, length, dest)`` for patch ``idx`` (0..2) on one axis.

        ``start``/``length`` select the part of the patch that is kept and
        ``dest`` is where that part begins in the stitched map.
        """
        start = math.floor(size / 4)
        length = size // 2
        dest = math.floor(3 * size / 4) + (size // 2) * (idx - 1)
        if idx == 0:
            # First patch keeps its leading border.
            length = math.floor(3 * size / 4)
            start = 0
            dest = 0
        elif idx == 2:
            # Last patch keeps its trailing border.
            length = math.ceil(3 * size / 4)
        return start, length, dest

    def test_crops(self, eval_shape, eval_p, pred_m):
        """Stitch the 3x3 grid of patch predictions ``eval_p`` into ``pred_m``.

        ``eval_shape`` is the shape of ``eval_p``; patch ``i * 3 + j`` is the
        one at grid row ``i`` and column ``j``. ``pred_m`` is updated in place.
        """
        patch_h, patch_w = eval_shape[2], eval_shape[3]
        for i in range(3):
            start_h, valid_h, pred_h = self._crop_span(patch_h, i)
            for j in range(3):
                start_w, valid_w, pred_w = self._crop_span(patch_w, j)
                k = i * 3 + j
                pred_m[:, :, pred_h:pred_h + valid_h, pred_w:pred_w + valid_w] += \
                    eval_p[k:k + 1, :, start_h:start_h + valid_h, start_w:start_w + valid_w]
// Accumulates the gradient of the adaptive sigmoid with respect to its four
// parameters (alpha, beta, gamma, theta) into grad_params[0..3].
//
// The matching forward kernel computes
//     out = gamma * (sigmoid(alpha * (x - beta)) - theta)
// so the per-element partial derivatives are
//     d/d_alpha = gamma * s * (1 - s) * (x - beta)
//     d/d_beta  = gamma * s * (1 - s) * (-alpha)
//     d/d_gamma = s - theta
//     d/d_theta = -gamma
// with s = sigmoid(alpha * (x - beta)). Each *_update flag enables the
// accumulation for the corresponding parameter.
__global__ void adaptive_sigmoid_params_grad_kernel(
    int n,
    const float* data_in,
    const float* grad_output,
    const float* params,
    float* grad_params,
    bool alpha_update,
    bool beta_update,
    bool gamma_update,
    bool theta_update
){
    // The parameters do not depend on the element index; read them once.
    const float alpha = params[0];
    const float beta = params[1];
    const float gamma = params[2];
    const float theta = params[3];

    CUDA_KERNEL_LOOP(index, n){
        float value = data_in[index];
        float d_grad_output = grad_output[index];
        float efx = exp(- alpha * (value - beta));
        float sigmoid = 1 / (1 + efx);
        // s * (1 - s) equals efx / (1 + efx)^2 but stays finite (0) when
        // efx overflows to inf, where the quotient form would give NaN.
        float patial = sigmoid * (1 - sigmoid);

        float d_alpha = gamma * patial * (value - beta);
        float d_beta = gamma * patial * (- alpha);
        // Forward is gamma * (sigmoid - theta), so the theta term belongs in
        // the gamma derivative as well.
        float d_gamma = sigmoid - theta;
        float d_theta = -gamma;

        if (alpha_update)
            atomicAdd(grad_params + 0, d_alpha * d_grad_output);
        if (beta_update)
            atomicAdd(grad_params + 1, d_beta * d_grad_output);
        if (gamma_update)
            atomicAdd(grad_params + 2, d_gamma * d_grad_output);
        if (theta_update)
            atomicAdd(grad_params + 3, d_theta * d_grad_output);
    }
}
class Frontend(nn.Module):
    """VGG16-BN feature extractor that also resizes the perspective map.

    The first 33 layers of ``vgg16_bn().features`` are used unchanged; the
    perspective map is not consumed here, only resized for later stages.
    """

    def __init__(self, pretrain=True, **kwargs):
        """
        Args:
            pretrain: forwarded to ``models.vgg16_bn`` to select pretrained
                weights. (Previously the flag was ignored and pretrained
                weights were always requested.)
            **kwargs: accepted for signature compatibility; unused here.
        """
        super(Frontend, self).__init__()
        vgg_features = list(models.vgg16_bn(pretrain).children())[0]
        self.front_end = nn.Sequential(*(list(vgg_features.children())[0:33]))

    def forward(self, x, perspective_map):
        """Return ``(features, perspective_map)`` at the feature resolution."""
        x = self.front_end(x)
        # Resize the perspective map, not the features, to the feature size.
        # Interpolating `x` to its own size was a no-op that discarded the
        # real perspective map.
        # NOTE(review): checkpoints trained with the old behaviour effectively
        # saw the feature tensor as "perspective" -- confirm before reusing.
        perspective_map = F.interpolate(perspective_map, (x.shape[2], x.shape[3]))
        return x, perspective_map
class CSRPersNet_onlyBack_BN(nn.Module):
    """Frontend + perspective-aware backend + 1x1 output convolution."""

    def __init__(self, load_path=None, is_relu=False, **kwargs):
        """
        Args:
            load_path: optional checkpoint path. Its ``back_end`` entries are
                renamed onto the ``pad_conv2d_<n>`` layers before loading.
            is_relu: apply ReLU to the upsampled output in ``forward``.
            **kwargs: forwarded to ``Frontend`` and ``Backend``. When
                ``load_path`` is given, ``kwargs['sigma']`` supplies the
                initial rate-map parameters.
                # NOTE(review): 'sigma' is the key read by the equivalent
                # loader in net/CSRPersNet.py -- confirm it matches
                # AdaptiveSigmoid's keyword.
        """
        super(CSRPersNet_onlyBack_BN, self).__init__()
        self.is_relu = is_relu
        self.front_end = Frontend(True, **kwargs)
        self.back_end = Backend(512, **kwargs)
        self.output_layer = nn.Conv2d(64, 1, kernel_size=1)
        if load_path is not None:
            # map_location lets a GPU-saved checkpoint load on any machine;
            # load_state_dict copies the values onto the module's device.
            state_dict = torch.load(load_path, map_location='cpu')
            self.load_state_dict(self._remap_state_dict(state_dict, kwargs))
        else:
            # Only the output layer (a single Conv2d) is initialised here.
            nn.init.normal_(self.output_layer.weight, std=0.01)
            if self.output_layer.bias is not None:
                nn.init.constant_(self.output_layer.bias, 0)

    @staticmethod
    def _remap_state_dict(state_dict, kwargs):
        """Rename ``back_end`` checkpoint entries to this model's key layout."""
        new_state_dict = OrderedDict()
        count = 1
        for k, v in state_dict.items():
            if 'back_end' not in k:
                new_state_dict[k] = v
                continue
            name_prefix = "back_end.pad_conv2d_" + str(count)
            if 'weight' in k:
                # Was torch.FloatTensor(*kwargs), which unpacks the dict's
                # keys (strings) instead of the parameter values.
                new_state_dict[name_prefix + '.rate_map_generator.params'] = torch.FloatTensor(kwargs['sigma'])
                new_state_dict[name_prefix + '.perspective_dilated_conv2d.weight'] = v
            elif 'bias' in k:
                new_state_dict[name_prefix + '.perspective_dilated_conv2d.bias'] = v
                # A bias entry closes the current layer; move on to the next.
                count += 1
        return new_state_dict

    def forward(self, x, perspective_map):
        """Return the output-layer map upsampled 4x (bilinear)."""
        x, perspective_map = self.front_end(x, perspective_map)
        x = self.back_end(x, perspective_map)
        x = self.output_layer(x)

        x = F.interpolate(x, (x.shape[2] * 4, x.shape[3] * 4), mode='bilinear', align_corners=False)

        if self.is_relu:
            x = F.relu(x)
        return x

    def get_params(self):
        """Collect parameters by kind.

        Returns:
            ``(conv_params, bn_params, ada_sig_params)``. The lists are also
            stored on the instance under the same names.
        """
        self.ada_sig_params = []
        self.conv_params = []
        self.bn_params = []
        for m in self.modules():
            if isinstance(m, AdaptiveSigmoid):
                self.ada_sig_params.append(m.params)
            elif isinstance(m, (nn.Conv2d, PerspectiveDilatedConv2dLayer)):
                self.conv_params.append(m.weight)
                self.conv_params.append(m.bias)
            elif isinstance(m, nn.BatchNorm2d):
                self.bn_params.append(m.weight)
                self.bn_params.append(m.bias)
        return self.conv_params, self.bn_params, self.ada_sig_params
class EvalDatasetConstructor(DatasetConstructor):
    """Evaluation dataset yielding image, density map and perspective map.

    Two modes are supported:

    * ``'crop'``  -- the image and perspective map are cut into a 3x3 grid of
      half-size patches with half-patch stride; the ground truth is
      sum-pooled by 2.
    * ``'whole'`` -- the full image is returned; the ground truth is
      sum-pooled by 8.
    """

    def __init__(self,
                 validate_num,
                 data_dir_path,
                 gt_dir_path,
                 pers_dir_path=None,
                 mode="crop",
                 dataset_name="SHA",
                 device=None,
                 ):
        """
        Args:
            validate_num: value reported by ``__len__``.
            data_dir_path: directory searched for ``*.jpg`` images.
            gt_dir_path: directory holding the ``GT_IMG_*.npy`` density maps.
            pers_dir_path: directory holding the perspective ``.mat`` files;
                ``None`` or ``""`` means "use an all-zero perspective map".
            mode: ``'crop'`` or ``'whole'``.
            dataset_name: passed to ``resize``.
            device: stored on the instance; not used by this class.
        """
        super(EvalDatasetConstructor, self).__init__()
        self.validate_num = validate_num
        # Optional explicit (img_path, gt_path, pers_path, index) tuples for
        # 'whole' mode. Nothing here fills it, so it is only honoured when a
        # caller populates it.
        self.imgs = []
        self.data_root = data_dir_path
        self.gt_root = gt_dir_path
        self.pers_root = pers_dir_path
        self.mode = mode
        self.device = device
        self.dataset_name = dataset_name
        self.kernel = torch.ones(1, 1, 8, 8, dtype=torch.float32)
        self.kernel_crop = torch.ones(1, 1, 2, 2, dtype=torch.float32)
        # Sorted so the sample order does not depend on the file system.
        self.img_paths = sorted(glob.glob(os.path.join(self.data_root, "*.jpg")))

    def _sample_paths(self, index):
        """Return ``(img_path, gt_map_path, pers_path)`` for ``index``."""
        if self.mode == 'whole' and self.imgs:
            img_path, gt_map_path, pers_path, _ = self.imgs[index]
            return img_path, gt_map_path, pers_path
        img_path = self.img_paths[index]
        file_name = os.path.basename(img_path)
        gt_map_path = os.path.join(self.gt_root, file_name.replace('IMG_', "GT_IMG_")[:-4] + ".npy")
        # Build the perspective path only when a directory is configured;
        # os.path.join(None, ...) raised for the default pers_dir_path.
        pers_path = None
        if self.pers_root:
            pers_path = os.path.join(self.pers_root, file_name.replace('jpg', "mat"))
        return img_path, gt_map_path, pers_path

    def _load_perspective(self, pers_path, shape):
        """Return the perspective map as an array, or zeros of ``shape``."""
        if not self.pers_root:
            return np.zeros(shape, dtype=float)
        # Context manager closes the HDF5 handle; [:] copies the data out.
        with h5py.File(pers_path, 'r') as pers_file:
            return (pers_file['pmap'][:] / 100).T

    def __getitem__(self, index):
        """Return ``(img_path, image(s), gt_map, perspective map(s))``.

        Raises:
            ValueError: if ``self.mode`` is neither ``'crop'`` nor ``'whole'``.
        """
        if self.mode not in ('crop', 'whole'):
            raise ValueError("unsupported mode: %r (expected 'crop' or 'whole')" % (self.mode,))

        img_path, gt_map_path, pers_path = self._sample_paths(index)
        img = Image.open(img_path).convert("RGB")
        p_m = self._load_perspective(pers_path, img.size[::-1])
        p_m = super(EvalDatasetConstructor, self).resize(Image.fromarray(p_m), self.dataset_name)
        img = super(EvalDatasetConstructor, self).resize(img, self.dataset_name)
        img = transforms.ToTensor()(img)
        gt_map = Image.fromarray(np.squeeze(np.load(gt_map_path)))
        gt_map = transforms.ToTensor()(gt_map)
        p_m = transforms.ToTensor()(p_m)
        img_shape, gt_shape = img.shape, gt_map.shape  # C, H, W
        img = transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))(img)

        if self.mode == 'whole':
            gt_map = functional.conv2d(gt_map.view(1, *(gt_shape)), self.kernel, bias=None, stride=8, padding=0)
            return img_path, img, gt_map.view(1, gt_shape[1] // 8, gt_shape[2] // 8), p_m

        # 'crop': 3x3 grid of half-size patches with half-patch stride.
        patch_height, patch_width = (img_shape[1]) // 2, (img_shape[2]) // 2
        imgs, pers = [], []
        for i in range(3):
            for j in range(3):
                start_h, start_w = (patch_height // 2) * i, (patch_width // 2) * j
                imgs.append(img[:, start_h:start_h + patch_height, start_w:start_w + patch_width])
                pers.append(p_m[:, start_h:start_h + patch_height, start_w:start_w + patch_width])
        imgs, pers = torch.stack(imgs), torch.stack(pers)
        gt_map = functional.conv2d(gt_map.view(1, *(gt_shape)), self.kernel_crop, bias=None, stride=2, padding=0)
        return img_path, imgs, gt_map.view(1, gt_shape[1] // 2, gt_shape[2] // 2), pers

    def __len__(self):
        """Return the configured number of validation samples."""
        return self.validate_num
def mkdir(path):
    """Create a directory (and any missing parents) if it does not exist.

    Parameters:
        path (str) -- a single directory path

    Raises:
        FileExistsError: if ``path`` exists but is not a directory.
    """
    # exist_ok avoids the check-then-create race of
    # `if not os.path.exists(path): os.makedirs(path)` when several
    # processes create the same output directory at once.
    os.makedirs(path, exist_ok=True)
train_test 89 | ground_truth_dir_path = "UCF-QNRF_ECCV18/" + train_test 90 | output_gt_dir = "./QNRF/" + train_test 91 | elif dataset == 'UCF50': # take all images as testing images 92 | num_img = 50 93 | image_dir_path = "UCF_CC_50/images/UCF_CC_50_img" 94 | ground_truth_dir_path = "UCF_CC_50/UCF_CC_50_mat" 95 | output_gt_dir = "./UCF50/" + train_test 96 | 97 | mkdirs(output_gt_dir) 98 | 99 | for i in range(num_img): 100 | if dataset == 'SHA' or dataset == 'SHB': 101 | img_path = image_dir_path + "/IMG_" + str(i + 1) + ".jpg" 102 | gt_path = ground_truth_dir_path + "/GT_IMG_" + str(i + 1) + ".mat" 103 | elif dataset == 'QNRF': 104 | img_path = os.path.join(image_dir_path, "img_"+("%04d" % (i+1))+".jpg") 105 | gt_path = os.path.join(image_dir_path, "img_"+("%04d" % (i+1))+"_ann.mat") 106 | elif dataset == 'UCF50': 107 | img_path = os.path.join(image_dir_path, ("%d" % (i+1))+".jpg") 108 | gt_path = os.path.join(ground_truth_dir_path, ("%d" % (i+1))+"_ann.mat") 109 | 110 | img = Image.open(img_path) 111 | height = img.size[1] 112 | width = img.size[0] 113 | 114 | if dataset == 'SHA' or dataset == 'SHB': 115 | points = scio.loadmat(gt_path)['image_info'][0][0][0][0][0] 116 | elif dataset == 'QNRF': 117 | points = scio.loadmat(gt_path)['annPoints'] 118 | elif dataset == 'UCF50': 119 | points = h5py.File(gt_path, 'r')['annPoints'].value.astype(np.float32) 120 | 121 | 122 | resize_height = height 123 | resize_width = width 124 | 125 | if dataset == 'SHA' or dataset == 'UCF50': 126 | if resize_height <= 416: 127 | tmp = resize_height 128 | resize_height = 416 129 | resize_width = (resize_height / tmp) * resize_width 130 | 131 | if resize_width <= 416: 132 | tmp = resize_width 133 | resize_width = 416 134 | resize_height = (resize_width / tmp) * resize_height 135 | 136 | resize_height = math.ceil(resize_height / 32) * 32 137 | resize_width = math.ceil(resize_width / 32) * 32 138 | elif dataset == 'QNRF': 139 | pass 140 | 141 | 142 | ratio_h = (resize_height) / (height) 143 
| ratio_w = (resize_width) / (width) 144 | # print(height, width, ratio_h, ratio_w) 145 | gt = get_density_map_gaussian(resize_height, resize_width, ratio_h, ratio_w, points, False, 4) 146 | gt = np.reshape(gt, [resize_height, resize_width]) # transpose into w, h 147 | np.save(output_gt_dir + "/GT_IMG_" + str(i + 1), gt) 148 | print("complete!") 149 | -------------------------------------------------------------------------------- /net/CSRPersNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torchvision import models 4 | from op_wrapper.pad_conv2d_wrapper import BasicPerspectiveDilatedConv2D 5 | from op_wrapper.adaptive_sigmoid_wrapper import AdaptiveSigmoid 6 | from op_wrapper.pad_conv2d_wrapper import PerspectiveDilatedConv2dLayer 7 | from collections import OrderedDict 8 | pretrain_dict = nn.ModuleList(list(list(models.vgg16(True).children())[0].children())[0:23]).state_dict() 9 | 10 | class Frontend(nn.Module): 11 | def __init__(self, pretrain=True, **kwargs): 12 | super(Frontend, self).__init__() 13 | self.pad_conv2d_1 = BasicPerspectiveDilatedConv2D(3, 64, 3, 1, **kwargs) 14 | self.pad_relu_1 = nn.ReLU(inplace=True) 15 | self.pad_conv2d_2 = BasicPerspectiveDilatedConv2D(64, 64, 3, 1, **kwargs) 16 | self.pad_relu_2 = nn.ReLU(inplace=True) 17 | self.max_pool_1 = nn.MaxPool2d(kernel_size=2) 18 | 19 | self.pad_conv2d_3 = BasicPerspectiveDilatedConv2D(64, 128, 3, 1, **kwargs) 20 | self.pad_relu_3 = nn.ReLU(inplace=True) 21 | self.pad_conv2d_4 = BasicPerspectiveDilatedConv2D(128, 128, 3, 1, **kwargs) 22 | self.pad_relu_4 = nn.ReLU(inplace=True) 23 | self.max_pool_2 = nn.MaxPool2d(kernel_size=2) 24 | 25 | self.pad_conv2d_5 = BasicPerspectiveDilatedConv2D(128, 256, 3, 1, **kwargs) 26 | self.pad_relu_5 = nn.ReLU(inplace=True) 27 | self.pad_conv2d_6 = BasicPerspectiveDilatedConv2D(256, 256, 3, 1, **kwargs) 28 | self.pad_relu_6 = nn.ReLU(inplace=True) 29 | self.pad_conv2d_7 = 
BasicPerspectiveDilatedConv2D(256, 256, 3, 1, **kwargs) 30 | self.pad_relu_7 = nn.ReLU(inplace=True) 31 | self.max_pool_3 = nn.MaxPool2d(kernel_size=2) 32 | 33 | self.pad_conv2d_8 = BasicPerspectiveDilatedConv2D(256, 512, 3, 1, **kwargs) 34 | self.pad_relu_8 = nn.ReLU(inplace=True) 35 | self.pad_conv2d_9 = BasicPerspectiveDilatedConv2D(512, 512, 3, 1, **kwargs) 36 | self.pad_relu_9 = nn.ReLU(inplace=True) 37 | self.pad_conv2d_10 = BasicPerspectiveDilatedConv2D(512, 512, 3, 1, **kwargs) 38 | self.pad_relu_10 = nn.ReLU(inplace=True) 39 | if pretrain == True: 40 | new_state_dict = OrderedDict() 41 | count = 1 42 | for k,v in pretrain_dict.items(): 43 | name_prefix = "pad_conv2d_" + str(count) 44 | if 'weight' in k: 45 | new_state_dict[name_prefix + '.rate_map_generator.params'] = torch.FloatTensor(kwargs['sigma']) 46 | new_state_dict[name_prefix + '.perspective_dilated_conv2d.weight'] = v 47 | elif 'bias' in k: 48 | new_state_dict[name_prefix + '.perspective_dilated_conv2d.bias'] = v 49 | count += 1 50 | self.load_state_dict(new_state_dict) 51 | 52 | 53 | def forward(self, x, perspective_map): 54 | x = self.pad_conv2d_1(x, perspective_map) 55 | x = self.pad_relu_1(x) 56 | x = self.pad_conv2d_2(x, perspective_map) 57 | x = self.pad_relu_2(x) 58 | x = self.max_pool_1(x) 59 | perspective_map = self.max_pool_1(perspective_map) 60 | 61 | x = self.pad_conv2d_3(x, perspective_map) 62 | x = self.pad_relu_3(x) 63 | x = self.pad_conv2d_4(x, perspective_map) 64 | x = self.pad_relu_4(x) 65 | x = self.max_pool_2(x) 66 | perspective_map = self.max_pool_2(perspective_map) 67 | 68 | x = self.pad_conv2d_5(x, perspective_map) 69 | x = self.pad_relu_5(x) 70 | x = self.pad_conv2d_6(x, perspective_map) 71 | x = self.pad_relu_6(x) 72 | x = self.pad_conv2d_7(x, perspective_map) 73 | x = self.pad_relu_7(x) 74 | x = self.max_pool_3(x) 75 | perspective_map = self.max_pool_3(perspective_map) 76 | 77 | x = self.pad_conv2d_8(x, perspective_map) 78 | x = self.pad_relu_8(x) 79 | x = 
class CSRPersNet(nn.Module):
    """Perspective-aware front end and back end followed by a 1x1 output conv."""

    def __init__(self, load_path=None, **kwargs):
        """
        Args:
            load_path: optional checkpoint path. Its ``back_end`` entries are
                renamed onto the ``pad_conv2d_<n>`` layers before loading.
            **kwargs: forwarded to ``Frontend`` and ``Backend``. When
                ``load_path`` is given, ``kwargs['sigma']`` supplies the
                initial rate-map parameters, the same key ``Frontend`` reads
                when it loads the pretrained VGG weights.
        """
        super(CSRPersNet, self).__init__()
        self.front_end = Frontend(True, **kwargs)
        self.back_end = Backend(512, **kwargs)
        self.output_layer = nn.Conv2d(64, 1, kernel_size=1)
        if load_path is not None:
            # map_location lets a GPU-saved checkpoint load on any machine;
            # load_state_dict copies the values onto the module's device.
            state_dict = torch.load(load_path, map_location='cpu')
            self.load_state_dict(self._remap_state_dict(state_dict, kwargs))
        else:
            # Only the output layer (a single Conv2d) is initialised here.
            nn.init.normal_(self.output_layer.weight, std=0.01)
            if self.output_layer.bias is not None:
                nn.init.constant_(self.output_layer.bias, 0)

    @staticmethod
    def _remap_state_dict(state_dict, kwargs):
        """Rename ``back_end`` checkpoint entries to this model's key layout."""
        new_state_dict = OrderedDict()
        count = 1
        for k, v in state_dict.items():
            if 'back_end' not in k:
                new_state_dict[k] = v
                continue
            name_prefix = "back_end.pad_conv2d_" + str(count)
            if 'weight' in k:
                # Was torch.FloatTensor(*kwargs), which unpacks the dict's
                # keys (strings) instead of the parameter values.
                new_state_dict[name_prefix + '.rate_map_generator.params'] = torch.FloatTensor(kwargs['sigma'])
                new_state_dict[name_prefix + '.perspective_dilated_conv2d.weight'] = v
            elif 'bias' in k:
                new_state_dict[name_prefix + '.perspective_dilated_conv2d.bias'] = v
                # A bias entry closes the current layer; move on to the next.
                count += 1
        return new_state_dict

    def forward(self, x, perspective_map):
        """Return the output of the 1x1 convolution on the back-end features."""
        x, perspective_map = self.front_end(x, perspective_map)
        x = self.back_end(x, perspective_map)
        x = self.output_layer(x)
        return x

    def get_params(self):
        """Collect parameters by kind.

        Returns:
            ``(conv_params, ada_sig_params)``. The lists are also stored on
            the instance under the same names.
        """
        self.ada_sig_params = []
        self.conv_params = []
        for m in self.modules():
            if isinstance(m, AdaptiveSigmoid):
                self.ada_sig_params.append(m.params)
            elif isinstance(m, (nn.Conv2d, PerspectiveDilatedConv2dLayer)):
                self.conv_params.append(m.weight)
                self.conv_params.append(m.bias)
        return self.conv_params, self.ada_sig_params
dilation_h, int dilation_w, 13 | // int pad_h, int pad_w 14 | ){ 15 | /** 16 | * get the input parameter's information 17 | **/ 18 | int batch = input.size(0); 19 | int in_channels = input.size(1); 20 | int input_height = input.size(2); 21 | int input_width = input.size(3); 22 | int out_channels = weight.size(0); 23 | // int kernel_channels = weight.size(1); 24 | int kernel_h = weight.size(2); 25 | int kernel_w = weight.size(3); 26 | int rate_map_height = rate_map.size(2); 27 | int rate_map_width = rate_map.size(3); 28 | int height_out = (input_height - (1 * (kernel_h - 1) + 1)) / stride_h + 1; 29 | int width_out = (input_width - (1 * (kernel_w - 1) + 1)) / stride_w + 1; 30 | /** 31 | * data correctness validation 32 | **/ 33 | AT_ASSERTM(input.type().is_cuda(), "input must be a CUDA tensor"); 34 | AT_ASSERTM(weight.type().is_cuda(), "weight must be a CUDA tensor"); 35 | AT_ASSERTM(rate_map.type().is_cuda(), "rate_map must be a CUDA tensor"); 36 | AT_ASSERTM(rate_map_height == height_out, "output height must be same with rate map height"); 37 | AT_ASSERTM(rate_map_width == width_out, "output width must be same with rate map width"); 38 | AT_ASSERTM(kernel_h % 2 == 1 || kernel_w % 2 ==1, "kernel_size must be odd number"); 39 | /** 40 | * derive more information 41 | **/ 42 | int kernel_dim = in_channels * kernel_h * kernel_w; 43 | int input_dim = in_channels * input_height * input_width; 44 | int conv_out_spatial_dim = height_out * width_out; 45 | 46 | int M = out_channels; 47 | int N = conv_out_spatial_dim; 48 | int K = kernel_dim; 49 | /** 50 | * malloc tmp space and output space 51 | **/ 52 | auto col_buffer = at::empty({in_channels * kernel_h * kernel_w, conv_out_spatial_dim}, input.options()); 53 | auto output = at::empty({batch, out_channels, height_out, width_out}, input.options()); 54 | /** 55 | * get pointer of the tensors 56 | **/ 57 | auto input_ptr = input.data(); 58 | auto weight_ptr = weight.data(); 59 | auto rate_map_ptr = rate_map.data(); 60 | auto 
col_buffer_ptr = col_buffer.data(); 61 | auto output_ptr = output.data(); 62 | auto bias_ptr = bias.data(); 63 | 64 | for (int n = 0; n < batch; ++n) { 65 | pad_conv2d_im2col( 66 | THCState_getCurrentStream(state), 67 | input_ptr + n * input_dim, 68 | rate_map_ptr + n * conv_out_spatial_dim, 69 | in_channels, input_height, input_width, 70 | kernel_h, kernel_w, 71 | // pad_h, pad_w, 72 | stride_h, stride_w, 73 | // dilation_h, dilation_w, 74 | height_out, width_out, 75 | col_buffer_ptr 76 | ); 77 | auto output_instance_ptr = output_ptr + (n * M * N); 78 | THCudaBlas_Sgemm(state, 'n', 'n', N, M, K, 1.0f, col_buffer_ptr, N, weight_ptr, K, 0.0f, output_instance_ptr, N); 79 | add_bias( 80 | THCState_getCurrentStream(state), 81 | output_instance_ptr, 82 | bias_ptr, 83 | out_channels, height_out, width_out 84 | ); 85 | } 86 | return output; 87 | } 88 | 89 | std::vector pad_conv2d_backward( 90 | at::Tensor input, 91 | at::Tensor weight, 92 | at::Tensor rate_map, 93 | at::Tensor bias, 94 | at::Tensor out_grad, 95 | int stride_h, int stride_w 96 | // int dilation_h, int dilation_w, 97 | // int pad_h, int pad_w 98 | ){ 99 | /** 100 | * get the input parameter's information 101 | **/ 102 | int batch = input.size(0); 103 | int in_channels = input.size(1); 104 | int input_height = input.size(2); 105 | int input_width = input.size(3); 106 | int out_channels = weight.size(0); 107 | // int kernel_channels = weight.size(1); 108 | int kernel_h = weight.size(2); 109 | int kernel_w = weight.size(3); 110 | int rate_map_height = rate_map.size(2); 111 | int rate_map_width = rate_map.size(3); 112 | int height_out = (input_height - (1 * (kernel_h - 1) + 1)) / stride_h + 1; 113 | int width_out = (input_width - (1 * (kernel_w - 1) + 1)) / stride_w + 1; 114 | /** 115 | * data correctness validation 116 | **/ 117 | AT_ASSERTM(height_out==out_grad.size(2) && width_out == out_grad.size(3), 118 | "the calculated out shape won't match the out_grad_shape:(%d x %d vs %d x %d)", 119 | height_out, 
width_out, out_grad.size(2), out_grad.size(3)); 120 | AT_ASSERTM(input.type().is_cuda(), "input must be a CUDA tensor"); 121 | AT_ASSERTM(weight.type().is_cuda(), "weight must be a CUDA tensor"); 122 | AT_ASSERTM(rate_map.type().is_cuda(), "rate_map must be a CUDA tensor"); 123 | AT_ASSERTM(rate_map_height == height_out, "output height must be same with rate map height"); 124 | AT_ASSERTM(rate_map_width == width_out, "output width must be same with rate map width"); 125 | /** 126 | * derive more information 127 | **/ 128 | int kernel_dim = in_channels * kernel_h * kernel_w; 129 | int input_dim = in_channels * input_height * input_width; 130 | int conv_out_spatial_dim = height_out * width_out; 131 | 132 | int M = kernel_dim; 133 | int N = conv_out_spatial_dim; 134 | int K = out_channels; 135 | /** 136 | * malloc tmp space and output space 137 | **/ 138 | auto col_buffer = at::empty({in_channels * kernel_h * kernel_w, conv_out_spatial_dim}, input.options()); 139 | auto grad_input = at::zeros_like(input); 140 | auto grad_weight = at::zeros_like(weight); 141 | auto grad_bias = at::zeros_like(bias); 142 | auto grad_rate_map = at::zeros_like(rate_map); 143 | /** 144 | * get pointer of the tensors 145 | **/ 146 | auto input_ptr = input.data(); 147 | auto weight_ptr = weight.data(); 148 | auto rate_map_ptr = rate_map.data(); 149 | auto out_grad_ptr = out_grad.data(); 150 | auto col_buffer_ptr = col_buffer.data(); 151 | auto grad_input_ptr = grad_input.data(); 152 | auto grad_weight_ptr = grad_weight.data(); 153 | auto grad_bias_ptr = grad_bias.data(); 154 | // auto bias_ptr = bias.data(); 155 | auto grad_rate_map_ptr = grad_rate_map.data(); 156 | 157 | for (int n = 0; n < batch; ++n) { 158 | auto out_grad_instance_ptr = out_grad_ptr + n * K * N; 159 | calculate_dbias( 160 | THCState_getCurrentStream(state), 161 | out_grad_instance_ptr, 162 | grad_bias_ptr, 163 | out_channels, 164 | height_out, width_out 165 | ); 166 | THCudaBlas_Sgemm(state, 167 | 'n', 't', 168 | N, M, K, 
// Python bindings for the extension module: exposes pad_conv2d_forward as
// "forward" and pad_conv2d_backward as "backward". The module name comes from
// the TORCH_EXTENSION_NAME macro, which is defined outside this file.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m){
    m.def("forward", &pad_conv2d_forward, "perspective-aware dilated conv2d forward (CUDA)");
    m.def("backward", &pad_conv2d_backward, "perspective-aware dilated conv2d backward (CUDA)");
}
# State dict consumed by Frontend.__init__ when its `pretrain` flag is true.
# It is built once, at import time: a VGG16-BN model is constructed, the first
# 33 sub-modules of its first child are wrapped in a ModuleList, and that
# list's state dict is kept.
# NOTE(review): the positional `True` is presumably `pretrained=True`, which
# would make importing this module fetch/load the pretrained weights -- confirm
# against the installed torchvision version.
pretrain_dict = nn.ModuleList(list(list(models.vgg16_bn(True).children())[0].children())[0:33]).state_dict()
nn.ReLU(inplace=True) 38 | self.pad_conv2d_10 = BasicPerspectiveDilatedConv2D_BN(512, 512, 3, 1, **kwargs) 39 | self.pad_relu_10 = nn.ReLU(inplace=True) 40 | if pretrain == True: 41 | new_state_dict = OrderedDict() 42 | count, ori_count = 1, 0 43 | for k,v in pretrain_dict.items(): 44 | name_prefix = "pad_conv2d_" + str(count) 45 | if count != ori_count: 46 | if 'weight' in k: 47 | new_state_dict[name_prefix + '.rate_map_generator.params'] = torch.FloatTensor(kwargs['sigma']) 48 | new_state_dict[name_prefix + '.perspective_dilated_conv2d.weight'] = v 49 | elif 'bias' in k: 50 | new_state_dict[name_prefix + '.perspective_dilated_conv2d.bias'] = v 51 | ori_count += 1 52 | elif count == ori_count: 53 | if 'weight' in k: 54 | new_state_dict[name_prefix + '.bn.weight'] = v 55 | elif 'bias' in k: 56 | new_state_dict[name_prefix + '.bn.bias'] = v 57 | elif 'running_mean' in k: 58 | new_state_dict[name_prefix + '.bn.running_mean'] = v 59 | elif 'running_var' in k: 60 | new_state_dict[name_prefix + '.bn.running_var'] = v 61 | count += 1 62 | self.load_state_dict(new_state_dict) 63 | 64 | 65 | def forward(self, x, perspective_map): 66 | x = self.pad_conv2d_1(x, perspective_map) 67 | x = self.pad_relu_1(x) 68 | x = self.pad_conv2d_2(x, perspective_map) 69 | x = self.pad_relu_2(x) 70 | x = self.max_pool_1(x) 71 | perspective_map = self.max_pool_1(perspective_map) 72 | 73 | x = self.pad_conv2d_3(x, perspective_map) 74 | x = self.pad_relu_3(x) 75 | x = self.pad_conv2d_4(x, perspective_map) 76 | x = self.pad_relu_4(x) 77 | x = self.max_pool_2(x) 78 | perspective_map = self.max_pool_2(perspective_map) 79 | 80 | x = self.pad_conv2d_5(x, perspective_map) 81 | x = self.pad_relu_5(x) 82 | x = self.pad_conv2d_6(x, perspective_map) 83 | x = self.pad_relu_6(x) 84 | x = self.pad_conv2d_7(x, perspective_map) 85 | x = self.pad_relu_7(x) 86 | x = self.max_pool_3(x) 87 | perspective_map = self.max_pool_3(perspective_map) 88 | 89 | x = self.pad_conv2d_8(x, perspective_map) 90 | x = 
class CSRPersNet_BN(nn.Module):
    """Density-map network: perspective-aware front end, back end and a 1x1 output conv.

    Args:
        load_path: optional path of a checkpoint. When given, entries whose key
            contains ``'back_end'`` are renamed onto this model's
            ``back_end.pad_conv2d_<i>`` layers and the result is loaded;
            all other entries are copied under their original key.
        is_relu: when true, ``forward`` applies ReLU to the output-layer result
            before upsampling.
        **kwargs: forwarded to ``Frontend`` and ``Backend``. ``kwargs['sigma']``
            is used as the initial value of each remapped layer's
            ``rate_map_generator.params``.
    """

    def __init__(self, load_path=None, is_relu=False, **kwargs):
        super(CSRPersNet_BN, self).__init__()
        self.is_relu = is_relu
        self.front_end = Frontend(True, **kwargs)
        self.back_end = Backend(512, **kwargs)
        self.output_layer = nn.Conv2d(64, 1, kernel_size=1)

        if load_path is not None:
            new_state_dict = OrderedDict()
            state_dict = torch.load(load_path)
            # Index of the back-end layer currently being filled. It advances
            # after a bias entry, i.e. the checkpoint is expected to list each
            # layer's weight before its bias.
            count = 1
            for k, v in state_dict.items():
                if 'back_end' in k:
                    name_prefix = "back_end.pad_conv2d_" + str(count)
                    if 'weight' in k:
                        # Same initialisation Frontend uses for its layers.
                        # (The previous ``torch.FloatTensor(*kwargs)`` unpacked
                        # the dict's *keys*, i.e. strings, into the constructor.)
                        new_state_dict[name_prefix + '.rate_map_generator.params'] = torch.FloatTensor(kwargs['sigma'])
                        new_state_dict[name_prefix + '.perspective_dilated_conv2d.weight'] = v
                    elif 'bias' in k:
                        new_state_dict[name_prefix + '.perspective_dilated_conv2d.bias'] = v
                        count += 1
                else:
                    new_state_dict[k] = v
            self.load_state_dict(new_state_dict)
        else:
            # No checkpoint: only the freshly created output layer is
            # initialised here. It is a single Conv2d built with the default
            # bias, so it can be initialised directly.
            nn.init.normal_(self.output_layer.weight, std=0.01)
            nn.init.constant_(self.output_layer.bias, 0)

    def forward(self, x, perspective_map):
        """Return the prediction for ``x``, upsampled 4x in both spatial dims.

        The front end returns both features and a perspective map; that
        returned map is the one passed to the back end.
        """
        x, perspective_map = self.front_end(x, perspective_map)
        x = self.back_end(x, perspective_map)
        x = self.output_layer(x)

        if self.is_relu:
            x = F.relu(x)
        x = F.interpolate(x, (x.shape[2] * 4, x.shape[3] * 4), mode='bilinear', align_corners=False)
        return x

    def get_params(self):
        """Group parameters by owning module type.

        Returns:
            ``(conv_params, bn_params, ada_sig_params)``. The three lists are
            rebuilt on every call and also stored on ``self`` under the same
            names.
        """
        self.ada_sig_params = []
        self.conv_params = []
        self.bn_params = []
        for m in self.modules():
            if isinstance(m, AdaptiveSigmoid):
                self.ada_sig_params.append(m.params)
            elif isinstance(m, (nn.Conv2d, PerspectiveDilatedConv2dLayer)):
                self.conv_params.append(m.weight)
                self.conv_params.append(m.bias)
            elif isinstance(m, nn.BatchNorm2d):
                self.bn_params.append(m.weight)
                self.bn_params.append(m.bias)
        return self.conv_params, self.bn_params, self.ada_sig_params
/**
 * Bilinear sample of a single-channel image at the fractional position (h, w).
 *
 * bottom_data is indexed as row * data_width + col. Each of the four
 * neighbouring pixels contributes only if it passes its guard below;
 * a neighbour that fails the guard contributes 0. The low corner is only
 * checked against 0 and the high corner only against height - 1 / width - 1.
 */
__device__ float dmcn_im2col_bilinear(
    const float* bottom_data,
    const int data_width,
    const int height,
    const int width,
    float h,
    float w){

    // Integer corners surrounding (h, w).
    const int row0 = floor(h);
    const int col0 = floor(w);
    const int row1 = row0 + 1;
    const int col1 = col0 + 1;

    // Fractional distance from the low corner, and its complement.
    const float frac_h = h - row0;
    const float frac_w = w - col0;
    const float rest_h = 1 - frac_h;
    const float rest_w = 1 - frac_w;

    const bool row0_ok = row0 >= 0;
    const bool col0_ok = col0 >= 0;
    const bool row1_ok = row1 <= height - 1;
    const bool col1_ok = col1 <= width - 1;

    const float top_left     = (row0_ok && col0_ok) ? bottom_data[row0 * data_width + col0] : 0.0f;
    const float top_right    = (row0_ok && col1_ok) ? bottom_data[row0 * data_width + col1] : 0.0f;
    const float bottom_left  = (row1_ok && col0_ok) ? bottom_data[row1 * data_width + col0] : 0.0f;
    const float bottom_right = (row1_ok && col1_ok) ? bottom_data[row1 * data_width + col1] : 0.0f;

    const float k_tl = rest_h * rest_w;
    const float k_tr = rest_h * frac_w;
    const float k_bl = frac_h * rest_w;
    const float k_br = frac_h * frac_w;

    const float val = (k_tl * top_left + k_tr * top_right + k_bl * bottom_left + k_br * bottom_right);
    return val;
}
/**
 * Derivative of the bilinear sample of im_data at (argmax_h, argmax_w) with
 * respect to one of the two sampling coordinates.
 *
 * bp_dir == 0 differentiates with respect to the row coordinate (high-row
 * pixels enter positively, low-row pixels negatively, each scaled by its
 * column interpolation factor); bp_dir == 1 does the same for the column
 * coordinate. Any other bp_dir yields 0. Positions at or outside
 * (-1, height) x (-1, width) yield 0, and neighbours failing their guard are
 * skipped. im_data is indexed as row * data_width + col.
 */
__device__ float dmcn_get_coordinate_weight(
    float argmax_h,
    float argmax_w,
    const int height,
    const int width,
    const float* im_data,
    const int data_width,
    const int bp_dir
) {
    if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || argmax_w >= width)
        return 0;  // sampling position has no valid neighbour

    const int row_lo = floor(argmax_h);
    const int col_lo = floor(argmax_w);
    const int row_hi = row_lo + 1;
    const int col_hi = col_lo + 1;

    const bool row_lo_ok = row_lo >= 0;
    const bool col_lo_ok = col_lo >= 0;
    const bool row_hi_ok = row_hi <= height - 1;
    const bool col_hi_ok = col_hi <= width - 1;

    // Flat offsets of the two rows involved.
    const int lo_base = row_lo * data_width;
    const int hi_base = row_hi * data_width;

    float weight = 0;
    switch (bp_dir) {
    case 0: {
        // Column interpolation factors of the low / high column.
        const float w_of_lo_col = col_lo + 1 - argmax_w;
        const float w_of_hi_col = argmax_w - col_lo;
        if (row_lo_ok && col_lo_ok)
            weight += -1 * w_of_lo_col * im_data[lo_base + col_lo];
        if (row_lo_ok && col_hi_ok)
            weight += -1 * w_of_hi_col * im_data[lo_base + col_hi];
        if (row_hi_ok && col_lo_ok)
            weight += w_of_lo_col * im_data[hi_base + col_lo];
        if (row_hi_ok && col_hi_ok)
            weight += w_of_hi_col * im_data[hi_base + col_hi];
        break;
    }
    case 1: {
        // Row interpolation factors of the low / high row.
        const float w_of_lo_row = row_lo + 1 - argmax_h;
        const float w_of_hi_row = argmax_h - row_lo;
        if (row_lo_ok && col_lo_ok)
            weight += -1 * w_of_lo_row * im_data[lo_base + col_lo];
        if (row_lo_ok && col_hi_ok)
            weight += w_of_lo_row * im_data[lo_base + col_hi];
        if (row_hi_ok && col_lo_ok)
            weight += -1 * w_of_hi_row * im_data[hi_base + col_lo];
        if (row_hi_ok && col_hi_ok)
            weight += w_of_hi_row * im_data[hi_base + col_hi];
        break;
    }
    default:
        break;
    }

    return weight;
}
/**
 * Backward kernel: accumulates d(loss)/d(rate_map) for one sample.
 *
 * Each loop index addresses one element of the column-gradient buffer
 * data_col, decomposed (slowest to fastest) as
 *   (channel c, kernel row i, kernel col j, output row h_out, output col w_out).
 * For every non-centre kernel tap it evaluates the derivative of the bilinear
 * sample w.r.t. the row and column sampling coordinates, multiplies both by
 * the incoming column gradient, combines them along the tap's offset
 * direction, and atomically adds the result to grad_rate_map[h_out, w_out]
 * (many (c, i, j) triples map to the same output pixel, hence the atomic).
 *
 * The `channels` parameter is not read in the body; the channel is derived
 * from the index.
 */
__global__ void pad_conv2d_col2im_coord_kernel(
    const int n,
    const float* data_col,
    const float* data_im,
    const float* data_rate,
    const int channels, const int height, const int width,
    const int kernel_h, const int kernel_w,
    const int stride_h, const int stride_w,
    const int height_col, const int width_col,
    float* grad_rate_map
){
    CUDA_KERNEL_LOOP(index, n){
        // the relative location in the filter
        const int j = (index / width_col / height_col) % kernel_w;
        const int i = (index / width_col / height_col / kernel_w) % kernel_h;
        const int c = index / width_col / height_col / kernel_w / kernel_h;
        // output pixel this element belongs to
        int w_out = index % width_col;
        int h_out = (index / width_col) % height_col;
        // coordinates of the centre of the conv window in the image.
        const int h_in = h_out * stride_h + (int)((kernel_h - 1 ) / 2);
        const int w_in = w_out * stride_w + (int)((kernel_w - 1 ) / 2);
        // per-output-pixel dilation rate
        const float rate = data_rate[h_out * width_col + w_out];

        // fractional image position sampled by tap (i, j): centre + offset * rate
        const float cur_inv_h_data = h_in + (i - (int)((kernel_h - 1 ) / 2)) * rate;
        const float cur_inv_w_data = w_in + (j - (int)((kernel_w - 1 ) / 2)) * rate;

        // tap offset from the kernel centre, in kernel cells
        const float reletive_i = (i - (int)((kernel_h - 1 ) / 2));
        const float reletive_j = (j - (int)((kernel_w - 1 ) / 2));
        // The centre tap's position does not depend on rate, so it is skipped
        // (this also avoids dividing by a zero norm below).
        if (reletive_i != 0 || reletive_j != 0){
            float val_h = 0;
            float val_w = 0;
            // derivative of the sampled value w.r.t. the row coordinate (bp_dir 0)
            float h_weight = dmcn_get_coordinate_weight(
                cur_inv_h_data, cur_inv_w_data,
                height, width,
                data_im + c * height * width,
                width,
                0);
            // derivative of the sampled value w.r.t. the column coordinate (bp_dir 1)
            float w_weight = dmcn_get_coordinate_weight(
                cur_inv_h_data, cur_inv_w_data,
                height, width,
                data_im + c * height * width,
                width,
                1);

            val_h = (h_weight) * data_col[index];
            val_w = (w_weight) * data_col[index];

            float gradient = 0;
            // dot product with the tap offset, then divided by the offset's
            // Euclidean length
            float tmp = val_h * reletive_i + val_w * reletive_j;
            gradient = tmp / std::sqrt(float(reletive_i * reletive_i + reletive_j * reletive_j));
            atomicAdd(grad_rate_map + h_out * width_col + w_out, gradient);
        }
    }
}
/**
 * Host launcher for pad_conv2d_im2col_kernel.
 *
 * Unrolls one sample (data_im, in_channels x height x width) into data_col,
 * using the per-output-pixel rates in data_rate. One loop index is issued per
 * (channel, output row, output col); the kernel itself iterates over the
 * kernel taps.
 *
 * The kernel is enqueued on `stream` with GET_BLOCKS(num_kernels) blocks of
 * CUDA_NUM_THREADS threads and no dynamic shared memory.
 * NOTE(review): this launch configuration was reconstructed from the helpers
 * defined in this file and the otherwise unused `stream` parameter; confirm
 * against the upstream source.
 */
void pad_conv2d_im2col(cudaStream_t stream,
    const float* data_im,
    const float* data_rate,
    const int in_channels, const int height, const int width,
    const int kernel_h, const int kernel_w,
    // const int pad_h, const int pad_w,
    const int stride_h, const int stride_w,
    // const int dilation_h, const int dilation_w,
    const int height_out, const int width_out,
    float* data_col){
    int num_kernels = in_channels * height_out * width_out;
    // An empty problem would request a grid of 0 blocks, which is an invalid
    // launch configuration; there is nothing to do in that case.
    if (num_kernels <= 0) {
        return;
    }
    pad_conv2d_im2col_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS, 0, stream>>>(
        num_kernels,
        data_im,
        data_rate,
        height, width,
        kernel_h, kernel_w,
        stride_h, stride_w,
        in_channels,
        height_out, width_out,
        data_col
    );
}
/**
 * Host launcher for pad_conv2d_col2im_kernel.
 *
 * Scatters the column-gradient buffer data_col back into grad_im (the
 * gradient w.r.t. one input sample), using the per-output-pixel rates in
 * data_rate. One loop index is issued per element of data_col, i.e. per
 * (channel, kernel row, kernel col, output row, output col).
 *
 * The kernel accumulates into grad_im with atomic adds, so the caller must
 * provide a buffer that already holds the value to accumulate onto.
 *
 * The kernel is enqueued on `stream` with GET_BLOCKS(num_kernels) blocks of
 * CUDA_NUM_THREADS threads and no dynamic shared memory.
 * NOTE(review): this launch configuration was reconstructed from the helpers
 * defined in this file and the otherwise unused `stream` parameter; confirm
 * against the upstream source.
 */
void pad_conv2d_col2im(cudaStream_t stream,
    const float* data_col, const float* data_rate,
    const int in_channels, const int height, const int width,
    const int kernel_h, const int kernel_w,
    const int stride_h, const int stride_w,
    const int height_out, const int width_out,
    float* grad_im){
    int num_kernels = in_channels * kernel_h * kernel_w * height_out * width_out;
    // An empty problem would request a grid of 0 blocks, which is an invalid
    // launch configuration; there is nothing to do in that case.
    if (num_kernels <= 0) {
        return;
    }
    pad_conv2d_col2im_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS, 0, stream>>>(
        num_kernels,
        data_col,
        data_rate,
        in_channels, height, width,
        kernel_h, kernel_w,
        stride_h, stride_w,
        height_out, width_out,
        grad_im
    );
}
--------------------------------------------------------------------------------