├── out_models
    └── readme.md
├── PytorchCudaOpExtension
    ├── adaptive_sigmoid
    │   ├── adaptive_sigmoid.egg-info
    │   │   ├── dependency_links.txt
    │   │   ├── top_level.txt
    │   │   ├── PKG-INFO
    │   │   └── SOURCES.txt
    │   ├── dist
    │   │   └── adaptive_sigmoid-0.0.0-py3.7-linux-x86_64.egg
    │   ├── build
    │   │   └── lib.linux-x86_64-3.7
    │   │   │   └── adaptive_sigmoid_gpu.cpython-37m-x86_64-linux-gnu.so
    │   ├── setup.py
    │   ├── adaptive_sigmoid.h
    │   ├── adaptive_sigmoid_wrapper.py
    │   ├── adaptive_sigmoid.cpp
    │   └── adaptive_sigmoid_cuda.cu
    └── perspective_aware_conv2d
    │   ├── pad_conv2d.egg-info
    │       ├── dependency_links.txt
    │       ├── top_level.txt
    │       ├── SOURCES.txt
    │       └── PKG-INFO
    │   ├── dist
    │       └── pad_conv2d-0.0.0-py3.7-linux-x86_64.egg
    │   ├── build
    │       └── lib.linux-x86_64-3.7
    │       │   └── pad_conv2d_gpu.cpython-37m-x86_64-linux-gnu.so
    │   ├── setup.py
    │   ├── pad_conv2d.h
    │   ├── pad_conv2d_wrapper.py
    │   ├── pad_conv2d.cpp
    │   └── pad_conv2d_cuda.cu
├── data
    └── readme.md
├── download_models.sh
├── SHA_test.sh
├── options
    ├── train_options.py
    ├── test_options.py
    └── base_options.py
├── README.md
├── net
    ├── BasicConv2d.py
    ├── networks.py
    ├── CSRNet.py
    ├── CSRPersNet_onlyBack_crop.py
    ├── CSRPersNet.py
    └── CSRPersNet_crop.py
├── LICENSE
├── metrics.py
├── op_wrapper
    ├── adaptive_sigmoid_wrapper.py
    └── pad_conv2d_wrapper.py
├── config.py
├── test.py
├── utils.py
├── eval
    └── Estimator.py
├── Dataset
    └── DatasetConstructor.py
└── generate_map.py


/out_models/readme.md:
--------------------------------------------------------------------------------
1 | Put models here
2 | 


--------------------------------------------------------------------------------
/PytorchCudaOpExtension/adaptive_sigmoid/adaptive_sigmoid.egg-info/dependency_links.txt:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/PytorchCudaOpExtension/perspective_aware_conv2d/pad_conv2d.egg-info/dependency_links.txt:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/PytorchCudaOpExtension/perspective_aware_conv2d/pad_conv2d.egg-info/top_level.txt:
--------------------------------------------------------------------------------
1 | pad_conv2d_gpu
2 | 


--------------------------------------------------------------------------------
/PytorchCudaOpExtension/adaptive_sigmoid/adaptive_sigmoid.egg-info/top_level.txt:
--------------------------------------------------------------------------------
1 | adaptive_sigmoid_gpu
2 | 


--------------------------------------------------------------------------------
/data/readme.md:
--------------------------------------------------------------------------------
1 | Put data here, refer to `config.py` to make sure that data files are correctly arranged.
2 | 


--------------------------------------------------------------------------------
/download_models.sh:
--------------------------------------------------------------------------------
# Fetch the pretrained model from Google Drive.
#
# The share link of the form /file/d/<id>/view returns the HTML preview page
# rather than the file, and an unquoted `?` in the URL is subject to shell
# globbing. Use the direct-download endpoint and quote the URL.
#
# NOTE(review): for large files Drive may answer with an interstitial
# confirmation page; if the downloaded file turns out to be HTML, fetch it
# manually from
#   https://drive.google.com/file/d/1GR0gmoJvNlv5a8o0D9ucfraiNYrQV8Ip/view
wget -c --content-disposition "https://drive.google.com/uc?export=download&id=1GR0gmoJvNlv5a8o0D9ucfraiNYrQV8Ip"
2 | 


--------------------------------------------------------------------------------
/SHA_test.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Run test.py on the 'SHA' dataset in 'crop' mode with the 'csrpersp_crop' net.
# NOTE(review): 'model_path' looks like a placeholder for the checkpoint file;
# replace it with the real path before running.
python test.py \
    --dataset_name='SHA' \
    --mode='crop' \
    --nThreads=1 \
    --gpu_ids='0' \
    --batch_size=1 \
    --net_name='csrpersp_crop' \
    --test_model_name='model_path'
3 | 


--------------------------------------------------------------------------------
/PytorchCudaOpExtension/adaptive_sigmoid/dist/adaptive_sigmoid-0.0.0-py3.7-linux-x86_64.egg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Zhaoyi-Yan/PFDNet/HEAD/PytorchCudaOpExtension/adaptive_sigmoid/dist/adaptive_sigmoid-0.0.0-py3.7-linux-x86_64.egg


--------------------------------------------------------------------------------
/PytorchCudaOpExtension/perspective_aware_conv2d/dist/pad_conv2d-0.0.0-py3.7-linux-x86_64.egg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Zhaoyi-Yan/PFDNet/HEAD/PytorchCudaOpExtension/perspective_aware_conv2d/dist/pad_conv2d-0.0.0-py3.7-linux-x86_64.egg


--------------------------------------------------------------------------------
/options/train_options.py:
--------------------------------------------------------------------------------
1 | from .base_options import BaseOptions
2 | 
class TrainOptions(BaseOptions):
    """Option set for training runs.

    Registers exactly the arguments that ``BaseOptions`` registers and marks
    the instance as being in training mode.
    """

    def initialize(self, parser):
        """Register the base arguments on ``parser`` and set ``isTrain``.

        Returns the value produced by ``BaseOptions.initialize``.
        """
        configured_parser = BaseOptions.initialize(self, parser)
        self.isTrain = True
        return configured_parser


--------------------------------------------------------------------------------
/PytorchCudaOpExtension/perspective_aware_conv2d/pad_conv2d.egg-info/SOURCES.txt:
--------------------------------------------------------------------------------
1 | pad_conv2d.cpp
2 | pad_conv2d_cuda.cu
3 | setup.py
4 | pad_conv2d.egg-info/PKG-INFO
5 | pad_conv2d.egg-info/SOURCES.txt
6 | pad_conv2d.egg-info/dependency_links.txt
7 | pad_conv2d.egg-info/top_level.txt


--------------------------------------------------------------------------------
/PytorchCudaOpExtension/adaptive_sigmoid/build/lib.linux-x86_64-3.7/adaptive_sigmoid_gpu.cpython-37m-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Zhaoyi-Yan/PFDNet/HEAD/PytorchCudaOpExtension/adaptive_sigmoid/build/lib.linux-x86_64-3.7/adaptive_sigmoid_gpu.cpython-37m-x86_64-linux-gnu.so


--------------------------------------------------------------------------------
/PytorchCudaOpExtension/perspective_aware_conv2d/build/lib.linux-x86_64-3.7/pad_conv2d_gpu.cpython-37m-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Zhaoyi-Yan/PFDNet/HEAD/PytorchCudaOpExtension/perspective_aware_conv2d/build/lib.linux-x86_64-3.7/pad_conv2d_gpu.cpython-37m-x86_64-linux-gnu.so


--------------------------------------------------------------------------------
/PytorchCudaOpExtension/perspective_aware_conv2d/pad_conv2d.egg-info/PKG-INFO:
--------------------------------------------------------------------------------
 1 | Metadata-Version: 1.0
 2 | Name: pad-conv2d
 3 | Version: 0.0.0
 4 | Summary: UNKNOWN
 5 | Home-page: UNKNOWN
 6 | Author: UNKNOWN
 7 | Author-email: UNKNOWN
 8 | License: UNKNOWN
 9 | Description: UNKNOWN
10 | Platform: UNKNOWN
11 | 


--------------------------------------------------------------------------------
/PytorchCudaOpExtension/adaptive_sigmoid/adaptive_sigmoid.egg-info/PKG-INFO:
--------------------------------------------------------------------------------
 1 | Metadata-Version: 1.0
 2 | Name: adaptive-sigmoid
 3 | Version: 0.0.0
 4 | Summary: UNKNOWN
 5 | Home-page: UNKNOWN
 6 | Author: UNKNOWN
 7 | Author-email: UNKNOWN
 8 | License: UNKNOWN
 9 | Description: UNKNOWN
10 | Platform: UNKNOWN
11 | 


--------------------------------------------------------------------------------
/PytorchCudaOpExtension/adaptive_sigmoid/adaptive_sigmoid.egg-info/SOURCES.txt:
--------------------------------------------------------------------------------
1 | adaptive_sigmoid.cpp
2 | adaptive_sigmoid_cuda.cu
3 | setup.py
4 | adaptive_sigmoid.egg-info/PKG-INFO
5 | adaptive_sigmoid.egg-info/SOURCES.txt
6 | adaptive_sigmoid.egg-info/dependency_links.txt
7 | adaptive_sigmoid.egg-info/top_level.txt


--------------------------------------------------------------------------------
/PytorchCudaOpExtension/adaptive_sigmoid/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 | from torch.utils.cpp_extension import CppExtension, BuildExtension, CUDAExtension
# Build the `adaptive_sigmoid_gpu` extension from the C++ binding and the
# CUDA source that sit next to this script.
setup(
    name='adaptive_sigmoid',
    ext_modules=[
        CUDAExtension(
            'adaptive_sigmoid_gpu',
            ['adaptive_sigmoid.cpp', 'adaptive_sigmoid_cuda.cu'],
        ),
    ],
    cmdclass={'build_ext': BuildExtension},
)


--------------------------------------------------------------------------------
/PytorchCudaOpExtension/perspective_aware_conv2d/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 | from torch.utils.cpp_extension import CppExtension, BuildExtension, CUDAExtension
# Build the `pad_conv2d_gpu` extension from the C++ binding and the CUDA
# source that sit next to this script.
setup(
    name='pad_conv2d',
    ext_modules=[
        CUDAExtension(
            'pad_conv2d_gpu',
            ['pad_conv2d.cpp', 'pad_conv2d_cuda.cu'],
        ),
    ],
    cmdclass={'build_ext': BuildExtension},
)
6 | 


--------------------------------------------------------------------------------
/options/test_options.py:
--------------------------------------------------------------------------------
 1 | 
 2 | from .base_options import BaseOptions
 3 | 
 4 | 
 5 | class TestOptions(BaseOptions):
 6 |     def initialize(self, parser):
 7 |         parser = BaseOptions.initialize(self, parser)
 8 |         # TODO: implemented me
 9 |         parser.add_argument('--results_dir', type=str, default='./results/', help='saves results here.')
10 |         self.isTrain = False
11 | 
12 |         return parser


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # PFDNet
 2 | 
 3 | # Data preparation:
 4 | Download the ShanghaiTech dataset, then generate the density-map files with `generate_map.py` and put them in `./data`. Next, download the perspective maps released with the paper `Revisiting Perspective Information for Efficient Crowd Counting`.
 5 | Put the corresponding files inside `./data` as well, arranging them according to the paths in `config.py`. Note that you need to convert the `mat` files of the perspective maps to the v7.3 format yourself,
 6 | so that `h5py` can read them correctly.
 7 | 
 8 | # Download model
 9 | ```
10 | bash download_models.sh
11 | ```
12 | # Test
13 | Pytorch version: 1.0 or 1.1
14 | Install the CUDA extensions under `PytorchCudaOpExtension` (each one has its own `setup.py`), then run `sh SHA_test.sh`.
15 | 


--------------------------------------------------------------------------------
/net/BasicConv2d.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | 
 4 | class BasicConv2d(nn.Module):
 5 |     def __init__(self, 
 6 |                  in_channels, 
 7 |                  out_channels, 
 8 |                  kernel_size, 
 9 |                  stride, 
10 |                  pad, 
11 |                  if_Bn=False,
12 |                  activation=nn.ReLU(inplace=True)):
13 |         super(BasicConv2d, self).__init__()
14 |         self.conv2d = nn.Conv2d(in_channels, out_channels, kernel_size, stride=stride, padding=pad)
15 |         self.if_Bn = if_Bn
16 |         if self.if_Bn:
17 |             self.Bn = nn.BatchNorm2d(out_channels)
18 |         self.activation = activation
19 |     
20 |     def forward(self, x):
21 |         x = self.conv2d(x)
22 |         if self.if_Bn:
23 |             x = self.Bn(x)
24 |         if not(self.activation == None):
25 |             x = self.activation(x)
26 |         return x
27 | 


--------------------------------------------------------------------------------
/net/networks.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | from torch.nn import init
 4 | import functools
 5 | from net.CSRPersNet_crop import CSRPersNet_BN
 6 | from net.CSRNet import CSRNet
 7 | 
 8 | 
 9 | def init_net(net, init_type='normal', init_gain=0.01, gpu_ids=[]):
10 |     if len(gpu_ids) > 0:
11 |         assert(torch.cuda.is_available())
12 |         net.to(gpu_ids[0])
13 |         net = torch.nn.DataParallel(net, gpu_ids)  # multi-GPUs
14 |     # Has been initlized inside
15 |     return net
16 | 
def define_net(opt):
    """Build the network selected by ``opt.net_name``.

    ``'csrnet'`` yields a ``CSRNet``; ``'csrpersp_crop'`` yields a
    ``CSRPersNet_BN`` whose ``sigma`` is taken from ``opt.alpha``,
    ``opt.beta``, ``opt.gamma`` and ``opt.theta``.

    Raises:
        NotImplementedError: for any other ``opt.net_name``.
    """
    requested = opt.net_name

    if requested == 'csrnet':
        return CSRNet()

    if requested == 'csrpersp_crop':
        sigma_values = [opt.alpha, opt.beta, opt.gamma, opt.theta]
        return CSRPersNet_BN(load_path=None,
                             updates_signal=[True, True, True, True],
                             is_relu=False,
                             sigma=sigma_values)

    raise NotImplementedError('Unrecognized model: '+requested)
28 | 


--------------------------------------------------------------------------------
/PytorchCudaOpExtension/adaptive_sigmoid/adaptive_sigmoid.h:
--------------------------------------------------------------------------------
 1 | #ifndef ADAPTIVE_SIGMOID
 2 | #define ADAPTIVE_SIGMOID
 3 | #include <THC/THC.h>
 4 | #include <cuda.h>
 5 | #include <cuda_runtime.h>
 6 | #include <vector>
 7 | #include <cmath>
 8 | 
 9 | extern THCState *state;
10 | typedef std::vector<int> TShape;
11 | 
12 | void adaptive_sigmoid_fucntion(
13 |     cudaStream_t stream,
14 |     const float* data_in,
15 |     const float* params,
16 |     float* output,
17 |     int channels, int height, int width
18 | );
19 | 
20 | void adaptive_sigmoid_input_grad(
21 |     cudaStream_t stream,
22 |     const float* data_in,
23 |     const float* grad_outputs,
24 |     const float* params,
25 |     float* grad_input,
26 |     int channels, int height, int width
27 | );
28 | 
29 | void adaptive_sigmoid_params_grad(
30 |     cudaStream_t stream,
31 |     const float* data_in,
32 |     const float* grad_outputs,
33 |     const float* params,
34 |     float* grad_params,
35 |     int channels, int height, int width,
36 |     bool alpha_update, 
37 |     bool beta_update,
38 |     bool gamma_update,
39 |     bool theta_update
40 | );
41 | 
42 | #endif


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2021 Zhaoyi-Yan
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/PytorchCudaOpExtension/adaptive_sigmoid/adaptive_sigmoid_wrapper.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import torch
 3 | import torch.nn as nn
 4 | from torch.autograd import Function
 5 | import adaptive_sigmoid_gpu as adaptive_sigmoid
 6 | 
 7 | class AdaptiveSigmoidFunction(Function):
 8 |     @staticmethod
 9 |     def forward(ctx, *args):
10 |         if len(args) != 2:
11 |             print("wrong input parameters number, check the input")
12 |             return
13 |         input = args[0]
14 |         params = args[1]
15 |         output = adaptive_sigmoid.forward(input, params)
16 |         ctx.save_for_backward(input, params)
17 |         return output
18 | 
19 |     @staticmethod
20 |     def backward(ctx, *grad_outputs):
21 |         if len(grad_outputs) != 1:
22 |             print("Wrong output number, check your output")
23 |             return
24 |         input, params = ctx.saved_tensors
25 |         grad_input, grad_weight= adaptive_sigmoid.backward(input, params, grad_outputs[0])
26 |         return grad_input, grad_weight
27 |     
class AdaptiveSigmoid(nn.Module):
    """Module wrapper that owns the four parameters of the adaptive sigmoid.

    The values ``alpha``, ``beta``, ``gamma`` and ``theta`` are stored, in
    that order, in a single learnable float tensor ``self.params``.
    """

    def __init__(self, alpha, beta, gamma, theta):
        super().__init__()
        initial_values = [alpha, beta, gamma, theta]
        self.params = nn.Parameter(torch.FloatTensor(initial_values))

    def forward(self, x):
        """Apply ``AdaptiveSigmoidFunction`` to ``x`` with the stored params."""
        return AdaptiveSigmoidFunction.apply(x, self.params)


--------------------------------------------------------------------------------
/metrics.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | import sys
 4 | from functools import reduce
 5 | 
 6 | class JointLoss(nn.Module):
 7 |     def __init__(self, alpha, beta):
 8 |         super(JointLoss, self).__init__()
 9 |         self.MSELoss = nn.MSELoss(size_average=False)
10 |         self.BCELoss = nn.BCELoss(size_average=True)
11 |         self.alpha = alpha
12 |         self.beta = beta
13 |     
14 |     def forward(self, x, gt_map, target_map):
15 |         mse = self.MSELoss(x, gt_map) * self.alpha
16 |         bce = self.BCELoss(x, target_map) * self.beta
17 | #         sys.stdout.write("mse loss = {}, bce loss = {}\r".format(mse, bce))
18 |         sys.stdout.flush()
19 |         return  mse + bce
20 |     
class MSEScalarLoss(nn.Module):
    """Squared difference of the two totals, divided by the element count."""

    def __init__(self):
        super(MSEScalarLoss, self).__init__()

    def forward(self, x, gt_map):
        """Return ``(x.sum() - gt_map.sum()) ** 2 / x.numel()``.

        ``numel()`` equals the product of ``x.shape`` and, unlike a bare
        ``reduce`` over the shape, is also defined for a 0-dim tensor.
        """
        total_gap = x.sum() - gt_map.sum()
        return torch.pow(total_gap, 2) / x.numel()
27 |         
class AEBatch(nn.Module):
    """Per-sample absolute error between a summed map and a target count."""

    def __init__(self):
        super().__init__()

    def forward(self, estimated_density_map, gt_num):
        """Sum the map over dims 1-3, subtract ``gt_num``, take abs()."""
        predicted_count = estimated_density_map.sum(dim=(1, 2, 3))
        return (predicted_count - gt_num).abs()
34 | 
35 | 
class SEBatch(nn.Module):
    """Per-sample squared error between a summed map and a target count."""

    def __init__(self):
        super().__init__()

    def forward(self, estimated_density_map, gt_num):
        """Sum the map over dims 1-3, subtract ``gt_num``, square."""
        predicted_count = estimated_density_map.sum(dim=(1, 2, 3))
        return (predicted_count - gt_num).pow(2)
42 | 


--------------------------------------------------------------------------------
/op_wrapper/adaptive_sigmoid_wrapper.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | from torch.autograd import Function
 4 | import adaptive_sigmoid_gpu as adaptive_sigmoid
 5 | 
 6 | class AdaptiveSigmoidFunction(Function):
 7 |     @staticmethod
 8 |     def forward(ctx, *args):
 9 |         if len(args) != 3:
10 |             print("wrong input parameters number, check the input")
11 |             return
12 |         input = args[0]
13 |         params = args[1]
14 |         ctx.updates_signal = args[2]
15 |         output = adaptive_sigmoid.forward(input, params)
16 |         ctx.save_for_backward(input, params)
17 |         return output
18 | 
19 |     @staticmethod
20 |     def backward(ctx, *grad_outputs):
21 |         if len(grad_outputs) != 1:
22 |             print("Wrong output number, check your output")
23 |             return
24 |         input, params = ctx.saved_tensors
25 |         grad_copy = grad_outputs[0].clone()
26 |         grad_input, grad_weight= adaptive_sigmoid.backward(input, params, grad_copy, *ctx.updates_signal)
27 |         return grad_input, grad_weight, None
28 |     
class AdaptiveSigmoid(nn.Module):
    """Module wrapper around ``AdaptiveSigmoidFunction``.

    Keyword Args:
        sigma: Initial parameter values; stored as a learnable float tensor
            in ``self.params``.
        updates_signal: Stored as-is and passed to the function on every
            forward call.
    """

    def __init__(self, **kwargs):
        super().__init__()
        initial_values = kwargs['sigma']
        self.params = nn.Parameter(torch.FloatTensor(initial_values))
        self.updates_signal = kwargs['updates_signal']

    def forward(self, x):
        """Apply the function to ``x`` with the stored params and flags."""
        return AdaptiveSigmoidFunction.apply(x, self.params, self.updates_signal)


--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import torch
 3 | import util.utils as util
 4 | 
 5 | class config(object):
 6 |     def __init__(self, opt):
 7 |         self.opt = opt
 8 |         self.min_mae = 10240000
 9 |         self.min_loss = 10240000
10 |         self.dataset_name = opt.dataset_name
11 |         self.batch_size = opt.batch_size
12 |         self.device = torch.device('cuda:{}'.format(opt.gpu_ids[0])) if opt.gpu_ids else torch.device('cpu')
13 |         self.model_save_path = os.path.join(opt.checkpoints_dir, opt.name, opt.dataset_name) # path of saving model
14 |         self.mode = opt.mode
15 |         prefix_path = opt.prefix_path # prefix path of training path
16 |         if self.dataset_name == "SHA":
17 |             self.eval_num = 182
18 |             self.train_num = 300
19 |             
20 |             self.train_gt_map_path = prefix_path + "/part_A_final/train_data/gt_map_sigma=4_k=7"
21 |             self.train_img_path = prefix_path + "/part_A_final/train_data/images"
22 |             self.train_pers_path = prefix_path + "/part_A_final/train_data/perspective_gt"
23 |             self.eval_gt_map_path = prefix_path + "/part_A_final/test_data/gt_map_sigma=4_k=7"
24 |             self.eval_img_path = prefix_path + "/part_A_final/test_data/images"
25 |             self.eval_gt_path = prefix_path + "/part_A_final/test_data/ground_truth"
26 |             self.eval_pers_path = prefix_path + "/part_A_final/test_data/perspective_gt"
27 |             
28 |         else:
29 |             raise NameError("Only SHA is released currently")
30 | 


--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
 1 | # config
 2 | import sys
 3 | import numpy as np
 4 | import torch
 5 | from config import config
 6 | import warnings
 7 | import time
 8 | from options.test_options import TestOptions
 9 | from Dataset.DatasetConstructor import EvalDatasetConstructor
10 | from eval.Estimator import Estimator
11 | from net.CSRPersNet import CSRPersNet
12 | import net.networks as networks
13 | 
# Parse the options and pin the values the evaluation code relies on.
opt = TestOptions().parse()
opt.nThreads = 1   # test code only supports nThreads = 1
opt.batch_size = 1  # test code only supports batchSize = 1
opt.is_flip = 0  # no flip

setting = config(opt)


# Evaluation data: one sample per batch.
eval_dataset = EvalDatasetConstructor(
    setting.eval_num,
    setting.eval_img_path,
    setting.eval_gt_map_path,
    setting.eval_pers_path,
    mode=setting.mode,
    dataset_name=setting.dataset_name,
    device=setting.device)
eval_loader = torch.utils.data.DataLoader(dataset=eval_dataset, batch_size=1)

# model construct
net = networks.define_net(opt)
net = networks.init_net(net, gpu_ids=opt.gpu_ids)

# init_net only wraps the network in DataParallel when GPU ids were given, so
# `.module` exists only in that case; load into the bare network either way.
bare_net = net.module if isinstance(net, torch.nn.DataParallel) else net
bare_net.load_state_dict(torch.load(opt.test_model_name, map_location=str(setting.device)))
criterion = torch.nn.MSELoss(reduction='sum').to(setting.device)
estimator = Estimator(setting, eval_loader, criterion=criterion)

# Run the evaluation once and report the aggregate numbers.
validate_MAE, validate_RMSE, validate_loss, time_cost = estimator.evaluate(net)
sys.stdout.write('loss = {}, eval_mae = {}, eval_rmse = {}, time cost eval = {}s\n'
                .format(validate_loss, validate_MAE, validate_RMSE, time_cost))
sys.stdout.flush()
44 | 


--------------------------------------------------------------------------------
/net/CSRNet.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | from torchvision import models
 3 | import torch.nn.functional as functional
 4 | import time
 5 | import torch
 6 | 
 7 | 
 8 | class CSRNet(nn.Module):
 9 |     def __init__(self):
10 |         super(CSRNet, self).__init__()
11 |         self.backend_feat = [(512, 2), (512, 2), (512, 2), (256, 2), (128, 2), (64, 2)]
12 |         self.front_end = nn.Sequential(*(list(list(models.vgg16_bn(True).children())[0].children())[0:33]))
13 |         self.back_end = make_layers(self.backend_feat, in_channels=512, batch_norm=True)
14 |         self.output_layer = nn.Conv2d(64, 1, kernel_size=1)
15 | 
16 |         for m in self.output_layer.modules():
17 |             if isinstance(m, nn.Conv2d):
18 |                 nn.init.normal_(m.weight, std=0.01)
19 |                 if m.bias is not None:
20 |                     nn.init.constant_(m.bias, 0)
21 |             elif isinstance(m, nn.BatchNorm2d):
22 |                 nn.init.constant_(m.weight, 1)
23 |                 nn.init.constant_(m.bias, 0)
24 | 
25 |     def forward(self, x):
26 |         img_shape = x.shape
27 |         front_end = self.front_end(x)
28 |         back_end = self.back_end(front_end)
29 |         output = self.output_layer(back_end)
30 |         output = functional.interpolate(output, scale_factor=4, mode='bilinear', align_corners=False)
31 |         return output
32 | 
33 | 
def make_layers(cfg, in_channels, batch_norm=False):
    """Assemble a ``nn.Sequential`` from ``(value, dilation)`` pairs.

    Args:
        cfg: Iterable of 2-tuples. A first element of ``'M'`` adds a 2x2
            max-pool with stride 2; otherwise it is the output channel count
            of a 3x3 convolution whose padding and dilation both equal the
            second element.
        in_channels: Input channel count of the first convolution.
        batch_norm: When true, each convolution is followed by BatchNorm2d
            before the in-place ReLU.

    Returns:
        The assembled ``nn.Sequential``.
    """
    stack = []
    current_channels = in_channels
    for out_channels, rate in cfg:
        if out_channels == 'M':
            stack.append(nn.MaxPool2d(kernel_size=2, stride=2))
            continue

        stack.append(nn.Conv2d(current_channels, out_channels, kernel_size=3,
                               padding=rate, dilation=rate))
        if batch_norm:
            stack.append(nn.BatchNorm2d(out_channels))
        stack.append(nn.ReLU(inplace=True))
        current_channels = out_channels
    return nn.Sequential(*stack)
47 | 


--------------------------------------------------------------------------------
/PytorchCudaOpExtension/perspective_aware_conv2d/pad_conv2d.h:
--------------------------------------------------------------------------------
 1 | #ifndef PAD_CONVOLUTION
 2 | #define PAD_CONVOLUTION
 3 | #include <THC/THC.h>
 4 | #include <cuda.h>
 5 | #include <cuda_runtime.h>
 6 | #include <vector>
 7 | #include <cmath>
 8 | extern THCState *state;
 9 | typedef std::vector<int> TShape;
10 | 
11 | inline int ProdShape(const TShape &shape, int start, int end) {
12 |     int res = 1;
13 |     for(int i=start; i<end; i++) {
14 |         res*=shape[i];
15 |     }
16 |     return res;
17 | }
18 | 
19 | inline TShape SubVector(const TShape &shape, int start, int end) {
20 |     TShape res;
21 |     for(int i=start;i<end;i++){
22 |         res.push_back(shape[i]);
23 |     }
24 |     return res;
25 | }
26 | 
27 | void pad_conv2d_im2col(cudaStream_t stream,
28 |     const float* data_im, const float* rate,
29 |     const int in_channels, const int height, const int width,
30 |     const int kernel_h, const int kernel_w,
31 | //     const int pad_h, const int pad_w,
32 |     const int stride_h, const int stride_w,
33 | //     const int dilation_h, const int dilation_w,
34 |     const int height_out, const int width_out,
35 |     float* data_col);
36 | 
37 | void pad_conv2d_col2im_coord(cudaStream_t stream,
38 |     const float* data_col, const float* data_im, const float* data_rate,
39 |     const int in_channels, const int height, const int width,
40 |     const int kernel_h, const int kernel_w,
41 |     const int stride_h, const int stride_w,
42 |     const int height_col, const int width_col,
43 |     float* grad_rate_map);
44 | 
45 | void pad_conv2d_col2im(cudaStream_t stream,
46 |     const float* data_col, const float* data_rate,
47 |     const int in_channels, const int height, const int width,
48 |     const int kernel_h, const int kernel_w,
49 |     const int stride_h, const int stride_w,
50 |     const int height_out, const int width_out,
51 |     float* grad_im);
52 | 
53 | void add_bias(cudaStream_t stream,
54 |     float* data_out,
55 |     const float* bias,
56 |     const int out_channels,
57 |     const int height_out, const int width_out
58 |     );
59 | 
60 | void calculate_dbias(cudaStream_t stream,
61 |     const float* grad_output,
62 |     float* grad_bias,
63 |     const int out_channels,
64 |     const int height_out, const int width_out
65 |     );
66 | 
67 | #endif
68 | 


--------------------------------------------------------------------------------
/PytorchCudaOpExtension/perspective_aware_conv2d/pad_conv2d_wrapper.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | from torch.autograd import Function
 4 | from torch.nn import Module
 5 | import pad_conv2d_gpu as pad_conv2d
 6 | from adaptive_sigmoid.adaptive_sigmoid_wrapper import AdaptiveSigmoid
 7 | 
 8 | 
 9 | class PerspectiveDilatedConv2dFunction(Function):
10 |     @staticmethod
11 |     def forward(ctx, *args):
12 |         if len(args) != 6:
13 |             print("wrong input parameters number, check the input")
14 |             return
15 |         input = args[0]
16 |         weights = args[1]
17 |         rate_map = args[2]
18 |         bias = args[3]
19 |         ctx.stride_h = args[4]
20 |         ctx.stride_w = args[5]
21 |         output = pad_conv2d.forward(input, weights, rate_map, bias, ctx.stride_h, ctx.stride_w)
22 |         ctx.save_for_backward(input, weights, rate_map, bias)
23 |         return output
24 | 
25 |     @staticmethod
26 |     def backward(ctx, *grad_outputs):
27 |         if len(grad_outputs) != 1:
28 |             print("Wrong output number, check your output")
29 |             return
30 |         input, weights, rate_map, bias = ctx.saved_tensors
31 |         grad_input, grad_weight, grad_rate_map, grad_bias = pad_conv2d.backward(input, weights, rate_map, bias, grad_outputs[0], ctx.stride_h, ctx.stride_w)
32 |         return grad_input, grad_weight, grad_rate_map, grad_bias, None, None
33 | 
34 | 
class PerspectiveDilatedConv2dLayer(Module):
    """Holds the weight and bias for ``PerspectiveDilatedConv2dFunction``.

    The weight has shape ``(out_channels, in_channels, kernel_size,
    kernel_size)`` and is Xavier-uniform initialised with gain 1; the bias
    has shape ``(out_channels,)`` and starts at zero.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride_h, stride_w):
        super().__init__()
        self.stride_h = stride_h
        self.stride_w = stride_w
        weight_shape = (out_channels, in_channels, kernel_size, kernel_size)
        self.weight = nn.Parameter(torch.zeros(*weight_shape, dtype=torch.float32))
        self.bias = nn.Parameter(torch.zeros(out_channels, dtype=torch.float32))
        nn.init.xavier_uniform_(self.weight, gain=1)

    def forward(self, inputs, rate_map):
        """Apply the custom convolution to ``inputs`` using ``rate_map``."""
        return PerspectiveDilatedConv2dFunction.apply(
            inputs, self.weight, rate_map, self.bias, self.stride_h, self.stride_w)
46 | 
47 |     
class BasicPerspectiveDilatedConv2D(Module):
    """Rate-map generator plus the perspective-dilated convolution.

    Args:
        in_channels: Input channels of the convolution.
        out_channels: Output channels of the convolution.
        kernel_size: Kernel size; also fixes the padding to
            ``kernel_size // 2`` on every side.
        stride: Accepted but not used -- the layer always runs with stride 1.
        *args: The first four entries are passed to ``AdaptiveSigmoid`` in
            order.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1, *args):
        super().__init__()
        alpha, beta, gamma, theta = args[0], args[1], args[2], args[3]
        self.rate_map_generator = AdaptiveSigmoid(alpha, beta, gamma, theta)

        # Hard-coded: the `stride` argument above is ignored.
        self.stride = 1
        self.pad = (kernel_size // 2)
        self.perspective_dilated_conv2d = PerspectiveDilatedConv2dLayer(
            in_channels, out_channels, kernel_size, self.stride, self.stride)

    def forward(self, x, perspective):
        """Derive the rate map from ``perspective``, pad ``x``, convolve."""
        rate_map = self.rate_map_generator(perspective)
        # Same padding amount on all four sides of the last two dimensions.
        padded = torch.nn.functional.pad(x, [self.pad] * 4)
        return self.perspective_dilated_conv2d(padded, rate_map)
63 | 
64 | 


--------------------------------------------------------------------------------
/PytorchCudaOpExtension/adaptive_sigmoid/adaptive_sigmoid.cpp:
--------------------------------------------------------------------------------
 1 | #include <torch/extension.h>
 2 | #include "adaptive_sigmoid.h"
 3 | 
 4 | at::Tensor adaptive_sigmoid_forward(
 5 |     at::Tensor input,
 6 |     at::Tensor params
 7 | ){
 8 |     // Element-wise adaptive sigmoid over a 4-D float tensor; returns a new tensor of the same shape.
 9 |     input = input.contiguous();    // the launcher walks raw float pointers, so the data must be densely packed
10 |     params = params.contiguous();
11 |     const int batch = input.size(0);
12 |     const int channels = input.size(1);
13 |     const int height = input.size(2);
14 |     const int width = input.size(3);
15 |     // 64-bit element count so the per-instance pointer offset cannot overflow.
16 |     const int64_t instance_size = static_cast<int64_t>(channels) * height * width;
17 | 
18 |     auto output = at::empty({batch, channels, height, width}, input.options());
19 |     if (instance_size == 0) return output;  // an empty grid would be an invalid launch
20 | 
21 |     const float* input_ptr = input.data<float>();
22 |     const float* params_ptr = params.data<float>();
23 |     float* output_ptr = output.data<float>();
24 |     for (int i = 0; i < batch; i++) {  // one launch per batch instance, all sharing params
25 |         const int64_t offset = i * instance_size;
26 |         adaptive_sigmoid_fucntion(THCState_getCurrentStream(state),
27 |                                   input_ptr + offset, params_ptr, output_ptr + offset,
28 |                                   channels, height, width);
29 |     }
30 | 
31 |     return output;
32 | }
33 | 
34 | std::vector<at::Tensor> adaptive_sigmoid_backward(
35 |     at::Tensor input,
36 |     at::Tensor params,
37 |     at::Tensor grad_outputs,
38 |     bool alpha_update,
39 |     bool beta_update,
40 |     bool gamma_update,
41 |     bool theta_update
42 | ){
43 |     // Returns {grad_input, grad_params}; the *_update flags select which entries of grad_params are accumulated.
44 |     // Raw float pointers are walked below, so every tensor is made densely packed first (incoming gradients need not be).
45 |     input = input.contiguous();
46 |     params = params.contiguous();
47 |     grad_outputs = grad_outputs.contiguous();
48 |     const int batch = input.size(0);
49 |     const int channels = input.size(1);
50 |     const int height = input.size(2);
51 |     const int width = input.size(3);
52 |     // 64-bit element count so the per-instance pointer offset cannot overflow.
53 |     const int64_t instance_size = static_cast<int64_t>(channels) * height * width;
54 | 
55 |     auto grad_input = at::zeros_like(input);
56 |     auto grad_params = at::zeros_like(params);
57 |     if (instance_size == 0) return {grad_input, grad_params};  // an empty grid would be an invalid launch
58 | 
59 |     const float* input_ptr = input.data<float>();
60 |     const float* grad_output_ptr = grad_outputs.data<float>();
61 |     const float* params_ptr = params.data<float>();
62 |     float* grad_input_ptr = grad_input.data<float>();
63 |     float* grad_params_ptr = grad_params.data<float>();
64 |     for (int i = 0; i < batch; i++) {
65 |         const int64_t offset = i * instance_size;
66 |         adaptive_sigmoid_input_grad(
67 |             THCState_getCurrentStream(state),
68 |             input_ptr + offset, grad_output_ptr + offset, params_ptr,
69 |             grad_input_ptr + offset,
70 |             channels, height, width
71 |         );
72 |         // grad_params is shared by every instance, so contributions add up across the batch.
73 |         adaptive_sigmoid_params_grad(
74 |             THCState_getCurrentStream(state),
75 |             input_ptr + offset, grad_output_ptr + offset, params_ptr,
76 |             grad_params_ptr,
77 |             channels, height, width,
78 |             alpha_update, beta_update, gamma_update, theta_update
79 |         );
80 |     }
81 | 
82 |     return {grad_input, grad_params};
83 | }
83 | 
84 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m){  // Python bindings for the two host entry points of this file
85 |   m.def("forward", &adaptive_sigmoid_forward, "adaptive sigmoid forward (CUDA)");  // callable from Python as <module>.forward
86 |   m.def("backward", &adaptive_sigmoid_backward, "adaptive sigmoid backward (CUDA)");  // callable from Python as <module>.backward
87 | }


--------------------------------------------------------------------------------
/options/base_options.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import os
 3 | import torch
 4 | import util.utils as util
 5 | 
 6 | class BaseOptions():
 7 |     def __init__(self):
 8 |         self.initialized = False
 9 | 
10 |     def initialize(self, parser):
11 |         parser.add_argument('--dataset_name', default='SHA', help='SHA|SHB|QNRF')
12 |         parser.add_argument('--test_model_name', default='', help='path of pretrained model')
13 |         parser.add_argument('--batch_size', type=int, default=1, help='input batch size')
14 |         parser.add_argument('--net_name', type=str, default='csrpersp', help='csrnet|csrpersp')
15 |         parser.add_argument('--mode', type=str, default='whole', help='whole|crop')
16 |         parser.add_argument('--prefix_path', type=str, default='./data', help='path of the dataset folder')
17 |         parser.add_argument('--name', type=str, default='Csrnet_persp', help='name of the experiment.s')
18 |         parser.add_argument('--gpu_ids', type=str, default='0', help='gpu ids: e.g. 0  0,1,2, 0,2')
19 |         parser.add_argument('--alpha', type=float, default=1, help='alpha in adaptive sigmoid')
20 |         parser.add_argument('--beta', type=float, default=1, help='beta in adaptive sigmoid')
21 |         parser.add_argument('--gamma', type=float, default=1, help='gamma in adaptive sigmoid')
22 |         parser.add_argument('--theta', type=float, default=2, help='theta in adaptive sigmoid')
23 |         parser.add_argument('--nThreads', default=2, type=int, help='# threads for loading data')
24 |         parser.add_argument('--checkpoints_dir', type=str, default='./output', help='models are saved here')
25 |         self.initialized = True
26 |         return parser
27 | 
28 |     def gather_options(self, options=None):
29 |         # initialize parser with basic options
30 |         if not self.initialized:
31 |             parser = argparse.ArgumentParser(
32 |                 formatter_class=argparse.ArgumentDefaultsHelpFormatter)
33 |             parser = self.initialize(parser)
34 | 
35 | 
36 |         self.parser = parser
37 |         if options == None:
38 |             return parser.parse_args()
39 |         else:
40 |             return parser.parse_args(options)
41 | 
42 |     def print_options(self, opt):
43 |         message = ''
44 |         message += '----------------- Options ---------------\n'
45 |         for k, v in sorted(vars(opt).items()):
46 |             comment = ''
47 |             default = self.parser.get_default(k)
48 |             if v != default:
49 |                 comment = '\t[default: %s]' % str(default)
50 |             message += '{:>25}: {:<30}{}\n'.format(str(k), str(v), comment)
51 |         message += '----------------- End -------------------'
52 |         print(message)
53 | 
54 |         # save to the disk
55 |         expr_dir = os.path.join(opt.checkpoints_dir, opt.name, opt.dataset_name)
56 |         util.mkdirs(expr_dir)
57 |         file_name = os.path.join(expr_dir, 'opt.txt')
58 |         with open(file_name, 'wt') as opt_file:
59 |             opt_file.write(message)
60 |             opt_file.write('\n')
61 | 
62 |     def parse(self, options=None):
63 | 
64 |         opt = self.gather_options(options=options)
65 |         opt.isTrain = self.isTrain   # train or test
66 | 
67 | 
68 |         self.print_options(opt)
69 | 
70 |         # set gpu ids
71 |         os.environ["CUDA_VISIBLE_DEVICES"]=opt.gpu_ids
72 |         str_ids = opt.gpu_ids.split(',')
73 |         opt.gpu_ids = []
74 |         for str_id in str_ids:
75 |             id = int(str_id)
76 |             if id >= 0:
77 |                 opt.gpu_ids.append(id)
78 |         # re-order gpu ids
79 |         opt.gpu_ids = [i.item() for i in torch.arange(len(opt.gpu_ids))]
80 |         if len(opt.gpu_ids) > 0:
81 |             torch.cuda.set_device(opt.gpu_ids[0])
82 | 
83 |         self.opt = opt
84 |         return self.opt
85 | 


--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | import torch.nn.functional as F
 4 | import matplotlib.pyplot as plt
 5 | import torchvision.transforms as transforms
 6 | import cv2
 7 | import numpy as np
 8 | import scipy
 9 | 
10 | def show(origin_map, gt_map, predict, index):
11 |     figure, (origin, gt, pred) = plt.subplots(1, 3, figsize=(20, 4))
12 |     origin.imshow(origin_map)
13 |     origin.set_title("origin picture")
14 |     gt.imshow(gt_map, cmap=plt.cm.jet)
15 |     gt.set_title("gt map")
16 |     pred.imshow(predict, cmap=plt.cm.jet)
17 |     pred.set_title("prediction")
18 |     plt.suptitle(str(index) + "th sample")
19 |     plt.show()
20 |     plt.close()
21 | 
22 | 
23 | class HSI_Calculator(nn.Module):
24 |     def __init__(self):
25 |         super(HSI_Calculator, self).__init__()
26 | 
27 |     def forward(self, image):
28 |         image = transforms.ToTensor()(image)
29 |         I = torch.mean(image)
30 |         Sum = image.sum(0)
31 |         Min = 3 * image.min(0)[0]
32 |         S = (1 - Min.div(Sum.clamp(1e-6))).mean()
33 |         numerator = (2 * image[0] - image[1] - image[2]) / 2
34 |         denominator = ((image[0] - image[1]) ** 2 + (image[0] - image[2]) * (image[1] - image[2])).sqrt()
35 |         theta = (numerator.div(denominator.clamp(1e-6))).clamp(-1 + 1e-6, 1 - 1e-6).acos()
36 |         logistic_matrix = (image[1] - image[2]).ceil()
37 |         H = (theta * logistic_matrix + (1 - logistic_matrix) * (360 - theta)).mean() / 360
38 |         return H, S, I
39 | 
40 | 
41 | def eval_steps_adaptive(var):
42 |     return {
43 |             400 * 100: 5000,
44 |             400 * 500: 2000,
45 |             400 * 1000: 1000,
46 |     }.get(var, 1600)
47 | 
48 | 
49 | def get_density_map_gaussian(N, M, points, adaptive_kernel=False, fixed_value=15):
50 |     density_map = np.zeros([N, M], dtype=np.float32)
51 |     h, w = density_map.shape[:2]
52 |     h = h // 8
53 |     w = w // 8
54 |     num_gt = np.squeeze(points).shape[0]
55 |     if num_gt == 0:
56 |         return density_map
57 | 
58 |     if adaptive_kernel:
59 |         # referred from https://github.com/vlad3996/computing-density-maps/blob/master/make_ShanghaiTech.ipynb
60 |         leafsize = 2048
61 |         tree = scipy.spatial.KDTree(points.copy(), leafsize=leafsize)
62 |         distances = tree.query(points, k=4)[0]
63 | 
64 |     for idx, p in enumerate(points):
65 |         p = np.round(p).astype(int)
66 |         p[0], p[1] = min(h-1, p[1] // 8), min(w-1, p[0] // 8)
67 |         if num_gt > 1:
68 |             if adaptive_kernel:
69 |                 sigma = int(np.sum(distances[idx][1:4]) // 3 * 0.3)
70 |             else:
71 |                 sigma = fixed_value
72 |         else:
73 |             sigma = fixed_value  # np.average([h, w]) / 2. / 2.
74 |         sigma = max(1, sigma)
75 | 
76 |         gaussian_radius = sigma * 3
77 |         gaussian_map = np.multiply(
78 |             cv2.getGaussianKernel(gaussian_radius*2+1, sigma),
79 |             cv2.getGaussianKernel(gaussian_radius*2+1, sigma).T
80 |         )
81 |         x_left, x_right, y_up, y_down = 0, gaussian_map.shape[1], 0, gaussian_map.shape[0]
82 |         # cut the gaussian kernel
83 |         if p[1] < 0 or p[0] < 0:
84 |             continue
85 |         if p[1] < gaussian_radius:
86 |             x_left = gaussian_radius - p[1]
87 |         if p[0] < gaussian_radius:
88 |             y_up = gaussian_radius - p[0]
89 |         if p[1] + gaussian_radius >= w:
90 |             x_right = gaussian_map.shape[1] - (gaussian_radius + p[1] - w) - 1
91 |         if p[0] + gaussian_radius >= h:
92 |             y_down = gaussian_map.shape[0] - (gaussian_radius + p[0] - h) - 1
93 |         density_map[
94 |             max(0, p[0]-gaussian_radius):min(density_map.shape[0] // 8, p[0]+gaussian_radius+1),
95 |             max(0, p[1]-gaussian_radius):min(density_map.shape[1] // 8, p[1]+gaussian_radius+1)
96 |         ] += gaussian_map[y_up:y_down, x_left:x_right]
97 |     return density_map


--------------------------------------------------------------------------------
/op_wrapper/pad_conv2d_wrapper.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | from torch.autograd import Function
 4 | from torch.nn import Module
 5 | import pad_conv2d_gpu as pad_conv2d
 6 | from op_wrapper.adaptive_sigmoid_wrapper import AdaptiveSigmoid
 7 | 
 8 | 
 9 | class PerspectiveDilatedConv2dFunction(Function):
10 |     @staticmethod
11 |     def forward(ctx, *args):
12 |         if len(args) != 6:
13 |             print("wrong input parameters number, check the input")
14 |             return
15 |         input = args[0]
16 |         weights = args[1]
17 |         rate_map = args[2]
18 |         bias = args[3]
19 |         ctx.stride_h = args[4]
20 |         ctx.stride_w = args[5]
21 |         output = pad_conv2d.forward(input, weights, rate_map, bias, ctx.stride_h, ctx.stride_w)
22 |         ctx.save_for_backward(input, weights, rate_map, bias)
23 |         return output
24 | 
25 |     @staticmethod
26 |     def backward(ctx, *grad_outputs):
27 |         if len(grad_outputs) != 1:
28 |             print("Wrong output number, check your output")
29 |             return
30 |         input, weights, rate_map, bias = ctx.saved_tensors
31 |         grad_copy = grad_outputs[0].clone()
32 |         grad_input, grad_weight, grad_rate_map, grad_bias = pad_conv2d.backward(input, weights, rate_map, bias, grad_copy, ctx.stride_h, ctx.stride_w)
33 |         return grad_input, grad_weight, grad_rate_map, grad_bias, None, None
34 | 
35 | 
36 | class PerspectiveDilatedConv2dLayer(Module):
37 |     def __init__(self, in_channels, out_channels, kernel_size, stride_h, stride_w):
38 |         super(PerspectiveDilatedConv2dLayer, self).__init__()
39 |         self.stride_h = stride_h
40 |         self.stride_w = stride_w
41 |         self.weight = nn.Parameter(torch.zeros(out_channels, in_channels, kernel_size, kernel_size, dtype=torch.float32))
42 |         self.bias = nn.Parameter(torch.zeros(out_channels, dtype=torch.float32))
43 |         nn.init.xavier_uniform_(self.weight, gain=1)
44 | 
45 |     def forward(self, inputs, rate_map):
46 |         return PerspectiveDilatedConv2dFunction.apply(inputs, self.weight, rate_map, self.bias, self.stride_h, self.stride_w)
47 | 
48 |     
49 | class BasicPerspectiveDilatedConv2D(Module):
50 |     def __init__(self, in_channels, out_channels, kernel_size, stride=1, **kwargs):
51 |         super(BasicPerspectiveDilatedConv2D, self).__init__()
52 |         self.rate_map_generator = AdaptiveSigmoid(**kwargs)
53 | #         self.rate_map_generator.params.register_hook(lambda x:print('Conv', x))
54 |         self.stride = 1
55 |         self.pad = (kernel_size // 2)
56 |         self.perspective_dilated_conv2d = PerspectiveDilatedConv2dLayer(in_channels, out_channels, kernel_size, self.stride, self.stride)
57 |         
58 |     def forward(self, x, perspective):
59 |         rate_map = self.rate_map_generator(perspective)
60 | #         rate_map = self.rate_map_generator(x)
61 |         x = torch.nn.functional.pad(x, [self.pad, self.pad, self.pad, self.pad ])
62 |         return self.perspective_dilated_conv2d(x, rate_map)
63 | 
64 | class BasicPerspectiveDilatedConv2D_BN(Module):
65 |     def __init__(self, in_channels, out_channels, kernel_size, stride=1, **kwargs):
66 |         super(BasicPerspectiveDilatedConv2D_BN, self).__init__()
67 |         self.rate_map_generator = AdaptiveSigmoid(**kwargs)
68 | #         self.rate_map_generator.params.register_hook(lambda x:print('Conv', x))
69 |         self.stride = 1
70 |         self.pad = (kernel_size // 2)
71 |         self.perspective_dilated_conv2d = PerspectiveDilatedConv2dLayer(in_channels, out_channels, kernel_size, self.stride, self.stride)
72 |         self.bn = nn.BatchNorm2d(out_channels)
73 |         
74 |     def forward(self, x, perspective):
75 |         rate_map = self.rate_map_generator(perspective)
76 | #         rate_map = self.rate_map_generator(x)
77 |         x = torch.nn.functional.pad(x, [self.pad, self.pad, self.pad, self.pad ])
78 |         x = self.perspective_dilated_conv2d(x, rate_map)
79 |         x = self.bn(x)
80 |         return x
81 | 
82 | 


--------------------------------------------------------------------------------
/eval/Estimator.py:
--------------------------------------------------------------------------------
 1 | import random
 2 | import math
 3 | import os
 4 | import numpy as np
 5 | import sys
 6 | from PIL import Image
 7 | from utils import show
 8 | from metrics import AEBatch, SEBatch
 9 | import time
10 | import torch
11 | import scipy.io as scio
12 | 
13 | class Estimator(object):
14 |     def __init__(self, setting, eval_loader, criterion=torch.nn.MSELoss(reduction="sum")):
15 |         self.setting = setting
16 |         self.ae_batch = AEBatch().to(self.setting.device)
17 |         self.se_batch = SEBatch().to(self.setting.device)
18 |         self.criterion = criterion
19 |         self.eval_loader = eval_loader
20 |         
21 |     def evaluate(self, model):
22 |         net = model.eval()
23 |         MAE_, MSE_, loss_ = [], [], []
24 |         time_cost = 0
25 |         for eval_img_path, eval_img, eval_gt, eval_pers in self.eval_loader:
26 |             eval_img_path = eval_img_path[0]
27 |             eval_img = eval_img.to(self.setting.device)
28 |             eval_gt = eval_gt.to(self.setting.device)
29 | 
30 |             start = time.time()
31 |             with torch.no_grad():
32 |                 # test cropped patches
33 |                 if self.setting.mode == 'crop': 
34 |                     eval_patchs, eval_pers = torch.squeeze(eval_img), torch.squeeze(eval_pers, dim=0)
35 |                     eval_prediction = net(eval_patchs, eval_pers)
36 |                     prediction_map = torch.zeros(eval_gt.shape).to(self.setting.device)
37 |                     self.test_crops(eval_prediction.shape, eval_prediction, prediction_map)
38 |                 # test whole images
39 |                 elif self.setting.mode == 'whole': 
40 |                     prediction_map = net(eval_img, eval_pers)
41 |                 gt_counts = self.get_gt_num(self.setting.eval_gt_path, eval_img_path)
42 |                 # calculate metrics
43 |                 batch_ae = self.ae_batch(prediction_map, gt_counts).data.cpu().numpy()
44 |                 batch_se = self.se_batch(prediction_map, gt_counts).data.cpu().numpy()
45 |                 loss = self.criterion(prediction_map, eval_gt)
46 |                 loss_.append(loss.data.item())
47 |                 MAE_.append(batch_ae)
48 |                 MSE_.append(batch_se)
49 |             torch.cuda.synchronize()
50 |             end = time.time()
51 |             time_cost += (end - start)
52 | 
53 |         # return the validate loss, validate MAE and validate RMSE
54 |         MAE_, MSE_, loss_ = np.reshape(MAE_, [-1]), np.reshape(MSE_, [-1]), np.reshape(loss_, [-1])
55 |         return np.mean(MAE_), np.sqrt(np.mean(MSE_)), np.mean(loss_), time_cost
56 | 
57 |     def get_gt_num(self, eval_gt_path, img_path):
58 |         tmp_mat_name = os.path.basename(img_path).replace('IMG_', 'GT_IMG_').replace('.jpg', '.mat')
59 |         gt_path = os.path.join(eval_gt_path, os.path.basename(tmp_mat_name))
60 |         gt_counts = len(scio.loadmat(gt_path)['image_info'][0][0][0][0][0])
61 |         return gt_counts
62 |     
63 |     def test_crops(self, eval_shape, eval_p, pred_m):
64 |         for i in range(3):
65 |             for j in range(3):
66 |                 start_h, start_w = math.floor(eval_shape[2] / 4), math.floor(eval_shape[3] / 4)
67 |                 valid_h, valid_w = eval_shape[2] // 2, eval_shape[3] // 2
68 |                 pred_h = math.floor(3 * eval_shape[2] / 4) + (eval_shape[2] // 2) * (i - 1)
69 |                 pred_w = math.floor(3 * eval_shape[3] / 4) + (eval_shape[3] // 2) * (j - 1)
70 |                 if i == 0:
71 |                     valid_h = math.floor(3 * eval_shape[2] / 4)
72 |                     start_h = 0
73 |                     pred_h = 0
74 |                 elif i == 2:
75 |                     valid_h = math.ceil(3 * eval_shape[2] / 4)
76 | 
77 |                 if j == 0:
78 |                     valid_w = math.floor(3 * eval_shape[3] / 4)
79 |                     start_w = 0
80 |                     pred_w = 0
81 |                 elif j == 2:
82 |                     valid_w = math.ceil(3 * eval_shape[3] / 4)
83 |                 pred_m[:, :, pred_h:pred_h + valid_h, pred_w:pred_w + valid_w] += eval_p[i * 3 + j:i * 3 + j + 1, :,start_h:start_h + valid_h, start_w:start_w + valid_w]
84 | 


--------------------------------------------------------------------------------
/PytorchCudaOpExtension/adaptive_sigmoid/adaptive_sigmoid_cuda.cu:
--------------------------------------------------------------------------------
  1 | #include <ATen/ATen.h>
  2 | #include <ATen/cuda/CUDAContext.h>
  3 | #include <THC/THCAtomics.cuh>
  4 | #include <THC/THCDeviceUtils.cuh>
  5 | #include "adaptive_sigmoid.h"
  6 | 
  7 | #define CUDA_KERNEL_LOOP(i ,n) \
  8 |     for (int i = blockIdx.x * blockDim.x + threadIdx.x; i<(n); i+= blockDim.x * gridDim.x)  // grid-stride loop over indices [0, n)
  9 | 
 10 | const int CUDA_NUM_THREADS = 1024;  // block size passed to every kernel launch in this file
 11 | 
 12 | inline int GET_BLOCKS(const int N){  // number of CUDA_NUM_THREADS-sized blocks needed to cover N indices
 13 |   return (N + CUDA_NUM_THREADS - 1) / CUDA_NUM_THREADS;  // ceiling division
 14 | }
 15 | 
 16 | __global__ void adaptive_sigmoid_fucntion_kernel(
 17 |     int n,
 18 |     const float* data_in,
 19 |     const float* params,
 20 |     float* output
 21 | ){
 22 |     // Element-wise: output[i] = gamma * (sigmoid(alpha * (data_in[i] - beta)) - theta),
 23 |     // with params read as [alpha, beta, gamma, theta]. Indices are covered by CUDA_KERNEL_LOOP.
 24 |     CUDA_KERNEL_LOOP(idx, n){
 25 |         const float alpha = params[0], beta = params[1];
 26 |         const float gamma = params[2], theta = params[3];
 27 |         const float shifted = data_in[idx] - beta;
 28 |         const float sigmoid = 1 / (1 + exp(-alpha * shifted));
 29 |         output[idx] = gamma * (sigmoid - theta);
 30 |     }
 31 | }
 32 | 
 33 | __global__ void adaptive_sigmoid_input_grad_kernel(
 34 |     int n,
 35 |     const float* data_in,
 36 |     const float* grad_output,
 37 |     const float* params,
 38 |     float* grad_input
 39 | ){
 40 |     // Writes grad_input[i] = gamma * alpha * s'(i) * grad_output[i], where
 41 |     // s'(i) = e / (1 + e)^2 and e = exp(-alpha * (data_in[i] - beta)).
 42 |     // Only params[0..2] (alpha, beta, gamma) are read here.
 43 |     CUDA_KERNEL_LOOP(idx, n){
 44 |         const float alpha = params[0], beta = params[1], gamma = params[2];
 45 |         const float e = exp(- alpha * (data_in[idx] - beta));
 46 |         const float one_plus_e = 1 + e;
 47 |         const float slope = e / (one_plus_e * one_plus_e);
 48 |         grad_input[idx] = gamma * alpha * slope * grad_output[idx];
 49 |     }
 50 | }
 51 | 
 52 | __global__ void adaptive_sigmoid_params_grad_kernel(
 53 |     int n,
 54 |     const float* data_in,
 55 |     const float* grad_output,
 56 |     const float* params,
 57 |     float* grad_params,
 58 |     bool alpha_update,
 59 |     bool beta_update,
 60 |     bool gamma_update,
 61 |     bool theta_update
 62 | ){
 63 |     // Adds d(output)/d(param) * grad_output into grad_params[0..3] = [alpha, beta, gamma, theta]; a false flag skips that slot.
 64 |     CUDA_KERNEL_LOOP(index, n){
 65 |         float alpha = params[0];
 66 |         float beta = params[1];
 67 |         float gamma = params[2];
 68 |         float theta = params[3];
 69 |         float value = data_in[index];
 70 |         float d_grad_output = grad_output[index];
 71 |         float efx = exp(- alpha * (value - beta));
 72 |         float sigmoid = 1 / (1 + efx);
 73 |         float patial = efx / ((1 + efx) * (1 + efx));  // equals sigmoid * (1 - sigmoid)
 74 |         float d_alpha = gamma * patial * (value - beta);
 75 |         float d_beta = gamma * patial * (- alpha);
 76 |         // The forward output is gamma * (sigmoid - theta), so the -theta term belongs in d_gamma.
 77 |         float d_gamma = sigmoid - theta;
 78 |         float d_theta = -gamma;
 79 |         if (alpha_update)  // many threads add into the same four slots, hence the atomics
 80 |             atomicAdd(grad_params + 0, d_alpha * d_grad_output);
 81 |         if (beta_update)
 82 |             atomicAdd(grad_params + 1, d_beta * d_grad_output);
 83 |         if (gamma_update)
 84 |             atomicAdd(grad_params + 2, d_gamma * d_grad_output);
 85 |         if (theta_update)
 86 |             atomicAdd(grad_params + 3, d_theta * d_grad_output);
 87 |     }
 88 | }
 89 | 
 90 | void adaptive_sigmoid_fucntion(
 91 |     cudaStream_t stream,
 92 |     const float* data_in,
 93 |     const float* params,
 94 |     float* output,
 95 |     int channels, int height, int width
 96 | ){
 97 |     // Launches the forward kernel on `stream`, one loop index per element of the channels*height*width block.
 98 |     const int num_kernels = channels * height * width;
 99 |     if (num_kernels <= 0) return;  // GET_BLOCKS would yield an empty grid, which is not a valid launch
100 |     adaptive_sigmoid_fucntion_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS, 0, stream>>>(
101 |         num_kernels,
102 |         data_in, params, output
103 |     );
104 | }
105 | 
106 | void adaptive_sigmoid_input_grad(
107 |     cudaStream_t stream,
108 |     const float* data_in,
109 |     const float* grad_outputs,
110 |     const float* params,
111 |     float* grad_input,
112 |     int channels, int height, int width
113 | ){
114 |     // Launches the input-gradient kernel on `stream`, one loop index per element of the channels*height*width block.
115 |     const int num_kernels = channels * height * width;
116 |     if (num_kernels <= 0) return;  // GET_BLOCKS would yield an empty grid, which is not a valid launch
117 |     adaptive_sigmoid_input_grad_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS, 0, stream>>>(
118 |         num_kernels,
119 |         data_in, grad_outputs, params,
120 |         grad_input
121 |     );
122 | }
123 | 
124 | void adaptive_sigmoid_params_grad(
125 |     cudaStream_t stream,
126 |     const float* data_in,
127 |     const float* grad_outputs,
128 |     const float* params,
129 |     float* grad_params,
130 |     int channels, int height, int width,
131 |     bool alpha_update,
132 |     bool beta_update,
133 |     bool gamma_update,
134 |     bool theta_update
135 | ){
136 |     // Launches the parameter-gradient kernel on `stream`; results are added into grad_params, not overwritten.
137 |     const int num_kernels = channels * height * width;
138 |     if (num_kernels <= 0) return;  // GET_BLOCKS would yield an empty grid, which is not a valid launch
139 |     adaptive_sigmoid_params_grad_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS, 0, stream>>>(
140 |         num_kernels,
141 |         data_in,
142 |         grad_outputs,
143 |         params,
144 |         grad_params,
145 |         alpha_update, beta_update,
146 |         gamma_update, theta_update
147 |     );
148 | }


--------------------------------------------------------------------------------
/net/CSRPersNet_onlyBack_crop.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | from torchvision import models
  4 | from net.BasicConv2d import BasicConv2d
  5 | from op_wrapper.pad_conv2d_wrapper import BasicPerspectiveDilatedConv2D_BN
  6 | from net.BasicConv2d import BasicConv2d
  7 | from op_wrapper.adaptive_sigmoid_wrapper import AdaptiveSigmoid
  8 | from op_wrapper.pad_conv2d_wrapper import PerspectiveDilatedConv2dLayer
  9 | from collections import OrderedDict
 10 | import torch.nn.functional as F
 11 | 
 12 | pretrain_dict = nn.ModuleList(list(list(models.vgg16(True).children())[0].children())[0:33]).state_dict()  # state dict of up to 33 leading layers of vgg16's first child module, built at import time; NOTE(review): not referenced again in this file
 13 | 
 14 | class Frontend(nn.Module):
 15 |     def __init__(self, pretrain=True, **kwargs):
 16 |         super(Frontend, self).__init__()
 17 |         self.front_end = nn.Sequential(*(list(list(models.vgg16_bn(True).children())[0].children())[0:33]))
 18 |     
 19 |     def forward(self, x, perspective_map):
 20 |         x = self.front_end(x)
 21 |         perspective_map = F.interpolate(x, (x.shape[2], x.shape[3]))
 22 |         return x, perspective_map
 23 | 
 24 | class Backend(nn.Module):
 25 |     def __init__(self, in_channels, **kwargs):
 26 |         super(Backend, self).__init__()
 27 |         self.pad_conv2d_1 = BasicPerspectiveDilatedConv2D_BN(in_channels, 512, 3, 1,  **kwargs)
 28 |         self.pad_relu_1 = nn.ReLU(inplace=True)
 29 |         self.pad_conv2d_2 = BasicPerspectiveDilatedConv2D_BN(512, 512, 3, 1, **kwargs)
 30 |         self.pad_relu_2 = nn.ReLU(inplace=True)
 31 |         self.pad_conv2d_3 = BasicPerspectiveDilatedConv2D_BN(512, 512, 3, 1, **kwargs)
 32 |         self.pad_relu_3 = nn.ReLU(inplace=True)
 33 |         self.pad_conv2d_4 = BasicPerspectiveDilatedConv2D_BN(512, 256, 3, 1, **kwargs)
 34 |         self.pad_relu_4 = nn.ReLU(inplace=True)
 35 |         self.pad_conv2d_5 = BasicPerspectiveDilatedConv2D_BN(256, 128, 3, 1, **kwargs)
 36 |         self.pad_relu_5 = nn.ReLU(inplace=True)
 37 |         self.pad_conv2d_6 = BasicPerspectiveDilatedConv2D_BN(128, 64, 3, 1, **kwargs)
 38 |         self.pad_relu_6 = nn.ReLU(inplace=True)
 39 |     
 40 |     def forward(self, x, perspective_map):
 41 |         x = self.pad_conv2d_1(x, perspective_map)
 42 |         x = self.pad_relu_1(x)
 43 |         x = self.pad_conv2d_2(x, perspective_map)
 44 |         x = self.pad_relu_2(x)
 45 |         x = self.pad_conv2d_3(x, perspective_map)
 46 |         x = self.pad_relu_3(x)
 47 |         x = self.pad_conv2d_4(x, perspective_map)
 48 |         x = self.pad_relu_4(x)
 49 |         x = self.pad_conv2d_5(x, perspective_map)
 50 |         x = self.pad_relu_5(x)
 51 |         x = self.pad_conv2d_6(x, perspective_map)
 52 |         x = self.pad_relu_6(x)
 53 |         return x
 54 | 
 55 | class CSRPersNet_onlyBack_BN(nn.Module):
 56 |     def __init__(self, load_path=None, is_relu=False, **kwargs):
 57 |         super(CSRPersNet_onlyBack_BN, self).__init__()
 58 |         self.is_relu = is_relu
 59 |         self.front_end = Frontend(True, **kwargs)
 60 |         self.back_end = Backend(512, **kwargs)
 61 |         self.output_layer = nn.Conv2d(64, 1, kernel_size=1)
 62 |         if not(load_path == None):
 63 |             new_state_dict = OrderedDict()
 64 |             state_dict = torch.load(load_path)
 65 |             count = 1
 66 |             for k,v in state_dict.items():
 67 |                 if 'back_end' in k:
 68 |                     name_prefix = "back_end.pad_conv2d_" + str(count)
 69 |                     if 'weight' in k:
 70 |                         new_state_dict[name_prefix + '.rate_map_generator.params'] = torch.FloatTensor(*kwargs)
 71 |                         new_state_dict[name_prefix + '.perspective_dilated_conv2d.weight'] = v
 72 |                     elif 'bias' in k:
 73 |                         new_state_dict[name_prefix + '.perspective_dilated_conv2d.bias'] = v
 74 |                         count += 1
 75 |                 else:
 76 |                     new_state_dict[k] = v
 77 |             self.load_state_dict(new_state_dict)
 78 |             
 79 |         else:
 80 |             for m in self.output_layer.modules():
 81 |                 if isinstance(m, nn.Conv2d):
 82 |                     nn.init.normal_(m.weight, std=0.01)
 83 |                     if m.bias is not None:
 84 |                         nn.init.constant_(m.bias, 0)
 85 |                 elif isinstance(m, nn.BatchNorm2d):
 86 |                     nn.init.constant_(m.weight, 1)
 87 |                     nn.init.constant_(m.bias, 0)
 88 |                 
 89 |     def forward(self, x, perspective_map):
 90 |         x, perspective_map = self.front_end(x, perspective_map)
 91 |         x = self.back_end(x, perspective_map)
 92 |         x = self.output_layer(x)
 93 | 
 94 |         x = F.interpolate(x, (x.shape[2]*4, x.shape[3]*4), mode='bilinear', align_corners=False)
 95 |         
 96 |         if self.is_relu:
 97 |             x = F.relu(x)
 98 |         return x
 99 | 
100 |     def get_params(self):
101 |         self.ada_sig_params = []
102 |         self.conv_params = []
103 |         self.bn_params = []
104 |         for m in self.modules():
105 |             if isinstance(m, AdaptiveSigmoid):
106 |                 self.ada_sig_params.append(m.params)
107 |             elif isinstance(m, nn.Conv2d):
108 |                 self.conv_params.append(m.weight)
109 |                 self.conv_params.append(m.bias)
110 |             elif isinstance(m, PerspectiveDilatedConv2dLayer):
111 |                 self.conv_params.append(m.weight)
112 |                 self.conv_params.append(m.bias)
113 |             elif isinstance(m, nn.BatchNorm2d):
114 |                 self.bn_params.append(m.weight)
115 |                 self.bn_params.append(m.bias)
116 |         return self.conv_params, self.bn_params, self.ada_sig_params
117 | 


--------------------------------------------------------------------------------
/Dataset/DatasetConstructor.py:
--------------------------------------------------------------------------------
  1 | from PIL import Image
  2 | import numpy as np
  3 | import os
  4 | import glob
  5 | import torch
  6 | import torchvision.transforms as transforms
  7 | import torchvision.transforms.functional as F
  8 | import torch.nn.functional as functional
  9 | import torch.utils.data as data
 10 | import random
 11 | import time
 12 | import scipy.io as scio
 13 | import h5py
 14 | import math
 15 | 
 16 | class DatasetConstructor(data.Dataset):
 17 |     def __init__(self):
 18 |         return
 19 |     
 20 |     def get_path_tuple(self, i, dataset_name = "SHA", is_pers=True):
 21 |         if dataset_name == "SHA" or dataset_name == "SHB":
 22 |             img_name = '/IMG_' + str(i + 1) + ".jpg"
 23 |             gt_map_name = '/GT_IMG_' + str(i + 1) + ".npy"
 24 |             perspective_map_name = ""
 25 |             if is_pers:
 26 |                 perspective_map_name = '/IMG_' + str(i + 1) + ".mat"
 27 |         else:
 28 |             raise NameError("Only SHA is released")
 29 |         return img_name, gt_map_name, perspective_map_name
 30 |     
 31 |     def resize(self, img, dataset_name):
 32 |         height = img.size[1]
 33 |         width = img.size[0]
 34 |         resize_height = height
 35 |         resize_width = width
 36 |         if dataset_name == "SHA":
 37 |             if resize_height <= 416:
 38 |                 tmp = resize_height
 39 |                 resize_height = 416
 40 |                 resize_width = (resize_height / tmp) * resize_width
 41 |             if resize_width <= 416:
 42 |                 tmp = resize_width
 43 |                 resize_width = 416
 44 |                 resize_height = (resize_width / tmp) * resize_height
 45 |             resize_height = math.ceil(resize_height / 32) * 32
 46 |             resize_width = math.ceil(resize_width / 32) * 32
 47 |         else:
 48 |             raise NameError("Only SHA is released")
 49 |         img = transforms.Resize([resize_height, resize_width])(img)
 50 |         return img
 51 | 
 52 | 
 53 | class EvalDatasetConstructor(DatasetConstructor):
 54 |     def __init__(self,
 55 |                  validate_num,
 56 |                  data_dir_path,
 57 |                  gt_dir_path,
 58 |                  pers_dir_path=None,
 59 |                  mode="crop",
 60 |                  dataset_name="SHA",
 61 |                  device=None,
 62 |                  ):
 63 |         super(EvalDatasetConstructor, self).__init__()
 64 |         self.validate_num = validate_num
 65 |         self.imgs = []
 66 |         self.data_root = data_dir_path
 67 |         self.gt_root = gt_dir_path
 68 |         self.pers_root = pers_dir_path
 69 |         self.mode = mode
 70 |         self.device = device
 71 |         self.dataset_name = dataset_name
 72 |         self.kernel = torch.ones(1, 1, 8, 8, dtype=torch.float32)
 73 |         self.kernel_crop = torch.ones(1, 1, 2, 2, dtype=torch.float32)
 74 |         self.img_paths = glob.glob(os.path.join(self.data_root, "*.jpg"))
 75 | 
 76 |     def __getitem__(self, index):
 77 |         if self.mode == 'crop':
 78 |             img_path = self.img_paths[index]
 79 |             gt_map_path = os.path.join(self.gt_root, os.path.basename(img_path.replace('IMG_', "GT_IMG_"))[:-4]+".npy")
 80 |             pers_path = os.path.join(self.pers_root, os.path.basename(img_path.replace('jpg', "mat")))
 81 |             img = Image.open(img_path).convert("RGB")
 82 |             p_m = np.zeros(img.size[::-1], dtype=float) if self.pers_root == "" else (h5py.File(pers_path, 'r')['pmap'][:] / 100).T
 83 |             p_m = super(EvalDatasetConstructor, self).resize(Image.fromarray(p_m), self.dataset_name)
 84 |             img = super(EvalDatasetConstructor, self).resize(img, self.dataset_name)
 85 |             img = transforms.ToTensor()(img)
 86 |             gt_map = Image.fromarray(np.squeeze(np.load(gt_map_path)))
 87 |             gt_map = transforms.ToTensor()(gt_map)
 88 |             p_m = transforms.ToTensor()(p_m)
 89 |             img_shape, gt_shape = img.shape, gt_map.shape  # C, H, W
 90 |             img = transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))(img)
 91 |             patch_height, patch_width = (img_shape[1]) // 2, (img_shape[2]) // 2
 92 |             imgs, pers = [], []
 93 |             for i in range(3):
 94 |                 for j in range(3):
 95 |                     start_h, start_w = (patch_height // 2) * i, (patch_width // 2) * j
 96 |                     imgs.append(img[:, start_h:start_h + patch_height, start_w:start_w + patch_width])
 97 |                     pers.append(p_m[:, start_h:start_h + patch_height, start_w:start_w + patch_width])
 98 |             imgs, pers = torch.stack(imgs), torch.stack(pers)
 99 |             gt_map = functional.conv2d(gt_map.view(1, *(gt_shape)), self.kernel_crop, bias=None, stride=2, padding=0)
100 |             return img_path, imgs, gt_map.view(1, gt_shape[1] // 2, gt_shape[2] // 2), pers
101 |         
102 |         elif self.mode == 'whole':
103 |             img_path, gt_map_path, pers_path, img_index = self.imgs[index]
104 |             img = Image.open(img_path).convert("RGB")
105 |             p_m = np.zeros(img.size[::-1], dtype=float) if self.pers_root == "" else (h5py.File(pers_path)['pmap'][:] / 100).T
106 |             p_m = super(EvalDatasetConstructor, self).resize(Image.fromarray(p_m), self.dataset_name)
107 |             img = super(EvalDatasetConstructor, self).resize(img, self.dataset_name)
108 |             img = transforms.ToTensor()(img)
109 |             gt_map = Image.fromarray(np.squeeze(np.load(gt_map_path)))
110 |             gt_map = transforms.ToTensor()(gt_map)
111 |             p_m = transforms.ToTensor()(p_m)
112 |             img_shape, gt_shape = img.shape, gt_map.shape  # C, H, W
113 |             img = transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))(img)
114 |             gt_map = functional.conv2d(gt_map.view(1, *(gt_shape)), self.kernel, bias=None, stride=8, padding=0)
115 |             return img_path, img, gt_map.view(1, gt_shape[1] // 8, gt_shape[2] // 8), p_m
116 | 
117 |     def __len__(self):
118 |         return self.validate_num
119 | 


--------------------------------------------------------------------------------
/generate_map.py:
--------------------------------------------------------------------------------
  1 | import cv2
  2 | import numpy as np
  3 | import scipy
  4 | import scipy.io as scio
  5 | from PIL import Image
  6 | import time
  7 | import math
  8 | import os
  9 | import h5py
 10 | 
 11 | def get_density_map_gaussian(H, W, ratio_h, ratio_w,  points, adaptive_kernel=False, fixed_value=15):
 12 |     h = H
 13 |     w = W
 14 |     density_map = np.zeros([h, w], dtype=np.float32)
 15 |     num_gt = np.squeeze(points).shape[0]
 16 |     if num_gt == 0:
 17 |         return density_map
 18 | 
 19 |     for idx, p in enumerate(points):
 20 |         p = np.round(p).astype(int)
 21 |         p[0], p[1] = min(h-1, math.floor(p[1] * ratio_h)), min(w-1, math.floor(p[0] * ratio_w))
 22 |         sigma = fixed_value
 23 |         sigma = max(1, sigma)
 24 | 
 25 |         gaussian_radius = 7
 26 |         gaussian_map = np.multiply(
 27 |             cv2.getGaussianKernel(gaussian_radius*2+1, sigma),
 28 |             cv2.getGaussianKernel(gaussian_radius*2+1, sigma).T
 29 |         )
 30 |         x_left, x_right, y_up, y_down = 0, gaussian_map.shape[1], 0, gaussian_map.shape[0]
 31 |         # cut the gaussian kernel
 32 |         if p[1] < 0 or p[0] < 0:
 33 |             continue
 34 |         if p[1] < gaussian_radius:
 35 |             x_left = gaussian_radius - p[1]
 36 |         if p[0] < gaussian_radius:
 37 |             y_up = gaussian_radius - p[0]
 38 |         if p[1] + gaussian_radius >= w:
 39 |             x_right = gaussian_map.shape[1] - (gaussian_radius + p[1] - w) - 1
 40 |         if p[0] + gaussian_radius >= h:
 41 |             y_down = gaussian_map.shape[0] - (gaussian_radius + p[0] - h) - 1
 42 |         density_map[
 43 |             max(0, p[0]-gaussian_radius):min(h, p[0]+gaussian_radius+1),
 44 |             max(0, p[1]-gaussian_radius):min(w, p[1]+gaussian_radius+1)
 45 |         ] += gaussian_map[y_up:y_down, x_left:x_right]
 46 |     return density_map
 47 | 
 48 | def mkdir(path):
 49 |     """create a single empty directory if it didn't exist
 50 |     Parameters:
 51 |         path (str) -- a single directory path
 52 |     """
 53 |     if not os.path.exists(path):
 54 |         os.makedirs(path)
 55 | 
 56 | def mkdirs(paths):
 57 |     """create empty directories if they don't exist
 58 |     Parameters:
 59 |         paths (str list) -- a list of directory paths
 60 |     """
 61 |     if isinstance(paths, list) and not isinstance(paths, str):
 62 |         for path in paths:
 63 |             mkdir(path)
 64 |     else:
 65 |         mkdir(paths)
 66 | 
 67 | 
 68 | # SHA: 300, 182
 69 | # SHB: 400, 316
 70 | if __name__ == "__main__":
 71 | 
 72 |     is_train = 0 # 0 for test
 73 |     train_test = 'train' if is_train else 'test'
 74 |     dataset = 'SHA'
 75 | 
 76 |     if dataset == 'SHA':
 77 |         num_img = 300 if is_train else 182
 78 |         image_dir_path = "ShanghaiTech/part_A_final/"+train_test+"_data/images"
 79 |         ground_truth_dir_path = "ShanghaiTech/part_A_final/"+train_test+"_data/ground_truth"
 80 |         output_gt_dir = "./SH_part_A/"+train_test
 81 |     elif dataset == 'SHB':
 82 |         num_img = 400 if is_train else 316
 83 |         image_dir_path = "ShanghaiTech/part_B_final/"+train_test+"_data/images"
 84 |         ground_truth_dir_path = "ShanghaiTech/part_B_final/"+train_test+"_data/ground_truth"
 85 |         output_gt_dir = "./SH_part_B/" + train_test
 86 |     elif dataset == 'QNRF':
 87 |         num_img = 1201 if is_train else 334
 88 |         image_dir_path = "UCF-QNRF_ECCV18/" + train_test
 89 |         ground_truth_dir_path = "UCF-QNRF_ECCV18/" + train_test
 90 |         output_gt_dir = "./QNRF/" + train_test
 91 |     elif dataset == 'UCF50': # take all images as testing images
 92 |         num_img = 50
 93 |         image_dir_path = "UCF_CC_50/images/UCF_CC_50_img"
 94 |         ground_truth_dir_path = "UCF_CC_50/UCF_CC_50_mat"
 95 |         output_gt_dir = "./UCF50/" + train_test
 96 | 
 97 |     mkdirs(output_gt_dir)
 98 | 
 99 |     for i in range(num_img):
100 |         if dataset == 'SHA' or dataset == 'SHB':
101 |             img_path = image_dir_path + "/IMG_" + str(i + 1) + ".jpg"
102 |             gt_path = ground_truth_dir_path + "/GT_IMG_" + str(i + 1) + ".mat"
103 |         elif dataset == 'QNRF':
104 |             img_path = os.path.join(image_dir_path, "img_"+("%04d" % (i+1))+".jpg")
105 |             gt_path = os.path.join(image_dir_path, "img_"+("%04d" % (i+1))+"_ann.mat")
106 |         elif dataset == 'UCF50':
107 |             img_path = os.path.join(image_dir_path, ("%d" % (i+1))+".jpg")
108 |             gt_path = os.path.join(ground_truth_dir_path, ("%d" % (i+1))+"_ann.mat")
109 | 
110 |         img = Image.open(img_path)
111 |         height = img.size[1]
112 |         width = img.size[0]
113 | 
114 |         if dataset == 'SHA' or dataset == 'SHB':
115 |             points = scio.loadmat(gt_path)['image_info'][0][0][0][0][0]
116 |         elif dataset == 'QNRF':
117 |             points = scio.loadmat(gt_path)['annPoints']
118 |         elif dataset == 'UCF50':
119 |             points = h5py.File(gt_path, 'r')['annPoints'].value.astype(np.float32)
120 | 
121 | 
122 |         resize_height = height
123 |         resize_width = width
124 | 
125 |         if dataset == 'SHA' or dataset == 'UCF50':
126 |             if resize_height <= 416:
127 |                 tmp = resize_height
128 |                 resize_height = 416
129 |                 resize_width = (resize_height / tmp) * resize_width
130 | 
131 |             if resize_width <= 416:
132 |                 tmp = resize_width
133 |                 resize_width = 416
134 |                 resize_height = (resize_width / tmp) * resize_height
135 | 
136 |             resize_height = math.ceil(resize_height / 32) * 32
137 |             resize_width = math.ceil(resize_width / 32) * 32
138 |         elif dataset == 'QNRF':
139 |             pass
140 | 
141 | 
142 |         ratio_h = (resize_height) / (height)
143 |         ratio_w = (resize_width) / (width)
144 |         # print(height, width, ratio_h, ratio_w)
145 |         gt = get_density_map_gaussian(resize_height, resize_width, ratio_h, ratio_w, points, False, 4)
146 |         gt = np.reshape(gt, [resize_height, resize_width])  # transpose into w, h
147 |         np.save(output_gt_dir + "/GT_IMG_" + str(i + 1), gt)
148 |     print("complete!")
149 | 


--------------------------------------------------------------------------------
/net/CSRPersNet.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | from torchvision import models
  4 | from op_wrapper.pad_conv2d_wrapper import BasicPerspectiveDilatedConv2D
  5 | from op_wrapper.adaptive_sigmoid_wrapper import AdaptiveSigmoid
  6 | from op_wrapper.pad_conv2d_wrapper import PerspectiveDilatedConv2dLayer
  7 | from collections import OrderedDict
# State dict built from the first 23 sub-modules ([0:23]) of the first child
# of torchvision's VGG-16.  Evaluated once, when this module is imported.
# NOTE(review): the positional `True` is presumably the `pretrained` flag,
# which would fetch pretrained weights at import time -- confirm against the
# installed torchvision version.
pretrain_dict = nn.ModuleList(list(list(models.vgg16(True).children())[0].children())[0:23]).state_dict()
  9 | 
 10 | class Frontend(nn.Module):
 11 |     def __init__(self, pretrain=True, **kwargs):
 12 |         super(Frontend, self).__init__()
 13 |         self.pad_conv2d_1 = BasicPerspectiveDilatedConv2D(3, 64, 3, 1, **kwargs)
 14 |         self.pad_relu_1 = nn.ReLU(inplace=True)
 15 |         self.pad_conv2d_2 = BasicPerspectiveDilatedConv2D(64, 64, 3, 1, **kwargs)
 16 |         self.pad_relu_2 = nn.ReLU(inplace=True)
 17 |         self.max_pool_1 = nn.MaxPool2d(kernel_size=2)
 18 |         
 19 |         self.pad_conv2d_3 = BasicPerspectiveDilatedConv2D(64, 128, 3, 1, **kwargs)
 20 |         self.pad_relu_3 = nn.ReLU(inplace=True)
 21 |         self.pad_conv2d_4 = BasicPerspectiveDilatedConv2D(128, 128, 3, 1, **kwargs)
 22 |         self.pad_relu_4 = nn.ReLU(inplace=True)
 23 |         self.max_pool_2 = nn.MaxPool2d(kernel_size=2)
 24 |         
 25 |         self.pad_conv2d_5 = BasicPerspectiveDilatedConv2D(128, 256, 3, 1, **kwargs)
 26 |         self.pad_relu_5 = nn.ReLU(inplace=True)
 27 |         self.pad_conv2d_6 = BasicPerspectiveDilatedConv2D(256, 256, 3, 1, **kwargs)
 28 |         self.pad_relu_6 = nn.ReLU(inplace=True)
 29 |         self.pad_conv2d_7 = BasicPerspectiveDilatedConv2D(256, 256, 3, 1, **kwargs)
 30 |         self.pad_relu_7 = nn.ReLU(inplace=True)
 31 |         self.max_pool_3 = nn.MaxPool2d(kernel_size=2)
 32 |         
 33 |         self.pad_conv2d_8 = BasicPerspectiveDilatedConv2D(256, 512, 3, 1, **kwargs)
 34 |         self.pad_relu_8 = nn.ReLU(inplace=True)
 35 |         self.pad_conv2d_9 = BasicPerspectiveDilatedConv2D(512, 512, 3, 1, **kwargs)
 36 |         self.pad_relu_9 = nn.ReLU(inplace=True)
 37 |         self.pad_conv2d_10 = BasicPerspectiveDilatedConv2D(512, 512, 3, 1, **kwargs)
 38 |         self.pad_relu_10 = nn.ReLU(inplace=True)
 39 |         if pretrain == True:
 40 |             new_state_dict = OrderedDict()
 41 |             count = 1
 42 |             for k,v in pretrain_dict.items():
 43 |                 name_prefix = "pad_conv2d_" + str(count)
 44 |                 if 'weight' in k:
 45 |                     new_state_dict[name_prefix + '.rate_map_generator.params'] = torch.FloatTensor(kwargs['sigma'])
 46 |                     new_state_dict[name_prefix + '.perspective_dilated_conv2d.weight'] = v
 47 |                 elif 'bias' in k:
 48 |                     new_state_dict[name_prefix + '.perspective_dilated_conv2d.bias'] = v
 49 |                     count += 1
 50 |             self.load_state_dict(new_state_dict)
 51 |         
 52 |     
 53 |     def forward(self, x, perspective_map):
 54 |         x = self.pad_conv2d_1(x, perspective_map)
 55 |         x = self.pad_relu_1(x)
 56 |         x = self.pad_conv2d_2(x, perspective_map)
 57 |         x = self.pad_relu_2(x)
 58 |         x = self.max_pool_1(x)
 59 |         perspective_map = self.max_pool_1(perspective_map)
 60 |         
 61 |         x = self.pad_conv2d_3(x, perspective_map)
 62 |         x = self.pad_relu_3(x)
 63 |         x = self.pad_conv2d_4(x, perspective_map)
 64 |         x = self.pad_relu_4(x)
 65 |         x = self.max_pool_2(x)
 66 |         perspective_map = self.max_pool_2(perspective_map)
 67 |         
 68 |         x = self.pad_conv2d_5(x, perspective_map)
 69 |         x = self.pad_relu_5(x)
 70 |         x = self.pad_conv2d_6(x, perspective_map)
 71 |         x = self.pad_relu_6(x)
 72 |         x = self.pad_conv2d_7(x, perspective_map)
 73 |         x = self.pad_relu_7(x)
 74 |         x = self.max_pool_3(x)
 75 |         perspective_map = self.max_pool_3(perspective_map)
 76 |         
 77 |         x = self.pad_conv2d_8(x, perspective_map)
 78 |         x = self.pad_relu_8(x)
 79 |         x = self.pad_conv2d_9(x, perspective_map)
 80 |         x = self.pad_relu_9(x)
 81 |         x = self.pad_conv2d_10(x, perspective_map)
 82 |         x = self.pad_relu_10(x)
 83 |         
 84 |         return x, perspective_map
 85 | 
 86 | class Backend(nn.Module):
 87 |     def __init__(self, in_channels, **kwargs):
 88 |         super(Backend, self).__init__()
 89 |         self.pad_conv2d_1 = BasicPerspectiveDilatedConv2D(in_channels, 512, 3, 1,  **kwargs)
 90 |         self.pad_relu_1 = nn.ReLU(inplace=True)
 91 |         self.pad_conv2d_2 = BasicPerspectiveDilatedConv2D(512, 512, 3, 1, **kwargs)
 92 |         self.pad_relu_2 = nn.ReLU(inplace=True)
 93 |         self.pad_conv2d_3 = BasicPerspectiveDilatedConv2D(512, 512, 3, 1, **kwargs)
 94 |         self.pad_relu_3 = nn.ReLU(inplace=True)
 95 |         self.pad_conv2d_4 = BasicPerspectiveDilatedConv2D(512, 256, 3, 1, **kwargs)
 96 |         self.pad_relu_4 = nn.ReLU(inplace=True)
 97 |         self.pad_conv2d_5 = BasicPerspectiveDilatedConv2D(256, 128, 3, 1, **kwargs)
 98 |         self.pad_relu_5 = nn.ReLU(inplace=True)
 99 |         self.pad_conv2d_6 = BasicPerspectiveDilatedConv2D(128, 64, 3, 1, **kwargs)
100 |         self.pad_relu_6 = nn.ReLU(inplace=True)
101 |     
102 |     def forward(self, x, perspective_map):
103 |         x = self.pad_conv2d_1(x, perspective_map)
104 |         x = self.pad_relu_1(x)
105 |         x = self.pad_conv2d_2(x, perspective_map)
106 |         x = self.pad_relu_2(x)
107 |         x = self.pad_conv2d_3(x, perspective_map)
108 |         x = self.pad_relu_3(x)
109 |         x = self.pad_conv2d_4(x, perspective_map)
110 |         x = self.pad_relu_4(x)
111 |         x = self.pad_conv2d_5(x, perspective_map)
112 |         x = self.pad_relu_5(x)
113 |         x = self.pad_conv2d_6(x, perspective_map)
114 |         x = self.pad_relu_6(x)
115 |         return x
116 | 
class CSRPersNet(nn.Module):
    """Frontend + Backend + 1x1 output convolution producing a 1-channel map."""

    def __init__(self, load_path=None, **kwargs):
        """Build the network and initialise or load its weights.

        Args:
            load_path: optional checkpoint path.  When given, entries whose
                key contains ``'back_end'`` are renamed to this model's
                ``back_end.pad_conv2d_<k>`` layout and each back-end layer's
                ``rate_map_generator.params`` is set from ``kwargs['sigma']``;
                all other entries are copied unchanged.  When ``None``, the
                output layer gets a normal(std=0.01) weight and zero bias.
            **kwargs: forwarded to ``Frontend`` and ``Backend``; ``'sigma'``
                is required when ``load_path`` is given.
        """
        super(CSRPersNet, self).__init__()
        self.front_end = Frontend(True, **kwargs)
        self.back_end = Backend(512, **kwargs)
        self.output_layer = nn.Conv2d(64, 1, kernel_size=1)
        if load_path is not None:
            new_state_dict = OrderedDict()
            state_dict = torch.load(load_path)
            count = 1
            for k,v in state_dict.items():
                if 'back_end' in k:
                    name_prefix = "back_end.pad_conv2d_" + str(count)
                    if 'weight' in k:
                        # Same initial value as Frontend uses for its layers.
                        new_state_dict[name_prefix + '.rate_map_generator.params'] = torch.FloatTensor(kwargs['sigma'])
                        new_state_dict[name_prefix + '.perspective_dilated_conv2d.weight'] = v
                    elif 'bias' in k:
                        new_state_dict[name_prefix + '.perspective_dilated_conv2d.bias'] = v
                        # The bias closes a layer: advance to the next one.
                        count += 1
                else:
                    new_state_dict[k] = v
            self.load_state_dict(new_state_dict)

        else:
            for m in self.output_layer.modules():
                if isinstance(m, nn.Conv2d):
                    nn.init.normal_(m.weight, std=0.01)
                    if m.bias is not None:
                        nn.init.constant_(m.bias, 0)
                elif isinstance(m, nn.BatchNorm2d):
                    nn.init.constant_(m.weight, 1)
                    nn.init.constant_(m.bias, 0)

    def forward(self, x, perspective_map):
        """Return the output-layer map for ``x`` given its ``perspective_map``."""
        x, perspective_map = self.front_end(x, perspective_map)
        x = self.back_end(x, perspective_map)
        x = self.output_layer(x)
        return x

    def get_params(self):
        """Collect parameters into ``(conv_params, ada_sig_params)``.

        ``AdaptiveSigmoid`` modules contribute ``params``; ``nn.Conv2d`` and
        ``PerspectiveDilatedConv2dLayer`` modules contribute ``weight`` and
        ``bias``.  Both lists are also stored on the instance.
        """
        self.ada_sig_params = []
        self.conv_params = []
        for m in self.modules():
            if isinstance(m, AdaptiveSigmoid):
                self.ada_sig_params.append(m.params)
            elif isinstance(m, nn.Conv2d):
                self.conv_params.append(m.weight)
                self.conv_params.append(m.bias)
            elif isinstance(m, PerspectiveDilatedConv2dLayer):
                self.conv_params.append(m.weight)
                self.conv_params.append(m.bias)
        return self.conv_params, self.ada_sig_params


--------------------------------------------------------------------------------
/PytorchCudaOpExtension/perspective_aware_conv2d/pad_conv2d.cpp:
--------------------------------------------------------------------------------
  1 | #include <torch/extension.h>
  2 | #include "pad_conv2d.h"
  3 | 
  4 | 
  5 | 
  6 | at::Tensor pad_conv2d_forward(
  7 |     at::Tensor input,
  8 |     at::Tensor weight,
  9 |     at::Tensor rate_map,
 10 |     at::Tensor bias,
 11 |     int stride_h, int stride_w
 12 | //     int dilation_h, int dilation_w,
 13 | //     int pad_h, int pad_w
 14 | ){
 15 |     /**
 16 |     * get the input parameter's information
 17 |     **/
 18 |     int batch = input.size(0);
 19 |     int in_channels = input.size(1);
 20 |     int input_height = input.size(2);
 21 |     int input_width = input.size(3);
 22 |     int out_channels = weight.size(0);
 23 | //     int kernel_channels = weight.size(1);
 24 |     int kernel_h = weight.size(2);
 25 |     int kernel_w = weight.size(3);
 26 |     int rate_map_height = rate_map.size(2);
 27 |     int rate_map_width = rate_map.size(3);
 28 |     int height_out = (input_height - (1 * (kernel_h - 1) + 1)) / stride_h + 1;
 29 |     int width_out = (input_width - (1 * (kernel_w - 1) + 1)) / stride_w + 1;
 30 |     /**
 31 |     * data correctness validation
 32 |     **/
 33 |     AT_ASSERTM(input.type().is_cuda(), "input must be a CUDA tensor");
 34 |     AT_ASSERTM(weight.type().is_cuda(), "weight must be a CUDA tensor");
 35 |     AT_ASSERTM(rate_map.type().is_cuda(), "rate_map must be a CUDA tensor");
 36 |     AT_ASSERTM(rate_map_height == height_out, "output height must be same with rate map height");
 37 |     AT_ASSERTM(rate_map_width == width_out, "output width must be same with rate map width");
 38 |     AT_ASSERTM(kernel_h % 2 == 1 || kernel_w % 2 ==1, "kernel_size must be odd number");
 39 |     /**
 40 |     * derive more information
 41 |     **/
 42 |     int kernel_dim = in_channels * kernel_h * kernel_w;
 43 |     int input_dim = in_channels * input_height * input_width;
 44 |     int conv_out_spatial_dim = height_out * width_out;
 45 | 
 46 |     int M = out_channels;
 47 |     int N = conv_out_spatial_dim;
 48 |     int K = kernel_dim;
 49 |     /**
 50 |     * malloc tmp space and output space
 51 |     **/
 52 |     auto col_buffer = at::empty({in_channels * kernel_h * kernel_w, conv_out_spatial_dim}, input.options());
 53 |     auto output = at::empty({batch, out_channels, height_out, width_out}, input.options());
 54 |     /**
 55 |     * get pointer of the tensors
 56 |     **/
 57 |     auto input_ptr = input.data<float>();
 58 |     auto weight_ptr = weight.data<float>();
 59 |     auto rate_map_ptr = rate_map.data<float>();
 60 |     auto col_buffer_ptr = col_buffer.data<float>();
 61 |     auto output_ptr = output.data<float>();
 62 |     auto bias_ptr = bias.data<float>();
 63 | 
 64 |     for (int n = 0; n < batch; ++n) {
 65 |         pad_conv2d_im2col(
 66 |             THCState_getCurrentStream(state),
 67 |             input_ptr + n * input_dim,
 68 |             rate_map_ptr + n * conv_out_spatial_dim,
 69 |             in_channels, input_height, input_width,
 70 |             kernel_h, kernel_w,
 71 | //             pad_h, pad_w,
 72 |             stride_h, stride_w,
 73 | //             dilation_h, dilation_w,
 74 |             height_out, width_out,
 75 |             col_buffer_ptr
 76 |         );
 77 |         auto output_instance_ptr = output_ptr + (n * M  * N);
 78 |         THCudaBlas_Sgemm(state, 'n', 'n', N, M, K, 1.0f, col_buffer_ptr, N, weight_ptr, K, 0.0f, output_instance_ptr, N);
 79 |         add_bias(
 80 |             THCState_getCurrentStream(state),
 81 |             output_instance_ptr, 
 82 |             bias_ptr, 
 83 |             out_channels, height_out, width_out
 84 |         );
 85 |     }
 86 |     return output;
 87 | }
 88 | 
 89 | std::vector<at::Tensor> pad_conv2d_backward(
 90 |     at::Tensor input,
 91 |     at::Tensor weight,
 92 |     at::Tensor rate_map,
 93 |     at::Tensor bias,
 94 |     at::Tensor out_grad,
 95 |     int stride_h, int stride_w
 96 | //     int dilation_h, int dilation_w,
 97 | //     int pad_h, int pad_w
 98 | ){
 99 |     /**
100 |     * get the input parameter's information
101 |     **/
102 |     int batch = input.size(0);
103 |     int in_channels = input.size(1);
104 |     int input_height = input.size(2);
105 |     int input_width = input.size(3);
106 |     int out_channels = weight.size(0);
107 | //     int kernel_channels = weight.size(1);
108 |     int kernel_h = weight.size(2);
109 |     int kernel_w = weight.size(3);
110 |     int rate_map_height = rate_map.size(2);
111 |     int rate_map_width = rate_map.size(3);
112 |     int height_out = (input_height  - (1 * (kernel_h - 1) + 1)) / stride_h + 1;
113 |     int width_out = (input_width - (1 * (kernel_w - 1) + 1)) / stride_w + 1;
114 |     /**
115 |     * data correctness validation
116 |     **/
117 |     AT_ASSERTM(height_out==out_grad.size(2) && width_out == out_grad.size(3),
118 |         "the calculated out shape won't match the out_grad_shape:(%d x %d vs %d x %d)",
119 |             height_out, width_out, out_grad.size(2), out_grad.size(3));
120 |     AT_ASSERTM(input.type().is_cuda(), "input must be a CUDA tensor");
121 |     AT_ASSERTM(weight.type().is_cuda(), "weight must be a CUDA tensor");
122 |     AT_ASSERTM(rate_map.type().is_cuda(), "rate_map must be a CUDA tensor");
123 |     AT_ASSERTM(rate_map_height == height_out, "output height must be same with rate map height");
124 |     AT_ASSERTM(rate_map_width == width_out, "output width must be same with rate map width");
125 |     /**
126 |     * derive more information
127 |     **/
128 |     int kernel_dim = in_channels * kernel_h * kernel_w;
129 |     int input_dim = in_channels * input_height * input_width;
130 |     int conv_out_spatial_dim = height_out * width_out;
131 | 
132 |     int M = kernel_dim;
133 |     int N = conv_out_spatial_dim;
134 |     int K = out_channels;
135 |     /**
136 |     * malloc tmp space and output space
137 |     **/
138 |     auto col_buffer = at::empty({in_channels * kernel_h * kernel_w,  conv_out_spatial_dim}, input.options());
139 |     auto grad_input = at::zeros_like(input);
140 |     auto grad_weight = at::zeros_like(weight);
141 |     auto grad_bias = at::zeros_like(bias);
142 |     auto grad_rate_map = at::zeros_like(rate_map);
143 |     /**
144 |     * get pointer of the tensors
145 |     **/
146 |     auto input_ptr = input.data<float>();
147 |     auto weight_ptr = weight.data<float>();
148 |     auto rate_map_ptr = rate_map.data<float>();
149 |     auto out_grad_ptr = out_grad.data<float>();
150 |     auto col_buffer_ptr = col_buffer.data<float>();
151 |     auto grad_input_ptr = grad_input.data<float>();
152 |     auto grad_weight_ptr = grad_weight.data<float>();
153 |     auto grad_bias_ptr = grad_bias.data<float>();
154 | //     auto bias_ptr = bias.data<float>();
155 |     auto grad_rate_map_ptr = grad_rate_map.data<float>();
156 |     
157 |     for (int n = 0; n < batch; ++n) {
158 |         auto out_grad_instance_ptr = out_grad_ptr + n * K * N;
159 |         calculate_dbias(
160 |             THCState_getCurrentStream(state),
161 |             out_grad_instance_ptr,
162 |             grad_bias_ptr,
163 |             out_channels,
164 |             height_out, width_out
165 |             );
166 |         THCudaBlas_Sgemm(state,
167 |             'n', 't',
168 |             N, M, K,
169 |             1.0f,
170 |             out_grad_instance_ptr, N,
171 |             weight_ptr, M,
172 |             0.0f,
173 |             col_buffer_ptr, N);
174 |         /**
175 |         * calculate d loss / d rate_map
176 |         **/
177 |        pad_conv2d_col2im_coord(
178 |            THCState_getCurrentStream(state),
179 |            col_buffer_ptr,
180 |            input_ptr + n * input_dim,
181 |            rate_map_ptr + n * conv_out_spatial_dim,
182 |            in_channels, input_height, input_width,
183 |            kernel_h, kernel_w,
184 |            stride_h, stride_w,
185 |            height_out, width_out,
186 |            grad_rate_map_ptr + n * conv_out_spatial_dim
187 |        );
188 | 
189 |         /**
190 |         * calculate d loss / d input
191 |         **/
192 |         pad_conv2d_col2im(
193 |             THCState_getCurrentStream(state),
194 |             col_buffer_ptr,
195 |             rate_map_ptr + n * conv_out_spatial_dim,
196 |             in_channels, input_height, input_width,
197 |             kernel_h, kernel_w,
198 |             stride_h, stride_w,
199 |             height_out, width_out,
200 |             grad_input_ptr + n * input_dim
201 |         );
202 | 
203 |         /**
204 |         * calculate d loss / d weight
205 |         **/
206 |         pad_conv2d_im2col(
207 |             THCState_getCurrentStream(state),
208 |             input_ptr + n * input_dim,
209 |             rate_map_ptr + n * conv_out_spatial_dim,
210 |             in_channels, input_height, input_width,
211 |             kernel_h, kernel_w,
212 |             stride_h, stride_w,
213 |             height_out, width_out,
214 |             col_buffer_ptr);
215 |         
216 |         THCudaBlas_Sgemm(state,
217 |                 't', 'n',
218 |                 M, K, N,
219 |                 1.0f,
220 |                 col_buffer_ptr, N,
221 |                 out_grad_instance_ptr, N,
222 |                 1.0f,
223 |                 grad_weight_ptr, M);
224 |     }
225 | //    return {grad_input, grad_weight, grad_rate_map};
226 |     return {grad_input, grad_weight, grad_rate_map, grad_bias};
227 | }
228 | 
// Python bindings: registers pad_conv2d_forward and pad_conv2d_backward as
// "forward" and "backward" in the module named by TORCH_EXTENSION_NAME.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m){
  m.def("forward", &pad_conv2d_forward, "perspective-aware dilated conv2d forward (CUDA)");
  m.def("backward", &pad_conv2d_backward, "perspective-aware dilated conv2d backward (CUDA)");
}
233 | 


--------------------------------------------------------------------------------
/net/CSRPersNet_crop.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | from torchvision import models
  5 | from op_wrapper.pad_conv2d_wrapper import BasicPerspectiveDilatedConv2D_BN
  6 | from op_wrapper.adaptive_sigmoid_wrapper import AdaptiveSigmoid
  7 | from op_wrapper.pad_conv2d_wrapper import PerspectiveDilatedConv2dLayer
  8 | from collections import OrderedDict
  9 | pretrain_dict = nn.ModuleList(list(list(models.vgg16_bn(True).children())[0].children())[0:33]).state_dict()
 10 | 
 11 | class Frontend(nn.Module):
 12 |     def __init__(self, pretrain=True, **kwargs):
 13 |         super(Frontend, self).__init__()
 14 |         self.pad_conv2d_1 = BasicPerspectiveDilatedConv2D_BN(3, 64, 3, 1, **kwargs)
 15 |         self.pad_relu_1 = nn.ReLU(inplace=True)
 16 |         self.pad_conv2d_2 = BasicPerspectiveDilatedConv2D_BN(64, 64, 3, 1, **kwargs)
 17 |         self.pad_relu_2 = nn.ReLU(inplace=True)
 18 |         self.max_pool_1 = nn.MaxPool2d(kernel_size=2)
 19 |         
 20 |         self.pad_conv2d_3 = BasicPerspectiveDilatedConv2D_BN(64, 128, 3, 1, **kwargs)
 21 |         self.pad_relu_3 = nn.ReLU(inplace=True)
 22 |         self.pad_conv2d_4 = BasicPerspectiveDilatedConv2D_BN(128, 128, 3, 1, **kwargs)
 23 |         self.pad_relu_4 = nn.ReLU(inplace=True)
 24 |         self.max_pool_2 = nn.MaxPool2d(kernel_size=2)
 25 |         
 26 |         self.pad_conv2d_5 = BasicPerspectiveDilatedConv2D_BN(128, 256, 3, 1, **kwargs)
 27 |         self.pad_relu_5 = nn.ReLU(inplace=True)
 28 |         self.pad_conv2d_6 = BasicPerspectiveDilatedConv2D_BN(256, 256, 3, 1, **kwargs)
 29 |         self.pad_relu_6 = nn.ReLU(inplace=True)
 30 |         self.pad_conv2d_7 = BasicPerspectiveDilatedConv2D_BN(256, 256, 3, 1, **kwargs)
 31 |         self.pad_relu_7 = nn.ReLU(inplace=True)
 32 |         self.max_pool_3 = nn.MaxPool2d(kernel_size=2)
 33 |         
 34 |         self.pad_conv2d_8 = BasicPerspectiveDilatedConv2D_BN(256, 512, 3, 1, **kwargs)
 35 |         self.pad_relu_8 = nn.ReLU(inplace=True)
 36 |         self.pad_conv2d_9 = BasicPerspectiveDilatedConv2D_BN(512, 512, 3, 1, **kwargs)
 37 |         self.pad_relu_9 = nn.ReLU(inplace=True)
 38 |         self.pad_conv2d_10 = BasicPerspectiveDilatedConv2D_BN(512, 512, 3, 1, **kwargs)
 39 |         self.pad_relu_10 = nn.ReLU(inplace=True)
 40 |         if pretrain == True:
 41 |             new_state_dict = OrderedDict()
 42 |             count, ori_count = 1, 0
 43 |             for k,v in pretrain_dict.items():
 44 |                 name_prefix = "pad_conv2d_" + str(count)
 45 |                 if count != ori_count:
 46 |                     if 'weight' in k:
 47 |                         new_state_dict[name_prefix + '.rate_map_generator.params'] = torch.FloatTensor(kwargs['sigma'])
 48 |                         new_state_dict[name_prefix + '.perspective_dilated_conv2d.weight'] = v
 49 |                     elif 'bias' in k:
 50 |                         new_state_dict[name_prefix + '.perspective_dilated_conv2d.bias'] = v
 51 |                         ori_count += 1
 52 |                 elif count == ori_count:
 53 |                     if 'weight' in k:
 54 |                         new_state_dict[name_prefix + '.bn.weight'] = v
 55 |                     elif 'bias' in k:
 56 |                         new_state_dict[name_prefix + '.bn.bias'] = v
 57 |                     elif 'running_mean' in k:
 58 |                         new_state_dict[name_prefix + '.bn.running_mean'] = v
 59 |                     elif 'running_var' in k:
 60 |                         new_state_dict[name_prefix + '.bn.running_var'] = v
 61 |                         count += 1
 62 |             self.load_state_dict(new_state_dict)
 63 |         
 64 |     
 65 |     def forward(self, x, perspective_map):
 66 |         x = self.pad_conv2d_1(x, perspective_map)
 67 |         x = self.pad_relu_1(x)
 68 |         x = self.pad_conv2d_2(x, perspective_map)
 69 |         x = self.pad_relu_2(x)
 70 |         x = self.max_pool_1(x)
 71 |         perspective_map = self.max_pool_1(perspective_map)
 72 |         
 73 |         x = self.pad_conv2d_3(x, perspective_map)
 74 |         x = self.pad_relu_3(x)
 75 |         x = self.pad_conv2d_4(x, perspective_map)
 76 |         x = self.pad_relu_4(x)
 77 |         x = self.max_pool_2(x)
 78 |         perspective_map = self.max_pool_2(perspective_map)
 79 |         
 80 |         x = self.pad_conv2d_5(x, perspective_map)
 81 |         x = self.pad_relu_5(x)
 82 |         x = self.pad_conv2d_6(x, perspective_map)
 83 |         x = self.pad_relu_6(x)
 84 |         x = self.pad_conv2d_7(x, perspective_map)
 85 |         x = self.pad_relu_7(x)
 86 |         x = self.max_pool_3(x)
 87 |         perspective_map = self.max_pool_3(perspective_map)
 88 |         
 89 |         x = self.pad_conv2d_8(x, perspective_map)
 90 |         x = self.pad_relu_8(x)
 91 |         x = self.pad_conv2d_9(x, perspective_map)
 92 |         x = self.pad_relu_9(x)
 93 |         x = self.pad_conv2d_10(x, perspective_map)
 94 |         x = self.pad_relu_10(x)
 95 |         
 96 |         return x, perspective_map
 97 | 
 98 | class Backend(nn.Module):
 99 |     def __init__(self, in_channels, **kwargs):
100 |         super(Backend, self).__init__()
101 |         self.pad_conv2d_1 = BasicPerspectiveDilatedConv2D_BN(in_channels, 512, 3, 1,  **kwargs)
102 |         self.pad_relu_1 = nn.ReLU(inplace=True)
103 |         self.pad_conv2d_2 = BasicPerspectiveDilatedConv2D_BN(512, 512, 3, 1, **kwargs)
104 |         self.pad_relu_2 = nn.ReLU(inplace=True)
105 |         self.pad_conv2d_3 = BasicPerspectiveDilatedConv2D_BN(512, 512, 3, 1, **kwargs)
106 |         self.pad_relu_3 = nn.ReLU(inplace=True)
107 |         self.pad_conv2d_4 = BasicPerspectiveDilatedConv2D_BN(512, 256, 3, 1, **kwargs)
108 |         self.pad_relu_4 = nn.ReLU(inplace=True)
109 |         self.pad_conv2d_5 = BasicPerspectiveDilatedConv2D_BN(256, 128, 3, 1, **kwargs)
110 |         self.pad_relu_5 = nn.ReLU(inplace=True)
111 |         self.pad_conv2d_6 = BasicPerspectiveDilatedConv2D_BN(128, 64, 3, 1, **kwargs)
112 |         self.pad_relu_6 = nn.ReLU(inplace=True)
113 |     
114 |     def forward(self, x, perspective_map):
115 |         x = self.pad_conv2d_1(x, perspective_map)
116 |         x = self.pad_relu_1(x)
117 |         x = self.pad_conv2d_2(x, perspective_map)
118 |         x = self.pad_relu_2(x)
119 |         x = self.pad_conv2d_3(x, perspective_map)
120 |         x = self.pad_relu_3(x)
121 |         x = self.pad_conv2d_4(x, perspective_map)
122 |         x = self.pad_relu_4(x)
123 |         x = self.pad_conv2d_5(x, perspective_map)
124 |         x = self.pad_relu_5(x)
125 |         x = self.pad_conv2d_6(x, perspective_map)
126 |         x = self.pad_relu_6(x)
127 |         return x
128 | 
class CSRPersNet_BN(nn.Module):
    """Front end + back end + 1x1 output convolution.

    Args:
        load_path: optional path of a checkpoint readable by ``torch.load``.
            When given, its entries are remapped (see ``__init__``) and loaded
            into the whole model; otherwise only the output layer is
            re-initialised.
        is_relu: apply a ReLU to the single-channel output before upsampling.
        **kwargs: forwarded to ``Frontend`` and ``Backend``. ``kwargs['sigma']``
            is read when remapping a checkpoint, to seed every back-end
            ``rate_map_generator.params`` entry.
    """

    def __init__(self, load_path=None, is_relu=False, **kwargs):
        super(CSRPersNet_BN, self).__init__()
        self.is_relu = is_relu
        self.front_end = Frontend(True, **kwargs)
        self.back_end = Backend(512, **kwargs)
        self.output_layer = nn.Conv2d(64, 1, kernel_size=1)
        if load_path is not None:
            new_state_dict = OrderedDict()
            state_dict = torch.load(load_path)
            # Index of the back-end layer currently being filled; advanced
            # every time a back-end bias entry has been consumed.
            # NOTE(review): this presumably expects the back-end entries of the
            # checkpoint to come as ordered weight/bias pairs - confirm against
            # the checkpoints actually used.
            count = 1
            for k, v in state_dict.items():
                if 'back_end' in k:
                    name_prefix = "back_end.pad_conv2d_" + str(count)
                    if 'weight' in k:
                        # Same initial value as Frontend uses. The previous
                        # `torch.FloatTensor(*kwargs)` unpacked the keyword
                        # *names* (strings) as arguments and could not work.
                        new_state_dict[name_prefix + '.rate_map_generator.params'] = torch.FloatTensor(kwargs['sigma'])
                        new_state_dict[name_prefix + '.perspective_dilated_conv2d.weight'] = v
                    elif 'bias' in k:
                        new_state_dict[name_prefix + '.perspective_dilated_conv2d.bias'] = v
                        count += 1
                else:
                    # Every other entry is taken over under its original key.
                    new_state_dict[k] = v
            self.load_state_dict(new_state_dict)
        else:
            # No checkpoint: draw the output convolution's weights from a
            # normal distribution (std 0.01) and zero its bias.
            nn.init.normal_(self.output_layer.weight, std=0.01)
            if self.output_layer.bias is not None:
                nn.init.constant_(self.output_layer.bias, 0)

    def forward(self, x, perspective_map):
        """Return the single-channel output, bilinearly upsampled by 4."""
        x, perspective_map = self.front_end(x, perspective_map)
        x = self.back_end(x, perspective_map)
        x = self.output_layer(x)

        if self.is_relu:
            x = F.relu(x)
        x = F.interpolate(x, (x.shape[2]*4, x.shape[3]*4), mode='bilinear', align_corners=False)
        return x

    def get_params(self):
        """Collect parameters into three groups.

        Returns:
            ``(conv_params, bn_params, ada_sig_params)``: weight and bias of
            every ``nn.Conv2d`` / ``PerspectiveDilatedConv2dLayer``, weight and
            bias of every ``nn.BatchNorm2d``, and ``params`` of every
            ``AdaptiveSigmoid``. The lists are also stored on ``self``.
        """
        self.ada_sig_params = []
        self.conv_params = []
        self.bn_params = []
        for m in self.modules():
            if isinstance(m, AdaptiveSigmoid):
                self.ada_sig_params.append(m.params)
            elif isinstance(m, (nn.Conv2d, PerspectiveDilatedConv2dLayer)):
                self.conv_params.append(m.weight)
                self.conv_params.append(m.bias)
            elif isinstance(m, nn.BatchNorm2d):
                self.bn_params.append(m.weight)
                self.bn_params.append(m.bias)
        return self.conv_params, self.bn_params, self.ada_sig_params
190 | 


--------------------------------------------------------------------------------
/PytorchCudaOpExtension/perspective_aware_conv2d/pad_conv2d_cuda.cu:
--------------------------------------------------------------------------------
#include <cstdio>

#include <ATen/ATen.h>
#include <ATen/cuda/CUDAContext.h>
#include <THC/THCAtomics.cuh>
#include <THC/THCDeviceUtils.cuh>
#include "pad_conv2d.h"
  6 | 
  7 | #define CUDA_KERNEL_LOOP(i ,n) \
  8 |     for (int i = blockIdx.x * blockDim.x + threadIdx.x; i<(n); i+= blockDim.x * gridDim.x)
  9 | 
 10 | const int CUDA_NUM_THREADS = 1024;
 11 | 
 12 | inline int GET_BLOCKS(const int N){
 13 |   return (N + CUDA_NUM_THREADS - 1) / CUDA_NUM_THREADS;
 14 | }
 15 | 
 16 | __device__ float dmcn_im2col_bilinear(
 17 |     const float* bottom_data,
 18 |     const int data_width,
 19 |     const int height,
 20 |     const int width,
 21 |     float h,
 22 |     float w){
 23 | 
 24 |   int h_low = floor(h);
 25 |   int w_low = floor(w);
 26 |   int h_high = h_low + 1;
 27 |   int w_high = w_low + 1;
 28 | 
 29 |   float lh = h - h_low;
 30 |   float lw = w - w_low;
 31 |   float hh = 1 - lh, hw = 1 - lw;
 32 | 
 33 |   float v1 = 0;
 34 |   if (h_low >= 0 && w_low >= 0)
 35 |     v1 = bottom_data[h_low * data_width + w_low];
 36 |   float v2 = 0;
 37 |   if (h_low >=0 && w_high <= width - 1)
 38 |     v2 = bottom_data[h_low * data_width + w_high];
 39 |   float v3 = 0;
 40 |   if (h_high <= height - 1 && w_low >= 0)
 41 |     v3 = bottom_data[h_high * data_width + w_low];
 42 |   float v4 = 0;
 43 |   if (h_high <= height - 1 && w_high <= width - 1)
 44 |     v4 = bottom_data[h_high * data_width + w_high];
 45 | 
 46 |   float w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw;
 47 | 
 48 |   float val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
 49 |   return val;
 50 | 
 51 | }
 52 | 
 53 | __device__ float dmcn_get_gradient_weight(
 54 |     float argmax_h, // offset h
 55 |     float argmax_w, // offset w
 56 |     const int h,  const int w, // coordinate
 57 |     const int height,  const int width){
 58 | 
 59 |   if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || argmax_w >= width) {
 60 |     //empty
 61 |     return 0;
 62 |   }
 63 | 
 64 |   int argmax_h_low = floor(argmax_h);
 65 |   int argmax_w_low = floor(argmax_w);
 66 |   int argmax_h_high = argmax_h_low + 1;
 67 |   int argmax_w_high = argmax_w_low + 1;
 68 | 
 69 |   float weight = 0;
 70 |   if (h == argmax_h_low && w == argmax_w_low)
 71 |       weight = (h + 1 - argmax_h) * (w + 1 - argmax_w);
 72 |   if (h == argmax_h_low && w == argmax_w_high)
 73 |       weight = (h + 1 - argmax_h) * (argmax_w + 1 - w);
 74 |   if (h == argmax_h_high && w == argmax_w_low)
 75 |       weight = (argmax_h + 1 - h) * (w + 1 - argmax_w);
 76 |   if (h == argmax_h_high && w == argmax_w_high)
 77 |       weight = (argmax_h + 1 - h) * (argmax_w + 1 - w);
 78 |   return weight;
 79 | }
 80 | 
 81 | __device__ float dmcn_get_coordinate_weight(
 82 |     float argmax_h,
 83 |     float argmax_w,
 84 |     const int height,
 85 |     const int width,
 86 |     const float* im_data,
 87 |     const int data_width,
 88 |     const int bp_dir
 89 |     ) {
 90 | 
 91 |   if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || argmax_w >= width)
 92 |   {
 93 |     //empty
 94 |     return 0;
 95 |   }
 96 | 
 97 |   int argmax_h_low = floor(argmax_h);
 98 |   int argmax_w_low = floor(argmax_w);
 99 |   int argmax_h_high = argmax_h_low + 1;
100 |   int argmax_w_high = argmax_w_low + 1;
101 | 
102 |   float weight = 0;
103 | 
104 |   if (bp_dir == 0) {
105 |     if (argmax_h_low >= 0 && argmax_w_low >= 0)
106 |         weight += -1 * (argmax_w_low + 1 - argmax_w) * im_data[argmax_h_low * data_width + argmax_w_low];
107 |     if (argmax_h_low >= 0 && argmax_w_high <= width - 1)
108 |         weight += -1 * (argmax_w - argmax_w_low) * im_data[argmax_h_low * data_width + argmax_w_high];
109 |     if (argmax_h_high <= height - 1 && argmax_w_low >= 0)
110 |         weight += (argmax_w_low + 1 - argmax_w) * im_data[argmax_h_high * data_width + argmax_w_low];
111 |     if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1)
112 |         weight += (argmax_w - argmax_w_low) * im_data[argmax_h_high * data_width + argmax_w_high];
113 |   } else if (bp_dir == 1) {
114 |     if (argmax_h_low >= 0 && argmax_w_low >= 0)
115 |         weight += -1 * (argmax_h_low + 1 - argmax_h) * im_data[argmax_h_low * data_width + argmax_w_low];
116 |     if (argmax_h_low >= 0 && argmax_w_high <= width - 1)
117 |         weight += (argmax_h_low + 1 - argmax_h) * im_data[argmax_h_low * data_width + argmax_w_high];
118 |     if (argmax_h_high <= height - 1 && argmax_w_low >= 0)
119 |         weight += -1 * (argmax_h - argmax_h_low) * im_data[argmax_h_high * data_width + argmax_w_low];
120 |     if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1)
121 |         weight += (argmax_h - argmax_h_low) * im_data[argmax_h_high * data_width + argmax_w_high];
122 |   }
123 | 
124 |   return weight;
125 | }
126 | 
// Adds a per-channel bias to an output map, in place.
//
//   n            : number of elements of data_out to process
//   data_out     : values to update; element `index` belongs to channel
//                  (index / width_out / height_out) % out_channels
//   bias         : one value per output channel
//
// Intended for a 1-D launch; work items are distributed with CUDA_KERNEL_LOOP.
__global__ void add_bias_kernel(
    int n,
    float* data_out,
    const float* bias,
    const int out_channels,
    const int height_out, const int width_out
){
    CUDA_KERNEL_LOOP(index, n){
        const int c_col = (index / width_out / height_out) % out_channels;
        // Each index is visited by exactly one thread of the launch, so a
        // plain read-modify-write is sufficient; no atomic is needed.
        data_out[index] += bias[c_col];
    }
}
140 | 
// Accumulates the bias gradient: every element of grad_output is added to the
// grad_bias entry of its channel.
//
//   n            : number of elements of grad_output to process
//   grad_output  : element `index` belongs to channel
//                  (index / width_out / height_out) % out_channels
//   grad_bias    : one accumulator per output channel; values are added to
//                  whatever it already holds
//
// Many threads target the same accumulator, hence the atomic addition.
__global__ void calculate_dbias_kernel(
    int n,
    const float* grad_output,
    float* grad_bias,
    const int out_channels,
    const int height_out, const int width_out
){
    CUDA_KERNEL_LOOP(index, n){
        const int channel = (index / width_out / height_out) % out_channels;
        atomicAdd(grad_bias + channel, grad_output[index]);
    }
}
154 | 
// Gathers the rate-scaled sampling window of every output location of one
// image into a column buffer.
//
// Work item `index` (0 <= index < n) selects an input channel c_im and an
// output location (h_col, w_col). For each kernel tap (ki, kj) the input plane
// is sampled with bilinear interpolation at
//     h = h_in + (ki - (kernel_h - 1) / 2) * rate
//     w = w_in + (kj - (kernel_w - 1) / 2) * rate
// where rate = data_rate[h_col * width_col + w_col] and (h_in, w_in) is the
// window centre. Samples outside (-1, height) x (-1, width) are written as 0.
// The value of tap t = ki * kernel_w + kj is stored at
//     data_col[((c_im * kernel_h * kernel_w + t) * height_col + h_col) * width_col + w_col]
//
// The taps are enumerated as 0..kernel-1 with the same centre offset that the
// col2im kernels use, so exactly kernel_h * kernel_w rows are written per
// channel. (Iterating over -(k/2)..k/2 visits k + 1 taps for an even k and
// writes past the rows reserved for the channel; for odd k both forms agree.)
//
// `num_channels` is not used by the kernel.
__global__ void pad_conv2d_im2col_kernel(
    int n,
    const float* data_im,
    const float* data_rate,
    const int height, const int width,
    const int kernel_h, const int kernel_w,
    const int stride_h, const int stride_w,
    const int num_channels,
    const int height_col, const int width_col,
    float* data_col
    ){
    // Offset of the window centre from its first tap.
    const int center_h = (kernel_h - 1) / 2;
    const int center_w = (kernel_w - 1) / 2;

    CUDA_KERNEL_LOOP(index, n){
        const int w_col = index % width_col;
        const int h_col = (index / width_col) % height_col;
        const int c_im = index / width_col / height_col;
        const int c_col = c_im * kernel_h * kernel_w;
        const float rate = data_rate[h_col * width_col + w_col];

        const int h_in = h_col * stride_h + center_h;
        const int w_in = w_col * stride_w + center_w;

        float* data_col_ptr = data_col + (c_col * height_col + h_col) * width_col + w_col;
        const float* data_im_ptr = data_im + c_im * height * width;

        for (int ki = 0; ki < kernel_h; ++ki) {
            for (int kj = 0; kj < kernel_w; ++kj) {
                float val = static_cast<float>(0);
                const float h_im = h_in + (ki - center_h) * rate;
                const float w_im = w_in + (kj - center_w) * rate;
                if (h_im > -1 && w_im > -1 && h_im < height && w_im < width) {
                    val = dmcn_im2col_bilinear(data_im_ptr, width, height, width, h_im, w_im);
                }
                *data_col_ptr = val;
                // Next tap lives one full output plane further on.
                data_col_ptr += height_col * width_col;
            }
        }
    }
}
194 | 
// Accumulates the gradient with respect to the per-location rate map.
//
// Work item `index` (0 <= index < n) addresses one column-buffer element, i.e.
// one (channel c, kernel tap (i, j), output location (h_out, w_out)). For every
// tap other than the centre, the derivative of the bilinear sample with respect
// to its row/column coordinate is multiplied by data_col[index], projected on
// the tap's direction (i, j relative to the centre, normalised to unit length)
// and atomically added to grad_rate_map[h_out * width_col + w_out].
//
// NOTE(review): data_col presumably holds the gradient of the loss with respect
// to the column buffer - confirm against the caller.
__global__ void pad_conv2d_col2im_coord_kernel(
 const int n,
 const float* data_col,
 const float* data_im,
 const float* data_rate,
 const int channels, const int height, const int width,
 const int kernel_h, const int kernel_w,
 const int stride_h, const int stride_w,
 const int height_col, const int width_col,
 float* grad_rate_map
){
   CUDA_KERNEL_LOOP(index, n){
        // Decompose the flat index: (channel, tap row, tap column, h_out, w_out).
        const int tap_and_channel = index / width_col / height_col;
        const int j = tap_and_channel % kernel_w;
        const int i = (tap_and_channel / kernel_w) % kernel_h;
        const int c = tap_and_channel / kernel_w / kernel_h;
        const int w_out = index % width_col;
        const int h_out = (index / width_col) % height_col;

        // Tap position relative to the window centre.
        const int center_h = (int)((kernel_h - 1 ) / 2);
        const int center_w = (int)((kernel_w - 1 ) / 2);
        const float rel_i = (i - center_h);
        const float rel_j = (j - center_w);
        if (rel_i == 0 && rel_j == 0) {
            // The centre tap does not move with the rate.
            continue;
        }

        // Coordinates of the centre of the conv window in the image.
        const int h_in = h_out * stride_h + center_h;
        const int w_in = w_out * stride_w + center_w;
        const float rate = data_rate[h_out * width_col + w_out];

        // Position this tap sampled in the forward pass.
        const float sample_h = h_in + (i - center_h) * rate;
        const float sample_w = w_in + (j - center_w) * rate;

        const float* plane = data_im + c * height * width;
        const float d_row = dmcn_get_coordinate_weight(
            sample_h, sample_w, height, width, plane, width, 0);
        const float d_col = dmcn_get_coordinate_weight(
            sample_h, sample_w, height, width, plane, width, 1);

        const float val_h = (d_row) * data_col[index];
        const float val_w = (d_col) * data_col[index];

        const float projected = val_h * rel_i + val_w * rel_j;
        const float gradient = projected / sqrtf(float(rel_i * rel_i + rel_j * rel_j));
        atomicAdd(grad_rate_map + h_out * width_col + w_out, gradient);
   }
}
249 | 
// Scatters column-buffer values back onto the image grid.
//
// Work item `index` (0 <= index < n) addresses one column-buffer element, i.e.
// one (channel c, kernel tap (i, j), output location (h_out, w_out)). Its value
// is distributed over the 2x2 pixel neighbourhood that starts at the truncated
// sampling position, each pixel receiving the value times the weight returned
// by dmcn_get_gradient_weight; the results are atomically added to grad_im.
// Pixels outside the image are skipped.
//
// NOTE(review): data_col presumably holds the gradient of the loss with respect
// to the column buffer - confirm against the caller.
__global__ void pad_conv2d_col2im_kernel(
    const int n,
    const float* data_col,
    const float* data_rate,
    const int channels, const int height, const int width,
    const int kernel_h, const int kernel_w,
    const int stride_h, const int stride_w,
    const int height_col, const int width_col,
    float* grad_im
){
    CUDA_KERNEL_LOOP(index, n){
        // Decompose the flat index: (channel, tap row, tap column, h_out, w_out).
        const int tap_and_channel = index / width_col / height_col;
        const int j = tap_and_channel % kernel_w;
        const int i = (tap_and_channel / kernel_w) % kernel_h;
        const int c = tap_and_channel / kernel_w / kernel_h;
        const int w_out = index % width_col;
        const int h_out = (index / width_col) % height_col;

        const int center_h = (int)((kernel_h - 1 ) / 2);
        const int center_w = (int)((kernel_w - 1 ) / 2);
        const int h_in = h_out * stride_h + center_h;
        const int w_in = w_out * stride_w + center_w;
        const float rate = data_rate[h_out * width_col + w_out];

        // Position this tap sampled in the forward pass.
        const float sample_h = h_in + (i - center_h) * rate;
        const float sample_w = w_in + (j - center_w) * rate;
        const int base_h = (int)sample_h;
        const int base_w = (int)sample_w;

        const float top_grad = data_col[index];

        // Corners in the order (0,0), (0,1), (1,0), (1,1).
        for (int corner = 0; corner < 4; ++corner) {
            const int y = base_h + corner / 2;
            const int x = base_w + corner % 2;
            if (y < 0 || y >= height || x < 0 || x >= width) {
                continue;
            }
            const int target = (c * height + y) * width + x;
            const float weight = dmcn_get_gradient_weight(sample_h, sample_w, y, x, height, width);
            atomicAdd(grad_im + target, weight * top_grad);
        }
    }
}
287 | 
// Launches pad_conv2d_im2col_kernel on `stream` for one image.
//
//   data_im   : input image (device pointer)
//   data_rate : per-output-location rate map (device pointer)
//   data_col  : column buffer that receives the sampled values (device pointer)
//
// One work item is created per (input channel, output row, output column).
// The launch is asynchronous. A launch error is reported on stderr; it is only
// peeked at, not cleared, so later error checks by the caller still see it.
void pad_conv2d_im2col(cudaStream_t stream,
    const float* data_im,
    const float* data_rate,
    const int in_channels, const int height, const int width,
    const int kernel_h, const int kernel_w,
    const int stride_h, const int stride_w,
    const int height_out, const int width_out,
    float* data_col){
    const int num_kernels = in_channels * height_out * width_out;
    pad_conv2d_im2col_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS, 0, stream>>>(
            num_kernels,
            data_im,
            data_rate,
            height, width,
            kernel_h, kernel_w,
            stride_h, stride_w,
            in_channels,
            height_out, width_out,
            data_col
    );
    const cudaError_t err = cudaPeekAtLastError();
    if (err != cudaSuccess) {
        fprintf(stderr, "error in pad_conv2d_im2col: %s\n", cudaGetErrorString(err));
    }
}
311 | 
// Launches pad_conv2d_col2im_coord_kernel on `stream` for one image.
//
//   data_col      : column buffer (device pointer)
//   data_im       : input image (device pointer)
//   data_rate     : per-output-location rate map (device pointer)
//   grad_rate_map : accumulator for the rate-map gradient (device pointer);
//                   the kernel adds to its current contents
//
// One work item is created per column-buffer element
// (in_channels * kernel_h * kernel_w * height_col * width_col).
// The launch is asynchronous. A launch error is reported on stderr; it is only
// peeked at, not cleared, so later error checks by the caller still see it.
void pad_conv2d_col2im_coord(cudaStream_t stream,
   const float* data_col, const float* data_im, const float* data_rate,
   const int in_channels, const int height, const int width,
   const int kernel_h, const int kernel_w,
   const int stride_h, const int stride_w,
   const int height_col, const int width_col,
   float* grad_rate_map){
   const int num_kernels = in_channels * kernel_h * kernel_w * height_col * width_col;
   pad_conv2d_col2im_coord_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS, 0, stream>>>(
       num_kernels,
       data_col,
       data_im,
       data_rate,
       in_channels, height, width,
       kernel_h, kernel_w,
       stride_h, stride_w,
       height_col, width_col,
       grad_rate_map
   );
   const cudaError_t err = cudaPeekAtLastError();
   if (err != cudaSuccess) {
       fprintf(stderr, "error in pad_conv2d_col2im_coord: %s\n", cudaGetErrorString(err));
   }
}
332 | 
// Launches pad_conv2d_col2im_kernel on `stream` for one image.
//
//   data_col  : column buffer (device pointer)
//   data_rate : per-output-location rate map (device pointer)
//   grad_im   : accumulator on the image grid (device pointer); the kernel
//               adds to its current contents
//
// One work item is created per column-buffer element
// (in_channels * kernel_h * kernel_w * height_out * width_out).
// The launch is asynchronous. A launch error is reported on stderr; it is only
// peeked at, not cleared, so later error checks by the caller still see it.
void pad_conv2d_col2im(cudaStream_t stream,
    const float* data_col, const float* data_rate,
    const int in_channels, const int height, const int width,
    const int kernel_h, const int kernel_w,
    const int stride_h, const int stride_w,
    const int height_out, const int width_out,
    float* grad_im){
    const int num_kernels = in_channels * kernel_h * kernel_w * height_out * width_out;
    pad_conv2d_col2im_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS, 0, stream>>>(
        num_kernels,
        data_col,
        data_rate,
        in_channels, height, width,
        kernel_h, kernel_w,
        stride_h, stride_w,
        height_out, width_out,
        grad_im
    );
    const cudaError_t err = cudaPeekAtLastError();
    if (err != cudaSuccess) {
        fprintf(stderr, "error in pad_conv2d_col2im: %s\n", cudaGetErrorString(err));
    }
}
352 | 
// Launches add_bias_kernel on `stream`: adds bias[c] to every element of
// channel c of data_out, in place.
//
//   data_out : out_channels * height_out * width_out values (device pointer)
//   bias     : one value per output channel (device pointer)
//
// The launch is asynchronous. A launch error is reported on stderr; it is only
// peeked at, not cleared, so later error checks by the caller still see it.
void add_bias(cudaStream_t stream,
    float* data_out,
    const float* bias,
    const int out_channels,
    const int height_out, const int width_out
    ){
    const int num_kernels = out_channels * height_out * width_out;
    add_bias_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS, 0, stream>>>(
        num_kernels,
        data_out,
        bias,
        out_channels,
        height_out, width_out
    );
    const cudaError_t err = cudaPeekAtLastError();
    if (err != cudaSuccess) {
        fprintf(stderr, "error in add_bias: %s\n", cudaGetErrorString(err));
    }
}
368 | 
// Launches calculate_dbias_kernel on `stream`: adds every element of
// grad_output to the grad_bias entry of its channel.
//
//   grad_output : out_channels * height_out * width_out values (device pointer)
//   grad_bias   : one accumulator per output channel (device pointer); the
//                 kernel adds to its current contents
//
// The launch is asynchronous. A launch error is reported on stderr; it is only
// peeked at, not cleared, so later error checks by the caller still see it.
void calculate_dbias(cudaStream_t stream,
    const float* grad_output,
    float* grad_bias,
    const int out_channels,
    const int height_out, const int width_out
    ){
    const int num_kernels = out_channels * height_out * width_out;
    calculate_dbias_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS, 0, stream>>>(
        num_kernels,
        grad_output,
        grad_bias,
        out_channels,
        height_out, width_out
    );
    const cudaError_t err = cudaPeekAtLastError();
    if (err != cudaSuccess) {
        fprintf(stderr, "error in calculate_dbias: %s\n", cudaGetErrorString(err));
    }
}
384 | 


--------------------------------------------------------------------------------