├── .gitignore
├── README.md
├── agents
│   ├── __init__.py
│   ├── base.py
│   └── sgnet_agent.py
├── configs
│   ├── sgnet_aspp_nyud_fps.json
│   ├── sgnet_aspp_nyud_test.json
│   ├── sgnet_nyud_fps.json
│   ├── sgnet_nyud_test.json
│   ├── sgnet_res50_nyud_fps.json
│   └── sgnet_res50_nyud_test.json
├── data
│   ├── nyudv2.py
│   └── transform
│       ├── __init__.py
│       └── rgbd_transform.py
├── dataset
│   └── list
│       └── nyud
│           ├── test_nyud.txt
│           └── train_nyud.txt
├── graphs
│   ├── models
│   │   └── SGNet
│   │       ├── SGNet.py
│   │       ├── SGNet_ASPP.py
│   │       ├── SGNet_ASPP_fps.py
│   │       ├── SGNet_Res50.py
│   │       ├── SGNet_Res50_fps.py
│   │       └── SGNet_fps.py
│   └── ops
│       ├── __init__.py
│       ├── build.py
│       ├── build_modulated.py
│       ├── functions
│       │   ├── __init__.py
│       │   ├── deform_conv.py
│       │   └── modulated_dcn_func.py
│       ├── libs
│       │   ├── __init__.py
│       │   ├── _ext
│       │   │   └── __init__.py
│       │   ├── bn.py
│       │   ├── build.py
│       │   ├── build.sh
│       │   ├── dense.py
│       │   ├── functions.py
│       │   ├── misc.py
│       │   ├── residual.py
│       │   └── src
│       │       ├── bn.cu
│       │       ├── bn.h
│       │       ├── common.h
│       │       ├── lib_cffi.cpp
│       │       └── lib_cffi.h
│       ├── make.sh
│       ├── modules
│       │   ├── __init__.py
│       │   ├── deform_conv.py
│       │   └── s_conv.py
│       └── src
│           ├── cuda
│           │   ├── deform_psroi_pooling_cuda.cu
│           │   ├── deform_psroi_pooling_cuda.h
│           │   ├── modulated_deform_im2col_cuda.cu
│           │   └── modulated_deform_im2col_cuda.h
│           ├── deform_conv.c
│           ├── deform_conv.h
│           ├── deform_conv_cuda.c
│           ├── deform_conv_cuda.h
│           ├── deform_conv_cuda_kernel.cu
│           ├── deform_conv_cuda_kernel.h
│           ├── modulated_dcn.c
│           ├── modulated_dcn.h
│           ├── modulated_dcn_cuda.c
│           ├── modulated_dcn_cuda.h
│           ├── scale_conv.c
│           ├── scale_conv.h
│           ├── scale_conv_cuda.c
│           ├── scale_conv_cuda.h
│           ├── scale_conv_cuda_kernel.cu
│           └── scale_conv_cuda_kernel.h
├── main.py
├── run.sh
└── utils
    ├── __init__.py
    ├── assets
    │   └── class_diagram.png
    ├── config.py
    ├── dirs.py
    ├── encoding.py
    ├── log.py
    ├── metrics.py
    ├── misc.py
    ├── optim.py
    └── utils.py
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea/
2 | pretrained_weights/
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Spatial information guided Convolution for Real-Time RGBD Semantic Segmentation
2 |
3 | Lin-Zhuo Chen, Zheng Lin, Ziqin Wang, Yong-Liang Yang and Ming-Ming Cheng
4 |
5 | ⭐ Project Home »
6 |
23 | ***
24 | The official repo of the TIP 2021 paper
25 | [Spatial information guided Convolution for Real-Time RGBD Semantic Segmentation](https://arxiv.org/pdf/2004.04534.pdf).
26 |
27 | ## Results on NYUDv2 Dataset
28 |
29 | Speed depends on the hardware spec (e.g. CPU, GPU, RAM), so it is hard to make an exactly equal comparison.
30 |
31 | I get the following results on an NVIDIA 1080Ti GPU and an Intel(R) Core(TM) i7-6700K CPU @ 4.00GHz (MS denotes multi-scale testing, enabled via the `ms` flag in the test configs):
32 |
33 | | Model | mIoU(480x640) | mIoU(MS) | FPS(480x640) | FPS(425x560) |
34 | | :----------: | :-----------: | :------: | :----------: | :----------: |
35 | | SGNet(Res50) | 47.7% | 48.6% | 35 | 39 |
36 | | SGNet | 49.8% | 51.1% | 26 | 28 |
37 | | SGNet_ASPP | 50.2% | 51.1% | 24 | 26 |
38 |
39 | If you want to measure speed on a more advanced graphics card (such as a 2080Ti), use an environment with PyTorch 0.4.1 and CUDA 9.2 to measure inference speed.
40 |
41 | ## Prerequisites
42 |
43 | #### Environments
44 | * PyTorch == 0.4.1
45 | * CUDA == 8.0
46 | * cuDNN == 7.1.4
47 | * numpy
48 | * pillow
49 | * tensorboardX
50 | * tqdm
52 | #### Trained model and dataset
53 | Download the NYUDv2 dataset and the trained models:
54 |
55 | | Download | Dataset | Model | Model | Model |
56 | | ------------------------ | :----------------------------------------------------------: | :----------------------------------------------------------: | :----------------------------------------------------------: | :----------------------------------------------------------: |
57 | | BaiduDrive(passwd: scon) | [NYUDv2](https://pan.baidu.com/s/1mvEVXRGc0ESrFN6ux3YpDA) | [SGNet_Res50](https://pan.baidu.com/s/1yj3llVf14uT17HzqTi6pjw) | [SGNet](https://pan.baidu.com/s/1shzbcPjIKdq99Ji39OHIMg) | [SGNet_ASPP](https://pan.baidu.com/s/1HeiJfHpIjSQKmFtYJhBrng) |
58 |
59 |
60 |
61 | ## Usage
62 | 1. Put the pretrained models into the `pretrained_weights` folder and unzip the dataset into the `dataset` folder.
63 |
64 | 2. To compile the InPlace-ABN and S-Conv operations, please run:
65 | ```bash
66 | ## compile InPlace-ABN
67 | cd graphs/ops/libs
68 | sh build.sh
69 | python build.py
70 | ## compile S-Conv
71 | cd ..
72 | sh make.sh
73 | ```
74 |
75 | 3. Modify the config in `configs/sgnet_nyud_test.json` (mainly check "trained_model_path").
76 | To test the model with input size $480 \times 640$, please run:
77 |
78 | ```bash
79 | ## SGNet
80 | python main.py ./configs/sgnet_nyud_test.json
81 |
82 | ## SGNet_ASPP
83 | python main.py ./configs/sgnet_aspp_nyud_test.json
84 |
85 | ## SGNet_Res50
86 | python main.py ./configs/sgnet_res50_nyud_test.json
87 | ```
88 | 4. Run the following commands to measure the model inference speed at a
89 | given input size, e.g. 480 x 640:
90 |
91 | ```bash
92 | ## SGNet
93 | python main.py ./configs/sgnet_nyud_fps.json
94 |
95 | ## SGNet_ASPP
96 | python main.py ./configs/sgnet_aspp_nyud_fps.json
97 |
98 | ## SGNet_Res50
99 | python main.py ./configs/sgnet_res50_nyud_fps.json
100 | ```
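
For reference, the speed test internally builds the network and calls it with an RGB tensor and a 3-channel xyz tensor derived from the depth map. Below is a minimal sketch of invoking the model directly (assuming the ops above are compiled and a GPU is available):

```python
import torch
from graphs.models.SGNet.SGNet_fps import SGNet

# Build the speed-measurement variant with the 40 NYUDv2 classes
model = SGNet(num_classes=40).cuda().eval()

image = torch.randn(1, 3, 480, 640).cuda()  # RGB input
depth = torch.randn(1, 3, 480, 640).cuda()  # xyz map computed from depth
with torch.no_grad():
    out = model(image, depth)
```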
101 |
102 |
103 | ## Citation
104 |
105 | If you find this work useful for your research, please cite our paper:
106 | ```
107 | @article{21TIP-SGNet,
108 | author={Lin-Zhuo Chen and Zheng Lin and Ziqin Wang and Yong-Liang Yang and Ming-Ming Cheng},
109 | journal={IEEE Transactions on Image Processing},
110 | title={Spatial Information Guided Convolution for Real-Time RGBD Semantic Segmentation},
111 | year={2021},
112 | volume={30},
113 | pages={2313-2324},
114 | doi={10.1109/TIP.2021.3049332}
115 | }
116 | ```
117 |
118 | ### Thanks to the Third Party Libs
119 | [inplace_abn](https://github.com/mapillary/inplace_abn)
120 |
121 | [Pytorch-Deeplab](https://github.com/speedinghzl/Pytorch-Deeplab)
122 |
123 | [PyTorch-Encoding](https://github.com/zhanghang1989/PyTorch-Encoding)
124 |
125 | [PyTorch-segmentation-toolbox](https://github.com/speedinghzl/pytorch-segmentation-toolbox)
126 |
127 | [Depth2HHA-python](https://github.com/charlesCXK/Depth2HHA-python)
128 |
129 | [Pytorch-Project-Template](https://github.com/moemen95/Pytorch-Project-Template)
130 |
131 | [Deformable-Convolution-V2-PyTorch](https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch)
132 |
133 |
134 | If you have any questions, feel free to contact me via `linzhuochen🥳foxmail😲com`
135 |
--------------------------------------------------------------------------------
/agents/__init__.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 |
4 | path = os.path.dirname(os.path.abspath(__file__))
5 |
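# Auto-import every module in this package and re-export its classes, so an
# agent can be looked up by the name given in a config's "agent" field.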
6 | for py in [f[:-3] for f in os.listdir(path) if f.endswith('.py') and f != '__init__.py']:
7 | mod = __import__('.'.join([__name__, py]), fromlist=[py])
8 | classes = [getattr(mod, x) for x in dir(mod) if isinstance(getattr(mod, x), type)]
9 | for cls in classes:
10 | setattr(sys.modules[__name__], cls.__name__, cls)
--------------------------------------------------------------------------------
/agents/base.py:
--------------------------------------------------------------------------------
1 | """
2 | The Base Agent class, where all other agents inherit from, that contains definitions for all the necessary functions
3 | """
4 | import logging
5 |
6 |
7 | class BaseAgent:
8 | """
9 | This base class will contain the base functions to be overloaded by any agent you will implement.
10 | """
11 |
12 | def __init__(self, config):
13 | self.config = config
14 | self.logger = logging.getLogger("Agent")
15 |
16 | def load_checkpoint(self, file_name):
17 | """
18 | Latest checkpoint loader
19 | :param file_name: name of the checkpoint file
20 | :return:
21 | """
22 | raise NotImplementedError
23 |
24 | def save_checkpoint(self, file_name="checkpoint.pth.tar", is_best=0):
25 | """
26 | Checkpoint saver
27 | :param file_name: name of the checkpoint file
28 | :param is_best: boolean flag to indicate whether current checkpoint's metric is the best so far
29 | :return:
30 | """
31 | raise NotImplementedError
32 |
33 | def run(self):
34 | """
35 | The main operator
36 | :return:
37 | """
38 | raise NotImplementedError
39 |
40 | def train(self):
41 | """
42 | Main training loop
43 | :return:
44 | """
45 | raise NotImplementedError
46 |
47 | def train_one_epoch(self):
48 | """
49 | One epoch of training
50 | :return:
51 | """
52 | raise NotImplementedError
53 |
54 | def validate(self):
55 | """
56 | One cycle of model validation
57 | :return:
58 | """
59 | raise NotImplementedError
60 |
61 | def finalize(self):
62 | """
63 |         Finalize all operations of the two main components of the process: the operator and the data loader
64 | :return:
65 | """
66 | raise NotImplementedError
--------------------------------------------------------------------------------
/agents/sgnet_agent.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import os
3 | import os.path as osp
4 | from tqdm import tqdm
5 | import shutil
6 | import random
7 | import torch
8 | from torch.backends import cudnn
9 | from torch.utils import data
10 | import torch.optim as optim
11 | import timeit
12 | from torch.nn import functional as F
13 | import time
14 | from PIL import Image
15 |
16 | from data.nyudv2 import NYUDataset_val_full
17 | from utils.metrics import IOUMetric
18 | from utils.utils import get_currect_time
19 | from utils.encoding import DataParallelModel, DataParallelCriterion
20 | from utils.log import Visualizer, Log
21 | from utils.optim import adjust_learning_rate
22 | from utils.misc import print_cuda_statistics
23 | from agents.base import BaseAgent
24 | from utils.utils import predict_multiscale, get_palette
25 |
26 | class SGNetAgent(BaseAgent):
27 | """
28 | This class will be responsible for handling the whole process of our architecture.
29 | """
30 | def __init__(self, config):
31 | super().__init__(config)
32 | ## Select network
33 | if config.spatial_information == 'depth' and config.os == 16 and config.network == "SGNet" and config.mode != "measure_speed":
34 | from graphs.models.SGNet.SGNet import SGNet
35 | elif config.spatial_information == 'depth' and config.os == 16 and config.network == "SGNet":
36 | from graphs.models.SGNet.SGNet_fps import SGNet
37 | elif config.spatial_information == 'depth' and config.os == 16 and config.network == "SGNet_ASPP" and config.mode != "measure_speed":
38 | from graphs.models.SGNet.SGNet_ASPP import SGNet
39 | elif config.spatial_information == 'depth' and config.os == 16 and config.network == "SGNet_ASPP":
40 | from graphs.models.SGNet.SGNet_ASPP_fps import SGNet
41 | elif config.spatial_information == 'depth' and config.os == 16 and config.network == "SGNet_Res50" and config.mode != "measure_speed":
42 | from graphs.models.SGNet.SGNet_Res50 import SGNet
43 | elif config.spatial_information == 'depth' and config.os == 16 and config.network == "SGNet_Res50":
44 | from graphs.models.SGNet.SGNet_Res50_fps import SGNet
45 |
46 | random.seed(self.config.seed)
47 | os.environ['PYTHONHASHSEED'] = str(self.config.seed)
48 | np.random.seed(self.config.seed)
49 | torch.manual_seed(self.config.seed)
50 | torch.cuda.manual_seed(self.config.seed)
51 | torch.cuda.manual_seed_all(self.config.seed)
52 | cudnn.enabled = True
53 | cudnn.benchmark = True
54 | cudnn.deterministic = False
55 | os.environ["CUDA_VISIBLE_DEVICES"] = config.gpu
56 | # create data loader
57 | if config.dataset == "NYUD":
58 | self.testloader = data.DataLoader(NYUDataset_val_full(self.config.val_list_path),
59 | batch_size=1, shuffle=False, pin_memory=True)
60 | # Create an instance from the Model
61 | self.logger.info("Loading encoder pretrained in imagenet...")
62 | self.model = SGNet(self.config.num_classes)
63 | print(self.model)
64 |
65 | self.model.cuda()
66 | self.model.train()
67 | self.model.float()
68 | print(config.gpu)
69 | if config.mode == 'test':
70 | self.test_model = self.model
71 | if config.mode != 'measure_speed':
72 | self.model = DataParallelModel(self.model, device_ids=[0])
73 | print('parallel....................')
74 |
75 |
76 | total = sum([param.nelement() for param in self.model.parameters()])
77 | print(' + Number of params: %.2fM' % (total / 1e6))
78 | print_cuda_statistics()
79 |
80 | def load_checkpoint(self, filename):
81 | try:
82 | self.logger.info("Loading checkpoint '{}'".format(filename))
83 | checkpoint = torch.load(filename)
84 |
85 | self.current_epoch = checkpoint['epoch']
86 | self.current_iteration = checkpoint['iteration']
87 | self.model.load_state_dict(checkpoint['state_dict'])
88 |
89 | # self.optimizer.load_state_dict(checkpoint['optimizer'])
90 |         except OSError:
91 | self.logger.info("No checkpoint exists from '{}'. Skipping...".format(self.config.checkpoint_dir))
92 | self.logger.info("**First time to train**")
93 |
94 | def run(self):
95 | """
96 |         The main operator: dispatches to testing or speed measurement according to config.mode
97 | :return:
98 | """
99 | assert self.config.mode in ['train', 'test', 'measure_speed', 'train_iters']
100 | try:
101 | if self.config.mode == 'test':
102 | self.test()
103 | elif self.config.mode == 'measure_speed':
104 | with torch.no_grad():
105 | self.measure_speed(input_size=[1, 3, 480, 640])
106 | except KeyboardInterrupt:
107 | self.logger.info("You have entered CTRL+C.. Wait to finalize")
108 |
109 | def test(self):
110 |
111 | tqdm_batch = tqdm(self.testloader, total=len(self.testloader),
112 | desc="Testing...")
113 | self.test_model.eval()
114 |         metrics = IOUMetric(self.config.num_classes)
115 |         loss_val = 0
117 | palette = get_palette(256)
122 | if (not os.path.exists(self.config.output_predict_dir)):
123 | os.mkdir(self.config.output_predict_dir)
124 | self.load_checkpoint(self.config.trained_model_path)
125 | index = 0
126 | for batch_val in tqdm_batch:
127 | image = batch_val['image'].cuda()
128 | label = batch_val['seg'].cuda()
129 | label = torch.squeeze(label, 1).long()
130 | HHA = batch_val['HHA'].cuda()
131 | depth = batch_val['depth'].cuda()
132 | size = np.array([label.size(1), label.size(2)])
133 | input_size = (label.size(1), label.size(2))
134 |
135 | with torch.no_grad():
136 | if self.config.ms:
137 | output = predict_multiscale(self.test_model, image, depth, input_size, [0.8, 1.0, 2.0],
138 | self.config.num_classes, False)
139 | else:
140 | output = predict_multiscale(self.test_model, image, depth, input_size, [1.0],
141 | self.config.num_classes, False)
142 | seg_pred = np.asarray(np.argmax(output, axis=2), dtype=np.int)
143 | output_im = Image.fromarray(np.asarray(np.argmax(output, axis=2), dtype=np.uint8))
144 | output_im.putpalette(palette)
145 | output_im.save(self.config.output_predict_dir + '/' + str(index) + '.png')
146 | seg_gt = np.asarray(label[0].cpu().numpy(), dtype=np.int)
147 |
148 | ignore_index = seg_gt != 255
149 | seg_gt = seg_gt[ignore_index]
150 | seg_pred = seg_pred[ignore_index]
151 |
152 | metrics.add_batch(seg_pred, seg_gt, ignore_index=255)
153 |
154 | index = index + 1
155 | acc, acc_cls, iu, mean_iu, fwavacc = metrics.evaluate()
156 | print({'meanIU': mean_iu, 'IU_array': iu, 'acc': acc, 'acc_cls': acc_cls})
158 |
159 | def finalize(self):
160 | """
161 | Finalize all the operations of the 2 Main classes of the process the operator and the data loader
162 | :return:
163 | """
164 | # TODO
165 | pass
166 | def measure_speed(self, input_size, iteration=500):
167 | """
168 | Measure the speed of model
169 | :return: speed_time
170 | fps
171 | """
172 | self.model.eval()
173 | input = torch.randn(*input_size).cuda()
174 | depth = torch.randn(*input_size).cuda()
175 | HHA = torch.randn(*input_size).cuda()
176 |
177 |         # Warm up so the timed loop is not skewed by one-off initialization costs
178 |         for _ in range(100):
179 |             x = self.model(input, depth)
180 |         print('=========Speed Testing=========')
181 |         torch.cuda.synchronize()
182 |
183 | for _ in range(iteration):
184 | torch.cuda.synchronize()
185 | t_start = time.time()
186 | x = self.model(input, depth)
187 | torch.cuda.synchronize()
188 | elapsed_time = time.time() - t_start
189 |             speed_time = elapsed_time * 1000  # ms per iteration
190 | fps = 1 / elapsed_time
193 | print('Speed Time: %.2f ms / iter FPS: %.2f' % (speed_time, fps))
194 | time.sleep(0.005)
195 | return speed_time, fps
196 |
197 |
--------------------------------------------------------------------------------
/configs/sgnet_aspp_nyud_fps.json:
--------------------------------------------------------------------------------
1 | {
2 | "exp_name": "sgnet_depth_nyud_test",
3 | "agent": "SGNetAgent",
4 | "dataset": "NYUD",
5 | "network": "SGNet_ASPP",
6 | "spatial_information": "depth",
7 | "os": 16,
8 | "mode": "measure_speed",
9 | "cuda": true,
10 | "gpu": "1",
11 | "seed": 123,
12 | "num_classes": 40,
13 | "val_list_path": "./dataset/list/nyud/test_nyud.txt",
14 | "trained_model_path": "./pretrained_weights/SGNet_ASPP.pth.tar",
15 | "snapshot_dir": "./snapshots"
16 | }
--------------------------------------------------------------------------------
/configs/sgnet_aspp_nyud_test.json:
--------------------------------------------------------------------------------
1 | {
2 | "exp_name": "sgnet_depth_nyud_test",
3 | "agent": "SGNetAgent",
4 | "dataset": "NYUD",
5 | "network": "SGNet_ASPP",
6 | "spatial_information": "depth",
7 | "os": 16,
8 | "mode": "test",
9 | "ms": 0,
10 | "cuda": true,
11 | "gpu": "0",
12 | "seed": 123,
13 | "num_classes": 40,
14 | "val_list_path": "./dataset/list/nyud/test_nyud.txt",
15 | "trained_model_path": "./pretrained_weights/SGNet_ASPP.pth.tar",
16 | "snapshot_dir": "./snapshots",
17 | "output_predict_dir": "./output"
18 | }
--------------------------------------------------------------------------------
/configs/sgnet_nyud_fps.json:
--------------------------------------------------------------------------------
1 | {
2 | "exp_name": "sgnet_depth_nyud_test",
3 | "agent": "SGNetAgent",
4 | "dataset": "NYUD",
5 | "network": "SGNet",
6 | "spatial_information": "depth",
7 | "os": 16,
8 | "mode": "measure_speed",
9 | "cuda": true,
10 | "gpu": "1",
11 | "seed": 123,
12 | "num_classes": 40,
13 | "val_list_path": "./dataset/list/nyud/test_nyud.txt",
14 | "trained_model_path": "./pretrained_weights/SGNet.pth.tar",
15 | "snapshot_dir": "./snapshots"
16 | }
--------------------------------------------------------------------------------
/configs/sgnet_nyud_test.json:
--------------------------------------------------------------------------------
1 | {
2 | "exp_name": "sgnet_depth_nyud_test",
3 | "agent": "SGNetAgent",
4 | "dataset": "NYUD",
5 | "network": "SGNet",
6 | "spatial_information": "depth",
7 | "os": 16,
8 | "mode": "test",
9 | "ms": 0,
10 | "cuda": true,
11 | "gpu": "0",
12 | "seed": 123,
13 | "num_classes": 40,
14 | "val_list_path": "./dataset/list/nyud/test_nyud.txt",
15 | "trained_model_path": "./pretrained_weights/SGNet.pth.tar",
16 | "snapshot_dir": "./snapshots",
17 | "output_predict_dir": "./output"
18 | }
--------------------------------------------------------------------------------
/configs/sgnet_res50_nyud_fps.json:
--------------------------------------------------------------------------------
1 | {
2 | "exp_name": "sgnet_depth_nyud_test",
3 | "agent": "SGNetAgent",
4 | "dataset": "NYUD",
5 | "network": "SGNet_Res50",
6 | "spatial_information": "depth",
7 | "os": 16,
8 | "mode": "measure_speed",
9 | "cuda": true,
10 | "gpu": "1",
11 | "seed": 123,
12 | "num_classes": 40,
13 | "val_list_path": "./dataset/list/nyud/test_nyud.txt",
14 | "trained_model_path": "./pretrained_weights/SGNet.pth.tar",
15 | "snapshot_dir": "./snapshots"
16 | }
--------------------------------------------------------------------------------
/configs/sgnet_res50_nyud_test.json:
--------------------------------------------------------------------------------
1 | {
2 | "exp_name": "sgnet_depth_nyud_test",
3 | "agent": "SGNetAgent",
4 | "dataset": "NYUD",
5 | "network": "SGNet_Res50",
6 | "spatial_information": "depth",
7 | "os": 16,
8 | "mode": "test",
9 | "ms": 0,
10 | "cuda": true,
11 | "gpu": "1",
12 | "seed": 123,
13 | "num_classes": 40,
14 | "val_list_path": "./dataset/list/nyud/test_nyud.txt",
15 | "trained_model_path": "./pretrained_weights/SGNet(Res50).pth.tar",
16 | "snapshot_dir": "./snapshots",
17 | "output_predict_dir": "./output"
18 | }
--------------------------------------------------------------------------------
/data/nyudv2.py:
--------------------------------------------------------------------------------
1 | from torch.utils import data
2 | from torchvision import transforms
3 |
4 | from data.transform.rgbd_transform import *
5 |
6 | def make_dataset_fromlst(listfilename):
7 | """
8 | NYUlist format: image_path label_path depth_path HHA_path
9 | Args:
10 | listfilename: file path of list
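        Example line (illustrative paths):
            images/0001.png segs/0001.png depths/0001.png HHAs/0001.png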
11 | """
12 | images = []
13 | segs = []
14 | depths = []
15 | HHAs = []
16 |
17 | with open(listfilename) as f:
18 | content = f.readlines()
19 | for x in content:
20 | imgname, segname, depthname, HHAname = x.strip().split(' ')
21 | images += [imgname]
22 | segs += [segname]
23 | depths += [depthname]
24 | HHAs += [HHAname]
25 |
26 | return {'images':images, 'segs':segs, 'HHAs':HHAs, 'depths':depths}
27 |
28 |
29 | class NYUDataset_val_full(data.Dataset):
30 | """
31 | NYUDataset for evaluation with full size
32 | Init Args:
33 | list_path: file path of NYUlist
34 | """
35 | def __init__(self, list_path):
36 | self.list_path = list_path
37 | self.paths_dict = make_dataset_fromlst(self.list_path)
38 | self.len = len(self.paths_dict['images'])
39 |
40 | def __getitem__(self, index):
42 | img = Image.open(self.paths_dict['images'][index]) # .astype(np.uint8)
43 | depth = Image.open(self.paths_dict['depths'][index])
44 | HHA = Image.open(self.paths_dict['HHAs'][index])
45 | seg = Image.open(self.paths_dict['segs'][index])
46 |
47 | sample = {'image':img,
48 | 'depth':depth,
49 | 'seg': seg,
50 | 'HHA': HHA}
51 |
52 | sample = self.transform_val(sample)
53 | sample = self.totensor(sample)
54 |
55 | return sample
56 |
57 | def __len__(self):
58 | return self.len
59 |
60 | def name(self):
61 | return 'NYUDataset_val_full'
62 |
63 | def transform_val(self, sample):
64 | composed_transforms = transforms.Compose([
65 | Normalize_PIL2numpy_depth2xyz()])
66 | return composed_transforms(sample)
67 |
68 | def totensor(self, sample):
69 | composed_transforms = transforms.Compose([
70 | ToTensor()])
71 | return composed_transforms(sample)
--------------------------------------------------------------------------------
/data/transform/__init__.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 |
4 | path = os.path.dirname(os.path.abspath(__file__))
5 |
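# Auto-import every module in this package and re-export its classes at the
# package level, so transforms can be referenced as data.transform.<Name>.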
6 | for py in [f[:-3] for f in os.listdir(path) if f.endswith('.py') and f != '__init__.py']:
7 | mod = __import__('.'.join([__name__, py]), fromlist=[py])
8 | classes = [getattr(mod, x) for x in dir(mod) if isinstance(getattr(mod, x), type)]
9 | for cls in classes:
10 | setattr(sys.modules[__name__], cls.__name__, cls)
--------------------------------------------------------------------------------
/data/transform/rgbd_transform.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | import torch
3 | import random
4 | import numpy as np
5 | from PIL import Image, ImageOps, ImageFilter
6 |
7 | class Normalize_PIL2numpy_depth2xyz(object):
8 | """
9 |     Convert the PIL inputs to numpy arrays, zero-center the image and HHA
10 |     with the dataset mean, and convert the depth map to xyz coordinates.
11 | """
12 |
13 | def __init__(self):
14 | pass
15 |
16 | def __call__(self, sample):
17 | img = sample['image']
18 | mask = sample['seg']
19 | HHA = sample['HHA']
20 | depth = sample['depth']
21 |
22 | if 'scale_x' in sample.keys():
23 | scale_x = sample['scale_x']
24 | scale_y = sample['scale_y']
25 | center_x = sample['center_x']
26 | center_y = sample['center_y']
27 | else:
28 | scale_x = 1.
29 | scale_y = 1.
30 | center_x = 0.
31 | center_y = 0.
32 |
33 | ## convert PIL to numpy
34 | img = np.array(img).astype(np.float32)
35 | mask = np.array(mask).astype(np.float32)
36 | depth = np.array(depth).astype(np.float32)
37 | depth = depth[np.newaxis, ...]
38 | HHA = np.array(HHA).astype(np.float32)
39 |
40 | ## convert depth to xyz
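        ## Pinhole back-projection: x = (u - cx) * z / fx, y = (v - cy) * z / fy,
        ## with the NYUDv2 RGB intrinsics (fx, fy) scaled by the resize factor and
        ## the image center used as the principal point (cx, cy).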
41 | _, h, w = depth.shape
42 | z = depth
43 | xx, yy = np.meshgrid(np.array(range(w)) + 1, np.array(range(h)) + 1)
44 | fx_rgb = 5.18857e+02 * scale_x
45 | fy_rgb = 5.19469e+02 * scale_y
46 | cx_rgb = w / 2.0
47 | cy_rgb = h / 2.0
48 | C = np.array([[fx_rgb, 0, cx_rgb], [0, fy_rgb, cy_rgb], [0, 0, 1]])
49 | cc_rgb = C[0:2, 2]
50 | fc_rgb = np.diag(C[0:2, 0:2])
51 | x = (np.multiply((xx - cc_rgb[0]), z) / fc_rgb[0])
52 | y = (np.multiply((yy - cc_rgb[1]), z) / fc_rgb[1])
53 | depth = np.concatenate([x, y, z], axis=0)
54 |
55 | ## zero center, change to BGR
56 | img = (img - np.asarray([122.675, 116.669, 104.008]))[:, :, ::-1]
57 | HHA = (HHA - np.asarray([122.675, 116.669, 104.008]))[:, :, ::-1]
58 | depth /= 1000.0
59 |
60 | return {'image': img,
61 | 'depth': depth,
62 | 'seg': mask,
63 | 'HHA': HHA}
64 |
65 | class ToTensor(object):
66 | """
67 | Swap axis of image and convert ndarrays in sample to Tensors.
68 | """
69 | # swap color axis
70 | # numpy image: H x W x C
71 | # torch image: C X H X W
72 | def __call__(self, sample):
73 | img = sample['image']
74 | mask = sample['seg']
75 | HHA = sample['HHA']
76 | depth = sample['depth']
77 |
78 | # Swap axis
79 | img = np.array(img).astype(np.float32).transpose((2, 0, 1))
80 |         ## shift labels 1-40 to 0-39; unlabeled 0 underflows (uint8) to 255, the ignore index
81 | mask = (np.array(mask).astype(np.uint8) - 1).astype(np.float32)
82 | HHA = np.array(HHA).astype(np.float32).transpose((2, 0, 1))
83 | depth = np.array(depth).astype(np.float32)
84 |
85 | # Convert numpy to tensor
86 | img = torch.from_numpy(img).float()
87 | mask = torch.from_numpy(mask).float()
88 | HHA = torch.from_numpy(HHA).float()
89 | depth = torch.from_numpy(depth).float()
90 |
91 |
92 | return {'image': img,
93 | 'depth': depth,
94 | 'seg': mask,
95 | 'HHA': HHA}
96 |
97 | class ToTensor_SUN(object):
98 | """
99 | Swap axis of image and convert ndarrays in sample to Tensors.
100 | """
101 | # swap color axis
102 | # numpy image: H x W x C
103 | # torch image: C X H X W
104 | def __call__(self, sample):
105 | img = sample['image']
106 | mask = sample['seg']
107 | HHA = sample['HHA']
108 | depth = sample['depth']
109 |
110 | img = np.array(img).astype(np.float32).transpose((2, 0, 1))
111 |         # SUN labels are used as-is (no label shift here)
112 | mask = (np.array(mask).astype(np.uint8)).astype(np.float32)
113 | HHA = np.array(HHA).astype(np.float32).transpose((2, 0, 1))
114 | depth = np.array(depth).astype(np.float32)
115 |
116 | # convert numpy to tensor
117 | img = torch.from_numpy(img).float()
118 | mask = torch.from_numpy(mask).float()
119 | HHA = torch.from_numpy(HHA).float()
120 | depth = torch.from_numpy(depth).float()
121 |
122 |
123 | return {'image': img,
124 | 'depth': depth,
125 | 'seg': mask,
126 | 'HHA': HHA}
127 |
128 | class RandomHorizontalFlip(object):
129 | """
130 | Random horizontal flip augment
131 | """
132 | def __call__(self, sample):
133 | img = sample['image']
134 | mask = sample['seg']
135 | HHA = sample['HHA']
136 | depth = sample['depth']
137 |
138 | if random.random() < 0.5:
139 | img = img.transpose(Image.FLIP_LEFT_RIGHT)
140 | mask = mask.transpose(Image.FLIP_LEFT_RIGHT)
141 | depth = depth.transpose(Image.FLIP_LEFT_RIGHT)
142 | HHA = HHA.transpose(Image.FLIP_LEFT_RIGHT)
143 |
144 | return {'image': img,
145 | 'depth': depth,
146 | 'seg': mask,
147 | 'HHA': HHA}
148 |
149 | class RandomGaussianBlur(object):
150 | """
151 | Random gaussian blur
152 | """
153 | def __call__(self, sample):
154 | img = sample['image']
155 | mask = sample['seg']
156 | HHA = sample['HHA']
157 | depth = sample['depth']
158 | if random.random() < 0.5:
159 | img = img.filter(ImageFilter.GaussianBlur(
160 | radius=random.random()))
161 |
162 | return {'image': img,
163 | 'depth': depth,
164 | 'seg': mask,
165 | 'HHA': HHA}
166 |
167 | class RandomScaleCrop(object):
168 | """
169 | Random scale crop data augmentation
170 | """
171 | def __init__(self, base_size, crop_size, fill=0):
172 | self.base_size = base_size
173 | self.crop_size_h = crop_size[0]
174 | self.crop_size_w = crop_size[1]
175 | self.fill = fill
176 |
177 | def __call__(self, sample):
178 | img = sample['image']
179 | mask = sample['seg']
180 | HHA = sample['HHA']
181 | depth = sample['depth']
182 |
183 | short_size = random.randint(int(self.base_size * 0.5), int(self.base_size * 2.25))
184 | w, h = img.size
185 | if h > w:
186 | ow = short_size
187 | oh = int(1.0 * h * ow / w)
188 | else:
189 | oh = short_size
190 | ow = int(1.0 * w * oh / h)
191 |
192 | scale = ow / w
193 | img = img.resize((ow, oh), Image.BILINEAR)
194 | mask = mask.resize((ow, oh), Image.NEAREST)
195 | HHA = HHA.resize((ow, oh), Image.BILINEAR)
196 | depth = depth.resize((ow, oh), Image.BILINEAR)
197 | # pad crop
198 | if short_size < self.crop_size_h or ow < self.crop_size_w:
199 | padh = self.crop_size_h - oh if oh < self.crop_size_h else 0
200 | padw = self.crop_size_w - ow if ow < self.crop_size_w else 0
201 | img = ImageOps.expand(img, border=(0, 0, padw, padh), fill=0)
202 | HHA = ImageOps.expand(HHA, border=(0, 0, padw, padh), fill=0)
203 | mask = ImageOps.expand(mask, border=(0, 0, padw, padh), fill=0)
204 | depth = ImageOps.expand(depth, border=(0, 0, padw, padh), fill=0)
205 | # random crop crop_size
206 | w, h = img.size
207 | x1 = random.randint(0, w - self.crop_size_w)
208 | y1 = random.randint(0, h - self.crop_size_h)
209 | img = img.crop((x1, y1, x1 + self.crop_size_w, y1 + self.crop_size_h))
210 | mask = mask.crop((x1, y1, x1 + self.crop_size_w, y1 + self.crop_size_h))
211 | HHA = HHA.crop((x1, y1, x1 + self.crop_size_w, y1 + self.crop_size_h))
212 | depth = depth.crop((x1, y1, x1 + self.crop_size_w, y1 + self.crop_size_h))
213 | center_x = x1
214 | center_y = y1
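        # Return the scale and crop origin so Normalize_PIL2numpy_depth2xyz
        # can scale the camera intrinsics to match the resized image.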
215 |
216 | return {
217 | 'image': img,
218 | 'depth': depth,
219 | 'seg': mask,
220 | 'HHA': HHA,
221 | 'scale_x': scale,
222 | 'scale_y': scale,
223 | 'center_x': center_x,
224 | 'center_y': center_y
225 | }
226 |
227 | class FixScaleCrop(object):
228 | """
229 | Fix scale crop data augmentation
230 | """
231 |
232 | def __init__(self, crop_size):
233 | self.crop_size = crop_size
234 |
235 | def __call__(self, sample):
236 | img = sample['image']
237 | mask = sample['seg']
238 | HHA = sample['HHA']
239 | depth = sample['depth']
240 |
241 | w, h = img.size
242 | if w > h:
243 | oh = self.crop_size
244 | ow = int(1.0 * w * oh / h)
245 | else:
246 | ow = self.crop_size
247 | oh = int(1.0 * h * ow / w)
248 | img = img.resize((ow, oh), Image.BILINEAR)
249 | mask = mask.resize((ow, oh), Image.NEAREST)
250 | HHA = HHA.resize((ow, oh), Image.BILINEAR)
251 | depth = depth.resize((ow, oh), Image.BILINEAR)
252 | # center crop
253 | w, h = img.size
254 | x1 = int(round((w - self.crop_size) / 2.))
255 | y1 = int(round((h - self.crop_size) / 2.))
256 |
257 | img = img.crop((x1, y1, x1 + self.crop_size, y1 + self.crop_size))
258 | mask = mask.crop((x1, y1, x1 + self.crop_size, y1 + self.crop_size))
259 | HHA = HHA.crop((x1, y1, x1 + self.crop_size, y1 + self.crop_size))
260 | depth = depth.crop((x1, y1, x1 + self.crop_size, y1 + self.crop_size))
261 |
262 | return {'image': img,
263 | 'depth': depth,
264 | 'seg': mask,
265 | 'HHA': HHA}
266 |
267 | class FixedResize(object):
268 | """
269 | Resize data augmentation
270 | """
271 |
272 | def __init__(self, size):
273 | self.size_h = size[0]
274 | self.size_w = size[1]
275 | self.size = (self.size_w, self.size_h)
276 |
277 | def __call__(self, sample):
278 | img = sample['image']
279 | mask = sample['seg']
280 | HHA = sample['HHA']
281 | depth = sample['depth']
282 |
283 | assert img.size == mask.size
284 |
285 | img = img.resize(self.size, Image.BILINEAR)
286 | mask = mask.resize(self.size, Image.NEAREST)
287 | HHA = HHA.resize(self.size, Image.BILINEAR)
288 | depth = depth.resize(self.size, Image.BILINEAR)
289 |
290 | return {'image': img,
291 | 'depth': depth,
292 | 'seg': mask,
293 | 'HHA': HHA}
294 |
295 |
296 | class FixedResize_image(object):
297 | """Resize data augmentation (only for image and depth map)
298 | Init Args:
299 | size: new size of image
300 | """
301 |
302 | def __init__(self, size):
303 | self.size_h = size[0]
304 | self.size_w = size[1]
305 | self.size = (self.size_w, self.size_h)
306 |
307 | def __call__(self, sample):
308 | img = sample['image']
309 | mask = sample['seg']
310 | HHA = sample['HHA']
311 | depth = sample['depth']
312 |
313 | img = img.resize(self.size, Image.BILINEAR)
314 | HHA = HHA.resize(self.size, Image.BILINEAR)
315 | depth = depth.resize(self.size, Image.BILINEAR)
316 |
317 | return {'image': img,
318 | 'depth': depth,
319 | 'seg': mask,
320 | 'HHA': HHA}
321 |
322 |
323 | class CenterCrop(object):
324 | """center crop augmentation
325 | Init Args:
326 | size: crop size
327 | """
328 |
329 | def __init__(self, size):
330 | self.size = size
331 |
332 | def __call__(self, sample):
333 | img = sample['image']
334 | mask = sample['seg']
335 | HHA = sample['HHA']
336 | depth = sample['depth']
337 |
338 | w, h = img.size
339 | th, tw = self.size
340 |
341 | x = int(round((w - tw) / 2.))
342 | y = int(round((h - th) / 2.))
343 |
344 | img = img.crop((x, y, x + tw, y + th))
345 | mask = mask.crop((x, y, x + tw, y + th))
346 | HHA = HHA.crop((x, y, x + tw, y + th))
347 | depth = depth.crop((x, y, x + tw, y + th))
348 |
349 | return {'image': img,
350 | 'depth': depth,
351 | 'seg': mask,
352 | 'HHA': HHA}
353 |
354 | class CenterCrop_image(object):
355 | """center crop augmentation
356 | Init Args:
357 | size: crop size
358 | """
359 |
360 | def __init__(self, size):
361 | self.size = size
362 |
363 | def __call__(self, sample):
364 | img = sample['image']
365 | mask = sample['seg']
366 | HHA = sample['HHA']
367 | depth = sample['depth']
368 |
369 | w, h = img.size
370 | th, tw = self.size
371 |
372 | x = int(round((w - tw) / 2.))
373 | y = int(round((h - th) / 2.))
374 |
375 | img = img.crop((x, y, x + tw, y + th))
376 | HHA = HHA.crop((x, y, x + tw, y + th))
377 | depth = depth.crop((x, y, x + tw, y + th))
378 |
379 | return {'image': img,
380 | 'depth': depth,
381 | 'seg': mask,
382 | 'HHA': HHA}
--------------------------------------------------------------------------------
/graphs/models/SGNet/SGNet.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import functools
3 | import torch
4 |
5 | from graphs.ops.modules.s_conv import SConv
6 | from graphs.ops.libs import InPlaceABNSync
7 |
8 | affine_par = True
9 | BatchNorm2d = functools.partial(InPlaceABNSync, activation='none')
10 |
11 | def conv3x3(in_planes, out_planes, stride=1):
12 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
13 | padding=1, bias=False)
14 |
15 | class Bottleneck(nn.Module):
16 | expansion = 4
17 |
18 | def __init__(self, inplanes, planes, stride=1, dilation=1, downsample=None, fist_dilation=1, multi_grid=1,
19 | deformable=False):
20 | super(Bottleneck, self).__init__()
21 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
22 | self.bn1 = BatchNorm2d(planes)
23 | if deformable == False:
24 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
25 | padding=dilation * multi_grid, dilation=dilation * multi_grid, bias=False)
26 | else:
27 | self.conv2 = SConv(planes, planes, kernel_size=3, stride=stride,
28 | padding=1, deformable_groups=1, no_bias=True)
29 | self.bn2 = BatchNorm2d(planes)
30 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
31 | self.bn3 = BatchNorm2d(planes * 4)
32 | self.relu = nn.ReLU(inplace=False)
33 | self.relu_inplace = nn.ReLU(inplace=True)
34 | self.downsample = downsample
35 | self.dilation = dilation
36 | self.stride = stride
37 | self.deformable = deformable
38 |
39 | def forward(self, input):
40 | x, S = input
41 | residual = x
42 |
43 | out = self.conv1(x)
44 | out = self.bn1(out)
45 | out = self.relu(out)
46 | if self.deformable == False:
47 | out = self.conv2(out)
48 | else:
49 | out = self.conv2(out, S)
50 | out = self.bn2(out)
51 | out = self.relu(out)
52 |
53 | out = self.conv3(out)
54 | out = self.bn3(out)
55 |
56 | if self.downsample is not None:
57 | residual = self.downsample(x)
58 |
59 | out = out + residual
60 | out = self.relu_inplace(out)
61 |
62 | return [out, S]
63 |
64 | class ResNet(nn.Module):
65 | def __init__(self, block, layers, num_classes, deformable=True):
66 | self.inplanes = 128
67 | super(ResNet, self).__init__()
68 | self.conv1 = conv3x3(3, 64, stride=2)
69 | self.bn1 = BatchNorm2d(64)
70 | self.relu1 = nn.ReLU(inplace=False)
71 | self.conv2 = conv3x3(64, 64)
72 | self.bn2 = BatchNorm2d(64)
73 | self.relu2 = nn.ReLU(inplace=False)
74 | self.conv3 = conv3x3(64, 128)
75 | self.bn3 = BatchNorm2d(128)
76 | self.relu3 = nn.ReLU(inplace=False)
77 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
78 | self.relu = nn.ReLU(inplace=False)
79 |
80 | self.layer1 = self._make_layer(block, 64, layers[0], deformable=deformable, seg=True)
81 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2, deformable=deformable, seg=True)
82 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2, dilation=2, deformable=deformable, seg=True)
83 | self.layer4 = self._make_layer(block, 512, layers[3], stride=1, dilation=4, multi_grid=(1, 1, 1),
84 | deformable=deformable, seg=True)
85 |
86 | self.dsn3 = nn.Sequential(
87 | nn.Conv2d(1024, 512, kernel_size=3, stride=1, padding=1),
88 | InPlaceABNSync(512),
89 | nn.Dropout2d(0.1),
90 | nn.Conv2d(512, num_classes, kernel_size=1, stride=1, padding=0, bias=True),
91 | )
92 |
93 | self.dsn4 = nn.Sequential(
94 | nn.Conv2d(2048, 512, kernel_size=3, stride=1, padding=1),
95 | InPlaceABNSync(512),
96 | nn.Dropout2d(0.1),
97 | nn.Conv2d(512, num_classes, kernel_size=1, stride=1, padding=0, bias=True),
98 | )
99 |
100 | def make_up_conv_layer(self, block, in_channels, out_channels, batch_size):
101 | return block(in_channels, out_channels, batch_size)
102 |
103 | def _make_layer(self, block, planes, blocks, stride=1, dilation=1, multi_grid=1, deformable=False, seg=False):
104 | downsample = None
105 | if stride != 1 or self.inplanes != planes * block.expansion:
106 | downsample = nn.Sequential(
107 | nn.Conv2d(self.inplanes, planes * block.expansion,
108 | kernel_size=1, stride=stride, bias=False),
109 | BatchNorm2d(planes * block.expansion, affine=affine_par))
110 |
111 | layers = []
112 | generate_multi_grid = lambda index, grids: grids[index % len(grids)] if isinstance(grids, tuple) else 1
113 | layers.append(block(self.inplanes, planes, stride, dilation=dilation, downsample=downsample,
114 | multi_grid=generate_multi_grid(0, multi_grid), deformable=deformable))
115 | self.inplanes = planes * block.expansion
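        # With seg=True, only the last two blocks of each stage are built with
        # S-Conv (when deformable=True); earlier blocks use plain 3x3 convolutions.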
116 | for i in range(1, blocks):
117 | if seg == False:
118 | layers.append(block(self.inplanes, planes, dilation=dilation,
119 | multi_grid=generate_multi_grid(i, multi_grid), deformable=deformable))
120 | else:
121 | if i >= blocks-2:
122 | layers.append(block(self.inplanes, planes, dilation=dilation,
123 | multi_grid=generate_multi_grid(i, multi_grid), deformable=deformable))
124 | else:
125 | layers.append(block(self.inplanes, planes, dilation=dilation,
126 | multi_grid=generate_multi_grid(i, multi_grid), deformable=False))
127 |
128 | return nn.Sequential(*layers)
129 | def forward(self, x, depth):
130 | S = depth
131 | x = self.relu1(self.bn1(self.conv1(x)))
132 | x = self.relu2(self.bn2(self.conv2(x)))
133 | x = self.relu3(self.bn3(self.conv3(x)))
134 |
135 | x = self.maxpool(x)
136 |
137 | x = [x, S]
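        # Bundle the features with the spatial guidance S: every Bottleneck
        # returns [out, S], so S flows through the nn.Sequential stages to each S-Conv.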
138 |
139 | x = self.layer3(self.layer2(self.layer1(x)))
140 | x3 = self.dsn3(x[0])
141 |
142 | x = self.layer4(x)
143 | x4 = self.dsn4(x[0])
144 |
145 | return [x4, x3]
146 |
147 | def load_pretrain(self, pretrain_model_path):
148 | """Load pretrained Network"""
149 | saved_state_dict = torch.load(pretrain_model_path)
150 | new_params = self.state_dict().copy()
151 | for i in saved_state_dict:
152 | i_parts = i.split('.')
153 | if not i_parts[0] == 'fc':
154 | new_params['.'.join(i_parts[0:])] = saved_state_dict[i]
155 |
156 | self.load_state_dict(new_params)
157 |
158 | def SGNet(num_classes=21):
159 | model = ResNet(Bottleneck, [3, 4, 23, 3], num_classes)
160 | return model
--------------------------------------------------------------------------------
/graphs/models/SGNet/SGNet_ASPP.py:
--------------------------------------------------------------------------------
1 | """
2 | SGNet_ASPP implementation
3 | """
4 | import torch
5 | import torch.nn as nn
6 | import functools
7 | from torch.nn import functional as F
8 |
9 | from graphs.ops.modules.s_conv import SConv
10 | from graphs.ops.libs import InPlaceABNSync
11 |
12 | affine_par = True
13 | BatchNorm2d = functools.partial(InPlaceABNSync, activation='none')
14 |
15 | def conv3x3(in_planes, out_planes, stride=1):
16 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
17 | padding=1, bias=False)
18 |
19 | class Bottleneck(nn.Module):
20 | expansion = 4
21 |
22 | def __init__(self, inplanes, planes, stride=1, dilation=1, downsample=None, fist_dilation=1, multi_grid=1,
23 | deformable=False):
24 | super(Bottleneck, self).__init__()
25 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
26 | self.bn1 = BatchNorm2d(planes)
27 | if deformable == False:
28 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
29 | padding=dilation * multi_grid, dilation=dilation * multi_grid, bias=False)
30 | else:
31 | self.conv2 = SConv(planes, planes, kernel_size=3, stride=stride,
32 | padding=1, deformable_groups=1, no_bias=True)
33 | self.bn2 = BatchNorm2d(planes)
34 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
35 | self.bn3 = BatchNorm2d(planes * 4)
36 | self.relu = nn.ReLU(inplace=False)
37 | self.relu_inplace = nn.ReLU(inplace=True)
38 | self.downsample = downsample
39 | self.dilation = dilation
40 | self.stride = stride
41 | self.deformable = deformable
42 |
43 | def forward(self, input):
44 | x, S = input
45 | residual = x
46 |
47 | out = self.conv1(x)
48 | out = self.bn1(out)
49 | out = self.relu(out)
50 | if self.deformable == False:
51 | out = self.conv2(out)
52 | else:
53 | out = self.conv2(out, S)
54 | out = self.bn2(out)
55 | out = self.relu(out)
56 |
57 | out = self.conv3(out)
58 | out = self.bn3(out)
59 |
60 | if self.downsample is not None:
61 | residual = self.downsample(x)
62 |
63 | out = out + residual
64 | out = self.relu_inplace(out)
65 |
66 | return [out, S]
67 |
68 |
69 | class ASPPModule(nn.Module):
70 | """
71 | Reference:
72 | Chen, Liang-Chieh, et al. *"Rethinking Atrous Convolution for Semantic Image Segmentation."*
73 | """
74 |
75 | def __init__(self, features, inner_features=256, out_features=512, dilations=(12, 24, 36)):
76 | super(ASPPModule, self).__init__()
77 |
78 | self.conv1 = nn.Sequential(nn.AdaptiveAvgPool2d((1, 1)),
79 | nn.Conv2d(features, inner_features, kernel_size=1, padding=0, dilation=1,
80 | bias=False),
81 | InPlaceABNSync(inner_features))
82 | self.conv2 = nn.Sequential(
83 | nn.Conv2d(features, inner_features, kernel_size=1, padding=0, dilation=1, bias=False),
84 | InPlaceABNSync(inner_features))
85 | self.conv3 = nn.Sequential(
86 | nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[0], dilation=dilations[0], bias=False),
87 | InPlaceABNSync(inner_features))
88 | self.conv4 = nn.Sequential(
89 | nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[1], dilation=dilations[1], bias=False),
90 | InPlaceABNSync(inner_features))
91 | self.conv5 = nn.Sequential(
92 | nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[2], dilation=dilations[2], bias=False),
93 | InPlaceABNSync(inner_features))
94 |
95 | self.bottleneck = nn.Sequential(
96 | nn.Conv2d(inner_features * 5, out_features, kernel_size=1, padding=0, dilation=1, bias=False),
97 | InPlaceABNSync(out_features),
98 | nn.Dropout2d(0.1)
99 | )
100 |
101 | def forward(self, x):
102 | _, _, h, w = x.size()
103 |
104 | feat1 = F.upsample(self.conv1(x), size=(h, w), mode='bilinear', align_corners=True)
105 |
106 | feat2 = self.conv2(x)
107 | feat3 = self.conv3(x)
108 | feat4 = self.conv4(x)
109 | feat5 = self.conv5(x)
110 | out = torch.cat((feat1, feat2, feat3, feat4, feat5), 1)
111 |
112 | bottle = self.bottleneck(out)
113 | return bottle
114 |
115 | class ResNet(nn.Module):
116 | def __init__(self, block, layers, num_classes, deformable=False):
117 | self.inplanes = 128
118 | super(ResNet, self).__init__()
119 | self.conv1 = conv3x3(3, 64, stride=2)
120 | self.bn1 = BatchNorm2d(64)
121 | self.relu1 = nn.ReLU(inplace=False)
122 | self.conv2 = conv3x3(64, 64)
123 | self.bn2 = BatchNorm2d(64)
124 | self.relu2 = nn.ReLU(inplace=False)
125 | self.conv3 = conv3x3(64, 128)
126 | self.bn3 = BatchNorm2d(128)
127 | self.relu3 = nn.ReLU(inplace=False)
128 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
129 | self.relu = nn.ReLU(inplace=False)
130 |
131 | self.layer1 = self._make_layer(block, 64, layers[0], deformable=deformable, seg=True)
132 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2, deformable=deformable, seg=True)
133 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2, dilation=2, deformable=deformable, seg=True)
134 | self.layer4 = self._make_layer(block, 512, layers[3], stride=1, dilation=4, multi_grid=(1, 1, 1),
135 | deformable=deformable, seg=True)
136 |
137 | self.head = nn.Sequential(ASPPModule(2048),
138 | nn.Conv2d(512, num_classes, kernel_size=1, stride=1, padding=0, bias=True))
139 |
140 | self.dsn3 = nn.Sequential(
141 | nn.Conv2d(1024, 512, kernel_size=3, stride=1, padding=1),
142 | InPlaceABNSync(512),
143 | nn.Dropout2d(0.1),
144 | nn.Conv2d(512, num_classes, kernel_size=1, stride=1, padding=0, bias=True),
145 | )
146 |
147 | def make_up_conv_layer(self, block, in_channels, out_channels, batch_size):
148 | return block(in_channels, out_channels, batch_size)
149 |
150 | def _make_layer(self, block, planes, blocks, stride=1, dilation=1, multi_grid=1, deformable=False, seg=False):
151 | downsample = None
152 | if stride != 1 or self.inplanes != planes * block.expansion:
153 | downsample = nn.Sequential(
154 | nn.Conv2d(self.inplanes, planes * block.expansion,
155 | kernel_size=1, stride=stride, bias=False),
156 | BatchNorm2d(planes * block.expansion, affine=affine_par))
157 |
158 | layers = []
159 | generate_multi_grid = lambda index, grids: grids[index % len(grids)] if isinstance(grids, tuple) else 1
160 | layers.append(block(self.inplanes, planes, stride, dilation=dilation, downsample=downsample,
161 | multi_grid=generate_multi_grid(0, multi_grid), deformable=deformable))
162 | self.inplanes = planes * block.expansion
163 | for i in range(1, blocks):
164 | if seg == False:
165 | layers.append(block(self.inplanes, planes, dilation=dilation,
166 | multi_grid=generate_multi_grid(i, multi_grid), deformable=deformable))
167 | else:
168 | if i >= blocks-2:
169 | layers.append(block(self.inplanes, planes, dilation=dilation,
170 | multi_grid=generate_multi_grid(i, multi_grid), deformable=deformable))
171 | else:
172 | layers.append(block(self.inplanes, planes, dilation=dilation,
173 | multi_grid=generate_multi_grid(i, multi_grid), deformable=False))
174 |
175 | return nn.Sequential(*layers)
176 | def forward(self, x, depth):
177 | S = depth
178 | x = self.relu1(self.bn1(self.conv1(x)))
179 | x = self.relu2(self.bn2(self.conv2(x)))
180 | x = self.relu3(self.bn3(self.conv3(x)))
181 |
182 | x = self.maxpool(x)
183 |
184 | x = [x, S]
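        # As in SGNet.py, features travel through the stages paired with the guidance map S.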
185 |
186 | x = self.layer3(self.layer2(self.layer1(x)))
187 | x3 = self.dsn3(x[0])
188 |
189 | x = self.layer4(x)
190 | x4 = self.head(x[0])
191 |
192 | return [x4, x3]
193 |
194 | def load_pretrain(self, pretrain_model_path):
195 | """Load pretrained Network"""
196 | saved_state_dict = torch.load(pretrain_model_path)
197 | new_params = self.state_dict().copy()
198 | for i in saved_state_dict:
199 | i_parts = i.split('.')
200 | if not i_parts[0] == 'fc':
201 | new_params['.'.join(i_parts[0:])] = saved_state_dict[i]
202 | self.load_state_dict(new_params)
203 |
204 | def SGNet(num_classes=21):
205 | model = ResNet(Bottleneck, [3, 4, 23, 3], num_classes, deformable=True)
206 | return model
--------------------------------------------------------------------------------
/graphs/models/SGNet/SGNet_ASPP_fps.py:
--------------------------------------------------------------------------------
1 | """
2 | SGNet_ASPP (speed-measurement variant) implementation
3 | """
4 | import torch
5 | import torch.nn as nn
6 | import functools
7 | from torch.nn import functional as F
8 |
9 | from graphs.ops.modules.s_conv import SConv
10 | from graphs.ops.libs import InPlaceABNSync
11 |
12 | affine_par = True
13 | BatchNorm2d = functools.partial(InPlaceABNSync, activation='none')
14 |
15 | def conv3x3(in_planes, out_planes, stride=1):
16 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
17 | padding=1, bias=False)
18 |
19 | class Bottleneck(nn.Module):
20 | expansion = 4
21 |
22 | def __init__(self, inplanes, planes, stride=1, dilation=1, downsample=None, fist_dilation=1, multi_grid=1,
23 | deformable=False):
24 | super(Bottleneck, self).__init__()
25 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
26 | self.bn1 = BatchNorm2d(planes)
27 | if deformable == False:
28 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
29 | padding=dilation * multi_grid, dilation=dilation * multi_grid, bias=False)
30 | else:
31 | self.conv2 = SConv(planes, planes, kernel_size=3, stride=stride,
32 | padding=1, deformable_groups=1, no_bias=True)
33 | self.bn2 = BatchNorm2d(planes)
34 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
35 | self.bn3 = BatchNorm2d(planes * 4)
36 | self.relu = nn.ReLU(inplace=False)
37 | self.relu_inplace = nn.ReLU(inplace=True)
38 | self.downsample = downsample
39 | self.dilation = dilation
40 | self.stride = stride
41 | self.deformable = deformable
42 |
43 | def forward(self, input):
44 | x, S = input
45 | residual = x
46 |
47 | out = self.conv1(x)
48 | out = self.bn1(out)
49 | out = self.relu(out)
50 | if self.deformable == False:
51 | out = self.conv2(out)
52 | else:
53 | out = self.conv2(out, S)
54 | out = self.bn2(out)
55 | out = self.relu(out)
56 |
57 | out = self.conv3(out)
58 | out = self.bn3(out)
59 |
60 | if self.downsample is not None:
61 | residual = self.downsample(x)
62 |
63 | out = out + residual
64 | out = self.relu_inplace(out)
65 |
66 | return [out, S]
67 |
68 |
69 | class ASPPModule(nn.Module):
70 | """
71 | Reference:
72 | Chen, Liang-Chieh, et al. *"Rethinking Atrous Convolution for Semantic Image Segmentation."*
73 | """
74 |
75 | def __init__(self, features, inner_features=256, out_features=512, dilations=(12, 24, 36)):
76 | super(ASPPModule, self).__init__()
77 |
78 | self.conv1 = nn.Sequential(nn.AdaptiveAvgPool2d((1, 1)),
79 | nn.Conv2d(features, inner_features, kernel_size=1, padding=0, dilation=1,
80 | bias=False),
81 | InPlaceABNSync(inner_features))
82 | self.conv2 = nn.Sequential(
83 | nn.Conv2d(features, inner_features, kernel_size=1, padding=0, dilation=1, bias=False),
84 | InPlaceABNSync(inner_features))
85 | self.conv3 = nn.Sequential(
86 | nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[0], dilation=dilations[0], bias=False),
87 | InPlaceABNSync(inner_features))
88 | self.conv4 = nn.Sequential(
89 | nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[1], dilation=dilations[1], bias=False),
90 | InPlaceABNSync(inner_features))
91 | self.conv5 = nn.Sequential(
92 | nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[2], dilation=dilations[2], bias=False),
93 | InPlaceABNSync(inner_features))
94 |
95 | self.bottleneck = nn.Sequential(
96 | nn.Conv2d(inner_features * 5, out_features, kernel_size=1, padding=0, dilation=1, bias=False),
97 | InPlaceABNSync(out_features),
98 | nn.Dropout2d(0.1)
99 | )
100 |
101 | def forward(self, x):
102 | _, _, h, w = x.size()
103 |
104 | feat1 = F.upsample(self.conv1(x), size=(h, w), mode='bilinear', align_corners=True)
105 |
106 | feat2 = self.conv2(x)
107 | feat3 = self.conv3(x)
108 | feat4 = self.conv4(x)
109 | feat5 = self.conv5(x)
110 | out = torch.cat((feat1, feat2, feat3, feat4, feat5), 1)
111 |
112 | bottle = self.bottleneck(out)
113 | return bottle
114 |
115 | class ResNet(nn.Module):
116 | def __init__(self, block, layers, num_classes, deformable=False):
117 | self.inplanes = 128
118 | super(ResNet, self).__init__()
119 | self.conv1 = conv3x3(3, 64, stride=2)
120 | self.bn1 = BatchNorm2d(64)
121 | self.relu1 = nn.ReLU(inplace=False)
122 | self.conv2 = conv3x3(64, 64)
123 | self.bn2 = BatchNorm2d(64)
124 | self.relu2 = nn.ReLU(inplace=False)
125 | self.conv3 = conv3x3(64, 128)
126 | self.bn3 = BatchNorm2d(128)
127 | self.relu3 = nn.ReLU(inplace=False)
128 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
129 | self.relu = nn.ReLU(inplace=False)
130 |
131 | self.layer1 = self._make_layer(block, 64, layers[0], deformable=deformable, seg=True)
132 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2, deformable=deformable, seg=True)
133 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2, dilation=2, deformable=deformable, seg=True)
134 | self.layer4 = self._make_layer(block, 512, layers[3], stride=1, dilation=4, multi_grid=(1, 1, 1),
135 | deformable=deformable, seg=True)
136 |
137 | self.head = nn.Sequential(ASPPModule(2048),
138 | nn.Conv2d(512, num_classes, kernel_size=1, stride=1, padding=0, bias=True))
139 |
140 | def make_up_conv_layer(self, block, in_channels, out_channels, batch_size):
141 | return block(in_channels, out_channels, batch_size)
142 |
143 | def _make_layer(self, block, planes, blocks, stride=1, dilation=1, multi_grid=1, deformable=False, seg=False):
144 | downsample = None
145 | if stride != 1 or self.inplanes != planes * block.expansion:
146 | downsample = nn.Sequential(
147 | nn.Conv2d(self.inplanes, planes * block.expansion,
148 | kernel_size=1, stride=stride, bias=False),
149 | BatchNorm2d(planes * block.expansion, affine=affine_par))
150 |
151 | layers = []
152 | generate_multi_grid = lambda index, grids: grids[index % len(grids)] if isinstance(grids, tuple) else 1
153 | layers.append(block(self.inplanes, planes, stride, dilation=dilation, downsample=downsample,
154 | multi_grid=generate_multi_grid(0, multi_grid), deformable=deformable))
155 | self.inplanes = planes * block.expansion
156 | for i in range(1, blocks):
157 | if seg == False:
158 | layers.append(block(self.inplanes, planes, dilation=dilation,
159 | multi_grid=generate_multi_grid(i, multi_grid), deformable=deformable))
160 | else:
161 | if i >= blocks-2:
162 | layers.append(block(self.inplanes, planes, dilation=dilation,
163 | multi_grid=generate_multi_grid(i, multi_grid), deformable=deformable))
164 | else:
165 | layers.append(block(self.inplanes, planes, dilation=dilation,
166 | multi_grid=generate_multi_grid(i, multi_grid), deformable=False))
167 |
168 | return nn.Sequential(*layers)
169 | def forward(self, x, depth):
170 | S = depth
171 | x = self.relu1(self.bn1(self.conv1(x)))
172 | x = self.relu2(self.bn2(self.conv2(x)))
173 | x = self.relu3(self.bn3(self.conv3(x)))
174 |
175 | x = self.maxpool(x)
176 |
177 | x = [x, S]
178 |
179 | x = self.layer3(self.layer2(self.layer1(x)))
180 |
181 | x = self.layer4(x)
182 | x4 = self.head(x[0])
183 |
184 | return x4
185 |
186 | def load_pretrain(self, pretrain_model_path):
187 | """Load pretrained Network"""
188 | saved_state_dict = torch.load(pretrain_model_path)
189 | new_params = self.state_dict().copy()
190 | for i in saved_state_dict:
191 | i_parts = i.split('.')
192 | if not i_parts[0] == 'fc':
193 | new_params['.'.join(i_parts[0:])] = saved_state_dict[i]
194 | self.load_state_dict(new_params)
195 |
196 | def SGNet(num_classes=21):
197 | model = ResNet(Bottleneck, [3, 4, 23, 3], num_classes, deformable=True)
198 | return model
--------------------------------------------------------------------------------
/graphs/models/SGNet/SGNet_Res50.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import functools
3 | import torch
4 |
5 | from graphs.ops.modules.s_conv import SConv
6 | from graphs.ops.libs import InPlaceABNSync
7 |
8 | affine_par = True
9 | BatchNorm2d = functools.partial(InPlaceABNSync, activation='none')
10 |
11 | def conv3x3(in_planes, out_planes, stride=1):
12 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
13 | padding=1, bias=False)
14 |
15 | class Bottleneck(nn.Module):
16 | expansion = 4
17 |
18 | def __init__(self, inplanes, planes, stride=1, dilation=1, downsample=None, first_dilation=1, multi_grid=1,
19 | deformable=False):
20 | super(Bottleneck, self).__init__()
21 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
22 | self.bn1 = BatchNorm2d(planes)
23 | if not deformable:
24 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
25 | padding=dilation * multi_grid, dilation=dilation * multi_grid, bias=False)
26 | else:
27 | self.conv2 = SConv(planes, planes, kernel_size=3, stride=stride,
28 | padding=1, deformable_groups=1, no_bias=True)
29 | self.bn2 = BatchNorm2d(planes)
30 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
31 | self.bn3 = BatchNorm2d(planes * 4)
32 | self.relu = nn.ReLU(inplace=False)
33 | self.relu_inplace = nn.ReLU(inplace=True)
34 | self.downsample = downsample
35 | self.dilation = dilation
36 | self.stride = stride
37 | self.deformable = deformable
38 |
39 | def forward(self, input):
40 | x, S = input
41 | residual = x
42 |
43 | out = self.conv1(x)
44 | out = self.bn1(out)
45 | out = self.relu(out)
46 | if not self.deformable:
47 | out = self.conv2(out)
48 | else:
49 | out = self.conv2(out, S)
50 | out = self.bn2(out)
51 | out = self.relu(out)
52 |
53 | out = self.conv3(out)
54 | out = self.bn3(out)
55 |
56 | if self.downsample is not None:
57 | residual = self.downsample(x)
58 |
59 | out = out + residual
60 | out = self.relu_inplace(out)
61 |
62 | return [out, S]
63 |
64 | class ResNet(nn.Module):
65 | def __init__(self, block, layers, num_classes, deformable=True):
66 | self.inplanes = 128
67 | super(ResNet, self).__init__()
68 | self.conv1 = conv3x3(3, 64, stride=2)
69 | self.bn1 = BatchNorm2d(64)
70 | self.relu1 = nn.ReLU(inplace=False)
71 | self.conv2 = conv3x3(64, 64)
72 | self.bn2 = BatchNorm2d(64)
73 | self.relu2 = nn.ReLU(inplace=False)
74 | self.conv3 = conv3x3(64, 128)
75 | self.bn3 = BatchNorm2d(128)
76 | self.relu3 = nn.ReLU(inplace=False)
77 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
78 | self.relu = nn.ReLU(inplace=False)
79 |
80 | self.layer1 = self._make_layer(block, 64, layers[0], deformable=deformable, seg=True)
81 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2, deformable=deformable, seg=True)
82 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2, dilation=2, deformable=deformable, seg=True)
83 | self.layer4 = self._make_layer(block, 512, layers[3], stride=1, dilation=4, multi_grid=(1, 1, 1),
84 | deformable=deformable, seg=True)
85 |
86 | self.dsn3 = nn.Sequential(
87 | nn.Conv2d(1024, 512, kernel_size=3, stride=1, padding=1),
88 | InPlaceABNSync(512),
89 | nn.Dropout2d(0.1),
90 | nn.Conv2d(512, num_classes, kernel_size=1, stride=1, padding=0, bias=True),
91 | )
92 |
93 | self.dsn4 = nn.Sequential(
94 | nn.Conv2d(2048, 512, kernel_size=3, stride=1, padding=1),
95 | InPlaceABNSync(512),
96 | nn.Dropout2d(0.1),
97 | nn.Conv2d(512, num_classes, kernel_size=1, stride=1, padding=0, bias=True),
98 | )
99 |
100 | def make_up_conv_layer(self, block, in_channels, out_channels, batch_size):
101 | return block(in_channels, out_channels, batch_size)
102 |
103 | def _make_layer(self, block, planes, blocks, stride=1, dilation=1, multi_grid=1, deformable=False, seg=False):
104 | downsample = None
105 | if stride != 1 or self.inplanes != planes * block.expansion:
106 | downsample = nn.Sequential(
107 | nn.Conv2d(self.inplanes, planes * block.expansion,
108 | kernel_size=1, stride=stride, bias=False),
109 | BatchNorm2d(planes * block.expansion, affine=affine_par))
110 |
111 | layers = []
112 | generate_multi_grid = lambda index, grids: grids[index % len(grids)] if isinstance(grids, tuple) else 1
113 | layers.append(block(self.inplanes, planes, stride, dilation=dilation, downsample=downsample,
114 | multi_grid=generate_multi_grid(0, multi_grid), deformable=deformable))
115 | self.inplanes = planes * block.expansion
116 | for i in range(1, blocks):
117 | if not seg:
118 | layers.append(block(self.inplanes, planes, dilation=dilation,
119 | multi_grid=generate_multi_grid(i, multi_grid), deformable=deformable))
120 | else:
121 | if i >= blocks-2:
122 | layers.append(block(self.inplanes, planes, dilation=dilation,
123 | multi_grid=generate_multi_grid(i, multi_grid), deformable=deformable))
124 | else:
125 | layers.append(block(self.inplanes, planes, dilation=dilation,
126 | multi_grid=generate_multi_grid(i, multi_grid), deformable=False))
127 |
128 | return nn.Sequential(*layers)
129 | def forward(self, x, depth):
130 | S = depth
131 | x = self.relu1(self.bn1(self.conv1(x)))
132 | x = self.relu2(self.bn2(self.conv2(x)))
133 | x = self.relu3(self.bn3(self.conv3(x)))
134 |
135 | x = self.maxpool(x)
136 |
137 | x = [x, S]
138 |
139 | x = self.layer3(self.layer2(self.layer1(x)))
140 | x3 = self.dsn3(x[0])
141 |
142 | x = self.layer4(x)
143 | x4 = self.dsn4(x[0])
144 |
145 | return [x4, x3]
146 |
147 | def load_pretrain(self, pretrain_model_path):
148 | """Load pretrained Network"""
149 | saved_state_dict = torch.load(pretrain_model_path)
150 | new_params = self.state_dict().copy()
151 | for i in saved_state_dict:
152 | # copy everything except the ImageNet classifier weights ('fc.*')
153 | if i.split('.')[0] != 'fc':
154 | new_params[i] = saved_state_dict[i]
155 |
156 | self.load_state_dict(new_params)
157 |
158 | def SGNet(num_classes=21):
159 | model = ResNet(Bottleneck, [3, 4, 6, 3], num_classes)
160 | return model
--------------------------------------------------------------------------------
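
`SGNet_Res50.py` returns two logit maps: `x4` from the final `dsn4` head and `x3` from the auxiliary `dsn3` head used for deep supervision. With stride 2 in the stem convolution, the max-pool, `layer2` and `layer3`, both maps sit at 1/16 of the input resolution and must be upsampled before computing a loss or predictions. A hedged usage sketch, assuming the CUDA ops are built and a GPU is available; the exact depth preprocessing lives in `data/transform/rgbd_transform.py`:

```python
import torch
import torch.nn.functional as F
from graphs.models.SGNet.SGNet_Res50 import SGNet

# NYUDv2 is commonly evaluated with 40 classes; the json configs are authoritative.
model = SGNet(num_classes=40).cuda().eval()

rgb = torch.randn(1, 3, 480, 640).cuda()
depth = torch.randn(1, 1, 480, 640).cuda()  # assumed layout; see rgbd_transform.py

with torch.no_grad():
    x4, x3 = model(rgb, depth)  # main and auxiliary logits at 1/16 resolution

pred = F.interpolate(x4, size=rgb.shape[2:], mode='bilinear',
                     align_corners=True).argmax(dim=1)
```
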
/graphs/models/SGNet/SGNet_Res50_fps.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import functools
3 | import torch
4 |
5 | from graphs.ops.modules.s_conv import SConv
6 | from graphs.ops.libs import InPlaceABNSync
7 |
8 | affine_par = True
9 | BatchNorm2d = functools.partial(InPlaceABNSync, activation='none')
10 |
11 | def conv3x3(in_planes, out_planes, stride=1):
12 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
13 | padding=1, bias=False)
14 |
15 | class Bottleneck(nn.Module):
16 | expansion = 4
17 |
18 | def __init__(self, inplanes, planes, stride=1, dilation=1, downsample=None, first_dilation=1, multi_grid=1,
19 | deformable=False):
20 | super(Bottleneck, self).__init__()
21 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
22 | self.bn1 = BatchNorm2d(planes)
23 | if not deformable:
24 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
25 | padding=dilation * multi_grid, dilation=dilation * multi_grid, bias=False)
26 | else:
27 | self.conv2 = SConv(planes, planes, kernel_size=3, stride=stride,
28 | padding=1, deformable_groups=1, no_bias=True)
29 | self.bn2 = BatchNorm2d(planes)
30 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
31 | self.bn3 = BatchNorm2d(planes * 4)
32 | self.relu = nn.ReLU(inplace=False)
33 | self.relu_inplace = nn.ReLU(inplace=True)
34 | self.downsample = downsample
35 | self.dilation = dilation
36 | self.stride = stride
37 | self.deformable = deformable
38 |
39 | def forward(self, input):
40 | x, S = input
41 | residual = x
42 |
43 | out = self.conv1(x)
44 | out = self.bn1(out)
45 | out = self.relu(out)
46 | if not self.deformable:
47 | out = self.conv2(out)
48 | else:
49 | out = self.conv2(out, S)
50 | out = self.bn2(out)
51 | out = self.relu(out)
52 |
53 | out = self.conv3(out)
54 | out = self.bn3(out)
55 |
56 | if self.downsample is not None:
57 | residual = self.downsample(x)
58 |
59 | out = out + residual
60 | out = self.relu_inplace(out)
61 |
62 | return [out, S]
63 |
64 | class ResNet(nn.Module):
65 | def __init__(self, block, layers, num_classes, deformable=True):
66 | self.inplanes = 128
67 | super(ResNet, self).__init__()
68 | self.conv1 = conv3x3(3, 64, stride=2)
69 | self.bn1 = BatchNorm2d(64)
70 | self.relu1 = nn.ReLU(inplace=False)
71 | self.conv2 = conv3x3(64, 64)
72 | self.bn2 = BatchNorm2d(64)
73 | self.relu2 = nn.ReLU(inplace=False)
74 | self.conv3 = conv3x3(64, 128)
75 | self.bn3 = BatchNorm2d(128)
76 | self.relu3 = nn.ReLU(inplace=False)
77 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
78 | self.relu = nn.ReLU(inplace=False)
79 |
80 | self.layer1 = self._make_layer(block, 64, layers[0], deformable=deformable, seg=True)
81 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2, deformable=deformable, seg=True)
82 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2, dilation=2, deformable=deformable, seg=True)
83 | self.layer4 = self._make_layer(block, 512, layers[3], stride=1, dilation=4, multi_grid=(1, 1, 1),
84 | deformable=deformable, seg=True)
85 |
86 |
87 | self.dsn4 = nn.Sequential(
88 | nn.Conv2d(2048, 512, kernel_size=3, stride=1, padding=1),
89 | InPlaceABNSync(512),
90 | nn.Dropout2d(0.1),
91 | nn.Conv2d(512, num_classes, kernel_size=1, stride=1, padding=0, bias=True),
92 | )
93 |
94 | def make_up_conv_layer(self, block, in_channels, out_channels, batch_size):
95 | return block(in_channels, out_channels, batch_size)
96 |
97 | def _make_layer(self, block, planes, blocks, stride=1, dilation=1, multi_grid=1, deformable=False, seg=False):
98 | downsample = None
99 | if stride != 1 or self.inplanes != planes * block.expansion:
100 | downsample = nn.Sequential(
101 | nn.Conv2d(self.inplanes, planes * block.expansion,
102 | kernel_size=1, stride=stride, bias=False),
103 | BatchNorm2d(planes * block.expansion, affine=affine_par))
104 |
105 | layers = []
106 | generate_multi_grid = lambda index, grids: grids[index % len(grids)] if isinstance(grids, tuple) else 1
107 | layers.append(block(self.inplanes, planes, stride, dilation=dilation, downsample=downsample,
108 | multi_grid=generate_multi_grid(0, multi_grid), deformable=deformable))
109 | self.inplanes = planes * block.expansion
110 | for i in range(1, blocks):
111 | if not seg:
112 | layers.append(block(self.inplanes, planes, dilation=dilation,
113 | multi_grid=generate_multi_grid(i, multi_grid), deformable=deformable))
114 | else:
115 | if i >= blocks-2:
116 | layers.append(block(self.inplanes, planes, dilation=dilation,
117 | multi_grid=generate_multi_grid(i, multi_grid), deformable=deformable))
118 | else:
119 | layers.append(block(self.inplanes, planes, dilation=dilation,
120 | multi_grid=generate_multi_grid(i, multi_grid), deformable=False))
121 |
122 | return nn.Sequential(*layers)
123 | def forward(self, x, depth):
124 | S = depth
125 | x = self.relu1(self.bn1(self.conv1(x)))
126 | x = self.relu2(self.bn2(self.conv2(x)))
127 | x = self.relu3(self.bn3(self.conv3(x)))
128 |
129 | x = self.maxpool(x)
130 |
131 | x = [x, S]
132 |
133 | x = self.layer3(self.layer2(self.layer1(x)))
134 |
135 | x = self.layer4(x)
136 | x4 = self.dsn4(x[0])
137 |
138 | return x4
139 |
140 | def load_pretrain(self, pretrain_model_path):
141 | """Load pretrained Network"""
142 | saved_state_dict = torch.load(pretrain_model_path)
143 | new_params = self.state_dict().copy()
144 | for i in saved_state_dict:
145 | # copy everything except the ImageNet classifier weights ('fc.*')
146 | if i.split('.')[0] != 'fc':
147 | new_params[i] = saved_state_dict[i]
148 |
149 | self.load_state_dict(new_params)
150 |
151 | def SGNet(num_classes=21):
152 | model = ResNet(Bottleneck, [3, 4, 6, 3], num_classes)
153 | return model
--------------------------------------------------------------------------------
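
The `_fps` variants drop the auxiliary `dsn3` branch so that only the deployed inference path is timed. Below is a minimal timing loop in the spirit of the README's FPS numbers (a sketch, not the repo's measurement script); the `torch.cuda.synchronize()` calls matter, since CUDA kernels launch asynchronously and the wall clock would otherwise stop before the work finishes:

```python
import time
import torch

def measure_fps(model, rgb, depth, warmup=10, iters=100):
    """Rough frames-per-second estimate for a single fixed-size input."""
    with torch.no_grad():
        for _ in range(warmup):        # let cuDNN pick algorithms, warm the allocator
            model(rgb, depth)
        torch.cuda.synchronize()       # drain pending kernels before starting the clock
        start = time.time()
        for _ in range(iters):
            model(rgb, depth)
        torch.cuda.synchronize()       # make sure every timed kernel has finished
    return iters / (time.time() - start)
```
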
/graphs/models/SGNet/SGNet_fps.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import functools
3 | import torch
4 |
5 | from graphs.ops.modules.s_conv import SConv
6 | from graphs.ops.libs import InPlaceABNSync
7 |
8 | affine_par = True
9 | BatchNorm2d = functools.partial(InPlaceABNSync, activation='none')
10 |
11 | def conv3x3(in_planes, out_planes, stride=1):
12 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
13 | padding=1, bias=False)
14 |
15 | class Bottleneck(nn.Module):
16 | expansion = 4
17 |
18 | def __init__(self, inplanes, planes, stride=1, dilation=1, downsample=None, first_dilation=1, multi_grid=1,
19 | deformable=False):
20 | super(Bottleneck, self).__init__()
21 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
22 | self.bn1 = BatchNorm2d(planes)
23 | if not deformable:
24 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
25 | padding=dilation * multi_grid, dilation=dilation * multi_grid, bias=False)
26 | else:
27 | self.conv2 = SConv(planes, planes, kernel_size=3, stride=stride,
28 | padding=1, deformable_groups=1, no_bias=True)
29 | self.bn2 = BatchNorm2d(planes)
30 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
31 | self.bn3 = BatchNorm2d(planes * 4)
32 | self.relu = nn.ReLU(inplace=False)
33 | self.relu_inplace = nn.ReLU(inplace=True)
34 | self.downsample = downsample
35 | self.dilation = dilation
36 | self.stride = stride
37 | self.deformable = deformable
38 |
39 | def forward(self, input):
40 | x, S = input
41 | residual = x
42 |
43 | out = self.conv1(x)
44 | out = self.bn1(out)
45 | out = self.relu(out)
46 | if not self.deformable:
47 | out = self.conv2(out)
48 | else:
49 | out = self.conv2(out, S)
50 | out = self.bn2(out)
51 | out = self.relu(out)
52 |
53 | out = self.conv3(out)
54 | out = self.bn3(out)
55 |
56 | if self.downsample is not None:
57 | residual = self.downsample(x)
58 |
59 | out = out + residual
60 | out = self.relu_inplace(out)
61 |
62 | return [out, S]
63 |
64 | class ResNet(nn.Module):
65 | def __init__(self, block, layers, num_classes, deformable=True):
66 | self.inplanes = 128
67 | super(ResNet, self).__init__()
68 | self.conv1 = conv3x3(3, 64, stride=2)
69 | self.bn1 = BatchNorm2d(64)
70 | self.relu1 = nn.ReLU(inplace=False)
71 | self.conv2 = conv3x3(64, 64)
72 | self.bn2 = BatchNorm2d(64)
73 | self.relu2 = nn.ReLU(inplace=False)
74 | self.conv3 = conv3x3(64, 128)
75 | self.bn3 = BatchNorm2d(128)
76 | self.relu3 = nn.ReLU(inplace=False)
77 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
78 | self.relu = nn.ReLU(inplace=False)
79 |
80 | self.layer1 = self._make_layer(block, 64, layers[0], deformable=deformable, seg=True)
81 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2, deformable=deformable, seg=True)
82 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2, dilation=2, deformable=deformable, seg=True)
83 | self.layer4 = self._make_layer(block, 512, layers[3], stride=1, dilation=4, multi_grid=(1, 1, 1),
84 | deformable=deformable, seg=True)
85 |
86 | self.dsn4 = nn.Sequential(
87 | nn.Conv2d(2048, 512, kernel_size=3, stride=1, padding=1),
88 | InPlaceABNSync(512),
89 | nn.Dropout2d(0.1),
90 | nn.Conv2d(512, num_classes, kernel_size=1, stride=1, padding=0, bias=True),
91 | )
92 |
93 | def make_up_conv_layer(self, block, in_channels, out_channels, batch_size):
94 | return block(in_channels, out_channels, batch_size)
95 |
96 | def _make_layer(self, block, planes, blocks, stride=1, dilation=1, multi_grid=1, deformable=False, seg=False):
97 | downsample = None
98 | if stride != 1 or self.inplanes != planes * block.expansion:
99 | downsample = nn.Sequential(
100 | nn.Conv2d(self.inplanes, planes * block.expansion,
101 | kernel_size=1, stride=stride, bias=False),
102 | BatchNorm2d(planes * block.expansion, affine=affine_par))
103 |
104 | layers = []
105 | generate_multi_grid = lambda index, grids: grids[index % len(grids)] if isinstance(grids, tuple) else 1
106 | layers.append(block(self.inplanes, planes, stride, dilation=dilation, downsample=downsample,
107 | multi_grid=generate_multi_grid(0, multi_grid), deformable=deformable))
108 | self.inplanes = planes * block.expansion
109 | for i in range(1, blocks):
110 | if not seg:
111 | layers.append(block(self.inplanes, planes, dilation=dilation,
112 | multi_grid=generate_multi_grid(i, multi_grid), deformable=deformable))
113 | else:
114 | if i >= blocks-2:
115 | layers.append(block(self.inplanes, planes, dilation=dilation,
116 | multi_grid=generate_multi_grid(i, multi_grid), deformable=deformable))
117 | else:
118 | layers.append(block(self.inplanes, planes, dilation=dilation,
119 | multi_grid=generate_multi_grid(i, multi_grid), deformable=False))
120 |
121 | return nn.Sequential(*layers)
122 | def forward(self, x, depth):
123 | S = depth
124 | x = self.relu1(self.bn1(self.conv1(x)))
125 | x = self.relu2(self.bn2(self.conv2(x)))
126 | x = self.relu3(self.bn3(self.conv3(x)))
127 |
128 | x = self.maxpool(x)
129 |
130 | x = [x, S]
131 |
132 | x = self.layer3(self.layer2(self.layer1(x)))
133 |
134 | x = self.layer4(x)
135 | x4 = self.dsn4(x[0])
136 |
137 | return x4
138 |
139 | def load_pretrain(self, pretrain_model_path):
140 | """Load pretrained Network"""
141 | saved_state_dict = torch.load(pretrain_model_path)
142 | new_params = self.state_dict().copy()
143 | for i in saved_state_dict:
144 | # copy everything except the ImageNet classifier weights ('fc.*')
145 | if i.split('.')[0] != 'fc':
146 | new_params[i] = saved_state_dict[i]
147 |
148 | self.load_state_dict(new_params)
149 |
150 | def SGNet(num_classes=21):
151 | model = ResNet(Bottleneck, [3, 4, 23, 3], num_classes)
152 | return model
--------------------------------------------------------------------------------
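
Every variant above shares the same `_make_layer` policy when `seg=True`: the first block of each stage honors the model-level `deformable` flag, middle blocks fall back to plain dilated 3x3 convolutions, and only the last two blocks of the stage use S-Conv. That bounds the extra cost of the guided convolution in the long `layer3`. A quick way to visualize the placement for the ResNet-101 layout `[3, 4, 23, 3]`:

```python
def sconv_pattern(blocks):
    """Which blocks in a stage are deformable, mirroring _make_layer(seg=True)."""
    flags = [True]                       # block 0 follows the `deformable` flag
    for i in range(1, blocks):
        flags.append(i >= blocks - 2)    # only the last two blocks of the stage
    return flags

for stage, blocks in zip(['layer1', 'layer2', 'layer3', 'layer4'], [3, 4, 23, 3]):
    print(stage, ''.join('S' if f else '.' for f in sconv_pattern(blocks)))
# layer3: blocks 0, 21 and 22 carry S-Conv; the 20 blocks in between stay plain.
```
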
/graphs/ops/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LinZhuoChen/SGNet/02510182eb4baca77dd1d99237a5e77812055a0c/graphs/ops/__init__.py
--------------------------------------------------------------------------------
/graphs/ops/build.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | from torch.utils.ffi import create_extension
4 |
5 | this_file = os.path.dirname(__file__)
6 |
7 | sources = ['src/deform_conv.c']
8 | headers = ['src/deform_conv.h']
9 | defines = []
10 | with_cuda = False
11 |
12 | if torch.cuda.is_available():
13 | print('Including CUDA code.')
14 | sources += ['src/deform_conv_cuda.c']
15 | headers += ['src/deform_conv_cuda.h']
16 | defines += [('WITH_CUDA', None)]
17 | with_cuda = True
18 |
19 | this_file = os.path.dirname(os.path.realpath(__file__))
20 | print(this_file)
21 | extra_objects = ['src/deform_conv_cuda_kernel.cu.so']
22 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]
23 |
24 | ffi = create_extension(
25 | '_ext.deform_conv',
26 | headers=headers,
27 | sources=sources,
28 | define_macros=defines,
29 | relative_to=__file__,
30 | with_cuda=with_cuda,
31 | extra_objects=extra_objects,
32 | extra_compile_args=['-std=c++11']
33 | )
34 |
35 | assert torch.cuda.is_available(), 'Please install CUDA for GPU support.'
36 | ffi.build()
37 |
38 |
--------------------------------------------------------------------------------
/graphs/ops/build_modulated.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | from torch.utils.ffi import create_extension
4 |
5 |
6 | sources = ['src/modulated_dcn.c']
7 | headers = ['src/modulated_dcn.h']
8 | defines = []
9 | with_cuda = False
10 |
11 | extra_objects = []
12 | if torch.cuda.is_available():
13 | print('Including CUDA code.')
14 | sources += ['src/modulated_dcn_cuda.c']
15 | headers += ['src/modulated_dcn_cuda.h']
16 | defines += [('WITH_CUDA', None)]
17 | extra_objects += ['src/cuda/modulated_deform_im2col_cuda.cu.so']
18 | extra_objects += ['src/cuda/deform_psroi_pooling_cuda.cu.so']
19 | with_cuda = True
20 | else:
21 | raise ValueError('CUDA is not available')
22 |
23 | extra_compile_args = ['-fopenmp', '-std=c99']
24 |
25 | this_file = os.path.dirname(os.path.realpath(__file__))
26 | print(this_file)
27 | sources = [os.path.join(this_file, fname) for fname in sources]
28 | headers = [os.path.join(this_file, fname) for fname in headers]
29 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]
30 |
31 | ffi = create_extension(
32 | '_ext.modulated_dcn',
33 | headers=headers,
34 | sources=sources,
35 | define_macros=defines,
36 | relative_to=__file__,
37 | with_cuda=with_cuda,
38 | extra_objects=extra_objects,
39 | extra_compile_args=extra_compile_args
40 | )
41 |
42 | if __name__ == '__main__':
43 | ffi.build()
44 |
--------------------------------------------------------------------------------
/graphs/ops/functions/__init__.py:
--------------------------------------------------------------------------------
1 | from .deform_conv import DeformConvFunction, deform_conv_function
2 | from .modulated_dcn_func import DeformRoIPoolingFunction, ModulatedDeformConvFunction
3 | # from .scale_conv import ScaleConvFunction, scale_conv_function
--------------------------------------------------------------------------------
/graphs/ops/functions/deform_conv.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.autograd import Function
3 | from torch.nn.modules.utils import _pair
4 |
5 | from graphs.ops._ext import deform_conv
6 |
7 |
8 | def deform_conv_function(input,
9 | offset,
10 | weight,
11 | stride=1,
12 | padding=0,
13 | dilation=1,
14 | deform_groups=1,
15 | im2col_step=64):
16 |
17 | if input is not None and input.dim() != 4:
18 | raise ValueError(
19 | "Expected 4D tensor as input, got {}D tensor instead.".format(
20 | input.dim()))
21 |
22 | f = DeformConvFunction(
23 | _pair(stride), _pair(padding), _pair(dilation), deform_groups, im2col_step)
24 | return f(input, offset, weight)
25 |
26 |
27 | class DeformConvFunction(Function):
28 | def __init__(self, stride, padding, dilation, deformable_groups=1, im2col_step=64):
29 | super(DeformConvFunction, self).__init__()
30 | self.stride = stride
31 | self.padding = padding
32 | self.dilation = dilation
33 | self.deformable_groups = deformable_groups
34 | self.im2col_step = im2col_step
35 |
36 | def forward(self, input, offset, weight):
37 | self.save_for_backward(input, offset, weight)
38 |
39 | output = input.new(*self._output_size(input, weight))
40 |
41 | self.bufs_ = [input.new(), input.new()] # columns, ones
42 |
43 | if not input.is_cuda:
44 | raise NotImplementedError
45 | else:
46 | if isinstance(input, torch.autograd.Variable):
47 | if not isinstance(input.data, torch.cuda.FloatTensor):
48 | raise NotImplementedError
49 | else:
50 | if not isinstance(input, torch.cuda.FloatTensor):
51 | raise NotImplementedError
52 |
53 | cur_im2col_step = min(self.im2col_step, input.shape[0])
54 | assert (input.shape[0] % cur_im2col_step) == 0, 'im2col step must divide batchsize'
55 | deform_conv.deform_conv_forward_cuda(
56 | input, weight, offset, output, self.bufs_[0], self.bufs_[1],
57 | weight.size(3), weight.size(2), self.stride[1], self.stride[0],
58 | self.padding[1], self.padding[0], self.dilation[1],
59 | self.dilation[0], self.deformable_groups, cur_im2col_step)
60 | return output
61 |
62 | def backward(self, grad_output):
63 | input, offset, weight = self.saved_tensors
64 |
65 | grad_input = grad_offset = grad_weight = None
66 |
67 | if not grad_output.is_cuda:
68 | raise NotImplementedError
69 | else:
70 | if isinstance(grad_output, torch.autograd.Variable):
71 | if not isinstance(grad_output.data, torch.cuda.FloatTensor):
72 | raise NotImplementedError
73 | else:
74 | if not isinstance(grad_output, torch.cuda.FloatTensor):
75 | raise NotImplementedError
76 |
77 | cur_im2col_step = min(self.im2col_step, input.shape[0])
78 | assert (input.shape[0] % cur_im2col_step) == 0, 'im2col step must divide batchsize'
79 |
80 | if self.needs_input_grad[0] or self.needs_input_grad[1]:
81 | grad_input = input.new(*input.size()).zero_()
82 | grad_offset = offset.new(*offset.size()).zero_()
83 | deform_conv.deform_conv_backward_input_cuda(
84 | input, offset, grad_output, grad_input,
85 | grad_offset, weight, self.bufs_[0], weight.size(3),
86 | weight.size(2), self.stride[1], self.stride[0],
87 | self.padding[1], self.padding[0], self.dilation[1],
88 | self.dilation[0], self.deformable_groups, cur_im2col_step)
89 |
90 |
91 | if self.needs_input_grad[2]:
92 | grad_weight = weight.new(*weight.size()).zero_()
93 | deform_conv.deform_conv_backward_parameters_cuda(
94 | input, offset, grad_output,
95 | grad_weight, self.bufs_[0], self.bufs_[1], weight.size(3),
96 | weight.size(2), self.stride[1], self.stride[0],
97 | self.padding[1], self.padding[0], self.dilation[1],
98 | self.dilation[0], self.deformable_groups, 1, cur_im2col_step)
99 |
100 | return grad_input, grad_offset, grad_weight
101 |
102 | def _output_size(self, input, weight):
103 | channels = weight.size(0)
104 |
105 | output_size = (input.size(0), channels)
106 | for d in range(input.dim() - 2):
107 | in_size = input.size(d + 2)
108 | pad = self.padding[d]
109 | kernel = self.dilation[d] * (weight.size(d + 2) - 1) + 1
110 | stride = self.stride[d]
111 | output_size += ((in_size + (2 * pad) - kernel) // stride + 1, )
112 | if not all(map(lambda s: s > 0, output_size)):
113 | raise ValueError(
114 | "convolution input is too small (output would be {})".format(
115 | 'x'.join(map(str, output_size))))
116 | return output_size
117 |
--------------------------------------------------------------------------------
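
`_output_size` applies the standard convolution arithmetic per spatial dimension, out = (in + 2*pad - (dilation*(kernel - 1) + 1)) // stride + 1, and rejects degenerate outputs. A worked check against the 3x3 settings the models actually use:

```python
def conv_out(in_size, k, stride=1, pad=0, dilation=1):
    """Same arithmetic as DeformConvFunction._output_size, one dimension at a time."""
    return (in_size + 2 * pad - (dilation * (k - 1) + 1)) // stride + 1

assert conv_out(60, k=3, stride=1, pad=1) == 60              # size-preserving 3x3
assert conv_out(60, k=3, stride=2, pad=1) == 30              # stride 2 halves the map
assert conv_out(60, k=3, stride=1, pad=2, dilation=2) == 60  # dilated, still size-preserving
```
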
/graphs/ops/functions/modulated_dcn_func.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | from __future__ import absolute_import
3 | from __future__ import print_function
4 | from __future__ import division
5 |
6 | import torch
7 | from torch.autograd import Function
8 |
9 | from graphs.ops._ext import modulated_dcn as _backend
10 |
11 |
12 | class ModulatedDeformConvFunction(Function):
13 |
14 | def __init__(self, stride, padding, dilation=1, deformable_groups=1):
15 | super(ModulatedDeformConvFunction, self).__init__()
16 | self.stride = stride
17 | self.padding = padding
18 | self.dilation = dilation
19 | self.deformable_groups = deformable_groups
20 |
21 | def forward(self, input, offset, mask, weight, bias):
22 | if not input.is_cuda:
23 | raise NotImplementedError
24 | if weight.requires_grad or mask.requires_grad or offset.requires_grad or input.requires_grad:
25 | self.save_for_backward(input, offset, mask, weight, bias)
26 | output = input.new(*self._infer_shape(input, weight))
27 | self._bufs = [input.new(), input.new()]
28 | _backend.modulated_deform_conv_cuda_forward(input, weight,
29 | bias, self._bufs[0],
30 | offset, mask,
31 | output, self._bufs[1],
32 | weight.shape[2], weight.shape[3],
33 | self.stride, self.stride,
34 | self.padding, self.padding,
35 | self.dilation, self.dilation,
36 | self.deformable_groups)
37 | return output
38 |
39 | def backward(self, grad_output):
40 | if not grad_output.is_cuda:
41 | raise NotImplementedError
42 | input, offset, mask, weight, bias = self.saved_tensors
43 | grad_input = input.new(*input.size()).zero_()
44 | grad_offset = offset.new(*offset.size()).zero_()
45 | grad_mask = mask.new(*mask.size()).zero_()
46 | grad_weight = weight.new(*weight.size()).zero_()
47 | grad_bias = bias.new(*bias.size()).zero_()
48 | _backend.modulated_deform_conv_cuda_backward(input, weight,
49 | bias, self._bufs[0],
50 | offset, mask,
51 | self._bufs[1],
52 | grad_input, grad_weight,
53 | grad_bias, grad_offset,
54 | grad_mask, grad_output,
55 | weight.shape[2], weight.shape[3],
56 | self.stride, self.stride,
57 | self.padding, self.padding,
58 | self.dilation, self.dilation,
59 | self.deformable_groups)
60 |
61 | return grad_input, grad_offset, grad_mask, grad_weight, grad_bias
62 |
63 | def _infer_shape(self, input, weight):
64 | n = input.size(0)
65 | channels_out = weight.size(0)
66 | height, width = input.shape[2:4]
67 | kernel_h, kernel_w = weight.shape[2:4]
68 | height_out = (height + 2 * self.padding -
69 | (self.dilation * (kernel_h - 1) + 1)) // self.stride + 1
70 | width_out = (width + 2 * self.padding - (self.dilation *
71 | (kernel_w - 1) + 1)) // self.stride + 1
72 | return (n, channels_out, height_out, width_out)
73 |
74 |
75 | class DeformRoIPoolingFunction(Function):
76 |
77 | def __init__(self,
78 | spatial_scale,
79 | pooled_size,
80 | output_dim,
81 | no_trans,
82 | group_size=1,
83 | part_size=None,
84 | sample_per_part=4,
85 | trans_std=.0):
86 | super(DeformRoIPoolingFunction, self).__init__()
87 | self.spatial_scale = spatial_scale
88 | self.pooled_size = pooled_size
89 | self.output_dim = output_dim
90 | self.no_trans = no_trans
91 | self.group_size = group_size
92 | self.part_size = pooled_size if part_size is None else part_size
93 | self.sample_per_part = sample_per_part
94 | self.trans_std = trans_std
95 |
96 | assert self.trans_std >= 0.0 and self.trans_std <= 1.0
97 |
98 | def forward(self, data, rois, offset):
99 | if not data.is_cuda:
100 | raise NotImplementedError
101 |
102 | output = data.new(*self._infer_shape(data, rois))
103 | output_count = data.new(*self._infer_shape(data, rois))
104 | _backend.deform_psroi_pooling_cuda_forward(data, rois, offset,
105 | output, output_count,
106 | self.no_trans, self.spatial_scale,
107 | self.output_dim, self.group_size,
108 | self.pooled_size, self.part_size,
109 | self.sample_per_part, self.trans_std)
110 |
111 | # if data.requires_grad or rois.requires_grad or offset.requires_grad:
112 | # self.save_for_backward(data, rois, offset, output_count)
113 | self.data = data
114 | self.rois = rois
115 | self.offset = offset
116 | self.output_count = output_count
117 |
118 | return output
119 |
120 | def backward(self, grad_output):
121 | if not grad_output.is_cuda:
122 | raise NotImplementedError
123 |
124 | # data, rois, offset, output_count = self.saved_tensors
125 | data = self.data
126 | rois = self.rois
127 | offset = self.offset
128 | output_count = self.output_count
129 | grad_input = data.new(*data.size()).zero_()
130 | grad_offset = offset.new(*offset.size()).zero_()
131 |
132 | _backend.deform_psroi_pooling_cuda_backward(grad_output,
133 | data,
134 | rois,
135 | offset,
136 | output_count,
137 | grad_input,
138 | grad_offset,
139 | self.no_trans,
140 | self.spatial_scale,
141 | self.output_dim,
142 | self.group_size,
143 | self.pooled_size,
144 | self.part_size,
145 | self.sample_per_part,
146 | self.trans_std)
147 | return grad_input, torch.zeros(rois.shape).cuda(), grad_offset
148 |
149 | def _infer_shape(self, data, rois):
150 | # _, c, h, w = data.shape[:4]
151 | c = data.shape[1]
152 | n = rois.shape[0]
153 | return (n, self.output_dim, self.pooled_size, self.pooled_size)
154 |
--------------------------------------------------------------------------------
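
`ModulatedDeformConvFunction.forward` expects `offset` and `mask` tensors shaped to match the kernel. The repo's `modules/s_conv.py` is authoritative for how these are produced; the arithmetic below follows the usual DCNv2 convention (a (dy, dx) pair per kernel tap per deformable group for `offset`, one modulation scalar per tap for `mask`) and is offered as a hedged shape reference only:

```python
# Hedged shape bookkeeping for a 3x3 modulated deformable conv, 1 deformable group.
n, h_out, w_out = 1, 30, 40
kh, kw, dg = 3, 3, 1

offset_channels = 2 * dg * kh * kw   # 18: a (dy, dx) displacement per sampling point
mask_channels = dg * kh * kw         # 9: one sigmoid-modulated weight per sampling point
print((n, offset_channels, h_out, w_out), (n, mask_channels, h_out, w_out))
```
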
/graphs/ops/libs/__init__.py:
--------------------------------------------------------------------------------
1 | from .bn import ABN, InPlaceABN, InPlaceABNWrapper, InPlaceABNSync, InPlaceABNSyncWrapper
2 | from .misc import GlobalAvgPool2d
3 | from .residual import IdentityResidualBlock
4 | from .dense import DenseModule
5 |
--------------------------------------------------------------------------------
/graphs/ops/libs/_ext/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from torch.utils.ffi import _wrap_function
3 | from .__ext import lib as _lib, ffi as _ffi
4 |
5 | __all__ = []
6 | def _import_symbols(locals):
7 | for symbol in dir(_lib):
8 | fn = getattr(_lib, symbol)
9 | if callable(fn):
10 | locals[symbol] = _wrap_function(fn, _ffi)
11 | else:
12 | locals[symbol] = fn
13 | __all__.append(symbol)
14 |
15 | _import_symbols(locals())
16 |
--------------------------------------------------------------------------------
/graphs/ops/libs/bn.py:
--------------------------------------------------------------------------------
1 | from collections import OrderedDict, Iterable
2 | from itertools import repeat
3 |
4 | try:
5 | # python 3
6 | from queue import Queue
7 | except ImportError:
8 | # python 2
9 | from Queue import Queue
10 |
11 | import torch
12 | import torch.nn as nn
13 | import torch.autograd as autograd
14 |
15 | from .functions import inplace_abn, inplace_abn_sync
16 |
17 |
18 | def _pair(x):
19 | if isinstance(x, Iterable):
20 | return x
21 | return tuple(repeat(x, 2))
22 |
23 |
24 | class ABN(nn.Sequential):
25 | """Activated Batch Normalization
26 |
27 | This gathers a `BatchNorm2d` and an activation function in a single module
28 | """
29 |
30 | def __init__(self, num_features, activation=nn.ReLU(inplace=True), **kwargs):
31 | """Creates an Activated Batch Normalization module
32 |
33 | Parameters
34 | ----------
35 | num_features : int
36 | Number of feature channels in the input and output.
37 | activation : nn.Module
38 | Module used as an activation function.
39 | kwargs
40 | All other arguments are forwarded to the `BatchNorm2d` constructor.
41 | """
42 | super(ABN, self).__init__(OrderedDict([
43 | ("bn", nn.BatchNorm2d(num_features, **kwargs)),
44 | ("act", activation)
45 | ]))
46 |
47 |
48 | class InPlaceABN(nn.Module):
49 | """InPlace Activated Batch Normalization"""
50 |
51 | def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, activation="leaky_relu", slope=0.01):
52 | """Creates an InPlace Activated Batch Normalization module
53 |
54 | Parameters
55 | ----------
56 | num_features : int
57 | Number of feature channels in the input and output.
58 | eps : float
59 | Small constant to prevent numerical issues.
60 | momentum : float
61 | Momentum factor applied when computing the running statistics.
62 | affine : bool
63 | If `True` apply learned scale and shift transformation after normalization.
64 | activation : str
65 | Name of the activation functions, one of: `leaky_relu`, `elu` or `none`.
66 | slope : float
67 | Negative slope for the `leaky_relu` activation.
68 | """
69 | super(InPlaceABN, self).__init__()
70 | self.num_features = num_features
71 | self.affine = affine
72 | self.eps = eps
73 | self.momentum = momentum
74 | self.activation = activation
75 | self.slope = slope
76 | if self.affine:
77 | self.weight = nn.Parameter(torch.Tensor(num_features))
78 | self.bias = nn.Parameter(torch.Tensor(num_features))
79 | else:
80 | self.register_parameter('weight', None)
81 | self.register_parameter('bias', None)
82 | self.register_buffer('running_mean', torch.zeros(num_features))
83 | self.register_buffer('running_var', torch.ones(num_features))
84 | self.reset_parameters()
85 |
86 | def reset_parameters(self):
87 | self.running_mean.zero_()
88 | self.running_var.fill_(1)
89 | if self.affine:
90 | self.weight.data.fill_(1)
91 | self.bias.data.zero_()
92 |
93 | def forward(self, x):
94 | return inplace_abn(x, self.weight, self.bias, autograd.Variable(self.running_mean),
95 | autograd.Variable(self.running_var), self.training, self.momentum, self.eps,
96 | self.activation, self.slope)
97 |
98 | def __repr__(self):
99 | rep = '{name}({num_features}, eps={eps}, momentum={momentum},' \
100 | ' affine={affine}, activation={activation}'
101 | if self.activation == "leaky_relu":
102 | rep += ' slope={slope})'
103 | else:
104 | rep += ')'
105 | return rep.format(name=self.__class__.__name__, **self.__dict__)
106 |
107 |
108 | class InPlaceABNSync(nn.Module):
109 | """InPlace Activated Batch Normalization with cross-GPU synchronization
110 |
111 | This assumes that it will be replicated across GPUs using the same mechanism as in `nn.DataParallel`.
112 | """
113 |
114 | def __init__(self, num_features, devices=None, eps=1e-5, momentum=0.1, affine=True, activation="leaky_relu",
115 | slope=0.01):
116 | """Creates a synchronized, InPlace Activated Batch Normalization module
117 |
118 | Parameters
119 | ----------
120 | num_features : int
121 | Number of feature channels in the input and output.
122 | devices : list of int or None
123 | IDs of the GPUs that will run the replicas of this module.
124 | eps : float
125 | Small constant to prevent numerical issues.
126 | momentum : float
127 | Momentum factor applied when computing the running statistics.
128 | affine : bool
129 | If `True` apply learned scale and shift transformation after normalization.
130 | activation : str
131 | Name of the activation functions, one of: `leaky_relu`, `elu` or `none`.
132 | slope : float
133 | Negative slope for the `leaky_relu` activation.
134 | """
135 | super(InPlaceABNSync, self).__init__()
136 | self.num_features = num_features
137 | self.devices = devices if devices else list(range(torch.cuda.device_count()))
138 | self.affine = affine
139 | self.eps = eps
140 | self.momentum = momentum
141 | self.activation = activation
142 | self.slope = slope
143 | if self.affine:
144 | self.weight = nn.Parameter(torch.Tensor(num_features))
145 | self.bias = nn.Parameter(torch.Tensor(num_features))
146 | else:
147 | self.register_parameter('weight', None)
148 | self.register_parameter('bias', None)
149 | self.register_buffer('running_mean', torch.zeros(num_features))
150 | self.register_buffer('running_var', torch.ones(num_features))
151 | self.reset_parameters()
152 |
153 | # Initialize queues
154 | self.worker_ids = self.devices[1:]
155 | self.master_queue = Queue(len(self.worker_ids))
156 | self.worker_queues = [Queue(1) for _ in self.worker_ids]
157 |
158 | def reset_parameters(self):
159 | self.running_mean.zero_()
160 | self.running_var.fill_(1)
161 | if self.affine:
162 | self.weight.data.fill_(1)
163 | self.bias.data.zero_()
164 |
165 | def forward(self, x):
166 | if x.get_device() == self.devices[0]:
167 | # Master mode
168 | extra = {
169 | "is_master": True,
170 | "master_queue": self.master_queue,
171 | "worker_queues": self.worker_queues,
172 | "worker_ids": self.worker_ids
173 | }
174 | else:
175 | # Worker mode
176 | extra = {
177 | "is_master": False,
178 | "master_queue": self.master_queue,
179 | "worker_queue": self.worker_queues[self.worker_ids.index(x.get_device())]
180 | }
181 |
182 | return inplace_abn_sync(x, self.weight, self.bias, autograd.Variable(self.running_mean),
183 | autograd.Variable(self.running_var), extra, self.training, self.momentum, self.eps,
184 | self.activation, self.slope)
185 |
186 | def __repr__(self):
187 | rep = '{name}({num_features}, eps={eps}, momentum={momentum},' \
188 | ' affine={affine}, devices={devices}, activation={activation}'
189 | if self.activation == "leaky_relu":
190 | rep += ' slope={slope})'
191 | else:
192 | rep += ')'
193 | return rep.format(name=self.__class__.__name__, **self.__dict__)
194 |
195 |
196 | class InPlaceABNWrapper(nn.Module):
197 | """Wrapper module to make `InPlaceABN` compatible with `ABN`"""
198 |
199 | def __init__(self, *args, **kwargs):
200 | super(InPlaceABNWrapper, self).__init__()
201 | self.bn = InPlaceABN(*args, **kwargs)
202 |
203 | def forward(self, input):
204 | return self.bn(input)
205 |
206 |
207 | class InPlaceABNSyncWrapper(nn.Module):
208 | """Wrapper module to make `InPlaceABNSync` compatible with `ABN`"""
209 |
210 | def __init__(self, *args, **kwargs):
211 | super(InPlaceABNSyncWrapper, self).__init__()
212 | self.bn = InPlaceABNSync(*args, **kwargs)
213 |
214 | def forward(self, input):
215 | return self.bn(input)
216 |
--------------------------------------------------------------------------------
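
The model files build their `BatchNorm2d` as `functools.partial(InPlaceABNSync, activation='none')`, i.e. synchronized in-place BN with no fused activation and a separate `nn.ReLU` after it, while the bare `InPlaceABNSync(512)` layers inside the `dsn` heads keep the default fused `leaky_relu`, which is why no explicit activation follows them. A sketch of both usages, assuming the `libs` extension has been compiled via `build.sh` and `build.py`:

```python
import functools
import torch.nn as nn
from graphs.ops.libs import InPlaceABNSync

# Pattern 1 (backbone): BN with no fused activation, ReLU applied separately.
BatchNorm2d = functools.partial(InPlaceABNSync, activation='none')
stem = nn.Sequential(nn.Conv2d(3, 64, 3, stride=2, padding=1, bias=False),
                     BatchNorm2d(64),
                     nn.ReLU(inplace=False))

# Pattern 2 (dsn heads): the default fused leaky_relu(slope=0.01) lives inside the BN.
head = nn.Sequential(nn.Conv2d(2048, 512, 3, padding=1), InPlaceABNSync(512))
```
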
/graphs/ops/libs/build.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | from torch.utils.ffi import create_extension
4 |
5 | sources = ['src/lib_cffi.cpp']
6 | headers = ['src/lib_cffi.h']
7 | extra_objects = ['src/bn.o']
8 | with_cuda = True
9 |
10 | this_file = os.path.dirname(os.path.realpath(__file__))
11 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]
12 |
13 | ffi = create_extension(
14 | '_ext',
15 | headers=headers,
16 | sources=sources,
17 | relative_to=__file__,
18 | with_cuda=with_cuda,
19 | extra_objects=extra_objects,
20 | extra_compile_args=["-std=c++11"]
21 | )
22 |
23 | if __name__ == '__main__':
24 | ffi.build()
25 |
--------------------------------------------------------------------------------
/graphs/ops/libs/build.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Configuration
4 | CUDA_GENCODE="-arch=sm_50 \
5 | -gencode=arch=compute_50,code=sm_50 \
6 | -gencode=arch=compute_52,code=sm_52 \
7 | -gencode=arch=compute_60,code=sm_60"
8 |
9 | cd src
10 | nvcc -I/usr/local/cuda/include --expt-extended-lambda -O3 -c -o bn.o bn.cu -x cu -Xcompiler -fPIC -std=c++11 ${CUDA_GENCODE}
11 | cd ..
12 |
--------------------------------------------------------------------------------
/graphs/ops/libs/dense.py:
--------------------------------------------------------------------------------
1 | from collections import OrderedDict
2 |
3 | import torch
4 | import torch.nn as nn
5 |
6 | from .bn import ABN
7 |
8 |
9 | class DenseModule(nn.Module):
10 | def __init__(self, in_channels, growth, layers, bottleneck_factor=4, norm_act=ABN, dilation=1):
11 | super(DenseModule, self).__init__()
12 | self.in_channels = in_channels
13 | self.growth = growth
14 | self.layers = layers
15 |
16 | self.convs1 = nn.ModuleList()
17 | self.convs3 = nn.ModuleList()
18 | for i in range(self.layers):
19 | self.convs1.append(nn.Sequential(OrderedDict([
20 | ("bn", norm_act(in_channels)),
21 | ("conv", nn.Conv2d(in_channels, self.growth * bottleneck_factor, 1, bias=False))
22 | ])))
23 | self.convs3.append(nn.Sequential(OrderedDict([
24 | ("bn", norm_act(self.growth * bottleneck_factor)),
25 | ("conv", nn.Conv2d(self.growth * bottleneck_factor, self.growth, 3, padding=dilation, bias=False,
26 | dilation=dilation))
27 | ])))
28 | in_channels += self.growth
29 |
30 | @property
31 | def out_channels(self):
32 | return self.in_channels + self.growth * self.layers
33 |
34 | def forward(self, x):
35 | inputs = [x]
36 | for i in range(self.layers):
37 | x = torch.cat(inputs, dim=1)
38 | x = self.convs1[i](x)
39 | x = self.convs3[i](x)
40 | inputs += [x]
41 |
42 | return torch.cat(inputs, dim=1)
--------------------------------------------------------------------------------
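
`DenseModule` concatenates every layer's `growth`-channel output onto its input, so `out_channels = in_channels + growth * layers`. A small check (a sketch; it assumes the repo root is on `PYTHONPATH`, and note that importing `graphs.ops.libs` pulls in the compiled `_ext` even though `ABN` itself is ordinary `BatchNorm2d` plus an activation):

```python
import torch
from graphs.ops.libs.dense import DenseModule

m = DenseModule(in_channels=8, growth=4, layers=3)
y = m(torch.randn(2, 8, 16, 16))
assert y.shape[1] == m.out_channels == 8 + 4 * 3   # 8 input + 3 layers of 4 channels
```
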
/graphs/ops/libs/functions.py:
--------------------------------------------------------------------------------
1 | import torch.autograd as autograd
2 | import torch.cuda.comm as comm
3 | from torch.autograd.function import once_differentiable
4 |
5 | from . import _ext
6 |
7 | # Activation names
8 | ACT_LEAKY_RELU = "leaky_relu"
9 | ACT_ELU = "elu"
10 | ACT_NONE = "none"
11 |
12 |
13 | def _check(fn, *args, **kwargs):
14 | success = fn(*args, **kwargs)
15 | if not success:
16 | raise RuntimeError("CUDA Error encountered in {}".format(fn))
17 |
18 |
19 | def _broadcast_shape(x):
20 | out_size = []
21 | for i, s in enumerate(x.size()):
22 | if i != 1:
23 | out_size.append(1)
24 | else:
25 | out_size.append(s)
26 | return out_size
27 |
28 |
29 | def _reduce(x):
30 | if len(x.size()) == 2:
31 | return x.sum(dim=0)
32 | else:
33 | n, c = x.size()[0:2]
34 | return x.contiguous().view((n, c, -1)).sum(2).sum(0)
35 |
36 |
37 | def _count_samples(x):
38 | count = 1
39 | for i, s in enumerate(x.size()):
40 | if i != 1:
41 | count *= s
42 | return count
43 |
44 |
45 | def _act_forward(ctx, x):
46 | if ctx.activation == ACT_LEAKY_RELU:
47 | _check(_ext.leaky_relu_cuda, x, ctx.slope)
48 | elif ctx.activation == ACT_ELU:
49 | _check(_ext.elu_cuda, x)
50 | elif ctx.activation == ACT_NONE:
51 | pass
52 |
53 |
54 | def _act_backward(ctx, x, dx):
55 | if ctx.activation == ACT_LEAKY_RELU:
56 | _check(_ext.leaky_relu_backward_cuda, x, dx, ctx.slope)
57 | _check(_ext.leaky_relu_cuda, x, 1. / ctx.slope)
58 | elif ctx.activation == ACT_ELU:
59 | _check(_ext.elu_backward_cuda, x, dx)
60 | _check(_ext.elu_inv_cuda, x)
61 | elif ctx.activation == ACT_NONE:
62 | pass
63 |
64 |
65 | def _check_contiguous(*args):
66 | if not all([mod is None or mod.is_contiguous() for mod in args]):
67 | raise ValueError("Non-contiguous input")
68 |
69 |
70 | class InPlaceABN(autograd.Function):
71 | @staticmethod
72 | def forward(ctx, x, weight, bias, running_mean, running_var,
73 | training=True, momentum=0.1, eps=1e-05, activation=ACT_LEAKY_RELU, slope=0.01):
74 | # Save context
75 | ctx.training = training
76 | ctx.momentum = momentum
77 | ctx.eps = eps
78 | ctx.activation = activation
79 | ctx.slope = slope
80 |
81 | n = _count_samples(x)
82 |
83 | if ctx.training:
84 | mean = x.new().resize_as_(running_mean)
85 | var = x.new().resize_as_(running_var)
86 | _check_contiguous(x, mean, var)
87 | _check(_ext.bn_mean_var_cuda, x, mean, var)
88 |
89 | # Update running stats
90 | running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * mean)
91 | running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var * n / (n - 1))
92 | else:
93 | mean, var = running_mean, running_var
94 |
95 | _check_contiguous(x, mean, var, weight, bias)
96 | _check(_ext.bn_forward_cuda,
97 | x, mean, var,
98 | weight if weight is not None else x.new(),
99 | bias if bias is not None else x.new(),
100 | x, x, ctx.eps)
101 |
102 | # Activation
103 | _act_forward(ctx, x)
104 |
105 | # Output
106 | ctx.var = var
107 | ctx.save_for_backward(x, weight, bias, running_mean, running_var)
108 | ctx.mark_dirty(x)
109 | return x
110 |
111 | @staticmethod
112 | @once_differentiable
113 | def backward(ctx, dz):
114 | z, weight, bias, running_mean, running_var = ctx.saved_tensors
115 | dz = dz.contiguous()
116 |
117 | # Undo activation
118 | _act_backward(ctx, z, dz)
119 |
120 | if ctx.needs_input_grad[0]:
121 | dx = dz.new().resize_as_(dz)
122 | else:
123 | dx = None
124 |
125 | if ctx.needs_input_grad[1]:
126 | dweight = dz.new().resize_as_(running_mean).zero_()
127 | else:
128 | dweight = None
129 |
130 | if ctx.needs_input_grad[2]:
131 | dbias = dz.new().resize_as_(running_mean).zero_()
132 | else:
133 | dbias = None
134 |
135 | if ctx.training:
136 | edz = dz.new().resize_as_(running_mean)
137 | eydz = dz.new().resize_as_(running_mean)
138 | _check_contiguous(z, dz, weight, bias, edz, eydz)
139 | _check(_ext.bn_edz_eydz_cuda,
140 | z, dz,
141 | weight if weight is not None else dz.new(),
142 | bias if bias is not None else dz.new(),
143 | edz, eydz, ctx.eps)
144 | else:
145 | # TODO: implement CUDA backward for inference mode
146 | edz = dz.new().resize_as_(running_mean).zero_()
147 | eydz = dz.new().resize_as_(running_mean).zero_()
148 |
149 | _check_contiguous(dz, z, ctx.var, weight, bias, edz, eydz, dx, dweight, dbias)
150 | _check(_ext.bn_backard_cuda,
151 | dz, z, ctx.var,
152 | weight if weight is not None else dz.new(),
153 | bias if bias is not None else dz.new(),
154 | edz, eydz,
155 | dx if dx is not None else dz.new(),
156 | dweight if dweight is not None else dz.new(),
157 | dbias if dbias is not None else dz.new(),
158 | ctx.eps)
159 |
160 | del ctx.var
161 |
162 | return dx, dweight, dbias, None, None, None, None, None, None, None
163 |
164 |
165 | class InPlaceABNSync(autograd.Function):
166 | @classmethod
167 | def forward(cls, ctx, x, weight, bias, running_mean, running_var,
168 | extra, training=True, momentum=0.1, eps=1e-05, activation=ACT_LEAKY_RELU, slope=0.01):
169 | # Save context
170 | cls._parse_extra(ctx, extra)
171 | ctx.training = training
172 | ctx.momentum = momentum
173 | ctx.eps = eps
174 | ctx.activation = activation
175 | ctx.slope = slope
176 |
177 | n = _count_samples(x) * (ctx.master_queue.maxsize + 1)
178 |
179 | if ctx.training:
180 | mean = x.new().resize_(1, running_mean.size(0))
181 | var = x.new().resize_(1, running_var.size(0))
182 | _check_contiguous(x, mean, var)
183 | _check(_ext.bn_mean_var_cuda, x, mean, var)
184 |
185 | if ctx.is_master:
186 | means, vars = [mean], [var]
187 | for _ in range(ctx.master_queue.maxsize):
188 | mean_w, var_w = ctx.master_queue.get()
189 | ctx.master_queue.task_done()
190 | means.append(mean_w)
191 | vars.append(var_w)
192 |
193 | means = comm.gather(means)
194 | vars = comm.gather(vars)
195 |
196 | mean = means.mean(0)
197 | var = (vars + (mean - means) ** 2).mean(0)
198 |
199 | tensors = comm.broadcast_coalesced((mean, var), [mean.get_device()] + ctx.worker_ids)
200 | for ts, queue in zip(tensors[1:], ctx.worker_queues):
201 | queue.put(ts)
202 | else:
203 | ctx.master_queue.put((mean, var))
204 | mean, var = ctx.worker_queue.get()
205 | ctx.worker_queue.task_done()
206 |
207 | # Update running stats
208 | running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * mean)
209 | running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var * n / (n - 1))
210 | else:
211 | mean, var = running_mean, running_var
212 |
213 | _check_contiguous(x, mean, var, weight, bias)
214 | _check(_ext.bn_forward_cuda,
215 | x, mean, var,
216 | weight if weight is not None else x.new(),
217 | bias if bias is not None else x.new(),
218 | x, x, ctx.eps)
219 |
220 | # Activation
221 | _act_forward(ctx, x)
222 |
223 | # Output
224 | ctx.var = var
225 | ctx.save_for_backward(x, weight, bias, running_mean, running_var)
226 | ctx.mark_dirty(x)
227 | return x
228 |
229 | @staticmethod
230 | @once_differentiable
231 | def backward(ctx, dz):
232 | z, weight, bias, running_mean, running_var = ctx.saved_tensors
233 | dz = dz.contiguous()
234 |
235 | # Undo activation
236 | _act_backward(ctx, z, dz)
237 |
238 | if ctx.needs_input_grad[0]:
239 | dx = dz.new().resize_as_(dz)
240 | else:
241 | dx = None
242 |
243 | if ctx.needs_input_grad[1]:
244 | dweight = dz.new().resize_as_(running_mean).zero_()
245 | else:
246 | dweight = None
247 |
248 | if ctx.needs_input_grad[2]:
249 | dbias = dz.new().resize_as_(running_mean).zero_()
250 | else:
251 | dbias = None
252 |
253 | if ctx.training:
254 | edz = dz.new().resize_as_(running_mean)
255 | eydz = dz.new().resize_as_(running_mean)
256 | _check_contiguous(z, dz, weight, bias, edz, eydz)
257 | _check(_ext.bn_edz_eydz_cuda,
258 | z, dz,
259 | weight if weight is not None else dz.new(),
260 | bias if bias is not None else dz.new(),
261 | edz, eydz, ctx.eps)
262 |
263 | if ctx.is_master:
264 | edzs, eydzs = [edz], [eydz]
265 | for _ in range(len(ctx.worker_queues)):
266 | edz_w, eydz_w = ctx.master_queue.get()
267 | ctx.master_queue.task_done()
268 | edzs.append(edz_w)
269 | eydzs.append(eydz_w)
270 |
271 | edz = comm.reduce_add(edzs) / (ctx.master_queue.maxsize + 1)
272 | eydz = comm.reduce_add(eydzs) / (ctx.master_queue.maxsize + 1)
273 |
274 | tensors = comm.broadcast_coalesced((edz, eydz), [edz.get_device()] + ctx.worker_ids)
275 | for ts, queue in zip(tensors[1:], ctx.worker_queues):
276 | queue.put(ts)
277 | else:
278 | ctx.master_queue.put((edz, eydz))
279 | edz, eydz = ctx.worker_queue.get()
280 | ctx.worker_queue.task_done()
281 | else:
282 | edz = dz.new().resize_as_(running_mean).zero_()
283 | eydz = dz.new().resize_as_(running_mean).zero_()
284 |
285 | _check_contiguous(dz, z, ctx.var, weight, bias, edz, eydz, dx, dweight, dbias)
286 | _check(_ext.bn_backard_cuda,
287 | dz, z, ctx.var,
288 | weight if weight is not None else dz.new(),
289 | bias if bias is not None else dz.new(),
290 | edz, eydz,
291 | dx if dx is not None else dz.new(),
292 | dweight if dweight is not None else dz.new(),
293 | dbias if dbias is not None else dz.new(),
294 | ctx.eps)
295 |
296 | del ctx.var
297 |
298 | return dx, dweight, dbias, None, None, None, None, None, None, None, None
299 |
300 | @staticmethod
301 | def _parse_extra(ctx, extra):
302 | ctx.is_master = extra["is_master"]
303 | if ctx.is_master:
304 | ctx.master_queue = extra["master_queue"]
305 | ctx.worker_queues = extra["worker_queues"]
306 | ctx.worker_ids = extra["worker_ids"]
307 | else:
308 | ctx.master_queue = extra["master_queue"]
309 | ctx.worker_queue = extra["worker_queue"]
310 |
311 |
312 | inplace_abn = InPlaceABN.apply
313 | inplace_abn_sync = InPlaceABNSync.apply
314 |
315 | __all__ = ["inplace_abn", "inplace_abn_sync"]
316 |
--------------------------------------------------------------------------------
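
In the synchronized forward pass, the master combines per-GPU batch statistics by taking the global mean as the average of per-replica means and the global variance as the average of `var_i + (mean - mean_i)^2`. This is exact whenever every replica processes the same number of samples, which `nn.DataParallel`-style equal splits guarantee. A quick numerical check of the identity:

```python
import torch

x = torch.randn(4, 1000)                  # 4 "replicas" with equal sample counts
means = x.mean(dim=1)                     # per-replica means
vars_ = x.var(dim=1, unbiased=False)      # per-replica biased variances

mean = means.mean()
var = (vars_ + (mean - means) ** 2).mean()   # the combination used in forward()

assert torch.allclose(var, x.view(-1).var(unbiased=False), atol=1e-5)
```
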
/graphs/ops/libs/misc.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 |
3 |
4 | class GlobalAvgPool2d(nn.Module):
5 | def __init__(self):
6 | """Global average pooling over the input's spatial dimensions"""
7 | super(GlobalAvgPool2d, self).__init__()
8 |
9 | def forward(self, inputs):
10 | in_size = inputs.size()
11 | return inputs.view((in_size[0], in_size[1], -1)).mean(dim=2)
12 |
--------------------------------------------------------------------------------
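
`GlobalAvgPool2d.forward` flattens the spatial dimensions and averages, which is exactly adaptive average pooling to a 1x1 output followed by a reshape:

```python
import torch
import torch.nn.functional as F

x = torch.randn(2, 16, 7, 7)
flat_mean = x.view(x.size(0), x.size(1), -1).mean(dim=2)   # what GlobalAvgPool2d does
assert torch.allclose(flat_mean, F.adaptive_avg_pool2d(x, 1).view(2, 16))
```
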
/graphs/ops/libs/residual.py:
--------------------------------------------------------------------------------
1 | from collections import OrderedDict
2 |
3 | import torch.nn as nn
4 |
5 | from .bn import ABN
6 |
7 |
8 | class IdentityResidualBlock(nn.Module):
9 | def __init__(self,
10 | in_channels,
11 | channels,
12 | stride=1,
13 | dilation=1,
14 | groups=1,
15 | norm_act=ABN,
16 | dropout=None):
17 | """Configurable identity-mapping residual block
18 |
19 | Parameters
20 | ----------
21 | in_channels : int
22 | Number of input channels.
23 | channels : list of int
24 | Number of channels in the internal feature maps. Can either have two or three elements: if three construct
25 | a residual block with two `3 x 3` convolutions, otherwise construct a bottleneck block with `1 x 1`, then
26 | `3 x 3` then `1 x 1` convolutions.
27 | stride : int
28 | Stride of the first `3 x 3` convolution
29 | dilation : int
30 | Dilation to apply to the `3 x 3` convolutions.
31 | groups : int
32 | Number of convolution groups. This is used to create ResNeXt-style blocks and is only compatible with
33 | bottleneck blocks.
34 | norm_act : callable
35 | Function to create normalization / activation Module.
36 | dropout: callable
37 | Function to create Dropout Module.
38 | """
39 | super(IdentityResidualBlock, self).__init__()
40 |
41 | # Check parameters for inconsistencies
42 | if len(channels) != 2 and len(channels) != 3:
43 | raise ValueError("channels must contain either two or three values")
44 | if len(channels) == 2 and groups != 1:
45 | raise ValueError("groups > 1 are only valid if len(channels) == 3")
46 |
47 | is_bottleneck = len(channels) == 3
48 | need_proj_conv = stride != 1 or in_channels != channels[-1]
49 |
50 | self.bn1 = norm_act(in_channels)
51 | if not is_bottleneck:
52 | layers = [
53 | ("conv1", nn.Conv2d(in_channels, channels[0], 3, stride=stride, padding=dilation, bias=False,
54 | dilation=dilation)),
55 | ("bn2", norm_act(channels[0])),
56 | ("conv2", nn.Conv2d(channels[0], channels[1], 3, stride=1, padding=dilation, bias=False,
57 | dilation=dilation))
58 | ]
59 | if dropout is not None:
60 | layers = layers[0:2] + [("dropout", dropout())] + layers[2:]
61 | else:
62 | layers = [
63 | ("conv1", nn.Conv2d(in_channels, channels[0], 1, stride=stride, padding=0, bias=False)),
64 | ("bn2", norm_act(channels[0])),
65 | ("conv2", nn.Conv2d(channels[0], channels[1], 3, stride=1, padding=dilation, bias=False,
66 | groups=groups, dilation=dilation)),
67 | ("bn3", norm_act(channels[1])),
68 | ("conv3", nn.Conv2d(channels[1], channels[2], 1, stride=1, padding=0, bias=False))
69 | ]
70 | if dropout is not None:
71 | layers = layers[0:4] + [("dropout", dropout())] + layers[4:]
72 | self.convs = nn.Sequential(OrderedDict(layers))
73 |
74 | if need_proj_conv:
75 | self.proj_conv = nn.Conv2d(in_channels, channels[-1], 1, stride=stride, padding=0, bias=False)
76 |
77 | def forward(self, x):
78 | if hasattr(self, "proj_conv"):
79 | bn1 = self.bn1(x)
80 | shortcut = self.proj_conv(bn1)
81 | else:
82 | shortcut = x.clone()
83 | bn1 = self.bn1(x)
84 |
85 | out = self.convs(bn1)
86 | out.add_(shortcut)
87 |
88 | return out
89 |
--------------------------------------------------------------------------------
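
One subtlety in `IdentityResidualBlock.forward`: when there is no projection convolution, the shortcut is `x.clone()`, taken before `self.bn1(x)` runs. The copy is load-bearing because the in-place ABN variants overwrite their input buffer; without it, the "identity" branch would silently alias the normalized tensor. A minimal demonstration with a generic in-place op standing in for InPlaceABN:

```python
import torch

x = torch.ones(3)
shortcut = x             # aliases the same storage
x.mul_(2)                # any in-place op; InPlaceABN mutates its input like this
print(shortcut)          # tensor([2., 2., 2.]) -- the shortcut changed too

x = torch.ones(3)
shortcut = x.clone()     # what IdentityResidualBlock.forward does
x.mul_(2)
print(shortcut)          # tensor([1., 1., 1.]) -- identity branch preserved
```
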
/graphs/ops/libs/src/bn.cu:
--------------------------------------------------------------------------------
1 | #include <cfloat>
2 | #include <cuda.h>
3 | #include <cuda_runtime.h>
4 |
5 | #include "common.h"
6 | #include "bn.h"
7 |
8 | /*
9 | * Device functions and data structures
10 | */
11 | struct Float2 {
12 | float v1, v2;
13 | __device__ Float2() {}
14 | __device__ Float2(float _v1, float _v2) : v1(_v1), v2(_v2) {}
15 | __device__ Float2(float v) : v1(v), v2(v) {}
16 | __device__ Float2(int v) : v1(v), v2(v) {}
17 | __device__ Float2 &operator+=(const Float2 &a) {
18 | v1 += a.v1;
19 | v2 += a.v2;
20 | return *this;
21 | }
22 | };
23 |
24 | struct SumOp {
25 | __device__ SumOp(const float *t, int c, int s)
26 | : tensor(t), C(c), S(s) {}
27 | __device__ __forceinline__ float operator()(int batch, int plane, int n) {
28 | return tensor[(batch * C + plane) * S + n];
29 | }
30 | const float *tensor;
31 | const int C;
32 | const int S;
33 | };
34 |
35 | struct VarOp {
36 | __device__ VarOp(float m, const float *t, int c, int s)
37 | : mean(m), tensor(t), C(c), S(s) {}
38 | __device__ __forceinline__ float operator()(int batch, int plane, int n) {
39 | float val = tensor[(batch * C + plane) * S + n];
40 | return (val - mean) * (val - mean);
41 | }
42 | const float mean;
43 | const float *tensor;
44 | const int C;
45 | const int S;
46 | };
47 |
48 | struct GradOp {
49 | __device__ GradOp(float _gamma, float _beta, const float *_z, const float *_dz, int c, int s)
50 | : gamma(_gamma), beta(_beta), z(_z), dz(_dz), C(c), S(s) {}
51 | __device__ __forceinline__ Float2 operator()(int batch, int plane, int n) {
52 | float _y = (z[(batch * C + plane) * S + n] - beta) / gamma;
53 | float _dz = dz[(batch * C + plane) * S + n];
54 | return Float2(_dz, _y * _dz);
55 | }
56 | const float gamma;
57 | const float beta;
58 | const float *z;
59 | const float *dz;
60 | const int C;
61 | const int S;
62 | };
63 |
64 | static __device__ __forceinline__ float warpSum(float val) {
65 | #if __CUDA_ARCH__ >= 300
66 | for (int i = 0; i < getMSB(WARP_SIZE); ++i) {
67 | val += WARP_SHFL_XOR(val, 1 << i, WARP_SIZE);
68 | }
69 | #else
70 | __shared__ float values[MAX_BLOCK_SIZE];
71 | values[threadIdx.x] = val;
72 | __threadfence_block();
73 | const int base = (threadIdx.x / WARP_SIZE) * WARP_SIZE;
74 | for (int i = 1; i < WARP_SIZE; i++) {
75 | val += values[base + ((i + threadIdx.x) % WARP_SIZE)];
76 | }
77 | #endif
78 | return val;
79 | }
80 |
81 | static __device__ __forceinline__ Float2 warpSum(Float2 value) {
82 | value.v1 = warpSum(value.v1);
83 | value.v2 = warpSum(value.v2);
84 | return value;
85 | }
86 |
87 | template <typename T, typename Op>
88 | __device__ T reduce(Op op, int plane, int N, int C, int S) {
89 | T sum = (T)0;
90 | for (int batch = 0; batch < N; ++batch) {
91 | for (int x = threadIdx.x; x < S; x += blockDim.x) {
92 | sum += op(batch, plane, x);
93 | }
94 | }
95 |
96 | // sum over NumThreads within a warp
97 | sum = warpSum(sum);
98 |
99 | // 'transpose', and reduce within warp again
100 | __shared__ T shared[32];
101 | __syncthreads();
102 | if (threadIdx.x % WARP_SIZE == 0) {
103 | shared[threadIdx.x / WARP_SIZE] = sum;
104 | }
105 | if (threadIdx.x >= blockDim.x / WARP_SIZE && threadIdx.x < WARP_SIZE) {
106 | // zero out the other entries in shared
107 | shared[threadIdx.x] = (T)0;
108 | }
109 | __syncthreads();
110 | if (threadIdx.x / WARP_SIZE == 0) {
111 | sum = warpSum(shared[threadIdx.x]);
112 | if (threadIdx.x == 0) {
113 | shared[0] = sum;
114 | }
115 | }
116 | __syncthreads();
117 |
118 | // Everyone picks it up, should be broadcast into the whole gradInput
119 | return shared[0];
120 | }
121 |
122 | /*
123 | * Kernels
124 | */
125 | __global__ void mean_var_kernel(const float *x, float *mean, float *var, int N,
126 | int C, int S) {
127 | int plane = blockIdx.x;
128 | float norm = 1.f / (N * S);
129 |
130 | float _mean = reduce(SumOp(x, C, S), plane, N, C, S) * norm;
131 | __syncthreads();
132 | float _var = reduce(VarOp(_mean, x, C, S), plane, N, C, S) * norm;
133 |
134 | if (threadIdx.x == 0) {
135 | mean[plane] = _mean;
136 | var[plane] = _var;
137 | }
138 | }
139 |
140 | __global__ void forward_kernel(const float *x, const float *mean,
141 | const float *var, const float *weight,
142 | const float *bias, float *y, float *z, float eps,
143 | int N, int C, int S) {
144 | int plane = blockIdx.x;
145 |
146 | float _mean = mean[plane];
147 | float _var = var[plane];
148 | float invStd = 0;
149 | if (_var != 0.f || eps != 0.f) {
150 | invStd = 1 / sqrt(_var + eps);
151 | }
152 |
153 | float gamma = weight != 0 ? abs(weight[plane]) + eps : 1.f;
154 | float beta = bias != 0 ? bias[plane] : 0.f;
155 | for (int batch = 0; batch < N; ++batch) {
156 | for (int n = threadIdx.x; n < S; n += blockDim.x) {
157 | float _x = x[(batch * C + plane) * S + n];
158 | float _y = (_x - _mean) * invStd;
159 | float _z = _y * gamma + beta;
160 |
161 | y[(batch * C + plane) * S + n] = _y;
162 | z[(batch * C + plane) * S + n] = _z;
163 | }
164 | }
165 | }
166 |
167 | __global__ void edz_eydz_kernel(const float *z, const float *dz, const float *weight, const float *bias,
168 | float *edz, float *eydz, float eps, int N, int C, int S) {
169 | int plane = blockIdx.x;
170 | float norm = 1.f / (N * S);
171 |
172 | float gamma = weight != 0 ? abs(weight[plane]) + eps : 1.f;
173 | float beta = bias != 0 ? bias[plane] : 0.f;
174 |
175 | Float2 res = reduce(GradOp(gamma, beta, z, dz, C, S), plane, N, C, S);
176 | float _edz = res.v1 * norm;
177 | float _eydz = res.v2 * norm;
178 | __syncthreads();
179 |
180 | if (threadIdx.x == 0) {
181 | edz[plane] = _edz;
182 | eydz[plane] = _eydz;
183 | }
184 | }
185 |
186 | __global__ void backward_kernel(const float *dz, const float *z, const float *var, const float *weight,
187 | const float *bias, const float *edz, const float *eydz, float *dx, float *dweight,
188 | float *dbias, float eps, int N, int C, int S) {
189 | int plane = blockIdx.x;
190 | float _edz = edz[plane];
191 | float _eydz = eydz[plane];
192 |
193 | float gamma = weight != 0 ? abs(weight[plane]) + eps : 1.f;
194 | float beta = bias != 0 ? bias[plane] : 0.f;
195 |
196 | if (dx != 0) {
197 | float _var = var[plane];
198 | float invStd = 0;
199 | if (_var != 0.f || eps != 0.f) {
200 | invStd = 1 / sqrt(_var + eps);
201 | }
202 |
203 | float mul = gamma * invStd;
204 |
205 | for (int batch = 0; batch < N; ++batch) {
206 | for (int n = threadIdx.x; n < S; n += blockDim.x) {
207 | float _dz = dz[(batch * C + plane) * S + n];
208 | float _y = (z[(batch * C + plane) * S + n] - beta) / gamma;
209 | dx[(batch * C + plane) * S + n] = (_dz - _edz - _y * _eydz) * mul;
210 | }
211 | }
212 | }
213 |
214 | if (dweight != 0 || dbias != 0) {
215 | float norm = N * S;
216 |
217 | if (dweight != 0) {
218 | if (threadIdx.x == 0) {
219 | if (weight[plane] > 0)
220 | dweight[plane] += _eydz * norm;
221 | else if (weight[plane] < 0)
222 | dweight[plane] -= _eydz * norm;
223 | }
224 | }
225 |
226 | if (dbias != 0) {
227 | if (threadIdx.x == 0) {
228 | dbias[plane] += _edz * norm;
229 | }
230 | }
231 | }
232 | }
233 |
234 | /*
235 | * Implementations
236 | */
237 | extern "C" int _bn_mean_var_cuda(int N, int C, int S, const float *x, float *mean,
238 | float *var, cudaStream_t stream) {
239 | // Run kernel
240 | dim3 blocks(C);
241 | dim3 threads(getNumThreads(S));
242 | mean_var_kernel<<<blocks, threads, 0, stream>>>(x, mean, var, N, C, S);
243 |
244 | // Check for errors
245 | cudaError_t err = cudaGetLastError();
246 | if (err != cudaSuccess)
247 | return 0;
248 | else
249 | return 1;
250 | }
251 |
252 | extern "C" int _bn_forward_cuda(int N, int C, int S, const float *x,
253 | const float *mean, const float *var,
254 | const float *weight, const float *bias, float *y,
255 | float *z, float eps, cudaStream_t stream) {
256 | // Run kernel
257 | dim3 blocks(C);
258 | dim3 threads(getNumThreads(S));
259 | forward_kernel<<<blocks, threads, 0, stream>>>(x, mean, var, weight, bias, y,
260 | z, eps, N, C, S);
261 |
262 | // Check for errors
263 | cudaError_t err = cudaGetLastError();
264 | if (err != cudaSuccess)
265 | return 0;
266 | else
267 | return 1;
268 | }
269 |
270 | extern "C" int _bn_edz_eydz_cuda(int N, int C, int S, const float *z, const float *dz, const float *weight,
271 | const float *bias, float *edz, float *eydz, float eps, cudaStream_t stream) {
272 | // Run kernel
273 | dim3 blocks(C);
274 | dim3 threads(getNumThreads(S));
275 | edz_eydz_kernel<<<blocks, threads, 0, stream>>>(z, dz, weight, bias, edz, eydz, eps, N, C, S);
276 |
277 | // Check for errors
278 | cudaError_t err = cudaGetLastError();
279 | if (err != cudaSuccess)
280 | return 0;
281 | else
282 | return 1;
283 | }
284 |
285 | extern "C" int _bn_backward_cuda(int N, int C, int S, const float *dz, const float *z, const float *var,
286 | const float *weight, const float *bias, const float *edz, const float *eydz,
287 | float *dx, float *dweight, float *dbias, float eps, cudaStream_t stream) {
288 | // Run kernel
289 | dim3 blocks(C);
290 | dim3 threads(getNumThreads(S));
291 | backward_kernel<<<blocks, threads, 0, stream>>>(dz, z, var, weight, bias, edz, eydz, dx, dweight, dbias,
292 | eps, N, C, S);
293 |
294 | // Check for errors
295 | cudaError_t err = cudaGetLastError();
296 | if (err != cudaSuccess)
297 | return 0;
298 | else
299 | return 1;
300 | }
301 |
302 | extern "C" int _leaky_relu_cuda(int N, float *x, float slope, cudaStream_t stream) {
303 | // Run using thrust
304 | thrust::device_ptr<float> th_x = thrust::device_pointer_cast(x);
305 | thrust::transform_if(thrust::cuda::par.on(stream), th_x, th_x + N, th_x,
306 | [slope] __device__ (const float& x) { return x * slope; },
307 | [] __device__ (const float& x) { return x < 0; });
308 |
309 | // Check for errors
310 | cudaError_t err = cudaGetLastError();
311 | if (err != cudaSuccess)
312 | return 0;
313 | else
314 | return 1;
315 | }
316 |
317 | extern "C" int _leaky_relu_backward_cuda(int N, const float *x, float *dx, float slope, cudaStream_t stream) {
318 | // Run using thrust
319 | thrust::device_ptr<const float> th_x = thrust::device_pointer_cast(x);
320 | thrust::device_ptr<float> th_dx = thrust::device_pointer_cast(dx);
321 | thrust::transform_if(thrust::cuda::par.on(stream), th_dx, th_dx + N, th_x, th_dx,
322 | [slope] __device__ (const float& dx) { return dx * slope; },
323 | [] __device__ (const float& x) { return x < 0; });
324 |
325 | // Check for errors
326 | cudaError_t err = cudaGetLastError();
327 | if (err != cudaSuccess)
328 | return 0;
329 | else
330 | return 1;
331 | }
332 |
333 | extern "C" int _elu_cuda(int N, float *x, cudaStream_t stream) {
334 | // Run using thrust
335 | thrust::device_ptr<float> th_x = thrust::device_pointer_cast(x);
336 | thrust::transform_if(thrust::cuda::par.on(stream), th_x, th_x + N, th_x,
337 | [] __device__ (const float& x) { return exp(x) - 1.f; },
338 | [] __device__ (const float& x) { return x < 0; });
339 |
340 | // Check for errors
341 | cudaError_t err = cudaGetLastError();
342 | if (err != cudaSuccess)
343 | return 0;
344 | else
345 | return 1;
346 | }
347 |
348 | extern "C" int _elu_backward_cuda(int N, const float *x, float *dx, cudaStream_t stream) {
349 | // Run using thrust
350 | thrust::device_ptr<const float> th_x = thrust::device_pointer_cast(x);
351 | thrust::device_ptr<float> th_dx = thrust::device_pointer_cast(dx);
352 | thrust::transform_if(thrust::cuda::par.on(stream), th_dx, th_dx + N, th_x, th_x, th_dx,
353 | [] __device__ (const float& dx, const float& x) { return dx * (x + 1.f); },
354 | [] __device__ (const float& x) { return x < 0; });
355 |
356 | // Check for errors
357 | cudaError_t err = cudaGetLastError();
358 | if (err != cudaSuccess)
359 | return 0;
360 | else
361 | return 1;
362 | }
363 |
364 | extern "C" int _elu_inv_cuda(int N, float *x, cudaStream_t stream) {
365 | // Run using thrust
366 | thrust::device_ptr<float> th_x = thrust::device_pointer_cast(x);
367 | thrust::transform_if(thrust::cuda::par.on(stream), th_x, th_x + N, th_x,
368 | [] __device__ (const float& x) { return log1p(x); },
369 | [] __device__ (const float& x) { return x < 0; });
370 |
371 | // Check for errors
372 | cudaError_t err = cudaGetLastError();
373 | if (err != cudaSuccess)
374 | return 0;
375 | else
376 | return 1;
377 | }
378 |
--------------------------------------------------------------------------------
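To make the `(N, C, S)` layout and the in-place trick above concrete, here is a hedged NumPy reference of what `mean_var_kernel` and `forward_kernel` compute (a CPU sketch for illustration only, not part of the extension):

```python
import numpy as np

def bn_forward_reference(x, weight, bias, eps=1e-5):
    """CPU reference for mean_var_kernel + forward_kernel above.

    x has shape (N, C, S): batch, channels, flattened spatial positions.
    Statistics are computed per channel over the N and S axes, and the
    scale is |weight| + eps, matching `abs(weight[plane]) + eps` in the kernel.
    """
    mean = x.mean(axis=(0, 2))                                # mean_var_kernel
    var = ((x - mean[None, :, None]) ** 2).mean(axis=(0, 2))
    inv_std = 1.0 / np.sqrt(var + eps)

    y = (x - mean[None, :, None]) * inv_std[None, :, None]    # normalized output y
    gamma = np.abs(weight) + eps
    z = y * gamma[None, :, None] + bias[None, :, None]        # affine output z
    return y, z, mean, var

x = np.random.randn(4, 3, 16).astype(np.float32)
w = np.random.randn(3).astype(np.float32)
b = np.random.randn(3).astype(np.float32)
y, z, mean, var = bn_forward_reference(x, w, b)
```

Because the scale is forced positive (`abs(weight) + eps`), the backward kernels can recover the normalized activation as `(z - beta) / gamma` from the output alone (this is exactly what `GradOp` does), which is what makes the in-place variant possible.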
/graphs/ops/libs/src/bn.h:
--------------------------------------------------------------------------------
1 | #ifndef __BN__
2 | #define __BN__
3 |
4 | /*
5 | * Exported functions
6 | */
7 | extern "C" int _bn_mean_var_cuda(int N, int C, int S, const float *x, float *mean, float *var, cudaStream_t);
8 | extern "C" int _bn_forward_cuda(int N, int C, int S, const float *x, const float *mean, const float *var,
9 | const float *weight, const float *bias, float *y, float *z, float eps, cudaStream_t);
10 | extern "C" int _bn_edz_eydz_cuda(int N, int C, int S, const float *z, const float *dz, const float *weight,
11 | const float *bias, float *edz, float *eydz, float eps, cudaStream_t stream);
12 | extern "C" int _bn_backward_cuda(int N, int C, int S, const float *dz, const float *z, const float *var,
13 | const float *weight, const float *bias, const float *edz, const float *eydz, float *dx,
14 | float *dweight, float *dbias, float eps, cudaStream_t stream);
15 | extern "C" int _leaky_relu_cuda(int N, float *x, float slope, cudaStream_t stream);
16 | extern "C" int _leaky_relu_backward_cuda(int N, const float *x, float *dx, float slope, cudaStream_t stream);
17 | extern "C" int _elu_cuda(int N, float *x, cudaStream_t stream);
18 | extern "C" int _elu_backward_cuda(int N, const float *x, float *dx, cudaStream_t stream);
19 | extern "C" int _elu_inv_cuda(int N, float *x, cudaStream_t stream);
20 |
21 | #endif
22 |
--------------------------------------------------------------------------------
/graphs/ops/libs/src/common.h:
--------------------------------------------------------------------------------
1 | #ifndef __COMMON__
2 | #define __COMMON__
3 | #include <cuda_runtime_api.h>
4 |
5 | /*
6 | * General settings
7 | */
8 | const int WARP_SIZE = 32;
9 | const int MAX_BLOCK_SIZE = 512;
10 |
11 | /*
12 | * Utility functions
13 | */
14 | template <typename T>
15 | __device__ __forceinline__ T WARP_SHFL_XOR(T value, int laneMask, int width = warpSize,
16 | unsigned int mask = 0xffffffff) {
17 | #if CUDART_VERSION >= 9000
18 | return __shfl_xor_sync(mask, value, laneMask, width);
19 | #else
20 | return __shfl_xor(value, laneMask, width);
21 | #endif
22 | }
23 |
24 | __device__ __forceinline__ int getMSB(int val) { return 31 - __clz(val); }
25 |
26 | static int getNumThreads(int nElem) {
27 | int threadSizes[5] = {32, 64, 128, 256, MAX_BLOCK_SIZE};
28 | for (int i = 0; i != 5; ++i) {
29 | if (nElem <= threadSizes[i]) {
30 | return threadSizes[i];
31 | }
32 | }
33 | return MAX_BLOCK_SIZE;
34 | }
35 |
36 |
37 | #endif
--------------------------------------------------------------------------------
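For clarity, the block-size heuristic in `getNumThreads` transcribed directly to Python: the launchers in bn.cu start one CUDA block per channel plane, with the smallest candidate block size that covers the spatial extent `S`.

```python
# Python transcription of getNumThreads from common.h.
MAX_BLOCK_SIZE = 512

def get_num_threads(n_elem):
    for size in (32, 64, 128, 256, MAX_BLOCK_SIZE):
        if n_elem <= size:
            return size
    return MAX_BLOCK_SIZE

assert get_num_threads(20) == 32          # tiny planes still get a full warp
assert get_num_threads(200) == 256
assert get_num_threads(480 * 640) == 512  # larger planes are strided inside the kernel
```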
/graphs/ops/libs/src/lib_cffi.cpp:
--------------------------------------------------------------------------------
1 | // All functions assume that input and output tensors are already initialized
2 | // and have the correct dimensions
3 | #include <THC/THC.h>
4 |
5 | // Forward definition of implementation functions
6 | extern "C" {
7 | int _bn_mean_var_cuda(int N, int C, int S, const float *x, float *mean, float *var, cudaStream_t);
8 | int _bn_forward_cuda(int N, int C, int S, const float *x, const float *mean, const float *var, const float *weight,
9 | const float *bias, float *y, float *z, float eps, cudaStream_t);
10 | int _bn_edz_eydz_cuda(int N, int C, int S, const float *z, const float *dz, const float *weight, const float *bias,
11 | float *edz, float *eydz, float eps, cudaStream_t stream);
12 | int _bn_backward_cuda(int N, int C, int S, const float *dz, const float *z, const float *var, const float *weight,
13 | const float *bias, const float *edz, const float *eydz, float *dx, float *dweight, float *dbias,
14 | float eps, cudaStream_t stream);
15 | int _leaky_relu_cuda(int N, float *x, float slope, cudaStream_t stream);
16 | int _leaky_relu_backward_cuda(int N, const float *x, float *dx, float slope, cudaStream_t stream);
17 | int _elu_cuda(int N, float *x, cudaStream_t stream);
18 | int _elu_backward_cuda(int N, const float *x, float *dx, cudaStream_t stream);
19 | int _elu_inv_cuda(int N, float *x, cudaStream_t stream);
20 | }
21 |
22 | extern THCState *state;
23 |
24 | void get_sizes(const THCudaTensor *t, int *N, int *C, int *S){
25 | // Get sizes
26 | *S = 1;
27 | *N = THCudaTensor_size(state, t, 0);
28 | *C = THCudaTensor_size(state, t, 1);
29 | if (THCudaTensor_nDimension(state, t) > 2) {
30 | for (int i = 2; i < THCudaTensor_nDimension(state, t); ++i) {
31 | *S *= THCudaTensor_size(state, t, i);
32 | }
33 | }
34 | }
35 |
36 | extern "C" int bn_mean_var_cuda(const THCudaTensor *x, THCudaTensor *mean, THCudaTensor *var) {
37 | cudaStream_t stream = THCState_getCurrentStream(state);
38 |
39 | int S, N, C;
40 | get_sizes(x, &N, &C, &S);
41 |
42 | // Get pointers
43 | const float *x_data = THCudaTensor_data(state, x);
44 | float *mean_data = THCudaTensor_data(state, mean);
45 | float *var_data = THCudaTensor_data(state, var);
46 |
47 | return _bn_mean_var_cuda(N, C, S, x_data, mean_data, var_data, stream);
48 | }
49 |
50 | extern "C" int bn_forward_cuda(const THCudaTensor *x, const THCudaTensor *mean, const THCudaTensor *var,
51 | const THCudaTensor *weight, const THCudaTensor *bias, THCudaTensor *y, THCudaTensor *z,
52 | float eps) {
53 | cudaStream_t stream = THCState_getCurrentStream(state);
54 |
55 | int S, N, C;
56 | get_sizes(x, &N, &C, &S);
57 |
58 | // Get pointers
59 | const float *x_data = THCudaTensor_data(state, x);
60 | const float *mean_data = THCudaTensor_data(state, mean);
61 | const float *var_data = THCudaTensor_data(state, var);
62 | const float *weight_data = THCudaTensor_nDimension(state, weight) != 0 ? THCudaTensor_data(state, weight) : 0;
63 | const float *bias_data = THCudaTensor_nDimension(state, bias) != 0 ? THCudaTensor_data(state, bias) : 0;
64 | float *y_data = THCudaTensor_data(state, y);
65 | float *z_data = THCudaTensor_data(state, z);
66 |
67 | return _bn_forward_cuda(N, C, S, x_data, mean_data, var_data, weight_data, bias_data, y_data, z_data, eps, stream);
68 | }
69 |
70 | extern "C" int bn_edz_eydz_cuda(const THCudaTensor *z, const THCudaTensor *dz, const THCudaTensor *weight,
71 | const THCudaTensor *bias, THCudaTensor *edz, THCudaTensor *eydz, float eps) {
72 | cudaStream_t stream = THCState_getCurrentStream(state);
73 |
74 | int S, N, C;
75 | get_sizes(z, &N, &C, &S);
76 |
77 | // Get pointers
78 | const float *z_data = THCudaTensor_data(state, z);
79 | const float *dz_data = THCudaTensor_data(state, dz);
80 | const float *weight_data = THCudaTensor_nDimension(state, weight) != 0 ? THCudaTensor_data(state, weight) : 0;
81 | const float *bias_data = THCudaTensor_nDimension(state, bias) != 0 ? THCudaTensor_data(state, bias) : 0;
82 | float *edz_data = THCudaTensor_data(state, edz);
83 | float *eydz_data = THCudaTensor_data(state, eydz);
84 |
85 | return _bn_edz_eydz_cuda(N, C, S, z_data, dz_data, weight_data, bias_data, edz_data, eydz_data, eps, stream);
86 | }
87 |
88 | extern "C" int bn_backard_cuda(const THCudaTensor *dz, const THCudaTensor *z, const THCudaTensor *var,
89 | const THCudaTensor *weight, const THCudaTensor *bias, const THCudaTensor *edz,
90 | const THCudaTensor *eydz, THCudaTensor *dx, THCudaTensor *dweight,
91 | THCudaTensor *dbias, float eps) {
92 | cudaStream_t stream = THCState_getCurrentStream(state);
93 |
94 | int S, N, C;
95 | get_sizes(dz, &N, &C, &S);
96 |
97 | // Get pointers
98 | const float *dz_data = THCudaTensor_data(state, dz);
99 | const float *z_data = THCudaTensor_data(state, z);
100 | const float *var_data = THCudaTensor_data(state, var);
101 | const float *weight_data = THCudaTensor_nDimension(state, weight) != 0 ? THCudaTensor_data(state, weight) : 0;
102 | const float *bias_data = THCudaTensor_nDimension(state, bias) != 0 ? THCudaTensor_data(state, bias) : 0;
103 | const float *edz_data = THCudaTensor_data(state, edz);
104 | const float *eydz_data = THCudaTensor_data(state, eydz);
105 | float *dx_data = THCudaTensor_nDimension(state, dx) != 0 ? THCudaTensor_data(state, dx) : 0;
106 | float *dweight_data = THCudaTensor_nDimension(state, dweight) != 0 ? THCudaTensor_data(state, dweight) : 0;
107 | float *dbias_data = THCudaTensor_nDimension(state, dbias) != 0 ? THCudaTensor_data(state, dbias) : 0;
108 |
109 | return _bn_backward_cuda(N, C, S, dz_data, z_data, var_data, weight_data, bias_data, edz_data, eydz_data, dx_data,
110 | dweight_data, dbias_data, eps, stream);
111 | }
112 |
113 | extern "C" int leaky_relu_cuda(THCudaTensor *x, float slope) {
114 | cudaStream_t stream = THCState_getCurrentStream(state);
115 |
116 | int N = THCudaTensor_nElement(state, x);
117 |
118 | // Get pointers
119 | float *x_data = THCudaTensor_data(state, x);
120 |
121 | return _leaky_relu_cuda(N, x_data, slope, stream);
122 | }
123 |
124 | extern "C" int leaky_relu_backward_cuda(const THCudaTensor *x, THCudaTensor *dx, float slope) {
125 | cudaStream_t stream = THCState_getCurrentStream(state);
126 |
127 | int N = THCudaTensor_nElement(state, x);
128 |
129 | // Get pointers
130 | const float *x_data = THCudaTensor_data(state, x);
131 | float *dx_data = THCudaTensor_data(state, dx);
132 |
133 | return _leaky_relu_backward_cuda(N, x_data, dx_data, slope, stream);
134 | }
135 |
136 | extern "C" int elu_cuda(THCudaTensor *x) {
137 | cudaStream_t stream = THCState_getCurrentStream(state);
138 |
139 | int N = THCudaTensor_nElement(state, x);
140 |
141 | // Get pointers
142 | float *x_data = THCudaTensor_data(state, x);
143 |
144 | return _elu_cuda(N, x_data, stream);
145 | }
146 |
147 | extern "C" int elu_backward_cuda(const THCudaTensor *x, THCudaTensor *dx) {
148 | cudaStream_t stream = THCState_getCurrentStream(state);
149 |
150 | int N = THCudaTensor_nElement(state, x);
151 |
152 | // Get pointers
153 | const float *x_data = THCudaTensor_data(state, x);
154 | float *dx_data = THCudaTensor_data(state, dx);
155 |
156 | return _elu_backward_cuda(N, x_data, dx_data, stream);
157 | }
158 |
159 | extern "C" int elu_inv_cuda(THCudaTensor *x) {
160 | cudaStream_t stream = THCState_getCurrentStream(state);
161 |
162 | int N = THCudaTensor_nElement(state, x);
163 |
164 | // Get pointers
165 | float *x_data = THCudaTensor_data(state, x);
166 |
167 | return _elu_inv_cuda(N, x_data, stream);
168 | }
169 |
--------------------------------------------------------------------------------
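The `(N, C, S)` flattening performed by `get_sizes` is the glue between the PyTorch tensors and the kernels in bn.cu: every tensor is viewed as batch x channels x (product of all remaining dimensions). A small illustrative Python sketch of the same logic:

```python
from functools import reduce
import operator

def get_sizes(shape):
    """Sketch of get_sizes from lib_cffi.cpp: collapse dims after C into S."""
    n, c = shape[0], shape[1]
    s = reduce(operator.mul, shape[2:], 1)
    return n, c, s

assert get_sizes((8, 64, 30, 40)) == (8, 64, 1200)  # 4D feature map
assert get_sizes((8, 64)) == (8, 64, 1)             # 2D input
```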
/graphs/ops/libs/src/lib_cffi.h:
--------------------------------------------------------------------------------
1 | int bn_mean_var_cuda(const THCudaTensor *x, THCudaTensor *mean, THCudaTensor *var);
2 | int bn_forward_cuda(const THCudaTensor *x, const THCudaTensor *mean, const THCudaTensor *var,
3 | const THCudaTensor *weight, const THCudaTensor *bias, THCudaTensor *y, THCudaTensor *z,
4 | float eps);
5 | int bn_edz_eydz_cuda(const THCudaTensor *z, const THCudaTensor *dz, const THCudaTensor *weight,
6 | const THCudaTensor *bias, THCudaTensor *edz, THCudaTensor *eydz, float eps);
7 | int bn_backard_cuda(const THCudaTensor *dz, const THCudaTensor *z, const THCudaTensor *var,
8 | const THCudaTensor *weight, const THCudaTensor *bias, const THCudaTensor *edz,
9 | const THCudaTensor *eydz, THCudaTensor *dx, THCudaTensor *dweight, THCudaTensor *dbias,
10 | float eps);
11 | int leaky_relu_cuda(THCudaTensor *x, float slope);
12 | int leaky_relu_backward_cuda(const THCudaTensor *x, THCudaTensor *dx, float slope);
13 | int elu_cuda(THCudaTensor *x);
14 | int elu_backward_cuda(const THCudaTensor *x, THCudaTensor *dx);
15 | int elu_inv_cuda(THCudaTensor *x);
--------------------------------------------------------------------------------
/graphs/ops/make.sh:
--------------------------------------------------------------------------------
1 | cd src
2 | nvcc -c -o deform_conv_cuda_kernel.cu.so deform_conv_cuda_kernel.cu -x cu -Xcompiler -fPIC -std=c++11
3 | nvcc -c -o scale_conv_cuda_kernel.cu.so scale_conv_cuda_kernel.cu -x cu -Xcompiler -fPIC -std=c++11
4 | cd cuda
5 |
6 | # compile modulated deform conv
7 | nvcc -c -o modulated_deform_im2col_cuda.cu.so modulated_deform_im2col_cuda.cu -x cu -Xcompiler -fPIC
8 |
9 | # compile deform-psroi-pooling
10 | nvcc -c -o deform_psroi_pooling_cuda.cu.so deform_psroi_pooling_cuda.cu -x cu -Xcompiler -fPIC
11 |
12 | cd ../..
13 | CC=g++ python build.py
14 | python build_modulated.py
15 |
--------------------------------------------------------------------------------
/graphs/ops/modules/__init__.py:
--------------------------------------------------------------------------------
1 | from .deform_conv import DeformConv
2 | from .s_conv import SConv
3 |
--------------------------------------------------------------------------------
/graphs/ops/modules/deform_conv.py:
--------------------------------------------------------------------------------
1 | import math
2 |
3 | import torch
4 | import torch.nn as nn
5 | from torch.nn.modules.module import Module
6 | from torch.nn.modules.utils import _pair
7 | from graphs.ops.functions import deform_conv_function
8 |
9 |
10 | class DeformConv(Module):
11 | def __init__(self,
12 | in_channels,
13 | out_channels,
14 | kernel_size,
15 | stride=1,
16 | padding=0,
17 | dilation=1,
18 | num_deformable_groups=1):
19 | super(DeformConv, self).__init__()
20 | self.in_channels = in_channels
21 | self.out_channels = out_channels
22 | self.kernel_size = _pair(kernel_size)
23 | self.stride = _pair(stride)
24 | self.padding = _pair(padding)
25 | self.dilation = _pair(dilation)
26 | self.num_deformable_groups = num_deformable_groups
27 |
28 | self.weight = nn.Parameter(
29 | torch.Tensor(out_channels, in_channels, *self.kernel_size))
30 |
31 | self.reset_parameters()
32 |
33 | def reset_parameters(self):
34 | n = self.in_channels
35 | for k in self.kernel_size:
36 | n *= k
37 | stdv = 1. / math.sqrt(n)
38 | self.weight.data.uniform_(-stdv, stdv)
39 |
40 | def forward(self, input, offset):
41 | return deform_conv_function(input, offset, self.weight, self.stride,
42 | self.padding, self.dilation,
43 | self.num_deformable_groups)
44 |
--------------------------------------------------------------------------------
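A hedged usage sketch for `DeformConv` (it assumes the CUDA extension has been built via `graphs/ops/make.sh` and that a GPU is available; the offset layout of two values per kernel tap per deformable group is the standard deformable-convolution convention):

```python
import torch
from graphs.ops.modules import DeformConv

# Minimal usage sketch: not a training recipe, just the tensor shapes.
conv = DeformConv(in_channels=64, out_channels=128, kernel_size=3,
                  padding=1, num_deformable_groups=1).cuda()

x = torch.randn(2, 64, 32, 32).cuda()
# One (dy, dx) pair per kernel tap per deformable group:
# 2 * 3 * 3 * 1 = 18 offset channels, at the output resolution.
offset = torch.zeros(2, 18, 32, 32).cuda()

out = conv(x, offset)  # with zero offsets this reduces to a plain 3x3 conv,
print(out.shape)       # which makes a handy sanity check: [2, 128, 32, 32]
```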
/graphs/ops/src/cuda/deform_psroi_pooling_cuda.h:
--------------------------------------------------------------------------------
1 | /*!
2 | * Copyright (c) 2017 Microsoft
3 | * Licensed under The MIT License [see LICENSE for details]
4 | * \file deformable_psroi_pooling.cu
5 | * \brief
6 | * \author Yi Li, Guodong Zhang, Jifeng Dai
7 | */
8 | /***************** Adapted by Charles Shang *********************/
9 |
10 | #ifndef DCN_V2_PSROI_POOLING_CUDA
11 | #define DCN_V2_PSROI_POOLING_CUDA
12 |
13 | #ifdef __cplusplus
14 | extern "C"
15 | {
16 | #endif
17 |
18 | void DeformablePSROIPoolForward(cudaStream_t stream,
19 | const float *data,
20 | const float *bbox,
21 | const float *trans,
22 | float *out,
23 | float *top_count,
24 | const int batch,
25 | const int channels,
26 | const int height,
27 | const int width,
28 | const int num_bbox,
29 | const int channels_trans,
30 | const int no_trans,
31 | const float spatial_scale,
32 | const int output_dim,
33 | const int group_size,
34 | const int pooled_size,
35 | const int part_size,
36 | const int sample_per_part,
37 | const float trans_std);
38 |
39 | void DeformablePSROIPoolBackwardAcc(cudaStream_t stream,
40 | const float *out_grad,
41 | const float *data,
42 | const float *bbox,
43 | const float *trans,
44 | const float *top_count,
45 | float *in_grad,
46 | float *trans_grad,
47 | const int batch,
48 | const int channels,
49 | const int height,
50 | const int width,
51 | const int num_bbox,
52 | const int channels_trans,
53 | const int no_trans,
54 | const float spatial_scale,
55 | const int output_dim,
56 | const int group_size,
57 | const int pooled_size,
58 | const int part_size,
59 | const int sample_per_part,
60 | const float trans_std);
61 |
62 | #ifdef __cplusplus
63 | }
64 | #endif
65 |
66 | #endif
--------------------------------------------------------------------------------
/graphs/ops/src/cuda/modulated_deform_im2col_cuda.h:
--------------------------------------------------------------------------------
1 | /*!
2 | ******************* BEGIN Caffe Copyright Notice and Disclaimer ****************
3 | *
4 | * COPYRIGHT
5 | *
6 | * All contributions by the University of California:
7 | * Copyright (c) 2014-2017 The Regents of the University of California (Regents)
8 | * All rights reserved.
9 | *
10 | * All other contributions:
11 | * Copyright (c) 2014-2017, the respective contributors
12 | * All rights reserved.
13 | *
14 | * Caffe uses a shared copyright model: each contributor holds copyright over
15 | * their contributions to Caffe. The project versioning records all such
16 | * contribution and copyright details. If a contributor wants to further mark
17 | * their specific copyright on a particular contribution, they should indicate
18 | * their copyright solely in the commit message of the change when it is
19 | * committed.
20 | *
21 | * LICENSE
22 | *
23 | * Redistribution and use in source and binary forms, with or without
24 | * modification, are permitted provided that the following conditions are met:
25 | *
26 | * 1. Redistributions of source code must retain the above copyright notice, this
27 | * list of conditions and the following disclaimer.
28 | * 2. Redistributions in binary form must reproduce the above copyright notice,
29 | * this list of conditions and the following disclaimer in the documentation
30 | * and/or other materials provided with the distribution.
31 | *
32 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
33 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
34 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
35 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
36 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
37 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
38 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
39 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
40 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
41 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
42 | *
43 | * CONTRIBUTION AGREEMENT
44 | *
45 | * By contributing to the BVLC/caffe repository through pull-request, comment,
46 | * or otherwise, the contributor releases their content to the
47 | * license and copyright terms herein.
48 | *
49 | ***************** END Caffe Copyright Notice and Disclaimer ********************
50 | *
51 | * Copyright (c) 2018 Microsoft
52 | * Licensed under The MIT License [see LICENSE for details]
53 | * \file modulated_deformable_im2col.h
54 | * \brief Function definitions of converting an image to
55 | * column matrix based on kernel, padding, dilation, and offset.
56 | * These functions are mainly used in deformable convolution operators.
57 | * \ref: https://arxiv.org/abs/1811.11168
58 | * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu
59 | */
60 |
61 | /***************** Adapted by Charles Shang *********************/
62 |
63 | #ifndef DCN_V2_IM2COL_CUDA
64 | #define DCN_V2_IM2COL_CUDA
65 |
66 | #ifdef __cplusplus
67 | extern "C"
68 | {
69 | #endif
70 |
71 | void modulated_deformable_im2col_cuda(cudaStream_t stream,
72 | const float *data_im, const float *data_offset, const float *data_mask,
73 | const int batch_size, const int channels, const int height_im, const int width_im,
74 | const int height_col, const int width_col, const int kernel_h, const int kernel_w,
75 | const int pad_h, const int pad_w, const int stride_h, const int stride_w,
76 | const int dilation_h, const int dilation_w,
77 | const int deformable_group, float *data_col);
78 |
79 | void modulated_deformable_col2im_cuda(cudaStream_t stream,
80 | const float *data_col, const float *data_offset, const float *data_mask,
81 | const int batch_size, const int channels, const int height_im, const int width_im,
82 | const int height_col, const int width_col, const int kernel_h, const int kernel_w,
83 | const int pad_h, const int pad_w, const int stride_h, const int stride_w,
84 | const int dilation_h, const int dilation_w,
85 | const int deformable_group, float *grad_im);
86 |
87 | void modulated_deformable_col2im_coord_cuda(cudaStream_t stream,
88 | const float *data_col, const float *data_im, const float *data_offset, const float *data_mask,
89 | const int batch_size, const int channels, const int height_im, const int width_im,
90 | const int height_col, const int width_col, const int kernel_h, const int kernel_w,
91 | const int pad_h, const int pad_w, const int stride_h, const int stride_w,
92 | const int dilation_h, const int dilation_w,
93 | const int deformable_group,
94 | float *grad_offset, float *grad_mask);
95 |
96 | #ifdef __cplusplus
97 | }
98 | #endif
99 |
100 | #endif
--------------------------------------------------------------------------------
/graphs/ops/src/deform_conv.c:
--------------------------------------------------------------------------------
1 | #include <TH/TH.h>
2 |
3 | int deform_conv_forward(THFloatTensor *input, THFloatTensor *offset,
4 | THFloatTensor *output)
5 | {
6 | // if (!THFloatTensor_isSameSizeAs(input1, input2))
7 | // return 0;
8 | // THFloatTensor_resizeAs(output, input);
9 | // THFloatTensor_cadd(output, input1, 1.0, input2);
10 | return 1;
11 | }
12 |
13 | int deform_conv_backward(THFloatTensor *grad_output, THFloatTensor *grad_input,
14 | THFloatTensor *grad_offset)
15 | {
16 | // THFloatTensor_resizeAs(grad_input, grad_output);
17 | // THFloatTensor_fill(grad_input, 1);
18 | return 1;
19 | }
20 |
--------------------------------------------------------------------------------
/graphs/ops/src/deform_conv.h:
--------------------------------------------------------------------------------
1 | int deform_conv_forward(THFloatTensor *input, THFloatTensor *offset,
2 | THFloatTensor *output);
3 | int deform_conv_backward(THFloatTensor *grad_output, THFloatTensor *grad_input,
4 | THFloatTensor *grad_offset);
5 |
--------------------------------------------------------------------------------
/graphs/ops/src/deform_conv_cuda.h:
--------------------------------------------------------------------------------
1 | int deform_conv_forward_cuda(THCudaTensor *input,
2 | THCudaTensor *weight, /*THCudaTensor * bias, */
3 | THCudaTensor *offset, THCudaTensor *output,
4 | THCudaTensor *columns, THCudaTensor *ones, int kW,
5 | int kH, int dW, int dH, int padW, int padH,
6 | int dilationW, int dilationH,
7 | int deformable_group, int im2col_step);
8 |
9 | int deform_conv_backward_input_cuda(
10 | THCudaTensor *input, THCudaTensor *offset, THCudaTensor *gradOutput,
11 | THCudaTensor *gradInput, THCudaTensor *gradOffset, THCudaTensor *weight,
12 | THCudaTensor *columns, int kW, int kH, int dW, int dH, int padW, int padH,
13 | int dilationW, int dilationH, int deformable_group, int im2col_step);
14 |
15 | int deform_conv_backward_parameters_cuda(
16 | THCudaTensor *input, THCudaTensor *offset, THCudaTensor *gradOutput,
17 | THCudaTensor *gradWeight, /*THCudaTensor *gradBias, */
18 | THCudaTensor *columns, THCudaTensor *ones, int kW, int kH, int dW, int dH,
19 | int padW, int padH, int dilationW, int dilationH, int deformable_group,
20 | float scale, int im2col_step);
21 |
--------------------------------------------------------------------------------
/graphs/ops/src/deform_conv_cuda_kernel.h:
--------------------------------------------------------------------------------
1 | template <typename DType>
2 | void deformable_im2col(cudaStream_t stream, const DType *data_im,
3 | const DType *data_offset, const int channels,
4 | const int height, const int width, const int ksize_h,
5 | const int ksize_w, const int pad_h, const int pad_w,
6 | const int stride_h, const int stride_w,
7 | const int dilation_h, const int dilation_w,
8 | const int parallel_imgs,
9 | const int deformable_group, DType *data_col);
10 |
11 | template <typename DType>
12 | void deformable_col2im(cudaStream_t stream, const DType *data_col,
13 | const DType *data_offset, const int channels,
14 | const int height, const int width, const int ksize_h,
15 | const int ksize_w, const int pad_h, const int pad_w,
16 | const int stride_h, const int stride_w,
17 | const int dilation_h, const int dilation_w,
18 | const int parallel_imgs,
19 | const int deformable_group, DType *grad_im);
20 |
21 | template <typename DType>
22 | void deformable_col2im_coord(cudaStream_t stream, const DType *data_col,
23 | const DType *data_im, const DType *data_offset,
24 | const int channels, const int height,
25 | const int width, const int ksize_h,
26 | const int ksize_w, const int pad_h,
27 | const int pad_w, const int stride_h,
28 | const int stride_w, const int dilation_h,
29 | const int dilation_w, const int parallel_imgs,
30 | const int deformable_group, DType *grad_offset);
31 |
--------------------------------------------------------------------------------
/graphs/ops/src/modulated_dcn.c:
--------------------------------------------------------------------------------
1 | #include <stdio.h>
2 | #include <stdlib.h>
3 | #include <TH/TH.h>
4 |
5 | void modulated_deform_conv_forward(THFloatTensor *input, THFloatTensor *weight,
6 | THFloatTensor *bias, THFloatTensor *ones,
7 | THFloatTensor *offset, THFloatTensor *mask,
8 | THFloatTensor *output, THFloatTensor *columns,
9 | const int pad_h, const int pad_w,
10 | const int stride_h, const int stride_w,
11 | const int dilation_h, const int dilation_w,
12 | const int deformable_group)
13 | {
14 | printf("only implemented on the GPU");
15 | }
16 | void modulated_deform_conv_backward(THFloatTensor *input, THFloatTensor *weight,
17 | THFloatTensor *bias, THFloatTensor *ones,
18 | THFloatTensor *offset, THFloatTensor *mask,
19 | THFloatTensor *output, THFloatTensor *columns,
20 | THFloatTensor *grad_input, THFloatTensor *grad_weight,
21 | THFloatTensor *grad_bias, THFloatTensor *grad_offset,
22 | THFloatTensor *grad_mask, THFloatTensor *grad_output,
23 | int kernel_h, int kernel_w,
24 | int stride_h, int stride_w,
25 | int pad_h, int pad_w,
26 | int dilation_h, int dilation_w,
27 | int deformable_group)
28 | {
29 | printf("only implemented on the GPU");
30 | }
--------------------------------------------------------------------------------
/graphs/ops/src/modulated_dcn.h:
--------------------------------------------------------------------------------
1 | void modulated_deform_conv_forward(THFloatTensor *input, THFloatTensor *weight,
2 | THFloatTensor *bias, THFloatTensor *ones,
3 | THFloatTensor *offset, THFloatTensor *mask,
4 | THFloatTensor *output, THFloatTensor *columns,
5 | const int pad_h, const int pad_w,
6 | const int stride_h, const int stride_w,
7 | const int dilation_h, const int dilation_w,
8 | const int deformable_group);
9 | void modulated_deform_conv_backward(THFloatTensor *input, THFloatTensor *weight,
10 | THFloatTensor *bias, THFloatTensor *ones,
11 | THFloatTensor *offset, THFloatTensor *mask,
12 | THFloatTensor *output, THFloatTensor *columns,
13 | THFloatTensor *grad_input, THFloatTensor *grad_weight,
14 | THFloatTensor *grad_bias, THFloatTensor *grad_offset,
15 | THFloatTensor *grad_mask, THFloatTensor *grad_output,
16 | int kernel_h, int kernel_w,
17 | int stride_h, int stride_w,
18 | int pad_h, int pad_w,
19 | int dilation_h, int dilation_w,
20 | int deformable_group);
--------------------------------------------------------------------------------
/graphs/ops/src/modulated_dcn_cuda.h:
--------------------------------------------------------------------------------
1 | // #ifndef DCN_V2_CUDA
2 | // #define DCN_V2_CUDA
3 |
4 | // #ifdef __cplusplus
5 | // extern "C"
6 | // {
7 | // #endif
8 |
9 | void modulated_deform_conv_cuda_forward(THCudaTensor *input, THCudaTensor *weight,
10 | THCudaTensor *bias, THCudaTensor *ones,
11 | THCudaTensor *offset, THCudaTensor *mask,
12 | THCudaTensor *output, THCudaTensor *columns,
13 | int kernel_h, int kernel_w,
14 | const int stride_h, const int stride_w,
15 | const int pad_h, const int pad_w,
16 | const int dilation_h, const int dilation_w,
17 | const int deformable_group);
18 | void modulated_deform_conv_cuda_backward(THCudaTensor *input, THCudaTensor *weight,
19 | THCudaTensor *bias, THCudaTensor *ones,
20 | THCudaTensor *offset, THCudaTensor *mask,
21 | THCudaTensor *columns,
22 | THCudaTensor *grad_input, THCudaTensor *grad_weight,
23 | THCudaTensor *grad_bias, THCudaTensor *grad_offset,
24 | THCudaTensor *grad_mask, THCudaTensor *grad_output,
25 | int kernel_h, int kernel_w,
26 | int stride_h, int stride_w,
27 | int pad_h, int pad_w,
28 | int dilation_h, int dilation_w,
29 | int deformable_group);
30 |
31 | void deform_psroi_pooling_cuda_forward(THCudaTensor * input, THCudaTensor * bbox,
32 | THCudaTensor * trans,
33 | THCudaTensor * out, THCudaTensor * top_count,
34 | const int no_trans,
35 | const float spatial_scale,
36 | const int output_dim,
37 | const int group_size,
38 | const int pooled_size,
39 | const int part_size,
40 | const int sample_per_part,
41 | const float trans_std);
42 |
43 | void deform_psroi_pooling_cuda_backward(THCudaTensor * out_grad,
44 | THCudaTensor * input, THCudaTensor * bbox,
45 | THCudaTensor * trans, THCudaTensor * top_count,
46 | THCudaTensor * input_grad, THCudaTensor * trans_grad,
47 | const int no_trans,
48 | const float spatial_scale,
49 | const int output_dim,
50 | const int group_size,
51 | const int pooled_size,
52 | const int part_size,
53 | const int sample_per_part,
54 | const float trans_std);
55 |
--------------------------------------------------------------------------------
/graphs/ops/src/scale_conv.c:
--------------------------------------------------------------------------------
1 | #include <TH/TH.h>
2 |
3 | int scale_conv_forward(THFloatTensor *input, THFloatTensor *scale,
4 | THFloatTensor *output)
5 | {
6 | // if (!THFloatTensor_isSameSizeAs(input1, input2))
7 | // return 0;
8 | // THFloatTensor_resizeAs(output, input);
9 | // THFloatTensor_cadd(output, input1, 1.0, input2);
10 | return 1;
11 | }
12 |
13 | int scale_conv_backward(THFloatTensor *grad_output, THFloatTensor *grad_input,
14 | THFloatTensor *grad_scale)
15 | {
16 | // THFloatTensor_resizeAs(grad_input, grad_output);
17 | // THFloatTensor_fill(grad_input, 1);
18 | return 1;
19 | }
--------------------------------------------------------------------------------
/graphs/ops/src/scale_conv.h:
--------------------------------------------------------------------------------
1 | int scale_conv_forward(THFloatTensor *input, THFloatTensor *scale,
2 | THFloatTensor *output);
3 | int scale_conv_backward(THFloatTensor *grad_output, THFloatTensor *grad_input,
4 | THFloatTensor *grad_scale);
--------------------------------------------------------------------------------
/graphs/ops/src/scale_conv_cuda.h:
--------------------------------------------------------------------------------
1 | int scale_conv_forward_cuda(THCudaTensor *input,
2 | THCudaTensor *weight, /*THCudaTensor * bias, */
3 | THCudaTensor *scale, THCudaTensor *output,
4 | THCudaTensor *columns, THCudaTensor *ones, int kW,
5 | int kH, int dW, int dH, int padW, int padH,
6 | int dilationW, int dilationH,
7 | int deformable_group, int im2col_step);
8 |
9 | int scale_conv_backward_input_cuda(
10 | THCudaTensor *input, THCudaTensor *scale, THCudaTensor *gradOutput,
11 | THCudaTensor *gradInput, THCudaTensor *gradScale, THCudaTensor *weight,
12 | THCudaTensor *columns, int kW, int kH, int dW, int dH, int padW, int padH,
13 | int dilationW, int dilationH, int deformable_group, int im2col_step);
14 |
15 | int scale_conv_backward_parameters_cuda(
16 | THCudaTensor *input, THCudaTensor *scale, THCudaTensor *gradOutput,
17 | THCudaTensor *gradWeight, /*THCudaTensor *gradBias, */
18 | THCudaTensor *columns, THCudaTensor *ones, int kW, int kH, int dW, int dH,
19 | int padW, int padH, int dilationW, int dilationH, int deformable_group,
20 | float scale_, int im2col_step);
21 |
--------------------------------------------------------------------------------
/graphs/ops/src/scale_conv_cuda_kernel.h:
--------------------------------------------------------------------------------
1 | #ifndef SCALE_CONV_CUDA_KERNEL_H_
2 | #define SCALE_CONV_CUDA_KERNEL_H_
3 | void scale_im2col(cudaStream_t stream, const float *data_im,
4 | const float *data_scale, const int channels,
5 | const int height, const int width, const int ksize_h,
6 | const int ksize_w, const int pad_h, const int pad_w,
7 | const int stride_h, const int stride_w,
8 | const int dilation_h, const int dilation_w,
9 | const int parallel_imgs,
10 | const int deformable_group, float *data_col);
11 |
12 | void scale_col2im(cudaStream_t stream, const float *data_col,
13 | const float *data_scale, const int channels,
14 | const int height, const int width, const int ksize_h,
15 | const int ksize_w, const int pad_h, const int pad_w,
16 | const int stride_h, const int stride_w,
17 | const int dilation_h, const int dilation_w,
18 | const int parallel_imgs,
19 | const int deformable_group, float *grad_im);
20 |
21 | void scale_col2im_coord(cudaStream_t stream, const float *data_col,
22 | const float *data_im, const float *data_scale,
23 | const int channels, const int height,
24 | const int width, const int ksize_h,
25 | const int ksize_w, const int pad_h,
26 | const int pad_w, const int stride_h,
27 | const int stride_w, const int dilation_h,
28 | const int dilation_w, const int parallel_imgs,
29 | const int deformable_group, float *grad_scale);
30 | #endif
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | """
2 | __author__ = "Hager Rady and Mo'men AbdelRazek"
3 |
4 | Main
5 | -Capture the config file
6 | -Process the json config passed
7 | -Create an agent instance
8 | -Run the agent
9 | """
10 |
11 | import argparse
12 | from utils.config import *
13 | from agents import *
14 |
15 |
16 | def main():
17 |
18 | # parse the path of the json config file
19 | arg_parser = argparse.ArgumentParser(description="")
20 | arg_parser.add_argument(
21 | 'config',
22 | metavar='config_json_file',
23 | default='None',
24 | help='The Configuration file in json format')
25 | args = arg_parser.parse_args()
26 |
27 | # parse the config json file
28 | config = process_config(args.config)
29 |
30 | # Create the Agent and pass all the configuration to it then run it..
31 | agent_class = globals()[config.agent]
32 | agent = agent_class(config)
33 | agent.run()
34 | agent.finalize()
35 |
36 |
37 | if __name__ == '__main__':
38 | main()
39 |
--------------------------------------------------------------------------------
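The only contract between `main.py` and a config file is that `config.agent` names a class importable from `agents`. A self-contained sketch of the same dispatch pattern (`ExampleAgent` is hypothetical, not a class from this repository):

```python
# Sketch of the name -> class dispatch used in main(): the JSON config's
# "agent" field must exactly match a class name pulled in by `from agents import *`.
class ExampleAgent:
    def __init__(self, config):
        self.config = config
    def run(self):
        print("running", self.config["exp_name"])
    def finalize(self):
        pass

config = {"agent": "ExampleAgent", "exp_name": "demo"}
agent_class = globals()[config["agent"]]  # look the class up by name
agent = agent_class(config)
agent.run()
agent.finalize()
```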
/run.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | python main.py ./configs/scnet_os16_depth.json
3 | python main.py ./configs/scnet_os16_depth.json
--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 |
4 | path = os.path.dirname(os.path.abspath(__file__))
5 |
6 | for py in [f[:-3] for f in os.listdir(path) if f.endswith('.py') and f != '__init__.py']:
7 | mod = __import__('.'.join([__name__, py]), fromlist=[py])
8 | classes = [getattr(mod, x) for x in dir(mod) if isinstance(getattr(mod, x), type)]
9 | for cls in classes:
10 | setattr(sys.modules[__name__], cls.__name__, cls)
--------------------------------------------------------------------------------
/utils/assets/class_diagram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LinZhuoChen/SGNet/02510182eb4baca77dd1d99237a5e77812055a0c/utils/assets/class_diagram.png
--------------------------------------------------------------------------------
/utils/config.py:
--------------------------------------------------------------------------------
1 | import os
2 | import os.path as osp
3 |
4 | import logging
5 | from logging import Formatter
6 | from logging.handlers import RotatingFileHandler
7 |
8 | import json
9 | from easydict import EasyDict
10 | from pprint import pprint
11 |
12 | from utils.dirs import create_dirs
13 |
14 | def get_config_from_json(json_file):
15 | """
16 | Get the config from a json file
17 | :param json_file: the path of the config file
18 | :return: config(namespace), config(dictionary)
19 | """
20 |
21 | # parse the configurations from the config json file provided
22 | with open(json_file, 'r') as config_file:
23 | try:
24 | config_dict = json.load(config_file)
25 | # EasyDict allows accessing dict values as attributes (works recursively).
26 | config = EasyDict(config_dict)
27 | return config, config_dict
28 | except ValueError:
29 | print("Invalid JSON file format. Please provide a valid JSON file.")
30 | exit(-1)
31 |
32 |
33 | def process_config(json_file):
34 | """
35 | Get the json file
36 | Processing it with EasyDict to be accessible as attributes
37 | then editing the path of the experiments folder
38 | creating some important directories in the experiment folder
39 | Then setup the logging in the whole program
40 | Then return the config
41 | :param json_file: the path of the config file
42 | :return: config object(namespace)
43 | """
44 | config, _ = get_config_from_json(json_file)
45 | print("The configuration of your experiment:")
46 | pprint(config)
47 |
48 | # making sure that you have provided the exp_name.
49 | try:
50 | print(" *************************************** ")
51 | print("The experiment name is {}".format(config.exp_name))
52 | print(" *************************************** ")
53 | except AttributeError:
54 | print("ERROR: Please provide exp_name in the JSON config file.")
55 | exit(-1)
56 |
57 | # create some important directories to be used for that experiment.
58 | create_dirs([config.snapshot_dir])
59 |
60 | # setup logging in the project
61 |
62 |
63 | return config
64 |
--------------------------------------------------------------------------------
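A hedged sketch of driving `process_config` directly (the two keys below are the only ones this module itself reads; the real files under `configs/` carry many more, e.g. the agent class name consumed by `main.py`):

```python
import json
import tempfile

from utils.config import process_config

# Minimal config exercising only the keys process_config itself touches.
cfg = {"exp_name": "demo_experiment",
       "snapshot_dir": "./experiments/demo/snapshots"}

with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False) as f:
    json.dump(cfg, f)
    path = f.name

config = process_config(path)  # prints the config and creates snapshot_dir
print(config.exp_name)         # attribute-style access courtesy of EasyDict
```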
/utils/dirs.py:
--------------------------------------------------------------------------------
1 | import os
2 | import logging
3 |
4 |
5 | def create_dirs(dirs):
6 | """
7 | Create the given directories if they do not already exist.
8 | :param dirs: a list of directory paths to create
9 | :return: None
10 | """
11 | try:
12 | for dir_ in dirs:
13 | if not os.path.exists(dir_):
14 | os.makedirs(dir_)
15 | except Exception as err:
16 | logging.getLogger("Dirs Creator").info("Creating directories error: {0}".format(err))
17 | exit(-1)
18 |
--------------------------------------------------------------------------------
/utils/encoding.py:
--------------------------------------------------------------------------------
1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 | ## Created by: Hang Zhang
3 | ## ECE Department, Rutgers University
4 | ## Email: zhang.hang@rutgers.edu
5 | ## Copyright (c) 2017
6 | ##
7 | ## This source code is licensed under the MIT-style license found in the
8 | ## LICENSE file in the root directory of this source tree
9 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
10 |
11 | """Encoding Data Parallel"""
12 | import threading
13 | import functools
14 | import torch
15 | from torch.autograd import Variable, Function
16 | import torch.cuda.comm as comm
17 | from torch.nn.parallel.data_parallel import DataParallel
18 | from torch.nn.parallel.parallel_apply import get_a_var
19 | from torch.nn.parallel._functions import ReduceAddCoalesced, Broadcast
20 |
21 | torch_ver = torch.__version__[:3]
22 |
23 | __all__ = ['allreduce', 'DataParallelModel', 'DataParallelCriterion',
24 | 'patch_replication_callback']
25 |
26 | def allreduce(*inputs):
27 | """Cross GPU all reduce autograd operation for calculate mean and
28 | variance in SyncBN.
29 | """
30 | return AllReduce.apply(*inputs)
31 |
32 | class AllReduce(Function):
33 | @staticmethod
34 | def forward(ctx, num_inputs, *inputs):
35 | ctx.num_inputs = num_inputs
36 | ctx.target_gpus = [inputs[i].get_device() for i in range(0, len(inputs), num_inputs)]
37 | inputs = [inputs[i:i + num_inputs]
38 | for i in range(0, len(inputs), num_inputs)]
39 | # sort before reduce sum
40 | inputs = sorted(inputs, key=lambda i: i[0].get_device())
41 | results = comm.reduce_add_coalesced(inputs, ctx.target_gpus[0])
42 | outputs = comm.broadcast_coalesced(results, ctx.target_gpus)
43 | return tuple([t for tensors in outputs for t in tensors])
44 |
45 | @staticmethod
46 | def backward(ctx, *inputs):
47 | inputs = [i.data for i in inputs]
48 | inputs = [inputs[i:i + ctx.num_inputs]
49 | for i in range(0, len(inputs), ctx.num_inputs)]
50 | results = comm.reduce_add_coalesced(inputs, ctx.target_gpus[0])
51 | outputs = comm.broadcast_coalesced(results, ctx.target_gpus)
52 | return (None,) + tuple([Variable(t) for tensors in outputs for t in tensors])
53 |
54 |
55 | class Reduce(Function):
56 | @staticmethod
57 | def forward(ctx, *inputs):
58 | ctx.target_gpus = [inputs[i].get_device() for i in range(len(inputs))]
59 | inputs = sorted(inputs, key=lambda i: i.get_device())
60 | return comm.reduce_add(inputs)
61 |
62 | @staticmethod
63 | def backward(ctx, gradOutput):
64 | return Broadcast.apply(ctx.target_gpus, gradOutput)
65 |
66 |
67 | class DataParallelModel(DataParallel):
68 | """Implements data parallelism at the module level.
69 |
70 | This container parallelizes the application of the given module by
71 | splitting the input across the specified devices by chunking in the
72 | batch dimension.
73 | In the forward pass, the module is replicated on each device,
74 | and each replica handles a portion of the input. During the backwards pass, gradients from each replica are summed into the original module.
75 | Note that the outputs are not gathered, please use compatible
76 | :class:`encoding.parallel.DataParallelCriterion`.
77 |
78 | The batch size should be larger than the number of GPUs used. It should
79 | also be an integer multiple of the number of GPUs so that each chunk is
80 | the same size (so that each GPU processes the same number of samples).
81 |
82 | Args:
83 | module: module to be parallelized
84 | device_ids: CUDA devices (default: all devices)
85 |
86 | Reference:
87 | Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, Ambrish Tyagi,
88 | Amit Agrawal. "Context Encoding for Semantic Segmentation."
89 | *The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2018*
90 |
91 | Example::
92 |
93 | >>> net = encoding.nn.DataParallelModel(model, device_ids=[0, 1, 2])
94 | >>> y = net(x)
95 | """
96 | def gather(self, outputs, output_device):
97 | return outputs
98 |
99 | def replicate(self, module, device_ids):
100 | modules = super(DataParallelModel, self).replicate(module, device_ids)
101 | execute_replication_callbacks(modules)
102 | return modules
103 |
104 |
105 | class DataParallelCriterion(DataParallel):
106 | """
107 | Calculate loss across multiple GPUs, which balances the memory usage for
108 | semantic segmentation.
109 |
110 | The targets are split across the specified devices by chunking in
111 | the batch dimension. Please use together with :class:`encoding.parallel.DataParallelModel`.
112 |
113 | Reference:
114 | Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, Ambrish Tyagi,
115 | Amit Agrawal. "Context Encoding for Semantic Segmentation."
116 | *The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2018*
117 |
118 | Example::
119 |
120 | >>> net = encoding.nn.DataParallelModel(model, device_ids=[0, 1, 2])
121 | >>> criterion = encoding.nn.DataParallelCriterion(criterion, device_ids=[0, 1, 2])
122 | >>> y = net(x)
123 | >>> loss = criterion(y, target)
124 | """
125 | def forward(self, inputs, *targets, **kwargs):
126 | # inputs should already be scattered;
127 | # scattering the targets instead
128 | if not self.device_ids:
129 | return self.module(inputs, *targets, **kwargs)
130 | targets, kwargs = self.scatter(targets, kwargs, self.device_ids)
131 | if len(self.device_ids) == 1:
132 | return self.module(inputs, *targets[0], **kwargs[0])
133 | replicas = self.replicate(self.module, self.device_ids[:len(inputs)])
134 | outputs = _criterion_parallel_apply(replicas, inputs, targets, kwargs)
135 | return Reduce.apply(*outputs) / len(outputs)
136 | #return self.gather(outputs, self.output_device).mean()
137 |
138 |
139 | def _criterion_parallel_apply(modules, inputs, targets, kwargs_tup=None, devices=None):
140 | assert len(modules) == len(inputs)
141 | assert len(targets) == len(inputs)
142 | if kwargs_tup:
143 | assert len(modules) == len(kwargs_tup)
144 | else:
145 | kwargs_tup = ({},) * len(modules)
146 | if devices is not None:
147 | assert len(modules) == len(devices)
148 | else:
149 | devices = [None] * len(modules)
150 |
151 | lock = threading.Lock()
152 | results = {}
153 | if torch_ver != "0.3":
154 | grad_enabled = torch.is_grad_enabled()
155 |
156 | def _worker(i, module, input, target, kwargs, device=None):
157 | if torch_ver != "0.3":
158 | torch.set_grad_enabled(grad_enabled)
159 | if device is None:
160 | device = get_a_var(input).get_device()
161 | try:
162 | if not isinstance(input, tuple):
163 | input = (input,)
164 | with torch.cuda.device(device):
165 | output = module(*(input + target), **kwargs)
166 | with lock:
167 | results[i] = output
168 | except Exception as e:
169 | with lock:
170 | results[i] = e
171 |
172 | if len(modules) > 1:
173 | threads = [threading.Thread(target=_worker,
174 | args=(i, module, input, target,
175 | kwargs, device),)
176 | for i, (module, input, target, kwargs, device) in
177 | enumerate(zip(modules, inputs, targets, kwargs_tup, devices))]
178 |
179 | for thread in threads:
180 | thread.start()
181 | for thread in threads:
182 | thread.join()
183 | else:
184 | _worker(0, modules[0], inputs[0], targets[0], kwargs_tup[0], devices[0])
185 |
186 | outputs = []
187 | for i in range(len(inputs)):
188 | output = results[i]
189 | if isinstance(output, Exception):
190 | raise output
191 | outputs.append(output)
192 | return outputs
193 |
194 |
195 | ###########################################################################
196 | # Adapted from Synchronized-BatchNorm-PyTorch.
197 | # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch
198 | #
199 | class CallbackContext(object):
200 | pass
201 |
202 |
203 | def execute_replication_callbacks(modules):
204 | """
205 | Execute a replication callback `__data_parallel_replicate__` on each module created
206 | by original replication.
207 |
208 | The callback will be invoked with arguments `__data_parallel_replicate__(ctx, copy_id)`
209 |
210 | Note that, since all replicas are isomorphic, we assign each sub-module a context
211 | (shared among multiple copies of this module on different devices).
212 | Through this context, different copies can share some information.
213 |
214 | We guarantee that the callback on the master copy (the first copy) is invoked
215 | before the callbacks of any slave copies.
216 | """
217 | master_copy = modules[0]
218 | nr_modules = len(list(master_copy.modules()))
219 | ctxs = [CallbackContext() for _ in range(nr_modules)]
220 |
221 | for i, module in enumerate(modules):
222 | for j, m in enumerate(module.modules()):
223 | if hasattr(m, '__data_parallel_replicate__'):
224 | m.__data_parallel_replicate__(ctxs[j], i)
225 |
226 |
227 | def patch_replication_callback(data_parallel):
228 | """
229 | Monkey-patch an existing `DataParallel` object. Add the replication callback.
230 | Useful when you have a customized `DataParallel` implementation.
231 |
232 | Examples:
233 | >>> sync_bn = SynchronizedBatchNorm1d(10, eps=1e-5, affine=False)
234 | >>> sync_bn = DataParallel(sync_bn, device_ids=[0, 1])
235 | >>> patch_replication_callback(sync_bn)
236 | # this is equivalent to
237 | >>> sync_bn = SynchronizedBatchNorm1d(10, eps=1e-5, affine=False)
238 | >>> sync_bn = DataParallelWithCallback(sync_bn, device_ids=[0, 1])
239 | """
240 |
241 | assert isinstance(data_parallel, DataParallel)
242 |
243 | old_replicate = data_parallel.replicate
244 |
245 | @functools.wraps(old_replicate)
246 | def new_replicate(module, device_ids):
247 | modules = old_replicate(module, device_ids)
248 | execute_replication_callbacks(modules)
249 | return modules
250 |
251 | data_parallel.replicate = new_replicate
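252 |
253 | # A minimal usage sketch of the two wrappers above (illustrative, not from the
254 | # original training code); `model`, `criterion`, `x`, and `target` are
255 | # hypothetical placeholders:
256 | #
257 | # >>> model = DataParallelModel(model, device_ids=[0, 1])
258 | # >>> criterion = DataParallelCriterion(criterion, device_ids=[0, 1])
259 | # >>> outputs = model(x)                 # one output per device
260 | # >>> loss = criterion(outputs, target)  # targets scattered, losses reduced
261 | # >>> loss.backward()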
--------------------------------------------------------------------------------
/utils/log.py:
--------------------------------------------------------------------------------
1 | import os
2 | import os.path as osp
3 | from tensorboardX import SummaryWriter
4 |
5 | import logging
6 | from logging import Formatter
7 | from logging.handlers import RotatingFileHandler
8 |
9 | def setup_logging(log_dir):
10 | log_file_format = "[%(levelname)s] - %(asctime)s - %(name)s - : %(message)s in %(pathname)s:%(lineno)d"
11 | log_console_format = "[%(levelname)s]: %(message)s"
12 |
13 | # Main logger
14 | main_logger = logging.getLogger()
15 | main_logger.setLevel(logging.INFO)
16 |
17 | console_handler = logging.StreamHandler()
18 | console_handler.setLevel(logging.INFO)
19 | console_handler.setFormatter(Formatter(log_console_format))
20 |
21 | exp_file_handler = RotatingFileHandler(osp.join(log_dir, 'exp_debug.log'), maxBytes=10**6, backupCount=5)
22 | exp_file_handler.setLevel(logging.DEBUG)
23 | exp_file_handler.setFormatter(Formatter(log_file_format))
24 |
25 | exp_errors_file_handler = RotatingFileHandler(osp.join(log_dir, 'exp_error.log'), maxBytes=10**6, backupCount=5)
26 | exp_errors_file_handler.setLevel(logging.WARNING)
27 | exp_errors_file_handler.setFormatter(Formatter(log_file_format))
28 |
29 | main_logger.addHandler(console_handler)
30 | main_logger.addHandler(exp_file_handler)
31 | main_logger.addHandler(exp_errors_file_handler)
32 |
33 | class Visualizer():
34 | """
35 | Visualizer
36 | :param:
37 | config:
38 | """
39 | def __init__(self, config):
40 | self.writer = SummaryWriter(osp.join(config.snapshot_dir, config.exp_name + config.time))
41 | self.config = config
42 | def add_scalar(self, name, x, y):
43 | self.writer.add_scalar(name, x, y)
44 | def add_image(self, name, image, iter):
45 | self.writer.add_image(name, image, iter)
46 |
47 | class Log():
48 | """
49 | Log
50 | :param:
51 | config:
52 | """
53 | def __init__(self, config):
54 | self.log_path = osp.join(config.snapshot_dir, config.exp_name + config.time)
55 | self.log = open(osp.join(self.log_path, 'log_train.txt'), 'w')
56 | self.config = config
57 | setup_logging(self.log_path)
58 | logging.getLogger().info("Hi, This is root.")
59 | logging.getLogger().info("After the configurations are successfully processed and dirs are created.")
60 | logging.getLogger().info("The pipeline of the project will begin now.")
61 |
62 | def record_sys_param(self):
63 | self.log.write(str(self.config) + '\n')
64 |
65 | def record_file(self):
66 | os.system('cp %s %s'%(self.config.model_file, self.log_path))
67 | os.system('cp %s %s'%(self.config.agent_file, self.log_path))
68 | os.system('cp %s %s'%(self.config.config_file, self.log_path))
69 | os.system('cp %s %s' % (self.config.dataset_file, self.log_path))
70 | os.system('cp %s %s' % (self.config.transform_file, self.log_path))
71 | os.system('cp %s %s' % (self.config.module_file, self.log_path))
72 |
73 | def log_string(self, out_str):
74 | self.log.write(out_str + '\n')
75 | self.log.flush()
76 | print(out_str)
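77 |
78 | # A minimal wiring sketch (illustrative; assumes the snapshot directory already
79 | # exists and that `config` carries the attributes used above):
80 | #
81 | # >>> from types import SimpleNamespace
82 | # >>> config = SimpleNamespace(snapshot_dir='./snapshots', exp_name='sgnet_nyud',
83 | # ...                          time='2021-01-01T00-00-00/')
84 | # >>> logger = Log(config)                      # also installs the logging handlers
85 | # >>> logger.log_string('iter 100, loss 1.234')
86 | # >>> Visualizer(config).add_scalar('train/loss', 1.234, 100)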
--------------------------------------------------------------------------------
/utils/metrics.py:
--------------------------------------------------------------------------------
1 | """
2 | This file contains the metrics of the framework
3 | """
4 | import numpy as np
5 |
6 |
7 | class IOUMetric:
8 | """
9 | Class to calculate mean IoU using the fast_hist method
10 | """
11 |
12 | def __init__(self, num_classes):
13 | self.num_classes = num_classes
14 | self.hist = np.zeros((num_classes, num_classes))
15 |
16 | def _fast_hist(self, label_pred, label_true):
17 | mask = (label_true >= 0) & (label_true < self.num_classes)
18 | hist = np.bincount(
19 | self.num_classes * label_true[mask].astype(int) +
20 | label_pred[mask], minlength=self.num_classes ** 2).reshape(self.num_classes, self.num_classes)
21 | return hist
22 |
23 | def add_batch(self, predictions, gts, ignore_index):
24 | # use the ignore_index argument instead of shadowing it with a hard-coded 255
25 | valid = gts != ignore_index
26 | predictions = predictions[valid]
27 | gts = gts[valid]
28 | self.hist += self._fast_hist(predictions, gts)
28 |
29 | def add_batch_ignore(self, predictions, gts, ignore_index):
30 | for lp, lt in zip(predictions, gts):
31 | valid = lt != ignore_index
32 | lp = lp[valid]
33 | lt = lt[valid]
34 | self.hist += self._fast_hist(lp, lt)
35 |
36 | def evaluate(self):
37 | acc = np.diag(self.hist).sum() / self.hist.sum()
38 | acc_cls = np.diag(self.hist) / self.hist.sum(axis=1)
39 | acc_cls = np.nanmean(acc_cls)
40 | iu = np.diag(self.hist) / (self.hist.sum(axis=1) + self.hist.sum(axis=0) - np.diag(self.hist))
41 | mean_iu = np.nanmean(iu)
42 | freq = self.hist.sum(axis=1) / self.hist.sum()
43 | fwavacc = (freq[freq > 0] * iu[freq > 0]).sum()
44 | return acc, acc_cls, iu, mean_iu, fwavacc
45 |
46 | class AverageMeter:
47 | """
48 | Average meter for scalar metrics such as loss or accuracy.
49 | """
50 |
51 | def __init__(self):
52 | self.value = 0
53 | self.avg = 0
54 | self.sum = 0
55 | self.count = 0
56 | self.reset()
57 |
58 | def reset(self):
59 | self.value = 0
60 | self.avg = 0
61 | self.sum = 0
62 | self.count = 0
63 |
64 | def update(self, val, n=1):
65 | self.value = val
66 | self.sum += val * n
67 | self.count += n
68 | self.avg = self.sum / self.count
69 |
70 | @property
71 | def val(self):
72 | return self.avg
73 |
74 |
75 | class AverageMeterList:
76 | """
77 | Average meter for list-valued metrics such as per-class mean IoU.
78 | """
79 |
80 | def __init__(self, num_cls):
81 | self.cls = num_cls
82 | self.value = [0] * self.cls
83 | self.avg = [0] * self.cls
84 | self.sum = [0] * self.cls
85 | self.count = [0] * self.cls
86 | self.reset()
87 |
88 | def reset(self):
89 | self.value = [0] * self.cls
90 | self.avg = [0] * self.cls
91 | self.sum = [0] * self.cls
92 | self.count = [0] * self.cls
93 |
94 | def update(self, val, n=1):
95 | for i in range(self.cls):
96 | self.value[i] = val[i]
97 | self.sum[i] += val[i] * n
98 | self.count[i] += n
99 | self.avg[i] = self.sum[i] / self.count[i]
100 |
101 | @property
102 | def val(self):
103 | return self.avg
104 |
105 |
106 | def cls_accuracy(output, target, topk=(1,)):
107 | maxk = max(topk)
108 | batch_size = target.size(0)
109 |
110 | _, pred = output.topk(maxk, 1, True, True)
111 | pred = pred.t()
112 | correct = pred.eq(target.view(1, -1).expand_as(pred))
113 |
114 | res = []
115 | for k in topk:
116 | correct_k = correct[:k].view(-1).float().sum(0)
117 | res.append(correct_k / batch_size)
118 | return res
119 |
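120 | # A quick sanity check for IOUMetric (illustrative only), assuming 255 marks
121 | # ignored pixels as elsewhere in this repo:
122 | #
123 | # >>> metric = IOUMetric(num_classes=2)
124 | # >>> pred = np.array([0, 1, 1, 0])
125 | # >>> gt = np.array([0, 1, 0, 255])            # the last pixel is ignored
126 | # >>> metric.add_batch(pred, gt, ignore_index=255)
127 | # >>> acc, acc_cls, iu, mean_iu, fwavacc = metric.evaluate()
128 | # >>> round(mean_iu, 2)
129 | # 0.5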
--------------------------------------------------------------------------------
/utils/misc.py:
--------------------------------------------------------------------------------
1 | import time
2 | import logging
3 |
4 |
5 | def timeit(f):
6 | """ Decorator to time Any Function """
7 |
8 | def timed(*args, **kwargs):
9 | start_time = time.time()
10 | result = f(*args, **kwargs)
11 | end_time = time.time()
12 | seconds = end_time - start_time
13 | logging.getLogger("Timer").info(" [-] %s : %2.5f sec, which is %2.5f min, which is %2.5f hour" %
14 | (f.__name__, seconds, seconds / 60, seconds / 3600))
15 | return result
16 |
17 | return timed
18 |
19 |
20 | def print_cuda_statistics():
21 | logger = logging.getLogger("Cuda Statistics")
22 | import sys
23 | from subprocess import call
24 | import torch
25 | logger.info('__Python VERSION: {}'.format(sys.version))
26 | logger.info('__pyTorch VERSION: {}'.format(torch.__version__))
27 | logger.info('__CUDA VERSION')
28 | call(["nvcc", "--version"])
29 | logger.info('__CUDNN VERSION: {}'.format(torch.backends.cudnn.version()))
30 | logger.info('__Number CUDA Devices: {}'.format(torch.cuda.device_count()))
31 | logger.info('__Devices')
32 | call(["nvidia-smi", "--format=csv",
33 | "--query-gpu=index,name,driver_version,memory.total,memory.used,memory.free"])
34 | logger.info('Active CUDA Device: GPU {}'.format(torch.cuda.current_device()))
35 | logger.info('Available devices {}'.format(torch.cuda.device_count()))
36 | logger.info('Current cuda device {}'.format(torch.cuda.current_device()))
37 |
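38 | # Usage sketch for the decorator above (`validate` is a hypothetical function):
39 | #
40 | # >>> @timeit
41 | # ... def validate(model, loader):
42 | # ...     pass
43 | # >>> # each call now logs its wall-clock duration under the "Timer" logger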
--------------------------------------------------------------------------------
/utils/optim.py:
--------------------------------------------------------------------------------
1 | from abc import ABCMeta, abstractmethod
2 |
3 | def lr_poly_exp(base_lr, iter, max_iter, power):
4 | return base_lr*((1-float(iter)/max_iter)**(power))
5 |
6 |
7 | def lr_poly_epoch(base_lr, iter, max_iter, power):
8 | return base_lr/2.0
9 |
10 |
11 | def adjust_learning_rate(optimizer, i_iter, args):
12 | """Sets the learning rate
13 | Args:
14 | optimizer: The optimizer
15 | i_iter: The current iteration
16 | """
17 | if args.dataset == "SUNRGBD":
18 | lr = max(lr_poly_exp(args.learning_rate, i_iter, args.num_steps, args.power), args.min_learining_rate)
19 | elif args.dataset == "NYUD":
20 | lr = max(lr_poly_exp(args.learning_rate, i_iter, args.num_steps, args.power), args.min_learining_rate)
21 | else:
22 | lr = lr_poly_exp(args.learning_rate, i_iter, args.num_steps, args.power)
23 | optimizer.param_groups[0]['lr'] = lr
24 | return lr
25 |
26 | def adjust_learning_rate_warmup(optimizer, i_iter, args):
27 | """Sets the learning rate
28 | Args:
29 | optimizer: The optimizer
30 | i_iter: The current iteration
31 | """
32 | args.warmup_steps = 6000
33 | if i_iter < args.warmup_steps:
34 | lr = args.learning_rate * (i_iter / args.warmup_steps)
35 | else:
36 | lr = max(lr_poly_exp(args.learning_rate, i_iter, args.num_steps, args.power), args.min_learining_rate)
37 | optimizer.param_groups[0]['lr'] = lr
38 | return lr
39 |
40 | def set_bn_eval(m):
41 | classname = m.__class__.__name__
42 | if classname.find('BatchNorm') != -1:
43 | m.eval()
44 |
45 |
46 | def set_bn_momentum(m):
47 | classname = m.__class__.__name__
48 | if classname.find('BatchNorm') != -1 or classname.find('InPlaceABN') != -1:
49 | m.momentum = 0.0003
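50 |
51 | # Sanity check for the poly schedule above (illustrative values, assuming
52 | # base_lr=0.01, max_iter=40000, power=0.9):
53 | #
54 | # >>> lr_poly_exp(0.01, 0, 40000, 0.9)       # 0.01 at the start of training
55 | # >>> lr_poly_exp(0.01, 20000, 40000, 0.9)   # ~0.00536 halfway through
56 | # >>> lr_poly_exp(0.01, 40000, 40000, 0.9)   # 0.0 at the end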
--------------------------------------------------------------------------------
/utils/utils.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from datetime import datetime
4 | from scipy import ndimage
5 | from PIL import Image
6 | import numpy as np
7 |
8 | model_urls = {
9 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
10 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
11 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
12 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
13 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
14 | }
15 |
16 | # colour map
17 | # label_colours = [(0,0,0)
18 | # # 0=background
19 | # ,(128,0,0),(0,128,0),(128,128,0),(0,0,128),(128,0,128)
20 | # # 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle
21 | # ,(0,128,128),(128,128,128),(64,0,0),(192,0,0),(64,128,0)
22 | # # 6=bus, 7=car, 8=cat, 9=chair, 10=cow
23 | # ,(192,128,0),(64,0,128),(192,0,128),(64,128,128),(192,128,128)
24 | # # 11=diningtable, 12=dog, 13=horse, 14=motorbike, 15=person
25 | # ,(0,64,0),(128,64,0),(0,192,0),(128,192,0),(0,64,128)]
26 | # # 16=potted plant, 17=sheep, 18=sofa, 19=train, 20=tv/monitor
27 | def uint82bin(n, count=8):
28 | """returns the binary of integer n, count refers to amount of bits"""
29 | return ''.join([str((n >> y) & 1) for y in range(count - 1, -1, -1)])
30 |
31 |
32 | def labelcolormap(N):
33 | if N == 35: # cityscape
34 | cmap = [(0, 0, 0), (0, 0, 0), (0, 0, 0), (0, 0, 0), (0, 0, 0), (111, 74, 0), (81, 0, 81),
35 | (128, 64, 128), (244, 35, 232), (250, 170, 160), (230, 150, 140), (70, 70, 70), (102, 102, 156),
36 | (190, 153, 153),
37 | (180, 165, 180), (150, 100, 100), (150, 120, 90), (153, 153, 153), (153, 153, 153), (250, 170, 30),
38 | (220, 220, 0),
39 | (107, 142, 35), (152, 251, 152), (70, 130, 180), (220, 20, 60), (255, 0, 0), (0, 0, 142), (0, 0, 70),
40 | (0, 60, 100), (0, 0, 90), (0, 0, 110), (0, 80, 100), (0, 0, 230), (119, 11, 32), (0, 0, 142)]
41 | else:
42 | cmap = []
43 | for i in range(N):
44 | r, g, b = 0, 0, 0
45 | id = i
46 | for j in range(7):
47 | str_id = uint82bin(id)
48 | r = r ^ (np.uint8(str_id[-1]) << (7 - j))
49 | g = g ^ (np.uint8(str_id[-2]) << (7 - j))
50 | b = b ^ (np.uint8(str_id[-3]) << (7 - j))
51 | id = id >> 3
52 | color = (r, g, b)
53 | cmap.append(color)
54 | return cmap
55 |
56 |
57 | def decode_labels(mask, num_images=1, num_classes=40):
58 | """Decode batch of segmentation masks.
59 |
60 | Args:
61 | mask: result of inference after taking argmax.
62 | num_images: number of images to decode from the batch.
63 | num_classes: number of classes to predict (including background).
64 |
65 | Returns:
66 | A batch with num_images RGB images of the same size as the input.
67 | """
68 | label_colours = labelcolormap(num_classes)
69 | mask = mask.data.cpu().numpy()
70 | n, h, w = mask.shape
71 | assert (n >= num_images), 'Batch size %d should be greater than or equal to the number of images to save %d.' % (
72 | n, num_images)
73 | outputs = np.zeros((num_images, h, w, 3), dtype=np.uint8)
74 | for i in range(num_images):
75 | img = Image.new('RGB', (len(mask[i, 0]), len(mask[i])))
76 | pixels = img.load()
77 | for j_, j in enumerate(mask[i, :, :]):
78 | for k_, k in enumerate(j):
79 | if k < num_classes:
80 | pixels[k_, j_] = label_colours[k]
81 | outputs[i] = np.array(img)
82 | return outputs
83 |
84 |
85 | def decode_predictions(preds, num_images=1, num_classes=40):
86 | """Decode batch of segmentation masks.
87 |
88 | Args:
89 | preds: raw network outputs; argmax over the class dimension is taken internally.
90 | num_images: number of images to decode from the batch.
91 | num_classes: number of classes to predict (including background).
92 |
93 | Returns:
94 | A batch with num_images RGB images of the same size as the input.
95 | """
96 | label_colours = labelcolormap(num_classes)
97 | if isinstance(preds, list):
98 | preds_list = []
99 | for pred in preds:
100 | preds_list.append(pred[-1].data.cpu().numpy())
101 | preds = np.concatenate(preds_list, axis=0)
102 | else:
103 | preds = preds.data.cpu().numpy()
104 |
105 | preds = np.argmax(preds, axis=1)
106 | n, h, w = preds.shape
107 | assert (n >= num_images), 'Batch size %d should be greater than or equal to the number of images to save %d.' % (
108 | n, num_images)
109 | outputs = np.zeros((num_images, h, w, 3), dtype=np.uint8)
110 | for i in range(num_images):
111 | img = Image.new('RGB', (len(preds[i, 0]), len(preds[i])))
112 | pixels = img.load()
113 | for j_, j in enumerate(preds[i, :, :]):
114 | for k_, k in enumerate(j):
115 | if k < num_classes:
116 | pixels[k_, j_] = label_colours[k]
117 | outputs[i] = np.array(img)
118 | return outputs
119 |
120 |
121 | def inv_preprocess(imgs, num_images, img_mean):
122 | """Inverse preprocessing of the batch of images.
123 | Add the mean vector and convert from BGR to RGB.
124 |
125 | Args:
126 | imgs: batch of input images.
127 | num_images: number of images to apply the inverse transformations on.
128 | img_mean: vector of mean colour values.
129 |
130 | Returns:
131 | The batch of the size num_images with the same spatial dimensions as the input.
132 | """
133 | imgs = imgs.data.cpu().numpy()
134 | n, c, h, w = imgs.shape
135 | assert (n >= num_images), 'Batch size %d should be greater than or equal to the number of images to save %d.' % (
136 | n, num_images)
137 | outputs = np.zeros((num_images, h, w, c), dtype=np.uint8)
138 | for i in range(num_images):
139 | outputs[i] = (np.transpose(imgs[i], (1, 2, 0)) + img_mean).astype(np.uint8)
140 | return outputs
141 |
142 |
143 | def get_confusion_matrix(gt_label, pred_label, class_num):
144 | """
145 | Calculate the confusion matrix from the given ground-truth and predicted labels
146 | :param gt_label: the ground truth label
147 | :param pred_label: the predicted label
148 | :param class_num: the number of classes
149 | :return: the confusion matrix
150 | """
151 | index = (gt_label * class_num + pred_label).astype('int32')
152 | label_count = np.bincount(index)
153 | confusion_matrix = np.zeros((class_num, class_num))
154 |
155 | for i_label in range(class_num):
156 | for i_pred_label in range(class_num):
157 | cur_index = i_label * class_num + i_pred_label
158 | if cur_index < len(label_count):
159 | confusion_matrix[i_label, i_pred_label] = label_count[cur_index]
160 |
161 | return confusion_matrix
162 |
163 |
164 | def get_currect_time():
165 | TIMESTAMP = "{0:%Y-%m-%dT%H-%M-%S/}".format(datetime.now())
166 |
167 | return TIMESTAMP
168 |
169 |
170 | def get_metric(hist):
171 | acc = np.diag(hist).sum() / hist.sum()
172 | acc_cls = np.diag(hist) / hist.sum(axis=1)
173 | acc_cls = np.nanmean(acc_cls)
174 | iu = np.diag(hist) / (hist.sum(axis=1) + hist.sum(axis=0) - np.diag(hist))
175 | mean_iu = np.nanmean(iu)
176 | freq = hist.sum(axis=1) / hist.sum()
177 | fwavacc = (freq[freq > 0] * iu[freq > 0]).sum()
178 |
179 | return acc, acc_cls, mean_iu, fwavacc, iu[freq > 0]
180 |
181 | def predict_whole(net, image, tile_size, recurrence, S):
182 | image = torch.from_numpy(image)
183 | S = torch.from_numpy(S)
184 | interp = nn.Upsample(size=tile_size, mode='bilinear', align_corners=True)
185 | prediction = net(image.cuda(), S.cuda())
186 | if isinstance(prediction, list):
187 | prediction = prediction[0]
188 | prediction = interp(prediction).cpu().data[0].numpy().transpose(1, 2, 0)
189 | return prediction
190 |
191 | def predict_multiscale(net, image, S, tile_size, scales, classes, flip_evaluation, recurrence=False):
192 | """
193 | Predict an image by evaluating it at multiple scales.
194 | Whole-image prediction ("predict_whole") is used for inputs no larger than the original size;
195 | for larger inputs, cropping would be used instead to keep GPU memory within bounds.
196 | """
197 | image = image.data
198 | S = S.data
199 | N_, C_, H_, W_ = image.shape
200 | full_probs = np.zeros((tile_size[0], tile_size[1], classes))
201 | for scale in scales:
202 | scale = float(scale)
203 | scale_image = ndimage.zoom(image, (1.0, 1.0, scale, scale), order=1, prefilter=False)
204 | scale_S = ndimage.zoom(S, (1.0, 1.0, scale, scale), order=1, prefilter=False)
205 | scaled_probs = predict_whole(net, scale_image, tile_size, recurrence, scale_S)
206 | if flip_evaluation:
207 | flip_scaled_probs = predict_whole(net, scale_image[:, :, :, ::-1].copy(), tile_size, recurrence,
208 | scale_S[:, :, :, ::-1].copy())
209 | scaled_probs = 0.5 * (scaled_probs + flip_scaled_probs[:, ::-1, :])
210 | full_probs += scaled_probs
211 | full_probs /= len(scales)
212 | return full_probs
213 |
214 | def get_palette(num_cls):
215 | """ Returns the color map for visualizing the segmentation mask.
216 | Args:
217 | num_cls: Number of classes
218 | Returns:
219 | The color map
220 | """
221 |
222 | n = num_cls
223 | palette = [0] * (n * 3)
224 | for j in range(0, n):
225 | lab = j
226 | palette[j * 3 + 0] = 0
227 | palette[j * 3 + 1] = 0
228 | palette[j * 3 + 2] = 0
229 | i = 0
230 | while lab:
231 | palette[j * 3 + 0] |= (((lab >> 0) & 1) << (7 - i))
232 | palette[j * 3 + 1] |= (((lab >> 1) & 1) << (7 - i))
233 | palette[j * 3 + 2] |= (((lab >> 2) & 1) << (7 - i))
234 | i += 1
235 | lab >>= 3
236 | return palette
237 |
238 | def maybe_download(model_name, model_url, model_dir=None, map_location=None):
239 | import os, sys
240 | from six.moves import urllib
241 | if model_dir is None:
242 | torch_home = os.path.expanduser(os.getenv('TORCH_HOME', '~/.torch'))
243 | model_dir = os.getenv('TORCH_MODEL_ZOO', os.path.join(torch_home, 'models'))
244 | if not os.path.exists(model_dir):
245 | os.makedirs(model_dir)
246 | filename = '{}.pth.tar'.format(model_name)
247 | cached_file = os.path.join(model_dir, filename)
248 | if not os.path.exists(cached_file):
249 | url = model_url
250 | sys.stderr.write('Downloading: "{}" to {}\n'.format(url, cached_file))
251 | urllib.request.urlretrieve(url, cached_file)
252 | return torch.load(cached_file, map_location=map_location)
253 |
254 | def expand_model_dict(model_dict, state_dict, num_parallel):
255 | model_dict_keys = model_dict.keys()
256 | state_dict_keys = state_dict.keys()
257 | for model_dict_key in model_dict_keys:
258 | model_dict_key_re = model_dict_key.replace('module.', '')
259 | if model_dict_key_re in state_dict_keys:
260 | model_dict[model_dict_key] = state_dict[model_dict_key_re]
261 | for i in range(num_parallel):
262 | bn = '.bn_%d' % i
263 | replace = True if bn in model_dict_key_re else False
264 | model_dict_key_re = model_dict_key_re.replace(bn, '')
265 | if replace and model_dict_key_re in state_dict_keys:
266 | model_dict[model_dict_key] = state_dict[model_dict_key_re]
267 | return model_dict
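268 |
269 | # A small sketch (illustrative only) combining get_confusion_matrix with
270 | # get_metric on a toy 3-class problem:
271 | #
272 | # >>> gt = np.array([0, 0, 1, 2, 2])
273 | # >>> pred = np.array([0, 1, 1, 2, 0])
274 | # >>> hist = get_confusion_matrix(gt, pred, class_num=3)
275 | # >>> acc, acc_cls, mean_iu, fwavacc, iu = get_metric(hist)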
--------------------------------------------------------------------------------