├── .gitignore ├── README.md ├── agents ├── __init__.py ├── base.py └── sgnet_agent.py ├── configs ├── sgnet_aspp_nyud_fps.json ├── sgnet_aspp_nyud_test.json ├── sgnet_nyud_fps.json ├── sgnet_nyud_test.json ├── sgnet_res50_nyud_fps.json └── sgnet_res50_nyud_test.json ├── data ├── nyudv2.py └── transform │ ├── __init__.py │ └── rgbd_transform.py ├── dataset └── list │ └── nyud │ ├── test_nyud.txt │ └── train_nyud.txt ├── graphs ├── models │ └── SGNet │ │ ├── SGNet.py │ │ ├── SGNet_ASPP.py │ │ ├── SGNet_ASPP_fps.py │ │ ├── SGNet_Res50.py │ │ ├── SGNet_Res50_fps.py │ │ └── SGNet_fps.py └── ops │ ├── __init__.py │ ├── build.py │ ├── build_modulated.py │ ├── functions │ ├── __init__.py │ ├── deform_conv.py │ └── modulated_dcn_func.py │ ├── libs │ ├── __init__.py │ ├── _ext │ │ └── __init__.py │ ├── bn.py │ ├── build.py │ ├── build.sh │ ├── dense.py │ ├── functions.py │ ├── misc.py │ ├── residual.py │ └── src │ │ ├── bn.cu │ │ ├── bn.h │ │ ├── common.h │ │ ├── lib_cffi.cpp │ │ └── lib_cffi.h │ ├── make.sh │ ├── modules │ ├── __init__.py │ ├── deform_conv.py │ └── s_conv.py │ └── src │ ├── cuda │ ├── deform_psroi_pooling_cuda.cu │ ├── deform_psroi_pooling_cuda.h │ ├── modulated_deform_im2col_cuda.cu │ └── modulated_deform_im2col_cuda.h │ ├── deform_conv.c │ ├── deform_conv.h │ ├── deform_conv_cuda.c │ ├── deform_conv_cuda.h │ ├── deform_conv_cuda_kernel.cu │ ├── deform_conv_cuda_kernel.h │ ├── modulated_dcn.c │ ├── modulated_dcn.h │ ├── modulated_dcn_cuda.c │ ├── modulated_dcn_cuda.h │ ├── scale_conv.c │ ├── scale_conv.h │ ├── scale_conv_cuda.c │ ├── scale_conv_cuda.h │ ├── scale_conv_cuda_kernel.cu │ └── scale_conv_cuda_kernel.h ├── main.py ├── run.sh └── utils ├── __init__.py ├── assets └── class_diagram.png ├── config.py ├── dirs.py ├── encoding.py ├── log.py ├── metrics.py ├── misc.py ├── optim.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | pretrained_weights/ -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 |
3 | # Spatial information guided Convolution for Real-Time RGBD Semantic Segmentation
4 | 
5 | Lin-Zhuo Chen, Zheng Lin, Ziqin Wang, Yong-Liang Yang and Ming-Ming Cheng
6 | 
7 | ⭐ Project Home »
8 | 
9 | 
10 | ***
11 | 
12 | The official repo of the TIP 2021 paper [Spatial information guided Convolution for Real-Time RGBD Semantic Segmentation](https://arxiv.org/pdf/2004.04534.pdf).
13 | 
14 | ## Results on NYUDv2 Dataset
15 | 
16 | Speed depends on the hardware (e.g. CPU, GPU, RAM), so an exactly fair comparison is hard to make.
17 | 
18 | We obtained the following results with an NVIDIA GTX 1080Ti GPU and an Intel(R) Core(TM) i7-6700K CPU @ 4.00GHz:
19 | 
20 | | Model        | mIoU(480x640) | mIoU(MS) | FPS(480x640) | FPS(425x560) |
21 | | :----------: | :-----------: | :------: | :----------: | :----------: |
22 | | SGNet(Res50) | 47.7%         | 48.6%    | 35           | 39           |
23 | | SGNet        | 49.8%         | 51.1%    | 26           | 28           |
24 | | SGNet_ASPP   | 50.2%         | 51.1%    | 24           | 26           |
25 | 
26 | If you want to measure speed on a more recent graphics card (such as a 2080Ti), you can use a PyTorch 0.4.1 + CUDA 9.2 environment to measure inference speed.
27 | 
28 | ## Prerequisites
29 | 
30 | #### Environments
31 | * PyTorch == 0.4.1
32 | * CUDA == 8.0
33 | * cuDNN == 7.1.4
34 | * pillow
35 | * numpy
36 | * tensorboardX
37 | * tqdm
38 | 
39 | #### Trained model and dataset
40 | Download the NYUDv2 dataset and the trained models:
41 | 
42 | |                          | Dataset | Model | Model | Model |
43 | | ------------------------ | :-----: | :---: | :---: | :---: |
44 | | BaiduDrive(passwd: scon) | [NYUDv2](https://pan.baidu.com/s/1mvEVXRGc0ESrFN6ux3YpDA) | [SGNet_Res50](https://pan.baidu.com/s/1yj3llVf14uT17HzqTi6pjw) | [SGNet](https://pan.baidu.com/s/1shzbcPjIKdq99Ji39OHIMg) | [SGNet_ASPP](https://pan.baidu.com/s/1HeiJfHpIjSQKmFtYJhBrng) |
45 | 
46 | ## Usage
47 | 1. Put the pretrained models into the `pretrained_weights` folder and unzip the dataset into the `dataset` folder.
48 | 
49 | 2. To compile the InPlace-ABN and S-Conv operations, please run:
50 | ```bash
51 | ## compile InPlace-ABN
52 | cd graphs/ops/libs
53 | sh build.sh
54 | python build.py
55 | ## compile S-Conv
56 | cd ..
57 | sh make.sh
58 | ```
59 | 
60 | 3. Modify the config in `configs/sgnet_nyud_test.json` (mainly check "trained_model_path").
61 | To test the model with input size $480 \times 640$, please run:
62 | 
63 | ```bash
64 | ## SGNet
65 | python main.py ./configs/sgnet_nyud_test.json
66 | 
67 | ## SGNet_ASPP
68 | python main.py ./configs/sgnet_aspp_nyud_test.json
69 | 
70 | ## SGNet_Res50
71 | python main.py ./configs/sgnet_res50_nyud_test.json
72 | ```
73 | 4.
You can run the following command to measure the model's inference speed for a given input size (e.g. $480 \times 640$):
74 | 
75 | ```bash
76 | ## SGNet
77 | python main.py ./configs/sgnet_nyud_fps.json
78 | 
79 | ## SGNet_ASPP
80 | python main.py ./configs/sgnet_aspp_nyud_fps.json
81 | 
82 | ## SGNet_Res50
83 | python main.py ./configs/sgnet_res50_nyud_fps.json
84 | ```
85 | 
86 | ## Citation
87 | 
88 | If you find this work useful for your research, please cite our paper:
89 | ```
90 | @article{21TIP-SGNet,
91 |   author={Lin-Zhuo Chen and Zheng Lin and Ziqin Wang and Yong-Liang Yang and Ming-Ming Cheng},
92 |   journal={IEEE Transactions on Image Processing},
93 |   title={Spatial Information Guided Convolution for Real-Time RGBD Semantic Segmentation},
94 |   year={2021},
95 |   volume={30},
96 |   pages={2313-2324},
97 |   doi={10.1109/TIP.2021.3049332}
98 | }
99 | ```
100 | 
101 | ### Thanks to the Third Party Libs
102 | [inplace_abn](https://github.com/mapillary/inplace_abn)
103 | 
104 | [Pytorch-Deeplab](https://github.com/speedinghzl/Pytorch-Deeplab)
105 | 
106 | [PyTorch-Encoding](https://github.com/zhanghang1989/PyTorch-Encoding)
107 | 
108 | [PyTorch-segmentation-toolbox](https://github.com/speedinghzl/pytorch-segmentation-toolbox)
109 | 
110 | [Depth2HHA-python](https://github.com/charlesCXK/Depth2HHA-python)
111 | 
112 | [Pytorch-Project-Template](https://github.com/moemen95/Pytorch-Project-Template)
113 | 
114 | [Deformable-Convolution-V2-PyTorch](https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch)
115 | 
116 | If you have any questions, feel free to contact me via `linzhuochen🥳foxmail😲com`
--------------------------------------------------------------------------------
/agents/__init__.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | 
4 | # Import every module in this package and re-export its classes, so an agent
5 | # class (e.g. SGNetAgent) can be looked up on the package by name.
6 | path = os.path.dirname(os.path.abspath(__file__))
7 | 
8 | for py in [f[:-3] for f in os.listdir(path) if f.endswith('.py') and f != '__init__.py']:
9 |     mod = __import__('.'.join([__name__, py]), fromlist=[py])
10 |     classes = [getattr(mod, x) for x in dir(mod) if isinstance(getattr(mod, x), type)]
11 |     for cls in classes:
12 |         setattr(sys.modules[__name__], cls.__name__, cls)
--------------------------------------------------------------------------------
/agents/base.py:
--------------------------------------------------------------------------------
1 | """
2 | The base Agent class that all other agents inherit from; it defines all the necessary agent functions.
3 | """
4 | import logging
5 | 
6 | 
7 | class BaseAgent:
8 |     """
9 |     This base class contains the base functions to be overloaded by any agent you implement.
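
    A minimal subclass sketch (illustrative only; `MyAgent` and its body are
    hypothetical, while a concrete agent such as SGNetAgent also builds its
    model and data loader in __init__):

        class MyAgent(BaseAgent):
            def run(self):
                self.logger.info("Running experiment %s", self.config.exp_name)

            def finalize(self):
                pass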
10 | """ 11 | 12 | def __init__(self, config): 13 | self.config = config 14 | self.logger = logging.getLogger("Agent") 15 | 16 | def load_checkpoint(self, file_name): 17 | """ 18 | Latest checkpoint loader 19 | :param file_name: name of the checkpoint file 20 | :return: 21 | """ 22 | raise NotImplementedError 23 | 24 | def save_checkpoint(self, file_name="checkpoint.pth.tar", is_best=0): 25 | """ 26 | Checkpoint saver 27 | :param file_name: name of the checkpoint file 28 | :param is_best: boolean flag to indicate whether current checkpoint's metric is the best so far 29 | :return: 30 | """ 31 | raise NotImplementedError 32 | 33 | def run(self): 34 | """ 35 | The main operator 36 | :return: 37 | """ 38 | raise NotImplementedError 39 | 40 | def train(self): 41 | """ 42 | Main training loop 43 | :return: 44 | """ 45 | raise NotImplementedError 46 | 47 | def train_one_epoch(self): 48 | """ 49 | One epoch of training 50 | :return: 51 | """ 52 | raise NotImplementedError 53 | 54 | def validate(self): 55 | """ 56 | One cycle of model validation 57 | :return: 58 | """ 59 | raise NotImplementedError 60 | 61 | def finalize(self): 62 | """ 63 | Finalizes all the operations of the 2 Main classes of the process, the operator and the data loader 64 | :return: 65 | """ 66 | raise NotImplementedError -------------------------------------------------------------------------------- /agents/sgnet_agent.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import os.path as osp 4 | from tqdm import tqdm 5 | import shutil 6 | import random 7 | import torch 8 | from torch.backends import cudnn 9 | from torch.utils import data 10 | import torch.optim as optim 11 | import timeit 12 | from torch.nn import functional as F 13 | import time 14 | from PIL import Image 15 | 16 | from data.nyudv2 import NYUDataset_val_full 17 | from utils.metrics import IOUMetric 18 | from utils.utils import get_currect_time 19 | from utils.encoding import DataParallelModel, DataParallelCriterion 20 | from utils.log import Visualizer, Log 21 | from utils.optim import adjust_learning_rate 22 | from utils.misc import print_cuda_statistics 23 | from agents.base import BaseAgent 24 | from utils.utils import predict_multiscale, get_palette 25 | 26 | class SGNetAgent(BaseAgent): 27 | """ 28 | This class will be responsible for handling the whole process of our architecture. 
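
    The constructor selects the network variant from the config keys
    "network", "os", "spatial_information" and "mode", fixes all random
    seeds, and builds the NYUDv2 test loader; run() then dispatches to
    test() or measure_speed() according to config.mode.

    A usage sketch (assuming main.py builds `config` from the JSON file
    given on the command line):

        agent = SGNetAgent(config)
        agent.run()
        agent.finalize()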
29 | """ 30 | def __init__(self, config): 31 | super().__init__(config) 32 | ## Select network 33 | if config.spatial_information == 'depth' and config.os == 16 and config.network == "SGNet" and config.mode != "measure_speed": 34 | from graphs.models.SGNet.SGNet import SGNet 35 | elif config.spatial_information == 'depth' and config.os == 16 and config.network == "SGNet": 36 | from graphs.models.SGNet.SGNet_fps import SGNet 37 | elif config.spatial_information == 'depth' and config.os == 16 and config.network == "SGNet_ASPP" and config.mode != "measure_speed": 38 | from graphs.models.SGNet.SGNet_ASPP import SGNet 39 | elif config.spatial_information == 'depth' and config.os == 16 and config.network == "SGNet_ASPP": 40 | from graphs.models.SGNet.SGNet_ASPP_fps import SGNet 41 | elif config.spatial_information == 'depth' and config.os == 16 and config.network == "SGNet_Res50" and config.mode != "measure_speed": 42 | from graphs.models.SGNet.SGNet_Res50 import SGNet 43 | elif config.spatial_information == 'depth' and config.os == 16 and config.network == "SGNet_Res50": 44 | from graphs.models.SGNet.SGNet_Res50_fps import SGNet 45 | 46 | random.seed(self.config.seed) 47 | os.environ['PYTHONHASHSEED'] = str(self.config.seed) 48 | np.random.seed(self.config.seed) 49 | torch.manual_seed(self.config.seed) 50 | torch.cuda.manual_seed(self.config.seed) 51 | torch.cuda.manual_seed_all(self.config.seed) 52 | cudnn.enabled = True 53 | cudnn.benchmark = True 54 | cudnn.deterministic = False 55 | os.environ["CUDA_VISIBLE_DEVICES"] = config.gpu 56 | # create data loader 57 | if config.dataset == "NYUD": 58 | self.testloader = data.DataLoader(NYUDataset_val_full(self.config.val_list_path), 59 | batch_size=1, shuffle=False, pin_memory=True) 60 | # Create an instance from the Model 61 | self.logger.info("Loading encoder pretrained in imagenet...") 62 | self.model = SGNet(self.config.num_classes) 63 | print(self.model) 64 | 65 | self.model.cuda() 66 | self.model.train() 67 | self.model.float() 68 | print(config.gpu) 69 | if config.mode == 'test': 70 | self.test_model = self.model 71 | if config.mode != 'measure_speed': 72 | self.model = DataParallelModel(self.model, device_ids=[0]) 73 | print('parallel....................') 74 | 75 | 76 | total = sum([param.nelement() for param in self.model.parameters()]) 77 | print(' + Number of params: %.2fM' % (total / 1e6)) 78 | print_cuda_statistics() 79 | 80 | def load_checkpoint(self, filename): 81 | try: 82 | self.logger.info("Loading checkpoint '{}'".format(filename)) 83 | checkpoint = torch.load(filename) 84 | 85 | self.current_epoch = checkpoint['epoch'] 86 | self.current_iteration = checkpoint['iteration'] 87 | self.model.load_state_dict(checkpoint['state_dict']) 88 | 89 | # self.optimizer.load_state_dict(checkpoint['optimizer']) 90 | except OSError as e: 91 | self.logger.info("No checkpoint exists from '{}'. Skipping...".format(self.config.checkpoint_dir)) 92 | self.logger.info("**First time to train**") 93 | 94 | def run(self): 95 | """ 96 | This function will the operator 97 | :return: 98 | """ 99 | assert self.config.mode in ['train', 'test', 'measure_speed', 'train_iters'] 100 | try: 101 | if self.config.mode == 'test': 102 | self.test() 103 | elif self.config.mode == 'measure_speed': 104 | with torch.no_grad(): 105 | self.measure_speed(input_size=[1, 3, 480, 640]) 106 | except KeyboardInterrupt: 107 | self.logger.info("You have entered CTRL+C.. 
Wait to finalize") 108 | 109 | def test(self): 110 | 111 | tqdm_batch = tqdm(self.testloader, total=len(self.testloader), 112 | desc="Testing...") 113 | self.test_model.eval() 114 | metrics = IOUMetric(self.config.num_classes) 115 | loss_val = 0 116 | metrics = IOUMetric(self.config.num_classes) 117 | palette = get_palette(256) 118 | # if (not os.path.exists(self.config.output_img_dir)): 119 | # os.mkdir(self.config.output_img_dir) 120 | # if (not os.path.exists(self.config.output_gt_dir)): 121 | # os.mkdir(self.config.output_gt_dir) 122 | if (not os.path.exists(self.config.output_predict_dir)): 123 | os.mkdir(self.config.output_predict_dir) 124 | self.load_checkpoint(self.config.trained_model_path) 125 | index = 0 126 | for batch_val in tqdm_batch: 127 | image = batch_val['image'].cuda() 128 | label = batch_val['seg'].cuda() 129 | label = torch.squeeze(label, 1).long() 130 | HHA = batch_val['HHA'].cuda() 131 | depth = batch_val['depth'].cuda() 132 | size = np.array([label.size(1), label.size(2)]) 133 | input_size = (label.size(1), label.size(2)) 134 | 135 | with torch.no_grad(): 136 | if self.config.ms: 137 | output = predict_multiscale(self.test_model, image, depth, input_size, [0.8, 1.0, 2.0], 138 | self.config.num_classes, False) 139 | else: 140 | output = predict_multiscale(self.test_model, image, depth, input_size, [1.0], 141 | self.config.num_classes, False) 142 | seg_pred = np.asarray(np.argmax(output, axis=2), dtype=np.int) 143 | output_im = Image.fromarray(np.asarray(np.argmax(output, axis=2), dtype=np.uint8)) 144 | output_im.putpalette(palette) 145 | output_im.save(self.config.output_predict_dir + '/' + str(index) + '.png') 146 | seg_gt = np.asarray(label[0].cpu().numpy(), dtype=np.int) 147 | 148 | ignore_index = seg_gt != 255 149 | seg_gt = seg_gt[ignore_index] 150 | seg_pred = seg_pred[ignore_index] 151 | 152 | metrics.add_batch(seg_pred, seg_gt, ignore_index=255) 153 | 154 | index = index + 1 155 | acc, acc_cls, iu, mean_iu, fwavacc = metrics.evaluate() 156 | print({'meanIU': mean_iu, 'IU_array': iu, 'acc': acc, 'acc_cls': acc_cls}) 157 | pass 158 | 159 | def finalize(self): 160 | """ 161 | Finalize all the operations of the 2 Main classes of the process the operator and the data loader 162 | :return: 163 | """ 164 | # TODO 165 | pass 166 | def measure_speed(self, input_size, iteration=500): 167 | """ 168 | Measure the speed of model 169 | :return: speed_time 170 | fps 171 | """ 172 | self.model.eval() 173 | input = torch.randn(*input_size).cuda() 174 | depth = torch.randn(*input_size).cuda() 175 | HHA = torch.randn(*input_size).cuda() 176 | 177 | for _ in range(100): 178 | x = self.model(input, depth) 179 | print('=========Speed Testing=========') 180 | #torch.cuda.synchronize() 181 | torch.cuda.synchronize() 182 | 183 | for _ in range(iteration): 184 | torch.cuda.synchronize() 185 | t_start = time.time() 186 | x = self.model(input, depth) 187 | torch.cuda.synchronize() 188 | elapsed_time = time.time() - t_start 189 | speed_time = elapsed_time / 1 * 1000 190 | fps = 1 / elapsed_time 191 | #print(1) 192 | #print('Elapsed Time: [%.2f s / %d iter]' % (elapsed_time, iteration)) 193 | print('Speed Time: %.2f ms / iter FPS: %.2f' % (speed_time, fps)) 194 | time.sleep(0.005) 195 | return speed_time, fps 196 | 197 | -------------------------------------------------------------------------------- /configs/sgnet_aspp_nyud_fps.json: -------------------------------------------------------------------------------- 1 | { 2 | "exp_name": "sgnet_depth_nyud_test", 3 | "agent": "SGNetAgent", 
4 | "dataset": "NYUD", 5 | "network": "SGNet_ASPP", 6 | "spatial_information": "depth", 7 | "os": 16, 8 | "mode": "measure_speed", 9 | "cuda": true, 10 | "gpu": "1", 11 | "seed": 123, 12 | "num_classes": 40, 13 | "val_list_path": "./dataset/list/nyud/test_nyud.txt", 14 | "trained_model_path": "./pretrained_weights/SGNet_ASPP.pth.tar", 15 | "snapshot_dir": "./snapshots" 16 | } -------------------------------------------------------------------------------- /configs/sgnet_aspp_nyud_test.json: -------------------------------------------------------------------------------- 1 | { 2 | "exp_name": "sgnet_depth_nyud_test", 3 | "agent": "SGNetAgent", 4 | "dataset": "NYUD", 5 | "network": "SGNet_ASPP", 6 | "spatial_information": "depth", 7 | "os": 16, 8 | "mode": "test", 9 | "ms": 0, 10 | "cuda": true, 11 | "gpu": "0", 12 | "seed": 123, 13 | "num_classes": 40, 14 | "val_list_path": "./dataset/list/nyud/test_nyud.txt", 15 | "trained_model_path": "./pretrained_weights/SGNet_ASPP.pth.tar", 16 | "snapshot_dir": "./snapshots", 17 | "output_predict_dir": "./output" 18 | } -------------------------------------------------------------------------------- /configs/sgnet_nyud_fps.json: -------------------------------------------------------------------------------- 1 | { 2 | "exp_name": "sgnet_depth_nyud_test", 3 | "agent": "SGNetAgent", 4 | "dataset": "NYUD", 5 | "network": "SGNet", 6 | "spatial_information": "depth", 7 | "os": 16, 8 | "mode": "measure_speed", 9 | "cuda": true, 10 | "gpu": "1", 11 | "seed": 123, 12 | "num_classes": 40, 13 | "val_list_path": "./dataset/list/nyud/test_nyud.txt", 14 | "trained_model_path": "./pretrained_weights/SGNet.pth.tar", 15 | "snapshot_dir": "./snapshots" 16 | } -------------------------------------------------------------------------------- /configs/sgnet_nyud_test.json: -------------------------------------------------------------------------------- 1 | { 2 | "exp_name": "sgnet_depth_nyud_test", 3 | "agent": "SGNetAgent", 4 | "dataset": "NYUD", 5 | "network": "SGNet", 6 | "spatial_information": "depth", 7 | "os": 16, 8 | "mode": "test", 9 | "ms": 0, 10 | "cuda": true, 11 | "gpu": "0", 12 | "seed": 123, 13 | "num_classes": 40, 14 | "val_list_path": "./dataset/list/nyud/test_nyud.txt", 15 | "trained_model_path": "./pretrained_weights/SGNet.pth.tar", 16 | "snapshot_dir": "./snapshots", 17 | "output_predict_dir": "./output" 18 | } -------------------------------------------------------------------------------- /configs/sgnet_res50_nyud_fps.json: -------------------------------------------------------------------------------- 1 | { 2 | "exp_name": "sgnet_depth_nyud_test", 3 | "agent": "SGNetAgent", 4 | "dataset": "NYUD", 5 | "network": "SGNet_Res50", 6 | "spatial_information": "depth", 7 | "os": 16, 8 | "mode": "measure_speed", 9 | "cuda": true, 10 | "gpu": "1", 11 | "seed": 123, 12 | "num_classes": 40, 13 | "val_list_path": "./dataset/list/nyud/test_nyud.txt", 14 | "trained_model_path": "./pretrained_weights/SGNet.pth.tar", 15 | "snapshot_dir": "./snapshots" 16 | } -------------------------------------------------------------------------------- /configs/sgnet_res50_nyud_test.json: -------------------------------------------------------------------------------- 1 | { 2 | "exp_name": "sgnet_depth_nyud_test", 3 | "agent": "SGNetAgent", 4 | "dataset": "NYUD", 5 | "network": "SGNet_Res50", 6 | "spatial_information": "depth", 7 | "os": 16, 8 | "mode": "test", 9 | "ms": 0, 10 | "cuda": true, 11 | "gpu": "1", 12 | "seed": 123, 13 | "num_classes": 40, 14 | "val_list_path": 
"./dataset/list/nyud/test_nyud.txt", 15 | "trained_model_path": "./pretrained_weights/SGNet(Res50).pth.tar", 16 | "snapshot_dir": "./snapshots", 17 | "output_predict_dir": "./output" 18 | } -------------------------------------------------------------------------------- /data/nyudv2.py: -------------------------------------------------------------------------------- 1 | from torch.utils import data 2 | from torchvision import transforms 3 | 4 | from data.transform.rgbd_transform import * 5 | 6 | def make_dataset_fromlst(listfilename): 7 | """ 8 | NYUlist format: image_path label_path depth_path HHA_path 9 | Args: 10 | listfilename: file path of list 11 | """ 12 | images = [] 13 | segs = [] 14 | depths = [] 15 | HHAs = [] 16 | 17 | with open(listfilename) as f: 18 | content = f.readlines() 19 | for x in content: 20 | imgname, segname, depthname, HHAname = x.strip().split(' ') 21 | images += [imgname] 22 | segs += [segname] 23 | depths += [depthname] 24 | HHAs += [HHAname] 25 | 26 | return {'images':images, 'segs':segs, 'HHAs':HHAs, 'depths':depths} 27 | 28 | 29 | class NYUDataset_val_full(data.Dataset): 30 | """ 31 | NYUDataset for evaluation with full size 32 | Init Args: 33 | list_path: file path of NYUlist 34 | """ 35 | def __init__(self, list_path): 36 | self.list_path = list_path 37 | self.paths_dict = make_dataset_fromlst(self.list_path) 38 | self.len = len(self.paths_dict['images']) 39 | 40 | def __getitem__(self, index): 41 | # self.paths['images'][index] 42 | img = Image.open(self.paths_dict['images'][index]) # .astype(np.uint8) 43 | depth = Image.open(self.paths_dict['depths'][index]) 44 | HHA = Image.open(self.paths_dict['HHAs'][index]) 45 | seg = Image.open(self.paths_dict['segs'][index]) 46 | 47 | sample = {'image':img, 48 | 'depth':depth, 49 | 'seg': seg, 50 | 'HHA': HHA} 51 | 52 | sample = self.transform_val(sample) 53 | sample = self.totensor(sample) 54 | 55 | return sample 56 | 57 | def __len__(self): 58 | return self.len 59 | 60 | def name(self): 61 | return 'NYUDataset_val_full' 62 | 63 | def transform_val(self, sample): 64 | composed_transforms = transforms.Compose([ 65 | Normalize_PIL2numpy_depth2xyz()]) 66 | return composed_transforms(sample) 67 | 68 | def totensor(self, sample): 69 | composed_transforms = transforms.Compose([ 70 | ToTensor()]) 71 | return composed_transforms(sample) -------------------------------------------------------------------------------- /data/transform/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | path = os.path.dirname(os.path.abspath(__file__)) 5 | 6 | for py in [f[:-3] for f in os.listdir(path) if f.endswith('.py') and f != '__init__.py']: 7 | mod = __import__('.'.join([__name__, py]), fromlist=[py]) 8 | classes = [getattr(mod, x) for x in dir(mod) if isinstance(getattr(mod, x), type)] 9 | for cls in classes: 10 | setattr(sys.modules[__name__], cls.__name__, cls) -------------------------------------------------------------------------------- /data/transform/rgbd_transform.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | import torch 3 | import random 4 | import numpy as np 5 | from PIL import Image, ImageOps, ImageFilter 6 | 7 | class Normalize_PIL2numpy_depth2xyz(object): 8 | """ 9 | Normalize a tensor image with mean and standard deviation,then 10 | convert depth to xyz in train process. 
11 | """ 12 | 13 | def __init__(self): 14 | pass 15 | 16 | def __call__(self, sample): 17 | img = sample['image'] 18 | mask = sample['seg'] 19 | HHA = sample['HHA'] 20 | depth = sample['depth'] 21 | 22 | if 'scale_x' in sample.keys(): 23 | scale_x = sample['scale_x'] 24 | scale_y = sample['scale_y'] 25 | center_x = sample['center_x'] 26 | center_y = sample['center_y'] 27 | else: 28 | scale_x = 1. 29 | scale_y = 1. 30 | center_x = 0. 31 | center_y = 0. 32 | 33 | ## convert PIL to numpy 34 | img = np.array(img).astype(np.float32) 35 | mask = np.array(mask).astype(np.float32) 36 | depth = np.array(depth).astype(np.float32) 37 | depth = depth[np.newaxis, ...] 38 | HHA = np.array(HHA).astype(np.float32) 39 | 40 | ## convert depth to xyz 41 | _, h, w = depth.shape 42 | z = depth 43 | xx, yy = np.meshgrid(np.array(range(w)) + 1, np.array(range(h)) + 1) 44 | fx_rgb = 5.18857e+02 * scale_x 45 | fy_rgb = 5.19469e+02 * scale_y 46 | cx_rgb = w / 2.0 47 | cy_rgb = h / 2.0 48 | C = np.array([[fx_rgb, 0, cx_rgb], [0, fy_rgb, cy_rgb], [0, 0, 1]]) 49 | cc_rgb = C[0:2, 2] 50 | fc_rgb = np.diag(C[0:2, 0:2]) 51 | x = (np.multiply((xx - cc_rgb[0]), z) / fc_rgb[0]) 52 | y = (np.multiply((yy - cc_rgb[1]), z) / fc_rgb[1]) 53 | depth = np.concatenate([x, y, z], axis=0) 54 | 55 | ## zero center, change to BGR 56 | img = (img - np.asarray([122.675, 116.669, 104.008]))[:, :, ::-1] 57 | HHA = (HHA - np.asarray([122.675, 116.669, 104.008]))[:, :, ::-1] 58 | depth /= 1000.0 59 | 60 | return {'image': img, 61 | 'depth': depth, 62 | 'seg': mask, 63 | 'HHA': HHA} 64 | 65 | class ToTensor(object): 66 | """ 67 | Swap axis of image and convert ndarrays in sample to Tensors. 68 | """ 69 | # swap color axis 70 | # numpy image: H x W x C 71 | # torch image: C X H X W 72 | def __call__(self, sample): 73 | img = sample['image'] 74 | mask = sample['seg'] 75 | HHA = sample['HHA'] 76 | depth = sample['depth'] 77 | 78 | # Swap axis 79 | img = np.array(img).astype(np.float32).transpose((2, 0, 1)) 80 | ## convert 0-40 to 0-39 and 255 81 | mask = (np.array(mask).astype(np.uint8) - 1).astype(np.float32) 82 | HHA = np.array(HHA).astype(np.float32).transpose((2, 0, 1)) 83 | depth = np.array(depth).astype(np.float32) 84 | 85 | # Convert numpy to tensor 86 | img = torch.from_numpy(img).float() 87 | mask = torch.from_numpy(mask).float() 88 | HHA = torch.from_numpy(HHA).float() 89 | depth = torch.from_numpy(depth).float() 90 | 91 | 92 | return {'image': img, 93 | 'depth': depth, 94 | 'seg': mask, 95 | 'HHA': HHA} 96 | 97 | class ToTensor_SUN(object): 98 | """ 99 | Swap axis of image and convert ndarrays in sample to Tensors. 
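
    Note: unlike ToTensor above, this SUN-RGBD variant does not subtract 1
    from the label map, so it assumes the masks already use the target id
    range.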
100 | """ 101 | # swap color axis 102 | # numpy image: H x W x C 103 | # torch image: C X H X W 104 | def __call__(self, sample): 105 | img = sample['image'] 106 | mask = sample['seg'] 107 | HHA = sample['HHA'] 108 | depth = sample['depth'] 109 | 110 | img = np.array(img).astype(np.float32).transpose((2, 0, 1)) 111 | # convert 0-40 to 0-39 and 255 112 | mask = (np.array(mask).astype(np.uint8)).astype(np.float32) 113 | HHA = np.array(HHA).astype(np.float32).transpose((2, 0, 1)) 114 | depth = np.array(depth).astype(np.float32) 115 | 116 | # convert numpy to tensor 117 | img = torch.from_numpy(img).float() 118 | mask = torch.from_numpy(mask).float() 119 | HHA = torch.from_numpy(HHA).float() 120 | depth = torch.from_numpy(depth).float() 121 | 122 | 123 | return {'image': img, 124 | 'depth': depth, 125 | 'seg': mask, 126 | 'HHA': HHA} 127 | 128 | class RandomHorizontalFlip(object): 129 | """ 130 | Random horizontal flip augment 131 | """ 132 | def __call__(self, sample): 133 | img = sample['image'] 134 | mask = sample['seg'] 135 | HHA = sample['HHA'] 136 | depth = sample['depth'] 137 | 138 | if random.random() < 0.5: 139 | img = img.transpose(Image.FLIP_LEFT_RIGHT) 140 | mask = mask.transpose(Image.FLIP_LEFT_RIGHT) 141 | depth = depth.transpose(Image.FLIP_LEFT_RIGHT) 142 | HHA = HHA.transpose(Image.FLIP_LEFT_RIGHT) 143 | 144 | return {'image': img, 145 | 'depth': depth, 146 | 'seg': mask, 147 | 'HHA': HHA} 148 | 149 | class RandomGaussianBlur(object): 150 | """ 151 | Random gaussian blur 152 | """ 153 | def __call__(self, sample): 154 | img = sample['image'] 155 | mask = sample['seg'] 156 | HHA = sample['HHA'] 157 | depth = sample['depth'] 158 | if random.random() < 0.5: 159 | img = img.filter(ImageFilter.GaussianBlur( 160 | radius=random.random())) 161 | 162 | return {'image': img, 163 | 'depth': depth, 164 | 'seg': mask, 165 | 'HHA': HHA} 166 | 167 | class RandomScaleCrop(object): 168 | """ 169 | Random scale crop data augmentation 170 | """ 171 | def __init__(self, base_size, crop_size, fill=0): 172 | self.base_size = base_size 173 | self.crop_size_h = crop_size[0] 174 | self.crop_size_w = crop_size[1] 175 | self.fill = fill 176 | 177 | def __call__(self, sample): 178 | img = sample['image'] 179 | mask = sample['seg'] 180 | HHA = sample['HHA'] 181 | depth = sample['depth'] 182 | 183 | short_size = random.randint(int(self.base_size * 0.5), int(self.base_size * 2.25)) 184 | w, h = img.size 185 | if h > w: 186 | ow = short_size 187 | oh = int(1.0 * h * ow / w) 188 | else: 189 | oh = short_size 190 | ow = int(1.0 * w * oh / h) 191 | 192 | scale = ow / w 193 | img = img.resize((ow, oh), Image.BILINEAR) 194 | mask = mask.resize((ow, oh), Image.NEAREST) 195 | HHA = HHA.resize((ow, oh), Image.BILINEAR) 196 | depth = depth.resize((ow, oh), Image.BILINEAR) 197 | # pad crop 198 | if short_size < self.crop_size_h or ow < self.crop_size_w: 199 | padh = self.crop_size_h - oh if oh < self.crop_size_h else 0 200 | padw = self.crop_size_w - ow if ow < self.crop_size_w else 0 201 | img = ImageOps.expand(img, border=(0, 0, padw, padh), fill=0) 202 | HHA = ImageOps.expand(HHA, border=(0, 0, padw, padh), fill=0) 203 | mask = ImageOps.expand(mask, border=(0, 0, padw, padh), fill=0) 204 | depth = ImageOps.expand(depth, border=(0, 0, padw, padh), fill=0) 205 | # random crop crop_size 206 | w, h = img.size 207 | x1 = random.randint(0, w - self.crop_size_w) 208 | y1 = random.randint(0, h - self.crop_size_h) 209 | img = img.crop((x1, y1, x1 + self.crop_size_w, y1 + self.crop_size_h)) 210 | mask = mask.crop((x1, y1, x1 
+ self.crop_size_w, y1 + self.crop_size_h)) 211 | HHA = HHA.crop((x1, y1, x1 + self.crop_size_w, y1 + self.crop_size_h)) 212 | depth = depth.crop((x1, y1, x1 + self.crop_size_w, y1 + self.crop_size_h)) 213 | center_x = x1 214 | center_y = y1 215 | 216 | return { 217 | 'image': img, 218 | 'depth': depth, 219 | 'seg': mask, 220 | 'HHA': HHA, 221 | 'scale_x': scale, 222 | 'scale_y': scale, 223 | 'center_x': center_x, 224 | 'center_y': center_y 225 | } 226 | 227 | class FixScaleCrop(object): 228 | """ 229 | Fix scale crop data augmentation 230 | """ 231 | 232 | def __init__(self, crop_size): 233 | self.crop_size = crop_size 234 | 235 | def __call__(self, sample): 236 | img = sample['image'] 237 | mask = sample['seg'] 238 | HHA = sample['HHA'] 239 | depth = sample['depth'] 240 | 241 | w, h = img.size 242 | if w > h: 243 | oh = self.crop_size 244 | ow = int(1.0 * w * oh / h) 245 | else: 246 | ow = self.crop_size 247 | oh = int(1.0 * h * ow / w) 248 | img = img.resize((ow, oh), Image.BILINEAR) 249 | mask = mask.resize((ow, oh), Image.NEAREST) 250 | HHA = HHA.resize((ow, oh), Image.BILINEAR) 251 | depth = depth.resize((ow, oh), Image.BILINEAR) 252 | # center crop 253 | w, h = img.size 254 | x1 = int(round((w - self.crop_size) / 2.)) 255 | y1 = int(round((h - self.crop_size) / 2.)) 256 | 257 | img = img.crop((x1, y1, x1 + self.crop_size, y1 + self.crop_size)) 258 | mask = mask.crop((x1, y1, x1 + self.crop_size, y1 + self.crop_size)) 259 | HHA = HHA.crop((x1, y1, x1 + self.crop_size, y1 + self.crop_size)) 260 | depth = depth.crop((x1, y1, x1 + self.crop_size, y1 + self.crop_size)) 261 | 262 | return {'image': img, 263 | 'depth': depth, 264 | 'seg': mask, 265 | 'HHA': HHA} 266 | 267 | class FixedResize(object): 268 | """ 269 | Resize data augmentation 270 | """ 271 | 272 | def __init__(self, size): 273 | self.size_h = size[0] 274 | self.size_w = size[1] 275 | self.size = (self.size_w, self.size_h) 276 | 277 | def __call__(self, sample): 278 | img = sample['image'] 279 | mask = sample['seg'] 280 | HHA = sample['HHA'] 281 | depth = sample['depth'] 282 | 283 | assert img.size == mask.size 284 | 285 | img = img.resize(self.size, Image.BILINEAR) 286 | mask = mask.resize(self.size, Image.NEAREST) 287 | HHA = HHA.resize(self.size, Image.BILINEAR) 288 | depth = depth.resize(self.size, Image.BILINEAR) 289 | 290 | return {'image': img, 291 | 'depth': depth, 292 | 'seg': mask, 293 | 'HHA': HHA} 294 | 295 | 296 | class FixedResize_image(object): 297 | """Resize data augmentation (only for image and depth map) 298 | Init Args: 299 | size: new size of image 300 | """ 301 | 302 | def __init__(self, size): 303 | self.size_h = size[0] 304 | self.size_w = size[1] 305 | self.size = (self.size_w, self.size_h) 306 | 307 | def __call__(self, sample): 308 | img = sample['image'] 309 | mask = sample['seg'] 310 | HHA = sample['HHA'] 311 | depth = sample['depth'] 312 | 313 | img = img.resize(self.size, Image.BILINEAR) 314 | HHA = HHA.resize(self.size, Image.BILINEAR) 315 | depth = depth.resize(self.size, Image.BILINEAR) 316 | 317 | return {'image': img, 318 | 'depth': depth, 319 | 'seg': mask, 320 | 'HHA': HHA} 321 | 322 | 323 | class CenterCrop(object): 324 | """center crop augmentation 325 | Init Args: 326 | size: crop size 327 | """ 328 | 329 | def __init__(self, size): 330 | self.size = size 331 | 332 | def __call__(self, sample): 333 | img = sample['image'] 334 | mask = sample['seg'] 335 | HHA = sample['HHA'] 336 | depth = sample['depth'] 337 | 338 | w, h = img.size 339 | th, tw = self.size 340 | 341 | x = int(round((w - 
tw) / 2.)) 342 | y = int(round((h - th) / 2.)) 343 | 344 | img = img.crop((x, y, x + tw, y + th)) 345 | mask = mask.crop((x, y, x + tw, y + th)) 346 | HHA = HHA.crop((x, y, x + tw, y + th)) 347 | depth = depth.crop((x, y, x + tw, y + th)) 348 | 349 | return {'image': img, 350 | 'depth': depth, 351 | 'seg': mask, 352 | 'HHA': HHA} 353 | 354 | class CenterCrop_image(object): 355 | """center crop augmentation 356 | Init Args: 357 | size: crop size 358 | """ 359 | 360 | def __init__(self, size): 361 | self.size = size 362 | 363 | def __call__(self, sample): 364 | img = sample['image'] 365 | mask = sample['seg'] 366 | HHA = sample['HHA'] 367 | depth = sample['depth'] 368 | 369 | w, h = img.size 370 | th, tw = self.size 371 | 372 | x = int(round((w - tw) / 2.)) 373 | y = int(round((h - th) / 2.)) 374 | 375 | img = img.crop((x, y, x + tw, y + th)) 376 | HHA = HHA.crop((x, y, x + tw, y + th)) 377 | depth = depth.crop((x, y, x + tw, y + th)) 378 | 379 | return {'image': img, 380 | 'depth': depth, 381 | 'seg': mask, 382 | 'HHA': HHA} -------------------------------------------------------------------------------- /graphs/models/SGNet/SGNet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import functools 3 | import torch 4 | 5 | from graphs.ops.modules.s_conv import SConv 6 | from graphs.ops.libs import InPlaceABNSync 7 | 8 | affine_par = True 9 | BatchNorm2d = functools.partial(InPlaceABNSync, activation='none') 10 | 11 | def conv3x3(in_planes, out_planes, stride=1): 12 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 13 | padding=1, bias=False) 14 | 15 | class Bottleneck(nn.Module): 16 | expansion = 4 17 | 18 | def __init__(self, inplanes, planes, stride=1, dilation=1, downsample=None, fist_dilation=1, multi_grid=1, 19 | deformable=False): 20 | super(Bottleneck, self).__init__() 21 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 22 | self.bn1 = BatchNorm2d(planes) 23 | if deformable == False: 24 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 25 | padding=dilation * multi_grid, dilation=dilation * multi_grid, bias=False) 26 | else: 27 | self.conv2 = SConv(planes, planes, kernel_size=3, stride=stride, 28 | padding=1, deformable_groups=1, no_bias=True) 29 | self.bn2 = BatchNorm2d(planes) 30 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 31 | self.bn3 = BatchNorm2d(planes * 4) 32 | self.relu = nn.ReLU(inplace=False) 33 | self.relu_inplace = nn.ReLU(inplace=True) 34 | self.downsample = downsample 35 | self.dilation = dilation 36 | self.stride = stride 37 | self.deformable = deformable 38 | 39 | def forward(self, input): 40 | x, S = input 41 | residual = x 42 | 43 | out = self.conv1(x) 44 | out = self.bn1(out) 45 | out = self.relu(out) 46 | if self.deformable == False: 47 | out = self.conv2(out) 48 | else: 49 | out = self.conv2(out, S) 50 | out = self.bn2(out) 51 | out = self.relu(out) 52 | 53 | out = self.conv3(out) 54 | out = self.bn3(out) 55 | 56 | if self.downsample is not None: 57 | residual = self.downsample(x) 58 | 59 | out = out + residual 60 | out = self.relu_inplace(out) 61 | 62 | return [out, S] 63 | 64 | class ResNet(nn.Module): 65 | def __init__(self, block, layers, num_classes, deformable=True): 66 | self.inplanes = 128 67 | super(ResNet, self).__init__() 68 | self.conv1 = conv3x3(3, 64, stride=2) 69 | self.bn1 = BatchNorm2d(64) 70 | self.relu1 = nn.ReLU(inplace=False) 71 | self.conv2 = conv3x3(64, 64) 72 | self.bn2 = 
BatchNorm2d(64) 73 | self.relu2 = nn.ReLU(inplace=False) 74 | self.conv3 = conv3x3(64, 128) 75 | self.bn3 = BatchNorm2d(128) 76 | self.relu3 = nn.ReLU(inplace=False) 77 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 78 | self.relu = nn.ReLU(inplace=False) 79 | 80 | self.layer1 = self._make_layer(block, 64, layers[0], deformable=deformable, seg=True) 81 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2, deformable=deformable, seg=True) 82 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2, dilation=2, deformable=deformable, seg=True) 83 | self.layer4 = self._make_layer(block, 512, layers[3], stride=1, dilation=4, multi_grid=(1, 1, 1), 84 | deformable=deformable, seg=True) 85 | 86 | self.dsn3 = nn.Sequential( 87 | nn.Conv2d(1024, 512, kernel_size=3, stride=1, padding=1), 88 | InPlaceABNSync(512), 89 | nn.Dropout2d(0.1), 90 | nn.Conv2d(512, num_classes, kernel_size=1, stride=1, padding=0, bias=True), 91 | ) 92 | 93 | self.dsn4 = nn.Sequential( 94 | nn.Conv2d(2048, 512, kernel_size=3, stride=1, padding=1), 95 | InPlaceABNSync(512), 96 | nn.Dropout2d(0.1), 97 | nn.Conv2d(512, num_classes, kernel_size=1, stride=1, padding=0, bias=True), 98 | ) 99 | 100 | def make_up_conv_layer(self, block, in_channels, out_channels, batch_size): 101 | return block(in_channels, out_channels, batch_size) 102 | 103 | def _make_layer(self, block, planes, blocks, stride=1, dilation=1, multi_grid=1, deformable=False, seg=False): 104 | downsample = None 105 | if stride != 1 or self.inplanes != planes * block.expansion: 106 | downsample = nn.Sequential( 107 | nn.Conv2d(self.inplanes, planes * block.expansion, 108 | kernel_size=1, stride=stride, bias=False), 109 | BatchNorm2d(planes * block.expansion, affine=affine_par)) 110 | 111 | layers = [] 112 | generate_multi_grid = lambda index, grids: grids[index % len(grids)] if isinstance(grids, tuple) else 1 113 | layers.append(block(self.inplanes, planes, stride, dilation=dilation, downsample=downsample, 114 | multi_grid=generate_multi_grid(0, multi_grid), deformable=deformable)) 115 | self.inplanes = planes * block.expansion 116 | for i in range(1, blocks): 117 | if seg == False: 118 | layers.append(block(self.inplanes, planes, dilation=dilation, 119 | multi_grid=generate_multi_grid(i, multi_grid), deformable=deformable)) 120 | else: 121 | if i >= blocks-2: 122 | layers.append(block(self.inplanes, planes, dilation=dilation, 123 | multi_grid=generate_multi_grid(i, multi_grid), deformable=deformable)) 124 | else: 125 | layers.append(block(self.inplanes, planes, dilation=dilation, 126 | multi_grid=generate_multi_grid(i, multi_grid), deformable=False)) 127 | 128 | return nn.Sequential(*layers) 129 | def forward(self, x, depth): 130 | S = depth 131 | x = self.relu1(self.bn1(self.conv1(x))) 132 | x = self.relu2(self.bn2(self.conv2(x))) 133 | x = self.relu3(self.bn3(self.conv3(x))) 134 | 135 | x = self.maxpool(x) 136 | 137 | x = [x, S] 138 | 139 | x = self.layer3(self.layer2(self.layer1(x))) 140 | x3 = self.dsn3(x[0]) 141 | 142 | x = self.layer4(x) 143 | x4 = self.dsn4(x[0]) 144 | 145 | return [x4, x3] 146 | 147 | def load_pretrain(self, pretrain_model_path): 148 | """Load pretrained Network""" 149 | saved_state_dict = torch.load(pretrain_model_path) 150 | new_params = self.state_dict().copy() 151 | for i in saved_state_dict: 152 | i_parts = i.split('.') 153 | if not i_parts[0] == 'fc': 154 | new_params['.'.join(i_parts[0:])] = saved_state_dict[i] 155 | 156 | self.load_state_dict(new_params) 157 | 158 | def SGNet(num_classes=21): 159 
| model = ResNet(Bottleneck, [3, 4, 23, 3], num_classes) 160 | return model -------------------------------------------------------------------------------- /graphs/models/SGNet/SGNet_ASPP.py: -------------------------------------------------------------------------------- 1 | """ 2 | SCNet implementation 3 | """ 4 | import torch 5 | import torch.nn as nn 6 | import functools 7 | from torch.nn import functional as F 8 | 9 | from graphs.ops.modules.s_conv import SConv 10 | from graphs.ops.libs import InPlaceABNSync 11 | 12 | affine_par = True 13 | BatchNorm2d = functools.partial(InPlaceABNSync, activation='none') 14 | 15 | def conv3x3(in_planes, out_planes, stride=1): 16 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 17 | padding=1, bias=False) 18 | 19 | class Bottleneck(nn.Module): 20 | expansion = 4 21 | 22 | def __init__(self, inplanes, planes, stride=1, dilation=1, downsample=None, fist_dilation=1, multi_grid=1, 23 | deformable=False): 24 | super(Bottleneck, self).__init__() 25 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 26 | self.bn1 = BatchNorm2d(planes) 27 | if deformable == False: 28 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 29 | padding=dilation * multi_grid, dilation=dilation * multi_grid, bias=False) 30 | else: 31 | self.conv2 = SConv(planes, planes, kernel_size=3, stride=stride, 32 | padding=1, deformable_groups=1, no_bias=True) 33 | self.bn2 = BatchNorm2d(planes) 34 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 35 | self.bn3 = BatchNorm2d(planes * 4) 36 | self.relu = nn.ReLU(inplace=False) 37 | self.relu_inplace = nn.ReLU(inplace=True) 38 | self.downsample = downsample 39 | self.dilation = dilation 40 | self.stride = stride 41 | self.deformable = deformable 42 | 43 | def forward(self, input): 44 | x, S = input 45 | residual = x 46 | 47 | out = self.conv1(x) 48 | out = self.bn1(out) 49 | out = self.relu(out) 50 | if self.deformable == False: 51 | out = self.conv2(out) 52 | else: 53 | out = self.conv2(out, S) 54 | out = self.bn2(out) 55 | out = self.relu(out) 56 | 57 | out = self.conv3(out) 58 | out = self.bn3(out) 59 | 60 | if self.downsample is not None: 61 | residual = self.downsample(x) 62 | 63 | out = out + residual 64 | out = self.relu_inplace(out) 65 | 66 | return [out, S] 67 | 68 | 69 | class ASPPModule(nn.Module): 70 | """ 71 | Reference: 72 | Chen, Liang-Chieh, et al. 
*"Rethinking Atrous Convolution for Semantic Image Segmentation."* 73 | """ 74 | 75 | def __init__(self, features, inner_features=256, out_features=512, dilations=(12, 24, 36)): 76 | super(ASPPModule, self).__init__() 77 | 78 | self.conv1 = nn.Sequential(nn.AdaptiveAvgPool2d((1, 1)), 79 | nn.Conv2d(features, inner_features, kernel_size=1, padding=0, dilation=1, 80 | bias=False), 81 | InPlaceABNSync(inner_features)) 82 | self.conv2 = nn.Sequential( 83 | nn.Conv2d(features, inner_features, kernel_size=1, padding=0, dilation=1, bias=False), 84 | InPlaceABNSync(inner_features)) 85 | self.conv3 = nn.Sequential( 86 | nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[0], dilation=dilations[0], bias=False), 87 | InPlaceABNSync(inner_features)) 88 | self.conv4 = nn.Sequential( 89 | nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[1], dilation=dilations[1], bias=False), 90 | InPlaceABNSync(inner_features)) 91 | self.conv5 = nn.Sequential( 92 | nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[2], dilation=dilations[2], bias=False), 93 | InPlaceABNSync(inner_features)) 94 | 95 | self.bottleneck = nn.Sequential( 96 | nn.Conv2d(inner_features * 5, out_features, kernel_size=1, padding=0, dilation=1, bias=False), 97 | InPlaceABNSync(out_features), 98 | nn.Dropout2d(0.1) 99 | ) 100 | 101 | def forward(self, x): 102 | _, _, h, w = x.size() 103 | 104 | feat1 = F.upsample(self.conv1(x), size=(h, w), mode='bilinear', align_corners=True) 105 | 106 | feat2 = self.conv2(x) 107 | feat3 = self.conv3(x) 108 | feat4 = self.conv4(x) 109 | feat5 = self.conv5(x) 110 | out = torch.cat((feat1, feat2, feat3, feat4, feat5), 1) 111 | 112 | bottle = self.bottleneck(out) 113 | return bottle 114 | 115 | class ResNet(nn.Module): 116 | def __init__(self, block, layers, num_classes, deformable=False): 117 | self.inplanes = 128 118 | super(ResNet, self).__init__() 119 | self.conv1 = conv3x3(3, 64, stride=2) 120 | self.bn1 = BatchNorm2d(64) 121 | self.relu1 = nn.ReLU(inplace=False) 122 | self.conv2 = conv3x3(64, 64) 123 | self.bn2 = BatchNorm2d(64) 124 | self.relu2 = nn.ReLU(inplace=False) 125 | self.conv3 = conv3x3(64, 128) 126 | self.bn3 = BatchNorm2d(128) 127 | self.relu3 = nn.ReLU(inplace=False) 128 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 129 | self.relu = nn.ReLU(inplace=False) 130 | 131 | self.layer1 = self._make_layer(block, 64, layers[0], deformable=deformable, seg=True) 132 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2, deformable=deformable, seg=True) 133 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2, dilation=2, deformable=deformable, seg=True) 134 | self.layer4 = self._make_layer(block, 512, layers[3], stride=1, dilation=4, multi_grid=(1, 1, 1), 135 | deformable=deformable, seg=True) 136 | 137 | self.head = nn.Sequential(ASPPModule(2048), 138 | nn.Conv2d(512, num_classes, kernel_size=1, stride=1, padding=0, bias=True)) 139 | 140 | self.dsn3 = nn.Sequential( 141 | nn.Conv2d(1024, 512, kernel_size=3, stride=1, padding=1), 142 | InPlaceABNSync(512), 143 | nn.Dropout2d(0.1), 144 | nn.Conv2d(512, num_classes, kernel_size=1, stride=1, padding=0, bias=True), 145 | ) 146 | 147 | def make_up_conv_layer(self, block, in_channels, out_channels, batch_size): 148 | return block(in_channels, out_channels, batch_size) 149 | 150 | def _make_layer(self, block, planes, blocks, stride=1, dilation=1, multi_grid=1, deformable=False, seg=False): 151 | downsample = None 152 | if stride != 1 or self.inplanes != planes * 
block.expansion: 153 | downsample = nn.Sequential( 154 | nn.Conv2d(self.inplanes, planes * block.expansion, 155 | kernel_size=1, stride=stride, bias=False), 156 | BatchNorm2d(planes * block.expansion, affine=affine_par)) 157 | 158 | layers = [] 159 | generate_multi_grid = lambda index, grids: grids[index % len(grids)] if isinstance(grids, tuple) else 1 160 | layers.append(block(self.inplanes, planes, stride, dilation=dilation, downsample=downsample, 161 | multi_grid=generate_multi_grid(0, multi_grid), deformable=deformable)) 162 | self.inplanes = planes * block.expansion 163 | for i in range(1, blocks): 164 | if seg == False: 165 | layers.append(block(self.inplanes, planes, dilation=dilation, 166 | multi_grid=generate_multi_grid(i, multi_grid), deformable=deformable)) 167 | else: 168 | if i >= blocks-2: 169 | layers.append(block(self.inplanes, planes, dilation=dilation, 170 | multi_grid=generate_multi_grid(i, multi_grid), deformable=deformable)) 171 | else: 172 | layers.append(block(self.inplanes, planes, dilation=dilation, 173 | multi_grid=generate_multi_grid(i, multi_grid), deformable=False)) 174 | 175 | return nn.Sequential(*layers) 176 | def forward(self, x, depth): 177 | S = depth 178 | x = self.relu1(self.bn1(self.conv1(x))) 179 | x = self.relu2(self.bn2(self.conv2(x))) 180 | x = self.relu3(self.bn3(self.conv3(x))) 181 | 182 | x = self.maxpool(x) 183 | 184 | x = [x, S] 185 | 186 | x = self.layer3(self.layer2(self.layer1(x))) 187 | x3 = self.dsn3(x[0]) 188 | 189 | x = self.layer4(x) 190 | x4 = self.head(x[0]) 191 | 192 | return [x4, x3] 193 | 194 | def load_pretrain(self, pretrain_model_path): 195 | """Load pretrained Network""" 196 | saved_state_dict = torch.load(pretrain_model_path) 197 | new_params = self.state_dict().copy() 198 | for i in saved_state_dict: 199 | i_parts = i.split('.') 200 | if not i_parts[0] == 'fc': 201 | new_params['.'.join(i_parts[0:])] = saved_state_dict[i] 202 | self.load_state_dict(new_params) 203 | 204 | def SGNet(num_classes=21): 205 | model = ResNet(Bottleneck, [3, 4, 23, 3], num_classes, deformable=True) 206 | return model -------------------------------------------------------------------------------- /graphs/models/SGNet/SGNet_ASPP_fps.py: -------------------------------------------------------------------------------- 1 | """ 2 | SCNet implementation 3 | """ 4 | import torch 5 | import torch.nn as nn 6 | import functools 7 | from torch.nn import functional as F 8 | 9 | from graphs.ops.modules.s_conv import SConv 10 | from graphs.ops.libs import InPlaceABNSync 11 | 12 | affine_par = True 13 | BatchNorm2d = functools.partial(InPlaceABNSync, activation='none') 14 | 15 | def conv3x3(in_planes, out_planes, stride=1): 16 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 17 | padding=1, bias=False) 18 | 19 | class Bottleneck(nn.Module): 20 | expansion = 4 21 | 22 | def __init__(self, inplanes, planes, stride=1, dilation=1, downsample=None, fist_dilation=1, multi_grid=1, 23 | deformable=False): 24 | super(Bottleneck, self).__init__() 25 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 26 | self.bn1 = BatchNorm2d(planes) 27 | if deformable == False: 28 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 29 | padding=dilation * multi_grid, dilation=dilation * multi_grid, bias=False) 30 | else: 31 | self.conv2 = SConv(planes, planes, kernel_size=3, stride=stride, 32 | padding=1, deformable_groups=1, no_bias=True) 33 | self.bn2 = BatchNorm2d(planes) 34 | self.conv3 = nn.Conv2d(planes, planes * 4, 
kernel_size=1, bias=False) 35 | self.bn3 = BatchNorm2d(planes * 4) 36 | self.relu = nn.ReLU(inplace=False) 37 | self.relu_inplace = nn.ReLU(inplace=True) 38 | self.downsample = downsample 39 | self.dilation = dilation 40 | self.stride = stride 41 | self.deformable = deformable 42 | 43 | def forward(self, input): 44 | x, S = input 45 | residual = x 46 | 47 | out = self.conv1(x) 48 | out = self.bn1(out) 49 | out = self.relu(out) 50 | if self.deformable == False: 51 | out = self.conv2(out) 52 | else: 53 | out = self.conv2(out, S) 54 | out = self.bn2(out) 55 | out = self.relu(out) 56 | 57 | out = self.conv3(out) 58 | out = self.bn3(out) 59 | 60 | if self.downsample is not None: 61 | residual = self.downsample(x) 62 | 63 | out = out + residual 64 | out = self.relu_inplace(out) 65 | 66 | return [out, S] 67 | 68 | 69 | class ASPPModule(nn.Module): 70 | """ 71 | Reference: 72 | Chen, Liang-Chieh, et al. *"Rethinking Atrous Convolution for Semantic Image Segmentation."* 73 | """ 74 | 75 | def __init__(self, features, inner_features=256, out_features=512, dilations=(12, 24, 36)): 76 | super(ASPPModule, self).__init__() 77 | 78 | self.conv1 = nn.Sequential(nn.AdaptiveAvgPool2d((1, 1)), 79 | nn.Conv2d(features, inner_features, kernel_size=1, padding=0, dilation=1, 80 | bias=False), 81 | InPlaceABNSync(inner_features)) 82 | self.conv2 = nn.Sequential( 83 | nn.Conv2d(features, inner_features, kernel_size=1, padding=0, dilation=1, bias=False), 84 | InPlaceABNSync(inner_features)) 85 | self.conv3 = nn.Sequential( 86 | nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[0], dilation=dilations[0], bias=False), 87 | InPlaceABNSync(inner_features)) 88 | self.conv4 = nn.Sequential( 89 | nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[1], dilation=dilations[1], bias=False), 90 | InPlaceABNSync(inner_features)) 91 | self.conv5 = nn.Sequential( 92 | nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[2], dilation=dilations[2], bias=False), 93 | InPlaceABNSync(inner_features)) 94 | 95 | self.bottleneck = nn.Sequential( 96 | nn.Conv2d(inner_features * 5, out_features, kernel_size=1, padding=0, dilation=1, bias=False), 97 | InPlaceABNSync(out_features), 98 | nn.Dropout2d(0.1) 99 | ) 100 | 101 | def forward(self, x): 102 | _, _, h, w = x.size() 103 | 104 | feat1 = F.upsample(self.conv1(x), size=(h, w), mode='bilinear', align_corners=True) 105 | 106 | feat2 = self.conv2(x) 107 | feat3 = self.conv3(x) 108 | feat4 = self.conv4(x) 109 | feat5 = self.conv5(x) 110 | out = torch.cat((feat1, feat2, feat3, feat4, feat5), 1) 111 | 112 | bottle = self.bottleneck(out) 113 | return bottle 114 | 115 | class ResNet(nn.Module): 116 | def __init__(self, block, layers, num_classes, deformable=False): 117 | self.inplanes = 128 118 | super(ResNet, self).__init__() 119 | self.conv1 = conv3x3(3, 64, stride=2) 120 | self.bn1 = BatchNorm2d(64) 121 | self.relu1 = nn.ReLU(inplace=False) 122 | self.conv2 = conv3x3(64, 64) 123 | self.bn2 = BatchNorm2d(64) 124 | self.relu2 = nn.ReLU(inplace=False) 125 | self.conv3 = conv3x3(64, 128) 126 | self.bn3 = BatchNorm2d(128) 127 | self.relu3 = nn.ReLU(inplace=False) 128 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 129 | self.relu = nn.ReLU(inplace=False) 130 | 131 | self.layer1 = self._make_layer(block, 64, layers[0], deformable=deformable, seg=True) 132 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2, deformable=deformable, seg=True) 133 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2, 
dilation=2, deformable=deformable, seg=True) 134 | self.layer4 = self._make_layer(block, 512, layers[3], stride=1, dilation=4, multi_grid=(1, 1, 1), 135 | deformable=deformable, seg=True) 136 | 137 | self.head = nn.Sequential(ASPPModule(2048), 138 | nn.Conv2d(512, num_classes, kernel_size=1, stride=1, padding=0, bias=True)) 139 | 140 | def make_up_conv_layer(self, block, in_channels, out_channels, batch_size): 141 | return block(in_channels, out_channels, batch_size) 142 | 143 | def _make_layer(self, block, planes, blocks, stride=1, dilation=1, multi_grid=1, deformable=False, seg=False): 144 | downsample = None 145 | if stride != 1 or self.inplanes != planes * block.expansion: 146 | downsample = nn.Sequential( 147 | nn.Conv2d(self.inplanes, planes * block.expansion, 148 | kernel_size=1, stride=stride, bias=False), 149 | BatchNorm2d(planes * block.expansion, affine=affine_par)) 150 | 151 | layers = [] 152 | generate_multi_grid = lambda index, grids: grids[index % len(grids)] if isinstance(grids, tuple) else 1 153 | layers.append(block(self.inplanes, planes, stride, dilation=dilation, downsample=downsample, 154 | multi_grid=generate_multi_grid(0, multi_grid), deformable=deformable)) 155 | self.inplanes = planes * block.expansion 156 | for i in range(1, blocks): 157 | if seg == False: 158 | layers.append(block(self.inplanes, planes, dilation=dilation, 159 | multi_grid=generate_multi_grid(i, multi_grid), deformable=deformable)) 160 | else: 161 | if i >= blocks-2: 162 | layers.append(block(self.inplanes, planes, dilation=dilation, 163 | multi_grid=generate_multi_grid(i, multi_grid), deformable=deformable)) 164 | else: 165 | layers.append(block(self.inplanes, planes, dilation=dilation, 166 | multi_grid=generate_multi_grid(i, multi_grid), deformable=False)) 167 | 168 | return nn.Sequential(*layers) 169 | def forward(self, x, depth): 170 | S = depth 171 | x = self.relu1(self.bn1(self.conv1(x))) 172 | x = self.relu2(self.bn2(self.conv2(x))) 173 | x = self.relu3(self.bn3(self.conv3(x))) 174 | 175 | x = self.maxpool(x) 176 | 177 | x = [x, S] 178 | 179 | x = self.layer3(self.layer2(self.layer1(x))) 180 | 181 | x = self.layer4(x) 182 | x4 = self.head(x[0]) 183 | 184 | return x4 185 | 186 | def load_pretrain(self, pretrain_model_path): 187 | """Load pretrained Network""" 188 | saved_state_dict = torch.load(pretrain_model_path) 189 | new_params = self.state_dict().copy() 190 | for i in saved_state_dict: 191 | i_parts = i.split('.') 192 | if not i_parts[0] == 'fc': 193 | new_params['.'.join(i_parts[0:])] = saved_state_dict[i] 194 | self.load_state_dict(new_params) 195 | 196 | def SGNet(num_classes=21): 197 | model = ResNet(Bottleneck, [3, 4, 23, 3], num_classes, deformable=True) 198 | return model -------------------------------------------------------------------------------- /graphs/models/SGNet/SGNet_Res50.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import functools 3 | import torch 4 | 5 | from graphs.ops.modules.s_conv import SConv 6 | from graphs.ops.libs import InPlaceABNSync 7 | 8 | affine_par = True 9 | BatchNorm2d = functools.partial(InPlaceABNSync, activation='none') 10 | 11 | def conv3x3(in_planes, out_planes, stride=1): 12 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 13 | padding=1, bias=False) 14 | 15 | class Bottleneck(nn.Module): 16 | expansion = 4 17 | 18 | def __init__(self, inplanes, planes, stride=1, dilation=1, downsample=None, fist_dilation=1, multi_grid=1, 19 | deformable=False): 20 | 
super(Bottleneck, self).__init__() 21 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 22 | self.bn1 = BatchNorm2d(planes) 23 | if deformable == False: 24 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 25 | padding=dilation * multi_grid, dilation=dilation * multi_grid, bias=False) 26 | else: 27 | self.conv2 = SConv(planes, planes, kernel_size=3, stride=stride, 28 | padding=1, deformable_groups=1, no_bias=True) 29 | self.bn2 = BatchNorm2d(planes) 30 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 31 | self.bn3 = BatchNorm2d(planes * 4) 32 | self.relu = nn.ReLU(inplace=False) 33 | self.relu_inplace = nn.ReLU(inplace=True) 34 | self.downsample = downsample 35 | self.dilation = dilation 36 | self.stride = stride 37 | self.deformable = deformable 38 | 39 | def forward(self, input): 40 | x, S = input 41 | residual = x 42 | 43 | out = self.conv1(x) 44 | out = self.bn1(out) 45 | out = self.relu(out) 46 | if self.deformable == False: 47 | out = self.conv2(out) 48 | else: 49 | out = self.conv2(out, S) 50 | out = self.bn2(out) 51 | out = self.relu(out) 52 | 53 | out = self.conv3(out) 54 | out = self.bn3(out) 55 | 56 | if self.downsample is not None: 57 | residual = self.downsample(x) 58 | 59 | out = out + residual 60 | out = self.relu_inplace(out) 61 | 62 | return [out, S] 63 | 64 | class ResNet(nn.Module): 65 | def __init__(self, block, layers, num_classes, deformable=True): 66 | self.inplanes = 128 67 | super(ResNet, self).__init__() 68 | self.conv1 = conv3x3(3, 64, stride=2) 69 | self.bn1 = BatchNorm2d(64) 70 | self.relu1 = nn.ReLU(inplace=False) 71 | self.conv2 = conv3x3(64, 64) 72 | self.bn2 = BatchNorm2d(64) 73 | self.relu2 = nn.ReLU(inplace=False) 74 | self.conv3 = conv3x3(64, 128) 75 | self.bn3 = BatchNorm2d(128) 76 | self.relu3 = nn.ReLU(inplace=False) 77 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 78 | self.relu = nn.ReLU(inplace=False) 79 | 80 | self.layer1 = self._make_layer(block, 64, layers[0], deformable=deformable, seg=True) 81 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2, deformable=deformable, seg=True) 82 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2, dilation=2, deformable=deformable, seg=True) 83 | self.layer4 = self._make_layer(block, 512, layers[3], stride=1, dilation=4, multi_grid=(1, 1, 1), 84 | deformable=deformable, seg=True) 85 | 86 | self.dsn3 = nn.Sequential( 87 | nn.Conv2d(1024, 512, kernel_size=3, stride=1, padding=1), 88 | InPlaceABNSync(512), 89 | nn.Dropout2d(0.1), 90 | nn.Conv2d(512, num_classes, kernel_size=1, stride=1, padding=0, bias=True), 91 | ) 92 | 93 | self.dsn4 = nn.Sequential( 94 | nn.Conv2d(2048, 512, kernel_size=3, stride=1, padding=1), 95 | InPlaceABNSync(512), 96 | nn.Dropout2d(0.1), 97 | nn.Conv2d(512, num_classes, kernel_size=1, stride=1, padding=0, bias=True), 98 | ) 99 | 100 | def make_up_conv_layer(self, block, in_channels, out_channels, batch_size): 101 | return block(in_channels, out_channels, batch_size) 102 | 103 | def _make_layer(self, block, planes, blocks, stride=1, dilation=1, multi_grid=1, deformable=False, seg=False): 104 | downsample = None 105 | if stride != 1 or self.inplanes != planes * block.expansion: 106 | downsample = nn.Sequential( 107 | nn.Conv2d(self.inplanes, planes * block.expansion, 108 | kernel_size=1, stride=stride, bias=False), 109 | BatchNorm2d(planes * block.expansion, affine=affine_par)) 110 | 111 | layers = [] 112 | generate_multi_grid = lambda index, grids: grids[index % len(grids)] if 
isinstance(grids, tuple) else 1 113 | layers.append(block(self.inplanes, planes, stride, dilation=dilation, downsample=downsample, 114 | multi_grid=generate_multi_grid(0, multi_grid), deformable=deformable)) 115 | self.inplanes = planes * block.expansion 116 | for i in range(1, blocks): 117 | if seg == False: 118 | layers.append(block(self.inplanes, planes, dilation=dilation, 119 | multi_grid=generate_multi_grid(i, multi_grid), deformable=deformable)) 120 | else: 121 | if i >= blocks-2: 122 | layers.append(block(self.inplanes, planes, dilation=dilation, 123 | multi_grid=generate_multi_grid(i, multi_grid), deformable=deformable)) 124 | else: 125 | layers.append(block(self.inplanes, planes, dilation=dilation, 126 | multi_grid=generate_multi_grid(i, multi_grid), deformable=False)) 127 | 128 | return nn.Sequential(*layers) 129 | def forward(self, x, depth): 130 | S = depth 131 | x = self.relu1(self.bn1(self.conv1(x))) 132 | x = self.relu2(self.bn2(self.conv2(x))) 133 | x = self.relu3(self.bn3(self.conv3(x))) 134 | 135 | x = self.maxpool(x) 136 | 137 | x = [x, S] 138 | 139 | x = self.layer3(self.layer2(self.layer1(x))) 140 | x3 = self.dsn3(x[0]) 141 | 142 | x = self.layer4(x) 143 | x4 = self.dsn4(x[0]) 144 | 145 | return [x4, x3] 146 | 147 | def load_pretrain(self, pretrain_model_path): 148 | """Load pretrained Network""" 149 | saved_state_dict = torch.load(pretrain_model_path) 150 | new_params = self.state_dict().copy() 151 | for i in saved_state_dict: 152 | i_parts = i.split('.') 153 | if not i_parts[0] == 'fc': 154 | new_params['.'.join(i_parts[0:])] = saved_state_dict[i] 155 | 156 | self.load_state_dict(new_params) 157 | 158 | def SGNet(num_classes=21): 159 | model = ResNet(Bottleneck, [3, 4, 6, 3], num_classes) 160 | return model -------------------------------------------------------------------------------- /graphs/models/SGNet/SGNet_Res50_fps.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import functools 3 | import torch 4 | 5 | from graphs.ops.modules.s_conv import SConv 6 | from graphs.ops.libs import InPlaceABNSync 7 | 8 | affine_par = True 9 | BatchNorm2d = functools.partial(InPlaceABNSync, activation='none') 10 | 11 | def conv3x3(in_planes, out_planes, stride=1): 12 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 13 | padding=1, bias=False) 14 | 15 | class Bottleneck(nn.Module): 16 | expansion = 4 17 | 18 | def __init__(self, inplanes, planes, stride=1, dilation=1, downsample=None, fist_dilation=1, multi_grid=1, 19 | deformable=False): 20 | super(Bottleneck, self).__init__() 21 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 22 | self.bn1 = BatchNorm2d(planes) 23 | if deformable == False: 24 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 25 | padding=dilation * multi_grid, dilation=dilation * multi_grid, bias=False) 26 | else: 27 | self.conv2 = SConv(planes, planes, kernel_size=3, stride=stride, 28 | padding=1, deformable_groups=1, no_bias=True) 29 | self.bn2 = BatchNorm2d(planes) 30 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 31 | self.bn3 = BatchNorm2d(planes * 4) 32 | self.relu = nn.ReLU(inplace=False) 33 | self.relu_inplace = nn.ReLU(inplace=True) 34 | self.downsample = downsample 35 | self.dilation = dilation 36 | self.stride = stride 37 | self.deformable = deformable 38 | 39 | def forward(self, input): 40 | x, S = input 41 | residual = x 42 | 43 | out = self.conv1(x) 44 | out = self.bn1(out) 45 | out = 
self.relu(out) 46 | if self.deformable == False: 47 | out = self.conv2(out) 48 | else: 49 | out = self.conv2(out, S) 50 | out = self.bn2(out) 51 | out = self.relu(out) 52 | 53 | out = self.conv3(out) 54 | out = self.bn3(out) 55 | 56 | if self.downsample is not None: 57 | residual = self.downsample(x) 58 | 59 | out = out + residual 60 | out = self.relu_inplace(out) 61 | 62 | return [out, S] 63 | 64 | class ResNet(nn.Module): 65 | def __init__(self, block, layers, num_classes, deformable=True): 66 | self.inplanes = 128 67 | super(ResNet, self).__init__() 68 | self.conv1 = conv3x3(3, 64, stride=2) 69 | self.bn1 = BatchNorm2d(64) 70 | self.relu1 = nn.ReLU(inplace=False) 71 | self.conv2 = conv3x3(64, 64) 72 | self.bn2 = BatchNorm2d(64) 73 | self.relu2 = nn.ReLU(inplace=False) 74 | self.conv3 = conv3x3(64, 128) 75 | self.bn3 = BatchNorm2d(128) 76 | self.relu3 = nn.ReLU(inplace=False) 77 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 78 | self.relu = nn.ReLU(inplace=False) 79 | 80 | self.layer1 = self._make_layer(block, 64, layers[0], deformable=deformable, seg=True) 81 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2, deformable=deformable, seg=True) 82 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2, dilation=2, deformable=deformable, seg=True) 83 | self.layer4 = self._make_layer(block, 512, layers[3], stride=1, dilation=4, multi_grid=(1, 1, 1), 84 | deformable=deformable, seg=True) 85 | 86 | 87 | self.dsn4 = nn.Sequential( 88 | nn.Conv2d(2048, 512, kernel_size=3, stride=1, padding=1), 89 | InPlaceABNSync(512), 90 | nn.Dropout2d(0.1), 91 | nn.Conv2d(512, num_classes, kernel_size=1, stride=1, padding=0, bias=True), 92 | ) 93 | 94 | def make_up_conv_layer(self, block, in_channels, out_channels, batch_size): 95 | return block(in_channels, out_channels, batch_size) 96 | 97 | def _make_layer(self, block, planes, blocks, stride=1, dilation=1, multi_grid=1, deformable=False, seg=False): 98 | downsample = None 99 | if stride != 1 or self.inplanes != planes * block.expansion: 100 | downsample = nn.Sequential( 101 | nn.Conv2d(self.inplanes, planes * block.expansion, 102 | kernel_size=1, stride=stride, bias=False), 103 | BatchNorm2d(planes * block.expansion, affine=affine_par)) 104 | 105 | layers = [] 106 | generate_multi_grid = lambda index, grids: grids[index % len(grids)] if isinstance(grids, tuple) else 1 107 | layers.append(block(self.inplanes, planes, stride, dilation=dilation, downsample=downsample, 108 | multi_grid=generate_multi_grid(0, multi_grid), deformable=deformable)) 109 | self.inplanes = planes * block.expansion 110 | for i in range(1, blocks): 111 | if seg == False: 112 | layers.append(block(self.inplanes, planes, dilation=dilation, 113 | multi_grid=generate_multi_grid(i, multi_grid), deformable=deformable)) 114 | else: 115 | if i >= blocks-2: 116 | layers.append(block(self.inplanes, planes, dilation=dilation, 117 | multi_grid=generate_multi_grid(i, multi_grid), deformable=deformable)) 118 | else: 119 | layers.append(block(self.inplanes, planes, dilation=dilation, 120 | multi_grid=generate_multi_grid(i, multi_grid), deformable=False)) 121 | 122 | return nn.Sequential(*layers) 123 | def forward(self, x, depth): 124 | S = depth 125 | x = self.relu1(self.bn1(self.conv1(x))) 126 | x = self.relu2(self.bn2(self.conv2(x))) 127 | x = self.relu3(self.bn3(self.conv3(x))) 128 | 129 | x = self.maxpool(x) 130 | 131 | x = [x, S] 132 | 133 | x = self.layer3(self.layer2(self.layer1(x))) 134 | 135 | x = self.layer4(x) 136 | x4 = self.dsn4(x[0]) 137 | 138 
| return x4 139 | 140 | def load_pretrain(self, pretrain_model_path): 141 | """Load pretrained Network""" 142 | saved_state_dict = torch.load(pretrain_model_path) 143 | new_params = self.state_dict().copy() 144 | for i in saved_state_dict: 145 | i_parts = i.split('.') 146 | if not i_parts[0] == 'fc': 147 | new_params['.'.join(i_parts[0:])] = saved_state_dict[i] 148 | 149 | self.load_state_dict(new_params) 150 | 151 | def SGNet(num_classes=21): 152 | model = ResNet(Bottleneck, [3, 4, 6, 3], num_classes) 153 | return model -------------------------------------------------------------------------------- /graphs/models/SGNet/SGNet_fps.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import functools 3 | import torch 4 | 5 | from graphs.ops.modules.s_conv import SConv 6 | from graphs.ops.libs import InPlaceABNSync 7 | 8 | affine_par = True 9 | BatchNorm2d = functools.partial(InPlaceABNSync, activation='none') 10 | 11 | def conv3x3(in_planes, out_planes, stride=1): 12 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 13 | padding=1, bias=False) 14 | 15 | class Bottleneck(nn.Module): 16 | expansion = 4 17 | 18 | def __init__(self, inplanes, planes, stride=1, dilation=1, downsample=None, fist_dilation=1, multi_grid=1, 19 | deformable=False): 20 | super(Bottleneck, self).__init__() 21 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 22 | self.bn1 = BatchNorm2d(planes) 23 | if deformable == False: 24 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 25 | padding=dilation * multi_grid, dilation=dilation * multi_grid, bias=False) 26 | else: 27 | self.conv2 = SConv(planes, planes, kernel_size=3, stride=stride, 28 | padding=1, deformable_groups=1, no_bias=True) 29 | self.bn2 = BatchNorm2d(planes) 30 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 31 | self.bn3 = BatchNorm2d(planes * 4) 32 | self.relu = nn.ReLU(inplace=False) 33 | self.relu_inplace = nn.ReLU(inplace=True) 34 | self.downsample = downsample 35 | self.dilation = dilation 36 | self.stride = stride 37 | self.deformable = deformable 38 | 39 | def forward(self, input): 40 | x, S = input 41 | residual = x 42 | 43 | out = self.conv1(x) 44 | out = self.bn1(out) 45 | out = self.relu(out) 46 | if self.deformable == False: 47 | out = self.conv2(out) 48 | else: 49 | out = self.conv2(out, S) 50 | out = self.bn2(out) 51 | out = self.relu(out) 52 | 53 | out = self.conv3(out) 54 | out = self.bn3(out) 55 | 56 | if self.downsample is not None: 57 | residual = self.downsample(x) 58 | 59 | out = out + residual 60 | out = self.relu_inplace(out) 61 | 62 | return [out, S] 63 | 64 | class ResNet(nn.Module): 65 | def __init__(self, block, layers, num_classes, deformable=True): 66 | self.inplanes = 128 67 | super(ResNet, self).__init__() 68 | self.conv1 = conv3x3(3, 64, stride=2) 69 | self.bn1 = BatchNorm2d(64) 70 | self.relu1 = nn.ReLU(inplace=False) 71 | self.conv2 = conv3x3(64, 64) 72 | self.bn2 = BatchNorm2d(64) 73 | self.relu2 = nn.ReLU(inplace=False) 74 | self.conv3 = conv3x3(64, 128) 75 | self.bn3 = BatchNorm2d(128) 76 | self.relu3 = nn.ReLU(inplace=False) 77 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 78 | self.relu = nn.ReLU(inplace=False) 79 | 80 | self.layer1 = self._make_layer(block, 64, layers[0], deformable=deformable, seg=True) 81 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2, deformable=deformable, seg=True) 82 | self.layer3 = self._make_layer(block, 256, 
layers[2], stride=2, dilation=2, deformable=deformable, seg=True) 83 | self.layer4 = self._make_layer(block, 512, layers[3], stride=1, dilation=4, multi_grid=(1, 1, 1), 84 | deformable=deformable, seg=True) 85 | 86 | self.dsn4 = nn.Sequential( 87 | nn.Conv2d(2048, 512, kernel_size=3, stride=1, padding=1), 88 | InPlaceABNSync(512), 89 | nn.Dropout2d(0.1), 90 | nn.Conv2d(512, num_classes, kernel_size=1, stride=1, padding=0, bias=True), 91 | ) 92 | 93 | def make_up_conv_layer(self, block, in_channels, out_channels, batch_size): 94 | return block(in_channels, out_channels, batch_size) 95 | 96 | def _make_layer(self, block, planes, blocks, stride=1, dilation=1, multi_grid=1, deformable=False, seg=False): 97 | downsample = None 98 | if stride != 1 or self.inplanes != planes * block.expansion: 99 | downsample = nn.Sequential( 100 | nn.Conv2d(self.inplanes, planes * block.expansion, 101 | kernel_size=1, stride=stride, bias=False), 102 | BatchNorm2d(planes * block.expansion, affine=affine_par)) 103 | 104 | layers = [] 105 | generate_multi_grid = lambda index, grids: grids[index % len(grids)] if isinstance(grids, tuple) else 1 106 | layers.append(block(self.inplanes, planes, stride, dilation=dilation, downsample=downsample, 107 | multi_grid=generate_multi_grid(0, multi_grid), deformable=deformable)) 108 | self.inplanes = planes * block.expansion 109 | for i in range(1, blocks): 110 | if seg == False: 111 | layers.append(block(self.inplanes, planes, dilation=dilation, 112 | multi_grid=generate_multi_grid(i, multi_grid), deformable=deformable)) 113 | else: 114 | if i >= blocks-2: 115 | layers.append(block(self.inplanes, planes, dilation=dilation, 116 | multi_grid=generate_multi_grid(i, multi_grid), deformable=deformable)) 117 | else: 118 | layers.append(block(self.inplanes, planes, dilation=dilation, 119 | multi_grid=generate_multi_grid(i, multi_grid), deformable=False)) 120 | 121 | return nn.Sequential(*layers) 122 | def forward(self, x, depth): 123 | S = depth 124 | x = self.relu1(self.bn1(self.conv1(x))) 125 | x = self.relu2(self.bn2(self.conv2(x))) 126 | x = self.relu3(self.bn3(self.conv3(x))) 127 | 128 | x = self.maxpool(x) 129 | 130 | x = [x, S] 131 | 132 | x = self.layer3(self.layer2(self.layer1(x))) 133 | 134 | x = self.layer4(x) 135 | x4 = self.dsn4(x[0]) 136 | 137 | return x4 138 | 139 | def load_pretrain(self, pretrain_model_path): 140 | """Load pretrained Network""" 141 | saved_state_dict = torch.load(pretrain_model_path) 142 | new_params = self.state_dict().copy() 143 | for i in saved_state_dict: 144 | i_parts = i.split('.') 145 | if not i_parts[0] == 'fc': 146 | new_params['.'.join(i_parts[0:])] = saved_state_dict[i] 147 | 148 | self.load_state_dict(new_params) 149 | 150 | def SGNet(num_classes=21): 151 | model = ResNet(Bottleneck, [3, 4, 23, 3], num_classes) 152 | return model -------------------------------------------------------------------------------- /graphs/ops/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LinZhuoChen/SGNet/02510182eb4baca77dd1d99237a5e77812055a0c/graphs/ops/__init__.py -------------------------------------------------------------------------------- /graphs/ops/build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.ffi import create_extension 4 | 5 | this_file = os.path.dirname(__file__) 6 | 7 | sources = ['src/deform_conv.c'] 8 | headers = ['src/deform_conv.h'] 9 | defines = [] 10 | with_cuda = 
False 11 | 12 | if torch.cuda.is_available(): 13 | print('Including CUDA code.') 14 | sources += ['src/deform_conv_cuda.c'] 15 | headers += ['src/deform_conv_cuda.h'] 16 | defines += [('WITH_CUDA', None)] 17 | with_cuda = True 18 | 19 | this_file = os.path.dirname(os.path.realpath(__file__)) 20 | print(this_file) 21 | extra_objects = ['src/deform_conv_cuda_kernel.cu.so'] 22 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 23 | 24 | ffi = create_extension( 25 | '_ext.deform_conv', 26 | headers=headers, 27 | sources=sources, 28 | define_macros=defines, 29 | relative_to=__file__, 30 | with_cuda=with_cuda, 31 | extra_objects=extra_objects, 32 | extra_compile_args=['-std=c++11'] 33 | ) 34 | 35 | assert torch.cuda.is_available(), 'Please install CUDA for GPU support.' 36 | ffi.build() 37 | 38 | -------------------------------------------------------------------------------- /graphs/ops/build_modulated.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.ffi import create_extension 4 | 5 | 6 | sources = ['src/modulated_dcn.c'] 7 | headers = ['src/modulated_dcn.h'] 8 | defines = [] 9 | with_cuda = False 10 | 11 | extra_objects = [] 12 | if torch.cuda.is_available(): 13 | print('Including CUDA code.') 14 | sources += ['src/modulated_dcn_cuda.c'] 15 | headers += ['src/modulated_dcn_cuda.h'] 16 | defines += [('WITH_CUDA', None)] 17 | extra_objects += ['src/cuda/modulated_deform_im2col_cuda.cu.so'] 18 | extra_objects += ['src/cuda/deform_psroi_pooling_cuda.cu.so'] 19 | with_cuda = True 20 | else: 21 | raise ValueError('CUDA is not available') 22 | 23 | extra_compile_args = ['-fopenmp', '-std=c99'] 24 | 25 | this_file = os.path.dirname(os.path.realpath(__file__)) 26 | print(this_file) 27 | sources = [os.path.join(this_file, fname) for fname in sources] 28 | headers = [os.path.join(this_file, fname) for fname in headers] 29 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 30 | 31 | ffi = create_extension( 32 | '_ext.modulated_dcn', 33 | headers=headers, 34 | sources=sources, 35 | define_macros=defines, 36 | relative_to=__file__, 37 | with_cuda=with_cuda, 38 | extra_objects=extra_objects, 39 | extra_compile_args=extra_compile_args 40 | ) 41 | 42 | if __name__ == '__main__': 43 | ffi.build() 44 | -------------------------------------------------------------------------------- /graphs/ops/functions/__init__.py: -------------------------------------------------------------------------------- 1 | from .deform_conv import DeformConvFunction, deform_conv_function 2 | from .modulated_dcn_func import DeformRoIPoolingFunction, ModulatedDeformConvFunction 3 | # from .scale_conv import ScaleConvFunction, scale_conv_function -------------------------------------------------------------------------------- /graphs/ops/functions/deform_conv.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | from torch.nn.modules.utils import _pair 4 | 5 | from graphs.ops._ext import deform_conv 6 | 7 | 8 | def deform_conv_function(input, 9 | offset, 10 | weight, 11 | stride=1, 12 | padding=0, 13 | dilation=1, 14 | deform_groups=1, 15 | im2col_step=64): 16 | 17 | if input is not None and input.dim() != 4: 18 | raise ValueError( 19 | "Expected 4D tensor as input, got {}D tensor instead.".format( 20 | input.dim())) 21 | 22 | f = DeformConvFunction( 23 | _pair(stride), _pair(padding), _pair(dilation), 
deform_groups, im2col_step) 24 | return f(input, offset, weight) 25 | 26 | 27 | class DeformConvFunction(Function): 28 | def __init__(self, stride, padding, dilation, deformable_groups=1, im2col_step=64): 29 | super(DeformConvFunction, self).__init__() 30 | self.stride = stride 31 | self.padding = padding 32 | self.dilation = dilation 33 | self.deformable_groups = deformable_groups 34 | self.im2col_step = im2col_step 35 | 36 | def forward(self, input, offset, weight): 37 | self.save_for_backward(input, offset, weight) 38 | 39 | output = input.new(*self._output_size(input, weight)) 40 | 41 | self.bufs_ = [input.new(), input.new()] # columns, ones 42 | 43 | if not input.is_cuda: 44 | raise NotImplementedError 45 | else: 46 | if isinstance(input, torch.autograd.Variable): 47 | if not isinstance(input.data, torch.cuda.FloatTensor): 48 | raise NotImplementedError 49 | else: 50 | if not isinstance(input, torch.cuda.FloatTensor): 51 | raise NotImplementedError 52 | 53 | cur_im2col_step = min(self.im2col_step, input.shape[0]) 54 | assert (input.shape[0] % cur_im2col_step) == 0, 'im2col step must divide batchsize' 55 | deform_conv.deform_conv_forward_cuda( 56 | input, weight, offset, output, self.bufs_[0], self.bufs_[1], 57 | weight.size(3), weight.size(2), self.stride[1], self.stride[0], 58 | self.padding[1], self.padding[0], self.dilation[1], 59 | self.dilation[0], self.deformable_groups, cur_im2col_step) 60 | return output 61 | 62 | def backward(self, grad_output): 63 | input, offset, weight = self.saved_tensors 64 | 65 | grad_input = grad_offset = grad_weight = None 66 | 67 | if not grad_output.is_cuda: 68 | raise NotImplementedError 69 | else: 70 | if isinstance(grad_output, torch.autograd.Variable): 71 | if not isinstance(grad_output.data, torch.cuda.FloatTensor): 72 | raise NotImplementedError 73 | else: 74 | if not isinstance(grad_output, torch.cuda.FloatTensor): 75 | raise NotImplementedError 76 | 77 | cur_im2col_step = min(self.im2col_step, input.shape[0]) 78 | assert (input.shape[0] % cur_im2col_step) == 0, 'im2col step must divide batchsize' 79 | 80 | if self.needs_input_grad[0] or self.needs_input_grad[1]: 81 | grad_input = input.new(*input.size()).zero_() 82 | grad_offset = offset.new(*offset.size()).zero_() 83 | deform_conv.deform_conv_backward_input_cuda( 84 | input, offset, grad_output, grad_input, 85 | grad_offset, weight, self.bufs_[0], weight.size(3), 86 | weight.size(2), self.stride[1], self.stride[0], 87 | self.padding[1], self.padding[0], self.dilation[1], 88 | self.dilation[0], self.deformable_groups, cur_im2col_step) 89 | 90 | 91 | if self.needs_input_grad[2]: 92 | grad_weight = weight.new(*weight.size()).zero_() 93 | deform_conv.deform_conv_backward_parameters_cuda( 94 | input, offset, grad_output, 95 | grad_weight, self.bufs_[0], self.bufs_[1], weight.size(3), 96 | weight.size(2), self.stride[1], self.stride[0], 97 | self.padding[1], self.padding[0], self.dilation[1], 98 | self.dilation[0], self.deformable_groups, 1, cur_im2col_step) 99 | 100 | return grad_input, grad_offset, grad_weight 101 | 102 | def _output_size(self, input, weight): 103 | channels = weight.size(0) 104 | 105 | output_size = (input.size(0), channels) 106 | for d in range(input.dim() - 2): 107 | in_size = input.size(d + 2) 108 | pad = self.padding[d] 109 | kernel = self.dilation[d] * (weight.size(d + 2) - 1) + 1 110 | stride = self.stride[d] 111 | output_size += ((in_size + (2 * pad) - kernel) // stride + 1, ) 112 | if not all(map(lambda s: s > 0, output_size)): 113 | raise ValueError( 114 | 
"convolution input is too small (output would be {})".format( 115 | 'x'.join(map(str, output_size)))) 116 | return output_size 117 | -------------------------------------------------------------------------------- /graphs/ops/functions/modulated_dcn_func.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import absolute_import 3 | from __future__ import print_function 4 | from __future__ import division 5 | 6 | import torch 7 | from torch.autograd import Function 8 | 9 | from graphs.ops._ext import modulated_dcn as _backend 10 | 11 | 12 | class ModulatedDeformConvFunction(Function): 13 | 14 | def __init__(self, stride, padding, dilation=1, deformable_groups=1): 15 | super(ModulatedDeformConvFunction, self).__init__() 16 | self.stride = stride 17 | self.padding = padding 18 | self.dilation = dilation 19 | self.deformable_groups = deformable_groups 20 | 21 | def forward(self, input, offset, mask, weight, bias): 22 | if not input.is_cuda: 23 | raise NotImplementedError 24 | if weight.requires_grad or mask.requires_grad or offset.requires_grad or input.requires_grad: 25 | self.save_for_backward(input, offset, mask, weight, bias) 26 | output = input.new(*self._infer_shape(input, weight)) 27 | self._bufs = [input.new(), input.new()] 28 | _backend.modulated_deform_conv_cuda_forward(input, weight, 29 | bias, self._bufs[0], 30 | offset, mask, 31 | output, self._bufs[1], 32 | weight.shape[2], weight.shape[3], 33 | self.stride, self.stride, 34 | self.padding, self.padding, 35 | self.dilation, self.dilation, 36 | self.deformable_groups) 37 | return output 38 | 39 | def backward(self, grad_output): 40 | if not grad_output.is_cuda: 41 | raise NotImplementedError 42 | input, offset, mask, weight, bias = self.saved_tensors 43 | grad_input = input.new(*input.size()).zero_() 44 | grad_offset = offset.new(*offset.size()).zero_() 45 | grad_mask = mask.new(*mask.size()).zero_() 46 | grad_weight = weight.new(*weight.size()).zero_() 47 | grad_bias = bias.new(*bias.size()).zero_() 48 | _backend.modulated_deform_conv_cuda_backward(input, weight, 49 | bias, self._bufs[0], 50 | offset, mask, 51 | self._bufs[1], 52 | grad_input, grad_weight, 53 | grad_bias, grad_offset, 54 | grad_mask, grad_output, 55 | weight.shape[2], weight.shape[3], 56 | self.stride, self.stride, 57 | self.padding, self.padding, 58 | self.dilation, self.dilation, 59 | self.deformable_groups) 60 | 61 | return grad_input, grad_offset, grad_mask, grad_weight, grad_bias 62 | 63 | def _infer_shape(self, input, weight): 64 | n = input.size(0) 65 | channels_out = weight.size(0) 66 | height, width = input.shape[2:4] 67 | kernel_h, kernel_w = weight.shape[2:4] 68 | height_out = (height + 2 * self.padding - 69 | (self.dilation * (kernel_h - 1) + 1)) // self.stride + 1 70 | width_out = (width + 2 * self.padding - (self.dilation * 71 | (kernel_w - 1) + 1)) // self.stride + 1 72 | return (n, channels_out, height_out, width_out) 73 | 74 | 75 | class DeformRoIPoolingFunction(Function): 76 | 77 | def __init__(self, 78 | spatial_scale, 79 | pooled_size, 80 | output_dim, 81 | no_trans, 82 | group_size=1, 83 | part_size=None, 84 | sample_per_part=4, 85 | trans_std=.0): 86 | super(DeformRoIPoolingFunction, self).__init__() 87 | self.spatial_scale = spatial_scale 88 | self.pooled_size = pooled_size 89 | self.output_dim = output_dim 90 | self.no_trans = no_trans 91 | self.group_size = group_size 92 | self.part_size = pooled_size if part_size is None else part_size 93 | self.sample_per_part 
= sample_per_part 94 | self.trans_std = trans_std 95 | 96 | assert self.trans_std >= 0.0 and self.trans_std <= 1.0 97 | 98 | def forward(self, data, rois, offset): 99 | if not data.is_cuda: 100 | raise NotImplementedError 101 | 102 | output = data.new(*self._infer_shape(data, rois)) 103 | output_count = data.new(*self._infer_shape(data, rois)) 104 | _backend.deform_psroi_pooling_cuda_forward(data, rois, offset, 105 | output, output_count, 106 | self.no_trans, self.spatial_scale, 107 | self.output_dim, self.group_size, 108 | self.pooled_size, self.part_size, 109 | self.sample_per_part, self.trans_std) 110 | 111 | # if data.requires_grad or rois.requires_grad or offset.requires_grad: 112 | # self.save_for_backward(data, rois, offset, output_count) 113 | self.data = data 114 | self.rois = rois 115 | self.offset = offset 116 | self.output_count = output_count 117 | 118 | return output 119 | 120 | def backward(self, grad_output): 121 | if not grad_output.is_cuda: 122 | raise NotImplementedError 123 | 124 | # data, rois, offset, output_count = self.saved_tensors 125 | data = self.data 126 | rois = self.rois 127 | offset = self.offset 128 | output_count = self.output_count 129 | grad_input = data.new(*data.size()).zero_() 130 | grad_offset = offset.new(*offset.size()).zero_() 131 | 132 | _backend.deform_psroi_pooling_cuda_backward(grad_output, 133 | data, 134 | rois, 135 | offset, 136 | output_count, 137 | grad_input, 138 | grad_offset, 139 | self.no_trans, 140 | self.spatial_scale, 141 | self.output_dim, 142 | self.group_size, 143 | self.pooled_size, 144 | self.part_size, 145 | self.sample_per_part, 146 | self.trans_std) 147 | return grad_input, torch.zeros(rois.shape).cuda(), grad_offset 148 | 149 | def _infer_shape(self, data, rois): 150 | # _, c, h, w = data.shape[:4] 151 | c = data.shape[1] 152 | n = rois.shape[0] 153 | return (n, self.output_dim, self.pooled_size, self.pooled_size) 154 | -------------------------------------------------------------------------------- /graphs/ops/libs/__init__.py: -------------------------------------------------------------------------------- 1 | from .bn import ABN, InPlaceABN, InPlaceABNWrapper, InPlaceABNSync, InPlaceABNSyncWrapper 2 | from .misc import GlobalAvgPool2d 3 | from .residual import IdentityResidualBlock 4 | from .dense import DenseModule 5 | -------------------------------------------------------------------------------- /graphs/ops/libs/_ext/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from .__ext import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /graphs/ops/libs/bn.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict, Iterable 2 | from itertools import repeat 3 | 4 | try: 5 | # python 3 6 | from queue import Queue 7 | except ImportError: 8 | # python 2 9 | from Queue import Queue 10 | 11 | import torch 12 | import torch.nn as nn 13 | import torch.autograd as autograd 14 | 15 | from .functions import inplace_abn, inplace_abn_sync 16 | 17 | 18 | def _pair(x): 19 | if isinstance(x, Iterable): 20 | return x 21 | return 
tuple(repeat(x, 2)) 22 | 23 | 24 | class ABN(nn.Sequential): 25 | """Activated Batch Normalization 26 | 27 | This gathers a `BatchNorm2d` and an activation function in a single module 28 | """ 29 | 30 | def __init__(self, num_features, activation=nn.ReLU(inplace=True), **kwargs): 31 | """Creates an Activated Batch Normalization module 32 | 33 | Parameters 34 | ---------- 35 | num_features : int 36 | Number of feature channels in the input and output. 37 | activation : nn.Module 38 | Module used as an activation function. 39 | kwargs 40 | All other arguments are forwarded to the `BatchNorm2d` constructor. 41 | """ 42 | super(ABN, self).__init__(OrderedDict([ 43 | ("bn", nn.BatchNorm2d(num_features, **kwargs)), 44 | ("act", activation) 45 | ])) 46 | 47 | 48 | class InPlaceABN(nn.Module): 49 | """InPlace Activated Batch Normalization""" 50 | 51 | def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, activation="leaky_relu", slope=0.01): 52 | """Creates an InPlace Activated Batch Normalization module 53 | 54 | Parameters 55 | ---------- 56 | num_features : int 57 | Number of feature channels in the input and output. 58 | eps : float 59 | Small constant to prevent numerical issues. 60 | momentum : float 61 | Momentum factor applied to compute running statistics as. 62 | affine : bool 63 | If `True` apply learned scale and shift transformation after normalization. 64 | activation : str 65 | Name of the activation functions, one of: `leaky_relu`, `elu` or `none`. 66 | slope : float 67 | Negative slope for the `leaky_relu` activation. 68 | """ 69 | super(InPlaceABN, self).__init__() 70 | self.num_features = num_features 71 | self.affine = affine 72 | self.eps = eps 73 | self.momentum = momentum 74 | self.activation = activation 75 | self.slope = slope 76 | if self.affine: 77 | self.weight = nn.Parameter(torch.Tensor(num_features)) 78 | self.bias = nn.Parameter(torch.Tensor(num_features)) 79 | else: 80 | self.register_parameter('weight', None) 81 | self.register_parameter('bias', None) 82 | self.register_buffer('running_mean', torch.zeros(num_features)) 83 | self.register_buffer('running_var', torch.ones(num_features)) 84 | self.reset_parameters() 85 | 86 | def reset_parameters(self): 87 | self.running_mean.zero_() 88 | self.running_var.fill_(1) 89 | if self.affine: 90 | self.weight.data.fill_(1) 91 | self.bias.data.zero_() 92 | 93 | def forward(self, x): 94 | return inplace_abn(x, self.weight, self.bias, autograd.Variable(self.running_mean), 95 | autograd.Variable(self.running_var), self.training, self.momentum, self.eps, 96 | self.activation, self.slope) 97 | 98 | def __repr__(self): 99 | rep = '{name}({num_features}, eps={eps}, momentum={momentum},' \ 100 | ' affine={affine}, activation={activation}' 101 | if self.activation == "leaky_relu": 102 | rep += ' slope={slope})' 103 | else: 104 | rep += ')' 105 | return rep.format(name=self.__class__.__name__, **self.__dict__) 106 | 107 | 108 | class InPlaceABNSync(nn.Module): 109 | """InPlace Activated Batch Normalization with cross-GPU synchronization 110 | 111 | This assumes that it will be replicated across GPUs using the same mechanism as in `nn.DataParallel`. 112 | """ 113 | 114 | def __init__(self, num_features, devices=None, eps=1e-5, momentum=0.1, affine=True, activation="leaky_relu", 115 | slope=0.01): 116 | """Creates a synchronized, InPlace Activated Batch Normalization module 117 | 118 | Parameters 119 | ---------- 120 | num_features : int 121 | Number of feature channels in the input and output. 
122 | devices : list of int or None 123 | IDs of the GPUs that will run the replicas of this module. 124 | eps : float 125 | Small constant to prevent numerical issues. 126 | momentum : float 127 | Momentum factor applied to compute running statistics as. 128 | affine : bool 129 | If `True` apply learned scale and shift transformation after normalization. 130 | activation : str 131 | Name of the activation functions, one of: `leaky_relu`, `elu` or `none`. 132 | slope : float 133 | Negative slope for the `leaky_relu` activation. 134 | """ 135 | super(InPlaceABNSync, self).__init__() 136 | self.num_features = num_features 137 | self.devices = devices if devices else list(range(torch.cuda.device_count())) 138 | self.affine = affine 139 | self.eps = eps 140 | self.momentum = momentum 141 | self.activation = activation 142 | self.slope = slope 143 | if self.affine: 144 | self.weight = nn.Parameter(torch.Tensor(num_features)) 145 | self.bias = nn.Parameter(torch.Tensor(num_features)) 146 | else: 147 | self.register_parameter('weight', None) 148 | self.register_parameter('bias', None) 149 | self.register_buffer('running_mean', torch.zeros(num_features)) 150 | self.register_buffer('running_var', torch.ones(num_features)) 151 | self.reset_parameters() 152 | 153 | # Initialize queues 154 | self.worker_ids = self.devices[1:] 155 | self.master_queue = Queue(len(self.worker_ids)) 156 | self.worker_queues = [Queue(1) for _ in self.worker_ids] 157 | 158 | def reset_parameters(self): 159 | self.running_mean.zero_() 160 | self.running_var.fill_(1) 161 | if self.affine: 162 | self.weight.data.fill_(1) 163 | self.bias.data.zero_() 164 | 165 | def forward(self, x): 166 | if x.get_device() == self.devices[0]: 167 | # Master mode 168 | extra = { 169 | "is_master": True, 170 | "master_queue": self.master_queue, 171 | "worker_queues": self.worker_queues, 172 | "worker_ids": self.worker_ids 173 | } 174 | else: 175 | # Worker mode 176 | extra = { 177 | "is_master": False, 178 | "master_queue": self.master_queue, 179 | "worker_queue": self.worker_queues[self.worker_ids.index(x.get_device())] 180 | } 181 | 182 | return inplace_abn_sync(x, self.weight, self.bias, autograd.Variable(self.running_mean), 183 | autograd.Variable(self.running_var), extra, self.training, self.momentum, self.eps, 184 | self.activation, self.slope) 185 | 186 | def __repr__(self): 187 | rep = '{name}({num_features}, eps={eps}, momentum={momentum},' \ 188 | ' affine={affine}, devices={devices}, activation={activation}' 189 | if self.activation == "leaky_relu": 190 | rep += ' slope={slope})' 191 | else: 192 | rep += ')' 193 | return rep.format(name=self.__class__.__name__, **self.__dict__) 194 | 195 | 196 | class InPlaceABNWrapper(nn.Module): 197 | """Wrapper module to make `InPlaceABN` compatible with `ABN`""" 198 | 199 | def __init__(self, *args, **kwargs): 200 | super(InPlaceABNWrapper, self).__init__() 201 | self.bn = InPlaceABN(*args, **kwargs) 202 | 203 | def forward(self, input): 204 | return self.bn(input) 205 | 206 | 207 | class InPlaceABNSyncWrapper(nn.Module): 208 | """Wrapper module to make `InPlaceABNSync` compatible with `ABN`""" 209 | 210 | def __init__(self, *args, **kwargs): 211 | super(InPlaceABNSyncWrapper, self).__init__() 212 | self.bn = InPlaceABNSync(*args, **kwargs) 213 | 214 | def forward(self, input): 215 | return self.bn(input) 216 | -------------------------------------------------------------------------------- /graphs/ops/libs/build.py: -------------------------------------------------------------------------------- 
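The `InPlaceABN` / `InPlaceABNSync` modules defined in `bn.py` above act as drop-in `nn.Module`s once the native `_ext` library has been compiled (via `build.sh` plus the `build.py` script below). A minimal usage sketch, assuming the extension is built and a GPU is available; everything outside the repo's own names is illustrative:

```python
# Illustrative only: assumes the compiled _ext CUDA extension is importable
# and a CUDA device is present (PyTorch 0.4.1, as required by the README).
import torch
import torch.nn as nn
from graphs.ops.libs import InPlaceABN

block = nn.Sequential(
    nn.Conv2d(64, 128, kernel_size=3, padding=1, bias=False),
    InPlaceABN(128),  # fused BN + leaky_relu(slope=0.01), computed in place
).cuda()

y = block(torch.randn(2, 64, 32, 32).cuda())  # y: (2, 128, 32, 32)
```

The in-place computation overwrites the normalized activations in a single buffer instead of storing a separate pre-activation tensor, which is the main memory saving of InPlace-ABN.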
1 | import os 2 | 3 | from torch.utils.ffi import create_extension 4 | 5 | sources = ['src/lib_cffi.cpp'] 6 | headers = ['src/lib_cffi.h'] 7 | extra_objects = ['src/bn.o'] 8 | with_cuda = True 9 | 10 | this_file = os.path.dirname(os.path.realpath(__file__)) 11 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 12 | 13 | ffi = create_extension( 14 | '_ext', 15 | headers=headers, 16 | sources=sources, 17 | relative_to=__file__, 18 | with_cuda=with_cuda, 19 | extra_objects=extra_objects, 20 | extra_compile_args=["-std=c++11"] 21 | ) 22 | 23 | if __name__ == '__main__': 24 | ffi.build() 25 | -------------------------------------------------------------------------------- /graphs/ops/libs/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Configuration 4 | CUDA_GENCODE="-arch=sm_50 \ 5 | -gencode=arch=compute_50,code=sm_50 \ 6 | -gencode=arch=compute_52,code=sm_52 \ 7 | -gencode=arch=compute_60,code=sm_60" 8 | 9 | cd src 10 | nvcc -I/usr/local/cuda/include --expt-extended-lambda -O3 -c -o bn.o bn.cu -x cu -Xcompiler -fPIC -std=c++11 ${CUDA_GENCODE} 11 | cd .. 12 | -------------------------------------------------------------------------------- /graphs/ops/libs/dense.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | from .bn import ABN 7 | 8 | 9 | class DenseModule(nn.Module): 10 | def __init__(self, in_channels, growth, layers, bottleneck_factor=4, norm_act=ABN, dilation=1): 11 | super(DenseModule, self).__init__() 12 | self.in_channels = in_channels 13 | self.growth = growth 14 | self.layers = layers 15 | 16 | self.convs1 = nn.ModuleList() 17 | self.convs3 = nn.ModuleList() 18 | for i in range(self.layers): 19 | self.convs1.append(nn.Sequential(OrderedDict([ 20 | ("bn", norm_act(in_channels)), 21 | ("conv", nn.Conv2d(in_channels, self.growth * bottleneck_factor, 1, bias=False)) 22 | ]))) 23 | self.convs3.append(nn.Sequential(OrderedDict([ 24 | ("bn", norm_act(self.growth * bottleneck_factor)), 25 | ("conv", nn.Conv2d(self.growth * bottleneck_factor, self.growth, 3, padding=dilation, bias=False, 26 | dilation=dilation)) 27 | ]))) 28 | in_channels += self.growth 29 | 30 | @property 31 | def out_channels(self): 32 | return self.in_channels + self.growth * self.layers 33 | 34 | def forward(self, x): 35 | inputs = [x] 36 | for i in range(self.layers): 37 | x = torch.cat(inputs, dim=1) 38 | x = self.convs1[i](x) 39 | x = self.convs3[i](x) 40 | inputs += [x] 41 | 42 | return torch.cat(inputs, dim=1) -------------------------------------------------------------------------------- /graphs/ops/libs/functions.py: -------------------------------------------------------------------------------- 1 | import torch.autograd as autograd 2 | import torch.cuda.comm as comm 3 | from torch.autograd.function import once_differentiable 4 | 5 | from . 
import _ext 6 | 7 | # Activation names 8 | ACT_LEAKY_RELU = "leaky_relu" 9 | ACT_ELU = "elu" 10 | ACT_NONE = "none" 11 | 12 | 13 | def _check(fn, *args, **kwargs): 14 | success = fn(*args, **kwargs) 15 | if not success: 16 | raise RuntimeError("CUDA Error encountered in {}".format(fn)) 17 | 18 | 19 | def _broadcast_shape(x): 20 | out_size = [] 21 | for i, s in enumerate(x.size()): 22 | if i != 1: 23 | out_size.append(1) 24 | else: 25 | out_size.append(s) 26 | return out_size 27 | 28 | 29 | def _reduce(x): 30 | if len(x.size()) == 2: 31 | return x.sum(dim=0) 32 | else: 33 | n, c = x.size()[0:2] 34 | return x.contiguous().view((n, c, -1)).sum(2).sum(0) 35 | 36 | 37 | def _count_samples(x): 38 | count = 1 39 | for i, s in enumerate(x.size()): 40 | if i != 1: 41 | count *= s 42 | return count 43 | 44 | 45 | def _act_forward(ctx, x): 46 | if ctx.activation == ACT_LEAKY_RELU: 47 | _check(_ext.leaky_relu_cuda, x, ctx.slope) 48 | elif ctx.activation == ACT_ELU: 49 | _check(_ext.elu_cuda, x) 50 | elif ctx.activation == ACT_NONE: 51 | pass 52 | 53 | 54 | def _act_backward(ctx, x, dx): 55 | if ctx.activation == ACT_LEAKY_RELU: 56 | _check(_ext.leaky_relu_backward_cuda, x, dx, ctx.slope) 57 | _check(_ext.leaky_relu_cuda, x, 1. / ctx.slope) 58 | elif ctx.activation == ACT_ELU: 59 | _check(_ext.elu_backward_cuda, x, dx) 60 | _check(_ext.elu_inv_cuda, x) 61 | elif ctx.activation == ACT_NONE: 62 | pass 63 | 64 | 65 | def _check_contiguous(*args): 66 | if not all([mod is None or mod.is_contiguous() for mod in args]): 67 | raise ValueError("Non-contiguous input") 68 | 69 | 70 | class InPlaceABN(autograd.Function): 71 | @staticmethod 72 | def forward(ctx, x, weight, bias, running_mean, running_var, 73 | training=True, momentum=0.1, eps=1e-05, activation=ACT_LEAKY_RELU, slope=0.01): 74 | # Save context 75 | ctx.training = training 76 | ctx.momentum = momentum 77 | ctx.eps = eps 78 | ctx.activation = activation 79 | ctx.slope = slope 80 | 81 | n = _count_samples(x) 82 | 83 | if ctx.training: 84 | mean = x.new().resize_as_(running_mean) 85 | var = x.new().resize_as_(running_var) 86 | _check_contiguous(x, mean, var) 87 | _check(_ext.bn_mean_var_cuda, x, mean, var) 88 | 89 | # Update running stats 90 | running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * mean) 91 | running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var * n / (n - 1)) 92 | else: 93 | mean, var = running_mean, running_var 94 | 95 | _check_contiguous(x, mean, var, weight, bias) 96 | _check(_ext.bn_forward_cuda, 97 | x, mean, var, 98 | weight if weight is not None else x.new(), 99 | bias if bias is not None else x.new(), 100 | x, x, ctx.eps) 101 | 102 | # Activation 103 | _act_forward(ctx, x) 104 | 105 | # Output 106 | ctx.var = var 107 | ctx.save_for_backward(x, weight, bias, running_mean, running_var) 108 | ctx.mark_dirty(x) 109 | return x 110 | 111 | @staticmethod 112 | @once_differentiable 113 | def backward(ctx, dz): 114 | z, weight, bias, running_mean, running_var = ctx.saved_tensors 115 | dz = dz.contiguous() 116 | 117 | # Undo activation 118 | _act_backward(ctx, z, dz) 119 | 120 | if ctx.needs_input_grad[0]: 121 | dx = dz.new().resize_as_(dz) 122 | else: 123 | dx = None 124 | 125 | if ctx.needs_input_grad[1]: 126 | dweight = dz.new().resize_as_(running_mean).zero_() 127 | else: 128 | dweight = None 129 | 130 | if ctx.needs_input_grad[2]: 131 | dbias = dz.new().resize_as_(running_mean).zero_() 132 | else: 133 | dbias = None 134 | 135 | if ctx.training: 136 | edz = dz.new().resize_as_(running_mean) 137 | eydz = 
dz.new().resize_as_(running_mean) 138 | _check_contiguous(z, dz, weight, bias, edz, eydz) 139 | _check(_ext.bn_edz_eydz_cuda, 140 | z, dz, 141 | weight if weight is not None else dz.new(), 142 | bias if bias is not None else dz.new(), 143 | edz, eydz, ctx.eps) 144 | else: 145 | # TODO: implement CUDA backward for inference mode 146 | edz = dz.new().resize_as_(running_mean).zero_() 147 | eydz = dz.new().resize_as_(running_mean).zero_() 148 | 149 | _check_contiguous(dz, z, ctx.var, weight, bias, edz, eydz, dx, dweight, dbias) 150 | _check(_ext.bn_backard_cuda, 151 | dz, z, ctx.var, 152 | weight if weight is not None else dz.new(), 153 | bias if bias is not None else dz.new(), 154 | edz, eydz, 155 | dx if dx is not None else dz.new(), 156 | dweight if dweight is not None else dz.new(), 157 | dbias if dbias is not None else dz.new(), 158 | ctx.eps) 159 | 160 | del ctx.var 161 | 162 | return dx, dweight, dbias, None, None, None, None, None, None, None 163 | 164 | 165 | class InPlaceABNSync(autograd.Function): 166 | @classmethod 167 | def forward(cls, ctx, x, weight, bias, running_mean, running_var, 168 | extra, training=True, momentum=0.1, eps=1e-05, activation=ACT_LEAKY_RELU, slope=0.01): 169 | # Save context 170 | cls._parse_extra(ctx, extra) 171 | ctx.training = training 172 | ctx.momentum = momentum 173 | ctx.eps = eps 174 | ctx.activation = activation 175 | ctx.slope = slope 176 | 177 | n = _count_samples(x) * (ctx.master_queue.maxsize + 1) 178 | 179 | if ctx.training: 180 | mean = x.new().resize_(1, running_mean.size(0)) 181 | var = x.new().resize_(1, running_var.size(0)) 182 | _check_contiguous(x, mean, var) 183 | _check(_ext.bn_mean_var_cuda, x, mean, var) 184 | 185 | if ctx.is_master: 186 | means, vars = [mean], [var] 187 | for _ in range(ctx.master_queue.maxsize): 188 | mean_w, var_w = ctx.master_queue.get() 189 | ctx.master_queue.task_done() 190 | means.append(mean_w) 191 | vars.append(var_w) 192 | 193 | means = comm.gather(means) 194 | vars = comm.gather(vars) 195 | 196 | mean = means.mean(0) 197 | var = (vars + (mean - means) ** 2).mean(0) 198 | 199 | tensors = comm.broadcast_coalesced((mean, var), [mean.get_device()] + ctx.worker_ids) 200 | for ts, queue in zip(tensors[1:], ctx.worker_queues): 201 | queue.put(ts) 202 | else: 203 | ctx.master_queue.put((mean, var)) 204 | mean, var = ctx.worker_queue.get() 205 | ctx.worker_queue.task_done() 206 | 207 | # Update running stats 208 | running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * mean) 209 | running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var * n / (n - 1)) 210 | else: 211 | mean, var = running_mean, running_var 212 | 213 | _check_contiguous(x, mean, var, weight, bias) 214 | _check(_ext.bn_forward_cuda, 215 | x, mean, var, 216 | weight if weight is not None else x.new(), 217 | bias if bias is not None else x.new(), 218 | x, x, ctx.eps) 219 | 220 | # Activation 221 | _act_forward(ctx, x) 222 | 223 | # Output 224 | ctx.var = var 225 | ctx.save_for_backward(x, weight, bias, running_mean, running_var) 226 | ctx.mark_dirty(x) 227 | return x 228 | 229 | @staticmethod 230 | @once_differentiable 231 | def backward(ctx, dz): 232 | z, weight, bias, running_mean, running_var = ctx.saved_tensors 233 | dz = dz.contiguous() 234 | 235 | # Undo activation 236 | _act_backward(ctx, z, dz) 237 | 238 | if ctx.needs_input_grad[0]: 239 | dx = dz.new().resize_as_(dz) 240 | else: 241 | dx = None 242 | 243 | if ctx.needs_input_grad[1]: 244 | dweight = dz.new().resize_as_(running_mean).zero_() 245 | else: 246 | dweight = None 247 | 248 | if 
ctx.needs_input_grad[2]: 249 | dbias = dz.new().resize_as_(running_mean).zero_() 250 | else: 251 | dbias = None 252 | 253 | if ctx.training: 254 | edz = dz.new().resize_as_(running_mean) 255 | eydz = dz.new().resize_as_(running_mean) 256 | _check_contiguous(z, dz, weight, bias, edz, eydz) 257 | _check(_ext.bn_edz_eydz_cuda, 258 | z, dz, 259 | weight if weight is not None else dz.new(), 260 | bias if bias is not None else dz.new(), 261 | edz, eydz, ctx.eps) 262 | 263 | if ctx.is_master: 264 | edzs, eydzs = [edz], [eydz] 265 | for _ in range(len(ctx.worker_queues)): 266 | edz_w, eydz_w = ctx.master_queue.get() 267 | ctx.master_queue.task_done() 268 | edzs.append(edz_w) 269 | eydzs.append(eydz_w) 270 | 271 | edz = comm.reduce_add(edzs) / (ctx.master_queue.maxsize + 1) 272 | eydz = comm.reduce_add(eydzs) / (ctx.master_queue.maxsize + 1) 273 | 274 | tensors = comm.broadcast_coalesced((edz, eydz), [edz.get_device()] + ctx.worker_ids) 275 | for ts, queue in zip(tensors[1:], ctx.worker_queues): 276 | queue.put(ts) 277 | else: 278 | ctx.master_queue.put((edz, eydz)) 279 | edz, eydz = ctx.worker_queue.get() 280 | ctx.worker_queue.task_done() 281 | else: 282 | edz = dz.new().resize_as_(running_mean).zero_() 283 | eydz = dz.new().resize_as_(running_mean).zero_() 284 | 285 | _check_contiguous(dz, z, ctx.var, weight, bias, edz, eydz, dx, dweight, dbias) 286 | _check(_ext.bn_backard_cuda, 287 | dz, z, ctx.var, 288 | weight if weight is not None else dz.new(), 289 | bias if bias is not None else dz.new(), 290 | edz, eydz, 291 | dx if dx is not None else dz.new(), 292 | dweight if dweight is not None else dz.new(), 293 | dbias if dbias is not None else dz.new(), 294 | ctx.eps) 295 | 296 | del ctx.var 297 | 298 | return dx, dweight, dbias, None, None, None, None, None, None, None, None 299 | 300 | @staticmethod 301 | def _parse_extra(ctx, extra): 302 | ctx.is_master = extra["is_master"] 303 | if ctx.is_master: 304 | ctx.master_queue = extra["master_queue"] 305 | ctx.worker_queues = extra["worker_queues"] 306 | ctx.worker_ids = extra["worker_ids"] 307 | else: 308 | ctx.master_queue = extra["master_queue"] 309 | ctx.worker_queue = extra["worker_queue"] 310 | 311 | 312 | inplace_abn = InPlaceABN.apply 313 | inplace_abn_sync = InPlaceABNSync.apply 314 | 315 | __all__ = ["inplace_abn", "inplace_abn_sync"] 316 | -------------------------------------------------------------------------------- /graphs/ops/libs/misc.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | class GlobalAvgPool2d(nn.Module): 5 | def __init__(self): 6 | """Global average pooling over the input's spatial dimensions""" 7 | super(GlobalAvgPool2d, self).__init__() 8 | 9 | def forward(self, inputs): 10 | in_size = inputs.size() 11 | return inputs.view((in_size[0], in_size[1], -1)).mean(dim=2) 12 | -------------------------------------------------------------------------------- /graphs/ops/libs/residual.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | import torch.nn as nn 4 | 5 | from .bn import ABN 6 | 7 | 8 | class IdentityResidualBlock(nn.Module): 9 | def __init__(self, 10 | in_channels, 11 | channels, 12 | stride=1, 13 | dilation=1, 14 | groups=1, 15 | norm_act=ABN, 16 | dropout=None): 17 | """Configurable identity-mapping residual block 18 | 19 | Parameters 20 | ---------- 21 | in_channels : int 22 | Number of input channels. 
23 | channels : list of int 24 | Number of channels in the internal feature maps. Can either have two or three elements: if two, construct 25 | a residual block with two `3 x 3` convolutions; if three, construct a bottleneck block with `1 x 1`, then 26 | `3 x 3`, then `1 x 1` convolutions. 27 | stride : int 28 | Stride of the first `3 x 3` convolution 29 | dilation : int 30 | Dilation to apply to the `3 x 3` convolutions. 31 | groups : int 32 | Number of convolution groups. This is used to create ResNeXt-style blocks and is only compatible with 33 | bottleneck blocks. 34 | norm_act : callable 35 | Function to create normalization / activation Module. 36 | dropout: callable 37 | Function to create Dropout Module. 38 | """ 39 | super(IdentityResidualBlock, self).__init__() 40 | 41 | # Check parameters for inconsistencies 42 | if len(channels) != 2 and len(channels) != 3: 43 | raise ValueError("channels must contain either two or three values") 44 | if len(channels) == 2 and groups != 1: 45 | raise ValueError("groups > 1 are only valid if len(channels) == 3") 46 | 47 | is_bottleneck = len(channels) == 3 48 | need_proj_conv = stride != 1 or in_channels != channels[-1] 49 | 50 | self.bn1 = norm_act(in_channels) 51 | if not is_bottleneck: 52 | layers = [ 53 | ("conv1", nn.Conv2d(in_channels, channels[0], 3, stride=stride, padding=dilation, bias=False, 54 | dilation=dilation)), 55 | ("bn2", norm_act(channels[0])), 56 | ("conv2", nn.Conv2d(channels[0], channels[1], 3, stride=1, padding=dilation, bias=False, 57 | dilation=dilation)) 58 | ] 59 | if dropout is not None: 60 | layers = layers[0:2] + [("dropout", dropout())] + layers[2:] 61 | else: 62 | layers = [ 63 | ("conv1", nn.Conv2d(in_channels, channels[0], 1, stride=stride, padding=0, bias=False)), 64 | ("bn2", norm_act(channels[0])), 65 | ("conv2", nn.Conv2d(channels[0], channels[1], 3, stride=1, padding=dilation, bias=False, 66 | groups=groups, dilation=dilation)), 67 | ("bn3", norm_act(channels[1])), 68 | ("conv3", nn.Conv2d(channels[1], channels[2], 1, stride=1, padding=0, bias=False)) 69 | ] 70 | if dropout is not None: 71 | layers = layers[0:4] + [("dropout", dropout())] + layers[4:] 72 | self.convs = nn.Sequential(OrderedDict(layers)) 73 | 74 | if need_proj_conv: 75 | self.proj_conv = nn.Conv2d(in_channels, channels[-1], 1, stride=stride, padding=0, bias=False) 76 | 77 | def forward(self, x): 78 | if hasattr(self, "proj_conv"): 79 | bn1 = self.bn1(x) 80 | shortcut = self.proj_conv(bn1) 81 | else: 82 | shortcut = x.clone() 83 | bn1 = self.bn1(x) 84 | 85 | out = self.convs(bn1) 86 | out.add_(shortcut) 87 | 88 | return out 89 | -------------------------------------------------------------------------------- /graphs/ops/libs/src/bn.cu: -------------------------------------------------------------------------------- 1 | #include <thrust/device_ptr.h> 2 | #include <thrust/transform.h> 3 | #include <thrust/system/cuda/execution_policy.h> 4 | 5 | #include "common.h" 6 | #include "bn.h" 7 | 8 | /* 9 | * Device functions and data structures 10 | */ 11 | struct Float2 { 12 | float v1, v2; 13 | __device__ Float2() {} 14 | __device__ Float2(float _v1, float _v2) : v1(_v1), v2(_v2) {} 15 | __device__ Float2(float v) : v1(v), v2(v) {} 16 | __device__ Float2(int v) : v1(v), v2(v) {} 17 | __device__ Float2 &operator+=(const Float2 &a) { 18 | v1 += a.v1; 19 | v2 += a.v2; 20 | return *this; 21 | } 22 | }; 23 | 24 | struct SumOp { 25 | __device__ SumOp(const float *t, int c, int s) 26 | : tensor(t), C(c), S(s) {} 27 | __device__ __forceinline__ float operator()(int batch, int plane, int n) { 28 | return tensor[(batch * C + plane) * S + n];
29 | } 30 | const float *tensor; 31 | const int C; 32 | const int S; 33 | }; 34 | 35 | struct VarOp { 36 | __device__ VarOp(float m, const float *t, int c, int s) 37 | : mean(m), tensor(t), C(c), S(s) {} 38 | __device__ __forceinline__ float operator()(int batch, int plane, int n) { 39 | float val = tensor[(batch * C + plane) * S + n]; 40 | return (val - mean) * (val - mean); 41 | } 42 | const float mean; 43 | const float *tensor; 44 | const int C; 45 | const int S; 46 | }; 47 | 48 | struct GradOp { 49 | __device__ GradOp(float _gamma, float _beta, const float *_z, const float *_dz, int c, int s) 50 | : gamma(_gamma), beta(_beta), z(_z), dz(_dz), C(c), S(s) {} 51 | __device__ __forceinline__ Float2 operator()(int batch, int plane, int n) { 52 | float _y = (z[(batch * C + plane) * S + n] - beta) / gamma; 53 | float _dz = dz[(batch * C + plane) * S + n]; 54 | return Float2(_dz, _y * _dz); 55 | } 56 | const float gamma; 57 | const float beta; 58 | const float *z; 59 | const float *dz; 60 | const int C; 61 | const int S; 62 | }; 63 | 64 | static __device__ __forceinline__ float warpSum(float val) { 65 | #if __CUDA_ARCH__ >= 300 66 | for (int i = 0; i < getMSB(WARP_SIZE); ++i) { 67 | val += WARP_SHFL_XOR(val, 1 << i, WARP_SIZE); 68 | } 69 | #else 70 | __shared__ float values[MAX_BLOCK_SIZE]; 71 | values[threadIdx.x] = val; 72 | __threadfence_block(); 73 | const int base = (threadIdx.x / WARP_SIZE) * WARP_SIZE; 74 | for (int i = 1; i < WARP_SIZE; i++) { 75 | val += values[base + ((i + threadIdx.x) % WARP_SIZE)]; 76 | } 77 | #endif 78 | return val; 79 | } 80 | 81 | static __device__ __forceinline__ Float2 warpSum(Float2 value) { 82 | value.v1 = warpSum(value.v1); 83 | value.v2 = warpSum(value.v2); 84 | return value; 85 | } 86 | 87 | template 88 | __device__ T reduce(Op op, int plane, int N, int C, int S) { 89 | T sum = (T)0; 90 | for (int batch = 0; batch < N; ++batch) { 91 | for (int x = threadIdx.x; x < S; x += blockDim.x) { 92 | sum += op(batch, plane, x); 93 | } 94 | } 95 | 96 | // sum over NumThreads within a warp 97 | sum = warpSum(sum); 98 | 99 | // 'transpose', and reduce within warp again 100 | __shared__ T shared[32]; 101 | __syncthreads(); 102 | if (threadIdx.x % WARP_SIZE == 0) { 103 | shared[threadIdx.x / WARP_SIZE] = sum; 104 | } 105 | if (threadIdx.x >= blockDim.x / WARP_SIZE && threadIdx.x < WARP_SIZE) { 106 | // zero out the other entries in shared 107 | shared[threadIdx.x] = (T)0; 108 | } 109 | __syncthreads(); 110 | if (threadIdx.x / WARP_SIZE == 0) { 111 | sum = warpSum(shared[threadIdx.x]); 112 | if (threadIdx.x == 0) { 113 | shared[0] = sum; 114 | } 115 | } 116 | __syncthreads(); 117 | 118 | // Everyone picks it up, should be broadcast into the whole gradInput 119 | return shared[0]; 120 | } 121 | 122 | /* 123 | * Kernels 124 | */ 125 | __global__ void mean_var_kernel(const float *x, float *mean, float *var, int N, 126 | int C, int S) { 127 | int plane = blockIdx.x; 128 | float norm = 1.f / (N * S); 129 | 130 | float _mean = reduce(SumOp(x, C, S), plane, N, C, S) * norm; 131 | __syncthreads(); 132 | float _var = reduce(VarOp(_mean, x, C, S), plane, N, C, S) * norm; 133 | 134 | if (threadIdx.x == 0) { 135 | mean[plane] = _mean; 136 | var[plane] = _var; 137 | } 138 | } 139 | 140 | __global__ void forward_kernel(const float *x, const float *mean, 141 | const float *var, const float *weight, 142 | const float *bias, float *y, float *z, float eps, 143 | int N, int C, int S) { 144 | int plane = blockIdx.x; 145 | 146 | float _mean = mean[plane]; 147 | float _var = var[plane]; 148 | 
float invStd = 0; 149 | if (_var != 0.f || eps != 0.f) { 150 | invStd = 1 / sqrt(_var + eps); 151 | } 152 | 153 | float gamma = weight != 0 ? abs(weight[plane]) + eps : 1.f; 154 | float beta = bias != 0 ? bias[plane] : 0.f; 155 | for (int batch = 0; batch < N; ++batch) { 156 | for (int n = threadIdx.x; n < S; n += blockDim.x) { 157 | float _x = x[(batch * C + plane) * S + n]; 158 | float _y = (_x - _mean) * invStd; 159 | float _z = _y * gamma + beta; 160 | 161 | y[(batch * C + plane) * S + n] = _y; 162 | z[(batch * C + plane) * S + n] = _z; 163 | } 164 | } 165 | } 166 | 167 | __global__ void edz_eydz_kernel(const float *z, const float *dz, const float *weight, const float *bias, 168 | float *edz, float *eydz, float eps, int N, int C, int S) { 169 | int plane = blockIdx.x; 170 | float norm = 1.f / (N * S); 171 | 172 | float gamma = weight != 0 ? abs(weight[plane]) + eps : 1.f; 173 | float beta = bias != 0 ? bias[plane] : 0.f; 174 | 175 | Float2 res = reduce<Float2, GradOp>(GradOp(gamma, beta, z, dz, C, S), plane, N, C, S); 176 | float _edz = res.v1 * norm; 177 | float _eydz = res.v2 * norm; 178 | __syncthreads(); 179 | 180 | if (threadIdx.x == 0) { 181 | edz[plane] = _edz; 182 | eydz[plane] = _eydz; 183 | } 184 | } 185 | 186 | __global__ void backward_kernel(const float *dz, const float *z, const float *var, const float *weight, 187 | const float *bias, const float *edz, const float *eydz, float *dx, float *dweight, 188 | float *dbias, float eps, int N, int C, int S) { 189 | int plane = blockIdx.x; 190 | float _edz = edz[plane]; 191 | float _eydz = eydz[plane]; 192 | 193 | float gamma = weight != 0 ? abs(weight[plane]) + eps : 1.f; 194 | float beta = bias != 0 ? bias[plane] : 0.f; 195 | 196 | if (dx != 0) { 197 | float _var = var[plane]; 198 | float invStd = 0; 199 | if (_var != 0.f || eps != 0.f) { 200 | invStd = 1 / sqrt(_var + eps); 201 | } 202 | 203 | float mul = gamma * invStd; 204 | 205 | for (int batch = 0; batch < N; ++batch) { 206 | for (int n = threadIdx.x; n < S; n += blockDim.x) { 207 | float _dz = dz[(batch * C + plane) * S + n]; 208 | float _y = (z[(batch * C + plane) * S + n] - beta) / gamma; 209 | dx[(batch * C + plane) * S + n] = (_dz - _edz - _y * _eydz) * mul; 210 | } 211 | } 212 | } 213 | 214 | if (dweight != 0 || dbias != 0) { 215 | float norm = N * S; 216 | 217 | if (dweight != 0) { 218 | if (threadIdx.x == 0) { 219 | if (weight[plane] > 0) 220 | dweight[plane] += _eydz * norm; 221 | else if (weight[plane] < 0) 222 | dweight[plane] -= _eydz * norm; 223 | } 224 | } 225 | 226 | if (dbias != 0) { 227 | if (threadIdx.x == 0) { 228 | dbias[plane] += _edz * norm; 229 | } 230 | } 231 | } 232 | } 233 | 234 | /* 235 | * Implementations 236 | */ 237 | extern "C" int _bn_mean_var_cuda(int N, int C, int S, const float *x, float *mean, 238 | float *var, cudaStream_t stream) { 239 | // Run kernel 240 | dim3 blocks(C); 241 | dim3 threads(getNumThreads(S)); 242 | mean_var_kernel<<<blocks, threads, 0, stream>>>(x, mean, var, N, C, S); 243 | 244 | // Check for errors 245 | cudaError_t err = cudaGetLastError(); 246 | if (err != cudaSuccess) 247 | return 0; 248 | else 249 | return 1; 250 | } 251 | 252 | extern "C" int _bn_forward_cuda(int N, int C, int S, const float *x, 253 | const float *mean, const float *var, 254 | const float *weight, const float *bias, float *y, 255 | float *z, float eps, cudaStream_t stream) { 256 | // Run kernel 257 | dim3 blocks(C); 258 | dim3 threads(getNumThreads(S)); 259 | forward_kernel<<<blocks, threads, 0, stream>>>(x, mean, var, weight, bias, y, 260 | z, eps, N, C, S); 261 | 262 | // Check for errors 263 | cudaError_t err =
cudaGetLastError(); 264 | if (err != cudaSuccess) 265 | return 0; 266 | else 267 | return 1; 268 | } 269 | 270 | extern "C" int _bn_edz_eydz_cuda(int N, int C, int S, const float *z, const float *dz, const float *weight, 271 | const float *bias, float *edz, float *eydz, float eps, cudaStream_t stream) { 272 | // Run kernel 273 | dim3 blocks(C); 274 | dim3 threads(getNumThreads(S)); 275 | edz_eydz_kernel<<<blocks, threads, 0, stream>>>(z, dz, weight, bias, edz, eydz, eps, N, C, S); 276 | 277 | // Check for errors 278 | cudaError_t err = cudaGetLastError(); 279 | if (err != cudaSuccess) 280 | return 0; 281 | else 282 | return 1; 283 | } 284 | 285 | extern "C" int _bn_backward_cuda(int N, int C, int S, const float *dz, const float *z, const float *var, 286 | const float *weight, const float *bias, const float *edz, const float *eydz, 287 | float *dx, float *dweight, float *dbias, float eps, cudaStream_t stream) { 288 | // Run kernel 289 | dim3 blocks(C); 290 | dim3 threads(getNumThreads(S)); 291 | backward_kernel<<<blocks, threads, 0, stream>>>(dz, z, var, weight, bias, edz, eydz, dx, dweight, dbias, 292 | eps, N, C, S); 293 | 294 | // Check for errors 295 | cudaError_t err = cudaGetLastError(); 296 | if (err != cudaSuccess) 297 | return 0; 298 | else 299 | return 1; 300 | } 301 | 302 | extern "C" int _leaky_relu_cuda(int N, float *x, float slope, cudaStream_t stream) { 303 | // Run using thrust 304 | thrust::device_ptr<float> th_x = thrust::device_pointer_cast(x); 305 | thrust::transform_if(thrust::cuda::par.on(stream), th_x, th_x + N, th_x, 306 | [slope] __device__ (const float& x) { return x * slope; }, 307 | [] __device__ (const float& x) { return x < 0; }); 308 | 309 | // Check for errors 310 | cudaError_t err = cudaGetLastError(); 311 | if (err != cudaSuccess) 312 | return 0; 313 | else 314 | return 1; 315 | } 316 | 317 | extern "C" int _leaky_relu_backward_cuda(int N, const float *x, float *dx, float slope, cudaStream_t stream) { 318 | // Run using thrust 319 | thrust::device_ptr<const float> th_x = thrust::device_pointer_cast(x); 320 | thrust::device_ptr<float> th_dx = thrust::device_pointer_cast(dx); 321 | thrust::transform_if(thrust::cuda::par.on(stream), th_dx, th_dx + N, th_x, th_dx, 322 | [slope] __device__ (const float& dx) { return dx * slope; }, 323 | [] __device__ (const float& x) { return x < 0; }); 324 | 325 | // Check for errors 326 | cudaError_t err = cudaGetLastError(); 327 | if (err != cudaSuccess) 328 | return 0; 329 | else 330 | return 1; 331 | } 332 | 333 | extern "C" int _elu_cuda(int N, float *x, cudaStream_t stream) { 334 | // Run using thrust 335 | thrust::device_ptr<float> th_x = thrust::device_pointer_cast(x); 336 | thrust::transform_if(thrust::cuda::par.on(stream), th_x, th_x + N, th_x, 337 | [] __device__ (const float& x) { return exp(x) - 1.f; }, 338 | [] __device__ (const float& x) { return x < 0; }); 339 | 340 | // Check for errors 341 | cudaError_t err = cudaGetLastError(); 342 | if (err != cudaSuccess) 343 | return 0; 344 | else 345 | return 1; 346 | } 347 | 348 | extern "C" int _elu_backward_cuda(int N, const float *x, float *dx, cudaStream_t stream) { 349 | // Run using thrust 350 | thrust::device_ptr<const float> th_x = thrust::device_pointer_cast(x); 351 | thrust::device_ptr<float> th_dx = thrust::device_pointer_cast(dx); 352 | thrust::transform_if(thrust::cuda::par.on(stream), th_dx, th_dx + N, th_x, th_x, th_dx, 353 | [] __device__ (const float& dx, const float& x) { return dx * (x + 1.f); }, 354 | [] __device__ (const float& x) { return x < 0; }); 355 | 356 | // Check for errors 357 | cudaError_t err = cudaGetLastError(); 358 | if (err != cudaSuccess) 359
| return 0; 360 | else 361 | return 1; 362 | } 363 | 364 | extern "C" int _elu_inv_cuda(int N, float *x, cudaStream_t stream) { 365 | // Run using thrust 366 | thrust::device_ptr<float> th_x = thrust::device_pointer_cast(x); 367 | thrust::transform_if(thrust::cuda::par.on(stream), th_x, th_x + N, th_x, 368 | [] __device__ (const float& x) { return log1p(x); }, 369 | [] __device__ (const float& x) { return x < 0; }); 370 | 371 | // Check for errors 372 | cudaError_t err = cudaGetLastError(); 373 | if (err != cudaSuccess) 374 | return 0; 375 | else 376 | return 1; 377 | } 378 | -------------------------------------------------------------------------------- /graphs/ops/libs/src/bn.h: -------------------------------------------------------------------------------- 1 | #ifndef __BN__ 2 | #define __BN__ 3 | 4 | /* 5 | * Exported functions 6 | */ 7 | extern "C" int _bn_mean_var_cuda(int N, int C, int S, const float *x, float *mean, float *var, cudaStream_t); 8 | extern "C" int _bn_forward_cuda(int N, int C, int S, const float *x, const float *mean, const float *var, 9 | const float *weight, const float *bias, float *y, float *z, float eps, cudaStream_t); 10 | extern "C" int _bn_edz_eydz_cuda(int N, int C, int S, const float *z, const float *dz, const float *weight, 11 | const float *bias, float *edz, float *eydz, float eps, cudaStream_t stream); 12 | extern "C" int _bn_backward_cuda(int N, int C, int S, const float *dz, const float *z, const float *var, 13 | const float *weight, const float *bias, const float *edz, const float *eydz, float *dx, 14 | float *dweight, float *dbias, float eps, cudaStream_t stream); 15 | extern "C" int _leaky_relu_cuda(int N, float *x, float slope, cudaStream_t stream); 16 | extern "C" int _leaky_relu_backward_cuda(int N, const float *x, float *dx, float slope, cudaStream_t stream); 17 | extern "C" int _elu_cuda(int N, float *x, cudaStream_t stream); 18 | extern "C" int _elu_backward_cuda(int N, const float *x, float *dx, cudaStream_t stream); 19 | extern "C" int _elu_inv_cuda(int N, float *x, cudaStream_t stream); 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /graphs/ops/libs/src/common.h: -------------------------------------------------------------------------------- 1 | #ifndef __COMMON__ 2 | #define __COMMON__ 3 | #include <cuda_runtime_api.h> 4 | 5 | /* 6 | * General settings 7 | */ 8 | const int WARP_SIZE = 32; 9 | const int MAX_BLOCK_SIZE = 512; 10 | 11 | /* 12 | * Utility functions 13 | */ 14 | template <typename T> 15 | __device__ __forceinline__ T WARP_SHFL_XOR(T value, int laneMask, int width = warpSize, 16 | unsigned int mask = 0xffffffff) { 17 | #if CUDART_VERSION >= 9000 18 | return __shfl_xor_sync(mask, value, laneMask, width); 19 | #else 20 | return __shfl_xor(value, laneMask, width); 21 | #endif 22 | } 23 | 24 | __device__ __forceinline__ int getMSB(int val) { return 31 - __clz(val); } 25 | 26 | static int getNumThreads(int nElem) { 27 | int threadSizes[5] = {32, 64, 128, 256, MAX_BLOCK_SIZE}; 28 | for (int i = 0; i != 5; ++i) { 29 | if (nElem <= threadSizes[i]) { 30 | return threadSizes[i]; 31 | } 32 | } 33 | return MAX_BLOCK_SIZE; 34 | } 35 | 36 | 37 | #endif -------------------------------------------------------------------------------- /graphs/ops/libs/src/lib_cffi.cpp: -------------------------------------------------------------------------------- 1 | // All functions assume that input and output tensors are already initialized 2 | // and have the correct dimensions 3 | #include <THC/THC.h> 4 | 5 | // Forward definition of implementation
functions 6 | extern "C" { 7 | int _bn_mean_var_cuda(int N, int C, int S, const float *x, float *mean, float *var, cudaStream_t); 8 | int _bn_forward_cuda(int N, int C, int S, const float *x, const float *mean, const float *var, const float *weight, 9 | const float *bias, float *y, float *z, float eps, cudaStream_t); 10 | int _bn_edz_eydz_cuda(int N, int C, int S, const float *z, const float *dz, const float *weight, const float *bias, 11 | float *edz, float *eydz, float eps, cudaStream_t stream); 12 | int _bn_backward_cuda(int N, int C, int S, const float *dz, const float *z, const float *var, const float *weight, 13 | const float *bias, const float *edz, const float *eydz, float *dx, float *dweight, float *dbias, 14 | float eps, cudaStream_t stream); 15 | int _leaky_relu_cuda(int N, float *x, float slope, cudaStream_t stream); 16 | int _leaky_relu_backward_cuda(int N, const float *x, float *dx, float slope, cudaStream_t stream); 17 | int _elu_cuda(int N, float *x, cudaStream_t stream); 18 | int _elu_backward_cuda(int N, const float *x, float *dx, cudaStream_t stream); 19 | int _elu_inv_cuda(int N, float *x, cudaStream_t stream); 20 | } 21 | 22 | extern THCState *state; 23 | 24 | void get_sizes(const THCudaTensor *t, int *N, int *C, int *S){ 25 | // Get sizes 26 | *S = 1; 27 | *N = THCudaTensor_size(state, t, 0); 28 | *C = THCudaTensor_size(state, t, 1); 29 | if (THCudaTensor_nDimension(state, t) > 2) { 30 | for (int i = 2; i < THCudaTensor_nDimension(state, t); ++i) { 31 | *S *= THCudaTensor_size(state, t, i); 32 | } 33 | } 34 | } 35 | 36 | extern "C" int bn_mean_var_cuda(const THCudaTensor *x, THCudaTensor *mean, THCudaTensor *var) { 37 | cudaStream_t stream = THCState_getCurrentStream(state); 38 | 39 | int S, N, C; 40 | get_sizes(x, &N, &C, &S); 41 | 42 | // Get pointers 43 | const float *x_data = THCudaTensor_data(state, x); 44 | float *mean_data = THCudaTensor_data(state, mean); 45 | float *var_data = THCudaTensor_data(state, var); 46 | 47 | return _bn_mean_var_cuda(N, C, S, x_data, mean_data, var_data, stream); 48 | } 49 | 50 | extern "C" int bn_forward_cuda(const THCudaTensor *x, const THCudaTensor *mean, const THCudaTensor *var, 51 | const THCudaTensor *weight, const THCudaTensor *bias, THCudaTensor *y, THCudaTensor *z, 52 | float eps) { 53 | cudaStream_t stream = THCState_getCurrentStream(state); 54 | 55 | int S, N, C; 56 | get_sizes(x, &N, &C, &S); 57 | 58 | // Get pointers 59 | const float *x_data = THCudaTensor_data(state, x); 60 | const float *mean_data = THCudaTensor_data(state, mean); 61 | const float *var_data = THCudaTensor_data(state, var); 62 | const float *weight_data = THCudaTensor_nDimension(state, weight) != 0 ? THCudaTensor_data(state, weight) : 0; 63 | const float *bias_data = THCudaTensor_nDimension(state, bias) != 0 ? 
THCudaTensor_data(state, bias) : 0; 64 | float *y_data = THCudaTensor_data(state, y); 65 | float *z_data = THCudaTensor_data(state, z); 66 | 67 | return _bn_forward_cuda(N, C, S, x_data, mean_data, var_data, weight_data, bias_data, y_data, z_data, eps, stream); 68 | } 69 | 70 | extern "C" int bn_edz_eydz_cuda(const THCudaTensor *z, const THCudaTensor *dz, const THCudaTensor *weight, 71 | const THCudaTensor *bias, THCudaTensor *edz, THCudaTensor *eydz, float eps) { 72 | cudaStream_t stream = THCState_getCurrentStream(state); 73 | 74 | int S, N, C; 75 | get_sizes(z, &N, &C, &S); 76 | 77 | // Get pointers 78 | const float *z_data = THCudaTensor_data(state, z); 79 | const float *dz_data = THCudaTensor_data(state, dz); 80 | const float *weight_data = THCudaTensor_nDimension(state, weight) != 0 ? THCudaTensor_data(state, weight) : 0; 81 | const float *bias_data = THCudaTensor_nDimension(state, bias) != 0 ? THCudaTensor_data(state, bias) : 0; 82 | float *edz_data = THCudaTensor_data(state, edz); 83 | float *eydz_data = THCudaTensor_data(state, eydz); 84 | 85 | return _bn_edz_eydz_cuda(N, C, S, z_data, dz_data, weight_data, bias_data, edz_data, eydz_data, eps, stream); 86 | } 87 | 88 | extern "C" int bn_backard_cuda(const THCudaTensor *dz, const THCudaTensor *z, const THCudaTensor *var, 89 | const THCudaTensor *weight, const THCudaTensor *bias, const THCudaTensor *edz, 90 | const THCudaTensor *eydz, THCudaTensor *dx, THCudaTensor *dweight, 91 | THCudaTensor *dbias, float eps) { 92 | cudaStream_t stream = THCState_getCurrentStream(state); 93 | 94 | int S, N, C; 95 | get_sizes(dz, &N, &C, &S); 96 | 97 | // Get pointers 98 | const float *dz_data = THCudaTensor_data(state, dz); 99 | const float *z_data = THCudaTensor_data(state, z); 100 | const float *var_data = THCudaTensor_data(state, var); 101 | const float *weight_data = THCudaTensor_nDimension(state, weight) != 0 ? THCudaTensor_data(state, weight) : 0; 102 | const float *bias_data = THCudaTensor_nDimension(state, bias) != 0 ? THCudaTensor_data(state, bias) : 0; 103 | const float *edz_data = THCudaTensor_data(state, edz); 104 | const float *eydz_data = THCudaTensor_data(state, eydz); 105 | float *dx_data = THCudaTensor_nDimension(state, dx) != 0 ? THCudaTensor_data(state, dx) : 0; 106 | float *dweight_data = THCudaTensor_nDimension(state, dweight) != 0 ? THCudaTensor_data(state, dweight) : 0; 107 | float *dbias_data = THCudaTensor_nDimension(state, dbias) != 0 ? 
THCudaTensor_data(state, dbias) : 0; 108 | 109 | return _bn_backward_cuda(N, C, S, dz_data, z_data, var_data, weight_data, bias_data, edz_data, eydz_data, dx_data, 110 | dweight_data, dbias_data, eps, stream); 111 | } 112 | 113 | extern "C" int leaky_relu_cuda(THCudaTensor *x, float slope) { 114 | cudaStream_t stream = THCState_getCurrentStream(state); 115 | 116 | int N = THCudaTensor_nElement(state, x); 117 | 118 | // Get pointers 119 | float *x_data = THCudaTensor_data(state, x); 120 | 121 | return _leaky_relu_cuda(N, x_data, slope, stream); 122 | } 123 | 124 | extern "C" int leaky_relu_backward_cuda(const THCudaTensor *x, THCudaTensor *dx, float slope) { 125 | cudaStream_t stream = THCState_getCurrentStream(state); 126 | 127 | int N = THCudaTensor_nElement(state, x); 128 | 129 | // Get pointers 130 | const float *x_data = THCudaTensor_data(state, x); 131 | float *dx_data = THCudaTensor_data(state, dx); 132 | 133 | return _leaky_relu_backward_cuda(N, x_data, dx_data, slope, stream); 134 | } 135 | 136 | extern "C" int elu_cuda(THCudaTensor *x) { 137 | cudaStream_t stream = THCState_getCurrentStream(state); 138 | 139 | int N = THCudaTensor_nElement(state, x); 140 | 141 | // Get pointers 142 | float *x_data = THCudaTensor_data(state, x); 143 | 144 | return _elu_cuda(N, x_data, stream); 145 | } 146 | 147 | extern "C" int elu_backward_cuda(const THCudaTensor *x, THCudaTensor *dx) { 148 | cudaStream_t stream = THCState_getCurrentStream(state); 149 | 150 | int N = THCudaTensor_nElement(state, x); 151 | 152 | // Get pointers 153 | const float *x_data = THCudaTensor_data(state, x); 154 | float *dx_data = THCudaTensor_data(state, dx); 155 | 156 | return _elu_backward_cuda(N, x_data, dx_data, stream); 157 | } 158 | 159 | extern "C" int elu_inv_cuda(THCudaTensor *x) { 160 | cudaStream_t stream = THCState_getCurrentStream(state); 161 | 162 | int N = THCudaTensor_nElement(state, x); 163 | 164 | // Get pointers 165 | float *x_data = THCudaTensor_data(state, x); 166 | 167 | return _elu_inv_cuda(N, x_data, stream); 168 | } 169 | -------------------------------------------------------------------------------- /graphs/ops/libs/src/lib_cffi.h: -------------------------------------------------------------------------------- 1 | int bn_mean_var_cuda(const THCudaTensor *x, THCudaTensor *mean, THCudaTensor *var); 2 | int bn_forward_cuda(const THCudaTensor *x, const THCudaTensor *mean, const THCudaTensor *var, 3 | const THCudaTensor *weight, const THCudaTensor *bias, THCudaTensor *y, THCudaTensor *z, 4 | float eps); 5 | int bn_edz_eydz_cuda(const THCudaTensor *z, const THCudaTensor *dz, const THCudaTensor *weight, 6 | const THCudaTensor *bias, THCudaTensor *edz, THCudaTensor *eydz, float eps); 7 | int bn_backard_cuda(const THCudaTensor *dz, const THCudaTensor *z, const THCudaTensor *var, 8 | const THCudaTensor *weight, const THCudaTensor *bias, const THCudaTensor *edz, 9 | const THCudaTensor *eydz, THCudaTensor *dx, THCudaTensor *dweight, THCudaTensor *dbias, 10 | float eps); 11 | int leaky_relu_cuda(THCudaTensor *x, float slope); 12 | int leaky_relu_backward_cuda(const THCudaTensor *x, THCudaTensor *dx, float slope); 13 | int elu_cuda(THCudaTensor *x); 14 | int elu_backward_cuda(const THCudaTensor *x, THCudaTensor *dx); 15 | int elu_inv_cuda(THCudaTensor *x); -------------------------------------------------------------------------------- /graphs/ops/make.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | nvcc -c -o deform_conv_cuda_kernel.cu.so 
deform_conv_cuda_kernel.cu -x cu -Xcompiler -fPIC -std=c++11 3 | nvcc -c -o scale_conv_cuda_kernel.cu.so scale_conv_cuda_kernel.cu -x cu -Xcompiler -fPIC -std=c++11 4 | cd cuda 5 | 6 | # compile modulated deform conv 7 | nvcc -c -o modulated_deform_im2col_cuda.cu.so modulated_deform_im2col_cuda.cu -x cu -Xcompiler -fPIC 8 | 9 | # compile deform-psroi-pooling 10 | nvcc -c -o deform_psroi_pooling_cuda.cu.so deform_psroi_pooling_cuda.cu -x cu -Xcompiler -fPIC 11 | 12 | cd ../.. 13 | CC=g++ python build.py 14 | python build_modulated.py 15 | -------------------------------------------------------------------------------- /graphs/ops/modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .deform_conv import DeformConv 2 | from .s_conv import SConv 3 | -------------------------------------------------------------------------------- /graphs/ops/modules/deform_conv.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | import torch.nn as nn 5 | from torch.nn.modules.module import Module 6 | from torch.nn.modules.utils import _pair 7 | from graphs.ops.functions import deform_conv_function 8 | 9 | 10 | class DeformConv(Module): 11 | def __init__(self, 12 | in_channels, 13 | out_channels, 14 | kernel_size, 15 | stride=1, 16 | padding=0, 17 | dilation=1, 18 | num_deformable_groups=1): 19 | super(DeformConv, self).__init__() 20 | self.in_channels = in_channels 21 | self.out_channels = out_channels 22 | self.kernel_size = _pair(kernel_size) 23 | self.stride = _pair(stride) 24 | self.padding = _pair(padding) 25 | self.dilation = _pair(dilation) 26 | self.num_deformable_groups = num_deformable_groups 27 | 28 | self.weight = nn.Parameter( 29 | torch.Tensor(out_channels, in_channels, *self.kernel_size)) 30 | 31 | self.reset_parameters() 32 | 33 | def reset_parameters(self): 34 | n = self.in_channels 35 | for k in self.kernel_size: 36 | n *= k 37 | stdv = 1. / math.sqrt(n) 38 | self.weight.data.uniform_(-stdv, stdv) 39 | 40 | def forward(self, input, offset): 41 | return deform_conv_function(input, offset, self.weight, self.stride, 42 | self.padding, self.dilation, 43 | self.num_deformable_groups) 44 | -------------------------------------------------------------------------------- /graphs/ops/src/cuda/deform_psroi_pooling_cuda.h: -------------------------------------------------------------------------------- 1 | /*! 
2 | * Copyright (c) 2017 Microsoft 3 | * Licensed under The MIT License [see LICENSE for details] 4 | * \file deformable_psroi_pooling.cu 5 | * \brief 6 | * \author Yi Li, Guodong Zhang, Jifeng Dai 7 | */ 8 | /***************** Adapted by Charles Shang *********************/ 9 | 10 | #ifndef DCN_V2_PSROI_POOLING_CUDA 11 | #define DCN_V2_PSROI_POOLING_CUDA 12 | 13 | #ifdef __cplusplus 14 | extern "C" 15 | { 16 | #endif 17 | 18 | void DeformablePSROIPoolForward(cudaStream_t stream, 19 | const float *data, 20 | const float *bbox, 21 | const float *trans, 22 | float *out, 23 | float *top_count, 24 | const int batch, 25 | const int channels, 26 | const int height, 27 | const int width, 28 | const int num_bbox, 29 | const int channels_trans, 30 | const int no_trans, 31 | const float spatial_scale, 32 | const int output_dim, 33 | const int group_size, 34 | const int pooled_size, 35 | const int part_size, 36 | const int sample_per_part, 37 | const float trans_std); 38 | 39 | void DeformablePSROIPoolBackwardAcc(cudaStream_t stream, 40 | const float *out_grad, 41 | const float *data, 42 | const float *bbox, 43 | const float *trans, 44 | const float *top_count, 45 | float *in_grad, 46 | float *trans_grad, 47 | const int batch, 48 | const int channels, 49 | const int height, 50 | const int width, 51 | const int num_bbox, 52 | const int channels_trans, 53 | const int no_trans, 54 | const float spatial_scale, 55 | const int output_dim, 56 | const int group_size, 57 | const int pooled_size, 58 | const int part_size, 59 | const int sample_per_part, 60 | const float trans_std); 61 | 62 | #ifdef __cplusplus 63 | } 64 | #endif 65 | 66 | #endif -------------------------------------------------------------------------------- /graphs/ops/src/cuda/modulated_deform_im2col_cuda.h: -------------------------------------------------------------------------------- 1 | /*! 2 | ******************* BEGIN Caffe Copyright Notice and Disclaimer **************** 3 | * 4 | * COPYRIGHT 5 | * 6 | * All contributions by the University of California: 7 | * Copyright (c) 2014-2017 The Regents of the University of California (Regents) 8 | * All rights reserved. 9 | * 10 | * All other contributions: 11 | * Copyright (c) 2014-2017, the respective contributors 12 | * All rights reserved. 13 | * 14 | * Caffe uses a shared copyright model: each contributor holds copyright over 15 | * their contributions to Caffe. The project versioning records all such 16 | * contribution and copyright details. If a contributor wants to further mark 17 | * their specific copyright on a particular contribution, they should indicate 18 | * their copyright solely in the commit message of the change when it is 19 | * committed. 20 | * 21 | * LICENSE 22 | * 23 | * Redistribution and use in source and binary forms, with or without 24 | * modification, are permitted provided that the following conditions are met: 25 | * 26 | * 1. Redistributions of source code must retain the above copyright notice, this 27 | * list of conditions and the following disclaimer. 28 | * 2. Redistributions in binary form must reproduce the above copyright notice, 29 | * this list of conditions and the following disclaimer in the documentation 30 | * and/or other materials provided with the distribution. 31 | * 32 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 33 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 34 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 35 | * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 36 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 37 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 38 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 39 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 40 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 41 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 42 | * 43 | * CONTRIBUTION AGREEMENT 44 | * 45 | * By contributing to the BVLC/caffe repository through pull-request, comment, 46 | * or otherwise, the contributor releases their content to the 47 | * license and copyright terms herein. 48 | * 49 | ***************** END Caffe Copyright Notice and Disclaimer ******************** 50 | * 51 | * Copyright (c) 2018 Microsoft 52 | * Licensed under The MIT License [see LICENSE for details] 53 | * \file modulated_deformable_im2col.h 54 | * \brief Function definitions of converting an image to 55 | * column matrix based on kernel, padding, dilation, and offset. 56 | * These functions are mainly used in deformable convolution operators. 57 | * \ref: https://arxiv.org/abs/1811.11168 58 | * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu 59 | */ 60 | 61 | /***************** Adapted by Charles Shang *********************/ 62 | 63 | #ifndef DCN_V2_IM2COL_CUDA 64 | #define DCN_V2_IM2COL_CUDA 65 | 66 | #ifdef __cplusplus 67 | extern "C" 68 | { 69 | #endif 70 | 71 | void modulated_deformable_im2col_cuda(cudaStream_t stream, 72 | const float *data_im, const float *data_offset, const float *data_mask, 73 | const int batch_size, const int channels, const int height_im, const int width_im, 74 | const int height_col, const int width_col, const int kernel_h, const int kernel_w, 75 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 76 | const int dilation_h, const int dilation_w, 77 | const int deformable_group, float *data_col); 78 | 79 | void modulated_deformable_col2im_cuda(cudaStream_t stream, 80 | const float *data_col, const float *data_offset, const float *data_mask, 81 | const int batch_size, const int channels, const int height_im, const int width_im, 82 | const int height_col, const int width_col, const int kernel_h, const int kernel_w, 83 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 84 | const int dilation_h, const int dilation_w, 85 | const int deformable_group, float *grad_im); 86 | 87 | void modulated_deformable_col2im_coord_cuda(cudaStream_t stream, 88 | const float *data_col, const float *data_im, const float *data_offset, const float *data_mask, 89 | const int batch_size, const int channels, const int height_im, const int width_im, 90 | const int height_col, const int width_col, const int kernel_h, const int kernel_w, 91 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 92 | const int dilation_h, const int dilation_w, 93 | const int deformable_group, 94 | float *grad_offset, float *grad_mask); 95 | 96 | #ifdef __cplusplus 97 | } 98 | #endif 99 | 100 | #endif -------------------------------------------------------------------------------- /graphs/ops/src/deform_conv.c: -------------------------------------------------------------------------------- 1 | #include <TH/TH.h> 2 | 3 | int deform_conv_forward(THFloatTensor *input, THFloatTensor *offset, 4 | THFloatTensor *output) 5 | { 6 | // if
(!THFloatTensor_isSameSizeAs(input1, input2)) 7 | // return 0; 8 | // THFloatTensor_resizeAs(output, input); 9 | // THFloatTensor_cadd(output, input1, 1.0, input2); 10 | return 1; 11 | } 12 | 13 | int deform_conv_backward(THFloatTensor *grad_output, THFloatTensor *grad_input, 14 | THFloatTensor *grad_offset) 15 | { 16 | // THFloatTensor_resizeAs(grad_input, grad_output); 17 | // THFloatTensor_fill(grad_input, 1); 18 | return 1; 19 | } 20 | -------------------------------------------------------------------------------- /graphs/ops/src/deform_conv.h: -------------------------------------------------------------------------------- 1 | int deform_conv_forward(THFloatTensor *input, THFloatTensor *offset, 2 | THFloatTensor *output); 3 | int deform_conv_backward(THFloatTensor *grad_output, THFloatTensor *grad_input, 4 | THFloatTensor *grad_offset); 5 | -------------------------------------------------------------------------------- /graphs/ops/src/deform_conv_cuda.h: -------------------------------------------------------------------------------- 1 | int deform_conv_forward_cuda(THCudaTensor *input, 2 | THCudaTensor *weight, /*THCudaTensor * bias, */ 3 | THCudaTensor *offset, THCudaTensor *output, 4 | THCudaTensor *columns, THCudaTensor *ones, int kW, 5 | int kH, int dW, int dH, int padW, int padH, 6 | int dilationW, int dilationH, 7 | int deformable_group, int im2col_step); 8 | 9 | int deform_conv_backward_input_cuda( 10 | THCudaTensor *input, THCudaTensor *offset, THCudaTensor *gradOutput, 11 | THCudaTensor *gradInput, THCudaTensor *gradOffset, THCudaTensor *weight, 12 | THCudaTensor *columns, int kW, int kH, int dW, int dH, int padW, int padH, 13 | int dilationW, int dilationH, int deformable_group, int im2col_step); 14 | 15 | int deform_conv_backward_parameters_cuda( 16 | THCudaTensor *input, THCudaTensor *offset, THCudaTensor *gradOutput, 17 | THCudaTensor *gradWeight, /*THCudaTensor *gradBias, */ 18 | THCudaTensor *columns, THCudaTensor *ones, int kW, int kH, int dW, int dH, 19 | int padW, int padH, int dilationW, int dilationH, int deformable_group, 20 | float scale, int im2col_step); 21 | -------------------------------------------------------------------------------- /graphs/ops/src/deform_conv_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | template <typename DType> 2 | void deformable_im2col(cudaStream_t stream, const DType *data_im, 3 | const DType *data_offset, const int channels, 4 | const int height, const int width, const int ksize_h, 5 | const int ksize_w, const int pad_h, const int pad_w, 6 | const int stride_h, const int stride_w, 7 | const int dilation_h, const int dilation_w, 8 | const int parallel_imgs, 9 | const int deformable_group, DType *data_col); 10 | 11 | template <typename DType> 12 | void deformable_col2im(cudaStream_t stream, const DType *data_col, 13 | const DType *data_offset, const int channels, 14 | const int height, const int width, const int ksize_h, 15 | const int ksize_w, const int pad_h, const int pad_w, 16 | const int stride_h, const int stride_w, 17 | const int dilation_h, const int dilation_w, 18 | const int parallel_imgs, 19 | const int deformable_group, DType *grad_im); 20 | 21 | template <typename DType> 22 | void deformable_col2im_coord(cudaStream_t stream, const DType *data_col, 23 | const DType *data_im, const DType *data_offset, 24 | const int channels, const int height, 25 | const int width, const int ksize_h, 26 | const int ksize_w, const int pad_h, 27 | const int pad_w, const int stride_h, 28 | const int stride_w, const int dilation_h, 29 |
const int dilation_w, const int parallel_imgs, 30 | const int deformable_group, DType *grad_offset); 31 | -------------------------------------------------------------------------------- /graphs/ops/src/modulated_dcn.c: -------------------------------------------------------------------------------- 1 | #include <TH/TH.h> 2 | #include <stdio.h> 3 | #include <math.h> 4 | 5 | void modulated_deform_conv_forward(THFloatTensor *input, THFloatTensor *weight, 6 | THFloatTensor *bias, THFloatTensor *ones, 7 | THFloatTensor *offset, THFloatTensor *mask, 8 | THFloatTensor *output, THFloatTensor *columns, 9 | const int pad_h, const int pad_w, 10 | const int stride_h, const int stride_w, 11 | const int dilation_h, const int dilation_w, 12 | const int deformable_group) 13 | { 14 | printf("only implemented in GPU"); 15 | } 16 | void modulated_deform_conv_backward(THFloatTensor *input, THFloatTensor *weight, 17 | THFloatTensor *bias, THFloatTensor *ones, 18 | THFloatTensor *offset, THFloatTensor *mask, 19 | THFloatTensor *output, THFloatTensor *columns, 20 | THFloatTensor *grad_input, THFloatTensor *grad_weight, 21 | THFloatTensor *grad_bias, THFloatTensor *grad_offset, 22 | THFloatTensor *grad_mask, THFloatTensor *grad_output, 23 | int kernel_h, int kernel_w, 24 | int stride_h, int stride_w, 25 | int pad_h, int pad_w, 26 | int dilation_h, int dilation_w, 27 | int deformable_group) 28 | { 29 | printf("only implemented in GPU"); 30 | } -------------------------------------------------------------------------------- /graphs/ops/src/modulated_dcn.h: -------------------------------------------------------------------------------- 1 | void modulated_deform_conv_forward(THFloatTensor *input, THFloatTensor *weight, 2 | THFloatTensor *bias, THFloatTensor *ones, 3 | THFloatTensor *offset, THFloatTensor *mask, 4 | THFloatTensor *output, THFloatTensor *columns, 5 | const int pad_h, const int pad_w, 6 | const int stride_h, const int stride_w, 7 | const int dilation_h, const int dilation_w, 8 | const int deformable_group); 9 | void modulated_deform_conv_backward(THFloatTensor *input, THFloatTensor *weight, 10 | THFloatTensor *bias, THFloatTensor *ones, 11 | THFloatTensor *offset, THFloatTensor *mask, 12 | THFloatTensor *output, THFloatTensor *columns, 13 | THFloatTensor *grad_input, THFloatTensor *grad_weight, 14 | THFloatTensor *grad_bias, THFloatTensor *grad_offset, 15 | THFloatTensor *grad_mask, THFloatTensor *grad_output, 16 | int kernel_h, int kernel_w, 17 | int stride_h, int stride_w, 18 | int pad_h, int pad_w, 19 | int dilation_h, int dilation_w, 20 | int deformable_group); -------------------------------------------------------------------------------- /graphs/ops/src/modulated_dcn_cuda.h: -------------------------------------------------------------------------------- 1 | // #ifndef DCN_V2_CUDA 2 | // #define DCN_V2_CUDA 3 | 4 | // #ifdef __cplusplus 5 | // extern "C" 6 | // { 7 | // #endif 8 | 9 | void modulated_deform_conv_cuda_forward(THCudaTensor *input, THCudaTensor *weight, 10 | THCudaTensor *bias, THCudaTensor *ones, 11 | THCudaTensor *offset, THCudaTensor *mask, 12 | THCudaTensor *output, THCudaTensor *columns, 13 | int kernel_h, int kernel_w, 14 | const int stride_h, const int stride_w, 15 | const int pad_h, const int pad_w, 16 | const int dilation_h, const int dilation_w, 17 | const int deformable_group); 18 | void modulated_deform_conv_cuda_backward(THCudaTensor *input, THCudaTensor *weight, 19 | THCudaTensor *bias, THCudaTensor *ones, 20 | THCudaTensor *offset, THCudaTensor *mask, 21 | THCudaTensor *columns, 22 |
THCudaTensor *grad_input, THCudaTensor *grad_weight, 23 | THCudaTensor *grad_bias, THCudaTensor *grad_offset, 24 | THCudaTensor *grad_mask, THCudaTensor *grad_output, 25 | int kernel_h, int kernel_w, 26 | int stride_h, int stride_w, 27 | int pad_h, int pad_w, 28 | int dilation_h, int dilation_w, 29 | int deformable_group); 30 | 31 | void deform_psroi_pooling_cuda_forward(THCudaTensor * input, THCudaTensor * bbox, 32 | THCudaTensor * trans, 33 | THCudaTensor * out, THCudaTensor * top_count, 34 | const int no_trans, 35 | const float spatial_scale, 36 | const int output_dim, 37 | const int group_size, 38 | const int pooled_size, 39 | const int part_size, 40 | const int sample_per_part, 41 | const float trans_std); 42 | 43 | void deform_psroi_pooling_cuda_backward(THCudaTensor * out_grad, 44 | THCudaTensor * input, THCudaTensor * bbox, 45 | THCudaTensor * trans, THCudaTensor * top_count, 46 | THCudaTensor * input_grad, THCudaTensor * trans_grad, 47 | const int no_trans, 48 | const float spatial_scale, 49 | const int output_dim, 50 | const int group_size, 51 | const int pooled_size, 52 | const int part_size, 53 | const int sample_per_part, 54 | const float trans_std); 55 | -------------------------------------------------------------------------------- /graphs/ops/src/scale_conv.c: -------------------------------------------------------------------------------- 1 | #include <TH/TH.h> 2 | 3 | int scale_conv_forward(THFloatTensor *input, THFloatTensor *scale, 4 | THFloatTensor *output) 5 | { 6 | // if (!THFloatTensor_isSameSizeAs(input1, input2)) 7 | // return 0; 8 | // THFloatTensor_resizeAs(output, input); 9 | // THFloatTensor_cadd(output, input1, 1.0, input2); 10 | return 1; 11 | } 12 | 13 | int scale_conv_backward(THFloatTensor *grad_output, THFloatTensor *grad_input, 14 | THFloatTensor *grad_scale) 15 | { 16 | // THFloatTensor_resizeAs(grad_input, grad_output); 17 | // THFloatTensor_fill(grad_input, 1); 18 | return 1; 19 | } -------------------------------------------------------------------------------- /graphs/ops/src/scale_conv.h: -------------------------------------------------------------------------------- 1 | int scale_conv_forward(THFloatTensor *input, THFloatTensor *scale, 2 | THFloatTensor *output); 3 | int scale_conv_backward(THFloatTensor *grad_output, THFloatTensor *grad_input, 4 | THFloatTensor *grad_scale); -------------------------------------------------------------------------------- /graphs/ops/src/scale_conv_cuda.h: -------------------------------------------------------------------------------- 1 | int scale_conv_forward_cuda(THCudaTensor *input, 2 | THCudaTensor *weight, /*THCudaTensor * bias, */ 3 | THCudaTensor *scale, THCudaTensor *output, 4 | THCudaTensor *columns, THCudaTensor *ones, int kW, 5 | int kH, int dW, int dH, int padW, int padH, 6 | int dilationW, int dilationH, 7 | int deformable_group, int im2col_step); 8 | 9 | int scale_conv_backward_input_cuda( 10 | THCudaTensor *input, THCudaTensor *scale, THCudaTensor *gradOutput, 11 | THCudaTensor *gradInput, THCudaTensor *gradScale, THCudaTensor *weight, 12 | THCudaTensor *columns, int kW, int kH, int dW, int dH, int padW, int padH, 13 | int dilationW, int dilationH, int deformable_group, int im2col_step); 14 | 15 | int scale_conv_backward_parameters_cuda( 16 | THCudaTensor *input, THCudaTensor *scale, THCudaTensor *gradOutput, 17 | THCudaTensor *gradWeight, /*THCudaTensor *gradBias, */ 18 | THCudaTensor *columns, THCudaTensor *ones, int kW, int kH, int dW, int dH, 19 | int padW, int padH, int dilationW, int dilationH, int
deformable_group, 20 | float scale_, int im2col_step); 21 | -------------------------------------------------------------------------------- /graphs/ops/src/scale_conv_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef SCALE_CONV_CUDA_KERNEL_H_ 2 | #define SCALE_CONV_CUDA_KERNEL_H_ 3 | void scale_im2col(cudaStream_t stream, const float *data_im, 4 | const float *data_scale, const int channels, 5 | const int height, const int width, const int ksize_h, 6 | const int ksize_w, const int pad_h, const int pad_w, 7 | const int stride_h, const int stride_w, 8 | const int dilation_h, const int dilation_w, 9 | const int parallel_imgs, 10 | const int deformable_group, float *data_col); 11 | 12 | void scale_col2im(cudaStream_t stream, const float *data_col, 13 | const float *data_scale, const int channels, 14 | const int height, const int width, const int ksize_h, 15 | const int ksize_w, const int pad_h, const int pad_w, 16 | const int stride_h, const int stride_w, 17 | const int dilation_h, const int dilation_w, 18 | const int parallel_imgs, 19 | const int deformable_group, float *grad_im); 20 | 21 | void scale_col2im_coord(cudaStream_t stream, const float *data_col, 22 | const float *data_im, const float *data_scale, 23 | const int channels, const int height, 24 | const int width, const int ksize_h, 25 | const int ksize_w, const int pad_h, 26 | const int pad_w, const int stride_h, 27 | const int stride_w, const int dilation_h, 28 | const int dilation_w, const int parallel_imgs, 29 | const int deformable_group, float *grad_scale); 30 | #endif -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | """ 2 | __author__ = "Hager Rady and Mo'men AbdelRazek" 3 | 4 | Main 5 | -Capture the config file 6 | -Process the json config passed 7 | -Create an agent instance 8 | -Run the agent 9 | """ 10 | 11 | import argparse 12 | from utils.config import * 13 | from agents import * 14 | 15 | 16 | def main(): 17 | 18 | # parse the path of the json config file 19 | arg_parser = argparse.ArgumentParser(description="") 20 | arg_parser.add_argument( 21 | 'config', 22 | metavar='config_json_file', 23 | default='None', 24 | help='The Configuration file in json format') 25 | args = arg_parser.parse_args() 26 | 27 | # parse the config json file 28 | config = process_config(args.config) 29 | 30 | # Create the Agent and pass all the configuration to it then run it.. 
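# `config.agent` holds the class name of an agent defined in the `agents` package; the
# `from agents import *` above makes those classes visible here, so globals() can
# resolve the configured name to the class object at runtime.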
31 | agent_class = globals()[config.agent] 32 | agent = agent_class(config) 33 | agent.run() 34 | agent.finalize() 35 | 36 | 37 | if __name__ == '__main__': 38 | main() 39 | -------------------------------------------------------------------------------- /run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python main.py ./configs/scnet_os16_depth.json 3 | python main.py ./configs/scnet_os16_depth.json -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | path = os.path.dirname(os.path.abspath(__file__)) 5 | 6 | for py in [f[:-3] for f in os.listdir(path) if f.endswith('.py') and f != '__init__.py']: 7 | mod = __import__('.'.join([__name__, py]), fromlist=[py]) 8 | classes = [getattr(mod, x) for x in dir(mod) if isinstance(getattr(mod, x), type)] 9 | for cls in classes: 10 | setattr(sys.modules[__name__], cls.__name__, cls) -------------------------------------------------------------------------------- /utils/assets/class_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LinZhuoChen/SGNet/02510182eb4baca77dd1d99237a5e77812055a0c/utils/assets/class_diagram.png -------------------------------------------------------------------------------- /utils/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path as osp 3 | 4 | import logging 5 | from logging import Formatter 6 | from logging.handlers import RotatingFileHandler 7 | 8 | import json 9 | from easydict import EasyDict 10 | from pprint import pprint 11 | 12 | from utils.dirs import create_dirs 13 | 14 | def get_config_from_json(json_file): 15 | """ 16 | Get the config from a json file 17 | :param json_file: the path of the config file 18 | :return: config(namespace), config(dictionary) 19 | """ 20 | 21 | # parse the configurations from the config json file provided 22 | with open(json_file, 'r') as config_file: 23 | try: 24 | config_dict = json.load(config_file) 25 | # EasyDict allows to access dict values as attributes (works recursively). 26 | config = EasyDict(config_dict) 27 | return config, config_dict 28 | except ValueError: 29 | print("INVALID JSON file format.. Please provide a good json file") 30 | exit(-1) 31 | 32 | 33 | def process_config(json_file): 34 | """ 35 | Get the json file 36 | Processing it with EasyDict to be accessible as attributes 37 | then editing the path of the experiments folder 38 | creating some important directories in the experiment folder 39 | Then setup the logging in the whole program 40 | Then return the config 41 | :param json_file: the path of the config file 42 | :return: config object(namespace) 43 | """ 44 | config, _ = get_config_from_json(json_file) 45 | print(" THE Configuration of your experiment ..") 46 | pprint(config) 47 | 48 | # making sure that you have provided the exp_name. 49 | try: 50 | print(" *************************************** ") 51 | print("The experiment name is {}".format(config.exp_name)) 52 | print(" *************************************** ") 53 | except AttributeError: 54 | print("ERROR!!..Please provide the exp_name in json file..") 55 | exit(-1) 56 | 57 | # create some important directories to be used for that experiment. 
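# create_dirs (defined in utils/dirs.py) calls os.makedirs for every listed path that does not exist yet.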
58 | create_dirs([config.snapshot_dir]) 59 | 60 | # setup logging in the project 61 | 62 | 63 | return config 64 | -------------------------------------------------------------------------------- /utils/dirs.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | 4 | 5 | def create_dirs(dirs): 6 | """ 7 | dirs - a list of directories to create if these directories are not found 8 | :param dirs: 9 | :return: 10 | """ 11 | try: 12 | for dir_ in dirs: 13 | if not os.path.exists(dir_): 14 | os.makedirs(dir_) 15 | except Exception as err: 16 | logging.getLogger("Dirs Creator").info("Creating directories error: {0}".format(err)) 17 | exit(-1) 18 | -------------------------------------------------------------------------------- /utils/encoding.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## ECE Department, Rutgers University 4 | ## Email: zhang.hang@rutgers.edu 5 | ## Copyright (c) 2017 6 | ## 7 | ## This source code is licensed under the MIT-style license found in the 8 | ## LICENSE file in the root directory of this source tree 9 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 10 | 11 | """Encoding Data Parallel""" 12 | import threading 13 | import functools 14 | import torch 15 | from torch.autograd import Variable, Function 16 | import torch.cuda.comm as comm 17 | from torch.nn.parallel.data_parallel import DataParallel 18 | from torch.nn.parallel.parallel_apply import get_a_var 19 | from torch.nn.parallel._functions import ReduceAddCoalesced, Broadcast 20 | 21 | torch_ver = torch.__version__[:3] 22 | 23 | __all__ = ['allreduce', 'DataParallelModel', 'DataParallelCriterion', 24 | 'patch_replication_callback'] 25 | 26 | def allreduce(*inputs): 27 | """Cross GPU all reduce autograd operation for calculating mean and 28 | variance in SyncBN.
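Example (hypothetical per-GPU partial sums, two tensors per GPU)::

    >>> out = allreduce(2, sum0, sqsum0, sum1, sqsum1)
    >>> # out holds the reduced tensors broadcast back to both GPUs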
29 | """ 30 | return AllReduce.apply(*inputs) 31 | 32 | class AllReduce(Function): 33 | @staticmethod 34 | def forward(ctx, num_inputs, *inputs): 35 | ctx.num_inputs = num_inputs 36 | ctx.target_gpus = [inputs[i].get_device() for i in range(0, len(inputs), num_inputs)] 37 | inputs = [inputs[i:i + num_inputs] 38 | for i in range(0, len(inputs), num_inputs)] 39 | # sort before reduce sum 40 | inputs = sorted(inputs, key=lambda i: i[0].get_device()) 41 | results = comm.reduce_add_coalesced(inputs, ctx.target_gpus[0]) 42 | outputs = comm.broadcast_coalesced(results, ctx.target_gpus) 43 | return tuple([t for tensors in outputs for t in tensors]) 44 | 45 | @staticmethod 46 | def backward(ctx, *inputs): 47 | inputs = [i.data for i in inputs] 48 | inputs = [inputs[i:i + ctx.num_inputs] 49 | for i in range(0, len(inputs), ctx.num_inputs)] 50 | results = comm.reduce_add_coalesced(inputs, ctx.target_gpus[0]) 51 | outputs = comm.broadcast_coalesced(results, ctx.target_gpus) 52 | return (None,) + tuple([Variable(t) for tensors in outputs for t in tensors]) 53 | 54 | 55 | class Reduce(Function): 56 | @staticmethod 57 | def forward(ctx, *inputs): 58 | ctx.target_gpus = [inputs[i].get_device() for i in range(len(inputs))] 59 | inputs = sorted(inputs, key=lambda i: i.get_device()) 60 | return comm.reduce_add(inputs) 61 | 62 | @staticmethod 63 | def backward(ctx, gradOutput): 64 | return Broadcast.apply(ctx.target_gpus, gradOutput) 65 | 66 | 67 | class DataParallelModel(DataParallel): 68 | """Implements data parallelism at the module level. 69 | 70 | This container parallelizes the application of the given module by 71 | splitting the input across the specified devices by chunking in the 72 | batch dimension. 73 | In the forward pass, the module is replicated on each device, 74 | and each replica handles a portion of the input. During the backwards pass, gradients from each replica are summed into the original module. 75 | Note that the outputs are not gathered, please use compatible 76 | :class:`encoding.parallel.DataParallelCriterion`. 77 | 78 | The batch size should be larger than the number of GPUs used. It should 79 | also be an integer multiple of the number of GPUs so that each chunk is 80 | the same size (so that each GPU processes the same number of samples). 81 | 82 | Args: 83 | module: module to be parallelized 84 | device_ids: CUDA devices (default: all devices) 85 | 86 | Reference: 87 | Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, Ambrish Tyagi, 88 | Amit Agrawal. “Context Encoding for Semantic Segmentation. 89 | *The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2018* 90 | 91 | Example:: 92 | 93 | >>> net = encoding.nn.DataParallelModel(model, device_ids=[0, 1, 2]) 94 | >>> y = net(x) 95 | """ 96 | def gather(self, outputs, output_device): 97 | return outputs 98 | 99 | def replicate(self, module, device_ids): 100 | modules = super(DataParallelModel, self).replicate(module, device_ids) 101 | execute_replication_callbacks(modules) 102 | return modules 103 | 104 | 105 | class DataParallelCriterion(DataParallel): 106 | """ 107 | Calculate loss in multiple-GPUs, which balance the memory usage for 108 | Semantic Segmentation. 109 | 110 | The targets are splitted across the specified devices by chunking in 111 | the batch dimension. Please use together with :class:`encoding.parallel.DataParallelModel`. 112 | 113 | Reference: 114 | Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, Ambrish Tyagi, 115 | Amit Agrawal. 
“Context Encoding for Semantic Segmentation.” 116 | *The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2018* 117 | 118 | Example:: 119 | 120 | >>> net = encoding.nn.DataParallelModel(model, device_ids=[0, 1, 2]) 121 | >>> criterion = encoding.nn.DataParallelCriterion(criterion, device_ids=[0, 1, 2]) 122 | >>> y = net(x) 123 | >>> loss = criterion(y, target) 124 | """ 125 | def forward(self, inputs, *targets, **kwargs): 126 | # input should be already scattered 127 | # scattering the targets instead 128 | if not self.device_ids: 129 | return self.module(inputs, *targets, **kwargs) 130 | targets, kwargs = self.scatter(targets, kwargs, self.device_ids) 131 | if len(self.device_ids) == 1: 132 | return self.module(inputs, *targets[0], **kwargs[0]) 133 | replicas = self.replicate(self.module, self.device_ids[:len(inputs)]) 134 | outputs = _criterion_parallel_apply(replicas, inputs, targets, kwargs) 135 | return Reduce.apply(*outputs) / len(outputs) 136 | #return self.gather(outputs, self.output_device).mean() 137 | 138 | 139 | def _criterion_parallel_apply(modules, inputs, targets, kwargs_tup=None, devices=None): 140 | assert len(modules) == len(inputs) 141 | assert len(targets) == len(inputs) 142 | if kwargs_tup: 143 | assert len(modules) == len(kwargs_tup) 144 | else: 145 | kwargs_tup = ({},) * len(modules) 146 | if devices is not None: 147 | assert len(modules) == len(devices) 148 | else: 149 | devices = [None] * len(modules) 150 | 151 | lock = threading.Lock() 152 | results = {} 153 | if torch_ver != "0.3": 154 | grad_enabled = torch.is_grad_enabled() 155 | 156 | def _worker(i, module, input, target, kwargs, device=None): 157 | if torch_ver != "0.3": 158 | torch.set_grad_enabled(grad_enabled) 159 | if device is None: 160 | device = get_a_var(input).get_device() 161 | try: 162 | if not isinstance(input, tuple): 163 | input = (input,) 164 | with torch.cuda.device(device): 165 | output = module(*(input + target), **kwargs) 166 | with lock: 167 | results[i] = output 168 | except Exception as e: 169 | with lock: 170 | results[i] = e 171 | 172 | if len(modules) > 1: 173 | threads = [threading.Thread(target=_worker, 174 | args=(i, module, input, target, 175 | kwargs, device),) 176 | for i, (module, input, target, kwargs, device) in 177 | enumerate(zip(modules, inputs, targets, kwargs_tup, devices))] 178 | 179 | for thread in threads: 180 | thread.start() 181 | for thread in threads: 182 | thread.join() 183 | else: 184 | _worker(0, modules[0], inputs[0], targets[0], kwargs_tup[0], devices[0]) 185 | 186 | outputs = [] 187 | for i in range(len(inputs)): 188 | output = results[i] 189 | if isinstance(output, Exception): 190 | raise output 191 | outputs.append(output) 192 | return outputs 193 | 194 | 195 | ########################################################################### 196 | # Adapted from Synchronized-BatchNorm-PyTorch. 197 | # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch 198 | # 199 | class CallbackContext(object): 200 | pass 201 | 202 | 203 | def execute_replication_callbacks(modules): 204 | """ 205 | Execute a replication callback `__data_parallel_replicate__` on each module created 206 | by original replication. 207 | 208 | The callback will be invoked with arguments `__data_parallel_replicate__(ctx, copy_id)` 209 | 210 | Note that, as all modules are isomorphic, we assign each sub-module with a context 211 | (shared among multiple copies of this module on different devices). 212 | Through this context, different copies can share some information.
213 | 214 | We guarantee that the callback on the master copy (the first copy) will be called ahead 215 | of calling the callback of any slave copies. 216 | """ 217 | master_copy = modules[0] 218 | nr_modules = len(list(master_copy.modules())) 219 | ctxs = [CallbackContext() for _ in range(nr_modules)] 220 | 221 | for i, module in enumerate(modules): 222 | for j, m in enumerate(module.modules()): 223 | if hasattr(m, '__data_parallel_replicate__'): 224 | m.__data_parallel_replicate__(ctxs[j], i) 225 | 226 | 227 | def patch_replication_callback(data_parallel): 228 | """ 229 | Monkey-patch an existing `DataParallel` object. Add the replication callback. 230 | Useful when you have customized `DataParallel` implementation. 231 | 232 | Examples: 233 | > sync_bn = SynchronizedBatchNorm1d(10, eps=1e-5, affine=False) 234 | > sync_bn = DataParallel(sync_bn, device_ids=[0, 1]) 235 | > patch_replication_callback(sync_bn) 236 | # this is equivalent to 237 | > sync_bn = SynchronizedBatchNorm1d(10, eps=1e-5, affine=False) 238 | > sync_bn = DataParallelWithCallback(sync_bn, device_ids=[0, 1]) 239 | """ 240 | 241 | assert isinstance(data_parallel, DataParallel) 242 | 243 | old_replicate = data_parallel.replicate 244 | 245 | @functools.wraps(old_replicate) 246 | def new_replicate(module, device_ids): 247 | modules = old_replicate(module, device_ids) 248 | execute_replication_callbacks(modules) 249 | return modules 250 | 251 | data_parallel.replicate = new_replicate -------------------------------------------------------------------------------- /utils/log.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path as osp 3 | from tensorboardX import SummaryWriter 4 | 5 | import logging 6 | from logging import Formatter 7 | from logging.handlers import RotatingFileHandler 8 | 9 | def setup_logging(log_dir): 10 | log_file_format = "[%(levelname)s] - %(asctime)s - %(name)s - : %(message)s in %(pathname)s:%(lineno)d" 11 | log_console_format = "[%(levelname)s]: %(message)s" 12 | 13 | # Main logger 14 | main_logger = logging.getLogger() 15 | main_logger.setLevel(logging.INFO) 16 | 17 | console_handler = logging.StreamHandler() 18 | console_handler.setLevel(logging.INFO) 19 | console_handler.setFormatter(Formatter(log_console_format)) 20 | 21 | exp_file_handler = RotatingFileHandler('{}exp_debug.log'.format(log_dir), maxBytes=10**6, backupCount=5) 22 | exp_file_handler.setLevel(logging.DEBUG) 23 | exp_file_handler.setFormatter(Formatter(log_file_format)) 24 | 25 | exp_errors_file_handler = RotatingFileHandler('{}exp_error.log'.format(log_dir), maxBytes=10**6, backupCount=5) 26 | exp_errors_file_handler.setLevel(logging.WARNING) 27 | exp_errors_file_handler.setFormatter(Formatter(log_file_format)) 28 | 29 | main_logger.addHandler(console_handler) 30 | main_logger.addHandler(exp_file_handler) 31 | main_logger.addHandler(exp_errors_file_handler) 32 | 33 | class Visualizer(): 34 | """ 35 | Visualizer 36 | :param: 37 | config: 38 | """ 39 | def __init__(self, config): 40 | self.writer = SummaryWriter(osp.join(config.snapshot_dir, config.exp_name + config.time)) 41 | self.config = config 42 | def add_scalar(self, name, x, y): 43 | self.writer.add_scalar(name, x, y) 44 | def add_image(self, name, image, iter): 45 | self.writer.add_image(name, image, iter) 46 | 47 | class Log(): 48 | """ 49 | Log 50 | :param: 51 | config: 52 | """ 53 | def __init__(self, config): 54 | self.log_path = osp.join(config.snapshot_dir, config.exp_name + config.time) 55 | self.log = 
open(osp.join(self.log_path, 'log_train.txt'), 'w') 56 | self.config = config 57 | setup_logging(self.log_path) 58 | logging.getLogger().info("Hi, this is root.") 59 | logging.getLogger().info("The configurations are successfully processed and dirs are created.") 60 | logging.getLogger().info("The pipeline of the project will begin now.") 61 | 62 | def record_sys_param(self): 63 | self.log.write(str(self.config) + '\n') 64 | 65 | def record_file(self): 66 | os.system('cp %s %s' % (self.config.model_file, self.log_path)) 67 | os.system('cp %s %s' % (self.config.agent_file, self.log_path)) 68 | os.system('cp %s %s' % (self.config.config_file, self.log_path)) 69 | os.system('cp %s %s' % (self.config.dataset_file, self.log_path)) 70 | os.system('cp %s %s' % (self.config.transform_file, self.log_path)) 71 | os.system('cp %s %s' % (self.config.module_file, self.log_path)) 72 | 73 | def log_string(self, out_str): 74 | self.log.write(out_str + '\n') 75 | self.log.flush() 76 | print(out_str) -------------------------------------------------------------------------------- /utils/metrics.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file contains the metrics of the framework 3 | """ 4 | import numpy as np 5 | 6 | 7 | class IOUMetric: 8 | """ 9 | Class to calculate mean IoU using the fast_hist method 10 | """ 11 | 12 | def __init__(self, num_classes): 13 | self.num_classes = num_classes 14 | self.hist = np.zeros((num_classes, num_classes)) 15 | 16 | def _fast_hist(self, label_pred, label_true): 17 | mask = (label_true >= 0) & (label_true < self.num_classes) 18 | hist = np.bincount( 19 | self.num_classes * label_true[mask].astype(int) + 20 | label_pred[mask], minlength=self.num_classes ** 2).reshape(self.num_classes, self.num_classes) 21 | return hist 22 | 23 | def add_batch(self, predictions, gts, ignore_index=255): 24 | valid = gts != ignore_index 25 | predictions = predictions[valid] 26 | gts = gts[valid] 27 | self.hist += self._fast_hist(predictions, gts) 28 | 29 | def add_batch_ignore(self, predictions, gts, ignore_index=255): 30 | for lp, lt in zip(predictions, gts): 31 | valid = lt != ignore_index 32 | lp = lp[valid] 33 | lt = lt[valid] 34 | self.hist += self._fast_hist(lp, lt) 35 | 36 | def evaluate(self): 37 | acc = np.diag(self.hist).sum() / self.hist.sum() 38 | acc_cls = np.diag(self.hist) / self.hist.sum(axis=1) 39 | acc_cls = np.nanmean(acc_cls) 40 | iu = np.diag(self.hist) / (self.hist.sum(axis=1) + self.hist.sum(axis=0) - np.diag(self.hist)) 41 | mean_iu = np.nanmean(iu) 42 | freq = self.hist.sum(axis=1) / self.hist.sum() 43 | fwavacc = (freq[freq > 0] * iu[freq > 0]).sum() 44 | return acc, acc_cls, iu, mean_iu, fwavacc 45 | 46 | class AverageMeter: 47 | """ 48 | Class to be an average meter for any scalar metric like loss or accuracy.
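A minimal usage sketch (the numbers are illustrative): update with the batch mean and the batch size, then read the running average: >>> meter = AverageMeter() >>> meter.update(0.5, n=4) # mean loss of 0.5 over 4 samples >>> meter.update(1.0, n=4) >>> meter.avg 0.75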
49 | """ 50 | 51 | def __init__(self): 52 | self.value = 0 53 | self.avg = 0 54 | self.sum = 0 55 | self.count = 0 56 | self.reset() 57 | 58 | def reset(self): 59 | self.value = 0 60 | self.avg = 0 61 | self.sum = 0 62 | self.count = 0 63 | 64 | def update(self, val, n=1): 65 | self.value = val 66 | self.sum += val * n 67 | self.count += n 68 | self.avg = self.sum / self.count 69 | 70 | @property 71 | def val(self): 72 | return self.avg 73 | 74 | 75 | class AverageMeterList: 76 | """ 77 | Class to be an average meter for any average metric List structure like mean_iou_per_class 78 | """ 79 | 80 | def __init__(self, num_cls): 81 | self.cls = num_cls 82 | self.value = [0] * self.cls 83 | self.avg = [0] * self.cls 84 | self.sum = [0] * self.cls 85 | self.count = [0] * self.cls 86 | self.reset() 87 | 88 | def reset(self): 89 | self.value = [0] * self.cls 90 | self.avg = [0] * self.cls 91 | self.sum = [0] * self.cls 92 | self.count = [0] * self.cls 93 | 94 | def update(self, val, n=1): 95 | for i in range(self.cls): 96 | self.value[i] = val[i] 97 | self.sum[i] += val[i] * n 98 | self.count[i] += n 99 | self.avg[i] = self.sum[i] / self.count[i] 100 | 101 | @property 102 | def val(self): 103 | return self.avg 104 | 105 | 106 | def cls_accuracy(output, target, topk=(1,)): 107 | maxk = max(topk) 108 | batch_size = target.size(0) 109 | 110 | _, pred = output.topk(maxk, 1, True, True) 111 | pred = pred.t() 112 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 113 | 114 | res = [] 115 | for k in topk: 116 | correct_k = correct[:k].view(-1).float().sum(0) 117 | res.append(correct_k / batch_size) 118 | return res 119 | -------------------------------------------------------------------------------- /utils/misc.py: -------------------------------------------------------------------------------- 1 | import time 2 | import logging 3 | 4 | 5 | def timeit(f): 6 | """ Decorator to time Any Function """ 7 | 8 | def timed(*args, **kwargs): 9 | start_time = time.time() 10 | result = f(*args, **kwargs) 11 | end_time = time.time() 12 | seconds = end_time - start_time 13 | logging.getLogger("Timer").info(" [-] %s : %2.5f sec, which is %2.5f min, which is %2.5f hour" % 14 | (f.__name__, seconds, seconds / 60, seconds / 3600)) 15 | return result 16 | 17 | return timed 18 | 19 | 20 | def print_cuda_statistics(): 21 | logger = logging.getLogger("Cuda Statistics") 22 | import sys 23 | from subprocess import call 24 | import torch 25 | logger.info('__Python VERSION: {}'.format(sys.version)) 26 | logger.info('__pyTorch VERSION: {}'.format(torch.__version__)) 27 | logger.info('__CUDA VERSION') 28 | call(["nvcc", "--version"]) 29 | logger.info('__CUDNN VERSION: {}'.format(torch.backends.cudnn.version())) 30 | logger.info('__Number CUDA Devices: {}'.format(torch.cuda.device_count())) 31 | logger.info('__Devices') 32 | call(["nvidia-smi", "--format=csv", 33 | "--query-gpu=index,name,driver_version,memory.total,memory.used,memory.free"]) 34 | logger.info('Active CUDA Device: GPU {}'.format(torch.cuda.current_device())) 35 | logger.info('Available devices {}'.format(torch.cuda.device_count())) 36 | logger.info('Current cuda device {}'.format(torch.cuda.current_device())) 37 | -------------------------------------------------------------------------------- /utils/optim.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | def lr_poly_exp(base_lr, iter, max_iter, power): 4 | return base_lr*((1-float(iter)/max_iter)**(power)) 5 | 6 | 7 | def 
lr_poly_epoch(base_lr, iter, max_iter, power): 8 | return base_lr / 2.0  # ignores its arguments and simply halves base_lr 9 | 10 | 11 | def adjust_learning_rate(optimizer, i_iter, args): 12 | """Sets the learning rate with polynomial decay 13 | Args: 14 | optimizer: The optimizer 15 | i_iter: The number of iterations 16 | """ 17 | if args.dataset == "SUNRGBD": 18 | lr = max(lr_poly_exp(args.learning_rate, i_iter, args.num_steps, args.power), args.min_learining_rate) 19 | elif args.dataset == "NYUD":  # same schedule as SUNRGBD 20 | lr = max(lr_poly_exp(args.learning_rate, i_iter, args.num_steps, args.power), args.min_learining_rate) 21 | else: 22 | lr = lr_poly_exp(args.learning_rate, i_iter, args.num_steps, args.power) 23 | optimizer.param_groups[0]['lr'] = lr 24 | return lr 25 | 26 | def adjust_learning_rate_warmup(optimizer, i_iter, args): 27 | """Sets the learning rate with a linear warmup followed by polynomial decay 28 | Args: 29 | optimizer: The optimizer 30 | i_iter: The number of iterations 31 | """ 32 | args.warmup_steps = 6000 33 | if i_iter < args.warmup_steps: 34 | lr = args.learning_rate * (i_iter / args.warmup_steps) 35 | else: 36 | lr = max(lr_poly_exp(args.learning_rate, i_iter, args.num_steps, args.power), args.min_learining_rate) 37 | optimizer.param_groups[0]['lr'] = lr 38 | return lr 39 | 40 | def set_bn_eval(m): 41 | classname = m.__class__.__name__ 42 | if classname.find('BatchNorm') != -1: 43 | m.eval() 44 | 45 | 46 | def set_bn_momentum(m): 47 | classname = m.__class__.__name__ 48 | if classname.find('BatchNorm') != -1 or classname.find('InPlaceABN') != -1: 49 | m.momentum = 0.0003 -------------------------------------------------------------------------------- /utils/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from datetime import datetime 4 | from scipy import ndimage 5 | from PIL import Image 6 | import numpy as np 7 | 8 | model_urls = { 9 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', 10 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', 11 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 12 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 13 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', 14 | } 15 | 16 | # colour map 17 | # label_colours = [(0,0,0) 18 | # # 0=background 19 | # ,(128,0,0),(0,128,0),(128,128,0),(0,0,128),(128,0,128) 20 | # # 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle 21 | # ,(0,128,128),(128,128,128),(64,0,0),(192,0,0),(64,128,0) 22 | # # 6=bus, 7=car, 8=cat, 9=chair, 10=cow 23 | # ,(192,128,0),(64,0,128),(192,0,128),(64,128,128),(192,128,128) 24 | # # 11=diningtable, 12=dog, 13=horse, 14=motorbike, 15=person 25 | # ,(0,64,0),(128,64,0),(0,192,0),(128,192,0),(0,64,128)] 26 | # # 16=potted plant, 17=sheep, 18=sofa, 19=train, 20=tv/monitor 27 | def uint82bin(n, count=8): 28 | """Returns the binary string of integer n; count is the number of bits""" 29 | return ''.join([str((n >> y) & 1) for y in range(count - 1, -1, -1)]) 30 | 31 | 32 | def labelcolormap(N): 33 | if N == 35: # Cityscapes 34 | cmap = [(0, 0, 0), (0, 0, 0), (0, 0, 0), (0, 0, 0), (0, 0, 0), (111, 74, 0), (81, 0, 81), 35 | (128, 64, 128), (244, 35, 232), (250, 170, 160), (230, 150, 140), (70, 70, 70), (102, 102, 156), 36 | (190, 153, 153), 37 | (180, 165, 180), (150, 100, 100), (150, 120, 90), (153, 153, 153), (153, 153, 153), (250, 170, 30), 38 | (220, 220, 0), 39 | (107, 142, 35), (152, 251, 152), (70, 130, 180), (220, 20, 60), (255, 0, 0), (0, 0, 142), (0, 0, 70), 40 | (0, 60, 100), (0, 0, 90), (0, 0, 
110), (0, 80, 100), (0, 0, 230), (119, 11, 32), (0, 0, 142)] 41 | else: 42 | cmap = [] 43 | for i in range(N): 44 | r, g, b = 0, 0, 0 45 | id = i 46 | for j in range(7): 47 | str_id = uint82bin(id) 48 | r = r ^ (np.uint8(str_id[-1]) << (7 - j)) 49 | g = g ^ (np.uint8(str_id[-2]) << (7 - j)) 50 | b = b ^ (np.uint8(str_id[-3]) << (7 - j)) 51 | id = id >> 3 52 | color = (r, g, b) 53 | cmap.append(color) 54 | return cmap 55 | 56 | 57 | def decode_labels(mask, num_images=1, num_classes=40): 58 | """Decode batch of segmentation masks. 59 | 60 | Args: 61 | mask: result of inference after taking argmax. 62 | num_images: number of images to decode from the batch. 63 | num_classes: number of classes to predict (including background). 64 | 65 | Returns: 66 | A batch with num_images RGB images of the same size as the input. 67 | """ 68 | label_colours = labelcolormap(num_classes) 69 | mask = mask.data.cpu().numpy() 70 | n, h, w = mask.shape 71 | assert (n >= num_images), 'Batch size %d should be greater than or equal to the number of images to save %d.' % ( 72 | n, num_images) 73 | outputs = np.zeros((num_images, h, w, 3), dtype=np.uint8) 74 | for i in range(num_images): 75 | img = Image.new('RGB', (len(mask[i, 0]), len(mask[i]))) 76 | pixels = img.load() 77 | for j_, j in enumerate(mask[i, :, :]): 78 | for k_, k in enumerate(j): 79 | if k < num_classes: 80 | pixels[k_, j_] = label_colours[k] 81 | outputs[i] = np.array(img) 82 | return outputs 83 | 84 | 85 | def decode_predictions(preds, num_images=1, num_classes=40): 86 | """Decode batch of segmentation predictions. 87 | 88 | Args: 89 | preds: raw network outputs before argmax; may be a list, in which case the last output of each entry is used. 90 | num_images: number of images to decode from the batch. 91 | num_classes: number of classes to predict (including background). 92 | 93 | Returns: 94 | A batch with num_images RGB images of the same size as the input. 95 | """ 96 | label_colours = labelcolormap(num_classes) 97 | if isinstance(preds, list): 98 | preds_list = [] 99 | for pred in preds: 100 | preds_list.append(pred[-1].data.cpu().numpy()) 101 | preds = np.concatenate(preds_list, axis=0) 102 | else: 103 | preds = preds.data.cpu().numpy() 104 | 105 | preds = np.argmax(preds, axis=1) 106 | n, h, w = preds.shape 107 | assert (n >= num_images), 'Batch size %d should be greater than or equal to the number of images to save %d.' % ( 108 | n, num_images) 109 | outputs = np.zeros((num_images, h, w, 3), dtype=np.uint8) 110 | for i in range(num_images): 111 | img = Image.new('RGB', (len(preds[i, 0]), len(preds[i]))) 112 | pixels = img.load() 113 | for j_, j in enumerate(preds[i, :, :]): 114 | for k_, k in enumerate(j): 115 | if k < num_classes: 116 | pixels[k_, j_] = label_colours[k] 117 | outputs[i] = np.array(img) 118 | return outputs 119 | 120 | 121 | def inv_preprocess(imgs, num_images, img_mean): 122 | """Inverse preprocessing of the batch of images. 123 | Add the mean vector and convert from BGR to RGB. 124 | 125 | Args: 126 | imgs: batch of input images. 127 | num_images: number of images to apply the inverse transformations on. 128 | img_mean: vector of mean colour values. 129 | 130 | Returns: 131 | The batch of the size num_images with the same spatial dimensions as the input. 132 | """ 133 | imgs = imgs.data.cpu().numpy() 134 | n, c, h, w = imgs.shape 135 | assert (n >= num_images), 'Batch size %d should be greater than or equal to the number of images to save %d.' 
% ( 136 | n, num_images) 137 | outputs = np.zeros((num_images, h, w, c), dtype=np.uint8) 138 | for i in range(num_images): 139 | outputs[i] = (np.transpose(imgs[i], (1, 2, 0)) + img_mean).astype(np.uint8) 140 | return outputs 141 | 142 | 143 | def get_confusion_matrix(gt_label, pred_label, class_num): 144 | """ 145 | Calculate the confusion matrix from the given ground-truth and predicted labels 146 | :param gt_label: the ground truth label 147 | :param pred_label: the predicted label 148 | :param class_num: the number of classes 149 | :return: the confusion matrix 150 | """ 151 | index = (gt_label * class_num + pred_label).astype('int32') 152 | label_count = np.bincount(index) 153 | confusion_matrix = np.zeros((class_num, class_num)) 154 | 155 | for i_label in range(class_num): 156 | for i_pred_label in range(class_num): 157 | cur_index = i_label * class_num + i_pred_label 158 | if cur_index < len(label_count): 159 | confusion_matrix[i_label, i_pred_label] = label_count[cur_index] 160 | 161 | return confusion_matrix 162 | 163 | 164 | def get_currect_time(): 165 | TIMESTAMP = "{0:%Y-%m-%dT%H-%M-%S/}".format(datetime.now()) 166 | 167 | return TIMESTAMP 168 | 169 | 170 | def get_metric(hist): 171 | acc = np.diag(hist).sum() / hist.sum() 172 | acc_cls = np.diag(hist) / hist.sum(axis=1) 173 | acc_cls = np.nanmean(acc_cls) 174 | iu = np.diag(hist) / (hist.sum(axis=1) + hist.sum(axis=0) - np.diag(hist)) 175 | mean_iu = np.nanmean(iu) 176 | freq = hist.sum(axis=1) / hist.sum() 177 | fwavacc = (freq[freq > 0] * iu[freq > 0]).sum() 178 | 179 | return acc, acc_cls, mean_iu, fwavacc, iu[freq > 0] 180 | 181 | def predict_whole(net, image, tile_size, recurrence, S): 182 | image = torch.from_numpy(image) 183 | S = torch.from_numpy(S) 184 | interp = nn.Upsample(size=tile_size, mode='bilinear', align_corners=True) 185 | prediction = net(image.cuda(), S.cuda()) 186 | if isinstance(prediction, list): 187 | prediction = prediction[0] 188 | prediction = interp(prediction).cpu().data[0].numpy().transpose(1, 2, 0) 189 | return prediction 190 | 191 | def predict_multiscale(net, image, S, tile_size, scales, classes, flip_evaluation, recurrence=False): 192 | """ 193 | Predict an image by looking at it at different scales. 194 | Inputs no larger than the original size are predicted whole (via "predict_whole"); 195 | for larger inputs, a cropping method would be used instead to keep GPU memory within bounds. 196 | """ 197 | image = image.data 198 | S = S.data 199 | N_, C_, H_, W_ = image.shape 200 | full_probs = np.zeros((tile_size[0], tile_size[1], classes)) 201 | for scale in scales: 202 | scale = float(scale) 203 | scale_image = ndimage.zoom(image, (1.0, 1.0, scale, scale), order=1, prefilter=False) 204 | scale_S = ndimage.zoom(S, (1.0, 1.0, scale, scale), order=1, prefilter=False) 205 | scaled_probs = predict_whole(net, scale_image, tile_size, recurrence, scale_S) 206 | if flip_evaluation: 207 | flip_scaled_probs = predict_whole(net, scale_image[:, :, :, ::-1].copy(), tile_size, recurrence, 208 | scale_S[:, :, :, ::-1].copy()) 209 | scaled_probs = 0.5 * (scaled_probs + flip_scaled_probs[:, ::-1, :]) 210 | full_probs += scaled_probs 211 | full_probs /= len(scales) 212 | return full_probs 213 | 214 | def get_palette(num_cls): 215 | """ Returns the color map for visualizing the segmentation mask.
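The colours follow the PASCAL-VOC-style colormap construction: the bits of each label index are spread across the R/G/B channels from the most significant bit downwards, so neighbouring labels get clearly distinct colours. As a worked example (it follows directly from the loop below), get_palette(4)[3:6] == [128, 0, 0], i.e. class 1 is painted dark red.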
216 | Args: 217 | num_cls: Number of classes 218 | Returns: 219 | The color map 220 | """ 221 | 222 | n = num_cls 223 | palette = [0] * (n * 3) 224 | for j in range(n): 225 | lab = j 226 | palette[j * 3 + 0] = 0 227 | palette[j * 3 + 1] = 0 228 | palette[j * 3 + 2] = 0 229 | i = 0 230 | while lab: 231 | palette[j * 3 + 0] |= (((lab >> 0) & 1) << (7 - i)) 232 | palette[j * 3 + 1] |= (((lab >> 1) & 1) << (7 - i)) 233 | palette[j * 3 + 2] |= (((lab >> 2) & 1) << (7 - i)) 234 | i += 1 235 | lab >>= 3 236 | return palette 237 | 238 | def maybe_download(model_name, model_url, model_dir=None, map_location=None): 239 | import os, sys 240 | from six.moves import urllib 241 | if model_dir is None: 242 | torch_home = os.path.expanduser(os.getenv('TORCH_HOME', '~/.torch')) 243 | model_dir = os.getenv('TORCH_MODEL_ZOO', os.path.join(torch_home, 'models')) 244 | if not os.path.exists(model_dir): 245 | os.makedirs(model_dir) 246 | filename = '{}.pth.tar'.format(model_name) 247 | cached_file = os.path.join(model_dir, filename) 248 | if not os.path.exists(cached_file): 249 | url = model_url 250 | sys.stderr.write('Downloading: "{}" to {}\n'.format(url, cached_file)) 251 | urllib.request.urlretrieve(url, cached_file) 252 | return torch.load(cached_file, map_location=map_location) 253 | 254 | def expand_model_dict(model_dict, state_dict, num_parallel): 255 | model_dict_keys = model_dict.keys() 256 | state_dict_keys = state_dict.keys() 257 | for model_dict_key in model_dict_keys: 258 | model_dict_key_re = model_dict_key.replace('module.', '') 259 | if model_dict_key_re in state_dict_keys: 260 | model_dict[model_dict_key] = state_dict[model_dict_key_re] 261 | for i in range(num_parallel): 262 | bn = '.bn_%d' % i 263 | replace = bn in model_dict_key_re 264 | model_dict_key_re = model_dict_key_re.replace(bn, '') 265 | if replace and model_dict_key_re in state_dict_keys: 266 | model_dict[model_dict_key] = state_dict[model_dict_key_re] 267 | return model_dict --------------------------------------------------------------------------------
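How the last two helpers fit together: `maybe_download` fetches and caches a checkpoint from `model_urls`, and `expand_model_dict` copies each pretrained tensor onto every duplicated `.bn_0`, `.bn_1`, ... entry of the model's state dict. The sketch below is illustrative only; `load_pretrained_backbone` and its `num_parallel` default are hypothetical names, not code from this repo:

```python
# Hypothetical usage sketch (not code from this repo): fetch cached ImageNet
# weights, then map them onto a model whose BatchNorm layers were duplicated
# per modality (.bn_0, .bn_1, ...), the pattern expand_model_dict unfolds.
from utils.utils import maybe_download, expand_model_dict, model_urls

def load_pretrained_backbone(model, arch='resnet101', num_parallel=2):
    state_dict = maybe_download(arch, model_urls[arch])  # torch.load of the cached file
    model_dict = expand_model_dict(model.state_dict(), state_dict, num_parallel)
    model.load_state_dict(model_dict)  # every bn_i copy starts from the same pretrained stats
    return model
```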