├── README.md ├── image classification ├── CIFAR10 │ ├── main.py │ ├── models │ │ ├── __init__.py │ │ ├── alexnet.py │ │ ├── quant_layer.py │ │ ├── spiking.py │ │ └── vgg.py │ ├── snn.py │ └── snn_ft.py ├── CIFAR10_res │ ├── main.py │ ├── models │ │ ├── __init__.py │ │ ├── quant_layer.py │ │ └── resnet.py │ ├── snn.py │ └── snn_ft.py ├── CIFAR10_resnet18 │ ├── main.py │ ├── models │ │ ├── __init__.py │ │ ├── quant_layer.py │ │ └── resnet.py │ └── snn_ft.py └── ImageNet │ ├── dali_main.py │ ├── models │ ├── __init__.py │ ├── alexnet.py │ ├── quant_layer.py │ ├── spiking.py │ └── vgg.py │ ├── snn.py │ └── snn_ft.py ├── object detection ├── README.md ├── backbone │ ├── __init__.py │ ├── darknet19.py │ ├── darknet53.py │ ├── darknet_tiny.py │ ├── darknet_tiny_v2.py │ ├── myresnet.py │ ├── quant_layer.py │ └── resnet.py ├── data │ ├── __init__.py │ ├── coco2017.py │ ├── config.py │ ├── scripts │ │ ├── COCO2017.sh │ │ ├── VOC2007.sh │ │ └── VOC2012.sh │ └── voc0712.py ├── demo.py ├── eval.py ├── models │ ├── yolov2_d19.py │ ├── yolov2_r34.py │ ├── yolov2_r50.py │ ├── yolov2_tiny.py │ ├── yolov3.py │ └── yolov3_spp.py ├── test.py ├── tools.py ├── train.py └── utils │ ├── __init__.py │ ├── augmentations.py │ ├── cocoapi_evaluator.py │ ├── com_paras_flops.py │ ├── distributed_utils.py │ ├── kmeans_anchor.py │ ├── modules.py │ └── vocapi_evaluator.py └── semantic segmentation ├── README.md ├── configs ├── coco_deeplabv1.py ├── coco_deeplabv1_2bit.py ├── coco_deeplabv1_3bit.py ├── coco_deeplabv1_4bit.py ├── coco_deeplabv1_T15.py ├── coco_deeplabv1_T3.py ├── coco_deeplabv1_T7.py ├── coco_deeplabv3.py ├── coco_deeplabv3_2bit.py ├── coco_deeplabv3_3bit.py ├── coco_deeplabv3_4bit.py ├── coco_deeplabv3_T15.py ├── coco_deeplabv3_T3.py ├── coco_deeplabv3_T7.py ├── coco_multilabel_unet.py ├── coco_unet.py ├── voc_deeplabv1.py ├── voc_deeplabv1_2bit.py ├── voc_deeplabv1_3bit.py ├── voc_deeplabv1_4bit.py ├── voc_deeplabv1_T15.py ├── voc_deeplabv1_T3.py ├── voc_deeplabv1_T7.py ├── voc_deeplabv2.py ├── voc_deeplabv3.bak ├── voc_deeplabv3.py ├── voc_deeplabv3_2bit.py ├── voc_deeplabv3_3bit.py ├── voc_deeplabv3_4bit.py ├── voc_deeplabv3_T15.py ├── voc_deeplabv3_T3.py ├── voc_deeplabv3_T7.py ├── voc_deeplabv3plus.py ├── voc_deeplabvr.py ├── voc_deeplabvr_2bit.py ├── voc_deeplabvr_3bit.py ├── voc_deeplabvr_4bit.py ├── voc_fpn.py ├── voc_pspnet.py ├── voc_pspnet_v1c.py └── voc_unet.py ├── tools ├── decode.py ├── dist_test.sh ├── dist_train.sh ├── encode_voc12.py ├── encode_voc12_aug.py ├── inference.py ├── test.py ├── torch2onnx.py └── train.py └── vedaseg ├── __init__.py ├── criteria ├── __init__.py ├── bce_loss.py ├── builder.py └── registry.py ├── dataloaders ├── __init__.py ├── builder.py ├── registry.py └── samplers │ ├── __init__.py │ ├── builder.py │ ├── distributed.py │ ├── non_distributed.py │ └── registry.py ├── datasets ├── __init__.py ├── base.py ├── builder.py ├── coco.py ├── registry.py └── voc.py ├── loggers ├── __init__.py └── builder.py ├── lr_schedulers ├── __init__.py ├── base.py ├── builder.py ├── poly_lr.py └── registry.py ├── metrics ├── __init__.py ├── base.py ├── builder.py ├── metrics.py └── registry.py ├── models ├── __init__.py ├── builder.py ├── decoders │ ├── __init__.py │ ├── bricks.py │ ├── builder.py │ ├── gfpn │ │ ├── __init__.py │ │ └── gfpn.py │ └── registry.py ├── encoders │ ├── __init__.py │ ├── backbones │ │ ├── __init__.py │ │ ├── builder.py │ │ ├── myresnet.py │ │ ├── quant_layer.py │ │ ├── registry.py │ │ ├── resnet.py │ │ ├── spiking.py │ │ └── vgg.py │ ├── builder.py │ └── 
enhance_modules │ │ ├── __init__.py │ │ ├── aspp.py │ │ ├── builder.py │ │ ├── ppm.py │ │ └── registry.py ├── heads │ ├── __init__.py │ ├── builder.py │ ├── head.py │ └── registry.py ├── registry.py ├── utils │ ├── __init__.py │ ├── act.py │ ├── builder.py │ ├── conv_module.py │ ├── norm.py │ ├── registry.py │ └── upsample.py └── weight_init.py ├── optims ├── __init__.py └── builder.py ├── runners ├── __init__.py ├── base.py ├── inference_runner.py ├── test_runner.py └── train_runner.py ├── transforms ├── __init__.py ├── builder.py ├── registry.py └── transforms.py └── utils ├── __init__.py ├── checkpoint.py ├── config.py ├── dist_utils.py └── registry.py /image classification/CIFAR10/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .quant_layer import * 2 | from .alexnet import * 3 | from .vgg import * 4 | from .spiking import * -------------------------------------------------------------------------------- /image classification/CIFAR10/models/spiking.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | 6 | def unsigned_spikes(model): 7 | for m in model.modules(): 8 | if isinstance(m, Spiking): 9 | m.sign = False 10 | 11 | #####the spiking wrapper###### 12 | 13 | class Spiking(nn.Module): 14 | def __init__(self, block, T): 15 | super(Spiking, self).__init__() 16 | self.block = block 17 | self.T = T 18 | self.is_first = False 19 | self.idem = False 20 | self.sign = True 21 | def forward(self, x): 22 | if self.idem: 23 | return x 24 | 25 | ###initialize membrane to half threshold 26 | threshold = self.block[2].act_alpha.data 27 | membrane = 0.5 * threshold 28 | sum_spikes = 0 29 | 30 | #prepare charges 31 | if self.is_first: 32 | x.unsqueeze_(1) 33 | x = x.repeat(1, self.T, 1, 1, 1) 34 | train_shape = [x.shape[0], x.shape[1]] 35 | x = x.flatten(0, 1) 36 | x = self.block(x) 37 | train_shape.extend(x.shape[1:]) 38 | x = x.reshape(train_shape) 39 | 40 | #integrate charges 41 | for dt in range(self.T): 42 | membrane = membrane + x[:,dt] 43 | if dt == 0: 44 | spike_train = torch.zeros(membrane.shape[:1] + torch.Size([self.T]) + membrane.shape[1:],device=membrane.device) 45 | 46 | spikes = membrane >= threshold 47 | membrane[spikes] = membrane[spikes] - threshold 48 | spikes = spikes.float() 49 | sum_spikes = sum_spikes + spikes 50 | 51 | ###signed spikes### 52 | if self.sign: 53 | inhibit = membrane <= -1e-3 54 | inhibit = inhibit & (sum_spikes > 0) 55 | membrane[inhibit] = membrane[inhibit] + threshold 56 | inhibit = inhibit.float() 57 | sum_spikes = sum_spikes - inhibit 58 | else: 59 | inhibit = 0 60 | 61 | spike_train[:,dt] = spikes - inhibit 62 | 63 | spike_train = spike_train * threshold 64 | return spike_train 65 | 66 | 67 | class last_Spiking(nn.Module): 68 | def __init__(self, block, T): 69 | super(last_Spiking, self).__init__() 70 | self.block = block 71 | self.T = T 72 | self.idem = False 73 | 74 | def forward(self, x): 75 | if self.idem: 76 | return x 77 | #prepare charges 78 | train_shape = [x.shape[0], x.shape[1]] 79 | x = x.flatten(0, 1) 80 | x = self.block(x) 81 | train_shape.extend(x.shape[1:]) 82 | x = x.reshape(train_shape) 83 | 84 | #integrate charges 85 | return x.sum(dim=1) 86 | 87 | class IF(nn.Module): 88 | def __init__(self): 89 | super(IF, self).__init__() 90 | ###changes threshold to act_alpha 91 | ###being fleet 92 | self.act_alpha = torch.nn.Parameter(torch.tensor(1.0)) 93 | 94 | def forward(self, x): 95 | return x 96 | 
97 | def show_params(self): 98 | act_alpha = round(self.act_alpha.data.item(), 3) 99 | print('clipping threshold activation alpha: {:2f}'.format(act_alpha)) 100 | 101 | def extra_repr(self) -> str: 102 | return 'threshold={:.3f}'.format(self.act_alpha) -------------------------------------------------------------------------------- /image classification/CIFAR10_res/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .resnet import * 2 | from .quant_layer import * -------------------------------------------------------------------------------- /image classification/CIFAR10_resnet18/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .resnet import * 2 | from .quant_layer import * -------------------------------------------------------------------------------- /image classification/ImageNet/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Additive Power-of-Two Quantization: An Efficient Non-uniform Discretization For Neural Networks 2 | # Yuhang Li, Xin Dong, Wei Wang 3 | # International Conference on Learning Representations (ICLR), 2020. 4 | 5 | 6 | from .spiking import * 7 | from .quant_layer import * 8 | from .alexnet import * 9 | from .vgg import * -------------------------------------------------------------------------------- /image classification/ImageNet/models/spiking.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | 6 | def unsigned_spikes(model): 7 | for m in model.modules(): 8 | if isinstance(m, Spiking): 9 | m.sign = False 10 | 11 | #####the spiking wrapper###### 12 | 13 | class Spiking(nn.Module): 14 | def __init__(self, block, T): 15 | super(Spiking, self).__init__() 16 | self.block = block 17 | self.T = T 18 | self.is_first = False 19 | self.idem = False 20 | self.sign = True 21 | 22 | def forward(self, x): 23 | if self.idem: 24 | return x 25 | 26 | ###initialize membrane to half threshold 27 | threshold = self.block[2].act_alpha.data 28 | membrane = 0.5 * threshold 29 | sum_spikes = 0 30 | 31 | #prepare charges 32 | if self.is_first: 33 | x.unsqueeze_(1) 34 | x = x.repeat(1, self.T, 1, 1, 1) 35 | train_shape = [x.shape[0], x.shape[1]] 36 | x = x.flatten(0, 1) 37 | x = self.block(x) 38 | train_shape.extend(x.shape[1:]) 39 | x = x.reshape(train_shape) 40 | 41 | #integrate charges 42 | for dt in range(self.T): 43 | membrane = membrane + x[:,dt] 44 | if dt == 0: 45 | spike_train = torch.zeros(membrane.shape[:1] + torch.Size([self.T]) + membrane.shape[1:],device=membrane.device) 46 | 47 | spikes = membrane >= threshold 48 | membrane[spikes] = membrane[spikes] - threshold 49 | spikes = spikes.float() 50 | sum_spikes = sum_spikes + spikes 51 | 52 | ###signed spikes### 53 | if self.sign: 54 | inhibit = membrane <= -1e-3 55 | inhibit = inhibit & (sum_spikes > 0) 56 | membrane[inhibit] = membrane[inhibit] + threshold 57 | inhibit = inhibit.float() 58 | sum_spikes = sum_spikes - inhibit 59 | else: 60 | inhibit = 0 61 | 62 | spike_train[:,dt] = spikes - inhibit 63 | 64 | spike_train = spike_train * threshold 65 | return spike_train 66 | 67 | class last_Spiking(nn.Module): 68 | def __init__(self, block, T): 69 | super(last_Spiking, self).__init__() 70 | self.block = block 71 | self.T = T 72 | self.idem = False 73 | 74 | def forward(self, x): 75 | if self.idem: 76 | return x 77 | #prepare charges 78 | train_shape = [x.shape[0], 
x.shape[1]] 79 | x = x.flatten(0, 1) 80 | x = self.block(x) 81 | train_shape.extend(x.shape[1:]) 82 | x = x.reshape(train_shape) 83 | 84 | #integrate charges 85 | return x.sum(dim=1) 86 | 87 | class IF(nn.Module): 88 | def __init__(self): 89 | super(IF, self).__init__() 90 | ###changes threshold to act_alpha 91 | ###being fleet 92 | self.act_alpha = torch.nn.Parameter(torch.tensor(1.0)) 93 | 94 | def forward(self, x): 95 | return x 96 | 97 | def show_params(self): 98 | act_alpha = round(self.act_alpha.data.item(), 3) 99 | print('clipping threshold activation alpha: {:2f}'.format(act_alpha)) 100 | 101 | def extra_repr(self) -> str: 102 | return 'threshold={:.3f}'.format(self.act_alpha) -------------------------------------------------------------------------------- /object detection/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | from .resnet import build_resnet 2 | from .darknet19 import build_darknet19 3 | from .darknet53 import build_darknet53 4 | from .darknet_tiny import build_darknet_tiny 5 | from .darknet_tiny_v2 import build_darknet_tiny_v2 6 | 7 | 8 | def build_backbone(model_name='resnet18', pretrained=False): 9 | if 'resnet' in model_name: 10 | backbone = build_resnet(model_name, pretrained) 11 | 12 | elif model_name == 'darknet19': 13 | backbone = build_darknet19(pretrained) 14 | 15 | elif model_name == 'darknet53': 16 | backbone = build_darknet53(pretrained) 17 | 18 | elif model_name == 'darknet_tiny': 19 | backbone = build_darknet_tiny(pretrained) 20 | 21 | elif model_name == 'darknet_tiny_v2': 22 | backbone = build_darknet_tiny_v2(pretrained) 23 | 24 | return backbone 25 | -------------------------------------------------------------------------------- /object detection/backbone/darknet19.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import os 4 | 5 | 6 | model_urls = { 7 | "darknet19": "https://github.com/yjh0410/image_classification_pytorch/releases/download/weight/darknet19.pth", 8 | } 9 | 10 | 11 | __all__ = ['darknet19'] 12 | 13 | 14 | class Conv_BN_LeakyReLU(nn.Module): 15 | def __init__(self, in_channels, out_channels, ksize, padding=0, stride=1, dilation=1): 16 | super(Conv_BN_LeakyReLU, self).__init__() 17 | self.convs = nn.Sequential( 18 | nn.Conv2d(in_channels, out_channels, ksize, padding=padding, stride=stride, dilation=dilation), 19 | nn.BatchNorm2d(out_channels), 20 | nn.LeakyReLU(0.1, inplace=True) 21 | ) 22 | 23 | def forward(self, x): 24 | return self.convs(x) 25 | 26 | 27 | class DarkNet_19(nn.Module): 28 | def __init__(self): 29 | super(DarkNet_19, self).__init__() 30 | # backbone network : DarkNet-19 31 | # output : stride = 2, c = 32 32 | self.conv_1 = nn.Sequential( 33 | Conv_BN_LeakyReLU(3, 32, 3, 1), 34 | nn.MaxPool2d((2,2), 2), 35 | ) 36 | 37 | # output : stride = 4, c = 64 38 | self.conv_2 = nn.Sequential( 39 | Conv_BN_LeakyReLU(32, 64, 3, 1), 40 | nn.MaxPool2d((2,2), 2) 41 | ) 42 | 43 | # output : stride = 8, c = 128 44 | self.conv_3 = nn.Sequential( 45 | Conv_BN_LeakyReLU(64, 128, 3, 1), 46 | Conv_BN_LeakyReLU(128, 64, 1), 47 | Conv_BN_LeakyReLU(64, 128, 3, 1), 48 | nn.MaxPool2d((2,2), 2) 49 | ) 50 | 51 | # output : stride = 8, c = 256 52 | self.conv_4 = nn.Sequential( 53 | Conv_BN_LeakyReLU(128, 256, 3, 1), 54 | Conv_BN_LeakyReLU(256, 128, 1), 55 | Conv_BN_LeakyReLU(128, 256, 3, 1), 56 | ) 57 | 58 | # output : stride = 16, c = 512 59 | self.maxpool_4 = nn.MaxPool2d((2, 2), 2) 60 | self.conv_5 = 
nn.Sequential( 61 | Conv_BN_LeakyReLU(256, 512, 3, 1), 62 | Conv_BN_LeakyReLU(512, 256, 1), 63 | Conv_BN_LeakyReLU(256, 512, 3, 1), 64 | Conv_BN_LeakyReLU(512, 256, 1), 65 | Conv_BN_LeakyReLU(256, 512, 3, 1), 66 | ) 67 | 68 | # output : stride = 32, c = 1024 69 | self.maxpool_5 = nn.MaxPool2d((2, 2), 2) 70 | self.conv_6 = nn.Sequential( 71 | Conv_BN_LeakyReLU(512, 1024, 3, 1), 72 | Conv_BN_LeakyReLU(1024, 512, 1), 73 | Conv_BN_LeakyReLU(512, 1024, 3, 1), 74 | Conv_BN_LeakyReLU(1024, 512, 1), 75 | Conv_BN_LeakyReLU(512, 1024, 3, 1) 76 | ) 77 | 78 | def forward(self, x): 79 | c1 = self.conv_1(x) 80 | c2 = self.conv_2(c1) 81 | c3 = self.conv_3(c2) 82 | c3 = self.conv_4(c3) 83 | c4 = self.conv_5(self.maxpool_4(c3)) 84 | c5 = self.conv_6(self.maxpool_5(c4)) 85 | 86 | output = { 87 | 'layer1': c3, 88 | 'layer2': c4, 89 | 'layer3': c5 90 | } 91 | 92 | return output 93 | 94 | 95 | def build_darknet19(pretrained=False): 96 | # model 97 | model = DarkNet_19() 98 | 99 | # load weight 100 | if pretrained: 101 | print('Loading pretrained weight ...') 102 | url = model_urls['darknet19'] 103 | # checkpoint state dict 104 | checkpoint_state_dict = torch.hub.load_state_dict_from_url( 105 | url=url, map_location="cpu", check_hash=True) 106 | # model state dict 107 | model_state_dict = model.state_dict() 108 | # check 109 | for k in list(checkpoint_state_dict.keys()): 110 | if k in model_state_dict: 111 | shape_model = tuple(model_state_dict[k].shape) 112 | shape_checkpoint = tuple(checkpoint_state_dict[k].shape) 113 | if shape_model != shape_checkpoint: 114 | checkpoint_state_dict.pop(k) 115 | else: 116 | checkpoint_state_dict.pop(k) 117 | print(k) 118 | 119 | model.load_state_dict(checkpoint_state_dict) 120 | 121 | return model 122 | 123 | 124 | if __name__ == '__main__': 125 | import time 126 | net = build_darknet19(pretrained=True) 127 | x = torch.randn(1, 3, 224, 224) 128 | t0 = time.time() 129 | output = net(x) 130 | t1 = time.time() 131 | print('Time: ', t1 - t0) 132 | 133 | for k in output.keys(): 134 | print('{} : {}'.format(k, output[k].shape)) 135 | -------------------------------------------------------------------------------- /object detection/backbone/darknet53.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | model_urls = { 6 | "darknet53": "https://github.com/yjh0410/image_classification_pytorch/releases/download/weight/darknet53.pth", 7 | } 8 | 9 | 10 | __all__ = ['darknet53'] 11 | 12 | 13 | class Conv_BN_LeakyReLU(nn.Module): 14 | def __init__(self, in_channels, out_channels, ksize, padding=0, stride=1, dilation=1): 15 | super(Conv_BN_LeakyReLU, self).__init__() 16 | self.convs = nn.Sequential( 17 | nn.Conv2d(in_channels, out_channels, ksize, padding=padding, stride=stride, dilation=dilation), 18 | nn.BatchNorm2d(out_channels), 19 | nn.LeakyReLU(0.1, inplace=True) 20 | ) 21 | 22 | def forward(self, x): 23 | return self.convs(x) 24 | 25 | 26 | class ResBlock(nn.Module): 27 | def __init__(self, ch, nblocks=1): 28 | super().__init__() 29 | self.module_list = nn.ModuleList() 30 | for _ in range(nblocks): 31 | resblock_one = nn.Sequential( 32 | Conv_BN_LeakyReLU(ch, ch//2, 1), 33 | Conv_BN_LeakyReLU(ch//2, ch, 3, padding=1) 34 | ) 35 | self.module_list.append(resblock_one) 36 | 37 | def forward(self, x): 38 | for module in self.module_list: 39 | x = module(x) + x 40 | return x 41 | 42 | 43 | class DarkNet_53(nn.Module): 44 | """ 45 | DarkNet-53. 
46 | """ 47 | def __init__(self): 48 | super(DarkNet_53, self).__init__() 49 | # stride = 2 50 | self.layer_1 = nn.Sequential( 51 | Conv_BN_LeakyReLU(3, 32, 3, padding=1), 52 | Conv_BN_LeakyReLU(32, 64, 3, padding=1, stride=2), 53 | ResBlock(64, nblocks=1) 54 | ) 55 | # stride = 4 56 | self.layer_2 = nn.Sequential( 57 | Conv_BN_LeakyReLU(64, 128, 3, padding=1, stride=2), 58 | ResBlock(128, nblocks=2) 59 | ) 60 | # stride = 8 61 | self.layer_3 = nn.Sequential( 62 | Conv_BN_LeakyReLU(128, 256, 3, padding=1, stride=2), 63 | ResBlock(256, nblocks=8) 64 | ) 65 | # stride = 16 66 | self.layer_4 = nn.Sequential( 67 | Conv_BN_LeakyReLU(256, 512, 3, padding=1, stride=2), 68 | ResBlock(512, nblocks=8) 69 | ) 70 | # stride = 32 71 | self.layer_5 = nn.Sequential( 72 | Conv_BN_LeakyReLU(512, 1024, 3, padding=1, stride=2), 73 | ResBlock(1024, nblocks=4) 74 | ) 75 | 76 | 77 | def forward(self, x, targets=None): 78 | c1 = self.layer_1(x) 79 | c2 = self.layer_2(c1) 80 | c3 = self.layer_3(c2) 81 | c4 = self.layer_4(c3) 82 | c5 = self.layer_5(c4) 83 | 84 | output = { 85 | 'layer1': c3, 86 | 'layer2': c4, 87 | 'layer3': c5 88 | } 89 | 90 | return output 91 | 92 | 93 | def build_darknet53(pretrained=False): 94 | # model 95 | model = DarkNet_53() 96 | 97 | # load weight 98 | if pretrained: 99 | print('Loading pretrained weight ...') 100 | url = model_urls['darknet53'] 101 | # checkpoint state dict 102 | checkpoint_state_dict = torch.hub.load_state_dict_from_url( 103 | url=url, map_location="cpu", check_hash=True) 104 | # model state dict 105 | model_state_dict = model.state_dict() 106 | # check 107 | for k in list(checkpoint_state_dict.keys()): 108 | if k in model_state_dict: 109 | shape_model = tuple(model_state_dict[k].shape) 110 | shape_checkpoint = tuple(checkpoint_state_dict[k].shape) 111 | if shape_model != shape_checkpoint: 112 | checkpoint_state_dict.pop(k) 113 | else: 114 | checkpoint_state_dict.pop(k) 115 | print(k) 116 | 117 | model.load_state_dict(checkpoint_state_dict) 118 | 119 | return model 120 | 121 | 122 | if __name__ == '__main__': 123 | import time 124 | net = build_darknet53(pretrained=True) 125 | x = torch.randn(1, 3, 224, 224) 126 | t0 = time.time() 127 | output = net(x) 128 | t1 = time.time() 129 | print('Time: ', t1 - t0) 130 | 131 | for k in output.keys(): 132 | print('{} : {}'.format(k, output[k].shape)) 133 | -------------------------------------------------------------------------------- /object detection/backbone/darknet_tiny.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | model_urls = { 6 | "darknet_tiny": "https://github.com/yjh0410/image_classification_pytorch/releases/download/weight/darknet_tiny.pth", 7 | } 8 | 9 | 10 | __all__ = ['build_darknet_tiny'] 11 | 12 | 13 | class Conv_BN_LeakyReLU(nn.Module): 14 | def __init__(self, in_channels, out_channels, ksize, padding=0, stride=1, dilation=1): 15 | super(Conv_BN_LeakyReLU, self).__init__() 16 | self.convs = nn.Sequential( 17 | nn.Conv2d(in_channels, out_channels, ksize, padding=padding, stride=stride, dilation=dilation), 18 | nn.BatchNorm2d(out_channels), 19 | nn.LeakyReLU(0.1, inplace=True) 20 | ) 21 | 22 | def forward(self, x): 23 | return self.convs(x) 24 | 25 | 26 | class DarkNet_Tiny(nn.Module): 27 | def __init__(self): 28 | 29 | super(DarkNet_Tiny, self).__init__() 30 | # backbone network : DarkNet_Tiny 31 | self.conv_1 = Conv_BN_LeakyReLU(3, 16, 3, 1) 32 | self.maxpool_1 = nn.MaxPool2d((2, 2), 2) # stride = 2 33 | 34 | self.conv_2 
= Conv_BN_LeakyReLU(16, 32, 3, 1) 35 | self.maxpool_2 = nn.MaxPool2d((2, 2), 2) # stride = 4 36 | 37 | self.conv_3 = Conv_BN_LeakyReLU(32, 64, 3, 1) 38 | self.maxpool_3 = nn.MaxPool2d((2, 2), 2) # stride = 8 39 | 40 | self.conv_4 = Conv_BN_LeakyReLU(64, 128, 3, 1) 41 | self.maxpool_4 = nn.MaxPool2d((2, 2), 2) # stride = 16 42 | 43 | self.conv_5 = Conv_BN_LeakyReLU(128, 256, 3, 1) 44 | self.maxpool_5 = nn.MaxPool2d((2, 2), 2) # stride = 32 45 | 46 | self.conv_6 = Conv_BN_LeakyReLU(256, 512, 3, 1) 47 | self.maxpool_6 = nn.Sequential( 48 | nn.ZeroPad2d((0, 1, 0, 1)), 49 | nn.MaxPool2d((2, 2), 1) # stride = 32 50 | ) 51 | 52 | self.conv_7 = Conv_BN_LeakyReLU(512, 1024, 3, 1) 53 | 54 | 55 | def forward(self, x): 56 | x = self.conv_1(x) 57 | c1 = self.maxpool_1(x) 58 | c1 = self.conv_2(c1) 59 | c2 = self.maxpool_2(c1) 60 | c2 = self.conv_3(c2) 61 | c3 = self.maxpool_3(c2) 62 | c3 = self.conv_4(c3) 63 | c4 = self.maxpool_4(c3) 64 | c4 = self.conv_5(c4) # stride = 16 65 | c5 = self.maxpool_5(c4) 66 | c5 = self.conv_6(c5) 67 | c5 = self.maxpool_6(c5) 68 | c5 = self.conv_7(c5) # stride = 32 69 | 70 | output = { 71 | 'layer1': c3, 72 | 'layer2': c4, 73 | 'layer3': c5 74 | } 75 | 76 | return output 77 | 78 | 79 | def build_darknet_tiny(pretrained=False): 80 | # model 81 | model = DarkNet_Tiny() 82 | 83 | # load weight 84 | if pretrained: 85 | print('Loading pretrained weight ...') 86 | url = model_urls['darknet_tiny'] 87 | # checkpoint state dict 88 | checkpoint_state_dict = torch.hub.load_state_dict_from_url( 89 | url=url, map_location="cpu", check_hash=True) 90 | # model state dict 91 | model_state_dict = model.state_dict() 92 | # check 93 | for k in list(checkpoint_state_dict.keys()): 94 | if k in model_state_dict: 95 | shape_model = tuple(model_state_dict[k].shape) 96 | shape_checkpoint = tuple(checkpoint_state_dict[k].shape) 97 | if shape_model != shape_checkpoint: 98 | checkpoint_state_dict.pop(k) 99 | else: 100 | checkpoint_state_dict.pop(k) 101 | print(k) 102 | 103 | model.load_state_dict(checkpoint_state_dict) 104 | 105 | return model 106 | 107 | 108 | if __name__ == '__main__': 109 | import time 110 | net = build_darknet_tiny(pretrained=True) 111 | x = torch.randn(1, 3, 224, 224) 112 | t0 = time.time() 113 | output = net(x) 114 | t1 = time.time() 115 | print('Time: ', t1 - t0) 116 | 117 | for k in output.keys(): 118 | print('{} : {}'.format(k, output[k].shape)) 119 | -------------------------------------------------------------------------------- /object detection/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .voc0712 import VOCDetection, VOCAnnotationTransform, VOC_CLASSES 2 | from .coco2017 import COCODataset, coco_class_labels, coco_class_index 3 | from .config import * 4 | import torch 5 | import cv2 6 | import numpy as np 7 | 8 | 9 | def detection_collate(batch): 10 | """Custom collate fn for dealing with batches of images that have a different 11 | number of associated object annotations (bounding boxes). 
12 | 13 | Arguments: 14 | batch: (tuple) A tuple of tensor images and lists of annotations 15 | 16 | Return: 17 | A tuple containing: 18 | 1) (tensor) batch of images stacked on their 0 dim 19 | 2) (list of tensors) annotations for a given image are stacked on 20 | 0 dim 21 | """ 22 | targets = [] 23 | imgs = [] 24 | for sample in batch: 25 | imgs.append(sample[0]) 26 | targets.append(torch.FloatTensor(sample[1])) 27 | return torch.stack(imgs, 0), targets 28 | 29 | 30 | def base_transform(image, size, mean, std): 31 | x = cv2.resize(image, (size, size)).astype(np.float32) 32 | x /= 255. 33 | x -= mean 34 | x /= std 35 | return x 36 | 37 | 38 | class BaseTransform: 39 | def __init__(self, size, mean=(0.406, 0.456, 0.485), std=(0.225, 0.224, 0.229)): 40 | self.size = size 41 | self.mean = np.array(mean, dtype=np.float32) 42 | self.std = np.array(std, dtype=np.float32) 43 | 44 | def __call__(self, image, boxes=None, labels=None): 45 | return base_transform(image, self.size, self.mean, self.std), boxes, labels 46 | -------------------------------------------------------------------------------- /object detection/data/config.py: -------------------------------------------------------------------------------- 1 | # config.py 2 | 3 | # YOLOv2 with darknet-19 4 | yolov2_d19_cfg = { 5 | # network 6 | 'backbone': 'd19', 7 | # for multi-scale trick 8 | 'train_size': 640, 9 | 'val_size': 416, 10 | 'random_size_range': [10, 19], 11 | # anchor size 12 | 'anchor_size_voc': [[1.19, 1.98], [2.79, 4.59], [4.53, 8.92], [8.06, 5.29], [10.32, 10.65]], 13 | 'anchor_size_coco': [[0.53, 0.79], [1.71, 2.36], [2.89, 6.44], [6.33, 3.79], [9.03, 9.74]], 14 | # train 15 | # 'lr_epoch': (60, 90), 16 | # 'max_epoch': 160, 17 | 'lr_epoch': (150, 200), 18 | 'max_epoch': 250, 19 | 'ignore_thresh': 0.5 20 | } 21 | 22 | # tinyYOLOv2 23 | yolov2_tiny_cfg = { 24 | # network 25 | 'backbone': 'd-light', 26 | # for multi-scale trick 27 | 'train_size': 640, 28 | 'val_size': 416, 29 | 'random_size_range': [10, 19], 30 | # anchor size 31 | 'anchor_size_voc': [[1.19, 1.98], [2.79, 4.59], [4.53, 8.92], [8.06, 5.29], [10.32, 10.65]], 32 | # 'anchor_size_voc': [[1.08,1.19], [3.42,4.41], [6.63,11.38], [9.42,5.11], [16.62,10.52]], 33 | # 'anchor_size_voc':[[1.3221, 1.73145], [3.19275, 4.00944], [5.05587, 8.09892], [9.47112, 4.84053], [11.2364, 10.0071]], 34 | 'anchor_size_coco': [[0.53, 0.79], [1.71, 2.36], [2.89, 6.44], [6.33, 3.79], [9.03, 9.74]], 35 | # train 36 | # 'lr_epoch': (60, 90), 37 | # 'max_epoch': 160, 38 | 'lr_epoch': (150, 200), 39 | 'max_epoch': 250, 40 | # 'lr_epoch': (80, 120), 41 | # 'max_epoch': 160, 42 | 'ignore_thresh': 0.5 43 | } 44 | 45 | # YOLOv2 with resnet-50 46 | yolov2_r50_cfg = { 47 | # network 48 | 'backbone': 'r50', 49 | # for multi-scale trick 50 | 'train_size': 640, 51 | 'val_size': 416, 52 | 'random_size_range': [10, 19], 53 | # anchor size 54 | 'anchor_size_voc': [[1.19, 1.98], [2.79, 4.59], [4.53, 8.92], [8.06, 5.29], [10.32, 10.65]], 55 | 'anchor_size_coco': [[0.53, 0.79], [1.71, 2.36], [2.89, 6.44], [6.33, 3.79], [9.03, 9.74]], 56 | # train 57 | # 'lr_epoch': (60, 90), 58 | # 'max_epoch': 160, 59 | 'lr_epoch': (150, 200), 60 | 'max_epoch': 250, 61 | 'ignore_thresh': 0.5 62 | } 63 | 64 | # YOLOv3 / YOLOv3Spp 65 | yolov3_d53_cfg = { 66 | # network 67 | 'backbone': 'd53', 68 | # for multi-scale trick 69 | 'train_size': 640, 70 | 'val_size': 416, 71 | 'random_size_range': [10, 19], 72 | # anchor size 73 | 'anchor_size_voc': [[32.64, 47.68], [50.24, 108.16], [126.72, 96.32], 74 | [78.4, 201.92], [178.24, 
178.56], [129.6, 294.72], 75 | [331.84, 194.56], [227.84, 325.76], [365.44, 358.72]], 76 | 'anchor_size_coco': [[12.48, 19.2], [31.36, 46.4],[46.4, 113.92], 77 | [97.28, 55.04], [133.12, 127.36], [79.04, 224.], 78 | [301.12, 150.4 ], [172.16, 285.76], [348.16, 341.12]], 79 | # train 80 | 'lr_epoch': (150, 200), 81 | 'max_epoch': 250, 82 | 'ignore_thresh': 0.5 83 | } 84 | 85 | # YOLOv3Tiny 86 | yolov3_tiny_cfg = { 87 | # network 88 | 'backbone': 'd-light', 89 | # for multi-scale trick 90 | 'train_size': 640, 91 | 'val_size': 416, 92 | 'random_size_range':[10, 19], 93 | # anchor size 94 | 'anchor_size_voc': [[34.01, 61.79], [86.94, 109.68], [93.49, 227.46], 95 | [246.38, 163.33], [178.68, 306.55], [344.89, 337.14]], 96 | 'anchor_size_coco': [[15.09, 23.25], [46.36, 61.47], [68.41, 161.84], 97 | [168.88, 93.59], [154.96, 257.45], [334.74, 302.47]], 98 | # train 99 | 'lr_epoch': (150, 200), 100 | 'max_epoch': 250, 101 | 'ignore_thresh': 0.5 102 | } 103 | -------------------------------------------------------------------------------- /object detection/data/scripts/COCO2017.sh: -------------------------------------------------------------------------------- 1 | mkdir COCO 2 | cd COCO 3 | 4 | wget http://images.cocodataset.org/zips/train2017.zip 5 | wget http://images.cocodataset.org/zips/val2017.zip 6 | wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip 7 | wget http://images.cocodataset.org/zips/test2017.zip 8 | wget http://images.cocodataset.org/annotations/image_info_test2017.zip  9 | 10 | unzip train2017.zip 11 | unzip val2017.zip 12 | unzip annotations_trainval2017.zip 13 | unzip test2017.zip 14 | unzip image_info_test2017.zip 15 | 16 | # rm -f train2017.zip 17 | # rm -f val2017.zip 18 | # rm -f annotations_trainval2017.zip 19 | # rm -f test2017.zip 20 | # rm -f image_info_test2017.zip 21 | -------------------------------------------------------------------------------- /object detection/data/scripts/VOC2007.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Ellis Brown 3 | 4 | start=`date +%s` 5 | 6 | # handle optional download dir 7 | if [ -z "$1" ] 8 | then 9 | # navigate to ~/data 10 | echo "navigating to ~/data/ ..." 11 | mkdir -p ~/data 12 | cd ~/data/ 13 | else 14 | # check if is valid directory 15 | if [ ! -d $1 ]; then 16 | echo $1 "is not a valid directory" 17 | exit 0 18 | fi 19 | echo "navigating to" $1 "..." 20 | cd $1 21 | fi 22 | 23 | echo "Downloading VOC2007 trainval ..." 24 | # Download the data. 25 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar 26 | echo "Downloading VOC2007 test data ..." 27 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar 28 | echo "Done downloading." 29 | 30 | # Extract data 31 | echo "Extracting trainval ..." 32 | tar -xvf VOCtrainval_06-Nov-2007.tar 33 | echo "Extracting test ..." 34 | tar -xvf VOCtest_06-Nov-2007.tar 35 | echo "removing tars ..." 
36 | rm VOCtrainval_06-Nov-2007.tar 37 | rm VOCtest_06-Nov-2007.tar 38 | 39 | end=`date +%s` 40 | runtime=$((end-start)) 41 | 42 | echo "Completed in" $runtime "seconds" -------------------------------------------------------------------------------- /object detection/data/scripts/VOC2012.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Ellis Brown 3 | 4 | start=`date +%s` 5 | 6 | # handle optional download dir 7 | if [ -z "$1" ] 8 | then 9 | # navigate to ~/data 10 | echo "navigating to ~/data/ ..." 11 | mkdir -p ~/data 12 | cd ~/data/ 13 | else 14 | # check if is valid directory 15 | if [ ! -d $1 ]; then 16 | echo $1 "is not a valid directory" 17 | exit 0 18 | fi 19 | echo "navigating to" $1 "..." 20 | cd $1 21 | fi 22 | 23 | echo "Downloading VOC2012 trainval ..." 24 | # Download the data. 25 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar 26 | echo "Done downloading." 27 | 28 | 29 | # Extract data 30 | echo "Extracting trainval ..." 31 | tar -xvf VOCtrainval_11-May-2012.tar 32 | echo "removing tar ..." 33 | rm VOCtrainval_11-May-2012.tar 34 | 35 | end=`date +%s` 36 | runtime=$((end-start)) 37 | 38 | echo "Completed in" $runtime "seconds" -------------------------------------------------------------------------------- /object detection/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zju-bmi-lab/Fast-SNN/e4315cd4e74b4e185ab12bbe2dd74bc3fdccc547/object detection/utils/__init__.py -------------------------------------------------------------------------------- /object detection/utils/cocoapi_evaluator.py: -------------------------------------------------------------------------------- 1 | import json 2 | import tempfile 3 | 4 | from pycocotools.cocoeval import COCOeval 5 | from torch.autograd import Variable 6 | 7 | from data.coco2017 import * 8 | from data import * 9 | 10 | 11 | class COCOAPIEvaluator(): 12 | """ 13 | COCO AP Evaluation class. 14 | All the data in the val2017 dataset are processed \ 15 | and evaluated by COCO API. 16 | """ 17 | def __init__(self, data_dir, img_size, device, testset=False, transform=None): 18 | """ 19 | Args: 20 | data_dir (str): dataset root directory 21 | img_size (int): image size after preprocess. images are resized \ 22 | to squares whose shape is (img_size, img_size). 23 | confthre (float): 24 | confidence threshold ranging from 0 to 1, \ 25 | which is defined in the config file. 26 | nmsthre (float): 27 | IoU threshold of non-max supression ranging from 0 to 1. 28 | """ 29 | self.testset = testset 30 | if self.testset: 31 | json_file='image_info_test-dev2017.json' 32 | name = 'test2017' 33 | else: 34 | json_file='instances_val2017.json' 35 | name='val2017' 36 | 37 | self.dataset = COCODataset(data_dir=data_dir, 38 | json_file=json_file, 39 | name=name) 40 | self.img_size = img_size 41 | self.transform = transform 42 | self.device = device 43 | 44 | self.map = 0. 45 | self.ap50_95 = 0. 46 | self.ap50 = 0. 47 | 48 | def evaluate(self, model): 49 | """ 50 | COCO average precision (AP) Evaluation. Iterate inference on the test dataset 51 | and the results are evaluated by COCO API. 
52 | Args: 53 | model : model object 54 | Returns: 55 | ap50_95 (float) : calculated COCO AP for IoU=50:95 56 | ap50 (float) : calculated COCO AP for IoU=50 57 | """ 58 | model.eval() 59 | ids = [] 60 | data_dict = [] 61 | num_images = len(self.dataset) 62 | print('total number of images: %d' % (num_images)) 63 | 64 | # start testing 65 | for index in range(num_images): # all the data in val2017 66 | if index % 500 == 0: 67 | print('[Eval: %d / %d]'%(index, num_images)) 68 | 69 | img, id_ = self.dataset.pull_image(index) # load a batch 70 | if self.transform is not None: 71 | x = torch.from_numpy(self.transform(img)[0][:, :, (2, 1, 0)]).permute(2, 0, 1) 72 | x = x.unsqueeze(0).to(self.device) 73 | scale = np.array([[img.shape[1], img.shape[0], 74 | img.shape[1], img.shape[0]]]) 75 | 76 | id_ = int(id_) 77 | ids.append(id_) 78 | with torch.no_grad(): 79 | outputs = model(x) 80 | bboxes, scores, cls_inds = outputs 81 | bboxes *= scale 82 | for i, box in enumerate(bboxes): 83 | x1 = float(box[0]) 84 | y1 = float(box[1]) 85 | x2 = float(box[2]) 86 | y2 = float(box[3]) 87 | label = self.dataset.class_ids[int(cls_inds[i])] 88 | 89 | bbox = [x1, y1, x2 - x1, y2 - y1] 90 | score = float(scores[i]) # object score * class score 91 | A = {"image_id": id_, "category_id": label, "bbox": bbox, 92 | "score": score} # COCO json format 93 | data_dict.append(A) 94 | 95 | annType = ['segm', 'bbox', 'keypoints'] 96 | 97 | # Evaluate the Dt (detection) json comparing with the ground truth 98 | if len(data_dict) > 0: 99 | print('evaluating ......') 100 | cocoGt = self.dataset.coco 101 | # For test 102 | if self.testset: 103 | json.dump(data_dict, open('yolov2_2017.json', 'w')) 104 | cocoDt = cocoGt.loadRes('yolov2_2017.json') 105 | print('inference on test-dev is done !!') 106 | return -1, -1 107 | # For val 108 | else: 109 | _, tmp = tempfile.mkstemp() 110 | json.dump(data_dict, open(tmp, 'w')) 111 | cocoDt = cocoGt.loadRes(tmp) 112 | cocoEval = COCOeval(self.dataset.coco, cocoDt, annType[1]) 113 | cocoEval.params.imgIds = ids 114 | cocoEval.evaluate() 115 | cocoEval.accumulate() 116 | cocoEval.summarize() 117 | 118 | ap50_95, ap50 = cocoEval.stats[0], cocoEval.stats[1] 119 | print('ap50_95 : ', ap50_95) 120 | print('ap50 : ', ap50) 121 | self.map = ap50_95 122 | self.ap50_95 = ap50_95 123 | self.ap50 = ap50 124 | 125 | return ap50, ap50_95 126 | else: 127 | return 0, 0 128 | 129 | -------------------------------------------------------------------------------- /object detection/utils/com_paras_flops.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from thop import profile 3 | 4 | 5 | def FLOPs_and_Params(model, size, device): 6 | x = torch.randn(1, 3, size, size).to(device) 7 | model.trainable = False 8 | model.eval() 9 | 10 | flops, params = profile(model, inputs=(x, )) 11 | print('FLOPs : ', flops / 1e9, ' B') 12 | print('Params : ', params / 1e6, ' M') 13 | 14 | model.trainable = True 15 | model.train() 16 | 17 | 18 | if __name__ == "__main__": 19 | pass 20 | -------------------------------------------------------------------------------- /object detection/utils/distributed_utils.py: -------------------------------------------------------------------------------- 1 | # from github: https://github.com/ruinmessi/ASFF/blob/master/utils/distributed_util.py 2 | 3 | import torch 4 | import torch.distributed as dist 5 | import os 6 | import subprocess 7 | import pickle 8 | 9 | 10 | def all_gather(data): 11 | """ 12 | Run all_gather on arbitrary picklable 
data (not necessarily tensors) 13 | Args: 14 | data: any picklable object 15 | Returns: 16 | list[data]: list of data gathered from each rank 17 | """ 18 | world_size = get_world_size() 19 | if world_size == 1: 20 | return [data] 21 | 22 | # serialized to a Tensor 23 | buffer = pickle.dumps(data) 24 | storage = torch.ByteStorage.from_buffer(buffer) 25 | tensor = torch.ByteTensor(storage).to("cuda") 26 | 27 | # obtain Tensor size of each rank 28 | local_size = torch.tensor([tensor.numel()], device="cuda") 29 | size_list = [torch.tensor([0], device="cuda") for _ in range(world_size)] 30 | dist.all_gather(size_list, local_size) 31 | size_list = [int(size.item()) for size in size_list] 32 | max_size = max(size_list) 33 | 34 | # receiving Tensor from all ranks 35 | # we pad the tensor because torch all_gather does not support 36 | # gathering tensors of different shapes 37 | tensor_list = [] 38 | for _ in size_list: 39 | tensor_list.append(torch.empty((max_size,), dtype=torch.uint8, device="cuda")) 40 | if local_size != max_size: 41 | padding = torch.empty(size=(max_size - local_size,), dtype=torch.uint8, device="cuda") 42 | tensor = torch.cat((tensor, padding), dim=0) 43 | dist.all_gather(tensor_list, tensor) 44 | 45 | data_list = [] 46 | for size, tensor in zip(size_list, tensor_list): 47 | buffer = tensor.cpu().numpy().tobytes()[:size] 48 | data_list.append(pickle.loads(buffer)) 49 | 50 | return data_list 51 | 52 | 53 | def reduce_dict(input_dict, average=True): 54 | """ 55 | Args: 56 | input_dict (dict): all the values will be reduced 57 | average (bool): whether to do average or sum 58 | Reduce the values in the dictionary from all processes so that all processes 59 | have the averaged results. Returns a dict with the same fields as 60 | input_dict, after reduction. 
61 | """ 62 | world_size = get_world_size() 63 | if world_size < 2: 64 | return input_dict 65 | with torch.no_grad(): 66 | names = [] 67 | values = [] 68 | # sort the keys so that they are consistent across processes 69 | for k in sorted(input_dict.keys()): 70 | names.append(k) 71 | values.append(input_dict[k]) 72 | values = torch.stack(values, dim=0) 73 | dist.all_reduce(values) 74 | if average: 75 | values /= world_size 76 | reduced_dict = {k: v for k, v in zip(names, values)} 77 | return reduced_dict 78 | 79 | 80 | def get_sha(): 81 | cwd = os.path.dirname(os.path.abspath(__file__)) 82 | 83 | def _run(command): 84 | return subprocess.check_output(command, cwd=cwd).decode('ascii').strip() 85 | sha = 'N/A' 86 | diff = "clean" 87 | branch = 'N/A' 88 | try: 89 | sha = _run(['git', 'rev-parse', 'HEAD']) 90 | subprocess.check_output(['git', 'diff'], cwd=cwd) 91 | diff = _run(['git', 'diff-index', 'HEAD']) 92 | diff = "has uncommited changes" if diff else "clean" 93 | branch = _run(['git', 'rev-parse', '--abbrev-ref', 'HEAD']) 94 | except Exception: 95 | pass 96 | message = f"sha: {sha}, status: {diff}, branch: {branch}" 97 | return message 98 | 99 | 100 | def setup_for_distributed(is_master): 101 | """ 102 | This function disables printing when not in master process 103 | """ 104 | import builtins as __builtin__ 105 | builtin_print = __builtin__.print 106 | 107 | def print(*args, **kwargs): 108 | force = kwargs.pop('force', False) 109 | if is_master or force: 110 | builtin_print(*args, **kwargs) 111 | 112 | __builtin__.print = print 113 | 114 | 115 | def is_dist_avail_and_initialized(): 116 | if not dist.is_available(): 117 | return False 118 | if not dist.is_initialized(): 119 | return False 120 | return True 121 | 122 | 123 | def get_world_size(): 124 | if not is_dist_avail_and_initialized(): 125 | return 1 126 | return dist.get_world_size() 127 | 128 | 129 | def get_rank(): 130 | if not is_dist_avail_and_initialized(): 131 | return 0 132 | return dist.get_rank() 133 | 134 | 135 | def is_main_process(): 136 | return get_rank() == 0 137 | 138 | 139 | def save_on_master(*args, **kwargs): 140 | if is_main_process(): 141 | torch.save(*args, **kwargs) 142 | 143 | 144 | def init_distributed_mode(args): 145 | if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ: 146 | args.rank = int(os.environ["RANK"]) 147 | args.world_size = int(os.environ['WORLD_SIZE']) 148 | args.gpu = int(os.environ['LOCAL_RANK']) 149 | elif 'SLURM_PROCID' in os.environ: 150 | args.rank = int(os.environ['SLURM_PROCID']) 151 | args.gpu = args.rank % torch.cuda.device_count() 152 | else: 153 | print('Not using distributed mode') 154 | args.distributed = False 155 | return 156 | 157 | args.distributed = True 158 | 159 | torch.cuda.set_device(args.gpu) 160 | args.dist_backend = 'nccl' 161 | print('| distributed init (rank {}): {}'.format( 162 | args.rank, args.dist_url), flush=True) 163 | torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url, 164 | world_size=args.world_size, rank=args.rank) 165 | torch.distributed.barrier() 166 | setup_for_distributed(args.rank == 0) 167 | -------------------------------------------------------------------------------- /object detection/utils/modules.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | from copy import deepcopy 5 | 6 | 7 | class Conv(nn.Module): 8 | def __init__(self, in_ch, out_ch, k=1, p=0, s=1, d=1, g=1, act=True): 9 | super(Conv, 
self).__init__() 10 | if act: 11 | self.convs = nn.Sequential( 12 | nn.Conv2d(in_ch, out_ch, k, stride=s, padding=p, dilation=d, groups=g), 13 | nn.BatchNorm2d(out_ch), 14 | nn.LeakyReLU(0.1, inplace=True) 15 | ) 16 | else: 17 | self.convs = nn.Sequential( 18 | nn.Conv2d(in_ch, out_ch, k, stride=s, padding=p, dilation=d, groups=g), 19 | nn.BatchNorm2d(out_ch) 20 | ) 21 | 22 | def forward(self, x): 23 | return self.convs(x) 24 | 25 | 26 | class UpSample(nn.Module): 27 | def __init__(self, size=None, scale_factor=None, mode='nearest', align_corner=None): 28 | super(UpSample, self).__init__() 29 | self.size = size 30 | self.scale_factor = scale_factor 31 | self.mode = mode 32 | self.align_corner = align_corner 33 | 34 | def forward(self, x): 35 | return torch.nn.functional.interpolate(x, size=self.size, scale_factor=self.scale_factor, 36 | mode=self.mode, align_corners=self.align_corner) 37 | 38 | 39 | class reorg_layer(nn.Module): 40 | def __init__(self, stride): 41 | super(reorg_layer, self).__init__() 42 | self.stride = stride 43 | 44 | def forward(self, x): 45 | batch_size, channels, height, width = x.size() 46 | _height, _width = height // self.stride, width // self.stride 47 | 48 | x = x.view(batch_size, channels, _height, self.stride, _width, self.stride).transpose(3, 4).contiguous() 49 | x = x.view(batch_size, channels, _height * _width, self.stride * self.stride).transpose(2, 3).contiguous() 50 | x = x.view(batch_size, channels, self.stride * self.stride, _height, _width).transpose(1, 2).contiguous() 51 | x = x.view(batch_size, -1, _height, _width) 52 | 53 | return x 54 | 55 | 56 | class SPP(nn.Module): 57 | """ 58 | Spatial Pyramid Pooling 59 | """ 60 | def __init__(self): 61 | super(SPP, self).__init__() 62 | 63 | def forward(self, x): 64 | x_1 = torch.nn.functional.max_pool2d(x, 5, stride=1, padding=2) 65 | x_2 = torch.nn.functional.max_pool2d(x, 9, stride=1, padding=4) 66 | x_3 = torch.nn.functional.max_pool2d(x, 13, stride=1, padding=6) 67 | x = torch.cat([x, x_1, x_2, x_3], dim=1) 68 | 69 | return x 70 | 71 | 72 | class ModelEMA(object): 73 | def __init__(self, model, decay=0.9999, updates=0): 74 | # create EMA 75 | self.ema = deepcopy(model).eval() 76 | self.updates = updates 77 | self.decay = lambda x: decay * (1 - math.exp(-x / 2000.)) 78 | for p in self.ema.parameters(): 79 | p.requires_grad_(False) 80 | 81 | def update(self, model): 82 | # Update EMA parameters 83 | with torch.no_grad(): 84 | self.updates += 1 85 | d = self.decay(self.updates) 86 | 87 | msd = model.state_dict() 88 | for k, v in self.ema.state_dict().items(): 89 | if v.dtype.is_floating_point: 90 | v *= d 91 | v += (1. - d) * msd[k].detach() 92 | -------------------------------------------------------------------------------- /semantic segmentation/configs/coco_deeplabv1.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | # 1. 
configuration for inference 4 | nclasses = 81 5 | ignore_label = 255 6 | multi_label = False 7 | 8 | crop_size_h, crop_size_w = 513, 513 9 | test_size_h, test_size_w = 641, 641 10 | image_pad_value = (123.675, 116.280, 103.530) 11 | 12 | img_norm_cfg = dict( 13 | max_pixel_value=255.0, 14 | std=(0.229, 0.224, 0.225), 15 | mean=(0.485, 0.456, 0.406), 16 | ) 17 | norm_cfg = dict(type='SyncBN') 18 | 19 | inference = dict( 20 | multi_label=multi_label, 21 | transforms=[ 22 | dict(type='PadIfNeeded', min_height=test_size_h, min_width=test_size_w, 23 | value=image_pad_value, mask_value=ignore_label), 24 | dict(type='Normalize', **img_norm_cfg), 25 | dict(type='ToTensor'), 26 | ], 27 | model=dict( 28 | # model/encoder 29 | encoder=dict( 30 | backbone=dict( 31 | type='MYVGG9', 32 | pretrain=True, 33 | nclasses = nclasses, 34 | ), 35 | ), 36 | collect=dict(type='CollectBlock', from_layer='c5'), 37 | # model/head 38 | head=dict( 39 | type='Head', 40 | no_convs=True, 41 | upsample=dict( 42 | type='Upsample', 43 | scale_factor=8, 44 | scale_bias=-7, 45 | mode='bilinear', 46 | align_corners=True, 47 | ), 48 | ), 49 | ), 50 | ) 51 | 52 | # 2. configuration for train/test 53 | root_workdir = 'workdir' 54 | dataset_type = 'CocoDataset' 55 | dataset_root = 'data/COCO2017' 56 | 57 | common = dict( 58 | seed=0, 59 | logger=dict( 60 | handlers=( 61 | dict(type='StreamHandler', level='INFO'), 62 | dict(type='FileHandler', level='INFO'), 63 | ), 64 | ), 65 | cudnn_deterministic=False, 66 | cudnn_benchmark=True, 67 | metrics=[ 68 | dict(type='IoU', num_classes=nclasses), 69 | dict(type='MIoU', num_classes=nclasses, average='equal'), 70 | ], 71 | dist_params=dict(backend='nccl'), 72 | ) 73 | 74 | ## 2.1 configuration for test 75 | test = dict( 76 | data=dict( 77 | dataset=dict( 78 | type=dataset_type, 79 | root=dataset_root, 80 | ann_file='instances_val2017.json', 81 | img_prefix='val2017', 82 | multi_label=multi_label, 83 | ), 84 | transforms=inference['transforms'], 85 | sampler=dict( 86 | type='DefaultSampler', 87 | ), 88 | dataloader=dict( 89 | type='DataLoader', 90 | samples_per_gpu=4, 91 | workers_per_gpu=4, 92 | shuffle=False, 93 | drop_last=False, 94 | pin_memory=True, 95 | ), 96 | ), 97 | # tta=dict( 98 | # scales=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 99 | # biases=[0.5, 0.25, 0.0, -0.25, -0.5, -0.75], 100 | # flip=True, 101 | # ), 102 | ) 103 | 104 | ## 2.2 configuration for train 105 | max_epochs = 50 106 | 107 | train = dict( 108 | data=dict( 109 | train=dict( 110 | dataset=dict( 111 | type=dataset_type, 112 | root=dataset_root, 113 | ann_file='instances_train2017.json', 114 | img_prefix='train2017', 115 | multi_label=multi_label, 116 | ), 117 | transforms=[ 118 | dict(type='RandomScale', scale_limit=(0.5, 2), scale_step=0.25, 119 | interpolation=cv2.INTER_LINEAR), 120 | dict(type='PadIfNeeded', min_height=crop_size_h, 121 | min_width=crop_size_w, value=image_pad_value, 122 | mask_value=ignore_label), 123 | dict(type='RandomCrop', height=crop_size_h, width=crop_size_w), 124 | dict(type='HorizontalFlip', p=0.5), 125 | dict(type='Normalize', **img_norm_cfg), 126 | dict(type='ToTensor'), 127 | ], 128 | sampler=dict( 129 | type='DefaultSampler', 130 | ), 131 | dataloader=dict( 132 | type='DataLoader', 133 | samples_per_gpu=8, 134 | workers_per_gpu=4, 135 | shuffle=True, 136 | drop_last=True, 137 | pin_memory=True, 138 | ), 139 | ), 140 | val=dict( 141 | dataset=dict( 142 | type=dataset_type, 143 | root=dataset_root, 144 | ann_file='instances_val2017.json', 145 | img_prefix='val2017', 146 | 
multi_label=multi_label, 147 | ), 148 | transforms=inference['transforms'], 149 | sampler=dict( 150 | type='DefaultSampler', 151 | ), 152 | dataloader=dict( 153 | type='DataLoader', 154 | samples_per_gpu=8, 155 | workers_per_gpu=4, 156 | shuffle=False, 157 | drop_last=False, 158 | pin_memory=True, 159 | ), 160 | ), 161 | ), 162 | resume=None, 163 | criterion=dict(type='CrossEntropyLoss', ignore_index=ignore_label), 164 | optimizer=dict(type='SGD', lr=0.007, momentum=0.9, weight_decay=0.0001), 165 | lr_scheduler=dict(type='PolyLR', max_epochs=max_epochs), 166 | max_epochs=max_epochs, 167 | trainval_ratio=1, 168 | log_interval=10, 169 | snapshot_interval=5, 170 | save_best=True, 171 | ) 172 | -------------------------------------------------------------------------------- /semantic segmentation/configs/coco_deeplabv1_4bit.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | # 1. configuration for inference 4 | nclasses = 81 5 | ignore_label = 255 6 | multi_label = False 7 | 8 | crop_size_h, crop_size_w = 513, 513 9 | test_size_h, test_size_w = 641, 641 10 | image_pad_value = (123.675, 116.280, 103.530) 11 | 12 | img_norm_cfg = dict( 13 | max_pixel_value=255.0, 14 | std=(0.229, 0.224, 0.225), 15 | mean=(0.485, 0.456, 0.406), 16 | ) 17 | norm_cfg = dict(type='SyncBN') 18 | 19 | inference = dict( 20 | multi_label=multi_label, 21 | transforms=[ 22 | dict(type='PadIfNeeded', min_height=test_size_h, min_width=test_size_w, 23 | value=image_pad_value, mask_value=ignore_label), 24 | dict(type='Normalize', **img_norm_cfg), 25 | dict(type='ToTensor'), 26 | ], 27 | model=dict( 28 | # model/encoder 29 | encoder=dict( 30 | backbone=dict( 31 | type='MYVGG9', 32 | pretrain=True, 33 | nclasses = nclasses, 34 | bit=4, 35 | init='./workdir/coco_deeplabv1/best_mIoU.pth' 36 | ), 37 | ), 38 | collect=dict(type='CollectBlock', from_layer='c5'), 39 | # model/head 40 | head=dict( 41 | type='Head', 42 | no_convs=True, 43 | upsample=dict( 44 | type='Upsample', 45 | scale_factor=8, 46 | scale_bias=-7, 47 | mode='bilinear', 48 | align_corners=True, 49 | ), 50 | ), 51 | ), 52 | ) 53 | 54 | # 2. 
configuration for train/test 55 | root_workdir = 'workdir' 56 | dataset_type = 'CocoDataset' 57 | dataset_root = 'data/COCO2017' 58 | 59 | common = dict( 60 | seed=0, 61 | logger=dict( 62 | handlers=( 63 | dict(type='StreamHandler', level='INFO'), 64 | dict(type='FileHandler', level='INFO'), 65 | ), 66 | ), 67 | cudnn_deterministic=False, 68 | cudnn_benchmark=True, 69 | metrics=[ 70 | dict(type='IoU', num_classes=nclasses), 71 | dict(type='MIoU', num_classes=nclasses, average='equal'), 72 | ], 73 | dist_params=dict(backend='nccl'), 74 | ) 75 | 76 | ## 2.1 configuration for test 77 | test = dict( 78 | data=dict( 79 | dataset=dict( 80 | type=dataset_type, 81 | root=dataset_root, 82 | ann_file='instances_val2017.json', 83 | img_prefix='val2017', 84 | multi_label=multi_label, 85 | ), 86 | transforms=inference['transforms'], 87 | sampler=dict( 88 | type='DefaultSampler', 89 | ), 90 | dataloader=dict( 91 | type='DataLoader', 92 | samples_per_gpu=4, 93 | workers_per_gpu=4, 94 | shuffle=False, 95 | drop_last=False, 96 | pin_memory=True, 97 | ), 98 | ), 99 | # tta=dict( 100 | # scales=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 101 | # biases=[0.5, 0.25, 0.0, -0.25, -0.5, -0.75], 102 | # flip=True, 103 | # ), 104 | ) 105 | 106 | ## 2.2 configuration for train 107 | max_epochs = 50 108 | 109 | train = dict( 110 | data=dict( 111 | train=dict( 112 | dataset=dict( 113 | type=dataset_type, 114 | root=dataset_root, 115 | ann_file='instances_train2017.json', 116 | img_prefix='train2017', 117 | multi_label=multi_label, 118 | ), 119 | transforms=[ 120 | dict(type='RandomScale', scale_limit=(0.5, 2), scale_step=0.25, 121 | interpolation=cv2.INTER_LINEAR), 122 | dict(type='PadIfNeeded', min_height=crop_size_h, 123 | min_width=crop_size_w, value=image_pad_value, 124 | mask_value=ignore_label), 125 | dict(type='RandomCrop', height=crop_size_h, width=crop_size_w), 126 | dict(type='HorizontalFlip', p=0.5), 127 | dict(type='Normalize', **img_norm_cfg), 128 | dict(type='ToTensor'), 129 | ], 130 | sampler=dict( 131 | type='DefaultSampler', 132 | ), 133 | dataloader=dict( 134 | type='DataLoader', 135 | samples_per_gpu=6, 136 | workers_per_gpu=4, 137 | shuffle=True, 138 | drop_last=True, 139 | pin_memory=True, 140 | ), 141 | ), 142 | val=dict( 143 | dataset=dict( 144 | type=dataset_type, 145 | root=dataset_root, 146 | ann_file='instances_val2017.json', 147 | img_prefix='val2017', 148 | multi_label=multi_label, 149 | ), 150 | transforms=inference['transforms'], 151 | sampler=dict( 152 | type='DefaultSampler', 153 | ), 154 | dataloader=dict( 155 | type='DataLoader', 156 | samples_per_gpu=8, 157 | workers_per_gpu=4, 158 | shuffle=False, 159 | drop_last=False, 160 | pin_memory=True, 161 | ), 162 | ), 163 | ), 164 | resume=None, 165 | criterion=dict(type='CrossEntropyLoss', ignore_index=ignore_label), 166 | optimizer=dict(type='SGD', lr=0.007, momentum=0.9, weight_decay=0.0001), 167 | lr_scheduler=dict(type='PolyLR', max_epochs=max_epochs), 168 | max_epochs=max_epochs, 169 | trainval_ratio=1, 170 | log_interval=10, 171 | snapshot_interval=5, 172 | save_best=True, 173 | ) 174 | -------------------------------------------------------------------------------- /semantic segmentation/configs/voc_deeplabv1.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | # 1. 
configuration for inference 4 | nclasses = 21 5 | ignore_label = 255 6 | multi_label = False 7 | 8 | crop_size_h, crop_size_w = 513, 513 9 | test_size_h, test_size_w = 513, 513 10 | image_pad_value = (123.675, 116.280, 103.530) 11 | 12 | img_norm_cfg = dict( 13 | max_pixel_value=255.0, 14 | std=(0.229, 0.224, 0.225), 15 | mean=(0.485, 0.456, 0.406), 16 | ) 17 | norm_cfg = dict(type='SyncBN') 18 | 19 | inference = dict( 20 | multi_label=multi_label, 21 | transforms=[ 22 | dict(type='PadIfNeeded', min_height=test_size_h, min_width=test_size_w, 23 | value=image_pad_value, mask_value=ignore_label), 24 | dict(type='Normalize', **img_norm_cfg), 25 | dict(type='ToTensor'), 26 | ], 27 | model=dict( 28 | # model/encoder 29 | encoder=dict( 30 | backbone=dict( 31 | type='MYVGG9', 32 | pretrain=True, 33 | ), 34 | ), 35 | collect=dict(type='CollectBlock', from_layer='c5'), 36 | # model/head 37 | head=dict( 38 | type='Head', 39 | no_convs=True, 40 | upsample=dict( 41 | type='Upsample', 42 | scale_factor=8, 43 | scale_bias=-7, 44 | mode='bilinear', 45 | align_corners=True, 46 | ), 47 | ), 48 | ), 49 | ) 50 | 51 | # 2. configuration for train/test 52 | root_workdir = 'workdir' 53 | dataset_type = 'VOCDataset' 54 | dataset_root = 'data/VOCdevkit/VOC2012/' 55 | 56 | common = dict( 57 | seed=0, 58 | logger=dict( 59 | handlers=( 60 | dict(type='StreamHandler', level='INFO'), 61 | dict(type='FileHandler', level='INFO'), 62 | ), 63 | ), 64 | cudnn_deterministic=False, 65 | cudnn_benchmark=True, 66 | metrics=[ 67 | dict(type='IoU', num_classes=nclasses), 68 | dict(type='MIoU', num_classes=nclasses, average='equal'), 69 | ], 70 | dist_params=dict(backend='nccl'), 71 | ) 72 | 73 | ## 2.1 configuration for test 74 | test = dict( 75 | data=dict( 76 | dataset=dict( 77 | type=dataset_type, 78 | root=dataset_root, 79 | imglist_name='val.txt', 80 | multi_label=multi_label, 81 | ), 82 | transforms=inference['transforms'], 83 | sampler=dict( 84 | type='DefaultSampler', 85 | ), 86 | dataloader=dict( 87 | type='DataLoader', 88 | samples_per_gpu=4, 89 | workers_per_gpu=4, 90 | shuffle=False, 91 | drop_last=False, 92 | pin_memory=True, 93 | ), 94 | ), 95 | # tta=dict( 96 | # scales=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 97 | # biases=[0.5, 0.25, 0.0, -0.25, -0.5, -0.75], 98 | # flip=True, 99 | # ), 100 | ) 101 | 102 | ## 2.2 configuration for train 103 | max_epochs = 50 104 | 105 | train = dict( 106 | data=dict( 107 | train=dict( 108 | dataset=dict( 109 | type=dataset_type, 110 | root=dataset_root, 111 | imglist_name='trainaug.txt', 112 | multi_label=multi_label, 113 | ), 114 | transforms=[ 115 | dict(type='RandomScale', scale_limit=(0.5, 2), scale_step=0.25, 116 | interpolation=cv2.INTER_LINEAR), 117 | dict(type='PadIfNeeded', min_height=crop_size_h, 118 | min_width=crop_size_w, value=image_pad_value, 119 | mask_value=ignore_label), 120 | dict(type='RandomCrop', height=crop_size_h, width=crop_size_w), 121 | dict(type='HorizontalFlip', p=0.5), 122 | dict(type='Normalize', **img_norm_cfg), 123 | dict(type='ToTensor'), 124 | ], 125 | sampler=dict( 126 | type='DefaultSampler', 127 | ), 128 | dataloader=dict( 129 | type='DataLoader', 130 | samples_per_gpu=8, 131 | workers_per_gpu=4, 132 | shuffle=True, 133 | drop_last=True, 134 | pin_memory=True, 135 | ), 136 | ), 137 | val=dict( 138 | dataset=dict( 139 | type=dataset_type, 140 | root=dataset_root, 141 | imglist_name='val.txt', 142 | multi_label=multi_label, 143 | ), 144 | transforms=inference['transforms'], 145 | sampler=dict( 146 | type='DefaultSampler', 147 | ), 148 | 
dataloader=dict( 149 | type='DataLoader', 150 | samples_per_gpu=8, 151 | workers_per_gpu=4, 152 | shuffle=False, 153 | drop_last=False, 154 | pin_memory=True, 155 | ), 156 | ), 157 | ), 158 | resume=None, 159 | criterion=dict(type='CrossEntropyLoss', ignore_index=ignore_label), 160 | optimizer=dict(type='SGD', lr=0.007, momentum=0.9, weight_decay=0.0001), 161 | lr_scheduler=dict(type='PolyLR', max_epochs=max_epochs), 162 | max_epochs=max_epochs, 163 | trainval_ratio=1, 164 | log_interval=10, 165 | snapshot_interval=5, 166 | save_best=True, 167 | ) 168 | -------------------------------------------------------------------------------- /semantic segmentation/configs/voc_deeplabv1_2bit.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | # 1. configuration for inference 4 | nclasses = 21 5 | ignore_label = 255 6 | multi_label = False 7 | 8 | crop_size_h, crop_size_w = 513, 513 9 | test_size_h, test_size_w = 513, 513 10 | image_pad_value = (123.675, 116.280, 103.530) 11 | 12 | img_norm_cfg = dict( 13 | max_pixel_value=255.0, 14 | std=(0.229, 0.224, 0.225), 15 | mean=(0.485, 0.456, 0.406), 16 | ) 17 | norm_cfg = dict(type='SyncBN') 18 | 19 | inference = dict( 20 | multi_label=multi_label, 21 | transforms=[ 22 | dict(type='PadIfNeeded', min_height=test_size_h, min_width=test_size_w, 23 | value=image_pad_value, mask_value=ignore_label), 24 | dict(type='Normalize', **img_norm_cfg), 25 | dict(type='ToTensor'), 26 | ], 27 | model=dict( 28 | # model/encoder 29 | encoder=dict( 30 | backbone=dict( 31 | type='MYVGG9', 32 | bit=2, 33 | pretrain=True, 34 | init='./workdir/voc_deeplabv1_3bit/best_mIoU.pth' 35 | ), 36 | ), 37 | collect=dict(type='CollectBlock', from_layer='c5'), 38 | # model/head 39 | head=dict( 40 | type='Head', 41 | no_convs=True, 42 | upsample=dict( 43 | type='Upsample', 44 | scale_factor=8, 45 | scale_bias=-7, 46 | mode='bilinear', 47 | align_corners=True, 48 | ), 49 | ), 50 | ), 51 | ) 52 | 53 | # 2. 
configuration for train/test 54 | root_workdir = 'workdir' 55 | dataset_type = 'VOCDataset' 56 | dataset_root = 'data/VOCdevkit/VOC2012/' 57 | 58 | common = dict( 59 | seed=0, 60 | logger=dict( 61 | handlers=( 62 | dict(type='StreamHandler', level='INFO'), 63 | dict(type='FileHandler', level='INFO'), 64 | ), 65 | ), 66 | cudnn_deterministic=False, 67 | cudnn_benchmark=True, 68 | metrics=[ 69 | dict(type='IoU', num_classes=nclasses), 70 | dict(type='MIoU', num_classes=nclasses, average='equal'), 71 | ], 72 | dist_params=dict(backend='nccl'), 73 | ) 74 | 75 | ## 2.1 configuration for test 76 | test = dict( 77 | data=dict( 78 | dataset=dict( 79 | type=dataset_type, 80 | root=dataset_root, 81 | imglist_name='val.txt', 82 | multi_label=multi_label, 83 | ), 84 | transforms=inference['transforms'], 85 | sampler=dict( 86 | type='DefaultSampler', 87 | ), 88 | dataloader=dict( 89 | type='DataLoader', 90 | samples_per_gpu=4, 91 | workers_per_gpu=4, 92 | shuffle=False, 93 | drop_last=False, 94 | pin_memory=True, 95 | ), 96 | ), 97 | # tta=dict( 98 | # scales=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 99 | # biases=[0.5, 0.25, 0.0, -0.25, -0.5, -0.75], 100 | # flip=True, 101 | # ), 102 | ) 103 | 104 | ## 2.2 configuration for train 105 | max_epochs = 50 106 | 107 | train = dict( 108 | data=dict( 109 | train=dict( 110 | dataset=dict( 111 | type=dataset_type, 112 | root=dataset_root, 113 | imglist_name='trainaug.txt', 114 | multi_label=multi_label, 115 | ), 116 | transforms=[ 117 | dict(type='RandomScale', scale_limit=(0.5, 2), scale_step=0.25, 118 | interpolation=cv2.INTER_LINEAR), 119 | dict(type='PadIfNeeded', min_height=crop_size_h, 120 | min_width=crop_size_w, value=image_pad_value, 121 | mask_value=ignore_label), 122 | dict(type='RandomCrop', height=crop_size_h, width=crop_size_w), 123 | dict(type='HorizontalFlip', p=0.5), 124 | dict(type='Normalize', **img_norm_cfg), 125 | dict(type='ToTensor'), 126 | ], 127 | sampler=dict( 128 | type='DefaultSampler', 129 | ), 130 | dataloader=dict( 131 | type='DataLoader', 132 | samples_per_gpu=8, 133 | workers_per_gpu=4, 134 | shuffle=True, 135 | drop_last=True, 136 | pin_memory=True, 137 | ), 138 | ), 139 | val=dict( 140 | dataset=dict( 141 | type=dataset_type, 142 | root=dataset_root, 143 | imglist_name='val.txt', 144 | multi_label=multi_label, 145 | ), 146 | transforms=inference['transforms'], 147 | sampler=dict( 148 | type='DefaultSampler', 149 | ), 150 | dataloader=dict( 151 | type='DataLoader', 152 | samples_per_gpu=8, 153 | workers_per_gpu=4, 154 | shuffle=False, 155 | drop_last=False, 156 | pin_memory=True, 157 | ), 158 | ), 159 | ), 160 | resume=None, 161 | criterion=dict(type='CrossEntropyLoss', ignore_index=ignore_label), 162 | optimizer=dict(type='SGD', lr=0.007, momentum=0.9, weight_decay=0.0001), 163 | lr_scheduler=dict(type='PolyLR', max_epochs=max_epochs), 164 | max_epochs=max_epochs, 165 | trainval_ratio=1, 166 | log_interval=10, 167 | snapshot_interval=5, 168 | save_best=True, 169 | ) 170 | -------------------------------------------------------------------------------- /semantic segmentation/configs/voc_deeplabv1_3bit.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | # 1. 
configuration for inference 4 | nclasses = 21 5 | ignore_label = 255 6 | multi_label = False 7 | 8 | crop_size_h, crop_size_w = 513, 513 9 | test_size_h, test_size_w = 513, 513 10 | image_pad_value = (123.675, 116.280, 103.530) 11 | 12 | img_norm_cfg = dict( 13 | max_pixel_value=255.0, 14 | std=(0.229, 0.224, 0.225), 15 | mean=(0.485, 0.456, 0.406), 16 | ) 17 | norm_cfg = dict(type='SyncBN') 18 | 19 | inference = dict( 20 | multi_label=multi_label, 21 | transforms=[ 22 | dict(type='PadIfNeeded', min_height=test_size_h, min_width=test_size_w, 23 | value=image_pad_value, mask_value=ignore_label), 24 | dict(type='Normalize', **img_norm_cfg), 25 | dict(type='ToTensor'), 26 | ], 27 | model=dict( 28 | # model/encoder 29 | encoder=dict( 30 | backbone=dict( 31 | type='MYVGG9', 32 | bit=3, 33 | pretrain=True, 34 | init='./workdir/voc_deeplabv1_4bit/best_mIoU.pth' 35 | ), 36 | ), 37 | collect=dict(type='CollectBlock', from_layer='c5'), 38 | # model/head 39 | head=dict( 40 | type='Head', 41 | no_convs=True, 42 | upsample=dict( 43 | type='Upsample', 44 | scale_factor=8, 45 | scale_bias=-7, 46 | mode='bilinear', 47 | align_corners=True, 48 | ), 49 | ), 50 | ), 51 | ) 52 | 53 | # 2. configuration for train/test 54 | root_workdir = 'workdir' 55 | dataset_type = 'VOCDataset' 56 | dataset_root = 'data/VOCdevkit/VOC2012/' 57 | 58 | common = dict( 59 | seed=0, 60 | logger=dict( 61 | handlers=( 62 | dict(type='StreamHandler', level='INFO'), 63 | dict(type='FileHandler', level='INFO'), 64 | ), 65 | ), 66 | cudnn_deterministic=False, 67 | cudnn_benchmark=True, 68 | metrics=[ 69 | dict(type='IoU', num_classes=nclasses), 70 | dict(type='MIoU', num_classes=nclasses, average='equal'), 71 | ], 72 | dist_params=dict(backend='nccl'), 73 | ) 74 | 75 | ## 2.1 configuration for test 76 | test = dict( 77 | data=dict( 78 | dataset=dict( 79 | type=dataset_type, 80 | root=dataset_root, 81 | imglist_name='val.txt', 82 | multi_label=multi_label, 83 | ), 84 | transforms=inference['transforms'], 85 | sampler=dict( 86 | type='DefaultSampler', 87 | ), 88 | dataloader=dict( 89 | type='DataLoader', 90 | samples_per_gpu=4, 91 | workers_per_gpu=4, 92 | shuffle=False, 93 | drop_last=False, 94 | pin_memory=True, 95 | ), 96 | ), 97 | # tta=dict( 98 | # scales=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 99 | # biases=[0.5, 0.25, 0.0, -0.25, -0.5, -0.75], 100 | # flip=True, 101 | # ), 102 | ) 103 | 104 | ## 2.2 configuration for train 105 | max_epochs = 50 106 | 107 | train = dict( 108 | data=dict( 109 | train=dict( 110 | dataset=dict( 111 | type=dataset_type, 112 | root=dataset_root, 113 | imglist_name='trainaug.txt', 114 | multi_label=multi_label, 115 | ), 116 | transforms=[ 117 | dict(type='RandomScale', scale_limit=(0.5, 2), scale_step=0.25, 118 | interpolation=cv2.INTER_LINEAR), 119 | dict(type='PadIfNeeded', min_height=crop_size_h, 120 | min_width=crop_size_w, value=image_pad_value, 121 | mask_value=ignore_label), 122 | dict(type='RandomCrop', height=crop_size_h, width=crop_size_w), 123 | dict(type='HorizontalFlip', p=0.5), 124 | dict(type='Normalize', **img_norm_cfg), 125 | dict(type='ToTensor'), 126 | ], 127 | sampler=dict( 128 | type='DefaultSampler', 129 | ), 130 | dataloader=dict( 131 | type='DataLoader', 132 | samples_per_gpu=8, 133 | workers_per_gpu=4, 134 | shuffle=True, 135 | drop_last=True, 136 | pin_memory=True, 137 | ), 138 | ), 139 | val=dict( 140 | dataset=dict( 141 | type=dataset_type, 142 | root=dataset_root, 143 | imglist_name='val.txt', 144 | multi_label=multi_label, 145 | ), 146 | transforms=inference['transforms'], 147 | 
sampler=dict( 148 | type='DefaultSampler', 149 | ), 150 | dataloader=dict( 151 | type='DataLoader', 152 | samples_per_gpu=8, 153 | workers_per_gpu=4, 154 | shuffle=False, 155 | drop_last=False, 156 | pin_memory=True, 157 | ), 158 | ), 159 | ), 160 | resume=None, 161 | criterion=dict(type='CrossEntropyLoss', ignore_index=ignore_label), 162 | optimizer=dict(type='SGD', lr=0.007, momentum=0.9, weight_decay=0.0001), 163 | lr_scheduler=dict(type='PolyLR', max_epochs=max_epochs), 164 | max_epochs=max_epochs, 165 | trainval_ratio=1, 166 | log_interval=10, 167 | snapshot_interval=5, 168 | save_best=True, 169 | ) 170 | -------------------------------------------------------------------------------- /semantic segmentation/configs/voc_deeplabv1_4bit.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | # 1. configuration for inference 4 | nclasses = 21 5 | ignore_label = 255 6 | multi_label = False 7 | 8 | crop_size_h, crop_size_w = 513, 513 9 | test_size_h, test_size_w = 513, 513 10 | image_pad_value = (123.675, 116.280, 103.530) 11 | 12 | img_norm_cfg = dict( 13 | max_pixel_value=255.0, 14 | std=(0.229, 0.224, 0.225), 15 | mean=(0.485, 0.456, 0.406), 16 | ) 17 | norm_cfg = dict(type='SyncBN') 18 | 19 | inference = dict( 20 | multi_label=multi_label, 21 | transforms=[ 22 | dict(type='PadIfNeeded', min_height=test_size_h, min_width=test_size_w, 23 | value=image_pad_value, mask_value=ignore_label), 24 | dict(type='Normalize', **img_norm_cfg), 25 | dict(type='ToTensor'), 26 | ], 27 | model=dict( 28 | # model/encoder 29 | encoder=dict( 30 | backbone=dict( 31 | type='MYVGG9', 32 | bit=4, 33 | pretrain=True, 34 | init='./workdir/voc_deeplabv1/best_mIoU.pth' 35 | ), 36 | ), 37 | collect=dict(type='CollectBlock', from_layer='c5'), 38 | # model/head 39 | head=dict( 40 | type='Head', 41 | no_convs=True, 42 | upsample=dict( 43 | type='Upsample', 44 | scale_factor=8, 45 | scale_bias=-7, 46 | mode='bilinear', 47 | align_corners=True, 48 | ), 49 | ), 50 | ), 51 | ) 52 | 53 | # 2. 
configuration for train/test 54 | root_workdir = 'workdir' 55 | dataset_type = 'VOCDataset' 56 | dataset_root = 'data/VOCdevkit/VOC2012/' 57 | 58 | common = dict( 59 | seed=0, 60 | logger=dict( 61 | handlers=( 62 | dict(type='StreamHandler', level='INFO'), 63 | dict(type='FileHandler', level='INFO'), 64 | ), 65 | ), 66 | cudnn_deterministic=False, 67 | cudnn_benchmark=True, 68 | metrics=[ 69 | dict(type='IoU', num_classes=nclasses), 70 | dict(type='MIoU', num_classes=nclasses, average='equal'), 71 | ], 72 | dist_params=dict(backend='nccl'), 73 | ) 74 | 75 | ## 2.1 configuration for test 76 | test = dict( 77 | data=dict( 78 | dataset=dict( 79 | type=dataset_type, 80 | root=dataset_root, 81 | imglist_name='val.txt', 82 | multi_label=multi_label, 83 | ), 84 | transforms=inference['transforms'], 85 | sampler=dict( 86 | type='DefaultSampler', 87 | ), 88 | dataloader=dict( 89 | type='DataLoader', 90 | samples_per_gpu=4, 91 | workers_per_gpu=4, 92 | shuffle=False, 93 | drop_last=False, 94 | pin_memory=True, 95 | ), 96 | ), 97 | # tta=dict( 98 | # scales=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 99 | # biases=[0.5, 0.25, 0.0, -0.25, -0.5, -0.75], 100 | # flip=True, 101 | # ), 102 | ) 103 | 104 | ## 2.2 configuration for train 105 | max_epochs = 50 106 | 107 | train = dict( 108 | data=dict( 109 | train=dict( 110 | dataset=dict( 111 | type=dataset_type, 112 | root=dataset_root, 113 | imglist_name='trainaug.txt', 114 | multi_label=multi_label, 115 | ), 116 | transforms=[ 117 | dict(type='RandomScale', scale_limit=(0.5, 2), scale_step=0.25, 118 | interpolation=cv2.INTER_LINEAR), 119 | dict(type='PadIfNeeded', min_height=crop_size_h, 120 | min_width=crop_size_w, value=image_pad_value, 121 | mask_value=ignore_label), 122 | dict(type='RandomCrop', height=crop_size_h, width=crop_size_w), 123 | dict(type='HorizontalFlip', p=0.5), 124 | dict(type='Normalize', **img_norm_cfg), 125 | dict(type='ToTensor'), 126 | ], 127 | sampler=dict( 128 | type='DefaultSampler', 129 | ), 130 | dataloader=dict( 131 | type='DataLoader', 132 | samples_per_gpu=8, 133 | workers_per_gpu=4, 134 | shuffle=True, 135 | drop_last=True, 136 | pin_memory=True, 137 | ), 138 | ), 139 | val=dict( 140 | dataset=dict( 141 | type=dataset_type, 142 | root=dataset_root, 143 | imglist_name='val.txt', 144 | multi_label=multi_label, 145 | ), 146 | transforms=inference['transforms'], 147 | sampler=dict( 148 | type='DefaultSampler', 149 | ), 150 | dataloader=dict( 151 | type='DataLoader', 152 | samples_per_gpu=8, 153 | workers_per_gpu=4, 154 | shuffle=False, 155 | drop_last=False, 156 | pin_memory=True, 157 | ), 158 | ), 159 | ), 160 | resume=None, 161 | criterion=dict(type='CrossEntropyLoss', ignore_index=ignore_label), 162 | optimizer=dict(type='SGD', lr=0.007, momentum=0.9, weight_decay=0.0001), 163 | lr_scheduler=dict(type='PolyLR', max_epochs=max_epochs), 164 | max_epochs=max_epochs, 165 | trainval_ratio=1, 166 | log_interval=10, 167 | snapshot_interval=5, 168 | save_best=True, 169 | ) 170 | -------------------------------------------------------------------------------- /semantic segmentation/configs/voc_deeplabv1_T15.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | # 1. 
configuration for inference 4 | nclasses = 21 5 | ignore_label = 255 6 | multi_label = False 7 | 8 | crop_size_h, crop_size_w = 513, 513 9 | test_size_h, test_size_w = 513, 513 10 | image_pad_value = (123.675, 116.280, 103.530) 11 | 12 | img_norm_cfg = dict( 13 | max_pixel_value=255.0, 14 | std=(0.229, 0.224, 0.225), 15 | mean=(0.485, 0.456, 0.406), 16 | ) 17 | norm_cfg = dict(type='SyncBN') 18 | 19 | inference = dict( 20 | multi_label=multi_label, 21 | transforms=[ 22 | dict(type='PadIfNeeded', min_height=test_size_h, min_width=test_size_w, 23 | value=image_pad_value, mask_value=ignore_label), 24 | dict(type='Normalize', **img_norm_cfg), 25 | dict(type='ToTensor'), 26 | ], 27 | model=dict( 28 | # model/encoder 29 | encoder=dict( 30 | backbone=dict( 31 | type='MYSVGG9', 32 | bit=4, 33 | pretrain=False, 34 | ), 35 | ), 36 | collect=dict(type='CollectBlock', from_layer='c5'), 37 | # model/head 38 | head=dict( 39 | type='Head', 40 | no_convs=True, 41 | upsample=dict( 42 | type='Upsample', 43 | scale_factor=8, 44 | scale_bias=-7, 45 | mode='bilinear', 46 | align_corners=True, 47 | ), 48 | ), 49 | ), 50 | ) 51 | 52 | # 2. configuration for train/test 53 | root_workdir = 'workdir' 54 | dataset_type = 'VOCDataset' 55 | dataset_root = 'data/VOCdevkit/VOC2012/' 56 | 57 | common = dict( 58 | seed=0, 59 | logger=dict( 60 | handlers=( 61 | dict(type='StreamHandler', level='INFO'), 62 | dict(type='FileHandler', level='INFO'), 63 | ), 64 | ), 65 | cudnn_deterministic=False, 66 | cudnn_benchmark=True, 67 | metrics=[ 68 | dict(type='IoU', num_classes=nclasses), 69 | dict(type='MIoU', num_classes=nclasses, average='equal'), 70 | ], 71 | dist_params=dict(backend='nccl'), 72 | ) 73 | 74 | ## 2.1 configuration for test 75 | test = dict( 76 | data=dict( 77 | dataset=dict( 78 | type=dataset_type, 79 | root=dataset_root, 80 | imglist_name='val.txt', 81 | multi_label=multi_label, 82 | ), 83 | transforms=inference['transforms'], 84 | sampler=dict( 85 | type='DefaultSampler', 86 | ), 87 | dataloader=dict( 88 | type='DataLoader', 89 | samples_per_gpu=1, 90 | workers_per_gpu=4, 91 | shuffle=False, 92 | drop_last=False, 93 | pin_memory=True, 94 | ), 95 | ), 96 | # tta=dict( 97 | # scales=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 98 | # biases=[0.5, 0.25, 0.0, -0.25, -0.5, -0.75], 99 | # flip=True, 100 | # ), 101 | ) 102 | 103 | ## 2.2 configuration for train 104 | max_epochs = 50 105 | 106 | train = dict( 107 | data=dict( 108 | train=dict( 109 | dataset=dict( 110 | type=dataset_type, 111 | root=dataset_root, 112 | imglist_name='trainaug.txt', 113 | multi_label=multi_label, 114 | ), 115 | transforms=[ 116 | dict(type='RandomScale', scale_limit=(0.5, 2), scale_step=0.25, 117 | interpolation=cv2.INTER_LINEAR), 118 | dict(type='PadIfNeeded', min_height=crop_size_h, 119 | min_width=crop_size_w, value=image_pad_value, 120 | mask_value=ignore_label), 121 | dict(type='RandomCrop', height=crop_size_h, width=crop_size_w), 122 | dict(type='HorizontalFlip', p=0.5), 123 | dict(type='Normalize', **img_norm_cfg), 124 | dict(type='ToTensor'), 125 | ], 126 | sampler=dict( 127 | type='DefaultSampler', 128 | ), 129 | dataloader=dict( 130 | type='DataLoader', 131 | samples_per_gpu=8, 132 | workers_per_gpu=4, 133 | shuffle=True, 134 | drop_last=True, 135 | pin_memory=True, 136 | ), 137 | ), 138 | val=dict( 139 | dataset=dict( 140 | type=dataset_type, 141 | root=dataset_root, 142 | imglist_name='val.txt', 143 | multi_label=multi_label, 144 | ), 145 | transforms=inference['transforms'], 146 | sampler=dict( 147 | type='DefaultSampler', 148 | ), 149 
| dataloader=dict( 150 | type='DataLoader', 151 | samples_per_gpu=8, 152 | workers_per_gpu=4, 153 | shuffle=False, 154 | drop_last=False, 155 | pin_memory=True, 156 | ), 157 | ), 158 | ), 159 | resume=None, 160 | criterion=dict(type='CrossEntropyLoss', ignore_index=ignore_label), 161 | optimizer=dict(type='SGD', lr=0.007, momentum=0.9, weight_decay=0.0001), 162 | lr_scheduler=dict(type='PolyLR', max_epochs=max_epochs), 163 | max_epochs=max_epochs, 164 | trainval_ratio=1, 165 | log_interval=10, 166 | snapshot_interval=5, 167 | save_best=True, 168 | ) 169 | -------------------------------------------------------------------------------- /semantic segmentation/configs/voc_deeplabv1_T3.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | # 1. configuration for inference 4 | nclasses = 21 5 | ignore_label = 255 6 | multi_label = False 7 | 8 | crop_size_h, crop_size_w = 513, 513 9 | test_size_h, test_size_w = 513, 513 10 | image_pad_value = (123.675, 116.280, 103.530) 11 | 12 | img_norm_cfg = dict( 13 | max_pixel_value=255.0, 14 | std=(0.229, 0.224, 0.225), 15 | mean=(0.485, 0.456, 0.406), 16 | ) 17 | norm_cfg = dict(type='SyncBN') 18 | 19 | inference = dict( 20 | multi_label=multi_label, 21 | transforms=[ 22 | dict(type='PadIfNeeded', min_height=test_size_h, min_width=test_size_w, 23 | value=image_pad_value, mask_value=ignore_label), 24 | dict(type='Normalize', **img_norm_cfg), 25 | dict(type='ToTensor'), 26 | ], 27 | model=dict( 28 | # model/encoder 29 | encoder=dict( 30 | backbone=dict( 31 | type='MYSVGG9', 32 | bit=2, 33 | pretrain=False, 34 | ), 35 | ), 36 | collect=dict(type='CollectBlock', from_layer='c5'), 37 | # model/head 38 | head=dict( 39 | type='Head', 40 | no_convs=True, 41 | upsample=dict( 42 | type='Upsample', 43 | scale_factor=8, 44 | scale_bias=-7, 45 | mode='bilinear', 46 | align_corners=True, 47 | ), 48 | ), 49 | ), 50 | ) 51 | 52 | # 2. 
configuration for train/test 53 | root_workdir = 'workdir' 54 | dataset_type = 'VOCDataset' 55 | dataset_root = 'data/VOCdevkit/VOC2012/' 56 | 57 | common = dict( 58 | seed=0, 59 | logger=dict( 60 | handlers=( 61 | dict(type='StreamHandler', level='INFO'), 62 | dict(type='FileHandler', level='INFO'), 63 | ), 64 | ), 65 | cudnn_deterministic=False, 66 | cudnn_benchmark=True, 67 | metrics=[ 68 | dict(type='IoU', num_classes=nclasses), 69 | dict(type='MIoU', num_classes=nclasses, average='equal'), 70 | ], 71 | dist_params=dict(backend='nccl'), 72 | ) 73 | 74 | ## 2.1 configuration for test 75 | test = dict( 76 | data=dict( 77 | dataset=dict( 78 | type=dataset_type, 79 | root=dataset_root, 80 | imglist_name='val.txt', 81 | multi_label=multi_label, 82 | ), 83 | transforms=inference['transforms'], 84 | sampler=dict( 85 | type='DefaultSampler', 86 | ), 87 | dataloader=dict( 88 | type='DataLoader', 89 | samples_per_gpu=4, 90 | workers_per_gpu=4, 91 | shuffle=False, 92 | drop_last=False, 93 | pin_memory=True, 94 | ), 95 | ), 96 | # tta=dict( 97 | # scales=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 98 | # biases=[0.5, 0.25, 0.0, -0.25, -0.5, -0.75], 99 | # flip=True, 100 | # ), 101 | ) 102 | 103 | ## 2.2 configuration for train 104 | max_epochs = 50 105 | 106 | train = dict( 107 | data=dict( 108 | train=dict( 109 | dataset=dict( 110 | type=dataset_type, 111 | root=dataset_root, 112 | imglist_name='trainaug.txt', 113 | multi_label=multi_label, 114 | ), 115 | transforms=[ 116 | dict(type='RandomScale', scale_limit=(0.5, 2), scale_step=0.25, 117 | interpolation=cv2.INTER_LINEAR), 118 | dict(type='PadIfNeeded', min_height=crop_size_h, 119 | min_width=crop_size_w, value=image_pad_value, 120 | mask_value=ignore_label), 121 | dict(type='RandomCrop', height=crop_size_h, width=crop_size_w), 122 | dict(type='HorizontalFlip', p=0.5), 123 | dict(type='Normalize', **img_norm_cfg), 124 | dict(type='ToTensor'), 125 | ], 126 | sampler=dict( 127 | type='DefaultSampler', 128 | ), 129 | dataloader=dict( 130 | type='DataLoader', 131 | samples_per_gpu=8, 132 | workers_per_gpu=4, 133 | shuffle=True, 134 | drop_last=True, 135 | pin_memory=True, 136 | ), 137 | ), 138 | val=dict( 139 | dataset=dict( 140 | type=dataset_type, 141 | root=dataset_root, 142 | imglist_name='val.txt', 143 | multi_label=multi_label, 144 | ), 145 | transforms=inference['transforms'], 146 | sampler=dict( 147 | type='DefaultSampler', 148 | ), 149 | dataloader=dict( 150 | type='DataLoader', 151 | samples_per_gpu=8, 152 | workers_per_gpu=4, 153 | shuffle=False, 154 | drop_last=False, 155 | pin_memory=True, 156 | ), 157 | ), 158 | ), 159 | resume=None, 160 | criterion=dict(type='CrossEntropyLoss', ignore_index=ignore_label), 161 | optimizer=dict(type='SGD', lr=0.007, momentum=0.9, weight_decay=0.0001), 162 | lr_scheduler=dict(type='PolyLR', max_epochs=max_epochs), 163 | max_epochs=max_epochs, 164 | trainval_ratio=1, 165 | log_interval=10, 166 | snapshot_interval=5, 167 | save_best=True, 168 | ) 169 | -------------------------------------------------------------------------------- /semantic segmentation/configs/voc_deeplabv1_T7.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | # 1. 
configuration for inference 4 | nclasses = 21 5 | ignore_label = 255 6 | multi_label = False 7 | 8 | crop_size_h, crop_size_w = 513, 513 9 | test_size_h, test_size_w = 513, 513 10 | image_pad_value = (123.675, 116.280, 103.530) 11 | 12 | img_norm_cfg = dict( 13 | max_pixel_value=255.0, 14 | std=(0.229, 0.224, 0.225), 15 | mean=(0.485, 0.456, 0.406), 16 | ) 17 | norm_cfg = dict(type='SyncBN') 18 | 19 | inference = dict( 20 | multi_label=multi_label, 21 | transforms=[ 22 | dict(type='PadIfNeeded', min_height=test_size_h, min_width=test_size_w, 23 | value=image_pad_value, mask_value=ignore_label), 24 | dict(type='Normalize', **img_norm_cfg), 25 | dict(type='ToTensor'), 26 | ], 27 | model=dict( 28 | # model/encoder 29 | encoder=dict( 30 | backbone=dict( 31 | type='MYSVGG9', 32 | bit=3, 33 | pretrain=False, 34 | ), 35 | ), 36 | collect=dict(type='CollectBlock', from_layer='c5'), 37 | # model/head 38 | head=dict( 39 | type='Head', 40 | no_convs=True, 41 | upsample=dict( 42 | type='Upsample', 43 | scale_factor=8, 44 | scale_bias=-7, 45 | mode='bilinear', 46 | align_corners=True, 47 | ), 48 | ), 49 | ), 50 | ) 51 | 52 | # 2. configuration for train/test 53 | root_workdir = 'workdir' 54 | dataset_type = 'VOCDataset' 55 | dataset_root = 'data/VOCdevkit/VOC2012/' 56 | 57 | common = dict( 58 | seed=0, 59 | logger=dict( 60 | handlers=( 61 | dict(type='StreamHandler', level='INFO'), 62 | dict(type='FileHandler', level='INFO'), 63 | ), 64 | ), 65 | cudnn_deterministic=False, 66 | cudnn_benchmark=True, 67 | metrics=[ 68 | dict(type='IoU', num_classes=nclasses), 69 | dict(type='MIoU', num_classes=nclasses, average='equal'), 70 | ], 71 | dist_params=dict(backend='nccl'), 72 | ) 73 | 74 | ## 2.1 configuration for test 75 | test = dict( 76 | data=dict( 77 | dataset=dict( 78 | type=dataset_type, 79 | root=dataset_root, 80 | imglist_name='val.txt', 81 | multi_label=multi_label, 82 | ), 83 | transforms=inference['transforms'], 84 | sampler=dict( 85 | type='DefaultSampler', 86 | ), 87 | dataloader=dict( 88 | type='DataLoader', 89 | samples_per_gpu=1, 90 | workers_per_gpu=4, 91 | shuffle=False, 92 | drop_last=False, 93 | pin_memory=True, 94 | ), 95 | ), 96 | # tta=dict( 97 | # scales=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 98 | # biases=[0.5, 0.25, 0.0, -0.25, -0.5, -0.75], 99 | # flip=True, 100 | # ), 101 | ) 102 | 103 | ## 2.2 configuration for train 104 | max_epochs = 50 105 | 106 | train = dict( 107 | data=dict( 108 | train=dict( 109 | dataset=dict( 110 | type=dataset_type, 111 | root=dataset_root, 112 | imglist_name='trainaug.txt', 113 | multi_label=multi_label, 114 | ), 115 | transforms=[ 116 | dict(type='RandomScale', scale_limit=(0.5, 2), scale_step=0.25, 117 | interpolation=cv2.INTER_LINEAR), 118 | dict(type='PadIfNeeded', min_height=crop_size_h, 119 | min_width=crop_size_w, value=image_pad_value, 120 | mask_value=ignore_label), 121 | dict(type='RandomCrop', height=crop_size_h, width=crop_size_w), 122 | dict(type='HorizontalFlip', p=0.5), 123 | dict(type='Normalize', **img_norm_cfg), 124 | dict(type='ToTensor'), 125 | ], 126 | sampler=dict( 127 | type='DefaultSampler', 128 | ), 129 | dataloader=dict( 130 | type='DataLoader', 131 | samples_per_gpu=8, 132 | workers_per_gpu=4, 133 | shuffle=True, 134 | drop_last=True, 135 | pin_memory=True, 136 | ), 137 | ), 138 | val=dict( 139 | dataset=dict( 140 | type=dataset_type, 141 | root=dataset_root, 142 | imglist_name='val.txt', 143 | multi_label=multi_label, 144 | ), 145 | transforms=inference['transforms'], 146 | sampler=dict( 147 | type='DefaultSampler', 148 | ), 149 
| dataloader=dict( 150 | type='DataLoader', 151 | samples_per_gpu=8, 152 | workers_per_gpu=4, 153 | shuffle=False, 154 | drop_last=False, 155 | pin_memory=True, 156 | ), 157 | ), 158 | ), 159 | resume=None, 160 | criterion=dict(type='CrossEntropyLoss', ignore_index=ignore_label), 161 | optimizer=dict(type='SGD', lr=0.007, momentum=0.9, weight_decay=0.0001), 162 | lr_scheduler=dict(type='PolyLR', max_epochs=max_epochs), 163 | max_epochs=max_epochs, 164 | trainval_ratio=1, 165 | log_interval=10, 166 | snapshot_interval=5, 167 | save_best=True, 168 | ) 169 | -------------------------------------------------------------------------------- /semantic segmentation/configs/voc_deeplabv2.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | # 1. configuration for inference 4 | nclasses = 21 5 | ignore_label = 255 6 | multi_label = False 7 | 8 | crop_size_h, crop_size_w = 513, 513 9 | test_size_h, test_size_w = 513, 513 10 | image_pad_value = (123.675, 116.280, 103.530) 11 | 12 | img_norm_cfg = dict( 13 | max_pixel_value=255.0, 14 | std=(0.229, 0.224, 0.225), 15 | mean=(0.485, 0.456, 0.406), 16 | ) 17 | norm_cfg = dict(type='SyncBN') 18 | 19 | inference = dict( 20 | multi_label=multi_label, 21 | transforms=[ 22 | dict(type='PadIfNeeded', min_height=test_size_h, min_width=test_size_w, 23 | value=image_pad_value, mask_value=ignore_label), 24 | dict(type='Normalize', **img_norm_cfg), 25 | dict(type='ToTensor'), 26 | ], 27 | model=dict( 28 | # model/encoder 29 | encoder=dict( 30 | backbone=dict( 31 | type='MYVGG9', 32 | ), 33 | enhance=dict( 34 | type='ASPP_v2', 35 | from_layer='c5', 36 | to_layer='enhance', 37 | in_channels=512, 38 | out_channels=nclasses, 39 | atrous_rates=[6, 12, 18, 24], 40 | ), 41 | ), 42 | collect=dict(type='CollectBlock', from_layer='enhance'), 43 | # model/head 44 | head=dict( 45 | type='Head', 46 | no_convs=True, 47 | upsample=dict( 48 | type='Upsample', 49 | scale_factor=32, 50 | scale_bias=-31, 51 | mode='bilinear', 52 | align_corners=True, 53 | ), 54 | ), 55 | ), 56 | ) 57 | 58 | # 2. 
configuration for train/test 59 | root_workdir = 'workdir' 60 | dataset_type = 'VOCDataset' 61 | dataset_root = 'data/VOCdevkit/VOC2012/' 62 | 63 | common = dict( 64 | seed=0, 65 | logger=dict( 66 | handlers=( 67 | dict(type='StreamHandler', level='INFO'), 68 | dict(type='FileHandler', level='INFO'), 69 | ), 70 | ), 71 | cudnn_deterministic=False, 72 | cudnn_benchmark=True, 73 | metrics=[ 74 | dict(type='IoU', num_classes=nclasses), 75 | dict(type='MIoU', num_classes=nclasses, average='equal'), 76 | ], 77 | dist_params=dict(backend='nccl'), 78 | ) 79 | 80 | ## 2.1 configuration for test 81 | test = dict( 82 | data=dict( 83 | dataset=dict( 84 | type=dataset_type, 85 | root=dataset_root, 86 | imglist_name='val.txt', 87 | multi_label=multi_label, 88 | ), 89 | transforms=inference['transforms'], 90 | sampler=dict( 91 | type='DefaultSampler', 92 | ), 93 | dataloader=dict( 94 | type='DataLoader', 95 | samples_per_gpu=4, 96 | workers_per_gpu=4, 97 | shuffle=False, 98 | drop_last=False, 99 | pin_memory=True, 100 | ), 101 | ), 102 | # tta=dict( 103 | # scales=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 104 | # biases=[0.5, 0.25, 0.0, -0.25, -0.5, -0.75], 105 | # flip=True, 106 | # ), 107 | ) 108 | 109 | ## 2.2 configuration for train 110 | max_epochs = 50 111 | 112 | train = dict( 113 | data=dict( 114 | train=dict( 115 | dataset=dict( 116 | type=dataset_type, 117 | root=dataset_root, 118 | imglist_name='trainaug.txt', 119 | multi_label=multi_label, 120 | ), 121 | transforms=[ 122 | dict(type='RandomScale', scale_limit=(0.5, 2), scale_step=0.25, 123 | interpolation=cv2.INTER_LINEAR), 124 | dict(type='PadIfNeeded', min_height=crop_size_h, 125 | min_width=crop_size_w, value=image_pad_value, 126 | mask_value=ignore_label), 127 | dict(type='RandomCrop', height=crop_size_h, width=crop_size_w), 128 | dict(type='HorizontalFlip', p=0.5), 129 | dict(type='Normalize', **img_norm_cfg), 130 | dict(type='ToTensor'), 131 | ], 132 | sampler=dict( 133 | type='DefaultSampler', 134 | ), 135 | dataloader=dict( 136 | type='DataLoader', 137 | samples_per_gpu=8, 138 | workers_per_gpu=4, 139 | shuffle=True, 140 | drop_last=True, 141 | pin_memory=True, 142 | ), 143 | ), 144 | val=dict( 145 | dataset=dict( 146 | type=dataset_type, 147 | root=dataset_root, 148 | imglist_name='val.txt', 149 | multi_label=multi_label, 150 | ), 151 | transforms=inference['transforms'], 152 | sampler=dict( 153 | type='DefaultSampler', 154 | ), 155 | dataloader=dict( 156 | type='DataLoader', 157 | samples_per_gpu=8, 158 | workers_per_gpu=4, 159 | shuffle=False, 160 | drop_last=False, 161 | pin_memory=True, 162 | ), 163 | ), 164 | ), 165 | resume=None, 166 | criterion=dict(type='CrossEntropyLoss', ignore_index=ignore_label), 167 | optimizer=dict(type='SGD', lr=0.007, momentum=0.9, weight_decay=0.0001), 168 | lr_scheduler=dict(type='PolyLR', max_epochs=max_epochs), 169 | max_epochs=max_epochs, 170 | trainval_ratio=1, 171 | log_interval=10, 172 | snapshot_interval=5, 173 | save_best=True, 174 | ) 175 | -------------------------------------------------------------------------------- /semantic segmentation/configs/voc_deeplabv3.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | # 1. 
configuration for inference 4 | nclasses = 21 5 | ignore_label = 255 6 | multi_label = False 7 | 8 | crop_size_h, crop_size_w = 513, 513 9 | test_size_h, test_size_w = 513, 513 10 | image_pad_value = (123.675, 116.280, 103.530) 11 | 12 | img_norm_cfg = dict( 13 | max_pixel_value=255.0, 14 | std=(0.229, 0.224, 0.225), 15 | mean=(0.485, 0.456, 0.406), 16 | ) 17 | norm_cfg = dict(type='SyncBN') 18 | 19 | inference = dict( 20 | multi_label=multi_label, 21 | transforms=[ 22 | dict(type='PadIfNeeded', min_height=test_size_h, min_width=test_size_w, 23 | value=image_pad_value, mask_value=ignore_label), 24 | dict(type='Normalize', **img_norm_cfg), 25 | dict(type='ToTensor'), 26 | ], 27 | model=dict( 28 | # model/encoder 29 | encoder=dict( 30 | backbone=dict( 31 | type='MYResNet', 32 | arch='resnet34', 33 | replace_stride_with_dilation=[False, False, False], 34 | layer_stride = [1, 2, 2, 1], 35 | # multi_grid=[1, 2, 4], 36 | norm_cfg=norm_cfg, 37 | bit=32, 38 | ), 39 | ), 40 | collect=dict(type='CollectBlock', from_layer='c5'), 41 | # model/head 42 | head=dict( 43 | type='Head', 44 | no_convs=True, 45 | upsample=dict( 46 | type='Upsample', 47 | scale_factor=16, 48 | scale_bias=-15, 49 | mode='bilinear', 50 | align_corners=True, 51 | ), 52 | ), 53 | ), 54 | ) 55 | 56 | # 2. configuration for train/test 57 | root_workdir = 'workdir' 58 | dataset_type = 'VOCDataset' 59 | dataset_root = 'data/VOCdevkit/VOC2012/' 60 | 61 | common = dict( 62 | seed=0, 63 | logger=dict( 64 | handlers=( 65 | dict(type='StreamHandler', level='INFO'), 66 | dict(type='FileHandler', level='INFO'), 67 | ), 68 | ), 69 | cudnn_deterministic=False, 70 | cudnn_benchmark=True, 71 | metrics=[ 72 | dict(type='IoU', num_classes=nclasses), 73 | dict(type='MIoU', num_classes=nclasses, average='equal'), 74 | ], 75 | dist_params=dict(backend='nccl'), 76 | ) 77 | 78 | ## 2.1 configuration for test 79 | test = dict( 80 | data=dict( 81 | dataset=dict( 82 | type=dataset_type, 83 | root=dataset_root, 84 | imglist_name='val.txt', 85 | multi_label=multi_label, 86 | ), 87 | transforms=inference['transforms'], 88 | sampler=dict( 89 | type='DefaultSampler', 90 | ), 91 | dataloader=dict( 92 | type='DataLoader', 93 | samples_per_gpu=4, 94 | workers_per_gpu=4, 95 | shuffle=False, 96 | drop_last=False, 97 | pin_memory=True, 98 | ), 99 | ), 100 | # tta=dict( 101 | # scales=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 102 | # biases=[0.5, 0.25, 0.0, -0.25, -0.5, -0.75], 103 | # flip=True, 104 | # ), 105 | ) 106 | 107 | ## 2.2 configuration for train 108 | max_epochs = 50 109 | 110 | train = dict( 111 | data=dict( 112 | train=dict( 113 | dataset=dict( 114 | type=dataset_type, 115 | root=dataset_root, 116 | imglist_name='trainaug.txt', 117 | multi_label=multi_label, 118 | ), 119 | transforms=[ 120 | dict(type='RandomScale', scale_limit=(0.5, 2), scale_step=0.25, 121 | interpolation=cv2.INTER_LINEAR), 122 | dict(type='PadIfNeeded', min_height=crop_size_h, 123 | min_width=crop_size_w, value=image_pad_value, 124 | mask_value=ignore_label), 125 | dict(type='RandomCrop', height=crop_size_h, width=crop_size_w), 126 | dict(type='HorizontalFlip', p=0.5), 127 | dict(type='Normalize', **img_norm_cfg), 128 | dict(type='ToTensor'), 129 | ], 130 | sampler=dict( 131 | type='DefaultSampler', 132 | ), 133 | dataloader=dict( 134 | type='DataLoader', 135 | samples_per_gpu=8, 136 | workers_per_gpu=4, 137 | shuffle=True, 138 | drop_last=True, 139 | pin_memory=True, 140 | ), 141 | ), 142 | val=dict( 143 | dataset=dict( 144 | type=dataset_type, 145 | root=dataset_root, 146 | 
imglist_name='val.txt', 147 | multi_label=multi_label, 148 | ), 149 | transforms=inference['transforms'], 150 | sampler=dict( 151 | type='DefaultSampler', 152 | ), 153 | dataloader=dict( 154 | type='DataLoader', 155 | samples_per_gpu=8, 156 | workers_per_gpu=4, 157 | shuffle=False, 158 | drop_last=False, 159 | pin_memory=True, 160 | ), 161 | ), 162 | ), 163 | resume=None, 164 | criterion=dict(type='CrossEntropyLoss', ignore_index=ignore_label), 165 | optimizer=dict(type='SGD', lr=0.007, momentum=0.9, weight_decay=0.0001), 166 | lr_scheduler=dict(type='PolyLR', max_epochs=max_epochs), 167 | max_epochs=max_epochs, 168 | trainval_ratio=1, 169 | log_interval=10, 170 | snapshot_interval=5, 171 | save_best=True, 172 | ) 173 | -------------------------------------------------------------------------------- /semantic segmentation/configs/voc_deeplabv3_T15.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | # 1. configuration for inference 4 | nclasses = 21 5 | ignore_label = 255 6 | multi_label = False 7 | 8 | crop_size_h, crop_size_w = 513, 513 9 | test_size_h, test_size_w = 513, 513 10 | image_pad_value = (123.675, 116.280, 103.530) 11 | 12 | img_norm_cfg = dict( 13 | max_pixel_value=255.0, 14 | std=(0.229, 0.224, 0.225), 15 | mean=(0.485, 0.456, 0.406), 16 | ) 17 | norm_cfg = dict(type='SyncBN') 18 | 19 | inference = dict( 20 | multi_label=multi_label, 21 | transforms=[ 22 | dict(type='PadIfNeeded', min_height=test_size_h, min_width=test_size_w, 23 | value=image_pad_value, mask_value=ignore_label), 24 | dict(type='Normalize', **img_norm_cfg), 25 | dict(type='ToTensor'), 26 | ], 27 | model=dict( 28 | # model/encoder 29 | encoder=dict( 30 | backbone=dict( 31 | type='MYSResNet', 32 | arch='resnet34', 33 | replace_stride_with_dilation=[False, False, False], 34 | layer_stride = [1, 2, 2, 1], 35 | # multi_grid=[1, 2, 4], 36 | norm_cfg=norm_cfg, 37 | bit=4, 38 | pretrain=False, 39 | ), 40 | ), 41 | collect=dict(type='CollectBlock', from_layer='c5'), 42 | # model/head 43 | head=dict( 44 | type='Head', 45 | no_convs=True, 46 | upsample=dict( 47 | type='Upsample', 48 | scale_factor=16, 49 | scale_bias=-15, 50 | mode='bilinear', 51 | align_corners=True, 52 | ), 53 | ), 54 | ), 55 | ) 56 | 57 | # 2. 
configuration for train/test 58 | root_workdir = 'workdir' 59 | dataset_type = 'VOCDataset' 60 | dataset_root = 'data/VOCdevkit/VOC2012/' 61 | 62 | common = dict( 63 | seed=0, 64 | logger=dict( 65 | handlers=( 66 | dict(type='StreamHandler', level='INFO'), 67 | dict(type='FileHandler', level='INFO'), 68 | ), 69 | ), 70 | cudnn_deterministic=False, 71 | cudnn_benchmark=True, 72 | metrics=[ 73 | dict(type='IoU', num_classes=nclasses), 74 | dict(type='MIoU', num_classes=nclasses, average='equal'), 75 | ], 76 | dist_params=dict(backend='nccl'), 77 | ) 78 | 79 | ## 2.1 configuration for test 80 | test = dict( 81 | data=dict( 82 | dataset=dict( 83 | type=dataset_type, 84 | root=dataset_root, 85 | imglist_name='val.txt', 86 | multi_label=multi_label, 87 | ), 88 | transforms=inference['transforms'], 89 | sampler=dict( 90 | type='DefaultSampler', 91 | ), 92 | dataloader=dict( 93 | type='DataLoader', 94 | samples_per_gpu=1, 95 | workers_per_gpu=4, 96 | shuffle=False, 97 | drop_last=False, 98 | pin_memory=True, 99 | ), 100 | ), 101 | # tta=dict( 102 | # scales=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 103 | # biases=[0.5, 0.25, 0.0, -0.25, -0.5, -0.75], 104 | # flip=True, 105 | # ), 106 | ) 107 | 108 | ## 2.2 configuration for train 109 | max_epochs = 50 110 | 111 | train = dict( 112 | data=dict( 113 | train=dict( 114 | dataset=dict( 115 | type=dataset_type, 116 | root=dataset_root, 117 | imglist_name='trainaug.txt', 118 | multi_label=multi_label, 119 | ), 120 | transforms=[ 121 | dict(type='RandomScale', scale_limit=(0.5, 2), scale_step=0.25, 122 | interpolation=cv2.INTER_LINEAR), 123 | dict(type='PadIfNeeded', min_height=crop_size_h, 124 | min_width=crop_size_w, value=image_pad_value, 125 | mask_value=ignore_label), 126 | dict(type='RandomCrop', height=crop_size_h, width=crop_size_w), 127 | dict(type='HorizontalFlip', p=0.5), 128 | dict(type='Normalize', **img_norm_cfg), 129 | dict(type='ToTensor'), 130 | ], 131 | sampler=dict( 132 | type='DefaultSampler', 133 | ), 134 | dataloader=dict( 135 | type='DataLoader', 136 | samples_per_gpu=8, 137 | workers_per_gpu=4, 138 | shuffle=True, 139 | drop_last=True, 140 | pin_memory=True, 141 | ), 142 | ), 143 | val=dict( 144 | dataset=dict( 145 | type=dataset_type, 146 | root=dataset_root, 147 | imglist_name='val.txt', 148 | multi_label=multi_label, 149 | ), 150 | transforms=inference['transforms'], 151 | sampler=dict( 152 | type='DefaultSampler', 153 | ), 154 | dataloader=dict( 155 | type='DataLoader', 156 | samples_per_gpu=8, 157 | workers_per_gpu=4, 158 | shuffle=False, 159 | drop_last=False, 160 | pin_memory=True, 161 | ), 162 | ), 163 | ), 164 | resume=None, 165 | criterion=dict(type='CrossEntropyLoss', ignore_index=ignore_label), 166 | optimizer=dict(type='SGD', lr=0.007, momentum=0.9, weight_decay=0.0001), 167 | lr_scheduler=dict(type='PolyLR', max_epochs=max_epochs), 168 | max_epochs=max_epochs, 169 | trainval_ratio=1, 170 | log_interval=10, 171 | snapshot_interval=5, 172 | save_best=True, 173 | ) 174 | -------------------------------------------------------------------------------- /semantic segmentation/configs/voc_deeplabv3_T3.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | # 1. 
configuration for inference 4 | nclasses = 21 5 | ignore_label = 255 6 | multi_label = False 7 | 8 | crop_size_h, crop_size_w = 513, 513 9 | test_size_h, test_size_w = 513, 513 10 | image_pad_value = (123.675, 116.280, 103.530) 11 | 12 | img_norm_cfg = dict( 13 | max_pixel_value=255.0, 14 | std=(0.229, 0.224, 0.225), 15 | mean=(0.485, 0.456, 0.406), 16 | ) 17 | norm_cfg = dict(type='SyncBN') 18 | 19 | inference = dict( 20 | multi_label=multi_label, 21 | transforms=[ 22 | dict(type='PadIfNeeded', min_height=test_size_h, min_width=test_size_w, 23 | value=image_pad_value, mask_value=ignore_label), 24 | dict(type='Normalize', **img_norm_cfg), 25 | dict(type='ToTensor'), 26 | ], 27 | model=dict( 28 | # model/encoder 29 | encoder=dict( 30 | backbone=dict( 31 | type='MYSResNet', 32 | arch='resnet34', 33 | replace_stride_with_dilation=[False, False, False], 34 | layer_stride = [1, 2, 2, 1], 35 | # multi_grid=[1, 2, 4], 36 | norm_cfg=norm_cfg, 37 | bit=2, 38 | pretrain=False, 39 | ), 40 | ), 41 | collect=dict(type='CollectBlock', from_layer='c5'), 42 | # model/head 43 | head=dict( 44 | type='Head', 45 | no_convs=True, 46 | upsample=dict( 47 | type='Upsample', 48 | scale_factor=16, 49 | scale_bias=-15, 50 | mode='bilinear', 51 | align_corners=True, 52 | ), 53 | ), 54 | ), 55 | ) 56 | 57 | # 2. configuration for train/test 58 | root_workdir = 'workdir' 59 | dataset_type = 'VOCDataset' 60 | dataset_root = 'data/VOCdevkit/VOC2012/' 61 | 62 | common = dict( 63 | seed=0, 64 | logger=dict( 65 | handlers=( 66 | dict(type='StreamHandler', level='INFO'), 67 | dict(type='FileHandler', level='INFO'), 68 | ), 69 | ), 70 | cudnn_deterministic=False, 71 | cudnn_benchmark=True, 72 | metrics=[ 73 | dict(type='IoU', num_classes=nclasses), 74 | dict(type='MIoU', num_classes=nclasses, average='equal'), 75 | ], 76 | dist_params=dict(backend='nccl'), 77 | ) 78 | 79 | ## 2.1 configuration for test 80 | test = dict( 81 | data=dict( 82 | dataset=dict( 83 | type=dataset_type, 84 | root=dataset_root, 85 | imglist_name='val.txt', 86 | multi_label=multi_label, 87 | ), 88 | transforms=inference['transforms'], 89 | sampler=dict( 90 | type='DefaultSampler', 91 | ), 92 | dataloader=dict( 93 | type='DataLoader', 94 | samples_per_gpu=1, 95 | workers_per_gpu=4, 96 | shuffle=False, 97 | drop_last=False, 98 | pin_memory=True, 99 | ), 100 | ), 101 | # tta=dict( 102 | # scales=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 103 | # biases=[0.5, 0.25, 0.0, -0.25, -0.5, -0.75], 104 | # flip=True, 105 | # ), 106 | ) 107 | 108 | ## 2.2 configuration for train 109 | max_epochs = 50 110 | 111 | train = dict( 112 | data=dict( 113 | train=dict( 114 | dataset=dict( 115 | type=dataset_type, 116 | root=dataset_root, 117 | imglist_name='trainaug.txt', 118 | multi_label=multi_label, 119 | ), 120 | transforms=[ 121 | dict(type='RandomScale', scale_limit=(0.5, 2), scale_step=0.25, 122 | interpolation=cv2.INTER_LINEAR), 123 | dict(type='PadIfNeeded', min_height=crop_size_h, 124 | min_width=crop_size_w, value=image_pad_value, 125 | mask_value=ignore_label), 126 | dict(type='RandomCrop', height=crop_size_h, width=crop_size_w), 127 | dict(type='HorizontalFlip', p=0.5), 128 | dict(type='Normalize', **img_norm_cfg), 129 | dict(type='ToTensor'), 130 | ], 131 | sampler=dict( 132 | type='DefaultSampler', 133 | ), 134 | dataloader=dict( 135 | type='DataLoader', 136 | samples_per_gpu=8, 137 | workers_per_gpu=4, 138 | shuffle=True, 139 | drop_last=True, 140 | pin_memory=True, 141 | ), 142 | ), 143 | val=dict( 144 | dataset=dict( 145 | type=dataset_type, 146 | 
root=dataset_root, 147 | imglist_name='val.txt', 148 | multi_label=multi_label, 149 | ), 150 | transforms=inference['transforms'], 151 | sampler=dict( 152 | type='DefaultSampler', 153 | ), 154 | dataloader=dict( 155 | type='DataLoader', 156 | samples_per_gpu=8, 157 | workers_per_gpu=4, 158 | shuffle=False, 159 | drop_last=False, 160 | pin_memory=True, 161 | ), 162 | ), 163 | ), 164 | resume=None, 165 | criterion=dict(type='CrossEntropyLoss', ignore_index=ignore_label), 166 | optimizer=dict(type='SGD', lr=0.007, momentum=0.9, weight_decay=0.0001), 167 | lr_scheduler=dict(type='PolyLR', max_epochs=max_epochs), 168 | max_epochs=max_epochs, 169 | trainval_ratio=1, 170 | log_interval=10, 171 | snapshot_interval=5, 172 | save_best=True, 173 | ) 174 | -------------------------------------------------------------------------------- /semantic segmentation/configs/voc_deeplabv3_T7.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | # 1. configuration for inference 4 | nclasses = 21 5 | ignore_label = 255 6 | multi_label = False 7 | 8 | crop_size_h, crop_size_w = 513, 513 9 | test_size_h, test_size_w = 513, 513 10 | image_pad_value = (123.675, 116.280, 103.530) 11 | 12 | img_norm_cfg = dict( 13 | max_pixel_value=255.0, 14 | std=(0.229, 0.224, 0.225), 15 | mean=(0.485, 0.456, 0.406), 16 | ) 17 | norm_cfg = dict(type='SyncBN') 18 | 19 | inference = dict( 20 | multi_label=multi_label, 21 | transforms=[ 22 | dict(type='PadIfNeeded', min_height=test_size_h, min_width=test_size_w, 23 | value=image_pad_value, mask_value=ignore_label), 24 | dict(type='Normalize', **img_norm_cfg), 25 | dict(type='ToTensor'), 26 | ], 27 | model=dict( 28 | # model/encoder 29 | encoder=dict( 30 | backbone=dict( 31 | type='MYSResNet', 32 | arch='resnet34', 33 | replace_stride_with_dilation=[False, False, False], 34 | layer_stride = [1, 2, 2, 1], 35 | # multi_grid=[1, 2, 4], 36 | norm_cfg=norm_cfg, 37 | bit=3, 38 | pretrain=False, 39 | ), 40 | ), 41 | collect=dict(type='CollectBlock', from_layer='c5'), 42 | # model/head 43 | head=dict( 44 | type='Head', 45 | no_convs=True, 46 | upsample=dict( 47 | type='Upsample', 48 | scale_factor=16, 49 | scale_bias=-15, 50 | mode='bilinear', 51 | align_corners=True, 52 | ), 53 | ), 54 | ), 55 | ) 56 | 57 | # 2. 
configuration for train/test 58 | root_workdir = 'workdir' 59 | dataset_type = 'VOCDataset' 60 | dataset_root = 'data/VOCdevkit/VOC2012/' 61 | 62 | common = dict( 63 | seed=0, 64 | logger=dict( 65 | handlers=( 66 | dict(type='StreamHandler', level='INFO'), 67 | dict(type='FileHandler', level='INFO'), 68 | ), 69 | ), 70 | cudnn_deterministic=False, 71 | cudnn_benchmark=True, 72 | metrics=[ 73 | dict(type='IoU', num_classes=nclasses), 74 | dict(type='MIoU', num_classes=nclasses, average='equal'), 75 | ], 76 | dist_params=dict(backend='nccl'), 77 | ) 78 | 79 | ## 2.1 configuration for test 80 | test = dict( 81 | data=dict( 82 | dataset=dict( 83 | type=dataset_type, 84 | root=dataset_root, 85 | imglist_name='val.txt', 86 | multi_label=multi_label, 87 | ), 88 | transforms=inference['transforms'], 89 | sampler=dict( 90 | type='DefaultSampler', 91 | ), 92 | dataloader=dict( 93 | type='DataLoader', 94 | samples_per_gpu=1, 95 | workers_per_gpu=4, 96 | shuffle=False, 97 | drop_last=False, 98 | pin_memory=True, 99 | ), 100 | ), 101 | # tta=dict( 102 | # scales=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 103 | # biases=[0.5, 0.25, 0.0, -0.25, -0.5, -0.75], 104 | # flip=True, 105 | # ), 106 | ) 107 | 108 | ## 2.2 configuration for train 109 | max_epochs = 50 110 | 111 | train = dict( 112 | data=dict( 113 | train=dict( 114 | dataset=dict( 115 | type=dataset_type, 116 | root=dataset_root, 117 | imglist_name='trainaug.txt', 118 | multi_label=multi_label, 119 | ), 120 | transforms=[ 121 | dict(type='RandomScale', scale_limit=(0.5, 2), scale_step=0.25, 122 | interpolation=cv2.INTER_LINEAR), 123 | dict(type='PadIfNeeded', min_height=crop_size_h, 124 | min_width=crop_size_w, value=image_pad_value, 125 | mask_value=ignore_label), 126 | dict(type='RandomCrop', height=crop_size_h, width=crop_size_w), 127 | dict(type='HorizontalFlip', p=0.5), 128 | dict(type='Normalize', **img_norm_cfg), 129 | dict(type='ToTensor'), 130 | ], 131 | sampler=dict( 132 | type='DefaultSampler', 133 | ), 134 | dataloader=dict( 135 | type='DataLoader', 136 | samples_per_gpu=8, 137 | workers_per_gpu=4, 138 | shuffle=True, 139 | drop_last=True, 140 | pin_memory=True, 141 | ), 142 | ), 143 | val=dict( 144 | dataset=dict( 145 | type=dataset_type, 146 | root=dataset_root, 147 | imglist_name='val.txt', 148 | multi_label=multi_label, 149 | ), 150 | transforms=inference['transforms'], 151 | sampler=dict( 152 | type='DefaultSampler', 153 | ), 154 | dataloader=dict( 155 | type='DataLoader', 156 | samples_per_gpu=8, 157 | workers_per_gpu=4, 158 | shuffle=False, 159 | drop_last=False, 160 | pin_memory=True, 161 | ), 162 | ), 163 | ), 164 | resume=None, 165 | criterion=dict(type='CrossEntropyLoss', ignore_index=ignore_label), 166 | optimizer=dict(type='SGD', lr=0.007, momentum=0.9, weight_decay=0.0001), 167 | lr_scheduler=dict(type='PolyLR', max_epochs=max_epochs), 168 | max_epochs=max_epochs, 169 | trainval_ratio=1, 170 | log_interval=10, 171 | snapshot_interval=5, 172 | save_best=True, 173 | ) 174 | -------------------------------------------------------------------------------- /semantic segmentation/configs/voc_deeplabvr.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | # 1. 
configuration for inference 4 | nclasses = 21 5 | ignore_label = 255 6 | multi_label = False 7 | 8 | crop_size_h, crop_size_w = 513, 513 9 | test_size_h, test_size_w = 513, 513 10 | image_pad_value = (123.675, 116.280, 103.530) 11 | 12 | img_norm_cfg = dict( 13 | max_pixel_value=255.0, 14 | std=(0.229, 0.224, 0.225), 15 | mean=(0.485, 0.456, 0.406), 16 | ) 17 | norm_cfg = dict(type='SyncBN') 18 | 19 | inference = dict( 20 | multi_label=multi_label, 21 | transforms=[ 22 | dict(type='PadIfNeeded', min_height=test_size_h, min_width=test_size_w, 23 | value=image_pad_value, mask_value=ignore_label), 24 | dict(type='Normalize', **img_norm_cfg), 25 | dict(type='ToTensor'), 26 | ], 27 | model=dict( 28 | # model/encoder 29 | encoder=dict( 30 | backbone=dict( 31 | type='MYResNet', 32 | arch='resnet18', 33 | replace_stride_with_dilation=[False, False, False], 34 | layer_stride = [1, 2, 2, 1], 35 | # multi_grid=[1, 2, 4], 36 | norm_cfg=norm_cfg, 37 | bit=32, 38 | ), 39 | ), 40 | collect=dict(type='CollectBlock', from_layer='c5'), 41 | # model/head 42 | head=dict( 43 | type='Head', 44 | no_convs=True, 45 | upsample=dict( 46 | type='Upsample', 47 | scale_factor=16, 48 | scale_bias=-15, 49 | mode='bilinear', 50 | align_corners=True, 51 | ), 52 | ), 53 | ), 54 | ) 55 | 56 | # 2. configuration for train/test 57 | root_workdir = 'workdir' 58 | dataset_type = 'VOCDataset' 59 | dataset_root = 'data/VOCdevkit/VOC2012/' 60 | 61 | common = dict( 62 | seed=0, 63 | logger=dict( 64 | handlers=( 65 | dict(type='StreamHandler', level='INFO'), 66 | dict(type='FileHandler', level='INFO'), 67 | ), 68 | ), 69 | cudnn_deterministic=False, 70 | cudnn_benchmark=True, 71 | metrics=[ 72 | dict(type='IoU', num_classes=nclasses), 73 | dict(type='MIoU', num_classes=nclasses, average='equal'), 74 | ], 75 | dist_params=dict(backend='nccl'), 76 | ) 77 | 78 | ## 2.1 configuration for test 79 | test = dict( 80 | data=dict( 81 | dataset=dict( 82 | type=dataset_type, 83 | root=dataset_root, 84 | imglist_name='val.txt', 85 | multi_label=multi_label, 86 | ), 87 | transforms=inference['transforms'], 88 | sampler=dict( 89 | type='DefaultSampler', 90 | ), 91 | dataloader=dict( 92 | type='DataLoader', 93 | samples_per_gpu=4, 94 | workers_per_gpu=4, 95 | shuffle=False, 96 | drop_last=False, 97 | pin_memory=True, 98 | ), 99 | ), 100 | # tta=dict( 101 | # scales=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 102 | # biases=[0.5, 0.25, 0.0, -0.25, -0.5, -0.75], 103 | # flip=True, 104 | # ), 105 | ) 106 | 107 | ## 2.2 configuration for train 108 | max_epochs = 50 109 | 110 | train = dict( 111 | data=dict( 112 | train=dict( 113 | dataset=dict( 114 | type=dataset_type, 115 | root=dataset_root, 116 | imglist_name='trainaug.txt', 117 | multi_label=multi_label, 118 | ), 119 | transforms=[ 120 | dict(type='RandomScale', scale_limit=(0.5, 2), scale_step=0.25, 121 | interpolation=cv2.INTER_LINEAR), 122 | dict(type='PadIfNeeded', min_height=crop_size_h, 123 | min_width=crop_size_w, value=image_pad_value, 124 | mask_value=ignore_label), 125 | dict(type='RandomCrop', height=crop_size_h, width=crop_size_w), 126 | dict(type='HorizontalFlip', p=0.5), 127 | dict(type='Normalize', **img_norm_cfg), 128 | dict(type='ToTensor'), 129 | ], 130 | sampler=dict( 131 | type='DefaultSampler', 132 | ), 133 | dataloader=dict( 134 | type='DataLoader', 135 | samples_per_gpu=8, 136 | workers_per_gpu=4, 137 | shuffle=True, 138 | drop_last=True, 139 | pin_memory=True, 140 | ), 141 | ), 142 | val=dict( 143 | dataset=dict( 144 | type=dataset_type, 145 | root=dataset_root, 146 | 
imglist_name='val.txt', 147 | multi_label=multi_label, 148 | ), 149 | transforms=inference['transforms'], 150 | sampler=dict( 151 | type='DefaultSampler', 152 | ), 153 | dataloader=dict( 154 | type='DataLoader', 155 | samples_per_gpu=8, 156 | workers_per_gpu=4, 157 | shuffle=False, 158 | drop_last=False, 159 | pin_memory=True, 160 | ), 161 | ), 162 | ), 163 | resume=None, 164 | criterion=dict(type='CrossEntropyLoss', ignore_index=ignore_label), 165 | optimizer=dict(type='SGD', lr=0.007, momentum=0.9, weight_decay=0.0001), 166 | lr_scheduler=dict(type='PolyLR', max_epochs=max_epochs), 167 | max_epochs=max_epochs, 168 | trainval_ratio=1, 169 | log_interval=10, 170 | snapshot_interval=5, 171 | save_best=True, 172 | ) 173 | -------------------------------------------------------------------------------- /semantic segmentation/tools/decode.py: -------------------------------------------------------------------------------- 1 | # https://gist.github.com/wllhf/a4533e0adebe57e3ed06d4b50c8419ae 2 | # https://github.com/tensorflow/models/blob/master/research/deeplab/utils/get_dataset_colormap.py 3 | # https://github.com/tensorflow/models/blob/master/research/deeplab/datasets/remove_gt_colormap.py 4 | 5 | import glob 6 | import numpy as np 7 | import os 8 | from PIL import Image 9 | 10 | 11 | def color_map(N=256, normalized=False): 12 | def bitget(byteval, idx): 13 | return ((byteval & (1 << idx)) != 0) 14 | 15 | dtype = 'float32' if normalized else 'uint8' 16 | cmap = np.zeros((N, 3), dtype=dtype) 17 | for i in range(N): 18 | r = g = b = 0 19 | c = i 20 | for j in range(8): 21 | r = r | (bitget(c, 0) << 7 - j) 22 | g = g | (bitget(c, 1) << 7 - j) 23 | b = b | (bitget(c, 2) << 7 - j) 24 | c = c >> 3 25 | 26 | cmap[i] = np.array([r, g, b]) 27 | 28 | cmap = cmap / 255 if normalized else cmap 29 | return cmap 30 | 31 | 32 | def main(): 33 | root = 'workpiece/VOC2012' 34 | src_name = 'EncodeSegmentationClass' 35 | dst_name = 'DecodeSegmentationClass' 36 | src_dir = '%s/%s' % (root, src_name) 37 | dst_dir = '%s/%s' % (root, dst_name) 38 | os.makedirs(dst_dir) 39 | items = glob.glob('%s/*.png' % src_dir) 40 | total = len(items) 41 | for idx, item in enumerate(items): 42 | print('%d/%d' % (idx, total)) 43 | new_item = item.replace(src_name, dst_name) 44 | target = np.array(Image.open(item))[:, :, np.newaxis] 45 | cmap = color_map()[:, np.newaxis, :] 46 | new_im = np.dot(target == 0, cmap[0]) 47 | for i in range(1, cmap.shape[0]): 48 | new_im += np.dot(target == i, cmap[i]) 49 | new_im = Image.fromarray(new_im.astype(np.uint8)) 50 | new_im.save(new_item) 51 | 52 | 53 | if __name__ == '__main__': 54 | main() 55 | -------------------------------------------------------------------------------- /semantic segmentation/tools/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | if (($# < 3)); then 4 | echo "Usage: bash tools/dist_test.sh config_file checkpoint gpus_to_use" 5 | exit 1 6 | fi 7 | 8 | CONFIG="$1" 9 | CHECKPOINT="$2" 10 | GPUS="$3" 11 | 12 | IFS=', ' read -r -a gpus <<<"${GPUS}" 13 | NGPUS="${#gpus[@]}" 14 | PORT="$((29400 + RANDOM % 100))" 15 | 16 | export CUDA_VISIBLE_DEVICES=${GPUS} 17 | 18 | PYTHONPATH="$(dirname "$0")/..":${PYTHONPATH} \ 19 | python -m torch.distributed.launch \ 20 | --nproc_per_node="${NGPUS}" \ 21 | --master_port=${PORT} \ 22 | "$(dirname "$0")"/test.py \ 23 | "$CONFIG" \ 24 | "$CHECKPOINT" \ 25 | --distribute \ 26 | "${@:4}" 27 |
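For context, a minimal example invocation of the dist_test.sh launcher above might look like the following; the config path, checkpoint path, and GPU list are placeholders chosen for illustration (the checkpoint name follows the best_mIoU.pth convention used by the configs), not a command documented by the repository:

bash tools/dist_test.sh configs/voc_deeplabv1.py workdir/voc_deeplabv1/best_mIoU.pth 0,1,2,3

The third argument is split on commas to count the GPUs, which sets --nproc_per_node, and the same string is exported as CUDA_VISIBLE_DEVICES, so one test process is launched per listed GPU; any further arguments are forwarded unchanged to tools/test.py.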
-------------------------------------------------------------------------------- /semantic segmentation/tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | if (($# < 2)); then 4 | echo "Usage: bash tools/dist_train.sh config_file gpus_to_use" 5 | exit 1 6 | fi 7 | CONFIG="$1" 8 | GPUS="$2" 9 | 10 | IFS=', ' read -r -a gpus <<<"${GPUS}" 11 | NGPUS="${#gpus[@]}" 12 | PORT="$((29400 + RANDOM % 100))" 13 | 14 | export CUDA_VISIBLE_DEVICES=${GPUS} 15 | 16 | PYTHONPATH="$(dirname "$0")/..":${PYTHONPATH} \ 17 | python -m torch.distributed.launch \ 18 | --nproc_per_node="${NGPUS}" \ 19 | --master_port=${PORT} \ 20 | "$(dirname "$0")"/train.py \ 21 | "$CONFIG" \ 22 | --distribute \ 23 | "${@:3}" 24 | -------------------------------------------------------------------------------- /semantic segmentation/tools/encode_voc12.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import numpy as np 3 | import os 4 | from PIL import Image 5 | 6 | 7 | def main(): 8 | root = 'VOCdevkit/VOC2012' 9 | src_name = 'SegmentationClass' 10 | dst_name = 'EncodeSegmentationClassPart' 11 | src_dir = '%s/%s' % (root, src_name) 12 | dst_dir = '%s/%s' % (root, dst_name) 13 | os.makedirs(dst_dir) 14 | items = glob.glob('%s/*.png' % src_dir) 15 | total = len(items) 16 | for idx, item in enumerate(items): 17 | print('%d/%d' % (idx, total)) 18 | new_item = item.replace(src_name, dst_name) 19 | new_mask = np.array(Image.open(item)) 20 | Image.fromarray(new_mask.astype(dtype=np.uint8)).save(new_item, 'PNG') 21 | 22 | 23 | if __name__ == '__main__': 24 | main() 25 | -------------------------------------------------------------------------------- /semantic segmentation/tools/encode_voc12_aug.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import numpy as np 3 | import os 4 | import scipy.io as io 5 | from PIL import Image 6 | 7 | 8 | def main(): 9 | root = 'benchmark_RELEASE/dataset' 10 | src_name = 'cls' 11 | dst_name = 'encode_cls' 12 | src_dir = '%s/%s' % (root, src_name) 13 | dst_dir = '%s/%s' % (root, dst_name) 14 | os.makedirs(dst_dir) 15 | items = glob.glob('%s/*.mat' % src_dir) 16 | total = len(items) 17 | for idx, item in enumerate(items): 18 | print('%d/%d' % (idx, total)) 19 | data = io.loadmat(item) 20 | mask = data['GTcls'][0]['Segmentation'][0].astype(np.int32) 21 | new_item = item.replace(src_name, dst_name).replace('.mat', '.png') 22 | Image.fromarray(mask.astype(dtype=np.uint8)).save(new_item, 'PNG') 23 | 24 | 25 | if __name__ == '__main__': 26 | main() 27 | -------------------------------------------------------------------------------- /semantic segmentation/tools/test.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import sys 4 | 5 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../')) 6 | 7 | from vedaseg.runners import TestRunner 8 | from vedaseg.utils import Config 9 | 10 | 11 | def parse_args(): 12 | parser = argparse.ArgumentParser(description='Test a segmentation model') 13 | parser.add_argument('config', type=str, help='config file path') 14 | parser.add_argument('checkpoint', type=str, help='checkpoint file path') 15 | parser.add_argument('--distribute', default=False, action='store_true') 16 | parser.add_argument('--local_rank', type=int, default=0) 17 | args = parser.parse_args() 18 | if 'LOCAL_RANK' not in os.environ: 19 |
os.environ['LOCAL_RANK'] = str(args.local_rank) 20 | args = parser.parse_args() 21 | return args 22 | 23 | 24 | def main(): 25 | args = parse_args() 26 | 27 | cfg_path = args.config 28 | cfg = Config.fromfile(cfg_path) 29 | 30 | _, fullname = os.path.split(cfg_path) 31 | fname, ext = os.path.splitext(fullname) 32 | 33 | root_workdir = cfg.pop('root_workdir') 34 | workdir = os.path.join(root_workdir, fname) 35 | os.makedirs(workdir, exist_ok=True) 36 | 37 | test_cfg = cfg['test'] 38 | inference_cfg = cfg['inference'] 39 | common_cfg = cfg['common'] 40 | common_cfg['workdir'] = workdir 41 | common_cfg['distribute'] = args.distribute 42 | 43 | runner = TestRunner(test_cfg, inference_cfg, common_cfg) 44 | runner.load_checkpoint(args.checkpoint) 45 | runner() 46 | 47 | 48 | if __name__ == '__main__': 49 | main() 50 | -------------------------------------------------------------------------------- /semantic segmentation/tools/torch2onnx.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import sys 4 | 5 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../')) 6 | 7 | import torch 8 | from volksdep.converters import torch2onnx 9 | 10 | from vedaseg.runners import InferenceRunner 11 | from vedaseg.utils import Config 12 | 13 | 14 | def parse_args(): 15 | parser = argparse.ArgumentParser(description='Convert to Onnx model.') 16 | parser.add_argument('config', help='config file path') 17 | parser.add_argument('checkpoint', help='checkpoint file path') 18 | parser.add_argument('out', help='output onnx file name') 19 | parser.add_argument('--dummy_input_shape', default='3,800,1344', 20 | type=str, help='model input shape like 3,800,1344. ' 21 | 'Shape format is CxHxW') 22 | parser.add_argument('--dynamic_shape', default=False, action='store_true', 23 | help='whether to use dynamic shape') 24 | parser.add_argument('--opset_version', default=9, type=int, 25 | help='onnx opset version') 26 | parser.add_argument('--do_constant_folding', default=False, 27 | action='store_true', 28 | help='whether to apply constant-folding optimization') 29 | parser.add_argument('--verbose', default=False, action='store_true', 30 | help='whether print convert info') 31 | 32 | args = parser.parse_args() 33 | 34 | return args 35 | 36 | 37 | def main(): 38 | args = parse_args() 39 | cfg = Config.fromfile(args.config) 40 | 41 | inference_cfg = cfg['inference'] 42 | common_cfg = cfg.get('common') 43 | 44 | runner = InferenceRunner(inference_cfg, common_cfg) 45 | assert runner.use_gpu, 'Please use valid gpu to export model.' 
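# Load the trained weights into the runner's model, then build a dummy input of
# shape (1, C, H, W) from --dummy_input_shape; the actual ONNX export is handled
# by volksdep's torch2onnx call below.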
46 | runner.load_checkpoint(args.checkpoint) 47 | model = runner.model 48 | 49 | shape = map(int, args.dummy_input_shape.split(',')) 50 | dummy_input = torch.randn(1, *shape) 51 | 52 | if args.dynamic_shape: 53 | print(f'Convert to Onnx with dynamic input shape and ' 54 | f'opset version {args.opset_version}') 55 | else: 56 | print(f'Convert to Onnx with constant input shape ' 57 | f'{args.dummy_input_shape} and ' 58 | f'opset version {args.opset_version}') 59 | torch2onnx(model, dummy_input, args.out, dynamic_shape=args.dynamic_shape, 60 | opset_version=args.opset_version, 61 | do_constant_folding=args.do_constant_folding, 62 | verbose=args.verbose) 63 | print(f'Convert successfully, saved onnx file: {os.path.abspath(args.out)}') 64 | 65 | 66 | if __name__ == '__main__': 67 | main() 68 | -------------------------------------------------------------------------------- /semantic segmentation/tools/train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import sys 4 | 5 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../')) 6 | 7 | from vedaseg.runners import TrainRunner 8 | from vedaseg.utils import Config 9 | 10 | 11 | def parse_args(): 12 | parser = argparse.ArgumentParser(description='Train a segmentation model') 13 | parser.add_argument('config', type=str, help='config file path') 14 | parser.add_argument('--distribute', default=False, action='store_true') 15 | parser.add_argument('--local_rank', type=int, default=0) 16 | args = parser.parse_args() 17 | if 'LOCAL_RANK' not in os.environ: 18 | os.environ['LOCAL_RANK'] = str(args.local_rank) 19 | 20 | return args 21 | 22 | 23 | def main(): 24 | args = parse_args() 25 | 26 | cfg_path = args.config 27 | cfg = Config.fromfile(cfg_path) 28 | 29 | _, fullname = os.path.split(cfg_path) 30 | fname, ext = os.path.splitext(fullname) 31 | 32 | root_workdir = cfg.pop('root_workdir') 33 | workdir = os.path.join(root_workdir, fname) 34 | os.makedirs(workdir, exist_ok=True) 35 | 36 | train_cfg = cfg['train'] 37 | inference_cfg = cfg['inference'] 38 | common_cfg = cfg['common'] 39 | common_cfg['workdir'] = workdir 40 | common_cfg['distribute'] = args.distribute 41 | 42 | runner = TrainRunner(train_cfg, inference_cfg, common_cfg) 43 | runner() 44 | 45 | 46 | if __name__ == '__main__': 47 | main() 48 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zju-bmi-lab/Fast-SNN/e4315cd4e74b4e185ab12bbe2dd74bc3fdccc547/semantic segmentation/vedaseg/__init__.py -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/criteria/__init__.py: -------------------------------------------------------------------------------- 1 | from .bce_loss import BCEWithLogitsLoss 2 | from .builder import build_criterion 3 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/criteria/bce_loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | from .registry import CRITERIA 4 | 5 | 6 | @CRITERIA.register_module 7 | class BCEWithLogitsLoss(nn.Module): 8 | def __init__(self, ignore_index=-1, *args, **kwargs): 9 | super(BCEWithLogitsLoss, self).__init__() 10 | 11 | self.ignore_index = ignore_index 12 | self.loss = 
nn.BCEWithLogitsLoss(*args, **kwargs) 13 | 14 | def forward(self, pred, target): 15 | 16 | valid_mask = target != self.ignore_index 17 | losses = self.loss(pred[valid_mask], target[valid_mask].float()) 18 | 19 | return losses 20 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/criteria/builder.py: -------------------------------------------------------------------------------- 1 | from ..utils import build_from_cfg 2 | from .registry import CRITERIA 3 | 4 | 5 | def build_criterion(cfg): 6 | criterion = build_from_cfg(cfg, CRITERIA, mode='registry') 7 | return criterion 8 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/criteria/registry.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | from vedaseg.utils import Registry 4 | 5 | CRITERIA = Registry('criterion') 6 | 7 | CrossEntropyLoss = nn.CrossEntropyLoss 8 | CRITERIA.register_module(CrossEntropyLoss) 9 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/dataloaders/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_dataloader 2 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/dataloaders/builder.py: -------------------------------------------------------------------------------- 1 | from ..utils import build_from_cfg 2 | from .registry import DATALOADERS 3 | 4 | 5 | def build_dataloader(distributed, num_gpus, cfg, default_args=None): 6 | cfg_ = cfg.copy() 7 | 8 | samples_per_gpu = cfg_.pop('samples_per_gpu') 9 | workers_per_gpu = cfg_.pop('workers_per_gpu') 10 | 11 | if distributed: 12 | batch_size = samples_per_gpu 13 | num_workers = workers_per_gpu 14 | else: 15 | batch_size = num_gpus * samples_per_gpu 16 | num_workers = num_gpus * workers_per_gpu 17 | 18 | cfg_.update({'batch_size': batch_size, 19 | 'num_workers': num_workers}) 20 | 21 | dataloader = build_from_cfg(cfg_, DATALOADERS, default_args) 22 | 23 | return dataloader 24 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/dataloaders/registry.py: -------------------------------------------------------------------------------- 1 | from torch.utils.data import DataLoader 2 | 3 | from ..utils import Registry 4 | 5 | DATALOADERS = Registry('dataloader') 6 | 7 | DATALOADERS.register_module(DataLoader) -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/dataloaders/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_sampler 2 | from .distributed import DefaultSampler 3 | from .non_distributed import DefaultSampler 4 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/dataloaders/samplers/builder.py: -------------------------------------------------------------------------------- 1 | from ...utils import build_from_cfg 2 | from .registry import DISTRIBUTED_SAMPLERS, NON_DISTRIBUTED_SAMPLERS 3 | 4 | 5 | def build_sampler(distributed, cfg, default_args=None): 6 | if distributed: 7 | sampler = build_from_cfg(cfg, DISTRIBUTED_SAMPLERS, default_args) 8 | else: 9 | sampler = build_from_cfg(cfg, NON_DISTRIBUTED_SAMPLERS, default_args) 10 
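# Both registries expose a sampler named DefaultSampler (see __init__.py above),
# so the same config entry works unchanged for single-GPU and distributed runs;
# only the registry it is looked up in differs.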
| 11 | return sampler 12 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/dataloaders/samplers/distributed.py: -------------------------------------------------------------------------------- 1 | from torch.utils.data import DistributedSampler 2 | 3 | from ...utils import get_dist_info 4 | from .registry import DISTRIBUTED_SAMPLERS 5 | 6 | 7 | @DISTRIBUTED_SAMPLERS.register_module 8 | class DefaultSampler(DistributedSampler): 9 | """Default distributed sampler.""" 10 | 11 | def __init__(self, dataset, shuffle=True): 12 | rank, num_replicas = get_dist_info() 13 | super().__init__(dataset, num_replicas, rank, shuffle) 14 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/dataloaders/samplers/non_distributed.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data import Sampler 3 | 4 | from .registry import NON_DISTRIBUTED_SAMPLERS 5 | 6 | 7 | @NON_DISTRIBUTED_SAMPLERS.register_module 8 | class DefaultSampler(Sampler): 9 | """Default non-distributed sampler.""" 10 | 11 | def __init__(self, dataset, shuffle=True): 12 | self.dataset = dataset 13 | self.shuffle = shuffle 14 | 15 | def __iter__(self): 16 | if self.shuffle: 17 | return iter(torch.randperm(len(self.dataset)).tolist()) 18 | else: 19 | return iter(range(len(self.dataset))) 20 | 21 | def __len__(self): 22 | return len(self.dataset) 23 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/dataloaders/samplers/registry.py: -------------------------------------------------------------------------------- 1 | from ...utils import Registry 2 | 3 | DISTRIBUTED_SAMPLERS = Registry('distributed_sampler') 4 | NON_DISTRIBUTED_SAMPLERS = Registry('non_distributed_sampler') 5 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_dataset 2 | from .coco import CocoDataset 3 | from .voc import VOCDataset 4 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/datasets/base.py: -------------------------------------------------------------------------------- 1 | from torch.utils.data import Dataset 2 | 3 | 4 | class BaseDataset(Dataset): 5 | """ BaseDataset 6 | """ 7 | CLASSES = None 8 | 9 | PALETTE = None 10 | 11 | def __init__(self, transform=None): 12 | self.transform = transform 13 | 14 | def process(self, image, masks): 15 | if self.transform: 16 | augmented = self.transform(image=image, masks=masks) 17 | return augmented['image'], augmented['masks'] 18 | else: 19 | return image, masks 20 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/datasets/builder.py: -------------------------------------------------------------------------------- 1 | from vedaseg.utils import build_from_cfg 2 | from .registry import DATASETS 3 | 4 | 5 | def build_dataset(cfg, default_args=None): 6 | dataset = build_from_cfg(cfg, DATASETS, default_args) 7 | return dataset 8 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/datasets/coco.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 
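# CocoDataset below parses COCO-style JSON annotations and rasterizes the polygon
# segmentations with cv2.fillPoly: one binary mask per category when
# multi_label=True, otherwise a single index mask with class ids offset by +1 so
# that 0 is kept for background.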
import json 3 | import logging 4 | import numpy as np 5 | import os 6 | from collections import defaultdict 7 | 8 | from vedaseg.datasets.base import BaseDataset 9 | from .registry import DATASETS 10 | 11 | logger = logging.getLogger() 12 | 13 | 14 | @DATASETS.register_module 15 | class CocoDataset(BaseDataset): 16 | def __init__(self, root, ann_file, img_prefix='', transform=None, 17 | multi_label=False): 18 | super().__init__() 19 | self.multi_label = multi_label 20 | self.root = root 21 | self.ann_file = ann_file 22 | self.img_prefix = img_prefix 23 | self.transform = transform 24 | if self.root is not None: 25 | self.img_prefix = os.path.join(self.root, self.img_prefix) 26 | 27 | self.data = json.load( 28 | open(os.path.join(self.root, 'annotations', self.ann_file), 'r')) 29 | 30 | self.load_annotations() 31 | logger.debug('Total of images is {}'.format(len(self.data_infos))) 32 | 33 | def load_annotations(self): 34 | self.cat_ids = [cat['id'] for cat in self.data['categories']] 35 | self.numclass = len(self.cat_ids) 36 | self.cat2label = {cat_id: i for i, cat_id in enumerate(self.cat_ids)} 37 | 38 | self.img_ids, self.data_infos = [], [] 39 | self.imgToAnns = defaultdict(list) 40 | 41 | for img in self.data['images']: 42 | self.img_ids.append(img['id']) 43 | img['filename'] = os.path.join(self.img_prefix, img['file_name']) 44 | self.data_infos.append(img) 45 | 46 | for ann in self.data['annotations']: 47 | self.imgToAnns[ann['image_id']].append(ann) 48 | 49 | def _parse_ann_info(self, img_info, ann_info): 50 | gt_bboxes = [] 51 | gt_labels = [] 52 | gt_bboxes_ignore = [] 53 | gt_masks_ann = [] 54 | for i, ann in enumerate(ann_info): 55 | if ann.get('ignore', False): 56 | continue 57 | x1, y1, w, h = ann['bbox'] 58 | inter_w = max(0, min(x1 + w, img_info['width']) - max(x1, 0)) 59 | inter_h = max(0, min(y1 + h, img_info['height']) - max(y1, 0)) 60 | if inter_w * inter_h == 0: 61 | continue 62 | if ann['area'] <= 0 or w < 1 or h < 1: 63 | continue 64 | if ann['category_id'] not in self.cat_ids: 65 | continue 66 | bbox = [x1, y1, x1 + w, y1 + h] 67 | if ann.get('iscrowd', False): 68 | gt_bboxes_ignore.append(bbox) 69 | else: 70 | gt_bboxes.append(bbox) 71 | gt_labels.append(self.cat2label[ann['category_id']]) 72 | gt_masks_ann.append(ann['segmentation']) 73 | 74 | if gt_bboxes: 75 | gt_bboxes = np.array(gt_bboxes, dtype=np.float32) 76 | gt_labels = np.array(gt_labels, dtype=np.int64) 77 | else: 78 | gt_bboxes = np.zeros((0, 4), dtype=np.float32) 79 | gt_labels = np.array([], dtype=np.int64) 80 | 81 | if gt_bboxes_ignore: 82 | gt_bboxes_ignore = np.array(gt_bboxes_ignore, dtype=np.float32) 83 | else: 84 | gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32) 85 | 86 | seg_map = img_info['filename'].replace('jpg', 'png') 87 | 88 | ann = dict( 89 | bboxes=gt_bboxes, 90 | labels=gt_labels, 91 | bboxes_ignore=gt_bboxes_ignore, 92 | masks=gt_masks_ann, 93 | seg_map=seg_map) 94 | 95 | return ann 96 | 97 | def get_ann_info(self, img_info): 98 | img_id = img_info['id'] 99 | ann_info = [ann for ann in self.imgToAnns[img_id]] 100 | return self._parse_ann_info(img_info, ann_info) 101 | 102 | def generate_mask(self, shape, ann_info): 103 | h, w, c = shape 104 | if self.multi_label: 105 | masks = [np.zeros((h, w), np.uint8) for _ in range(self.numclass)] 106 | for m, l in zip(ann_info['masks'], ann_info['labels']): 107 | for m_ in m: 108 | m_ = np.array(m_).reshape((-1, 1, 2)).astype(np.int32) 109 | cv2.fillPoly(masks[l], [m_], 1) 110 | else: 111 | mask = np.zeros((h, w), np.uint8) 112 | for m, l in 
zip(ann_info['masks'], ann_info['labels']): 113 | for m_ in m: 114 | m_ = np.array(m_).reshape((-1, 1, 2)).astype(np.int32) 115 | cv2.fillPoly(mask, [m_], int(l + 1)) 116 | masks = [mask] 117 | return masks 118 | 119 | def __getitem__(self, idx): 120 | img_info = self.data_infos[idx] 121 | ann_info = self.get_ann_info(img_info) 122 | 123 | img = cv2.imread(img_info['filename']).astype(np.float32) 124 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 125 | 126 | masks = self.generate_mask(img.shape, ann_info) 127 | image, masks = self.process(img, masks) 128 | return image, masks.long() 129 | 130 | def __len__(self): 131 | return len(self.data_infos) 132 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/datasets/registry.py: -------------------------------------------------------------------------------- 1 | from ..utils import Registry 2 | 3 | DATASETS = Registry('dataset') 4 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/datasets/voc.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import logging 3 | import numpy as np 4 | import os 5 | 6 | from .base import BaseDataset 7 | from .registry import DATASETS 8 | 9 | logger = logging.getLogger() 10 | 11 | 12 | @DATASETS.register_module 13 | class VOCDataset(BaseDataset): 14 | def __init__(self, root, imglist_name, transform, multi_label=False): 15 | if multi_label: 16 | raise ValueError('multi label training is only ' 17 | 'supported by using COCO data form') 18 | super().__init__() 19 | 20 | imglist_fp = os.path.join(root, 'ImageSets/Segmentation', imglist_name) 21 | self.imglist = self.read_imglist(imglist_fp) 22 | 23 | logger.debug('Total of images is {}'.format(len(self.imglist))) 24 | 25 | self.root = root 26 | self.transform = transform 27 | 28 | def __getitem__(self, idx): 29 | imgname = self.imglist[idx] 30 | img_fp = os.path.join(self.root, 'JPEGImages', imgname) + '.jpg' 31 | mask_fp = os.path.join(self.root, 'EncodeSegmentationClass', 32 | imgname) + '.png' 33 | 34 | img = cv2.imread(img_fp).astype(np.float32) 35 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 36 | 37 | mask = cv2.imread(mask_fp, cv2.IMREAD_GRAYSCALE) 38 | 39 | image, mask = self.process(img, [mask]) 40 | 41 | return image, mask.long() 42 | 43 | def __len__(self): 44 | return len(self.imglist) 45 | 46 | def read_imglist(self, imglist_fp): 47 | ll = [] 48 | with open(imglist_fp, 'r') as fd: 49 | for line in fd: 50 | ll.append(line.strip()) 51 | return ll 52 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/loggers/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_logger 2 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/loggers/builder.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import sys 4 | import time 5 | import torch.distributed as dist 6 | 7 | 8 | def build_logger(cfg, default_args): 9 | timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime()) 10 | format_ = '%(asctime)s - %(levelname)s - %(message)s' 11 | 12 | formatter = logging.Formatter(format_) 13 | logger = logging.getLogger() 14 | logger.setLevel(logging.DEBUG) 15 | 16 | if dist.is_available() and dist.is_initialized(): 17 | rank = dist.get_rank() 
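# In distributed runs only rank 0 gets a FileHandler and the configured log
# level; all other ranks are capped at ERROR further down, so worker processes
# do not duplicate the log output.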
18 | else: 19 | rank = 0 20 | 21 | for handler in cfg['handlers']: 22 | if handler['type'] == 'StreamHandler': 23 | instance = logging.StreamHandler(sys.stdout) 24 | elif handler['type'] == 'FileHandler': 25 | # only rank 0 will add a FileHandler 26 | if default_args.get('workdir') and rank == 0: 27 | fp = os.path.join(default_args['workdir'], '%s.log' % timestamp) 28 | instance = logging.FileHandler(fp, 'w') 29 | else: 30 | continue 31 | else: 32 | instance = logging.StreamHandler(sys.stdout) 33 | 34 | level = getattr(logging, handler['level']) 35 | 36 | instance.setFormatter(formatter) 37 | if rank == 0: 38 | instance.setLevel(level) 39 | else: 40 | instance.setLevel(logging.ERROR) 41 | 42 | logger.addHandler(instance) 43 | 44 | return logger 45 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/lr_schedulers/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_lr_scheduler 2 | from .poly_lr import PolyLR 3 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/lr_schedulers/base.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | import weakref 3 | from functools import wraps 4 | from torch.optim import Optimizer 5 | 6 | 7 | class _Iter_LRScheduler(object): 8 | """ 9 | """ 10 | 11 | _iter_based = True 12 | 13 | def __init__(self, optimizer, niter_per_epoch, last_iter=-1): 14 | if not isinstance(optimizer, Optimizer): 15 | raise TypeError('{} is not an Optimizer'.format( 16 | type(optimizer).__name__)) 17 | self.optimizer = optimizer 18 | self.niter_per_epoch = niter_per_epoch 19 | if last_iter == -1: 20 | for group in optimizer.param_groups: 21 | group.setdefault('initial_lr', group['lr']) 22 | last_iter = 0 23 | else: 24 | for i, group in enumerate(optimizer.param_groups): 25 | if 'initial_lr' not in group: 26 | raise KeyError("param 'initial_lr' is not specified in " 27 | "param_groups[{}] when resuming an " 28 | "optimizer".format(i)) 29 | self.base_lrs = list( 30 | map(lambda group: group['initial_lr'], optimizer.param_groups)) 31 | self.last_epoch = int(last_iter / niter_per_epoch) 32 | self.last_iter = None 33 | 34 | # Following https://github.com/pytorch/pytorch/issues/20124 35 | # We would like to ensure that `lr_scheduler.step()` is called after 36 | # `optimizer.step()` 37 | def with_counter(method): 38 | if getattr(method, '_with_counter', False): 39 | # `optimizer.step()` has already been replaced, return. 40 | return method 41 | 42 | # Keep a weak reference to the optimizer instance to prevent 43 | # cyclic references. 44 | instance_ref = weakref.ref(method.__self__) 45 | # Get the unbound method for the same purpose. 46 | func = method.__func__ 47 | cls = instance_ref().__class__ 48 | del method 49 | 50 | @wraps(func) 51 | def wrapper(*args, **kwargs): 52 | instance = instance_ref() 53 | instance._step_count += 1 54 | wrapped = func.__get__(instance, cls) 55 | return wrapped(*args, **kwargs) 56 | 57 | # Note that the returned function here is no longer a bound method, 58 | # so attributes like `__func__` and `__self__` no longer exist. 
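# Tag the wrapper so a repeated call to with_counter() is a no-op and so step()
# can later check that optimizer.step() has been patched.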
59 | wrapper._with_counter = True 60 | return wrapper 61 | 62 | self.optimizer.step = with_counter(self.optimizer.step) 63 | self.optimizer._step_count = 0 64 | self._step_count = 0 65 | self.step(last_iter) 66 | 67 | def state_dict(self): 68 | """Returns the state of the scheduler as a :class:`dict`. 69 | 70 | It contains an entry for every variable in self.__dict__ which 71 | is not the optimizer. 72 | """ 73 | return {key: value for key, value in self.__dict__.items() if 74 | key != 'optimizer'} 75 | 76 | def load_state_dict(self, state_dict): 77 | """Loads the schedulers state. 78 | 79 | Arguments: 80 | state_dict (dict): scheduler state. Should be an object returned 81 | from a call to :meth:`state_dict`. 82 | """ 83 | self.__dict__.update(state_dict) 84 | 85 | def get_lr(self): 86 | raise NotImplementedError 87 | 88 | def step(self, iter_=None): 89 | # Raise a warning if old pattern is detected 90 | # https://github.com/pytorch/pytorch/issues/20124 91 | if self._step_count == 1: 92 | if not hasattr(self.optimizer.step, "_with_counter"): 93 | warnings.warn( 94 | "Seems like `optimizer.step()` has been overridden after learning rate scheduler " 95 | "initialization. Please, make sure to call `optimizer.step()` before " 96 | "`lr_scheduler.step()`. See more details at " 97 | "https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate", 98 | UserWarning) 99 | 100 | # Just check if there were two first lr_scheduler.step() calls before optimizer.step() 101 | elif self.optimizer._step_count < 1: 102 | warnings.warn( 103 | "Detected call of `lr_scheduler.step()` before `optimizer.step()`. " 104 | "In PyTorch 1.1.0 and later, you should call them in the opposite order: " 105 | "`optimizer.step()` before `lr_scheduler.step()`. Failure to do this " 106 | "will result in PyTorch skipping the first value of the learning rate schedule." 
107 | "See more details at " 108 | "https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate", 109 | UserWarning) 110 | self._step_count += 1 111 | 112 | if iter_ is None: 113 | iter_ = self.last_iter + 1 114 | self.last_iter = iter_ 115 | self.last_epoch = int(iter_ / self.niter_per_epoch) 116 | for param_group, lr in zip(self.optimizer.param_groups, self.get_lr()): 117 | param_group['lr'] = lr 118 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/lr_schedulers/builder.py: -------------------------------------------------------------------------------- 1 | from torch.optim import lr_scheduler 2 | 3 | from vedaseg.utils import build_from_cfg 4 | from .registry import LR_SCHEDULERS 5 | 6 | 7 | def build_lr_scheduler(cfg, default_args=None): 8 | if LR_SCHEDULERS.get(cfg['type']): 9 | scheduler = build_from_cfg(cfg, LR_SCHEDULERS, default_args, 'registry') 10 | else: 11 | default_args = dict(optimizer=default_args.get('optimizer')) 12 | scheduler = build_from_cfg(cfg, lr_scheduler, default_args, 'module') 13 | 14 | return scheduler 15 | 16 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/lr_schedulers/poly_lr.py: -------------------------------------------------------------------------------- 1 | from .base import _Iter_LRScheduler 2 | from .registry import LR_SCHEDULERS 3 | 4 | 5 | @LR_SCHEDULERS.register_module 6 | class PolyLR(_Iter_LRScheduler): 7 | """PolyLR 8 | """ 9 | 10 | def __init__(self, optimizer, niter_per_epoch, max_epochs, power=0.9, 11 | last_iter=-1, warm_up=0): 12 | self.max_iters = niter_per_epoch * max_epochs 13 | self.power = power 14 | self.warm_up = warm_up 15 | super().__init__(optimizer, niter_per_epoch, last_iter) 16 | 17 | def get_lr(self): 18 | if self.last_iter < self.warm_up: 19 | multiplier = (self.last_iter / float(self.warm_up)) ** self.power 20 | else: 21 | multiplier = (1 - self.last_iter / float( 22 | self.max_iters)) ** self.power 23 | return [base_lr * multiplier for base_lr in self.base_lrs] 24 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/lr_schedulers/registry.py: -------------------------------------------------------------------------------- 1 | from ..utils import Registry 2 | 3 | LR_SCHEDULERS = Registry('lr_scheduler') 4 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_metrics 2 | from .metrics import (Accuracy, DiceScore, IoU, MIoU, MultiLabelIoU, 3 | MultiLabelMIoU) 4 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/metrics/base.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from abc import ABCMeta, abstractmethod 3 | 4 | 5 | class BaseMetric(object, metaclass=ABCMeta): 6 | """ 7 | Base metric for segmentation metrics in an online manner. 8 | This class is abstract, providing a standard interface for metrics of this type. 9 | """ 10 | 11 | def __init__(self): 12 | super().__init__() 13 | self.reset() 14 | 15 | @abstractmethod 16 | def reset(self): 17 | """ 18 | Reset variables to default settings. 
19 | """ 20 | pass 21 | 22 | @abstractmethod 23 | def compute(self, pred, target): 24 | """ 25 | Compute metric value for current batch for metrics. 26 | Args: 27 | pred (numpy.ndarray): prediction results from segmentation model, 28 | pred should have the following shape (batch_size, h, w, num_categories) 29 | target (numpy.ndarray): ground truth class indices, 30 | target should have the following shape (batch_size, h, w) 31 | Returns: 32 | metric value or process value for current batch 33 | """ 34 | pass 35 | 36 | @abstractmethod 37 | def update(self, n=1): 38 | """ 39 | Add metric value or process value to statistic containers. 40 | """ 41 | pass 42 | 43 | @abstractmethod 44 | def accumulate(self): 45 | """ 46 | Compute accumulated metric value. 47 | """ 48 | pass 49 | 50 | def export(self): 51 | """ 52 | Export figures, images or reports of metrics 53 | """ 54 | pass 55 | 56 | def check(self, pred, target): 57 | """ 58 | Check inputs 59 | """ 60 | self._check_type(pred, target) 61 | self._check_match(pred, target) 62 | 63 | @staticmethod 64 | def _check_match(pred, target): 65 | assert pred.shape[0] == target.shape[0] and pred.shape[-2:-1] == target.shape[-2:-1], \ 66 | "pred and target don't match" 67 | 68 | @staticmethod 69 | def _check_type(pred, target): 70 | assert type(pred) == np.ndarray and type(target) == np.ndarray, \ 71 | "Only numpy.ndarray is supported for computing accuracy" 72 | 73 | @staticmethod 74 | def _check_pred_range(pred): 75 | assert np.all(0 <= pred) and np.all(pred <= 1), \ 76 | "Pred should stand for the predicted probability in range (0, 1)" 77 | 78 | def __call__(self, pred, target): 79 | self.check(pred, target) 80 | current_state = self.compute(pred, target) 81 | self.update() 82 | return current_state 83 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/metrics/builder.py: -------------------------------------------------------------------------------- 1 | from ..utils import build_from_cfg 2 | from .metrics import Compose 3 | from .registry import METRICS 4 | 5 | 6 | def build_metrics(cfg): 7 | mtcs = [] 8 | for icfg in cfg: 9 | mtc = build_from_cfg(icfg, METRICS) 10 | mtcs.append(mtc) 11 | metrics = Compose(mtcs) 12 | 13 | return metrics 14 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/metrics/registry.py: -------------------------------------------------------------------------------- 1 | from ..utils import Registry 2 | 3 | METRICS = Registry('metric') 4 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_model 2 | from .registry import MODELS 3 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/models/builder.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | # from .decoders import build_brick 4 | from vedaseg.models.decoders import build_brick, build_decoder 5 | from vedaseg.models.encoders import build_encoder 6 | from vedaseg.models.heads import build_head 7 | 8 | 9 | def build_model(cfg): 10 | encoder = build_encoder(cfg.get('encoder')) 11 | 12 | if cfg.get('decoder'): 13 | middle = build_decoder(cfg.get('decoder')) 14 | assert 'collect' not in cfg 15 | else: 16 | assert 'collect' in cfg 17 | 
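# Without a decoder, a single 'collect' brick from the BRICKS registry serves as
# the middle stage; either way the model is assembled as
# nn.Sequential(encoder, middle, head) below.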
middle = build_brick(cfg.get('collect')) 18 | 19 | head = build_head(cfg['head']) 20 | 21 | model = nn.Sequential(encoder, middle, head) 22 | 23 | return model 24 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/models/decoders/__init__.py: -------------------------------------------------------------------------------- 1 | from .bricks import FusionBlock, JunctionBlock 2 | from .builder import build_brick, build_decoder 3 | from .gfpn import GFPN 4 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/models/decoders/builder.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | from vedaseg.utils import build_from_cfg 4 | from .registry import BRICKS, DECODERS 5 | 6 | 7 | def build_brick(cfg, default_args=None): 8 | brick = build_from_cfg(cfg, BRICKS, default_args) 9 | return brick 10 | 11 | 12 | def build_bricks(cfgs): 13 | bricks = nn.ModuleList() 14 | for brick_cfg in cfgs: 15 | bricks.append(build_brick(brick_cfg)) 16 | return bricks 17 | 18 | 19 | def build_decoder(cfg, default_args=None): 20 | decoder = build_from_cfg(cfg, DECODERS, default_args) 21 | return decoder 22 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/models/decoders/gfpn/__init__.py: -------------------------------------------------------------------------------- 1 | from .gfpn import GFPN 2 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/models/decoders/gfpn/gfpn.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import torch.nn as nn 3 | 4 | from ...weight_init import init_weights 5 | from ..builder import build_brick, build_bricks 6 | from ..registry import DECODERS 7 | 8 | logger = logging.getLogger() 9 | 10 | 11 | @DECODERS.register_module 12 | class GFPN(nn.Module): 13 | """GFPN 14 | A general framework for FPN-alike structures. 15 | """ 16 | 17 | def __init__(self, neck, fusion=None): 18 | """ 19 | Args: 20 | neck: cfg that describes the structure of GFPN 21 | 22 | fusion: cfg that describes the fusion behaviour of GFPN 23 | """ 24 | super().__init__() 25 | self.neck = build_bricks(neck) 26 | if fusion: 27 | self.fusion = build_brick(fusion) 28 | else: 29 | self.fusion = None 30 | logger.info('GFPN init weights') 31 | init_weights(self.modules()) 32 | 33 | def forward(self, bottom_up): 34 | """ 35 | Args: 36 | bottom_up: dict of features from backbone 37 | """ 38 | x = None 39 | feats = {**bottom_up} 40 | for ii, layer in enumerate(self.neck): 41 | if layer.to_layer in feats: 42 | raise KeyError(f'Layer name {layer.to_layer} already in use. 
' 43 | f'Used names are: {list(feats.keys())}.') 44 | 45 | vertical_sources = layer.from_layers.get('vertical') 46 | lateral_sources = layer.from_layers.get('lateral') 47 | lateral_in, vertical_in = [], [] 48 | 49 | if lateral_sources is not None and len(lateral_sources) > 0: 50 | for l_source in lateral_sources: 51 | lateral_in.append(feats[l_source]) 52 | 53 | if vertical_sources is not None and len(vertical_sources) > 0: 54 | for v_source in vertical_sources: 55 | vertical_in.append(feats[v_source]) 56 | 57 | x = layer(vertical_in, lateral_in) 58 | feats[layer.to_layer] = x 59 | if self.fusion: 60 | x = self.fusion(feats) 61 | return x 62 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/models/decoders/registry.py: -------------------------------------------------------------------------------- 1 | from vedaseg.utils import Registry 2 | 3 | BRICKS = Registry('brick') 4 | DECODERS = Registry('decoder') 5 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/models/encoders/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_encoder 2 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/models/encoders/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_backbone 2 | from .resnet import ResNet 3 | from .myresnet import MYResNet, MYSResNet 4 | from .vgg import MYVGG11, MYVGG9, MYSVGG9 5 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/models/encoders/backbones/builder.py: -------------------------------------------------------------------------------- 1 | from vedaseg.utils import build_from_cfg 2 | from .registry import BACKBONES 3 | 4 | 5 | def build_backbone(cfg, default_args=None): 6 | backbone = build_from_cfg(cfg, BACKBONES, default_args) 7 | return backbone 8 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/models/encoders/backbones/registry.py: -------------------------------------------------------------------------------- 1 | from vedaseg.utils import Registry 2 | 3 | BACKBONES = Registry('backbone') 4 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/models/encoders/backbones/spiking.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | 6 | def unsigned_spikes(model): 7 | for m in model.modules(): 8 | if isinstance(m, Spiking): 9 | m.sign = False 10 | 11 | #####the spiking wrapper###### 12 | 13 | class Spiking(nn.Module): 14 | def __init__(self, block, T): 15 | super(Spiking, self).__init__() 16 | self.block = block 17 | self.T = T 18 | self.is_first = False 19 | self.idem = False 20 | self.sign = True 21 | def forward(self, x): 22 | if self.idem: 23 | return x 24 | 25 | ###initialize membrane to half threshold 26 | threshold = self.block[2].act_alpha.data 27 | membrane = 0.5 * threshold 28 | sum_spikes = 0 29 | 30 | #prepare charges 31 | if self.is_first: 32 | x.unsqueeze_(1) 33 | x = x.repeat(1, self.T, 1, 1, 1) 34 | train_shape = [x.shape[0], x.shape[1]] 35 | x = x.flatten(0, 1) 36 | x = self.block(x) 37 | train_shape.extend(x.shape[1:]) 38 | x = 
x.reshape(train_shape) 39 | 40 | #integrate charges 41 | for dt in range(self.T): 42 | membrane = membrane + x[:,dt] 43 | if dt == 0: 44 | spike_train = torch.zeros(membrane.shape[:1] + torch.Size([self.T]) + membrane.shape[1:],device=membrane.device) 45 | 46 | spikes = membrane >= threshold 47 | membrane[spikes] = membrane[spikes] - threshold 48 | spikes = spikes.float() 49 | sum_spikes = sum_spikes + spikes 50 | 51 | ###signed spikes### 52 | if self.sign: 53 | inhibit = membrane <= -1e-3 54 | inhibit = inhibit & (sum_spikes > 0) 55 | membrane[inhibit] = membrane[inhibit] + threshold 56 | inhibit = inhibit.float() 57 | sum_spikes = sum_spikes - inhibit 58 | else: 59 | inhibit = 0 60 | 61 | spike_train[:,dt] = spikes - inhibit 62 | 63 | spike_train = spike_train * threshold 64 | return spike_train 65 | 66 | 67 | class last_Spiking(nn.Module): 68 | def __init__(self, block, T): 69 | super(last_Spiking, self).__init__() 70 | self.block = block 71 | self.T = T 72 | self.idem = False 73 | 74 | def forward(self, x): 75 | if self.idem: 76 | return x 77 | #prepare charges 78 | train_shape = [x.shape[0], x.shape[1]] 79 | x = x.flatten(0, 1) 80 | x = self.block(x) 81 | train_shape.extend(x.shape[1:]) 82 | x = x.reshape(train_shape) 83 | 84 | #integrate charges 85 | return x.sum(dim=1).div(self.T) 86 | 87 | class IF(nn.Module): 88 | def __init__(self): 89 | super(IF, self).__init__() 90 | ###changes threshold to act_alpha 91 | ###being fleet 92 | self.act_alpha = torch.nn.Parameter(torch.tensor(1.0)) 93 | 94 | def forward(self, x): 95 | return x 96 | 97 | def show_params(self): 98 | act_alpha = round(self.act_alpha.data.item(), 3) 99 | print('clipping threshold activation alpha: {:2f}'.format(act_alpha)) 100 | 101 | def extra_repr(self) -> str: 102 | return 'threshold={:.3f}'.format(self.act_alpha) -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/models/encoders/builder.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | from vedaseg.utils import build_from_cfg 4 | from .backbones.registry import BACKBONES 5 | from .enhance_modules.registry import ENHANCE_MODULES 6 | 7 | 8 | def build_encoder(cfg, default_args=None): 9 | backbone = build_from_cfg(cfg['backbone'], BACKBONES, default_args) 10 | 11 | enhance_cfg = cfg.get('enhance') 12 | if enhance_cfg: 13 | enhance_module = build_from_cfg(enhance_cfg, ENHANCE_MODULES, 14 | default_args) 15 | encoder = nn.Sequential(backbone, enhance_module) 16 | else: 17 | encoder = backbone 18 | 19 | return encoder 20 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/models/encoders/enhance_modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .aspp import ASPP, ASPP_v2 2 | from .builder import build_enhance_module 3 | from .ppm import PPM 4 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/models/encoders/enhance_modules/builder.py: -------------------------------------------------------------------------------- 1 | from vedaseg.utils import build_from_cfg 2 | from .registry import ENHANCE_MODULES 3 | 4 | 5 | def build_enhance_module(cfg, default_args=None): 6 | enhance_module = build_from_cfg(cfg, ENHANCE_MODULES, default_args) 7 | return enhance_module 8 | -------------------------------------------------------------------------------- /semantic 
segmentation/vedaseg/models/encoders/enhance_modules/ppm.py: -------------------------------------------------------------------------------- 1 | # modify from https://github.com/hszhao/semseg/blob/master/model/pspnet.py 2 | 3 | import logging 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | from ...utils.act import build_act_layer 9 | from ...utils.norm import build_norm_layer 10 | from ...weight_init import init_weights 11 | from .registry import ENHANCE_MODULES 12 | 13 | logger = logging.getLogger() 14 | 15 | 16 | @ENHANCE_MODULES.register_module 17 | class PPM(nn.Module): 18 | def __init__(self, in_channels, out_channels, bins, from_layer, to_layer, 19 | mode='bilinear', align_corners=True, 20 | norm_cfg=None, act_cfg=None): 21 | super(PPM, self).__init__() 22 | self.from_layer = from_layer 23 | self.to_layer = to_layer 24 | self.mode = mode 25 | self.align_corners = align_corners 26 | 27 | if norm_cfg is None: 28 | norm_cfg = dict(type='BN') 29 | 30 | if act_cfg is None: 31 | act_cfg = dict(type='Relu', inplace=True) 32 | 33 | self.blocks = nn.ModuleList() 34 | for bin_ in bins: 35 | self.blocks.append( 36 | nn.Sequential( 37 | nn.AdaptiveAvgPool2d(bin_), 38 | nn.Conv2d(in_channels, out_channels, 1, bias=False), 39 | build_norm_layer(norm_cfg, out_channels, layer_only=True), 40 | build_act_layer(act_cfg, out_channels, layer_only=True) 41 | ) 42 | ) 43 | logger.info('PPM init weights') 44 | init_weights(self.modules()) 45 | 46 | def forward(self, feats): 47 | feats_ = feats.copy() 48 | x = feats_[self.from_layer] 49 | h, w = x.shape[2:] 50 | out = [x] 51 | for block in self.blocks: 52 | feat = F.interpolate( 53 | block(x), 54 | (h, w), 55 | mode=self.mode, 56 | align_corners=self.align_corners 57 | ) 58 | out.append(feat) 59 | out = torch.cat(out, 1) 60 | feats_[self.to_layer] = out 61 | return feats_ 62 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/models/encoders/enhance_modules/registry.py: -------------------------------------------------------------------------------- 1 | from vedaseg.utils import Registry 2 | 3 | ENHANCE_MODULES = Registry('enhance_module') 4 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/models/heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_head 2 | from .head import Head 3 | from .registry import HEADS 4 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/models/heads/builder.py: -------------------------------------------------------------------------------- 1 | from vedaseg.utils import build_from_cfg 2 | from .registry import HEADS 3 | 4 | 5 | def build_head(cfg, default_args=None): 6 | head = build_from_cfg(cfg, HEADS, default_args) 7 | return head 8 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/models/heads/head.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import torch.nn as nn 3 | 4 | from ..utils import ConvModules, build_module 5 | from ..weight_init import init_weights 6 | from .registry import HEADS 7 | 8 | logger = logging.getLogger() 9 | 10 | 11 | @HEADS.register_module 12 | class Head(nn.Module): 13 | """Head 14 | 15 | Args: 16 | """ 17 | 18 | def __init__(self, 19 | in_channels=256, 20 | out_channels=21, 
21 | inter_channels=None, 22 | conv_cfg=dict(type='Conv'), 23 | norm_cfg=dict(type='BN'), 24 | act_cfg=dict(type='Relu', inplace=True), 25 | num_convs=0, 26 | upsample=None, 27 | dropouts=None, 28 | no_convs=False): 29 | super().__init__() 30 | 31 | if num_convs > 0: 32 | layers = [ 33 | ConvModules(in_channels, 34 | inter_channels, 35 | 3, 36 | padding=1, 37 | conv_cfg=conv_cfg, 38 | norm_cfg=norm_cfg, 39 | act_cfg=act_cfg, 40 | num_convs=num_convs, 41 | dropouts=dropouts), 42 | nn.Conv2d(inter_channels, out_channels, 1) 43 | ] 44 | elif no_convs: 45 | layers = [] 46 | else: 47 | layers = [nn.Conv2d(in_channels, out_channels, 1)] 48 | if upsample: 49 | upsample_layer = build_module(upsample) 50 | layers.append(upsample_layer) 51 | 52 | self.block = nn.Sequential(*layers) 53 | logger.info('Head init weights') 54 | init_weights(self.modules()) 55 | 56 | def forward(self, x): 57 | feat = self.block(x) 58 | return feat 59 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/models/heads/registry.py: -------------------------------------------------------------------------------- 1 | from vedaseg.utils import Registry 2 | 3 | HEADS = Registry('head') 4 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/models/registry.py: -------------------------------------------------------------------------------- 1 | from ..utils import Registry 2 | 3 | MODELS = Registry('model') 4 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/models/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_module, build_torch_nn 2 | from .conv_module import ConvModule, ConvModules 3 | from .upsample import Upsample 4 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/models/utils/act.py: -------------------------------------------------------------------------------- 1 | # modify from mmcv and mmdetection 2 | 3 | import torch 4 | import torch.nn as nn 5 | from torch.nn.parameter import Parameter 6 | 7 | 8 | class TLU(nn.Module): 9 | def __init__(self, num_features): 10 | super(TLU, self).__init__() 11 | 12 | self.num_features = num_features 13 | self.tau = Parameter(torch.Tensor(1, num_features, 1, 1), 14 | requires_grad=True) 15 | 16 | self.reset_parameters() 17 | 18 | def reset_parameters(self): 19 | nn.init.zeros_(self.tau) 20 | 21 | def forward(self, x): 22 | return torch.max(x, self.tau) 23 | 24 | def extra_repr(self): 25 | return '{num_features}'.format(**self.__dict__) 26 | 27 | 28 | act_cfg = { 29 | 'Relu': ('relu', nn.ReLU), 30 | 'Tlu': ('tlu', TLU), 31 | } 32 | 33 | 34 | def build_act_layer(cfg, num_features, postfix='', layer_only=False): 35 | """ Build activate layer 36 | 37 | Args: 38 | cfg (dict): cfg should contain: 39 | type (str): identify activate layer type. 40 | layer args: args needed to instantiate a activate layer. 41 | requires_grad (bool): [optional] whether stop gradient updates 42 | num_features (int): number of channels from input. 43 | postfix (int, str): appended into act abbreviation to 44 | create named layer. 
45 | 46 | Returns: 47 | name (str): abbreviation + postfix 48 | layer (nn.Module): created act layer 49 | """ 50 | assert isinstance(cfg, dict) and 'type' in cfg 51 | cfg_ = cfg.copy() 52 | 53 | layer_type = cfg_.pop('type') 54 | if layer_type not in act_cfg: 55 | raise KeyError('Unrecognized activate type {}'.format(layer_type)) 56 | else: 57 | abbr, act_layer = act_cfg[layer_type] 58 | if act_layer is None: 59 | raise NotImplementedError 60 | 61 | assert isinstance(postfix, (int, str)) 62 | name = abbr + str(postfix) 63 | 64 | requires_grad = cfg_.pop('requires_grad', True) 65 | if layer_type != 'Tlu': 66 | layer = act_layer(**cfg_) 67 | else: 68 | layer = act_layer(num_features, **cfg_) 69 | 70 | for param in layer.parameters(): 71 | param.requires_grad = requires_grad 72 | 73 | if layer_only: 74 | return layer 75 | else: 76 | return name, layer 77 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/models/utils/builder.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | from vedaseg.utils import build_from_cfg 4 | from .registry import UTILS 5 | 6 | 7 | def build_module(cfg, default_args=None): 8 | try: 9 | module = build_from_cfg(cfg, UTILS, default_args) 10 | except KeyError as error: 11 | if ' is not in the ' not in error.args[0]: 12 | raise KeyError from error 13 | if ' registry' not in error.args[0]: 14 | raise KeyError from error 15 | module = build_torch_nn(cfg, default_args=default_args) 16 | 17 | return module 18 | 19 | 20 | def build_torch_nn(cfg, default_args=None): 21 | module = build_from_cfg(cfg, nn, default_args, 'module') 22 | return module 23 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/models/utils/norm.py: -------------------------------------------------------------------------------- 1 | # modify from mmcv and mmdetection 2 | 3 | import torch 4 | import torch.nn as nn 5 | from torch.nn.parameter import Parameter 6 | 7 | 8 | class FRN(nn.Module): 9 | def __init__(self, num_features, eps=1e-6): 10 | super(FRN, self).__init__() 11 | 12 | self.num_features = num_features 13 | self.gamma = Parameter(torch.Tensor(1, num_features, 1, 1), 14 | requires_grad=True) 15 | self.beta = Parameter(torch.Tensor(1, num_features, 1, 1), 16 | requires_grad=True) 17 | 18 | self.register_buffer('eps', torch.Tensor([eps])) 19 | 20 | self.reset_parameters() 21 | 22 | def reset_parameters(self): 23 | nn.init.ones_(self.gamma) 24 | nn.init.zeros_(self.beta) 25 | 26 | def forward(self, x): 27 | nu2 = torch.mean(x.pow(2), dim=[2, 3], keepdim=True) 28 | x = x * torch.rsqrt(nu2 + self.eps.abs()) 29 | x = self.gamma * x + self.beta 30 | 31 | return x 32 | 33 | def extra_repr(self): 34 | return '{num_features}, eps={eps}'.format(**self.__dict__) 35 | 36 | 37 | norm_cfg = { 38 | 'FRN': ('frn', FRN), 39 | # format: layer_type: (abbreviation, module) 40 | 'BN': ('bn', nn.BatchNorm2d), 41 | 'SyncBN': ('bn', nn.SyncBatchNorm), 42 | 'GN': ('gn', nn.GroupNorm), 43 | # and potentially 'SN' 44 | } 45 | 46 | 47 | def build_norm_layer(cfg, num_features, postfix='', layer_only=False): 48 | """ Build normalization layer 49 | 50 | Args: 51 | cfg (dict): cfg should contain: 52 | type (str): identify norm layer type. 53 | layer args: args needed to instantiate a norm layer. 54 | requires_grad (bool): [optional] whether stop gradient updates 55 | num_features (int): number of channels from input. 
56 | postfix (int, str): appended into norm abbreviation to 57 | create named layer. 58 | 59 | Returns: 60 | name (str): abbreviation + postfix 61 | layer (nn.Module): created norm layer 62 | """ 63 | assert isinstance(cfg, dict) and 'type' in cfg 64 | cfg_ = cfg.copy() 65 | 66 | layer_type = cfg_.pop('type') 67 | if layer_type not in norm_cfg: 68 | raise KeyError('Unrecognized norm type {}'.format(layer_type)) 69 | else: 70 | abbr, norm_layer = norm_cfg[layer_type] 71 | if norm_layer is None: 72 | raise NotImplementedError 73 | 74 | assert isinstance(postfix, (int, str)) 75 | name = abbr + str(postfix) 76 | 77 | requires_grad = cfg_.pop('requires_grad', True) 78 | if layer_type != 'GN': 79 | layer = norm_layer(num_features, **cfg_) 80 | if layer_type == 'SyncBN': 81 | layer._specify_ddp_gpu_num(1) # noqa 82 | else: 83 | assert 'num_groups' in cfg_ 84 | layer = norm_layer(num_channels=num_features, **cfg_) 85 | 86 | for param in layer.parameters(): 87 | param.requires_grad = requires_grad 88 | 89 | if layer_only: 90 | return layer 91 | return name, layer 92 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/models/utils/registry.py: -------------------------------------------------------------------------------- 1 | from vedaseg.utils import Registry 2 | 3 | UTILS = Registry('utils') 4 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/models/utils/upsample.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | from .registry import UTILS 5 | 6 | 7 | @UTILS.register_module 8 | class Upsample(nn.Module): 9 | __constants__ = ['size', 'scale_factor', 'scale_bias', 'mode', 10 | 'align_corners', 'name'] 11 | 12 | def __init__(self, size=None, scale_factor=None, scale_bias=0, 13 | mode='nearest', align_corners=None): 14 | super(Upsample, self).__init__() 15 | self.size = size 16 | self.scale_factor = scale_factor 17 | self.scale_bias = scale_bias 18 | self.mode = mode 19 | self.align_corners = align_corners 20 | 21 | assert (self.size is None) ^ (self.scale_factor is None) 22 | 23 | def forward(self, x): 24 | if self.size: 25 | size = self.size 26 | else: 27 | n, c, h, w = x.size() 28 | new_h = int(h * self.scale_factor + self.scale_bias) 29 | new_w = int(w * self.scale_factor + self.scale_bias) 30 | 31 | size = (new_h, new_w) 32 | 33 | return F.interpolate(x, size=size, mode=self.mode, 34 | align_corners=self.align_corners) 35 | 36 | def extra_repr(self): 37 | if self.size is not None: 38 | info = 'size=' + str(self.size) 39 | else: 40 | info = 'scale_factor=' + str(self.scale_factor) 41 | info += ', scale_bias=' + str(self.scale_bias) 42 | info += ', mode=' + self.mode 43 | return info 44 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/models/weight_init.py: -------------------------------------------------------------------------------- 1 | # modify from mmcv and mmdetection 2 | 3 | import torch.nn as nn 4 | 5 | 6 | def constant_init(module, val, bias=0): 7 | nn.init.constant_(module.weight, val) 8 | if hasattr(module, 'bias') and module.bias is not None: 9 | nn.init.constant_(module.bias, bias) 10 | 11 | 12 | def xavier_init(module, gain=1, bias=0, distribution='normal'): 13 | assert distribution in ['uniform', 'normal'] 14 | if distribution == 'uniform': 15 | nn.init.xavier_uniform_(module.weight, 
gain=gain) 16 | else: 17 | nn.init.xavier_normal_(module.weight, gain=gain) 18 | if hasattr(module, 'bias') and module.bias is not None: 19 | nn.init.constant_(module.bias, bias) 20 | 21 | 22 | def normal_init(module, mean=0, std=1, bias=0): 23 | nn.init.normal_(module.weight, mean, std) 24 | if hasattr(module, 'bias') and module.bias is not None: 25 | nn.init.constant_(module.bias, bias) 26 | 27 | 28 | def uniform_init(module, a=0, b=1, bias=0): 29 | nn.init.uniform_(module.weight, a, b) 30 | if hasattr(module, 'bias') and module.bias is not None: 31 | nn.init.constant_(module.bias, bias) 32 | 33 | 34 | def kaiming_init(module, 35 | a=0, 36 | mode='fan_out', 37 | nonlinearity='relu', 38 | bias=0, 39 | distribution='normal'): 40 | assert distribution in ['uniform', 'normal'] 41 | if distribution == 'uniform': 42 | nn.init.kaiming_uniform_(module.weight, 43 | a=a, 44 | mode=mode, 45 | nonlinearity=nonlinearity) 46 | else: 47 | nn.init.kaiming_normal_(module.weight, 48 | a=a, 49 | mode=mode, 50 | nonlinearity=nonlinearity) 51 | if hasattr(module, 'bias') and module.bias is not None: 52 | nn.init.constant_(module.bias, bias) 53 | 54 | 55 | def caffe2_xavier_init(module, bias=0): 56 | # `XavierFill` in Caffe2 corresponds to `kaiming_uniform_` in PyTorch 57 | # Acknowledgment to FAIR's internal code 58 | kaiming_init(module, 59 | a=1, 60 | mode='fan_in', 61 | nonlinearity='leaky_relu', 62 | distribution='uniform') 63 | 64 | 65 | def init_weights(modules): 66 | for m in modules: 67 | if isinstance(m, nn.Conv2d): 68 | kaiming_init(m) 69 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): 70 | constant_init(m, 1) 71 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/optims/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_optimizer 2 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/optims/builder.py: -------------------------------------------------------------------------------- 1 | import torch.optim as optims 2 | 3 | from ..utils import build_from_cfg 4 | 5 | 6 | def build_optimizer(cfg_optimizer, default_args=None): 7 | optimizer = build_from_cfg(cfg_optimizer, optims, default_args, 'module') 8 | return optimizer 9 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/runners/__init__.py: -------------------------------------------------------------------------------- 1 | from .inference_runner import InferenceRunner 2 | from .test_runner import TestRunner 3 | from .train_runner import TrainRunner 4 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/runners/base.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | import torch 4 | from torch.backends import cudnn 5 | 6 | from ..dataloaders import build_dataloader 7 | from ..dataloaders.samplers import build_sampler 8 | from ..datasets import build_dataset 9 | from ..loggers import build_logger 10 | from ..metrics import build_metrics 11 | from ..transforms import build_transform 12 | from ..utils import get_dist_info, init_dist_pytorch 13 | 14 | 15 | class Common: 16 | def __init__(self, cfg): 17 | # build logger 18 | logger_cfg = cfg.get('logger') 19 | if logger_cfg is None: 20 | logger_cfg = dict( 21 | handlers=(dict(type='StreamHandler', 
level='INFO'),)) 22 | 23 | self.workdir = cfg.get('workdir') 24 | self.distribute = cfg.get('distribute', False) 25 | 26 | # set gpu devices 27 | self.use_gpu = self._set_device() 28 | 29 | # set distribute setting 30 | if self.distribute and self.use_gpu: 31 | init_dist_pytorch(**cfg.dist_params) 32 | 33 | self.rank, self.world_size = get_dist_info() 34 | 35 | self.logger = self._build_logger(logger_cfg) 36 | 37 | # set cudnn configuration 38 | self._set_cudnn( 39 | cfg.get('cudnn_deterministic', False), 40 | cfg.get('cudnn_benchmark', False)) 41 | 42 | # set seed 43 | self._set_seed(cfg.get('seed', None)) 44 | 45 | # build metric 46 | if 'metrics' in cfg: 47 | self.metric = self._build_metric(cfg['metrics']) 48 | 49 | def _build_logger(self, cfg): 50 | return build_logger(cfg, dict(workdir=self.workdir)) 51 | 52 | def _set_device(self): 53 | self.gpu_num = torch.cuda.device_count() 54 | if torch.cuda.is_available(): 55 | use_gpu = True 56 | else: 57 | use_gpu = False 58 | 59 | return use_gpu 60 | 61 | def _set_seed(self, seed): 62 | if seed is not None: 63 | self.logger.info('Set seed {}'.format(seed)) 64 | random.seed(seed) 65 | np.random.seed(seed) 66 | torch.manual_seed(seed) 67 | 68 | def _set_cudnn(self, deterministic, benchmark): 69 | self.logger.info('Set cudnn deterministic {}'.format(deterministic)) 70 | cudnn.deterministic = deterministic 71 | 72 | self.logger.info('Set cudnn benchmark {}'.format(benchmark)) 73 | cudnn.benchmark = benchmark 74 | 75 | def _build_metric(self, cfg): 76 | return build_metrics(cfg) 77 | 78 | def _build_transform(self, cfg): 79 | return build_transform(cfg) 80 | 81 | def _build_dataloader(self, cfg): 82 | transform = build_transform(cfg['transforms']) 83 | dataset = build_dataset(cfg['dataset'], dict(transform=transform)) 84 | 85 | shuffle = cfg['dataloader'].pop('shuffle', False) 86 | sampler = build_sampler(self.distribute, 87 | cfg['sampler'], 88 | dict(dataset=dataset, 89 | shuffle=shuffle)) 90 | 91 | dataloader = build_dataloader(self.distribute, 92 | self.gpu_num, 93 | cfg['dataloader'], 94 | dict(dataset=dataset, 95 | sampler=sampler)) 96 | 97 | return dataloader 98 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/runners/inference_runner.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from ..models import build_model 4 | from ..utils import load_checkpoint 5 | from .base import Common 6 | 7 | 8 | class InferenceRunner(Common): 9 | def __init__(self, inference_cfg, base_cfg=None): 10 | inference_cfg = inference_cfg.copy() 11 | base_cfg = {} if base_cfg is None else base_cfg.copy() 12 | 13 | super().__init__(base_cfg) 14 | 15 | self.multi_label = inference_cfg.get('multi_label', False) 16 | 17 | # build inference transform 18 | self.transform = self._build_transform(inference_cfg['transforms']) 19 | 20 | # build model 21 | self.model = self._build_model(inference_cfg['model']) 22 | self.model.eval() 23 | 24 | def load_checkpoint(self, filename, map_location='default', strict=True): 25 | self.logger.info('Load checkpoint from {}'.format(filename)) 26 | 27 | if map_location == 'default': 28 | if self.use_gpu: 29 | device_id = torch.cuda.current_device() 30 | map_location = lambda storage, loc: storage.cuda(device_id) 31 | else: 32 | map_location = 'cpu' 33 | 34 | return load_checkpoint(self.model, filename, map_location, strict) 35 | 36 | def _build_model(self, cfg): 37 | self.logger.info('Build model') 38 | 39 | model = 
build_model(cfg) 40 | 41 | if torch.cuda.is_available(): 42 | if self.distribute: 43 | model = torch.nn.parallel.DistributedDataParallel( 44 | model.cuda(), 45 | device_ids=[torch.cuda.current_device()], 46 | broadcast_buffers=True, 47 | ) 48 | self.logger.info('Using distributed training') 49 | else: 50 | if torch.cuda.device_count() > 1: 51 | model = torch.nn.DataParallel(model) 52 | model.cuda() 53 | return model 54 | 55 | def compute(self, output): 56 | if self.multi_label: 57 | output = output.sigmoid() 58 | output = torch.where(output >= 0.5, 59 | torch.full_like(output, 1), 60 | torch.full_like(output, 0)).long() 61 | 62 | else: 63 | output = output.softmax(dim=1) 64 | _, output = torch.max(output, dim=1) 65 | return output 66 | 67 | def __call__(self, image, masks): 68 | with torch.no_grad(): 69 | image = self.transform(image=image, masks=masks)['image'] 70 | image = image.unsqueeze(0) 71 | 72 | if self.use_gpu: 73 | image = image.cuda() 74 | 75 | output = self.model(image) 76 | output = self.compute(output) 77 | 78 | output = output.squeeze().cpu().numpy() 79 | 80 | return output 81 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/runners/test_runner.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn.functional as F 4 | 5 | from ..utils import gather_tensor 6 | from .inference_runner import InferenceRunner 7 | 8 | 9 | class TestRunner(InferenceRunner): 10 | def __init__(self, test_cfg, inference_cfg, base_cfg=None): 11 | super().__init__(inference_cfg, base_cfg) 12 | 13 | self.test_dataloader = self._build_dataloader(test_cfg['data']) 14 | extra_data = len(self.test_dataloader.dataset) % self.world_size 15 | self.test_exclude_num = self.world_size - extra_data if extra_data != 0 else 0 16 | 17 | self.tta = test_cfg.get('tta', False) 18 | 19 | def __call__(self): 20 | self.metric.reset() 21 | self.model.eval() 22 | 23 | res = {} 24 | 25 | self.logger.info('Start testing') 26 | with torch.no_grad(): 27 | for idx, (image, mask) in enumerate(self.test_dataloader): 28 | if self.use_gpu: 29 | image = image.cuda() 30 | mask = mask.cuda() 31 | 32 | if self.tta: 33 | output = self._tta_compute(image) 34 | else: 35 | output = self.model(image) 36 | output = self.compute(output) 37 | 38 | output = gather_tensor(output) 39 | mask = gather_tensor(mask) 40 | 41 | if idx + 1 == len( 42 | self.test_dataloader) and self.test_exclude_num > 0: 43 | output = output[:-self.test_exclude_num] 44 | mask = mask[:-self.test_exclude_num] 45 | 46 | self.metric(output.cpu().numpy(), mask.cpu().numpy()) 47 | res = self.metric.accumulate() 48 | self.logger.info('Test, Iter {}, {}'.format( 49 | idx + 1, 50 | ', '.join(['{}: {}'.format(k, np.round(v, 4)) for k, v in 51 | res.items()]))) 52 | self.logger.info('Test Result: {}'.format(', '.join( 53 | ['{}: {}'.format(k, np.round(v, 4)) for k, v in res.items()]))) 54 | 55 | return res 56 | 57 | def _tta_compute(self, image): 58 | b, c, h, w = image.size() 59 | probs = [] 60 | for scale, bias in zip(self.tta['scales'], self.tta['biases']): 61 | new_h, new_w = int(h * scale + bias), int(w * scale + bias) 62 | new_img = F.interpolate(image, size=(new_h, new_w), 63 | mode='bilinear', align_corners=True) 64 | output = self.model(new_img) 65 | probs.append(output) 66 | 67 | if self.tta['flip']: 68 | flip_img = new_img.flip(3) 69 | flip_output = self.model(flip_img) 70 | prob = flip_output.flip(3) 71 | 
probs.append(prob) 72 | 73 | for idx, prob in enumerate(probs): 74 | probs[idx] = F.interpolate(prob, size=(h, w), 75 | mode='bilinear', align_corners=True) 76 | 77 | if self.multi_label: 78 | prob = torch.stack(probs, dim=0).sigmoid().mean(dim=0) 79 | prob = torch.where(prob >= 0.5, 80 | torch.full_like(prob, 1), 81 | torch.full_like(prob, 0)).long() # b c h w 82 | else: 83 | prob = torch.stack(probs, dim=0).softmax(dim=2).mean(dim=0) 84 | _, prob = torch.max(prob, dim=1) # b h w 85 | return prob 86 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_transform 2 | from .transforms import (FactorScale, LongestMaxSize, PadIfNeeded, RandomScale, 3 | ToTensor) 4 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/transforms/builder.py: -------------------------------------------------------------------------------- 1 | import albumentations as albu 2 | 3 | from vedaseg.utils import build_from_cfg 4 | from .registry import TRANSFORMS 5 | 6 | 7 | def build_transform(cfgs): 8 | tfs = [] 9 | for cfg in cfgs: 10 | if TRANSFORMS.get(cfg['type']): 11 | tf = build_from_cfg(cfg, TRANSFORMS) 12 | else: 13 | tf = build_from_cfg(cfg, albu, mode='module') 14 | tfs.append(tf) 15 | aug = albu.Compose(tfs) 16 | 17 | return aug 18 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/transforms/registry.py: -------------------------------------------------------------------------------- 1 | from ..utils import Registry 2 | 3 | TRANSFORMS = Registry('transforms') 4 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/transforms/transforms.py: -------------------------------------------------------------------------------- 1 | import albumentations as albu 2 | import albumentations.augmentations.functional as F 3 | import cv2 4 | import numpy as np 5 | import random 6 | import torch 7 | from albumentations import DualTransform 8 | 9 | from .registry import TRANSFORMS 10 | 11 | 12 | @TRANSFORMS.register_module 13 | class FactorScale(DualTransform): 14 | def __init__(self, scale=1.0, interpolation=cv2.INTER_LINEAR, 15 | always_apply=False, 16 | p=1.0): 17 | super(FactorScale, self).__init__(always_apply, p) 18 | self.scale = scale 19 | self.interpolation = interpolation 20 | 21 | def apply(self, image, scale=1.0, **params): 22 | return F.scale(image, scale, interpolation=self.interpolation) 23 | 24 | def apply_to_mask(self, image, scale=1.0, **params): 25 | return F.scale(image, scale, interpolation=cv2.INTER_NEAREST) 26 | 27 | def get_params(self): 28 | return {'scale': self.scale} 29 | 30 | def get_transform_init_args_names(self): 31 | return ('scale',) 32 | 33 | 34 | @TRANSFORMS.register_module 35 | class LongestMaxSize(FactorScale): 36 | def __init__(self, h_max, w_max, interpolation=cv2.INTER_LINEAR, 37 | always_apply=False, p=1.0): 38 | self.h_max = h_max 39 | self.w_max = w_max 40 | super(LongestMaxSize, self).__init__(interpolation=interpolation, 41 | always_apply=always_apply, 42 | p=p) 43 | 44 | def update_params(self, params, **kwargs): 45 | params = super(LongestMaxSize, self).update_params(params, **kwargs) 46 | rows = params['rows'] 47 | cols = params['cols'] 48 | 49 | scale_h = self.h_max / rows 50 | scale_w = self.w_max / 
cols 51 | scale = min(scale_h, scale_w) 52 | 53 | params.update({'scale': scale}) 54 | return params 55 | 56 | def get_transform_init_args_names(self): 57 | return ('h_max', 'w_max',) 58 | 59 | 60 | @TRANSFORMS.register_module 61 | class RandomScale(FactorScale): 62 | def __init__(self, scale_limit=(0.5, 2), interpolation=cv2.INTER_LINEAR, 63 | scale_step=None, always_apply=False, p=1.0): 64 | super(RandomScale, self).__init__(interpolation=interpolation, 65 | always_apply=always_apply, 66 | p=p) 67 | self.scale_limit = albu.to_tuple(scale_limit) 68 | self.scale_step = scale_step 69 | 70 | def get_params(self): 71 | if self.scale_step: 72 | num_steps = int((self.scale_limit[1] - self.scale_limit[ 73 | 0]) / self.scale_step + 1) 74 | scale_factors = np.linspace(self.scale_limit[0], 75 | self.scale_limit[1], num_steps) 76 | scale_factor = np.random.choice(scale_factors).item() 77 | else: 78 | scale_factor = random.uniform(self.scale_limit[0], 79 | self.scale_limit[1]) 80 | 81 | return {'scale': scale_factor} 82 | 83 | def get_transform_init_args_names(self): 84 | return ('scale_limit', 'scale_step',) 85 | 86 | 87 | @TRANSFORMS.register_module 88 | class PadIfNeeded(albu.PadIfNeeded): 89 | def __init__(self, min_height, min_width, border_mode=cv2.BORDER_CONSTANT, 90 | value=None, mask_value=None): 91 | super(PadIfNeeded, self).__init__(min_height=min_height, 92 | min_width=min_width, 93 | border_mode=border_mode, 94 | value=value, 95 | mask_value=mask_value) 96 | 97 | def update_params(self, params, **kwargs): 98 | params = super(PadIfNeeded, self).update_params(params, **kwargs) 99 | rows = params['rows'] 100 | cols = params['cols'] 101 | 102 | if rows < self.min_height: 103 | h_pad_bottom = self.min_height - rows 104 | else: 105 | h_pad_bottom = 0 106 | 107 | if cols < self.min_width: 108 | w_pad_right = self.min_width - cols 109 | else: 110 | w_pad_right = 0 111 | 112 | params.update({'pad_top': 0, 113 | 'pad_bottom': h_pad_bottom, 114 | 'pad_left': 0, 115 | 'pad_right': w_pad_right}) 116 | return params 117 | 118 | def get_transform_init_args_names(self): 119 | return ('min_height', 'min_width',) 120 | 121 | 122 | @TRANSFORMS.register_module 123 | class ToTensor(DualTransform): 124 | def __init__(self): 125 | super(ToTensor, self).__init__(always_apply=True) 126 | 127 | def apply(self, image, **params): 128 | if isinstance(image, np.ndarray): 129 | if image.ndim == 2: 130 | image = image[:, :, None] 131 | image = torch.from_numpy(image).float() 132 | image = image.permute(2, 0, 1) 133 | else: 134 | raise TypeError('img should be np.ndarray. 
Got {}' 135 | .format(type(image))) 136 | return image 137 | 138 | def apply_to_mask(self, image, **params): 139 | image = torch.from_numpy(image) 140 | return image 141 | 142 | def apply_to_masks(self, masks, **params): 143 | masks = [self.apply_to_mask(mask, **params) for mask in masks] 144 | return torch.stack(masks, dim=0).squeeze() 145 | 146 | def get_transform_init_args_names(self): 147 | return () 148 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .checkpoint import load_checkpoint, save_checkpoint, weights_to_cpu 2 | from .config import Config 3 | from .dist_utils import (gather_tensor, get_dist_info, init_dist_pytorch, 4 | reduce_tensor) 5 | from .registry import Registry, build_from_cfg 6 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/utils/checkpoint.py: -------------------------------------------------------------------------------- 1 | # adapted from https://github.com/open-mmlab/mmcv 2 | import os 3 | import time 4 | import torch 5 | from collections import OrderedDict 6 | 7 | 8 | def weights_to_cpu(state_dict): 9 | """Copy a model state_dict to cpu. 10 | Args: 11 | state_dict (OrderedDict): Model weights on GPU. 12 | Returns: 13 | OrderedDict: Model weights on CPU. 14 | """ 15 | state_dict_cpu = OrderedDict() 16 | for key, val in state_dict.items(): 17 | state_dict_cpu[key] = val.cpu() 18 | return state_dict_cpu 19 | 20 | 21 | def save_checkpoint(model, filename, optimizer=None, lr_scheduler=None, 22 | meta=None): 23 | """Save checkpoint to file. 24 | The checkpoint will contain the fields ``meta`` and ``state_dict``, plus 25 | ``optimizer`` and ``lr_scheduler`` when they are given. By default ``meta`` records the save time. 26 | Args: 27 | model (Module): Module whose params are to be saved. 28 | filename (str): Checkpoint filename. 29 | optimizer (:obj:`Optimizer`, optional): Optimizer to be saved. 30 | lr_scheduler (:obj:`_LRScheduler`, optional): _LRScheduler to be saved. 31 | meta (dict, optional): Metadata to be saved in checkpoint. 
32 | """ 33 | if meta is None: 34 | meta = {} 35 | elif not isinstance(meta, dict): 36 | raise TypeError('meta must be a dict or None, but got {}'.format( 37 | type(meta))) 38 | meta.update(time=time.asctime()) 39 | 40 | file_dir = os.path.dirname(filename) 41 | if not os.path.exists(file_dir): 42 | os.mkdir(file_dir) 43 | 44 | if hasattr(model, 'module'): 45 | model = model.module 46 | 47 | checkpoint = { 48 | 'meta': meta, 49 | 'state_dict': weights_to_cpu(model.state_dict()) 50 | } 51 | if optimizer is not None: 52 | checkpoint['optimizer'] = optimizer.state_dict() 53 | if lr_scheduler is not None: 54 | checkpoint['lr_scheduler'] = lr_scheduler.state_dict() 55 | torch.save(checkpoint, filename) 56 | 57 | 58 | def load_checkpoint(model, filename, map_location=None, strict=False): 59 | if os.path.isfile(filename): 60 | checkpoint = torch.load(filename, map_location=map_location) 61 | 62 | if isinstance(checkpoint, OrderedDict): 63 | state_dict = checkpoint 64 | elif isinstance(checkpoint, dict) and 'state_dict' in checkpoint: 65 | state_dict = checkpoint['state_dict'] 66 | else: 67 | raise RuntimeError( 68 | 'No state_dict found in checkpoint file {}'.format(filename)) 69 | if hasattr(model, 'module'): 70 | model.module.load_state_dict(state_dict, strict=strict) 71 | else: 72 | model.load_state_dict(state_dict, strict=strict) 73 | return checkpoint 74 | else: 75 | raise RuntimeError( 76 | 'No checkpoint file found in path {}'.format(filename)) 77 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/utils/dist_utils.py: -------------------------------------------------------------------------------- 1 | # adapted from mmcv and mmdetection 2 | 3 | import os 4 | import torch 5 | import torch.distributed as dist 6 | 7 | 8 | def init_dist_pytorch(backend='nccl', **kwargs): 9 | rank = int(os.environ['RANK']) 10 | num_gpus = torch.cuda.device_count() 11 | torch.cuda.set_device(rank % num_gpus) 12 | dist.init_process_group(backend=backend, **kwargs) 13 | 14 | 15 | def get_dist_info(): 16 | if dist.is_available(): 17 | initialized = dist.is_initialized() 18 | else: 19 | initialized = False 20 | 21 | if initialized: 22 | rank = dist.get_rank() 23 | world_size = dist.get_world_size() 24 | else: 25 | rank = 0 26 | world_size = 1 27 | 28 | return rank, world_size 29 | 30 | 31 | def reduce_tensor(data, average=True): 32 | rank, world_size = get_dist_info() 33 | if world_size < 2: 34 | return data 35 | 36 | with torch.no_grad(): 37 | if not isinstance(data, torch.Tensor): 38 | data = torch.tensor(data).cuda() 39 | dist.reduce(data, dst=0) 40 | if rank == 0 and average: 41 | data /= world_size 42 | return data 43 | 44 | 45 | def gather_tensor(data): 46 | _, world_size = get_dist_info() 47 | if world_size < 2: 48 | return data 49 | 50 | with torch.no_grad(): 51 | if not isinstance(data, torch.Tensor): 52 | data = torch.tensor(data).cuda() 53 | 54 | gather_list = [torch.ones_like(data) for _ in range(world_size)] 55 | dist.all_gather(gather_list, data) 56 | gather_data = torch.stack(gather_list) 57 | gather_data = torch.transpose(gather_data, 0, 1) 58 | gather_data = gather_data.reshape((-1, *gather_data.shape[2:])) 59 | 60 | return gather_data 61 | 62 | 63 | def synchronize(): 64 | if not dist.is_available(): 65 | return 66 | if not dist.is_initialized(): 67 | return 68 | world_size = dist.get_world_size() 69 | if world_size == 1: 70 | return 71 | dist.barrier() 72 | 
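# A minimal usage sketch of the helpers above; the tensor shapes and the loss
# value are illustrative assumptions, not repository code. In a single-process
# run get_dist_info() reports world_size == 1, so reduce_tensor() and
# gather_tensor() simply return their inputs unchanged; launched with
# init_dist_pytorch() under torchrun, they average the loss onto rank 0 and
# collect the per-process prediction batches into one tensor.
if __name__ == '__main__':
    rank, world_size = get_dist_info()

    loss = torch.tensor(0.25)           # assumed per-process scalar loss
    avg_loss = reduce_tensor(loss)      # averaged onto rank 0 when world_size > 1

    preds = torch.zeros(4, 21, 32, 32)  # assumed per-process predictions (N, C, H, W)
    all_preds = gather_tensor(preds)    # gathered from all processes, sample-interleaved

    print(rank, world_size, avg_loss.item(), all_preds.shape)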
-------------------------------------------------------------------------------- /semantic segmentation/vedaseg/utils/registry.py: -------------------------------------------------------------------------------- 1 | # adapted from https://github.com/open-mmlab/mmcv 2 | import inspect 3 | from functools import partial 4 | 5 | 6 | class Registry(object): 7 | 8 | def __init__(self, name): 9 | self._name = name 10 | self._module_dict = dict() 11 | 12 | def __repr__(self): 13 | format_str = self.__class__.__name__ + '(name={}, items={})'.format( 14 | self._name, list(self._module_dict.keys())) 15 | return format_str 16 | 17 | @property 18 | def name(self): 19 | return self._name 20 | 21 | @property 22 | def module_dict(self): 23 | return self._module_dict 24 | 25 | def get(self, key): 26 | return self._module_dict.get(key, None) 27 | 28 | def _register_module(self, module_class, force=False): 29 | """Register a module. 30 | Args: 31 | module_class (class): Class to be registered. 32 | """ 33 | if not inspect.isclass(module_class): 34 | raise TypeError('module must be a class, but got {}'.format( 35 | type(module_class))) 36 | module_name = module_class.__name__ 37 | if not force and module_name in self._module_dict: 38 | raise KeyError('{} is already registered in {}'.format( 39 | module_name, self.name)) 40 | self._module_dict[module_name] = module_class 41 | 42 | def register_module(self, cls=None, force=False): 43 | if cls is None: 44 | return partial(self.register_module, force=force) 45 | self._register_module(cls, force=force) 46 | return cls 47 | 48 | 49 | def build_from_cfg(cfg, src, default_args=None, mode='registry'): 50 | if mode == 'registry': 51 | return build_from_registry(cfg, src, default_args=default_args) 52 | elif mode == 'module': 53 | return build_from_module(cfg, src, default_args=default_args) 54 | else: 55 | raise ValueError('Mode {} is not supported currently'.format(mode)) 56 | 57 | 58 | def build_from_registry(cfg, registry, default_args=None): 59 | """Build a module from config dict. 60 | Args: 61 | cfg (dict): Config dict. It should at least contain the key "type". 62 | registry (:obj:`Registry`): The registry to search the type from. 63 | default_args (dict, optional): Default initialization arguments. 64 | Returns: 65 | obj: The constructed object. 66 | """ 67 | assert isinstance(cfg, dict) and 'type' in cfg 68 | assert isinstance(default_args, dict) or default_args is None 69 | args = cfg.copy() 70 | obj_type = args.pop('type') 71 | if isinstance(obj_type, str): 72 | obj_cls = registry.get(obj_type) 73 | if obj_cls is None: 74 | raise KeyError('{} is not in the {} registry'.format( 75 | obj_type, registry.name)) 76 | elif inspect.isclass(obj_type): 77 | obj_cls = obj_type 78 | else: 79 | raise TypeError('type must be a str or valid type, but got {}'.format( 80 | type(obj_type))) 81 | if default_args is not None: 82 | for name, value in default_args.items(): 83 | args.setdefault(name, value) 84 | return obj_cls(**args) 85 | 86 | 87 | def build_from_module(cfg, module, default_args=None): 88 | """Build a module from config dict. 89 | Args: 90 | cfg (dict): Config dict. It should at least contain the key "type". 91 | module (:obj:`module`): The module to search the type from. 92 | default_args (dict, optional): Default initialization arguments. 93 | Returns: 94 | obj: The constructed object. 
95 | """ 96 | assert isinstance(cfg, dict) and 'type' in cfg 97 | assert isinstance(default_args, dict) or default_args is None 98 | args = cfg.copy() 99 | obj_type = args.pop('type') 100 | if isinstance(obj_type, str): 101 | obj_cls = getattr(module, obj_type) 102 | if obj_cls is None: 103 | raise KeyError('{} is not in the {} module'.format( 104 | obj_type, module)) 105 | elif inspect.isclass(obj_type): 106 | obj_cls = obj_type 107 | else: 108 | raise TypeError('type must be a str or valid type, but got {}'.format( 109 | type(obj_type))) 110 | if default_args is not None: 111 | for name, value in default_args.items(): 112 | args.setdefault(name, value) 113 | return obj_cls(**args) 114 | --------------------------------------------------------------------------------
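A minimal usage sketch of the Registry / build_from_cfg pattern defined above, assuming the vedaseg package is importable; the BACKBONES registry and the ToyBackbone class are hypothetical stand-ins, not modules of this repository. A config's 'type' key selects the class, the remaining keys become constructor keyword arguments, and default_args fills in anything the config omits; mode='module' resolves the type from a plain Python module instead, which is how optims/builder.py wraps torch.optim.

import torch.nn as nn
import torch.optim as optim

from vedaseg.utils import Registry, build_from_cfg

# Hypothetical registry and class, for illustration only.
BACKBONES = Registry('backbone')


@BACKBONES.register_module
class ToyBackbone(nn.Module):
    def __init__(self, channels=16):
        super().__init__()
        self.conv = nn.Conv2d(3, channels, kernel_size=3, padding=1)

    def forward(self, x):
        return self.conv(x)


# Registry mode (the default): look the 'type' up in BACKBONES and
# instantiate it with the remaining keys as keyword arguments.
backbone = build_from_cfg(dict(type='ToyBackbone', channels=32), BACKBONES)

# Module mode: look the 'type' up as an attribute of torch.optim, with
# default_args supplying the parameters to optimize.
optimizer = build_from_cfg(dict(type='SGD', lr=0.01),
                           optim,
                           default_args=dict(params=backbone.parameters()),
                           mode='module')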