├── README.md ├── image classification ├── CIFAR10 │ ├── main.py │ ├── models │ │ ├── __init__.py │ │ ├── alexnet.py │ │ ├── quant_layer.py │ │ ├── spiking.py │ │ └── vgg.py │ ├── snn.py │ └── snn_ft.py ├── CIFAR10_res │ ├── main.py │ ├── models │ │ ├── __init__.py │ │ ├── quant_layer.py │ │ └── resnet.py │ ├── snn.py │ └── snn_ft.py ├── CIFAR10_resnet18 │ ├── main.py │ ├── models │ │ ├── __init__.py │ │ ├── quant_layer.py │ │ └── resnet.py │ └── snn_ft.py └── ImageNet │ ├── dali_main.py │ ├── models │ ├── __init__.py │ ├── alexnet.py │ ├── quant_layer.py │ ├── spiking.py │ └── vgg.py │ ├── snn.py │ └── snn_ft.py ├── object detection ├── README.md ├── backbone │ ├── __init__.py │ ├── darknet19.py │ ├── darknet53.py │ ├── darknet_tiny.py │ ├── darknet_tiny_v2.py │ ├── myresnet.py │ ├── quant_layer.py │ └── resnet.py ├── data │ ├── __init__.py │ ├── coco2017.py │ ├── config.py │ ├── scripts │ │ ├── COCO2017.sh │ │ ├── VOC2007.sh │ │ └── VOC2012.sh │ └── voc0712.py ├── demo.py ├── eval.py ├── models │ ├── yolov2_d19.py │ ├── yolov2_r34.py │ ├── yolov2_r50.py │ ├── yolov2_tiny.py │ ├── yolov3.py │ └── yolov3_spp.py ├── test.py ├── tools.py ├── train.py └── utils │ ├── __init__.py │ ├── augmentations.py │ ├── cocoapi_evaluator.py │ ├── com_paras_flops.py │ ├── distributed_utils.py │ ├── kmeans_anchor.py │ ├── modules.py │ └── vocapi_evaluator.py └── semantic segmentation ├── README.md ├── configs ├── coco_deeplabv1.py ├── coco_deeplabv1_2bit.py ├── coco_deeplabv1_3bit.py ├── coco_deeplabv1_4bit.py ├── coco_deeplabv1_T15.py ├── coco_deeplabv1_T3.py ├── coco_deeplabv1_T7.py ├── coco_deeplabv3.py ├── coco_deeplabv3_2bit.py ├── coco_deeplabv3_3bit.py ├── coco_deeplabv3_4bit.py ├── coco_deeplabv3_T15.py ├── coco_deeplabv3_T3.py ├── coco_deeplabv3_T7.py ├── coco_multilabel_unet.py ├── coco_unet.py ├── voc_deeplabv1.py ├── voc_deeplabv1_2bit.py ├── voc_deeplabv1_3bit.py ├── voc_deeplabv1_4bit.py ├── voc_deeplabv1_T15.py ├── voc_deeplabv1_T3.py ├── voc_deeplabv1_T7.py ├── voc_deeplabv2.py ├── voc_deeplabv3.bak ├── voc_deeplabv3.py ├── voc_deeplabv3_2bit.py ├── voc_deeplabv3_3bit.py ├── voc_deeplabv3_4bit.py ├── voc_deeplabv3_T15.py ├── voc_deeplabv3_T3.py ├── voc_deeplabv3_T7.py ├── voc_deeplabv3plus.py ├── voc_deeplabvr.py ├── voc_deeplabvr_2bit.py ├── voc_deeplabvr_3bit.py ├── voc_deeplabvr_4bit.py ├── voc_fpn.py ├── voc_pspnet.py ├── voc_pspnet_v1c.py └── voc_unet.py ├── tools ├── decode.py ├── dist_test.sh ├── dist_train.sh ├── encode_voc12.py ├── encode_voc12_aug.py ├── inference.py ├── test.py ├── torch2onnx.py └── train.py └── vedaseg ├── __init__.py ├── criteria ├── __init__.py ├── bce_loss.py ├── builder.py └── registry.py ├── dataloaders ├── __init__.py ├── builder.py ├── registry.py └── samplers │ ├── __init__.py │ ├── builder.py │ ├── distributed.py │ ├── non_distributed.py │ └── registry.py ├── datasets ├── __init__.py ├── base.py ├── builder.py ├── coco.py ├── registry.py └── voc.py ├── loggers ├── __init__.py └── builder.py ├── lr_schedulers ├── __init__.py ├── base.py ├── builder.py ├── poly_lr.py └── registry.py ├── metrics ├── __init__.py ├── base.py ├── builder.py ├── metrics.py └── registry.py ├── models ├── __init__.py ├── builder.py ├── decoders │ ├── __init__.py │ ├── bricks.py │ ├── builder.py │ ├── gfpn │ │ ├── __init__.py │ │ └── gfpn.py │ └── registry.py ├── encoders │ ├── __init__.py │ ├── backbones │ │ ├── __init__.py │ │ ├── builder.py │ │ ├── myresnet.py │ │ ├── quant_layer.py │ │ ├── registry.py │ │ ├── resnet.py │ │ ├── spiking.py │ │ └── vgg.py │ ├── builder.py │ └── 
enhance_modules │ │ ├── __init__.py │ │ ├── aspp.py │ │ ├── builder.py │ │ ├── ppm.py │ │ └── registry.py ├── heads │ ├── __init__.py │ ├── builder.py │ ├── head.py │ └── registry.py ├── registry.py ├── utils │ ├── __init__.py │ ├── act.py │ ├── builder.py │ ├── conv_module.py │ ├── norm.py │ ├── registry.py │ └── upsample.py └── weight_init.py ├── optims ├── __init__.py └── builder.py ├── runners ├── __init__.py ├── base.py ├── inference_runner.py ├── test_runner.py └── train_runner.py ├── transforms ├── __init__.py ├── builder.py ├── registry.py └── transforms.py └── utils ├── __init__.py ├── checkpoint.py ├── config.py ├── dist_utils.py └── registry.py /image classification/CIFAR10/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .quant_layer import * 2 | from .alexnet import * 3 | from .vgg import * 4 | from .spiking import * -------------------------------------------------------------------------------- /image classification/CIFAR10/models/spiking.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | 6 | def unsigned_spikes(model): 7 | for m in model.modules(): 8 | if isinstance(m, Spiking): 9 | m.sign = False 10 | 11 | #####the spiking wrapper###### 12 | 13 | class Spiking(nn.Module): 14 | def __init__(self, block, T): 15 | super(Spiking, self).__init__() 16 | self.block = block 17 | self.T = T 18 | self.is_first = False 19 | self.idem = False 20 | self.sign = True 21 | def forward(self, x): 22 | if self.idem: 23 | return x 24 | 25 | ###initialize membrane to half threshold 26 | threshold = self.block[2].act_alpha.data 27 | membrane = 0.5 * threshold 28 | sum_spikes = 0 29 | 30 | #prepare charges 31 | if self.is_first: 32 | x.unsqueeze_(1) 33 | x = x.repeat(1, self.T, 1, 1, 1) 34 | train_shape = [x.shape[0], x.shape[1]] 35 | x = x.flatten(0, 1) 36 | x = self.block(x) 37 | train_shape.extend(x.shape[1:]) 38 | x = x.reshape(train_shape) 39 | 40 | #integrate charges 41 | for dt in range(self.T): 42 | membrane = membrane + x[:,dt] 43 | if dt == 0: 44 | spike_train = torch.zeros(membrane.shape[:1] + torch.Size([self.T]) + membrane.shape[1:],device=membrane.device) 45 | 46 | spikes = membrane >= threshold 47 | membrane[spikes] = membrane[spikes] - threshold 48 | spikes = spikes.float() 49 | sum_spikes = sum_spikes + spikes 50 | 51 | ###signed spikes### 52 | if self.sign: 53 | inhibit = membrane <= -1e-3 54 | inhibit = inhibit & (sum_spikes > 0) 55 | membrane[inhibit] = membrane[inhibit] + threshold 56 | inhibit = inhibit.float() 57 | sum_spikes = sum_spikes - inhibit 58 | else: 59 | inhibit = 0 60 | 61 | spike_train[:,dt] = spikes - inhibit 62 | 63 | spike_train = spike_train * threshold 64 | return spike_train 65 | 66 | 67 | class last_Spiking(nn.Module): 68 | def __init__(self, block, T): 69 | super(last_Spiking, self).__init__() 70 | self.block = block 71 | self.T = T 72 | self.idem = False 73 | 74 | def forward(self, x): 75 | if self.idem: 76 | return x 77 | #prepare charges 78 | train_shape = [x.shape[0], x.shape[1]] 79 | x = x.flatten(0, 1) 80 | x = self.block(x) 81 | train_shape.extend(x.shape[1:]) 82 | x = x.reshape(train_shape) 83 | 84 | #integrate charges 85 | return x.sum(dim=1) 86 | 87 | class IF(nn.Module): 88 | def __init__(self): 89 | super(IF, self).__init__() 90 | ###changes threshold to act_alpha 91 | ###being fleet 92 | self.act_alpha = torch.nn.Parameter(torch.tensor(1.0)) 93 | 94 | def forward(self, x): 95 | return x 96 | 
97 | def show_params(self): 98 | act_alpha = round(self.act_alpha.data.item(), 3) 99 | print('clipping threshold activation alpha: {:2f}'.format(act_alpha)) 100 | 101 | def extra_repr(self) -> str: 102 | return 'threshold={:.3f}'.format(self.act_alpha) -------------------------------------------------------------------------------- /image classification/CIFAR10_res/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .resnet import * 2 | from .quant_layer import * -------------------------------------------------------------------------------- /image classification/CIFAR10_resnet18/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .resnet import * 2 | from .quant_layer import * -------------------------------------------------------------------------------- /image classification/ImageNet/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Additive Power-of-Two Quantization: An Efficient Non-uniform Discretization For Neural Networks 2 | # Yuhang Li, Xin Dong, Wei Wang 3 | # International Conference on Learning Representations (ICLR), 2020. 4 | 5 | 6 | from .spiking import * 7 | from .quant_layer import * 8 | from .alexnet import * 9 | from .vgg import * -------------------------------------------------------------------------------- /image classification/ImageNet/models/spiking.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | 6 | def unsigned_spikes(model): 7 | for m in model.modules(): 8 | if isinstance(m, Spiking): 9 | m.sign = False 10 | 11 | #####the spiking wrapper###### 12 | 13 | class Spiking(nn.Module): 14 | def __init__(self, block, T): 15 | super(Spiking, self).__init__() 16 | self.block = block 17 | self.T = T 18 | self.is_first = False 19 | self.idem = False 20 | self.sign = True 21 | 22 | def forward(self, x): 23 | if self.idem: 24 | return x 25 | 26 | ###initialize membrane to half threshold 27 | threshold = self.block[2].act_alpha.data 28 | membrane = 0.5 * threshold 29 | sum_spikes = 0 30 | 31 | #prepare charges 32 | if self.is_first: 33 | x.unsqueeze_(1) 34 | x = x.repeat(1, self.T, 1, 1, 1) 35 | train_shape = [x.shape[0], x.shape[1]] 36 | x = x.flatten(0, 1) 37 | x = self.block(x) 38 | train_shape.extend(x.shape[1:]) 39 | x = x.reshape(train_shape) 40 | 41 | #integrate charges 42 | for dt in range(self.T): 43 | membrane = membrane + x[:,dt] 44 | if dt == 0: 45 | spike_train = torch.zeros(membrane.shape[:1] + torch.Size([self.T]) + membrane.shape[1:],device=membrane.device) 46 | 47 | spikes = membrane >= threshold 48 | membrane[spikes] = membrane[spikes] - threshold 49 | spikes = spikes.float() 50 | sum_spikes = sum_spikes + spikes 51 | 52 | ###signed spikes### 53 | if self.sign: 54 | inhibit = membrane <= -1e-3 55 | inhibit = inhibit & (sum_spikes > 0) 56 | membrane[inhibit] = membrane[inhibit] + threshold 57 | inhibit = inhibit.float() 58 | sum_spikes = sum_spikes - inhibit 59 | else: 60 | inhibit = 0 61 | 62 | spike_train[:,dt] = spikes - inhibit 63 | 64 | spike_train = spike_train * threshold 65 | return spike_train 66 | 67 | class last_Spiking(nn.Module): 68 | def __init__(self, block, T): 69 | super(last_Spiking, self).__init__() 70 | self.block = block 71 | self.T = T 72 | self.idem = False 73 | 74 | def forward(self, x): 75 | if self.idem: 76 | return x 77 | #prepare charges 78 | train_shape = [x.shape[0], 
x.shape[1]] 79 | x = x.flatten(0, 1) 80 | x = self.block(x) 81 | train_shape.extend(x.shape[1:]) 82 | x = x.reshape(train_shape) 83 | 84 | #integrate charges 85 | return x.sum(dim=1) 86 | 87 | class IF(nn.Module): 88 | def __init__(self): 89 | super(IF, self).__init__() 90 | ###changes threshold to act_alpha 91 | ###being fleet 92 | self.act_alpha = torch.nn.Parameter(torch.tensor(1.0)) 93 | 94 | def forward(self, x): 95 | return x 96 | 97 | def show_params(self): 98 | act_alpha = round(self.act_alpha.data.item(), 3) 99 | print('clipping threshold activation alpha: {:2f}'.format(act_alpha)) 100 | 101 | def extra_repr(self) -> str: 102 | return 'threshold={:.3f}'.format(self.act_alpha) -------------------------------------------------------------------------------- /object detection/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | from .resnet import build_resnet 2 | from .darknet19 import build_darknet19 3 | from .darknet53 import build_darknet53 4 | from .darknet_tiny import build_darknet_tiny 5 | from .darknet_tiny_v2 import build_darknet_tiny_v2 6 | 7 | 8 | def build_backbone(model_name='resnet18', pretrained=False): 9 | if 'resnet' in model_name: 10 | backbone = build_resnet(model_name, pretrained) 11 | 12 | elif model_name == 'darknet19': 13 | backbone = build_darknet19(pretrained) 14 | 15 | elif model_name == 'darknet53': 16 | backbone = build_darknet53(pretrained) 17 | 18 | elif model_name == 'darknet_tiny': 19 | backbone = build_darknet_tiny(pretrained) 20 | 21 | elif model_name == 'darknet_tiny_v2': 22 | backbone = build_darknet_tiny_v2(pretrained) 23 | 24 | return backbone 25 | -------------------------------------------------------------------------------- /object detection/backbone/darknet19.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import os 4 | 5 | 6 | model_urls = { 7 | "darknet19": "https://github.com/yjh0410/image_classification_pytorch/releases/download/weight/darknet19.pth", 8 | } 9 | 10 | 11 | __all__ = ['darknet19'] 12 | 13 | 14 | class Conv_BN_LeakyReLU(nn.Module): 15 | def __init__(self, in_channels, out_channels, ksize, padding=0, stride=1, dilation=1): 16 | super(Conv_BN_LeakyReLU, self).__init__() 17 | self.convs = nn.Sequential( 18 | nn.Conv2d(in_channels, out_channels, ksize, padding=padding, stride=stride, dilation=dilation), 19 | nn.BatchNorm2d(out_channels), 20 | nn.LeakyReLU(0.1, inplace=True) 21 | ) 22 | 23 | def forward(self, x): 24 | return self.convs(x) 25 | 26 | 27 | class DarkNet_19(nn.Module): 28 | def __init__(self): 29 | super(DarkNet_19, self).__init__() 30 | # backbone network : DarkNet-19 31 | # output : stride = 2, c = 32 32 | self.conv_1 = nn.Sequential( 33 | Conv_BN_LeakyReLU(3, 32, 3, 1), 34 | nn.MaxPool2d((2,2), 2), 35 | ) 36 | 37 | # output : stride = 4, c = 64 38 | self.conv_2 = nn.Sequential( 39 | Conv_BN_LeakyReLU(32, 64, 3, 1), 40 | nn.MaxPool2d((2,2), 2) 41 | ) 42 | 43 | # output : stride = 8, c = 128 44 | self.conv_3 = nn.Sequential( 45 | Conv_BN_LeakyReLU(64, 128, 3, 1), 46 | Conv_BN_LeakyReLU(128, 64, 1), 47 | Conv_BN_LeakyReLU(64, 128, 3, 1), 48 | nn.MaxPool2d((2,2), 2) 49 | ) 50 | 51 | # output : stride = 8, c = 256 52 | self.conv_4 = nn.Sequential( 53 | Conv_BN_LeakyReLU(128, 256, 3, 1), 54 | Conv_BN_LeakyReLU(256, 128, 1), 55 | Conv_BN_LeakyReLU(128, 256, 3, 1), 56 | ) 57 | 58 | # output : stride = 16, c = 512 59 | self.maxpool_4 = nn.MaxPool2d((2, 2), 2) 60 | self.conv_5 = 
nn.Sequential( 61 | Conv_BN_LeakyReLU(256, 512, 3, 1), 62 | Conv_BN_LeakyReLU(512, 256, 1), 63 | Conv_BN_LeakyReLU(256, 512, 3, 1), 64 | Conv_BN_LeakyReLU(512, 256, 1), 65 | Conv_BN_LeakyReLU(256, 512, 3, 1), 66 | ) 67 | 68 | # output : stride = 32, c = 1024 69 | self.maxpool_5 = nn.MaxPool2d((2, 2), 2) 70 | self.conv_6 = nn.Sequential( 71 | Conv_BN_LeakyReLU(512, 1024, 3, 1), 72 | Conv_BN_LeakyReLU(1024, 512, 1), 73 | Conv_BN_LeakyReLU(512, 1024, 3, 1), 74 | Conv_BN_LeakyReLU(1024, 512, 1), 75 | Conv_BN_LeakyReLU(512, 1024, 3, 1) 76 | ) 77 | 78 | def forward(self, x): 79 | c1 = self.conv_1(x) 80 | c2 = self.conv_2(c1) 81 | c3 = self.conv_3(c2) 82 | c3 = self.conv_4(c3) 83 | c4 = self.conv_5(self.maxpool_4(c3)) 84 | c5 = self.conv_6(self.maxpool_5(c4)) 85 | 86 | output = { 87 | 'layer1': c3, 88 | 'layer2': c4, 89 | 'layer3': c5 90 | } 91 | 92 | return output 93 | 94 | 95 | def build_darknet19(pretrained=False): 96 | # model 97 | model = DarkNet_19() 98 | 99 | # load weight 100 | if pretrained: 101 | print('Loading pretrained weight ...') 102 | url = model_urls['darknet19'] 103 | # checkpoint state dict 104 | checkpoint_state_dict = torch.hub.load_state_dict_from_url( 105 | url=url, map_location="cpu", check_hash=True) 106 | # model state dict 107 | model_state_dict = model.state_dict() 108 | # check 109 | for k in list(checkpoint_state_dict.keys()): 110 | if k in model_state_dict: 111 | shape_model = tuple(model_state_dict[k].shape) 112 | shape_checkpoint = tuple(checkpoint_state_dict[k].shape) 113 | if shape_model != shape_checkpoint: 114 | checkpoint_state_dict.pop(k) 115 | else: 116 | checkpoint_state_dict.pop(k) 117 | print(k) 118 | 119 | model.load_state_dict(checkpoint_state_dict) 120 | 121 | return model 122 | 123 | 124 | if __name__ == '__main__': 125 | import time 126 | net = build_darknet19(pretrained=True) 127 | x = torch.randn(1, 3, 224, 224) 128 | t0 = time.time() 129 | output = net(x) 130 | t1 = time.time() 131 | print('Time: ', t1 - t0) 132 | 133 | for k in output.keys(): 134 | print('{} : {}'.format(k, output[k].shape)) 135 | -------------------------------------------------------------------------------- /object detection/backbone/darknet53.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | model_urls = { 6 | "darknet53": "https://github.com/yjh0410/image_classification_pytorch/releases/download/weight/darknet53.pth", 7 | } 8 | 9 | 10 | __all__ = ['darknet53'] 11 | 12 | 13 | class Conv_BN_LeakyReLU(nn.Module): 14 | def __init__(self, in_channels, out_channels, ksize, padding=0, stride=1, dilation=1): 15 | super(Conv_BN_LeakyReLU, self).__init__() 16 | self.convs = nn.Sequential( 17 | nn.Conv2d(in_channels, out_channels, ksize, padding=padding, stride=stride, dilation=dilation), 18 | nn.BatchNorm2d(out_channels), 19 | nn.LeakyReLU(0.1, inplace=True) 20 | ) 21 | 22 | def forward(self, x): 23 | return self.convs(x) 24 | 25 | 26 | class ResBlock(nn.Module): 27 | def __init__(self, ch, nblocks=1): 28 | super().__init__() 29 | self.module_list = nn.ModuleList() 30 | for _ in range(nblocks): 31 | resblock_one = nn.Sequential( 32 | Conv_BN_LeakyReLU(ch, ch//2, 1), 33 | Conv_BN_LeakyReLU(ch//2, ch, 3, padding=1) 34 | ) 35 | self.module_list.append(resblock_one) 36 | 37 | def forward(self, x): 38 | for module in self.module_list: 39 | x = module(x) + x 40 | return x 41 | 42 | 43 | class DarkNet_53(nn.Module): 44 | """ 45 | DarkNet-53. 
46 | """ 47 | def __init__(self): 48 | super(DarkNet_53, self).__init__() 49 | # stride = 2 50 | self.layer_1 = nn.Sequential( 51 | Conv_BN_LeakyReLU(3, 32, 3, padding=1), 52 | Conv_BN_LeakyReLU(32, 64, 3, padding=1, stride=2), 53 | ResBlock(64, nblocks=1) 54 | ) 55 | # stride = 4 56 | self.layer_2 = nn.Sequential( 57 | Conv_BN_LeakyReLU(64, 128, 3, padding=1, stride=2), 58 | ResBlock(128, nblocks=2) 59 | ) 60 | # stride = 8 61 | self.layer_3 = nn.Sequential( 62 | Conv_BN_LeakyReLU(128, 256, 3, padding=1, stride=2), 63 | ResBlock(256, nblocks=8) 64 | ) 65 | # stride = 16 66 | self.layer_4 = nn.Sequential( 67 | Conv_BN_LeakyReLU(256, 512, 3, padding=1, stride=2), 68 | ResBlock(512, nblocks=8) 69 | ) 70 | # stride = 32 71 | self.layer_5 = nn.Sequential( 72 | Conv_BN_LeakyReLU(512, 1024, 3, padding=1, stride=2), 73 | ResBlock(1024, nblocks=4) 74 | ) 75 | 76 | 77 | def forward(self, x, targets=None): 78 | c1 = self.layer_1(x) 79 | c2 = self.layer_2(c1) 80 | c3 = self.layer_3(c2) 81 | c4 = self.layer_4(c3) 82 | c5 = self.layer_5(c4) 83 | 84 | output = { 85 | 'layer1': c3, 86 | 'layer2': c4, 87 | 'layer3': c5 88 | } 89 | 90 | return output 91 | 92 | 93 | def build_darknet53(pretrained=False): 94 | # model 95 | model = DarkNet_53() 96 | 97 | # load weight 98 | if pretrained: 99 | print('Loading pretrained weight ...') 100 | url = model_urls['darknet53'] 101 | # checkpoint state dict 102 | checkpoint_state_dict = torch.hub.load_state_dict_from_url( 103 | url=url, map_location="cpu", check_hash=True) 104 | # model state dict 105 | model_state_dict = model.state_dict() 106 | # check 107 | for k in list(checkpoint_state_dict.keys()): 108 | if k in model_state_dict: 109 | shape_model = tuple(model_state_dict[k].shape) 110 | shape_checkpoint = tuple(checkpoint_state_dict[k].shape) 111 | if shape_model != shape_checkpoint: 112 | checkpoint_state_dict.pop(k) 113 | else: 114 | checkpoint_state_dict.pop(k) 115 | print(k) 116 | 117 | model.load_state_dict(checkpoint_state_dict) 118 | 119 | return model 120 | 121 | 122 | if __name__ == '__main__': 123 | import time 124 | net = build_darknet53(pretrained=True) 125 | x = torch.randn(1, 3, 224, 224) 126 | t0 = time.time() 127 | output = net(x) 128 | t1 = time.time() 129 | print('Time: ', t1 - t0) 130 | 131 | for k in output.keys(): 132 | print('{} : {}'.format(k, output[k].shape)) 133 | -------------------------------------------------------------------------------- /object detection/backbone/darknet_tiny.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | model_urls = { 6 | "darknet_tiny": "https://github.com/yjh0410/image_classification_pytorch/releases/download/weight/darknet_tiny.pth", 7 | } 8 | 9 | 10 | __all__ = ['build_darknet_tiny'] 11 | 12 | 13 | class Conv_BN_LeakyReLU(nn.Module): 14 | def __init__(self, in_channels, out_channels, ksize, padding=0, stride=1, dilation=1): 15 | super(Conv_BN_LeakyReLU, self).__init__() 16 | self.convs = nn.Sequential( 17 | nn.Conv2d(in_channels, out_channels, ksize, padding=padding, stride=stride, dilation=dilation), 18 | nn.BatchNorm2d(out_channels), 19 | nn.LeakyReLU(0.1, inplace=True) 20 | ) 21 | 22 | def forward(self, x): 23 | return self.convs(x) 24 | 25 | 26 | class DarkNet_Tiny(nn.Module): 27 | def __init__(self): 28 | 29 | super(DarkNet_Tiny, self).__init__() 30 | # backbone network : DarkNet_Tiny 31 | self.conv_1 = Conv_BN_LeakyReLU(3, 16, 3, 1) 32 | self.maxpool_1 = nn.MaxPool2d((2, 2), 2) # stride = 2 33 | 34 | self.conv_2 
= Conv_BN_LeakyReLU(16, 32, 3, 1) 35 | self.maxpool_2 = nn.MaxPool2d((2, 2), 2) # stride = 4 36 | 37 | self.conv_3 = Conv_BN_LeakyReLU(32, 64, 3, 1) 38 | self.maxpool_3 = nn.MaxPool2d((2, 2), 2) # stride = 8 39 | 40 | self.conv_4 = Conv_BN_LeakyReLU(64, 128, 3, 1) 41 | self.maxpool_4 = nn.MaxPool2d((2, 2), 2) # stride = 16 42 | 43 | self.conv_5 = Conv_BN_LeakyReLU(128, 256, 3, 1) 44 | self.maxpool_5 = nn.MaxPool2d((2, 2), 2) # stride = 32 45 | 46 | self.conv_6 = Conv_BN_LeakyReLU(256, 512, 3, 1) 47 | self.maxpool_6 = nn.Sequential( 48 | nn.ZeroPad2d((0, 1, 0, 1)), 49 | nn.MaxPool2d((2, 2), 1) # stride = 32 50 | ) 51 | 52 | self.conv_7 = Conv_BN_LeakyReLU(512, 1024, 3, 1) 53 | 54 | 55 | def forward(self, x): 56 | x = self.conv_1(x) 57 | c1 = self.maxpool_1(x) 58 | c1 = self.conv_2(c1) 59 | c2 = self.maxpool_2(c1) 60 | c2 = self.conv_3(c2) 61 | c3 = self.maxpool_3(c2) 62 | c3 = self.conv_4(c3) 63 | c4 = self.maxpool_4(c3) 64 | c4 = self.conv_5(c4) # stride = 16 65 | c5 = self.maxpool_5(c4) 66 | c5 = self.conv_6(c5) 67 | c5 = self.maxpool_6(c5) 68 | c5 = self.conv_7(c5) # stride = 32 69 | 70 | output = { 71 | 'layer1': c3, 72 | 'layer2': c4, 73 | 'layer3': c5 74 | } 75 | 76 | return output 77 | 78 | 79 | def build_darknet_tiny(pretrained=False): 80 | # model 81 | model = DarkNet_Tiny() 82 | 83 | # load weight 84 | if pretrained: 85 | print('Loading pretrained weight ...') 86 | url = model_urls['darknet_tiny'] 87 | # checkpoint state dict 88 | checkpoint_state_dict = torch.hub.load_state_dict_from_url( 89 | url=url, map_location="cpu", check_hash=True) 90 | # model state dict 91 | model_state_dict = model.state_dict() 92 | # check 93 | for k in list(checkpoint_state_dict.keys()): 94 | if k in model_state_dict: 95 | shape_model = tuple(model_state_dict[k].shape) 96 | shape_checkpoint = tuple(checkpoint_state_dict[k].shape) 97 | if shape_model != shape_checkpoint: 98 | checkpoint_state_dict.pop(k) 99 | else: 100 | checkpoint_state_dict.pop(k) 101 | print(k) 102 | 103 | model.load_state_dict(checkpoint_state_dict) 104 | 105 | return model 106 | 107 | 108 | if __name__ == '__main__': 109 | import time 110 | net = build_darknet_tiny(pretrained=True) 111 | x = torch.randn(1, 3, 224, 224) 112 | t0 = time.time() 113 | output = net(x) 114 | t1 = time.time() 115 | print('Time: ', t1 - t0) 116 | 117 | for k in output.keys(): 118 | print('{} : {}'.format(k, output[k].shape)) 119 | -------------------------------------------------------------------------------- /object detection/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .voc0712 import VOCDetection, VOCAnnotationTransform, VOC_CLASSES 2 | from .coco2017 import COCODataset, coco_class_labels, coco_class_index 3 | from .config import * 4 | import torch 5 | import cv2 6 | import numpy as np 7 | 8 | 9 | def detection_collate(batch): 10 | """Custom collate fn for dealing with batches of images that have a different 11 | number of associated object annotations (bounding boxes). 
12 | 13 | Arguments: 14 | batch: (tuple) A tuple of tensor images and lists of annotations 15 | 16 | Return: 17 | A tuple containing: 18 | 1) (tensor) batch of images stacked on their 0 dim 19 | 2) (list of tensors) annotations for a given image are stacked on 20 | 0 dim 21 | """ 22 | targets = [] 23 | imgs = [] 24 | for sample in batch: 25 | imgs.append(sample[0]) 26 | targets.append(torch.FloatTensor(sample[1])) 27 | return torch.stack(imgs, 0), targets 28 | 29 | 30 | def base_transform(image, size, mean, std): 31 | x = cv2.resize(image, (size, size)).astype(np.float32) 32 | x /= 255. 33 | x -= mean 34 | x /= std 35 | return x 36 | 37 | 38 | class BaseTransform: 39 | def __init__(self, size, mean=(0.406, 0.456, 0.485), std=(0.225, 0.224, 0.229)): 40 | self.size = size 41 | self.mean = np.array(mean, dtype=np.float32) 42 | self.std = np.array(std, dtype=np.float32) 43 | 44 | def __call__(self, image, boxes=None, labels=None): 45 | return base_transform(image, self.size, self.mean, self.std), boxes, labels 46 | -------------------------------------------------------------------------------- /object detection/data/config.py: -------------------------------------------------------------------------------- 1 | # config.py 2 | 3 | # YOLOv2 with darknet-19 4 | yolov2_d19_cfg = { 5 | # network 6 | 'backbone': 'd19', 7 | # for multi-scale trick 8 | 'train_size': 640, 9 | 'val_size': 416, 10 | 'random_size_range': [10, 19], 11 | # anchor size 12 | 'anchor_size_voc': [[1.19, 1.98], [2.79, 4.59], [4.53, 8.92], [8.06, 5.29], [10.32, 10.65]], 13 | 'anchor_size_coco': [[0.53, 0.79], [1.71, 2.36], [2.89, 6.44], [6.33, 3.79], [9.03, 9.74]], 14 | # train 15 | # 'lr_epoch': (60, 90), 16 | # 'max_epoch': 160, 17 | 'lr_epoch': (150, 200), 18 | 'max_epoch': 250, 19 | 'ignore_thresh': 0.5 20 | } 21 | 22 | # tinyYOLOv2 23 | yolov2_tiny_cfg = { 24 | # network 25 | 'backbone': 'd-light', 26 | # for multi-scale trick 27 | 'train_size': 640, 28 | 'val_size': 416, 29 | 'random_size_range': [10, 19], 30 | # anchor size 31 | 'anchor_size_voc': [[1.19, 1.98], [2.79, 4.59], [4.53, 8.92], [8.06, 5.29], [10.32, 10.65]], 32 | # 'anchor_size_voc': [[1.08,1.19], [3.42,4.41], [6.63,11.38], [9.42,5.11], [16.62,10.52]], 33 | # 'anchor_size_voc':[[1.3221, 1.73145], [3.19275, 4.00944], [5.05587, 8.09892], [9.47112, 4.84053], [11.2364, 10.0071]], 34 | 'anchor_size_coco': [[0.53, 0.79], [1.71, 2.36], [2.89, 6.44], [6.33, 3.79], [9.03, 9.74]], 35 | # train 36 | # 'lr_epoch': (60, 90), 37 | # 'max_epoch': 160, 38 | 'lr_epoch': (150, 200), 39 | 'max_epoch': 250, 40 | # 'lr_epoch': (80, 120), 41 | # 'max_epoch': 160, 42 | 'ignore_thresh': 0.5 43 | } 44 | 45 | # YOLOv2 with resnet-50 46 | yolov2_r50_cfg = { 47 | # network 48 | 'backbone': 'r50', 49 | # for multi-scale trick 50 | 'train_size': 640, 51 | 'val_size': 416, 52 | 'random_size_range': [10, 19], 53 | # anchor size 54 | 'anchor_size_voc': [[1.19, 1.98], [2.79, 4.59], [4.53, 8.92], [8.06, 5.29], [10.32, 10.65]], 55 | 'anchor_size_coco': [[0.53, 0.79], [1.71, 2.36], [2.89, 6.44], [6.33, 3.79], [9.03, 9.74]], 56 | # train 57 | # 'lr_epoch': (60, 90), 58 | # 'max_epoch': 160, 59 | 'lr_epoch': (150, 200), 60 | 'max_epoch': 250, 61 | 'ignore_thresh': 0.5 62 | } 63 | 64 | # YOLOv3 / YOLOv3Spp 65 | yolov3_d53_cfg = { 66 | # network 67 | 'backbone': 'd53', 68 | # for multi-scale trick 69 | 'train_size': 640, 70 | 'val_size': 416, 71 | 'random_size_range': [10, 19], 72 | # anchor size 73 | 'anchor_size_voc': [[32.64, 47.68], [50.24, 108.16], [126.72, 96.32], 74 | [78.4, 201.92], [178.24, 
178.56], [129.6, 294.72], 75 | [331.84, 194.56], [227.84, 325.76], [365.44, 358.72]], 76 | 'anchor_size_coco': [[12.48, 19.2], [31.36, 46.4],[46.4, 113.92], 77 | [97.28, 55.04], [133.12, 127.36], [79.04, 224.], 78 | [301.12, 150.4 ], [172.16, 285.76], [348.16, 341.12]], 79 | # train 80 | 'lr_epoch': (150, 200), 81 | 'max_epoch': 250, 82 | 'ignore_thresh': 0.5 83 | } 84 | 85 | # YOLOv3Tiny 86 | yolov3_tiny_cfg = { 87 | # network 88 | 'backbone': 'd-light', 89 | # for multi-scale trick 90 | 'train_size': 640, 91 | 'val_size': 416, 92 | 'random_size_range':[10, 19], 93 | # anchor size 94 | 'anchor_size_voc': [[34.01, 61.79], [86.94, 109.68], [93.49, 227.46], 95 | [246.38, 163.33], [178.68, 306.55], [344.89, 337.14]], 96 | 'anchor_size_coco': [[15.09, 23.25], [46.36, 61.47], [68.41, 161.84], 97 | [168.88, 93.59], [154.96, 257.45], [334.74, 302.47]], 98 | # train 99 | 'lr_epoch': (150, 200), 100 | 'max_epoch': 250, 101 | 'ignore_thresh': 0.5 102 | } 103 | -------------------------------------------------------------------------------- /object detection/data/scripts/COCO2017.sh: -------------------------------------------------------------------------------- 1 | mkdir COCO 2 | cd COCO 3 | 4 | wget http://images.cocodataset.org/zips/train2017.zip 5 | wget http://images.cocodataset.org/zips/val2017.zip 6 | wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip 7 | wget http://images.cocodataset.org/zips/test2017.zip 8 | wget http://images.cocodataset.org/annotations/image_info_test2017.zip  9 | 10 | unzip train2017.zip 11 | unzip val2017.zip 12 | unzip annotations_trainval2017.zip 13 | unzip test2017.zip 14 | unzip image_info_test2017.zip 15 | 16 | # rm -f train2017.zip 17 | # rm -f val2017.zip 18 | # rm -f annotations_trainval2017.zip 19 | # rm -f test2017.zip 20 | # rm -f image_info_test2017.zip 21 | -------------------------------------------------------------------------------- /object detection/data/scripts/VOC2007.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Ellis Brown 3 | 4 | start=`date +%s` 5 | 6 | # handle optional download dir 7 | if [ -z "$1" ] 8 | then 9 | # navigate to ~/data 10 | echo "navigating to ~/data/ ..." 11 | mkdir -p ~/data 12 | cd ~/data/ 13 | else 14 | # check if is valid directory 15 | if [ ! -d $1 ]; then 16 | echo $1 "is not a valid directory" 17 | exit 0 18 | fi 19 | echo "navigating to" $1 "..." 20 | cd $1 21 | fi 22 | 23 | echo "Downloading VOC2007 trainval ..." 24 | # Download the data. 25 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar 26 | echo "Downloading VOC2007 test data ..." 27 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar 28 | echo "Done downloading." 29 | 30 | # Extract data 31 | echo "Extracting trainval ..." 32 | tar -xvf VOCtrainval_06-Nov-2007.tar 33 | echo "Extracting test ..." 34 | tar -xvf VOCtest_06-Nov-2007.tar 35 | echo "removing tars ..." 
36 | rm VOCtrainval_06-Nov-2007.tar 37 | rm VOCtest_06-Nov-2007.tar 38 | 39 | end=`date +%s` 40 | runtime=$((end-start)) 41 | 42 | echo "Completed in" $runtime "seconds" -------------------------------------------------------------------------------- /object detection/data/scripts/VOC2012.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Ellis Brown 3 | 4 | start=`date +%s` 5 | 6 | # handle optional download dir 7 | if [ -z "$1" ] 8 | then 9 | # navigate to ~/data 10 | echo "navigating to ~/data/ ..." 11 | mkdir -p ~/data 12 | cd ~/data/ 13 | else 14 | # check if is valid directory 15 | if [ ! -d $1 ]; then 16 | echo $1 "is not a valid directory" 17 | exit 0 18 | fi 19 | echo "navigating to" $1 "..." 20 | cd $1 21 | fi 22 | 23 | echo "Downloading VOC2012 trainval ..." 24 | # Download the data. 25 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar 26 | echo "Done downloading." 27 | 28 | 29 | # Extract data 30 | echo "Extracting trainval ..." 31 | tar -xvf VOCtrainval_11-May-2012.tar 32 | echo "removing tar ..." 33 | rm VOCtrainval_11-May-2012.tar 34 | 35 | end=`date +%s` 36 | runtime=$((end-start)) 37 | 38 | echo "Completed in" $runtime "seconds" -------------------------------------------------------------------------------- /object detection/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zju-bmi-lab/Fast-SNN/e4315cd4e74b4e185ab12bbe2dd74bc3fdccc547/object detection/utils/__init__.py -------------------------------------------------------------------------------- /object detection/utils/cocoapi_evaluator.py: -------------------------------------------------------------------------------- 1 | import json 2 | import tempfile 3 | 4 | from pycocotools.cocoeval import COCOeval 5 | from torch.autograd import Variable 6 | 7 | from data.coco2017 import * 8 | from data import * 9 | 10 | 11 | class COCOAPIEvaluator(): 12 | """ 13 | COCO AP Evaluation class. 14 | All the data in the val2017 dataset are processed \ 15 | and evaluated by COCO API. 16 | """ 17 | def __init__(self, data_dir, img_size, device, testset=False, transform=None): 18 | """ 19 | Args: 20 | data_dir (str): dataset root directory 21 | img_size (int): image size after preprocess. images are resized \ 22 | to squares whose shape is (img_size, img_size). 23 | confthre (float): 24 | confidence threshold ranging from 0 to 1, \ 25 | which is defined in the config file. 26 | nmsthre (float): 27 | IoU threshold of non-max supression ranging from 0 to 1. 28 | """ 29 | self.testset = testset 30 | if self.testset: 31 | json_file='image_info_test-dev2017.json' 32 | name = 'test2017' 33 | else: 34 | json_file='instances_val2017.json' 35 | name='val2017' 36 | 37 | self.dataset = COCODataset(data_dir=data_dir, 38 | json_file=json_file, 39 | name=name) 40 | self.img_size = img_size 41 | self.transform = transform 42 | self.device = device 43 | 44 | self.map = 0. 45 | self.ap50_95 = 0. 46 | self.ap50 = 0. 47 | 48 | def evaluate(self, model): 49 | """ 50 | COCO average precision (AP) Evaluation. Iterate inference on the test dataset 51 | and the results are evaluated by COCO API. 
52 | Args: 53 | model : model object 54 | Returns: 55 | ap50_95 (float) : calculated COCO AP for IoU=50:95 56 | ap50 (float) : calculated COCO AP for IoU=50 57 | """ 58 | model.eval() 59 | ids = [] 60 | data_dict = [] 61 | num_images = len(self.dataset) 62 | print('total number of images: %d' % (num_images)) 63 | 64 | # start testing 65 | for index in range(num_images): # all the data in val2017 66 | if index % 500 == 0: 67 | print('[Eval: %d / %d]'%(index, num_images)) 68 | 69 | img, id_ = self.dataset.pull_image(index) # load a batch 70 | if self.transform is not None: 71 | x = torch.from_numpy(self.transform(img)[0][:, :, (2, 1, 0)]).permute(2, 0, 1) 72 | x = x.unsqueeze(0).to(self.device) 73 | scale = np.array([[img.shape[1], img.shape[0], 74 | img.shape[1], img.shape[0]]]) 75 | 76 | id_ = int(id_) 77 | ids.append(id_) 78 | with torch.no_grad(): 79 | outputs = model(x) 80 | bboxes, scores, cls_inds = outputs 81 | bboxes *= scale 82 | for i, box in enumerate(bboxes): 83 | x1 = float(box[0]) 84 | y1 = float(box[1]) 85 | x2 = float(box[2]) 86 | y2 = float(box[3]) 87 | label = self.dataset.class_ids[int(cls_inds[i])] 88 | 89 | bbox = [x1, y1, x2 - x1, y2 - y1] 90 | score = float(scores[i]) # object score * class score 91 | A = {"image_id": id_, "category_id": label, "bbox": bbox, 92 | "score": score} # COCO json format 93 | data_dict.append(A) 94 | 95 | annType = ['segm', 'bbox', 'keypoints'] 96 | 97 | # Evaluate the Dt (detection) json comparing with the ground truth 98 | if len(data_dict) > 0: 99 | print('evaluating ......') 100 | cocoGt = self.dataset.coco 101 | # For test 102 | if self.testset: 103 | json.dump(data_dict, open('yolov2_2017.json', 'w')) 104 | cocoDt = cocoGt.loadRes('yolov2_2017.json') 105 | print('inference on test-dev is done !!') 106 | return -1, -1 107 | # For val 108 | else: 109 | _, tmp = tempfile.mkstemp() 110 | json.dump(data_dict, open(tmp, 'w')) 111 | cocoDt = cocoGt.loadRes(tmp) 112 | cocoEval = COCOeval(self.dataset.coco, cocoDt, annType[1]) 113 | cocoEval.params.imgIds = ids 114 | cocoEval.evaluate() 115 | cocoEval.accumulate() 116 | cocoEval.summarize() 117 | 118 | ap50_95, ap50 = cocoEval.stats[0], cocoEval.stats[1] 119 | print('ap50_95 : ', ap50_95) 120 | print('ap50 : ', ap50) 121 | self.map = ap50_95 122 | self.ap50_95 = ap50_95 123 | self.ap50 = ap50 124 | 125 | return ap50, ap50_95 126 | else: 127 | return 0, 0 128 | 129 | -------------------------------------------------------------------------------- /object detection/utils/com_paras_flops.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from thop import profile 3 | 4 | 5 | def FLOPs_and_Params(model, size, device): 6 | x = torch.randn(1, 3, size, size).to(device) 7 | model.trainable = False 8 | model.eval() 9 | 10 | flops, params = profile(model, inputs=(x, )) 11 | print('FLOPs : ', flops / 1e9, ' B') 12 | print('Params : ', params / 1e6, ' M') 13 | 14 | model.trainable = True 15 | model.train() 16 | 17 | 18 | if __name__ == "__main__": 19 | pass 20 | -------------------------------------------------------------------------------- /object detection/utils/distributed_utils.py: -------------------------------------------------------------------------------- 1 | # from github: https://github.com/ruinmessi/ASFF/blob/master/utils/distributed_util.py 2 | 3 | import torch 4 | import torch.distributed as dist 5 | import os 6 | import subprocess 7 | import pickle 8 | 9 | 10 | def all_gather(data): 11 | """ 12 | Run all_gather on arbitrary picklable 
data (not necessarily tensors) 13 | Args: 14 | data: any picklable object 15 | Returns: 16 | list[data]: list of data gathered from each rank 17 | """ 18 | world_size = get_world_size() 19 | if world_size == 1: 20 | return [data] 21 | 22 | # serialized to a Tensor 23 | buffer = pickle.dumps(data) 24 | storage = torch.ByteStorage.from_buffer(buffer) 25 | tensor = torch.ByteTensor(storage).to("cuda") 26 | 27 | # obtain Tensor size of each rank 28 | local_size = torch.tensor([tensor.numel()], device="cuda") 29 | size_list = [torch.tensor([0], device="cuda") for _ in range(world_size)] 30 | dist.all_gather(size_list, local_size) 31 | size_list = [int(size.item()) for size in size_list] 32 | max_size = max(size_list) 33 | 34 | # receiving Tensor from all ranks 35 | # we pad the tensor because torch all_gather does not support 36 | # gathering tensors of different shapes 37 | tensor_list = [] 38 | for _ in size_list: 39 | tensor_list.append(torch.empty((max_size,), dtype=torch.uint8, device="cuda")) 40 | if local_size != max_size: 41 | padding = torch.empty(size=(max_size - local_size,), dtype=torch.uint8, device="cuda") 42 | tensor = torch.cat((tensor, padding), dim=0) 43 | dist.all_gather(tensor_list, tensor) 44 | 45 | data_list = [] 46 | for size, tensor in zip(size_list, tensor_list): 47 | buffer = tensor.cpu().numpy().tobytes()[:size] 48 | data_list.append(pickle.loads(buffer)) 49 | 50 | return data_list 51 | 52 | 53 | def reduce_dict(input_dict, average=True): 54 | """ 55 | Args: 56 | input_dict (dict): all the values will be reduced 57 | average (bool): whether to do average or sum 58 | Reduce the values in the dictionary from all processes so that all processes 59 | have the averaged results. Returns a dict with the same fields as 60 | input_dict, after reduction. 
61 | """ 62 | world_size = get_world_size() 63 | if world_size < 2: 64 | return input_dict 65 | with torch.no_grad(): 66 | names = [] 67 | values = [] 68 | # sort the keys so that they are consistent across processes 69 | for k in sorted(input_dict.keys()): 70 | names.append(k) 71 | values.append(input_dict[k]) 72 | values = torch.stack(values, dim=0) 73 | dist.all_reduce(values) 74 | if average: 75 | values /= world_size 76 | reduced_dict = {k: v for k, v in zip(names, values)} 77 | return reduced_dict 78 | 79 | 80 | def get_sha(): 81 | cwd = os.path.dirname(os.path.abspath(__file__)) 82 | 83 | def _run(command): 84 | return subprocess.check_output(command, cwd=cwd).decode('ascii').strip() 85 | sha = 'N/A' 86 | diff = "clean" 87 | branch = 'N/A' 88 | try: 89 | sha = _run(['git', 'rev-parse', 'HEAD']) 90 | subprocess.check_output(['git', 'diff'], cwd=cwd) 91 | diff = _run(['git', 'diff-index', 'HEAD']) 92 | diff = "has uncommited changes" if diff else "clean" 93 | branch = _run(['git', 'rev-parse', '--abbrev-ref', 'HEAD']) 94 | except Exception: 95 | pass 96 | message = f"sha: {sha}, status: {diff}, branch: {branch}" 97 | return message 98 | 99 | 100 | def setup_for_distributed(is_master): 101 | """ 102 | This function disables printing when not in master process 103 | """ 104 | import builtins as __builtin__ 105 | builtin_print = __builtin__.print 106 | 107 | def print(*args, **kwargs): 108 | force = kwargs.pop('force', False) 109 | if is_master or force: 110 | builtin_print(*args, **kwargs) 111 | 112 | __builtin__.print = print 113 | 114 | 115 | def is_dist_avail_and_initialized(): 116 | if not dist.is_available(): 117 | return False 118 | if not dist.is_initialized(): 119 | return False 120 | return True 121 | 122 | 123 | def get_world_size(): 124 | if not is_dist_avail_and_initialized(): 125 | return 1 126 | return dist.get_world_size() 127 | 128 | 129 | def get_rank(): 130 | if not is_dist_avail_and_initialized(): 131 | return 0 132 | return dist.get_rank() 133 | 134 | 135 | def is_main_process(): 136 | return get_rank() == 0 137 | 138 | 139 | def save_on_master(*args, **kwargs): 140 | if is_main_process(): 141 | torch.save(*args, **kwargs) 142 | 143 | 144 | def init_distributed_mode(args): 145 | if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ: 146 | args.rank = int(os.environ["RANK"]) 147 | args.world_size = int(os.environ['WORLD_SIZE']) 148 | args.gpu = int(os.environ['LOCAL_RANK']) 149 | elif 'SLURM_PROCID' in os.environ: 150 | args.rank = int(os.environ['SLURM_PROCID']) 151 | args.gpu = args.rank % torch.cuda.device_count() 152 | else: 153 | print('Not using distributed mode') 154 | args.distributed = False 155 | return 156 | 157 | args.distributed = True 158 | 159 | torch.cuda.set_device(args.gpu) 160 | args.dist_backend = 'nccl' 161 | print('| distributed init (rank {}): {}'.format( 162 | args.rank, args.dist_url), flush=True) 163 | torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url, 164 | world_size=args.world_size, rank=args.rank) 165 | torch.distributed.barrier() 166 | setup_for_distributed(args.rank == 0) 167 | -------------------------------------------------------------------------------- /object detection/utils/modules.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | from copy import deepcopy 5 | 6 | 7 | class Conv(nn.Module): 8 | def __init__(self, in_ch, out_ch, k=1, p=0, s=1, d=1, g=1, act=True): 9 | super(Conv, 
self).__init__() 10 | if act: 11 | self.convs = nn.Sequential( 12 | nn.Conv2d(in_ch, out_ch, k, stride=s, padding=p, dilation=d, groups=g), 13 | nn.BatchNorm2d(out_ch), 14 | nn.LeakyReLU(0.1, inplace=True) 15 | ) 16 | else: 17 | self.convs = nn.Sequential( 18 | nn.Conv2d(in_ch, out_ch, k, stride=s, padding=p, dilation=d, groups=g), 19 | nn.BatchNorm2d(out_ch) 20 | ) 21 | 22 | def forward(self, x): 23 | return self.convs(x) 24 | 25 | 26 | class UpSample(nn.Module): 27 | def __init__(self, size=None, scale_factor=None, mode='nearest', align_corner=None): 28 | super(UpSample, self).__init__() 29 | self.size = size 30 | self.scale_factor = scale_factor 31 | self.mode = mode 32 | self.align_corner = align_corner 33 | 34 | def forward(self, x): 35 | return torch.nn.functional.interpolate(x, size=self.size, scale_factor=self.scale_factor, 36 | mode=self.mode, align_corners=self.align_corner) 37 | 38 | 39 | class reorg_layer(nn.Module): 40 | def __init__(self, stride): 41 | super(reorg_layer, self).__init__() 42 | self.stride = stride 43 | 44 | def forward(self, x): 45 | batch_size, channels, height, width = x.size() 46 | _height, _width = height // self.stride, width // self.stride 47 | 48 | x = x.view(batch_size, channels, _height, self.stride, _width, self.stride).transpose(3, 4).contiguous() 49 | x = x.view(batch_size, channels, _height * _width, self.stride * self.stride).transpose(2, 3).contiguous() 50 | x = x.view(batch_size, channels, self.stride * self.stride, _height, _width).transpose(1, 2).contiguous() 51 | x = x.view(batch_size, -1, _height, _width) 52 | 53 | return x 54 | 55 | 56 | class SPP(nn.Module): 57 | """ 58 | Spatial Pyramid Pooling 59 | """ 60 | def __init__(self): 61 | super(SPP, self).__init__() 62 | 63 | def forward(self, x): 64 | x_1 = torch.nn.functional.max_pool2d(x, 5, stride=1, padding=2) 65 | x_2 = torch.nn.functional.max_pool2d(x, 9, stride=1, padding=4) 66 | x_3 = torch.nn.functional.max_pool2d(x, 13, stride=1, padding=6) 67 | x = torch.cat([x, x_1, x_2, x_3], dim=1) 68 | 69 | return x 70 | 71 | 72 | class ModelEMA(object): 73 | def __init__(self, model, decay=0.9999, updates=0): 74 | # create EMA 75 | self.ema = deepcopy(model).eval() 76 | self.updates = updates 77 | self.decay = lambda x: decay * (1 - math.exp(-x / 2000.)) 78 | for p in self.ema.parameters(): 79 | p.requires_grad_(False) 80 | 81 | def update(self, model): 82 | # Update EMA parameters 83 | with torch.no_grad(): 84 | self.updates += 1 85 | d = self.decay(self.updates) 86 | 87 | msd = model.state_dict() 88 | for k, v in self.ema.state_dict().items(): 89 | if v.dtype.is_floating_point: 90 | v *= d 91 | v += (1. - d) * msd[k].detach() 92 | -------------------------------------------------------------------------------- /semantic segmentation/configs/coco_deeplabv1.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | # 1. 
configuration for inference 4 | nclasses = 81 5 | ignore_label = 255 6 | multi_label = False 7 | 8 | crop_size_h, crop_size_w = 513, 513 9 | test_size_h, test_size_w = 641, 641 10 | image_pad_value = (123.675, 116.280, 103.530) 11 | 12 | img_norm_cfg = dict( 13 | max_pixel_value=255.0, 14 | std=(0.229, 0.224, 0.225), 15 | mean=(0.485, 0.456, 0.406), 16 | ) 17 | norm_cfg = dict(type='SyncBN') 18 | 19 | inference = dict( 20 | multi_label=multi_label, 21 | transforms=[ 22 | dict(type='PadIfNeeded', min_height=test_size_h, min_width=test_size_w, 23 | value=image_pad_value, mask_value=ignore_label), 24 | dict(type='Normalize', **img_norm_cfg), 25 | dict(type='ToTensor'), 26 | ], 27 | model=dict( 28 | # model/encoder 29 | encoder=dict( 30 | backbone=dict( 31 | type='MYVGG9', 32 | pretrain=True, 33 | nclasses = nclasses, 34 | ), 35 | ), 36 | collect=dict(type='CollectBlock', from_layer='c5'), 37 | # model/head 38 | head=dict( 39 | type='Head', 40 | no_convs=True, 41 | upsample=dict( 42 | type='Upsample', 43 | scale_factor=8, 44 | scale_bias=-7, 45 | mode='bilinear', 46 | align_corners=True, 47 | ), 48 | ), 49 | ), 50 | ) 51 | 52 | # 2. configuration for train/test 53 | root_workdir = 'workdir' 54 | dataset_type = 'CocoDataset' 55 | dataset_root = 'data/COCO2017' 56 | 57 | common = dict( 58 | seed=0, 59 | logger=dict( 60 | handlers=( 61 | dict(type='StreamHandler', level='INFO'), 62 | dict(type='FileHandler', level='INFO'), 63 | ), 64 | ), 65 | cudnn_deterministic=False, 66 | cudnn_benchmark=True, 67 | metrics=[ 68 | dict(type='IoU', num_classes=nclasses), 69 | dict(type='MIoU', num_classes=nclasses, average='equal'), 70 | ], 71 | dist_params=dict(backend='nccl'), 72 | ) 73 | 74 | ## 2.1 configuration for test 75 | test = dict( 76 | data=dict( 77 | dataset=dict( 78 | type=dataset_type, 79 | root=dataset_root, 80 | ann_file='instances_val2017.json', 81 | img_prefix='val2017', 82 | multi_label=multi_label, 83 | ), 84 | transforms=inference['transforms'], 85 | sampler=dict( 86 | type='DefaultSampler', 87 | ), 88 | dataloader=dict( 89 | type='DataLoader', 90 | samples_per_gpu=4, 91 | workers_per_gpu=4, 92 | shuffle=False, 93 | drop_last=False, 94 | pin_memory=True, 95 | ), 96 | ), 97 | # tta=dict( 98 | # scales=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 99 | # biases=[0.5, 0.25, 0.0, -0.25, -0.5, -0.75], 100 | # flip=True, 101 | # ), 102 | ) 103 | 104 | ## 2.2 configuration for train 105 | max_epochs = 50 106 | 107 | train = dict( 108 | data=dict( 109 | train=dict( 110 | dataset=dict( 111 | type=dataset_type, 112 | root=dataset_root, 113 | ann_file='instances_train2017.json', 114 | img_prefix='train2017', 115 | multi_label=multi_label, 116 | ), 117 | transforms=[ 118 | dict(type='RandomScale', scale_limit=(0.5, 2), scale_step=0.25, 119 | interpolation=cv2.INTER_LINEAR), 120 | dict(type='PadIfNeeded', min_height=crop_size_h, 121 | min_width=crop_size_w, value=image_pad_value, 122 | mask_value=ignore_label), 123 | dict(type='RandomCrop', height=crop_size_h, width=crop_size_w), 124 | dict(type='HorizontalFlip', p=0.5), 125 | dict(type='Normalize', **img_norm_cfg), 126 | dict(type='ToTensor'), 127 | ], 128 | sampler=dict( 129 | type='DefaultSampler', 130 | ), 131 | dataloader=dict( 132 | type='DataLoader', 133 | samples_per_gpu=8, 134 | workers_per_gpu=4, 135 | shuffle=True, 136 | drop_last=True, 137 | pin_memory=True, 138 | ), 139 | ), 140 | val=dict( 141 | dataset=dict( 142 | type=dataset_type, 143 | root=dataset_root, 144 | ann_file='instances_val2017.json', 145 | img_prefix='val2017', 146 | 
multi_label=multi_label, 147 | ), 148 | transforms=inference['transforms'], 149 | sampler=dict( 150 | type='DefaultSampler', 151 | ), 152 | dataloader=dict( 153 | type='DataLoader', 154 | samples_per_gpu=8, 155 | workers_per_gpu=4, 156 | shuffle=False, 157 | drop_last=False, 158 | pin_memory=True, 159 | ), 160 | ), 161 | ), 162 | resume=None, 163 | criterion=dict(type='CrossEntropyLoss', ignore_index=ignore_label), 164 | optimizer=dict(type='SGD', lr=0.007, momentum=0.9, weight_decay=0.0001), 165 | lr_scheduler=dict(type='PolyLR', max_epochs=max_epochs), 166 | max_epochs=max_epochs, 167 | trainval_ratio=1, 168 | log_interval=10, 169 | snapshot_interval=5, 170 | save_best=True, 171 | ) 172 | -------------------------------------------------------------------------------- /semantic segmentation/configs/coco_deeplabv1_4bit.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | # 1. configuration for inference 4 | nclasses = 81 5 | ignore_label = 255 6 | multi_label = False 7 | 8 | crop_size_h, crop_size_w = 513, 513 9 | test_size_h, test_size_w = 641, 641 10 | image_pad_value = (123.675, 116.280, 103.530) 11 | 12 | img_norm_cfg = dict( 13 | max_pixel_value=255.0, 14 | std=(0.229, 0.224, 0.225), 15 | mean=(0.485, 0.456, 0.406), 16 | ) 17 | norm_cfg = dict(type='SyncBN') 18 | 19 | inference = dict( 20 | multi_label=multi_label, 21 | transforms=[ 22 | dict(type='PadIfNeeded', min_height=test_size_h, min_width=test_size_w, 23 | value=image_pad_value, mask_value=ignore_label), 24 | dict(type='Normalize', **img_norm_cfg), 25 | dict(type='ToTensor'), 26 | ], 27 | model=dict( 28 | # model/encoder 29 | encoder=dict( 30 | backbone=dict( 31 | type='MYVGG9', 32 | pretrain=True, 33 | nclasses = nclasses, 34 | bit=4, 35 | init='./workdir/coco_deeplabv1/best_mIoU.pth' 36 | ), 37 | ), 38 | collect=dict(type='CollectBlock', from_layer='c5'), 39 | # model/head 40 | head=dict( 41 | type='Head', 42 | no_convs=True, 43 | upsample=dict( 44 | type='Upsample', 45 | scale_factor=8, 46 | scale_bias=-7, 47 | mode='bilinear', 48 | align_corners=True, 49 | ), 50 | ), 51 | ), 52 | ) 53 | 54 | # 2. 
configuration for train/test 55 | root_workdir = 'workdir' 56 | dataset_type = 'CocoDataset' 57 | dataset_root = 'data/COCO2017' 58 | 59 | common = dict( 60 | seed=0, 61 | logger=dict( 62 | handlers=( 63 | dict(type='StreamHandler', level='INFO'), 64 | dict(type='FileHandler', level='INFO'), 65 | ), 66 | ), 67 | cudnn_deterministic=False, 68 | cudnn_benchmark=True, 69 | metrics=[ 70 | dict(type='IoU', num_classes=nclasses), 71 | dict(type='MIoU', num_classes=nclasses, average='equal'), 72 | ], 73 | dist_params=dict(backend='nccl'), 74 | ) 75 | 76 | ## 2.1 configuration for test 77 | test = dict( 78 | data=dict( 79 | dataset=dict( 80 | type=dataset_type, 81 | root=dataset_root, 82 | ann_file='instances_val2017.json', 83 | img_prefix='val2017', 84 | multi_label=multi_label, 85 | ), 86 | transforms=inference['transforms'], 87 | sampler=dict( 88 | type='DefaultSampler', 89 | ), 90 | dataloader=dict( 91 | type='DataLoader', 92 | samples_per_gpu=4, 93 | workers_per_gpu=4, 94 | shuffle=False, 95 | drop_last=False, 96 | pin_memory=True, 97 | ), 98 | ), 99 | # tta=dict( 100 | # scales=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 101 | # biases=[0.5, 0.25, 0.0, -0.25, -0.5, -0.75], 102 | # flip=True, 103 | # ), 104 | ) 105 | 106 | ## 2.2 configuration for train 107 | max_epochs = 50 108 | 109 | train = dict( 110 | data=dict( 111 | train=dict( 112 | dataset=dict( 113 | type=dataset_type, 114 | root=dataset_root, 115 | ann_file='instances_train2017.json', 116 | img_prefix='train2017', 117 | multi_label=multi_label, 118 | ), 119 | transforms=[ 120 | dict(type='RandomScale', scale_limit=(0.5, 2), scale_step=0.25, 121 | interpolation=cv2.INTER_LINEAR), 122 | dict(type='PadIfNeeded', min_height=crop_size_h, 123 | min_width=crop_size_w, value=image_pad_value, 124 | mask_value=ignore_label), 125 | dict(type='RandomCrop', height=crop_size_h, width=crop_size_w), 126 | dict(type='HorizontalFlip', p=0.5), 127 | dict(type='Normalize', **img_norm_cfg), 128 | dict(type='ToTensor'), 129 | ], 130 | sampler=dict( 131 | type='DefaultSampler', 132 | ), 133 | dataloader=dict( 134 | type='DataLoader', 135 | samples_per_gpu=6, 136 | workers_per_gpu=4, 137 | shuffle=True, 138 | drop_last=True, 139 | pin_memory=True, 140 | ), 141 | ), 142 | val=dict( 143 | dataset=dict( 144 | type=dataset_type, 145 | root=dataset_root, 146 | ann_file='instances_val2017.json', 147 | img_prefix='val2017', 148 | multi_label=multi_label, 149 | ), 150 | transforms=inference['transforms'], 151 | sampler=dict( 152 | type='DefaultSampler', 153 | ), 154 | dataloader=dict( 155 | type='DataLoader', 156 | samples_per_gpu=8, 157 | workers_per_gpu=4, 158 | shuffle=False, 159 | drop_last=False, 160 | pin_memory=True, 161 | ), 162 | ), 163 | ), 164 | resume=None, 165 | criterion=dict(type='CrossEntropyLoss', ignore_index=ignore_label), 166 | optimizer=dict(type='SGD', lr=0.007, momentum=0.9, weight_decay=0.0001), 167 | lr_scheduler=dict(type='PolyLR', max_epochs=max_epochs), 168 | max_epochs=max_epochs, 169 | trainval_ratio=1, 170 | log_interval=10, 171 | snapshot_interval=5, 172 | save_best=True, 173 | ) 174 | -------------------------------------------------------------------------------- /semantic segmentation/configs/voc_deeplabv1.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | # 1. 
configuration for inference 4 | nclasses = 21 5 | ignore_label = 255 6 | multi_label = False 7 | 8 | crop_size_h, crop_size_w = 513, 513 9 | test_size_h, test_size_w = 513, 513 10 | image_pad_value = (123.675, 116.280, 103.530) 11 | 12 | img_norm_cfg = dict( 13 | max_pixel_value=255.0, 14 | std=(0.229, 0.224, 0.225), 15 | mean=(0.485, 0.456, 0.406), 16 | ) 17 | norm_cfg = dict(type='SyncBN') 18 | 19 | inference = dict( 20 | multi_label=multi_label, 21 | transforms=[ 22 | dict(type='PadIfNeeded', min_height=test_size_h, min_width=test_size_w, 23 | value=image_pad_value, mask_value=ignore_label), 24 | dict(type='Normalize', **img_norm_cfg), 25 | dict(type='ToTensor'), 26 | ], 27 | model=dict( 28 | # model/encoder 29 | encoder=dict( 30 | backbone=dict( 31 | type='MYVGG9', 32 | pretrain=True, 33 | ), 34 | ), 35 | collect=dict(type='CollectBlock', from_layer='c5'), 36 | # model/head 37 | head=dict( 38 | type='Head', 39 | no_convs=True, 40 | upsample=dict( 41 | type='Upsample', 42 | scale_factor=8, 43 | scale_bias=-7, 44 | mode='bilinear', 45 | align_corners=True, 46 | ), 47 | ), 48 | ), 49 | ) 50 | 51 | # 2. configuration for train/test 52 | root_workdir = 'workdir' 53 | dataset_type = 'VOCDataset' 54 | dataset_root = 'data/VOCdevkit/VOC2012/' 55 | 56 | common = dict( 57 | seed=0, 58 | logger=dict( 59 | handlers=( 60 | dict(type='StreamHandler', level='INFO'), 61 | dict(type='FileHandler', level='INFO'), 62 | ), 63 | ), 64 | cudnn_deterministic=False, 65 | cudnn_benchmark=True, 66 | metrics=[ 67 | dict(type='IoU', num_classes=nclasses), 68 | dict(type='MIoU', num_classes=nclasses, average='equal'), 69 | ], 70 | dist_params=dict(backend='nccl'), 71 | ) 72 | 73 | ## 2.1 configuration for test 74 | test = dict( 75 | data=dict( 76 | dataset=dict( 77 | type=dataset_type, 78 | root=dataset_root, 79 | imglist_name='val.txt', 80 | multi_label=multi_label, 81 | ), 82 | transforms=inference['transforms'], 83 | sampler=dict( 84 | type='DefaultSampler', 85 | ), 86 | dataloader=dict( 87 | type='DataLoader', 88 | samples_per_gpu=4, 89 | workers_per_gpu=4, 90 | shuffle=False, 91 | drop_last=False, 92 | pin_memory=True, 93 | ), 94 | ), 95 | # tta=dict( 96 | # scales=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 97 | # biases=[0.5, 0.25, 0.0, -0.25, -0.5, -0.75], 98 | # flip=True, 99 | # ), 100 | ) 101 | 102 | ## 2.2 configuration for train 103 | max_epochs = 50 104 | 105 | train = dict( 106 | data=dict( 107 | train=dict( 108 | dataset=dict( 109 | type=dataset_type, 110 | root=dataset_root, 111 | imglist_name='trainaug.txt', 112 | multi_label=multi_label, 113 | ), 114 | transforms=[ 115 | dict(type='RandomScale', scale_limit=(0.5, 2), scale_step=0.25, 116 | interpolation=cv2.INTER_LINEAR), 117 | dict(type='PadIfNeeded', min_height=crop_size_h, 118 | min_width=crop_size_w, value=image_pad_value, 119 | mask_value=ignore_label), 120 | dict(type='RandomCrop', height=crop_size_h, width=crop_size_w), 121 | dict(type='HorizontalFlip', p=0.5), 122 | dict(type='Normalize', **img_norm_cfg), 123 | dict(type='ToTensor'), 124 | ], 125 | sampler=dict( 126 | type='DefaultSampler', 127 | ), 128 | dataloader=dict( 129 | type='DataLoader', 130 | samples_per_gpu=8, 131 | workers_per_gpu=4, 132 | shuffle=True, 133 | drop_last=True, 134 | pin_memory=True, 135 | ), 136 | ), 137 | val=dict( 138 | dataset=dict( 139 | type=dataset_type, 140 | root=dataset_root, 141 | imglist_name='val.txt', 142 | multi_label=multi_label, 143 | ), 144 | transforms=inference['transforms'], 145 | sampler=dict( 146 | type='DefaultSampler', 147 | ), 148 | 
dataloader=dict( 149 | type='DataLoader', 150 | samples_per_gpu=8, 151 | workers_per_gpu=4, 152 | shuffle=False, 153 | drop_last=False, 154 | pin_memory=True, 155 | ), 156 | ), 157 | ), 158 | resume=None, 159 | criterion=dict(type='CrossEntropyLoss', ignore_index=ignore_label), 160 | optimizer=dict(type='SGD', lr=0.007, momentum=0.9, weight_decay=0.0001), 161 | lr_scheduler=dict(type='PolyLR', max_epochs=max_epochs), 162 | max_epochs=max_epochs, 163 | trainval_ratio=1, 164 | log_interval=10, 165 | snapshot_interval=5, 166 | save_best=True, 167 | ) 168 | -------------------------------------------------------------------------------- /semantic segmentation/configs/voc_deeplabv1_2bit.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | # 1. configuration for inference 4 | nclasses = 21 5 | ignore_label = 255 6 | multi_label = False 7 | 8 | crop_size_h, crop_size_w = 513, 513 9 | test_size_h, test_size_w = 513, 513 10 | image_pad_value = (123.675, 116.280, 103.530) 11 | 12 | img_norm_cfg = dict( 13 | max_pixel_value=255.0, 14 | std=(0.229, 0.224, 0.225), 15 | mean=(0.485, 0.456, 0.406), 16 | ) 17 | norm_cfg = dict(type='SyncBN') 18 | 19 | inference = dict( 20 | multi_label=multi_label, 21 | transforms=[ 22 | dict(type='PadIfNeeded', min_height=test_size_h, min_width=test_size_w, 23 | value=image_pad_value, mask_value=ignore_label), 24 | dict(type='Normalize', **img_norm_cfg), 25 | dict(type='ToTensor'), 26 | ], 27 | model=dict( 28 | # model/encoder 29 | encoder=dict( 30 | backbone=dict( 31 | type='MYVGG9', 32 | bit=2, 33 | pretrain=True, 34 | init='./workdir/voc_deeplabv1_3bit/best_mIoU.pth' 35 | ), 36 | ), 37 | collect=dict(type='CollectBlock', from_layer='c5'), 38 | # model/head 39 | head=dict( 40 | type='Head', 41 | no_convs=True, 42 | upsample=dict( 43 | type='Upsample', 44 | scale_factor=8, 45 | scale_bias=-7, 46 | mode='bilinear', 47 | align_corners=True, 48 | ), 49 | ), 50 | ), 51 | ) 52 | 53 | # 2. 
configuration for train/test 54 | root_workdir = 'workdir' 55 | dataset_type = 'VOCDataset' 56 | dataset_root = 'data/VOCdevkit/VOC2012/' 57 | 58 | common = dict( 59 | seed=0, 60 | logger=dict( 61 | handlers=( 62 | dict(type='StreamHandler', level='INFO'), 63 | dict(type='FileHandler', level='INFO'), 64 | ), 65 | ), 66 | cudnn_deterministic=False, 67 | cudnn_benchmark=True, 68 | metrics=[ 69 | dict(type='IoU', num_classes=nclasses), 70 | dict(type='MIoU', num_classes=nclasses, average='equal'), 71 | ], 72 | dist_params=dict(backend='nccl'), 73 | ) 74 | 75 | ## 2.1 configuration for test 76 | test = dict( 77 | data=dict( 78 | dataset=dict( 79 | type=dataset_type, 80 | root=dataset_root, 81 | imglist_name='val.txt', 82 | multi_label=multi_label, 83 | ), 84 | transforms=inference['transforms'], 85 | sampler=dict( 86 | type='DefaultSampler', 87 | ), 88 | dataloader=dict( 89 | type='DataLoader', 90 | samples_per_gpu=4, 91 | workers_per_gpu=4, 92 | shuffle=False, 93 | drop_last=False, 94 | pin_memory=True, 95 | ), 96 | ), 97 | # tta=dict( 98 | # scales=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 99 | # biases=[0.5, 0.25, 0.0, -0.25, -0.5, -0.75], 100 | # flip=True, 101 | # ), 102 | ) 103 | 104 | ## 2.2 configuration for train 105 | max_epochs = 50 106 | 107 | train = dict( 108 | data=dict( 109 | train=dict( 110 | dataset=dict( 111 | type=dataset_type, 112 | root=dataset_root, 113 | imglist_name='trainaug.txt', 114 | multi_label=multi_label, 115 | ), 116 | transforms=[ 117 | dict(type='RandomScale', scale_limit=(0.5, 2), scale_step=0.25, 118 | interpolation=cv2.INTER_LINEAR), 119 | dict(type='PadIfNeeded', min_height=crop_size_h, 120 | min_width=crop_size_w, value=image_pad_value, 121 | mask_value=ignore_label), 122 | dict(type='RandomCrop', height=crop_size_h, width=crop_size_w), 123 | dict(type='HorizontalFlip', p=0.5), 124 | dict(type='Normalize', **img_norm_cfg), 125 | dict(type='ToTensor'), 126 | ], 127 | sampler=dict( 128 | type='DefaultSampler', 129 | ), 130 | dataloader=dict( 131 | type='DataLoader', 132 | samples_per_gpu=8, 133 | workers_per_gpu=4, 134 | shuffle=True, 135 | drop_last=True, 136 | pin_memory=True, 137 | ), 138 | ), 139 | val=dict( 140 | dataset=dict( 141 | type=dataset_type, 142 | root=dataset_root, 143 | imglist_name='val.txt', 144 | multi_label=multi_label, 145 | ), 146 | transforms=inference['transforms'], 147 | sampler=dict( 148 | type='DefaultSampler', 149 | ), 150 | dataloader=dict( 151 | type='DataLoader', 152 | samples_per_gpu=8, 153 | workers_per_gpu=4, 154 | shuffle=False, 155 | drop_last=False, 156 | pin_memory=True, 157 | ), 158 | ), 159 | ), 160 | resume=None, 161 | criterion=dict(type='CrossEntropyLoss', ignore_index=ignore_label), 162 | optimizer=dict(type='SGD', lr=0.007, momentum=0.9, weight_decay=0.0001), 163 | lr_scheduler=dict(type='PolyLR', max_epochs=max_epochs), 164 | max_epochs=max_epochs, 165 | trainval_ratio=1, 166 | log_interval=10, 167 | snapshot_interval=5, 168 | save_best=True, 169 | ) 170 | -------------------------------------------------------------------------------- /semantic segmentation/configs/voc_deeplabv1_3bit.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | # 1. 
configuration for inference 4 | nclasses = 21 5 | ignore_label = 255 6 | multi_label = False 7 | 8 | crop_size_h, crop_size_w = 513, 513 9 | test_size_h, test_size_w = 513, 513 10 | image_pad_value = (123.675, 116.280, 103.530) 11 | 12 | img_norm_cfg = dict( 13 | max_pixel_value=255.0, 14 | std=(0.229, 0.224, 0.225), 15 | mean=(0.485, 0.456, 0.406), 16 | ) 17 | norm_cfg = dict(type='SyncBN') 18 | 19 | inference = dict( 20 | multi_label=multi_label, 21 | transforms=[ 22 | dict(type='PadIfNeeded', min_height=test_size_h, min_width=test_size_w, 23 | value=image_pad_value, mask_value=ignore_label), 24 | dict(type='Normalize', **img_norm_cfg), 25 | dict(type='ToTensor'), 26 | ], 27 | model=dict( 28 | # model/encoder 29 | encoder=dict( 30 | backbone=dict( 31 | type='MYVGG9', 32 | bit=3, 33 | pretrain=True, 34 | init='./workdir/voc_deeplabv1_4bit/best_mIoU.pth' 35 | ), 36 | ), 37 | collect=dict(type='CollectBlock', from_layer='c5'), 38 | # model/head 39 | head=dict( 40 | type='Head', 41 | no_convs=True, 42 | upsample=dict( 43 | type='Upsample', 44 | scale_factor=8, 45 | scale_bias=-7, 46 | mode='bilinear', 47 | align_corners=True, 48 | ), 49 | ), 50 | ), 51 | ) 52 | 53 | # 2. configuration for train/test 54 | root_workdir = 'workdir' 55 | dataset_type = 'VOCDataset' 56 | dataset_root = 'data/VOCdevkit/VOC2012/' 57 | 58 | common = dict( 59 | seed=0, 60 | logger=dict( 61 | handlers=( 62 | dict(type='StreamHandler', level='INFO'), 63 | dict(type='FileHandler', level='INFO'), 64 | ), 65 | ), 66 | cudnn_deterministic=False, 67 | cudnn_benchmark=True, 68 | metrics=[ 69 | dict(type='IoU', num_classes=nclasses), 70 | dict(type='MIoU', num_classes=nclasses, average='equal'), 71 | ], 72 | dist_params=dict(backend='nccl'), 73 | ) 74 | 75 | ## 2.1 configuration for test 76 | test = dict( 77 | data=dict( 78 | dataset=dict( 79 | type=dataset_type, 80 | root=dataset_root, 81 | imglist_name='val.txt', 82 | multi_label=multi_label, 83 | ), 84 | transforms=inference['transforms'], 85 | sampler=dict( 86 | type='DefaultSampler', 87 | ), 88 | dataloader=dict( 89 | type='DataLoader', 90 | samples_per_gpu=4, 91 | workers_per_gpu=4, 92 | shuffle=False, 93 | drop_last=False, 94 | pin_memory=True, 95 | ), 96 | ), 97 | # tta=dict( 98 | # scales=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 99 | # biases=[0.5, 0.25, 0.0, -0.25, -0.5, -0.75], 100 | # flip=True, 101 | # ), 102 | ) 103 | 104 | ## 2.2 configuration for train 105 | max_epochs = 50 106 | 107 | train = dict( 108 | data=dict( 109 | train=dict( 110 | dataset=dict( 111 | type=dataset_type, 112 | root=dataset_root, 113 | imglist_name='trainaug.txt', 114 | multi_label=multi_label, 115 | ), 116 | transforms=[ 117 | dict(type='RandomScale', scale_limit=(0.5, 2), scale_step=0.25, 118 | interpolation=cv2.INTER_LINEAR), 119 | dict(type='PadIfNeeded', min_height=crop_size_h, 120 | min_width=crop_size_w, value=image_pad_value, 121 | mask_value=ignore_label), 122 | dict(type='RandomCrop', height=crop_size_h, width=crop_size_w), 123 | dict(type='HorizontalFlip', p=0.5), 124 | dict(type='Normalize', **img_norm_cfg), 125 | dict(type='ToTensor'), 126 | ], 127 | sampler=dict( 128 | type='DefaultSampler', 129 | ), 130 | dataloader=dict( 131 | type='DataLoader', 132 | samples_per_gpu=8, 133 | workers_per_gpu=4, 134 | shuffle=True, 135 | drop_last=True, 136 | pin_memory=True, 137 | ), 138 | ), 139 | val=dict( 140 | dataset=dict( 141 | type=dataset_type, 142 | root=dataset_root, 143 | imglist_name='val.txt', 144 | multi_label=multi_label, 145 | ), 146 | transforms=inference['transforms'], 147 | 
sampler=dict( 148 | type='DefaultSampler', 149 | ), 150 | dataloader=dict( 151 | type='DataLoader', 152 | samples_per_gpu=8, 153 | workers_per_gpu=4, 154 | shuffle=False, 155 | drop_last=False, 156 | pin_memory=True, 157 | ), 158 | ), 159 | ), 160 | resume=None, 161 | criterion=dict(type='CrossEntropyLoss', ignore_index=ignore_label), 162 | optimizer=dict(type='SGD', lr=0.007, momentum=0.9, weight_decay=0.0001), 163 | lr_scheduler=dict(type='PolyLR', max_epochs=max_epochs), 164 | max_epochs=max_epochs, 165 | trainval_ratio=1, 166 | log_interval=10, 167 | snapshot_interval=5, 168 | save_best=True, 169 | ) 170 | -------------------------------------------------------------------------------- /semantic segmentation/configs/voc_deeplabv1_4bit.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | # 1. configuration for inference 4 | nclasses = 21 5 | ignore_label = 255 6 | multi_label = False 7 | 8 | crop_size_h, crop_size_w = 513, 513 9 | test_size_h, test_size_w = 513, 513 10 | image_pad_value = (123.675, 116.280, 103.530) 11 | 12 | img_norm_cfg = dict( 13 | max_pixel_value=255.0, 14 | std=(0.229, 0.224, 0.225), 15 | mean=(0.485, 0.456, 0.406), 16 | ) 17 | norm_cfg = dict(type='SyncBN') 18 | 19 | inference = dict( 20 | multi_label=multi_label, 21 | transforms=[ 22 | dict(type='PadIfNeeded', min_height=test_size_h, min_width=test_size_w, 23 | value=image_pad_value, mask_value=ignore_label), 24 | dict(type='Normalize', **img_norm_cfg), 25 | dict(type='ToTensor'), 26 | ], 27 | model=dict( 28 | # model/encoder 29 | encoder=dict( 30 | backbone=dict( 31 | type='MYVGG9', 32 | bit=4, 33 | pretrain=True, 34 | init='./workdir/voc_deeplabv1/best_mIoU.pth' 35 | ), 36 | ), 37 | collect=dict(type='CollectBlock', from_layer='c5'), 38 | # model/head 39 | head=dict( 40 | type='Head', 41 | no_convs=True, 42 | upsample=dict( 43 | type='Upsample', 44 | scale_factor=8, 45 | scale_bias=-7, 46 | mode='bilinear', 47 | align_corners=True, 48 | ), 49 | ), 50 | ), 51 | ) 52 | 53 | # 2. 
configuration for train/test 54 | root_workdir = 'workdir' 55 | dataset_type = 'VOCDataset' 56 | dataset_root = 'data/VOCdevkit/VOC2012/' 57 | 58 | common = dict( 59 | seed=0, 60 | logger=dict( 61 | handlers=( 62 | dict(type='StreamHandler', level='INFO'), 63 | dict(type='FileHandler', level='INFO'), 64 | ), 65 | ), 66 | cudnn_deterministic=False, 67 | cudnn_benchmark=True, 68 | metrics=[ 69 | dict(type='IoU', num_classes=nclasses), 70 | dict(type='MIoU', num_classes=nclasses, average='equal'), 71 | ], 72 | dist_params=dict(backend='nccl'), 73 | ) 74 | 75 | ## 2.1 configuration for test 76 | test = dict( 77 | data=dict( 78 | dataset=dict( 79 | type=dataset_type, 80 | root=dataset_root, 81 | imglist_name='val.txt', 82 | multi_label=multi_label, 83 | ), 84 | transforms=inference['transforms'], 85 | sampler=dict( 86 | type='DefaultSampler', 87 | ), 88 | dataloader=dict( 89 | type='DataLoader', 90 | samples_per_gpu=4, 91 | workers_per_gpu=4, 92 | shuffle=False, 93 | drop_last=False, 94 | pin_memory=True, 95 | ), 96 | ), 97 | # tta=dict( 98 | # scales=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 99 | # biases=[0.5, 0.25, 0.0, -0.25, -0.5, -0.75], 100 | # flip=True, 101 | # ), 102 | ) 103 | 104 | ## 2.2 configuration for train 105 | max_epochs = 50 106 | 107 | train = dict( 108 | data=dict( 109 | train=dict( 110 | dataset=dict( 111 | type=dataset_type, 112 | root=dataset_root, 113 | imglist_name='trainaug.txt', 114 | multi_label=multi_label, 115 | ), 116 | transforms=[ 117 | dict(type='RandomScale', scale_limit=(0.5, 2), scale_step=0.25, 118 | interpolation=cv2.INTER_LINEAR), 119 | dict(type='PadIfNeeded', min_height=crop_size_h, 120 | min_width=crop_size_w, value=image_pad_value, 121 | mask_value=ignore_label), 122 | dict(type='RandomCrop', height=crop_size_h, width=crop_size_w), 123 | dict(type='HorizontalFlip', p=0.5), 124 | dict(type='Normalize', **img_norm_cfg), 125 | dict(type='ToTensor'), 126 | ], 127 | sampler=dict( 128 | type='DefaultSampler', 129 | ), 130 | dataloader=dict( 131 | type='DataLoader', 132 | samples_per_gpu=8, 133 | workers_per_gpu=4, 134 | shuffle=True, 135 | drop_last=True, 136 | pin_memory=True, 137 | ), 138 | ), 139 | val=dict( 140 | dataset=dict( 141 | type=dataset_type, 142 | root=dataset_root, 143 | imglist_name='val.txt', 144 | multi_label=multi_label, 145 | ), 146 | transforms=inference['transforms'], 147 | sampler=dict( 148 | type='DefaultSampler', 149 | ), 150 | dataloader=dict( 151 | type='DataLoader', 152 | samples_per_gpu=8, 153 | workers_per_gpu=4, 154 | shuffle=False, 155 | drop_last=False, 156 | pin_memory=True, 157 | ), 158 | ), 159 | ), 160 | resume=None, 161 | criterion=dict(type='CrossEntropyLoss', ignore_index=ignore_label), 162 | optimizer=dict(type='SGD', lr=0.007, momentum=0.9, weight_decay=0.0001), 163 | lr_scheduler=dict(type='PolyLR', max_epochs=max_epochs), 164 | max_epochs=max_epochs, 165 | trainval_ratio=1, 166 | log_interval=10, 167 | snapshot_interval=5, 168 | save_best=True, 169 | ) 170 | -------------------------------------------------------------------------------- /semantic segmentation/configs/voc_deeplabv1_T15.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | # 1. 
configuration for inference 4 | nclasses = 21 5 | ignore_label = 255 6 | multi_label = False 7 | 8 | crop_size_h, crop_size_w = 513, 513 9 | test_size_h, test_size_w = 513, 513 10 | image_pad_value = (123.675, 116.280, 103.530) 11 | 12 | img_norm_cfg = dict( 13 | max_pixel_value=255.0, 14 | std=(0.229, 0.224, 0.225), 15 | mean=(0.485, 0.456, 0.406), 16 | ) 17 | norm_cfg = dict(type='SyncBN') 18 | 19 | inference = dict( 20 | multi_label=multi_label, 21 | transforms=[ 22 | dict(type='PadIfNeeded', min_height=test_size_h, min_width=test_size_w, 23 | value=image_pad_value, mask_value=ignore_label), 24 | dict(type='Normalize', **img_norm_cfg), 25 | dict(type='ToTensor'), 26 | ], 27 | model=dict( 28 | # model/encoder 29 | encoder=dict( 30 | backbone=dict( 31 | type='MYSVGG9', 32 | bit=4, 33 | pretrain=False, 34 | ), 35 | ), 36 | collect=dict(type='CollectBlock', from_layer='c5'), 37 | # model/head 38 | head=dict( 39 | type='Head', 40 | no_convs=True, 41 | upsample=dict( 42 | type='Upsample', 43 | scale_factor=8, 44 | scale_bias=-7, 45 | mode='bilinear', 46 | align_corners=True, 47 | ), 48 | ), 49 | ), 50 | ) 51 | 52 | # 2. configuration for train/test 53 | root_workdir = 'workdir' 54 | dataset_type = 'VOCDataset' 55 | dataset_root = 'data/VOCdevkit/VOC2012/' 56 | 57 | common = dict( 58 | seed=0, 59 | logger=dict( 60 | handlers=( 61 | dict(type='StreamHandler', level='INFO'), 62 | dict(type='FileHandler', level='INFO'), 63 | ), 64 | ), 65 | cudnn_deterministic=False, 66 | cudnn_benchmark=True, 67 | metrics=[ 68 | dict(type='IoU', num_classes=nclasses), 69 | dict(type='MIoU', num_classes=nclasses, average='equal'), 70 | ], 71 | dist_params=dict(backend='nccl'), 72 | ) 73 | 74 | ## 2.1 configuration for test 75 | test = dict( 76 | data=dict( 77 | dataset=dict( 78 | type=dataset_type, 79 | root=dataset_root, 80 | imglist_name='val.txt', 81 | multi_label=multi_label, 82 | ), 83 | transforms=inference['transforms'], 84 | sampler=dict( 85 | type='DefaultSampler', 86 | ), 87 | dataloader=dict( 88 | type='DataLoader', 89 | samples_per_gpu=1, 90 | workers_per_gpu=4, 91 | shuffle=False, 92 | drop_last=False, 93 | pin_memory=True, 94 | ), 95 | ), 96 | # tta=dict( 97 | # scales=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 98 | # biases=[0.5, 0.25, 0.0, -0.25, -0.5, -0.75], 99 | # flip=True, 100 | # ), 101 | ) 102 | 103 | ## 2.2 configuration for train 104 | max_epochs = 50 105 | 106 | train = dict( 107 | data=dict( 108 | train=dict( 109 | dataset=dict( 110 | type=dataset_type, 111 | root=dataset_root, 112 | imglist_name='trainaug.txt', 113 | multi_label=multi_label, 114 | ), 115 | transforms=[ 116 | dict(type='RandomScale', scale_limit=(0.5, 2), scale_step=0.25, 117 | interpolation=cv2.INTER_LINEAR), 118 | dict(type='PadIfNeeded', min_height=crop_size_h, 119 | min_width=crop_size_w, value=image_pad_value, 120 | mask_value=ignore_label), 121 | dict(type='RandomCrop', height=crop_size_h, width=crop_size_w), 122 | dict(type='HorizontalFlip', p=0.5), 123 | dict(type='Normalize', **img_norm_cfg), 124 | dict(type='ToTensor'), 125 | ], 126 | sampler=dict( 127 | type='DefaultSampler', 128 | ), 129 | dataloader=dict( 130 | type='DataLoader', 131 | samples_per_gpu=8, 132 | workers_per_gpu=4, 133 | shuffle=True, 134 | drop_last=True, 135 | pin_memory=True, 136 | ), 137 | ), 138 | val=dict( 139 | dataset=dict( 140 | type=dataset_type, 141 | root=dataset_root, 142 | imglist_name='val.txt', 143 | multi_label=multi_label, 144 | ), 145 | transforms=inference['transforms'], 146 | sampler=dict( 147 | type='DefaultSampler', 148 | ), 149 
| dataloader=dict( 150 | type='DataLoader', 151 | samples_per_gpu=8, 152 | workers_per_gpu=4, 153 | shuffle=False, 154 | drop_last=False, 155 | pin_memory=True, 156 | ), 157 | ), 158 | ), 159 | resume=None, 160 | criterion=dict(type='CrossEntropyLoss', ignore_index=ignore_label), 161 | optimizer=dict(type='SGD', lr=0.007, momentum=0.9, weight_decay=0.0001), 162 | lr_scheduler=dict(type='PolyLR', max_epochs=max_epochs), 163 | max_epochs=max_epochs, 164 | trainval_ratio=1, 165 | log_interval=10, 166 | snapshot_interval=5, 167 | save_best=True, 168 | ) 169 | -------------------------------------------------------------------------------- /semantic segmentation/configs/voc_deeplabv1_T3.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | # 1. configuration for inference 4 | nclasses = 21 5 | ignore_label = 255 6 | multi_label = False 7 | 8 | crop_size_h, crop_size_w = 513, 513 9 | test_size_h, test_size_w = 513, 513 10 | image_pad_value = (123.675, 116.280, 103.530) 11 | 12 | img_norm_cfg = dict( 13 | max_pixel_value=255.0, 14 | std=(0.229, 0.224, 0.225), 15 | mean=(0.485, 0.456, 0.406), 16 | ) 17 | norm_cfg = dict(type='SyncBN') 18 | 19 | inference = dict( 20 | multi_label=multi_label, 21 | transforms=[ 22 | dict(type='PadIfNeeded', min_height=test_size_h, min_width=test_size_w, 23 | value=image_pad_value, mask_value=ignore_label), 24 | dict(type='Normalize', **img_norm_cfg), 25 | dict(type='ToTensor'), 26 | ], 27 | model=dict( 28 | # model/encoder 29 | encoder=dict( 30 | backbone=dict( 31 | type='MYSVGG9', 32 | bit=2, 33 | pretrain=False, 34 | ), 35 | ), 36 | collect=dict(type='CollectBlock', from_layer='c5'), 37 | # model/head 38 | head=dict( 39 | type='Head', 40 | no_convs=True, 41 | upsample=dict( 42 | type='Upsample', 43 | scale_factor=8, 44 | scale_bias=-7, 45 | mode='bilinear', 46 | align_corners=True, 47 | ), 48 | ), 49 | ), 50 | ) 51 | 52 | # 2. 
configuration for train/test 53 | root_workdir = 'workdir' 54 | dataset_type = 'VOCDataset' 55 | dataset_root = 'data/VOCdevkit/VOC2012/' 56 | 57 | common = dict( 58 | seed=0, 59 | logger=dict( 60 | handlers=( 61 | dict(type='StreamHandler', level='INFO'), 62 | dict(type='FileHandler', level='INFO'), 63 | ), 64 | ), 65 | cudnn_deterministic=False, 66 | cudnn_benchmark=True, 67 | metrics=[ 68 | dict(type='IoU', num_classes=nclasses), 69 | dict(type='MIoU', num_classes=nclasses, average='equal'), 70 | ], 71 | dist_params=dict(backend='nccl'), 72 | ) 73 | 74 | ## 2.1 configuration for test 75 | test = dict( 76 | data=dict( 77 | dataset=dict( 78 | type=dataset_type, 79 | root=dataset_root, 80 | imglist_name='val.txt', 81 | multi_label=multi_label, 82 | ), 83 | transforms=inference['transforms'], 84 | sampler=dict( 85 | type='DefaultSampler', 86 | ), 87 | dataloader=dict( 88 | type='DataLoader', 89 | samples_per_gpu=4, 90 | workers_per_gpu=4, 91 | shuffle=False, 92 | drop_last=False, 93 | pin_memory=True, 94 | ), 95 | ), 96 | # tta=dict( 97 | # scales=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 98 | # biases=[0.5, 0.25, 0.0, -0.25, -0.5, -0.75], 99 | # flip=True, 100 | # ), 101 | ) 102 | 103 | ## 2.2 configuration for train 104 | max_epochs = 50 105 | 106 | train = dict( 107 | data=dict( 108 | train=dict( 109 | dataset=dict( 110 | type=dataset_type, 111 | root=dataset_root, 112 | imglist_name='trainaug.txt', 113 | multi_label=multi_label, 114 | ), 115 | transforms=[ 116 | dict(type='RandomScale', scale_limit=(0.5, 2), scale_step=0.25, 117 | interpolation=cv2.INTER_LINEAR), 118 | dict(type='PadIfNeeded', min_height=crop_size_h, 119 | min_width=crop_size_w, value=image_pad_value, 120 | mask_value=ignore_label), 121 | dict(type='RandomCrop', height=crop_size_h, width=crop_size_w), 122 | dict(type='HorizontalFlip', p=0.5), 123 | dict(type='Normalize', **img_norm_cfg), 124 | dict(type='ToTensor'), 125 | ], 126 | sampler=dict( 127 | type='DefaultSampler', 128 | ), 129 | dataloader=dict( 130 | type='DataLoader', 131 | samples_per_gpu=8, 132 | workers_per_gpu=4, 133 | shuffle=True, 134 | drop_last=True, 135 | pin_memory=True, 136 | ), 137 | ), 138 | val=dict( 139 | dataset=dict( 140 | type=dataset_type, 141 | root=dataset_root, 142 | imglist_name='val.txt', 143 | multi_label=multi_label, 144 | ), 145 | transforms=inference['transforms'], 146 | sampler=dict( 147 | type='DefaultSampler', 148 | ), 149 | dataloader=dict( 150 | type='DataLoader', 151 | samples_per_gpu=8, 152 | workers_per_gpu=4, 153 | shuffle=False, 154 | drop_last=False, 155 | pin_memory=True, 156 | ), 157 | ), 158 | ), 159 | resume=None, 160 | criterion=dict(type='CrossEntropyLoss', ignore_index=ignore_label), 161 | optimizer=dict(type='SGD', lr=0.007, momentum=0.9, weight_decay=0.0001), 162 | lr_scheduler=dict(type='PolyLR', max_epochs=max_epochs), 163 | max_epochs=max_epochs, 164 | trainval_ratio=1, 165 | log_interval=10, 166 | snapshot_interval=5, 167 | save_best=True, 168 | ) 169 | -------------------------------------------------------------------------------- /semantic segmentation/configs/voc_deeplabv1_T7.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | # 1. 
configuration for inference 4 | nclasses = 21 5 | ignore_label = 255 6 | multi_label = False 7 | 8 | crop_size_h, crop_size_w = 513, 513 9 | test_size_h, test_size_w = 513, 513 10 | image_pad_value = (123.675, 116.280, 103.530) 11 | 12 | img_norm_cfg = dict( 13 | max_pixel_value=255.0, 14 | std=(0.229, 0.224, 0.225), 15 | mean=(0.485, 0.456, 0.406), 16 | ) 17 | norm_cfg = dict(type='SyncBN') 18 | 19 | inference = dict( 20 | multi_label=multi_label, 21 | transforms=[ 22 | dict(type='PadIfNeeded', min_height=test_size_h, min_width=test_size_w, 23 | value=image_pad_value, mask_value=ignore_label), 24 | dict(type='Normalize', **img_norm_cfg), 25 | dict(type='ToTensor'), 26 | ], 27 | model=dict( 28 | # model/encoder 29 | encoder=dict( 30 | backbone=dict( 31 | type='MYSVGG9', 32 | bit=3, 33 | pretrain=False, 34 | ), 35 | ), 36 | collect=dict(type='CollectBlock', from_layer='c5'), 37 | # model/head 38 | head=dict( 39 | type='Head', 40 | no_convs=True, 41 | upsample=dict( 42 | type='Upsample', 43 | scale_factor=8, 44 | scale_bias=-7, 45 | mode='bilinear', 46 | align_corners=True, 47 | ), 48 | ), 49 | ), 50 | ) 51 | 52 | # 2. configuration for train/test 53 | root_workdir = 'workdir' 54 | dataset_type = 'VOCDataset' 55 | dataset_root = 'data/VOCdevkit/VOC2012/' 56 | 57 | common = dict( 58 | seed=0, 59 | logger=dict( 60 | handlers=( 61 | dict(type='StreamHandler', level='INFO'), 62 | dict(type='FileHandler', level='INFO'), 63 | ), 64 | ), 65 | cudnn_deterministic=False, 66 | cudnn_benchmark=True, 67 | metrics=[ 68 | dict(type='IoU', num_classes=nclasses), 69 | dict(type='MIoU', num_classes=nclasses, average='equal'), 70 | ], 71 | dist_params=dict(backend='nccl'), 72 | ) 73 | 74 | ## 2.1 configuration for test 75 | test = dict( 76 | data=dict( 77 | dataset=dict( 78 | type=dataset_type, 79 | root=dataset_root, 80 | imglist_name='val.txt', 81 | multi_label=multi_label, 82 | ), 83 | transforms=inference['transforms'], 84 | sampler=dict( 85 | type='DefaultSampler', 86 | ), 87 | dataloader=dict( 88 | type='DataLoader', 89 | samples_per_gpu=1, 90 | workers_per_gpu=4, 91 | shuffle=False, 92 | drop_last=False, 93 | pin_memory=True, 94 | ), 95 | ), 96 | # tta=dict( 97 | # scales=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 98 | # biases=[0.5, 0.25, 0.0, -0.25, -0.5, -0.75], 99 | # flip=True, 100 | # ), 101 | ) 102 | 103 | ## 2.2 configuration for train 104 | max_epochs = 50 105 | 106 | train = dict( 107 | data=dict( 108 | train=dict( 109 | dataset=dict( 110 | type=dataset_type, 111 | root=dataset_root, 112 | imglist_name='trainaug.txt', 113 | multi_label=multi_label, 114 | ), 115 | transforms=[ 116 | dict(type='RandomScale', scale_limit=(0.5, 2), scale_step=0.25, 117 | interpolation=cv2.INTER_LINEAR), 118 | dict(type='PadIfNeeded', min_height=crop_size_h, 119 | min_width=crop_size_w, value=image_pad_value, 120 | mask_value=ignore_label), 121 | dict(type='RandomCrop', height=crop_size_h, width=crop_size_w), 122 | dict(type='HorizontalFlip', p=0.5), 123 | dict(type='Normalize', **img_norm_cfg), 124 | dict(type='ToTensor'), 125 | ], 126 | sampler=dict( 127 | type='DefaultSampler', 128 | ), 129 | dataloader=dict( 130 | type='DataLoader', 131 | samples_per_gpu=8, 132 | workers_per_gpu=4, 133 | shuffle=True, 134 | drop_last=True, 135 | pin_memory=True, 136 | ), 137 | ), 138 | val=dict( 139 | dataset=dict( 140 | type=dataset_type, 141 | root=dataset_root, 142 | imglist_name='val.txt', 143 | multi_label=multi_label, 144 | ), 145 | transforms=inference['transforms'], 146 | sampler=dict( 147 | type='DefaultSampler', 148 | ), 149 
| dataloader=dict( 150 | type='DataLoader', 151 | samples_per_gpu=8, 152 | workers_per_gpu=4, 153 | shuffle=False, 154 | drop_last=False, 155 | pin_memory=True, 156 | ), 157 | ), 158 | ), 159 | resume=None, 160 | criterion=dict(type='CrossEntropyLoss', ignore_index=ignore_label), 161 | optimizer=dict(type='SGD', lr=0.007, momentum=0.9, weight_decay=0.0001), 162 | lr_scheduler=dict(type='PolyLR', max_epochs=max_epochs), 163 | max_epochs=max_epochs, 164 | trainval_ratio=1, 165 | log_interval=10, 166 | snapshot_interval=5, 167 | save_best=True, 168 | ) 169 | -------------------------------------------------------------------------------- /semantic segmentation/configs/voc_deeplabv2.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | # 1. configuration for inference 4 | nclasses = 21 5 | ignore_label = 255 6 | multi_label = False 7 | 8 | crop_size_h, crop_size_w = 513, 513 9 | test_size_h, test_size_w = 513, 513 10 | image_pad_value = (123.675, 116.280, 103.530) 11 | 12 | img_norm_cfg = dict( 13 | max_pixel_value=255.0, 14 | std=(0.229, 0.224, 0.225), 15 | mean=(0.485, 0.456, 0.406), 16 | ) 17 | norm_cfg = dict(type='SyncBN') 18 | 19 | inference = dict( 20 | multi_label=multi_label, 21 | transforms=[ 22 | dict(type='PadIfNeeded', min_height=test_size_h, min_width=test_size_w, 23 | value=image_pad_value, mask_value=ignore_label), 24 | dict(type='Normalize', **img_norm_cfg), 25 | dict(type='ToTensor'), 26 | ], 27 | model=dict( 28 | # model/encoder 29 | encoder=dict( 30 | backbone=dict( 31 | type='MYVGG9', 32 | ), 33 | enhance=dict( 34 | type='ASPP_v2', 35 | from_layer='c5', 36 | to_layer='enhance', 37 | in_channels=512, 38 | out_channels=nclasses, 39 | atrous_rates=[6, 12, 18, 24], 40 | ), 41 | ), 42 | collect=dict(type='CollectBlock', from_layer='enhance'), 43 | # model/head 44 | head=dict( 45 | type='Head', 46 | no_convs=True, 47 | upsample=dict( 48 | type='Upsample', 49 | scale_factor=32, 50 | scale_bias=-31, 51 | mode='bilinear', 52 | align_corners=True, 53 | ), 54 | ), 55 | ), 56 | ) 57 | 58 | # 2. 
configuration for train/test 59 | root_workdir = 'workdir' 60 | dataset_type = 'VOCDataset' 61 | dataset_root = 'data/VOCdevkit/VOC2012/' 62 | 63 | common = dict( 64 | seed=0, 65 | logger=dict( 66 | handlers=( 67 | dict(type='StreamHandler', level='INFO'), 68 | dict(type='FileHandler', level='INFO'), 69 | ), 70 | ), 71 | cudnn_deterministic=False, 72 | cudnn_benchmark=True, 73 | metrics=[ 74 | dict(type='IoU', num_classes=nclasses), 75 | dict(type='MIoU', num_classes=nclasses, average='equal'), 76 | ], 77 | dist_params=dict(backend='nccl'), 78 | ) 79 | 80 | ## 2.1 configuration for test 81 | test = dict( 82 | data=dict( 83 | dataset=dict( 84 | type=dataset_type, 85 | root=dataset_root, 86 | imglist_name='val.txt', 87 | multi_label=multi_label, 88 | ), 89 | transforms=inference['transforms'], 90 | sampler=dict( 91 | type='DefaultSampler', 92 | ), 93 | dataloader=dict( 94 | type='DataLoader', 95 | samples_per_gpu=4, 96 | workers_per_gpu=4, 97 | shuffle=False, 98 | drop_last=False, 99 | pin_memory=True, 100 | ), 101 | ), 102 | # tta=dict( 103 | # scales=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 104 | # biases=[0.5, 0.25, 0.0, -0.25, -0.5, -0.75], 105 | # flip=True, 106 | # ), 107 | ) 108 | 109 | ## 2.2 configuration for train 110 | max_epochs = 50 111 | 112 | train = dict( 113 | data=dict( 114 | train=dict( 115 | dataset=dict( 116 | type=dataset_type, 117 | root=dataset_root, 118 | imglist_name='trainaug.txt', 119 | multi_label=multi_label, 120 | ), 121 | transforms=[ 122 | dict(type='RandomScale', scale_limit=(0.5, 2), scale_step=0.25, 123 | interpolation=cv2.INTER_LINEAR), 124 | dict(type='PadIfNeeded', min_height=crop_size_h, 125 | min_width=crop_size_w, value=image_pad_value, 126 | mask_value=ignore_label), 127 | dict(type='RandomCrop', height=crop_size_h, width=crop_size_w), 128 | dict(type='HorizontalFlip', p=0.5), 129 | dict(type='Normalize', **img_norm_cfg), 130 | dict(type='ToTensor'), 131 | ], 132 | sampler=dict( 133 | type='DefaultSampler', 134 | ), 135 | dataloader=dict( 136 | type='DataLoader', 137 | samples_per_gpu=8, 138 | workers_per_gpu=4, 139 | shuffle=True, 140 | drop_last=True, 141 | pin_memory=True, 142 | ), 143 | ), 144 | val=dict( 145 | dataset=dict( 146 | type=dataset_type, 147 | root=dataset_root, 148 | imglist_name='val.txt', 149 | multi_label=multi_label, 150 | ), 151 | transforms=inference['transforms'], 152 | sampler=dict( 153 | type='DefaultSampler', 154 | ), 155 | dataloader=dict( 156 | type='DataLoader', 157 | samples_per_gpu=8, 158 | workers_per_gpu=4, 159 | shuffle=False, 160 | drop_last=False, 161 | pin_memory=True, 162 | ), 163 | ), 164 | ), 165 | resume=None, 166 | criterion=dict(type='CrossEntropyLoss', ignore_index=ignore_label), 167 | optimizer=dict(type='SGD', lr=0.007, momentum=0.9, weight_decay=0.0001), 168 | lr_scheduler=dict(type='PolyLR', max_epochs=max_epochs), 169 | max_epochs=max_epochs, 170 | trainval_ratio=1, 171 | log_interval=10, 172 | snapshot_interval=5, 173 | save_best=True, 174 | ) 175 | -------------------------------------------------------------------------------- /semantic segmentation/configs/voc_deeplabv3.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | # 1. 
configuration for inference 4 | nclasses = 21 5 | ignore_label = 255 6 | multi_label = False 7 | 8 | crop_size_h, crop_size_w = 513, 513 9 | test_size_h, test_size_w = 513, 513 10 | image_pad_value = (123.675, 116.280, 103.530) 11 | 12 | img_norm_cfg = dict( 13 | max_pixel_value=255.0, 14 | std=(0.229, 0.224, 0.225), 15 | mean=(0.485, 0.456, 0.406), 16 | ) 17 | norm_cfg = dict(type='SyncBN') 18 | 19 | inference = dict( 20 | multi_label=multi_label, 21 | transforms=[ 22 | dict(type='PadIfNeeded', min_height=test_size_h, min_width=test_size_w, 23 | value=image_pad_value, mask_value=ignore_label), 24 | dict(type='Normalize', **img_norm_cfg), 25 | dict(type='ToTensor'), 26 | ], 27 | model=dict( 28 | # model/encoder 29 | encoder=dict( 30 | backbone=dict( 31 | type='MYResNet', 32 | arch='resnet34', 33 | replace_stride_with_dilation=[False, False, False], 34 | layer_stride = [1, 2, 2, 1], 35 | # multi_grid=[1, 2, 4], 36 | norm_cfg=norm_cfg, 37 | bit=32, 38 | ), 39 | ), 40 | collect=dict(type='CollectBlock', from_layer='c5'), 41 | # model/head 42 | head=dict( 43 | type='Head', 44 | no_convs=True, 45 | upsample=dict( 46 | type='Upsample', 47 | scale_factor=16, 48 | scale_bias=-15, 49 | mode='bilinear', 50 | align_corners=True, 51 | ), 52 | ), 53 | ), 54 | ) 55 | 56 | # 2. configuration for train/test 57 | root_workdir = 'workdir' 58 | dataset_type = 'VOCDataset' 59 | dataset_root = 'data/VOCdevkit/VOC2012/' 60 | 61 | common = dict( 62 | seed=0, 63 | logger=dict( 64 | handlers=( 65 | dict(type='StreamHandler', level='INFO'), 66 | dict(type='FileHandler', level='INFO'), 67 | ), 68 | ), 69 | cudnn_deterministic=False, 70 | cudnn_benchmark=True, 71 | metrics=[ 72 | dict(type='IoU', num_classes=nclasses), 73 | dict(type='MIoU', num_classes=nclasses, average='equal'), 74 | ], 75 | dist_params=dict(backend='nccl'), 76 | ) 77 | 78 | ## 2.1 configuration for test 79 | test = dict( 80 | data=dict( 81 | dataset=dict( 82 | type=dataset_type, 83 | root=dataset_root, 84 | imglist_name='val.txt', 85 | multi_label=multi_label, 86 | ), 87 | transforms=inference['transforms'], 88 | sampler=dict( 89 | type='DefaultSampler', 90 | ), 91 | dataloader=dict( 92 | type='DataLoader', 93 | samples_per_gpu=4, 94 | workers_per_gpu=4, 95 | shuffle=False, 96 | drop_last=False, 97 | pin_memory=True, 98 | ), 99 | ), 100 | # tta=dict( 101 | # scales=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 102 | # biases=[0.5, 0.25, 0.0, -0.25, -0.5, -0.75], 103 | # flip=True, 104 | # ), 105 | ) 106 | 107 | ## 2.2 configuration for train 108 | max_epochs = 50 109 | 110 | train = dict( 111 | data=dict( 112 | train=dict( 113 | dataset=dict( 114 | type=dataset_type, 115 | root=dataset_root, 116 | imglist_name='trainaug.txt', 117 | multi_label=multi_label, 118 | ), 119 | transforms=[ 120 | dict(type='RandomScale', scale_limit=(0.5, 2), scale_step=0.25, 121 | interpolation=cv2.INTER_LINEAR), 122 | dict(type='PadIfNeeded', min_height=crop_size_h, 123 | min_width=crop_size_w, value=image_pad_value, 124 | mask_value=ignore_label), 125 | dict(type='RandomCrop', height=crop_size_h, width=crop_size_w), 126 | dict(type='HorizontalFlip', p=0.5), 127 | dict(type='Normalize', **img_norm_cfg), 128 | dict(type='ToTensor'), 129 | ], 130 | sampler=dict( 131 | type='DefaultSampler', 132 | ), 133 | dataloader=dict( 134 | type='DataLoader', 135 | samples_per_gpu=8, 136 | workers_per_gpu=4, 137 | shuffle=True, 138 | drop_last=True, 139 | pin_memory=True, 140 | ), 141 | ), 142 | val=dict( 143 | dataset=dict( 144 | type=dataset_type, 145 | root=dataset_root, 146 | 
imglist_name='val.txt', 147 | multi_label=multi_label, 148 | ), 149 | transforms=inference['transforms'], 150 | sampler=dict( 151 | type='DefaultSampler', 152 | ), 153 | dataloader=dict( 154 | type='DataLoader', 155 | samples_per_gpu=8, 156 | workers_per_gpu=4, 157 | shuffle=False, 158 | drop_last=False, 159 | pin_memory=True, 160 | ), 161 | ), 162 | ), 163 | resume=None, 164 | criterion=dict(type='CrossEntropyLoss', ignore_index=ignore_label), 165 | optimizer=dict(type='SGD', lr=0.007, momentum=0.9, weight_decay=0.0001), 166 | lr_scheduler=dict(type='PolyLR', max_epochs=max_epochs), 167 | max_epochs=max_epochs, 168 | trainval_ratio=1, 169 | log_interval=10, 170 | snapshot_interval=5, 171 | save_best=True, 172 | ) 173 | -------------------------------------------------------------------------------- /semantic segmentation/configs/voc_deeplabv3_T15.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | # 1. configuration for inference 4 | nclasses = 21 5 | ignore_label = 255 6 | multi_label = False 7 | 8 | crop_size_h, crop_size_w = 513, 513 9 | test_size_h, test_size_w = 513, 513 10 | image_pad_value = (123.675, 116.280, 103.530) 11 | 12 | img_norm_cfg = dict( 13 | max_pixel_value=255.0, 14 | std=(0.229, 0.224, 0.225), 15 | mean=(0.485, 0.456, 0.406), 16 | ) 17 | norm_cfg = dict(type='SyncBN') 18 | 19 | inference = dict( 20 | multi_label=multi_label, 21 | transforms=[ 22 | dict(type='PadIfNeeded', min_height=test_size_h, min_width=test_size_w, 23 | value=image_pad_value, mask_value=ignore_label), 24 | dict(type='Normalize', **img_norm_cfg), 25 | dict(type='ToTensor'), 26 | ], 27 | model=dict( 28 | # model/encoder 29 | encoder=dict( 30 | backbone=dict( 31 | type='MYSResNet', 32 | arch='resnet34', 33 | replace_stride_with_dilation=[False, False, False], 34 | layer_stride = [1, 2, 2, 1], 35 | # multi_grid=[1, 2, 4], 36 | norm_cfg=norm_cfg, 37 | bit=4, 38 | pretrain=False, 39 | ), 40 | ), 41 | collect=dict(type='CollectBlock', from_layer='c5'), 42 | # model/head 43 | head=dict( 44 | type='Head', 45 | no_convs=True, 46 | upsample=dict( 47 | type='Upsample', 48 | scale_factor=16, 49 | scale_bias=-15, 50 | mode='bilinear', 51 | align_corners=True, 52 | ), 53 | ), 54 | ), 55 | ) 56 | 57 | # 2. 
configuration for train/test 58 | root_workdir = 'workdir' 59 | dataset_type = 'VOCDataset' 60 | dataset_root = 'data/VOCdevkit/VOC2012/' 61 | 62 | common = dict( 63 | seed=0, 64 | logger=dict( 65 | handlers=( 66 | dict(type='StreamHandler', level='INFO'), 67 | dict(type='FileHandler', level='INFO'), 68 | ), 69 | ), 70 | cudnn_deterministic=False, 71 | cudnn_benchmark=True, 72 | metrics=[ 73 | dict(type='IoU', num_classes=nclasses), 74 | dict(type='MIoU', num_classes=nclasses, average='equal'), 75 | ], 76 | dist_params=dict(backend='nccl'), 77 | ) 78 | 79 | ## 2.1 configuration for test 80 | test = dict( 81 | data=dict( 82 | dataset=dict( 83 | type=dataset_type, 84 | root=dataset_root, 85 | imglist_name='val.txt', 86 | multi_label=multi_label, 87 | ), 88 | transforms=inference['transforms'], 89 | sampler=dict( 90 | type='DefaultSampler', 91 | ), 92 | dataloader=dict( 93 | type='DataLoader', 94 | samples_per_gpu=1, 95 | workers_per_gpu=4, 96 | shuffle=False, 97 | drop_last=False, 98 | pin_memory=True, 99 | ), 100 | ), 101 | # tta=dict( 102 | # scales=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 103 | # biases=[0.5, 0.25, 0.0, -0.25, -0.5, -0.75], 104 | # flip=True, 105 | # ), 106 | ) 107 | 108 | ## 2.2 configuration for train 109 | max_epochs = 50 110 | 111 | train = dict( 112 | data=dict( 113 | train=dict( 114 | dataset=dict( 115 | type=dataset_type, 116 | root=dataset_root, 117 | imglist_name='trainaug.txt', 118 | multi_label=multi_label, 119 | ), 120 | transforms=[ 121 | dict(type='RandomScale', scale_limit=(0.5, 2), scale_step=0.25, 122 | interpolation=cv2.INTER_LINEAR), 123 | dict(type='PadIfNeeded', min_height=crop_size_h, 124 | min_width=crop_size_w, value=image_pad_value, 125 | mask_value=ignore_label), 126 | dict(type='RandomCrop', height=crop_size_h, width=crop_size_w), 127 | dict(type='HorizontalFlip', p=0.5), 128 | dict(type='Normalize', **img_norm_cfg), 129 | dict(type='ToTensor'), 130 | ], 131 | sampler=dict( 132 | type='DefaultSampler', 133 | ), 134 | dataloader=dict( 135 | type='DataLoader', 136 | samples_per_gpu=8, 137 | workers_per_gpu=4, 138 | shuffle=True, 139 | drop_last=True, 140 | pin_memory=True, 141 | ), 142 | ), 143 | val=dict( 144 | dataset=dict( 145 | type=dataset_type, 146 | root=dataset_root, 147 | imglist_name='val.txt', 148 | multi_label=multi_label, 149 | ), 150 | transforms=inference['transforms'], 151 | sampler=dict( 152 | type='DefaultSampler', 153 | ), 154 | dataloader=dict( 155 | type='DataLoader', 156 | samples_per_gpu=8, 157 | workers_per_gpu=4, 158 | shuffle=False, 159 | drop_last=False, 160 | pin_memory=True, 161 | ), 162 | ), 163 | ), 164 | resume=None, 165 | criterion=dict(type='CrossEntropyLoss', ignore_index=ignore_label), 166 | optimizer=dict(type='SGD', lr=0.007, momentum=0.9, weight_decay=0.0001), 167 | lr_scheduler=dict(type='PolyLR', max_epochs=max_epochs), 168 | max_epochs=max_epochs, 169 | trainval_ratio=1, 170 | log_interval=10, 171 | snapshot_interval=5, 172 | save_best=True, 173 | ) 174 | -------------------------------------------------------------------------------- /semantic segmentation/configs/voc_deeplabv3_T3.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | # 1. 
configuration for inference 4 | nclasses = 21 5 | ignore_label = 255 6 | multi_label = False 7 | 8 | crop_size_h, crop_size_w = 513, 513 9 | test_size_h, test_size_w = 513, 513 10 | image_pad_value = (123.675, 116.280, 103.530) 11 | 12 | img_norm_cfg = dict( 13 | max_pixel_value=255.0, 14 | std=(0.229, 0.224, 0.225), 15 | mean=(0.485, 0.456, 0.406), 16 | ) 17 | norm_cfg = dict(type='SyncBN') 18 | 19 | inference = dict( 20 | multi_label=multi_label, 21 | transforms=[ 22 | dict(type='PadIfNeeded', min_height=test_size_h, min_width=test_size_w, 23 | value=image_pad_value, mask_value=ignore_label), 24 | dict(type='Normalize', **img_norm_cfg), 25 | dict(type='ToTensor'), 26 | ], 27 | model=dict( 28 | # model/encoder 29 | encoder=dict( 30 | backbone=dict( 31 | type='MYSResNet', 32 | arch='resnet34', 33 | replace_stride_with_dilation=[False, False, False], 34 | layer_stride = [1, 2, 2, 1], 35 | # multi_grid=[1, 2, 4], 36 | norm_cfg=norm_cfg, 37 | bit=2, 38 | pretrain=False, 39 | ), 40 | ), 41 | collect=dict(type='CollectBlock', from_layer='c5'), 42 | # model/head 43 | head=dict( 44 | type='Head', 45 | no_convs=True, 46 | upsample=dict( 47 | type='Upsample', 48 | scale_factor=16, 49 | scale_bias=-15, 50 | mode='bilinear', 51 | align_corners=True, 52 | ), 53 | ), 54 | ), 55 | ) 56 | 57 | # 2. configuration for train/test 58 | root_workdir = 'workdir' 59 | dataset_type = 'VOCDataset' 60 | dataset_root = 'data/VOCdevkit/VOC2012/' 61 | 62 | common = dict( 63 | seed=0, 64 | logger=dict( 65 | handlers=( 66 | dict(type='StreamHandler', level='INFO'), 67 | dict(type='FileHandler', level='INFO'), 68 | ), 69 | ), 70 | cudnn_deterministic=False, 71 | cudnn_benchmark=True, 72 | metrics=[ 73 | dict(type='IoU', num_classes=nclasses), 74 | dict(type='MIoU', num_classes=nclasses, average='equal'), 75 | ], 76 | dist_params=dict(backend='nccl'), 77 | ) 78 | 79 | ## 2.1 configuration for test 80 | test = dict( 81 | data=dict( 82 | dataset=dict( 83 | type=dataset_type, 84 | root=dataset_root, 85 | imglist_name='val.txt', 86 | multi_label=multi_label, 87 | ), 88 | transforms=inference['transforms'], 89 | sampler=dict( 90 | type='DefaultSampler', 91 | ), 92 | dataloader=dict( 93 | type='DataLoader', 94 | samples_per_gpu=1, 95 | workers_per_gpu=4, 96 | shuffle=False, 97 | drop_last=False, 98 | pin_memory=True, 99 | ), 100 | ), 101 | # tta=dict( 102 | # scales=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 103 | # biases=[0.5, 0.25, 0.0, -0.25, -0.5, -0.75], 104 | # flip=True, 105 | # ), 106 | ) 107 | 108 | ## 2.2 configuration for train 109 | max_epochs = 50 110 | 111 | train = dict( 112 | data=dict( 113 | train=dict( 114 | dataset=dict( 115 | type=dataset_type, 116 | root=dataset_root, 117 | imglist_name='trainaug.txt', 118 | multi_label=multi_label, 119 | ), 120 | transforms=[ 121 | dict(type='RandomScale', scale_limit=(0.5, 2), scale_step=0.25, 122 | interpolation=cv2.INTER_LINEAR), 123 | dict(type='PadIfNeeded', min_height=crop_size_h, 124 | min_width=crop_size_w, value=image_pad_value, 125 | mask_value=ignore_label), 126 | dict(type='RandomCrop', height=crop_size_h, width=crop_size_w), 127 | dict(type='HorizontalFlip', p=0.5), 128 | dict(type='Normalize', **img_norm_cfg), 129 | dict(type='ToTensor'), 130 | ], 131 | sampler=dict( 132 | type='DefaultSampler', 133 | ), 134 | dataloader=dict( 135 | type='DataLoader', 136 | samples_per_gpu=8, 137 | workers_per_gpu=4, 138 | shuffle=True, 139 | drop_last=True, 140 | pin_memory=True, 141 | ), 142 | ), 143 | val=dict( 144 | dataset=dict( 145 | type=dataset_type, 146 | 
root=dataset_root, 147 | imglist_name='val.txt', 148 | multi_label=multi_label, 149 | ), 150 | transforms=inference['transforms'], 151 | sampler=dict( 152 | type='DefaultSampler', 153 | ), 154 | dataloader=dict( 155 | type='DataLoader', 156 | samples_per_gpu=8, 157 | workers_per_gpu=4, 158 | shuffle=False, 159 | drop_last=False, 160 | pin_memory=True, 161 | ), 162 | ), 163 | ), 164 | resume=None, 165 | criterion=dict(type='CrossEntropyLoss', ignore_index=ignore_label), 166 | optimizer=dict(type='SGD', lr=0.007, momentum=0.9, weight_decay=0.0001), 167 | lr_scheduler=dict(type='PolyLR', max_epochs=max_epochs), 168 | max_epochs=max_epochs, 169 | trainval_ratio=1, 170 | log_interval=10, 171 | snapshot_interval=5, 172 | save_best=True, 173 | ) 174 | -------------------------------------------------------------------------------- /semantic segmentation/configs/voc_deeplabv3_T7.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | # 1. configuration for inference 4 | nclasses = 21 5 | ignore_label = 255 6 | multi_label = False 7 | 8 | crop_size_h, crop_size_w = 513, 513 9 | test_size_h, test_size_w = 513, 513 10 | image_pad_value = (123.675, 116.280, 103.530) 11 | 12 | img_norm_cfg = dict( 13 | max_pixel_value=255.0, 14 | std=(0.229, 0.224, 0.225), 15 | mean=(0.485, 0.456, 0.406), 16 | ) 17 | norm_cfg = dict(type='SyncBN') 18 | 19 | inference = dict( 20 | multi_label=multi_label, 21 | transforms=[ 22 | dict(type='PadIfNeeded', min_height=test_size_h, min_width=test_size_w, 23 | value=image_pad_value, mask_value=ignore_label), 24 | dict(type='Normalize', **img_norm_cfg), 25 | dict(type='ToTensor'), 26 | ], 27 | model=dict( 28 | # model/encoder 29 | encoder=dict( 30 | backbone=dict( 31 | type='MYSResNet', 32 | arch='resnet34', 33 | replace_stride_with_dilation=[False, False, False], 34 | layer_stride = [1, 2, 2, 1], 35 | # multi_grid=[1, 2, 4], 36 | norm_cfg=norm_cfg, 37 | bit=3, 38 | pretrain=False, 39 | ), 40 | ), 41 | collect=dict(type='CollectBlock', from_layer='c5'), 42 | # model/head 43 | head=dict( 44 | type='Head', 45 | no_convs=True, 46 | upsample=dict( 47 | type='Upsample', 48 | scale_factor=16, 49 | scale_bias=-15, 50 | mode='bilinear', 51 | align_corners=True, 52 | ), 53 | ), 54 | ), 55 | ) 56 | 57 | # 2. 
configuration for train/test 58 | root_workdir = 'workdir' 59 | dataset_type = 'VOCDataset' 60 | dataset_root = 'data/VOCdevkit/VOC2012/' 61 | 62 | common = dict( 63 | seed=0, 64 | logger=dict( 65 | handlers=( 66 | dict(type='StreamHandler', level='INFO'), 67 | dict(type='FileHandler', level='INFO'), 68 | ), 69 | ), 70 | cudnn_deterministic=False, 71 | cudnn_benchmark=True, 72 | metrics=[ 73 | dict(type='IoU', num_classes=nclasses), 74 | dict(type='MIoU', num_classes=nclasses, average='equal'), 75 | ], 76 | dist_params=dict(backend='nccl'), 77 | ) 78 | 79 | ## 2.1 configuration for test 80 | test = dict( 81 | data=dict( 82 | dataset=dict( 83 | type=dataset_type, 84 | root=dataset_root, 85 | imglist_name='val.txt', 86 | multi_label=multi_label, 87 | ), 88 | transforms=inference['transforms'], 89 | sampler=dict( 90 | type='DefaultSampler', 91 | ), 92 | dataloader=dict( 93 | type='DataLoader', 94 | samples_per_gpu=1, 95 | workers_per_gpu=4, 96 | shuffle=False, 97 | drop_last=False, 98 | pin_memory=True, 99 | ), 100 | ), 101 | # tta=dict( 102 | # scales=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 103 | # biases=[0.5, 0.25, 0.0, -0.25, -0.5, -0.75], 104 | # flip=True, 105 | # ), 106 | ) 107 | 108 | ## 2.2 configuration for train 109 | max_epochs = 50 110 | 111 | train = dict( 112 | data=dict( 113 | train=dict( 114 | dataset=dict( 115 | type=dataset_type, 116 | root=dataset_root, 117 | imglist_name='trainaug.txt', 118 | multi_label=multi_label, 119 | ), 120 | transforms=[ 121 | dict(type='RandomScale', scale_limit=(0.5, 2), scale_step=0.25, 122 | interpolation=cv2.INTER_LINEAR), 123 | dict(type='PadIfNeeded', min_height=crop_size_h, 124 | min_width=crop_size_w, value=image_pad_value, 125 | mask_value=ignore_label), 126 | dict(type='RandomCrop', height=crop_size_h, width=crop_size_w), 127 | dict(type='HorizontalFlip', p=0.5), 128 | dict(type='Normalize', **img_norm_cfg), 129 | dict(type='ToTensor'), 130 | ], 131 | sampler=dict( 132 | type='DefaultSampler', 133 | ), 134 | dataloader=dict( 135 | type='DataLoader', 136 | samples_per_gpu=8, 137 | workers_per_gpu=4, 138 | shuffle=True, 139 | drop_last=True, 140 | pin_memory=True, 141 | ), 142 | ), 143 | val=dict( 144 | dataset=dict( 145 | type=dataset_type, 146 | root=dataset_root, 147 | imglist_name='val.txt', 148 | multi_label=multi_label, 149 | ), 150 | transforms=inference['transforms'], 151 | sampler=dict( 152 | type='DefaultSampler', 153 | ), 154 | dataloader=dict( 155 | type='DataLoader', 156 | samples_per_gpu=8, 157 | workers_per_gpu=4, 158 | shuffle=False, 159 | drop_last=False, 160 | pin_memory=True, 161 | ), 162 | ), 163 | ), 164 | resume=None, 165 | criterion=dict(type='CrossEntropyLoss', ignore_index=ignore_label), 166 | optimizer=dict(type='SGD', lr=0.007, momentum=0.9, weight_decay=0.0001), 167 | lr_scheduler=dict(type='PolyLR', max_epochs=max_epochs), 168 | max_epochs=max_epochs, 169 | trainval_ratio=1, 170 | log_interval=10, 171 | snapshot_interval=5, 172 | save_best=True, 173 | ) 174 | -------------------------------------------------------------------------------- /semantic segmentation/configs/voc_deeplabvr.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | # 1. 
configuration for inference 4 | nclasses = 21 5 | ignore_label = 255 6 | multi_label = False 7 | 8 | crop_size_h, crop_size_w = 513, 513 9 | test_size_h, test_size_w = 513, 513 10 | image_pad_value = (123.675, 116.280, 103.530) 11 | 12 | img_norm_cfg = dict( 13 | max_pixel_value=255.0, 14 | std=(0.229, 0.224, 0.225), 15 | mean=(0.485, 0.456, 0.406), 16 | ) 17 | norm_cfg = dict(type='SyncBN') 18 | 19 | inference = dict( 20 | multi_label=multi_label, 21 | transforms=[ 22 | dict(type='PadIfNeeded', min_height=test_size_h, min_width=test_size_w, 23 | value=image_pad_value, mask_value=ignore_label), 24 | dict(type='Normalize', **img_norm_cfg), 25 | dict(type='ToTensor'), 26 | ], 27 | model=dict( 28 | # model/encoder 29 | encoder=dict( 30 | backbone=dict( 31 | type='MYResNet', 32 | arch='resnet18', 33 | replace_stride_with_dilation=[False, False, False], 34 | layer_stride = [1, 2, 2, 1], 35 | # multi_grid=[1, 2, 4], 36 | norm_cfg=norm_cfg, 37 | bit=32, 38 | ), 39 | ), 40 | collect=dict(type='CollectBlock', from_layer='c5'), 41 | # model/head 42 | head=dict( 43 | type='Head', 44 | no_convs=True, 45 | upsample=dict( 46 | type='Upsample', 47 | scale_factor=16, 48 | scale_bias=-15, 49 | mode='bilinear', 50 | align_corners=True, 51 | ), 52 | ), 53 | ), 54 | ) 55 | 56 | # 2. configuration for train/test 57 | root_workdir = 'workdir' 58 | dataset_type = 'VOCDataset' 59 | dataset_root = 'data/VOCdevkit/VOC2012/' 60 | 61 | common = dict( 62 | seed=0, 63 | logger=dict( 64 | handlers=( 65 | dict(type='StreamHandler', level='INFO'), 66 | dict(type='FileHandler', level='INFO'), 67 | ), 68 | ), 69 | cudnn_deterministic=False, 70 | cudnn_benchmark=True, 71 | metrics=[ 72 | dict(type='IoU', num_classes=nclasses), 73 | dict(type='MIoU', num_classes=nclasses, average='equal'), 74 | ], 75 | dist_params=dict(backend='nccl'), 76 | ) 77 | 78 | ## 2.1 configuration for test 79 | test = dict( 80 | data=dict( 81 | dataset=dict( 82 | type=dataset_type, 83 | root=dataset_root, 84 | imglist_name='val.txt', 85 | multi_label=multi_label, 86 | ), 87 | transforms=inference['transforms'], 88 | sampler=dict( 89 | type='DefaultSampler', 90 | ), 91 | dataloader=dict( 92 | type='DataLoader', 93 | samples_per_gpu=4, 94 | workers_per_gpu=4, 95 | shuffle=False, 96 | drop_last=False, 97 | pin_memory=True, 98 | ), 99 | ), 100 | # tta=dict( 101 | # scales=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 102 | # biases=[0.5, 0.25, 0.0, -0.25, -0.5, -0.75], 103 | # flip=True, 104 | # ), 105 | ) 106 | 107 | ## 2.2 configuration for train 108 | max_epochs = 50 109 | 110 | train = dict( 111 | data=dict( 112 | train=dict( 113 | dataset=dict( 114 | type=dataset_type, 115 | root=dataset_root, 116 | imglist_name='trainaug.txt', 117 | multi_label=multi_label, 118 | ), 119 | transforms=[ 120 | dict(type='RandomScale', scale_limit=(0.5, 2), scale_step=0.25, 121 | interpolation=cv2.INTER_LINEAR), 122 | dict(type='PadIfNeeded', min_height=crop_size_h, 123 | min_width=crop_size_w, value=image_pad_value, 124 | mask_value=ignore_label), 125 | dict(type='RandomCrop', height=crop_size_h, width=crop_size_w), 126 | dict(type='HorizontalFlip', p=0.5), 127 | dict(type='Normalize', **img_norm_cfg), 128 | dict(type='ToTensor'), 129 | ], 130 | sampler=dict( 131 | type='DefaultSampler', 132 | ), 133 | dataloader=dict( 134 | type='DataLoader', 135 | samples_per_gpu=8, 136 | workers_per_gpu=4, 137 | shuffle=True, 138 | drop_last=True, 139 | pin_memory=True, 140 | ), 141 | ), 142 | val=dict( 143 | dataset=dict( 144 | type=dataset_type, 145 | root=dataset_root, 146 | 
imglist_name='val.txt', 147 | multi_label=multi_label, 148 | ), 149 | transforms=inference['transforms'], 150 | sampler=dict( 151 | type='DefaultSampler', 152 | ), 153 | dataloader=dict( 154 | type='DataLoader', 155 | samples_per_gpu=8, 156 | workers_per_gpu=4, 157 | shuffle=False, 158 | drop_last=False, 159 | pin_memory=True, 160 | ), 161 | ), 162 | ), 163 | resume=None, 164 | criterion=dict(type='CrossEntropyLoss', ignore_index=ignore_label), 165 | optimizer=dict(type='SGD', lr=0.007, momentum=0.9, weight_decay=0.0001), 166 | lr_scheduler=dict(type='PolyLR', max_epochs=max_epochs), 167 | max_epochs=max_epochs, 168 | trainval_ratio=1, 169 | log_interval=10, 170 | snapshot_interval=5, 171 | save_best=True, 172 | ) 173 | -------------------------------------------------------------------------------- /semantic segmentation/tools/decode.py: -------------------------------------------------------------------------------- 1 | # https://gist.github.com/wllhf/a4533e0adebe57e3ed06d4b50c8419ae 2 | # https://github.com/tensorflow/models/blob/master/research/deeplab/utils/get_dataset_colormap.py 3 | # https://github.com/tensorflow/models/blob/master/research/deeplab/datasets/remove_gt_colormap.py 4 | 5 | import glob 6 | import numpy as np 7 | import os 8 | from PIL import Image 9 | 10 | 11 | def color_map(N=256, normalized=False): 12 | def bitget(byteval, idx): 13 | return ((byteval & (1 << idx)) != 0) 14 | 15 | dtype = 'float32' if normalized else 'uint8' 16 | cmap = np.zeros((N, 3), dtype=dtype) 17 | for i in range(N): 18 | r = g = b = 0 19 | c = i 20 | for j in range(8): 21 | r = r | (bitget(c, 0) << 7 - j) 22 | g = g | (bitget(c, 1) << 7 - j) 23 | b = b | (bitget(c, 2) << 7 - j) 24 | c = c >> 3 25 | 26 | cmap[i] = np.array([r, g, b]) 27 | 28 | cmap = cmap / 255 if normalized else cmap 29 | return cmap 30 | 31 | 32 | def main(): 33 | root = 'workpiece/VOC2012' 34 | src_name = 'EncodeSegmentationClass' 35 | dst_name = 'DecodeSegmentationClass' 36 | src_dir = '%s/%s' % (root, src_name) 37 | dst_dir = '%s/%s' % (root, dst_name) 38 | os.makedirs(dst_dir) 39 | items = glob.glob('%s/*.png' % src_dir) 40 | total = len(items) 41 | for idx, item in enumerate(items): 42 | print('%d/%d' % (idx, total)) 43 | new_item = item.replace(src_name, dst_name) 44 | target = np.array(Image.open(item))[:, :, np.newaxis] 45 | cmap = color_map()[:, np.newaxis, :] 46 | new_im = np.dot(target == 0, cmap[0]) 47 | for i in range(1, cmap.shape[0]): 48 | new_im += np.dot(target == i, cmap[i]) 49 | new_im = Image.fromarray(new_im.astype(np.uint8)) 50 | new_im.save(new_item) 51 | 52 | 53 | if __name__ == '__main__': 54 | main() 55 | -------------------------------------------------------------------------------- /semantic segmentation/tools/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | if (($# < 3)); then 4 | echo "Usage: bash tools/dist_test.sh config_file checkpoint gpus_to_use" 5 | exit 1 6 | fi 7 | 8 | CONFIG="$1" 9 | CHECKPOINT="$2" 10 | GPUS="$3" 11 | 12 | IFS=', ' read -r -a gpus <<<"${GPUS}" 13 | NGPUS="${#gpus[@]}" 14 | PORT="$((29400 + RANDOM % 100))" 15 | 16 | export CUDA_VISIBLE_DEVICES=${GPUS} 17 | 18 | PYTHONPATH="$(dirname "$0")/..":${PYTHONPATH} \ 19 | python -m torch.distributed.launch \ 20 | --nproc_per_node="${NGPUS}" \ 21 | --master_port=${PORT} \ 22 | "$(dirname "$0")"/test.py \ 23 | "$CONFIG" \ 24 | "$CHECKPOINT" \ 25 | --distribute \ 26 | "${@:4}" 27 |
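For context, a minimal example invocation of the dist_test.sh launcher above might look like the following; the config path, checkpoint path, and GPU list are placeholders chosen for illustration (the checkpoint name follows the best_mIoU.pth convention used by the configs), not a command documented by the repository:

bash tools/dist_test.sh configs/voc_deeplabv1.py workdir/voc_deeplabv1/best_mIoU.pth 0,1,2,3

The third argument is split on commas to count the GPUs, which sets --nproc_per_node, and the same string is exported as CUDA_VISIBLE_DEVICES, so one test process is launched per listed GPU; any further arguments are forwarded unchanged to tools/test.py.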
-------------------------------------------------------------------------------- /semantic segmentation/tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | if (($# < 2)); then 4 | echo "Usage: bash tools/dist_train.sh config_file gpus_to_use" 5 | exit 1 6 | fi 7 | CONFIG="$1" 8 | GPUS="$2" 9 | 10 | IFS=', ' read -r -a gpus <<<"${GPUS}" 11 | NGPUS="${#gpus[@]}" 12 | PORT="$((29400 + RANDOM % 100))" 13 | 14 | export CUDA_VISIBLE_DEVICES=${GPUS} 15 | 16 | PYTHONPATH="$(dirname "$0")/..":${PYTHONPATH} \ 17 | python -m torch.distributed.launch \ 18 | --nproc_per_node="${NGPUS}" \ 19 | --master_port=${PORT} \ 20 | "$(dirname "$0")"/train.py \ 21 | "$CONFIG" \ 22 | --distribute \ 23 | "${@:3}" 24 | -------------------------------------------------------------------------------- /semantic segmentation/tools/encode_voc12.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import numpy as np 3 | import os 4 | from PIL import Image 5 | 6 | 7 | def main(): 8 | root = 'VOCdevkit/VOC2012' 9 | src_name = 'SegmentationClass' 10 | dst_name = 'EncodeSegmentationClassPart' 11 | src_dir = '%s/%s' % (root, src_name) 12 | dst_dir = '%s/%s' % (root, dst_name) 13 | os.makedirs(dst_dir) 14 | items = glob.glob('%s/*.png' % src_dir) 15 | total = len(items) 16 | for idx, item in enumerate(items): 17 | print('%d/%d' % (idx, total)) 18 | new_item = item.replace(src_name, dst_name) 19 | new_mask = np.array(Image.open(item)) 20 | Image.fromarray(new_mask.astype(dtype=np.uint8)).save(new_item, 'PNG') 21 | 22 | 23 | if __name__ == '__main__': 24 | main() 25 | -------------------------------------------------------------------------------- /semantic segmentation/tools/encode_voc12_aug.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import numpy as np 3 | import os 4 | import scipy.io as io 5 | from PIL import Image 6 | 7 | 8 | def main(): 9 | root = 'benchmark_RELEASE/dataset' 10 | src_name = 'cls' 11 | dst_name = 'encode_cls' 12 | src_dir = '%s/%s' % (root, src_name) 13 | dst_dir = '%s/%s' % (root, dst_name) 14 | os.makedirs(dst_dir) 15 | items = glob.glob('%s/*.mat' % src_dir) 16 | total = len(items) 17 | for idx, item in enumerate(items): 18 | print('%d/%d' % (idx, total)) 19 | data = io.loadmat(item) 20 | mask = data['GTcls'][0]['Segmentation'][0].astype(np.int32) 21 | new_item = item.replace(src_name, dst_name).replace('.mat', '.png') 22 | Image.fromarray(mask.astype(dtype=np.uint8)).save(new_item, 'PNG') 23 | 24 | 25 | if __name__ == '__main__': 26 | main() 27 | -------------------------------------------------------------------------------- /semantic segmentation/tools/test.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import sys 4 | 5 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../')) 6 | 7 | from vedaseg.runners import TestRunner 8 | from vedaseg.utils import Config 9 | 10 | 11 | def parse_args(): 12 | parser = argparse.ArgumentParser(description='Test a segmentation model') 13 | parser.add_argument('config', type=str, help='config file path') 14 | parser.add_argument('checkpoint', type=str, help='checkpoint file path') 15 | parser.add_argument('--distribute', default=False, action='store_true') 16 | parser.add_argument('--local_rank', type=int, default=0) 17 | args = parser.parse_args() 18 | if 'LOCAL_RANK' not in os.environ: 19 |
os.environ['LOCAL_RANK'] = str(args.local_rank) 20 | args = parser.parse_args() 21 | return args 22 | 23 | 24 | def main(): 25 | args = parse_args() 26 | 27 | cfg_path = args.config 28 | cfg = Config.fromfile(cfg_path) 29 | 30 | _, fullname = os.path.split(cfg_path) 31 | fname, ext = os.path.splitext(fullname) 32 | 33 | root_workdir = cfg.pop('root_workdir') 34 | workdir = os.path.join(root_workdir, fname) 35 | os.makedirs(workdir, exist_ok=True) 36 | 37 | test_cfg = cfg['test'] 38 | inference_cfg = cfg['inference'] 39 | common_cfg = cfg['common'] 40 | common_cfg['workdir'] = workdir 41 | common_cfg['distribute'] = args.distribute 42 | 43 | runner = TestRunner(test_cfg, inference_cfg, common_cfg) 44 | runner.load_checkpoint(args.checkpoint) 45 | runner() 46 | 47 | 48 | if __name__ == '__main__': 49 | main() 50 | -------------------------------------------------------------------------------- /semantic segmentation/tools/torch2onnx.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import sys 4 | 5 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../')) 6 | 7 | import torch 8 | from volksdep.converters import torch2onnx 9 | 10 | from vedaseg.runners import InferenceRunner 11 | from vedaseg.utils import Config 12 | 13 | 14 | def parse_args(): 15 | parser = argparse.ArgumentParser(description='Convert to Onnx model.') 16 | parser.add_argument('config', help='config file path') 17 | parser.add_argument('checkpoint', help='checkpoint file path') 18 | parser.add_argument('out', help='output onnx file name') 19 | parser.add_argument('--dummy_input_shape', default='3,800,1344', 20 | type=str, help='model input shape like 3,800,1344. ' 21 | 'Shape format is CxHxW') 22 | parser.add_argument('--dynamic_shape', default=False, action='store_true', 23 | help='whether to use dynamic shape') 24 | parser.add_argument('--opset_version', default=9, type=int, 25 | help='onnx opset version') 26 | parser.add_argument('--do_constant_folding', default=False, 27 | action='store_true', 28 | help='whether to apply constant-folding optimization') 29 | parser.add_argument('--verbose', default=False, action='store_true', 30 | help='whether print convert info') 31 | 32 | args = parser.parse_args() 33 | 34 | return args 35 | 36 | 37 | def main(): 38 | args = parse_args() 39 | cfg = Config.fromfile(args.config) 40 | 41 | inference_cfg = cfg['inference'] 42 | common_cfg = cfg.get('common') 43 | 44 | runner = InferenceRunner(inference_cfg, common_cfg) 45 | assert runner.use_gpu, 'Please use valid gpu to export model.' 
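# Load the trained weights into the runner's model, then build a dummy input of
# shape (1, C, H, W) from --dummy_input_shape; the actual ONNX export is handled
# by volksdep's torch2onnx call below.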
46 | runner.load_checkpoint(args.checkpoint) 47 | model = runner.model 48 | 49 | shape = map(int, args.dummy_input_shape.split(',')) 50 | dummy_input = torch.randn(1, *shape) 51 | 52 | if args.dynamic_shape: 53 | print(f'Convert to Onnx with dynamic input shape and ' 54 | f'opset version {args.opset_version}') 55 | else: 56 | print(f'Convert to Onnx with constant input shape ' 57 | f'{args.dummy_input_shape} and ' 58 | f'opset version {args.opset_version}') 59 | torch2onnx(model, dummy_input, args.out, dynamic_shape=args.dynamic_shape, 60 | opset_version=args.opset_version, 61 | do_constant_folding=args.do_constant_folding, 62 | verbose=args.verbose) 63 | print(f'Convert successfully, saved onnx file: {os.path.abspath(args.out)}') 64 | 65 | 66 | if __name__ == '__main__': 67 | main() 68 | -------------------------------------------------------------------------------- /semantic segmentation/tools/train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import sys 4 | 5 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../')) 6 | 7 | from vedaseg.runners import TrainRunner 8 | from vedaseg.utils import Config 9 | 10 | 11 | def parse_args(): 12 | parser = argparse.ArgumentParser(description='Train a segmentation model') 13 | parser.add_argument('config', type=str, help='config file path') 14 | parser.add_argument('--distribute', default=False, action='store_true') 15 | parser.add_argument('--local_rank', type=int, default=0) 16 | args = parser.parse_args() 17 | if 'LOCAL_RANK' not in os.environ: 18 | os.environ['LOCAL_RANK'] = str(args.local_rank) 19 | 20 | return args 21 | 22 | 23 | def main(): 24 | args = parse_args() 25 | 26 | cfg_path = args.config 27 | cfg = Config.fromfile(cfg_path) 28 | 29 | _, fullname = os.path.split(cfg_path) 30 | fname, ext = os.path.splitext(fullname) 31 | 32 | root_workdir = cfg.pop('root_workdir') 33 | workdir = os.path.join(root_workdir, fname) 34 | os.makedirs(workdir, exist_ok=True) 35 | 36 | train_cfg = cfg['train'] 37 | inference_cfg = cfg['inference'] 38 | common_cfg = cfg['common'] 39 | common_cfg['workdir'] = workdir 40 | common_cfg['distribute'] = args.distribute 41 | 42 | runner = TrainRunner(train_cfg, inference_cfg, common_cfg) 43 | runner() 44 | 45 | 46 | if __name__ == '__main__': 47 | main() 48 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zju-bmi-lab/Fast-SNN/e4315cd4e74b4e185ab12bbe2dd74bc3fdccc547/semantic segmentation/vedaseg/__init__.py -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/criteria/__init__.py: -------------------------------------------------------------------------------- 1 | from .bce_loss import BCEWithLogitsLoss 2 | from .builder import build_criterion 3 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/criteria/bce_loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | from .registry import CRITERIA 4 | 5 | 6 | @CRITERIA.register_module 7 | class BCEWithLogitsLoss(nn.Module): 8 | def __init__(self, ignore_index=-1, *args, **kwargs): 9 | super(BCEWithLogitsLoss, self).__init__() 10 | 11 | self.ignore_index = ignore_index 12 | self.loss = 
nn.BCEWithLogitsLoss(*args, **kwargs) 13 | 14 | def forward(self, pred, target): 15 | 16 | valid_mask = target != self.ignore_index 17 | losses = self.loss(pred[valid_mask], target[valid_mask].float()) 18 | 19 | return losses 20 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/criteria/builder.py: -------------------------------------------------------------------------------- 1 | from ..utils import build_from_cfg 2 | from .registry import CRITERIA 3 | 4 | 5 | def build_criterion(cfg): 6 | criterion = build_from_cfg(cfg, CRITERIA, mode='registry') 7 | return criterion 8 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/criteria/registry.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | from vedaseg.utils import Registry 4 | 5 | CRITERIA = Registry('criterion') 6 | 7 | CrossEntropyLoss = nn.CrossEntropyLoss 8 | CRITERIA.register_module(CrossEntropyLoss) 9 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/dataloaders/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_dataloader 2 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/dataloaders/builder.py: -------------------------------------------------------------------------------- 1 | from ..utils import build_from_cfg 2 | from .registry import DATALOADERS 3 | 4 | 5 | def build_dataloader(distributed, num_gpus, cfg, default_args=None): 6 | cfg_ = cfg.copy() 7 | 8 | samples_per_gpu = cfg_.pop('samples_per_gpu') 9 | workers_per_gpu = cfg_.pop('workers_per_gpu') 10 | 11 | if distributed: 12 | batch_size = samples_per_gpu 13 | num_workers = workers_per_gpu 14 | else: 15 | batch_size = num_gpus * samples_per_gpu 16 | num_workers = num_gpus * workers_per_gpu 17 | 18 | cfg_.update({'batch_size': batch_size, 19 | 'num_workers': num_workers}) 20 | 21 | dataloader = build_from_cfg(cfg_, DATALOADERS, default_args) 22 | 23 | return dataloader 24 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/dataloaders/registry.py: -------------------------------------------------------------------------------- 1 | from torch.utils.data import DataLoader 2 | 3 | from ..utils import Registry 4 | 5 | DATALOADERS = Registry('dataloader') 6 | 7 | DATALOADERS.register_module(DataLoader) -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/dataloaders/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_sampler 2 | from .distributed import DefaultSampler 3 | from .non_distributed import DefaultSampler 4 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/dataloaders/samplers/builder.py: -------------------------------------------------------------------------------- 1 | from ...utils import build_from_cfg 2 | from .registry import DISTRIBUTED_SAMPLERS, NON_DISTRIBUTED_SAMPLERS 3 | 4 | 5 | def build_sampler(distributed, cfg, default_args=None): 6 | if distributed: 7 | sampler = build_from_cfg(cfg, DISTRIBUTED_SAMPLERS, default_args) 8 | else: 9 | sampler = build_from_cfg(cfg, NON_DISTRIBUTED_SAMPLERS, default_args) 10 
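# Both registries expose a sampler named DefaultSampler (see __init__.py above),
# so the same config entry works unchanged for single-GPU and distributed runs;
# only the registry it is looked up in differs.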
| 11 | return sampler 12 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/dataloaders/samplers/distributed.py: -------------------------------------------------------------------------------- 1 | from torch.utils.data import DistributedSampler 2 | 3 | from ...utils import get_dist_info 4 | from .registry import DISTRIBUTED_SAMPLERS 5 | 6 | 7 | @DISTRIBUTED_SAMPLERS.register_module 8 | class DefaultSampler(DistributedSampler): 9 | """Default distributed sampler.""" 10 | 11 | def __init__(self, dataset, shuffle=True): 12 | rank, num_replicas = get_dist_info() 13 | super().__init__(dataset, num_replicas, rank, shuffle) 14 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/dataloaders/samplers/non_distributed.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data import Sampler 3 | 4 | from .registry import NON_DISTRIBUTED_SAMPLERS 5 | 6 | 7 | @NON_DISTRIBUTED_SAMPLERS.register_module 8 | class DefaultSampler(Sampler): 9 | """Default non-distributed sampler.""" 10 | 11 | def __init__(self, dataset, shuffle=True): 12 | self.dataset = dataset 13 | self.shuffle = shuffle 14 | 15 | def __iter__(self): 16 | if self.shuffle: 17 | return iter(torch.randperm(len(self.dataset)).tolist()) 18 | else: 19 | return iter(range(len(self.dataset))) 20 | 21 | def __len__(self): 22 | return len(self.dataset) 23 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/dataloaders/samplers/registry.py: -------------------------------------------------------------------------------- 1 | from ...utils import Registry 2 | 3 | DISTRIBUTED_SAMPLERS = Registry('distributed_sampler') 4 | NON_DISTRIBUTED_SAMPLERS = Registry('non_distributed_sampler') 5 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_dataset 2 | from .coco import CocoDataset 3 | from .voc import VOCDataset 4 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/datasets/base.py: -------------------------------------------------------------------------------- 1 | from torch.utils.data import Dataset 2 | 3 | 4 | class BaseDataset(Dataset): 5 | """ BaseDataset 6 | """ 7 | CLASSES = None 8 | 9 | PALETTE = None 10 | 11 | def __init__(self, transform=None): 12 | self.transform = transform 13 | 14 | def process(self, image, masks): 15 | if self.transform: 16 | augmented = self.transform(image=image, masks=masks) 17 | return augmented['image'], augmented['masks'] 18 | else: 19 | return image, masks 20 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/datasets/builder.py: -------------------------------------------------------------------------------- 1 | from vedaseg.utils import build_from_cfg 2 | from .registry import DATASETS 3 | 4 | 5 | def build_dataset(cfg, default_args=None): 6 | dataset = build_from_cfg(cfg, DATASETS, default_args) 7 | return dataset 8 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/datasets/coco.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 
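# CocoDataset below parses COCO-style JSON annotations and rasterizes the polygon
# segmentations with cv2.fillPoly: one binary mask per category when
# multi_label=True, otherwise a single index mask with class ids offset by +1 so
# that 0 is kept for background.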
import json 3 | import logging 4 | import numpy as np 5 | import os 6 | from collections import defaultdict 7 | 8 | from vedaseg.datasets.base import BaseDataset 9 | from .registry import DATASETS 10 | 11 | logger = logging.getLogger() 12 | 13 | 14 | @DATASETS.register_module 15 | class CocoDataset(BaseDataset): 16 | def __init__(self, root, ann_file, img_prefix='', transform=None, 17 | multi_label=False): 18 | super().__init__() 19 | self.multi_label = multi_label 20 | self.root = root 21 | self.ann_file = ann_file 22 | self.img_prefix = img_prefix 23 | self.transform = transform 24 | if self.root is not None: 25 | self.img_prefix = os.path.join(self.root, self.img_prefix) 26 | 27 | self.data = json.load( 28 | open(os.path.join(self.root, 'annotations', self.ann_file), 'r')) 29 | 30 | self.load_annotations() 31 | logger.debug('Total of images is {}'.format(len(self.data_infos))) 32 | 33 | def load_annotations(self): 34 | self.cat_ids = [cat['id'] for cat in self.data['categories']] 35 | self.numclass = len(self.cat_ids) 36 | self.cat2label = {cat_id: i for i, cat_id in enumerate(self.cat_ids)} 37 | 38 | self.img_ids, self.data_infos = [], [] 39 | self.imgToAnns = defaultdict(list) 40 | 41 | for img in self.data['images']: 42 | self.img_ids.append(img['id']) 43 | img['filename'] = os.path.join(self.img_prefix, img['file_name']) 44 | self.data_infos.append(img) 45 | 46 | for ann in self.data['annotations']: 47 | self.imgToAnns[ann['image_id']].append(ann) 48 | 49 | def _parse_ann_info(self, img_info, ann_info): 50 | gt_bboxes = [] 51 | gt_labels = [] 52 | gt_bboxes_ignore = [] 53 | gt_masks_ann = [] 54 | for i, ann in enumerate(ann_info): 55 | if ann.get('ignore', False): 56 | continue 57 | x1, y1, w, h = ann['bbox'] 58 | inter_w = max(0, min(x1 + w, img_info['width']) - max(x1, 0)) 59 | inter_h = max(0, min(y1 + h, img_info['height']) - max(y1, 0)) 60 | if inter_w * inter_h == 0: 61 | continue 62 | if ann['area'] <= 0 or w < 1 or h < 1: 63 | continue 64 | if ann['category_id'] not in self.cat_ids: 65 | continue 66 | bbox = [x1, y1, x1 + w, y1 + h] 67 | if ann.get('iscrowd', False): 68 | gt_bboxes_ignore.append(bbox) 69 | else: 70 | gt_bboxes.append(bbox) 71 | gt_labels.append(self.cat2label[ann['category_id']]) 72 | gt_masks_ann.append(ann['segmentation']) 73 | 74 | if gt_bboxes: 75 | gt_bboxes = np.array(gt_bboxes, dtype=np.float32) 76 | gt_labels = np.array(gt_labels, dtype=np.int64) 77 | else: 78 | gt_bboxes = np.zeros((0, 4), dtype=np.float32) 79 | gt_labels = np.array([], dtype=np.int64) 80 | 81 | if gt_bboxes_ignore: 82 | gt_bboxes_ignore = np.array(gt_bboxes_ignore, dtype=np.float32) 83 | else: 84 | gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32) 85 | 86 | seg_map = img_info['filename'].replace('jpg', 'png') 87 | 88 | ann = dict( 89 | bboxes=gt_bboxes, 90 | labels=gt_labels, 91 | bboxes_ignore=gt_bboxes_ignore, 92 | masks=gt_masks_ann, 93 | seg_map=seg_map) 94 | 95 | return ann 96 | 97 | def get_ann_info(self, img_info): 98 | img_id = img_info['id'] 99 | ann_info = [ann for ann in self.imgToAnns[img_id]] 100 | return self._parse_ann_info(img_info, ann_info) 101 | 102 | def generate_mask(self, shape, ann_info): 103 | h, w, c = shape 104 | if self.multi_label: 105 | masks = [np.zeros((h, w), np.uint8) for _ in range(self.numclass)] 106 | for m, l in zip(ann_info['masks'], ann_info['labels']): 107 | for m_ in m: 108 | m_ = np.array(m_).reshape((-1, 1, 2)).astype(np.int32) 109 | cv2.fillPoly(masks[l], [m_], 1) 110 | else: 111 | mask = np.zeros((h, w), np.uint8) 112 | for m, l in 
zip(ann_info['masks'], ann_info['labels']): 113 | for m_ in m: 114 | m_ = np.array(m_).reshape((-1, 1, 2)).astype(np.int32) 115 | cv2.fillPoly(mask, [m_], int(l + 1)) 116 | masks = [mask] 117 | return masks 118 | 119 | def __getitem__(self, idx): 120 | img_info = self.data_infos[idx] 121 | ann_info = self.get_ann_info(img_info) 122 | 123 | img = cv2.imread(img_info['filename']).astype(np.float32) 124 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 125 | 126 | masks = self.generate_mask(img.shape, ann_info) 127 | image, masks = self.process(img, masks) 128 | return image, masks.long() 129 | 130 | def __len__(self): 131 | return len(self.data_infos) 132 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/datasets/registry.py: -------------------------------------------------------------------------------- 1 | from ..utils import Registry 2 | 3 | DATASETS = Registry('dataset') 4 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/datasets/voc.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import logging 3 | import numpy as np 4 | import os 5 | 6 | from .base import BaseDataset 7 | from .registry import DATASETS 8 | 9 | logger = logging.getLogger() 10 | 11 | 12 | @DATASETS.register_module 13 | class VOCDataset(BaseDataset): 14 | def __init__(self, root, imglist_name, transform, multi_label=False): 15 | if multi_label: 16 | raise ValueError('multi label training is only ' 17 | 'supported by using COCO data form') 18 | super().__init__() 19 | 20 | imglist_fp = os.path.join(root, 'ImageSets/Segmentation', imglist_name) 21 | self.imglist = self.read_imglist(imglist_fp) 22 | 23 | logger.debug('Total of images is {}'.format(len(self.imglist))) 24 | 25 | self.root = root 26 | self.transform = transform 27 | 28 | def __getitem__(self, idx): 29 | imgname = self.imglist[idx] 30 | img_fp = os.path.join(self.root, 'JPEGImages', imgname) + '.jpg' 31 | mask_fp = os.path.join(self.root, 'EncodeSegmentationClass', 32 | imgname) + '.png' 33 | 34 | img = cv2.imread(img_fp).astype(np.float32) 35 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 36 | 37 | mask = cv2.imread(mask_fp, cv2.IMREAD_GRAYSCALE) 38 | 39 | image, mask = self.process(img, [mask]) 40 | 41 | return image, mask.long() 42 | 43 | def __len__(self): 44 | return len(self.imglist) 45 | 46 | def read_imglist(self, imglist_fp): 47 | ll = [] 48 | with open(imglist_fp, 'r') as fd: 49 | for line in fd: 50 | ll.append(line.strip()) 51 | return ll 52 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/loggers/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_logger 2 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/loggers/builder.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import sys 4 | import time 5 | import torch.distributed as dist 6 | 7 | 8 | def build_logger(cfg, default_args): 9 | timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime()) 10 | format_ = '%(asctime)s - %(levelname)s - %(message)s' 11 | 12 | formatter = logging.Formatter(format_) 13 | logger = logging.getLogger() 14 | logger.setLevel(logging.DEBUG) 15 | 16 | if dist.is_available() and dist.is_initialized(): 17 | rank = dist.get_rank() 
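# In distributed runs only rank 0 gets a FileHandler and the configured log
# level; all other ranks are capped at ERROR further down, so worker processes
# do not duplicate the log output.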
18 | else: 19 | rank = 0 20 | 21 | for handler in cfg['handlers']: 22 | if handler['type'] == 'StreamHandler': 23 | instance = logging.StreamHandler(sys.stdout) 24 | elif handler['type'] == 'FileHandler': 25 | # only rank 0 will add a FileHandler 26 | if default_args.get('workdir') and rank == 0: 27 | fp = os.path.join(default_args['workdir'], '%s.log' % timestamp) 28 | instance = logging.FileHandler(fp, 'w') 29 | else: 30 | continue 31 | else: 32 | instance = logging.StreamHandler(sys.stdout) 33 | 34 | level = getattr(logging, handler['level']) 35 | 36 | instance.setFormatter(formatter) 37 | if rank == 0: 38 | instance.setLevel(level) 39 | else: 40 | instance.setLevel(logging.ERROR) 41 | 42 | logger.addHandler(instance) 43 | 44 | return logger 45 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/lr_schedulers/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_lr_scheduler 2 | from .poly_lr import PolyLR 3 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/lr_schedulers/base.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | import weakref 3 | from functools import wraps 4 | from torch.optim import Optimizer 5 | 6 | 7 | class _Iter_LRScheduler(object): 8 | """ 9 | """ 10 | 11 | _iter_based = True 12 | 13 | def __init__(self, optimizer, niter_per_epoch, last_iter=-1): 14 | if not isinstance(optimizer, Optimizer): 15 | raise TypeError('{} is not an Optimizer'.format( 16 | type(optimizer).__name__)) 17 | self.optimizer = optimizer 18 | self.niter_per_epoch = niter_per_epoch 19 | if last_iter == -1: 20 | for group in optimizer.param_groups: 21 | group.setdefault('initial_lr', group['lr']) 22 | last_iter = 0 23 | else: 24 | for i, group in enumerate(optimizer.param_groups): 25 | if 'initial_lr' not in group: 26 | raise KeyError("param 'initial_lr' is not specified in " 27 | "param_groups[{}] when resuming an " 28 | "optimizer".format(i)) 29 | self.base_lrs = list( 30 | map(lambda group: group['initial_lr'], optimizer.param_groups)) 31 | self.last_epoch = int(last_iter / niter_per_epoch) 32 | self.last_iter = None 33 | 34 | # Following https://github.com/pytorch/pytorch/issues/20124 35 | # We would like to ensure that `lr_scheduler.step()` is called after 36 | # `optimizer.step()` 37 | def with_counter(method): 38 | if getattr(method, '_with_counter', False): 39 | # `optimizer.step()` has already been replaced, return. 40 | return method 41 | 42 | # Keep a weak reference to the optimizer instance to prevent 43 | # cyclic references. 44 | instance_ref = weakref.ref(method.__self__) 45 | # Get the unbound method for the same purpose. 46 | func = method.__func__ 47 | cls = instance_ref().__class__ 48 | del method 49 | 50 | @wraps(func) 51 | def wrapper(*args, **kwargs): 52 | instance = instance_ref() 53 | instance._step_count += 1 54 | wrapped = func.__get__(instance, cls) 55 | return wrapped(*args, **kwargs) 56 | 57 | # Note that the returned function here is no longer a bound method, 58 | # so attributes like `__func__` and `__self__` no longer exist. 
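# Tag the wrapper so a repeated call to with_counter() is a no-op and so step()
# can later check that optimizer.step() has been patched.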
59 | wrapper._with_counter = True 60 | return wrapper 61 | 62 | self.optimizer.step = with_counter(self.optimizer.step) 63 | self.optimizer._step_count = 0 64 | self._step_count = 0 65 | self.step(last_iter) 66 | 67 | def state_dict(self): 68 | """Returns the state of the scheduler as a :class:`dict`. 69 | 70 | It contains an entry for every variable in self.__dict__ which 71 | is not the optimizer. 72 | """ 73 | return {key: value for key, value in self.__dict__.items() if 74 | key != 'optimizer'} 75 | 76 | def load_state_dict(self, state_dict): 77 | """Loads the schedulers state. 78 | 79 | Arguments: 80 | state_dict (dict): scheduler state. Should be an object returned 81 | from a call to :meth:`state_dict`. 82 | """ 83 | self.__dict__.update(state_dict) 84 | 85 | def get_lr(self): 86 | raise NotImplementedError 87 | 88 | def step(self, iter_=None): 89 | # Raise a warning if old pattern is detected 90 | # https://github.com/pytorch/pytorch/issues/20124 91 | if self._step_count == 1: 92 | if not hasattr(self.optimizer.step, "_with_counter"): 93 | warnings.warn( 94 | "Seems like `optimizer.step()` has been overridden after learning rate scheduler " 95 | "initialization. Please, make sure to call `optimizer.step()` before " 96 | "`lr_scheduler.step()`. See more details at " 97 | "https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate", 98 | UserWarning) 99 | 100 | # Just check if there were two first lr_scheduler.step() calls before optimizer.step() 101 | elif self.optimizer._step_count < 1: 102 | warnings.warn( 103 | "Detected call of `lr_scheduler.step()` before `optimizer.step()`. " 104 | "In PyTorch 1.1.0 and later, you should call them in the opposite order: " 105 | "`optimizer.step()` before `lr_scheduler.step()`. Failure to do this " 106 | "will result in PyTorch skipping the first value of the learning rate schedule." 
107 | "See more details at " 108 | "https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate", 109 | UserWarning) 110 | self._step_count += 1 111 | 112 | if iter_ is None: 113 | iter_ = self.last_iter + 1 114 | self.last_iter = iter_ 115 | self.last_epoch = int(iter_ / self.niter_per_epoch) 116 | for param_group, lr in zip(self.optimizer.param_groups, self.get_lr()): 117 | param_group['lr'] = lr 118 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/lr_schedulers/builder.py: -------------------------------------------------------------------------------- 1 | from torch.optim import lr_scheduler 2 | 3 | from vedaseg.utils import build_from_cfg 4 | from .registry import LR_SCHEDULERS 5 | 6 | 7 | def build_lr_scheduler(cfg, default_args=None): 8 | if LR_SCHEDULERS.get(cfg['type']): 9 | scheduler = build_from_cfg(cfg, LR_SCHEDULERS, default_args, 'registry') 10 | else: 11 | default_args = dict(optimizer=default_args.get('optimizer')) 12 | scheduler = build_from_cfg(cfg, lr_scheduler, default_args, 'module') 13 | 14 | return scheduler 15 | 16 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/lr_schedulers/poly_lr.py: -------------------------------------------------------------------------------- 1 | from .base import _Iter_LRScheduler 2 | from .registry import LR_SCHEDULERS 3 | 4 | 5 | @LR_SCHEDULERS.register_module 6 | class PolyLR(_Iter_LRScheduler): 7 | """PolyLR 8 | """ 9 | 10 | def __init__(self, optimizer, niter_per_epoch, max_epochs, power=0.9, 11 | last_iter=-1, warm_up=0): 12 | self.max_iters = niter_per_epoch * max_epochs 13 | self.power = power 14 | self.warm_up = warm_up 15 | super().__init__(optimizer, niter_per_epoch, last_iter) 16 | 17 | def get_lr(self): 18 | if self.last_iter < self.warm_up: 19 | multiplier = (self.last_iter / float(self.warm_up)) ** self.power 20 | else: 21 | multiplier = (1 - self.last_iter / float( 22 | self.max_iters)) ** self.power 23 | return [base_lr * multiplier for base_lr in self.base_lrs] 24 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/lr_schedulers/registry.py: -------------------------------------------------------------------------------- 1 | from ..utils import Registry 2 | 3 | LR_SCHEDULERS = Registry('lr_scheduler') 4 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_metrics 2 | from .metrics import (Accuracy, DiceScore, IoU, MIoU, MultiLabelIoU, 3 | MultiLabelMIoU) 4 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/metrics/base.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from abc import ABCMeta, abstractmethod 3 | 4 | 5 | class BaseMetric(object, metaclass=ABCMeta): 6 | """ 7 | Base metric for segmentation metrics in an online manner. 8 | This class is abstract, providing a standard interface for metrics of this type. 9 | """ 10 | 11 | def __init__(self): 12 | super().__init__() 13 | self.reset() 14 | 15 | @abstractmethod 16 | def reset(self): 17 | """ 18 | Reset variables to default settings. 
19 | """ 20 | pass 21 | 22 | @abstractmethod 23 | def compute(self, pred, target): 24 | """ 25 | Compute metric value for current batch for metrics. 26 | Args: 27 | pred (numpy.ndarray): prediction results from segmentation model, 28 | pred should have the following shape (batch_size, h, w, num_categories) 29 | target (numpy.ndarray): ground truth class indices, 30 | target should have the following shape (batch_size, h, w) 31 | Returns: 32 | metric value or process value for current batch 33 | """ 34 | pass 35 | 36 | @abstractmethod 37 | def update(self, n=1): 38 | """ 39 | Add metric value or process value to statistic containers. 40 | """ 41 | pass 42 | 43 | @abstractmethod 44 | def accumulate(self): 45 | """ 46 | Compute accumulated metric value. 47 | """ 48 | pass 49 | 50 | def export(self): 51 | """ 52 | Export figures, images or reports of metrics 53 | """ 54 | pass 55 | 56 | def check(self, pred, target): 57 | """ 58 | Check inputs 59 | """ 60 | self._check_type(pred, target) 61 | self._check_match(pred, target) 62 | 63 | @staticmethod 64 | def _check_match(pred, target): 65 | assert pred.shape[0] == target.shape[0] and pred.shape[-2:-1] == target.shape[-2:-1], \ 66 | "pred and target don't match" 67 | 68 | @staticmethod 69 | def _check_type(pred, target): 70 | assert type(pred) == np.ndarray and type(target) == np.ndarray, \ 71 | "Only numpy.ndarray is supported for computing accuracy" 72 | 73 | @staticmethod 74 | def _check_pred_range(pred): 75 | assert np.all(0 <= pred) and np.all(pred <= 1), \ 76 | "Pred should stand for the predicted probability in range (0, 1)" 77 | 78 | def __call__(self, pred, target): 79 | self.check(pred, target) 80 | current_state = self.compute(pred, target) 81 | self.update() 82 | return current_state 83 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/metrics/builder.py: -------------------------------------------------------------------------------- 1 | from ..utils import build_from_cfg 2 | from .metrics import Compose 3 | from .registry import METRICS 4 | 5 | 6 | def build_metrics(cfg): 7 | mtcs = [] 8 | for icfg in cfg: 9 | mtc = build_from_cfg(icfg, METRICS) 10 | mtcs.append(mtc) 11 | metrics = Compose(mtcs) 12 | 13 | return metrics 14 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/metrics/registry.py: -------------------------------------------------------------------------------- 1 | from ..utils import Registry 2 | 3 | METRICS = Registry('metric') 4 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_model 2 | from .registry import MODELS 3 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/models/builder.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | # from .decoders import build_brick 4 | from vedaseg.models.decoders import build_brick, build_decoder 5 | from vedaseg.models.encoders import build_encoder 6 | from vedaseg.models.heads import build_head 7 | 8 | 9 | def build_model(cfg): 10 | encoder = build_encoder(cfg.get('encoder')) 11 | 12 | if cfg.get('decoder'): 13 | middle = build_decoder(cfg.get('decoder')) 14 | assert 'collect' not in cfg 15 | else: 16 | assert 'collect' in cfg 17 | 
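# Without a decoder, a single 'collect' brick from the BRICKS registry serves as
# the middle stage; either way the model is assembled as
# nn.Sequential(encoder, middle, head) below.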
middle = build_brick(cfg.get('collect')) 18 | 19 | head = build_head(cfg['head']) 20 | 21 | model = nn.Sequential(encoder, middle, head) 22 | 23 | return model 24 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/models/decoders/__init__.py: -------------------------------------------------------------------------------- 1 | from .bricks import FusionBlock, JunctionBlock 2 | from .builder import build_brick, build_decoder 3 | from .gfpn import GFPN 4 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/models/decoders/builder.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | from vedaseg.utils import build_from_cfg 4 | from .registry import BRICKS, DECODERS 5 | 6 | 7 | def build_brick(cfg, default_args=None): 8 | brick = build_from_cfg(cfg, BRICKS, default_args) 9 | return brick 10 | 11 | 12 | def build_bricks(cfgs): 13 | bricks = nn.ModuleList() 14 | for brick_cfg in cfgs: 15 | bricks.append(build_brick(brick_cfg)) 16 | return bricks 17 | 18 | 19 | def build_decoder(cfg, default_args=None): 20 | decoder = build_from_cfg(cfg, DECODERS, default_args) 21 | return decoder 22 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/models/decoders/gfpn/__init__.py: -------------------------------------------------------------------------------- 1 | from .gfpn import GFPN 2 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/models/decoders/gfpn/gfpn.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import torch.nn as nn 3 | 4 | from ...weight_init import init_weights 5 | from ..builder import build_brick, build_bricks 6 | from ..registry import DECODERS 7 | 8 | logger = logging.getLogger() 9 | 10 | 11 | @DECODERS.register_module 12 | class GFPN(nn.Module): 13 | """GFPN 14 | A general framework for FPN-alike structures. 15 | """ 16 | 17 | def __init__(self, neck, fusion=None): 18 | """ 19 | Args: 20 | neck: cfg that describes the structure of GFPN 21 | 22 | fusion: cfg that describes the fusion behaviour of GFPN 23 | """ 24 | super().__init__() 25 | self.neck = build_bricks(neck) 26 | if fusion: 27 | self.fusion = build_brick(fusion) 28 | else: 29 | self.fusion = None 30 | logger.info('GFPN init weights') 31 | init_weights(self.modules()) 32 | 33 | def forward(self, bottom_up): 34 | """ 35 | Args: 36 | bottom_up: dict of features from backbone 37 | """ 38 | x = None 39 | feats = {**bottom_up} 40 | for ii, layer in enumerate(self.neck): 41 | if layer.to_layer in feats: 42 | raise KeyError(f'Layer name {layer.to_layer} already in use. 
' 43 | f'Used names are: {list(feats.keys())}.') 44 | 45 | vertical_sources = layer.from_layers.get('vertical') 46 | lateral_sources = layer.from_layers.get('lateral') 47 | lateral_in, vertical_in = [], [] 48 | 49 | if lateral_sources is not None and len(lateral_sources) > 0: 50 | for l_source in lateral_sources: 51 | lateral_in.append(feats[l_source]) 52 | 53 | if vertical_sources is not None and len(vertical_sources) > 0: 54 | for v_source in vertical_sources: 55 | vertical_in.append(feats[v_source]) 56 | 57 | x = layer(vertical_in, lateral_in) 58 | feats[layer.to_layer] = x 59 | if self.fusion: 60 | x = self.fusion(feats) 61 | return x 62 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/models/decoders/registry.py: -------------------------------------------------------------------------------- 1 | from vedaseg.utils import Registry 2 | 3 | BRICKS = Registry('brick') 4 | DECODERS = Registry('decoder') 5 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/models/encoders/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_encoder 2 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/models/encoders/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_backbone 2 | from .resnet import ResNet 3 | from .myresnet import MYResNet, MYSResNet 4 | from .vgg import MYVGG11, MYVGG9, MYSVGG9 5 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/models/encoders/backbones/builder.py: -------------------------------------------------------------------------------- 1 | from vedaseg.utils import build_from_cfg 2 | from .registry import BACKBONES 3 | 4 | 5 | def build_backbone(cfg, default_args=None): 6 | backbone = build_from_cfg(cfg, BACKBONES, default_args) 7 | return backbone 8 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/models/encoders/backbones/registry.py: -------------------------------------------------------------------------------- 1 | from vedaseg.utils import Registry 2 | 3 | BACKBONES = Registry('backbone') 4 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/models/encoders/backbones/spiking.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | 6 | def unsigned_spikes(model): 7 | for m in model.modules(): 8 | if isinstance(m, Spiking): 9 | m.sign = False 10 | 11 | #####the spiking wrapper###### 12 | 13 | class Spiking(nn.Module): 14 | def __init__(self, block, T): 15 | super(Spiking, self).__init__() 16 | self.block = block 17 | self.T = T 18 | self.is_first = False 19 | self.idem = False 20 | self.sign = True 21 | def forward(self, x): 22 | if self.idem: 23 | return x 24 | 25 | ###initialize membrane to half threshold 26 | threshold = self.block[2].act_alpha.data 27 | membrane = 0.5 * threshold 28 | sum_spikes = 0 29 | 30 | #prepare charges 31 | if self.is_first: 32 | x.unsqueeze_(1) 33 | x = x.repeat(1, self.T, 1, 1, 1) 34 | train_shape = [x.shape[0], x.shape[1]] 35 | x = x.flatten(0, 1) 36 | x = self.block(x) 37 | train_shape.extend(x.shape[1:]) 38 | x = 
x.reshape(train_shape) 39 | 40 | #integrate charges 41 | for dt in range(self.T): 42 | membrane = membrane + x[:,dt] 43 | if dt == 0: 44 | spike_train = torch.zeros(membrane.shape[:1] + torch.Size([self.T]) + membrane.shape[1:],device=membrane.device) 45 | 46 | spikes = membrane >= threshold 47 | membrane[spikes] = membrane[spikes] - threshold 48 | spikes = spikes.float() 49 | sum_spikes = sum_spikes + spikes 50 | 51 | ###signed spikes### 52 | if self.sign: 53 | inhibit = membrane <= -1e-3 54 | inhibit = inhibit & (sum_spikes > 0) 55 | membrane[inhibit] = membrane[inhibit] + threshold 56 | inhibit = inhibit.float() 57 | sum_spikes = sum_spikes - inhibit 58 | else: 59 | inhibit = 0 60 | 61 | spike_train[:,dt] = spikes - inhibit 62 | 63 | spike_train = spike_train * threshold 64 | return spike_train 65 | 66 | 67 | class last_Spiking(nn.Module): 68 | def __init__(self, block, T): 69 | super(last_Spiking, self).__init__() 70 | self.block = block 71 | self.T = T 72 | self.idem = False 73 | 74 | def forward(self, x): 75 | if self.idem: 76 | return x 77 | #prepare charges 78 | train_shape = [x.shape[0], x.shape[1]] 79 | x = x.flatten(0, 1) 80 | x = self.block(x) 81 | train_shape.extend(x.shape[1:]) 82 | x = x.reshape(train_shape) 83 | 84 | #integrate charges 85 | return x.sum(dim=1).div(self.T) 86 | 87 | class IF(nn.Module): 88 | def __init__(self): 89 | super(IF, self).__init__() 90 | ###changes threshold to act_alpha 91 | ###being fleet 92 | self.act_alpha = torch.nn.Parameter(torch.tensor(1.0)) 93 | 94 | def forward(self, x): 95 | return x 96 | 97 | def show_params(self): 98 | act_alpha = round(self.act_alpha.data.item(), 3) 99 | print('clipping threshold activation alpha: {:2f}'.format(act_alpha)) 100 | 101 | def extra_repr(self) -> str: 102 | return 'threshold={:.3f}'.format(self.act_alpha) -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/models/encoders/builder.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | from vedaseg.utils import build_from_cfg 4 | from .backbones.registry import BACKBONES 5 | from .enhance_modules.registry import ENHANCE_MODULES 6 | 7 | 8 | def build_encoder(cfg, default_args=None): 9 | backbone = build_from_cfg(cfg['backbone'], BACKBONES, default_args) 10 | 11 | enhance_cfg = cfg.get('enhance') 12 | if enhance_cfg: 13 | enhance_module = build_from_cfg(enhance_cfg, ENHANCE_MODULES, 14 | default_args) 15 | encoder = nn.Sequential(backbone, enhance_module) 16 | else: 17 | encoder = backbone 18 | 19 | return encoder 20 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/models/encoders/enhance_modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .aspp import ASPP, ASPP_v2 2 | from .builder import build_enhance_module 3 | from .ppm import PPM 4 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/models/encoders/enhance_modules/builder.py: -------------------------------------------------------------------------------- 1 | from vedaseg.utils import build_from_cfg 2 | from .registry import ENHANCE_MODULES 3 | 4 | 5 | def build_enhance_module(cfg, default_args=None): 6 | enhance_module = build_from_cfg(cfg, ENHANCE_MODULES, default_args) 7 | return enhance_module 8 | -------------------------------------------------------------------------------- /semantic 
segmentation/vedaseg/models/encoders/enhance_modules/ppm.py: -------------------------------------------------------------------------------- 1 | # modify from https://github.com/hszhao/semseg/blob/master/model/pspnet.py 2 | 3 | import logging 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | from ...utils.act import build_act_layer 9 | from ...utils.norm import build_norm_layer 10 | from ...weight_init import init_weights 11 | from .registry import ENHANCE_MODULES 12 | 13 | logger = logging.getLogger() 14 | 15 | 16 | @ENHANCE_MODULES.register_module 17 | class PPM(nn.Module): 18 | def __init__(self, in_channels, out_channels, bins, from_layer, to_layer, 19 | mode='bilinear', align_corners=True, 20 | norm_cfg=None, act_cfg=None): 21 | super(PPM, self).__init__() 22 | self.from_layer = from_layer 23 | self.to_layer = to_layer 24 | self.mode = mode 25 | self.align_corners = align_corners 26 | 27 | if norm_cfg is None: 28 | norm_cfg = dict(type='BN') 29 | 30 | if act_cfg is None: 31 | act_cfg = dict(type='Relu', inplace=True) 32 | 33 | self.blocks = nn.ModuleList() 34 | for bin_ in bins: 35 | self.blocks.append( 36 | nn.Sequential( 37 | nn.AdaptiveAvgPool2d(bin_), 38 | nn.Conv2d(in_channels, out_channels, 1, bias=False), 39 | build_norm_layer(norm_cfg, out_channels, layer_only=True), 40 | build_act_layer(act_cfg, out_channels, layer_only=True) 41 | ) 42 | ) 43 | logger.info('PPM init weights') 44 | init_weights(self.modules()) 45 | 46 | def forward(self, feats): 47 | feats_ = feats.copy() 48 | x = feats_[self.from_layer] 49 | h, w = x.shape[2:] 50 | out = [x] 51 | for block in self.blocks: 52 | feat = F.interpolate( 53 | block(x), 54 | (h, w), 55 | mode=self.mode, 56 | align_corners=self.align_corners 57 | ) 58 | out.append(feat) 59 | out = torch.cat(out, 1) 60 | feats_[self.to_layer] = out 61 | return feats_ 62 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/models/encoders/enhance_modules/registry.py: -------------------------------------------------------------------------------- 1 | from vedaseg.utils import Registry 2 | 3 | ENHANCE_MODULES = Registry('enhance_module') 4 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/models/heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_head 2 | from .head import Head 3 | from .registry import HEADS 4 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/models/heads/builder.py: -------------------------------------------------------------------------------- 1 | from vedaseg.utils import build_from_cfg 2 | from .registry import HEADS 3 | 4 | 5 | def build_head(cfg, default_args=None): 6 | head = build_from_cfg(cfg, HEADS, default_args) 7 | return head 8 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/models/heads/head.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import torch.nn as nn 3 | 4 | from ..utils import ConvModules, build_module 5 | from ..weight_init import init_weights 6 | from .registry import HEADS 7 | 8 | logger = logging.getLogger() 9 | 10 | 11 | @HEADS.register_module 12 | class Head(nn.Module): 13 | """Head 14 | 15 | Args: 16 | """ 17 | 18 | def __init__(self, 19 | in_channels=256, 20 | out_channels=21, 
21 | inter_channels=None, 22 | conv_cfg=dict(type='Conv'), 23 | norm_cfg=dict(type='BN'), 24 | act_cfg=dict(type='Relu', inplace=True), 25 | num_convs=0, 26 | upsample=None, 27 | dropouts=None, 28 | no_convs=False): 29 | super().__init__() 30 | 31 | if num_convs > 0: 32 | layers = [ 33 | ConvModules(in_channels, 34 | inter_channels, 35 | 3, 36 | padding=1, 37 | conv_cfg=conv_cfg, 38 | norm_cfg=norm_cfg, 39 | act_cfg=act_cfg, 40 | num_convs=num_convs, 41 | dropouts=dropouts), 42 | nn.Conv2d(inter_channels, out_channels, 1) 43 | ] 44 | elif no_convs: 45 | layers = [] 46 | else: 47 | layers = [nn.Conv2d(in_channels, out_channels, 1)] 48 | if upsample: 49 | upsample_layer = build_module(upsample) 50 | layers.append(upsample_layer) 51 | 52 | self.block = nn.Sequential(*layers) 53 | logger.info('Head init weights') 54 | init_weights(self.modules()) 55 | 56 | def forward(self, x): 57 | feat = self.block(x) 58 | return feat 59 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/models/heads/registry.py: -------------------------------------------------------------------------------- 1 | from vedaseg.utils import Registry 2 | 3 | HEADS = Registry('head') 4 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/models/registry.py: -------------------------------------------------------------------------------- 1 | from ..utils import Registry 2 | 3 | MODELS = Registry('model') 4 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/models/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_module, build_torch_nn 2 | from .conv_module import ConvModule, ConvModules 3 | from .upsample import Upsample 4 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/models/utils/act.py: -------------------------------------------------------------------------------- 1 | # modify from mmcv and mmdetection 2 | 3 | import torch 4 | import torch.nn as nn 5 | from torch.nn.parameter import Parameter 6 | 7 | 8 | class TLU(nn.Module): 9 | def __init__(self, num_features): 10 | super(TLU, self).__init__() 11 | 12 | self.num_features = num_features 13 | self.tau = Parameter(torch.Tensor(1, num_features, 1, 1), 14 | requires_grad=True) 15 | 16 | self.reset_parameters() 17 | 18 | def reset_parameters(self): 19 | nn.init.zeros_(self.tau) 20 | 21 | def forward(self, x): 22 | return torch.max(x, self.tau) 23 | 24 | def extra_repr(self): 25 | return '{num_features}'.format(**self.__dict__) 26 | 27 | 28 | act_cfg = { 29 | 'Relu': ('relu', nn.ReLU), 30 | 'Tlu': ('tlu', TLU), 31 | } 32 | 33 | 34 | def build_act_layer(cfg, num_features, postfix='', layer_only=False): 35 | """ Build activate layer 36 | 37 | Args: 38 | cfg (dict): cfg should contain: 39 | type (str): identify activate layer type. 40 | layer args: args needed to instantiate a activate layer. 41 | requires_grad (bool): [optional] whether stop gradient updates 42 | num_features (int): number of channels from input. 43 | postfix (int, str): appended into act abbreviation to 44 | create named layer. 
45 | 46 | Returns: 47 | name (str): abbreviation + postfix 48 | layer (nn.Module): created act layer 49 | """ 50 | assert isinstance(cfg, dict) and 'type' in cfg 51 | cfg_ = cfg.copy() 52 | 53 | layer_type = cfg_.pop('type') 54 | if layer_type not in act_cfg: 55 | raise KeyError('Unrecognized activate type {}'.format(layer_type)) 56 | else: 57 | abbr, act_layer = act_cfg[layer_type] 58 | if act_layer is None: 59 | raise NotImplementedError 60 | 61 | assert isinstance(postfix, (int, str)) 62 | name = abbr + str(postfix) 63 | 64 | requires_grad = cfg_.pop('requires_grad', True) 65 | if layer_type != 'Tlu': 66 | layer = act_layer(**cfg_) 67 | else: 68 | layer = act_layer(num_features, **cfg_) 69 | 70 | for param in layer.parameters(): 71 | param.requires_grad = requires_grad 72 | 73 | if layer_only: 74 | return layer 75 | else: 76 | return name, layer 77 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/models/utils/builder.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | from vedaseg.utils import build_from_cfg 4 | from .registry import UTILS 5 | 6 | 7 | def build_module(cfg, default_args=None): 8 | try: 9 | module = build_from_cfg(cfg, UTILS, default_args) 10 | except KeyError as error: 11 | if ' is not in the ' not in error.args[0]: 12 | raise KeyError from error 13 | if ' registry' not in error.args[0]: 14 | raise KeyError from error 15 | module = build_torch_nn(cfg, default_args=default_args) 16 | 17 | return module 18 | 19 | 20 | def build_torch_nn(cfg, default_args=None): 21 | module = build_from_cfg(cfg, nn, default_args, 'module') 22 | return module 23 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/models/utils/norm.py: -------------------------------------------------------------------------------- 1 | # modify from mmcv and mmdetection 2 | 3 | import torch 4 | import torch.nn as nn 5 | from torch.nn.parameter import Parameter 6 | 7 | 8 | class FRN(nn.Module): 9 | def __init__(self, num_features, eps=1e-6): 10 | super(FRN, self).__init__() 11 | 12 | self.num_features = num_features 13 | self.gamma = Parameter(torch.Tensor(1, num_features, 1, 1), 14 | requires_grad=True) 15 | self.beta = Parameter(torch.Tensor(1, num_features, 1, 1), 16 | requires_grad=True) 17 | 18 | self.register_buffer('eps', torch.Tensor([eps])) 19 | 20 | self.reset_parameters() 21 | 22 | def reset_parameters(self): 23 | nn.init.ones_(self.gamma) 24 | nn.init.zeros_(self.beta) 25 | 26 | def forward(self, x): 27 | nu2 = torch.mean(x.pow(2), dim=[2, 3], keepdim=True) 28 | x = x * torch.rsqrt(nu2 + self.eps.abs()) 29 | x = self.gamma * x + self.beta 30 | 31 | return x 32 | 33 | def extra_repr(self): 34 | return '{num_features}, eps={eps}'.format(**self.__dict__) 35 | 36 | 37 | norm_cfg = { 38 | 'FRN': ('frn', FRN), 39 | # format: layer_type: (abbreviation, module) 40 | 'BN': ('bn', nn.BatchNorm2d), 41 | 'SyncBN': ('bn', nn.SyncBatchNorm), 42 | 'GN': ('gn', nn.GroupNorm), 43 | # and potentially 'SN' 44 | } 45 | 46 | 47 | def build_norm_layer(cfg, num_features, postfix='', layer_only=False): 48 | """ Build normalization layer 49 | 50 | Args: 51 | cfg (dict): cfg should contain: 52 | type (str): identify norm layer type. 53 | layer args: args needed to instantiate a norm layer. 54 | requires_grad (bool): [optional] whether stop gradient updates 55 | num_features (int): number of channels from input. 
56 | postfix (int, str): appended into norm abbreviation to 57 | create named layer. 58 | 59 | Returns: 60 | name (str): abbreviation + postfix 61 | layer (nn.Module): created norm layer 62 | """ 63 | assert isinstance(cfg, dict) and 'type' in cfg 64 | cfg_ = cfg.copy() 65 | 66 | layer_type = cfg_.pop('type') 67 | if layer_type not in norm_cfg: 68 | raise KeyError('Unrecognized norm type {}'.format(layer_type)) 69 | else: 70 | abbr, norm_layer = norm_cfg[layer_type] 71 | if norm_layer is None: 72 | raise NotImplementedError 73 | 74 | assert isinstance(postfix, (int, str)) 75 | name = abbr + str(postfix) 76 | 77 | requires_grad = cfg_.pop('requires_grad', True) 78 | if layer_type != 'GN': 79 | layer = norm_layer(num_features, **cfg_) 80 | if layer_type == 'SyncBN': 81 | layer._specify_ddp_gpu_num(1) # noqa 82 | else: 83 | assert 'num_groups' in cfg_ 84 | layer = norm_layer(num_channels=num_features, **cfg_) 85 | 86 | for param in layer.parameters(): 87 | param.requires_grad = requires_grad 88 | 89 | if layer_only: 90 | return layer 91 | return name, layer 92 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/models/utils/registry.py: -------------------------------------------------------------------------------- 1 | from vedaseg.utils import Registry 2 | 3 | UTILS = Registry('utils') 4 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/models/utils/upsample.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | from .registry import UTILS 5 | 6 | 7 | @UTILS.register_module 8 | class Upsample(nn.Module): 9 | __constants__ = ['size', 'scale_factor', 'scale_bias', 'mode', 10 | 'align_corners', 'name'] 11 | 12 | def __init__(self, size=None, scale_factor=None, scale_bias=0, 13 | mode='nearest', align_corners=None): 14 | super(Upsample, self).__init__() 15 | self.size = size 16 | self.scale_factor = scale_factor 17 | self.scale_bias = scale_bias 18 | self.mode = mode 19 | self.align_corners = align_corners 20 | 21 | assert (self.size is None) ^ (self.scale_factor is None) 22 | 23 | def forward(self, x): 24 | if self.size: 25 | size = self.size 26 | else: 27 | n, c, h, w = x.size() 28 | new_h = int(h * self.scale_factor + self.scale_bias) 29 | new_w = int(w * self.scale_factor + self.scale_bias) 30 | 31 | size = (new_h, new_w) 32 | 33 | return F.interpolate(x, size=size, mode=self.mode, 34 | align_corners=self.align_corners) 35 | 36 | def extra_repr(self): 37 | if self.size is not None: 38 | info = 'size=' + str(self.size) 39 | else: 40 | info = 'scale_factor=' + str(self.scale_factor) 41 | info += ', scale_bias=' + str(self.scale_bias) 42 | info += ', mode=' + self.mode 43 | return info 44 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/models/weight_init.py: -------------------------------------------------------------------------------- 1 | # modify from mmcv and mmdetection 2 | 3 | import torch.nn as nn 4 | 5 | 6 | def constant_init(module, val, bias=0): 7 | nn.init.constant_(module.weight, val) 8 | if hasattr(module, 'bias') and module.bias is not None: 9 | nn.init.constant_(module.bias, bias) 10 | 11 | 12 | def xavier_init(module, gain=1, bias=0, distribution='normal'): 13 | assert distribution in ['uniform', 'normal'] 14 | if distribution == 'uniform': 15 | nn.init.xavier_uniform_(module.weight, 
gain=gain) 16 | else: 17 | nn.init.xavier_normal_(module.weight, gain=gain) 18 | if hasattr(module, 'bias') and module.bias is not None: 19 | nn.init.constant_(module.bias, bias) 20 | 21 | 22 | def normal_init(module, mean=0, std=1, bias=0): 23 | nn.init.normal_(module.weight, mean, std) 24 | if hasattr(module, 'bias') and module.bias is not None: 25 | nn.init.constant_(module.bias, bias) 26 | 27 | 28 | def uniform_init(module, a=0, b=1, bias=0): 29 | nn.init.uniform_(module.weight, a, b) 30 | if hasattr(module, 'bias') and module.bias is not None: 31 | nn.init.constant_(module.bias, bias) 32 | 33 | 34 | def kaiming_init(module, 35 | a=0, 36 | mode='fan_out', 37 | nonlinearity='relu', 38 | bias=0, 39 | distribution='normal'): 40 | assert distribution in ['uniform', 'normal'] 41 | if distribution == 'uniform': 42 | nn.init.kaiming_uniform_(module.weight, 43 | a=a, 44 | mode=mode, 45 | nonlinearity=nonlinearity) 46 | else: 47 | nn.init.kaiming_normal_(module.weight, 48 | a=a, 49 | mode=mode, 50 | nonlinearity=nonlinearity) 51 | if hasattr(module, 'bias') and module.bias is not None: 52 | nn.init.constant_(module.bias, bias) 53 | 54 | 55 | def caffe2_xavier_init(module, bias=0): 56 | # `XavierFill` in Caffe2 corresponds to `kaiming_uniform_` in PyTorch 57 | # Acknowledgment to FAIR's internal code 58 | kaiming_init(module, 59 | a=1, 60 | mode='fan_in', 61 | nonlinearity='leaky_relu', 62 | distribution='uniform') 63 | 64 | 65 | def init_weights(modules): 66 | for m in modules: 67 | if isinstance(m, nn.Conv2d): 68 | kaiming_init(m) 69 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): 70 | constant_init(m, 1) 71 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/optims/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_optimizer 2 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/optims/builder.py: -------------------------------------------------------------------------------- 1 | import torch.optim as optims 2 | 3 | from ..utils import build_from_cfg 4 | 5 | 6 | def build_optimizer(cfg_optimizer, default_args=None): 7 | optimizer = build_from_cfg(cfg_optimizer, optims, default_args, 'module') 8 | return optimizer 9 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/runners/__init__.py: -------------------------------------------------------------------------------- 1 | from .inference_runner import InferenceRunner 2 | from .test_runner import TestRunner 3 | from .train_runner import TrainRunner 4 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/runners/base.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | import torch 4 | from torch.backends import cudnn 5 | 6 | from ..dataloaders import build_dataloader 7 | from ..dataloaders.samplers import build_sampler 8 | from ..datasets import build_dataset 9 | from ..loggers import build_logger 10 | from ..metrics import build_metrics 11 | from ..transforms import build_transform 12 | from ..utils import get_dist_info, init_dist_pytorch 13 | 14 | 15 | class Common: 16 | def __init__(self, cfg): 17 | # build logger 18 | logger_cfg = cfg.get('logger') 19 | if logger_cfg is None: 20 | logger_cfg = dict( 21 | handlers=(dict(type='StreamHandler', 
level='INFO'),)) 22 | 23 | self.workdir = cfg.get('workdir') 24 | self.distribute = cfg.get('distribute', False) 25 | 26 | # set gpu devices 27 | self.use_gpu = self._set_device() 28 | 29 | # set distribute setting 30 | if self.distribute and self.use_gpu: 31 | init_dist_pytorch(**cfg.dist_params) 32 | 33 | self.rank, self.world_size = get_dist_info() 34 | 35 | self.logger = self._build_logger(logger_cfg) 36 | 37 | # set cudnn configuration 38 | self._set_cudnn( 39 | cfg.get('cudnn_deterministic', False), 40 | cfg.get('cudnn_benchmark', False)) 41 | 42 | # set seed 43 | self._set_seed(cfg.get('seed', None)) 44 | 45 | # build metric 46 | if 'metrics' in cfg: 47 | self.metric = self._build_metric(cfg['metrics']) 48 | 49 | def _build_logger(self, cfg): 50 | return build_logger(cfg, dict(workdir=self.workdir)) 51 | 52 | def _set_device(self): 53 | self.gpu_num = torch.cuda.device_count() 54 | if torch.cuda.is_available(): 55 | use_gpu = True 56 | else: 57 | use_gpu = False 58 | 59 | return use_gpu 60 | 61 | def _set_seed(self, seed): 62 | if seed is not None: 63 | self.logger.info('Set seed {}'.format(seed)) 64 | random.seed(seed) 65 | np.random.seed(seed) 66 | torch.manual_seed(seed) 67 | 68 | def _set_cudnn(self, deterministic, benchmark): 69 | self.logger.info('Set cudnn deterministic {}'.format(deterministic)) 70 | cudnn.deterministic = deterministic 71 | 72 | self.logger.info('Set cudnn benchmark {}'.format(benchmark)) 73 | cudnn.benchmark = benchmark 74 | 75 | def _build_metric(self, cfg): 76 | return build_metrics(cfg) 77 | 78 | def _build_transform(self, cfg): 79 | return build_transform(cfg) 80 | 81 | def _build_dataloader(self, cfg): 82 | transform = build_transform(cfg['transforms']) 83 | dataset = build_dataset(cfg['dataset'], dict(transform=transform)) 84 | 85 | shuffle = cfg['dataloader'].pop('shuffle', False) 86 | sampler = build_sampler(self.distribute, 87 | cfg['sampler'], 88 | dict(dataset=dataset, 89 | shuffle=shuffle)) 90 | 91 | dataloader = build_dataloader(self.distribute, 92 | self.gpu_num, 93 | cfg['dataloader'], 94 | dict(dataset=dataset, 95 | sampler=sampler)) 96 | 97 | return dataloader 98 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/runners/inference_runner.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from ..models import build_model 4 | from ..utils import load_checkpoint 5 | from .base import Common 6 | 7 | 8 | class InferenceRunner(Common): 9 | def __init__(self, inference_cfg, base_cfg=None): 10 | inference_cfg = inference_cfg.copy() 11 | base_cfg = {} if base_cfg is None else base_cfg.copy() 12 | 13 | super().__init__(base_cfg) 14 | 15 | self.multi_label = inference_cfg.get('multi_label', False) 16 | 17 | # build inference transform 18 | self.transform = self._build_transform(inference_cfg['transforms']) 19 | 20 | # build model 21 | self.model = self._build_model(inference_cfg['model']) 22 | self.model.eval() 23 | 24 | def load_checkpoint(self, filename, map_location='default', strict=True): 25 | self.logger.info('Load checkpoint from {}'.format(filename)) 26 | 27 | if map_location == 'default': 28 | if self.use_gpu: 29 | device_id = torch.cuda.current_device() 30 | map_location = lambda storage, loc: storage.cuda(device_id) 31 | else: 32 | map_location = 'cpu' 33 | 34 | return load_checkpoint(self.model, filename, map_location, strict) 35 | 36 | def _build_model(self, cfg): 37 | self.logger.info('Build model') 38 | 39 | model = 
build_model(cfg) 40 | 41 | if torch.cuda.is_available(): 42 | if self.distribute: 43 | model = torch.nn.parallel.DistributedDataParallel( 44 | model.cuda(), 45 | device_ids=[torch.cuda.current_device()], 46 | broadcast_buffers=True, 47 | ) 48 | self.logger.info('Using distributed training') 49 | else: 50 | if torch.cuda.device_count() > 1: 51 | model = torch.nn.DataParallel(model) 52 | model.cuda() 53 | return model 54 | 55 | def compute(self, output): 56 | if self.multi_label: 57 | output = output.sigmoid() 58 | output = torch.where(output >= 0.5, 59 | torch.full_like(output, 1), 60 | torch.full_like(output, 0)).long() 61 | 62 | else: 63 | output = output.softmax(dim=1) 64 | _, output = torch.max(output, dim=1) 65 | return output 66 | 67 | def __call__(self, image, masks): 68 | with torch.no_grad(): 69 | image = self.transform(image=image, masks=masks)['image'] 70 | image = image.unsqueeze(0) 71 | 72 | if self.use_gpu: 73 | image = image.cuda() 74 | 75 | output = self.model(image) 76 | output = self.compute(output) 77 | 78 | output = output.squeeze().cpu().numpy() 79 | 80 | return output 81 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/runners/test_runner.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn.functional as F 4 | 5 | from ..utils import gather_tensor 6 | from .inference_runner import InferenceRunner 7 | 8 | 9 | class TestRunner(InferenceRunner): 10 | def __init__(self, test_cfg, inference_cfg, base_cfg=None): 11 | super().__init__(inference_cfg, base_cfg) 12 | 13 | self.test_dataloader = self._build_dataloader(test_cfg['data']) 14 | extra_data = len(self.test_dataloader.dataset) % self.world_size 15 | self.test_exclude_num = self.world_size - extra_data if extra_data != 0 else 0 16 | 17 | self.tta = test_cfg.get('tta', False) 18 | 19 | def __call__(self): 20 | self.metric.reset() 21 | self.model.eval() 22 | 23 | res = {} 24 | 25 | self.logger.info('Start testing') 26 | with torch.no_grad(): 27 | for idx, (image, mask) in enumerate(self.test_dataloader): 28 | if self.use_gpu: 29 | image = image.cuda() 30 | mask = mask.cuda() 31 | 32 | if self.tta: 33 | output = self._tta_compute(image) 34 | else: 35 | output = self.model(image) 36 | output = self.compute(output) 37 | 38 | output = gather_tensor(output) 39 | mask = gather_tensor(mask) 40 | 41 | if idx + 1 == len( 42 | self.test_dataloader) and self.test_exclude_num > 0: 43 | output = output[:-self.test_exclude_num] 44 | mask = mask[:-self.test_exclude_num] 45 | 46 | self.metric(output.cpu().numpy(), mask.cpu().numpy()) 47 | res = self.metric.accumulate() 48 | self.logger.info('Test, Iter {}, {}'.format( 49 | idx + 1, 50 | ', '.join(['{}: {}'.format(k, np.round(v, 4)) for k, v in 51 | res.items()]))) 52 | self.logger.info('Test Result: {}'.format(', '.join( 53 | ['{}: {}'.format(k, np.round(v, 4)) for k, v in res.items()]))) 54 | 55 | return res 56 | 57 | def _tta_compute(self, image): 58 | b, c, h, w = image.size() 59 | probs = [] 60 | for scale, bias in zip(self.tta['scales'], self.tta['biases']): 61 | new_h, new_w = int(h * scale + bias), int(w * scale + bias) 62 | new_img = F.interpolate(image, size=(new_h, new_w), 63 | mode='bilinear', align_corners=True) 64 | output = self.model(new_img) 65 | probs.append(output) 66 | 67 | if self.tta['flip']: 68 | flip_img = new_img.flip(3) 69 | flip_output = self.model(flip_img) 70 | prob = flip_output.flip(3) 71 | 
probs.append(prob) 72 | 73 | for idx, prob in enumerate(probs): 74 | probs[idx] = F.interpolate(prob, size=(h, w), 75 | mode='bilinear', align_corners=True) 76 | 77 | if self.multi_label: 78 | prob = torch.stack(probs, dim=0).sigmoid().mean(dim=0) 79 | prob = torch.where(prob >= 0.5, 80 | torch.full_like(prob, 1), 81 | torch.full_like(prob, 0)).long() # b c h w 82 | else: 83 | prob = torch.stack(probs, dim=0).softmax(dim=2).mean(dim=0) 84 | _, prob = torch.max(prob, dim=1) # b h w 85 | return prob 86 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_transform 2 | from .transforms import (FactorScale, LongestMaxSize, PadIfNeeded, RandomScale, 3 | ToTensor) 4 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/transforms/builder.py: -------------------------------------------------------------------------------- 1 | import albumentations as albu 2 | 3 | from vedaseg.utils import build_from_cfg 4 | from .registry import TRANSFORMS 5 | 6 | 7 | def build_transform(cfgs): 8 | tfs = [] 9 | for cfg in cfgs: 10 | if TRANSFORMS.get(cfg['type']): 11 | tf = build_from_cfg(cfg, TRANSFORMS) 12 | else: 13 | tf = build_from_cfg(cfg, albu, mode='module') 14 | tfs.append(tf) 15 | aug = albu.Compose(tfs) 16 | 17 | return aug 18 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/transforms/registry.py: -------------------------------------------------------------------------------- 1 | from ..utils import Registry 2 | 3 | TRANSFORMS = Registry('transforms') 4 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/transforms/transforms.py: -------------------------------------------------------------------------------- 1 | import albumentations as albu 2 | import albumentations.augmentations.functional as F 3 | import cv2 4 | import numpy as np 5 | import random 6 | import torch 7 | from albumentations import DualTransform 8 | 9 | from .registry import TRANSFORMS 10 | 11 | 12 | @TRANSFORMS.register_module 13 | class FactorScale(DualTransform): 14 | def __init__(self, scale=1.0, interpolation=cv2.INTER_LINEAR, 15 | always_apply=False, 16 | p=1.0): 17 | super(FactorScale, self).__init__(always_apply, p) 18 | self.scale = scale 19 | self.interpolation = interpolation 20 | 21 | def apply(self, image, scale=1.0, **params): 22 | return F.scale(image, scale, interpolation=self.interpolation) 23 | 24 | def apply_to_mask(self, image, scale=1.0, **params): 25 | return F.scale(image, scale, interpolation=cv2.INTER_NEAREST) 26 | 27 | def get_params(self): 28 | return {'scale': self.scale} 29 | 30 | def get_transform_init_args_names(self): 31 | return ('scale',) 32 | 33 | 34 | @TRANSFORMS.register_module 35 | class LongestMaxSize(FactorScale): 36 | def __init__(self, h_max, w_max, interpolation=cv2.INTER_LINEAR, 37 | always_apply=False, p=1.0): 38 | self.h_max = h_max 39 | self.w_max = w_max 40 | super(LongestMaxSize, self).__init__(interpolation=interpolation, 41 | always_apply=always_apply, 42 | p=p) 43 | 44 | def update_params(self, params, **kwargs): 45 | params = super(LongestMaxSize, self).update_params(params, **kwargs) 46 | rows = params['rows'] 47 | cols = params['cols'] 48 | 49 | scale_h = self.h_max / rows 50 | scale_w = self.w_max / 
cols 51 | scale = min(scale_h, scale_w) 52 | 53 | params.update({'scale': scale}) 54 | return params 55 | 56 | def get_transform_init_args_names(self): 57 | return ('h_max', 'w_max',) 58 | 59 | 60 | @TRANSFORMS.register_module 61 | class RandomScale(FactorScale): 62 | def __init__(self, scale_limit=(0.5, 2), interpolation=cv2.INTER_LINEAR, 63 | scale_step=None, always_apply=False, p=1.0): 64 | super(RandomScale, self).__init__(interpolation=interpolation, 65 | always_apply=always_apply, 66 | p=p) 67 | self.scale_limit = albu.to_tuple(scale_limit) 68 | self.scale_step = scale_step 69 | 70 | def get_params(self): 71 | if self.scale_step: 72 | num_steps = int((self.scale_limit[1] - self.scale_limit[ 73 | 0]) / self.scale_step + 1) 74 | scale_factors = np.linspace(self.scale_limit[0], 75 | self.scale_limit[1], num_steps) 76 | scale_factor = np.random.choice(scale_factors).item() 77 | else: 78 | scale_factor = random.uniform(self.scale_limit[0], 79 | self.scale_limit[1]) 80 | 81 | return {'scale': scale_factor} 82 | 83 | def get_transform_init_args_names(self): 84 | return ('scale_limit', 'scale_step',) 85 | 86 | 87 | @TRANSFORMS.register_module 88 | class PadIfNeeded(albu.PadIfNeeded): 89 | def __init__(self, min_height, min_width, border_mode=cv2.BORDER_CONSTANT, 90 | value=None, mask_value=None): 91 | super(PadIfNeeded, self).__init__(min_height=min_height, 92 | min_width=min_width, 93 | border_mode=border_mode, 94 | value=value, 95 | mask_value=mask_value) 96 | 97 | def update_params(self, params, **kwargs): 98 | params = super(PadIfNeeded, self).update_params(params, **kwargs) 99 | rows = params['rows'] 100 | cols = params['cols'] 101 | 102 | if rows < self.min_height: 103 | h_pad_bottom = self.min_height - rows 104 | else: 105 | h_pad_bottom = 0 106 | 107 | if cols < self.min_width: 108 | w_pad_right = self.min_width - cols 109 | else: 110 | w_pad_right = 0 111 | 112 | params.update({'pad_top': 0, 113 | 'pad_bottom': h_pad_bottom, 114 | 'pad_left': 0, 115 | 'pad_right': w_pad_right}) 116 | return params 117 | 118 | def get_transform_init_args_names(self): 119 | return ('min_height', 'min_width',) 120 | 121 | 122 | @TRANSFORMS.register_module 123 | class ToTensor(DualTransform): 124 | def __init__(self): 125 | super(ToTensor, self).__init__(always_apply=True) 126 | 127 | def apply(self, image, **params): 128 | if isinstance(image, np.ndarray): 129 | if image.ndim == 2: 130 | image = image[:, :, None] 131 | image = torch.from_numpy(image).float() 132 | image = image.permute(2, 0, 1) 133 | else: 134 | raise TypeError('img should be np.ndarray. 
Got {}' 135 | .format(type(image))) 136 | return image 137 | 138 | def apply_to_mask(self, image, **params): 139 | image = torch.from_numpy(image) 140 | return image 141 | 142 | def apply_to_masks(self, masks, **params): 143 | masks = [self.apply_to_mask(mask, **params) for mask in masks] 144 | return torch.stack(masks, dim=0).squeeze() 145 | 146 | def get_transform_init_args_names(self): 147 | return () 148 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .checkpoint import load_checkpoint, save_checkpoint, weights_to_cpu 2 | from .config import Config 3 | from .dist_utils import (gather_tensor, get_dist_info, init_dist_pytorch, 4 | reduce_tensor) 5 | from .registry import Registry, build_from_cfg 6 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/utils/checkpoint.py: -------------------------------------------------------------------------------- 1 | # adapted from https://github.com/open-mmlab/mmcv 2 | import os 3 | import time 4 | import torch 5 | from collections import OrderedDict 6 | 7 | 8 | def weights_to_cpu(state_dict): 9 | """Copy a model state_dict to cpu. 10 | Args: 11 | state_dict (OrderedDict): Model weights on GPU. 12 | Returns: 13 | OrderedDict: Model weights on CPU. 14 | """ 15 | state_dict_cpu = OrderedDict() 16 | for key, val in state_dict.items(): 17 | state_dict_cpu[key] = val.cpu() 18 | return state_dict_cpu 19 | 20 | 21 | def save_checkpoint(model, filename, optimizer=None, lr_scheduler=None, 22 | meta=None): 23 | """Save checkpoint to file. 24 | The checkpoint will contain the fields ``meta`` and ``state_dict``, plus 25 | ``optimizer`` and ``lr_scheduler`` when they are given. By default ``meta`` records the save time. 26 | Args: 27 | model (Module): Module whose params are to be saved. 28 | filename (str): Checkpoint filename. 29 | optimizer (:obj:`Optimizer`, optional): Optimizer to be saved. 30 | lr_scheduler (:obj:`_LRScheduler`, optional): _LRScheduler to be saved. 31 | meta (dict, optional): Metadata to be saved in checkpoint. 
32 | """ 33 | if meta is None: 34 | meta = {} 35 | elif not isinstance(meta, dict): 36 | raise TypeError('meta must be a dict or None, but got {}'.format( 37 | type(meta))) 38 | meta.update(time=time.asctime()) 39 | 40 | file_dir = os.path.dirname(filename) 41 | if not os.path.exists(file_dir): 42 | os.mkdir(file_dir) 43 | 44 | if hasattr(model, 'module'): 45 | model = model.module 46 | 47 | checkpoint = { 48 | 'meta': meta, 49 | 'state_dict': weights_to_cpu(model.state_dict()) 50 | } 51 | if optimizer is not None: 52 | checkpoint['optimizer'] = optimizer.state_dict() 53 | if lr_scheduler is not None: 54 | checkpoint['lr_scheduler'] = lr_scheduler.state_dict() 55 | torch.save(checkpoint, filename) 56 | 57 | 58 | def load_checkpoint(model, filename, map_location=None, strict=False): 59 | if os.path.isfile(filename): 60 | checkpoint = torch.load(filename, map_location=map_location) 61 | 62 | if isinstance(checkpoint, OrderedDict): 63 | state_dict = checkpoint 64 | elif isinstance(checkpoint, dict) and 'state_dict' in checkpoint: 65 | state_dict = checkpoint['state_dict'] 66 | else: 67 | raise RuntimeError( 68 | 'No state_dict found in checkpoint file {}'.format(filename)) 69 | if hasattr(model, 'module'): 70 | model.module.load_state_dict(state_dict, strict=strict) 71 | else: 72 | model.load_state_dict(state_dict, strict=strict) 73 | return checkpoint 74 | else: 75 | raise RuntimeError( 76 | 'No checkpoint file found in path {}'.format(filename)) 77 | -------------------------------------------------------------------------------- /semantic segmentation/vedaseg/utils/dist_utils.py: -------------------------------------------------------------------------------- 1 | # adapted from mmcv and mmdetection 2 | 3 | import os 4 | import torch 5 | import torch.distributed as dist 6 | 7 | 8 | def init_dist_pytorch(backend='nccl', **kwargs): 9 | rank = int(os.environ['RANK']) 10 | num_gpus = torch.cuda.device_count() 11 | torch.cuda.set_device(rank % num_gpus) 12 | dist.init_process_group(backend=backend, **kwargs) 13 | 14 | 15 | def get_dist_info(): 16 | if dist.is_available(): 17 | initialized = dist.is_initialized() 18 | else: 19 | initialized = False 20 | 21 | if initialized: 22 | rank = dist.get_rank() 23 | world_size = dist.get_world_size() 24 | else: 25 | rank = 0 26 | world_size = 1 27 | 28 | return rank, world_size 29 | 30 | 31 | def reduce_tensor(data, average=True): 32 | rank, world_size = get_dist_info() 33 | if world_size < 2: 34 | return data 35 | 36 | with torch.no_grad(): 37 | if not isinstance(data, torch.Tensor): 38 | data = torch.tensor(data).cuda() 39 | dist.reduce(data, dst=0) 40 | if rank == 0 and average: 41 | data /= world_size 42 | return data 43 | 44 | 45 | def gather_tensor(data): 46 | _, world_size = get_dist_info() 47 | if world_size < 2: 48 | return data 49 | 50 | with torch.no_grad(): 51 | if not isinstance(data, torch.Tensor): 52 | data = torch.tensor(data).cuda() 53 | 54 | gather_list = [torch.ones_like(data) for _ in range(world_size)] 55 | dist.all_gather(gather_list, data) 56 | gather_data = torch.stack(gather_list) 57 | gather_data = torch.transpose(gather_data, 0, 1) 58 | gather_data = gather_data.reshape((-1, *gather_data.shape[2:])) 59 | 60 | return gather_data 61 | 62 | 63 | def synchronize(): 64 | if not dist.is_available(): 65 | return 66 | if not dist.is_initialized(): 67 | return 68 | world_size = dist.get_world_size() 69 | if world_size == 1: 70 | return 71 | dist.barrier() 72 | 
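# A minimal usage sketch of the helpers above; the tensor shapes and the loss
# value are illustrative assumptions, not repository code. In a single-process
# run get_dist_info() reports world_size == 1, so reduce_tensor() and
# gather_tensor() simply return their inputs unchanged; launched with
# init_dist_pytorch() under torchrun, they average the loss onto rank 0 and
# collect the per-process prediction batches into one tensor.
if __name__ == '__main__':
    rank, world_size = get_dist_info()

    loss = torch.tensor(0.25)           # assumed per-process scalar loss
    avg_loss = reduce_tensor(loss)      # averaged onto rank 0 when world_size > 1

    preds = torch.zeros(4, 21, 32, 32)  # assumed per-process predictions (N, C, H, W)
    all_preds = gather_tensor(preds)    # gathered from all processes, sample-interleaved

    print(rank, world_size, avg_loss.item(), all_preds.shape)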
-------------------------------------------------------------------------------- /semantic segmentation/vedaseg/utils/registry.py: -------------------------------------------------------------------------------- 1 | # adapted from https://github.com/open-mmlab/mmcv 2 | import inspect 3 | from functools import partial 4 | 5 | 6 | class Registry(object): 7 | 8 | def __init__(self, name): 9 | self._name = name 10 | self._module_dict = dict() 11 | 12 | def __repr__(self): 13 | format_str = self.__class__.__name__ + '(name={}, items={})'.format( 14 | self._name, list(self._module_dict.keys())) 15 | return format_str 16 | 17 | @property 18 | def name(self): 19 | return self._name 20 | 21 | @property 22 | def module_dict(self): 23 | return self._module_dict 24 | 25 | def get(self, key): 26 | return self._module_dict.get(key, None) 27 | 28 | def _register_module(self, module_class, force=False): 29 | """Register a module. 30 | Args: 31 | module_class (class): Class to be registered. 32 | """ 33 | if not inspect.isclass(module_class): 34 | raise TypeError('module must be a class, but got {}'.format( 35 | type(module_class))) 36 | module_name = module_class.__name__ 37 | if not force and module_name in self._module_dict: 38 | raise KeyError('{} is already registered in {}'.format( 39 | module_name, self.name)) 40 | self._module_dict[module_name] = module_class 41 | 42 | def register_module(self, cls=None, force=False): 43 | if cls is None: 44 | return partial(self.register_module, force=force) 45 | self._register_module(cls, force=force) 46 | return cls 47 | 48 | 49 | def build_from_cfg(cfg, src, default_args=None, mode='registry'): 50 | if mode == 'registry': 51 | return build_from_registry(cfg, src, default_args=default_args) 52 | elif mode == 'module': 53 | return build_from_module(cfg, src, default_args=default_args) 54 | else: 55 | raise ValueError('Mode {} is not supported currently'.format(mode)) 56 | 57 | 58 | def build_from_registry(cfg, registry, default_args=None): 59 | """Build a module from config dict. 60 | Args: 61 | cfg (dict): Config dict. It should at least contain the key "type". 62 | registry (:obj:`Registry`): The registry to search the type from. 63 | default_args (dict, optional): Default initialization arguments. 64 | Returns: 65 | obj: The constructed object. 66 | """ 67 | assert isinstance(cfg, dict) and 'type' in cfg 68 | assert isinstance(default_args, dict) or default_args is None 69 | args = cfg.copy() 70 | obj_type = args.pop('type') 71 | if isinstance(obj_type, str): 72 | obj_cls = registry.get(obj_type) 73 | if obj_cls is None: 74 | raise KeyError('{} is not in the {} registry'.format( 75 | obj_type, registry.name)) 76 | elif inspect.isclass(obj_type): 77 | obj_cls = obj_type 78 | else: 79 | raise TypeError('type must be a str or valid type, but got {}'.format( 80 | type(obj_type))) 81 | if default_args is not None: 82 | for name, value in default_args.items(): 83 | args.setdefault(name, value) 84 | return obj_cls(**args) 85 | 86 | 87 | def build_from_module(cfg, module, default_args=None): 88 | """Build a module from config dict. 89 | Args: 90 | cfg (dict): Config dict. It should at least contain the key "type". 91 | module (:obj:`module`): The module to search the type from. 92 | default_args (dict, optional): Default initialization arguments. 93 | Returns: 94 | obj: The constructed object. 
95 | """ 96 | assert isinstance(cfg, dict) and 'type' in cfg 97 | assert isinstance(default_args, dict) or default_args is None 98 | args = cfg.copy() 99 | obj_type = args.pop('type') 100 | if isinstance(obj_type, str): 101 | obj_cls = getattr(module, obj_type) 102 | if obj_cls is None: 103 | raise KeyError('{} is not in the {} module'.format( 104 | obj_type, module)) 105 | elif inspect.isclass(obj_type): 106 | obj_cls = obj_type 107 | else: 108 | raise TypeError('type must be a str or valid type, but got {}'.format( 109 | type(obj_type))) 110 | if default_args is not None: 111 | for name, value in default_args.items(): 112 | args.setdefault(name, value) 113 | return obj_cls(**args) 114 | --------------------------------------------------------------------------------
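A minimal usage sketch of the Registry / build_from_cfg pattern defined above, assuming the vedaseg package is importable; the BACKBONES registry and the ToyBackbone class are hypothetical stand-ins, not modules of this repository. A config's 'type' key selects the class, the remaining keys become constructor keyword arguments, and default_args fills in anything the config omits; mode='module' resolves the type from a plain Python module instead, which is how optims/builder.py wraps torch.optim.

import torch.nn as nn
import torch.optim as optim

from vedaseg.utils import Registry, build_from_cfg

# Hypothetical registry and class, for illustration only.
BACKBONES = Registry('backbone')


@BACKBONES.register_module
class ToyBackbone(nn.Module):
    def __init__(self, channels=16):
        super().__init__()
        self.conv = nn.Conv2d(3, channels, kernel_size=3, padding=1)

    def forward(self, x):
        return self.conv(x)


# Registry mode (the default): look the 'type' up in BACKBONES and
# instantiate it with the remaining keys as keyword arguments.
backbone = build_from_cfg(dict(type='ToyBackbone', channels=32), BACKBONES)

# Module mode: look the 'type' up as an attribute of torch.optim, with
# default_args supplying the parameters to optimize.
optimizer = build_from_cfg(dict(type='SGD', lr=0.01),
                           optim,
                           default_args=dict(params=backbone.parameters()),
                           mode='module')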