├── __init__.py
├── base_model
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── __init__.cpython-35.pyc
│   │   └── mobilenetv1.cpython-35.pyc
│   ├── lenet5.py
│   ├── cfqk.py
│   ├── mobilenetv1.py
│   ├── vgg.py
│   ├── resnet.py
│   └── wrn.py
├── deprecated
│   ├── __init__.py
│   ├── base_model
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── cfqk.cpython-35.pyc
│   │   │   ├── vgg.cpython-35.pyc
│   │   │   ├── wrn.cpython-35.pyc
│   │   │   ├── lenet5.cpython-35.pyc
│   │   │   ├── resnet.cpython-35.pyc
│   │   │   ├── __init__.cpython-35.pyc
│   │   │   └── mobilenetv1.cpython-35.pyc
│   │   ├── lenet5.py
│   │   ├── cfqk.py
│   │   ├── mobilenetv1.py
│   │   ├── vgg.py
│   │   ├── wrn.py
│   │   └── resnet.py
│   ├── utils
│   │   ├── __pycache__
│   │   │   ├── comm.cpython-35.pyc
│   │   │   ├── misc.cpython-35.pyc
│   │   │   ├── engine.cpython-35.pyc
│   │   │   ├── logger.cpython-35.pyc
│   │   │   ├── pyt_utils.cpython-35.pyc
│   │   │   ├── checkpoint.cpython-35.pyc
│   │   │   └── lr_scheduler.cpython-35.pyc
│   │   ├── logger.py
│   │   ├── timer.py
│   │   ├── pyt_utils.py
│   │   ├── loss.py
│   │   ├── checkpoint.py
│   │   ├── torch_utils.py
│   │   ├── lr_scheduler.py
│   │   ├── comm.py
│   │   └── misc.py
│   ├── acnet
│   │   ├── __pycache__
│   │   │   └── acnet_builder.cpython-35.pyc
│   │   ├── acnet_test.py
│   │   ├── acnet_rc56.py
│   │   ├── acnet_cfqkbnc.py
│   │   ├── acnet_vc.py
│   │   ├── acnet_wrnc16.py
│   │   └── acnet_fusion.py
│   ├── custom_layers
│   │   ├── __pycache__
│   │   │   ├── se_block.cpython-35.pyc
│   │   │   └── flatten_layer.cpython-35.pyc
│   │   ├── flatten_layer.py
│   │   └── se_block.py
│   ├── show_log.py
│   ├── display_hdf5.py
│   ├── model_map.py
│   ├── README.md
│   ├── base_config.py
│   ├── dataset.py
│   ├── builder.py
│   ├── ding_test.py
│   └── constants.py
├── .gitignore
├── utils
│   ├── __pycache__
│   │   ├── comm.cpython-35.pyc
│   │   ├── misc.cpython-35.pyc
│   │   ├── misc.cpython-37.pyc
│   │   ├── engine.cpython-35.pyc
│   │   ├── logger.cpython-35.pyc
│   │   ├── checkpoint.cpython-35.pyc
│   │   ├── pyt_utils.cpython-35.pyc
│   │   └── lr_scheduler.cpython-35.pyc
│   ├── logger.py
│   ├── timer.py
│   ├── pyt_utils.py
│   ├── loss.py
│   ├── torch_utils.py
│   ├── comm.py
│   ├── checkpoint.py
│   └── lr_scheduler.py
├── custom_layers
│   ├── __pycache__
│   │   ├── se_block.cpython-35.pyc
│   │   └── flatten_layer.cpython-35.pyc
│   ├── max_layer.py
│   ├── flatten_layer.py
│   ├── pad_layer.py
│   ├── se_block.py
│   ├── scale_layer.py
│   └── crop_layer.py
├── nobn_builder.py
├── LICENSE
├── data
│   ├── dataset_util.py
│   ├── imagenet_data.py
│   └── data_factory.py
├── model_map.py
├── display_hdf5.py
├── acnet
│   ├── acnet_fusion.py
│   ├── acnet_builder.py
│   ├── do_acnet.py
│   ├── acb.py
│   └── acnet_test.py
├── show_log.py
├── base_config.py
└── ndp_test.py
/__init__.py: -------------------------------------------------------------------------------- 1 | --------------------------------------------------------------------------------
/base_model/__init__.py: -------------------------------------------------------------------------------- 1 | --------------------------------------------------------------------------------
/deprecated/__init__.py: -------------------------------------------------------------------------------- 1 | --------------------------------------------------------------------------------
/deprecated/base_model/__init__.py: -------------------------------------------------------------------------------- 1 | --------------------------------------------------------------------------------
/.gitignore: -------------------------------------------------------------------------------- 1 | #*.checkpoint 2 | #*.log 3 | #*log/* 4 | 5 | .idea/workspace.xml 6 | .idea/* --------------------------------------------------------------------------------
/custom_layers/max_layer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class MaxLayer(nn.Module): 5 | 6 | def __init__(self): 7 | super(MaxLayer, self).__init__() 8 | 9 | def forward(self, a, b): 10 | return torch.max(a, b) 11 | --------------------------------------------------------------------------------
/custom_layers/flatten_layer.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | class FlattenLayer(nn.Module): 4 | 5 | def __init__(self): 6 | super(FlattenLayer, self).__init__() 7 | 8 | def forward(self, inputs): 9 | return inputs.view(inputs.size(0), -1) 10 | --------------------------------------------------------------------------------
/deprecated/custom_layers/flatten_layer.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | class FlattenLayer(nn.Module): 4 | 5 | def __init__(self): 6 | super(FlattenLayer, self).__init__() 7 | 8 | def forward(self, inputs): 9 | return inputs.view(inputs.size(0), -1) 10 | --------------------------------------------------------------------------------
/custom_layers/pad_layer.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | class
PadLayer(nn.Module): 5 | 6 | # Zero-pads the feature map by self.pad rows/columns on each of the four sides, e.g., pad=1 adds one row at the top and bottom and one column at the left and right (a negative value crops instead) 7 | def __init__(self, pad): 8 | super(PadLayer, self).__init__() 9 | self.pad = pad 10 | 11 | def forward(self, input): 12 | return F.pad(input, [self.pad] * 4) --------------------------------------------------------------------------------
/deprecated/acnet/acnet_test.py: -------------------------------------------------------------------------------- 1 | from ding_test import general_test 2 | from acnet.acnet_fusion import convert_acnet_weights 3 | from acnet.acnet_builder import ACNetBuilder 4 | import sys 5 | 6 | def convert_and_test(network_type, train_weights): 7 | builder = ACNetBuilder(base_config=None, deploy=False) 8 | general_test(network_type=network_type, weights=train_weights, builder=builder) 9 | deploy_weights = train_weights.replace('.hdf5', '_deploy.hdf5') 10 | convert_acnet_weights(train_weights, deploy_weights=deploy_weights, eps=1e-5) 11 | builder.switch_to_deploy() 12 | general_test(network_type=network_type, weights=deploy_weights, builder=builder) 13 | 14 | if __name__ == '__main__': 15 | convert_and_test(sys.argv[1], sys.argv[2]) 16 | 17 | --------------------------------------------------------------------------------
/custom_layers/se_block.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | class SEBlock(nn.Module): 5 | 6 | def __init__(self, input_channels, internal_neurons): 7 | super(SEBlock, self).__init__() 8 | self.down = nn.Conv2d(in_channels=input_channels, out_channels=internal_neurons, kernel_size=1, stride=1, bias=True) 9 | self.up = nn.Conv2d(in_channels=internal_neurons, out_channels=input_channels, kernel_size=1, stride=1, bias=True) 10 | 11 | def forward(self, inputs): 12 | x = F.avg_pool2d(inputs, kernel_size=inputs.size(3)) 13 | x = self.down(x) 14 | x = F.relu(x) 15 | x = self.up(x) 16 | x = F.sigmoid(x) 17 | x = x.repeat(1, 1, inputs.size(2), inputs.size(3)) 18 | return inputs * x --------------------------------------------------------------------------------
/deprecated/custom_layers/se_block.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | class SEBlock(nn.Module): 5 | 6 | def __init__(self, input_channels, internal_neurons): 7 | super(SEBlock, self).__init__() 8 | self.down = nn.Conv2d(in_channels=input_channels, out_channels=internal_neurons, kernel_size=1, stride=1, bias=True) 9 | self.up = nn.Conv2d(in_channels=internal_neurons, out_channels=input_channels, kernel_size=1, stride=1, bias=True) 10 | 11 | def forward(self, inputs): 12 | x = F.avg_pool2d(inputs, kernel_size=inputs.size(3)) 13 | x = self.down(x) 14 | x = F.relu(x) 15 | x = self.up(x) 16 | x = F.sigmoid(x) 17 | x = x.repeat(1, 1, inputs.size(2), inputs.size(3)) 18 | return inputs * x --------------------------------------------------------------------------------
/custom_layers/scale_layer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn.parameter import Parameter 3 | import torch.nn.init as init 4 | 5 | class ScaleLayer(torch.nn.Module): 6 | 7 | def __init__(self, num_features, use_bias=True): 8 | super(ScaleLayer, self).__init__() 9 | self.weight = Parameter(torch.Tensor(num_features)) 10 | init.ones_(self.weight) 11 | self.num_features =
num_features 12 | 13 | if use_bias: 14 | self.bias = Parameter(torch.Tensor(num_features)) 15 | init.zeros_(self.bias) 16 | else: 17 | self.bias = None 18 | 19 | 20 | def forward(self, inputs): 21 | if self.bias is None: 22 | return inputs * self.weight.view(1, self.num_features, 1, 1) 23 | else: 24 | return inputs * self.weight.view(1, self.num_features, 1, 1) + self.bias -------------------------------------------------------------------------------- /nobn_builder.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from builder import ConvBuilder 3 | 4 | class NoBNBuilder(ConvBuilder): 5 | 6 | def __init__(self, base_config): 7 | super(NoBNBuilder, self).__init__(base_config=base_config) 8 | print('NoBN ConvBuilder initialized.') 9 | 10 | def Conv2dBN(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, padding_mode='zeros', use_original_conv=False): 11 | conv_layer = self.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, 12 | stride=stride, padding=padding, dilation=dilation, groups=groups, 13 | bias=True, padding_mode=padding_mode, use_original_conv=use_original_conv) 14 | se = self.Sequential() 15 | se.add_module('conv', conv_layer) 16 | return se 17 | 18 | 19 | 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /utils/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import coloredlogs 3 | import os 4 | 5 | 6 | def get_logger(name='', save_dir=None, distributed_rank=0, filename="log.txt"): 7 | logger = logging.getLogger(name) 8 | coloredlogs.install(level='DEBUG', logger=logger) 9 | # logger.setLevel(logging.DEBUG) 10 | # don't log results for the non-master process 11 | if distributed_rank > 0: 12 | return logger 13 | formatter = logging.Formatter( 14 | "%(asctime)s %(name)s %(levelname)s: %(message)s") 15 | 16 | # ch = logging.StreamHandler(stream=sys.stdout) 17 | # ch.setLevel(logging.DEBUG) 18 | # ch.setFormatter(formatter) 19 | # logger.addHandler(ch) 20 | 21 | if save_dir: 22 | fh = logging.FileHandler(os.path.join(save_dir, filename)) 23 | fh.setLevel(logging.DEBUG) 24 | fh.setFormatter(formatter) 25 | if len(logger.handlers) > 0: 26 | logger.removeHandler(logger.handlers[0]) 27 | logger.addHandler(fh) 28 | 29 | return logger 30 | 31 | -------------------------------------------------------------------------------- /custom_layers/crop_layer.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | class CropLayer(nn.Module): 4 | 5 | # E.g., (-1, 0) means this layer should crop the first and last rows of the feature map. 
And (0, -1) crops the first and last columns 6 | def __init__(self, crop_set): 7 | super(CropLayer, self).__init__() 8 | self.rows_to_crop = - crop_set[0] 9 | self.cols_to_crop = - crop_set[1] 10 | assert self.rows_to_crop >= 0 11 | assert self.cols_to_crop >= 0 12 | 13 | def forward(self, input): 14 | if self.rows_to_crop == 0 and self.cols_to_crop == 0: 15 | return input 16 | elif self.rows_to_crop > 0 and self.cols_to_crop == 0: 17 | return input[:, :, self.rows_to_crop:-self.rows_to_crop, :] 18 | elif self.rows_to_crop == 0 and self.cols_to_crop > 0: 19 | return input[:, :, :, self.cols_to_crop:-self.cols_to_crop] 20 | else: 21 | return input[:, :, self.rows_to_crop:-self.rows_to_crop, self.cols_to_crop:-self.cols_to_crop] -------------------------------------------------------------------------------- /deprecated/utils/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import coloredlogs 3 | import os 4 | 5 | 6 | def get_logger(name='', save_dir=None, distributed_rank=0, filename="log.txt"): 7 | logger = logging.getLogger(name) 8 | coloredlogs.install(level='DEBUG', logger=logger) 9 | # logger.setLevel(logging.DEBUG) 10 | # don't log results for the non-master process 11 | if distributed_rank > 0: 12 | return logger 13 | formatter = logging.Formatter( 14 | "%(asctime)s %(name)s %(levelname)s: %(message)s") 15 | 16 | # ch = logging.StreamHandler(stream=sys.stdout) 17 | # ch.setLevel(logging.DEBUG) 18 | # ch.setFormatter(formatter) 19 | # logger.addHandler(ch) 20 | 21 | if save_dir: 22 | fh = logging.FileHandler(os.path.join(save_dir, filename)) 23 | fh.setLevel(logging.DEBUG) 24 | fh.setFormatter(formatter) 25 | if len(logger.handlers) > 0: 26 | logger.removeHandler(logger.handlers[0]) 27 | logger.addHandler(fh) 28 | 29 | return logger 30 | 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Ding Xiaohan 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /data/dataset_util.py: -------------------------------------------------------------------------------- 1 | import torch.utils.data as data 2 | import torch 3 | 4 | class DataIterator(object): 5 | 6 | def __init__(self, dataloader): 7 | self.dataloader = dataloader 8 | self.iterator = enumerate(self.dataloader) 9 | 10 | def __next__(self): 11 | try: 12 | _, data = next(self.iterator) 13 | except Exception: 14 | self.iterator = enumerate(self.dataloader) 15 | _, data = next(self.iterator) 16 | return data[0], data[1] 17 | 18 | 19 | class InfiniteDataLoader(torch.utils.data.DataLoader): 20 | def __init__(self, *args, **kwargs): 21 | super().__init__(*args, **kwargs) 22 | # Initialize an iterator over the dataset. 23 | self.dataset_iterator = super().__iter__() 24 | 25 | def __iter__(self): 26 | return self 27 | 28 | def __next__(self): 29 | try: 30 | batch = next(self.dataset_iterator) 31 | except StopIteration: 32 | # Dataset exhausted, use a new fresh iterator. 33 | self.dataset_iterator = super().__iter__() 34 | batch = next(self.dataset_iterator) 35 | return batch 36 | -------------------------------------------------------------------------------- /base_model/lenet5.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from builder import ConvBuilder 3 | 4 | 5 | class LeNet5BN(nn.Module): 6 | 7 | def __init__(self, builder:ConvBuilder, deps): 8 | super(LeNet5BN, self).__init__() 9 | self.bd = builder 10 | stem = builder.Sequential() 11 | stem.add_module('conv1', builder.Conv2dBNReLU(in_channels=1, out_channels=deps[0], kernel_size=5)) 12 | stem.add_module('maxpool1', builder.Maxpool2d(kernel_size=2)) 13 | stem.add_module('conv2', builder.Conv2dBNReLU(in_channels=deps[0], out_channels=deps[1], kernel_size=5)) 14 | stem.add_module('maxpool2', builder.Maxpool2d(kernel_size=2)) 15 | self.stem = stem 16 | self.flatten = builder.Flatten() 17 | self.linear1 = builder.IntermediateLinear(in_features=deps[1] * 16, out_features=500) 18 | self.relu1 = builder.ReLU() 19 | self.linear2 = builder.Linear(in_features=500, out_features=10) 20 | 21 | def forward(self, x): 22 | out = self.stem(x) 23 | out = self.flatten(out) 24 | out = self.linear1(out) 25 | out = self.relu1(out) 26 | out = self.linear2(out) 27 | return out 28 | 29 | 30 | def create_lenet5bn(cfg, builder): 31 | return LeNet5BN(builder=builder, deps=cfg.deps) 32 | -------------------------------------------------------------------------------- /utils/timer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | 3 | 4 | import time 5 | import datetime 6 | 7 | 8 | class Timer(object): 9 | def __init__(self): 10 | self.reset() 11 | 12 | @property 13 | def average_time(self): 14 | return self.total_time / self.calls if self.calls > 0 else 0.0 15 | 16 | def tic(self): 17 | # using time.time instead of time.clock because time time.clock 18 | # does not normalize for multithreading 19 | self.start_time = time.time() 20 | 21 | def toc(self, average=True): 22 | self.add(time.time() - self.start_time) 23 | if average: 24 | return self.average_time 25 | else: 26 | return self.diff 27 | 28 | def add(self, time_diff): 29 | self.diff = time_diff 30 | self.total_time += self.diff 31 | self.calls += 1 32 | 33 | def reset(self): 34 | self.total_time = 0.0 35 | self.calls = 0 36 | self.start_time = 0.0 37 | self.diff = 0.0 38 | 39 | def avg_time_str(self): 40 | time_str = str(datetime.timedelta(seconds=self.average_time)) 41 | return time_str 42 | 43 | 44 | def get_time_str(time_diff): 45 | time_str = str(datetime.timedelta(seconds=time_diff)) 46 | return time_str 47 | -------------------------------------------------------------------------------- /deprecated/utils/timer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | 4 | import time 5 | import datetime 6 | 7 | 8 | class Timer(object): 9 | def __init__(self): 10 | self.reset() 11 | 12 | @property 13 | def average_time(self): 14 | return self.total_time / self.calls if self.calls > 0 else 0.0 15 | 16 | def tic(self): 17 | # using time.time instead of time.clock because time time.clock 18 | # does not normalize for multithreading 19 | self.start_time = time.time() 20 | 21 | def toc(self, average=True): 22 | self.add(time.time() - self.start_time) 23 | if average: 24 | return self.average_time 25 | else: 26 | return self.diff 27 | 28 | def add(self, time_diff): 29 | self.diff = time_diff 30 | self.total_time += self.diff 31 | self.calls += 1 32 | 33 | def reset(self): 34 | self.total_time = 0.0 35 | self.calls = 0 36 | self.start_time = 0.0 37 | self.diff = 0.0 38 | 39 | def avg_time_str(self): 40 | time_str = str(datetime.timedelta(seconds=self.average_time)) 41 | return time_str 42 | 43 | 44 | def get_time_str(time_diff): 45 | time_str = str(datetime.timedelta(seconds=time_diff)) 46 | return time_str 47 | -------------------------------------------------------------------------------- /deprecated/base_model/lenet5.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from builder import ConvBuilder 3 | 4 | LENET5_DEPS = [20, 50, 500] 5 | 6 | class LeNet5(nn.Module): 7 | 8 | def __init__(self, builder:ConvBuilder, deps): 9 | super(LeNet5, self).__init__() 10 | self.bd = builder 11 | stem = builder.Sequential() 12 | stem.add_module('conv1', builder.Conv2d(in_channels=1, out_channels=LENET5_DEPS[0], kernel_size=5, bias=True)) 13 | stem.add_module('maxpool1', builder.Maxpool2d(kernel_size=2)) 14 | stem.add_module('conv2', builder.Conv2d(in_channels=LENET5_DEPS[0], out_channels=LENET5_DEPS[1], kernel_size=5, bias=True)) 15 | stem.add_module('maxpool2', builder.Maxpool2d(kernel_size=2)) 16 | self.stem = stem 17 | self.flatten = builder.Flatten() 18 | self.linear1 = builder.Linear(in_features=LENET5_DEPS[1] * 16, out_features=LENET5_DEPS[2]) 19 | self.relu1 = builder.ReLU() 20 | self.linear2 = builder.Linear(in_features=LENET5_DEPS[2], out_features=10) 21 | 22 | def forward(self, x): 23 | 
out = self.stem(x) 24 | # print(out.size()) 25 | out = self.flatten(out) 26 | out = self.linear1(out) 27 | out = self.relu1(out) 28 | out = self.linear2(out) 29 | return out 30 | 31 | 32 | def create_lenet5(cfg, builder): 33 | return LeNet5(builder=builder, deps=cfg.deps) 34 | -------------------------------------------------------------------------------- /model_map.py: -------------------------------------------------------------------------------- 1 | from base_model.mobilenetv1 import * 2 | from base_model.stagewise_resnet import * 3 | from base_model.vgg import * 4 | from base_model.lenet5 import create_lenet5bn 5 | from base_model.wrn import create_wrnc16plain 6 | from base_model.resnet import create_ResNet18, create_ResNet34 7 | from base_model.cfqk import create_CFQKBNC 8 | 9 | IMAGENET_STANDARD_MODEL_MAP = { 10 | 'sres50': create_SResNet50, 11 | 'smi1': create_MobileV1Imagenet, 12 | 'sres18': create_ResNet18, 13 | 'sres34': create_ResNet34 14 | } 15 | 16 | CIFAR10_MODEL_MAP = { 17 | 'src56':create_SRC56, 18 | 'src110':create_SRC110, 19 | 'vc':create_vc, 20 | 'wrnc16plain':create_wrnc16plain, 21 | 'cfqkbnc':create_CFQKBNC 22 | } 23 | 24 | MNIST_MODEL_MAP = { 25 | 'lenet5bn': create_lenet5bn, 26 | } 27 | 28 | DATASET_TO_MODEL_MAP = { 29 | 'imagenet_standard': IMAGENET_STANDARD_MODEL_MAP, 30 | 'cifar10': CIFAR10_MODEL_MAP, 31 | 'mnist': MNIST_MODEL_MAP 32 | } 33 | 34 | 35 | # return the model creation function 36 | def get_model_fn(dataset_name, model_name): 37 | # print(DATASET_TO_MODEL_MAP[dataset_name.replace('_blank', '_standard')].keys()) 38 | return DATASET_TO_MODEL_MAP[dataset_name.replace('_blank', '_standard')][model_name] 39 | 40 | def get_dataset_name_by_model_name(model_name): 41 | for dataset_name, model_map in DATASET_TO_MODEL_MAP.items(): 42 | if model_name in model_map: 43 | return dataset_name 44 | return None -------------------------------------------------------------------------------- /deprecated/show_log.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import re 3 | import numpy as np 4 | 5 | top1_pattern = re.compile('top1=(\-*\d+(?:\.\d+)?)') 6 | top5_pattern = re.compile('top5=(\-*\d+(?:\.\d+)?)') 7 | loss_pattern = re.compile('loss=(\-*\d+(?:\.\d+)?)') 8 | 9 | 10 | def get_value_by_pattern(pattern, line): 11 | return float(re.findall(pattern, line)[0]) 12 | 13 | def parse_top1_top5_loss_from_log_line(log_line): 14 | top1 = get_value_by_pattern(top1_pattern, log_line) 15 | top5 = get_value_by_pattern(top5_pattern, log_line) 16 | loss = get_value_by_pattern(loss_pattern, log_line) 17 | return top1, top5, loss 18 | 19 | 20 | root_dir = 'acnet_exps' 21 | num_logs = 10 22 | 23 | log_files = glob.glob('{}/*/log.txt'.format(root_dir)) 24 | 25 | 26 | 27 | for file_path in log_files: 28 | top1_list = [] 29 | top5_list = [] 30 | loss_list = [] 31 | with open(file_path, 'r') as f: 32 | origin_lines = f.readlines() 33 | log_lines = [l for l in origin_lines if 'top1' in l] 34 | last_lines = log_lines[-num_logs:] 35 | for l in last_lines: 36 | top1, top5, loss = parse_top1_top5_loss_from_log_line(l) 37 | top1_list.append(top1) 38 | top5_list.append(top5) 39 | loss_list.append(loss) 40 | network_try_arg = file_path.split('/')[1].replace('_train', '') 41 | print('{}, \t top1={:.3f}, \t top5={:.3f}, \t loss={:.5f}, \t {} logs'.format(network_try_arg, np.mean(top1_list), np.mean(top5_list), np.mean(loss_list), len(top1_list))) 42 | 43 | 44 | 45 | 46 | 47 | 
-------------------------------------------------------------------------------- /base_model/cfqk.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from builder import ConvBuilder 3 | from constants import CFQK_ORIGIN_DEPS 4 | 5 | 6 | class CFQKBN(nn.Module): 7 | 8 | def __init__(self, num_classes, builder:ConvBuilder, deps=None): 9 | super(CFQKBN, self).__init__() 10 | if deps is None: 11 | deps = CFQK_ORIGIN_DEPS 12 | self.bd = builder 13 | self.conv1 = self.bd.Conv2dBNReLU(in_channels=3, out_channels=deps[0], kernel_size=5, stride=1, padding=2) 14 | self.conv2 = self.bd.Conv2dBNReLU(in_channels=deps[0], out_channels=deps[1], kernel_size=5, stride=1, padding=2) 15 | self.conv3 = self.bd.Conv2dBNReLU(in_channels=deps[1], out_channels=deps[2], kernel_size=5, stride=1, padding=2) 16 | self.fc1 = self.bd.Linear(in_features=3*3*deps[2], out_features=64) 17 | self.fc2 = self.bd.Linear(in_features=64, out_features=num_classes) 18 | 19 | def forward(self, x): 20 | x = self.conv1(x) # 32 21 | x = self.bd.max_pool2d(x, kernel_size=3, stride=2, padding=0) #15 22 | x = self.conv2(x) 23 | x = self.bd.avg_pool2d(x, kernel_size=3, stride=2, padding=0) #7 24 | x = self.conv3(x) 25 | x = self.bd.avg_pool2d(x, kernel_size=3, stride=2, padding=0) #3 26 | x = self.bd.flatten(x) 27 | x = self.fc1(x) 28 | x = self.bd.relu(x) 29 | x = self.fc2(x) 30 | return x 31 | 32 | def create_CFQKBNC(cfg, builder): 33 | return CFQKBN(num_classes=10, builder=builder, deps=cfg.deps) 34 | 35 | def create_CFQKBNH(cfg, builder): 36 | return CFQKBN(num_classes=100, builder=builder, deps=cfg.deps) -------------------------------------------------------------------------------- /deprecated/base_model/cfqk.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from builder import ConvBuilder 3 | from constants import CFQK_ORIGIN_DEPS 4 | 5 | 6 | class CFQKBN(nn.Module): 7 | 8 | def __init__(self, num_classes, builder:ConvBuilder, deps=None): 9 | super(CFQKBN, self).__init__() 10 | if deps is None: 11 | deps = CFQK_ORIGIN_DEPS 12 | self.bd = builder 13 | self.conv1 = self.bd.Conv2dBNReLU(in_channels=3, out_channels=deps[0], kernel_size=5, stride=1, padding=2) 14 | self.conv2 = self.bd.Conv2dBNReLU(in_channels=deps[0], out_channels=deps[1], kernel_size=5, stride=1, padding=2) 15 | self.conv3 = self.bd.Conv2dBNReLU(in_channels=deps[1], out_channels=deps[2], kernel_size=5, stride=1, padding=2) 16 | self.fc1 = self.bd.Linear(in_features=3*3*deps[2], out_features=64) 17 | self.fc2 = self.bd.Linear(in_features=64, out_features=num_classes) 18 | 19 | def forward(self, x): 20 | x = self.conv1(x) # 32 21 | x = self.bd.max_pool2d(x, kernel_size=3, stride=2, padding=0) #15 22 | x = self.conv2(x) 23 | x = self.bd.avg_pool2d(x, kernel_size=3, stride=2, padding=0) #7 24 | x = self.conv3(x) 25 | x = self.bd.avg_pool2d(x, kernel_size=3, stride=2, padding=0) #3 26 | x = self.bd.flatten(x) 27 | x = self.fc1(x) 28 | x = self.bd.relu(x) 29 | x = self.fc2(x) 30 | return x 31 | 32 | def create_CFQKBNC(cfg, builder): 33 | return CFQKBN(num_classes=10, builder=builder, deps=cfg.deps) 34 | 35 | def create_CFQKBNH(cfg, builder): 36 | return CFQKBN(num_classes=100, builder=builder, deps=cfg.deps) -------------------------------------------------------------------------------- /deprecated/display_hdf5.py: -------------------------------------------------------------------------------- 1 | from utils.misc import read_hdf5 2 | import sys 
3 | import numpy as np 4 | 5 | di = read_hdf5(sys.argv[1]) 6 | num_kernel_params = 0 7 | 8 | conv_kernel_cnt = 0 9 | matrix_param_cnt = 0 10 | vec_param_cnt = 0 11 | 12 | bias_cnt = 0 13 | beta_cnt = 0 14 | gamma_cnt = 0 15 | mu_cnt = 0 16 | var_cnt = 0 17 | 18 | for name, array in di.items(): 19 | if array.ndim in [2, 4]: 20 | num_kernel_params += array.size 21 | print(name, array.shape, np.mean(array), np.std(array), ' positive {}, negative {}, zeros {}'.format(np.sum(array > 0), np.sum(array < 0), np.sum(array == 0))) 22 | if 'res' in name: 23 | print(array[:4, :4]) 24 | elif 'diag' in name: 25 | print(array) 26 | if array.ndim == 2: 27 | matrix_param_cnt += array.size 28 | elif array.ndim == 1: 29 | vec_param_cnt += array.size 30 | elif array.ndim == 4: 31 | conv_kernel_cnt += array.size 32 | if 'running_mean' in name or 'moving_mean' in name: 33 | mu_cnt += array.size 34 | elif 'running_var' in name or 'moving_var' in name: 35 | var_cnt += array.size 36 | elif ('weight' in name and 'bn' in name.lower()) or 'gamma' in name: 37 | gamma_cnt += array.size 38 | elif ('bias' in name and 'bn' in name.lower()) or 'beta' in name: 39 | beta_cnt += array.size 40 | elif 'bias' in name: 41 | bias_cnt += array.size 42 | 43 | # if 'resmat' in name: 44 | # print(np.transpose(array).dot(array)) 45 | # exit() 46 | print('number of kernel params: ', num_kernel_params) 47 | print('vec {}, matrix {}, conv {}, total {}'.format(vec_param_cnt, matrix_param_cnt, conv_kernel_cnt, 48 | vec_param_cnt + matrix_param_cnt + conv_kernel_cnt)) 49 | print('mu {}, var {}, gamma {}, beta {}, bias {}'.format(mu_cnt, var_cnt, gamma_cnt, beta_cnt, bias_cnt)) 50 | -------------------------------------------------------------------------------- /display_hdf5.py: -------------------------------------------------------------------------------- 1 | from utils.misc import read_hdf5 2 | import sys 3 | import numpy as np 4 | 5 | di = read_hdf5(sys.argv[1]) 6 | num_kernel_params = 0 7 | 8 | conv_kernel_cnt = 0 9 | matrix_param_cnt = 0 10 | vec_param_cnt = 0 11 | 12 | bias_cnt = 0 13 | beta_cnt = 0 14 | gamma_cnt = 0 15 | mu_cnt = 0 16 | var_cnt = 0 17 | 18 | for name, array in di.items(): 19 | if array.ndim in [2, 4]: 20 | num_kernel_params += array.size 21 | 22 | if 'base_mask' in name: 23 | print(name, array) 24 | 25 | print(name, array.shape, np.mean(array), np.std(array), 26 | ' positive {}, negative {}, zeros {}, near-zero {}'.format(np.sum(array > 0), np.sum(array < 0), np.sum(array == 0), 27 | np.sum(np.abs(array) <= 1e-5))) 28 | 29 | if array.ndim == 2: 30 | matrix_param_cnt += array.size 31 | elif array.ndim == 1: 32 | vec_param_cnt += array.size 33 | elif array.ndim == 4: 34 | conv_kernel_cnt += array.size 35 | if 'running_mean' in name or 'moving_mean' in name: 36 | mu_cnt += array.size 37 | elif 'running_var' in name or 'moving_var' in name: 38 | var_cnt += array.size 39 | elif ('weight' in name and 'bn' in name.lower()) or 'gamma' in name: 40 | gamma_cnt += array.size 41 | elif ('bias' in name and 'bn' in name.lower()) or 'beta' in name: 42 | beta_cnt += array.size 43 | elif 'bias' in name: 44 | bias_cnt += array.size 45 | elif 'spatial_mask' in name: 46 | print(array) 47 | print(np.sum(array)) 48 | 49 | print('number of kernel params: ', num_kernel_params) 50 | print('vec {}, matrix {}, conv {}, total {}'.format(vec_param_cnt, matrix_param_cnt, conv_kernel_cnt, 51 | vec_param_cnt + matrix_param_cnt + conv_kernel_cnt)) 52 | print('mu {}, var {}, gamma {}, beta {}, bias {}'.format(mu_cnt, var_cnt, gamma_cnt, beta_cnt, 
bias_cnt)) 53 | -------------------------------------------------------------------------------- /deprecated/model_map.py: -------------------------------------------------------------------------------- 1 | from base_model.resnet import * 2 | from base_model.cfqk import * 3 | from base_model.wrn import * 4 | from base_model.mobilenetv1 import * 5 | from base_model.lenet5 import create_lenet5 6 | from base_model.vgg import create_vc, create_vh 7 | 8 | IMAGENET_MODEL_MAP = { 9 | 'resnet18':create_ResNet18, 10 | 'resnet34':create_ResNet34, 11 | 'resnet50':create_ResNet50, 12 | 'resnet101':create_ResNet101, 13 | 'resnet152':create_ResNet152, 14 | } 15 | 16 | 17 | CIFAR10_MODEL_MAP = { 18 | 'rc56':create_RC56, 19 | 'rc110':create_RC110, 20 | 'rc164':create_RC164, 21 | 22 | 'cfqkbnc':create_CFQKBNC, 23 | 24 | 'wrnc16plain':create_wrnc16plain, 25 | 'wrnc16drop':create_wrnc16drop, 26 | 'wrnc28plain':create_wrnc28plain, 27 | 'wrnc28drop':create_wrnc28drop, 28 | 'wrnc40plain':create_wrnc40plain, 29 | 'wrnc40drop':create_wrnc40drop, 30 | 31 | 'mc1':create_MobileV1Cifar, 32 | 'vc': create_vc 33 | 34 | } 35 | 36 | CH_MODEL_MAP = { 37 | 'rh56': create_RH56, 38 | 'rh110': create_RH110, 39 | 'rh164': create_RH164, 40 | 41 | 'cfqkbnh':create_CFQKBNH, 42 | 43 | 'wrnh16plain':create_wrnh16plain, 44 | 'wrnh16drop':create_wrnh16drop, 45 | 'wrnh28plain':create_wrnh28plain, 46 | 'wrnh28drop':create_wrnh28drop, 47 | 'wrnh40plain':create_wrnh40plain, 48 | 'wrnh40drop':create_wrnh40drop, 49 | 50 | 'mh1':create_MobileV1CH, 51 | 52 | 'vh':create_vh 53 | } 54 | 55 | MNIST_MODEL_MAP = { 56 | 'lenet5': create_lenet5 57 | } 58 | 59 | SVHN_MODEL_MAP = { 60 | 61 | } 62 | 63 | DATASET_TO_MODEL_MAP = { 64 | 'imagenet': IMAGENET_MODEL_MAP, 65 | 'cifar10': CIFAR10_MODEL_MAP, 66 | 'ch': CH_MODEL_MAP, #ch for cifar-100 67 | 'svhn': SVHN_MODEL_MAP, 68 | 'mnist': MNIST_MODEL_MAP 69 | } 70 | 71 | 72 | # return the model creation function 73 | def get_model_fn(dataset_name, model_name): 74 | return DATASET_TO_MODEL_MAP[dataset_name][model_name] 75 | 76 | def get_dataset_name_by_model_name(model_name): 77 | for dataset_name, model_map in DATASET_TO_MODEL_MAP.items(): 78 | if model_name in model_map: 79 | return dataset_name 80 | return None 81 | -------------------------------------------------------------------------------- /deprecated/acnet/acnet_rc56.py: -------------------------------------------------------------------------------- 1 | from ding_train import ding_train 2 | from base_config import get_baseconfig_by_epoch 3 | from utils.misc import start_exp 4 | from constants import parse_usual_lr_schedule 5 | 6 | def acnet_rc56(): 7 | try_arg = start_exp() 8 | 9 | network_type = 'rc56' 10 | dataset_name = 'cifar10' 11 | log_dir = 'acnet_exps/{}_{}_train'.format(network_type, try_arg) 12 | save_weights = 'acnet_exps/{}_{}_savedweights.pth'.format(network_type, try_arg) 13 | weight_decay_strength = 1e-4 14 | batch_size = 64 15 | 16 | lrs = parse_usual_lr_schedule(try_arg) 17 | 18 | if 'bias' in try_arg: 19 | weight_decay_bias = weight_decay_strength 20 | else: 21 | weight_decay_bias = 0 22 | 23 | if 'warmup' in try_arg: 24 | warmup_factor = 0 25 | else: 26 | warmup_factor = 1 27 | 28 | config = get_baseconfig_by_epoch(network_type=network_type, dataset_name=dataset_name, dataset_subset='train', 29 | global_batch_size=batch_size, num_node=1, 30 | weight_decay=weight_decay_strength, optimizer_type='sgd', momentum=0.9, 31 | max_epochs=lrs.max_epochs, base_lr=lrs.base_lr, lr_epoch_boundaries=lrs.lr_epoch_boundaries, 32 | 
lr_decay_factor=lrs.lr_decay_factor, 33 | warmup_epochs=5, warmup_method='linear', warmup_factor=warmup_factor, 34 | ckpt_iter_period=20000, tb_iter_period=100, output_dir=log_dir, 35 | tb_dir=log_dir, save_weights=save_weights, val_epoch_period=2, linear_final_lr=lrs.linear_final_lr, 36 | weight_decay_bias=weight_decay_bias) 37 | 38 | if 'normal' in try_arg: 39 | builder = None 40 | elif 'acnet' in try_arg: 41 | from acnet.acnet_builder import ACNetBuilder 42 | builder = ACNetBuilder(base_config=config, deploy=False) 43 | else: 44 | assert False 45 | 46 | ding_train(config, show_variables=True, convbuilder=builder, use_nesterov='nest' in try_arg) 47 | 48 | 49 | if __name__ == '__main__': 50 | acnet_rc56() -------------------------------------------------------------------------------- /deprecated/acnet/acnet_cfqkbnc.py: -------------------------------------------------------------------------------- 1 | from ding_train import ding_train 2 | from base_config import get_baseconfig_by_epoch 3 | from utils.misc import start_exp 4 | from constants import parse_usual_lr_schedule 5 | 6 | def acnet_cfqkbnc(): 7 | try_arg = start_exp() 8 | 9 | network_type = 'cfqkbnc' 10 | dataset_name = 'cifar10' 11 | log_dir = 'acnet_exps/{}_{}_train'.format(network_type, try_arg) 12 | save_weights = 'acnet_exps/{}_{}_savedweights.pth'.format(network_type, try_arg) 13 | weight_decay_strength = 1e-4 14 | batch_size = 64 15 | 16 | lrs = parse_usual_lr_schedule(try_arg) 17 | 18 | if 'bias' in try_arg: 19 | weight_decay_bias = weight_decay_strength 20 | else: 21 | weight_decay_bias = 0 22 | 23 | if 'warmup' in try_arg: 24 | warmup_factor = 0 25 | else: 26 | warmup_factor = 1 27 | 28 | config = get_baseconfig_by_epoch(network_type=network_type, dataset_name=dataset_name, dataset_subset='train', 29 | global_batch_size=batch_size, num_node=1, 30 | weight_decay=weight_decay_strength, optimizer_type='sgd', momentum=0.9, 31 | max_epochs=lrs.max_epochs, base_lr=lrs.base_lr, lr_epoch_boundaries=lrs.lr_epoch_boundaries, 32 | lr_decay_factor=lrs.lr_decay_factor, 33 | warmup_epochs=5, warmup_method='linear', warmup_factor=warmup_factor, 34 | ckpt_iter_period=20000, tb_iter_period=100, output_dir=log_dir, 35 | tb_dir=log_dir, save_weights=save_weights, val_epoch_period=2, linear_final_lr=lrs.linear_final_lr, 36 | weight_decay_bias=weight_decay_bias) 37 | 38 | if 'normal' in try_arg: 39 | builder = None 40 | elif 'acnet' in try_arg: 41 | from acnet.acnet_builder import ACNetBuilder 42 | builder = ACNetBuilder(base_config=config, deploy=False) 43 | else: 44 | assert False 45 | 46 | ding_train(config, show_variables=True, convbuilder=builder, use_nesterov='nest' in try_arg) 47 | 48 | 49 | if __name__ == '__main__': 50 | acnet_cfqkbnc() -------------------------------------------------------------------------------- /deprecated/acnet/acnet_vc.py: -------------------------------------------------------------------------------- 1 | from ding_train import ding_train 2 | from base_config import get_baseconfig_by_epoch 3 | from utils.misc import start_exp 4 | from constants import VGG_ORIGIN_DEPS, parse_usual_lr_schedule 5 | 6 | def acnet_vc(): 7 | try_arg = start_exp() 8 | 9 | network_type = 'vc' 10 | dataset_name = 'cifar10' 11 | log_dir = 'acnet_exps/{}_{}_train'.format(network_type, try_arg) 12 | save_weights = 'acnet_exps/{}_{}_savedweights.pth'.format(network_type, try_arg) 13 | weight_decay_strength = 1e-4 14 | batch_size = 64 15 | deps = VGG_ORIGIN_DEPS 16 | 17 | lrs = parse_usual_lr_schedule(try_arg) 18 | 19 | if 'bias' in 
try_arg: 20 | weight_decay_bias = weight_decay_strength 21 | else: 22 | weight_decay_bias = 0 23 | 24 | 25 | 26 | if 'warmup' in try_arg: 27 | warmup_factor = 0 28 | else: 29 | warmup_factor = 1 30 | 31 | config = get_baseconfig_by_epoch(network_type=network_type, dataset_name=dataset_name, dataset_subset='train', 32 | global_batch_size=batch_size, num_node=1, 33 | weight_decay=weight_decay_strength, optimizer_type='sgd', momentum=0.9, 34 | max_epochs=lrs.max_epochs, base_lr=lrs.base_lr, lr_epoch_boundaries=lrs.lr_epoch_boundaries, 35 | lr_decay_factor=lrs.lr_decay_factor, 36 | warmup_epochs=5, warmup_method='linear', warmup_factor=warmup_factor, 37 | ckpt_iter_period=20000, tb_iter_period=100, output_dir=log_dir, 38 | tb_dir=log_dir, save_weights=save_weights, val_epoch_period=2, linear_final_lr=lrs.linear_final_lr, 39 | weight_decay_bias=weight_decay_bias, deps=deps) 40 | 41 | if 'normal' in try_arg: 42 | builder = None 43 | elif 'acnet' in try_arg: 44 | from acnet.acnet_builder import ACNetBuilder 45 | builder = ACNetBuilder(base_config=config, deploy=False) 46 | else: 47 | assert False 48 | 49 | ding_train(config, show_variables=True, convbuilder=builder, use_nesterov='nest' in try_arg) 50 | 51 | 52 | 53 | if __name__ == '__main__': 54 | acnet_vc() -------------------------------------------------------------------------------- /deprecated/acnet/acnet_wrnc16.py: -------------------------------------------------------------------------------- 1 | from ding_train import ding_train 2 | from base_config import get_baseconfig_by_epoch 3 | from utils.misc import start_exp 4 | from constants import wrn_origin_deps_flattened, parse_usual_lr_schedule 5 | 6 | def acnet_wrnc16(): 7 | try_arg = start_exp() 8 | 9 | network_type = 'wrnc16plain' 10 | dataset_name = 'cifar10' 11 | log_dir = 'acnet_exps/{}_{}_train'.format(network_type, try_arg) 12 | save_weights = 'acnet_exps/{}_{}_savedweights.pth'.format(network_type, try_arg) 13 | weight_decay_strength = 5e-4 14 | batch_size = 128 15 | deps = wrn_origin_deps_flattened(2, 8) 16 | 17 | lrs = parse_usual_lr_schedule(try_arg) 18 | 19 | if 'bias' in try_arg: 20 | weight_decay_bias = weight_decay_strength 21 | else: 22 | weight_decay_bias = 0 23 | 24 | if 'warmup' in try_arg: 25 | warmup_factor = 0 26 | else: 27 | warmup_factor = 1 28 | 29 | config = get_baseconfig_by_epoch(network_type=network_type, dataset_name=dataset_name, dataset_subset='train', 30 | global_batch_size=batch_size, num_node=1, 31 | weight_decay=weight_decay_strength, optimizer_type='sgd', momentum=0.9, 32 | max_epochs=lrs.max_epochs, base_lr=lrs.base_lr, lr_epoch_boundaries=lrs.lr_epoch_boundaries, 33 | lr_decay_factor=lrs.lr_decay_factor, 34 | warmup_epochs=5, warmup_method='linear', warmup_factor=warmup_factor, 35 | ckpt_iter_period=20000, tb_iter_period=100, output_dir=log_dir, 36 | tb_dir=log_dir, save_weights=save_weights, val_epoch_period=2, linear_final_lr=lrs.linear_final_lr, 37 | weight_decay_bias=weight_decay_bias, deps=deps) 38 | 39 | if 'normal' in try_arg: 40 | builder = None 41 | elif 'acnet' in try_arg: 42 | from acnet.acnet_builder import ACNetBuilder 43 | builder = ACNetBuilder(base_config=config, deploy=False) 44 | else: 45 | assert False 46 | 47 | ding_train(config, show_variables=True, convbuilder=builder, use_nesterov='nest' in try_arg) 48 | 49 | 50 | if __name__ == '__main__': 51 | acnet_wrnc16() -------------------------------------------------------------------------------- /deprecated/README.md: 
--------------------------------------------------------------------------------
 1 | # ACNet
 2 | 
 3 | **These are deprecated.**
 4 | 
 5 | ## Example Usage
 6 | 
 7 | 1. Install PyTorch 1.1. Clone this repo and enter the directory. Modify PYTHONPATH or you will get an ImportError.
 8 | ```
 9 | export PYTHONPATH='WHERE_YOU_CLONED_THIS_REPO'
10 | ```
11 | 
12 | 2. Modify 'CIFAR10_PATH' in dataset.py to the directory of your CIFAR-10 dataset. If the dataset is not found in that directory, it will be downloaded automatically.
13 | 
14 | 3. Train a Cifar-quick on CIFAR-10 without Asymmetric Convolution Blocks as the baseline. (We use learning rate warmup and weight decay on bias parameters. They are not necessary, just our preference. Here 'lrs5' is a pre-defined learning rate schedule.) The model will be evaluated every two epochs.
15 | ```
16 | python acnet/acnet_cfqkbnc.py --try_arg=normal_lrs5_warmup_bias
17 | ```
18 | 
19 | 4. Train a Cifar-quick on CIFAR-10 with Asymmetric Convolution Blocks. The trained weights will be saved to acnet_exps/cfqkbnc_acnet_lrs5_warmup_bias_train/finish.hdf5. Note that Cifar-quick uses 5x5 convs, and we add 1x3 and 3x1 kernels onto the 5x5 kernels. Of course, 1x5 and 5x1 convs may work better.
20 | ```
21 | python acnet/acnet_cfqkbnc.py --try_arg=acnet_lrs5_warmup_bias
22 | ```
23 | 
24 | 5. Check the average accuracy of the two models over their last ten evaluations. You will see the accuracy gap.
25 | ```
26 | python show_log.py
27 | ```
28 | 
29 | 6. Build a Cifar-quick with the same structure as the baseline model, then convert the weights of the ACNet counterpart via BN fusion and branch fusion to initialize it. Test before and after the conversion. You will see identical results.
30 | ```
31 | python acnet/acnet_test.py cfqkbnc acnet_exps/cfqkbnc_acnet_lrs5_warmup_bias_train/finish.hdf5
32 | ```
33 | 
34 | 7. Check the names and shapes of the converted weights.
35 | ```
36 | python display_hdf5.py acnet_exps/cfqkbnc_acnet_lrs5_warmup_bias_train/finish_deploy.hdf5
37 | ```
38 | 
39 | Other models:
40 | 
41 | VGG is deeper, so we train it for longer:
42 | ```
43 | python acnet/acnet_vc.py --try_arg=acnet_lrs3_warmup_bias
44 | ```
45 | ResNet-56:
46 | ```
47 | python acnet/acnet_rc56.py --try_arg=acnet_lrs3_warmup_bias
48 | ```
49 | WRN-16-8 (we slightly lengthen the learning rate schedule recommended in the WRN paper):
50 | ```
51 | python acnet/acnet_wrnc16.py --try_arg=acnet_lrs6_warmup_bias
52 | ```
53 | 
54 | 
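55 | ### How the conversion works (sketch)
56 | 
57 | At inference time a BN layer is just a per-channel affine transform, so it can be folded into the conv before it; and because convolution is linear, the three fused branches of an Asymmetric Convolution Block (the square kernel plus the 1x3 and 3x1 kernels, each followed by its own BN) can then be summed into a single square kernel with a bias. Below is a minimal numpy sketch of that computation, for illustration only: the function names here are made up, and the real conversion code in acnet/acnet_fusion.py (convert_acnet_weights) works on the saved hdf5 weights and handles more details than shown.
58 | 
59 | ```
60 | import numpy as np
61 | 
62 | def fuse_conv_bn(kernel, gamma, beta, mean, var, eps=1e-5):
63 |     # Fold a BN layer into the preceding (bias-free) conv: scale every
64 |     # output-channel slice of the kernel and turn the BN shift into a bias.
65 |     std = np.sqrt(var + eps)
66 |     fused_kernel = kernel * (gamma / std).reshape(-1, 1, 1, 1)
67 |     fused_bias = beta - mean * gamma / std
68 |     return fused_kernel, fused_bias
69 | 
70 | def fuse_acb(square, hor, ver):
71 |     # square, hor, ver: the square (e.g. 3x3 or 5x5), horizontal (1x3) and
72 |     # vertical (3x1) branches of one ACB, each given as a tuple
73 |     # (kernel, gamma, beta, running_mean, running_var) after training.
74 |     k_sq, b_sq = fuse_conv_bn(*square)
75 |     k_hor, b_hor = fuse_conv_bn(*hor)
76 |     k_ver, b_ver = fuse_conv_bn(*ver)
77 |     kh, kw = k_sq.shape[2], k_sq.shape[3]
78 |     k_sq = k_sq.copy()
79 |     # add the fused 1xN kernel onto the centre row of the square kernel
80 |     r, c0 = kh // 2, (kw - k_hor.shape[3]) // 2
81 |     k_sq[:, :, r:r + 1, c0:c0 + k_hor.shape[3]] += k_hor
82 |     # add the fused Nx1 kernel onto the centre column
83 |     c, r0 = kw // 2, (kh - k_ver.shape[2]) // 2
84 |     k_sq[:, :, r0:r0 + k_ver.shape[2], c:c + 1] += k_ver
85 |     # parameters of a single ordinary square conv with bias
86 |     return k_sq, b_sq + b_hor + b_ver
87 | ```
88 | 
89 | The deploy-mode model then simply loads such fused kernels and biases into plain conv layers, which is why the test before and after the conversion prints identical results.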
--------------------------------------------------------------------------------
/deprecated/base_model/mobilenetv1.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from builder import ConvBuilder 3 | 4 | class MobileV1Block(nn.Module): 5 | '''Depthwise conv + Pointwise conv''' 6 | def __init__(self, builder:ConvBuilder, in_planes, out_planes, stride=1): 7 | super(MobileV1Block, self).__init__() 8 | self.conv1 = builder.Conv2dBNReLU(in_channels=in_planes, out_channels=in_planes, kernel_size=3, 9 | stride=stride, padding=1, groups=in_planes) 10 | self.conv2 = builder.Conv2dBNReLU(in_channels=in_planes, out_channels=out_planes, kernel_size=1, 11 | stride=1, padding=0) 12 | 13 | def forward(self, x): 14 | out = self.conv1(x) 15 | out = self.conv2(out) 16 | return out 17 | 18 | imagenet_cfg = [64, (128,2), 128, (256,2), 256, (512,2), 512, 512, 512, 512, 512, (1024,2), 1024] 19 | # cifar_cfg = [16, (32,2), 32, (64,2), 64, (128,2), 128, 128, 128, 128, 128, (256,2), 256] # 86% 20 | # cifar_cfg = [16, 32, 32, (64,2), 64, (128,2), 128, 128, 128, 128, 128, (256,2), 256] 21 | cifar_cfg = [16, 32, 32, 64, 64, (128,2), 128, 128, 128, 128, 128, (256,2), 256] # 93 22 | 23 | class MobileV1CifarNet(nn.Module): 24 | 25 | def __init__(self, builder:ConvBuilder, num_classes): 26 | super(MobileV1CifarNet, self).__init__() 27 | self.conv1 = builder.Conv2dBNReLU(in_channels=3, out_channels=16, kernel_size=3, stride=1, padding=1) 28 | blocks = [] 29 | in_planes = cifar_cfg[0] 30 | for x in cifar_cfg: 31 | out_planes = x if isinstance(x, int) else x[0] 32 | stride = 1 if isinstance(x, int) else x[1] 33 | blocks.append(MobileV1Block(builder=builder, in_planes=in_planes, out_planes=out_planes, stride=stride)) 34 | in_planes = out_planes 35 | self.stem = builder.Sequential(*blocks) 36 | self.linear = builder.Linear(cifar_cfg[-1], num_classes) 37 | self.bd = builder 38 | 39 | 40 | 41 | def forward(self, x): 42 | out = self.conv1(x) 43 | out = self.stem(out) 44 | out = self.bd.avg_pool2d(out, 8, stride=1, padding=0) 45 | out = self.bd.flatten(out) 46 | out = self.linear(out) 47 | return out 48 | 49 | def create_MobileV1Cifar(cfg, builder): 50 | return MobileV1CifarNet(builder=builder, num_classes=10) 51 | def create_MobileV1CH(cfg, builder): 52 | return MobileV1CifarNet(builder=builder, num_classes=100) --------------------------------------------------------------------------------
/data/imagenet_data.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision.transforms as transforms 3 | from data.dataset_util import DataIterator 4 | import os 5 | import torchvision.datasets as datasets 6 | 7 | IMGNET_TRAIN_DIR = 'imagenet_data' 8 | 9 | class ImgnetStdTrainData(object): 10 | 11 | def __init__(self, distributed, batch_size_per_gpu): 12 | normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], 13 | std=[0.229, 0.224, 0.225]) 14 |
self.train_dataset = datasets.ImageFolder( 15 | os.path.join(IMGNET_TRAIN_DIR, 'train'), 16 | transforms.Compose([ 17 | transforms.RandomResizedCrop(224), 18 | transforms.RandomHorizontalFlip(), 19 | transforms.ToTensor(), 20 | normalize, 21 | ])) 22 | 23 | if distributed: 24 | self.train_sampler = torch.utils.data.distributed.DistributedSampler(self.train_dataset, shuffle=True) 25 | shuffle = False 26 | else: 27 | self.train_sampler = None 28 | shuffle = True 29 | self.train_loader = torch.utils.data.DataLoader( 30 | self.train_dataset, batch_size=batch_size_per_gpu, sampler=self.train_sampler, shuffle=shuffle, 31 | num_workers=4, pin_memory=True, drop_last=True) 32 | self.dataprovider = DataIterator(self.train_loader) 33 | 34 | 35 | class ImgnetStdValData(object): 36 | def __init__(self, batch_size): 37 | normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], 38 | std=[0.229, 0.224, 0.225]) 39 | 40 | self.val_dataset = datasets.ImageFolder( 41 | os.path.join(IMGNET_TRAIN_DIR, 'val'), 42 | transforms.Compose([ 43 | transforms.Resize(256), 44 | transforms.CenterCrop(224), 45 | transforms.ToTensor(), 46 | normalize, 47 | ] 48 | ) 49 | ) 50 | self.val_loader = torch.utils.data.DataLoader( 51 | self.val_dataset, batch_size=batch_size, shuffle=False, 52 | num_workers=4, pin_memory=True 53 | ) 54 | self.dataprovider = DataIterator(self.val_loader) -------------------------------------------------------------------------------- /utils/pyt_utils.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # Most of the util functions should has nothing to do with torch 3 | 4 | import os 5 | import sys 6 | import time 7 | import argparse 8 | import errno 9 | from collections import OrderedDict, defaultdict 10 | 11 | 12 | def extant_file(x): 13 | """ 14 | 'Type' for argparse - checks that file exists but does not open. 15 | """ 16 | if not os.path.exists(x): 17 | # Argparse uses the ArgumentTypeError to give a rejection message like: 18 | # error: argument input: x does not exist 19 | raise argparse.ArgumentTypeError("{0} does not exist".format(x)) 20 | return x 21 | 22 | 23 | def parse_torch_devices(input_devices): 24 | """Parse user's devices input string to standard format for Torch. 25 | e.g. [gpu0, gpu1, ...] 
26 | 27 | """ 28 | import torch 29 | print('we have {} torch devices'.format(torch.cuda.device_count())) 30 | from .logger import get_logger 31 | logger = get_logger() 32 | 33 | if input_devices.endswith('*'): 34 | devices = list(range(torch.cuda.device_count())) 35 | return devices 36 | 37 | devices = [] 38 | for d in input_devices.split(','): 39 | if '-' in d: 40 | start_device, end_device = d.split('-')[0], d.split('-')[1] 41 | assert start_device != '' 42 | assert end_device != '' 43 | start_device, end_device = int(start_device), int(end_device) 44 | assert start_device < end_device 45 | assert end_device < torch.cuda.device_count() 46 | for sd in range(start_device, end_device + 1): 47 | devices.append(sd) 48 | else: 49 | device = int(d) 50 | assert device < torch.cuda.device_count() 51 | devices.append(device) 52 | 53 | logger.info('using devices {}'.format(', '.join([str(d) for d in devices]))) 54 | 55 | return devices 56 | 57 | 58 | def link_file(src, target): 59 | """symbol link the source directorie to target 60 | """ 61 | if os.path.isdir(target) or os.path.isfile(target): 62 | os.remove(target) 63 | os.system('ln -s {} {}'.format(src, target)) 64 | 65 | 66 | def ensure_dir(path): 67 | """create directories if *path* does not exist 68 | """ 69 | try: 70 | if not os.path.isdir(path): 71 | os.makedirs(path) 72 | except OSError as e: 73 | if e.errno != errno.EEXIST: 74 | raise 75 | 76 | 77 | # def mk_dir(path): 78 | # try: 79 | # os.makedirs(path) 80 | # except OSError as e: 81 | # if e.errno != errno.EEXIST: 82 | # raise 83 | -------------------------------------------------------------------------------- /deprecated/utils/pyt_utils.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # Most of the util functions should has nothing to do with torch 3 | 4 | import os 5 | import sys 6 | import time 7 | import argparse 8 | import errno 9 | from collections import OrderedDict, defaultdict 10 | 11 | 12 | def extant_file(x): 13 | """ 14 | 'Type' for argparse - checks that file exists but does not open. 15 | """ 16 | if not os.path.exists(x): 17 | # Argparse uses the ArgumentTypeError to give a rejection message like: 18 | # error: argument input: x does not exist 19 | raise argparse.ArgumentTypeError("{0} does not exist".format(x)) 20 | return x 21 | 22 | 23 | def parse_torch_devices(input_devices): 24 | """Parse user's devices input string to standard format for Torch. 25 | e.g. [gpu0, gpu1, ...] 
26 | 27 | """ 28 | import torch 29 | print('we have {} torch devices'.format(torch.cuda.device_count())) 30 | from .logger import get_logger 31 | logger = get_logger() 32 | 33 | if input_devices.endswith('*'): 34 | devices = list(range(torch.cuda.device_count())) 35 | return devices 36 | 37 | devices = [] 38 | for d in input_devices.split(','): 39 | if '-' in d: 40 | start_device, end_device = d.split('-')[0], d.split('-')[1] 41 | assert start_device != '' 42 | assert end_device != '' 43 | start_device, end_device = int(start_device), int(end_device) 44 | assert start_device < end_device 45 | assert end_device < torch.cuda.device_count() 46 | for sd in range(start_device, end_device + 1): 47 | devices.append(sd) 48 | else: 49 | device = int(d) 50 | assert device < torch.cuda.device_count() 51 | devices.append(device) 52 | 53 | logger.info('using devices {}'.format(', '.join([str(d) for d in devices]))) 54 | 55 | return devices 56 | 57 | 58 | def link_file(src, target): 59 | """symbol link the source directorie to target 60 | """ 61 | if os.path.isdir(target) or os.path.isfile(target): 62 | os.remove(target) 63 | os.system('ln -s {} {}'.format(src, target)) 64 | 65 | 66 | def ensure_dir(path): 67 | """create directories if *path* does not exist 68 | """ 69 | try: 70 | if not os.path.isdir(path): 71 | os.makedirs(path) 72 | except OSError as e: 73 | if e.errno != errno.EEXIST: 74 | raise 75 | 76 | 77 | # def mk_dir(path): 78 | # try: 79 | # os.makedirs(path) 80 | # except OSError as e: 81 | # if e.errno != errno.EEXIST: 82 | # raise 83 | -------------------------------------------------------------------------------- /deprecated/base_model/vgg.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from builder import ConvBuilder 3 | 4 | class VCNet(nn.Module): 5 | 6 | def __init__(self, num_classes, builder:ConvBuilder, deps): 7 | super(VCNet, self).__init__() 8 | self.bd = builder 9 | sq = builder.Sequential() 10 | sq.add_module('conv1', builder.Conv2dBNReLU(in_channels=3, out_channels=deps[0], kernel_size=3, stride=1, padding=1)) 11 | sq.add_module('conv2', builder.Conv2dBNReLU(in_channels=deps[0], out_channels=deps[1], kernel_size=3, stride=1, padding=1)) 12 | sq.add_module('maxpool1', builder.Maxpool2d(kernel_size=2)) 13 | sq.add_module('conv3', builder.Conv2dBNReLU(in_channels=deps[1], out_channels=deps[2], kernel_size=3, stride=1, padding=1)) 14 | sq.add_module('conv4', builder.Conv2dBNReLU(in_channels=deps[2], out_channels=deps[3], kernel_size=3, stride=1, padding=1)) 15 | sq.add_module('maxpool2', builder.Maxpool2d(kernel_size=2)) 16 | sq.add_module('conv5', builder.Conv2dBNReLU(in_channels=deps[3], out_channels=deps[4], kernel_size=3, stride=1, padding=1)) 17 | sq.add_module('conv6', builder.Conv2dBNReLU(in_channels=deps[4], out_channels=deps[5], kernel_size=3, stride=1, padding=1)) 18 | sq.add_module('conv7', builder.Conv2dBNReLU(in_channels=deps[5], out_channels=deps[6], kernel_size=3, stride=1, padding=1)) 19 | sq.add_module('maxpool3', builder.Maxpool2d(kernel_size=2)) 20 | sq.add_module('conv8', builder.Conv2dBNReLU(in_channels=deps[6], out_channels=deps[7], kernel_size=3, stride=1, padding=1)) 21 | sq.add_module('conv9', builder.Conv2dBNReLU(in_channels=deps[7], out_channels=deps[8], kernel_size=3, stride=1, padding=1)) 22 | sq.add_module('conv10', builder.Conv2dBNReLU(in_channels=deps[8], out_channels=deps[9], kernel_size=3, stride=1, padding=1)) 23 | sq.add_module('maxpool4', builder.Maxpool2d(kernel_size=2)) 24 | 
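# For 32x32 CIFAR inputs the feature map is 2x2 at this point (four 2x2 max-pools so far);
# the three convs below keep that size and maxpool5 reduces it to 1x1, which is why
# linear1 can take exactly deps[12] input features after flattening.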
sq.add_module('conv11', builder.Conv2dBNReLU(in_channels=deps[9], out_channels=deps[10], kernel_size=3, stride=1, padding=1)) 25 | sq.add_module('conv12', builder.Conv2dBNReLU(in_channels=deps[10], out_channels=deps[11], kernel_size=3, stride=1, padding=1)) 26 | sq.add_module('conv13', builder.Conv2dBNReLU(in_channels=deps[11], out_channels=deps[12], kernel_size=3, stride=1, padding=1)) 27 | sq.add_module('maxpool5', builder.Maxpool2d(kernel_size=2)) 28 | self.stem = sq 29 | self.flatten = builder.Flatten() 30 | self.linear1 = builder.Linear(in_features=deps[12], out_features=512) 31 | self.relu = builder.ReLU() 32 | self.linear2 = builder.Linear(in_features=512, out_features=num_classes) 33 | 34 | def forward(self, x): 35 | out = self.stem(x) 36 | out = self.flatten(out) 37 | out = self.linear1(out) 38 | out = self.relu(out) 39 | out = self.linear2(out) 40 | return out 41 | 42 | 43 | def create_vc(cfg, builder): 44 | return VCNet(num_classes=10, builder=builder, deps=cfg.deps) 45 | def create_vh(cfg, builder): 46 | return VCNet(num_classes=100, builder=builder, deps=cfg.deps) 47 | -------------------------------------------------------------------------------- /utils/loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch.nn.modules.loss import _Loss 4 | from typing import List, Tuple 5 | 6 | 7 | 8 | class WeightedCrossEntropyLoss(_Loss): 9 | ''' 10 | Sampled reweighted Cross Entropy loss 11 | only accept one demensions target and two demension input. 12 | ''' 13 | 14 | def __init__(self, ): 15 | super(WeightedCrossEntropyLoss, self).__init__() 16 | 17 | def __call__(self, input:torch.Tensor, target:torch.Tensor, sample_weight): 18 | probs = F.log_softmax(input, dim = 1) 19 | #if target.ndimension(): 20 | if target.ndimension() == 1: 21 | #print(target.shape) 22 | target = target.expand(1, *target.shape) 23 | target = target.transpose(1, 0) 24 | one_hot = torch.zeros_like(probs).scatter_(1, target, 1) 25 | probs = probs * one_hot * -1.0 26 | loss = torch.sum(probs, 1) 27 | loss = loss * sample_weight 28 | loss = torch.mean(loss) 29 | return loss 30 | 31 | class LabelSmoothCrossEntropyLoss(_Loss): 32 | 33 | def __init__(self, eps = 0.1, class_num = 1000): 34 | super(LabelSmoothCrossEntropyLoss, self).__init__() 35 | 36 | self.min_value = eps / class_num 37 | self.eps = eps 38 | 39 | 40 | def __call__(self, pred:torch.Tensor, target:torch.Tensor): 41 | 42 | epses = self.min_value * torch.ones_like(pred) 43 | log_probs = F.log_softmax(pred, dim=1) 44 | 45 | if target.ndimension() == 1: 46 | #print(target.shape) 47 | target = target.expand(1, *target.shape) 48 | #print(target, 'dwa') 49 | target = target.transpose(1, 0) 50 | target = torch.zeros_like(log_probs).scatter_(1, target, 1) 51 | target = target.type(torch.float) 52 | target = target * (1 - self.eps) + epses 53 | 54 | #print(target, 'fff') 55 | element_wise_mul = log_probs * target * -1.0 56 | 57 | loss = torch.sum(element_wise_mul, 1) 58 | loss = torch.mean(loss) 59 | 60 | return loss 61 | 62 | 63 | class AuxClassifersLoss(_Loss): 64 | 65 | def __init__(self, BasicLoss, weights:List[float]): 66 | super(AuxClassifersLoss, self).__init__() 67 | self.BasicLoss = BasicLoss 68 | self.weights = weights 69 | #print('AuxCls', self.BasicLoss) 70 | 71 | def __call__(self, preds:List[torch.Tensor], target): 72 | 73 | loss = 0 74 | for pred in preds: 75 | loss = loss + self.BasicLoss(pred, target) 76 | return loss 77 | 78 | 79 | class 
GaussianWeightedCELoss(_Loss): 80 | 81 | def __init__(self, sigma = 1.0): 82 | super(GaussianWeightedCELoss, self).__init__() 83 | self.sigma = sigma 84 | self.WCE = WeightedCrossEntropyLoss() 85 | 86 | def __call__(self, input:torch.Tensor, target:torch.Tensor): 87 | sample_weight = torch.randn((input.size(0), 1)) * self.sigma 88 | sample_weight = sample_weight.to(input.device) 89 | sample_weight = sample_weight + torch.ones_like(sample_weight) 90 | loss = self.WCE(input, target, sample_weight) 91 | return loss 92 | 93 | 94 | -------------------------------------------------------------------------------- /deprecated/utils/loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch.nn.modules.loss import _Loss 4 | from typing import List, Tuple 5 | 6 | 7 | 8 | class WeightedCrossEntropyLoss(_Loss): 9 | ''' 10 | Sampled reweighted Cross Entropy loss 11 | only accept one demensions target and two demension input. 12 | ''' 13 | 14 | def __init__(self, ): 15 | super(WeightedCrossEntropyLoss, self).__init__() 16 | 17 | def __call__(self, input:torch.Tensor, target:torch.Tensor, sample_weight): 18 | probs = F.log_softmax(input, dim = 1) 19 | #if target.ndimension(): 20 | if target.ndimension() == 1: 21 | #print(target.shape) 22 | target = target.expand(1, *target.shape) 23 | target = target.transpose(1, 0) 24 | one_hot = torch.zeros_like(probs).scatter_(1, target, 1) 25 | probs = probs * one_hot * -1.0 26 | loss = torch.sum(probs, 1) 27 | loss = loss * sample_weight 28 | loss = torch.mean(loss) 29 | return loss 30 | 31 | class LabelSmoothCrossEntropyLoss(_Loss): 32 | 33 | def __init__(self, eps = 0.1, class_num = 1000): 34 | super(LabelSmoothCrossEntropyLoss, self).__init__() 35 | 36 | self.min_value = eps / class_num 37 | self.eps = eps 38 | 39 | 40 | def __call__(self, pred:torch.Tensor, target:torch.Tensor): 41 | 42 | epses = self.min_value * torch.ones_like(pred) 43 | log_probs = F.log_softmax(pred, dim=1) 44 | 45 | if target.ndimension() == 1: 46 | #print(target.shape) 47 | target = target.expand(1, *target.shape) 48 | #print(target, 'dwa') 49 | target = target.transpose(1, 0) 50 | target = torch.zeros_like(log_probs).scatter_(1, target, 1) 51 | target = target.type(torch.float) 52 | target = target * (1 - self.eps) + epses 53 | 54 | #print(target, 'fff') 55 | element_wise_mul = log_probs * target * -1.0 56 | 57 | loss = torch.sum(element_wise_mul, 1) 58 | loss = torch.mean(loss) 59 | 60 | return loss 61 | 62 | 63 | class AuxClassifersLoss(_Loss): 64 | 65 | def __init__(self, BasicLoss, weights:List[float]): 66 | super(AuxClassifersLoss, self).__init__() 67 | self.BasicLoss = BasicLoss 68 | self.weights = weights 69 | #print('AuxCls', self.BasicLoss) 70 | 71 | def __call__(self, preds:List[torch.Tensor], target): 72 | 73 | loss = 0 74 | for pred in preds: 75 | loss = loss + self.BasicLoss(pred, target) 76 | return loss 77 | 78 | 79 | class GaussianWeightedCELoss(_Loss): 80 | 81 | def __init__(self, sigma = 1.0): 82 | super(GaussianWeightedCELoss, self).__init__() 83 | self.sigma = sigma 84 | self.WCE = WeightedCrossEntropyLoss() 85 | 86 | def __call__(self, input:torch.Tensor, target:torch.Tensor): 87 | sample_weight = torch.randn((input.size(0), 1)) * self.sigma 88 | sample_weight = sample_weight.to(input.device) 89 | sample_weight = sample_weight + torch.ones_like(sample_weight) 90 | loss = self.WCE(input, target, sample_weight) 91 | return loss 92 | 93 | 94 | 
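The loss classes above are plain callables, so they can be smoke-tested directly on dummy tensors. A minimal sketch (the batch size and class count below are assumptions for illustration, not values used in this repo):
```
import torch
from utils.loss import WeightedCrossEntropyLoss, LabelSmoothCrossEntropyLoss

logits = torch.randn(8, 10)              # batch of 8 samples, 10 classes (illustrative shapes)
targets = torch.randint(0, 10, (8,))     # integer class labels

smooth_ce = LabelSmoothCrossEntropyLoss(eps=0.1, class_num=10)
print(smooth_ce(logits, targets))        # scalar tensor

weighted_ce = WeightedCrossEntropyLoss()
weights = torch.ones(8)                  # per-sample weights; all ones reduces to plain cross-entropy
print(weighted_ce(logits, targets, weights))
```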
-------------------------------------------------------------------------------- /base_model/mobilenetv1.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from builder import ConvBuilder 3 | from constants import MI1_ORIGIN_DEPS 4 | 5 | class MobileV1Block(nn.Module): 6 | '''Depthwise conv + Pointwise conv''' 7 | def __init__(self, builder:ConvBuilder, in_planes, out_planes, stride=1): 8 | super(MobileV1Block, self).__init__() 9 | self.depthwise = builder.Conv2dBNReLU(in_channels=in_planes, out_channels=in_planes, kernel_size=3, 10 | stride=stride, padding=1, groups=in_planes) 11 | self.pointwise = builder.Conv2dBNReLU(in_channels=in_planes, out_channels=out_planes, kernel_size=1, 12 | stride=1, padding=0) 13 | 14 | def forward(self, x): 15 | out = self.depthwise(x) 16 | out = self.pointwise(out) 17 | return out 18 | 19 | 20 | cifar_cfg = [16, 32, 32, 64, 64, (128,2), 128, 128, 128, 128, 128, (256,2), 256] # 93 21 | 22 | 23 | 24 | class MobileV1CifarNet(nn.Module): 25 | 26 | def __init__(self, builder:ConvBuilder, num_classes): 27 | super(MobileV1CifarNet, self).__init__() 28 | self.conv1 = builder.Conv2dBNReLU(in_channels=3, out_channels=16, kernel_size=3, stride=1, padding=1) 29 | blocks = [] 30 | in_planes = cifar_cfg[0] 31 | for x in cifar_cfg: 32 | out_planes = x if isinstance(x, int) else x[0] 33 | stride = 1 if isinstance(x, int) else x[1] 34 | blocks.append(MobileV1Block(builder=builder, in_planes=in_planes, out_planes=out_planes, stride=stride)) 35 | in_planes = out_planes 36 | self.stem = builder.Sequential(*blocks) 37 | self.gap = builder.GAP(kernel_size=8) 38 | self.linear = builder.Linear(cifar_cfg[-1], num_classes) 39 | 40 | def forward(self, x): 41 | out = self.conv1(x) 42 | out = self.stem(out) 43 | out = self.gap(out) 44 | out = self.linear(out) 45 | return out 46 | 47 | class MobileV1ImagenetNet(nn.Module): 48 | 49 | def __init__(self, builder:ConvBuilder, num_classes, deps=None): 50 | super(MobileV1ImagenetNet, self).__init__() 51 | if deps is None: 52 | deps = MI1_ORIGIN_DEPS 53 | assert len(deps) == 27 54 | self.conv1 = builder.Conv2dBNReLU(in_channels=3, out_channels=deps[0], kernel_size=3, stride=2, padding=1) 55 | blocks = [] 56 | for block_idx in range(13): 57 | depthwise_channels = int(deps[block_idx * 2 + 1]) 58 | pointwise_channels = int(deps[block_idx * 2 + 2]) 59 | stride = 2 if block_idx in [1, 3, 5, 11] else 1 60 | blocks.append(MobileV1Block(builder=builder, in_planes=depthwise_channels, out_planes=pointwise_channels, stride=stride)) 61 | 62 | self.stem = builder.Sequential(*blocks) 63 | self.gap = builder.GAP(kernel_size=7) 64 | self.linear = builder.Linear(deps[-1], num_classes) 65 | 66 | def forward(self, x): 67 | out = self.conv1(x) 68 | out = self.stem(out) 69 | out = self.gap(out) 70 | out = self.linear(out) 71 | return out 72 | 73 | def create_MobileV1Cifar(cfg, builder): 74 | return MobileV1CifarNet(builder=builder, num_classes=10) 75 | def create_MobileV1CH(cfg, builder): 76 | return MobileV1CifarNet(builder=builder, num_classes=100) 77 | def create_MobileV1Imagenet(cfg, builder): 78 | return MobileV1ImagenetNet(builder=builder, num_classes=1000, deps=cfg.deps) -------------------------------------------------------------------------------- /base_model/vgg.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from builder import ConvBuilder 3 | from constants import VGG_ORIGIN_DEPS 4 | 5 | def _create_vgg_stem(builder, 
deps): 6 | sq = builder.Sequential() 7 | sq.add_module('conv1', 8 | builder.Conv2dBNReLU(in_channels=3, out_channels=deps[0], kernel_size=3, stride=1, padding=1)) 9 | sq.add_module('conv2', 10 | builder.Conv2dBNReLU(in_channels=deps[0], out_channels=deps[1], kernel_size=3, stride=1, padding=1)) 11 | sq.add_module('maxpool1', builder.Maxpool2d(kernel_size=2)) 12 | sq.add_module('conv3', 13 | builder.Conv2dBNReLU(in_channels=deps[1], out_channels=deps[2], kernel_size=3, stride=1, padding=1)) 14 | sq.add_module('conv4', 15 | builder.Conv2dBNReLU(in_channels=deps[2], out_channels=deps[3], kernel_size=3, stride=1, padding=1)) 16 | sq.add_module('maxpool2', builder.Maxpool2d(kernel_size=2)) 17 | sq.add_module('conv5', 18 | builder.Conv2dBNReLU(in_channels=deps[3], out_channels=deps[4], kernel_size=3, stride=1, padding=1)) 19 | sq.add_module('conv6', 20 | builder.Conv2dBNReLU(in_channels=deps[4], out_channels=deps[5], kernel_size=3, stride=1, padding=1)) 21 | sq.add_module('conv7', 22 | builder.Conv2dBNReLU(in_channels=deps[5], out_channels=deps[6], kernel_size=3, stride=1, padding=1)) 23 | sq.add_module('maxpool3', builder.Maxpool2d(kernel_size=2)) 24 | sq.add_module('conv8', 25 | builder.Conv2dBNReLU(in_channels=deps[6], out_channels=deps[7], kernel_size=3, stride=1, padding=1)) 26 | sq.add_module('conv9', 27 | builder.Conv2dBNReLU(in_channels=deps[7], out_channels=deps[8], kernel_size=3, stride=1, padding=1)) 28 | sq.add_module('conv10', 29 | builder.Conv2dBNReLU(in_channels=deps[8], out_channels=deps[9], kernel_size=3, stride=1, padding=1)) 30 | sq.add_module('maxpool4', builder.Maxpool2d(kernel_size=2)) 31 | sq.add_module('conv11', 32 | builder.Conv2dBNReLU(in_channels=deps[9], out_channels=deps[10], kernel_size=3, stride=1, padding=1)) 33 | sq.add_module('conv12', 34 | builder.Conv2dBNReLU(in_channels=deps[10], out_channels=deps[11], kernel_size=3, stride=1, padding=1)) 35 | sq.add_module('conv13', 36 | builder.Conv2dBNReLU(in_channels=deps[11], out_channels=deps[12], kernel_size=3, stride=1, padding=1)) 37 | sq.add_module('maxpool5', builder.Maxpool2d(kernel_size=2)) 38 | return sq 39 | 40 | class VCNet(nn.Module): 41 | 42 | def __init__(self, num_classes, builder:ConvBuilder, deps): 43 | super(VCNet, self).__init__() 44 | if deps is None: 45 | deps = VGG_ORIGIN_DEPS 46 | self.stem = _create_vgg_stem(builder=builder, deps=deps) 47 | self.flatten = builder.Flatten() 48 | self.linear1 = builder.IntermediateLinear(in_features=deps[12], out_features=512) 49 | self.relu = builder.ReLU() 50 | self.linear2 = builder.Linear(in_features=512, out_features=num_classes) 51 | 52 | def forward(self, x): 53 | out = self.stem(x) 54 | out = self.flatten(out) 55 | out = self.linear1(out) 56 | out = self.relu(out) 57 | out = self.linear2(out) 58 | return out 59 | 60 | 61 | def create_vc(cfg, builder): 62 | return VCNet(num_classes=10, builder=builder, deps=cfg.deps) 63 | def create_vh(cfg, builder): 64 | return VCNet(num_classes=100, builder=builder, deps=cfg.deps) 65 | -------------------------------------------------------------------------------- /acnet/acnet_fusion.py: -------------------------------------------------------------------------------- 1 | from utils.misc import read_hdf5, save_hdf5 2 | import numpy as np 3 | 4 | SQUARE_KERNEL_KEYWORD = 'square_conv.weight' 5 | 6 | def _fuse_kernel(kernel, gamma, std): 7 | b_gamma = np.reshape(gamma, (kernel.shape[0], 1, 1, 1)) 8 | b_gamma = np.tile(b_gamma, (1, kernel.shape[1], kernel.shape[2], kernel.shape[3])) 9 | b_std = np.reshape(std, (kernel.shape[0], 
1, 1, 1)) 10 | b_std = np.tile(b_std, (1, kernel.shape[1], kernel.shape[2], kernel.shape[3])) 11 | return kernel * b_gamma / b_std 12 | 13 | def _add_to_square_kernel(square_kernel, asym_kernel): 14 | asym_h = asym_kernel.shape[2] 15 | asym_w = asym_kernel.shape[3] 16 | square_h = square_kernel.shape[2] 17 | square_w = square_kernel.shape[3] 18 | square_kernel[:, :, square_h // 2 - asym_h // 2: square_h // 2 - asym_h // 2 + asym_h, 19 | square_w // 2 - asym_w // 2 : square_w // 2 - asym_w // 2 + asym_w] += asym_kernel 20 | 21 | 22 | def convert_acnet_weights(train_weights, deploy_weights, eps): 23 | train_dict = read_hdf5(train_weights) 24 | print(train_dict.keys()) 25 | deploy_dict = {} 26 | square_conv_var_names = [name for name in train_dict.keys() if SQUARE_KERNEL_KEYWORD in name] 27 | for square_name in square_conv_var_names: 28 | square_kernel = train_dict[square_name] 29 | square_mean = train_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'square_bn.running_mean')] 30 | square_std = np.sqrt(train_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'square_bn.running_var')] + eps) 31 | square_gamma = train_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'square_bn.weight')] 32 | square_beta = train_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'square_bn.bias')] 33 | 34 | ver_kernel = train_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'ver_conv.weight')] 35 | ver_mean = train_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'ver_bn.running_mean')] 36 | ver_std = np.sqrt(train_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'ver_bn.running_var')] + eps) 37 | ver_gamma = train_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'ver_bn.weight')] 38 | ver_beta = train_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'ver_bn.bias')] 39 | 40 | hor_kernel = train_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'hor_conv.weight')] 41 | hor_mean = train_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'hor_bn.running_mean')] 42 | hor_std = np.sqrt(train_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'hor_bn.running_var')] + eps) 43 | hor_gamma = train_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'hor_bn.weight')] 44 | hor_beta = train_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'hor_bn.bias')] 45 | 46 | fused_bias = square_beta + ver_beta + hor_beta - square_mean * square_gamma / square_std \ 47 | - ver_mean * ver_gamma / ver_std - hor_mean * hor_gamma / hor_std 48 | fused_kernel = _fuse_kernel(square_kernel, square_gamma, square_std) 49 | _add_to_square_kernel(fused_kernel, _fuse_kernel(ver_kernel, ver_gamma, ver_std)) 50 | _add_to_square_kernel(fused_kernel, _fuse_kernel(hor_kernel, hor_gamma, hor_std)) 51 | 52 | deploy_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'fused_conv.weight')] = fused_kernel 53 | deploy_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'fused_conv.bias')] = fused_bias 54 | 55 | for k, v in train_dict.items(): 56 | if 'hor_' not in k and 'ver_' not in k and 'square_' not in k: 57 | deploy_dict[k] = v 58 | save_hdf5(deploy_dict, deploy_weights) -------------------------------------------------------------------------------- /deprecated/acnet/acnet_fusion.py: -------------------------------------------------------------------------------- 1 | from utils.misc import read_hdf5, save_hdf5 2 | import numpy as np 3 | 4 | SQUARE_KERNEL_KEYWORD = 'square_conv.weight' 5 | 6 | def _fuse_kernel(kernel, gamma, std): 7 | b_gamma = np.reshape(gamma, (kernel.shape[0], 1, 1, 1)) 8 | b_gamma = np.tile(b_gamma, (1, kernel.shape[1], kernel.shape[2], kernel.shape[3])) 9 | b_std = 
np.reshape(std, (kernel.shape[0], 1, 1, 1)) 10 | b_std = np.tile(b_std, (1, kernel.shape[1], kernel.shape[2], kernel.shape[3])) 11 | return kernel * b_gamma / b_std 12 | 13 | def _add_to_square_kernel(square_kernel, asym_kernel): 14 | asym_h = asym_kernel.shape[2] 15 | asym_w = asym_kernel.shape[3] 16 | square_h = square_kernel.shape[2] 17 | square_w = square_kernel.shape[3] 18 | square_kernel[:, :, square_h // 2 - asym_h // 2: square_h // 2 - asym_h // 2 + asym_h, 19 | square_w // 2 - asym_w // 2 : square_w // 2 - asym_w // 2 + asym_w] += asym_kernel 20 | 21 | 22 | def convert_acnet_weights(train_weights, deploy_weights, eps): 23 | train_dict = read_hdf5(train_weights) 24 | print(train_dict.keys()) 25 | deploy_dict = {} 26 | square_conv_var_names = [name for name in train_dict.keys() if SQUARE_KERNEL_KEYWORD in name] 27 | for square_name in square_conv_var_names: 28 | square_kernel = train_dict[square_name] 29 | square_mean = train_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'square_bn.running_mean')] 30 | square_std = np.sqrt(train_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'square_bn.running_var')] + eps) 31 | square_gamma = train_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'square_bn.weight')] 32 | square_beta = train_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'square_bn.bias')] 33 | 34 | ver_kernel = train_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'ver_conv.weight')] 35 | ver_mean = train_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'ver_bn.running_mean')] 36 | ver_std = np.sqrt(train_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'ver_bn.running_var')] + eps) 37 | ver_gamma = train_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'ver_bn.weight')] 38 | ver_beta = train_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'ver_bn.bias')] 39 | 40 | hor_kernel = train_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'hor_conv.weight')] 41 | hor_mean = train_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'hor_bn.running_mean')] 42 | hor_std = np.sqrt(train_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'hor_bn.running_var')] + eps) 43 | hor_gamma = train_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'hor_bn.weight')] 44 | hor_beta = train_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'hor_bn.bias')] 45 | 46 | fused_bias = square_beta + ver_beta + hor_beta - square_mean * square_gamma / square_std \ 47 | - ver_mean * ver_gamma / ver_std - hor_mean * hor_gamma / hor_std 48 | fused_kernel = _fuse_kernel(square_kernel, square_gamma, square_std) 49 | _add_to_square_kernel(fused_kernel, _fuse_kernel(ver_kernel, ver_gamma, ver_std)) 50 | _add_to_square_kernel(fused_kernel, _fuse_kernel(hor_kernel, hor_gamma, hor_std)) 51 | 52 | deploy_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'fused_conv.weight')] = fused_kernel 53 | deploy_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'fused_conv.bias')] = fused_bias 54 | 55 | for k, v in train_dict.items(): 56 | if 'hor_' not in k and 'ver_' not in k and 'square_' not in k: 57 | deploy_dict[k] = v 58 | save_hdf5(deploy_dict, deploy_weights) 59 | 60 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /base_model/resnet.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ResNet in PyTorch.absFor Pre-activation ResNet, see 'preact_resnet.py'. 3 | Reference: 4 | [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun 5 | Deep Residual Learning for Image Recognition. 
arXiv:1512.03385 6 | 7 | Note: cifar_resnet18 constructs the same model with that from 8 | https://github.com/kuangliu/pytorch-cifar/blob/master/models/resnet.py 9 | ''' 10 | 11 | import torch.nn as nn 12 | from builder import ConvBuilder 13 | 14 | class BasicBlock(nn.Module): 15 | 16 | expansion = 1 17 | 18 | def __init__(self, builder:ConvBuilder, in_planes, planes, stride=1): 19 | super(BasicBlock, self).__init__() 20 | self.bd = builder 21 | self.relu = builder.ReLU() 22 | 23 | if stride != 1 or in_planes != self.expansion * planes: 24 | self.shortcut = builder.Conv2dBN(in_channels=in_planes, out_channels=self.expansion * planes, kernel_size=1, stride=stride) 25 | else: 26 | self.shortcut = builder.ResIdentity(num_channels=in_planes) 27 | 28 | self.conv1 = builder.Conv2dBNReLU(in_channels=in_planes, out_channels=planes, kernel_size=3, stride=stride, padding=1) 29 | self.conv2 = builder.Conv2dBN(in_channels=planes, out_channels=self.expansion * planes, kernel_size=3, stride=1, padding=1) 30 | 31 | def forward(self, x): 32 | out = self.conv1(x) 33 | out = self.conv2(out) 34 | out = self.bd.add(out, self.shortcut(x)) 35 | out = self.relu(out) 36 | return out 37 | 38 | 39 | class ResNet(nn.Module): 40 | def __init__(self, builder:ConvBuilder, block, num_blocks, num_classes=10, width_multiplier=None): 41 | super(ResNet, self).__init__() 42 | 43 | print('width multiplier: ', width_multiplier) 44 | 45 | if width_multiplier is None: 46 | width_multiplier = 1 47 | else: 48 | width_multiplier = width_multiplier[0] 49 | 50 | self.bd = builder 51 | self.in_planes = int(64 * width_multiplier) 52 | self.conv1 = builder.Conv2dBNReLU(3, int(64 * width_multiplier), kernel_size=7, stride=2, padding=3) 53 | self.stage1 = self._make_stage(block, int(64 * width_multiplier), num_blocks[0], stride=1) 54 | self.stage2 = self._make_stage(block, int(128 * width_multiplier), num_blocks[1], stride=2) 55 | self.stage3 = self._make_stage(block, int(256 * width_multiplier), num_blocks[2], stride=2) 56 | self.stage4 = self._make_stage(block, int(512 * width_multiplier), num_blocks[3], stride=2) 57 | self.gap = builder.GAP(kernel_size=7) 58 | self.linear = self.bd.Linear(int(512*block.expansion*width_multiplier), num_classes) 59 | 60 | def _make_stage(self, block, planes, num_blocks, stride): 61 | strides = [stride] + [1]*(num_blocks-1) 62 | blocks = [] 63 | for stride in strides: 64 | blocks.append(block(builder=self.bd, in_planes=self.in_planes, planes=int(planes), stride=stride)) 65 | self.in_planes = int(planes * block.expansion) 66 | return nn.Sequential(*blocks) 67 | 68 | def forward(self, x): 69 | out = self.conv1(x) 70 | out = self.bd.max_pool2d(out, kernel_size=3, stride=2, padding=1) 71 | out = self.stage1(out) 72 | out = self.stage2(out) 73 | out = self.stage3(out) 74 | out = self.stage4(out) 75 | out = self.gap(out) 76 | out = self.linear(out) 77 | return out 78 | 79 | def create_ResNet18(cfg, builder): 80 | return ResNet(builder, BasicBlock, [2,2,2,2], num_classes=1000, width_multiplier=cfg.deps) 81 | def create_ResNet34(cfg, builder): 82 | return ResNet(builder, BasicBlock, [3,4,6,3], num_classes=1000, width_multiplier=cfg.deps) -------------------------------------------------------------------------------- /deprecated/utils/checkpoint.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import time 3 | from collections import OrderedDict 4 | 5 | 6 | def load_model(model, model_file, logger): 7 | t_start = time.time() 8 | if isinstance(model_file, 
str): 9 | state_dict = torch.load(model_file, map_location='cpu') 10 | if 'model' in state_dict.keys(): 11 | state_dict = state_dict['model'] 12 | else: 13 | state_dict = model_file 14 | 15 | state_dict = _align_and_update_loaded_state_dicts( 16 | model.state_dict(), state_dict) 17 | t_io_end = time.time() 18 | 19 | # if is_restore: 20 | # new_state_dict = OrderedDict() 21 | # for k, v in state_dict.items(): 22 | # name = 'module.' + k 23 | # new_state_dict[name] = v 24 | # state_dict = new_state_dict 25 | model.load_state_dict(state_dict, strict=False) 26 | ckpt_keys = set(state_dict.keys()) 27 | own_keys = set(model.state_dict().keys()) 28 | missing_keys = own_keys - ckpt_keys 29 | unexpected_keys = ckpt_keys - own_keys 30 | 31 | if len(missing_keys) > 0: 32 | logger.warning('Missing key(s) in state_dict: {}'.format( 33 | ', '.join('{}'.format(k) for k in missing_keys))) 34 | 35 | if len(unexpected_keys) > 0: 36 | logger.warning('Unexpected key(s) in state_dict: {}'.format( 37 | ', '.join('{}'.format(k) for k in unexpected_keys))) 38 | 39 | del state_dict 40 | t_end = time.time() 41 | logger.info( 42 | "Load model, Time usage:\n\tIO: {}, " 43 | "initialize parameters: {}".format( 44 | t_io_end - t_start, t_end - t_io_end)) 45 | 46 | return model 47 | 48 | 49 | def _align_and_update_loaded_state_dicts(model_state_dict, loaded_state_dict): 50 | """ 51 | Strategy: suppose that the models that we will create will have 52 | prefixes appended to each of its keys, for example due to an extra 53 | level of nesting that the original pre-trained weights from ImageNet 54 | won't contain. For example, model.state_dict() might return 55 | backbone[0].body.res2.conv1.weight, while the pre-trained model contains 56 | res2.conv1.weight. We thus want to match both parameters together. 57 | For that, we look for each model weight, look among all loaded keys 58 | if there is one that is a suffix of the current weight name, 59 | and use it if that's the case. If multiple matches exist, 60 | take the one with longest size of the corresponding name. For example, 61 | for the same model as before, the pretrained weight file can contain 62 | both res2.conv1.weight, as well as conv1.weight. In this case, 63 | we want to match backbone[0].body.conv1.weight to conv1.weight, and 64 | backbone[0].body.res2.conv1.weight to res2.conv1.weight. 65 | """ 66 | current_keys = sorted(list(model_state_dict.keys())) 67 | loaded_keys = sorted(list(loaded_state_dict.keys())) 68 | aligned_loaded_state_dict = loaded_state_dict.copy() 69 | 70 | # get a matrix of string matches, where each (i, j) entry 71 | # correspond to the size of the loaded_key string, if it matches 72 | match_matrix = [ 73 | len(j) if i.endswith(j) else 0 for i in current_keys for j in 74 | loaded_keys] 75 | match_matrix = torch.as_tensor(match_matrix).view( 76 | len(current_keys), len(loaded_keys)) 77 | max_match_size, idxs = match_matrix.max(1) 78 | idxs[max_match_size == 0] = -1 79 | 80 | for idx_new, idx_old in enumerate(idxs.tolist()): 81 | if idx_old == -1: 82 | continue 83 | key = current_keys[idx_new] 84 | key_old = loaded_keys[idx_old] 85 | aligned_loaded_state_dict[key] = \ 86 | aligned_loaded_state_dict.pop(key_old) 87 | del loaded_state_dict 88 | return aligned_loaded_state_dict 89 | -------------------------------------------------------------------------------- /utils/torch_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file contains primitives for multi-gpu communication. 
3 | This is useful when doing distributed training. 4 | """ 5 | 6 | import pickle 7 | import time 8 | 9 | import torch 10 | import torch.distributed as dist 11 | 12 | 13 | def get_world_size(): 14 | if not dist.is_available(): 15 | return 1 16 | if not dist.is_initialized(): 17 | return 1 18 | return dist.get_world_size() 19 | 20 | 21 | def get_rank(): 22 | if not dist.is_available(): 23 | return 0 24 | if not dist.is_initialized(): 25 | return 0 26 | return dist.get_rank() 27 | 28 | 29 | def is_main_process(): 30 | return get_rank() == 0 31 | 32 | 33 | def synchronize(): 34 | """ 35 | Helper function to synchronize (barrier) among all processes when 36 | using distributed training 37 | """ 38 | if not dist.is_available(): 39 | return 40 | if not dist.is_initialized(): 41 | return 42 | world_size = dist.get_world_size() 43 | if world_size == 1: 44 | return 45 | dist.barrier() 46 | 47 | 48 | def all_gather(data): 49 | """ 50 | Run all_gather on arbitrary picklable data (not necessarily tensors) 51 | Args: 52 | data: any picklable object 53 | Returns: 54 | list[data]: list of data gathered from each rank 55 | """ 56 | world_size = get_world_size() 57 | if world_size == 1: 58 | return [data] 59 | 60 | # serialized to a Tensor 61 | buffer = pickle.dumps(data) 62 | storage = torch.ByteStorage.from_buffer(buffer) 63 | tensor = torch.ByteTensor(storage).to("cuda") 64 | 65 | # obtain Tensor size of each rank 66 | local_size = torch.LongTensor([tensor.numel()]).to("cuda") 67 | size_list = [torch.LongTensor([0]).to("cuda") for _ in range(world_size)] 68 | dist.all_gather(size_list, local_size) 69 | size_list = [int(size.item()) for size in size_list] 70 | max_size = max(size_list) 71 | 72 | # receiving Tensor from all ranks 73 | # we pad the tensor because torch all_gather does not support 74 | # gathering tensors of different shapes 75 | tensor_list = [] 76 | for _ in size_list: 77 | tensor_list.append(torch.ByteTensor(size=(max_size,)).to("cuda")) 78 | if local_size != max_size: 79 | padding = torch.ByteTensor(size=(max_size - local_size,)).to("cuda") 80 | tensor = torch.cat((tensor, padding), dim=0) 81 | dist.all_gather(tensor_list, tensor) 82 | 83 | data_list = [] 84 | for size, tensor in zip(size_list, tensor_list): 85 | buffer = tensor.cpu().numpy().tobytes()[:size] 86 | data_list.append(pickle.loads(buffer)) 87 | 88 | return data_list 89 | 90 | 91 | def reduce_dict(input_dict, average=True): 92 | """ 93 | Args: 94 | input_dict (dict): all the values will be reduced 95 | average (bool): whether to do average or sum 96 | Reduce the values in the dictionary from all processes so that process with rank 97 | 0 has the averaged results. Returns a dict with the same fields as 98 | input_dict, after reduction. 
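Note: dist.reduce only defines the result on the destination rank, so only rank 0 should read the reduced values from the returned dict.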
99 | """ 100 | world_size = get_world_size() 101 | if world_size < 2: 102 | return input_dict 103 | with torch.no_grad(): 104 | names = [] 105 | values = [] 106 | # sort the keys so that they are consistent across processes 107 | for k in sorted(input_dict.keys()): 108 | names.append(k) 109 | values.append(input_dict[k]) 110 | values = torch.stack(values, dim=0) 111 | dist.reduce(values, dst=0) 112 | if dist.get_rank() == 0 and average: 113 | # only main process gets accumulated, so only divide by 114 | # world_size in this case 115 | values /= world_size 116 | reduced_dict = {k: v for k, v in zip(names, values)} 117 | return reduced_dict 118 | -------------------------------------------------------------------------------- /deprecated/utils/torch_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file contains primitives for multi-gpu communication. 3 | This is useful when doing distributed training. 4 | """ 5 | 6 | import pickle 7 | import time 8 | 9 | import torch 10 | import torch.distributed as dist 11 | 12 | 13 | def get_world_size(): 14 | if not dist.is_available(): 15 | return 1 16 | if not dist.is_initialized(): 17 | return 1 18 | return dist.get_world_size() 19 | 20 | 21 | def get_rank(): 22 | if not dist.is_available(): 23 | return 0 24 | if not dist.is_initialized(): 25 | return 0 26 | return dist.get_rank() 27 | 28 | 29 | def is_main_process(): 30 | return get_rank() == 0 31 | 32 | 33 | def synchronize(): 34 | """ 35 | Helper function to synchronize (barrier) among all processes when 36 | using distributed training 37 | """ 38 | if not dist.is_available(): 39 | return 40 | if not dist.is_initialized(): 41 | return 42 | world_size = dist.get_world_size() 43 | if world_size == 1: 44 | return 45 | dist.barrier() 46 | 47 | 48 | def all_gather(data): 49 | """ 50 | Run all_gather on arbitrary picklable data (not necessarily tensors) 51 | Args: 52 | data: any picklable object 53 | Returns: 54 | list[data]: list of data gathered from each rank 55 | """ 56 | world_size = get_world_size() 57 | if world_size == 1: 58 | return [data] 59 | 60 | # serialized to a Tensor 61 | buffer = pickle.dumps(data) 62 | storage = torch.ByteStorage.from_buffer(buffer) 63 | tensor = torch.ByteTensor(storage).to("cuda") 64 | 65 | # obtain Tensor size of each rank 66 | local_size = torch.LongTensor([tensor.numel()]).to("cuda") 67 | size_list = [torch.LongTensor([0]).to("cuda") for _ in range(world_size)] 68 | dist.all_gather(size_list, local_size) 69 | size_list = [int(size.item()) for size in size_list] 70 | max_size = max(size_list) 71 | 72 | # receiving Tensor from all ranks 73 | # we pad the tensor because torch all_gather does not support 74 | # gathering tensors of different shapes 75 | tensor_list = [] 76 | for _ in size_list: 77 | tensor_list.append(torch.ByteTensor(size=(max_size,)).to("cuda")) 78 | if local_size != max_size: 79 | padding = torch.ByteTensor(size=(max_size - local_size,)).to("cuda") 80 | tensor = torch.cat((tensor, padding), dim=0) 81 | dist.all_gather(tensor_list, tensor) 82 | 83 | data_list = [] 84 | for size, tensor in zip(size_list, tensor_list): 85 | buffer = tensor.cpu().numpy().tobytes()[:size] 86 | data_list.append(pickle.loads(buffer)) 87 | 88 | return data_list 89 | 90 | 91 | def reduce_dict(input_dict, average=True): 92 | """ 93 | Args: 94 | input_dict (dict): all the values will be reduced 95 | average (bool): whether to do average or sum 96 | Reduce the values in the dictionary from all processes so that process 
with rank 97 | 0 has the averaged results. Returns a dict with the same fields as 98 | input_dict, after reduction. 99 | """ 100 | world_size = get_world_size() 101 | if world_size < 2: 102 | return input_dict 103 | with torch.no_grad(): 104 | names = [] 105 | values = [] 106 | # sort the keys so that they are consistent across processes 107 | for k in sorted(input_dict.keys()): 108 | names.append(k) 109 | values.append(input_dict[k]) 110 | values = torch.stack(values, dim=0) 111 | dist.reduce(values, dst=0) 112 | if dist.get_rank() == 0 and average: 113 | # only main process gets accumulated, so only divide by 114 | # world_size in this case 115 | values /= world_size 116 | reduced_dict = {k: v for k, v in zip(names, values)} 117 | return reduced_dict 118 | -------------------------------------------------------------------------------- /deprecated/utils/lr_scheduler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from bisect import bisect_right 3 | 4 | import torch 5 | 6 | 7 | # FIXME ideally this would be achieved with a CombinedLRScheduler, 8 | # separating MultiStepLR with WarmupLR 9 | # but the current LRScheduler design doesn't allow it 10 | class WarmupMultiStepLR(torch.optim.lr_scheduler._LRScheduler): 11 | def __init__( 12 | self, 13 | optimizer, 14 | milestones, 15 | gamma=0.1, 16 | warmup_factor=1.0 / 3, 17 | warmup_iters=500, 18 | warmup_method="linear", 19 | last_epoch=-1, 20 | ): 21 | if not list(milestones) == sorted(milestones): 22 | raise ValueError( 23 | "Milestones should be a list of" " increasing integers. Got {}", 24 | milestones, 25 | ) 26 | 27 | if warmup_method not in ("constant", "linear"): 28 | raise ValueError( 29 | "Only 'constant' or 'linear' warmup_method accepted" 30 | "got {}".format(warmup_method) 31 | ) 32 | self.milestones = milestones 33 | self.gamma = gamma 34 | self.warmup_factor = warmup_factor 35 | self.warmup_iters = warmup_iters 36 | self.warmup_method = warmup_method 37 | super(WarmupMultiStepLR, self).__init__(optimizer, last_epoch) 38 | 39 | def get_lr(self): 40 | warmup_factor = 1 41 | if self.last_epoch < self.warmup_iters: 42 | if self.warmup_method == "constant": 43 | warmup_factor = self.warmup_factor 44 | elif self.warmup_method == "linear": 45 | alpha = float(self.last_epoch) / self.warmup_iters 46 | warmup_factor = self.warmup_factor * (1 - alpha) + alpha 47 | return [ 48 | base_lr 49 | * warmup_factor 50 | * self.gamma ** bisect_right(self.milestones, self.last_epoch) 51 | for base_lr in self.base_lrs 52 | ] 53 | 54 | 55 | 56 | class WarmupLinearLR(torch.optim.lr_scheduler._LRScheduler): 57 | def __init__( 58 | self, 59 | optimizer, 60 | final_lr, 61 | final_iters, 62 | warmup_factor=1.0 / 3, 63 | warmup_iters=500, 64 | warmup_method="linear", 65 | last_epoch=-1, 66 | ): 67 | assert final_iters > warmup_iters 68 | self.final_lr = final_lr 69 | self.final_iters = final_iters 70 | self.warmup_factor = warmup_factor 71 | self.warmup_iters = max(warmup_iters, 0) 72 | self.warmup_method = warmup_method 73 | super(WarmupLinearLR, self).__init__(optimizer, last_epoch) 74 | 75 | # last_epoch == 0: base_lr * warmup_factor 76 | # last_epoch == warmup_iters: base_lr 77 | # last_epoch == final_iters: final_lr 78 | 79 | def get_lr(self): 80 | if self.last_epoch < self.warmup_iters: 81 | if self.warmup_method == "constant": 82 | warmup_factor = self.warmup_factor 83 | elif self.warmup_method == "linear": 84 | alpha = 
float(self.last_epoch) / self.warmup_iters 85 | warmup_factor = self.warmup_factor * (1 - alpha) + alpha 86 | else: 87 | raise ValueError( 88 | "Only 'constant' or 'linear' warmup_method accepted" 89 | "got {}".format(self.warmup_method) 90 | ) 91 | return [ 92 | base_lr 93 | * warmup_factor 94 | for base_lr in self.base_lrs 95 | ] 96 | else: 97 | return [ 98 | base_lr - (base_lr - self.final_lr) * float(self.last_epoch - self.warmup_iters) / ( 99 | self.final_iters - self.warmup_iters) 100 | for base_lr in self.base_lrs 101 | ] -------------------------------------------------------------------------------- /deprecated/base_config.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | from model_map import get_dataset_name_by_model_name 3 | 4 | BaseConfigByEpoch = namedtuple('BaseConfigByEpoch', ['network_type', 'dataset_name', 'dataset_subset', 'global_batch_size', 'num_node', 'device', 5 | 'weight_decay', 'weight_decay_bias', 'optimizer_type', 'momentum', 6 | 'bias_lr_factor', 'max_epochs', 'base_lr', 'lr_epoch_boundaries', 'lr_decay_factor', 'linear_final_lr', 7 | 'warmup_epochs', 'warmup_method', 'warmup_factor', 8 | 'ckpt_iter_period', 'tb_iter_period', 9 | 'output_dir', 'tb_dir', 10 | 'init_weights', 'save_weights', 11 | 'val_epoch_period', 'grad_accum_iters', 12 | 'deps', 13 | 'se_reduce_scale']) 14 | 15 | def get_baseconfig_by_epoch(network_type, dataset_name, dataset_subset, global_batch_size, num_node, 16 | weight_decay, optimizer_type, momentum, 17 | max_epochs, base_lr, lr_epoch_boundaries, lr_decay_factor, linear_final_lr, 18 | warmup_epochs, warmup_method, warmup_factor, 19 | ckpt_iter_period, tb_iter_period, 20 | output_dir, tb_dir, save_weights, 21 | device='cuda', weight_decay_bias=0, bias_lr_factor=2, init_weights=None, val_epoch_period=-1, grad_accum_iters=1, 22 | deps=None, 23 | se_reduce_scale=0): 24 | print('----------------- show lr schedule --------------') 25 | print('base_lr:', base_lr) 26 | print('max_epochs:', max_epochs) 27 | print('lr_epochs:', lr_epoch_boundaries) 28 | print('lr_decay:', lr_decay_factor) 29 | print('linear_final_lr:', linear_final_lr) 30 | print('-------------------------------------------------') 31 | 32 | return BaseConfigByEpoch(network_type=network_type,dataset_name=dataset_name,dataset_subset=dataset_subset,global_batch_size=global_batch_size,num_node=num_node, device=device, 33 | weight_decay=weight_decay,weight_decay_bias=weight_decay_bias,optimizer_type=optimizer_type,momentum=momentum,bias_lr_factor=bias_lr_factor, 34 | max_epochs=max_epochs, base_lr=base_lr, lr_epoch_boundaries=lr_epoch_boundaries,lr_decay_factor=lr_decay_factor, linear_final_lr=linear_final_lr, 35 | warmup_epochs=warmup_epochs,warmup_method=warmup_method,warmup_factor=warmup_factor, 36 | ckpt_iter_period=int(ckpt_iter_period),tb_iter_period=int(tb_iter_period), 37 | output_dir=output_dir, tb_dir=tb_dir, 38 | init_weights=init_weights, save_weights=save_weights, 39 | val_epoch_period=val_epoch_period, grad_accum_iters=grad_accum_iters, deps=deps, se_reduce_scale=se_reduce_scale) 40 | 41 | def get_baseconfig_for_test(network_type, dataset_subset, global_batch_size, init_weights, device='cuda', deps=None, se_reduce_scale=0): 42 | return BaseConfigByEpoch(network_type=network_type, dataset_name=get_dataset_name_by_model_name(network_type), 43 | dataset_subset=dataset_subset, global_batch_size=global_batch_size, num_node=1, device=device, 44 | weight_decay=None, weight_decay_bias=None, 
optimizer_type=None, momentum=None, bias_lr_factor=None, 45 | max_epochs=None, base_lr=None, lr_epoch_boundaries=None, lr_decay_factor=None, linear_final_lr=None, 46 | warmup_epochs=None, warmup_method=None, warmup_factor=None, ckpt_iter_period=None, 47 | tb_iter_period=None, output_dir=None, tb_dir=None, init_weights=init_weights, 48 | save_weights=None, val_epoch_period=None, grad_accum_iters=None, deps=deps, se_reduce_scale=se_reduce_scale) -------------------------------------------------------------------------------- /utils/comm.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file is maily copied from https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/utils/comm.py 3 | This file contains primitives for multi-gpu communication. 4 | This is useful when doing distributed training. 5 | """ 6 | 7 | 8 | import torch 9 | import torch.distributed as dist 10 | 11 | import pickle 12 | import time 13 | 14 | 15 | def get_world_size(): 16 | if not dist.is_available(): 17 | return 1 18 | if not dist.is_initialized(): 19 | return 1 20 | return dist.get_world_size() 21 | 22 | 23 | def get_rank(): 24 | if not dist.is_available(): 25 | return 0 26 | if not dist.is_initialized(): 27 | return 0 28 | return dist.get_rank() 29 | 30 | 31 | def is_main_process(): 32 | return get_rank() == 0 33 | 34 | 35 | def synchronize(): 36 | """ 37 | Helper function to synchronize (barrier) among all processes when 38 | using distributed training 39 | """ 40 | if not dist.is_available(): 41 | return 42 | if not dist.is_initialized(): 43 | return 44 | world_size = dist.get_world_size() 45 | if world_size == 1: 46 | return 47 | dist.barrier() 48 | 49 | def reduce_loss_dict(loss_dict): 50 | """ 51 | Reduce the loss dictionary from all processes so that process with rank 52 | 0 has the averaged results. Returns a dict with the same fields as 53 | loss_dict, after reduction. 
(avg) 54 | """ 55 | world_size = get_world_size() 56 | if world_size < 2: 57 | return loss_dict 58 | with torch.no_grad(): 59 | loss_names = [] 60 | all_losses = [] 61 | for k in sorted(loss_dict.keys()): 62 | loss_names.append(k) 63 | all_losses.append(loss_dict[k]) 64 | all_losses = torch.stack(all_losses, dim=0) 65 | dist.reduce(all_losses, dst=0) 66 | if dist.get_rank() == 0: 67 | # only main process gets accumulated, so only divide by 68 | # world_size in this case 69 | all_losses /= world_size 70 | reduced_losses = {k: v for k, v in zip(loss_names, all_losses)} 71 | return reduced_losses 72 | 73 | 74 | def all_gather(data): 75 | """ 76 | Run all_gather on arbitrary picklable data (not necessarily tensors) 77 | Args: 78 | data: any picklable object 79 | Returns: 80 | list[data]: list of data gathered from each rank 81 | """ 82 | world_size = get_world_size() 83 | if world_size == 1: 84 | return [data] 85 | 86 | # serialized to a Tensor 87 | buffer = pickle.dumps(data) 88 | storage = torch.ByteStorage.from_buffer(buffer) 89 | tensor = torch.ByteTensor(storage).to("cuda") 90 | 91 | # obtain Tensor size of each rank 92 | local_size = torch.IntTensor([tensor.numel()]).to("cuda") 93 | size_list = [torch.IntTensor([0]).to("cuda") for _ in range(world_size)] 94 | dist.all_gather(size_list, local_size) 95 | size_list = [int(size.item()) for size in size_list] 96 | max_size = max(size_list) 97 | 98 | # receiving Tensor from all ranks 99 | # we pad the tensor because torch all_gather does not support 100 | # gathering tensors of different shapes 101 | tensor_list = [] 102 | for _ in size_list: 103 | tensor_list.append(torch.ByteTensor(size=(max_size,)).to("cuda")) 104 | if local_size != max_size: 105 | padding = torch.ByteTensor(size=(max_size - local_size,)).to("cuda") 106 | tensor = torch.cat((tensor, padding), dim=0) 107 | dist.all_gather(tensor_list, tensor) 108 | 109 | data_list = [] 110 | for size, tensor in zip(size_list, tensor_list): 111 | buffer = tensor.cpu().numpy().tobytes()[:size] 112 | data_list.append(pickle.loads(buffer)) 113 | 114 | return data_list 115 | 116 | 117 | 118 | def my_reduce_dic(dic): 119 | dics = all_gather(dic) 120 | 121 | if is_main_process(): 122 | for key in dic.keys(): 123 | value = 0 124 | for tdic in dics: 125 | value = value + tdic[key] 126 | dic[key] = value / (len(dics)) 127 | return dic -------------------------------------------------------------------------------- /deprecated/utils/comm.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file is maily copied from https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/utils/comm.py 3 | This file contains primitives for multi-gpu communication. 4 | This is useful when doing distributed training. 
5 | """ 6 | 7 | 8 | import torch 9 | import torch.distributed as dist 10 | 11 | import pickle 12 | import time 13 | 14 | 15 | def get_world_size(): 16 | if not dist.is_available(): 17 | return 1 18 | if not dist.is_initialized(): 19 | return 1 20 | return dist.get_world_size() 21 | 22 | 23 | def get_rank(): 24 | if not dist.is_available(): 25 | return 0 26 | if not dist.is_initialized(): 27 | return 0 28 | return dist.get_rank() 29 | 30 | 31 | def is_main_process(): 32 | return get_rank() == 0 33 | 34 | 35 | def synchronize(): 36 | """ 37 | Helper function to synchronize (barrier) among all processes when 38 | using distributed training 39 | """ 40 | if not dist.is_available(): 41 | return 42 | if not dist.is_initialized(): 43 | return 44 | world_size = dist.get_world_size() 45 | if world_size == 1: 46 | return 47 | dist.barrier() 48 | 49 | def reduce_loss_dict(loss_dict): 50 | """ 51 | Reduce the loss dictionary from all processes so that process with rank 52 | 0 has the averaged results. Returns a dict with the same fields as 53 | loss_dict, after reduction. (avg) 54 | """ 55 | world_size = get_world_size() 56 | if world_size < 2: 57 | return loss_dict 58 | with torch.no_grad(): 59 | loss_names = [] 60 | all_losses = [] 61 | for k in sorted(loss_dict.keys()): 62 | loss_names.append(k) 63 | all_losses.append(loss_dict[k]) 64 | all_losses = torch.stack(all_losses, dim=0) 65 | dist.reduce(all_losses, dst=0) 66 | if dist.get_rank() == 0: 67 | # only main process gets accumulated, so only divide by 68 | # world_size in this case 69 | all_losses /= world_size 70 | reduced_losses = {k: v for k, v in zip(loss_names, all_losses)} 71 | return reduced_losses 72 | 73 | 74 | def all_gather(data): 75 | """ 76 | Run all_gather on arbitrary picklable data (not necessarily tensors) 77 | Args: 78 | data: any picklable object 79 | Returns: 80 | list[data]: list of data gathered from each rank 81 | """ 82 | world_size = get_world_size() 83 | if world_size == 1: 84 | return [data] 85 | 86 | # serialized to a Tensor 87 | buffer = pickle.dumps(data) 88 | storage = torch.ByteStorage.from_buffer(buffer) 89 | tensor = torch.ByteTensor(storage).to("cuda") 90 | 91 | # obtain Tensor size of each rank 92 | local_size = torch.IntTensor([tensor.numel()]).to("cuda") 93 | size_list = [torch.IntTensor([0]).to("cuda") for _ in range(world_size)] 94 | dist.all_gather(size_list, local_size) 95 | size_list = [int(size.item()) for size in size_list] 96 | max_size = max(size_list) 97 | 98 | # receiving Tensor from all ranks 99 | # we pad the tensor because torch all_gather does not support 100 | # gathering tensors of different shapes 101 | tensor_list = [] 102 | for _ in size_list: 103 | tensor_list.append(torch.ByteTensor(size=(max_size,)).to("cuda")) 104 | if local_size != max_size: 105 | padding = torch.ByteTensor(size=(max_size - local_size,)).to("cuda") 106 | tensor = torch.cat((tensor, padding), dim=0) 107 | dist.all_gather(tensor_list, tensor) 108 | 109 | data_list = [] 110 | for size, tensor in zip(size_list, tensor_list): 111 | buffer = tensor.cpu().numpy().tobytes()[:size] 112 | data_list.append(pickle.loads(buffer)) 113 | 114 | return data_list 115 | 116 | 117 | 118 | def my_reduce_dic(dic): 119 | dics = all_gather(dic) 120 | 121 | if is_main_process(): 122 | for key in dic.keys(): 123 | value = 0 124 | for tdic in dics: 125 | value = value + tdic[key] 126 | dic[key] = value / (len(dics)) 127 | return dic -------------------------------------------------------------------------------- /acnet/acnet_builder.py: 
-------------------------------------------------------------------------------- 1 | from builder import ConvBuilder 2 | from acnet.acb import ACBlock 3 | import torch.nn as nn 4 | 5 | class ACNetBuilder(ConvBuilder): 6 | 7 | def __init__(self, base_config, deploy, gamma_init=None): 8 | super(ACNetBuilder, self).__init__(base_config=base_config) 9 | self.deploy = deploy 10 | self.use_last_bn = False 11 | self.gamma_init = gamma_init 12 | 13 | def switch_to_deploy(self): 14 | self.deploy = True 15 | 16 | def Conv2d(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', use_original_conv=False): 17 | if use_original_conv or kernel_size == 1 or kernel_size == (1, 1) or kernel_size >= 7: 18 | return super(ACNetBuilder, self).Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, 19 | padding=padding, dilation=dilation, groups=groups, bias=bias, padding_mode=padding_mode, use_original_conv=True) 20 | else: 21 | return ACBlock(in_channels, out_channels, kernel_size=kernel_size, stride=stride, 22 | padding=padding, dilation=dilation, groups=groups, padding_mode=padding_mode, deploy=self.deploy, 23 | use_last_bn=self.use_last_bn, gamma_init=self.gamma_init) 24 | 25 | 26 | def Conv2dBN(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, padding_mode='zeros', use_original_conv=False): 27 | if use_original_conv or kernel_size == 1 or kernel_size == (1, 1) or kernel_size >= 7: 28 | return super(ACNetBuilder, self).Conv2dBN(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, 29 | padding=padding, dilation=dilation, groups=groups, padding_mode=padding_mode, use_original_conv=True) 30 | else: 31 | return ACBlock(in_channels, out_channels, kernel_size=kernel_size, stride=stride, 32 | padding=padding, dilation=dilation, groups=groups, padding_mode=padding_mode, deploy=self.deploy, 33 | use_last_bn=self.use_last_bn, gamma_init=self.gamma_init) 34 | 35 | 36 | def Conv2dBNReLU(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, padding_mode='zeros', use_original_conv=False): 37 | if use_original_conv or kernel_size == 1 or kernel_size == (1, 1) or kernel_size >= 7: 38 | return super(ACNetBuilder, self).Conv2dBNReLU(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, 39 | padding=padding, dilation=dilation, groups=groups, padding_mode=padding_mode, use_original_conv=True) 40 | else: 41 | se = nn.Sequential() 42 | se.add_module('acb', ACBlock(in_channels, out_channels, kernel_size=kernel_size, stride=stride, 43 | padding=padding, dilation=dilation, groups=groups, padding_mode=padding_mode, deploy=self.deploy, 44 | use_last_bn=self.use_last_bn, gamma_init=self.gamma_init)) 45 | se.add_module('relu', self.ReLU()) 46 | return se 47 | 48 | 49 | def BNReLUConv2d(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, padding_mode='zeros', use_original_conv=False): 50 | if use_original_conv or kernel_size == 1 or kernel_size == (1, 1) or kernel_size >= 7: 51 | return super(ACNetBuilder, self).BNReLUConv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, 52 | padding=padding, dilation=dilation, groups=groups, padding_mode=padding_mode, use_original_conv=True) 53 | bn_layer = self.BatchNorm2d(num_features=in_channels) 54 | conv_layer = ACBlock(in_channels, out_channels, 
kernel_size=kernel_size, stride=stride, 55 | padding=padding, dilation=dilation, groups=groups, padding_mode=padding_mode, deploy=self.deploy) 56 | se = self.Sequential() 57 | se.add_module('bn', bn_layer) 58 | se.add_module('relu', self.ReLU()) 59 | se.add_module('acb', conv_layer) 60 | return se -------------------------------------------------------------------------------- /utils/checkpoint.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import time 3 | from collections import OrderedDict 4 | import os 5 | 6 | def get_last_checkpoint(dir): 7 | 'iter-200000.pth' 8 | target_ckpts = [t for t in os.listdir(dir) if '.pth' in t] 9 | if 'latest.pth' in target_ckpts: 10 | return os.path.join(dir, 'latest.pth') 11 | target_ckpts.sort(key=lambda x: int(x.replace('iter-', '').replace('.pth', ''))) 12 | ckpt = os.path.join(dir, target_ckpts[-1]) 13 | return ckpt 14 | 15 | def load_model(model, model_file, logger): 16 | t_start = time.time() 17 | if isinstance(model_file, str): 18 | state_dict = torch.load(model_file, map_location='cpu') 19 | if 'model' in state_dict.keys(): 20 | state_dict = state_dict['model'] 21 | else: 22 | state_dict = model_file 23 | 24 | state_dict = _align_and_update_loaded_state_dicts( 25 | model.state_dict(), state_dict) 26 | t_io_end = time.time() 27 | 28 | # if is_restore: 29 | # new_state_dict = OrderedDict() 30 | # for k, v in state_dict.items(): 31 | # name = 'module.' + k 32 | # new_state_dict[name] = v 33 | # state_dict = new_state_dict 34 | model.load_state_dict(state_dict, strict=False) 35 | ckpt_keys = set(state_dict.keys()) 36 | own_keys = set(model.state_dict().keys()) 37 | missing_keys = own_keys - ckpt_keys 38 | unexpected_keys = ckpt_keys - own_keys 39 | 40 | if len(missing_keys) > 0 and logger is not None: 41 | logger.warning('Missing key(s) in state_dict: {}'.format( 42 | ', '.join('{}'.format(k) for k in missing_keys))) 43 | 44 | if len(unexpected_keys) > 0 and logger is not None: 45 | logger.warning('Unexpected key(s) in state_dict: {}'.format( 46 | ', '.join('{}'.format(k) for k in unexpected_keys))) 47 | 48 | del state_dict 49 | t_end = time.time() 50 | if logger is not None: 51 | logger.info( 52 | "Load model, Time usage:\n\tIO: {}, " 53 | "initialize parameters: {}".format( 54 | t_io_end - t_start, t_end - t_io_end)) 55 | 56 | return model 57 | 58 | 59 | def _align_and_update_loaded_state_dicts(model_state_dict, loaded_state_dict): 60 | """ 61 | Strategy: suppose that the models that we will create will have 62 | prefixes appended to each of its keys, for example due to an extra 63 | level of nesting that the original pre-trained weights from ImageNet 64 | won't contain. For example, model.state_dict() might return 65 | backbone[0].body.res2.conv1.weight, while the pre-trained model contains 66 | res2.conv1.weight. We thus want to match both parameters together. 67 | For that, we look for each model weight, look among all loaded keys 68 | if there is one that is a suffix of the current weight name, 69 | and use it if that's the case. If multiple matches exist, 70 | take the one with longest size of the corresponding name. For example, 71 | for the same model as before, the pretrained weight file can contain 72 | both res2.conv1.weight, as well as conv1.weight. In this case, 73 | we want to match backbone[0].body.conv1.weight to conv1.weight, and 74 | backbone[0].body.res2.conv1.weight to res2.conv1.weight. 
75 | """ 76 | current_keys = sorted(list(model_state_dict.keys())) 77 | loaded_keys = sorted(list(loaded_state_dict.keys())) 78 | aligned_loaded_state_dict = loaded_state_dict.copy() 79 | 80 | # get a matrix of string matches, where each (i, j) entry 81 | # correspond to the size of the loaded_key string, if it matches 82 | match_matrix = [ 83 | len(j) if i.endswith(j) else 0 for i in current_keys for j in 84 | loaded_keys] 85 | match_matrix = torch.as_tensor(match_matrix).view( 86 | len(current_keys), len(loaded_keys)) 87 | max_match_size, idxs = match_matrix.max(1) 88 | idxs[max_match_size == 0] = -1 89 | 90 | for idx_new, idx_old in enumerate(idxs.tolist()): 91 | if idx_old == -1: 92 | continue 93 | key = current_keys[idx_new] 94 | key_old = loaded_keys[idx_old] 95 | aligned_loaded_state_dict[key] = \ 96 | aligned_loaded_state_dict.pop(key_old) 97 | del loaded_state_dict 98 | return aligned_loaded_state_dict 99 | -------------------------------------------------------------------------------- /base_model/wrn.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from builder import ConvBuilder 3 | 4 | class WRNCifarBlock(nn.Module): 5 | 6 | def __init__(self, input_channels, block_channels, stride, projection_shortcut, use_dropout, builder:ConvBuilder): 7 | super(WRNCifarBlock, self).__init__() 8 | assert len(block_channels) == 2 9 | 10 | if projection_shortcut: 11 | self.proj = builder.BNReLUConv2d(in_channels=input_channels, out_channels=block_channels[1], kernel_size=1, stride=stride, padding=0) 12 | else: 13 | self.proj = builder.ResIdentity(num_channels=block_channels[1]) 14 | 15 | self.conv1 = builder.BNReLUConv2d(in_channels=input_channels, out_channels=block_channels[0], kernel_size=3, 16 | stride=stride, padding=1) 17 | if use_dropout: 18 | self.dropout = builder.Dropout(keep_prob=0.7) 19 | print('use dropout for WRN') 20 | else: 21 | self.dropout = builder.Identity() 22 | self.conv2 = builder.BNReLUConv2d(in_channels=block_channels[0], out_channels=block_channels[1], kernel_size=3, 23 | stride=1, padding=1) 24 | 25 | def forward(self, input): 26 | x = self.conv1(input) 27 | x = self.dropout(x) 28 | x = self.conv2(x) 29 | x += self.proj(input) 30 | return x 31 | 32 | class WRNCifarNet(nn.Module): 33 | 34 | def __init__(self, block_counts, num_classes, builder:ConvBuilder, use_dropout): 35 | super(WRNCifarNet, self).__init__() 36 | self.bd = builder 37 | assert block_counts == (2,2,2) 38 | converted_deps = [16, [[128, 128], [128, 128]], [[256, 256], [256, 256]], [[512, 512], [512, 512]]] 39 | print('the converted deps is ', converted_deps) 40 | 41 | self.conv1 = builder.Conv2d(in_channels=3, out_channels=converted_deps[0], kernel_size=3, stride=1, padding=1, bias=False) 42 | self.stage1 = self._build_wrn_stage(num_blocks=block_counts[0], stage_input_channels=converted_deps[0], 43 | stage_deps=converted_deps[1], downsample=False, use_dropout=use_dropout) 44 | self.stage2 = self._build_wrn_stage(num_blocks=block_counts[1], stage_input_channels=converted_deps[1][-1][1], 45 | stage_deps=converted_deps[2], downsample=True, use_dropout=use_dropout) 46 | self.stage3 = self._build_wrn_stage(num_blocks=block_counts[2], stage_input_channels=converted_deps[2][-1][1], 47 | stage_deps=converted_deps[3], downsample=True, use_dropout=use_dropout) 48 | self.last_bn = builder.BatchNorm2d(num_features=converted_deps[3][-1][1]) 49 | self.linear = builder.Linear(in_features=converted_deps[3][-1][1], out_features=num_classes) 50 | 51 | 52 | 
def _build_wrn_stage(self, num_blocks, stage_input_channels, stage_deps, downsample, use_dropout): 53 | se = self.bd.Sequential() 54 | for i in range(num_blocks): 55 | if i == 0: 56 | block_input_channels = stage_input_channels 57 | else: 58 | block_input_channels = stage_deps[i - 1][1] 59 | if i == 0 and downsample: 60 | stride = 2 61 | else: 62 | stride = 1 63 | se.add_module(name='block{}'.format(i+1), 64 | module=WRNCifarBlock(input_channels=block_input_channels, block_channels=stage_deps[i], 65 | stride=stride, projection_shortcut=(i==0), use_dropout=use_dropout, builder=self.bd)) 66 | return se 67 | 68 | def forward(self, x): 69 | out = self.conv1(x) 70 | out = self.stage1(out) 71 | out = self.stage2(out) 72 | out = self.stage3(out) 73 | out = self.last_bn(out) 74 | out = self.bd.avg_pool2d(in_features=out, kernel_size=8, stride=1, padding=0) 75 | out = self.bd.flatten(out) 76 | out = self.linear(out) 77 | return out 78 | 79 | 80 | 81 | def create_wrnc16plain(cfg, builder): 82 | return WRNCifarNet(block_counts=(2,2,2), num_classes=10, builder=builder, use_dropout=False) 83 | def create_wrnc16drop(cfg, builder): 84 | return WRNCifarNet(block_counts=(2,2,2), num_classes=10, builder=builder, use_dropout=True) -------------------------------------------------------------------------------- /deprecated/dataset.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torchvision import datasets, transforms 3 | 4 | MNIST_PATH = '/home/dingxiaohan/datasets/torch_mnist/' 5 | CIFAR10_PATH = '/home/dingxiaohan/datasets/cifar-10-batches-py/' 6 | CH_PATH = '/home/dingxiaohan/datasets/torch_ch/' 7 | SVHN_PATH = '/home/dingxiaohan/datasets/torch_svhn/' 8 | 9 | 10 | class InfiniteDataLoader(torch.utils.data.DataLoader): 11 | def __init__(self, *args, **kwargs): 12 | super().__init__(*args, **kwargs) 13 | # Initialize an iterator over the dataset. 14 | self.dataset_iterator = super().__iter__() 15 | 16 | def __iter__(self): 17 | return self 18 | 19 | def __next__(self): 20 | try: 21 | batch = next(self.dataset_iterator) 22 | except StopIteration: 23 | # Dataset exhausted, use a new fresh iterator. 
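                # (Annotation, not part of the original file.) Re-creating the iterator here
                # makes the loader cycle over the dataset forever, so the training loop can
                # call next(loader) every iteration without handling StopIteration at epoch
                # boundaries.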
24 | self.dataset_iterator = super().__iter__() 25 | batch = next(self.dataset_iterator) 26 | return batch 27 | 28 | 29 | def create_dataset(dataset_name, subset, batch_size): 30 | assert dataset_name in ['imagenet', 'cifar10', 'ch', 'svhn', 'mnist'] 31 | assert subset in ['train', 'val'] 32 | if dataset_name == 'imagenet': 33 | raise ValueError('TODO') 34 | 35 | # copied from https://github.com/pytorch/examples/blob/master/mnist/main.py 36 | elif dataset_name == 'mnist': 37 | if subset == 'train': 38 | return InfiniteDataLoader(datasets.MNIST(MNIST_PATH, train=True, download=True, 39 | transform=transforms.Compose([ 40 | transforms.ToTensor(), 41 | transforms.Normalize((0.1307,), (0.3081,))])), batch_size=batch_size, shuffle=True) 42 | else: 43 | return InfiniteDataLoader(datasets.MNIST(MNIST_PATH, train=False, transform=transforms.Compose([ 44 | transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])), 45 | batch_size=batch_size, shuffle=False) 46 | 47 | 48 | 49 | elif dataset_name == 'cifar10': 50 | if subset == 'train': 51 | return InfiniteDataLoader(datasets.CIFAR10(CIFAR10_PATH, train=True, download=False, 52 | transform=transforms.Compose([ 53 | transforms.Pad(padding=(4, 4, 4, 4)), 54 | transforms.RandomCrop(32), 55 | transforms.RandomHorizontalFlip(), 56 | transforms.ToTensor(), 57 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])), 58 | batch_size=batch_size, shuffle=True) 59 | else: 60 | return InfiniteDataLoader(datasets.CIFAR10(CIFAR10_PATH, train=False, 61 | transform=transforms.Compose([ 62 | transforms.ToTensor(), 63 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])), 64 | batch_size=batch_size, shuffle=False) 65 | 66 | elif dataset_name == 'ch': 67 | if subset == 'train': 68 | return InfiniteDataLoader(datasets.CIFAR100(CH_PATH, train=True, download=True, 69 | transform=transforms.Compose([ 70 | transforms.Pad(padding=(4, 4, 4, 4)), 71 | transforms.RandomCrop(32), 72 | transforms.RandomHorizontalFlip(), 73 | transforms.ToTensor(), 74 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])), 75 | batch_size=batch_size, shuffle=True) 76 | else: 77 | return InfiniteDataLoader(datasets.CIFAR100(CH_PATH, train=False, 78 | transform=transforms.Compose([ 79 | transforms.ToTensor(), 80 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])), 81 | batch_size=batch_size, shuffle=False) 82 | 83 | else: 84 | assert False 85 | 86 | 87 | def num_train_examples_per_epoch(dataset_name): 88 | if dataset_name == 'imagenet': 89 | return 1281167 90 | elif dataset_name == 'mnist': 91 | return 60000 92 | elif dataset_name in ['cifar10', 'ch']: 93 | return 50000 94 | else: 95 | assert False -------------------------------------------------------------------------------- /show_log.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import re 3 | import numpy as np 4 | import sys 5 | 6 | root_dirs = ['ria_exps'] 7 | num_logs = 5 8 | if len(sys.argv) > 1: 9 | root_dirs = [sys.argv[1]] 10 | if len(sys.argv) > 2: 11 | excluded = sys.argv[2:] 12 | else: 13 | excluded = None 14 | 15 | 16 | 17 | top1_pattern = re.compile('top1=(\-*\d+(?:\.\d+)?)') 18 | top5_pattern = re.compile('top5=(\-*\d+(?:\.\d+)?)') 19 | loss_pattern = re.compile('loss=(\-*\d+(?:\.\d+)?)') 20 | 21 | speed_pattern = re.compile(',(\-*\d+(?:\.\d+)?)example/s') 22 | 23 | 24 | 25 | def get_value_by_pattern(pattern, line): 26 | return float(re.findall(pattern, line)[0]) 27 | 28 | def parse_top1_top5_loss_from_log_line(log_line): 29 | top1 = 
get_value_by_pattern(top1_pattern, log_line) 30 | top5 = get_value_by_pattern(top5_pattern, log_line) 31 | loss = get_value_by_pattern(loss_pattern, log_line) 32 | return top1, top5, loss 33 | 34 | 35 | 36 | 37 | log_files = [] 38 | for root_dir in root_dirs: 39 | fs = glob.glob('{}/*/log.txt'.format(root_dir)) 40 | log_files += fs 41 | 42 | for file_path in log_files: 43 | if 'lrsRZ' in file_path: 44 | continue 45 | skip = False 46 | if excluded is not None: 47 | for ex in excluded: 48 | if ex in file_path: 49 | skip = True 50 | break 51 | if skip: 52 | continue 53 | top1_list = [] 54 | top5_list = [] 55 | loss_list = [] 56 | baseline_speed = 0 57 | exp_speed = 0 58 | with open(file_path, 'r') as f: 59 | origin_lines = f.readlines() 60 | for l in origin_lines: 61 | if 'baseline speed' in l: 62 | baseline_speed = get_value_by_pattern(speed_pattern, l) 63 | elif 'bbf speed' in l or 'exp speed' in l or 'ent speed' in l: 64 | exp_speed = get_value_by_pattern(speed_pattern, l) 65 | break 66 | 67 | log_lines = [l for l in origin_lines if 'top1' in l and 'top5' in l and 'loss' in l and 'beginning' not in l] 68 | avg_loss = '----' 69 | params = '----' 70 | train_speed = '----' 71 | deploy_speed = '----' 72 | for l in origin_lines[-5:]: 73 | if 'TRAIN LOSS collected over last' in l: 74 | avg_loss = l.strip()[-8:] 75 | if 'num of params in hdf5' in l: 76 | params = l.strip().split('=')[1] 77 | if 'TRAIN speed' in l: 78 | train_speed = float(l.strip().split('=')[-1]) 79 | train_speed = '{:.2f}'.format(train_speed) 80 | if 'DEPLOY TEST' in l: 81 | ll = l.strip().split(' ') 82 | examples = int(ll[4]) 83 | secs = float(ll[6]) 84 | deploy_speed = examples / secs 85 | deploy_speed = '{:.2f}'.format(deploy_speed) 86 | last_lines = log_lines[-num_logs:] 87 | for l in last_lines: 88 | if 'top1' not in l or 'loss' not in l or 'top5' not in l: 89 | continue 90 | top1, top5, loss = parse_top1_top5_loss_from_log_line(l) 91 | top1_list.append(top1) 92 | top5_list.append(top5) 93 | loss_list.append(loss) 94 | if len(top1_list) < num_logs: 95 | continue 96 | # network_try_arg = file_path.split('/')[1].replace('_train', '') 97 | network_try_arg = file_path.replace('_train/log.txt', '') 98 | last_validation = last_lines[-1] 99 | last_epoch_pattern = re.compile('epoch (\d+)') 100 | 101 | last_epoch = int(last_epoch_pattern.findall(last_validation)[0]) 102 | 103 | if exp_speed > 0: 104 | speedup = exp_speed / baseline_speed 105 | else: 106 | speedup = 0 107 | 108 | thresh = '' 109 | flops_r = '' 110 | for ol in origin_lines[-70:-1]: 111 | # print(ol) 112 | if 'thres 1e-05' in ol: 113 | thresh = '1e-5' 114 | elif 'thres 1e-06' in ol: 115 | thresh = '1e-6' 116 | if 'FLOPs' in ol: 117 | flops_r = ol[ol.index('FLOPs'):].strip() 118 | 119 | msg = '{} \t maxtop1={:.3f}, spdup={:.3f}, mean={:.3f}, loss={:.5f}, {} logs, tr_loss={}, para={}, ts={}, ds={}, last={}'.format(network_try_arg, 120 | np.max(top1_list), speedup, np.mean(top1_list), np.mean(loss_list), 121 | len(top1_list), avg_loss, params, train_speed, deploy_speed, last_epoch) 122 | if len(flops_r) > 0: 123 | msg += ' ' + thresh + ':' + flops_r 124 | print(msg) 125 | -------------------------------------------------------------------------------- /base_config.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | from model_map import get_dataset_name_by_model_name 3 | import numpy as np 4 | 5 | BaseConfigByEpoch = namedtuple('BaseConfigByEpoch', ['network_type', 'dataset_name', 
'dataset_subset', 'global_batch_size', 'num_node', 'device', 6 | 'weight_decay', 'weight_decay_bias', 'optimizer_type', 'momentum', 7 | 'bias_lr_factor', 'max_epochs', 'base_lr', 'lr_epoch_boundaries', 'lr_decay_factor', 'linear_final_lr', 'cosine_minimum', 8 | 'warmup_epochs', 'warmup_method', 'warmup_factor', 9 | 'ckpt_iter_period', 'tb_iter_period', 10 | 'output_dir', 'tb_dir', 11 | 'init_weights', 'save_weights', 12 | 'val_epoch_period', 'grad_accum_iters', 13 | 'deps', 14 | 'se_reduce_scale', 'se_layers']) 15 | 16 | 17 | def get_baseconfig_by_epoch(network_type, dataset_name, dataset_subset, global_batch_size, num_node, 18 | weight_decay, optimizer_type, momentum, 19 | max_epochs, base_lr, lr_epoch_boundaries, lr_decay_factor, linear_final_lr, cosine_minimum, 20 | warmup_epochs, warmup_method, warmup_factor, 21 | ckpt_iter_period, tb_iter_period, 22 | output_dir, tb_dir, save_weights, 23 | device='cuda', weight_decay_bias=0, bias_lr_factor=2, init_weights=None, val_epoch_period=-1, grad_accum_iters=1, 24 | deps=None, 25 | se_reduce_scale=0, se_layers=None): 26 | print('----------------- show lr schedule --------------') 27 | print('base_lr:', base_lr) 28 | print('max_epochs:', max_epochs) 29 | print('lr_epochs:', lr_epoch_boundaries) 30 | print('lr_decay:', lr_decay_factor) 31 | print('linear_final_lr:', linear_final_lr) 32 | print('-------------------------------------------------') 33 | 34 | if deps is not None: 35 | deps = np.array(deps, dtype=np.int) 36 | 37 | return BaseConfigByEpoch(network_type=network_type,dataset_name=dataset_name,dataset_subset=dataset_subset,global_batch_size=global_batch_size,num_node=num_node, device=device, 38 | weight_decay=weight_decay,weight_decay_bias=weight_decay_bias,optimizer_type=optimizer_type,momentum=momentum,bias_lr_factor=bias_lr_factor, 39 | max_epochs=max_epochs, base_lr=base_lr, lr_epoch_boundaries=lr_epoch_boundaries,lr_decay_factor=lr_decay_factor, linear_final_lr=linear_final_lr, cosine_minimum=cosine_minimum, 40 | warmup_epochs=warmup_epochs,warmup_method=warmup_method,warmup_factor=warmup_factor, 41 | ckpt_iter_period=int(ckpt_iter_period),tb_iter_period=int(tb_iter_period), 42 | output_dir=output_dir, tb_dir=tb_dir, 43 | init_weights=init_weights, save_weights=save_weights, 44 | val_epoch_period=val_epoch_period, grad_accum_iters=grad_accum_iters, deps=deps, se_reduce_scale=se_reduce_scale, 45 | se_layers=se_layers) 46 | 47 | def get_baseconfig_for_test(network_type, dataset_subset, global_batch_size, init_weights=None, device='cuda', deps=None, 48 | se_reduce_scale=0, se_layers=None, dataset_name=None): 49 | if dataset_name is None: 50 | dataset_name = get_dataset_name_by_model_name(network_type) 51 | return BaseConfigByEpoch(network_type=network_type, dataset_name=dataset_name, 52 | dataset_subset=dataset_subset, global_batch_size=global_batch_size, num_node=1, device=device, 53 | weight_decay=None, weight_decay_bias=None, optimizer_type=None, momentum=None, bias_lr_factor=None, 54 | max_epochs=None, base_lr=None, lr_epoch_boundaries=None, lr_decay_factor=None, linear_final_lr=None, cosine_minimum=None, 55 | warmup_epochs=None, warmup_method=None, warmup_factor=None, ckpt_iter_period=None, 56 | tb_iter_period=None, output_dir=None, tb_dir=None, init_weights=init_weights, 57 | save_weights=None, val_epoch_period=None, grad_accum_iters=None, deps=deps, 58 | se_reduce_scale=se_reduce_scale, se_layers=se_layers) -------------------------------------------------------------------------------- /data/data_factory.py: 
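Note on base_config.py above: a minimal sketch, not from the repository, of how the test-time helper is typically called. The model name 'src56' is a placeholder; its dataset is resolved through model_map.get_dataset_name_by_model_name, and every training-only field of the resulting namedtuple is left as None.

from base_config import get_baseconfig_for_test

cfg = get_baseconfig_for_test(network_type='src56', dataset_subset='val', global_batch_size=100)
# BaseConfigByEpoch is a plain namedtuple, so fields are read directly as attributes.
print(cfg.dataset_name, cfg.global_batch_size, cfg.max_epochs)   # max_epochs is None for test-only configs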
-------------------------------------------------------------------------------- 1 | import torch 2 | from torchvision import datasets, transforms 3 | import numpy as np 4 | from data.dataset_util import InfiniteDataLoader 5 | 6 | CIFAR10_PATH = 'cifar10_data' 7 | MNIST_PATH = 'mnist_data' 8 | 9 | 10 | def load_cuda_data(data_loader, dataset_name): 11 | if dataset_name == 'imagenet_standard': 12 | data, label = next(data_loader.dataprovider) 13 | data = data.cuda() 14 | label = label.cuda() 15 | elif dataset_name == 'imagenet_blank': 16 | data_dict = next(data_loader) 17 | data = data_dict['data'] 18 | label = data_dict['label'] 19 | else: 20 | data, label = next(data_loader) 21 | data = data.cuda() 22 | label = label.cuda() 23 | return data, label 24 | 25 | class ImageNetBlankGenerator(object): 26 | 27 | def __init__(self, batch_size, img_size): 28 | assert type(img_size) is int 29 | 30 | self.blank_img = np.ones((batch_size, 3, img_size, img_size), dtype=np.float) 31 | self.blank_label = np.ones(batch_size, dtype=np.int) * 42 32 | self.return_dict = {'data': torch.from_numpy(self.blank_img).type(torch.FloatTensor).cuda(), 33 | 'label': torch.from_numpy(self.blank_label).type(torch.long).cuda()} 34 | 35 | def __next__(self): 36 | return self.return_dict 37 | 38 | def create_dataset(dataset_name, subset, global_batch_size, distributed): 39 | assert dataset_name in ['cifar10','imagenet_blank', 40 | 'imagenet_standard', 'mnist'] 41 | assert subset in ['train', 'val'] 42 | 43 | if dataset_name == 'imagenet_standard': 44 | from data.imagenet_data import ImgnetStdTrainData, ImgnetStdValData 45 | if subset == 'train': 46 | print('imgnet standard train data') 47 | return ImgnetStdTrainData(distributed=distributed, 48 | batch_size_per_gpu=global_batch_size // torch.cuda.device_count()) 49 | else: 50 | print('imgnet standard val data') 51 | return ImgnetStdValData(batch_size=global_batch_size) 52 | 53 | elif dataset_name == 'imagenet_blank': 54 | assert not distributed 55 | return ImageNetBlankGenerator(batch_size=global_batch_size, img_size=224) 56 | 57 | elif dataset_name == 'mnist': 58 | assert not distributed 59 | if subset == 'train': 60 | return InfiniteDataLoader(datasets.MNIST(MNIST_PATH, train=True, download=True, 61 | transform=transforms.Compose([ 62 | transforms.ToTensor(), 63 | transforms.Normalize((0.1307,), (0.3081,))])), 64 | batch_size=global_batch_size, shuffle=True) 65 | else: 66 | return InfiniteDataLoader(datasets.MNIST(MNIST_PATH, train=False, transform=transforms.Compose([ 67 | transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])), 68 | batch_size=global_batch_size, shuffle=False) 69 | 70 | elif dataset_name == 'cifar10': 71 | assert not distributed 72 | if subset == 'train': 73 | return InfiniteDataLoader(datasets.CIFAR10(CIFAR10_PATH, train=True, download=False, 74 | transform=transforms.Compose([ 75 | transforms.Pad(padding=(4, 4, 4, 4)), 76 | transforms.RandomCrop(32), 77 | transforms.RandomHorizontalFlip(), 78 | transforms.ToTensor(), 79 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])), 80 | batch_size=global_batch_size, shuffle=True) 81 | else: 82 | return InfiniteDataLoader(datasets.CIFAR10(CIFAR10_PATH, train=False, 83 | transform=transforms.Compose([ 84 | transforms.ToTensor(), 85 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])), 86 | batch_size=global_batch_size, shuffle=False) 87 | 88 | else: 89 | raise ValueError('??') 90 | 91 | 92 | def num_train_examples_per_epoch(dataset_name): 93 | if 'imagenet' in dataset_name: 94 | return 
1281167 95 | elif dataset_name in ['cifar10', 'ch']: 96 | return 50000 97 | elif dataset_name == 'mnist': 98 | return 60000 99 | else: 100 | assert False 101 | 102 | def num_iters_per_epoch(cfg): 103 | return num_train_examples_per_epoch(cfg.dataset_name) // cfg.global_batch_size 104 | 105 | def num_val_examples(dataset_name): 106 | if 'imagenet' in dataset_name: 107 | return 50000 108 | elif dataset_name in ['cifar10', 'ch', 'mnist']: 109 | return 10000 110 | else: 111 | assert False -------------------------------------------------------------------------------- /deprecated/base_model/wrn.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from builder import ConvBuilder 3 | from constants import wrn_convert_flattened_deps 4 | 5 | class WRNCifarBlock(nn.Module): 6 | 7 | def __init__(self, input_channels, block_channels, stride, projection_shortcut, use_dropout, builder:ConvBuilder): 8 | super(WRNCifarBlock, self).__init__() 9 | assert len(block_channels) == 2 10 | 11 | if projection_shortcut: 12 | self.proj = builder.BNReLUConv2d(in_channels=input_channels, out_channels=block_channels[1], kernel_size=1, stride=stride, padding=0) 13 | else: 14 | self.proj = builder.ResIdentity(num_channels=block_channels[1]) 15 | 16 | self.conv1 = builder.BNReLUConv2d(in_channels=input_channels, out_channels=block_channels[0], kernel_size=3, 17 | stride=stride, padding=1) 18 | if use_dropout: 19 | self.dropout = builder.Dropout(keep_prob=0.7) 20 | print('use dropout for WRN') 21 | else: 22 | self.dropout = builder.Identity() 23 | self.conv2 = builder.BNReLUConv2d(in_channels=block_channels[0], out_channels=block_channels[1], kernel_size=3, 24 | stride=1, padding=1) 25 | 26 | def forward(self, input): 27 | x = self.conv1(input) 28 | x = self.dropout(x) 29 | x = self.conv2(x) 30 | x += self.proj(input) 31 | return x 32 | 33 | class WRNCifarNet(nn.Module): 34 | 35 | def __init__(self, block_counts, num_classes, builder:ConvBuilder, deps, use_dropout): 36 | super(WRNCifarNet, self).__init__() 37 | self.bd = builder 38 | converted_deps = wrn_convert_flattened_deps(deps) 39 | print('the converted deps is ', converted_deps) 40 | 41 | self.conv1 = builder.Conv2d(in_channels=3, out_channels=converted_deps[0], kernel_size=3, stride=1, padding=1, bias=False) 42 | self.stage1 = self._build_wrn_stage(num_blocks=block_counts[0], stage_input_channels=converted_deps[0], 43 | stage_deps=converted_deps[1], downsample=False, use_dropout=use_dropout) 44 | self.stage2 = self._build_wrn_stage(num_blocks=block_counts[1], stage_input_channels=converted_deps[1][-1][1], 45 | stage_deps=converted_deps[2], downsample=True, use_dropout=use_dropout) 46 | self.stage3 = self._build_wrn_stage(num_blocks=block_counts[2], stage_input_channels=converted_deps[2][-1][1], 47 | stage_deps=converted_deps[3], downsample=True, use_dropout=use_dropout) 48 | self.last_bn = builder.BatchNorm2d(num_features=converted_deps[3][-1][1]) 49 | self.linear = builder.Linear(in_features=converted_deps[3][-1][1], out_features=num_classes) 50 | 51 | 52 | def _build_wrn_stage(self, num_blocks, stage_input_channels, stage_deps, downsample, use_dropout): 53 | se = self.bd.Sequential() 54 | for i in range(num_blocks): 55 | if i == 0: 56 | block_input_channels = stage_input_channels 57 | else: 58 | block_input_channels = stage_deps[i - 1][1] 59 | if i == 0 and downsample: 60 | stride = 2 61 | else: 62 | stride = 1 63 | se.add_module(name='block{}'.format(i+1), 64 | 
module=WRNCifarBlock(input_channels=block_input_channels, block_channels=stage_deps[i], 65 | stride=stride, projection_shortcut=(i==0), use_dropout=use_dropout, builder=self.bd)) 66 | return se 67 | 68 | def forward(self, x): 69 | out = self.conv1(x) 70 | out = self.stage1(out) 71 | out = self.stage2(out) 72 | out = self.stage3(out) 73 | out = self.last_bn(out) 74 | out = self.bd.avg_pool2d(in_features=out, kernel_size=8, stride=1, padding=0) 75 | out = self.bd.flatten(out) 76 | out = self.linear(out) 77 | return out 78 | 79 | 80 | 81 | def create_wrnc16plain(cfg, builder): 82 | return WRNCifarNet(block_counts=(2,2,2), num_classes=10, builder=builder, deps=cfg.deps, use_dropout=False) 83 | def create_wrnc16drop(cfg, builder): 84 | return WRNCifarNet(block_counts=(2,2,2), num_classes=10, builder=builder, deps=cfg.deps, use_dropout=True) 85 | def create_wrnc28plain(cfg, builder): 86 | return WRNCifarNet(block_counts=(4,4,4), num_classes=10, builder=builder, deps=cfg.deps, use_dropout=False) 87 | def create_wrnc28drop(cfg, builder): 88 | return WRNCifarNet(block_counts=(4,4,4), num_classes=10, builder=builder, deps=cfg.deps, use_dropout=True) 89 | def create_wrnc40plain(cfg, builder): 90 | return WRNCifarNet(block_counts=(6,6,6), num_classes=10, builder=builder, deps=cfg.deps, use_dropout=False) 91 | def create_wrnc40drop(cfg, builder): 92 | return WRNCifarNet(block_counts=(6,6,6), num_classes=10, builder=builder, deps=cfg.deps, use_dropout=True) 93 | 94 | def create_wrnh16plain(cfg, builder): 95 | return WRNCifarNet(block_counts=(2,2,2), num_classes=100, builder=builder, deps=cfg.deps, use_dropout=False) 96 | def create_wrnh16drop(cfg, builder): 97 | return WRNCifarNet(block_counts=(2,2,2), num_classes=100, builder=builder, deps=cfg.deps, use_dropout=True) 98 | def create_wrnh28plain(cfg, builder): 99 | return WRNCifarNet(block_counts=(4,4,4), num_classes=100, builder=builder, deps=cfg.deps, use_dropout=False) 100 | def create_wrnh28drop(cfg, builder): 101 | return WRNCifarNet(block_counts=(4,4,4), num_classes=100, builder=builder, deps=cfg.deps, use_dropout=True) 102 | def create_wrnh40plain(cfg, builder): 103 | return WRNCifarNet(block_counts=(6,6,6), num_classes=100, builder=builder, deps=cfg.deps, use_dropout=False) 104 | def create_wrnh40drop(cfg, builder): 105 | return WRNCifarNet(block_counts=(6,6,6), num_classes=100, builder=builder, deps=cfg.deps, use_dropout=True) -------------------------------------------------------------------------------- /deprecated/builder.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | from custom_layers.flatten_layer import FlattenLayer 4 | from custom_layers.se_block import SEBlock 5 | 6 | class ConvBuilder(nn.Module): 7 | 8 | def __init__(self, base_config): 9 | super(ConvBuilder, self).__init__() 10 | print('ConvBuilder initialized.') 11 | self.BN_eps = 1e-5 12 | self.BN_momentum = 0.1 13 | self.BN_affine = True 14 | self.BN_track_running_stats = True 15 | self.base_config = base_config 16 | 17 | def set_BN_config(self, eps, momentum, affine, track_running_stats): 18 | self.BN_eps = eps 19 | self.BN_momentum = momentum 20 | self.BN_afine = affine 21 | self.BN_track_running_stats = track_running_stats 22 | 23 | 24 | def Conv2d(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', use_original_conv=False): 25 | return nn.Conv2d(in_channels=in_channels, out_channels=out_channels, 
kernel_size=kernel_size, 26 | stride=stride, padding=padding, dilation=dilation, groups=groups, bias=bias, padding_mode=padding_mode) 27 | 28 | # The running estimates are kept with a default momentum of 0.1. 29 | # By default, the elements of \gammaγ are sampled from \mathcal{U}(0, 1)U(0,1) and the elements of \betaβ are set to 0. 30 | # If track_running_stats is set to False, this layer then does not keep running estimates, and batch statistics are instead used during evaluation time as well. 31 | def BatchNorm2d(self, num_features, eps=None, momentum=None, affine=None, track_running_stats=None): 32 | if eps is None: 33 | eps = self.BN_eps 34 | if momentum is None: 35 | momentum = self.BN_momentum 36 | if affine is None: 37 | affine = self.BN_affine 38 | if track_running_stats is None: 39 | track_running_stats = self.BN_track_running_stats 40 | return nn.BatchNorm2d(num_features=num_features, eps=eps, momentum=momentum, affine=affine, track_running_stats=track_running_stats) 41 | 42 | def Sequential(self, *args): 43 | return nn.Sequential(*args) 44 | 45 | def ReLU(self): 46 | return nn.ReLU() 47 | 48 | def Conv2dBN(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, padding_mode='zeros', use_original_conv=False): 49 | conv_layer = self.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, 50 | stride=stride, padding=padding, dilation=dilation, groups=groups, bias=False, padding_mode=padding_mode, use_original_conv=use_original_conv) 51 | bn_layer = self.BatchNorm2d(num_features=out_channels) 52 | se = self.Sequential() 53 | se.add_module('conv', conv_layer) 54 | se.add_module('bn', bn_layer) 55 | if self.base_config is not None and self.base_config.se_reduce_scale is not None and self.base_config.se_reduce_scale > 0: 56 | se.add_module('se', SEBlock(input_channels=out_channels, internal_neurons=out_channels // self.base_config.se_reduce_scale)) 57 | return se 58 | 59 | def Conv2dBNReLU(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, padding_mode='zeros', use_original_conv=False): 60 | conv = self.Conv2dBN(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, 61 | padding=padding, dilation=dilation, groups=groups, padding_mode=padding_mode, use_original_conv=use_original_conv) 62 | conv.add_module('relu', self.ReLU()) 63 | return conv 64 | 65 | def BNReLUConv2d(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, padding_mode='zeros', use_original_conv=False): 66 | bn_layer = self.BatchNorm2d(num_features=in_channels) 67 | conv_layer = self.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, 68 | stride=stride, padding=padding, dilation=dilation, groups=groups, bias=False, padding_mode=padding_mode) 69 | se = self.Sequential() 70 | se.add_module('bn', bn_layer) 71 | se.add_module('relu', self.ReLU()) 72 | se.add_module('conv', conv_layer) 73 | return se 74 | 75 | def Linear(self, in_features, out_features, bias=True): 76 | return nn.Linear(in_features=in_features, out_features=out_features, bias=bias) 77 | 78 | def Identity(self): 79 | return nn.Identity() 80 | 81 | def ResIdentity(self, num_channels): 82 | return nn.Identity() 83 | 84 | 85 | def Dropout(self, keep_prob): 86 | return nn.Dropout(p=1-keep_prob) 87 | 88 | def Maxpool2d(self, kernel_size, stride=None): 89 | return nn.MaxPool2d(kernel_size=kernel_size, stride=stride) 90 | 91 | def Avgpool2d(self, kernel_size, 
stride=None): 92 | return nn.AvgPool2d(kernel_size=kernel_size, stride=stride) 93 | 94 | def Flatten(self): 95 | return FlattenLayer() 96 | 97 | def GAP(self, kernel_size): 98 | gap = nn.Sequential() 99 | gap.add_module('avg', nn.AvgPool2d(kernel_size=kernel_size, stride=kernel_size)) 100 | gap.add_module('flatten', FlattenLayer()) 101 | return gap 102 | 103 | 104 | 105 | def relu(self, in_features): 106 | return F.relu(in_features) 107 | 108 | def max_pool2d(self, in_features, kernel_size, stride, padding): 109 | return F.max_pool2d(in_features, kernel_size=kernel_size, stride=stride, padding=padding) 110 | 111 | def avg_pool2d(self, in_features, kernel_size, stride, padding): 112 | return F.avg_pool2d(in_features, kernel_size=kernel_size, stride=stride, padding=padding) 113 | 114 | def flatten(self, in_features): 115 | result = in_features.view(in_features.size(0), -1) 116 | return result 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | -------------------------------------------------------------------------------- /deprecated/ding_test.py: -------------------------------------------------------------------------------- 1 | from base_config import BaseConfigByEpoch 2 | from model_map import get_model_fn 3 | from dataset import create_dataset 4 | from torch.nn.modules.loss import CrossEntropyLoss 5 | from utils.engine import Engine 6 | from utils.misc import torch_accuracy, AvgMeter 7 | from utils.comm import reduce_loss_dict 8 | from collections import OrderedDict 9 | import torch 10 | from tqdm import tqdm 11 | import time 12 | from builder import ConvBuilder 13 | from ding_train import load_cuda_data 14 | import sys 15 | from utils.misc import log_important 16 | from base_config import get_baseconfig_for_test 17 | 18 | TEST_BATCH_SIZE = 100 19 | OVERALL_LOG_FILE = 'overall_test_log.txt' 20 | DETAIL_LOG_FILE = 'detail_test_log.txt' 21 | 22 | def run_eval(ds_val, max_iters, net, criterion, discrip_str, dataset_name): 23 | pbar = tqdm(range(max_iters)) 24 | top1 = AvgMeter() 25 | top5 = AvgMeter() 26 | losses = AvgMeter() 27 | pbar.set_description('Validation' + discrip_str) 28 | total_net_time = 0 29 | with torch.no_grad(): 30 | for iter_idx, i in enumerate(pbar): 31 | start_time = time.time() 32 | data, label = load_cuda_data(ds_val, dataset_name=dataset_name) 33 | data_time = time.time() - start_time 34 | 35 | net_time_start = time.time() 36 | pred = net(data) 37 | net_time_end = time.time() 38 | total_net_time += net_time_end - net_time_start 39 | 40 | loss = criterion(pred, label) 41 | acc, acc5 = torch_accuracy(pred, label, (1, 5)) 42 | 43 | top1.update(acc.item()) 44 | top5.update(acc5.item()) 45 | losses.update(loss.item()) 46 | pbar_dic = OrderedDict() 47 | pbar_dic['data-time'] = '{:.2f}'.format(data_time) 48 | pbar_dic['top1'] = '{:.5f}'.format(top1.mean) 49 | pbar_dic['top5'] = '{:.5f}'.format(top5.mean) 50 | pbar_dic['loss'] = '{:.5f}'.format(losses.mean) 51 | pbar.set_postfix(pbar_dic) 52 | 53 | metric_dic = {'top1':torch.tensor(top1.mean), 54 | 'top5':torch.tensor(top5.mean), 55 | 'loss':torch.tensor(losses.mean)} 56 | reduced_metirc_dic = reduce_loss_dict(metric_dic) 57 | return reduced_metirc_dic, total_net_time 58 | 59 | def get_criterion(cfg): 60 | return CrossEntropyLoss() 61 | 62 | def ding_test(cfg:BaseConfigByEpoch, net=None, val_dataloader=None, show_variables=False, convbuilder=None, 63 | init_hdf5=None, ): 64 | 65 | with Engine() as engine: 66 | 67 | engine.setup_log( 68 | name='test', log_dir='./', file_name=DETAIL_LOG_FILE) 69 | 70 | if net is None: 
71 | net = get_model_fn(cfg.dataset_name, cfg.network_type) 72 | 73 | if convbuilder is None: 74 | convbuilder = ConvBuilder(base_config=cfg) 75 | 76 | model = net(cfg, convbuilder).cuda() 77 | 78 | if val_dataloader is None: 79 | val_dataloader = create_dataset(cfg.dataset_name, cfg.dataset_subset, batch_size=cfg.global_batch_size) 80 | val_iters = 50000 // cfg.global_batch_size if cfg.dataset_name == 'imagenet' else 10000 // cfg.global_batch_size 81 | 82 | print('NOTE: Data prepared') 83 | print('NOTE: We have global_batch_size={} on {} GPUs, the allocated GPU memory is {}'.format(cfg.global_batch_size, torch.cuda.device_count(), torch.cuda.memory_allocated())) 84 | 85 | criterion = get_criterion(cfg).cuda() 86 | 87 | engine.register_state( 88 | scheduler=None, model=model, optimizer=None) 89 | 90 | if engine.distributed: 91 | print('Distributed training, engine.world_rank={}'.format(engine.world_rank)) 92 | model = torch.nn.parallel.DistributedDataParallel( 93 | model, device_ids=[engine.world_rank], 94 | broadcast_buffers=False, ) 95 | # model = DistributedDataParallel(model, delay_allreduce=True) 96 | elif torch.cuda.device_count() > 1: 97 | print('Single machine multiple GPU training') 98 | model = torch.nn.parallel.DataParallel(model) 99 | 100 | if cfg.init_weights: 101 | engine.load_checkpoint(cfg.init_weights, is_restore=True, just_weights=True) 102 | 103 | if init_hdf5: 104 | engine.load_hdf5(init_hdf5) 105 | 106 | if show_variables: 107 | engine.show_variables() 108 | 109 | model.eval() 110 | eval_dict, _ = run_eval(val_dataloader, val_iters, model, criterion, 'TEST', dataset_name=cfg.dataset_name) 111 | val_top1_value = eval_dict['top1'].item() 112 | val_top5_value = eval_dict['top5'].item() 113 | val_loss_value = eval_dict['loss'].item() 114 | 115 | msg = '{},{},{},top1={:.5f},top5={:.5f},loss={:.7f}'.format(cfg.network_type, init_hdf5 or cfg.init_weights, cfg.dataset_subset, 116 | val_top1_value, val_top5_value, val_loss_value) 117 | log_important(msg, OVERALL_LOG_FILE) 118 | 119 | 120 | def general_test(network_type, weights, builder=None): 121 | if weights.endswith('.hdf5'): 122 | init_weights = None 123 | init_hdf5 = weights 124 | else: 125 | init_weights = weights 126 | init_hdf5 = None 127 | if 'wrnc16' in network_type or 'wrnh16' in network_type: 128 | from constants import wrn_origin_deps_flattened 129 | deps = wrn_origin_deps_flattened(2, 8) 130 | else: 131 | deps = None 132 | test_config = get_baseconfig_for_test(network_type=network_type, dataset_subset='val', global_batch_size=TEST_BATCH_SIZE, 133 | init_weights=init_weights, deps=deps) 134 | ding_test(cfg=test_config, show_variables=True, init_hdf5=init_hdf5, convbuilder=builder) 135 | 136 | 137 | 138 | if __name__ == '__main__': 139 | network_type = sys.argv[1] 140 | weights = sys.argv[2] 141 | general_test(network_type=network_type, weights=weights) 142 | 143 | 144 | -------------------------------------------------------------------------------- /deprecated/constants.py: -------------------------------------------------------------------------------- 1 | OVERALL_EVAL_RECORD_FILE = 'overall_eval_records.txt' 2 | from collections import namedtuple 3 | 4 | LRSchedule = namedtuple('LRSchedule', ['base_lr', 'max_epochs', 'lr_epoch_boundaries', 'lr_decay_factor', 5 | 'linear_final_lr']) 6 | 7 | import numpy as np 8 | 9 | 10 | def parse_usual_lr_schedule(try_arg, keyword='lrs{}'): 11 | if keyword.format(1) in try_arg: 12 | lrs = LRSchedule(base_lr=0.1, max_epochs=500, lr_epoch_boundaries=[100, 200, 300, 400], 
lr_decay_factor=0.3, 13 | linear_final_lr=None) 14 | elif keyword.format(2) in try_arg: 15 | lrs = LRSchedule(base_lr=0.1, max_epochs=500, lr_epoch_boundaries=[100, 200, 300, 400], lr_decay_factor=0.1, 16 | linear_final_lr=None) 17 | elif keyword.format(3) in try_arg: 18 | lrs = LRSchedule(base_lr=0.1, max_epochs=800, lr_epoch_boundaries=[200, 400, 600], lr_decay_factor=0.1, 19 | linear_final_lr=None) 20 | elif keyword.format(4) in try_arg: 21 | lrs = LRSchedule(base_lr=0.1, max_epochs=80, lr_epoch_boundaries=[20, 40, 60], lr_decay_factor=0.1, 22 | linear_final_lr=None) 23 | elif keyword.format(5) in try_arg: 24 | lrs = LRSchedule(base_lr=0.05, max_epochs=200, lr_epoch_boundaries=[50, 100, 150], lr_decay_factor=0.1, 25 | linear_final_lr=None) 26 | elif keyword.format(6) in try_arg: 27 | lrs = LRSchedule(base_lr=0.1, max_epochs=360, lr_epoch_boundaries=[90, 180, 240, 300], lr_decay_factor=0.2, 28 | linear_final_lr=None) 29 | elif keyword.format(7) in try_arg: 30 | lrs = LRSchedule(base_lr=0.1, max_epochs=800, lr_epoch_boundaries=None, lr_decay_factor=None, 31 | linear_final_lr=1e-4) 32 | elif keyword.format(8) in try_arg: # may be enough for MobileNet v1 on CIFARs 33 | lrs = LRSchedule(base_lr=0.1, max_epochs=400, lr_epoch_boundaries=[100, 200, 300], lr_decay_factor=0.1, 34 | linear_final_lr=None) 35 | elif keyword.format(9) in try_arg: 36 | lrs = LRSchedule(base_lr=0.1, max_epochs=200, lr_epoch_boundaries=[50, 100, 150], lr_decay_factor=0.1, 37 | linear_final_lr=None) 38 | 39 | elif keyword.format('A') in try_arg: 40 | lrs = LRSchedule(base_lr=0.1, max_epochs=100, lr_epoch_boundaries=None, lr_decay_factor=None, 41 | linear_final_lr=1e-5) 42 | elif keyword.format('B') in try_arg: 43 | lrs = LRSchedule(base_lr=0.1, max_epochs=100, lr_epoch_boundaries=None, lr_decay_factor=None, 44 | linear_final_lr=1e-6) 45 | elif keyword.format('C') in try_arg: 46 | lrs = LRSchedule(base_lr=0.2, max_epochs=125, lr_epoch_boundaries=None, lr_decay_factor=None, 47 | linear_final_lr=0) 48 | elif keyword.format('D') in try_arg: 49 | lrs = LRSchedule(base_lr=0.001, max_epochs=20, lr_epoch_boundaries=[5, 10], lr_decay_factor=0.1, 50 | linear_final_lr=None) 51 | elif keyword.format('E') in try_arg: 52 | lrs = LRSchedule(base_lr=0.001, max_epochs=300, lr_epoch_boundaries=[100, 200], lr_decay_factor=0.1, 53 | linear_final_lr=None) 54 | 55 | elif keyword.format('F') in try_arg: 56 | lrs = LRSchedule(base_lr=0.1, max_epochs=120, lr_epoch_boundaries=[30, 60, 90, 110], lr_decay_factor=0.1, 57 | linear_final_lr=None) 58 | # for VGG and CFQKBN 59 | elif keyword.format('G') in try_arg: 60 | lrs = LRSchedule(base_lr=0.05, max_epochs=800, lr_epoch_boundaries=[200, 400, 600], lr_decay_factor=0.1, 61 | linear_final_lr=None) 62 | elif keyword.format('H') in try_arg: 63 | lrs = LRSchedule(base_lr=0.025, max_epochs=200, lr_epoch_boundaries=[50, 100, 150], lr_decay_factor=0.1, 64 | linear_final_lr=None) 65 | 66 | elif keyword.format('X') in try_arg: 67 | lrs = LRSchedule(base_lr=0.2, max_epochs=6, lr_epoch_boundaries=None, lr_decay_factor=None, 68 | linear_final_lr=0) 69 | 70 | elif keyword.replace('{}', '') in try_arg: 71 | raise ValueError('Unsupported lrs config.') 72 | else: 73 | lrs = None 74 | return lrs 75 | 76 | 77 | VGG_ORIGIN_DEPS = [64, 64, 128, 128, 256, 256, 256, 512, 512, 512, 512, 512, 512] 78 | 79 | CFQK_ORIGIN_DEPS = np.array([32, 32, 64], dtype=np.int32) 80 | 81 | 82 | 83 | 84 | 85 | def wrn_origin_deps_flattened(n, k): 86 | assert n in [2, 4, 6] # total_depth = 6n + 4 87 | filters_in_each_stage = n * 2 + 1 88 | 
stage0 = [16] 89 | stage1 = [16 * k] * filters_in_each_stage 90 | stage2 = [32 * k] * filters_in_each_stage 91 | stage3 = [64 * k] * filters_in_each_stage 92 | return np.array(stage0 + stage1 + stage2 + stage3) 93 | 94 | def wrn_pacesetter_idxes(n): 95 | assert n in [2, 4, 6] 96 | filters_in_each_stage = n * 2 + 1 97 | pacesetters = [1, int(filters_in_each_stage)+1, int(2 * filters_in_each_stage)+1] #[1, 10, 19] for WRN-28-x, for example 98 | return pacesetters 99 | 100 | def wrn_convert_flattened_deps(flattened): 101 | assert len(flattened) in [16, 28, 40] 102 | n = int((len(flattened) - 4) // 6) 103 | assert n in [2, 4, 6] 104 | pacesetters = wrn_pacesetter_idxes(n) 105 | result = [flattened[0]] 106 | for ps in pacesetters: 107 | assert flattened[ps] == flattened[ps+2] 108 | stage_deps = [] 109 | for i in range(n): 110 | stage_deps.append([flattened[ps + 1 + 2 * i], flattened[ps + 2 + 2 * i]]) 111 | result.append(stage_deps) 112 | return result 113 | 114 | 115 | #################### WRN 116 | WRN16_FOLLOW_DICT = {1:1, 3:1, 5:1, 6:6, 8:6, 10:6, 11:11, 13:11, 15:11} 117 | WRN16_PACESETTER_IDS = [1, 6, 11] 118 | WRN16_SUBSEQUENT_STRATEGY = { 119 | 0:[1, 2], 120 | 1:[4, 6, 7], 121 | 2:3, 122 | 4:5, 123 | 6:[9, 11, 12], 124 | 7:8, 125 | 9:10, 126 | 11:[14, 16], 127 | 12:13, 128 | 14:15 129 | } 130 | WRN16_INTERNAL_IDS = [2,4,7,9,12,14] 131 | 132 | -------------------------------------------------------------------------------- /acnet/do_acnet.py: -------------------------------------------------------------------------------- 1 | from base_config import get_baseconfig_by_epoch 2 | from model_map import get_dataset_name_by_model_name 3 | import argparse 4 | from acnet.acnet_builder import ACNetBuilder 5 | from ndp_train import train_main 6 | from acnet.acnet_fusion import convert_acnet_weights 7 | import os 8 | from ndp_test import general_test 9 | from constants import LRSchedule 10 | from builder import ConvBuilder 11 | 12 | if __name__ == '__main__': 13 | parser = argparse.ArgumentParser() 14 | parser.add_argument('-a', '--arch', default='sres18') 15 | parser.add_argument('-b', '--block_type', default='acb') 16 | parser.add_argument('-c', '--conti_or_fs', default='fs') # continue or train_from_scratch 17 | parser.add_argument( 18 | '--local_rank', default=0, type=int, 19 | help='process rank on node') 20 | 21 | start_arg = parser.parse_args() 22 | 23 | network_type = start_arg.arch 24 | block_type = start_arg.block_type 25 | conti_or_fs = start_arg.conti_or_fs 26 | assert conti_or_fs in ['continue', 'fs'] 27 | assert block_type in ['acb', 'base'] 28 | auto_continue = conti_or_fs == 'continue' 29 | print('auto continue: ', auto_continue) 30 | 31 | gamma_init = None 32 | 33 | if network_type == 'sres18': 34 | weight_decay_strength = 1e-4 35 | batch_size = 256 36 | lrs = LRSchedule(base_lr=0.1, max_epochs=100, lr_epoch_boundaries=None, lr_decay_factor=None, 37 | linear_final_lr=None, cosine_minimum=0) 38 | warmup_epochs = 0 39 | gamma_init = 1 40 | 41 | elif network_type == 'sres34': 42 | weight_decay_strength = 1e-4 43 | batch_size = 256 44 | lrs = LRSchedule(base_lr=0.1, max_epochs=100, lr_epoch_boundaries=None, lr_decay_factor=None, 45 | linear_final_lr=None, cosine_minimum=0) 46 | warmup_epochs = 0 47 | gamma_init = 1 48 | 49 | elif network_type == 'sres50': 50 | weight_decay_strength = 1e-4 51 | batch_size = 256 52 | lrs = LRSchedule(base_lr=0.1, max_epochs=100, lr_epoch_boundaries=None, lr_decay_factor=None, 53 | linear_final_lr=None, cosine_minimum=0) 54 | warmup_epochs = 0 55 | 
gamma_init = 1 56 | 57 | elif network_type == 'cfqkbnc': 58 | weight_decay_strength = 1e-4 59 | # ------------------------------------ 60 | # 86.2 ---> 86.8+ 61 | batch_size = 128 62 | lrs = LRSchedule(base_lr=0.1, max_epochs=150, lr_epoch_boundaries=None, lr_decay_factor=None, 63 | linear_final_lr=None, cosine_minimum=0) 64 | warmup_epochs = 0 65 | gamma_init = 0.333 66 | # ------------------------------------ 67 | 68 | 69 | elif network_type == 'src56': 70 | weight_decay_strength = 1e-4 71 | # ------------------------------------ 72 | # 94.47 ---> 95+ 73 | batch_size = 128 74 | lrs = LRSchedule(base_lr=0.2, max_epochs=400, lr_epoch_boundaries=None, lr_decay_factor=None, 75 | linear_final_lr=None, cosine_minimum=0) 76 | warmup_epochs = 0 77 | gamma_init = 0.333 78 | # -------------------------------------- 79 | 80 | elif network_type == 'vc': 81 | weight_decay_strength = 1e-4 82 | # -------------------------------------- 83 | # 93.98 ---> 94.54 84 | batch_size = 128 85 | lrs = LRSchedule(base_lr=0.1, max_epochs=400, lr_epoch_boundaries=None, lr_decay_factor=None, 86 | linear_final_lr=None, cosine_minimum=0) 87 | warmup_epochs = 0 88 | gamma_init = 0.333 89 | # -------------------------------------- 90 | 91 | elif network_type == 'wrnc16plain': 92 | weight_decay_strength = 5e-4 93 | # -------------------------------------- 94 | # 95.90 -> 96.33 95 | batch_size = 128 96 | lrs = LRSchedule(base_lr=0.1, max_epochs=400, lr_epoch_boundaries=None, lr_decay_factor=None, 97 | linear_final_lr=None, cosine_minimum=0) 98 | warmup_epochs = 0 99 | gamma_init = 0.333 100 | # -------------------------------------- 101 | 102 | else: 103 | raise ValueError('...') 104 | 105 | log_dir = 'acnet_exps/{}_{}_train'.format(network_type, block_type) 106 | 107 | weight_decay_bias = weight_decay_strength 108 | config = get_baseconfig_by_epoch(network_type=network_type, 109 | dataset_name=get_dataset_name_by_model_name(network_type), dataset_subset='train', 110 | global_batch_size=batch_size, num_node=1, 111 | weight_decay=weight_decay_strength, optimizer_type='sgd', momentum=0.9, 112 | max_epochs=lrs.max_epochs, base_lr=lrs.base_lr, lr_epoch_boundaries=lrs.lr_epoch_boundaries, cosine_minimum=lrs.cosine_minimum, 113 | lr_decay_factor=lrs.lr_decay_factor, 114 | warmup_epochs=0, warmup_method='linear', warmup_factor=0, 115 | ckpt_iter_period=40000, tb_iter_period=100, output_dir=log_dir, 116 | tb_dir=log_dir, save_weights=None, val_epoch_period=5, linear_final_lr=lrs.linear_final_lr, 117 | weight_decay_bias=weight_decay_bias, deps=None) 118 | 119 | if block_type == 'acb': 120 | builder = ACNetBuilder(base_config=config, deploy=False, gamma_init=gamma_init) 121 | else: 122 | builder = ConvBuilder(base_config=config) 123 | 124 | target_weights = os.path.join(log_dir, 'finish.hdf5') 125 | if not os.path.exists(target_weights): 126 | train_main(local_rank=start_arg.local_rank, cfg=config, convbuilder=builder, 127 | show_variables=True, auto_continue=auto_continue) 128 | 129 | if block_type == 'acb' and start_arg.local_rank == 0: 130 | convert_acnet_weights(target_weights, target_weights.replace('.hdf5', '_deploy.hdf5'), eps=1e-5) 131 | deploy_builder = ACNetBuilder(base_config=config, deploy=True) 132 | general_test(network_type=network_type, weights=target_weights.replace('.hdf5', '_deploy.hdf5'), 133 | builder=deploy_builder) 134 | -------------------------------------------------------------------------------- /deprecated/utils/misc.py: -------------------------------------------------------------------------------- 1 | 
import math 2 | import os 3 | from typing import Tuple, List, Dict 4 | import torch 5 | import sys 6 | import json 7 | import h5py 8 | import numpy as np 9 | import time 10 | 11 | def cur_time(): 12 | return time.strftime('%Y,%b,%d,%X') 13 | 14 | def log_important(message, log_file): 15 | print(message, cur_time()) 16 | with open(log_file, 'a') as f: 17 | print(message, cur_time(), file=f) 18 | 19 | 20 | 21 | def representsInt(s): 22 | try: 23 | int(s) 24 | return True 25 | except ValueError: 26 | return False 27 | 28 | def read_hdf5(file_path): 29 | result = {} 30 | with h5py.File(file_path, 'r') as f: 31 | for k in f.keys(): 32 | value = np.asarray(f[k]) 33 | if representsInt(k): 34 | result[int(k)] = value 35 | else: 36 | result[str(k).replace('+','/')] = value 37 | print('read {} arrays from {}'.format(len(result), file_path)) 38 | f.close() 39 | return result 40 | 41 | def save_hdf5(numpy_dict, file_path): 42 | with h5py.File(file_path, 'w') as f: 43 | for k,v in numpy_dict.items(): 44 | f.create_dataset(str(k).replace('/','+'), data=v) 45 | print('saved {} arrays to {}'.format(len(numpy_dict), file_path)) 46 | f.close() 47 | 48 | 49 | def start_exp(): 50 | import argparse 51 | parser = argparse.ArgumentParser() 52 | parser.add_argument("--try_arg", type=str, default='') 53 | args = parser.parse_args() 54 | try_arg = args.try_arg 55 | print('the try_arg is ', try_arg) 56 | print('we have {} torch devices'.format(torch.cuda.device_count()), 57 | 'the allocated GPU memory is {}'.format(torch.cuda.memory_allocated())) 58 | return try_arg 59 | 60 | 61 | def torch_accuracy(output, target, topk=(1,)) -> List[torch.Tensor]: 62 | ''' 63 | param output, target: should be torch Variable 64 | ''' 65 | # assert isinstance(output, torch.cuda.Tensor), 'expecting Torch Tensor' 66 | # assert isinstance(target, torch.Tensor), 'expecting Torch Tensor' 67 | # print(type(output)) 68 | 69 | topn = max(topk) 70 | batch_size = output.size(0) 71 | 72 | _, pred = output.topk(topn, 1, True, True) 73 | pred = pred.t() 74 | 75 | is_correct = pred.eq(target.view(1, -1).expand_as(pred)) 76 | 77 | ans = [] 78 | for i in topk: 79 | is_correct_i = is_correct[:i].view(-1).float().sum(0, keepdim=True) 80 | ans.append(is_correct_i.mul_(100.0 / batch_size)) 81 | 82 | return ans 83 | 84 | class AvgMeter(object): 85 | ''' 86 | Computing mean 87 | ''' 88 | name = 'No name' 89 | 90 | def __init__(self, name='No name', fmt = ':.2f'): 91 | self.name = name 92 | self.fmt = fmt 93 | self.reset() 94 | 95 | def reset(self): 96 | self.sum = 0 97 | self.mean = 0 98 | self.num = 0 99 | self.now = 0 100 | 101 | def update(self, mean_var, count=1): 102 | if math.isnan(mean_var): 103 | mean_var = 1e6 104 | print('Avgmeter getting Nan!') 105 | self.now = mean_var 106 | self.num += count 107 | 108 | self.sum += mean_var * count 109 | self.mean = float(self.sum) / self.num 110 | 111 | def __str__(self): 112 | print_str = self.name + '-{' + self.fmt + '}' 113 | return print_str.format(self.mean) 114 | 115 | def save_args(args, save_dir = None): 116 | if save_dir == None: 117 | param_path = os.path.join(args.resume, "params.json") 118 | else: 119 | param_path = os.path.join(save_dir, 'params.json') 120 | 121 | #logger.info("[*] MODEL dir: %s" % args.resume) 122 | #logger.info("[*] PARAM path: %s" % param_path) 123 | 124 | with open(param_path, 'w') as fp: 125 | json.dump(args.__dict__, fp, indent=4, sort_keys=True) 126 | 127 | 128 | def mkdir(path): 129 | if not os.path.exists(path): 130 | print('creating dir {}'.format(path)) 131 | 
os.mkdir(path) 132 | 133 | # def save_checkpoint(cur_iters, net, optimizer, lr_scheduler, file_name): 134 | # checkpoint = {'cur_iters': cur_iters, 135 | # 'state_dict': net.state_dict(), 136 | # 'optimizer_state_dict': optimizer.state_dict(), 137 | # 'lr_scheduler_state_dict':lr_scheduler.state_dict()} 138 | # if os.path.exists(file_name): 139 | # print('Overwriting {}'.format(file_name)) 140 | # torch.save(checkpoint, file_name) 141 | # link_name = os.path.join('/', *file_name.split(os.path.sep)[:-1], 'last.checkpoint') 142 | # #print(link_name) 143 | # make_symlink(source = file_name, link_name=link_name) 144 | 145 | def load_checkpoint(file_name, net = None, optimizer = None, lr_scheduler = None): 146 | if os.path.isfile(file_name): 147 | print("=> loading checkpoint '{}'".format(file_name)) 148 | check_point = torch.load(file_name) 149 | if net is not None: 150 | print('Loading network state dict') 151 | net.load_state_dict(check_point['state_dict']) 152 | if optimizer is not None: 153 | print('Loading optimizer state dict') 154 | optimizer.load_state_dict(check_point['optimizer_state_dict']) 155 | if lr_scheduler is not None: 156 | print('Loading lr_scheduler state dict') 157 | lr_scheduler.load_state_dict(check_point['lr_scheduler_state_dict']) 158 | 159 | return check_point['cur_iters'] 160 | else: 161 | print("=> no checkpoint found at '{}'".format(file_name)) 162 | 163 | 164 | def make_symlink(source, link_name): 165 | ''' 166 | Note: overwriting enabled! 167 | ''' 168 | if os.path.exists(link_name): 169 | #print("Link name already exist! Removing '{}' and overwriting".format(link_name)) 170 | os.remove(link_name) 171 | if os.path.exists(source): 172 | os.symlink(source, link_name) 173 | return 174 | else: 175 | print('Source path not exists') 176 | #print('SymLink Wrong!') 177 | 178 | def add_path(path): 179 | if path not in sys.path: 180 | print('Adding {}'.format(path)) 181 | sys.path.append(path) 182 | 183 | def format_metric_dict_to_line(metric_dict): 184 | msg = '' 185 | for key, value in metric_dict.items(): 186 | msg += '{}={:.5f},'.format(key, value) 187 | return msg 188 | -------------------------------------------------------------------------------- /utils/lr_scheduler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from bisect import bisect_right 3 | from data.data_factory import num_iters_per_epoch 4 | import torch 5 | from torch.optim.lr_scheduler import CosineAnnealingLR 6 | import math 7 | 8 | # FIXME ideally this would be achieved with a CombinedLRScheduler, 9 | # separating MultiStepLR with WarmupLR 10 | # but the current LRScheduler design doesn't allow it 11 | 12 | class WarmupMultiStepLR(torch.optim.lr_scheduler._LRScheduler): 13 | def __init__( 14 | self, 15 | optimizer, 16 | milestones, 17 | gamma=0.1, 18 | warmup_factor=1.0 / 3, 19 | warmup_iters=500, 20 | warmup_method="linear", 21 | last_epoch=-1, 22 | ): 23 | if not list(milestones) == sorted(milestones): 24 | raise ValueError( 25 | "Milestones should be a list of" " increasing integers. 
Got {}".format(milestones) 26 | ) 27 | 28 | 29 | if warmup_method not in ("constant", "linear"): 30 | raise ValueError( 31 | "Only 'constant' or 'linear' warmup_method accepted, " 32 | "got {}".format(warmup_method) 33 | ) 34 | self.milestones = milestones 35 | self.gamma = gamma 36 | self.warmup_factor = warmup_factor 37 | self.warmup_iters = warmup_iters 38 | self.warmup_method = warmup_method 39 | super(WarmupMultiStepLR, self).__init__(optimizer, last_epoch) 40 | 41 | def get_lr(self): 42 | warmup_factor = 1 43 | if self.last_epoch < self.warmup_iters: 44 | if self.warmup_method == "constant": 45 | warmup_factor = self.warmup_factor 46 | elif self.warmup_method == "linear": 47 | alpha = float(self.last_epoch) / self.warmup_iters 48 | warmup_factor = self.warmup_factor * (1 - alpha) + alpha 49 | return [ 50 | base_lr 51 | * warmup_factor 52 | * self.gamma ** bisect_right(self.milestones, self.last_epoch) 53 | for base_lr in self.base_lrs 54 | ] 55 | 56 | 57 | 58 | class WarmupLinearLR(torch.optim.lr_scheduler._LRScheduler): 59 | def __init__( 60 | self, 61 | optimizer, 62 | final_lr, 63 | final_iters, 64 | warmup_factor=1.0 / 3, 65 | warmup_iters=500, 66 | warmup_method="linear", 67 | last_epoch=-1, 68 | ): 69 | assert final_iters > warmup_iters 70 | self.final_lr = final_lr 71 | self.final_iters = final_iters 72 | self.warmup_factor = warmup_factor 73 | self.warmup_iters = max(warmup_iters, 0) 74 | self.warmup_method = warmup_method 75 | super(WarmupLinearLR, self).__init__(optimizer, last_epoch) 76 | 77 | # last_epoch == 0: base_lr * warmup_factor 78 | # last_epoch == warmup_iters: base_lr 79 | # last_epoch == final_iters: final_lr 80 | 81 | def get_lr(self): 82 | if self.last_epoch < self.warmup_iters: 83 | if self.warmup_method == "constant": 84 | warmup_factor = self.warmup_factor 85 | elif self.warmup_method == "linear": 86 | alpha = float(self.last_epoch) / self.warmup_iters 87 | warmup_factor = self.warmup_factor * (1 - alpha) + alpha 88 | else: 89 | raise ValueError( 90 | "Only 'constant' or 'linear' warmup_method accepted, " 91 | "got {}".format(self.warmup_method) 92 | ) 93 | return [ 94 | base_lr 95 | * warmup_factor 96 | for base_lr in self.base_lrs 97 | ] 98 | else: 99 | return [ 100 | base_lr - (base_lr - self.final_lr) * float(self.last_epoch - self.warmup_iters) / ( 101 | self.final_iters - self.warmup_iters) 102 | for base_lr in self.base_lrs 103 | ] 104 | 105 | 106 | class CosineAnnealingExtendLR(torch.optim.lr_scheduler._LRScheduler): 107 | r"""Set the learning rate of each parameter group using a cosine annealing 108 | schedule, where :math:`\eta_{max}` is set to the initial lr and 109 | :math:`T_{cur}` is the number of epochs since the last restart in SGDR: 110 | 111 | .. math:: 112 | \eta_t = \eta_{min} + \frac{1}{2}(\eta_{max} - \eta_{min})(1 + 113 | \cos(\frac{T_{cur}}{T_{max}}\pi)) 114 | 115 | When last_epoch=-1, sets initial lr as lr. 116 | 117 | It has been proposed in 118 | `SGDR: Stochastic Gradient Descent with Warm Restarts`_. Note that this only 119 | implements the cosine annealing part of SGDR, and not the restarts. 120 | 121 | Args: 122 | optimizer (Optimizer): Wrapped optimizer. 123 | T_cosine_max (int): Maximum number of iterations. 124 | eta_min (float): Minimum learning rate. Default: 0. 125 | last_epoch (int): The index of last epoch. Default: -1. 126 | 127 | ..
_SGDR\: Stochastic Gradient Descent with Warm Restarts: 128 | https://arxiv.org/abs/1608.03983 129 | """ 130 | 131 | def __init__(self, optimizer, T_cosine_max, eta_min=0, last_epoch=-1): 132 | self.eta_min = eta_min 133 | self.T_cosine_max = T_cosine_max 134 | super(CosineAnnealingExtendLR, self).__init__(optimizer, last_epoch) 135 | 136 | def get_lr(self): 137 | if self.last_epoch <= self.T_cosine_max: 138 | return [self.eta_min + (base_lr - self.eta_min) * 139 | (1 + math.cos(math.pi * self.last_epoch / self.T_cosine_max)) / 2 140 | for base_lr in self.base_lrs] 141 | else: 142 | return [self.eta_min 143 | for _ in self.base_lrs] 144 | 145 | 146 | # LR scheduler should work according the number of iterations 147 | def get_lr_scheduler(cfg, optimizer): 148 | it_ep = num_iters_per_epoch(cfg) 149 | if cfg.linear_final_lr is None and cfg.cosine_minimum is None: 150 | lr_iter_boundaries = [it_ep * ep for ep in cfg.lr_epoch_boundaries] 151 | return WarmupMultiStepLR( 152 | optimizer, lr_iter_boundaries, cfg.lr_decay_factor, 153 | warmup_factor=cfg.warmup_factor, 154 | warmup_iters=cfg.warmup_epochs * it_ep, 155 | warmup_method=cfg.warmup_method, ) 156 | elif cfg.cosine_minimum is None: 157 | return WarmupLinearLR(optimizer, final_lr=cfg.linear_final_lr, 158 | final_iters=cfg.max_epochs * it_ep, 159 | warmup_factor=cfg.warmup_factor, 160 | warmup_iters=cfg.warmup_epochs * it_ep, 161 | warmup_method=cfg.warmup_method,) 162 | else: 163 | assert cfg.warmup_epochs == 0 164 | assert cfg.linear_final_lr is None 165 | assert cfg.lr_decay_factor is None 166 | if cfg.lr_epoch_boundaries is None: 167 | print('use cosine decay, the minimum is ', cfg.cosine_minimum) 168 | return CosineAnnealingLR(optimizer=optimizer, T_max=cfg.max_epochs * it_ep, eta_min=cfg.cosine_minimum) 169 | else: 170 | assert len(cfg.lr_epoch_boundaries) == 1 171 | assert cfg.cosine_minimum > 0 172 | print('use extended cosine decay, the minimum is ', cfg.cosine_minimum) 173 | return CosineAnnealingExtendLR(optimizer=optimizer, T_cosine_max=cfg.lr_epoch_boundaries[0] * it_ep, 174 | eta_min=cfg.cosine_minimum) 175 | 176 | 177 | -------------------------------------------------------------------------------- /acnet/acb.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.init as init 3 | import torch 4 | 5 | class ACBlock(nn.Module): 6 | 7 | def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, padding_mode='zeros', deploy=False, 8 | use_affine=True, reduce_gamma=False, gamma_init=None ): 9 | super(ACBlock, self).__init__() 10 | self.deploy = deploy 11 | if deploy: 12 | self.fused_conv = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=(kernel_size,kernel_size), stride=stride, 13 | padding=padding, dilation=dilation, groups=groups, bias=True, padding_mode=padding_mode) 14 | else: 15 | self.square_conv = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, 16 | kernel_size=(kernel_size, kernel_size), stride=stride, 17 | padding=padding, dilation=dilation, groups=groups, bias=False, 18 | padding_mode=padding_mode) 19 | self.square_bn = nn.BatchNorm2d(num_features=out_channels, affine=use_affine) 20 | 21 | 22 | if padding - kernel_size // 2 >= 0: 23 | # Common use case. 
E.g., k=3, p=1 or k=5, p=2 24 | self.crop = 0 25 | # Compared to the KxK layer, the padding of the 1xK layer and Kx1 layer should be adjust to align the sliding windows (Fig 2 in the paper) 26 | hor_padding = [padding - kernel_size // 2, padding] 27 | ver_padding = [padding, padding - kernel_size // 2] 28 | else: 29 | # A negative "padding" (padding - kernel_size//2 < 0, which is not a common use case) is cropping. 30 | # Since nn.Conv2d does not support negative padding, we implement it manually 31 | self.crop = kernel_size // 2 - padding 32 | hor_padding = [0, padding] 33 | ver_padding = [padding, 0] 34 | 35 | self.ver_conv = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=(kernel_size, 1), 36 | stride=stride, 37 | padding=ver_padding, dilation=dilation, groups=groups, bias=False, 38 | padding_mode=padding_mode) 39 | 40 | self.hor_conv = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=(1, kernel_size), 41 | stride=stride, 42 | padding=hor_padding, dilation=dilation, groups=groups, bias=False, 43 | padding_mode=padding_mode) 44 | self.ver_bn = nn.BatchNorm2d(num_features=out_channels, affine=use_affine) 45 | self.hor_bn = nn.BatchNorm2d(num_features=out_channels, affine=use_affine) 46 | 47 | if reduce_gamma: 48 | self.init_gamma(1.0 / 3) 49 | 50 | if gamma_init is not None: 51 | assert not reduce_gamma 52 | self.init_gamma(gamma_init) 53 | 54 | 55 | def _fuse_bn_tensor(self, conv, bn): 56 | std = (bn.running_var + bn.eps).sqrt() 57 | t = (bn.weight / std).reshape(-1, 1, 1, 1) 58 | return conv.weight * t, bn.bias - bn.running_mean * bn.weight / std 59 | 60 | def _add_to_square_kernel(self, square_kernel, asym_kernel): 61 | asym_h = asym_kernel.size(2) 62 | asym_w = asym_kernel.size(3) 63 | square_h = square_kernel.size(2) 64 | square_w = square_kernel.size(3) 65 | square_kernel[:, :, square_h // 2 - asym_h // 2: square_h // 2 - asym_h // 2 + asym_h, 66 | square_w // 2 - asym_w // 2: square_w // 2 - asym_w // 2 + asym_w] += asym_kernel 67 | 68 | def get_equivalent_kernel_bias(self): 69 | hor_k, hor_b = self._fuse_bn_tensor(self.hor_conv, self.hor_bn) 70 | ver_k, ver_b = self._fuse_bn_tensor(self.ver_conv, self.ver_bn) 71 | square_k, square_b = self._fuse_bn_tensor(self.square_conv, self.square_bn) 72 | self._add_to_square_kernel(square_k, hor_k) 73 | self._add_to_square_kernel(square_k, ver_k) 74 | return square_k, hor_b + ver_b + square_b 75 | 76 | 77 | def switch_to_deploy(self): 78 | deploy_k, deploy_b = self.get_equivalent_kernel_bias() 79 | self.deploy = True 80 | self.fused_conv = nn.Conv2d(in_channels=self.square_conv.in_channels, out_channels=self.square_conv.out_channels, 81 | kernel_size=self.square_conv.kernel_size, stride=self.square_conv.stride, 82 | padding=self.square_conv.padding, dilation=self.square_conv.dilation, groups=self.square_conv.groups, bias=True, 83 | padding_mode=self.square_conv.padding_mode) 84 | self.__delattr__('square_conv') 85 | self.__delattr__('square_bn') 86 | self.__delattr__('hor_conv') 87 | self.__delattr__('hor_bn') 88 | self.__delattr__('ver_conv') 89 | self.__delattr__('ver_bn') 90 | self.fused_conv.weight.data = deploy_k 91 | self.fused_conv.bias.data = deploy_b 92 | 93 | 94 | def init_gamma(self, gamma_value): 95 | init.constant_(self.square_bn.weight, gamma_value) 96 | init.constant_(self.ver_bn.weight, gamma_value) 97 | init.constant_(self.hor_bn.weight, gamma_value) 98 | print('init gamma of square, ver and hor as ', gamma_value) 99 | 100 | def single_init(self): 101 | 
init.constant_(self.square_bn.weight, 1.0) 102 | init.constant_(self.ver_bn.weight, 0.0) 103 | init.constant_(self.hor_bn.weight, 0.0) 104 | print('init gamma of square as 1, ver and hor as 0') 105 | 106 | def forward(self, input): 107 | if self.deploy: 108 | return self.fused_conv(input) 109 | else: 110 | square_outputs = self.square_conv(input) 111 | square_outputs = self.square_bn(square_outputs) 112 | if self.crop > 0: 113 | ver_input = input[:, :, :, self.crop:-self.crop] 114 | hor_input = input[:, :, self.crop:-self.crop, :] 115 | else: 116 | ver_input = input 117 | hor_input = input 118 | vertical_outputs = self.ver_conv(ver_input) 119 | vertical_outputs = self.ver_bn(vertical_outputs) 120 | horizontal_outputs = self.hor_conv(hor_input) 121 | horizontal_outputs = self.hor_bn(horizontal_outputs) 122 | result = square_outputs + vertical_outputs + horizontal_outputs 123 | return result 124 | 125 | if __name__ == '__main__': 126 | N = 1 127 | C = 2 128 | H = 62 129 | W = 62 130 | O = 8 131 | groups = 4 132 | 133 | x = torch.randn(N, C, H, W) 134 | print('input shape is ', x.size()) 135 | 136 | test_kernel_padding = [(3,1), (3,0), (5,1), (5,2), (5,3), (5,4), (5,6)] 137 | 138 | for k, p in test_kernel_padding: 139 | acb = ACBlock(C, O, kernel_size=k, padding=p, stride=1, deploy=False) 140 | acb.eval() 141 | for module in acb.modules(): 142 | if isinstance(module, nn.BatchNorm2d): 143 | nn.init.uniform_(module.running_mean, 0, 0.1) 144 | nn.init.uniform_(module.running_var, 0, 0.2) 145 | nn.init.uniform_(module.weight, 0, 0.3) 146 | nn.init.uniform_(module.bias, 0, 0.4) 147 | out = acb(x) 148 | acb.switch_to_deploy() 149 | deployout = acb(x) 150 | print('difference between the outputs of the training-time and converted ACB is') 151 | print(((deployout - out) ** 2).sum()) 152 | 153 | -------------------------------------------------------------------------------- /ndp_test.py: -------------------------------------------------------------------------------- 1 | from base_config import BaseConfigByEpoch 2 | from model_map import get_model_fn 3 | from data.data_factory import create_dataset, load_cuda_data 4 | from torch.nn.modules.loss import CrossEntropyLoss 5 | from utils.engine import Engine 6 | from utils.misc import torch_accuracy, AvgMeter 7 | from collections import OrderedDict 8 | import torch 9 | from tqdm import tqdm 10 | import time 11 | from builder import ConvBuilder 12 | from utils.misc import log_important, extract_deps_from_weights_file 13 | from base_config import get_baseconfig_for_test 14 | from data.data_factory import num_val_examples 15 | 16 | SPEED_TEST_SAMPLE_IGNORE_RATIO = 0.5 17 | 18 | TEST_BATCH_SIZE = 100 19 | OVERALL_LOG_FILE = 'overall_test_log.txt' 20 | DETAIL_LOG_FILE = 'detail_test_log.txt' 21 | 22 | def run_eval(val_data, max_iters, net, criterion, discrip_str, dataset_name): 23 | pbar = tqdm(range(max_iters)) 24 | top1 = AvgMeter() 25 | top5 = AvgMeter() 26 | losses = AvgMeter() 27 | pbar.set_description('Validation' + discrip_str) 28 | total_net_time = 0 29 | with torch.no_grad(): 30 | for iter_idx, i in enumerate(pbar): 31 | start_time = time.time() 32 | 33 | data, label = load_cuda_data(val_data, dataset_name=dataset_name) 34 | data_time = time.time() - start_time 35 | 36 | net_time_start = time.time() 37 | pred = net(data) 38 | net_time_end = time.time() 39 | 40 | if iter_idx >= SPEED_TEST_SAMPLE_IGNORE_RATIO * max_iters: 41 | total_net_time += net_time_end - net_time_start 42 | 43 | loss = criterion(pred, label) 44 | acc, acc5 = torch_accuracy(pred, 
label, (1, 5)) 45 | 46 | top1.update(acc.item()) 47 | top5.update(acc5.item()) 48 | losses.update(loss.item()) 49 | pbar_dic = OrderedDict() 50 | pbar_dic['data-time'] = '{:.2f}'.format(data_time) 51 | pbar_dic['top1'] = '{:.5f}'.format(top1.mean) 52 | pbar_dic['top5'] = '{:.5f}'.format(top5.mean) 53 | pbar_dic['loss'] = '{:.5f}'.format(losses.mean) 54 | pbar.set_postfix(pbar_dic) 55 | 56 | metric_dic = {'top1':torch.tensor(top1.mean), 57 | 'top5':torch.tensor(top5.mean), 58 | 'loss':torch.tensor(losses.mean)} 59 | # reduced_metirc_dic = reduce_loss_dict(metric_dic) 60 | reduced_metirc_dic = metric_dic #TODO note this 61 | return reduced_metirc_dic, total_net_time 62 | 63 | 64 | def val_during_train(epoch, iteration, tb_tags, 65 | engine, model, val_data, criterion, descrip_str, 66 | dataset_name, test_batch_size, tb_writer): 67 | model.eval() 68 | num_examples = num_val_examples(dataset_name) 69 | assert num_examples % test_batch_size == 0 70 | val_iters = num_examples // test_batch_size 71 | eval_dict, total_net_time = run_eval(val_data, val_iters, model, criterion, descrip_str, 72 | dataset_name=dataset_name) 73 | val_top1_value = eval_dict['top1'].item() 74 | val_top5_value = eval_dict['top5'].item() 75 | val_loss_value = eval_dict['loss'].item() 76 | for tag, value in zip(tb_tags, [val_top1_value, val_top5_value, val_loss_value]): 77 | tb_writer.add_scalars(tag, {'Val': value}, iteration) 78 | engine.log( 79 | 'val at epoch {}, top1={:.5f}, top5={:.5f}, loss={:.6f}'.format(epoch, val_top1_value, 80 | val_top5_value, 81 | val_loss_value)) 82 | model.train() 83 | 84 | 85 | def get_criterion(cfg): 86 | return CrossEntropyLoss() #TODO note this 87 | 88 | 89 | def ding_test(cfg:BaseConfigByEpoch, net=None, val_dataloader=None, show_variables=False, convbuilder=None, 90 | init_hdf5=None, extra_msg=None, weights_dict=None): 91 | 92 | with Engine(local_rank=0, for_val_only=True) as engine: 93 | 94 | engine.setup_log( 95 | name='test', log_dir='./', file_name=DETAIL_LOG_FILE) 96 | 97 | if convbuilder is None: 98 | convbuilder = ConvBuilder(base_config=cfg) 99 | 100 | if net is None: 101 | net_fn = get_model_fn(cfg.dataset_name, cfg.network_type) 102 | model = net_fn(cfg, convbuilder).cuda() 103 | else: 104 | model = net.cuda() 105 | 106 | if val_dataloader is None: 107 | val_data = create_dataset(cfg.dataset_name, cfg.dataset_subset, 108 | global_batch_size=cfg.global_batch_size, distributed=False) 109 | num_examples = num_val_examples(cfg.dataset_name) 110 | assert num_examples % cfg.global_batch_size == 0 111 | val_iters = num_val_examples(cfg.dataset_name) // cfg.global_batch_size 112 | print('batchsize={}, {} iters'.format(cfg.global_batch_size, val_iters)) 113 | 114 | criterion = get_criterion(cfg).cuda() 115 | 116 | engine.register_state( 117 | scheduler=None, model=model, optimizer=None) 118 | 119 | if show_variables: 120 | engine.show_variables() 121 | 122 | assert not engine.distributed 123 | 124 | if weights_dict is not None: 125 | engine.load_from_weights_dict(weights_dict) 126 | else: 127 | if cfg.init_weights: 128 | engine.load_checkpoint(cfg.init_weights) 129 | if init_hdf5: 130 | engine.load_hdf5(init_hdf5) 131 | 132 | # engine.save_by_order('smi2_by_order.hdf5') 133 | # engine.load_by_order('smi2_by_order.hdf5') 134 | # engine.save_hdf5('model_files/stami2_lrs4Z.hdf5') 135 | 136 | model.eval() 137 | eval_dict, total_net_time = run_eval(val_data, val_iters, model, criterion, 'TEST', dataset_name=cfg.dataset_name) 138 | val_top1_value = eval_dict['top1'].item() 139 | 
val_top5_value = eval_dict['top5'].item() 140 | val_loss_value = eval_dict['loss'].item() 141 | 142 | msg = '{},{},{},top1={:.5f},top5={:.5f},loss={:.7f},total_net_time={}'.format(cfg.network_type, init_hdf5 or cfg.init_weights, cfg.dataset_subset, 143 | val_top1_value, val_top5_value, val_loss_value, total_net_time) 144 | if extra_msg is not None: 145 | msg += ', ' + extra_msg 146 | log_important(msg, OVERALL_LOG_FILE) 147 | return eval_dict 148 | 149 | 150 | def general_test(network_type, weights, builder=None, net=None, dataset_name=None, weights_dict=None, 151 | batch_size=None): 152 | if weights is None or weights == 'None': 153 | init_weights = None 154 | init_hdf5 = None 155 | elif weights.endswith('.hdf5'): 156 | init_weights = None 157 | init_hdf5 = weights 158 | else: 159 | init_weights = weights 160 | init_hdf5 = None 161 | 162 | if init_hdf5 is not None: 163 | deps = extract_deps_from_weights_file(init_hdf5) 164 | else: 165 | deps = None 166 | 167 | if batch_size is None: 168 | batch_size = TEST_BATCH_SIZE 169 | test_config = get_baseconfig_for_test(network_type=network_type, dataset_subset='val', global_batch_size=batch_size, 170 | init_weights=init_weights, deps=deps, dataset_name=dataset_name) 171 | return ding_test(cfg=test_config, net=net, show_variables=True, init_hdf5=init_hdf5, convbuilder=builder, weights_dict=weights_dict) 172 | 173 | 174 | if __name__ == '__main__': 175 | import sys 176 | builder = None 177 | if 'deploy' in sys.argv[2]: 178 | from nobn_builder import NoBNBuilder 179 | builder = NoBNBuilder(base_config=None) 180 | general_test(network_type=sys.argv[1], weights=sys.argv[2], builder=builder) -------------------------------------------------------------------------------- /deprecated/base_model/resnet.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ResNet in PyTorch. For Pre-activation ResNet, see 'preact_resnet.py'. 3 | Reference: 4 | [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun 5 | Deep Residual Learning for Image Recognition.
arXiv:1512.03385 6 | 7 | Note: cifar_resnet18 constructs the same model with that from 8 | https://github.com/kuangliu/pytorch-cifar/blob/master/models/resnet.py 9 | ''' 10 | 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | from builder import ConvBuilder 14 | 15 | class BasicBlock(nn.Module): 16 | 17 | expansion = 1 18 | 19 | def __init__(self, builder:ConvBuilder, in_planes, planes, stride=1): 20 | super(BasicBlock, self).__init__() 21 | self.conv1 = builder.Conv2dBNReLU(in_channels=in_planes, out_channels=planes, kernel_size=3, stride=stride, padding=1) 22 | self.conv2 = builder.Conv2dBN(in_channels=planes, out_channels=self.expansion * planes, kernel_size=3, stride=1, padding=1) 23 | 24 | if stride != 1 or in_planes != self.expansion * planes: 25 | self.shortcut = builder.Conv2dBN(in_channels=in_planes, out_channels=self.expansion * planes, kernel_size=1, stride=stride) 26 | else: 27 | self.shortcut = builder.ResIdentity(num_channels=in_planes) 28 | 29 | 30 | 31 | def forward(self, x): 32 | out = self.conv1(x) 33 | out = self.conv2(out) 34 | out += self.shortcut(x) 35 | out = F.relu(out) 36 | return out 37 | 38 | 39 | class Bottleneck(nn.Module): 40 | expansion = 4 41 | 42 | def __init__(self, builder:ConvBuilder, in_planes, planes, stride=1): 43 | super(Bottleneck, self).__init__() 44 | self.bd = builder 45 | 46 | self.conv1 = builder.Conv2dBNReLU(in_planes, planes, kernel_size=1) 47 | self.conv2 = builder.Conv2dBNReLU(planes, planes, kernel_size=3, stride=stride, padding=1) 48 | self.conv3 = builder.Conv2dBN(planes, self.expansion*planes, kernel_size=1) 49 | 50 | 51 | if stride != 1 or in_planes != self.expansion*planes: 52 | self.shortcut = builder.Conv2dBN(in_planes, self.expansion*planes, kernel_size=1, stride=stride) 53 | else: 54 | self.shortcut = builder.ResIdentity(num_channels=in_planes) 55 | 56 | 57 | def forward(self, x): 58 | out = self.conv1(x) 59 | out = self.conv2(out) 60 | out = self.conv3(out) 61 | out += self.shortcut(x) 62 | out = F.relu(out) 63 | return out 64 | 65 | 66 | 67 | class RCBlock(nn.Module): 68 | 69 | def __init__(self, in_channels, out_channels, stride=1, builder=None): 70 | super(RCBlock, self).__init__() 71 | 72 | self.conv1 = builder.Conv2dBNReLU(in_channels, out_channels, kernel_size=3, stride=stride, padding=1) 73 | self.conv2 = builder.Conv2dBN(out_channels, out_channels, kernel_size=3, stride=1, padding=1) 74 | 75 | if stride != 1: 76 | self.shortcut = builder.Conv2dBN(in_channels, out_channels, kernel_size=1, stride=stride) 77 | else: 78 | self.shortcut = builder.ResIdentity(num_channels=out_channels) 79 | self.relu = builder.ReLU() 80 | 81 | 82 | def forward(self, x): 83 | out = self.conv1(x) 84 | out = self.conv2(out) 85 | out += self.shortcut(x) 86 | out = self.relu(out) 87 | return out 88 | 89 | 90 | class RCNet(nn.Module): 91 | 92 | def __init__(self, block_counts, num_classes, builder:ConvBuilder): 93 | super(RCNet, self).__init__() 94 | self.bd = builder 95 | 96 | self.conv1 = self.bd.Conv2dBNReLU(in_channels=3, out_channels=16, kernel_size=3, stride=1, padding=1) 97 | self.stage1 = self._build_stage(stage_in_channels=16, out_channels=16, num_blocks=block_counts[0], stride=1) 98 | self.stage2 = self._build_stage(stage_in_channels=16, out_channels=32, num_blocks=block_counts[1], stride=2) 99 | self.stage3 = self._build_stage(stage_in_channels=32, out_channels=64, num_blocks=block_counts[2], stride=2) 100 | self.linear = self.bd.Linear(in_features=64, out_features=num_classes) 101 | 102 | 103 | 104 | def 
_build_stage(self, stage_in_channels, out_channels, num_blocks, stride): 105 | strides = [stride] + [1] * (num_blocks - 1) 106 | in_channel_list = [stage_in_channels] + [out_channels] * (num_blocks - 1) 107 | layers = [] 108 | for block_stride, block_in_channels in zip(strides, in_channel_list): 109 | layers.append(RCBlock(in_channels=block_in_channels, out_channels=out_channels, stride=block_stride, builder=self.bd)) 110 | return nn.Sequential(*layers) 111 | 112 | def forward(self, x): 113 | out = self.conv1(x) 114 | out = self.stage1(out) 115 | out = self.stage2(out) 116 | out = self.stage3(out) 117 | out = self.bd.avg_pool2d(in_features=out, kernel_size=8, stride=1, padding=0) 118 | out = self.bd.flatten(out) 119 | out = self.linear(out) 120 | return out 121 | 122 | 123 | def create_RC56(cfg, builder): 124 | return RCNet(block_counts=[9,9,9], num_classes=10, builder=builder) 125 | 126 | def create_RC110(cfg, builder): 127 | return RCNet(block_counts=[18,18,18], num_classes=10, builder=builder) 128 | 129 | def create_RC164(cfg, builder): 130 | return RCNet(block_counts=[27,27,27], num_classes=10, builder=builder) 131 | 132 | def create_RH56(cfg, builder): 133 | return RCNet(block_counts=[9,9,9], num_classes=100, builder=builder) 134 | 135 | def create_RH110(cfg, builder): 136 | return RCNet(block_counts=[18,18,18], num_classes=100, builder=builder) 137 | 138 | def create_RH164(cfg, builder): 139 | return RCNet(block_counts=[27,27,27], num_classes=100, builder=builder) 140 | 141 | 142 | 143 | 144 | class ResNet(nn.Module): 145 | def __init__(self, builder:ConvBuilder, block, num_blocks, num_classes=10): 146 | super(ResNet, self).__init__() 147 | self.bd = builder 148 | self.in_planes = 64 149 | self.conv1 = builder.Conv2dBNReLU(3, 64, kernel_size=7, stride=2, padding=3) 150 | self.stage1 = self._make_stage(block, 64, num_blocks[0], stride=1) 151 | self.stage2 = self._make_stage(block, 128, num_blocks[1], stride=2) 152 | self.stage3 = self._make_stage(block, 256, num_blocks[2], stride=2) 153 | self.stage4 = self._make_stage(block, 512, num_blocks[3], stride=2) 154 | self.linear = self.bd.Linear(512*block.expansion, num_classes) 155 | 156 | def _make_stage(self, block, planes, num_blocks, stride): 157 | strides = [stride] + [1]*(num_blocks-1) 158 | blocks = [] 159 | for stride in strides: 160 | blocks.append(block(builder=self.bd, in_planes=self.in_planes, planes=planes, stride=stride)) 161 | self.in_planes = planes * block.expansion 162 | return nn.Sequential(*blocks) 163 | 164 | def forward(self, x): 165 | out = self.conv1(x) 166 | out = self.bd.max_pool2d(out, kernel_size=3, stride=2, padding=1) 167 | out = self.stage1(out) 168 | out = self.stage2(out) 169 | out = self.stage3(out) 170 | out = self.stage4(out) 171 | out = self.bd.avg_pool2d(out, 7, 1, 0) 172 | out = self.bd.flatten(out) 173 | out = self.linear(out) 174 | return out 175 | 176 | def create_ResNet18(cfg, builder): 177 | return ResNet(builder, BasicBlock, [2,2,2,2], num_classes=1000) 178 | 179 | def create_ResNet34(cfg, builder): 180 | return ResNet(builder, BasicBlock, [3,4,6,3], num_classes=1000) 181 | 182 | def create_ResNet50(cfg, builder): 183 | return ResNet(builder, Bottleneck, [3,4,6,3], num_classes=1000) 184 | 185 | def create_ResNet101(cfg, builder): 186 | return ResNet(builder, Bottleneck, [3,4,23,3], num_classes=1000) 187 | 188 | def create_ResNet152(cfg, builder): 189 | return ResNet(builder, Bottleneck, [3,8,36,3], num_classes=1000) 190 | 191 | 
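# --- Illustrative worked example (editor's note, not part of the original file) ---
# The _build_stage()/_make_stage() helpers above give the configured stride to the
# first block of a stage only and feed every later block the previous block's output
# width. For the second RCNet stage of create_RC56 (stage_in_channels=16,
# out_channels=32, num_blocks=9, stride=2) the pairing works out to:
#
#     strides         = [2] + [1] * 8        # -> [2, 1, 1, 1, 1, 1, 1, 1, 1]
#     in_channel_list = [16] + [32] * 8
#     for block_stride, block_in_channels in zip(strides, in_channel_list):
#         RCBlock(in_channels=block_in_channels, out_channels=32,
#                 stride=block_stride, builder=...)
#
# so only the first block downsamples (32x32 -> 16x16 on CIFAR) and widens the
# features from 16 to 32 channels; the remaining eight blocks keep 32 channels at
# stride 1, which is why RCBlock only needs a 1x1 projection shortcut when stride != 1.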
-------------------------------------------------------------------------------- /acnet/acnet_test.py: -------------------------------------------------------------------------------- 1 | from base_config import BaseConfigByEpoch 2 | from model_map import get_model_fn 3 | from data.data_factory import create_dataset, load_cuda_data 4 | from torch.nn.modules.loss import CrossEntropyLoss 5 | from utils.engine import Engine 6 | from utils.misc import torch_accuracy, AvgMeter 7 | from collections import OrderedDict 8 | import torch 9 | from tqdm import tqdm 10 | import time 11 | from builder import ConvBuilder 12 | from utils.misc import log_important, extract_deps_from_weights_file 13 | from base_config import get_baseconfig_for_test 14 | from data.data_factory import num_val_examples 15 | 16 | SPEED_TEST_SAMPLE_IGNORE_RATIO = 0.5 17 | 18 | TEST_BATCH_SIZE = 100 19 | OVERALL_LOG_FILE = 'overall_test_log.txt' 20 | DETAIL_LOG_FILE = 'detail_test_log.txt' 21 | 22 | def run_eval(val_data, max_iters, net, criterion, discrip_str, dataset_name): 23 | pbar = tqdm(range(max_iters)) 24 | top1 = AvgMeter() 25 | top5 = AvgMeter() 26 | losses = AvgMeter() 27 | pbar.set_description('Validation' + discrip_str) 28 | total_net_time = 0 29 | with torch.no_grad(): 30 | for iter_idx, i in enumerate(pbar): 31 | start_time = time.time() 32 | 33 | data, label = load_cuda_data(val_data, dataset_name=dataset_name) 34 | data_time = time.time() - start_time 35 | 36 | net_time_start = time.time() 37 | pred = net(data) 38 | net_time_end = time.time() 39 | 40 | if iter_idx >= SPEED_TEST_SAMPLE_IGNORE_RATIO * max_iters: 41 | total_net_time += net_time_end - net_time_start 42 | 43 | loss = criterion(pred, label) 44 | acc, acc5 = torch_accuracy(pred, label, (1, 5)) 45 | 46 | top1.update(acc.item()) 47 | top5.update(acc5.item()) 48 | losses.update(loss.item()) 49 | pbar_dic = OrderedDict() 50 | pbar_dic['data-time'] = '{:.2f}'.format(data_time) 51 | pbar_dic['top1'] = '{:.5f}'.format(top1.mean) 52 | pbar_dic['top5'] = '{:.5f}'.format(top5.mean) 53 | pbar_dic['loss'] = '{:.5f}'.format(losses.mean) 54 | pbar.set_postfix(pbar_dic) 55 | 56 | metric_dic = {'top1':torch.tensor(top1.mean), 57 | 'top5':torch.tensor(top5.mean), 58 | 'loss':torch.tensor(losses.mean)} 59 | # reduced_metirc_dic = reduce_loss_dict(metric_dic) 60 | reduced_metirc_dic = metric_dic #TODO note this 61 | return reduced_metirc_dic, total_net_time 62 | 63 | 64 | def val_during_train(epoch, iteration, tb_tags, 65 | engine, model, val_data, criterion, descrip_str, 66 | dataset_name, test_batch_size, tb_writer): 67 | model.eval() 68 | num_examples = num_val_examples(dataset_name) 69 | assert num_examples % test_batch_size == 0 70 | val_iters = num_examples // test_batch_size 71 | eval_dict, total_net_time = run_eval(val_data, val_iters, model, criterion, descrip_str, 72 | dataset_name=dataset_name) 73 | val_top1_value = eval_dict['top1'].item() 74 | val_top5_value = eval_dict['top5'].item() 75 | val_loss_value = eval_dict['loss'].item() 76 | for tag, value in zip(tb_tags, [val_top1_value, val_top5_value, val_loss_value]): 77 | tb_writer.add_scalars(tag, {'Val': value}, iteration) 78 | engine.log( 79 | 'val at epoch {}, top1={:.5f}, top5={:.5f}, loss={:.6f}'.format(epoch, val_top1_value, 80 | val_top5_value, 81 | val_loss_value)) 82 | model.train() 83 | 84 | 85 | def get_criterion(cfg): 86 | return CrossEntropyLoss() #TODO note this 87 | 88 | 89 | def ding_test(cfg:BaseConfigByEpoch, net=None, val_dataloader=None, show_variables=False, convbuilder=None, 90 | 
init_hdf5=None, extra_msg=None, weights_dict=None): 91 | 92 | with Engine(local_rank=0, for_val_only=True) as engine: 93 | 94 | engine.setup_log( 95 | name='test', log_dir='./', file_name=DETAIL_LOG_FILE) 96 | 97 | if convbuilder is None: 98 | convbuilder = ConvBuilder(base_config=cfg) 99 | 100 | if net is None: 101 | net_fn = get_model_fn(cfg.dataset_name, cfg.network_type) 102 | model = net_fn(cfg, convbuilder).cuda() 103 | else: 104 | model = net.cuda() 105 | 106 | if val_dataloader is None: 107 | val_data = create_dataset(cfg.dataset_name, cfg.dataset_subset, 108 | global_batch_size=cfg.global_batch_size, distributed=False) 109 | num_examples = num_val_examples(cfg.dataset_name) 110 | assert num_examples % cfg.global_batch_size == 0 111 | val_iters = num_val_examples(cfg.dataset_name) // cfg.global_batch_size 112 | print('batchsize={}, {} iters'.format(cfg.global_batch_size, val_iters)) 113 | 114 | criterion = get_criterion(cfg).cuda() 115 | 116 | engine.register_state( 117 | scheduler=None, model=model, optimizer=None) 118 | 119 | if show_variables: 120 | engine.show_variables() 121 | 122 | assert not engine.distributed 123 | 124 | if weights_dict is not None: 125 | engine.load_from_weights_dict(weights_dict) 126 | else: 127 | if cfg.init_weights: 128 | engine.load_checkpoint(cfg.init_weights) 129 | if init_hdf5: 130 | engine.load_hdf5(init_hdf5) 131 | 132 | # engine.save_by_order('smi2_by_order.hdf5') 133 | # engine.load_by_order('smi2_by_order.hdf5') 134 | # engine.save_hdf5('model_files/stami2_lrs4Z.hdf5') 135 | 136 | model.eval() 137 | eval_dict, total_net_time = run_eval(val_data, val_iters, model, criterion, 'TEST', dataset_name=cfg.dataset_name) 138 | val_top1_value = eval_dict['top1'].item() 139 | val_top5_value = eval_dict['top5'].item() 140 | val_loss_value = eval_dict['loss'].item() 141 | 142 | msg = '{},{},{},top1={:.5f},top5={:.5f},loss={:.7f},total_net_time={}'.format(cfg.network_type, init_hdf5 or cfg.init_weights, cfg.dataset_subset, 143 | val_top1_value, val_top5_value, val_loss_value, total_net_time) 144 | if extra_msg is not None: 145 | msg += ', ' + extra_msg 146 | log_important(msg, OVERALL_LOG_FILE) 147 | return eval_dict 148 | 149 | 150 | def general_test(network_type, weights, builder=None, net=None, dataset_name=None, weights_dict=None, 151 | batch_size=None): 152 | if weights is None or weights == 'None': 153 | init_weights = None 154 | init_hdf5 = None 155 | elif weights.endswith('.hdf5'): 156 | init_weights = None 157 | init_hdf5 = weights 158 | else: 159 | init_weights = weights 160 | init_hdf5 = None 161 | 162 | if init_hdf5 is not None: 163 | deps = extract_deps_from_weights_file(init_hdf5) 164 | else: 165 | deps = None 166 | 167 | if deps is None and ('wrnc16' in network_type or 'wrnh16' in network_type): 168 | from constants import wrn_origin_deps_flattened 169 | deps = wrn_origin_deps_flattened(2, 8) 170 | 171 | 172 | if batch_size is None: 173 | batch_size = TEST_BATCH_SIZE 174 | test_config = get_baseconfig_for_test(network_type=network_type, dataset_subset='val', global_batch_size=batch_size, 175 | init_weights=init_weights, deps=deps, dataset_name=dataset_name) 176 | return ding_test(cfg=test_config, net=net, show_variables=True, init_hdf5=init_hdf5, convbuilder=builder, weights_dict=weights_dict) 177 | 178 | 179 | if __name__ == '__main__': 180 | 181 | import sys 182 | network_type = 'resnet50' 183 | weights = sys.argv[1] 184 | dataset_name='imagenet_standard' 185 | from acnet.acnet_builder import ACNetBuilder 186 | builder = 
ACNetBuilder(base_config=None, deploy=False, gamma_init=1/3) 187 | 188 | general_test(network_type=network_type, weights=weights, builder=builder, 189 | dataset_name=dataset_name) --------------------------------------------------------------------------------
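Note on the deploy-time conversion exercised by the two test entry points above: ndp_test.py switches to NoBNBuilder when the weights file name contains 'deploy', while acnet_test.py evaluates the training-time model through ACNetBuilder(deploy=False). The conversion between the two forms (ACBlock.switch_to_deploy / get_equivalent_kernel_bias in acnet/acb.py) rests on folding an eval-mode BatchNorm into the preceding bias-free convolution. A minimal standalone sketch of that identity, independent of the repo code (plain PyTorch; the layer sizes here are arbitrary and chosen only for the check):

import torch
import torch.nn as nn

conv = nn.Conv2d(2, 8, kernel_size=3, padding=1, bias=False)
bn = nn.BatchNorm2d(8)
# give the BN non-trivial statistics so the comparison is meaningful
nn.init.uniform_(bn.running_mean, 0, 0.1)
nn.init.uniform_(bn.running_var, 0, 0.2)
nn.init.uniform_(bn.weight, 0, 0.3)
nn.init.uniform_(bn.bias, 0, 0.4)
bn.eval()

# W' = W * gamma / std (per output channel), b' = beta - running_mean * gamma / std,
# exactly the formula used by ACBlock._fuse_bn_tensor
std = (bn.running_var + bn.eps).sqrt()
fused = nn.Conv2d(2, 8, kernel_size=3, padding=1, bias=True)
fused.weight.data = conv.weight * (bn.weight / std).reshape(-1, 1, 1, 1)
fused.bias.data = bn.bias - bn.running_mean * bn.weight / std

x = torch.randn(1, 2, 32, 32)
with torch.no_grad():
    print(((bn(conv(x)) - fused(x)) ** 2).sum())   # should be ~0

ACBlock.get_equivalent_kernel_bias() applies this fusion to the square, horizontal and vertical branches, then adds the fused 1xK and Kx1 kernels into the centre row and column of the KxK kernel via _add_to_square_kernel, so the three fused biases simply sum.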