├── __init__.py
├── base_model
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── __init__.cpython-35.pyc
│   │   └── mobilenetv1.cpython-35.pyc
│   ├── lenet5.py
│   ├── cfqk.py
│   ├── mobilenetv1.py
│   ├── vgg.py
│   ├── resnet.py
│   └── wrn.py
├── deprecated
│   ├── __init__.py
│   ├── base_model
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── cfqk.cpython-35.pyc
│   │   │   ├── vgg.cpython-35.pyc
│   │   │   ├── wrn.cpython-35.pyc
│   │   │   ├── lenet5.cpython-35.pyc
│   │   │   ├── resnet.cpython-35.pyc
│   │   │   ├── __init__.cpython-35.pyc
│   │   │   └── mobilenetv1.cpython-35.pyc
│   │   ├── lenet5.py
│   │   ├── cfqk.py
│   │   ├── mobilenetv1.py
│   │   ├── vgg.py
│   │   ├── wrn.py
│   │   └── resnet.py
│   ├── utils
│   │   ├── __pycache__
│   │   │   ├── comm.cpython-35.pyc
│   │   │   ├── misc.cpython-35.pyc
│   │   │   ├── engine.cpython-35.pyc
│   │   │   ├── logger.cpython-35.pyc
│   │   │   ├── pyt_utils.cpython-35.pyc
│   │   │   ├── checkpoint.cpython-35.pyc
│   │   │   └── lr_scheduler.cpython-35.pyc
│   │   ├── logger.py
│   │   ├── timer.py
│   │   ├── pyt_utils.py
│   │   ├── loss.py
│   │   ├── checkpoint.py
│   │   ├── torch_utils.py
│   │   ├── lr_scheduler.py
│   │   ├── comm.py
│   │   └── misc.py
│   ├── acnet
│   │   ├── __pycache__
│   │   │   └── acnet_builder.cpython-35.pyc
│   │   ├── acnet_test.py
│   │   ├── acnet_rc56.py
│   │   ├── acnet_cfqkbnc.py
│   │   ├── acnet_vc.py
│   │   ├── acnet_wrnc16.py
│   │   └── acnet_fusion.py
│   ├── custom_layers
│   │   ├── __pycache__
│   │   │   ├── se_block.cpython-35.pyc
│   │   │   └── flatten_layer.cpython-35.pyc
│   │   ├── flatten_layer.py
│   │   └── se_block.py
│   ├── show_log.py
│   ├── display_hdf5.py
│   ├── model_map.py
│   ├── README.md
│   ├── base_config.py
│   ├── dataset.py
│   ├── builder.py
│   ├── ding_test.py
│   └── constants.py
├── .gitignore
├── utils
│   ├── __pycache__
│   │   ├── comm.cpython-35.pyc
│   │   ├── misc.cpython-35.pyc
│   │   ├── misc.cpython-37.pyc
│   │   ├── engine.cpython-35.pyc
│   │   ├── logger.cpython-35.pyc
│   │   ├── checkpoint.cpython-35.pyc
│   │   ├── pyt_utils.cpython-35.pyc
│   │   └── lr_scheduler.cpython-35.pyc
│   ├── logger.py
│   ├── timer.py
│   ├── pyt_utils.py
│   ├── loss.py
│   ├── torch_utils.py
│   ├── comm.py
│   ├── checkpoint.py
│   └── lr_scheduler.py
├── custom_layers
│   ├── __pycache__
│   │   ├── se_block.cpython-35.pyc
│   │   └── flatten_layer.cpython-35.pyc
│   ├── max_layer.py
│   ├── flatten_layer.py
│   ├── pad_layer.py
│   ├── se_block.py
│   ├── scale_layer.py
│   └── crop_layer.py
├── nobn_builder.py
├── LICENSE
├── data
│   ├── dataset_util.py
│   ├── imagenet_data.py
│   └── data_factory.py
├── model_map.py
├── display_hdf5.py
├── acnet
│   ├── acnet_fusion.py
│   ├── acnet_builder.py
│   ├── do_acnet.py
│   ├── acb.py
│   └── acnet_test.py
├── show_log.py
├── base_config.py
└── ndp_test.py
/__init__.py: -------------------------------------------------------------------------------- 1 | --------------------------------------------------------------------------------
/base_model/__init__.py: -------------------------------------------------------------------------------- 1 | --------------------------------------------------------------------------------
/deprecated/__init__.py: -------------------------------------------------------------------------------- 1 | --------------------------------------------------------------------------------
/deprecated/base_model/__init__.py: -------------------------------------------------------------------------------- 1 | --------------------------------------------------------------------------------
/.gitignore: -------------------------------------------------------------------------------- 1 | #*.checkpoint 2 | #*.log 3 | #*log/* 4 | 5 | .idea/workspace.xml 6 | .idea/* --------------------------------------------------------------------------------
/custom_layers/max_layer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class MaxLayer(nn.Module): 5 | 6 | def __init__(self): 7 | super(MaxLayer, self).__init__() 8 | 9 | def forward(self, a, b): 10 | return torch.max(a, b) 11 | --------------------------------------------------------------------------------
/custom_layers/flatten_layer.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | class FlattenLayer(nn.Module): 4 | 5 | def __init__(self): 6 | super(FlattenLayer, self).__init__() 7 | 8 | def forward(self, inputs): 9 | return inputs.view(inputs.size(0), -1) 10 | --------------------------------------------------------------------------------
/deprecated/custom_layers/flatten_layer.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | class FlattenLayer(nn.Module): 4 | 5 | def __init__(self): 6 | super(FlattenLayer, self).__init__() 7 | 8 | def forward(self, inputs): 9 | return inputs.view(inputs.size(0), -1) 10 | --------------------------------------------------------------------------------
/custom_layers/pad_layer.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | class
PadLayer(nn.Module): 5 | 6 | # Zero-pads the feature map by self.pad rows/columns on each of the four sides, e.g., pad=1 adds one row at the top and bottom and one column at the left and right (a negative value crops instead) 7 | def __init__(self, pad): 8 | super(PadLayer, self).__init__() 9 | self.pad = pad 10 | 11 | def forward(self, input): 12 | return F.pad(input, [self.pad] * 4) --------------------------------------------------------------------------------
/deprecated/acnet/acnet_test.py: -------------------------------------------------------------------------------- 1 | from ding_test import general_test 2 | from acnet.acnet_fusion import convert_acnet_weights 3 | from acnet.acnet_builder import ACNetBuilder 4 | import sys 5 | 6 | def convert_and_test(network_type, train_weights): 7 | builder = ACNetBuilder(base_config=None, deploy=False) 8 | general_test(network_type=network_type, weights=train_weights, builder=builder) 9 | deploy_weights = train_weights.replace('.hdf5', '_deploy.hdf5') 10 | convert_acnet_weights(train_weights, deploy_weights=deploy_weights, eps=1e-5) 11 | builder.switch_to_deploy() 12 | general_test(network_type=network_type, weights=deploy_weights, builder=builder) 13 | 14 | if __name__ == '__main__': 15 | convert_and_test(sys.argv[1], sys.argv[2]) 16 | 17 | --------------------------------------------------------------------------------
/custom_layers/se_block.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | class SEBlock(nn.Module): 5 | 6 | def __init__(self, input_channels, internal_neurons): 7 | super(SEBlock, self).__init__() 8 | self.down = nn.Conv2d(in_channels=input_channels, out_channels=internal_neurons, kernel_size=1, stride=1, bias=True) 9 | self.up = nn.Conv2d(in_channels=internal_neurons, out_channels=input_channels, kernel_size=1, stride=1, bias=True) 10 | 11 | def forward(self, inputs): 12 | x = F.avg_pool2d(inputs, kernel_size=inputs.size(3)) 13 | x = self.down(x) 14 | x = F.relu(x) 15 | x = self.up(x) 16 | x = F.sigmoid(x) 17 | x = x.repeat(1, 1, inputs.size(2), inputs.size(3)) 18 | return inputs * x --------------------------------------------------------------------------------
/deprecated/custom_layers/se_block.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | class SEBlock(nn.Module): 5 | 6 | def __init__(self, input_channels, internal_neurons): 7 | super(SEBlock, self).__init__() 8 | self.down = nn.Conv2d(in_channels=input_channels, out_channels=internal_neurons, kernel_size=1, stride=1, bias=True) 9 | self.up = nn.Conv2d(in_channels=internal_neurons, out_channels=input_channels, kernel_size=1, stride=1, bias=True) 10 | 11 | def forward(self, inputs): 12 | x = F.avg_pool2d(inputs, kernel_size=inputs.size(3)) 13 | x = self.down(x) 14 | x = F.relu(x) 15 | x = self.up(x) 16 | x = F.sigmoid(x) 17 | x = x.repeat(1, 1, inputs.size(2), inputs.size(3)) 18 | return inputs * x --------------------------------------------------------------------------------
/custom_layers/scale_layer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn.parameter import Parameter 3 | import torch.nn.init as init 4 | 5 | class ScaleLayer(torch.nn.Module): 6 | 7 | def __init__(self, num_features, use_bias=True): 8 | super(ScaleLayer, self).__init__() 9 | self.weight = Parameter(torch.Tensor(num_features)) 10 | init.ones_(self.weight) 11 | self.num_features =
num_features 12 | 13 | if use_bias: 14 | self.bias = Parameter(torch.Tensor(num_features)) 15 | init.zeros_(self.bias) 16 | else: 17 | self.bias = None 18 | 19 | 20 | def forward(self, inputs): 21 | if self.bias is None: 22 | return inputs * self.weight.view(1, self.num_features, 1, 1) 23 | else: 24 | return inputs * self.weight.view(1, self.num_features, 1, 1) + self.bias -------------------------------------------------------------------------------- /nobn_builder.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from builder import ConvBuilder 3 | 4 | class NoBNBuilder(ConvBuilder): 5 | 6 | def __init__(self, base_config): 7 | super(NoBNBuilder, self).__init__(base_config=base_config) 8 | print('NoBN ConvBuilder initialized.') 9 | 10 | def Conv2dBN(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, padding_mode='zeros', use_original_conv=False): 11 | conv_layer = self.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, 12 | stride=stride, padding=padding, dilation=dilation, groups=groups, 13 | bias=True, padding_mode=padding_mode, use_original_conv=use_original_conv) 14 | se = self.Sequential() 15 | se.add_module('conv', conv_layer) 16 | return se 17 | 18 | 19 | 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /utils/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import coloredlogs 3 | import os 4 | 5 | 6 | def get_logger(name='', save_dir=None, distributed_rank=0, filename="log.txt"): 7 | logger = logging.getLogger(name) 8 | coloredlogs.install(level='DEBUG', logger=logger) 9 | # logger.setLevel(logging.DEBUG) 10 | # don't log results for the non-master process 11 | if distributed_rank > 0: 12 | return logger 13 | formatter = logging.Formatter( 14 | "%(asctime)s %(name)s %(levelname)s: %(message)s") 15 | 16 | # ch = logging.StreamHandler(stream=sys.stdout) 17 | # ch.setLevel(logging.DEBUG) 18 | # ch.setFormatter(formatter) 19 | # logger.addHandler(ch) 20 | 21 | if save_dir: 22 | fh = logging.FileHandler(os.path.join(save_dir, filename)) 23 | fh.setLevel(logging.DEBUG) 24 | fh.setFormatter(formatter) 25 | if len(logger.handlers) > 0: 26 | logger.removeHandler(logger.handlers[0]) 27 | logger.addHandler(fh) 28 | 29 | return logger 30 | 31 | -------------------------------------------------------------------------------- /custom_layers/crop_layer.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | class CropLayer(nn.Module): 4 | 5 | # E.g., (-1, 0) means this layer should crop the first and last rows of the feature map. 
And (0, -1) crops the first and last columns 6 | def __init__(self, crop_set): 7 | super(CropLayer, self).__init__() 8 | self.rows_to_crop = - crop_set[0] 9 | self.cols_to_crop = - crop_set[1] 10 | assert self.rows_to_crop >= 0 11 | assert self.cols_to_crop >= 0 12 | 13 | def forward(self, input): 14 | if self.rows_to_crop == 0 and self.cols_to_crop == 0: 15 | return input 16 | elif self.rows_to_crop > 0 and self.cols_to_crop == 0: 17 | return input[:, :, self.rows_to_crop:-self.rows_to_crop, :] 18 | elif self.rows_to_crop == 0 and self.cols_to_crop > 0: 19 | return input[:, :, :, self.cols_to_crop:-self.cols_to_crop] 20 | else: 21 | return input[:, :, self.rows_to_crop:-self.rows_to_crop, self.cols_to_crop:-self.cols_to_crop] -------------------------------------------------------------------------------- /deprecated/utils/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import coloredlogs 3 | import os 4 | 5 | 6 | def get_logger(name='', save_dir=None, distributed_rank=0, filename="log.txt"): 7 | logger = logging.getLogger(name) 8 | coloredlogs.install(level='DEBUG', logger=logger) 9 | # logger.setLevel(logging.DEBUG) 10 | # don't log results for the non-master process 11 | if distributed_rank > 0: 12 | return logger 13 | formatter = logging.Formatter( 14 | "%(asctime)s %(name)s %(levelname)s: %(message)s") 15 | 16 | # ch = logging.StreamHandler(stream=sys.stdout) 17 | # ch.setLevel(logging.DEBUG) 18 | # ch.setFormatter(formatter) 19 | # logger.addHandler(ch) 20 | 21 | if save_dir: 22 | fh = logging.FileHandler(os.path.join(save_dir, filename)) 23 | fh.setLevel(logging.DEBUG) 24 | fh.setFormatter(formatter) 25 | if len(logger.handlers) > 0: 26 | logger.removeHandler(logger.handlers[0]) 27 | logger.addHandler(fh) 28 | 29 | return logger 30 | 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Ding Xiaohan 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /data/dataset_util.py: -------------------------------------------------------------------------------- 1 | import torch.utils.data as data 2 | import torch 3 | 4 | class DataIterator(object): 5 | 6 | def __init__(self, dataloader): 7 | self.dataloader = dataloader 8 | self.iterator = enumerate(self.dataloader) 9 | 10 | def __next__(self): 11 | try: 12 | _, data = next(self.iterator) 13 | except Exception: 14 | self.iterator = enumerate(self.dataloader) 15 | _, data = next(self.iterator) 16 | return data[0], data[1] 17 | 18 | 19 | class InfiniteDataLoader(torch.utils.data.DataLoader): 20 | def __init__(self, *args, **kwargs): 21 | super().__init__(*args, **kwargs) 22 | # Initialize an iterator over the dataset. 23 | self.dataset_iterator = super().__iter__() 24 | 25 | def __iter__(self): 26 | return self 27 | 28 | def __next__(self): 29 | try: 30 | batch = next(self.dataset_iterator) 31 | except StopIteration: 32 | # Dataset exhausted, use a new fresh iterator. 33 | self.dataset_iterator = super().__iter__() 34 | batch = next(self.dataset_iterator) 35 | return batch 36 | -------------------------------------------------------------------------------- /base_model/lenet5.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from builder import ConvBuilder 3 | 4 | 5 | class LeNet5BN(nn.Module): 6 | 7 | def __init__(self, builder:ConvBuilder, deps): 8 | super(LeNet5BN, self).__init__() 9 | self.bd = builder 10 | stem = builder.Sequential() 11 | stem.add_module('conv1', builder.Conv2dBNReLU(in_channels=1, out_channels=deps[0], kernel_size=5)) 12 | stem.add_module('maxpool1', builder.Maxpool2d(kernel_size=2)) 13 | stem.add_module('conv2', builder.Conv2dBNReLU(in_channels=deps[0], out_channels=deps[1], kernel_size=5)) 14 | stem.add_module('maxpool2', builder.Maxpool2d(kernel_size=2)) 15 | self.stem = stem 16 | self.flatten = builder.Flatten() 17 | self.linear1 = builder.IntermediateLinear(in_features=deps[1] * 16, out_features=500) 18 | self.relu1 = builder.ReLU() 19 | self.linear2 = builder.Linear(in_features=500, out_features=10) 20 | 21 | def forward(self, x): 22 | out = self.stem(x) 23 | out = self.flatten(out) 24 | out = self.linear1(out) 25 | out = self.relu1(out) 26 | out = self.linear2(out) 27 | return out 28 | 29 | 30 | def create_lenet5bn(cfg, builder): 31 | return LeNet5BN(builder=builder, deps=cfg.deps) 32 | -------------------------------------------------------------------------------- /utils/timer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | 3 | 4 | import time 5 | import datetime 6 | 7 | 8 | class Timer(object): 9 | def __init__(self): 10 | self.reset() 11 | 12 | @property 13 | def average_time(self): 14 | return self.total_time / self.calls if self.calls > 0 else 0.0 15 | 16 | def tic(self): 17 | # using time.time instead of time.clock because time time.clock 18 | # does not normalize for multithreading 19 | self.start_time = time.time() 20 | 21 | def toc(self, average=True): 22 | self.add(time.time() - self.start_time) 23 | if average: 24 | return self.average_time 25 | else: 26 | return self.diff 27 | 28 | def add(self, time_diff): 29 | self.diff = time_diff 30 | self.total_time += self.diff 31 | self.calls += 1 32 | 33 | def reset(self): 34 | self.total_time = 0.0 35 | self.calls = 0 36 | self.start_time = 0.0 37 | self.diff = 0.0 38 | 39 | def avg_time_str(self): 40 | time_str = str(datetime.timedelta(seconds=self.average_time)) 41 | return time_str 42 | 43 | 44 | def get_time_str(time_diff): 45 | time_str = str(datetime.timedelta(seconds=time_diff)) 46 | return time_str 47 | -------------------------------------------------------------------------------- /deprecated/utils/timer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | 4 | import time 5 | import datetime 6 | 7 | 8 | class Timer(object): 9 | def __init__(self): 10 | self.reset() 11 | 12 | @property 13 | def average_time(self): 14 | return self.total_time / self.calls if self.calls > 0 else 0.0 15 | 16 | def tic(self): 17 | # using time.time instead of time.clock because time time.clock 18 | # does not normalize for multithreading 19 | self.start_time = time.time() 20 | 21 | def toc(self, average=True): 22 | self.add(time.time() - self.start_time) 23 | if average: 24 | return self.average_time 25 | else: 26 | return self.diff 27 | 28 | def add(self, time_diff): 29 | self.diff = time_diff 30 | self.total_time += self.diff 31 | self.calls += 1 32 | 33 | def reset(self): 34 | self.total_time = 0.0 35 | self.calls = 0 36 | self.start_time = 0.0 37 | self.diff = 0.0 38 | 39 | def avg_time_str(self): 40 | time_str = str(datetime.timedelta(seconds=self.average_time)) 41 | return time_str 42 | 43 | 44 | def get_time_str(time_diff): 45 | time_str = str(datetime.timedelta(seconds=time_diff)) 46 | return time_str 47 | -------------------------------------------------------------------------------- /deprecated/base_model/lenet5.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from builder import ConvBuilder 3 | 4 | LENET5_DEPS = [20, 50, 500] 5 | 6 | class LeNet5(nn.Module): 7 | 8 | def __init__(self, builder:ConvBuilder, deps): 9 | super(LeNet5, self).__init__() 10 | self.bd = builder 11 | stem = builder.Sequential() 12 | stem.add_module('conv1', builder.Conv2d(in_channels=1, out_channels=LENET5_DEPS[0], kernel_size=5, bias=True)) 13 | stem.add_module('maxpool1', builder.Maxpool2d(kernel_size=2)) 14 | stem.add_module('conv2', builder.Conv2d(in_channels=LENET5_DEPS[0], out_channels=LENET5_DEPS[1], kernel_size=5, bias=True)) 15 | stem.add_module('maxpool2', builder.Maxpool2d(kernel_size=2)) 16 | self.stem = stem 17 | self.flatten = builder.Flatten() 18 | self.linear1 = builder.Linear(in_features=LENET5_DEPS[1] * 16, out_features=LENET5_DEPS[2]) 19 | self.relu1 = builder.ReLU() 20 | self.linear2 = builder.Linear(in_features=LENET5_DEPS[2], out_features=10) 21 | 22 | def forward(self, x): 23 | 
out = self.stem(x) 24 | # print(out.size()) 25 | out = self.flatten(out) 26 | out = self.linear1(out) 27 | out = self.relu1(out) 28 | out = self.linear2(out) 29 | return out 30 | 31 | 32 | def create_lenet5(cfg, builder): 33 | return LeNet5(builder=builder, deps=cfg.deps) 34 | -------------------------------------------------------------------------------- /model_map.py: -------------------------------------------------------------------------------- 1 | from base_model.mobilenetv1 import * 2 | from base_model.stagewise_resnet import * 3 | from base_model.vgg import * 4 | from base_model.lenet5 import create_lenet5bn 5 | from base_model.wrn import create_wrnc16plain 6 | from base_model.resnet import create_ResNet18, create_ResNet34 7 | from base_model.cfqk import create_CFQKBNC 8 | 9 | IMAGENET_STANDARD_MODEL_MAP = { 10 | 'sres50': create_SResNet50, 11 | 'smi1': create_MobileV1Imagenet, 12 | 'sres18': create_ResNet18, 13 | 'sres34': create_ResNet34 14 | } 15 | 16 | CIFAR10_MODEL_MAP = { 17 | 'src56':create_SRC56, 18 | 'src110':create_SRC110, 19 | 'vc':create_vc, 20 | 'wrnc16plain':create_wrnc16plain, 21 | 'cfqkbnc':create_CFQKBNC 22 | } 23 | 24 | MNIST_MODEL_MAP = { 25 | 'lenet5bn': create_lenet5bn, 26 | } 27 | 28 | DATASET_TO_MODEL_MAP = { 29 | 'imagenet_standard': IMAGENET_STANDARD_MODEL_MAP, 30 | 'cifar10': CIFAR10_MODEL_MAP, 31 | 'mnist': MNIST_MODEL_MAP 32 | } 33 | 34 | 35 | # return the model creation function 36 | def get_model_fn(dataset_name, model_name): 37 | # print(DATASET_TO_MODEL_MAP[dataset_name.replace('_blank', '_standard')].keys()) 38 | return DATASET_TO_MODEL_MAP[dataset_name.replace('_blank', '_standard')][model_name] 39 | 40 | def get_dataset_name_by_model_name(model_name): 41 | for dataset_name, model_map in DATASET_TO_MODEL_MAP.items(): 42 | if model_name in model_map: 43 | return dataset_name 44 | return None -------------------------------------------------------------------------------- /deprecated/show_log.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import re 3 | import numpy as np 4 | 5 | top1_pattern = re.compile('top1=(\-*\d+(?:\.\d+)?)') 6 | top5_pattern = re.compile('top5=(\-*\d+(?:\.\d+)?)') 7 | loss_pattern = re.compile('loss=(\-*\d+(?:\.\d+)?)') 8 | 9 | 10 | def get_value_by_pattern(pattern, line): 11 | return float(re.findall(pattern, line)[0]) 12 | 13 | def parse_top1_top5_loss_from_log_line(log_line): 14 | top1 = get_value_by_pattern(top1_pattern, log_line) 15 | top5 = get_value_by_pattern(top5_pattern, log_line) 16 | loss = get_value_by_pattern(loss_pattern, log_line) 17 | return top1, top5, loss 18 | 19 | 20 | root_dir = 'acnet_exps' 21 | num_logs = 10 22 | 23 | log_files = glob.glob('{}/*/log.txt'.format(root_dir)) 24 | 25 | 26 | 27 | for file_path in log_files: 28 | top1_list = [] 29 | top5_list = [] 30 | loss_list = [] 31 | with open(file_path, 'r') as f: 32 | origin_lines = f.readlines() 33 | log_lines = [l for l in origin_lines if 'top1' in l] 34 | last_lines = log_lines[-num_logs:] 35 | for l in last_lines: 36 | top1, top5, loss = parse_top1_top5_loss_from_log_line(l) 37 | top1_list.append(top1) 38 | top5_list.append(top5) 39 | loss_list.append(loss) 40 | network_try_arg = file_path.split('/')[1].replace('_train', '') 41 | print('{}, \t top1={:.3f}, \t top5={:.3f}, \t loss={:.5f}, \t {} logs'.format(network_try_arg, np.mean(top1_list), np.mean(top5_list), np.mean(loss_list), len(top1_list))) 42 | 43 | 44 | 45 | 46 | 47 | 
-------------------------------------------------------------------------------- /base_model/cfqk.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from builder import ConvBuilder 3 | from constants import CFQK_ORIGIN_DEPS 4 | 5 | 6 | class CFQKBN(nn.Module): 7 | 8 | def __init__(self, num_classes, builder:ConvBuilder, deps=None): 9 | super(CFQKBN, self).__init__() 10 | if deps is None: 11 | deps = CFQK_ORIGIN_DEPS 12 | self.bd = builder 13 | self.conv1 = self.bd.Conv2dBNReLU(in_channels=3, out_channels=deps[0], kernel_size=5, stride=1, padding=2) 14 | self.conv2 = self.bd.Conv2dBNReLU(in_channels=deps[0], out_channels=deps[1], kernel_size=5, stride=1, padding=2) 15 | self.conv3 = self.bd.Conv2dBNReLU(in_channels=deps[1], out_channels=deps[2], kernel_size=5, stride=1, padding=2) 16 | self.fc1 = self.bd.Linear(in_features=3*3*deps[2], out_features=64) 17 | self.fc2 = self.bd.Linear(in_features=64, out_features=num_classes) 18 | 19 | def forward(self, x): 20 | x = self.conv1(x) # 32 21 | x = self.bd.max_pool2d(x, kernel_size=3, stride=2, padding=0) #15 22 | x = self.conv2(x) 23 | x = self.bd.avg_pool2d(x, kernel_size=3, stride=2, padding=0) #7 24 | x = self.conv3(x) 25 | x = self.bd.avg_pool2d(x, kernel_size=3, stride=2, padding=0) #3 26 | x = self.bd.flatten(x) 27 | x = self.fc1(x) 28 | x = self.bd.relu(x) 29 | x = self.fc2(x) 30 | return x 31 | 32 | def create_CFQKBNC(cfg, builder): 33 | return CFQKBN(num_classes=10, builder=builder, deps=cfg.deps) 34 | 35 | def create_CFQKBNH(cfg, builder): 36 | return CFQKBN(num_classes=100, builder=builder, deps=cfg.deps) -------------------------------------------------------------------------------- /deprecated/base_model/cfqk.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from builder import ConvBuilder 3 | from constants import CFQK_ORIGIN_DEPS 4 | 5 | 6 | class CFQKBN(nn.Module): 7 | 8 | def __init__(self, num_classes, builder:ConvBuilder, deps=None): 9 | super(CFQKBN, self).__init__() 10 | if deps is None: 11 | deps = CFQK_ORIGIN_DEPS 12 | self.bd = builder 13 | self.conv1 = self.bd.Conv2dBNReLU(in_channels=3, out_channels=deps[0], kernel_size=5, stride=1, padding=2) 14 | self.conv2 = self.bd.Conv2dBNReLU(in_channels=deps[0], out_channels=deps[1], kernel_size=5, stride=1, padding=2) 15 | self.conv3 = self.bd.Conv2dBNReLU(in_channels=deps[1], out_channels=deps[2], kernel_size=5, stride=1, padding=2) 16 | self.fc1 = self.bd.Linear(in_features=3*3*deps[2], out_features=64) 17 | self.fc2 = self.bd.Linear(in_features=64, out_features=num_classes) 18 | 19 | def forward(self, x): 20 | x = self.conv1(x) # 32 21 | x = self.bd.max_pool2d(x, kernel_size=3, stride=2, padding=0) #15 22 | x = self.conv2(x) 23 | x = self.bd.avg_pool2d(x, kernel_size=3, stride=2, padding=0) #7 24 | x = self.conv3(x) 25 | x = self.bd.avg_pool2d(x, kernel_size=3, stride=2, padding=0) #3 26 | x = self.bd.flatten(x) 27 | x = self.fc1(x) 28 | x = self.bd.relu(x) 29 | x = self.fc2(x) 30 | return x 31 | 32 | def create_CFQKBNC(cfg, builder): 33 | return CFQKBN(num_classes=10, builder=builder, deps=cfg.deps) 34 | 35 | def create_CFQKBNH(cfg, builder): 36 | return CFQKBN(num_classes=100, builder=builder, deps=cfg.deps) -------------------------------------------------------------------------------- /deprecated/display_hdf5.py: -------------------------------------------------------------------------------- 1 | from utils.misc import read_hdf5 2 | import sys 
3 | import numpy as np 4 | 5 | di = read_hdf5(sys.argv[1]) 6 | num_kernel_params = 0 7 | 8 | conv_kernel_cnt = 0 9 | matrix_param_cnt = 0 10 | vec_param_cnt = 0 11 | 12 | bias_cnt = 0 13 | beta_cnt = 0 14 | gamma_cnt = 0 15 | mu_cnt = 0 16 | var_cnt = 0 17 | 18 | for name, array in di.items(): 19 | if array.ndim in [2, 4]: 20 | num_kernel_params += array.size 21 | print(name, array.shape, np.mean(array), np.std(array), ' positive {}, negative {}, zeros {}'.format(np.sum(array > 0), np.sum(array < 0), np.sum(array == 0))) 22 | if 'res' in name: 23 | print(array[:4, :4]) 24 | elif 'diag' in name: 25 | print(array) 26 | if array.ndim == 2: 27 | matrix_param_cnt += array.size 28 | elif array.ndim == 1: 29 | vec_param_cnt += array.size 30 | elif array.ndim == 4: 31 | conv_kernel_cnt += array.size 32 | if 'running_mean' in name or 'moving_mean' in name: 33 | mu_cnt += array.size 34 | elif 'running_var' in name or 'moving_var' in name: 35 | var_cnt += array.size 36 | elif ('weight' in name and 'bn' in name.lower()) or 'gamma' in name: 37 | gamma_cnt += array.size 38 | elif ('bias' in name and 'bn' in name.lower()) or 'beta' in name: 39 | beta_cnt += array.size 40 | elif 'bias' in name: 41 | bias_cnt += array.size 42 | 43 | # if 'resmat' in name: 44 | # print(np.transpose(array).dot(array)) 45 | # exit() 46 | print('number of kernel params: ', num_kernel_params) 47 | print('vec {}, matrix {}, conv {}, total {}'.format(vec_param_cnt, matrix_param_cnt, conv_kernel_cnt, 48 | vec_param_cnt + matrix_param_cnt + conv_kernel_cnt)) 49 | print('mu {}, var {}, gamma {}, beta {}, bias {}'.format(mu_cnt, var_cnt, gamma_cnt, beta_cnt, bias_cnt)) 50 | -------------------------------------------------------------------------------- /display_hdf5.py: -------------------------------------------------------------------------------- 1 | from utils.misc import read_hdf5 2 | import sys 3 | import numpy as np 4 | 5 | di = read_hdf5(sys.argv[1]) 6 | num_kernel_params = 0 7 | 8 | conv_kernel_cnt = 0 9 | matrix_param_cnt = 0 10 | vec_param_cnt = 0 11 | 12 | bias_cnt = 0 13 | beta_cnt = 0 14 | gamma_cnt = 0 15 | mu_cnt = 0 16 | var_cnt = 0 17 | 18 | for name, array in di.items(): 19 | if array.ndim in [2, 4]: 20 | num_kernel_params += array.size 21 | 22 | if 'base_mask' in name: 23 | print(name, array) 24 | 25 | print(name, array.shape, np.mean(array), np.std(array), 26 | ' positive {}, negative {}, zeros {}, near-zero {}'.format(np.sum(array > 0), np.sum(array < 0), np.sum(array == 0), 27 | np.sum(np.abs(array) <= 1e-5))) 28 | 29 | if array.ndim == 2: 30 | matrix_param_cnt += array.size 31 | elif array.ndim == 1: 32 | vec_param_cnt += array.size 33 | elif array.ndim == 4: 34 | conv_kernel_cnt += array.size 35 | if 'running_mean' in name or 'moving_mean' in name: 36 | mu_cnt += array.size 37 | elif 'running_var' in name or 'moving_var' in name: 38 | var_cnt += array.size 39 | elif ('weight' in name and 'bn' in name.lower()) or 'gamma' in name: 40 | gamma_cnt += array.size 41 | elif ('bias' in name and 'bn' in name.lower()) or 'beta' in name: 42 | beta_cnt += array.size 43 | elif 'bias' in name: 44 | bias_cnt += array.size 45 | elif 'spatial_mask' in name: 46 | print(array) 47 | print(np.sum(array)) 48 | 49 | print('number of kernel params: ', num_kernel_params) 50 | print('vec {}, matrix {}, conv {}, total {}'.format(vec_param_cnt, matrix_param_cnt, conv_kernel_cnt, 51 | vec_param_cnt + matrix_param_cnt + conv_kernel_cnt)) 52 | print('mu {}, var {}, gamma {}, beta {}, bias {}'.format(mu_cnt, var_cnt, gamma_cnt, beta_cnt, 
bias_cnt)) 53 | -------------------------------------------------------------------------------- /deprecated/model_map.py: -------------------------------------------------------------------------------- 1 | from base_model.resnet import * 2 | from base_model.cfqk import * 3 | from base_model.wrn import * 4 | from base_model.mobilenetv1 import * 5 | from base_model.lenet5 import create_lenet5 6 | from base_model.vgg import create_vc, create_vh 7 | 8 | IMAGENET_MODEL_MAP = { 9 | 'resnet18':create_ResNet18, 10 | 'resnet34':create_ResNet34, 11 | 'resnet50':create_ResNet50, 12 | 'resnet101':create_ResNet101, 13 | 'resnet152':create_ResNet152, 14 | } 15 | 16 | 17 | CIFAR10_MODEL_MAP = { 18 | 'rc56':create_RC56, 19 | 'rc110':create_RC110, 20 | 'rc164':create_RC164, 21 | 22 | 'cfqkbnc':create_CFQKBNC, 23 | 24 | 'wrnc16plain':create_wrnc16plain, 25 | 'wrnc16drop':create_wrnc16drop, 26 | 'wrnc28plain':create_wrnc28plain, 27 | 'wrnc28drop':create_wrnc28drop, 28 | 'wrnc40plain':create_wrnc40plain, 29 | 'wrnc40drop':create_wrnc40drop, 30 | 31 | 'mc1':create_MobileV1Cifar, 32 | 'vc': create_vc 33 | 34 | } 35 | 36 | CH_MODEL_MAP = { 37 | 'rh56': create_RH56, 38 | 'rh110': create_RH110, 39 | 'rh164': create_RH164, 40 | 41 | 'cfqkbnh':create_CFQKBNH, 42 | 43 | 'wrnh16plain':create_wrnh16plain, 44 | 'wrnh16drop':create_wrnh16drop, 45 | 'wrnh28plain':create_wrnh28plain, 46 | 'wrnh28drop':create_wrnh28drop, 47 | 'wrnh40plain':create_wrnh40plain, 48 | 'wrnh40drop':create_wrnh40drop, 49 | 50 | 'mh1':create_MobileV1CH, 51 | 52 | 'vh':create_vh 53 | } 54 | 55 | MNIST_MODEL_MAP = { 56 | 'lenet5': create_lenet5 57 | } 58 | 59 | SVHN_MODEL_MAP = { 60 | 61 | } 62 | 63 | DATASET_TO_MODEL_MAP = { 64 | 'imagenet': IMAGENET_MODEL_MAP, 65 | 'cifar10': CIFAR10_MODEL_MAP, 66 | 'ch': CH_MODEL_MAP, #ch for cifar-100 67 | 'svhn': SVHN_MODEL_MAP, 68 | 'mnist': MNIST_MODEL_MAP 69 | } 70 | 71 | 72 | # return the model creation function 73 | def get_model_fn(dataset_name, model_name): 74 | return DATASET_TO_MODEL_MAP[dataset_name][model_name] 75 | 76 | def get_dataset_name_by_model_name(model_name): 77 | for dataset_name, model_map in DATASET_TO_MODEL_MAP.items(): 78 | if model_name in model_map: 79 | return dataset_name 80 | return None 81 | -------------------------------------------------------------------------------- /deprecated/acnet/acnet_rc56.py: -------------------------------------------------------------------------------- 1 | from ding_train import ding_train 2 | from base_config import get_baseconfig_by_epoch 3 | from utils.misc import start_exp 4 | from constants import parse_usual_lr_schedule 5 | 6 | def acnet_rc56(): 7 | try_arg = start_exp() 8 | 9 | network_type = 'rc56' 10 | dataset_name = 'cifar10' 11 | log_dir = 'acnet_exps/{}_{}_train'.format(network_type, try_arg) 12 | save_weights = 'acnet_exps/{}_{}_savedweights.pth'.format(network_type, try_arg) 13 | weight_decay_strength = 1e-4 14 | batch_size = 64 15 | 16 | lrs = parse_usual_lr_schedule(try_arg) 17 | 18 | if 'bias' in try_arg: 19 | weight_decay_bias = weight_decay_strength 20 | else: 21 | weight_decay_bias = 0 22 | 23 | if 'warmup' in try_arg: 24 | warmup_factor = 0 25 | else: 26 | warmup_factor = 1 27 | 28 | config = get_baseconfig_by_epoch(network_type=network_type, dataset_name=dataset_name, dataset_subset='train', 29 | global_batch_size=batch_size, num_node=1, 30 | weight_decay=weight_decay_strength, optimizer_type='sgd', momentum=0.9, 31 | max_epochs=lrs.max_epochs, base_lr=lrs.base_lr, lr_epoch_boundaries=lrs.lr_epoch_boundaries, 32 | 
lr_decay_factor=lrs.lr_decay_factor, 33 | warmup_epochs=5, warmup_method='linear', warmup_factor=warmup_factor, 34 | ckpt_iter_period=20000, tb_iter_period=100, output_dir=log_dir, 35 | tb_dir=log_dir, save_weights=save_weights, val_epoch_period=2, linear_final_lr=lrs.linear_final_lr, 36 | weight_decay_bias=weight_decay_bias) 37 | 38 | if 'normal' in try_arg: 39 | builder = None 40 | elif 'acnet' in try_arg: 41 | from acnet.acnet_builder import ACNetBuilder 42 | builder = ACNetBuilder(base_config=config, deploy=False) 43 | else: 44 | assert False 45 | 46 | ding_train(config, show_variables=True, convbuilder=builder, use_nesterov='nest' in try_arg) 47 | 48 | 49 | if __name__ == '__main__': 50 | acnet_rc56() -------------------------------------------------------------------------------- /deprecated/acnet/acnet_cfqkbnc.py: -------------------------------------------------------------------------------- 1 | from ding_train import ding_train 2 | from base_config import get_baseconfig_by_epoch 3 | from utils.misc import start_exp 4 | from constants import parse_usual_lr_schedule 5 | 6 | def acnet_cfqkbnc(): 7 | try_arg = start_exp() 8 | 9 | network_type = 'cfqkbnc' 10 | dataset_name = 'cifar10' 11 | log_dir = 'acnet_exps/{}_{}_train'.format(network_type, try_arg) 12 | save_weights = 'acnet_exps/{}_{}_savedweights.pth'.format(network_type, try_arg) 13 | weight_decay_strength = 1e-4 14 | batch_size = 64 15 | 16 | lrs = parse_usual_lr_schedule(try_arg) 17 | 18 | if 'bias' in try_arg: 19 | weight_decay_bias = weight_decay_strength 20 | else: 21 | weight_decay_bias = 0 22 | 23 | if 'warmup' in try_arg: 24 | warmup_factor = 0 25 | else: 26 | warmup_factor = 1 27 | 28 | config = get_baseconfig_by_epoch(network_type=network_type, dataset_name=dataset_name, dataset_subset='train', 29 | global_batch_size=batch_size, num_node=1, 30 | weight_decay=weight_decay_strength, optimizer_type='sgd', momentum=0.9, 31 | max_epochs=lrs.max_epochs, base_lr=lrs.base_lr, lr_epoch_boundaries=lrs.lr_epoch_boundaries, 32 | lr_decay_factor=lrs.lr_decay_factor, 33 | warmup_epochs=5, warmup_method='linear', warmup_factor=warmup_factor, 34 | ckpt_iter_period=20000, tb_iter_period=100, output_dir=log_dir, 35 | tb_dir=log_dir, save_weights=save_weights, val_epoch_period=2, linear_final_lr=lrs.linear_final_lr, 36 | weight_decay_bias=weight_decay_bias) 37 | 38 | if 'normal' in try_arg: 39 | builder = None 40 | elif 'acnet' in try_arg: 41 | from acnet.acnet_builder import ACNetBuilder 42 | builder = ACNetBuilder(base_config=config, deploy=False) 43 | else: 44 | assert False 45 | 46 | ding_train(config, show_variables=True, convbuilder=builder, use_nesterov='nest' in try_arg) 47 | 48 | 49 | if __name__ == '__main__': 50 | acnet_cfqkbnc() -------------------------------------------------------------------------------- /deprecated/acnet/acnet_vc.py: -------------------------------------------------------------------------------- 1 | from ding_train import ding_train 2 | from base_config import get_baseconfig_by_epoch 3 | from utils.misc import start_exp 4 | from constants import VGG_ORIGIN_DEPS, parse_usual_lr_schedule 5 | 6 | def acnet_vc(): 7 | try_arg = start_exp() 8 | 9 | network_type = 'vc' 10 | dataset_name = 'cifar10' 11 | log_dir = 'acnet_exps/{}_{}_train'.format(network_type, try_arg) 12 | save_weights = 'acnet_exps/{}_{}_savedweights.pth'.format(network_type, try_arg) 13 | weight_decay_strength = 1e-4 14 | batch_size = 64 15 | deps = VGG_ORIGIN_DEPS 16 | 17 | lrs = parse_usual_lr_schedule(try_arg) 18 | 19 | if 'bias' in 
try_arg: 20 | weight_decay_bias = weight_decay_strength 21 | else: 22 | weight_decay_bias = 0 23 | 24 | 25 | 26 | if 'warmup' in try_arg: 27 | warmup_factor = 0 28 | else: 29 | warmup_factor = 1 30 | 31 | config = get_baseconfig_by_epoch(network_type=network_type, dataset_name=dataset_name, dataset_subset='train', 32 | global_batch_size=batch_size, num_node=1, 33 | weight_decay=weight_decay_strength, optimizer_type='sgd', momentum=0.9, 34 | max_epochs=lrs.max_epochs, base_lr=lrs.base_lr, lr_epoch_boundaries=lrs.lr_epoch_boundaries, 35 | lr_decay_factor=lrs.lr_decay_factor, 36 | warmup_epochs=5, warmup_method='linear', warmup_factor=warmup_factor, 37 | ckpt_iter_period=20000, tb_iter_period=100, output_dir=log_dir, 38 | tb_dir=log_dir, save_weights=save_weights, val_epoch_period=2, linear_final_lr=lrs.linear_final_lr, 39 | weight_decay_bias=weight_decay_bias, deps=deps) 40 | 41 | if 'normal' in try_arg: 42 | builder = None 43 | elif 'acnet' in try_arg: 44 | from acnet.acnet_builder import ACNetBuilder 45 | builder = ACNetBuilder(base_config=config, deploy=False) 46 | else: 47 | assert False 48 | 49 | ding_train(config, show_variables=True, convbuilder=builder, use_nesterov='nest' in try_arg) 50 | 51 | 52 | 53 | if __name__ == '__main__': 54 | acnet_vc() -------------------------------------------------------------------------------- /deprecated/acnet/acnet_wrnc16.py: -------------------------------------------------------------------------------- 1 | from ding_train import ding_train 2 | from base_config import get_baseconfig_by_epoch 3 | from utils.misc import start_exp 4 | from constants import wrn_origin_deps_flattened, parse_usual_lr_schedule 5 | 6 | def acnet_wrnc16(): 7 | try_arg = start_exp() 8 | 9 | network_type = 'wrnc16plain' 10 | dataset_name = 'cifar10' 11 | log_dir = 'acnet_exps/{}_{}_train'.format(network_type, try_arg) 12 | save_weights = 'acnet_exps/{}_{}_savedweights.pth'.format(network_type, try_arg) 13 | weight_decay_strength = 5e-4 14 | batch_size = 128 15 | deps = wrn_origin_deps_flattened(2, 8) 16 | 17 | lrs = parse_usual_lr_schedule(try_arg) 18 | 19 | if 'bias' in try_arg: 20 | weight_decay_bias = weight_decay_strength 21 | else: 22 | weight_decay_bias = 0 23 | 24 | if 'warmup' in try_arg: 25 | warmup_factor = 0 26 | else: 27 | warmup_factor = 1 28 | 29 | config = get_baseconfig_by_epoch(network_type=network_type, dataset_name=dataset_name, dataset_subset='train', 30 | global_batch_size=batch_size, num_node=1, 31 | weight_decay=weight_decay_strength, optimizer_type='sgd', momentum=0.9, 32 | max_epochs=lrs.max_epochs, base_lr=lrs.base_lr, lr_epoch_boundaries=lrs.lr_epoch_boundaries, 33 | lr_decay_factor=lrs.lr_decay_factor, 34 | warmup_epochs=5, warmup_method='linear', warmup_factor=warmup_factor, 35 | ckpt_iter_period=20000, tb_iter_period=100, output_dir=log_dir, 36 | tb_dir=log_dir, save_weights=save_weights, val_epoch_period=2, linear_final_lr=lrs.linear_final_lr, 37 | weight_decay_bias=weight_decay_bias, deps=deps) 38 | 39 | if 'normal' in try_arg: 40 | builder = None 41 | elif 'acnet' in try_arg: 42 | from acnet.acnet_builder import ACNetBuilder 43 | builder = ACNetBuilder(base_config=config, deploy=False) 44 | else: 45 | assert False 46 | 47 | ding_train(config, show_variables=True, convbuilder=builder, use_nesterov='nest' in try_arg) 48 | 49 | 50 | if __name__ == '__main__': 51 | acnet_wrnc16() -------------------------------------------------------------------------------- /deprecated/README.md: 
--------------------------------------------------------------------------------
 1 | # ACNet
 2 | 
 3 | **These are deprecated.**
 4 | 
 5 | ## Example Usage
 6 | 
 7 | 1. Install PyTorch 1.1. Clone this repo and enter the directory. Modify PYTHONPATH or you will get an ImportError.
 8 | ```
 9 | export PYTHONPATH='WHERE_YOU_CLONED_THIS_REPO'
10 | ```
11 | 
12 | 2. Modify 'CIFAR10_PATH' in dataset.py to the directory of your CIFAR-10 dataset. If the dataset is not found in that directory, it will be downloaded automatically.
13 | 
14 | 3. Train a Cifar-quick on CIFAR-10 without Asymmetric Convolution Blocks as the baseline. (We use learning rate warmup and weight decay on bias parameters. They are not necessary, just our preference. Here 'lrs5' is a pre-defined learning rate schedule.) The model will be evaluated every two epochs.
15 | ```
16 | python acnet/acnet_cfqkbnc.py --try_arg=normal_lrs5_warmup_bias
17 | ```
18 | 
19 | 4. Train a Cifar-quick on CIFAR-10 with Asymmetric Convolution Blocks. The trained weights will be saved to acnet_exps/cfqkbnc_acnet_lrs5_warmup_bias_train/finish.hdf5. Note that Cifar-quick uses 5x5 convs, and we add 1x3 and 3x1 kernels onto the 5x5 kernels. Of course, 1x5 and 5x1 convs may work better.
20 | ```
21 | python acnet/acnet_cfqkbnc.py --try_arg=acnet_lrs5_warmup_bias
22 | ```
23 | 
24 | 5. Check the average accuracy of the two models over their last ten evaluations. You will see the accuracy gap.
25 | ```
26 | python show_log.py
27 | ```
28 | 
29 | 6. Build a Cifar-quick with the same structure as the baseline model, then convert the weights of the ACNet counterpart via BN fusion and branch fusion to initialize it. Test before and after the conversion. You will see identical results.
30 | ```
31 | python acnet/acnet_test.py cfqkbnc acnet_exps/cfqkbnc_acnet_lrs5_warmup_bias_train/finish.hdf5
32 | ```
33 | 
34 | 7. Check the names and shapes of the converted weights.
35 | ```
36 | python display_hdf5.py acnet_exps/cfqkbnc_acnet_lrs5_warmup_bias_train/finish_deploy.hdf5
37 | ```
38 | 
39 | Other models:
40 | 
41 | VGG is deeper, so we train it for longer:
42 | ```
43 | python acnet/acnet_vc.py --try_arg=acnet_lrs3_warmup_bias
44 | ```
45 | ResNet-56:
46 | ```
47 | python acnet/acnet_rc56.py --try_arg=acnet_lrs3_warmup_bias
48 | ```
49 | WRN-16-8 (we slightly lengthen the learning rate schedule recommended in the WRN paper):
50 | ```
51 | python acnet/acnet_wrnc16.py --try_arg=acnet_lrs6_warmup_bias
52 | ```
53 | 
54 | 
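55 | ### How the conversion works (sketch)
56 | 
57 | At inference time a BN layer is just a per-channel affine transform, so it can be folded into the conv before it; and because convolution is linear, the three fused branches of an Asymmetric Convolution Block (the square kernel plus the 1x3 and 3x1 kernels, each followed by its own BN) can then be summed into a single square kernel with a bias. Below is a minimal numpy sketch of that computation, for illustration only: the function names here are made up, and the real conversion code in acnet/acnet_fusion.py (convert_acnet_weights) works on the saved hdf5 weights and handles more details than shown.
58 | 
59 | ```
60 | import numpy as np
61 | 
62 | def fuse_conv_bn(kernel, gamma, beta, mean, var, eps=1e-5):
63 |     # Fold a BN layer into the preceding (bias-free) conv: scale every
64 |     # output-channel slice of the kernel and turn the BN shift into a bias.
65 |     std = np.sqrt(var + eps)
66 |     fused_kernel = kernel * (gamma / std).reshape(-1, 1, 1, 1)
67 |     fused_bias = beta - mean * gamma / std
68 |     return fused_kernel, fused_bias
69 | 
70 | def fuse_acb(square, hor, ver):
71 |     # square, hor, ver: the square (e.g. 3x3 or 5x5), horizontal (1x3) and
72 |     # vertical (3x1) branches of one ACB, each given as a tuple
73 |     # (kernel, gamma, beta, running_mean, running_var) after training.
74 |     k_sq, b_sq = fuse_conv_bn(*square)
75 |     k_hor, b_hor = fuse_conv_bn(*hor)
76 |     k_ver, b_ver = fuse_conv_bn(*ver)
77 |     kh, kw = k_sq.shape[2], k_sq.shape[3]
78 |     k_sq = k_sq.copy()
79 |     # add the fused 1xN kernel onto the centre row of the square kernel
80 |     r, c0 = kh // 2, (kw - k_hor.shape[3]) // 2
81 |     k_sq[:, :, r:r + 1, c0:c0 + k_hor.shape[3]] += k_hor
82 |     # add the fused Nx1 kernel onto the centre column
83 |     c, r0 = kw // 2, (kh - k_ver.shape[2]) // 2
84 |     k_sq[:, :, r0:r0 + k_ver.shape[2], c:c + 1] += k_ver
85 |     # parameters of a single ordinary square conv with bias
86 |     return k_sq, b_sq + b_hor + b_ver
87 | ```
88 | 
89 | The deploy-mode model then simply loads such fused kernels and biases into plain conv layers, which is why the test before and after the conversion prints identical results.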
--------------------------------------------------------------------------------
/deprecated/base_model/mobilenetv1.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from builder import ConvBuilder 3 | 4 | class MobileV1Block(nn.Module): 5 | '''Depthwise conv + Pointwise conv''' 6 | def __init__(self, builder:ConvBuilder, in_planes, out_planes, stride=1): 7 | super(MobileV1Block, self).__init__() 8 | self.conv1 = builder.Conv2dBNReLU(in_channels=in_planes, out_channels=in_planes, kernel_size=3, 9 | stride=stride, padding=1, groups=in_planes) 10 | self.conv2 = builder.Conv2dBNReLU(in_channels=in_planes, out_channels=out_planes, kernel_size=1, 11 | stride=1, padding=0) 12 | 13 | def forward(self, x): 14 | out = self.conv1(x) 15 | out = self.conv2(out) 16 | return out 17 | 18 | imagenet_cfg = [64, (128,2), 128, (256,2), 256, (512,2), 512, 512, 512, 512, 512, (1024,2), 1024] 19 | # cifar_cfg = [16, (32,2), 32, (64,2), 64, (128,2), 128, 128, 128, 128, 128, (256,2), 256] # 86% 20 | # cifar_cfg = [16, 32, 32, (64,2), 64, (128,2), 128, 128, 128, 128, 128, (256,2), 256] 21 | cifar_cfg = [16, 32, 32, 64, 64, (128,2), 128, 128, 128, 128, 128, (256,2), 256] # 93 22 | 23 | class MobileV1CifarNet(nn.Module): 24 | 25 | def __init__(self, builder:ConvBuilder, num_classes): 26 | super(MobileV1CifarNet, self).__init__() 27 | self.conv1 = builder.Conv2dBNReLU(in_channels=3, out_channels=16, kernel_size=3, stride=1, padding=1) 28 | blocks = [] 29 | in_planes = cifar_cfg[0] 30 | for x in cifar_cfg: 31 | out_planes = x if isinstance(x, int) else x[0] 32 | stride = 1 if isinstance(x, int) else x[1] 33 | blocks.append(MobileV1Block(builder=builder, in_planes=in_planes, out_planes=out_planes, stride=stride)) 34 | in_planes = out_planes 35 | self.stem = builder.Sequential(*blocks) 36 | self.linear = builder.Linear(cifar_cfg[-1], num_classes) 37 | self.bd = builder 38 | 39 | 40 | 41 | def forward(self, x): 42 | out = self.conv1(x) 43 | out = self.stem(out) 44 | out = self.bd.avg_pool2d(out, 8, stride=1, padding=0) 45 | out = self.bd.flatten(out) 46 | out = self.linear(out) 47 | return out 48 | 49 | def create_MobileV1Cifar(cfg, builder): 50 | return MobileV1CifarNet(builder=builder, num_classes=10) 51 | def create_MobileV1CH(cfg, builder): 52 | return MobileV1CifarNet(builder=builder, num_classes=100) --------------------------------------------------------------------------------
/data/imagenet_data.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision.transforms as transforms 3 | from data.dataset_util import DataIterator 4 | import os 5 | import torchvision.datasets as datasets 6 | 7 | IMGNET_TRAIN_DIR = 'imagenet_data' 8 | 9 | class ImgnetStdTrainData(object): 10 | 11 | def __init__(self, distributed, batch_size_per_gpu): 12 | normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], 13 | std=[0.229, 0.224, 0.225]) 14 |
self.train_dataset = datasets.ImageFolder( 15 | os.path.join(IMGNET_TRAIN_DIR, 'train'), 16 | transforms.Compose([ 17 | transforms.RandomResizedCrop(224), 18 | transforms.RandomHorizontalFlip(), 19 | transforms.ToTensor(), 20 | normalize, 21 | ])) 22 | 23 | if distributed: 24 | self.train_sampler = torch.utils.data.distributed.DistributedSampler(self.train_dataset, shuffle=True) 25 | shuffle = False 26 | else: 27 | self.train_sampler = None 28 | shuffle = True 29 | self.train_loader = torch.utils.data.DataLoader( 30 | self.train_dataset, batch_size=batch_size_per_gpu, sampler=self.train_sampler, shuffle=shuffle, 31 | num_workers=4, pin_memory=True, drop_last=True) 32 | self.dataprovider = DataIterator(self.train_loader) 33 | 34 | 35 | class ImgnetStdValData(object): 36 | def __init__(self, batch_size): 37 | normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], 38 | std=[0.229, 0.224, 0.225]) 39 | 40 | self.val_dataset = datasets.ImageFolder( 41 | os.path.join(IMGNET_TRAIN_DIR, 'val'), 42 | transforms.Compose([ 43 | transforms.Resize(256), 44 | transforms.CenterCrop(224), 45 | transforms.ToTensor(), 46 | normalize, 47 | ] 48 | ) 49 | ) 50 | self.val_loader = torch.utils.data.DataLoader( 51 | self.val_dataset, batch_size=batch_size, shuffle=False, 52 | num_workers=4, pin_memory=True 53 | ) 54 | self.dataprovider = DataIterator(self.val_loader) -------------------------------------------------------------------------------- /utils/pyt_utils.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # Most of the util functions should has nothing to do with torch 3 | 4 | import os 5 | import sys 6 | import time 7 | import argparse 8 | import errno 9 | from collections import OrderedDict, defaultdict 10 | 11 | 12 | def extant_file(x): 13 | """ 14 | 'Type' for argparse - checks that file exists but does not open. 15 | """ 16 | if not os.path.exists(x): 17 | # Argparse uses the ArgumentTypeError to give a rejection message like: 18 | # error: argument input: x does not exist 19 | raise argparse.ArgumentTypeError("{0} does not exist".format(x)) 20 | return x 21 | 22 | 23 | def parse_torch_devices(input_devices): 24 | """Parse user's devices input string to standard format for Torch. 25 | e.g. [gpu0, gpu1, ...] 
26 | 27 | """ 28 | import torch 29 | print('we have {} torch devices'.format(torch.cuda.device_count())) 30 | from .logger import get_logger 31 | logger = get_logger() 32 | 33 | if input_devices.endswith('*'): 34 | devices = list(range(torch.cuda.device_count())) 35 | return devices 36 | 37 | devices = [] 38 | for d in input_devices.split(','): 39 | if '-' in d: 40 | start_device, end_device = d.split('-')[0], d.split('-')[1] 41 | assert start_device != '' 42 | assert end_device != '' 43 | start_device, end_device = int(start_device), int(end_device) 44 | assert start_device < end_device 45 | assert end_device < torch.cuda.device_count() 46 | for sd in range(start_device, end_device + 1): 47 | devices.append(sd) 48 | else: 49 | device = int(d) 50 | assert device < torch.cuda.device_count() 51 | devices.append(device) 52 | 53 | logger.info('using devices {}'.format(', '.join([str(d) for d in devices]))) 54 | 55 | return devices 56 | 57 | 58 | def link_file(src, target): 59 | """symbol link the source directorie to target 60 | """ 61 | if os.path.isdir(target) or os.path.isfile(target): 62 | os.remove(target) 63 | os.system('ln -s {} {}'.format(src, target)) 64 | 65 | 66 | def ensure_dir(path): 67 | """create directories if *path* does not exist 68 | """ 69 | try: 70 | if not os.path.isdir(path): 71 | os.makedirs(path) 72 | except OSError as e: 73 | if e.errno != errno.EEXIST: 74 | raise 75 | 76 | 77 | # def mk_dir(path): 78 | # try: 79 | # os.makedirs(path) 80 | # except OSError as e: 81 | # if e.errno != errno.EEXIST: 82 | # raise 83 | -------------------------------------------------------------------------------- /deprecated/utils/pyt_utils.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # Most of the util functions should has nothing to do with torch 3 | 4 | import os 5 | import sys 6 | import time 7 | import argparse 8 | import errno 9 | from collections import OrderedDict, defaultdict 10 | 11 | 12 | def extant_file(x): 13 | """ 14 | 'Type' for argparse - checks that file exists but does not open. 15 | """ 16 | if not os.path.exists(x): 17 | # Argparse uses the ArgumentTypeError to give a rejection message like: 18 | # error: argument input: x does not exist 19 | raise argparse.ArgumentTypeError("{0} does not exist".format(x)) 20 | return x 21 | 22 | 23 | def parse_torch_devices(input_devices): 24 | """Parse user's devices input string to standard format for Torch. 25 | e.g. [gpu0, gpu1, ...] 
26 | 27 | """ 28 | import torch 29 | print('we have {} torch devices'.format(torch.cuda.device_count())) 30 | from .logger import get_logger 31 | logger = get_logger() 32 | 33 | if input_devices.endswith('*'): 34 | devices = list(range(torch.cuda.device_count())) 35 | return devices 36 | 37 | devices = [] 38 | for d in input_devices.split(','): 39 | if '-' in d: 40 | start_device, end_device = d.split('-')[0], d.split('-')[1] 41 | assert start_device != '' 42 | assert end_device != '' 43 | start_device, end_device = int(start_device), int(end_device) 44 | assert start_device < end_device 45 | assert end_device < torch.cuda.device_count() 46 | for sd in range(start_device, end_device + 1): 47 | devices.append(sd) 48 | else: 49 | device = int(d) 50 | assert device < torch.cuda.device_count() 51 | devices.append(device) 52 | 53 | logger.info('using devices {}'.format(', '.join([str(d) for d in devices]))) 54 | 55 | return devices 56 | 57 | 58 | def link_file(src, target): 59 | """symbol link the source directorie to target 60 | """ 61 | if os.path.isdir(target) or os.path.isfile(target): 62 | os.remove(target) 63 | os.system('ln -s {} {}'.format(src, target)) 64 | 65 | 66 | def ensure_dir(path): 67 | """create directories if *path* does not exist 68 | """ 69 | try: 70 | if not os.path.isdir(path): 71 | os.makedirs(path) 72 | except OSError as e: 73 | if e.errno != errno.EEXIST: 74 | raise 75 | 76 | 77 | # def mk_dir(path): 78 | # try: 79 | # os.makedirs(path) 80 | # except OSError as e: 81 | # if e.errno != errno.EEXIST: 82 | # raise 83 | -------------------------------------------------------------------------------- /deprecated/base_model/vgg.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from builder import ConvBuilder 3 | 4 | class VCNet(nn.Module): 5 | 6 | def __init__(self, num_classes, builder:ConvBuilder, deps): 7 | super(VCNet, self).__init__() 8 | self.bd = builder 9 | sq = builder.Sequential() 10 | sq.add_module('conv1', builder.Conv2dBNReLU(in_channels=3, out_channels=deps[0], kernel_size=3, stride=1, padding=1)) 11 | sq.add_module('conv2', builder.Conv2dBNReLU(in_channels=deps[0], out_channels=deps[1], kernel_size=3, stride=1, padding=1)) 12 | sq.add_module('maxpool1', builder.Maxpool2d(kernel_size=2)) 13 | sq.add_module('conv3', builder.Conv2dBNReLU(in_channels=deps[1], out_channels=deps[2], kernel_size=3, stride=1, padding=1)) 14 | sq.add_module('conv4', builder.Conv2dBNReLU(in_channels=deps[2], out_channels=deps[3], kernel_size=3, stride=1, padding=1)) 15 | sq.add_module('maxpool2', builder.Maxpool2d(kernel_size=2)) 16 | sq.add_module('conv5', builder.Conv2dBNReLU(in_channels=deps[3], out_channels=deps[4], kernel_size=3, stride=1, padding=1)) 17 | sq.add_module('conv6', builder.Conv2dBNReLU(in_channels=deps[4], out_channels=deps[5], kernel_size=3, stride=1, padding=1)) 18 | sq.add_module('conv7', builder.Conv2dBNReLU(in_channels=deps[5], out_channels=deps[6], kernel_size=3, stride=1, padding=1)) 19 | sq.add_module('maxpool3', builder.Maxpool2d(kernel_size=2)) 20 | sq.add_module('conv8', builder.Conv2dBNReLU(in_channels=deps[6], out_channels=deps[7], kernel_size=3, stride=1, padding=1)) 21 | sq.add_module('conv9', builder.Conv2dBNReLU(in_channels=deps[7], out_channels=deps[8], kernel_size=3, stride=1, padding=1)) 22 | sq.add_module('conv10', builder.Conv2dBNReLU(in_channels=deps[8], out_channels=deps[9], kernel_size=3, stride=1, padding=1)) 23 | sq.add_module('maxpool4', builder.Maxpool2d(kernel_size=2)) 24 | 
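# For 32x32 CIFAR inputs the feature map is 2x2 at this point (four 2x2 max-pools so far);
# the three convs below keep that size and maxpool5 reduces it to 1x1, which is why
# linear1 can take exactly deps[12] input features after flattening.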
sq.add_module('conv11', builder.Conv2dBNReLU(in_channels=deps[9], out_channels=deps[10], kernel_size=3, stride=1, padding=1)) 25 | sq.add_module('conv12', builder.Conv2dBNReLU(in_channels=deps[10], out_channels=deps[11], kernel_size=3, stride=1, padding=1)) 26 | sq.add_module('conv13', builder.Conv2dBNReLU(in_channels=deps[11], out_channels=deps[12], kernel_size=3, stride=1, padding=1)) 27 | sq.add_module('maxpool5', builder.Maxpool2d(kernel_size=2)) 28 | self.stem = sq 29 | self.flatten = builder.Flatten() 30 | self.linear1 = builder.Linear(in_features=deps[12], out_features=512) 31 | self.relu = builder.ReLU() 32 | self.linear2 = builder.Linear(in_features=512, out_features=num_classes) 33 | 34 | def forward(self, x): 35 | out = self.stem(x) 36 | out = self.flatten(out) 37 | out = self.linear1(out) 38 | out = self.relu(out) 39 | out = self.linear2(out) 40 | return out 41 | 42 | 43 | def create_vc(cfg, builder): 44 | return VCNet(num_classes=10, builder=builder, deps=cfg.deps) 45 | def create_vh(cfg, builder): 46 | return VCNet(num_classes=100, builder=builder, deps=cfg.deps) 47 | -------------------------------------------------------------------------------- /utils/loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch.nn.modules.loss import _Loss 4 | from typing import List, Tuple 5 | 6 | 7 | 8 | class WeightedCrossEntropyLoss(_Loss): 9 | ''' 10 | Sampled reweighted Cross Entropy loss 11 | only accept one demensions target and two demension input. 12 | ''' 13 | 14 | def __init__(self, ): 15 | super(WeightedCrossEntropyLoss, self).__init__() 16 | 17 | def __call__(self, input:torch.Tensor, target:torch.Tensor, sample_weight): 18 | probs = F.log_softmax(input, dim = 1) 19 | #if target.ndimension(): 20 | if target.ndimension() == 1: 21 | #print(target.shape) 22 | target = target.expand(1, *target.shape) 23 | target = target.transpose(1, 0) 24 | one_hot = torch.zeros_like(probs).scatter_(1, target, 1) 25 | probs = probs * one_hot * -1.0 26 | loss = torch.sum(probs, 1) 27 | loss = loss * sample_weight 28 | loss = torch.mean(loss) 29 | return loss 30 | 31 | class LabelSmoothCrossEntropyLoss(_Loss): 32 | 33 | def __init__(self, eps = 0.1, class_num = 1000): 34 | super(LabelSmoothCrossEntropyLoss, self).__init__() 35 | 36 | self.min_value = eps / class_num 37 | self.eps = eps 38 | 39 | 40 | def __call__(self, pred:torch.Tensor, target:torch.Tensor): 41 | 42 | epses = self.min_value * torch.ones_like(pred) 43 | log_probs = F.log_softmax(pred, dim=1) 44 | 45 | if target.ndimension() == 1: 46 | #print(target.shape) 47 | target = target.expand(1, *target.shape) 48 | #print(target, 'dwa') 49 | target = target.transpose(1, 0) 50 | target = torch.zeros_like(log_probs).scatter_(1, target, 1) 51 | target = target.type(torch.float) 52 | target = target * (1 - self.eps) + epses 53 | 54 | #print(target, 'fff') 55 | element_wise_mul = log_probs * target * -1.0 56 | 57 | loss = torch.sum(element_wise_mul, 1) 58 | loss = torch.mean(loss) 59 | 60 | return loss 61 | 62 | 63 | class AuxClassifersLoss(_Loss): 64 | 65 | def __init__(self, BasicLoss, weights:List[float]): 66 | super(AuxClassifersLoss, self).__init__() 67 | self.BasicLoss = BasicLoss 68 | self.weights = weights 69 | #print('AuxCls', self.BasicLoss) 70 | 71 | def __call__(self, preds:List[torch.Tensor], target): 72 | 73 | loss = 0 74 | for pred in preds: 75 | loss = loss + self.BasicLoss(pred, target) 76 | return loss 77 | 78 | 79 | class 
GaussianWeightedCELoss(_Loss): 80 | 81 | def __init__(self, sigma = 1.0): 82 | super(GaussianWeightedCELoss, self).__init__() 83 | self.sigma = sigma 84 | self.WCE = WeightedCrossEntropyLoss() 85 | 86 | def __call__(self, input:torch.Tensor, target:torch.Tensor): 87 | sample_weight = torch.randn((input.size(0), 1)) * self.sigma 88 | sample_weight = sample_weight.to(input.device) 89 | sample_weight = sample_weight + torch.ones_like(sample_weight) 90 | loss = self.WCE(input, target, sample_weight) 91 | return loss 92 | 93 | 94 | -------------------------------------------------------------------------------- /deprecated/utils/loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch.nn.modules.loss import _Loss 4 | from typing import List, Tuple 5 | 6 | 7 | 8 | class WeightedCrossEntropyLoss(_Loss): 9 | ''' 10 | Sampled reweighted Cross Entropy loss 11 | only accept one demensions target and two demension input. 12 | ''' 13 | 14 | def __init__(self, ): 15 | super(WeightedCrossEntropyLoss, self).__init__() 16 | 17 | def __call__(self, input:torch.Tensor, target:torch.Tensor, sample_weight): 18 | probs = F.log_softmax(input, dim = 1) 19 | #if target.ndimension(): 20 | if target.ndimension() == 1: 21 | #print(target.shape) 22 | target = target.expand(1, *target.shape) 23 | target = target.transpose(1, 0) 24 | one_hot = torch.zeros_like(probs).scatter_(1, target, 1) 25 | probs = probs * one_hot * -1.0 26 | loss = torch.sum(probs, 1) 27 | loss = loss * sample_weight 28 | loss = torch.mean(loss) 29 | return loss 30 | 31 | class LabelSmoothCrossEntropyLoss(_Loss): 32 | 33 | def __init__(self, eps = 0.1, class_num = 1000): 34 | super(LabelSmoothCrossEntropyLoss, self).__init__() 35 | 36 | self.min_value = eps / class_num 37 | self.eps = eps 38 | 39 | 40 | def __call__(self, pred:torch.Tensor, target:torch.Tensor): 41 | 42 | epses = self.min_value * torch.ones_like(pred) 43 | log_probs = F.log_softmax(pred, dim=1) 44 | 45 | if target.ndimension() == 1: 46 | #print(target.shape) 47 | target = target.expand(1, *target.shape) 48 | #print(target, 'dwa') 49 | target = target.transpose(1, 0) 50 | target = torch.zeros_like(log_probs).scatter_(1, target, 1) 51 | target = target.type(torch.float) 52 | target = target * (1 - self.eps) + epses 53 | 54 | #print(target, 'fff') 55 | element_wise_mul = log_probs * target * -1.0 56 | 57 | loss = torch.sum(element_wise_mul, 1) 58 | loss = torch.mean(loss) 59 | 60 | return loss 61 | 62 | 63 | class AuxClassifersLoss(_Loss): 64 | 65 | def __init__(self, BasicLoss, weights:List[float]): 66 | super(AuxClassifersLoss, self).__init__() 67 | self.BasicLoss = BasicLoss 68 | self.weights = weights 69 | #print('AuxCls', self.BasicLoss) 70 | 71 | def __call__(self, preds:List[torch.Tensor], target): 72 | 73 | loss = 0 74 | for pred in preds: 75 | loss = loss + self.BasicLoss(pred, target) 76 | return loss 77 | 78 | 79 | class GaussianWeightedCELoss(_Loss): 80 | 81 | def __init__(self, sigma = 1.0): 82 | super(GaussianWeightedCELoss, self).__init__() 83 | self.sigma = sigma 84 | self.WCE = WeightedCrossEntropyLoss() 85 | 86 | def __call__(self, input:torch.Tensor, target:torch.Tensor): 87 | sample_weight = torch.randn((input.size(0), 1)) * self.sigma 88 | sample_weight = sample_weight.to(input.device) 89 | sample_weight = sample_weight + torch.ones_like(sample_weight) 90 | loss = self.WCE(input, target, sample_weight) 91 | return loss 92 | 93 | 94 | 
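The loss classes above are plain callables, so they can be smoke-tested directly on dummy tensors. A minimal sketch (the batch size and class count below are assumptions for illustration, not values used in this repo):
```
import torch
from utils.loss import WeightedCrossEntropyLoss, LabelSmoothCrossEntropyLoss

logits = torch.randn(8, 10)              # batch of 8 samples, 10 classes (illustrative shapes)
targets = torch.randint(0, 10, (8,))     # integer class labels

smooth_ce = LabelSmoothCrossEntropyLoss(eps=0.1, class_num=10)
print(smooth_ce(logits, targets))        # scalar tensor

weighted_ce = WeightedCrossEntropyLoss()
weights = torch.ones(8)                  # per-sample weights; all ones reduces to plain cross-entropy
print(weighted_ce(logits, targets, weights))
```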
-------------------------------------------------------------------------------- /base_model/mobilenetv1.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from builder import ConvBuilder 3 | from constants import MI1_ORIGIN_DEPS 4 | 5 | class MobileV1Block(nn.Module): 6 | '''Depthwise conv + Pointwise conv''' 7 | def __init__(self, builder:ConvBuilder, in_planes, out_planes, stride=1): 8 | super(MobileV1Block, self).__init__() 9 | self.depthwise = builder.Conv2dBNReLU(in_channels=in_planes, out_channels=in_planes, kernel_size=3, 10 | stride=stride, padding=1, groups=in_planes) 11 | self.pointwise = builder.Conv2dBNReLU(in_channels=in_planes, out_channels=out_planes, kernel_size=1, 12 | stride=1, padding=0) 13 | 14 | def forward(self, x): 15 | out = self.depthwise(x) 16 | out = self.pointwise(out) 17 | return out 18 | 19 | 20 | cifar_cfg = [16, 32, 32, 64, 64, (128,2), 128, 128, 128, 128, 128, (256,2), 256] # 93 21 | 22 | 23 | 24 | class MobileV1CifarNet(nn.Module): 25 | 26 | def __init__(self, builder:ConvBuilder, num_classes): 27 | super(MobileV1CifarNet, self).__init__() 28 | self.conv1 = builder.Conv2dBNReLU(in_channels=3, out_channels=16, kernel_size=3, stride=1, padding=1) 29 | blocks = [] 30 | in_planes = cifar_cfg[0] 31 | for x in cifar_cfg: 32 | out_planes = x if isinstance(x, int) else x[0] 33 | stride = 1 if isinstance(x, int) else x[1] 34 | blocks.append(MobileV1Block(builder=builder, in_planes=in_planes, out_planes=out_planes, stride=stride)) 35 | in_planes = out_planes 36 | self.stem = builder.Sequential(*blocks) 37 | self.gap = builder.GAP(kernel_size=8) 38 | self.linear = builder.Linear(cifar_cfg[-1], num_classes) 39 | 40 | def forward(self, x): 41 | out = self.conv1(x) 42 | out = self.stem(out) 43 | out = self.gap(out) 44 | out = self.linear(out) 45 | return out 46 | 47 | class MobileV1ImagenetNet(nn.Module): 48 | 49 | def __init__(self, builder:ConvBuilder, num_classes, deps=None): 50 | super(MobileV1ImagenetNet, self).__init__() 51 | if deps is None: 52 | deps = MI1_ORIGIN_DEPS 53 | assert len(deps) == 27 54 | self.conv1 = builder.Conv2dBNReLU(in_channels=3, out_channels=deps[0], kernel_size=3, stride=2, padding=1) 55 | blocks = [] 56 | for block_idx in range(13): 57 | depthwise_channels = int(deps[block_idx * 2 + 1]) 58 | pointwise_channels = int(deps[block_idx * 2 + 2]) 59 | stride = 2 if block_idx in [1, 3, 5, 11] else 1 60 | blocks.append(MobileV1Block(builder=builder, in_planes=depthwise_channels, out_planes=pointwise_channels, stride=stride)) 61 | 62 | self.stem = builder.Sequential(*blocks) 63 | self.gap = builder.GAP(kernel_size=7) 64 | self.linear = builder.Linear(deps[-1], num_classes) 65 | 66 | def forward(self, x): 67 | out = self.conv1(x) 68 | out = self.stem(out) 69 | out = self.gap(out) 70 | out = self.linear(out) 71 | return out 72 | 73 | def create_MobileV1Cifar(cfg, builder): 74 | return MobileV1CifarNet(builder=builder, num_classes=10) 75 | def create_MobileV1CH(cfg, builder): 76 | return MobileV1CifarNet(builder=builder, num_classes=100) 77 | def create_MobileV1Imagenet(cfg, builder): 78 | return MobileV1ImagenetNet(builder=builder, num_classes=1000, deps=cfg.deps) -------------------------------------------------------------------------------- /base_model/vgg.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from builder import ConvBuilder 3 | from constants import VGG_ORIGIN_DEPS 4 | 5 | def _create_vgg_stem(builder, 
deps): 6 | sq = builder.Sequential() 7 | sq.add_module('conv1', 8 | builder.Conv2dBNReLU(in_channels=3, out_channels=deps[0], kernel_size=3, stride=1, padding=1)) 9 | sq.add_module('conv2', 10 | builder.Conv2dBNReLU(in_channels=deps[0], out_channels=deps[1], kernel_size=3, stride=1, padding=1)) 11 | sq.add_module('maxpool1', builder.Maxpool2d(kernel_size=2)) 12 | sq.add_module('conv3', 13 | builder.Conv2dBNReLU(in_channels=deps[1], out_channels=deps[2], kernel_size=3, stride=1, padding=1)) 14 | sq.add_module('conv4', 15 | builder.Conv2dBNReLU(in_channels=deps[2], out_channels=deps[3], kernel_size=3, stride=1, padding=1)) 16 | sq.add_module('maxpool2', builder.Maxpool2d(kernel_size=2)) 17 | sq.add_module('conv5', 18 | builder.Conv2dBNReLU(in_channels=deps[3], out_channels=deps[4], kernel_size=3, stride=1, padding=1)) 19 | sq.add_module('conv6', 20 | builder.Conv2dBNReLU(in_channels=deps[4], out_channels=deps[5], kernel_size=3, stride=1, padding=1)) 21 | sq.add_module('conv7', 22 | builder.Conv2dBNReLU(in_channels=deps[5], out_channels=deps[6], kernel_size=3, stride=1, padding=1)) 23 | sq.add_module('maxpool3', builder.Maxpool2d(kernel_size=2)) 24 | sq.add_module('conv8', 25 | builder.Conv2dBNReLU(in_channels=deps[6], out_channels=deps[7], kernel_size=3, stride=1, padding=1)) 26 | sq.add_module('conv9', 27 | builder.Conv2dBNReLU(in_channels=deps[7], out_channels=deps[8], kernel_size=3, stride=1, padding=1)) 28 | sq.add_module('conv10', 29 | builder.Conv2dBNReLU(in_channels=deps[8], out_channels=deps[9], kernel_size=3, stride=1, padding=1)) 30 | sq.add_module('maxpool4', builder.Maxpool2d(kernel_size=2)) 31 | sq.add_module('conv11', 32 | builder.Conv2dBNReLU(in_channels=deps[9], out_channels=deps[10], kernel_size=3, stride=1, padding=1)) 33 | sq.add_module('conv12', 34 | builder.Conv2dBNReLU(in_channels=deps[10], out_channels=deps[11], kernel_size=3, stride=1, padding=1)) 35 | sq.add_module('conv13', 36 | builder.Conv2dBNReLU(in_channels=deps[11], out_channels=deps[12], kernel_size=3, stride=1, padding=1)) 37 | sq.add_module('maxpool5', builder.Maxpool2d(kernel_size=2)) 38 | return sq 39 | 40 | class VCNet(nn.Module): 41 | 42 | def __init__(self, num_classes, builder:ConvBuilder, deps): 43 | super(VCNet, self).__init__() 44 | if deps is None: 45 | deps = VGG_ORIGIN_DEPS 46 | self.stem = _create_vgg_stem(builder=builder, deps=deps) 47 | self.flatten = builder.Flatten() 48 | self.linear1 = builder.IntermediateLinear(in_features=deps[12], out_features=512) 49 | self.relu = builder.ReLU() 50 | self.linear2 = builder.Linear(in_features=512, out_features=num_classes) 51 | 52 | def forward(self, x): 53 | out = self.stem(x) 54 | out = self.flatten(out) 55 | out = self.linear1(out) 56 | out = self.relu(out) 57 | out = self.linear2(out) 58 | return out 59 | 60 | 61 | def create_vc(cfg, builder): 62 | return VCNet(num_classes=10, builder=builder, deps=cfg.deps) 63 | def create_vh(cfg, builder): 64 | return VCNet(num_classes=100, builder=builder, deps=cfg.deps) 65 | -------------------------------------------------------------------------------- /acnet/acnet_fusion.py: -------------------------------------------------------------------------------- 1 | from utils.misc import read_hdf5, save_hdf5 2 | import numpy as np 3 | 4 | SQUARE_KERNEL_KEYWORD = 'square_conv.weight' 5 | 6 | def _fuse_kernel(kernel, gamma, std): 7 | b_gamma = np.reshape(gamma, (kernel.shape[0], 1, 1, 1)) 8 | b_gamma = np.tile(b_gamma, (1, kernel.shape[1], kernel.shape[2], kernel.shape[3])) 9 | b_std = np.reshape(std, (kernel.shape[0], 
1, 1, 1)) 10 | b_std = np.tile(b_std, (1, kernel.shape[1], kernel.shape[2], kernel.shape[3])) 11 | return kernel * b_gamma / b_std 12 | 13 | def _add_to_square_kernel(square_kernel, asym_kernel): 14 | asym_h = asym_kernel.shape[2] 15 | asym_w = asym_kernel.shape[3] 16 | square_h = square_kernel.shape[2] 17 | square_w = square_kernel.shape[3] 18 | square_kernel[:, :, square_h // 2 - asym_h // 2: square_h // 2 - asym_h // 2 + asym_h, 19 | square_w // 2 - asym_w // 2 : square_w // 2 - asym_w // 2 + asym_w] += asym_kernel 20 | 21 | 22 | def convert_acnet_weights(train_weights, deploy_weights, eps): 23 | train_dict = read_hdf5(train_weights) 24 | print(train_dict.keys()) 25 | deploy_dict = {} 26 | square_conv_var_names = [name for name in train_dict.keys() if SQUARE_KERNEL_KEYWORD in name] 27 | for square_name in square_conv_var_names: 28 | square_kernel = train_dict[square_name] 29 | square_mean = train_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'square_bn.running_mean')] 30 | square_std = np.sqrt(train_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'square_bn.running_var')] + eps) 31 | square_gamma = train_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'square_bn.weight')] 32 | square_beta = train_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'square_bn.bias')] 33 | 34 | ver_kernel = train_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'ver_conv.weight')] 35 | ver_mean = train_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'ver_bn.running_mean')] 36 | ver_std = np.sqrt(train_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'ver_bn.running_var')] + eps) 37 | ver_gamma = train_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'ver_bn.weight')] 38 | ver_beta = train_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'ver_bn.bias')] 39 | 40 | hor_kernel = train_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'hor_conv.weight')] 41 | hor_mean = train_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'hor_bn.running_mean')] 42 | hor_std = np.sqrt(train_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'hor_bn.running_var')] + eps) 43 | hor_gamma = train_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'hor_bn.weight')] 44 | hor_beta = train_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'hor_bn.bias')] 45 | 46 | fused_bias = square_beta + ver_beta + hor_beta - square_mean * square_gamma / square_std \ 47 | - ver_mean * ver_gamma / ver_std - hor_mean * hor_gamma / hor_std 48 | fused_kernel = _fuse_kernel(square_kernel, square_gamma, square_std) 49 | _add_to_square_kernel(fused_kernel, _fuse_kernel(ver_kernel, ver_gamma, ver_std)) 50 | _add_to_square_kernel(fused_kernel, _fuse_kernel(hor_kernel, hor_gamma, hor_std)) 51 | 52 | deploy_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'fused_conv.weight')] = fused_kernel 53 | deploy_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'fused_conv.bias')] = fused_bias 54 | 55 | for k, v in train_dict.items(): 56 | if 'hor_' not in k and 'ver_' not in k and 'square_' not in k: 57 | deploy_dict[k] = v 58 | save_hdf5(deploy_dict, deploy_weights) -------------------------------------------------------------------------------- /deprecated/acnet/acnet_fusion.py: -------------------------------------------------------------------------------- 1 | from utils.misc import read_hdf5, save_hdf5 2 | import numpy as np 3 | 4 | SQUARE_KERNEL_KEYWORD = 'square_conv.weight' 5 | 6 | def _fuse_kernel(kernel, gamma, std): 7 | b_gamma = np.reshape(gamma, (kernel.shape[0], 1, 1, 1)) 8 | b_gamma = np.tile(b_gamma, (1, kernel.shape[1], kernel.shape[2], kernel.shape[3])) 9 | b_std = 
np.reshape(std, (kernel.shape[0], 1, 1, 1)) 10 | b_std = np.tile(b_std, (1, kernel.shape[1], kernel.shape[2], kernel.shape[3])) 11 | return kernel * b_gamma / b_std 12 | 13 | def _add_to_square_kernel(square_kernel, asym_kernel): 14 | asym_h = asym_kernel.shape[2] 15 | asym_w = asym_kernel.shape[3] 16 | square_h = square_kernel.shape[2] 17 | square_w = square_kernel.shape[3] 18 | square_kernel[:, :, square_h // 2 - asym_h // 2: square_h // 2 - asym_h // 2 + asym_h, 19 | square_w // 2 - asym_w // 2 : square_w // 2 - asym_w // 2 + asym_w] += asym_kernel 20 | 21 | 22 | def convert_acnet_weights(train_weights, deploy_weights, eps): 23 | train_dict = read_hdf5(train_weights) 24 | print(train_dict.keys()) 25 | deploy_dict = {} 26 | square_conv_var_names = [name for name in train_dict.keys() if SQUARE_KERNEL_KEYWORD in name] 27 | for square_name in square_conv_var_names: 28 | square_kernel = train_dict[square_name] 29 | square_mean = train_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'square_bn.running_mean')] 30 | square_std = np.sqrt(train_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'square_bn.running_var')] + eps) 31 | square_gamma = train_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'square_bn.weight')] 32 | square_beta = train_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'square_bn.bias')] 33 | 34 | ver_kernel = train_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'ver_conv.weight')] 35 | ver_mean = train_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'ver_bn.running_mean')] 36 | ver_std = np.sqrt(train_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'ver_bn.running_var')] + eps) 37 | ver_gamma = train_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'ver_bn.weight')] 38 | ver_beta = train_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'ver_bn.bias')] 39 | 40 | hor_kernel = train_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'hor_conv.weight')] 41 | hor_mean = train_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'hor_bn.running_mean')] 42 | hor_std = np.sqrt(train_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'hor_bn.running_var')] + eps) 43 | hor_gamma = train_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'hor_bn.weight')] 44 | hor_beta = train_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'hor_bn.bias')] 45 | 46 | fused_bias = square_beta + ver_beta + hor_beta - square_mean * square_gamma / square_std \ 47 | - ver_mean * ver_gamma / ver_std - hor_mean * hor_gamma / hor_std 48 | fused_kernel = _fuse_kernel(square_kernel, square_gamma, square_std) 49 | _add_to_square_kernel(fused_kernel, _fuse_kernel(ver_kernel, ver_gamma, ver_std)) 50 | _add_to_square_kernel(fused_kernel, _fuse_kernel(hor_kernel, hor_gamma, hor_std)) 51 | 52 | deploy_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'fused_conv.weight')] = fused_kernel 53 | deploy_dict[square_name.replace(SQUARE_KERNEL_KEYWORD, 'fused_conv.bias')] = fused_bias 54 | 55 | for k, v in train_dict.items(): 56 | if 'hor_' not in k and 'ver_' not in k and 'square_' not in k: 57 | deploy_dict[k] = v 58 | save_hdf5(deploy_dict, deploy_weights) 59 | 60 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /base_model/resnet.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ResNet in PyTorch.absFor Pre-activation ResNet, see 'preact_resnet.py'. 3 | Reference: 4 | [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun 5 | Deep Residual Learning for Image Recognition. 
arXiv:1512.03385 6 | 7 | Note: cifar_resnet18 constructs the same model with that from 8 | https://github.com/kuangliu/pytorch-cifar/blob/master/models/resnet.py 9 | ''' 10 | 11 | import torch.nn as nn 12 | from builder import ConvBuilder 13 | 14 | class BasicBlock(nn.Module): 15 | 16 | expansion = 1 17 | 18 | def __init__(self, builder:ConvBuilder, in_planes, planes, stride=1): 19 | super(BasicBlock, self).__init__() 20 | self.bd = builder 21 | self.relu = builder.ReLU() 22 | 23 | if stride != 1 or in_planes != self.expansion * planes: 24 | self.shortcut = builder.Conv2dBN(in_channels=in_planes, out_channels=self.expansion * planes, kernel_size=1, stride=stride) 25 | else: 26 | self.shortcut = builder.ResIdentity(num_channels=in_planes) 27 | 28 | self.conv1 = builder.Conv2dBNReLU(in_channels=in_planes, out_channels=planes, kernel_size=3, stride=stride, padding=1) 29 | self.conv2 = builder.Conv2dBN(in_channels=planes, out_channels=self.expansion * planes, kernel_size=3, stride=1, padding=1) 30 | 31 | def forward(self, x): 32 | out = self.conv1(x) 33 | out = self.conv2(out) 34 | out = self.bd.add(out, self.shortcut(x)) 35 | out = self.relu(out) 36 | return out 37 | 38 | 39 | class ResNet(nn.Module): 40 | def __init__(self, builder:ConvBuilder, block, num_blocks, num_classes=10, width_multiplier=None): 41 | super(ResNet, self).__init__() 42 | 43 | print('width multiplier: ', width_multiplier) 44 | 45 | if width_multiplier is None: 46 | width_multiplier = 1 47 | else: 48 | width_multiplier = width_multiplier[0] 49 | 50 | self.bd = builder 51 | self.in_planes = int(64 * width_multiplier) 52 | self.conv1 = builder.Conv2dBNReLU(3, int(64 * width_multiplier), kernel_size=7, stride=2, padding=3) 53 | self.stage1 = self._make_stage(block, int(64 * width_multiplier), num_blocks[0], stride=1) 54 | self.stage2 = self._make_stage(block, int(128 * width_multiplier), num_blocks[1], stride=2) 55 | self.stage3 = self._make_stage(block, int(256 * width_multiplier), num_blocks[2], stride=2) 56 | self.stage4 = self._make_stage(block, int(512 * width_multiplier), num_blocks[3], stride=2) 57 | self.gap = builder.GAP(kernel_size=7) 58 | self.linear = self.bd.Linear(int(512*block.expansion*width_multiplier), num_classes) 59 | 60 | def _make_stage(self, block, planes, num_blocks, stride): 61 | strides = [stride] + [1]*(num_blocks-1) 62 | blocks = [] 63 | for stride in strides: 64 | blocks.append(block(builder=self.bd, in_planes=self.in_planes, planes=int(planes), stride=stride)) 65 | self.in_planes = int(planes * block.expansion) 66 | return nn.Sequential(*blocks) 67 | 68 | def forward(self, x): 69 | out = self.conv1(x) 70 | out = self.bd.max_pool2d(out, kernel_size=3, stride=2, padding=1) 71 | out = self.stage1(out) 72 | out = self.stage2(out) 73 | out = self.stage3(out) 74 | out = self.stage4(out) 75 | out = self.gap(out) 76 | out = self.linear(out) 77 | return out 78 | 79 | def create_ResNet18(cfg, builder): 80 | return ResNet(builder, BasicBlock, [2,2,2,2], num_classes=1000, width_multiplier=cfg.deps) 81 | def create_ResNet34(cfg, builder): 82 | return ResNet(builder, BasicBlock, [3,4,6,3], num_classes=1000, width_multiplier=cfg.deps) -------------------------------------------------------------------------------- /deprecated/utils/checkpoint.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import time 3 | from collections import OrderedDict 4 | 5 | 6 | def load_model(model, model_file, logger): 7 | t_start = time.time() 8 | if isinstance(model_file, 
str): 9 | state_dict = torch.load(model_file, map_location='cpu') 10 | if 'model' in state_dict.keys(): 11 | state_dict = state_dict['model'] 12 | else: 13 | state_dict = model_file 14 | 15 | state_dict = _align_and_update_loaded_state_dicts( 16 | model.state_dict(), state_dict) 17 | t_io_end = time.time() 18 | 19 | # if is_restore: 20 | # new_state_dict = OrderedDict() 21 | # for k, v in state_dict.items(): 22 | # name = 'module.' + k 23 | # new_state_dict[name] = v 24 | # state_dict = new_state_dict 25 | model.load_state_dict(state_dict, strict=False) 26 | ckpt_keys = set(state_dict.keys()) 27 | own_keys = set(model.state_dict().keys()) 28 | missing_keys = own_keys - ckpt_keys 29 | unexpected_keys = ckpt_keys - own_keys 30 | 31 | if len(missing_keys) > 0: 32 | logger.warning('Missing key(s) in state_dict: {}'.format( 33 | ', '.join('{}'.format(k) for k in missing_keys))) 34 | 35 | if len(unexpected_keys) > 0: 36 | logger.warning('Unexpected key(s) in state_dict: {}'.format( 37 | ', '.join('{}'.format(k) for k in unexpected_keys))) 38 | 39 | del state_dict 40 | t_end = time.time() 41 | logger.info( 42 | "Load model, Time usage:\n\tIO: {}, " 43 | "initialize parameters: {}".format( 44 | t_io_end - t_start, t_end - t_io_end)) 45 | 46 | return model 47 | 48 | 49 | def _align_and_update_loaded_state_dicts(model_state_dict, loaded_state_dict): 50 | """ 51 | Strategy: suppose that the models that we will create will have 52 | prefixes appended to each of its keys, for example due to an extra 53 | level of nesting that the original pre-trained weights from ImageNet 54 | won't contain. For example, model.state_dict() might return 55 | backbone[0].body.res2.conv1.weight, while the pre-trained model contains 56 | res2.conv1.weight. We thus want to match both parameters together. 57 | For that, we look for each model weight, look among all loaded keys 58 | if there is one that is a suffix of the current weight name, 59 | and use it if that's the case. If multiple matches exist, 60 | take the one with longest size of the corresponding name. For example, 61 | for the same model as before, the pretrained weight file can contain 62 | both res2.conv1.weight, as well as conv1.weight. In this case, 63 | we want to match backbone[0].body.conv1.weight to conv1.weight, and 64 | backbone[0].body.res2.conv1.weight to res2.conv1.weight. 65 | """ 66 | current_keys = sorted(list(model_state_dict.keys())) 67 | loaded_keys = sorted(list(loaded_state_dict.keys())) 68 | aligned_loaded_state_dict = loaded_state_dict.copy() 69 | 70 | # get a matrix of string matches, where each (i, j) entry 71 | # correspond to the size of the loaded_key string, if it matches 72 | match_matrix = [ 73 | len(j) if i.endswith(j) else 0 for i in current_keys for j in 74 | loaded_keys] 75 | match_matrix = torch.as_tensor(match_matrix).view( 76 | len(current_keys), len(loaded_keys)) 77 | max_match_size, idxs = match_matrix.max(1) 78 | idxs[max_match_size == 0] = -1 79 | 80 | for idx_new, idx_old in enumerate(idxs.tolist()): 81 | if idx_old == -1: 82 | continue 83 | key = current_keys[idx_new] 84 | key_old = loaded_keys[idx_old] 85 | aligned_loaded_state_dict[key] = \ 86 | aligned_loaded_state_dict.pop(key_old) 87 | del loaded_state_dict 88 | return aligned_loaded_state_dict 89 | -------------------------------------------------------------------------------- /utils/torch_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file contains primitives for multi-gpu communication. 
3 | This is useful when doing distributed training. 4 | """ 5 | 6 | import pickle 7 | import time 8 | 9 | import torch 10 | import torch.distributed as dist 11 | 12 | 13 | def get_world_size(): 14 | if not dist.is_available(): 15 | return 1 16 | if not dist.is_initialized(): 17 | return 1 18 | return dist.get_world_size() 19 | 20 | 21 | def get_rank(): 22 | if not dist.is_available(): 23 | return 0 24 | if not dist.is_initialized(): 25 | return 0 26 | return dist.get_rank() 27 | 28 | 29 | def is_main_process(): 30 | return get_rank() == 0 31 | 32 | 33 | def synchronize(): 34 | """ 35 | Helper function to synchronize (barrier) among all processes when 36 | using distributed training 37 | """ 38 | if not dist.is_available(): 39 | return 40 | if not dist.is_initialized(): 41 | return 42 | world_size = dist.get_world_size() 43 | if world_size == 1: 44 | return 45 | dist.barrier() 46 | 47 | 48 | def all_gather(data): 49 | """ 50 | Run all_gather on arbitrary picklable data (not necessarily tensors) 51 | Args: 52 | data: any picklable object 53 | Returns: 54 | list[data]: list of data gathered from each rank 55 | """ 56 | world_size = get_world_size() 57 | if world_size == 1: 58 | return [data] 59 | 60 | # serialized to a Tensor 61 | buffer = pickle.dumps(data) 62 | storage = torch.ByteStorage.from_buffer(buffer) 63 | tensor = torch.ByteTensor(storage).to("cuda") 64 | 65 | # obtain Tensor size of each rank 66 | local_size = torch.LongTensor([tensor.numel()]).to("cuda") 67 | size_list = [torch.LongTensor([0]).to("cuda") for _ in range(world_size)] 68 | dist.all_gather(size_list, local_size) 69 | size_list = [int(size.item()) for size in size_list] 70 | max_size = max(size_list) 71 | 72 | # receiving Tensor from all ranks 73 | # we pad the tensor because torch all_gather does not support 74 | # gathering tensors of different shapes 75 | tensor_list = [] 76 | for _ in size_list: 77 | tensor_list.append(torch.ByteTensor(size=(max_size,)).to("cuda")) 78 | if local_size != max_size: 79 | padding = torch.ByteTensor(size=(max_size - local_size,)).to("cuda") 80 | tensor = torch.cat((tensor, padding), dim=0) 81 | dist.all_gather(tensor_list, tensor) 82 | 83 | data_list = [] 84 | for size, tensor in zip(size_list, tensor_list): 85 | buffer = tensor.cpu().numpy().tobytes()[:size] 86 | data_list.append(pickle.loads(buffer)) 87 | 88 | return data_list 89 | 90 | 91 | def reduce_dict(input_dict, average=True): 92 | """ 93 | Args: 94 | input_dict (dict): all the values will be reduced 95 | average (bool): whether to do average or sum 96 | Reduce the values in the dictionary from all processes so that process with rank 97 | 0 has the averaged results. Returns a dict with the same fields as 98 | input_dict, after reduction. 
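Note: dist.reduce only defines the result on the destination rank, so only rank 0 should read the reduced values from the returned dict.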
99 | """ 100 | world_size = get_world_size() 101 | if world_size < 2: 102 | return input_dict 103 | with torch.no_grad(): 104 | names = [] 105 | values = [] 106 | # sort the keys so that they are consistent across processes 107 | for k in sorted(input_dict.keys()): 108 | names.append(k) 109 | values.append(input_dict[k]) 110 | values = torch.stack(values, dim=0) 111 | dist.reduce(values, dst=0) 112 | if dist.get_rank() == 0 and average: 113 | # only main process gets accumulated, so only divide by 114 | # world_size in this case 115 | values /= world_size 116 | reduced_dict = {k: v for k, v in zip(names, values)} 117 | return reduced_dict 118 | -------------------------------------------------------------------------------- /deprecated/utils/torch_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file contains primitives for multi-gpu communication. 3 | This is useful when doing distributed training. 4 | """ 5 | 6 | import pickle 7 | import time 8 | 9 | import torch 10 | import torch.distributed as dist 11 | 12 | 13 | def get_world_size(): 14 | if not dist.is_available(): 15 | return 1 16 | if not dist.is_initialized(): 17 | return 1 18 | return dist.get_world_size() 19 | 20 | 21 | def get_rank(): 22 | if not dist.is_available(): 23 | return 0 24 | if not dist.is_initialized(): 25 | return 0 26 | return dist.get_rank() 27 | 28 | 29 | def is_main_process(): 30 | return get_rank() == 0 31 | 32 | 33 | def synchronize(): 34 | """ 35 | Helper function to synchronize (barrier) among all processes when 36 | using distributed training 37 | """ 38 | if not dist.is_available(): 39 | return 40 | if not dist.is_initialized(): 41 | return 42 | world_size = dist.get_world_size() 43 | if world_size == 1: 44 | return 45 | dist.barrier() 46 | 47 | 48 | def all_gather(data): 49 | """ 50 | Run all_gather on arbitrary picklable data (not necessarily tensors) 51 | Args: 52 | data: any picklable object 53 | Returns: 54 | list[data]: list of data gathered from each rank 55 | """ 56 | world_size = get_world_size() 57 | if world_size == 1: 58 | return [data] 59 | 60 | # serialized to a Tensor 61 | buffer = pickle.dumps(data) 62 | storage = torch.ByteStorage.from_buffer(buffer) 63 | tensor = torch.ByteTensor(storage).to("cuda") 64 | 65 | # obtain Tensor size of each rank 66 | local_size = torch.LongTensor([tensor.numel()]).to("cuda") 67 | size_list = [torch.LongTensor([0]).to("cuda") for _ in range(world_size)] 68 | dist.all_gather(size_list, local_size) 69 | size_list = [int(size.item()) for size in size_list] 70 | max_size = max(size_list) 71 | 72 | # receiving Tensor from all ranks 73 | # we pad the tensor because torch all_gather does not support 74 | # gathering tensors of different shapes 75 | tensor_list = [] 76 | for _ in size_list: 77 | tensor_list.append(torch.ByteTensor(size=(max_size,)).to("cuda")) 78 | if local_size != max_size: 79 | padding = torch.ByteTensor(size=(max_size - local_size,)).to("cuda") 80 | tensor = torch.cat((tensor, padding), dim=0) 81 | dist.all_gather(tensor_list, tensor) 82 | 83 | data_list = [] 84 | for size, tensor in zip(size_list, tensor_list): 85 | buffer = tensor.cpu().numpy().tobytes()[:size] 86 | data_list.append(pickle.loads(buffer)) 87 | 88 | return data_list 89 | 90 | 91 | def reduce_dict(input_dict, average=True): 92 | """ 93 | Args: 94 | input_dict (dict): all the values will be reduced 95 | average (bool): whether to do average or sum 96 | Reduce the values in the dictionary from all processes so that process 
with rank 97 | 0 has the averaged results. Returns a dict with the same fields as 98 | input_dict, after reduction. 99 | """ 100 | world_size = get_world_size() 101 | if world_size < 2: 102 | return input_dict 103 | with torch.no_grad(): 104 | names = [] 105 | values = [] 106 | # sort the keys so that they are consistent across processes 107 | for k in sorted(input_dict.keys()): 108 | names.append(k) 109 | values.append(input_dict[k]) 110 | values = torch.stack(values, dim=0) 111 | dist.reduce(values, dst=0) 112 | if dist.get_rank() == 0 and average: 113 | # only main process gets accumulated, so only divide by 114 | # world_size in this case 115 | values /= world_size 116 | reduced_dict = {k: v for k, v in zip(names, values)} 117 | return reduced_dict 118 | -------------------------------------------------------------------------------- /deprecated/utils/lr_scheduler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from bisect import bisect_right 3 | 4 | import torch 5 | 6 | 7 | # FIXME ideally this would be achieved with a CombinedLRScheduler, 8 | # separating MultiStepLR with WarmupLR 9 | # but the current LRScheduler design doesn't allow it 10 | class WarmupMultiStepLR(torch.optim.lr_scheduler._LRScheduler): 11 | def __init__( 12 | self, 13 | optimizer, 14 | milestones, 15 | gamma=0.1, 16 | warmup_factor=1.0 / 3, 17 | warmup_iters=500, 18 | warmup_method="linear", 19 | last_epoch=-1, 20 | ): 21 | if not list(milestones) == sorted(milestones): 22 | raise ValueError( 23 | "Milestones should be a list of" " increasing integers. Got {}", 24 | milestones, 25 | ) 26 | 27 | if warmup_method not in ("constant", "linear"): 28 | raise ValueError( 29 | "Only 'constant' or 'linear' warmup_method accepted" 30 | "got {}".format(warmup_method) 31 | ) 32 | self.milestones = milestones 33 | self.gamma = gamma 34 | self.warmup_factor = warmup_factor 35 | self.warmup_iters = warmup_iters 36 | self.warmup_method = warmup_method 37 | super(WarmupMultiStepLR, self).__init__(optimizer, last_epoch) 38 | 39 | def get_lr(self): 40 | warmup_factor = 1 41 | if self.last_epoch < self.warmup_iters: 42 | if self.warmup_method == "constant": 43 | warmup_factor = self.warmup_factor 44 | elif self.warmup_method == "linear": 45 | alpha = float(self.last_epoch) / self.warmup_iters 46 | warmup_factor = self.warmup_factor * (1 - alpha) + alpha 47 | return [ 48 | base_lr 49 | * warmup_factor 50 | * self.gamma ** bisect_right(self.milestones, self.last_epoch) 51 | for base_lr in self.base_lrs 52 | ] 53 | 54 | 55 | 56 | class WarmupLinearLR(torch.optim.lr_scheduler._LRScheduler): 57 | def __init__( 58 | self, 59 | optimizer, 60 | final_lr, 61 | final_iters, 62 | warmup_factor=1.0 / 3, 63 | warmup_iters=500, 64 | warmup_method="linear", 65 | last_epoch=-1, 66 | ): 67 | assert final_iters > warmup_iters 68 | self.final_lr = final_lr 69 | self.final_iters = final_iters 70 | self.warmup_factor = warmup_factor 71 | self.warmup_iters = max(warmup_iters, 0) 72 | self.warmup_method = warmup_method 73 | super(WarmupLinearLR, self).__init__(optimizer, last_epoch) 74 | 75 | # last_epoch == 0: base_lr * warmup_factor 76 | # last_epoch == warmup_iters: base_lr 77 | # last_epoch == final_iters: final_lr 78 | 79 | def get_lr(self): 80 | if self.last_epoch < self.warmup_iters: 81 | if self.warmup_method == "constant": 82 | warmup_factor = self.warmup_factor 83 | elif self.warmup_method == "linear": 84 | alpha = 
float(self.last_epoch) / self.warmup_iters 85 | warmup_factor = self.warmup_factor * (1 - alpha) + alpha 86 | else: 87 | raise ValueError( 88 | "Only 'constant' or 'linear' warmup_method accepted" 89 | "got {}".format(self.warmup_method) 90 | ) 91 | return [ 92 | base_lr 93 | * warmup_factor 94 | for base_lr in self.base_lrs 95 | ] 96 | else: 97 | return [ 98 | base_lr - (base_lr - self.final_lr) * float(self.last_epoch - self.warmup_iters) / ( 99 | self.final_iters - self.warmup_iters) 100 | for base_lr in self.base_lrs 101 | ] -------------------------------------------------------------------------------- /deprecated/base_config.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | from model_map import get_dataset_name_by_model_name 3 | 4 | BaseConfigByEpoch = namedtuple('BaseConfigByEpoch', ['network_type', 'dataset_name', 'dataset_subset', 'global_batch_size', 'num_node', 'device', 5 | 'weight_decay', 'weight_decay_bias', 'optimizer_type', 'momentum', 6 | 'bias_lr_factor', 'max_epochs', 'base_lr', 'lr_epoch_boundaries', 'lr_decay_factor', 'linear_final_lr', 7 | 'warmup_epochs', 'warmup_method', 'warmup_factor', 8 | 'ckpt_iter_period', 'tb_iter_period', 9 | 'output_dir', 'tb_dir', 10 | 'init_weights', 'save_weights', 11 | 'val_epoch_period', 'grad_accum_iters', 12 | 'deps', 13 | 'se_reduce_scale']) 14 | 15 | def get_baseconfig_by_epoch(network_type, dataset_name, dataset_subset, global_batch_size, num_node, 16 | weight_decay, optimizer_type, momentum, 17 | max_epochs, base_lr, lr_epoch_boundaries, lr_decay_factor, linear_final_lr, 18 | warmup_epochs, warmup_method, warmup_factor, 19 | ckpt_iter_period, tb_iter_period, 20 | output_dir, tb_dir, save_weights, 21 | device='cuda', weight_decay_bias=0, bias_lr_factor=2, init_weights=None, val_epoch_period=-1, grad_accum_iters=1, 22 | deps=None, 23 | se_reduce_scale=0): 24 | print('----------------- show lr schedule --------------') 25 | print('base_lr:', base_lr) 26 | print('max_epochs:', max_epochs) 27 | print('lr_epochs:', lr_epoch_boundaries) 28 | print('lr_decay:', lr_decay_factor) 29 | print('linear_final_lr:', linear_final_lr) 30 | print('-------------------------------------------------') 31 | 32 | return BaseConfigByEpoch(network_type=network_type,dataset_name=dataset_name,dataset_subset=dataset_subset,global_batch_size=global_batch_size,num_node=num_node, device=device, 33 | weight_decay=weight_decay,weight_decay_bias=weight_decay_bias,optimizer_type=optimizer_type,momentum=momentum,bias_lr_factor=bias_lr_factor, 34 | max_epochs=max_epochs, base_lr=base_lr, lr_epoch_boundaries=lr_epoch_boundaries,lr_decay_factor=lr_decay_factor, linear_final_lr=linear_final_lr, 35 | warmup_epochs=warmup_epochs,warmup_method=warmup_method,warmup_factor=warmup_factor, 36 | ckpt_iter_period=int(ckpt_iter_period),tb_iter_period=int(tb_iter_period), 37 | output_dir=output_dir, tb_dir=tb_dir, 38 | init_weights=init_weights, save_weights=save_weights, 39 | val_epoch_period=val_epoch_period, grad_accum_iters=grad_accum_iters, deps=deps, se_reduce_scale=se_reduce_scale) 40 | 41 | def get_baseconfig_for_test(network_type, dataset_subset, global_batch_size, init_weights, device='cuda', deps=None, se_reduce_scale=0): 42 | return BaseConfigByEpoch(network_type=network_type, dataset_name=get_dataset_name_by_model_name(network_type), 43 | dataset_subset=dataset_subset, global_batch_size=global_batch_size, num_node=1, device=device, 44 | weight_decay=None, weight_decay_bias=None, 
optimizer_type=None, momentum=None, bias_lr_factor=None, 45 | max_epochs=None, base_lr=None, lr_epoch_boundaries=None, lr_decay_factor=None, linear_final_lr=None, 46 | warmup_epochs=None, warmup_method=None, warmup_factor=None, ckpt_iter_period=None, 47 | tb_iter_period=None, output_dir=None, tb_dir=None, init_weights=init_weights, 48 | save_weights=None, val_epoch_period=None, grad_accum_iters=None, deps=deps, se_reduce_scale=se_reduce_scale) -------------------------------------------------------------------------------- /utils/comm.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file is maily copied from https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/utils/comm.py 3 | This file contains primitives for multi-gpu communication. 4 | This is useful when doing distributed training. 5 | """ 6 | 7 | 8 | import torch 9 | import torch.distributed as dist 10 | 11 | import pickle 12 | import time 13 | 14 | 15 | def get_world_size(): 16 | if not dist.is_available(): 17 | return 1 18 | if not dist.is_initialized(): 19 | return 1 20 | return dist.get_world_size() 21 | 22 | 23 | def get_rank(): 24 | if not dist.is_available(): 25 | return 0 26 | if not dist.is_initialized(): 27 | return 0 28 | return dist.get_rank() 29 | 30 | 31 | def is_main_process(): 32 | return get_rank() == 0 33 | 34 | 35 | def synchronize(): 36 | """ 37 | Helper function to synchronize (barrier) among all processes when 38 | using distributed training 39 | """ 40 | if not dist.is_available(): 41 | return 42 | if not dist.is_initialized(): 43 | return 44 | world_size = dist.get_world_size() 45 | if world_size == 1: 46 | return 47 | dist.barrier() 48 | 49 | def reduce_loss_dict(loss_dict): 50 | """ 51 | Reduce the loss dictionary from all processes so that process with rank 52 | 0 has the averaged results. Returns a dict with the same fields as 53 | loss_dict, after reduction. 
(avg) 54 | """ 55 | world_size = get_world_size() 56 | if world_size < 2: 57 | return loss_dict 58 | with torch.no_grad(): 59 | loss_names = [] 60 | all_losses = [] 61 | for k in sorted(loss_dict.keys()): 62 | loss_names.append(k) 63 | all_losses.append(loss_dict[k]) 64 | all_losses = torch.stack(all_losses, dim=0) 65 | dist.reduce(all_losses, dst=0) 66 | if dist.get_rank() == 0: 67 | # only main process gets accumulated, so only divide by 68 | # world_size in this case 69 | all_losses /= world_size 70 | reduced_losses = {k: v for k, v in zip(loss_names, all_losses)} 71 | return reduced_losses 72 | 73 | 74 | def all_gather(data): 75 | """ 76 | Run all_gather on arbitrary picklable data (not necessarily tensors) 77 | Args: 78 | data: any picklable object 79 | Returns: 80 | list[data]: list of data gathered from each rank 81 | """ 82 | world_size = get_world_size() 83 | if world_size == 1: 84 | return [data] 85 | 86 | # serialized to a Tensor 87 | buffer = pickle.dumps(data) 88 | storage = torch.ByteStorage.from_buffer(buffer) 89 | tensor = torch.ByteTensor(storage).to("cuda") 90 | 91 | # obtain Tensor size of each rank 92 | local_size = torch.IntTensor([tensor.numel()]).to("cuda") 93 | size_list = [torch.IntTensor([0]).to("cuda") for _ in range(world_size)] 94 | dist.all_gather(size_list, local_size) 95 | size_list = [int(size.item()) for size in size_list] 96 | max_size = max(size_list) 97 | 98 | # receiving Tensor from all ranks 99 | # we pad the tensor because torch all_gather does not support 100 | # gathering tensors of different shapes 101 | tensor_list = [] 102 | for _ in size_list: 103 | tensor_list.append(torch.ByteTensor(size=(max_size,)).to("cuda")) 104 | if local_size != max_size: 105 | padding = torch.ByteTensor(size=(max_size - local_size,)).to("cuda") 106 | tensor = torch.cat((tensor, padding), dim=0) 107 | dist.all_gather(tensor_list, tensor) 108 | 109 | data_list = [] 110 | for size, tensor in zip(size_list, tensor_list): 111 | buffer = tensor.cpu().numpy().tobytes()[:size] 112 | data_list.append(pickle.loads(buffer)) 113 | 114 | return data_list 115 | 116 | 117 | 118 | def my_reduce_dic(dic): 119 | dics = all_gather(dic) 120 | 121 | if is_main_process(): 122 | for key in dic.keys(): 123 | value = 0 124 | for tdic in dics: 125 | value = value + tdic[key] 126 | dic[key] = value / (len(dics)) 127 | return dic -------------------------------------------------------------------------------- /deprecated/utils/comm.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file is maily copied from https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/utils/comm.py 3 | This file contains primitives for multi-gpu communication. 4 | This is useful when doing distributed training. 
5 | """ 6 | 7 | 8 | import torch 9 | import torch.distributed as dist 10 | 11 | import pickle 12 | import time 13 | 14 | 15 | def get_world_size(): 16 | if not dist.is_available(): 17 | return 1 18 | if not dist.is_initialized(): 19 | return 1 20 | return dist.get_world_size() 21 | 22 | 23 | def get_rank(): 24 | if not dist.is_available(): 25 | return 0 26 | if not dist.is_initialized(): 27 | return 0 28 | return dist.get_rank() 29 | 30 | 31 | def is_main_process(): 32 | return get_rank() == 0 33 | 34 | 35 | def synchronize(): 36 | """ 37 | Helper function to synchronize (barrier) among all processes when 38 | using distributed training 39 | """ 40 | if not dist.is_available(): 41 | return 42 | if not dist.is_initialized(): 43 | return 44 | world_size = dist.get_world_size() 45 | if world_size == 1: 46 | return 47 | dist.barrier() 48 | 49 | def reduce_loss_dict(loss_dict): 50 | """ 51 | Reduce the loss dictionary from all processes so that process with rank 52 | 0 has the averaged results. Returns a dict with the same fields as 53 | loss_dict, after reduction. (avg) 54 | """ 55 | world_size = get_world_size() 56 | if world_size < 2: 57 | return loss_dict 58 | with torch.no_grad(): 59 | loss_names = [] 60 | all_losses = [] 61 | for k in sorted(loss_dict.keys()): 62 | loss_names.append(k) 63 | all_losses.append(loss_dict[k]) 64 | all_losses = torch.stack(all_losses, dim=0) 65 | dist.reduce(all_losses, dst=0) 66 | if dist.get_rank() == 0: 67 | # only main process gets accumulated, so only divide by 68 | # world_size in this case 69 | all_losses /= world_size 70 | reduced_losses = {k: v for k, v in zip(loss_names, all_losses)} 71 | return reduced_losses 72 | 73 | 74 | def all_gather(data): 75 | """ 76 | Run all_gather on arbitrary picklable data (not necessarily tensors) 77 | Args: 78 | data: any picklable object 79 | Returns: 80 | list[data]: list of data gathered from each rank 81 | """ 82 | world_size = get_world_size() 83 | if world_size == 1: 84 | return [data] 85 | 86 | # serialized to a Tensor 87 | buffer = pickle.dumps(data) 88 | storage = torch.ByteStorage.from_buffer(buffer) 89 | tensor = torch.ByteTensor(storage).to("cuda") 90 | 91 | # obtain Tensor size of each rank 92 | local_size = torch.IntTensor([tensor.numel()]).to("cuda") 93 | size_list = [torch.IntTensor([0]).to("cuda") for _ in range(world_size)] 94 | dist.all_gather(size_list, local_size) 95 | size_list = [int(size.item()) for size in size_list] 96 | max_size = max(size_list) 97 | 98 | # receiving Tensor from all ranks 99 | # we pad the tensor because torch all_gather does not support 100 | # gathering tensors of different shapes 101 | tensor_list = [] 102 | for _ in size_list: 103 | tensor_list.append(torch.ByteTensor(size=(max_size,)).to("cuda")) 104 | if local_size != max_size: 105 | padding = torch.ByteTensor(size=(max_size - local_size,)).to("cuda") 106 | tensor = torch.cat((tensor, padding), dim=0) 107 | dist.all_gather(tensor_list, tensor) 108 | 109 | data_list = [] 110 | for size, tensor in zip(size_list, tensor_list): 111 | buffer = tensor.cpu().numpy().tobytes()[:size] 112 | data_list.append(pickle.loads(buffer)) 113 | 114 | return data_list 115 | 116 | 117 | 118 | def my_reduce_dic(dic): 119 | dics = all_gather(dic) 120 | 121 | if is_main_process(): 122 | for key in dic.keys(): 123 | value = 0 124 | for tdic in dics: 125 | value = value + tdic[key] 126 | dic[key] = value / (len(dics)) 127 | return dic -------------------------------------------------------------------------------- /acnet/acnet_builder.py: 
-------------------------------------------------------------------------------- 1 | from builder import ConvBuilder 2 | from acnet.acb import ACBlock 3 | import torch.nn as nn 4 | 5 | class ACNetBuilder(ConvBuilder): 6 | 7 | def __init__(self, base_config, deploy, gamma_init=None): 8 | super(ACNetBuilder, self).__init__(base_config=base_config) 9 | self.deploy = deploy 10 | self.use_last_bn = False 11 | self.gamma_init = gamma_init 12 | 13 | def switch_to_deploy(self): 14 | self.deploy = True 15 | 16 | def Conv2d(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', use_original_conv=False): 17 | if use_original_conv or kernel_size == 1 or kernel_size == (1, 1) or kernel_size >= 7: 18 | return super(ACNetBuilder, self).Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, 19 | padding=padding, dilation=dilation, groups=groups, bias=bias, padding_mode=padding_mode, use_original_conv=True) 20 | else: 21 | return ACBlock(in_channels, out_channels, kernel_size=kernel_size, stride=stride, 22 | padding=padding, dilation=dilation, groups=groups, padding_mode=padding_mode, deploy=self.deploy, 23 | use_last_bn=self.use_last_bn, gamma_init=self.gamma_init) 24 | 25 | 26 | def Conv2dBN(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, padding_mode='zeros', use_original_conv=False): 27 | if use_original_conv or kernel_size == 1 or kernel_size == (1, 1) or kernel_size >= 7: 28 | return super(ACNetBuilder, self).Conv2dBN(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, 29 | padding=padding, dilation=dilation, groups=groups, padding_mode=padding_mode, use_original_conv=True) 30 | else: 31 | return ACBlock(in_channels, out_channels, kernel_size=kernel_size, stride=stride, 32 | padding=padding, dilation=dilation, groups=groups, padding_mode=padding_mode, deploy=self.deploy, 33 | use_last_bn=self.use_last_bn, gamma_init=self.gamma_init) 34 | 35 | 36 | def Conv2dBNReLU(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, padding_mode='zeros', use_original_conv=False): 37 | if use_original_conv or kernel_size == 1 or kernel_size == (1, 1) or kernel_size >= 7: 38 | return super(ACNetBuilder, self).Conv2dBNReLU(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, 39 | padding=padding, dilation=dilation, groups=groups, padding_mode=padding_mode, use_original_conv=True) 40 | else: 41 | se = nn.Sequential() 42 | se.add_module('acb', ACBlock(in_channels, out_channels, kernel_size=kernel_size, stride=stride, 43 | padding=padding, dilation=dilation, groups=groups, padding_mode=padding_mode, deploy=self.deploy, 44 | use_last_bn=self.use_last_bn, gamma_init=self.gamma_init)) 45 | se.add_module('relu', self.ReLU()) 46 | return se 47 | 48 | 49 | def BNReLUConv2d(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, padding_mode='zeros', use_original_conv=False): 50 | if use_original_conv or kernel_size == 1 or kernel_size == (1, 1) or kernel_size >= 7: 51 | return super(ACNetBuilder, self).BNReLUConv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, 52 | padding=padding, dilation=dilation, groups=groups, padding_mode=padding_mode, use_original_conv=True) 53 | bn_layer = self.BatchNorm2d(num_features=in_channels) 54 | conv_layer = ACBlock(in_channels, out_channels, 
kernel_size=kernel_size, stride=stride, 55 | padding=padding, dilation=dilation, groups=groups, padding_mode=padding_mode, deploy=self.deploy) 56 | se = self.Sequential() 57 | se.add_module('bn', bn_layer) 58 | se.add_module('relu', self.ReLU()) 59 | se.add_module('acb', conv_layer) 60 | return se -------------------------------------------------------------------------------- /utils/checkpoint.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import time 3 | from collections import OrderedDict 4 | import os 5 | 6 | def get_last_checkpoint(dir): 7 | 'iter-200000.pth' 8 | target_ckpts = [t for t in os.listdir(dir) if '.pth' in t] 9 | if 'latest.pth' in target_ckpts: 10 | return os.path.join(dir, 'latest.pth') 11 | target_ckpts.sort(key=lambda x: int(x.replace('iter-', '').replace('.pth', ''))) 12 | ckpt = os.path.join(dir, target_ckpts[-1]) 13 | return ckpt 14 | 15 | def load_model(model, model_file, logger): 16 | t_start = time.time() 17 | if isinstance(model_file, str): 18 | state_dict = torch.load(model_file, map_location='cpu') 19 | if 'model' in state_dict.keys(): 20 | state_dict = state_dict['model'] 21 | else: 22 | state_dict = model_file 23 | 24 | state_dict = _align_and_update_loaded_state_dicts( 25 | model.state_dict(), state_dict) 26 | t_io_end = time.time() 27 | 28 | # if is_restore: 29 | # new_state_dict = OrderedDict() 30 | # for k, v in state_dict.items(): 31 | # name = 'module.' + k 32 | # new_state_dict[name] = v 33 | # state_dict = new_state_dict 34 | model.load_state_dict(state_dict, strict=False) 35 | ckpt_keys = set(state_dict.keys()) 36 | own_keys = set(model.state_dict().keys()) 37 | missing_keys = own_keys - ckpt_keys 38 | unexpected_keys = ckpt_keys - own_keys 39 | 40 | if len(missing_keys) > 0 and logger is not None: 41 | logger.warning('Missing key(s) in state_dict: {}'.format( 42 | ', '.join('{}'.format(k) for k in missing_keys))) 43 | 44 | if len(unexpected_keys) > 0 and logger is not None: 45 | logger.warning('Unexpected key(s) in state_dict: {}'.format( 46 | ', '.join('{}'.format(k) for k in unexpected_keys))) 47 | 48 | del state_dict 49 | t_end = time.time() 50 | if logger is not None: 51 | logger.info( 52 | "Load model, Time usage:\n\tIO: {}, " 53 | "initialize parameters: {}".format( 54 | t_io_end - t_start, t_end - t_io_end)) 55 | 56 | return model 57 | 58 | 59 | def _align_and_update_loaded_state_dicts(model_state_dict, loaded_state_dict): 60 | """ 61 | Strategy: suppose that the models that we will create will have 62 | prefixes appended to each of its keys, for example due to an extra 63 | level of nesting that the original pre-trained weights from ImageNet 64 | won't contain. For example, model.state_dict() might return 65 | backbone[0].body.res2.conv1.weight, while the pre-trained model contains 66 | res2.conv1.weight. We thus want to match both parameters together. 67 | For that, we look for each model weight, look among all loaded keys 68 | if there is one that is a suffix of the current weight name, 69 | and use it if that's the case. If multiple matches exist, 70 | take the one with longest size of the corresponding name. For example, 71 | for the same model as before, the pretrained weight file can contain 72 | both res2.conv1.weight, as well as conv1.weight. In this case, 73 | we want to match backbone[0].body.conv1.weight to conv1.weight, and 74 | backbone[0].body.res2.conv1.weight to res2.conv1.weight. 
75 | """ 76 | current_keys = sorted(list(model_state_dict.keys())) 77 | loaded_keys = sorted(list(loaded_state_dict.keys())) 78 | aligned_loaded_state_dict = loaded_state_dict.copy() 79 | 80 | # get a matrix of string matches, where each (i, j) entry 81 | # correspond to the size of the loaded_key string, if it matches 82 | match_matrix = [ 83 | len(j) if i.endswith(j) else 0 for i in current_keys for j in 84 | loaded_keys] 85 | match_matrix = torch.as_tensor(match_matrix).view( 86 | len(current_keys), len(loaded_keys)) 87 | max_match_size, idxs = match_matrix.max(1) 88 | idxs[max_match_size == 0] = -1 89 | 90 | for idx_new, idx_old in enumerate(idxs.tolist()): 91 | if idx_old == -1: 92 | continue 93 | key = current_keys[idx_new] 94 | key_old = loaded_keys[idx_old] 95 | aligned_loaded_state_dict[key] = \ 96 | aligned_loaded_state_dict.pop(key_old) 97 | del loaded_state_dict 98 | return aligned_loaded_state_dict 99 | -------------------------------------------------------------------------------- /base_model/wrn.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from builder import ConvBuilder 3 | 4 | class WRNCifarBlock(nn.Module): 5 | 6 | def __init__(self, input_channels, block_channels, stride, projection_shortcut, use_dropout, builder:ConvBuilder): 7 | super(WRNCifarBlock, self).__init__() 8 | assert len(block_channels) == 2 9 | 10 | if projection_shortcut: 11 | self.proj = builder.BNReLUConv2d(in_channels=input_channels, out_channels=block_channels[1], kernel_size=1, stride=stride, padding=0) 12 | else: 13 | self.proj = builder.ResIdentity(num_channels=block_channels[1]) 14 | 15 | self.conv1 = builder.BNReLUConv2d(in_channels=input_channels, out_channels=block_channels[0], kernel_size=3, 16 | stride=stride, padding=1) 17 | if use_dropout: 18 | self.dropout = builder.Dropout(keep_prob=0.7) 19 | print('use dropout for WRN') 20 | else: 21 | self.dropout = builder.Identity() 22 | self.conv2 = builder.BNReLUConv2d(in_channels=block_channels[0], out_channels=block_channels[1], kernel_size=3, 23 | stride=1, padding=1) 24 | 25 | def forward(self, input): 26 | x = self.conv1(input) 27 | x = self.dropout(x) 28 | x = self.conv2(x) 29 | x += self.proj(input) 30 | return x 31 | 32 | class WRNCifarNet(nn.Module): 33 | 34 | def __init__(self, block_counts, num_classes, builder:ConvBuilder, use_dropout): 35 | super(WRNCifarNet, self).__init__() 36 | self.bd = builder 37 | assert block_counts == (2,2,2) 38 | converted_deps = [16, [[128, 128], [128, 128]], [[256, 256], [256, 256]], [[512, 512], [512, 512]]] 39 | print('the converted deps is ', converted_deps) 40 | 41 | self.conv1 = builder.Conv2d(in_channels=3, out_channels=converted_deps[0], kernel_size=3, stride=1, padding=1, bias=False) 42 | self.stage1 = self._build_wrn_stage(num_blocks=block_counts[0], stage_input_channels=converted_deps[0], 43 | stage_deps=converted_deps[1], downsample=False, use_dropout=use_dropout) 44 | self.stage2 = self._build_wrn_stage(num_blocks=block_counts[1], stage_input_channels=converted_deps[1][-1][1], 45 | stage_deps=converted_deps[2], downsample=True, use_dropout=use_dropout) 46 | self.stage3 = self._build_wrn_stage(num_blocks=block_counts[2], stage_input_channels=converted_deps[2][-1][1], 47 | stage_deps=converted_deps[3], downsample=True, use_dropout=use_dropout) 48 | self.last_bn = builder.BatchNorm2d(num_features=converted_deps[3][-1][1]) 49 | self.linear = builder.Linear(in_features=converted_deps[3][-1][1], out_features=num_classes) 50 | 51 | 52 | 
def _build_wrn_stage(self, num_blocks, stage_input_channels, stage_deps, downsample, use_dropout): 53 | se = self.bd.Sequential() 54 | for i in range(num_blocks): 55 | if i == 0: 56 | block_input_channels = stage_input_channels 57 | else: 58 | block_input_channels = stage_deps[i - 1][1] 59 | if i == 0 and downsample: 60 | stride = 2 61 | else: 62 | stride = 1 63 | se.add_module(name='block{}'.format(i+1), 64 | module=WRNCifarBlock(input_channels=block_input_channels, block_channels=stage_deps[i], 65 | stride=stride, projection_shortcut=(i==0), use_dropout=use_dropout, builder=self.bd)) 66 | return se 67 | 68 | def forward(self, x): 69 | out = self.conv1(x) 70 | out = self.stage1(out) 71 | out = self.stage2(out) 72 | out = self.stage3(out) 73 | out = self.last_bn(out) 74 | out = self.bd.avg_pool2d(in_features=out, kernel_size=8, stride=1, padding=0) 75 | out = self.bd.flatten(out) 76 | out = self.linear(out) 77 | return out 78 | 79 | 80 | 81 | def create_wrnc16plain(cfg, builder): 82 | return WRNCifarNet(block_counts=(2,2,2), num_classes=10, builder=builder, use_dropout=False) 83 | def create_wrnc16drop(cfg, builder): 84 | return WRNCifarNet(block_counts=(2,2,2), num_classes=10, builder=builder, use_dropout=True) -------------------------------------------------------------------------------- /deprecated/dataset.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torchvision import datasets, transforms 3 | 4 | MNIST_PATH = '/home/dingxiaohan/datasets/torch_mnist/' 5 | CIFAR10_PATH = '/home/dingxiaohan/datasets/cifar-10-batches-py/' 6 | CH_PATH = '/home/dingxiaohan/datasets/torch_ch/' 7 | SVHN_PATH = '/home/dingxiaohan/datasets/torch_svhn/' 8 | 9 | 10 | class InfiniteDataLoader(torch.utils.data.DataLoader): 11 | def __init__(self, *args, **kwargs): 12 | super().__init__(*args, **kwargs) 13 | # Initialize an iterator over the dataset. 14 | self.dataset_iterator = super().__iter__() 15 | 16 | def __iter__(self): 17 | return self 18 | 19 | def __next__(self): 20 | try: 21 | batch = next(self.dataset_iterator) 22 | except StopIteration: 23 | # Dataset exhausted, use a new fresh iterator. 
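                # (Annotation, not part of the original file.) Re-creating the iterator here
                # makes the loader cycle over the dataset forever, so the training loop can
                # call next(loader) every iteration without handling StopIteration at epoch
                # boundaries.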
24 | self.dataset_iterator = super().__iter__() 25 | batch = next(self.dataset_iterator) 26 | return batch 27 | 28 | 29 | def create_dataset(dataset_name, subset, batch_size): 30 | assert dataset_name in ['imagenet', 'cifar10', 'ch', 'svhn', 'mnist'] 31 | assert subset in ['train', 'val'] 32 | if dataset_name == 'imagenet': 33 | raise ValueError('TODO') 34 | 35 | # copied from https://github.com/pytorch/examples/blob/master/mnist/main.py 36 | elif dataset_name == 'mnist': 37 | if subset == 'train': 38 | return InfiniteDataLoader(datasets.MNIST(MNIST_PATH, train=True, download=True, 39 | transform=transforms.Compose([ 40 | transforms.ToTensor(), 41 | transforms.Normalize((0.1307,), (0.3081,))])), batch_size=batch_size, shuffle=True) 42 | else: 43 | return InfiniteDataLoader(datasets.MNIST(MNIST_PATH, train=False, transform=transforms.Compose([ 44 | transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])), 45 | batch_size=batch_size, shuffle=False) 46 | 47 | 48 | 49 | elif dataset_name == 'cifar10': 50 | if subset == 'train': 51 | return InfiniteDataLoader(datasets.CIFAR10(CIFAR10_PATH, train=True, download=False, 52 | transform=transforms.Compose([ 53 | transforms.Pad(padding=(4, 4, 4, 4)), 54 | transforms.RandomCrop(32), 55 | transforms.RandomHorizontalFlip(), 56 | transforms.ToTensor(), 57 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])), 58 | batch_size=batch_size, shuffle=True) 59 | else: 60 | return InfiniteDataLoader(datasets.CIFAR10(CIFAR10_PATH, train=False, 61 | transform=transforms.Compose([ 62 | transforms.ToTensor(), 63 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])), 64 | batch_size=batch_size, shuffle=False) 65 | 66 | elif dataset_name == 'ch': 67 | if subset == 'train': 68 | return InfiniteDataLoader(datasets.CIFAR100(CH_PATH, train=True, download=True, 69 | transform=transforms.Compose([ 70 | transforms.Pad(padding=(4, 4, 4, 4)), 71 | transforms.RandomCrop(32), 72 | transforms.RandomHorizontalFlip(), 73 | transforms.ToTensor(), 74 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])), 75 | batch_size=batch_size, shuffle=True) 76 | else: 77 | return InfiniteDataLoader(datasets.CIFAR100(CH_PATH, train=False, 78 | transform=transforms.Compose([ 79 | transforms.ToTensor(), 80 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])), 81 | batch_size=batch_size, shuffle=False) 82 | 83 | else: 84 | assert False 85 | 86 | 87 | def num_train_examples_per_epoch(dataset_name): 88 | if dataset_name == 'imagenet': 89 | return 1281167 90 | elif dataset_name == 'mnist': 91 | return 60000 92 | elif dataset_name in ['cifar10', 'ch']: 93 | return 50000 94 | else: 95 | assert False -------------------------------------------------------------------------------- /show_log.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import re 3 | import numpy as np 4 | import sys 5 | 6 | root_dirs = ['ria_exps'] 7 | num_logs = 5 8 | if len(sys.argv) > 1: 9 | root_dirs = [sys.argv[1]] 10 | if len(sys.argv) > 2: 11 | excluded = sys.argv[2:] 12 | else: 13 | excluded = None 14 | 15 | 16 | 17 | top1_pattern = re.compile('top1=(\-*\d+(?:\.\d+)?)') 18 | top5_pattern = re.compile('top5=(\-*\d+(?:\.\d+)?)') 19 | loss_pattern = re.compile('loss=(\-*\d+(?:\.\d+)?)') 20 | 21 | speed_pattern = re.compile(',(\-*\d+(?:\.\d+)?)example/s') 22 | 23 | 24 | 25 | def get_value_by_pattern(pattern, line): 26 | return float(re.findall(pattern, line)[0]) 27 | 28 | def parse_top1_top5_loss_from_log_line(log_line): 29 | top1 = 
get_value_by_pattern(top1_pattern, log_line) 30 | top5 = get_value_by_pattern(top5_pattern, log_line) 31 | loss = get_value_by_pattern(loss_pattern, log_line) 32 | return top1, top5, loss 33 | 34 | 35 | 36 | 37 | log_files = [] 38 | for root_dir in root_dirs: 39 | fs = glob.glob('{}/*/log.txt'.format(root_dir)) 40 | log_files += fs 41 | 42 | for file_path in log_files: 43 | if 'lrsRZ' in file_path: 44 | continue 45 | skip = False 46 | if excluded is not None: 47 | for ex in excluded: 48 | if ex in file_path: 49 | skip = True 50 | break 51 | if skip: 52 | continue 53 | top1_list = [] 54 | top5_list = [] 55 | loss_list = [] 56 | baseline_speed = 0 57 | exp_speed = 0 58 | with open(file_path, 'r') as f: 59 | origin_lines = f.readlines() 60 | for l in origin_lines: 61 | if 'baseline speed' in l: 62 | baseline_speed = get_value_by_pattern(speed_pattern, l) 63 | elif 'bbf speed' in l or 'exp speed' in l or 'ent speed' in l: 64 | exp_speed = get_value_by_pattern(speed_pattern, l) 65 | break 66 | 67 | log_lines = [l for l in origin_lines if 'top1' in l and 'top5' in l and 'loss' in l and 'beginning' not in l] 68 | avg_loss = '----' 69 | params = '----' 70 | train_speed = '----' 71 | deploy_speed = '----' 72 | for l in origin_lines[-5:]: 73 | if 'TRAIN LOSS collected over last' in l: 74 | avg_loss = l.strip()[-8:] 75 | if 'num of params in hdf5' in l: 76 | params = l.strip().split('=')[1] 77 | if 'TRAIN speed' in l: 78 | train_speed = float(l.strip().split('=')[-1]) 79 | train_speed = '{:.2f}'.format(train_speed) 80 | if 'DEPLOY TEST' in l: 81 | ll = l.strip().split(' ') 82 | examples = int(ll[4]) 83 | secs = float(ll[6]) 84 | deploy_speed = examples / secs 85 | deploy_speed = '{:.2f}'.format(deploy_speed) 86 | last_lines = log_lines[-num_logs:] 87 | for l in last_lines: 88 | if 'top1' not in l or 'loss' not in l or 'top5' not in l: 89 | continue 90 | top1, top5, loss = parse_top1_top5_loss_from_log_line(l) 91 | top1_list.append(top1) 92 | top5_list.append(top5) 93 | loss_list.append(loss) 94 | if len(top1_list) < num_logs: 95 | continue 96 | # network_try_arg = file_path.split('/')[1].replace('_train', '') 97 | network_try_arg = file_path.replace('_train/log.txt', '') 98 | last_validation = last_lines[-1] 99 | last_epoch_pattern = re.compile('epoch (\d+)') 100 | 101 | last_epoch = int(last_epoch_pattern.findall(last_validation)[0]) 102 | 103 | if exp_speed > 0: 104 | speedup = exp_speed / baseline_speed 105 | else: 106 | speedup = 0 107 | 108 | thresh = '' 109 | flops_r = '' 110 | for ol in origin_lines[-70:-1]: 111 | # print(ol) 112 | if 'thres 1e-05' in ol: 113 | thresh = '1e-5' 114 | elif 'thres 1e-06' in ol: 115 | thresh = '1e-6' 116 | if 'FLOPs' in ol: 117 | flops_r = ol[ol.index('FLOPs'):].strip() 118 | 119 | msg = '{} \t maxtop1={:.3f}, spdup={:.3f}, mean={:.3f}, loss={:.5f}, {} logs, tr_loss={}, para={}, ts={}, ds={}, last={}'.format(network_try_arg, 120 | np.max(top1_list), speedup, np.mean(top1_list), np.mean(loss_list), 121 | len(top1_list), avg_loss, params, train_speed, deploy_speed, last_epoch) 122 | if len(flops_r) > 0: 123 | msg += ' ' + thresh + ':' + flops_r 124 | print(msg) 125 | -------------------------------------------------------------------------------- /base_config.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | from model_map import get_dataset_name_by_model_name 3 | import numpy as np 4 | 5 | BaseConfigByEpoch = namedtuple('BaseConfigByEpoch', ['network_type', 'dataset_name', 
'dataset_subset', 'global_batch_size', 'num_node', 'device', 6 | 'weight_decay', 'weight_decay_bias', 'optimizer_type', 'momentum', 7 | 'bias_lr_factor', 'max_epochs', 'base_lr', 'lr_epoch_boundaries', 'lr_decay_factor', 'linear_final_lr', 'cosine_minimum', 8 | 'warmup_epochs', 'warmup_method', 'warmup_factor', 9 | 'ckpt_iter_period', 'tb_iter_period', 10 | 'output_dir', 'tb_dir', 11 | 'init_weights', 'save_weights', 12 | 'val_epoch_period', 'grad_accum_iters', 13 | 'deps', 14 | 'se_reduce_scale', 'se_layers']) 15 | 16 | 17 | def get_baseconfig_by_epoch(network_type, dataset_name, dataset_subset, global_batch_size, num_node, 18 | weight_decay, optimizer_type, momentum, 19 | max_epochs, base_lr, lr_epoch_boundaries, lr_decay_factor, linear_final_lr, cosine_minimum, 20 | warmup_epochs, warmup_method, warmup_factor, 21 | ckpt_iter_period, tb_iter_period, 22 | output_dir, tb_dir, save_weights, 23 | device='cuda', weight_decay_bias=0, bias_lr_factor=2, init_weights=None, val_epoch_period=-1, grad_accum_iters=1, 24 | deps=None, 25 | se_reduce_scale=0, se_layers=None): 26 | print('----------------- show lr schedule --------------') 27 | print('base_lr:', base_lr) 28 | print('max_epochs:', max_epochs) 29 | print('lr_epochs:', lr_epoch_boundaries) 30 | print('lr_decay:', lr_decay_factor) 31 | print('linear_final_lr:', linear_final_lr) 32 | print('-------------------------------------------------') 33 | 34 | if deps is not None: 35 | deps = np.array(deps, dtype=np.int) 36 | 37 | return BaseConfigByEpoch(network_type=network_type,dataset_name=dataset_name,dataset_subset=dataset_subset,global_batch_size=global_batch_size,num_node=num_node, device=device, 38 | weight_decay=weight_decay,weight_decay_bias=weight_decay_bias,optimizer_type=optimizer_type,momentum=momentum,bias_lr_factor=bias_lr_factor, 39 | max_epochs=max_epochs, base_lr=base_lr, lr_epoch_boundaries=lr_epoch_boundaries,lr_decay_factor=lr_decay_factor, linear_final_lr=linear_final_lr, cosine_minimum=cosine_minimum, 40 | warmup_epochs=warmup_epochs,warmup_method=warmup_method,warmup_factor=warmup_factor, 41 | ckpt_iter_period=int(ckpt_iter_period),tb_iter_period=int(tb_iter_period), 42 | output_dir=output_dir, tb_dir=tb_dir, 43 | init_weights=init_weights, save_weights=save_weights, 44 | val_epoch_period=val_epoch_period, grad_accum_iters=grad_accum_iters, deps=deps, se_reduce_scale=se_reduce_scale, 45 | se_layers=se_layers) 46 | 47 | def get_baseconfig_for_test(network_type, dataset_subset, global_batch_size, init_weights=None, device='cuda', deps=None, 48 | se_reduce_scale=0, se_layers=None, dataset_name=None): 49 | if dataset_name is None: 50 | dataset_name = get_dataset_name_by_model_name(network_type) 51 | return BaseConfigByEpoch(network_type=network_type, dataset_name=dataset_name, 52 | dataset_subset=dataset_subset, global_batch_size=global_batch_size, num_node=1, device=device, 53 | weight_decay=None, weight_decay_bias=None, optimizer_type=None, momentum=None, bias_lr_factor=None, 54 | max_epochs=None, base_lr=None, lr_epoch_boundaries=None, lr_decay_factor=None, linear_final_lr=None, cosine_minimum=None, 55 | warmup_epochs=None, warmup_method=None, warmup_factor=None, ckpt_iter_period=None, 56 | tb_iter_period=None, output_dir=None, tb_dir=None, init_weights=init_weights, 57 | save_weights=None, val_epoch_period=None, grad_accum_iters=None, deps=deps, 58 | se_reduce_scale=se_reduce_scale, se_layers=se_layers) -------------------------------------------------------------------------------- /data/data_factory.py: 
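Note on base_config.py above: a minimal sketch, not from the repository, of how the test-time helper is typically called. The model name 'src56' is a placeholder; its dataset is resolved through model_map.get_dataset_name_by_model_name, and every training-only field of the resulting namedtuple is left as None.

from base_config import get_baseconfig_for_test

cfg = get_baseconfig_for_test(network_type='src56', dataset_subset='val', global_batch_size=100)
# BaseConfigByEpoch is a plain namedtuple, so fields are read directly as attributes.
print(cfg.dataset_name, cfg.global_batch_size, cfg.max_epochs)   # max_epochs is None for test-only configs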
-------------------------------------------------------------------------------- 1 | import torch 2 | from torchvision import datasets, transforms 3 | import numpy as np 4 | from data.dataset_util import InfiniteDataLoader 5 | 6 | CIFAR10_PATH = 'cifar10_data' 7 | MNIST_PATH = 'mnist_data' 8 | 9 | 10 | def load_cuda_data(data_loader, dataset_name): 11 | if dataset_name == 'imagenet_standard': 12 | data, label = next(data_loader.dataprovider) 13 | data = data.cuda() 14 | label = label.cuda() 15 | elif dataset_name == 'imagenet_blank': 16 | data_dict = next(data_loader) 17 | data = data_dict['data'] 18 | label = data_dict['label'] 19 | else: 20 | data, label = next(data_loader) 21 | data = data.cuda() 22 | label = label.cuda() 23 | return data, label 24 | 25 | class ImageNetBlankGenerator(object): 26 | 27 | def __init__(self, batch_size, img_size): 28 | assert type(img_size) is int 29 | 30 | self.blank_img = np.ones((batch_size, 3, img_size, img_size), dtype=np.float) 31 | self.blank_label = np.ones(batch_size, dtype=np.int) * 42 32 | self.return_dict = {'data': torch.from_numpy(self.blank_img).type(torch.FloatTensor).cuda(), 33 | 'label': torch.from_numpy(self.blank_label).type(torch.long).cuda()} 34 | 35 | def __next__(self): 36 | return self.return_dict 37 | 38 | def create_dataset(dataset_name, subset, global_batch_size, distributed): 39 | assert dataset_name in ['cifar10','imagenet_blank', 40 | 'imagenet_standard', 'mnist'] 41 | assert subset in ['train', 'val'] 42 | 43 | if dataset_name == 'imagenet_standard': 44 | from data.imagenet_data import ImgnetStdTrainData, ImgnetStdValData 45 | if subset == 'train': 46 | print('imgnet standard train data') 47 | return ImgnetStdTrainData(distributed=distributed, 48 | batch_size_per_gpu=global_batch_size // torch.cuda.device_count()) 49 | else: 50 | print('imgnet standard val data') 51 | return ImgnetStdValData(batch_size=global_batch_size) 52 | 53 | elif dataset_name == 'imagenet_blank': 54 | assert not distributed 55 | return ImageNetBlankGenerator(batch_size=global_batch_size, img_size=224) 56 | 57 | elif dataset_name == 'mnist': 58 | assert not distributed 59 | if subset == 'train': 60 | return InfiniteDataLoader(datasets.MNIST(MNIST_PATH, train=True, download=True, 61 | transform=transforms.Compose([ 62 | transforms.ToTensor(), 63 | transforms.Normalize((0.1307,), (0.3081,))])), 64 | batch_size=global_batch_size, shuffle=True) 65 | else: 66 | return InfiniteDataLoader(datasets.MNIST(MNIST_PATH, train=False, transform=transforms.Compose([ 67 | transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])), 68 | batch_size=global_batch_size, shuffle=False) 69 | 70 | elif dataset_name == 'cifar10': 71 | assert not distributed 72 | if subset == 'train': 73 | return InfiniteDataLoader(datasets.CIFAR10(CIFAR10_PATH, train=True, download=False, 74 | transform=transforms.Compose([ 75 | transforms.Pad(padding=(4, 4, 4, 4)), 76 | transforms.RandomCrop(32), 77 | transforms.RandomHorizontalFlip(), 78 | transforms.ToTensor(), 79 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])), 80 | batch_size=global_batch_size, shuffle=True) 81 | else: 82 | return InfiniteDataLoader(datasets.CIFAR10(CIFAR10_PATH, train=False, 83 | transform=transforms.Compose([ 84 | transforms.ToTensor(), 85 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])), 86 | batch_size=global_batch_size, shuffle=False) 87 | 88 | else: 89 | raise ValueError('??') 90 | 91 | 92 | def num_train_examples_per_epoch(dataset_name): 93 | if 'imagenet' in dataset_name: 94 | return 
1281167 95 | elif dataset_name in ['cifar10', 'ch']: 96 | return 50000 97 | elif dataset_name == 'mnist': 98 | return 60000 99 | else: 100 | assert False 101 | 102 | def num_iters_per_epoch(cfg): 103 | return num_train_examples_per_epoch(cfg.dataset_name) // cfg.global_batch_size 104 | 105 | def num_val_examples(dataset_name): 106 | if 'imagenet' in dataset_name: 107 | return 50000 108 | elif dataset_name in ['cifar10', 'ch', 'mnist']: 109 | return 10000 110 | else: 111 | assert False -------------------------------------------------------------------------------- /deprecated/base_model/wrn.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from builder import ConvBuilder 3 | from constants import wrn_convert_flattened_deps 4 | 5 | class WRNCifarBlock(nn.Module): 6 | 7 | def __init__(self, input_channels, block_channels, stride, projection_shortcut, use_dropout, builder:ConvBuilder): 8 | super(WRNCifarBlock, self).__init__() 9 | assert len(block_channels) == 2 10 | 11 | if projection_shortcut: 12 | self.proj = builder.BNReLUConv2d(in_channels=input_channels, out_channels=block_channels[1], kernel_size=1, stride=stride, padding=0) 13 | else: 14 | self.proj = builder.ResIdentity(num_channels=block_channels[1]) 15 | 16 | self.conv1 = builder.BNReLUConv2d(in_channels=input_channels, out_channels=block_channels[0], kernel_size=3, 17 | stride=stride, padding=1) 18 | if use_dropout: 19 | self.dropout = builder.Dropout(keep_prob=0.7) 20 | print('use dropout for WRN') 21 | else: 22 | self.dropout = builder.Identity() 23 | self.conv2 = builder.BNReLUConv2d(in_channels=block_channels[0], out_channels=block_channels[1], kernel_size=3, 24 | stride=1, padding=1) 25 | 26 | def forward(self, input): 27 | x = self.conv1(input) 28 | x = self.dropout(x) 29 | x = self.conv2(x) 30 | x += self.proj(input) 31 | return x 32 | 33 | class WRNCifarNet(nn.Module): 34 | 35 | def __init__(self, block_counts, num_classes, builder:ConvBuilder, deps, use_dropout): 36 | super(WRNCifarNet, self).__init__() 37 | self.bd = builder 38 | converted_deps = wrn_convert_flattened_deps(deps) 39 | print('the converted deps is ', converted_deps) 40 | 41 | self.conv1 = builder.Conv2d(in_channels=3, out_channels=converted_deps[0], kernel_size=3, stride=1, padding=1, bias=False) 42 | self.stage1 = self._build_wrn_stage(num_blocks=block_counts[0], stage_input_channels=converted_deps[0], 43 | stage_deps=converted_deps[1], downsample=False, use_dropout=use_dropout) 44 | self.stage2 = self._build_wrn_stage(num_blocks=block_counts[1], stage_input_channels=converted_deps[1][-1][1], 45 | stage_deps=converted_deps[2], downsample=True, use_dropout=use_dropout) 46 | self.stage3 = self._build_wrn_stage(num_blocks=block_counts[2], stage_input_channels=converted_deps[2][-1][1], 47 | stage_deps=converted_deps[3], downsample=True, use_dropout=use_dropout) 48 | self.last_bn = builder.BatchNorm2d(num_features=converted_deps[3][-1][1]) 49 | self.linear = builder.Linear(in_features=converted_deps[3][-1][1], out_features=num_classes) 50 | 51 | 52 | def _build_wrn_stage(self, num_blocks, stage_input_channels, stage_deps, downsample, use_dropout): 53 | se = self.bd.Sequential() 54 | for i in range(num_blocks): 55 | if i == 0: 56 | block_input_channels = stage_input_channels 57 | else: 58 | block_input_channels = stage_deps[i - 1][1] 59 | if i == 0 and downsample: 60 | stride = 2 61 | else: 62 | stride = 1 63 | se.add_module(name='block{}'.format(i+1), 64 | 
module=WRNCifarBlock(input_channels=block_input_channels, block_channels=stage_deps[i], 65 | stride=stride, projection_shortcut=(i==0), use_dropout=use_dropout, builder=self.bd)) 66 | return se 67 | 68 | def forward(self, x): 69 | out = self.conv1(x) 70 | out = self.stage1(out) 71 | out = self.stage2(out) 72 | out = self.stage3(out) 73 | out = self.last_bn(out) 74 | out = self.bd.avg_pool2d(in_features=out, kernel_size=8, stride=1, padding=0) 75 | out = self.bd.flatten(out) 76 | out = self.linear(out) 77 | return out 78 | 79 | 80 | 81 | def create_wrnc16plain(cfg, builder): 82 | return WRNCifarNet(block_counts=(2,2,2), num_classes=10, builder=builder, deps=cfg.deps, use_dropout=False) 83 | def create_wrnc16drop(cfg, builder): 84 | return WRNCifarNet(block_counts=(2,2,2), num_classes=10, builder=builder, deps=cfg.deps, use_dropout=True) 85 | def create_wrnc28plain(cfg, builder): 86 | return WRNCifarNet(block_counts=(4,4,4), num_classes=10, builder=builder, deps=cfg.deps, use_dropout=False) 87 | def create_wrnc28drop(cfg, builder): 88 | return WRNCifarNet(block_counts=(4,4,4), num_classes=10, builder=builder, deps=cfg.deps, use_dropout=True) 89 | def create_wrnc40plain(cfg, builder): 90 | return WRNCifarNet(block_counts=(6,6,6), num_classes=10, builder=builder, deps=cfg.deps, use_dropout=False) 91 | def create_wrnc40drop(cfg, builder): 92 | return WRNCifarNet(block_counts=(6,6,6), num_classes=10, builder=builder, deps=cfg.deps, use_dropout=True) 93 | 94 | def create_wrnh16plain(cfg, builder): 95 | return WRNCifarNet(block_counts=(2,2,2), num_classes=100, builder=builder, deps=cfg.deps, use_dropout=False) 96 | def create_wrnh16drop(cfg, builder): 97 | return WRNCifarNet(block_counts=(2,2,2), num_classes=100, builder=builder, deps=cfg.deps, use_dropout=True) 98 | def create_wrnh28plain(cfg, builder): 99 | return WRNCifarNet(block_counts=(4,4,4), num_classes=100, builder=builder, deps=cfg.deps, use_dropout=False) 100 | def create_wrnh28drop(cfg, builder): 101 | return WRNCifarNet(block_counts=(4,4,4), num_classes=100, builder=builder, deps=cfg.deps, use_dropout=True) 102 | def create_wrnh40plain(cfg, builder): 103 | return WRNCifarNet(block_counts=(6,6,6), num_classes=100, builder=builder, deps=cfg.deps, use_dropout=False) 104 | def create_wrnh40drop(cfg, builder): 105 | return WRNCifarNet(block_counts=(6,6,6), num_classes=100, builder=builder, deps=cfg.deps, use_dropout=True) -------------------------------------------------------------------------------- /deprecated/builder.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | from custom_layers.flatten_layer import FlattenLayer 4 | from custom_layers.se_block import SEBlock 5 | 6 | class ConvBuilder(nn.Module): 7 | 8 | def __init__(self, base_config): 9 | super(ConvBuilder, self).__init__() 10 | print('ConvBuilder initialized.') 11 | self.BN_eps = 1e-5 12 | self.BN_momentum = 0.1 13 | self.BN_affine = True 14 | self.BN_track_running_stats = True 15 | self.base_config = base_config 16 | 17 | def set_BN_config(self, eps, momentum, affine, track_running_stats): 18 | self.BN_eps = eps 19 | self.BN_momentum = momentum 20 | self.BN_afine = affine 21 | self.BN_track_running_stats = track_running_stats 22 | 23 | 24 | def Conv2d(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', use_original_conv=False): 25 | return nn.Conv2d(in_channels=in_channels, out_channels=out_channels, 
kernel_size=kernel_size, 26 | stride=stride, padding=padding, dilation=dilation, groups=groups, bias=bias, padding_mode=padding_mode) 27 | 28 | # The running estimates are kept with a default momentum of 0.1. 29 | # By default, the elements of \gammaγ are sampled from \mathcal{U}(0, 1)U(0,1) and the elements of \betaβ are set to 0. 30 | # If track_running_stats is set to False, this layer then does not keep running estimates, and batch statistics are instead used during evaluation time as well. 31 | def BatchNorm2d(self, num_features, eps=None, momentum=None, affine=None, track_running_stats=None): 32 | if eps is None: 33 | eps = self.BN_eps 34 | if momentum is None: 35 | momentum = self.BN_momentum 36 | if affine is None: 37 | affine = self.BN_affine 38 | if track_running_stats is None: 39 | track_running_stats = self.BN_track_running_stats 40 | return nn.BatchNorm2d(num_features=num_features, eps=eps, momentum=momentum, affine=affine, track_running_stats=track_running_stats) 41 | 42 | def Sequential(self, *args): 43 | return nn.Sequential(*args) 44 | 45 | def ReLU(self): 46 | return nn.ReLU() 47 | 48 | def Conv2dBN(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, padding_mode='zeros', use_original_conv=False): 49 | conv_layer = self.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, 50 | stride=stride, padding=padding, dilation=dilation, groups=groups, bias=False, padding_mode=padding_mode, use_original_conv=use_original_conv) 51 | bn_layer = self.BatchNorm2d(num_features=out_channels) 52 | se = self.Sequential() 53 | se.add_module('conv', conv_layer) 54 | se.add_module('bn', bn_layer) 55 | if self.base_config is not None and self.base_config.se_reduce_scale is not None and self.base_config.se_reduce_scale > 0: 56 | se.add_module('se', SEBlock(input_channels=out_channels, internal_neurons=out_channels // self.base_config.se_reduce_scale)) 57 | return se 58 | 59 | def Conv2dBNReLU(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, padding_mode='zeros', use_original_conv=False): 60 | conv = self.Conv2dBN(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, 61 | padding=padding, dilation=dilation, groups=groups, padding_mode=padding_mode, use_original_conv=use_original_conv) 62 | conv.add_module('relu', self.ReLU()) 63 | return conv 64 | 65 | def BNReLUConv2d(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, padding_mode='zeros', use_original_conv=False): 66 | bn_layer = self.BatchNorm2d(num_features=in_channels) 67 | conv_layer = self.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, 68 | stride=stride, padding=padding, dilation=dilation, groups=groups, bias=False, padding_mode=padding_mode) 69 | se = self.Sequential() 70 | se.add_module('bn', bn_layer) 71 | se.add_module('relu', self.ReLU()) 72 | se.add_module('conv', conv_layer) 73 | return se 74 | 75 | def Linear(self, in_features, out_features, bias=True): 76 | return nn.Linear(in_features=in_features, out_features=out_features, bias=bias) 77 | 78 | def Identity(self): 79 | return nn.Identity() 80 | 81 | def ResIdentity(self, num_channels): 82 | return nn.Identity() 83 | 84 | 85 | def Dropout(self, keep_prob): 86 | return nn.Dropout(p=1-keep_prob) 87 | 88 | def Maxpool2d(self, kernel_size, stride=None): 89 | return nn.MaxPool2d(kernel_size=kernel_size, stride=stride) 90 | 91 | def Avgpool2d(self, kernel_size, 
stride=None): 92 | return nn.AvgPool2d(kernel_size=kernel_size, stride=stride) 93 | 94 | def Flatten(self): 95 | return FlattenLayer() 96 | 97 | def GAP(self, kernel_size): 98 | gap = nn.Sequential() 99 | gap.add_module('avg', nn.AvgPool2d(kernel_size=kernel_size, stride=kernel_size)) 100 | gap.add_module('flatten', FlattenLayer()) 101 | return gap 102 | 103 | 104 | 105 | def relu(self, in_features): 106 | return F.relu(in_features) 107 | 108 | def max_pool2d(self, in_features, kernel_size, stride, padding): 109 | return F.max_pool2d(in_features, kernel_size=kernel_size, stride=stride, padding=padding) 110 | 111 | def avg_pool2d(self, in_features, kernel_size, stride, padding): 112 | return F.avg_pool2d(in_features, kernel_size=kernel_size, stride=stride, padding=padding) 113 | 114 | def flatten(self, in_features): 115 | result = in_features.view(in_features.size(0), -1) 116 | return result 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | -------------------------------------------------------------------------------- /deprecated/ding_test.py: -------------------------------------------------------------------------------- 1 | from base_config import BaseConfigByEpoch 2 | from model_map import get_model_fn 3 | from dataset import create_dataset 4 | from torch.nn.modules.loss import CrossEntropyLoss 5 | from utils.engine import Engine 6 | from utils.misc import torch_accuracy, AvgMeter 7 | from utils.comm import reduce_loss_dict 8 | from collections import OrderedDict 9 | import torch 10 | from tqdm import tqdm 11 | import time 12 | from builder import ConvBuilder 13 | from ding_train import load_cuda_data 14 | import sys 15 | from utils.misc import log_important 16 | from base_config import get_baseconfig_for_test 17 | 18 | TEST_BATCH_SIZE = 100 19 | OVERALL_LOG_FILE = 'overall_test_log.txt' 20 | DETAIL_LOG_FILE = 'detail_test_log.txt' 21 | 22 | def run_eval(ds_val, max_iters, net, criterion, discrip_str, dataset_name): 23 | pbar = tqdm(range(max_iters)) 24 | top1 = AvgMeter() 25 | top5 = AvgMeter() 26 | losses = AvgMeter() 27 | pbar.set_description('Validation' + discrip_str) 28 | total_net_time = 0 29 | with torch.no_grad(): 30 | for iter_idx, i in enumerate(pbar): 31 | start_time = time.time() 32 | data, label = load_cuda_data(ds_val, dataset_name=dataset_name) 33 | data_time = time.time() - start_time 34 | 35 | net_time_start = time.time() 36 | pred = net(data) 37 | net_time_end = time.time() 38 | total_net_time += net_time_end - net_time_start 39 | 40 | loss = criterion(pred, label) 41 | acc, acc5 = torch_accuracy(pred, label, (1, 5)) 42 | 43 | top1.update(acc.item()) 44 | top5.update(acc5.item()) 45 | losses.update(loss.item()) 46 | pbar_dic = OrderedDict() 47 | pbar_dic['data-time'] = '{:.2f}'.format(data_time) 48 | pbar_dic['top1'] = '{:.5f}'.format(top1.mean) 49 | pbar_dic['top5'] = '{:.5f}'.format(top5.mean) 50 | pbar_dic['loss'] = '{:.5f}'.format(losses.mean) 51 | pbar.set_postfix(pbar_dic) 52 | 53 | metric_dic = {'top1':torch.tensor(top1.mean), 54 | 'top5':torch.tensor(top5.mean), 55 | 'loss':torch.tensor(losses.mean)} 56 | reduced_metirc_dic = reduce_loss_dict(metric_dic) 57 | return reduced_metirc_dic, total_net_time 58 | 59 | def get_criterion(cfg): 60 | return CrossEntropyLoss() 61 | 62 | def ding_test(cfg:BaseConfigByEpoch, net=None, val_dataloader=None, show_variables=False, convbuilder=None, 63 | init_hdf5=None, ): 64 | 65 | with Engine() as engine: 66 | 67 | engine.setup_log( 68 | name='test', log_dir='./', file_name=DETAIL_LOG_FILE) 69 | 70 | if net is None: 
71 | net = get_model_fn(cfg.dataset_name, cfg.network_type) 72 | 73 | if convbuilder is None: 74 | convbuilder = ConvBuilder(base_config=cfg) 75 | 76 | model = net(cfg, convbuilder).cuda() 77 | 78 | if val_dataloader is None: 79 | val_dataloader = create_dataset(cfg.dataset_name, cfg.dataset_subset, batch_size=cfg.global_batch_size) 80 | val_iters = 50000 // cfg.global_batch_size if cfg.dataset_name == 'imagenet' else 10000 // cfg.global_batch_size 81 | 82 | print('NOTE: Data prepared') 83 | print('NOTE: We have global_batch_size={} on {} GPUs, the allocated GPU memory is {}'.format(cfg.global_batch_size, torch.cuda.device_count(), torch.cuda.memory_allocated())) 84 | 85 | criterion = get_criterion(cfg).cuda() 86 | 87 | engine.register_state( 88 | scheduler=None, model=model, optimizer=None) 89 | 90 | if engine.distributed: 91 | print('Distributed training, engine.world_rank={}'.format(engine.world_rank)) 92 | model = torch.nn.parallel.DistributedDataParallel( 93 | model, device_ids=[engine.world_rank], 94 | broadcast_buffers=False, ) 95 | # model = DistributedDataParallel(model, delay_allreduce=True) 96 | elif torch.cuda.device_count() > 1: 97 | print('Single machine multiple GPU training') 98 | model = torch.nn.parallel.DataParallel(model) 99 | 100 | if cfg.init_weights: 101 | engine.load_checkpoint(cfg.init_weights, is_restore=True, just_weights=True) 102 | 103 | if init_hdf5: 104 | engine.load_hdf5(init_hdf5) 105 | 106 | if show_variables: 107 | engine.show_variables() 108 | 109 | model.eval() 110 | eval_dict, _ = run_eval(val_dataloader, val_iters, model, criterion, 'TEST', dataset_name=cfg.dataset_name) 111 | val_top1_value = eval_dict['top1'].item() 112 | val_top5_value = eval_dict['top5'].item() 113 | val_loss_value = eval_dict['loss'].item() 114 | 115 | msg = '{},{},{},top1={:.5f},top5={:.5f},loss={:.7f}'.format(cfg.network_type, init_hdf5 or cfg.init_weights, cfg.dataset_subset, 116 | val_top1_value, val_top5_value, val_loss_value) 117 | log_important(msg, OVERALL_LOG_FILE) 118 | 119 | 120 | def general_test(network_type, weights, builder=None): 121 | if weights.endswith('.hdf5'): 122 | init_weights = None 123 | init_hdf5 = weights 124 | else: 125 | init_weights = weights 126 | init_hdf5 = None 127 | if 'wrnc16' in network_type or 'wrnh16' in network_type: 128 | from constants import wrn_origin_deps_flattened 129 | deps = wrn_origin_deps_flattened(2, 8) 130 | else: 131 | deps = None 132 | test_config = get_baseconfig_for_test(network_type=network_type, dataset_subset='val', global_batch_size=TEST_BATCH_SIZE, 133 | init_weights=init_weights, deps=deps) 134 | ding_test(cfg=test_config, show_variables=True, init_hdf5=init_hdf5, convbuilder=builder) 135 | 136 | 137 | 138 | if __name__ == '__main__': 139 | network_type = sys.argv[1] 140 | weights = sys.argv[2] 141 | general_test(network_type=network_type, weights=weights) 142 | 143 | 144 | -------------------------------------------------------------------------------- /deprecated/constants.py: -------------------------------------------------------------------------------- 1 | OVERALL_EVAL_RECORD_FILE = 'overall_eval_records.txt' 2 | from collections import namedtuple 3 | 4 | LRSchedule = namedtuple('LRSchedule', ['base_lr', 'max_epochs', 'lr_epoch_boundaries', 'lr_decay_factor', 5 | 'linear_final_lr']) 6 | 7 | import numpy as np 8 | 9 | 10 | def parse_usual_lr_schedule(try_arg, keyword='lrs{}'): 11 | if keyword.format(1) in try_arg: 12 | lrs = LRSchedule(base_lr=0.1, max_epochs=500, lr_epoch_boundaries=[100, 200, 300, 400], 
lr_decay_factor=0.3, 13 | linear_final_lr=None) 14 | elif keyword.format(2) in try_arg: 15 | lrs = LRSchedule(base_lr=0.1, max_epochs=500, lr_epoch_boundaries=[100, 200, 300, 400], lr_decay_factor=0.1, 16 | linear_final_lr=None) 17 | elif keyword.format(3) in try_arg: 18 | lrs = LRSchedule(base_lr=0.1, max_epochs=800, lr_epoch_boundaries=[200, 400, 600], lr_decay_factor=0.1, 19 | linear_final_lr=None) 20 | elif keyword.format(4) in try_arg: 21 | lrs = LRSchedule(base_lr=0.1, max_epochs=80, lr_epoch_boundaries=[20, 40, 60], lr_decay_factor=0.1, 22 | linear_final_lr=None) 23 | elif keyword.format(5) in try_arg: 24 | lrs = LRSchedule(base_lr=0.05, max_epochs=200, lr_epoch_boundaries=[50, 100, 150], lr_decay_factor=0.1, 25 | linear_final_lr=None) 26 | elif keyword.format(6) in try_arg: 27 | lrs = LRSchedule(base_lr=0.1, max_epochs=360, lr_epoch_boundaries=[90, 180, 240, 300], lr_decay_factor=0.2, 28 | linear_final_lr=None) 29 | elif keyword.format(7) in try_arg: 30 | lrs = LRSchedule(base_lr=0.1, max_epochs=800, lr_epoch_boundaries=None, lr_decay_factor=None, 31 | linear_final_lr=1e-4) 32 | elif keyword.format(8) in try_arg: # may be enough for MobileNet v1 on CIFARs 33 | lrs = LRSchedule(base_lr=0.1, max_epochs=400, lr_epoch_boundaries=[100, 200, 300], lr_decay_factor=0.1, 34 | linear_final_lr=None) 35 | elif keyword.format(9) in try_arg: 36 | lrs = LRSchedule(base_lr=0.1, max_epochs=200, lr_epoch_boundaries=[50, 100, 150], lr_decay_factor=0.1, 37 | linear_final_lr=None) 38 | 39 | elif keyword.format('A') in try_arg: 40 | lrs = LRSchedule(base_lr=0.1, max_epochs=100, lr_epoch_boundaries=None, lr_decay_factor=None, 41 | linear_final_lr=1e-5) 42 | elif keyword.format('B') in try_arg: 43 | lrs = LRSchedule(base_lr=0.1, max_epochs=100, lr_epoch_boundaries=None, lr_decay_factor=None, 44 | linear_final_lr=1e-6) 45 | elif keyword.format('C') in try_arg: 46 | lrs = LRSchedule(base_lr=0.2, max_epochs=125, lr_epoch_boundaries=None, lr_decay_factor=None, 47 | linear_final_lr=0) 48 | elif keyword.format('D') in try_arg: 49 | lrs = LRSchedule(base_lr=0.001, max_epochs=20, lr_epoch_boundaries=[5, 10], lr_decay_factor=0.1, 50 | linear_final_lr=None) 51 | elif keyword.format('E') in try_arg: 52 | lrs = LRSchedule(base_lr=0.001, max_epochs=300, lr_epoch_boundaries=[100, 200], lr_decay_factor=0.1, 53 | linear_final_lr=None) 54 | 55 | elif keyword.format('F') in try_arg: 56 | lrs = LRSchedule(base_lr=0.1, max_epochs=120, lr_epoch_boundaries=[30, 60, 90, 110], lr_decay_factor=0.1, 57 | linear_final_lr=None) 58 | # for VGG and CFQKBN 59 | elif keyword.format('G') in try_arg: 60 | lrs = LRSchedule(base_lr=0.05, max_epochs=800, lr_epoch_boundaries=[200, 400, 600], lr_decay_factor=0.1, 61 | linear_final_lr=None) 62 | elif keyword.format('H') in try_arg: 63 | lrs = LRSchedule(base_lr=0.025, max_epochs=200, lr_epoch_boundaries=[50, 100, 150], lr_decay_factor=0.1, 64 | linear_final_lr=None) 65 | 66 | elif keyword.format('X') in try_arg: 67 | lrs = LRSchedule(base_lr=0.2, max_epochs=6, lr_epoch_boundaries=None, lr_decay_factor=None, 68 | linear_final_lr=0) 69 | 70 | elif keyword.replace('{}', '') in try_arg: 71 | raise ValueError('Unsupported lrs config.') 72 | else: 73 | lrs = None 74 | return lrs 75 | 76 | 77 | VGG_ORIGIN_DEPS = [64, 64, 128, 128, 256, 256, 256, 512, 512, 512, 512, 512, 512] 78 | 79 | CFQK_ORIGIN_DEPS = np.array([32, 32, 64], dtype=np.int32) 80 | 81 | 82 | 83 | 84 | 85 | def wrn_origin_deps_flattened(n, k): 86 | assert n in [2, 4, 6] # total_depth = 6n + 4 87 | filters_in_each_stage = n * 2 + 1 88 | 
stage0 = [16] 89 | stage1 = [16 * k] * filters_in_each_stage 90 | stage2 = [32 * k] * filters_in_each_stage 91 | stage3 = [64 * k] * filters_in_each_stage 92 | return np.array(stage0 + stage1 + stage2 + stage3) 93 | 94 | def wrn_pacesetter_idxes(n): 95 | assert n in [2, 4, 6] 96 | filters_in_each_stage = n * 2 + 1 97 | pacesetters = [1, int(filters_in_each_stage)+1, int(2 * filters_in_each_stage)+1] #[1, 10, 19] for WRN-28-x, for example 98 | return pacesetters 99 | 100 | def wrn_convert_flattened_deps(flattened): 101 | assert len(flattened) in [16, 28, 40] 102 | n = int((len(flattened) - 4) // 6) 103 | assert n in [2, 4, 6] 104 | pacesetters = wrn_pacesetter_idxes(n) 105 | result = [flattened[0]] 106 | for ps in pacesetters: 107 | assert flattened[ps] == flattened[ps+2] 108 | stage_deps = [] 109 | for i in range(n): 110 | stage_deps.append([flattened[ps + 1 + 2 * i], flattened[ps + 2 + 2 * i]]) 111 | result.append(stage_deps) 112 | return result 113 | 114 | 115 | #################### WRN 116 | WRN16_FOLLOW_DICT = {1:1, 3:1, 5:1, 6:6, 8:6, 10:6, 11:11, 13:11, 15:11} 117 | WRN16_PACESETTER_IDS = [1, 6, 11] 118 | WRN16_SUBSEQUENT_STRATEGY = { 119 | 0:[1, 2], 120 | 1:[4, 6, 7], 121 | 2:3, 122 | 4:5, 123 | 6:[9, 11, 12], 124 | 7:8, 125 | 9:10, 126 | 11:[14, 16], 127 | 12:13, 128 | 14:15 129 | } 130 | WRN16_INTERNAL_IDS = [2,4,7,9,12,14] 131 | 132 | -------------------------------------------------------------------------------- /acnet/do_acnet.py: -------------------------------------------------------------------------------- 1 | from base_config import get_baseconfig_by_epoch 2 | from model_map import get_dataset_name_by_model_name 3 | import argparse 4 | from acnet.acnet_builder import ACNetBuilder 5 | from ndp_train import train_main 6 | from acnet.acnet_fusion import convert_acnet_weights 7 | import os 8 | from ndp_test import general_test 9 | from constants import LRSchedule 10 | from builder import ConvBuilder 11 | 12 | if __name__ == '__main__': 13 | parser = argparse.ArgumentParser() 14 | parser.add_argument('-a', '--arch', default='sres18') 15 | parser.add_argument('-b', '--block_type', default='acb') 16 | parser.add_argument('-c', '--conti_or_fs', default='fs') # continue or train_from_scratch 17 | parser.add_argument( 18 | '--local_rank', default=0, type=int, 19 | help='process rank on node') 20 | 21 | start_arg = parser.parse_args() 22 | 23 | network_type = start_arg.arch 24 | block_type = start_arg.block_type 25 | conti_or_fs = start_arg.conti_or_fs 26 | assert conti_or_fs in ['continue', 'fs'] 27 | assert block_type in ['acb', 'base'] 28 | auto_continue = conti_or_fs == 'continue' 29 | print('auto continue: ', auto_continue) 30 | 31 | gamma_init = None 32 | 33 | if network_type == 'sres18': 34 | weight_decay_strength = 1e-4 35 | batch_size = 256 36 | lrs = LRSchedule(base_lr=0.1, max_epochs=100, lr_epoch_boundaries=None, lr_decay_factor=None, 37 | linear_final_lr=None, cosine_minimum=0) 38 | warmup_epochs = 0 39 | gamma_init = 1 40 | 41 | elif network_type == 'sres34': 42 | weight_decay_strength = 1e-4 43 | batch_size = 256 44 | lrs = LRSchedule(base_lr=0.1, max_epochs=100, lr_epoch_boundaries=None, lr_decay_factor=None, 45 | linear_final_lr=None, cosine_minimum=0) 46 | warmup_epochs = 0 47 | gamma_init = 1 48 | 49 | elif network_type == 'sres50': 50 | weight_decay_strength = 1e-4 51 | batch_size = 256 52 | lrs = LRSchedule(base_lr=0.1, max_epochs=100, lr_epoch_boundaries=None, lr_decay_factor=None, 53 | linear_final_lr=None, cosine_minimum=0) 54 | warmup_epochs = 0 55 | 
gamma_init = 1 56 | 57 | elif network_type == 'cfqkbnc': 58 | weight_decay_strength = 1e-4 59 | # ------------------------------------ 60 | # 86.2 ---> 86.8+ 61 | batch_size = 128 62 | lrs = LRSchedule(base_lr=0.1, max_epochs=150, lr_epoch_boundaries=None, lr_decay_factor=None, 63 | linear_final_lr=None, cosine_minimum=0) 64 | warmup_epochs = 0 65 | gamma_init = 0.333 66 | # ------------------------------------ 67 | 68 | 69 | elif network_type == 'src56': 70 | weight_decay_strength = 1e-4 71 | # ------------------------------------ 72 | # 94.47 ---> 95+ 73 | batch_size = 128 74 | lrs = LRSchedule(base_lr=0.2, max_epochs=400, lr_epoch_boundaries=None, lr_decay_factor=None, 75 | linear_final_lr=None, cosine_minimum=0) 76 | warmup_epochs = 0 77 | gamma_init = 0.333 78 | # -------------------------------------- 79 | 80 | elif network_type == 'vc': 81 | weight_decay_strength = 1e-4 82 | # -------------------------------------- 83 | # 93.98 ---> 94.54 84 | batch_size = 128 85 | lrs = LRSchedule(base_lr=0.1, max_epochs=400, lr_epoch_boundaries=None, lr_decay_factor=None, 86 | linear_final_lr=None, cosine_minimum=0) 87 | warmup_epochs = 0 88 | gamma_init = 0.333 89 | # -------------------------------------- 90 | 91 | elif network_type == 'wrnc16plain': 92 | weight_decay_strength = 5e-4 93 | # -------------------------------------- 94 | # 95.90 -> 96.33 95 | batch_size = 128 96 | lrs = LRSchedule(base_lr=0.1, max_epochs=400, lr_epoch_boundaries=None, lr_decay_factor=None, 97 | linear_final_lr=None, cosine_minimum=0) 98 | warmup_epochs = 0 99 | gamma_init = 0.333 100 | # -------------------------------------- 101 | 102 | else: 103 | raise ValueError('...') 104 | 105 | log_dir = 'acnet_exps/{}_{}_train'.format(network_type, block_type) 106 | 107 | weight_decay_bias = weight_decay_strength 108 | config = get_baseconfig_by_epoch(network_type=network_type, 109 | dataset_name=get_dataset_name_by_model_name(network_type), dataset_subset='train', 110 | global_batch_size=batch_size, num_node=1, 111 | weight_decay=weight_decay_strength, optimizer_type='sgd', momentum=0.9, 112 | max_epochs=lrs.max_epochs, base_lr=lrs.base_lr, lr_epoch_boundaries=lrs.lr_epoch_boundaries, cosine_minimum=lrs.cosine_minimum, 113 | lr_decay_factor=lrs.lr_decay_factor, 114 | warmup_epochs=0, warmup_method='linear', warmup_factor=0, 115 | ckpt_iter_period=40000, tb_iter_period=100, output_dir=log_dir, 116 | tb_dir=log_dir, save_weights=None, val_epoch_period=5, linear_final_lr=lrs.linear_final_lr, 117 | weight_decay_bias=weight_decay_bias, deps=None) 118 | 119 | if block_type == 'acb': 120 | builder = ACNetBuilder(base_config=config, deploy=False, gamma_init=gamma_init) 121 | else: 122 | builder = ConvBuilder(base_config=config) 123 | 124 | target_weights = os.path.join(log_dir, 'finish.hdf5') 125 | if not os.path.exists(target_weights): 126 | train_main(local_rank=start_arg.local_rank, cfg=config, convbuilder=builder, 127 | show_variables=True, auto_continue=auto_continue) 128 | 129 | if block_type == 'acb' and start_arg.local_rank == 0: 130 | convert_acnet_weights(target_weights, target_weights.replace('.hdf5', '_deploy.hdf5'), eps=1e-5) 131 | deploy_builder = ACNetBuilder(base_config=config, deploy=True) 132 | general_test(network_type=network_type, weights=target_weights.replace('.hdf5', '_deploy.hdf5'), 133 | builder=deploy_builder) 134 | -------------------------------------------------------------------------------- /deprecated/utils/misc.py: -------------------------------------------------------------------------------- 1 | 
import math 2 | import os 3 | from typing import Tuple, List, Dict 4 | import torch 5 | import sys 6 | import json 7 | import h5py 8 | import numpy as np 9 | import time 10 | 11 | def cur_time(): 12 | return time.strftime('%Y,%b,%d,%X') 13 | 14 | def log_important(message, log_file): 15 | print(message, cur_time()) 16 | with open(log_file, 'a') as f: 17 | print(message, cur_time(), file=f) 18 | 19 | 20 | 21 | def representsInt(s): 22 | try: 23 | int(s) 24 | return True 25 | except ValueError: 26 | return False 27 | 28 | def read_hdf5(file_path): 29 | result = {} 30 | with h5py.File(file_path, 'r') as f: 31 | for k in f.keys(): 32 | value = np.asarray(f[k]) 33 | if representsInt(k): 34 | result[int(k)] = value 35 | else: 36 | result[str(k).replace('+','/')] = value 37 | print('read {} arrays from {}'.format(len(result), file_path)) 38 | f.close() 39 | return result 40 | 41 | def save_hdf5(numpy_dict, file_path): 42 | with h5py.File(file_path, 'w') as f: 43 | for k,v in numpy_dict.items(): 44 | f.create_dataset(str(k).replace('/','+'), data=v) 45 | print('saved {} arrays to {}'.format(len(numpy_dict), file_path)) 46 | f.close() 47 | 48 | 49 | def start_exp(): 50 | import argparse 51 | parser = argparse.ArgumentParser() 52 | parser.add_argument("--try_arg", type=str, default='') 53 | args = parser.parse_args() 54 | try_arg = args.try_arg 55 | print('the try_arg is ', try_arg) 56 | print('we have {} torch devices'.format(torch.cuda.device_count()), 57 | 'the allocated GPU memory is {}'.format(torch.cuda.memory_allocated())) 58 | return try_arg 59 | 60 | 61 | def torch_accuracy(output, target, topk=(1,)) -> List[torch.Tensor]: 62 | ''' 63 | param output, target: should be torch Variable 64 | ''' 65 | # assert isinstance(output, torch.cuda.Tensor), 'expecting Torch Tensor' 66 | # assert isinstance(target, torch.Tensor), 'expecting Torch Tensor' 67 | # print(type(output)) 68 | 69 | topn = max(topk) 70 | batch_size = output.size(0) 71 | 72 | _, pred = output.topk(topn, 1, True, True) 73 | pred = pred.t() 74 | 75 | is_correct = pred.eq(target.view(1, -1).expand_as(pred)) 76 | 77 | ans = [] 78 | for i in topk: 79 | is_correct_i = is_correct[:i].view(-1).float().sum(0, keepdim=True) 80 | ans.append(is_correct_i.mul_(100.0 / batch_size)) 81 | 82 | return ans 83 | 84 | class AvgMeter(object): 85 | ''' 86 | Computing mean 87 | ''' 88 | name = 'No name' 89 | 90 | def __init__(self, name='No name', fmt = ':.2f'): 91 | self.name = name 92 | self.fmt = fmt 93 | self.reset() 94 | 95 | def reset(self): 96 | self.sum = 0 97 | self.mean = 0 98 | self.num = 0 99 | self.now = 0 100 | 101 | def update(self, mean_var, count=1): 102 | if math.isnan(mean_var): 103 | mean_var = 1e6 104 | print('Avgmeter getting Nan!') 105 | self.now = mean_var 106 | self.num += count 107 | 108 | self.sum += mean_var * count 109 | self.mean = float(self.sum) / self.num 110 | 111 | def __str__(self): 112 | print_str = self.name + '-{' + self.fmt + '}' 113 | return print_str.format(self.mean) 114 | 115 | def save_args(args, save_dir = None): 116 | if save_dir == None: 117 | param_path = os.path.join(args.resume, "params.json") 118 | else: 119 | param_path = os.path.join(save_dir, 'params.json') 120 | 121 | #logger.info("[*] MODEL dir: %s" % args.resume) 122 | #logger.info("[*] PARAM path: %s" % param_path) 123 | 124 | with open(param_path, 'w') as fp: 125 | json.dump(args.__dict__, fp, indent=4, sort_keys=True) 126 | 127 | 128 | def mkdir(path): 129 | if not os.path.exists(path): 130 | print('creating dir {}'.format(path)) 131 | 
os.mkdir(path) 132 | 133 | # def save_checkpoint(cur_iters, net, optimizer, lr_scheduler, file_name): 134 | # checkpoint = {'cur_iters': cur_iters, 135 | # 'state_dict': net.state_dict(), 136 | # 'optimizer_state_dict': optimizer.state_dict(), 137 | # 'lr_scheduler_state_dict':lr_scheduler.state_dict()} 138 | # if os.path.exists(file_name): 139 | # print('Overwriting {}'.format(file_name)) 140 | # torch.save(checkpoint, file_name) 141 | # link_name = os.path.join('/', *file_name.split(os.path.sep)[:-1], 'last.checkpoint') 142 | # #print(link_name) 143 | # make_symlink(source = file_name, link_name=link_name) 144 | 145 | def load_checkpoint(file_name, net = None, optimizer = None, lr_scheduler = None): 146 | if os.path.isfile(file_name): 147 | print("=> loading checkpoint '{}'".format(file_name)) 148 | check_point = torch.load(file_name) 149 | if net is not None: 150 | print('Loading network state dict') 151 | net.load_state_dict(check_point['state_dict']) 152 | if optimizer is not None: 153 | print('Loading optimizer state dict') 154 | optimizer.load_state_dict(check_point['optimizer_state_dict']) 155 | if lr_scheduler is not None: 156 | print('Loading lr_scheduler state dict') 157 | lr_scheduler.load_state_dict(check_point['lr_scheduler_state_dict']) 158 | 159 | return check_point['cur_iters'] 160 | else: 161 | print("=> no checkpoint found at '{}'".format(file_name)) 162 | 163 | 164 | def make_symlink(source, link_name): 165 | ''' 166 | Note: overwriting enabled! 167 | ''' 168 | if os.path.exists(link_name): 169 | #print("Link name already exist! Removing '{}' and overwriting".format(link_name)) 170 | os.remove(link_name) 171 | if os.path.exists(source): 172 | os.symlink(source, link_name) 173 | return 174 | else: 175 | print('Source path not exists') 176 | #print('SymLink Wrong!') 177 | 178 | def add_path(path): 179 | if path not in sys.path: 180 | print('Adding {}'.format(path)) 181 | sys.path.append(path) 182 | 183 | def format_metric_dict_to_line(metric_dict): 184 | msg = '' 185 | for key, value in metric_dict.items(): 186 | msg += '{}={:.5f},'.format(key, value) 187 | return msg 188 | -------------------------------------------------------------------------------- /utils/lr_scheduler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from bisect import bisect_right 3 | from data.data_factory import num_iters_per_epoch 4 | import torch 5 | from torch.optim.lr_scheduler import CosineAnnealingLR 6 | import math 7 | 8 | # FIXME ideally this would be achieved with a CombinedLRScheduler, 9 | # separating MultiStepLR with WarmupLR 10 | # but the current LRScheduler design doesn't allow it 11 | 12 | class WarmupMultiStepLR(torch.optim.lr_scheduler._LRScheduler): 13 | def __init__( 14 | self, 15 | optimizer, 16 | milestones, 17 | gamma=0.1, 18 | warmup_factor=1.0 / 3, 19 | warmup_iters=500, 20 | warmup_method="linear", 21 | last_epoch=-1, 22 | ): 23 | if not list(milestones) == sorted(milestones): 24 | raise ValueError( 25 | "Milestones should be a list of" " increasing integers. 
Got {}".format(milestones) 26 | ) 27 | 28 | 29 | if warmup_method not in ("constant", "linear"): 30 | raise ValueError( 31 | "Only 'constant' or 'linear' warmup_method accepted, " 32 | "got {}".format(warmup_method) 33 | ) 34 | self.milestones = milestones 35 | self.gamma = gamma 36 | self.warmup_factor = warmup_factor 37 | self.warmup_iters = warmup_iters 38 | self.warmup_method = warmup_method 39 | super(WarmupMultiStepLR, self).__init__(optimizer, last_epoch) 40 | 41 | def get_lr(self): 42 | warmup_factor = 1 43 | if self.last_epoch < self.warmup_iters: 44 | if self.warmup_method == "constant": 45 | warmup_factor = self.warmup_factor 46 | elif self.warmup_method == "linear": 47 | alpha = float(self.last_epoch) / self.warmup_iters 48 | warmup_factor = self.warmup_factor * (1 - alpha) + alpha 49 | return [ 50 | base_lr 51 | * warmup_factor 52 | * self.gamma ** bisect_right(self.milestones, self.last_epoch) 53 | for base_lr in self.base_lrs 54 | ] 55 | 56 | 57 | 58 | class WarmupLinearLR(torch.optim.lr_scheduler._LRScheduler): 59 | def __init__( 60 | self, 61 | optimizer, 62 | final_lr, 63 | final_iters, 64 | warmup_factor=1.0 / 3, 65 | warmup_iters=500, 66 | warmup_method="linear", 67 | last_epoch=-1, 68 | ): 69 | assert final_iters > warmup_iters 70 | self.final_lr = final_lr 71 | self.final_iters = final_iters 72 | self.warmup_factor = warmup_factor 73 | self.warmup_iters = max(warmup_iters, 0) 74 | self.warmup_method = warmup_method 75 | super(WarmupLinearLR, self).__init__(optimizer, last_epoch) 76 | 77 | # last_epoch == 0: base_lr * warmup_factor 78 | # last_epoch == warmup_iters: base_lr 79 | # last_epoch == final_iters: final_lr 80 | 81 | def get_lr(self): 82 | if self.last_epoch < self.warmup_iters: 83 | if self.warmup_method == "constant": 84 | warmup_factor = self.warmup_factor 85 | elif self.warmup_method == "linear": 86 | alpha = float(self.last_epoch) / self.warmup_iters 87 | warmup_factor = self.warmup_factor * (1 - alpha) + alpha 88 | else: 89 | raise ValueError( 90 | "Only 'constant' or 'linear' warmup_method accepted, " 91 | "got {}".format(self.warmup_method) 92 | ) 93 | return [ 94 | base_lr 95 | * warmup_factor 96 | for base_lr in self.base_lrs 97 | ] 98 | else: 99 | return [ 100 | base_lr - (base_lr - self.final_lr) * float(self.last_epoch - self.warmup_iters) / ( 101 | self.final_iters - self.warmup_iters) 102 | for base_lr in self.base_lrs 103 | ] 104 | 105 | 106 | class CosineAnnealingExtendLR(torch.optim.lr_scheduler._LRScheduler): 107 | r"""Set the learning rate of each parameter group using a cosine annealing 108 | schedule, where :math:`\eta_{max}` is set to the initial lr and 109 | :math:`T_{cur}` is the number of epochs since the last restart in SGDR: 110 | 111 | .. math:: 112 | \eta_t = \eta_{min} + \frac{1}{2}(\eta_{max} - \eta_{min})(1 + 113 | \cos(\frac{T_{cur}}{T_{max}}\pi)) 114 | 115 | When last_epoch=-1, sets initial lr as lr. 116 | 117 | It has been proposed in 118 | `SGDR: Stochastic Gradient Descent with Warm Restarts`_. Note that this only 119 | implements the cosine annealing part of SGDR, and not the restarts. 120 | 121 | Args: 122 | optimizer (Optimizer): Wrapped optimizer. 123 | T_cosine_max (int): Maximum number of iterations. 124 | eta_min (float): Minimum learning rate. Default: 0. 125 | last_epoch (int): The index of last epoch. Default: -1. 126 | 127 | ..
_SGDR\: Stochastic Gradient Descent with Warm Restarts: 128 | https://arxiv.org/abs/1608.03983 129 | """ 130 | 131 | def __init__(self, optimizer, T_cosine_max, eta_min=0, last_epoch=-1): 132 | self.eta_min = eta_min 133 | self.T_cosine_max = T_cosine_max 134 | super(CosineAnnealingExtendLR, self).__init__(optimizer, last_epoch) 135 | 136 | def get_lr(self): 137 | if self.last_epoch <= self.T_cosine_max: 138 | return [self.eta_min + (base_lr - self.eta_min) * 139 | (1 + math.cos(math.pi * self.last_epoch / self.T_cosine_max)) / 2 140 | for base_lr in self.base_lrs] 141 | else: 142 | return [self.eta_min 143 | for _ in self.base_lrs] 144 | 145 | 146 | # LR scheduler should work according the number of iterations 147 | def get_lr_scheduler(cfg, optimizer): 148 | it_ep = num_iters_per_epoch(cfg) 149 | if cfg.linear_final_lr is None and cfg.cosine_minimum is None: 150 | lr_iter_boundaries = [it_ep * ep for ep in cfg.lr_epoch_boundaries] 151 | return WarmupMultiStepLR( 152 | optimizer, lr_iter_boundaries, cfg.lr_decay_factor, 153 | warmup_factor=cfg.warmup_factor, 154 | warmup_iters=cfg.warmup_epochs * it_ep, 155 | warmup_method=cfg.warmup_method, ) 156 | elif cfg.cosine_minimum is None: 157 | return WarmupLinearLR(optimizer, final_lr=cfg.linear_final_lr, 158 | final_iters=cfg.max_epochs * it_ep, 159 | warmup_factor=cfg.warmup_factor, 160 | warmup_iters=cfg.warmup_epochs * it_ep, 161 | warmup_method=cfg.warmup_method,) 162 | else: 163 | assert cfg.warmup_epochs == 0 164 | assert cfg.linear_final_lr is None 165 | assert cfg.lr_decay_factor is None 166 | if cfg.lr_epoch_boundaries is None: 167 | print('use cosine decay, the minimum is ', cfg.cosine_minimum) 168 | return CosineAnnealingLR(optimizer=optimizer, T_max=cfg.max_epochs * it_ep, eta_min=cfg.cosine_minimum) 169 | else: 170 | assert len(cfg.lr_epoch_boundaries) == 1 171 | assert cfg.cosine_minimum > 0 172 | print('use extended cosine decay, the minimum is ', cfg.cosine_minimum) 173 | return CosineAnnealingExtendLR(optimizer=optimizer, T_cosine_max=cfg.lr_epoch_boundaries[0] * it_ep, 174 | eta_min=cfg.cosine_minimum) 175 | 176 | 177 | -------------------------------------------------------------------------------- /acnet/acb.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.init as init 3 | import torch 4 | 5 | class ACBlock(nn.Module): 6 | 7 | def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, padding_mode='zeros', deploy=False, 8 | use_affine=True, reduce_gamma=False, gamma_init=None ): 9 | super(ACBlock, self).__init__() 10 | self.deploy = deploy 11 | if deploy: 12 | self.fused_conv = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=(kernel_size,kernel_size), stride=stride, 13 | padding=padding, dilation=dilation, groups=groups, bias=True, padding_mode=padding_mode) 14 | else: 15 | self.square_conv = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, 16 | kernel_size=(kernel_size, kernel_size), stride=stride, 17 | padding=padding, dilation=dilation, groups=groups, bias=False, 18 | padding_mode=padding_mode) 19 | self.square_bn = nn.BatchNorm2d(num_features=out_channels, affine=use_affine) 20 | 21 | 22 | if padding - kernel_size // 2 >= 0: 23 | # Common use case. 
E.g., k=3, p=1 or k=5, p=2 24 | self.crop = 0 25 | # Compared to the KxK layer, the padding of the 1xK layer and Kx1 layer should be adjust to align the sliding windows (Fig 2 in the paper) 26 | hor_padding = [padding - kernel_size // 2, padding] 27 | ver_padding = [padding, padding - kernel_size // 2] 28 | else: 29 | # A negative "padding" (padding - kernel_size//2 < 0, which is not a common use case) is cropping. 30 | # Since nn.Conv2d does not support negative padding, we implement it manually 31 | self.crop = kernel_size // 2 - padding 32 | hor_padding = [0, padding] 33 | ver_padding = [padding, 0] 34 | 35 | self.ver_conv = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=(kernel_size, 1), 36 | stride=stride, 37 | padding=ver_padding, dilation=dilation, groups=groups, bias=False, 38 | padding_mode=padding_mode) 39 | 40 | self.hor_conv = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=(1, kernel_size), 41 | stride=stride, 42 | padding=hor_padding, dilation=dilation, groups=groups, bias=False, 43 | padding_mode=padding_mode) 44 | self.ver_bn = nn.BatchNorm2d(num_features=out_channels, affine=use_affine) 45 | self.hor_bn = nn.BatchNorm2d(num_features=out_channels, affine=use_affine) 46 | 47 | if reduce_gamma: 48 | self.init_gamma(1.0 / 3) 49 | 50 | if gamma_init is not None: 51 | assert not reduce_gamma 52 | self.init_gamma(gamma_init) 53 | 54 | 55 | def _fuse_bn_tensor(self, conv, bn): 56 | std = (bn.running_var + bn.eps).sqrt() 57 | t = (bn.weight / std).reshape(-1, 1, 1, 1) 58 | return conv.weight * t, bn.bias - bn.running_mean * bn.weight / std 59 | 60 | def _add_to_square_kernel(self, square_kernel, asym_kernel): 61 | asym_h = asym_kernel.size(2) 62 | asym_w = asym_kernel.size(3) 63 | square_h = square_kernel.size(2) 64 | square_w = square_kernel.size(3) 65 | square_kernel[:, :, square_h // 2 - asym_h // 2: square_h // 2 - asym_h // 2 + asym_h, 66 | square_w // 2 - asym_w // 2: square_w // 2 - asym_w // 2 + asym_w] += asym_kernel 67 | 68 | def get_equivalent_kernel_bias(self): 69 | hor_k, hor_b = self._fuse_bn_tensor(self.hor_conv, self.hor_bn) 70 | ver_k, ver_b = self._fuse_bn_tensor(self.ver_conv, self.ver_bn) 71 | square_k, square_b = self._fuse_bn_tensor(self.square_conv, self.square_bn) 72 | self._add_to_square_kernel(square_k, hor_k) 73 | self._add_to_square_kernel(square_k, ver_k) 74 | return square_k, hor_b + ver_b + square_b 75 | 76 | 77 | def switch_to_deploy(self): 78 | deploy_k, deploy_b = self.get_equivalent_kernel_bias() 79 | self.deploy = True 80 | self.fused_conv = nn.Conv2d(in_channels=self.square_conv.in_channels, out_channels=self.square_conv.out_channels, 81 | kernel_size=self.square_conv.kernel_size, stride=self.square_conv.stride, 82 | padding=self.square_conv.padding, dilation=self.square_conv.dilation, groups=self.square_conv.groups, bias=True, 83 | padding_mode=self.square_conv.padding_mode) 84 | self.__delattr__('square_conv') 85 | self.__delattr__('square_bn') 86 | self.__delattr__('hor_conv') 87 | self.__delattr__('hor_bn') 88 | self.__delattr__('ver_conv') 89 | self.__delattr__('ver_bn') 90 | self.fused_conv.weight.data = deploy_k 91 | self.fused_conv.bias.data = deploy_b 92 | 93 | 94 | def init_gamma(self, gamma_value): 95 | init.constant_(self.square_bn.weight, gamma_value) 96 | init.constant_(self.ver_bn.weight, gamma_value) 97 | init.constant_(self.hor_bn.weight, gamma_value) 98 | print('init gamma of square, ver and hor as ', gamma_value) 99 | 100 | def single_init(self): 101 | 
init.constant_(self.square_bn.weight, 1.0) 102 | init.constant_(self.ver_bn.weight, 0.0) 103 | init.constant_(self.hor_bn.weight, 0.0) 104 | print('init gamma of square as 1, ver and hor as 0') 105 | 106 | def forward(self, input): 107 | if self.deploy: 108 | return self.fused_conv(input) 109 | else: 110 | square_outputs = self.square_conv(input) 111 | square_outputs = self.square_bn(square_outputs) 112 | if self.crop > 0: 113 | ver_input = input[:, :, :, self.crop:-self.crop] 114 | hor_input = input[:, :, self.crop:-self.crop, :] 115 | else: 116 | ver_input = input 117 | hor_input = input 118 | vertical_outputs = self.ver_conv(ver_input) 119 | vertical_outputs = self.ver_bn(vertical_outputs) 120 | horizontal_outputs = self.hor_conv(hor_input) 121 | horizontal_outputs = self.hor_bn(horizontal_outputs) 122 | result = square_outputs + vertical_outputs + horizontal_outputs 123 | return result 124 | 125 | if __name__ == '__main__': 126 | N = 1 127 | C = 2 128 | H = 62 129 | W = 62 130 | O = 8 131 | groups = 4 132 | 133 | x = torch.randn(N, C, H, W) 134 | print('input shape is ', x.size()) 135 | 136 | test_kernel_padding = [(3,1), (3,0), (5,1), (5,2), (5,3), (5,4), (5,6)] 137 | 138 | for k, p in test_kernel_padding: 139 | acb = ACBlock(C, O, kernel_size=k, padding=p, stride=1, deploy=False) 140 | acb.eval() 141 | for module in acb.modules(): 142 | if isinstance(module, nn.BatchNorm2d): 143 | nn.init.uniform_(module.running_mean, 0, 0.1) 144 | nn.init.uniform_(module.running_var, 0, 0.2) 145 | nn.init.uniform_(module.weight, 0, 0.3) 146 | nn.init.uniform_(module.bias, 0, 0.4) 147 | out = acb(x) 148 | acb.switch_to_deploy() 149 | deployout = acb(x) 150 | print('difference between the outputs of the training-time and converted ACB is') 151 | print(((deployout - out) ** 2).sum()) 152 | 153 | -------------------------------------------------------------------------------- /ndp_test.py: -------------------------------------------------------------------------------- 1 | from base_config import BaseConfigByEpoch 2 | from model_map import get_model_fn 3 | from data.data_factory import create_dataset, load_cuda_data 4 | from torch.nn.modules.loss import CrossEntropyLoss 5 | from utils.engine import Engine 6 | from utils.misc import torch_accuracy, AvgMeter 7 | from collections import OrderedDict 8 | import torch 9 | from tqdm import tqdm 10 | import time 11 | from builder import ConvBuilder 12 | from utils.misc import log_important, extract_deps_from_weights_file 13 | from base_config import get_baseconfig_for_test 14 | from data.data_factory import num_val_examples 15 | 16 | SPEED_TEST_SAMPLE_IGNORE_RATIO = 0.5 17 | 18 | TEST_BATCH_SIZE = 100 19 | OVERALL_LOG_FILE = 'overall_test_log.txt' 20 | DETAIL_LOG_FILE = 'detail_test_log.txt' 21 | 22 | def run_eval(val_data, max_iters, net, criterion, discrip_str, dataset_name): 23 | pbar = tqdm(range(max_iters)) 24 | top1 = AvgMeter() 25 | top5 = AvgMeter() 26 | losses = AvgMeter() 27 | pbar.set_description('Validation' + discrip_str) 28 | total_net_time = 0 29 | with torch.no_grad(): 30 | for iter_idx, i in enumerate(pbar): 31 | start_time = time.time() 32 | 33 | data, label = load_cuda_data(val_data, dataset_name=dataset_name) 34 | data_time = time.time() - start_time 35 | 36 | net_time_start = time.time() 37 | pred = net(data) 38 | net_time_end = time.time() 39 | 40 | if iter_idx >= SPEED_TEST_SAMPLE_IGNORE_RATIO * max_iters: 41 | total_net_time += net_time_end - net_time_start 42 | 43 | loss = criterion(pred, label) 44 | acc, acc5 = torch_accuracy(pred, 
label, (1, 5)) 45 | 46 | top1.update(acc.item()) 47 | top5.update(acc5.item()) 48 | losses.update(loss.item()) 49 | pbar_dic = OrderedDict() 50 | pbar_dic['data-time'] = '{:.2f}'.format(data_time) 51 | pbar_dic['top1'] = '{:.5f}'.format(top1.mean) 52 | pbar_dic['top5'] = '{:.5f}'.format(top5.mean) 53 | pbar_dic['loss'] = '{:.5f}'.format(losses.mean) 54 | pbar.set_postfix(pbar_dic) 55 | 56 | metric_dic = {'top1':torch.tensor(top1.mean), 57 | 'top5':torch.tensor(top5.mean), 58 | 'loss':torch.tensor(losses.mean)} 59 | # reduced_metirc_dic = reduce_loss_dict(metric_dic) 60 | reduced_metirc_dic = metric_dic #TODO note this 61 | return reduced_metirc_dic, total_net_time 62 | 63 | 64 | def val_during_train(epoch, iteration, tb_tags, 65 | engine, model, val_data, criterion, descrip_str, 66 | dataset_name, test_batch_size, tb_writer): 67 | model.eval() 68 | num_examples = num_val_examples(dataset_name) 69 | assert num_examples % test_batch_size == 0 70 | val_iters = num_examples // test_batch_size 71 | eval_dict, total_net_time = run_eval(val_data, val_iters, model, criterion, descrip_str, 72 | dataset_name=dataset_name) 73 | val_top1_value = eval_dict['top1'].item() 74 | val_top5_value = eval_dict['top5'].item() 75 | val_loss_value = eval_dict['loss'].item() 76 | for tag, value in zip(tb_tags, [val_top1_value, val_top5_value, val_loss_value]): 77 | tb_writer.add_scalars(tag, {'Val': value}, iteration) 78 | engine.log( 79 | 'val at epoch {}, top1={:.5f}, top5={:.5f}, loss={:.6f}'.format(epoch, val_top1_value, 80 | val_top5_value, 81 | val_loss_value)) 82 | model.train() 83 | 84 | 85 | def get_criterion(cfg): 86 | return CrossEntropyLoss() #TODO note this 87 | 88 | 89 | def ding_test(cfg:BaseConfigByEpoch, net=None, val_dataloader=None, show_variables=False, convbuilder=None, 90 | init_hdf5=None, extra_msg=None, weights_dict=None): 91 | 92 | with Engine(local_rank=0, for_val_only=True) as engine: 93 | 94 | engine.setup_log( 95 | name='test', log_dir='./', file_name=DETAIL_LOG_FILE) 96 | 97 | if convbuilder is None: 98 | convbuilder = ConvBuilder(base_config=cfg) 99 | 100 | if net is None: 101 | net_fn = get_model_fn(cfg.dataset_name, cfg.network_type) 102 | model = net_fn(cfg, convbuilder).cuda() 103 | else: 104 | model = net.cuda() 105 | 106 | if val_dataloader is None: 107 | val_data = create_dataset(cfg.dataset_name, cfg.dataset_subset, 108 | global_batch_size=cfg.global_batch_size, distributed=False) 109 | num_examples = num_val_examples(cfg.dataset_name) 110 | assert num_examples % cfg.global_batch_size == 0 111 | val_iters = num_val_examples(cfg.dataset_name) // cfg.global_batch_size 112 | print('batchsize={}, {} iters'.format(cfg.global_batch_size, val_iters)) 113 | 114 | criterion = get_criterion(cfg).cuda() 115 | 116 | engine.register_state( 117 | scheduler=None, model=model, optimizer=None) 118 | 119 | if show_variables: 120 | engine.show_variables() 121 | 122 | assert not engine.distributed 123 | 124 | if weights_dict is not None: 125 | engine.load_from_weights_dict(weights_dict) 126 | else: 127 | if cfg.init_weights: 128 | engine.load_checkpoint(cfg.init_weights) 129 | if init_hdf5: 130 | engine.load_hdf5(init_hdf5) 131 | 132 | # engine.save_by_order('smi2_by_order.hdf5') 133 | # engine.load_by_order('smi2_by_order.hdf5') 134 | # engine.save_hdf5('model_files/stami2_lrs4Z.hdf5') 135 | 136 | model.eval() 137 | eval_dict, total_net_time = run_eval(val_data, val_iters, model, criterion, 'TEST', dataset_name=cfg.dataset_name) 138 | val_top1_value = eval_dict['top1'].item() 139 | 
val_top5_value = eval_dict['top5'].item() 140 | val_loss_value = eval_dict['loss'].item() 141 | 142 | msg = '{},{},{},top1={:.5f},top5={:.5f},loss={:.7f},total_net_time={}'.format(cfg.network_type, init_hdf5 or cfg.init_weights, cfg.dataset_subset, 143 | val_top1_value, val_top5_value, val_loss_value, total_net_time) 144 | if extra_msg is not None: 145 | msg += ', ' + extra_msg 146 | log_important(msg, OVERALL_LOG_FILE) 147 | return eval_dict 148 | 149 | 150 | def general_test(network_type, weights, builder=None, net=None, dataset_name=None, weights_dict=None, 151 | batch_size=None): 152 | if weights is None or weights == 'None': 153 | init_weights = None 154 | init_hdf5 = None 155 | elif weights.endswith('.hdf5'): 156 | init_weights = None 157 | init_hdf5 = weights 158 | else: 159 | init_weights = weights 160 | init_hdf5 = None 161 | 162 | if init_hdf5 is not None: 163 | deps = extract_deps_from_weights_file(init_hdf5) 164 | else: 165 | deps = None 166 | 167 | if batch_size is None: 168 | batch_size = TEST_BATCH_SIZE 169 | test_config = get_baseconfig_for_test(network_type=network_type, dataset_subset='val', global_batch_size=batch_size, 170 | init_weights=init_weights, deps=deps, dataset_name=dataset_name) 171 | return ding_test(cfg=test_config, net=net, show_variables=True, init_hdf5=init_hdf5, convbuilder=builder, weights_dict=weights_dict) 172 | 173 | 174 | if __name__ == '__main__': 175 | import sys 176 | builder = None 177 | if 'deploy' in sys.argv[2]: 178 | from nobn_builder import NoBNBuilder 179 | builder = NoBNBuilder(base_config=None) 180 | general_test(network_type=sys.argv[1], weights=sys.argv[2], builder=builder) -------------------------------------------------------------------------------- /deprecated/base_model/resnet.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ResNet in PyTorch. For Pre-activation ResNet, see 'preact_resnet.py'. 3 | Reference: 4 | [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun 5 | Deep Residual Learning for Image Recognition.
arXiv:1512.03385 6 | 7 | Note: cifar_resnet18 constructs the same model with that from 8 | https://github.com/kuangliu/pytorch-cifar/blob/master/models/resnet.py 9 | ''' 10 | 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | from builder import ConvBuilder 14 | 15 | class BasicBlock(nn.Module): 16 | 17 | expansion = 1 18 | 19 | def __init__(self, builder:ConvBuilder, in_planes, planes, stride=1): 20 | super(BasicBlock, self).__init__() 21 | self.conv1 = builder.Conv2dBNReLU(in_channels=in_planes, out_channels=planes, kernel_size=3, stride=stride, padding=1) 22 | self.conv2 = builder.Conv2dBN(in_channels=planes, out_channels=self.expansion * planes, kernel_size=3, stride=1, padding=1) 23 | 24 | if stride != 1 or in_planes != self.expansion * planes: 25 | self.shortcut = builder.Conv2dBN(in_channels=in_planes, out_channels=self.expansion * planes, kernel_size=1, stride=stride) 26 | else: 27 | self.shortcut = builder.ResIdentity(num_channels=in_planes) 28 | 29 | 30 | 31 | def forward(self, x): 32 | out = self.conv1(x) 33 | out = self.conv2(out) 34 | out += self.shortcut(x) 35 | out = F.relu(out) 36 | return out 37 | 38 | 39 | class Bottleneck(nn.Module): 40 | expansion = 4 41 | 42 | def __init__(self, builder:ConvBuilder, in_planes, planes, stride=1): 43 | super(Bottleneck, self).__init__() 44 | self.bd = builder 45 | 46 | self.conv1 = builder.Conv2dBNReLU(in_planes, planes, kernel_size=1) 47 | self.conv2 = builder.Conv2dBNReLU(planes, planes, kernel_size=3, stride=stride, padding=1) 48 | self.conv3 = builder.Conv2dBN(planes, self.expansion*planes, kernel_size=1) 49 | 50 | 51 | if stride != 1 or in_planes != self.expansion*planes: 52 | self.shortcut = builder.Conv2dBN(in_planes, self.expansion*planes, kernel_size=1, stride=stride) 53 | else: 54 | self.shortcut = builder.ResIdentity(num_channels=in_planes) 55 | 56 | 57 | def forward(self, x): 58 | out = self.conv1(x) 59 | out = self.conv2(out) 60 | out = self.conv3(out) 61 | out += self.shortcut(x) 62 | out = F.relu(out) 63 | return out 64 | 65 | 66 | 67 | class RCBlock(nn.Module): 68 | 69 | def __init__(self, in_channels, out_channels, stride=1, builder=None): 70 | super(RCBlock, self).__init__() 71 | 72 | self.conv1 = builder.Conv2dBNReLU(in_channels, out_channels, kernel_size=3, stride=stride, padding=1) 73 | self.conv2 = builder.Conv2dBN(out_channels, out_channels, kernel_size=3, stride=1, padding=1) 74 | 75 | if stride != 1: 76 | self.shortcut = builder.Conv2dBN(in_channels, out_channels, kernel_size=1, stride=stride) 77 | else: 78 | self.shortcut = builder.ResIdentity(num_channels=out_channels) 79 | self.relu = builder.ReLU() 80 | 81 | 82 | def forward(self, x): 83 | out = self.conv1(x) 84 | out = self.conv2(out) 85 | out += self.shortcut(x) 86 | out = self.relu(out) 87 | return out 88 | 89 | 90 | class RCNet(nn.Module): 91 | 92 | def __init__(self, block_counts, num_classes, builder:ConvBuilder): 93 | super(RCNet, self).__init__() 94 | self.bd = builder 95 | 96 | self.conv1 = self.bd.Conv2dBNReLU(in_channels=3, out_channels=16, kernel_size=3, stride=1, padding=1) 97 | self.stage1 = self._build_stage(stage_in_channels=16, out_channels=16, num_blocks=block_counts[0], stride=1) 98 | self.stage2 = self._build_stage(stage_in_channels=16, out_channels=32, num_blocks=block_counts[1], stride=2) 99 | self.stage3 = self._build_stage(stage_in_channels=32, out_channels=64, num_blocks=block_counts[2], stride=2) 100 | self.linear = self.bd.Linear(in_features=64, out_features=num_classes) 101 | 102 | 103 | 104 | def 
_build_stage(self, stage_in_channels, out_channels, num_blocks, stride): 105 | strides = [stride] + [1] * (num_blocks - 1) 106 | in_channel_list = [stage_in_channels] + [out_channels] * (num_blocks - 1) 107 | layers = [] 108 | for block_stride, block_in_channels in zip(strides, in_channel_list): 109 | layers.append(RCBlock(in_channels=block_in_channels, out_channels=out_channels, stride=block_stride, builder=self.bd)) 110 | return nn.Sequential(*layers) 111 | 112 | def forward(self, x): 113 | out = self.conv1(x) 114 | out = self.stage1(out) 115 | out = self.stage2(out) 116 | out = self.stage3(out) 117 | out = self.bd.avg_pool2d(in_features=out, kernel_size=8, stride=1, padding=0) 118 | out = self.bd.flatten(out) 119 | out = self.linear(out) 120 | return out 121 | 122 | 123 | def create_RC56(cfg, builder): 124 | return RCNet(block_counts=[9,9,9], num_classes=10, builder=builder) 125 | 126 | def create_RC110(cfg, builder): 127 | return RCNet(block_counts=[18,18,18], num_classes=10, builder=builder) 128 | 129 | def create_RC164(cfg, builder): 130 | return RCNet(block_counts=[27,27,27], num_classes=10, builder=builder) 131 | 132 | def create_RH56(cfg, builder): 133 | return RCNet(block_counts=[9,9,9], num_classes=100, builder=builder) 134 | 135 | def create_RH110(cfg, builder): 136 | return RCNet(block_counts=[18,18,18], num_classes=100, builder=builder) 137 | 138 | def create_RH164(cfg, builder): 139 | return RCNet(block_counts=[27,27,27], num_classes=100, builder=builder) 140 | 141 | 142 | 143 | 144 | class ResNet(nn.Module): 145 | def __init__(self, builder:ConvBuilder, block, num_blocks, num_classes=10): 146 | super(ResNet, self).__init__() 147 | self.bd = builder 148 | self.in_planes = 64 149 | self.conv1 = builder.Conv2dBNReLU(3, 64, kernel_size=7, stride=2, padding=3) 150 | self.stage1 = self._make_stage(block, 64, num_blocks[0], stride=1) 151 | self.stage2 = self._make_stage(block, 128, num_blocks[1], stride=2) 152 | self.stage3 = self._make_stage(block, 256, num_blocks[2], stride=2) 153 | self.stage4 = self._make_stage(block, 512, num_blocks[3], stride=2) 154 | self.linear = self.bd.Linear(512*block.expansion, num_classes) 155 | 156 | def _make_stage(self, block, planes, num_blocks, stride): 157 | strides = [stride] + [1]*(num_blocks-1) 158 | blocks = [] 159 | for stride in strides: 160 | blocks.append(block(builder=self.bd, in_planes=self.in_planes, planes=planes, stride=stride)) 161 | self.in_planes = planes * block.expansion 162 | return nn.Sequential(*blocks) 163 | 164 | def forward(self, x): 165 | out = self.conv1(x) 166 | out = self.bd.max_pool2d(out, kernel_size=3, stride=2, padding=1) 167 | out = self.stage1(out) 168 | out = self.stage2(out) 169 | out = self.stage3(out) 170 | out = self.stage4(out) 171 | out = self.bd.avg_pool2d(out, 7, 1, 0) 172 | out = self.bd.flatten(out) 173 | out = self.linear(out) 174 | return out 175 | 176 | def create_ResNet18(cfg, builder): 177 | return ResNet(builder, BasicBlock, [2,2,2,2], num_classes=1000) 178 | 179 | def create_ResNet34(cfg, builder): 180 | return ResNet(builder, BasicBlock, [3,4,6,3], num_classes=1000) 181 | 182 | def create_ResNet50(cfg, builder): 183 | return ResNet(builder, Bottleneck, [3,4,6,3], num_classes=1000) 184 | 185 | def create_ResNet101(cfg, builder): 186 | return ResNet(builder, Bottleneck, [3,4,23,3], num_classes=1000) 187 | 188 | def create_ResNet152(cfg, builder): 189 | return ResNet(builder, Bottleneck, [3,8,36,3], num_classes=1000) 190 | 191 | 
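# --- Illustrative worked example (editor's note, not part of the original file) ---
# The _build_stage()/_make_stage() helpers above give the configured stride to the
# first block of a stage only and feed every later block the previous block's output
# width. For the second RCNet stage of create_RC56 (stage_in_channels=16,
# out_channels=32, num_blocks=9, stride=2) the pairing works out to:
#
#     strides         = [2] + [1] * 8        # -> [2, 1, 1, 1, 1, 1, 1, 1, 1]
#     in_channel_list = [16] + [32] * 8
#     for block_stride, block_in_channels in zip(strides, in_channel_list):
#         RCBlock(in_channels=block_in_channels, out_channels=32,
#                 stride=block_stride, builder=...)
#
# so only the first block downsamples (32x32 -> 16x16 on CIFAR) and widens the
# features from 16 to 32 channels; the remaining eight blocks keep 32 channels at
# stride 1, which is why RCBlock only needs a 1x1 projection shortcut when stride != 1.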
-------------------------------------------------------------------------------- /acnet/acnet_test.py: -------------------------------------------------------------------------------- 1 | from base_config import BaseConfigByEpoch 2 | from model_map import get_model_fn 3 | from data.data_factory import create_dataset, load_cuda_data 4 | from torch.nn.modules.loss import CrossEntropyLoss 5 | from utils.engine import Engine 6 | from utils.misc import torch_accuracy, AvgMeter 7 | from collections import OrderedDict 8 | import torch 9 | from tqdm import tqdm 10 | import time 11 | from builder import ConvBuilder 12 | from utils.misc import log_important, extract_deps_from_weights_file 13 | from base_config import get_baseconfig_for_test 14 | from data.data_factory import num_val_examples 15 | 16 | SPEED_TEST_SAMPLE_IGNORE_RATIO = 0.5 17 | 18 | TEST_BATCH_SIZE = 100 19 | OVERALL_LOG_FILE = 'overall_test_log.txt' 20 | DETAIL_LOG_FILE = 'detail_test_log.txt' 21 | 22 | def run_eval(val_data, max_iters, net, criterion, discrip_str, dataset_name): 23 | pbar = tqdm(range(max_iters)) 24 | top1 = AvgMeter() 25 | top5 = AvgMeter() 26 | losses = AvgMeter() 27 | pbar.set_description('Validation' + discrip_str) 28 | total_net_time = 0 29 | with torch.no_grad(): 30 | for iter_idx, i in enumerate(pbar): 31 | start_time = time.time() 32 | 33 | data, label = load_cuda_data(val_data, dataset_name=dataset_name) 34 | data_time = time.time() - start_time 35 | 36 | net_time_start = time.time() 37 | pred = net(data) 38 | net_time_end = time.time() 39 | 40 | if iter_idx >= SPEED_TEST_SAMPLE_IGNORE_RATIO * max_iters: 41 | total_net_time += net_time_end - net_time_start 42 | 43 | loss = criterion(pred, label) 44 | acc, acc5 = torch_accuracy(pred, label, (1, 5)) 45 | 46 | top1.update(acc.item()) 47 | top5.update(acc5.item()) 48 | losses.update(loss.item()) 49 | pbar_dic = OrderedDict() 50 | pbar_dic['data-time'] = '{:.2f}'.format(data_time) 51 | pbar_dic['top1'] = '{:.5f}'.format(top1.mean) 52 | pbar_dic['top5'] = '{:.5f}'.format(top5.mean) 53 | pbar_dic['loss'] = '{:.5f}'.format(losses.mean) 54 | pbar.set_postfix(pbar_dic) 55 | 56 | metric_dic = {'top1':torch.tensor(top1.mean), 57 | 'top5':torch.tensor(top5.mean), 58 | 'loss':torch.tensor(losses.mean)} 59 | # reduced_metirc_dic = reduce_loss_dict(metric_dic) 60 | reduced_metirc_dic = metric_dic #TODO note this 61 | return reduced_metirc_dic, total_net_time 62 | 63 | 64 | def val_during_train(epoch, iteration, tb_tags, 65 | engine, model, val_data, criterion, descrip_str, 66 | dataset_name, test_batch_size, tb_writer): 67 | model.eval() 68 | num_examples = num_val_examples(dataset_name) 69 | assert num_examples % test_batch_size == 0 70 | val_iters = num_examples // test_batch_size 71 | eval_dict, total_net_time = run_eval(val_data, val_iters, model, criterion, descrip_str, 72 | dataset_name=dataset_name) 73 | val_top1_value = eval_dict['top1'].item() 74 | val_top5_value = eval_dict['top5'].item() 75 | val_loss_value = eval_dict['loss'].item() 76 | for tag, value in zip(tb_tags, [val_top1_value, val_top5_value, val_loss_value]): 77 | tb_writer.add_scalars(tag, {'Val': value}, iteration) 78 | engine.log( 79 | 'val at epoch {}, top1={:.5f}, top5={:.5f}, loss={:.6f}'.format(epoch, val_top1_value, 80 | val_top5_value, 81 | val_loss_value)) 82 | model.train() 83 | 84 | 85 | def get_criterion(cfg): 86 | return CrossEntropyLoss() #TODO note this 87 | 88 | 89 | def ding_test(cfg:BaseConfigByEpoch, net=None, val_dataloader=None, show_variables=False, convbuilder=None, 90 | 
init_hdf5=None, extra_msg=None, weights_dict=None): 91 | 92 | with Engine(local_rank=0, for_val_only=True) as engine: 93 | 94 | engine.setup_log( 95 | name='test', log_dir='./', file_name=DETAIL_LOG_FILE) 96 | 97 | if convbuilder is None: 98 | convbuilder = ConvBuilder(base_config=cfg) 99 | 100 | if net is None: 101 | net_fn = get_model_fn(cfg.dataset_name, cfg.network_type) 102 | model = net_fn(cfg, convbuilder).cuda() 103 | else: 104 | model = net.cuda() 105 | 106 | if val_dataloader is None: 107 | val_data = create_dataset(cfg.dataset_name, cfg.dataset_subset, 108 | global_batch_size=cfg.global_batch_size, distributed=False) 109 | num_examples = num_val_examples(cfg.dataset_name) 110 | assert num_examples % cfg.global_batch_size == 0 111 | val_iters = num_val_examples(cfg.dataset_name) // cfg.global_batch_size 112 | print('batchsize={}, {} iters'.format(cfg.global_batch_size, val_iters)) 113 | 114 | criterion = get_criterion(cfg).cuda() 115 | 116 | engine.register_state( 117 | scheduler=None, model=model, optimizer=None) 118 | 119 | if show_variables: 120 | engine.show_variables() 121 | 122 | assert not engine.distributed 123 | 124 | if weights_dict is not None: 125 | engine.load_from_weights_dict(weights_dict) 126 | else: 127 | if cfg.init_weights: 128 | engine.load_checkpoint(cfg.init_weights) 129 | if init_hdf5: 130 | engine.load_hdf5(init_hdf5) 131 | 132 | # engine.save_by_order('smi2_by_order.hdf5') 133 | # engine.load_by_order('smi2_by_order.hdf5') 134 | # engine.save_hdf5('model_files/stami2_lrs4Z.hdf5') 135 | 136 | model.eval() 137 | eval_dict, total_net_time = run_eval(val_data, val_iters, model, criterion, 'TEST', dataset_name=cfg.dataset_name) 138 | val_top1_value = eval_dict['top1'].item() 139 | val_top5_value = eval_dict['top5'].item() 140 | val_loss_value = eval_dict['loss'].item() 141 | 142 | msg = '{},{},{},top1={:.5f},top5={:.5f},loss={:.7f},total_net_time={}'.format(cfg.network_type, init_hdf5 or cfg.init_weights, cfg.dataset_subset, 143 | val_top1_value, val_top5_value, val_loss_value, total_net_time) 144 | if extra_msg is not None: 145 | msg += ', ' + extra_msg 146 | log_important(msg, OVERALL_LOG_FILE) 147 | return eval_dict 148 | 149 | 150 | def general_test(network_type, weights, builder=None, net=None, dataset_name=None, weights_dict=None, 151 | batch_size=None): 152 | if weights is None or weights == 'None': 153 | init_weights = None 154 | init_hdf5 = None 155 | elif weights.endswith('.hdf5'): 156 | init_weights = None 157 | init_hdf5 = weights 158 | else: 159 | init_weights = weights 160 | init_hdf5 = None 161 | 162 | if init_hdf5 is not None: 163 | deps = extract_deps_from_weights_file(init_hdf5) 164 | else: 165 | deps = None 166 | 167 | if deps is None and ('wrnc16' in network_type or 'wrnh16' in network_type): 168 | from constants import wrn_origin_deps_flattened 169 | deps = wrn_origin_deps_flattened(2, 8) 170 | 171 | 172 | if batch_size is None: 173 | batch_size = TEST_BATCH_SIZE 174 | test_config = get_baseconfig_for_test(network_type=network_type, dataset_subset='val', global_batch_size=batch_size, 175 | init_weights=init_weights, deps=deps, dataset_name=dataset_name) 176 | return ding_test(cfg=test_config, net=net, show_variables=True, init_hdf5=init_hdf5, convbuilder=builder, weights_dict=weights_dict) 177 | 178 | 179 | if __name__ == '__main__': 180 | 181 | import sys 182 | network_type = 'resnet50' 183 | weights = sys.argv[1] 184 | dataset_name='imagenet_standard' 185 | from acnet.acnet_builder import ACNetBuilder 186 | builder = 
ACNetBuilder(base_config=None, deploy=False, gamma_init=1/3) 187 | 188 | general_test(network_type=network_type, weights=weights, builder=builder, 189 | dataset_name=dataset_name) --------------------------------------------------------------------------------
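Note on the deploy-time conversion exercised by the two test entry points above: ndp_test.py switches to NoBNBuilder when the weights file name contains 'deploy', while acnet_test.py evaluates the training-time model through ACNetBuilder(deploy=False). The conversion between the two forms (ACBlock.switch_to_deploy / get_equivalent_kernel_bias in acnet/acb.py) rests on folding an eval-mode BatchNorm into the preceding bias-free convolution. A minimal standalone sketch of that identity, independent of the repo code (plain PyTorch; the layer sizes here are arbitrary and chosen only for the check):

import torch
import torch.nn as nn

conv = nn.Conv2d(2, 8, kernel_size=3, padding=1, bias=False)
bn = nn.BatchNorm2d(8)
# give the BN non-trivial statistics so the comparison is meaningful
nn.init.uniform_(bn.running_mean, 0, 0.1)
nn.init.uniform_(bn.running_var, 0, 0.2)
nn.init.uniform_(bn.weight, 0, 0.3)
nn.init.uniform_(bn.bias, 0, 0.4)
bn.eval()

# W' = W * gamma / std (per output channel), b' = beta - running_mean * gamma / std,
# exactly the formula used by ACBlock._fuse_bn_tensor
std = (bn.running_var + bn.eps).sqrt()
fused = nn.Conv2d(2, 8, kernel_size=3, padding=1, bias=True)
fused.weight.data = conv.weight * (bn.weight / std).reshape(-1, 1, 1, 1)
fused.bias.data = bn.bias - bn.running_mean * bn.weight / std

x = torch.randn(1, 2, 32, 32)
with torch.no_grad():
    print(((bn(conv(x)) - fused(x)) ** 2).sum())   # should be ~0

ACBlock.get_equivalent_kernel_bias() applies this fusion to the square, horizontal and vertical branches, then adds the fused 1xK and Kx1 kernels into the centre row and column of the KxK kernel via _add_to_square_kernel, so the three fused biases simply sum.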