├── .gitignore ├── LICENSE ├── README.md ├── data_parallel ├── model.py └── train.py ├── dist_parallel ├── model.py └── train.py └── single_gpu ├── model.py └── train.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | data/ 107 | logs/ 108 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Woongwon Lee 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# pytorch-multigpu
Multi-GPU training code for deep learning with PyTorch. Trains PyramidNet on the CIFAR-10 classification task and compares several ways of multi-GPU training.

# Requirements
- Python 3
- PyTorch 1.0.0+
- torchvision
- tensorboardX

# Usage
### Single GPU
```
cd single_gpu
python train.py
```

### DataParallel
```
cd data_parallel
python train.py --gpu_devices 0 1 2 3 --batch_size 768
```

### DistributedDataParallel
```
cd dist_parallel
python train.py --gpu_devices 0 1 2 3 --batch_size 768
```

# Performance
### Single GPU
- batch size: 240
- batch time: 6 s
- training time: 22 min
- GPU utilization: 99 %
- GPU memory: 10 GB

### DataParallel (4× K80)
- batch size: 768
- batch time: 5 s
- training time: 5 min
- GPU utilization: 99 %
- GPU memory: 10 GB × 4
--------------------------------------------------------------------------------
/data_parallel/model.py:
--------------------------------------------------------------------------------
import os
import time

import torch
import torch.nn as nn
import torch.nn.functional as F


# code from https://github.com/KellerJordan/ResNet-PyTorch-CIFAR10/blob/master/model.py
class IdentityPadding(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1):
        super(IdentityPadding, self).__init__()

        if stride == 2:
            self.pooling = nn.AvgPool2d(kernel_size=2, stride=2, ceil_mode=True)
        else:
            self.pooling = None

        self.add_channels = out_channels - in_channels

    def forward(self, x):
        # zero-pad the channel dimension so the shortcut matches out_channels
        out = F.pad(x, (0, 0, 0, 0, 0, self.add_channels))
        if self.pooling is not None:
            out = self.pooling(out)
        return out


class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1):
        super(ResidualBlock, self).__init__()
        self.bn1 = nn.BatchNorm2d(in_channels)
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)

        self.down_sample = IdentityPadding(in_channels, out_channels, stride)

        self.stride = stride

    def forward(self, x):
        shortcut = self.down_sample(x)
        out = self.bn1(x)
        out = self.conv1(out)
        out = self.bn2(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn3(out)

        out += shortcut
        return out


class PyramidNet(nn.Module):
    def __init__(self, num_layers, alpha, block, num_classes=10):
        super(PyramidNet, self).__init__()
        self.in_channels = 16

        # num_layers = (110 - 2)/6 = 18
        self.num_layers = num_layers
        self.addrate = alpha / (3*self.num_layers*1.0)

        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(16)

        # feature map size = 32x32
        self.layer1 = self.get_layers(block, stride=1)
        # feature map size
= 16x16 73 | self.layer2 = self.get_layers(block, stride=2) 74 | # feature map size = 8x8 75 | self.layer3 = self.get_layers(block, stride=2) 76 | 77 | self.out_channels = int(round(self.out_channels)) 78 | self.bn_out= nn.BatchNorm2d(self.out_channels) 79 | self.relu_out = nn.ReLU(inplace=True) 80 | self.avgpool = nn.AvgPool2d(8, stride=1) 81 | self.fc_out = nn.Linear(self.out_channels, num_classes) 82 | 83 | for m in self.modules(): 84 | if isinstance(m, nn.Conv2d): 85 | nn.init.kaiming_normal_(m.weight, mode='fan_out', 86 | nonlinearity='relu') 87 | elif isinstance(m, nn.BatchNorm2d): 88 | nn.init.constant_(m.weight, 1) 89 | nn.init.constant_(m.bias, 0) 90 | 91 | def get_layers(self, block, stride): 92 | layers_list = [] 93 | for _ in range(self.num_layers - 1): 94 | self.out_channels = self.in_channels + self.addrate 95 | layers_list.append(block(int(round(self.in_channels)), 96 | int(round(self.out_channels)), 97 | stride)) 98 | self.in_channels = self.out_channels 99 | stride=1 100 | 101 | return nn.Sequential(*layers_list) 102 | 103 | def forward(self, x): 104 | x = self.conv1(x) 105 | x = self.bn1(x) 106 | 107 | x = self.layer1(x) 108 | x = self.layer2(x) 109 | x = self.layer3(x) 110 | 111 | x = self.bn_out(x) 112 | x = self.relu_out(x) 113 | x = self.avgpool(x) 114 | x = x.view(x.size(0), -1) 115 | x = self.fc_out(x) 116 | return x 117 | 118 | def save(self, path_to_checkpoints_dir, step): 119 | path_to_checkpoint = os.path.join(path_to_checkpoints_dir, 'model-{:s}-{:d}.pth'.format(time.strftime('%Y%m%d%H%M'), step)) 120 | torch.save(self.state_dict(), path_to_checkpoint) 121 | return path_to_checkpoint 122 | 123 | def load(self, path_to_checkpoint): 124 | self.load_state_dict(torch.load(path_to_checkpoint)) 125 | return self 126 | 127 | def pyramidnet(): 128 | block = ResidualBlock 129 | model = PyramidNet(num_layers=18, alpha=270, block=block) 130 | return model 131 | -------------------------------------------------------------------------------- /data_parallel/train.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import datetime 4 | 5 | import torch 6 | import torch.nn as nn 7 | import torch.optim as optim 8 | from torch.optim import lr_scheduler 9 | import torch.backends.cudnn as cudnn 10 | 11 | import torchvision 12 | import torchvision.transforms as transforms 13 | from torchvision.datasets import CIFAR10 14 | from torch.utils.data import DataLoader 15 | 16 | from model import pyramidnet 17 | import argparse 18 | from tensorboardX import SummaryWriter 19 | 20 | 21 | parser = argparse.ArgumentParser(description='cifar10 classification models') 22 | parser.add_argument('--lr', default=0.1, help='') 23 | parser.add_argument('--resume', default=None, help='') 24 | parser.add_argument('--batch_size', type=int, default=768, help='') 25 | parser.add_argument('--num_worker', type=int, default=4, help='') 26 | parser.add_argument("--gpu_devices", type=int, nargs='+', default=None, help="") 27 | args = parser.parse_args() 28 | 29 | gpu_devices = ','.join([str(id) for id in args.gpu_devices]) 30 | os.environ["CUDA_VISIBLE_DEVICES"] = gpu_devices 31 | 32 | 33 | def main(): 34 | best_acc = 0 35 | 36 | device = 'cuda' if torch.cuda.is_available() else 'cpu' 37 | 38 | print('==> Preparing data..') 39 | transforms_train = transforms.Compose([ 40 | transforms.RandomCrop(32, padding=4), 41 | transforms.RandomHorizontalFlip(), 42 | transforms.ToTensor(), 43 | transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 
0.2010))]) 44 | 45 | dataset_train = CIFAR10(root='../data', train=True, download=True, 46 | transform=transforms_train) 47 | 48 | train_loader = DataLoader(dataset_train, batch_size=args.batch_size, 49 | shuffle=True, num_workers=args.num_worker) 50 | 51 | # there are 10 classes so the dataset name is cifar-10 52 | classes = ('plane', 'car', 'bird', 'cat', 'deer', 53 | 'dog', 'frog', 'horse', 'ship', 'truck') 54 | 55 | print('==> Making model..') 56 | 57 | net = pyramidnet() 58 | net = nn.DataParallel(net) 59 | net = net.to(device) 60 | num_params = sum(p.numel() for p in net.parameters() if p.requires_grad) 61 | print('The number of parameters of model is', num_params) 62 | 63 | criterion = nn.CrossEntropyLoss() 64 | optimizer = optim.Adam(net.parameters(), lr=args.lr) 65 | # optimizer = optim.SGD(net.parameters(), lr=args.lr, 66 | # momentum=0.9, weight_decay=1e-4) 67 | 68 | train(net, criterion, optimizer, train_loader, device) 69 | 70 | 71 | def train(net, criterion, optimizer, train_loader, device): 72 | net.train() 73 | 74 | train_loss = 0 75 | correct = 0 76 | total = 0 77 | 78 | epoch_start = time.time() 79 | for batch_idx, (inputs, targets) in enumerate(train_loader): 80 | start = time.time() 81 | 82 | inputs = inputs.to(device) 83 | targets = targets.to(device) 84 | outputs = net(inputs) 85 | loss = criterion(outputs, targets) 86 | 87 | optimizer.zero_grad() 88 | loss.backward() 89 | optimizer.step() 90 | 91 | train_loss += loss.item() 92 | _, predicted = outputs.max(1) 93 | total += targets.size(0) 94 | correct += predicted.eq(targets).sum().item() 95 | 96 | acc = 100 * correct / total 97 | 98 | batch_time = time.time() - start 99 | 100 | if batch_idx % 20 == 0: 101 | print('Epoch: [{}/{}]| loss: {:.3f} | acc: {:.3f} | batch time: {:.3f}s '.format( 102 | batch_idx, len(train_loader), train_loss/(batch_idx+1), acc, batch_time)) 103 | 104 | elapse_time = time.time() - epoch_start 105 | elapse_time = datetime.timedelta(seconds=elapse_time) 106 | print("Training time {}".format(elapse_time)) 107 | 108 | 109 | if __name__=='__main__': 110 | main() -------------------------------------------------------------------------------- /dist_parallel/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | # code from https://github.com/KellerJordan/ResNet-PyTorch-CIFAR10/blob/master/model.py 7 | class IdentityPadding(nn.Module): 8 | def __init__(self, in_channels, out_channels, stride=1): 9 | super(IdentityPadding, self).__init__() 10 | 11 | if stride == 2: 12 | self.pooling = nn.AvgPool2d(kernel_size=2, stride=2, ceil_mode=True) 13 | else: 14 | self.pooling = None 15 | 16 | self.add_channels = out_channels - in_channels 17 | 18 | def forward(self, x): 19 | out = F.pad(x, (0, 0, 0, 0, 0, self.add_channels)) 20 | if self.pooling is not None: 21 | out = self.pooling(out) 22 | return out 23 | 24 | 25 | class ResidualBlock(nn.Module): 26 | def __init__(self, in_channels, out_channels, stride=1): 27 | super(ResidualBlock, self).__init__() 28 | self.bn1 = nn.BatchNorm2d(in_channels) 29 | self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, 30 | stride=stride, padding=1, bias=False) 31 | self.bn2 = nn.BatchNorm2d(out_channels) 32 | self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, 33 | stride=1, padding=1, bias=False) 34 | self.bn3 = nn.BatchNorm2d(out_channels) 35 | self.relu = nn.ReLU(inplace=True) 36 | 37 | self.down_sample = 
IdentityPadding(in_channels, out_channels, stride) 38 | 39 | self.stride = stride 40 | 41 | def forward(self, x): 42 | shortcut = self.down_sample(x) 43 | out = self.bn1(x) 44 | out = self.conv1(out) 45 | out = self.bn2(out) 46 | out = self.relu(out) 47 | out = self.conv2(out) 48 | out = self.bn3(out) 49 | 50 | out += shortcut 51 | return out 52 | 53 | 54 | class PyramidNet(nn.Module): 55 | def __init__(self, num_layers, alpha, block, num_classes=10): 56 | super(PyramidNet, self).__init__() 57 | self.in_channels = 16 58 | 59 | # num_layers = (110 - 2)/6 = 18 60 | self.num_layers = num_layers 61 | self.addrate = alpha / (3*self.num_layers*1.0) 62 | 63 | self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, 64 | stride=1, padding=1, bias=False) 65 | self.bn1 = nn.BatchNorm2d(16) 66 | 67 | # feature map size = 32x32 68 | self.layer1 = self.get_layers(block, stride=1) 69 | # feature map size = 16x16 70 | self.layer2 = self.get_layers(block, stride=2) 71 | # feature map size = 8x8 72 | self.layer3 = self.get_layers(block, stride=2) 73 | 74 | self.out_channels = int(round(self.out_channels)) 75 | self.bn_out= nn.BatchNorm2d(self.out_channels) 76 | self.relu_out = nn.ReLU(inplace=True) 77 | self.avgpool = nn.AvgPool2d(8, stride=1) 78 | self.fc_out = nn.Linear(self.out_channels, num_classes) 79 | 80 | for m in self.modules(): 81 | if isinstance(m, nn.Conv2d): 82 | nn.init.kaiming_normal_(m.weight, mode='fan_out', 83 | nonlinearity='relu') 84 | elif isinstance(m, nn.BatchNorm2d): 85 | nn.init.constant_(m.weight, 1) 86 | nn.init.constant_(m.bias, 0) 87 | 88 | def get_layers(self, block, stride): 89 | layers_list = [] 90 | for _ in range(self.num_layers - 1): 91 | self.out_channels = self.in_channels + self.addrate 92 | layers_list.append(block(int(round(self.in_channels)), 93 | int(round(self.out_channels)), 94 | stride)) 95 | self.in_channels = self.out_channels 96 | stride=1 97 | 98 | return nn.Sequential(*layers_list) 99 | 100 | def forward(self, x): 101 | x = self.conv1(x) 102 | x = self.bn1(x) 103 | 104 | x = self.layer1(x) 105 | x = self.layer2(x) 106 | x = self.layer3(x) 107 | 108 | x = self.bn_out(x) 109 | x = self.relu_out(x) 110 | x = self.avgpool(x) 111 | x = x.view(x.size(0), -1) 112 | x = self.fc_out(x) 113 | return x 114 | 115 | 116 | def pyramidnet(): 117 | block = ResidualBlock 118 | model = PyramidNet(num_layers=18, alpha=270, block=block) 119 | return model 120 | -------------------------------------------------------------------------------- /dist_parallel/train.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import datetime 4 | 5 | import torch 6 | import torch.nn as nn 7 | import torch.optim as optim 8 | from torch.optim import lr_scheduler 9 | import torch.backends.cudnn as cudnn 10 | 11 | import torchvision 12 | import torchvision.transforms as transforms 13 | from torchvision.datasets import CIFAR10 14 | from torch.utils.data import DataLoader 15 | 16 | import torch.distributed as dist 17 | import torch.multiprocessing as mp 18 | import torch.utils.data.distributed 19 | 20 | from model import pyramidnet 21 | import argparse 22 | from tensorboardX import SummaryWriter 23 | 24 | 25 | parser = argparse.ArgumentParser(description='cifar10 classification models') 26 | parser.add_argument('--lr', default=0.1, help='') 27 | parser.add_argument('--resume', default=None, help='') 28 | parser.add_argument('--batch_size', type=int, default=768, help='') 29 | parser.add_argument('--num_workers', 
type=int, default=4, help='') 30 | parser.add_argument("--gpu_devices", type=int, nargs='+', default=None, help="") 31 | 32 | parser.add_argument('--gpu', default=None, type=int, help='GPU id to use.') 33 | parser.add_argument('--dist-url', default='tcp://127.0.0.1:3456', type=str, help='') 34 | parser.add_argument('--dist-backend', default='nccl', type=str, help='') 35 | parser.add_argument('--rank', default=0, type=int, help='') 36 | parser.add_argument('--world_size', default=1, type=int, help='') 37 | parser.add_argument('--distributed', action='store_true', help='') 38 | args = parser.parse_args() 39 | 40 | gpu_devices = ','.join([str(id) for id in args.gpu_devices]) 41 | os.environ["CUDA_VISIBLE_DEVICES"] = gpu_devices 42 | 43 | 44 | def main(): 45 | args = parser.parse_args() 46 | 47 | ngpus_per_node = torch.cuda.device_count() 48 | 49 | args.world_size = ngpus_per_node * args.world_size 50 | mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args)) 51 | 52 | 53 | def main_worker(gpu, ngpus_per_node, args): 54 | args.gpu = gpu 55 | ngpus_per_node = torch.cuda.device_count() 56 | print("Use GPU: {} for training".format(args.gpu)) 57 | 58 | args.rank = args.rank * ngpus_per_node + gpu 59 | dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url, 60 | world_size=args.world_size, rank=args.rank) 61 | 62 | print('==> Making model..') 63 | net = pyramidnet() 64 | torch.cuda.set_device(args.gpu) 65 | net.cuda(args.gpu) 66 | args.batch_size = int(args.batch_size / ngpus_per_node) 67 | args.num_workers = int(args.num_workers / ngpus_per_node) 68 | net = torch.nn.parallel.DistributedDataParallel(net, device_ids=[args.gpu]) 69 | num_params = sum(p.numel() for p in net.parameters() if p.requires_grad) 70 | print('The number of parameters of model is', num_params) 71 | 72 | print('==> Preparing data..') 73 | transforms_train = transforms.Compose([ 74 | transforms.RandomCrop(32, padding=4), 75 | transforms.RandomHorizontalFlip(), 76 | transforms.ToTensor(), 77 | transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))]) 78 | 79 | dataset_train = CIFAR10(root='../data', train=True, download=True, 80 | transform=transforms_train) 81 | train_sampler = torch.utils.data.distributed.DistributedSampler(dataset_train) 82 | train_loader = DataLoader(dataset_train, batch_size=args.batch_size, 83 | shuffle=(train_sampler is None), num_workers=args.num_workers, 84 | sampler=train_sampler) 85 | 86 | # there are 10 classes so the dataset name is cifar-10 87 | classes = ('plane', 'car', 'bird', 'cat', 'deer', 88 | 'dog', 'frog', 'horse', 'ship', 'truck') 89 | 90 | criterion = nn.CrossEntropyLoss() 91 | optimizer = optim.SGD(net.parameters(), lr=args.lr, 92 | momentum=0.9, weight_decay=1e-4) 93 | 94 | train(net, criterion, optimizer, train_loader, args.gpu) 95 | 96 | 97 | def train(net, criterion, optimizer, train_loader, device): 98 | net.train() 99 | 100 | train_loss = 0 101 | correct = 0 102 | total = 0 103 | 104 | epoch_start = time.time() 105 | for batch_idx, (inputs, targets) in enumerate(train_loader): 106 | start = time.time() 107 | 108 | inputs = inputs.cuda(device) 109 | targets = targets.cuda(device) 110 | outputs = net(inputs) 111 | loss = criterion(outputs, targets) 112 | 113 | optimizer.zero_grad() 114 | loss.backward() 115 | optimizer.step() 116 | 117 | train_loss += loss.item() 118 | _, predicted = outputs.max(1) 119 | total += targets.size(0) 120 | correct += predicted.eq(targets).sum().item() 121 | 122 | acc = 100 * correct / total 123 | 124 
| batch_time = time.time() - start 125 | 126 | if batch_idx % 20 == 0: 127 | print('Epoch: [{}/{}]| loss: {:.3f} | acc: {:.3f} | batch time: {:.3f}s '.format( 128 | batch_idx, len(train_loader), train_loss/(batch_idx+1), acc, batch_time)) 129 | 130 | elapse_time = time.time() - epoch_start 131 | elapse_time = datetime.timedelta(seconds=elapse_time) 132 | print("Training time {}".format(elapse_time)) 133 | 134 | 135 | if __name__=='__main__': 136 | main() -------------------------------------------------------------------------------- /single_gpu/model.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | 9 | # code from https://github.com/KellerJordan/ResNet-PyTorch-CIFAR10/blob/master/model.py 10 | class IdentityPadding(nn.Module): 11 | def __init__(self, in_channels, out_channels, stride=1): 12 | super(IdentityPadding, self).__init__() 13 | 14 | if stride == 2: 15 | self.pooling = nn.AvgPool2d(kernel_size=2, stride=2, ceil_mode=True) 16 | else: 17 | self.pooling = None 18 | 19 | self.add_channels = out_channels - in_channels 20 | 21 | def forward(self, x): 22 | out = F.pad(x, (0, 0, 0, 0, 0, self.add_channels)) 23 | if self.pooling is not None: 24 | out = self.pooling(out) 25 | return out 26 | 27 | 28 | class ResidualBlock(nn.Module): 29 | def __init__(self, in_channels, out_channels, stride=1): 30 | super(ResidualBlock, self).__init__() 31 | self.bn1 = nn.BatchNorm2d(in_channels) 32 | self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, 33 | stride=stride, padding=1, bias=False) 34 | self.bn2 = nn.BatchNorm2d(out_channels) 35 | self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, 36 | stride=1, padding=1, bias=False) 37 | self.bn3 = nn.BatchNorm2d(out_channels) 38 | self.relu = nn.ReLU(inplace=True) 39 | 40 | self.down_sample = IdentityPadding(in_channels, out_channels, stride) 41 | 42 | self.stride = stride 43 | 44 | def forward(self, x): 45 | shortcut = self.down_sample(x) 46 | out = self.bn1(x) 47 | out = self.conv1(out) 48 | out = self.bn2(out) 49 | out = self.relu(out) 50 | out = self.conv2(out) 51 | out = self.bn3(out) 52 | 53 | out += shortcut 54 | return out 55 | 56 | 57 | class PyramidNet(nn.Module): 58 | def __init__(self, num_layers, alpha, block, num_classes=10): 59 | super(PyramidNet, self).__init__() 60 | self.in_channels = 16 61 | 62 | # num_layers = (110 - 2)/6 = 18 63 | self.num_layers = num_layers 64 | self.addrate = alpha / (3*self.num_layers*1.0) 65 | 66 | self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, 67 | stride=1, padding=1, bias=False) 68 | self.bn1 = nn.BatchNorm2d(16) 69 | 70 | # feature map size = 32x32 71 | self.layer1 = self.get_layers(block, stride=1) 72 | # feature map size = 16x16 73 | self.layer2 = self.get_layers(block, stride=2) 74 | # feature map size = 8x8 75 | self.layer3 = self.get_layers(block, stride=2) 76 | 77 | self.out_channels = int(round(self.out_channels)) 78 | self.bn_out= nn.BatchNorm2d(self.out_channels) 79 | self.relu_out = nn.ReLU(inplace=True) 80 | self.avgpool = nn.AvgPool2d(8, stride=1) 81 | self.fc_out = nn.Linear(self.out_channels, num_classes) 82 | 83 | for m in self.modules(): 84 | if isinstance(m, nn.Conv2d): 85 | nn.init.kaiming_normal_(m.weight, mode='fan_out', 86 | nonlinearity='relu') 87 | elif isinstance(m, nn.BatchNorm2d): 88 | nn.init.constant_(m.weight, 1) 89 | nn.init.constant_(m.bias, 0) 90 | 91 | def get_layers(self, 
block, stride): 92 | layers_list = [] 93 | for _ in range(self.num_layers - 1): 94 | self.out_channels = self.in_channels + self.addrate 95 | layers_list.append(block(int(round(self.in_channels)), 96 | int(round(self.out_channels)), 97 | stride)) 98 | self.in_channels = self.out_channels 99 | stride=1 100 | 101 | return nn.Sequential(*layers_list) 102 | 103 | def forward(self, x): 104 | x = self.conv1(x) 105 | x = self.bn1(x) 106 | 107 | x = self.layer1(x) 108 | x = self.layer2(x) 109 | x = self.layer3(x) 110 | 111 | x = self.bn_out(x) 112 | x = self.relu_out(x) 113 | x = self.avgpool(x) 114 | x = x.view(x.size(0), -1) 115 | x = self.fc_out(x) 116 | return x 117 | 118 | def save(self, path_to_checkpoints_dir, step): 119 | path_to_checkpoint = os.path.join(path_to_checkpoints_dir, 'model-{:s}-{:d}.pth'.format(time.strftime('%Y%m%d%H%M'), step)) 120 | torch.save(self.state_dict(), path_to_checkpoint) 121 | return path_to_checkpoint 122 | 123 | def load(self, path_to_checkpoint): 124 | self.load_state_dict(torch.load(path_to_checkpoint)) 125 | return self 126 | 127 | def pyramidnet(): 128 | block = ResidualBlock 129 | model = PyramidNet(num_layers=18, alpha=270, block=block) 130 | return model 131 | -------------------------------------------------------------------------------- /single_gpu/train.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import datetime 4 | 5 | import torch 6 | import torch.nn as nn 7 | import torch.optim as optim 8 | from torch.optim import lr_scheduler 9 | import torch.backends.cudnn as cudnn 10 | 11 | import torchvision 12 | import torchvision.transforms as transforms 13 | from torchvision.datasets import CIFAR10 14 | from torch.utils.data import DataLoader 15 | 16 | from model import pyramidnet 17 | import argparse 18 | from tensorboardX import SummaryWriter 19 | 20 | 21 | parser = argparse.ArgumentParser(description='cifar10 classification models') 22 | parser.add_argument('--lr', default=0.1, help='') 23 | parser.add_argument('--resume', default=None, help='') 24 | parser.add_argument('--batch_size', type=int, default=512, help='') 25 | parser.add_argument('--num_worker', type=int, default=4, help='') 26 | args = parser.parse_args() 27 | 28 | 29 | def main(): 30 | best_acc = 0 31 | 32 | device = 'cuda' if torch.cuda.is_available() else 'cpu' 33 | 34 | print('==> Preparing data..') 35 | transforms_train = transforms.Compose([ 36 | transforms.RandomCrop(32, padding=4), 37 | transforms.RandomHorizontalFlip(), 38 | transforms.ToTensor(), 39 | transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))]) 40 | 41 | dataset_train = CIFAR10(root='../data', train=True, download=True, 42 | transform=transforms_train) 43 | 44 | train_loader = DataLoader(dataset_train, batch_size=args.batch_size, 45 | shuffle=True, num_workers=args.num_worker) 46 | 47 | # there are 10 classes so the dataset name is cifar-10 48 | classes = ('plane', 'car', 'bird', 'cat', 'deer', 49 | 'dog', 'frog', 'horse', 'ship', 'truck') 50 | 51 | print('==> Making model..') 52 | 53 | net = pyramidnet() 54 | net = net.to(device) 55 | num_params = sum(p.numel() for p in net.parameters() if p.requires_grad) 56 | print('The number of parameters of model is', num_params) 57 | 58 | criterion = nn.CrossEntropyLoss() 59 | optimizer = optim.SGD(net.parameters(), lr=args.lr, 60 | momentum=0.9, weight_decay=1e-4) 61 | 62 | train(net, criterion, optimizer, train_loader, device) 63 | 64 | 65 | def train(net, criterion, optimizer, 
          train_loader, device):
    net.train()

    train_loss = 0
    correct = 0
    total = 0

    epoch_start = time.time()
    for batch_idx, (inputs, targets) in enumerate(train_loader):
        start = time.time()

        inputs = inputs.to(device)
        targets = targets.to(device)
        outputs = net(inputs)
        loss = criterion(outputs, targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()

        acc = 100 * correct / total

        batch_time = time.time() - start

        if batch_idx % 20 == 0:
            # batch_idx counts batches within the single training epoch
            print('Batch: [{}/{}] | loss: {:.3f} | acc: {:.3f} | batch time: {:.3f}s'.format(
                batch_idx, len(train_loader), train_loss / (batch_idx + 1), acc, batch_time))

    elapsed_time = time.time() - epoch_start
    elapsed_time = datetime.timedelta(seconds=elapsed_time)
    print("Training time {}".format(elapsed_time))


if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------
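The train.py scripts above run a single training epoch and never touch the CIFAR-10 test split, even though `single_gpu/model.py` defines `save`/`load` helpers. Below is a minimal evaluation sketch, not part of the original repository: it assumes it is run from `single_gpu/` (so `from model import pyramidnet` resolves), and the checkpoint path and the `evaluate` function name are illustrative.

```python
# evaluate.py -- minimal sketch, not part of the original repository.
import torch
import torchvision.transforms as transforms
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader

from model import pyramidnet  # single_gpu/model.py


def evaluate(checkpoint_path, batch_size=256):
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # No augmentation at test time; same normalization as the training scripts.
    transforms_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])

    dataset_test = CIFAR10(root='../data', train=False, download=True,
                           transform=transforms_test)
    test_loader = DataLoader(dataset_test, batch_size=batch_size,
                             shuffle=False, num_workers=4)

    net = pyramidnet()
    # PyramidNet.load() would also work; load_state_dict with map_location is
    # used here so CPU-only machines can still run the evaluation.
    net.load_state_dict(torch.load(checkpoint_path, map_location=device))
    net = net.to(device)
    net.eval()

    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = net(inputs)
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

    print('Test accuracy: {:.2f}%'.format(100 * correct / total))


if __name__ == '__main__':
    evaluate('logs/model-201901010000-0.pth')  # hypothetical checkpoint name
```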
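Note also that `dist_parallel/train.py` makes a single pass over the training set, so it never needs `DistributedSampler.set_epoch()`. If the loop were extended to multiple epochs, the sampler has to be told the epoch number, otherwise every process reuses the same shuffle order each epoch. A sketch of such an extension, reusing the objects created in `main_worker()` (the epoch count and checkpoint filename are assumptions, not part of the repository):

```python
# Sketch: multi-epoch variant of the call at the end of main_worker() in
# dist_parallel/train.py. Assumes net, criterion, optimizer, train_loader,
# train_sampler and args are already set up as in that function.
num_epochs = 90  # assumed value

for epoch in range(num_epochs):
    # Reseed the per-process shard; without this, DistributedSampler yields
    # the same ordering every epoch.
    train_sampler.set_epoch(epoch)
    train(net, criterion, optimizer, train_loader, args.gpu)

    # Usually only rank 0 writes checkpoints so processes do not clobber
    # each other's files; the filename is illustrative.
    if args.rank == 0:
        torch.save(net.module.state_dict(), 'pyramidnet_ddp.pth')
```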