├── CIFAR10
    ├── checkpoints
    │   ├── regnet_200m_
    │   │   └── model_best.pth.tar
    │   ├── regnet_400m_
    │   │   └── model_best.pth.tar
    │   └── regnet_600m_
    │   │   └── model_best.pth.tar
    ├── main.py
    └── models
    │   ├── __init__.py
    │   ├── reglayers.py
    │   └── regnet.py
├── ImageNet
    ├── ckpts
    │   ├── regnet_200m.pth.tar
    │   ├── regnet_400m.pth.tar
    │   └── regnet_600m.pth.tar
    └── models
    │   ├── reglayers.py
    │   └── regnet.py
├── LICENSE
├── README.md
└── figs
    └── image-20200427104303632.png


/CIFAR10/checkpoints/regnet_200m_/model_best.pth.tar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yhhhli/RegNet-Pytorch/6035ff822595338efec9a4de21134b134c7dcaa8/CIFAR10/checkpoints/regnet_200m_/model_best.pth.tar


--------------------------------------------------------------------------------
/CIFAR10/checkpoints/regnet_400m_/model_best.pth.tar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yhhhli/RegNet-Pytorch/6035ff822595338efec9a4de21134b134c7dcaa8/CIFAR10/checkpoints/regnet_400m_/model_best.pth.tar


--------------------------------------------------------------------------------
/CIFAR10/checkpoints/regnet_600m_/model_best.pth.tar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yhhhli/RegNet-Pytorch/6035ff822595338efec9a4de21134b134c7dcaa8/CIFAR10/checkpoints/regnet_600m_/model_best.pth.tar


--------------------------------------------------------------------------------
/CIFAR10/main.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | import os
  3 | import time
  4 | import shutil
  5 | import torch
  6 | import torch.nn as nn
  7 | import torch.backends.cudnn as cudnn
  8 | import torchvision
  9 | import torchvision.transforms as transforms
 10 | import models
 11 | 
 12 | model_names = sorted(name for name in models.__dict__
 13 |                      if name.islower() and not name.startswith("__")
 14 |                      and callable(models.__dict__[name]))
 15 | 
 16 | parser = argparse.ArgumentParser(description='PyTorch Cifar10 Training')
 17 | parser.add_argument('--epochs', default=300, type=int, metavar='N', help='number of total epochs to run')
 18 | parser.add_argument('-a', '--arch', metavar='ARCH', default='regnet_200m')
 19 | parser.add_argument('--start-epoch', default=0, type=int, metavar='N', help='manual epoch number (useful on restarts)')
 20 | parser.add_argument('-b', '--batch-size', default=128, type=int, metavar='N', help='mini-batch size (default: 128),only used for train')
 21 | parser.add_argument('--lr', '--learning-rate', default=0.1, type=float, metavar='LR', help='initial learning rate')
 22 | parser.add_argument('--momentum', default=0.9, type=float, metavar='M', help='momentum')
 23 | parser.add_argument('--weight-decay', '--wd', default=1e-4, type=float, metavar='W', help='weight decay (default: 1e-4)')
 24 | parser.add_argument('--print-freq', '-p', default=100, type=int, metavar='N', help='print frequency (default: 10)')
 25 | parser.add_argument('--resume', default='', type=str, metavar='PATH', help='path to latest checkpoint (default: none)')
 26 | parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true', help='evaluate model on validation set')
 27 | parser.add_argument('-ct', '--cifar-type', default='10', type=int, metavar='CT', help='10 for cifar10,100 for cifar100 (default: 10)')
 28 | parser.add_argument('-id', '--device', default='0', type=str, help='gpu device')
 29 | 
 30 | best_prec = 0
 31 | args = parser.parse_args()
 32 | 
 33 | def main():
 34 | 
 35 |     global args, best_prec
 36 |     use_gpu = torch.cuda.is_available()
 37 |     print('=> Building model...')
 38 |     if use_gpu:
 39 |         assert args.arch in model_names, 'Network Architecture Not Supported'
 40 |         model = models.__dict__[args.arch]()
 41 |         model = model.cuda()
 42 |         criterion = nn.CrossEntropyLoss().cuda()
 43 |         optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=0.9, weight_decay=args.weight_decay)
 44 |         schedular = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=args.epochs)
 45 |         cudnn.benchmark = True
 46 |     else:
 47 |         print('Cuda is not available!')
 48 |         return
 49 | 
 50 |     if not os.path.exists('checkpoints'):
 51 |         os.makedirs('checkpoints')
 52 |     fdir = 'checkpoints/'+str(args.arch)+'_'
 53 |     if not os.path.exists(fdir):
 54 |         os.makedirs(fdir)
 55 | 
 56 |     print('=> loading cifar10 data...')
 57 |     normalize = transforms.Normalize(mean=[0.491, 0.482, 0.447], std=[0.247, 0.243, 0.262])
 58 |     train_dataset = torchvision.datasets.CIFAR10(
 59 |         root='./data',
 60 |         train=True,
 61 |         download=True,
 62 |         transform=transforms.Compose([
 63 |             transforms.RandomCrop(32, padding=4),
 64 |             transforms.RandomHorizontalFlip(),
 65 |             transforms.ToTensor(),
 66 |             normalize,
 67 |         ]))
 68 |     trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True, num_workers=2)
 69 | 
 70 |     test_dataset = torchvision.datasets.CIFAR10(
 71 |         root='./data',
 72 |         train=False,
 73 |         download=True,
 74 |         transform=transforms.Compose([
 75 |             transforms.ToTensor(),
 76 |             normalize,
 77 |         ]))
 78 |     testloader = torch.utils.data.DataLoader(test_dataset, batch_size=100, shuffle=False, num_workers=2)
 79 | 
 80 |     if args.evaluate:
 81 |         ckpt = torch.load(fdir+'/model_best.pth.tar')
 82 |         model.load_state_dict(ckpt['state_dict'])
 83 |         validate(testloader, model, criterion)
 84 |         return
 85 | 
 86 |     for epoch in range(args.start_epoch, args.epochs):
 87 | 
 88 |         # train for one epoch
 89 |         train(trainloader, model, criterion, optimizer, epoch)
 90 |         schedular.step()
 91 |         # evaluate on test set
 92 |         prec = validate(testloader, model, criterion)
 93 | 
 94 |         # remember best precision and save checkpoint
 95 |         is_best = prec > best_prec
 96 |         best_prec = max(prec,best_prec)
 97 |         print('best acc: {:1f}'.format(best_prec))
 98 |         save_checkpoint({
 99 |             'epoch': epoch + 1,
100 |             'state_dict': model.state_dict(),
101 |             'best_prec': best_prec,
102 |             'optimizer': optimizer.state_dict(),
103 |         }, is_best, fdir)
104 | 
105 | 
class AverageMeter(object):
    """Keep the latest sample of a metric together with its running weighted mean.

    Attributes:
        val: the value given to the most recent ``update`` call.
        sum: total of ``val * n`` over every update since the last reset.
        count: total of ``n`` over every update since the last reset.
        avg: ``sum / count`` as computed by the most recent update.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Set every statistic back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as the newest sample, counted ``n`` times."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count
122 | 
123 | 
def train(trainloader, model, criterion, optimizer, epoch):
    """Train ``model`` for one pass over ``trainloader``.

    Every batch is moved to the GPU with ``.cuda()``, pushed through the model,
    scored with ``criterion`` and used for one optimizer step. Data-loading
    time, batch time, loss and top-1 precision are tracked with
    ``AverageMeter`` instances and printed every ``args.print_freq``
    iterations.

    Args:
        trainloader: iterable yielding ``(input, target)`` batches.
        model: network to optimise; switched to training mode here.
        criterion: loss callable taking ``(output, target)``.
        optimizer: optimizer whose ``zero_grad``/``step`` are called per batch.
        epoch: epoch index, used only in the progress line.
    """
    batch_time, data_time = AverageMeter(), AverageMeter()
    losses, top1 = AverageMeter(), AverageMeter()

    progress_line = ('Epoch: [{0}][{1}/{2}]\t'
                     'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                     'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                     'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                     'Prec {top1.val:.3f}% ({top1.avg:.3f}%)')

    model.train()

    tick = time.time()
    for step, (images, labels) in enumerate(trainloader):
        # time spent waiting for the loader
        data_time.update(time.time() - tick)

        images = images.cuda()
        labels = labels.cuda()

        # forward pass
        logits = model(images)
        loss = criterion(logits, labels)

        # bookkeeping, weighted by the number of samples in the batch
        n_samples = images.size(0)
        prec = accuracy(logits, labels)[0]
        losses.update(loss.item(), n_samples)
        top1.update(prec.item(), n_samples)

        # backward pass and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # total time for this iteration
        batch_time.update(time.time() - tick)
        tick = time.time()

        if step % args.print_freq != 0:
            continue
        print(progress_line.format(
            epoch, step, len(trainloader), batch_time=batch_time,
            data_time=data_time, loss=losses, top1=top1))
167 | 
168 | 
def validate(val_loader, model, criterion):
    """Evaluate ``model`` on ``val_loader`` and return the average top-1 precision.

    The model is put in eval mode and the whole loop runs under
    ``torch.no_grad()``. Batches are moved to the GPU with ``.cuda()``. A
    progress line is printed every ``args.print_freq`` iterations and a summary
    line at the end.

    Args:
        val_loader: iterable yielding ``(input, target)`` batches.
        model: network to evaluate.
        criterion: loss callable taking ``(output, target)``.

    Returns:
        The sample-weighted mean top-1 precision (in percent) over the loader.
    """
    batch_time, losses, top1 = AverageMeter(), AverageMeter(), AverageMeter()

    progress_line = ('Test: [{0}/{1}]\t'
                     'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                     'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                     'Prec {top1.val:.3f}% ({top1.avg:.3f}%)')

    # evaluation mode
    model.eval()

    tick = time.time()
    with torch.no_grad():
        for step, (images, labels) in enumerate(val_loader):
            images = images.cuda()
            labels = labels.cuda()

            # forward pass
            logits = model(images)
            loss = criterion(logits, labels)

            # bookkeeping, weighted by the number of samples in the batch
            n_samples = images.size(0)
            prec = accuracy(logits, labels)[0]
            losses.update(loss.item(), n_samples)
            top1.update(prec.item(), n_samples)

            # total time for this iteration
            batch_time.update(time.time() - tick)
            tick = time.time()

            if step % args.print_freq != 0:
                continue
            print(progress_line.format(
                step, len(val_loader), batch_time=batch_time, loss=losses,
                top1=top1))

    print(' * Prec {top1.avg:.3f}% '.format(top1=top1))

    return top1.avg
206 | 
207 | 
def save_checkpoint(state, is_best, fdir):
    """Write ``state`` to ``fdir/checkpoint.pth`` and optionally keep it as the best model.

    Args:
        state: object handed to ``torch.save``.
        is_best: when truthy, the file just written is also copied to
            ``fdir/model_best.pth.tar``.
        fdir: directory that receives the files.
    """
    latest_path = os.path.join(fdir, 'checkpoint.pth')
    torch.save(state, latest_path)
    if not is_best:
        return
    best_path = os.path.join(fdir, 'model_best.pth.tar')
    shutil.copyfile(latest_path, best_path)
213 | 
214 | 
def accuracy(output, target, topk=(1,)):
    """Compute the precision@k for the specified values of k.

    Args:
        output: score tensor whose dimension 1 indexes the classes.
        target: tensor of ground-truth class indices, one per sample.
        topk: iterable of ``k`` values to evaluate.

    Returns:
        A list with one 0-dim tensor per entry of ``topk``: the percentage of
        samples whose target is among the ``k`` highest-scoring classes.
    """
    maxk = max(topk)
    batch_size = target.size(0)

    # Indices of the maxk best classes per sample, transposed to (maxk, batch)
    # so that row i holds every sample's i-th best guess.
    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        # ``reshape`` rather than ``view``: ``correct`` inherits the
        # non-contiguous layout of the transposed ``pred``, so ``view(-1)``
        # raises a RuntimeError for k > 1 on current PyTorch releases.
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res
229 | 
230 | 
if __name__=='__main__':
    # Select the visible GPU(s) from --device before main() queries CUDA.
    os.environ["CUDA_VISIBLE_DEVICES"] = args.device
    main()
234 | 


--------------------------------------------------------------------------------
/CIFAR10/models/__init__.py:
--------------------------------------------------------------------------------
1 | from .regnet import regnet_200m, regnet_400m, regnet_600m, regnet_800m, regnet_1600m, regnet_3200m, regnet_4000m, regnet_6400m


--------------------------------------------------------------------------------
/CIFAR10/models/reglayers.py:
--------------------------------------------------------------------------------
  1 | import torch.nn as nn
  2 | import math
  3 | import torch
  4 | 
  5 | 
  6 | class ResStemCifar(nn.Module):
  7 |     """ResNet stem for CIFAR."""
  8 | 
  9 |     def __init__(self, w_in, w_out):
 10 |         super(ResStemCifar, self).__init__()
 11 |         self._construct(w_in, w_out)
 12 | 
 13 |     def _construct(self, w_in, w_out):
 14 |         # 3x3, BN, ReLU
 15 |         self.conv = nn.Conv2d(
 16 |             w_in, w_out, kernel_size=3, stride=1, padding=1, bias=False
 17 |         )
 18 |         self.bn = nn.BatchNorm2d(w_out)
 19 |         self.relu = nn.ReLU(True)
 20 | 
 21 |     def forward(self, x):
 22 |         for layer in self.children():
 23 |             x = layer(x)
 24 |         return x
 25 | 
 26 | 
 27 | class SE(nn.Module):
 28 |     """Squeeze-and-Excitation (SE) block"""
 29 | 
 30 |     def __init__(self, w_in, w_se):
 31 |         super(SE, self).__init__()
 32 |         self._construct(w_in, w_se)
 33 | 
 34 |     def _construct(self, w_in, w_se):
 35 |         # AvgPool
 36 |         self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
 37 |         # FC, Activation, FC, Sigmoid
 38 |         self.f_ex = nn.Sequential(
 39 |             nn.Conv2d(w_in, w_se, kernel_size=1, bias=True),
 40 |             nn.ReLU(inplace=True),
 41 |             nn.Conv2d(w_se, w_in, kernel_size=1, bias=True),
 42 |             nn.Sigmoid(),
 43 |         )
 44 | 
 45 |     def forward(self, x):
 46 |         return x * self.f_ex(self.avg_pool(x))
 47 | 
 48 | 
 49 | class BottleneckTransform(nn.Module):
 50 |     """Bottlenect transformation: 1x1, 3x3, 1x1"""
 51 | 
 52 |     def __init__(self, w_in, w_out, stride, bm, gw, se_r):
 53 |         super(BottleneckTransform, self).__init__()
 54 |         self._construct(w_in, w_out, stride, bm, gw, se_r)
 55 | 
 56 |     def _construct(self, w_in, w_out, stride, bm, gw, se_r):
 57 |         # Compute the bottleneck width
 58 |         w_b = int(round(w_out * bm))
 59 |         # Compute the number of groups
 60 |         num_gs = w_b // gw
 61 |         # 1x1, BN, ReLU
 62 |         self.a = nn.Conv2d(w_in, w_b, kernel_size=1, stride=1, padding=0, bias=False)
 63 |         self.a_bn = nn.BatchNorm2d(w_b)
 64 |         self.a_relu = nn.ReLU(True)
 65 |         # 3x3, BN, ReLU
 66 |         self.b = nn.Conv2d(
 67 |             w_b, w_b, kernel_size=3, stride=stride, padding=1, groups=num_gs, bias=False
 68 |         )
 69 |         self.b_bn = nn.BatchNorm2d(w_b)
 70 |         self.b_relu = nn.ReLU(True)
 71 |         # Squeeze-and-Excitation (SE)
 72 |         if se_r:
 73 |             w_se = int(round(w_in * se_r))
 74 |             self.se = SE(w_b, w_se)
 75 |         # 1x1, BN
 76 |         self.c = nn.Conv2d(w_b, w_out, kernel_size=1, stride=1, padding=0, bias=False)
 77 |         self.c_bn = nn.BatchNorm2d(w_out)
 78 |         self.c_bn.final_bn = True
 79 | 
 80 |     def forward(self, x):
 81 |         for layer in self.children():
 82 |             x = layer(x)
 83 |         return x
 84 | 
 85 | 
 86 | class ResBottleneckBlock(nn.Module):
 87 |     """Residual bottleneck block: x + F(x), F = bottleneck transform"""
 88 | 
 89 |     def __init__(self, w_in, w_out, stride, bm=1.0, gw=1, se_r=None):
 90 |         super(ResBottleneckBlock, self).__init__()
 91 |         self._construct(w_in, w_out, stride, bm, gw, se_r)
 92 | 
 93 |     def _add_skip_proj(self, w_in, w_out, stride):
 94 |         self.proj = nn.Conv2d(
 95 |             w_in, w_out, kernel_size=1, stride=stride, padding=0, bias=False
 96 |         )
 97 |         self.bn = nn.BatchNorm2d(w_out)
 98 | 
 99 |     def _construct(self, w_in, w_out, stride, bm, gw, se_r):
100 |         # Use skip connection with projection if shape changes
101 |         self.proj_block = (w_in != w_out) or (stride != 1)
102 |         if self.proj_block:
103 |             self._add_skip_proj(w_in, w_out, stride)
104 |         self.f = BottleneckTransform(w_in, w_out, stride, bm, gw, se_r)
105 |         self.relu = nn.ReLU(True)
106 | 
107 |     def forward(self, x):
108 |         if self.proj_block:
109 |             x = self.bn(self.proj(x)) + self.f(x)
110 |         else:
111 |             x = x + self.f(x)
112 |         x = self.relu(x)
113 |         return x
114 | 
115 | 
class AnyHead(nn.Module):
    """AnyNet head: global average pooling followed by a linear classifier."""

    def __init__(self, w_in, nc):
        super().__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(w_in, nc, bias=True)

    def forward(self, x):
        """Pool ``x`` to 1x1, flatten it per sample and return the ``fc`` output."""
        pooled = self.avg_pool(x)
        flat = pooled.view(pooled.size(0), -1)
        return self.fc(flat)
129 | 
130 | 
class AnyStage(nn.Module):
    """AnyNet stage: ``d`` blocks that all produce the same output shape."""

    def __init__(self, w_in, w_out, stride, d, block_fun, bm, gw, se_r):
        super().__init__()
        self._construct(w_in, w_out, stride, d, block_fun, bm, gw, se_r)

    def _construct(self, w_in, w_out, stride, d, block_fun, bm, gw, se_r):
        """Register blocks ``b1`` .. ``b<d>`` created by ``block_fun``.

        Only the first block receives ``w_in`` and ``stride``; every later
        block maps ``w_out`` to ``w_out`` with stride 1.
        """
        block_w_in, block_stride = w_in, stride
        for idx in range(1, d + 1):
            self.add_module(
                "b{}".format(idx),
                block_fun(block_w_in, w_out, block_stride, bm, gw, se_r),
            )
            block_w_in, block_stride = w_out, 1

    def forward(self, x):
        """Feed ``x`` through the blocks in registration order."""
        out = x
        for blk in self.children():
            out = blk(out)
        return out
153 | 
154 | 
class AnyNet(nn.Module):
    """AnyNet model: a CIFAR stem, a sequence of stages and a classifier head."""

    def __init__(self, **kwargs):
        """Build the network from keyword arguments and initialise its weights.

        When any keyword is given, ``stem_w``, ``ds``, ``ws``, ``ss``, ``bms``,
        ``gws``, ``se_r`` and ``nc`` must all be present; they are forwarded to
        ``_construct``. With no keywords nothing is constructed.
        """
        super().__init__()
        if kwargs:
            required = ("stem_w", "ds", "ws", "ss", "bms", "gws", "se_r", "nc")
            self._construct(**{key: kwargs[key] for key in required})
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                # Normal init scaled by fan-out (kernel area * output channels)
                k_h, k_w = module.kernel_size[0], module.kernel_size[1]
                fan_out = k_h * k_w * module.out_channels
                module.weight.data.normal_(mean=0.0, std=math.sqrt(2.0 / fan_out))
                continue
            if isinstance(module, nn.BatchNorm2d):
                # BNs flagged ``final_bn`` start with gamma = 0, the rest with 1
                gamma = 0.0 if getattr(module, "final_bn", False) else 1.0
                module.weight.data.fill_(gamma)
                module.bias.data.zero_()
                continue
            if isinstance(module, nn.Linear):
                module.weight.data.normal_(mean=0.0, std=0.01)
                module.bias.data.zero_()
        # FLOP counting via ``_get_flops`` is available but not run here.

    def _construct(self, stem_w, ds, ws, ss, bms, gws, se_r, nc):
        """Register the stem, stages ``s1`` .. ``sN`` and the head.

        Args:
            stem_w: output channels of the stem.
            ds: number of blocks per stage.
            ws: output width per stage.
            ss: stride per stage.
            bms: bottleneck multiplier per stage; falsy means 1.0 everywhere.
            gws: group width per stage; falsy means 1 everywhere.
            se_r: SE ratio handed to every block.
            nc: number of classes for the head.
        """
        # Fill in neutral multipliers / group widths when none are supplied
        bms = bms or [1.0 for _ in ds]
        gws = gws or [1 for _ in ds]
        per_stage = list(zip(ds, ws, ss, bms, gws))
        # Stem takes the 3-channel input
        self.stem = ResStemCifar(3, stem_w)
        # Stages are chained: each one consumes the previous stage's width
        width_in = stem_w
        for idx, (d, w, s, bm, gw) in enumerate(per_stage, start=1):
            stage = AnyStage(width_in, w, s, d, ResBottleneckBlock, bm, gw, se_r)
            self.add_module("s{}".format(idx), stage)
            width_in = w
        # Head classifies from the last stage's width
        self.head = AnyHead(w_in=width_in, nc=nc)

    def forward(self, x):
        """Apply every registered child module in registration order."""
        out = x
        for child in self.children():
            out = child(out)
        return out

    def _get_flops(self):
        """Profile the model with ``thop`` on a random 1x3x32x32 input.

        Returns:
            The ``(macs, params)`` pair produced by ``thop.profile``.
        """
        from thop import profile
        dummy = torch.randn(1, 3, 32, 32)
        macs, params = profile(self, inputs=(dummy,))
        return macs, params
217 | 
218 | 


--------------------------------------------------------------------------------
/CIFAR10/models/regnet.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from models.reglayers import AnyNet
  3 | import torch.nn as nn
  4 | import torch
  5 | 
  6 | regnet_200M_config = {'WA': 36.44, 'W0': 24, 'WM': 2.49, 'DEPTH': 13, 'GROUP_W': 8, 'BOT_MUL': 1}
  7 | regnet_400M_config = {'WA': 24.48, 'W0': 24, 'WM': 2.54, 'DEPTH': 22, 'GROUP_W': 16, 'BOT_MUL': 1}
  8 | regnet_600M_config = {'WA': 36.97, 'W0': 48, 'WM': 2.24, 'DEPTH': 16, 'GROUP_W': 24, 'BOT_MUL': 1}
  9 | regnet_800M_config = {'WA': 35.73, 'W0': 56, 'WM': 2.28, 'DEPTH': 16, 'GROUP_W': 16, 'BOT_MUL': 1}
 10 | regnet_1600M_config = {'WA': 34.01, 'W0': 80, 'WM': 2.25, 'DEPTH': 18, 'GROUP_W': 24, 'BOT_MUL': 1}
 11 | regnet_3200M_config = {'WA': 26.31, 'W0': 88, 'WM': 2.25, 'DEPTH': 25, 'GROUP_W': 48, 'BOT_MUL': 1}
 12 | regnet_4000M_config = {'WA': 38.65, 'W0': 96, 'WM': 2.43, 'DEPTH': 23, 'GROUP_W': 40, 'BOT_MUL': 1}
 13 | regnet_6400M_config = {'WA': 60.83, 'W0': 184, 'WM': 2.07, 'DEPTH': 17, 'GROUP_W': 56, 'BOT_MUL': 1}
 14 | 
 15 | 
 16 | def quantize_float(f, q):
 17 |     """Converts a float to closest non-zero int divisible by q."""
 18 |     return int(round(f / q) * q)
 19 | 
 20 | 
 21 | def adjust_ws_gs_comp(ws, bms, gs):
 22 |     """Adjusts the compatibility of widths and groups."""
 23 |     ws_bot = [int(w * b) for w, b in zip(ws, bms)]
 24 |     gs = [min(g, w_bot) for g, w_bot in zip(gs, ws_bot)]
 25 |     ws_bot = [quantize_float(w_bot, g) for w_bot, g in zip(ws_bot, gs)]
 26 |     ws = [int(w_bot / b) for w_bot, b in zip(ws_bot, bms)]
 27 |     return ws, gs
 28 | 
 29 | 
 30 | def get_stages_from_blocks(ws, rs):
 31 |     """Gets ws/ds of network at each stage from per block values."""
 32 |     ts_temp = zip(ws + [0], [0] + ws, rs + [0], [0] + rs)
 33 |     ts = [w != wp or r != rp for w, wp, r, rp in ts_temp]
 34 |     s_ws = [w for w, t in zip(ws, ts[:-1]) if t]
 35 |     s_ds = np.diff([d for d, t in zip(range(len(ts)), ts) if t]).tolist()
 36 |     return s_ws, s_ds
 37 | 
 38 | 
 39 | def generate_regnet(w_a, w_0, w_m, d, q=8):
 40 |     """Generates per block ws from RegNet parameters."""
 41 |     assert w_a >= 0 and w_0 > 0 and w_m > 1 and w_0 % q == 0
 42 |     ws_cont = np.arange(d) * w_a + w_0
 43 |     ks = np.round(np.log(ws_cont / w_0) / np.log(w_m))       # ks = [0,1,2...,3...]
 44 |     ws = w_0 * np.power(w_m, ks)                             # float channel for 4 stages
 45 |     ws = np.round(np.divide(ws, q)) * q                      # make it divisible by 8
 46 |     num_stages, max_stage = len(np.unique(ws)), ks.max() + 1
 47 |     ws, ws_cont = ws.astype(int).tolist(), ws_cont.tolist()
 48 |     # ws: width list, num_stages: 4, max_stage: 4.0, wscont: float before round width
 49 |     return ws, num_stages, max_stage, ws_cont
 50 | 
 51 | 
 52 | class RegNet(AnyNet):
 53 |     """RegNet model."""
 54 | 
 55 |     def __init__(self, cfg):
 56 |         # Generate RegNet ws per block
 57 |         b_ws, num_s, _, _ = generate_regnet(
 58 |             cfg['WA'], cfg['W0'], cfg['WM'], cfg['DEPTH']
 59 |         )
 60 |         # Convert to per stage format
 61 |         ws, ds = get_stages_from_blocks(b_ws, b_ws)
 62 |         # Generate group widths and bot muls
 63 |         gws = [cfg['GROUP_W'] for _ in range(num_s)]
 64 |         bms = [cfg['BOT_MUL'] for _ in range(num_s)]
 65 |         # Adjust the compatibility of ws and gws
 66 |         ws, gws = adjust_ws_gs_comp(ws, bms, gws)
 67 |         # stride for cifar is set to 1,2,2,2
 68 |         ss = [1 if i==0 else 2 for i in range(num_s)]
 69 |         # Use SE for RegNetY
 70 |         se_r = None
 71 |         # Construct the model
 72 |         STEM_W = 32
 73 |         kwargs = {
 74 |             "stem_w": STEM_W,
 75 |             "ss": ss,
 76 |             "ds": ds,
 77 |             "ws": ws,
 78 |             "bms": bms,
 79 |             "gws": gws,
 80 |             "se_r": se_r,
 81 |             "nc": 10,
 82 |         }
 83 |         super(RegNet, self).__init__(**kwargs)
 84 | 
 85 | 
 86 | def regnet_200m(**kwargs):
 87 |     model = RegNet(regnet_200M_config)
 88 |     return model
 89 | 
 90 | def regnet_400m(**kwargs):
 91 |     model = RegNet(regnet_400M_config)
 92 |     return model
 93 | 
 94 | def regnet_600m(**kwargs):
 95 |     model = RegNet(regnet_600M_config)
 96 |     return model
 97 | 
 98 | def regnet_800m(**kwargs):
 99 |     model = RegNet(regnet_800M_config)
100 |     return model
101 | 
def regnet_1600m(**kwargs):
    """Return a ``RegNet`` built from ``regnet_1600M_config`` (``kwargs`` are ignored)."""
    return RegNet(regnet_1600M_config)
105 | 
def regnet_3200m(**kwargs):
    """Return a ``RegNet`` built from ``regnet_3200M_config`` (``kwargs`` are ignored)."""
    return RegNet(regnet_3200M_config)
109 | 
def regnet_4000m(**kwargs):
    """Return a ``RegNet`` built from ``regnet_4000M_config`` (``kwargs`` are ignored)."""
    return RegNet(regnet_4000M_config)
113 | 
def regnet_6400m(**kwargs):
    """Return a ``RegNet`` built from ``regnet_6400M_config`` (``kwargs`` are ignored)."""
    return RegNet(regnet_6400M_config)
117 | 


--------------------------------------------------------------------------------
/ImageNet/ckpts/regnet_200m.pth.tar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yhhhli/RegNet-Pytorch/6035ff822595338efec9a4de21134b134c7dcaa8/ImageNet/ckpts/regnet_200m.pth.tar


--------------------------------------------------------------------------------
/ImageNet/ckpts/regnet_400m.pth.tar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yhhhli/RegNet-Pytorch/6035ff822595338efec9a4de21134b134c7dcaa8/ImageNet/ckpts/regnet_400m.pth.tar


--------------------------------------------------------------------------------
/ImageNet/ckpts/regnet_600m.pth.tar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yhhhli/RegNet-Pytorch/6035ff822595338efec9a4de21134b134c7dcaa8/ImageNet/ckpts/regnet_600m.pth.tar


--------------------------------------------------------------------------------
/ImageNet/models/reglayers.py:
--------------------------------------------------------------------------------
  1 | import torch.nn as nn
  2 | import math
  3 | import torch
  4 | 
  5 | 
  6 | class ResStemIN(nn.Module):
  7 |     """ResNet stem for ImageNet."""
  8 | 
  9 |     def __init__(self, w_in, w_out):
 10 |         super(ResStemIN, self).__init__()
 11 |         self._construct(w_in, w_out)
 12 | 
 13 |     def _construct(self, w_in, w_out):
 14 |         # 7x7, BN, ReLU, maxpool
 15 |         self.conv = nn.Conv2d(
 16 |             w_in, w_out, kernel_size=7, stride=2, padding=3, bias=False
 17 |         )
 18 |         self.bn = nn.BatchNorm2d(w_out)
 19 |         self.relu = nn.ReLU(True)
 20 |         self.pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
 21 | 
 22 |     def forward(self, x):
 23 |         for layer in self.children():
 24 |             x = layer(x)
 25 |         return x
 26 | 
 27 | 
 28 | class SimpleStemIN(nn.Module):
 29 |     """Simple stem for ImageNet."""
 30 | 
 31 |     def __init__(self, in_w, out_w):
 32 |         super(SimpleStemIN, self).__init__()
 33 |         self._construct(in_w, out_w)
 34 | 
 35 |     def _construct(self, in_w, out_w):
 36 |         # 3x3, BN, ReLU
 37 |         self.conv = nn.Conv2d(
 38 |             in_w, out_w, kernel_size=3, stride=2, padding=1, bias=False
 39 |         )
 40 |         self.bn = nn.BatchNorm2d(out_w)
 41 |         self.relu = nn.ReLU(True)
 42 | 
 43 |     def forward(self, x):
 44 |         for layer in self.children():
 45 |             x = layer(x)
 46 |         return x
 47 | 
 48 | 
 49 | class SE(nn.Module):
 50 |     """Squeeze-and-Excitation (SE) block"""
 51 | 
 52 |     def __init__(self, w_in, w_se):
 53 |         super(SE, self).__init__()
 54 |         self._construct(w_in, w_se)
 55 | 
 56 |     def _construct(self, w_in, w_se):
 57 |         # AvgPool
 58 |         self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
 59 |         # FC, Activation, FC, Sigmoid
 60 |         self.f_ex = nn.Sequential(
 61 |             nn.Conv2d(w_in, w_se, kernel_size=1, bias=True),
 62 |             nn.ReLU(inplace=True),
 63 |             nn.Conv2d(w_se, w_in, kernel_size=1, bias=True),
 64 |             nn.Sigmoid(),
 65 |         )
 66 | 
 67 |     def forward(self, x):
 68 |         return x * self.f_ex(self.avg_pool(x))
 69 | 
 70 | 
 71 | class BottleneckTransform(nn.Module):
 72 |     """Bottlenect transformation: 1x1, 3x3, 1x1"""
 73 | 
 74 |     def __init__(self, w_in, w_out, stride, bm, gw, se_r):
 75 |         super(BottleneckTransform, self).__init__()
 76 |         self._construct(w_in, w_out, stride, bm, gw, se_r)
 77 | 
 78 |     def _construct(self, w_in, w_out, stride, bm, gw, se_r):
 79 |         # Compute the bottleneck width
 80 |         w_b = int(round(w_out * bm))
 81 |         # Compute the number of groups
 82 |         num_gs = w_b // gw
 83 |         # 1x1, BN, ReLU
 84 |         self.a = nn.Conv2d(w_in, w_b, kernel_size=1, stride=1, padding=0, bias=False)
 85 |         self.a_bn = nn.BatchNorm2d(w_b)
 86 |         self.a_relu = nn.ReLU(True)
 87 |         # 3x3, BN, ReLU
 88 |         self.b = nn.Conv2d(
 89 |             w_b, w_b, kernel_size=3, stride=stride, padding=1, groups=num_gs, bias=False
 90 |         )
 91 |         self.b_bn = nn.BatchNorm2d(w_b)
 92 |         self.b_relu = nn.ReLU(True)
 93 |         # Squeeze-and-Excitation (SE)
 94 |         if se_r:
 95 |             w_se = int(round(w_in * se_r))
 96 |             self.se = SE(w_b, w_se)
 97 |         # 1x1, BN
 98 |         self.c = nn.Conv2d(w_b, w_out, kernel_size=1, stride=1, padding=0, bias=False)
 99 |         self.c_bn = nn.BatchNorm2d(w_out)
100 |         self.c_bn.final_bn = True
101 | 
102 |     def forward(self, x):
103 |         for layer in self.children():
104 |             x = layer(x)
105 |         return x
106 | 
107 | 
class ResBottleneckBlock(nn.Module):
    """Residual bottleneck block: ReLU(shortcut(x) + F(x)), F = BottleneckTransform."""

    def __init__(self, w_in, w_out, stride, bm=1.0, gw=1, se_r=None):
        super().__init__()
        self._construct(w_in, w_out, stride, bm, gw, se_r)

    def _add_skip_proj(self, w_in, w_out, stride):
        """Register the strided 1x1 conv and BN used as a projection shortcut."""
        self.proj = nn.Conv2d(w_in, w_out, kernel_size=1, stride=stride, padding=0, bias=False)
        self.bn = nn.BatchNorm2d(w_out)

    def _construct(self, w_in, w_out, stride, bm, gw, se_r):
        """Build the shortcut (projection only if the shape changes) and the transform."""
        needs_proj = (stride != 1) or (w_in != w_out)
        self.proj_block = needs_proj
        if needs_proj:
            self._add_skip_proj(w_in, w_out, stride)
        self.f = BottleneckTransform(w_in, w_out, stride, bm, gw, se_r)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Add the (possibly projected) input to the transform output and apply ReLU."""
        shortcut = self.bn(self.proj(x)) if self.proj_block else x
        return self.relu(shortcut + self.f(x))
136 | 
137 | 
class AnyHead(nn.Module):
    """AnyNet head: global average pooling followed by a linear classifier."""

    def __init__(self, w_in, nc):
        super().__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(w_in, nc, bias=True)

    def forward(self, x):
        """Pool ``x`` to 1x1, flatten it per sample and return the ``fc`` output."""
        pooled = self.avg_pool(x)
        flat = pooled.view(pooled.size(0), -1)
        return self.fc(flat)
151 | 
152 | 
class AnyStage(nn.Module):
    """AnyNet stage: ``d`` blocks applied in sequence, all with the same output shape.

    Blocks are registered as ``b1`` .. ``b<d>``. ``block_fun`` is called as
    ``block_fun(w_in, w_out, stride, bm, gw, se_r)`` for every block.
    """

    def __init__(self, w_in, w_out, stride, d, block_fun, bm, gw, se_r):
        super(AnyStage, self).__init__()
        self._construct(w_in, w_out, stride, d, block_fun, bm, gw, se_r)

    def _construct(self, w_in, w_out, stride, d, block_fun, bm, gw, se_r):
        """Build and register the ``d`` blocks of this stage."""
        for idx in range(1, d + 1):
            # Only the first block sees the stage's input width and stride;
            # the remaining blocks keep the shape unchanged.
            is_first = idx == 1
            block = block_fun(
                w_in if is_first else w_out,
                w_out,
                stride if is_first else 1,
                bm,
                gw,
                se_r,
            )
            self.add_module("b{}".format(idx), block)

    def forward(self, x):
        """Apply every block in turn."""
        out = x
        for stage_block in self.children():
            out = stage_block(out)
        return out
175 | 
176 | 
class AnyNet(nn.Module):
    """AnyNet model: a stem, a sequence of stages and a classification head.

    When keyword arguments are supplied they must contain ``stem_w``, ``ds``,
    ``ws``, ``ss``, ``bms``, ``gws``, ``se_r`` and ``nc``; the network is then
    built from them. Without keyword arguments nothing is constructed. In
    both cases every Conv2d / BatchNorm2d / Linear submodule present is
    (re)initialised afterwards.
    """

    def __init__(self, **kwargs):
        super(AnyNet, self).__init__()
        if kwargs:
            arg_names = ("stem_w", "ds", "ws", "ss", "bms", "gws", "se_r", "nc")
            self._construct(**{name: kwargs[name] for name in arg_names})
        for layer in self.modules():
            if isinstance(layer, nn.Conv2d):
                # Convolutions carry no bias because each is followed by BN.
                k_h, k_w = layer.kernel_size[0], layer.kernel_size[1]
                fan_out = k_h * k_w * layer.out_channels
                layer.weight.data.normal_(mean=0.0, std=math.sqrt(2.0 / fan_out))
            elif isinstance(layer, nn.BatchNorm2d):
                # BN layers flagged with a truthy ``final_bn`` start with gamma = 0.
                gamma = 0.0 if getattr(layer, "final_bn", False) else 1.0
                layer.weight.data.fill_(gamma)
                layer.bias.data.zero_()
            elif isinstance(layer, nn.Linear):
                layer.weight.data.normal_(mean=0.0, std=0.01)
                layer.bias.data.zero_()
        # FLOP/param profiling is not run here; call _get_flops() explicitly.

    def _construct(self, stem_w, ds, ws, ss, bms, gws, se_r, nc):
        """Register the stem, the stages ``s1`` .. ``sN`` and the head."""
        # Models that do not use bottleneck multipliers / group widths pass
        # something falsy; substitute neutral per-stage values.
        bms = bms or [1.0 for _ in ds]
        gws = gws or [1 for _ in ds]
        per_stage = list(zip(ds, ws, ss, bms, gws))
        self.stem = SimpleStemIN(3, stem_w)
        w_prev = stem_w
        for idx, (d, w, s, bm, gw) in enumerate(per_stage, start=1):
            stage = AnyStage(w_prev, w, s, d, ResBottleneckBlock, bm, gw, se_r)
            self.add_module("s{}".format(idx), stage)
            w_prev = w
        self.head = AnyHead(w_in=w_prev, nc=nc)

    def forward(self, x):
        """Apply the child modules one after another."""
        out = x
        for child in self.children():
            out = child(out)
        return out

    def _get_flops(self):
        """Profile the model with ``thop`` on one 3x224x224 input; return (macs, params)."""
        from thop import profile
        dummy = torch.randn(1, 3, 224, 224)
        macs, params = profile(self, inputs=(dummy,))
        return macs, params
239 | 
240 | 
class WrappedModel(nn.Module):
    """Hold a model under the ``module`` attribute and delegate to it.

    Used to load weights from DataParallel pretrained models for cpu: with
    the model registered as ``module``, its state-dict keys gain the
    ``module.`` prefix.
    """

    def __init__(self, module):
        super(WrappedModel, self).__init__()
        self.module = module

    def forward(self, x):
        """Delegate the forward pass to the wrapped model."""
        wrapped = self.module
        return wrapped(x)


--------------------------------------------------------------------------------
/ImageNet/models/regnet.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from models.reglayers import AnyNet, WrappedModel
  3 | import torch
  4 | 
  5 | regnet_200M_config = {'WA': 36.44, 'W0': 24, 'WM': 2.49, 'DEPTH': 13, 'GROUP_W': 8, 'BOT_MUL': 1}
  6 | regnet_400M_config = {'WA': 24.48, 'W0': 24, 'WM': 2.54, 'DEPTH': 22, 'GROUP_W': 16, 'BOT_MUL': 1}
  7 | regnet_600M_config = {'WA': 36.97, 'W0': 48, 'WM': 2.24, 'DEPTH': 16, 'GROUP_W': 24, 'BOT_MUL': 1}
  8 | regnet_800M_config = {'WA': 35.73, 'W0': 56, 'WM': 2.28, 'DEPTH': 16, 'GROUP_W': 16, 'BOT_MUL': 1}
  9 | regnet_1600M_config = {'WA': 34.01, 'W0': 80, 'WM': 2.25, 'DEPTH': 18, 'GROUP_W': 24, 'BOT_MUL': 1}
 10 | regnet_3200M_config = {'WA': 26.31, 'W0': 88, 'WM': 2.25, 'DEPTH': 25, 'GROUP_W': 48, 'BOT_MUL': 1}
 11 | regnet_4000M_config = {'WA': 38.65, 'W0': 96, 'WM': 2.43, 'DEPTH': 23, 'GROUP_W': 40, 'BOT_MUL': 1}
 12 | regnet_6400M_config = {'WA': 60.83, 'W0': 184, 'WM': 2.07, 'DEPTH': 17, 'GROUP_W': 56, 'BOT_MUL': 1}
 13 | model_paths = {
 14 |     'regnet_200m': '../ckpts/regnet_200m.pth.tar',
 15 |     'regnet_400m': '../ckpts/regnet_400m.pth.tar',
 16 |     'regnet_600m': '../ckpts/regnet_600m.pth.tar',
 17 |     'regnet_800m': '../ckpts/regnet_800m.pth.tar',
 18 |     'regnet_1600m': '../ckpts/regnet_1600m.pth.tar',
 19 |     'regnet_3200m': '../ckpts/regnet_3200m.pth.tar',
 20 |     }
 21 | 
 22 | 
 23 | def quantize_float(f, q):
 24 |     """Converts a float to closest non-zero int divisible by q."""
 25 |     return int(round(f / q) * q)
 26 | 
 27 | 
 28 | def adjust_ws_gs_comp(ws, bms, gs):
 29 |     """Adjusts the compatibility of widths and groups."""
 30 |     ws_bot = [int(w * b) for w, b in zip(ws, bms)]
 31 |     gs = [min(g, w_bot) for g, w_bot in zip(gs, ws_bot)]
 32 |     ws_bot = [quantize_float(w_bot, g) for w_bot, g in zip(ws_bot, gs)]
 33 |     ws = [int(w_bot / b) for w_bot, b in zip(ws_bot, bms)]
 34 |     return ws, gs
 35 | 
 36 | 
 37 | def get_stages_from_blocks(ws, rs):
 38 |     """Gets ws/ds of network at each stage from per block values."""
 39 |     ts_temp = zip(ws + [0], [0] + ws, rs + [0], [0] + rs)
 40 |     ts = [w != wp or r != rp for w, wp, r, rp in ts_temp]
 41 |     s_ws = [w for w, t in zip(ws, ts[:-1]) if t]
 42 |     s_ds = np.diff([d for d, t in zip(range(len(ts)), ts) if t]).tolist()
 43 |     return s_ws, s_ds
 44 | 
 45 | 
 46 | def generate_regnet(w_a, w_0, w_m, d, q=8):
 47 |     """Generates per block ws from RegNet parameters."""
 48 |     assert w_a >= 0 and w_0 > 0 and w_m > 1 and w_0 % q == 0
 49 |     ws_cont = np.arange(d) * w_a + w_0
 50 |     ks = np.round(np.log(ws_cont / w_0) / np.log(w_m))       # ks = [0,1,2...,3...]
 51 |     ws = w_0 * np.power(w_m, ks)                             # float channel for 4 stages
 52 |     ws = np.round(np.divide(ws, q)) * q                      # make it divisible by 8
 53 |     num_stages, max_stage = len(np.unique(ws)), ks.max() + 1
 54 |     ws, ws_cont = ws.astype(int).tolist(), ws_cont.tolist()
 55 |     # ws: width list, num_stages: 4, max_stage: 4.0, wscont: float before round width
 56 |     return ws, num_stages, max_stage, ws_cont
 57 | 
 58 | 
 59 | class RegNet(AnyNet):
 60 |     """RegNet model."""
 61 | 
 62 |     def __init__(self, cfg, **kwargs):
 63 |         # Generate RegNet ws per block
 64 |         b_ws, num_s, _, _ = generate_regnet(
 65 |             cfg['WA'], cfg['W0'], cfg['WM'], cfg['DEPTH']
 66 |         )
 67 |         # Convert to per stage format
 68 |         ws, ds = get_stages_from_blocks(b_ws, b_ws)
 69 |         # Generate group widths and bot muls
 70 |         gws = [cfg['GROUP_W'] for _ in range(num_s)]
 71 |         bms = [cfg['BOT_MUL'] for _ in range(num_s)]
 72 |         # Adjust the compatibility of ws and gws
 73 |         ws, gws = adjust_ws_gs_comp(ws, bms, gws)
 74 |         # Use the same stride for each stage, stride set to 2
 75 |         ss = [2 for _ in range(num_s)]
 76 |         # Use SE for RegNetY
 77 |         se_r = None
 78 |         # Construct the model
 79 |         STEM_W = 32
 80 |         kwargs = {
 81 |             "stem_w": STEM_W,
 82 |             "ss": ss,
 83 |             "ds": ds,
 84 |             "ws": ws,
 85 |             "bms": bms,
 86 |             "gws": gws,
 87 |             "se_r": se_r,
 88 |             "nc": 1000,
 89 |         }
 90 |         super(RegNet, self).__init__(**kwargs)
 91 | 
 92 | 
 93 | def regnet_200M(pretrained=False, **kwargs):
 94 |     model = RegNet(regnet_200M_config, **kwargs)
 95 |     if pretrained:
 96 |         model = WrappedModel(model)
 97 |         state_dict = torch.load(model_paths['regnet_200m'])
 98 |         model.load_state_dict(state_dict)
 99 |     return model
100 | 
101 | 
102 | def regnet_400M(pretrained=False, **kwargs):
103 |     model = RegNet(regnet_400M_config, **kwargs)
104 |     if pretrained:
105 |         model = WrappedModel(model)
106 |         state_dict = torch.load(model_paths['regnet_400m'])
107 |         model.load_state_dict(state_dict)
108 |     return model
109 | 
110 | 
111 | def regnet_600M(pretrained=False, **kwargs):
112 |     model = RegNet(regnet_600M_config, **kwargs)
113 |     if pretrained:
114 |         model = WrappedModel(model)
115 |         state_dict = torch.load(model_paths['regnet_600m'])
116 |         model.load_state_dict(state_dict)
117 |     return model
118 | 
119 | 
120 | def regnet_800M(pretrained=False, **kwargs):
121 |     model = RegNet(regnet_800M_config, **kwargs)
122 |     if pretrained:
123 |         model = WrappedModel(model)
124 |         state_dict = torch.load(model_paths['regnet_800m'])
125 |         model.load_state_dict(state_dict)
126 |     return model
127 | 
128 | 
129 | def regnet_1600M(pretrained=False, **kwargs):
130 |     model = RegNet(regnet_1600M_config, **kwargs)
131 |     if pretrained:
132 |         model = WrappedModel(model)
133 |         state_dict = torch.load(model_paths['regnet_1600m'])
134 |         model.load_state_dict(state_dict)
135 |     return model
136 | 
137 | 
138 | def regnet_3200M(pretrained=False, **kwargs):
139 |     model = RegNet(regnet_3200M_config, **kwargs)
140 |     if pretrained:
141 |         model = WrappedModel(model)
142 |         state_dict = torch.load(model_paths['regnet_3200m'])
143 |         model.load_state_dict(state_dict)
144 |     return model
145 | 
146 | 
147 | def regnet_4000M(pretrained=False, **kwargs):
148 |     model = RegNet(regnet_4000M_config, **kwargs)
149 |     if pretrained:
150 |         model = WrappedModel(model)
151 |         state_dict = torch.load(model_paths['regnet_4000m'])
152 |         model.load_state_dict(state_dict)
153 |     return model
154 | 
155 | 
156 | def regnet_6400M(pretrained=False, **kwargs):
157 |     model = RegNet(regnet_6400M_config, **kwargs)
158 |     if pretrained:
159 |         model = WrappedModel(model)
160 |         state_dict = torch.load(model_paths['regnet_6400m'])
161 |         model.load_state_dict(state_dict)
162 |     return model
163 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 Yuhang Li
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # RegNet-Pytorch
 2 | 
 3 | [Designing Network Design Spaces](https://arxiv.org/pdf/2003.13678.pdf) 
 4 | 
 5 | ![image-20200427104303632](figs/image-20200427104303632.png)
 6 | 
 7 | ### Prerequisite 
 8 | 
 9 | Pytorch 1.1.0+
10 | 
11 | [thop](https://github.com/Lyken17/pytorch-OpCounter) to calculate the flops and params
12 | 
13 | ### CIFAR10
14 | 
15 | ```bash
16 | cd $PATH-TO-THIS-REPO/CIFAR10
17 | ```
18 | 
 19 | For CIFAR10 models, the stride for the first stage is set to 1, so that the input size for the last stage won't become too small (2x2). The STEM part does not contain a max pooling layer.
20 | 
 21 | Run the following command to train a RegNet from scratch (add `-e` to evaluate the pre-trained model):
22 | 
 23 | ```bash
24 | python main.py -a regnet_200m 
25 | ```
26 | 
27 | Here is the summary for the accuracy, params and macs. 
28 | 
29 | | Models      | FLOPs (10^6) | Params (10^6) | Hyper-params                        | Accuracy |
30 | | ----------- | ------------ | ------------- | ----------------------------------- | -------- |
31 | | RegNet-200M | 62           | 2.31          | batch128_wd0.0001_cos300epoch_lr0.1 | 93.58    |
32 | | RegNet-400M | 126          | 4.77          | batch128_wd0.0001_cos300epoch_lr0.1 | 94.15    |
33 | | RegNet-600M | 192          | 5.67          | batch128_wd0.0001_cos300epoch_lr0.1 | 94.73    |
34 | | RegNet-800M | 258          | 6.60          | batch128_wd0.0001_cos300epoch_lr0.1 | 95.01    |
35 | | RegNet-1.6G | 522          | 8.28          | batch128_wd0.0001_cos300epoch_lr0.1 | 95.45    |
36 | | RegNet-3.2G | 1038         | 14.3          | batch128_wd0.0001_cos300epoch_lr0.1 | 95.53    |
37 | | RegNet-4G   | 1298         | 20.8          | batch128_wd0.0001_cos300epoch_lr0.1 | 95.69    |
38 | | RegNet-6.4G | 2108         | 24.6          | batch128_wd0.0001_cos300epoch_lr0.1 | 96.20    |
39 | 
40 | ### ImageNet
41 | 
 42 | For ImageNet models, we keep the model and training configuration exactly the same as in the original [released code](https://github.com/facebookresearch/pycls). We train the models using the PyTorch framework, and a summary of the results is shown below.
43 | 
44 | | Models      | FLOPs (10^6) | Params (10^6) | Hyper-params                         | Accuracy (Paper) | Accuracy (Ours) |
45 | | ----------- | ------------ | ------------- | ------------------------------------ | ---------------- | --------------- |
46 | | RegNet-200M | 208          | 2.68          | batch1k_wd0.00005_cos100epoch_lr0.8  | 68.9             | 68.1            |
47 | | RegNet-400M | 410          | 5.15          | batch1k_wd0.00005_cos100epoch_lr0.8  | 72.7             | 72.24           |
48 | | RegNet-600M | 618          | 6.19          | batch1k_wd0.00005_cos100epoch_lr0.8  | 74.1             | 73.94           |
49 | | RegNet-800M | 820          | 7.25          | batch1k_wd0.00005_cos100epoch_lr0.8  | 75.2             | 75.13           |
50 | | RegNet-1.6G | 1635         | 9.19          | batch512_wd0.00005_cos100epoch_lr0.4 | 77.0             | 77.09           |
51 | | RegNet-3.2G | 3233         | 15.3          | batch512_wd0.00005_cos100epoch_lr0.4 | 78.3             | 78.54           |
52 | | RegNet-4G   | 4014         | 22.1          | batch512_wd0.00005_cos100epoch_lr0.4 | 78.6             | 79.09           |
53 | | RegNet-6.4G | 6527         | 26.2          | batch512_wd0.00005_cos100epoch_lr0.4 | 79.2             | 79.36           |
54 | 
 55 | Note: we only uploaded regnet_200MF, 400MF and 600MF in this repo. Other pretrained models can be found [here](https://drive.google.com/open?id=1lbIBzQNW2CqbmnbAkY8DM5apzq_YXWFc). Use `pretrained=True` to load the pre-trained models.


--------------------------------------------------------------------------------
/figs/image-20200427104303632.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yhhhli/RegNet-Pytorch/6035ff822595338efec9a4de21134b134c7dcaa8/figs/image-20200427104303632.png


--------------------------------------------------------------------------------