├── CIFAR10 ├── checkpoints │ ├── regnet_200m_ │ │ └── model_best.pth.tar │ ├── regnet_400m_ │ │ └── model_best.pth.tar │ └── regnet_600m_ │ │ └── model_best.pth.tar ├── main.py └── models │ ├── __init__.py │ ├── reglayers.py │ └── regnet.py ├── ImageNet ├── ckpts │ ├── regnet_200m.pth.tar │ ├── regnet_400m.pth.tar │ └── regnet_600m.pth.tar └── models │ ├── reglayers.py │ └── regnet.py ├── LICENSE ├── README.md └── figs └── image-20200427104303632.png /CIFAR10/checkpoints/regnet_200m_/model_best.pth.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yhhhli/RegNet-Pytorch/6035ff822595338efec9a4de21134b134c7dcaa8/CIFAR10/checkpoints/regnet_200m_/model_best.pth.tar -------------------------------------------------------------------------------- /CIFAR10/checkpoints/regnet_400m_/model_best.pth.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yhhhli/RegNet-Pytorch/6035ff822595338efec9a4de21134b134c7dcaa8/CIFAR10/checkpoints/regnet_400m_/model_best.pth.tar -------------------------------------------------------------------------------- /CIFAR10/checkpoints/regnet_600m_/model_best.pth.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yhhhli/RegNet-Pytorch/6035ff822595338efec9a4de21134b134c7dcaa8/CIFAR10/checkpoints/regnet_600m_/model_best.pth.tar -------------------------------------------------------------------------------- /CIFAR10/main.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import time 4 | import shutil 5 | import torch 6 | import torch.nn as nn 7 | import torch.backends.cudnn as cudnn 8 | import torchvision 9 | import torchvision.transforms as transforms 10 | import models 11 | 12 | model_names = sorted(name for name in models.__dict__ 13 | if name.islower() and not 
def main():
    """Build the selected model, then evaluate it once or run the training loop.

    Configuration is read from the module-level ``args`` namespace and the
    best validation precision seen so far is kept in the module-level
    ``best_prec``. A CUDA device is required: without one a message is printed
    and the function returns without doing any work.

    With ``args.evaluate`` set, ``model_best.pth.tar`` from the architecture's
    checkpoint directory (``checkpoints/<arch>_``) is loaded and validated.
    Otherwise the model is trained with SGD and a cosine-annealed learning
    rate from ``args.start_epoch`` to ``args.epochs``; after every epoch the
    model is validated and a checkpoint is written.

    NOTE(review): ``args.resume`` and ``args.cifar_type`` are parsed by the
    argument parser but are not consumed anywhere in this function.
    """
    global best_prec

    print('=> Building model...')
    if not torch.cuda.is_available():
        print('Cuda is not available!')
        return

    assert args.arch in model_names, 'Network Architecture Not Supported'
    model = models.__dict__[args.arch]()
    model = model.cuda()
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(
        model.parameters(),
        lr=args.lr,
        # Use the value of --momentum; it was previously hard-coded to 0.9,
        # which silently ignored the command-line option.
        momentum=args.momentum,
        weight_decay=args.weight_decay,
    )
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=args.epochs)
    cudnn.benchmark = True

    # One checkpoint directory per architecture; creates 'checkpoints' too.
    fdir = 'checkpoints/' + str(args.arch) + '_'
    os.makedirs(fdir, exist_ok=True)

    print('=> loading cifar10 data...')
    normalize = transforms.Normalize(mean=[0.491, 0.482, 0.447], std=[0.247, 0.243, 0.262])
    train_transform = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])
    test_transform = transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])

    train_dataset = torchvision.datasets.CIFAR10(
        root='./data', train=True, download=True, transform=train_transform)
    trainloader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size, shuffle=True, num_workers=2)

    test_dataset = torchvision.datasets.CIFAR10(
        root='./data', train=False, download=True, transform=test_transform)
    testloader = torch.utils.data.DataLoader(
        test_dataset, batch_size=100, shuffle=False, num_workers=2)

    if args.evaluate:
        ckpt = torch.load(fdir + '/model_best.pth.tar')
        model.load_state_dict(ckpt['state_dict'])
        validate(testloader, model, criterion)
        return

    for epoch in range(args.start_epoch, args.epochs):
        # Train for one epoch, then advance the learning-rate schedule.
        train(trainloader, model, criterion, optimizer, epoch)
        scheduler.step()

        # Evaluate on the test split.
        prec = validate(testloader, model, criterion)

        # Track the best precision and persist the current state.
        is_best = prec > best_prec
        best_prec = max(prec, best_prec)
        print('best acc: {:1f}'.format(best_prec))
        save_checkpoint({
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'best_prec': best_prec,
            'optimizer': optimizer.state_dict(),
        }, is_best, fdir)
def accuracy(output, target, topk=(1,)):
    """Compute the precision@k, in percent, for each requested ``k``.

    Args:
        output: 2-D tensor of per-class scores, one row per sample.
        target: 1-D tensor holding the true class index of each sample.
        topk: iterable of ``k`` values; a sample counts as correct for ``k``
            when its target is among the ``k`` highest-scoring classes.

    Returns:
        A list with one 0-dim tensor per entry of ``topk``, each holding the
        percentage of correctly classified samples.
    """
    maxk = max(topk)
    batch_size = target.size(0)

    # Indices of the maxk best classes per sample, transposed to (maxk, batch).
    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.reshape(1, -1).expand_as(pred))

    res = []
    for k in topk:
        # ``correct`` inherits the transposed (non-contiguous) layout of
        # ``pred``, so ``view(-1)`` can raise for k > 1; ``reshape`` copies
        # only when it has to.
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res
class ResStemCifar(nn.Module):
    """Network stem for CIFAR inputs: 3x3 convolution, batch norm, ReLU.

    Args:
        w_in: number of input channels.
        w_out: number of channels produced by the convolution.
    """

    def __init__(self, w_in, w_out):
        super().__init__()
        self._construct(w_in, w_out)

    def _construct(self, w_in, w_out):
        """Register the layers in the order ``forward`` applies them."""
        conv_options = dict(kernel_size=3, stride=1, padding=1, bias=False)
        self.conv = nn.Conv2d(w_in, w_out, **conv_options)
        self.bn = nn.BatchNorm2d(w_out)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Feed ``x`` through every registered child, in registration order."""
        out = x
        for _name, layer in self.named_children():
            out = layer(out)
        return out
class ResBottleneckBlock(nn.Module):
    """Residual block: ``relu(shortcut(x) + F(x))`` with a bottleneck ``F``.

    The shortcut is the identity unless the block changes the channel count
    or the spatial stride, in which case a 1x1 convolution followed by batch
    norm projects the input to the output shape.

    Args:
        w_in: number of input channels.
        w_out: number of output channels.
        stride: stride of the block (used by the projection and by ``F``).
        bm: bottleneck multiplier forwarded to ``BottleneckTransform``.
        gw: group width forwarded to ``BottleneckTransform``.
        se_r: squeeze-and-excitation ratio forwarded to
            ``BottleneckTransform``; ``None`` by default.
    """

    def __init__(self, w_in, w_out, stride, bm=1.0, gw=1, se_r=None):
        super().__init__()
        self._construct(w_in, w_out, stride, bm, gw, se_r)

    def _add_skip_proj(self, w_in, w_out, stride):
        """Register the 1x1 projection and its batch norm for the shortcut."""
        self.proj = nn.Conv2d(
            w_in, w_out, kernel_size=1, stride=stride, padding=0, bias=False
        )
        self.bn = nn.BatchNorm2d(w_out)

    def _construct(self, w_in, w_out, stride, bm, gw, se_r):
        """Register shortcut (if needed), bottleneck transform and ReLU."""
        shape_changes = w_in != w_out or stride != 1
        self.proj_block = shape_changes
        if shape_changes:
            self._add_skip_proj(w_in, w_out, stride)
        self.f = BottleneckTransform(w_in, w_out, stride, bm, gw, se_r)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Add the shortcut branch to the transform branch and apply ReLU."""
        shortcut = self.bn(self.proj(x)) if self.proj_block else x
        return self.relu(shortcut + self.f(x))
class AnyStage(nn.Module):
    """A stage: ``d`` blocks applied in sequence, all producing ``w_out`` channels.

    Args:
        w_in: input width of the stage (used by the first block only).
        w_out: output width of every block.
        stride: stride of the first block; later blocks use stride 1.
        d: number of blocks in the stage.
        block_fun: callable invoked as
            ``block_fun(w_in, w_out, stride, bm, gw, se_r)`` to build a block.
        bm, gw, se_r: passed through unchanged to ``block_fun``.
    """

    def __init__(self, w_in, w_out, stride, d, block_fun, bm, gw, se_r):
        super().__init__()
        self._construct(w_in, w_out, stride, d, block_fun, bm, gw, se_r)

    def _construct(self, w_in, w_out, stride, d, block_fun, bm, gw, se_r):
        """Create the blocks and register them as ``b1`` .. ``b<d>``."""
        for position in range(1, d + 1):
            is_first = position == 1
            # Only the first block sees the stage's input width and stride.
            block_w_in, block_stride = (w_in, stride) if is_first else (w_out, 1)
            block = block_fun(block_w_in, w_out, block_stride, bm, gw, se_r)
            self.add_module("b{}".format(position), block)

    def forward(self, x):
        """Run ``x`` through the registered blocks in order."""
        out = x
        for _name, block in self.named_children():
            out = block(out)
        return out
m.weight.data.normal_(mean=0.0, std=0.01) 184 | m.bias.data.zero_() 185 | # self.macs, self.params = self._get_flops() 186 | 187 | def _construct(self, stem_w, ds, ws, ss, bms, gws, se_r, nc): 188 | # logger.info("Constructing AnyNet: ds={}, ws={}".format(ds, ws)) 189 | # Generate dummy bot muls and gs for models that do not use them 190 | bms = bms if bms else [1.0 for _d in ds] 191 | gws = gws if gws else [1 for _d in ds] 192 | # Group params by stage 193 | stage_params = list(zip(ds, ws, ss, bms, gws)) 194 | # Construct the stem 195 | self.stem = ResStemCifar(3, stem_w) 196 | # Construct the stages 197 | block_fun = ResBottleneckBlock 198 | prev_w = stem_w 199 | for i, (d, w, s, bm, gw) in enumerate(stage_params): 200 | self.add_module( 201 | "s{}".format(i + 1), AnyStage(prev_w, w, s, d, block_fun, bm, gw, se_r) 202 | ) 203 | prev_w = w 204 | # Construct the head 205 | self.head = AnyHead(w_in=prev_w, nc=nc) 206 | 207 | def forward(self, x): 208 | for module in self.children(): 209 | x = module(x) 210 | return x 211 | 212 | def _get_flops(self): 213 | from thop import profile 214 | input = torch.randn(1, 3, 32, 32) 215 | macs, params = profile(self, inputs=(input,)) 216 | return macs, params 217 | 218 | -------------------------------------------------------------------------------- /CIFAR10/models/regnet.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from models.reglayers import AnyNet 3 | import torch.nn as nn 4 | import torch 5 | 6 | regnet_200M_config = {'WA': 36.44, 'W0': 24, 'WM': 2.49, 'DEPTH': 13, 'GROUP_W': 8, 'BOT_MUL': 1} 7 | regnet_400M_config = {'WA': 24.48, 'W0': 24, 'WM': 2.54, 'DEPTH': 22, 'GROUP_W': 16, 'BOT_MUL': 1} 8 | regnet_600M_config = {'WA': 36.97, 'W0': 48, 'WM': 2.24, 'DEPTH': 16, 'GROUP_W': 24, 'BOT_MUL': 1} 9 | regnet_800M_config = {'WA': 35.73, 'W0': 56, 'WM': 2.28, 'DEPTH': 16, 'GROUP_W': 16, 'BOT_MUL': 1} 10 | regnet_1600M_config = {'WA': 34.01, 'W0': 80, 'WM': 2.25, 
def get_stages_from_blocks(ws, rs):
    """Collapse per-block values into per-stage widths and depths.

    A new stage starts at every block whose ``(w, r)`` pair differs from the
    previous block's pair; a sentinel value of 0 is placed before the first
    and after the last block so both ends count as boundaries.

    Args:
        ws: list of per-block widths.
        rs: list of per-block secondary values compared alongside ``ws``.

    Returns:
        ``(s_ws, s_ds)``: the width at the start of each stage and the number
        of blocks in each stage.
    """
    current = zip(ws + [0], rs + [0])
    previous = zip([0] + ws, [0] + rs)
    # One flag per block plus a trailing flag for the end sentinel.
    is_boundary = [
        w != w_prev or r != r_prev
        for (w, r), (w_prev, r_prev) in zip(current, previous)
    ]

    stage_widths = [w for w, starts in zip(ws, is_boundary[:-1]) if starts]
    boundary_positions = [i for i, flag in enumerate(is_boundary) if flag]
    # Distance between consecutive boundaries == number of blocks per stage.
    stage_depths = np.diff(boundary_positions).tolist()
    return stage_widths, stage_depths
class RegNet(AnyNet):
    """RegNet model for CIFAR-sized inputs, built on top of ``AnyNet``.

    Args:
        cfg: mapping with the keys ``'WA'``, ``'W0'``, ``'WM'``, ``'DEPTH'``,
            ``'GROUP_W'`` and ``'BOT_MUL'`` describing the network.
        nc: number of output classes of the classification head. Defaults to
            10, the value that used to be hard-coded, so existing callers are
            unaffected; pass 100 for a 100-class dataset.
    """

    def __init__(self, cfg, nc=10):
        # Per-block widths generated from the RegNet parameters.
        b_ws, num_s, _, _ = generate_regnet(
            cfg['WA'], cfg['W0'], cfg['WM'], cfg['DEPTH']
        )
        # Convert to per-stage widths and depths.
        ws, ds = get_stages_from_blocks(b_ws, b_ws)
        # Same group width and bottleneck multiplier for every stage.
        gws = [cfg['GROUP_W'] for _ in range(num_s)]
        bms = [cfg['BOT_MUL'] for _ in range(num_s)]
        # Make widths and group widths compatible with each other.
        ws, gws = adjust_ws_gs_comp(ws, bms, gws)
        # Strides for CIFAR: 1 for the first stage, 2 for the rest.
        ss = [1 if i == 0 else 2 for i in range(num_s)]
        # No squeeze-and-excitation.
        se_r = None
        STEM_W = 32
        kwargs = {
            "stem_w": STEM_W,
            "ss": ss,
            "ds": ds,
            "ws": ws,
            "bms": bms,
            "gws": gws,
            "se_r": se_r,
            "nc": nc,
        }
        super(RegNet, self).__init__(**kwargs)
class ResStemIN(nn.Module):
    """ResNet-style stem for ImageNet: 7x7 conv, batch norm, ReLU, max pool.

    Both the convolution and the pooling layer use stride 2.

    Args:
        w_in: number of input channels.
        w_out: number of channels produced by the convolution.
    """

    def __init__(self, w_in, w_out):
        super().__init__()
        self._construct(w_in, w_out)

    def _construct(self, w_in, w_out):
        """Register the four layers in the order they are applied."""
        self.conv = nn.Conv2d(
            in_channels=w_in,
            out_channels=w_out,
            kernel_size=7,
            stride=2,
            padding=3,
            bias=False,
        )
        self.bn = nn.BatchNorm2d(w_out)
        self.relu = nn.ReLU(inplace=True)
        self.pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    def forward(self, x):
        """Apply every registered child module to ``x`` in order."""
        result = x
        for _name, layer in self.named_children():
            result = layer(result)
        return result
class SE(nn.Module):
    """Squeeze-and-Excitation block: rescale channels by a learned gate.

    The input is average-pooled to 1x1, passed through two 1x1 convolutions
    (ReLU in between, sigmoid at the end) and the result multiplies the input.

    Args:
        w_in: number of channels of the input (and of the gate).
        w_se: number of channels inside the gate's hidden layer.
    """

    def __init__(self, w_in, w_se):
        super().__init__()
        self._construct(w_in, w_se)

    def _construct(self, w_in, w_se):
        """Register the pooling layer and the gating sub-network."""
        self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        reduce_conv = nn.Conv2d(w_in, w_se, kernel_size=1, bias=True)
        expand_conv = nn.Conv2d(w_se, w_in, kernel_size=1, bias=True)
        self.f_ex = nn.Sequential(
            reduce_conv,
            nn.ReLU(inplace=True),
            expand_conv,
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return ``x`` scaled by the per-channel gate computed from ``x``."""
        gate = self.f_ex(self.avg_pool(x))
        return x * gate
class AnyHead(nn.Module):
    """Classification head: global average pool followed by a linear layer.

    Args:
        w_in: number of input channels (input features of the linear layer).
        nc: number of outputs of the linear layer.
    """

    def __init__(self, w_in, nc):
        super().__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(in_features=w_in, out_features=nc, bias=True)

    def forward(self, x):
        """Pool ``x`` to 1x1, flatten per sample, and apply the linear layer."""
        pooled = self.avg_pool(x)
        batch = pooled.size(0)
        return self.fc(pooled.view(batch, -1))
class AnyNet(nn.Module):
    """Generic network made of a stem, stages ``s1`` .. ``sN`` and a head.

    Keyword Args (all required when any keyword is given):
        stem_w: output width of the stem.
        ds: per-stage depths (number of blocks).
        ws: per-stage widths.
        ss: per-stage strides.
        bms: per-stage bottleneck multipliers; a falsy value means 1.0 each.
        gws: per-stage group widths; a falsy value means 1 each.
        se_r: squeeze-and-excitation ratio passed to every stage.
        nc: number of outputs of the head.

    Constructing the class without keyword arguments builds no layers.
    """

    _CONSTRUCT_KEYS = ("stem_w", "ds", "ws", "ss", "bms", "gws", "se_r", "nc")

    def __init__(self, **kwargs):
        super().__init__()
        if kwargs:
            self._construct(**{key: kwargs[key] for key in self._CONSTRUCT_KEYS})
        self._init_weights()

    def _init_weights(self):
        """Initialise conv, batch-norm and linear parameters in place."""
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                # Only the weight is touched; a conv bias, if present, keeps
                # its default initialisation.
                k_h, k_w = module.kernel_size
                fan_out = k_h * k_w * module.out_channels
                module.weight.data.normal_(mean=0.0, std=math.sqrt(2.0 / fan_out))
            elif isinstance(module, nn.BatchNorm2d):
                # Batch norms flagged with ``final_bn`` start with gamma = 0.
                gamma = 0.0 if getattr(module, "final_bn", False) else 1.0
                module.weight.data.fill_(gamma)
                module.bias.data.zero_()
            elif isinstance(module, nn.Linear):
                module.weight.data.normal_(mean=0.0, std=0.01)
                module.bias.data.zero_()

    def _construct(self, stem_w, ds, ws, ss, bms, gws, se_r, nc):
        """Register the stem, one ``AnyStage`` per stage, and the head."""
        # Fall back to neutral multipliers / group widths when none are given.
        if not bms:
            bms = [1.0 for _d in ds]
        if not gws:
            gws = [1 for _d in ds]

        self.stem = SimpleStemIN(3, stem_w)

        block_fun = ResBottleneckBlock
        width_in = stem_w
        per_stage = list(zip(ds, ws, ss, bms, gws))
        for index, (depth, width, stride, bm, gw) in enumerate(per_stage, start=1):
            stage = AnyStage(width_in, width, stride, depth, block_fun, bm, gw, se_r)
            self.add_module("s{}".format(index), stage)
            # The next stage consumes this stage's output width.
            width_in = width

        self.head = AnyHead(w_in=width_in, nc=nc)

    def forward(self, x):
        """Apply stem, stages and head in registration order."""
        out = x
        for _name, module in self.named_children():
            out = module(out)
        return out

    def _get_flops(self):
        """Profile the network with ``thop`` on one 3x224x224 random input."""
        from thop import profile
        dummy_input = torch.randn(1, 3, 224, 224)
        macs, params = profile(self, inputs=(dummy_input,))
        return macs, params
def generate_regnet(w_a, w_0, w_m, d, q=8):
    """Generate quantized per-block widths from the RegNet parameters.

    Widths first grow linearly (``w_0 + i * w_a`` for block ``i``), are then
    snapped to the nearest power ``w_0 * w_m**k`` with integer ``k``, and are
    finally rounded to a multiple of ``q``.

    Args:
        w_a: slope of the linear width function (must be >= 0).
        w_0: initial width (must be > 0 and divisible by ``q``).
        w_m: width multiplier between consecutive powers (must be > 1).
        d: number of blocks.
        q: quantization step for the final widths.

    Returns:
        ``(ws, num_stages, max_stage, ws_cont)``: the quantized per-block
        widths as a list of ints, the number of distinct widths, the largest
        exponent ``k`` plus one, and the unquantized linear widths as a list
        of floats.

    Raises:
        AssertionError: if the parameter constraints above are violated.
    """
    assert w_a >= 0 and w_0 > 0 and w_m > 1 and w_0 % q == 0

    # Continuous (linear) width of every block.
    block_index = np.arange(d)
    ws_cont = block_index * w_a + w_0

    # Nearest integer exponent k such that w_0 * w_m**k approximates the width.
    exponents = np.round(np.log(ws_cont / w_0) / np.log(w_m))

    # Widths on the exponential grid, rounded to a multiple of q.
    ws_quantized = np.round(w_0 * np.power(w_m, exponents) / q) * q

    num_stages = len(np.unique(ws_quantized))
    max_stage = exponents.max() + 1
    return ws_quantized.astype(int).tolist(), num_stages, max_stage, ws_cont.tolist()
RegNet(regnet_600M_config, **kwargs) 113 | if pretrained: 114 | model = WrappedModel(model) 115 | state_dict = torch.load(model_paths['regnet_600m']) 116 | model.load_state_dict(state_dict) 117 | return model 118 | 119 | 120 | def regnet_800M(pretrained=False, **kwargs): 121 | model = RegNet(regnet_800M_config, **kwargs) 122 | if pretrained: 123 | model = WrappedModel(model) 124 | state_dict = torch.load(model_paths['regnet_800m']) 125 | model.load_state_dict(state_dict) 126 | return model 127 | 128 | 129 | def regnet_1600M(pretrained=False, **kwargs): 130 | model = RegNet(regnet_1600M_config, **kwargs) 131 | if pretrained: 132 | model = WrappedModel(model) 133 | state_dict = torch.load(model_paths['regnet_1600m']) 134 | model.load_state_dict(state_dict) 135 | return model 136 | 137 | 138 | def regnet_3200M(pretrained=False, **kwargs): 139 | model = RegNet(regnet_3200M_config, **kwargs) 140 | if pretrained: 141 | model = WrappedModel(model) 142 | state_dict = torch.load(model_paths['regnet_3200m']) 143 | model.load_state_dict(state_dict) 144 | return model 145 | 146 | 147 | def regnet_4000M(pretrained=False, **kwargs): 148 | model = RegNet(regnet_4000M_config, **kwargs) 149 | if pretrained: 150 | model = WrappedModel(model) 151 | state_dict = torch.load(model_paths['regnet_4000m']) 152 | model.load_state_dict(state_dict) 153 | return model 154 | 155 | 156 | def regnet_6400M(pretrained=False, **kwargs): 157 | model = RegNet(regnet_6400M_config, **kwargs) 158 | if pretrained: 159 | model = WrappedModel(model) 160 | state_dict = torch.load(model_paths['regnet_6400m']) 161 | model.load_state_dict(state_dict) 162 | return model 163 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Yuhang Li 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and 
associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RegNet-Pytorch 2 | 3 | [Designing Network Design Spaces](https://arxiv.org/pdf/2003.13678.pdf) 4 | 5 | ![image-20200427104303632](figs/image-20200427104303632.png) 6 | 7 | ### Prerequisite 8 | 9 | PyTorch 1.1.0+ 10 | 11 | [thop](https://github.com/Lyken17/pytorch-OpCounter) to calculate the FLOPs and params 12 | 13 | ### CIFAR10 14 | 15 | ```bash 16 | cd $PATH-TO-THIS-REPO/CIFAR10 17 | ``` 18 | 19 | For CIFAR10 models, the stride for the first stage is set to 1, so that the input size for the last stage won't become too small (2x2). The STEM part does not contain a max pooling layer.
20 | 21 | Run the following command to train a RegNet from scratch (add `-e` to evaluate the pre-trained model): 22 | 23 | ```bash 24 | python main.py -a regnet_200m 25 | ``` 26 | 27 | Here is the summary for the accuracy, params and MACs. 28 | 29 | | Models | FLOPs (10^6) | Params (10^6) | Hyper-params | Accuracy | 30 | | ----------- | ------------ | ------------- | ----------------------------------- | -------- | 31 | | RegNet-200M | 62 | 2.31 | batch128_wd0.0001_cos300epoch_lr0.1 | 93.58 | 32 | | RegNet-400M | 126 | 4.77 | batch128_wd0.0001_cos300epoch_lr0.1 | 94.15 | 33 | | RegNet-600M | 192 | 5.67 | batch128_wd0.0001_cos300epoch_lr0.1 | 94.73 | 34 | | RegNet-800M | 258 | 6.60 | batch128_wd0.0001_cos300epoch_lr0.1 | 95.01 | 35 | | RegNet-1.6G | 522 | 8.28 | batch128_wd0.0001_cos300epoch_lr0.1 | 95.45 | 36 | | RegNet-3.2G | 1038 | 14.3 | batch128_wd0.0001_cos300epoch_lr0.1 | 95.53 | 37 | | RegNet-4G | 1298 | 20.8 | batch128_wd0.0001_cos300epoch_lr0.1 | 95.69 | 38 | | RegNet-6.4G | 2108 | 24.6 | batch128_wd0.0001_cos300epoch_lr0.1 | 96.20 | 39 | 40 | ### ImageNet 41 | 42 | For ImageNet models, we keep the model and training configuration exactly the same as in the original [released code](https://github.com/facebookresearch/pycls). We train the model using the PyTorch framework, and the summary of the results is shown below.
43 | 44 | | Models | FLOPs (10^6) | Params (10^6) | Hyper-params | Accuracy (Paper) | Accuracy (Ours) | 45 | | ----------- | ------------ | ------------- | ------------------------------------ | ---------------- | --------------- | 46 | | RegNet-200M | 208 | 2.68 | batch1k_wd0.00005_cos100epoch_lr0.8 | 68.9 | 68.1 | 47 | | RegNet-400M | 410 | 5.15 | batch1k_wd0.00005_cos100epoch_lr0.8 | 72.7 | 72.24 | 48 | | RegNet-600M | 618 | 6.19 | batch1k_wd0.00005_cos100epoch_lr0.8 | 74.1 | 73.94 | 49 | | RegNet-800M | 820 | 7.25 | batch1k_wd0.00005_cos100epoch_lr0.8 | 75.2 | 75.13 | 50 | | RegNet-1.6G | 1635 | 9.19 | batch512_wd0.00005_cos100epoch_lr0.4 | 77.0 | 77.09 | 51 | | RegNet-3.2G | 3233 | 15.3 | batch512_wd0.00005_cos100epoch_lr0.4 | 78.3 | 78.54 | 52 | | RegNet-4G | 4014 | 22.1 | batch512_wd0.00005_cos100epoch_lr0.4 | 78.6 | 79.09 | 53 | | RegNet-6.4G | 6527 | 26.2 | batch512_wd0.00005_cos100epoch_lr0.4 | 79.2 | 79.36 | 54 | 55 | Note: we only uploaded the regnet_200MF, 400MF, and 600MF checkpoints to this repo. Other pretrained models can be found [here](https://drive.google.com/open?id=1lbIBzQNW2CqbmnbAkY8DM5apzq_YXWFc). Use `pretrained=True` to load the pre-trained models. -------------------------------------------------------------------------------- /figs/image-20200427104303632.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yhhhli/RegNet-Pytorch/6035ff822595338efec9a4de21134b134c7dcaa8/figs/image-20200427104303632.png --------------------------------------------------------------------------------