├── Squeezenet_loss.jpg
├── README.md
├── temp.py
├── model.py
└── main.py

/Squeezenet_loss.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gsp-27/pytorch_Squeezenet/HEAD/Squeezenet_loss.jpg
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
### PyTorch SqueezeNet

PyTorch implementation of the SqueezeNet model described in https://arxiv.org/abs/1602.07360, trained on CIFAR-10 data.

The SqueezeNet model is defined in **model.py**.
The training procedure resides in **main.py**.

Command to train the SqueezeNet model on CIFAR-10 data:
```bash
python main.py --batch-size 32 --epoch 10
```
Other options are listed in **main.py**. E.g., to start from pretrained weights:
```bash
python main.py --batch-size 32 --epoch 10 --model_name <path-to-weights>
```

Training currently uses SGD. With the **--epoch_55** flag, the learning rate and weight decay are updated using a 55 epoch learning rule, which usually gives good performance. If you would rather pick your own learning rate, pass the **--learning-rate** flag instead (weight decay is fixed at 5e-4 in **main.py**):

```bash
python main.py --batch-size 32 --epoch 10 --learning-rate 1e-3
```
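
For reference, the 55 epoch rule (the `paramsforepoch` function in **main.py**) applies this schedule:

| Epochs | Learning rate | Weight decay |
|--------|---------------|--------------|
| 1–18   | 5e-3          | 5e-4         |
| 19–29  | 1e-3          | 5e-4         |
| 30–43  | 5e-4          | 5e-4         |
| 44–52  | 1e-4          | 0            |
| 53+    | 1e-5          | 0            |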
--------------------------------------------------------------------------------
/temp.py:
--------------------------------------------------------------------------------
import torch
import torch.nn as nn
import numpy as np
import torch.optim as optim
import torch.nn.functional as F
import matplotlib.pyplot as plt

# a small throwaway CNN used to sanity-check the training loop
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
        self.relu1 = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.relu2 = nn.ReLU(inplace=True)
        self.fc1 = nn.Linear(32*32*32, 1024)
        self.relu3 = nn.ReLU(inplace=True)
        self.fc2 = nn.Linear(1024, 2)
        self.softmax = nn.LogSoftmax(dim=1)
        # MSR (He) initialization for the conv layers
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, np.sqrt(2. / n))

    def forward(self, x):
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.conv2(x)
        x = self.relu2(x)
        x = x.view(-1, 32*32*32)
        x = self.fc1(x)
        x = self.relu3(x)
        x = self.fc2(x)
        x = self.softmax(x)
        return x

# create an instance of the model
model = Net()

# generate random inputs with random binary labels
inp = torch.randn(128, 3, 32, 32)
targets = torch.LongTensor(128)
for i in range(inp.size(0)):
    targets[i] = 1 if np.random.uniform() > 0.5 else 0

train_list = torch.split(inp, 16, 0)
targets = torch.split(targets, 16)

# define an optimizer
optimizer = optim.Adam(model.parameters(), betas=(0.9, 0.999), lr=3e-3, eps=1e-8, weight_decay=0.05)
avg_loss = list()

# train the model for some number of epochs
def train(epoch):
    for i, tr_batch in enumerate(train_list):
        data, t = tr_batch, targets[i]
        # do the forward pass
        scores = model(data)
        loss = F.nll_loss(scores, t)
        # zero the gradients, backpropagate, and update the parameters
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        avg_loss.append(loss.item())

        if i % 2 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, (i+1) * len(data), len(inp),
                100. * (i+1) * len(data) / inp.size(0), loss.item()))
    # plot the loss
    plt.plot(avg_loss)
    plt.savefig("avg_loss.jpg")
    plt.close()

if __name__ == '__main__':
    epochs = 100
    for i in range(epochs):
        train(i)
--------------------------------------------------------------------------------
/model.py:
--------------------------------------------------------------------------------
import torch
import torch.nn as nn
import math

class fire(nn.Module):
    def __init__(self, inplanes, squeeze_planes, expand_planes):
        super(fire, self).__init__()
        # squeeze: 1x1 conv down to squeeze_planes channels
        self.conv1 = nn.Conv2d(inplanes, squeeze_planes, kernel_size=1, stride=1)
        self.bn1 = nn.BatchNorm2d(squeeze_planes)
        self.relu1 = nn.ReLU(inplace=True)
        # expand: parallel 1x1 and 3x3 convs whose outputs are concatenated
        self.conv2 = nn.Conv2d(squeeze_planes, expand_planes, kernel_size=1, stride=1)
        self.bn2 = nn.BatchNorm2d(expand_planes)
        self.conv3 = nn.Conv2d(squeeze_planes, expand_planes, kernel_size=3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm2d(expand_planes)
        self.relu2 = nn.ReLU(inplace=True)

        # using MSR initialization
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.in_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu1(x)
        out1 = self.conv2(x)
        out1 = self.bn2(out1)
        out2 = self.conv3(x)
        out2 = self.bn3(out2)
        out = torch.cat([out1, out2], 1)
        out = self.relu2(out)
        return out
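
# A quick shape check (a minimal sketch, assuming the fire definition above):
# the two expand branches each produce expand_planes channels and are
# concatenated along dim 1, so a fire block maps
# (N, inplanes, H, W) -> (N, 2 * expand_planes, H, W). For example:
#
#   f = fire(96, 16, 64)
#   y = f(torch.randn(1, 96, 32, 32))
#   print(y.size())  # torch.Size([1, 128, 32, 32])
#
# which is why fire2 = fire(96, 16, 64) below feeds fire3 = fire(128, 16, 64).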

class SqueezeNet(nn.Module):
    def __init__(self):
        super(SqueezeNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 96, kernel_size=3, stride=1, padding=1)  # 32x32
        self.bn1 = nn.BatchNorm2d(96)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool1 = nn.MaxPool2d(kernel_size=2, stride=2)  # 16x16
        self.fire2 = fire(96, 16, 64)
        self.fire3 = fire(128, 16, 64)
        self.fire4 = fire(128, 32, 128)
        self.maxpool2 = nn.MaxPool2d(kernel_size=2, stride=2)  # 8x8
        self.fire5 = fire(256, 32, 128)
        self.fire6 = fire(256, 48, 192)
        self.fire7 = fire(384, 48, 192)
        self.fire8 = fire(384, 64, 256)
        self.maxpool3 = nn.MaxPool2d(kernel_size=2, stride=2)  # 4x4
        self.fire9 = fire(512, 64, 256)
        self.conv2 = nn.Conv2d(512, 10, kernel_size=1, stride=1)
        self.avg_pool = nn.AvgPool2d(kernel_size=4, stride=4)  # 4x4 -> 1x1
        self.softmax = nn.LogSoftmax(dim=1)
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.in_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool1(x)
        x = self.fire2(x)
        x = self.fire3(x)
        x = self.fire4(x)
        x = self.maxpool2(x)
        x = self.fire5(x)
        x = self.fire6(x)
        x = self.fire7(x)
        x = self.fire8(x)
        x = self.maxpool3(x)
        x = self.fire9(x)
        x = self.conv2(x)
        x = self.avg_pool(x)
        x = self.softmax(x)
        return x

def fire_layer(inp, s, e):
    f = fire(inp, s, e)
    return f

def squeezenet(pretrained=False):
    net = SqueezeNet()
    return net

if __name__ == '__main__':
    # smoke test: a batch of CIFAR-sized images should come out as a
    # (batch, 10, 1, 1) map of log-probabilities
    net = squeezenet()
    out = net(torch.randn(64, 3, 32, 32))
    print(out.size())
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
import torch
import torch.optim as optim
import argparse
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import model
import torch.nn.functional as F
import matplotlib.pyplot as plt

parser = argparse.ArgumentParser('Options for training SqueezeNet in pytorch')
parser.add_argument('--batch-size', type=int, default=64, metavar='N', help='batch size for training')
parser.add_argument('--epoch', type=int, default=55, metavar='N', help='number of epochs to train for')
parser.add_argument('--learning-rate', type=float, default=0.001, metavar='LR', help='learning rate')
parser.add_argument('--momentum', type=float, default=0.9, metavar='M', help='SGD momentum')
parser.add_argument('--no-cuda', action='store_true', default=False, help='disable cuda for training')
parser.add_argument('--log-schedule', type=int, default=10, metavar='N', help='number of batches between log messages')
parser.add_argument('--seed', type=int, default=1, help='set seed to some constant value to reproduce experiments')
parser.add_argument('--model_name', type=str, default=None, help='path to pretrained weights to load')
parser.add_argument('--want_to_test', action='store_true', default=False, help='skip training and only evaluate on the test split')
parser.add_argument('--epoch_55', action='store_true', help='use the 55 epoch learning rule')
parser.add_argument('--num_classes', type=int, default=10, help='how many classes we are training for')

args = parser.parse_args()
args.cuda = not args.no_cuda and torch.cuda.is_available()

torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
train_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10('../', train=True, download=True,
                     transform=transforms.Compose([
                         transforms.RandomHorizontalFlip(),
                         transforms.ToTensor(),
                         transforms.Normalize((0.491399689874, 0.482158419622, 0.446530924224),
                                              (0.247032237587, 0.243485133253, 0.261587846975))
                     ])),
    batch_size=args.batch_size, shuffle=True, **kwargs)
# no augmentation on the evaluation data, and no shuffling, so that the
# batch-index based val/test split below stays fixed
test_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10('../', train=False, transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.491399689874, 0.482158419622, 0.446530924224),
                             (0.247032237587, 0.243485133253, 0.261587846975))
    ])),
    batch_size=args.batch_size, shuffle=False, **kwargs)

# get the model and convert it into cuda if necessary
net = model.SqueezeNet()
if args.model_name is not None:
    print("loading pre-trained weights")
    pretrained_weights = torch.load(args.model_name)
    net.load_state_dict(pretrained_weights)

if args.cuda:
    net.cuda()

# create optimizer
# using the 55 epoch learning rule here
def paramsforepoch(epoch):
    p = dict()
    regimes = [[1, 18, 5e-3, 5e-4],
               [19, 29, 1e-3, 5e-4],
               [30, 43, 5e-4, 5e-4],
               [44, 52, 1e-4, 0],
               [53, 1e8, 1e-5, 0]]
    for i, row in enumerate(regimes):
        if epoch >= row[0] and epoch <= row[1]:
            p['learning_rate'] = row[2]
            p['weight_decay'] = row[3]
    return p
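
# a worked check of the schedule above:
#   paramsforepoch(20) -> {'learning_rate': 1e-3, 'weight_decay': 5e-4}  # 19 <= 20 <= 29
#   paramsforepoch(55) -> {'learning_rate': 1e-5, 'weight_decay': 0}     # 53 <= 55 <= 1e8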

avg_loss = list()
best_accuracy = 0.0
fig1, ax1 = plt.subplots()

# train the model
# TODO: Compute training accuracy and test accuracy

# create a temporary optimizer; adjustlrwd() overrides lr and weight decay
# per epoch when the 55 epoch rule is active
optimizer = optim.SGD(net.parameters(), lr=args.learning_rate, momentum=args.momentum, weight_decay=5e-4)

def adjustlrwd(params):
    # mutate the live param groups; optimizer.state_dict() only returns a copy
    for param_group in optimizer.param_groups:
        param_group['lr'] = params['learning_rate']
        param_group['weight_decay'] = params['weight_decay']

# train the network
def train(epoch):
    # set the optimizer for this epoch
    if args.epoch_55:
        params = paramsforepoch(epoch)
        print("Configuring optimizer with lr={:.5f} and weight_decay={:.4f}".format(
            params['learning_rate'], params['weight_decay']))
        adjustlrwd(params)

    global avg_loss
    correct = 0
    net.train()
    for b_idx, (data, targets) in enumerate(train_loader):
        if args.cuda:
            data, targets = data.cuda(), targets.cuda()

        # train the network
        optimizer.zero_grad()
        scores = net(data)
        # flatten the (N, 10, 1, 1) output map to (N, 10); use the actual batch
        # dimension so the final, possibly smaller, batch also works
        scores = scores.view(scores.size(0), args.num_classes)
        loss = F.nll_loss(scores, targets)

        # compute the accuracy
        pred = scores.data.max(1)[1]  # get the index of the max log-probability
        correct += pred.eq(targets).cpu().sum().item()

        avg_loss.append(loss.item())
        loss.backward()
        optimizer.step()

        if b_idx % args.log_schedule == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, (b_idx+1) * len(data), len(train_loader.dataset),
                100. * (b_idx+1) * len(data) / len(train_loader.dataset), loss.item()))

            # also plot the loss, it should go down exponentially at some point
            ax1.plot(avg_loss)
            fig1.savefig("Squeezenet_loss.jpg")

    # now that the epoch is completed compute the training accuracy
    train_accuracy = correct / float(len(train_loader.dataset))
    print("training accuracy ({:.2f}%)".format(100 * train_accuracy))
    return train_accuracy * 100.0
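
# val() and test() below share test_loader: the first 73 batches serve as a
# validation split and the remaining batches as the held-out test split. With
# the default --batch-size of 64 that is 73 * 64 = 4672 validation images and
# 10000 - 4672 = 5328 test images; the hardcoded numbers assume that batch size.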

def val():
    global best_accuracy
    correct = 0
    net.eval()
    with torch.no_grad():
        for idx, (data, target) in enumerate(test_loader):
            if idx == 73:
                break

            if args.cuda:
                data, target = data.cuda(), target.cuda()

            # do the forward pass and flatten the output map to (N, 10)
            score = net(data)
            score = score.view(score.size(0), args.num_classes)
            pred = score.data.max(1)[1]  # get the indices of the maximum, match them
            correct += pred.eq(target).cpu().sum().item()

    print("predicted {} out of {}".format(correct, 73 * 64))
    val_accuracy = correct / (73.0 * 64.0) * 100
    print("accuracy = {:.2f}".format(val_accuracy))

    # now save the model if it has better accuracy than the best model seen so far
    if val_accuracy > best_accuracy:
        best_accuracy = val_accuracy
        # save the model
        torch.save(net.state_dict(), 'bsqueezenet_onfulldata.pth')
    return val_accuracy

def test():
    # load the best saved model
    weights = torch.load('bsqueezenet_onfulldata.pth')
    net.load_state_dict(weights)
    net.eval()

    test_correct = 0
    total_examples = 0
    with torch.no_grad():
        for idx, (data, target) in enumerate(test_loader):
            if idx < 73:
                continue
            total_examples += len(target)
            if args.cuda:
                data, target = data.cuda(), target.cuda()

            scores = net(data)
            scores = scores.view(scores.size(0), args.num_classes)
            pred = scores.data.max(1)[1]
            test_correct += pred.eq(target).cpu().sum().item()
    print("Predicted {} out of {} correctly".format(test_correct, total_examples))
    return 100.0 * test_correct / float(total_examples)

if __name__ == '__main__':
    if not args.want_to_test:
        fig2, ax2 = plt.subplots()
        train_acc, val_acc = list(), list()
        for i in range(1, args.epoch + 1):
            train_acc.append(train(i))
            val_acc.append(val())
            # update the accuracy curves after every epoch
            ax2.plot(train_acc, 'g')
            ax2.plot(val_acc, 'b')
            fig2.savefig('train_val_accuracy.jpg')
    else:
        test_acc = test()
        print("Testing accuracy on CIFAR-10 data is {:.2f}%".format(test_acc))
--------------------------------------------------------------------------------