├── Squeezenet_loss.jpg
├── README.md
├── temp.py
├── model.py
└── main.py

/Squeezenet_loss.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gsp-27/pytorch_Squeezenet/HEAD/Squeezenet_loss.jpg
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
### PyTorch SqueezeNet

PyTorch implementation of the SqueezeNet model described in https://arxiv.org/abs/1602.07360, trained on CIFAR-10 data.

The SqueezeNet model is defined in **model.py**.
The training procedure resides in **main.py**.

Command to train the SqueezeNet model on CIFAR-10 data:
```bash
python main.py --batch-size 32 --epoch 10
```
Other options are listed in **main.py**. E.g., to start from pretrained weights:
```bash
python main.py --batch-size 32 --epoch 10 --model_name <path-to-weights>
```

Training currently uses SGD. With the **--epoch_55** flag, the learning rate and weight decay are updated using a 55 epoch learning rule, which usually gives good performance. If you would rather pick your own learning rate, pass the **--learning-rate** flag instead (weight decay is fixed at 5e-4 in **main.py**):

```bash
python main.py --batch-size 32 --epoch 10 --learning-rate 1e-3
```
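
For reference, the 55 epoch rule (the `paramsforepoch` function in **main.py**) applies this schedule:

| Epochs | Learning rate | Weight decay |
|--------|---------------|--------------|
| 1–18   | 5e-3          | 5e-4         |
| 19–29  | 1e-3          | 5e-4         |
| 30–43  | 5e-4          | 5e-4         |
| 44–52  | 1e-4          | 0            |
| 53+    | 1e-5          | 0            |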
--------------------------------------------------------------------------------
/temp.py:
--------------------------------------------------------------------------------
import torch
import torch.nn as nn
import numpy as np
import torch.optim as optim
import torch.nn.functional as F
import matplotlib.pyplot as plt

# a small throwaway CNN used to sanity-check the training loop
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
        self.relu1 = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.relu2 = nn.ReLU(inplace=True)
        self.fc1 = nn.Linear(32*32*32, 1024)
        self.relu3 = nn.ReLU(inplace=True)
        self.fc2 = nn.Linear(1024, 2)
        self.softmax = nn.LogSoftmax(dim=1)
        # MSR (He) initialization for the conv layers
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, np.sqrt(2. / n))

    def forward(self, x):
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.conv2(x)
        x = self.relu2(x)
        x = x.view(-1, 32*32*32)
        x = self.fc1(x)
        x = self.relu3(x)
        x = self.fc2(x)
        x = self.softmax(x)
        return x

# create an instance of the model
model = Net()

# generate random inputs with random binary labels
inp = torch.randn(128, 3, 32, 32)
targets = torch.LongTensor(128)
for i in range(inp.size(0)):
    targets[i] = 1 if np.random.uniform() > 0.5 else 0

train_list = torch.split(inp, 16, 0)
targets = torch.split(targets, 16)

# define an optimizer
optimizer = optim.Adam(model.parameters(), betas=(0.9, 0.999), lr=3e-3, eps=1e-8, weight_decay=0.05)
avg_loss = list()

# train the model for some number of epochs
def train(epoch):
    for i, tr_batch in enumerate(train_list):
        data, t = tr_batch, targets[i]
        # do the forward pass
        scores = model(data)
        loss = F.nll_loss(scores, t)
        # zero the gradients, backpropagate, and update the parameters
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        avg_loss.append(loss.item())

        if i % 2 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, (i+1) * len(data), len(inp),
                100. * (i+1) * len(data) / inp.size(0), loss.item()))
    # plot the loss
    plt.plot(avg_loss)
    plt.savefig("avg_loss.jpg")
    plt.close()

if __name__ == '__main__':
    epochs = 100
    for i in range(epochs):
        train(i)
--------------------------------------------------------------------------------
/model.py:
--------------------------------------------------------------------------------
import torch
import torch.nn as nn
import math

class fire(nn.Module):
    def __init__(self, inplanes, squeeze_planes, expand_planes):
        super(fire, self).__init__()
        # squeeze: 1x1 conv down to squeeze_planes channels
        self.conv1 = nn.Conv2d(inplanes, squeeze_planes, kernel_size=1, stride=1)
        self.bn1 = nn.BatchNorm2d(squeeze_planes)
        self.relu1 = nn.ReLU(inplace=True)
        # expand: parallel 1x1 and 3x3 convs whose outputs are concatenated
        self.conv2 = nn.Conv2d(squeeze_planes, expand_planes, kernel_size=1, stride=1)
        self.bn2 = nn.BatchNorm2d(expand_planes)
        self.conv3 = nn.Conv2d(squeeze_planes, expand_planes, kernel_size=3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm2d(expand_planes)
        self.relu2 = nn.ReLU(inplace=True)

        # using MSR initialization
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.in_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu1(x)
        out1 = self.conv2(x)
        out1 = self.bn2(out1)
        out2 = self.conv3(x)
        out2 = self.bn3(out2)
        out = torch.cat([out1, out2], 1)
        out = self.relu2(out)
        return out
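
# A quick shape check (a minimal sketch, assuming the fire definition above):
# the two expand branches each produce expand_planes channels and are
# concatenated along dim 1, so a fire block maps
# (N, inplanes, H, W) -> (N, 2 * expand_planes, H, W). For example:
#
#   f = fire(96, 16, 64)
#   y = f(torch.randn(1, 96, 32, 32))
#   print(y.size())  # torch.Size([1, 128, 32, 32])
#
# which is why fire2 = fire(96, 16, 64) below feeds fire3 = fire(128, 16, 64).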

class SqueezeNet(nn.Module):
    def __init__(self):
        super(SqueezeNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 96, kernel_size=3, stride=1, padding=1)  # 32x32
        self.bn1 = nn.BatchNorm2d(96)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool1 = nn.MaxPool2d(kernel_size=2, stride=2)  # 16x16
        self.fire2 = fire(96, 16, 64)
        self.fire3 = fire(128, 16, 64)
        self.fire4 = fire(128, 32, 128)
        self.maxpool2 = nn.MaxPool2d(kernel_size=2, stride=2)  # 8x8
        self.fire5 = fire(256, 32, 128)
        self.fire6 = fire(256, 48, 192)
        self.fire7 = fire(384, 48, 192)
        self.fire8 = fire(384, 64, 256)
        self.maxpool3 = nn.MaxPool2d(kernel_size=2, stride=2)  # 4x4
        self.fire9 = fire(512, 64, 256)
        self.conv2 = nn.Conv2d(512, 10, kernel_size=1, stride=1)
        self.avg_pool = nn.AvgPool2d(kernel_size=4, stride=4)  # 4x4 -> 1x1
        self.softmax = nn.LogSoftmax(dim=1)
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.in_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool1(x)
        x = self.fire2(x)
        x = self.fire3(x)
        x = self.fire4(x)
        x = self.maxpool2(x)
        x = self.fire5(x)
        x = self.fire6(x)
        x = self.fire7(x)
        x = self.fire8(x)
        x = self.maxpool3(x)
        x = self.fire9(x)
        x = self.conv2(x)
        x = self.avg_pool(x)
        x = self.softmax(x)
        return x

def fire_layer(inp, s, e):
    f = fire(inp, s, e)
    return f

def squeezenet(pretrained=False):
    net = SqueezeNet()
    return net

if __name__ == '__main__':
    # smoke test: a batch of CIFAR-sized images should come out as a
    # (batch, 10, 1, 1) map of log-probabilities
    net = squeezenet()
    out = net(torch.randn(64, 3, 32, 32))
    print(out.size())
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
import torch
import torch.optim as optim
import argparse
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import model
import torch.nn.functional as F
import matplotlib.pyplot as plt

parser = argparse.ArgumentParser('Options for training SqueezeNet in pytorch')
parser.add_argument('--batch-size', type=int, default=64, metavar='N', help='batch size for training')
parser.add_argument('--epoch', type=int, default=55, metavar='N', help='number of epochs to train for')
parser.add_argument('--learning-rate', type=float, default=0.001, metavar='LR', help='learning rate')
parser.add_argument('--momentum', type=float, default=0.9, metavar='M', help='SGD momentum')
parser.add_argument('--no-cuda', action='store_true', default=False, help='disable cuda for training')
parser.add_argument('--log-schedule', type=int, default=10, metavar='N', help='number of batches between log messages')
parser.add_argument('--seed', type=int, default=1, help='set seed to some constant value to reproduce experiments')
parser.add_argument('--model_name', type=str, default=None, help='path to pretrained weights to load')
parser.add_argument('--want_to_test', action='store_true', default=False, help='skip training and only evaluate on the test split')
parser.add_argument('--epoch_55', action='store_true', help='use the 55 epoch learning rule')
parser.add_argument('--num_classes', type=int, default=10, help='how many classes we are training for')

args = parser.parse_args()
args.cuda = not args.no_cuda and torch.cuda.is_available()

torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
train_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10('../', train=True, download=True,
                     transform=transforms.Compose([
                         transforms.RandomHorizontalFlip(),
                         transforms.ToTensor(),
                         transforms.Normalize((0.491399689874, 0.482158419622, 0.446530924224),
                                              (0.247032237587, 0.243485133253, 0.261587846975))
                     ])),
    batch_size=args.batch_size, shuffle=True, **kwargs)
# no augmentation on the evaluation data, and no shuffling, so that the
# batch-index based val/test split below stays fixed
test_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10('../', train=False, transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.491399689874, 0.482158419622, 0.446530924224),
                             (0.247032237587, 0.243485133253, 0.261587846975))
    ])),
    batch_size=args.batch_size, shuffle=False, **kwargs)

# get the model and convert it into cuda if necessary
net = model.SqueezeNet()
if args.model_name is not None:
    print("loading pre-trained weights")
    pretrained_weights = torch.load(args.model_name)
    net.load_state_dict(pretrained_weights)

if args.cuda:
    net.cuda()

# create optimizer
# using the 55 epoch learning rule here
def paramsforepoch(epoch):
    p = dict()
    regimes = [[1, 18, 5e-3, 5e-4],
               [19, 29, 1e-3, 5e-4],
               [30, 43, 5e-4, 5e-4],
               [44, 52, 1e-4, 0],
               [53, 1e8, 1e-5, 0]]
    for i, row in enumerate(regimes):
        if epoch >= row[0] and epoch <= row[1]:
            p['learning_rate'] = row[2]
            p['weight_decay'] = row[3]
    return p
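
# a worked check of the schedule above:
#   paramsforepoch(20) -> {'learning_rate': 1e-3, 'weight_decay': 5e-4}  # 19 <= 20 <= 29
#   paramsforepoch(55) -> {'learning_rate': 1e-5, 'weight_decay': 0}     # 53 <= 55 <= 1e8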

avg_loss = list()
best_accuracy = 0.0
fig1, ax1 = plt.subplots()

# train the model
# TODO: Compute training accuracy and test accuracy

# create a temporary optimizer; adjustlrwd() overrides lr and weight decay
# per epoch when the 55 epoch rule is active
optimizer = optim.SGD(net.parameters(), lr=args.learning_rate, momentum=args.momentum, weight_decay=5e-4)

def adjustlrwd(params):
    # mutate the live param groups; optimizer.state_dict() only returns a copy
    for param_group in optimizer.param_groups:
        param_group['lr'] = params['learning_rate']
        param_group['weight_decay'] = params['weight_decay']

# train the network
def train(epoch):
    # set the optimizer for this epoch
    if args.epoch_55:
        params = paramsforepoch(epoch)
        print("Configuring optimizer with lr={:.5f} and weight_decay={:.4f}".format(
            params['learning_rate'], params['weight_decay']))
        adjustlrwd(params)

    global avg_loss
    correct = 0
    net.train()
    for b_idx, (data, targets) in enumerate(train_loader):
        if args.cuda:
            data, targets = data.cuda(), targets.cuda()

        # train the network
        optimizer.zero_grad()
        scores = net(data)
        # flatten the (N, 10, 1, 1) output map to (N, 10); use the actual batch
        # dimension so the final, possibly smaller, batch also works
        scores = scores.view(scores.size(0), args.num_classes)
        loss = F.nll_loss(scores, targets)

        # compute the accuracy
        pred = scores.data.max(1)[1]  # get the index of the max log-probability
        correct += pred.eq(targets).cpu().sum().item()

        avg_loss.append(loss.item())
        loss.backward()
        optimizer.step()

        if b_idx % args.log_schedule == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, (b_idx+1) * len(data), len(train_loader.dataset),
                100. * (b_idx+1) * len(data) / len(train_loader.dataset), loss.item()))

            # also plot the loss, it should go down exponentially at some point
            ax1.plot(avg_loss)
            fig1.savefig("Squeezenet_loss.jpg")

    # now that the epoch is completed compute the training accuracy
    train_accuracy = correct / float(len(train_loader.dataset))
    print("training accuracy ({:.2f}%)".format(100 * train_accuracy))
    return train_accuracy * 100.0
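
# val() and test() below share test_loader: the first 73 batches serve as a
# validation split and the remaining batches as the held-out test split. With
# the default --batch-size of 64 that is 73 * 64 = 4672 validation images and
# 10000 - 4672 = 5328 test images; the hardcoded numbers assume that batch size.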

def val():
    global best_accuracy
    correct = 0
    net.eval()
    with torch.no_grad():
        for idx, (data, target) in enumerate(test_loader):
            if idx == 73:
                break

            if args.cuda:
                data, target = data.cuda(), target.cuda()

            # do the forward pass and flatten the output map to (N, 10)
            score = net(data)
            score = score.view(score.size(0), args.num_classes)
            pred = score.data.max(1)[1]  # get the indices of the maximum, match them
            correct += pred.eq(target).cpu().sum().item()

    print("predicted {} out of {}".format(correct, 73 * 64))
    val_accuracy = correct / (73.0 * 64.0) * 100
    print("accuracy = {:.2f}".format(val_accuracy))

    # now save the model if it has better accuracy than the best model seen so far
    if val_accuracy > best_accuracy:
        best_accuracy = val_accuracy
        # save the model
        torch.save(net.state_dict(), 'bsqueezenet_onfulldata.pth')
    return val_accuracy

def test():
    # load the best saved model
    weights = torch.load('bsqueezenet_onfulldata.pth')
    net.load_state_dict(weights)
    net.eval()

    test_correct = 0
    total_examples = 0
    with torch.no_grad():
        for idx, (data, target) in enumerate(test_loader):
            if idx < 73:
                continue
            total_examples += len(target)
            if args.cuda:
                data, target = data.cuda(), target.cuda()

            scores = net(data)
            scores = scores.view(scores.size(0), args.num_classes)
            pred = scores.data.max(1)[1]
            test_correct += pred.eq(target).cpu().sum().item()
    print("Predicted {} out of {} correctly".format(test_correct, total_examples))
    return 100.0 * test_correct / float(total_examples)

if __name__ == '__main__':
    if not args.want_to_test:
        fig2, ax2 = plt.subplots()
        train_acc, val_acc = list(), list()
        for i in range(1, args.epoch + 1):
            train_acc.append(train(i))
            val_acc.append(val())
            # update the accuracy curves after every epoch
            ax2.plot(train_acc, 'g')
            ax2.plot(val_acc, 'b')
            fig2.savefig('train_val_accuracy.jpg')
    else:
        test_acc = test()
        print("Testing accuracy on CIFAR-10 data is {:.2f}%".format(test_acc))
--------------------------------------------------------------------------------