├── .gitignore
├── 2048_5.png
├── LICENSE
├── NeumannOptimizerNumpy.py
├── README.md
├── __init__.py
├── main.py
├── misc
    ├── NeumannOptimizerFinal.pdf
    ├── adam_skeleton.py
    └── neumann.pdf
├── models
    ├── 128_3.pkl
    ├── 2048_3.pkl
    ├── 2048_5.pkl
    ├── 256_3.pkl
    ├── 256_5.pkl
    ├── 32_5.pkl
    ├── ImageClassifier.py
    ├── __init__.py
    ├── cnn.py
    ├── linear_regression
    │   ├── Linear Regression.ipynb
    │   └── test.py
    ├── mlp.py
    ├── modules
    │   ├── Net.py
    │   └── __init__.py
    ├── optimizer
    │   ├── __init__.py
    │   ├── neumann.py
    │   └── stochastic.py
    └── plot.ipynb
├── slr10.csv
└── test.ipynb


/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled / optimized / DLL files
 2 | __pycache__/
 3 | *.py[cod]
 4 | *$py.class
 5 | 
 6 | # Jupyter Notebook
 7 | .ipynb_checkpoints
 8 | 
 9 | 
10 | .vscode/
11 | data/
12 | .idea/
13 | dataset/
14 | 


--------------------------------------------------------------------------------
/2048_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jayybhatt/neumann-optimizer/c931631346a1097d198983684d7c68d91ae82d39/2048_5.png


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2018 Jay
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/NeumannOptimizerNumpy.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import time
  3 | from math import exp
  4 | import matplotlib.pyplot as plt
  5 | 
  6 | def gradient_descent( func, initial_x, eps=1e-5, maximum_iterations=65536, learning_rate=1e-2 ):
  7 |     """
  8 |     Gradient Descent
  9 |     func:               the function to optimize It is called as "value, gradient = func( x, 1 )
 10 |     initial_x:          the starting point, should be a float
 11 |     eps:                the maximum allowed error in the resulting stepsize t
 12 |     maximum_iterations: the maximum allowed number of iterations
 13 |     linesearch:         the linesearch routine
 14 |     *linesearch_args:   the extra arguments of linesearch routine
 15 |     """
 16 | 
 17 |     if eps <= 0:
 18 |         raise ValueError("Epsilon must be positive")
 19 |     x = np.matrix(initial_x)
 20 | 
 21 |     # initialization
 22 |     values = []
 23 |     runtimes = []
 24 |     xs = []
 25 |     start_time = time.time()
 26 |     iterations = 0
 27 | 
 28 |     # gradient updates
 29 |     while True:
 30 | 
 31 |         value, gradient = func( x , 1 )
 32 |         value = np.double( value )
 33 |         gradient = np.matrix( gradient )
 34 | 
 35 |         # updating the logs
 36 |         values.append( value )
 37 |         runtimes.append( time.time() - start_time )
 38 |         xs.append( x.copy() )
 39 | 
 40 |         direction = -gradient
 41 | 
 42 |         if np.linalg.norm(direction)<eps:
 43 |             break
 44 | 
 45 |         t = learning_rate
 46 | 
 47 |         x = x + t * direction
 48 | 
 49 |         iterations += 1
 50 |         if iterations >= maximum_iterations:
 51 |             break
 52 |     return (x, values, runtimes, xs)
 53 | 
 54 | def linear_regression(x, y, w, b, order=0):
 55 |     output = w*x.T + b
 56 |     error = np.mean((y-output)**2)
 57 |     if order == 1:
 58 |         grad_w = -2*x.T*(y-(w*x.T + b))
 59 |         grad_b = -2*(y-(w*x.T + b))
 60 |         grad_w = np.mean(grad_w)
 61 |         grad_b = np.mean(grad_b)
 62 |         return output, grad_w, grad_b
 63 |     return output
 64 | 
 65 | def boyd_example_func(x, order=0):
 66 |   a=np.matrix('1  3')
 67 |   b=np.matrix('1  -3')
 68 |   c=np.matrix('-1  0')
 69 |   x=np.asmatrix(x)
 70 | 
 71 |   value = exp(a*x-0.1)+exp(b*x-0.1)+exp(c*x-0.1)
 72 |   if order==0:
 73 |       return value
 74 |   elif order==1:
 75 |       gradient = a.T*exp(a*x-0.1)+b.T*exp(b*x-0.1)+c.T*exp(c*x-0.1)
 76 |       return (value, gradient)
 77 |   elif order==2:
 78 |       gradient = a.T*exp(a*x-0.1)+b.T*exp(b*x-0.1)+c.T*exp(c*x-0.1)
 79 |       hessian = a.T*a*exp(a*x-0.1)+b.T*b*exp(b*x-0.1)+c.T*c*exp(c*x-0.1)
 80 |       return (value, gradient, hessian)
 81 |   else:
 82 |         raise ValueError("The argument \"order\" should be 0, 1 or 2")
 83 | 
 84 | def neumann( func, initial_x, learning_rate=1e-2, eps=1e-5, maximum_iterations=65536):
 85 |     x = np.matrix(initial_x)
 86 |     # moving_average = x
 87 |     neumann_iterate = 0
 88 |     iterate = 0
 89 |     k_value = 10
 90 |     values = []
 91 |     runtimes = []
 92 |     xs = []
 93 |     grad_norm = []
 94 |     start_time = time.time()
 95 |     while True:
 96 |         print(x)
 97 |         if iterate < 5:
 98 |             value, grad = func(x, 1)
 99 |             x = x - learning_rate*grad
100 |             iterate += 1
101 |             continue
102 | 
103 |         values.append( value )
104 |         runtimes.append( time.time() - start_time )
105 |         xs.append( x.copy() )
106 | 
107 |         eta = 0.5/iterate
108 |         mu = iterate/(iterate + 1)
109 |         mu = min(max(mu, 0.5),0.9)
110 | 
111 |         value, grad = func(x, 1)
112 | 
113 |         grad_norm.append(np.linalg.norm(grad)**2)
114 | 
115 |         if np.linalg.norm(grad)**2 < eps:
116 |             break
117 | 
118 |         if iterate % k_value == 0:
119 |             neumann_iterate = -eta*grad
120 |             k_value *= 2
121 | 
122 |         #Removing crazy function as we're only trying on convex function
123 | 
124 |         neumann_iterate = mu*neumann_iterate - eta*grad
125 | 
126 |         x = x + mu*neumann_iterate - eta*grad
127 |         # moving_average =
128 |         iterate += 1
129 |         if iterate >= maximum_iterations:
130 |             break
131 |     return x,values,runtimes,xs,grad_norm
132 | 
133 | 
134 | def draw_contour( func, neumann_xs, fig, levels=np.arange(5, 1000, 10), x=np.arange(-5, 5.1, 0.05), y=np.arange(-5, 5.1, 0.05)):
135 |     """
136 |     Draws a contour plot of given iterations for a function
137 |     func:       the contour levels will be drawn based on the values of func
138 |     gd_xs:      gradient descent iterates
139 |     newton_xs:  Newton iterates
140 |     fig:        figure index
141 |     levels:     levels of the contour plot
142 |     x:          x coordinates to evaluate func and draw the plot
143 |     y:          y coordinates to evaluate func and draw the plot
144 |     """
145 |     Z = np.zeros((len(x), len(y)))
146 |     for i in range(len(x)):
147 |         for j in range(len(y)):
148 |             Z[i, j] = func( np.matrix([x[i],y[j]]).T , 0 )
149 | 
150 |     plt.figure(fig)
151 |     plt.contour( x, y, Z.T, levels, colors='0.75')
152 |     plt.ion()
153 |     plt.show()
154 | 
155 |     # line_gd, = plt.plot( gd_xs[0][0,0], gd_xs[0][1,0], linewidth=2, color='r', marker='o', label='GD' )
156 |     line_newton, = plt.plot( neumann_xs[0][0,0], neumann_xs[0][1,0], linewidth=2, color='m', marker='o',label='Neumann' )
157 | 
158 |     L = plt.legend(handles=[line_newton])
159 |     plt.draw()
160 |     time.sleep(1)
161 | 
162 |     for i in range( 1, len(neumann_xs)):
163 | 
164 |         # line_gd.set_xdata( np.append( line_gd.get_xdata(), gd_xs[ min(i,len(gd_xs)-1) ][0,0] ) )
165 |         # line_gd.set_ydata( np.append( line_gd.get_ydata(), gd_xs[ min(i,len(gd_xs)-1) ][1,0] ) )
166 | 
167 |         line_newton.set_xdata( np.append( line_newton.get_xdata(), neumann_xs[ min(i,len(neumann_xs)-1) ][0,0] ) )
168 |         line_newton.set_ydata( np.append( line_newton.get_ydata(), neumann_xs[ min(i,len(neumann_xs)-1) ][1,0] ) )
169 | 
170 | 
171 |         # L.get_texts()[0].set_text( " GD, %d iterations" % min(i,len(gd_xs)-1) )
172 |         L.get_texts()[0].set_text( " Neumann, %d iterations" % min(i,len(neumann_xs)-1) )
173 | 
174 |         plt.draw()
175 |         input("Press Enter to continue...")
176 | 
177 | 
178 | initial_x = np.matrix('-1.0; -1.0')
179 | 
180 | x, values, runtimes, neumann_xs, grad_norm = neumann(boyd_example_func, initial_x)
181 | x_gd, gd_values, runtimes_gd, gradient_xs = gradient_descent(boyd_example_func, initial_x)
182 | plt.figure(1)
183 | line_gd, = plt.semilogy([x for x in values], linewidth=2, color='r', marker='o', label='Neumann')
184 | line_neumann, = plt.semilogy([x for x in gd_values], linewidth=2, color='b', marker='o', label='Neumann')
185 | plt.figure(2)
186 | plt.semilogy([x for x in grad_norm], linewidth=2, color='b', marker='o', label='Neumann')
187 | draw_contour( boyd_example_func, neumann_xs, 3, levels=np.arange(0, 15, 1), x=np.arange(-2, 2, 0.1), y=np.arange(-2, 2, 0.1))
188 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # neumann-optimizer


--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
1 | import sys
2 | sys.path.insert(0,"models/optimizer")
3 | 


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
  1 | from __future__ import print_function
  2 | import argparse
  3 | import torch
  4 | import torch.nn as nn
  5 | import torch.nn.functional as F
  6 | # import torch.optim as optim
  7 | import models.optimizer as optim
  8 | from torchvision import datasets, transforms
  9 | 
 10 | # Training settings
 11 | parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
 12 | parser.add_argument('--batch-size', type=int, default=64, metavar='N',
 13 |                     help='input batch size for training (default: 64)')
 14 | parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
 15 |                     help='input batch size for testing (default: 1000)')
 16 | parser.add_argument('--epochs', type=int, default=10, metavar='N',
 17 |                     help='number of epochs to train (default: 10)')
 18 | parser.add_argument('--lr', type=float, default=1e-3, metavar='LR',
 19 |                     help='learning rate (default: 0.01)')
 20 | parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
 21 |                     help='SGD momentum (default: 0.5)')
 22 | parser.add_argument('--no-cuda', action='store_true', default=False,
 23 |                     help='disables CUDA training')
 24 | parser.add_argument('--seed', type=int, default=1, metavar='S',
 25 |                     help='random seed (default: 1)')
 26 | parser.add_argument('--log-interval', type=int, default=10, metavar='N',
 27 |                     help='how many batches to wait before logging training status')
 28 | args = parser.parse_args()
 29 | use_cuda = not args.no_cuda and torch.cuda.is_available()
 30 | 
 31 | torch.manual_seed(args.seed)
 32 | 
 33 | device = torch.device("cuda" if use_cuda else "cpu")
 34 | 
 35 | 
 36 | kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
 37 | train_loader = torch.utils.data.DataLoader(
 38 |     datasets.MNIST('../data', train=True, download=True,
 39 |                    transform=transforms.Compose([
 40 |                        transforms.ToTensor(),
 41 |                        transforms.Normalize((0.1307,), (0.3081,))
 42 |                    ])),
 43 |     batch_size=args.batch_size, shuffle=True, **kwargs)
 44 | test_loader = torch.utils.data.DataLoader(
 45 |     datasets.MNIST('../data', train=False, transform=transforms.Compose([
 46 |                        transforms.ToTensor(),
 47 |                        transforms.Normalize((0.1307,), (0.3081,))
 48 |                    ])),
 49 |     batch_size=args.test_batch_size, shuffle=True, **kwargs)
 50 | 
 51 | 
 52 | class Net(nn.Module):
 53 |     def __init__(self):
 54 |         super(Net, self).__init__()
 55 |         self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
 56 |         self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
 57 |         self.conv2_drop = nn.Dropout2d()
 58 |         self.fc1 = nn.Linear(320, 50)
 59 |         self.fc2 = nn.Linear(50, 10)
 60 | 
 61 |     def forward(self, x):
 62 |         x = F.relu(F.max_pool2d(self.conv1(x), 2))
 63 |         x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
 64 |         x = x.view(-1, 320)
 65 |         x = F.relu(self.fc1(x))
 66 |         x = F.dropout(x, training=self.training)
 67 |         x = self.fc2(x)
 68 |         return F.log_softmax(x, dim=1)
 69 | 
 70 | model = Net().to(device)
 71 | 
 72 | 
 73 | alpha = 1e-3
 74 | beta = 1e-9
 75 | lr = 0.001
 76 | optimizer = optim.Neumann(list(model.parameters()), lr=lr, alpha=alpha, beta=beta)
 77 | 
 78 | def train(epoch):
 79 |     model.train()
 80 |     for batch_idx, (data, target) in enumerate(train_loader):
 81 |         data, target = data.to(device), target.to(device)
 82 |         optimizer.zero_grad()
 83 |         output = model(data)
 84 |         loss = F.nll_loss(output, target)
 85 |         loss.backward()
 86 |         optimizer.step()
 87 |         if batch_idx % args.log_interval == 0:
 88 |             print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
 89 |                 epoch, batch_idx * len(data), len(train_loader.dataset),
 90 |                 100. * batch_idx / len(train_loader), loss.item()))
 91 | 
 92 | def test():
 93 |     model.eval()
 94 |     test_loss = 0
 95 |     correct = 0
 96 |     with torch.no_grad():
 97 |         for data, target in test_loader:
 98 |             data, target = data.to(device), target.to(device)
 99 |             output = model(data)
100 |             test_loss += F.nll_loss(output, target, size_average=False).item() # sum up batch loss
101 |             pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability
102 |             correct += pred.eq(target.view_as(pred)).sum().item()
103 | 
104 |     test_loss /= len(test_loader.dataset)
105 |     print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
106 |         test_loss, correct, len(test_loader.dataset),
107 |         100. * correct / len(test_loader.dataset)))
108 | 
109 | 
110 | for epoch in range(1, args.epochs + 1):
111 |     train(epoch)
112 |     test()
113 | 


--------------------------------------------------------------------------------
/misc/NeumannOptimizerFinal.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jayybhatt/neumann-optimizer/c931631346a1097d198983684d7c68d91ae82d39/misc/NeumannOptimizerFinal.pdf


--------------------------------------------------------------------------------
/misc/adam_skeleton.py:
--------------------------------------------------------------------------------
  1 | import math
  2 | import torch
  3 | from .optimizer import Optimizer
  4 | 
  5 | 
  6 | class Adam(Optimizer):
  7 |     """Implements Adam algorithm.
  8 |     It has been proposed in `Adam: A Method for Stochastic Optimization`_.
  9 |     Arguments:
 10 |         params (iterable): iterable of parameters to optimize or dicts defining
 11 |             parameter groups
 12 |         lr (float, optional): learning rate (default: 1e-3)
 13 |         betas (Tuple[float, float], optional): coefficients used for computing
 14 |             running averages of gradient and its square (default: (0.9, 0.999))
 15 |         eps (float, optional): term added to the denominator to improve
 16 |             numerical stability (default: 1e-8)
 17 |         weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
 18 |         amsgrad (boolean, optional): whether to use the AMSGrad variant of this
 19 |             algorithm from the paper `On the Convergence of Adam and Beyond`_
 20 |     .. _Adam\: A Method for Stochastic Optimization:
 21 |         https://arxiv.org/abs/1412.6980
 22 |     .. _On the Convergence of Adam and Beyond:
 23 |         https://openreview.net/forum?id=ryQu7f-RZ
 24 |     """
 25 | 
 26 |     def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8,
 27 |                  weight_decay=0, amsgrad=False):
 28 |         if not 0.0 <= lr:
 29 |             raise ValueError("Invalid learning rate: {}".format(lr))
 30 |         if not 0.0 <= eps:
 31 |             raise ValueError("Invalid epsilon value: {}".format(eps))
 32 |         if not 0.0 <= betas[0] < 1.0:
 33 |             raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0]))
 34 |         if not 0.0 <= betas[1] < 1.0:
 35 |             raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1]))
 36 |         defaults = dict(lr=lr, betas=betas, eps=eps,
 37 |                         weight_decay=weight_decay, amsgrad=amsgrad)
 38 |         super(Adam, self).__init__(params, defaults)
 39 | 
 40 |     def __setstate__(self, state):
 41 |         super(Adam, self).__setstate__(state)
 42 |         for group in self.param_groups:
 43 |             group.setdefault('amsgrad', False)
 44 | 
 45 |     def step(self, closure=None):
 46 |         """Performs a single optimization step.
 47 |         Arguments:
 48 |             closure (callable, optional): A closure that reevaluates the model
 49 |                 and returns the loss.
 50 |         """
 51 |         loss = None
 52 |         if closure is not None:
 53 |             loss = closure()
 54 | 
 55 |         for group in self.param_groups:
 56 |             for p in group['params']:
 57 |                 if p.grad is None:
 58 |                     continue
 59 |                 grad = p.grad.data
 60 |                 if grad.is_sparse:
 61 |                     raise RuntimeError('Adam does not support sparse gradients, please consider SparseAdam instead')
 62 |                 amsgrad = group['amsgrad']
 63 | 
 64 |                 state = self.state[p]
 65 | 
 66 |                 # State initialization
 67 |                 if len(state) == 0:
 68 |                     state['step'] = 0
 69 |                     # Exponential moving average of gradient values
 70 |                     state['exp_avg'] = torch.zeros_like(p.data)
 71 |                     # Exponential moving average of squared gradient values
 72 |                     state['exp_avg_sq'] = torch.zeros_like(p.data)
 73 |                     if amsgrad:
 74 |                         # Maintains max of all exp. moving avg. of sq. grad. values
 75 |                         state['max_exp_avg_sq'] = torch.zeros_like(p.data)
 76 | 
 77 |                 exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
 78 |                 if amsgrad:
 79 |                     max_exp_avg_sq = state['max_exp_avg_sq']
 80 |                 beta1, beta2 = group['betas']
 81 | 
 82 |                 state['step'] += 1
 83 | 
 84 |                 if group['weight_decay'] != 0:
 85 |                     grad = grad.add(group['weight_decay'], p.data)
 86 | 
 87 |                 # Decay the first and second moment running average coefficient
 88 |                 exp_avg.mul_(beta1).add_(1 - beta1, grad)
 89 |                 exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)
 90 |                 if amsgrad:
 91 |                     # Maintains the maximum of all 2nd moment running avg. till now
 92 |                     torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq)
 93 |                     # Use the max. for normalizing running avg. of gradient
 94 |                     denom = max_exp_avg_sq.sqrt().add_(group['eps'])
 95 |                 else:
 96 |                     denom = exp_avg_sq.sqrt().add_(group['eps'])
 97 | 
 98 |                 bias_correction1 = 1 - beta1 ** state['step']
 99 |                 bias_correction2 = 1 - beta2 ** state['step']
100 |                 step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1
101 | 
102 |                 p.data.addcdiv_(-step_size, exp_avg, denom)
103 | 
104 |         return loss


--------------------------------------------------------------------------------
/misc/neumann.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jayybhatt/neumann-optimizer/c931631346a1097d198983684d7c68d91ae82d39/misc/neumann.pdf


--------------------------------------------------------------------------------
/models/128_3.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jayybhatt/neumann-optimizer/c931631346a1097d198983684d7c68d91ae82d39/models/128_3.pkl


--------------------------------------------------------------------------------
/models/2048_3.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jayybhatt/neumann-optimizer/c931631346a1097d198983684d7c68d91ae82d39/models/2048_3.pkl


--------------------------------------------------------------------------------
/models/2048_5.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jayybhatt/neumann-optimizer/c931631346a1097d198983684d7c68d91ae82d39/models/2048_5.pkl


--------------------------------------------------------------------------------
/models/256_3.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jayybhatt/neumann-optimizer/c931631346a1097d198983684d7c68d91ae82d39/models/256_3.pkl


--------------------------------------------------------------------------------
/models/256_5.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jayybhatt/neumann-optimizer/c931631346a1097d198983684d7c68d91ae82d39/models/256_5.pkl


--------------------------------------------------------------------------------
/models/32_5.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jayybhatt/neumann-optimizer/c931631346a1097d198983684d7c68d91ae82d39/models/32_5.pkl


--------------------------------------------------------------------------------
/models/ImageClassifier.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torchvision
  3 | import torchvision.transforms as transforms
  4 | import matplotlib.pyplot as plt
  5 | import numpy as np
  6 | from modules.Net import Net
  7 | #from mlp import MLP
  8 | import torch.nn as nn
  9 | import torch.optim as optim
 10 | from torch.autograd import Variable
 11 | from optimizer.neumann import Neumann
 12 | 
 13 | # Random seed
 14 | torch.manual_seed(1)
 15 | np.random.seed(1)
 16 | 
 17 | # Batch size
 18 | batch_size = 4
 19 | 
 20 | # Transformation to tensor and normalization
 21 | transform = transforms.Compose(
 22 |     [transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
 23 | )
 24 | 
 25 | # Download the training set
 26 | trainset = torchvision.datasets.FashionMNIST(root='./data', train=True, download=True, transform=transform)
 27 | 
 28 | # Training set loader
 29 | trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=False, num_workers=2)
 30 | 
 31 | # Test set
 32 | testset = torchvision.datasets.FashionMNIST(root='./data', train=False, download=True, transform=transform)
 33 | 
 34 | # Test set loader
 35 | testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2)
 36 | 
 37 | 
 38 | # Function to show an image
 39 | def imshow(img):
 40 |     img = img / 2 + 0.5
 41 |     npimg = img.numpy()
 42 |     plt.imshow(np.transpose(npimg, (1, 2, 0)))
 43 |     plt.show()
 44 | # end imshow
 45 | 
 46 | 
 47 | # Classes
 48 | classes = ('T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot')
 49 | 
 50 | # Dataset as iterator
 51 | dataiter = iter(trainloader)
 52 | 
 53 | # Get next batch
 54 | images, labels = dataiter.next()
 55 | 
 56 | # Show images
 57 | n_batches = len(dataiter)
 58 | print(u"First 4 labels {}".format([classes[labels[j]] for j in range(4)]))
 59 | # imshow(torchvision.utils.make_grid(images))
 60 | 
 61 | # Our neural net
 62 | net = Net()
 63 | 
 64 | # uncomment below line if running on GPU
 65 | #net.cuda()
 66 | 
 67 | # Objective function is cross-entropy
 68 | criterion = nn.CrossEntropyLoss()
 69 | 
 70 | # Learning rate
 71 | learning_rate = 0.001
 72 | 
 73 | # Stochastic Gradient Descent
 74 | optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=0.9)
 75 | #optimizer = Neumann(list(net.parameters()), lr=learning_rate, momentum = 0.9)
 76 | 
 77 | # Nb iterations
 78 | n_iterations = 30
 79 | 
 80 | # List of training and test accuracies
 81 | train_accuracies = np.zeros(n_iterations)
 82 | test_accuracies = np.zeros(n_iterations)
 83 | 
 84 | # Training !
 85 | for epoch in range(n_iterations):
 86 |     # Average loss during training
 87 |     average_loss = 0.0
 88 | 
 89 |     # Data to compute accuracy
 90 |     total = 0
 91 |     success = 0
 92 | 
 93 |     # Iterate over batches
 94 |     for i, data in enumerate(trainloader, 0):
 95 |         # Get the inputs and labels
 96 |         inputs, labels = data
 97 | 
 98 |         # To variable
 99 |         #inputs, labels = Variable(inputs.cuda()), Variable(labels.cuda())
100 |         inputs, labels = Variable(inputs), Variable(labels)
101 | 
102 |         # Put grad to zero
103 |         optimizer.zero_grad()
104 | 
105 |         # Forward
106 |         outputs = net(inputs)
107 | 
108 |         loss = criterion(outputs, labels)
109 | 
110 |         # Backward
111 |         loss.backward()
112 | 
113 |         # Optimize
114 |         optimizer.step()
115 | 
116 |         # Add to loss
117 |         average_loss += loss.data[0]
118 | 
119 |         # Take the max as predicted
120 |         _, predicted = torch.max(outputs.data, 1)
121 | 
122 |         # Add to total
123 |         total += labels.size(0)
124 | 
125 |         # Add correctly classified images
126 |         success += (predicted == labels.data).sum()
127 |     # end for
128 |     train_accuracy = 100.0 * success / total
129 | 
130 |     # Test model on test set
131 |     success = 0
132 |     total = 0
133 |     for (inputs, labels) in testloader:
134 |         # To variable
135 |         #inputs, labels = Variable(inputs.cuda()), Variable(labels.cuda())
136 |         inputs, labels = Variable(inputs), Variable(labels)
137 |         # Neural net's output
138 |         outputs = net(inputs)
139 | 
140 |         # Take the max is predicted
141 |         _, predicted = torch.max(outputs.data, 1)
142 | 
143 |         # Add to total
144 |         total += labels.size(0)
145 | 
146 |         # Add correctly classified images
147 |         success += (predicted == labels.data).sum()
148 |     # end for
149 | 
150 |     # Print average loss
151 |     print(u"Epoch {}, average loss {}, train accuracy {}, test accuracy {}".format(
152 |         epoch, average_loss / n_batches,
153 |         train_accuracy,
154 |         100.0 * success / total
155 |         )
156 |     )
157 | 
158 |     # Save the model
159 |     train_accuracies[epoch] = train_accuracy
160 |     test_accuracies[epoch] = 100.0 * success / total
161 | # end for
162 | 
163 | plt.plot(np.arange(1, n_iterations+1), train_accuracies)
164 | plt.plot(np.arange(1, n_iterations+1), test_accuracies)
165 | plt.show()
166 | 


--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
1 | import optimizer


--------------------------------------------------------------------------------
/models/cnn.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.optim as optim
  4 | from torch.autograd import Variable
  5 | from torch.utils.data import DataLoader,sampler,Dataset
  6 | import torchvision.datasets as dset
  7 | import torchvision.transforms as T
  8 | from PIL import Image
  9 | import os
 10 | import numpy as np
 11 | import scipy.io
 12 | 
 13 | import matplotlib.pyplot as plt
 14 | from torch.optim.optimizer import Optimizer
 15 | from torch.optim.sgd import SGD
 16 | from optimizer import Neumann
 17 | 
 18 | import _pickle as pkl
 19 | 
 20 | import pdb; pdb.set_trace()
 21 | 
 22 | 
 23 | label_mat=scipy.io.loadmat('../data/q3_2_data.mat')
 24 | label_train=label_mat['trLb']
 25 | print(len(label_train))
 26 | label_val=label_mat['valLb']
 27 | print(len(label_val))
 28 | 
 29 | 
 30 | class ActionDataset(Dataset):
 31 |     """Action dataset."""
 32 | 
 33 |     def __init__(self,  root_dir,labels=[], transform=None):
 34 |         """
 35 |         Args:
 36 |             root_dir (string): Directory with all the images.
 37 |             labels(list): labels if images.
 38 |             transform (callable, optional): Optional transform to be applied on a sample.
 39 |         """
 40 |         self.root_dir = root_dir
 41 |         self.transform = transform
 42 |         self.length=len(os.listdir(self.root_dir))
 43 |         self.labels=labels
 44 |     def __len__(self):
 45 |         return self.length*3
 46 | 
 47 |     def __getitem__(self, idx):
 48 | 
 49 |         folder=idx//3+1
 50 |         imidx= idx%3+1
 51 |         folder=format(folder,'05d')
 52 |         imgname=str(imidx)+'.jpg'
 53 |         img_path = os.path.join(self.root_dir,
 54 |                                 folder,imgname)
 55 |         image = Image.open(img_path)
 56 |         if len(self.labels)!=0:
 57 |             Label=self.labels[idx//3][0]-1
 58 |         if self.transform:
 59 |             image = self.transform(image)
 60 |         if len(self.labels)!=0:
 61 |             sample={'image':image,'img_path':img_path,'Label':Label}
 62 |         else:
 63 |             sample={'image':image,'img_path':img_path}
 64 |         return sample
 65 | 
 66 | 
 67 | 
 68 | dtype = torch.FloatTensor # the CPU datatype
 69 | # Constant to control how frequently we print train loss
 70 | print_every = 400
 71 | # This is a little utility that we'll use to reset the model
 72 | # if we want to re-initialize all our parameters
 73 | def reset(m):
 74 |     if hasattr(m, 'reset_parameters'):
 75 |         m.reset_parameters()
 76 | 
 77 | class Flatten(nn.Module):
 78 |     def forward(self, x):
 79 |         N, C, H, W = x.size() # read in N, C, H, W
 80 |         return x.view(N, -1)  # "flatten" the C * H * W values into a single vector per image
 81 | gpu_dtype = torch.cuda.FloatTensor
 82 | 
 83 | 
 84 | 
 85 | 
 86 | def train(model, loss_fn, optimizer, dataloader, num_epochs = 1):
 87 |     losses = []
 88 |     for epoch in range(num_epochs):
 89 |         print('Starting epoch %d / %d' % (epoch + 1, num_epochs))
 90 |         model.train()
 91 |         for t, sample in enumerate(dataloader):
 92 |             x_var = Variable(sample['image'].cuda())
 93 |             y_var = Variable(sample['Label'].cuda().long())
 94 | 
 95 |             scores = model(x_var)
 96 | 
 97 |             loss = loss_fn(scores, y_var)
 98 |             if (t + 1) % 1 == 0:
 99 |                 print('t = %d, loss = %.4f' % (t + 1, loss.data[0]))
100 |                 pass
101 | 
102 |             losses.append(loss.data[0])
103 | 
104 |             optimizer.zero_grad()
105 |             loss.backward()
106 |             optimizer.step()
107 | 
108 |     return losses
109 | 
110 | def check_accuracy(model, loader):
111 |     '''
112 |     if loader.dataset.train:
113 |         print('Checking accuracy on validation set')
114 |     else:
115 |         print('Checking accuracy on test set')
116 |     '''
117 |     num_correct = 0
118 |     num_samples = 0
119 |     model.eval() # Put the model in test mode (the opposite of model.train(), essentially)
120 |     for t, sample in enumerate(loader):
121 |         x_var = Variable(sample['image'].cuda())
122 |         y_var = sample['Label'].cuda()
123 |         y_var=y_var.cpu()
124 |         scores = model(x_var)
125 |         _, preds = scores.data.cpu().max(1)
126 |         #print(preds)
127 |         #print(y_var)
128 |         num_correct += (preds.numpy() == y_var.numpy()).sum()
129 |         num_samples += preds.size(0)
130 |     acc = float(num_correct) / num_samples
131 |     print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc))
132 | 
133 | 
134 | 
135 | 
136 | augment_transforms = T.Compose([T.RandomHorizontalFlip(),T.RandomVerticalFlip(),T.RandomRotation(30),T.ToTensor()])
137 | batch_size = 256
138 | print_every = 50
139 | image_dataset_train=ActionDataset(root_dir='../data/trainClips/',labels=label_train,transform=augment_transforms)
140 | 
141 | image_dataloader_train = DataLoader(image_dataset_train, batch_size=batch_size,
142 |                         shuffle=True, num_workers=4)
143 | image_dataset_val=ActionDataset(root_dir='../data/valClips/',labels=label_val,transform=augment_transforms)
144 | 
145 | image_dataloader_val = DataLoader(image_dataset_val, batch_size=batch_size,
146 |                         shuffle=False, num_workers=4)
147 | image_dataset_test=ActionDataset(root_dir='../data/testClips/',labels=[],transform=augment_transforms)
148 | 
149 | image_dataloader_test = DataLoader(image_dataset_test, batch_size=batch_size,
150 |                         shuffle=False, num_workers=4)
151 | 
152 | 
153 | 
154 | ###########3rd To Do (16 points, must submit the results to Kaggle) ##############
155 | # Train your model here, and make sure the output of this cell is the accuracy of your best model on the
156 | # train, val, and test sets. Here's some code to get you started. The output of this cell should be the training
157 | # and validation accuracy on your best model (measured by validation accuracy).
158 | 
159 | model = nn.Sequential(
160 |             nn.Conv2d(3,32,kernel_size=5,stride=1), #8*58*58
161 |             nn.BatchNorm2d(32),
162 |             nn.LeakyReLU(inplace=True),
163 |             nn.MaxPool2d(kernel_size=2,stride=2),#8*29*29
164 | 
165 |             nn.Conv2d(32,128,kernel_size=3,stride=2),#16*23*23, 15
166 |             nn.BatchNorm2d(128),
167 |             # nn.LeakyReLU(inplace=True),
168 |             # nn.Dropout2d(p=0.4),
169 |             # nn.MaxPool2d(kernel_size=2,stride=2),#16*11*11
170 | 
171 |             # nn.Conv2d(128,256,kernel_size=3,stride=1),
172 |             # nn.BatchNorm2d(256),
173 |             # nn.LeakyReLU(inplace=True),
174 |             # nn.MaxPool2d(kernel_size=2,stride=2),
175 |             Flatten(),
176 |             nn.Linear(25088,10)
177 | )
178 | 
179 | model.cuda()
180 | model.apply(reset)
181 | loss_fn = nn.CrossEntropyLoss().cuda().type(gpu_dtype)
182 | # loss_fn = nn.CrossEntropyLoss()
183 | beta = 1e-9
184 | alpha = 1e-3
185 | optimizer = Neumann(list(model.parameters()), lr=1e-3, alpha=alpha, beta=beta, sgd_steps=10)
186 | 
187 | 
188 | 
189 | num_epochs=5
190 | # for i in range(1):
191 | model.train()
192 | losses = train(model, loss_fn, optimizer,image_dataloader_train, num_epochs=num_epochs)
193 | 
194 | model.eval()
195 | check_accuracy(model,image_dataloader_train)
196 | check_accuracy(model, image_dataloader_val)
197 | 
198 | filename = "./"+str(batch_size)+"_"+str(num_epochs)+".pkl"
199 | 
200 | with open(filename, 'wb') as f:
201 |     pkl.dump(losses, f)
202 | 
203 | plt.figure(figsize=(12, 8))
204 | plt.title("Neumann Opt on Action Detection", fontsize=17)
205 | plt.xlabel("Iteration", fontsize=15)
206 | plt.ylabel("Loss", fontsize=15)
207 | plt.plot(np.arange(len(losses)), losses)
208 | plt.show()
209 | 
210 | 
211 | 


--------------------------------------------------------------------------------
/models/linear_regression/Linear Regression.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "## Linear Regression\n",
  8 |     "\n",
  9 |     "In this tutorial, we'll try the classic Linear Regression Algorithm\n",
 10 |     "\n",
 11 |     "![Linear Regression](img/lr.jpg)\n",
 12 |     "\n",
 13 |     "Linear Regression involves creating a best fit linear line such the distance between the y' points on line and real values is minimum (the summation of the distance is known as our loss)\n"
 14 |    ]
 15 |   },
 16 |   {
 17 |    "cell_type": "markdown",
 18 |    "metadata": {},
 19 |    "source": [
 20 |     "Import Libraries"
 21 |    ]
 22 |   },
 23 |   {
 24 |    "cell_type": "code",
 25 |    "execution_count": 1,
 26 |    "metadata": {},
 27 |    "outputs": [],
 28 |    "source": [
 29 |     "import torch\n",
 30 |     "import torch.nn as nn\n",
 31 |     "from torch.autograd import Variable\n",
 32 |     "import numpy as np\n",
 33 |     "import matplotlib.pyplot as plt\n",
 34 |     "# from torch.optim import Neumann"
 35 |    ]
 36 |   },
 37 |   {
 38 |    "cell_type": "code",
 39 |    "execution_count": 2,
 40 |    "metadata": {},
 41 |    "outputs": [],
 42 |    "source": [
 43 |     "import math\n",
 44 |     "import torch\n",
 45 |     "from torch.optim.optimizer import Optimizer\n",
 46 |     "from torch.optim.sgd import SGD\n",
 47 |     "\n",
 48 |     "class Neumann(Optimizer):\n",
 49 |     "    \"\"\"\n",
 50 |     "    Documentation about the algorithm\n",
 51 |     "    \"\"\"\n",
 52 |     "\n",
 53 |     "    def __init__(self, params , lr=1e-3,eps = 1e-8, alpha = 1e-7, beta = 1e-5, gamma = 0.9, momentum = 0.5, sgd_steps = 5, K = 10 ):\n",
 54 |     "        \n",
 55 |     "        if not 0.0 <= lr:\n",
 56 |     "            raise ValueError(\"Invalid learning rate: {}\".format(lr))\n",
 57 |     "        if not 0.0 <= eps:\n",
 58 |     "            raise ValueError(\"Invalid epsilon value: {}\".format(eps))\n",
 59 |     "        if not 0.9 >= momentum:\n",
 60 |     "            raise ValueError(\"Invalid momentum value: {}\".format(eps))\n",
 61 |     "        \n",
 62 |     "\n",
 63 |     "        self.iter = 0\n",
 64 |     "        self.sgd = SGD(params, lr=lr, momentum=0.9)\n",
 65 |     "\n",
 66 |     "        num_variables = 2#calculate here\n",
 67 |     "        defaults = dict(lr=lr, eps=eps, alpha=alpha,\n",
 68 |     "                    beta=beta*num_variables, gamma=gamma,\n",
 69 |     "                    sgd_steps=sgd_steps, momentum=momentum, K=K\n",
 70 |     "                    )\n",
 71 |     "\n",
 72 |     "        super(Neumann, self).__init__(params, defaults)\n",
 73 |     "\n",
 74 |     "\n",
 75 |     "    def step(self, closure=None):\n",
 76 |     "        \"\"\"\n",
 77 |     "        Performs a single optimization step.\n",
 78 |     "        \n",
 79 |     "        Arguments:\n",
 80 |     "            closure (callable, optional): A closure that reevaluates the model\n",
 81 |     "                and returns the loss.\n",
 82 |     "        \"\"\"\n",
 83 |     "        import ipdb; ipdb.set_trace()\n",
 84 |     "        self.iter += 1\n",
 85 |     "\n",
 86 |     "\n",
 87 |     "        loss = None\n",
 88 |     "        if closure is not None: #checkout what's the deal with this. present in multiple pytorch optimizers\n",
 89 |     "            loss = closure()\n",
 90 |     "\n",
 91 |     "        for group in self.param_groups:\n",
 92 |     "\n",
 93 |     "            sgd_steps = group['sgd_steps']\n",
 94 |     "\n",
 95 |     "            if self.iter <= sgd_steps:\n",
 96 |     "                self.sgd.step()\n",
 97 |     "                return\n",
 98 |     "\n",
 99 |     "            momentum = group['momentum']\n",
100 |     "            \n",
101 |     "            \n",
102 |     "            for p in group['params']:\n",
103 |     "                if p.grad is None:\n",
104 |     "                    continue\n",
105 |     "                grad = p.grad.data\n",
106 |     "\n",
107 |     "                state = self.state[p]\n",
108 |     "\n",
109 |     "                if len(state) == 0:\n",
110 |     "                    state['step'] = 0\n",
111 |     "                    state['m'] = torch.zeros_like(p.data).float()\n",
112 |     "                    state['d'] = torch.zeros_like(p.data).float()\n",
113 |     "                    state['moving_avg'] = p.data\n",
114 |     "\n",
115 |     "                state['step'] += 1\n",
116 |     "\n",
117 |     "                alpha = group['alpha']\n",
118 |     "                beta = group['beta']\n",
119 |     "                gamma = group['gamma']\n",
120 |     "                K = group['K']\n",
121 |     "                momentum = group['momentum']\n",
122 |     "                mu = momentum*(1 - (1/(1+self.iter)))\n",
123 |     "                eta = group['lr']/self.iter ## update with time\n",
124 |     "\n",
125 |     "                ## Reset neumann iterate \n",
126 |     "                if self.iter%K == 1:\n",
127 |     "                    state['m'] = grad.mul(-eta)\n",
128 |     "\n",
129 |     "                ## Compute update d_t\n",
130 |     "                diff = p.data.sub(state['moving_avg'])\n",
131 |     "                diff_norm = (p.data.sub(state['moving_avg'])).norm()\n",
132 |     "                state['d'] = grad.add( (( (diff_norm.pow(2)).mul(alpha) ).sub( (diff_norm.pow(-2)).mul(beta) )).mul( diff.div(diff_norm)) )\n",
133 |     "\n",
134 |     "                ## Update Neumann iterate\n",
135 |     "                state['m'] = (state['m'].mul_(mu)).sub_( state['d'].mul(eta))\n",
136 |     "\n",
137 |     "                ## Update Weights\n",
138 |     "                p.data.add_((state['m'].mul(mu)).sub( state['d'].mul(eta)))\n",
139 |     "\n",
140 |     "                ## Update Moving Average\n",
141 |     "                state['moving_avg'] = p.data.add( (state['moving_avg'].sub(p.data)).mul(gamma) )\n",
142 |     "\n",
143 |     "\n",
144 |     "\n",
145 |     "        \n",
146 |     "        return loss"
147 |    ]
148 |   },
149 |   {
150 |    "cell_type": "markdown",
151 |    "metadata": {},
152 |    "source": [
153 |     "Initializing Seed for consistent results everytime"
154 |    ]
155 |   },
156 |   {
157 |    "cell_type": "code",
158 |    "execution_count": 3,
159 |    "metadata": {},
160 |    "outputs": [],
161 |    "source": [
162 |     "np.random.seed(42)\n",
163 |     "pts = 50"
164 |    ]
165 |   },
166 |   {
167 |    "cell_type": "markdown",
168 |    "metadata": {},
169 |    "source": [
170 |     "Creating a Dataset of 50 points"
171 |    ]
172 |   },
173 |   {
174 |    "cell_type": "code",
175 |    "execution_count": 4,
176 |    "metadata": {},
177 |    "outputs": [],
178 |    "source": [
179 |     "x_vals = np.random.rand(50)\n",
180 |     "x_train = np.asarray(x_vals,dtype=np.float32).reshape(-1,1)\n",
181 |     "m = 1\n",
182 |     "alpha = np.random.rand(1)\n",
183 |     "beta = np.random.rand(1)\n",
184 |     "y_correct = np.asarray([2*i+m for i in x_vals], dtype=np.float32).reshape(-1,1)"
185 |    ]
186 |   },
187 |   {
188 |    "cell_type": "markdown",
189 |    "metadata": {},
190 |    "source": [
191 |     "### PyTorch Models\n",
192 |     "\n",
193 |     "1. Create a Class\n",
194 |     "2. Declare your Forward Pass\n",
195 |     "3. Tune the HyperParameters"
196 |    ]
197 |   },
198 |   {
199 |    "cell_type": "code",
200 |    "execution_count": 5,
201 |    "metadata": {},
202 |    "outputs": [],
203 |    "source": [
204 |     "class LinearRegressionModel(nn.Module):\n",
205 |     "\n",
206 |     "    def __init__(self, input_dim, output_dim):\n",
207 |     "\n",
208 |     "        super(LinearRegressionModel, self).__init__() \n",
209 |     "        # Calling Super Class's constructor\n",
210 |     "        self.linear = nn.Linear(input_dim, output_dim)\n",
211 |     "        # nn.linear is defined in nn.Module\n",
212 |     "\n",
213 |     "    def forward(self, x):\n",
214 |     "        # Here the forward pass is simply a linear function\n",
215 |     "\n",
216 |     "        out = self.linear(x)\n",
217 |     "        return out\n",
218 |     "\n",
219 |     "input_dim = 1\n",
220 |     "output_dim = 1"
221 |    ]
222 |   },
223 |   {
224 |    "cell_type": "markdown",
225 |    "metadata": {},
226 |    "source": [
227 |     "### Steps\n",
228 |     "1. Create instance of model\n",
229 |     "2. Select Loss Criterion\n",
230 |     "3. Choose Hyper Parameters"
231 |    ]
232 |   },
233 |   {
234 |    "cell_type": "code",
235 |    "execution_count": 6,
236 |    "metadata": {},
237 |    "outputs": [],
238 |    "source": [
239 |     "model = LinearRegressionModel(input_dim,output_dim)# create our model just as we do in Scikit-Learn / C / C++//\n",
240 |     "\n",
241 |     "criterion = nn.MSELoss()# Mean Squared Loss\n",
242 |     "l_rate = 0.01\n",
243 |     "optimiser = Neumann(list(model.parameters()), lr = l_rate) #Stochastic Gradient Descent\n",
244 |     "\n",
245 |     "epochs = 10"
246 |    ]
247 |   },
248 |   {
249 |    "cell_type": "markdown",
250 |    "metadata": {},
251 |    "source": [
252 |     "### Train the Model"
253 |    ]
254 |   },
255 |   {
256 |    "cell_type": "code",
257 |    "execution_count": null,
258 |    "metadata": {},
259 |    "outputs": [
260 |     {
261 |      "name": "stdout",
262 |      "output_type": "stream",
263 |      "text": [
264 |       "--Return--\n",
265 |       "None\n",
266 |       "> \u001b[0;32m<ipython-input-7-5f3e0237ecb0>\u001b[0m(1)\u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n",
267 |       "\u001b[0;32m----> 1 \u001b[0;31m\u001b[0;32mimport\u001b[0m \u001b[0mipdb\u001b[0m\u001b[0;34m;\u001b[0m \u001b[0mipdb\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_trace\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
268 |       "\u001b[0m\u001b[0;32m      2 \u001b[0;31m\u001b[0;32mfor\u001b[0m \u001b[0mepoch\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mepochs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
269 |       "\u001b[0m\u001b[0;32m      3 \u001b[0;31m\u001b[0;34m\u001b[0m\u001b[0m\n",
270 |       "\u001b[0m\n",
271 |       "ipdb> n\n",
272 |       "> \u001b[0;32m/home/faizaan09/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py\u001b[0m(2913)\u001b[0;36mrun_code\u001b[0;34m()\u001b[0m\n",
273 |       "\u001b[0;32m   2912 \u001b[0;31m                \u001b[0;31m# Reset our crash handler in place\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
274 |       "\u001b[0m\u001b[0;32m-> 2913 \u001b[0;31m                \u001b[0msys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexcepthook\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mold_excepthook\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
275 |       "\u001b[0m\u001b[0;32m   2914 \u001b[0;31m        \u001b[0;32mexcept\u001b[0m \u001b[0mSystemExit\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
276 |       "\u001b[0m\n",
277 |       "ipdb> \n",
278 |       "> \u001b[0;32m/home/faizaan09/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py\u001b[0m(2929)\u001b[0;36mrun_code\u001b[0;34m()\u001b[0m\n",
279 |       "\u001b[0;32m   2928 \u001b[0;31m        \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
280 |       "\u001b[0m\u001b[0;32m-> 2929 \u001b[0;31m            \u001b[0moutflag\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
281 |       "\u001b[0m\u001b[0;32m   2930 \u001b[0;31m        \u001b[0;32mreturn\u001b[0m \u001b[0moutflag\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
282 |       "\u001b[0m\n",
283 |       "ipdb> \n",
284 |       "> \u001b[0;32m/home/faizaan09/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py\u001b[0m(2930)\u001b[0;36mrun_code\u001b[0;34m()\u001b[0m\n",
285 |       "\u001b[0;32m   2929 \u001b[0;31m            \u001b[0moutflag\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
286 |       "\u001b[0m\u001b[0;32m-> 2930 \u001b[0;31m        \u001b[0;32mreturn\u001b[0m \u001b[0moutflag\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
287 |       "\u001b[0m\u001b[0;32m   2931 \u001b[0;31m\u001b[0;34m\u001b[0m\u001b[0m\n",
288 |       "\u001b[0m\n",
289 |       "ipdb> \n",
290 |       "--Return--\n",
291 |       "False\n",
292 |       "> \u001b[0;32m/home/faizaan09/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py\u001b[0m(2930)\u001b[0;36mrun_code\u001b[0;34m()\u001b[0m\n",
293 |       "\u001b[0;32m   2929 \u001b[0;31m            \u001b[0moutflag\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
294 |       "\u001b[0m\u001b[0;32m-> 2930 \u001b[0;31m        \u001b[0;32mreturn\u001b[0m \u001b[0moutflag\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
295 |       "\u001b[0m\u001b[0;32m   2931 \u001b[0;31m\u001b[0;34m\u001b[0m\u001b[0m\n",
296 |       "\u001b[0m\n",
297 |       "ipdb> \n",
298 |       "> \u001b[0;32m/home/faizaan09/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py\u001b[0m(2847)\u001b[0;36mrun_ast_nodes\u001b[0;34m()\u001b[0m\n",
299 |       "\u001b[0;32m   2846 \u001b[0;31m        \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
300 |       "\u001b[0m\u001b[0;32m-> 2847 \u001b[0;31m            \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnode\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mto_run_exec\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
301 |       "\u001b[0m\u001b[0;32m   2848 \u001b[0;31m                \u001b[0mmod\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mast\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mModule\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mnode\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
302 |       "\u001b[0m\n",
303 |       "ipdb> \n",
304 |       "> \u001b[0;32m/home/faizaan09/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py\u001b[0m(2848)\u001b[0;36mrun_ast_nodes\u001b[0;34m()\u001b[0m\n",
305 |       "\u001b[0;32m   2847 \u001b[0;31m            \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnode\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mto_run_exec\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
306 |       "\u001b[0m\u001b[0;32m-> 2848 \u001b[0;31m                \u001b[0mmod\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mast\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mModule\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mnode\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
307 |       "\u001b[0m\u001b[0;32m   2849 \u001b[0;31m                \u001b[0mcode\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcompiler\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcell_name\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"exec\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
308 |       "\u001b[0m\n",
309 |       "ipdb> \n",
310 |       "> \u001b[0;32m/home/faizaan09/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py\u001b[0m(2849)\u001b[0;36mrun_ast_nodes\u001b[0;34m()\u001b[0m\n",
311 |       "\u001b[0;32m   2848 \u001b[0;31m                \u001b[0mmod\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mast\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mModule\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mnode\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
312 |       "\u001b[0m\u001b[0;32m-> 2849 \u001b[0;31m                \u001b[0mcode\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcompiler\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcell_name\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"exec\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
313 |       "\u001b[0m\u001b[0;32m   2850 \u001b[0;31m                \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun_code\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcode\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
314 |       "\u001b[0m\n",
315 |       "ipdb> \n",
316 |       "> \u001b[0;32m/home/faizaan09/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py\u001b[0m(2850)\u001b[0;36mrun_ast_nodes\u001b[0;34m()\u001b[0m\n",
317 |       "\u001b[0;32m   2849 \u001b[0;31m                \u001b[0mcode\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcompiler\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcell_name\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"exec\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
318 |       "\u001b[0m\u001b[0;32m-> 2850 \u001b[0;31m                \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun_code\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcode\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
319 |       "\u001b[0m\u001b[0;32m   2851 \u001b[0;31m                    \u001b[0;32mreturn\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
320 |       "\u001b[0m\n",
321 |       "ipdb> \n",
322 |       "> \u001b[0;32m<ipython-input-2-db919e912e81>\u001b[0m(42)\u001b[0;36mstep\u001b[0;34m()\u001b[0m\n",
323 |       "\u001b[0;32m     41 \u001b[0;31m        \u001b[0;32mimport\u001b[0m \u001b[0mipdb\u001b[0m\u001b[0;34m;\u001b[0m \u001b[0mipdb\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_trace\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
324 |       "\u001b[0m\u001b[0;32m---> 42 \u001b[0;31m        \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0miter\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
325 |       "\u001b[0m\u001b[0;32m     43 \u001b[0;31m\u001b[0;34m\u001b[0m\u001b[0m\n",
326 |       "\u001b[0m\n",
327 |       "ipdb> \n",
328 |       "ipdb> \n",
329 |       "ipdb> \n",
330 |       "ipdb> n\n",
331 |       "> \u001b[0;32m<ipython-input-2-db919e912e81>\u001b[0m(45)\u001b[0;36mstep\u001b[0;34m()\u001b[0m\n",
332 |       "\u001b[0;32m     44 \u001b[0;31m\u001b[0;34m\u001b[0m\u001b[0m\n",
333 |       "\u001b[0m\u001b[0;32m---> 45 \u001b[0;31m        \u001b[0mloss\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
334 |       "\u001b[0m\u001b[0;32m     46 \u001b[0;31m        \u001b[0;32mif\u001b[0m \u001b[0mclosure\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m#checkout what's the deal with this. present in multiple pytorch optimizers\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
335 |       "\u001b[0m\n",
336 |       "ipdb> n\n",
337 |       "> \u001b[0;32m<ipython-input-2-db919e912e81>\u001b[0m(46)\u001b[0;36mstep\u001b[0;34m()\u001b[0m\n",
338 |       "\u001b[0;32m     45 \u001b[0;31m        \u001b[0mloss\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
339 |       "\u001b[0m\u001b[0;32m---> 46 \u001b[0;31m        \u001b[0;32mif\u001b[0m \u001b[0mclosure\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m#checkout what's the deal with this. present in multiple pytorch optimizers\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
340 |       "\u001b[0m\u001b[0;32m     47 \u001b[0;31m            \u001b[0mloss\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mclosure\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
341 |       "\u001b[0m\n",
342 |       "ipdb> \n",
343 |       "> \u001b[0;32m<ipython-input-2-db919e912e81>\u001b[0m(49)\u001b[0;36mstep\u001b[0;34m()\u001b[0m\n",
344 |       "\u001b[0;32m     48 \u001b[0;31m\u001b[0;34m\u001b[0m\u001b[0m\n",
345 |       "\u001b[0m\u001b[0;32m---> 49 \u001b[0;31m        \u001b[0;32mfor\u001b[0m \u001b[0mgroup\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparam_groups\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
346 |       "\u001b[0m\u001b[0;32m     50 \u001b[0;31m\u001b[0;34m\u001b[0m\u001b[0m\n",
347 |       "\u001b[0m\n",
348 |       "ipdb> \n",
349 |       "> \u001b[0;32m<ipython-input-2-db919e912e81>\u001b[0m(51)\u001b[0;36mstep\u001b[0;34m()\u001b[0m\n",
350 |       "\u001b[0;32m     50 \u001b[0;31m\u001b[0;34m\u001b[0m\u001b[0m\n",
351 |       "\u001b[0m\u001b[0;32m---> 51 \u001b[0;31m            \u001b[0msgd_steps\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgroup\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'sgd_steps'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
352 |       "\u001b[0m\u001b[0;32m     52 \u001b[0;31m\u001b[0;34m\u001b[0m\u001b[0m\n",
353 |       "\u001b[0m\n",
354 |       "ipdb> \n",
355 |       "> \u001b[0;32m<ipython-input-2-db919e912e81>\u001b[0m(53)\u001b[0;36mstep\u001b[0;34m()\u001b[0m\n",
356 |       "\u001b[0;32m     52 \u001b[0;31m\u001b[0;34m\u001b[0m\u001b[0m\n",
357 |       "\u001b[0m\u001b[0;32m---> 53 \u001b[0;31m            \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0miter\u001b[0m \u001b[0;34m<=\u001b[0m \u001b[0msgd_steps\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
358 |       "\u001b[0m\u001b[0;32m     54 \u001b[0;31m                \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msgd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
359 |       "\u001b[0m\n",
360 |       "ipdb> \n",
361 |       "> \u001b[0;32m<ipython-input-2-db919e912e81>\u001b[0m(54)\u001b[0;36mstep\u001b[0;34m()\u001b[0m\n",
362 |       "\u001b[0;32m     53 \u001b[0;31m            \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0miter\u001b[0m \u001b[0;34m<=\u001b[0m \u001b[0msgd_steps\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
363 |       "\u001b[0m\u001b[0;32m---> 54 \u001b[0;31m                \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msgd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
364 |       "\u001b[0m\u001b[0;32m     55 \u001b[0;31m                \u001b[0;32mreturn\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
365 |       "\u001b[0m\n",
366 |       "ipdb> \n",
367 |       "> \u001b[0;32m<ipython-input-2-db919e912e81>\u001b[0m(55)\u001b[0;36mstep\u001b[0;34m()\u001b[0m\n",
368 |       "\u001b[0;32m     54 \u001b[0;31m                \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msgd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
369 |       "\u001b[0m\u001b[0;32m---> 55 \u001b[0;31m                \u001b[0;32mreturn\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
370 |       "\u001b[0m\u001b[0;32m     56 \u001b[0;31m\u001b[0;34m\u001b[0m\u001b[0m\n",
371 |       "\u001b[0m\n"
372 |      ]
373 |     }
374 |    ],
375 |    "source": [
376 |     "import ipdb; ipdb.set_trace()\n",
377 |     "for epoch in range(epochs):\n",
378 |     "\n",
379 |     "    epoch +=1\n",
380 |     "    inputs = Variable(torch.from_numpy(x_train))\n",
381 |     "    labels = Variable(torch.from_numpy(y_correct))\n",
382 |     "\n",
383 |     "    #clear grads\n",
384 |     "    optimiser.zero_grad()\n",
385 |     "    #forward to get predicted values\n",
386 |     "    outputs = model.forward(inputs)\n",
387 |     "    loss = criterion(outputs, labels)\n",
388 |     "    loss.backward()# back props\n",
389 |     "    optimiser.step()# update the parameters\n",
390 |     "    print('epoch {}, loss {}'.format(epoch,loss.data[0]))"
391 |    ]
392 |   },
393 |   {
394 |    "cell_type": "markdown",
395 |    "metadata": {},
396 |    "source": [
397 |     "### Printing the Predictions"
398 |    ]
399 |   },
400 |   {
401 |    "cell_type": "code",
402 |    "execution_count": null,
403 |    "metadata": {
404 |     "collapsed": true
405 |    },
406 |    "outputs": [],
407 |    "source": [
408 |     "predicted = model.forward(Variable(torch.from_numpy(x_train))).data.numpy()\n",
409 |     "\n",
410 |     "plt.plot(x_train, y_correct, 'go', label = 'from data', alpha = .5)\n",
411 |     "plt.plot(x_train, predicted, label = 'prediction', alpha = 0.5)\n",
412 |     "plt.legend()\n",
413 |     "plt.show()\n",
414 |     "print(model.state_dict())"
415 |    ]
416 |   },
417 |   {
418 |    "cell_type": "markdown",
419 |    "metadata": {},
420 |    "source": [
421 |     "### Example to Use GPU"
422 |    ]
423 |   },
424 |   {
425 |    "cell_type": "code",
426 |    "execution_count": null,
427 |    "metadata": {
428 |     "collapsed": true
429 |    },
430 |    "outputs": [],
431 |    "source": [
432 |     "import torch\n",
433 |     "import torch.nn as nn\n",
434 |     "from torch.autograd import Variable\n",
435 |     "import numpy as np\n",
436 |     "\n",
437 |     "x_values = [i for i in range(11)]\n",
438 |     "x_train = np.array(x_values, dtype=np.float32)\n",
439 |     "x_train = x_train.reshape(-1, 1)\n",
440 |     "\n",
441 |     "y_values = [2*i + 1 for i in x_values]\n",
442 |     "y_train = np.array(y_values, dtype=np.float32)\n",
443 |     "y_train = y_train.reshape(-1, 1)\n",
444 |     "\n",
445 |     "'''\n",
446 |     "CREATE MODEL CLASS\n",
447 |     "'''\n",
448 |     "class LinearRegressionModel(nn.Module):\n",
449 |     "    def __init__(self, input_dim, output_dim):\n",
450 |     "        super(LinearRegressionModel, self).__init__()\n",
451 |     "        self.linear = nn.Linear(input_dim, output_dim)  \n",
452 |     "    \n",
453 |     "    def forward(self, x):\n",
454 |     "        out = self.linear(x)\n",
455 |     "        return out\n",
456 |     "\n",
457 |     "'''\n",
458 |     "INSTANTIATE MODEL CLASS\n",
459 |     "'''\n",
460 |     "input_dim = 1\n",
461 |     "output_dim = 1\n",
462 |     "\n",
463 |     "model = LinearRegressionModel(input_dim, output_dim)\n",
464 |     "\n",
465 |     "\n",
466 |     "\n",
467 |     "model.cuda()\n",
468 |     "\n",
469 |     "'''\n",
470 |     "INSTANTIATE LOSS CLASS\n",
471 |     "'''\n",
472 |     "\n",
473 |     "criterion = nn.MSELoss()\n",
474 |     "\n",
475 |     "\n",
476 |     "learning_rate = 0.01\n",
477 |     "\n",
478 |     "optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)\n",
479 |     "\n",
480 |     "\n",
481 |     "epochs = 100\n",
482 |     "for epoch in range(epochs):\n",
483 |     "    epoch += 1\n",
484 |     "\n",
485 |     "    \n",
486 |     "    if torch.cuda.is_available():\n",
487 |     "        inputs = Variable(torch.from_numpy(x_train).cuda())\n",
488 |     "\n",
489 |     "        \n",
490 |     "    if torch.cuda.is_available():\n",
491 |     "        labels = Variable(torch.from_numpy(y_train).cuda())\n",
492 |     "        \n",
493 |     "\n",
494 |     "    optimizer.zero_grad() \n",
495 |     "    \n",
496 |     "\n",
497 |     "    outputs = model(inputs)\n",
498 |     "\n",
499 |     "    loss = criterion(outputs, labels)\n",
500 |     "    \n",
501 |     "\n",
502 |     "    loss.backward()\n",
503 |     "    \n",
504 |     "\n",
505 |     "    optimizer.step()\n",
506 |     "    \n",
507 |     "\n",
508 |     "    print('epoch {}, loss {}'.format(epoch, loss.data[0]))\n"
509 |    ]
510 |   },
511 |   {
512 |    "cell_type": "markdown",
513 |    "metadata": {},
514 |    "source": [
515 |     "Sources:\n",
516 |     "http://github.com/pytorch/examples\n",
517 |     "\n",
518 |     "http://github.com/ritchieng/the-incredible-pytorch"
519 |    ]
520 |   }
521 |  ],
522 |  "metadata": {
523 |   "kernelspec": {
524 |    "display_name": "Python 3",
525 |    "language": "python",
526 |    "name": "python3"
527 |   },
528 |   "language_info": {
529 |    "codemirror_mode": {
530 |     "name": "ipython",
531 |     "version": 3
532 |    },
533 |    "file_extension": ".py",
534 |    "mimetype": "text/x-python",
535 |    "name": "python",
536 |    "nbconvert_exporter": "python",
537 |    "pygments_lexer": "ipython3",
538 |    "version": "3.6.4"
539 |   }
540 |  },
541 |  "nbformat": 4,
542 |  "nbformat_minor": 2
543 | }
544 | 


--------------------------------------------------------------------------------
/models/linear_regression/test.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | from torch.autograd import Variable
  4 | import numpy as np
  5 | import matplotlib.pyplot as plt
  6 | # from torch.optim import Neumann
  7 | import math
  8 | import torch
  9 | from torch.optim.optimizer import Optimizer
 10 | from torch.optim.sgd import SGD
 11 | 
 12 | from neumann import Neumann
 13 | 
 14 | import pdb; pdb.set_trace()
 15 | 
 16 | 
 17 | np.random.seed(42)
 18 | pts = 50
 19 | x_vals = np.random.rand(50)
 20 | x_train = np.asarray(x_vals,dtype=np.float32).reshape(-1,1)
 21 | b = 0
 22 | alpha = np.random.rand(1)
 23 | beta = np.random.rand(1)
 24 | y_correct = np.asarray([2*i+b for i in x_vals], dtype=np.float32).reshape(-1,1)
 25 | 
 26 | 
 27 | class LinearRegressionModel(nn.Module):
 28 | 
 29 |     def __init__(self, input_dim, output_dim):
 30 | 
 31 |         super(LinearRegressionModel, self).__init__() 
 32 |         # Calling Super Class's constructor
 33 |         self.linear = nn.Linear(input_dim, output_dim)
 34 |         # nn.linear is defined in nn.Module
 35 | 
 36 |     def forward(self, x):
 37 |         # Here the forward pass is simply a linear function
 38 | 
 39 |         out = self.linear(x)
 40 |         return out
 41 | 
 42 | input_dim = 1
 43 | output_dim = 1
 44 | 
 45 | 
 46 | model = LinearRegressionModel(input_dim,output_dim)# create our model just as we do in Scikit-Learn / C / C++//
 47 | 
 48 | criterion = nn.MSELoss()# Mean Squared Loss
 49 | l_rate = 0.01
 50 | optimiser = Neumann(list(model.parameters()), lr = l_rate) #Stochastic Gradient Descent
 51 | 
 52 | epochs = 1000
 53 | 
 54 | 
 55 | for epoch in range(epochs):
 56 | 
 57 |     epoch +=1
 58 |     inputs = Variable(torch.from_numpy(x_train))
 59 |     labels = Variable(torch.from_numpy(y_correct))
 60 | 
 61 |     #clear grads
 62 |     optimiser.zero_grad()
 63 |     #forward to get predicted values
 64 |     outputs = model.forward(inputs)
 65 |     loss = criterion(outputs, labels)
 66 |     loss.backward()# back props
 67 |     optimiser.step()# update the parameters
 68 |     print('epoch {}, loss {}'.format(epoch,loss.data[0]))
 69 | 
 70 | 
 71 | predicted = model.forward(Variable(torch.from_numpy(x_train))).data.numpy()
 72 | 
 73 | plt.plot(x_train, y_correct, 'go', label = 'from data', alpha = .5)
 74 | plt.plot(x_train, predicted, label = 'prediction', alpha = 0.5)
 75 | plt.legend()
 76 | plt.show()
 77 | print(model.state_dict())
 78 | 
 79 | 
 80 | 
 81 | 
 82 | 
 83 | 
 84 | 
 85 | 
 86 | 
 87 | 
 88 | 
 89 | 
 90 | 
 91 | 
 92 | 
 93 | 
 94 | 
 95 | 
 96 | 
 97 | 
 98 | 
 99 | 
100 | 
101 | 
102 | 
103 | 
104 | 
105 | 
106 | # from __future__ import print_function
107 | # import argparse
108 | # import torch
109 | # import torch.nn as nn
110 | # import torch.nn.functional as F
111 | # # import torch.optim as optim
112 | # from torchvision import datasets, transforms
113 | # from torch.autograd import Variable
114 | 
115 | # import pdb; pdb.set_trace()
116 | # from torch.optim import Neumann
117 | 
118 | # # Training settings
119 | # parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
120 | # parser.add_argument('--batch-size', type=int, default=64, metavar='N',
121 | #                     help='input batch size for training (default: 64)')
122 | # parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
123 | #                     help='input batch size for testing (default: 1000)')
124 | # parser.add_argument('--epochs', type=int, default=10, metavar='N',
125 | #                     help='number of epochs to train (default: 10)')
126 | # parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
127 | #                     help='learning rate (default: 0.01)')
128 | # parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
129 | #                     help='SGD momentum (default: 0.5)')
130 | # parser.add_argument('--no-cuda', action='store_true', default=False,
131 | #                     help='disables CUDA training')
132 | # parser.add_argument('--seed', type=int, default=1, metavar='S',
133 | #                     help='random seed (default: 1)')
134 | # parser.add_argument('--log-interval', type=int, default=10, metavar='N',
135 | #                     help='how many batches to wait before logging training status')
136 | # args = parser.parse_args()
137 | # args.cuda = not args.no_cuda and torch.cuda.is_available()
138 | 
139 | 
140 | 
141 | # torch.manual_seed(args.seed)
142 | # if args.cuda:
143 | #     torch.cuda.manual_seed(args.seed)
144 | 
145 | 
146 | # kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
147 | # train_loader = torch.utils.data.DataLoader(
148 | #     datasets.MNIST('../data', train=True, download=True,
149 | #                    transform=transforms.Compose([
150 | #                        transforms.ToTensor(),
151 | #                        transforms.Normalize((0.1307,), (0.3081,))
152 | #                    ])),
153 | #     batch_size=args.batch_size, shuffle=True, **kwargs)
154 | # test_loader = torch.utils.data.DataLoader(
155 | #     datasets.MNIST('../data', train=False, transform=transforms.Compose([
156 | #                        transforms.ToTensor(),
157 | #                        transforms.Normalize((0.1307,), (0.3081,))
158 | #                    ])),
159 | #     batch_size=args.test_batch_size, shuffle=True, **kwargs)
160 | 
161 | # # 
162 | # class Net(nn.Module):
163 | #     def __init__(self):
164 | #         super(Net, self).__init__()
165 | #         self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
166 | #         self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
167 | #         self.conv2_drop = nn.Dropout2d()
168 | #         self.fc1 = nn.Linear(320, 50)
169 | #         self.fc2 = nn.Linear(50, 10)
170 | 
171 | #     def forward(self, x):
172 | #         x = F.relu(F.max_pool2d(self.conv1(x), 2))
173 | #         x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
174 | #         x = x.view(-1, 320)
175 | #         x = F.relu(self.fc1(x))
176 | #         x = F.dropout(x, training=self.training)
177 | #         x = self.fc2(x)
178 | #         return F.log_softmax(x, dim=1)
179 | 
180 | # model = Net()
181 | # if args.cuda:
182 | #     model.cuda()
183 | 
184 | # optimizer = Neumann(model.parameters(), lr=args.lr)
185 | 
186 | # def train(epoch):
187 | #     model.train()
188 | #     for batch_idx, (data, target) in enumerate(train_loader):
189 | #         if args.cuda:
190 | #             data, target = data.cuda(), target.cuda()
191 | #         data, target = Variable(data), Variable(target)
192 | #         optimizer.zero_grad()
193 | #         output = model(data)
194 | #         loss = F.nll_loss(output, target)
195 | #         loss.backward()
196 | #         optimizer.step()
197 | #         if batch_idx % args.log_interval == 0:
198 | #             print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
199 | #                 epoch, batch_idx * len(data), len(train_loader.dataset),
200 | #                 100. * batch_idx / len(train_loader), loss.data[0]))
201 | 
202 | # def test():
203 | #     model.eval()
204 | #     test_loss = 0
205 | #     correct = 0
206 | #     for data, target in test_loader:
207 | #         if args.cuda:
208 | #             data, target = data.cuda(), target.cuda()
209 | #         data, target = Variable(data, volatile=True), Variable(target)
210 | #         output = model(data)
211 | #         test_loss += F.nll_loss(output, target, size_average=False).data[0] # sum up batch loss
212 | #         pred = output.data.max(1, keepdim=True)[1] # get the index of the max log-probability
213 | #         correct += pred.eq(target.data.view_as(pred)).long().cpu().sum()
214 | 
215 | #     test_loss /= len(test_loader.dataset)
216 | #     print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
217 | #         test_loss, correct, len(test_loader.dataset),
218 | #         100. * correct / len(test_loader.dataset)))
219 | 
220 | 
221 | # for epoch in range(1, args.epochs + 1):
222 | #     train(epoch)
223 | #     test()
224 | 


--------------------------------------------------------------------------------
/models/mlp.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | from optimizer import Neumann,SGD
 4 | import numpy as np
 5 | 
 6 | from batchup import data_source
 7 | import csv
 8 | 
 9 | csvfile = open('../dataset/HIGGS_subset.csv','r')
10 | csvreader = csv.reader(csvfile)
11 | train_X = []
12 | train_Y = []
13 | 
14 | test_X = []
15 | test_Y = []
16 | 
17 | train_error = []
18 | 
19 | size = 0
20 | for row in csvreader:
21 |     if size >= 90000:
22 |         test_X.append(row[1:])
23 |         test_Y.append([float(row[0])])
24 |     else:
25 |         train_X.append(row[1:])
26 |         train_Y.append([float(row[0])])
27 |     size+=1
28 | 
29 | train_X = np.array(train_X,dtype="float64")
30 | train_Y = np.array(train_Y,dtype="int32")
31 | test_X = np.array(test_X, dtype="float64")
32 | test_Y = np.array(test_Y, dtype="int32")
33 | ds = data_source.ArrayDataSource([train_X, train_Y])
34 | 
35 | 
36 | class MultilayerPerceptron(nn.Module):
37 |     def __init__(self, input_size, hidden_size, out_classes):
38 |         super(MultilayerPerceptron, self).__init__()
39 |         self.fc1 = nn.Linear(input_size, hidden_size)
40 |         self.fc2 = nn.Linear(hidden_size, num_classes)
41 |         self.tanh = nn.Tanh()
42 |         self.sigmoid = nn.Sigmoid()
43 | 
44 |     def forward(self, x):
45 |         out = self.fc1(x)
46 |         out = self.tanh(out)
47 |         out = self.fc2(out)
48 |         out = self.sigmoid(out)
49 |         return out
50 | 
51 | 
52 | input_size = 28
53 | hidden_size = 56
54 | num_classes = 1
55 | learning_rate = 1e-4
56 | num_epochs = 20
57 | minibatch_size = 16
58 | current_iter = 1
59 | device = torch.device('cpu')
60 | 
61 | net = MultilayerPerceptron(input_size, hidden_size, num_classes)
62 | 
63 | loss_fn = nn.MSELoss()
64 | optimizer = Neumann(list(net.parameters()), lr=learning_rate)
65 | # optimizer = SGD(net.parameters(), lr=learning_rate)
66 | 
67 | for epoch in range(num_epochs):
68 |     for batch_X, batch_Y in ds.batch_iterator(batch_size=minibatch_size, shuffle=True):
69 |         input = torch.tensor(batch_X, requires_grad=True, device=device, dtype=torch.float32)
70 |         label = torch.tensor(batch_Y, device=device, dtype=torch.float32)
71 |         optimizer.zero_grad()
72 |         outputs = net(input)
73 |         loss = loss_fn(outputs, label)
74 |         loss.backward()
75 |         optimizer.step()
76 |     print("Loss: ", loss)
77 | 
78 | test_inputs = torch.tensor(test_X, device=device, dtype=torch.float32)
79 | test_labels = torch.tensor(test_Y, device=device, dtype=torch.float32)
80 | 
81 | outputs = net(test_inputs)
82 | 
83 | error = loss_fn(outputs, test_labels)
84 | print(outputs)
85 | print("Error: ", error)
86 | 


--------------------------------------------------------------------------------
/models/modules/Net.py:
--------------------------------------------------------------------------------
 1 | from torch.autograd import Variable
 2 | import torch.nn as nn
 3 | import torch.nn.functional as F
 4 | 
 5 | 
 6 | # Neural net
 7 | class Net(nn.Module):
 8 |     """
 9 |     Neural net
10 |     """
11 | 
12 |     # Constructor
13 |     def __init__(self):
14 |         """
15 |         Constructor
16 |         """
17 |         super(Net, self).__init__()
18 |         self.conv_layer1 = nn.Conv2d(1, 6, 5)
19 |         self.pool = nn.MaxPool2d(2, 2)
20 |         self.conv_layer2 = nn.Conv2d(6, 16, 5)
21 |         self.linear_layer1 = nn.Linear(16 * 4 * 4, 120)
22 |         self.linear_layer2 = nn.Linear(120, 10)
23 |         self.tanh = nn.Tanh()
24 |         self.sigmoid = nn.Sigmoid()
25 |     # end __init__
26 | 
27 |     # Forward pass
28 |     def forward(self, x):
29 |         """
30 |         Forward pass
31 |         :param x:
32 |         :return:
33 |         """
34 |         # print(u"Input : {}".format(x.size()))
35 |         x = self.conv_layer1(x)
36 |         # print(u"Conv1 : {}".format(x.size()))
37 |         x = F.relu(x)
38 |         # print(u"Relu : {}".format(x.size()))
39 |         x = self.pool(x)
40 |         # print(u"Max pool : {}".format(x.size()))
41 |         x = self.conv_layer2(x)
42 |         # print(u"Conv2 : {}".format(x.size()))
43 |         x = F.relu(x)
44 |         x = self.pool(x)
45 |         # print(u"Input : {}".format(x.size()))
46 |         x = x.view(-1, 16 * 4 * 4)
47 |         x = self.linear_layer1(x)
48 |         x = self.tanh(x)
49 |         x = self.linear_layer2(x)
50 |         x = self.sigmoid(x)
51 |         # x = F.relu(self.linear_layer1(x))
52 |         # x = F.relu(self.linear_layer2(x))
53 |         return x
54 |     # end forward
55 | 
56 | # end Net
57 | 


--------------------------------------------------------------------------------
/models/modules/__init__.py:
--------------------------------------------------------------------------------
1 | #Nothing much here except import
2 | from .Net import Net
3 | 
4 | 


--------------------------------------------------------------------------------
/models/optimizer/__init__.py:
--------------------------------------------------------------------------------
1 | from optimizer.neumann import Neumann
2 | from optimizer.stochastic import SGD
3 | 


--------------------------------------------------------------------------------
/models/optimizer/neumann.py:
--------------------------------------------------------------------------------
  1 | import math
  2 | import torch
  3 | from torch.optim.optimizer import Optimizer
  4 | from torch.optim.sgd import SGD 
  5 | import numpy as np
  6 | 
  7 | class Neumann(Optimizer):
  8 |     """
  9 |     Documentation about the algorithm
 10 |     """
 11 | 
 12 |     def __init__(self, params , lr=1e-3, eps = 1e-8, alpha = 1e-7, beta = 1e-5, gamma = 0.9, momentum = 1, sgd_steps = 5, K = 10 ):
 13 |         
 14 |         if not 0.0 <= lr:
 15 |             raise ValueError("Invalid learning rate: {}".format(lr))
 16 |         if not 0.0 <= eps:
 17 |             raise ValueError("Invalid epsilon value: {}".format(eps))
 18 |         if not 1 >= momentum:
 19 |             raise ValueError("Invalid momentum value: {}".format(eps))
 20 |         
 21 | 
 22 |         self.iter = 0
 23 |         # self.sgd = SGD(params, lr=lr, momentum=0.9)
 24 | 
 25 |         param_count = np.sum([np.prod(p.size()) for p in params]) # got from MNIST-GAN
 26 | 
 27 |         defaults = dict(lr=lr, eps=eps, alpha=alpha,
 28 |                     beta=beta*param_count, gamma=gamma,
 29 |                     sgd_steps=sgd_steps, momentum=momentum, K=K
 30 |                     )
 31 | 
 32 |         super(Neumann, self).__init__(params, defaults)
 33 | 
 34 | 
 35 |     def step(self, closure=None):
 36 |         """
 37 |         Performs a single optimization step.
 38 |         
 39 |         Arguments:
 40 |             closure (callable, optional): A closure that reevaluates the model
 41 |                 and returns the loss.
 42 |         """
 43 |         self.iter += 1
 44 | 
 45 | 
 46 |         loss = None
 47 |         if closure is not None: #checkout what's the deal with this. present in multiple pytorch optimizers
 48 |             loss = closure()
 49 | 
 50 |         for group in self.param_groups:
 51 | 
 52 |             sgd_steps = group['sgd_steps']
 53 | 
 54 |             alpha = group['alpha']
 55 |             beta = group['beta']
 56 |             gamma = group['gamma']
 57 |             K = group['K']
 58 |             momentum = group['momentum']
 59 |             mu = momentum*(1 - (1/(1+self.iter)))
 60 |             
 61 |             if mu >= 0.9:
 62 |                 mu = 0.9
 63 |             elif mu <= 0.5:
 64 |                 mu = 0.5
 65 | 
 66 | 
 67 |             eta = group['lr']/self.iter ## update with time ## changed
 68 |             # print("here")
 69 |             
 70 |             
 71 |             for p in group['params']:
 72 |                 if p.grad is None:
 73 |                     continue
 74 |                 grad = p.grad.data 
 75 | 
 76 |                 state = self.state[p]
 77 | 
 78 |                 if len(state) == 0:
 79 |                     state['step'] = 0
 80 |                     state['m'] = torch.zeros_like(p.data).float()
 81 |                     state['d'] = torch.zeros_like(p.data).float()
 82 |                     # state['moving_avg'] = p.data
 83 | 
 84 | 
 85 |                 if self.iter <= sgd_steps:
 86 |                 
 87 |                     p.data.add_(-group['lr'], grad)
 88 |                     # self.sgd.step()
 89 |                     continue
 90 | 
 91 |                 state['step'] += 1
 92 | 
 93 | 
 94 |                 # Reset neumann iterate 
 95 |                 if self.iter%K == 0:
 96 |                     state['m'] = grad.mul(-eta)
 97 |                     ## changed                  
 98 | 
 99 |                 else:   
100 |                     ## Compute update d_t
101 |                     # diff = p.data.sub(state['moving_avg'])
102 |                     # # print(diff)
103 |                     # diff_norm = p.data.sub(state['moving_avg']).norm()
104 |                     # # if np.count_nonzero(diff) and diff_norm > 0:
105 |                     #     state['d'] = grad.add( (( (diff_norm.pow(2)).mul(alpha) ).sub( (diff_norm.pow(-2)).mul(beta) )).mul( diff.div(diff_norm)) )
106 |                     # # else:
107 |                     # state['d'].add_(grad)
108 |                     state['d'] = grad
109 | 
110 |                     ## Update Neumann iterate
111 |                     (state['m'].mul_(mu)).sub_( state['d'].mul(eta) )
112 | 
113 |                     ## Update Weights
114 |                     p.data.add_((state['m'].mul(mu)).sub( state['d'].mul(eta)))
115 | 
116 |                     ## Update Moving Average
117 |                     # state['moving_avg'] = p.data.add( (state['moving_avg'].sub(p.data)).mul(gamma) )
118 | 
119 |                 # print(p.data)
120 | 
121 |         ## changed
122 |         if self.iter%K == 0:
123 |             group['K'] = group['K']*2
124 |         
125 |         # return loss
126 | 


--------------------------------------------------------------------------------
/models/optimizer/stochastic.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch.optim.optimizer import Optimizer, required
 3 | 
 4 | 
 5 | class SGD(Optimizer):
 6 |     r"""Implements stochastic gradient descent (optionally with momentum).
 7 |     Nesterov momentum is based on the formula from
 8 |     `On the importance of initialization and momentum in deep learning`__.
 9 |     Args:
10 |         params (iterable): iterable of parameters to optimize or dicts defining
11 |             parameter groups
12 |         lr (float): learning rate
13 |         momentum (float, optional): momentum factor (default: 0)
14 |         weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
15 |         dampening (float, optional): dampening for momentum (default: 0)
16 |         nesterov (bool, optional): enables Nesterov momentum (default: False)
17 |     Example:
18 |         >>> optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
19 |         >>> optimizer.zero_grad()
20 |         >>> loss_fn(model(input), target).backward()
21 |         >>> optimizer.step()
22 |     __ http://www.cs.toronto.edu/%7Ehinton/absps/momentum.pdf
23 |     .. note::
24 |         The implementation of SGD with Momentum/Nesterov subtly differs from
25 |         Sutskever et. al. and implementations in some other frameworks.
26 |         Considering the specific case of Momentum, the update can be written as
27 |         .. math::
28 |                   v = \rho * v + g \\
29 |                   p = p - lr * v
30 |         where p, g, v and :math:`\rho` denote the parameters, gradient,
31 |         velocity, and momentum respectively.
32 |         This is in contrast to Sutskever et. al. and
33 |         other frameworks which employ an update of the form
34 |         .. math::
35 |              v = \rho * v + lr * g \\
36 |              p = p - v
37 |         The Nesterov version is analogously modified.
38 |     """
39 | 
40 |     def __init__(self, params, lr=required, momentum=0, dampening=0,
41 |                  weight_decay=0, nesterov=False):
42 |         if lr is not required and lr < 0.0:
43 |             raise ValueError("Invalid learning rate: {}".format(lr))
44 |         if momentum < 0.0:
45 |             raise ValueError("Invalid momentum value: {}".format(momentum))
46 |         if weight_decay < 0.0:
47 |             raise ValueError("Invalid weight_decay value: {}".format(weight_decay))
48 | 
49 |         defaults = dict(lr=lr, momentum=momentum, dampening=dampening,
50 |                         weight_decay=weight_decay, nesterov=nesterov)
51 |         if nesterov and (momentum <= 0 or dampening != 0):
52 |             raise ValueError("Nesterov momentum requires a momentum and zero dampening")
53 |         super(SGD, self).__init__(params, defaults)
54 | 
55 |     def __setstate__(self, state):
56 |         super(SGD, self).__setstate__(state)
57 |         for group in self.param_groups:
58 |             group.setdefault('nesterov', False)
59 | 
60 |     def step(self, closure=None):
61 |         """Performs a single optimization step.
62 |         Arguments:
63 |             closure (callable, optional): A closure that reevaluates the model
64 |                 and returns the loss.
65 |         """
66 |         loss = None
67 |         if closure is not None:
68 |             loss = closure()
69 | 
70 |         for group in self.param_groups:
71 |             weight_decay = group['weight_decay']
72 |             momentum = group['momentum']
73 |             dampening = group['dampening']
74 |             nesterov = group['nesterov']
75 | 
76 |             for p in group['params']:
77 |                 if p.grad is None:
78 |                     continue
79 |                 d_p = p.grad.data
80 |                 if weight_decay != 0:
81 |                     d_p.add_(weight_decay, p.data)
82 |                 if momentum != 0:
83 |                     param_state = self.state[p]
84 |                     if 'momentum_buffer' not in param_state:
85 |                         buf = param_state['momentum_buffer'] = torch.zeros_like(p.data)
86 |                         buf.mul_(momentum).add_(d_p)
87 |                     else:
88 |                         buf = param_state['momentum_buffer']
89 |                         buf.mul_(momentum).add_(1 - dampening, d_p)
90 |                     if nesterov:
91 |                         d_p = d_p.add(momentum, buf)
92 |                     else:
93 |                         d_p = buf
94 | 
95 |                 p.data.add_(-group['lr'], d_p)
96 | 
97 |         return loss
98 | 


--------------------------------------------------------------------------------
/slr10.csv:
--------------------------------------------------------------------------------
 1 | 3.5,5.1
 2 | 3,4.9
 3 | 3.2,4.7
 4 | 3.1,4.6
 5 | 3.6,5
 6 | 3.9,5.4
 7 | 3.4,4.6
 8 | 3.4,5
 9 | 2.9,4.4
10 | 3.1,4.9
11 | 3.7,5.4
12 | 3.4,4.8
13 | 3,4.3
14 | 4,5.8
15 | 4.4,5.7
16 | 3.9,5.4
17 | 3.5,5.1
18 | 3.8,5.7
19 | 3.8,5.1
20 | 3.4,5.4
21 | 3.7,5.1
22 | 3.6,4.6
23 | 3.3,5.1
24 | 3.4,4.8
25 | 3,5
26 | 3.4,5
27 | 3.5,5.2
28 | 3.4,5.2
29 | 3.2,4.7
30 | 3.1,4.8
31 | 3.4,5.4
32 | 4.1,5.2
33 | 4.2,5.5
34 | 3.1,4.9
35 | 3.2,5
36 | 3.5,5.5
37 | 3.6,4.9
38 | 3,4.4
39 | 3.4,5.1
40 | 3.5,5
41 | 2.3,4.5
42 | 3.2,4.4
43 | 3.5,5
44 | 3.8,5.1
45 | 3,4.8
46 | 3.8,4.6
47 | 3.7,5.3
48 | 3.3,5
49 | 


--------------------------------------------------------------------------------
/test.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 14,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import torch\n",
 10 |     "import torch.nn as nn\n",
 11 |     "import numpy as np\n",
 12 |     "import torch.nn.functional as F\n",
 13 |     "from torch.optim import Neumann\n",
 14 |     "from torch.utils.data import DataLoader,sampler,Dataset\n",
 15 |     "from torch.autograd import Variable"
 16 |    ]
 17 |   },
 18 |   {
 19 |    "cell_type": "code",
 20 |    "execution_count": 2,
 21 |    "metadata": {},
 22 |    "outputs": [],
 23 |    "source": [
 24 |     "batch_size = 48\n",
 25 |     "test_batch_size = 1000\n",
 26 |     "seed = 123\n",
 27 |     "momentum = 0.5\n",
 28 |     "log_interval = 10\n",
 29 |     "\n",
 30 |     "torch.manual_seed(seed)\n",
 31 |     "if torch.cuda.is_available():\n",
 32 |     "    torch.cuda.manual_seed(seed)"
 33 |    ]
 34 |   },
 35 |   {
 36 |    "cell_type": "code",
 37 |    "execution_count": 3,
 38 |    "metadata": {},
 39 |    "outputs": [],
 40 |    "source": [
 41 |     "class SimpleDataset(Dataset):\n",
 42 |     "    \"\"\"Action dataset.\"\"\"\n",
 43 |     "\n",
 44 |     "    def __init__(self, data_file):\n",
 45 |     "        \"\"\"\n",
 46 |     "        Args:\n",
 47 |     "            root_dir (string): Directory with all the data is stored.\n",
 48 |     "            labels(list): GT\n",
 49 |     "        \"\"\"\n",
 50 |     "        self.data_file = data_file\n",
 51 |     "        self.data = np.loadtxt(self.data_file, delimiter=',')#, dtype={'names': ('X', 'Y'), 'formats': ('S1', 'S1')})\n",
 52 |     "        self.length = self.data.shape[0]\n",
 53 |     "        self.labels = self.data[:, -1]\n",
 54 |     "        self.data = self.data[:,0]\n",
 55 |     "\n",
 56 |     "    def __len__(self):\n",
 57 |     "        return self.length\n",
 58 |     "\n",
 59 |     "    def __getitem__(self, idx):\n",
 60 |     "        sample={'X':self.data[idx], 'Y' : self.labels[idx]}\n",
 61 |     "#         print(sample)\n",
 62 |     "        return sample\n",
 63 |     "    "
 64 |    ]
 65 |   },
 66 |   {
 67 |    "cell_type": "code",
 68 |    "execution_count": 4,
 69 |    "metadata": {},
 70 |    "outputs": [],
 71 |    "source": [
 72 |     "simple_dataset = SimpleDataset(\"./slr10.csv\")\n",
 73 |     "\n",
 74 |     "train_loader = torch.utils.data.DataLoader(\n",
 75 |     "    simple_dataset,\n",
 76 |     "    batch_size=batch_size, shuffle=True)\n",
 77 |     "\n",
 78 |     "# test_loader = torch.utils.data.DataLoader(\n",
 79 |     "#     batch_size=test_batch_size, shuffle=True, **kwargs)"
 80 |    ]
 81 |   },
 82 |   {
 83 |    "cell_type": "code",
 84 |    "execution_count": 7,
 85 |    "metadata": {},
 86 |    "outputs": [],
 87 |    "source": [
 88 |     "class LinearRegressionModel(nn.Module):\n",
 89 |     "\n",
 90 |     "    def __init__(self, input_dim, output_dim):\n",
 91 |     "\n",
 92 |     "        super(LinearRegressionModel, self).__init__() \n",
 93 |     "        # Calling Super Class's constructor\n",
 94 |     "        self.linear = nn.Linear(input_dim, output_dim)\n",
 95 |     "        # nn.linear is defined in nn.Module\n",
 96 |     "\n",
 97 |     "    def forward(self, x):\n",
 98 |     "        # Here the forward pass is simply a linear function\n",
 99 |     "\n",
100 |     "        out = self.linear(x)\n",
101 |     "        return out\n",
102 |     "\n",
103 |     "input_dim = 1\n",
104 |     "output_dim = 1\n",
105 |     "\n",
106 |     "model = LinearRegressionModel(input_dim,output_dim)\n",
107 |     "\n",
108 |     "criterion = nn.MSELoss()# Mean Squared Loss\n",
109 |     "l_rate = 0.001\n",
110 |     "\n",
111 |     "optimiser = Neumann(list(model.parameters()), lr = l_rate) #Stochastic Gradient Descent\n",
112 |     "    "
113 |    ]
114 |   },
115 |   {
116 |    "cell_type": "code",
117 |    "execution_count": 10,
118 |    "metadata": {},
119 |    "outputs": [
120 |     {
121 |      "name": "stdout",
122 |      "output_type": "stream",
123 |      "text": [
124 |       "epoch 0, loss 17.191287994384766\n",
125 |       "epoch 1, loss 16.576515197753906\n",
126 |       "epoch 2, loss 15.96240234375\n",
127 |       "epoch 3, loss 15.351107597351074\n",
128 |       "epoch 4, loss 14.744500160217285\n",
129 |       "epoch 5, loss 14.144200325012207\n",
130 |       "epoch 6, loss 13.551612854003906\n",
131 |       "epoch 7, loss 12.967961311340332\n",
132 |       "epoch 8, loss 12.394291877746582\n",
133 |       "epoch 9, loss 11.831509590148926\n",
134 |       "epoch 10, loss 11.280391693115234\n",
135 |       "epoch 11, loss 10.741596221923828\n",
136 |       "epoch 12, loss 10.215685844421387\n",
137 |       "epoch 13, loss 9.703120231628418\n",
138 |       "epoch 14, loss 9.204290390014648\n",
139 |       "epoch 15, loss 8.719508171081543\n",
140 |       "epoch 16, loss 8.249022483825684\n",
141 |       "epoch 17, loss 7.793018341064453\n",
142 |       "epoch 18, loss 7.351638317108154\n",
143 |       "epoch 19, loss 6.924966335296631\n",
144 |       "epoch 20, loss 6.899356842041016\n",
145 |       "epoch 21, loss 6.866124629974365\n",
146 |       "epoch 22, loss 6.825706481933594\n",
147 |       "epoch 23, loss 6.778513431549072\n",
148 |       "epoch 24, loss 6.724961757659912\n",
149 |       "epoch 25, loss 6.665435314178467\n",
150 |       "epoch 26, loss 6.600317001342773\n",
151 |       "epoch 27, loss 6.5299763679504395\n",
152 |       "epoch 28, loss 6.454763889312744\n",
153 |       "epoch 29, loss 6.375024318695068\n",
154 |       "epoch 30, loss 6.291087627410889\n",
155 |       "epoch 31, loss 6.203271389007568\n",
156 |       "epoch 32, loss 6.11188268661499\n",
157 |       "epoch 33, loss 6.017217636108398\n",
158 |       "epoch 34, loss 5.919558048248291\n",
159 |       "epoch 35, loss 5.819180965423584\n",
160 |       "epoch 36, loss 5.716345310211182\n",
161 |       "epoch 37, loss 5.611307621002197\n",
162 |       "epoch 38, loss 5.504310607910156\n",
163 |       "epoch 39, loss 5.395582675933838\n",
164 |       "epoch 40, loss 5.285351753234863\n",
165 |       "epoch 41, loss 5.173831462860107\n",
166 |       "epoch 42, loss 5.061225414276123\n",
167 |       "epoch 43, loss 4.947733402252197\n",
168 |       "epoch 44, loss 4.833540439605713\n",
169 |       "epoch 45, loss 4.7188286781311035\n",
170 |       "epoch 46, loss 4.603769302368164\n",
171 |       "epoch 47, loss 4.488527774810791\n",
172 |       "epoch 48, loss 4.373256683349609\n",
173 |       "epoch 49, loss 4.25811243057251\n",
174 |       "epoch 50, loss 4.143230438232422\n",
175 |       "epoch 51, loss 4.028749942779541\n",
176 |       "epoch 52, loss 3.914796829223633\n",
177 |       "epoch 53, loss 3.801495313644409\n",
178 |       "epoch 54, loss 3.6889610290527344\n",
179 |       "epoch 55, loss 3.577302932739258\n",
180 |       "epoch 56, loss 3.4666271209716797\n",
181 |       "epoch 57, loss 3.357028007507324\n",
182 |       "epoch 58, loss 3.2486019134521484\n",
183 |       "epoch 59, loss 3.14143443107605\n",
184 |       "epoch 60, loss 3.0356082916259766\n",
185 |       "epoch 61, loss 2.9312002658843994\n",
186 |       "epoch 62, loss 2.828282356262207\n",
187 |       "epoch 63, loss 2.7269256114959717\n",
188 |       "epoch 64, loss 2.6271891593933105\n",
189 |       "epoch 65, loss 2.5291340351104736\n",
190 |       "epoch 66, loss 2.4328126907348633\n",
191 |       "epoch 67, loss 2.3382790088653564\n",
192 |       "epoch 68, loss 2.245577096939087\n",
193 |       "epoch 69, loss 2.154750347137451\n",
194 |       "epoch 70, loss 2.0658395290374756\n",
195 |       "epoch 71, loss 1.9788776636123657\n",
196 |       "epoch 72, loss 1.8938989639282227\n",
197 |       "epoch 73, loss 1.8109320402145386\n",
198 |       "epoch 74, loss 1.730002760887146\n",
199 |       "epoch 75, loss 1.6511331796646118\n",
200 |       "epoch 76, loss 1.5743441581726074\n",
201 |       "epoch 77, loss 1.4996517896652222\n",
202 |       "epoch 78, loss 1.4270719289779663\n",
203 |       "epoch 79, loss 1.356615662574768\n",
204 |       "epoch 80, loss 1.2882922887802124\n",
205 |       "epoch 81, loss 1.2221078872680664\n",
206 |       "epoch 82, loss 1.1580679416656494\n",
207 |       "epoch 83, loss 1.0961750745773315\n",
208 |       "epoch 84, loss 1.0364285707473755\n",
209 |       "epoch 85, loss 0.9788269400596619\n",
210 |       "epoch 86, loss 0.9233669638633728\n",
211 |       "epoch 87, loss 0.8700427412986755\n",
212 |       "epoch 88, loss 0.8188469409942627\n",
213 |       "epoch 89, loss 0.7697703242301941\n",
214 |       "epoch 90, loss 0.7228019833564758\n",
215 |       "epoch 91, loss 0.6779298782348633\n",
216 |       "epoch 92, loss 0.6351407170295715\n",
217 |       "epoch 93, loss 0.5944194197654724\n",
218 |       "epoch 94, loss 0.5557489395141602\n",
219 |       "epoch 95, loss 0.5191113948822021\n",
220 |       "epoch 96, loss 0.48448848724365234\n",
221 |       "epoch 97, loss 0.45185983180999756\n",
222 |       "epoch 98, loss 0.4212043285369873\n",
223 |       "epoch 99, loss 0.39249923825263977\n"
224 |      ]
225 |     }
226 |    ],
227 |    "source": [
228 |     "epochs = 100\n",
229 |     "\n",
230 |     "for epoch in range(epochs):\n",
231 |     "    for i, sample in enumerate(train_loader):\n",
232 |     "\n",
233 |     "        #increase the number of epochs by 1 every time\n",
234 |     "        inputs = Variable(sample['X'].type(torch.FloatTensor))\n",
235 |     "        labels = Variable(sample['Y'].type(torch.FloatTensor))\n",
236 |     "\n",
237 |     "        #clear grads as discussed in prev post\n",
238 |     "        optimiser.zero_grad()\n",
239 |     "        #forward to get predicted values\n",
240 |     "        outputs = model.forward(inputs.view(-1, 1))\n",
241 |     "#         loss = criterion(outputs, labels)\n",
242 |     "#         print(outputs)\n",
243 |     "        loss = criterion(outputs.squeeze(), labels)\n",
244 |     "        loss.backward()# back props\n",
245 |     "        optimiser.step()# update the parameters\n",
246 |     "        print('epoch {}, loss {}'.format(epoch,loss.data[0]))"
247 |    ]
248 |   },
249 |   {
250 |    "cell_type": "code",
251 |    "execution_count": 17,
252 |    "metadata": {},
253 |    "outputs": [
254 |     {
255 |      "name": "stdout",
256 |      "output_type": "stream",
257 |      "text": [
258 |       "{'X': tensor([ 3.4000,  3.7000,  3.4000,  3.2000], dtype=torch.float64), 'Y': tensor([ 4.6000,  5.1000,  5.2000,  4.7000], dtype=torch.float64)}\n",
259 |       "{'X': tensor([ 4.4000,  3.5000,  3.2000,  3.8000], dtype=torch.float64), 'Y': tensor([ 5.7000,  5.5000,  4.7000,  5.1000], dtype=torch.float64)}\n",
260 |       "{'X': tensor([ 3.0000,  3.1000,  3.8000,  3.4000], dtype=torch.float64), 'Y': tensor([ 4.8000,  4.6000,  4.6000,  5.0000], dtype=torch.float64)}\n",
261 |       "{'X': tensor([ 3.8000,  3.4000,  4.0000,  3.1000], dtype=torch.float64), 'Y': tensor([ 5.7000,  5.0000,  5.8000,  4.9000], dtype=torch.float64)}\n",
262 |       "{'X': tensor([ 3.5000,  3.5000,  3.5000,  3.2000], dtype=torch.float64), 'Y': tensor([ 5.0000,  5.1000,  5.2000,  5.0000], dtype=torch.float64)}\n",
263 |       "{'X': tensor([ 3.5000,  3.0000,  3.9000,  3.4000], dtype=torch.float64), 'Y': tensor([ 5.1000,  4.9000,  5.4000,  5.4000], dtype=torch.float64)}\n",
264 |       "{'X': tensor([ 3.8000,  3.9000,  3.4000,  3.3000], dtype=torch.float64), 'Y': tensor([ 5.1000,  5.4000,  5.1000,  5.1000], dtype=torch.float64)}\n",
265 |       "{'X': tensor([ 4.2000,  3.3000,  3.0000,  3.7000], dtype=torch.float64), 'Y': tensor([ 5.5000,  5.0000,  5.0000,  5.4000], dtype=torch.float64)}\n",
266 |       "{'X': tensor([ 3.6000,  4.1000,  3.4000,  3.0000], dtype=torch.float64), 'Y': tensor([ 4.6000,  5.2000,  4.8000,  4.3000], dtype=torch.float64)}\n",
267 |       "{'X': tensor([ 3.5000,  3.4000,  3.1000,  3.0000], dtype=torch.float64), 'Y': tensor([ 5.0000,  4.8000,  4.9000,  4.4000], dtype=torch.float64)}\n",
268 |       "{'X': tensor([ 3.7000,  2.3000,  3.1000,  3.2000], dtype=torch.float64), 'Y': tensor([ 5.3000,  4.5000,  4.8000,  4.4000], dtype=torch.float64)}\n",
269 |       "{'X': tensor([ 3.6000,  2.9000,  3.6000,  3.4000], dtype=torch.float64), 'Y': tensor([ 4.9000,  4.4000,  5.0000,  5.4000], dtype=torch.float64)}\n"
270 |      ]
271 |     }
272 |    ],
273 |    "source": [
274 |     " for i, sample in enumerate(train_loader):\n",
275 |     "\n",
276 |     "        #increase the number of epochs by 1 every time\n",
277 |     "    print(sample)"
278 |    ]
279 |   },
280 |   {
281 |    "cell_type": "code",
282 |    "execution_count": 12,
283 |    "metadata": {},
284 |    "outputs": [
285 |     {
286 |      "ename": "NameError",
287 |      "evalue": "name 'X_train' is not defined",
288 |      "output_type": "error",
289 |      "traceback": [
290 |       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
291 |       "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
292 |       "\u001b[0;32m<ipython-input-12-6dbc45793471>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mpredicted\u001b[0m \u001b[0;34m=\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mVariable\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_numpy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnumpy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      2\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      3\u001b[0m \u001b[0mplt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mplot\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_correct\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'go'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlabel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'from data'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0malpha\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m.5\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      4\u001b[0m \u001b[0mplt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mplot\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpredicted\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlabel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'prediction'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0malpha\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m0.5\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      5\u001b[0m \u001b[0mplt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlegend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
293 |       "\u001b[0;31mNameError\u001b[0m: name 'X_train' is not defined"
294 |      ]
295 |     }
296 |    ],
297 |    "source": [
298 |     "predicted =model.forward(Variable(torch.from_numpy(X_train))).data.numpy()\n",
299 |     "\n",
300 |     "plt.plot(x_train, y_correct, 'go', label = 'from data', alpha = .5)\n",
301 |     "plt.plot(x_train, predicted, label = 'prediction', alpha = 0.5)\n",
302 |     "plt.legend()\n",
303 |     "plt.show()\n",
304 |     "print(model.state_dict())"
305 |    ]
306 |   },
307 |   {
308 |    "cell_type": "code",
309 |    "execution_count": null,
310 |    "metadata": {},
311 |    "outputs": [],
312 |    "source": []
313 |   }
314 |  ],
315 |  "metadata": {
316 |   "kernelspec": {
317 |    "display_name": "Python 3",
318 |    "language": "python",
319 |    "name": "python3"
320 |   },
321 |   "language_info": {
322 |    "codemirror_mode": {
323 |     "name": "ipython",
324 |     "version": 3
325 |    },
326 |    "file_extension": ".py",
327 |    "mimetype": "text/x-python",
328 |    "name": "python",
329 |    "nbconvert_exporter": "python",
330 |    "pygments_lexer": "ipython3",
331 |    "version": "3.6.4"
332 |   }
333 |  },
334 |  "nbformat": 4,
335 |  "nbformat_minor": 2
336 | }
337 | 


--------------------------------------------------------------------------------