├── .gitignore
├── 2048_5.png
├── LICENSE
├── NeumannOptimizerNumpy.py
├── README.md
├── __init__.py
├── main.py
├── misc
│   ├── NeumannOptimizerFinal.pdf
│   ├── adam_skeleton.py
│   └── neumann.pdf
├── models
│   ├── 128_3.pkl
│   ├── 2048_3.pkl
│   ├── 2048_5.pkl
│   ├── 256_3.pkl
│   ├── 256_5.pkl
│   ├── 32_5.pkl
│   ├── ImageClassifier.py
│   ├── __init__.py
│   ├── cnn.py
│   ├── linear_regression
│   │   ├── Linear Regression.ipynb
│   │   └── test.py
│   ├── mlp.py
│   ├── modules
│   │   ├── Net.py
│   │   └── __init__.py
│   ├── optimizer
│   │   ├── __init__.py
│   │   ├── neumann.py
│   │   └── stochastic.py
│   └── plot.ipynb
├── slr10.csv
└── test.ipynb
/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # Jupyter Notebook 7 | .ipynb_checkpoints 8 | 9 | 10 | .vscode/ 11 | data/ 12 | .idea/ 13 | dataset/ 14 | -------------------------------------------------------------------------------- /2048_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayybhatt/neumann-optimizer/c931631346a1097d198983684d7c68d91ae82d39/2048_5.png -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Jay 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE.
22 | -------------------------------------------------------------------------------- /NeumannOptimizerNumpy.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import time 3 | from math import exp 4 | import matplotlib.pyplot as plt 5 | 6 | def gradient_descent( func, initial_x, eps=1e-5, maximum_iterations=65536, learning_rate=1e-2 ): 7 | """ 8 | Gradient Descent 9 | func: the function to optimize. It is called as "value, gradient = func( x, 1 )" 10 | initial_x: the starting point 11 | eps: stop once the norm of the descent direction falls below this threshold 12 | maximum_iterations: the maximum allowed number of iterations 13 | learning_rate: the constant stepsize used for each update 14 | returns: the final iterate plus the logged values, runtimes and iterates 15 | """ 16 | 17 | if eps <= 0: 18 | raise ValueError("Epsilon must be positive") 19 | x = np.matrix(initial_x) 20 | 21 | # initialization 22 | values = [] 23 | runtimes = [] 24 | xs = [] 25 | start_time = time.time() 26 | iterations = 0 27 | 28 | # gradient updates 29 | while True: 30 | 31 | value, gradient = func( x , 1 ) 32 | value = np.double( value ) 33 | gradient = np.matrix( gradient ) 34 | 35 | # updating the logs 36 | values.append( value ) 37 | runtimes.append( time.time() - start_time ) 38 | xs.append( x.copy() ) 39 | 40 | direction = -gradient 41 | 42 | if np.linalg.norm(direction) < eps: 43 | break 44 | 45 | # constant-stepsize update 46 | x = x + learning_rate * direction 47 | 48 | iterations += 1 49 | 50 | if iterations >= maximum_iterations: 51 | break 52 | return (x, values, runtimes, xs) 53 | 
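# Background for the `neumann` routine defined further below: the optimizer takes its
# name from the Neumann series (I - A)^{-1} = sum_{k>=0} A^k, which converges whenever
# the spectral radius of A is below 1, so an inverse can be applied using only matrix
# products. A quick numerical check of that identity (an illustrative sketch assuming
# only NumPy; nothing in this file calls it):
#
#     A = 0.1 * np.ones((2, 2))                                   # spectral radius 0.2 < 1
#     partial_sum = sum(np.linalg.matrix_power(A, k) for k in range(60))
#     assert np.allclose(partial_sum, np.linalg.inv(np.eye(2) - A))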
54 | def linear_regression(x, y, w, b, order=0): 55 | output = w*x.T + b 56 | error = np.mean((y-output)**2) 57 | if order == 1: 58 | grad_w = -2*x.T*(y-(w*x.T + b)) 59 | grad_b = -2*(y-(w*x.T + b)) 60 | grad_w = np.mean(grad_w) 61 | grad_b = np.mean(grad_b) 62 | return output, grad_w, grad_b 63 | return output 64 | 65 | def boyd_example_func(x, order=0): 66 | a=np.matrix('1 3') 67 | b=np.matrix('1 -3') 68 | c=np.matrix('-1 0') 69 | x=np.asmatrix(x) 70 | 71 | value = exp(a*x-0.1)+exp(b*x-0.1)+exp(c*x-0.1) 72 | if order==0: 73 | return value 74 | elif order==1: 75 | gradient = a.T*exp(a*x-0.1)+b.T*exp(b*x-0.1)+c.T*exp(c*x-0.1) 76 | return (value, gradient) 77 | elif order==2: 78 | gradient = a.T*exp(a*x-0.1)+b.T*exp(b*x-0.1)+c.T*exp(c*x-0.1) 79 | hessian = a.T*a*exp(a*x-0.1)+b.T*b*exp(b*x-0.1)+c.T*c*exp(c*x-0.1) 80 | return (value, gradient, hessian) 81 | else: 82 | raise ValueError("The argument \"order\" should be 0, 1 or 2") 83 | 84 | def neumann( func, initial_x, learning_rate=1e-2, eps=1e-5, maximum_iterations=65536): 85 | x = np.matrix(initial_x) 86 | # moving_average = x 87 | neumann_iterate = 0 88 | iterate = 0 89 | k_value = 10 90 | values = [] 91 | runtimes = [] 92 | xs = [] 93 | grad_norm = [] 94 | start_time = time.time() 95 | while True: 96 | print(x) 97 | if iterate < 5: 98 | value, grad = func(x, 1) 99 | x = x - learning_rate*grad 100 | iterate += 1 101 | continue 102 | 103 | values.append( value ) 104 | runtimes.append( time.time() - start_time ) 105 | xs.append( x.copy() ) 106 | 107 | eta = 0.5/iterate 108 | mu = iterate/(iterate + 1) 109 | mu = min(max(mu, 0.5),0.9) 110 | 111 | value, grad = func(x, 1) 112 | 113 | grad_norm.append(np.linalg.norm(grad)**2) 114 | 115 | if np.linalg.norm(grad)**2 < eps: 116 | break 117 | 118 | if iterate % k_value == 0: 119 | neumann_iterate = -eta*grad 120 | k_value *= 2 121 | 122 | # The regularization ("crazy") term is omitted here, since we only test on a convex function 123 | 124 | neumann_iterate = mu*neumann_iterate - eta*grad 125 | 126 | x = x + mu*neumann_iterate - eta*grad 127 | # moving_average = 128 | iterate += 1 129 | if iterate >= maximum_iterations: 130 | break 131 | return x,values,runtimes,xs,grad_norm 132 | 133 | 134 | def draw_contour( func, neumann_xs, fig, levels=np.arange(5, 1000, 10), x=np.arange(-5, 5.1, 0.05), y=np.arange(-5, 5.1, 0.05)): 135 | """ 136 | Draws a contour plot of the given iterates for a function 137 | func: the contour levels will be drawn based on the values of func 138 | neumann_xs: the Neumann iterates to animate 139 | fig: figure index 140 | levels: levels of the contour plot 141 | x: x coordinates to evaluate func and draw the plot 142 | y: y coordinates to evaluate func and draw the plot 143 | """ 144 | 145 | Z = np.zeros((len(x), len(y))) 146 | for i in range(len(x)): 147 | for j in range(len(y)): 148 | Z[i, j] = func( np.matrix([x[i],y[j]]).T , 0 ) 149 | 150 | plt.figure(fig) 151 | plt.contour( x, y, Z.T, levels, colors='0.75') 152 | plt.ion() 153 | plt.show() 154 | 155 | # line_gd, = plt.plot( gd_xs[0][0,0], gd_xs[0][1,0], linewidth=2, color='r', marker='o', label='GD' ) 156 | line_neumann, = plt.plot( neumann_xs[0][0,0], neumann_xs[0][1,0], linewidth=2, color='m', marker='o', label='Neumann' ) 157 | 158 | L = plt.legend(handles=[line_neumann]) 159 | plt.draw() 160 | time.sleep(1) 161 | 162 | for i in range( 1, len(neumann_xs)): 163 | 164 | # line_gd.set_xdata( np.append( line_gd.get_xdata(), gd_xs[ min(i,len(gd_xs)-1) ][0,0] ) ) 165 | # line_gd.set_ydata( np.append( line_gd.get_ydata(), gd_xs[ min(i,len(gd_xs)-1) ][1,0] ) ) 166 | 167 | line_neumann.set_xdata( np.append( line_neumann.get_xdata(), neumann_xs[ min(i,len(neumann_xs)-1) ][0,0] ) ) 168 | line_neumann.set_ydata( np.append( line_neumann.get_ydata(), neumann_xs[ min(i,len(neumann_xs)-1) ][1,0] ) ) 169 | 170 | 171 | # L.get_texts()[0].set_text( " GD, %d iterations" % min(i,len(gd_xs)-1) ) 172 | L.get_texts()[0].set_text( " Neumann, %d iterations" % min(i,len(neumann_xs)-1) ) 173 | 174 | plt.draw() 175 | input("Press Enter to continue...") 176 | 177 | 178 | initial_x = np.matrix('-1.0; -1.0') 179 | 180 | x, values, runtimes, neumann_xs, grad_norm = neumann(boyd_example_func, initial_x) 181 | x_gd, gd_values, runtimes_gd, gradient_xs = gradient_descent(boyd_example_func, initial_x) 182 | plt.figure(1) 183 | line_neumann, = plt.semilogy(values, linewidth=2, color='r', marker='o', label='Neumann') 184 | line_gd, = plt.semilogy(gd_values, linewidth=2, color='b', marker='o', label='GD') 185 | plt.legend() 186 | plt.figure(2) 187 | plt.semilogy(grad_norm, linewidth=2, color='b', marker='o', label='Neumann') 188 | draw_contour( boyd_example_func, neumann_xs, 3, levels=np.arange(0, 15, 1), x=np.arange(-2, 2, 0.1), y=np.arange(-2, 2, 0.1)) 189 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # neumann-optimizer
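2 | 
3 | A PyTorch implementation of the Neumann optimizer (`models/optimizer/neumann.py`), alongside a NumPy prototype (`NeumannOptimizerNumpy.py`), MNIST/FashionMNIST training scripts, and the paper in `misc/neumann.pdf`.
4 | 
5 | A minimal usage sketch (hyperparameter values are the ones `main.py` uses; adjust for your own model):
6 | 
7 | ```python
8 | import models.optimizer as optim
9 | 
10 | optimizer = optim.Neumann(list(model.parameters()), lr=1e-3, alpha=1e-3, beta=1e-9)
11 | ```
12 | 
13 | See `main.py` for a complete MNIST training loop.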
-------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.insert(0,"models/optimizer") 3 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import argparse 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | # import torch.optim as optim 7 | import models.optimizer as optim 8 | from torchvision import datasets, transforms 9 | 10 | # Training settings 11 | parser = argparse.ArgumentParser(description='PyTorch MNIST Example') 12 | parser.add_argument('--batch-size', type=int, default=64, metavar='N', 13 | help='input batch size for training (default: 64)') 14 | parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N', 15 | help='input batch size for testing (default: 1000)') 16 | parser.add_argument('--epochs', type=int, default=10, metavar='N', 17 | help='number of epochs to train (default: 10)') 18 | parser.add_argument('--lr', type=float, default=1e-3, metavar='LR', 19 | help='learning rate (default: 1e-3)') 20 | parser.add_argument('--momentum', type=float, default=0.5, metavar='M', 21 | help='SGD momentum (default: 0.5)') 22 | parser.add_argument('--no-cuda', action='store_true', default=False, 23 | help='disables CUDA training') 24 | parser.add_argument('--seed', type=int, default=1, metavar='S', 25 | help='random seed (default: 1)') 26 | parser.add_argument('--log-interval', type=int, default=10, metavar='N', 27 | help='how many batches to wait before logging training status') 28 | args = parser.parse_args() 29 | use_cuda = not args.no_cuda and torch.cuda.is_available() 30 | 31 | torch.manual_seed(args.seed) 32 | 33 | device = torch.device("cuda" if use_cuda else "cpu") 34 | 35 | 36 | kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {} 37 | train_loader = torch.utils.data.DataLoader( 38 | datasets.MNIST('../data', train=True, download=True, 39 | transform=transforms.Compose([ 40 | transforms.ToTensor(), 41 | transforms.Normalize((0.1307,), (0.3081,)) 42 | ])), 43 | batch_size=args.batch_size, shuffle=True, **kwargs) 44 | test_loader = torch.utils.data.DataLoader( 45 | datasets.MNIST('../data', train=False, transform=transforms.Compose([ 46 | transforms.ToTensor(), 47 | transforms.Normalize((0.1307,), (0.3081,)) 48 | ])), 49 | batch_size=args.test_batch_size, shuffle=True, **kwargs) 50 | 51 | 52 | class Net(nn.Module): 53 | def __init__(self): 54 | super(Net, self).__init__() 55 | self.conv1 = nn.Conv2d(1, 10, kernel_size=5) 56 | self.conv2 = nn.Conv2d(10, 20, kernel_size=5) 57 | self.conv2_drop = nn.Dropout2d() 58 | self.fc1 = nn.Linear(320, 50) 59 | self.fc2 = nn.Linear(50, 10) 60 | 61 | def forward(self, x): 62 | x = F.relu(F.max_pool2d(self.conv1(x), 2)) 63 | x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2)) 64 | x = x.view(-1, 320) 65 | x = F.relu(self.fc1(x)) 66 | x = F.dropout(x, training=self.training) 67 | x = self.fc2(x) 68 | return F.log_softmax(x, dim=1) 69 | 70 | model = Net().to(device) 71 | 72 | 73 | alpha = 1e-3 74 | beta = 1e-9 75 | lr = 0.001 76 | optimizer = optim.Neumann(list(model.parameters()), lr=lr, alpha=alpha, beta=beta) 77 | 78 | def train(epoch): 79 | model.train() 80 | for batch_idx, (data, target) in enumerate(train_loader): 81 | data, target = data.to(device), target.to(device) 82 | optimizer.zero_grad() 83 | output = model(data) 84 | loss = F.nll_loss(output, target) 85 | loss.backward() 86 | optimizer.step() 87 | if batch_idx % args.log_interval == 0: 88 | print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( 89 | epoch, batch_idx * len(data), len(train_loader.dataset), 90 | 100.
* batch_idx / len(train_loader), loss.item())) 91 | 92 | def test(): 93 | model.eval() 94 | test_loss = 0 95 | correct = 0 96 | with torch.no_grad(): 97 | for data, target in test_loader: 98 | data, target = data.to(device), target.to(device) 99 | output = model(data) 100 | test_loss += F.nll_loss(output, target, size_average=False).item() # sum up batch loss 101 | pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability 102 | correct += pred.eq(target.view_as(pred)).sum().item() 103 | 104 | test_loss /= len(test_loader.dataset) 105 | print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format( 106 | test_loss, correct, len(test_loader.dataset), 107 | 100. * correct / len(test_loader.dataset))) 108 | 109 | 110 | for epoch in range(1, args.epochs + 1): 111 | train(epoch) 112 | test() 113 | -------------------------------------------------------------------------------- /misc/NeumannOptimizerFinal.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayybhatt/neumann-optimizer/c931631346a1097d198983684d7c68d91ae82d39/misc/NeumannOptimizerFinal.pdf -------------------------------------------------------------------------------- /misc/adam_skeleton.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | from .optimizer import Optimizer 4 | 5 | 6 | class Adam(Optimizer): 7 | """Implements Adam algorithm. 8 | It has been proposed in `Adam: A Method for Stochastic Optimization`_. 9 | Arguments: 10 | params (iterable): iterable of parameters to optimize or dicts defining 11 | parameter groups 12 | lr (float, optional): learning rate (default: 1e-3) 13 | betas (Tuple[float, float], optional): coefficients used for computing 14 | running averages of gradient and its square (default: (0.9, 0.999)) 15 | eps (float, optional): term added to the denominator to improve 16 | numerical stability (default: 1e-8) 17 | weight_decay (float, optional): weight decay (L2 penalty) (default: 0) 18 | amsgrad (boolean, optional): whether to use the AMSGrad variant of this 19 | algorithm from the paper `On the Convergence of Adam and Beyond`_ 20 | .. _Adam\: A Method for Stochastic Optimization: 21 | https://arxiv.org/abs/1412.6980 22 | .. _On the Convergence of Adam and Beyond: 23 | https://openreview.net/forum?id=ryQu7f-RZ 24 | """ 25 | 26 | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, 27 | weight_decay=0, amsgrad=False): 28 | if not 0.0 <= lr: 29 | raise ValueError("Invalid learning rate: {}".format(lr)) 30 | if not 0.0 <= eps: 31 | raise ValueError("Invalid epsilon value: {}".format(eps)) 32 | if not 0.0 <= betas[0] < 1.0: 33 | raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0])) 34 | if not 0.0 <= betas[1] < 1.0: 35 | raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1])) 36 | defaults = dict(lr=lr, betas=betas, eps=eps, 37 | weight_decay=weight_decay, amsgrad=amsgrad) 38 | super(Adam, self).__init__(params, defaults) 39 | 40 | def __setstate__(self, state): 41 | super(Adam, self).__setstate__(state) 42 | for group in self.param_groups: 43 | group.setdefault('amsgrad', False) 44 | 45 | def step(self, closure=None): 46 | """Performs a single optimization step. 47 | Arguments: 48 | closure (callable, optional): A closure that reevaluates the model 49 | and returns the loss. 
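The update implemented in the body below is, per parameter (g_t denotes the
gradient at step t):
    m_t     = beta1 * m_{t-1} + (1 - beta1) * g_t
    v_t     = beta2 * v_{t-1} + (1 - beta2) * g_t**2
    theta_t = theta_{t-1} - lr * sqrt(1 - beta2**t) / (1 - beta1**t)
                               * m_t / (sqrt(v_t) + eps)
with v_t replaced by its running maximum when amsgrad=True.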
50 | """ 51 | loss = None 52 | if closure is not None: 53 | loss = closure() 54 | 55 | for group in self.param_groups: 56 | for p in group['params']: 57 | if p.grad is None: 58 | continue 59 | grad = p.grad.data 60 | if grad.is_sparse: 61 | raise RuntimeError('Adam does not support sparse gradients, please consider SparseAdam instead') 62 | amsgrad = group['amsgrad'] 63 | 64 | state = self.state[p] 65 | 66 | # State initialization 67 | if len(state) == 0: 68 | state['step'] = 0 69 | # Exponential moving average of gradient values 70 | state['exp_avg'] = torch.zeros_like(p.data) 71 | # Exponential moving average of squared gradient values 72 | state['exp_avg_sq'] = torch.zeros_like(p.data) 73 | if amsgrad: 74 | # Maintains max of all exp. moving avg. of sq. grad. values 75 | state['max_exp_avg_sq'] = torch.zeros_like(p.data) 76 | 77 | exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq'] 78 | if amsgrad: 79 | max_exp_avg_sq = state['max_exp_avg_sq'] 80 | beta1, beta2 = group['betas'] 81 | 82 | state['step'] += 1 83 | 84 | if group['weight_decay'] != 0: 85 | grad = grad.add(group['weight_decay'], p.data) 86 | 87 | # Decay the first and second moment running average coefficient 88 | exp_avg.mul_(beta1).add_(1 - beta1, grad) 89 | exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad) 90 | if amsgrad: 91 | # Maintains the maximum of all 2nd moment running avg. till now 92 | torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq) 93 | # Use the max. for normalizing running avg. of gradient 94 | denom = max_exp_avg_sq.sqrt().add_(group['eps']) 95 | else: 96 | denom = exp_avg_sq.sqrt().add_(group['eps']) 97 | 98 | bias_correction1 = 1 - beta1 ** state['step'] 99 | bias_correction2 = 1 - beta2 ** state['step'] 100 | step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1 101 | 102 | p.data.addcdiv_(-step_size, exp_avg, denom) 103 | 104 | return loss -------------------------------------------------------------------------------- /misc/neumann.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayybhatt/neumann-optimizer/c931631346a1097d198983684d7c68d91ae82d39/misc/neumann.pdf -------------------------------------------------------------------------------- /models/128_3.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayybhatt/neumann-optimizer/c931631346a1097d198983684d7c68d91ae82d39/models/128_3.pkl -------------------------------------------------------------------------------- /models/2048_3.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayybhatt/neumann-optimizer/c931631346a1097d198983684d7c68d91ae82d39/models/2048_3.pkl -------------------------------------------------------------------------------- /models/2048_5.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayybhatt/neumann-optimizer/c931631346a1097d198983684d7c68d91ae82d39/models/2048_5.pkl -------------------------------------------------------------------------------- /models/256_3.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayybhatt/neumann-optimizer/c931631346a1097d198983684d7c68d91ae82d39/models/256_3.pkl -------------------------------------------------------------------------------- /models/256_5.pkl: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayybhatt/neumann-optimizer/c931631346a1097d198983684d7c68d91ae82d39/models/256_5.pkl -------------------------------------------------------------------------------- /models/32_5.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayybhatt/neumann-optimizer/c931631346a1097d198983684d7c68d91ae82d39/models/32_5.pkl -------------------------------------------------------------------------------- /models/ImageClassifier.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision 3 | import torchvision.transforms as transforms 4 | import matplotlib.pyplot as plt 5 | import numpy as np 6 | from modules.Net import Net 7 | #from mlp import MLP 8 | import torch.nn as nn 9 | import torch.optim as optim 10 | from torch.autograd import Variable 11 | from optimizer.neumann import Neumann 12 | 13 | # Random seed 14 | torch.manual_seed(1) 15 | np.random.seed(1) 16 | 17 | # Batch size 18 | batch_size = 4 19 | 20 | # Transformation to tensor and normalization 21 | transform = transforms.Compose( 22 | [transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))] 23 | ) 24 | 25 | # Download the training set 26 | trainset = torchvision.datasets.FashionMNIST(root='./data', train=True, download=True, transform=transform) 27 | 28 | # Training set loader 29 | trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=False, num_workers=2) 30 | 31 | # Test set 32 | testset = torchvision.datasets.FashionMNIST(root='./data', train=False, download=True, transform=transform) 33 | 34 | # Test set loader 35 | testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2) 36 | 37 | 38 | # Function to show an image 39 | def imshow(img): 40 | img = img / 2 + 0.5 41 | npimg = img.numpy() 42 | plt.imshow(np.transpose(npimg, (1, 2, 0))) 43 | plt.show() 44 | # end imshow 45 | 46 | 47 | # Classes 48 | classes = ('T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot') 49 | 50 | # Dataset as iterator 51 | dataiter = iter(trainloader) 52 | 53 | # Get next batch 54 | images, labels = dataiter.next() 55 | 56 | # Show images 57 | n_batches = len(dataiter) 58 | print(u"First 4 labels {}".format([classes[labels[j]] for j in range(4)])) 59 | # imshow(torchvision.utils.make_grid(images)) 60 | 61 | # Our neural net 62 | net = Net() 63 | 64 | # uncomment below line if running on GPU 65 | #net.cuda() 66 | 67 | # Objective function is cross-entropy 68 | criterion = nn.CrossEntropyLoss() 69 | 70 | # Learning rate 71 | learning_rate = 0.001 72 | 73 | # Stochastic Gradient Descent 74 | optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=0.9) 75 | #optimizer = Neumann(list(net.parameters()), lr=learning_rate, momentum = 0.9) 76 | 77 | # Nb iterations 78 | n_iterations = 30 79 | 80 | # List of training and test accuracies 81 | train_accuracies = np.zeros(n_iterations) 82 | test_accuracies = np.zeros(n_iterations) 83 | 84 | # Training ! 
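# Each iteration of the loop below makes one full pass over `trainloader` (forward
# pass, cross-entropy loss, backward pass, optimizer step) while accumulating the
# training accuracy, then evaluates on `testloader`; both accuracies are recorded
# per epoch for the plot at the end of the file.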
85 | for epoch in range(n_iterations): 86 | # Average loss during training 87 | average_loss = 0.0 88 | 89 | # Data to compute accuracy 90 | total = 0 91 | success = 0 92 | 93 | # Iterate over batches 94 | for i, data in enumerate(trainloader, 0): 95 | # Get the inputs and labels 96 | inputs, labels = data 97 | 98 | # To variable 99 | #inputs, labels = Variable(inputs.cuda()), Variable(labels.cuda()) 100 | inputs, labels = Variable(inputs), Variable(labels) 101 | 102 | # Put grad to zero 103 | optimizer.zero_grad() 104 | 105 | # Forward 106 | outputs = net(inputs) 107 | 108 | loss = criterion(outputs, labels) 109 | 110 | # Backward 111 | loss.backward() 112 | 113 | # Optimize 114 | optimizer.step() 115 | 116 | # Add to loss 117 | average_loss += loss.data[0] 118 | 119 | # Take the max as predicted 120 | _, predicted = torch.max(outputs.data, 1) 121 | 122 | # Add to total 123 | total += labels.size(0) 124 | 125 | # Add correctly classified images 126 | success += (predicted == labels.data).sum() 127 | # end for 128 | train_accuracy = 100.0 * success / total 129 | 130 | # Test model on test set 131 | success = 0 132 | total = 0 133 | for (inputs, labels) in testloader: 134 | # To variable 135 | #inputs, labels = Variable(inputs.cuda()), Variable(labels.cuda()) 136 | inputs, labels = Variable(inputs), Variable(labels) 137 | # Neural net's output 138 | outputs = net(inputs) 139 | 140 | # Take the max is predicted 141 | _, predicted = torch.max(outputs.data, 1) 142 | 143 | # Add to total 144 | total += labels.size(0) 145 | 146 | # Add correctly classified images 147 | success += (predicted == labels.data).sum() 148 | # end for 149 | 150 | # Print average loss 151 | print(u"Epoch {}, average loss {}, train accuracy {}, test accuracy {}".format( 152 | epoch, average_loss / n_batches, 153 | train_accuracy, 154 | 100.0 * success / total 155 | ) 156 | ) 157 | 158 | # Save the model 159 | train_accuracies[epoch] = train_accuracy 160 | test_accuracies[epoch] = 100.0 * success / total 161 | # end for 162 | 163 | plt.plot(np.arange(1, n_iterations+1), train_accuracies) 164 | plt.plot(np.arange(1, n_iterations+1), test_accuracies) 165 | plt.show() 166 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- 1 | import optimizer -------------------------------------------------------------------------------- /models/cnn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.optim as optim 4 | from torch.autograd import Variable 5 | from torch.utils.data import DataLoader,sampler,Dataset 6 | import torchvision.datasets as dset 7 | import torchvision.transforms as T 8 | from PIL import Image 9 | import os 10 | import numpy as np 11 | import scipy.io 12 | 13 | import matplotlib.pyplot as plt 14 | from torch.optim.optimizer import Optimizer 15 | from torch.optim.sgd import SGD 16 | from optimizer import Neumann 17 | 18 | import _pickle as pkl 19 | 20 | import pdb; pdb.set_trace() 21 | 22 | 23 | label_mat=scipy.io.loadmat('../data/q3_2_data.mat') 24 | label_train=label_mat['trLb'] 25 | print(len(label_train)) 26 | label_val=label_mat['valLb'] 27 | print(len(label_val)) 28 | 29 | 30 | class ActionDataset(Dataset): 31 | """Action dataset.""" 32 | 33 | def __init__(self, root_dir,labels=[], transform=None): 34 | """ 35 | Args: 36 | root_dir (string): Directory with all the images. 
37 | labels(list): labels if images. 38 | transform (callable, optional): Optional transform to be applied on a sample. 39 | """ 40 | self.root_dir = root_dir 41 | self.transform = transform 42 | self.length=len(os.listdir(self.root_dir)) 43 | self.labels=labels 44 | def __len__(self): 45 | return self.length*3 46 | 47 | def __getitem__(self, idx): 48 | 49 | folder=idx//3+1 50 | imidx= idx%3+1 51 | folder=format(folder,'05d') 52 | imgname=str(imidx)+'.jpg' 53 | img_path = os.path.join(self.root_dir, 54 | folder,imgname) 55 | image = Image.open(img_path) 56 | if len(self.labels)!=0: 57 | Label=self.labels[idx//3][0]-1 58 | if self.transform: 59 | image = self.transform(image) 60 | if len(self.labels)!=0: 61 | sample={'image':image,'img_path':img_path,'Label':Label} 62 | else: 63 | sample={'image':image,'img_path':img_path} 64 | return sample 65 | 66 | 67 | 68 | dtype = torch.FloatTensor # the CPU datatype 69 | # Constant to control how frequently we print train loss 70 | print_every = 400 71 | # This is a little utility that we'll use to reset the model 72 | # if we want to re-initialize all our parameters 73 | def reset(m): 74 | if hasattr(m, 'reset_parameters'): 75 | m.reset_parameters() 76 | 77 | class Flatten(nn.Module): 78 | def forward(self, x): 79 | N, C, H, W = x.size() # read in N, C, H, W 80 | return x.view(N, -1) # "flatten" the C * H * W values into a single vector per image 81 | gpu_dtype = torch.cuda.FloatTensor 82 | 83 | 84 | 85 | 86 | def train(model, loss_fn, optimizer, dataloader, num_epochs = 1): 87 | losses = [] 88 | for epoch in range(num_epochs): 89 | print('Starting epoch %d / %d' % (epoch + 1, num_epochs)) 90 | model.train() 91 | for t, sample in enumerate(dataloader): 92 | x_var = Variable(sample['image'].cuda()) 93 | y_var = Variable(sample['Label'].cuda().long()) 94 | 95 | scores = model(x_var) 96 | 97 | loss = loss_fn(scores, y_var) 98 | if (t + 1) % 1 == 0: 99 | print('t = %d, loss = %.4f' % (t + 1, loss.data[0])) 100 | pass 101 | 102 | losses.append(loss.data[0]) 103 | 104 | optimizer.zero_grad() 105 | loss.backward() 106 | optimizer.step() 107 | 108 | return losses 109 | 110 | def check_accuracy(model, loader): 111 | ''' 112 | if loader.dataset.train: 113 | print('Checking accuracy on validation set') 114 | else: 115 | print('Checking accuracy on test set') 116 | ''' 117 | num_correct = 0 118 | num_samples = 0 119 | model.eval() # Put the model in test mode (the opposite of model.train(), essentially) 120 | for t, sample in enumerate(loader): 121 | x_var = Variable(sample['image'].cuda()) 122 | y_var = sample['Label'].cuda() 123 | y_var=y_var.cpu() 124 | scores = model(x_var) 125 | _, preds = scores.data.cpu().max(1) 126 | #print(preds) 127 | #print(y_var) 128 | num_correct += (preds.numpy() == y_var.numpy()).sum() 129 | num_samples += preds.size(0) 130 | acc = float(num_correct) / num_samples 131 | print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc)) 132 | 133 | 134 | 135 | 136 | augment_transforms = T.Compose([T.RandomHorizontalFlip(),T.RandomVerticalFlip(),T.RandomRotation(30),T.ToTensor()]) 137 | batch_size = 256 138 | print_every = 50 139 | image_dataset_train=ActionDataset(root_dir='../data/trainClips/',labels=label_train,transform=augment_transforms) 140 | 141 | image_dataloader_train = DataLoader(image_dataset_train, batch_size=batch_size, 142 | shuffle=True, num_workers=4) 143 | image_dataset_val=ActionDataset(root_dir='../data/valClips/',labels=label_val,transform=augment_transforms) 144 | 145 | image_dataloader_val = 
DataLoader(image_dataset_val, batch_size=batch_size, 146 | shuffle=False, num_workers=4) 147 | image_dataset_test=ActionDataset(root_dir='../data/testClips/',labels=[],transform=augment_transforms) 148 | 149 | image_dataloader_test = DataLoader(image_dataset_test, batch_size=batch_size, 150 | shuffle=False, num_workers=4) 151 | 152 | 153 | 154 | ###########3rd To Do (16 points, must submit the results to Kaggle) ############## 155 | # Train your model here, and make sure the output of this cell is the accuracy of your best model on the 156 | # train, val, and test sets. Here's some code to get you started. The output of this cell should be the training 157 | # and validation accuracy on your best model (measured by validation accuracy). 158 | 159 | model = nn.Sequential( 160 | nn.Conv2d(3,32,kernel_size=5,stride=1), #8*58*58 161 | nn.BatchNorm2d(32), 162 | nn.LeakyReLU(inplace=True), 163 | nn.MaxPool2d(kernel_size=2,stride=2),#8*29*29 164 | 165 | nn.Conv2d(32,128,kernel_size=3,stride=2),#16*23*23, 15 166 | nn.BatchNorm2d(128), 167 | # nn.LeakyReLU(inplace=True), 168 | # nn.Dropout2d(p=0.4), 169 | # nn.MaxPool2d(kernel_size=2,stride=2),#16*11*11 170 | 171 | # nn.Conv2d(128,256,kernel_size=3,stride=1), 172 | # nn.BatchNorm2d(256), 173 | # nn.LeakyReLU(inplace=True), 174 | # nn.MaxPool2d(kernel_size=2,stride=2), 175 | Flatten(), 176 | nn.Linear(25088,10) 177 | ) 178 | 179 | model.cuda() 180 | model.apply(reset) 181 | loss_fn = nn.CrossEntropyLoss().cuda().type(gpu_dtype) 182 | # loss_fn = nn.CrossEntropyLoss() 183 | beta = 1e-9 184 | alpha = 1e-3 185 | optimizer = Neumann(list(model.parameters()), lr=1e-3, alpha=alpha, beta=beta, sgd_steps=10) 186 | 187 | 188 | 189 | num_epochs=5 190 | # for i in range(1): 191 | model.train() 192 | losses = train(model, loss_fn, optimizer,image_dataloader_train, num_epochs=num_epochs) 193 | 194 | model.eval() 195 | check_accuracy(model,image_dataloader_train) 196 | check_accuracy(model, image_dataloader_val) 197 | 198 | filename = "./"+str(batch_size)+"_"+str(num_epochs)+".pkl" 199 | 200 | with open(filename, 'wb') as f: 201 | pkl.dump(losses, f) 202 | 203 | plt.figure(figsize=(12, 8)) 204 | plt.title("Neumann Opt on Action Detection", fontsize=17) 205 | plt.xlabel("Iteration", fontsize=15) 206 | plt.ylabel("Loss", fontsize=15) 207 | plt.plot(np.arange(len(losses)), losses) 208 | plt.show() 209 | 210 | 211 | -------------------------------------------------------------------------------- /models/linear_regression/Linear Regression.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Linear Regression\n", 8 | "\n", 9 | "In this tutorial, we'll try the classic Linear Regression algorithm\n", 10 | "\n", 11 | "![Linear Regression](img/lr.jpg)\n", 12 | "\n", 13 | "Linear Regression fits a line so that the distance between the predicted values y' on the line and the real values is minimized (the sum of these distances is our loss)\n" 14 | ] 15 | }, 16 | { 17 | "cell_type": "markdown", 18 | "metadata": {}, 19 | "source": [ 20 | "Import Libraries" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 1, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "import torch\n", 30 | "import torch.nn as nn\n", 31 | "from torch.autograd import Variable\n", 32 | "import numpy as np\n", 33 | "import matplotlib.pyplot as plt\n", 34 | "# from torch.optim import Neumann" 35 | ] 36 | }, 37 | { 38 | 
"cell_type": "code", 39 | "execution_count": 2, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "import math\n", 44 | "import torch\n", 45 | "from torch.optim.optimizer import Optimizer\n", 46 | "from torch.optim.sgd import SGD\n", 47 | "\n", 48 | "class Neumann(Optimizer):\n", 49 | " \"\"\"\n", 50 | " Documentation about the algorithm\n", 51 | " \"\"\"\n", 52 | "\n", 53 | " def __init__(self, params , lr=1e-3,eps = 1e-8, alpha = 1e-7, beta = 1e-5, gamma = 0.9, momentum = 0.5, sgd_steps = 5, K = 10 ):\n", 54 | " \n", 55 | " if not 0.0 <= lr:\n", 56 | " raise ValueError(\"Invalid learning rate: {}\".format(lr))\n", 57 | " if not 0.0 <= eps:\n", 58 | " raise ValueError(\"Invalid epsilon value: {}\".format(eps))\n", 59 | " if not 0.9 >= momentum:\n", 60 | " raise ValueError(\"Invalid momentum value: {}\".format(eps))\n", 61 | " \n", 62 | "\n", 63 | " self.iter = 0\n", 64 | " self.sgd = SGD(params, lr=lr, momentum=0.9)\n", 65 | "\n", 66 | " num_variables = 2#calculate here\n", 67 | " defaults = dict(lr=lr, eps=eps, alpha=alpha,\n", 68 | " beta=beta*num_variables, gamma=gamma,\n", 69 | " sgd_steps=sgd_steps, momentum=momentum, K=K\n", 70 | " )\n", 71 | "\n", 72 | " super(Neumann, self).__init__(params, defaults)\n", 73 | "\n", 74 | "\n", 75 | " def step(self, closure=None):\n", 76 | " \"\"\"\n", 77 | " Performs a single optimization step.\n", 78 | " \n", 79 | " Arguments:\n", 80 | " closure (callable, optional): A closure that reevaluates the model\n", 81 | " and returns the loss.\n", 82 | " \"\"\"\n", 83 | " import ipdb; ipdb.set_trace()\n", 84 | " self.iter += 1\n", 85 | "\n", 86 | "\n", 87 | " loss = None\n", 88 | " if closure is not None: #checkout what's the deal with this. present in multiple pytorch optimizers\n", 89 | " loss = closure()\n", 90 | "\n", 91 | " for group in self.param_groups:\n", 92 | "\n", 93 | " sgd_steps = group['sgd_steps']\n", 94 | "\n", 95 | " if self.iter <= sgd_steps:\n", 96 | " self.sgd.step()\n", 97 | " return\n", 98 | "\n", 99 | " momentum = group['momentum']\n", 100 | " \n", 101 | " \n", 102 | " for p in group['params']:\n", 103 | " if p.grad is None:\n", 104 | " continue\n", 105 | " grad = p.grad.data\n", 106 | "\n", 107 | " state = self.state[p]\n", 108 | "\n", 109 | " if len(state) == 0:\n", 110 | " state['step'] = 0\n", 111 | " state['m'] = torch.zeros_like(p.data).float()\n", 112 | " state['d'] = torch.zeros_like(p.data).float()\n", 113 | " state['moving_avg'] = p.data\n", 114 | "\n", 115 | " state['step'] += 1\n", 116 | "\n", 117 | " alpha = group['alpha']\n", 118 | " beta = group['beta']\n", 119 | " gamma = group['gamma']\n", 120 | " K = group['K']\n", 121 | " momentum = group['momentum']\n", 122 | " mu = momentum*(1 - (1/(1+self.iter)))\n", 123 | " eta = group['lr']/self.iter ## update with time\n", 124 | "\n", 125 | " ## Reset neumann iterate \n", 126 | " if self.iter%K == 1:\n", 127 | " state['m'] = grad.mul(-eta)\n", 128 | "\n", 129 | " ## Compute update d_t\n", 130 | " diff = p.data.sub(state['moving_avg'])\n", 131 | " diff_norm = (p.data.sub(state['moving_avg'])).norm()\n", 132 | " state['d'] = grad.add( (( (diff_norm.pow(2)).mul(alpha) ).sub( (diff_norm.pow(-2)).mul(beta) )).mul( diff.div(diff_norm)) )\n", 133 | "\n", 134 | " ## Update Neumann iterate\n", 135 | " state['m'] = (state['m'].mul_(mu)).sub_( state['d'].mul(eta))\n", 136 | "\n", 137 | " ## Update Weights\n", 138 | " p.data.add_((state['m'].mul(mu)).sub( state['d'].mul(eta)))\n", 139 | "\n", 140 | " ## Update Moving Average\n", 141 | " state['moving_avg'] = p.data.add( 
(state['moving_avg'].sub(p.data)).mul(gamma) )\n", 142 | "\n", 143 | "\n", 144 | "\n", 145 | " \n", 146 | " return loss" 147 | ] 148 | }, 149 | { 150 | "cell_type": "markdown", 151 | "metadata": {}, 152 | "source": [ 153 | "Initializing Seed for consistent results everytime" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": 3, 159 | "metadata": {}, 160 | "outputs": [], 161 | "source": [ 162 | "np.random.seed(42)\n", 163 | "pts = 50" 164 | ] 165 | }, 166 | { 167 | "cell_type": "markdown", 168 | "metadata": {}, 169 | "source": [ 170 | "Creating a Dataset of 50 points" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": 4, 176 | "metadata": {}, 177 | "outputs": [], 178 | "source": [ 179 | "x_vals = np.random.rand(50)\n", 180 | "x_train = np.asarray(x_vals,dtype=np.float32).reshape(-1,1)\n", 181 | "m = 1\n", 182 | "alpha = np.random.rand(1)\n", 183 | "beta = np.random.rand(1)\n", 184 | "y_correct = np.asarray([2*i+m for i in x_vals], dtype=np.float32).reshape(-1,1)" 185 | ] 186 | }, 187 | { 188 | "cell_type": "markdown", 189 | "metadata": {}, 190 | "source": [ 191 | "### PyTorch Models\n", 192 | "\n", 193 | "1. Create a Class\n", 194 | "2. Declare your Forward Pass\n", 195 | "3. Tune the HyperParameters" 196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "execution_count": 5, 201 | "metadata": {}, 202 | "outputs": [], 203 | "source": [ 204 | "class LinearRegressionModel(nn.Module):\n", 205 | "\n", 206 | " def __init__(self, input_dim, output_dim):\n", 207 | "\n", 208 | " super(LinearRegressionModel, self).__init__() \n", 209 | " # Calling Super Class's constructor\n", 210 | " self.linear = nn.Linear(input_dim, output_dim)\n", 211 | " # nn.linear is defined in nn.Module\n", 212 | "\n", 213 | " def forward(self, x):\n", 214 | " # Here the forward pass is simply a linear function\n", 215 | "\n", 216 | " out = self.linear(x)\n", 217 | " return out\n", 218 | "\n", 219 | "input_dim = 1\n", 220 | "output_dim = 1" 221 | ] 222 | }, 223 | { 224 | "cell_type": "markdown", 225 | "metadata": {}, 226 | "source": [ 227 | "### Steps\n", 228 | "1. Create instance of model\n", 229 | "2. Select Loss Criterion\n", 230 | "3. 
Choose Hyper Parameters" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": 6, 236 | "metadata": {}, 237 | "outputs": [], 238 | "source": [ 239 | "model = LinearRegressionModel(input_dim, output_dim)  # create our model, much as we would in scikit-learn\n", 240 | "\n", 241 | "criterion = nn.MSELoss()  # mean squared error loss\n", 242 | "l_rate = 0.01\n", 243 | "optimiser = Neumann(list(model.parameters()), lr = l_rate)  # the Neumann optimizer defined above\n", 244 | "\n", 245 | "epochs = 10" 246 | ] 247 | }, 248 | { 249 | "cell_type": "markdown", 250 | "metadata": {}, 251 | "source": [ 252 | "### Train the Model" 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": null, 258 | "metadata": {}, 259 | "outputs": [],
"ipdb> \n", 290 | "--Return--\n", 291 | "False\n", 292 | "> \u001b[0;32m/home/faizaan09/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py\u001b[0m(2930)\u001b[0;36mrun_code\u001b[0;34m()\u001b[0m\n", 293 | "\u001b[0;32m 2929 \u001b[0;31m \u001b[0moutflag\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 294 | "\u001b[0m\u001b[0;32m-> 2930 \u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0moutflag\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 295 | "\u001b[0m\u001b[0;32m 2931 \u001b[0;31m\u001b[0;34m\u001b[0m\u001b[0m\n", 296 | "\u001b[0m\n", 297 | "ipdb> \n", 298 | "> \u001b[0;32m/home/faizaan09/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py\u001b[0m(2847)\u001b[0;36mrun_ast_nodes\u001b[0;34m()\u001b[0m\n", 299 | "\u001b[0;32m 2846 \u001b[0;31m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 300 | "\u001b[0m\u001b[0;32m-> 2847 \u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnode\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mto_run_exec\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 301 | "\u001b[0m\u001b[0;32m 2848 \u001b[0;31m \u001b[0mmod\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mast\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mModule\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mnode\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 302 | "\u001b[0m\n", 303 | "ipdb> \n", 304 | "> \u001b[0;32m/home/faizaan09/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py\u001b[0m(2848)\u001b[0;36mrun_ast_nodes\u001b[0;34m()\u001b[0m\n", 305 | "\u001b[0;32m 2847 \u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnode\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mto_run_exec\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 306 | "\u001b[0m\u001b[0;32m-> 2848 \u001b[0;31m \u001b[0mmod\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mast\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mModule\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mnode\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 307 | "\u001b[0m\u001b[0;32m 2849 \u001b[0;31m \u001b[0mcode\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcompiler\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcell_name\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"exec\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 308 | "\u001b[0m\n", 309 | "ipdb> \n", 310 | "> \u001b[0;32m/home/faizaan09/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py\u001b[0m(2849)\u001b[0;36mrun_ast_nodes\u001b[0;34m()\u001b[0m\n", 311 | "\u001b[0;32m 2848 \u001b[0;31m \u001b[0mmod\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mast\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mModule\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mnode\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 312 | "\u001b[0m\u001b[0;32m-> 2849 \u001b[0;31m \u001b[0mcode\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcompiler\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcell_name\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"exec\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 313 | "\u001b[0m\u001b[0;32m 
2850 \u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun_code\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcode\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 314 | "\u001b[0m\n", 315 | "ipdb> \n", 316 | "> \u001b[0;32m/home/faizaan09/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py\u001b[0m(2850)\u001b[0;36mrun_ast_nodes\u001b[0;34m()\u001b[0m\n", 317 | "\u001b[0;32m 2849 \u001b[0;31m \u001b[0mcode\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcompiler\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcell_name\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"exec\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 318 | "\u001b[0m\u001b[0;32m-> 2850 \u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun_code\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcode\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 319 | "\u001b[0m\u001b[0;32m 2851 \u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 320 | "\u001b[0m\n", 321 | "ipdb> \n", 322 | "> \u001b[0;32m\u001b[0m(42)\u001b[0;36mstep\u001b[0;34m()\u001b[0m\n", 323 | "\u001b[0;32m 41 \u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mipdb\u001b[0m\u001b[0;34m;\u001b[0m \u001b[0mipdb\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_trace\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 324 | "\u001b[0m\u001b[0;32m---> 42 \u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0miter\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 325 | "\u001b[0m\u001b[0;32m 43 \u001b[0;31m\u001b[0;34m\u001b[0m\u001b[0m\n", 326 | "\u001b[0m\n", 327 | "ipdb> \n", 328 | "ipdb> \n", 329 | "ipdb> \n", 330 | "ipdb> n\n", 331 | "> \u001b[0;32m\u001b[0m(45)\u001b[0;36mstep\u001b[0;34m()\u001b[0m\n", 332 | "\u001b[0;32m 44 \u001b[0;31m\u001b[0;34m\u001b[0m\u001b[0m\n", 333 | "\u001b[0m\u001b[0;32m---> 45 \u001b[0;31m \u001b[0mloss\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 334 | "\u001b[0m\u001b[0;32m 46 \u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0mclosure\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m#checkout what's the deal with this. present in multiple pytorch optimizers\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 335 | "\u001b[0m\n", 336 | "ipdb> n\n", 337 | "> \u001b[0;32m\u001b[0m(46)\u001b[0;36mstep\u001b[0;34m()\u001b[0m\n", 338 | "\u001b[0;32m 45 \u001b[0;31m \u001b[0mloss\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 339 | "\u001b[0m\u001b[0;32m---> 46 \u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0mclosure\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m#checkout what's the deal with this. 
present in multiple pytorch optimizers\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 340 | "\u001b[0m\u001b[0;32m 47 \u001b[0;31m \u001b[0mloss\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mclosure\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 341 | "\u001b[0m\n", 342 | "ipdb> \n", 343 | "> \u001b[0;32m\u001b[0m(49)\u001b[0;36mstep\u001b[0;34m()\u001b[0m\n", 344 | "\u001b[0;32m 48 \u001b[0;31m\u001b[0;34m\u001b[0m\u001b[0m\n", 345 | "\u001b[0m\u001b[0;32m---> 49 \u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0mgroup\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparam_groups\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 346 | "\u001b[0m\u001b[0;32m 50 \u001b[0;31m\u001b[0;34m\u001b[0m\u001b[0m\n", 347 | "\u001b[0m\n", 348 | "ipdb> \n", 349 | "> \u001b[0;32m\u001b[0m(51)\u001b[0;36mstep\u001b[0;34m()\u001b[0m\n", 350 | "\u001b[0;32m 50 \u001b[0;31m\u001b[0;34m\u001b[0m\u001b[0m\n", 351 | "\u001b[0m\u001b[0;32m---> 51 \u001b[0;31m \u001b[0msgd_steps\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgroup\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'sgd_steps'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 352 | "\u001b[0m\u001b[0;32m 52 \u001b[0;31m\u001b[0;34m\u001b[0m\u001b[0m\n", 353 | "\u001b[0m\n", 354 | "ipdb> \n", 355 | "> \u001b[0;32m\u001b[0m(53)\u001b[0;36mstep\u001b[0;34m()\u001b[0m\n", 356 | "\u001b[0;32m 52 \u001b[0;31m\u001b[0;34m\u001b[0m\u001b[0m\n", 357 | "\u001b[0m\u001b[0;32m---> 53 \u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0miter\u001b[0m \u001b[0;34m<=\u001b[0m \u001b[0msgd_steps\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 358 | "\u001b[0m\u001b[0;32m 54 \u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msgd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 359 | "\u001b[0m\n", 360 | "ipdb> \n", 361 | "> \u001b[0;32m\u001b[0m(54)\u001b[0;36mstep\u001b[0;34m()\u001b[0m\n", 362 | "\u001b[0;32m 53 \u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0miter\u001b[0m \u001b[0;34m<=\u001b[0m \u001b[0msgd_steps\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 363 | "\u001b[0m\u001b[0;32m---> 54 \u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msgd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 364 | "\u001b[0m\u001b[0;32m 55 \u001b[0;31m \u001b[0;32mreturn\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 365 | "\u001b[0m\n", 366 | "ipdb> \n", 367 | "> \u001b[0;32m\u001b[0m(55)\u001b[0;36mstep\u001b[0;34m()\u001b[0m\n", 368 | "\u001b[0;32m 54 \u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msgd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 369 | "\u001b[0m\u001b[0;32m---> 55 \u001b[0;31m \u001b[0;32mreturn\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 370 | "\u001b[0m\u001b[0;32m 56 \u001b[0;31m\u001b[0;34m\u001b[0m\u001b[0m\n", 371 | "\u001b[0m\n" 372 | ] 373 | } 374 | ], 375 | "source": [ 376 | "import ipdb; ipdb.set_trace()\n", 377 | "for epoch in range(epochs):\n", 378 | "\n", 379 | " epoch +=1\n", 380 | " inputs = Variable(torch.from_numpy(x_train))\n", 381 | " labels = Variable(torch.from_numpy(y_correct))\n", 382 | "\n", 383 | " #clear grads\n", 384 | " optimiser.zero_grad()\n", 
385 | " #forward to get predicted values\n", 386 | " outputs = model.forward(inputs)\n", 387 | " loss = criterion(outputs, labels)\n", 388 | " loss.backward()# back props\n", 389 | " optimiser.step()# update the parameters\n", 390 | " print('epoch {}, loss {}'.format(epoch,loss.data[0]))" 391 | ] 392 | }, 393 | { 394 | "cell_type": "markdown", 395 | "metadata": {}, 396 | "source": [ 397 | "### Printing the Predictions" 398 | ] 399 | }, 400 | { 401 | "cell_type": "code", 402 | "execution_count": null, 403 | "metadata": { 404 | "collapsed": true 405 | }, 406 | "outputs": [], 407 | "source": [ 408 | "predicted = model.forward(Variable(torch.from_numpy(x_train))).data.numpy()\n", 409 | "\n", 410 | "plt.plot(x_train, y_correct, 'go', label = 'from data', alpha = .5)\n", 411 | "plt.plot(x_train, predicted, label = 'prediction', alpha = 0.5)\n", 412 | "plt.legend()\n", 413 | "plt.show()\n", 414 | "print(model.state_dict())" 415 | ] 416 | }, 417 | { 418 | "cell_type": "markdown", 419 | "metadata": {}, 420 | "source": [ 421 | "### Example to Use GPU" 422 | ] 423 | }, 424 | { 425 | "cell_type": "code", 426 | "execution_count": null, 427 | "metadata": { 428 | "collapsed": true 429 | }, 430 | "outputs": [], 431 | "source": [ 432 | "import torch\n", 433 | "import torch.nn as nn\n", 434 | "from torch.autograd import Variable\n", 435 | "import numpy as np\n", 436 | "\n", 437 | "x_values = [i for i in range(11)]\n", 438 | "x_train = np.array(x_values, dtype=np.float32)\n", 439 | "x_train = x_train.reshape(-1, 1)\n", 440 | "\n", 441 | "y_values = [2*i + 1 for i in x_values]\n", 442 | "y_train = np.array(y_values, dtype=np.float32)\n", 443 | "y_train = y_train.reshape(-1, 1)\n", 444 | "\n", 445 | "'''\n", 446 | "CREATE MODEL CLASS\n", 447 | "'''\n", 448 | "class LinearRegressionModel(nn.Module):\n", 449 | " def __init__(self, input_dim, output_dim):\n", 450 | " super(LinearRegressionModel, self).__init__()\n", 451 | " self.linear = nn.Linear(input_dim, output_dim) \n", 452 | " \n", 453 | " def forward(self, x):\n", 454 | " out = self.linear(x)\n", 455 | " return out\n", 456 | "\n", 457 | "'''\n", 458 | "INSTANTIATE MODEL CLASS\n", 459 | "'''\n", 460 | "input_dim = 1\n", 461 | "output_dim = 1\n", 462 | "\n", 463 | "model = LinearRegressionModel(input_dim, output_dim)\n", 464 | "\n", 465 | "\n", 466 | "\n", 467 | "model.cuda()\n", 468 | "\n", 469 | "'''\n", 470 | "INSTANTIATE LOSS CLASS\n", 471 | "'''\n", 472 | "\n", 473 | "criterion = nn.MSELoss()\n", 474 | "\n", 475 | "\n", 476 | "learning_rate = 0.01\n", 477 | "\n", 478 | "optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)\n", 479 | "\n", 480 | "\n", 481 | "epochs = 100\n", 482 | "for epoch in range(epochs):\n", 483 | " epoch += 1\n", 484 | "\n", 485 | " \n", 486 | " if torch.cuda.is_available():\n", 487 | " inputs = Variable(torch.from_numpy(x_train).cuda())\n", 488 | "\n", 489 | " \n", 490 | " if torch.cuda.is_available():\n", 491 | " labels = Variable(torch.from_numpy(y_train).cuda())\n", 492 | " \n", 493 | "\n", 494 | " optimizer.zero_grad() \n", 495 | " \n", 496 | "\n", 497 | " outputs = model(inputs)\n", 498 | "\n", 499 | " loss = criterion(outputs, labels)\n", 500 | " \n", 501 | "\n", 502 | " loss.backward()\n", 503 | " \n", 504 | "\n", 505 | " optimizer.step()\n", 506 | " \n", 507 | "\n", 508 | " print('epoch {}, loss {}'.format(epoch, loss.data[0]))\n" 509 | ] 510 | }, 511 | { 512 | "cell_type": "markdown", 513 | "metadata": {}, 514 | "source": [ 515 | "Sources:\n", 516 | "http://github.com/pytorch/examples\n", 517 | "\n", 518 | 
"http://github.com/ritchieng/the-incredible-pytorch" 519 | ] 520 | } 521 | ], 522 | "metadata": { 523 | "kernelspec": { 524 | "display_name": "Python 3", 525 | "language": "python", 526 | "name": "python3" 527 | }, 528 | "language_info": { 529 | "codemirror_mode": { 530 | "name": "ipython", 531 | "version": 3 532 | }, 533 | "file_extension": ".py", 534 | "mimetype": "text/x-python", 535 | "name": "python", 536 | "nbconvert_exporter": "python", 537 | "pygments_lexer": "ipython3", 538 | "version": "3.6.4" 539 | } 540 | }, 541 | "nbformat": 4, 542 | "nbformat_minor": 2 543 | } 544 | -------------------------------------------------------------------------------- /models/linear_regression/test.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Variable 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | # from torch.optim import Neumann 7 | import math 8 | import torch 9 | from torch.optim.optimizer import Optimizer 10 | from torch.optim.sgd import SGD 11 | 12 | from neumann import Neumann 13 | 14 | import pdb; pdb.set_trace() 15 | 16 | 17 | np.random.seed(42) 18 | pts = 50 19 | x_vals = np.random.rand(50) 20 | x_train = np.asarray(x_vals,dtype=np.float32).reshape(-1,1) 21 | b = 0 22 | alpha = np.random.rand(1) 23 | beta = np.random.rand(1) 24 | y_correct = np.asarray([2*i+b for i in x_vals], dtype=np.float32).reshape(-1,1) 25 | 26 | 27 | class LinearRegressionModel(nn.Module): 28 | 29 | def __init__(self, input_dim, output_dim): 30 | 31 | super(LinearRegressionModel, self).__init__() 32 | # Calling Super Class's constructor 33 | self.linear = nn.Linear(input_dim, output_dim) 34 | # nn.linear is defined in nn.Module 35 | 36 | def forward(self, x): 37 | # Here the forward pass is simply a linear function 38 | 39 | out = self.linear(x) 40 | return out 41 | 42 | input_dim = 1 43 | output_dim = 1 44 | 45 | 46 | model = LinearRegressionModel(input_dim,output_dim)# create our model just as we do in Scikit-Learn / C / C++// 47 | 48 | criterion = nn.MSELoss()# Mean Squared Loss 49 | l_rate = 0.01 50 | optimiser = Neumann(list(model.parameters()), lr = l_rate) #Stochastic Gradient Descent 51 | 52 | epochs = 1000 53 | 54 | 55 | for epoch in range(epochs): 56 | 57 | epoch +=1 58 | inputs = Variable(torch.from_numpy(x_train)) 59 | labels = Variable(torch.from_numpy(y_correct)) 60 | 61 | #clear grads 62 | optimiser.zero_grad() 63 | #forward to get predicted values 64 | outputs = model.forward(inputs) 65 | loss = criterion(outputs, labels) 66 | loss.backward()# back props 67 | optimiser.step()# update the parameters 68 | print('epoch {}, loss {}'.format(epoch,loss.data[0])) 69 | 70 | 71 | predicted = model.forward(Variable(torch.from_numpy(x_train))).data.numpy() 72 | 73 | plt.plot(x_train, y_correct, 'go', label = 'from data', alpha = .5) 74 | plt.plot(x_train, predicted, label = 'prediction', alpha = 0.5) 75 | plt.legend() 76 | plt.show() 77 | print(model.state_dict()) 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | # from __future__ import print_function 107 | # import argparse 108 | # import torch 109 | # import torch.nn as nn 110 | # import torch.nn.functional as F 111 | # # import torch.optim as optim 112 | # from torchvision import datasets, transforms 113 | # from torch.autograd import Variable 114 | 115 | # import pdb; pdb.set_trace() 116 | # from torch.optim import Neumann 117 | 
118 | # # Training settings 119 | # parser = argparse.ArgumentParser(description='PyTorch MNIST Example') 120 | # parser.add_argument('--batch-size', type=int, default=64, metavar='N', 121 | # help='input batch size for training (default: 64)') 122 | # parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N', 123 | # help='input batch size for testing (default: 1000)') 124 | # parser.add_argument('--epochs', type=int, default=10, metavar='N', 125 | # help='number of epochs to train (default: 10)') 126 | # parser.add_argument('--lr', type=float, default=0.01, metavar='LR', 127 | # help='learning rate (default: 0.01)') 128 | # parser.add_argument('--momentum', type=float, default=0.5, metavar='M', 129 | # help='SGD momentum (default: 0.5)') 130 | # parser.add_argument('--no-cuda', action='store_true', default=False, 131 | # help='disables CUDA training') 132 | # parser.add_argument('--seed', type=int, default=1, metavar='S', 133 | # help='random seed (default: 1)') 134 | # parser.add_argument('--log-interval', type=int, default=10, metavar='N', 135 | # help='how many batches to wait before logging training status') 136 | # args = parser.parse_args() 137 | # args.cuda = not args.no_cuda and torch.cuda.is_available() 138 | 139 | 140 | 141 | # torch.manual_seed(args.seed) 142 | # if args.cuda: 143 | # torch.cuda.manual_seed(args.seed) 144 | 145 | 146 | # kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {} 147 | # train_loader = torch.utils.data.DataLoader( 148 | # datasets.MNIST('../data', train=True, download=True, 149 | # transform=transforms.Compose([ 150 | # transforms.ToTensor(), 151 | # transforms.Normalize((0.1307,), (0.3081,)) 152 | # ])), 153 | # batch_size=args.batch_size, shuffle=True, **kwargs) 154 | # test_loader = torch.utils.data.DataLoader( 155 | # datasets.MNIST('../data', train=False, transform=transforms.Compose([ 156 | # transforms.ToTensor(), 157 | # transforms.Normalize((0.1307,), (0.3081,)) 158 | # ])), 159 | # batch_size=args.test_batch_size, shuffle=True, **kwargs) 160 | 161 | # # 162 | # class Net(nn.Module): 163 | # def __init__(self): 164 | # super(Net, self).__init__() 165 | # self.conv1 = nn.Conv2d(1, 10, kernel_size=5) 166 | # self.conv2 = nn.Conv2d(10, 20, kernel_size=5) 167 | # self.conv2_drop = nn.Dropout2d() 168 | # self.fc1 = nn.Linear(320, 50) 169 | # self.fc2 = nn.Linear(50, 10) 170 | 171 | # def forward(self, x): 172 | # x = F.relu(F.max_pool2d(self.conv1(x), 2)) 173 | # x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2)) 174 | # x = x.view(-1, 320) 175 | # x = F.relu(self.fc1(x)) 176 | # x = F.dropout(x, training=self.training) 177 | # x = self.fc2(x) 178 | # return F.log_softmax(x, dim=1) 179 | 180 | # model = Net() 181 | # if args.cuda: 182 | # model.cuda() 183 | 184 | # optimizer = Neumann(model.parameters(), lr=args.lr) 185 | 186 | # def train(epoch): 187 | # model.train() 188 | # for batch_idx, (data, target) in enumerate(train_loader): 189 | # if args.cuda: 190 | # data, target = data.cuda(), target.cuda() 191 | # data, target = Variable(data), Variable(target) 192 | # optimizer.zero_grad() 193 | # output = model(data) 194 | # loss = F.nll_loss(output, target) 195 | # loss.backward() 196 | # optimizer.step() 197 | # if batch_idx % args.log_interval == 0: 198 | # print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( 199 | # epoch, batch_idx * len(data), len(train_loader.dataset), 200 | # 100. 
* batch_idx / len(train_loader), loss.data[0]))
201 | 
202 | # def test():
203 | #     model.eval()
204 | #     test_loss = 0
205 | #     correct = 0
206 | #     for data, target in test_loader:
207 | #         if args.cuda:
208 | #             data, target = data.cuda(), target.cuda()
209 | #         data, target = Variable(data, volatile=True), Variable(target)
210 | #         output = model(data)
211 | #         test_loss += F.nll_loss(output, target, size_average=False).data[0] # sum up batch loss
212 | #         pred = output.data.max(1, keepdim=True)[1] # get the index of the max log-probability
213 | #         correct += pred.eq(target.data.view_as(pred)).long().cpu().sum()
214 | 
215 | #     test_loss /= len(test_loader.dataset)
216 | #     print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
217 | #         test_loss, correct, len(test_loader.dataset),
218 | #         100. * correct / len(test_loader.dataset)))
219 | 
220 | 
221 | # for epoch in range(1, args.epochs + 1):
222 | #     train(epoch)
223 | #     test()
224 | 
--------------------------------------------------------------------------------
/models/mlp.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from optimizer import Neumann, SGD
4 | import numpy as np
5 | 
6 | from batchup import data_source
7 | import csv
8 | 
9 | csvfile = open('../dataset/HIGGS_subset.csv', 'r')
10 | csvreader = csv.reader(csvfile)
11 | train_X = []
12 | train_Y = []
13 | 
14 | test_X = []
15 | test_Y = []
16 | 
17 | train_error = []
18 | 
19 | size = 0
20 | for row in csvreader:  # column 0 is the label, the remaining columns are features
21 |     if size >= 90000:  # first 90000 rows for training, the rest for testing
22 |         test_X.append(row[1:])
23 |         test_Y.append([float(row[0])])
24 |     else:
25 |         train_X.append(row[1:])
26 |         train_Y.append([float(row[0])])
27 |     size += 1
28 | csvfile.close()
29 | train_X = np.array(train_X, dtype="float64")
30 | train_Y = np.array(train_Y, dtype="int32")
31 | test_X = np.array(test_X, dtype="float64")
32 | test_Y = np.array(test_Y, dtype="int32")
33 | ds = data_source.ArrayDataSource([train_X, train_Y])
34 | 
35 | 
36 | class MultilayerPerceptron(nn.Module):
37 |     def __init__(self, input_size, hidden_size, out_classes):
38 |         super(MultilayerPerceptron, self).__init__()
39 |         self.fc1 = nn.Linear(input_size, hidden_size)
40 |         self.fc2 = nn.Linear(hidden_size, out_classes)
41 |         self.tanh = nn.Tanh()
42 |         self.sigmoid = nn.Sigmoid()
43 | 
44 |     def forward(self, x):
45 |         out = self.fc1(x)
46 |         out = self.tanh(out)
47 |         out = self.fc2(out)
48 |         out = self.sigmoid(out)
49 |         return out
50 | 
51 | 
52 | input_size = 28
53 | hidden_size = 56
54 | num_classes = 1
55 | learning_rate = 1e-4
56 | num_epochs = 20
57 | minibatch_size = 16
58 | current_iter = 1
59 | device = torch.device('cpu')
60 | 
61 | net = MultilayerPerceptron(input_size, hidden_size, num_classes)
62 | 
63 | loss_fn = nn.MSELoss()
64 | optimizer = Neumann(list(net.parameters()), lr=learning_rate)
65 | # optimizer = SGD(net.parameters(), lr=learning_rate)
66 | 
67 | for epoch in range(num_epochs):
68 |     for batch_X, batch_Y in ds.batch_iterator(batch_size=minibatch_size, shuffle=True):
69 |         inputs = torch.tensor(batch_X, device=device, dtype=torch.float32)
70 |         label = torch.tensor(batch_Y, device=device, dtype=torch.float32)
71 |         optimizer.zero_grad()
72 |         outputs = net(inputs)
73 |         loss = loss_fn(outputs, label)
74 |         loss.backward()
75 |         optimizer.step()
76 |     print("Loss: ", loss.item())
77 | 
78 | test_inputs = torch.tensor(test_X, device=device, dtype=torch.float32)
79 | test_labels = torch.tensor(test_Y, device=device, dtype=torch.float32)
80 | 
81 | outputs = net(test_inputs)
82 | 
83 | error = loss_fn(outputs, test_labels)
84 | print(outputs)
85 | print("Error: ", error.item())
86 | 
--------------------------------------------------------------------------------
/models/modules/Net.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch.nn.functional as F
3 | 
4 | 
5 | # Neural net
6 | class Net(nn.Module):
7 |     """
8 |     Small LeNet-style convolutional network for 28x28 single-channel
9 |     images: two conv/pool blocks followed by two linear layers.
10 |     """
11 | 
12 |     # Constructor
13 |     def __init__(self):
14 |         """
15 |         Constructor
16 |         """
17 |         super(Net, self).__init__()
18 |         self.conv_layer1 = nn.Conv2d(1, 6, 5)
19 |         self.pool = nn.MaxPool2d(2, 2)
20 |         self.conv_layer2 = nn.Conv2d(6, 16, 5)
21 |         self.linear_layer1 = nn.Linear(16 * 4 * 4, 120)
22 |         self.linear_layer2 = nn.Linear(120, 10)
23 |         self.tanh = nn.Tanh()
24 |         self.sigmoid = nn.Sigmoid()
25 |     # end __init__
26 | 
27 |     # Forward pass
28 |     def forward(self, x):
29 |         """
30 |         Forward pass
31 |         :param x: input batch of shape (N, 1, 28, 28)
32 |         :return: sigmoid activations over the 10 output units
33 |         """
34 |         # print(u"Input : {}".format(x.size()))
35 |         x = self.conv_layer1(x)
36 |         # print(u"Conv1 : {}".format(x.size()))
37 |         x = F.relu(x)
38 |         # print(u"Relu : {}".format(x.size()))
39 |         x = self.pool(x)
40 |         # print(u"Max pool : {}".format(x.size()))
41 |         x = self.conv_layer2(x)
42 |         # print(u"Conv2 : {}".format(x.size()))
43 |         x = F.relu(x)
44 |         x = self.pool(x)
45 |         # print(u"Max pool : {}".format(x.size()))
46 |         x = x.view(-1, 16 * 4 * 4)
47 |         x = self.linear_layer1(x)
48 |         x = self.tanh(x)
49 |         x = self.linear_layer2(x)
50 |         x = self.sigmoid(x)
51 |         # x = F.relu(self.linear_layer1(x))
52 |         # x = F.relu(self.linear_layer2(x))
53 |         return x
54 |     # end forward
55 | 
56 | # end Net
57 | 
--------------------------------------------------------------------------------
/models/modules/__init__.py:
--------------------------------------------------------------------------------
1 | # Re-export Net so it can be imported directly from the package
2 | from .Net import Net
3 | 
4 | 
--------------------------------------------------------------------------------
/models/optimizer/__init__.py:
--------------------------------------------------------------------------------
1 | from .neumann import Neumann
2 | from .stochastic import SGD
3 | 
--------------------------------------------------------------------------------
/models/optimizer/neumann.py:
--------------------------------------------------------------------------------
1 | import math
2 | import torch
3 | from torch.optim.optimizer import Optimizer
4 | from torch.optim.sgd import SGD
5 | import numpy as np
6 | 
7 | class Neumann(Optimizer):
8 |     """
9 |     Implements the Neumann optimizer from "Neumann Optimizer: A Practical
10 |     Optimization Algorithm for Deep Neural Networks" (Krishnan, Xiao and
11 |     Saurous, ICLR 2018; see misc/neumann.pdf).
12 | 
13 |     After `sgd_steps` plain SGD iterations, the optimizer maintains a
14 |     Neumann iterate m that implicitly applies a truncated Neumann-series
15 |     approximation of the inverse Hessian to the gradient:
16 | 
17 |         m_t = mu * m_{t-1} - eta * d_t
18 |         w_t = w_{t-1} + mu * m_t - eta * d_t
19 | 
20 |     where mu is a momentum coefficient clipped to [0.5, 0.9] and eta is a
21 |     step size decayed as lr / t. The iterate is reset every K iterations,
22 |     and K is doubled at each reset.
23 | 
24 |     Example:
25 |         >>> optimizer = Neumann(list(model.parameters()), lr=1e-3)
26 |         >>> optimizer.zero_grad()
27 |         >>> loss_fn(model(input), target).backward()
28 |         >>> optimizer.step()
29 |     """
30 | 
31 |     def __init__(self, params, lr=1e-3, eps=1e-8, alpha=1e-7, beta=1e-5, gamma=0.9, momentum=1, sgd_steps=5, K=10):
32 | 
33 |         if not 0.0 <= lr:
34 |             raise ValueError("Invalid learning rate: {}".format(lr))
35 |         if not 0.0 <= eps:
36 |             raise ValueError("Invalid epsilon value: {}".format(eps))
37 |         if not 0.0 <= momentum <= 1.0:
38 |             raise ValueError("Invalid momentum value: {}".format(momentum))
39 | 
40 |         self.iter = 0
41 |         # self.sgd = SGD(params, lr=lr, momentum=0.9)
42 | 
43 |         params = list(params)  # materialise a generator so the parameters can be traversed twice
44 |         param_count = np.sum([np.prod(p.size()) for p in params])  # total parameter count, used to scale beta
45 | 
46 |         defaults = dict(lr=lr, eps=eps, alpha=alpha,
47 |                         beta=beta*param_count, gamma=gamma,
48 |                         sgd_steps=sgd_steps, momentum=momentum, K=K
49 |                         )
50 | 
51 |         super(Neumann, self).__init__(params, defaults)
52 | 
53 |     def step(self, closure=None):
54 |         """
55 |         Performs a single optimization step.
56 | 
57 |         Arguments:
58 |             closure (callable, optional): A closure that reevaluates the model
59 |                 and returns the loss.
60 |         """
61 |         self.iter += 1
62 | 
63 |         loss = None
64 |         if closure is not None:  # same closure convention as PyTorch's built-in optimizers
65 |             loss = closure()
66 | 
67 |         for group in self.param_groups:
68 | 
69 |             sgd_steps = group['sgd_steps']
70 | 
71 |             alpha = group['alpha']
72 |             beta = group['beta']
73 |             gamma = group['gamma']
74 |             K = group['K']
75 |             momentum = group['momentum']
76 |             mu = momentum * (1 - (1 / (1 + self.iter)))  # momentum grows with time ...
77 | 
78 |             if mu >= 0.9:  # ... but is clipped to [0.5, 0.9]
79 |                 mu = 0.9
80 |             elif mu <= 0.5:
81 |                 mu = 0.5
82 | 
83 |             eta = group['lr'] / self.iter  # step size decays with time
84 | 
85 |             for p in group['params']:
86 |                 if p.grad is None:
87 |                     continue
88 |                 grad = p.grad.data
89 | 
90 |                 state = self.state[p]
91 | 
92 |                 if len(state) == 0:
93 |                     state['step'] = 0
94 |                     state['m'] = torch.zeros_like(p.data).float()
95 |                     state['d'] = torch.zeros_like(p.data).float()
96 |                     # state['moving_avg'] = p.data
97 | 
98 |                 # plain SGD during the warm-up phase
99 |                 if self.iter <= sgd_steps:
100 |                     p.data.add_(-group['lr'], grad)
101 |                     # self.sgd.step()
102 |                     continue
103 | 
104 |                 state['step'] += 1
105 | 
106 |                 # Reset the Neumann iterate every K iterations
107 |                 if self.iter % K == 0:
108 |                     state['m'] = grad.mul(-eta)
109 | 
110 |                 else:
111 |                     # Compute the descent direction d_t. The paper's full update
112 |                     # (commented out below) adds cubic-regularisation and
113 |                     # repulsive terms around a moving average of the weights;
114 |                     # only the plain gradient is used here.
115 |                     # diff = p.data.sub(state['moving_avg'])
116 |                     # diff_norm = p.data.sub(state['moving_avg']).norm()
117 |                     # state['d'] = grad.add( (( (diff_norm.pow(2)).mul(alpha) ).sub( (diff_norm.pow(-2)).mul(beta) )).mul( diff.div(diff_norm)) )
118 |                     state['d'] = grad
119 | 
120 |                     ## Update the Neumann iterate: m <- mu * m - eta * d
121 |                     (state['m'].mul_(mu)).sub_(state['d'].mul(eta))
122 | 
123 |                     ## Update the weights: w <- w + mu * m - eta * d
124 |                     p.data.add_((state['m'].mul(mu)).sub(state['d'].mul(eta)))
125 | 
126 |                     ## Update the moving average (disabled together with the regulariser)
127 |                     # state['moving_avg'] = p.data.add( (state['moving_avg'].sub(p.data)).mul(gamma) )
128 | 
129 |             # Double the reset period after each reset
130 |             if self.iter % K == 0:
131 |                 group['K'] = K * 2
132 | 
133 |         return loss
134 | 
--------------------------------------------------------------------------------
/models/optimizer/stochastic.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.optim.optimizer import Optimizer, required
3 | 
4 | 
5 | class SGD(Optimizer):
6 |     r"""Implements stochastic gradient descent (optionally with momentum).
7 |     Nesterov momentum is based on the formula from
8 |     `On the importance of initialization and momentum in deep learning`__.
9 |     Args:
10 |         params (iterable): iterable of parameters to optimize or dicts defining
11 |             parameter groups
12 |         lr (float): learning rate
13 |         momentum (float, optional): momentum factor (default: 0)
14 |         weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
15 |         dampening (float, optional): dampening for momentum (default: 0)
16 |         nesterov (bool, optional): enables Nesterov momentum (default: False)
17 |     Example:
18 |         >>> optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
19 |         >>> optimizer.zero_grad()
20 |         >>> loss_fn(model(input), target).backward()
21 |         >>> optimizer.step()
22 |     __ http://www.cs.toronto.edu/%7Ehinton/absps/momentum.pdf
23 |     .. note::
24 |         The implementation of SGD with Momentum/Nesterov subtly differs from
25 |         Sutskever et al. and implementations in some other frameworks.
26 |         Considering the specific case of Momentum, the update can be written as
27 |         .. math::
28 |                   v = \rho * v + g \\
29 |                   p = p - lr * v
30 |         where p, g, v and :math:`\rho` denote the parameters, gradient,
31 |         velocity, and momentum respectively.
32 |         This is in contrast to Sutskever et al. and
33 |         other frameworks which employ an update of the form
34 |         .. math::
35 |              v = \rho * v + lr * g \\
36 |              p = p - v
37 |         The Nesterov version is analogously modified.
38 |     """
39 | 
40 |     def __init__(self, params, lr=required, momentum=0, dampening=0,
41 |                  weight_decay=0, nesterov=False):
42 |         if lr is not required and lr < 0.0:
43 |             raise ValueError("Invalid learning rate: {}".format(lr))
44 |         if momentum < 0.0:
45 |             raise ValueError("Invalid momentum value: {}".format(momentum))
46 |         if weight_decay < 0.0:
47 |             raise ValueError("Invalid weight_decay value: {}".format(weight_decay))
48 | 
49 |         defaults = dict(lr=lr, momentum=momentum, dampening=dampening,
50 |                         weight_decay=weight_decay, nesterov=nesterov)
51 |         if nesterov and (momentum <= 0 or dampening != 0):
52 |             raise ValueError("Nesterov momentum requires a momentum and zero dampening")
53 |         super(SGD, self).__init__(params, defaults)
54 | 
55 |     def __setstate__(self, state):
56 |         super(SGD, self).__setstate__(state)
57 |         for group in self.param_groups:
58 |             group.setdefault('nesterov', False)
59 | 
60 |     def step(self, closure=None):
61 |         """Performs a single optimization step.
62 |         Arguments:
63 |             closure (callable, optional): A closure that reevaluates the model
64 |                 and returns the loss.
65 |         """
66 |         loss = None
67 |         if closure is not None:
68 |             loss = closure()
69 | 
70 |         for group in self.param_groups:
71 |             weight_decay = group['weight_decay']
72 |             momentum = group['momentum']
73 |             dampening = group['dampening']
74 |             nesterov = group['nesterov']
75 | 
76 |             for p in group['params']:
77 |                 if p.grad is None:
78 |                     continue
79 |                 d_p = p.grad.data
80 |                 if weight_decay != 0:
81 |                     d_p.add_(weight_decay, p.data)
82 |                 if momentum != 0:
83 |                     param_state = self.state[p]
84 |                     if 'momentum_buffer' not in param_state:
85 |                         buf = param_state['momentum_buffer'] = torch.zeros_like(p.data)
86 |                         buf.mul_(momentum).add_(d_p)
87 |                     else:
88 |                         buf = param_state['momentum_buffer']
89 |                         buf.mul_(momentum).add_(1 - dampening, d_p)
90 |                     if nesterov:
91 |                         d_p = d_p.add(momentum, buf)
92 |                     else:
93 |                         d_p = buf
94 | 
95 |                 p.data.add_(-group['lr'], d_p)
96 | 
97 |         return loss
98 | 
--------------------------------------------------------------------------------
/slr10.csv:
--------------------------------------------------------------------------------
1 | 3.5,5.1
2 | 3,4.9
3 | 3.2,4.7
4 | 3.1,4.6
5 | 3.6,5
6 | 3.9,5.4
7 | 3.4,4.6
8 | 3.4,5
9 | 2.9,4.4
10 | 3.1,4.9
11 | 3.7,5.4
12 | 3.4,4.8
13 | 3,4.3
14 | 4,5.8
15 | 4.4,5.7
16 | 3.9,5.4
17 | 3.5,5.1
18 | 3.8,5.7
19 | 3.8,5.1
20 | 3.4,5.4
21 | 3.7,5.1
22 | 3.6,4.6
23 | 3.3,5.1
24 | 3.4,4.8
25 | 3,5
26 | 3.4,5
27 | 3.5,5.2
28 | 3.4,5.2
29 | 3.2,4.7
30 | 3.1,4.8
31 | 3.4,5.4
32 | 4.1,5.2
33 | 4.2,5.5
34 | 3.1,4.9
35 | 3.2,5
36 | 3.5,5.5
37 | 3.6,4.9
38 | 3,4.4
39 | 3.4,5.1
40 | 3.5,5
41 | 2.3,4.5
42 | 3.2,4.4
43 | 3.5,5
44 | 3.8,5.1
45 | 3,4.8
46 | 3.8,4.6
47 | 3.7,5.3
48 | 3.3,5
--------------------------------------------------------------------------------
/test.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "code",
5 |    "execution_count": 14,
6 |    "metadata": {},
7 |    "outputs": [],
8 |    "source": [
"source": [ 9 | "import torch\n", 10 | "import torch.nn as nn\n", 11 | "import numpy as np\n", 12 | "import torch.nn.functional as F\n", 13 | "from torch.optim import Neumann\n", 14 | "from torch.utils.data import DataLoader,sampler,Dataset\n", 15 | "from torch.autograd import Variable" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 2, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "batch_size = 48\n", 25 | "test_batch_size = 1000\n", 26 | "seed = 123\n", 27 | "momentum = 0.5\n", 28 | "log_interval = 10\n", 29 | "\n", 30 | "torch.manual_seed(seed)\n", 31 | "if torch.cuda.is_available():\n", 32 | " torch.cuda.manual_seed(seed)" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 3, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "class SimpleDataset(Dataset):\n", 42 | " \"\"\"Action dataset.\"\"\"\n", 43 | "\n", 44 | " def __init__(self, data_file):\n", 45 | " \"\"\"\n", 46 | " Args:\n", 47 | " root_dir (string): Directory with all the data is stored.\n", 48 | " labels(list): GT\n", 49 | " \"\"\"\n", 50 | " self.data_file = data_file\n", 51 | " self.data = np.loadtxt(self.data_file, delimiter=',')#, dtype={'names': ('X', 'Y'), 'formats': ('S1', 'S1')})\n", 52 | " self.length = self.data.shape[0]\n", 53 | " self.labels = self.data[:, -1]\n", 54 | " self.data = self.data[:,0]\n", 55 | "\n", 56 | " def __len__(self):\n", 57 | " return self.length\n", 58 | "\n", 59 | " def __getitem__(self, idx):\n", 60 | " sample={'X':self.data[idx], 'Y' : self.labels[idx]}\n", 61 | "# print(sample)\n", 62 | " return sample\n", 63 | " " 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 4, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "simple_dataset = SimpleDataset(\"./slr10.csv\")\n", 73 | "\n", 74 | "train_loader = torch.utils.data.DataLoader(\n", 75 | " simple_dataset,\n", 76 | " batch_size=batch_size, shuffle=True)\n", 77 | "\n", 78 | "# test_loader = torch.utils.data.DataLoader(\n", 79 | "# batch_size=test_batch_size, shuffle=True, **kwargs)" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 7, 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [ 88 | "class LinearRegressionModel(nn.Module):\n", 89 | "\n", 90 | " def __init__(self, input_dim, output_dim):\n", 91 | "\n", 92 | " super(LinearRegressionModel, self).__init__() \n", 93 | " # Calling Super Class's constructor\n", 94 | " self.linear = nn.Linear(input_dim, output_dim)\n", 95 | " # nn.linear is defined in nn.Module\n", 96 | "\n", 97 | " def forward(self, x):\n", 98 | " # Here the forward pass is simply a linear function\n", 99 | "\n", 100 | " out = self.linear(x)\n", 101 | " return out\n", 102 | "\n", 103 | "input_dim = 1\n", 104 | "output_dim = 1\n", 105 | "\n", 106 | "model = LinearRegressionModel(input_dim,output_dim)\n", 107 | "\n", 108 | "criterion = nn.MSELoss()# Mean Squared Loss\n", 109 | "l_rate = 0.001\n", 110 | "\n", 111 | "optimiser = Neumann(list(model.parameters()), lr = l_rate) #Stochastic Gradient Descent\n", 112 | " " 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": 10, 118 | "metadata": {}, 119 | "outputs": [ 120 | { 121 | "name": "stdout", 122 | "output_type": "stream", 123 | "text": [ 124 | "epoch 0, loss 17.191287994384766\n", 125 | "epoch 1, loss 16.576515197753906\n", 126 | "epoch 2, loss 15.96240234375\n", 127 | "epoch 3, loss 15.351107597351074\n", 128 | "epoch 4, loss 14.744500160217285\n", 129 | "epoch 5, loss 14.144200325012207\n", 130 | "epoch 6, loss 
13.551612854003906\n", 131 | "epoch 7, loss 12.967961311340332\n", 132 | "epoch 8, loss 12.394291877746582\n", 133 | "epoch 9, loss 11.831509590148926\n", 134 | "epoch 10, loss 11.280391693115234\n", 135 | "epoch 11, loss 10.741596221923828\n", 136 | "epoch 12, loss 10.215685844421387\n", 137 | "epoch 13, loss 9.703120231628418\n", 138 | "epoch 14, loss 9.204290390014648\n", 139 | "epoch 15, loss 8.719508171081543\n", 140 | "epoch 16, loss 8.249022483825684\n", 141 | "epoch 17, loss 7.793018341064453\n", 142 | "epoch 18, loss 7.351638317108154\n", 143 | "epoch 19, loss 6.924966335296631\n", 144 | "epoch 20, loss 6.899356842041016\n", 145 | "epoch 21, loss 6.866124629974365\n", 146 | "epoch 22, loss 6.825706481933594\n", 147 | "epoch 23, loss 6.778513431549072\n", 148 | "epoch 24, loss 6.724961757659912\n", 149 | "epoch 25, loss 6.665435314178467\n", 150 | "epoch 26, loss 6.600317001342773\n", 151 | "epoch 27, loss 6.5299763679504395\n", 152 | "epoch 28, loss 6.454763889312744\n", 153 | "epoch 29, loss 6.375024318695068\n", 154 | "epoch 30, loss 6.291087627410889\n", 155 | "epoch 31, loss 6.203271389007568\n", 156 | "epoch 32, loss 6.11188268661499\n", 157 | "epoch 33, loss 6.017217636108398\n", 158 | "epoch 34, loss 5.919558048248291\n", 159 | "epoch 35, loss 5.819180965423584\n", 160 | "epoch 36, loss 5.716345310211182\n", 161 | "epoch 37, loss 5.611307621002197\n", 162 | "epoch 38, loss 5.504310607910156\n", 163 | "epoch 39, loss 5.395582675933838\n", 164 | "epoch 40, loss 5.285351753234863\n", 165 | "epoch 41, loss 5.173831462860107\n", 166 | "epoch 42, loss 5.061225414276123\n", 167 | "epoch 43, loss 4.947733402252197\n", 168 | "epoch 44, loss 4.833540439605713\n", 169 | "epoch 45, loss 4.7188286781311035\n", 170 | "epoch 46, loss 4.603769302368164\n", 171 | "epoch 47, loss 4.488527774810791\n", 172 | "epoch 48, loss 4.373256683349609\n", 173 | "epoch 49, loss 4.25811243057251\n", 174 | "epoch 50, loss 4.143230438232422\n", 175 | "epoch 51, loss 4.028749942779541\n", 176 | "epoch 52, loss 3.914796829223633\n", 177 | "epoch 53, loss 3.801495313644409\n", 178 | "epoch 54, loss 3.6889610290527344\n", 179 | "epoch 55, loss 3.577302932739258\n", 180 | "epoch 56, loss 3.4666271209716797\n", 181 | "epoch 57, loss 3.357028007507324\n", 182 | "epoch 58, loss 3.2486019134521484\n", 183 | "epoch 59, loss 3.14143443107605\n", 184 | "epoch 60, loss 3.0356082916259766\n", 185 | "epoch 61, loss 2.9312002658843994\n", 186 | "epoch 62, loss 2.828282356262207\n", 187 | "epoch 63, loss 2.7269256114959717\n", 188 | "epoch 64, loss 2.6271891593933105\n", 189 | "epoch 65, loss 2.5291340351104736\n", 190 | "epoch 66, loss 2.4328126907348633\n", 191 | "epoch 67, loss 2.3382790088653564\n", 192 | "epoch 68, loss 2.245577096939087\n", 193 | "epoch 69, loss 2.154750347137451\n", 194 | "epoch 70, loss 2.0658395290374756\n", 195 | "epoch 71, loss 1.9788776636123657\n", 196 | "epoch 72, loss 1.8938989639282227\n", 197 | "epoch 73, loss 1.8109320402145386\n", 198 | "epoch 74, loss 1.730002760887146\n", 199 | "epoch 75, loss 1.6511331796646118\n", 200 | "epoch 76, loss 1.5743441581726074\n", 201 | "epoch 77, loss 1.4996517896652222\n", 202 | "epoch 78, loss 1.4270719289779663\n", 203 | "epoch 79, loss 1.356615662574768\n", 204 | "epoch 80, loss 1.2882922887802124\n", 205 | "epoch 81, loss 1.2221078872680664\n", 206 | "epoch 82, loss 1.1580679416656494\n", 207 | "epoch 83, loss 1.0961750745773315\n", 208 | "epoch 84, loss 1.0364285707473755\n", 209 | "epoch 85, loss 0.9788269400596619\n", 210 | "epoch 86, loss 
0.9233669638633728\n",
211 |       "epoch 87, loss 0.8700427412986755\n",
212 |       "epoch 88, loss 0.8188469409942627\n",
213 |       "epoch 89, loss 0.7697703242301941\n",
214 |       "epoch 90, loss 0.7228019833564758\n",
215 |       "epoch 91, loss 0.6779298782348633\n",
216 |       "epoch 92, loss 0.6351407170295715\n",
217 |       "epoch 93, loss 0.5944194197654724\n",
218 |       "epoch 94, loss 0.5557489395141602\n",
219 |       "epoch 95, loss 0.5191113948822021\n",
220 |       "epoch 96, loss 0.48448848724365234\n",
221 |       "epoch 97, loss 0.45185983180999756\n",
222 |       "epoch 98, loss 0.4212043285369873\n",
223 |       "epoch 99, loss 0.39249923825263977\n"
224 |      ]
225 |     }
226 |    ],
227 |    "source": [
228 |     "epochs = 100\n",
229 |     "\n",
230 |     "for epoch in range(epochs):\n",
231 |     "    for i, sample in enumerate(train_loader):\n",
232 |     "\n",
233 |     "        # iterate over the shuffled minibatches\n",
234 |     "        inputs = Variable(sample['X'].type(torch.FloatTensor))\n",
235 |     "        labels = Variable(sample['Y'].type(torch.FloatTensor))\n",
236 |     "\n",
237 |     "        # clear accumulated gradients\n",
238 |     "        optimiser.zero_grad()\n",
239 |     "        # forward pass to get predicted values\n",
240 |     "        outputs = model.forward(inputs.view(-1, 1))\n",
241 |     "#         loss = criterion(outputs, labels)\n",
242 |     "#         print(outputs)\n",
243 |     "        loss = criterion(outputs.squeeze(), labels)\n",
244 |     "        loss.backward()  # backpropagate\n",
245 |     "        optimiser.step()  # update the parameters\n",
246 |     "    print('epoch {}, loss {}'.format(epoch, loss.item()))"
247 |    ]
248 |   },
249 |   {
250 |    "cell_type": "code",
251 |    "execution_count": 17,
252 |    "metadata": {},
253 |    "outputs": [
254 |     {
255 |      "name": "stdout",
256 |      "output_type": "stream",
257 |      "text": [
258 |       "{'X': tensor([ 3.4000, 3.7000, 3.4000, 3.2000], dtype=torch.float64), 'Y': tensor([ 4.6000, 5.1000, 5.2000, 4.7000], dtype=torch.float64)}\n",
259 |       "{'X': tensor([ 4.4000, 3.5000, 3.2000, 3.8000], dtype=torch.float64), 'Y': tensor([ 5.7000, 5.5000, 4.7000, 5.1000], dtype=torch.float64)}\n",
260 |       "{'X': tensor([ 3.0000, 3.1000, 3.8000, 3.4000], dtype=torch.float64), 'Y': tensor([ 4.8000, 4.6000, 4.6000, 5.0000], dtype=torch.float64)}\n",
261 |       "{'X': tensor([ 3.8000, 3.4000, 4.0000, 3.1000], dtype=torch.float64), 'Y': tensor([ 5.7000, 5.0000, 5.8000, 4.9000], dtype=torch.float64)}\n",
262 |       "{'X': tensor([ 3.5000, 3.5000, 3.5000, 3.2000], dtype=torch.float64), 'Y': tensor([ 5.0000, 5.1000, 5.2000, 5.0000], dtype=torch.float64)}\n",
263 |       "{'X': tensor([ 3.5000, 3.0000, 3.9000, 3.4000], dtype=torch.float64), 'Y': tensor([ 5.1000, 4.9000, 5.4000, 5.4000], dtype=torch.float64)}\n",
264 |       "{'X': tensor([ 3.8000, 3.9000, 3.4000, 3.3000], dtype=torch.float64), 'Y': tensor([ 5.1000, 5.4000, 5.1000, 5.1000], dtype=torch.float64)}\n",
265 |       "{'X': tensor([ 4.2000, 3.3000, 3.0000, 3.7000], dtype=torch.float64), 'Y': tensor([ 5.5000, 5.0000, 5.0000, 5.4000], dtype=torch.float64)}\n",
266 |       "{'X': tensor([ 3.6000, 4.1000, 3.4000, 3.0000], dtype=torch.float64), 'Y': tensor([ 4.6000, 5.2000, 4.8000, 4.3000], dtype=torch.float64)}\n",
267 |       "{'X': tensor([ 3.5000, 3.4000, 3.1000, 3.0000], dtype=torch.float64), 'Y': tensor([ 5.0000, 4.8000, 4.9000, 4.4000], dtype=torch.float64)}\n",
268 |       "{'X': tensor([ 3.7000, 2.3000, 3.1000, 3.2000], dtype=torch.float64), 'Y': tensor([ 5.3000, 4.5000, 4.8000, 4.4000], dtype=torch.float64)}\n",
269 |       "{'X': tensor([ 3.6000, 2.9000, 3.6000, 3.4000], dtype=torch.float64), 'Y': tensor([ 4.9000, 4.4000, 5.0000, 5.4000], dtype=torch.float64)}\n"
270 |      ]
271 |     }
272 |    ],
273 |    "source": [
274 |     "for i, sample in enumerate(train_loader):\n",
275 |     "\n",
276 |     "    # inspect the shuffled minibatches\n",
277 |     "    print(sample)"
278 |    ]
279 |   },
280 |   {
281 |    "cell_type": "code",
282 |    "execution_count": null,
283 |    "metadata": {},
284 |    "outputs": [],
285 |    "source": [
286 |     "import matplotlib.pyplot as plt\n",
287 |     "\n",
288 |     "# rebuild the raw arrays from the dataset for plotting\n",
289 |     "x_train = simple_dataset.data.reshape(-1, 1).astype(np.float32)\n",
290 |     "y_correct = simple_dataset.labels\n",
291 |     "\n",
292 |     "predicted = model(Variable(torch.from_numpy(x_train))).data.numpy()\n",
293 |     "\n",
294 |     "plt.plot(x_train, y_correct, 'go', label = 'from data', alpha = .5)\n",
295 |     "plt.plot(x_train, predicted, label = 'prediction', alpha = 0.5)\n",
296 |     "plt.legend()\n",
297 |     "plt.show()\n",
298 |     "print(model.state_dict())"
299 |    ]
300 |   },
301 |   {
302 |    "cell_type": "code",
303 |    "execution_count": null,
304 |    "metadata": {},
305 |    "outputs": [],
306 |    "source": []
307 |   }
308 |  ],
309 |  "metadata": {
310 |   "kernelspec": {
311 |    "display_name": "Python 3",
312 |    "language": "python",
313 |    "name": "python3"
314 |   },
315 |   "language_info": {
316 |    "codemirror_mode": {
317 |     "name": "ipython",
318 |     "version": 3
319 |    },
320 |    "file_extension": ".py",
321 |    "mimetype": "text/x-python",
322 |    "name": "python",
323 |    "nbconvert_exporter": "python",
324 |    "pygments_lexer": "ipython3",
325 |    "version": "3.6.4"
326 |   }
327 |  },
328 |  "nbformat": 4,
329 |  "nbformat_minor": 2
330 | }
331 | 
--------------------------------------------------------------------------------
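The following standalone sketch is not part of the repository. It is a minimal NumPy re-statement of the update rule implemented in models/optimizer/neumann.py (SGD warm-up, step size eta decayed as lr / t, momentum mu clipped to [0.5, 0.9], Neumann iterate m with a periodic reset whose period K doubles after each reset), applied to the toy quadratic f(w) = 0.5 * ||w||^2 so the control flow can be read in isolation. The names toy_grad and neumann_demo are hypothetical and exist only for this illustration.

import numpy as np

def toy_grad(w):
    # gradient of the toy objective f(w) = 0.5 * ||w||^2
    return w

def neumann_demo(w, lr=0.1, sgd_steps=5, K=10, num_iters=200):
    # illustrative sketch only; mirrors the structure of Neumann.step()
    m = np.zeros_like(w)  # Neumann iterate
    for t in range(1, num_iters + 1):
        g = toy_grad(w)
        if t <= sgd_steps:                  # plain SGD warm-up phase
            w = w - lr * g
            continue
        eta = lr / t                        # step size decays with time
        mu = min(max(1 - 1 / (1 + t), 0.5), 0.9)  # clipped momentum
        if t % K == 0:                      # periodic reset of the iterate
            m = -eta * g
            K *= 2                          # double the reset period
        else:
            m = mu * m - eta * g            # advance the Neumann iterate
            w = w + mu * m - eta * g        # apply the update to the weights
    return w

print(neumann_demo(np.array([2.0, -3.0])))  # moves steadily toward the minimum at 0

On this convex quadratic the iterate shrinks monotonically toward the origin; for non-convex losses, the PyTorch version in models/optimizer/neumann.py additionally sketches the paper's regularised descent direction (cubic-regularisation and repulsive terms around a moving average of the weights), which is left commented out there.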