├── README.md
├── data
│   ├── data.dat
│   ├── mnist
│   │   ├── mnist_test.npz
│   │   └── mnist_train.npz
│   └── shakespeare.txt
├── dknet
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── __init__.cpython-35.pyc
│   │   ├── __init__.cpython-36.pyc
│   │   ├── loss.cpython-35.pyc
│   │   ├── loss.cpython-36.pyc
│   │   ├── models.cpython-35.pyc
│   │   ├── models.cpython-36.pyc
│   │   ├── optimizers.cpython-35.pyc
│   │   ├── optimizers.cpython-36.pyc
│   │   ├── utils.cpython-35.pyc
│   │   └── utils.cpython-36.pyc
│   ├── layers
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-35.pyc
│   │   │   ├── __init__.cpython-36.pyc
│   │   │   ├── activation.cpython-35.pyc
│   │   │   ├── activation.cpython-36.pyc
│   │   │   ├── convolutional.cpython-35.pyc
│   │   │   ├── convolutional.cpython-36.pyc
│   │   │   ├── dense.cpython-35.pyc
│   │   │   ├── dense.cpython-36.pyc
│   │   │   ├── dropout.cpython-35.pyc
│   │   │   ├── dropout.cpython-36.pyc
│   │   │   ├── layer.cpython-35.pyc
│   │   │   ├── layer.cpython-36.pyc
│   │   │   ├── pooling.cpython-35.pyc
│   │   │   ├── pooling.cpython-36.pyc
│   │   │   ├── reshape.cpython-35.pyc
│   │   │   └── reshape.cpython-36.pyc
│   │   ├── activation.py
│   │   ├── convolutional.py
│   │   ├── dense.py
│   │   ├── dropout.py
│   │   ├── layer.py
│   │   ├── pooling.py
│   │   └── reshape.py
│   ├── loss.py
│   ├── models.py
│   ├── optimizers.py
│   └── utils.py
├── ex1_1.png
├── ex1_2.png
├── ex2_1.png
├── example.py
├── example_mnist.py
└── loo-loss
    ├── __pycache__
    │   └── utils.cpython-36.pyc
    ├── kfold-loss.py
    ├── loo-loss.py
    └── utils.py

/README.md:
--------------------------------------------------------------------------------
1 | # Deep-Kernel-GP
2 |
3 | ## Dependencies
4 | The package has numpy and scipy.linalg as dependencies.
5 | The examples also use matplotlib and scikit-learn.
6 |
7 | ## Introduction
8 |
9 |
10 |
11 | Instead of learning a mapping X-->Y with a neural network or GP regression, we learn the following mappings:
12 | X-->Z-->Y, where the first step is performed by a neural net and the second by a GP regression algorithm.
13 |
14 | This way we are able to use GP regression to learn functions on data where the assumption that y(x) is drawn from a Gaussian process, with covariance specified by one of the standard covariance functions, might not be reasonable.
15 | For instance, we can learn functions with image pixels as inputs, or functions whose length scales vary with the input.
16 |
17 |
18 | The parameters of the neural net are trained by maximizing the log marginal likelihood implied by z(x_train) and y_train.
19 |
20 | [Deep Kernel Learning - A.G. Wilson ++ ](https://arxiv.org/pdf/1511.02222.pdf)
21 |
22 | [Using Deep Belief Nets to Learn Covariance Kernels
23 | for Gaussian Processes - G. Hinton ++](http://www.cs.toronto.edu/~fritz/absps/dbngp.pdf)
24 |
25 | ## Examples
26 | Basic usage is done with a scikit-learn-like API:
27 |
28 | ```python
29 |
30 | layers=[]
31 | layers.append(Dense(32,activation='tanh'))
32 | layers.append(Dense(1))
33 | layers.append(CovMat(kernel='rbf'))
34 |
35 | opt=Adam(1e-3) # or opt=SciPyMin('l-bfgs-b')
36 |
37 | gp=NNRegressor(layers,opt=opt,batch_size=x_train.shape[0],maxiter=1000,gp=True,verbose=True)
38 | gp.fit(x_train,y_train)
39 | y_pred,std=gp.predict(x_test)
40 |
41 | ```
42 |
43 | The example creates a mapping z(x), where both x and z are 1d vectors, using a neural network with 1 hidden layer.
44 | The CovMat layer creates a covariance matrix from z using the covariance function v\*exp(-0.5*|z1-z2|**2), plus a noise term alpha on the diagonal, where v and alpha are learned during training.
45 |
46 | v and alpha are available after training as gp.layers[-1].var and gp.layers[-1].s_alpha.
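For completeness, a self-contained version of the snippet above. The imports are the ones used by example.py in this repository; the toy data below is only a stand-in (any 2-D numpy arrays of shape [n_samples, n_features] for x and [n_samples, n_outputs] for y will do):

```python
import numpy as np

from dknet import NNRegressor
from dknet.layers import Dense, CovMat
from dknet.optimizers import Adam, SciPyMin

# Stand-in training/test data (not part of the library).
np.random.seed(0)
x_train = np.random.random(size=(70, 1)) - 0.5
y_train = np.sin(6.0 * x_train) + np.random.normal(0.0, 0.01, size=x_train.shape)
x_test = np.linspace(-0.5, 0.5, 200).reshape(-1, 1)

layers = []
layers.append(Dense(32, activation='tanh'))
layers.append(Dense(1))
layers.append(CovMat(kernel='rbf'))

opt = Adam(1e-3)  # or opt = SciPyMin('l-bfgs-b')

gp = NNRegressor(layers, opt=opt, batch_size=x_train.shape[0],
                 maxiter=1000, gp=True, verbose=True)
gp.fit(x_train, y_train)
y_pred, std = gp.predict(x_test)  # posterior mean and std. dev.

# Learned kernel hyperparameters v and alpha (CovMat is the last layer):
print(gp.layers[-1].var, gp.layers[-1].s_alpha)
```

Note that batch_size is set to the full training set here, as in the snippet above, since the GP marginal likelihood in gp_loss is computed over whatever batch is fed through the network.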
47 | The gp.fast_forward() function can be used to extract the z(x) mapping (it skips the last layer, which builds the [batch_size, batch_size] covariance matrix).
48 |
49 | ### Learning a function with varying length scale
50 |
51 | In the example.py script, deep kernel learning (DKL) is used to learn from samples of the function sin(64(x+0.5)**4).
52 |
53 | Learning this function with a neural network would be hard, since it can be challenging to fit rapidly oscillating functions with NNs.
54 | Learning the function using GP regression with a squared exponential covariance function would also be suboptimal, since we would have to commit to a single fixed length scale.
55 | Unless we have a lot of samples, we would be forced to give up precision on the slowly varying part of the function.
56 |
57 | DKL Prediction:
58 |
59 |

60 |
61 | [figure omitted]
62 |
63 | DKL Prediction
64 |
65 |
66 |
67 | [figure omitted]
68 |
69 | z(x) function learned by neural network.
70 |
71 |

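The two figures above are produced by example.py. A condensed sketch of that experiment follows — the target function, layer stack and optimizer settings are taken directly from example.py, and the plotting code is omitted:

```python
import numpy as np

from dknet import NNRegressor
from dknet.layers import Dense, Dropout, Scale, CovMat
from dknet.optimizers import Adam

def f(x):
    # Flat for x < -0.5, then increasingly fast oscillations as x grows.
    return (x + 0.5 >= 0) * np.sin(64 * (x + 0.5)**4)

np.random.seed(0)
x_train = np.random.random(size=(70, 1)) - 0.5
y_train = f(x_train) + np.random.normal(0.0, 0.01, size=x_train.shape)
x_test = np.linspace(-0.7, 0.7, 1000).reshape(-1, 1)

layers = [Dense(6, activation='tanh'),
          Dropout(0.99),
          Dense(1),
          Scale(fixed=True, init_vals=64.0),
          CovMat(kernel='rbf', alpha_fixed=False)]

gp = NNRegressor(layers, opt=Adam(1e-3), batch_size=x_train.shape[0],
                 maxiter=10000, gp=True, verbose=True)
gp.fit(x_train, y_train)

y_pred, std = gp.predict(x_test)  # DKL prediction (first figure)
z_test = gp.fast_forward(x_test)  # learned z(x) mapping (second figure)
```

The Scale(fixed=True, init_vals=64.0) layer simply multiplies the 1-d feature by 64 before the RBF kernel, which itself uses a fixed unit length scale.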
72 | 73 | We see that DKL solves the problem quite nicely, given the limited data. We also see that for x<-0.5 the std.dev of the DKL model does not capture the prediction error. 74 | -------------------------------------------------------------------------------- /data/data.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maka89/Deep-Kernel-GP/cff31dd419c9e2da999afbe16320fd7f62c5f0ef/data/data.dat -------------------------------------------------------------------------------- /data/mnist/mnist_test.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maka89/Deep-Kernel-GP/cff31dd419c9e2da999afbe16320fd7f62c5f0ef/data/mnist/mnist_test.npz -------------------------------------------------------------------------------- /data/mnist/mnist_train.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maka89/Deep-Kernel-GP/cff31dd419c9e2da999afbe16320fd7f62c5f0ef/data/mnist/mnist_train.npz -------------------------------------------------------------------------------- /dknet/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from . import layers 4 | from . import models 5 | from . import optimizers 6 | from . import utils 7 | from . import loss 8 | from .models import NNRegressor -------------------------------------------------------------------------------- /dknet/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maka89/Deep-Kernel-GP/cff31dd419c9e2da999afbe16320fd7f62c5f0ef/dknet/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /dknet/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maka89/Deep-Kernel-GP/cff31dd419c9e2da999afbe16320fd7f62c5f0ef/dknet/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /dknet/__pycache__/loss.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maka89/Deep-Kernel-GP/cff31dd419c9e2da999afbe16320fd7f62c5f0ef/dknet/__pycache__/loss.cpython-35.pyc -------------------------------------------------------------------------------- /dknet/__pycache__/loss.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maka89/Deep-Kernel-GP/cff31dd419c9e2da999afbe16320fd7f62c5f0ef/dknet/__pycache__/loss.cpython-36.pyc -------------------------------------------------------------------------------- /dknet/__pycache__/models.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maka89/Deep-Kernel-GP/cff31dd419c9e2da999afbe16320fd7f62c5f0ef/dknet/__pycache__/models.cpython-35.pyc -------------------------------------------------------------------------------- /dknet/__pycache__/models.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maka89/Deep-Kernel-GP/cff31dd419c9e2da999afbe16320fd7f62c5f0ef/dknet/__pycache__/models.cpython-36.pyc 
-------------------------------------------------------------------------------- /dknet/__pycache__/optimizers.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maka89/Deep-Kernel-GP/cff31dd419c9e2da999afbe16320fd7f62c5f0ef/dknet/__pycache__/optimizers.cpython-35.pyc -------------------------------------------------------------------------------- /dknet/__pycache__/optimizers.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maka89/Deep-Kernel-GP/cff31dd419c9e2da999afbe16320fd7f62c5f0ef/dknet/__pycache__/optimizers.cpython-36.pyc -------------------------------------------------------------------------------- /dknet/__pycache__/utils.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maka89/Deep-Kernel-GP/cff31dd419c9e2da999afbe16320fd7f62c5f0ef/dknet/__pycache__/utils.cpython-35.pyc -------------------------------------------------------------------------------- /dknet/__pycache__/utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maka89/Deep-Kernel-GP/cff31dd419c9e2da999afbe16320fd7f62c5f0ef/dknet/__pycache__/utils.cpython-36.pyc -------------------------------------------------------------------------------- /dknet/layers/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from . import layer 3 | from . import activation 4 | from . import convolutional 5 | from . import dense 6 | from . import reshape 7 | from . import pooling 8 | from . 
import dropout 9 | 10 | 11 | from .pooling import MaxPool2D,AveragePool2D 12 | from .dense import Dense,RNNCell,CovMat,Parametrize,Scale 13 | from .convolutional import Conv2D 14 | from .activation import Activation 15 | from .reshape import Flatten 16 | from .dropout import Dropout -------------------------------------------------------------------------------- /dknet/layers/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maka89/Deep-Kernel-GP/cff31dd419c9e2da999afbe16320fd7f62c5f0ef/dknet/layers/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /dknet/layers/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maka89/Deep-Kernel-GP/cff31dd419c9e2da999afbe16320fd7f62c5f0ef/dknet/layers/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /dknet/layers/__pycache__/activation.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maka89/Deep-Kernel-GP/cff31dd419c9e2da999afbe16320fd7f62c5f0ef/dknet/layers/__pycache__/activation.cpython-35.pyc -------------------------------------------------------------------------------- /dknet/layers/__pycache__/activation.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maka89/Deep-Kernel-GP/cff31dd419c9e2da999afbe16320fd7f62c5f0ef/dknet/layers/__pycache__/activation.cpython-36.pyc -------------------------------------------------------------------------------- /dknet/layers/__pycache__/convolutional.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maka89/Deep-Kernel-GP/cff31dd419c9e2da999afbe16320fd7f62c5f0ef/dknet/layers/__pycache__/convolutional.cpython-35.pyc -------------------------------------------------------------------------------- /dknet/layers/__pycache__/convolutional.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maka89/Deep-Kernel-GP/cff31dd419c9e2da999afbe16320fd7f62c5f0ef/dknet/layers/__pycache__/convolutional.cpython-36.pyc -------------------------------------------------------------------------------- /dknet/layers/__pycache__/dense.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maka89/Deep-Kernel-GP/cff31dd419c9e2da999afbe16320fd7f62c5f0ef/dknet/layers/__pycache__/dense.cpython-35.pyc -------------------------------------------------------------------------------- /dknet/layers/__pycache__/dense.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maka89/Deep-Kernel-GP/cff31dd419c9e2da999afbe16320fd7f62c5f0ef/dknet/layers/__pycache__/dense.cpython-36.pyc -------------------------------------------------------------------------------- /dknet/layers/__pycache__/dropout.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maka89/Deep-Kernel-GP/cff31dd419c9e2da999afbe16320fd7f62c5f0ef/dknet/layers/__pycache__/dropout.cpython-35.pyc 
-------------------------------------------------------------------------------- /dknet/layers/__pycache__/dropout.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maka89/Deep-Kernel-GP/cff31dd419c9e2da999afbe16320fd7f62c5f0ef/dknet/layers/__pycache__/dropout.cpython-36.pyc -------------------------------------------------------------------------------- /dknet/layers/__pycache__/layer.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maka89/Deep-Kernel-GP/cff31dd419c9e2da999afbe16320fd7f62c5f0ef/dknet/layers/__pycache__/layer.cpython-35.pyc -------------------------------------------------------------------------------- /dknet/layers/__pycache__/layer.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maka89/Deep-Kernel-GP/cff31dd419c9e2da999afbe16320fd7f62c5f0ef/dknet/layers/__pycache__/layer.cpython-36.pyc -------------------------------------------------------------------------------- /dknet/layers/__pycache__/pooling.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maka89/Deep-Kernel-GP/cff31dd419c9e2da999afbe16320fd7f62c5f0ef/dknet/layers/__pycache__/pooling.cpython-35.pyc -------------------------------------------------------------------------------- /dknet/layers/__pycache__/pooling.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maka89/Deep-Kernel-GP/cff31dd419c9e2da999afbe16320fd7f62c5f0ef/dknet/layers/__pycache__/pooling.cpython-36.pyc -------------------------------------------------------------------------------- /dknet/layers/__pycache__/reshape.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maka89/Deep-Kernel-GP/cff31dd419c9e2da999afbe16320fd7f62c5f0ef/dknet/layers/__pycache__/reshape.cpython-35.pyc -------------------------------------------------------------------------------- /dknet/layers/__pycache__/reshape.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maka89/Deep-Kernel-GP/cff31dd419c9e2da999afbe16320fd7f62c5f0ef/dknet/layers/__pycache__/reshape.cpython-36.pyc -------------------------------------------------------------------------------- /dknet/layers/activation.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | from numpy import unravel_index 3 | from .layer import Layer 4 | def relu(x,dtype=numpy.float64): 5 | tmp=(x>=0) 6 | return x*tmp,1*tmp 7 | 8 | def sigmoid(x,dtype=numpy.float64): 9 | a=1.0/(numpy.exp(-x)+1.0) 10 | return a, a*(1-a) 11 | 12 | def linear(x,dtype=numpy.float64): 13 | return x,1.0#numpy.ones_like(x,dtype=dtype) 14 | 15 | def tanh(x,dtype=numpy.float64): 16 | a=numpy.tanh(x) 17 | return a, 1.0-a**2 18 | 19 | def lrelu(x,dtype=numpy.float64): 20 | y=(x>=0)*1.0+(x<0)*0.01 21 | return y*x,y 22 | 23 | def softplus(x,dtype=numpy.float64): 24 | tmp=numpy.exp(x) 25 | return numpy.log(tmp+1.0), tmp/(1.0+tmp) 26 | 27 | def softmax(x,dtype=numpy.float64): 28 | s=numpy.exp(x) 29 | s=s/numpy.sum(s,1)[:,numpy.newaxis] 30 | return s,s*(1.0-s) 31 | 32 | def rbf(x,dtype=numpy.float64): 33 | 34 | s=numpy.exp(-0.5*numpy.sum(x**2,-1)) 35 | 
print(x.shape,s.shape) 36 | return s, -x*s[:,:,numpy.newaxis] 37 | 38 | class Activation(Layer): 39 | 40 | dict={'linear':linear,'relu':relu,'sigmoid':sigmoid,'tanh':tanh,'softmax':softmax,'lrelu':lrelu,'softplus':softplus,'rbf':rbf} 41 | 42 | def __init__(self,strr): 43 | 44 | if strr in self.dict.keys(): 45 | self.afstr=strr 46 | self.af=self.dict[strr] 47 | else: 48 | print("Error. Undefined activation function '" + str(strr)+"'. Using linear activation.") 49 | print("Available activations: " + str(list(self.dict.keys()))) 50 | self.af=linear 51 | self.afstr='linear' 52 | self.trainable=False 53 | def forward(self,X): 54 | self.inp=X 55 | self.a=self.af(X,dtype=self.dtype) 56 | self.out=self.a[0] 57 | return self.out 58 | def backward(self,err): 59 | return self.a[1]*err 60 | 61 | -------------------------------------------------------------------------------- /dknet/layers/convolutional.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | from numpy import unravel_index 3 | from .activation import Activation 4 | from .layer import Layer 5 | 6 | class Conv2D(Layer): 7 | def __init__(self,n_out,kernel_size,activation=None): 8 | self.n_out=n_out 9 | self.activation=activation 10 | self.kernel_size=kernel_size 11 | self.trainable=True 12 | def initialize_ws(self): 13 | self.W=numpy.random.randn(self.kernel_size[0],self.kernel_size[1],self.n_inp,self.n_out).astype(dtype=self.dtype)*numpy.sqrt(1.0/(self.n_inp*numpy.prod(self.kernel_size))) 14 | self.b=numpy.zeros((1,self.n_out),dtype=self.dtype) 15 | self.dW=numpy.zeros_like(self.W,dtype=self.dtype) 16 | self.db=numpy.zeros_like(self.b,dtype=self.dtype) 17 | assert(self.W.shape[0]%2!=0) #Odd filter size pls 18 | assert(self.W.shape[1]%2!=0) #Odd fiter size pls 19 | def forward(self,X): 20 | self.inp=X 21 | 22 | hpad,wpad=int(self.W.shape[0]/2),int(self.W.shape[1]/2) 23 | X2=numpy.zeros((X.shape[0],X.shape[1]+2*hpad,X.shape[2]+2*wpad,X.shape[3]),dtype=self.dtype) 24 | X2[:,hpad:X2.shape[1]-hpad,wpad:X2.shape[2]-wpad,:]=numpy.copy(X) 25 | A=numpy.zeros((X.shape[0],X.shape[1],X.shape[2],self.n_out),dtype=self.dtype) 26 | M,N=X.shape[1],X.shape[2] 27 | for i in range(0,M): 28 | for j in range(0,N): 29 | A[:,i,j,:]=numpy.sum(X2[:,hpad+i-hpad:hpad+i+hpad+1,wpad+j-wpad:wpad+j+wpad+1,:][:,:,:,:,numpy.newaxis]*self.W[numpy.newaxis,:,:,:,:],axis=(1,2,3)) 30 | A+=self.b[0,:] 31 | 32 | self.out=A 33 | return self.out 34 | 35 | def backward(self,err): 36 | 37 | X=self.inp 38 | hpad,wpad=int(self.W.shape[0]/2),int(self.W.shape[1]/2) 39 | X2=numpy.zeros((X.shape[0],X.shape[1]+2*hpad,X.shape[2]+2*wpad,X.shape[3]),dtype=self.dtype) 40 | X2[:,hpad:X2.shape[1]-hpad,wpad:X2.shape[2]-wpad,:]=numpy.copy(X) 41 | 42 | tmpdW=numpy.zeros_like(self.dW,dtype=self.dtype) 43 | dodi=numpy.zeros_like(X2,dtype=self.dtype) 44 | M,N=X.shape[1],X.shape[2] 45 | for i in range(0,M): 46 | for j in range(0,N): 47 | tmpdW+=numpy.sum(err[:,i,j,:][:,numpy.newaxis,numpy.newaxis,numpy.newaxis,:]*X2[:,i:i+2*hpad+1,j:j+2*wpad+1,:][:,:,:,:,numpy.newaxis],0) 48 | dodi[:,i:i+2*hpad+1,j:j+2*wpad+1,:]+=numpy.sum(err[:,i,j,:][:,numpy.newaxis,numpy.newaxis,numpy.newaxis,:]*self.W[numpy.newaxis,:,:,:,:],-1) 49 | self.dW=tmpdW 50 | self.db[0,:]=numpy.sum(err,(0,1,2)) 51 | 52 | return dodi[:,hpad:dodi.shape[1]-hpad,wpad:dodi.shape[2]-wpad,:] -------------------------------------------------------------------------------- /dknet/layers/dense.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | 
from numpy import unravel_index 3 | from .activation import Activation 4 | from .layer import Layer 5 | 6 | 7 | class Parametrize(Layer): 8 | def __init__(self,type="oscillate",w=1.0): 9 | self.trainable = False 10 | self.w=w 11 | if type=="oscillate": 12 | self.forward,self.backward=self.forward_osc,self.backward_osc 13 | def forward_osc(self,X): 14 | self.inp=X 15 | self.out=numpy.concatenate([numpy.sin(self.w*X),numpy.cos(self.w*X)],1) 16 | return self.out 17 | def backward_osc(self,err): 18 | return self.w*err[:,[0]]*numpy.cos(self.w*self.inp) -self.w*err[:,[1]]*numpy.sin(self.w*self.inp) 19 | 20 | class Scale(Layer): 21 | def __init__(self,fixed=False,init_vals=None): 22 | self.fixed=fixed 23 | self.trainable=True 24 | self.init_vals=init_vals 25 | self.activation=None 26 | #assert( not ( (not fixed) and (init_vals is None) ) ) 27 | 28 | def initialize_ws(self): 29 | self.n_out = self.n_inp 30 | if self.init_vals is None: 31 | self.W=numpy.ones((1,self.n_inp)) 32 | self.init_vals=self.W 33 | else: 34 | #assert(len(init_vals)==self.n_inp or type) 35 | self.W=numpy.ones((1,self.n_inp))*self.init_vals 36 | self.b=numpy.zeros((1,self.n_inp)) 37 | self.dW=numpy.zeros_like(self.W) 38 | self.db=numpy.zeros_like(self.b) 39 | 40 | def forward(self,X): 41 | self.inp=X 42 | self.out=self.init_vals*X 43 | if not self.fixed: 44 | self.out=self.W*X 45 | 46 | return self.out 47 | def backward(self,err): 48 | if not self.fixed: 49 | self.dW=numpy.sum(err*self.inp,0).reshape(1,-1) 50 | 51 | return self.W*err 52 | 53 | class Dense(Layer): 54 | def __init__(self,n_out,activation=None): 55 | self.n_out=n_out 56 | self.activation=activation 57 | self.trainable=True 58 | 59 | def initialize_ws(self): 60 | self.W=numpy.random.randn(self.n_inp,self.n_out)*numpy.sqrt(1.0/self.n_inp) 61 | self.b=numpy.zeros((1,self.n_out)) 62 | self.dW=numpy.zeros((self.n_inp,self.n_out)) 63 | self.db=numpy.zeros((1,self.n_out)) 64 | def forward(self,X): 65 | self.inp=X 66 | self.out=numpy.dot(self.inp,self.W)+self.b 67 | return self.out 68 | def backward(self,err): 69 | 70 | self.db=numpy.sum(err,axis=0).reshape(1,-1) 71 | self.dW=numpy.dot(self.inp.T,err) 72 | 73 | return numpy.dot(err,self.W.T) 74 | 75 | class CovMat(Layer): 76 | def __init__(self,alpha=1e-1,var=1.0,kernel='rbf',alpha_fixed=False): 77 | 78 | self.trainable=True 79 | self.s_alpha=alpha 80 | self.var = var 81 | self.activation=None 82 | self.alpha_fixed=alpha_fixed 83 | self.kernel=kernel 84 | if kernel=='rbf': 85 | self.forward,self.backward = self.forward_rbf,self.backward_rbf 86 | elif kernel == 'dot': 87 | self.forward,self.backward = self.forward_dot,self.backward_dot 88 | self.predict=self.forward 89 | def initialize_ws(self): 90 | self.W=numpy.ones((1,2))*numpy.array([[numpy.log(self.s_alpha/(1.0-self.s_alpha)),numpy.sqrt(self.var)]]) 91 | self.b=numpy.zeros((1,1)) 92 | self.dW=numpy.zeros((1,2)) 93 | self.db=numpy.zeros((1,1)) 94 | 95 | 96 | def forward_dot(self,X): 97 | self.inp=X 98 | 99 | #Dot product 100 | self.s0=numpy.dot(X,X.T) 101 | 102 | #Add variance (or alpha0) 103 | self.var=self.W[0,1]**2 104 | self.s0 = self.s0+self.var 105 | 106 | #Add noise 107 | self.s_alpha=1.0/(numpy.exp(-self.W[0,0])+1.0) 108 | self.s=self.s0+numpy.identity(X.shape[0])*(self.s_alpha+1e-8) 109 | 110 | self.out=self.s 111 | return self.out 112 | def backward_dot(self,err): 113 | if not self.alpha_fixed: 114 | a_err=err*self.s_alpha*(1.0-self.s_alpha) 115 | self.dW[0,0]=numpy.mean(numpy.diag(a_err))#*err.shape[0] 116 | 
self.dW[0,1]=numpy.sum(err)*2*self.W[0,1]/err.shape[0] 117 | 118 | #Backpropagate through dot product: 119 | err2=2.0*numpy.dot(err,self.inp)/err.shape[0]#/err.shape[0] 120 | return err2 121 | def forward_rbf(self,X): 122 | self.inp=X 123 | 124 | #Calculate distances 125 | ll=[] 126 | for i in range(0,X.shape[1]): 127 | tmp=X[:,i].reshape(1,-1)-X[:,i].reshape(-1,1) 128 | ll.append(tmp.reshape(X.shape[0],X.shape[0],1)) 129 | self.z=numpy.concatenate(ll,-1) 130 | 131 | #Apply RBF function to distance 132 | self.s0=numpy.exp(-0.5*numpy.sum(self.z**2,-1)) 133 | 134 | #Multiply with variance 135 | self.var=self.W[0,1]**2 136 | self.s=self.var*self.s0 137 | 138 | #Add noise / whitekernel 139 | self.s_alpha=1.0/(numpy.exp(-self.W[0,0])+1.0) 140 | self.out=self.s+(self.s_alpha+1e-8)*numpy.identity(X.shape[0]) 141 | return self.out 142 | 143 | 144 | def backward_rbf(self,err): 145 | #Update trainable weight gradients (if applicable) I.e. noise and variance. 146 | if not self.alpha_fixed: 147 | a_err=err*self.s_alpha*(1.0-self.s_alpha) 148 | self.dW[0,0]=numpy.mean(numpy.diag(a_err)) 149 | self.dW[0,1]=numpy.mean(err*self.s0)*err.shape[0]*2.0*self.W[0,1] 150 | 151 | #Backprop through multiplication with variance 152 | err=self.var*err 153 | 154 | #Backprop through RBF function 155 | err=-err[:,:,numpy.newaxis]*self.z*self.s0[:,:,numpy.newaxis] 156 | 157 | #Backprop through distance calculation 158 | err2=numpy.zeros_like(self.inp) 159 | X=self.inp 160 | for i in range(0,X.shape[1]): 161 | err2[:,i]=numpy.sum(err[:,:,i]-err[:,:,i].T,0)/X.shape[0] 162 | 163 | return err2 164 | 165 | class RNNCell(Layer): 166 | def __init__(self,n_out,activation='tanh',return_seq=False): 167 | self.n_out=n_out 168 | self.trainable=True 169 | self.activation=activation 170 | self.rs=return_seq 171 | def initialize_ws(self): 172 | self.W=numpy.random.randn(self.n_inp+self.n_out,self.n_out) 173 | self.W[0:self.n_inp,:]/=numpy.sqrt(self.n_inp) 174 | #self.W[self.n_inp::,:]/=(10.0*numpy.sqrt(self.n_out)) 175 | self.W[self.n_inp::,:]=numpy.identity(len(self.W[self.n_inp::]))/numpy.sqrt(self.n_inp) 176 | self.b=numpy.zeros((1,self.n_out))#0.0 177 | self.dW=numpy.zeros((self.n_inp+self.n_out,self.n_out)) 178 | self.db=numpy.zeros((1,self.n_out))#0.0 179 | self.init_run=True 180 | 181 | def forward(self,X): 182 | if self.init_run: 183 | self.afs=[] 184 | for i in range(0,len(X[0,:,0])): 185 | self.afs.append(Activation(self.activation)) 186 | self.init_run=False 187 | 188 | self.inp=X 189 | 190 | self.tmp=numpy.zeros((X.shape[0],X.shape[1],self.n_out)) 191 | 192 | 193 | tmpX=X.reshape(X.shape[0]*X.shape[1],X.shape[2]) 194 | 195 | self.tmp[:,0,:]=numpy.dot(X[:,0,:],self.W[0:self.n_inp,:])+self.b 196 | self.tmp[:,0,:]=self.afs[0].forward(self.tmp[:,0,:]) 197 | 198 | for i in range(1,len(X[0,:,0])): 199 | self.tmp[:,i,:]= numpy.dot(self.tmp[:,i-1,:],self.W[self.n_inp::,:])+numpy.dot(X[:,i,:],self.W[0:self.n_inp,:])+self.b 200 | self.tmp[:,i,:]=self.afs[i].forward(self.tmp[:,i,:]) 201 | 202 | if self.rs: 203 | self.out=self.tmp 204 | else: 205 | self.out=self.tmp[:,-1,:] 206 | 207 | return self.out 208 | 209 | def backward(self,err): 210 | if self.rs: 211 | erra=numpy.zeros_like(self.inp) 212 | self.dW=numpy.zeros((self.n_inp+self.n_out,self.n_out)) 213 | self.db=numpy.zeros((1,self.n_out)) 214 | for j in range(0,len(err[0,:,0])): 215 | 216 | errs=err[:,j,:] 217 | for i in reversed(range(1,j+1)): 218 | errs=self.afs[i].backward(errs) 219 | erra[:,i,:]+=numpy.dot(errs,self.W[0:self.n_inp,:].T) 220 | 
self.dW[0:self.n_inp,:]+=numpy.dot(errs.T,self.inp[:,i,:]).T 221 | self.db+=numpy.sum(errs,0).reshape(1,-1) 222 | self.dW[self.n_inp::,:]+=numpy.dot(errs.T,self.tmp[:,i-1,:]).T 223 | errs=numpy.dot(errs,self.W[self.n_inp::,:].T) 224 | 225 | errs=self.afs[0].backward(errs) 226 | erra[:,0,:]+=numpy.dot(errs,self.W[0:self.n_inp,:].T) 227 | self.db+=numpy.sum(errs,0).reshape(1,-1) 228 | self.dW[0:self.n_inp,:]+=numpy.dot(errs.T,self.inp[:,0,:]).T 229 | 230 | else: 231 | self.dW=numpy.zeros((self.n_inp+self.n_out,self.n_out)) 232 | self.db=numpy.zeros((1,self.n_out)) 233 | erra=numpy.zeros_like(self.inp) 234 | for i in reversed(range(1,len(self.inp[0,:,0]))): 235 | err=self.afs[i].backward(err) 236 | erra[:,i,:]=numpy.dot(err,self.W[0:self.n_inp,:].T) 237 | self.dW[0:self.n_inp,:]+=numpy.dot(err.T,self.inp[:,i,:]).T 238 | self.db+=numpy.sum(err,0).reshape(1,-1) 239 | self.dW[self.n_inp::,:]+=numpy.dot(err.T,self.tmp[:,i-1,:]).T 240 | err=numpy.dot(err,self.W[self.n_inp::,:].T) 241 | 242 | err=self.afs[0].backward(err) 243 | erra[:,0,:]=numpy.dot(err,self.W[0:self.n_inp,:].T) 244 | self.db+=numpy.sum(err,0).reshape(1,-1) 245 | self.dW[0:self.n_inp,:]+=numpy.dot(err.T,self.inp[:,0,:]).T 246 | 247 | 248 | 249 | return erra 250 | 251 | -------------------------------------------------------------------------------- /dknet/layers/dropout.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | from .layer import Layer 3 | class Dropout(Layer): 4 | 5 | def __init__(self,keep_prob): 6 | self.keep_prob=keep_prob 7 | self.trainable=False 8 | def forward(self,X): 9 | self.inp=X 10 | self.mask=(numpy.random.random(size=X.shape).astype(self.dtype)<=self.keep_prob)/self.keep_prob 11 | self.out=self.mask*self.inp 12 | return self.out 13 | 14 | def predict(self,X): 15 | self.inp=X 16 | self.out=X 17 | self.mask=numpy.ones_like(X) 18 | return X 19 | 20 | def backward(self,err): 21 | return err*self.mask -------------------------------------------------------------------------------- /dknet/layers/layer.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | 3 | class Layer: 4 | dtype=numpy.float64 5 | def set_inp(self,n_inp): 6 | self.n_inp=n_inp -------------------------------------------------------------------------------- /dknet/layers/pooling.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | from numpy import unravel_index 3 | from .layer import Layer 4 | class MaxPool2D(Layer): 5 | def __init__(self,pool_size=(2,2)): 6 | self.trainable=False 7 | self.pool_size=pool_size 8 | def forward(self,X): 9 | self.inp=X 10 | self.mask=numpy.zeros_like(X) 11 | assert(X.shape[1]%self.pool_size[0]==0) 12 | assert(X.shape[2]%self.pool_size[1]==0) 13 | self.out=numpy.zeros((X.shape[0],int(X.shape[1]/self.pool_size[0]),int(X.shape[2]/self.pool_size[1]),X.shape[3])) 14 | 15 | for i in range(0,self.out.shape[1]): 16 | for j in range(0,self.out.shape[2]): 17 | a=X[:,self.pool_size[0]*i:self.pool_size[0]*(i+1),self.pool_size[1]*j:self.pool_size[1]*(j+1),:] 18 | mv=numpy.max(a,axis=(1,2)) 19 | self.out[:,i,j,:] = mv 20 | self.mask[:,self.pool_size[0]*i:self.pool_size[0]*(i+1),self.pool_size[1]*j:self.pool_size[1]*(j+1),:]=mv[:,numpy.newaxis,numpy.newaxis,:] 21 | return self.out 22 | 23 | def backward(self,err): 24 | err2=numpy.zeros_like(self.inp) 25 | for i in range(0,self.out.shape[1]): 26 | for j in range(0,self.out.shape[2]): 27 | 
mm=(self.mask[:,self.pool_size[0]*i:self.pool_size[0]*(i+1),self.pool_size[1]*j:self.pool_size[1]*(j+1),:]==self.inp[:,self.pool_size[0]*i:self.pool_size[0]*(i+1),self.pool_size[1]*j:self.pool_size[1]*(j+1),:]) 28 | ms=numpy.sum(mm,axis=(1,2)) 29 | err2[:,self.pool_size[0]*i:self.pool_size[0]*(i+1),self.pool_size[1]*j:self.pool_size[1]*(j+1),:]=(mm/ms[:,numpy.newaxis,numpy.newaxis,:])*err[:,i,j,:][:,numpy.newaxis,numpy.newaxis,:] 30 | return err2 31 | class AveragePool2D(Layer): 32 | def __init__(self,pool_size=(2,2)): 33 | self.trainable=False 34 | self.pool_size=pool_size 35 | def forward(self,X): 36 | self.inp=X 37 | assert(X.shape[1]%self.pool_size[0]==0) 38 | assert(X.shape[2]%self.pool_size[1]==0) 39 | self.out=numpy.zeros((X.shape[0],int(X.shape[1]/self.pool_size[0]),int(X.shape[2]/self.pool_size[1]),X.shape[3])) 40 | 41 | for i in range(0,self.out.shape[1]): 42 | for j in range(0,self.out.shape[2]): 43 | a=X[:,self.pool_size[0]*i:self.pool_size[0]*(i+1),self.pool_size[1]*j:self.pool_size[1]*(j+1),:] 44 | mv=numpy.average(a,axis=(1,2)) 45 | self.out[:,i,j,:] = mv 46 | return self.out 47 | 48 | def backward(self,err): 49 | err2=numpy.zeros_like(self.inp) 50 | for i in range(0,self.out.shape[1]): 51 | for j in range(0,self.out.shape[2]): 52 | err2[:,self.pool_size[0]*i:self.pool_size[0]*(i+1),self.pool_size[1]*j:self.pool_size[1]*(j+1),:]=err[:,i,j,:][:,numpy.newaxis,numpy.newaxis,:]/numpy.prod(self.pool_size) 53 | return err2 -------------------------------------------------------------------------------- /dknet/layers/reshape.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | from numpy import unravel_index 3 | from .layer import Layer 4 | class Flatten(Layer): 5 | def __init__(self): 6 | self.trainable=False 7 | def forward(self,X): 8 | self.inp=numpy.copy(X) 9 | self.out=X.reshape(X.shape[0],numpy.prod(X.shape[1::])) 10 | return self.out 11 | def backward(self,err): 12 | return err.reshape(self.inp.shape) -------------------------------------------------------------------------------- /dknet/loss.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | def mse_loss(y_true,y_pred): 3 | return 0.5*numpy.average((y_true-y_pred)**2),(y_pred-y_true)/numpy.prod(y_true.shape) 4 | 5 | def cce_loss(y_true,y_pred): 6 | return -numpy.average(numpy.sum(y_true*numpy.log(y_pred),1)), (y_pred-y_true)/(y_pred*(1.0-y_pred)+1e-12)/y_true.shape[0] -------------------------------------------------------------------------------- /dknet/models.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | from .optimizers import Adam 3 | from .utils import grad_check,calc_acc,one_hot 4 | 5 | from .layers import Activation,Dense,Dropout,CovMat 6 | from .loss import mse_loss,cce_loss 7 | 8 | from scipy.linalg import cholesky,cho_solve,solve_triangular 9 | 10 | 11 | 12 | class CoreNN: 13 | #Hidden layers - list of layers. 
14 | #costfn - costfunction in the form as in loss.py 15 | def __init__(self,layers,costfn): 16 | self.layers=layers 17 | self.cost=costfn 18 | 19 | 20 | def forward(self,X,gc=False): 21 | 22 | A=X 23 | if not gc: 24 | for i in range(0,len(self.layers)): 25 | A=self.layers[i].forward(A) 26 | else: 27 | for i in range(0,len(self.layers)): 28 | A=self.layers[i].predict(A) 29 | 30 | return A 31 | 32 | def backward(self,Y): 33 | self.j,err=self.cost(Y,self.layers[-1].out) 34 | for i in reversed(range(0,len(self.layers))): 35 | err=self.layers[i].backward(err) 36 | return err 37 | 38 | #First run of NN. calculate inp shapes of layers, initialize weights, add activation layers and ouput layer. 39 | def first_run(self,X,Y): 40 | A=X 41 | if not self.layers: 42 | brflag=True 43 | else: 44 | brflag=False 45 | i=0 46 | while not brflag: 47 | if type( self.layers[i] ) == int: 48 | self.layers[i]=Dense(self.layers[i],activation='tanh') 49 | 50 | self.layers[i].set_inp(A.shape[-1]) 51 | if self.layers[i].trainable: 52 | self.layers[i].initialize_ws() 53 | if self.layers[i].activation is not None: 54 | self.layers.insert(i+1,Activation(self.layers[i].activation)) 55 | A=self.layers[i].forward(A) 56 | i+=1 57 | if i==len(self.layers): 58 | brflag=True 59 | 60 | for i in range(0,len(self.layers)): 61 | if type(self.layers[i]) != Dropout and type(self.layers[i]) != CovMat: 62 | self.layers[i].predict=self.layers[i].forward 63 | 64 | 65 | def grad_check(self,X,Y,n_checks=100): 66 | return grad_check(self,X,Y,n_checks) 67 | 68 | 69 | class NNRegressor(CoreNN): 70 | def __init__(self,layers=[64],opt=None,maxiter=200,batch_size=64,gp=True,verbose=False): 71 | super().__init__(layers,mse_loss) 72 | if gp: 73 | self.cost=self.gp_loss 74 | self.opt=opt 75 | self.verbose=verbose 76 | self.maxiter=maxiter 77 | self.batch_size=batch_size 78 | self.fitted=False 79 | self.opt=opt 80 | self.task=0 81 | 82 | def gp_loss(self,y,K): 83 | self.y=y 84 | self.A=self.layers[-2].out 85 | self.K=K 86 | self.L_ = cholesky(K, lower=True) 87 | 88 | L_inv = solve_triangular(self.L_.T,numpy.eye(self.L_.shape[0])) 89 | self.K_inv = L_inv.dot(L_inv.T) 90 | 91 | self.alpha_ = cho_solve((self.L_, True), y) 92 | self.nlml=0.0 93 | self.nlml_grad=0.0 94 | for i in range(0,y.shape[1]): 95 | 96 | gg1=numpy.dot(self.alpha_[:,i].reshape(1,-1),y[:,i].reshape(-1,1))[0,0] 97 | 98 | self.nlml+=0.5*gg1+numpy.sum(numpy.log(numpy.diag(self.L_)))+K.shape[0]*0.5*numpy.log(2.0*numpy.pi) 99 | yy=numpy.dot(y[:,i].reshape(-1,1),y[:,i].reshape(1,-1)) 100 | self.nlml_grad += -0.5*( numpy.dot(numpy.dot(self.K_inv,yy),self.K_inv)-self.K_inv)*K.shape[0] 101 | 102 | return self.nlml,self.nlml_grad 103 | def fast_forward(self,X): 104 | A=X 105 | for i in range(0,len(self.layers)-1): 106 | A=self.layers[i].predict(A) 107 | return A 108 | def fit(self,X,Y,batch_size=None,maxiter=None): 109 | if batch_size is not None: 110 | self.batch_size=batch_size 111 | if maxiter is not None: 112 | self.maxiter=maxiter 113 | if self.opt is None: 114 | self.opt=Adam() 115 | if not self.fitted: 116 | self.first_run(X[0:2],Y[0:2]) 117 | 118 | 119 | a=self.opt.fit(X,Y,self,batch_size=self.batch_size,maxiter=self.maxiter,verbose=self.verbose) 120 | 121 | self.fitted=True 122 | 123 | self.y=Y 124 | self.x=X 125 | 126 | return a 127 | def predict(self,X): 128 | A=X 129 | A2=self.x 130 | for i in range(0,len(self.layers)-1): 131 | A2=self.layers[i].predict(A2) 132 | A=self.layers[i].predict(A) 133 | 134 | self.K=self.layers[-1].forward(A2) 135 | self.L_ = cholesky(self.K, lower=True) 
136 | 137 | L_inv = solve_triangular(self.L_.T,numpy.eye(self.L_.shape[0])) 138 | self.K_inv = L_inv.dot(L_inv.T) 139 | 140 | self.alpha_ = cho_solve((self.L_, True), self.y) 141 | 142 | 143 | K2=numpy.zeros((X.shape[0],X.shape[0])) 144 | K3=numpy.zeros((X.shape[0],self.K.shape[0])) 145 | 146 | if self.layers[-1].kernel=='rbf': 147 | d1=0.0 148 | d2=0.0 149 | for i in range(0,A.shape[1]): 150 | d1+=(A[:,i].reshape(-1,1)-A[:,i].reshape(1,-1))**2 151 | d2+=(A[:,i].reshape(-1,1)-A2[:,i].reshape(1,-1))**2 152 | K2=self.layers[-1].var*numpy.exp(-0.5*d1)+numpy.identity(A.shape[0])*(self.layers[-1].s_alpha+1e-8) 153 | K3=self.layers[-1].var*numpy.exp(-0.5*d2) 154 | elif self.layers[-1].kernel=='dot': 155 | K2=numpy.dot(A,A.T)+numpy.identity(A.shape[0])*(self.layers[-1].s_alpha+1e-8) + self.layers[-1].var 156 | K3=numpy.dot(A,A2.T) + self.layers[-1].var 157 | 158 | preds=numpy.zeros((X.shape[0],self.y.shape[1])) 159 | for i in range(0,self.alpha_.shape[1]): 160 | preds[:,i]=numpy.dot(K3,self.alpha_[:,i].reshape(-1,1))[:,0] 161 | 162 | return preds, numpy.sqrt(numpy.diagonal(K2-numpy.dot(K3,numpy.dot(self.K_inv,K3.T)))) 163 | 164 | 165 | def update(self,X,Y): 166 | self.forward(X) 167 | self.backward(Y) 168 | return self.layers[-1].out 169 | -------------------------------------------------------------------------------- /dknet/optimizers.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | from .utils import r2,calc_acc 3 | from scipy.linalg import eigh 4 | import time 5 | class Optimizer: 6 | 7 | def weight_grads_as_arr(self): 8 | x=numpy.zeros((0,)) 9 | for i in range(0,len(self.model.layers)): 10 | if self.model.layers[i].trainable: 11 | x=numpy.concatenate((x,self.model.layers[i].dW.ravel())) 12 | x=numpy.concatenate((x,self.model.layers[i].db.ravel())) 13 | return x 14 | def weight_grads_std_as_arr(self): 15 | x=numpy.zeros((0,)) 16 | for i in range(0,len(self.model.layers)): 17 | if self.model.layers[i].trainable: 18 | x=numpy.concatenate((x,self.model.layers[i].dWs.ravel())) 19 | x=numpy.concatenate((x,self.model.layers[i].dbs.ravel())) 20 | return x 21 | def weights_as_arr(self): 22 | x=numpy.zeros((0,)) 23 | for i in range(0,len(self.model.layers)): 24 | if self.model.layers[i].trainable: 25 | x=numpy.concatenate((x,self.model.layers[i].W.ravel())) 26 | x=numpy.concatenate((x,self.model.layers[i].b.ravel())) 27 | return x 28 | 29 | def update_params_from_1darr(self,x): 30 | n=0 31 | for i in range(0,len(self.model.layers)): 32 | if self.model.layers[i].trainable: 33 | shape = self.model.layers[i].W.shape 34 | mm=numpy.prod(shape) 35 | self.model.layers[i].W=x[n:n+mm].reshape(shape) 36 | n+=mm 37 | 38 | shape = self.model.layers[i].b.shape 39 | mm=numpy.prod(shape) 40 | self.model.layers[i].b=x[n:n+mm].reshape(shape) 41 | n+=mm 42 | 43 | class ALR(Optimizer): 44 | def __init__(self,learning_rate=1e-3,beta=0.99,C=500): 45 | super() 46 | self.learning_rate=learning_rate 47 | self.dlr=numpy.zeros_like(self.learning_rate) 48 | self.beta=beta 49 | self.C=C 50 | self.first_run=True 51 | def reset(self): 52 | self.init_moments() 53 | def init_moments(self): 54 | self.m11=numpy.zeros_like(self.weights_as_arr()) 55 | self.m1=numpy.zeros_like(self.weights_as_arr()) 56 | self.m2=numpy.zeros_like(self.weights_as_arr()) 57 | self.t=0 58 | #self.learning_rate*=numpy.ones_like(self.weights_as_arr()) 59 | 60 | def fit(self,X,Y,model,batch_size=16,maxiter=100,verbose=True): 61 | self.model=model 62 | self.n_iter=0 63 | 
#self.beta=numpy.exp(-batch_size/self.C) 64 | if self.first_run: 65 | self.init_moments() 66 | self.first_run=False 67 | brflag=False 68 | self.save=[] 69 | full_batch=X.shape[0] 70 | m=int(X.shape[0]/batch_size) 71 | j=0 72 | for i in range(0,100000): 73 | 74 | 75 | 76 | x=self.weights_as_arr() 77 | 78 | 79 | num_mb=0.0 80 | dw=0.0 81 | dws=0.0 82 | score=0.0 83 | batch_done=False 84 | while not batch_done: 85 | 86 | 87 | batch_x=X[j*batch_size:(j+1)*batch_size] 88 | batch_y=Y[j*batch_size:(j+1)*batch_size] 89 | 90 | #Calc raw gradients 91 | self.model.update(batch_x,batch_y) 92 | 93 | wtmp=self.weight_grads_as_arr() 94 | dw=(num_mb*dw+wtmp)/(num_mb+1) 95 | stmp=self.weight_grads_std_as_arr()-wtmp**2 96 | dws=(num_mb*dws+stmp)/(num_mb+1) 97 | score=(score*num_mb+model.j)/(num_mb+1) 98 | num_mb+=1 99 | 100 | if not numpy.all(numpy.isfinite(dws)) or numpy.any(dws<0): 101 | print("ERROR",numpy.min(dws)) 102 | if num_mb*batch_size > 30 or num_mb>int(X.shape[0]/batch_size): 103 | batch_done=True 104 | j+=1 105 | #print(j) 106 | if j>=m-1: 107 | j=0 108 | 109 | 110 | 111 | self.n_iter+=1 112 | self.t+=1 113 | 114 | step=self.learning_rate*dw/numpy.sqrt((dws+1e-8)/(num_mb*batch_size))#/(batch_size*num_mb)) 115 | x=x-step 116 | self.update_params_from_1darr(x) 117 | self.save.append(self.model.j) 118 | #print(verbose) 119 | if verbose: 120 | strr="Epoch "+str(i+1)+": " + str(int(100.0*float(j)/m))+ " %. Loss: "+str(score) 121 | if self.model.task==1: 122 | strr+=". Acc: "+str(calc_acc(batch_y,self.model.layers[-1].out)) 123 | else: 124 | strr+=". r2: " + str(r2(batch_y,self.model.layers[-1].out)) 125 | print(strr+str(num_mb)) 126 | if self.n_iter>=maxiter: 127 | brflag=True 128 | break 129 | return numpy.array(self.save) 130 | 131 | class Adam(Optimizer): 132 | def __init__(self,learning_rate=1e-3,beta_1=0.9,beta_2=0.999,epsilon=1e-8): 133 | super() 134 | self.learning_rate=learning_rate 135 | self.beta_1=beta_1 136 | self.beta_2=beta_2 137 | self.epsilon=1e-8 138 | self.first_run=True 139 | def reset(self): 140 | self.init_moments() 141 | def init_moments(self): 142 | self.m1=numpy.zeros_like(self.weights_as_arr()) 143 | self.m2=numpy.zeros_like(self.weights_as_arr()) 144 | self.t=0 145 | 146 | def fit(self,X,Y,model,batch_size=16,maxiter=100,verbose=True): 147 | self.model=model 148 | self.n_iter=0 149 | if self.first_run: 150 | self.init_moments() 151 | self.first_run=False 152 | brflag=False 153 | self.save=[] 154 | 155 | for i in range(0,100000): 156 | m=int(X.shape[0]/batch_size) 157 | for j in range(0,m): 158 | batch_x=X[j*batch_size:(j+1)*batch_size] 159 | batch_y=Y[j*batch_size:(j+1)*batch_size] 160 | 161 | #Calc raw gradients 162 | self.model.update(batch_x,batch_y) 163 | 164 | x=self.weights_as_arr() 165 | dw=self.weight_grads_as_arr() 166 | 167 | self.n_iter+=1 168 | 169 | #Adam 170 | self.t+=1 171 | 172 | self.m1=self.beta_1*self.m1+(1.0-self.beta_1)*dw 173 | self.m2=self.beta_2*self.m2+(1.0-self.beta_2)*(dw-self.m1)**2/numpy.abs(1.0/numpy.log(self.beta_1)) 174 | 175 | m1a=self.m1/(1.0-self.beta_1**self.t) 176 | m2a=self.m2/(1.0-self.beta_2**self.t) 177 | 178 | x=x-self.learning_rate*self.m1/numpy.sqrt(self.m2+1e-8) 179 | self.update_params_from_1darr(x) 180 | self.save.append(self.model.j) 181 | if verbose: 182 | strr="Epoch "+str(i+1)+": " + str(int(100.0*float(j)/m))+ " %. 
Loss: "+str(self.model.j) 183 | print(strr) 184 | if self.n_iter>=maxiter: 185 | brflag=True 186 | break 187 | if brflag: 188 | break 189 | return numpy.array(self.save) 190 | class SciPyMin(Optimizer): 191 | def __init__(self,method): 192 | super() 193 | self.method=method 194 | def objfn(self,x): 195 | self.update_params_from_1darr(x) 196 | 197 | self.preds=self.model.update(self.X,self.Y) 198 | 199 | return self.model.j,self.weight_grads_as_arr() 200 | 201 | def print_msg(self,x): 202 | if self.verbose: 203 | strr="Epoch " +str(self.epoch)+" . Cost: " + str(self.model.j) 204 | #if self.model.task==1: 205 | # strr+=". Acc: "+str(calc_acc(self.Y,self.preds)) 206 | #else: 207 | # strr+=". r2: " + str(r2(self.Y,self.preds)) 208 | print(strr) 209 | 210 | self.epoch+=1 211 | 212 | def fit(self,X,Y,model,batch_size=None, maxiter=100,verbose=True): 213 | self.X=X 214 | self.Y=Y 215 | self.model=model 216 | self.verbose=verbose 217 | self.epoch=1 218 | 219 | from scipy.optimize import minimize 220 | x0=self.weights_as_arr() 221 | res=minimize(self.objfn,x0,jac=True,method=self.method,tol=1e-16,options={'maxiter':maxiter},callback=self.print_msg) 222 | self.update_params_from_1darr(res['x']) 223 | 224 | class SDProp(Optimizer): 225 | def __init__(self,learning_rate=1e-3,beta_1=0.9,beta_2=0.99,epsilon=1e-8,num_bands=5): 226 | super() 227 | self.learning_rate=learning_rate 228 | self.beta_1=beta_1 229 | self.beta_2=beta_2 230 | self.epsilon=1e-8 231 | self.first_run=True 232 | self.expb=False 233 | self.num_bands=num_bands 234 | def reset(self): 235 | self.init_moments() 236 | def init_moments(self): 237 | self.m1=0.0 238 | self.m12=0.0 239 | self.m2=0.0 240 | self.covm=0.0 241 | 242 | self.save=[] 243 | self.t=0 244 | 245 | def fit(self,X,Y,model,batch_size=16,maxiter=100,verbose=True): 246 | self.model=model 247 | self.n_iter=0 248 | if self.first_run: 249 | self.init_moments() 250 | self.first_run=False 251 | brflag=False 252 | 253 | for i in range(0,100000): 254 | m=int(X.shape[0]/batch_size) 255 | for j in range(0,m): 256 | batch_x=X[j*batch_size:(j+1)*batch_size] 257 | batch_y=Y[j*batch_size:(j+1)*batch_size] 258 | #Calc raw gradients 259 | self.model.update(batch_x,batch_y) 260 | 261 | x=self.weights_as_arr() 262 | dw=self.weight_grads_as_arr() 263 | 264 | 265 | #Calc raw gradients 266 | self.model.update(batch_x,batch_y) 267 | self.t+=1 268 | self.n_iter+=1 269 | 270 | self.m1=self.beta_1*self.m1+(1.0-self.beta_1)*dw 271 | self.m12=self.beta_2*self.m2+(1.0-self.beta_2)*dw 272 | self.m2=self.beta_2*self.m2+(1.0-self.beta_2)*dw**2 273 | 274 | 275 | m1a=self.m1#/(1.0-self.beta_1**self.t) 276 | m12a=self.m12#/(1.0-self.beta_2**self.t) 277 | m2a=self.m2#/(1.0-self.beta_2**self.t) 278 | #/(1.0-self.beta_2**self.t) 279 | 280 | if self.expb: 281 | 282 | dwt=dw 283 | m12t=self.m12 284 | self.covm=self.beta_2*self.covm+(1.0-self.beta_2)*numpy.outer(dwt-m12t,dwt-m12t) 285 | 286 | w,v=eigh(self.covm+numpy.identity(len(self.covm))*1e-8) 287 | 288 | s2=w**(-0.5) 289 | s2=numpy.diag(s2) 290 | K=numpy.dot(v,numpy.dot(s2,v.T)) 291 | step=self.learning_rate*numpy.dot(K,dwt) 292 | x=x-step 293 | 294 | else: 295 | 296 | x=x-self.learning_rate*dw/(numpy.sqrt(m2a)+1e-8) 297 | self.update_params_from_1darr(x) 298 | 299 | self.save.append(self.model.j) 300 | if verbose: 301 | strr="Epoch "+str(i+1)+": " + str(int(100.0*float(j)/m))+ " %. 
Loss: "+str(self.model.j) 302 | 303 | print(strr) 304 | if self.n_iter>=maxiter: 305 | brflag=True 306 | break 307 | if brflag: 308 | break 309 | return numpy.array(self.save) 310 | 311 | from sklearn.cluster import KMeans 312 | class Adam2(Optimizer): 313 | def __init__(self,learning_rate=1e-3,beta_1=0.9,beta_2=0.999,epsilon=1e-8): 314 | super() 315 | self.learning_rate=learning_rate 316 | self.beta_1=beta_1 317 | self.beta_2=beta_2 318 | self.epsilon=1e-8 319 | self.first_run=True 320 | def reset(self): 321 | self.init_moments() 322 | def init_moments(self): 323 | self.m1=numpy.zeros_like(self.weights_as_arr()) 324 | self.m2=numpy.zeros_like(self.weights_as_arr()) 325 | self.t=0 326 | def fit(self,X,Y,model,batch_size=16,maxiter=100,verbose=True): 327 | self.model=model 328 | self.n_iter=0 329 | if self.first_run: 330 | self.init_moments() 331 | self.first_run=False 332 | brflag=False 333 | self.save=[] 334 | 335 | 336 | 337 | m=int(X.shape[0]/batch_size) 338 | for i in range(0,100000): 339 | A=self.model.fast_forward(X) 340 | 341 | jj=numpy.random.randint(A.shape[0]) 342 | 343 | d=numpy.sum((A[[jj]]-A)**2,1) 344 | ass=numpy.argsort(d) 345 | 346 | batch_x=X[ass[0:500]] 347 | batch_y=Y[ass[0:500]] 348 | 349 | 350 | self.model.update(batch_x,batch_y) 351 | 352 | x=self.weights_as_arr() 353 | dw=self.weight_grads_as_arr() 354 | 355 | self.n_iter+=1 356 | 357 | #Adam 358 | self.t+=1 359 | 360 | self.m1=self.beta_1*self.m1+(1.0-self.beta_1)*dw 361 | self.m2=self.beta_2*self.m2+(1.0-self.beta_2)*(dw-self.m1)**2/numpy.abs(1.0/numpy.log(self.beta_1)) 362 | 363 | m1a=self.m1/(1.0-self.beta_1**self.t) 364 | m2a=self.m2/(1.0-self.beta_2**self.t) 365 | 366 | x=x-self.learning_rate*self.m1/numpy.sqrt(self.m2+1e-8) 367 | self.update_params_from_1darr(x) 368 | self.save.append(self.model.j) 369 | if verbose: 370 | strr= str(i)+" "+str(int(100.0*float(i)/m))+ " %. 
Loss: "+str(self.model.j) + " " +str(jj) 371 | print(strr) 372 | if self.n_iter>=maxiter: 373 | brflag=True 374 | break 375 | return numpy.array(self.save) 376 | -------------------------------------------------------------------------------- /dknet/utils.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | 3 | def one_hot(x,n_classes): 4 | assert(len(x.shape)==1) 5 | A=numpy.zeros((x.shape[0],n_classes)) 6 | A[numpy.arange(len(x)),x]=1.0 7 | return A 8 | def calc_acc(y_true,y_pred): 9 | if y_true.shape[1] > 1: 10 | return numpy.average(numpy.argmax(y_true,1)==numpy.argmax(y_pred,1)) 11 | else: 12 | return numpy.average(1.0*(y_pred>=0.5) == y_true) 13 | def r2(y_true,y_pred): 14 | avg = numpy.mean(y_true,0) 15 | var = numpy.sum((y_true-avg)**2,0) 16 | err = numpy.sum((y_true-y_pred)**2,0) 17 | r2=1.0-err/var 18 | #print(r2) 19 | return r2 20 | def normalize(X,sub,div): 21 | return (numpy.copy(X)-sub)/div 22 | 23 | def unpickle(file): 24 | import pickle 25 | with open(file, 'rb') as fo: 26 | dict = pickle.load(fo, encoding='bytes') 27 | return dict 28 | 29 | def load_cifar(shuffle=False): 30 | x_train=numpy.zeros((0,32,32,3)) 31 | y_train=numpy.zeros((0,),dtype=numpy.int) 32 | x_test=numpy.zeros((0,32,32,3)) 33 | y_test=numpy.zeros((0,),dtype=numpy.int) 34 | for i in range(0,5): 35 | dat=unpickle("data/cifar10/data_batch_"+str(i+1)) 36 | print("KEYS: ") 37 | print(dat.keys()) 38 | xdat=numpy.zeros((len(dat[b'data']),32,32,3)) 39 | xdat[:,:,:,0]=dat[b'data'][:,0:1024].reshape(-1,32,32) 40 | xdat[:,:,:,1]=dat[b'data'][:,1024:2048].reshape(-1,32,32) 41 | xdat[:,:,:,2]=dat[b'data'][:,2048:3072].reshape(-1,32,32) 42 | x_train=numpy.concatenate((x_train,xdat),0) 43 | y_train=numpy.concatenate((y_train,dat[b"labels"])) 44 | 45 | dat=unpickle("data/cifar10/test_batch") 46 | xdat=numpy.zeros((len(dat[b'data']),32,32,3)) 47 | xdat[:,:,:,0]=dat[b'data'][:,0:1024].reshape(-1,32,32) 48 | xdat[:,:,:,1]=dat[b'data'][:,1024:2048].reshape(-1,32,32) 49 | xdat[:,:,:,2]=dat[b'data'][:,2048:3072].reshape(-1,32,32) 50 | x_test=numpy.concatenate((x_test,xdat),0) 51 | y_test=numpy.concatenate((y_test,dat[b"labels"])) 52 | 53 | x_train=x_train.astype('float32') 54 | x_test=x_test.astype('float32') 55 | x_train /= 255.0 56 | x_test /= 255.0 57 | 58 | y_train=y_train.astype('int') 59 | y_test=y_test.astype('int') 60 | print(y_train) 61 | y_train = one_hot(y_train, 10) 62 | y_test = one_hot(y_test, 10) 63 | 64 | if shuffle: 65 | #Shuffle data. 66 | tmp=numpy.arange(len(x_train)) 67 | numpy.random.shuffle(tmp) 68 | x_train,y_train=x_train[tmp],y_train[tmp] 69 | 70 | tmp=numpy.arange(len(x_test)) 71 | numpy.random.shuffle(tmp) 72 | x_test,y_test=x_test[tmp],y_test[tmp] 73 | 74 | return [[x_train,y_train],[x_test,y_test]] 75 | def load_mnist(shuffle=False): 76 | 77 | #If error loading files, use this to aquire mnist, if you have keras. 
78 | # 79 | #from keras.datasets import mnist 80 | #(x_train, y_train), (x_test, y_test) = mnist.load_data() 81 | #numpy.savez_compressed("data/mnist/mnist_train",a=x_train,b=y_train) 82 | #numpy.savez_compressed("data/mnist/mnist_test",a=x_test,b=y_test) 83 | 84 | tftr,tfte=numpy.load("data/mnist/mnist_train.npz"),numpy.load("data/mnist/mnist_test.npz") 85 | x_train,y_train=tftr['a'],tftr['b'] 86 | x_test,y_test=tfte['a'],tfte['b'] 87 | 88 | x_train=x_train.astype('float32').reshape(-1,28,28,1) 89 | x_test=x_test.astype('float32').reshape(-1,28,28,1) 90 | x_train /= 255.0 91 | x_test /= 255.0 92 | y_train = one_hot(y_train, 10) 93 | y_test = one_hot(y_test, 10) 94 | 95 | if shuffle: 96 | #Shuffle data. 97 | tmp=numpy.arange(len(x_train)) 98 | numpy.random.shuffle(tmp) 99 | x_train,y_train=x_train[tmp],y_train[tmp] 100 | 101 | tmp=numpy.arange(len(x_test)) 102 | numpy.random.shuffle(tmp) 103 | x_test,y_test=x_test[tmp],y_test[tmp] 104 | 105 | return [[x_train,y_train],[x_test,y_test]] 106 | 107 | 108 | def grad_check(model,X,Y,check_n_params=50): 109 | eps=1e-7 110 | 111 | ll=[] 112 | for n in range(0,check_n_params): 113 | model.forward(X,gc=True) 114 | model.backward(Y) 115 | i=numpy.random.randint(len(model.layers)) 116 | while not model.layers[i].trainable: 117 | i=numpy.random.randint(len(model.layers)) 118 | nums=[] 119 | for j in range(0,len(model.layers[i].W.shape)): 120 | nums.append(numpy.random.randint(model.layers[i].W.shape[j])) 121 | nums=tuple(nums) 122 | 123 | bnum=[] 124 | for j in range(0,len(model.layers[i].b.shape)): 125 | bnum.append(numpy.random.randint(model.layers[i].b.shape[j])) 126 | bnum=tuple(bnum) 127 | 128 | dW=model.layers[i].dW.item(nums) 129 | db=model.layers[i].db.item(bnum) 130 | W=numpy.copy(model.layers[i].W) 131 | b=numpy.copy(model.layers[i].b) 132 | 133 | model.layers[i].W.itemset(nums,W.item(nums)+eps) 134 | model.forward(X,gc=True) 135 | model.backward(Y) 136 | jp=model.j 137 | 138 | model.layers[i].W.itemset(nums,W.item(nums)-eps) 139 | model.forward(X,gc=True) 140 | model.backward(Y) 141 | jm=model.j 142 | model.layers[i].W.itemset(nums,W.item(nums)) 143 | 144 | dW2=0.5*(jp-jm)/eps 145 | 146 | model.layers[i].b.itemset(bnum,b.item(bnum)+eps) 147 | model.forward(X,gc=True) 148 | model.backward(Y) 149 | jp=model.j 150 | model.layers[i].b.itemset(bnum,b.item(bnum)-eps) 151 | model.forward(X,gc=True) 152 | model.backward(Y) 153 | jm=model.j 154 | 155 | db2=0.5*(jp-jm)/eps 156 | model.layers[i].b.itemset(bnum,b.item(bnum)) 157 | tmp=[numpy.abs(db2-db),numpy.abs(dW2-dW)] 158 | ll.append(tmp) 159 | #print(ll) 160 | ll=numpy.array(ll) 161 | return numpy.max(ll,0) -------------------------------------------------------------------------------- /ex1_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maka89/Deep-Kernel-GP/cff31dd419c9e2da999afbe16320fd7f62c5f0ef/ex1_1.png -------------------------------------------------------------------------------- /ex1_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maka89/Deep-Kernel-GP/cff31dd419c9e2da999afbe16320fd7f62c5f0ef/ex1_2.png -------------------------------------------------------------------------------- /ex2_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maka89/Deep-Kernel-GP/cff31dd419c9e2da999afbe16320fd7f62c5f0ef/ex2_1.png 
-------------------------------------------------------------------------------- /example.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | from dknet import NNRegressor 5 | from dknet.layers import Dense,CovMat,Dropout,Parametrize,Scale 6 | from dknet.optimizers import Adam,SciPyMin,SDProp 7 | from sklearn.gaussian_process import GaussianProcessClassifier,GaussianProcessRegressor 8 | from sklearn.gaussian_process.kernels import RBF,WhiteKernel,ConstantKernel 9 | def f(x): 10 | return (x+0.5>=0)*np.sin(64*(x+0.5)**4)#-1.0*(x>0)+numpy. 11 | 12 | np.random.seed(0) 13 | x_train=np.random.random(size=(70,1))-0.5 14 | y_train=f(x_train)+np.random.normal(0.0,0.01,size=x_train.shape) 15 | 16 | 17 | 18 | layers=[] 19 | #layers.append(Dense(64,activation='tanh')) 20 | #layers.append(Dropout(0.99)) 21 | layers.append(Dense(6,activation='tanh')) 22 | layers.append(Dropout(0.99)) 23 | layers.append(Dense(1)) 24 | layers.append(Scale(fixed=True,init_vals=64.0)) 25 | layers.append(CovMat(kernel='rbf',alpha_fixed=False)) 26 | 27 | opt=Adam(1e-3) 28 | #opt=SciPyMin('l-bfgs-b') 29 | 30 | gp=NNRegressor(layers,opt=opt,batch_size=x_train.shape[0],maxiter=10000,gp=True,verbose=True) 31 | gp.fit(x_train,y_train) 32 | #print(gp.grad_check(x_train[0:10],y_train[0:10])) 33 | x_test=np.linspace(-0.7,0.7,1000).reshape(-1,1) 34 | 35 | 36 | 37 | y_pred,std=gp.predict(x_test) 38 | 39 | 40 | plt.plot(x_test,gp.layers[-2].out) 41 | plt.xlabel('X') 42 | plt.ylabel('Z') 43 | plt.figure() 44 | 45 | plt.plot(x_train,y_train,'.') 46 | plt.plot(x_test,f(x_test)[:,0]) 47 | plt.plot(x_test,y_pred) 48 | plt.xlabel('X') 49 | plt.ylabel('Y') 50 | plt.fill_between(x_test[:,0],y_pred[:,0]-std,y_pred[:,0]+std,alpha=0.5) 51 | 52 | plt.legend(['Training samples', 'True function', 'Predicted function','Prediction stddev']) 53 | plt.show() 54 | -------------------------------------------------------------------------------- /example_mnist.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from dknet import NNRegressor 4 | from dknet.layers import Dense,Conv2D,MaxPool2D,Flatten,Dropout,CovMat,Scale 5 | from dknet.optimizers import Adam,SciPyMin,SDProp, Adam2 6 | from dknet.utils import load_mnist 7 | 8 | from sklearn.gaussian_process import GaussianProcessClassifier,GaussianProcessRegressor 9 | from sklearn.gaussian_process.kernels import RBF,WhiteKernel,ConstantKernel 10 | 11 | (x_train,y_train),(x_test,y_test)=load_mnist(shuffle=True) 12 | x_train=x_train.reshape(-1,28*28) 13 | x_test=x_test.reshape(-1,28*28) 14 | 15 | y_test=np.argmax(y_test,1).reshape(-1,1) 16 | y_train=np.argmax(y_train,1).reshape(-1,1) 17 | 18 | layers=[] 19 | layers.append(Dense(64,activation='tanh')) 20 | layers.append(Dense(64,activation='tanh')) 21 | layers.append(Dense(20)) 22 | layers.append(CovMat(alpha=0.3,var=1.0,kernel='rbf')) 23 | opt=SciPyMin('l-bfgs-b') 24 | n_train = 3000 25 | n_test = 10000 26 | 27 | 28 | opt=Adam(1e-3) 29 | batch_size=500 30 | gp=NNRegressor(layers,opt=opt,batch_size=batch_size,maxiter=500,gp=True,verbose=True) 31 | gp.fit(x_train,y_train) 32 | 33 | 34 | 35 | #Can extract mapping z(x) and hyperparams for use in other learning algorithm 36 | alph=gp.layers[-1].s_alpha 37 | var=gp.layers[-1].var 38 | 39 | A_full=gp.fast_forward(x_train) 40 | 41 | 42 | kernel=ConstantKernel(var)*RBF(np.ones(1))+WhiteKernel(alph) 43 | 44 | 45 | 46 | 
A_test=gp.fast_forward(x_test[0:n_test]) 47 | yp=np.zeros(n_test) 48 | std=np.zeros(n_test) 49 | gp1=GaussianProcessRegressor(kernel,optimizer=None) 50 | gp1.fit(A_full[0:500],y_train[0:500]) 51 | mu,stdt=gp1.predict(A_test,return_std=True) 52 | 53 | print("GP Regression:") 54 | print( np.sqrt( np.mean( (np.rint(mu)-y_test)**2 ) ) ) 55 | 56 | -------------------------------------------------------------------------------- /loo-loss/__pycache__/utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/maka89/Deep-Kernel-GP/cff31dd419c9e2da999afbe16320fd7f62c5f0ef/loo-loss/__pycache__/utils.cpython-36.pyc -------------------------------------------------------------------------------- /loo-loss/kfold-loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Variable,gradcheck 3 | import numpy as np 4 | from scipy.linalg import cholesky,cho_solve,solve_triangular 5 | from utils import load_mnist 6 | from scipy.sparse.linalg import bicgstab,bicg 7 | from scipy.sparse import csr_matrix,vstack,lil_matrix 8 | import time 9 | np.random.seed(0) 10 | n_folds=6 11 | 12 | (xnp,ynp),(x_test,y_test) = load_mnist() 13 | xnp=xnp.reshape(-1,28*28).astype(np.float64) 14 | 15 | ynp=ynp.astype(np.float64) 16 | ynp=np.argmax(ynp,1).reshape(-1,1).astype(np.float64) 17 | x_test=x_test.reshape(-1,28*28).astype(np.float64) 18 | y_test=np.argmax(y_test,1).reshape(-1,1).astype(np.float64) 19 | y_test=y_test.astype(np.float64) 20 | 21 | 22 | x_test=Variable(torch.from_numpy(x_test),requires_grad=False) 23 | y_test=Variable(torch.from_numpy(y_test),requires_grad=False) 24 | 25 | nn=5000 26 | x,y,alphas=[],[],[] 27 | for i in range(0,n_folds): 28 | x.append(Variable(torch.from_numpy(xnp[i*nn:(i+1)*nn]),requires_grad=False) ) 29 | y.append(Variable(torch.from_numpy(ynp[i*nn:(i+1)*nn]),requires_grad=False) ) 30 | alphas.append(np.zeros((nn*(n_folds-1),ynp.shape[1]))) 31 | 32 | #Learnable parameters 33 | lamb = 3.5e-1 34 | 35 | 36 | hidden_size=64 37 | n_out=10 38 | w1,b1 = Variable(torch.from_numpy(np.random.normal(0.0,1.0,size=(xnp.shape[1],hidden_size))/np.sqrt(xnp.shape[1])),requires_grad=True),Variable(torch.from_numpy(np.zeros((1,hidden_size))),requires_grad=True) 39 | w2,b2 = Variable(torch.from_numpy(np.random.normal(0.0,1.0,size=(hidden_size,hidden_size))/np.sqrt(hidden_size)),requires_grad=True),Variable(torch.from_numpy(np.zeros((1,hidden_size))),requires_grad=True) 40 | w3,b3 = Variable(torch.from_numpy(np.random.normal(0.0,1.0,size=(hidden_size,n_out))/np.sqrt(hidden_size)),requires_grad=True),Variable(torch.from_numpy(np.zeros((1,n_out))),requires_grad=True) 41 | 42 | def neural_net(x): 43 | h=torch.tanh(torch.mm(x,w1)+b1) 44 | h=torch.tanh(torch.mm(h,w2)+b2) 45 | z=torch.mm(h,w3)+b3 46 | return z 47 | 48 | def pred(z_test,z_train,alpha): 49 | ztestnp=z_test.data.numpy() 50 | ztrainnp=z_train.data.numpy() 51 | pred=np.zeros(z_test.shape[0]) 52 | for i in range(0,ztestnp.shape[0]): 53 | tmp=np.exp(-0.5*np.sum((ztestnp[i]-ztrainnp)**2,1) ) 54 | pred[i] = np.dot(tmp.reshape(1,-1),alpha.reshape(-1,1)) 55 | print(pred.shape) 56 | return pred 57 | def kernel_np(z): 58 | tmp=[] 59 | for i in range(0,z.shape[1]): 60 | dists=z[:,[i]].T-z[:,[i]] 61 | tmp.append(dists.reshape(dists.shape[0],dists.shape[1],1)) 62 | dists=np.concatenate(tmp,2) 63 | return np.exp(-0.5*np.sum(dists**2,2)) 64 | 65 | def kernel_sparse(z): 66 | K=np.zeros((z.shape[0],z.shape[0])) 67 | 
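# Despite its name, kernel_sparse builds a dense Gram matrix: row i of the loop below
# holds exp(-0.5*||z_i - z_j||^2) for every j, and the noise/ridge term lamb is added
# to the diagonal, so loss_fn later solves (K + lamb*I) alpha = y with bicgstab,
# warm-started from the previous iteration's alphas.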
for i in range(0,z.shape[0]): 68 | K[i,:]=np.exp(-0.5*np.sum((z[i]-z)**2,1)) 69 | K[i,i]+=lamb 70 | return K 71 | 72 | 73 | def kernel(z): 74 | tmp=[] 75 | for i in range(0,z.size()[1]): 76 | dists=torch.t(z[:,[i]])-z[:,[i]] 77 | tmp.append(dists.view(dists.size()[0],dists.size()[1],1)) 78 | dists=torch.cat(tmp,2) 79 | return torch.exp(-0.5*torch.sum(dists**2,2)) 80 | 81 | def kernel2(z1,z2): 82 | tmp=[] 83 | for i in range(0,z1.size()[1]): 84 | dists=z1[:,[i]]-torch.t(z2[:,[i]]) 85 | tmp.append(dists.view(dists.size()[0],dists.size()[1],1)) 86 | dists=torch.cat(tmp,2) 87 | return torch.exp(-0.5*torch.sum(dists**2,2)) 88 | 89 | 90 | t1=0.0 91 | t2=0.0 92 | def loss_fn(x,y): 93 | global t1,t2 94 | la2=[] 95 | for i in range(0,len(x)): 96 | 97 | x_val,y_val = x[i],y[i] 98 | 99 | idx=np.delete(np.arange(len(x)),i) 100 | x_train,y_train = x[idx[0]],y[idx[0]] 101 | for j in range(1,len(idx)): 102 | x_train=torch.cat([x_train,x[j]],0) 103 | y_train=torch.cat([y_train,y[j]],0) 104 | 105 | z_train=neural_net(x_train).detach() 106 | 107 | z_val=neural_net(x_val) 108 | 109 | tic=time.time() 110 | Knp=kernel_sparse(z_train.data.numpy()) 111 | t1+=time.time()-tic 112 | #Knp=kernel_np(z_train.data.numpy())+np.identity(z_train.size()[0])*lamb 113 | #Knp=(Knp>=1e-3)*Knp 114 | #Knp=csr_matrix(Knp) 115 | tic=time.time() 116 | la=[] 117 | for j in range(0,y_train.size()[1]): 118 | #print(i,j,y_train.shape,alphas[i][:,j].shape,Knp.shape) 119 | alphas[i][:,j],info=bicgstab(Knp,y_train[:,j].data.numpy(),x0=alphas[i][:,j],tol=1e-6) 120 | 121 | K2 = kernel2(z_val,z_train) 122 | y_pred= torch.mm(K2, Variable(torch.from_numpy(alphas[i][:,j].reshape(-1,1)),requires_grad=False).detach()) 123 | 124 | loss = torch.mean((y_val-y_pred)**2) 125 | 126 | la.append(loss) 127 | fl=torch.mean(torch.cat(la)) 128 | la2.append(fl) 129 | t2+=time.time()-tic 130 | final_loss=torch.mean(torch.cat(la2)) 131 | 132 | return final_loss 133 | 134 | 135 | 136 | 137 | learning_rate=1e-3 138 | 139 | dw12,db12=0.0,0.0 140 | dw22,db22=0.0,0.0 141 | dw32,db32=0.0,0.0 142 | dl2 = 0.0 143 | 144 | dw11,db11=0.0,0.0 145 | dw21,db21=0.0,0.0 146 | dw31,db31=0.0,0.0 147 | dl1 = 0.0 148 | 149 | beta_1=0.0 150 | beta_2=0.99 151 | 152 | for t in range(0,10000): 153 | 154 | 155 | 156 | loss=loss_fn(x,y) 157 | print(t,loss.data.numpy(),lamb,t1,t2) 158 | if t >0: 159 | #lamb.grad.data.zero_() 160 | w1.grad.data.zero_() 161 | w2.grad.data.zero_() 162 | w3.grad.data.zero_() 163 | b1.grad.data.zero_() 164 | b2.grad.data.zero_() 165 | b3.grad.data.zero_() 166 | loss.backward() 167 | 168 | 169 | dw12=beta_2*dw12 + (1.0-beta_2)*w1.grad.data**2 170 | db12=beta_2*db12 + (1.0-beta_2)*b1.grad.data**2 171 | dw22=beta_2*dw22 + (1.0-beta_2)*w2.grad.data**2 172 | db22=beta_2*db22 + (1.0-beta_2)*b2.grad.data**2 173 | dw32=beta_2*dw32 + (1.0-beta_2)*w3.grad.data**2 174 | db32=beta_2*db32 + (1.0-beta_2)*b3.grad.data**2 175 | #dl2=beta_2*dl2 + (1.0-beta_2)*lamb.grad.data**2 176 | 177 | dw11=beta_1*dw11 + (1.0-beta_1)*w1.grad.data 178 | db11=beta_1*db11 + (1.0-beta_1)*b1.grad.data 179 | dw21=beta_1*dw21 + (1.0-beta_1)*w2.grad.data 180 | db21=beta_1*db21 + (1.0-beta_1)*b2.grad.data 181 | dw31=beta_1*dw31 + (1.0-beta_1)*w3.grad.data 182 | db31=beta_1*db31 + (1.0-beta_1)*b3.grad.data 183 | #dl1=beta_1*dl1 + (1.0-beta_1)*lamb.grad.data 184 | 185 | #lamb.data = lamb.data -learning_rate*dl1/(np.sqrt(dl2)+1e-8) 186 | w1.data = w1.data - learning_rate*dw11/(np.sqrt(dw12)+1e-8) 187 | b1.data = b1.data - learning_rate*db11/(np.sqrt(db12)+1e-8) 188 | w2.data = w2.data - 
learning_rate*dw21/(np.sqrt(dw22)+1e-8) 189 | b2.data = b2.data - learning_rate*db21/(np.sqrt(db22)+1e-8) 190 | w3.data = w3.data - learning_rate*dw31/(np.sqrt(dw32)+1e-8) 191 | b3.data = b3.data - learning_rate*db31/(np.sqrt(db32)+1e-8) 192 | 193 | #lamb.data = lamb.data -learning_rate*lamb.grad.data 194 | #w1.data = w1.data - learning_rate*w1.grad.data 195 | #b1.data = b1.data - learning_rate*b1.grad.data 196 | #w2.data = w2.data - learning_rate*w2.grad.data 197 | #b2.data = b2.data - learning_rate*b2.grad.data 198 | ##w3.data = w3.data - learning_rate*w3.grad.data 199 | #b3.data = b3.data - learning_rate*b3.grad.data 200 | if t % 10==0: 201 | final_pred=np.zeros_like(y_test.data.numpy()) 202 | la2=[] 203 | for l in range(0,len(x)): 204 | 205 | x_val,y_val = x[l],y[l] 206 | 207 | idx=np.delete(np.arange(len(x)),l) 208 | x_train,y_train = x[idx[0]],y[idx[0]] 209 | for j in range(1,len(idx)): 210 | x_train=torch.cat([x_train,x[j]],0) 211 | y_train=torch.cat([y_train,y[j]],0) 212 | 213 | z_train=neural_net(x_train).detach() 214 | z_test=neural_net(x_test) 215 | 216 | la=[] 217 | for j in range(0,y_train.size()[1]): 218 | y_pred= pred(z_test,z_train,alphas[l][:,j]) 219 | loss = np.mean((y_test.data.numpy()-y_pred)**2) 220 | final_pred[:,j]+=y_pred 221 | 222 | la.append(loss) 223 | 224 | fl=np.mean(np.array(la)) 225 | la2.append(fl) 226 | final_pred/=n_folds 227 | final_loss=np.mean(np.array(la2)) 228 | print(np.sqrt(final_loss)) 229 | print(np.average(np.rint(final_pred)==y_test.data.numpy())) 230 | 231 | -------------------------------------------------------------------------------- /loo-loss/loo-loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Variable,gradcheck 3 | import numpy as np 4 | from scipy.linalg import cholesky,cho_solve,solve_triangular 5 | from utils import load_mnist 6 | 7 | class KernelInverse(torch.autograd.Function): 8 | 9 | def forward(self,input): 10 | 11 | K=input.numpy() 12 | L=cholesky(K,lower=True) 13 | L_inv=solve_triangular(L.T,np.eye(L.shape[0])) 14 | K_inv=L_inv.dot(L_inv.T) 15 | 16 | Kinv=torch.from_numpy(K_inv) 17 | self.save_for_backward(Kinv) 18 | return Kinv 19 | def backward(self,grad_output): 20 | Kinv, = self.saved_tensors 21 | return -torch.mm(Kinv,torch.mm(grad_output,Kinv)) 22 | 23 | np.random.seed(0) 24 | 25 | 26 | 27 | #xnp=np.random.random((15,1))-0.5 28 | #ynp=np.sin(10.0*xnp) 29 | #ynp=ynp+np.random.normal(0.0,0.15,size=ynp.shape) 30 | 31 | 32 | (xnp,ynp),(x_test,y_test) = load_mnist() 33 | xnp=xnp.reshape(-1,28*28).astype(np.float64) 34 | 35 | ynp=ynp.astype(np.float64) 36 | ynp=np.argmax(ynp,1).reshape(-1,1).astype(np.float64) 37 | x_test=x_test.reshape(-1,28*28).astype(np.float64) 38 | y_test=np.argmax(y_test,1).reshape(-1,1).astype(np.float64) 39 | y_test=y_test.astype(np.float64) 40 | 41 | #Learnable parameters 42 | lamb = Variable(torch.from_numpy(np.ones((1,1)))*-0.5, requires_grad=True) 43 | hidden_size=64 44 | n_out=50 45 | w1,b1 = Variable(torch.from_numpy(np.random.normal(0.0,1.0,size=(xnp.shape[1],hidden_size))/np.sqrt(xnp.shape[1])),requires_grad=True),Variable(torch.from_numpy(np.zeros((1,hidden_size))),requires_grad=True) 46 | w2,b2 = Variable(torch.from_numpy(np.random.normal(0.0,1.0,size=(hidden_size,hidden_size))/np.sqrt(hidden_size)),requires_grad=True),Variable(torch.from_numpy(np.zeros((1,hidden_size))),requires_grad=True) 47 | 48 | 49 | w3,b3 = 
Variable(torch.from_numpy(np.random.normal(0.0,1.0,size=(hidden_size,n_out))/np.sqrt(hidden_size)),requires_grad=True),Variable(torch.from_numpy(np.zeros((1,n_out))),requires_grad=True) 50 | 51 | 52 | 53 | 54 | def kernel(x): 55 | h=torch.tanh(torch.mm(x,w1)+b1) 56 | h=torch.tanh(torch.mm(h,w2)+b2) 57 | z=torch.mm(h,w3)+b3 58 | #z=10.0*z 59 | #z=x 60 | tmp=[] 61 | for i in range(0,z.size()[1]): 62 | dists=torch.t(z[:,[i]])-z[:,[i]] 63 | tmp.append(dists.view(dists.size()[0],dists.size()[1],1)) 64 | dists=torch.cat(tmp,2) 65 | return torch.exp(-0.5*torch.sum(dists**2,2))#torch.mm(torch.t(x),x) 66 | 67 | def hat_matrix(x,lamb): 68 | K=kernel(x) 69 | K2=K+Variable(torch.from_numpy(np.eye(K.size()[0])),requires_grad=True)*(1.0/(torch.exp(-lamb[0])+1.0)+1e-8) 70 | Kinv=KernelInverse()(K2) 71 | hat=torch.mm(K,Kinv) 72 | return hat 73 | 74 | def loss_fn(hat,y): 75 | la=[] 76 | for i in range(0,y.size()[1]): 77 | matt= Variable(torch.from_numpy(np.eye(hat.size()[0])),requires_grad=True) - hat 78 | fac1=torch.mm(y[:,i].contiguous().view(1,y.size()[0]),matt) 79 | fac2=torch.mm(matt,y[:,i].contiguous().view(y.size()[0],1)) 80 | 81 | diagg = torch.diag(1.0/torch.diag(matt)) 82 | fac1=torch.mm(fac1,diagg) 83 | fac2=torch.mm(diagg,fac2) 84 | 85 | loss=torch.mm(fac1,fac2)/y.size()[0] 86 | la.append(loss) 87 | fl=torch.mean(torch.cat(la)) 88 | return fl 89 | 90 | 91 | def f(xx): 92 | #xx=Variable(torch.from_numpy(np.random.random((5,1))-0.5), requires_grad=True) 93 | #y=Variable(torch.from_numpy(3.14*x.data.numpy()), requires_grad=False) 94 | 95 | #print(lamb) 96 | #lamb = Variable(torch.from_numpy(np.ones(1)*1e-2), requires_grad=False) 97 | #print(y.size()) 98 | return loss_fn(hat_matrix(xx,lamb),y) 99 | 100 | 101 | 102 | 103 | learning_rate=1e-3 104 | 105 | dw12,db12=0.0,0.0 106 | dw22,db22=0.0,0.0 107 | dw32,db32=0.0,0.0 108 | dl2 = 0.0 109 | 110 | dw11,db11=0.0,0.0 111 | dw21,db21=0.0,0.0 112 | dw31,db31=0.0,0.0 113 | dl1 = 0.0 114 | 115 | beta_1=0.0 116 | beta_2=0.99 117 | batch_size=500 118 | m=int(xnp.shape[0]/batch_size) 119 | for t in range(0,10000): 120 | 121 | 122 | for j in range(0,m): 123 | 124 | x=Variable(torch.from_numpy(xnp[j*batch_size:(j+1)*batch_size]), requires_grad=True) 125 | y=Variable(torch.from_numpy(ynp[j*batch_size:(j+1)*batch_size]), requires_grad=False) 126 | #print(gradcheck(f,[x])) 127 | #print(x.size()) 128 | 129 | loss=loss_fn(hat_matrix(x,lamb),y) 130 | print(t,loss.data.numpy(),1.0/(np.exp(-lamb.data.numpy()) + 1.0)) 131 | if t >0: 132 | lamb.grad.data.zero_() 133 | w1.grad.data.zero_() 134 | w2.grad.data.zero_() 135 | w3.grad.data.zero_() 136 | b1.grad.data.zero_() 137 | b2.grad.data.zero_() 138 | b3.grad.data.zero_() 139 | loss.backward() 140 | 141 | 142 | dw12=beta_2*dw12 + (1.0-beta_2)*w1.grad.data**2 143 | db12=beta_2*db12 + (1.0-beta_2)*b1.grad.data**2 144 | dw22=beta_2*dw22 + (1.0-beta_2)*w2.grad.data**2 145 | db22=beta_2*db22 + (1.0-beta_2)*b2.grad.data**2 146 | dw32=beta_2*dw32 + (1.0-beta_2)*w3.grad.data**2 147 | db32=beta_2*db32 + (1.0-beta_2)*b3.grad.data**2 148 | dl2=beta_2*dl2 + (1.0-beta_2)*lamb.grad.data**2 149 | 150 | dw11=beta_1*dw11 + (1.0-beta_1)*w1.grad.data 151 | db11=beta_1*db11 + (1.0-beta_1)*b1.grad.data 152 | dw21=beta_1*dw21 + (1.0-beta_1)*w2.grad.data 153 | db21=beta_1*db21 + (1.0-beta_1)*b2.grad.data 154 | dw31=beta_1*dw31 + (1.0-beta_1)*w3.grad.data 155 | db31=beta_1*db31 + (1.0-beta_1)*b3.grad.data 156 | dl1=beta_1*dl1 + (1.0-beta_1)*lamb.grad.data 157 | 158 | lamb.data = lamb.data -learning_rate*dl1/(np.sqrt(dl2)+1e-8) 159 | w1.data = 
w1.data - learning_rate*dw11/(np.sqrt(dw12)+1e-8) 160 | b1.data = b1.data - learning_rate*db11/(np.sqrt(db12)+1e-8) 161 | w2.data = w2.data - learning_rate*dw21/(np.sqrt(dw22)+1e-8) 162 | b2.data = b2.data - learning_rate*db21/(np.sqrt(db22)+1e-8) 163 | w3.data = w3.data - learning_rate*dw31/(np.sqrt(dw32)+1e-8) 164 | b3.data = b3.data - learning_rate*db31/(np.sqrt(db32)+1e-8) 165 | 166 | #lamb.data = lamb.data -learning_rate*lamb.grad.data 167 | #w1.data = w1.data - learning_rate*w1.grad.data 168 | #b1.data = b1.data - learning_rate*b1.grad.data 169 | #w2.data = w2.data - learning_rate*w2.grad.data 170 | #b2.data = b2.data - learning_rate*b2.grad.data 171 | ##w3.data = w3.data - learning_rate*w3.grad.data 172 | #b3.data = b3.data - learning_rate*b3.grad.data 173 | 174 | if j % 10 == 0: 175 | nug=lamb.data.numpy() 176 | K1=np.zeros((x_test.shape[0],xnp[j*batch_size:(j+1)*batch_size].shape[0])) 177 | K2=np.zeros((xnp[j*batch_size:(j+1)*batch_size].shape[0],xnp[j*batch_size:(j+1)*batch_size].shape[0])) 178 | 179 | 180 | 181 | z_test=np.tanh(np.dot(x_test,w1.data.numpy())+b1.data.numpy()) 182 | z_test=np.tanh(np.dot(z_test,w2.data.numpy())+b2.data.numpy()) 183 | z_test=np.dot(z_test,w3.data.numpy())+b3.data.numpy() 184 | #z_test=10.0*z_test 185 | 186 | znp=np.tanh(np.dot(xnp[j*batch_size:(j+1)*batch_size],w1.data.numpy())+b1.data.numpy()) 187 | znp=np.tanh(np.dot(znp,w2.data.numpy())+b2.data.numpy()) 188 | znp=np.dot(znp,w3.data.numpy())+b3.data.numpy() 189 | #znp=10.0*znp 190 | 191 | for k in range(0,xnp[j*batch_size:(j+1)*batch_size].shape[0]): 192 | K1[:,k]=np.exp(-0.5*np.sum((z_test-znp[[k],:])**2,1)) 193 | K2[:,k]=np.exp(-0.5*np.sum((znp-znp[[k],:])**2,1)) 194 | K2[k,k]+=(1.0/(np.exp(-nug)+1.0)+1e-8) 195 | L=cholesky(K2,lower=True) 196 | L_inv=solve_triangular(L.T,np.eye(L.shape[0])) 197 | K_inv=L_inv.dot(L_inv.T) 198 | 199 | yp=np.dot(K1,np.dot(K_inv,ynp[j*batch_size:(j+1)*batch_size])) 200 | yp2=np.rint(yp) 201 | print(np.average(yp2==y_test), np.sqrt(np.mean((yp-y_test)**2)) ) 202 | 203 | #print(np.average(np.argmax(yp,1)==np.argmax(y_test,1))) 204 | #print(yp) 205 | #print(y_test) 206 | 207 | 208 | -------------------------------------------------------------------------------- /loo-loss/utils.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | 3 | def one_hot(x,n_classes): 4 | assert(len(x.shape)==1) 5 | A=numpy.zeros((x.shape[0],n_classes)) 6 | A[numpy.arange(len(x)),x]=1.0 7 | return A 8 | def calc_acc(y_true,y_pred): 9 | if y_true.shape[1] > 1: 10 | return numpy.average(numpy.argmax(y_true,1)==numpy.argmax(y_pred,1)) 11 | else: 12 | return numpy.average(1.0*(y_pred>=0.5) == y_true) 13 | def r2(y_true,y_pred): 14 | avg = numpy.mean(y_true,0) 15 | var = numpy.sum((y_true-avg)**2,0) 16 | err = numpy.sum((y_true-y_pred)**2,0) 17 | r2=1.0-err/var 18 | #print(r2) 19 | return r2 20 | def normalize(X,sub,div): 21 | return (numpy.copy(X)-sub)/div 22 | 23 | def unpickle(file): 24 | import pickle 25 | with open(file, 'rb') as fo: 26 | dict = pickle.load(fo, encoding='bytes') 27 | return dict 28 | 29 | def load_cifar(shuffle=False): 30 | x_train=numpy.zeros((0,32,32,3)) 31 | y_train=numpy.zeros((0,),dtype=numpy.int) 32 | x_test=numpy.zeros((0,32,32,3)) 33 | y_test=numpy.zeros((0,),dtype=numpy.int) 34 | for i in range(0,5): 35 | dat=unpickle("data/cifar10/data_batch_"+str(i+1)) 36 | print("KEYS: ") 37 | print(dat.keys()) 38 | xdat=numpy.zeros((len(dat[b'data']),32,32,3)) 39 | xdat[:,:,:,0]=dat[b'data'][:,0:1024].reshape(-1,32,32) 40 | 
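# Each CIFAR-10 record stores a 32x32 image as 3072 values in channel-major order:
# entries 0:1024 are the red plane, 1024:2048 the green plane and 2048:3072 the blue
# plane, each reshaped row-major to 32x32 before being stacked into (N, 32, 32, 3).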
xdat[:,:,:,1]=dat[b'data'][:,1024:2048].reshape(-1,32,32) 41 | xdat[:,:,:,2]=dat[b'data'][:,2048:3072].reshape(-1,32,32) 42 | x_train=numpy.concatenate((x_train,xdat),0) 43 | y_train=numpy.concatenate((y_train,dat[b"labels"])) 44 | 45 | dat=unpickle("data/cifar10/test_batch") 46 | xdat=numpy.zeros((len(dat[b'data']),32,32,3)) 47 | xdat[:,:,:,0]=dat[b'data'][:,0:1024].reshape(-1,32,32) 48 | xdat[:,:,:,1]=dat[b'data'][:,1024:2048].reshape(-1,32,32) 49 | xdat[:,:,:,2]=dat[b'data'][:,2048:3072].reshape(-1,32,32) 50 | x_test=numpy.concatenate((x_test,xdat),0) 51 | y_test=numpy.concatenate((y_test,dat[b"labels"])) 52 | 53 | x_train=x_train.astype('float32') 54 | x_test=x_test.astype('float32') 55 | x_train /= 255.0 56 | x_test /= 255.0 57 | 58 | y_train=y_train.astype('int') 59 | y_test=y_test.astype('int') 60 | print(y_train) 61 | y_train = one_hot(y_train, 10) 62 | y_test = one_hot(y_test, 10) 63 | 64 | if shuffle: 65 | #Shuffle data. 66 | tmp=numpy.arange(len(x_train)) 67 | numpy.random.shuffle(tmp) 68 | x_train,y_train=x_train[tmp],y_train[tmp] 69 | 70 | tmp=numpy.arange(len(x_test)) 71 | numpy.random.shuffle(tmp) 72 | x_test,y_test=x_test[tmp],y_test[tmp] 73 | 74 | return [[x_train,y_train],[x_test,y_test]] 75 | def load_mnist(shuffle=False): 76 | 77 | #If error loading files, use this to acquire mnist, if you have keras. 78 | # 79 | #from keras.datasets import mnist 80 | #(x_train, y_train), (x_test, y_test) = mnist.load_data() 81 | #numpy.savez_compressed("data/mnist/mnist_train",a=x_train,b=y_train) 82 | #numpy.savez_compressed("data/mnist/mnist_test",a=x_test,b=y_test) 83 | 84 | tftr,tfte=numpy.load("../data/mnist/mnist_train.npz"),numpy.load("../data/mnist/mnist_test.npz") 85 | x_train,y_train=tftr['a'],tftr['b'] 86 | x_test,y_test=tfte['a'],tfte['b'] 87 | 88 | x_train=x_train.astype('float32').reshape(-1,28,28,1) 89 | x_test=x_test.astype('float32').reshape(-1,28,28,1) 90 | x_train /= 255.0 91 | x_test /= 255.0 92 | y_train = one_hot(y_train, 10) 93 | y_test = one_hot(y_test, 10) 94 | 95 | if shuffle: 96 | #Shuffle data.
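# One permutation is drawn per split and used to index images and labels together,
# so x_train[k] stays paired with y_train[k]; the test set gets its own permutation.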
97 | tmp=numpy.arange(len(x_train)) 98 | numpy.random.shuffle(tmp) 99 | x_train,y_train=x_train[tmp],y_train[tmp] 100 | 101 | tmp=numpy.arange(len(x_test)) 102 | numpy.random.shuffle(tmp) 103 | x_test,y_test=x_test[tmp],y_test[tmp] 104 | 105 | return [[x_train,y_train],[x_test,y_test]] 106 | 107 | 108 | def grad_check(model,X,Y,check_n_params=50): 109 | eps=1e-7 110 | 111 | ll=[] 112 | for n in range(0,check_n_params): 113 | model.forward(X,gc=True) 114 | model.backward(Y) 115 | i=numpy.random.randint(len(model.layers)) 116 | while not model.layers[i].trainable: 117 | i=numpy.random.randint(len(model.layers)) 118 | nums=[] 119 | for j in range(0,len(model.layers[i].W.shape)): 120 | nums.append(numpy.random.randint(model.layers[i].W.shape[j])) 121 | nums=tuple(nums) 122 | 123 | bnum=[] 124 | for j in range(0,len(model.layers[i].b.shape)): 125 | bnum.append(numpy.random.randint(model.layers[i].b.shape[j])) 126 | bnum=tuple(bnum) 127 | 128 | dW=model.layers[i].dW.item(nums) 129 | db=model.layers[i].db.item(bnum) 130 | W=numpy.copy(model.layers[i].W) 131 | b=numpy.copy(model.layers[i].b) 132 | 133 | model.layers[i].W.itemset(nums,W.item(nums)+eps) 134 | model.forward(X,gc=True) 135 | model.backward(Y) 136 | jp=model.j 137 | 138 | model.layers[i].W.itemset(nums,W.item(nums)-eps) 139 | model.forward(X,gc=True) 140 | model.backward(Y) 141 | jm=model.j 142 | model.layers[i].W.itemset(nums,W.item(nums)) 143 | 144 | dW2=0.5*(jp-jm)/eps 145 | 146 | model.layers[i].b.itemset(bnum,b.item(bnum)+eps) 147 | model.forward(X,gc=True) 148 | model.backward(Y) 149 | jp=model.j 150 | model.layers[i].b.itemset(bnum,b.item(bnum)-eps) 151 | model.forward(X,gc=True) 152 | model.backward(Y) 153 | jm=model.j 154 | 155 | db2=0.5*(jp-jm)/eps 156 | model.layers[i].b.itemset(bnum,b.item(bnum)) 157 | tmp=[numpy.abs(db2-db),numpy.abs(dW2-dW)] 158 | ll.append(tmp) 159 | #print(ll) 160 | ll=numpy.array(ll) 161 | return numpy.max(ll,0) 162 | --------------------------------------------------------------------------------
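The `loss_fn` in `loo-loss/loo-loss.py` above scores the network features with a leave-one-out criterion: it builds the hat matrix H = K (K + lamb*I)^-1 and forms the leave-one-out residuals from (I - H) y divided elementwise by diag(I - H), which for kernel ridge regression equals the residuals obtained by actually refitting with each point held out. Below is a minimal numpy sketch of that identity on synthetic data; the toy features `Z`, targets `y`, and `lam` are made up for illustration and are not values from the repository:

```python
import numpy as np

def rbf_kernel(Z):
    # exp(-0.5*||z_i - z_j||^2), the same covariance form used in loo-loss.py
    sq = np.sum(Z ** 2, axis=1)
    d2 = sq[:, None] + sq[None, :] - 2.0 * Z @ Z.T
    return np.exp(-0.5 * np.maximum(d2, 0.0))

rng = np.random.default_rng(0)
Z = rng.normal(size=(50, 2))       # stand-in for the network outputs z(x)
y = np.sin(Z[:, :1])               # toy targets, shape (50, 1)
lam = 0.1                          # ridge/noise term

K = rbf_kernel(Z)
H = K @ np.linalg.inv(K + lam * np.eye(len(Z)))     # hat matrix
R = np.eye(len(Z)) - H
loo_residuals = (R @ y) / np.diag(R)[:, None]       # shortcut LOO residuals
loo_mse = np.mean(loo_residuals ** 2)

# Brute-force check: refit kernel ridge with sample i removed and predict it.
errs = []
for i in range(len(Z)):
    idx = np.delete(np.arange(len(Z)), i)
    alpha = np.linalg.solve(K[np.ix_(idx, idx)] + lam * np.eye(len(idx)), y[idx])
    errs.append((y[i] - K[i, idx] @ alpha).item())
print(loo_mse, np.mean(np.square(errs)))  # the two numbers should agree closely
```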