├── .gitignore ├── README.md ├── examples ├── data.py ├── linreg_example.py ├── mnist_logreg.py ├── mnist_nnet.py ├── nnet_example.py └── poisson_glm.py ├── kayak ├── __init__.py ├── batcher.py ├── convolution.py ├── crossval.py ├── differentiable.py ├── dropout.py ├── elem_ops.py ├── generic_ops.py ├── indexing.py ├── input_checking.py ├── losses.py ├── matrix_ops.py ├── nonlinearities.py ├── regularizers.py ├── root_nodes.py ├── stacking.py └── util.py ├── license.txt ├── setup.py └── tests ├── __init__.py ├── check_MemoryUse.py ├── test_BatchNormalize.py ├── test_Batcher.py ├── test_CacheFreshness.py ├── test_Constant.py ├── test_Convolve1d.py ├── test_Dropout.py ├── test_ElemAbs.py ├── test_ElemExp.py ├── test_ElemMult.py ├── test_ElemPower.py ├── test_Graphs.py ├── test_HardReLU.py ├── test_Horseshoe.py ├── test_Identity.py ├── test_Indexing.py ├── test_Inputs.py ├── test_L1Norm.py ├── test_L2Loss.py ├── test_L2Norm.py ├── test_LogMultinomialLoss.py ├── test_LogSoftMax.py ├── test_Logistic.py ├── test_MatAdd.py ├── test_MatConcat.py ├── test_MatDet.py ├── test_MatMean.py ├── test_MatMult.py ├── test_MatSum.py ├── test_NExp.py ├── test_OperatorAdd.py ├── test_OperatorMult.py ├── test_OperatorNeg.py ├── test_Parameter.py ├── test_Reshape.py ├── test_SoftMax.py ├── test_SoftReLU.py ├── test_Stacking.py ├── test_TanH.py ├── test_Targets.py ├── test_TensorMult.py └── test_Transpose.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.gz 2 | *.pyc 3 | *.pyo 4 | .DS_Store 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Don't use this: use [Autograd](http://github.com/hips/autograd) instead! 2 | ======================================= 3 | 4 | Kayak: Library for Deep Neural Networks 5 | ======================================= 6 | 7 | This is a library that implements some useful modules and provides 8 | automatic differentiation utilities for learning deep neural networks. 9 | It is similar in spirit to tools like 10 | [Theano](http://deeplearning.net/software/theano/) and 11 | [Torch](http://torch.ch/). The objective of Kayak is to be simple to 12 | use and extend, for rapid prototyping in Python. It is unlikely to be 13 | faster than these other tools, although it can be competitive, and 14 | sometimes faster, when the architectures are highly 15 | complex. It will certainly not be faster on convolutional 16 | architectures for visual object detection and recognition tasks than, 17 | e.g., [Alex Krizhevsky's CUDA 18 | Convnet](https://code.google.com/p/cuda-convnet2/) or 19 | [Caffe](http://caffe.berkeleyvision.org/). The point of Kayak is to 20 | be able to experiment in Python with patterns that look a lot like 21 | what you're already used to with Numpy. It makes it easy to manage 22 | batches of data and compute gradients with backpropagation. 23 | 24 | There are some examples in the 'examples' directory, but the main idea 25 | looks like this: 26 | 27 | import kayak 28 | import numpy.random as npr 29 | 30 | X = ... your feature matrix ... 31 | Y = ... your label matrix ... 32 | 33 | # Create Kayak objects for features and labels. 34 | inputs = kayak.Inputs(X) 35 | targets = kayak.Targets(Y) 36 | 37 | # Create Kayak objects for first-layer weights and biases. Initialize 38 | # them with random Numpy matrices.
39 | weights_1 = kayak.Parameter(npr.randn( input_dims, hidsize_1 )) 40 | biases_1 = kayak.Parameter(npr.randn( 1, hidsize_1 )) 41 | 42 | # Create Kayak objects that implement a network layer. First, 43 | # multiply the features by weights and add biases. 44 | hiddens_1a = kayak.ElemAdd(kayak.MatMult( inputs, weights_1 ), biases_1) 45 | 46 | # Then, apply a "relu" (rectified linear) nonlinearity. 47 | # Alternatively, you can apply your own favorite nonlinearity, or 48 | # add one for an idea that you want to try out. 49 | hiddens_1b = kayak.HardReLU(hiddens_1a) 50 | 51 | # Now, apply a "dropout" layer to prevent co-adaptation. Got a 52 | # new idea for dropout? It's super easy to extend Kayak with it. 53 | hiddens_1 = kayak.Dropout(hiddens_1b, drop_prob=0.5) 54 | 55 | # Okay, with that layer constructed, let's make another one the 56 | # same way: linear transformation + bias with ReLU and dropout. 57 | # First, create the second-layer parameters. 58 | weights_2 = kayak.Parameter(npr.randn(hidsize_1, hidsize_2)) 59 | biases_2 = kayak.Parameter(npr.randn(1, hidsize_2)) 60 | 61 | # This time, let's compose all the steps, just to show we can. 62 | hiddens_2 = kayak.Dropout( kayak.HardReLU( kayak.ElemAdd( \ 63 | kayak.MatMult( hiddens_1, weights_2), biases_2)), drop_prob=0.5) 64 | 65 | # Make the output layer linear. 66 | weights_out = kayak.Parameter(npr.randn(hidsize_2, 1)) 67 | biases_out = kayak.Parameter(npr.randn()) 68 | out = kayak.ElemAdd( kayak.MatMult( hiddens_2, weights_out), biases_out) 69 | 70 | # Apply a loss function. In this case, we'll just do squared loss. 71 | loss = kayak.MatSum( kayak.L2Loss( out, targets )) 72 | 73 | # Maybe roll in an L1 norm for the first layer and an L2 norm for the others? 74 | objective = kayak.ElemAdd(loss, 75 | kayak.L1Norm(weights_1, weight=100.0), 76 | kayak.L2Norm(weights_2, weight=50.0), 77 | kayak.L2Norm(weights_out, weight=3.0)) 78 | 79 | # This is the fun part and is the whole point of Kayak. You can 80 | # now get the gradient of anything in terms of anything else. 81 | # Probably, if you're doing neural networks, you want the gradient 82 | # of the parameters in terms of the overall objective. That way 83 | # you can go off and do some kind of optimization. 84 | weights_1_grad = objective.grad(weights_1) 85 | biases_1_grad = objective.grad(biases_1) 86 | weights_2_grad = objective.grad(weights_2) 87 | biases_2_grad = objective.grad(biases_2) 88 | weights_out_grad = objective.grad(weights_out) 89 | biases_out_grad = objective.grad(biases_out) 90 | 91 | ... use the gradients for learning ... 92 | ... probably this whole thing would be in a loop ... 93 | ... in practice you'd probably also use minibatches ... 94 | 95 | This is a work in progress and we welcome contributions. Some 96 | nosetests are implemented. We're working on documentation. Whatever 97 | docs come into existence will end up at 98 | [http://hips.github.io/Kayak](http://hips.github.io/Kayak). 99 | 100 | This project is primarily developed by the [Harvard Intelligent 101 | Probabilistic Systems (HIPS)](http://hips.seas.harvard.edu) group in 102 | the [Harvard School of Engineering and Applied Sciences 103 | (SEAS)](http://www.seas.harvard.edu). The primary developers to date 104 | have been Ryan Adams, David Duvenaud, Scott Linderman, Dougal 105 | Maclaurin, and Jasper Snoek.
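To make the "use the gradients for learning" step in the example above concrete, here is a minimal sketch of a plain gradient-descent loop over the same objects (the learning rate and epoch count are arbitrary illustrations, not library defaults):

    learn_rate = 0.01
    for epoch in xrange(100):
        # Recompute gradients at the current parameter values.
        weights_1_grad = objective.grad(weights_1)
        biases_1_grad = objective.grad(biases_1)
        weights_2_grad = objective.grad(weights_2)
        biases_2_grad = objective.grad(biases_2)
        weights_out_grad = objective.grad(weights_out)
        biases_out_grad = objective.grad(biases_out)

        # Plain gradient-descent updates. Assigning to .value clears
        # the cached computation, so the next epoch starts fresh.
        weights_1.value -= learn_rate * weights_1_grad
        biases_1.value -= learn_rate * biases_1_grad
        weights_2.value -= learn_rate * weights_2_grad
        biases_2.value -= learn_rate * biases_2_grad
        weights_out.value -= learn_rate * weights_out_grad
        biases_out.value -= learn_rate * biases_out_grad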
106 | 107 | Kayak is Copyrighted by The President and Fellows of Harvard 108 | University, and is distributed under an MIT license, which can be 109 | found in the license.txt file but is also below: 110 | 111 | Permission is hereby granted, free of charge, to any person obtaining a copy 112 | of this software and associated documentation files (the "Software"), to deal 113 | in the Software without restriction, including without limitation the rights 114 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 115 | copies of the Software, and to permit persons to whom the Software is 116 | furnished to do so, subject to the following conditions: 117 | 118 | The above copyright notice and this permission notice shall be included in all 119 | copies or substantial portions of the Software. 120 | 121 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 122 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 123 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 124 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 125 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 126 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 127 | SOFTWARE. 128 | -------------------------------------------------------------------------------- /examples/data.py: -------------------------------------------------------------------------------- 1 | import os 2 | import urllib 3 | import gzip 4 | import struct 5 | import array 6 | import numpy as np 7 | 8 | def download(url, filename): 9 | if not os.path.exists('data'): 10 | os.makedirs('data') 11 | out_file = os.path.join('data', filename) 12 | if not os.path.isfile(out_file): 13 | urllib.urlretrieve(url, out_file) 14 | 15 | def mnist(): 16 | base_url = 'http://yann.lecun.com/exdb/mnist/' 17 | 18 | def parse_labels(filename): 19 | with gzip.open(filename, 'rb') as fh: 20 | magic, num_data = struct.unpack(">II", fh.read(8)) 21 | return np.array(array.array("B", fh.read()), dtype=np.uint8) 22 | 23 | def parse_images(filename): 24 | with gzip.open(filename, 'rb') as fh: 25 | magic, num_data, rows, cols = struct.unpack(">IIII", fh.read(16)) 26 | return np.array(array.array("B", fh.read()), dtype=np.uint8).reshape(num_data, rows, cols) 27 | 28 | for filename in ['train-images-idx3-ubyte.gz', 29 | 'train-labels-idx1-ubyte.gz', 30 | 't10k-images-idx3-ubyte.gz', 31 | 't10k-labels-idx1-ubyte.gz']: 32 | download(base_url + filename, filename) 33 | 34 | train_images = parse_images('data/train-images-idx3-ubyte.gz') 35 | train_labels = parse_labels('data/train-labels-idx1-ubyte.gz') 36 | test_images = parse_images('data/t10k-images-idx3-ubyte.gz') 37 | test_labels = parse_labels('data/t10k-labels-idx1-ubyte.gz') 38 | 39 | return train_images, train_labels, test_images, test_labels 40 | -------------------------------------------------------------------------------- /examples/linreg_example.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import sys 5 | sys.path.append('..') 6 | 7 | import kayak 8 | 9 | N = 10000 10 | D = 5 11 | P = 3 12 | learn = 0.00001 13 | batch_size = 500 14 | 15 | # Random inputs. 16 | X = npr.randn(N,D) 17 | true_W = npr.randn(D,P) 18 | Y = np.dot(X, true_W) + 0.1*npr.randn(N,P) 19 | 20 | kyk_batcher = kayak.Batcher(batch_size, N) 21 | 22 | # Build network. 
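# (The "network" here is just one linear layer: the predictions are np.dot(X, W), trained with squared loss against Y.)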
23 | kyk_inputs = kayak.Inputs(X, kyk_batcher) 24 | 25 | # Labels. 26 | kyk_targets = kayak.Targets(Y, kyk_batcher) 27 | 28 | # Weights. 29 | W = 0.01*npr.randn(D,P) 30 | kyk_W = kayak.Parameter(W) 31 | 32 | # Linear layer. 33 | kyk_out = kayak.MatMult( kyk_inputs, kyk_W ) 34 | 35 | # Elementwise Loss. 36 | kyk_el_loss = kayak.L2Loss(kyk_out, kyk_targets) 37 | 38 | # Sum the losses. 39 | kyk_loss = kayak.MatSum( kyk_el_loss ) 40 | 41 | for ii in xrange(100): 42 | 43 | for batch in kyk_batcher: 44 | loss = kyk_loss.value 45 | print loss, np.sum((kyk_W.value - true_W)**2) 46 | grad = kyk_loss.grad(kyk_W) 47 | kyk_W.value -= learn * grad 48 | -------------------------------------------------------------------------------- /examples/mnist_logreg.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import data 3 | import numpy as np 4 | import numpy.random as npr 5 | 6 | num_folds = 5 7 | 8 | sys.path.append('..') 9 | import kayak 10 | 11 | # Here I define a nice little training function that takes inputs and targets. 12 | def train(inputs, targets, batch_size, learn_rate, momentum, l1_weight, l2_weight, dropout): 13 | 14 | # Create a batcher object. 15 | batcher = kayak.Batcher(batch_size, inputs.shape[0]) 16 | 17 | # Inputs and targets need access to the batcher. 18 | X = kayak.Inputs(inputs, batcher) 19 | T = kayak.Targets(targets, batcher) 20 | 21 | # Weights and biases, with random initializations. 22 | W = kayak.Parameter( 0.1*npr.randn( inputs.shape[1], 10 )) 23 | B = kayak.Parameter( 0.1*npr.randn(1,10) ) 24 | 25 | # Nothing fancy here: inputs times weights, plus bias, then softmax. 26 | dropout_layer = kayak.Dropout(X, dropout, batcher=batcher) 27 | Y = kayak.LogSoftMax( kayak.ElemAdd( kayak.MatMult(dropout_layer, W), B ) ) 28 | 29 | # The training loss is negative multinomial log likelihood. 30 | loss = kayak.MatAdd(kayak.MatSum(kayak.LogMultinomialLoss(Y, T)), 31 | kayak.L2Norm(W, l2_weight), 32 | kayak.L1Norm(W, l1_weight)) 33 | 34 | # Use momentum for the gradient-based optimization. 35 | mom_grad_W = np.zeros(W.shape) 36 | 37 | # Loop over epochs. 38 | for epoch in xrange(10): 39 | 40 | # Track the total loss and the overall gradient. 41 | total_loss = 0.0 42 | total_grad_W = np.zeros(W.shape) 43 | 44 | # Loop over batches -- using batcher as iterator. 45 | for batch in batcher: 46 | # Compute the loss of this minibatch by asking the Kayak 47 | # object for its value. 48 | total_loss += loss.value 49 | 50 | # Now ask the loss for its gradient in terms of the 51 | # weights and the biases -- the two things we're trying to 52 | # learn here. 53 | grad_W = loss.grad(W) 54 | grad_B = loss.grad(B) 55 | 56 | # Use momentum on the weight gradient. 57 | mom_grad_W = momentum*mom_grad_W + (1.0-momentum)*grad_W 58 | 59 | # Now make the actual parameter updates. 60 | W.value -= learn_rate * mom_grad_W 61 | B.value -= learn_rate * grad_B 62 | 63 | # Keep track of the gradient to see if we're converging. 64 | total_grad_W += grad_W 65 | 66 | #print epoch, total_loss, np.sum(total_grad_W**2) 67 | 68 | # After we've trained, we return a sugary little function handle 69 | # that makes things easy. Basically, what we're doing here is 70 | # overwriting the data of the Kayak input object 'X' (that is, the 71 | # features being used here for logistic regression) with whatever 72 | # array the caller passes in, and switching the batcher into test 73 | # mode, so that predictions are made on all of the data at once.
74 | # The point here is that we wind up with a function handle 75 | # that can be called with a numpy object and it produces the 76 | # target values for novel data, using the parameters we just learned. 77 | 78 | def compute_predictions(x): 79 | X.data = x 80 | batcher.test_mode() 81 | return Y.value 82 | 83 | return compute_predictions 84 | 85 | def evaluate(batch_size, learn_rate, momentum, l1_weight, l2_weight, dropout): 86 | 87 | # Load in the MNIST data. 88 | train_images, train_labels, test_images, test_labels = data.mnist() 89 | 90 | # Turn the uint8 images into floating-point vectors. 91 | train_images = np.reshape(train_images, 92 | (train_images.shape[0], 93 | train_images.shape[1]*train_images.shape[2]))/255.0 94 | 95 | # Use one-hot coding for the labels. 96 | train_labels = kayak.util.onehot(train_labels) 97 | test_labels = kayak.util.onehot(test_labels) 98 | 99 | # Hand the training data off to a cross-validation object. 100 | # This will create num_folds folds (five here) and allow us to easily iterate. 101 | CV = kayak.CrossValidator(num_folds, train_images, train_labels) 102 | 103 | valid_acc = 0.0 104 | 105 | # Loop over our cross validation folds. 106 | for ii, fold in enumerate(CV): 107 | 108 | # Get the training and validation data, according to this fold. 109 | train_images, train_labels = fold.train() 110 | valid_images, valid_labels = fold.valid() 111 | 112 | # Train on these data and get a prediction function back. 113 | pred_func = train(train_images, train_labels, batch_size, 114 | learn_rate, momentum, l1_weight, l2_weight, dropout) 115 | 116 | # Make predictions on the validation data. 117 | valid_preds = np.argmax(pred_func( valid_images ), axis=1) 118 | 119 | # How did we do? 120 | acc = np.mean(valid_preds == np.argmax(valid_labels, axis=1)) 121 | print "Fold %02d: %0.6f" % (ii+1, acc) 122 | valid_acc += acc 123 | 124 | 125 | print "Overall: %0.6f" % (valid_acc / num_folds) 126 | return valid_acc / num_folds 127 | 128 | if __name__ == '__main__': 129 | evaluate( batch_size = 256, 130 | learn_rate = 0.001, 131 | momentum = 0.9, 132 | l1_weight = 1.0, 133 | l2_weight = 1.0, 134 | dropout = 0.2 ) 135 | 136 | -------------------------------------------------------------------------------- /examples/mnist_nnet.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import time 3 | import data 4 | import numpy as np 5 | import numpy.random as npr 6 | 7 | sys.path.append('..') 8 | 9 | import kayak 10 | 11 | batch_size = 256 12 | learn_rate = 0.01 13 | momentum = 0.9 14 | layer1_sz = 500 15 | layer2_sz = 500 16 | layer1_dropout = 0.25 17 | layer2_dropout = 0.25 18 | 19 | npr.seed(1) 20 | 21 | # Load in the MNIST data. 22 | train_images, train_labels, test_images, test_labels = data.mnist() 23 | 24 | # Turn the uint8 images into floating-point vectors. 25 | train_images = np.reshape(train_images, 26 | (train_images.shape[0], 27 | train_images.shape[1]*train_images.shape[2]))/255.0 28 | 29 | # Use one-hot coding for the labels. 30 | train_labels = kayak.util.onehot(train_labels) 31 | test_labels = kayak.util.onehot(test_labels) 32 | 33 | # Hand the training data off to a cross-validation object. 34 | # This will create ten folds and allow us to easily iterate. 35 | CV = kayak.CrossValidator(10, train_images, train_labels) 36 | 37 | # Here I define a nice little training function that takes inputs and targets. 38 | def train(inputs, targets): 39 | # Create a batcher object.
40 | batcher = kayak.Batcher(batch_size, inputs.shape[0]) 41 | 42 | # Inputs and targets need access to the batcher. 43 | X = kayak.Inputs(inputs, batcher) 44 | T = kayak.Targets(targets, batcher) 45 | 46 | # First-layer weights and biases, with random initializations. 47 | W1 = kayak.Parameter( 0.1*npr.randn( inputs.shape[1], layer1_sz )) 48 | B1 = kayak.Parameter( 0.1*npr.randn(1, layer1_sz) ) 49 | 50 | # First hidden layer: ReLU + Dropout 51 | H1 = kayak.Dropout(kayak.HardReLU(kayak.ElemAdd(kayak.MatMult(X, W1), B1)), 52 | layer1_dropout, batcher=batcher) 53 | 54 | # Second-layer weights and biases, with random initializations. 55 | W2 = kayak.Parameter( 0.1*npr.randn( layer1_sz, layer2_sz )) 56 | B2 = kayak.Parameter( 0.1*npr.randn(1, layer2_sz) ) 57 | 58 | # Second hidden layer: ReLU + Dropout 59 | H2 = kayak.Dropout(kayak.HardReLU(kayak.ElemAdd(kayak.MatMult(H1, W2), B2)), 60 | layer2_dropout, batcher=batcher) 61 | 62 | # Output layer weights and biases, with random initializations. 63 | W3 = kayak.Parameter( 0.1*npr.randn( layer2_sz, 10 )) 64 | B3 = kayak.Parameter( 0.1*npr.randn(1, 10) ) 65 | 66 | # Output layer. 67 | Y = kayak.LogSoftMax( kayak.ElemAdd(kayak.MatMult(H2, W3), B3) ) 68 | 69 | # The training loss is negative multinomial log likelihood. 70 | loss = kayak.MatSum(kayak.LogMultinomialLoss(Y, T)) 71 | 72 | # Use momentum for the gradient-based optimization. 73 | mom_grad_W1 = np.zeros(W1.shape) 74 | mom_grad_W2 = np.zeros(W2.shape) 75 | mom_grad_W3 = np.zeros(W3.shape) 76 | 77 | # Loop over epochs. 78 | for epoch in xrange(10): 79 | 80 | # Track the total loss. 81 | total_loss = 0.0 82 | 83 | # Loop over batches -- using batcher as iterator. 84 | for batch in batcher: 85 | # Compute the loss of this minibatch by asking the Kayak 86 | # object for its value. 87 | total_loss += loss.value 88 | 89 | # Now ask the loss for its gradient in terms of the 90 | # weights and the biases -- the two things we're trying to 91 | # learn here. 92 | grad_W1 = loss.grad(W1) 93 | grad_B1 = loss.grad(B1) 94 | grad_W2 = loss.grad(W2) 95 | grad_B2 = loss.grad(B2) 96 | grad_W3 = loss.grad(W3) 97 | grad_B3 = loss.grad(B3) 98 | 99 | # Use momentum on the weight gradients. 100 | mom_grad_W1 = momentum*mom_grad_W1 + (1.0-momentum)*grad_W1 101 | mom_grad_W2 = momentum*mom_grad_W2 + (1.0-momentum)*grad_W2 102 | mom_grad_W3 = momentum*mom_grad_W3 + (1.0-momentum)*grad_W3 103 | 104 | # Now make the actual parameter updates. 105 | W1.value -= learn_rate * mom_grad_W1 106 | B1.value -= learn_rate * grad_B1 107 | W2.value -= learn_rate * mom_grad_W2 108 | B2.value -= learn_rate * grad_B2 109 | W3.value -= learn_rate * mom_grad_W3 110 | B3.value -= learn_rate * grad_B3 111 | 112 | print epoch, total_loss 113 | 114 | # After we've trained, we return a sugary little function handle 115 | # that makes things easy. Basically, what we're doing here is 116 | # overwriting the data of the Kayak input object 'X' (that is, the 117 | # features being fed to the network) with whatever array the 118 | # caller passes in, and switching the batcher into test mode. 119 | # The point here is that we wind up with a function handle 120 | # that can be called with a numpy object and it produces the 121 | # target values for novel data, using the parameters we just 122 | # learned.
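# (Note: batcher.test_mode() also tells any Dropout nodes to reinstate all of their units, so nothing is dropped at prediction time.)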
123 | 124 | def compute_predictions(x): 125 | X.data = x 126 | batcher.test_mode() 127 | return Y.value 128 | 129 | return compute_predictions 130 | 131 | # Loop over our cross validation folds. 132 | for ii, fold in enumerate(CV): 133 | print "Fold %d" % (ii+1) 134 | 135 | # Get the training and validation data, according to this fold. 136 | train_images, train_labels = fold.train() 137 | valid_images, valid_labels = fold.valid() 138 | 139 | # Train on these data and get a prediction function back. 140 | t0 = time.time() 141 | pred_func = train(train_images, train_labels) 142 | print "train():", time.time()-t0 143 | 144 | # Make predictions on the validation data. 145 | valid_preds = np.argmax(pred_func( valid_images ), axis=1) 146 | 147 | # How did we do? 148 | print np.mean(valid_preds == np.argmax(valid_labels, axis=1)) 149 | 150 | -------------------------------------------------------------------------------- /examples/nnet_example.py: -------------------------------------------------------------------------------- 1 | import time 2 | import numpy as np 3 | import numpy.random as npr 4 | import sys 5 | 6 | sys.path.append('..') 7 | import kayak 8 | import kayak.util 9 | 10 | N = 1000 11 | D = 50 12 | H1 = 10 13 | P = 1 14 | batch_size = 256 15 | 16 | # Random data. 17 | X = npr.randn(N, D) 18 | Y = npr.randn(N, P) 19 | 20 | batcher = kayak.Batcher(batch_size, N) 21 | 22 | # Build network. 23 | kyk_inputs = kayak.Inputs(X, batcher) 24 | 25 | # Labels. 26 | kyk_targets = kayak.Targets(Y, batcher) 27 | 28 | # First layer weights and biases. 29 | kyk_W1 = kayak.Parameter( npr.randn(D, H1) ) 30 | kyk_B1 = kayak.Parameter( npr.randn(1,H1) ) 31 | 32 | # First layer weight mult plus biases, then nonlinearity. 33 | kyk_H1 = kayak.Dropout(kayak.HardReLU(kayak.ElemAdd(kayak.MatMult( kyk_inputs, kyk_W1 ), kyk_B1)), 34 | drop_prob=0.5, batcher=batcher) 35 | 36 | # Second layer weights and bias. 37 | kyk_W2 = kayak.Parameter( npr.randn(H1, P) ) 38 | kyk_B2 = kayak.Parameter( npr.randn(1,P) ) 39 | 40 | # Second layer multiplication. 41 | kyk_out = kayak.Dropout(kayak.HardReLU(kayak.ElemAdd(kayak.MatMult( kyk_H1, kyk_W2 ), kyk_B2)), 42 | drop_prob=0.5, batcher=batcher) 43 | 44 | # Elementwise Loss. 45 | kyk_el_loss = kayak.L2Loss(kyk_out, kyk_targets) 46 | 47 | # Sum the losses. 48 | kyk_loss = kayak.MatSum( kyk_el_loss ) 49 | 50 | # Roll in the weight regularization. 51 | kyk_obj = kayak.ElemAdd( kyk_loss, kayak.L1Norm(kyk_W1, weight=100.0), 52 | kayak.L1Norm(kyk_W2, weight=100.0)) 53 | 54 | print "W2:", kayak.util.checkgrad(kyk_W2, kyk_obj) 55 | print "B2:", kayak.util.checkgrad(kyk_B2, kyk_obj) 56 | print "W1:", kayak.util.checkgrad(kyk_W1, kyk_obj) 57 | print "B1:", kayak.util.checkgrad(kyk_B1, kyk_obj) 58 | 59 | t0 = time.time() 60 | for ii in xrange(10): 61 | 62 | for batch in batcher: 63 | val = kyk_obj.value 64 | grad_W1 = kyk_obj.grad(kyk_W1) 65 | grad_B1 = kyk_obj.grad(kyk_B1) 66 | grad_W2 = kyk_obj.grad(kyk_W2) 67 | grad_B2 = kyk_obj.grad(kyk_B2) 68 | 69 | t1 = time.time() 70 | print t1-t0 71 | -------------------------------------------------------------------------------- /examples/poisson_glm.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import matplotlib.pyplot as plt 5 | 6 | import sys 7 | sys.path.append('..') 8 | 9 | import kayak 10 | 11 | N = 10000 12 | D = 5 13 | P = 1 14 | learn = 0.00001 15 | batch_size = 500 16 | 17 | # Random inputs. 
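# (Synthetic data for a Poisson GLM: rates lam = exp(np.dot(X, true_W)), counts Y ~ Poisson(lam).)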
18 | X = npr.randn(N,D) 19 | true_W = npr.randn(D,P) 20 | lam = np.exp(np.dot(X, true_W)) 21 | Y = npr.poisson(lam) 22 | 23 | kyk_batcher = kayak.Batcher(batch_size, N) 24 | 25 | # Build network. 26 | kyk_inputs = kayak.Inputs(X, kyk_batcher) 27 | 28 | # Labels. 29 | kyk_targets = kayak.Targets(Y, kyk_batcher) 30 | 31 | # Weights. 32 | W = 0.01*npr.randn(D,P) 33 | kyk_W = kayak.Parameter(W) 34 | 35 | # Linear layer. 36 | kyk_activation = kayak.MatMult( kyk_inputs, kyk_W) 37 | 38 | # Exponential inverse-link function. 39 | kyk_lam = kayak.ElemExp(kyk_activation) 40 | 41 | # Poisson negative log likelihood. 42 | kyk_nll = kyk_lam - kayak.ElemLog(kyk_lam) * kyk_targets 43 | 44 | # Sum the losses. 45 | kyk_loss = kayak.MatSum( kyk_nll ) 46 | 47 | for ii in xrange(100): 48 | 49 | for batch in kyk_batcher: 50 | loss = kyk_loss.value 51 | print loss, np.sum((kyk_W.value - true_W)**2) 52 | grad = kyk_loss.grad(kyk_W) 53 | kyk_W.value -= learn * grad 54 | 55 | # Plot the true and inferred rate for a subset of data. 56 | T_slice = slice(0,100) 57 | kyk_inputs.value = X[T_slice,:] 58 | plt.figure() 59 | plt.plot(lam[T_slice], 'k') 60 | plt.plot(kyk_lam.value, '--r') 61 | plt.show() -------------------------------------------------------------------------------- /kayak/__init__.py: -------------------------------------------------------------------------------- 1 | # Authors: Harvard Intelligent Probabilistic Systems (HIPS) Group 2 | # http://hips.seas.harvard.edu 3 | # Ryan Adams, David Duvenaud, Scott Linderman, 4 | # Dougal Maclaurin, Jasper Snoek, and others 5 | # Copyright 2014, The President and Fellows of Harvard University 6 | # Distributed under an MIT license. See license.txt file. 7 | 8 | import sys 9 | import hashlib 10 | import numpy as np 11 | 12 | EPSILON = sys.float_info.epsilon 13 | 14 | from differentiable import Differentiable 15 | from root_nodes import Constant, Parameter, DataNode, Inputs, Targets 16 | from batcher import Batcher 17 | from matrix_ops import MatAdd, MatMult, MatElemMult, MatSum, MatMean, Transpose, Reshape, Concatenate, Identity, TensorMult, ListToArray, MatDet 18 | from elem_ops import ElemAdd, ElemMult, ElemExp, ElemLog, ElemPower, ElemAbs 19 | from nonlinearities import SoftReLU, HardReLU, LogSoftMax, TanH, Logistic, InputSoftMax, SoftMax 20 | from losses import L2Loss, LogMultinomialLoss 21 | from dropout import Dropout 22 | from regularizers import L2Norm, L1Norm, Horseshoe, NExp 23 | from crossval import CrossValidator 24 | from convolution import Convolve1d, Pool, TopKPool 25 | from indexing import Take 26 | from stacking import Hstack 27 | from generic_ops import Blank 28 | 29 | -------------------------------------------------------------------------------- /kayak/batcher.py: -------------------------------------------------------------------------------- 1 | # Authors: Harvard Intelligent Probabilistic Systems (HIPS) Group 2 | # http://hips.seas.harvard.edu 3 | # Ryan Adams, David Duvenaud, Scott Linderman, 4 | # Dougal Maclaurin, Jasper Snoek, and others 5 | # Copyright 2014, The President and Fellows of Harvard University 6 | # Distributed under an MIT license. See license.txt file. 7 | 8 | import numpy as np 9 | import numpy.random as npr 10 | 11 | from . import Differentiable 12 | 13 | class Batcher(Differentiable): 14 | """Kayak class for managing batches of data. 15 | 16 | This class is intended to provide a simple interface for managing 17 | mini-batches of data, both on the input side and on the output 18 | side. 
It can be set up to either use random minibatches, or go 19 | through the data in the order provided. You tell it how many data 20 | you have and how large the mini-batches should be. It will 21 | provide a sequence of indices via an iterator for easy looping. 22 | 23 | To use this class, you would do something like this: 24 | 25 | # Create an instance of the batcher. 26 | kyk_batcher = Batcher( batch_size, num_data ) 27 | 28 | # When you create input and output objects, give them access to 29 | # the batcher. 30 | kyk_inputs = Inputs(X, kyk_batcher) 31 | kyk_targets = Targets(Y, kyk_batcher) 32 | 33 | # Probably you'll loop over training epochs. 34 | for epoch in xrange(num_epochs): 35 | 36 | # Then you can treat the batcher as an iterator. 37 | for batch in kyk_batcher: 38 | 39 | # Do your mini-batch training here. 40 | 41 | """ 42 | __slots__ = ['_rng', '_batch_size', '_total_size', '_random_batches', 43 | '_dropout_nodes', 'start', 'end', 'ordering'] 44 | def __init__(self, batch_size, total_size, random_batches=False, rng=None): 45 | """Constructor for the Kayak Batcher class. 46 | 47 | This creates the Batcher, which makes it easy to manage 48 | mini-batch indices for inputs and outputs. This allows you to 49 | iterate through things in the order provided, or in a random 50 | order. 51 | 52 | Arguments: 53 | 54 | batch_size: (Integer) Size of the mini-batches to produce. 55 | 56 | total_size: (Integer) Total number of data to iterate over. 57 | 58 | random_batches: (Bool) Specifies whether the mini-batches 59 | should be random or not. 60 | """ 61 | super(Batcher, self).__init__([]) 62 | 63 | if rng is None: 64 | self._rng = npr.RandomState() 65 | else: 66 | self._rng = rng 67 | 68 | self._batch_size = batch_size 69 | self._total_size = total_size 70 | self._random_batches = random_batches 71 | self._dropout_nodes = [] 72 | self.reset() 73 | 74 | def reset(self): 75 | """Reset the state of the Kayak Batcher. 76 | 77 | It may happen that you want to 'reset the loop' and restart 78 | your iteration over the data. Calling this method does that, 79 | sending you back to index zero. If random_batches is true, 80 | you will get a new random 81 | permutation when you reset. 82 | 83 | This method is automatically called when the iterator 84 | completes its loop, so you don't need to explicitly call it 85 | when you're making multiple loops over the data. 86 | 87 | Arguments: None 88 | 89 | """ 90 | self.start = 0 91 | self.end = min(self.start+self._batch_size, self._total_size) 92 | 93 | if self._random_batches: 94 | self.ordering = self._rng.permutation(self._total_size) 95 | self.value = self.ordering[self.start:self.end] 96 | else: 97 | self.value = slice(self.start, self.end) 98 | 99 | for node in self._dropout_nodes: 100 | node.draw_new_mask() 101 | 102 | def __iter__(self): 103 | return self 104 | 105 | def next(self): 106 | """Implementation of iterator functionality. 107 | 108 | The Batcher class is used as an iterator. This method 109 | implements the iteration step forward. It will return index 110 | arrays (or slices, when iterating in order) that select the 111 | data in each mini-batch. In general, each mini-batch will be 112 | of size batch_size (as specified in the constructor). The last 113 | one may be smaller, if the number of data is not an integer multiple of the batch size.
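For example, with total_size=10, batch_size=4, and in-order batches, the iterator yields batches covering indices 0:4, then 4:8, then 8:10 (a short final batch), after which the batcher resets itself and raises StopIteration.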
114 | 115 | Arguments: None 116 | 117 | """ 118 | if self.start >= self._total_size: 119 | self.reset() 120 | raise StopIteration 121 | 122 | self._clear_value_cache() 123 | 124 | if self._random_batches: 125 | self.value = self.ordering[self.start:self.end] 126 | else: 127 | self.value = slice(self.start, self.end) 128 | 129 | self.start += self._batch_size 130 | self.end = min(self.start + self._batch_size, self._total_size) 131 | 132 | for node in self._dropout_nodes: 133 | node.draw_new_mask() 134 | 135 | return self.value 136 | 137 | def add_dropout_node(self, node): 138 | self._dropout_nodes.append(node) 139 | 140 | def test_mode(self): 141 | """ 142 | Turns off batching. Run before test-time. 143 | """ 144 | self._clear_value_cache() 145 | self.value = slice(None, None) # All indices 146 | for node in self._dropout_nodes: 147 | node.reinstate_units() 148 | -------------------------------------------------------------------------------- /kayak/convolution.py: -------------------------------------------------------------------------------- 1 | # Authors: Harvard Intelligent Probabilistic Systems (HIPS) Group 2 | # http://hips.seas.harvard.edu 3 | # Ryan Adams, David Duvenaud, Scott Linderman, 4 | # Dougal Maclaurin, Jasper Snoek, and others 5 | # Copyright 2014, The President and Fellows of Harvard University 6 | # Distributed under an MIT license. See license.txt file. 7 | 8 | import numpy as np 9 | 10 | import util 11 | 12 | from . import Differentiable 13 | import sys 14 | 15 | class Convolve1d(Differentiable): 16 | __slots__ = ['A', 'B', 'ncolors', 'stride'] 17 | 18 | def __init__(self, A, B, ncolors=1, stride=1): 19 | super(Convolve1d, self).__init__([A,B]) 20 | self.A = A 21 | self.B = B 22 | self.ncolors = ncolors 23 | self.stride = stride 24 | 25 | def _compute_value(self): 26 | A = self.A.value 27 | B = self.B.value 28 | filtersize = B.shape[0]/self.ncolors 29 | 30 | # Broadcast to get color channels 31 | A = np.reshape(A, (A.shape[0], -1)) 32 | 33 | D = A.shape[-1]/self.ncolors/self.stride - filtersize + 1 34 | output = np.zeros((A.shape[0], D, B.shape[1])) 35 | 36 | inds = np.arange(filtersize) 37 | inds = np.concatenate([inds+(i*A.shape[1]/self.ncolors) for i in xrange(self.ncolors)]) 38 | for j in xrange(0, D): 39 | output[:,j,:] = np.dot(A[:, inds], B) 40 | inds += self.stride 41 | 42 | return output.reshape((A.shape[0], D*B.shape[1])) 43 | 44 | def _local_grad(self, parent, d_out_d_self): 45 | A = self.A.value 46 | A = np.reshape(A, (A.shape[0], -1)) 47 | filtersize = self.B.shape[0]/self.ncolors 48 | inds = np.arange(filtersize) 49 | inds = np.concatenate([inds+(i*A.shape[1]/self.ncolors) for i in xrange(self.ncolors)]) 50 | 51 | if parent == 0: 52 | output = np.zeros((self.A.shape)) 53 | B = self.B.value 54 | outgrad = d_out_d_self.reshape(d_out_d_self.shape[0], -1, B.shape[-1]) 55 | 56 | for j in xrange(outgrad.shape[1]): 57 | output[:,inds] += np.dot(outgrad[:,j,:], B.T) 58 | inds += self.stride 59 | 60 | return output 61 | 62 | elif parent == 1: 63 | output = np.zeros((self.B.shape[0], self.B.shape[1])) 64 | outgrad = np.reshape(d_out_d_self, (d_out_d_self.shape[0], -1, self.B.shape[1])) 65 | 66 | for j in xrange(0, outgrad.shape[1]): 67 | output += np.dot(A[:,inds].T, outgrad[:,j,:]) 68 | inds += self.stride 69 | 70 | return output 71 | else: 72 | raise Exception("Not a parent of me") 73 | 74 | class Pool(Differentiable): 75 | __slots__ = ['A', 'width', 'indices', 'ncolors'] 76 | 77 | def __init__(self, A, width, ncolors=1): 78 | super(Pool, self).__init__([A]) 79 
| self.A = A 80 | self.width = width 81 | self.ncolors = ncolors 82 | self.indices = None 83 | 84 | def _compute_value(self): 85 | A = self.A.value 86 | 87 | # determine pooled shape variables 88 | conv_length = A.shape[1]/self.ncolors 89 | width_mod = conv_length % self.width 90 | width_aug = self.width - width_mod 91 | 92 | # augment convolution output to make pool width work 93 | if width_mod > 0: 94 | # insert at the back end of each convolution 95 | idx = np.ravel([[i*conv_length]*width_aug for i in range(1,self.ncolors+1)]) 96 | 97 | # insert -inf 98 | A = np.insert(A, idx, -np.inf, axis=1) 99 | 100 | # bring together elements in a pooling group 101 | A = np.reshape(A, (A.shape[0], self.ncolors, -1, self.width)) 102 | 103 | # get the index of the max within each pooling group 104 | self.indices = np.argmax(A, axis=3) 105 | 106 | # represent the first 3 dimensions of A 107 | x, z, t = np.indices(self.indices.shape) 108 | 109 | # index into the 4th dimension to pull out the maxes 110 | A = A[x, z, t, self.indices] 111 | 112 | # reshape back to the original form with the last dimension pooled 113 | A = A.reshape((self.A.shape[0],-1)) 114 | 115 | return A 116 | 117 | ''' 118 | try: 119 | A = np.reshape(A, (A.shape[0], self.ncolors, -1, self.width)) 120 | except: 121 | print 'Could not pool with a width of %d on a layer of size %d' % (self.width, A.shape[0]/self.ncolors) 122 | print A.shape 123 | print (A.shape[0], self.ncolors, -1, self.width) 124 | raise 125 | ''' 126 | 127 | def _local_grad(self, parent, d_out_d_self): 128 | if parent == 0: 129 | # determine pooled shape variables 130 | conv_length = self.A.shape[1]/self.ncolors 131 | width_mod = conv_length % self.width 132 | width_aug = self.width - width_mod 133 | pool_length = conv_length/self.width + 1*(width_mod>0) 134 | 135 | # create a zero matrix to match the reshaped version of A 136 | # that brings together elements in a pool group 137 | mask = np.zeros((self.A.shape[0], self.ncolors, pool_length, self.width)) 138 | 139 | # represent the first 3 dimensions of mask 140 | inds, inds2, inds3 = np.indices(self.indices.shape) 141 | 142 | # set the max indexes in mask to d_out_d_self, 143 | # reshaped to fit the shape of this reduced version of the full matrix A 144 | mask[inds, inds2, inds3, self.indices] = d_out_d_self.reshape((mask[inds, inds2, inds3, self.indices].shape)) 145 | 146 | # reshape to original form, with the last dimension pooled 147 | mask = mask.reshape((self.A.shape[0], -1)) 148 | 149 | # remove the added dummy columns 150 | if width_mod > 0: 151 | conv_length_aug = conv_length + width_aug 152 | idx = [i*conv_length_aug-m for i in range(1,self.ncolors+1) for m in range(1,width_aug+1)] 153 | mask = np.delete(mask, idx, axis=1) 154 | 155 | return mask 156 | 157 | ''' 158 | mask = np.zeros(self.A.shape).reshape((self.A.shape[0], self.ncolors, -1, self.width)) 159 | inds, inds2, inds3 = np.indices(self.indices.shape) 160 | mask[inds, inds2, inds3, self.indices] = d_out_d_self.reshape((mask[inds, inds2, inds3, self.indices].shape)) 161 | mask = mask.reshape((self.A.shape[0], -1)) 162 | ''' 163 | else: 164 | raise Exception("Not a parent of me") 165 | 166 | class TopKPool(Differentiable): 167 | __slots__ = ['A', 'k', 'indices', 'ncolors'] 168 | 169 | def __init__(self, A, k, ncolors=1): 170 | super(TopKPool, self).__init__([A]) 171 | self.A = A 172 | self.k = k 173 | self.ncolors = ncolors 174 | self.indices = None 175 | 176 | def _compute_value(self): 177 | A = self.A.value.copy() 178 | A = np.reshape(A, (A.shape[0], 
self.ncolors, -1)) 179 | self.indices = np.argsort(A, axis=2)[:,:,-self.k:] 180 | a, b, c = np.indices(self.indices.shape) 181 | A = A[a, b, self.indices] 182 | return A.reshape((self.A.shape[0],-1)) 183 | 184 | def _local_grad(self, parent, d_out_d_self): 185 | if parent == 0: 186 | mask = np.zeros(self.A.shape).reshape((self.A.shape[0], self.ncolors, -1)) 187 | inds, inds2, inds3 = np.indices(self.indices.shape) 188 | mask[inds, inds2, self.indices] = d_out_d_self.reshape((mask[inds, inds2, self.indices].shape)) 189 | mask = mask.reshape((self.A.shape[0], -1)) 190 | return mask 191 | else: 192 | raise Exception("Not a parent of me") 193 | -------------------------------------------------------------------------------- /kayak/crossval.py: -------------------------------------------------------------------------------- 1 | # Authors: Harvard Intelligent Probabilistic Systems (HIPS) Group 2 | # http://hips.seas.harvard.edu 3 | # Ryan Adams, David Duvenaud, Scott Linderman, 4 | # Dougal Maclaurin, Jasper Snoek, and others 5 | # Copyright 2014, The President and Fellows of Harvard University 6 | # Distributed under an MIT license. See license.txt file. 7 | 8 | import itertools 9 | import numpy as np 10 | import numpy.random as npr 11 | 12 | class Fold(object): 13 | 14 | def __init__(self, cv, train, valid): 15 | self._cv = cv 16 | self._train = train 17 | self._valid = valid 18 | 19 | def train(self): 20 | if self._cv.targets is None: 21 | return self._cv.inputs[self._train,...] 22 | else: 23 | return self._cv.inputs[self._train,...], self._cv.targets[self._train,...] 24 | 25 | def valid(self): 26 | if self._cv.targets is None: 27 | return self._cv.inputs[self._valid,...] 28 | else: 29 | return self._cv.inputs[self._valid,...], self._cv.targets[self._valid,...] 30 | 31 | class CrossValidator(object): 32 | 33 | def __init__(self, num_folds, inputs, targets=None, permute=True): 34 | 35 | if permute: 36 | # Make a copy of the data, with a random permutation. 
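# (Permuting first means each fold holds a random subset of the data rather than a contiguous block of the original ordering.)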
37 | self.ordering = npr.permutation(inputs.shape[0]) 38 | self.inputs = inputs[self.ordering,...].copy() 39 | if targets is not None: 40 | self.targets = targets[self.ordering,...].copy() 41 | else: 42 | self.targets = None 43 | else: 44 | self.ordering = np.arange(inputs.shape[0], dtype=int) 45 | self.inputs = inputs 46 | self.targets = targets 47 | 48 | self.fold_idx = 0 49 | self.num_folds = num_folds 50 | self.edges = np.linspace(0, self.inputs.shape[0], self.num_folds+1).astype(int) 51 | self.indices = [] 52 | for ii in xrange(self.num_folds): 53 | self.indices.append( np.arange(self.edges[ii], self.edges[ii+1], dtype=int) ) 54 | self.folds = [] 55 | for ii in xrange(self.num_folds): 56 | self.folds.append(Fold(self, 57 | np.array(list(itertools.chain.from_iterable([self.indices[jj] for jj in range(0,ii)+range(ii+1,self.num_folds)])), dtype=int), 58 | np.array(self.indices[ii], dtype=int))) 59 | 60 | def __iter__(self): 61 | return self 62 | 63 | def next(self): 64 | try: 65 | result = self.folds[self.fold_idx] 66 | self.fold_idx += 1 67 | return result 68 | except IndexError: 69 | self.fold_idx = 0 70 | raise StopIteration 71 | 72 | 73 | 74 | 75 | -------------------------------------------------------------------------------- /kayak/differentiable.py: -------------------------------------------------------------------------------- 1 | # Authors: Harvard Intelligent Probabilistic Systems (HIPS) Group 2 | # http://hips.seas.harvard.edu 3 | # Ryan Adams, David Duvenaud, Scott Linderman, 4 | # Dougal Maclaurin, Jasper Snoek, and others 5 | # Copyright 2014, The President and Fellows of Harvard University 6 | # Distributed under an MIT license. See license.txt file. 7 | 8 | import numpy as np 9 | import weakref 10 | 11 | class Differentiable(object): 12 | __slots__ = ['_value', '_grad', '_loss', '_parents', '_children','__weakref__','_parent_indices'] 13 | def __init__(self, parents=()): 14 | self._value = None # Cached value 15 | self._grad = None # Cached grad 16 | self._loss = None # Loss we are caching with respect to 17 | for parent_index, parent in enumerate(parents): 18 | parent._add_child(self, parent_index) 19 | 20 | self._parents = tuple(parents) 21 | self._children = weakref.WeakValueDictionary() 22 | # self._children = () 23 | 24 | @property 25 | def _children_with_parent_indices(self): 26 | return [(self._children[key], key[1]) for key in self._children.keys()] 27 | 28 | @property 29 | def value(self): 30 | """Compute the value of the function. This walks up the 31 | dependency graph and finds all of the Kayak objects with known 32 | values (such as Inputs and Targets, perhaps modulated by a 33 | Batcher) and then propagates their values forward through the 34 | modular computations of Differentiable subclasses. The result 35 | is cached, so repeated accesses are cheap until something 36 | upstream changes and clears the cache. 37 | """ 38 | # If the value is not yet cached, compute it. 39 | if self._value is None: 40 | self._value = self._compute_value() 41 | 42 | return self._value 43 | 44 | @value.setter 45 | def value(self, new_value): 46 | self._clear_value_cache() 47 | self._value = new_value 48 | 49 | def _clear_value_cache(self): 50 | """ 51 | Clears this node's cached value, and the cached values of any 52 | dependents. We maintain the invariant that cached values are 53 | never wrong relative to their parents' values.
54 | """ 55 | if self._value is not None: 56 | [child._clear_value_cache() for child in self._children.values()] 57 | # [child._clear_value_cache() for child, _ in self._children.values()] 58 | self._clear_grad_cache() 59 | self._value = None 60 | 61 | def _clear_grad_cache(self): 62 | if self._grad is not None: 63 | [parent._clear_grad_cache() for parent in self._parents] 64 | self._grad = None 65 | 66 | def grad(self, other): 67 | """Compute the gradient of this module in terms of another 68 | module. One of the main points of the Kayak setup is to 69 | easily compute gradients in terms of parameters. This is the 70 | interface for doing so. You call the grad() method on 71 | something that produces a scalar, providing as an argument 72 | some other object that appears deeper in the graph. You get 73 | out an array of the same shape as the deeper object, but which 74 | is the gradient. 75 | 76 | Arguments: 77 | 78 | other: (Kayak object) The other object, in terms of which 79 | you'd like to take this thing's gradient. 80 | """ 81 | grad = other._d_out_d_self(self) 82 | if grad is 0: 83 | # Make sure the output has the expected shape 84 | grad = np.zeros(other.shape) 85 | 86 | return grad 87 | 88 | @property 89 | def shape(self): 90 | return self.value.shape 91 | 92 | def _d_out_d_self(self, out): 93 | # Cached grad is not valid or refers to a different loss, 94 | # so we need to recompute compute the gradient 95 | if self._grad is None or self._loss is not out: 96 | if self is out: 97 | grad = np.ones(self.shape) 98 | elif not self._children: 99 | grad = 0 100 | else: 101 | grad = None 102 | for child, parent_index in self._children_with_parent_indices: 103 | if grad is None: 104 | grad = child._d_out_d_parent(out, parent_index) 105 | else: 106 | grad += child._d_out_d_parent(out, parent_index) 107 | 108 | self._loss = out 109 | self._grad = grad 110 | 111 | return self._grad 112 | 113 | def _d_out_d_parent(self, out, parent): 114 | d_out_d_self = self._d_out_d_self(out) 115 | if d_out_d_self is 0: 116 | # This avoid calling local_grad for paths that don't end in 'out' 117 | return 0 118 | else: 119 | return self._local_grad(parent, d_out_d_self) 120 | 121 | def _add_child(self, child, parent_index): 122 | """Parent_index is an int that tells out child which parent we are.""" 123 | self._children[(id(child), parent_index)] = child 124 | # self._children = self._children + ((child, parent_index), ) 125 | 126 | def _local_grad(self, parent, d_out_d_self): 127 | """Return d_out_d_self * d_self_d_parent""" 128 | raise Exception("Class 'Differentiable' is abstract.") 129 | 130 | def _compute_value(self): 131 | raise Exception("Class 'Differentiable' is abstract.") 132 | 133 | # Overload plus and times operators with elementwise operations 134 | # To avoid circular imports, we wait until the operator is called 135 | # to import the subclasses of Differentiable 136 | def __add__(self, other): 137 | from . import ElemAdd, Constant 138 | 139 | # If other is not a Differentiable object, 140 | # try to cast it as a constant. 141 | if not isinstance(other, Differentiable): 142 | other = Constant(other) 143 | return ElemAdd(self, other) 144 | 145 | def __radd__(self, other): 146 | return self.__add__(other) 147 | 148 | def __sub__(self, other): 149 | return self + -other 150 | 151 | def __rsub__(self, other): 152 | return other + -self 153 | 154 | def __mul__(self, other): 155 | from . import ElemMult, Constant 156 | # If other is not a Differentiable object, 157 | # try to cast it as a constant. 
158 | if not isinstance(other, Differentiable): 159 | other = Constant(other) 160 | return ElemMult(self, other) 161 | 162 | def __rmul__(self, other): 163 | return self.__mul__(other) 164 | 165 | # NOTE: Assuming Python 2.x syntax for div 166 | def __div__(self, other): 167 | from . import ElemPower 168 | return self * ElemPower(other, -1) 169 | 170 | def __rdiv__(self, other): 171 | from . import ElemPower 172 | return other * ElemPower(self, -1) 173 | 174 | def __neg__(self): 175 | from . import ElemMult, Constant 176 | return ElemMult(Constant(-1.), self) 177 | 178 | def __pow__(self, power, modulo=None): 179 | from . import ElemPower 180 | return ElemPower(self, power) 181 | 182 | def __abs__(self): 183 | from . import ElemAbs 184 | return ElemAbs(self) 185 | 186 | 187 | 188 | -------------------------------------------------------------------------------- /kayak/dropout.py: -------------------------------------------------------------------------------- 1 | # Authors: Harvard Intelligent Probabilistic Systems (HIPS) Group 2 | # http://hips.seas.harvard.edu 3 | # Ryan Adams, David Duvenaud, Scott Linderman, 4 | # Dougal Maclaurin, Jasper Snoek, and others 5 | # Copyright 2014, The President and Fellows of Harvard University 6 | # Distributed under an MIT license. See license.txt file. 7 | 8 | import numpy as np 9 | import numpy.random as npr 10 | 11 | from . import Differentiable, EPSILON 12 | 13 | class Dropout(Differentiable): 14 | __slots__ = ['X', 'drop_prob', '_rng', '_enhancement', '_mask'] 15 | 16 | def __init__(self, X, drop_prob=0.5, rng=None, batcher=None): 17 | if batcher is not None: 18 | super(Dropout, self).__init__([X, batcher]) 19 | batcher.add_dropout_node(self) 20 | else: 21 | super(Dropout, self).__init__([X]) 22 | 23 | self.X = X 24 | self.drop_prob = drop_prob 25 | 26 | if rng is None: 27 | self._rng = npr.RandomState() 28 | else: 29 | self._rng = rng 30 | 31 | self._enhancement = (1.0 + EPSILON)/(1.0 - self.drop_prob+EPSILON) 32 | self.draw_new_mask() 33 | 34 | def draw_new_mask(self): 35 | self._mask = self._enhancement * (self._rng.rand(*self.X.shape) 36 | > self.drop_prob) 37 | self._clear_value_cache() 38 | 39 | def reinstate_units(self): 40 | self._mask = np.ones(self.X.shape) 41 | self._clear_value_cache() 42 | 43 | def _compute_value(self): 44 | return self._mask * self.X.value 45 | 46 | def _local_grad(self, parent, d_out_d_self): 47 | return d_out_d_self * self._mask 48 | 49 | -------------------------------------------------------------------------------- /kayak/elem_ops.py: -------------------------------------------------------------------------------- 1 | # Authors: Harvard Intelligent Probabilistic Systems (HIPS) Group 2 | # http://hips.seas.harvard.edu 3 | # Ryan Adams, David Duvenaud, Scott Linderman, 4 | # Dougal Maclaurin, Jasper Snoek, and others 5 | # Copyright 2014, The President and Fellows of Harvard University 6 | # Distributed under an MIT license. See license.txt file. 13 | 14 | import numpy as np 15 | from .
import Differentiable 16 | import matrix_ops 17 | 18 | class Elementwise(Differentiable): 19 | __slots__ = ['X'] 20 | def __init__(self, X): 21 | super(Elementwise, self).__init__(X) 22 | self.X = X 23 | 24 | def _compute_shape(self, inputs=None): 25 | return self.X.shape 26 | 27 | # Just an alias for matrix addition and elementwise multiplication. 28 | ElemAdd = matrix_ops.MatAdd 29 | ElemMult = matrix_ops.MatElemMult 30 | 31 | class ElemExp(Elementwise): 32 | """ 33 | Elementwise exponentiation of an array 34 | """ 35 | __slots__ = ['A'] 36 | def __init__(self, A): 37 | super(ElemExp, self).__init__([A]) 38 | self.A = A 39 | 40 | def _compute_value(self): 41 | return np.exp(self.A.value) 42 | 43 | def _local_grad(self, parent, d_out_d_self): 44 | if parent == 0: 45 | return d_out_d_self * np.exp(self.A.value) 46 | else: 47 | raise Exception("Not a parent of me") 48 | 49 | class ElemLog(Elementwise): 50 | """ 51 | Elementwise logarithm of an array 52 | """ 53 | __slots__ = ['A'] 54 | def __init__(self, A): 55 | super(ElemLog, self).__init__([A]) 56 | self.A = A 57 | 58 | def _compute_value(self): 59 | return np.log(self.A.value) 60 | 61 | def _local_grad(self, parent, d_out_d_self): 62 | if parent == 0: 63 | return d_out_d_self / self.A.value 64 | else: 65 | raise Exception("Not a parent of me") 66 | 67 | class ElemPower(Elementwise): 68 | """ 69 | Elementwise power of an array. 70 | 71 | NOTE: Fractional powers are only defined for positive bases. 72 | We do not check for this; numpy will produce NaNs and a runtime warning. 73 | """ 74 | __slots__ = ['A', 'pow'] 75 | def __init__(self, A, pow): 76 | super(ElemPower, self).__init__([A]) 77 | self.A = A 78 | assert np.isscalar(pow), 'Power must be a scalar value.' 79 | self.pow = pow 80 | 81 | def _compute_value(self): 82 | return np.power(self.A.value, self.pow) 83 | 84 | def _local_grad(self, parent, d_out_d_self): 85 | if parent == 0: 86 | return d_out_d_self * self.pow * np.power(self.A.value, self.pow-1) 87 | else: 88 | raise Exception("Not a parent of me") 89 | 90 | class ElemAbs(Elementwise): 91 | """ 92 | Elementwise absolute value of an array. 93 | """ 94 | __slots__ = ['A'] 95 | def __init__(self, A): 96 | super(ElemAbs, self).__init__([A]) 97 | self.A = A 98 | 99 | def _compute_value(self): 100 | return abs(self.A.value) 101 | 102 | def _local_grad(self, parent, d_out_d_self): 103 | if parent == 0: 104 | return d_out_d_self * np.sign(self.A.value) 105 | else: 106 | raise Exception("Not a parent of me") 107 | -------------------------------------------------------------------------------- /kayak/generic_ops.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from .
import Differentiable 2 | 3 | 4 | class Blank(Differentiable): 5 | # Creates a custom kayak node on-the-fly with compute_value and/or local_grad 6 | # functions passed in as arguments 7 | def __init__(self, args=[], compute_value=None, local_grad=None): 8 | super(Blank, self).__init__(args) 9 | self.compute_value_fun = compute_value 10 | self.local_grad_fun = local_grad 11 | 12 | def _compute_value(self): 13 | return self.compute_value_fun(self._parents) 14 | 15 | def _local_grad(self, parent, d_out_d_self): 16 | return self.local_grad_fun(self._parents, parent, d_out_d_self) 17 | -------------------------------------------------------------------------------- /kayak/indexing.py: -------------------------------------------------------------------------------- 1 | # Authors: Harvard Intelligent Probabilistic Systems (HIPS) Group 2 | # http://hips.seas.harvard.edu 3 | # Ryan Adams, David Duvenaud, Scott Linderman, 4 | # Dougal Maclaurin, Jasper Snoek, and others 5 | # Copyright 2014, The President and Fellows of Harvard University 6 | # Distributed under an MIT license. See license.txt file. 7 | 8 | import numpy as np 9 | import numpy.random as npr 10 | 11 | from . import Differentiable 12 | 13 | class Take(Differentiable): 14 | __slots__ = ['X', '_inds', '_axis'] 15 | 16 | def __init__(self, X, inds, axis=1): 17 | super(Take, self).__init__([X]) 18 | 19 | self.X = X 20 | self._inds = inds 21 | self._axis = axis 22 | 23 | def _compute_value(self): 24 | slice_list = [slice(None), ] * self.X.value.ndim 25 | slice_list[self._axis] = self._inds 26 | return self.X.value[slice_list] 27 | 28 | def _local_grad(self, parent, d_out_d_self): 29 | result = np.zeros(self.X.shape) 30 | slice_list = [slice(None), ] * result.ndim 31 | slice_list[self._axis] = self._inds 32 | result[slice_list] = d_out_d_self 33 | return result 34 | -------------------------------------------------------------------------------- /kayak/input_checking.py: -------------------------------------------------------------------------------- 1 | from warnings import warn 2 | 3 | def check_equal_ndims_for_broadcasting(obj): 4 | ndims = [p.value.ndim for p in obj._parents] 5 | if not all([ndims[0] == ndims_other for ndims_other in ndims[1:]]): 6 | p_shapes = [p.shape for p in obj._parents] 7 | warn(("Broadcasting arrays with shapes %s " + 8 | "by prepending singleton dimensions.") % p_shapes, 9 | stacklevel=2) 10 | -------------------------------------------------------------------------------- /kayak/losses.py: -------------------------------------------------------------------------------- 1 | # Authors: Harvard Intelligent Probabilistic Systems (HIPS) Group 2 | # http://hips.seas.harvard.edu 3 | # Ryan Adams, David Duvenaud, Scott Linderman, 4 | # Dougal Maclaurin, Jasper Snoek, and others 5 | # Copyright 2014, The President and Fellows of Harvard University 6 | # Distributed under an MIT license. See license.txt file. 7 | 8 | import numpy as np 9 | 10 | from input_checking import check_equal_ndims_for_broadcasting 11 | from .
import Differentiable 12 | 13 | class Loss(Differentiable): 14 | __slots__ = ['preds', 'targs'] 15 | def __init__(self, predictions, targets): 16 | super(Loss, self).__init__((predictions, targets)) 17 | self.preds = predictions 18 | self.targs = targets 19 | 20 | _check_inputs = check_equal_ndims_for_broadcasting 21 | 22 | class L2Loss(Loss): 23 | __slots__ = ['axis', 'keepdims'] 24 | def __init__(self, predictions, targets, axis=None, keepdims=True): 25 | super(L2Loss, self).__init__(predictions, targets) 26 | self.axis = axis 27 | self.keepdims = keepdims 28 | 29 | def _compute_value(self): 30 | return np.sum((self.preds.value - self.targs.value)**2, 31 | axis=self.axis, keepdims=self.keepdims) 32 | 33 | def _local_grad(self, parent, d_out_d_self): 34 | assert parent is 0, "Shouldn't be taking derivative wrt targets" 35 | return 2 * (self.preds.value - self.targs.value) * d_out_d_self 36 | 37 | class LogMultinomialLoss(Loss): 38 | __slots__ = ['axis', 'keepdims'] 39 | def __init__(self, predictions, targets, axis=1, keepdims=True): 40 | # Predictions are log probabilities and targets are counts. 41 | super(LogMultinomialLoss, self).__init__(predictions, targets) 42 | self.axis = axis 43 | self.keepdims = keepdims 44 | 45 | def _compute_value(self): 46 | return -np.sum(self.targs.value * self.preds.value, 47 | axis=self.axis, keepdims=self.keepdims) 48 | 49 | def _local_grad(self, parent, d_out_d_self): 50 | return - d_out_d_self * self.targs.value 51 | -------------------------------------------------------------------------------- /kayak/matrix_ops.py: -------------------------------------------------------------------------------- 1 | # Authors: Harvard Intelligent Probabilistic Systems (HIPS) Group 2 | # http://hips.seas.harvard.edu 3 | # Ryan Adams, David Duvenaud, Scott Linderman, 4 | # Dougal Maclaurin, Jasper Snoek, and others 5 | # Copyright 2014, The President and Fellows of Harvard University 6 | # Distributed under an MIT license. See license.txt file. 7 | 8 | import numpy as np 9 | import scipy.linalg as spla 10 | from . import Differentiable 11 | 12 | class MatMult(Differentiable): 13 | __slots__ = ['A', 'B'] 14 | def __init__(self, A, B, *args): 15 | # Recurse to handle lists of arguments. 16 | if len(args) > 0: 17 | B = MatMult(B, *args) 18 | super(MatMult, self).__init__((A, B)) 19 | self.A = A 20 | self.B = B 21 | 22 | def _compute_value(self): 23 | A_val, B_val = self.A.value, self.B.value 24 | if A_val.ndim > 2 or B_val.ndim > 2: 25 | raise Exception("Inputs of shape %s and %s are not matrices or vectors" % (self.A.shape, self.B.shape)) 26 | if A_val.shape[-1] != B_val.shape[0]: 27 | raise Exception("Cannot multiply %s by %s matrices."
% (self.A.shape, self.B.shape)) 28 | 29 | return np.dot(A_val, B_val) 30 | 31 | def _local_grad(self, parent, d_out_d_self): 32 | if parent == 0: 33 | B_val = self.B.value 34 | if B_val.ndim == 2: 35 | return np.dot(d_out_d_self, B_val.T) 36 | else: 37 | return np.outer(d_out_d_self, B_val) 38 | elif parent == 1: 39 | A_val = self.A.value 40 | if A_val.ndim == 2: 41 | return np.dot(A_val.T, d_out_d_self) 42 | else: 43 | return np.outer(A_val, d_out_d_self) 44 | else: 45 | raise Exception("Not a parent of me") 46 | 47 | class MatSum(Differentiable): 48 | __slots__ = ['A', 'axis', 'keepdims'] 49 | def __init__(self, A, axis=None, keepdims=True): 50 | super(MatSum, self).__init__((A,)) 51 | if axis is not None and type(axis) != int: 52 | raise Exception("Can only sum over one axis at a time.") 53 | self.A = A 54 | self.axis = axis 55 | self.keepdims = keepdims 56 | 57 | def _compute_value(self): 58 | return np.sum(self.A.value, axis=self.axis, keepdims=self.keepdims) 59 | 60 | def _local_grad(self, parent, d_out_d_self): 61 | # If self.keepdims == False then we need to 62 | # broadcast d_out_d_self along the summation axis 63 | if not self.keepdims and self.axis is not None: 64 | expanded_d_out_d_self = np.expand_dims(d_out_d_self, self.axis) 65 | return expanded_d_out_d_self * np.ones(self.A.shape) 66 | else: 67 | return d_out_d_self * np.ones(self.A.shape) 68 | 69 | class MatMean(Differentiable): 70 | __slots__ = ['A', 'axis', 'keepdims'] 71 | def __init__(self, A, axis=None, keepdims=True): 72 | super(MatMean, self).__init__((A,)) 73 | if axis is not None and type(axis) != int: 74 | raise Exception("Can only take the mean over one axis at a time.") 75 | self.A = A 76 | self.axis = axis 77 | self.keepdims = keepdims 78 | 79 | def _compute_value(self): 80 | return np.mean(self.A.value, axis=self.axis, keepdims=self.keepdims) 81 | 82 | def _local_grad(self, parent, d_out_d_self): 83 | # If self.keepdims == False then we need to 84 | # broadcast d_out_d_self along the mean axis 85 | N = float(self.A.value.size) if self.axis is None else float(self.A.shape[self.axis]) 86 | if not self.keepdims and self.axis is not None: 87 | expanded_d_out_d_self = np.expand_dims(d_out_d_self, self.axis) 88 | return expanded_d_out_d_self * 1.0/N * np.ones(self.A.shape) 89 | else: 90 | return d_out_d_self * 1.0/N * np.ones(self.A.shape) 91 | 92 | class MatAdd(Differentiable): 93 | __slots__ = [] 94 | def __init__(self, *args): 95 | super(MatAdd, self).__init__(args) 96 | 97 | def _compute_value(self): 98 | return sum([p.value for p in self._parents]) 99 | 100 | def _local_grad(self, parent, d_out_d_self): 101 | parent_shape = self._parents[parent].shape 102 | num_singletons = len(d_out_d_self.shape) - len(parent_shape) 103 | if num_singletons > 0: 104 | extra_singletons = tuple(range(num_singletons)) 105 | result = np.sum(d_out_d_self, axis=extra_singletons, keepdims=False) 106 | else: 107 | result = d_out_d_self 108 | 109 | assert len(result.shape) == len(parent_shape) 110 | original_singletons = tuple(np.where(np.array(parent_shape) == 1)[0]) 111 | return np.sum(result, axis=original_singletons, keepdims=True) 112 | 113 | class MatElemMult(Differentiable): 114 | """ 115 | Elementwise multiplication of two broadcastable arrays. 116 | Note: gradients are summed over any broadcast dimensions, following the same pattern as MatAdd. 117 | """ 118 | __slots__ = ['A', 'B'] 119 | def __init__(self, A, B, *args): 120 | # Recurse to handle lists of arguments.
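# (Editor's note, not part of the original source: a call such as
#  MatElemMult(A, B, C) is rebuilt below as MatElemMult(A, MatElemMult(B, C)),
#  so n-ary products reduce to nested binary nodes and the binary gradient
#  rule in _local_grad covers every argument.)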
121 | if len(args) > 0: 122 | B = MatElemMult(B, *args) 123 | 124 | super(MatElemMult, self).__init__((A,B)) 125 | 126 | self.A = A 127 | self.B = B 128 | 129 | def _compute_value(self): 130 | return self.A.value * self.B.value 131 | 132 | def _local_grad(self, parent, d_out_d_self): 133 | """ 134 | For element-wise multiplication d(A*B)/dA = d_out_d_self * B. 135 | However, to support broadcasting, we need to sum over the broadcast dimensions. 136 | For example, d(A*x)/dx, where A is a matrix and x is a scalar, is 137 | given by \sum_{d1} \ldots \sum_{dD} (d_out_d_self * A)[d1,...,dD] 138 | """ 139 | parent_shape = self._parents[parent].shape 140 | other_parent = 1 if parent == 0 else 0 141 | other_parent_value = self._parents[other_parent].value 142 | 143 | # Compute how many dimensions the parent was broadcast along 144 | num_singletons = len(d_out_d_self.shape) - len(parent_shape) 145 | if num_singletons > 0: 146 | extra_singletons = tuple(range(num_singletons)) 147 | # Sum out the broadcast dimensions 148 | result = np.sum(d_out_d_self*other_parent_value, axis=extra_singletons, keepdims=False) 149 | else: 150 | result = d_out_d_self*other_parent_value 151 | 152 | # In multiplying, we may have broadcast the parent. 153 | # Sum out those dimensions as well. 154 | assert len(result.shape) == len(parent_shape) 155 | original_singletons = tuple(np.where(np.array(parent_shape) == 1)[0]) 156 | return np.sum(result, axis=original_singletons, keepdims=True) 157 | 158 | class MatDet(Differentiable): 159 | __slots__ = ['A'] 160 | def __init__(self, A, axis=None, keepdims=True): 161 | super(MatDet, self).__init__((A,)) 162 | self.A = A 163 | 164 | def _compute_value(self): 165 | return np.linalg.det(self.A.value) 166 | 167 | def _local_grad(self, parent, d_out_d_self): 168 | det = self.value 169 | return d_out_d_self * det * np.linalg.inv(self.A.value).T 170 | 171 | class MatLogDet(Differentiable): 172 | pass 173 | 174 | class MatTrace(Differentiable): 175 | pass 176 | 177 | class Transpose(Differentiable): 178 | __slots__ = ['A', 'axes'] 179 | def __init__(self, A, axes=None): 180 | super(Transpose, self).__init__((A,)) 181 | self.A = A 182 | self.axes = axes 183 | 184 | def _compute_value(self): 185 | return np.transpose(self.A.value, axes=self.axes) 186 | 187 | def _local_grad(self, parent, d_out_d_self): 188 | if self.axes is None: 189 | return np.transpose(d_out_d_self) 190 | else: 191 | return np.transpose(d_out_d_self, axes=np.argsort(self.axes)) 192 | 193 | class Reshape(Differentiable): 194 | __slots__ = ['A', 'new_shape'] 195 | 196 | def __init__(self, A, new_shape): 197 | super(Reshape, self).__init__((A,)) 198 | self.A = A 199 | self.new_shape = new_shape 200 | 201 | def _compute_value(self): 202 | return np.reshape(self.A.value, self.new_shape) 203 | 204 | def _local_grad(self, parent, d_out_d_self): 205 | return np.reshape(d_out_d_self, self.A.shape) 206 | 207 | class Concatenate(Differentiable): 208 | __slots__ = ['axis'] 209 | def __init__(self, axis, *args): 210 | super(Concatenate, self).__init__(args) 211 | self.axis = axis 212 | 213 | def _compute_value(self): 214 | return np.concatenate([p.value for p in self._parents], axis=self.axis) 215 | 216 | def _local_grad(self, parent_ix, d_out_d_self): 217 | # Return the gradient only w.r.t. the matrix indexed by parent.
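# (Editor's sketch, not part of the original source: e.g. concatenating
#  parents of shapes (2, 3) and (4, 3) along axis 0 yields a (6, 3) value;
#  for parent_ix=1 the code below computes start_ix=2, end_ix=6 and returns
#  d_out_d_self[2:6, :].)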
218 | start_ix = sum([p.shape[self.axis] for p in self._parents[0:parent_ix]]) 219 | end_ix = start_ix + self._parents[parent_ix].shape[self.axis] 220 | return index_along_axis(d_out_d_self, self.axis, start_ix, end_ix) 221 | 222 | class ListToArray(Differentiable): 223 | """Build an array out of a list of arrays by prepending a dimension 224 | and concatenating.""" 225 | __slots__ = [] 226 | def __init__(self, *args): 227 | super(ListToArray, self).__init__(args) 228 | 229 | def _compute_value(self): 230 | return np.concatenate([p.value[None, :] for p in self._parents], axis=0) 231 | 232 | def _local_grad(self, parent_ix, d_out_d_self): 233 | return d_out_d_self[parent_ix, :] 234 | 235 | def index_along_axis(array, axis, start, end): 236 | """Return the slice of `array` along `axis` from `start` up to but not including `end`. 237 | 238 | For example: 239 | >>> index_along_axis(np.random.randn(10,20), 0, 10, 12).shape 240 | (2, 20) 241 | """ 242 | full_slice = [slice(None),] * array.ndim 243 | full_slice[axis] = slice(start,end) 244 | return array[full_slice] 245 | 246 | class TensorMult(Differentiable): 247 | __slots__ = ['axes'] 248 | def __init__(self, A, B, axes): 249 | super(TensorMult, self).__init__((A, B)) 250 | self.axes = axes 251 | 252 | def _compute_value(self): 253 | A = self._parents[0].value 254 | B = self._parents[1].value 255 | return np.tensordot(A, B, self.axes) 256 | 257 | def _local_grad(self, parent, d_out_d_self): 258 | diff = lambda A, B : [a for a in A if a not in B] 259 | rank = lambda L : list(np.argsort(np.argsort(L))) 260 | val = [p.value for p in self._parents] 261 | axes = self.axes 262 | n_axes = len(axes[0]) 263 | ignore_dims = [diff(range(val[i].ndim), axes[i]) for i in (0, 1)] 264 | ignore_ndims = [len(x) for x in ignore_dims] 265 | output_dims = (range(ignore_ndims[0]), 266 | range(ignore_ndims[0], ignore_ndims[0] + ignore_ndims[1])) 267 | X, Y = parent, 1 - parent 268 | wrong_order = np.tensordot(val[Y], d_out_d_self, (ignore_dims[Y], output_dims[Y])) 269 | permutation = [None] * val[X].ndim 270 | for final, cur in zip(list(axes[X]) + ignore_dims[X], 271 | rank(axes[Y]) + range(n_axes, val[X].ndim)): 272 | permutation[final] = cur 273 | 274 | return np.transpose(wrong_order, permutation) 275 | 276 | class Identity(Differentiable): 277 | __slots__ = [] 278 | def __init__(self, A): 279 | super(Identity, self).__init__((A,)) 280 | 281 | def _compute_value(self): 282 | return self._parents[0].value 283 | 284 | def _local_grad(self, parent_ix, d_out_d_self): 285 | return d_out_d_self 286 | -------------------------------------------------------------------------------- /kayak/nonlinearities.py: -------------------------------------------------------------------------------- 1 | # Authors: Harvard Intelligent Probabilistic Systems (HIPS) Group 2 | # http://hips.seas.harvard.edu 3 | # Ryan Adams, David Duvenaud, Scott Linderman, 4 | # Dougal Maclaurin, Jasper Snoek, and others 5 | # Copyright 2014, The President and Fellows of Harvard University 6 | # Distributed under an MIT license. See license.txt file. 7 | 8 | import numpy as np 9 | from numpy import exp 10 | 11 | import util 12 | 13 | from .
import Differentiable 14 | from . import EPSILON 15 | 16 | class Nonlinearity(Differentiable): 17 | __slots__ = ['X'] 18 | def __init__(self, X): 19 | super(Nonlinearity, self).__init__((X,)) 20 | self.X = X 21 | 22 | class SoftReLU(Nonlinearity): 23 | __slots__ = ['scale'] 24 | def __init__(self, X, scale=1.0): 25 | super(SoftReLU, self).__init__(X) 26 | self.scale = scale 27 | 28 | def _compute_value(self): 29 | # Somewhat complicated to handle overflow. 30 | X = self.X.value 31 | se = np.seterr(over='ignore') 32 | exp_X = np.exp(X / self.scale) 33 | result = np.log(1.0 + exp_X)*self.scale 34 | over = np.isinf(exp_X) 35 | result[over] = X[over] # log(1 + exp(x/s))*s tends to x where exp overflows 36 | return result 37 | 38 | def _local_grad(self, parent, d_out_d_self): 39 | return d_out_d_self/(1.0 + np.exp( - self.X.value/self.scale )) 40 | 41 | class HardReLU(Nonlinearity): 42 | __slots__ = [] 43 | def __init__(self, X): 44 | super(HardReLU, self).__init__(X) 45 | 46 | def _compute_value(self): 47 | return np.maximum(self.X.value, 0.0) 48 | 49 | def _local_grad(self, parent, d_out_d_self): 50 | return d_out_d_self * (self.X.value > 0) 51 | 52 | class TanH(Nonlinearity): 53 | __slots__ = [] 54 | def __init__(self, X): 55 | super(TanH, self).__init__(X) 56 | 57 | def _compute_value(self): 58 | return np.tanh(self.X.value) 59 | 60 | def _local_grad(self, parent, d_out_d_self): 61 | return d_out_d_self*(1.0 - np.tanh(self.X.value)**2) 62 | 63 | class Logistic(Nonlinearity): 64 | __slots__ = [] 65 | def __init__(self, X): 66 | super(Logistic, self).__init__(X) 67 | 68 | def _compute_value(self): 69 | return 1.0/(1.0 + np.exp(-self.X.value)) 70 | 71 | def _local_grad(self, parent, d_out_d_self): 72 | y = self.value 73 | return d_out_d_self * y * (1.0 - y) 74 | 75 | class LogSoftMax(Nonlinearity): 76 | __slots__ = ['axis'] 77 | def __init__(self, X, axis=1): 78 | super(LogSoftMax, self).__init__(X) 79 | self.axis = axis 80 | 81 | def _compute_value(self): 82 | X = self.X.value 83 | return X - util.logsumexp(X, axis=self.axis) 84 | 85 | def _local_grad(self, parent, d_out_d_self): 86 | return d_out_d_self - (np.exp(self.value) * np.sum(d_out_d_self, axis=self.axis, keepdims=True)) 87 | 88 | class SoftMax(Nonlinearity): 89 | __slots__ = ['axis'] 90 | def __init__(self, X, axis=1): 91 | super(SoftMax, self).__init__(X) 92 | self.axis = axis 93 | 94 | def _compute_value(self): 95 | X = self.X.value 96 | return np.exp(X - util.logsumexp(X, axis=self.axis)) 97 | 98 | def _local_grad(self, parent, d_out_d_self): 99 | val = self.value 100 | return val * (d_out_d_self - np.sum(val * d_out_d_self, axis=self.axis, keepdims=True)) 101 | 102 | class InputSoftMax(Nonlinearity): 103 | __slots__ = ['ncolors'] 104 | def __init__(self, X, ncolors=4): 105 | super(InputSoftMax, self).__init__(X) 106 | self.ncolors = ncolors 107 | 108 | def _compute_value(self): 109 | X = self.X.value 110 | A = np.reshape(X, (X.shape[0], self.ncolors, X.shape[1]//self.ncolors)) 111 | X = A 112 | return np.exp(X - util.logsumexp(X, axis=1)).reshape((self.X.shape)) 113 | 114 | def _local_grad(self, parent, d_out_d_self): 115 | X = self.X.value 116 | A = np.reshape(X, (X.shape[0], self.ncolors, X.shape[1]//self.ncolors)) 117 | val = self.value.reshape(A.shape) 118 | d_out_d_self = d_out_d_self.reshape(val.shape) 119 | return (val * (d_out_d_self - np.sum(val * d_out_d_self, axis=1, keepdims=True))).reshape((self.X.shape[0],-1)) 120 | 121 | class L2Normalize(Nonlinearity): 122 | __slots__ = ['axis'] 123 | def __init__(self, X, axis=1): 124 |
super(L2Normalize, self).__init__(X) 125 | self.axis = axis 126 | assert np.all(X.value >= 0) 127 | 128 | def _compute_value(self): 129 | X = self.X.value 130 | lX = np.log(X + EPSILON) 131 | return np.exp(lX - 0.5*util.logsumexp(2*lX, axis=self.axis)) 132 | 133 | def _local_grad(self, parent, d_out_d_self): 134 | X = self.X.value + EPSILON 135 | val = self.value 136 | val2 = X / np.sum(X**2, axis=self.axis, keepdims=True) 137 | return val * (d_out_d_self / X - np.sum(val2 * d_out_d_self, axis=self.axis, keepdims=True)) 138 | 139 | class BatchNormalize(Nonlinearity): 140 | __slots__ = [] # 'X' is already a slot on Nonlinearity 141 | def __init__(self, X): 142 | super(BatchNormalize, self).__init__(X) 143 | 144 | def _compute_value(self): 145 | X = self.X.value 146 | mu = np.mean(X, axis=0, keepdims=True) 147 | sig = np.mean((X - mu)**2, axis=0, keepdims=True) + 1e-6 148 | val = (X - mu) * sig**-0.5 149 | return val 150 | 151 | def _local_grad(self, parent, d_out_d_self): 152 | X = self.X.value 153 | mu = np.mean(X, axis=0, keepdims=True) 154 | diff = X - mu 155 | sig = np.mean(diff**2, axis=0, keepdims=True) + 1e-6 156 | invsqrtsig = sig**-0.5 157 | val = diff * invsqrtsig 158 | m = X.shape[0] 159 | 160 | dsig = np.sum(d_out_d_self*diff*(-0.5*sig**-(3./2.)), axis=0, keepdims=True) 161 | dmu = np.sum(d_out_d_self * -invsqrtsig, axis=0, keepdims=True) + dsig*np.mean(-2.0*diff, axis=0, keepdims=True) 162 | dx = d_out_d_self * invsqrtsig + dsig * 2.0 * diff/m + dmu/m 163 | return dx 164 | -------------------------------------------------------------------------------- /kayak/regularizers.py: -------------------------------------------------------------------------------- 1 | # Authors: Harvard Intelligent Probabilistic Systems (HIPS) Group 2 | # http://hips.seas.harvard.edu 3 | # Ryan Adams, David Duvenaud, Scott Linderman, 4 | # Dougal Maclaurin, Jasper Snoek, and others 5 | # Copyright 2014, The President and Fellows of Harvard University 6 | # Distributed under an MIT license. See license.txt file. 7 | 8 | import numpy as np 9 | 10 | from .
import Differentiable 11 | 12 | class Regularizer(Differentiable): 13 | __slots__ = ['X', 'weight'] 14 | def __init__(self, X, weight): 15 | super(Regularizer, self).__init__([X]) 16 | self.X = X 17 | self.weight = weight 18 | 19 | class L2Norm(Regularizer): 20 | __slots__ = [] 21 | def __init__(self, X, weight=1.0): 22 | super(L2Norm, self).__init__(X, weight) 23 | 24 | def _compute_value(self): 25 | return self.weight * np.sum(self.X.value**2) 26 | 27 | def _local_grad(self, parent, d_out_d_self): 28 | return self.weight * 2.0 * self.X.value * d_out_d_self 29 | 30 | class L1Norm(Regularizer): 31 | __slots__ = [] 32 | def __init__(self, X, weight=1.0): 33 | super(L1Norm, self).__init__(X, weight) 34 | 35 | def _compute_value(self): 36 | return self.weight * np.sum(np.abs(self.X.value)) 37 | 38 | def _local_grad(self, parent, d_out_d_self): 39 | return self.weight * np.sign(self.X.value) * d_out_d_self 40 | 41 | class Horseshoe(Regularizer): 42 | __slots__ = [] 43 | def __init__(self, X, weight=1.0): 44 | super(Horseshoe, self).__init__(X, weight) 45 | 46 | def _compute_value(self): 47 | return -self.weight * np.sum(np.log(np.log(1.0 + self.X.value**(-2)))) 48 | 49 | def _local_grad(self, parent, d_out_d_self): 50 | return -(self.weight * d_out_d_self * (1 / (np.log(1.0 + self.X.value**(-2)))) 51 | * (1.0/(1 + self.X.value**(-2))) * (-2*self.X.value**(-3))) 52 | 53 | class NExp(Regularizer): 54 | __slots__ = [] 55 | def __init__(self, X, weight=1.0): 56 | super(NExp, self).__init__(X, weight) 57 | 58 | def _compute_value(self): 59 | return self.weight * np.sum(1.0 - np.exp(-np.abs(self.X.value))) 60 | 61 | def _local_grad(self, parent, d_out_d_self): 62 | return self.weight * d_out_d_self * np.exp(-np.abs(self.X.value)) * np.sign(self.X.value) 63 | -------------------------------------------------------------------------------- /kayak/root_nodes.py: -------------------------------------------------------------------------------- 1 | # Authors: Harvard Intelligent Probabilistic Systems (HIPS) Group 2 | # http://hips.seas.harvard.edu 3 | # Ryan Adams, David Duvenaud, Scott Linderman, 4 | # Dougal Maclaurin, Jasper Snoek, and others 5 | # Copyright 2014, The President and Fellows of Harvard University 6 | # Distributed under an MIT license. See license.txt file. 7 | 8 | import numpy as np 9 | from . import Differentiable 10 | 11 | class DataNode(Differentiable): 12 | __slots__ = ['_batcher', '_data','_children', '_value', '_grad', '_loss', '_parents'] 13 | def __init__(self, data, batcher=None): 14 | if batcher is None: 15 | super(DataNode, self).__init__([]) 16 | else: 17 | super(DataNode, self).__init__([batcher]) 18 | 19 | self._data = np.atleast_1d(data) 20 | self._batcher = batcher 21 | 22 | @property 23 | def data(self): 24 | return self._data 25 | 26 | @data.setter 27 | def data(self, new_data): 28 | self._data = new_data 29 | self._clear_value_cache() 30 | 31 | def _compute_value(self): 32 | if self._batcher is None: 33 | return self.data 34 | else: 35 | return self.data[self._batcher.value,...] 36 | 37 | def _local_grad(self, parent, d_out_d_self): 38 | raise Exception("Can't take gradient w.r.t. data") 39 | 40 | class Parameter(Differentiable): 41 | __slots__ = [] 42 | def __init__(self, val): 43 | super(Parameter, self).__init__([]) 44 | self.value = np.atleast_1d(val) 45 | 46 | def grad(self, other): 47 | return np.zeros(other.shape) 48 | 49 | def _compute_value(self): 50 | raise Exception("Shouldn't need this. 
Value should be cached") 51 | 52 | def _local_grad(self, parent, d_out_d_self): 53 | raise Exception("Shouldn't get here.") 54 | 55 | # These are just aliases 56 | Inputs = DataNode 57 | Targets = DataNode 58 | Constant = Parameter 59 | -------------------------------------------------------------------------------- /kayak/stacking.py: -------------------------------------------------------------------------------- 1 | # Authors: Harvard Intelligent Probabilistic Systems (HIPS) Group 2 | # http://hips.seas.harvard.edu 3 | # Ryan Adams, David Duvenaud, Scott Linderman, 4 | # Dougal Maclaurin, Jasper Snoek, and others 5 | # Copyright 2014, The President and Fellows of Harvard University 6 | # Distributed under an MIT license. See license.txt file. 7 | 8 | import numpy as np 9 | import numpy.random as npr 10 | 11 | from . import Differentiable 12 | 13 | class Hstack(Differentiable): 14 | __slots__ = ['A', 'B'] 15 | 16 | def __init__(self, A, B): 17 | super(Hstack, self).__init__([A, B]) 18 | 19 | self.A = A 20 | self.B = B 21 | 22 | def _compute_value(self): 23 | return np.hstack((self.A.value, self.B.value)) 24 | 25 | def _local_grad(self, parent, d_out_d_self): 26 | if parent == 0: 27 | return d_out_d_self[:,:self.A.shape[1]] 28 | if parent == 1: 29 | return d_out_d_self[:,self.A.shape[1]:] 30 | -------------------------------------------------------------------------------- /kayak/util.py: -------------------------------------------------------------------------------- 1 | # Authors: Harvard Intelligent Probabilistic Systems (HIPS) Group 2 | # http://hips.seas.harvard.edu 3 | # Ryan Adams, David Duvenaud, Scott Linderman, 4 | # Dougal Maclaurin, Jasper Snoek, and others 5 | # Copyright 2014, The President and Fellows of Harvard University 6 | # Distributed under an MIT license. See license.txt file. 7 | 8 | import numpy as np 9 | import numpy.random as npr 10 | import itertools as it 11 | 12 | from . import EPSILON 13 | 14 | from root_nodes import Parameter 15 | 16 | def checkgrad(variable, output, epsilon=1e-4, verbose=False): 17 | if not isinstance(variable, Parameter): 18 | raise Exception("Cannot evaluate gradient in terms of non-Parameter type %s", (type(variable))) 19 | 20 | # Need to make sure all evals have the same random number generation. 21 | rng_seed = 1 22 | 23 | value = output.value 24 | an_grad = output.grad(variable) 25 | fd_grad = np.zeros(variable.shape) 26 | base_value = variable.value.copy() 27 | for in_dims in it.product(*map(range, variable.shape)): 28 | small_array = np.zeros(variable.shape) 29 | small_array[in_dims] = epsilon 30 | 31 | variable.value = base_value - 2*small_array 32 | fn_l2 = output.value 33 | variable.value = base_value - small_array 34 | fn_l1 = output.value 35 | variable.value = base_value + small_array 36 | fn_r1 = output.value 37 | variable.value = base_value + 2*small_array 38 | fn_r2 = output.value 39 | 40 | fd_grad[in_dims] = ((fn_l2 - fn_r2)/12. + (- fn_l1 + fn_r1)*2./3.) /epsilon # 2nd order method 41 | # fd_grad[in_dims] = (- fn_l1/2. + fn_r1/2.) 
42 | 43 | if verbose: 44 | print np.abs((an_grad[in_dims] - fd_grad[in_dims])/(fd_grad[in_dims]+EPSILON)), an_grad[in_dims], fd_grad[in_dims] 45 | 46 | variable.value = base_value 47 | print "Mean finite difference", np.mean(np.abs((an_grad - fd_grad)/(fd_grad+EPSILON))) 48 | return np.mean(np.abs((an_grad - fd_grad)/(fd_grad+EPSILON))) 49 | 50 | 51 | def logsumexp(X, axis=None): 52 | maxes = np.max(X, axis=axis, keepdims=True) 53 | return np.log(np.sum(np.exp(X - maxes), axis=axis, keepdims=True)) + maxes 54 | 55 | def onehot(T, num_labels=None): 56 | if num_labels is None: 57 | num_labels = np.max(T)+1 58 | labels = np.zeros((T.shape[0], num_labels), dtype=bool) 59 | labels[np.arange(T.shape[0], dtype=int), T] = 1 60 | return labels 61 | 62 | -------------------------------------------------------------------------------- /license.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 by the President and Fellows of Harvard University 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from setuptools import setup 3 | 4 | # Utility function to read the README file. 5 | # Used for the long_description. It's nice, because now 1) we have a top level 6 | # README file and 2) it's easier to type in the README file than to put a raw 7 | # string in below ...
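# (Editor's sketch, not part of the original setup.py: with the helper below,
#  read('README.md') amounts to opening README.md relative to the directory
#  containing setup.py, and its return value is what setup() receives as
#  long_description.)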
8 | def read(fname): 9 | return open(os.path.join(os.path.dirname(__file__), fname)).read() 10 | 11 | setup( 12 | name = "kayak", 13 | version = "0.1", 14 | author = "Ryan Adams, Dougal Maclaurin, Scott Linderman, Jasper Snoek, and David Duvenaud", 15 | author_email = "rpa@seas.harvard.edu, maclaurin@physics.harvard.edu, slinderman@seas.harvard.edu, jsnoek@seas.harvard.edu, dduvenaud@seas.harvard.edu", 16 | description = ("A package for automatic differentiation in deep learning models."), 17 | keywords = "automatic differentiation, deep learning, neural networks", 18 | packages=['kayak'], 19 | long_description=read('README.md'), 20 | ) -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | MAX_FLOAT_DIFF = 1e-9 4 | MAX_GRAD_DIFF = 1e-7 5 | NUM_TRIALS = 10 6 | 7 | def close_float(A, B): 8 | return np.abs(A-B) < MAX_FLOAT_DIFF 9 | -------------------------------------------------------------------------------- /tests/check_MemoryUse.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | from guppy import hpy 4 | import sys 5 | sys.path.append('..') 6 | import kayak 7 | 8 | def check_NodeMemory(): 9 | # Not a test. Useful for checking how much memory a node uses. 10 | np_A = npr.randn(5,6) 11 | A = kayak.Parameter(np_A) 12 | N = int(1e4) 13 | h = hpy() 14 | h.setref() 15 | for i in xrange(N): 16 | A = kayak.Identity(A) 17 | print "Created 10,000 objects" 18 | print h.heap() 19 | 20 | if __name__ == "__main__": 21 | check_NodeMemory() 22 | -------------------------------------------------------------------------------- /tests/test_BatchNormalize.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . import * 7 | 8 | def test_batchnorm_values_1(): 9 | npr.seed(1) 10 | 11 | for ii in xrange(NUM_TRIALS): 12 | 13 | np_X = npr.randn(5,4) 14 | np_A = npr.randn(4,2) 15 | A = kayak.Parameter(np_A) 16 | X = kayak.Parameter(np_X) 17 | Y = kayak.BatchNormalize(X) 18 | J = kayak.TanH(kayak.MatMult(Y,A)) 19 | Z = kayak.MatSum(J) 20 | 21 | mu = np.mean(np_X, axis=0, keepdims=True) 22 | sig = np.mean((np_X - mu)**2, axis=0, keepdims=True) + 1e-6 23 | np_Y = (np_X - mu) / np.sqrt(sig) 24 | 25 | assert np.all(close_float(Y.value, np_Y)) 26 | assert kayak.util.checkgrad(X, Z, verbose=True) < MAX_GRAD_DIFF 27 | -------------------------------------------------------------------------------- /tests/test_Batcher.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from .
import * 7 | 8 | def test_indices_1(): 9 | """Test with deterministic indices.""" 10 | 11 | for num_data in [1, 10, 100, 1000, 10000, 100000]: 12 | for batch_size in [1, 10, 11, 25, 50, 101, 500, 1000, 1011]: 13 | 14 | data_used = np.zeros((num_data,), dtype=bool) 15 | batcher = kayak.Batcher(batch_size, num_data) 16 | for batch in batcher: 17 | data_used[batch] = True 18 | 19 | assert np.all(data_used) 20 | 21 | def test_indices_2(): 22 | """Test with random seed.""" 23 | npr.seed(1) 24 | 25 | for num_data in [1, 10, 100, 1000, 10000, 100000]: 26 | for batch_size in [1, 10, 11, 25, 50, 101, 500, 1000, 1011]: 27 | 28 | data_used = np.zeros((num_data,), dtype=bool) 29 | batcher = kayak.Batcher(batch_size, num_data, random_batches=True) 30 | for batch in batcher: 31 | data_used[batch] = True 32 | 33 | assert np.all(data_used) 34 | 35 | def test_reset(): 36 | """Test resetting.""" 37 | 38 | for num_data in [1000, 10000, 100000]: 39 | for batch_size in [1, 10, 11, 25, 50, 101, 500]: 40 | 41 | batcher = kayak.Batcher(batch_size, num_data) 42 | 43 | # Start the batcher forward. 44 | batcher.next() 45 | 46 | # Now reset. 47 | batcher.reset() 48 | 49 | # Make sure we touch all of the data. 50 | data_used = np.zeros((num_data,), dtype=bool) 51 | for batch in batcher: 52 | data_used[batch] = True 53 | 54 | assert np.all(data_used) 55 | 56 | def test_batcher_updates_value(): 57 | batcher = kayak.Batcher(12, 20) 58 | data = npr.randn(20, 7) 59 | X = kayak.Inputs(data, batcher) 60 | for i, batch in enumerate(batcher): 61 | if i == 0: 62 | assert np.all(X.value == data[:12, :]) 63 | elif i == 1: 64 | assert np.all(X.value == data[12:, :]) 65 | else: 66 | assert False 67 | 68 | batcher.test_mode() 69 | assert np.all(X.value == data) 70 | 71 | def test_batcher_updates_dropout(): 72 | batcher = kayak.Batcher(5, 10) 73 | X = kayak.Inputs(np.random.randn(10,10)) 74 | Y = kayak.Dropout(X, batcher=batcher) 75 | val1 = Y.value 76 | batcher.next() 77 | val2 = Y.value 78 | assert not np.all(val1 == val2) 79 | 80 | def test_batcher_can_reinstate_dropout_mask(): 81 | batcher = kayak.Batcher(5, 10) 82 | X = kayak.Inputs(np.ones((10,10))) 83 | Y = kayak.Dropout(X, batcher=batcher) 84 | assert not np.all(Y.value == np.ones((10, 10))) 85 | batcher.test_mode() 86 | print "Y value", Y.value 87 | assert np.all(Y.value == np.ones((10, 10))) 88 | -------------------------------------------------------------------------------- /tests/test_CacheFreshness.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . import * 7 | 8 | # def test_MatSum_clears_cache(): 9 | # X = kayak.Inputs(np.array([[1, 2, 3], [2, 3, 4]])) 10 | # Y = kayak. 
11 | 12 | def test_batcher_clears_value_cache(): 13 | batcher = kayak.Batcher(1, 2) 14 | X = kayak.Inputs(np.array([[1, 2, 3], [2, 3, 4]]), batcher) 15 | Y = kayak.MatSum(X) 16 | correct_vals = [6, 9] 17 | for ii, batch in enumerate(batcher): 18 | assert Y.value == correct_vals[ii] 19 | 20 | def test_batcher_clears_shape_cache(): 21 | batcher = kayak.Batcher(2, 3) 22 | X = kayak.Inputs(np.array([[1, 2, 3], [2, 3, 4], [3, 4, 5]]), batcher) 23 | Y = kayak.MatSum(X, axis=1) 24 | correct_shapes = [(2, 1), (1, 1)] 25 | for ii, batch in enumerate(batcher): 26 | assert Y.shape == correct_shapes[ii] 27 | 28 | def test_dropout_clears_value_cache(): 29 | X = kayak.Inputs(np.random.randn(10,10)) 30 | Y = kayak.Dropout(X) 31 | Z = kayak.MatSum(Y, axis=1) 32 | val1 = Z.value 33 | Y.draw_new_mask() 34 | val2 = Z.value 35 | assert not np.all(val1 == val2) 36 | assert np.all(Z.value == Z.value) 37 | 38 | def test_data_update_clears_value_cache(): 39 | X = kayak.Inputs(np.array([[1, 2, 3], [2, 3, 4], [3, 4, 5]])) 40 | assert np.all(X.value == [[1, 2, 3], [2, 3, 4], [3, 4, 5]]) 41 | X.data = [1,2] 42 | assert X._value is None 43 | assert np.all(X.value == [1, 2]) 44 | 45 | def test_param_change_clears_value_cache(): 46 | pass 47 | 48 | def test_param_change_clears_grad_cache(): 49 | pass 50 | -------------------------------------------------------------------------------- /tests/test_Constant.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . import * 7 | 8 | def test_constant_scalar(): 9 | npr.seed(1) 10 | 11 | for ii in xrange(NUM_TRIALS): 12 | 13 | np_X = npr.randn() 14 | X = kayak.Constant(np_X) 15 | 16 | assert close_float(X.value, np_X) 17 | 18 | def test_constant_vector(): 19 | npr.seed(1) 20 | 21 | for ii in xrange(NUM_TRIALS): 22 | 23 | np_X = npr.randn(10) 24 | X = kayak.Constant(np_X) 25 | 26 | assert np.all(close_float(X.value, np_X)) 27 | 28 | def test_constant_matrix(): 29 | npr.seed(1) 30 | 31 | for ii in xrange(NUM_TRIALS): 32 | 33 | np_X = npr.randn(10,20) 34 | X = kayak.Constant(np_X) 35 | 36 | assert np.all(close_float(X.value, np_X)) 37 | 38 | -------------------------------------------------------------------------------- /tests/test_Convolve1d.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . 
import * 7 | from nose.tools import assert_equals, assert_less 8 | 9 | def test_convolve1d_1(): 10 | npr.seed(3) 11 | 12 | for ii in xrange(NUM_TRIALS): 13 | 14 | np_A = npr.randn(5,6) 15 | np_B = npr.randn(6,7) 16 | A = kayak.Parameter(np_A) 17 | B = kayak.Parameter(np_B) 18 | C = kayak.Convolve1d(A, B, ncolors=1) 19 | 20 | # If the filters are the same size as the data 21 | assert C.value.shape == (5,7) 22 | 23 | def test_convolve1d_2(): 24 | npr.seed(3) 25 | 26 | for ii in xrange(NUM_TRIALS): 27 | 28 | np_A = npr.randn(5,20) 29 | np_B = npr.randn(6,4) 30 | A = kayak.Parameter(np_A) 31 | B = kayak.Parameter(np_B) 32 | C = kayak.Convolve1d(A, B, ncolors=1) 33 | 34 | assert_equals(C.value.shape, (5,(20-6+1)*4)) 35 | 36 | def test_convolve1d_3(): 37 | npr.seed(3) 38 | 39 | for ii in xrange(NUM_TRIALS): 40 | 41 | np_A = npr.randn(5,50) 42 | np_B = npr.randn(6*5,4) 43 | A = kayak.Parameter(np_A) 44 | B = kayak.Parameter(np_B) 45 | C = kayak.Convolve1d(A, B, ncolors=5) 46 | 47 | assert_equals(C.value.shape, (5,(10-6+1)*4)) 48 | 49 | def test_convolve1d_grad_1(): 50 | npr.seed(3) 51 | 52 | for ii in xrange(NUM_TRIALS): 53 | 54 | np_A = npr.randn(5,6) 55 | np_B = npr.randn(6,7) 56 | A = kayak.Parameter(np_A) 57 | B = kayak.Parameter(np_B) 58 | C = kayak.Convolve1d(A, B) 59 | D = kayak.MatSum(C) 60 | 61 | D.value 62 | assert_equals(D.grad(A).shape, (5,6)) 63 | assert_equals(D.grad(B).shape, (6,7)) 64 | assert_less(kayak.util.checkgrad(A, D), MAX_GRAD_DIFF) 65 | assert_less(kayak.util.checkgrad(B, D), MAX_GRAD_DIFF) 66 | 67 | def test_pool_1(): 68 | npr.seed(3) 69 | 70 | for ii in xrange(NUM_TRIALS): 71 | 72 | np_A = npr.randn(5,6) 73 | A = kayak.Parameter(np_A) 74 | B = kayak.Pool(A, width=2) 75 | C = kayak.MatSum(B) 76 | 77 | C.value 78 | assert_equals(C.grad(A).shape, (5,6)) 79 | assert_equals(C.grad(B).shape, (5,3)) 80 | assert_less(kayak.util.checkgrad(A, C), MAX_GRAD_DIFF) 81 | 82 | def test_pool_2(): 83 | npr.seed(3) 84 | 85 | for ii in xrange(NUM_TRIALS): 86 | 87 | np_A = npr.randn(5, 6*4) 88 | A = kayak.Parameter(np_A) 89 | B = kayak.Pool(A, width=2, ncolors=4) 90 | C = kayak.MatSum(B) 91 | 92 | C.value 93 | assert_equals(C.grad(A).shape, (5, 6*4)) 94 | assert_equals(C.grad(B).shape, (5, 12)) 95 | assert_equals(B.shape, (5, 12)) 96 | assert_less(kayak.util.checkgrad(A, C), MAX_GRAD_DIFF) 97 | 98 | def test_pool_offwidth_1(): 99 | npr.seed(3) 100 | 101 | for ii in xrange(NUM_TRIALS): 102 | 103 | np_A = npr.randn(5,7) 104 | A = kayak.Parameter(np_A) 105 | B = kayak.Pool(A, width=3) 106 | C = kayak.MatSum(B) 107 | 108 | C.value 109 | assert_equals(C.grad(A).shape, (5,7)) 110 | assert_equals(C.grad(B).shape, (5,3)) 111 | assert_less(kayak.util.checkgrad(A, C), MAX_GRAD_DIFF) 112 | 113 | def test_pool_offwidth_2(): 114 | npr.seed(3) 115 | 116 | for ii in xrange(NUM_TRIALS): 117 | 118 | np_A = npr.randn(5, 7*4) 119 | A = kayak.Parameter(np_A) 120 | B = kayak.Pool(A, width=3, ncolors=4) 121 | C = kayak.MatSum(B) 122 | 123 | C.value 124 | assert_equals(C.grad(A).shape, (5, 7*4)) 125 | assert_equals(C.grad(B).shape, (5, 12)) 126 | assert_equals(B.shape, (5, 12)) 127 | assert_less(kayak.util.checkgrad(A, C), MAX_GRAD_DIFF) 128 | 129 | def test_topkpool_1(): 130 | npr.seed(3) 131 | 132 | for ii in xrange(NUM_TRIALS): 133 | 134 | np_A = npr.randn(5,9) 135 | A = kayak.Parameter(np_A) 136 | B = kayak.TopKPool(A, k=5) 137 | C = kayak.MatSum(B) 138 | 139 | C.value 140 | assert_equals(C.grad(A).shape, (5,9)) 141 | assert_equals(C.grad(B).shape, (5,5)) 142 | assert_less(kayak.util.checkgrad(A, C), 
MAX_GRAD_DIFF) 143 | 144 | def test_convolve1d_grad_2(): 145 | npr.seed(3) 146 | 147 | for ii in xrange(NUM_TRIALS): 148 | 149 | np_A = npr.randn(5,50) 150 | np_B = npr.randn(6,7) 151 | A = kayak.Parameter(np_A) 152 | B = kayak.Parameter(np_B) 153 | C = kayak.Convolve1d(A, B) 154 | D = kayak.MatSum(C) 155 | 156 | D.value 157 | assert_equals(D.grad(A).shape, (5,50)) 158 | assert_equals(D.grad(B).shape, (6,7)) 159 | assert_less(kayak.util.checkgrad(A, D), MAX_GRAD_DIFF) 160 | assert_less(kayak.util.checkgrad(B, D), MAX_GRAD_DIFF) 161 | 162 | def test_convolve1d_grad_3(): 163 | npr.seed(3) 164 | 165 | for ii in xrange(NUM_TRIALS): 166 | 167 | np_A = npr.randn(5,50) 168 | np_B = npr.randn(6*5,4) 169 | A = kayak.Parameter(np_A) 170 | B = kayak.Parameter(np_B) 171 | C = kayak.Convolve1d(A, B, ncolors=5) 172 | D = kayak.MatSum(C) 173 | 174 | D.value 175 | assert_equals(D.grad(A).shape, (5,50)) 176 | assert_equals(D.grad(B).shape, (6*5,4)) 177 | assert_less(kayak.util.checkgrad(A, D), MAX_GRAD_DIFF) 178 | assert_less(kayak.util.checkgrad(B, D), MAX_GRAD_DIFF) -------------------------------------------------------------------------------- /tests/test_Dropout.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . import * 7 | 8 | def test_nondropout_values(): 9 | npr.seed(1) 10 | # First sanity check: don't actually drop anything out. 11 | # Make sure we get everything back. 12 | 13 | np_X = npr.randn(10,20) 14 | X = kayak.Parameter(np_X) 15 | Y = kayak.Dropout(X, drop_prob=0.0) 16 | 17 | assert np.all(close_float(Y.value, np_X)) 18 | 19 | def test_alldropout_values(): 20 | npr.seed(2) 21 | # Drop everything out. 22 | 23 | np_X = npr.randn(10,20) 24 | X = kayak.Parameter(np_X) 25 | Y = kayak.Dropout(X, drop_prob=1.0) 26 | 27 | assert np.all(Y.value == 0.0) 28 | 29 | def test_dropout_values(): 30 | # Drop some things out. 31 | npr.seed(3) 32 | 33 | for ii in xrange(NUM_TRIALS): 34 | prob = npr.rand() 35 | scale = 1.0 / (1.0 - prob) 36 | 37 | np_X = npr.randn(5,6) 38 | X = kayak.Parameter(np_X) 39 | Y = kayak.Dropout(X, drop_prob=prob) 40 | 41 | Y.value 42 | 43 | assert np.all(np.logical_xor(Y.value == 0.0, close_float(Y.value, scale*np_X))) 44 | 45 | def test_nondropout_grad(): 46 | npr.seed(4) 47 | 48 | np_X = npr.randn(10,20) 49 | X = kayak.Parameter(np_X) 50 | Y = kayak.Dropout(X, drop_prob=0.0) 51 | Z = kayak.MatSum(Y) 52 | 53 | Z.value 54 | assert Z.grad(X).shape == np_X.shape 55 | assert kayak.util.checkgrad(X, Z) < MAX_GRAD_DIFF 56 | 57 | def test_alldropout_grad(): 58 | npr.seed(5) 59 | 60 | np_X = npr.randn(10,20) 61 | X = kayak.Parameter(np_X) 62 | Y = kayak.Dropout(X, drop_prob=1.0) 63 | Z = kayak.MatSum(Y) 64 | 65 | Z.value 66 | assert Z.grad(X).shape == np_X.shape 67 | assert kayak.util.checkgrad(X, Z) < MAX_GRAD_DIFF 68 | 69 | def test_dropout_grad(): 70 | # Drop some things out.
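# (Editor's note, an assumption not stated in the original source: checkgrad
#  below perturbs X while the dropout mask, drawn once and cached, stays
#  fixed -- it appears to be redrawn only by draw_new_mask() or a batcher --
#  so finite differences and backprop evaluate the same masked network.)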
71 | npr.seed(6) 72 | 73 | for ii in xrange(NUM_TRIALS): 74 | prob = npr.rand() 75 | scale = 1.0 / (1.0 - prob) 76 | 77 | np_X = npr.randn(5,6) 78 | X = kayak.Parameter(np_X) 79 | Y = kayak.Dropout(X, drop_prob=prob) 80 | Z = kayak.MatSum(Y) 81 | 82 | Z.value 83 | assert Z.grad(X).shape == np_X.shape 84 | assert kayak.util.checkgrad(X, Z) < MAX_GRAD_DIFF 85 | 86 | -------------------------------------------------------------------------------- /tests/test_ElemAbs.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . import * 7 | 8 | def test_elemabs_values_1(): 9 | npr.seed(1) 10 | 11 | for ii in xrange(NUM_TRIALS): 12 | 13 | np_A = npr.randn(5,6) 14 | A = kayak.Parameter(np_A) 15 | C = kayak.ElemAbs(A) 16 | 17 | assert C.shape == np_A.shape 18 | assert np.all( close_float(C.value, abs(np_A))) 19 | 20 | def test_elemabs_values_2(): 21 | npr.seed(2) 22 | 23 | for ii in xrange(NUM_TRIALS): 24 | 25 | # Only nonnegative values allowed 26 | np_A = -np.log(npr.rand(1)) 27 | A = kayak.Parameter(np_A) 28 | D = kayak.ElemAbs(A) 29 | 30 | assert D.shape == np_A.shape 31 | assert np.all( close_float(D.value, abs(np_A))) 32 | 33 | def test_elemabs_grad_1(): 34 | npr.seed(3) 35 | 36 | for ii in xrange(NUM_TRIALS): 37 | 38 | np_A = npr.randn(5,6) 39 | 40 | A = kayak.Parameter(np_A) 41 | C = kayak.ElemAbs(A) 42 | D = kayak.MatSum(C) 43 | 44 | D.value 45 | assert kayak.util.checkgrad(A, D) < MAX_GRAD_DIFF 46 | 47 | def test_elemabs_grad_2(): 48 | npr.seed(9) 49 | 50 | for ii in xrange(NUM_TRIALS): 51 | 52 | np_A = npr.randn(1) 53 | A = kayak.Parameter(np_A) 54 | D = kayak.ElemAbs(A) 55 | E = kayak.MatSum(D) 56 | 57 | E.value 58 | assert kayak.util.checkgrad(A, E) < MAX_GRAD_DIFF 59 | -------------------------------------------------------------------------------- /tests/test_ElemExp.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . import * 7 | 8 | def test_elemexp_values_1(): 9 | npr.seed(1) 10 | 11 | for ii in xrange(NUM_TRIALS): 12 | 13 | np_A = npr.randn(5,6) 14 | A = kayak.Parameter(np_A) 15 | C = kayak.ElemExp(A) 16 | 17 | assert C.shape == np_A.shape 18 | assert np.all( close_float(C.value, np.exp(np_A))) 19 | 20 | def test_elemexp_values_2(): 21 | npr.seed(2) 22 | 23 | for ii in xrange(NUM_TRIALS): 24 | 25 | np_A = npr.randn(1) 26 | A = kayak.Parameter(np_A) 27 | D = kayak.ElemExp(A) 28 | 29 | assert D.shape == np_A.shape 30 | assert np.all( close_float(D.value, np.exp(np_A))) 31 | 32 | def test_elemexp_grad_1(): 33 | npr.seed(8) 34 | 35 | for ii in xrange(NUM_TRIALS): 36 | 37 | np_A = npr.randn(5,6) 38 | A = kayak.Parameter(np_A) 39 | C = kayak.ElemExp(A) 40 | D = kayak.MatSum(C) 41 | 42 | D.value 43 | assert kayak.util.checkgrad(A, D) < MAX_GRAD_DIFF 44 | 45 | def test_elemexp_grad_2(): 46 | npr.seed(9) 47 | 48 | for ii in xrange(NUM_TRIALS): 49 | 50 | np_A = npr.randn(1) 51 | A = kayak.Parameter(np_A) 52 | D = kayak.ElemExp(A) 53 | E = kayak.MatSum(D) 54 | 55 | E.value 56 | assert kayak.util.checkgrad(A, E) < MAX_GRAD_DIFF 57 | -------------------------------------------------------------------------------- /tests/test_ElemMult.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . 
import * 7 | 8 | def test_elemmult_values_1(): 9 | npr.seed(1) 10 | 11 | for ii in xrange(NUM_TRIALS): 12 | 13 | np_A = npr.randn(5,6) 14 | np_B = npr.randn(5,6) 15 | A = kayak.Parameter(np_A) 16 | B = kayak.Parameter(np_B) 17 | C = kayak.ElemMult(A, B) 18 | 19 | assert C.shape == np_A.shape 20 | assert np.all( close_float(C.value, np_A*np_B)) 21 | 22 | def test_elemmult_values_2(): 23 | npr.seed(2) 24 | 25 | for ii in xrange(NUM_TRIALS): 26 | 27 | np_A = npr.randn(5,6) 28 | np_B = npr.randn(5,6) 29 | np_C = npr.randn(5,6) 30 | A = kayak.Parameter(np_A) 31 | B = kayak.Parameter(np_B) 32 | C = kayak.Parameter(np_C) 33 | D = kayak.ElemMult(A, B, C) 34 | 35 | assert D.shape == np_A.shape 36 | assert np.all( close_float(D.value, np_A*np_B*np_C)) 37 | 38 | def test_elemmult_values_3(): 39 | npr.seed(7) 40 | 41 | for ii in xrange(NUM_TRIALS): 42 | 43 | np_A = npr.randn(5,6) 44 | np_B = npr.randn(5,6) 45 | A = kayak.Parameter(np_A) 46 | B = kayak.Parameter(np_B) 47 | D = kayak.ElemMult(A, B, A) 48 | 49 | assert D.shape == (5,6) 50 | assert np.all( close_float(D.value, np_A**2 * np_B)) 51 | 52 | def test_elemmult_values_4(): 53 | npr.seed(1) 54 | 55 | for ii in xrange(NUM_TRIALS): 56 | 57 | np_A = npr.randn(5,1) 58 | np_B = npr.randn(5,6) 59 | A = kayak.Parameter(np_A) 60 | B = kayak.Parameter(np_B) 61 | C = kayak.ElemMult(A, B) 62 | 63 | assert C.shape == (5,6) 64 | assert np.all( close_float(C.value, np_A*np_B)) 65 | 66 | def test_elemmult_values_5(): 67 | npr.seed(2) 68 | 69 | for ii in xrange(NUM_TRIALS): 70 | 71 | np_A = npr.randn(5,1) 72 | np_B = npr.randn(1,6) 73 | np_C = npr.randn(1,1) 74 | A = kayak.Parameter(np_A) 75 | B = kayak.Parameter(np_B) 76 | C = kayak.Parameter(np_C) 77 | D = kayak.ElemMult(A, B, C) 78 | 79 | assert D.shape == (5,6) 80 | assert np.all( close_float(D.value, np_A*np_B*np_C)) 81 | 82 | def test_elemmult_values_6(): 83 | npr.seed(7) 84 | 85 | for ii in xrange(NUM_TRIALS): 86 | 87 | np_A = npr.randn(1,1) 88 | np_B = npr.randn(5,6) 89 | A = kayak.Parameter(np_A) 90 | B = kayak.Parameter(np_B) 91 | D = kayak.ElemMult(A, B, A) 92 | 93 | assert D.shape == (5,6) 94 | assert np.all( close_float(D.value, np_A**2 * np_B)) 95 | 96 | 97 | def test_elemmult_grad_1(): 98 | npr.seed(8) 99 | 100 | for ii in xrange(NUM_TRIALS): 101 | 102 | np_A = npr.randn(5,6) 103 | np_B = npr.randn(5,6) 104 | A = kayak.Parameter(np_A) 105 | B = kayak.Parameter(np_B) 106 | C = kayak.ElemMult(A, B) 107 | D = kayak.MatSum(C) 108 | 109 | D.value 110 | assert kayak.util.checkgrad(A, D) < MAX_GRAD_DIFF 111 | assert kayak.util.checkgrad(B, D) < MAX_GRAD_DIFF 112 | 113 | def test_elemmult_grad_2(): 114 | npr.seed(9) 115 | 116 | for ii in xrange(NUM_TRIALS): 117 | 118 | np_A = npr.randn(5,6) 119 | np_B = npr.randn(5,6) 120 | np_C = npr.randn(5,6) 121 | A = kayak.Parameter(np_A) 122 | B = kayak.Parameter(np_B) 123 | C = kayak.Parameter(np_C) 124 | D = kayak.ElemMult(A, B, C) 125 | E = kayak.MatSum(D) 126 | 127 | E.value 128 | assert E.grad(A).shape == np_A.shape 129 | assert E.grad(B).shape == np_B.shape 130 | assert E.grad(C).shape == np_C.shape 131 | assert kayak.util.checkgrad(A, E) < MAX_GRAD_DIFF 132 | assert kayak.util.checkgrad(B, E) < MAX_GRAD_DIFF 133 | assert kayak.util.checkgrad(C, E) < MAX_GRAD_DIFF 134 | 135 | def test_elemmult_grad_3(): 136 | npr.seed(14) 137 | 138 | for ii in xrange(NUM_TRIALS): 139 | 140 | np_A = npr.randn(5,6) 141 | np_B = npr.randn(5,6) 142 | A = kayak.Parameter(np_A) 143 | B = kayak.Parameter(np_B) 144 | D = kayak.ElemMult(A, B, A) 145 | E = kayak.MatSum(D) 146 | 147 | 
E.value 148 | assert E.grad(A).shape == np_A.shape 149 | assert E.grad(B).shape == np_B.shape 150 | assert kayak.util.checkgrad(A, E) < MAX_GRAD_DIFF 151 | assert kayak.util.checkgrad(B, E) < MAX_GRAD_DIFF 152 | 153 | def test_elemmult_grad_4(): 154 | npr.seed(15) 155 | 156 | for ii in xrange(NUM_TRIALS): 157 | 158 | np_A = npr.randn(5,6) 159 | A = kayak.Parameter(np_A) 160 | D = kayak.ElemMult(A, A) 161 | E = kayak.MatSum(D) 162 | 163 | E.value 164 | assert E.grad(A).shape == np_A.shape 165 | assert kayak.util.checkgrad(A, E) < MAX_GRAD_DIFF 166 | 167 | def test_elemmult_grad_5(): 168 | npr.seed(8) 169 | 170 | for ii in xrange(NUM_TRIALS): 171 | 172 | np_A = npr.randn(5,1) 173 | np_B = npr.randn(5,6) 174 | A = kayak.Parameter(np_A) 175 | B = kayak.Parameter(np_B) 176 | C = kayak.ElemMult(A, B) 177 | D = kayak.MatSum(C) 178 | 179 | D.value 180 | assert D.grad(A).shape == np_A.shape 181 | assert D.grad(B).shape == np_B.shape 182 | assert kayak.util.checkgrad(A, D) < MAX_GRAD_DIFF 183 | assert kayak.util.checkgrad(B, D) < MAX_GRAD_DIFF 184 | 185 | def test_elemmult_grad_6(): 186 | npr.seed(9) 187 | 188 | for ii in xrange(NUM_TRIALS): 189 | 190 | np_A = npr.randn(5,1) 191 | np_B = npr.randn(1,6) 192 | np_C = npr.randn(1,1) 193 | A = kayak.Parameter(np_A) 194 | B = kayak.Parameter(np_B) 195 | C = kayak.Parameter(np_C) 196 | D = kayak.ElemMult(A, B, C) 197 | E = kayak.MatSum(D) 198 | 199 | E.value 200 | assert E.grad(A).shape == np_A.shape 201 | assert E.grad(B).shape == np_B.shape 202 | assert E.grad(C).shape == np_C.shape 203 | assert kayak.util.checkgrad(A, E) < MAX_GRAD_DIFF 204 | assert kayak.util.checkgrad(B, E) < MAX_GRAD_DIFF 205 | assert kayak.util.checkgrad(C, E) < MAX_GRAD_DIFF 206 | 207 | def test_elemmult_grad_7(): 208 | npr.seed(14) 209 | 210 | for ii in xrange(NUM_TRIALS): 211 | 212 | np_A = npr.randn(5,6) 213 | np_B = npr.randn(1,1) 214 | A = kayak.Parameter(np_A) 215 | B = kayak.Parameter(np_B) 216 | D = kayak.ElemMult(A, B, A) 217 | E = kayak.MatSum(D) 218 | 219 | E.value 220 | assert E.grad(A).shape == np_A.shape 221 | assert E.grad(B).shape == np_B.shape 222 | assert kayak.util.checkgrad(A, E) < MAX_GRAD_DIFF 223 | assert kayak.util.checkgrad(B, E) < MAX_GRAD_DIFF 224 | 225 | def test_elemmult_grad_8(): 226 | npr.seed(15) 227 | 228 | for ii in xrange(NUM_TRIALS): 229 | 230 | np_A = npr.randn(5,6) 231 | A = kayak.Parameter(np_A) 232 | D = kayak.ElemMult(A, A) 233 | E = kayak.MatSum(D) 234 | 235 | assert E.grad(A).shape == np_A.shape 236 | assert kayak.util.checkgrad(A, E) < MAX_GRAD_DIFF 237 | -------------------------------------------------------------------------------- /tests/test_ElemPower.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . 
import * 7 | 8 | def test_elempower_values_1(): 9 | npr.seed(1) 10 | 11 | for ii in xrange(NUM_TRIALS): 12 | 13 | np_A = npr.randn(5,6) 14 | A = kayak.Parameter(np_A) 15 | C = kayak.ElemPower(A, 2) 16 | 17 | assert C.shape == np_A.shape 18 | assert np.all( close_float(C.value, np.power(np_A, 2))) 19 | 20 | def test_elempower_values_2(): 21 | npr.seed(2) 22 | 23 | for ii in xrange(NUM_TRIALS): 24 | 25 | # Only nonnegative values allowed 26 | np_A = -np.log(npr.rand(1)) 27 | A = kayak.Parameter(np_A) 28 | D = kayak.ElemPower(A, 0.5) 29 | 30 | assert D.shape == np_A.shape 31 | assert np.all( close_float(D.value, np.power(np_A, 0.5))) 32 | 33 | def test_elempower_values_3(): 34 | npr.seed(1) 35 | 36 | for ii in xrange(NUM_TRIALS): 37 | 38 | np_A = npr.randn(5,6) 39 | A = kayak.Parameter(np_A) 40 | C = kayak.ElemPower(A, -1) 41 | 42 | assert C.shape == np_A.shape 43 | assert np.all( close_float(C.value, np.power(np_A, -1))) 44 | 45 | def test_elempower_values_4(): 46 | npr.seed(2) 47 | 48 | for ii in xrange(NUM_TRIALS): 49 | 50 | np_A = npr.randn(1) 51 | A = kayak.Parameter(np_A) 52 | D = kayak.ElemPower(A, 3.) 53 | 54 | assert D.shape == np_A.shape 55 | assert np.all( close_float(D.value, np.power(np_A, 3.))) 56 | 57 | def test_elempower_grad_1(): 58 | npr.seed(3) 59 | 60 | for ii in xrange(NUM_TRIALS): 61 | 62 | np_A = npr.randn(5,6) 63 | 64 | # Avoid small values where the inverse is unstable 65 | err = np.where(abs(np_A) < 1e-2) 66 | np_A[err] = 1e-2 * np.sign(np_A[err]) 67 | 68 | A = kayak.Parameter(np_A) 69 | C = kayak.ElemPower(A, -1) 70 | D = kayak.MatSum(C) 71 | 72 | D.value 73 | assert kayak.util.checkgrad(A, D) < MAX_GRAD_DIFF 74 | 75 | def test_elempower_grad_2(): 76 | npr.seed(9) 77 | 78 | for ii in xrange(NUM_TRIALS): 79 | 80 | np_A = npr.randn(1) 81 | A = kayak.Parameter(np_A) 82 | D = kayak.ElemPower(A, 2) 83 | E = kayak.MatSum(D) 84 | 85 | E.value 86 | assert kayak.util.checkgrad(A, E) < MAX_GRAD_DIFF 87 | -------------------------------------------------------------------------------- /tests/test_Graphs.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | import numpy.random as npr 4 | 5 | import kayak 6 | 7 | from . 
import * 8 | 9 | def test_graph_simple(): 10 | npr.seed(1) 11 | 12 | N = 1 13 | D = 1 14 | H1 = 1 15 | 16 | X = kayak.Inputs(npr.randn(N,D)) 17 | W1 = kayak.Parameter(npr.randn(D,H1)) 18 | U3 = kayak.MatMult(W1, X) 19 | 20 | out = U3 21 | 22 | print "Value: ", out.value 23 | print "Gradient: ", out.grad(W1) 24 | print "Grad error: ", kayak.util.checkgrad(W1, out) 25 | assert kayak.util.checkgrad(W1, out) < MAX_GRAD_DIFF 26 | 27 | def test_graph_chain(): 28 | npr.seed(1) 29 | 30 | N = 10 31 | D = 5 32 | H1 = 6 33 | H2 = 7 34 | 35 | X = kayak.Inputs(npr.randn(N,D)) 36 | W1 = kayak.Parameter(npr.randn(D,H1)) 37 | W2 = kayak.Parameter(npr.randn(H1,H2)) 38 | W3 = kayak.Parameter(npr.randn(H2,1)) 39 | 40 | U1 = kayak.SoftReLU(kayak.MatMult(X, W1)) 41 | U2 = kayak.SoftReLU(kayak.MatMult(U1, W2)) 42 | U3 = kayak.SoftReLU(kayak.MatMult(U2, W3)) 43 | 44 | out = kayak.MatSum(U3) 45 | 46 | out.value 47 | assert kayak.util.checkgrad(W1, out) < MAX_GRAD_DIFF 48 | assert kayak.util.checkgrad(W2, out) < MAX_GRAD_DIFF 49 | assert kayak.util.checkgrad(W3, out) < MAX_GRAD_DIFF 50 | 51 | def test_graph_diamond(): 52 | npr.seed(2) 53 | 54 | N = 10 55 | D = 5 56 | H1 = 6 57 | H2 = 7 58 | 59 | X = kayak.Inputs(npr.randn(N,D)) 60 | W1 = kayak.Parameter(npr.randn(D,H1)) 61 | W2a = kayak.Parameter(npr.randn(H1,H2)) 62 | W2b = kayak.Parameter(npr.randn(H1,H2)) 63 | W3 = kayak.Parameter(npr.randn(H2,1)) 64 | 65 | U1 = kayak.SoftReLU(kayak.MatMult(X, W1)) 66 | U2a = kayak.SoftReLU(kayak.MatMult(U1, W2a)) 67 | U2b = kayak.SoftReLU(kayak.MatMult(U1, W2b)) 68 | U3a = kayak.SoftReLU(kayak.MatMult(U2a, W3)) 69 | U3b = kayak.SoftReLU(kayak.MatMult(U2b, W3)) 70 | 71 | out = kayak.MatSum(kayak.MatAdd(U3a, U3b)) 72 | 73 | out.value 74 | print kayak.util.checkgrad(W1, out) 75 | print kayak.util.checkgrad(W2a, out) 76 | print kayak.util.checkgrad(W2b, out) 77 | print kayak.util.checkgrad(W3, out) 78 | assert kayak.util.checkgrad(W1, out) < MAX_GRAD_DIFF 79 | assert kayak.util.checkgrad(W2a, out) < MAX_GRAD_DIFF 80 | assert kayak.util.checkgrad(W2b, out) < MAX_GRAD_DIFF 81 | assert kayak.util.checkgrad(W3, out) < MAX_GRAD_DIFF 82 | 83 | def test_graph_dag(): 84 | npr.seed(3) 85 | 86 | num_layers = 7 87 | num_dims = 5 88 | 89 | for ii in xrange(NUM_TRIALS): 90 | probs = npr.rand() 91 | 92 | X = kayak.Inputs(npr.randn(25,num_dims)) 93 | 94 | wts = [] 95 | layers = [] 96 | for jj in xrange(num_layers): 97 | 98 | U = kayak.Constant(np.zeros((25,num_dims))) 99 | 100 | if npr.rand() < probs: 101 | W = kayak.Parameter(0.1*npr.randn(num_dims, num_dims)) 102 | wts.append(W) 103 | U = kayak.MatAdd( U, kayak.SoftReLU(kayak.MatMult(X, W)) ) 104 | 105 | for kk in xrange(jj): 106 | if npr.rand() < probs: 107 | W = kayak.Parameter(0.1*npr.randn(num_dims, num_dims)) 108 | wts.append(W) 109 | U = kayak.MatAdd( U, kayak.SoftReLU(kayak.MatMult(layers[kk], W)) ) 110 | 111 | layers.append(U) 112 | 113 | out = kayak.MatSum(layers[-1]) 114 | 115 | out.value 116 | for jj, wt in enumerate(wts): 117 | diff = kayak.util.checkgrad(wt, out, 1e-4) 118 | print diff 119 | assert diff < 1e-4 120 | 121 | def test_cache_utility(): 122 | npr.seed(3) 123 | 124 | num_layers = 17 125 | num_dims = 3 126 | 127 | X = kayak.Inputs(npr.randn(10, num_dims)) 128 | W1 = kayak.Parameter(npr.randn(num_dims, num_dims)) 129 | W2 = kayak.Parameter(npr.randn(num_dims, num_dims)) 130 | 131 | Z = kayak.MatMult(X, W1) 132 | 133 | for jj in xrange(num_layers): 134 | Z = kayak.SoftReLU(kayak.MatAdd(kayak.MatMult(Z, W2), 135 | kayak.MatMult(Z, W2))) 136 | 137 | out = kayak.MatSum(Z) 138 
| assert kayak.util.checkgrad(W1, out) < 1e-4 139 | 140 | def test_irrelevant_outputs(): 141 | # Having an irrelevant output shouldn't cause problems. Indeed, its 142 | # gradient and value should not be called. 143 | class NoValue(kayak.Differentiable): 144 | def __init__(self, A, *args): 145 | # Recurse to handle lists of arguments. 146 | super(NoValue, self).__init__([A]) 147 | def _compute_value(self): 148 | raise AttributeError("Value should not be called") 149 | def _local_grad(self, parent, d_out_d_self): 150 | raise AttributeError("Grad should not be called") 151 | 152 | X = kayak.Inputs(npr.randn(10, 20)) 153 | Y = kayak.Inputs(npr.randn(10, 20)) 154 | Z = X + Y 155 | bad_output = NoValue(X) 156 | Z.grad(X) # Will raise AttributeError if bad_output's value or grad is called 157 | 158 | def test_irrelevant_outputs_2(): 159 | # As above, with a chain of outputs 160 | class NoValue(kayak.Differentiable): 161 | def __init__(self, A, *args): 162 | # Recurse to handle lists of arguments. 163 | super(NoValue, self).__init__([A]) 164 | def _compute_value(self): 165 | raise AttributeError("Value should not be called") 166 | def _local_grad(self, parent, d_out_d_self): 167 | raise AttributeError("Grad should not be called") 168 | 169 | X = kayak.Inputs(npr.randn(10, 20)) 170 | Y = kayak.Inputs(npr.randn(10, 20)) 171 | Z = X + Y 172 | bad_pre_output = NoValue(X) 173 | bad_output = NoValue(bad_pre_output) 174 | 175 | Z.grad(X) # Will raise AttributeError if the bad outputs' values or grads are called 176 | 177 | -------------------------------------------------------------------------------- /tests/test_HardReLU.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . import * 7 | 8 | def test_hardrelu_values(): 9 | npr.seed(1) 10 | 11 | for ii in xrange(NUM_TRIALS): 12 | np_X = npr.randn(6,5) 13 | X = kayak.Parameter(np_X) 14 | Y = kayak.HardReLU(X) 15 | 16 | assert np.all( Y.value >= 0.0 ) 17 | assert np.all(np.maximum(np_X, 0.0) == Y.value) 18 | 19 | def test_hardrelu_grad(): 20 | npr.seed(2) 21 | 22 | # Needs to be small due to non-differentiability. 23 | epsilon = 1e-6 24 | 25 | for ii in xrange(NUM_TRIALS): 26 | np_X = npr.randn(6,5) 27 | X = kayak.Parameter(np_X) 28 | Y = kayak.HardReLU(X) 29 | Z = kayak.MatSum(Y) 30 | 31 | Z.value 32 | assert np.all( Z.grad(X) >= 0.0 ) 33 | print "CHECKGRAD: ", ii, kayak.util.checkgrad(X, Z, epsilon) 34 | assert kayak.util.checkgrad(X, Z, epsilon) < MAX_GRAD_DIFF 35 | -------------------------------------------------------------------------------- /tests/test_Horseshoe.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from .
7 | 8 | def test_scalar_value(): 9 | npr.seed(1) 10 | 11 | for ii in xrange(NUM_TRIALS): 12 | np_X = npr.randn() 13 | 14 | X = kayak.Parameter(np_X) 15 | out = kayak.Horseshoe(X) 16 | 17 | assert close_float(out.value, -np.log(np.log(1.0 + np_X**(-2)))) 18 | 19 | def test_scalar_grad(): 20 | npr.seed(2) 21 | 22 | for ii in xrange(NUM_TRIALS): 23 | while True: 24 | np_X = npr.randn() 25 | if np.abs(np_X) > 0.1: 26 | break 27 | 28 | X = kayak.Parameter(np_X) 29 | out = kayak.Horseshoe(X) 30 | 31 | assert kayak.util.checkgrad(X, out) < MAX_GRAD_DIFF 32 | 33 | def test_scalar_value_2(): 34 | npr.seed(3) 35 | 36 | for ii in xrange(NUM_TRIALS): 37 | np_X = npr.randn() 38 | wt = np.exp(npr.randn()) 39 | 40 | X = kayak.Parameter(np_X) 41 | out = kayak.Horseshoe(X, weight=wt) 42 | 43 | assert close_float(out.value, -wt * np.log(np.log(1.0 + np_X**-2))) 44 | 45 | def test_scalar_grad_2(): 46 | npr.seed(4) 47 | 48 | for ii in xrange(NUM_TRIALS): 49 | while True: 50 | np_X = npr.randn() 51 | if np.abs(np_X) > 0.1: 52 | break 53 | wt = np.exp(npr.randn()) 54 | 55 | X = kayak.Parameter(np_X) 56 | out = kayak.Horseshoe(X, weight=wt) 57 | 58 | assert kayak.util.checkgrad(X, out) < MAX_GRAD_DIFF 59 | 60 | def test_vector_value(): 61 | npr.seed(5) 62 | 63 | for ii in xrange(NUM_TRIALS): 64 | np_X = npr.randn(10,1) 65 | wt = np.exp(npr.randn()) 66 | 67 | X = kayak.Parameter(np_X) 68 | out = kayak.Horseshoe(X, weight=wt) 69 | 70 | assert close_float(out.value, -wt * np.sum(np.log(np.log(1.0 + np_X**-2)))) 71 | 72 | def test_vector_grad(): 73 | npr.seed(6) 74 | 75 | for ii in xrange(NUM_TRIALS): 76 | # Draw a full vector, then shift every entry away from zero, where the penalty is singular. 77 | np_X = npr.randn(10,1) 78 | np_X = np.where(np_X >= 0, np_X + 0.1, np_X - 0.1) 79 | assert np.all(np.abs(np_X) >= 0.1) 80 | wt = np.exp(npr.randn()) 81 | 82 | X = kayak.Parameter(np_X) 83 | out = kayak.Horseshoe(X, weight=wt) 84 | 85 | assert kayak.util.checkgrad(X, out) < MAX_GRAD_DIFF 86 | 87 | def test_matrix_value(): 88 | npr.seed(7) 89 | 90 | for ii in xrange(NUM_TRIALS): 91 | np_X = npr.randn(10,20) 92 | wt = np.exp(npr.randn()) 93 | 94 | X = kayak.Parameter(np_X) 95 | out = kayak.Horseshoe(X, weight=wt) 96 | 97 | assert close_float(out.value, -wt * np.sum(np.log(np.log(1.0 + np_X**-2)))) 98 | 99 | def test_matrix_grad(): 100 | npr.seed(8) 101 | 102 | for ii in xrange(NUM_TRIALS): 103 | # Shift a full matrix away from zero; resampling until all entries clear 0.1 would almost never terminate. 104 | np_X = npr.randn(10,20) 105 | np_X = np.where(np_X >= 0, np_X + 0.1, np_X - 0.1) 106 | assert np.all(np.abs(np_X) >= 0.1) 107 | wt = np.exp(npr.randn()) 108 | 109 | X = kayak.Parameter(np_X) 110 | out = kayak.Horseshoe(X, weight=wt) 111 | 112 | assert kayak.util.checkgrad(X, out) < MAX_GRAD_DIFF 113 | 114 | def test_tensor_value(): 115 | npr.seed(9) 116 | 117 | for ii in xrange(NUM_TRIALS): 118 | np_X = npr.randn(10,20,5) 119 | wt = np.exp(npr.randn()) 120 | 121 | X = kayak.Parameter(np_X) 122 | out = kayak.Horseshoe(X, weight=wt) 123 | 124 | assert close_float(out.value, -wt * np.sum(np.log(np.log(1.0 + np_X**-2)))) 125 | 126 | def test_tensor_grad(): 127 | npr.seed(10) 128 | 129 | for ii in xrange(NUM_TRIALS): 130 | # Same shift trick for a full tensor. 131 | np_X = npr.randn(10,20,5) 132 | np_X = np.where(np_X >= 0, np_X + 0.1, np_X - 0.1) 133 | assert np.all(np.abs(np_X) >= 0.1) 134 | wt = np.exp(npr.randn()) 135 | 136 | X = kayak.Parameter(np_X) 137 | out = kayak.Horseshoe(X, weight=wt) 138 | 139 | assert kayak.util.checkgrad(X, out) < MAX_GRAD_DIFF 140 | 141 | -------------------------------------------------------------------------------- /tests/test_Identity.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . 
import * 7 | 8 | def test_identity(): 9 | npr.seed(1) 10 | np_A = npr.randn(6,7) 11 | A = kayak.Parameter(np_A) 12 | B = kayak.Identity(A) 13 | assert np.all(close_float(B.value, np_A)) 14 | assert np.all(close_float(B.grad(A), np.ones((6,7)))) 15 | -------------------------------------------------------------------------------- /tests/test_Indexing.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . import * 7 | from nose.tools import assert_less 8 | 9 | def test_indexing_values(): 10 | npr.seed(1) 11 | 12 | for ii in xrange(NUM_TRIALS): 13 | np_X = npr.randn(6,10) 14 | inds = npr.permutation(10)[:5] 15 | X = kayak.Parameter(np_X) 16 | Y = kayak.Take(X, inds,axis=1) 17 | assert(np.array_equal(Y.value, np.take(np_X, inds,axis=1))) 18 | 19 | def test_indexing_grad(): 20 | npr.seed(2) 21 | 22 | for ii in xrange(NUM_TRIALS): 23 | np_X = npr.randn(6,20) 24 | inds = npr.permutation(20)[:5] 25 | X = kayak.Parameter(np_X) 26 | Y = kayak.Take(X, inds,axis=1) 27 | Z = kayak.MatSum(Y) 28 | 29 | Z.value 30 | assert_less(kayak.util.checkgrad(X, Z), MAX_GRAD_DIFF) 31 | 32 | def test_indexing_grad_2(): 33 | npr.seed(3) 34 | 35 | for ii in xrange(NUM_TRIALS): 36 | np_X = npr.randn(6, 2, 7, 3) 37 | inds = npr.permutation(7)[:5] 38 | X = kayak.Parameter(np_X) 39 | Y = kayak.Take(X, inds,axis=2) 40 | Z = kayak.MatSum(Y) 41 | 42 | Z.value 43 | assert_less(kayak.util.checkgrad(X, Z), MAX_GRAD_DIFF) 44 | -------------------------------------------------------------------------------- /tests/test_Inputs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HIPS/Kayak/1a7d4baa849bbd5a6f6d0486136169899cf25523/tests/test_Inputs.py -------------------------------------------------------------------------------- /tests/test_L1Norm.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . 
import * 7 | 8 | def test_scalar_value(): 9 | npr.seed(1) 10 | 11 | for ii in xrange(NUM_TRIALS): 12 | np_X = npr.randn() 13 | 14 | X = kayak.Parameter(np_X) 15 | out = kayak.L1Norm(X) 16 | 17 | assert close_float(out.value, np.abs(np_X)) 18 | 19 | def test_scalar_grad(): 20 | npr.seed(2) 21 | 22 | for ii in xrange(NUM_TRIALS): 23 | while True: 24 | np_X = npr.randn() 25 | if np.abs(np_X) > 0.1: 26 | break 27 | 28 | X = kayak.Parameter(np_X) 29 | out = kayak.L1Norm(X) 30 | 31 | assert close_float(out.grad(X), np.sign(np_X)) 32 | assert kayak.util.checkgrad(X, out) < MAX_GRAD_DIFF 33 | 34 | def test_scalar_value_2(): 35 | npr.seed(3) 36 | 37 | for ii in xrange(NUM_TRIALS): 38 | np_X = npr.randn() 39 | wt = np.exp(npr.randn()) 40 | 41 | X = kayak.Parameter(np_X) 42 | out = kayak.L1Norm(X, weight=wt) 43 | 44 | assert close_float(out.value, wt * np.abs(np_X)) 45 | 46 | def test_scalar_grad_2(): 47 | npr.seed(4) 48 | 49 | for ii in xrange(NUM_TRIALS): 50 | while True: 51 | np_X = npr.randn() 52 | if np.abs(np_X) > 0.1: 53 | break 54 | wt = np.exp(npr.randn()) 55 | 56 | X = kayak.Parameter(np_X) 57 | out = kayak.L1Norm(X, weight=wt) 58 | 59 | assert close_float(out.grad(X), wt*np.sign(np_X)) 60 | assert kayak.util.checkgrad(X, out) < MAX_GRAD_DIFF 61 | 62 | def test_vector_value(): 63 | npr.seed(5) 64 | 65 | for ii in xrange(NUM_TRIALS): 66 | np_X = npr.randn(10,1) 67 | wt = np.exp(npr.randn()) 68 | 69 | X = kayak.Parameter(np_X) 70 | out = kayak.L1Norm(X, weight=wt) 71 | 72 | assert close_float(out.value, wt * np.sum(np.abs(np_X))) 73 | 74 | def test_vector_grad(): 75 | npr.seed(6) 76 | 77 | for ii in xrange(NUM_TRIALS): 78 | # Draw a full vector, then shift every entry away from the kink of |x| at zero. 79 | np_X = npr.randn(10,1) 80 | np_X = np.where(np_X >= 0, np_X + 0.1, np_X - 0.1) 81 | assert np.all(np.abs(np_X) >= 0.1) 82 | wt = np.exp(npr.randn()) 83 | 84 | X = kayak.Parameter(np_X) 85 | out = kayak.L1Norm(X, weight=wt) 86 | 87 | assert np.all(close_float(out.grad(X), wt*np.sign(np_X))) 88 | assert kayak.util.checkgrad(X, out) < MAX_GRAD_DIFF 89 | 90 | def test_matrix_value(): 91 | npr.seed(7) 92 | 93 | for ii in xrange(NUM_TRIALS): 94 | np_X = npr.randn(10,20) 95 | wt = np.exp(npr.randn()) 96 | 97 | X = kayak.Parameter(np_X) 98 | out = kayak.L1Norm(X, weight=wt) 99 | 100 | assert close_float(out.value, wt * np.sum(np.abs(np_X))) 101 | 102 | def test_matrix_grad(): 103 | npr.seed(8) 104 | 105 | for ii in xrange(NUM_TRIALS): 106 | # Shift a full matrix away from zero; resampling until all entries clear 0.1 would almost never terminate. 107 | np_X = npr.randn(10,20) 108 | np_X = np.where(np_X >= 0, np_X + 0.1, np_X - 0.1) 109 | assert np.all(np.abs(np_X) >= 0.1) 110 | wt = np.exp(npr.randn()) 111 | 112 | X = kayak.Parameter(np_X) 113 | out = kayak.L1Norm(X, weight=wt) 114 | 115 | assert np.all(close_float(out.grad(X), wt*np.sign(np_X))) 116 | assert kayak.util.checkgrad(X, out) < MAX_GRAD_DIFF 117 | 118 | def test_tensor_value(): 119 | npr.seed(9) 120 | 121 | for ii in xrange(NUM_TRIALS): 122 | np_X = npr.randn(10,20,5) 123 | wt = np.exp(npr.randn()) 124 | 125 | X = kayak.Parameter(np_X) 126 | out = kayak.L1Norm(X, weight=wt) 127 | 128 | assert close_float(out.value, wt * np.sum(np.abs(np_X))) 129 | 130 | def test_tensor_grad(): 131 | npr.seed(10) 132 | 133 | for ii in xrange(NUM_TRIALS): 134 | # Same shift trick for a full tensor. 135 | np_X = npr.randn(10,20,5) 136 | np_X = np.where(np_X >= 0, np_X + 0.1, np_X - 0.1) 137 | assert np.all(np.abs(np_X) >= 0.1) 138 | wt = np.exp(npr.randn()) 139 | 140 | X = kayak.Parameter(np_X) 141 | out = kayak.L1Norm(X, weight=wt) 142 | 143 | assert np.all(close_float(out.grad(X), wt*np.sign(np_X))) 144 | assert kayak.util.checkgrad(X, out) < MAX_GRAD_DIFF 145 | 146 | -------------------------------------------------------------------------------- /tests/test_L2Loss.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . import * 7 | 8 | def test_scalar_value(): 9 | npr.seed(1) 10 | 11 | for ii in xrange(NUM_TRIALS): 12 | np_pred = npr.randn() 13 | np_targ = npr.randn() 14 | 15 | pred = kayak.Parameter(np_pred) 16 | targ = kayak.Targets(np_targ) 17 | out = kayak.L2Loss(pred, targ) 18 | 19 | # Verify that a scalar is reproduced. 20 | assert close_float(out.value, (np_pred-np_targ)**2) 21 | 22 | def test_scalar_grad(): 23 | npr.seed(2) 24 | 25 | for ii in xrange(NUM_TRIALS): 26 | np_pred = npr.randn() 27 | np_targ = npr.randn() 28 | 29 | pred = kayak.Parameter(np_pred) 30 | targ = kayak.Targets(np_targ) 31 | out = kayak.L2Loss(pred, targ) 32 | 33 | assert close_float(out.grad(pred), 2*(np_pred-np_targ)) 34 | assert kayak.util.checkgrad(pred, out) < 1e-6 35 | 36 | def test_vector_value(): 37 | npr.seed(3) 38 | 39 | for ii in xrange(NUM_TRIALS): 40 | np_pred = npr.randn(10,1) 41 | np_targ = npr.randn(10,1) 42 | 43 | pred = kayak.Parameter(np_pred) 44 | targ = kayak.Targets(np_targ) 45 | out = kayak.L2Loss(pred, targ) 46 | 47 | assert close_float(out.value, np.sum((np_pred-np_targ)**2)) 48 | 49 | def test_vector_grad(): 50 | npr.seed(4) 51 | 52 | for ii in xrange(NUM_TRIALS): 53 | np_pred = npr.randn(10,1) 54 | np_targ = npr.randn(10,1) 55 | 56 | pred = kayak.Parameter(np_pred) 57 | targ = kayak.Targets(np_targ) 58 | out = kayak.L2Loss(pred, targ) 59 | 60 | assert np.all(close_float(out.grad(pred), 2*(np_pred-np_targ))) 61 | assert kayak.util.checkgrad(pred, out) < 1e-6 62 | 63 | def test_matrix_value_1(): 64 | npr.seed(5) 65 | 66 | for ii in xrange(NUM_TRIALS): 67 | np_pred = npr.randn(10,20) 68 | np_targ = npr.randn(10,20) 69 | 70 | pred = kayak.Parameter(np_pred) 71 | targ = kayak.Targets(np_targ) 72 | out = kayak.L2Loss(pred, targ) 73 | 74 | print out.value, (np_pred-np_targ)**2 75 | assert close_float(out.value, np.sum((np_pred-np_targ)**2)) 76 | 77 | def test_matrix_grad(): 78 | npr.seed(6) 79 | 80 | for ii in xrange(NUM_TRIALS): 81 | np_pred = npr.randn(10,20) 82 | np_targ = npr.randn(10,20) 83 | 84 | pred = kayak.Parameter(np_pred) 85 | targ = kayak.Targets(np_targ) 86 | out = kayak.L2Loss(pred, targ) 87 | 88 | assert np.all(close_float(out.grad(pred), 2*(np_pred-np_targ))) 89 | assert kayak.util.checkgrad(pred, out) < 1e-6 90 | 91 | def test_matrix_value_2(): 92 | npr.seed(7) 93 | 94 | for ii in xrange(NUM_TRIALS): 95 | np_pred = npr.randn(10,20) 96 | np_targ = npr.randn(10,20) 97 | 98 | pred = kayak.Parameter(np_pred) 99 | targ = kayak.Targets(np_targ) 100 | out = kayak.L2Loss(pred, targ, axis=0) 101 | 102 | print out.value, np.sum((np_pred-np_targ)**2, axis=0) 103 | assert np.all(close_float(out.value, np.sum((np_pred-np_targ)**2, axis=0))) 104 | 105 | def test_matrix_value_3(): 106 | npr.seed(8) 107 | 108 | for ii in xrange(NUM_TRIALS): 109 | np_pred = npr.randn(10,20) 110 | np_targ = npr.randn(10,20) 111 | 112 | pred = kayak.Parameter(np_pred) 113 | targ = kayak.Targets(np_targ) 114 | out = kayak.L2Loss(pred, targ, axis=1) 115 | 116 | print out.value, np.sum((np_pred-np_targ)**2, axis=1) 117 | assert np.all(close_float(out.value, np.sum((np_pred-np_targ)**2, axis=1, keepdims=True))) 118 | 119 | -------------------------------------------------------------------------------- /tests/test_L2Norm.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 
| 4 | import kayak 5 | 6 | from . import * 7 | 8 | def test_scalar_value(): 9 | npr.seed(1) 10 | 11 | for ii in xrange(NUM_TRIALS): 12 | np_X = npr.randn() 13 | 14 | X = kayak.Parameter(np_X) 15 | out = kayak.L2Norm(X) 16 | 17 | assert close_float(out.value, np_X**2) 18 | 19 | def test_scalar_grad(): 20 | npr.seed(2) 21 | 22 | for ii in xrange(NUM_TRIALS): 23 | np_X = npr.randn() 24 | 25 | X = kayak.Parameter(np_X) 26 | out = kayak.L2Norm(X) 27 | 28 | assert close_float(out.grad(X), 2*np_X) 29 | assert kayak.util.checkgrad(X, out) < MAX_GRAD_DIFF 30 | 31 | def test_scalar_value_2(): 32 | npr.seed(3) 33 | 34 | for ii in xrange(NUM_TRIALS): 35 | np_X = npr.randn() 36 | wt = np.exp(npr.randn()) 37 | 38 | X = kayak.Parameter(np_X) 39 | out = kayak.L2Norm(X, weight=wt) 40 | 41 | assert close_float(out.value, wt * np_X**2) 42 | 43 | def test_scalar_grad_2(): 44 | npr.seed(4) 45 | 46 | for ii in xrange(NUM_TRIALS): 47 | np_X = npr.randn() 48 | wt = np.exp(npr.randn()) 49 | 50 | X = kayak.Parameter(np_X) 51 | out = kayak.L2Norm(X, weight=wt) 52 | 53 | assert close_float(out.grad(X), 2*wt*np_X) 54 | assert kayak.util.checkgrad(X, out) < MAX_GRAD_DIFF 55 | 56 | def test_vector_value(): 57 | npr.seed(5) 58 | 59 | for ii in xrange(NUM_TRIALS): 60 | np_X = npr.randn(10,1) 61 | wt = np.exp(npr.randn()) 62 | 63 | X = kayak.Parameter(np_X) 64 | out = kayak.L2Norm(X, weight=wt) 65 | 66 | assert close_float(out.value, wt * np.sum(np_X**2)) 67 | 68 | def test_vector_grad(): 69 | npr.seed(6) 70 | 71 | for ii in xrange(NUM_TRIALS): 72 | np_X = npr.randn(10,1) 73 | wt = np.exp(npr.randn()) 74 | 75 | X = kayak.Parameter(np_X) 76 | out = kayak.L2Norm(X, weight=wt) 77 | 78 | assert np.all(close_float(out.grad(X), 2*wt*np_X)) 79 | assert kayak.util.checkgrad(X, out) < MAX_GRAD_DIFF 80 | 81 | def test_matrix_value(): 82 | npr.seed(7) 83 | 84 | for ii in xrange(NUM_TRIALS): 85 | np_X = npr.randn(10,20) 86 | wt = np.exp(npr.randn()) 87 | 88 | X = kayak.Parameter(np_X) 89 | out = kayak.L2Norm(X, weight=wt) 90 | 91 | assert close_float(out.value, wt * np.sum(np_X**2)) 92 | 93 | def test_matrix_grad(): 94 | npr.seed(8) 95 | 96 | for ii in xrange(NUM_TRIALS): 97 | np_X = npr.randn(10,20) 98 | wt = np.exp(npr.randn()) 99 | 100 | X = kayak.Parameter(np_X) 101 | out = kayak.L2Norm(X, weight=wt) 102 | 103 | assert np.all(close_float(out.grad(X), 2*wt*np_X)) 104 | assert kayak.util.checkgrad(X, out) < MAX_GRAD_DIFF 105 | 106 | def test_tensor_value(): 107 | npr.seed(9) 108 | 109 | for ii in xrange(NUM_TRIALS): 110 | np_X = npr.randn(10,20,5) 111 | wt = np.exp(npr.randn()) 112 | 113 | X = kayak.Parameter(np_X) 114 | out = kayak.L2Norm(X, weight=wt) 115 | 116 | assert close_float(out.value, wt * np.sum(np_X**2)) 117 | 118 | def test_tensor_grad(): 119 | npr.seed(10) 120 | 121 | for ii in xrange(NUM_TRIALS): 122 | np_X = npr.randn(10,20,5) 123 | wt = np.exp(npr.randn()) 124 | 125 | X = kayak.Parameter(np_X) 126 | out = kayak.L2Norm(X, weight=wt) 127 | 128 | assert np.all(close_float(out.grad(X), 2*wt*np_X)) 129 | assert kayak.util.checkgrad(X, out) < MAX_GRAD_DIFF 130 | 131 | -------------------------------------------------------------------------------- /tests/test_LogMultinomialLoss.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . 
import * 7 | 8 | def test_vector_value(): 9 | npr.seed(3) 10 | 11 | for ii in xrange(NUM_TRIALS): 12 | np_pred = npr.randn(1,10) 13 | np_targ = npr.randn(1,10) 14 | 15 | pred = kayak.Parameter(np_pred) 16 | targ = kayak.Targets(np_targ) 17 | out = kayak.LogMultinomialLoss(pred, targ) 18 | 19 | assert close_float(out.value, -np.sum(np_pred * np_targ)) 20 | 21 | def test_vector_grad(): 22 | npr.seed(4) 23 | 24 | for ii in xrange(NUM_TRIALS): 25 | np_pred = npr.randn(1,10) 26 | np_targ = npr.randn(1,10) 27 | 28 | pred = kayak.Parameter(np_pred) 29 | targ = kayak.Targets(np_targ) 30 | out = kayak.LogMultinomialLoss(pred, targ) 31 | 32 | assert np.all(close_float(out.grad(pred), -np_targ)) 33 | assert kayak.util.checkgrad(pred, out) < MAX_GRAD_DIFF 34 | 35 | def test_matrix_value_1(): 36 | npr.seed(5) 37 | 38 | for ii in xrange(NUM_TRIALS): 39 | np_pred = npr.randn(10,20) 40 | np_targ = npr.randn(10,20) 41 | 42 | pred = kayak.Parameter(np_pred) 43 | targ = kayak.Targets(np_targ) 44 | out = kayak.LogMultinomialLoss(pred, targ) 45 | 46 | assert np.all(close_float(out.value, -np.sum(np_pred * np_targ, axis=1, keepdims=True))) 47 | 48 | def test_matrix_grad(): 49 | npr.seed(6) 50 | 51 | for ii in xrange(NUM_TRIALS): 52 | np_pred = npr.randn(10,20) 53 | np_targ = npr.randn(10,20) 54 | 55 | pred = kayak.Parameter(np_pred) 56 | targ = kayak.Targets(np_targ) 57 | out = kayak.MatSum(kayak.LogMultinomialLoss(pred, targ)) 58 | 59 | assert kayak.util.checkgrad(pred, out) < MAX_GRAD_DIFF 60 | 61 | def test_matrix_value_2(): 62 | npr.seed(7) 63 | 64 | for ii in xrange(NUM_TRIALS): 65 | np_pred = npr.randn(10,20) 66 | np_targ = npr.randn(10,20) 67 | 68 | pred = kayak.Parameter(np_pred) 69 | targ = kayak.Targets(np_targ) 70 | out = kayak.LogMultinomialLoss(pred, targ, axis=0) 71 | 72 | assert np.all(close_float(out.value, -np.sum(np_pred * np_targ, axis=0))) 73 | -------------------------------------------------------------------------------- /tests/test_LogSoftMax.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . 
import * 7 | 8 | def test_logsoftmax_values_1(): 9 | npr.seed(1) 10 | 11 | for ii in xrange(NUM_TRIALS): 12 | 13 | np_X = npr.randn(5,6) 14 | X = kayak.Parameter(np_X) 15 | Y = kayak.LogSoftMax(X) 16 | 17 | np_Y = np.exp(np_X) 18 | np_Y = np_Y / np.sum(np_Y, axis=1)[:,np.newaxis] 19 | np_Y = np.log(np_Y) 20 | 21 | assert Y.shape == np_X.shape 22 | assert np.all(close_float(Y.value, np_Y)) 23 | 24 | def test_logsoftmax_values_2(): 25 | npr.seed(2) 26 | 27 | for ii in xrange(NUM_TRIALS): 28 | 29 | np_X = npr.randn(5,6) 30 | X = kayak.Parameter(np_X) 31 | Y = kayak.LogSoftMax(X, axis=0) 32 | 33 | np_Y = np.exp(np_X) 34 | np_Y = np_Y / np.sum(np_Y, axis=0)[np.newaxis,:] 35 | np_Y = np.log(np_Y) 36 | 37 | assert Y.shape == np_X.shape 38 | assert np.all(close_float(Y.value, np_Y)) 39 | 40 | def test_logsoftmax_grad_1(): 41 | npr.seed(3) 42 | 43 | for ii in xrange(NUM_TRIALS): 44 | 45 | np_X = npr.randn(5,6) 46 | X = kayak.Parameter(np_X) 47 | Y = kayak.LogSoftMax(X) 48 | Z = kayak.MatSum(Y) 49 | 50 | assert kayak.util.checkgrad(X, Z) < MAX_GRAD_DIFF 51 | 52 | def test_logsoftmax_grad_2(): 53 | npr.seed(4) 54 | 55 | for ii in xrange(NUM_TRIALS): 56 | 57 | np_X = npr.randn(5,6) 58 | X = kayak.Parameter(np_X) 59 | Y = kayak.LogSoftMax(X, axis=0) 60 | Z = kayak.MatSum(Y) 61 | 62 | assert kayak.util.checkgrad(X, Z) < MAX_GRAD_DIFF 63 | 64 | def test_logsoftmax_grad_3(): 65 | npr.seed(5) 66 | 67 | for ii in xrange(NUM_TRIALS): 68 | 69 | np_X = npr.randn(5,6) 70 | np_T = npr.randint(0, 10, np_X.shape) 71 | X = kayak.Parameter(np_X) 72 | T = kayak.Targets(np_T) 73 | Y = kayak.LogSoftMax(X) 74 | Z = kayak.MatSum(kayak.LogMultinomialLoss(Y, T)) 75 | 76 | assert kayak.util.checkgrad(X, Z) < MAX_GRAD_DIFF 77 | 78 | -------------------------------------------------------------------------------- /tests/test_Logistic.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . import * 7 | from nose.tools import assert_less 8 | 9 | def test_logistic_values(): 10 | npr.seed(1) 11 | 12 | for ii in xrange(NUM_TRIALS): 13 | np_X = npr.randn(6,5) 14 | X = kayak.Parameter(np_X) 15 | Y = kayak.Logistic(X) 16 | 17 | assert np.all(close_float(1.0/(1.0+np.exp(-np_X)), Y.value)) 18 | 19 | def test_logistic_grad(): 20 | npr.seed(2) 21 | 22 | for ii in xrange(NUM_TRIALS): 23 | np_X = npr.randn(6,5) 24 | X = kayak.Parameter(np_X) 25 | Y = kayak.Logistic(X) 26 | Z = kayak.MatSum(Y) 27 | 28 | Z.value 29 | assert np.all( Z.grad(X) >= 0.0 ) 30 | assert_less(kayak.util.checkgrad(X, Z), MAX_GRAD_DIFF) 31 | -------------------------------------------------------------------------------- /tests/test_MatAdd.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . import * 7 | 8 | # These behaviors require prepending singletons. Do we want to keep them? 9 | # def test_0d_plus_2d_scalar_value(): 10 | # npr.seed(1) 11 | 12 | # for ii in xrange(NUM_TRIALS): 13 | # npX1 = npr.randn(1, 1) 14 | # X1 = kayak.Parameter( npX1 ) 15 | # npX2 = np.sum(npr.randn()) # generates a scalar with shape () 16 | # X2 = kayak.Parameter( npX2 ) 17 | # Y = kayak.MatAdd(X1, X2) 18 | 19 | # # Verify that a scalar is reproduced. 
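# (For reference: under NumPy broadcasting, adding a shape-() scalar to a
# (1,1) array yields a (1,1) array, i.e. np.shape(np.zeros((1,1)) + 1.0) == (1, 1),
# which is what the commented-out check below would assert.)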
20 | # assert close_float(Y.value, npX1 + npX2) 21 | 22 | 23 | # def test_0d_plus_2d_scalar_grad(): 24 | # npr.seed(2) 25 | # for ii in xrange(NUM_TRIALS): 26 | # npX1 = npr.randn(1, 1) 27 | # X1 = kayak.Parameter( npX1 ) 28 | # npX2 = np.sum(npr.randn()) # generates a scalar with shape () 29 | # X2= kayak.Parameter( npX2 ) 30 | # Y = kayak.MatAdd(X1, X2) 31 | 32 | # # Verify that the gradient is one. 33 | # assert Y.grad(X1) == 1.0 34 | # assert Y.grad(X2) == 1.0 35 | # assert kayak.util.checkgrad(X1, Y) < MAX_GRAD_DIFF 36 | # assert kayak.util.checkgrad(X2, Y) < MAX_GRAD_DIFF 37 | 38 | def test_matadd_values_1(): 39 | npr.seed(1) 40 | 41 | for ii in xrange(NUM_TRIALS): 42 | 43 | np_A = npr.randn(5,6) 44 | np_B = npr.randn(5,6) 45 | A = kayak.Parameter(np_A) 46 | B = kayak.Parameter(np_B) 47 | C = kayak.MatAdd(A, B) 48 | 49 | assert C.shape == np_A.shape 50 | assert np.all( close_float(C.value, np_A+np_B)) 51 | 52 | def test_matadd_values_2(): 53 | npr.seed(2) 54 | 55 | for ii in xrange(NUM_TRIALS): 56 | 57 | np_A = npr.randn(5,6) 58 | np_B = npr.randn(5,6) 59 | np_C = npr.randn(5,6) 60 | A = kayak.Parameter(np_A) 61 | B = kayak.Parameter(np_B) 62 | C = kayak.Parameter(np_C) 63 | D = kayak.MatAdd(A, B, C) 64 | 65 | assert D.shape == np_A.shape 66 | assert np.all( close_float(D.value, np_A+np_B+np_C)) 67 | 68 | def test_matadd_values_3(): 69 | npr.seed(3) 70 | 71 | for ii in xrange(NUM_TRIALS): 72 | 73 | np_A = npr.randn(5,6) 74 | np_B = npr.randn(1,6) 75 | A = kayak.Parameter(np_A) 76 | B = kayak.Parameter(np_B) 77 | C = kayak.MatAdd(A, B) 78 | 79 | assert C.shape == (5,6) 80 | assert np.all( close_float(C.value, np_A+np_B)) 81 | 82 | def test_matadd_values_4(): 83 | npr.seed(4) 84 | 85 | for ii in xrange(NUM_TRIALS): 86 | 87 | np_A = npr.randn(5,6) 88 | np_B = npr.randn(5,1) 89 | A = kayak.Parameter(np_A) 90 | B = kayak.Parameter(np_B) 91 | C = kayak.MatAdd(A, B) 92 | 93 | assert C.shape == (5,6) 94 | assert np.all( close_float(C.value, np_A+np_B)) 95 | 96 | def test_matadd_values_5(): 97 | npr.seed(5) 98 | 99 | for ii in xrange(NUM_TRIALS): 100 | 101 | np_A = npr.randn(1,6) 102 | np_B = npr.randn(5,1) 103 | A = kayak.Parameter(np_A) 104 | B = kayak.Parameter(np_B) 105 | C = kayak.MatAdd(A, B) 106 | 107 | assert C.shape == (5,6) 108 | assert np.all( close_float(C.value, np_A+np_B)) 109 | 110 | def test_matadd_values_6(): 111 | npr.seed(6) 112 | 113 | for ii in xrange(NUM_TRIALS): 114 | 115 | np_A = npr.randn(5,6) 116 | np_B = npr.randn(1,1) 117 | A = kayak.Parameter(np_A) 118 | B = kayak.Parameter(np_B) 119 | C = kayak.MatAdd(A, B) 120 | 121 | assert C.shape == (5,6) 122 | assert np.all( close_float(C.value, np_A+np_B)) 123 | 124 | def test_matadd_values_7(): 125 | npr.seed(7) 126 | 127 | for ii in xrange(NUM_TRIALS): 128 | 129 | np_A = npr.randn(5,6) 130 | np_B = npr.randn(5,6) 131 | A = kayak.Parameter(np_A) 132 | B = kayak.Parameter(np_B) 133 | D = kayak.MatAdd(A, B, A) 134 | 135 | assert D.shape == (5,6) 136 | assert np.all( close_float(D.value, 2*np_A + np_B)) 137 | 138 | def test_matadd_grad_1(): 139 | npr.seed(8) 140 | 141 | for ii in xrange(NUM_TRIALS): 142 | 143 | np_A = npr.randn(5,6) 144 | np_B = npr.randn(5,6) 145 | A = kayak.Parameter(np_A) 146 | B = kayak.Parameter(np_B) 147 | C = kayak.MatAdd(A, B) 148 | D = kayak.MatSum(C) 149 | 150 | D.value 151 | assert kayak.util.checkgrad(A, D) < MAX_GRAD_DIFF 152 | assert kayak.util.checkgrad(B, D) < MAX_GRAD_DIFF 153 | 154 | def test_matadd_grad_2(): 155 | npr.seed(9) 156 | 157 | for ii in xrange(NUM_TRIALS): 158 | 159 | np_A = 
npr.randn(5,6) 160 | np_B = npr.randn(5,6) 161 | np_C = npr.randn(5,6) 162 | A = kayak.Parameter(np_A) 163 | B = kayak.Parameter(np_B) 164 | C = kayak.Parameter(np_C) 165 | D = kayak.MatAdd(A, B, C) 166 | E = kayak.MatSum(D) 167 | 168 | E.value 169 | assert kayak.util.checkgrad(A, E) < MAX_GRAD_DIFF 170 | assert kayak.util.checkgrad(B, E) < MAX_GRAD_DIFF 171 | assert kayak.util.checkgrad(C, E) < MAX_GRAD_DIFF 172 | 173 | def test_matadd_grad_3(): 174 | npr.seed(10) 175 | 176 | for ii in xrange(NUM_TRIALS): 177 | 178 | np_A = npr.randn(5,6) 179 | np_B = npr.randn(1,6) 180 | A = kayak.Parameter(np_A) 181 | B = kayak.Parameter(np_B) 182 | C = kayak.MatAdd(A, B) 183 | D = kayak.MatSum(C) 184 | 185 | D.value 186 | print np_A.shape, D.grad(A).shape 187 | print np_B.shape, D.grad(B).shape 188 | assert D.grad(A).shape == np_A.shape 189 | assert D.grad(B).shape == np_B.shape 190 | assert kayak.util.checkgrad(A, D) < MAX_GRAD_DIFF 191 | assert kayak.util.checkgrad(B, D) < MAX_GRAD_DIFF 192 | 193 | def test_matadd_grad_4(): 194 | npr.seed(11) 195 | 196 | for ii in xrange(NUM_TRIALS): 197 | 198 | np_A = npr.randn(5,1) 199 | np_B = npr.randn(5,6) 200 | A = kayak.Parameter(np_A) 201 | B = kayak.Parameter(np_B) 202 | C = kayak.MatAdd(A, B) 203 | D = kayak.MatSum(C) 204 | 205 | D.value 206 | assert D.grad(A).shape == np_A.shape 207 | assert D.grad(B).shape == np_B.shape 208 | assert kayak.util.checkgrad(A, D) < MAX_GRAD_DIFF 209 | assert kayak.util.checkgrad(B, D) < MAX_GRAD_DIFF 210 | 211 | def test_matadd_grad_5(): 212 | npr.seed(12) 213 | 214 | for ii in xrange(NUM_TRIALS): 215 | 216 | np_A = npr.randn(5,1) 217 | np_B = npr.randn(1,6) 218 | A = kayak.Parameter(np_A) 219 | B = kayak.Parameter(np_B) 220 | C = kayak.MatAdd(A, B) 221 | D = kayak.MatSum(C) 222 | 223 | D.value 224 | assert D.grad(A).shape == np_A.shape 225 | assert D.grad(B).shape == np_B.shape 226 | assert kayak.util.checkgrad(A, D) < MAX_GRAD_DIFF 227 | assert kayak.util.checkgrad(B, D) < MAX_GRAD_DIFF 228 | 229 | def test_matadd_grad_6(): 230 | npr.seed(13) 231 | 232 | for ii in xrange(NUM_TRIALS): 233 | 234 | np_A = npr.randn(5,6) 235 | np_B = npr.randn(1,1) 236 | A = kayak.Parameter(np_A) 237 | B = kayak.Parameter(np_B) 238 | C = kayak.MatAdd(A, B) 239 | D = kayak.MatSum(C) 240 | 241 | D.value 242 | assert D.grad(A).shape == np_A.shape 243 | assert D.grad(B).shape == np_B.shape 244 | assert kayak.util.checkgrad(A, D) < MAX_GRAD_DIFF 245 | assert kayak.util.checkgrad(B, D) < MAX_GRAD_DIFF 246 | 247 | def test_matadd_grad_7(): 248 | npr.seed(14) 249 | 250 | for ii in xrange(NUM_TRIALS): 251 | 252 | np_A = npr.randn(5,6) 253 | np_B = npr.randn(5,6) 254 | A = kayak.Parameter(np_A) 255 | B = kayak.Parameter(np_B) 256 | D = kayak.MatAdd(A, B, A) 257 | E = kayak.MatSum(D) 258 | 259 | E.value 260 | assert E.grad(A).shape == np_A.shape 261 | assert E.grad(B).shape == np_B.shape 262 | assert kayak.util.checkgrad(A, E) < MAX_GRAD_DIFF 263 | assert kayak.util.checkgrad(B, E) < MAX_GRAD_DIFF 264 | 265 | def test_matadd_grad_8(): 266 | npr.seed(15) 267 | 268 | for ii in xrange(NUM_TRIALS): 269 | 270 | np_A = npr.randn(5,6) 271 | np_B = npr.randn(5,6) 272 | A = kayak.Parameter(np_A) 273 | D = kayak.MatAdd(A, A) 274 | E = kayak.MatSum(D) 275 | 276 | E.value 277 | assert E.grad(A).shape == np_A.shape 278 | assert kayak.util.checkgrad(A, E) < MAX_GRAD_DIFF 279 | -------------------------------------------------------------------------------- /tests/test_MatConcat.py: -------------------------------------------------------------------------------- 1 | 
import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . import * 7 | 8 | def test_matconcat_values_1(): 9 | npr.seed(1) 10 | 11 | for ii in xrange(NUM_TRIALS): 12 | 13 | np_A = npr.randn(5,6) 14 | np_B = npr.randn(5,6) 15 | A = kayak.Parameter(np_A) 16 | B = kayak.Parameter(np_B) 17 | 18 | C = kayak.Concatenate(0, A, B) 19 | assert C.value.shape == (10,6) 20 | 21 | C = kayak.Concatenate(1, A, B) 22 | assert C.value.shape == (5,12) 23 | 24 | 25 | 26 | def test_matconcat_grad_1(): 27 | npr.seed(3) 28 | 29 | for ii in xrange(NUM_TRIALS): 30 | 31 | np_A = npr.randn(5,6) 32 | np_B = npr.randn(5,6) 33 | A = kayak.Parameter(np_A) 34 | B = kayak.Parameter(np_B) 35 | C = kayak.Concatenate(0, A, B) 36 | D = kayak.MatSum(C) 37 | 38 | D.value 39 | assert D.grad(A).shape == (5,6) 40 | assert D.grad(B).shape == (5,6) 41 | assert kayak.util.checkgrad(A, D) < MAX_GRAD_DIFF 42 | assert kayak.util.checkgrad(B, D) < MAX_GRAD_DIFF 43 | 44 | 45 | def test_matconcat_grad_2(): 46 | npr.seed(3) 47 | 48 | for ii in xrange(NUM_TRIALS): 49 | 50 | np_A = npr.randn(5,6) 51 | np_B = npr.randn(5,6) 52 | A = kayak.Parameter(np_A) 53 | B = kayak.Parameter(np_B) 54 | C = kayak.Concatenate(1, A, B) 55 | D = kayak.MatSum(C) 56 | 57 | D.value 58 | assert D.grad(A).shape == (5,6) 59 | assert D.grad(B).shape == (5,6) 60 | assert kayak.util.checkgrad(A, D) < MAX_GRAD_DIFF 61 | assert kayak.util.checkgrad(B, D) < MAX_GRAD_DIFF 62 | 63 | 64 | def test_matconcat_grad_3(): 65 | npr.seed(3) 66 | 67 | for ii in xrange(NUM_TRIALS): 68 | 69 | np_A = npr.randn(5,6) 70 | A = kayak.Parameter(np_A) 71 | C = kayak.Concatenate(0, A, A) 72 | D = kayak.MatSum(C) 73 | 74 | D.value 75 | assert D.grad(A).shape == (5,6) 76 | assert kayak.util.checkgrad(A, D) < MAX_GRAD_DIFF 77 | 78 | def test_matconcat_grad_4(): 79 | npr.seed(3) 80 | 81 | for ii in xrange(NUM_TRIALS): 82 | 83 | np_A = npr.randn(5,6) 84 | np_B = npr.randn(5,3) 85 | np_C = npr.randn(5,7) 86 | A = kayak.Parameter(np_A) 87 | B = kayak.Parameter(np_B) 88 | C = kayak.Parameter(np_C) 89 | D = kayak.Concatenate(1, A, B, C) 90 | E = kayak.MatSum(D) 91 | 92 | assert E.grad(A).shape == (5,6) 93 | assert E.grad(B).shape == (5,3) 94 | assert E.grad(C).shape == (5,7) 95 | assert kayak.util.checkgrad(A, E) < MAX_GRAD_DIFF 96 | assert kayak.util.checkgrad(B, E) < MAX_GRAD_DIFF 97 | assert kayak.util.checkgrad(C, E) < MAX_GRAD_DIFF 98 | -------------------------------------------------------------------------------- /tests/test_MatDet.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | import scipy.linalg as spla 4 | import kayak 5 | 6 | from . 
import * 7 | from nose.tools import assert_less, assert_equal 8 | 9 | def test_matdet_values_1(): 10 | npr.seed(1) 11 | 12 | for ii in xrange(NUM_TRIALS): 13 | 14 | np_A = npr.randn(12,6) 15 | A = np.dot(np_A.T, np_A) + 1e-6*np.eye(6) 16 | B = kayak.Parameter(A) 17 | D = kayak.MatDet(B) 18 | 19 | assert_less((D.value - spla.det(A))**2, 1e-6) 20 | 21 | def test_matdet_grad_1(): 22 | npr.seed(1) 23 | 24 | for ii in xrange(NUM_TRIALS): 25 | 26 | np_A = npr.randn(12,6) 27 | A = np.dot(np_A.T, np_A) + 1e-6*np.eye(6) 28 | B = kayak.Parameter(A) 29 | D = kayak.MatDet(B) 30 | 31 | assert_less((D.value - spla.det(A))**2, 1e-6) 32 | 33 | assert_equal(D.grad(B).shape, B.shape) 34 | assert_less(kayak.util.checkgrad(B, D), MAX_GRAD_DIFF) 35 | 36 | -------------------------------------------------------------------------------- /tests/test_MatMean.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | from nose.tools import assert_less 6 | 7 | from . import * 8 | 9 | def test_scalar_value(): 10 | npr.seed(1) 11 | 12 | for ii in xrange(NUM_TRIALS): 13 | npX = npr.randn() 14 | X = kayak.Parameter( npX ) 15 | Y = kayak.MatMean(X) 16 | 17 | # Verify that a scalar is reproduced. 18 | assert close_float(Y.value, npX) 19 | 20 | def test_scalar_grad(): 21 | npr.seed(2) 22 | 23 | for ii in xrange(NUM_TRIALS): 24 | npX = npr.randn() 25 | X = kayak.Parameter( npX ) 26 | Y = kayak.MatMean(X) 27 | 28 | # Verify that the gradient is one. 29 | Y.value 30 | assert Y.grad(X) == 1.0 31 | assert_less(kayak.util.checkgrad(X, Y), MAX_GRAD_DIFF) 32 | 33 | def test_vector_value_1(): 34 | npr.seed(3) 35 | 36 | for ii in xrange(NUM_TRIALS): 37 | npX = npr.randn(10,1) 38 | X = kayak.Parameter( npX ) 39 | Y = kayak.MatMean(X) 40 | # Verify the sum. 41 | assert close_float(Y.value, np.mean(npX)) 42 | 43 | def test_vector_grad_1(): 44 | npr.seed(4) 45 | 46 | for ii in xrange(NUM_TRIALS): 47 | npX = npr.randn(10,1) 48 | X = kayak.Parameter( npX ) 49 | Y = kayak.MatMean(X) 50 | 51 | # Verify the gradient. 52 | Y.value 53 | assert Y.grad(X).shape == npX.shape 54 | assert np.all(close_float(Y.grad(X), 1.0/float(npX.size) * np.ones(npX.shape))) 55 | assert_less(kayak.util.checkgrad(X, Y), MAX_GRAD_DIFF) 56 | 57 | def test_vector_value_2(): 58 | npr.seed(5) 59 | 60 | for ii in xrange(NUM_TRIALS): 61 | npX = npr.randn(1,10) 62 | X = kayak.Parameter( npX ) 63 | Y = kayak.MatMean(X) 64 | 65 | # Verify the sum. 66 | assert close_float(Y.value, np.mean(npX)) 67 | 68 | def test_vector_grad_2(): 69 | npr.seed(6) 70 | 71 | for ii in xrange(NUM_TRIALS): 72 | npX = npr.randn(1,10) 73 | X = kayak.Parameter( npX ) 74 | Y = kayak.MatMean(X) 75 | 76 | # Verify the gradient. 77 | Y.value 78 | assert Y.grad(X).shape == npX.shape 79 | assert np.all(close_float(Y.grad(X), 1.0/float(np.prod(npX.shape)) * np.ones(npX.shape))) 80 | assert_less(kayak.util.checkgrad(X, Y), MAX_GRAD_DIFF) 81 | 82 | def test_matrix_value(): 83 | npr.seed(7) 84 | 85 | for ii in xrange(NUM_TRIALS): 86 | npX = npr.randn(10,20) 87 | X = kayak.Parameter( npX ) 88 | Y = kayak.MatMean(X) 89 | 90 | # Verify the value. 91 | assert close_float(Y.value, np.mean(npX)) 92 | 93 | def test_matrix_grad(): 94 | npr.seed(8) 95 | 96 | for ii in xrange(NUM_TRIALS): 97 | npX = npr.randn(10,20) 98 | X = kayak.Parameter( npX ) 99 | Y = kayak.MatMean(X) 100 | 101 | # Verify the value. 
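# The gradient of a full mean is uniform: each entry of X contributes 1/X.size,
# so Y.grad(X) should equal np.ones(npX.shape) / npX.size, which is exactly what
# the assertions below verify.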
102 | Y.value 103 | assert Y.grad(X).shape == npX.shape 104 | assert np.all(close_float(Y.grad(X), 1.0/float(np.prod(npX.shape)) * np.ones(npX.shape))) 105 | assert_less(kayak.util.checkgrad(X, Y), MAX_GRAD_DIFF) 106 | 107 | def test_nested_value_1(): 108 | npr.seed(9) 109 | 110 | for ii in xrange(NUM_TRIALS): 111 | npX = npr.randn(10,20) 112 | X = kayak.Parameter( npX ) 113 | Y = kayak.MatMean(X, axis=0) 114 | Z = kayak.MatMean(Y) 115 | 116 | assert np.all(close_float(Y.value, np.mean(npX, axis=0))) 117 | assert close_float(Z.value, np.mean(npX)) 118 | 119 | def test_nested_grad_1(): 120 | npr.seed(10) 121 | 122 | for ii in xrange(NUM_TRIALS): 123 | npX = npr.randn(10,20) 124 | X = kayak.Parameter( npX ) 125 | Y = kayak.MatMean(X, axis=0) 126 | Z = kayak.MatMean(Y) 127 | 128 | assert Z.grad(X).shape == npX.shape 129 | assert np.all(close_float(Z.grad(X),1.0/float(np.prod(npX.shape)) * np.ones(npX.shape))) 130 | assert_less(kayak.util.checkgrad(X, Z), MAX_GRAD_DIFF) 131 | 132 | def test_nested_value_2(): 133 | npr.seed(11) 134 | 135 | for ii in xrange(NUM_TRIALS): 136 | npX = npr.randn(10,20) 137 | X = kayak.Parameter( npX ) 138 | Y = kayak.MatMean(X, axis=1) 139 | Z = kayak.MatMean(Y) 140 | 141 | assert np.all(close_float(Y.value.ravel(), np.mean(npX, axis=1))) 142 | assert close_float(Z.value, np.mean(npX)) 143 | 144 | def test_nested_grad_2(): 145 | npr.seed(12) 146 | 147 | for ii in xrange(NUM_TRIALS): 148 | npX = npr.randn(10,20) 149 | X = kayak.Parameter( npX ) 150 | Y = kayak.MatMean(X, axis=1) 151 | Z = kayak.MatMean(Y) 152 | 153 | assert Z.grad(X).shape == npX.shape 154 | assert np.all(close_float(Z.grad(X), 1.0/float(np.prod(npX.shape)) * np.ones(npX.shape))) 155 | assert_less(kayak.util.checkgrad(X, Z), MAX_GRAD_DIFF) 156 | 157 | def test_tensor_value_1(): 158 | npr.seed(13) 159 | 160 | for ii in xrange(NUM_TRIALS): 161 | npX = npr.randn(10,20,30) 162 | X = kayak.Parameter( npX ) 163 | Y = kayak.MatMean(X) 164 | 165 | assert X.shape == npX.shape 166 | assert close_float(Y.value, np.mean(npX)) 167 | 168 | def test_tensor_value_2(): 169 | npr.seed(14) 170 | 171 | for ii in xrange(NUM_TRIALS): 172 | npX = npr.randn(10,20,30) 173 | X = kayak.Parameter( npX ) 174 | Y = kayak.MatMean(X, axis=2) 175 | 176 | assert np.all(close_float(Y.value, np.expand_dims(np.mean(npX, axis=2), axis=2))) 177 | 178 | def test_tensor_value_3(): 179 | npr.seed(15) 180 | 181 | for ii in xrange(NUM_TRIALS): 182 | npX = npr.randn(10,20,30) 183 | X = kayak.Parameter( npX ) 184 | Y = kayak.MatMean(X, axis=1) 185 | 186 | assert np.all(close_float(Y.value, np.expand_dims(np.mean(npX, axis=1), axis=1))) 187 | 188 | def test_tensor_value_4(): 189 | npr.seed(16) 190 | 191 | for ii in xrange(NUM_TRIALS): 192 | npX = npr.randn(10,20,30) 193 | X = kayak.Parameter( npX ) 194 | Y = kayak.MatMean(X, axis=0) 195 | 196 | assert np.all(close_float(Y.value, np.expand_dims(np.mean(npX, axis=0), axis=0))) 197 | 198 | def test_keepdims_value_1(): 199 | npr.seed(9) 200 | 201 | for ii in xrange(NUM_TRIALS): 202 | npX = npr.randn(10,20) 203 | X = kayak.Parameter( npX ) 204 | Y = kayak.MatMean(X, axis=0, keepdims=False) 205 | Z = kayak.MatMean(Y) 206 | 207 | assert Y.shape == np.mean(npX, axis=0, keepdims=False).shape 208 | assert np.all(close_float(Y.value, np.mean(npX, axis=0, keepdims=False))) 209 | assert close_float(Z.value, np.mean(npX)) 210 | 211 | def test_keepdims_grad_1(): 212 | npr.seed(10) 213 | 214 | for ii in xrange(NUM_TRIALS): 215 | npX = npr.randn(10,20) 216 | X = kayak.Parameter( npX ) 217 | Y = kayak.MatMean(X, 
axis=0, keepdims=False) 218 | Z = kayak.MatMean(Y) 219 | 220 | assert Z.grad(X).shape == npX.shape 221 | assert np.all(close_float(Z.grad(X), 1.0/float(np.prod(npX.shape)) * np.ones(npX.shape))) 222 | assert_less(kayak.util.checkgrad(X, Z), MAX_GRAD_DIFF) 223 | 224 | def test_keepdims_grad_2(): 225 | npr.seed(10) 226 | 227 | for ii in xrange(NUM_TRIALS): 228 | npW = npr.randn(5,10,20) 229 | npX = npr.randn(5,10,20) 230 | W = kayak.Parameter( npW ) 231 | X = kayak.Parameter( npX ) 232 | Y = W * X 233 | Z = kayak.MatMean(Y, axis=2, keepdims=False) 234 | S = kayak.MatMean(Z) 235 | 236 | assert S.grad(W).shape == npW.shape 237 | assert_less(kayak.util.checkgrad(X, S), MAX_GRAD_DIFF) -------------------------------------------------------------------------------- /tests/test_MatMult.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . import * 7 | 8 | def test_matmult_values_1(): 9 | npr.seed(1) 10 | 11 | for ii in xrange(NUM_TRIALS): 12 | 13 | np_A = npr.randn(5,6) 14 | np_B = npr.randn(6,7) 15 | A = kayak.Parameter(np_A) 16 | B = kayak.Parameter(np_B) 17 | C = kayak.MatMult(A, B) 18 | 19 | assert C.value.shape == (5,7) 20 | assert np.all(close_float(C.value, np.dot(np_A, np_B))) 21 | 22 | def test_matmult_values_2(): 23 | npr.seed(2) 24 | 25 | for ii in xrange(NUM_TRIALS): 26 | 27 | np_A = npr.randn(5,5) 28 | A = kayak.Parameter(np_A) 29 | C = kayak.MatMult(A, A) 30 | 31 | assert C.value.shape == (5,5) 32 | assert np.all(close_float(C.value, np.dot(np_A, np_A))) 33 | 34 | def test_matmult_values_3(): 35 | npr.seed(3) 36 | 37 | for ii in xrange(NUM_TRIALS): 38 | 39 | np_A = npr.randn(5,6) 40 | np_B = npr.randn(6,7) 41 | np_C = npr.randn(7,8) 42 | A = kayak.Parameter(np_A) 43 | B = kayak.Parameter(np_B) 44 | C = kayak.Parameter(np_C) 45 | D = kayak.MatMult(A, B, C) 46 | 47 | assert D.value.shape == (5,8) 48 | assert np.all(close_float(D.value, np.dot(np_A, np.dot(np_B, np_C)))) 49 | 50 | def test_matmult_grad_1(): 51 | npr.seed(3) 52 | 53 | for ii in xrange(NUM_TRIALS): 54 | 55 | np_A = npr.randn(5,6) 56 | np_B = npr.randn(6,7) 57 | A = kayak.Parameter(np_A) 58 | B = kayak.Parameter(np_B) 59 | C = kayak.MatMult(A, B) 60 | D = kayak.MatSum(C) 61 | 62 | D.value 63 | assert D.grad(A).shape == (5,6) 64 | assert D.grad(B).shape == (6,7) 65 | assert kayak.util.checkgrad(A, D) < MAX_GRAD_DIFF 66 | assert kayak.util.checkgrad(B, D) < MAX_GRAD_DIFF 67 | 68 | def test_matmult_grad_2(): 69 | npr.seed(4) 70 | 71 | for ii in xrange(NUM_TRIALS): 72 | 73 | np_A = npr.randn(5,5) 74 | A = kayak.Parameter(np_A) 75 | C = kayak.MatMult(A, A) 76 | D = kayak.MatSum(C) 77 | 78 | D.value 79 | assert D.grad(A).shape == (5,5) 80 | assert kayak.util.checkgrad(A, D) < MAX_GRAD_DIFF 81 | 82 | def test_matmult_grad_3(): 83 | npr.seed(5) 84 | 85 | for ii in xrange(NUM_TRIALS): 86 | 87 | np_A = npr.randn(5,6) 88 | np_B = npr.randn(6,7) 89 | np_C = npr.randn(7,8) 90 | A = kayak.Parameter(np_A) 91 | B = kayak.Parameter(np_B) 92 | C = kayak.Parameter(np_C) 93 | D = kayak.MatMult(A, B, C) 94 | E = kayak.MatSum(kayak.SoftReLU(D)) 95 | 96 | assert E.grad(A).shape == (5,6) 97 | assert E.grad(B).shape == (6,7) 98 | assert E.grad(C).shape == (7,8) 99 | assert kayak.util.checkgrad(A, E) < MAX_GRAD_DIFF 100 | assert kayak.util.checkgrad(B, E) < MAX_GRAD_DIFF 101 | assert kayak.util.checkgrad(C, E) < MAX_GRAD_DIFF 102 | 103 | def test_matmult_grad_mat_vect(): 104 | npr.seed(5) 105 | 106 | for ii in xrange(NUM_TRIALS): 
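# Matrix-vector case: np.dot of a (5,6) matrix with a (6,) vector has shape (5,),
# and the gradients asserted below must come back in each operand's own shape.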
107 | 108 | np_A = npr.randn(5,6) 109 | np_B = npr.randn(6) 110 | np_C = npr.randn(5,) 111 | A = kayak.Parameter(np_A) 112 | B = kayak.Parameter(np_B) 113 | C = kayak.Parameter(np_C) 114 | D = kayak.MatMult(A, B) 115 | E = kayak.MatSum(kayak.ElemMult(C, D)) 116 | 117 | assert E.grad(A).shape == (5,6) 118 | assert E.grad(B).shape == (6,) 119 | assert kayak.util.checkgrad(A, E) < MAX_GRAD_DIFF 120 | assert kayak.util.checkgrad(B, E) < MAX_GRAD_DIFF 121 | 122 | def test_matmult_grad_vect_mat(): 123 | npr.seed(5) 124 | 125 | for ii in xrange(NUM_TRIALS): 126 | 127 | np_A = npr.randn(6,) 128 | np_B = npr.randn(6,7) 129 | np_C = npr.randn(7,) 130 | A = kayak.Parameter(np_A) 131 | B = kayak.Parameter(np_B) 132 | C = kayak.Parameter(np_C) 133 | D = kayak.MatMult(A, B) 134 | E = kayak.MatSum(kayak.ElemMult(C, D)) 135 | 136 | assert E.grad(A).shape == (6,) 137 | assert E.grad(B).shape == (6, 7) 138 | assert kayak.util.checkgrad(A, E) < MAX_GRAD_DIFF 139 | assert kayak.util.checkgrad(B, E) < MAX_GRAD_DIFF 140 | -------------------------------------------------------------------------------- /tests/test_MatSum.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . import * 7 | 8 | def test_scalar_value(): 9 | npr.seed(1) 10 | 11 | for ii in xrange(NUM_TRIALS): 12 | npX = npr.randn() 13 | X = kayak.Parameter( npX ) 14 | Y = kayak.MatSum(X) 15 | 16 | # Verify that a scalar is reproduced. 17 | assert close_float(Y.value, npX) 18 | 19 | def test_scalar_grad(): 20 | npr.seed(2) 21 | 22 | for ii in xrange(NUM_TRIALS): 23 | npX = npr.randn() 24 | X = kayak.Parameter( npX ) 25 | Y = kayak.MatSum(X) 26 | 27 | # Verify that the gradient is one. 28 | Y.value 29 | assert Y.grad(X) == 1.0 30 | assert kayak.util.checkgrad(X, Y) < MAX_GRAD_DIFF 31 | 32 | def test_vector_value_1(): 33 | npr.seed(3) 34 | 35 | for ii in xrange(NUM_TRIALS): 36 | npX = npr.randn(10,1) 37 | X = kayak.Parameter( npX ) 38 | Y = kayak.MatSum(X) 39 | # Verify the sum. 40 | assert close_float(Y.value, np.sum(npX)) 41 | 42 | def test_vector_grad_1(): 43 | npr.seed(4) 44 | 45 | for ii in xrange(NUM_TRIALS): 46 | npX = npr.randn(10,1) 47 | X = kayak.Parameter( npX ) 48 | Y = kayak.MatSum(X) 49 | 50 | # Verify the gradient. 51 | Y.value 52 | assert Y.grad(X).shape == npX.shape 53 | assert np.all(close_float(Y.grad(X), np.ones(npX.shape))) 54 | assert kayak.util.checkgrad(X, Y) < MAX_GRAD_DIFF 55 | 56 | def test_vector_value_2(): 57 | npr.seed(5) 58 | 59 | for ii in xrange(NUM_TRIALS): 60 | npX = npr.randn(1,10) 61 | X = kayak.Parameter( npX ) 62 | Y = kayak.MatSum(X) 63 | 64 | # Verify the sum. 65 | assert close_float(Y.value, np.sum(npX)) 66 | 67 | def test_vector_grad_2(): 68 | npr.seed(6) 69 | 70 | for ii in xrange(NUM_TRIALS): 71 | npX = npr.randn(1,10) 72 | X = kayak.Parameter( npX ) 73 | Y = kayak.MatSum(X) 74 | 75 | # Verify the gradient. 76 | Y.value 77 | assert Y.grad(X).shape == npX.shape 78 | assert np.all(close_float(Y.grad(X), np.ones(npX.shape))) 79 | assert kayak.util.checkgrad(X, Y) < MAX_GRAD_DIFF 80 | 81 | def test_matrix_value(): 82 | npr.seed(7) 83 | 84 | for ii in xrange(NUM_TRIALS): 85 | npX = npr.randn(10,20) 86 | X = kayak.Parameter( npX ) 87 | Y = kayak.MatSum(X) 88 | 89 | # Verify the value. 
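# With no axis argument, MatSum reduces over every element, matching np.sum;
# e.g. np.sum(np.ones((10, 20))) == 200.0.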
90 | assert close_float(Y.value, np.sum(npX)) 91 | 92 | def test_matrix_grad(): 93 | npr.seed(8) 94 | 95 | for ii in xrange(NUM_TRIALS): 96 | npX = npr.randn(10,20) 97 | X = kayak.Parameter( npX ) 98 | Y = kayak.MatSum(X) 99 | 100 | # Verify the value. 101 | Y.value 102 | assert Y.grad(X).shape == npX.shape 103 | assert np.all(close_float(Y.grad(X), np.ones(npX.shape))) 104 | assert kayak.util.checkgrad(X, Y) < MAX_GRAD_DIFF 105 | 106 | def test_nested_value_1(): 107 | npr.seed(9) 108 | 109 | for ii in xrange(NUM_TRIALS): 110 | npX = npr.randn(10,20) 111 | X = kayak.Parameter( npX ) 112 | Y = kayak.MatSum(X, axis=0) 113 | Z = kayak.MatSum(Y) 114 | 115 | assert np.all(close_float(Y.value, np.sum(npX, axis=0))) 116 | assert close_float(Z.value, np.sum(npX)) 117 | 118 | def test_nested_grad_1(): 119 | npr.seed(10) 120 | 121 | for ii in xrange(NUM_TRIALS): 122 | npX = npr.randn(10,20) 123 | X = kayak.Parameter( npX ) 124 | Y = kayak.MatSum(X, axis=0) 125 | Z = kayak.MatSum(Y) 126 | 127 | assert Z.grad(X).shape == npX.shape 128 | assert np.all(close_float(Z.grad(X), np.ones(npX.shape))) 129 | assert kayak.util.checkgrad(X, Z) < MAX_GRAD_DIFF 130 | 131 | def test_nested_value_2(): 132 | npr.seed(11) 133 | 134 | for ii in xrange(NUM_TRIALS): 135 | npX = npr.randn(10,20) 136 | X = kayak.Parameter( npX ) 137 | Y = kayak.MatSum(X, axis=1) 138 | Z = kayak.MatSum(Y) 139 | 140 | assert np.all(close_float(Y.value.ravel(), np.sum(npX, axis=1))) 141 | assert close_float(Z.value, np.sum(npX)) 142 | 143 | def test_nested_grad_2(): 144 | npr.seed(12) 145 | 146 | for ii in xrange(NUM_TRIALS): 147 | npX = npr.randn(10,20) 148 | X = kayak.Parameter( npX ) 149 | Y = kayak.MatSum(X, axis=1) 150 | Z = kayak.MatSum(Y) 151 | 152 | assert Z.grad(X).shape == npX.shape 153 | assert np.all(close_float(Z.grad(X), np.ones(npX.shape))) 154 | assert kayak.util.checkgrad(X, Z) < MAX_GRAD_DIFF 155 | 156 | def test_tensor_value_1(): 157 | npr.seed(13) 158 | 159 | for ii in xrange(NUM_TRIALS): 160 | npX = npr.randn(10,20,30) 161 | X = kayak.Parameter( npX ) 162 | Y = kayak.MatSum(X) 163 | 164 | assert X.shape == npX.shape 165 | assert close_float(Y.value, np.sum(npX)) 166 | 167 | def test_tensor_value_2(): 168 | npr.seed(14) 169 | 170 | for ii in xrange(NUM_TRIALS): 171 | npX = npr.randn(10,20,30) 172 | X = kayak.Parameter( npX ) 173 | Y = kayak.MatSum(X, axis=2) 174 | 175 | assert np.all(close_float(Y.value, np.expand_dims(np.sum(npX, axis=2), axis=2))) 176 | 177 | def test_tensor_value_3(): 178 | npr.seed(15) 179 | 180 | for ii in xrange(NUM_TRIALS): 181 | npX = npr.randn(10,20,30) 182 | X = kayak.Parameter( npX ) 183 | Y = kayak.MatSum(X, axis=1) 184 | 185 | assert np.all(close_float(Y.value, np.expand_dims(np.sum(npX, axis=1), axis=1))) 186 | 187 | def test_tensor_value_4(): 188 | npr.seed(16) 189 | 190 | for ii in xrange(NUM_TRIALS): 191 | npX = npr.randn(10,20,30) 192 | X = kayak.Parameter( npX ) 193 | Y = kayak.MatSum(X, axis=0) 194 | 195 | assert np.all(close_float(Y.value, np.expand_dims(np.sum(npX, axis=0), axis=0))) 196 | 197 | def test_keepdims_value_1(): 198 | npr.seed(9) 199 | 200 | for ii in xrange(NUM_TRIALS): 201 | npX = npr.randn(10,20) 202 | X = kayak.Parameter( npX ) 203 | Y = kayak.MatSum(X, axis=0, keepdims=False) 204 | Z = kayak.MatSum(Y) 205 | 206 | assert Y.shape == np.sum(npX, axis=0, keepdims=False).shape 207 | assert np.all(close_float(Y.value, np.sum(npX, axis=0, keepdims=False))) 208 | assert close_float(Z.value, np.sum(npX)) 209 | 210 | def test_keepdims_grad_1(): 211 | npr.seed(10) 212 | 213 | 
for ii in xrange(NUM_TRIALS): 214 | npX = npr.randn(10,20) 215 | X = kayak.Parameter( npX ) 216 | Y = kayak.MatSum(X, axis=0, keepdims=False) 217 | Z = kayak.MatSum(Y) 218 | 219 | assert Z.grad(X).shape == npX.shape 220 | assert np.all(close_float(Z.grad(X), np.ones(npX.shape))) 221 | assert kayak.util.checkgrad(X, Z) < MAX_GRAD_DIFF 222 | 223 | def test_keepdims_grad_2(): 224 | npr.seed(10) 225 | 226 | for ii in xrange(NUM_TRIALS): 227 | npW = npr.randn(5,10,20) 228 | npX = npr.randn(5,10,20) 229 | W = kayak.Parameter( npW ) 230 | X = kayak.Parameter( npX ) 231 | Y = W * X 232 | Z = kayak.MatSum(Y, axis=2, keepdims=False) 233 | S = kayak.MatSum(Z) 234 | 235 | assert S.grad(W).shape == npW.shape 236 | # assert np.all(close_float(Z.grad(X), np.ones(npX.shape))) 237 | assert kayak.util.checkgrad(X, S) < MAX_GRAD_DIFF -------------------------------------------------------------------------------- /tests/test_NExp.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . import * 7 | 8 | def test_scalar_value(): 9 | npr.seed(1) 10 | 11 | for ii in xrange(NUM_TRIALS): 12 | np_X = npr.randn() 13 | 14 | X = kayak.Parameter(np_X) 15 | out = kayak.NExp(X) 16 | 17 | assert close_float(out.value, 1.0 - np.exp(-np.abs(np_X))) 18 | 19 | def test_scalar_grad(): 20 | npr.seed(2) 21 | 22 | for ii in xrange(NUM_TRIALS): 23 | while True: 24 | np_X = npr.randn() 25 | if np.abs(np_X) > 0.1: 26 | break 27 | 28 | X = kayak.Parameter(np_X) 29 | out = kayak.NExp(X) 30 | 31 | assert kayak.util.checkgrad(X, out) < MAX_GRAD_DIFF 32 | 33 | def test_scalar_value_2(): 34 | npr.seed(3) 35 | 36 | for ii in xrange(NUM_TRIALS): 37 | np_X = npr.randn() 38 | wt = np.exp(npr.randn()) 39 | 40 | X = kayak.Parameter(np_X) 41 | out = kayak.NExp(X, weight=wt) 42 | 43 | assert close_float(out.value, wt * (1.0 - np.exp(-np.abs(np_X)))) 44 | 45 | def test_scalar_grad_2(): 46 | npr.seed(4) 47 | 48 | for ii in xrange(NUM_TRIALS): 49 | while True: 50 | np_X = npr.randn() 51 | if np.abs(np_X) > 0.1: 52 | break 53 | wt = np.exp(npr.randn()) 54 | 55 | X = kayak.Parameter(np_X) 56 | out = kayak.NExp(X, weight=wt) 57 | 58 | assert kayak.util.checkgrad(X, out) < MAX_GRAD_DIFF 59 | 60 | def test_vector_value(): 61 | npr.seed(5) 62 | 63 | for ii in xrange(NUM_TRIALS): 64 | np_X = npr.randn(10,1) 65 | wt = np.exp(npr.randn()) 66 | 67 | X = kayak.Parameter(np_X) 68 | out = kayak.NExp(X, weight=wt) 69 | 70 | assert close_float(out.value, wt * np.sum(1.0 - np.exp(-np.abs(np_X)))) 71 | 72 | def test_vector_grad(): 73 | npr.seed(6) 74 | 75 | for ii in xrange(NUM_TRIALS): 76 | # Draw a full vector, then shift every entry away from the kink at zero. 77 | np_X = npr.randn(10,1) 78 | np_X = np.where(np_X >= 0, np_X + 0.1, np_X - 0.1) 79 | assert np.all(np.abs(np_X) >= 0.1) 80 | wt = np.exp(npr.randn()) 81 | 82 | X = kayak.Parameter(np_X) 83 | out = kayak.NExp(X, weight=wt) 84 | 85 | assert kayak.util.checkgrad(X, out) < MAX_GRAD_DIFF 86 | 87 | def test_matrix_value(): 88 | npr.seed(7) 89 | 90 | for ii in xrange(NUM_TRIALS): 91 | np_X = npr.randn(10,20) 92 | wt = np.exp(npr.randn()) 93 | 94 | X = kayak.Parameter(np_X) 95 | out = kayak.NExp(X, weight=wt) 96 | 97 | assert close_float(out.value, wt * np.sum(1.0 - np.exp(-np.abs(np_X)))) 98 | 99 | def test_matrix_grad(): 100 | npr.seed(8) 101 | 102 | for ii in xrange(NUM_TRIALS): 103 | # Shift a full matrix away from zero; resampling until all entries clear 0.1 would almost never terminate. 104 | np_X = npr.randn(10,20) 105 | np_X = np.where(np_X >= 0, np_X + 0.1, np_X - 0.1) 106 | assert np.all(np.abs(np_X) >= 0.1) 107 | wt = np.exp(npr.randn()) 108 | 109 | X = kayak.Parameter(np_X) 110 | out = kayak.NExp(X, weight=wt) 111 | 112 | assert kayak.util.checkgrad(X, out) < MAX_GRAD_DIFF
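# NExp is a saturating penalty, value = weight * sum(1 - exp(-|x|)), per the
# value tests above. Its derivative involves sign(x), so these gradient tests
# keep every entry of x away from the kink at zero before running checkgrad.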
113 | 114 | def test_tensor_value(): 115 | npr.seed(9) 116 | 117 | for ii in xrange(NUM_TRIALS): 118 | np_X = npr.randn(10,20,5) 119 | wt = np.exp(npr.randn()) 120 | 121 | X = kayak.Parameter(np_X) 122 | out = kayak.NExp(X, weight=wt) 123 | 124 | assert close_float(out.value, wt * np.sum(1.0 - np.exp(-np.abs(np_X)))) 125 | 126 | def test_tensor_grad(): 127 | npr.seed(10) 128 | 129 | for ii in xrange(NUM_TRIALS): 130 | # Same shift trick for a full tensor. 131 | np_X = npr.randn(10,20,5) 132 | np_X = np.where(np_X >= 0, np_X + 0.1, np_X - 0.1) 133 | assert np.all(np.abs(np_X) >= 0.1) 134 | wt = np.exp(npr.randn()) 135 | 136 | X = kayak.Parameter(np_X) 137 | out = kayak.NExp(X, weight=wt) 138 | 139 | assert kayak.util.checkgrad(X, out) < MAX_GRAD_DIFF 140 | 141 | -------------------------------------------------------------------------------- /tests/test_OperatorAdd.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . import * 7 | # These behaviors require prepending singletons. Do we want to keep them? 8 | # def test_0d_opplus_2d_scalar_value(): 9 | # npr.seed(1) 10 | 11 | # for ii in xrange(NUM_TRIALS): 12 | # npX1 = npr.randn(1, 1) 13 | # X1 = kayak.Parameter( npX1 ) 14 | # npX2 = np.sum(npr.randn()) # generates a scalar with shape () 15 | # X2 = kayak.Parameter( npX2 ) 16 | # # Y = kayak.MatAdd(X1, X2) 17 | # Y = X1+X2 18 | 19 | # # Verify that a scalar is reproduced. 20 | # assert close_float(Y.value, npX1 + npX2) 21 | 22 | # def test_0d_plus_2d_scalar_grad(): 23 | # npr.seed(2) 24 | # for ii in xrange(NUM_TRIALS): 25 | # npX1 = npr.randn(1, 1) 26 | # X1 = kayak.Parameter( npX1 ) 27 | # npX2 = np.sum(npr.randn()) # generates a scalar with shape () 28 | # X2 = kayak.Parameter( npX2 ) 29 | # Y = X1+X2 30 | 31 | # # Verify that the gradient is one. 
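# (Note: this file exercises the overloaded arithmetic on kayak nodes; writing
# X1 + X2 is expected to build the same graph as kayak.MatAdd(X1, X2), which is
# why the live tests below mirror test_MatAdd.py.)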
32 | # assert Y.grad(X1) == 1.0 33 | # assert Y.grad(X2) == 1.0 34 | # assert kayak.util.checkgrad(X1, Y) < MAX_GRAD_DIFF 35 | # assert kayak.util.checkgrad(X2, Y) < MAX_GRAD_DIFF 36 | # 37 | def test_matadd_values_1(): 38 | npr.seed(1) 39 | 40 | for ii in xrange(NUM_TRIALS): 41 | 42 | np_A = npr.randn(5,6) 43 | np_B = npr.randn(5,6) 44 | A = kayak.Parameter(np_A) 45 | B = kayak.Parameter(np_B) 46 | C = A+B 47 | 48 | assert C.shape == np_A.shape 49 | assert np.all( close_float(C.value, np_A+np_B)) 50 | 51 | def test_matadd_values_2(): 52 | npr.seed(2) 53 | 54 | for ii in xrange(NUM_TRIALS): 55 | 56 | np_A = npr.randn(5,6) 57 | np_B = npr.randn(5,6) 58 | np_C = npr.randn(5,6) 59 | A = kayak.Parameter(np_A) 60 | B = kayak.Parameter(np_B) 61 | C = kayak.Parameter(np_C) 62 | D = A+B+C 63 | 64 | assert D.shape == np_A.shape 65 | assert np.all( close_float(D.value, np_A+np_B+np_C)) 66 | 67 | def test_matadd_values_3(): 68 | npr.seed(3) 69 | 70 | for ii in xrange(NUM_TRIALS): 71 | 72 | np_A = npr.randn(5,6) 73 | np_B = npr.randn(1,6) 74 | A = kayak.Parameter(np_A) 75 | B = kayak.Parameter(np_B) 76 | C = A+B 77 | 78 | assert C.shape == (5,6) 79 | assert np.all( close_float(C.value, np_A+np_B)) 80 | 81 | def test_matadd_values_4(): 82 | npr.seed(4) 83 | 84 | for ii in xrange(NUM_TRIALS): 85 | 86 | np_A = npr.randn(5,6) 87 | np_B = npr.randn(5,1) 88 | A = kayak.Parameter(np_A) 89 | B = kayak.Parameter(np_B) 90 | C = A+B 91 | 92 | assert C.shape == (5,6) 93 | assert np.all( close_float(C.value, np_A+np_B)) 94 | 95 | def test_matadd_values_5(): 96 | npr.seed(5) 97 | 98 | for ii in xrange(NUM_TRIALS): 99 | 100 | np_A = npr.randn(1,6) 101 | np_B = npr.randn(5,1) 102 | A = kayak.Parameter(np_A) 103 | B = kayak.Parameter(np_B) 104 | C = A+B 105 | 106 | assert C.shape == (5,6) 107 | assert np.all( close_float(C.value, np_A+np_B)) 108 | 109 | def test_matadd_values_6(): 110 | npr.seed(6) 111 | 112 | for ii in xrange(NUM_TRIALS): 113 | 114 | np_A = npr.randn(5,6) 115 | np_B = npr.randn(1,1) 116 | A = kayak.Parameter(np_A) 117 | B = kayak.Parameter(np_B) 118 | C = A+B 119 | 120 | assert C.shape == (5,6) 121 | assert np.all( close_float(C.value, np_A+np_B)) 122 | 123 | def test_matadd_values_7(): 124 | npr.seed(7) 125 | 126 | for ii in xrange(NUM_TRIALS): 127 | 128 | np_A = npr.randn(5,6) 129 | np_B = npr.randn(5,6) 130 | A = kayak.Parameter(np_A) 131 | B = kayak.Parameter(np_B) 132 | D = A+B+A 133 | 134 | assert D.shape == (5,6) 135 | assert np.all( close_float(D.value, 2*np_A + np_B)) 136 | 137 | def test_matadd_grad_1(): 138 | npr.seed(8) 139 | 140 | for ii in xrange(NUM_TRIALS): 141 | 142 | np_A = npr.randn(5,6) 143 | np_B = npr.randn(5,6) 144 | A = kayak.Parameter(np_A) 145 | B = kayak.Parameter(np_B) 146 | C = A+B 147 | D = kayak.MatSum(C) 148 | 149 | D.value 150 | assert kayak.util.checkgrad(A, D) < MAX_GRAD_DIFF 151 | assert kayak.util.checkgrad(B, D) < MAX_GRAD_DIFF 152 | 153 | def test_matadd_grad_2(): 154 | npr.seed(9) 155 | 156 | for ii in xrange(NUM_TRIALS): 157 | 158 | np_A = npr.randn(5,6) 159 | np_B = npr.randn(5,6) 160 | np_C = npr.randn(5,6) 161 | A = kayak.Parameter(np_A) 162 | B = kayak.Parameter(np_B) 163 | C = kayak.Parameter(np_C) 164 | D = A+B+C 165 | E = kayak.MatSum(D) 166 | 167 | E.value 168 | assert kayak.util.checkgrad(A, E) < MAX_GRAD_DIFF 169 | assert kayak.util.checkgrad(B, E) < MAX_GRAD_DIFF 170 | assert kayak.util.checkgrad(C, E) < MAX_GRAD_DIFF 171 | 172 | def test_matadd_grad_3(): 173 | npr.seed(10) 174 | 175 | for ii in xrange(NUM_TRIALS): 176 | 177 | np_A = npr.randn(5,6) 
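# Broadcasting case: the second operand below has shape (1,6), so its gradient
# must be summed back along the broadcast axis to shape (1,6); the shape
# assertions that follow check exactly that.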
178 | np_B = npr.randn(1,6) 179 | A = kayak.Parameter(np_A) 180 | B = kayak.Parameter(np_B) 181 | C = A+B 182 | D = kayak.MatSum(C) 183 | 184 | D.value 185 | print np_A.shape, D.grad(A).shape 186 | print np_B.shape, D.grad(B).shape 187 | assert D.grad(A).shape == np_A.shape 188 | assert D.grad(B).shape == np_B.shape 189 | assert kayak.util.checkgrad(A, D) < MAX_GRAD_DIFF 190 | assert kayak.util.checkgrad(B, D) < MAX_GRAD_DIFF 191 | 192 | def test_matadd_grad_4(): 193 | npr.seed(11) 194 | 195 | for ii in xrange(NUM_TRIALS): 196 | 197 | np_A = npr.randn(5,1) 198 | np_B = npr.randn(5,6) 199 | A = kayak.Parameter(np_A) 200 | B = kayak.Parameter(np_B) 201 | C = A+B 202 | D = kayak.MatSum(C) 203 | 204 | D.value 205 | assert D.grad(A).shape == np_A.shape 206 | assert D.grad(B).shape == np_B.shape 207 | assert kayak.util.checkgrad(A, D) < MAX_GRAD_DIFF 208 | assert kayak.util.checkgrad(B, D) < MAX_GRAD_DIFF 209 | 210 | def test_matadd_grad_5(): 211 | npr.seed(12) 212 | 213 | for ii in xrange(NUM_TRIALS): 214 | 215 | np_A = npr.randn(5,1) 216 | np_B = npr.randn(1,6) 217 | A = kayak.Parameter(np_A) 218 | B = kayak.Parameter(np_B) 219 | C = A+B 220 | D = kayak.MatSum(C) 221 | 222 | D.value 223 | assert D.grad(A).shape == np_A.shape 224 | assert D.grad(B).shape == np_B.shape 225 | assert kayak.util.checkgrad(A, D) < MAX_GRAD_DIFF 226 | assert kayak.util.checkgrad(B, D) < MAX_GRAD_DIFF 227 | 228 | def test_matadd_grad_6(): 229 | npr.seed(13) 230 | 231 | for ii in xrange(NUM_TRIALS): 232 | 233 | np_A = npr.randn(5,6) 234 | np_B = npr.randn(1,1) 235 | A = kayak.Parameter(np_A) 236 | B = kayak.Parameter(np_B) 237 | C = A+B 238 | D = kayak.MatSum(C) 239 | 240 | D.value 241 | assert D.grad(A).shape == np_A.shape 242 | assert D.grad(B).shape == np_B.shape 243 | assert kayak.util.checkgrad(A, D) < MAX_GRAD_DIFF 244 | assert kayak.util.checkgrad(B, D) < MAX_GRAD_DIFF 245 | 246 | def test_matadd_grad_7(): 247 | npr.seed(14) 248 | 249 | for ii in xrange(NUM_TRIALS): 250 | 251 | np_A = npr.randn(5,6) 252 | np_B = npr.randn(5,6) 253 | A = kayak.Parameter(np_A) 254 | B = kayak.Parameter(np_B) 255 | D = A+B+A 256 | E = kayak.MatSum(D) 257 | 258 | E.value 259 | assert E.grad(A).shape == np_A.shape 260 | assert E.grad(B).shape == np_B.shape 261 | assert kayak.util.checkgrad(A, E) < MAX_GRAD_DIFF 262 | assert kayak.util.checkgrad(B, E) < MAX_GRAD_DIFF 263 | 264 | def test_matadd_grad_8(): 265 | npr.seed(15) 266 | 267 | for ii in xrange(NUM_TRIALS): 268 | 269 | np_A = npr.randn(5,6) 270 | np_B = npr.randn(5,6) 271 | A = kayak.Parameter(np_A) 272 | D = A+A 273 | E = kayak.MatSum(D) 274 | 275 | E.value 276 | assert E.grad(A).shape == np_A.shape 277 | assert kayak.util.checkgrad(A, E) < MAX_GRAD_DIFF 278 | -------------------------------------------------------------------------------- /tests/test_OperatorMult.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . 
import * 7 | 8 | def test_elemmult_values_1(): 9 | npr.seed(1) 10 | 11 | for ii in xrange(NUM_TRIALS): 12 | 13 | np_A = npr.randn(5,6) 14 | np_B = npr.randn(5,6) 15 | A = kayak.Parameter(np_A) 16 | B = kayak.Parameter(np_B) 17 | C = A*B 18 | 19 | assert C.shape == np_A.shape 20 | assert np.all( close_float(C.value, np_A*np_B)) 21 | 22 | def test_elemmult_values_2(): 23 | npr.seed(2) 24 | 25 | for ii in xrange(NUM_TRIALS): 26 | 27 | np_A = npr.randn(5,6) 28 | np_B = npr.randn(5,6) 29 | np_C = npr.randn(5,6) 30 | A = kayak.Parameter(np_A) 31 | B = kayak.Parameter(np_B) 32 | C = kayak.Parameter(np_C) 33 | D = A*B*C 34 | 35 | assert D.shape == np_A.shape 36 | assert np.all( close_float(D.value, np_A*np_B*np_C)) 37 | 38 | def test_elemmult_values_3(): 39 | npr.seed(7) 40 | 41 | for ii in xrange(NUM_TRIALS): 42 | 43 | np_A = npr.randn(5,6) 44 | np_B = npr.randn(5,6) 45 | A = kayak.Parameter(np_A) 46 | B = kayak.Parameter(np_B) 47 | D = A*B*A 48 | 49 | assert D.shape == (5,6) 50 | assert np.all( close_float(D.value, np_A**2 * np_B)) 51 | 52 | def test_elemmult_values_4(): 53 | npr.seed(1) 54 | 55 | for ii in xrange(NUM_TRIALS): 56 | 57 | np_A = npr.randn(5,6) 58 | np_B = npr.randn(5,1) 59 | A = kayak.Parameter(np_A) 60 | B = kayak.Parameter(np_B) 61 | C = A*B 62 | 63 | assert C.shape == np_A.shape 64 | assert np.all( close_float(C.value, np_A*np_B)) 65 | 66 | def test_elemmult_values_5(): 67 | npr.seed(2) 68 | 69 | for ii in xrange(NUM_TRIALS): 70 | 71 | np_A = npr.randn(5,1) 72 | np_B = npr.randn(1,6) 73 | np_C = npr.randn(1,1) 74 | A = kayak.Parameter(np_A) 75 | B = kayak.Parameter(np_B) 76 | C = kayak.Parameter(np_C) 77 | D = A*B*C 78 | 79 | assert D.shape == (5,6) 80 | assert np.all( close_float(D.value, np_A*np_B*np_C)) 81 | 82 | def test_elemmult_values_6(): 83 | npr.seed(7) 84 | 85 | for ii in xrange(NUM_TRIALS): 86 | 87 | np_A = npr.randn(5,6) 88 | np_B = npr.randn(1, 1) 89 | A = kayak.Parameter(np_A) 90 | B = kayak.Parameter(np_B) 91 | D = A*B*A 92 | 93 | assert D.shape == (5,6) 94 | assert np.all( close_float(D.value, np_A**2 * np_B)) 95 | 96 | def test_elemmult_grad_1(): 97 | npr.seed(8) 98 | 99 | for ii in xrange(NUM_TRIALS): 100 | 101 | np_A = npr.randn(5,6) 102 | np_B = npr.randn(5,6) 103 | A = kayak.Parameter(np_A) 104 | B = kayak.Parameter(np_B) 105 | C = A*B 106 | D = kayak.MatSum(C) 107 | 108 | D.value 109 | assert kayak.util.checkgrad(A, D) < MAX_GRAD_DIFF 110 | assert kayak.util.checkgrad(B, D) < MAX_GRAD_DIFF 111 | 112 | def test_elemmult_grad_2(): 113 | npr.seed(9) 114 | 115 | for ii in xrange(NUM_TRIALS): 116 | 117 | np_A = npr.randn(5,6) 118 | np_B = npr.randn(5,6) 119 | np_C = npr.randn(5,6) 120 | A = kayak.Parameter(np_A) 121 | B = kayak.Parameter(np_B) 122 | C = kayak.Parameter(np_C) 123 | D = A*B*C 124 | E = kayak.MatSum(D) 125 | 126 | E.value 127 | assert kayak.util.checkgrad(A, E) < MAX_GRAD_DIFF 128 | assert kayak.util.checkgrad(B, E) < MAX_GRAD_DIFF 129 | assert kayak.util.checkgrad(C, E) < MAX_GRAD_DIFF 130 | 131 | def test_elemmult_grad_3(): 132 | npr.seed(14) 133 | 134 | for ii in xrange(NUM_TRIALS): 135 | 136 | np_A = npr.randn(5,6) 137 | np_B = npr.randn(5,6) 138 | A = kayak.Parameter(np_A) 139 | B = kayak.Parameter(np_B) 140 | D = A*B*A 141 | E = kayak.MatSum(D) 142 | 143 | E.value 144 | assert E.grad(A).shape == np_A.shape 145 | assert E.grad(B).shape == np_B.shape 146 | assert kayak.util.checkgrad(A, E) < MAX_GRAD_DIFF 147 | assert kayak.util.checkgrad(B, E) < MAX_GRAD_DIFF 148 | 149 | def test_elemmult_grad_4(): 150 | npr.seed(15) 151 | 152 | for ii in 
xrange(NUM_TRIALS): 153 | 154 | np_A = npr.randn(5,6) 155 | np_B = npr.randn(5,6) 156 | A = kayak.Parameter(np_A) 157 | D = A*A 158 | E = kayak.MatSum(D) 159 | 160 | E.value 161 | assert E.grad(A).shape == np_A.shape 162 | assert kayak.util.checkgrad(A, E) < MAX_GRAD_DIFF 163 | -------------------------------------------------------------------------------- /tests/test_OperatorNeg.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . import * 7 | 8 | def test_neg_values_1(): 9 | npr.seed(1) 10 | 11 | for ii in xrange(NUM_TRIALS): 12 | 13 | np_A = npr.randn(5,6) 14 | A = kayak.Parameter(np_A) 15 | C = -A 16 | 17 | assert C.shape == np_A.shape 18 | assert np.all( close_float(C.value, -np_A)) 19 | 20 | def test_neg_values_2(): 21 | npr.seed(2) 22 | 23 | for ii in xrange(NUM_TRIALS): 24 | 25 | np_A = npr.randn(1) 26 | A = kayak.Parameter(np_A) 27 | D = -A 28 | 29 | assert D.shape == np_A.shape 30 | assert np.all( close_float(D.value, -np_A)) 31 | 32 | def test_neg_grad_1(): 33 | npr.seed(8) 34 | 35 | for ii in xrange(NUM_TRIALS): 36 | 37 | np_A = npr.randn(5,6) 38 | A = kayak.Parameter(np_A) 39 | C = -A 40 | D = kayak.MatSum(C) 41 | 42 | D.value 43 | assert kayak.util.checkgrad(A, D) < MAX_GRAD_DIFF 44 | 45 | def test_neg_grad_2(): 46 | npr.seed(9) 47 | 48 | for ii in xrange(NUM_TRIALS): 49 | 50 | np_A = npr.randn(1) 51 | A = kayak.Parameter(np_A) 52 | D = -A 53 | E = kayak.MatSum(D) 54 | 55 | E.value 56 | assert kayak.util.checkgrad(A, E) < MAX_GRAD_DIFF 57 | -------------------------------------------------------------------------------- /tests/test_Parameter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HIPS/Kayak/1a7d4baa849bbd5a6f6d0486136169899cf25523/tests/test_Parameter.py -------------------------------------------------------------------------------- /tests/test_Reshape.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . import * 7 | 8 | def test_reshape_1(): 9 | npr.seed(1) 10 | 11 | np_A = npr.randn(5,10) 12 | A = kayak.Parameter(np_A) 13 | B = kayak.Reshape(A, (25,2)) 14 | 15 | B.value 16 | assert B.shape == (25,2) 17 | 18 | def test_reshape_2(): 19 | npr.seed(2) 20 | 21 | np_A = npr.randn(5,10) 22 | A = kayak.Parameter(np_A) 23 | B = kayak.Reshape(A, (2,25)) 24 | C = kayak.Parameter(npr.randn(25,5)) 25 | D = kayak.MatMult(B, C) 26 | out = kayak.MatSum(D) 27 | 28 | out.value 29 | assert out.grad(A).shape == np_A.shape 30 | assert kayak.util.checkgrad(A, out) < MAX_GRAD_DIFF 31 | 32 | 33 | -------------------------------------------------------------------------------- /tests/test_SoftMax.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . 
import * 7 | 8 | def test_softmax_values_1(): 9 | npr.seed(1) 10 | 11 | for ii in xrange(NUM_TRIALS): 12 | 13 | np_X = npr.randn(5,6) 14 | X = kayak.Parameter(np_X) 15 | Y = kayak.SoftMax(X, axis=None) 16 | 17 | np_Y = np.exp(np_X) 18 | # np_Y = np_Y / np.sum(np_Y, axis=1)[:,np.newaxis] 19 | np_Y = np_Y / np.sum(np_Y) 20 | 21 | assert Y.shape == np_X.shape 22 | assert np.all(close_float(Y.value, np_Y)) 23 | 24 | def test_softmax_values_2(): 25 | npr.seed(2) 26 | 27 | for ii in xrange(NUM_TRIALS): 28 | 29 | np_X = npr.randn(5,6) 30 | X = kayak.Parameter(np_X) 31 | Y = kayak.SoftMax(X, axis=0) 32 | 33 | np_Y = np.exp(np_X) 34 | np_Y = np_Y / np.sum(np_Y, axis=0, keepdims=True) 35 | 36 | assert Y.shape == np_X.shape 37 | assert np.all(close_float(Y.value, np_Y)) 38 | 39 | def test_softmax_grad_1(): 40 | npr.seed(3) 41 | 42 | for ii in xrange(NUM_TRIALS): 43 | 44 | np_X = npr.randn(5,6) 45 | X = kayak.Parameter(np_X) 46 | Y = kayak.SoftMax(X) 47 | Z = kayak.MatSum(Y * Y) 48 | 49 | assert kayak.util.checkgrad(X, Z) < MAX_GRAD_DIFF 50 | 51 | def test_softmax_grad_2(): 52 | npr.seed(4) 53 | 54 | for ii in xrange(NUM_TRIALS): 55 | 56 | np_X = npr.randn(5,6) 57 | X = kayak.Parameter(np_X) 58 | Y = kayak.SoftMax(X, axis=0) 59 | Z = kayak.MatSum(Y * Y) 60 | 61 | assert kayak.util.checkgrad(X, Z) < MAX_GRAD_DIFF 62 | 63 | def test_softmax_grad_3(): 64 | npr.seed(5) 65 | 66 | for ii in xrange(NUM_TRIALS): 67 | 68 | np_X = npr.randn(5,6) 69 | np_T = npr.randint(0, 10, np_X.shape) 70 | X = kayak.Parameter(np_X) 71 | T = kayak.Targets(np_T) 72 | Y = kayak.SoftMax(X) 73 | Z = kayak.MatSum(kayak.LogMultinomialLoss(Y, T)) 74 | 75 | assert kayak.util.checkgrad(X, Z) < MAX_GRAD_DIFF 76 | 77 | -------------------------------------------------------------------------------- /tests/test_SoftReLU.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . import * 7 | 8 | def test_softrelu_values(): 9 | npr.seed(1) 10 | 11 | for ii in xrange(NUM_TRIALS): 12 | np_X = npr.randn(6,5) 13 | X = kayak.Parameter(np_X) 14 | Y = kayak.SoftReLU(X) 15 | 16 | assert np.all( Y.value >= 0.0 ) 17 | assert np.all(close_float(np.log(1.0 + np.exp(np_X)), Y.value)) 18 | 19 | def test_softrelu_grad(): 20 | npr.seed(2) 21 | 22 | for ii in xrange(NUM_TRIALS): 23 | np_X = npr.randn(6,5) 24 | X = kayak.Parameter(np_X) 25 | Y = kayak.SoftReLU(X) 26 | Z = kayak.MatSum(Y) 27 | 28 | Z.value 29 | assert np.all( Z.grad(X) >= 0.0 ) 30 | assert kayak.util.checkgrad(X, Z) < MAX_GRAD_DIFF 31 | -------------------------------------------------------------------------------- /tests/test_Stacking.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . 
import * 7 | from nose.tools import assert_less 8 | 9 | def test_stacking_values(): 10 | npr.seed(1) 11 | 12 | for ii in xrange(NUM_TRIALS): 13 | np_A = npr.randn(6,10) 14 | np_B = npr.randn(6,5) 15 | A = kayak.Parameter(np_A) 16 | B = kayak.Parameter(np_B) 17 | Y = kayak.Hstack(A, B) 18 | 19 | assert(np.array_equal(Y.value[:, :A.shape[1]], np_A)) 20 | assert(np.array_equal(Y.value[:, A.shape[1]:], np_B)) 21 | 22 | def test_stacking_grad(): 23 | npr.seed(2) 24 | 25 | for ii in xrange(NUM_TRIALS): 26 | np_A = npr.randn(6,10) 27 | np_B = npr.randn(6,5) 28 | A = kayak.Parameter(np_A) 29 | B = kayak.Parameter(np_B) 30 | Y = kayak.Hstack(A, B) 31 | Z = kayak.MatSum(Y) 32 | 33 | Z.value 34 | assert_less(kayak.util.checkgrad(A, Z), MAX_GRAD_DIFF) 35 | assert_less(kayak.util.checkgrad(B, Z), MAX_GRAD_DIFF) 36 | -------------------------------------------------------------------------------- /tests/test_TanH.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . import * 7 | from nose.tools import assert_less 8 | 9 | def test_tanh_values(): 10 | npr.seed(1) 11 | 12 | for ii in xrange(NUM_TRIALS): 13 | np_X = npr.randn(6,5) 14 | X = kayak.Parameter(np_X) 15 | Y = kayak.TanH(X) 16 | 17 | assert np.all(close_float(np.tanh(np_X), Y.value)) 18 | 19 | def test_tanh_grad(): 20 | npr.seed(2) 21 | 22 | for ii in xrange(NUM_TRIALS): 23 | np_X = npr.randn(6,5) 24 | X = kayak.Parameter(np_X) 25 | Y = kayak.TanH(X) 26 | Z = kayak.MatSum(Y) 27 | 28 | Z.value 29 | assert np.all( Z.grad(X) >= 0.0 ) 30 | assert_less(kayak.util.checkgrad(X, Z), MAX_GRAD_DIFF) 31 | -------------------------------------------------------------------------------- /tests/test_Targets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HIPS/Kayak/1a7d4baa849bbd5a6f6d0486136169899cf25523/tests/test_Targets.py -------------------------------------------------------------------------------- /tests/test_TensorMult.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . import * 7 | 8 | def check_tensormult(A_shape, B_shape, axes): 9 | 10 | np_A = npr.randn(*A_shape) 11 | np_B = npr.randn(*B_shape) 12 | A = kayak.Parameter(np_A) 13 | B = kayak.Parameter(np_B) 14 | C = kayak.TensorMult(A, B, axes) 15 | D = kayak.Parameter(npr.randn(*C.shape)) 16 | L = kayak.MatSum(kayak.ElemMult(C, D)) 17 | 18 | assert np.all(close_float(C.value, np.tensordot(np_A, np_B, axes))) 19 | assert kayak.util.checkgrad(A, L) < MAX_GRAD_DIFF 20 | assert kayak.util.checkgrad(B, L) < MAX_GRAD_DIFF 21 | 22 | def test_tensormult_grad_1(): 23 | check_tensormult((3, 4), (4, 5), ((1,), (0,))) 24 | 25 | def test_tensormult_grad_2(): 26 | check_tensormult((4, 3), (5, 4), ((0,), (1,))) 27 | 28 | def test_tensormult_grad_3(): 29 | check_tensormult((3, 4), (4, 5, 6), ((1,), (0,))) 30 | 31 | def test_tensormult_grad_4(): 32 | check_tensormult((2, 3, 4), (5, 7, 4, 3), ((1, 2), (3, 2))) 33 | check_tensormult((2, 3, 4), (5, 7, 4, 3), ((2, 1), (2, 3))) 34 | -------------------------------------------------------------------------------- /tests/test_Transpose.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from .
import * 7 | 8 | def test_transpose_1(): 9 | npr.seed(1) 10 | 11 | np_A = npr.randn(5,10) 12 | A = kayak.Parameter(np_A) 13 | B = kayak.Transpose(A) 14 | 15 | B.value 16 | assert B.shape == (10,5) 17 | for ii in xrange(np_A.shape[0]): 18 | for jj in xrange(np_A.shape[1]): 19 | assert np_A[ii,jj] == B.value[jj,ii] 20 | 21 | def test_transpose_2(): 22 | npr.seed(2) 23 | 24 | np_A = npr.randn(5,10,15) 25 | A = kayak.Parameter(np_A) 26 | B = kayak.Transpose(A) 27 | 28 | B.value 29 | assert B.shape == (15,10,5) 30 | for ii in xrange(np_A.shape[0]): 31 | for jj in xrange(np_A.shape[1]): 32 | for kk in xrange(np_A.shape[2]): 33 | assert np_A[ii,jj,kk] == B.value[kk,jj,ii] 34 | 35 | def test_transpose_3(): 36 | npr.seed(3) 37 | 38 | np_A = npr.randn(5,10) 39 | A = kayak.Parameter(np_A) 40 | B = kayak.Transpose(A) 41 | C = kayak.Parameter(npr.randn(5,5)) 42 | D = kayak.MatMult(B, C) 43 | out = kayak.MatSum(D) 44 | 45 | out.value 46 | assert out.grad(A).shape == np_A.shape 47 | assert kayak.util.checkgrad(A, out) < MAX_GRAD_DIFF 48 | --------------------------------------------------------------------------------
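A note on the pattern used throughout the tests above: `kayak.util.checkgrad(param, out)` returns a scalar discrepancy between the gradient computed by backpropagation and a numerical estimate, and a test passes when that discrepancy is below `MAX_GRAD_DIFF`. Below is a minimal pure-NumPy sketch of the same idea, for orientation only; the function `fd_checkgrad` and its `(value, gradient)` callback convention are illustrative assumptions, not Kayak's actual implementation or API.

    import numpy as np
    import numpy.random as npr

    def fd_checkgrad(f, x, eps=1e-4):
        # f maps an ndarray x to (scalar loss, gradient w.r.t. x).
        # Illustrative stand-in for kayak.util.checkgrad, not its real code.
        _, grad = f(x)
        fd = np.zeros_like(x)
        for idx in np.ndindex(*x.shape):
            xp = x.copy()
            xp[idx] += eps
            xm = x.copy()
            xm[idx] -= eps
            # Central finite difference for this single entry of x.
            fd[idx] = (f(xp)[0] - f(xm)[0]) / (2.0 * eps)
        # Relative discrepancy, to be compared against a small tolerance.
        return np.linalg.norm(grad - fd) / np.linalg.norm(grad + fd)

    # Example: for loss(A) = sum(A * B), the exact gradient is B.
    npr.seed(1)
    B = npr.randn(5, 6)
    loss = lambda A: (np.sum(A * B), B)
    assert fd_checkgrad(loss, npr.randn(5, 6)) < 1e-6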
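The MatAdd and ElemMult shape tests above (e.g. `test_matadd_grad_3` through `test_matadd_grad_6`) pin down the broadcasting rule for gradients: when an operand such as a `(1,6)` bias row is broadcast up to `(5,6)`, the gradient that comes back must have the operand's own shape, which means summing the upstream gradient over the broadcast axes. A small sketch of that rule in plain NumPy; the helper name `unbroadcast` is ours, not a Kayak function.

    import numpy as np

    def unbroadcast(grad, shape):
        # Sum out leading axes that broadcasting prepended.
        while grad.ndim > len(shape):
            grad = grad.sum(axis=0)
        # Sum over axes the original operand had as size 1.
        for axis, size in enumerate(shape):
            if size == 1:
                grad = grad.sum(axis=axis, keepdims=True)
        return grad

    # For C = A + B with A of shape (5,6) and B of shape (1,6),
    # the upstream gradient of MatSum(C) is all ones:
    g = np.ones((5, 6))
    gB = unbroadcast(g, (1, 6))
    assert gB.shape == (1, 6) and np.all(gB == 5.0)
    # A (1,1) operand, as in test_matadd_grad_6, collapses to a single sum:
    gS = unbroadcast(g, (1, 1))
    assert gS.shape == (1, 1) and gS[0, 0] == 30.0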
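Finally, `test_TensorMult.py` pins `kayak.TensorMult` to NumPy's `np.tensordot` axes convention, which is worth spelling out: `axes=((1, 2), (3, 2))` contracts axes 1 and 2 of the first argument against axes 3 and 2 of the second, and the result keeps the uncontracted axes of the first argument followed by those of the second. A worked shape example matching `check_tensormult((2, 3, 4), (5, 7, 4, 3), ((1, 2), (3, 2)))`:

    import numpy as np
    import numpy.random as npr

    npr.seed(1)
    A = npr.randn(2, 3, 4)
    B = npr.randn(5, 7, 4, 3)
    # Pair A's axis 1 (size 3) with B's axis 3 (size 3),
    # and A's axis 2 (size 4) with B's axis 2 (size 4).
    C = np.tensordot(A, B, axes=((1, 2), (3, 2)))
    # Remaining axes: (2,) from A, then (5, 7) from B.
    assert C.shape == (2, 5, 7)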