├── .gitignore ├── README.md ├── examples ├── data.py ├── linreg_example.py ├── mnist_logreg.py ├── mnist_nnet.py ├── nnet_example.py └── poisson_glm.py ├── kayak ├── __init__.py ├── batcher.py ├── convolution.py ├── crossval.py ├── differentiable.py ├── dropout.py ├── elem_ops.py ├── generic_ops.py ├── indexing.py ├── input_checking.py ├── losses.py ├── matrix_ops.py ├── nonlinearities.py ├── regularizers.py ├── root_nodes.py ├── stacking.py └── util.py ├── license.txt ├── setup.py └── tests ├── __init__.py ├── check_MemoryUse.py ├── test_BatchNormalize.py ├── test_Batcher.py ├── test_CacheFreshness.py ├── test_Constant.py ├── test_Convolve1d.py ├── test_Dropout.py ├── test_ElemAbs.py ├── test_ElemExp.py ├── test_ElemMult.py ├── test_ElemPower.py ├── test_Graphs.py ├── test_HardReLU.py ├── test_Horseshoe.py ├── test_Identity.py ├── test_Indexing.py ├── test_Inputs.py ├── test_L1Norm.py ├── test_L2Loss.py ├── test_L2Norm.py ├── test_LogMultinomialLoss.py ├── test_LogSoftMax.py ├── test_Logistic.py ├── test_MatAdd.py ├── test_MatConcat.py ├── test_MatDet.py ├── test_MatMean.py ├── test_MatMult.py ├── test_MatSum.py ├── test_NExp.py ├── test_OperatorAdd.py ├── test_OperatorMult.py ├── test_OperatorNeg.py ├── test_Parameter.py ├── test_Reshape.py ├── test_SoftMax.py ├── test_SoftReLU.py ├── test_Stacking.py ├── test_TanH.py ├── test_Targets.py ├── test_TensorMult.py └── test_Transpose.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.gz 2 | *.pyc 3 | *.pyo 4 | .DS_Store 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Don't use this: use [Autograd](http://github.com/hips/autograd) instead! 2 | ======================================= 3 | 4 | Kayak: Library for Deep Neural Networks 5 | ======================================= 6 | 7 | This is a library that implements some useful modules and provides 8 | automatic differentiation utilities for learning deep neural networks. 9 | It is similar in spirit to tools like 10 | [Theano](http://deeplearning.net/software/theano/) and 11 | [Torch](http://torch.ch/). The objective of Kayak is to be simple to 12 | use and extend, for rapid prototyping in Python. It is unlikely to be 13 | faster than these other tools, although it can be competitive, and 14 | sometimes faster, when the architectures are highly 15 | complex. It will certainly not be faster on convolutional 16 | architectures for visual object detection and recognition tasks than, 17 | e.g., [Alex Krizhevsky's CUDA 18 | Convnet](https://code.google.com/p/cuda-convnet2/) or 19 | [Caffe](http://caffe.berkeleyvision.org/). The point of Kayak is to 20 | be able to experiment in Python with patterns that look a lot like 21 | what you're already used to with Numpy. It makes it easy to manage 22 | batches of data and compute gradients with backpropagation. 23 | 24 | There are some examples in the 'examples' directory, but the main idea 25 | looks like this: 26 | 27 | import kayak 28 | import numpy.random as npr 29 | 30 | X = ... your feature matrix ... 31 | Y = ... your label matrix ... 32 | 33 | # Create Kayak objects for features and labels. 34 | inputs = kayak.Inputs(X) 35 | targets = kayak.Targets(Y) 36 | 37 | # Create Kayak objects for first-layer weights and biases. Initialize 38 | # them with random Numpy matrices.
39 | weights_1 = kayak.Parameter(npr.randn( input_dims, hidsize_1 )) 40 | biases_1 = kayak.Parameter(npr.randn( 1, hidsize_1 )) 41 | 42 | # Create Kayak objects that implement a network layer. First, 43 | # multiply the features by weights and add biases. 44 | hiddens_1a = kayak.ElemAdd(kayak.MatMult( inputs, weights_1 ), biases_1) 45 | 46 | # Then, apply a "relu" (rectified linear) nonlinearity. 47 | # Alternatively, you can apply your own favorite nonlinearity, or 48 | # add one for an idea that you want to try out. 49 | hiddens_1b = kayak.HardReLU(hiddens_1a) 50 | 51 | # Now, apply a "dropout" layer to prevent co-adaptation. Got a 52 | # new idea for dropout? It's super easy to extend Kayak with it. 53 | hiddens_1 = kayak.Dropout(hiddens_1b, drop_prob=0.5) 54 | 55 | # Okay, with that layer constructed, let's make another one the 56 | # same way: linear transformation + bias with ReLU and dropout. 57 | # First, create the second-layer parameters. 58 | weights_2 = kayak.Parameter(npr.randn(hidsize_1, hidsize_2)) 59 | biases_2 = kayak.Parameter(npr.randn(1, hidsize_2)) 60 | 61 | # This time, let's compose all the steps, just to show we can. 62 | hiddens_2 = kayak.Dropout( kayak.HardReLU( kayak.ElemAdd( \ 63 | kayak.MatMult( hiddens_1, weights_2), biases_2)), drop_prob=0.5) 64 | 65 | # Make the output layer linear. 66 | weights_out = kayak.Parameter(npr.randn(hidsize_2, 1)) 67 | biases_out = kayak.Parameter(npr.randn()) 68 | out = kayak.ElemAdd( kayak.MatMult( hiddens_2, weights_out), biases_out) 69 | 70 | # Apply a loss function. In this case, we'll just do squared loss. 71 | loss = kayak.MatSum( kayak.L2Loss( out, targets )) 72 | 73 | # Maybe roll in an L1 norm for the first layer and an L2 norm for the others? 74 | objective = kayak.ElemAdd(loss, 75 | kayak.L1Norm(weights_1, weight=100.0), 76 | kayak.L2Norm(weights_2, weight=50.0), 77 | kayak.L2Norm(weights_out, weight=3.0)) 78 | 79 | # This is the fun part and is the whole point of Kayak. You can 80 | # now get the gradient of anything in terms of anything else. 81 | # Probably, if you're doing neural networks, you want the gradient 82 | # of the parameters in terms of the overall objective. That way 83 | # you can go off and do some kind of optimization. 84 | weights_1_grad = objective.grad(weights_1) 85 | biases_1_grad = objective.grad(biases_1) 86 | weights_2_grad = objective.grad(weights_2) 87 | biases_2_grad = objective.grad(biases_2) 88 | weights_out_grad = objective.grad(weights_out) 89 | biases_out_grad = objective.grad(biases_out) 90 | 91 | ... use the gradients for learning ... 92 | ... probably this whole thing would be in a loop ... 93 | ... in practice you'd probably also use minibatches ... 94 | 95 | This is a work in progress and we welcome contributions. Some 96 | nosetests are implemented. We're working on documentation. Whatever 97 | docs come into existence will end up at 98 | [http://hips.github.io/Kayak](http://hips.github.io/Kayak). 99 | 100 | This project is primarily developed by the [Harvard Intelligent 101 | Probabilistic Systems (HIPS)](http://hips.seas.harvard.edu) group in 102 | the [Harvard School of Engineering and Applied Sciences 103 | (SEAS)](http://www.seas.harvard.edu). The primary developers to date 104 | have been Ryan Adams, David Duvenaud, Scott Linderman, Dougal 105 | Maclaurin, and Jasper Snoek.
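To make the "use the gradients for learning" step in the example above concrete, here is a minimal sketch of a plain gradient-descent loop over the same objects (the learning rate and epoch count are arbitrary illustrations, not library defaults):

    learn_rate = 0.01
    for epoch in xrange(100):
        # Recompute gradients at the current parameter values.
        weights_1_grad = objective.grad(weights_1)
        biases_1_grad = objective.grad(biases_1)
        weights_2_grad = objective.grad(weights_2)
        biases_2_grad = objective.grad(biases_2)
        weights_out_grad = objective.grad(weights_out)
        biases_out_grad = objective.grad(biases_out)

        # Plain gradient-descent updates. Assigning to .value clears
        # the cached computation, so the next epoch starts fresh.
        weights_1.value -= learn_rate * weights_1_grad
        biases_1.value -= learn_rate * biases_1_grad
        weights_2.value -= learn_rate * weights_2_grad
        biases_2.value -= learn_rate * biases_2_grad
        weights_out.value -= learn_rate * weights_out_grad
        biases_out.value -= learn_rate * biases_out_grad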
106 | 107 | Kayak is Copyrighted by The President and Fellows of Harvard 108 | University, and is distributed under an MIT license, which can be 109 | found in the license.txt file but is also below: 110 | 111 | Permission is hereby granted, free of charge, to any person obtaining a copy 112 | of this software and associated documentation files (the "Software"), to deal 113 | in the Software without restriction, including without limitation the rights 114 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 115 | copies of the Software, and to permit persons to whom the Software is 116 | furnished to do so, subject to the following conditions: 117 | 118 | The above copyright notice and this permission notice shall be included in all 119 | copies or substantial portions of the Software. 120 | 121 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 122 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 123 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 124 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 125 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 126 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 127 | SOFTWARE. 128 | -------------------------------------------------------------------------------- /examples/data.py: -------------------------------------------------------------------------------- 1 | import os 2 | import urllib 3 | import gzip 4 | import struct 5 | import array 6 | import numpy as np 7 | 8 | def download(url, filename): 9 | if not os.path.exists('data'): 10 | os.makedirs('data') 11 | out_file = os.path.join('data', filename) 12 | if not os.path.isfile(out_file): 13 | urllib.urlretrieve(url, out_file) 14 | 15 | def mnist(): 16 | base_url = 'http://yann.lecun.com/exdb/mnist/' 17 | 18 | def parse_labels(filename): 19 | with gzip.open(filename, 'rb') as fh: 20 | magic, num_data = struct.unpack(">II", fh.read(8)) 21 | return np.array(array.array("B", fh.read()), dtype=np.uint8) 22 | 23 | def parse_images(filename): 24 | with gzip.open(filename, 'rb') as fh: 25 | magic, num_data, rows, cols = struct.unpack(">IIII", fh.read(16)) 26 | return np.array(array.array("B", fh.read()), dtype=np.uint8).reshape(num_data, rows, cols) 27 | 28 | for filename in ['train-images-idx3-ubyte.gz', 29 | 'train-labels-idx1-ubyte.gz', 30 | 't10k-images-idx3-ubyte.gz', 31 | 't10k-labels-idx1-ubyte.gz']: 32 | download(base_url + filename, filename) 33 | 34 | train_images = parse_images('data/train-images-idx3-ubyte.gz') 35 | train_labels = parse_labels('data/train-labels-idx1-ubyte.gz') 36 | test_images = parse_images('data/t10k-images-idx3-ubyte.gz') 37 | test_labels = parse_labels('data/t10k-labels-idx1-ubyte.gz') 38 | 39 | return train_images, train_labels, test_images, test_labels 40 | -------------------------------------------------------------------------------- /examples/linreg_example.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import sys 5 | sys.path.append('..') 6 | 7 | import kayak 8 | 9 | N = 10000 10 | D = 5 11 | P = 3 12 | learn = 0.00001 13 | batch_size = 500 14 | 15 | # Random inputs. 16 | X = npr.randn(N,D) 17 | true_W = npr.randn(D,P) 18 | Y = np.dot(X, true_W) + 0.1*npr.randn(N,P) 19 | 20 | kyk_batcher = kayak.Batcher(batch_size, N) 21 | 22 | # Build network. 
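# (The "network" here is just one linear layer: the predictions are np.dot(X, W), trained with squared loss against Y.)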
23 | kyk_inputs = kayak.Inputs(X, kyk_batcher) 24 | 25 | # Labels. 26 | kyk_targets = kayak.Targets(Y, kyk_batcher) 27 | 28 | # Weights. 29 | W = 0.01*npr.randn(D,P) 30 | kyk_W = kayak.Parameter(W) 31 | 32 | # Linear layer. 33 | kyk_out = kayak.MatMult( kyk_inputs, kyk_W ) 34 | 35 | # Elementwise Loss. 36 | kyk_el_loss = kayak.L2Loss(kyk_out, kyk_targets) 37 | 38 | # Sum the losses. 39 | kyk_loss = kayak.MatSum( kyk_el_loss ) 40 | 41 | for ii in xrange(100): 42 | 43 | for batch in kyk_batcher: 44 | loss = kyk_loss.value 45 | print loss, np.sum((kyk_W.value - true_W)**2) 46 | grad = kyk_loss.grad(kyk_W) 47 | kyk_W.value -= learn * grad 48 | -------------------------------------------------------------------------------- /examples/mnist_logreg.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import data 3 | import numpy as np 4 | import numpy.random as npr 5 | 6 | num_folds = 5 7 | 8 | sys.path.append('..') 9 | import kayak 10 | 11 | # Here I define a nice little training function that takes inputs and targets. 12 | def train(inputs, targets, batch_size, learn_rate, momentum, l1_weight, l2_weight, dropout): 13 | 14 | # Create a batcher object. 15 | batcher = kayak.Batcher(batch_size, inputs.shape[0]) 16 | 17 | # Inputs and targets need access to the batcher. 18 | X = kayak.Inputs(inputs, batcher) 19 | T = kayak.Targets(targets, batcher) 20 | 21 | # Weights and biases, with random initializations. 22 | W = kayak.Parameter( 0.1*npr.randn( inputs.shape[1], 10 )) 23 | B = kayak.Parameter( 0.1*npr.randn(1,10) ) 24 | 25 | # Nothing fancy here: inputs times weights, plus bias, then softmax. 26 | dropout_layer = kayak.Dropout(X, dropout, batcher=batcher) 27 | Y = kayak.LogSoftMax( kayak.ElemAdd( kayak.MatMult(dropout_layer, W), B ) ) 28 | 29 | # The training loss is negative multinomial log likelihood. 30 | loss = kayak.MatAdd(kayak.MatSum(kayak.LogMultinomialLoss(Y, T)), 31 | kayak.L2Norm(W, l2_weight), 32 | kayak.L1Norm(W, l1_weight)) 33 | 34 | # Use momentum for the gradient-based optimization. 35 | mom_grad_W = np.zeros(W.shape) 36 | 37 | # Loop over epochs. 38 | for epoch in xrange(10): 39 | 40 | # Track the total loss and the overall gradient. 41 | total_loss = 0.0 42 | total_grad_W = np.zeros(W.shape) 43 | 44 | # Loop over batches -- using batcher as iterator. 45 | for batch in batcher: 46 | # Compute the loss of this minibatch by asking the Kayak 47 | # object for its value. 48 | total_loss += loss.value 49 | 50 | # Now ask the loss for its gradient in terms of the 51 | # weights and the biases -- the two things we're trying to 52 | # learn here. 53 | grad_W = loss.grad(W) 54 | grad_B = loss.grad(B) 55 | 56 | # Use momentum on the weight gradient. 57 | mom_grad_W = momentum*mom_grad_W + (1.0-momentum)*grad_W 58 | 59 | # Now make the actual parameter updates. 60 | W.value -= learn_rate * mom_grad_W 61 | B.value -= learn_rate * grad_B 62 | 63 | # Keep track of the gradient to see if we're converging. 64 | total_grad_W += grad_W 65 | 66 | #print epoch, total_loss, np.sum(total_grad_W**2) 67 | 68 | # After we've trained, we return a sugary little function handle 69 | # that makes things easy. Basically, what we're doing here is 70 | # overwriting the data of the Kayak input object 'X' (that is, the 71 | # features being used here for logistic regression) with whatever 72 | # array the caller passes in, and switching the batcher into test 73 | # mode, so that predictions are made on all of the data at once.
74 | # The point here is that we wind up with a function handle 75 | # that can be called with a numpy object and it produces the 76 | # target values for novel data, using the parameters we just learned. 77 | 78 | def compute_predictions(x): 79 | X.data = x 80 | batcher.test_mode() 81 | return Y.value 82 | 83 | return compute_predictions 84 | 85 | def evaluate(batch_size, learn_rate, momentum, l1_weight, l2_weight, dropout): 86 | 87 | # Load in the MNIST data. 88 | train_images, train_labels, test_images, test_labels = data.mnist() 89 | 90 | # Turn the uint8 images into floating-point vectors. 91 | train_images = np.reshape(train_images, 92 | (train_images.shape[0], 93 | train_images.shape[1]*train_images.shape[2]))/255.0 94 | 95 | # Use one-hot coding for the labels. 96 | train_labels = kayak.util.onehot(train_labels) 97 | test_labels = kayak.util.onehot(test_labels) 98 | 99 | # Hand the training data off to a cross-validation object. 100 | # This will create num_folds folds (five here) and allow us to easily iterate. 101 | CV = kayak.CrossValidator(num_folds, train_images, train_labels) 102 | 103 | valid_acc = 0.0 104 | 105 | # Loop over our cross validation folds. 106 | for ii, fold in enumerate(CV): 107 | 108 | # Get the training and validation data, according to this fold. 109 | train_images, train_labels = fold.train() 110 | valid_images, valid_labels = fold.valid() 111 | 112 | # Train on these data and get a prediction function back. 113 | pred_func = train(train_images, train_labels, batch_size, 114 | learn_rate, momentum, l1_weight, l2_weight, dropout) 115 | 116 | # Make predictions on the validation data. 117 | valid_preds = np.argmax(pred_func( valid_images ), axis=1) 118 | 119 | # How did we do? 120 | acc = np.mean(valid_preds == np.argmax(valid_labels, axis=1)) 121 | print "Fold %02d: %0.6f" % (ii+1, acc) 122 | valid_acc += acc 123 | 124 | 125 | print "Overall: %0.6f" % (valid_acc / num_folds) 126 | return valid_acc / num_folds 127 | 128 | if __name__ == '__main__': 129 | evaluate( batch_size = 256, 130 | learn_rate = 0.001, 131 | momentum = 0.9, 132 | l1_weight = 1.0, 133 | l2_weight = 1.0, 134 | dropout = 0.2 ) 135 | 136 | -------------------------------------------------------------------------------- /examples/mnist_nnet.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import time 3 | import data 4 | import numpy as np 5 | import numpy.random as npr 6 | 7 | sys.path.append('..') 8 | 9 | import kayak 10 | 11 | batch_size = 256 12 | learn_rate = 0.01 13 | momentum = 0.9 14 | layer1_sz = 500 15 | layer2_sz = 500 16 | layer1_dropout = 0.25 17 | layer2_dropout = 0.25 18 | 19 | npr.seed(1) 20 | 21 | # Load in the MNIST data. 22 | train_images, train_labels, test_images, test_labels = data.mnist() 23 | 24 | # Turn the uint8 images into floating-point vectors. 25 | train_images = np.reshape(train_images, 26 | (train_images.shape[0], 27 | train_images.shape[1]*train_images.shape[2]))/255.0 28 | 29 | # Use one-hot coding for the labels. 30 | train_labels = kayak.util.onehot(train_labels) 31 | test_labels = kayak.util.onehot(test_labels) 32 | 33 | # Hand the training data off to a cross-validation object. 34 | # This will create ten folds and allow us to easily iterate. 35 | CV = kayak.CrossValidator(10, train_images, train_labels) 36 | 37 | # Here I define a nice little training function that takes inputs and targets. 38 | def train(inputs, targets): 39 | # Create a batcher object.
40 | batcher = kayak.Batcher(batch_size, inputs.shape[0]) 41 | 42 | # Inputs and targets need access to the batcher. 43 | X = kayak.Inputs(inputs, batcher) 44 | T = kayak.Targets(targets, batcher) 45 | 46 | # First-layer weights and biases, with random initializations. 47 | W1 = kayak.Parameter( 0.1*npr.randn( inputs.shape[1], layer1_sz )) 48 | B1 = kayak.Parameter( 0.1*npr.randn(1, layer1_sz) ) 49 | 50 | # First hidden layer: ReLU + Dropout 51 | H1 = kayak.Dropout(kayak.HardReLU(kayak.ElemAdd(kayak.MatMult(X, W1), B1)), 52 | layer1_dropout, batcher=batcher) 53 | 54 | # Second-layer weights and biases, with random initializations. 55 | W2 = kayak.Parameter( 0.1*npr.randn( layer1_sz, layer2_sz )) 56 | B2 = kayak.Parameter( 0.1*npr.randn(1, layer2_sz) ) 57 | 58 | # Second hidden layer: ReLU + Dropout 59 | H2 = kayak.Dropout(kayak.HardReLU(kayak.ElemAdd(kayak.MatMult(H1, W2), B2)), 60 | layer2_dropout, batcher=batcher) 61 | 62 | # Output layer weights and biases, with random initializations. 63 | W3 = kayak.Parameter( 0.1*npr.randn( layer2_sz, 10 )) 64 | B3 = kayak.Parameter( 0.1*npr.randn(1, 10) ) 65 | 66 | # Output layer. 67 | Y = kayak.LogSoftMax( kayak.ElemAdd(kayak.MatMult(H2, W3), B3) ) 68 | 69 | # The training loss is negative multinomial log likelihood. 70 | loss = kayak.MatSum(kayak.LogMultinomialLoss(Y, T)) 71 | 72 | # Use momentum for the gradient-based optimization. 73 | mom_grad_W1 = np.zeros(W1.shape) 74 | mom_grad_W2 = np.zeros(W2.shape) 75 | mom_grad_W3 = np.zeros(W3.shape) 76 | 77 | # Loop over epochs. 78 | for epoch in xrange(10): 79 | 80 | # Track the total loss. 81 | total_loss = 0.0 82 | 83 | # Loop over batches -- using batcher as iterator. 84 | for batch in batcher: 85 | # Compute the loss of this minibatch by asking the Kayak 86 | # object for its value. 87 | total_loss += loss.value 88 | 89 | # Now ask the loss for its gradient in terms of the 90 | # weights and the biases -- the two things we're trying to 91 | # learn here. 92 | grad_W1 = loss.grad(W1) 93 | grad_B1 = loss.grad(B1) 94 | grad_W2 = loss.grad(W2) 95 | grad_B2 = loss.grad(B2) 96 | grad_W3 = loss.grad(W3) 97 | grad_B3 = loss.grad(B3) 98 | 99 | # Use momentum on the weight gradients. 100 | mom_grad_W1 = momentum*mom_grad_W1 + (1.0-momentum)*grad_W1 101 | mom_grad_W2 = momentum*mom_grad_W2 + (1.0-momentum)*grad_W2 102 | mom_grad_W3 = momentum*mom_grad_W3 + (1.0-momentum)*grad_W3 103 | 104 | # Now make the actual parameter updates. 105 | W1.value -= learn_rate * mom_grad_W1 106 | B1.value -= learn_rate * grad_B1 107 | W2.value -= learn_rate * mom_grad_W2 108 | B2.value -= learn_rate * grad_B2 109 | W3.value -= learn_rate * mom_grad_W3 110 | B3.value -= learn_rate * grad_B3 111 | 112 | print epoch, total_loss 113 | 114 | # After we've trained, we return a sugary little function handle 115 | # that makes things easy. Basically, what we're doing here is 116 | # overwriting the data of the Kayak input object 'X' (that is, the 117 | # features being fed to the network) with whatever array the 118 | # caller passes in, and switching the batcher into test mode. 119 | # The point here is that we wind up with a function handle 120 | # that can be called with a numpy object and it produces the 121 | # target values for novel data, using the parameters we just 122 | # learned.
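# (Note: batcher.test_mode() also tells any Dropout nodes to reinstate all of their units, so nothing is dropped at prediction time.)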
123 | 124 | def compute_predictions(x): 125 | X.data = x 126 | batcher.test_mode() 127 | return Y.value 128 | 129 | return compute_predictions 130 | 131 | # Loop over our cross validation folds. 132 | for ii, fold in enumerate(CV): 133 | print "Fold %d" % (ii+1) 134 | 135 | # Get the training and validation data, according to this fold. 136 | train_images, train_labels = fold.train() 137 | valid_images, valid_labels = fold.valid() 138 | 139 | # Train on these data and get a prediction function back. 140 | t0 = time.time() 141 | pred_func = train(train_images, train_labels) 142 | print "train():", time.time()-t0 143 | 144 | # Make predictions on the validation data. 145 | valid_preds = np.argmax(pred_func( valid_images ), axis=1) 146 | 147 | # How did we do? 148 | print np.mean(valid_preds == np.argmax(valid_labels, axis=1)) 149 | 150 | -------------------------------------------------------------------------------- /examples/nnet_example.py: -------------------------------------------------------------------------------- 1 | import time 2 | import numpy as np 3 | import numpy.random as npr 4 | import sys 5 | 6 | sys.path.append('..') 7 | import kayak 8 | import kayak.util 9 | 10 | N = 1000 11 | D = 50 12 | H1 = 10 13 | P = 1 14 | batch_size = 256 15 | 16 | # Random data. 17 | X = npr.randn(N, D) 18 | Y = npr.randn(N, P) 19 | 20 | batcher = kayak.Batcher(batch_size, N) 21 | 22 | # Build network. 23 | kyk_inputs = kayak.Inputs(X, batcher) 24 | 25 | # Labels. 26 | kyk_targets = kayak.Targets(Y, batcher) 27 | 28 | # First layer weights and biases. 29 | kyk_W1 = kayak.Parameter( npr.randn(D, H1) ) 30 | kyk_B1 = kayak.Parameter( npr.randn(1,H1) ) 31 | 32 | # First layer weight mult plus biases, then nonlinearity. 33 | kyk_H1 = kayak.Dropout(kayak.HardReLU(kayak.ElemAdd(kayak.MatMult( kyk_inputs, kyk_W1 ), kyk_B1)), 34 | drop_prob=0.5, batcher=batcher) 35 | 36 | # Second layer weights and bias. 37 | kyk_W2 = kayak.Parameter( npr.randn(H1, P) ) 38 | kyk_B2 = kayak.Parameter( npr.randn(1,P) ) 39 | 40 | # Second layer multiplication. 41 | kyk_out = kayak.Dropout(kayak.HardReLU(kayak.ElemAdd(kayak.MatMult( kyk_H1, kyk_W2 ), kyk_B2)), 42 | drop_prob=0.5, batcher=batcher) 43 | 44 | # Elementwise Loss. 45 | kyk_el_loss = kayak.L2Loss(kyk_out, kyk_targets) 46 | 47 | # Sum the losses. 48 | kyk_loss = kayak.MatSum( kyk_el_loss ) 49 | 50 | # Roll in the weight regularization. 51 | kyk_obj = kayak.ElemAdd( kyk_loss, kayak.L1Norm(kyk_W1, weight=100.0), 52 | kayak.L1Norm(kyk_W2, weight=100.0)) 53 | 54 | print "W2:", kayak.util.checkgrad(kyk_W2, kyk_obj) 55 | print "B2:", kayak.util.checkgrad(kyk_B2, kyk_obj) 56 | print "W1:", kayak.util.checkgrad(kyk_W1, kyk_obj) 57 | print "B1:", kayak.util.checkgrad(kyk_B1, kyk_obj) 58 | 59 | t0 = time.time() 60 | for ii in xrange(10): 61 | 62 | for batch in batcher: 63 | val = kyk_obj.value 64 | grad_W1 = kyk_obj.grad(kyk_W1) 65 | grad_B1 = kyk_obj.grad(kyk_B1) 66 | grad_W2 = kyk_obj.grad(kyk_W2) 67 | grad_B2 = kyk_obj.grad(kyk_B2) 68 | 69 | t1 = time.time() 70 | print t1-t0 71 | -------------------------------------------------------------------------------- /examples/poisson_glm.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import matplotlib.pyplot as plt 5 | 6 | import sys 7 | sys.path.append('..') 8 | 9 | import kayak 10 | 11 | N = 10000 12 | D = 5 13 | P = 1 14 | learn = 0.00001 15 | batch_size = 500 16 | 17 | # Random inputs. 
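# (Synthetic data for a Poisson GLM: rates lam = exp(np.dot(X, true_W)), counts Y ~ Poisson(lam).)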
18 | X = npr.randn(N,D) 19 | true_W = npr.randn(D,P) 20 | lam = np.exp(np.dot(X, true_W)) 21 | Y = npr.poisson(lam) 22 | 23 | kyk_batcher = kayak.Batcher(batch_size, N) 24 | 25 | # Build network. 26 | kyk_inputs = kayak.Inputs(X, kyk_batcher) 27 | 28 | # Labels. 29 | kyk_targets = kayak.Targets(Y, kyk_batcher) 30 | 31 | # Weights. 32 | W = 0.01*npr.randn(D,P) 33 | kyk_W = kayak.Parameter(W) 34 | 35 | # Linear layer. 36 | kyk_activation = kayak.MatMult( kyk_inputs, kyk_W) 37 | 38 | # Exponential inverse-link function. 39 | kyk_lam = kayak.ElemExp(kyk_activation) 40 | 41 | # Poisson negative log likelihood. 42 | kyk_nll = kyk_lam - kayak.ElemLog(kyk_lam) * kyk_targets 43 | 44 | # Sum the losses. 45 | kyk_loss = kayak.MatSum( kyk_nll ) 46 | 47 | for ii in xrange(100): 48 | 49 | for batch in kyk_batcher: 50 | loss = kyk_loss.value 51 | print loss, np.sum((kyk_W.value - true_W)**2) 52 | grad = kyk_loss.grad(kyk_W) 53 | kyk_W.value -= learn * grad 54 | 55 | # Plot the true and inferred rate for a subset of data. 56 | T_slice = slice(0,100) 57 | kyk_inputs.value = X[T_slice,:] 58 | plt.figure() 59 | plt.plot(lam[T_slice], 'k') 60 | plt.plot(kyk_lam.value, '--r') 61 | plt.show() -------------------------------------------------------------------------------- /kayak/__init__.py: -------------------------------------------------------------------------------- 1 | # Authors: Harvard Intelligent Probabilistic Systems (HIPS) Group 2 | # http://hips.seas.harvard.edu 3 | # Ryan Adams, David Duvenaud, Scott Linderman, 4 | # Dougal Maclaurin, Jasper Snoek, and others 5 | # Copyright 2014, The President and Fellows of Harvard University 6 | # Distributed under an MIT license. See license.txt file. 7 | 8 | import sys 9 | import hashlib 10 | import numpy as np 11 | 12 | EPSILON = sys.float_info.epsilon 13 | 14 | from differentiable import Differentiable 15 | from root_nodes import Constant, Parameter, DataNode, Inputs, Targets 16 | from batcher import Batcher 17 | from matrix_ops import MatAdd, MatMult, MatElemMult, MatSum, MatMean, Transpose, Reshape, Concatenate, Identity, TensorMult, ListToArray, MatDet 18 | from elem_ops import ElemAdd, ElemMult, ElemExp, ElemLog, ElemPower, ElemAbs 19 | from nonlinearities import SoftReLU, HardReLU, LogSoftMax, TanH, Logistic, InputSoftMax, SoftMax 20 | from losses import L2Loss, LogMultinomialLoss 21 | from dropout import Dropout 22 | from regularizers import L2Norm, L1Norm, Horseshoe, NExp 23 | from crossval import CrossValidator 24 | from convolution import Convolve1d, Pool, TopKPool 25 | from indexing import Take 26 | from stacking import Hstack 27 | from generic_ops import Blank 28 | 29 | -------------------------------------------------------------------------------- /kayak/batcher.py: -------------------------------------------------------------------------------- 1 | # Authors: Harvard Intelligent Probabilistic Systems (HIPS) Group 2 | # http://hips.seas.harvard.edu 3 | # Ryan Adams, David Duvenaud, Scott Linderman, 4 | # Dougal Maclaurin, Jasper Snoek, and others 5 | # Copyright 2014, The President and Fellows of Harvard University 6 | # Distributed under an MIT license. See license.txt file. 7 | 8 | import numpy as np 9 | import numpy.random as npr 10 | 11 | from . import Differentiable 12 | 13 | class Batcher(Differentiable): 14 | """Kayak class for managing batches of data. 15 | 16 | This class is intended to provide a simple interface for managing 17 | mini-batches of data, both on the input side and on the output 18 | side. 
It can be set up to either use random minibatches, or go 19 | through the data in the order provided. You tell it how many data 20 | you have and how large the mini-batches should be. It will 21 | provide a sequence of indices via an iterator for easy looping. 22 | 23 | To use this class, you would do something like this: 24 | 25 | # Create an instance of the batcher. 26 | kyk_batcher = Batcher( batch_size, num_data ) 27 | 28 | # When you create input and output objects, give them access to 29 | # the batcher. 30 | kyk_inputs = Inputs(X, kyk_batcher) 31 | kyk_targets = Targets(Y, kyk_batcher) 32 | 33 | # Probably you'll loop over training epochs. 34 | for epoch in xrange(num_epochs): 35 | 36 | # Then you can treat the batcher as an iterator. 37 | for batch in kyk_batcher: 38 | 39 | # Do your mini-batch training here. 40 | 41 | """ 42 | __slots__ = ['_rng', '_batch_size', '_total_size', '_random_batches', 43 | '_dropout_nodes', 'start', 'end', 'ordering'] 44 | def __init__(self, batch_size, total_size, random_batches=False, rng=None): 45 | """Constructor for the Kayak Batcher class. 46 | 47 | This creates the Batcher, which makes it easy to manage 48 | mini-batch indices for inputs and outputs. This allows you to 49 | iterate through things in the order provided, or in a random 50 | order. 51 | 52 | Arguments: 53 | 54 | batch_size: (Integer) Size of the mini-batches to produce. 55 | 56 | total_size: (Integer) Total number of data to iterate over. 57 | 58 | random_batches: (Bool) Specifies whether the mini-batches 59 | should be random or not. 60 | """ 61 | super(Batcher, self).__init__([]) 62 | 63 | if rng is None: 64 | self._rng = npr.RandomState() 65 | else: 66 | self._rng = rng 67 | 68 | self._batch_size = batch_size 69 | self._total_size = total_size 70 | self._random_batches = random_batches 71 | self._dropout_nodes = [] 72 | self.reset() 73 | 74 | def reset(self): 75 | """Reset the state of the Kayak Batcher. 76 | 77 | It may happen that you want to 'reset the loop' and restart 78 | your iteration over the data. Calling this method does that, 79 | sending you back to index zero. If random_batches is true, 80 | you will get a new random 81 | permutation when you reset. 82 | 83 | This method is automatically called when the iterator 84 | completes its loop, so you don't need to explicitly call it 85 | when you're making multiple loops over the data. 86 | 87 | Arguments: None 88 | 89 | """ 90 | self.start = 0 91 | self.end = min(self.start+self._batch_size, self._total_size) 92 | 93 | if self._random_batches: 94 | self.ordering = self._rng.permutation(self._total_size) 95 | self.value = self.ordering[self.start:self.end] 96 | else: 97 | self.value = slice(self.start, self.end) 98 | 99 | for node in self._dropout_nodes: 100 | node.draw_new_mask() 101 | 102 | def __iter__(self): 103 | return self 104 | 105 | def next(self): 106 | """Implementation of iterator functionality. 107 | 108 | The Batcher class is used as an iterator. This method 109 | implements the iteration step forward. It will return index 110 | arrays (or slices, when iterating in order) that select the 111 | data in each mini-batch. In general, each mini-batch will be 112 | of size batch_size (as specified in the constructor). The last 113 | one may be smaller, if the number of data is not an integer multiple of the batch size.
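For example, with total_size=10, batch_size=4, and in-order batches, the iterator yields batches covering indices 0:4, then 4:8, then 8:10 (a short final batch), after which the batcher resets itself and raises StopIteration.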
114 | 115 | Arguments: None 116 | 117 | """ 118 | if self.start >= self._total_size: 119 | self.reset() 120 | raise StopIteration 121 | 122 | self._clear_value_cache() 123 | 124 | if self._random_batches: 125 | self.value = self.ordering[self.start:self.end] 126 | else: 127 | self.value = slice(self.start, self.end) 128 | 129 | self.start += self._batch_size 130 | self.end = min(self.start + self._batch_size, self._total_size) 131 | 132 | for node in self._dropout_nodes: 133 | node.draw_new_mask() 134 | 135 | return self.value 136 | 137 | def add_dropout_node(self, node): 138 | self._dropout_nodes.append(node) 139 | 140 | def test_mode(self): 141 | """ 142 | Turns off batching. Run before test-time. 143 | """ 144 | self._clear_value_cache() 145 | self.value = slice(None, None) # All indices 146 | for node in self._dropout_nodes: 147 | node.reinstate_units() 148 | -------------------------------------------------------------------------------- /kayak/convolution.py: -------------------------------------------------------------------------------- 1 | # Authors: Harvard Intelligent Probabilistic Systems (HIPS) Group 2 | # http://hips.seas.harvard.edu 3 | # Ryan Adams, David Duvenaud, Scott Linderman, 4 | # Dougal Maclaurin, Jasper Snoek, and others 5 | # Copyright 2014, The President and Fellows of Harvard University 6 | # Distributed under an MIT license. See license.txt file. 7 | 8 | import numpy as np 9 | 10 | import util 11 | 12 | from . import Differentiable 13 | import sys 14 | 15 | class Convolve1d(Differentiable): 16 | __slots__ = ['A', 'B', 'ncolors', 'stride'] 17 | 18 | def __init__(self, A, B, ncolors=1, stride=1): 19 | super(Convolve1d, self).__init__([A,B]) 20 | self.A = A 21 | self.B = B 22 | self.ncolors = ncolors 23 | self.stride = stride 24 | 25 | def _compute_value(self): 26 | A = self.A.value 27 | B = self.B.value 28 | filtersize = B.shape[0]/self.ncolors 29 | 30 | # Broadcast to get color channels 31 | A = np.reshape(A, (A.shape[0], -1)) 32 | 33 | D = A.shape[-1]/self.ncolors/self.stride - filtersize + 1 34 | output = np.zeros((A.shape[0], D, B.shape[1])) 35 | 36 | inds = np.arange(filtersize) 37 | inds = np.concatenate([inds+(i*A.shape[1]/self.ncolors) for i in xrange(self.ncolors)]) 38 | for j in xrange(0, D): 39 | output[:,j,:] = np.dot(A[:, inds], B) 40 | inds += self.stride 41 | 42 | return output.reshape((A.shape[0], D*B.shape[1])) 43 | 44 | def _local_grad(self, parent, d_out_d_self): 45 | A = self.A.value 46 | A = np.reshape(A, (A.shape[0], -1)) 47 | filtersize = self.B.shape[0]/self.ncolors 48 | inds = np.arange(filtersize) 49 | inds = np.concatenate([inds+(i*A.shape[1]/self.ncolors) for i in xrange(self.ncolors)]) 50 | 51 | if parent == 0: 52 | output = np.zeros((self.A.shape)) 53 | B = self.B.value 54 | outgrad = d_out_d_self.reshape(d_out_d_self.shape[0], -1, B.shape[-1]) 55 | 56 | for j in xrange(outgrad.shape[1]): 57 | output[:,inds] += np.dot(outgrad[:,j,:], B.T) 58 | inds += self.stride 59 | 60 | return output 61 | 62 | elif parent == 1: 63 | output = np.zeros((self.B.shape[0], self.B.shape[1])) 64 | outgrad = np.reshape(d_out_d_self, (d_out_d_self.shape[0], -1, self.B.shape[1])) 65 | 66 | for j in xrange(0, outgrad.shape[1]): 67 | output += np.dot(A[:,inds].T, outgrad[:,j,:]) 68 | inds += self.stride 69 | 70 | return output 71 | else: 72 | raise Exception("Not a parent of me") 73 | 74 | class Pool(Differentiable): 75 | __slots__ = ['A', 'width', 'indices', 'ncolors'] 76 | 77 | def __init__(self, A, width, ncolors=1): 78 | super(Pool, self).__init__([A]) 79 
| self.A = A 80 | self.width = width 81 | self.ncolors = ncolors 82 | self.indices = None 83 | 84 | def _compute_value(self): 85 | A = self.A.value 86 | 87 | # determine pooled shape variables 88 | conv_length = A.shape[1]/self.ncolors 89 | width_mod = conv_length % self.width 90 | width_aug = self.width - width_mod 91 | 92 | # augment convolution output to make pool width work 93 | if width_mod > 0: 94 | # insert at the back end of each convolution 95 | idx = np.ravel([[i*conv_length]*width_aug for i in range(1,self.ncolors+1)]) 96 | 97 | # insert -inf 98 | A = np.insert(A, idx, -np.inf, axis=1) 99 | 100 | # bring together elements in a pooling group 101 | A = np.reshape(A, (A.shape[0], self.ncolors, -1, self.width)) 102 | 103 | # get the index of the max within each pooling group 104 | self.indices = np.argmax(A, axis=3) 105 | 106 | # represent the first 3 dimensions of A 107 | x, z, t = np.indices(self.indices.shape) 108 | 109 | # index into the 4th dimension to pull out the maxes 110 | A = A[x, z, t, self.indices] 111 | 112 | # reshape back to the original form with the last dimension pooled 113 | A = A.reshape((self.A.shape[0],-1)) 114 | 115 | return A 116 | 117 | ''' 118 | try: 119 | A = np.reshape(A, (A.shape[0], self.ncolors, -1, self.width)) 120 | except: 121 | print 'Could not pool with a width of %d on a layer of size %d' % (self.width, A.shape[0]/self.ncolors) 122 | print A.shape 123 | print (A.shape[0], self.ncolors, -1, self.width) 124 | raise 125 | ''' 126 | 127 | def _local_grad(self, parent, d_out_d_self): 128 | if parent == 0: 129 | # determine pooled shape variables 130 | conv_length = self.A.shape[1]/self.ncolors 131 | width_mod = conv_length % self.width 132 | width_aug = self.width - width_mod 133 | pool_length = conv_length/self.width + 1*(width_mod>0) 134 | 135 | # create a zero matrix to match the reshaped version of A 136 | # that brings together elements in a pool group 137 | mask = np.zeros((self.A.shape[0], self.ncolors, pool_length, self.width)) 138 | 139 | # represent the first 3 dimensions of mask 140 | inds, inds2, inds3 = np.indices(self.indices.shape) 141 | 142 | # set the max indexes in mask to d_out_d_self, 143 | # reshaped to fit the shape of this reduced version of the full matrix A 144 | mask[inds, inds2, inds3, self.indices] = d_out_d_self.reshape((mask[inds, inds2, inds3, self.indices].shape)) 145 | 146 | # reshape to original form, with the last dimension pooled 147 | mask = mask.reshape((self.A.shape[0], -1)) 148 | 149 | # remove the added dummy columns 150 | if width_mod > 0: 151 | conv_length_aug = conv_length + width_aug 152 | idx = [i*conv_length_aug-m for i in range(1,self.ncolors+1) for m in range(1,width_aug+1)] 153 | mask = np.delete(mask, idx, axis=1) 154 | 155 | return mask 156 | 157 | ''' 158 | mask = np.zeros(self.A.shape).reshape((self.A.shape[0], self.ncolors, -1, self.width)) 159 | inds, inds2, inds3 = np.indices(self.indices.shape) 160 | mask[inds, inds2, inds3, self.indices] = d_out_d_self.reshape((mask[inds, inds2, inds3, self.indices].shape)) 161 | mask = mask.reshape((self.A.shape[0], -1)) 162 | ''' 163 | else: 164 | raise Exception("Not a parent of me") 165 | 166 | class TopKPool(Differentiable): 167 | __slots__ = ['A', 'k', 'indices', 'ncolors'] 168 | 169 | def __init__(self, A, k, ncolors=1): 170 | super(TopKPool, self).__init__([A]) 171 | self.A = A 172 | self.k = k 173 | self.ncolors = ncolors 174 | self.indices = None 175 | 176 | def _compute_value(self): 177 | A = self.A.value.copy() 178 | A = np.reshape(A, (A.shape[0], 
self.ncolors, -1)) 179 | self.indices = np.argsort(A, axis=2)[:,:,-self.k:] 180 | a, b, c = np.indices(self.indices.shape) 181 | A = A[a, b, self.indices] 182 | return A.reshape((self.A.shape[0],-1)) 183 | 184 | def _local_grad(self, parent, d_out_d_self): 185 | if parent == 0: 186 | mask = np.zeros(self.A.shape).reshape((self.A.shape[0], self.ncolors, -1)) 187 | inds, inds2, inds3 = np.indices(self.indices.shape) 188 | mask[inds, inds2, self.indices] = d_out_d_self.reshape((mask[inds, inds2, self.indices].shape)) 189 | mask = mask.reshape((self.A.shape[0], -1)) 190 | return mask 191 | else: 192 | raise Exception("Not a parent of me") 193 | -------------------------------------------------------------------------------- /kayak/crossval.py: -------------------------------------------------------------------------------- 1 | # Authors: Harvard Intelligent Probabilistic Systems (HIPS) Group 2 | # http://hips.seas.harvard.edu 3 | # Ryan Adams, David Duvenaud, Scott Linderman, 4 | # Dougal Maclaurin, Jasper Snoek, and others 5 | # Copyright 2014, The President and Fellows of Harvard University 6 | # Distributed under an MIT license. See license.txt file. 7 | 8 | import itertools 9 | import numpy as np 10 | import numpy.random as npr 11 | 12 | class Fold(object): 13 | 14 | def __init__(self, cv, train, valid): 15 | self._cv = cv 16 | self._train = train 17 | self._valid = valid 18 | 19 | def train(self): 20 | if self._cv.targets is None: 21 | return self._cv.inputs[self._train,...] 22 | else: 23 | return self._cv.inputs[self._train,...], self._cv.targets[self._train,...] 24 | 25 | def valid(self): 26 | if self._cv.targets is None: 27 | return self._cv.inputs[self._valid,...] 28 | else: 29 | return self._cv.inputs[self._valid,...], self._cv.targets[self._valid,...] 30 | 31 | class CrossValidator(object): 32 | 33 | def __init__(self, num_folds, inputs, targets=None, permute=True): 34 | 35 | if permute: 36 | # Make a copy of the data, with a random permutation. 
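# (Permuting first means each fold holds a random subset of the data rather than a contiguous block of the original ordering.)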
37 | self.ordering = npr.permutation(inputs.shape[0]) 38 | self.inputs = inputs[self.ordering,...].copy() 39 | if targets is not None: 40 | self.targets = targets[self.ordering,...].copy() 41 | else: 42 | self.targets = None 43 | else: 44 | self.ordering = np.arange(inputs.shape[0], dtype=int) 45 | self.inputs = inputs 46 | self.targets = targets 47 | 48 | self.fold_idx = 0 49 | self.num_folds = num_folds 50 | self.edges = np.linspace(0, self.inputs.shape[0], self.num_folds+1).astype(int) 51 | self.indices = [] 52 | for ii in xrange(self.num_folds): 53 | self.indices.append( np.arange(self.edges[ii], self.edges[ii+1], dtype=int) ) 54 | self.folds = [] 55 | for ii in xrange(self.num_folds): 56 | self.folds.append(Fold(self, 57 | np.array(list(itertools.chain.from_iterable([self.indices[jj] for jj in range(0,ii)+range(ii+1,self.num_folds)])), dtype=int), 58 | np.array(self.indices[ii], dtype=int))) 59 | 60 | def __iter__(self): 61 | return self 62 | 63 | def next(self): 64 | try: 65 | result = self.folds[self.fold_idx] 66 | self.fold_idx += 1 67 | return result 68 | except IndexError: 69 | self.fold_idx = 0 70 | raise StopIteration 71 | 72 | 73 | 74 | 75 | -------------------------------------------------------------------------------- /kayak/differentiable.py: -------------------------------------------------------------------------------- 1 | # Authors: Harvard Intelligent Probabilistic Systems (HIPS) Group 2 | # http://hips.seas.harvard.edu 3 | # Ryan Adams, David Duvenaud, Scott Linderman, 4 | # Dougal Maclaurin, Jasper Snoek, and others 5 | # Copyright 2014, The President and Fellows of Harvard University 6 | # Distributed under an MIT license. See license.txt file. 7 | 8 | import numpy as np 9 | import weakref 10 | 11 | class Differentiable(object): 12 | __slots__ = ['_value', '_grad', '_loss', '_parents', '_children','__weakref__','_parent_indices'] 13 | def __init__(self, parents=()): 14 | self._value = None # Cached value 15 | self._grad = None # Cached grad 16 | self._loss = None # Loss we are caching with respect to 17 | for parent_index, parent in enumerate(parents): 18 | parent._add_child(self, parent_index) 19 | 20 | self._parents = tuple(parents) 21 | self._children = weakref.WeakValueDictionary() 22 | # self._children = () 23 | 24 | @property 25 | def _children_with_parent_indices(self): 26 | return [(self._children[key], key[1]) for key in self._children.keys()] 27 | 28 | @property 29 | def value(self): 30 | """Compute the value of the function. This walks up the 31 | dependency graph and finds all of the Kayak objects with known 32 | values (such as Inputs and Targets, perhaps modulated by a 33 | Batcher) and then propagates their values forward through the 34 | modular computations of Differentiable subclasses. The result 35 | is cached, so repeated accesses are cheap until something 36 | upstream changes and clears the cache. 37 | """ 38 | # If the value is not yet cached, compute it. 39 | if self._value is None: 40 | self._value = self._compute_value() 41 | 42 | return self._value 43 | 44 | @value.setter 45 | def value(self, new_value): 46 | self._clear_value_cache() 47 | self._value = new_value 48 | 49 | def _clear_value_cache(self): 50 | """ 51 | Clears this node's cached value, and the cached values of any 52 | dependents. We maintain the invariant that cached values are 53 | never wrong relative to their parents' values.
54 | """ 55 | if self._value is not None: 56 | [child._clear_value_cache() for child in self._children.values()] 57 | # [child._clear_value_cache() for child, _ in self._children.values()] 58 | self._clear_grad_cache() 59 | self._value = None 60 | 61 | def _clear_grad_cache(self): 62 | if self._grad is not None: 63 | [parent._clear_grad_cache() for parent in self._parents] 64 | self._grad = None 65 | 66 | def grad(self, other): 67 | """Compute the gradient of this module in terms of another 68 | module. One of the main points of the Kayak setup is to 69 | easily compute gradients in terms of parameters. This is the 70 | interface for doing so. You call the grad() method on 71 | something that produces a scalar, providing as an argument 72 | some other object that appears deeper in the graph. You get 73 | out an array of the same shape as the deeper object, but which 74 | is the gradient. 75 | 76 | Arguments: 77 | 78 | other: (Kayak object) The other object, in terms of which 79 | you'd like to take this thing's gradient. 80 | """ 81 | grad = other._d_out_d_self(self) 82 | if grad is 0: 83 | # Make sure the output has the expected shape 84 | grad = np.zeros(other.shape) 85 | 86 | return grad 87 | 88 | @property 89 | def shape(self): 90 | return self.value.shape 91 | 92 | def _d_out_d_self(self, out): 93 | # Cached grad is not valid or refers to a different loss, 94 | # so we need to recompute compute the gradient 95 | if self._grad is None or self._loss is not out: 96 | if self is out: 97 | grad = np.ones(self.shape) 98 | elif not self._children: 99 | grad = 0 100 | else: 101 | grad = None 102 | for child, parent_index in self._children_with_parent_indices: 103 | if grad is None: 104 | grad = child._d_out_d_parent(out, parent_index) 105 | else: 106 | grad += child._d_out_d_parent(out, parent_index) 107 | 108 | self._loss = out 109 | self._grad = grad 110 | 111 | return self._grad 112 | 113 | def _d_out_d_parent(self, out, parent): 114 | d_out_d_self = self._d_out_d_self(out) 115 | if d_out_d_self is 0: 116 | # This avoid calling local_grad for paths that don't end in 'out' 117 | return 0 118 | else: 119 | return self._local_grad(parent, d_out_d_self) 120 | 121 | def _add_child(self, child, parent_index): 122 | """Parent_index is an int that tells out child which parent we are.""" 123 | self._children[(id(child), parent_index)] = child 124 | # self._children = self._children + ((child, parent_index), ) 125 | 126 | def _local_grad(self, parent, d_out_d_self): 127 | """Return d_out_d_self * d_self_d_parent""" 128 | raise Exception("Class 'Differentiable' is abstract.") 129 | 130 | def _compute_value(self): 131 | raise Exception("Class 'Differentiable' is abstract.") 132 | 133 | # Overload plus and times operators with elementwise operations 134 | # To avoid circular imports, we wait until the operator is called 135 | # to import the subclasses of Differentiable 136 | def __add__(self, other): 137 | from . import ElemAdd, Constant 138 | 139 | # If other is not a Differentiable object, 140 | # try to cast it as a constant. 141 | if not isinstance(other, Differentiable): 142 | other = Constant(other) 143 | return ElemAdd(self, other) 144 | 145 | def __radd__(self, other): 146 | return self.__add__(other) 147 | 148 | def __sub__(self, other): 149 | return self + -other 150 | 151 | def __rsub__(self, other): 152 | return other + -self 153 | 154 | def __mul__(self, other): 155 | from . import ElemMult, Constant 156 | # If other is not a Differentiable object, 157 | # try to cast it as a constant. 
158 | if not isinstance(other, Differentiable): 159 | other = Constant(other) 160 | return ElemMult(self, other) 161 | 162 | def __rmul__(self, other): 163 | return self.__mul__(other) 164 | 165 | # NOTE: Assuming Python 2.x syntax for div 166 | def __div__(self, other): 167 | from . import ElemPower 168 | return self * ElemPower(other, -1) 169 | 170 | def __rdiv__(self, other): 171 | from . import ElemPower 172 | return other * ElemPower(self, -1) 173 | 174 | def __neg__(self): 175 | from . import ElemMult, Constant 176 | return ElemMult(Constant(-1.), self) 177 | 178 | def __pow__(self, power, modulo=None): 179 | from . import ElemPower 180 | return ElemPower(self, power) 181 | 182 | def __abs__(self): 183 | from . import ElemAbs 184 | return ElemAbs(self) 185 | 186 | 187 | 188 | -------------------------------------------------------------------------------- /kayak/dropout.py: -------------------------------------------------------------------------------- 1 | # Authors: Harvard Intelligent Probabilistic Systems (HIPS) Group 2 | # http://hips.seas.harvard.edu 3 | # Ryan Adams, David Duvenaud, Scott Linderman, 4 | # Dougal Maclaurin, Jasper Snoek, and others 5 | # Copyright 2014, The President and Fellows of Harvard University 6 | # Distributed under an MIT license. See license.txt file. 7 | 8 | import numpy as np 9 | import numpy.random as npr 10 | 11 | from . import Differentiable, EPSILON 12 | 13 | class Dropout(Differentiable): 14 | __slots__ = ['X', 'drop_prob', '_rng', '_enhancement', '_mask'] 15 | 16 | def __init__(self, X, drop_prob=0.5, rng=None, batcher=None): 17 | if batcher is not None: 18 | super(Dropout, self).__init__([X, batcher]) 19 | batcher.add_dropout_node(self) 20 | else: 21 | super(Dropout, self).__init__([X]) 22 | 23 | self.X = X 24 | self.drop_prob = drop_prob 25 | 26 | if rng is None: 27 | self._rng = npr.RandomState() 28 | else: 29 | self._rng = rng 30 | 31 | self._enhancement = (1.0 + EPSILON)/(1.0 - self.drop_prob+EPSILON) 32 | self.draw_new_mask() 33 | 34 | def draw_new_mask(self): 35 | self._mask = self._enhancement * (self._rng.rand(*self.X.shape) 36 | > self.drop_prob) 37 | self._clear_value_cache() 38 | 39 | def reinstate_units(self): 40 | self._mask = np.ones(self.X.shape) 41 | self._clear_value_cache() 42 | 43 | def _compute_value(self): 44 | return self._mask * self.X.value 45 | 46 | def _local_grad(self, parent, d_out_d_self): 47 | return d_out_d_self * self._mask 48 | 49 | -------------------------------------------------------------------------------- /kayak/elem_ops.py: -------------------------------------------------------------------------------- 1 | # Authors: Harvard Intelligent Probabilistic Systems (HIPS) Group 2 | # http://hips.seas.harvard.edu 3 | # Ryan Adams, David Duvenaud, Scott Linderman, 4 | # Dougal Maclaurin, Jasper Snoek, and others 5 | # Copyright 2014, The President and Fellows of Harvard University 6 | # Distributed under an MIT license. See license.txt file. 13 | 14 | import numpy as np 15 | from .
import Differentiable 16 | import matrix_ops 17 | 18 | class Elementwise(Differentiable): 19 | __slots__ = ['X'] 20 | def __init__(self, X): 21 | super(Elementwise, self).__init__(X) 22 | self.X = X 23 | 24 | def _compute_shape(self, inputs=None): 25 | return self.X.shape 26 | 27 | # Just an alias for matrix addition and elementwise multiplication. 28 | ElemAdd = matrix_ops.MatAdd 29 | ElemMult = matrix_ops.MatElemMult 30 | 31 | class ElemExp(Elementwise): 32 | """ 33 | Elementwise exponentiation of an array 34 | """ 35 | __slots__ = ['A'] 36 | def __init__(self, A): 37 | super(ElemExp, self).__init__([A]) 38 | self.A = A 39 | 40 | def _compute_value(self): 41 | return np.exp(self.A.value) 42 | 43 | def _local_grad(self, parent, d_out_d_self): 44 | if parent == 0: 45 | return d_out_d_self * np.exp(self.A.value) 46 | else: 47 | raise Exception("Not a parent of me") 48 | 49 | class ElemLog(Elementwise): 50 | """ 51 | Elementwise logarithm of an array 52 | """ 53 | __slots__ = ['A'] 54 | def __init__(self, A): 55 | super(ElemLog, self).__init__([A]) 56 | self.A = A 57 | 58 | def _compute_value(self): 59 | return np.log(self.A.value) 60 | 61 | def _local_grad(self, parent, d_out_d_self): 62 | if parent == 0: 63 | return d_out_d_self / self.A.value 64 | else: 65 | raise Exception("Not a parent of me") 66 | 67 | class ElemPower(Elementwise): 68 | """ 69 | Elementwise power of an array. 70 | 71 | NOTE: Fractional powers are only defined for positive bases. 72 | We do not check for this; numpy will produce NaNs and a runtime warning. 73 | """ 74 | __slots__ = ['A', 'pow'] 75 | def __init__(self, A, pow): 76 | super(ElemPower, self).__init__([A]) 77 | self.A = A 78 | assert np.isscalar(pow), 'Power must be a scalar value.' 79 | self.pow = pow 80 | 81 | def _compute_value(self): 82 | return np.power(self.A.value, self.pow) 83 | 84 | def _local_grad(self, parent, d_out_d_self): 85 | if parent == 0: 86 | return d_out_d_self * self.pow * np.power(self.A.value, self.pow-1) 87 | else: 88 | raise Exception("Not a parent of me") 89 | 90 | class ElemAbs(Elementwise): 91 | """ 92 | Elementwise absolute value of an array. 93 | """ 94 | __slots__ = ['A'] 95 | def __init__(self, A): 96 | super(ElemAbs, self).__init__([A]) 97 | self.A = A 98 | 99 | def _compute_value(self): 100 | return abs(self.A.value) 101 | 102 | def _local_grad(self, parent, d_out_d_self): 103 | if parent == 0: 104 | return d_out_d_self * np.sign(self.A.value) 105 | else: 106 | raise Exception("Not a parent of me") 107 | -------------------------------------------------------------------------------- /kayak/generic_ops.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from .
import Differentiable 2 | 3 | 4 | class Blank(Differentiable): 5 | # Creates a custom kayak node on-the-fly with compute_value and/or local_grad 6 | # functions passed in as arguments 7 | def __init__(self, args=[], compute_value=None, local_grad=None): 8 | super(Blank, self).__init__(args) 9 | self.compute_value_fun = compute_value 10 | self.local_grad_fun = local_grad 11 | 12 | def _compute_value(self): 13 | return self.compute_value_fun(self._parents) 14 | 15 | def _local_grad(self, parent, d_out_d_self): 16 | return self.local_grad_fun(self._parents, parent, d_out_d_self) 17 | -------------------------------------------------------------------------------- /kayak/indexing.py: -------------------------------------------------------------------------------- 1 | # Authors: Harvard Intelligent Probabilistic Systems (HIPS) Group 2 | # http://hips.seas.harvard.edu 3 | # Ryan Adams, David Duvenaud, Scott Linderman, 4 | # Dougal Maclaurin, Jasper Snoek, and others 5 | # Copyright 2014, The President and Fellows of Harvard University 6 | # Distributed under an MIT license. See license.txt file. 7 | 8 | import numpy as np 9 | import numpy.random as npr 10 | 11 | from . import Differentiable 12 | 13 | class Take(Differentiable): 14 | __slots__ = ['X', '_inds', '_axis'] 15 | 16 | def __init__(self, X, inds, axis=1): 17 | super(Take, self).__init__([X]) 18 | 19 | self.X = X 20 | self._inds = inds 21 | self._axis = axis 22 | 23 | def _compute_value(self): 24 | slice_list = [slice(None), ] * self.X.value.ndim 25 | slice_list[self._axis] = self._inds 26 | return self.X.value[slice_list] 27 | 28 | def _local_grad(self, parent, d_out_d_self): 29 | result = np.zeros(self.X.shape) 30 | slice_list = [slice(None), ] * result.ndim 31 | slice_list[self._axis] = self._inds 32 | result[slice_list] = d_out_d_self 33 | return result 34 | -------------------------------------------------------------------------------- /kayak/input_checking.py: -------------------------------------------------------------------------------- 1 | from warnings import warn 2 | 3 | def check_equal_ndims_for_broadcasting(obj): 4 | ndims = [p.value.ndim for p in obj._parents] 5 | if not all([ndims[0] == ndims_other for ndims_other in ndims[1:]]): 6 | p_shapes = [p.shape for p in obj._parents] 7 | warn(("Broadcasting arrays with shapes %s " + 8 | "by prepending singleton dimensions.") % p_shapes, 9 | stacklevel=2) 10 | -------------------------------------------------------------------------------- /kayak/losses.py: -------------------------------------------------------------------------------- 1 | # Authors: Harvard Intelligent Probabilistic Systems (HIPS) Group 2 | # http://hips.seas.harvard.edu 3 | # Ryan Adams, David Duvenaud, Scott Linderman, 4 | # Dougal Maclaurin, Jasper Snoek, and others 5 | # Copyright 2014, The President and Fellows of Harvard University 6 | # Distributed under an MIT license. See license.txt file. 7 | 8 | import numpy as np 9 | 10 | from input_checking import check_equal_ndims_for_broadcasting 11 | from .
import Differentiable 12 | 13 | class Loss(Differentiable): 14 | __slots__ = ['preds', 'targs'] 15 | def __init__(self, predictions, targets): 16 | super(Loss, self).__init__((predictions, targets)) 17 | self.preds = predictions 18 | self.targs = targets 19 | 20 | _check_inputs = check_equal_ndims_for_broadcasting 21 | 22 | class L2Loss(Loss): 23 | __slots__ = ['axis', 'keepdims'] 24 | def __init__(self, predictions, targets, axis=None, keepdims=True): 25 | super(L2Loss, self).__init__(predictions, targets) 26 | self.axis = axis 27 | self.keepdims = keepdims 28 | 29 | def _compute_value(self): 30 | return np.sum((self.preds.value - self.targs.value)**2, 31 | axis=self.axis, keepdims=self.keepdims) 32 | 33 | def _local_grad(self, parent, d_out_d_self): 34 | assert parent is 0, "Shouldn't be taking derivative wrt targets" 35 | return 2 * (self.preds.value - self.targs.value) * d_out_d_self 36 | 37 | class LogMultinomialLoss(Loss): 38 | __slots__ = ['axis', 'keepdims'] 39 | def __init__(self, predictions, targets, axis=1, keepdims=True): 40 | # Predictions are log probabilities and targets are counts. 41 | super(LogMultinomialLoss, self).__init__(predictions, targets) 42 | self.axis = axis 43 | self.keepdims = keepdims 44 | 45 | def _compute_value(self): 46 | return -np.sum(self.targs.value * self.preds.value, 47 | axis=self.axis, keepdims=self.keepdims) 48 | 49 | def _local_grad(self, parent, d_out_d_self): 50 | return - d_out_d_self * self.targs.value 51 | -------------------------------------------------------------------------------- /kayak/matrix_ops.py: -------------------------------------------------------------------------------- 1 | # Authors: Harvard Intelligent Probabilistic Systems (HIPS) Group 2 | # http://hips.seas.harvard.edu 3 | # Ryan Adams, David Duvenaud, Scott Linderman, 4 | # Dougal Maclaurin, Jasper Snoek, and others 5 | # Copyright 2014, The President and Fellows of Harvard University 6 | # Distributed under an MIT license. See license.txt file. 7 | 8 | import numpy as np 9 | import scipy.linalg as spla 10 | from . import Differentiable 11 | 12 | class MatMult(Differentiable): 13 | __slots__ = ['A', 'B'] 14 | def __init__(self, A, B, *args): 15 | # Recurse to handle lists of arguments. 16 | if len(args) > 0: 17 | B = MatMult(B, *args) 18 | super(MatMult, self).__init__((A, B)) 19 | self.A = A 20 | self.B = B 21 | 22 | def _compute_value(self): 23 | A_val, B_val = self.A.value, self.B.value 24 | if A_val.ndim > 2 or B_val.ndim > 2: 25 | raise Exception("Inputs of shape %s and %s are not matrices or vectors" % (self.A.shape, self.B.shape)) 26 | if A_val.shape[-1] != B_val.shape[0]: 27 | raise Exception("Cannot multiply %s by %s matrices."
% (self.A.shape, self.B.shape)) 28 | 29 | return np.dot(A_val, B_val) 30 | 31 | def _local_grad(self, parent, d_out_d_self): 32 | if parent == 0: 33 | B_val = self.B.value 34 | if B_val.ndim == 2: 35 | return np.dot(d_out_d_self, B_val.T) 36 | else: 37 | return np.outer(d_out_d_self, B_val) 38 | elif parent == 1: 39 | A_val = self.A.value 40 | if A_val.ndim == 2: 41 | return np.dot(A_val.T, d_out_d_self) 42 | else: 43 | return np.outer(A_val, d_out_d_self) 44 | else: 45 | raise Exception("Not a parent of me") 46 | 47 | class MatSum(Differentiable): 48 | __slots__ = ['A', 'axis', 'keepdims'] 49 | def __init__(self, A, axis=None, keepdims=True): 50 | super(MatSum, self).__init__((A,)) 51 | if axis is not None and type(axis) != int: 52 | raise Exception("Can only sum over one axis at a time.") 53 | self.A = A 54 | self.axis = axis 55 | self.keepdims = keepdims 56 | 57 | def _compute_value(self): 58 | return np.sum(self.A.value, axis=self.axis, keepdims=self.keepdims) 59 | 60 | def _local_grad(self, parent, d_out_d_self): 61 | # If self.keepdims == False then we need to 62 | # broadcast d_out_d_self along the summation axis 63 | if not self.keepdims and self.axis is not None: 64 | expanded_d_out_d_self = np.expand_dims(d_out_d_self, self.axis) 65 | return expanded_d_out_d_self * np.ones(self.A.shape) 66 | else: 67 | return d_out_d_self * np.ones(self.A.shape) 68 | 69 | class MatMean(Differentiable): 70 | __slots__ = ['A', 'axis', 'keepdims'] 71 | def __init__(self, A, axis=None, keepdims=True): 72 | super(MatMean, self).__init__((A,)) 73 | if axis is not None and type(axis) != int: 74 | raise Exception("Can only take the mean over one axis at a time.") 75 | self.A = A 76 | self.axis = axis 77 | self.keepdims = keepdims 78 | 79 | def _compute_value(self): 80 | return np.mean(self.A.value, axis=self.axis, keepdims=self.keepdims) 81 | 82 | def _local_grad(self, parent, d_out_d_self): 83 | # If self.keepdims == False then we need to 84 | # broadcast d_out_d_self along the mean axis 85 | N = float(self.A.value.size) if self.axis is None else float(self.A.shape[self.axis]) 86 | if not self.keepdims and self.axis is not None: 87 | expanded_d_out_d_self = np.expand_dims(d_out_d_self, self.axis) 88 | return expanded_d_out_d_self * 1.0/N * np.ones(self.A.shape) 89 | else: 90 | return d_out_d_self * 1.0/N * np.ones(self.A.shape) 91 | 92 | class MatAdd(Differentiable): 93 | __slots__ = [] 94 | def __init__(self, *args): 95 | super(MatAdd, self).__init__(args) 96 | 97 | def _compute_value(self): 98 | return sum([p.value for p in self._parents]) 99 | 100 | def _local_grad(self, parent, d_out_d_self): 101 | parent_shape = self._parents[parent].shape 102 | num_singletons = len(d_out_d_self.shape) - len(parent_shape) 103 | if num_singletons > 0: 104 | extra_singletons = tuple(range(num_singletons)) 105 | result = np.sum(d_out_d_self, axis=extra_singletons, keepdims=False) 106 | else: 107 | result = d_out_d_self 108 | 109 | assert len(result.shape) == len(parent_shape) 110 | original_singletons = tuple(np.where(np.array(parent_shape) == 1)[0]) 111 | return np.sum(result, axis=original_singletons, keepdims=True) 112 | 113 | class MatElemMult(Differentiable): 114 | """ 115 | Elementwise multiplication of two broadcastable arrays. 116 | Note: gradients are summed over any broadcast dimensions, following the same pattern as MatAdd. 117 | """ 118 | __slots__ = ['A', 'B'] 119 | def __init__(self, A, B, *args): 120 | # Recurse to handle lists of arguments.
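# (Editor's note, not part of the original source: a call such as
#  MatElemMult(A, B, C) is rebuilt below as MatElemMult(A, MatElemMult(B, C)),
#  so n-ary products reduce to nested binary nodes and the binary gradient
#  rule in _local_grad covers every argument.)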
121 | if len(args) > 0: 122 | B = MatElemMult(B, *args) 123 | 124 | super(MatElemMult, self).__init__((A,B)) 125 | 126 | self.A = A 127 | self.B = B 128 | 129 | def _compute_value(self): 130 | return self.A.value * self.B.value 131 | 132 | def _local_grad(self, parent, d_out_d_self): 133 | """ 134 | For element-wise multiplication d(A*B)/dA = d_out_d_self * B. 135 | However, to support broadcasting, we need to sum over the broadcast dimensions. 136 | For example, d(A*x)/dx, where A is a matrix and x is a scalar, is 137 | given by \sum_{d1} \ldots \sum_{dD} (d_out_d_self * A)[d1,...,dD] 138 | """ 139 | parent_shape = self._parents[parent].shape 140 | other_parent = 1 if parent == 0 else 0 141 | other_parent_value = self._parents[other_parent].value 142 | 143 | # Compute how many dimensions the parent was broadcast along 144 | num_singletons = len(d_out_d_self.shape) - len(parent_shape) 145 | if num_singletons > 0: 146 | extra_singletons = tuple(range(num_singletons)) 147 | # Sum out the broadcast dimensions 148 | result = np.sum(d_out_d_self*other_parent_value, axis=extra_singletons, keepdims=False) 149 | else: 150 | result = d_out_d_self*other_parent_value 151 | 152 | # In multiplying, we may have broadcast the parent. 153 | # Sum out those dimensions as well. 154 | assert len(result.shape) == len(parent_shape) 155 | original_singletons = tuple(np.where(np.array(parent_shape) == 1)[0]) 156 | return np.sum(result, axis=original_singletons, keepdims=True) 157 | 158 | class MatDet(Differentiable): 159 | __slots__ = ['A'] 160 | def __init__(self, A, axis=None, keepdims=True): 161 | super(MatDet, self).__init__((A,)) 162 | self.A = A 163 | 164 | def _compute_value(self): 165 | return np.linalg.det(self.A.value) 166 | 167 | def _local_grad(self, parent, d_out_d_self): 168 | det = self.value 169 | return d_out_d_self * det * np.linalg.inv(self.A.value).T 170 | 171 | class MatLogDet(Differentiable): 172 | pass 173 | 174 | class MatTrace(Differentiable): 175 | pass 176 | 177 | class Transpose(Differentiable): 178 | __slots__ = ['A', 'axes'] 179 | def __init__(self, A, axes=None): 180 | super(Transpose, self).__init__((A,)) 181 | self.A = A 182 | self.axes = axes 183 | 184 | def _compute_value(self): 185 | return np.transpose(self.A.value, axes=self.axes) 186 | 187 | def _local_grad(self, parent, d_out_d_self): 188 | if self.axes is None: 189 | return np.transpose(d_out_d_self) 190 | else: 191 | return np.transpose(d_out_d_self, axes=np.argsort(self.axes)) 192 | 193 | class Reshape(Differentiable): 194 | __slots__ = ['A', 'new_shape'] 195 | 196 | def __init__(self, A, new_shape): 197 | super(Reshape, self).__init__((A,)) 198 | self.A = A 199 | self.new_shape = new_shape 200 | 201 | def _compute_value(self): 202 | return np.reshape(self.A.value, self.new_shape) 203 | 204 | def _local_grad(self, parent, d_out_d_self): 205 | return np.reshape(d_out_d_self, self.A.shape) 206 | 207 | class Concatenate(Differentiable): 208 | __slots__ = ['axis'] 209 | def __init__(self, axis, *args): 210 | super(Concatenate, self).__init__(args) 211 | self.axis = axis 212 | 213 | def _compute_value(self): 214 | return np.concatenate([p.value for p in self._parents], axis=self.axis) 215 | 216 | def _local_grad(self, parent_ix, d_out_d_self): 217 | # Return the gradient only w.r.t. the matrix indexed by parent.
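# (Editor's sketch, not part of the original source: e.g. concatenating
#  parents of shapes (2, 3) and (4, 3) along axis 0 yields a (6, 3) value;
#  for parent_ix=1 the code below computes start_ix=2, end_ix=6 and returns
#  d_out_d_self[2:6, :].)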
218 | start_ix = sum([p.shape[self.axis] for p in self._parents[0:parent_ix]]) 219 | end_ix = start_ix + self._parents[parent_ix].shape[self.axis] 220 | return index_along_axis(d_out_d_self, self.axis, start_ix, end_ix) 221 | 222 | class ListToArray(Differentiable): 223 | """Build an array out of a list of arrays by prepending a dimension 224 | and concatenating.""" 225 | __slots__ = [] 226 | def __init__(self, *args): 227 | super(ListToArray, self).__init__(args) 228 | 229 | def _compute_value(self): 230 | return np.concatenate([p.value[None, :] for p in self._parents], axis=0) 231 | 232 | def _local_grad(self, parent_ix, d_out_d_self): 233 | return d_out_d_self[parent_ix, :] 234 | 235 | def index_along_axis(array, axis, start, end): 236 | """Return the slice of `array` along `axis` from `start` up to but not including `end`. 237 | 238 | For example: 239 | >>> index_along_axis(np.random.randn(10,20), 0, 10, 12).shape 240 | (2, 20) 241 | """ 242 | full_slice = [slice(None),] * array.ndim 243 | full_slice[axis] = slice(start,end) 244 | return array[full_slice] 245 | 246 | class TensorMult(Differentiable): 247 | __slots__ = ['axes'] 248 | def __init__(self, A, B, axes): 249 | super(TensorMult, self).__init__((A, B)) 250 | self.axes = axes 251 | 252 | def _compute_value(self): 253 | A = self._parents[0].value 254 | B = self._parents[1].value 255 | return np.tensordot(A, B, self.axes) 256 | 257 | def _local_grad(self, parent, d_out_d_self): 258 | diff = lambda A, B : [a for a in A if a not in B] 259 | rank = lambda L : list(np.argsort(np.argsort(L))) 260 | val = [p.value for p in self._parents] 261 | axes = self.axes 262 | n_axes = len(axes[0]) 263 | ignore_dims = [diff(range(val[i].ndim), axes[i]) for i in (0, 1)] 264 | ignore_ndims = [len(x) for x in ignore_dims] 265 | output_dims = (range(ignore_ndims[0]), 266 | range(ignore_ndims[0], ignore_ndims[0] + ignore_ndims[1])) 267 | X, Y = parent, 1 - parent 268 | wrong_order = np.tensordot(val[Y], d_out_d_self, (ignore_dims[Y], output_dims[Y])) 269 | permutation = [None] * val[X].ndim 270 | for final, cur in zip(list(axes[X]) + ignore_dims[X], 271 | rank(axes[Y]) + range(n_axes, val[X].ndim)): 272 | permutation[final] = cur 273 | 274 | return np.transpose(wrong_order, permutation) 275 | 276 | class Identity(Differentiable): 277 | __slots__ = [] 278 | def __init__(self, A): 279 | super(Identity, self).__init__((A,)) 280 | 281 | def _compute_value(self): 282 | return self._parents[0].value 283 | 284 | def _local_grad(self, parent_ix, d_out_d_self): 285 | return d_out_d_self 286 | -------------------------------------------------------------------------------- /kayak/nonlinearities.py: -------------------------------------------------------------------------------- 1 | # Authors: Harvard Intelligent Probabilistic Systems (HIPS) Group 2 | # http://hips.seas.harvard.edu 3 | # Ryan Adams, David Duvenaud, Scott Linderman, 4 | # Dougal Maclaurin, Jasper Snoek, and others 5 | # Copyright 2014, The President and Fellows of Harvard University 6 | # Distributed under an MIT license. See license.txt file. 7 | 8 | import numpy as np 9 | from numpy import exp 10 | 11 | import util 12 | 13 | from .
import Differentiable 14 | from . import EPSILON 15 | 16 | class Nonlinearity(Differentiable): 17 | __slots__ = ['X'] 18 | def __init__(self, X): 19 | super(Nonlinearity, self).__init__((X,)) 20 | self.X = X 21 | 22 | class SoftReLU(Nonlinearity): 23 | __slots__ = ['scale'] 24 | def __init__(self, X, scale=1.0): 25 | super(SoftReLU, self).__init__(X) 26 | self.scale = scale 27 | 28 | def _compute_value(self): 29 | # Somewhat complicated to handle overflow. 30 | X = self.X.value 31 | se = np.seterr(over='ignore') 32 | exp_X = np.exp(X / self.scale) 33 | result = np.log(1.0 + exp_X)*self.scale 34 | over = np.isinf(exp_X) 35 | result[over] = X[over] # log(1 + exp(x/s))*s tends to x where exp overflows 36 | return result 37 | 38 | def _local_grad(self, parent, d_out_d_self): 39 | return d_out_d_self/(1.0 + np.exp( - self.X.value/self.scale )) 40 | 41 | class HardReLU(Nonlinearity): 42 | __slots__ = [] 43 | def __init__(self, X): 44 | super(HardReLU, self).__init__(X) 45 | 46 | def _compute_value(self): 47 | return np.maximum(self.X.value, 0.0) 48 | 49 | def _local_grad(self, parent, d_out_d_self): 50 | return d_out_d_self * (self.X.value > 0) 51 | 52 | class TanH(Nonlinearity): 53 | __slots__ = [] 54 | def __init__(self, X): 55 | super(TanH, self).__init__(X) 56 | 57 | def _compute_value(self): 58 | return np.tanh(self.X.value) 59 | 60 | def _local_grad(self, parent, d_out_d_self): 61 | return d_out_d_self*(1.0 - np.tanh(self.X.value)**2) 62 | 63 | class Logistic(Nonlinearity): 64 | __slots__ = [] 65 | def __init__(self, X): 66 | super(Logistic, self).__init__(X) 67 | 68 | def _compute_value(self): 69 | return 1.0/(1.0 + np.exp(-self.X.value)) 70 | 71 | def _local_grad(self, parent, d_out_d_self): 72 | y = self.value 73 | return d_out_d_self * y * (1.0 - y) 74 | 75 | class LogSoftMax(Nonlinearity): 76 | __slots__ = ['axis'] 77 | def __init__(self, X, axis=1): 78 | super(LogSoftMax, self).__init__(X) 79 | self.axis = axis 80 | 81 | def _compute_value(self): 82 | X = self.X.value 83 | return X - util.logsumexp(X, axis=self.axis) 84 | 85 | def _local_grad(self, parent, d_out_d_self): 86 | return d_out_d_self - (np.exp(self.value) * np.sum(d_out_d_self, axis=self.axis, keepdims=True)) 87 | 88 | class SoftMax(Nonlinearity): 89 | __slots__ = ['axis'] 90 | def __init__(self, X, axis=1): 91 | super(SoftMax, self).__init__(X) 92 | self.axis = axis 93 | 94 | def _compute_value(self): 95 | X = self.X.value 96 | return np.exp(X - util.logsumexp(X, axis=self.axis)) 97 | 98 | def _local_grad(self, parent, d_out_d_self): 99 | val = self.value 100 | return val * (d_out_d_self - np.sum(val * d_out_d_self, axis=self.axis, keepdims=True)) 101 | 102 | class InputSoftMax(Nonlinearity): 103 | __slots__ = ['ncolors'] 104 | def __init__(self, X, ncolors=4): 105 | super(InputSoftMax, self).__init__(X) 106 | self.ncolors = ncolors 107 | 108 | def _compute_value(self): 109 | X = self.X.value 110 | A = np.reshape(X, (X.shape[0], self.ncolors, X.shape[1]//self.ncolors)) 111 | X = A 112 | return np.exp(X - util.logsumexp(X, axis=1)).reshape((self.X.shape)) 113 | 114 | def _local_grad(self, parent, d_out_d_self): 115 | X = self.X.value 116 | A = np.reshape(X, (X.shape[0], self.ncolors, X.shape[1]//self.ncolors)) 117 | val = self.value.reshape(A.shape) 118 | d_out_d_self = d_out_d_self.reshape(val.shape) 119 | return (val * (d_out_d_self - np.sum(val * d_out_d_self, axis=1, keepdims=True))).reshape((self.X.shape[0],-1)) 120 | 121 | class L2Normalize(Nonlinearity): 122 | __slots__ = ['axis'] 123 | def __init__(self, X, axis=1): 124 |
super(L2Normalize, self).__init__(X) 125 | self.axis = axis 126 | assert np.all(X.value >= 0) 127 | 128 | def _compute_value(self): 129 | X = self.X.value 130 | lX = np.log(X + EPSILON) 131 | return np.exp(lX - 0.5*util.logsumexp(2*lX, axis=self.axis)) 132 | 133 | def _local_grad(self, parent, d_out_d_self): 134 | X = self.X.value + EPSILON 135 | val = self.value 136 | val2 = X / np.sum(X**2, axis=self.axis, keepdims=True) 137 | return val * (d_out_d_self / X - np.sum(val2 * d_out_d_self, axis=self.axis, keepdims=True)) 138 | 139 | class BatchNormalize(Nonlinearity): 140 | __slots__ = [] # 'X' is already a slot on Nonlinearity 141 | def __init__(self, X): 142 | super(BatchNormalize, self).__init__(X) 143 | 144 | def _compute_value(self): 145 | X = self.X.value 146 | mu = np.mean(X, axis=0, keepdims=True) 147 | sig = np.mean((X - mu)**2, axis=0, keepdims=True) + 1e-6 148 | val = (X - mu) * sig**-0.5 149 | return val 150 | 151 | def _local_grad(self, parent, d_out_d_self): 152 | X = self.X.value 153 | mu = np.mean(X, axis=0, keepdims=True) 154 | diff = X - mu 155 | sig = np.mean(diff**2, axis=0, keepdims=True) + 1e-6 156 | invsqrtsig = sig**-0.5 157 | val = diff * invsqrtsig 158 | m = X.shape[0] 159 | 160 | dsig = np.sum(d_out_d_self*diff*(-0.5*sig**-(3./2.)), axis=0, keepdims=True) 161 | dmu = np.sum(d_out_d_self * -invsqrtsig, axis=0, keepdims=True) + dsig*np.mean(-2.0*diff, axis=0, keepdims=True) 162 | dx = d_out_d_self * invsqrtsig + dsig * 2.0 * diff/m + dmu/m 163 | return dx 164 | -------------------------------------------------------------------------------- /kayak/regularizers.py: -------------------------------------------------------------------------------- 1 | # Authors: Harvard Intelligent Probabilistic Systems (HIPS) Group 2 | # http://hips.seas.harvard.edu 3 | # Ryan Adams, David Duvenaud, Scott Linderman, 4 | # Dougal Maclaurin, Jasper Snoek, and others 5 | # Copyright 2014, The President and Fellows of Harvard University 6 | # Distributed under an MIT license. See license.txt file. 7 | 8 | import numpy as np 9 | 10 | from .
import Differentiable 11 | 12 | class Regularizer(Differentiable): 13 | __slots__ = ['X', 'weight'] 14 | def __init__(self, X, weight): 15 | super(Regularizer, self).__init__([X]) 16 | self.X = X 17 | self.weight = weight 18 | 19 | class L2Norm(Regularizer): 20 | __slots__ = [] 21 | def __init__(self, X, weight=1.0): 22 | super(L2Norm, self).__init__(X, weight) 23 | 24 | def _compute_value(self): 25 | return self.weight * np.sum(self.X.value**2) 26 | 27 | def _local_grad(self, parent, d_out_d_self): 28 | return self.weight * 2.0 * self.X.value * d_out_d_self 29 | 30 | class L1Norm(Regularizer): 31 | __slots__ = [] 32 | def __init__(self, X, weight=1.0): 33 | super(L1Norm, self).__init__(X, weight) 34 | 35 | def _compute_value(self): 36 | return self.weight * np.sum(np.abs(self.X.value)) 37 | 38 | def _local_grad(self, parent, d_out_d_self): 39 | return self.weight * np.sign(self.X.value) * d_out_d_self 40 | 41 | class Horseshoe(Regularizer): 42 | __slots__ = [] 43 | def __init__(self, X, weight=1.0): 44 | super(Horseshoe, self).__init__(X, weight) 45 | 46 | def _compute_value(self): 47 | return -self.weight * np.sum(np.log(np.log(1.0 + self.X.value**(-2)))) 48 | 49 | def _local_grad(self, parent, d_out_d_self): 50 | return -(self.weight * d_out_d_self * (1 / (np.log(1.0 + self.X.value**(-2)))) 51 | * (1.0/(1 + self.X.value**(-2))) * (-2*self.X.value**(-3))) 52 | 53 | class NExp(Regularizer): 54 | __slots__ = [] 55 | def __init__(self, X, weight=1.0): 56 | super(NExp, self).__init__(X, weight) 57 | 58 | def _compute_value(self): 59 | return self.weight * np.sum(1.0 - np.exp(-np.abs(self.X.value))) 60 | 61 | def _local_grad(self, parent, d_out_d_self): 62 | return self.weight * d_out_d_self * np.exp(-np.abs(self.X.value)) * np.sign(self.X.value) 63 | -------------------------------------------------------------------------------- /kayak/root_nodes.py: -------------------------------------------------------------------------------- 1 | # Authors: Harvard Intelligent Probabilistic Systems (HIPS) Group 2 | # http://hips.seas.harvard.edu 3 | # Ryan Adams, David Duvenaud, Scott Linderman, 4 | # Dougal Maclaurin, Jasper Snoek, and others 5 | # Copyright 2014, The President and Fellows of Harvard University 6 | # Distributed under an MIT license. See license.txt file. 7 | 8 | import numpy as np 9 | from . import Differentiable 10 | 11 | class DataNode(Differentiable): 12 | __slots__ = ['_batcher', '_data','_children', '_value', '_grad', '_loss', '_parents'] 13 | def __init__(self, data, batcher=None): 14 | if batcher is None: 15 | super(DataNode, self).__init__([]) 16 | else: 17 | super(DataNode, self).__init__([batcher]) 18 | 19 | self._data = np.atleast_1d(data) 20 | self._batcher = batcher 21 | 22 | @property 23 | def data(self): 24 | return self._data 25 | 26 | @data.setter 27 | def data(self, new_data): 28 | self._data = new_data 29 | self._clear_value_cache() 30 | 31 | def _compute_value(self): 32 | if self._batcher is None: 33 | return self.data 34 | else: 35 | return self.data[self._batcher.value,...] 36 | 37 | def _local_grad(self, parent, d_out_d_self): 38 | raise Exception("Can't take gradient w.r.t. data") 39 | 40 | class Parameter(Differentiable): 41 | __slots__ = [] 42 | def __init__(self, val): 43 | super(Parameter, self).__init__([]) 44 | self.value = np.atleast_1d(val) 45 | 46 | def grad(self, other): 47 | return np.zeros(other.shape) 48 | 49 | def _compute_value(self): 50 | raise Exception("Shouldn't need this. 
Value should be cached") 51 | 52 | def _local_grad(self, parent, d_out_d_self): 53 | raise Exception("Shouldn't get here.") 54 | 55 | # These are just aliases 56 | Inputs = DataNode 57 | Targets = DataNode 58 | Constant = Parameter 59 | -------------------------------------------------------------------------------- /kayak/stacking.py: -------------------------------------------------------------------------------- 1 | # Authors: Harvard Intelligent Probabilistic Systems (HIPS) Group 2 | # http://hips.seas.harvard.edu 3 | # Ryan Adams, David Duvenaud, Scott Linderman, 4 | # Dougal Maclaurin, Jasper Snoek, and others 5 | # Copyright 2014, The President and Fellows of Harvard University 6 | # Distributed under an MIT license. See license.txt file. 7 | 8 | import numpy as np 9 | import numpy.random as npr 10 | 11 | from . import Differentiable 12 | 13 | class Hstack(Differentiable): 14 | __slots__ = ['A', 'B'] 15 | 16 | def __init__(self, A, B): 17 | super(Hstack, self).__init__([A, B]) 18 | 19 | self.A = A 20 | self.B = B 21 | 22 | def _compute_value(self): 23 | return np.hstack((self.A.value, self.B.value)) 24 | 25 | def _local_grad(self, parent, d_out_d_self): 26 | if parent == 0: 27 | return d_out_d_self[:,:self.A.shape[1]] 28 | if parent == 1: 29 | return d_out_d_self[:,self.A.shape[1]:] 30 | -------------------------------------------------------------------------------- /kayak/util.py: -------------------------------------------------------------------------------- 1 | # Authors: Harvard Intelligent Probabilistic Systems (HIPS) Group 2 | # http://hips.seas.harvard.edu 3 | # Ryan Adams, David Duvenaud, Scott Linderman, 4 | # Dougal Maclaurin, Jasper Snoek, and others 5 | # Copyright 2014, The President and Fellows of Harvard University 6 | # Distributed under an MIT license. See license.txt file. 7 | 8 | import numpy as np 9 | import numpy.random as npr 10 | import itertools as it 11 | 12 | from . import EPSILON 13 | 14 | from root_nodes import Parameter 15 | 16 | def checkgrad(variable, output, epsilon=1e-4, verbose=False): 17 | if not isinstance(variable, Parameter): 18 | raise Exception("Cannot evaluate gradient in terms of non-Parameter type %s", (type(variable))) 19 | 20 | # Need to make sure all evals have the same random number generation. 21 | rng_seed = 1 22 | 23 | value = output.value 24 | an_grad = output.grad(variable) 25 | fd_grad = np.zeros(variable.shape) 26 | base_value = variable.value.copy() 27 | for in_dims in it.product(*map(range, variable.shape)): 28 | small_array = np.zeros(variable.shape) 29 | small_array[in_dims] = epsilon 30 | 31 | variable.value = base_value - 2*small_array 32 | fn_l2 = output.value 33 | variable.value = base_value - small_array 34 | fn_l1 = output.value 35 | variable.value = base_value + small_array 36 | fn_r1 = output.value 37 | variable.value = base_value + 2*small_array 38 | fn_r2 = output.value 39 | 40 | fd_grad[in_dims] = ((fn_l2 - fn_r2)/12. + (- fn_l1 + fn_r1)*2./3.) /epsilon # 2nd order method 41 | # fd_grad[in_dims] = (- fn_l1/2. + fn_r1/2.) 
42 | 43 | if verbose: 44 | print np.abs((an_grad[in_dims] - fd_grad[in_dims])/(fd_grad[in_dims]+EPSILON)), an_grad[in_dims], fd_grad[in_dims] 45 | 46 | variable.value = base_value 47 | print "Mean finite difference", np.mean(np.abs((an_grad - fd_grad)/(fd_grad+EPSILON))) 48 | return np.mean(np.abs((an_grad - fd_grad)/(fd_grad+EPSILON))) 49 | 50 | 51 | def logsumexp(X, axis=None): 52 | maxes = np.max(X, axis=axis, keepdims=True) 53 | return np.log(np.sum(np.exp(X - maxes), axis=axis, keepdims=True)) + maxes 54 | 55 | def onehot(T, num_labels=None): 56 | if num_labels is None: 57 | num_labels = np.max(T)+1 58 | labels = np.zeros((T.shape[0], num_labels), dtype=bool) 59 | labels[np.arange(T.shape[0], dtype=int), T] = 1 60 | return labels 61 | 62 | -------------------------------------------------------------------------------- /license.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 by the President and Fellows of Harvard University 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from setuptools import setup 3 | 4 | # Utility function to read the README file. 5 | # Used for the long_description. It's nice, because now 1) we have a top level 6 | # README file and 2) it's easier to type in the README file than to put a raw 7 | # string in below ...
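# (Editor's sketch, not part of the original setup.py: with the helper below,
#  read('README.md') amounts to opening README.md relative to the directory
#  containing setup.py, and its return value is what setup() receives as
#  long_description.)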
8 | def read(fname): 9 | return open(os.path.join(os.path.dirname(__file__), fname)).read() 10 | 11 | setup( 12 | name = "kayak", 13 | version = "0.1", 14 | author = "Ryan Adams, Dougal Maclaurin, Scott Linderman, Jasper Snoek, and David Duvenaud", 15 | author_email = "rpa@seas.harvard.edu, maclaurin@physics.harvard.edu, slinderman@seas.harvard.edu, jsnoek@seas.harvard.edu, dduvenaud@seas.harvard.edu", 16 | description = ("A package for automatic differentiation in deep learning models."), 17 | keywords = "automatic differentiation, deep learning, neural networks", 18 | packages=['kayak'], 19 | long_description=read('README.md'), 20 | ) -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | MAX_FLOAT_DIFF = 1e-9 4 | MAX_GRAD_DIFF = 1e-7 5 | NUM_TRIALS = 10 6 | 7 | def close_float(A, B): 8 | return np.abs(A-B) < MAX_FLOAT_DIFF 9 | -------------------------------------------------------------------------------- /tests/check_MemoryUse.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | from guppy import hpy 4 | import sys 5 | sys.path.append('..') 6 | import kayak 7 | 8 | def check_NodeMemory(): 9 | # Not a test. Useful for checking how much memory a node uses. 10 | np_A = npr.randn(5,6) 11 | A = kayak.Parameter(np_A) 12 | N = int(1e4) 13 | h = hpy() 14 | h.setref() 15 | for i in xrange(N): 16 | A = kayak.Identity(A) 17 | print "Created 10,000 objects" 18 | print h.heap() 19 | 20 | if __name__ == "__main__": 21 | check_NodeMemory() 22 | -------------------------------------------------------------------------------- /tests/test_BatchNormalize.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . import * 7 | 8 | def test_batchnorm_values_1(): 9 | npr.seed(1) 10 | 11 | for ii in xrange(NUM_TRIALS): 12 | 13 | np_X = npr.randn(5,4) 14 | np_A = npr.randn(4,2) 15 | A = kayak.Parameter(np_A) 16 | X = kayak.Parameter(np_X) 17 | Y = kayak.BatchNormalize(X) 18 | J = kayak.TanH(kayak.MatMult(Y,A)) 19 | Z = kayak.MatSum(J) 20 | 21 | mu = np.mean(np_X, axis=0, keepdims=True) 22 | sig = np.mean((np_X - mu)**2, axis=0, keepdims=True) + 1e-6 23 | np_Y = (np_X - mu) / np.sqrt(sig) 24 | 25 | assert np.all(close_float(Y.value, np_Y)) 26 | assert kayak.util.checkgrad(X, Z, verbose=True) < MAX_GRAD_DIFF 27 | -------------------------------------------------------------------------------- /tests/test_Batcher.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from .
import * 7 | 8 | def test_indices_1(): 9 | """Test with deterministic indices.""" 10 | 11 | for num_data in [1, 10, 100, 1000, 10000, 100000]: 12 | for batch_size in [1, 10, 11, 25, 50, 101, 500, 1000, 1011]: 13 | 14 | data_used = np.zeros((num_data,), dtype=bool) 15 | batcher = kayak.Batcher(batch_size, num_data) 16 | for batch in batcher: 17 | data_used[batch] = True 18 | 19 | assert np.all(data_used) 20 | 21 | def test_indices_2(): 22 | """Test with random seed.""" 23 | npr.seed(1) 24 | 25 | for num_data in [1, 10, 100, 1000, 10000, 100000]: 26 | for batch_size in [1, 10, 11, 25, 50, 101, 500, 1000, 1011]: 27 | 28 | data_used = np.zeros((num_data,), dtype=bool) 29 | batcher = kayak.Batcher(batch_size, num_data, random_batches=True) 30 | for batch in batcher: 31 | data_used[batch] = True 32 | 33 | assert np.all(data_used) 34 | 35 | def test_reset(): 36 | """Test resetting.""" 37 | 38 | for num_data in [1000, 10000, 100000]: 39 | for batch_size in [1, 10, 11, 25, 50, 101, 500]: 40 | 41 | batcher = kayak.Batcher(batch_size, num_data) 42 | 43 | # Start the batcher forward. 44 | batcher.next() 45 | 46 | # Now reset. 47 | batcher.reset() 48 | 49 | # Make sure we touch all of the data. 50 | data_used = np.zeros((num_data,), dtype=bool) 51 | for batch in batcher: 52 | data_used[batch] = True 53 | 54 | assert np.all(data_used) 55 | 56 | def test_batcher_updates_value(): 57 | batcher = kayak.Batcher(12, 20) 58 | data = npr.randn(20, 7) 59 | X = kayak.Inputs(data, batcher) 60 | for i, batch in enumerate(batcher): 61 | if i == 0: 62 | assert np.all(X.value == data[:12, :]) 63 | elif i == 1: 64 | assert np.all(X.value == data[12:, :]) 65 | else: 66 | assert False 67 | 68 | batcher.test_mode() 69 | assert np.all(X.value == data) 70 | 71 | def test_batcher_updates_dropout(): 72 | batcher = kayak.Batcher(5, 10) 73 | X = kayak.Inputs(np.random.randn(10,10)) 74 | Y = kayak.Dropout(X, batcher=batcher) 75 | val1 = Y.value 76 | batcher.next() 77 | val2 = Y.value 78 | assert not np.all(val1 == val2) 79 | 80 | def test_batcher_can_reinstate_dropout_mask(): 81 | batcher = kayak.Batcher(5, 10) 82 | X = kayak.Inputs(np.ones((10,10))) 83 | Y = kayak.Dropout(X, batcher=batcher) 84 | assert not np.all(Y.value == np.ones((10, 10))) 85 | batcher.test_mode() 86 | print "Y value", Y.value 87 | assert np.all(Y.value == np.ones((10, 10))) 88 | -------------------------------------------------------------------------------- /tests/test_CacheFreshness.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . import * 7 | 8 | # def test_MatSum_clears_cache(): 9 | # X = kayak.Inputs(np.array([[1, 2, 3], [2, 3, 4]])) 10 | # Y = kayak. 
11 | 12 | def test_batcher_clears_value_cache(): 13 | batcher = kayak.Batcher(1, 2) 14 | X = kayak.Inputs(np.array([[1, 2, 3], [2, 3, 4]]), batcher) 15 | Y = kayak.MatSum(X) 16 | correct_vals = [6, 9] 17 | for ii, batch in enumerate(batcher): 18 | assert Y.value == correct_vals[ii] 19 | 20 | def test_batcher_clears_shape_cache(): 21 | batcher = kayak.Batcher(2, 3) 22 | X = kayak.Inputs(np.array([[1, 2, 3], [2, 3, 4], [3, 4, 5]]), batcher) 23 | Y = kayak.MatSum(X, axis=1) 24 | correct_shapes = [(2, 1), (1, 1)] 25 | for ii, batch in enumerate(batcher): 26 | assert Y.shape == correct_shapes[ii] 27 | 28 | def test_dropout_clears_value_cache(): 29 | X = kayak.Inputs(np.random.randn(10,10)) 30 | Y = kayak.Dropout(X) 31 | Z = kayak.MatSum(Y, axis=1) 32 | val1 = Z.value 33 | Y.draw_new_mask() 34 | val2 = Z.value 35 | assert not np.all(val1 == val2) 36 | assert np.all(Z.value == Z.value) 37 | 38 | def test_data_update_clears_value_cache(): 39 | X = kayak.Inputs(np.array([[1, 2, 3], [2, 3, 4], [3, 4, 5]])) 40 | assert np.all(X.value == [[1, 2, 3], [2, 3, 4], [3, 4, 5]]) 41 | X.data = [1,2] 42 | assert X._value is None 43 | assert np.all(X.value == [1, 2]) 44 | 45 | def test_param_change_clears_value_cache(): 46 | pass 47 | 48 | def test_param_change_clears_grad_cache(): 49 | pass 50 | -------------------------------------------------------------------------------- /tests/test_Constant.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . import * 7 | 8 | def test_constant_scalar(): 9 | npr.seed(1) 10 | 11 | for ii in xrange(NUM_TRIALS): 12 | 13 | np_X = npr.randn() 14 | X = kayak.Constant(np_X) 15 | 16 | assert close_float(X.value, np_X) 17 | 18 | def test_constant_vector(): 19 | npr.seed(1) 20 | 21 | for ii in xrange(NUM_TRIALS): 22 | 23 | np_X = npr.randn(10) 24 | X = kayak.Constant(np_X) 25 | 26 | assert np.all(close_float(X.value, np_X)) 27 | 28 | def test_constant_matrix(): 29 | npr.seed(1) 30 | 31 | for ii in xrange(NUM_TRIALS): 32 | 33 | np_X = npr.randn(10,20) 34 | X = kayak.Constant(np_X) 35 | 36 | assert np.all(close_float(X.value, np_X)) 37 | 38 | -------------------------------------------------------------------------------- /tests/test_Convolve1d.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . 
import * 7 | from nose.tools import assert_equals, assert_less 8 | 9 | def test_convolve1d_1(): 10 | npr.seed(3) 11 | 12 | for ii in xrange(NUM_TRIALS): 13 | 14 | np_A = npr.randn(5,6) 15 | np_B = npr.randn(6,7) 16 | A = kayak.Parameter(np_A) 17 | B = kayak.Parameter(np_B) 18 | C = kayak.Convolve1d(A, B, ncolors=1) 19 | 20 | # If the filters are the same size as the data 21 | assert C.value.shape == (5,7) 22 | 23 | def test_convolve1d_2(): 24 | npr.seed(3) 25 | 26 | for ii in xrange(NUM_TRIALS): 27 | 28 | np_A = npr.randn(5,20) 29 | np_B = npr.randn(6,4) 30 | A = kayak.Parameter(np_A) 31 | B = kayak.Parameter(np_B) 32 | C = kayak.Convolve1d(A, B, ncolors=1) 33 | 34 | assert_equals(C.value.shape, (5,(20-6+1)*4)) 35 | 36 | def test_convolve1d_3(): 37 | npr.seed(3) 38 | 39 | for ii in xrange(NUM_TRIALS): 40 | 41 | np_A = npr.randn(5,50) 42 | np_B = npr.randn(6*5,4) 43 | A = kayak.Parameter(np_A) 44 | B = kayak.Parameter(np_B) 45 | C = kayak.Convolve1d(A, B, ncolors=5) 46 | 47 | assert_equals(C.value.shape, (5,(10-6+1)*4)) 48 | 49 | def test_convolve1d_grad_1(): 50 | npr.seed(3) 51 | 52 | for ii in xrange(NUM_TRIALS): 53 | 54 | np_A = npr.randn(5,6) 55 | np_B = npr.randn(6,7) 56 | A = kayak.Parameter(np_A) 57 | B = kayak.Parameter(np_B) 58 | C = kayak.Convolve1d(A, B) 59 | D = kayak.MatSum(C) 60 | 61 | D.value 62 | assert_equals(D.grad(A).shape, (5,6)) 63 | assert_equals(D.grad(B).shape, (6,7)) 64 | assert_less(kayak.util.checkgrad(A, D), MAX_GRAD_DIFF) 65 | assert_less(kayak.util.checkgrad(B, D), MAX_GRAD_DIFF) 66 | 67 | def test_pool_1(): 68 | npr.seed(3) 69 | 70 | for ii in xrange(NUM_TRIALS): 71 | 72 | np_A = npr.randn(5,6) 73 | A = kayak.Parameter(np_A) 74 | B = kayak.Pool(A, width=2) 75 | C = kayak.MatSum(B) 76 | 77 | C.value 78 | assert_equals(C.grad(A).shape, (5,6)) 79 | assert_equals(C.grad(B).shape, (5,3)) 80 | assert_less(kayak.util.checkgrad(A, C), MAX_GRAD_DIFF) 81 | 82 | def test_pool_2(): 83 | npr.seed(3) 84 | 85 | for ii in xrange(NUM_TRIALS): 86 | 87 | np_A = npr.randn(5, 6*4) 88 | A = kayak.Parameter(np_A) 89 | B = kayak.Pool(A, width=2, ncolors=4) 90 | C = kayak.MatSum(B) 91 | 92 | C.value 93 | assert_equals(C.grad(A).shape, (5, 6*4)) 94 | assert_equals(C.grad(B).shape, (5, 12)) 95 | assert_equals(B.shape, (5, 12)) 96 | assert_less(kayak.util.checkgrad(A, C), MAX_GRAD_DIFF) 97 | 98 | def test_pool_offwidth_1(): 99 | npr.seed(3) 100 | 101 | for ii in xrange(NUM_TRIALS): 102 | 103 | np_A = npr.randn(5,7) 104 | A = kayak.Parameter(np_A) 105 | B = kayak.Pool(A, width=3) 106 | C = kayak.MatSum(B) 107 | 108 | C.value 109 | assert_equals(C.grad(A).shape, (5,7)) 110 | assert_equals(C.grad(B).shape, (5,3)) 111 | assert_less(kayak.util.checkgrad(A, C), MAX_GRAD_DIFF) 112 | 113 | def test_pool_offwidth_2(): 114 | npr.seed(3) 115 | 116 | for ii in xrange(NUM_TRIALS): 117 | 118 | np_A = npr.randn(5, 7*4) 119 | A = kayak.Parameter(np_A) 120 | B = kayak.Pool(A, width=3, ncolors=4) 121 | C = kayak.MatSum(B) 122 | 123 | C.value 124 | assert_equals(C.grad(A).shape, (5, 7*4)) 125 | assert_equals(C.grad(B).shape, (5, 12)) 126 | assert_equals(B.shape, (5, 12)) 127 | assert_less(kayak.util.checkgrad(A, C), MAX_GRAD_DIFF) 128 | 129 | def test_topkpool_1(): 130 | npr.seed(3) 131 | 132 | for ii in xrange(NUM_TRIALS): 133 | 134 | np_A = npr.randn(5,9) 135 | A = kayak.Parameter(np_A) 136 | B = kayak.TopKPool(A, k=5) 137 | C = kayak.MatSum(B) 138 | 139 | C.value 140 | assert_equals(C.grad(A).shape, (5,9)) 141 | assert_equals(C.grad(B).shape, (5,5)) 142 | assert_less(kayak.util.checkgrad(A, C), 
MAX_GRAD_DIFF) 143 | 144 | def test_convolve1d_grad_2(): 145 | npr.seed(3) 146 | 147 | for ii in xrange(NUM_TRIALS): 148 | 149 | np_A = npr.randn(5,50) 150 | np_B = npr.randn(6,7) 151 | A = kayak.Parameter(np_A) 152 | B = kayak.Parameter(np_B) 153 | C = kayak.Convolve1d(A, B) 154 | D = kayak.MatSum(C) 155 | 156 | D.value 157 | assert_equals(D.grad(A).shape, (5,50)) 158 | assert_equals(D.grad(B).shape, (6,7)) 159 | assert_less(kayak.util.checkgrad(A, D), MAX_GRAD_DIFF) 160 | assert_less(kayak.util.checkgrad(B, D), MAX_GRAD_DIFF) 161 | 162 | def test_convolve1d_grad_3(): 163 | npr.seed(3) 164 | 165 | for ii in xrange(NUM_TRIALS): 166 | 167 | np_A = npr.randn(5,50) 168 | np_B = npr.randn(6*5,4) 169 | A = kayak.Parameter(np_A) 170 | B = kayak.Parameter(np_B) 171 | C = kayak.Convolve1d(A, B, ncolors=5) 172 | D = kayak.MatSum(C) 173 | 174 | D.value 175 | assert_equals(D.grad(A).shape, (5,50)) 176 | assert_equals(D.grad(B).shape, (6*5,4)) 177 | assert_less(kayak.util.checkgrad(A, D), MAX_GRAD_DIFF) 178 | assert_less(kayak.util.checkgrad(B, D), MAX_GRAD_DIFF) -------------------------------------------------------------------------------- /tests/test_Dropout.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . import * 7 | 8 | def test_nondropout_values(): 9 | npr.seed(1) 10 | # First sanity check: don't actually drop anything out. 11 | # Make sure we get everything back. 12 | 13 | np_X = npr.randn(10,20) 14 | X = kayak.Parameter(np_X) 15 | Y = kayak.Dropout(X, drop_prob=0.0) 16 | 17 | assert np.all(close_float(Y.value, np_X)) 18 | 19 | def test_alldropout_values(): 20 | npr.seed(2) 21 | # Drop everything out. 22 | 23 | np_X = npr.randn(10,20) 24 | X = kayak.Parameter(np_X) 25 | Y = kayak.Dropout(X, drop_prob=1.0) 26 | 27 | assert np.all(Y.value == 0.0) 28 | 29 | def test_dropout_values(): 30 | # Drop some things out. 31 | npr.seed(3) 32 | 33 | for ii in xrange(NUM_TRIALS): 34 | prob = npr.rand() 35 | scale = 1.0 / (1.0 - prob) 36 | 37 | np_X = npr.randn(5,6) 38 | X = kayak.Parameter(np_X) 39 | Y = kayak.Dropout(X, drop_prob=prob) 40 | 41 | Y.value 42 | 43 | assert np.all(np.logical_xor(Y.value == 0.0, close_float(Y.value, scale*np_X))) 44 | 45 | def test_nondropout_grad(): 46 | npr.seed(4) 47 | 48 | np_X = npr.randn(10,20) 49 | X = kayak.Parameter(np_X) 50 | Y = kayak.Dropout(X, drop_prob=0.0) 51 | Z = kayak.MatSum(Y) 52 | 53 | Z.value 54 | assert Z.grad(X).shape == np_X.shape 55 | assert kayak.util.checkgrad(X, Z) < MAX_GRAD_DIFF 56 | 57 | def test_alldropout_grad(): 58 | npr.seed(5) 59 | 60 | np_X = npr.randn(10,20) 61 | X = kayak.Parameter(np_X) 62 | Y = kayak.Dropout(X, drop_prob=1.0) 63 | Z = kayak.MatSum(Y) 64 | 65 | Z.value 66 | assert Z.grad(X).shape == np_X.shape 67 | assert kayak.util.checkgrad(X, Z) < MAX_GRAD_DIFF 68 | 69 | def test_dropout_grad(): 70 | # Drop some things out.
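# (Editor's note, an assumption not stated in the original source: checkgrad
#  below perturbs X while the dropout mask, drawn once and cached, stays
#  fixed -- it appears to be redrawn only by draw_new_mask() or a batcher --
#  so finite differences and backprop evaluate the same masked network.)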
71 | npr.seed(6) 72 | 73 | for ii in xrange(NUM_TRIALS): 74 | prob = npr.rand() 75 | scale = 1.0 / (1.0 - prob) 76 | 77 | np_X = npr.randn(5,6) 78 | X = kayak.Parameter(np_X) 79 | Y = kayak.Dropout(X, drop_prob=prob) 80 | Z = kayak.MatSum(Y) 81 | 82 | Z.value 83 | assert Z.grad(X).shape == np_X.shape 84 | assert kayak.util.checkgrad(X, Z) < MAX_GRAD_DIFF 85 | 86 | -------------------------------------------------------------------------------- /tests/test_ElemAbs.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . import * 7 | 8 | def test_elemabs_values_1(): 9 | npr.seed(1) 10 | 11 | for ii in xrange(NUM_TRIALS): 12 | 13 | np_A = npr.randn(5,6) 14 | A = kayak.Parameter(np_A) 15 | C = kayak.ElemAbs(A) 16 | 17 | assert C.shape == np_A.shape 18 | assert np.all( close_float(C.value, abs(np_A))) 19 | 20 | def test_elemabs_values_2(): 21 | npr.seed(2) 22 | 23 | for ii in xrange(NUM_TRIALS): 24 | 25 | # Only nonnegative values allowed 26 | np_A = -np.log(npr.rand(1)) 27 | A = kayak.Parameter(np_A) 28 | D = kayak.ElemAbs(A) 29 | 30 | assert D.shape == np_A.shape 31 | assert np.all( close_float(D.value, abs(np_A))) 32 | 33 | def test_elemabs_grad_1(): 34 | npr.seed(3) 35 | 36 | for ii in xrange(NUM_TRIALS): 37 | 38 | np_A = npr.randn(5,6) 39 | 40 | A = kayak.Parameter(np_A) 41 | C = kayak.ElemAbs(A) 42 | D = kayak.MatSum(C) 43 | 44 | D.value 45 | assert kayak.util.checkgrad(A, D) < MAX_GRAD_DIFF 46 | 47 | def test_elemabs_grad_2(): 48 | npr.seed(9) 49 | 50 | for ii in xrange(NUM_TRIALS): 51 | 52 | np_A = npr.randn(1) 53 | A = kayak.Parameter(np_A) 54 | D = kayak.ElemAbs(A) 55 | E = kayak.MatSum(D) 56 | 57 | E.value 58 | assert kayak.util.checkgrad(A, E) < MAX_GRAD_DIFF 59 | -------------------------------------------------------------------------------- /tests/test_ElemExp.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . import * 7 | 8 | def test_elemexp_values_1(): 9 | npr.seed(1) 10 | 11 | for ii in xrange(NUM_TRIALS): 12 | 13 | np_A = npr.randn(5,6) 14 | A = kayak.Parameter(np_A) 15 | C = kayak.ElemExp(A) 16 | 17 | assert C.shape == np_A.shape 18 | assert np.all( close_float(C.value, np.exp(np_A))) 19 | 20 | def test_elemexp_values_2(): 21 | npr.seed(2) 22 | 23 | for ii in xrange(NUM_TRIALS): 24 | 25 | np_A = npr.randn(1) 26 | A = kayak.Parameter(np_A) 27 | D = kayak.ElemExp(A) 28 | 29 | assert D.shape == np_A.shape 30 | assert np.all( close_float(D.value, np.exp(np_A))) 31 | 32 | def test_elemexp_grad_1(): 33 | npr.seed(8) 34 | 35 | for ii in xrange(NUM_TRIALS): 36 | 37 | np_A = npr.randn(5,6) 38 | A = kayak.Parameter(np_A) 39 | C = kayak.ElemExp(A) 40 | D = kayak.MatSum(C) 41 | 42 | D.value 43 | assert kayak.util.checkgrad(A, D) < MAX_GRAD_DIFF 44 | 45 | def test_elemexp_grad_2(): 46 | npr.seed(9) 47 | 48 | for ii in xrange(NUM_TRIALS): 49 | 50 | np_A = npr.randn(1) 51 | A = kayak.Parameter(np_A) 52 | D = kayak.ElemExp(A) 53 | E = kayak.MatSum(D) 54 | 55 | E.value 56 | assert kayak.util.checkgrad(A, E) < MAX_GRAD_DIFF 57 | -------------------------------------------------------------------------------- /tests/test_ElemMult.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . 
import * 7 | 8 | def test_elemmult_values_1(): 9 | npr.seed(1) 10 | 11 | for ii in xrange(NUM_TRIALS): 12 | 13 | np_A = npr.randn(5,6) 14 | np_B = npr.randn(5,6) 15 | A = kayak.Parameter(np_A) 16 | B = kayak.Parameter(np_B) 17 | C = kayak.ElemMult(A, B) 18 | 19 | assert C.shape == np_A.shape 20 | assert np.all( close_float(C.value, np_A*np_B)) 21 | 22 | def test_elemmult_values_2(): 23 | npr.seed(2) 24 | 25 | for ii in xrange(NUM_TRIALS): 26 | 27 | np_A = npr.randn(5,6) 28 | np_B = npr.randn(5,6) 29 | np_C = npr.randn(5,6) 30 | A = kayak.Parameter(np_A) 31 | B = kayak.Parameter(np_B) 32 | C = kayak.Parameter(np_C) 33 | D = kayak.ElemMult(A, B, C) 34 | 35 | assert D.shape == np_A.shape 36 | assert np.all( close_float(D.value, np_A*np_B*np_C)) 37 | 38 | def test_elemmult_values_3(): 39 | npr.seed(7) 40 | 41 | for ii in xrange(NUM_TRIALS): 42 | 43 | np_A = npr.randn(5,6) 44 | np_B = npr.randn(5,6) 45 | A = kayak.Parameter(np_A) 46 | B = kayak.Parameter(np_B) 47 | D = kayak.ElemMult(A, B, A) 48 | 49 | assert D.shape == (5,6) 50 | assert np.all( close_float(D.value, np_A**2 * np_B)) 51 | 52 | def test_elemmult_values_4(): 53 | npr.seed(1) 54 | 55 | for ii in xrange(NUM_TRIALS): 56 | 57 | np_A = npr.randn(5,1) 58 | np_B = npr.randn(5,6) 59 | A = kayak.Parameter(np_A) 60 | B = kayak.Parameter(np_B) 61 | C = kayak.ElemMult(A, B) 62 | 63 | assert C.shape == (5,6) 64 | assert np.all( close_float(C.value, np_A*np_B)) 65 | 66 | def test_elemmult_values_5(): 67 | npr.seed(2) 68 | 69 | for ii in xrange(NUM_TRIALS): 70 | 71 | np_A = npr.randn(5,1) 72 | np_B = npr.randn(1,6) 73 | np_C = npr.randn(1,1) 74 | A = kayak.Parameter(np_A) 75 | B = kayak.Parameter(np_B) 76 | C = kayak.Parameter(np_C) 77 | D = kayak.ElemMult(A, B, C) 78 | 79 | assert D.shape == (5,6) 80 | assert np.all( close_float(D.value, np_A*np_B*np_C)) 81 | 82 | def test_elemmult_values_6(): 83 | npr.seed(7) 84 | 85 | for ii in xrange(NUM_TRIALS): 86 | 87 | np_A = npr.randn(1,1) 88 | np_B = npr.randn(5,6) 89 | A = kayak.Parameter(np_A) 90 | B = kayak.Parameter(np_B) 91 | D = kayak.ElemMult(A, B, A) 92 | 93 | assert D.shape == (5,6) 94 | assert np.all( close_float(D.value, np_A**2 * np_B)) 95 | 96 | 97 | def test_elemmult_grad_1(): 98 | npr.seed(8) 99 | 100 | for ii in xrange(NUM_TRIALS): 101 | 102 | np_A = npr.randn(5,6) 103 | np_B = npr.randn(5,6) 104 | A = kayak.Parameter(np_A) 105 | B = kayak.Parameter(np_B) 106 | C = kayak.ElemMult(A, B) 107 | D = kayak.MatSum(C) 108 | 109 | D.value 110 | assert kayak.util.checkgrad(A, D) < MAX_GRAD_DIFF 111 | assert kayak.util.checkgrad(B, D) < MAX_GRAD_DIFF 112 | 113 | def test_elemmult_grad_2(): 114 | npr.seed(9) 115 | 116 | for ii in xrange(NUM_TRIALS): 117 | 118 | np_A = npr.randn(5,6) 119 | np_B = npr.randn(5,6) 120 | np_C = npr.randn(5,6) 121 | A = kayak.Parameter(np_A) 122 | B = kayak.Parameter(np_B) 123 | C = kayak.Parameter(np_C) 124 | D = kayak.ElemMult(A, B, C) 125 | E = kayak.MatSum(D) 126 | 127 | E.value 128 | assert E.grad(A).shape == np_A.shape 129 | assert E.grad(B).shape == np_B.shape 130 | assert E.grad(C).shape == np_C.shape 131 | assert kayak.util.checkgrad(A, E) < MAX_GRAD_DIFF 132 | assert kayak.util.checkgrad(B, E) < MAX_GRAD_DIFF 133 | assert kayak.util.checkgrad(C, E) < MAX_GRAD_DIFF 134 | 135 | def test_elemmult_grad_3(): 136 | npr.seed(14) 137 | 138 | for ii in xrange(NUM_TRIALS): 139 | 140 | np_A = npr.randn(5,6) 141 | np_B = npr.randn(5,6) 142 | A = kayak.Parameter(np_A) 143 | B = kayak.Parameter(np_B) 144 | D = kayak.ElemMult(A, B, A) 145 | E = kayak.MatSum(D) 146 | 147 | 
E.value 148 | assert E.grad(A).shape == np_A.shape 149 | assert E.grad(B).shape == np_B.shape 150 | assert kayak.util.checkgrad(A, E) < MAX_GRAD_DIFF 151 | assert kayak.util.checkgrad(B, E) < MAX_GRAD_DIFF 152 | 153 | def test_elemmult_grad_4(): 154 | npr.seed(15) 155 | 156 | for ii in xrange(NUM_TRIALS): 157 | 158 | np_A = npr.randn(5,6) 159 | A = kayak.Parameter(np_A) 160 | D = kayak.ElemMult(A, A) 161 | E = kayak.MatSum(D) 162 | 163 | E.value 164 | assert E.grad(A).shape == np_A.shape 165 | assert kayak.util.checkgrad(A, E) < MAX_GRAD_DIFF 166 | 167 | def test_elemmult_grad_5(): 168 | npr.seed(8) 169 | 170 | for ii in xrange(NUM_TRIALS): 171 | 172 | np_A = npr.randn(5,1) 173 | np_B = npr.randn(5,6) 174 | A = kayak.Parameter(np_A) 175 | B = kayak.Parameter(np_B) 176 | C = kayak.ElemMult(A, B) 177 | D = kayak.MatSum(C) 178 | 179 | D.value 180 | assert D.grad(A).shape == np_A.shape 181 | assert D.grad(B).shape == np_B.shape 182 | assert kayak.util.checkgrad(A, D) < MAX_GRAD_DIFF 183 | assert kayak.util.checkgrad(B, D) < MAX_GRAD_DIFF 184 | 185 | def test_elemmult_grad_6(): 186 | npr.seed(9) 187 | 188 | for ii in xrange(NUM_TRIALS): 189 | 190 | np_A = npr.randn(5,1) 191 | np_B = npr.randn(1,6) 192 | np_C = npr.randn(1,1) 193 | A = kayak.Parameter(np_A) 194 | B = kayak.Parameter(np_B) 195 | C = kayak.Parameter(np_C) 196 | D = kayak.ElemMult(A, B, C) 197 | E = kayak.MatSum(D) 198 | 199 | E.value 200 | assert E.grad(A).shape == np_A.shape 201 | assert E.grad(B).shape == np_B.shape 202 | assert E.grad(C).shape == np_C.shape 203 | assert kayak.util.checkgrad(A, E) < MAX_GRAD_DIFF 204 | assert kayak.util.checkgrad(B, E) < MAX_GRAD_DIFF 205 | assert kayak.util.checkgrad(C, E) < MAX_GRAD_DIFF 206 | 207 | def test_elemmult_grad_7(): 208 | npr.seed(14) 209 | 210 | for ii in xrange(NUM_TRIALS): 211 | 212 | np_A = npr.randn(5,6) 213 | np_B = npr.randn(1,1) 214 | A = kayak.Parameter(np_A) 215 | B = kayak.Parameter(np_B) 216 | D = kayak.ElemMult(A, B, A) 217 | E = kayak.MatSum(D) 218 | 219 | E.value 220 | assert E.grad(A).shape == np_A.shape 221 | assert E.grad(B).shape == np_B.shape 222 | assert kayak.util.checkgrad(A, E) < MAX_GRAD_DIFF 223 | assert kayak.util.checkgrad(B, E) < MAX_GRAD_DIFF 224 | 225 | def test_elemmult_grad_8(): 226 | npr.seed(15) 227 | 228 | for ii in xrange(NUM_TRIALS): 229 | 230 | np_A = npr.randn(5,6) 231 | A = kayak.Parameter(np_A) 232 | D = kayak.ElemMult(A, A) 233 | E = kayak.MatSum(D) 234 | 235 | assert E.grad(A).shape == np_A.shape 236 | assert kayak.util.checkgrad(A, E) < MAX_GRAD_DIFF 237 | -------------------------------------------------------------------------------- /tests/test_ElemPower.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . 
import * 7 | 8 | def test_elempower_values_1(): 9 | npr.seed(1) 10 | 11 | for ii in xrange(NUM_TRIALS): 12 | 13 | np_A = npr.randn(5,6) 14 | A = kayak.Parameter(np_A) 15 | C = kayak.ElemPower(A, 2) 16 | 17 | assert C.shape == np_A.shape 18 | assert np.all( close_float(C.value, np.power(np_A, 2))) 19 | 20 | def test_elempower_values_2(): 21 | npr.seed(2) 22 | 23 | for ii in xrange(NUM_TRIALS): 24 | 25 | # Only nonnegative values allowed 26 | np_A = -np.log(npr.rand(1)) 27 | A = kayak.Parameter(np_A) 28 | D = kayak.ElemPower(A, 0.5) 29 | 30 | assert D.shape == np_A.shape 31 | assert np.all( close_float(D.value, np.power(np_A, 0.5))) 32 | 33 | def test_elempower_values_3(): 34 | npr.seed(1) 35 | 36 | for ii in xrange(NUM_TRIALS): 37 | 38 | np_A = npr.randn(5,6) 39 | A = kayak.Parameter(np_A) 40 | C = kayak.ElemPower(A, -1) 41 | 42 | assert C.shape == np_A.shape 43 | assert np.all( close_float(C.value, np.power(np_A, -1))) 44 | 45 | def test_elempower_values_4(): 46 | npr.seed(2) 47 | 48 | for ii in xrange(NUM_TRIALS): 49 | 50 | np_A = npr.randn(1) 51 | A = kayak.Parameter(np_A) 52 | D = kayak.ElemPower(A, 3.) 53 | 54 | assert D.shape == np_A.shape 55 | assert np.all( close_float(D.value, np.power(np_A, 3.))) 56 | 57 | def test_elempower_grad_1(): 58 | npr.seed(3) 59 | 60 | for ii in xrange(NUM_TRIALS): 61 | 62 | np_A = npr.randn(5,6) 63 | 64 | # Avoid small values where the inverse is unstable 65 | err = np.where(abs(np_A) < 1e-2) 66 | np_A[err] = 1e-2 * np.sign(np_A[err]) 67 | 68 | A = kayak.Parameter(np_A) 69 | C = kayak.ElemPower(A, -1) 70 | D = kayak.MatSum(C) 71 | 72 | D.value 73 | assert kayak.util.checkgrad(A, D) < MAX_GRAD_DIFF 74 | 75 | def test_elempower_grad_2(): 76 | npr.seed(9) 77 | 78 | for ii in xrange(NUM_TRIALS): 79 | 80 | np_A = npr.randn(1) 81 | A = kayak.Parameter(np_A) 82 | D = kayak.ElemPower(A, 2) 83 | E = kayak.MatSum(D) 84 | 85 | E.value 86 | assert kayak.util.checkgrad(A, E) < MAX_GRAD_DIFF 87 | -------------------------------------------------------------------------------- /tests/test_Graphs.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | import numpy.random as npr 4 | 5 | import kayak 6 | 7 | from . 
import * 8 | 9 | def test_graph_simple(): 10 | npr.seed(1) 11 | 12 | N = 1 13 | D = 1 14 | H1 = 1 15 | 16 | X = kayak.Inputs(npr.randn(N,D)) 17 | W1 = kayak.Parameter(npr.randn(D,H1)) 18 | U3 = kayak.MatMult(W1, X) 19 | 20 | out = U3 21 | 22 | print "Value: ", out.value 23 | print "Gradient: ", out.grad(W1) 24 | print "Grad error: ", kayak.util.checkgrad(W1, out) 25 | assert kayak.util.checkgrad(W1, out) < MAX_GRAD_DIFF 26 | 27 | def test_graph_chain(): 28 | npr.seed(1) 29 | 30 | N = 10 31 | D = 5 32 | H1 = 6 33 | H2 = 7 34 | 35 | X = kayak.Inputs(npr.randn(N,D)) 36 | W1 = kayak.Parameter(npr.randn(D,H1)) 37 | W2 = kayak.Parameter(npr.randn(H1,H2)) 38 | W3 = kayak.Parameter(npr.randn(H2,1)) 39 | 40 | U1 = kayak.SoftReLU(kayak.MatMult(X, W1)) 41 | U2 = kayak.SoftReLU(kayak.MatMult(U1, W2)) 42 | U3 = kayak.SoftReLU(kayak.MatMult(U2, W3)) 43 | 44 | out = kayak.MatSum(U3) 45 | 46 | out.value 47 | assert kayak.util.checkgrad(W1, out) < MAX_GRAD_DIFF 48 | assert kayak.util.checkgrad(W2, out) < MAX_GRAD_DIFF 49 | assert kayak.util.checkgrad(W3, out) < MAX_GRAD_DIFF 50 | 51 | def test_graph_diamond(): 52 | npr.seed(2) 53 | 54 | N = 10 55 | D = 5 56 | H1 = 6 57 | H2 = 7 58 | 59 | X = kayak.Inputs(npr.randn(N,D)) 60 | W1 = kayak.Parameter(npr.randn(D,H1)) 61 | W2a = kayak.Parameter(npr.randn(H1,H2)) 62 | W2b = kayak.Parameter(npr.randn(H1,H2)) 63 | W3 = kayak.Parameter(npr.randn(H2,1)) 64 | 65 | U1 = kayak.SoftReLU(kayak.MatMult(X, W1)) 66 | U2a = kayak.SoftReLU(kayak.MatMult(U1, W2a)) 67 | U2b = kayak.SoftReLU(kayak.MatMult(U1, W2b)) 68 | U3a = kayak.SoftReLU(kayak.MatMult(U2a, W3)) 69 | U3b = kayak.SoftReLU(kayak.MatMult(U2b, W3)) 70 | 71 | out = kayak.MatSum(kayak.MatAdd(U3a, U3b)) 72 | 73 | out.value 74 | print kayak.util.checkgrad(W1, out) 75 | print kayak.util.checkgrad(W2a, out) 76 | print kayak.util.checkgrad(W2b, out) 77 | print kayak.util.checkgrad(W3, out) 78 | assert kayak.util.checkgrad(W1, out) < MAX_GRAD_DIFF 79 | assert kayak.util.checkgrad(W2a, out) < MAX_GRAD_DIFF 80 | assert kayak.util.checkgrad(W2b, out) < MAX_GRAD_DIFF 81 | assert kayak.util.checkgrad(W3, out) < MAX_GRAD_DIFF 82 | 83 | def test_graph_dag(): 84 | npr.seed(3) 85 | 86 | num_layers = 7 87 | num_dims = 5 88 | 89 | for ii in xrange(NUM_TRIALS): 90 | probs = npr.rand() 91 | 92 | X = kayak.Inputs(npr.randn(25,num_dims)) 93 | 94 | wts = [] 95 | layers = [] 96 | for jj in xrange(num_layers): 97 | 98 | U = kayak.Constant(np.zeros((25,num_dims))) 99 | 100 | if npr.rand() < probs: 101 | W = kayak.Parameter(0.1*npr.randn(num_dims, num_dims)) 102 | wts.append(W) 103 | U = kayak.MatAdd( U, kayak.SoftReLU(kayak.MatMult(X, W)) ) 104 | 105 | for kk in xrange(jj): 106 | if npr.rand() < probs: 107 | W = kayak.Parameter(0.1*npr.randn(num_dims, num_dims)) 108 | wts.append(W) 109 | U = kayak.MatAdd( U, kayak.SoftReLU(kayak.MatMult(layers[kk], W)) ) 110 | 111 | layers.append(U) 112 | 113 | out = kayak.MatSum(layers[-1]) 114 | 115 | out.value 116 | for jj, wt in enumerate(wts): 117 | diff = kayak.util.checkgrad(wt, out, 1e-4) 118 | print diff 119 | assert diff < 1e-4 120 | 121 | def test_cache_utility(): 122 | npr.seed(3) 123 | 124 | num_layers = 17 125 | num_dims = 3 126 | 127 | X = kayak.Inputs(npr.randn(10, num_dims)) 128 | W1 = kayak.Parameter(npr.randn(num_dims, num_dims)) 129 | W2 = kayak.Parameter(npr.randn(num_dims, num_dims)) 130 | 131 | Z = kayak.MatMult(X, W1) 132 | 133 | for jj in xrange(num_layers): 134 | Z = kayak.SoftReLU(kayak.MatAdd(kayak.MatMult(Z, W2), 135 | kayak.MatMult(Z, W2))) 136 | 137 | out = kayak.MatSum(Z) 138 
| assert kayak.util.checkgrad(W1, out) < 1e-4 139 | 140 | def test_irrelevant_outputs(): 141 | # Having an irrelevant output shouldn't cause problems. Indeed, its 142 | # gradient and value should not be called. 143 | class NoValue(kayak.Differentiable): 144 | def __init__(self, A, *args): 145 | # Recurse to handle lists of arguments. 146 | super(NoValue, self).__init__([A]) 147 | def _compute_value(self): 148 | raise AttributeError("Value should not be called") 149 | def _local_grad(self, parent, d_out_d_self): 150 | raise AttributeError("Grad should not be called") 151 | 152 | X = kayak.Inputs(npr.randn(10, 20)) 153 | Y = kayak.Inputs(npr.randn(10, 20)) 154 | Z = X + Y 155 | bad_output = NoValue(X) 156 | Z.grad(X) # Will raise AttributeError if bad_output's value or grad is called 157 | 158 | def test_irrelevant_outputs_2(): 159 | # As above, with a chain of outputs 160 | class NoValue(kayak.Differentiable): 161 | def __init__(self, A, *args): 162 | # Recurse to handle lists of arguments. 163 | super(NoValue, self).__init__([A]) 164 | def _compute_value(self): 165 | raise AttributeError("Value should not be called") 166 | def _local_grad(self, parent, d_out_d_self): 167 | raise AttributeError("Grad should not be called") 168 | 169 | X = kayak.Inputs(npr.randn(10, 20)) 170 | Y = kayak.Inputs(npr.randn(10, 20)) 171 | Z = X + Y 172 | bad_pre_output = NoValue(X) 173 | bad_output = NoValue(bad_pre_output) 174 | 175 | Z.grad(X) # Will raise AttributeError if the bad outputs' values or grads are called 176 | 177 | -------------------------------------------------------------------------------- /tests/test_HardReLU.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . import * 7 | 8 | def test_hardrelu_values(): 9 | npr.seed(1) 10 | 11 | for ii in xrange(NUM_TRIALS): 12 | np_X = npr.randn(6,5) 13 | X = kayak.Parameter(np_X) 14 | Y = kayak.HardReLU(X) 15 | 16 | assert np.all( Y.value >= 0.0 ) 17 | assert np.all(np.maximum(np_X, 0.0) == Y.value) 18 | 19 | def test_hardrelu_grad(): 20 | npr.seed(2) 21 | 22 | # Needs to be small due to non-differentiability. 23 | epsilon = 1e-6 24 | 25 | for ii in xrange(NUM_TRIALS): 26 | np_X = npr.randn(6,5) 27 | X = kayak.Parameter(np_X) 28 | Y = kayak.HardReLU(X) 29 | Z = kayak.MatSum(Y) 30 | 31 | Z.value 32 | assert np.all( Z.grad(X) >= 0.0 ) 33 | print "CHECKGRAD: ", ii, kayak.util.checkgrad(X, Z, epsilon) 34 | assert kayak.util.checkgrad(X, Z, epsilon) < MAX_GRAD_DIFF 35 | -------------------------------------------------------------------------------- /tests/test_Horseshoe.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from .
7 | 8 | def test_scalar_value(): 9 | npr.seed(1) 10 | 11 | for ii in xrange(NUM_TRIALS): 12 | np_X = npr.randn() 13 | 14 | X = kayak.Parameter(np_X) 15 | out = kayak.Horseshoe(X) 16 | 17 | assert close_float(out.value, -np.log(np.log(1.0 + np_X**(-2)))) 18 | 19 | def test_scalar_grad(): 20 | npr.seed(2) 21 | 22 | for ii in xrange(NUM_TRIALS): 23 | while True: 24 | np_X = npr.randn() 25 | if np.abs(np_X) > 0.1: 26 | break 27 | 28 | X = kayak.Parameter(np_X) 29 | out = kayak.Horseshoe(X) 30 | 31 | assert kayak.util.checkgrad(X, out) < MAX_GRAD_DIFF 32 | 33 | def test_scalar_value_2(): 34 | npr.seed(3) 35 | 36 | for ii in xrange(NUM_TRIALS): 37 | np_X = npr.randn() 38 | wt = np.exp(npr.randn()) 39 | 40 | X = kayak.Parameter(np_X) 41 | out = kayak.Horseshoe(X, weight=wt) 42 | 43 | assert close_float(out.value, -wt * np.log(np.log(1.0 + np_X**-2))) 44 | 45 | def test_scalar_grad_2(): 46 | npr.seed(4) 47 | 48 | for ii in xrange(NUM_TRIALS): 49 | while True: 50 | np_X = npr.randn() 51 | if np.abs(np_X) > 0.1: 52 | break 53 | wt = np.exp(npr.randn()) 54 | 55 | X = kayak.Parameter(np_X) 56 | out = kayak.Horseshoe(X, weight=wt) 57 | 58 | assert kayak.util.checkgrad(X, out) < MAX_GRAD_DIFF 59 | 60 | def test_vector_value(): 61 | npr.seed(5) 62 | 63 | for ii in xrange(NUM_TRIALS): 64 | np_X = npr.randn(10,1) 65 | wt = np.exp(npr.randn()) 66 | 67 | X = kayak.Parameter(np_X) 68 | out = kayak.Horseshoe(X, weight=wt) 69 | 70 | assert close_float(out.value, -wt * np.sum(np.log(np.log(1.0 + np_X**-2)))) 71 | 72 | def test_vector_grad(): 73 | npr.seed(6) 74 | 75 | for ii in xrange(NUM_TRIALS): 76 | # Draw a full vector, then shift every entry away from zero, where the penalty is singular. 77 | np_X = npr.randn(10,1) 78 | np_X = np.where(np_X >= 0, np_X + 0.1, np_X - 0.1) 79 | assert np.all(np.abs(np_X) >= 0.1) 80 | wt = np.exp(npr.randn()) 81 | 82 | X = kayak.Parameter(np_X) 83 | out = kayak.Horseshoe(X, weight=wt) 84 | 85 | assert kayak.util.checkgrad(X, out) < MAX_GRAD_DIFF 86 | 87 | def test_matrix_value(): 88 | npr.seed(7) 89 | 90 | for ii in xrange(NUM_TRIALS): 91 | np_X = npr.randn(10,20) 92 | wt = np.exp(npr.randn()) 93 | 94 | X = kayak.Parameter(np_X) 95 | out = kayak.Horseshoe(X, weight=wt) 96 | 97 | assert close_float(out.value, -wt * np.sum(np.log(np.log(1.0 + np_X**-2)))) 98 | 99 | def test_matrix_grad(): 100 | npr.seed(8) 101 | 102 | for ii in xrange(NUM_TRIALS): 103 | # Shift a full matrix away from zero; resampling until all entries clear 0.1 would almost never terminate. 104 | np_X = npr.randn(10,20) 105 | np_X = np.where(np_X >= 0, np_X + 0.1, np_X - 0.1) 106 | assert np.all(np.abs(np_X) >= 0.1) 107 | wt = np.exp(npr.randn()) 108 | 109 | X = kayak.Parameter(np_X) 110 | out = kayak.Horseshoe(X, weight=wt) 111 | 112 | assert kayak.util.checkgrad(X, out) < MAX_GRAD_DIFF 113 | 114 | def test_tensor_value(): 115 | npr.seed(9) 116 | 117 | for ii in xrange(NUM_TRIALS): 118 | np_X = npr.randn(10,20,5) 119 | wt = np.exp(npr.randn()) 120 | 121 | X = kayak.Parameter(np_X) 122 | out = kayak.Horseshoe(X, weight=wt) 123 | 124 | assert close_float(out.value, -wt * np.sum(np.log(np.log(1.0 + np_X**-2)))) 125 | 126 | def test_tensor_grad(): 127 | npr.seed(10) 128 | 129 | for ii in xrange(NUM_TRIALS): 130 | # Same shift trick for a full tensor. 131 | np_X = npr.randn(10,20,5) 132 | np_X = np.where(np_X >= 0, np_X + 0.1, np_X - 0.1) 133 | assert np.all(np.abs(np_X) >= 0.1) 134 | wt = np.exp(npr.randn()) 135 | 136 | X = kayak.Parameter(np_X) 137 | out = kayak.Horseshoe(X, weight=wt) 138 | 139 | assert kayak.util.checkgrad(X, out) < MAX_GRAD_DIFF 140 | 141 | -------------------------------------------------------------------------------- /tests/test_Identity.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . 
import * 7 | 8 | def test_identity(): 9 | npr.seed(1) 10 | np_A = npr.randn(6,7) 11 | A = kayak.Parameter(np_A) 12 | B = kayak.Identity(A) 13 | assert np.all(close_float(B.value, np_A)) 14 | assert np.all(close_float(B.grad(A), np.ones((6,7)))) 15 | -------------------------------------------------------------------------------- /tests/test_Indexing.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . import * 7 | from nose.tools import assert_less 8 | 9 | def test_indexing_values(): 10 | npr.seed(1) 11 | 12 | for ii in xrange(NUM_TRIALS): 13 | np_X = npr.randn(6,10) 14 | inds = npr.permutation(10)[:5] 15 | X = kayak.Parameter(np_X) 16 | Y = kayak.Take(X, inds,axis=1) 17 | assert(np.array_equal(Y.value, np.take(np_X, inds,axis=1))) 18 | 19 | def test_indexing_grad(): 20 | npr.seed(2) 21 | 22 | for ii in xrange(NUM_TRIALS): 23 | np_X = npr.randn(6,20) 24 | inds = npr.permutation(20)[:5] 25 | X = kayak.Parameter(np_X) 26 | Y = kayak.Take(X, inds,axis=1) 27 | Z = kayak.MatSum(Y) 28 | 29 | Z.value 30 | assert_less(kayak.util.checkgrad(X, Z), MAX_GRAD_DIFF) 31 | 32 | def test_indexing_grad_2(): 33 | npr.seed(3) 34 | 35 | for ii in xrange(NUM_TRIALS): 36 | np_X = npr.randn(6, 2, 7, 3) 37 | inds = npr.permutation(7)[:5] 38 | X = kayak.Parameter(np_X) 39 | Y = kayak.Take(X, inds,axis=2) 40 | Z = kayak.MatSum(Y) 41 | 42 | Z.value 43 | assert_less(kayak.util.checkgrad(X, Z), MAX_GRAD_DIFF) 44 | -------------------------------------------------------------------------------- /tests/test_Inputs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HIPS/Kayak/1a7d4baa849bbd5a6f6d0486136169899cf25523/tests/test_Inputs.py -------------------------------------------------------------------------------- /tests/test_L1Norm.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . 
import * 7 | 8 | def test_scalar_value(): 9 | npr.seed(1) 10 | 11 | for ii in xrange(NUM_TRIALS): 12 | np_X = npr.randn() 13 | 14 | X = kayak.Parameter(np_X) 15 | out = kayak.L1Norm(X) 16 | 17 | assert close_float(out.value, np.abs(np_X)) 18 | 19 | def test_scalar_grad(): 20 | npr.seed(2) 21 | 22 | for ii in xrange(NUM_TRIALS): 23 | while True: 24 | np_X = npr.randn() 25 | if np.abs(np_X) > 0.1: 26 | break 27 | 28 | X = kayak.Parameter(np_X) 29 | out = kayak.L1Norm(X) 30 | 31 | assert close_float(out.grad(X), np.sign(np_X)) 32 | assert kayak.util.checkgrad(X, out) < MAX_GRAD_DIFF 33 | 34 | def test_scalar_value_2(): 35 | npr.seed(3) 36 | 37 | for ii in xrange(NUM_TRIALS): 38 | np_X = npr.randn() 39 | wt = np.exp(npr.randn()) 40 | 41 | X = kayak.Parameter(np_X) 42 | out = kayak.L1Norm(X, weight=wt) 43 | 44 | assert close_float(out.value, wt * np.abs(np_X)) 45 | 46 | def test_scalar_grad_2(): 47 | npr.seed(4) 48 | 49 | for ii in xrange(NUM_TRIALS): 50 | while True: 51 | np_X = npr.randn() 52 | if np.abs(np_X) > 0.1: 53 | break 54 | wt = np.exp(npr.randn()) 55 | 56 | X = kayak.Parameter(np_X) 57 | out = kayak.L1Norm(X, weight=wt) 58 | 59 | assert close_float(out.grad(X), wt*np.sign(np_X)) 60 | assert kayak.util.checkgrad(X, out) < MAX_GRAD_DIFF 61 | 62 | def test_vector_value(): 63 | npr.seed(5) 64 | 65 | for ii in xrange(NUM_TRIALS): 66 | np_X = npr.randn(10,1) 67 | wt = np.exp(npr.randn()) 68 | 69 | X = kayak.Parameter(np_X) 70 | out = kayak.L1Norm(X, weight=wt) 71 | 72 | assert close_float(out.value, wt * np.sum(np.abs(np_X))) 73 | 74 | def test_vector_grad(): 75 | npr.seed(6) 76 | 77 | for ii in xrange(NUM_TRIALS): 78 | # Draw a full vector, then shift every entry away from the kink of |x| at zero. 79 | np_X = npr.randn(10,1) 80 | np_X = np.where(np_X >= 0, np_X + 0.1, np_X - 0.1) 81 | assert np.all(np.abs(np_X) >= 0.1) 82 | wt = np.exp(npr.randn()) 83 | 84 | X = kayak.Parameter(np_X) 85 | out = kayak.L1Norm(X, weight=wt) 86 | 87 | assert np.all(close_float(out.grad(X), wt*np.sign(np_X))) 88 | assert kayak.util.checkgrad(X, out) < MAX_GRAD_DIFF 89 | 90 | def test_matrix_value(): 91 | npr.seed(7) 92 | 93 | for ii in xrange(NUM_TRIALS): 94 | np_X = npr.randn(10,20) 95 | wt = np.exp(npr.randn()) 96 | 97 | X = kayak.Parameter(np_X) 98 | out = kayak.L1Norm(X, weight=wt) 99 | 100 | assert close_float(out.value, wt * np.sum(np.abs(np_X))) 101 | 102 | def test_matrix_grad(): 103 | npr.seed(8) 104 | 105 | for ii in xrange(NUM_TRIALS): 106 | # Shift a full matrix away from zero; resampling until all entries clear 0.1 would almost never terminate. 107 | np_X = npr.randn(10,20) 108 | np_X = np.where(np_X >= 0, np_X + 0.1, np_X - 0.1) 109 | assert np.all(np.abs(np_X) >= 0.1) 110 | wt = np.exp(npr.randn()) 111 | 112 | X = kayak.Parameter(np_X) 113 | out = kayak.L1Norm(X, weight=wt) 114 | 115 | assert np.all(close_float(out.grad(X), wt*np.sign(np_X))) 116 | assert kayak.util.checkgrad(X, out) < MAX_GRAD_DIFF 117 | 118 | def test_tensor_value(): 119 | npr.seed(9) 120 | 121 | for ii in xrange(NUM_TRIALS): 122 | np_X = npr.randn(10,20,5) 123 | wt = np.exp(npr.randn()) 124 | 125 | X = kayak.Parameter(np_X) 126 | out = kayak.L1Norm(X, weight=wt) 127 | 128 | assert close_float(out.value, wt * np.sum(np.abs(np_X))) 129 | 130 | def test_tensor_grad(): 131 | npr.seed(10) 132 | 133 | for ii in xrange(NUM_TRIALS): 134 | # Same shift trick for a full tensor. 135 | np_X = npr.randn(10,20,5) 136 | np_X = np.where(np_X >= 0, np_X + 0.1, np_X - 0.1) 137 | assert np.all(np.abs(np_X) >= 0.1) 138 | wt = np.exp(npr.randn()) 139 | 140 | X = kayak.Parameter(np_X) 141 | out = kayak.L1Norm(X, weight=wt) 142 | 143 | assert np.all(close_float(out.grad(X), wt*np.sign(np_X))) 144 | assert kayak.util.checkgrad(X, out) < MAX_GRAD_DIFF 145 | 146 | -------------------------------------------------------------------------------- /tests/test_L2Loss.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . import * 7 | 8 | def test_scalar_value(): 9 | npr.seed(1) 10 | 11 | for ii in xrange(NUM_TRIALS): 12 | np_pred = npr.randn() 13 | np_targ = npr.randn() 14 | 15 | pred = kayak.Parameter(np_pred) 16 | targ = kayak.Targets(np_targ) 17 | out = kayak.L2Loss(pred, targ) 18 | 19 | # Verify that a scalar is reproduced. 20 | assert close_float(out.value, (np_pred-np_targ)**2) 21 | 22 | def test_scalar_grad(): 23 | npr.seed(2) 24 | 25 | for ii in xrange(NUM_TRIALS): 26 | np_pred = npr.randn() 27 | np_targ = npr.randn() 28 | 29 | pred = kayak.Parameter(np_pred) 30 | targ = kayak.Targets(np_targ) 31 | out = kayak.L2Loss(pred, targ) 32 | 33 | assert close_float(out.grad(pred), 2*(np_pred-np_targ)) 34 | assert kayak.util.checkgrad(pred, out) < 1e-6 35 | 36 | def test_vector_value(): 37 | npr.seed(3) 38 | 39 | for ii in xrange(NUM_TRIALS): 40 | np_pred = npr.randn(10,1) 41 | np_targ = npr.randn(10,1) 42 | 43 | pred = kayak.Parameter(np_pred) 44 | targ = kayak.Targets(np_targ) 45 | out = kayak.L2Loss(pred, targ) 46 | 47 | assert close_float(out.value, np.sum((np_pred-np_targ)**2)) 48 | 49 | def test_vector_grad(): 50 | npr.seed(4) 51 | 52 | for ii in xrange(NUM_TRIALS): 53 | np_pred = npr.randn(10,1) 54 | np_targ = npr.randn(10,1) 55 | 56 | pred = kayak.Parameter(np_pred) 57 | targ = kayak.Targets(np_targ) 58 | out = kayak.L2Loss(pred, targ) 59 | 60 | assert np.all(close_float(out.grad(pred), 2*(np_pred-np_targ))) 61 | assert kayak.util.checkgrad(pred, out) < 1e-6 62 | 63 | def test_matrix_value_1(): 64 | npr.seed(5) 65 | 66 | for ii in xrange(NUM_TRIALS): 67 | np_pred = npr.randn(10,20) 68 | np_targ = npr.randn(10,20) 69 | 70 | pred = kayak.Parameter(np_pred) 71 | targ = kayak.Targets(np_targ) 72 | out = kayak.L2Loss(pred, targ) 73 | 74 | print out.value, (np_pred-np_targ)**2 75 | assert close_float(out.value, np.sum((np_pred-np_targ)**2)) 76 | 77 | def test_matrix_grad(): 78 | npr.seed(6) 79 | 80 | for ii in xrange(NUM_TRIALS): 81 | np_pred = npr.randn(10,20) 82 | np_targ = npr.randn(10,20) 83 | 84 | pred = kayak.Parameter(np_pred) 85 | targ = kayak.Targets(np_targ) 86 | out = kayak.L2Loss(pred, targ) 87 | 88 | assert np.all(close_float(out.grad(pred), 2*(np_pred-np_targ))) 89 | assert kayak.util.checkgrad(pred, out) < 1e-6 90 | 91 | def test_matrix_value_2(): 92 | npr.seed(7) 93 | 94 | for ii in xrange(NUM_TRIALS): 95 | np_pred = npr.randn(10,20) 96 | np_targ = npr.randn(10,20) 97 | 98 | pred = kayak.Parameter(np_pred) 99 | targ = kayak.Targets(np_targ) 100 | out = kayak.L2Loss(pred, targ, axis=0) 101 | 102 | print out.value, np.sum((np_pred-np_targ)**2, axis=0) 103 | assert np.all(close_float(out.value, np.sum((np_pred-np_targ)**2, axis=0))) 104 | 105 | def test_matrix_value_3(): 106 | npr.seed(8) 107 | 108 | for ii in xrange(NUM_TRIALS): 109 | np_pred = npr.randn(10,20) 110 | np_targ = npr.randn(10,20) 111 | 112 | pred = kayak.Parameter(np_pred) 113 | targ = kayak.Targets(np_targ) 114 | out = kayak.L2Loss(pred, targ, axis=1) 115 | 116 | print out.value, np.sum((np_pred-np_targ)**2, axis=1) 117 | assert np.all(close_float(out.value, np.sum((np_pred-np_targ)**2, axis=1, keepdims=True))) 118 | 119 | -------------------------------------------------------------------------------- /tests/test_L2Norm.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 
| 4 | import kayak 5 | 6 | from . import * 7 | 8 | def test_scalar_value(): 9 | npr.seed(1) 10 | 11 | for ii in xrange(NUM_TRIALS): 12 | np_X = npr.randn() 13 | 14 | X = kayak.Parameter(np_X) 15 | out = kayak.L2Norm(X) 16 | 17 | assert close_float(out.value, np_X**2) 18 | 19 | def test_scalar_grad(): 20 | npr.seed(2) 21 | 22 | for ii in xrange(NUM_TRIALS): 23 | np_X = npr.randn() 24 | 25 | X = kayak.Parameter(np_X) 26 | out = kayak.L2Norm(X) 27 | 28 | assert close_float(out.grad(X), 2*np_X) 29 | assert kayak.util.checkgrad(X, out) < MAX_GRAD_DIFF 30 | 31 | def test_scalar_value_2(): 32 | npr.seed(3) 33 | 34 | for ii in xrange(NUM_TRIALS): 35 | np_X = npr.randn() 36 | wt = np.exp(npr.randn()) 37 | 38 | X = kayak.Parameter(np_X) 39 | out = kayak.L2Norm(X, weight=wt) 40 | 41 | assert close_float(out.value, wt * np_X**2) 42 | 43 | def test_scalar_grad_2(): 44 | npr.seed(4) 45 | 46 | for ii in xrange(NUM_TRIALS): 47 | np_X = npr.randn() 48 | wt = np.exp(npr.randn()) 49 | 50 | X = kayak.Parameter(np_X) 51 | out = kayak.L2Norm(X, weight=wt) 52 | 53 | assert close_float(out.grad(X), 2*wt*np_X) 54 | assert kayak.util.checkgrad(X, out) < MAX_GRAD_DIFF 55 | 56 | def test_vector_value(): 57 | npr.seed(5) 58 | 59 | for ii in xrange(NUM_TRIALS): 60 | np_X = npr.randn(10,1) 61 | wt = np.exp(npr.randn()) 62 | 63 | X = kayak.Parameter(np_X) 64 | out = kayak.L2Norm(X, weight=wt) 65 | 66 | assert close_float(out.value, wt * np.sum(np_X**2)) 67 | 68 | def test_vector_grad(): 69 | npr.seed(6) 70 | 71 | for ii in xrange(NUM_TRIALS): 72 | np_X = npr.randn(10,1) 73 | wt = np.exp(npr.randn()) 74 | 75 | X = kayak.Parameter(np_X) 76 | out = kayak.L2Norm(X, weight=wt) 77 | 78 | assert np.all(close_float(out.grad(X), 2*wt*np_X)) 79 | assert kayak.util.checkgrad(X, out) < MAX_GRAD_DIFF 80 | 81 | def test_matrix_value(): 82 | npr.seed(7) 83 | 84 | for ii in xrange(NUM_TRIALS): 85 | np_X = npr.randn(10,20) 86 | wt = np.exp(npr.randn()) 87 | 88 | X = kayak.Parameter(np_X) 89 | out = kayak.L2Norm(X, weight=wt) 90 | 91 | assert close_float(out.value, wt * np.sum(np_X**2)) 92 | 93 | def test_matrix_grad(): 94 | npr.seed(8) 95 | 96 | for ii in xrange(NUM_TRIALS): 97 | np_X = npr.randn(10,20) 98 | wt = np.exp(npr.randn()) 99 | 100 | X = kayak.Parameter(np_X) 101 | out = kayak.L2Norm(X, weight=wt) 102 | 103 | assert np.all(close_float(out.grad(X), 2*wt*np_X)) 104 | assert kayak.util.checkgrad(X, out) < MAX_GRAD_DIFF 105 | 106 | def test_tensor_value(): 107 | npr.seed(9) 108 | 109 | for ii in xrange(NUM_TRIALS): 110 | np_X = npr.randn(10,20,5) 111 | wt = np.exp(npr.randn()) 112 | 113 | X = kayak.Parameter(np_X) 114 | out = kayak.L2Norm(X, weight=wt) 115 | 116 | assert close_float(out.value, wt * np.sum(np_X**2)) 117 | 118 | def test_tensor_grad(): 119 | npr.seed(10) 120 | 121 | for ii in xrange(NUM_TRIALS): 122 | np_X = npr.randn(10,20,5) 123 | wt = np.exp(npr.randn()) 124 | 125 | X = kayak.Parameter(np_X) 126 | out = kayak.L2Norm(X, weight=wt) 127 | 128 | assert np.all(close_float(out.grad(X), 2*wt*np_X)) 129 | assert kayak.util.checkgrad(X, out) < MAX_GRAD_DIFF 130 | 131 | -------------------------------------------------------------------------------- /tests/test_LogMultinomialLoss.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . 
import * 7 | 8 | def test_vector_value(): 9 | npr.seed(3) 10 | 11 | for ii in xrange(NUM_TRIALS): 12 | np_pred = npr.randn(1,10) 13 | np_targ = npr.randn(1,10) 14 | 15 | pred = kayak.Parameter(np_pred) 16 | targ = kayak.Targets(np_targ) 17 | out = kayak.LogMultinomialLoss(pred, targ) 18 | 19 | assert close_float(out.value, -np.sum(np_pred * np_targ)) 20 | 21 | def test_vector_grad(): 22 | npr.seed(4) 23 | 24 | for ii in xrange(NUM_TRIALS): 25 | np_pred = npr.randn(1,10) 26 | np_targ = npr.randn(1,10) 27 | 28 | pred = kayak.Parameter(np_pred) 29 | targ = kayak.Targets(np_targ) 30 | out = kayak.LogMultinomialLoss(pred, targ) 31 | 32 | assert np.all(close_float(out.grad(pred), -np_targ)) 33 | assert kayak.util.checkgrad(pred, out) < MAX_GRAD_DIFF 34 | 35 | def test_matrix_value_1(): 36 | npr.seed(5) 37 | 38 | for ii in xrange(NUM_TRIALS): 39 | np_pred = npr.randn(10,20) 40 | np_targ = npr.randn(10,20) 41 | 42 | pred = kayak.Parameter(np_pred) 43 | targ = kayak.Targets(np_targ) 44 | out = kayak.LogMultinomialLoss(pred, targ) 45 | 46 | assert np.all(close_float(out.value, -np.sum(np_pred * np_targ, axis=1, keepdims=True))) 47 | 48 | def test_matrix_grad(): 49 | npr.seed(6) 50 | 51 | for ii in xrange(NUM_TRIALS): 52 | np_pred = npr.randn(10,20) 53 | np_targ = npr.randn(10,20) 54 | 55 | pred = kayak.Parameter(np_pred) 56 | targ = kayak.Targets(np_targ) 57 | out = kayak.MatSum(kayak.LogMultinomialLoss(pred, targ)) 58 | 59 | assert kayak.util.checkgrad(pred, out) < MAX_GRAD_DIFF 60 | 61 | def test_matrix_value_2(): 62 | npr.seed(7) 63 | 64 | for ii in xrange(NUM_TRIALS): 65 | np_pred = npr.randn(10,20) 66 | np_targ = npr.randn(10,20) 67 | 68 | pred = kayak.Parameter(np_pred) 69 | targ = kayak.Targets(np_targ) 70 | out = kayak.LogMultinomialLoss(pred, targ, axis=0) 71 | 72 | assert np.all(close_float(out.value, -np.sum(np_pred * np_targ, axis=0))) 73 | -------------------------------------------------------------------------------- /tests/test_LogSoftMax.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . 
import * 7 | 8 | def test_logsoftmax_values_1(): 9 | npr.seed(1) 10 | 11 | for ii in xrange(NUM_TRIALS): 12 | 13 | np_X = npr.randn(5,6) 14 | X = kayak.Parameter(np_X) 15 | Y = kayak.LogSoftMax(X) 16 | 17 | np_Y = np.exp(np_X) 18 | np_Y = np_Y / np.sum(np_Y, axis=1)[:,np.newaxis] 19 | np_Y = np.log(np_Y) 20 | 21 | assert Y.shape == np_X.shape 22 | assert np.all(close_float(Y.value, np_Y)) 23 | 24 | def test_logsoftmax_values_2(): 25 | npr.seed(2) 26 | 27 | for ii in xrange(NUM_TRIALS): 28 | 29 | np_X = npr.randn(5,6) 30 | X = kayak.Parameter(np_X) 31 | Y = kayak.LogSoftMax(X, axis=0) 32 | 33 | np_Y = np.exp(np_X) 34 | np_Y = np_Y / np.sum(np_Y, axis=0)[np.newaxis,:] 35 | np_Y = np.log(np_Y) 36 | 37 | assert Y.shape == np_X.shape 38 | assert np.all(close_float(Y.value, np_Y)) 39 | 40 | def test_logsoftmax_grad_1(): 41 | npr.seed(3) 42 | 43 | for ii in xrange(NUM_TRIALS): 44 | 45 | np_X = npr.randn(5,6) 46 | X = kayak.Parameter(np_X) 47 | Y = kayak.LogSoftMax(X) 48 | Z = kayak.MatSum(Y) 49 | 50 | assert kayak.util.checkgrad(X, Z) < MAX_GRAD_DIFF 51 | 52 | def test_logsoftmax_grad_2(): 53 | npr.seed(4) 54 | 55 | for ii in xrange(NUM_TRIALS): 56 | 57 | np_X = npr.randn(5,6) 58 | X = kayak.Parameter(np_X) 59 | Y = kayak.LogSoftMax(X, axis=0) 60 | Z = kayak.MatSum(Y) 61 | 62 | assert kayak.util.checkgrad(X, Z) < MAX_GRAD_DIFF 63 | 64 | def test_logsoftmax_grad_3(): 65 | npr.seed(5) 66 | 67 | for ii in xrange(NUM_TRIALS): 68 | 69 | np_X = npr.randn(5,6) 70 | np_T = npr.randint(0, 10, np_X.shape) 71 | X = kayak.Parameter(np_X) 72 | T = kayak.Targets(np_T) 73 | Y = kayak.LogSoftMax(X) 74 | Z = kayak.MatSum(kayak.LogMultinomialLoss(Y, T)) 75 | 76 | assert kayak.util.checkgrad(X, Z) < MAX_GRAD_DIFF 77 | 78 | -------------------------------------------------------------------------------- /tests/test_Logistic.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . import * 7 | from nose.tools import assert_less 8 | 9 | def test_logistic_values(): 10 | npr.seed(1) 11 | 12 | for ii in xrange(NUM_TRIALS): 13 | np_X = npr.randn(6,5) 14 | X = kayak.Parameter(np_X) 15 | Y = kayak.Logistic(X) 16 | 17 | assert np.all(close_float(1.0/(1.0+np.exp(-np_X)), Y.value)) 18 | 19 | def test_logistic_grad(): 20 | npr.seed(2) 21 | 22 | for ii in xrange(NUM_TRIALS): 23 | np_X = npr.randn(6,5) 24 | X = kayak.Parameter(np_X) 25 | Y = kayak.Logistic(X) 26 | Z = kayak.MatSum(Y) 27 | 28 | Z.value 29 | assert np.all( Z.grad(X) >= 0.0 ) 30 | assert_less(kayak.util.checkgrad(X, Z), MAX_GRAD_DIFF) 31 | -------------------------------------------------------------------------------- /tests/test_MatAdd.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . import * 7 | 8 | # These behaviors require prepending singletons. Do we want to keep them? 9 | # def test_0d_plus_2d_scalar_value(): 10 | # npr.seed(1) 11 | 12 | # for ii in xrange(NUM_TRIALS): 13 | # npX1 = npr.randn(1, 1) 14 | # X1 = kayak.Parameter( npX1 ) 15 | # npX2 = np.sum(npr.randn()) # generates a scalar with shape () 16 | # X2 = kayak.Parameter( npX2 ) 17 | # Y = kayak.MatAdd(X1, X2) 18 | 19 | # # Verify that a scalar is reproduced. 
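# (For reference: under NumPy broadcasting, adding a shape-() scalar to a
# (1,1) array yields a (1,1) array, i.e. np.shape(np.zeros((1,1)) + 1.0) == (1, 1),
# which is what the commented-out check below would assert.)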
20 | # assert close_float(Y.value, npX1 + npX2) 21 | 22 | 23 | # def test_0d_plus_2d_scalar_grad(): 24 | # npr.seed(2) 25 | # for ii in xrange(NUM_TRIALS): 26 | # npX1 = npr.randn(1, 1) 27 | # X1 = kayak.Parameter( npX1 ) 28 | # npX2 = np.sum(npr.randn()) # generates a scalar with shape () 29 | # X2= kayak.Parameter( npX2 ) 30 | # Y = kayak.MatAdd(X1, X2) 31 | 32 | # # Verify that the gradient is one. 33 | # assert Y.grad(X1) == 1.0 34 | # assert Y.grad(X2) == 1.0 35 | # assert kayak.util.checkgrad(X1, Y) < MAX_GRAD_DIFF 36 | # assert kayak.util.checkgrad(X2, Y) < MAX_GRAD_DIFF 37 | 38 | def test_matadd_values_1(): 39 | npr.seed(1) 40 | 41 | for ii in xrange(NUM_TRIALS): 42 | 43 | np_A = npr.randn(5,6) 44 | np_B = npr.randn(5,6) 45 | A = kayak.Parameter(np_A) 46 | B = kayak.Parameter(np_B) 47 | C = kayak.MatAdd(A, B) 48 | 49 | assert C.shape == np_A.shape 50 | assert np.all( close_float(C.value, np_A+np_B)) 51 | 52 | def test_matadd_values_2(): 53 | npr.seed(2) 54 | 55 | for ii in xrange(NUM_TRIALS): 56 | 57 | np_A = npr.randn(5,6) 58 | np_B = npr.randn(5,6) 59 | np_C = npr.randn(5,6) 60 | A = kayak.Parameter(np_A) 61 | B = kayak.Parameter(np_B) 62 | C = kayak.Parameter(np_C) 63 | D = kayak.MatAdd(A, B, C) 64 | 65 | assert D.shape == np_A.shape 66 | assert np.all( close_float(D.value, np_A+np_B+np_C)) 67 | 68 | def test_matadd_values_3(): 69 | npr.seed(3) 70 | 71 | for ii in xrange(NUM_TRIALS): 72 | 73 | np_A = npr.randn(5,6) 74 | np_B = npr.randn(1,6) 75 | A = kayak.Parameter(np_A) 76 | B = kayak.Parameter(np_B) 77 | C = kayak.MatAdd(A, B) 78 | 79 | assert C.shape == (5,6) 80 | assert np.all( close_float(C.value, np_A+np_B)) 81 | 82 | def test_matadd_values_4(): 83 | npr.seed(4) 84 | 85 | for ii in xrange(NUM_TRIALS): 86 | 87 | np_A = npr.randn(5,6) 88 | np_B = npr.randn(5,1) 89 | A = kayak.Parameter(np_A) 90 | B = kayak.Parameter(np_B) 91 | C = kayak.MatAdd(A, B) 92 | 93 | assert C.shape == (5,6) 94 | assert np.all( close_float(C.value, np_A+np_B)) 95 | 96 | def test_matadd_values_5(): 97 | npr.seed(5) 98 | 99 | for ii in xrange(NUM_TRIALS): 100 | 101 | np_A = npr.randn(1,6) 102 | np_B = npr.randn(5,1) 103 | A = kayak.Parameter(np_A) 104 | B = kayak.Parameter(np_B) 105 | C = kayak.MatAdd(A, B) 106 | 107 | assert C.shape == (5,6) 108 | assert np.all( close_float(C.value, np_A+np_B)) 109 | 110 | def test_matadd_values_6(): 111 | npr.seed(6) 112 | 113 | for ii in xrange(NUM_TRIALS): 114 | 115 | np_A = npr.randn(5,6) 116 | np_B = npr.randn(1,1) 117 | A = kayak.Parameter(np_A) 118 | B = kayak.Parameter(np_B) 119 | C = kayak.MatAdd(A, B) 120 | 121 | assert C.shape == (5,6) 122 | assert np.all( close_float(C.value, np_A+np_B)) 123 | 124 | def test_matadd_values_7(): 125 | npr.seed(7) 126 | 127 | for ii in xrange(NUM_TRIALS): 128 | 129 | np_A = npr.randn(5,6) 130 | np_B = npr.randn(5,6) 131 | A = kayak.Parameter(np_A) 132 | B = kayak.Parameter(np_B) 133 | D = kayak.MatAdd(A, B, A) 134 | 135 | assert D.shape == (5,6) 136 | assert np.all( close_float(D.value, 2*np_A + np_B)) 137 | 138 | def test_matadd_grad_1(): 139 | npr.seed(8) 140 | 141 | for ii in xrange(NUM_TRIALS): 142 | 143 | np_A = npr.randn(5,6) 144 | np_B = npr.randn(5,6) 145 | A = kayak.Parameter(np_A) 146 | B = kayak.Parameter(np_B) 147 | C = kayak.MatAdd(A, B) 148 | D = kayak.MatSum(C) 149 | 150 | D.value 151 | assert kayak.util.checkgrad(A, D) < MAX_GRAD_DIFF 152 | assert kayak.util.checkgrad(B, D) < MAX_GRAD_DIFF 153 | 154 | def test_matadd_grad_2(): 155 | npr.seed(9) 156 | 157 | for ii in xrange(NUM_TRIALS): 158 | 159 | np_A = 
npr.randn(5,6) 160 | np_B = npr.randn(5,6) 161 | np_C = npr.randn(5,6) 162 | A = kayak.Parameter(np_A) 163 | B = kayak.Parameter(np_B) 164 | C = kayak.Parameter(np_C) 165 | D = kayak.MatAdd(A, B, C) 166 | E = kayak.MatSum(D) 167 | 168 | E.value 169 | assert kayak.util.checkgrad(A, E) < MAX_GRAD_DIFF 170 | assert kayak.util.checkgrad(B, E) < MAX_GRAD_DIFF 171 | assert kayak.util.checkgrad(C, E) < MAX_GRAD_DIFF 172 | 173 | def test_matadd_grad_3(): 174 | npr.seed(10) 175 | 176 | for ii in xrange(NUM_TRIALS): 177 | 178 | np_A = npr.randn(5,6) 179 | np_B = npr.randn(1,6) 180 | A = kayak.Parameter(np_A) 181 | B = kayak.Parameter(np_B) 182 | C = kayak.MatAdd(A, B) 183 | D = kayak.MatSum(C) 184 | 185 | D.value 186 | print np_A.shape, D.grad(A).shape 187 | print np_B.shape, D.grad(B).shape 188 | assert D.grad(A).shape == np_A.shape 189 | assert D.grad(B).shape == np_B.shape 190 | assert kayak.util.checkgrad(A, D) < MAX_GRAD_DIFF 191 | assert kayak.util.checkgrad(B, D) < MAX_GRAD_DIFF 192 | 193 | def test_matadd_grad_4(): 194 | npr.seed(11) 195 | 196 | for ii in xrange(NUM_TRIALS): 197 | 198 | np_A = npr.randn(5,1) 199 | np_B = npr.randn(5,6) 200 | A = kayak.Parameter(np_A) 201 | B = kayak.Parameter(np_B) 202 | C = kayak.MatAdd(A, B) 203 | D = kayak.MatSum(C) 204 | 205 | D.value 206 | assert D.grad(A).shape == np_A.shape 207 | assert D.grad(B).shape == np_B.shape 208 | assert kayak.util.checkgrad(A, D) < MAX_GRAD_DIFF 209 | assert kayak.util.checkgrad(B, D) < MAX_GRAD_DIFF 210 | 211 | def test_matadd_grad_5(): 212 | npr.seed(12) 213 | 214 | for ii in xrange(NUM_TRIALS): 215 | 216 | np_A = npr.randn(5,1) 217 | np_B = npr.randn(1,6) 218 | A = kayak.Parameter(np_A) 219 | B = kayak.Parameter(np_B) 220 | C = kayak.MatAdd(A, B) 221 | D = kayak.MatSum(C) 222 | 223 | D.value 224 | assert D.grad(A).shape == np_A.shape 225 | assert D.grad(B).shape == np_B.shape 226 | assert kayak.util.checkgrad(A, D) < MAX_GRAD_DIFF 227 | assert kayak.util.checkgrad(B, D) < MAX_GRAD_DIFF 228 | 229 | def test_matadd_grad_6(): 230 | npr.seed(13) 231 | 232 | for ii in xrange(NUM_TRIALS): 233 | 234 | np_A = npr.randn(5,6) 235 | np_B = npr.randn(1,1) 236 | A = kayak.Parameter(np_A) 237 | B = kayak.Parameter(np_B) 238 | C = kayak.MatAdd(A, B) 239 | D = kayak.MatSum(C) 240 | 241 | D.value 242 | assert D.grad(A).shape == np_A.shape 243 | assert D.grad(B).shape == np_B.shape 244 | assert kayak.util.checkgrad(A, D) < MAX_GRAD_DIFF 245 | assert kayak.util.checkgrad(B, D) < MAX_GRAD_DIFF 246 | 247 | def test_matadd_grad_7(): 248 | npr.seed(14) 249 | 250 | for ii in xrange(NUM_TRIALS): 251 | 252 | np_A = npr.randn(5,6) 253 | np_B = npr.randn(5,6) 254 | A = kayak.Parameter(np_A) 255 | B = kayak.Parameter(np_B) 256 | D = kayak.MatAdd(A, B, A) 257 | E = kayak.MatSum(D) 258 | 259 | E.value 260 | assert E.grad(A).shape == np_A.shape 261 | assert E.grad(B).shape == np_B.shape 262 | assert kayak.util.checkgrad(A, E) < MAX_GRAD_DIFF 263 | assert kayak.util.checkgrad(B, E) < MAX_GRAD_DIFF 264 | 265 | def test_matadd_grad_8(): 266 | npr.seed(15) 267 | 268 | for ii in xrange(NUM_TRIALS): 269 | 270 | np_A = npr.randn(5,6) 271 | np_B = npr.randn(5,6) 272 | A = kayak.Parameter(np_A) 273 | D = kayak.MatAdd(A, A) 274 | E = kayak.MatSum(D) 275 | 276 | E.value 277 | assert E.grad(A).shape == np_A.shape 278 | assert kayak.util.checkgrad(A, E) < MAX_GRAD_DIFF 279 | -------------------------------------------------------------------------------- /tests/test_MatConcat.py: -------------------------------------------------------------------------------- 1 | 
import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . import * 7 | 8 | def test_matconcat_values_1(): 9 | npr.seed(1) 10 | 11 | for ii in xrange(NUM_TRIALS): 12 | 13 | np_A = npr.randn(5,6) 14 | np_B = npr.randn(5,6) 15 | A = kayak.Parameter(np_A) 16 | B = kayak.Parameter(np_B) 17 | 18 | C = kayak.Concatenate(0, A, B) 19 | assert C.value.shape == (10,6) 20 | 21 | C = kayak.Concatenate(1, A, B) 22 | assert C.value.shape == (5,12) 23 | 24 | 25 | 26 | def test_matconcat_grad_1(): 27 | npr.seed(3) 28 | 29 | for ii in xrange(NUM_TRIALS): 30 | 31 | np_A = npr.randn(5,6) 32 | np_B = npr.randn(5,6) 33 | A = kayak.Parameter(np_A) 34 | B = kayak.Parameter(np_B) 35 | C = kayak.Concatenate(0, A, B) 36 | D = kayak.MatSum(C) 37 | 38 | D.value 39 | assert D.grad(A).shape == (5,6) 40 | assert D.grad(B).shape == (5,6) 41 | assert kayak.util.checkgrad(A, D) < MAX_GRAD_DIFF 42 | assert kayak.util.checkgrad(B, D) < MAX_GRAD_DIFF 43 | 44 | 45 | def test_matconcat_grad_2(): 46 | npr.seed(3) 47 | 48 | for ii in xrange(NUM_TRIALS): 49 | 50 | np_A = npr.randn(5,6) 51 | np_B = npr.randn(5,6) 52 | A = kayak.Parameter(np_A) 53 | B = kayak.Parameter(np_B) 54 | C = kayak.Concatenate(1, A, B) 55 | D = kayak.MatSum(C) 56 | 57 | D.value 58 | assert D.grad(A).shape == (5,6) 59 | assert D.grad(B).shape == (5,6) 60 | assert kayak.util.checkgrad(A, D) < MAX_GRAD_DIFF 61 | assert kayak.util.checkgrad(B, D) < MAX_GRAD_DIFF 62 | 63 | 64 | def test_matconcat_grad_3(): 65 | npr.seed(3) 66 | 67 | for ii in xrange(NUM_TRIALS): 68 | 69 | np_A = npr.randn(5,6) 70 | A = kayak.Parameter(np_A) 71 | C = kayak.Concatenate(0, A, A) 72 | D = kayak.MatSum(C) 73 | 74 | D.value 75 | assert D.grad(A).shape == (5,6) 76 | assert kayak.util.checkgrad(A, D) < MAX_GRAD_DIFF 77 | 78 | def test_matconcat_grad_4(): 79 | npr.seed(3) 80 | 81 | for ii in xrange(NUM_TRIALS): 82 | 83 | np_A = npr.randn(5,6) 84 | np_B = npr.randn(5,3) 85 | np_C = npr.randn(5,7) 86 | A = kayak.Parameter(np_A) 87 | B = kayak.Parameter(np_B) 88 | C = kayak.Parameter(np_C) 89 | D = kayak.Concatenate(1, A, B, C) 90 | E = kayak.MatSum(D) 91 | 92 | assert E.grad(A).shape == (5,6) 93 | assert E.grad(B).shape == (5,3) 94 | assert E.grad(C).shape == (5,7) 95 | assert kayak.util.checkgrad(A, E) < MAX_GRAD_DIFF 96 | assert kayak.util.checkgrad(B, E) < MAX_GRAD_DIFF 97 | assert kayak.util.checkgrad(C, E) < MAX_GRAD_DIFF 98 | -------------------------------------------------------------------------------- /tests/test_MatDet.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | import scipy.linalg as spla 4 | import kayak 5 | 6 | from . 
import * 7 | from nose.tools import assert_less, assert_equal 8 | 9 | def test_matdet_values_1(): 10 | npr.seed(1) 11 | 12 | for ii in xrange(NUM_TRIALS): 13 | 14 | np_A = npr.randn(12,6) 15 | A = np.dot(np_A.T, np_A) + 1e-6*np.eye(6) 16 | B = kayak.Parameter(A) 17 | D = kayak.MatDet(B) 18 | 19 | assert_less((D.value - spla.det(A))**2, 1e-6) 20 | 21 | def test_matdet_grad_1(): 22 | npr.seed(1) 23 | 24 | for ii in xrange(NUM_TRIALS): 25 | 26 | np_A = npr.randn(12,6) 27 | A = np.dot(np_A.T, np_A) + 1e-6*np.eye(6) 28 | B = kayak.Parameter(A) 29 | D = kayak.MatDet(B) 30 | 31 | assert_less((D.value - spla.det(A))**2, 1e-6) 32 | 33 | assert_equal(D.grad(B).shape, B.shape) 34 | assert_less(kayak.util.checkgrad(B, D), MAX_GRAD_DIFF) 35 | 36 | -------------------------------------------------------------------------------- /tests/test_MatMean.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | from nose.tools import assert_less 6 | 7 | from . import * 8 | 9 | def test_scalar_value(): 10 | npr.seed(1) 11 | 12 | for ii in xrange(NUM_TRIALS): 13 | npX = npr.randn() 14 | X = kayak.Parameter( npX ) 15 | Y = kayak.MatMean(X) 16 | 17 | # Verify that a scalar is reproduced. 18 | assert close_float(Y.value, npX) 19 | 20 | def test_scalar_grad(): 21 | npr.seed(2) 22 | 23 | for ii in xrange(NUM_TRIALS): 24 | npX = npr.randn() 25 | X = kayak.Parameter( npX ) 26 | Y = kayak.MatMean(X) 27 | 28 | # Verify that the gradient is one. 29 | Y.value 30 | assert Y.grad(X) == 1.0 31 | assert_less(kayak.util.checkgrad(X, Y), MAX_GRAD_DIFF) 32 | 33 | def test_vector_value_1(): 34 | npr.seed(3) 35 | 36 | for ii in xrange(NUM_TRIALS): 37 | npX = npr.randn(10,1) 38 | X = kayak.Parameter( npX ) 39 | Y = kayak.MatMean(X) 40 | # Verify the sum. 41 | assert close_float(Y.value, np.mean(npX)) 42 | 43 | def test_vector_grad_1(): 44 | npr.seed(4) 45 | 46 | for ii in xrange(NUM_TRIALS): 47 | npX = npr.randn(10,1) 48 | X = kayak.Parameter( npX ) 49 | Y = kayak.MatMean(X) 50 | 51 | # Verify the gradient. 52 | Y.value 53 | assert Y.grad(X).shape == npX.shape 54 | assert np.all(close_float(Y.grad(X), 1.0/float(npX.size) * np.ones(npX.shape))) 55 | assert_less(kayak.util.checkgrad(X, Y), MAX_GRAD_DIFF) 56 | 57 | def test_vector_value_2(): 58 | npr.seed(5) 59 | 60 | for ii in xrange(NUM_TRIALS): 61 | npX = npr.randn(1,10) 62 | X = kayak.Parameter( npX ) 63 | Y = kayak.MatMean(X) 64 | 65 | # Verify the sum. 66 | assert close_float(Y.value, np.mean(npX)) 67 | 68 | def test_vector_grad_2(): 69 | npr.seed(6) 70 | 71 | for ii in xrange(NUM_TRIALS): 72 | npX = npr.randn(1,10) 73 | X = kayak.Parameter( npX ) 74 | Y = kayak.MatMean(X) 75 | 76 | # Verify the gradient. 77 | Y.value 78 | assert Y.grad(X).shape == npX.shape 79 | assert np.all(close_float(Y.grad(X), 1.0/float(np.prod(npX.shape)) * np.ones(npX.shape))) 80 | assert_less(kayak.util.checkgrad(X, Y), MAX_GRAD_DIFF) 81 | 82 | def test_matrix_value(): 83 | npr.seed(7) 84 | 85 | for ii in xrange(NUM_TRIALS): 86 | npX = npr.randn(10,20) 87 | X = kayak.Parameter( npX ) 88 | Y = kayak.MatMean(X) 89 | 90 | # Verify the value. 91 | assert close_float(Y.value, np.mean(npX)) 92 | 93 | def test_matrix_grad(): 94 | npr.seed(8) 95 | 96 | for ii in xrange(NUM_TRIALS): 97 | npX = npr.randn(10,20) 98 | X = kayak.Parameter( npX ) 99 | Y = kayak.MatMean(X) 100 | 101 | # Verify the value. 
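# The gradient of a full mean is uniform: each entry of X contributes 1/X.size,
# so Y.grad(X) should equal np.ones(npX.shape) / npX.size, which is exactly what
# the assertions below verify.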
102 | Y.value 103 | assert Y.grad(X).shape == npX.shape 104 | assert np.all(close_float(Y.grad(X), 1.0/float(np.prod(npX.shape)) * np.ones(npX.shape))) 105 | assert_less(kayak.util.checkgrad(X, Y), MAX_GRAD_DIFF) 106 | 107 | def test_nested_value_1(): 108 | npr.seed(9) 109 | 110 | for ii in xrange(NUM_TRIALS): 111 | npX = npr.randn(10,20) 112 | X = kayak.Parameter( npX ) 113 | Y = kayak.MatMean(X, axis=0) 114 | Z = kayak.MatMean(Y) 115 | 116 | assert np.all(close_float(Y.value, np.mean(npX, axis=0))) 117 | assert close_float(Z.value, np.mean(npX)) 118 | 119 | def test_nested_grad_1(): 120 | npr.seed(10) 121 | 122 | for ii in xrange(NUM_TRIALS): 123 | npX = npr.randn(10,20) 124 | X = kayak.Parameter( npX ) 125 | Y = kayak.MatMean(X, axis=0) 126 | Z = kayak.MatMean(Y) 127 | 128 | assert Z.grad(X).shape == npX.shape 129 | assert np.all(close_float(Z.grad(X),1.0/float(np.prod(npX.shape)) * np.ones(npX.shape))) 130 | assert_less(kayak.util.checkgrad(X, Z), MAX_GRAD_DIFF) 131 | 132 | def test_nested_value_2(): 133 | npr.seed(11) 134 | 135 | for ii in xrange(NUM_TRIALS): 136 | npX = npr.randn(10,20) 137 | X = kayak.Parameter( npX ) 138 | Y = kayak.MatMean(X, axis=1) 139 | Z = kayak.MatMean(Y) 140 | 141 | assert np.all(close_float(Y.value.ravel(), np.mean(npX, axis=1))) 142 | assert close_float(Z.value, np.mean(npX)) 143 | 144 | def test_nested_grad_2(): 145 | npr.seed(12) 146 | 147 | for ii in xrange(NUM_TRIALS): 148 | npX = npr.randn(10,20) 149 | X = kayak.Parameter( npX ) 150 | Y = kayak.MatMean(X, axis=1) 151 | Z = kayak.MatMean(Y) 152 | 153 | assert Z.grad(X).shape == npX.shape 154 | assert np.all(close_float(Z.grad(X), 1.0/float(np.prod(npX.shape)) * np.ones(npX.shape))) 155 | assert_less(kayak.util.checkgrad(X, Z), MAX_GRAD_DIFF) 156 | 157 | def test_tensor_value_1(): 158 | npr.seed(13) 159 | 160 | for ii in xrange(NUM_TRIALS): 161 | npX = npr.randn(10,20,30) 162 | X = kayak.Parameter( npX ) 163 | Y = kayak.MatMean(X) 164 | 165 | assert X.shape == npX.shape 166 | assert close_float(Y.value, np.mean(npX)) 167 | 168 | def test_tensor_value_2(): 169 | npr.seed(14) 170 | 171 | for ii in xrange(NUM_TRIALS): 172 | npX = npr.randn(10,20,30) 173 | X = kayak.Parameter( npX ) 174 | Y = kayak.MatMean(X, axis=2) 175 | 176 | assert np.all(close_float(Y.value, np.expand_dims(np.mean(npX, axis=2), axis=2))) 177 | 178 | def test_tensor_value_3(): 179 | npr.seed(15) 180 | 181 | for ii in xrange(NUM_TRIALS): 182 | npX = npr.randn(10,20,30) 183 | X = kayak.Parameter( npX ) 184 | Y = kayak.MatMean(X, axis=1) 185 | 186 | assert np.all(close_float(Y.value, np.expand_dims(np.mean(npX, axis=1), axis=1))) 187 | 188 | def test_tensor_value_4(): 189 | npr.seed(16) 190 | 191 | for ii in xrange(NUM_TRIALS): 192 | npX = npr.randn(10,20,30) 193 | X = kayak.Parameter( npX ) 194 | Y = kayak.MatMean(X, axis=0) 195 | 196 | assert np.all(close_float(Y.value, np.expand_dims(np.mean(npX, axis=0), axis=0))) 197 | 198 | def test_keepdims_value_1(): 199 | npr.seed(9) 200 | 201 | for ii in xrange(NUM_TRIALS): 202 | npX = npr.randn(10,20) 203 | X = kayak.Parameter( npX ) 204 | Y = kayak.MatMean(X, axis=0, keepdims=False) 205 | Z = kayak.MatMean(Y) 206 | 207 | assert Y.shape == np.mean(npX, axis=0, keepdims=False).shape 208 | assert np.all(close_float(Y.value, np.mean(npX, axis=0, keepdims=False))) 209 | assert close_float(Z.value, np.mean(npX)) 210 | 211 | def test_keepdims_grad_1(): 212 | npr.seed(10) 213 | 214 | for ii in xrange(NUM_TRIALS): 215 | npX = npr.randn(10,20) 216 | X = kayak.Parameter( npX ) 217 | Y = kayak.MatMean(X, 
axis=0, keepdims=False) 218 | Z = kayak.MatMean(Y) 219 | 220 | assert Z.grad(X).shape == npX.shape 221 | assert np.all(close_float(Z.grad(X), 1.0/float(np.prod(npX.shape)) * np.ones(npX.shape))) 222 | assert_less(kayak.util.checkgrad(X, Z), MAX_GRAD_DIFF) 223 | 224 | def test_keepdims_grad_2(): 225 | npr.seed(10) 226 | 227 | for ii in xrange(NUM_TRIALS): 228 | npW = npr.randn(5,10,20) 229 | npX = npr.randn(5,10,20) 230 | W = kayak.Parameter( npW ) 231 | X = kayak.Parameter( npX ) 232 | Y = W * X 233 | Z = kayak.MatMean(Y, axis=2, keepdims=False) 234 | S = kayak.MatMean(Z) 235 | 236 | assert S.grad(W).shape == npW.shape 237 | assert_less(kayak.util.checkgrad(X, S), MAX_GRAD_DIFF) -------------------------------------------------------------------------------- /tests/test_MatMult.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . import * 7 | 8 | def test_matmult_values_1(): 9 | npr.seed(1) 10 | 11 | for ii in xrange(NUM_TRIALS): 12 | 13 | np_A = npr.randn(5,6) 14 | np_B = npr.randn(6,7) 15 | A = kayak.Parameter(np_A) 16 | B = kayak.Parameter(np_B) 17 | C = kayak.MatMult(A, B) 18 | 19 | assert C.value.shape == (5,7) 20 | assert np.all(close_float(C.value, np.dot(np_A, np_B))) 21 | 22 | def test_matmult_values_2(): 23 | npr.seed(2) 24 | 25 | for ii in xrange(NUM_TRIALS): 26 | 27 | np_A = npr.randn(5,5) 28 | A = kayak.Parameter(np_A) 29 | C = kayak.MatMult(A, A) 30 | 31 | assert C.value.shape == (5,5) 32 | assert np.all(close_float(C.value, np.dot(np_A, np_A))) 33 | 34 | def test_matmult_values_3(): 35 | npr.seed(3) 36 | 37 | for ii in xrange(NUM_TRIALS): 38 | 39 | np_A = npr.randn(5,6) 40 | np_B = npr.randn(6,7) 41 | np_C = npr.randn(7,8) 42 | A = kayak.Parameter(np_A) 43 | B = kayak.Parameter(np_B) 44 | C = kayak.Parameter(np_C) 45 | D = kayak.MatMult(A, B, C) 46 | 47 | assert D.value.shape == (5,8) 48 | assert np.all(close_float(D.value, np.dot(np_A, np.dot(np_B, np_C)))) 49 | 50 | def test_matmult_grad_1(): 51 | npr.seed(3) 52 | 53 | for ii in xrange(NUM_TRIALS): 54 | 55 | np_A = npr.randn(5,6) 56 | np_B = npr.randn(6,7) 57 | A = kayak.Parameter(np_A) 58 | B = kayak.Parameter(np_B) 59 | C = kayak.MatMult(A, B) 60 | D = kayak.MatSum(C) 61 | 62 | D.value 63 | assert D.grad(A).shape == (5,6) 64 | assert D.grad(B).shape == (6,7) 65 | assert kayak.util.checkgrad(A, D) < MAX_GRAD_DIFF 66 | assert kayak.util.checkgrad(B, D) < MAX_GRAD_DIFF 67 | 68 | def test_matmult_grad_2(): 69 | npr.seed(4) 70 | 71 | for ii in xrange(NUM_TRIALS): 72 | 73 | np_A = npr.randn(5,5) 74 | A = kayak.Parameter(np_A) 75 | C = kayak.MatMult(A, A) 76 | D = kayak.MatSum(C) 77 | 78 | D.value 79 | assert D.grad(A).shape == (5,5) 80 | assert kayak.util.checkgrad(A, D) < MAX_GRAD_DIFF 81 | 82 | def test_matmult_grad_3(): 83 | npr.seed(5) 84 | 85 | for ii in xrange(NUM_TRIALS): 86 | 87 | np_A = npr.randn(5,6) 88 | np_B = npr.randn(6,7) 89 | np_C = npr.randn(7,8) 90 | A = kayak.Parameter(np_A) 91 | B = kayak.Parameter(np_B) 92 | C = kayak.Parameter(np_C) 93 | D = kayak.MatMult(A, B, C) 94 | E = kayak.MatSum(kayak.SoftReLU(D)) 95 | 96 | assert E.grad(A).shape == (5,6) 97 | assert E.grad(B).shape == (6,7) 98 | assert E.grad(C).shape == (7,8) 99 | assert kayak.util.checkgrad(A, E) < MAX_GRAD_DIFF 100 | assert kayak.util.checkgrad(B, E) < MAX_GRAD_DIFF 101 | assert kayak.util.checkgrad(C, E) < MAX_GRAD_DIFF 102 | 103 | def test_matmult_grad_mat_vect(): 104 | npr.seed(5) 105 | 106 | for ii in xrange(NUM_TRIALS): 
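# Matrix-vector case: np.dot of a (5,6) matrix with a (6,) vector has shape (5,),
# and the gradients asserted below must come back in each operand's own shape.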
107 | 108 | np_A = npr.randn(5,6) 109 | np_B = npr.randn(6) 110 | np_C = npr.randn(5,) 111 | A = kayak.Parameter(np_A) 112 | B = kayak.Parameter(np_B) 113 | C = kayak.Parameter(np_C) 114 | D = kayak.MatMult(A, B) 115 | E = kayak.MatSum(kayak.ElemMult(C, D)) 116 | 117 | assert E.grad(A).shape == (5,6) 118 | assert E.grad(B).shape == (6,) 119 | assert kayak.util.checkgrad(A, E) < MAX_GRAD_DIFF 120 | assert kayak.util.checkgrad(B, E) < MAX_GRAD_DIFF 121 | 122 | def test_matmult_grad_vect_mat(): 123 | npr.seed(5) 124 | 125 | for ii in xrange(NUM_TRIALS): 126 | 127 | np_A = npr.randn(6,) 128 | np_B = npr.randn(6,7) 129 | np_C = npr.randn(7,) 130 | A = kayak.Parameter(np_A) 131 | B = kayak.Parameter(np_B) 132 | C = kayak.Parameter(np_C) 133 | D = kayak.MatMult(A, B) 134 | E = kayak.MatSum(kayak.ElemMult(C, D)) 135 | 136 | assert E.grad(A).shape == (6,) 137 | assert E.grad(B).shape == (6, 7) 138 | assert kayak.util.checkgrad(A, E) < MAX_GRAD_DIFF 139 | assert kayak.util.checkgrad(B, E) < MAX_GRAD_DIFF 140 | -------------------------------------------------------------------------------- /tests/test_MatSum.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . import * 7 | 8 | def test_scalar_value(): 9 | npr.seed(1) 10 | 11 | for ii in xrange(NUM_TRIALS): 12 | npX = npr.randn() 13 | X = kayak.Parameter( npX ) 14 | Y = kayak.MatSum(X) 15 | 16 | # Verify that a scalar is reproduced. 17 | assert close_float(Y.value, npX) 18 | 19 | def test_scalar_grad(): 20 | npr.seed(2) 21 | 22 | for ii in xrange(NUM_TRIALS): 23 | npX = npr.randn() 24 | X = kayak.Parameter( npX ) 25 | Y = kayak.MatSum(X) 26 | 27 | # Verify that the gradient is one. 28 | Y.value 29 | assert Y.grad(X) == 1.0 30 | assert kayak.util.checkgrad(X, Y) < MAX_GRAD_DIFF 31 | 32 | def test_vector_value_1(): 33 | npr.seed(3) 34 | 35 | for ii in xrange(NUM_TRIALS): 36 | npX = npr.randn(10,1) 37 | X = kayak.Parameter( npX ) 38 | Y = kayak.MatSum(X) 39 | # Verify the sum. 40 | assert close_float(Y.value, np.sum(npX)) 41 | 42 | def test_vector_grad_1(): 43 | npr.seed(4) 44 | 45 | for ii in xrange(NUM_TRIALS): 46 | npX = npr.randn(10,1) 47 | X = kayak.Parameter( npX ) 48 | Y = kayak.MatSum(X) 49 | 50 | # Verify the gradient. 51 | Y.value 52 | assert Y.grad(X).shape == npX.shape 53 | assert np.all(close_float(Y.grad(X), np.ones(npX.shape))) 54 | assert kayak.util.checkgrad(X, Y) < MAX_GRAD_DIFF 55 | 56 | def test_vector_value_2(): 57 | npr.seed(5) 58 | 59 | for ii in xrange(NUM_TRIALS): 60 | npX = npr.randn(1,10) 61 | X = kayak.Parameter( npX ) 62 | Y = kayak.MatSum(X) 63 | 64 | # Verify the sum. 65 | assert close_float(Y.value, np.sum(npX)) 66 | 67 | def test_vector_grad_2(): 68 | npr.seed(6) 69 | 70 | for ii in xrange(NUM_TRIALS): 71 | npX = npr.randn(1,10) 72 | X = kayak.Parameter( npX ) 73 | Y = kayak.MatSum(X) 74 | 75 | # Verify the gradient. 76 | Y.value 77 | assert Y.grad(X).shape == npX.shape 78 | assert np.all(close_float(Y.grad(X), np.ones(npX.shape))) 79 | assert kayak.util.checkgrad(X, Y) < MAX_GRAD_DIFF 80 | 81 | def test_matrix_value(): 82 | npr.seed(7) 83 | 84 | for ii in xrange(NUM_TRIALS): 85 | npX = npr.randn(10,20) 86 | X = kayak.Parameter( npX ) 87 | Y = kayak.MatSum(X) 88 | 89 | # Verify the value. 
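# With no axis argument, MatSum reduces over every element, matching np.sum;
# e.g. np.sum(np.ones((10, 20))) == 200.0.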
90 | assert close_float(Y.value, np.sum(npX)) 91 | 92 | def test_matrix_grad(): 93 | npr.seed(8) 94 | 95 | for ii in xrange(NUM_TRIALS): 96 | npX = npr.randn(10,20) 97 | X = kayak.Parameter( npX ) 98 | Y = kayak.MatSum(X) 99 | 100 | # Verify the value. 101 | Y.value 102 | assert Y.grad(X).shape == npX.shape 103 | assert np.all(close_float(Y.grad(X), np.ones(npX.shape))) 104 | assert kayak.util.checkgrad(X, Y) < MAX_GRAD_DIFF 105 | 106 | def test_nested_value_1(): 107 | npr.seed(9) 108 | 109 | for ii in xrange(NUM_TRIALS): 110 | npX = npr.randn(10,20) 111 | X = kayak.Parameter( npX ) 112 | Y = kayak.MatSum(X, axis=0) 113 | Z = kayak.MatSum(Y) 114 | 115 | assert np.all(close_float(Y.value, np.sum(npX, axis=0))) 116 | assert close_float(Z.value, np.sum(npX)) 117 | 118 | def test_nested_grad_1(): 119 | npr.seed(10) 120 | 121 | for ii in xrange(NUM_TRIALS): 122 | npX = npr.randn(10,20) 123 | X = kayak.Parameter( npX ) 124 | Y = kayak.MatSum(X, axis=0) 125 | Z = kayak.MatSum(Y) 126 | 127 | assert Z.grad(X).shape == npX.shape 128 | assert np.all(close_float(Z.grad(X), np.ones(npX.shape))) 129 | assert kayak.util.checkgrad(X, Z) < MAX_GRAD_DIFF 130 | 131 | def test_nested_value_2(): 132 | npr.seed(11) 133 | 134 | for ii in xrange(NUM_TRIALS): 135 | npX = npr.randn(10,20) 136 | X = kayak.Parameter( npX ) 137 | Y = kayak.MatSum(X, axis=1) 138 | Z = kayak.MatSum(Y) 139 | 140 | assert np.all(close_float(Y.value.ravel(), np.sum(npX, axis=1))) 141 | assert close_float(Z.value, np.sum(npX)) 142 | 143 | def test_nested_grad_2(): 144 | npr.seed(12) 145 | 146 | for ii in xrange(NUM_TRIALS): 147 | npX = npr.randn(10,20) 148 | X = kayak.Parameter( npX ) 149 | Y = kayak.MatSum(X, axis=1) 150 | Z = kayak.MatSum(Y) 151 | 152 | assert Z.grad(X).shape == npX.shape 153 | assert np.all(close_float(Z.grad(X), np.ones(npX.shape))) 154 | assert kayak.util.checkgrad(X, Z) < MAX_GRAD_DIFF 155 | 156 | def test_tensor_value_1(): 157 | npr.seed(13) 158 | 159 | for ii in xrange(NUM_TRIALS): 160 | npX = npr.randn(10,20,30) 161 | X = kayak.Parameter( npX ) 162 | Y = kayak.MatSum(X) 163 | 164 | assert X.shape == npX.shape 165 | assert close_float(Y.value, np.sum(npX)) 166 | 167 | def test_tensor_value_2(): 168 | npr.seed(14) 169 | 170 | for ii in xrange(NUM_TRIALS): 171 | npX = npr.randn(10,20,30) 172 | X = kayak.Parameter( npX ) 173 | Y = kayak.MatSum(X, axis=2) 174 | 175 | assert np.all(close_float(Y.value, np.expand_dims(np.sum(npX, axis=2), axis=2))) 176 | 177 | def test_tensor_value_3(): 178 | npr.seed(15) 179 | 180 | for ii in xrange(NUM_TRIALS): 181 | npX = npr.randn(10,20,30) 182 | X = kayak.Parameter( npX ) 183 | Y = kayak.MatSum(X, axis=1) 184 | 185 | assert np.all(close_float(Y.value, np.expand_dims(np.sum(npX, axis=1), axis=1))) 186 | 187 | def test_tensor_value_4(): 188 | npr.seed(16) 189 | 190 | for ii in xrange(NUM_TRIALS): 191 | npX = npr.randn(10,20,30) 192 | X = kayak.Parameter( npX ) 193 | Y = kayak.MatSum(X, axis=0) 194 | 195 | assert np.all(close_float(Y.value, np.expand_dims(np.sum(npX, axis=0), axis=0))) 196 | 197 | def test_keepdims_value_1(): 198 | npr.seed(9) 199 | 200 | for ii in xrange(NUM_TRIALS): 201 | npX = npr.randn(10,20) 202 | X = kayak.Parameter( npX ) 203 | Y = kayak.MatSum(X, axis=0, keepdims=False) 204 | Z = kayak.MatSum(Y) 205 | 206 | assert Y.shape == np.sum(npX, axis=0, keepdims=False).shape 207 | assert np.all(close_float(Y.value, np.sum(npX, axis=0, keepdims=False))) 208 | assert close_float(Z.value, np.sum(npX)) 209 | 210 | def test_keepdims_grad_1(): 211 | npr.seed(10) 212 | 213 | 
for ii in xrange(NUM_TRIALS): 214 | npX = npr.randn(10,20) 215 | X = kayak.Parameter( npX ) 216 | Y = kayak.MatSum(X, axis=0, keepdims=False) 217 | Z = kayak.MatSum(Y) 218 | 219 | assert Z.grad(X).shape == npX.shape 220 | assert np.all(close_float(Z.grad(X), np.ones(npX.shape))) 221 | assert kayak.util.checkgrad(X, Z) < MAX_GRAD_DIFF 222 | 223 | def test_keepdims_grad_2(): 224 | npr.seed(10) 225 | 226 | for ii in xrange(NUM_TRIALS): 227 | npW = npr.randn(5,10,20) 228 | npX = npr.randn(5,10,20) 229 | W = kayak.Parameter( npW ) 230 | X = kayak.Parameter( npX ) 231 | Y = W * X 232 | Z = kayak.MatSum(Y, axis=2, keepdims=False) 233 | S = kayak.MatSum(Z) 234 | 235 | assert S.grad(W).shape == npW.shape 236 | # assert np.all(close_float(Z.grad(X), np.ones(npX.shape))) 237 | assert kayak.util.checkgrad(X, S) < MAX_GRAD_DIFF -------------------------------------------------------------------------------- /tests/test_NExp.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . import * 7 | 8 | def test_scalar_value(): 9 | npr.seed(1) 10 | 11 | for ii in xrange(NUM_TRIALS): 12 | np_X = npr.randn() 13 | 14 | X = kayak.Parameter(np_X) 15 | out = kayak.NExp(X) 16 | 17 | assert close_float(out.value, 1.0 - np.exp(-np.abs(np_X))) 18 | 19 | def test_scalar_grad(): 20 | npr.seed(2) 21 | 22 | for ii in xrange(NUM_TRIALS): 23 | while True: 24 | np_X = npr.randn() 25 | if np.abs(np_X) > 0.1: 26 | break 27 | 28 | X = kayak.Parameter(np_X) 29 | out = kayak.NExp(X) 30 | 31 | assert kayak.util.checkgrad(X, out) < MAX_GRAD_DIFF 32 | 33 | def test_scalar_value_2(): 34 | npr.seed(3) 35 | 36 | for ii in xrange(NUM_TRIALS): 37 | np_X = npr.randn() 38 | wt = np.exp(npr.randn()) 39 | 40 | X = kayak.Parameter(np_X) 41 | out = kayak.NExp(X, weight=wt) 42 | 43 | assert close_float(out.value, wt * (1.0 - np.exp(-np.abs(np_X)))) 44 | 45 | def test_scalar_grad_2(): 46 | npr.seed(4) 47 | 48 | for ii in xrange(NUM_TRIALS): 49 | while True: 50 | np_X = npr.randn() 51 | if np.abs(np_X) > 0.1: 52 | break 53 | wt = np.exp(npr.randn()) 54 | 55 | X = kayak.Parameter(np_X) 56 | out = kayak.NExp(X, weight=wt) 57 | 58 | assert kayak.util.checkgrad(X, out) < MAX_GRAD_DIFF 59 | 60 | def test_vector_value(): 61 | npr.seed(5) 62 | 63 | for ii in xrange(NUM_TRIALS): 64 | np_X = npr.randn(10,1) 65 | wt = np.exp(npr.randn()) 66 | 67 | X = kayak.Parameter(np_X) 68 | out = kayak.NExp(X, weight=wt) 69 | 70 | assert close_float(out.value, wt * np.sum(1.0 - np.exp(-np.abs(np_X)))) 71 | 72 | def test_vector_grad(): 73 | npr.seed(6) 74 | 75 | for ii in xrange(NUM_TRIALS): 76 | # Draw a full vector, then shift every entry away from the kink at zero. 77 | np_X = npr.randn(10,1) 78 | np_X = np.where(np_X >= 0, np_X + 0.1, np_X - 0.1) 79 | assert np.all(np.abs(np_X) >= 0.1) 80 | wt = np.exp(npr.randn()) 81 | 82 | X = kayak.Parameter(np_X) 83 | out = kayak.NExp(X, weight=wt) 84 | 85 | assert kayak.util.checkgrad(X, out) < MAX_GRAD_DIFF 86 | 87 | def test_matrix_value(): 88 | npr.seed(7) 89 | 90 | for ii in xrange(NUM_TRIALS): 91 | np_X = npr.randn(10,20) 92 | wt = np.exp(npr.randn()) 93 | 94 | X = kayak.Parameter(np_X) 95 | out = kayak.NExp(X, weight=wt) 96 | 97 | assert close_float(out.value, wt * np.sum(1.0 - np.exp(-np.abs(np_X)))) 98 | 99 | def test_matrix_grad(): 100 | npr.seed(8) 101 | 102 | for ii in xrange(NUM_TRIALS): 103 | # Shift a full matrix away from zero; resampling until all entries clear 0.1 would almost never terminate. 104 | np_X = npr.randn(10,20) 105 | np_X = np.where(np_X >= 0, np_X + 0.1, np_X - 0.1) 106 | assert np.all(np.abs(np_X) >= 0.1) 107 | wt = np.exp(npr.randn()) 108 | 109 | X = kayak.Parameter(np_X) 110 | out = kayak.NExp(X, weight=wt) 111 | 112 | assert kayak.util.checkgrad(X, out) < MAX_GRAD_DIFF
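# NExp is a saturating penalty, value = weight * sum(1 - exp(-|x|)), per the
# value tests above. Its derivative involves sign(x), so these gradient tests
# keep every entry of x away from the kink at zero before running checkgrad.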
113 | 114 | def test_tensor_value(): 115 | npr.seed(9) 116 | 117 | for ii in xrange(NUM_TRIALS): 118 | np_X = npr.randn(10,20,5) 119 | wt = np.exp(npr.randn()) 120 | 121 | X = kayak.Parameter(np_X) 122 | out = kayak.NExp(X, weight=wt) 123 | 124 | assert close_float(out.value, wt * np.sum(1.0 - np.exp(-np.abs(np_X)))) 125 | 126 | def test_tensor_grad(): 127 | npr.seed(10) 128 | 129 | for ii in xrange(NUM_TRIALS): 130 | # Same shift trick for a full tensor. 131 | np_X = npr.randn(10,20,5) 132 | np_X = np.where(np_X >= 0, np_X + 0.1, np_X - 0.1) 133 | assert np.all(np.abs(np_X) >= 0.1) 134 | wt = np.exp(npr.randn()) 135 | 136 | X = kayak.Parameter(np_X) 137 | out = kayak.NExp(X, weight=wt) 138 | 139 | assert kayak.util.checkgrad(X, out) < MAX_GRAD_DIFF 140 | 141 | -------------------------------------------------------------------------------- /tests/test_OperatorAdd.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . import * 7 | # These behaviors require prepending singletons. Do we want to keep them? 8 | # def test_0d_opplus_2d_scalar_value(): 9 | # npr.seed(1) 10 | 11 | # for ii in xrange(NUM_TRIALS): 12 | # npX1 = npr.randn(1, 1) 13 | # X1 = kayak.Parameter( npX1 ) 14 | # npX2 = np.sum(npr.randn()) # generates a scalar with shape () 15 | # X2 = kayak.Parameter( npX2 ) 16 | # # Y = kayak.MatAdd(X1, X2) 17 | # Y = X1+X2 18 | 19 | # # Verify that a scalar is reproduced. 20 | # assert close_float(Y.value, npX1 + npX2) 21 | 22 | # def test_0d_plus_2d_scalar_grad(): 23 | # npr.seed(2) 24 | # for ii in xrange(NUM_TRIALS): 25 | # npX1 = npr.randn(1, 1) 26 | # X1 = kayak.Parameter( npX1 ) 27 | # npX2 = np.sum(npr.randn()) # generates a scalar with shape () 28 | # X2 = kayak.Parameter( npX2 ) 29 | # Y = X1+X2 30 | 31 | # # Verify that the gradient is one. 
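# (Note: this file exercises the overloaded arithmetic on kayak nodes; writing
# X1 + X2 is expected to build the same graph as kayak.MatAdd(X1, X2), which is
# why the live tests below mirror test_MatAdd.py.)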
32 | # assert Y.grad(X1) == 1.0 33 | # assert Y.grad(X2) == 1.0 34 | # assert kayak.util.checkgrad(X1, Y) < MAX_GRAD_DIFF 35 | # assert kayak.util.checkgrad(X2, Y) < MAX_GRAD_DIFF 36 | # 37 | def test_matadd_values_1(): 38 | npr.seed(1) 39 | 40 | for ii in xrange(NUM_TRIALS): 41 | 42 | np_A = npr.randn(5,6) 43 | np_B = npr.randn(5,6) 44 | A = kayak.Parameter(np_A) 45 | B = kayak.Parameter(np_B) 46 | C = A+B 47 | 48 | assert C.shape == np_A.shape 49 | assert np.all( close_float(C.value, np_A+np_B)) 50 | 51 | def test_matadd_values_2(): 52 | npr.seed(2) 53 | 54 | for ii in xrange(NUM_TRIALS): 55 | 56 | np_A = npr.randn(5,6) 57 | np_B = npr.randn(5,6) 58 | np_C = npr.randn(5,6) 59 | A = kayak.Parameter(np_A) 60 | B = kayak.Parameter(np_B) 61 | C = kayak.Parameter(np_C) 62 | D = A+B+C 63 | 64 | assert D.shape == np_A.shape 65 | assert np.all( close_float(D.value, np_A+np_B+np_C)) 66 | 67 | def test_matadd_values_3(): 68 | npr.seed(3) 69 | 70 | for ii in xrange(NUM_TRIALS): 71 | 72 | np_A = npr.randn(5,6) 73 | np_B = npr.randn(1,6) 74 | A = kayak.Parameter(np_A) 75 | B = kayak.Parameter(np_B) 76 | C = A+B 77 | 78 | assert C.shape == (5,6) 79 | assert np.all( close_float(C.value, np_A+np_B)) 80 | 81 | def test_matadd_values_4(): 82 | npr.seed(4) 83 | 84 | for ii in xrange(NUM_TRIALS): 85 | 86 | np_A = npr.randn(5,6) 87 | np_B = npr.randn(5,1) 88 | A = kayak.Parameter(np_A) 89 | B = kayak.Parameter(np_B) 90 | C = A+B 91 | 92 | assert C.shape == (5,6) 93 | assert np.all( close_float(C.value, np_A+np_B)) 94 | 95 | def test_matadd_values_5(): 96 | npr.seed(5) 97 | 98 | for ii in xrange(NUM_TRIALS): 99 | 100 | np_A = npr.randn(1,6) 101 | np_B = npr.randn(5,1) 102 | A = kayak.Parameter(np_A) 103 | B = kayak.Parameter(np_B) 104 | C = A+B 105 | 106 | assert C.shape == (5,6) 107 | assert np.all( close_float(C.value, np_A+np_B)) 108 | 109 | def test_matadd_values_6(): 110 | npr.seed(6) 111 | 112 | for ii in xrange(NUM_TRIALS): 113 | 114 | np_A = npr.randn(5,6) 115 | np_B = npr.randn(1,1) 116 | A = kayak.Parameter(np_A) 117 | B = kayak.Parameter(np_B) 118 | C = A+B 119 | 120 | assert C.shape == (5,6) 121 | assert np.all( close_float(C.value, np_A+np_B)) 122 | 123 | def test_matadd_values_7(): 124 | npr.seed(7) 125 | 126 | for ii in xrange(NUM_TRIALS): 127 | 128 | np_A = npr.randn(5,6) 129 | np_B = npr.randn(5,6) 130 | A = kayak.Parameter(np_A) 131 | B = kayak.Parameter(np_B) 132 | D = A+B+A 133 | 134 | assert D.shape == (5,6) 135 | assert np.all( close_float(D.value, 2*np_A + np_B)) 136 | 137 | def test_matadd_grad_1(): 138 | npr.seed(8) 139 | 140 | for ii in xrange(NUM_TRIALS): 141 | 142 | np_A = npr.randn(5,6) 143 | np_B = npr.randn(5,6) 144 | A = kayak.Parameter(np_A) 145 | B = kayak.Parameter(np_B) 146 | C = A+B 147 | D = kayak.MatSum(C) 148 | 149 | D.value 150 | assert kayak.util.checkgrad(A, D) < MAX_GRAD_DIFF 151 | assert kayak.util.checkgrad(B, D) < MAX_GRAD_DIFF 152 | 153 | def test_matadd_grad_2(): 154 | npr.seed(9) 155 | 156 | for ii in xrange(NUM_TRIALS): 157 | 158 | np_A = npr.randn(5,6) 159 | np_B = npr.randn(5,6) 160 | np_C = npr.randn(5,6) 161 | A = kayak.Parameter(np_A) 162 | B = kayak.Parameter(np_B) 163 | C = kayak.Parameter(np_C) 164 | D = A+B+C 165 | E = kayak.MatSum(D) 166 | 167 | E.value 168 | assert kayak.util.checkgrad(A, E) < MAX_GRAD_DIFF 169 | assert kayak.util.checkgrad(B, E) < MAX_GRAD_DIFF 170 | assert kayak.util.checkgrad(C, E) < MAX_GRAD_DIFF 171 | 172 | def test_matadd_grad_3(): 173 | npr.seed(10) 174 | 175 | for ii in xrange(NUM_TRIALS): 176 | 177 | np_A = npr.randn(5,6) 
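# Broadcasting case: the second operand below has shape (1,6), so its gradient
# must be summed back along the broadcast axis to shape (1,6); the shape
# assertions that follow check exactly that.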
178 | np_B = npr.randn(1,6) 179 | A = kayak.Parameter(np_A) 180 | B = kayak.Parameter(np_B) 181 | C = A+B 182 | D = kayak.MatSum(C) 183 | 184 | D.value 185 | print np_A.shape, D.grad(A).shape 186 | print np_B.shape, D.grad(B).shape 187 | assert D.grad(A).shape == np_A.shape 188 | assert D.grad(B).shape == np_B.shape 189 | assert kayak.util.checkgrad(A, D) < MAX_GRAD_DIFF 190 | assert kayak.util.checkgrad(B, D) < MAX_GRAD_DIFF 191 | 192 | def test_matadd_grad_4(): 193 | npr.seed(11) 194 | 195 | for ii in xrange(NUM_TRIALS): 196 | 197 | np_A = npr.randn(5,1) 198 | np_B = npr.randn(5,6) 199 | A = kayak.Parameter(np_A) 200 | B = kayak.Parameter(np_B) 201 | C = A+B 202 | D = kayak.MatSum(C) 203 | 204 | D.value 205 | assert D.grad(A).shape == np_A.shape 206 | assert D.grad(B).shape == np_B.shape 207 | assert kayak.util.checkgrad(A, D) < MAX_GRAD_DIFF 208 | assert kayak.util.checkgrad(B, D) < MAX_GRAD_DIFF 209 | 210 | def test_matadd_grad_5(): 211 | npr.seed(12) 212 | 213 | for ii in xrange(NUM_TRIALS): 214 | 215 | np_A = npr.randn(5,1) 216 | np_B = npr.randn(1,6) 217 | A = kayak.Parameter(np_A) 218 | B = kayak.Parameter(np_B) 219 | C = A+B 220 | D = kayak.MatSum(C) 221 | 222 | D.value 223 | assert D.grad(A).shape == np_A.shape 224 | assert D.grad(B).shape == np_B.shape 225 | assert kayak.util.checkgrad(A, D) < MAX_GRAD_DIFF 226 | assert kayak.util.checkgrad(B, D) < MAX_GRAD_DIFF 227 | 228 | def test_matadd_grad_6(): 229 | npr.seed(13) 230 | 231 | for ii in xrange(NUM_TRIALS): 232 | 233 | np_A = npr.randn(5,6) 234 | np_B = npr.randn(1,1) 235 | A = kayak.Parameter(np_A) 236 | B = kayak.Parameter(np_B) 237 | C = A+B 238 | D = kayak.MatSum(C) 239 | 240 | D.value 241 | assert D.grad(A).shape == np_A.shape 242 | assert D.grad(B).shape == np_B.shape 243 | assert kayak.util.checkgrad(A, D) < MAX_GRAD_DIFF 244 | assert kayak.util.checkgrad(B, D) < MAX_GRAD_DIFF 245 | 246 | def test_matadd_grad_7(): 247 | npr.seed(14) 248 | 249 | for ii in xrange(NUM_TRIALS): 250 | 251 | np_A = npr.randn(5,6) 252 | np_B = npr.randn(5,6) 253 | A = kayak.Parameter(np_A) 254 | B = kayak.Parameter(np_B) 255 | D = A+B+A 256 | E = kayak.MatSum(D) 257 | 258 | E.value 259 | assert E.grad(A).shape == np_A.shape 260 | assert E.grad(B).shape == np_B.shape 261 | assert kayak.util.checkgrad(A, E) < MAX_GRAD_DIFF 262 | assert kayak.util.checkgrad(B, E) < MAX_GRAD_DIFF 263 | 264 | def test_matadd_grad_8(): 265 | npr.seed(15) 266 | 267 | for ii in xrange(NUM_TRIALS): 268 | 269 | np_A = npr.randn(5,6) 270 | np_B = npr.randn(5,6) 271 | A = kayak.Parameter(np_A) 272 | D = A+A 273 | E = kayak.MatSum(D) 274 | 275 | E.value 276 | assert E.grad(A).shape == np_A.shape 277 | assert kayak.util.checkgrad(A, E) < MAX_GRAD_DIFF 278 | -------------------------------------------------------------------------------- /tests/test_OperatorMult.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . 
import * 7 | 8 | def test_elemmult_values_1(): 9 | npr.seed(1) 10 | 11 | for ii in xrange(NUM_TRIALS): 12 | 13 | np_A = npr.randn(5,6) 14 | np_B = npr.randn(5,6) 15 | A = kayak.Parameter(np_A) 16 | B = kayak.Parameter(np_B) 17 | C = A*B 18 | 19 | assert C.shape == np_A.shape 20 | assert np.all( close_float(C.value, np_A*np_B)) 21 | 22 | def test_elemmult_values_2(): 23 | npr.seed(2) 24 | 25 | for ii in xrange(NUM_TRIALS): 26 | 27 | np_A = npr.randn(5,6) 28 | np_B = npr.randn(5,6) 29 | np_C = npr.randn(5,6) 30 | A = kayak.Parameter(np_A) 31 | B = kayak.Parameter(np_B) 32 | C = kayak.Parameter(np_C) 33 | D = A*B*C 34 | 35 | assert D.shape == np_A.shape 36 | assert np.all( close_float(D.value, np_A*np_B*np_C)) 37 | 38 | def test_elemmult_values_3(): 39 | npr.seed(7) 40 | 41 | for ii in xrange(NUM_TRIALS): 42 | 43 | np_A = npr.randn(5,6) 44 | np_B = npr.randn(5,6) 45 | A = kayak.Parameter(np_A) 46 | B = kayak.Parameter(np_B) 47 | D = A*B*A 48 | 49 | assert D.shape == (5,6) 50 | assert np.all( close_float(D.value, np_A**2 * np_B)) 51 | 52 | def test_elemmult_values_4(): 53 | npr.seed(1) 54 | 55 | for ii in xrange(NUM_TRIALS): 56 | 57 | np_A = npr.randn(5,6) 58 | np_B = npr.randn(5,1) 59 | A = kayak.Parameter(np_A) 60 | B = kayak.Parameter(np_B) 61 | C = A*B 62 | 63 | assert C.shape == np_A.shape 64 | assert np.all( close_float(C.value, np_A*np_B)) 65 | 66 | def test_elemmult_values_5(): 67 | npr.seed(2) 68 | 69 | for ii in xrange(NUM_TRIALS): 70 | 71 | np_A = npr.randn(5,1) 72 | np_B = npr.randn(1,6) 73 | np_C = npr.randn(1,1) 74 | A = kayak.Parameter(np_A) 75 | B = kayak.Parameter(np_B) 76 | C = kayak.Parameter(np_C) 77 | D = A*B*C 78 | 79 | assert D.shape == (5,6) 80 | assert np.all( close_float(D.value, np_A*np_B*np_C)) 81 | 82 | def test_elemmult_values_6(): 83 | npr.seed(7) 84 | 85 | for ii in xrange(NUM_TRIALS): 86 | 87 | np_A = npr.randn(5,6) 88 | np_B = npr.randn(1, 1) 89 | A = kayak.Parameter(np_A) 90 | B = kayak.Parameter(np_B) 91 | D = A*B*A 92 | 93 | assert D.shape == (5,6) 94 | assert np.all( close_float(D.value, np_A**2 * np_B)) 95 | 96 | def test_elemmult_grad_1(): 97 | npr.seed(8) 98 | 99 | for ii in xrange(NUM_TRIALS): 100 | 101 | np_A = npr.randn(5,6) 102 | np_B = npr.randn(5,6) 103 | A = kayak.Parameter(np_A) 104 | B = kayak.Parameter(np_B) 105 | C = A*B 106 | D = kayak.MatSum(C) 107 | 108 | D.value 109 | assert kayak.util.checkgrad(A, D) < MAX_GRAD_DIFF 110 | assert kayak.util.checkgrad(B, D) < MAX_GRAD_DIFF 111 | 112 | def test_elemmult_grad_2(): 113 | npr.seed(9) 114 | 115 | for ii in xrange(NUM_TRIALS): 116 | 117 | np_A = npr.randn(5,6) 118 | np_B = npr.randn(5,6) 119 | np_C = npr.randn(5,6) 120 | A = kayak.Parameter(np_A) 121 | B = kayak.Parameter(np_B) 122 | C = kayak.Parameter(np_C) 123 | D = A*B*C 124 | E = kayak.MatSum(D) 125 | 126 | E.value 127 | assert kayak.util.checkgrad(A, E) < MAX_GRAD_DIFF 128 | assert kayak.util.checkgrad(B, E) < MAX_GRAD_DIFF 129 | assert kayak.util.checkgrad(C, E) < MAX_GRAD_DIFF 130 | 131 | def test_elemmult_grad_3(): 132 | npr.seed(14) 133 | 134 | for ii in xrange(NUM_TRIALS): 135 | 136 | np_A = npr.randn(5,6) 137 | np_B = npr.randn(5,6) 138 | A = kayak.Parameter(np_A) 139 | B = kayak.Parameter(np_B) 140 | D = A*B*A 141 | E = kayak.MatSum(D) 142 | 143 | E.value 144 | assert E.grad(A).shape == np_A.shape 145 | assert E.grad(B).shape == np_B.shape 146 | assert kayak.util.checkgrad(A, E) < MAX_GRAD_DIFF 147 | assert kayak.util.checkgrad(B, E) < MAX_GRAD_DIFF 148 | 149 | def test_elemmult_grad_4(): 150 | npr.seed(15) 151 | 152 | for ii in 
xrange(NUM_TRIALS): 153 | 154 | np_A = npr.randn(5,6) 155 | np_B = npr.randn(5,6) 156 | A = kayak.Parameter(np_A) 157 | D = A*A 158 | E = kayak.MatSum(D) 159 | 160 | E.value 161 | assert E.grad(A).shape == np_A.shape 162 | assert kayak.util.checkgrad(A, E) < MAX_GRAD_DIFF 163 | -------------------------------------------------------------------------------- /tests/test_OperatorNeg.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . import * 7 | 8 | def test_neg_values_1(): 9 | npr.seed(1) 10 | 11 | for ii in xrange(NUM_TRIALS): 12 | 13 | np_A = npr.randn(5,6) 14 | A = kayak.Parameter(np_A) 15 | C = -A 16 | 17 | assert C.shape == np_A.shape 18 | assert np.all( close_float(C.value, -np_A)) 19 | 20 | def test_neg_values_2(): 21 | npr.seed(2) 22 | 23 | for ii in xrange(NUM_TRIALS): 24 | 25 | np_A = npr.randn(1) 26 | A = kayak.Parameter(np_A) 27 | D = -A 28 | 29 | assert D.shape == np_A.shape 30 | assert np.all( close_float(D.value, -np_A)) 31 | 32 | def test_neg_grad_1(): 33 | npr.seed(8) 34 | 35 | for ii in xrange(NUM_TRIALS): 36 | 37 | np_A = npr.randn(5,6) 38 | A = kayak.Parameter(np_A) 39 | C = -A 40 | D = kayak.MatSum(C) 41 | 42 | D.value 43 | assert kayak.util.checkgrad(A, D) < MAX_GRAD_DIFF 44 | 45 | def test_neg_grad_2(): 46 | npr.seed(9) 47 | 48 | for ii in xrange(NUM_TRIALS): 49 | 50 | np_A = npr.randn(1) 51 | A = kayak.Parameter(np_A) 52 | D = -A 53 | E = kayak.MatSum(D) 54 | 55 | E.value 56 | assert kayak.util.checkgrad(A, E) < MAX_GRAD_DIFF 57 | -------------------------------------------------------------------------------- /tests/test_Parameter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HIPS/Kayak/1a7d4baa849bbd5a6f6d0486136169899cf25523/tests/test_Parameter.py -------------------------------------------------------------------------------- /tests/test_Reshape.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . import * 7 | 8 | def test_reshape_1(): 9 | npr.seed(1) 10 | 11 | np_A = npr.randn(5,10) 12 | A = kayak.Parameter(np_A) 13 | B = kayak.Reshape(A, (25,2)) 14 | 15 | B.value 16 | assert B.shape == (25,2) 17 | 18 | def test_reshape_2(): 19 | npr.seed(2) 20 | 21 | np_A = npr.randn(5,10) 22 | A = kayak.Parameter(np_A) 23 | B = kayak.Reshape(A, (2,25)) 24 | C = kayak.Parameter(npr.randn(25,5)) 25 | D = kayak.MatMult(B, C) 26 | out = kayak.MatSum(D) 27 | 28 | out.value 29 | assert out.grad(A).shape == np_A.shape 30 | assert kayak.util.checkgrad(A, out) < MAX_GRAD_DIFF 31 | 32 | 33 | -------------------------------------------------------------------------------- /tests/test_SoftMax.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . 
import * 7 | 8 | def test_softmax_values_1(): 9 | npr.seed(1) 10 | 11 | for ii in xrange(NUM_TRIALS): 12 | 13 | np_X = npr.randn(5,6) 14 | X = kayak.Parameter(np_X) 15 | Y = kayak.SoftMax(X, axis=None) 16 | 17 | np_Y = np.exp(np_X) 18 | # np_Y = np_Y / np.sum(np_Y, axis=1)[:,np.newaxis] 19 | np_Y = np_Y / np.sum(np_Y) 20 | 21 | assert Y.shape == np_X.shape 22 | assert np.all(close_float(Y.value, np_Y)) 23 | 24 | def test_softmax_values_2(): 25 | npr.seed(2) 26 | 27 | for ii in xrange(NUM_TRIALS): 28 | 29 | np_X = npr.randn(5,6) 30 | X = kayak.Parameter(np_X) 31 | Y = kayak.SoftMax(X, axis=0) 32 | 33 | np_Y = np.exp(np_X) 34 | np_Y = np_Y / np.sum(np_Y, axis=0, keepdims=True) 35 | 36 | assert Y.shape == np_X.shape 37 | assert np.all(close_float(Y.value, np_Y)) 38 | 39 | def test_softmax_grad_1(): 40 | npr.seed(3) 41 | 42 | for ii in xrange(NUM_TRIALS): 43 | 44 | np_X = npr.randn(5,6) 45 | X = kayak.Parameter(np_X) 46 | Y = kayak.SoftMax(X) 47 | Z = kayak.MatSum(Y * Y) 48 | 49 | assert kayak.util.checkgrad(X, Z) < MAX_GRAD_DIFF 50 | 51 | def test_softmax_grad_2(): 52 | npr.seed(4) 53 | 54 | for ii in xrange(NUM_TRIALS): 55 | 56 | np_X = npr.randn(5,6) 57 | X = kayak.Parameter(np_X) 58 | Y = kayak.SoftMax(X, axis=0) 59 | Z = kayak.MatSum(Y * Y) 60 | 61 | assert kayak.util.checkgrad(X, Z) < MAX_GRAD_DIFF 62 | 63 | def test_softmax_grad_3(): 64 | npr.seed(5) 65 | 66 | for ii in xrange(NUM_TRIALS): 67 | 68 | np_X = npr.randn(5,6) 69 | np_T = npr.randint(0, 10, np_X.shape) 70 | X = kayak.Parameter(np_X) 71 | T = kayak.Targets(np_T) 72 | Y = kayak.SoftMax(X) 73 | Z = kayak.MatSum(kayak.LogMultinomialLoss(Y, T)) 74 | 75 | assert kayak.util.checkgrad(X, Z) < MAX_GRAD_DIFF 76 | 77 | -------------------------------------------------------------------------------- /tests/test_SoftReLU.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . import * 7 | 8 | def test_softrelu_values(): 9 | npr.seed(1) 10 | 11 | for ii in xrange(NUM_TRIALS): 12 | np_X = npr.randn(6,5) 13 | X = kayak.Parameter(np_X) 14 | Y = kayak.SoftReLU(X) 15 | 16 | assert np.all( Y.value >= 0.0 ) 17 | assert np.all(close_float(np.log(1.0 + np.exp(np_X)), Y.value)) 18 | 19 | def test_softrelu_grad(): 20 | npr.seed(2) 21 | 22 | for ii in xrange(NUM_TRIALS): 23 | np_X = npr.randn(6,5) 24 | X = kayak.Parameter(np_X) 25 | Y = kayak.SoftReLU(X) 26 | Z = kayak.MatSum(Y) 27 | 28 | Z.value 29 | assert np.all( Z.grad(X) >= 0.0 ) 30 | assert kayak.util.checkgrad(X, Z) < MAX_GRAD_DIFF 31 | -------------------------------------------------------------------------------- /tests/test_Stacking.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . 
import * 7 | from nose.tools import assert_less 8 | 9 | def test_stacking_values(): 10 | npr.seed(1) 11 | 12 | for ii in xrange(NUM_TRIALS): 13 | np_A = npr.randn(6,10) 14 | np_B = npr.randn(6,5) 15 | A = kayak.Parameter(np_A) 16 | B = kayak.Parameter(np_B) 17 | Y = kayak.Hstack(A, B) 18 | 19 | assert(np.array_equal(Y.value[:, :A.shape[1]], np_A)) 20 | assert(np.array_equal(Y.value[:, A.shape[1]:], np_B)) 21 | 22 | def test_stacking_grad(): 23 | npr.seed(2) 24 | 25 | for ii in xrange(NUM_TRIALS): 26 | np_A = npr.randn(6,10) 27 | np_B = npr.randn(6,5) 28 | A = kayak.Parameter(np_A) 29 | B = kayak.Parameter(np_B) 30 | Y = kayak.Hstack(A, B) 31 | Z = kayak.MatSum(Y) 32 | 33 | Z.value 34 | assert_less(kayak.util.checkgrad(A, Z), MAX_GRAD_DIFF) 35 | assert_less(kayak.util.checkgrad(B, Z), MAX_GRAD_DIFF) 36 | -------------------------------------------------------------------------------- /tests/test_TanH.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . import * 7 | from nose.tools import assert_less 8 | 9 | def test_tanh_values(): 10 | npr.seed(1) 11 | 12 | for ii in xrange(NUM_TRIALS): 13 | np_X = npr.randn(6,5) 14 | X = kayak.Parameter(np_X) 15 | Y = kayak.TanH(X) 16 | 17 | assert np.all(close_float(np.tanh(np_X), Y.value)) 18 | 19 | def test_tanh_grad(): 20 | npr.seed(2) 21 | 22 | for ii in xrange(NUM_TRIALS): 23 | np_X = npr.randn(6,5) 24 | X = kayak.Parameter(np_X) 25 | Y = kayak.TanH(X) 26 | Z = kayak.MatSum(Y) 27 | 28 | Z.value 29 | assert np.all( Z.grad(X) >= 0.0 ) 30 | assert_less(kayak.util.checkgrad(X, Z), MAX_GRAD_DIFF) 31 | -------------------------------------------------------------------------------- /tests/test_Targets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HIPS/Kayak/1a7d4baa849bbd5a6f6d0486136169899cf25523/tests/test_Targets.py -------------------------------------------------------------------------------- /tests/test_TensorMult.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from . import * 7 | 8 | def check_tensormult(A_shape, B_shape, axes): 9 | 10 | np_A = npr.randn(*A_shape) 11 | np_B = npr.randn(*B_shape) 12 | A = kayak.Parameter(np_A) 13 | B = kayak.Parameter(np_B) 14 | C = kayak.TensorMult(A, B, axes) 15 | D = kayak.Parameter(npr.randn(*C.shape)) 16 | L = kayak.MatSum(kayak.ElemMult(C, D)) 17 | 18 | assert np.all(close_float(C.value, np.tensordot(np_A, np_B, axes))) 19 | assert kayak.util.checkgrad(A, L) < MAX_GRAD_DIFF 20 | assert kayak.util.checkgrad(B, L) < MAX_GRAD_DIFF 21 | 22 | def test_tensormult_grad_1(): 23 | check_tensormult((3, 4), (4, 5), ((1,), (0,))) 24 | 25 | def test_tensormult_grad_2(): 26 | check_tensormult((4, 3), (5, 4), ((0,), (1,))) 27 | 28 | def test_tensormult_grad_3(): 29 | check_tensormult((3, 4), (4, 5, 6), ((1,), (0,))) 30 | 31 | def test_tensormult_grad_4(): 32 | check_tensormult((2, 3, 4), (5, 7, 4, 3), ((1, 2), (3, 2))) 33 | check_tensormult((2, 3, 4), (5, 7, 4, 3), ((2, 1), (2, 3))) 34 | -------------------------------------------------------------------------------- /tests/test_Transpose.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | 4 | import kayak 5 | 6 | from .
import * 7 | 8 | def test_transpose_1(): 9 | npr.seed(1) 10 | 11 | np_A = npr.randn(5,10) 12 | A = kayak.Parameter(np_A) 13 | B = kayak.Transpose(A) 14 | 15 | B.value 16 | assert B.shape == (10,5) 17 | for ii in xrange(np_A.shape[0]): 18 | for jj in xrange(np_A.shape[1]): 19 | assert np_A[ii,jj] == B.value[jj,ii] 20 | 21 | def test_transpose_2(): 22 | npr.seed(2) 23 | 24 | np_A = npr.randn(5,10,15) 25 | A = kayak.Parameter(np_A) 26 | B = kayak.Transpose(A) 27 | 28 | B.value 29 | assert B.shape == (15,10,5) 30 | for ii in xrange(np_A.shape[0]): 31 | for jj in xrange(np_A.shape[1]): 32 | for kk in xrange(np_A.shape[2]): 33 | assert np_A[ii,jj,kk] == B.value[kk,jj,ii] 34 | 35 | def test_transpose_3(): 36 | npr.seed(3) 37 | 38 | np_A = npr.randn(5,10) 39 | A = kayak.Parameter(np_A) 40 | B = kayak.Transpose(A) 41 | C = kayak.Parameter(npr.randn(5,5)) 42 | D = kayak.MatMult(B, C) 43 | out = kayak.MatSum(D) 44 | 45 | out.value 46 | assert out.grad(A).shape == np_A.shape 47 | assert kayak.util.checkgrad(A, out) < MAX_GRAD_DIFF 48 | --------------------------------------------------------------------------------
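A note on the pattern used throughout the tests above: `kayak.util.checkgrad(param, out)` returns a scalar discrepancy between the gradient computed by backpropagation and a numerical estimate, and a test passes when that discrepancy is below `MAX_GRAD_DIFF`. Below is a minimal pure-NumPy sketch of the same idea, for orientation only; the function `fd_checkgrad` and its `(value, gradient)` callback convention are illustrative assumptions, not Kayak's actual implementation or API.

    import numpy as np
    import numpy.random as npr

    def fd_checkgrad(f, x, eps=1e-4):
        # f maps an ndarray x to (scalar loss, gradient w.r.t. x).
        # Illustrative stand-in for kayak.util.checkgrad, not its real code.
        _, grad = f(x)
        fd = np.zeros_like(x)
        for idx in np.ndindex(*x.shape):
            xp = x.copy()
            xp[idx] += eps
            xm = x.copy()
            xm[idx] -= eps
            # Central finite difference for this single entry of x.
            fd[idx] = (f(xp)[0] - f(xm)[0]) / (2.0 * eps)
        # Relative discrepancy, to be compared against a small tolerance.
        return np.linalg.norm(grad - fd) / np.linalg.norm(grad + fd)

    # Example: for loss(A) = sum(A * B), the exact gradient is B.
    npr.seed(1)
    B = npr.randn(5, 6)
    loss = lambda A: (np.sum(A * B), B)
    assert fd_checkgrad(loss, npr.randn(5, 6)) < 1e-6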
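The MatAdd and ElemMult shape tests above (e.g. `test_matadd_grad_3` through `test_matadd_grad_6`) pin down the broadcasting rule for gradients: when an operand such as a `(1,6)` bias row is broadcast up to `(5,6)`, the gradient that comes back must have the operand's own shape, which means summing the upstream gradient over the broadcast axes. A small sketch of that rule in plain NumPy; the helper name `unbroadcast` is ours, not a Kayak function.

    import numpy as np

    def unbroadcast(grad, shape):
        # Sum out leading axes that broadcasting prepended.
        while grad.ndim > len(shape):
            grad = grad.sum(axis=0)
        # Sum over axes the original operand had as size 1.
        for axis, size in enumerate(shape):
            if size == 1:
                grad = grad.sum(axis=axis, keepdims=True)
        return grad

    # For C = A + B with A of shape (5,6) and B of shape (1,6),
    # the upstream gradient of MatSum(C) is all ones:
    g = np.ones((5, 6))
    gB = unbroadcast(g, (1, 6))
    assert gB.shape == (1, 6) and np.all(gB == 5.0)
    # A (1,1) operand, as in test_matadd_grad_6, collapses to a single sum:
    gS = unbroadcast(g, (1, 1))
    assert gS.shape == (1, 1) and gS[0, 0] == 30.0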
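Finally, `test_TensorMult.py` pins `kayak.TensorMult` to NumPy's `np.tensordot` axes convention, which is worth spelling out: `axes=((1, 2), (3, 2))` contracts axes 1 and 2 of the first argument against axes 3 and 2 of the second, and the result keeps the uncontracted axes of the first argument followed by those of the second. A worked shape example matching `check_tensormult((2, 3, 4), (5, 7, 4, 3), ((1, 2), (3, 2)))`:

    import numpy as np
    import numpy.random as npr

    npr.seed(1)
    A = npr.randn(2, 3, 4)
    B = npr.randn(5, 7, 4, 3)
    # Pair A's axis 1 (size 3) with B's axis 3 (size 3),
    # and A's axis 2 (size 4) with B's axis 2 (size 4).
    C = np.tensordot(A, B, axes=((1, 2), (3, 2)))
    # Remaining axes: (2,) from A, then (5, 7) from B.
    assert C.shape == (2, 5, 7)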