├── lib ├── __init__.py ├── slide_code.py ├── pylearn2_log.yaml ├── sklearn_mnist.py ├── opendeep_mnist.py ├── graphlab_mnist.py ├── pylearn2_mnist.py ├── lasagne_mnist.py └── theano_mnist.py ├── .gitignore ├── README.md ├── Check_Sklearn.ipynb ├── Check_Lasagne.ipynb ├── Check_Graphlab.ipynb └── Check_Theano.ipynb /lib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *data 2 | *keys 3 | *.pyc 4 | *.ipynb_checkpoints 5 | .DS_Store 6 | logs/ 7 | tmp/ 8 | var/ 9 | out/ 10 | .ipy 11 | 12 | *.pkl 13 | lib/outputs/ 14 | outputs/ -------------------------------------------------------------------------------- /lib/slide_code.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Code in PyCon 2015 Presentation 3 | 4 | ''' 5 | 6 | import graphlab 7 | 8 | filename = 'http://s3.amazonaws.com/GraphLab-Datasets/mnist/sframe/train' 9 | 10 | inputs = graphlab.SFrame(filename) 11 | 12 | model = graphlab.neuralnet_classifier.create(inputs, target='label') 13 | 14 | model.evaluate(test_data) -------------------------------------------------------------------------------- /lib/pylearn2_log.yaml: -------------------------------------------------------------------------------- 1 | !obj:pylearn2.train.Train { 2 | dataset: &train !obj:pylearn2.datasets.dense_design_matrix.DenseDesignMatrix { 3 | X: !pkl: 'data/pylearn2/mnist_train_X.pkl', 4 | y: !pkl: 'data/pylearn2/mnist_train_y.pkl', 5 | y_labels: 10, 6 | }, 7 | model: !obj:lib.pylearn2_mnist.LogisticRegression { 8 | nvis: 784, 9 | nclasses: 10, 10 | }, 11 | algorithm: !obj:pylearn2.training_algorithms.sgd.SGD { 12 | batch_size: 200, 13 | learning_rate: 1e-3, 14 | monitoring_dataset: { 15 | 'train' : *train, 16 | 'valid' : !obj:pylearn2.datasets.dense_design_matrix.DenseDesignMatrix { 17 | X: !pkl: 'data/pylearn2/mnist_valid_X.pkl', 18 | y: !pkl: 'data/pylearn2/mnist_valid_y.pkl', 19 | y_labels: 10, 20 | }, 21 | 'test' : !obj:pylearn2.datasets.dense_design_matrix.DenseDesignMatrix { 22 | X: !pkl: 'data/pylearn2/mnist_test_X.pkl', 23 | y: !pkl: 'data/pylearn2/mnist_test_y.pkl', 24 | y_labels: 10, 25 | }, 26 | }, 27 | cost: !obj:lib.pylearn2_mnist.LogisticRegressionCost {}, 28 | termination_criterion: !obj:pylearn2.termination_criteria.EpochCounter { 29 | max_epochs: 15 30 | }, 31 | }, 32 | } -------------------------------------------------------------------------------- /lib/sklearn_mnist.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Scikit-Learn RBM MNIST Example / Tutorial 3 | 4 | ''' 5 | # import the necessary packages 6 | from sklearn.datasets import fetch_mldata 7 | from sklearn.cross_validation import train_test_split 8 | from sklearn.linear_model import LogisticRegression 9 | from sklearn.neural_network import BernoulliRBM 10 | from sklearn.pipeline import Pipeline 11 | from sklearn.metrics import classification_report 12 | import numpy as np 13 | 14 | def load_data(datasetPath='data/'): 15 | return fetch_mldata('MNIST original', data_home=datasetPath) 16 | 17 | def scale(X, eps = 0.001): 18 | # scale the data points s.t the columns of the feature space 19 | # (i.e the predictors) are within the range [0, 1] 20 | return (X - np.min(X, axis = 0)) / (np.max(X, axis = 0) + eps) 21 | 22 | def split_data(data, test_size, random_state): 23 | 
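# split_data: separate the feature matrix and labels, scale pixel values into [0, 1], then hold out a test split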
X = data['data'] 24 | y = data['target'] 25 | X = X.astype("float32") 26 | X = scale(X) 27 | return train_test_split(X, y, test_size=test_size, random_state=random_state) 28 | 29 | def main(test_size=0.1, n_iter = 10, learning_rate = 0.01): 30 | 31 | n_components = 200 # number of neurons in hidden layer 32 | verbose = True 33 | 34 | print '... load and setup data' 35 | dataset = load_data() 36 | trainX, testX, trainY, testY = split_data(dataset, test_size=test_size, random_state=42) 37 | 38 | print '... building the model structure' 39 | # initialize the RBM + Logistic Regression classifier with the cross-validated parameters 40 | 41 | rbm = BernoulliRBM(n_components = n_components, n_iter = n_iter, 42 | learning_rate = learning_rate) 43 | 44 | logistic = LogisticRegression(C = 1.0) 45 | 46 | print '... training the model' 47 | # train the classifier and show an evaluation report 48 | classifier = Pipeline([("rbm", rbm), ("logistic", logistic)]) 49 | classifier.fit(trainX, trainY) 50 | 51 | # rbm.fit(trainX, trainY) 52 | 53 | print '... evaluate model' 54 | # Predict does not exist and its not clear how to get value out 55 | print classifier.score(testX, testY) 56 | 57 | 58 | 59 | if __name__ == '__main__': 60 | main() -------------------------------------------------------------------------------- /lib/opendeep_mnist.py: -------------------------------------------------------------------------------- 1 | ''' 2 | OpenDeep MNIST Example / Tutorial 3 | 4 | ''' 5 | from __future__ import print_function 6 | from opendeep.log.logger import config_root_logger 7 | from opendeep.models.container import Prototype 8 | from opendeep.models.single_layer.basic import BasicLayer, SoftmaxLayer 9 | from opendeep.optimization.stochastic_gradient_descent import SGD 10 | from opendeep.data.standard_datasets.image.mnist import MNIST, datasets 11 | from opendeep.monitor.monitor import Monitor 12 | from opendeep.monitor.plot import Plot 13 | 14 | # set up the logger to print everything to stdout and log files in opendeep/log/logs/ 15 | config_root_logger() 16 | 17 | def split_data(mnist_dataset): 18 | return mnist_dataset.getSubset(datasets.TEST) 19 | 20 | def build_model(): 21 | # add layers one-by-one to a Prototype container to build neural net 22 | # inputs_hook created automatically by Prototype; thus, no need to specify 23 | mlp = Prototype() 24 | mlp.add(BasicLayer(input_size=28*28, output_size=512, activation='rectifier', noise='dropout')) 25 | mlp.add(BasicLayer(output_size=512, activation='rectifier', noise='dropout')) 26 | mlp.add(SoftmaxLayer(output_size=10)) 27 | 28 | return mlp 29 | 30 | def setup_optimization(model, n_epoch, mnist_dataset): 31 | # setup optimizer stochastic gradient descent 32 | optimizer = SGD(model=model, 33 | dataset=mnist_dataset, 34 | n_epoch=n_epoch, 35 | batch_size=600, 36 | learning_rate=.01, 37 | momentum=.9, 38 | nesterov_momentum=True, 39 | save_frequency=500, 40 | early_stop_threshold=0.997) 41 | 42 | # create a Monitor to view progress on a metric other than training cost 43 | error = Monitor('error', model.get_monitors()['softmax_error'], train=True, valid=True, test=True) 44 | 45 | return optimizer, error 46 | 47 | def evaluate(test_data, test_labels, model): 48 | n_examples = 50 49 | 50 | predictions = model.run(test_data[:n_examples].eval()) 51 | actual_labels = test_labels[:n_examples].eval().astype('int32') 52 | # print("Predictions:", predictions) 53 | # print("Actual :", actual_labels) 54 | print("Accuracy: ", 
(sum(predictions==actual_labels)*1.0/len(actual_labels))) 55 | 56 | def main(plot=None, n_epoch=10): 57 | print('... loading and seting-up data') 58 | # don't concatenate together train and valid sets 59 | mnist_dataset = MNIST(concat_train_valid=False) 60 | 61 | print('... building the model structure') 62 | # create the mlp model from a Prototype 63 | model = build_model() 64 | optimizer, error = setup_optimization(model, n_epoch, mnist_dataset) 65 | 66 | print('... training the model') 67 | # [optional] use keyboardInterrupt to save the latest parameters. 68 | if plot: 69 | plot = Plot("OpenDeep MLP Example", monitor_channels=error, open_browser=True) 70 | 71 | optimizer.train(monitor_channels=error, plot=plot) 72 | 73 | print('... evaluating model') 74 | test_data, test_labels = split_data(mnist_dataset) 75 | evaluate(test_data, test_labels, model) 76 | 77 | 78 | if __name__ == '__main__': 79 | main() -------------------------------------------------------------------------------- /lib/graphlab_mnist.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Graphlab/Dato 1 Layer Convolution Net MNIST Example / Tutorial 3 | 4 | ''' 5 | 6 | import graphlab 7 | import os 8 | import tarfile 9 | 10 | def load_data(filename): 11 | ''' 12 | Graphlab stored a train/test split of the data on S3. 13 | 14 | You can change the split by combing the data and splitting again. 15 | 16 | 4 Minutes to load. 17 | ''' 18 | return graphlab.SFrame(filename) 19 | #data['image'] = graphlab.image_analysis.resize(training_data['image'], 28, 28, 1) # potentialy need to resize data. 20 | 21 | def create_structure(): 22 | ''' 23 | Tune neural net hyper parameters. 24 | 25 | Creates the NN model template which is similar to creating the y=mx+b model template for linear regression. 26 | 27 | Optional and fundamentally using convolutional neural nets either way. 28 | ''' 29 | structure = graphlab.deeplearning.get_builtin_neuralnet('mnist') 30 | print "NN layer details: ", structure.layers 31 | print "NN hyper parameters summary: ", structure.params 32 | 33 | return structure 34 | 35 | def train_model(data, target, structure, max_iterations): 36 | ''' 37 | Create convolutional NN 1 layer model. 
38 | 39 | Model automatically sets validation to check model performance when training 40 | ''' 41 | return graphlab.neuralnet_classifier.create(data, target=target, structure=structure, max_iterations=max_iterations) 42 | 43 | def get_features(data): 44 | ''' 45 | SArray of dense feature vectors, each of which is the concatenation of all the hidden unit values 46 | ''' 47 | return model.extract_features(data) 48 | 49 | def predict_values(model, data): 50 | ''' 51 | Predict classification based on input pixels 52 | 53 | Input a picture of a digit and output a prediciton 54 | ''' 55 | return model.classify(data) 56 | 57 | def evaluate_model(model, test_data): 58 | ''' 59 | Error rate between predictions and labels 60 | ''' 61 | eval_ = model.evaluate(test_data) 62 | print "Accuracy: ", eval_['accuracy'] 63 | 64 | cf_mat = eval_['confusion_matrix'] 65 | 66 | print "Confusion Matrix Correct Predictions" 67 | print cf_mat[cf_mat['target_label'] == cf_mat['predicted_label']].groupby('target_label', graphlab.aggregate.SUM('count')).sort('target_label') 68 | 69 | print "Confusion Matrix Prediction Mistakes" 70 | print cf_mat[cf_mat['target_label'] != cf_mat['predicted_label']].groupby('target_label', graphlab.aggregate.SUM('count')).sort('target_label') 71 | 72 | def main(set_net_structure=True): 73 | ''' 74 | Runs the full program to train the model and then evaluate 75 | ''' 76 | print '... load and setup data' 77 | train_data = load_data('http://s3.amazonaws.com/GraphLab-Datasets/mnist/sframe/train') 78 | test_data = load_data('http://s3.amazonaws.com/GraphLab-Datasets/mnist/sframe/test') 79 | 80 | print '... building the model structure' 81 | if set_net_structure: 82 | structure = create_structure() 83 | else: 84 | structure = None 85 | 86 | print '... training the model' 87 | model = train_model(train_data, 'label', structure, 3) 88 | 89 | print '... 
evaluate model' 90 | evaluate_model(model, test_data) 91 | 92 | print "Evaluation completed" 93 | 94 | 95 | if __name__ == '__main__': 96 | main() -------------------------------------------------------------------------------- /lib/pylearn2_mnist.py: -------------------------------------------------------------------------------- 1 | ''' 2 | PyLearn2 MNIST Example / Tutorial 3 | 4 | Note: Uses pylearn2_mnist yaml file 5 | 6 | ''' 7 | 8 | import os 9 | from pylearn2.utils import serial 10 | from pylearn2.models.model import Model 11 | from pylearn2.space import VectorSpace 12 | from pylearn2.utils import sharedX 13 | from pylearn2.costs.cost import Cost, DefaultDataSpecsMixin 14 | from pylearn2.space import CompositeSpace 15 | import theano.tensor as T 16 | from theano.compat.python2x import OrderedDict 17 | from subprocess import check_call 18 | import numpy as np 19 | 20 | def load_data(dataset): 21 | if os.path.isfile(dataset): 22 | if not os.path.isfile("data/pylearn2/mnist_train_X.pkl"): 23 | split_data(dataset) 24 | else: 25 | import urllib 26 | origin = ('http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz') 27 | print('Downloading data from %s' % origin) 28 | urllib.urlretrieve(origin, dataset) 29 | 30 | def split_data(dataset): 31 | path = dataset.split("/")[0:2] 32 | check_call(["gunzip" + dataset]) 33 | 34 | data = serial.load(dataset) 35 | serial.save(path +'mnist_train_X.pkl', data[0][0]) 36 | serial.save(path +'mnist_train_y.pkl', data[0][1].reshape((-1, 1))) 37 | serial.save(path +'mnist_valid_X.pkl', data[1][0]) 38 | serial.save(path +'mnist_valid_y.pkl', data[1][1].reshape((-1, 1))) 39 | serial.save(path +'mnist_test_X.pkl', data[2][0]) 40 | serial.save(path +'mnist_test_y.pkl', data[2][1].reshape((-1, 1))) 41 | 42 | class LogisticRegression(Model): 43 | def __init__(self, nvis, nclasses): 44 | super(LogisticRegression, self).__init__() 45 | 46 | # Number of input nodes 47 | self.nvis = nvis 48 | # Number of output nodes 49 | self.nclasses = nclasses 50 | 51 | W_value = np.random.uniform(size=(self.nvis, self.nclasses)) 52 | self.W = sharedX(W_value, 'W') # sharedX formats for GPUs 53 | 54 | b_value = np.zeros(self.nclasses) 55 | self.b = sharedX(b_value, 'b') 56 | 57 | self._params = [self.W, self.b] 58 | 59 | self.input_space = VectorSpace(dim=self.nvis) 60 | self.output_space = VectorSpace(dim=self.nclasses) 61 | 62 | # Linear transformation followed by non-linear softmax transformation 63 | def logistic_regression(self, inputs): 64 | return T.nnet.softmax(T.dot(inputs, self.W) + self.b) 65 | 66 | # Following two add comments on error rate during training 67 | def get_monitoring_data_specs(self): 68 | space = CompositeSpace([self.get_input_space(), 69 | self.get_target_space()]) 70 | source = (self.get_input_source(), self.get_target_source()) 71 | return (space, source) 72 | 73 | def get_monitoring_channels(self, data): 74 | print '... 
evaluate model' 75 | space, source = self.get_monitoring_data_specs() 76 | space.validate(data) 77 | 78 | X, y = data 79 | y_hat = self.logistic_regression(X) 80 | error = T.neq(y.argmax(axis=1), y_hat.argmax(axis=1)).mean() 81 | 82 | return OrderedDict([('error', error)]) 83 | 84 | class LogisticRegressionCost(DefaultDataSpecsMixin, Cost): 85 | supervised = True 86 | 87 | def expr(self, model, data): 88 | space, source = self.get_data_specs(model) 89 | # Checks data is valid - tensor variables expected 90 | space.validate(data) 91 | 92 | inputs, targets = data 93 | outputs = model.logistic_regression(inputs) 94 | # Negative log likelihood 95 | loss = -(targets * T.log(outputs)).sum(axis=1) 96 | return loss.mean() 97 | 98 | def main(dataset='data/pylearn2/mnist.pkl.gz', nn_config = "lib/pylearn2_log.yaml"): 99 | print '... load and setup data' 100 | load_data(dataset) 101 | 102 | print '... building the model structure' 103 | train_obj = serial.load_train_file(nn_config) 104 | 105 | print '... training the model' 106 | train_obj.main_loop() 107 | 108 | 109 | 110 | if __name__ == '__main__': 111 | main() -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Neural_Net_Newbies 2 | ================== 3 | 4 | Code examples from several Python machine learning packages that build, train and run a neural net on MNIST data. A small sample of this code, lib/slide_code.py, is used in my PyCon 2015 presentation; the rest is here for reference afterwards. 5 | 6 | The [MNIST Dataset](http://yann.lecun.com/exdb/mnist/) is the "hello world" of neural nets; the link points to the original source of the dataset. 7 | 8 | Note: All examples assume supervised learning. 9 | 10 | Theano 11 | -------- 12 | The MNIST example in this repo is based on this [link](http://deeplearning.net/tutorial/logreg.html) with modifications. 13 | * Data is located at this [link](http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz). 14 | * Additional tutorial at this [link](http://nbviewer.ipython.org/github/craffel/theano-tutorial/blob/master/Theano%20Tutorial.ipynb). 15 | 16 | Run from the command line: 17 | 18 | python lib/theano_mnist.py 19 | 20 | Setup: 21 | 22 | pip install theano 23 | 24 | [General Reference](http://deeplearning.net/software/theano/index.html) for more information about the package. 25 | 26 | Graphlab/Dato 27 | -------- 28 | An MNIST sample tutorial can be found at this [link](https://dato.com/products/create/docs/graphlab.toolkits.deeplearning.html). This machine learning library is built on CXXNet. 29 | 30 | Run from the command line: 31 | 32 | python lib/graphlab_mnist.py 33 | 34 | Setup: 35 | 36 | pip install graphlab-create 37 | 38 | * Add your product key to an environment variable or config file 39 | 40 | [General Reference](https://dato.com/products/create/docs/generated/graphlab.neuralnet_classifier.NeuralNetClassifier.html) for more information about the package. 41 | 42 | OpenDeep 43 | -------- 44 | The MNIST example in this repo is based on this [link](http://www.opendeep.org/docs/tutorial-classifying-handwritten-mnist-images). This deep learning library is built on Theano. 45 | 46 | Run from the command line: 47 | 48 | python lib/opendeep_mnist.py 49 | 50 | Setup: 51 | * http://www.opendeep.org/docs/getting-started 52 | 53 | [General Reference](http://www.opendeep.org/) for more information about the package.
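For a feel for the API, here is a condensed, untested sketch of what lib/opendeep_mnist.py does; the layer sizes and optimizer settings mirror that script (its monitoring and plotting steps are omitted here):

    from opendeep.models.container import Prototype
    from opendeep.models.single_layer.basic import BasicLayer, SoftmaxLayer
    from opendeep.optimization.stochastic_gradient_descent import SGD
    from opendeep.data.standard_datasets.image.mnist import MNIST

    # stack layers in a Prototype container: 784 -> 512 -> 512 -> 10
    mlp = Prototype()
    mlp.add(BasicLayer(input_size=28*28, output_size=512, activation='rectifier', noise='dropout'))
    mlp.add(BasicLayer(output_size=512, activation='rectifier', noise='dropout'))
    mlp.add(SoftmaxLayer(output_size=10))

    # optimize with stochastic gradient descent on MNIST
    optimizer = SGD(model=mlp, dataset=MNIST(concat_train_valid=False),
                    n_epoch=10, batch_size=600, learning_rate=.01,
                    momentum=.9, nesterov_momentum=True)
    optimizer.train()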
54 | 55 | Lasagne 56 | -------- 57 | The MNIST example in this repo is based on this [link](https://github.com/craffel/Lasagne-tutorial/blob/master/examples/mnist.py) with modifications. This machine learning library is built on Theano. 58 | 59 | Run from the command line: 60 | 61 | python lib/lasagne_mnist.py 62 | 63 | Setup: 64 | * http://lasagne.readthedocs.org/en/latest/user/installation.html 65 | 66 | [General Reference](http://lasagne.readthedocs.org/en/latest/) for more information about the package. 67 | 68 | PyLearn2 69 | -------- 70 | The MNIST example in this repo is based on this [link](https://vdumoulin.github.io/articles/extending-pylearn2/) with modifications. Built on Theano; it requires a YAML file to configure the neural net structure and the optimization method. 71 | 72 | Run from the command line: 73 | 74 | python lib/pylearn2_mnist.py 75 | 76 | 77 | Setup: 78 | 79 | git clone git://github.com/lisa-lab/pylearn2.git 80 | 81 | cd pylearn2 && python setup.py develop 82 | 83 | OR 84 | 85 | cd pylearn2 && python setup.py develop --user 86 | 87 | * http://deeplearning.net/software/pylearn2/#download-and-installation 88 | * You may need to add a path to the package and/or data 89 | 90 | 91 | [General Reference](http://deeplearning.net/software/pylearn2/) for more information. 92 | 93 | Scikit-Learn 94 | -------- 95 | 96 | The MNIST example in this repo is based on this [link](http://www.pyimagesearch.com/2014/06/23/applying-deep-learning-rbm-mnist-using-python/) with modifications. This example combines a Bernoulli RBM with Logistic Regression in a scikit-learn Pipeline to make predictions. 97 | 98 | Run from the command line: 99 | 100 | python lib/sklearn_mnist.py 101 | 102 | Setup: 103 | 104 | pip install -U numpy scipy scikit-learn 105 | 106 | * http://scikit-learn.org/stable/install.html 107 | 108 | [General Reference](http://scikit-learn.org/stable/modules/neural_networks.html) for more information. 109 | 110 | 111 | Other 112 | -------- 113 | Most setup instructions assume Python and pip are installed. Check each package's documentation for other options, especially if setting up on GPUs. 114 | 115 | If you want to add to this repo, send me a PR.
116 | 117 | -------------------------------------------------------------------------------- /lib/lasagne_mnist.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Lasagne MNIST Example / Tutorial 3 | 4 | ''' 5 | 6 | from __future__ import print_function 7 | 8 | import cPickle as pickle 9 | import gzip 10 | import itertools 11 | import urllib 12 | 13 | import numpy as np 14 | import lasagne 15 | import theano 16 | import theano.tensor as T 17 | 18 | 19 | DATA_URL = 'http://deeplearning.net/data/mnist/mnist.pkl.gz' 20 | DATA_FILENAME = 'mnist.pkl.gz' 21 | 22 | NUM_EPOCHS = 500 23 | BATCH_SIZE = 600 24 | NUM_HIDDEN_UNITS = 512 25 | LEARNING_RATE = 0.01 26 | MOMENTUM = 0.9 27 | 28 | 29 | def _load_data(url=DATA_URL, filename=DATA_FILENAME): 30 | urllib.urlretrieve(url, filename) 31 | with gzip.open(filename, 'rb') as f: 32 | data = pickle.load(f) 33 | return data 34 | 35 | def load_data(): 36 | data = _load_data() 37 | X_train, y_train = data[0] 38 | X_valid, y_valid = data[1] 39 | X_test, y_test = data[2] 40 | 41 | return dict( 42 | X_train=theano.shared(lasagne.utils.floatX(X_train)), 43 | y_train=T.cast(theano.shared(y_train), 'int32'), 44 | X_valid=theano.shared(lasagne.utils.floatX(X_valid)), 45 | y_valid=T.cast(theano.shared(y_valid), 'int32'), 46 | X_test=theano.shared(lasagne.utils.floatX(X_test)), 47 | y_test=T.cast(theano.shared(y_test), 'int32'), 48 | num_examples_train=X_train.shape[0], 49 | num_examples_valid=X_valid.shape[0], 50 | num_examples_test=X_test.shape[0], 51 | input_dim=X_train.shape[1], 52 | output_dim=10, 53 | ) 54 | 55 | def build_model(input_dim, output_dim, 56 | batch_size=BATCH_SIZE, num_hidden_units=NUM_HIDDEN_UNITS): 57 | 58 | ''' 59 | Define neural net structure. 60 | 61 | Tune neural net hyper parameters input, output and hidden number of units. 
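Default architecture: input_dim -> 512 ReLU (dropout 0.5) -> 512 ReLU (dropout 0.5) -> output_dim softmax.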
62 | 63 | ''' 64 | l_in = lasagne.layers.InputLayer( 65 | shape=(batch_size, input_dim), 66 | ) 67 | l_hidden1 = lasagne.layers.DenseLayer( 68 | l_in, 69 | num_units=num_hidden_units, 70 | nonlinearity=lasagne.nonlinearities.rectify, 71 | ) 72 | # Regularizatoin with dropout -randomly drop neurons and connections in training 73 | l_hidden1_dropout = lasagne.layers.DropoutLayer( 74 | l_hidden1, 75 | p=0.5, 76 | ) 77 | l_hidden2 = lasagne.layers.DenseLayer( 78 | l_hidden1_dropout, 79 | num_units=num_hidden_units, 80 | nonlinearity=lasagne.nonlinearities.rectify, 81 | ) 82 | l_hidden2_dropout = lasagne.layers.DropoutLayer( 83 | l_hidden2, 84 | p=0.5, 85 | ) 86 | l_out = lasagne.layers.DenseLayer( 87 | l_hidden2_dropout, 88 | num_units=output_dim, 89 | nonlinearity=lasagne.nonlinearities.softmax, 90 | ) 91 | 92 | return l_out 93 | 94 | def create_iter_functions(dataset, output_layer, 95 | X_tensor_type=T.matrix, 96 | batch_size=BATCH_SIZE, 97 | learning_rate=LEARNING_RATE, momentum=MOMENTUM): 98 | ''' 99 | Define neural net methods to tune structure 100 | ''' 101 | 102 | batch_index = T.iscalar('batch_index') 103 | X_batch = X_tensor_type('x') 104 | y_batch = T.ivector('y') 105 | batch_slice = slice(batch_index * batch_size, (batch_index + 1) * batch_size) 106 | 107 | def loss(output): # negative log likelihood 108 | return -T.mean(T.log(output)[T.arange(y_batch.shape[0]), y_batch]) 109 | 110 | loss_train = loss(output_layer.get_output(X_batch)) 111 | loss_eval = loss(output_layer.get_output(X_batch, deterministic=True)) 112 | 113 | pred = T.argmax( 114 | output_layer.get_output(X_batch, deterministic=True), axis=1) 115 | 116 | accuracy = T.mean(T.eq(pred, y_batch)) # error rate 117 | 118 | all_params = lasagne.layers.get_all_params(output_layer) 119 | updates = lasagne.updates.nesterov_momentum( 120 | loss_train, all_params, learning_rate, momentum) 121 | 122 | iter_train = theano.function( 123 | [batch_index], loss_train, 124 | updates=updates, 125 | givens={ 126 | X_batch: dataset['X_train'][batch_slice], 127 | y_batch: dataset['y_train'][batch_slice], 128 | }, 129 | ) 130 | 131 | iter_valid = theano.function( 132 | [batch_index], [loss_eval, accuracy], 133 | givens={ 134 | X_batch: dataset['X_valid'][batch_slice], 135 | y_batch: dataset['y_valid'][batch_slice], 136 | }, 137 | ) 138 | 139 | iter_test = theano.function( 140 | [batch_index], [loss_eval, accuracy], 141 | givens={ 142 | X_batch: dataset['X_test'][batch_slice], 143 | y_batch: dataset['y_test'][batch_slice], 144 | }, 145 | ) 146 | 147 | return dict( 148 | train=iter_train, 149 | valid=iter_valid, 150 | test=iter_test, 151 | ) 152 | 153 | 154 | def train(iter_funcs, dataset, batch_size=BATCH_SIZE): 155 | num_batches_train = dataset['num_examples_train'] // batch_size 156 | num_batches_valid = dataset['num_examples_valid'] // batch_size 157 | num_batches_test = dataset['num_examples_test'] // batch_size 158 | 159 | for epoch in itertools.count(1): 160 | batch_train_losses = [] 161 | for b in range(num_batches_train): 162 | batch_train_loss = iter_funcs['train'](b) 163 | batch_train_losses.append(batch_train_loss) 164 | 165 | avg_train_loss = np.mean(batch_train_losses) 166 | 167 | batch_valid_losses = [] 168 | batch_valid_accuracies = [] 169 | for b in range(num_batches_valid): 170 | batch_valid_loss, batch_valid_accuracy = iter_funcs['valid'](b) 171 | batch_valid_losses.append(batch_valid_loss) 172 | batch_valid_accuracies.append(batch_valid_accuracy) 173 | 174 | avg_valid_loss = np.mean(batch_valid_losses) 175 | 
avg_valid_accuracy = np.mean(batch_valid_accuracies) 176 | 177 | yield { 178 | 'number': epoch, 179 | 'train_loss': avg_train_loss, 180 | 'valid_loss': avg_valid_loss, 181 | 'valid_accuracy': avg_valid_accuracy, 182 | } 183 | 184 | 185 | def main(num_epochs=NUM_EPOCHS): 186 | print('... loading and seting-up data') 187 | dataset = load_data() 188 | 189 | print('... building the model structure') 190 | output_layer = build_model( 191 | input_dim=dataset['input_dim'], 192 | output_dim=dataset['output_dim'], 193 | ) 194 | iter_funcs = create_iter_functions(dataset, output_layer) 195 | 196 | print('... training the model') 197 | for epoch in train(iter_funcs, dataset): 198 | print("Epoch %d of %d" % (epoch['number'], num_epochs)) 199 | print(" training loss:\t\t%.6f" % epoch['train_loss']) 200 | print(" validation loss:\t\t%.6f" % epoch['valid_loss']) 201 | print(" validation accuracy:\t\t%.2f %%" % 202 | (epoch['valid_accuracy'] * 100)) 203 | 204 | if epoch['number'] >= num_epochs: 205 | break 206 | 207 | return output_layer 208 | 209 | 210 | if __name__ == '__main__': 211 | main() -------------------------------------------------------------------------------- /Check_Sklearn.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "", 4 | "signature": "sha256:5c4afa7279d7fdadc2ea57f1be7696cf1593b6a23cf7d7439deb0d07af852531" 5 | }, 6 | "nbformat": 3, 7 | "nbformat_minor": 0, 8 | "worksheets": [ 9 | { 10 | "cells": [ 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "##Scikit-learn Example" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "### Manual Run" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "collapsed": false, 28 | "input": [ 29 | "from sklearn.datasets import fetch_mldata\n", 30 | "from sklearn.cross_validation import train_test_split\n", 31 | "from sklearn.linear_model import LogisticRegression\n", 32 | "from sklearn.metrics import classification_report\n", 33 | "from sklearn.neural_network import BernoulliRBM\n", 34 | "from sklearn.pipeline import Pipeline\n", 35 | "import numpy as np" 36 | ], 37 | "language": "python", 38 | "metadata": {}, 39 | "outputs": [], 40 | "prompt_number": 20 41 | }, 42 | { 43 | "cell_type": "code", 44 | "collapsed": false, 45 | "input": [ 46 | " mnist = fetch_mldata('MNIST original', data_home='data/')" 47 | ], 48 | "language": "python", 49 | "metadata": {}, 50 | "outputs": [], 51 | "prompt_number": 2 52 | }, 53 | { 54 | "cell_type": "code", 55 | "collapsed": false, 56 | "input": [ 57 | "mnist['target'].shape" 58 | ], 59 | "language": "python", 60 | "metadata": {}, 61 | "outputs": [ 62 | { 63 | "metadata": {}, 64 | "output_type": "pyout", 65 | "prompt_number": 3, 66 | "text": [ 67 | "(70000,)" 68 | ] 69 | } 70 | ], 71 | "prompt_number": 3 72 | }, 73 | { 74 | "cell_type": "code", 75 | "collapsed": false, 76 | "input": [ 77 | "mnist" 78 | ], 79 | "language": "python", 80 | "metadata": {}, 81 | "outputs": [ 82 | { 83 | "metadata": {}, 84 | "output_type": "pyout", 85 | "prompt_number": 4, 86 | "text": [ 87 | "{'COL_NAMES': ['label', 'data'],\n", 88 | " 'DESCR': 'mldata.org dataset: mnist-original',\n", 89 | " 'data': array([[0, 0, 0, ..., 0, 0, 0],\n", 90 | " [0, 0, 0, ..., 0, 0, 0],\n", 91 | " [0, 0, 0, ..., 0, 0, 0],\n", 92 | " ..., \n", 93 | " [0, 0, 0, ..., 0, 0, 0],\n", 94 | " [0, 0, 0, ..., 0, 0, 0],\n", 95 | " [0, 0, 0, ..., 0, 0, 0]], dtype=uint8),\n", 96 | " 'target': array([ 0., 0., 0., ..., 9., 9., 
9.])}" 97 | ] 98 | } 99 | ], 100 | "prompt_number": 4 101 | }, 102 | { 103 | "cell_type": "code", 104 | "collapsed": false, 105 | "input": [ 106 | "X = mnist['data']\n", 107 | "y = mnist['target']" 108 | ], 109 | "language": "python", 110 | "metadata": {}, 111 | "outputs": [], 112 | "prompt_number": 5 113 | }, 114 | { 115 | "cell_type": "code", 116 | "collapsed": false, 117 | "input": [ 118 | "train_test_split?" 119 | ], 120 | "language": "python", 121 | "metadata": {}, 122 | "outputs": [], 123 | "prompt_number": 6 124 | }, 125 | { 126 | "cell_type": "code", 127 | "collapsed": false, 128 | "input": [ 129 | "trainX, testX, trainY, testY = train_test_split(X, y, test_size=0.1, random_state=42)" 130 | ], 131 | "language": "python", 132 | "metadata": {}, 133 | "outputs": [], 134 | "prompt_number": 7 135 | }, 136 | { 137 | "cell_type": "code", 138 | "collapsed": false, 139 | "input": [ 140 | "trainX.shape" 141 | ], 142 | "language": "python", 143 | "metadata": {}, 144 | "outputs": [ 145 | { 146 | "metadata": {}, 147 | "output_type": "pyout", 148 | "prompt_number": 8, 149 | "text": [ 150 | "(63000, 784)" 151 | ] 152 | } 153 | ], 154 | "prompt_number": 8 155 | }, 156 | { 157 | "cell_type": "code", 158 | "collapsed": false, 159 | "input": [ 160 | "testX.shape" 161 | ], 162 | "language": "python", 163 | "metadata": {}, 164 | "outputs": [ 165 | { 166 | "metadata": {}, 167 | "output_type": "pyout", 168 | "prompt_number": 9, 169 | "text": [ 170 | "(7000, 784)" 171 | ] 172 | } 173 | ], 174 | "prompt_number": 9 175 | }, 176 | { 177 | "cell_type": "code", 178 | "collapsed": false, 179 | "input": [ 180 | "def scale(X, eps = 0.001):\n", 181 | " # scale the data points binary within the range [0, 1]\n", 182 | " return (X - np.min(X, axis = 0)) / (np.max(X, axis = 0) + eps)" 183 | ], 184 | "language": "python", 185 | "metadata": {}, 186 | "outputs": [], 187 | "prompt_number": 10 188 | }, 189 | { 190 | "cell_type": "code", 191 | "collapsed": false, 192 | "input": [ 193 | "trainX = scale(trainX.astype(\"float32\"))\n", 194 | "testX = scale(testX.astype(\"float32\"))" 195 | ], 196 | "language": "python", 197 | "metadata": {}, 198 | "outputs": [], 199 | "prompt_number": 13 200 | }, 201 | { 202 | "cell_type": "code", 203 | "collapsed": false, 204 | "input": [ 205 | "logistic = LogisticRegression(C = 1.0)" 206 | ], 207 | "language": "python", 208 | "metadata": {}, 209 | "outputs": [], 210 | "prompt_number": 14 211 | }, 212 | { 213 | "cell_type": "code", 214 | "collapsed": false, 215 | "input": [ 216 | "rbm = BernoulliRBM(n_components = 200, n_iter = 10, learning_rate = 0.01, verbose = True, random_state=42)" 217 | ], 218 | "language": "python", 219 | "metadata": {}, 220 | "outputs": [], 221 | "prompt_number": 17 222 | }, 223 | { 224 | "cell_type": "code", 225 | "collapsed": false, 226 | "input": [ 227 | "print '... training the model'\n", 228 | "# train the classifier and show an evaluation report\n", 229 | "classifier = Pipeline([(\"rbm\", rbm), (\"logistic\", logistic)])\n", 230 | "classifier.fit(trainX, trainY)" 231 | ], 232 | "language": "python", 233 | "metadata": {}, 234 | "outputs": [ 235 | { 236 | "output_type": "stream", 237 | "stream": "stdout", 238 | "text": [ 239 | "... 
training the model\n", 240 | "Iteration 0, pseudo-likelihood = -128.70, time = 30.71s" 241 | ] 242 | }, 243 | { 244 | "output_type": "stream", 245 | "stream": "stdout", 246 | "text": [ 247 | "\n", 248 | "Iteration 1, pseudo-likelihood = -94.16, time = 24.38s" 249 | ] 250 | }, 251 | { 252 | "output_type": "stream", 253 | "stream": "stdout", 254 | "text": [ 255 | "\n", 256 | "Iteration 2, pseudo-likelihood = -84.09, time = 24.87s" 257 | ] 258 | }, 259 | { 260 | "output_type": "stream", 261 | "stream": "stdout", 262 | "text": [ 263 | "\n", 264 | "Iteration 3, pseudo-likelihood = -78.75, time = 24.93s" 265 | ] 266 | }, 267 | { 268 | "output_type": "stream", 269 | "stream": "stdout", 270 | "text": [ 271 | "\n", 272 | "Iteration 4, pseudo-likelihood = -76.37, time = 24.95s" 273 | ] 274 | }, 275 | { 276 | "output_type": "stream", 277 | "stream": "stdout", 278 | "text": [ 279 | "\n", 280 | "Iteration 5, pseudo-likelihood = -74.93, time = 24.82s" 281 | ] 282 | }, 283 | { 284 | "output_type": "stream", 285 | "stream": "stdout", 286 | "text": [ 287 | "\n", 288 | "Iteration 6, pseudo-likelihood = -73.81, time = 26.24s" 289 | ] 290 | }, 291 | { 292 | "output_type": "stream", 293 | "stream": "stdout", 294 | "text": [ 295 | "\n", 296 | "Iteration 7, pseudo-likelihood = -72.40, time = 25.46s" 297 | ] 298 | }, 299 | { 300 | "output_type": "stream", 301 | "stream": "stdout", 302 | "text": [ 303 | "\n", 304 | "Iteration 8, pseudo-likelihood = -71.49, time = 25.03s" 305 | ] 306 | }, 307 | { 308 | "output_type": "stream", 309 | "stream": "stdout", 310 | "text": [ 311 | "\n", 312 | "Iteration 9, pseudo-likelihood = -70.89, time = 25.48s" 313 | ] 314 | }, 315 | { 316 | "output_type": "stream", 317 | "stream": "stdout", 318 | "text": [ 319 | "\n" 320 | ] 321 | }, 322 | { 323 | "metadata": {}, 324 | "output_type": "pyout", 325 | "prompt_number": 21, 326 | "text": [ 327 | "Pipeline(steps=[('rbm', BernoulliRBM(batch_size=10, learning_rate=0.01, n_components=200, n_iter=10,\n", 328 | " random_state=42, verbose=True)), ('logistic', LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n", 329 | " intercept_scaling=1, penalty='l2', random_state=None, tol=0.0001))])" 330 | ] 331 | } 332 | ], 333 | "prompt_number": 21 334 | }, 335 | { 336 | "cell_type": "code", 337 | "collapsed": false, 338 | "input": [ 339 | "classifier.predict(testX[0])" 340 | ], 341 | "language": "python", 342 | "metadata": {}, 343 | "outputs": [ 344 | { 345 | "metadata": {}, 346 | "output_type": "pyout", 347 | "prompt_number": 34, 348 | "text": [ 349 | "array([ 7.])" 350 | ] 351 | } 352 | ], 353 | "prompt_number": 34 354 | }, 355 | { 356 | "cell_type": "code", 357 | "collapsed": false, 358 | "input": [ 359 | "testY[0]" 360 | ], 361 | "language": "python", 362 | "metadata": {}, 363 | "outputs": [ 364 | { 365 | "metadata": {}, 366 | "output_type": "pyout", 367 | "prompt_number": 33, 368 | "text": [ 369 | "7.0" 370 | ] 371 | } 372 | ], 373 | "prompt_number": 33 374 | }, 375 | { 376 | "cell_type": "code", 377 | "collapsed": false, 378 | "input": [ 379 | "classifier.score(testX, testY)" 380 | ], 381 | "language": "python", 382 | "metadata": {}, 383 | "outputs": [ 384 | { 385 | "metadata": {}, 386 | "output_type": "pyout", 387 | "prompt_number": 43, 388 | "text": [ 389 | "0.95299999999999996" 390 | ] 391 | } 392 | ], 393 | "prompt_number": 43 394 | }, 395 | { 396 | "cell_type": "markdown", 397 | "metadata": {}, 398 | "source": [ 399 | "### Script Run" 400 | ] 401 | }, 402 | { 403 | "cell_type": "code", 404 | "collapsed": false, 405 | 
"input": [ 406 | "%load_ext autoreload" 407 | ], 408 | "language": "python", 409 | "metadata": {}, 410 | "outputs": [ 411 | { 412 | "output_type": "stream", 413 | "stream": "stdout", 414 | "text": [ 415 | "The autoreload extension is already loaded. To reload it, use:\n", 416 | " %reload_ext autoreload\n" 417 | ] 418 | } 419 | ], 420 | "prompt_number": 49 421 | }, 422 | { 423 | "cell_type": "code", 424 | "collapsed": false, 425 | "input": [ 426 | "%autoreload 2" 427 | ], 428 | "language": "python", 429 | "metadata": {}, 430 | "outputs": [], 431 | "prompt_number": 50 432 | }, 433 | { 434 | "cell_type": "code", 435 | "collapsed": false, 436 | "input": [ 437 | "import lib.sklearn_mnist as skl" 438 | ], 439 | "language": "python", 440 | "metadata": {}, 441 | "outputs": [], 442 | "prompt_number": 51 443 | }, 444 | { 445 | "cell_type": "code", 446 | "collapsed": false, 447 | "input": [ 448 | "skl.main()" 449 | ], 450 | "language": "python", 451 | "metadata": {}, 452 | "outputs": [ 453 | { 454 | "output_type": "stream", 455 | "stream": "stdout", 456 | "text": [ 457 | "... load and setup data\n", 458 | "... building the model structure" 459 | ] 460 | }, 461 | { 462 | "output_type": "stream", 463 | "stream": "stdout", 464 | "text": [ 465 | "\n", 466 | "... training the model\n", 467 | "... evaluate model" 468 | ] 469 | }, 470 | { 471 | "output_type": "stream", 472 | "stream": "stdout", 473 | "text": [ 474 | "\n", 475 | "0.952571428571" 476 | ] 477 | }, 478 | { 479 | "output_type": "stream", 480 | "stream": "stdout", 481 | "text": [ 482 | "\n" 483 | ] 484 | } 485 | ], 486 | "prompt_number": 52 487 | } 488 | ], 489 | "metadata": {} 490 | } 491 | ] 492 | } -------------------------------------------------------------------------------- /lib/theano_mnist.py: -------------------------------------------------------------------------------- 1 | """ 2 | Theano Multilayer Perceptron Net MNIST Example / Tutorial 3 | 4 | Tutorial covers logistic regression using Theano and stochastic 5 | gradient descent optimization method. 6 | 7 | Logistic regression is a probabilistic, linear classifier. It is parametrized 8 | by a weight matrix (W) and a bias vector (b). Classification is 9 | done by projecting data points onto a set of hyperplanes, the distance to 10 | which is used to determine a class membership probability. 11 | 12 | Mathematically, this can be written as: 13 | 14 | P(Y=i|x, W,b) = softmax_i(W*x + b) \\ 15 | = \frac {e^{W_i*x + b_i}} {\sum_j e^{W_j*x + b_j}} 16 | 17 | 18 | The output of the model or prediction is then done by taking the argmax of 19 | the vector whose i'th element is P(Y=i|x). 20 | 21 | y_{pred} = argmax_i P(Y=i|x,W,b) 22 | 23 | 24 | References: 25 | 26 | - textbooks: "Pattern Recognition and Machine Learning" - 27 | Christopher M. 
Bishop, section 4.3.2 28 | 29 | """ 30 | __docformat__ = 'restructedtext en' 31 | 32 | import cPickle 33 | import gzip 34 | import os 35 | import sys 36 | import time 37 | 38 | import numpy 39 | 40 | import theano 41 | import theano.tensor as T 42 | 43 | def shared_dataset(data_xy, borrow=True): 44 | """ Loads dataset into shared variables 45 | 46 | Store our dataset in shared variables to enably copying to GPU memory 47 | 48 | Break data into minibatches because copying data into the GPU is slow 49 | Thus improve performance with shared variables 50 | """ 51 | data_x, data_y = data_xy 52 | shared_x = theano.shared(numpy.asarray(data_x, 53 | dtype=theano.config.floatX), 54 | borrow=borrow) 55 | shared_y = theano.shared(numpy.asarray(data_y, 56 | dtype=theano.config.floatX), 57 | borrow=borrow) 58 | 59 | # GPU require float type to store data 60 | # Change labels to floatX 61 | # Need data as ints in computations becaused used as index 62 | # Instead of returning ``shared_y`` we will have to cast it to int. 63 | # A hack to get around this issue 64 | 65 | return shared_x, T.cast(shared_y, 'int32') 66 | 67 | def load_data(dataset): 68 | ''' Loads the dataset 69 | 70 | datset: string type and path to dataset 71 | 72 | train_set, valid_set, test_set: tuple(input, target) type 73 | input : 2 dimension matrix numpy.ndarray & example per row | tensor 74 | target: 1 dimension vector numpy.ndarray with same length as # input rows | elemwise 75 | 76 | index is used to map target to input 77 | 78 | dataset: string MNIST dataset file path (http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz) 79 | 80 | 81 | ''' 82 | 83 | # Load file if it does not exist under data dir 84 | data_dir, data_file = os.path.split(dataset) 85 | if data_dir == "" and not os.path.isfile(dataset): 86 | # Check if dataset is in the data directory. 87 | new_path = os.path.join(os.path.split('__file__')[0], "..", "data", dataset) 88 | if os.path.isfile(new_path) or data_file == 'mnist.pkl.gz': 89 | dataset = new_path 90 | 91 | if (not os.path.isfile(dataset)) and data_file == 'mnist.pkl.gz': 92 | import urllib 93 | origin = ('http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz') 94 | print('Downloading data from %s' % origin) 95 | urllib.urlretrieve(origin, dataset) 96 | 97 | print '... loading data' 98 | 99 | try: 100 | f = gzip.open(dataset, 'rb') 101 | train_set, valid_set, test_set = cPickle.load(f) 102 | f.close() 103 | 104 | test_set_x, test_set_y = shared_dataset(test_set) 105 | valid_set_x, valid_set_y = shared_dataset(valid_set) 106 | train_set_x, train_set_y = shared_dataset(train_set) 107 | 108 | data_sets = [(train_set_x, train_set_y), (valid_set_x, valid_set_y), 109 | (test_set_x, test_set_y)] 110 | return data_sets 111 | except Exception, e: 112 | print('Data couldn\'t be loaded due to error: ', e) 113 | 114 | class LogisticRegression(object): 115 | """Multi-class Logistic Regression Class 116 | literall building logistic regression class 117 | 118 | 119 | Model coefficients/parameters are 120 | weight matrix :math:`W` 121 | bias vector :math:`b` 122 | 123 | Classification is done by mapping data points onto a set of hyperplanes and the distance from the division determines class membership probability. 
124 | """ 125 | 126 | def __init__(self, input, n_in, n_out, borrow=True): 127 | """ Logistic regression parameters 128 | 129 | input: theano tensor type & one minibatch 130 | n_in: int & # of input units 131 | n_out: int & # ouptut units 132 | 133 | """ 134 | # initialize weights as 0 and matrix of shape (n_in, n_out) 135 | self.W = theano.shared( 136 | value=numpy.zeros( 137 | (n_in, n_out), 138 | dtype=theano.config.floatX 139 | ), 140 | name='W', 141 | borrow=borrow 142 | ) 143 | # initialize biases b as a vector of 0s and vector of shape (n_out) 144 | self.b = theano.shared( 145 | value=numpy.zeros( 146 | (n_out,), 147 | dtype=theano.config.floatX 148 | ), 149 | name='b', 150 | borrow=borrow 151 | ) 152 | 153 | # model structure to compute matrix of classification probabilities - depending on how many classification options there are it will return the probabilities of belonging to each class 154 | # W - matrix & column-k represents each class 155 | # x - matrix & row-j represents input training samples 156 | # b - vector & element-k represents free parameter of hyper plain-k 157 | self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W) + self.b) 158 | 159 | # finds the classification with the max probability 160 | self.y_pred = T.argmax(self.p_y_given_x, axis=1) 161 | 162 | # parameters/coefficients of the model 163 | self.params = [self.W, self.b] 164 | 165 | def negative_log_likelihood(self, y): 166 | """ Negative Log_likelihood Loss 167 | Loss function / cost function to minimize 168 | 169 | Return the mean of the negative log-likelihood of the prediction 170 | of this model under a given target distribution. 171 | 172 | y: theano tensor type & vector of correct labels for each example 173 | 174 | y.shape[0]: number (n) of examples in a minibatch 175 | T.arange(y.shape[0]) creates [0,1,2,... n-1] vector 176 | T.log(self.p_y_given_x): log-probabilities (LP) matrix one row per example and one column per class 177 | LP[T.arange(y.shape[0]),y]: minibatch vector containing [LP[0,y[0]], LP[1,y[1]], ..., - returns the log-probability of the correct label at that point in the matrix 178 | T.mean(LP[T.arange(y.shape[0]),y]): the mean log-likelihood across the minibatch 179 | 180 | Note: we use the mean instead of the sum so that 181 | the learning rate is less dependent on the batch size 182 | cross entropy is a good alternative loss function for softmax 183 | """ 184 | 185 | return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y]) 186 | 187 | def errors(self, y): 188 | """Zero-One Loss 189 | Loss function or cost function to minimize 190 | 191 | Return a float representing the number of errors in the minibatch 192 | over the total number of examples of the minibatch ; zero one 193 | loss over the size of the minibatch 194 | 195 | y: theano tensor type & vector of correct labels for each example 196 | y_pred: ? 
197 | 198 | Note: this error rate is extremely expensive to scale and thus negative log-likelihood is more prefered 199 | """ 200 | 201 | # check if y has same dimension of y_pred 202 | if y.ndim != self.y_pred.ndim: 203 | raise TypeError( 204 | 'y should have the same shape as self.y_pred', 205 | ('y', y.type, 'y_pred', self.y_pred.type) 206 | ) 207 | # check if y is of the correct datatype 208 | if y.dtype.startswith('int'): 209 | # the T.neq operator returns a vector of 0s and 1s, where 1 210 | # represents a mistake in prediction 211 | return T.mean(T.neq(self.y_pred, y)) 212 | else: 213 | raise NotImplementedError() 214 | 215 | 216 | class NeuralNet(object): 217 | ''' 218 | 219 | batch_size: splitting the dataset 220 | 221 | ''' 222 | def __init__(self, batch_size, datasets): 223 | self.batch_size = batch_size 224 | self.train_set_x, self.train_set_y = datasets[0] 225 | self.valid_set_x, self.valid_set_y = datasets[1] 226 | self.test_set_x, self.test_set_y = datasets[2] 227 | self.best_validation_loss = numpy.inf 228 | self.test_score = 0. 229 | self.epoch = 0 230 | self.comput_minibatches() 231 | 232 | def comput_minibatches(self): 233 | self.n_train_batches = self.train_set_x.get_value(borrow=True).shape[0] / self.batch_size 234 | self.n_valid_batches = self.valid_set_x.get_value(borrow=True).shape[0] / self.batch_size 235 | self.n_test_batches = self.test_set_x.get_value(borrow=True).shape[0] / self.batch_size 236 | 237 | 238 | def create_structure(self, learning_rate=0.13): 239 | ''' 240 | Define neural net structure | Theano functions to address structure 241 | 242 | Apply loss function and define optimization method to tune neural net weights. 243 | 244 | learning_rate: float offsets how much adjustment is made to weights (stochastic gradient factor) 245 | 246 | ''' 247 | # allocate symbolic variables for the data 248 | index = T.lscalar() # index to a [mini]batch 249 | 250 | # generate symbolic variables for input (x and y represent a 251 | # minibatch) 252 | x = T.matrix('x') # data, presented as rasterized images 253 | y = T.ivector('y') # labels, presented as 1D vector of [int] labels 254 | 255 | # Construct the logistic regression class 256 | # Each MNIST image has size 28*28 so 784 node input and 10 node ouput 257 | classifier = LogisticRegression(input=x, n_in=28 * 28, n_out=10) 258 | 259 | # the cost we minimize during training is the negative log likelihood of 260 | # the model in symbolic format 261 | cost = classifier.negative_log_likelihood(y) 262 | 263 | # creating structure to compute the gradient of cost with respect to theta = (W,b) 264 | g_W = T.grad(cost=cost, wrt=classifier.W) 265 | g_b = T.grad(cost=cost, wrt=classifier.b) 266 | 267 | # specify how to update the parameters of the model as a list of 268 | # (variable, update expression) pairs. 
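# plain gradient descent step for each parameter: theta <- theta - learning_rate * d(cost)/d(theta)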
269 | updates = [(classifier.W, classifier.W - learning_rate * g_W), 270 | (classifier.b, classifier.b - learning_rate * g_b)] 271 | 272 | 273 | # compiling a Theano function `train_model` that returns the cost, but in 274 | # the same time updates the parameter of the model based on the rules 275 | # defined in `updates` 276 | self.train_model = theano.function( 277 | inputs=[index], 278 | outputs=cost, # negative log likelihood to update the weights 279 | updates=updates, 280 | givens={ 281 | x: self.train_set_x[index * self.batch_size: (index + 1) * self.batch_size], 282 | y: self.train_set_y[index * self.batch_size: (index + 1) * self.batch_size] 283 | } 284 | ) 285 | 286 | # compiling a Theano function that computes the mistakes that are made by 287 | # the model on a minibatch 288 | 289 | self.validate_model = theano.function( 290 | inputs=[index], 291 | outputs=classifier.errors(y), 292 | givens={ 293 | x: self.valid_set_x[index * self.batch_size: (index + 1) * self.batch_size], 294 | y: self.valid_set_y[index * self.batch_size: (index + 1) * self.batch_size] 295 | } 296 | ) 297 | 298 | self.test_model = theano.function( 299 | inputs=[index], 300 | outputs=classifier.errors(y), # correct class / total for accuracy rate 301 | givens={ 302 | x: self.test_set_x[index * self.batch_size: (index + 1) * self.batch_size], 303 | y: self.test_set_y[index * self.batch_size: (index + 1) * self.batch_size] 304 | } 305 | ) 306 | 307 | 308 | def train_nn_model(self, n_epochs=1000): 309 | ''' 310 | Use stochastic gradient descent for an optimization approach of a log-linear model 311 | 312 | n_epochs: int max number of iterations / epochs to run the optimizer 313 | 314 | ''' 315 | # early-stopping parameters 316 | patience = 5000 # look as this many examples regardless 317 | patience_increase = 2 # wait this much longer when a new best is 318 | # found 319 | improvement_threshold = 0.995 # a relative improvement of this much is 320 | # considered significant 321 | validation_frequency = min(self.n_train_batches, patience / 2) 322 | # go through this many 323 | # minibatche before checking the network 324 | # on the validation set; in this case we 325 | # check every epoch 326 | done_looping = False 327 | 328 | start_time = time.clock() 329 | 330 | 331 | while (self.epoch < n_epochs) and (not done_looping): 332 | self.epoch = self.epoch + 1 333 | 334 | # 83 training baches = 83 loops 335 | for minibatch_index in xrange(self.n_train_batches): 336 | 337 | minibatch_avg_cost = self.train_model(minibatch_index) 338 | # iteration number 339 | iter = (self.epoch - 1) * self.n_train_batches + minibatch_index 340 | 341 | if (iter + 1) % validation_frequency == 0: 342 | # compute zero-one loss on validation set 343 | val_losses = [self.validate_model(i) for i in xrange(self.n_valid_batches)] 344 | this_validation_loss = numpy.mean(val_losses) 345 | 346 | print('epoch %i, minibatch %i/%i, validation error %f %%' % (self.epoch, minibatch_index + 1, self.n_train_batches, this_validation_loss * 100.)) 347 | 348 | # if we got the best validation score until now 349 | if this_validation_loss < self.best_validation_loss: 350 | 351 | # improve patience if loss improvement is good enough 352 | # if major improvements in error reduction, wait a lot longer before stopping 353 | if this_validation_loss < (self.best_validation_loss * improvement_threshold): 354 | patience = max(patience, iter * patience_increase) 355 | 356 | self.best_validation_loss = this_validation_loss 357 | 358 | self.test_score = 
self.evaluate_model() 359 | 360 | print(('epoch %i, minibatch %i/%i, test error of best model %f %%') % (self.epoch, minibatch_index + 1, self.n_train_batches, self.test_score * 100.)) 361 | 362 | # patience at a min is 5000 & this determines if it will stop 363 | if patience <= iter: 364 | done_looping = True 365 | break 366 | 367 | end_time = time.clock() 368 | 369 | print 'Optimization complete with best validation score of %f %%, with test performance %f %%' % (self.best_validation_loss * 100., self.test_score * 100.) 370 | print 'The code run for %d epochs, with %f epochs/sec' % (self.epoch, 1. * self.epoch / (end_time - start_time)) 371 | print >> sys.stderr, 'The code for file ' + os.path.split('__file__')[1] + ' ran for %.1fs' % ((end_time - start_time)) 372 | 373 | 374 | def evaluate_model(self): 375 | # test it on the test set 376 | test_losses = [self.test_model(i) for i in xrange(self.n_test_batches)] 377 | return numpy.mean(test_losses) 378 | 379 | 380 | 381 | def main(): 382 | ''' 383 | Runs the full program to train the model and then evaluate 384 | ''' 385 | 386 | dataset='data/mnist.pkl.gz' 387 | batch_size=600 388 | learning_rate=0.13 389 | n_epochs=1000 390 | 391 | # Load & split datasets 392 | print '... load and setup data' 393 | 394 | datasets = load_data(dataset) 395 | 396 | print '... building the model' 397 | 398 | nn = NeuralNet(batch_size, datasets) 399 | nn.create_structure(learning_rate) 400 | 401 | print '... training the model' 402 | 403 | nn.train_nn_model(n_epochs) 404 | 405 | 406 | 407 | 408 | if __name__ == '__main__': 409 | main() 410 | -------------------------------------------------------------------------------- /Check_Lasagne.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "worksheets": [ 3 | { 4 | "cells": [ 5 | { 6 | "cell_type": "markdown", 7 | "metadata": {}, 8 | "source": [ 9 | "## Lasagne Example" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "### Manual Run" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "metadata": {}, 22 | "outputs": [], 23 | "input": [ 24 | "import cPickle as pickle\n", 25 | "import gzip\n", 26 | "import itertools\n", 27 | "import urllib\n", 28 | "\n", 29 | "import numpy as np\n", 30 | "import lasagne\n", 31 | "import theano\n", 32 | "import theano.tensor as T" 33 | ], 34 | "language": "python", 35 | "prompt_number": 1 36 | }, 37 | { 38 | "cell_type": "code", 39 | "metadata": {}, 40 | "outputs": [], 41 | "input": [ 42 | "DATA_URL = 'http://deeplearning.net/data/mnist/mnist.pkl.gz'\n", 43 | "DATA_FILENAME = 'data/mnist.pkl.gz'\n", 44 | "\n", 45 | "NUM_EPOCHS = 500\n", 46 | "BATCH_SIZE = 600\n", 47 | "NUM_HIDDEN_UNITS = 512\n", 48 | "LEARNING_RATE = 0.01\n", 49 | "MOMENTUM = 0.9" 50 | ], 51 | "language": "python", 52 | "prompt_number": 2 53 | }, 54 | { 55 | "cell_type": "code", 56 | "metadata": {}, 57 | "outputs": [], 58 | "input": [ 59 | "urllib.urlretrieve(DATA_URL, DATA_FILENAME)\n", 60 | "with gzip.open(DATA_FILENAME, 'rb') as f:\n", 61 | " data = pickle.load(f)" 62 | ], 63 | "language": "python", 64 | "prompt_number": 3 65 | }, 66 | { 67 | "cell_type": "code", 68 | "metadata": {}, 69 | "outputs": [], 70 | "input": [ 71 | "X_train, y_train = data[0]\n", 72 | "X_valid, y_valid = data[1]\n", 73 | "X_test, y_test = data[2]" 74 | ], 75 | "language": "python", 76 | "prompt_number": 4 77 | }, 78 | { 79 | "cell_type": "code", 80 | "metadata": {}, 81 | "outputs": [], 82 | "input": [ 83 | "dataset = dict(\n", 84 | " 
X_train=theano.shared(lasagne.utils.floatX(X_train)),\n", 85 | " y_train=T.cast(theano.shared(y_train), 'int32'),\n", 86 | " X_valid=theano.shared(lasagne.utils.floatX(X_valid)),\n", 87 | " y_valid=T.cast(theano.shared(y_valid), 'int32'),\n", 88 | " X_test=theano.shared(lasagne.utils.floatX(X_test)),\n", 89 | " y_test=T.cast(theano.shared(y_test), 'int32'),\n", 90 | " num_examples_train=X_train.shape[0],\n", 91 | " num_examples_valid=X_valid.shape[0],\n", 92 | " num_examples_test=X_test.shape[0],\n", 93 | " input_dim=X_train.shape[1],\n", 94 | " output_dim=10,\n", 95 | " )" 96 | ], 97 | "language": "python", 98 | "prompt_number": 5 99 | }, 100 | { 101 | "cell_type": "code", 102 | "metadata": {}, 103 | "outputs": [], 104 | "input": [ 105 | "input_dim = dataset[\"input_dim\"]\n", 106 | "output_dim = dataset[\"output_dim\"]\n", 107 | "batch_size=BATCH_SIZE\n", 108 | "num_hidden_units=NUM_HIDDEN_UNITS" 109 | ], 110 | "language": "python", 111 | "prompt_number": 6 112 | }, 113 | { 114 | "cell_type": "code", 115 | "metadata": {}, 116 | "outputs": [], 117 | "input": [ 118 | "l_in = lasagne.layers.InputLayer(\n", 119 | " shape=(batch_size, input_dim),\n", 120 | " )\n", 121 | "l_hidden1 = lasagne.layers.DenseLayer(\n", 122 | " l_in,\n", 123 | " num_units=num_hidden_units,\n", 124 | " nonlinearity=lasagne.nonlinearities.rectify,\n", 125 | " )\n", 126 | "l_hidden1_dropout = lasagne.layers.DropoutLayer(\n", 127 | " l_hidden1,\n", 128 | " p=0.5,\n", 129 | " )\n", 130 | "l_hidden2 = lasagne.layers.DenseLayer(\n", 131 | " l_hidden1_dropout,\n", 132 | " num_units=num_hidden_units,\n", 133 | " nonlinearity=lasagne.nonlinearities.rectify,\n", 134 | " )\n", 135 | "l_hidden2_dropout = lasagne.layers.DropoutLayer(\n", 136 | " l_hidden2,\n", 137 | " p=0.5,\n", 138 | " )\n", 139 | "l_out = lasagne.layers.DenseLayer(\n", 140 | " l_hidden2_dropout,\n", 141 | " num_units=output_dim,\n", 142 | " nonlinearity=lasagne.nonlinearities.softmax,\n", 143 | " )" 144 | ], 145 | "language": "python", 146 | "prompt_number": 7 147 | }, 148 | { 149 | "cell_type": "code", 150 | "metadata": {}, 151 | "outputs": [ 152 | { 153 | "output_type": "pyout", 154 | "prompt_number": 8, 155 | "text": [ 156 | "Softmax.0" 157 | ], 158 | "metadata": {} 159 | } 160 | ], 161 | "input": [ 162 | "l_out.get_output()" 163 | ], 164 | "language": "python", 165 | "prompt_number": 8 166 | }, 167 | { 168 | "cell_type": "code", 169 | "metadata": {}, 170 | "outputs": [], 171 | "input": [ 172 | "output_layer = l_out\n", 173 | "X_tensor_type=T.matrix\n", 174 | "learning_rate=LEARNING_RATE\n", 175 | "momentum=MOMENTUM" 176 | ], 177 | "language": "python", 178 | "prompt_number": 9 179 | }, 180 | { 181 | "cell_type": "code", 182 | "metadata": {}, 183 | "outputs": [], 184 | "input": [ 185 | "batch_index = T.iscalar('batch_index')\n", 186 | "X_batch = X_tensor_type('x')\n", 187 | "y_batch = T.ivector('y')\n", 188 | "batch_slice = slice(batch_index * batch_size, (batch_index + 1) * batch_size)" 189 | ], 190 | "language": "python", 191 | "prompt_number": 10 192 | }, 193 | { 194 | "cell_type": "code", 195 | "metadata": {}, 196 | "outputs": [], 197 | "input": [ 198 | "def loss(output):\n", 199 | " return -T.mean(T.log(output)[T.arange(y_batch.shape[0]), y_batch])" 200 | ], 201 | "language": "python", 202 | "prompt_number": 11 203 | }, 204 | { 205 | "cell_type": "code", 206 | "metadata": {}, 207 | "outputs": [], 208 | "input": [ 209 | "loss_train = loss(output_layer.get_output(X_batch))\n", 210 | "loss_eval = loss(output_layer.get_output(X_batch, deterministic=True))" 
211 | ], 212 | "language": "python", 213 | "prompt_number": 12 214 | }, 215 | { 216 | "cell_type": "code", 217 | "metadata": {}, 218 | "outputs": [], 219 | "input": [ 220 | "pred = T.argmax(\n", 221 | " output_layer.get_output(X_batch, deterministic=True), axis=1)\n", 222 | "accuracy = T.mean(T.eq(pred, y_batch))" 223 | ], 224 | "language": "python", 225 | "prompt_number": 13 226 | }, 227 | { 228 | "cell_type": "code", 229 | "metadata": {}, 230 | "outputs": [ 231 | { 232 | "output_type": "stream", 233 | "stream": "stderr", 234 | "text": [ 235 | "/usr/local/Cellar/python/2.7.9/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/Lasagne-0.1.dev0-py2.7.egg/lasagne/layers/helper.py:52: UserWarning: get_all_layers() has been changed to return layers in topological order. The former implementation is still available as get_all_layers_old(), but will be removed before the first release of Lasagne. To ignore this warning, use `warnings.filterwarnings('ignore', '.*topo.*')`.\n", 236 | " warnings.warn(\"get_all_layers() has been changed to return layers in \"\n" 237 | ] 238 | } 239 | ], 240 | "input": [ 241 | "all_params = lasagne.layers.get_all_params(output_layer)\n", 242 | "updates = lasagne.updates.nesterov_momentum(\n", 243 | " loss_train, all_params, learning_rate, momentum)" 244 | ], 245 | "language": "python", 246 | "prompt_number": 14 247 | }, 248 | { 249 | "cell_type": "code", 250 | "metadata": {}, 251 | "outputs": [], 252 | "input": [ 253 | "iter_train = theano.function(\n", 254 | " [batch_index], loss_train,\n", 255 | " updates=updates,\n", 256 | " givens={\n", 257 | " X_batch: dataset['X_train'][batch_slice],\n", 258 | " y_batch: dataset['y_train'][batch_slice],\n", 259 | " },\n", 260 | " )\n", 261 | "\n", 262 | "iter_valid = theano.function(\n", 263 | " [batch_index], [loss_eval, accuracy],\n", 264 | " givens={\n", 265 | " X_batch: dataset['X_valid'][batch_slice],\n", 266 | " y_batch: dataset['y_valid'][batch_slice],\n", 267 | " },\n", 268 | " )\n", 269 | "\n", 270 | "iter_test = theano.function(\n", 271 | " [batch_index], [loss_eval, accuracy],\n", 272 | " givens={\n", 273 | " X_batch: dataset['X_test'][batch_slice],\n", 274 | " y_batch: dataset['y_test'][batch_slice],\n", 275 | " },\n", 276 | " )" 277 | ], 278 | "language": "python", 279 | "prompt_number": 16 280 | }, 281 | { 282 | "cell_type": "code", 283 | "metadata": {}, 284 | "outputs": [], 285 | "input": [ 286 | "iter_funcs = dict(\n", 287 | " train=iter_train,\n", 288 | " valid=iter_valid,\n", 289 | " test=iter_test,\n", 290 | " )" 291 | ], 292 | "language": "python", 293 | "prompt_number": 17 294 | }, 295 | { 296 | "cell_type": "code", 297 | "metadata": {}, 298 | "outputs": [], 299 | "input": [ 300 | "num_batches_train = dataset['num_examples_train'] // batch_size\n", 301 | "num_batches_valid = dataset['num_examples_valid'] // batch_size\n", 302 | "num_batches_test = dataset['num_examples_test'] // batch_size" 303 | ], 304 | "language": "python", 305 | "prompt_number": 18 306 | }, 307 | { 308 | "cell_type": "code", 309 | "metadata": {}, 310 | "outputs": [], 311 | "input": [ 312 | "def train(iter_funcs, dataset, batch_size=BATCH_SIZE):\n", 313 | " num_batches_train = dataset['num_examples_train'] // batch_size\n", 314 | " num_batches_valid = dataset['num_examples_valid'] // batch_size\n", 315 | " num_batches_test = dataset['num_examples_test'] // batch_size\n", 316 | "\n", 317 | " for epoch in itertools.count(1):\n", 318 | " batch_train_losses = []\n", 319 | " for b in range(num_batches_train):\n", 320 | " 
batch_train_loss = iter_funcs['train'](b)\n", 321 | " batch_train_losses.append(batch_train_loss)\n", 322 | "\n", 323 | " avg_train_loss = np.mean(batch_train_losses)\n", 324 | "\n", 325 | " batch_valid_losses = []\n", 326 | " batch_valid_accuracies = []\n", 327 | " for b in range(num_batches_valid):\n", 328 | " batch_valid_loss, batch_valid_accuracy = iter_funcs['valid'](b)\n", 329 | " batch_valid_losses.append(batch_valid_loss)\n", 330 | " batch_valid_accuracies.append(batch_valid_accuracy)\n", 331 | "\n", 332 | " avg_valid_loss = np.mean(batch_valid_losses)\n", 333 | " avg_valid_accuracy = np.mean(batch_valid_accuracies)\n", 334 | "\n", 335 | " yield {\n", 336 | " 'number': epoch,\n", 337 | " 'train_loss': avg_train_loss,\n", 338 | " 'valid_loss': avg_valid_loss,\n", 339 | " 'valid_accuracy': avg_valid_accuracy,\n", 340 | " }" 341 | ], 342 | "language": "python", 343 | "prompt_number": 19 344 | }, 345 | { 346 | "cell_type": "code", 347 | "metadata": {}, 348 | "outputs": [], 349 | "input": [ 350 | "num_epochs = 10" 351 | ], 352 | "language": "python", 353 | "prompt_number": 20 354 | }, 355 | { 356 | "cell_type": "code", 357 | "metadata": {}, 358 | "outputs": [ 359 | { 360 | "output_type": "stream", 361 | "stream": "stdout", 362 | "text": [ 363 | "Epoch 1 of 10\n", 364 | " training loss:\t\t1.333131\n", 365 | " validation loss:\t\t0.462975\n", 366 | " validation accuracy:\t\t87.49 %\n", 367 | "Epoch 2 of 10" 368 | ] 369 | }, 370 | { 371 | "output_type": "stream", 372 | "stream": "stdout", 373 | "text": [ 374 | "\n", 375 | " training loss:\t\t0.586310\n", 376 | " validation loss:\t\t0.326112\n", 377 | " validation accuracy:\t\t90.57 %\n", 378 | "Epoch 3 of 10" 379 | ] 380 | }, 381 | { 382 | "output_type": "stream", 383 | "stream": "stdout", 384 | "text": [ 385 | "\n", 386 | " training loss:\t\t0.460305\n", 387 | " validation loss:\t\t0.279471\n", 388 | " validation accuracy:\t\t91.79 %\n", 389 | "Epoch 4 of 10" 390 | ] 391 | }, 392 | { 393 | "output_type": "stream", 394 | "stream": "stdout", 395 | "text": [ 396 | "\n", 397 | " training loss:\t\t0.401532\n", 398 | " validation loss:\t\t0.248660\n", 399 | " validation accuracy:\t\t92.58 %\n", 400 | "Epoch 5 of 10" 401 | ] 402 | }, 403 | { 404 | "output_type": "stream", 405 | "stream": "stdout", 406 | "text": [ 407 | "\n", 408 | " training loss:\t\t0.361652\n", 409 | " validation loss:\t\t0.225353\n", 410 | " validation accuracy:\t\t93.34 %\n", 411 | "Epoch 6 of 10" 412 | ] 413 | }, 414 | { 415 | "output_type": "stream", 416 | "stream": "stdout", 417 | "text": [ 418 | "\n", 419 | " training loss:\t\t0.330177\n", 420 | " validation loss:\t\t0.209012\n", 421 | " validation accuracy:\t\t93.83 %\n", 422 | "Epoch 7 of 10" 423 | ] 424 | }, 425 | { 426 | "output_type": "stream", 427 | "stream": "stdout", 428 | "text": [ 429 | "\n", 430 | " training loss:\t\t0.308497\n", 431 | " validation loss:\t\t0.194022\n", 432 | " validation accuracy:\t\t94.28 %\n", 433 | "Epoch 8 of 10" 434 | ] 435 | }, 436 | { 437 | "output_type": "stream", 438 | "stream": "stdout", 439 | "text": [ 440 | "\n", 441 | " training loss:\t\t0.289160\n", 442 | " validation loss:\t\t0.181664\n", 443 | " validation accuracy:\t\t94.70 %\n", 444 | "Epoch 9 of 10" 445 | ] 446 | }, 447 | { 448 | "output_type": "stream", 449 | "stream": "stdout", 450 | "text": [ 451 | "\n", 452 | " training loss:\t\t0.267791\n", 453 | " validation loss:\t\t0.170311\n", 454 | " validation accuracy:\t\t94.99 %\n", 455 | "Epoch 10 of 10" 456 | ] 457 | }, 458 | { 459 | "output_type": "stream", 460 | "stream": 
"stdout", 461 | "text": [ 462 | "\n", 463 | " training loss:\t\t0.252842\n", 464 | " validation loss:\t\t0.161186\n", 465 | " validation accuracy:\t\t95.34 %\n" 466 | ] 467 | } 468 | ], 469 | "input": [ 470 | "for epoch in train(iter_funcs, dataset):\n", 471 | " print(\"Epoch %d of %d\" % (epoch['number'], num_epochs))\n", 472 | " print(\" training loss:\\t\\t%.6f\" % epoch['train_loss'])\n", 473 | " print(\" validation loss:\\t\\t%.6f\" % epoch['valid_loss'])\n", 474 | " print(\" validation accuracy:\\t\\t%.2f %%\" %\n", 475 | " (epoch['valid_accuracy'] * 100))\n", 476 | "\n", 477 | " if epoch['number'] >= num_epochs:\n", 478 | " break\n", 479 | "\n", 480 | "result= output_layer" 481 | ], 482 | "language": "python", 483 | "prompt_number": 21 484 | }, 485 | { 486 | "cell_type": "markdown", 487 | "metadata": {}, 488 | "source": [ 489 | "### Script Run" 490 | ] 491 | }, 492 | { 493 | "cell_type": "code", 494 | "metadata": {}, 495 | "outputs": [], 496 | "input": [ 497 | "%load_ext autoreload" 498 | ], 499 | "language": "python", 500 | "prompt_number": 22 501 | }, 502 | { 503 | "cell_type": "code", 504 | "metadata": {}, 505 | "outputs": [], 506 | "input": [ 507 | "%autoreload 2" 508 | ], 509 | "language": "python", 510 | "prompt_number": 23 511 | }, 512 | { 513 | "cell_type": "code", 514 | "metadata": {}, 515 | "outputs": [], 516 | "input": [ 517 | "import lib.lasagne_mnist as lasagne_mnist" 518 | ], 519 | "language": "python", 520 | "prompt_number": 24 521 | }, 522 | { 523 | "cell_type": "code", 524 | "metadata": {}, 525 | "outputs": [ 526 | { 527 | "output_type": "stream", 528 | "stream": "stdout", 529 | "text": [ 530 | "Starting training...\n", 531 | "Epoch 1 of 10" 532 | ] 533 | }, 534 | { 535 | "output_type": "stream", 536 | "stream": "stdout", 537 | "text": [ 538 | "\n", 539 | " training loss:\t\t1.337511\n", 540 | " validation loss:\t\t0.453990\n", 541 | " validation accuracy:\t\t87.86 %\n", 542 | "Epoch 2 of 10" 543 | ] 544 | }, 545 | { 546 | "output_type": "stream", 547 | "stream": "stdout", 548 | "text": [ 549 | "\n", 550 | " training loss:\t\t0.580036\n", 551 | " validation loss:\t\t0.324367\n", 552 | " validation accuracy:\t\t90.60 %\n", 553 | "Epoch 3 of 10" 554 | ] 555 | }, 556 | { 557 | "output_type": "stream", 558 | "stream": "stdout", 559 | "text": [ 560 | "\n", 561 | " training loss:\t\t0.463588\n", 562 | " validation loss:\t\t0.276408\n", 563 | " validation accuracy:\t\t91.95 %\n", 564 | "Epoch 4 of 10" 565 | ] 566 | }, 567 | { 568 | "output_type": "stream", 569 | "stream": "stdout", 570 | "text": [ 571 | "\n", 572 | " training loss:\t\t0.398257\n", 573 | " validation loss:\t\t0.247057\n", 574 | " validation accuracy:\t\t92.80 %\n", 575 | "Epoch 5 of 10" 576 | ] 577 | }, 578 | { 579 | "output_type": "stream", 580 | "stream": "stdout", 581 | "text": [ 582 | "\n", 583 | " training loss:\t\t0.359415\n", 584 | " validation loss:\t\t0.225045\n", 585 | " validation accuracy:\t\t93.38 %\n", 586 | "Epoch 6 of 10" 587 | ] 588 | }, 589 | { 590 | "output_type": "stream", 591 | "stream": "stdout", 592 | "text": [ 593 | "\n", 594 | " training loss:\t\t0.330612\n", 595 | " validation loss:\t\t0.207095\n", 596 | " validation accuracy:\t\t93.85 %\n", 597 | "Epoch 7 of 10" 598 | ] 599 | }, 600 | { 601 | "output_type": "stream", 602 | "stream": "stdout", 603 | "text": [ 604 | "\n", 605 | " training loss:\t\t0.309475\n", 606 | " validation loss:\t\t0.192656\n", 607 | " validation accuracy:\t\t94.33 %\n", 608 | "Epoch 8 of 10" 609 | ] 610 | }, 611 | { 612 | "output_type": "stream", 613 | 
"stream": "stdout", 614 | "text": [ 615 | "\n", 616 | " training loss:\t\t0.285780\n", 617 | " validation loss:\t\t0.179702\n", 618 | " validation accuracy:\t\t94.79 %\n", 619 | "Epoch 9 of 10" 620 | ] 621 | }, 622 | { 623 | "output_type": "stream", 624 | "stream": "stdout", 625 | "text": [ 626 | "\n", 627 | " training loss:\t\t0.268906\n", 628 | " validation loss:\t\t0.170092\n", 629 | " validation accuracy:\t\t95.00 %\n", 630 | "Epoch 10 of 10" 631 | ] 632 | }, 633 | { 634 | "output_type": "stream", 635 | "stream": "stdout", 636 | "text": [ 637 | "\n", 638 | " training loss:\t\t0.253436\n", 639 | " validation loss:\t\t0.160809\n", 640 | " validation accuracy:\t\t95.28 %\n" 641 | ] 642 | }, 643 | { 644 | "output_type": "pyout", 645 | "prompt_number": 25, 646 | "text": [ 647 | "" 648 | ], 649 | "metadata": {} 650 | } 651 | ], 652 | "input": [ 653 | "lasagne_mnist.main(10)" 654 | ], 655 | "language": "python", 656 | "prompt_number": 25 657 | }, 658 | { 659 | "cell_type": "code", 660 | "metadata": {}, 661 | "outputs": [], 662 | "input": [ 663 | "" 664 | ], 665 | "language": "python" 666 | } 667 | ] 668 | } 669 | ], 670 | "cells": [], 671 | "metadata": { 672 | "name": "", 673 | "signature": "sha256:7ab012cb829fff3f88b65491a6ea053c0247bb77de6ca610c1027e99fb6879ec" 674 | }, 675 | "nbformat": 3, 676 | "nbformat_minor": 0 677 | } -------------------------------------------------------------------------------- /Check_Graphlab.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "", 4 | "signature": "sha256:b3441e92f924f3c708ace0febae1dfbf2b871ef11fdc87241c5046cadc08f366" 5 | }, 6 | "nbformat": 3, 7 | "nbformat_minor": 0, 8 | "worksheets": [ 9 | { 10 | "cells": [ 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "##Graphlab Example" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "### Manual Run" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "collapsed": false, 28 | "input": [ 29 | "import graphlab" 30 | ], 31 | "language": "python", 32 | "metadata": {}, 33 | "outputs": [ 34 | { 35 | "output_type": "stream", 36 | "stream": "stderr", 37 | "text": [ 38 | "/Library/Python/2.7/site-packages/pandas/io/excel.py:626: UserWarning: Installed openpyxl is not supported at this time. Use >=1.6.1 and <2.0.0.\n", 39 | " .format(openpyxl_compat.start_ver, openpyxl_compat.stop_ver))\n" 40 | ] 41 | } 42 | ], 43 | "prompt_number": 1 44 | }, 45 | { 46 | "cell_type": "code", 47 | "collapsed": false, 48 | "input": [ 49 | "train_data = graphlab.SFrame('http://s3.amazonaws.com/GraphLab-Datasets/mnist/sframe/train')\n", 50 | "test_data = graphlab.SFrame('http://s3.amazonaws.com/GraphLab-Datasets/mnist/sframe/test')" 51 | ], 52 | "language": "python", 53 | "metadata": {}, 54 | "outputs": [ 55 | { 56 | "output_type": "stream", 57 | "stream": "stderr", 58 | "text": [ 59 | "[INFO] Start server at: ipc:///tmp/graphlab_server-3900 - Server binary: /usr/local/lib/python2.7/site-packages/graphlab/unity_server - Server log: /tmp/graphlab_server_1427672424.log\n" 60 | ] 61 | }, 62 | { 63 | "output_type": "stream", 64 | "stream": "stderr", 65 | "text": [ 66 | "[INFO] GraphLab Server Version: 1.3.0\n" 67 | ] 68 | }, 69 | { 70 | "html": [ 71 | "
PROGRESS: Downloading http://s3.amazonaws.com/GraphLab-Datasets/mnist/sframe/train/dir_archive.ini to /var/tmp/graphlab-mwarrick/3900/000000.ini
" 72 | ], 73 | "metadata": {}, 74 | "output_type": "display_data", 75 | "text": [ 76 | "PROGRESS: Downloading http://s3.amazonaws.com/GraphLab-Datasets/mnist/sframe/train/dir_archive.ini to /var/tmp/graphlab-mwarrick/3900/000000.ini" 77 | ] 78 | }, 79 | { 80 | "html": [ 81 | "
PROGRESS: Downloading http://s3.amazonaws.com/GraphLab-Datasets/mnist/sframe/train/objects.bin to /var/tmp/graphlab-mwarrick/3900/000001.bin
" 82 | ], 83 | "metadata": {}, 84 | "output_type": "display_data", 85 | "text": [ 86 | "PROGRESS: Downloading http://s3.amazonaws.com/GraphLab-Datasets/mnist/sframe/train/objects.bin to /var/tmp/graphlab-mwarrick/3900/000001.bin" 87 | ] 88 | }, 89 | { 90 | "html": [ 91 | "
PROGRESS: Downloading http://s3.amazonaws.com/GraphLab-Datasets/mnist/sframe/train/m_4558118e.frame_idx to /var/tmp/graphlab-mwarrick/3900/000002.frame_idx
" 92 | ], 93 | "metadata": {}, 94 | "output_type": "display_data", 95 | "text": [ 96 | "PROGRESS: Downloading http://s3.amazonaws.com/GraphLab-Datasets/mnist/sframe/train/m_4558118e.frame_idx to /var/tmp/graphlab-mwarrick/3900/000002.frame_idx" 97 | ] 98 | }, 99 | { 100 | "html": [ 101 | "
PROGRESS: Downloading http://s3.amazonaws.com/GraphLab-Datasets/mnist/sframe/train/m_4558118e.sidx to /var/tmp/graphlab-mwarrick/3900/000003.sidx
" 102 | ], 103 | "metadata": {}, 104 | "output_type": "display_data", 105 | "text": [ 106 | "PROGRESS: Downloading http://s3.amazonaws.com/GraphLab-Datasets/mnist/sframe/train/m_4558118e.sidx to /var/tmp/graphlab-mwarrick/3900/000003.sidx" 107 | ] 108 | }, 109 | { 110 | "html": [ 111 | "
PROGRESS: Downloading http://s3.amazonaws.com/GraphLab-Datasets/mnist/sframe/train/m_4558118e.0000 to /var/tmp/graphlab-mwarrick/3900/000004.0000
" 112 | ], 113 | "metadata": {}, 114 | "output_type": "display_data", 115 | "text": [ 116 | "PROGRESS: Downloading http://s3.amazonaws.com/GraphLab-Datasets/mnist/sframe/train/m_4558118e.0000 to /var/tmp/graphlab-mwarrick/3900/000004.0000" 117 | ] 118 | }, 119 | { 120 | "html": [ 121 | "
PROGRESS: Downloading http://s3.amazonaws.com/GraphLab-Datasets/mnist/sframe/train/m_4558118e.0001 to /var/tmp/graphlab-mwarrick/3900/000005.0001
" 122 | ], 123 | "metadata": {}, 124 | "output_type": "display_data", 125 | "text": [ 126 | "PROGRESS: Downloading http://s3.amazonaws.com/GraphLab-Datasets/mnist/sframe/train/m_4558118e.0001 to /var/tmp/graphlab-mwarrick/3900/000005.0001" 127 | ] 128 | }, 129 | { 130 | "html": [ 131 | "
PROGRESS: Downloading http://s3.amazonaws.com/GraphLab-Datasets/mnist/sframe/train/m_4558118e.0002 to /var/tmp/graphlab-mwarrick/3900/000006.0002
" 132 | ], 133 | "metadata": {}, 134 | "output_type": "display_data", 135 | "text": [ 136 | "PROGRESS: Downloading http://s3.amazonaws.com/GraphLab-Datasets/mnist/sframe/train/m_4558118e.0002 to /var/tmp/graphlab-mwarrick/3900/000006.0002" 137 | ] 138 | }, 139 | { 140 | "html": [ 141 | "
PROGRESS: Downloading http://s3.amazonaws.com/GraphLab-Datasets/mnist/sframe/train/m_4558118e.0003 to /var/tmp/graphlab-mwarrick/3900/000007.0003
" 142 | ], 143 | "metadata": {}, 144 | "output_type": "display_data", 145 | "text": [ 146 | "PROGRESS: Downloading http://s3.amazonaws.com/GraphLab-Datasets/mnist/sframe/train/m_4558118e.0003 to /var/tmp/graphlab-mwarrick/3900/000007.0003" 147 | ] 148 | }, 149 | { 150 | "html": [ 151 | "
PROGRESS: Downloading http://s3.amazonaws.com/GraphLab-Datasets/mnist/sframe/train/m_4558118e.0004 to /var/tmp/graphlab-mwarrick/3900/000008.0004
" 152 | ], 153 | "metadata": {}, 154 | "output_type": "display_data", 155 | "text": [ 156 | "PROGRESS: Downloading http://s3.amazonaws.com/GraphLab-Datasets/mnist/sframe/train/m_4558118e.0004 to /var/tmp/graphlab-mwarrick/3900/000008.0004" 157 | ] 158 | }, 159 | { 160 | "html": [ 161 | "
PROGRESS: Downloading http://s3.amazonaws.com/GraphLab-Datasets/mnist/sframe/train/m_4558118e.0005 to /var/tmp/graphlab-mwarrick/3900/000009.0005
" 162 | ], 163 | "metadata": {}, 164 | "output_type": "display_data", 165 | "text": [ 166 | "PROGRESS: Downloading http://s3.amazonaws.com/GraphLab-Datasets/mnist/sframe/train/m_4558118e.0005 to /var/tmp/graphlab-mwarrick/3900/000009.0005" 167 | ] 168 | }, 169 | { 170 | "html": [ 171 | "
PROGRESS: Downloading http://s3.amazonaws.com/GraphLab-Datasets/mnist/sframe/train/m_4558118e.0006 to /var/tmp/graphlab-mwarrick/3900/000010.0006
" 172 | ], 173 | "metadata": {}, 174 | "output_type": "display_data", 175 | "text": [ 176 | "PROGRESS: Downloading http://s3.amazonaws.com/GraphLab-Datasets/mnist/sframe/train/m_4558118e.0006 to /var/tmp/graphlab-mwarrick/3900/000010.0006" 177 | ] 178 | }, 179 | { 180 | "html": [ 181 | "
PROGRESS: Downloading http://s3.amazonaws.com/GraphLab-Datasets/mnist/sframe/train/m_4558118e.0007 to /var/tmp/graphlab-mwarrick/3900/000011.0007
" 182 | ], 183 | "metadata": {}, 184 | "output_type": "display_data", 185 | "text": [ 186 | "PROGRESS: Downloading http://s3.amazonaws.com/GraphLab-Datasets/mnist/sframe/train/m_4558118e.0007 to /var/tmp/graphlab-mwarrick/3900/000011.0007" 187 | ] 188 | }, 189 | { 190 | "html": [ 191 | "
PROGRESS: Downloading http://s3.amazonaws.com/GraphLab-Datasets/mnist/sframe/test/dir_archive.ini to /var/tmp/graphlab-mwarrick/3900/000012.ini
" 192 | ], 193 | "metadata": {}, 194 | "output_type": "display_data", 195 | "text": [ 196 | "PROGRESS: Downloading http://s3.amazonaws.com/GraphLab-Datasets/mnist/sframe/test/dir_archive.ini to /var/tmp/graphlab-mwarrick/3900/000012.ini" 197 | ] 198 | }, 199 | { 200 | "html": [ 201 | "
PROGRESS: Downloading http://s3.amazonaws.com/GraphLab-Datasets/mnist/sframe/test/objects.bin to /var/tmp/graphlab-mwarrick/3900/000013.bin
" 202 | ], 203 | "metadata": {}, 204 | "output_type": "display_data", 205 | "text": [ 206 | "PROGRESS: Downloading http://s3.amazonaws.com/GraphLab-Datasets/mnist/sframe/test/objects.bin to /var/tmp/graphlab-mwarrick/3900/000013.bin" 207 | ] 208 | }, 209 | { 210 | "html": [ 211 | "
PROGRESS: Downloading http://s3.amazonaws.com/GraphLab-Datasets/mnist/sframe/test/m_310c50b3.frame_idx to /var/tmp/graphlab-mwarrick/3900/000014.frame_idx
" 212 | ], 213 | "metadata": {}, 214 | "output_type": "display_data", 215 | "text": [ 216 | "PROGRESS: Downloading http://s3.amazonaws.com/GraphLab-Datasets/mnist/sframe/test/m_310c50b3.frame_idx to /var/tmp/graphlab-mwarrick/3900/000014.frame_idx" 217 | ] 218 | }, 219 | { 220 | "html": [ 221 | "
PROGRESS: Downloading http://s3.amazonaws.com/GraphLab-Datasets/mnist/sframe/test/m_310c50b3.sidx to /var/tmp/graphlab-mwarrick/3900/000015.sidx
" 222 | ], 223 | "metadata": {}, 224 | "output_type": "display_data", 225 | "text": [ 226 | "PROGRESS: Downloading http://s3.amazonaws.com/GraphLab-Datasets/mnist/sframe/test/m_310c50b3.sidx to /var/tmp/graphlab-mwarrick/3900/000015.sidx" 227 | ] 228 | }, 229 | { 230 | "html": [ 231 | "
PROGRESS: Downloading http://s3.amazonaws.com/GraphLab-Datasets/mnist/sframe/test/m_310c50b3.0000 to /var/tmp/graphlab-mwarrick/3900/000016.0000
" 232 | ], 233 | "metadata": {}, 234 | "output_type": "display_data", 235 | "text": [ 236 | "PROGRESS: Downloading http://s3.amazonaws.com/GraphLab-Datasets/mnist/sframe/test/m_310c50b3.0000 to /var/tmp/graphlab-mwarrick/3900/000016.0000" 237 | ] 238 | }, 239 | { 240 | "html": [ 241 | "
PROGRESS: Downloading http://s3.amazonaws.com/GraphLab-Datasets/mnist/sframe/test/m_310c50b3.0001 to /var/tmp/graphlab-mwarrick/3900/000017.0001
" 242 | ], 243 | "metadata": {}, 244 | "output_type": "display_data", 245 | "text": [ 246 | "PROGRESS: Downloading http://s3.amazonaws.com/GraphLab-Datasets/mnist/sframe/test/m_310c50b3.0001 to /var/tmp/graphlab-mwarrick/3900/000017.0001" 247 | ] 248 | }, 249 | { 250 | "html": [ 251 | "
PROGRESS: Downloading http://s3.amazonaws.com/GraphLab-Datasets/mnist/sframe/test/m_310c50b3.0002 to /var/tmp/graphlab-mwarrick/3900/000018.0002
" 252 | ], 253 | "metadata": {}, 254 | "output_type": "display_data", 255 | "text": [ 256 | "PROGRESS: Downloading http://s3.amazonaws.com/GraphLab-Datasets/mnist/sframe/test/m_310c50b3.0002 to /var/tmp/graphlab-mwarrick/3900/000018.0002" 257 | ] 258 | }, 259 | { 260 | "html": [ 261 | "
PROGRESS: Downloading http://s3.amazonaws.com/GraphLab-Datasets/mnist/sframe/test/m_310c50b3.0003 to /var/tmp/graphlab-mwarrick/3900/000019.0003
" 262 | ], 263 | "metadata": {}, 264 | "output_type": "display_data", 265 | "text": [ 266 | "PROGRESS: Downloading http://s3.amazonaws.com/GraphLab-Datasets/mnist/sframe/test/m_310c50b3.0003 to /var/tmp/graphlab-mwarrick/3900/000019.0003" 267 | ] 268 | }, 269 | { 270 | "html": [ 271 | "
PROGRESS: Downloading http://s3.amazonaws.com/GraphLab-Datasets/mnist/sframe/test/m_310c50b3.0004 to /var/tmp/graphlab-mwarrick/3900/000020.0004
" 272 | ], 273 | "metadata": {}, 274 | "output_type": "display_data", 275 | "text": [ 276 | "PROGRESS: Downloading http://s3.amazonaws.com/GraphLab-Datasets/mnist/sframe/test/m_310c50b3.0004 to /var/tmp/graphlab-mwarrick/3900/000020.0004" 277 | ] 278 | }, 279 | { 280 | "html": [ 281 | "
PROGRESS: Downloading http://s3.amazonaws.com/GraphLab-Datasets/mnist/sframe/test/m_310c50b3.0005 to /var/tmp/graphlab-mwarrick/3900/000021.0005
" 282 | ], 283 | "metadata": {}, 284 | "output_type": "display_data", 285 | "text": [ 286 | "PROGRESS: Downloading http://s3.amazonaws.com/GraphLab-Datasets/mnist/sframe/test/m_310c50b3.0005 to /var/tmp/graphlab-mwarrick/3900/000021.0005" 287 | ] 288 | }, 289 | { 290 | "html": [ 291 | "
PROGRESS: Downloading http://s3.amazonaws.com/GraphLab-Datasets/mnist/sframe/test/m_310c50b3.0006 to /var/tmp/graphlab-mwarrick/3900/000022.0006
" 292 | ], 293 | "metadata": {}, 294 | "output_type": "display_data", 295 | "text": [ 296 | "PROGRESS: Downloading http://s3.amazonaws.com/GraphLab-Datasets/mnist/sframe/test/m_310c50b3.0006 to /var/tmp/graphlab-mwarrick/3900/000022.0006" 297 | ] 298 | }, 299 | { 300 | "html": [ 301 | "
PROGRESS: Downloading http://s3.amazonaws.com/GraphLab-Datasets/mnist/sframe/test/m_310c50b3.0007 to /var/tmp/graphlab-mwarrick/3900/000023.0007
" 302 | ], 303 | "metadata": {}, 304 | "output_type": "display_data", 305 | "text": [ 306 | "PROGRESS: Downloading http://s3.amazonaws.com/GraphLab-Datasets/mnist/sframe/test/m_310c50b3.0007 to /var/tmp/graphlab-mwarrick/3900/000023.0007" 307 | ] 308 | } 309 | ], 310 | "prompt_number": 2 311 | }, 312 | { 313 | "cell_type": "code", 314 | "collapsed": false, 315 | "input": [ 316 | "print train_data.shape" 317 | ], 318 | "language": "python", 319 | "metadata": {}, 320 | "outputs": [ 321 | { 322 | "output_type": "stream", 323 | "stream": "stdout", 324 | "text": [ 325 | "(60000, 2)\n" 326 | ] 327 | } 328 | ], 329 | "prompt_number": 3 330 | }, 331 | { 332 | "cell_type": "markdown", 333 | "metadata": {}, 334 | "source": [ 335 | "*Sample image data & label data - show structure*" 336 | ] 337 | }, 338 | { 339 | "cell_type": "code", 340 | "collapsed": false, 341 | "input": [ 342 | "print train_data['image'][0]\n", 343 | "print train_data['label'][0]" 344 | ], 345 | "language": "python", 346 | "metadata": {}, 347 | "outputs": [ 348 | { 349 | "output_type": "stream", 350 | "stream": "stdout", 351 | "text": [ 352 | "Height: 28px\n", 353 | "Width: 28px\n", 354 | "Channels: 1\n", 355 | "\n", 356 | "5" 357 | ] 358 | }, 359 | { 360 | "output_type": "stream", 361 | "stream": "stdout", 362 | "text": [ 363 | "\n" 364 | ] 365 | } 366 | ], 367 | "prompt_number": 4 368 | }, 369 | { 370 | "cell_type": "code", 371 | "collapsed": false, 372 | "input": [ 373 | "model = graphlab.neuralnet_classifier.create(train_data, target='label', max_iterations=3)" 374 | ], 375 | "language": "python", 376 | "metadata": {}, 377 | "outputs": [ 378 | { 379 | "html": [ 380 | "
PROGRESS: Computing mean image...
" 381 | ], 382 | "metadata": {}, 383 | "output_type": "display_data", 384 | "text": [ 385 | "PROGRESS: Computing mean image..." 386 | ] 387 | }, 388 | { 389 | "html": [ 390 | "
PROGRESS: Done computing mean image.
" 391 | ], 392 | "metadata": {}, 393 | "output_type": "display_data", 394 | "text": [ 395 | "PROGRESS: Done computing mean image." 396 | ] 397 | }, 398 | { 399 | "html": [ 400 | "
PROGRESS: Creating neuralnet using device = cpu
" 401 | ], 402 | "metadata": {}, 403 | "output_type": "display_data", 404 | "text": [ 405 | "PROGRESS: Creating neuralnet using device = cpu" 406 | ] 407 | }, 408 | { 409 | "html": [ 410 | "
PROGRESS: +-----------+----------+--------------+-------------------+---------------------+-----------------+
" 411 | ], 412 | "metadata": {}, 413 | "output_type": "display_data", 414 | "text": [ 415 | "PROGRESS: +-----------+----------+--------------+-------------------+---------------------+-----------------+" 416 | ] 417 | }, 418 | { 419 | "html": [ 420 | "
PROGRESS: | Iteration | Examples | Elapsed Time | Training-accuracy | Validation-accuracy | Examples/second |
" 421 | ], 422 | "metadata": {}, 423 | "output_type": "display_data", 424 | "text": [ 425 | "PROGRESS: | Iteration | Examples | Elapsed Time | Training-accuracy | Validation-accuracy | Examples/second |" 426 | ] 427 | }, 428 | { 429 | "html": [ 430 | "
PROGRESS: +-----------+----------+--------------+-------------------+---------------------+-----------------+
" 431 | ], 432 | "metadata": {}, 433 | "output_type": "display_data", 434 | "text": [ 435 | "PROGRESS: +-----------+----------+--------------+-------------------+---------------------+-----------------+" 436 | ] 437 | }, 438 | { 439 | "html": [ 440 | "
PROGRESS: | 1         | 29200    | 10.020997    | 0.700103          |                     | 2913.883057     |
" 441 | ], 442 | "metadata": {}, 443 | "output_type": "display_data", 444 | "text": [ 445 | "PROGRESS: | 1 | 29200 | 10.020997 | 0.700103 | | 2913.883057 |" 446 | ] 447 | }, 448 | { 449 | "html": [ 450 | "
PROGRESS: | 1         | 57100    | 20.254615    | 0.791384          | 0.943333            | 2726.308105     |
" 451 | ], 452 | "metadata": {}, 453 | "output_type": "display_data", 454 | "text": [ 455 | "PROGRESS: | 1 | 57100 | 20.254615 | 0.791384 | 0.943333 | 2726.308105 |" 456 | ] 457 | }, 458 | { 459 | "html": [ 460 | "
PROGRESS: | 2         | 28300    | 30.276667    | 0.910636          |                     | 2823.829590     |
" 461 | ], 462 | "metadata": {}, 463 | "output_type": "display_data", 464 | "text": [ 465 | "PROGRESS: | 2 | 28300 | 30.276667 | 0.910636 | | 2823.829590 |" 466 | ] 467 | }, 468 | { 469 | "html": [ 470 | "
PROGRESS: | 2         | 57000    | 40.527586    | 0.915316          | 0.957333            | 2799.746826     |
" 471 | ], 472 | "metadata": {}, 473 | "output_type": "display_data", 474 | "text": [ 475 | "PROGRESS: | 2 | 57000 | 40.527586 | 0.915316 | 0.957333 | 2799.746826 |" 476 | ] 477 | }, 478 | { 479 | "html": [ 480 | "
PROGRESS: | 3         | 30500    | 50.548510    | 0.928951          |                     | 3043.704102     |
" 481 | ], 482 | "metadata": {}, 483 | "output_type": "display_data", 484 | "text": [ 485 | "PROGRESS: | 3 | 30500 | 50.548510 | 0.928951 | | 3043.704102 |" 486 | ] 487 | }, 488 | { 489 | "html": [ 490 | "
PROGRESS: | 3         | 57000    | 60.089661    | 0.931614          | 0.957333            | 2777.440674     |
" 491 | ], 492 | "metadata": {}, 493 | "output_type": "display_data", 494 | "text": [ 495 | "PROGRESS: | 3 | 57000 | 60.089661 | 0.931614 | 0.957333 | 2777.440674 |" 496 | ] 497 | }, 498 | { 499 | "html": [ 500 | "
PROGRESS: +-----------+----------+--------------+-------------------+---------------------+-----------------+
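For reference, the manual run above boils down to a handful of GraphLab Create calls; the sketch below is just the cells of this notebook collected in one place (same URLs and arguments as above, nothing new):

import graphlab

train_data = graphlab.SFrame('http://s3.amazonaws.com/GraphLab-Datasets/mnist/sframe/train')
test_data = graphlab.SFrame('http://s3.amazonaws.com/GraphLab-Datasets/mnist/sframe/test')

# max_iterations=3 keeps the demo short; the progress table above shows accuracy still improving.
model = graphlab.neuralnet_classifier.create(train_data, target='label', max_iterations=3)

eval_ = model.evaluate(test_data, metric=['accuracy', 'confusion_matrix'])
print eval_['accuracy']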
" 501 | ], 502 | "metadata": {}, 503 | "output_type": "display_data", 504 | "text": [ 505 | "PROGRESS: +-----------+----------+--------------+-------------------+---------------------+-----------------+" 506 | ] 507 | }, 508 | { 509 | "output_type": "stream", 510 | "stream": "stdout", 511 | "text": [ 512 | "Using network:\n", 513 | "\n", 514 | "### network layers ###\n", 515 | "layer[0]: ConvolutionLayer\n", 516 | " init_random = gaussian\n", 517 | " padding = 0\n", 518 | " stride = 2\n", 519 | " num_channels = 10\n", 520 | " num_groups = 1\n", 521 | " kernel_size = 3\n", 522 | "layer[1]: MaxPoolingLayer\n", 523 | " padding = 0\n", 524 | " stride = 2\n", 525 | " kernel_size = 3\n", 526 | "layer[2]: FlattenLayer\n", 527 | "layer[3]: FullConnectionLayer\n", 528 | " init_sigma = 0.01\n", 529 | " init_random = gaussian\n", 530 | " init_bias = 0\n", 531 | " num_hidden_units = 100\n", 532 | "layer[4]: RectifiedLinearLayer\n", 533 | "layer[5]: DropoutLayer\n", 534 | " threshold = 0.5\n", 535 | "layer[6]: FullConnectionLayer\n", 536 | " init_sigma = 0.01\n", 537 | " init_random = gaussian\n", 538 | " init_bias = 0\n", 539 | " num_hidden_units = 10\n", 540 | "layer[7]: SoftmaxLayer\n", 541 | "### end network layers ###\n", 542 | "\n", 543 | "### network parameters ###\n", 544 | "learning_rate = 0.001\n", 545 | "momentum = 0.9\n", 546 | "### end network parameters ###\n", 547 | "\n", 548 | "PROGRESS: Creating a validation set from 5 percent of training data. This may take a while.\n", 549 | " You can set ``validation_set=None`` to disable validation tracking.\n", 550 | "\n" 551 | ] 552 | } 553 | ], 554 | "prompt_number": 5 555 | }, 556 | { 557 | "cell_type": "code", 558 | "collapsed": false, 559 | "input": [ 560 | "eval_ = model.evaluate(test_data, metric=['accuracy', 'confusion_matrix'])" 561 | ], 562 | "language": "python", 563 | "metadata": {}, 564 | "outputs": [], 565 | "prompt_number": 8 566 | }, 567 | { 568 | "cell_type": "code", 569 | "collapsed": false, 570 | "input": [ 571 | "eval_" 572 | ], 573 | "language": "python", 574 | "metadata": {}, 575 | "outputs": [ 576 | { 577 | "metadata": {}, 578 | "output_type": "pyout", 579 | "prompt_number": 9, 580 | "text": [ 581 | "{'accuracy': 0.97079998254776, 'confusion_matrix': Columns:\n", 582 | " \ttarget_label\tint\n", 583 | " \tpredicted_label\tint\n", 584 | " \tcount\tint\n", 585 | " \n", 586 | " Rows: 73\n", 587 | " \n", 588 | " Data:\n", 589 | " +--------------+-----------------+-------+\n", 590 | " | target_label | predicted_label | count |\n", 591 | " +--------------+-----------------+-------+\n", 592 | " | 6 | 6 | 923 |\n", 593 | " | 8 | 6 | 4 |\n", 594 | " | 0 | 6 | 3 |\n", 595 | " | 5 | 6 | 3 |\n", 596 | " | 4 | 6 | 2 |\n", 597 | " | 1 | 6 | 2 |\n", 598 | " | 6 | 8 | 2 |\n", 599 | " | 8 | 8 | 926 |\n", 600 | " | 0 | 8 | 2 |\n", 601 | " | 3 | 8 | 3 |\n", 602 | " | ... | ... | ... 
|\n", 603 | " +--------------+-----------------+-------+\n", 604 | " [73 rows x 3 columns]\n", 605 | " Note: Only the head of the SFrame is printed.\n", 606 | " You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.}" 607 | ] 608 | } 609 | ], 610 | "prompt_number": 9 611 | }, 612 | { 613 | "cell_type": "code", 614 | "collapsed": false, 615 | "input": [ 616 | "cf_mat = eval_['confusion_matrix']" 617 | ], 618 | "language": "python", 619 | "metadata": {}, 620 | "outputs": [], 621 | "prompt_number": 10 622 | }, 623 | { 624 | "cell_type": "code", 625 | "collapsed": false, 626 | "input": [ 627 | "cf_mat" 628 | ], 629 | "language": "python", 630 | "metadata": {}, 631 | "outputs": [ 632 | { 633 | "html": [ 634 | "
\n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | "
target_label | predicted_label | count
6 | 6 | 923
8 | 6 | 4
0 | 6 | 3
5 | 6 | 3
4 | 6 | 2
1 | 6 | 2
6 | 8 | 2
8 | 8 | 926
0 | 8 | 2
3 | 8 | 3
... | ... | ...
\n", 696 | "[73 rows x 3 columns]
Note: Only the head of the SFrame is printed.
You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.\n", 697 | "
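The confusion matrix comes back in long form, one row per (target_label, predicted_label) pair. If a dense 10x10 array is easier to read, a small sketch (my addition, not part of the notebook; it assumes iterating cf_mat yields one dict per row, which is how the SFrame above behaves):

import numpy as np

dense = np.zeros((10, 10), dtype=int)   # rows = target label, columns = predicted label
for row in cf_mat:
    # each row looks like {'target_label': 6, 'predicted_label': 6, 'count': 923}
    dense[row['target_label'], row['predicted_label']] = row['count']
print dense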
" 698 | ], 699 | "metadata": {}, 700 | "output_type": "pyout", 701 | "prompt_number": 11, 702 | "text": [ 703 | "Columns:\n", 704 | "\ttarget_label\tint\n", 705 | "\tpredicted_label\tint\n", 706 | "\tcount\tint\n", 707 | "\n", 708 | "Rows: 73\n", 709 | "\n", 710 | "Data:\n", 711 | "+--------------+-----------------+-------+\n", 712 | "| target_label | predicted_label | count |\n", 713 | "+--------------+-----------------+-------+\n", 714 | "| 6 | 6 | 923 |\n", 715 | "| 8 | 6 | 4 |\n", 716 | "| 0 | 6 | 3 |\n", 717 | "| 5 | 6 | 3 |\n", 718 | "| 4 | 6 | 2 |\n", 719 | "| 1 | 6 | 2 |\n", 720 | "| 6 | 8 | 2 |\n", 721 | "| 8 | 8 | 926 |\n", 722 | "| 0 | 8 | 2 |\n", 723 | "| 3 | 8 | 3 |\n", 724 | "| ... | ... | ... |\n", 725 | "+--------------+-----------------+-------+\n", 726 | "[73 rows x 3 columns]\n", 727 | "Note: Only the head of the SFrame is printed.\n", 728 | "You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns." 729 | ] 730 | } 731 | ], 732 | "prompt_number": 11 733 | }, 734 | { 735 | "cell_type": "markdown", 736 | "metadata": {}, 737 | "source": [ 738 | "*Confusion Matrix Correct Predictions*" 739 | ] 740 | }, 741 | { 742 | "cell_type": "code", 743 | "collapsed": false, 744 | "input": [ 745 | "cf_mat[cf_mat['target_label'] == cf_mat['predicted_label']].groupby('target_label', graphlab.aggregate.SUM('count')).sort('target_label')" 746 | ], 747 | "language": "python", 748 | "metadata": {}, 749 | "outputs": [ 750 | { 751 | "html": [ 752 | "
\n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | " \n", 767 | " \n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | "
target_label | Sum of count
0 | 970
1 | 1126
2 | 995
3 | 990
4 | 970
5 | 870
6 | 923
7 | 997
8 | 926
9 | 941
\n", 798 | "[10 rows x 2 columns]
\n", 799 | "
" 800 | ], 801 | "metadata": {}, 802 | "output_type": "pyout", 803 | "prompt_number": 14, 804 | "text": [ 805 | "Columns:\n", 806 | "\ttarget_label\tint\n", 807 | "\tSum of count\tint\n", 808 | "\n", 809 | "Rows: 10\n", 810 | "\n", 811 | "Data:\n", 812 | "+--------------+--------------+\n", 813 | "| target_label | Sum of count |\n", 814 | "+--------------+--------------+\n", 815 | "| 0 | 970 |\n", 816 | "| 1 | 1126 |\n", 817 | "| 2 | 995 |\n", 818 | "| 3 | 990 |\n", 819 | "| 4 | 970 |\n", 820 | "| 5 | 870 |\n", 821 | "| 6 | 923 |\n", 822 | "| 7 | 997 |\n", 823 | "| 8 | 926 |\n", 824 | "| 9 | 941 |\n", 825 | "+--------------+--------------+\n", 826 | "[10 rows x 2 columns]" 827 | ] 828 | } 829 | ], 830 | "prompt_number": 14 831 | }, 832 | { 833 | "cell_type": "markdown", 834 | "metadata": {}, 835 | "source": [ 836 | "*Confusion Matrix Prediction Mistakes*" 837 | ] 838 | }, 839 | { 840 | "cell_type": "code", 841 | "collapsed": false, 842 | "input": [ 843 | "cf_mat[cf_mat['target_label'] != cf_mat['predicted_label']].groupby('target_label', graphlab.aggregate.SUM('count')).sort('target_label')" 844 | ], 845 | "language": "python", 846 | "metadata": {}, 847 | "outputs": [ 848 | { 849 | "html": [ 850 | "
\n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | " \n", 892 | " \n", 893 | " \n", 894 | " \n", 895 | "
target_label | Sum of count
0 | 10
1 | 9
2 | 37
3 | 20
4 | 12
5 | 22
6 | 35
7 | 31
8 | 48
9 | 68
\n", 896 | "[10 rows x 2 columns]
\n", 897 | "
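The two summaries above (correct predictions and prediction mistakes per digit) give per-class accuracy directly as correct / (correct + mistakes); for example digit 9, the weakest class here, is 941 / (941 + 68) ≈ 0.933. A quick check over all ten digits, with the numbers copied from the manual-run tables above:

correct  = {0: 970, 1: 1126, 2: 995, 3: 990, 4: 970, 5: 870, 6: 923, 7: 997, 8: 926, 9: 941}
mistakes = {0: 10,  1: 9,    2: 37,  3: 20,  4: 12,  5: 22,  6: 35,  7: 31,  8: 48,  9: 68}

for digit in sorted(correct):
    total = correct[digit] + mistakes[digit]
    print '%d: %4d / %4d = %.3f' % (digit, correct[digit], total, float(correct[digit]) / total)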
" 898 | ], 899 | "metadata": {}, 900 | "output_type": "pyout", 901 | "prompt_number": 13, 902 | "text": [ 903 | "Columns:\n", 904 | "\ttarget_label\tint\n", 905 | "\tSum of count\tint\n", 906 | "\n", 907 | "Rows: 10\n", 908 | "\n", 909 | "Data:\n", 910 | "+--------------+--------------+\n", 911 | "| target_label | Sum of count |\n", 912 | "+--------------+--------------+\n", 913 | "| 0 | 10 |\n", 914 | "| 1 | 9 |\n", 915 | "| 2 | 37 |\n", 916 | "| 3 | 20 |\n", 917 | "| 4 | 12 |\n", 918 | "| 5 | 22 |\n", 919 | "| 6 | 35 |\n", 920 | "| 7 | 31 |\n", 921 | "| 8 | 48 |\n", 922 | "| 9 | 68 |\n", 923 | "+--------------+--------------+\n", 924 | "[10 rows x 2 columns]" 925 | ] 926 | } 927 | ], 928 | "prompt_number": 13 929 | }, 930 | { 931 | "cell_type": "markdown", 932 | "metadata": {}, 933 | "source": [ 934 | "### Script Run" 935 | ] 936 | }, 937 | { 938 | "cell_type": "code", 939 | "collapsed": false, 940 | "input": [ 941 | "%load_ext autoreload" 942 | ], 943 | "language": "python", 944 | "metadata": {}, 945 | "outputs": [], 946 | "prompt_number": 1 947 | }, 948 | { 949 | "cell_type": "code", 950 | "collapsed": false, 951 | "input": [ 952 | "%autoreload 2" 953 | ], 954 | "language": "python", 955 | "metadata": {}, 956 | "outputs": [], 957 | "prompt_number": 2 958 | }, 959 | { 960 | "cell_type": "code", 961 | "collapsed": false, 962 | "input": [ 963 | "import lib.graphlab_mnist as gl_mnist" 964 | ], 965 | "language": "python", 966 | "metadata": {}, 967 | "outputs": [], 968 | "prompt_number": 5 969 | }, 970 | { 971 | "cell_type": "code", 972 | "collapsed": false, 973 | "input": [ 974 | "gl_mnist.main()" 975 | ], 976 | "language": "python", 977 | "metadata": {}, 978 | "outputs": [ 979 | { 980 | "output_type": "stream", 981 | "stream": "stdout", 982 | "text": [ 983 | "NN layer details: layer[0]: ConvolutionLayer\n", 984 | " init_random = gaussian\n", 985 | " random_type = xavier\n", 986 | " padding = 1\n", 987 | " stride = 2\n", 988 | " num_channels = 32\n", 989 | " num_groups = 1\n", 990 | " kernel_size = 3\n", 991 | "layer[1]: MaxPoolingLayer\n", 992 | " padding = 0\n", 993 | " stride = 2\n", 994 | " kernel_size = 3\n", 995 | "layer[2]: FlattenLayer\n", 996 | "layer[3]: DropoutLayer\n", 997 | " threshold = 0.5\n", 998 | "layer[4]: FullConnectionLayer\n", 999 | " init_sigma = 0.01\n", 1000 | " init_random = gaussian\n", 1001 | " init_bias = 0\n", 1002 | " num_hidden_units = 100\n", 1003 | "layer[5]: SigmoidLayer\n", 1004 | "layer[6]: FullConnectionLayer\n", 1005 | " init_sigma = 0.01\n", 1006 | " init_random = gaussian\n", 1007 | " init_bias = 0\n", 1008 | " num_hidden_units = 10\n", 1009 | "layer[7]: SoftmaxLayer\n", 1010 | "NN hyper parameters summary: {'init_random': 'gaussian', 'learning_rate': 0.1, 'input_shape': '1,28,28', 'batch_size': 100, 'divideby': 255, 'l2_regularization': 0.0, 'momentum': 0.9}\n", 1011 | "Using network:\n", 1012 | "\n", 1013 | "### network layers ###\n", 1014 | "layer[0]: ConvolutionLayer\n", 1015 | " init_random = gaussian\n", 1016 | " padding = 0\n", 1017 | " stride = 2\n", 1018 | " num_channels = 10\n", 1019 | " num_groups = 1\n", 1020 | " kernel_size = 3\n", 1021 | "layer[1]: MaxPoolingLayer\n", 1022 | " padding = 0\n", 1023 | " stride = 2\n", 1024 | " kernel_size = 3\n", 1025 | "layer[2]: FlattenLayer\n", 1026 | "layer[3]: FullConnectionLayer\n", 1027 | " init_sigma = 0.01\n", 1028 | " init_random = gaussian\n", 1029 | " init_bias = 0\n", 1030 | " num_hidden_units = 100\n", 1031 | "layer[4]: RectifiedLinearLayer\n", 1032 | "layer[5]: DropoutLayer\n", 1033 | " 
threshold = 0.5\n", 1034 | "layer[6]: FullConnectionLayer\n", 1035 | " init_sigma = 0.01\n", 1036 | " init_random = gaussian\n", 1037 | " init_bias = 0\n", 1038 | " num_hidden_units = 10\n", 1039 | "layer[7]: SoftmaxLayer\n", 1040 | "### end network layers ###\n", 1041 | "\n", 1042 | "### network parameters ###\n", 1043 | "learning_rate = 0.001\n", 1044 | "momentum = 0.9\n", 1045 | "### end network parameters ###\n" 1046 | ] 1047 | }, 1048 | { 1049 | "html": [ 1050 | "
PROGRESS: Computing mean image...
" 1051 | ], 1052 | "metadata": {}, 1053 | "output_type": "display_data", 1054 | "text": [ 1055 | "PROGRESS: Computing mean image..." 1056 | ] 1057 | }, 1058 | { 1059 | "html": [ 1060 | "
PROGRESS: Done computing mean image.
" 1061 | ], 1062 | "metadata": {}, 1063 | "output_type": "display_data", 1064 | "text": [ 1065 | "PROGRESS: Done computing mean image." 1066 | ] 1067 | }, 1068 | { 1069 | "html": [ 1070 | "
PROGRESS: Creating neuralnet using device = cpu
" 1071 | ], 1072 | "metadata": {}, 1073 | "output_type": "display_data", 1074 | "text": [ 1075 | "PROGRESS: Creating neuralnet using device = cpu" 1076 | ] 1077 | }, 1078 | { 1079 | "html": [ 1080 | "
PROGRESS: +-----------+----------+--------------+-------------------+---------------------+-----------------+
" 1081 | ], 1082 | "metadata": {}, 1083 | "output_type": "display_data", 1084 | "text": [ 1085 | "PROGRESS: +-----------+----------+--------------+-------------------+---------------------+-----------------+" 1086 | ] 1087 | }, 1088 | { 1089 | "html": [ 1090 | "
PROGRESS: | Iteration | Examples | Elapsed Time | Training-accuracy | Validation-accuracy | Examples/second |
" 1091 | ], 1092 | "metadata": {}, 1093 | "output_type": "display_data", 1094 | "text": [ 1095 | "PROGRESS: | Iteration | Examples | Elapsed Time | Training-accuracy | Validation-accuracy | Examples/second |" 1096 | ] 1097 | }, 1098 | { 1099 | "html": [ 1100 | "
PROGRESS: +-----------+----------+--------------+-------------------+---------------------+-----------------+
" 1101 | ], 1102 | "metadata": {}, 1103 | "output_type": "display_data", 1104 | "text": [ 1105 | "PROGRESS: +-----------+----------+--------------+-------------------+---------------------+-----------------+" 1106 | ] 1107 | }, 1108 | { 1109 | "html": [ 1110 | "
PROGRESS: | 1         | 28700    | 10.007397    | 0.696516          |                     | 2867.880127     |
" 1111 | ], 1112 | "metadata": {}, 1113 | "output_type": "display_data", 1114 | "text": [ 1115 | "PROGRESS: | 1 | 28700 | 10.007397 | 0.696516 | | 2867.880127 |" 1116 | ] 1117 | }, 1118 | { 1119 | "html": [ 1120 | "
PROGRESS: | 1         | 57000    | 19.907098    | 0.803035          | 0.959677            | 2858.670654     |
" 1121 | ], 1122 | "metadata": {}, 1123 | "output_type": "display_data", 1124 | "text": [ 1125 | "PROGRESS: | 1 | 57000 | 19.907098 | 0.803035 | 0.959677 | 2858.670654 |" 1126 | ] 1127 | }, 1128 | { 1129 | "html": [ 1130 | "
PROGRESS: | 2         | 30000    | 29.910762    | 0.928900          |                     | 2998.957764     |
" 1131 | ], 1132 | "metadata": {}, 1133 | "output_type": "display_data", 1134 | "text": [ 1135 | "PROGRESS: | 2 | 30000 | 29.910762 | 0.928900 | | 2998.957764 |" 1136 | ] 1137 | }, 1138 | { 1139 | "html": [ 1140 | "
PROGRESS: | 2         | 57000    | 39.190411    | 0.934105          | 0.966000            | 2909.590820     |
" 1141 | ], 1142 | "metadata": {}, 1143 | "output_type": "display_data", 1144 | "text": [ 1145 | "PROGRESS: | 2 | 57000 | 39.190411 | 0.934105 | 0.966000 | 2909.590820 |" 1146 | ] 1147 | }, 1148 | { 1149 | "html": [ 1150 | "
PROGRESS: | 3         | 30100    | 49.197977    | 0.944219          |                     | 3007.786621     |
" 1151 | ], 1152 | "metadata": {}, 1153 | "output_type": "display_data", 1154 | "text": [ 1155 | "PROGRESS: | 3 | 30100 | 49.197977 | 0.944219 | | 3007.786621 |" 1156 | ] 1157 | }, 1158 | { 1159 | "html": [ 1160 | "
PROGRESS: | 3         | 57000    | 58.482153    | 0.947526          | 0.972667            | 2897.399902     |
" 1161 | ], 1162 | "metadata": {}, 1163 | "output_type": "display_data", 1164 | "text": [ 1165 | "PROGRESS: | 3 | 57000 | 58.482153 | 0.947526 | 0.972667 | 2897.399902 |" 1166 | ] 1167 | }, 1168 | { 1169 | "html": [ 1170 | "
PROGRESS: +-----------+----------+--------------+-------------------+---------------------+-----------------+
" 1171 | ], 1172 | "metadata": {}, 1173 | "output_type": "display_data", 1174 | "text": [ 1175 | "PROGRESS: +-----------+----------+--------------+-------------------+---------------------+-----------------+" 1176 | ] 1177 | }, 1178 | { 1179 | "output_type": "stream", 1180 | "stream": "stdout", 1181 | "text": [ 1182 | "\n", 1183 | "PROGRESS: Creating a validation set from 5 percent of training data. This may take a while.\n", 1184 | " You can set ``validation_set=None`` to disable validation tracking.\n", 1185 | "\n", 1186 | "Accuracy: " 1187 | ] 1188 | }, 1189 | { 1190 | "output_type": "stream", 1191 | "stream": "stdout", 1192 | "text": [ 1193 | " 0.975000023842\n", 1194 | "Confusion Matrix Correct Predictions\n", 1195 | "+--------------+--------------+\n", 1196 | "| target_label | Sum of count |\n", 1197 | "+--------------+--------------+\n", 1198 | "| 0 | 973 |\n", 1199 | "| 1 | 1123 |\n", 1200 | "| 2 | 1004 |\n", 1201 | "| 3 | 985 |\n", 1202 | "| 4 | 964 |\n", 1203 | "| 5 | 878 |\n", 1204 | "| 6 | 940 |\n", 1205 | "| 7 | 994 |\n", 1206 | "| 8 | 938 |\n", 1207 | "| 9 | 951 |\n", 1208 | "+--------------+--------------+\n", 1209 | "[10 rows x 2 columns]\n" 1210 | ] 1211 | }, 1212 | { 1213 | "output_type": "stream", 1214 | "stream": "stdout", 1215 | "text": [ 1216 | "\n", 1217 | "Confusion Matrix Prediction Mistakes\n", 1218 | "+--------------+--------------+\n", 1219 | "| target_label | Sum of count |\n", 1220 | "+--------------+--------------+\n", 1221 | "| 0 | 7 |\n", 1222 | "| 1 | 12 |\n", 1223 | "| 2 | 28 |\n", 1224 | "| 3 | 25 |\n", 1225 | "| 4 | 18 |\n", 1226 | "| 5 | 14 |\n", 1227 | "| 6 | 18 |\n", 1228 | "| 7 | 34 |\n", 1229 | "| 8 | 36 |\n", 1230 | "| 9 | 58 |\n", 1231 | "+--------------+--------------+\n", 1232 | "[10 rows x 2 columns]\n" 1233 | ] 1234 | }, 1235 | { 1236 | "output_type": "stream", 1237 | "stream": "stdout", 1238 | "text": [ 1239 | "\n", 1240 | "Evaluation completed\n" 1241 | ] 1242 | } 1243 | ], 1244 | "prompt_number": 6 1245 | }, 1246 | { 1247 | "cell_type": "code", 1248 | "collapsed": false, 1249 | "input": [], 1250 | "language": "python", 1251 | "metadata": {}, 1252 | "outputs": [] 1253 | } 1254 | ], 1255 | "metadata": {} 1256 | } 1257 | ] 1258 | } -------------------------------------------------------------------------------- /Check_Theano.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "", 4 | "signature": "sha256:b6946f4bce0b49dd86f4e23b917e92be3a7f47abbd83a0bcd9eab8fe729033cb" 5 | }, 6 | "nbformat": 3, 7 | "nbformat_minor": 0, 8 | "worksheets": [ 9 | { 10 | "cells": [ 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "## Theano Example" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "### Manual Run" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "collapsed": false, 28 | "input": [ 29 | "import cPickle\n", 30 | "import gzip\n", 31 | "import os\n", 32 | "import sys\n", 33 | "import time\n", 34 | "\n", 35 | "import numpy\n", 36 | "\n", 37 | "import theano\n", 38 | "import theano.tensor as T" 39 | ], 40 | "language": "python", 41 | "metadata": {}, 42 | "outputs": [], 43 | "prompt_number": 3 44 | }, 45 | { 46 | "cell_type": "code", 47 | "collapsed": false, 48 | "input": [ 49 | "class LogisticRegression(object):\n", 50 | " def __init__(self, input, n_in, n_out, borrow=True):\n", 51 | "\n", 52 | " self.W = theano.shared(value=numpy.zeros((n_in, n_out), dtype=theano.config.floatX), name='W', 
borrow=borrow)\n", 53 | " self.b = theano.shared(value=numpy.zeros((n_out,), dtype=theano.config.floatX), name='b', borrow=borrow)\n", 54 | " \n", 55 | " self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W) + self.b)\n", 56 | " self.y_pred = T.argmax(self.p_y_given_x, axis=1)\n", 57 | " self.params = [self.W, self.b]\n", 58 | "\n", 59 | " def negative_log_likelihood(self, y): \n", 60 | " return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y])\n", 61 | "\n", 62 | " def errors(self, y):\n", 63 | " # check if y has same dimension of y_pred\n", 64 | " if y.ndim != self.y_pred.ndim:\n", 65 | " raise TypeError('y should have the same shape as self.y_pred', ('y', y.type, 'y_pred', self.y_pred.type))\n", 66 | " \n", 67 | " # check if y is of the correct datatype\n", 68 | " if y.dtype.startswith('int'):\n", 69 | " # the T.neq operator returns a vector of 0s and 1s, where 1\n", 70 | " # represents a mistake in prediction\n", 71 | " return T.mean(T.neq(self.y_pred, y))\n", 72 | " else:\n", 73 | " raise NotImplementedError()" 74 | ], 75 | "language": "python", 76 | "metadata": {}, 77 | "outputs": [], 78 | "prompt_number": 4 79 | }, 80 | { 81 | "cell_type": "code", 82 | "collapsed": false, 83 | "input": [ 84 | "def shared_dataset(data_xy, borrow=True):\n", 85 | " data_x, data_y = data_xy\n", 86 | " shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX), borrow=borrow)\n", 87 | " shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX), borrow=borrow)\n", 88 | " return shared_x, T.cast(shared_y, 'int32')" 89 | ], 90 | "language": "python", 91 | "metadata": {}, 92 | "outputs": [], 93 | "prompt_number": 5 94 | }, 95 | { 96 | "cell_type": "code", 97 | "collapsed": false, 98 | "input": [ 99 | "__docformat__ = 'restructedtext en'" 100 | ], 101 | "language": "python", 102 | "metadata": {}, 103 | "outputs": [], 104 | "prompt_number": 41 105 | }, 106 | { 107 | "cell_type": "code", 108 | "collapsed": false, 109 | "input": [ 110 | "os.path.abspath('__file__')" 111 | ], 112 | "language": "python", 113 | "metadata": {}, 114 | "outputs": [ 115 | { 116 | "metadata": {}, 117 | "output_type": "pyout", 118 | "prompt_number": 46, 119 | "text": [ 120 | "'/Users/mwarrick/Documents/Neural_Net_Newbies/__file__'" 121 | ] 122 | } 123 | ], 124 | "prompt_number": 46 125 | }, 126 | { 127 | "cell_type": "code", 128 | "collapsed": false, 129 | "input": [ 130 | "def load_data(dataset):\n", 131 | " data_dir, data_file = os.path.split(dataset)\n", 132 | " if data_dir == \"\" and not os.path.isfile(dataset):\n", 133 | " # Check if dataset is in the data directory.\n", 134 | " new_path = os.path.join(os.path.split('__file__')[0], \"..\", \"data\", dataset)\n", 135 | " if os.path.isfile(new_path) or data_file == 'mnist.pkl.gz':\n", 136 | " dataset = new_path\n", 137 | "\n", 138 | " if (not os.path.isfile(dataset)) and data_file == 'mnist.pkl.gz':\n", 139 | " import urllib\n", 140 | " origin = ('http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz')\n", 141 | " print 'Downloading data from %s' % origin\n", 142 | " urllib.urlretrieve(origin, dataset)\n", 143 | " \n", 144 | " print '... 
loading data'\n", 145 | "\n", 146 | " # Load the dataset\n", 147 | " f = gzip.open(dataset, 'rb')\n", 148 | " train_set, valid_set, test_set = cPickle.load(f)\n", 149 | " f.close()\n", 150 | " \n", 151 | " test_set_x, test_set_y = shared_dataset(test_set)\n", 152 | " valid_set_x, valid_set_y = shared_dataset(valid_set)\n", 153 | " train_set_x, train_set_y = shared_dataset(train_set)\n", 154 | "\n", 155 | " data_sets = [(train_set_x, train_set_y), (valid_set_x, valid_set_y),\n", 156 | " (test_set_x, test_set_y)]\n", 157 | " return data_sets" 158 | ], 159 | "language": "python", 160 | "metadata": {}, 161 | "outputs": [], 162 | "prompt_number": 47 163 | }, 164 | { 165 | "cell_type": "markdown", 166 | "metadata": {}, 167 | "source": [ 168 | "*SGD Opitmization Function code - Need to split out and put some calls in main*" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "collapsed": false, 174 | "input": [ 175 | "learning_rate=0.13\n", 176 | "n_epochs=100\n", 177 | "dataset='mnist.pkl.gz'\n", 178 | "batch_size=600" 179 | ], 180 | "language": "python", 181 | "metadata": {}, 182 | "outputs": [], 183 | "prompt_number": 49 184 | }, 185 | { 186 | "cell_type": "markdown", 187 | "metadata": {}, 188 | "source": [ 189 | "*Build Model Structure*" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "collapsed": false, 195 | "input": [ 196 | "datasets = load_data(\"data/mnist.pkl.gz\")" 197 | ], 198 | "language": "python", 199 | "metadata": {}, 200 | "outputs": [ 201 | { 202 | "output_type": "stream", 203 | "stream": "stdout", 204 | "text": [ 205 | "... loading data\n" 206 | ] 207 | } 208 | ], 209 | "prompt_number": 8 210 | }, 211 | { 212 | "cell_type": "code", 213 | "collapsed": false, 214 | "input": [ 215 | "datasets[0]" 216 | ], 217 | "language": "python", 218 | "metadata": {}, 219 | "outputs": [ 220 | { 221 | "metadata": {}, 222 | "output_type": "pyout", 223 | "prompt_number": 12, 224 | "text": [ 225 | "(, Elemwise{Cast{int32}}.0)" 226 | ] 227 | } 228 | ], 229 | "prompt_number": 12 230 | }, 231 | { 232 | "cell_type": "code", 233 | "collapsed": false, 234 | "input": [ 235 | "train_set_x, train_set_y = datasets[0]\n", 236 | "valid_set_x, valid_set_y = datasets[1]\n", 237 | "test_set_x, test_set_y = datasets[2]" 238 | ], 239 | "language": "python", 240 | "metadata": {}, 241 | "outputs": [], 242 | "prompt_number": 11 243 | }, 244 | { 245 | "cell_type": "code", 246 | "collapsed": false, 247 | "input": [ 248 | "train_set_x" 249 | ], 250 | "language": "python", 251 | "metadata": {}, 252 | "outputs": [ 253 | { 254 | "metadata": {}, 255 | "output_type": "pyout", 256 | "prompt_number": 13, 257 | "text": [ 258 | "" 259 | ] 260 | } 261 | ], 262 | "prompt_number": 13 263 | }, 264 | { 265 | "cell_type": "code", 266 | "collapsed": false, 267 | "input": [ 268 | "n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size\n", 269 | "n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size\n", 270 | "n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size" 271 | ], 272 | "language": "python", 273 | "metadata": {}, 274 | "outputs": [], 275 | "prompt_number": 14 276 | }, 277 | { 278 | "cell_type": "code", 279 | "collapsed": false, 280 | "input": [ 281 | "n_train_batches" 282 | ], 283 | "language": "python", 284 | "metadata": {}, 285 | "outputs": [ 286 | { 287 | "metadata": {}, 288 | "output_type": "pyout", 289 | "prompt_number": 15, 290 | "text": [ 291 | "83" 292 | ] 293 | } 294 | ], 295 | "prompt_number": 15 296 | }, 297 | { 298 | "cell_type": "code", 299 | 
"collapsed": false, 300 | "input": [ 301 | "n_valid_batches" 302 | ], 303 | "language": "python", 304 | "metadata": {}, 305 | "outputs": [ 306 | { 307 | "metadata": {}, 308 | "output_type": "pyout", 309 | "prompt_number": 16, 310 | "text": [ 311 | "16" 312 | ] 313 | } 314 | ], 315 | "prompt_number": 16 316 | }, 317 | { 318 | "cell_type": "code", 319 | "collapsed": false, 320 | "input": [ 321 | "index = T.lscalar()" 322 | ], 323 | "language": "python", 324 | "metadata": {}, 325 | "outputs": [], 326 | "prompt_number": 17 327 | }, 328 | { 329 | "cell_type": "code", 330 | "collapsed": false, 331 | "input": [ 332 | "x = T.matrix('x')\n", 333 | "y = T.ivector('y')" 334 | ], 335 | "language": "python", 336 | "metadata": {}, 337 | "outputs": [], 338 | "prompt_number": 18 339 | }, 340 | { 341 | "cell_type": "code", 342 | "collapsed": false, 343 | "input": [ 344 | "classifier = LogisticRegression(input=x, n_in=28 * 28, n_out=10)" 345 | ], 346 | "language": "python", 347 | "metadata": {}, 348 | "outputs": [], 349 | "prompt_number": 19 350 | }, 351 | { 352 | "cell_type": "code", 353 | "collapsed": false, 354 | "input": [ 355 | "cost = classifier.negative_log_likelihood(y)" 356 | ], 357 | "language": "python", 358 | "metadata": {}, 359 | "outputs": [], 360 | "prompt_number": 31 361 | }, 362 | { 363 | "cell_type": "code", 364 | "collapsed": false, 365 | "input": [ 366 | "train_model = theano.function(\n", 367 | " inputs=[index],\n", 368 | " outputs=cost,\n", 369 | " updates=updates,\n", 370 | " givens={\n", 371 | " x: train_set_x[index * batch_size: (index + 1) * batch_size],\n", 372 | " y: train_set_y[index * batch_size: (index + 1) * batch_size]\n", 373 | " }\n", 374 | ")" 375 | ], 376 | "language": "python", 377 | "metadata": {}, 378 | "outputs": [], 379 | "prompt_number": 34 380 | }, 381 | { 382 | "cell_type": "code", 383 | "collapsed": false, 384 | "input": [ 385 | "validate_model = theano.function(inputs=[index], outputs=classifier.errors(y),\n", 386 | " givens={\n", 387 | " x: valid_set_x[index * batch_size: (index + 1) * batch_size],\n", 388 | " y: valid_set_y[index * batch_size: (index + 1) * batch_size]\n", 389 | " }\n", 390 | ")\n" 391 | ], 392 | "language": "python", 393 | "metadata": {}, 394 | "outputs": [], 395 | "prompt_number": 29 396 | }, 397 | { 398 | "cell_type": "code", 399 | "collapsed": false, 400 | "input": [ 401 | "test_model = theano.function(inputs=[index], outputs=classifier.errors(y), givens={\n", 402 | " x: test_set_x[index * batch_size: (index + 1) * batch_size],\n", 403 | " y: test_set_y[index * batch_size: (index + 1) * batch_size]\n", 404 | " }\n", 405 | ")\n" 406 | ], 407 | "language": "python", 408 | "metadata": {}, 409 | "outputs": [], 410 | "prompt_number": 26 411 | }, 412 | { 413 | "cell_type": "code", 414 | "collapsed": false, 415 | "input": [ 416 | "test_model" 417 | ], 418 | "language": "python", 419 | "metadata": {}, 420 | "outputs": [ 421 | { 422 | "metadata": {}, 423 | "output_type": "pyout", 424 | "prompt_number": 27, 425 | "text": [ 426 | "" 427 | ] 428 | } 429 | ], 430 | "prompt_number": 27 431 | }, 432 | { 433 | "cell_type": "code", 434 | "collapsed": false, 435 | "input": [ 436 | "g_W = T.grad(cost=cost, wrt=classifier.W)\n", 437 | "g_b = T.grad(cost=cost, wrt=classifier.b)" 438 | ], 439 | "language": "python", 440 | "metadata": {}, 441 | "outputs": [], 442 | "prompt_number": 32 443 | }, 444 | { 445 | "cell_type": "code", 446 | "collapsed": false, 447 | "input": [ 448 | "updates = [(classifier.W, classifier.W - learning_rate * g_W),\n", 449 | " 
(classifier.b, classifier.b - learning_rate * g_b)]\n" 450 | ], 451 | "language": "python", 452 | "metadata": {}, 453 | "outputs": [], 454 | "prompt_number": 33 455 | }, 456 | { 457 | "cell_type": "markdown", 458 | "metadata": {}, 459 | "source": [ 460 | "*Train Model*" 461 | ] 462 | }, 463 | { 464 | "cell_type": "code", 465 | "collapsed": false, 466 | "input": [ 467 | "patience = 5000 # look at this many examples regardless\n", 468 | "patience_increase = 2 # wait this much longer when a new best is\n", 469 | " # found\n", 470 | "improvement_threshold = 0.995 # a relative improvement of this much is\n", 471 | " # considered significant\n", 472 | "validation_frequency = min(n_train_batches, patience / 2)\n", 473 | " # go through this many\n", 474 | " # minibatches before checking the network\n", 475 | " # on the validation set; in this case we\n", 476 | " # check every epoch\n", 477 | "\n", 478 | "best_validation_loss = numpy.inf\n", 479 | "test_score = 0.\n", 480 | "start_time = time.clock()\n", 481 | "\n", 482 | "done_looping = False\n", 483 | "epoch = 0" 484 | ], 485 | "language": "python", 486 | "metadata": {}, 487 | "outputs": [], 488 | "prompt_number": 37 489 | }, 490 | { 491 | "cell_type": "code", 492 | "collapsed": false, 493 | "input": [ 494 | "while (epoch < n_epochs) and (not done_looping):\n", 495 | " epoch = epoch + 1\n", 496 | " for minibatch_index in xrange(n_train_batches):\n", 497 | "\n", 498 | " minibatch_avg_cost = train_model(minibatch_index)\n", 499 | " # iteration number\n", 500 | " iter = (epoch - 1) * n_train_batches + minibatch_index\n", 501 | "\n", 502 | " if (iter + 1) % validation_frequency == 0:\n", 503 | " # compute zero-one loss on validation set\n", 504 | " validation_losses = [validate_model(i)\n", 505 | " for i in xrange(n_valid_batches)]\n", 506 | " this_validation_loss = numpy.mean(validation_losses)\n", 507 | "\n", 508 | " print(\n", 509 | " 'epoch %i, minibatch %i/%i, validation error %f %%' %\n", 510 | " (\n", 511 | " epoch,\n", 512 | " minibatch_index + 1,\n", 513 | " n_train_batches,\n", 514 | " this_validation_loss * 100.\n", 515 | " )\n", 516 | " )\n", 517 | "\n", 518 | " # if we got the best validation score until now\n", 519 | " if this_validation_loss < best_validation_loss:\n", 520 | " #improve patience if loss improvement is good enough\n", 521 | " if this_validation_loss < best_validation_loss * \\\n", 522 | " improvement_threshold:\n", 523 | " patience = max(patience, iter * patience_increase)\n", 524 | "\n", 525 | " best_validation_loss = this_validation_loss\n", 526 | " # test it on the test set\n", 527 | "\n", 528 | " test_losses = [test_model(i)\n", 529 | " for i in xrange(n_test_batches)]\n", 530 | " test_score = numpy.mean(test_losses)\n", 531 | "\n", 532 | " print(\n", 533 | " (\n", 534 | " ' epoch %i, minibatch %i/%i, test error of'\n", 535 | " ' best model %f %%'\n", 536 | " ) %\n", 537 | " (\n", 538 | " epoch,\n", 539 | " minibatch_index + 1,\n", 540 | " n_train_batches,\n", 541 | " test_score * 100.\n", 542 | " )\n", 543 | " )\n", 544 | "\n", 545 | " if patience <= iter:\n", 546 | " done_looping = True\n", 547 | " break\n", 548 | "\n", 549 | "end_time = time.clock()\n", 550 | "print(\n", 551 | " (\n", 552 | " 'Optimization complete with best validation score of %f %%,'\n", 553 | " 'with test performance %f %%'\n", 554 | " )\n", 555 | " % (best_validation_loss * 100., test_score * 100.)\n", 556 | ")\n", 557 | "print 'The code run for %d epochs, with %f epochs/sec' % (epoch, 1. 
* epoch / (end_time - start_time))\n", 558 | "print >> sys.stderr, ('The code for file ' + os.path.split('__file__')[1] + ' ran for %.1fs' % ((end_time - start_time)))\n" 559 | ], 560 | "language": "python", 561 | "metadata": {}, 562 | "outputs": [ 563 | { 564 | "output_type": "stream", 565 | "stream": "stdout", 566 | "text": [ 567 | "epoch 4, minibatch 83/83, validation error 9.875000 %\n", 568 | " epoch 4, minibatch 83/83, test error of best model 9.833333 %\n", 569 | "epoch 5, minibatch 83/83, validation error 9.562500 %" 570 | ] 571 | }, 572 | { 573 | "output_type": "stream", 574 | "stream": "stdout", 575 | "text": [ 576 | "\n", 577 | " epoch 5, minibatch 83/83, test error of best model 9.479167 %\n", 578 | "epoch 6, minibatch 83/83, validation error 9.322917 %" 579 | ] 580 | }, 581 | { 582 | "output_type": "stream", 583 | "stream": "stdout", 584 | "text": [ 585 | "\n", 586 | " epoch 6, minibatch 83/83, test error of best model 9.291667 %\n", 587 | "epoch 7, minibatch 83/83, validation error 9.187500 %" 588 | ] 589 | }, 590 | { 591 | "output_type": "stream", 592 | "stream": "stdout", 593 | "text": [ 594 | "\n", 595 | " epoch 7, minibatch 83/83, test error of best model 9.000000 %\n", 596 | "epoch 8, minibatch 83/83, validation error 8.989583 %" 597 | ] 598 | }, 599 | { 600 | "output_type": "stream", 601 | "stream": "stdout", 602 | "text": [ 603 | "\n", 604 | " epoch 8, minibatch 83/83, test error of best model 8.958333 %\n", 605 | "epoch 9, minibatch 83/83, validation error 8.937500 %" 606 | ] 607 | }, 608 | { 609 | "output_type": "stream", 610 | "stream": "stdout", 611 | "text": [ 612 | "\n", 613 | " epoch 9, minibatch 83/83, test error of best model 8.812500 %\n", 614 | "epoch 10, minibatch 83/83, validation error 8.750000 %" 615 | ] 616 | }, 617 | { 618 | "output_type": "stream", 619 | "stream": "stdout", 620 | "text": [ 621 | "\n", 622 | " epoch 10, minibatch 83/83, test error of best model 8.666667 %" 623 | ] 624 | }, 625 | { 626 | "output_type": "stream", 627 | "stream": "stdout", 628 | "text": [ 629 | "\n", 630 | "epoch 11, minibatch 83/83, validation error 8.666667 %" 631 | ] 632 | }, 633 | { 634 | "output_type": "stream", 635 | "stream": "stdout", 636 | "text": [ 637 | "\n", 638 | " epoch 11, minibatch 83/83, test error of best model 8.520833 %" 639 | ] 640 | }, 641 | { 642 | "output_type": "stream", 643 | "stream": "stdout", 644 | "text": [ 645 | "\n", 646 | "epoch 12, minibatch 83/83, validation error 8.583333 %" 647 | ] 648 | }, 649 | { 650 | "output_type": "stream", 651 | "stream": "stdout", 652 | "text": [ 653 | "\n", 654 | " epoch 12, minibatch 83/83, test error of best model 8.416667 %\n", 655 | "epoch 13, minibatch 83/83, validation error 8.489583 %" 656 | ] 657 | }, 658 | { 659 | "output_type": "stream", 660 | "stream": "stdout", 661 | "text": [ 662 | "\n", 663 | " epoch 13, minibatch 83/83, test error of best model 8.291667 %\n", 664 | "epoch 14, minibatch 83/83, validation error 8.427083 %" 665 | ] 666 | }, 667 | { 668 | "output_type": "stream", 669 | "stream": "stdout", 670 | "text": [ 671 | "\n", 672 | " epoch 14, minibatch 83/83, test error of best model 8.281250 %\n", 673 | "epoch 15, minibatch 83/83, validation error 8.354167 %" 674 | ] 675 | }, 676 | { 677 | "output_type": "stream", 678 | "stream": "stdout", 679 | "text": [ 680 | "\n", 681 | " epoch 15, minibatch 83/83, test error of best model 8.270833 %\n", 682 | "epoch 16, minibatch 83/83, validation error 8.302083 %" 683 | ] 684 | }, 685 | { 686 | "output_type": "stream", 687 | "stream": "stdout", 688 | 
"text": [ 689 | "\n", 690 | " epoch 16, minibatch 83/83, test error of best model 8.239583 %\n", 691 | "epoch 17, minibatch 83/83, validation error 8.250000 %" 692 | ] 693 | }, 694 | { 695 | "output_type": "stream", 696 | "stream": "stdout", 697 | "text": [ 698 | "\n", 699 | " epoch 17, minibatch 83/83, test error of best model 8.177083 %\n", 700 | "epoch 18, minibatch 83/83, validation error 8.229167 %" 701 | ] 702 | }, 703 | { 704 | "output_type": "stream", 705 | "stream": "stdout", 706 | "text": [ 707 | "\n", 708 | " epoch 18, minibatch 83/83, test error of best model 8.062500 %\n", 709 | "epoch 19, minibatch 83/83, validation error 8.260417 %" 710 | ] 711 | }, 712 | { 713 | "output_type": "stream", 714 | "stream": "stdout", 715 | "text": [ 716 | "\n", 717 | "epoch 20, minibatch 83/83, validation error 8.260417 %" 718 | ] 719 | }, 720 | { 721 | "output_type": "stream", 722 | "stream": "stdout", 723 | "text": [ 724 | "\n", 725 | "epoch 21, minibatch 83/83, validation error 8.208333 %" 726 | ] 727 | }, 728 | { 729 | "output_type": "stream", 730 | "stream": "stdout", 731 | "text": [ 732 | "\n", 733 | " epoch 21, minibatch 83/83, test error of best model 7.947917 %\n", 734 | "epoch 22, minibatch 83/83, validation error 8.187500 %" 735 | ] 736 | }, 737 | { 738 | "output_type": "stream", 739 | "stream": "stdout", 740 | "text": [ 741 | "\n", 742 | " epoch 22, minibatch 83/83, test error of best model 7.927083 %\n", 743 | "epoch 23, minibatch 83/83, validation error 8.156250 %" 744 | ] 745 | }, 746 | { 747 | "output_type": "stream", 748 | "stream": "stdout", 749 | "text": [ 750 | "\n", 751 | " epoch 23, minibatch 83/83, test error of best model 7.958333 %\n", 752 | "epoch 24, minibatch 83/83, validation error 8.114583 %" 753 | ] 754 | }, 755 | { 756 | "output_type": "stream", 757 | "stream": "stdout", 758 | "text": [ 759 | "\n", 760 | " epoch 24, minibatch 83/83, test error of best model 7.947917 %\n", 761 | "epoch 25, minibatch 83/83, validation error 8.093750 %" 762 | ] 763 | }, 764 | { 765 | "output_type": "stream", 766 | "stream": "stdout", 767 | "text": [ 768 | "\n", 769 | " epoch 25, minibatch 83/83, test error of best model 7.947917 %\n", 770 | "epoch 26, minibatch 83/83, validation error 8.104167 %" 771 | ] 772 | }, 773 | { 774 | "output_type": "stream", 775 | "stream": "stdout", 776 | "text": [ 777 | "\n", 778 | "epoch 27, minibatch 83/83, validation error 8.104167 %" 779 | ] 780 | }, 781 | { 782 | "output_type": "stream", 783 | "stream": "stdout", 784 | "text": [ 785 | "\n", 786 | "epoch 28, minibatch 83/83, validation error 8.052083 %" 787 | ] 788 | }, 789 | { 790 | "output_type": "stream", 791 | "stream": "stdout", 792 | "text": [ 793 | "\n", 794 | " epoch 28, minibatch 83/83, test error of best model 7.843750 %\n", 795 | "epoch 29, minibatch 83/83, validation error 8.052083 %" 796 | ] 797 | }, 798 | { 799 | "output_type": "stream", 800 | "stream": "stdout", 801 | "text": [ 802 | "\n", 803 | "epoch 30, minibatch 83/83, validation error 8.031250 %" 804 | ] 805 | }, 806 | { 807 | "output_type": "stream", 808 | "stream": "stdout", 809 | "text": [ 810 | "\n", 811 | " epoch 30, minibatch 83/83, test error of best model 7.843750 %\n", 812 | "epoch 31, minibatch 83/83, validation error 8.010417 %" 813 | ] 814 | }, 815 | { 816 | "output_type": "stream", 817 | "stream": "stdout", 818 | "text": [ 819 | "\n", 820 | " epoch 31, minibatch 83/83, test error of best model 7.833333 %\n", 821 | "epoch 32, minibatch 83/83, validation error 7.979167 %" 822 | ] 823 | }, 824 | { 825 | "output_type": 
"stream", 826 | "stream": "stdout", 827 | "text": [ 828 | "\n", 829 | " epoch 32, minibatch 83/83, test error of best model 7.812500 %\n", 830 | "epoch 33, minibatch 83/83, validation error 7.947917 %" 831 | ] 832 | }, 833 | { 834 | "output_type": "stream", 835 | "stream": "stdout", 836 | "text": [ 837 | "\n", 838 | " epoch 33, minibatch 83/83, test error of best model 7.739583 %\n", 839 | "epoch 34, minibatch 83/83, validation error 7.875000 %" 840 | ] 841 | }, 842 | { 843 | "output_type": "stream", 844 | "stream": "stdout", 845 | "text": [ 846 | "\n", 847 | " epoch 34, minibatch 83/83, test error of best model 7.729167 %\n", 848 | "epoch 35, minibatch 83/83, validation error 7.885417 %" 849 | ] 850 | }, 851 | { 852 | "output_type": "stream", 853 | "stream": "stdout", 854 | "text": [ 855 | "\n", 856 | "epoch 36, minibatch 83/83, validation error 7.843750 %" 857 | ] 858 | }, 859 | { 860 | "output_type": "stream", 861 | "stream": "stdout", 862 | "text": [ 863 | "\n", 864 | " epoch 36, minibatch 83/83, test error of best model 7.697917 %\n", 865 | "epoch 37, minibatch 83/83, validation error 7.802083 %" 866 | ] 867 | }, 868 | { 869 | "output_type": "stream", 870 | "stream": "stdout", 871 | "text": [ 872 | "\n", 873 | " epoch 37, minibatch 83/83, test error of best model 7.635417 %\n", 874 | "epoch 38, minibatch 83/83, validation error 7.812500 %" 875 | ] 876 | }, 877 | { 878 | "output_type": "stream", 879 | "stream": "stdout", 880 | "text": [ 881 | "\n", 882 | "epoch 39, minibatch 83/83, validation error 7.812500 %" 883 | ] 884 | }, 885 | { 886 | "output_type": "stream", 887 | "stream": "stdout", 888 | "text": [ 889 | "\n", 890 | "epoch 40, minibatch 83/83, validation error 7.822917 %" 891 | ] 892 | }, 893 | { 894 | "output_type": "stream", 895 | "stream": "stdout", 896 | "text": [ 897 | "\n", 898 | "epoch 41, minibatch 83/83, validation error 7.791667 %" 899 | ] 900 | }, 901 | { 902 | "output_type": "stream", 903 | "stream": "stdout", 904 | "text": [ 905 | "\n", 906 | " epoch 41, minibatch 83/83, test error of best model 7.625000 %\n", 907 | "epoch 42, minibatch 83/83, validation error 7.770833 %" 908 | ] 909 | }, 910 | { 911 | "output_type": "stream", 912 | "stream": "stdout", 913 | "text": [ 914 | "\n", 915 | " epoch 42, minibatch 83/83, test error of best model 7.614583 %\n", 916 | "epoch 43, minibatch 83/83, validation error 7.750000 %" 917 | ] 918 | }, 919 | { 920 | "output_type": "stream", 921 | "stream": "stdout", 922 | "text": [ 923 | "\n", 924 | " epoch 43, minibatch 83/83, test error of best model 7.593750 %\n", 925 | "epoch 44, minibatch 83/83, validation error 7.739583 %" 926 | ] 927 | }, 928 | { 929 | "output_type": "stream", 930 | "stream": "stdout", 931 | "text": [ 932 | "\n", 933 | " epoch 44, minibatch 83/83, test error of best model 7.593750 %\n", 934 | "epoch 45, minibatch 83/83, validation error 7.739583 %" 935 | ] 936 | }, 937 | { 938 | "output_type": "stream", 939 | "stream": "stdout", 940 | "text": [ 941 | "\n", 942 | "epoch 46, minibatch 83/83, validation error 7.739583 %" 943 | ] 944 | }, 945 | { 946 | "output_type": "stream", 947 | "stream": "stdout", 948 | "text": [ 949 | "\n", 950 | "epoch 47, minibatch 83/83, validation error 7.739583 %" 951 | ] 952 | }, 953 | { 954 | "output_type": "stream", 955 | "stream": "stdout", 956 | "text": [ 957 | "\n", 958 | "epoch 48, minibatch 83/83, validation error 7.708333 %" 959 | ] 960 | }, 961 | { 962 | "output_type": "stream", 963 | "stream": "stdout", 964 | "text": [ 965 | "\n", 966 | " epoch 48, minibatch 83/83, test error 
of best model 7.583333 %\n", 967 | "epoch 49, minibatch 83/83, validation error 7.677083 %" 968 | ] 969 | }, 970 | { 971 | "output_type": "stream", 972 | "stream": "stdout", 973 | "text": [ 974 | "\n", 975 | " epoch 49, minibatch 83/83, test error of best model 7.572917 %\n", 976 | "epoch 50, minibatch 83/83, validation error 7.677083 %" 977 | ] 978 | }, 979 | { 980 | "output_type": "stream", 981 | "stream": "stdout", 982 | "text": [ 983 | "\n", 984 | "epoch 51, minibatch 83/83, validation error 7.677083 %" 985 | ] 986 | }, 987 | { 988 | "output_type": "stream", 989 | "stream": "stdout", 990 | "text": [ 991 | "\n", 992 | "epoch 52, minibatch 83/83, validation error 7.656250 %" 993 | ] 994 | }, 995 | { 996 | "output_type": "stream", 997 | "stream": "stdout", 998 | "text": [ 999 | "\n", 1000 | " epoch 52, minibatch 83/83, test error of best model 7.541667 %\n", 1001 | "epoch 53, minibatch 83/83, validation error 7.656250 %" 1002 | ] 1003 | }, 1004 | { 1005 | "output_type": "stream", 1006 | "stream": "stdout", 1007 | "text": [ 1008 | "\n", 1009 | "epoch 54, minibatch 83/83, validation error 7.635417 %" 1010 | ] 1011 | }, 1012 | { 1013 | "output_type": "stream", 1014 | "stream": "stdout", 1015 | "text": [ 1016 | "\n", 1017 | " epoch 54, minibatch 83/83, test error of best model 7.520833 %\n", 1018 | "epoch 55, minibatch 83/83, validation error 7.635417 %" 1019 | ] 1020 | }, 1021 | { 1022 | "output_type": "stream", 1023 | "stream": "stdout", 1024 | "text": [ 1025 | "\n", 1026 | "epoch 56, minibatch 83/83, validation error 7.635417 %" 1027 | ] 1028 | }, 1029 | { 1030 | "output_type": "stream", 1031 | "stream": "stdout", 1032 | "text": [ 1033 | "\n", 1034 | "epoch 57, minibatch 83/83, validation error 7.604167 %" 1035 | ] 1036 | }, 1037 | { 1038 | "output_type": "stream", 1039 | "stream": "stdout", 1040 | "text": [ 1041 | "\n", 1042 | " epoch 57, minibatch 83/83, test error of best model 7.489583 %\n", 1043 | "epoch 58, minibatch 83/83, validation error 7.583333 %" 1044 | ] 1045 | }, 1046 | { 1047 | "output_type": "stream", 1048 | "stream": "stdout", 1049 | "text": [ 1050 | "\n", 1051 | " epoch 58, minibatch 83/83, test error of best model 7.458333 %\n", 1052 | "epoch 59, minibatch 83/83, validation error 7.572917 %" 1053 | ] 1054 | }, 1055 | { 1056 | "output_type": "stream", 1057 | "stream": "stdout", 1058 | "text": [ 1059 | "\n", 1060 | " epoch 59, minibatch 83/83, test error of best model 7.468750 %\n", 1061 | "epoch 60, minibatch 83/83, validation error 7.572917 %" 1062 | ] 1063 | }, 1064 | { 1065 | "output_type": "stream", 1066 | "stream": "stdout", 1067 | "text": [ 1068 | "\n", 1069 | "epoch 61, minibatch 83/83, validation error 7.583333 %" 1070 | ] 1071 | }, 1072 | { 1073 | "output_type": "stream", 1074 | "stream": "stdout", 1075 | "text": [ 1076 | "\n", 1077 | "epoch 62, minibatch 83/83, validation error 7.572917 %" 1078 | ] 1079 | }, 1080 | { 1081 | "output_type": "stream", 1082 | "stream": "stdout", 1083 | "text": [ 1084 | "\n", 1085 | "epoch 63, minibatch 83/83, validation error 7.562500 %" 1086 | ] 1087 | }, 1088 | { 1089 | "output_type": "stream", 1090 | "stream": "stdout", 1091 | "text": [ 1092 | "\n", 1093 | " epoch 63, minibatch 83/83, test error of best model 7.510417 %\n", 1094 | "epoch 64, minibatch 83/83, validation error 7.572917 %" 1095 | ] 1096 | }, 1097 | { 1098 | "output_type": "stream", 1099 | "stream": "stdout", 1100 | "text": [ 1101 | "\n", 1102 | "epoch 65, minibatch 83/83, validation error 7.562500 %" 1103 | ] 1104 | }, 1105 | { 1106 | "output_type": "stream", 1107 | 
"stream": "stdout", 1108 | "text": [ 1109 | "\n", 1110 | "epoch 66, minibatch 83/83, validation error 7.552083 %" 1111 | ] 1112 | }, 1113 | { 1114 | "output_type": "stream", 1115 | "stream": "stdout", 1116 | "text": [ 1117 | "\n", 1118 | " epoch 66, minibatch 83/83, test error of best model 7.520833 %\n", 1119 | "epoch 67, minibatch 83/83, validation error 7.552083 %" 1120 | ] 1121 | }, 1122 | { 1123 | "output_type": "stream", 1124 | "stream": "stdout", 1125 | "text": [ 1126 | "\n", 1127 | "epoch 68, minibatch 83/83, validation error 7.531250 %" 1128 | ] 1129 | }, 1130 | { 1131 | "output_type": "stream", 1132 | "stream": "stdout", 1133 | "text": [ 1134 | "\n", 1135 | " epoch 68, minibatch 83/83, test error of best model 7.520833 %\n", 1136 | "epoch 69, minibatch 83/83, validation error 7.531250 %" 1137 | ] 1138 | }, 1139 | { 1140 | "output_type": "stream", 1141 | "stream": "stdout", 1142 | "text": [ 1143 | "\n", 1144 | "epoch 70, minibatch 83/83, validation error 7.510417 %" 1145 | ] 1146 | }, 1147 | { 1148 | "output_type": "stream", 1149 | "stream": "stdout", 1150 | "text": [ 1151 | "\n", 1152 | " epoch 70, minibatch 83/83, test error of best model 7.500000 %\n", 1153 | "epoch 71, minibatch 83/83, validation error 7.520833 %" 1154 | ] 1155 | }, 1156 | { 1157 | "output_type": "stream", 1158 | "stream": "stdout", 1159 | "text": [ 1160 | "\n", 1161 | "epoch 72, minibatch 83/83, validation error 7.510417 %" 1162 | ] 1163 | }, 1164 | { 1165 | "output_type": "stream", 1166 | "stream": "stdout", 1167 | "text": [ 1168 | "\n", 1169 | " epoch 72, minibatch 83/83, test error of best model 7.510417 %\n", 1170 | "epoch 73, minibatch 83/83, validation error 7.500000 %" 1171 | ] 1172 | }, 1173 | { 1174 | "output_type": "stream", 1175 | "stream": "stdout", 1176 | "text": [ 1177 | "\n", 1178 | " epoch 73, minibatch 83/83, test error of best model 7.489583 %\n", 1179 | "Optimization complete with best validation score of 7.500000 %,with test performance 7.489583 %" 1180 | ] 1181 | }, 1182 | { 1183 | "output_type": "stream", 1184 | "stream": "stdout", 1185 | "text": [ 1186 | "\n", 1187 | "The code run for 74 epochs, with 1.543878 epochs/sec\n" 1188 | ] 1189 | }, 1190 | { 1191 | "output_type": "stream", 1192 | "stream": "stderr", 1193 | "text": [ 1194 | "The code for file __file__ ran for 47.9s\n" 1195 | ] 1196 | } 1197 | ], 1198 | "prompt_number": 50 1199 | }, 1200 | { 1201 | "cell_type": "markdown", 1202 | "metadata": {}, 1203 | "source": [ 1204 | "## Script Run" 1205 | ] 1206 | }, 1207 | { 1208 | "cell_type": "code", 1209 | "collapsed": false, 1210 | "input": [ 1211 | "%load_ext autoreload" 1212 | ], 1213 | "language": "python", 1214 | "metadata": {}, 1215 | "outputs": [], 1216 | "prompt_number": 1 1217 | }, 1218 | { 1219 | "cell_type": "code", 1220 | "collapsed": false, 1221 | "input": [ 1222 | "%autoreload 2" 1223 | ], 1224 | "language": "python", 1225 | "metadata": {}, 1226 | "outputs": [], 1227 | "prompt_number": 2 1228 | }, 1229 | { 1230 | "cell_type": "code", 1231 | "collapsed": false, 1232 | "input": [ 1233 | "import lib.theano_mnist as th_mnist" 1234 | ], 1235 | "language": "python", 1236 | "metadata": {}, 1237 | "outputs": [], 1238 | "prompt_number": 3 1239 | }, 1240 | { 1241 | "cell_type": "code", 1242 | "collapsed": false, 1243 | "input": [ 1244 | "th_mnist.main()" 1245 | ], 1246 | "language": "python", 1247 | "metadata": {}, 1248 | "outputs": [ 1249 | { 1250 | "output_type": "stream", 1251 | "stream": "stdout", 1252 | "text": [ 1253 | "... load and setup data\n", 1254 | "... 
loading data\n", 1255 | "... building the model" 1256 | ] 1257 | }, 1258 | { 1259 | "output_type": "stream", 1260 | "stream": "stdout", 1261 | "text": [ 1262 | "\n", 1263 | "... training the model" 1264 | ] 1265 | }, 1266 | { 1267 | "output_type": "stream", 1268 | "stream": "stdout", 1269 | "text": [ 1270 | "\n", 1271 | "epoch 1, minibatch 83/83, validation error 12.458333 %" 1272 | ] 1273 | }, 1274 | { 1275 | "output_type": "stream", 1276 | "stream": "stdout", 1277 | "text": [ 1278 | "\n", 1279 | "epoch 1, minibatch 83/83, test error of best model 12.375000 %\n", 1280 | "epoch 2, minibatch 83/83, validation error 11.010417 %" 1281 | ] 1282 | }, 1283 | { 1284 | "output_type": "stream", 1285 | "stream": "stdout", 1286 | "text": [ 1287 | "\n", 1288 | "epoch 2, minibatch 83/83, test error of best model 10.958333 %\n", 1289 | "epoch 3, minibatch 83/83, validation error 10.312500 %" 1290 | ] 1291 | }, 1292 | { 1293 | "output_type": "stream", 1294 | "stream": "stdout", 1295 | "text": [ 1296 | "\n", 1297 | "epoch 3, minibatch 83/83, test error of best model 10.312500 %\n", 1298 | "epoch 4, minibatch 83/83, validation error 9.875000 %" 1299 | ] 1300 | }, 1301 | { 1302 | "output_type": "stream", 1303 | "stream": "stdout", 1304 | "text": [ 1305 | "\n", 1306 | "epoch 4, minibatch 83/83, test error of best model 9.833333 %\n", 1307 | "epoch 5, minibatch 83/83, validation error 9.562500 %" 1308 | ] 1309 | }, 1310 | { 1311 | "output_type": "stream", 1312 | "stream": "stdout", 1313 | "text": [ 1314 | "\n", 1315 | "epoch 5, minibatch 83/83, test error of best model 9.479167 %\n", 1316 | "epoch 6, minibatch 83/83, validation error 9.322917 %" 1317 | ] 1318 | }, 1319 | { 1320 | "output_type": "stream", 1321 | "stream": "stdout", 1322 | "text": [ 1323 | "\n", 1324 | "epoch 6, minibatch 83/83, test error of best model 9.291667 %\n", 1325 | "epoch 7, minibatch 83/83, validation error 9.187500 %" 1326 | ] 1327 | }, 1328 | { 1329 | "output_type": "stream", 1330 | "stream": "stdout", 1331 | "text": [ 1332 | "\n", 1333 | "epoch 7, minibatch 83/83, test error of best model 9.000000 %\n", 1334 | "epoch 8, minibatch 83/83, validation error 8.989583 %" 1335 | ] 1336 | }, 1337 | { 1338 | "output_type": "stream", 1339 | "stream": "stdout", 1340 | "text": [ 1341 | "\n", 1342 | "epoch 8, minibatch 83/83, test error of best model 8.958333 %\n", 1343 | "epoch 9, minibatch 83/83, validation error 8.937500 %" 1344 | ] 1345 | }, 1346 | { 1347 | "output_type": "stream", 1348 | "stream": "stdout", 1349 | "text": [ 1350 | "\n", 1351 | "epoch 9, minibatch 83/83, test error of best model 8.812500 %\n", 1352 | "epoch 10, minibatch 83/83, validation error 8.750000 %" 1353 | ] 1354 | }, 1355 | { 1356 | "output_type": "stream", 1357 | "stream": "stdout", 1358 | "text": [ 1359 | "\n", 1360 | "epoch 10, minibatch 83/83, test error of best model 8.666667 %\n", 1361 | "epoch 11, minibatch 83/83, validation error 8.666667 %" 1362 | ] 1363 | }, 1364 | { 1365 | "output_type": "stream", 1366 | "stream": "stdout", 1367 | "text": [ 1368 | "\n", 1369 | "epoch 11, minibatch 83/83, test error of best model 8.520833 %\n", 1370 | "epoch 12, minibatch 83/83, validation error 8.583333 %" 1371 | ] 1372 | }, 1373 | { 1374 | "output_type": "stream", 1375 | "stream": "stdout", 1376 | "text": [ 1377 | "\n", 1378 | "epoch 12, minibatch 83/83, test error of best model 8.416667 %\n", 1379 | "epoch 13, minibatch 83/83, validation error 8.489583 %" 1380 | ] 1381 | }, 1382 | { 1383 | "output_type": "stream", 1384 | "stream": "stdout", 1385 | "text": [ 1386 | 
"\n", 1387 | "epoch 13, minibatch 83/83, test error of best model 8.291667 %\n", 1388 | "epoch 14, minibatch 83/83, validation error 8.427083 %" 1389 | ] 1390 | }, 1391 | { 1392 | "output_type": "stream", 1393 | "stream": "stdout", 1394 | "text": [ 1395 | "\n", 1396 | "epoch 14, minibatch 83/83, test error of best model 8.281250 %\n", 1397 | "epoch 15, minibatch 83/83, validation error 8.354167 %" 1398 | ] 1399 | }, 1400 | { 1401 | "output_type": "stream", 1402 | "stream": "stdout", 1403 | "text": [ 1404 | "\n", 1405 | "epoch 15, minibatch 83/83, test error of best model 8.270833 %\n", 1406 | "epoch 16, minibatch 83/83, validation error 8.302083 %" 1407 | ] 1408 | }, 1409 | { 1410 | "output_type": "stream", 1411 | "stream": "stdout", 1412 | "text": [ 1413 | "\n", 1414 | "epoch 16, minibatch 83/83, test error of best model 8.239583 %\n", 1415 | "epoch 17, minibatch 83/83, validation error 8.250000 %" 1416 | ] 1417 | }, 1418 | { 1419 | "output_type": "stream", 1420 | "stream": "stdout", 1421 | "text": [ 1422 | "\n", 1423 | "epoch 17, minibatch 83/83, test error of best model 8.177083 %\n", 1424 | "epoch 18, minibatch 83/83, validation error 8.229167 %" 1425 | ] 1426 | }, 1427 | { 1428 | "output_type": "stream", 1429 | "stream": "stdout", 1430 | "text": [ 1431 | "\n", 1432 | "epoch 18, minibatch 83/83, test error of best model 8.062500 %\n", 1433 | "epoch 19, minibatch 83/83, validation error 8.260417 %" 1434 | ] 1435 | }, 1436 | { 1437 | "output_type": "stream", 1438 | "stream": "stdout", 1439 | "text": [ 1440 | "\n", 1441 | "epoch 20, minibatch 83/83, validation error 8.260417 %" 1442 | ] 1443 | }, 1444 | { 1445 | "output_type": "stream", 1446 | "stream": "stdout", 1447 | "text": [ 1448 | "\n", 1449 | "epoch 21, minibatch 83/83, validation error 8.208333 %" 1450 | ] 1451 | }, 1452 | { 1453 | "output_type": "stream", 1454 | "stream": "stdout", 1455 | "text": [ 1456 | "\n", 1457 | "epoch 21, minibatch 83/83, test error of best model 7.947917 %\n", 1458 | "epoch 22, minibatch 83/83, validation error 8.187500 %" 1459 | ] 1460 | }, 1461 | { 1462 | "output_type": "stream", 1463 | "stream": "stdout", 1464 | "text": [ 1465 | "\n", 1466 | "epoch 22, minibatch 83/83, test error of best model 7.927083 %\n", 1467 | "epoch 23, minibatch 83/83, validation error 8.156250 %" 1468 | ] 1469 | }, 1470 | { 1471 | "output_type": "stream", 1472 | "stream": "stdout", 1473 | "text": [ 1474 | "\n", 1475 | "epoch 23, minibatch 83/83, test error of best model 7.958333 %\n", 1476 | "epoch 24, minibatch 83/83, validation error 8.114583 %" 1477 | ] 1478 | }, 1479 | { 1480 | "output_type": "stream", 1481 | "stream": "stdout", 1482 | "text": [ 1483 | "\n", 1484 | "epoch 24, minibatch 83/83, test error of best model 7.947917 %\n", 1485 | "epoch 25, minibatch 83/83, validation error 8.093750 %" 1486 | ] 1487 | }, 1488 | { 1489 | "output_type": "stream", 1490 | "stream": "stdout", 1491 | "text": [ 1492 | "\n", 1493 | "epoch 25, minibatch 83/83, test error of best model 7.947917 %\n", 1494 | "epoch 26, minibatch 83/83, validation error 8.104167 %" 1495 | ] 1496 | }, 1497 | { 1498 | "output_type": "stream", 1499 | "stream": "stdout", 1500 | "text": [ 1501 | "\n", 1502 | "epoch 27, minibatch 83/83, validation error 8.104167 %" 1503 | ] 1504 | }, 1505 | { 1506 | "output_type": "stream", 1507 | "stream": "stdout", 1508 | "text": [ 1509 | "\n", 1510 | "epoch 28, minibatch 83/83, validation error 8.052083 %" 1511 | ] 1512 | }, 1513 | { 1514 | "output_type": "stream", 1515 | "stream": "stdout", 1516 | "text": [ 1517 | "\n", 1518 | 
"epoch 28, minibatch 83/83, test error of best model 7.843750 %" 1519 | ] 1520 | }, 1521 | { 1522 | "output_type": "stream", 1523 | "stream": "stdout", 1524 | "text": [ 1525 | "\n", 1526 | "epoch 29, minibatch 83/83, validation error 8.052083 %" 1527 | ] 1528 | }, 1529 | { 1530 | "output_type": "stream", 1531 | "stream": "stdout", 1532 | "text": [ 1533 | "\n", 1534 | "epoch 30, minibatch 83/83, validation error 8.031250 %" 1535 | ] 1536 | }, 1537 | { 1538 | "output_type": "stream", 1539 | "stream": "stdout", 1540 | "text": [ 1541 | "\n", 1542 | "epoch 30, minibatch 83/83, test error of best model 7.843750 %\n", 1543 | "epoch 31, minibatch 83/83, validation error 8.010417 %" 1544 | ] 1545 | }, 1546 | { 1547 | "output_type": "stream", 1548 | "stream": "stdout", 1549 | "text": [ 1550 | "\n", 1551 | "epoch 31, minibatch 83/83, test error of best model 7.833333 %\n", 1552 | "epoch 32, minibatch 83/83, validation error 7.979167 %" 1553 | ] 1554 | }, 1555 | { 1556 | "output_type": "stream", 1557 | "stream": "stdout", 1558 | "text": [ 1559 | "\n", 1560 | "epoch 32, minibatch 83/83, test error of best model 7.812500 %\n", 1561 | "epoch 33, minibatch 83/83, validation error 7.947917 %" 1562 | ] 1563 | }, 1564 | { 1565 | "output_type": "stream", 1566 | "stream": "stdout", 1567 | "text": [ 1568 | "\n", 1569 | "epoch 33, minibatch 83/83, test error of best model 7.739583 %\n", 1570 | "epoch 34, minibatch 83/83, validation error 7.875000 %" 1571 | ] 1572 | }, 1573 | { 1574 | "output_type": "stream", 1575 | "stream": "stdout", 1576 | "text": [ 1577 | "\n", 1578 | "epoch 34, minibatch 83/83, test error of best model 7.729167 %\n", 1579 | "epoch 35, minibatch 83/83, validation error 7.885417 %" 1580 | ] 1581 | }, 1582 | { 1583 | "output_type": "stream", 1584 | "stream": "stdout", 1585 | "text": [ 1586 | "\n", 1587 | "epoch 36, minibatch 83/83, validation error 7.843750 %" 1588 | ] 1589 | }, 1590 | { 1591 | "output_type": "stream", 1592 | "stream": "stdout", 1593 | "text": [ 1594 | "\n", 1595 | "epoch 36, minibatch 83/83, test error of best model 7.697917 %\n", 1596 | "epoch 37, minibatch 83/83, validation error 7.802083 %" 1597 | ] 1598 | }, 1599 | { 1600 | "output_type": "stream", 1601 | "stream": "stdout", 1602 | "text": [ 1603 | "\n", 1604 | "epoch 37, minibatch 83/83, test error of best model 7.635417 %" 1605 | ] 1606 | }, 1607 | { 1608 | "output_type": "stream", 1609 | "stream": "stdout", 1610 | "text": [ 1611 | "\n", 1612 | "epoch 38, minibatch 83/83, validation error 7.812500 %" 1613 | ] 1614 | }, 1615 | { 1616 | "output_type": "stream", 1617 | "stream": "stdout", 1618 | "text": [ 1619 | "\n", 1620 | "epoch 39, minibatch 83/83, validation error 7.812500 %" 1621 | ] 1622 | }, 1623 | { 1624 | "output_type": "stream", 1625 | "stream": "stdout", 1626 | "text": [ 1627 | "\n", 1628 | "epoch 40, minibatch 83/83, validation error 7.822917 %" 1629 | ] 1630 | }, 1631 | { 1632 | "output_type": "stream", 1633 | "stream": "stdout", 1634 | "text": [ 1635 | "\n", 1636 | "epoch 41, minibatch 83/83, validation error 7.791667 %" 1637 | ] 1638 | }, 1639 | { 1640 | "output_type": "stream", 1641 | "stream": "stdout", 1642 | "text": [ 1643 | "\n", 1644 | "epoch 41, minibatch 83/83, test error of best model 7.625000 %\n", 1645 | "epoch 42, minibatch 83/83, validation error 7.770833 %" 1646 | ] 1647 | }, 1648 | { 1649 | "output_type": "stream", 1650 | "stream": "stdout", 1651 | "text": [ 1652 | "\n", 1653 | "epoch 42, minibatch 83/83, test error of best model 7.614583 %\n", 1654 | "epoch 43, minibatch 83/83, validation 
error 7.750000 %" 1655 | ] 1656 | }, 1657 | { 1658 | "output_type": "stream", 1659 | "stream": "stdout", 1660 | "text": [ 1661 | "\n", 1662 | "epoch 43, minibatch 83/83, test error of best model 7.593750 %\n", 1663 | "epoch 44, minibatch 83/83, validation error 7.739583 %" 1664 | ] 1665 | }, 1666 | { 1667 | "output_type": "stream", 1668 | "stream": "stdout", 1669 | "text": [ 1670 | "\n", 1671 | "epoch 44, minibatch 83/83, test error of best model 7.593750 %\n", 1672 | "epoch 45, minibatch 83/83, validation error 7.739583 %" 1673 | ] 1674 | }, 1675 | { 1676 | "output_type": "stream", 1677 | "stream": "stdout", 1678 | "text": [ 1679 | "\n", 1680 | "epoch 46, minibatch 83/83, validation error 7.739583 %" 1681 | ] 1682 | }, 1683 | { 1684 | "output_type": "stream", 1685 | "stream": "stdout", 1686 | "text": [ 1687 | "\n", 1688 | "epoch 47, minibatch 83/83, validation error 7.739583 %" 1689 | ] 1690 | }, 1691 | { 1692 | "output_type": "stream", 1693 | "stream": "stdout", 1694 | "text": [ 1695 | "\n", 1696 | "epoch 48, minibatch 83/83, validation error 7.708333 %" 1697 | ] 1698 | }, 1699 | { 1700 | "output_type": "stream", 1701 | "stream": "stdout", 1702 | "text": [ 1703 | "\n", 1704 | "epoch 48, minibatch 83/83, test error of best model 7.583333 %\n", 1705 | "epoch 49, minibatch 83/83, validation error 7.677083 %" 1706 | ] 1707 | }, 1708 | { 1709 | "output_type": "stream", 1710 | "stream": "stdout", 1711 | "text": [ 1712 | "\n", 1713 | "epoch 49, minibatch 83/83, test error of best model 7.572917 %\n", 1714 | "epoch 50, minibatch 83/83, validation error 7.677083 %" 1715 | ] 1716 | }, 1717 | { 1718 | "output_type": "stream", 1719 | "stream": "stdout", 1720 | "text": [ 1721 | "\n", 1722 | "epoch 51, minibatch 83/83, validation error 7.677083 %" 1723 | ] 1724 | }, 1725 | { 1726 | "output_type": "stream", 1727 | "stream": "stdout", 1728 | "text": [ 1729 | "\n", 1730 | "epoch 52, minibatch 83/83, validation error 7.656250 %" 1731 | ] 1732 | }, 1733 | { 1734 | "output_type": "stream", 1735 | "stream": "stdout", 1736 | "text": [ 1737 | "\n", 1738 | "epoch 52, minibatch 83/83, test error of best model 7.541667 %\n", 1739 | "epoch 53, minibatch 83/83, validation error 7.656250 %" 1740 | ] 1741 | }, 1742 | { 1743 | "output_type": "stream", 1744 | "stream": "stdout", 1745 | "text": [ 1746 | "\n", 1747 | "epoch 54, minibatch 83/83, validation error 7.635417 %" 1748 | ] 1749 | }, 1750 | { 1751 | "output_type": "stream", 1752 | "stream": "stdout", 1753 | "text": [ 1754 | "\n", 1755 | "epoch 54, minibatch 83/83, test error of best model 7.520833 %\n", 1756 | "epoch 55, minibatch 83/83, validation error 7.635417 %" 1757 | ] 1758 | }, 1759 | { 1760 | "output_type": "stream", 1761 | "stream": "stdout", 1762 | "text": [ 1763 | "\n", 1764 | "epoch 56, minibatch 83/83, validation error 7.635417 %" 1765 | ] 1766 | }, 1767 | { 1768 | "output_type": "stream", 1769 | "stream": "stdout", 1770 | "text": [ 1771 | "\n", 1772 | "epoch 57, minibatch 83/83, validation error 7.604167 %" 1773 | ] 1774 | }, 1775 | { 1776 | "output_type": "stream", 1777 | "stream": "stdout", 1778 | "text": [ 1779 | "\n", 1780 | "epoch 57, minibatch 83/83, test error of best model 7.489583 %\n", 1781 | "epoch 58, minibatch 83/83, validation error 7.583333 %" 1782 | ] 1783 | }, 1784 | { 1785 | "output_type": "stream", 1786 | "stream": "stdout", 1787 | "text": [ 1788 | "\n", 1789 | "epoch 58, minibatch 83/83, test error of best model 7.458333 %\n", 1790 | "epoch 59, minibatch 83/83, validation error 7.572917 %" 1791 | ] 1792 | }, 1793 | { 1794 | 
"output_type": "stream", 1795 | "stream": "stdout", 1796 | "text": [ 1797 | "\n", 1798 | "epoch 59, minibatch 83/83, test error of best model 7.468750 %\n", 1799 | "epoch 60, minibatch 83/83, validation error 7.572917 %" 1800 | ] 1801 | }, 1802 | { 1803 | "output_type": "stream", 1804 | "stream": "stdout", 1805 | "text": [ 1806 | "\n", 1807 | "epoch 61, minibatch 83/83, validation error 7.583333 %" 1808 | ] 1809 | }, 1810 | { 1811 | "output_type": "stream", 1812 | "stream": "stdout", 1813 | "text": [ 1814 | "\n", 1815 | "epoch 62, minibatch 83/83, validation error 7.572917 %" 1816 | ] 1817 | }, 1818 | { 1819 | "output_type": "stream", 1820 | "stream": "stdout", 1821 | "text": [ 1822 | "\n", 1823 | "epoch 63, minibatch 83/83, validation error 7.562500 %" 1824 | ] 1825 | }, 1826 | { 1827 | "output_type": "stream", 1828 | "stream": "stdout", 1829 | "text": [ 1830 | "\n", 1831 | "epoch 63, minibatch 83/83, test error of best model 7.510417 %\n", 1832 | "epoch 64, minibatch 83/83, validation error 7.572917 %" 1833 | ] 1834 | }, 1835 | { 1836 | "output_type": "stream", 1837 | "stream": "stdout", 1838 | "text": [ 1839 | "\n", 1840 | "epoch 65, minibatch 83/83, validation error 7.562500 %" 1841 | ] 1842 | }, 1843 | { 1844 | "output_type": "stream", 1845 | "stream": "stdout", 1846 | "text": [ 1847 | "\n", 1848 | "epoch 66, minibatch 83/83, validation error 7.552083 %" 1849 | ] 1850 | }, 1851 | { 1852 | "output_type": "stream", 1853 | "stream": "stdout", 1854 | "text": [ 1855 | "\n", 1856 | "epoch 66, minibatch 83/83, test error of best model 7.520833 %\n", 1857 | "epoch 67, minibatch 83/83, validation error 7.552083 %" 1858 | ] 1859 | }, 1860 | { 1861 | "output_type": "stream", 1862 | "stream": "stdout", 1863 | "text": [ 1864 | "\n", 1865 | "epoch 68, minibatch 83/83, validation error 7.531250 %" 1866 | ] 1867 | }, 1868 | { 1869 | "output_type": "stream", 1870 | "stream": "stdout", 1871 | "text": [ 1872 | "\n", 1873 | "epoch 68, minibatch 83/83, test error of best model 7.520833 %\n", 1874 | "epoch 69, minibatch 83/83, validation error 7.531250 %" 1875 | ] 1876 | }, 1877 | { 1878 | "output_type": "stream", 1879 | "stream": "stdout", 1880 | "text": [ 1881 | "\n", 1882 | "epoch 70, minibatch 83/83, validation error 7.510417 %" 1883 | ] 1884 | }, 1885 | { 1886 | "output_type": "stream", 1887 | "stream": "stdout", 1888 | "text": [ 1889 | "\n", 1890 | "epoch 70, minibatch 83/83, test error of best model 7.500000 %\n", 1891 | "epoch 71, minibatch 83/83, validation error 7.520833 %" 1892 | ] 1893 | }, 1894 | { 1895 | "output_type": "stream", 1896 | "stream": "stdout", 1897 | "text": [ 1898 | "\n", 1899 | "epoch 72, minibatch 83/83, validation error 7.510417 %" 1900 | ] 1901 | }, 1902 | { 1903 | "output_type": "stream", 1904 | "stream": "stdout", 1905 | "text": [ 1906 | "\n", 1907 | "epoch 72, minibatch 83/83, test error of best model 7.510417 %\n", 1908 | "epoch 73, minibatch 83/83, validation error 7.500000 %" 1909 | ] 1910 | }, 1911 | { 1912 | "output_type": "stream", 1913 | "stream": "stdout", 1914 | "text": [ 1915 | "\n", 1916 | "epoch 73, minibatch 83/83, test error of best model 7.489583 %\n", 1917 | "Optimization complete with best validation score of 7.500000 %, with test performance 7.489583 %" 1918 | ] 1919 | }, 1920 | { 1921 | "output_type": "stream", 1922 | "stream": "stdout", 1923 | "text": [ 1924 | "\n", 1925 | "The code run for 74 epochs, with 1.748854 epochs/sec\n" 1926 | ] 1927 | }, 1928 | { 1929 | "output_type": "stream", 1930 | "stream": "stderr", 1931 | "text": [ 1932 | "The code for 
file __file__ ran for 42.3s\n" 1933 | ] 1934 | } 1935 | ], 1936 | "prompt_number": 6 1937 | }, 1938 | { 1939 | "cell_type": "code", 1940 | "collapsed": false, 1941 | "input": [], 1942 | "language": "python", 1943 | "metadata": {}, 1944 | "outputs": [] 1945 | } 1946 | ], 1947 | "metadata": {} 1948 | } 1949 | ] 1950 | } --------------------------------------------------------------------------------