├── .travis.yml ├── LICENSE.txt ├── README.md ├── TODO.txt └── code ├── MNISTdigits.py ├── PCA.py ├── checkgpuspeed.py ├── emotions.py ├── haarcascade_frontalface_default.xml ├── lib ├── __init__.py ├── activationfunctions.py ├── batchtrainer.py ├── cnnLayers.py ├── common.py ├── convNet.py ├── debug.py ├── deepbelief.py ├── restrictedBoltzmannMachine.py ├── trainingoptions.py └── utils.py ├── maxoutMain.py ├── old-version ├── MNISTdigits.py ├── PCA.py ├── common.py ├── deepbelief.py ├── readmnist.py ├── restrictedBoltzmannMachine.py └── utils.py ├── read ├── __init__.py ├── facedetection.py ├── readfacedatabases.py └── readmnist.py ├── similarity ├── similarity.py ├── similarityMain.py └── similarityUtils.py ├── spearmint-configs ├── adverserial-training │ ├── adversarial.py │ └── config.pb └── dbnmnist │ ├── config.pb │ └── mnistdbn.py ├── tests.py └── webcam-emotion-recognition ├── Readme.md ├── emotionrecognition.py ├── face_detection.py ├── haarcascade_frontalface_default.xml ├── icon_39345withoutalpha.png ├── icon_4895withoutalpha.png ├── icon_6231withoutalpha.png ├── ignoreoutput.py ├── mihaela-happy-sad.p ├── train-emotion-net.py └── webcam-emotions.py /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: required 2 | 3 | language: python 4 | 5 | services: 6 | - docker 7 | 8 | before_install: 9 | - docker pull mihaelacr/pydeeplearn-labeled 10 | - docker run -d -p 127.0.0.1:80:4567 mihaelacr/pydeeplearn-labeled /bin/sh -c "cd pydeeplearn/code; python MNISTdigits.py --trainSize 100 --testSize 10 --db --train --rbmnesterov test.p --validation --save --save_best_weights --preTrainEpochs 0;" 11 | - docker ps -a 12 | - docker run mihaelacr/pydeeplearn-labeled /bin/sh -c "cd pydeeplearn/code; python MNISTdigits.py --trainSize 100 --testSize 10 --db --train --rbmnesterov test.p --validation --save --save_best_weights --preTrainEpochs 0;" 13 | 14 | script: 15 | - docker run mihaelacr/pydeeplearn-labeled /bin/sh -c "cd pydeeplearn/code; python MNISTdigits.py --trainSize 100 --testSize 10 --db --train --rbmnesterov test.p --validation --save --save_best_weights --preTrainEpochs 0;" 16 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013-2014, Mihaela Rosca 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 5 | 6 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 7 | 8 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 9 | 10 | 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 11 | 12 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
13 |
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
1 | pydeeplearn [![Build Status](https://travis-ci.org/mihaelacr/pydeeplearn.png)](https://travis-ci.org/mihaelacr/pydeeplearn)
2 | ===========
3 |
4 | Library for deep belief nets, RBMs and convolutional neural networks. Provides code for dropout, rmsprop, momentum, rectified linear units, sparsity constraints, weight decay, adversarial training, etc. Runs on GPUs for high performance by using [theano](http://deeplearning.net/software/theano/). Provides code and a demo for live emotion detection from the webcam.
5 |
6 |
7 | For more details see the [report](http://www.doc.ic.ac.uk/teaching/distinguished-projects/2014/mrosca.pdf) of the project which used this code and the [poster](http://elarosca.net/poster.pdf) submitted for participation in the SET awards. The project was made as part of the requirements for a master's degree at Imperial College London and received a [prize of excellence](http://www3.imperial.ac.uk/computing/teaching/ug/ug-distinguished-projects).
8 |
9 | One of the key points of this implementation and API is that they do not impose theano on the user. While theano is a great tool, it has quite a steep learning curve, so I decided to take advantage of its power without requiring the user to learn theano. Hence all the interface code just requires python and basic numpy knowledge. To the best of my knowledge this is the only library with these features.
10 |
11 | The API provided by the DBNs is compatible with scikit-learn, so you can use all the functionality from that library in conjunction with my implementation.
12 |
13 | In case you use my code for a paper, study or project, please cite my report and, if you can, add a link to this repository:
14 |
15 | ```
16 | @article{rosca2014networks,
17 |   title={Networks with emotions},
18 |   author={Rosca, Mihaela},
19 |   year={2014}
20 | }
21 | ```
22 |
23 | ## Live emotion recognition from the webcam
24 | I used pydeeplearn and openCV to make an application which detects emotions live from the webcam stream. You can see a demo video of me fooling around in front of the camera here: http://elarosca.net/video.ogv
25 |
26 | If you want to try the demo for yourself, check out `code/webcam-emotion-recognition/`! The instructions in the [readme](https://github.com/mihaelacr/pydeeplearn/blob/master/code/webcam-emotion-recognition/Readme.md) will show you how to run the code. I also uploaded a trained network so that you do not have to run the training code to try it out. However, if you want to maximize results, you might want to train a network on your own dataset. I have provided code to do that as well.
27 |
28 | ## User guide
29 | * The library is in `code/lib/`. There you can find the implementations of RBMs, CNNs and DBNs.
30 | * Multiple examples on how to use RBMs, DBNs and CNNs are in `code/MNISTdigits.py` and `code/emotions.py`
31 | * The code that implements a network which determines the similarity between two inputs is in `code/similarity`
32 | * The code for the webcam demo (includes training a network from the webcam data) can be found in `code/webcam-emotion-recognition/`
33 | * The old code that is based only on numpy, not theano, is in `code/old-version`. This code is incomplete. Do not use it. It is there for educational purposes, because it is easier to understand how to implement RBMs and DBNs without theano.
34 | * If you are a beginner in deep learning, please check out my [report](http://elarosca.net/report.pdf). It explains the foundations behind the concepts used in this library.
35 | * If you still have questions, drop me an email or a message.
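
To give a flavour of the API, here is a minimal sketch that wires together a small convolutional net from the classes in `code/lib/` (run from the `code/` directory). The `TrainingOptions` keyword arguments are illustrative assumptions based on the attribute names used in `code/lib/batchtrainer.py`; check `code/lib/trainingoptions.py` and `code/MNISTdigits.py` for the exact constructor:

```
import numpy as np

from lib.activationfunctions import Rectified
from lib.cnnLayers import ConvolutionalLayer, PoolingLayer, SoftmaxLayer
from lib.common import labelsToVectors
from lib.convNet import ConvolutionalNN
from lib.trainingoptions import TrainingOptions

# A tiny net: convolution -> max pooling -> softmax readout over 10 classes.
layers = [ConvolutionalLayer(4, (5, 5), Rectified()),
          PoolingLayer((2, 2)),
          SoftmaxLayer(10)]

# The keyword arguments below are assumptions, for illustration only.
options = TrainingOptions(miniBatchSize=20, learningRate=0.01, momentumMax=0.9)

net = ConvolutionalNN(layers, options)

# 100 random 28x28 "images" and one-hot labels, just to exercise the API.
data = np.random.rand(100, 28, 28)
labels = labelsToVectors(np.random.randint(0, 10, 100), 10)

net.train(data, labels, epochs=5)
net.test(data)  # forward pass through the trained network
```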
36 |
37 | ## New dataset
38 |
39 | There is a new dataset of faces available: [Labeled Faces in the Wild](http://vis-www.cs.umass.edu/lfw/). I would love for someone to try pydeeplearn on that dataset! Let me know if you are interested :)
40 |
41 | ## Docker container
42 | If you do not want to go through all the hurdles of installing the dependencies needed for pydeeplearn, you can just use the docker container found on [docker hub](https://hub.docker.com/r/mihaelacr/pydeeplearn-labeled/).
43 | The docker container comes with the MNIST digits, so you do not have to download the files yourself.
44 |
45 | For GPU usage, I suggest using the [nvidia-docker](https://github.com/NVIDIA/nvidia-docker) wrapper to ensure that docker works with the GPU.
46 |
47 | These instructions should get you up and running with `pydeeplearn`:
48 | ```
49 | git clone https://github.com/NVIDIA/nvidia-docker.git
50 | cd nvidia-docker
51 | GPU=0 ./nvidia-docker run --rm -it mihaelacr/pydeeplearn-labeled bash
52 | cd pydeeplearn/code
53 | ```
54 |
55 | Check that pydeeplearn works:
56 | ```
57 | THEANO_FLAGS='device=gpu,floatX=float32' PATH=/usr/local/cuda/bin:$PATH LD_LIBRARY_PATH=/usr/local/cuda/lib64 python MNISTdigits.py --trainSize 1000 --testSize 10 --db --train --rbmnesterov test.p --save
58 | ```
59 |
60 | If you just want to check that theano works with the GPU then just try:
61 | ```
62 | THEANO_FLAGS='device=gpu,floatX=float32' PATH=/usr/local/cuda/bin:$PATH LD_LIBRARY_PATH=/usr/local/cuda/lib64 python -c 'import theano'
63 | ```
64 | This should print something like `Using GPU device...`.
65 |
66 | ## Key features
67 |
68 | ### Network types
69 | * RBM
70 | * DBN
71 | * CNN
72 | * ANN (through the dbn.py implementation, by setting preTrainEpochs=0 in the constructor)
73 | * similarity networks (with RBM siamese networks)
74 |
75 |
76 | ### Training tricks supported
77 | * early stopping
78 | * simple momentum
79 | * Nesterov momentum
80 | * dropout (for the hidden and visible layer)
81 | * adversarial training
82 | * rmsprop (sketched in numpy below)
83 | * scaling the learning rate by momentum
84 | * multiple activation functions (and with ease we can support more)
85 | * integration with a bayesian optimization framework for hyperparameter optimization (spearmint)
86 | * multiple hidden unit types (binary, real valued)
87 |
88 | Supported image preprocessing:
89 | * histogram equalization (using openCV)
90 | * face cropping (for face images, using openCV)
91 |
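For reference, the core parameter update implemented in `code/lib/batchtrainer.py` (simple momentum, optionally combined with rmsprop) boils down to the following numpy sketch; the optional scaling of the learning rate by `1 - momentum` is omitted here:

```
import numpy as np

def momentum_rmsprop_update(param, grad, old_update, old_mean_square,
                            lr, momentum, rmsprop=True):
    # Mirrors BatchTrainer._buildUpdatesSimpleMomentum.
    param_update = momentum * old_update
    if rmsprop:
        # Running average of the squared gradient rescales the step.
        mean_square = 0.9 * old_mean_square + 0.1 * grad ** 2
        param_update += - lr * grad / np.sqrt(mean_square + 1e-8)
    else:
        mean_square = old_mean_square
        param_update += - lr * grad
    return param + param_update, param_update, mean_square
```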
92 | ## Future and current work
93 | For the future plans I have for the library, please see the TODO.txt file. Note that pydeeplearn is currently a side project for me, so some features might take some time to implement.
94 |
95 | If you want a feature implemented, please either send a pull request or let me know. I will do my best to get it up and running for you.
96 |
97 | ### Webcam demo
98 | You can do live emotion recognition from the webcam with the code in `code/webcam-emotion-recognition/`. Note that you can replace the `pydeeplearn` classifier with another classifier, in case you wish to do so.
99 |
100 | ## Running examples
101 |
102 | ### MNIST
103 | In order to be able to use the MNIST digits examples, you have to first get the data from the [official website](http://yann.lecun.com/exdb/mnist/). The code/MNISTdigits.py script reads the data using the --path argument, so you must set that argument to point to the directory in which you now have the data. In order to see the different options available for training and testing a network for digit recognition, see the possible flags in MNISTdigits.py. Note that an unnamed argument giving the file for the stored network is required as the final argument: if training is performed (decided with the --train flag), the resulting network will be stored in that file, and if no training is performed, a network is assumed to be already stored in that file and will be retrieved using pickle.
104 |
105 | Example run:
106 |
107 | `python MNISTdigits.py --trainSize 60000 --testSize 10000 --nesterov --rbmnesterov --maxEpochs 1000 --miniBatchSize 20 --rmsprop network.p`
108 |
109 | ### Emotion recognition
110 | The script in code/emotions.py contains code on how to do emotion recognition from images using deep belief networks. The code there uses multiple datasets: Jaffe, Cohn-Kanade, MultiPie and other unlabelled datasets. While Jaffe and Cohn-Kanade are publicly available, the MultiPie dataset is available only via purchase. Some code also handles the data available in a previous Kaggle competition (data can be found [here](https://www.kaggle.com/c/challenges-in-representation-learning-facial-expression-recognition-challenge/data)). Note that I did not spend much time optimizing hyperparameters for the Kaggle competition data, so if you are interested in obtaining better results you might start by using spearmint to obtain good values for the hyperparameters.
111 |
112 | ### Similarity detection using siamese networks
113 | You can train a network to detect if two images represent the same person or the same emotion using the code in `code/similarity`. Note that in order to be able to train such a network, labelled data (with subjects or emotions) is needed. Most of the labelled data that I used for these experiments was taken from the MultiPie dataset. The code can be run as follows:
114 |
115 | `python similarityMain.py --diffsubjects --relu --epochs 90 --rbmepochs 10`
116 |
117 | ## Cloning the repo
118 | By now `pydeeplearn` has a big git history. If you do not want to get all of it (and you probably do not need it), use:
119 |
120 | `git clone https://github.com/mihaelacr/pydeeplearn.git --depth 1`
121 |
122 | ## How to install dependencies
123 |
124 | 1. Create a python virtualenv
125 |
126 | 2. Clone numpy
127 |
128 | `git clone https://github.com/numpy/numpy`
129 |
130 | 3. Optional: setup numpy to work with OpenBlas
131 | `git clone git://github.com/xianyi/OpenBLAS`
132 | `cd OpenBLAS && make FC=gfortran`
133 | `sudo make PREFIX=prefixpath install`
134 | `ldconfig (requires sudo if prefixpath is a global path)`
135 |
136 | In the directory where numpy was downloaded:
137 | `vim site.cfg`
138 | set the contents to:
139 |
140 | `[atlas]`
141 | `atlas_libs = openblas`
142 | `library_dirs = prefixpath/lib`
143 |
144 | or for numpy 1.9.dev:
145 |
146 | `[openblas]`
147 | `libraries = openblas`
148 | `library_dirs = prefixpath/lib`
149 | `include_dirs = prefixpath/include`
150 |
151 | 4. `python setup.py config` to ensure everything is OK
152 | 5. `python setup.py build && python setup.py install`
153 | 6. `pip install --upgrade scikit-learn`
154 | 7. `pip install --upgrade cython`
155 | 8. `pip install --upgrade scikit-image`
156 | 9. `pip install theano`
157 | 10. install opencv for python (try the latest version) [for opencv3.0.0-dev](http://docs.opencv.org/trunk/doc/tutorials/introduction/linux_install/linux_install.html)
158 |
159 | 11. install matplotlib
160 | `easy_install -m matplotlib`
161 | 12. install sklearn
162 |
163 | See the instructions [here](http://scikit-learn.org/stable/install.html)
164 |
165 | ## Set up
166 |
167 | When running a pydeeplearn program you might have to set up some environment variables, depending on your configuration. If you want to use the GPU for training/testing a model, you have to ensure that theano knows where your CUDA installation is (for detailed instructions see below).
168 |
169 | ### Setting up theano to work on the GPU
170 |
171 | `PATH` needs to contain the path to nvcc (usually `/usr/local/cuda/bin`)
172 |
173 | `CUDA_PATH` needs to contain the path to cuda (usually `/usr/local/cuda/`)
174 |
175 | `LD_LIBRARY_PATH` needs to contain the linker libraries for cuda (`/usr/local/cuda/lib64`)
176 |
177 | `LIBRARY_PATH` needs to contain the path to the nvidia driver (something like `/usr/lib/nvidia-331`)
178 |
179 |
180 | If you are not configuring theano globally (in /home/user/.theano), then you have to set up the THEANO_FLAGS variable:
181 |
182 | `export THEANO_FLAGS='floatX=float32,device=gpu0,nvcc.fastmath=True'`
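
Equivalently, these flags can be persisted in `~/.theanorc` (this is standard theano configuration, nothing pydeeplearn specific; a minimal sketch):
```
[global]
floatX = float32
device = gpu0

[nvcc]
fastmath = True
```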
183 |
184 | ### Setting up numpy/theano to work with openblas
185 |
186 | `LD_LIBRARY_PATH` needs to contain the path to openblas (for numpy to find it: this is the prefix path you chose at step 3 in the installation instructions above) and the path to OpenCV in case it was not installed globally
187 |
188 | `LIBRARY_PATH` needs to contain the path to openblas (for numpy to find it: this is the prefix path you chose at step 3 in the installation instructions above) and the path to OpenCV in case it was not installed globally
189 |
190 |
191 | ## Acknowledgements
192 |
193 | I would like to thank the Department of Computing at Imperial College and Prof. Abbas Edalat for their support during my thesis and for allowing me to continue with my experiments on lab equipment after graduation.
194 |
-------------------------------------------------------------------------------- /TODO.txt: --------------------------------------------------------------------------------
1 | 1. Batch normalization
2 | 2. LSTM implementation
3 | 3. Refactor the deep belief net code to also use layers: this makes it easy to add fully connected layers to conv nets and to delete the ann implementation, by merging it with the dbn implementation.
4 | 4. Consider using fuel for input data
5 | 5. Spearmint: do not use the maxEpochs as something that you optimize using spearmint
6 |
7 | Debugging-wise/theano:
8 | 6. Use theano nanguardmode for debugging nans in theano.
9 | 7. Use test_value for theano tensor values to ensure that you get shape errors at compile time
-------------------------------------------------------------------------------- /code/PCA.py: --------------------------------------------------------------------------------
1 | __author__ = "Mihaela Rosca"
2 | __contact__ = "mihaela.c.rosca@gmail.com"
3 |
4 | import heapq
5 | import matplotlib
6 | import numpy
7 | import os
8 | import scipy
9 | import scipy.linalg
10 | import warnings
11 |
12 | from os.path import isfile, join
13 | from scipy import misc
14 |
15 | # Import all common functions
16 | from lib.common import *
17 |
18 | import matplotlib
19 | import os
20 | havedisplay = "DISPLAY" in os.environ
21 | if not havedisplay:
22 |   exitval = os.system('python -c "import matplotlib.pyplot as plt; plt.figure()"')
23 |   havedisplay = (exitval == 0)
24 | if havedisplay:
25 |   import matplotlib.pyplot as plt
26 | else:
27 |   matplotlib.use('Agg')
28 |   import matplotlib.pyplot as plt
29 |
30 | # The directory path to the images
31 | PICTURE_PATH = "/pics/cambrdige_pics/"
32 |
33 | # The current directory where the script is run
34 | currentDir = os.path.dirname(os.path.abspath(__file__))
35 |
36 | """
37 | Converts the data to zero mean data.
38 | """
39 | def convertDataToZeroMean(data):
40 |   mean = scipy.mean(data, axis=0)
41 |
42 |   # Step 2: Subtract the mean of its column from every element
43 |   rows, cols = data.shape
44 |   zeroMean = numpy.zeros((rows, cols))
45 |   for i in xrange(rows):
46 |     zeroMean[i] = data[i] - mean
47 |
48 |   assert zeroMean.shape == data.shape
49 |
50 |   return mean, zeroMean
51 |
52 |
53 | """
54 | Uses a heuristic to evaluate how many dimensions the data should be reduced
55 | to.
56 |
57 | Arguments:
58 |   eigenValues:
59 |     The eigen values of the covariance matrix, or numbers proportional to them.
60 |     Should be a numpy 1-D array.
61 | Returns:
62 |   The dimension the data should be reduced to.
63 | """
64 | def dimensionFromEigenIndividualVariance(eigenValues):
65 |   threshold = 0.01
66 |   dimension = 0
67 |
68 |   s = numpy.sum(eigenValues)
69 |   print "sum eigen " + str(s)
70 |
71 |   for eigen in eigenValues:
72 |     r = eigen / s
73 |     if r > threshold:
74 |       dimension += 1
75 |
76 |   return dimension
77 |
78 | # Requires the eigen values to be sorted beforehand
79 | def dimensionFromEigenTotalVariance(eigenValues):
80 |   threshold = 0.95
81 |   dimension = 0
82 |
83 |   s = numpy.sum(eigenValues)
84 |   print "sum eigen " + str(s)
85 |   current = 0
86 |   for eigen in eigenValues:
87 |     r = (eigen / s)
88 |     current += r
89 |     if current >= threshold:
90 |       break
91 |     dimension += 1
92 |
93 |   return dimension
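# Worked example for the heuristic above: for sorted eigen values
# [5.0, 3.0, 1.0, 0.5, 0.5] the total is 10.0, so the cumulative variance
# ratios are 0.5, 0.8, 0.9, 0.95, 1.0. The loop breaks when the cumulative
# ratio first reaches the 0.95 threshold, so the function returns 3 (the
# component that crosses the threshold is itself not counted).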
94 |
95 |
96 | """
97 | This method uses the Karhunen-Loève transform to quickly compute the
98 | eigen values of the data.
99 |
100 | It is faster than the SVD method below, but more prone to floating point
101 | errors than the SVD one.
102 | Arguments:
103 |   train:
104 |     Numpy array of arrays
105 |   dimension: the dimension to which to reduce the size of the data set.
106 |
107 | Returns:
108 |   The principal components of the data.
109 | """
110 | # Returns the principal components of the given training
111 | # data by computing the principal eigen vectors of the
112 | # covariance matrix of the data
113 | def pca(train, dimension):
114 |   # Use the Karhunen-Loève transform to quickly compute
115 |   # the principal components.
116 |   rows, cols = train.shape
117 |   # Step 1: Get the mean of each column of the data
118 |   # i.e. create the average image
119 |   mean, u = convertDataToZeroMean(train)
120 |
121 |   # Step 2: Compute the eigen values of the U * U^T matrix
122 |   # the size of U * U^T is rows * rows (ie the number of data points you have
123 |   # in your training)
124 |   eigVals, eigVecs = scipy.linalg.eig(u.dot(u.T))
125 |
126 |   # Step 3: Compute the eigen values of U^T*U from the eigen values of U * U^T
127 |   bigEigVecs = numpy.zeros((rows, cols))
128 |   for i in xrange(rows):
129 |     bigEigVecs[i] = u.T.dot(eigVecs[:, i])
130 |
131 |   # Step 4: Normalize the eigen vectors to get orthonormal components
132 |   bigEigVecs = map(lambda x: x / scipy.linalg.norm(x), bigEigVecs)
133 |
134 |   eigValsBigVecs = zip(eigVals, bigEigVecs)
135 |   sortedEigValsBigVecs = sorted(eigValsBigVecs, key=lambda x : x[0], reverse=True)
136 |
137 |   index = 0
138 |   result = []
139 |   if dimension is None:
140 |     # Get the eigen values
141 |     # Note that these are not the eigen values of the covariance matrix
142 |     # but the eigen values of U * U ^T
143 |     # however, this is fine because they just differ by a factor
144 |     # so the ratio between eigen values will be preserved
145 |     eigenValues = map(lambda x : x[0], sortedEigValsBigVecs)
146 |     dimension = dimensionFromEigenTotalVariance(eigenValues)
147 |     print "Using PCA dimension " + str(dimension)
148 |
149 |
150 |   for eigVal, vector in sortedEigValsBigVecs:
151 |     if index >= dimension:
152 |       break
153 |
154 |     if eigVal <= 0:
155 |       print "Warning: Non-positive eigen value"
156 |
157 |     result += [vector]
158 |     index = index + 1
159 |
160 |   return mean, result
161 |
162 | """
163 | Arguments:
164 |   train:
165 |     Numpy array of arrays
166 |   dimension: the dimension to which to reduce the size of the data set.
167 |
168 | Returns:
169 |   The principal components of the data.
170 |
171 | This method should be preferred over the above: it is well known that the
172 | SVD methods are more stable than the ones that require the computation of
173 | the eigen values and eigen vectors.
174 | For more detail see:
175 | http://math.stackexchange.com/questions/3869/what-is-the-intuitive-relationship-between-svd-and-pca
176 | """
177 | def pcaWithSVD(train, dimension=None):
178 |   mean, zeroMean = convertDataToZeroMean(train)
179 |
180 |   # SVD guarantees that the singular values are in non-increasing order;
181 |   # this means that the u's are already ordered as required, according
182 |   # to the magnitude of the eigen values
183 |   u, s, vh = scipy.linalg.svd(zeroMean)
184 |
185 |   if dimension is None:
186 |     # Get the eigen values from the singular values
187 |     eigenValues = s ** 2
188 |     dimension = dimensionFromEigenTotalVariance(eigenValues)
189 |     print "Using PCA dimension " + str(dimension)
190 |
191 |   return mean, vh[0:dimension]  # take the first `dimension` principal directions
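# Illustrative usage of the two implementations above (shapes only; any 2-D
# array of observations x features works):
#   train = numpy.random.rand(100, 50)
#   mean, components = pca(train, dimension=10)
#   mean, components = pcaWithSVD(train, dimension=10)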
192 |
193 | """
194 | Arguments:
195 |   pcaMethod: a method to use for PCA.
196 |   images: A python list of images that have to be of the same size.
197 |   dimension: the dimension to which to reduce the size of the data set.
198 | Returns:
199 |   A tuple:
200 |   The first element of the tuple is formed from the eigen faces of the given
201 |   images.
202 |   The second element of the tuple is formed from the vector version of the
203 |   eigen faces. This is kept for optimization reasons.
204 | """
205 | def getEigenFaces(pcaMethod, images, dimension=None):
206 |
207 |   imgSize = images[0].shape
208 |   # this call should not be here: the code should assume that the images have
209 |   # been transformed to vectors before
210 |   imgs = imagesToVectors(images)
211 |
212 |   mean, vectors = pcaMethod(imgs, dimension)  # pcaMethod returns (mean, components)
213 |   eigenFaces = map(lambda x: vectorToImage(x, imgSize), vectors)
214 |
215 |   return (eigenFaces, vectors)
216 |
217 |
218 | def reduce(principalComponents, vectors, mean, noSame=True):
219 |   assert len(principalComponents) > 0
220 |
221 |   print principalComponents[0].shape
222 |
223 |   principalComponents = np.array(principalComponents)
224 |
225 |   vectors = vectors - mean[np.newaxis, :]
226 |   lowDimRepresentation = np.dot(vectors, principalComponents.T)
227 |   if not noSame:
228 |     # lowDimRepresentation = map(lambda x : vectors.dot(x), principalComponents)
229 |     # sameDimRepresentation = \
230 |     #   sum([ x * y for x, y in zip(principalComponents, lowDimRepresentation)])
231 |     # TODO: do this with einsum
232 |     sameDimRepresentation = lowDimRepresentation[:, np.newaxis] * principalComponents.T
233 |     sameDimRepresentation = sameDimRepresentation.sum(axis=2)
234 |     sameDimRepresentation += mean[np.newaxis, :]
235 |     # TODO: create the proper thing here so that you can
236 |     # easily see what the output is
237 |   else:
238 |     sameDimRepresentation = None
239 |   return (lowDimRepresentation, sameDimRepresentation)
240 |
241 |
242 | """
243 | Reduces a 2D image represented by a numpy 2D array of integer values (pixels)
244 | to a lower dimension, dictated by the number of principal components.
245 | """
246 | def reduceImageToLowerDimensions(principalComponents, image2D):
247 |   assert len(principalComponents) > 0
248 |
249 |   size = principalComponents[0].shape
250 |   vector = vectorToImage(image2D, size)
251 |
252 |   lowDimRepresentation = map(lambda x : x.T.dot(vector), principalComponents)
253 |   sameDimRepresentation = \
254 |     sum([ x * y for x, y in zip(principalComponents, lowDimRepresentation)])
255 |   return (lowDimRepresentation, sameDimRepresentation)
256 |
257 |
258 | def main():
259 |   # Load all the image files in the current directory
260 |   picFiles = []
261 |   path = currentDir + PICTURE_PATH
262 |   for root, dirs, files in os.walk(path):
263 |     if root != path:
264 |       picFiles += map(lambda x: os.path.join(root, x), files)
265 |
266 |   print len(picFiles)
267 |
268 |   imgs = map(lambda x: misc.imread(x, flatten=True), picFiles)
269 |
270 |   eigenFaces, principalComponents = getEigenFaces(pca, imgs)
271 |   # plt.imshow(eigenFaces[0], cmap=plt.cm.gray)
272 |   # plt.show()
273 |
274 |   lowDimRepresentation, sameDimRepresentation = \
275 |     reduceImageToLowerDimensions(principalComponents, imgs[0])
276 |
277 |   plt.imshow(imgs[0], cmap=plt.cm.gray)
278 |   plt.show()
279 |
280 |   image2D = vectorToImage(sameDimRepresentation, imgs[0].shape)
281 |   plt.imshow(image2D, cmap=plt.cm.gray)
282 |   plt.show()
283 |   print "done"
284 |
285 |
286 |
287 | if __name__ == '__main__':
288 |   main()
-------------------------------------------------------------------------------- /code/checkgpuspeed.py: --------------------------------------------------------------------------------
1 | """ Use this file to check the speed difference between a big matrix multiplication
2 | performed on the GPU or on the CPU.
3 | """
4 |
5 | import theano
6 | import theano.tensor as T
7 | from theano import function, shared
8 | import numpy as np
9 |
10 |
11 | import time
12 | x = T.matrix('x', dtype=theano.config.floatX)
13 | y = T.matrix('y', dtype=theano.config.floatX)
14 |
15 | sc = shared(np.zeros((10, 10), dtype=theano.config.floatX), name='sc')
16 |
17 | mydot = function([x, y], updates=((sc, T.dot(x, y)),))
18 |
19 | # We need to declare the variables as shared to run on the GPU
20 | a = np.ones((20000, 20000), dtype=theano.config.floatX) * 40.0
21 | b = np.ones((20000, 20000), dtype=theano.config.floatX) * 23.0
22 | print "go"
23 |
24 | before = time.time()
25 | mydot(a, b)
26 | print time.time() - before
27 |
28 | print sc.get_value().sum()
29 |
30 |
-------------------------------------------------------------------------------- /code/lib/__init__.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/mihaelacr/pydeeplearn/48c6df3f3f854195b7f8b830b9f70fac19acdc9a/code/lib/__init__.py
-------------------------------------------------------------------------------- /code/lib/activationfunctions.py: --------------------------------------------------------------------------------
1 | """ This file defines activation functions that can be used with the nets in pydeeplearn"""
2 |
3 | __author__ = "Mihaela Rosca"
4 | __contact__ = "mihaela.c.rosca@gmail.com"
5 |
6 | from theano import tensor as T
7 | from theano.tensor.shared_randomstreams import RandomStreams
8 |
9 | import theano
10 | import numpy as np
11 |
12 |
13 | theanoFloat = theano.config.floatX
14 |
15 | class ActivationFunction(object):
16 |
17 |   def __getstate__(self):
18 |     odict = self.__dict__.copy() # copy the dict since we change it
19 |     if 'theanoGenerator' in odict:
20 |       del odict['theanoGenerator']
21 |     return odict
22 |
23 |   def __setstate__(self, dict):
24 |     self.__dict__.update(dict) # update attributes
25 |
26 |   def __getinitargs__(self):
27 |     return None
28 |
29 |
30 | class Sigmoid(ActivationFunction):
31 |
32 |   def __init__(self):
33 |     self.theanoGenerator = RandomStreams(seed=np.random.randint(1, 1000))
34 |
35 |   def nonDeterminstic(self, x):
36 |     val = self.deterministic(x)
37 |     return self.theanoGenerator.binomial(size=val.shape,
38 |                                          n=1, p=val,
39 |                                          dtype=theanoFloat)
40 |
41 |   def deterministic(self, x):
42 |     return T.nnet.sigmoid(x)
43 |
44 |
45 |   def activationProbablity(self, x):
46 |     return T.nnet.sigmoid(x)
47 |
48 | class Rectified(ActivationFunction):
49 |
50 |   def __init__(self):
51 |     pass
52 |
53 |   def nonDeterminstic(self, x):
54 |     return self.deterministic(x)
55 |
56 |   def deterministic(self, x):
57 |     return x * (x > 0.0)
58 |
59 | class RectifiedNoisy(ActivationFunction):
60 |
61 |   def __init__(self):
62 |     self.theanoGenerator = RandomStreams(seed=np.random.randint(1, 1000))
63 |
64 |   def nonDeterminstic(self, x):
65 |     x += self.theanoGenerator.normal(avg=0.0, std=(T.sqrt(T.nnet.sigmoid(x)) + 1e-8))
66 |     return x * (x > 0.0)
67 |
68 |   def deterministic(self, x):
69 |     return expectedValueRectified(x, T.nnet.sigmoid(x) + 1e-08)
70 |
71 |   def activationProbablity(self, x):
72 |     return 1.0 - cdf(0, miu=x, variance=T.nnet.sigmoid(x))
73 |
74 | class RectifiedNoisyVar1(ActivationFunction):
75 |
76 |   def __init__(self):
77 |     self.theanoGenerator = RandomStreams(seed=np.random.randint(1, 1000))
78 |
79 |   def nonDeterminstic(self, x):
80 |     x += self.theanoGenerator.normal(avg=0.0, std=1.0)
81 |     return x * (x > 0.0)
82 |
83 |   def deterministic(self, x):
84 |     return expectedValueRectified(x, 1.0)
85 |
86 |   def activationProbablity(self, x):
87 |     return 1.0 - cdf(0, miu=x, variance=1.0)
88 |
89 | class Identity(ActivationFunction):
90 |
91 |   def deterministic(self, x):
92 |     return x
93 |
94 | class Softmax(ActivationFunction):
95 |
96 |   def deterministic(self, v):
97 |     # Do not use theano's softmax, it is numerically unstable
98 |     # and it causes NaNs to appear
99 |     # Semantically this is the same
100 |     e_x = T.exp(v - v.max(axis=1, keepdims=True))
101 |     return e_x / e_x.sum(axis=1, keepdims=True)
102 |
103 |
104 | # TODO: try this for the non deterministic version as well
105 | class CappedRectifiedNoisy(ActivationFunction):
106 |
107 |   def __init__(self):
108 |     pass
109 |
110 |   def nonDeterminstic(self, x):
111 |     return self.deterministic(x)
112 |
113 |   def deterministic(self, x):
114 |     return x * (x > 0.0) * (x < 6.0)
115 |
116 |   # TODO
117 |   def activationProbablity(self, x):
118 |     return None
119 |
120 |
121 | def expectedValueRectified(mean, variance):
122 |   std = T.sqrt(variance)
123 |   return std / T.sqrt(2.0 * np.pi) * T.exp(- mean**2 / (2.0 * variance)) + mean * cdf(mean / std)
124 |
125 | # The cdf of a normal distribution, computed via the error function
126 | def cdf(x, miu=0.0, variance=1.0):
127 |   return 1.0/2 * (1.0 + T.erf((x - miu)/ T.sqrt(2 * variance)))
128 |
-------------------------------------------------------------------------------- /code/lib/batchtrainer.py: --------------------------------------------------------------------------------
1 | """ A neural network trainer with various options: rmsprop, nesterov, momentum etc.
2 | This trainer is used for multiple types of neural nets: dbns and cnns, and it is designed
3 | to be adaptable to other types of networks as well.
4 | """
5 |
6 | __author__ = "Mihaela Rosca"
7 | __contact__ = "mihaela.c.rosca@gmail.com"
8 |
9 | import numpy as np
10 | import theano
11 | from theano import tensor as T
12 | import common
13 |
14 | import debug
15 |
16 | DEBUG = False
17 | theanoFloat = theano.config.floatX
18 |
19 | class BatchTrainer(object):
20 |   """
21 |   Abstract class used to define updates on the parameters of neural networks
22 |   during training.
23 |
24 |   Subclasses must call the constructor of this class in their constructors, and
25 |   have to define their cost function using a method called 'cost'.
26 |
27 |   Supports momentum updates and nesterov updates, both with and without
28 |   rmsprop (see the TrainingOptions class for more details on the available
29 |   training options). Also supports L1 and L2 weight decay.
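
  With simple momentum, each parameter update is v <- momentum * v - lr * dE/dparam
  followed by param <- param + v; with rmsprop enabled, the gradient is additionally
  divided by sqrt(meanSquare + 1e-8), where meanSquare is the running average
  0.9 * meanSquare + 0.1 * (dE/dparam)**2. Nesterov momentum first moves the
  parameters by momentum * v before computing the gradient. (The optional scaling
  of the learning rate by 1 - momentum is left out of this summary.)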
30 | """ 31 | 32 | def __init__(self, params, weights, training_options): 33 | self.params = params 34 | self.training_options = training_options 35 | self.weights = weights if weights else [] 36 | 37 | # Required for momentum and rmsprop 38 | self.oldUpdates = [] 39 | self.oldMeanSquares = [] 40 | for param in params: 41 | oldDParam = theano.shared(value=np.zeros(shape=param.shape.eval(), 42 | dtype=theanoFloat), 43 | name='oldDParam') 44 | 45 | self.oldUpdates += [oldDParam] 46 | oldMeanSquare = theano.shared(value=np.zeros(shape=param.shape.eval(), 47 | dtype=theanoFloat), 48 | name='oldMeanSquare') 49 | 50 | self.oldMeanSquares += [oldMeanSquare] 51 | 52 | 53 | def trainFixedEpochs(self, x, y, data, labels, maxEpochs): 54 | training_options = self.training_options 55 | trainModel = self._makeTrainFunction(x, y, data, labels) 56 | epochTrainingErrors = [] 57 | nrMiniBatchesTrain = max(data.shape.eval()[1] / training_options.miniBatchSize, 1) 58 | 59 | best_training_error = np.inf 60 | bestWeights = None 61 | bestBiases = None 62 | bestEpoch = 0 63 | 64 | save_best_weights = training_options.save_best_weights 65 | 66 | try: 67 | for epoch in xrange(maxEpochs): 68 | print "epoch " + str(epoch) 69 | momentum = training_options.momentumForEpochFunction(training_options.momentumMax, epoch) 70 | sum_error = 0.0 71 | for batchNr in xrange(nrMiniBatchesTrain): 72 | trainError = trainModel(batchNr, momentum) / training_options.miniBatchSize 73 | sum_error += trainError 74 | 75 | mean_error = sum_error / nrMiniBatchesTrain 76 | if save_best_weights: 77 | if mean_error < best_training_error: 78 | best_training_error = mean_error 79 | # Save the weights which are the best ones 80 | bestWeights = self.weights 81 | bestBiases = self.biases 82 | bestEpoch = epoch 83 | 84 | print "training error " + str(mean_error) 85 | epochTrainingErrors += [mean_error] 86 | except KeyboardInterrupt: 87 | print "you have interrupted training" 88 | print "we will continue testing with the state of the network as it is" 89 | 90 | if save_best_weights: 91 | if bestWeights is not None and bestBiases is not None: 92 | self.weights = bestWeights 93 | self.biases = bestBiases 94 | 95 | print "number of epochs" 96 | print epoch + 1 97 | 98 | def trainWithValidation(self, x, y, data, labels, validationData, validationLabels, 99 | classificationCost, maxEpochs, validation_criteria): 100 | if validation_criteria == "patience": 101 | self._trainModelPatience(x, y, data, labels, validationData, validationLabels, classificationCost, maxEpochs) 102 | elif validation_criteria == "consecutive_decrease": 103 | self._trainLoopWithValidation(x, y, data, labels, validationData, validationLabels, classificationCost, maxEpochs) 104 | else: 105 | raise Exception("unknown validation validation_criteria: " + str(validation_criteria)) 106 | 107 | def _trainLoopWithValidation(self, x, y, data, labels, validationData, validationLabels, 108 | classificationCost, maxEpochs): 109 | lastValidationError = np.inf 110 | consecutive_decrease_error_count = 0.0 111 | epoch = 0 112 | training_options = self.training_options 113 | save_best_weights = training_options.save_best_weights 114 | 115 | miniBatchSize = training_options.miniBatchSize 116 | nrMiniBatchesTrain = max(data.shape.eval()[0] / miniBatchSize, 1) 117 | miniBatchValidateSize = min(validationData.shape.eval()[0], miniBatchSize * 10) 118 | nrMiniBatchesValidate = max(validationData.shape.eval()[0] / miniBatchValidateSize, 1) 119 | 120 | trainModel = self._makeTrainFunction(x, y, data, labels) 
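    # `trainModel` above applies the parameter updates; the two functions below
    # only evaluate the mean classification cost, on the validation set and on
    # the training set (without dropout), respectively.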
121 |     validateModel = self._makeValidateModelFunction(
122 |         x, y, validationData, validationLabels, classificationCost, miniBatchValidateSize)
123 |     trainNoDropout = self._makeValidateModelFunction(
124 |         x, y, data, labels, classificationCost, miniBatchSize)
125 |
126 |     validationErrors = []
127 |     trainingErrors = []
128 |     trainingErrorsNoDropout = []
129 |
130 |     bestValidationError = np.inf
131 |     bestWeights = None
132 |     bestBiases = None
133 |     bestEpoch = 0
134 |
135 |     try:
136 |       while epoch < maxEpochs and consecutive_decrease_error_count < 8:
137 |         print "epoch " + str(epoch)
138 |
139 |         momentum = self.training_options.momentumForEpochFunction(training_options.momentumMax, epoch)
140 |         sumErrors = 0.0
141 |         sumErrorsNoDropout = 0.0
142 |         for batchNr in xrange(nrMiniBatchesTrain):
143 |           sumErrors += trainModel(batchNr, momentum) / miniBatchSize
144 |           sumErrorsNoDropout += trainNoDropout(batchNr) / miniBatchSize
145 |
146 |         trainingErrors += [sumErrors / nrMiniBatchesTrain]
147 |         trainingErrorsNoDropout += [sumErrorsNoDropout / nrMiniBatchesTrain]
148 |
149 |         meanValidations = map(validateModel, xrange(nrMiniBatchesValidate))
150 |         meanValidationError = sum(meanValidations) / len(meanValidations)
151 |         validationErrors += [meanValidationError]
152 |
153 |         if save_best_weights:
154 |           if meanValidationError < bestValidationError:
155 |             bestValidationError = meanValidationError
156 |             # Save the weights which are the best ones
157 |             bestWeights = self.weights
158 |             bestBiases = self.biases
159 |             bestEpoch = epoch
160 |
161 |         consecutive_decrease_error_count = consecutive_decrease_error_count + 1 if meanValidationError > lastValidationError else 0
162 |         lastValidationError = meanValidationError
163 |         epoch += 1
164 |
165 |     except KeyboardInterrupt:
166 |       print "you have interrupted training"
167 |       print "we will continue testing with the state of the network as it is"
168 |
169 |     # TODO: flag for plotting
170 |     common.plotTrainingAndValidationErros(trainingErrors, validationErrors)
171 |     common.plotTrainingAndValidationErros(trainingErrorsNoDropout, validationErrors)
172 |
173 |     print "number of epochs"
174 |     print epoch + 1
175 |
176 |
177 |   def _trainModelPatience(self, x, y, data, labels, validationData, validationLabels,
178 |                           classificationCost, maxEpochs):
179 |     training_options = self.training_options
180 |     save_best_weights = training_options.save_best_weights
181 |
182 |     miniBatchSize = training_options.miniBatchSize
183 |     nrMiniBatchesTrain = max(data.shape.eval()[0] / miniBatchSize, 1)
184 |     miniBatchValidateSize = min(validationData.shape.eval()[0], miniBatchSize * 10)
185 |     nrMiniBatchesValidate = max(validationData.shape.eval()[0] / miniBatchValidateSize, 1)
186 |
187 |     trainModel = self._makeTrainFunction(x, y, data, labels)
188 |     validateModel = self._makeValidateModelFunction(
189 |         x, y, validationData, validationLabels, classificationCost, miniBatchValidateSize)
190 |     trainNoDropout = self._makeValidateModelFunction(
191 |         x, y, data, labels, classificationCost, miniBatchSize)
192 |
193 |     epoch = 0
194 |     doneTraining = False
195 |     patience = 10 * nrMiniBatchesTrain  # do at least 10 passes through the data no matter what
196 |     patienceIncrease = 2  # Increase our patience up to patience * patienceIncrease
197 |
198 |     bestValidationError = np.inf
199 |     bestWeights = None
200 |     bestBiases = None
201 |     bestEpoch = 0
202 |
203 |     validationErrors = []
204 |     trainingErrors = []
205 |     trainingErrorNoDropout = []
206 |
207 |     try:
208 |       while (epoch < maxEpochs) and not
doneTraining: 209 | # Train the net with all data 210 | print "epoch " + str(epoch) 211 | momentum = training_options.momentumForEpochFunction(training_options.momentumMax, epoch) 212 | 213 | for batchNr in xrange(nrMiniBatchesTrain): 214 | iteration = epoch * nrMiniBatchesTrain + batchNr 215 | trainingErrorBatch = trainModel(batchNr, momentum) / training_options.miniBatchSize 216 | 217 | meanValidations = map(validateModel, xrange(nrMiniBatchesValidate)) 218 | meanValidationError = sum(meanValidations) / len(meanValidations) 219 | 220 | if meanValidationError < bestValidationError: 221 | print "increasing patience, still improving during training..." 222 | patience = max(patience, iteration * patienceIncrease) 223 | bestValidationError = meanValidationError 224 | if save_best_weights: 225 | # Save the weights which are the best ones 226 | bestWeights = self.weights 227 | bestBiases = self.biases 228 | bestEpoch = epoch 229 | 230 | validationErrors += [meanValidationError] 231 | trainingErrors += [trainingErrorBatch] 232 | trainingErrorNoDropout += [trainNoDropout(batchNr)] 233 | 234 | if patience <= iteration: 235 | doneTraining = True 236 | 237 | epoch += 1 238 | except KeyboardInterrupt: 239 | print "you have interrupted training" 240 | print "we will continue testing with the state of the network as it is" 241 | 242 | # TODO: double check 243 | if save_best_weights: 244 | if bestWeights is not None and bestBiases is not None: 245 | self.weights = bestWeights 246 | self.biases = bestBiases 247 | 248 | common. plotTrainingAndValidationErros(trainingErrors, validationErrors) 249 | common.plotTrainingAndValidationErros(trainingErrorNoDropout, validationErrors) 250 | 251 | print "number of epochs" 252 | print epoch 253 | 254 | 255 | # TODO: document cost 256 | def _makeValidateModelFunction(self, x, y, data, labels, cost, miniBatchSize): 257 | miniBatchIndex = T.lscalar() 258 | 259 | return theano.function( 260 | inputs=[miniBatchIndex], 261 | outputs=T.mean(cost(y)), 262 | givens={ 263 | x: data[miniBatchIndex * miniBatchSize:(miniBatchIndex + 1) * miniBatchSize], 264 | y: labels[miniBatchIndex * miniBatchSize:(miniBatchIndex + 1) * miniBatchSize]}) 265 | 266 | 267 | def _makeTrainFunction(self, x, y, data, labels): 268 | error = T.sum(self.cost(y)) 269 | training_options = self.training_options 270 | 271 | for w in self.weights: 272 | error += training_options.weightDecayL1 * T.sum(abs(w)) 273 | error += training_options.weightDecayL2 * T.sum(w ** 2) 274 | 275 | miniBatchIndex = T.lscalar() 276 | momentum = T.fscalar() 277 | 278 | if DEBUG: 279 | mode = theano.compile.MonitorMode(post_func=debug.detect_nan).excluding( 280 | 'local_elemwise_fusion', 'inplace') 281 | else: 282 | mode = None 283 | 284 | if training_options.nesterov: 285 | preDeltaUpdates, updates = self._buildUpdatesNesterov(error, training_options, momentum) 286 | updateParamsWithMomentum = theano.function( 287 | inputs=[momentum], 288 | outputs=[], 289 | updates=preDeltaUpdates, 290 | mode=mode) 291 | 292 | updateParamsWithGradient = theano.function( 293 | inputs =[miniBatchIndex, momentum], 294 | outputs=error, 295 | updates=updates, 296 | givens={ 297 | x: data[miniBatchIndex * training_options.miniBatchSize:(miniBatchIndex + 1) * training_options.miniBatchSize], 298 | y: labels[miniBatchIndex * training_options.miniBatchSize:(miniBatchIndex + 1) * training_options.miniBatchSize]}, 299 | mode=mode) 300 | 301 | def trainModel(miniBatchIndex, momentum): 302 | updateParamsWithMomentum(momentum) 303 | return 
updateParamsWithGradient(miniBatchIndex, momentum) 304 | 305 | else: 306 | updates = self._buildUpdatesSimpleMomentum(error, training_options, momentum) 307 | trainModel = theano.function( 308 | inputs=[miniBatchIndex, momentum], 309 | outputs=error, 310 | updates=updates, 311 | mode=mode, 312 | givens={ 313 | x: data[miniBatchIndex * training_options.miniBatchSize:(miniBatchIndex + 1) * training_options.miniBatchSize], 314 | y: labels[miniBatchIndex * training_options.miniBatchSize:(miniBatchIndex + 1) * training_options.miniBatchSize]}) 315 | 316 | # returns the function that trains the model 317 | return trainModel 318 | 319 | def _buildUpdatesNesterov(self, error, training_options, momentum): 320 | if training_options.momentumFactorForLearningRate: 321 | lrFactor = np.float32(1.0) - momentum 322 | else: 323 | lrFactor = np.float32(1.0) 324 | 325 | preDeltaUpdates = [] 326 | for param, oldUpdate in zip(self.params, self.oldUpdates): 327 | preDeltaUpdates.append((param, param + momentum * oldUpdate)) 328 | 329 | # specify how to update the parameters of the model as a list of 330 | # (variable, update expression) pairs 331 | deltaParams = T.grad(error, self.params) 332 | updates = [] 333 | parametersTuples = zip(self.params, 334 | deltaParams, 335 | self.oldUpdates, 336 | self.oldMeanSquares) 337 | 338 | for param, delta, oldUpdate, oldMeanSquare in parametersTuples: 339 | if training_options.rmsprop: 340 | meanSquare = 0.9 * oldMeanSquare + 0.1 * delta ** 2 341 | paramUpdate = - lrFactor * training_options.batchLearningRate * delta / T.sqrt(meanSquare + 1e-8) 342 | updates.append((oldMeanSquare, meanSquare)) 343 | else: 344 | paramUpdate = - lrFactor * training_options.batchLearningRate * delta 345 | 346 | newParam = param + paramUpdate 347 | 348 | updates.append((param, newParam)) 349 | updates.append((oldUpdate, momentum * oldUpdate + paramUpdate)) 350 | 351 | return preDeltaUpdates, updates 352 | 353 | def _buildUpdatesSimpleMomentum(self, error, training_options, momentum): 354 | if training_options.momentumFactorForLearningRate: 355 | lrFactor = np.float32(1.0) - momentum 356 | else: 357 | lrFactor = np.float32(1.0) 358 | 359 | deltaParams = T.grad(error, self.params) 360 | updates = [] 361 | parametersTuples = zip(self.params, 362 | deltaParams, 363 | self.oldUpdates, 364 | self.oldMeanSquares) 365 | 366 | for param, delta, oldUpdate, oldMeanSquare in parametersTuples: 367 | paramUpdate = momentum * oldUpdate 368 | if training_options.rmsprop: 369 | meanSquare = 0.9 * oldMeanSquare + 0.1 * delta ** 2 370 | paramUpdate += - lrFactor * training_options.batchLearningRate * delta / T.sqrt(meanSquare + 1e-8) 371 | updates.append((oldMeanSquare, meanSquare)) 372 | else: 373 | paramUpdate += - lrFactor * training_options.batchLearningRate * delta 374 | 375 | newParam = param + paramUpdate 376 | 377 | updates.append((param, newParam)) 378 | updates.append((oldUpdate, paramUpdate)) 379 | 380 | return updates 381 | -------------------------------------------------------------------------------- /code/lib/cnnLayers.py: -------------------------------------------------------------------------------- 1 | """ Types of conv net layers that pydeeplearn supports now.""" 2 | 3 | __author__ = "Mihaela Rosca" 4 | __contact__ = "mihaela.c.rosca@gmail.com" 5 | 6 | import numpy as np 7 | import numpy.random as random 8 | 9 | import theano 10 | from theano import tensor as T 11 | from theano.tensor.nnet import conv 12 | from theano.tensor.signal import downsample 13 | 14 | from activationfunctions import 
* 15 | from common import * 16 | 17 | theanoFloat = theano.config.floatX 18 | 19 | class ConvolutionalLayer(object): 20 | """ 21 | The input has to be a 4D tensor: 22 | 1) the size of a mini-batch (we do a forward pass for multiple images at a time) 23 | 2) the number of input channels (or number of kernels for the previous layer) 24 | 3) height 25 | 4) width 26 | 27 | The weights are also a 4D tensor: 28 | 1) Nr filters at next layer (chosen hyperparameter) 29 | 2) Nr filters at previous layer (note that if that is the input layer the number 30 | of filters is given by the number of input channels) 31 | 3) Height at next layer (given by the size of the filters) 32 | 4) Width at the next layer 33 | 34 | InitialWeights should be created randomly or with RBM. 35 | Note that for now we assume that we construct all possible receptive fields for convolutions. 36 | """ 37 | def __init__(self, nrKernels, kernelSize, activationFun): 38 | self.activationFun = activationFun 39 | self.kernelSize = kernelSize 40 | self.nrKernels = nrKernels 41 | 42 | 43 | # Can you not do the same thing as with the oldparam? eval or not? 44 | def _setUp(self, input, inputDimensions): 45 | self.inputDimensions = inputDimensions 46 | nrKernelsPrevious = inputDimensions[0] 47 | 48 | initialWeights = random.normal(loc=0.0, scale=0.1, 49 | size=(self.nrKernels, nrKernelsPrevious, self.kernelSize[0], self.kernelSize[1])) 50 | initialBiases = np.zeros(self.nrKernels) 51 | 52 | W = theano.shared(value=np.asarray(initialWeights, 53 | dtype=theanoFloat), 54 | name='Wconv') 55 | b = theano.shared(value=np.asarray(initialBiases, 56 | dtype=theanoFloat), 57 | name='bconv') 58 | 59 | self.output = self.activationFun.deterministic(conv.conv2d(input, W) + b.dimshuffle('x', 0, 'x', 'x')) 60 | self.weights = [W] 61 | self.params = [W, b] 62 | 63 | def _outputDimensions(self): 64 | a = self.inputDimensions[1] 65 | b = self.inputDimensions[2] 66 | return (self.nrKernels, a - self.kernelSize[0] + 1, b - self.kernelSize[1] + 1) 67 | 68 | 69 | # Possible types for pooling 70 | # l2 pooling 71 | # max pooling 72 | # average pooling 73 | # average pooling weighted by the distance to the center 74 | class PoolingLayer(object): 75 | 76 | # TODO: implement average pooling 77 | # TODO: support different pooling and subsampling factors 78 | """ 79 | Input is again a 4D tensor just like in ConvolutionalLayer 80 | 81 | Note that if you combine the pooling and the convolutional operation then you 82 | can save a bit of time by not applying the activation function before the subsampling. 83 | You can still try and do that as an optimization even if you have 2 layers separated. 
84 | 85 | poolingFactor needs to be a 2D tuple (eg: (2, 2)) 86 | """ 87 | 88 | def __init__(self, poolingFactor): 89 | self.poolingFactor = poolingFactor 90 | 91 | def _setUp(self, input, inputDimensions): 92 | # The pooling operation does not change the number of kernels 93 | self.inputDimensions = inputDimensions 94 | # downsample.max_pool_2d only downsamples on the last 2 dimensions of the input tensor 95 | self.output = downsample.max_pool_2d(input, self.poolingFactor, ignore_border=False) 96 | # each layer has to have a parameter field so that it is easier to concatenate all the parameters 97 | # when performing gradient descent 98 | self.params = [] 99 | self.weights = [] 100 | 101 | 102 | def _outputDimensions(self): 103 | a = self.inputDimensions[1] 104 | b = self.inputDimensions[2] 105 | return (self.inputDimensions[0], a / self.poolingFactor[0], b / self.poolingFactor[1]) 106 | 107 | 108 | class SoftmaxLayer(object): 109 | 110 | def __init__(self, size): 111 | self.size = size 112 | 113 | """ 114 | input: 2D matrix 115 | """ 116 | def _setUp(self, input, inputDimensions): 117 | # can I get the size of the input even though it is a tensor var? should 118 | initialWeights = random.normal(loc=0.0, scale=0.1, size=(inputDimensions, self.size)) 119 | initialBiases = np.zeros(self.size) 120 | 121 | W = theano.shared(value=np.asarray(initialWeights, dtype=theanoFloat), 122 | name='Wsoftmax') 123 | b = theano.shared(value=np.asarray(initialBiases, dtype=theanoFloat), 124 | name='bsoftmax') 125 | 126 | softmax = Softmax() 127 | linearSum = T.dot(input, W) + b 128 | currentLayerValues = softmax.deterministic(linearSum) 129 | 130 | self.output = currentLayerValues 131 | self.weights = [W] 132 | self.params = [W, b] 133 | -------------------------------------------------------------------------------- /code/lib/common.py: -------------------------------------------------------------------------------- 1 | """Common functionalities required by the other modules. 
""" 2 | 3 | __author__ = "Mihaela Rosca" 4 | __contact__ = "mihaela.c.rosca@gmail.com" 5 | 6 | from sklearn import preprocessing 7 | from theano import tensor as T 8 | 9 | import itertools 10 | import matplotlib 11 | import numpy as np 12 | import utils 13 | import warnings 14 | 15 | import matplotlib 16 | import os 17 | havedisplay = "DISPLAY" in os.environ 18 | if not havedisplay: 19 | exitval = os.system('python -c "import matplotlib.pyplot as plt; plt.figure()"') 20 | havedisplay = (exitval == 0) 21 | if havedisplay: 22 | import matplotlib.pyplot as plt 23 | else: 24 | matplotlib.use('Agg') 25 | import matplotlib.pyplot as plt 26 | 27 | def getClassificationError(predicted, actual): 28 | return 1.0 - (predicted == actual).sum() * 1.0 / len(actual) 29 | 30 | def minDiff(vec): 31 | vec = np.sort(vec) 32 | rolled = np.roll(vec, -1) 33 | diff = rolled - vec 34 | return np.min(diff[0:-1]) 35 | 36 | def concatenateLists(lists): 37 | return list(itertools.chain.from_iterable(lists)) 38 | 39 | def scale(data): 40 | # return preprocessing.scale(data, axis=1) 41 | data = data / data.std(axis=1)[:, np.newaxis] 42 | data = data - data.mean(axis=1)[:, np.newaxis] 43 | 44 | # print data.std(axis=1).sum() 45 | # print np.ones((data.shape[0]), dtype='float') 46 | # assert np.array_equal(data.std(axis=1), np.ones((data.shape[0]), dtype='float')) 47 | # assert np.array_equal(data.mean(axis=1), np.zeros(data.shape[0])) 48 | return data 49 | 50 | def visualizeWeights(weights, imgShape, tileShape): 51 | return utils.tile_raster_images(weights, imgShape, 52 | tileShape, tile_spacing=(1, 1)) 53 | """ 54 | Arguments: 55 | vec: A numpy 1-D vector. 56 | size: A 2D tuple 57 | 58 | Returns: 59 | A 2-D vector of dimension 'size', only if 'vec' has compatible dimensions. 60 | Otherwise it throws an error. 61 | """ 62 | def vectorToImage(vec, size): 63 | return vec.reshape(size) 64 | 65 | """ Transforms the 2D images into 1D vectors 66 | Arguments: 67 | images: is a python list of numpy arrays 68 | Returns: 69 | A python list of 1-D numpy arrays, transformed from the input 2D ones 70 | No data is lost in the transformation. 
71 | """
72 | def imagesToVectors(images):
73 |   return np.array(map(lambda x: x.reshape(-1), images))
74 |
75 | def sample(p, size):
76 |   return np.random.uniform(size=size) <= p
77 |
78 | # this can be done with a binomial
79 | def sampleAll(probs):
80 |   return np.random.uniform(size=probs.shape) <= probs
81 |
82 | def enum(**enums):
83 |   return type('Enum', (), enums)
84 |
85 | def rmse(prediction, actual):
86 |   return np.linalg.norm(prediction - actual) / np.sqrt(len(prediction))
87 |
88 | def safeLogFraction(p):
89 |   assert p >= 0 and p <= 1
90 |   # TODO: think about this a bit better
91 |   # you should not set them to be equal, on the contrary,
92 |   # they should be opposites
93 |   if p * (1 - p) == 0:
94 |     return 0
95 |   return np.log(p / (1 - p))
96 |
97 |
98 | def labelsToVectors(labels, size):
99 |   result = np.zeros((len(labels), size), dtype=float)
100 |   for index, label in enumerate(labels):
101 |     result[index, label] = 1.0
102 |
103 |   return result
104 |
105 | def zerosFromShape(l):
106 |   return map(lambda x: np.zeros(x.shape), l)
107 |
108 | def shuffle(*args):
109 |   shuffled = shuffleList(*args)
110 |   f = lambda x: np.array(x)
111 |   return tuple(map(f, shuffled))
112 |
113 |
114 | # Returns lists
115 | def shuffleList(*args):
116 |   length = len(args[0])
117 |
118 |   # Assert they all have the same size
119 |   assert np.array_equal(np.array(map(len, args)), np.ones(len(args)) * length)
120 |
121 |   indexShuffle = np.random.permutation(length)
122 |
123 |   f = lambda x: [x[i] for i in indexShuffle]
124 |   return tuple(map(f, args))
125 |
126 | def shuffle3(data1, data2, labels):
127 |   indexShuffle = np.random.permutation(len(data1))
128 |   shuffledData1 = np.array([data1[i] for i in indexShuffle])
129 |   shuffledData2 = np.array([data2[i] for i in indexShuffle])
130 |   shuffledLabels = np.array([labels[i] for i in indexShuffle])
131 |
132 |   return shuffledData1, shuffledData2, shuffledLabels
133 |
134 | # Cost required for the sparsity in RBMs
135 | def squaredDiff(first, second):
136 |   return T.sqr(first - second)
137 |
138 | # Makes a parameter grid required for cross validation
139 | # the input should be a list of tuples of size 3: min, max and number of steps
140 | # for each parameter
141 | # EG: makeParamsGrid([(1, 3, 2), (4,5,2)])
142 | def makeParamsGrid(paramBorders):
143 |   f = lambda x: np.linspace(*x)
144 |   linspaces = map(f, paramBorders)
145 |
146 |   return list(itertools.product(*tuple(linspaces)))
147 |
157 |
158 |
159 | def getMomentumForEpochLinearIncrease(momentumMax, epoch, step=0.01):
160 |   return np.float32(min(np.float32(0.5) + epoch * np.float32(step),
161 |                         np.float32(momentumMax)))
162 |
163 | # This is called once per epoch so doing the
164 | # conversion again and again is not a problem
165 | # I do not like this hardcoding business for the GPU: TODO
166 | def getMomentumForEpochSimple(momentumMax, epoch):
167 |   if epoch < 10:
168 |     return np.float32(0.5)
169 |   else:
170 |     return np.float32(momentumMax)
171 |
172 |
173 | def plotTrainingAndValidationErros(trainingErrors, validationErrors):
174 |   # if run remotely without a display
175 |   try:
176 |     plt.plot(trainingErrors,
label="Training error") 177 | plt.plot(validationErrors, label="Validation error") 178 | plt.xlabel('Epoch') 179 | plt.ylabel('Cross entropy average error') 180 | plt.title('Training and validation error during DBN training') 181 | plt.legend() 182 | plt.show() 183 | except Exception as e: 184 | print "validation error plot not made" 185 | print "error ", e 186 | 187 | # If we had an error we are either not sshed with -X 188 | # or we are in a detached screen session. 189 | # so turn the io off and save the pic 190 | plt.ioff() 191 | plt.plot(trainingErrors, label="Training error") 192 | plt.plot(validationErrors, label="Validation error") 193 | plt.xlabel('Epoch') 194 | plt.ylabel('Cross entropy average error') 195 | plt.title('Training and validation error during DBN training') 196 | plt.legend() 197 | plt.savefig("validationandtrainingerror.png" , transparent=True) 198 | 199 | print "printing validation errors and training errors instead" 200 | print "validationErrors" 201 | print validationErrors 202 | print "trainingErrors" 203 | print trainingErrors 204 | 205 | def plotTraningError(trainingErrors): 206 | try: 207 | plt.plot(trainingErrors, label="Training error") 208 | plt.xlabel('Epoch') 209 | plt.ylabel('Cross entropy average error') 210 | plt.title('Training error during DBN training') 211 | plt.legend() 212 | plt.show() 213 | except Exception as e: 214 | print "plot not made" 215 | print "error ", e 216 | 217 | plt.ioff() 218 | plt.plot(trainingErrors, label="Training error") 219 | plt.xlabel('Epoch') 220 | plt.ylabel('Cross entropy average error') 221 | plt.title('Training error during DBN training') 222 | plt.legend() 223 | plt.savefig("trainingerror.png" , transparent=True) 224 | 225 | print "printing training errors " 226 | print "trainingErrors" 227 | print trainingErrors 228 | 229 | 230 | def plot3Errors(trainingErrors, trainWithDropout, validationErrors): 231 | # if run remotely without a display 232 | plt.plot(trainWithDropout, label="Training error on dropped out set.") 233 | plt.plot(trainingErrors, label="Training error") 234 | plt.plot(validationErrors, label="Validation error") 235 | plt.xlabel('Epoch') 236 | plt.ylabel('Cross entropy average error') 237 | plt.title('Training and validation error during DBN training') 238 | plt.legend() 239 | plt.show() 240 | -------------------------------------------------------------------------------- /code/lib/convNet.py: -------------------------------------------------------------------------------- 1 | """Implementation of a convolutional neural network. """ 2 | 3 | __author__ = "Mihaela Rosca" 4 | __contact__ = "mihaela.c.rosca@gmail.com" 5 | 6 | import numpy as np 7 | 8 | import theano 9 | from theano import tensor as T 10 | 11 | from batchtrainer import * 12 | from trainingoptions import * 13 | from common import * 14 | 15 | theanoFloat = theano.config.floatX 16 | 17 | # TODO: implicit zero padding for input 18 | # See Goodfellow book for advantages of that 19 | class CNNBatchTrainer(BatchTrainer): 20 | 21 | def __init__(self, layers, training_options): 22 | self.output = layers[-1].output 23 | # Create the params of the trainer which will be used for gradient descent 24 | params = concatenateLists([l.params for l in layers]) 25 | weights = concatenateLists([l.weights for l in layers]) 26 | 27 | super(CNNBatchTrainer, self).__init__(params, weights, training_options) 28 | 29 | def cost(self, y): 30 | return T.nnet.categorical_crossentropy(self.output, y) 31 | 32 | """ 33 | Convolutional neural network class. 
34 | 35 | Supports only convolutional and pooling layers. 36 | For training, supports all training options provided by the TrainingOptions class, and 37 | supports rmsprop, momentum, nesterov momentum via the BatchTrainer abstract class. 38 | 39 | TODO(mihaela): support fully connected layers: refactor the deepbelief net code to also 40 | use the fully connected layers. 41 | """ 42 | class ConvolutionalNN(object): 43 | def __init__(self, layers, training_options, 44 | momentum_for_epoch_function=getMomentumForEpochLinearIncrease, 45 | nameDataset=''): 46 | self.layers = layers 47 | self.momentum_for_epoch_function = momentum_for_epoch_function 48 | self.training_options = training_options 49 | self.nameDataset = nameDataset 50 | 51 | def _setUpLayers(self, x, inputDimensions): 52 | 53 | inputVar = x 54 | inputDimensionsPrevious = inputDimensions 55 | 56 | for layer in self.layers[0:-1]: 57 | layer._setUp(inputVar, inputDimensionsPrevious) 58 | inputDimensionsPrevious = layer._outputDimensions() 59 | inputVar = layer.output 60 | 61 | # the fully connected layer, the softmax layer 62 | # TODO: if you allow (and you should) multiple all to all layers you need to change this 63 | # after some point 64 | self.layers[-1]._setUp(inputVar.flatten(2), 65 | inputDimensionsPrevious[0] * inputDimensionsPrevious[1] * inputDimensionsPrevious[2]) 66 | 67 | 68 | def _reshapeInputData(self, data): 69 | if len(data[0].shape) == 2: 70 | inputShape = (data.shape[0], 1, data[0].shape[0], data[0].shape[1]) 71 | data = data.reshape(inputShape) 72 | 73 | return data 74 | 75 | def train(self, data, labels, epochs=100): 76 | print "shuffling training data" 77 | data, labels = shuffle(data, labels) 78 | 79 | print "data.shape" 80 | print data.shape 81 | 82 | print "labels.shape" 83 | print labels.shape 84 | 85 | data = self._reshapeInputData(data) 86 | 87 | sharedData = theano.shared(np.asarray(data, dtype=theanoFloat)) 88 | sharedLabels = theano.shared(np.asarray(labels, dtype=theanoFloat)) 89 | 90 | miniBatchSize = self.training_options.miniBatchSize 91 | nrMinibatches = len(data) / miniBatchSize 92 | 93 | # Symbolic variable for the data matrix 94 | x = T.tensor4('x', dtype=theanoFloat) 95 | # the labels 96 | y = T.matrix('y', dtype=theanoFloat) 97 | 98 | # Set up the input variable as a field of the conv net 99 | # so that we can access it easily for testing 100 | self.x = x 101 | 102 | # Set up the layers with the appropriate theano structures 103 | self._setUpLayers(x, data[0].shape) 104 | 105 | # create the batch trainer and using it create the updates 106 | batchTrainer = CNNBatchTrainer(self.layers, self.training_options) 107 | 108 | # Set the batch trainer as a field in the conv net 109 | # then we can access it for a forward pass during testing 110 | self.batchTrainer = batchTrainer 111 | batchTrainer.trainFixedEpochs(x, y, sharedData, sharedLabels, epochs) 112 | 113 | def test(self, data): 114 | miniBatchIndex = T.lscalar() 115 | 116 | miniBatchSize = self.training_options.miniBatchSize 117 | 118 | data = self._reshapeInputData(data) 119 | sharedData = theano.shared(np.asarray(data, dtype=theanoFloat)) 120 | 121 | # Do a forward pass trough the network 122 | forwardPass = theano.function( 123 | inputs=[miniBatchIndex], 124 | outputs=self.batchTrainer.output, 125 | givens={ 126 | self.x: sharedData[miniBatchIndex * miniBatchSize: (miniBatchIndex + 1) * miniBatchSize]}) 127 | 128 | nrMinibatches = data.shape[0] / miniBatchSize 129 | 130 | # do the loop that actually predicts the data 131 | lastLayer = 
concatenateLists([forwardPass(i) for i in xrange(nrMinibatches)]) 132 | lastLayer = np.array(lastLayer) 133 | 134 | return lastLayer, np.argmax(lastLayer, axis=1) 135 | -------------------------------------------------------------------------------- /code/lib/debug.py: -------------------------------------------------------------------------------- 1 | """ Common debug utilities gathered here to not clutter code.""" 2 | 3 | import numpy as np 4 | import theano 5 | 6 | def detect_nan(i, node, fn): 7 | for output in fn.outputs: 8 | if np.isnan(output[0]).any(): 9 | print '*** NaN detected ***' 10 | theano.printing.debugprint(node) 11 | print 'Inputs : %s' % [input[0] for input in fn.inputs] 12 | print 'Outputs: %s' % [output[0] for output in fn.outputs] 13 | break 14 | 15 | def inspect_inputs(i, node, fn): 16 | print i, node, "input(s) value(s):", [input[0] for input in fn.inputs], 17 | 18 | def inspect_outputs(i, node, fn): 19 | print "output(s) value(s):", [output[0] for output in fn.outputs] 20 | -------------------------------------------------------------------------------- /code/lib/trainingoptions.py: -------------------------------------------------------------------------------- 1 | """ Defines a training options class as a holder for options that can be passed 2 | for training a neural network. 3 | """ 4 | 5 | __author__ = "Mihaela Rosca" 6 | __contact__ = "mihaela.c.rosca@gmail.com" 7 | 8 | import numpy as np 9 | # TODO: move from common here 10 | import common 11 | 12 | class TrainingOptions(object): 13 | 14 | def __init__(self, miniBatchSize, 15 | learningRate, 16 | momentumMax=0.0, 17 | rmsprop=False, 18 | weightDecayL1=0.0, 19 | weightDecayL2=0.0, 20 | nesterovMomentum=False, 21 | save_best_weights=False, 22 | momentumForEpochFunction=common.getMomentumForEpochLinearIncrease, 23 | momentumFactorForLearningRate=False): 24 | self.miniBatchSize = miniBatchSize 25 | self.learningRate = learningRate 26 | self.momentumMax = np.float32(momentumMax) 27 | self.rmsprop = rmsprop 28 | self.weightDecayL1 = weightDecayL1 29 | self.weightDecayL2 = weightDecayL2 30 | self.nesterov = nesterovMomentum 31 | self.momentumFactorForLearningRate = momentumFactorForLearningRate 32 | self.momentumForEpochFunction = momentumForEpochFunction 33 | self.batchLearningRate = np.float32(learningRate / miniBatchSize) 34 | self.save_best_weights = save_best_weights 35 | -------------------------------------------------------------------------------- /code/lib/utils.py: -------------------------------------------------------------------------------- 1 | """ This file contains different utility functions that are not connected 2 | in anyway to the networks presented in the tutorials, but rather help in 3 | processing the outputs into a more understandable way. 4 | 5 | For example ``tile_raster_images`` helps in generating a easy to grasp 6 | image from a set of samples or weights. 7 | 8 | 9 | CODE OBTAINED FROM: http://deeplearning.net/tutorial/code/utils.py 10 | """ 11 | import numpy 12 | 13 | def scale_to_unit_interval(ndar, eps=1e-8): 14 | """ Scales all values in the ndarray ndar to be between 0 and 1 """ 15 | ndar = ndar.copy() 16 | ndar -= ndar.min() 17 | ndar *= 1.0 / (ndar.max() + eps) 18 | return ndar 19 | 20 | 21 | def tile_raster_images(X, img_shape, tile_shape, tile_spacing=(0, 0), 22 | scale_rows_to_unit_interval=True, 23 | output_pixel_vals=True): 24 | """ 25 | Transform an array with one flattened image per row, into an array in 26 | which images are reshaped and layed out like tiles on a floor. 
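    For example (an illustrative call, not one taken from this repository's
    callers): X of shape (100, 784) with img_shape=(28, 28) and
    tile_shape=(10, 10) gives back a single 2-D array holding a 10x10 grid
    of 28x28 images.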
27 | 28 | This function is useful for visualizing datasets whose rows are images, 29 | and also columns of matrices for transforming those rows 30 | (such as the first layer of a neural net). 31 | 32 | :type X: a 2-D ndarray or a tuple of 4 channels, elements of which can 33 | be 2-D ndarrays or None; 34 | :param X: a 2-D array in which every row is a flattened image. 35 | 36 | :type img_shape: tuple; (height, width) 37 | :param img_shape: the original shape of each image 38 | 39 | :type tile_shape: tuple; (rows, cols) 40 | :param tile_shape: the number of images to tile (rows, cols) 41 | 42 | :param output_pixel_vals: if output should be pixel values (i.e. int8 43 | values) or floats 44 | 45 | :param scale_rows_to_unit_interval: if the values need to be scaled before 46 | being plotted to [0,1] or not 47 | 48 | 49 | :returns: array suitable for viewing as an image. 50 | (See:`PIL.Image.fromarray`.) 51 | :rtype: a 2-d array with same dtype as X. 52 | 53 | """ 54 | 55 | assert len(img_shape) == 2 56 | assert len(tile_shape) == 2 57 | assert len(tile_spacing) == 2 58 | 59 | # The expression below can be re-written in a more C style as 60 | # follows : 61 | # 62 | # out_shape = [0,0] 63 | # out_shape[0] = (img_shape[0]+tile_spacing[0])*tile_shape[0] - 64 | # tile_spacing[0] 65 | # out_shape[1] = (img_shape[1]+tile_spacing[1])*tile_shape[1] - 66 | # tile_spacing[1] 67 | out_shape = [(ishp + tsp) * tshp - tsp for ishp, tshp, tsp 68 | in zip(img_shape, tile_shape, tile_spacing)] 69 | 70 | if isinstance(X, tuple): 71 | assert len(X) == 4 72 | # Create an output numpy ndarray to store the image 73 | if output_pixel_vals: 74 | out_array = numpy.zeros((out_shape[0], out_shape[1], 4), 75 | dtype='uint8') 76 | else: 77 | out_array = numpy.zeros((out_shape[0], out_shape[1], 4), 78 | dtype=X.dtype) 79 | 80 | #colors default to 0, alpha defaults to 1 (opaque) 81 | if output_pixel_vals: 82 | channel_defaults = [0, 0, 0, 255] 83 | else: 84 | channel_defaults = [0., 0., 0., 1.] 
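        # channel_defaults holds the (R, G, B, alpha) fill values for missing
        # channels: fully opaque alpha is 255 for uint8 output and 1.0 for
        # float output.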
85 | 86 | for i in xrange(4): 87 | if X[i] is None: 88 | # if channel is None, fill it with zeros of the correct 89 | # dtype 90 | dt = out_array.dtype 91 | if output_pixel_vals: 92 | dt = 'uint8' 93 | out_array[:, :, i] = numpy.zeros(out_shape, 94 | dtype=dt) + channel_defaults[i] 95 | else: 96 | # use a recurrent call to compute the channel and store it 97 | # in the output 98 | out_array[:, :, i] = tile_raster_images( 99 | X[i], img_shape, tile_shape, tile_spacing, 100 | scale_rows_to_unit_interval, output_pixel_vals) 101 | return out_array 102 | 103 | else: 104 | # if we are dealing with only one channel 105 | H, W = img_shape 106 | Hs, Ws = tile_spacing 107 | 108 | # generate a matrix to store the output 109 | dt = X.dtype 110 | if output_pixel_vals: 111 | dt = 'uint8' 112 | out_array = numpy.zeros(out_shape, dtype=dt) 113 | 114 | for tile_row in xrange(tile_shape[0]): 115 | for tile_col in xrange(tile_shape[1]): 116 | if tile_row * tile_shape[1] + tile_col < X.shape[0]: 117 | this_x = X[tile_row * tile_shape[1] + tile_col] 118 | if scale_rows_to_unit_interval: 119 | # if we should scale values to be between 0 and 1 120 | # do this by calling the `scale_to_unit_interval` 121 | # function 122 | this_img = scale_to_unit_interval( 123 | this_x.reshape(img_shape)) 124 | else: 125 | this_img = this_x.reshape(img_shape) 126 | # add the slice to the corresponding position in the 127 | # output array 128 | c = 1 129 | if output_pixel_vals: 130 | c = 255 131 | out_array[ 132 | tile_row * (H + Hs): tile_row * (H + Hs) + H, 133 | tile_col * (W + Ws): tile_col * (W + Ws) + W 134 | ] = this_img * c 135 | return out_array 136 | -------------------------------------------------------------------------------- /code/maxoutMain.py: -------------------------------------------------------------------------------- 1 | """Maxout examples. 
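Example invocations (assuming the pylearn2 setup and the hardcoded dataset
and save paths used below are available):

  python maxoutMain.py --mnist --train
  python maxoutMain.py --pie --train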
""" 2 | 3 | __author__ = "Mihaela Rosca" 4 | __contact__ = "mihaela.c.rosca@gmail.com" 5 | 6 | from __future__ import division 7 | import os 8 | 9 | import numpy as np 10 | import argparse 11 | 12 | from pylearn2.train import Train 13 | from pylearn2.datasets.mnist import MNIST 14 | from pylearn2.models import mlp, maxout 15 | from pylearn2.training_algorithms import sgd 16 | from pylearn2.termination_criteria import MonitorBased 17 | from pylearn2.train_extensions import best_params 18 | from pylearn2.training_algorithms.learning_rule import Momentum 19 | from pylearn2.training_algorithms.learning_rule import MomentumAdjustor 20 | from pylearn2.utils import serial 21 | from pylearn2.costs.mlp.dropout import Dropout 22 | from theano import function 23 | from theano import config 24 | from theano import tensor as T 25 | from sklearn import cross_validation 26 | from sklearn.metrics import confusion_matrix 27 | 28 | from pylearn2.datasets.dense_design_matrix import DenseDesignMatrix 29 | 30 | from read.readfacedatabases import * 31 | from lib.common import * 32 | 33 | theanoFloat = config.floatX 34 | 35 | 36 | parser = argparse.ArgumentParser(description='digit recognition') 37 | parser.add_argument('--mnist', dest='mnist',action='store_true', default=False, 38 | help=("if true, trains the net on MNIST data")) 39 | parser.add_argument('--pie',dest='pie',action='store_true', default=False, 40 | help="if true, trains the net on MultiPIE data") 41 | parser.add_argument('--cv',dest='cv',action='store_true', default=False, 42 | help="if true, does CV on pie data") 43 | parser.add_argument('--train',dest='train',action='store_true', default=False, 44 | help=("if true, the network is trained from scratch from the" 45 | "training data")) 46 | args = parser.parse_args() 47 | 48 | class MultiPIE(DenseDesignMatrix): 49 | 50 | def __init__(self, name, indices=None): 51 | self.name = name 52 | 53 | x, y = readMultiPIE(vectorizeLabels=True) 54 | x = x[indices] 55 | y = y[indices] 56 | 57 | self.label_names = ['Neutral','Surprise','Squint','Smile','Disgust','Scream'] 58 | self.n_classes = len(self.label_names) 59 | 60 | self.label_map = {k: v for k, v in zip(self.label_names, range(self.n_classes))} 61 | self.label_unmap = {v: k for k, v in zip(self.label_names, range(self.n_classes))} 62 | 63 | axes=('c', 0, 1, 'b') 64 | 65 | super(MultiPIE, self).__init__(y=y, X=x, axes=axes) 66 | 67 | def convert(self, x): 68 | return self.label_map[x] 69 | 70 | def unconvert(self, x): 71 | return self.label_unmap[x] 72 | 73 | 74 | def MNISTmain(): 75 | # TODO: max_col_norm 76 | h0 = maxout.Maxout(layer_name='h0', num_units=1200, num_pieces=2, W_lr_scale=1.0, irange=0.005, b_lr_scale=1.0) 77 | h1 = maxout.Maxout(layer_name='h1', num_units=1200, num_pieces=2, W_lr_scale=1.0, irange=0.005, b_lr_scale=1.0) 78 | # h2 = maxout.Maxout(layer_name='h2, num_units=1200, num_pieces=2, W_lr_scale=1.0, irange=0.005, b_lr_scale=1.0) 79 | outlayer = mlp.Softmax(layer_name='y', n_classes=10, irange=0) 80 | 81 | # layers = [h0, h1, h2, outlayer] 82 | layers = [h0, h1, outlayer] 83 | 84 | model = mlp.MLP(layers, nvis=784) 85 | train = MNIST('train', one_hot=1, start=0, stop=50000) 86 | valid = MNIST('train', one_hot=1, start=50000, stop=60000) 87 | test = MNIST('test', one_hot=1, start=0, stop=10000) 88 | 89 | monitoring = dict(valid=valid) 90 | termination = MonitorBased(channel_name="valid_y_misclass", N=100) 91 | extensions = [best_params.MonitorBasedSaveBest(channel_name="valid_y_misclass", 92 | 
save_path="/data/mcr10/train_best.pkl")] 93 | 94 | algorithm = sgd.SGD(0.1, batch_size=100, cost=Dropout(), learning_rule=Momentum(0.9), 95 | monitoring_dataset = monitoring, termination_criterion = termination) 96 | 97 | save_path = "/data/mcr10/train_best.pkl" 98 | 99 | if not args.train and os.path.exists(save_path): 100 | model = serial.load(save_path) 101 | else: 102 | print 'Running training' 103 | train_job = Train(train, model, algorithm, extensions=extensions, save_path="/data/mcr10/train.pkl", save_freq=10) 104 | train_job.main_loop() 105 | 106 | X = model.get_input_space().make_batch_theano() 107 | Y = model.fprop(X) 108 | 109 | y = T.argmax(Y, axis=1) 110 | f = function(inputs=[X], outputs=y) 111 | yhat = f(test.X) 112 | 113 | y = np.squeeze(test.get_targets()) 114 | 115 | print 'accuracy', (y==yhat).sum() / y.size 116 | 117 | def MultiPIEmain(): 118 | # h0 = maxout.Maxout(layer_name='h0', num_units=500, num_pieces=3, W_lr_scale=1.0, max_col_norm = 1.0,irange=0.005, b_lr_scale=1.0) 119 | # h1 = maxout.Maxout(layer_name='h1', num_units=500, num_pieces=3, W_lr_scale=1.0, max_col_norm = 1.0,irange=0.005, b_lr_scale=1.0) 120 | # h2 = maxout.Maxout(layer_name='h2', num_units=500, num_pieces=3, W_lr_scale=1.0, max_col_norm = 1.0,irange=0.005, b_lr_scale=1.0) 121 | h0 = maxout.Maxout(layer_name='h0', num_units=1000, num_pieces=2, W_lr_scale=1.0, irange=0.005, b_lr_scale=1.0) 122 | h1 = maxout.Maxout(layer_name='h1', num_units=1000, num_pieces=2, W_lr_scale=1.0, irange=0.005, b_lr_scale=1.0) 123 | h2 = maxout.Maxout(layer_name='h2', num_units=1000, num_pieces=2, W_lr_scale=1.0, irange=0.005, b_lr_scale=1.0) 124 | 125 | outlayer = mlp.Softmax(layer_name='y', n_classes=6, irange=0) 126 | 127 | layers = [h0, h1, h2, outlayer] 128 | 129 | model = mlp.MLP(layers, nvis=1200) 130 | 131 | trainIndices, validationIndices, testIndices = getMultiPIEindices() 132 | train = MultiPIE('train', indices=trainIndices) 133 | valid = MultiPIE('valid', indices=validationIndices) 134 | test = MultiPIE('test', indices=testIndices) 135 | 136 | monitoring = dict(valid=valid) 137 | termination = MonitorBased(channel_name="valid_y_misclass", N=100) 138 | extensions = [best_params.MonitorBasedSaveBest(channel_name="valid_y_misclass", 139 | save_path="/data/mcr10/train_best.pkl"), 140 | MomentumAdjustor(final_momentum=0.7, start=1, saturate=250)] 141 | 142 | algorithm = sgd.SGD(0.05, batch_size=20, cost=Dropout(), learning_rule=Momentum(0.5), 143 | monitoring_dataset=monitoring, termination_criterion=termination) 144 | 145 | save_path = "/data/mcr10/train_best.pkl" 146 | 147 | if not args.train and os.path.exists(save_path): 148 | model = serial.load(save_path) 149 | else: 150 | print 'Running training' 151 | train_job = Train(train, model, algorithm, extensions=extensions, save_path="/data/mcr10/trainpie.pkl", save_freq=50) 152 | train_job.main_loop() 153 | 154 | X = model.get_input_space().make_batch_theano() 155 | Y = model.fprop(X) 156 | 157 | y = T.argmax(Y, axis=1) 158 | 159 | f = function(inputs=[X], outputs=y, allow_input_downcast=True) 160 | yhat = f(test.X) 161 | 162 | print sum(yhat) 163 | print yhat.shape 164 | 165 | y = np.argmax(np.squeeze(test.get_targets()), axis=1) 166 | 167 | print 'accuracy', (y==yhat).sum() / y.size 168 | 169 | 170 | def MultiPIECV(): 171 | # Learning rate, nr pieces 172 | parms = [(0.1, 2), (0.1, 3), (0.01, 2), (0.01, 3)] 173 | 174 | accuracies = [] 175 | 176 | for i in xrange(len(parms)): 177 | h0 = maxout.Maxout(layer_name='h0', num_units=1500, num_pieces=parms[i][1], 
W_lr_scale=1.0, irange=0.005, b_lr_scale=1.0) 178 | h1 = maxout.Maxout(layer_name='h1', num_units=1500, num_pieces=parms[i][1], W_lr_scale=1.0, irange=0.005, b_lr_scale=1.0) 179 | h2 = maxout.Maxout(layer_name='h2', num_units=1500, num_pieces=parms[i][1], W_lr_scale=1.0, irange=0.005, b_lr_scale=1.0) 180 | outlayer = mlp.Softmax(layer_name='y', n_classes=6, irange=0) 181 | 182 | layers = [h0, h1, h2, outlayer] 183 | 184 | model = mlp.MLP(layers, nvis=1200) 185 | 186 | trainIndices, validationIndices, testIndices = getMultiPIEindices() 187 | train = MultiPIE('train', indices=trainIndices) 188 | valid = MultiPIE('valid', indices=validationIndices) 189 | test = MultiPIE('test', indices=testIndices) 190 | 191 | monitoring = dict(valid=valid) 192 | termination = MonitorBased(channel_name="valid_y_misclass", N=100) 193 | extensions = [best_params.MonitorBasedSaveBest(channel_name="valid_y_misclass", 194 | save_path="/data/mcr10/train_best.pkl")] 195 | 196 | algorithm = sgd.SGD(parms[i][0], batch_size=100, cost=Dropout(), 197 | monitoring_dataset=monitoring, termination_criterion=termination) 198 | 199 | save_path = "/data/mcr10/train_best.pkl" 200 | 201 | if not args.train and os.path.exists(save_path): 202 | model = serial.load(save_path) 203 | else: 204 | print 'Running training' 205 | train_job = Train(train, model, algorithm, extensions=extensions, save_path="/data/mcr10/trainpie.pkl", save_freq=1) 206 | train_job.main_loop() 207 | 208 | X = model.get_input_space().make_batch_theano() 209 | Y = model.fprop(X) 210 | 211 | y = T.argmax(Y, axis=1) 212 | 213 | f = function(inputs=[X], outputs=y, allow_input_downcast=True) 214 | yhat = f(test.X) 215 | 216 | print sum(yhat) 217 | print yhat.shape 218 | 219 | y = np.argmax(np.squeeze(test.get_targets()), axis=1) 220 | 221 | accuracy = (y==yhat).sum() / y.size 222 | accuracies += [accuracy] 223 | 224 | # TODO: some confusion matrix? 
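  # One possible sketch for the TODO above (untested, hypothetical): since
  # confusion_matrix is already imported from sklearn.metrics, adding
  #   print confusion_matrix(y, yhat)
  # inside the loop, after y and yhat are computed, would show which
  # emotions get mistaken for which.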
225 |   for i in xrange(len(parms)):
226 |     print "for parameter " + str(i)
227 |     print "the correct rate was " + str(accuracies[i])
228 | 
229 | 
230 | 
231 | def getMultiPIEindices():
232 |   x, y = readMultiPIE()
233 |   x = np.array(x, dtype=theanoFloat)
234 |   y = np.array(y, dtype=theanoFloat)
235 |   l = len(x)
236 | 
237 |   kf = cross_validation.KFold(n=l, n_folds=5)
238 |   for train, test in kf:
239 |     break
240 | 
241 |   allIndices = np.random.permutation(l)
242 | 
243 |   testIndices = allIndices[test]
244 |   stopValidation = len(train) / 10
245 |   validationIndices = allIndices[train][0: stopValidation]
246 |   trainIndices = allIndices[train][stopValidation:]
247 | 
248 |   return trainIndices, validationIndices, testIndices
249 | 
250 | if __name__ == '__main__':
251 |   if args.mnist:
252 |     MNISTmain()
253 |   if args.pie:
254 |     MultiPIEmain()
255 |   if args.cv:
256 |     MultiPIECV()
--------------------------------------------------------------------------------
/code/old-version/MNISTdigits.py:
--------------------------------------------------------------------------------
1 | """ This module is mainly created to test the deep belief and
2 | rbm implementations on MNIST"""
3 | 
4 | __author__ = "Mihaela Rosca"
5 | __contact__ = "mihaela.c.rosca@gmail.com"
6 | 
7 | import argparse
8 | import matplotlib.pyplot as plt
9 | import numpy as np
10 | import cPickle as pickle
11 | import readmnist
12 | import restrictedBoltzmannMachine as rbm
13 | import deepbelief as db
14 | import utils
15 | import PCA
16 | import glob
17 | 
18 | import DimensionalityReduction
19 | 
20 | from common import *
21 | 
22 | parser = argparse.ArgumentParser(description='RBM for digit recognition')
23 | parser.add_argument('--save', dest='save', action='store_true', default=False,
24 |                     help="if true, the network is serialized and saved")
25 | parser.add_argument('--train', dest='train', action='store_true', default=False,
26 |                     help=("if true, the network is trained from scratch from the "
27 |                           "training data"))
28 | parser.add_argument('--pca', dest='pca', action='store_true', default=False,
29 |                     help=("if true, the code for running PCA on the data is run"))
30 | parser.add_argument('--rbm', dest='rbm', action='store_true', default=False,
31 |                     help=("if true, the code for training an RBM on the data is run"))
32 | parser.add_argument('--rbmPCD', dest='rbmPCD', action='store_true', default=False,
33 |                     help=("if true, the code for training an RBM with PCD on the data is run"))
34 | parser.add_argument('--db', dest='db', action='store_true', default=False,
35 |                     help=("if true, the code for training a deep belief net on the "
36 |                           "data is run"))
37 | parser.add_argument('--trainSize', type=int, default=10000,
38 |                     help='the number of training cases to be considered')
39 | parser.add_argument('--testSize', type=int, default=1000,
40 |                     help='the number of testing cases to be considered')
41 | parser.add_argument('netFile', help="file where the serialized network should be saved")
42 | parser.add_argument('--path', dest='path', default="MNIST", help="the path to the MNIST files")
43 | 
44 | 
45 | 
46 | # Get the arguments of the program
47 | args = parser.parse_args()
48 | 
49 | def visualizeWeights(weights, imgShape, tileShape):
50 |   return utils.tile_raster_images(weights, imgShape,
51 |                                   tileShape, tile_spacing=(1, 1))
52 | 
53 | def rbmMain(reconstructRandom=True):
54 |   trainVectors, trainLabels =\
55 |       readmnist.read(0, args.trainSize, digits=None, bTrain=True, path=args.path)
56 |   testingVectors, testLabels =\
57 |       readmnist.read(0, args.testSize, digits=None, bTrain=False,
path=args.path) 58 | 59 | trainingScaledVectors = trainVectors / 255.0 60 | testingScaledVectors = testingVectors / 255.0 61 | 62 | # Train the network 63 | if args.train: 64 | # The number of hidden units is taken from a deep learning tutorial 65 | # The data are the values of the images have to be normalized before being 66 | # presented to the network 67 | nrVisible = len(trainingScaledVectors[0]) 68 | nrHidden = 500 69 | # use 1 dropout to test the rbm for now 70 | net = rbm.RBM(nrVisible, nrHidden, rbm.contrastiveDivergence, 1, 1) 71 | net.train(trainingScaledVectors) 72 | t = visualizeWeights(net.weights.T, (28,28), (10,10)) 73 | else: 74 | # Take the saved network and use that for reconstructions 75 | f = open(args.netFile, "rb") 76 | t = pickle.load(f) 77 | net = pickle.load(f) 78 | f.close() 79 | 80 | # Reconstruct an image and see that it actually looks like a digit 81 | test = testingScaledVectors[0,:] 82 | 83 | # get a random image and see it looks like 84 | if reconstructRandom: 85 | test = np.random.random_sample(test.shape) 86 | 87 | 88 | # Show the initial image first 89 | plt.imshow(vectorToImage(test, (28,28)), cmap=plt.cm.gray) 90 | plt.show() 91 | 92 | # Show the reconstruction 93 | recon = net.reconstruct(test.reshape(1, test.shape[0])) 94 | plt.imshow(vectorToImage(recon, (28,28)), cmap=plt.cm.gray) 95 | plt.axis('off') 96 | plt.savefig('1.png', transparent=True) 97 | # plt.show() 98 | 99 | # Show the weights and their form in a tile fashion 100 | # Plot the weights 101 | plt.imshow(t, cmap=plt.cm.gray) 102 | plt.axis('off') 103 | plt.savefig('weights.png', transparent=True) 104 | 105 | print "done" 106 | 107 | if args.save: 108 | f = open(args.netFile, "wb") 109 | pickle.dump(t, f) 110 | pickle.dump(net, f) 111 | 112 | 113 | def rbmMainPCD(): 114 | trainVectors, trainLabels =\ 115 | readmnist.read(0, args.trainSize, digits=None, bTrain=True, path=args.path) 116 | testingVectors, testLabels =\ 117 | readmnist.read(0, args.testSize, digits=None,bTrain=False, path=args.path) 118 | 119 | trainingScaledVectors = trainVectors / 255.0 120 | testingScaledVectors = testingVectors / 255.0 121 | 122 | # Train the network 123 | if args.train: 124 | # The number of hidden units is taken from a deep learning tutorial 125 | # The data are the values of the images have to be normalized before being 126 | # presented to the network 127 | nrVisible = len(trainingScaledVectors[0]) 128 | nrHidden = 500 129 | # use 1 dropout to test the rbm for now 130 | # net = rbm.RBM(nrVisible, nrHidden, rbm.contrastiveDivergence, 1, 1) 131 | net = rbm.RBM(nrVisible, nrHidden, rbm.PCD, 1, 1) 132 | net.train(trainingScaledVectors) 133 | t = visualizeWeights(net.weights.T, (28,28), (10,10)) 134 | else: 135 | # Take the saved network and use that for reconstructions 136 | f = open(args.netFile, "rb") 137 | t = pickle.load(f) 138 | net = pickle.load(f) 139 | f.close() 140 | 141 | # Reconstruct a training image and see that it actually looks like a digit 142 | test = testingScaledVectors[0,:] 143 | 144 | plt.imshow(vectorToImage(test, (28,28)), cmap=plt.cm.gray) 145 | plt.show() 146 | 147 | recon = net.reconstruct(test.reshape(1, test.shape[0])) 148 | plt.imshow(vectorToImage(recon, (28,28)), cmap=plt.cm.gray) 149 | plt.show() 150 | 151 | # Show the weights and their form in a tile fashion 152 | plt.imshow(t, cmap=plt.cm.gray) 153 | plt.axis('off') 154 | plt.savefig('weightsPCDall.png', transparent=True) 155 | 156 | print "done" 157 | 158 | if args.save: 159 | f = open(args.netFile, "wb") 160 | 
pickle.dump(t, f) 161 | pickle.dump(net, f) 162 | 163 | 164 | def shuffle(data, labels): 165 | indexShuffle = np.random.permutation(len(data)) 166 | shuffledData = np.array([data[i] for i in indexShuffle]) 167 | shuffledLabels = np.array([labels[i] for i in indexShuffle]) 168 | 169 | return shuffledData, shuffledLabels 170 | 171 | 172 | def pcaOnMnist(training, dimension=700): 173 | principalComponents = PCA.pca(training, dimension) 174 | low, same = PCA.reduce(principalComponents, training) 175 | 176 | image2DInitial = vectorToImage(training[0], (28,28)) 177 | print same[0].shape 178 | image2D = vectorToImage(same[0], (28,28)) 179 | 180 | plt.imshow(image2DInitial, cmap=plt.cm.gray) 181 | plt.show() 182 | plt.imshow(image2D, cmap=plt.cm.gray) 183 | plt.show() 184 | print "done" 185 | 186 | 187 | def deepbeliefMNIST(): 188 | training = args.trainSize 189 | testing = args.testSize 190 | 191 | trainVectors, trainLabels =\ 192 | readmnist.read(0, training, bTrain=True, path=args.path) 193 | testVectors, testLabels =\ 194 | readmnist.read(0, testing, bTrain=False, path=args.path) 195 | print trainVectors[0].shape 196 | 197 | trainVectors, trainLabels = shuffle(trainVectors, trainLabels) 198 | 199 | trainingScaledVectors = trainVectors / 255.0 200 | testingScaledVectors = testVectors / 255.0 201 | 202 | vectorLabels = labelsToVectors(trainLabels, 10) 203 | 204 | if args.train: 205 | # net = db.DBN(3, [784, 500, 10], [Sigmoid(), Softmax()]) 206 | # net = db.DBN(4, [784, 500, 500, 10], [Sigmoid, Sigmoid, Softmax]) 207 | 208 | net = db.DBN(5, [784, 1000, 1000, 1000, 10], 209 | [Sigmoid, Sigmoid, Sigmoid, Softmax], 210 | dropout=0.5, rbmDropout=0.5, visibleDropout=0.8, 211 | rbmVisibleDropout=1) 212 | # TODO: think about what the network should do for 2 layers 213 | net.train(trainingScaledVectors, vectorLabels) 214 | else: 215 | # Take the saved network and use that for reconstructions 216 | f = open(args.netFile, "rb") 217 | net = pickle.load(f) 218 | f.close() 219 | 220 | 221 | probs, predicted = net.classify(testingScaledVectors) 222 | correct = 0 223 | for i in xrange(testing): 224 | print "predicted" 225 | print "probs" 226 | print probs[i] 227 | print predicted[i] 228 | print "actual" 229 | actual = testLabels[i] 230 | print actual 231 | correct += (predicted[i] == actual) 232 | 233 | print "correct" 234 | print correct 235 | 236 | # for w in net.weights: 237 | # print w 238 | 239 | # for b in net.biases: 240 | # print b 241 | 242 | 243 | # t = visualizeWeights(net.weights[0].T, trainImages[0].(28, 28), (10,10)) 244 | # plt.imshow(t, cmap=plt.cm.gray) 245 | # plt.show() 246 | # print "done" 247 | 248 | if args.save: 249 | f = open(args.netFile, "wb") 250 | pickle.dump(net, f) 251 | f.close() 252 | 253 | """ 254 | Arguments: 255 | big: should the big or small images be used? 256 | folds: which folds should be used (1,..5) (a list). If None is passed all 257 | folds are used 258 | """ 259 | def deepBeliefKanade(big=False, folds=None): 260 | if big: 261 | files = glob.glob('kanade_150*.pickle') 262 | else: 263 | files = glob.glob('kanade_f*.pickle') 264 | 265 | if not folds: 266 | folds = range(1, 6) 267 | 268 | # Read the data from them. 
Sort out the files that do not have 269 | # the folds that we want 270 | # TODO: do this better (with regex in the file name) 271 | # DO not reply on the order returned 272 | files = files[folds] 273 | 274 | data = [] 275 | labels = [] 276 | for filename in files: 277 | with open(filename, "rb") as f: 278 | # Sort out the labels from the data 279 | dataAndLabels = pickle.load(f) 280 | foldData = dataAndLabels[0:-1 ,:] 281 | foldLabels = dataAndLabels[-1,:] 282 | data.append(foldData) 283 | labels.append(foldLabels) 284 | 285 | # Do LDA 286 | 287 | # Create the network 288 | 289 | # Test 290 | 291 | # You can also group the emotions into positive and negative to see 292 | # if you can get better results (probably yes) 293 | pass 294 | 295 | 296 | # TODO: fix this (look at the ML coursework for it) 297 | # Even better, use LDA 298 | # think of normalizing them to 0.1 for pca as well 299 | def pcaMain(): 300 | training = args.trainSize 301 | testing = args.testSize 302 | 303 | train, trainLabels =\ 304 | readmnist.read(0, training, bTrain=True, path=args.path) 305 | testVectors, testLabels =\ 306 | readmnist.read(0, testing, bTrain=False, path=args.path) 307 | print train[0].shape 308 | 309 | pcaOnMnist(train, dimension=100) 310 | 311 | def main(): 312 | if args.db + args.pca + args.rbm + args.rbmPCD != 1: 313 | raise Exception("You decide on one main method to run") 314 | 315 | if args.db: 316 | deepbeliefMNIST() 317 | if args.pca: 318 | pcaMain() 319 | if args.rbmPCD: 320 | rbmMainPCD() 321 | if args.rbm: 322 | rbmMain() 323 | 324 | 325 | if __name__ == '__main__': 326 | main() 327 | -------------------------------------------------------------------------------- /code/old-version/PCA.py: -------------------------------------------------------------------------------- 1 | __author__ = "Mihaela Rosca" 2 | __contact__ = "mihaela.c.rosca@gmail.com" 3 | 4 | import heapq 5 | import matplotlib.pyplot as plt 6 | import numpy 7 | import os 8 | import scipy 9 | import scipy.linalg 10 | 11 | from os.path import isfile, join 12 | from scipy import misc 13 | 14 | # Import all common functions 15 | from common import * 16 | 17 | 18 | # The directory path to the images 19 | PICTURE_PATH = "/pics/cambrdige_pics/" 20 | 21 | # The current directory where the script is ran 22 | currentDir = os.path.dirname(os.path.abspath(__file__)) 23 | 24 | """ 25 | Converts the data to zero mean data. 26 | """ 27 | def convertDataToZeroMean(data): 28 | means = scipy.mean(data, axis=0) 29 | rows, cols = data.shape 30 | zeroMean = numpy.zeros((rows, cols)) 31 | for i in xrange(rows): 32 | zeroMean[i] = data[i] - means 33 | 34 | assert zeroMean.shape == data.shape 35 | 36 | return zeroMean 37 | 38 | 39 | """ 40 | Uses a heuristic to evaluate how many dimensions should the data be reduced 41 | to. 42 | 43 | Arguments: 44 | eigenValues: 45 | The eigen values of the covariance matrix, or numbers proportional to them. 46 | Should be a numpy 1-D array. 47 | Returns: 48 | The dimension the data should be reduced to. 
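  Example (hypothetical numbers): for eigenValues = [5.0, 3.0, 0.05] the sum
  is 8.05; 5.0 and 3.0 each account for more than the 1% threshold used below,
  while 0.05 / 8.05 is roughly 0.6% and does not, so the returned dimension
  would be 2.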
49 | """
50 | def dimensionFromEigenIndividualVariance(eigenValues):
51 |   threshold = 0.01
52 |   dimension = 0
53 | 
54 |   s = numpy.sum(eigenValues)
55 |   print "sum eigen " + str(s)
56 | 
57 |   for eigen in eigenValues:
58 |     r = eigen / s
59 |     if r > threshold:
60 |       dimension += 1
61 | 
62 |   return dimension
63 | 
64 | # requires the eigen values to be sorted before
65 | def dimensionFromEigenTotalVariance(eigenValues):
66 |   threshold = 0.95
67 |   dimension = 0
68 | 
69 |   s = numpy.sum(eigenValues)
70 |   print "sum eigen " + str(s)
71 |   current = 0
72 |   for eigen in eigenValues:
73 |     r = (eigen / s)
74 |     current += r
75 |     if current >= threshold:
76 |       break
77 |     dimension += 1
78 | 
79 |   return dimension
80 | 
81 | 
82 | """
83 | This method uses the Karhunen-Loève transform to quickly compute the
84 | eigen values of the data.
85 | 
86 | It is faster than the SVD method below, but can be more prone to floating point
87 | errors than the SVD one.
88 | Arguments:
89 |   train:
90 |     Numpy array of arrays
91 |   dimension: the dimension to which to reduce the size of the data set.
92 | 
93 | Returns:
94 |   The principal components of the data.
95 | """
96 | # Returns the principal components of the given training
97 | # data by computing the principal eigen vectors of the
98 | # covariance matrix of the data
99 | def pca(train, dimension):
100 |   # Use the Karhunen-Loève transform to quickly compute
101 |   # the principal components.
102 |   rows, cols = train.shape
103 |   # Step 1: Get the mean of each column of the data
104 |   # Ie create the average image
105 |   u = convertDataToZeroMean(train)
106 | 
107 |   # Step 2: Compute the eigen values of the U * U^T matrix
108 |   # the size of U * U^T is rows * rows (ie the number of data points you have
109 |   # in your training)
110 |   eigVals, eigVecs = scipy.linalg.eig(u.dot(u.T))
111 | 
112 | 
113 |   # Step 3: Compute the eigen vectors of U^T * U from the eigen vectors of U * U^T
114 |   bigEigVecs = numpy.zeros((rows, cols))
115 |   for i in xrange(rows):
116 |     bigEigVecs[i] = u.T.dot(eigVecs[:, i])
117 | 
118 |   # Step 4: Normalize the eigen vectors to get orthonormal components
119 |   bigEigVecs = map(lambda x: x / scipy.linalg.norm(x), bigEigVecs)
120 | 
121 |   eigValsBigVecs = zip(eigVals, bigEigVecs)
122 |   sortedEigValsBigVecs = sorted(eigValsBigVecs, key=lambda x: x[0], reverse=True)
123 | 
124 |   index = 0
125 |   if dimension == None:
126 |     # Get the eigen values
127 |     # Note that these are not the eigen values of the covariance matrix
128 |     # but the eigen values of U * U ^T
129 |     # however, this is fine because they just differ by a factor
130 |     # so the ratio between eigen values will be preserved
131 |     eigenValues = map(lambda x: x[0], sortedEigValsBigVecs)
132 |     dimension = dimensionFromEigenTotalVariance(eigenValues)
133 |     print "Using PCA dimension " + str(dimension)
134 | 
135 | 
136 |   result = numpy.empty((rows, dimension))
137 |   for eigVal, vector in sortedEigValsBigVecs:
138 |     if index >= dimension:
139 |       break
140 | 
141 |     if eigVal <= 0:
142 |       print "Warning: Non-positive eigen value"
143 | 
144 |     result[:, index] = vector
145 |     index = index + 1
146 | 
147 |   return result
148 | 
149 | """
150 | Arguments:
151 |   train:
152 |     Numpy array of arrays
153 |   dimension: the dimension to which to reduce the size of the data set.
154 | 
155 | Returns:
156 |   The principal components of the data.
157 | 
158 | This method should be preferred over the above: it is well known that the
159 | SVD methods are more stable than the ones that require the computation of
160 | the eigen values and eigen vectors.
161 | For more detail see:
162 | http://math.stackexchange.com/questions/3869/what-is-the-intuitive-relationship-between-svd-and-pca
163 | """
164 | def pcaWithSVD(train, dimension=None):
165 |   zeroMean = convertDataToZeroMean(train)
166 | 
167 |   # SVD guarantees that the singular values are in non-increasing order;
168 |   # this means that the u's are already ordered as required, according
169 |   # to the magnitude of the eigen values
170 |   u, s, vh = scipy.linalg.svd(zeroMean)
171 | 
172 |   if dimension == None:
173 |     # Get the eigen values from the singular values
174 |     eigenValues = s ** 2
175 |     dimension = dimensionFromEigenTotalVariance(eigenValues)
176 |     print "Using PCA dimension " + str(dimension)
177 | 
178 |   return vh[0:dimension]  # take exactly 'dimension' principal directions
179 | 
180 | """
181 | Arguments:
182 |   pcaMethod: a method to use for PCA.
183 |   images: A python list of images that have to be of the same size.
184 |   dimension: the dimension to which to reduce the size of the data set.
185 | Returns:
186 |   A tuple:
187 |   The first element of the tuple is formed from the eigen faces of given
188 |     images.
189 |   The second element of the tuple is formed from the vector version of the
190 |     eigen faces. This is kept for optimization reasons.
191 | """
192 | def getEigenFaces(pcaMethod, images, dimension=None):
193 | 
194 |   imgSize = images[0].shape
195 |   # this call should not be here: the code should assume that the images have
196 |   # been transformed to vectors before
197 |   imgs = imagesToVectors(images)
198 | 
199 |   vectors = pcaMethod(imgs, dimension)
200 |   eigenFaces = map(lambda x: vectorToImage(x, imgSize), vectors)
201 | 
202 |   return (eigenFaces, vectors)
203 | 
204 | 
205 | def reduce(principalComponents, vectors):
206 |   assert len(principalComponents) > 0
207 | 
208 |   print principalComponents[0].shape
209 | 
210 |   principalComponents = np.array(principalComponents)
211 | 
212 |   lowDimRepresentation = np.dot(vectors, principalComponents.T)
213 |   # lowDimRepresentation = map(lambda x : vectors.dot(x), principalComponents)
214 |   # sameDimRepresentation = \
215 |   #   sum([ x * y for x, y in zip(principalComponents, lowDimRepresentation)])
216 |   # TODO: do this with einsum
217 |   sameDimRepresentation = lowDimRepresentation[:, np.newaxis] * principalComponents.T
218 |   sameDimRepresentation = sameDimRepresentation.sum(axis=2)
219 |   # TODO: create the proper thing here so that you can
220 |   # easily see what the output is
221 |   return (lowDimRepresentation, sameDimRepresentation)
222 | 
223 | 
224 | """
225 | Reduces a 2D image represented by a numpy 2D array of integer values (pixels)
226 | to a lower dimension, dictated by the number of principal components.
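Example (shapes only, illustrative): with 10 principal components of length
1024, a 32x32 image is mapped to a list of 10 scalar coefficients plus a
reconstruction with the original 1024 pixels.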
227 | """ 228 | def reduceImageToLowerDimensions(principalComponents, image2D): 229 | assert len(principalComponents) > 0 230 | 231 | size = principalComponents[0].shape 232 | vector = vectorToImage(image2D, size) 233 | 234 | lowDimRepresentation = map(lambda x : x.T.dot(vector), principalComponents) 235 | sameDimRepresentation = \ 236 | sum([ x * y for x, y in zip(principalComponents, lowDimRepresentation)]) 237 | return (lowDimRepresentation, sameDimRepresentation) 238 | 239 | 240 | def main(): 241 | # Load all the image files in the current directory 242 | picFiles = [] 243 | path = currentDir + PICTURE_PATH 244 | for root, dirs, files in os.walk(path): 245 | if root != path: 246 | picFiles += map(lambda x: os.path.join(root, x), files) 247 | 248 | print len(picFiles) 249 | 250 | imgs = map(lambda x: misc.imread(x, flatten=True), picFiles) 251 | 252 | eigenFaces, principalComponents = getEigenFaces(pca, imgs) 253 | # plt.imshow(eigenFaces[0], cmap=plt.cm.gray) 254 | # plt.show() 255 | 256 | lowDimRepresentation, sameDimRepresentation = \ 257 | reduceImageToLowerDimensions(principalComponents, imgs[0]) 258 | 259 | plt.imshow(imgs[0], cmap=plt.cm.gray) 260 | plt.show() 261 | 262 | image2D = vectorToImage(sameDimRepresentation, imgs[0].shape) 263 | plt.imshow(image2D, cmap=plt.cm.gray) 264 | plt.show() 265 | print "done" 266 | 267 | 268 | 269 | if __name__ == '__main__': 270 | main() -------------------------------------------------------------------------------- /code/old-version/common.py: -------------------------------------------------------------------------------- 1 | __author__ = "Mihaela Rosca" 2 | __contact__ = "mihaela.c.rosca@gmail.com" 3 | 4 | import numpy as np 5 | 6 | """ 7 | Arguments: 8 | vec: A numpy 1-D vector. 9 | size: A 2D tuple 10 | 11 | Returns: 12 | A 2-D vector of dimension 'size', only if 'vec' has compatible dimensions. 13 | Otherwise it throws an error. 14 | """ 15 | def vectorToImage(vec, size): 16 | return vec.reshape(size) 17 | 18 | """ Transforms the 2D images into 1D vectors 19 | Arguments: 20 | images: is a python list of numpy arrays 21 | Returns: 22 | A python list of 1-D numpy arrays, transformed from the input 2D ones 23 | No data is lost in the transformation. 
24 | """ 25 | def imagesToVectors(images): 26 | return np.array(map(lambda x: x.reshape(-1), images)) 27 | 28 | def sigmoid(x): 29 | return 1 / (1 + np.exp(-x)) 30 | 31 | def softmax(activation): 32 | out = np.exp(activation) 33 | return out / out.sum() 34 | 35 | def sample(p, size): 36 | return np.random.uniform(size=size) <= p 37 | 38 | def sampleAll(probs): 39 | return np.random.uniform(size=probs.shape) <= probs 40 | 41 | def enum(**enums): 42 | return type('Enum', (), enums) 43 | 44 | # Create an enum for visible and hidden, for 45 | Layer = enum(VISIBLE=0, HIDDEN=1) 46 | 47 | def rmse(prediction, actual): 48 | return np.linalg.norm(prediction - actual) / np.sqrt(len(prediction)) 49 | 50 | def safeLogFraction(p): 51 | assert p >=0 and p <= 1 52 | # TODO: think about this a bit better 53 | # you should not set them to be equal, on the contrary, 54 | # they should be opposites 55 | if p * (1 - p) == 0: 56 | return 0 57 | return np.log(p / (1 -p)) 58 | 59 | # Takes the value of the sigmoid function and returns the derivative 60 | # Works for numpy arrays as well 61 | def softmaxDerivativeFromVal(valueFunction): 62 | return valueFunction * (1.0 - valueFunction) 63 | 64 | def labelsToVectors(labels, size): 65 | result = np.zeros((len(labels), size), dtype=float) 66 | for index, label in enumerate(labels): 67 | result[index, label] = 1.0 68 | 69 | return result 70 | 71 | def zerosFromShape(l): 72 | return map(lambda x: np.zeros(x.shape), l) 73 | 74 | # can make the thing class methods 75 | class ActivationFunction(object): 76 | pass 77 | 78 | """ Implementation of the softmax activation function. 79 | Used for classification (represents a probablity distribution) 80 | """ 81 | class Softmax(ActivationFunction): 82 | 83 | @staticmethod 84 | def value(inputVector): 85 | out = np.exp(inputVector) 86 | return out / (out.sum(axis=1)[:,None]) 87 | 88 | @staticmethod 89 | def derivativeFromValue(value): 90 | return value * (1.0 - value) 91 | 92 | @staticmethod 93 | def derivativeForLinearSum(topLayerDerivatives, topLayerActivations): 94 | d = - topLayerActivations[:, :, np.newaxis] * topLayerActivations[:, np.newaxis, :] 95 | 96 | vals = topLayerActivations * (1 - topLayerActivations) 97 | for index in xrange(len(d)): 98 | d[index][np.diag_indices_from(d[index])] = vals[index] 99 | 100 | res = (topLayerDerivatives[:, :, np.newaxis] * d).sum(axis=1) 101 | return res 102 | 103 | 104 | """ Implementation of the sigmoid activation function.""" 105 | class Sigmoid(ActivationFunction): 106 | 107 | @staticmethod 108 | def value(inputVector): 109 | return 1 / (1 + np.exp(-inputVector)) 110 | 111 | @staticmethod 112 | def derivativeFromValue(value): 113 | return value * (1.0 - value) 114 | 115 | @staticmethod 116 | def derivativeForLinearSum(topLayerDerivatives, topLayerActivations): 117 | return topLayerActivations * (1 - topLayerActivations) * topLayerDerivatives 118 | 119 | """ Implementation of the tanh activation function.""" 120 | 121 | # TODO: I think better use this when you do not have the 0,1 binary constraints 122 | # anymore (but then you use the noisy rectified linear unit) 123 | class Tanh(ActivationFunction): 124 | 125 | @staticmethod 126 | def value(inputVector): 127 | return (np.tanh(inputVector) + 1) / 2 128 | 129 | @staticmethod 130 | def derivativeFromValue(value): 131 | # return (1.0 - value * value) / 2 132 | return 2 * value * (1 - value) 133 | 134 | @staticmethod 135 | def derivativeForLinearSum(topLayerDerivatives, topLayerActivations): 136 | return 2 * topLayerActivations * (1 - 
topLayerActivations) * topLayerDerivatives
137 |     # return ((1.0 - topLayerActivations * topLayerActivations) * topLayerDerivatives) / 2
138 | 
139 | 
--------------------------------------------------------------------------------
/code/old-version/deepbelief.py:
--------------------------------------------------------------------------------
1 | __author__ = "Mihaela Rosca"
2 | __contact__ = "mihaela.c.rosca@gmail.com"
3 | 
4 | import numpy as np
5 | 
6 | import restrictedBoltzmannMachine as rbm
7 | 
8 | # TODO: use conjugate gradient for backpropagation instead of steepest descent
9 | # see here for a theano example http://deeplearning.net/tutorial/code/logistic_cg.py
10 | # TODO: add weight decay in back prop but especially with the constraint
11 | # on the weights
12 | # TODO: monitor the changes in error and change the learning rate according
13 | # to that
14 | # TODO: wake sleep for improving generation
15 | # TODO: nesterov method for momentum
16 | 
17 | """In all of the following, topLayer does not mean the topmost layer, but rather
18 | the layer above the current one."""
19 | 
20 | from common import *
21 | 
22 | """ Class that implements a deep belief network, for classification """
23 | class DBN(object):
24 | 
25 |   """
26 |   Arguments:
27 |     nrLayers: the number of layers of the network. In case of discriminative
28 |       training, also contains the classification layer
29 |       (the last softmax layer)
30 |       type: integer
31 |     layerSizes: the sizes of the individual layers.
32 |       type: list of integers of size nrLayers
33 |     activationFunctions: the functions that are used to transform
34 |       the input of a neuron into its output. The functions should be
35 |       vectorized (as per numpy) to be able to apply them for an entire
36 |       layer.
37 |       type: list of objects of type ActivationFunction
38 |   """
39 |   def __init__(self, nrLayers, layerSizes, activationFunctions,
40 |                dropout=0.5, rbmDropout=0.5, visibleDropout=0.8, rbmVisibleDropout=1):
41 |     self.nrLayers = nrLayers
42 |     self.layerSizes = layerSizes
43 |     # Note that for the first layer the activation function does not matter,
44 |     # so for that one there is no need to pass in an activation function
45 |     self.activationFunctions = activationFunctions
46 |     self.initialized = False
47 |     self.dropout = dropout
48 |     self.rbmDropout = rbmDropout
49 |     self.visibleDropout = visibleDropout
50 |     self.rbmVisibleDropout = rbmVisibleDropout
51 | 
52 |     assert len(layerSizes) == nrLayers
53 |     assert len(activationFunctions) == nrLayers - 1
54 | 
55 |   """
56 |   TODO:
57 |   If labels = None, only does the generative training
58 |     with fine tuning for generation, not for discrimination
59 |   TODO: what happens if you do both? do the fine tuning for generation and
60 |   then do backprop for discrimination
61 |   """
62 | 
63 |   def train(self, data, labels=None):
64 |     # This depends if you have generative or not
65 |     nrRbms = self.nrLayers - 2
66 | 
67 |     self.weights = []
68 |     self.biases = []
69 |     currentData = data
70 |     for i in xrange(nrRbms):
71 |       net = rbm.RBM(self.layerSizes[i], self.layerSizes[i+1],
72 |                     rbm.contrastiveDivergence,
73 |                     self.rbmDropout,
74 |                     self.rbmVisibleDropout,
75 |                     self.activationFunctions[i].value)
76 |       net.train(currentData)
77 |       self.weights += [net.weights / self.dropout]
78 |       self.biases += [net.biases[1]]
79 | 
80 |       currentData = net.hiddenRepresentation(currentData)
81 | 
82 |     # This depends if you have generative or not
83 |     # Initialize the last layer of weights to zero if you have
84 |     # a discriminative net
85 |     self.weights += [np.zeros((self.layerSizes[-2], self.layerSizes[-1]))]
86 |     self.biases += [np.zeros(self.layerSizes[-1])]
87 | 
88 |     assert len(self.weights) == self.nrLayers - 1
89 |     assert len(self.biases) == self.nrLayers - 1
90 |     # Does backprop or wake sleep?
91 |     self.fineTune(data, labels)
92 |     self.classifcationWeights = map(lambda x: x * self.dropout, self.weights)
93 |     self.classifcationBiases = self.biases
94 | 
95 |   """Fine tunes the weights and biases using backpropagation.
96 |   Arguments:
97 |     data: The data used for training and fine tuning
98 |     labels: A numpy nd array. Each label should be transformed into a binary
99 |       base vector before passed into this function.
100 |     miniBatchSize: The number of instances to be used in a miniBatch
101 |     epochs: The number of epochs to use for fine tuning
102 |   """
103 |   def fineTune(self, data, labels, miniBatchSize=10, epochs=100):
104 |     learningRate = 0.1
105 |     batchLearningRate = learningRate / miniBatchSize
106 | 
107 |     nrMiniBatches = len(data) / miniBatchSize
108 | 
109 |     oldDWeights = zerosFromShape(self.weights)
110 |     oldDBias = zerosFromShape(self.biases)
111 | 
112 |     stages = len(self.weights)
113 | 
114 |     # TODO: maybe find a better way than this to find a stopping criterion
115 |     for epoch in xrange(epochs):
116 | 
117 |       if epoch < epochs / 10:
118 |         momentum = 0.5
119 |       else:
120 |         momentum = 0.95
121 | 
122 |       for batch in xrange(nrMiniBatches):
123 |         start = batch * miniBatchSize
124 |         end = (batch + 1) * miniBatchSize
125 |         batchData = data[start: end]
126 | 
127 |         # this is a list of layer activities
128 |         layerValues = forwardPassDropout(self.weights, self.biases,
129 |                                          self.activationFunctions, batchData,
130 |                                          self.dropout, self.visibleDropout)
131 |         finalLayerErrors = derivativesCrossEntropyError(labels[start:end],
132 |                                                         layerValues[-1])
133 | 
134 |         # Compute all derivatives
135 |         dWeights, dBias = backprop(self.weights, layerValues,
136 |                                    finalLayerErrors, self.activationFunctions)
137 | 
138 |         # Update the weights and biases using gradient descent
139 |         # Also update the old weights
140 |         for index in xrange(stages):
141 |           oldDWeights[index] = momentum * oldDWeights[index] - batchLearningRate * dWeights[index]
142 |           oldDBias[index] = momentum * oldDBias[index] - batchLearningRate * dBias[index]
143 |           self.weights[index] += oldDWeights[index]
144 |           self.biases[index] += oldDBias[index]
145 | 
146 | 
147 |   def classify(self, dataInstaces):
148 |     lastLayerValues = forwardPass(self.classifcationWeights,
149 |                                   self.classifcationBiases,
150 |                                   self.activationFunctions,
151 |                                   dataInstaces)[-1]
152 |     return lastLayerValues, np.argmax(lastLayerValues, axis=1)
153 | 
154 | """
155 | Arguments:
156 |   weights: list of numpy
nd-arrays 157 | layerValues: list of numpy arrays, each array representing the values of the 158 | neurons obtained during a forward pass of the network 159 | finalLayerErrors: errors on the final layer, they depend on the error function 160 | chosen. For softmax activation function on the last layer, use cross 161 | entropy as an error function. 162 | """ 163 | def backprop(weights, layerValues, finalLayerErrors, activationFunctions): 164 | nrLayers = len(weights) + 1 165 | deDw = [] 166 | deDbias = [] 167 | upperLayerErrors = finalLayerErrors 168 | 169 | for layer in xrange(nrLayers - 1, 0, -1): 170 | deDz = activationFunctions[layer - 1].derivativeForLinearSum( 171 | upperLayerErrors, layerValues[layer]) 172 | # upperLayerErrors = np.dot(deDz, weights[layer - 1].T) 173 | upperLayerErrors = np.tensordot(deDz, weights[layer - 1].T, [[deDz.ndim - 1], [weights[layer - 1].T.ndim -2]]) 174 | 175 | dw = np.einsum('ij,ik->jk', layerValues[layer - 1], deDz) 176 | 177 | dbias = deDz.sum(axis=0) 178 | 179 | # Iterating in decreasing order of layers, so we are required to 180 | # append the weight derivatives at the front as we go along 181 | deDw.insert(0, dw) 182 | deDbias.insert(0, dbias) 183 | 184 | return deDw, deDbias 185 | 186 | """ Does not do dropout. Used for classification. """ 187 | def forwardPass(weights, biases, activationFunctions, dataInstaces): 188 | currentLayerValues = dataInstaces 189 | layerValues = [currentLayerValues] 190 | size = dataInstaces.shape[0] 191 | 192 | for stage in xrange(len(weights)): 193 | w = weights[stage] 194 | b = biases[stage] 195 | activation = activationFunctions[stage] 196 | 197 | linearSum = np.dot(currentLayerValues, w) + np.tile(b, (size, 1)) 198 | currentLayerValues = activation.value(linearSum) 199 | layerValues += [currentLayerValues] 200 | 201 | return layerValues 202 | 203 | 204 | """Does a forward pass trought the network and computes the values of the 205 | neurons in all the layers. 206 | Required for backpropagation and classification. 207 | 208 | Arguments: 209 | dataInstaces: The instances to be run trough the network. 210 | """ 211 | def forwardPassDropout(weights, biases, activationFunctions, 212 | dataInstaces, dropout, visibleDropout): 213 | # dropout on the visible units 214 | # generally this is around 80% 215 | visibleOn = sample(visibleDropout, dataInstaces.shape) 216 | thinnedValues = dataInstaces * visibleOn 217 | layerValues = [thinnedValues] 218 | size = dataInstaces.shape[0] 219 | 220 | for stage in xrange(len(weights)): 221 | w = weights[stage] 222 | b = biases[stage] 223 | activation = activationFunctions[stage] 224 | 225 | linearSum = np.dot(thinnedValues, w) + np.tile(b, (size, 1)) 226 | currentLayerValues = activation.value(linearSum) 227 | # this is the way to do it, because of how backprop works the wij 228 | # will cancel out if the unit on the layer is non active 229 | # de/ dw_i_j = de / d_z_j * d_z_j / d_w_i_j = de / d_z_j * y_i 230 | # so if we set a unit as non active here (and we have to because 231 | # of this exact same reason and of ow we backpropagate) 232 | if stage != len(weights) - 1: 233 | 234 | on = sample(dropout, currentLayerValues.shape) 235 | thinnedValues = on * currentLayerValues 236 | layerValues += [thinnedValues] 237 | else: 238 | layerValues += [currentLayerValues] 239 | 240 | return layerValues 241 | 242 | 243 | """ Computes the derivatives of the top most layer given their output and the 244 | target labels. This is computed using the cross entropy function. 
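For one-hot targets t and network outputs y, the error is
E = - sum_i t_i * log(y_i), so dE/dy_i = - t_i / y_i, which is exactly the
expression returned below.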
245 | See: http://en.wikipedia.org/wiki/Cross_entropy for the discrete case. 246 | Since it is used with a softmax unit for classification, the output of the unit 247 | represents a discrete probability distribution and the expected values are 248 | a one-of-K binary vector, with 1 for the correct class and 0 for all the rest. 249 | """ 250 | def derivativesCrossEntropyError(expected, actual): 251 | return - expected * (1.0 / actual) 252 | 253 | # Only works with binary units 254 | def wakeSleep(): 255 | pass 256 | # need to alternate between wake and sleep phases 257 | -------------------------------------------------------------------------------- /code/old-version/readmnist.py: -------------------------------------------------------------------------------- 1 | """ Disclaimer: this code was adapted from 2 | http://g.sweyla.com/blog/2012/mnist-numpy/ 3 | """ 4 | 5 | __author__ = "Mihaela Rosca" 6 | __contact__ = "mihaela.c.rosca@gmail.com" 7 | 8 | import os, struct 9 | import numpy as np 10 | 11 | from array import array as pyarray 12 | 13 | """ 14 | Arguments: startExample: index of the first example to read; count: how many examples to read; digits: list of digits to keep (defaults to all ten); bTrain: if True read the training set, otherwise the test set; path: directory containing the MNIST idx files; returnImages: if True return 28x28 images instead of flattened vectors. 15 | Returns: a tuple (inputVectors, labels) of numpy arrays. 16 | """ 17 | def read(startExample, count, digits=None, bTrain=True, path=".", 18 | returnImages=False): 19 | if digits is None: 20 | digits = range(0, 10) 21 | 22 | if bTrain: 23 | fname_img = os.path.join(path, 'train-images-idx3-ubyte') 24 | fname_lbl = os.path.join(path, 'train-labels-idx1-ubyte') 25 | else: 26 | fname_img = os.path.join(path, 't10k-images-idx3-ubyte') 27 | fname_lbl = os.path.join(path, 't10k-labels-idx1-ubyte') 28 | 29 | fImages = open(fname_img,'rb') 30 | fLabels = open(fname_lbl,'rb') 31 | 32 | # read the header information in the images file. 33 | s1, s2, s3, s4 = fImages.read(4), fImages.read(4), fImages.read(4), fImages.read(4) 34 | mnIm = struct.unpack('>I',s1)[0] 35 | numIm = struct.unpack('>I',s2)[0] 36 | rowsIm = struct.unpack('>I',s3)[0] 37 | colsIm = struct.unpack('>I',s4)[0] 38 | # seek to the image we want to start on 39 | fImages.seek(16+startExample*rowsIm*colsIm) 40 | 41 | # read the header information in the labels file and seek to the position 42 | # in the file for the label we want to start on. 43 | mnL = struct.unpack('>I',fLabels.read(4))[0] 44 | numL = struct.unpack('>I',fLabels.read(4))[0] 45 | fLabels.seek(8+startExample) 46 | 47 | inputVectors = [] # list of input vectors (the labels are collected separately) 48 | labels = [] 49 | 50 | for c in range(count): 51 | # get the correct label from the labels file. 52 | val = struct.unpack('>B',fLabels.read(1))[0] 53 | labels.append(val) 54 | 55 | vec = map(lambda x: struct.unpack('>B',fImages.read(1))[0], 56 | range(rowsIm*colsIm)) 57 | # get the input from the image file 58 | inputVectors.append(np.array(vec)) 59 | 60 | 61 | # Filter out the unwanted digits 62 | ind = [k for k in xrange(len(labels)) if labels[k] in digits ] 63 | labels = map(lambda x: labels[x], ind) 64 | inputVectors = map(lambda x: inputVectors[x], ind) 65 | 66 | fImages.close() 67 | fLabels.close() 68 | 69 | if returnImages: 70 | images = map(lambda x: x.reshape(28,28), inputVectors) 71 | return np.array(images), np.array(labels) 72 | else: 73 | return np.array(inputVectors), np.array(labels) 74 | -------------------------------------------------------------------------------- /code/old-version/restrictedBoltzmannMachine.py: -------------------------------------------------------------------------------- 1 | """Implementation of a restricted Boltzmann machine. 2 | 3 | Supports different energy functions, which makes it possible 4 | to deal with real-valued units.
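For example, real-valued inputs are usually modelled with Gaussian visible units,
which replace the linear visible term of the energy function with a quadratic one.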
5 | 6 | TODO: monitor overfitting 7 | """ 8 | 9 | __author__ = "Mihaela Rosca" 10 | __contact__ = "mihaela.c.rosca@gmail.com" 11 | 12 | 13 | import numpy as np 14 | from common import * 15 | 16 | EXPENSIVE_CHECKS_ON = False 17 | 18 | # TODO: different learning rates for weights and biases 19 | # TODO: nesterov method for momentum 20 | # TODO: rmsprop 21 | """ 22 | Represents an RBM 23 | """ 24 | class RBM(object): 25 | 26 | def __init__(self, nrVisible, nrHidden, trainingFunction, dropout, 27 | visibleDropout, activationFun=sigmoid): 28 | # dropout = 1 means no dropout, keep all the weights 29 | self.dropout = dropout 30 | # dropout = 1 means no dropout, keep all the weights 31 | self.visibleDropout = visibleDropout 32 | self.nrHidden = nrHidden 33 | self.nrVisible = nrVisible 34 | self.trainingFunction = trainingFunction 35 | self.activationFun = activationFun 36 | self.initialized = False 37 | 38 | def train(self, data): 39 | # If the network has not been initialized yet, do it now, 40 | # i.e. if this is the first batch of training 41 | if not self.initialized: 42 | self.weights = self.initializeWeights(self.nrVisible, self.nrHidden) 43 | self.biases = self.intializeBiases(data, self.nrHidden) 44 | # self.data = data 45 | # else: 46 | # self.data = np.concatenate(self.data, data) 47 | 48 | self.biases, self.weights = self.trainingFunction(data, 49 | self.biases, 50 | self.weights, 51 | self.activationFun, 52 | self.dropout, 53 | self.visibleDropout) 54 | self.testWeights = self.weights * self.dropout 55 | 56 | assert self.weights.shape == (self.nrVisible, self.nrHidden) 57 | assert self.biases[0].shape[0] == self.nrVisible 58 | assert self.biases[1].shape[0] == self.nrHidden 59 | 60 | """ Reconstructs the data given using this Boltzmann machine.""" 61 | def reconstruct(self, dataInstances): 62 | return reconstruct(self.biases, self.testWeights, dataInstances, 63 | self.activationFun) 64 | 65 | def hiddenRepresentation(self, dataInstances): 66 | return updateLayer(Layer.HIDDEN, dataInstances, self.biases, 67 | self.testWeights, self.activationFun, True) 68 | 69 | @classmethod 70 | def initializeWeights(cls, nrVisible, nrHidden): 71 | return np.random.normal(0, 0.01, (nrVisible, nrHidden)) 72 | 73 | @classmethod 74 | def intializeBiases(cls, data, nrHidden): 75 | # get the percentage of data points that have the i'th unit on 76 | # and set the visible bias to log(p / (1 - p)) 77 | percentages = data.mean(axis=0, dtype='float') 78 | vectorized = np.vectorize(safeLogFraction, otypes=[np.float]) 79 | visibleBiases = vectorized(percentages) 80 | 81 | hiddenBiases = np.zeros(nrHidden) 82 | return np.array([visibleBiases, hiddenBiases]) 83 | 84 | def reconstruct(biases, weights, dataInstances, activationFun): 85 | hidden = updateLayer(Layer.HIDDEN, dataInstances, biases, weights, 86 | activationFun, True) 87 | 88 | visibleReconstructions = updateLayer(Layer.VISIBLE, hidden, 89 | biases, weights, activationFun, False) 90 | return visibleReconstructions 91 | 92 | def reconstructionError(biases, weights, data, activationFun): 93 | # Returns the rmse of the reconstruction of the data. 94 | # Good to keep track of: it should decrease through training, 95 | # faster at first and then more slowly 96 | reconstructions = reconstruct(biases, weights, data, activationFun) 97 | return rmse(reconstructions, data) 98 | 99 | """ Training functions.""" 100 | 101 | """ Full CD function. 102 | Arguments: 103 | data: the data to use for training. 104 | A numpy ndarray.
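Each row is one training example, with one column per visible unit.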
105 | biases: a numpy array with the visible and the hidden biases. 106 | 107 | Returns: 108 | the updated biases and weights. 109 | The mini-batch size defaults to 10. 110 | """ 111 | # TODO: consider removing the per-batch step function that gets passed in 112 | # and keeping a single code path: it would optimize the code and also 113 | # make it easier to change, 114 | # if a nice and easy refactoring can be found 115 | def contrastiveDivergence(data, biases, weights, activationFun, dropout, 116 | visibleDropout, miniBatchSize=10): 117 | N = len(data) 118 | epochs = N / miniBatchSize 119 | 120 | # sample the dropout masks: this chooses which visible units 121 | # are kept for dropout 122 | on = sample(visibleDropout, data.shape) 123 | dropoutData = data * on 124 | 125 | epsilon = 0.01 126 | decayFactor = 0.0002 127 | weightDecay = True 128 | reconstructionStep = 50 129 | 130 | oldDeltaWeights = np.zeros(weights.shape) 131 | oldDeltaVisible = np.zeros(biases[0].shape) 132 | oldDeltaHidden = np.zeros(biases[1].shape) 133 | 134 | batchLearningRate = epsilon / miniBatchSize 135 | print "batchLearningRate" 136 | print batchLearningRate 137 | # NOTE: each 'epoch' below is one mini-batch, so the loop makes a single pass over the data 138 | for epoch in xrange(epochs): 139 | batchData = dropoutData[epoch * miniBatchSize: (epoch + 1) * miniBatchSize, :] 140 | if epoch < epochs / 100: 141 | momentum = 0.5 142 | else: 143 | momentum = 0.95 144 | 145 | if epoch < (N/7) * 10: 146 | cdSteps = 3 147 | elif epoch < (N/9) * 10: 148 | cdSteps = 5 149 | else: 150 | cdSteps = 10 151 | 152 | if EXPENSIVE_CHECKS_ON: 153 | if epoch % reconstructionStep == 0: 154 | print "reconstructionError" 155 | print reconstructionError(biases, weights, data, activationFun) 156 | 157 | weightsDiff, visibleBiasDiff, hiddenBiasDiff =\ 158 | modelAndDataSampleDiffs(batchData, biases, weights, 159 | activationFun, dropout, cdSteps) 160 | # Update the weights with the 161 | # data term minus the model term 162 | # (positive phase minus negative phase), 163 | # including the weight decay factor 164 | deltaWeights = (batchLearningRate * weightsDiff 165 | - epsilon * weightDecay * decayFactor * weights) 166 | 167 | deltaVisible = batchLearningRate * visibleBiasDiff 168 | deltaHidden = batchLearningRate * hiddenBiasDiff 169 | 170 | deltaWeights += momentum * oldDeltaWeights 171 | deltaVisible += momentum * oldDeltaVisible 172 | deltaHidden += momentum * oldDeltaHidden 173 | 174 | oldDeltaWeights = deltaWeights 175 | oldDeltaVisible = deltaVisible 176 | oldDeltaHidden = deltaHidden 177 | 178 | # Update the weights 179 | weights += deltaWeights 180 | # Update the visible biases 181 | biases[0] += deltaVisible 182 | 183 | # Update the hidden biases 184 | biases[1] += deltaHidden 185 | 186 | print reconstructionError(biases, weights, data, activationFun) 187 | return biases, weights 188 | 189 | def modelAndDataSampleDiffs(batchData, biases, weights, activationFun, 190 | dropout, cdSteps): 191 | # Compute the hidden units from the data 192 | hidden = updateLayer(Layer.HIDDEN, batchData, biases, weights, activationFun, 193 | binary=True) 194 | 195 | # Choose the units to be active at this point: 196 | # different sets for each element in the mini-batch 197 | on = sample(dropout, hidden.shape) 198 | dropoutHidden = on * hidden 199 | hiddenReconstruction = dropoutHidden 200 | 201 | for i in xrange(cdSteps - 1): 202 | visibleReconstruction = updateLayer(Layer.VISIBLE, hiddenReconstruction, 203 | biases, weights, activationFun, 204 | binary=False) 205 | hiddenReconstruction = updateLayer(Layer.HIDDEN, visibleReconstruction, 206 | biases, weights, activationFun, 207 | binary=True) 208 | # sample the
hidden units active (for dropout) 209 | hiddenReconstruction = hiddenReconstruction * on 210 | 211 | # Do the last reconstruction from the probabilities in the last phase 212 | visibleReconstruction = updateLayer(Layer.VISIBLE, hiddenReconstruction, 213 | biases, weights, activationFun, 214 | binary=False) 215 | hiddenReconstruction = updateLayer(Layer.HIDDEN, visibleReconstruction, 216 | biases, weights, activationFun, 217 | binary=False) 218 | 219 | hiddenReconstruction = hiddenReconstruction * on 220 | # here it should be hidden * on - hiddenreconstruction 221 | # also below in the hidden bias 222 | weightsDiff = np.dot(batchData.T, dropoutHidden) -\ 223 | np.dot(visibleReconstruction.T, hiddenReconstruction) 224 | assert weightsDiff.shape == weights.shape 225 | 226 | visibleBiasDiff = np.sum(batchData - visibleReconstruction, axis=0) 227 | assert visibleBiasDiff.shape == biases[0].shape 228 | 229 | hiddenBiasDiff = np.sum(dropoutHidden - hiddenReconstruction, axis=0) 230 | assert hiddenBiasDiff.shape == biases[1].shape 231 | 232 | return weightsDiff, visibleBiasDiff, hiddenBiasDiff 233 | 234 | """ Updates an entire layer. This procedure can be used both in training 235 | and in testing. 236 | Can even take multiple values of the layer, each of them given as rows 237 | Uses matrix operations. 238 | """ 239 | def updateLayer(layer, otherLayerValues, biases, weights, activationFun, 240 | binary=False): 241 | 242 | bias = biases[layer] 243 | size = otherLayerValues.shape[0] 244 | 245 | if layer == Layer.VISIBLE: 246 | activation = np.dot(otherLayerValues, weights.T) 247 | else: 248 | activation = np.dot(otherLayerValues, weights) 249 | 250 | probs = activationFun(np.tile(bias, (size, 1)) + activation) 251 | 252 | if binary: 253 | # Sample from the distributions 254 | return sampleAll(probs) 255 | 256 | return probs 257 | 258 | # Another training algorithm. Slower than Contrastive divergence, but 259 | # gives better results. Not used in practice as it is too slow. 
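# PCD maintains persistent Markov chains (the 'fantasy particles' below) that
# carry over from one mini-batch to the next instead of being re-initialised
# from the data, which gives better samples from the model distribution.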
260 | # This is what Hinton said but it is not OK due to NIPS paper 261 | # This is huge code copy paste but keep it like this for now 262 | def PCD(data, biases, weights, activationFun, dropout, 263 | visibleDropout, miniBatchSize=10): 264 | N = len(data) 265 | epochs = N / miniBatchSize 266 | 267 | # sample the probabily distributions allow you to chose from the 268 | # visible units for dropout 269 | # on = sample(visibleDropout, data.shape) 270 | # dropoutData = data * on 271 | dropoutData = data 272 | 273 | epsilon = 0.01 274 | decayFactor = 0.0002 275 | weightDecay = True 276 | reconstructionStep = 50 277 | 278 | oldDeltaWeights = np.zeros(weights.shape) 279 | oldDeltaVisible = np.zeros(biases[0].shape) 280 | oldDeltaHidden = np.zeros(biases[1].shape) 281 | 282 | batchLearningRate = epsilon / miniBatchSize 283 | print "batchLearningRate" 284 | print batchLearningRate 285 | 286 | # make this an argument or something 287 | nrFantasyParticles = miniBatchSize 288 | 289 | fantVisible = np.random.randint(2, size=(nrFantasyParticles, weights.shape[0])) 290 | fantHidden = np.random.randint(2, size=(nrFantasyParticles, weights.shape[1])) 291 | 292 | fantasyParticles = (fantVisible, fantHidden) 293 | steps = 10 294 | 295 | for epoch in xrange(epochs): 296 | batchData = dropoutData[epoch * miniBatchSize: (epoch + 1) * miniBatchSize, :] 297 | if epoch < epochs / 100: 298 | momentum = 0.5 299 | else: 300 | momentum = 0.95 301 | 302 | if EXPENSIVE_CHECKS_ON: 303 | if epoch % reconstructionStep == 0: 304 | print "reconstructionError" 305 | print reconstructionError(biases, weights, data, activationFun) 306 | 307 | print fantasyParticles[0] 308 | print fantasyParticles[1] 309 | weightsDiff, visibleBiasDiff, hiddenBiasDiff, fantasyParticles =\ 310 | modelAndDataSampleDiffsPCD(batchData, biases, weights, 311 | activationFun, dropout, steps, fantasyParticles) 312 | 313 | # Update the weights 314 | # data - model 315 | # Positive phase - negative 316 | # Weight decay factor 317 | deltaWeights = (batchLearningRate * weightsDiff 318 | - epsilon * weightDecay * decayFactor * weights) 319 | 320 | deltaVisible = batchLearningRate * visibleBiasDiff 321 | deltaHidden = batchLearningRate * hiddenBiasDiff 322 | 323 | deltaWeights += momentum * oldDeltaWeights 324 | deltaVisible += momentum * oldDeltaVisible 325 | deltaHidden += momentum * oldDeltaHidden 326 | 327 | oldDeltaWeights = deltaWeights 328 | oldDeltaVisible = deltaVisible 329 | oldDeltaHidden = deltaHidden 330 | 331 | # Update the weighths 332 | weights += deltaWeights 333 | # Update the visible biases 334 | biases[0] += deltaVisible 335 | 336 | # Update the hidden biases 337 | biases[1] += deltaHidden 338 | 339 | print reconstructionError(biases, weights, data, activationFun) 340 | return biases, weights 341 | 342 | 343 | # Same modelAndDataSampleDiff but for persistent contrastive divergence 344 | # First run it without dropout 345 | def modelAndDataSampleDiffsPCD(batchData, biases, weights, activationFun, 346 | dropout, steps, fantasyParticles): 347 | # Reconstruct the hidden weigs from the data 348 | hidden = updateLayer(Layer.HIDDEN, batchData, biases, weights, activationFun, 349 | binary=True) 350 | 351 | # Chose the units to be active at this point 352 | # different sets for each element in the mini batches 353 | # on = sample(dropout, hidden.shape) 354 | # dropoutHidden = on * hidden 355 | # hiddenReconstruction = dropoutHidden 356 | 357 | for i in xrange(steps): 358 | visibleReconstruction = updateLayer(Layer.VISIBLE, fantasyParticles[1], 359 
| biases, weights, activationFun, 360 | binary=False) 361 | hiddenReconstruction = updateLayer(Layer.HIDDEN, visibleReconstruction, 362 | biases, weights, activationFun, 363 | binary=True) 364 | 365 | # sample the hidden units active (for dropout) 366 | # hiddenReconstruction = hiddenReconstruction * on 367 | 368 | fantasyParticles = (visibleReconstruction, hiddenReconstruction) 369 | 370 | # here it should be hidden * on - hiddenReconstruction 371 | # also below in the hidden bias 372 | weightsDiff = np.dot(batchData.T, hidden) -\ 373 | np.dot(visibleReconstruction.T, hiddenReconstruction) 374 | assert weightsDiff.shape == weights.shape 375 | 376 | visibleBiasDiff = np.sum(batchData - visibleReconstruction, axis=0) 377 | assert visibleBiasDiff.shape == biases[0].shape 378 | 379 | hiddenBiasDiff = np.sum(hidden - hiddenReconstruction, axis=0) 380 | assert hiddenBiasDiff.shape == biases[1].shape 381 | 382 | return weightsDiff, visibleBiasDiff, hiddenBiasDiff, fantasyParticles -------------------------------------------------------------------------------- /code/old-version/utils.py: -------------------------------------------------------------------------------- 1 | """ This file contains different utility functions that are not connected 2 | in anyway to the networks presented in the tutorials, but rather help in 3 | processing the outputs into a more understandable way. 4 | 5 | For example ``tile_raster_images`` helps in generating a easy to grasp 6 | image from a set of samples or weights. 7 | 8 | CODE OBTAINED FROM: http://deeplearning.net/tutorial/code/utils.py 9 | 10 | """ 11 | import numpy 12 | 13 | def scale_to_unit_interval(ndar, eps=1e-8): 14 | """ Scales all values in the ndarray ndar to be between 0 and 1 """ 15 | ndar = ndar.copy() 16 | ndar -= ndar.min() 17 | ndar *= 1.0 / (ndar.max() + eps) 18 | return ndar 19 | 20 | 21 | def tile_raster_images(X, img_shape, tile_shape, tile_spacing=(0, 0), 22 | scale_rows_to_unit_interval=True, 23 | output_pixel_vals=True): 24 | """ 25 | Transform an array with one flattened image per row, into an array in 26 | which images are reshaped and layed out like tiles on a floor. 27 | 28 | This function is useful for visualizing datasets whose rows are images, 29 | and also columns of matrices for transforming those rows 30 | (such as the first layer of a neural net). 31 | 32 | :type X: a 2-D ndarray or a tuple of 4 channels, elements of which can 33 | be 2-D ndarrays or None; 34 | :param X: a 2-D array in which every row is a flattened image. 35 | 36 | :type img_shape: tuple; (height, width) 37 | :param img_shape: the original shape of each image 38 | 39 | :type tile_shape: tuple; (rows, cols) 40 | :param tile_shape: the number of images to tile (rows, cols) 41 | 42 | :param output_pixel_vals: if output should be pixel values (i.e. int8 43 | values) or floats 44 | 45 | :param scale_rows_to_unit_interval: if the values need to be scaled before 46 | being plotted to [0,1] or not 47 | 48 | 49 | :returns: array suitable for viewing as an image. 50 | (See:`PIL.Image.fromarray`.) 51 | :rtype: a 2-d array with same dtype as X. 
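Example (hypothetical: RBM weights of shape (784, 100) learned on MNIST):

    tile_raster_images(X=weights.T, img_shape=(28, 28),
                       tile_shape=(10, 10), tile_spacing=(1, 1))

lays out the 100 learned filters (one per hidden unit) as a single tiled image.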
52 | 53 | """ 54 | 55 | assert len(img_shape) == 2 56 | assert len(tile_shape) == 2 57 | assert len(tile_spacing) == 2 58 | 59 | # The expression below can be re-written in a more C style as 60 | # follows : 61 | # 62 | # out_shape = [0,0] 63 | # out_shape[0] = (img_shape[0]+tile_spacing[0])*tile_shape[0] - 64 | # tile_spacing[0] 65 | # out_shape[1] = (img_shape[1]+tile_spacing[1])*tile_shape[1] - 66 | # tile_spacing[1] 67 | out_shape = [(ishp + tsp) * tshp - tsp for ishp, tshp, tsp 68 | in zip(img_shape, tile_shape, tile_spacing)] 69 | 70 | if isinstance(X, tuple): 71 | assert len(X) == 4 72 | # Create an output numpy ndarray to store the image 73 | if output_pixel_vals: 74 | out_array = numpy.zeros((out_shape[0], out_shape[1], 4), 75 | dtype='uint8') 76 | else: 77 | out_array = numpy.zeros((out_shape[0], out_shape[1], 4), 78 | dtype=X.dtype) 79 | 80 | #colors default to 0, alpha defaults to 1 (opaque) 81 | if output_pixel_vals: 82 | channel_defaults = [0, 0, 0, 255] 83 | else: 84 | channel_defaults = [0., 0., 0., 1.] 85 | 86 | for i in xrange(4): 87 | if X[i] is None: 88 | # if channel is None, fill it with zeros of the correct 89 | # dtype 90 | dt = out_array.dtype 91 | if output_pixel_vals: 92 | dt = 'uint8' 93 | out_array[:, :, i] = numpy.zeros(out_shape, 94 | dtype=dt) + channel_defaults[i] 95 | else: 96 | # use a recurrent call to compute the channel and store it 97 | # in the output 98 | out_array[:, :, i] = tile_raster_images( 99 | X[i], img_shape, tile_shape, tile_spacing, 100 | scale_rows_to_unit_interval, output_pixel_vals) 101 | return out_array 102 | 103 | else: 104 | # if we are dealing with only one channel 105 | H, W = img_shape 106 | Hs, Ws = tile_spacing 107 | 108 | # generate a matrix to store the output 109 | dt = X.dtype 110 | if output_pixel_vals: 111 | dt = 'uint8' 112 | out_array = numpy.zeros(out_shape, dtype=dt) 113 | 114 | for tile_row in xrange(tile_shape[0]): 115 | for tile_col in xrange(tile_shape[1]): 116 | if tile_row * tile_shape[1] + tile_col < X.shape[0]: 117 | this_x = X[tile_row * tile_shape[1] + tile_col] 118 | if scale_rows_to_unit_interval: 119 | # if we should scale values to be between 0 and 1 120 | # do this by calling the `scale_to_unit_interval` 121 | # function 122 | this_img = scale_to_unit_interval( 123 | this_x.reshape(img_shape)) 124 | else: 125 | this_img = this_x.reshape(img_shape) 126 | # add the slice to the corresponding position in the 127 | # output array 128 | c = 1 129 | if output_pixel_vals: 130 | c = 255 131 | out_array[ 132 | tile_row * (H + Hs): tile_row * (H + Hs) + H, 133 | tile_col * (W + Ws): tile_col * (W + Ws) + W 134 | ] = this_img * c 135 | return out_array 136 | -------------------------------------------------------------------------------- /code/read/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mihaelacr/pydeeplearn/48c6df3f3f854195b7f8b830b9f70fac19acdc9a/code/read/__init__.py -------------------------------------------------------------------------------- /code/read/facedetection.py: -------------------------------------------------------------------------------- 1 | """ The aim of this module is to do face detection. 
This is required in order to 2 | 3 | crop some of the input databases, because they are not centered. """ 4 | 5 | __author__ = "Mihaela Rosca" 6 | __contact__ = "mihaela.c.rosca@gmail.com" 7 | 8 | import cv2 9 | 10 | # XML file with the face detection cascade data 11 | CASCADE_FN = "haarcascade_frontalface_default.xml" 12 | 13 | 14 | """Needs the image to already be black and white. """ 15 | def cropFace(image, rescaleForReconigtion=2): 16 | cascade = cv2.CascadeClassifier(CASCADE_FN) 17 | imageScaled = cv2.resize(image, (image.shape[0] / rescaleForReconigtion, 18 | image.shape[1] / rescaleForReconigtion)) 19 | 20 | # The image might already be equalized, so no need for that here 21 | gray = cv2.equalizeHist(imageScaled) 22 | rects = cascade.detectMultiScale(gray, 1.1, 3) 23 | 24 | # We need to find exactly one face in the picture 25 | print "len(rects)" 26 | print len(rects) 27 | if len(rects) != 1: 28 | return None 29 | 30 | x, y, w, h = map(lambda x: x * rescaleForReconigtion, rects[0]) 31 | face = image[y:y + h, x:x + w] 32 | return face 33 | -------------------------------------------------------------------------------- /code/read/readmnist.py: -------------------------------------------------------------------------------- 1 | """ Disclaimer: this code was adapted from 2 | http://g.sweyla.com/blog/2012/mnist-numpy/ 3 | """ 4 | __author__ = "Mihaela Rosca" 5 | __contact__ = "mihaela.c.rosca@gmail.com" 6 | 7 | 8 | import os, struct 9 | import numpy as np 10 | 11 | from array import array as pyarray 12 | 13 | """ 14 | Arguments: startExample: index of the first example to read; count: how many examples to read; digits: list of digits to keep (defaults to all ten); bTrain: if True read the training set, otherwise the test set; path: directory containing the MNIST idx files; returnImages: if True return 28x28 images instead of flattened vectors. 15 | Returns: a tuple (inputVectors, labels) of numpy arrays. 16 | """ 17 | def read(startExample, count, digits=None, bTrain=True, path=".", 18 | returnImages=False): 19 | if digits is None: 20 | digits = range(0, 10) 21 | 22 | if bTrain: 23 | fname_img = os.path.join(path, 'train-images-idx3-ubyte') 24 | fname_lbl = os.path.join(path, 'train-labels-idx1-ubyte') 25 | else: 26 | fname_img = os.path.join(path, 't10k-images-idx3-ubyte') 27 | fname_lbl = os.path.join(path, 't10k-labels-idx1-ubyte') 28 | 29 | fImages = open(fname_img,'rb') 30 | fLabels = open(fname_lbl,'rb') 31 | 32 | # read the header information in the images file. 33 | s1, s2, s3, s4 = fImages.read(4), fImages.read(4), fImages.read(4), fImages.read(4) 34 | mnIm = struct.unpack('>I',s1)[0] 35 | numIm = struct.unpack('>I',s2)[0] 36 | rowsIm = struct.unpack('>I',s3)[0] 37 | colsIm = struct.unpack('>I',s4)[0] 38 | # seek to the image we want to start on 39 | fImages.seek(16+startExample*rowsIm*colsIm) 40 | 41 | # read the header information in the labels file and seek to the position 42 | # in the file for the label we want to start on. 43 | mnL = struct.unpack('>I',fLabels.read(4))[0] 44 | numL = struct.unpack('>I',fLabels.read(4))[0] 45 | fLabels.seek(8+startExample) 46 | 47 | inputVectors = [] # list of input vectors (the labels are collected separately) 48 | labels = [] 49 | 50 | for c in range(count): 51 | # get the correct label from the labels file.
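# (each label is stored as a single unsigned byte, hence the '>B' format)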
52 | val = struct.unpack('>B',fLabels.read(1))[0] 53 | labels.append(val) 54 | 55 | vec = map(lambda x: struct.unpack('>B',fImages.read(1))[0], 56 | range(rowsIm*colsIm)) 57 | # get the input from the image file 58 | inputVectors.append(np.array(vec)) 59 | 60 | 61 | # Filter out the unwanted digits 62 | ind = [k for k in xrange(len(labels)) if labels[k] in digits ] 63 | labels = map(lambda x: labels[x], ind) 64 | inputVectors = map(lambda x: inputVectors[x], ind) 65 | 66 | fImages.close() 67 | fLabels.close() 68 | 69 | if returnImages: 70 | images = map(lambda x: x.reshape(28,28), inputVectors) 71 | return np.array(images), np.array(labels) 72 | else: 73 | return np.array(inputVectors), np.array(labels) 74 | -------------------------------------------------------------------------------- /code/similarity/similarity.py: -------------------------------------------------------------------------------- 1 | """ This module implements the idea of finding out emotions similarities 2 | by using the experiments similar to what Hinton describes in his NRelu paper.""" 3 | 4 | __author__ = "Mihaela Rosca" 5 | __contact__ = "mihaela.c.rosca@gmail.com" 6 | 7 | import numpy as np 8 | import theano 9 | from theano import tensor as T 10 | 11 | import sys 12 | # We need this to import other modules 13 | sys.path.append("..") 14 | 15 | from lib import restrictedBoltzmannMachine as rbm 16 | from lib.common import * 17 | from similarityUtils import * 18 | 19 | theanoFloat = theano.config.floatX 20 | 21 | 22 | class Trainer(object): 23 | 24 | def __init__(self, input1, input2, net): 25 | 26 | self.w = theano.shared(value=np.float32(0)) 27 | self.b = theano.shared(value=np.float32(0)) 28 | self.net = net 29 | 30 | self.oldMeanSquarew = theano.shared(value=np.float32(0)) 31 | self.oldMeanSquareb = theano.shared(value=np.float32(0)) 32 | self.oldMeanSquareWeights = theano.shared(value=np.zeros(self.net.weights.shape , dtype=theanoFloat)) 33 | self.oldMeanSquareBias = theano.shared(value=np.zeros(self.net.biases[1].shape , dtype=theanoFloat)) 34 | 35 | self.oldDw = theano.shared(value=np.float32(0)) 36 | self.oldDb = theano.shared(value=np.float32(0)) 37 | self.oldDWeights = theano.shared(value=np.zeros(self.net.weights.shape , dtype=theanoFloat)) 38 | self.oldDBias = theano.shared(value=np.zeros(self.net.biases[1].shape , dtype=theanoFloat)) 39 | 40 | hiddenBias = net.sharedBiases[1] 41 | self.params = [self.w, self.b, self.net.sharedWeights, hiddenBias] 42 | self.oldDParams = [self.oldDw, self.oldDb, self.oldDWeights, self.oldDBias] 43 | self.oldMeanSquares = [self.oldMeanSquarew, self.oldMeanSquareb, self.oldMeanSquareWeights, self.oldMeanSquareBias] 44 | 45 | 46 | _, weightForHidden = rbm.testWeights(self.net.sharedWeights, 47 | visibleDropout=self.net.visibleDropout, hiddenDropout=self.net.hiddenDropout) 48 | 49 | hiddenActivations1 = net.hiddenActivationFunction.deterministic(T.dot(input1, weightForHidden) + hiddenBias) 50 | hiddenActivations2 = net.hiddenActivationFunction.deterministic(T.dot(input2, weightForHidden) + hiddenBias) 51 | 52 | # Use the cosine distance between the two vectors of of activations 53 | cos = cosineDistance(hiddenActivations1, hiddenActivations2) 54 | 55 | self.cos = cos 56 | prob = 1.0 /( 1.0 + T.exp(self.w * cos + self.b)) 57 | 58 | self.output = prob 59 | 60 | 61 | class SimilarityNet(object): 62 | 63 | def __init__(self, learningRate, maxMomentum, rbmNrVis, rbmNrHid, rbmLearningRate, 64 | visibleActivationFunction, hiddenActivationFunction, 65 | rbmDropoutVis, rbmDropoutHid, 
rmsprop,trainingEpochsRBM, 66 | nesterovRbm, 67 | momentumFactorForLearningRateRBM, 68 | sparsityConstraint, sparsityRegularization, sparsityTraget): 69 | 70 | self.learningRate = np.float32(learningRate) 71 | self.rmsprop = rmsprop 72 | self.rbmNrVis = rbmNrVis 73 | self.maxMomentum = np.float32(maxMomentum) 74 | self.rbmNrHid = rbmNrHid 75 | self.rbmLearningRate = np.float32(rbmLearningRate) 76 | self.rbmDropoutHid = rbmDropoutHid 77 | self.rbmDropoutVis = rbmDropoutVis 78 | self.trainingEpochsRBM = trainingEpochsRBM 79 | self.visibleActivationFunction = visibleActivationFunction 80 | self.hiddenActivationFunction = hiddenActivationFunction 81 | self.nesterovRbm = nesterovRbm 82 | self.momentumFactorForLearningRateRBM = momentumFactorForLearningRateRBM 83 | 84 | self.sparsityConstraint = sparsityConstraint 85 | self.sparsityRegularization = sparsityRegularization 86 | self.sparsityTraget = sparsityTraget 87 | 88 | if rmsprop: 89 | print "training similarity net with rmsprop" 90 | 91 | def _trainRBM(self, data1, data2): 92 | data = np.vstack([data1, data2]) 93 | 94 | net = rbm.RBM(self.rbmNrVis, self.rbmNrHid, 95 | self.rbmLearningRate, 96 | hiddenDropout=self.rbmDropoutHid, 97 | visibleDropout=self.rbmDropoutVis, 98 | visibleActivationFunction=self.visibleActivationFunction, 99 | hiddenActivationFunction=self.hiddenActivationFunction, 100 | rmsprop=True, 101 | momentumFactorForLearningRate=self.momentumFactorForLearningRateRBM, 102 | nesterov=self.nesterovRbm, 103 | trainingEpochs=self.trainingEpochsRBM, 104 | sparsityConstraint=self.sparsityConstraint, 105 | sparsityCostFunction=squaredDiff, 106 | sparsityRegularization=self.sparsityRegularization, 107 | sparsityTraget=self.sparsityTraget) 108 | net.train(data) 109 | 110 | return net 111 | 112 | 113 | def train(self, data1, data2, similarities, miniBatchSize=20, epochs=200): 114 | self.miniBatchSize = miniBatchSize 115 | nrMiniBatches = len(data1) / miniBatchSize 116 | miniBatchIndex = T.lscalar() 117 | momentum = T.fscalar() 118 | learningRate = T.fscalar() 119 | 120 | learningRateMiniBatch = np.float32(self.learningRate / miniBatchSize) 121 | print "learningRateMiniBatch in similarity net" 122 | print learningRateMiniBatch 123 | 124 | net = self._trainRBM(data1, data2) 125 | 126 | data1 = theano.shared(np.asarray(data1,dtype=theanoFloat)) 127 | data2 = theano.shared(np.asarray(data2,dtype=theanoFloat)) 128 | similarities = theano.shared(np.asarray(similarities,dtype=theanoFloat)) 129 | 130 | # The mini-batch data is a matrix 131 | x = T.matrix('x', dtype=theanoFloat) 132 | y = T.matrix('y', dtype=theanoFloat) 133 | self.x = x 134 | self.y = y 135 | 136 | z = T.vector('z', dtype=theanoFloat) 137 | 138 | trainer = Trainer(x, y, net) 139 | self.trainer = trainer 140 | 141 | # error = T.sum(T.sqr(trainer.output-z)) 142 | error = T.sum(T.nnet.binary_crossentropy(trainer.output, z)) 143 | 144 | updates = self.buildUpdates(trainer, error, learningRate, momentum) 145 | 146 | # Now you have to define the theano function 147 | discriminativeTraining = theano.function( 148 | inputs=[miniBatchIndex, learningRate, momentum], 149 | outputs=[trainer.output, trainer.cos, error], 150 | updates=updates, 151 | givens={ 152 | x: data1[miniBatchIndex * miniBatchSize:(miniBatchIndex + 1) * miniBatchSize], 153 | y: data2[miniBatchIndex * miniBatchSize:(miniBatchIndex + 1) * miniBatchSize], 154 | z: similarities[miniBatchIndex * miniBatchSize:(miniBatchIndex + 1) * miniBatchSize], 155 | }) 156 | 157 | try: 158 | for epoch in xrange(epochs): 159 | print 
"epoch", epoch 160 | momentum = np.float32(min(np.float32(0.5) + epoch * np.float32(0.05), 161 | np.float32(self.maxMomentum))) 162 | 163 | for miniBatch in xrange(nrMiniBatches): 164 | output, cos, error = discriminativeTraining(miniBatch, learningRateMiniBatch, momentum) 165 | 166 | print error /self.miniBatchSize 167 | 168 | except KeyboardInterrupt: 169 | print "you have decided to interrupt training" 170 | print "we continue testing" 171 | 172 | print trainer.w.get_value() 173 | print trainer.b.get_value() 174 | 175 | def test(self, testData1, testData2): 176 | # If it is too slow try adding mini batches 177 | testData1 = np.array(testData1, dtype=theanoFloat) 178 | testData2 = np.array(testData2, dtype=theanoFloat) 179 | 180 | # TODO : think of making data1 and data2 shared 181 | testFunction = theano.function( 182 | inputs=[], 183 | outputs=self.trainer.output, 184 | givens={self.x: testData1, 185 | self.y: testData2 186 | }) 187 | 188 | return testFunction() 189 | 190 | def buildUpdates(self, trainer, error, learningRate, momentum): 191 | if self.rmsprop: 192 | return self.buildUpdatesRmsprop(trainer, error, learningRate, momentum) 193 | else: 194 | return self.buildUpdatesNoRmsprop(trainer, error, learningRate, momentum) 195 | 196 | def buildUpdatesNoRmsprop(self, trainer, error, learningRate, momentum): 197 | updates = [] 198 | gradients = T.grad(error, trainer.params) 199 | 200 | for param, oldParamUpdate, gradient in zip(trainer.params, trainer.oldDParams, gradients): 201 | paramUpdate = momentum * oldParamUpdate - learningRate * gradient 202 | updates.append((param, param + paramUpdate)) 203 | updates.append((oldParamUpdate, paramUpdate)) 204 | 205 | return updates 206 | 207 | def buildUpdatesRmsprop(self, trainer, error, learningRate, momentum): 208 | updates = [] 209 | gradients = T.grad(error, trainer.params) 210 | 211 | for param, oldParamUpdate, oldMeanSquare, gradient in zip(trainer.params, trainer.oldDParams, trainer.oldMeanSquares, gradients): 212 | meanSquare = 0.9 * oldMeanSquare + 0.1 * gradient ** 2 213 | paramUpdate = momentum * oldParamUpdate - learningRate * gradient / T.sqrt(meanSquare + 1e-08) 214 | updates.append((param, param + paramUpdate)) 215 | updates.append((oldParamUpdate, paramUpdate)) 216 | updates.append((oldMeanSquare, meanSquare)) 217 | 218 | return updates 219 | 220 | 221 | def cosineDistance(first, second): 222 | normFirst = T.sqrt(T.sum(T.sqr(first), axis=1)) 223 | normSecond = T.sqrt(T.sum(T.sqr(second), axis=1)) 224 | return 1.0 - T.sum(first * second, axis=1) / (normFirst * normSecond + 1e-08) 225 | 226 | -------------------------------------------------------------------------------- /code/similarity/similarityUtils.py: -------------------------------------------------------------------------------- 1 | """ Utils for the similarity experiments. """ 2 | 3 | __author__ = "Mihaela Rosca" 4 | __contact__ = "mihaela.c.rosca@gmail.com" 5 | 6 | from sklearn import cross_validation 7 | import matplotlib.pyplot as plt 8 | import itertools 9 | 10 | import sys 11 | # We need this to import other modules 12 | sys.path.append("..") 13 | 14 | from read.readfacedatabases import * 15 | DEBUG = False 16 | 17 | # TODO: move to common? 
18 | def splitTrainTest(data1, data2, labels1, labels2, ratio): 19 | assert len(data1) == len(data2) 20 | assert len(labels1) == len(labels2) 21 | assert len(labels1) == len(data1) 22 | assert len(labels1) == len(labels2) 23 | 24 | data1, data2, labels1, labels2 = shuffle(data1, data2, labels1, labels2) 25 | 26 | # Random data for training and testing 27 | kf = cross_validation.KFold(n=len(data1), n_folds=ratio) 28 | for train, test in kf: 29 | break 30 | 31 | return (data1[train], data1[test], data2[train], data2[test], 32 | labels1[train], labels1[test], labels2[train], labels2[test]) 33 | 34 | """ Splits the data in pairs such that no instances in the same pairs have the same label. """ 35 | def splitShuffling(shuffling, labelsShuffling): 36 | 37 | shuffling, labelsShuffling = shuffle(shuffling, labelsShuffling) 38 | 39 | if DEBUG: 40 | print "len(shuffling)" 41 | print len(shuffling) 42 | print "len(labelsShuffling)" 43 | print len(labelsShuffling) 44 | 45 | labels = np.unique(labelsShuffling) 46 | 47 | # TODO: we already had this? maybe not remake it 48 | labelsToData = {} 49 | for label in labels: 50 | labelsToData[label] = list(shuffling[labelsShuffling == label]) 51 | if DEBUG: 52 | print len(labelsToData[label]) 53 | 54 | shuffledData1 = [] 55 | shuffledData2 = [] 56 | labelsData1 = [] 57 | labelsData2 = [] 58 | 59 | currentLabels = list(labels) 60 | 61 | while len(shuffledData1) + len(shuffledData2) <= len(shuffling): 62 | if DEBUG: 63 | print "len(shuffledData1) + len(shuffledData2)" 64 | print len(shuffledData1) + len(shuffledData2) 65 | print "len(shuffling)" 66 | print len(shuffling) 67 | 68 | print "currentLabels" 69 | print currentLabels 70 | 71 | label1 = np.random.choice(currentLabels, 1, replace=False) 72 | label1 = label1[0] 73 | 74 | if DEBUG: 75 | print "label1" 76 | print label1 77 | 78 | copyCurrentLabels = list(currentLabels) 79 | copyCurrentLabels.remove(label1) 80 | 81 | if not copyCurrentLabels: 82 | break 83 | 84 | label2 = np.random.choice(np.array(copyCurrentLabels), 1, replace=False) 85 | label2 = label2[0] 86 | 87 | if DEBUG: 88 | print "label2" 89 | print label2 90 | 91 | print label1 92 | print label2 93 | 94 | # print "labelsToData" 95 | # print labelsToData 96 | 97 | dataLabel1 = labelsToData[label1] 98 | dataLabel2 = labelsToData[label2] 99 | if len(dataLabel1) == 0: 100 | currentLabels.remove(label1) 101 | continue 102 | if len(dataLabel2) == 0: 103 | currentLabels.remove(label2) 104 | continue 105 | 106 | shuffledData1 += [dataLabel1[0]] 107 | shuffledData2 += [dataLabel2[0]] 108 | labelsData1 += [label1] 109 | labelsData2 += [label2] 110 | 111 | del labelsToData[label1][0] 112 | del labelsToData[label2][0] 113 | 114 | shuffledData1 = np.vstack(shuffledData1) 115 | shuffledData2 = np.vstack(shuffledData2) 116 | 117 | labelsData1 = np.hstack(labelsData1) 118 | labelsData2 = np.hstack(labelsData2) 119 | 120 | 121 | """ SND method""" 122 | # remaing = list(shuffling) 123 | # remaininLabels = list(labelsShuffling) 124 | 125 | # print "shuffling size" 126 | # print len(shuffling) 127 | 128 | # shuffledData1 = [] 129 | # shuffledData2 = [] 130 | # labelsData1 = [] 131 | # labelsData2 = [] 132 | 133 | # for label in labels: 134 | # print "label" 135 | # print label 136 | 137 | # nrRemainingData = len(remaing) 138 | 139 | # if nrRemainingData == 0: 140 | # break 141 | 142 | # labelIndices = np.array(remaininLabels) == label 143 | # concreteIndices = np.arange(nrRemainingData)[labelIndices] 144 | 145 | # # If nothing of this label is left, just continue 146 | 
# if len(concreteIndices) == 0: 147 | # continue 148 | 149 | # otherIndices = np.arange(nrRemainingData)[np.invert(labelIndices)] 150 | 151 | # if len(otherIndices) == 0: 152 | # continue 153 | 154 | # indicesToTake = min(len(concreteIndices), len(otherIndices)) 155 | 156 | # otherIndices = np.random.choice(otherIndices, indicesToTake, replace=False) 157 | 158 | 159 | # # concreteData = np.array(remaing)[concreteIndices] 160 | 161 | # shuffledData1 += [np.array(remaing)[concreteIndices]] 162 | # labelsData1 += [np.array(remaininLabels)[concreteIndices]] 163 | 164 | # shuffledData2 += [np.array(remaing)[otherIndices]] 165 | # labelsData2 += [np.array(remaininLabels)[otherIndices]] 166 | 167 | # indicesToRemove = np.hstack((otherIndices, concreteIndices)) 168 | # remaing = [v for i, v in enumerate(remaing) if i not in indicesToRemove] 169 | # remaininLabels = [v for i, v in enumerate(remaininLabels) if i not in indicesToRemove] 170 | 171 | # assert len(remaing) == len(remaininLabels) 172 | 173 | # shuffledData1 = np.vstack(shuffledData1) 174 | # shuffledData2 = np.vstack(shuffledData2) 175 | 176 | # labelsData1 = np.hstack(labelsData1) 177 | # labelsData2 = np.hstack(labelsData2) 178 | 179 | # print shuffledData1.shape 180 | # print shuffledData2.shape 181 | 182 | # assert len(shuffledData1) == len(shuffledData2) 183 | # assert len(labelsData1) == len(labelsData2) 184 | 185 | # assert len(shuffledData1) <= len(shuffling) / 2 186 | 187 | # """ STOP NEW METHOD """ 188 | # shuffling, labelsShuffling = shuffle(shuffling, labelsShuffling) 189 | 190 | # shuffledData1 = shuffling[0: len(shuffling) / 2] 191 | # shuffledData2 = shuffling[len(shuffling)/2 :] 192 | 193 | # labelsData1 = labelsShuffling[0: len(shuffling) /2] 194 | # labelsData2 = labelsShuffling[len(shuffling)/2:] 195 | 196 | return shuffledData1, shuffledData2, labelsData1, labelsData2 197 | 198 | # TODO: I think this can be written easier with the code similar to the Emotions one 199 | # you can create more tuples than just one per image 200 | # you can put each image in 5 tuples and that will probably owrk better 201 | # it might be useful to also give the same image twice 202 | def splitDataMultiPIESubject(imgsPerLabel=None, instanceToPairRatio=1, equalize=False): 203 | subjectsToImgs = readMultiPIESubjects(equalize) 204 | 205 | data1, data2, subjects1, subjects2, shuffling, subjectsShuffling =\ 206 | splitDataInPairsWithLabels(subjectsToImgs, imgsPerLabel, 207 | None, instanceToPairRatio=instanceToPairRatio) 208 | 209 | trainData1, testData1, trainData2, testData2, trainSubjects1, testSubjects1,\ 210 | trainSubjects2, testSubjects2 = splitTrainTest(data1, data2, subjects1, subjects2, 5) 211 | 212 | print "trainData1.shape" 213 | print trainData1.shape 214 | 215 | shuffledData1, shuffledData2, subjectsData1, subjectsData2 = splitShuffling(shuffling, subjectsShuffling) 216 | 217 | print len(shuffledData1) 218 | print len(shuffledData2) 219 | 220 | trainShuffedData1, testShuffedData1, trainShuffedData2, testShuffedData2,\ 221 | trainShuffledSubjects1, testShuffledSubjects1, trainShuffledSubjects2, testShuffledSubjects2 =\ 222 | splitTrainTest(shuffledData1, shuffledData2, 223 | subjectsData1, subjectsData2, 5) 224 | 225 | trainData1 = np.vstack((trainData1, trainShuffedData1)) 226 | 227 | print "trainData1.shape" 228 | print trainData1.shape 229 | 230 | trainData2 = np.vstack((trainData2, trainShuffedData2)) 231 | 232 | testData1 = np.vstack((testData1, testShuffedData1)) 233 | testData2 = np.vstack((testData2, testShuffedData2)) 234 
| 235 | trainSubjects1 = np.hstack((trainSubjects1, trainShuffledSubjects1)) 236 | testSubjects1 = np.hstack((testSubjects1, testShuffledSubjects1)) 237 | 238 | trainSubjects2 = np.hstack((trainSubjects2, trainShuffledSubjects2)) 239 | testSubjects2 = np.hstack((testSubjects2, testShuffledSubjects2)) 240 | 241 | assert len(subjects1) == len(subjects2) 242 | assert len(trainSubjects1) == len(trainSubjects1) 243 | assert len(testSubjects1) == len(testSubjects2) 244 | 245 | similaritiesTrain = similarityDifferentLabels(trainSubjects1, trainSubjects2) 246 | similaritiesTest = similarityDifferentLabels(testSubjects1, testSubjects2) 247 | 248 | print "trainSubjects1.shape" 249 | print trainSubjects1.shape 250 | 251 | print "similaritiesTrain.shape" 252 | print similaritiesTrain.shape 253 | # print similaritiesTrain 254 | 255 | assert len(trainData1) == len(trainData2) 256 | assert len(testData1) == len(testData2) 257 | 258 | trainData1, trainData2, similaritiesTrain = shuffle3(trainData1, trainData2, similaritiesTrain) 259 | testData1, testData2, similaritiesTest = shuffle3(testData1, testData2, similaritiesTest) 260 | 261 | return trainData1, trainData2, testData1, testData2, similaritiesTrain, similaritiesTest 262 | 263 | 264 | """ 265 | instanceToPairRatio: the number of pairs a data instance needs to be in. 266 | now the half becomes completely random. For each data instance you can randomly 267 | choose k without it(or do not bother to check) from the ones with the same label 268 | you can then randomly choose from the ones without a labels 269 | but do it so that each instance is chosen once. 270 | 271 | ie for each one you choose anohter one so then you get 2 instances in total. 272 | """ 273 | def splitDataInPairsWithLabels(labelsToImages, imgsPerLabel, labelsToTake=None, instanceToPairRatio=1): 274 | data1 = [] 275 | data2 = [] 276 | 277 | shuffling = [] 278 | labelsShuffling = [] 279 | labels1 = [] 280 | labels2 = [] 281 | 282 | for label, images in labelsToImages.iteritems(): 283 | if labelsToTake is not None and label not in labelsToTake: 284 | print "skipping subject" 285 | continue 286 | 287 | # The database might contain the labels in similar 288 | # poses, and illumination conditions, so shuffle before 289 | if imgsPerLabel is not None: 290 | images = images[:imgsPerLabel] 291 | 292 | for i in xrange(instanceToPairRatio): 293 | # Each time get a new shuffle of the data 294 | np.random.shuffle(images) 295 | 296 | delta = len(images) / 4 + label % 2 297 | last2Index = 2 * delta 298 | data1 += images[0: delta] 299 | data2 += images[delta: last2Index] 300 | 301 | labels1 += [label] * delta 302 | labels2 += [label] * delta 303 | 304 | imagesForShuffling = images[last2Index : ] 305 | shuffling += imagesForShuffling 306 | labelsShuffling += [label] * len(imagesForShuffling) 307 | 308 | print "len(labelsShuffling)" 309 | print len(labelsShuffling) 310 | 311 | print "shuffling" 312 | print len(shuffling) 313 | 314 | assert len(shuffling) == len(labelsShuffling) 315 | shuffling, labelsShuffling = shuffleList(shuffling, labelsShuffling) 316 | 317 | print len(data1) 318 | print len(data2) 319 | assert len(data1) == len(data2) 320 | 321 | data1 = np.array(data1) 322 | data2 = np.array(data2) 323 | labels1 = np.array(labels1) 324 | labels2 = np.array(labels2) 325 | shuffling = np.array(shuffling) 326 | labelsShuffling = np.array(labelsShuffling) 327 | 328 | return data1, data2, labels1, labels2, shuffling, labelsShuffling 329 | 330 | def splitDataAccordingToLabels(labelsToImages, labelsToTake, 
imgsPerLabel=None, instanceToPairRatio=1): 331 | data1, data2, labels1, labels2, shuffling, labelsShuffling =\ 332 | splitDataInPairsWithLabels(labelsToImages, imgsPerLabel, 333 | labelsToTake=labelsToTake, 334 | instanceToPairRatio=instanceToPairRatio) 335 | 336 | shuffledData1, shuffledData2, labelsData1, labelsData2 = splitShuffling(shuffling, labelsShuffling) 337 | 338 | data1 = np.vstack((data1, shuffledData1)) 339 | data2 = np.vstack((data2, shuffledData2)) 340 | 341 | labels1 = np.hstack((labels1, labelsData1)) 342 | labels2 = np.hstack((labels2, labelsData2)) 343 | 344 | return data1, data2, labels1, labels2 345 | 346 | def similarityDifferentLabels(labels1, labels2): 347 | assert len(labels1) == len(labels2) 348 | return labels1 == labels2 349 | 350 | def splitSimilarityYale(instanceToPairRatio, equalize): 351 | subjectsToImgs = readCroppedYaleSubjects(equalize) 352 | 353 | # Get all subjects 354 | data1, data2, subjects1, subjects2 = splitDataAccordingToLabels(subjectsToImgs, 355 | None, imgsPerLabel=None, 356 | instanceToPairRatio=instanceToPairRatio) 357 | 358 | return data1, data2, similarityDifferentLabels(subjects1, subjects2) 359 | 360 | 361 | def splitSimilaritiesPIE(instanceToPairRatio, equalize): 362 | emotionToImages = readMultiPIEEmotions(equalize) 363 | # Get all emotions 364 | data1, data2, emotions1, emotions2 = splitDataAccordingToLabels(emotionToImages, 365 | None, imgsPerLabel=None, 366 | instanceToPairRatio=instanceToPairRatio) 367 | 368 | labels = similarityDifferentLabels(emotions1, emotions2) 369 | 370 | data1, data2, labels = shuffle3(data1, data2, labels) 371 | 372 | return data1, data2, labels 373 | 374 | def splitSimilaritiesPIEEmotions(instanceToPairRatio, equalize): 375 | data1, data2, labels = splitSimilaritiesPIE(instanceToPairRatio, equalize) 376 | 377 | kf = cross_validation.KFold(n=len(data1), n_folds=5) 378 | for train, test in kf: 379 | break 380 | 381 | return (data1[train], data2[train], labels[train], 382 | data1[test], data2[test], labels[test]) 383 | 384 | 385 | def splitEmotionsMultiPieKeepSubjects(instanceToPairRatio, equalize): 386 | subjectToEmotions = readMultiPIEEmotionsPerSubject(equalize) 387 | 388 | totalData1 = [] 389 | totalData2 = [] 390 | totalLabels1 = [] 391 | totalLabels2 = [] 392 | for subject, emotionToImages in enumerate(subjectToEmotions): 393 | data1, data2, labels1, labels2 = splitDataAccordingToLabels(emotionToImages, None, None, instanceToPairRatio) 394 | totalData1 += [data1] 395 | totalData2 += [data2] 396 | totalLabels1 += [labels1] 397 | totalLabels2 += [labels2] 398 | 399 | totalData1 = np.vstack(totalData1) 400 | totalData2 = np.vstack(totalData2) 401 | totalLabels1 = np.hstack(totalLabels1) 402 | totalLabels2 = np.hstack(totalLabels2) 403 | return totalData1, totalData2, totalLabels1, totalLabels2 404 | 405 | # Do not add the similarity code her because 406 | # I will use this for both emotion difference and similarity 407 | # you can just change this due to 408 | def splitEmotionsMultiPieKeepSubjectsTestTrain(instanceToPairRatio, equalize): 409 | 410 | totalData1, totalData2, totalLabels1, totalLabels2 =\ 411 | splitEmotionsMultiPieKeepSubjects(instanceToPairRatio, equalize) 412 | 413 | kf = cross_validation.KFold(n=len(totalData1), n_folds=5) 414 | for train, test in kf: 415 | break 416 | 417 | totalData1, totalData2, totalLabels1, totalLabels2 = shuffle(totalData1, 418 | totalData2, totalLabels1, totalLabels2) 419 | 420 | labels = similarityDifferentLabels(totalLabels1, totalLabels2) 421 | 422 | return 
(totalData1[train], totalData2[train], labels[train], 423 | totalData1[test], totalData2[test], labels[test]) 424 | 425 | 426 | 427 | # TODO: you can also make a simpler version in which you build 428 | # different models for different emotions testing 429 | # However in that case you can get slightly different results due to 430 | # the model differences 431 | def splitForSimilaritySameSubjectsDifferentEmotions(equalize, emotions, perSubject): 432 | 433 | # For now only do these types of comparisons for 3 emotions 434 | assert len(emotions) == 3 435 | subjectToEmotions = readMultiPIEEmotionsPerSubject(equalize) 436 | 437 | subjectToEmotionsTest = [] 438 | subjectsToImgsTrain = {} 439 | for subject, emotionToImages in enumerate(subjectToEmotions): 440 | subjectsToImgsTrain[subject] = [] 441 | emotionToTest = {} 442 | 443 | for emotion, images in emotionToImages.iteritems(): 444 | images = np.array(images) 445 | np.random.shuffle(images) 446 | # Split the images: training and testing 447 | # For the testing images we do not have more requirements than 448 | # the usual subject testing 449 | if emotion not in emotions: 450 | subjectsToImgsTrain[subject] += list(images) 451 | else: 452 | # take a multiple of 5 of images for this emotion 453 | # we will use them to create the testing data 454 | testImages = images[0: 5 * perSubject] 455 | trainImages = images[5 * perSubject: ] 456 | subjectsToImgsTrain[subject] += list(trainImages) 457 | emotionToTest[emotion] = list(testImages) 458 | 459 | subjectToEmotionsTest += [emotionToTest] 460 | 461 | data1, data2, labels1, labels2 = splitDataAccordingToLabels(subjectsToImgsTrain, 462 | None, imgsPerLabel=None, instanceToPairRatio=2) 463 | 464 | similaritiesTrain = similarityDifferentLabels(labels1, labels2) 465 | 466 | testData1, testData2, groups = makeTestGroupsDifferentSubjects(subjectToEmotionsTest) 467 | 468 | return data1, data2, similaritiesTrain, testData1, testData2, groups 469 | 470 | 471 | def makeTestGroups(subjectToEmotionsTest): 472 | totalData1 = [] 473 | totalData2 = [] 474 | totalLabels = [] 475 | for subject, emotionToImages in enumerate(subjectToEmotionsTest): 476 | data1, data2, labels1, labels2 = splitDataAccordingToLabels(emotionToImages, None, None, instanceToPairRatio=2) 477 | combinedLabel = zip(labels1, labels2) 478 | totalData1 += [data1] 479 | totalData2 += [data2] 480 | totalLabels += [combinedLabel] 481 | 482 | totalData1 = np.vstack(totalData1) 483 | totalData2 = np.vstack(totalData2) 484 | totalLabels = list(itertools.chain(*totalLabels)) 485 | 486 | totalLabels = np.array(totalLabels) 487 | return totalData1, totalData2, totalLabels 488 | 489 | 490 | 491 | def makeTestGroupsDifferentSubjects(subjectToEmotionsTest): 492 | # splitShuffling is what I need for the subjects 493 | totalData1 = [] 494 | totalData2 = [] 495 | totalLabels = [] 496 | 497 | 498 | # flatten out to two arrays: one for subjects and one for (emotion, image) tuples 499 | subjects = [] 500 | emotions = [] 501 | images = [] 502 | for subject, emotionToImages in enumerate(subjectToEmotionsTest): 503 | for emotion, imagesemotion in emotionToImages.iteritems(): 504 | subjects += [subject] 505 | emotions += [emotion] 506 | images += imagesemotion 507 | 508 | subjects = np.array(subjects) 509 | images = np.array(images) 510 | emotions = np.array(emotions) 511 | 512 | indices = np.arange(len(subjects)) 513 | indices1, indices2, _, _ = splitShuffling(indices, subjects) 514 | totalData1 = images[indices1] 515 | totalData2 = images[indices2] 516 | 517 | 
emotions1 = emotions[indices1] 518 | emotions2 = emotions[indices2] 519 | 520 | totalLabels = [(emotions1[i], emotions2[i]) for i in xrange(len(emotions1))] 521 | totalLabels = np.array(totalLabels) 522 | 523 | totalLabels = np.squeeze(totalLabels) 524 | print totalLabels.shape 525 | 526 | # for i in xrange(0, len(totalData1), 10): 527 | # plt.imshow(totalData1[i].reshape((40, 30)), cmap=plt.cm.gray) 528 | # plt.show() 529 | 530 | # plt.imshow(totalData2[i].reshape((40, 30)), cmap=plt.cm.gray) 531 | # plt.show() 532 | 533 | # print totalLabels[i] 534 | 535 | totalData1 = np.squeeze(totalData1) 536 | totalData2 = np.squeeze(totalData2) 537 | print totalData1.shape 538 | print totalData2.shape 539 | 540 | return totalData1, totalData2, totalLabels 541 | 542 | 543 | def testShuffling(): 544 | shuffling = [1,2,3, 4] 545 | labelsShuffling = [1,2,3, 4] 546 | a, b, c, d = splitShuffling(shuffling, labelsShuffling) 547 | assert not c[0] == d[0] 548 | assert not c[1] == d[1] 549 | assert sorted(list(a) + list(b)) == [1,2,3,4] 550 | 551 | shuffling = [1,2,3] 552 | labelsShuffling = [1,2,3] 553 | a, b, c, d = splitShuffling(shuffling, labelsShuffling) 554 | assert not c[0] == d[0] 555 | assert not a[0] == b[0] 556 | 557 | shuffling = [1,2,4,5] 558 | labelsShuffling = [1,2,1,2] 559 | a, b, c, d = splitShuffling(shuffling, labelsShuffling) 560 | assert not c[0] == d[0] 561 | assert not c[1] == d[1] 562 | 563 | print "a" 564 | print a 565 | print "b" 566 | print b 567 | print "c" 568 | print c 569 | print "d" 570 | print d 571 | 572 | # fst = sorted(list(a[0])) 573 | # snd = sorted(list(b[0])) 574 | 575 | # assert fst == [1,4] or fst == [2,5], fst 576 | # assert snd == [2,5] or snd == [1,4], snd 577 | 578 | # if fst == [1,4]: 579 | # assert c[0] == 1 580 | # else: 581 | # assert c[0] == 2 582 | 583 | shuffling = [ np.array([1,1]), np.array([2,2]), np.array([4,4]), np.array([5,5]), 584 | np.array([6, 6]), np.array([7, 7]) ] 585 | labelsShuffling = [1, 2, 3, 2, 1, 3] 586 | a, b, c, d = splitShuffling(shuffling, labelsShuffling) 587 | assert not c[0] == d[0] 588 | assert not c[1] == d[1] 589 | assert not c[2] == d[2] 590 | 591 | print zip(a, c) 592 | print zip(b, d) 593 | 594 | if __name__ == '__main__': 595 | # print shuffleList([1,2], [3,4]) 596 | print len(shuffle([1,2], [3,4])) 597 | testShuffling() 598 | -------------------------------------------------------------------------------- /code/spearmint-configs/adverserial-training/adversarial.py: -------------------------------------------------------------------------------- 1 | """Spearmint for the DBN module in pydeeplearn.""" 2 | 3 | __author__ = "Mihaela Rosca" 4 | __contact__ = "mihaela.c.rosca@gmail.com" 5 | 6 | from lib import deepbelief as db 7 | from lib.common import * 8 | from lib.activationfunctions import * 9 | 10 | from read import readmnist 11 | 12 | 13 | PATH = "/data/mcr10/pydeeplearn/MNIST" 14 | TRAIN = 10000 15 | TEST = 1000 16 | 17 | def trainDBN(unsupervisedLearningRate, 18 | supervisedLearningRate, 19 | visibleDropout, 20 | hiddenDropout, 21 | miniBatchSize, 22 | momentumMax, 23 | maxEpochs): 24 | print 'in trainDBN' 25 | trainVectors, trainLabels =\ 26 | readmnist.read(0, TRAIN, digits=None, bTrain=True, path=PATH) 27 | 28 | testVectors, testLabels =\ 29 | readmnist.read(TRAIN, TRAIN + TEST, 30 | digits=None, bTrain=True, path=PATH) 31 | 32 | trainVectors, trainLabels = shuffle(trainVectors, trainLabels) 33 | print 'done reading' 34 | trainVectors = np.array(trainVectors, dtype='float') 35 | trainingScaledVectors = 
scale(trainVectors)
36 | 
37 |   testVectors = np.array(testVectors, dtype='float')
38 |   testingScaledVectors = scale(testVectors)
39 | 
40 |   trainVectorLabels = labelsToVectors(trainLabels, 10)
41 |   print 'done scaling data'
42 |   print 'creating DBN'
43 |   net = db.DBN(5, [784, 1000, 1000, 1000, 10],
44 |                binary=False,
45 |                unsupervisedLearningRate=unsupervisedLearningRate,
46 |                supervisedLearningRate=supervisedLearningRate,
47 |                momentumMax=momentumMax,
48 |                nesterovMomentum=True,
49 |                rbmNesterovMomentum=True,
50 |                activationFunction=Rectified(),
51 |                rbmActivationFunctionVisible=Identity(),
52 |                rbmActivationFunctionHidden=RectifiedNoisy(),
53 |                rmsprop=True,
54 |                visibleDropout=visibleDropout,
55 |                hiddenDropout=hiddenDropout,
56 |                weightDecayL1=0,
57 |                weightDecayL2=0,
58 |                rbmHiddenDropout=1.0,
59 |                rbmVisibleDropout=1.0,
60 |                miniBatchSize=miniBatchSize,
61 |                adversarial_training=True,
62 |                # TODO: make this a learned param
63 |                preTrainEpochs=100,
64 |                sparsityConstraintRbm=False,
65 |                sparsityTragetRbm=0.01,
66 |                sparsityRegularizationRbm=None)
67 | 
68 |   net.train(trainingScaledVectors, trainVectorLabels,
69 |             maxEpochs=maxEpochs, validation=False)
70 | 
71 |   probabilities, predicted = net.classify(testingScaledVectors)
72 |   error = getClassificationError(predicted, testLabels)
73 |   print "error", error
74 |   return error
75 | 
76 | 
77 | # Write a function like this called 'main'
78 | def main(job_id, params):
79 |   print 'params', params
80 |   return trainDBN(unsupervisedLearningRate=params['unsupervisedLearningRate'][0],
81 |                   supervisedLearningRate=params['supervisedLearningRate'][0],
82 |                   visibleDropout=params['visibleDropout'][0],
83 |                   hiddenDropout=params['hiddenDropout'][0],
84 |                   miniBatchSize=params['miniBatchSize'][0],
85 |                   momentumMax=params['momentumMax'][0],
86 |                   # maxEpochs is not exposed as a variable in this config.pb,
87 |                   maxEpochs=200)  # so keep it fixed here.
88 | 
--------------------------------------------------------------------------------
/code/spearmint-configs/adverserial-training/config.pb:
--------------------------------------------------------------------------------
1 | language: PYTHON
2 | name: "adversarial"
3 | 
4 | variable {
5 |   name: "supervisedLearningRate"
6 |   type: FLOAT
7 |   size: 1
8 |   min: 0.00001
9 |   max: 1.0
10 | }
11 | 
12 | variable {
13 |   name: "unsupervisedLearningRate"
14 |   type: FLOAT
15 |   size: 1
16 |   min: 0.00001
17 |   max: 1
18 | }
19 | 
20 | variable {
21 |   name: "momentumMax"
22 |   type: FLOAT
23 |   size: 1
24 |   min: 0.5
25 |   max: 1.0
26 | }
27 | 
28 | variable {
29 |   name: "hiddenDropout"
30 |   type: FLOAT
31 |   size: 1
32 |   min: 0.5
33 |   max: 1.0
34 | }
35 | 
36 | variable {
37 |   name: "visibleDropout"
38 |   type: FLOAT
39 |   size: 1
40 |   min: 0.5
41 |   max: 1.0
42 | }
43 | 
44 | variable {
45 |   name: "miniBatchSize"
46 |   type: INT
47 |   size: 1
48 |   min: 10
49 |   max: 1000
50 | }
51 | 
--------------------------------------------------------------------------------
/code/spearmint-configs/dbnmnist/config.pb:
--------------------------------------------------------------------------------
1 | language: PYTHON
2 | name: "mnistdbn"
3 | 
4 | variable {
5 |   name: "supervisedLearningRate"
6 |   type: FLOAT
7 |   size: 1
8 |   min: 0.00001
9 |   max: 1.0
10 | }
11 | 
12 | variable {
13 |   name: "unsupervisedLearningRate"
14 |   type: FLOAT
15 |   size: 1
16 |   min: 0.00001
17 |   max: 1
18 | }
19 | 
20 | variable {
21 |   name: "momentumMax"
22 |   type: FLOAT
23 |   size: 1
24 |   min: 0.5
25 |   max: 1.0
26 | }
27 | 
28 | variable {
29 |   name: "hiddenDropout"
30 |   type: FLOAT
31 |   size: 1
32 |   min: 0.5
33 |   max: 1.0
34 | }
35 | 
36 | variable {
37 |   name: "visibleDropout"
38 |   type: FLOAT
39 |   size: 1
40 |   min: 0.5
41 |   max: 1.0
42 | }
43 | 
44 | variable {
45 |   name: "miniBatchSize"
46 |   type: INT
47 |   size: 1
48 |   min: 10
49 |   max: 1000
50 | }
51 | 
52 | variable {
53 |   name: "maxEpochs"
54 |   type: INT
55 |   size: 1
56 |   min: 100
57 |   max: 1000
58 | }
59 | 
60 | # TODO(mihaelacr): add the number of pretraining epochs
61 | # variable {
62 | #   name: "preTrainEpochs"
63 | #   type: INT
64 | #   size: 1
65 | #   min: 10
66 | #   max: 1000
67 | # }
68 | 
69 | 
--------------------------------------------------------------------------------
/code/spearmint-configs/dbnmnist/mnistdbn.py:
--------------------------------------------------------------------------------
1 | """Spearmint for the DBN module in pydeeplearn."""
2 | 
3 | __author__ = "Mihaela Rosca"
4 | __contact__ = "mihaela.c.rosca@gmail.com"
5 | 
6 | from lib import deepbelief as db
7 | from lib.common import *
8 | from lib.activationfunctions import *
9 | 
10 | from read import readmnist
11 | 
12 | 
13 | PATH = "/data/mcr10/project/pydeeplearn/code/MNIST"
14 | TRAIN = 10000
15 | TEST = 1000
16 | 
17 | def trainDBN(unsupervisedLearningRate,
18 |              supervisedLearningRate,
19 |              visibleDropout,
20 |              hiddenDropout,
21 |              miniBatchSize,
22 |              momentumMax,
23 |              maxEpochs):
24 |   trainVectors, trainLabels =\
25 |       readmnist.read(0, TRAIN, digits=None, bTrain=True, path=PATH)
26 | 
27 |   testVectors, testLabels =\
28 |       readmnist.read(TRAIN, TRAIN + TEST,
29 |                      digits=None, bTrain=True, path=PATH)
30 | 
31 |   trainVectors, trainLabels = shuffle(trainVectors, trainLabels)
32 | 
33 |   trainVectors = np.array(trainVectors, dtype='float')
34 |   trainingScaledVectors = scale(trainVectors)
35 | 
36 |   testVectors = np.array(testVectors, dtype='float')
37 |   testingScaledVectors = scale(testVectors)
38 | 
39 |   trainVectorLabels = labelsToVectors(trainLabels, 10)
40 | 
41 |   net = db.DBN(5, [784, 1000, 1000, 1000, 10],
42 |                binary=False,
43 |                unsupervisedLearningRate=unsupervisedLearningRate,
44 |                supervisedLearningRate=supervisedLearningRate,
45 |                momentumMax=momentumMax,
46 |                nesterovMomentum=True,
47 |                rbmNesterovMomentum=True,
48 |                activationFunction=Rectified(),
49 |                rbmActivationFunctionVisible=Identity(),
50 |                rbmActivationFunctionHidden=RectifiedNoisy(),
51 |                rmsprop=True,
52 |                adversarial_training=False,
53 |                visibleDropout=visibleDropout,
54 |                hiddenDropout=hiddenDropout,
55 |                weightDecayL1=0,
56 |                weightDecayL2=0,
57 |                rbmHiddenDropout=1.0,
58 |                rbmVisibleDropout=1.0,
59 |                miniBatchSize=miniBatchSize,
60 |                # TODO: make this a learned param
61 |                preTrainEpochs=100,
62 |                sparsityConstraintRbm=False,
63 |                sparsityTragetRbm=0.01,
64 |                sparsityRegularizationRbm=None)
65 | 
66 |   net.train(trainingScaledVectors, trainVectorLabels,
67 |             maxEpochs=maxEpochs, validation=False)
68 | 
69 |   probabilities, predicted = net.classify(testingScaledVectors)
70 |   error = getClassificationError(predicted, testLabels)
71 |   print "error", error
72 |   return error
73 | 
74 | 
75 | # Write a function like this called 'main'
76 | def main(job_id, params):
77 |   print 'params', params
78 |   return trainDBN(unsupervisedLearningRate=params['unsupervisedLearningRate'][0],
79 |                   supervisedLearningRate=params['supervisedLearningRate'][0],
80 |                   visibleDropout=params['visibleDropout'][0],
81 |                   hiddenDropout=params['hiddenDropout'][0],
82 |                   miniBatchSize=params['miniBatchSize'][0],
83 |                   momentumMax=params['momentumMax'][0],
84 |                   maxEpochs=params['maxEpochs'][0])
85 | 
86 | 
--------------------------------------------------------------------------------
/code/tests.py:
--------------------------------------------------------------------------------
1 | __author__ = "Mihaela Rosca"
2 | __contact__ = "mihaela.c.rosca@gmail.com"
3 | 
4 | import cPickle as pickle
5 | from sklearn import cross_validation
6 | import argparse
7 | 
8 | 
9 | import numpy as np
10 | 
11 | from lib import deepbelief as db
12 | from lib import restrictedBoltzmannMachine as rbm
13 | 
14 | from lib.common import *
15 | from read.readfacedatabases import *
16 | from lib.activationfunctions import *
17 | 
18 | parser = argparse.ArgumentParser(description='tests')
19 | parser.add_argument('netFile', help="file where the serialized network should be saved")
20 | parser.add_argument('--relu', dest='relu', action='store_true', default=False,
21 |                     help=("if true, trains the RBM or DBN with a rectified linear unit"))
22 | 
23 | 
24 | # Get the arguments of the program
25 | args = parser.parse_args()
26 | 
27 | 
28 | def testPicklingDBN():
29 |   data, labels = readKanade(False, None, equalize=False)
30 | 
31 |   print "data.shape"
32 |   print data.shape
33 |   print "labels.shape"
34 |   print labels.shape
35 | 
36 |   # Random data for training and testing
37 |   kf = cross_validation.KFold(n=len(data), n_folds=5)
38 |   for train, test in kf:
39 |     break
40 | 
41 |   if args.relu:
42 |     activationFunction = Rectified()
43 |     unsupervisedLearningRate = 0.05
44 |     supervisedLearningRate = 0.01
45 |     momentumMax = 0.95
46 |     data = scale(data)
47 |     rbmActivationFunctionVisible = Identity()
48 |     rbmActivationFunctionHidden = RectifiedNoisy()
49 | 
50 |   else:
51 |     activationFunction = Sigmoid()
52 |     rbmActivationFunctionVisible = Sigmoid()
53 |     rbmActivationFunctionHidden = Sigmoid()
54 | 
55 |     unsupervisedLearningRate = 0.5
56 |     supervisedLearningRate = 0.1
57 |     momentumMax = 0.9
58 | 
59 |   trainData = data[train]
60 |   trainLabels = labels[train]
61 | 
62 |   # TODO: this might require more thought
63 |   net = db.DBN(5, [1200, 1500, 1500, 1500, 7],
64 |                binary=1-args.relu,
65 |                activationFunction=activationFunction,
66 |                rbmActivationFunctionVisible=rbmActivationFunctionVisible,
67 |                rbmActivationFunctionHidden=rbmActivationFunctionHidden,
68 |                unsupervisedLearningRate=unsupervisedLearningRate,
69 |                supervisedLearningRate=supervisedLearningRate,
70 |                momentumMax=momentumMax,
71 |                nesterovMomentum=True,
72 |                rbmNesterovMomentum=True,
73 |                rmsprop=True,
74 |                miniBatchSize=20,
75 |                hiddenDropout=0.5,
76 |                visibleDropout=0.8,
77 |                rbmVisibleDropout=1.0,
78 |                rbmHiddenDropout=1.0,
79 |                preTrainEpochs=1)
80 | 
81 |   net.train(trainData, trainLabels, maxEpochs=10,
82 |             validation=False,
83 |             unsupervisedData=None,
84 |             trainingIndices=train)
85 | 
86 |   initialDict = net.__dict__
87 | 
88 | 
89 |   with open(args.netFile, "wb") as f:
90 |     pickle.dump(net, f)
91 | 
92 |   with open(args.netFile, "rb") as f:
93 |     net = pickle.load(f)
94 | 
95 |   afterDict = net.__dict__
96 | 
97 |   del initialDict['rbmActivationFunctionHidden']
98 |   del initialDict['rbmActivationFunctionVisible']
99 | 
100 |   del afterDict['rbmActivationFunctionHidden']
101 |   del afterDict['rbmActivationFunctionVisible']
102 | 
103 | 
104 |   for key in initialDict:
105 |     assert key in afterDict
106 |     if isinstance(initialDict[key], (np.ndarray, np.generic)):
107 |       assert np.array_equal(initialDict[key], afterDict[key])
108 |     else:
109 |       assert initialDict[key] == afterDict[key]
110 | 
111 | 
112 | 
113 | def main():
114 |   testPicklingDBN()
115 | 
116 | if __name__ == '__main__':
117 |   main()
118 | 
119 | 
120 | 
--------------------------------------------------------------------------------
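The pickling test above is what makes it possible to ship trained networks as `.p` files, as done for `mihaela-happy-sad.p` in the webcam demo below. As a minimal usage sketch of the save/load round trip the test verifies — `net` stands for any trained `pydeeplearn` DBN and `testData` for an already scaled test matrix, both assumed to exist:

```python
import cPickle as pickle

# Persist the trained network to disk...
with open("my-net.p", "wb") as f:
  pickle.dump(net, f)

# ...and load it back later; the test above checks that the weights and
# hyperparameters survive this round trip.
with open("my-net.p", "rb") as f:
  net = pickle.load(f)

probabilities, predicted = net.classify(testData)
```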
/code/webcam-emotion-recognition/Readme.md:
--------------------------------------------------------------------------------
1 | # Emotion recognition from webcam capture
2 | 
3 | This directory contains the code that does emotion recognition from the camera.
4 | 
5 | This code was used to obtain the demo [here](http://elarosca.net/video.ogv).
6 | 
7 | ## Applications
8 | Apart from its research uses, you can use this functionality to control your computer: record certain specific expressions and associate each with a command that you want the computer to execute. For example, you can lock your screen when you close both your eyes very strongly, and open your favourite browser when you make a big grin. For best results, train the classifier with pictures of yourself using the code provided in this directory, as explained below.
9 | 
10 | ## Strategy
11 | The code uses OpenCV to detect faces from the webcam stream and then uses an emotion classifier to detect the emotion.
12 | 
13 | ## Preprocessing
14 | The images captured from the webcam are processed as follows:
15 | * Cropped according to the rectangle suggested by OpenCV as the face
16 | * Converted to black and white
17 | * Equalized using histogram normalization
18 | * Scaled to have zero mean and unit variance
19 | 
20 | The same preprocessing should be used on the input data when training the emotion classifier.
21 | 
22 | ## Main files in this directory
23 | * `webcam-emotions.py`: live emotion recognition from the webcam, using a network already trained with `pydeeplearn`
24 | * `webcam-emotions.py` (with `--gather_training_data`): collect data from the webcam for training a network.
25 | * `train-emotion-net.py`: trains a network with the collected data.
26 | 
27 | ## Webcam emotion recognition example run
28 | ``` python webcam-emotions.py --displayWebcam --seeFaces --netFile mihaela-happy-sad.p ```
29 | 
30 | Note: `mihaela-happy-sad.p` is a network trained with pictures of me. It works very well for my face, but might not work for others (I wear glasses and have long hair; a network trained with the standard databases did not work well for me, as they contain few pictures of people wearing glasses and not many women).
31 | 
32 | ## Getting training data
33 | 
34 | To get best results (tailored to the person who is using the webcam app), you can use the `webcam-emotions.py` script to record data, as follows:
35 | 
36 | ```python webcam-emotions.py --displayWebcam --seeFaces --gather_training_data --recording_emotion sad```
37 | 
38 | ## Training a `pydeeplearn` model
39 | If you have recorded your data as explained above, you can train a `pydeeplearn` model using the following command:
40 | ```python train-emotion-net.py --emotions happy sad --display_example_data --path_to_data . --net_file trained.p```
41 | 
42 | ## Emotion classifier
43 | The emotion classifier I used for the [demo](http://elarosca.net/video.ogv) was a DBN trained using `pydeeplearn`.
44 | 
45 | There is no need for the user of the code under this directory to use `pydeeplearn` as the emotion classifier. The user can pass in, via a flag, a pickle file of any model that has a `classify` method:
46 | 
47 | ```model.classify(image) ```
48 | which returns the probabilities obtained from the model as well as the classification labels.
49 | 
50 | Replacing the `pydeeplearn` classifier with another classifier could be made even easier; if you are interested in that, please send a pull request or create an issue. A minimal sketch of such a drop-in classifier follows below.
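51 | 
52 | For instance, a drop-in classifier could look like the sketch below. The `SklearnAdapter` name and the wrapped `model` are illustrative only, not part of `pydeeplearn`; any object with a scikit-learn style `predict_proba` would do:
53 | 
54 | ```python
55 | import cPickle as pickle
56 | import numpy as np
57 | 
58 | class SklearnAdapter(object):
59 |   # Hypothetical wrapper exposing the classify() interface expected by
60 |   # webcam-emotions.py on top of a scikit-learn style model.
61 |   def __init__(self, model):
62 |     self.model = model
63 | 
64 |   def classify(self, images):
65 |     # images: one flattened, preprocessed face per row.
66 |     probs = self.model.predict_proba(images)
67 |     return probs, np.argmax(probs, axis=1)
68 | 
69 | # Pickling an instance gives a file usable with --netFile (note that the class
70 | # must be importable when webcam-emotions.py unpickles it):
71 | # pickle.dump(SklearnAdapter(trained_model), open("custom-net.p", "wb"))
72 | ```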
73 | 
74 | ## Credits for icons
75 | * Happy emoticon: Person by Catherine Please from The Noun Project
76 | * Sad emoticon: Sad by Cengiz SARI from The Noun Project
77 | * Surprised emoticon: Surprise designed by Chris McDonnell from the thenounproject.com
78 | 
--------------------------------------------------------------------------------
/code/webcam-emotion-recognition/emotionrecognition.py:
--------------------------------------------------------------------------------
1 | from skimage.transform import resize
2 | import cv2
3 | import numpy as np
4 | import sys
5 | 
6 | import face_detection
7 | 
8 | # We need this to import other modules
9 | sys.path.append("..")
10 | from read import readfacedatabases
11 | from lib import common
12 | 
13 | SMALL_SIZE = (40, 30)
14 | SQUARE_SIZE = (48, 48)
15 | 
16 | 
17 | def preprocess(image, faceCoordinates, return_vector=False):
18 |   """Preprocess the input image according to the face coordinates detected
19 |   by a face detection engine.
20 | 
21 |   This method:
22 |     * crops the input image, keeping only the face given by faceCoordinates
23 |     * transforms the picture into black and white and resizes it to SMALL_SIZE
24 |     * equalizes the input image
25 | 
26 |   If return_vector is True, returns a vector by concatenating the rows of the
27 |   processed image. Otherwise, a matrix (2-d numpy array) is returned.
28 | 
29 |   This method needs to be called both for training and testing.
30 |   """
31 |   image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
32 | 
33 |   # Step 1: crop the image
34 |   cropped = face_detection.cropFace(image, faceCoordinates)
35 | 
36 |   # Step 2: resize
37 |   resized = np.ascontiguousarray(resize(cropped, SMALL_SIZE))
38 | 
39 |   # Step 3: equalize the image (needs to be done in the same way it was done for the training data)
40 |   equalized = readfacedatabases.equalizeFromFloatCLAHE(resized, SMALL_SIZE)
41 |   if return_vector:
42 |     return equalized
43 |   return np.reshape(equalized, SMALL_SIZE)
44 | 
45 | def testImage(image, faceCoordinates, emotion_classifier):
46 |   """Classifies the emotion in the input image according to the face coordinates
47 |   detected by a face detection engine.
48 | 
49 |   First calls preprocess and then uses the given emotion_classifier to detect
50 |   emotions in the processed image.
51 | 
52 |   """
53 |   testImg = preprocess(image, faceCoordinates, return_vector=True)
54 | 
55 |   # IMPORTANT: scale the image, since the classifier was trained on scaled data
56 |   test = common.scale(testImg.reshape(1, len(testImg)))
57 |   probs, emotion = emotion_classifier.classify(test)
58 | 
59 |   # classify returns a vector, as it is made to classify multiple test instances
60 |   # at the same time.
61 |   # We check if emotion is iterable before getting the first element, in case
62 |   # someone uses an api in which a vector is not returned.
63 |   if hasattr(emotion, '__iter__'):
64 |     emotion = emotion[0]
65 | 
66 |   print "probs"
67 |   print probs
68 |   print "label"
69 |   print emotion
70 | 
71 |   return emotion
72 | 
--------------------------------------------------------------------------------
/code/webcam-emotion-recognition/face_detection.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 | 
4 | # The Haar cascade file used by OpenCV for face detection.
5 | CASCADE_FN = "haarcascade_frontalface_default.xml"
6 | 
7 | # The scale used for face detection.
8 | # It is important, as the face detection algorithm works better on small images.
9 | # It also helps with removing faces that are too far away.
10 | RESIZE_SCALE = 3
11 | RECTANGLE_COLOUR = (117, 30, 104)
12 | BOX_COLOR = (255, 255, 255)
13 | THICKNESS = 2
14 | 
15 | 
16 | def getFaceCoordinates(image):
17 |   """Uses OpenCV to detect the face present in the input image.
18 | 
19 |   Returns a list of length 4 with the rectangle that defines the position of
20 |   the face: [x1, y1, x2, y2], where (x1, y1) and (x2, y2) are the opposite
21 |   corners of the rectangle.
22 |   """
23 | 
24 |   cascade = cv2.CascadeClassifier(CASCADE_FN)
25 |   img_copy = cv2.resize(image, (image.shape[1]/RESIZE_SCALE,
26 |                                 image.shape[0]/RESIZE_SCALE))
27 |   gray = cv2.cvtColor(img_copy, cv2.COLOR_BGR2GRAY)
28 |   gray = cv2.equalizeHist(gray)
29 |   rects = cascade.detectMultiScale(gray, 1.2, 3)
30 | 
31 |   # If there is no face, or if there is more than one face, return None
32 |   # because we do not deal with those cases yet.
33 |   if len(rects) != 1:
34 |     return None
35 | 
36 |   r = rects[0]
37 |   corners = [r[0], r[1], r[0] + r[2], r[1] + r[3]]
38 | 
39 |   return map((lambda x: RESIZE_SCALE * x), corners)
40 | 
41 | 
42 | # Turns a grayscale image into an RGB image by replicating the 2-d data on each channel.
43 | def to_rgb1(im):
44 |   w, h = im.shape
45 |   ret = np.empty((w, h, 3), dtype=np.uint8)
46 |   ret[:, :, 0] = im
47 |   ret[:, :, 1] = im
48 |   ret[:, :, 2] = im
49 |   return ret
50 | 
51 | def drawFace(image, faceCoordinates, emotion, emotion_to_text, emotion_to_image=None):
52 |   """ Draws the face information (together with emotion information) on the input image. """
53 | 
54 |   # Draw the face detection rectangle.
55 |   cv2.rectangle(np.asarray(image),
56 |                 (faceCoordinates[0], faceCoordinates[1]),
57 |                 (faceCoordinates[2], faceCoordinates[3]),
58 |                 RECTANGLE_COLOUR,
59 |                 thickness=THICKNESS)
60 | 
61 |   # Display the emotion on the webcam stream.
62 |   if emotion is not None:
63 |     cv2.putText(image,
64 |                 # Get the text associated with this emotion, but
65 |                 # if we do not have one just display the integer.
66 |                 emotion_to_text.get(emotion, ""),
67 |                 (faceCoordinates[0], faceCoordinates[1]),
68 |                 cv2.FONT_HERSHEY_SIMPLEX,
69 |                 2,
70 |                 BOX_COLOR,
71 |                 thickness=2)
72 | 
73 |   # Add a nice smiley to show the classification.
74 |   if emotion_to_image:
75 |     smallImage = emotion_to_image[emotion]
76 |     smallImage = cv2.resize(smallImage, (faceCoordinates[0], faceCoordinates[1]))
77 |     smallImage = to_rgb1(smallImage)
78 |     if smallImage.shape[0] > image.shape[0] or smallImage.shape[1] > image.shape[1]:
79 |       return
80 |     image[0:0 + smallImage.shape[0], 0:0 + smallImage.shape[1]] = smallImage
81 | 
82 | 
83 | def cropFace(image, faceCoordinates):
84 |   return image[faceCoordinates[1]: faceCoordinates[3],
85 |                faceCoordinates[0]: faceCoordinates[2]]
86 | 
--------------------------------------------------------------------------------
/code/webcam-emotion-recognition/icon_39345withoutalpha.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mihaelacr/pydeeplearn/48c6df3f3f854195b7f8b830b9f70fac19acdc9a/code/webcam-emotion-recognition/icon_39345withoutalpha.png
--------------------------------------------------------------------------------
/code/webcam-emotion-recognition/icon_4895withoutalpha.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mihaelacr/pydeeplearn/48c6df3f3f854195b7f8b830b9f70fac19acdc9a/code/webcam-emotion-recognition/icon_4895withoutalpha.png
--------------------------------------------------------------------------------
/code/webcam-emotion-recognition/icon_6231withoutalpha.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mihaelacr/pydeeplearn/48c6df3f3f854195b7f8b830b9f70fac19acdc9a/code/webcam-emotion-recognition/icon_6231withoutalpha.png
--------------------------------------------------------------------------------
/code/webcam-emotion-recognition/ignoreoutput.py:
--------------------------------------------------------------------------------
1 | import os
2 | 
3 | # Code from:
4 | # http://stackoverflow.com/questions/11130156/suppress-stdout-stderr-print-from-python-functions
5 | 
6 | # Define a context manager to suppress stdout and stderr.
7 | class suppress_stdout_stderr(object):
8 |   '''
9 |   A context manager for doing a "deep suppression" of stdout and stderr in
10 |   Python, i.e. will suppress all print, even if the print originates in a
11 |   compiled C/Fortran sub-function.
12 |   This will not suppress raised exceptions, since exceptions are printed
13 |   to stderr just before a script exits, and after the context manager has
14 |   exited (at least, I think that is why it lets exceptions through).
15 | 
16 |   '''
17 |   def __init__(self):
18 |     # Open a pair of null files.
19 |     self.null_fds = [os.open(os.devnull, os.O_RDWR) for x in range(2)]
20 |     # Save the actual stdout (1) and stderr (2) file descriptors.
21 |     self.save_fds = (os.dup(1), os.dup(2))
22 | 
23 |   def __enter__(self):
24 |     # Assign the null pointers to stdout and stderr.
25 |     os.dup2(self.null_fds[0], 1)
26 |     os.dup2(self.null_fds[1], 2)
27 | 
28 |   def __exit__(self, *_):
29 |     # Re-assign the real stdout/stderr back to (1) and (2).
30 |     os.dup2(self.save_fds[0], 1)
31 |     os.dup2(self.save_fds[1], 2)
32 |     # Close the null files.
33 |     os.close(self.null_fds[0])
34 |     os.close(self.null_fds[1])
35 | 
36 | 
37 | 
--------------------------------------------------------------------------------
/code/webcam-emotion-recognition/train-emotion-net.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import os
3 | import fnmatch
4 | import cv2
5 | import sys
6 | import random
7 | import matplotlib.pyplot as plt
8 | import cPickle as pickle
9 | import numpy as np
10 | 
11 | from sklearn import cross_validation
12 | from sklearn.metrics import confusion_matrix
13 | 
14 | 
15 | parser = argparse.ArgumentParser(description=("make database"))
16 | parser.add_argument('--net_file',
17 |                     help="file where the serialized network should be saved",
18 |                     type=str,
19 |                     default="trained_net.p")
20 | parser.add_argument('--path_to_data',
21 |                     help="The path to where the training data is",
22 |                     type=str,
23 |                     default="")
24 | parser.add_argument('--emotions',
25 |                     nargs='+',
26 |                     help='The emotions to be used to train the network.',
27 |                     type=str)
28 | parser.add_argument('--display_example_data',
29 |                     dest='display_example_data',
30 |                     action='store_true',
31 |                     default=False,
32 |                     help=("if true, shows a couple of the training examples."))
33 | parser.add_argument('--cv',
34 |                     dest='cv',
35 |                     action='store_true',
36 |                     default=False,
37 |                     help=("if true, runs the cv code to try multiple hyperparameters "
38 |                           "to find one which does best."))
39 | args = parser.parse_args()
40 | 
41 | sys.path.append("..")
42 | 
43 | from lib import activationfunctions
44 | from lib import deepbelief as db
45 | from lib import common
46 | 
47 | 
48 | 
49 | def getFiles(category, show=False):
50 |   """Returns all the png files in the subdirectory given by the input argument.
51 | 
52 |   The images are returned as a list of numpy arrays.
53 |   """
54 |   imgs = []
55 | 
56 |   extension = "png"
57 |   path = os.path.join(args.path_to_data, category)
58 | 
59 |   print "reading files from", path
60 | 
61 |   imageFiles = [(os.path.join(dirpath, f), f)
62 |                 for dirpath, dirnames, files in os.walk(path)
63 |                 for f in fnmatch.filter(files, '*.' + extension)]
64 | 
65 |   for fullPath, shortPath in imageFiles:
66 |     img = cv2.imread(fullPath, 0)
67 |     print img.reshape(-1).shape
68 |     imgs += [img.reshape(-1)]
69 |     if show:
70 |       plt.imshow(img, cmap=plt.cm.gray)
71 |       plt.show()
72 | 
73 |   return imgs
74 | 
75 | 
76 | def createTrainingSet(show=False):
77 |   # Read the unsupervised data.
78 |   unsupervised = getFiles("unsupervised")
79 | 
80 |   labels = []
81 |   data = []
82 | 
83 |   for index, emotion in enumerate(args.emotions):
84 |     # Read the face images for this emotion.
85 |     print 'Reading data for emotion', emotion
86 |     data_for_emotion = getFiles(emotion)
87 |     data += data_for_emotion
88 |     labels += [index] * len(data_for_emotion)
89 | 
90 |   data = np.array(data)
91 |   print "data.shape"
92 |   print data.shape
93 |   data, labels = common.shuffle(data, labels)
94 | 
95 |   unsupervised = np.array(unsupervised)
96 |   labels = common.labelsToVectors(labels, len(args.emotions))
97 | 
98 |   return unsupervised, data, labels
99 | 
100 | 
101 | def visualizeTrainingData():
102 |   unsupervised, data, labels = createTrainingSet()
103 | 
104 |   if len(unsupervised) > 0:
105 |     print 'show unsupervised data'
106 |     for i in xrange(5):
107 |       plt.imshow(unsupervised[i].reshape((40, 30)), cmap=plt.cm.gray)
108 |       plt.show()
109 | 
110 |   print 'showing supervised data'
111 |   for i in xrange(20):
112 |     plt.imshow(data[i].reshape((40, 30)), cmap=plt.cm.gray)
113 |     plt.show()
114 | 
115 | 
116 | def trainNetWithAllData():
117 |   unsupervisedData, data, labels = createTrainingSet()
118 | 
119 |   print "data.shape"
120 |   print data.shape
121 |   print "labels.shape"
122 |   print labels.shape
123 | 
124 |   data = common.scale(data)
125 |   unsupervisedData = None
126 | 
127 |   activationFunction = activationfunctions.Rectified()
128 |   rbmActivationFunctionVisible = activationfunctions.Identity()
129 |   rbmActivationFunctionHidden = activationfunctions.RectifiedNoisy()
130 | 
131 |   unsupervisedLearningRate = 0.0001
132 |   supervisedLearningRate = 0.001
133 |   momentumMax = 0.99
134 | 
135 |   net = db.DBN(4, [1200, 1500, 1000, len(args.emotions)],
136 |                binary=False,
137 |                activationFunction=activationFunction,
138 |                rbmActivationFunctionVisible=rbmActivationFunctionVisible,
139 |                rbmActivationFunctionHidden=rbmActivationFunctionHidden,
140 |                unsupervisedLearningRate=unsupervisedLearningRate,
141 |                supervisedLearningRate=supervisedLearningRate,
142 |                momentumMax=momentumMax,
143 |                nesterovMomentum=True,
144 |                rbmNesterovMomentum=True,
145 |                rmsprop=True,
146 |                miniBatchSize=20,
147 |                hiddenDropout=0.5,
148 |                visibleDropout=0.8,
149 |                momentumFactorForLearningRateRBM=False,
150 |                firstRBMheuristic=False,
151 |                rbmVisibleDropout=1.0,
152 |                rbmHiddenDropout=1.0,
153 |                preTrainEpochs=10,
154 |                sparsityConstraintRbm=False,
155 |                sparsityRegularizationRbm=0.001,
156 |                sparsityTragetRbm=0.01)
157 | 
158 |   net.train(data, labels, maxEpochs=200,
159 |             validation=False,
160 |             unsupervisedData=unsupervisedData)
161 | 
162 |   with open(args.net_file, "wb") as f:
163 |     pickle.dump(net, f)
164 |   return net
165 | 
166 | 
167 | def trainAndTestNet():
168 |   unsupervisedData, data, labels = createTrainingSet()
169 | 
170 |   print np.unique(np.argmax(labels, axis=1))
171 | 
172 |   print "data.shape"
173 |   print data.shape
174 |   print "labels.shape"
175 |   print labels.shape
176 | 
177 |   # Random data for training and testing
178 |   kf = cross_validation.KFold(n=len(data), n_folds=5)
179 |   for train, test in kf:
180 |     break
181 | 
182 |   print data
183 |   data = common.scale(data)
184 |   unsupervisedData = None
185 | 
186 |   activationFunction = activationfunctions.Rectified()
187 |   rbmActivationFunctionVisible = activationfunctions.Identity()
188 |   rbmActivationFunctionHidden = activationfunctions.RectifiedNoisy()
189 | 
190 |   unsupervisedLearningRate = 0.0001
191 |   supervisedLearningRate = 0.001
192 |   momentumMax = 0.99
193 | 
194 |   trainData = data[train]
195 |   trainLabels = 
labels[train] 196 | 197 | net = db.DBN(4, [1200, 1500, 1000, len(args.emotions)], 198 | binary=False, 199 | activationFunction=activationFunction, 200 | rbmActivationFunctionVisible=rbmActivationFunctionVisible, 201 | rbmActivationFunctionHidden=rbmActivationFunctionHidden, 202 | unsupervisedLearningRate=unsupervisedLearningRate, 203 | supervisedLearningRate=supervisedLearningRate, 204 | momentumMax=momentumMax, 205 | nesterovMomentum=True, 206 | rbmNesterovMomentum=True, 207 | rmsprop=True, 208 | miniBatchSize=20, 209 | hiddenDropout=0.5, 210 | visibleDropout=0.8, 211 | momentumFactorForLearningRateRBM=False, 212 | firstRBMheuristic=False, 213 | rbmVisibleDropout=1.0, 214 | rbmHiddenDropout=1.0, 215 | preTrainEpochs=10, 216 | sparsityConstraintRbm=False, 217 | sparsityRegularizationRbm=0.001, 218 | sparsityTragetRbm=0.01) 219 | 220 | net.train(trainData, trainLabels, maxEpochs=200, 221 | validation=False, 222 | unsupervisedData=unsupervisedData) 223 | 224 | probs, predicted = net.classify(data[test]) 225 | 226 | actualLabels = labels[test] 227 | correct = 0 228 | errorCases = [] 229 | 230 | for i in xrange(len(test)): 231 | actual = actualLabels[i] 232 | print probs[i] 233 | if predicted[i] == np.argmax(actual): 234 | correct += 1 235 | else: 236 | errorCases.append(i) 237 | 238 | print "correct" 239 | print correct 240 | 241 | print "percentage correct" 242 | print correct * 1.0 / len(test) 243 | 244 | confMatrix = confusion_matrix(np.argmax(actualLabels, axis=1), predicted) 245 | print "confusion matrix" 246 | print confMatrix 247 | 248 | with open(args.net_file, "wb") as f: 249 | pickle.dump(net, f) 250 | return net 251 | 252 | # Performs CV to choose the best hyperparameters given the data. 253 | def getHyperParamsAndBestNet(): 254 | unsupervisedData, data, labels = createTrainingSet() 255 | 256 | print np.unique(np.argmax(labels, axis=1)) 257 | 258 | print "data.shape" 259 | print data.shape 260 | print "labels.shape" 261 | print labels.shape 262 | 263 | print data 264 | data = common.scale(data) 265 | unsupervisedData = None 266 | 267 | activationFunction = activationfunctions.Rectified() 268 | rbmActivationFunctionVisible = activationfunctions.Identity() 269 | rbmActivationFunctionHidden = activationfunctions.RectifiedNoisy() 270 | 271 | tried_params = [] 272 | percentages = [] 273 | best_index = 0 274 | index = 0 275 | best_correct = 0 276 | 277 | # Random data for training and testing 278 | kf = cross_validation.KFold(n=len(data), n_folds=10) 279 | for train, test in kf: 280 | unsupervisedLearningRate = random.uniform(0.0001, 0.2) 281 | supervisedLearningRate = random.uniform(0.0001, 0.2) 282 | momentumMax = random.uniform(0.7, 1) 283 | 284 | tried_params += [{'unsupervisedLearningRate': unsupervisedLearningRate, 285 | 'supervisedLearningRate': supervisedLearningRate, 286 | 'momentumMax': momentumMax}] 287 | 288 | trainData = data[train] 289 | trainLabels = labels[train] 290 | 291 | net = db.DBN(4, [1200, 1500, 1000, len(args.emotions)], 292 | binary=False, 293 | activationFunction=activationFunction, 294 | rbmActivationFunctionVisible=rbmActivationFunctionVisible, 295 | rbmActivationFunctionHidden=rbmActivationFunctionHidden, 296 | unsupervisedLearningRate=unsupervisedLearningRate, 297 | supervisedLearningRate=supervisedLearningRate, 298 | momentumMax=momentumMax, 299 | nesterovMomentum=True, 300 | rbmNesterovMomentum=True, 301 | rmsprop=True, 302 | miniBatchSize=20, 303 | hiddenDropout=0.5, 304 | visibleDropout=0.8, 305 | momentumFactorForLearningRateRBM=False, 306 | 
firstRBMheuristic=False, 307 | rbmVisibleDropout=1.0, 308 | rbmHiddenDropout=1.0, 309 | preTrainEpochs=10, 310 | sparsityConstraintRbm=False, 311 | sparsityRegularizationRbm=0.001, 312 | sparsityTragetRbm=0.01) 313 | 314 | net.train(trainData, trainLabels, maxEpochs=200, 315 | validation=False, 316 | unsupervisedData=unsupervisedData) 317 | 318 | probs, predicted = net.classify(data[test]) 319 | 320 | actualLabels = labels[test] 321 | correct = 0 322 | 323 | for i in xrange(len(test)): 324 | actual = actualLabels[i] 325 | print probs[i] 326 | if predicted[i] == np.argmax(actual): 327 | correct += 1 328 | 329 | percentage_correct = correct * 1.0 / len(test) 330 | print "percentage correct" 331 | print percentage_correct 332 | 333 | if percentage_correct > best_correct: 334 | best_index = index 335 | best_correct = percentage_correct 336 | with open(args.net_file, "wb") as f: 337 | pickle.dump(net, f) 338 | 339 | percentages += [percentage_correct] 340 | index += 1 341 | 342 | print 'best params' 343 | print tried_params[best_index] 344 | print 'precision' 345 | print best_correct 346 | 347 | 348 | if __name__ == '__main__': 349 | if args.display_example_data: 350 | visualizeTrainingData() 351 | 352 | if args.cv: 353 | getHyperParamsAndBestNet() 354 | else: 355 | trainNetWithAllData() 356 | 357 | -------------------------------------------------------------------------------- /code/webcam-emotion-recognition/webcam-emotions.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import cv2 3 | import os 4 | import scipy 5 | import signal 6 | import sys 7 | import time 8 | 9 | import face_detection 10 | import ignoreoutput 11 | import emotionrecognition 12 | import cPickle as pickle 13 | 14 | # We need this to import other modules 15 | sys.path.append("..") 16 | 17 | from lib import deepbelief 18 | 19 | WINDOW_NAME = "Emotion recognition" 20 | TIME_BETWEEN_FACE_CHECKS = 0.1 21 | 22 | parser = argparse.ArgumentParser(description=("Live emotion recognition from the webcam")) 23 | parser.add_argument('--displayWebcam', action='store_const', const=True, 24 | help="determines if the image from the webcam is displayed") 25 | parser.add_argument('--gather_training_data', action='store_const', const=True, 26 | default=False, 27 | help=("if false, detects emotions by using the trained network " 28 | "given by netFile. If this flag is set to true, this script " 29 | "is used to collect training data. In that case, the " 30 | "recording_emotion flag needs to be set.")) 31 | parser.add_argument("--seeFaces", action='store_const', const=True, 32 | help=("If passed as argument, the webcam image will show the " 33 | "detected faces. Note that this automatically ensures " 34 | "that the camera will be displayed.")) 35 | parser.add_argument('--recording_emotion', 36 | help="the emotion for which to record training data. " 37 | "used only when this script is used for recording " 38 | "training data. Example: happy. The user sitting in front " 39 | "of the webcam should display this emotion while the program " 40 | "is running and recording data.", 41 | type=str, 42 | default="") 43 | parser.add_argument('--emotions', 44 | nargs='+', 45 | help=('The emotions labels that were used to train the network. ' 46 | 'These should be strings to associate to the integer classes ' 47 | 'given by the network. 
For example, if the network should output '
48 |                           '0 when the input face displays a happy emotion and 1 otherwise, '
49 |                           'this map should be \'happy neutral\''),
50 |                     type=str)
51 | parser.add_argument("--frequency", type=float, default=TIME_BETWEEN_FACE_CHECKS,
52 |                     help="How often should the camera be queried for a face")
53 | parser.add_argument("--netFile",
54 |                     help=("pickle file from which to read the network for testing the camera stream. "
55 |                           "Used only if the gather_training_data flag is set to False."))
56 | 
57 | 
58 | args = parser.parse_args()
59 | 
60 | # Parse the user given arguments.
61 | displayCam = args.displayWebcam
62 | frequency = args.frequency
63 | displayFaces = args.seeFaces
64 | 
65 | emotion_to_text = {}
66 | 
67 | if args.emotions:
68 |   for index, emotion in enumerate(args.emotions):
69 |     emotion_to_text[index] = emotion
70 | 
71 | # Person by Catherine Please from The Noun Project
72 | HAPPY_IMAGE = cv2.imread("icon_4895withoutalpha.png", cv2.IMREAD_GRAYSCALE)
73 | # Sad by Cengiz SARI from The Noun Project
74 | SAD_IMAGE = cv2.imread("icon_39345withoutalpha.png", cv2.IMREAD_GRAYSCALE)
75 | # Surprise designed by Chris McDonnell from the thenounproject.com
76 | SURPRISED_IMAGE = cv2.imread("icon_6231withoutalpha.png", cv2.IMREAD_GRAYSCALE)
77 | 
78 | EMOTION_TO_IMAGE = {
79 |   0: HAPPY_IMAGE,
80 |   1: SAD_IMAGE,
81 |   2: SURPRISED_IMAGE
82 | }
83 | 
84 | # When the user presses Control-C, gracefully exit the program.
85 | def signal_handler(signal, frame):
86 |   print "The emotion recognition program will terminate."
87 |   sys.exit(0)
88 | 
89 | signal.signal(signal.SIGINT, signal_handler)
90 | 
91 | 
92 | def getCameraCapture():
93 |   with ignoreoutput.suppress_stdout_stderr():
94 |     # 0 is supposed to detect any webcam connected to the device.
95 |     capture = cv2.VideoCapture(0)
96 |   if not capture.isOpened():
97 |     print "Failed VideoCapture: unable to open device 0"
98 |     sys.exit(1)
99 |   return capture
100 | 
101 | 
102 | def showFrame(frame, faceCoordinates, emotion=None, draw=False):
103 |   if draw and faceCoordinates:
104 |     # Draw the emotion here as well.
105 |     face_detection.drawFace(frame, faceCoordinates, emotion,
106 |                             emotion_to_text, EMOTION_TO_IMAGE)
107 | 
108 |   cv2.imshow(WINDOW_NAME, frame)
109 | 
110 | 
111 | def readNetwork():
112 |   with open(args.netFile, "rb") as f:
113 |     net = pickle.load(f)
114 |   return net
115 | 
116 | 
117 | def recognitionWork(image, faceCoordinates, net):
118 |   return emotionrecognition.testImage(image, faceCoordinates, net)
119 | 
120 | 
121 | def saveFaceImage(capture, frequency, display, drawFaces):
122 |   img_count = 0
123 | 
124 |   # Create the directory in which we record the training examples.
125 |   if not os.path.exists(args.recording_emotion):
126 |     os.makedirs(args.recording_emotion)
127 | 
128 |   while True:
129 |     flag, frame = capture.read()
130 | 
131 |     if flag:
132 |       faceCoordinates = face_detection.getFaceCoordinates(frame)
133 |       if faceCoordinates:
134 |         image = emotionrecognition.preprocess(frame, faceCoordinates)
135 |         # Save the image that will later be used for training.
136 |         scipy.misc.imsave(os.path.join(args.recording_emotion,
137 |                                        args.recording_emotion + str(img_count) + '.png'),
138 |                           image)
139 | 
140 |         if display:
141 |           showFrame(frame, faceCoordinates, None, drawFaces)
142 |         img_count = img_count + 1
143 | 
144 |     time.sleep(frequency)
145 | 
146 | 
147 | # The drawFaces argument is only taken into account if display was set to true.
148 | def detectAndDisplayFaces(capture, net, display=False, drawFaces=False):
149 |   recognition = True
150 |   # flag gives us some information about the capture.
151 |   # frame is the webcam frame (a numpy image).
152 |   flag, frame = capture.read()
153 |   # Not sure if we should lock the screen when there is an error from the cam.
154 |   if flag:
155 |     faceCoordinates = face_detection.getFaceCoordinates(frame)
156 |     if faceCoordinates and recognition:
157 |       emotion = recognitionWork(frame, faceCoordinates, net)
158 |     else:
159 |       emotion = None
160 |     if display:
161 |       showFrame(frame, faceCoordinates, emotion, drawFaces)
162 |     if faceCoordinates:
163 |       return True
164 |     else:
165 |       return False
166 | 
167 | 
168 | def detectEmotions(capture, frequency, display=False, drawFaces=False):
169 |   net = readNetwork()
170 | 
171 |   while True:
172 |     detectAndDisplayFaces(capture, net, display, drawFaces)
173 |     time.sleep(frequency)
174 | 
175 | 
176 | def main():
177 |   global frequency
178 | 
179 |   if displayFaces:
180 |     showCam = True
181 |   else:
182 |     showCam = displayCam
183 | 
184 |   capture = getCameraCapture()
185 | 
186 |   if showCam:
187 |     cv2.startWindowThread()
188 |     cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
189 |     cv2.setWindowProperty(WINDOW_NAME, cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)
190 | 
191 |   if args.gather_training_data:
192 |     print 'Recording training data for emotion:', args.recording_emotion
193 |     print 'Please try to display that emotion during the recording.'
194 |     saveFaceImage(capture, frequency, showCam, displayFaces)
195 |   else:
196 |     print 'Detecting emotions with the net from', args.netFile
197 |     detectEmotions(capture, frequency, showCam, displayFaces)
198 | 
199 | if __name__ == '__main__':
200 |   main()
201 | 
--------------------------------------------------------------------------------
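For reference, the preprocessing chain that `webcam-emotions.py` applies before classification (see `emotionrecognition.preprocess` above) reduces to roughly the sketch below. It is an approximation, not the exact pipeline: `cv2.equalizeHist` stands in for the CLAHE equalization done by `readfacedatabases.equalizeFromFloatCLAHE`, and the final standardization assumes `lib.common.scale` performs zero-mean, unit-variance scaling:

```python
import cv2
import numpy as np

SMALL_SIZE = (40, 30)  # (rows, columns), matching emotionrecognition.SMALL_SIZE

def preprocess_sketch(frame, faceCoordinates):
  x1, y1, x2, y2 = faceCoordinates
  gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)            # black and white
  face = gray[y1:y2, x1:x2]                                 # crop to the face
  face = cv2.resize(face, (SMALL_SIZE[1], SMALL_SIZE[0]))   # 30x40 pixels
  face = cv2.equalizeHist(face)                             # histogram equalization
  vector = face.reshape(1, -1).astype('float')
  # Zero mean and unit variance, like the scaling used at training time.
  return (vector - vector.mean()) / vector.std()
```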