├── keras ├── __init__.py ├── utils │ ├── __init__.py │ ├── theano_utils.py │ ├── test_utils.py │ ├── np_utils.py │ ├── io_utils.py │ ├── generic_utils.py │ └── layer_utils.py ├── datasets │ ├── __init__.py │ ├── mnist.py │ ├── cifar.py │ ├── cifar100.py │ ├── cifar10.py │ ├── data_utils.py │ ├── imdb.py │ └── reuters.py ├── layers │ ├── __init__.py │ ├── noise.py │ ├── advanced_activations.py │ └── normalization.py ├── wrappers │ └── __init__.py ├── preprocessing │ ├── __init__.py │ ├── sequence.py │ └── text.py ├── activations.py ├── constraints.py ├── objectives.py ├── regularizers.py └── initializations.py ├── tests ├── __init__.py ├── manual │ ├── __init__.py │ ├── check_save_weights.py │ ├── check_model_utils.py │ ├── check_constraints.py │ ├── check_yaml.py │ ├── check_wrappers.py │ ├── check_masked_recurrent.py │ └── check_autoencoder.py └── auto │ ├── test_loss_masking.py │ ├── test_embeddings.py │ ├── test_datasets.py │ ├── keras │ ├── layers │ │ ├── test_recurrent.py │ │ └── test_core.py │ ├── test_constraints.py │ ├── test_activations.py │ └── test_normalization.py │ ├── test_optimizers.py │ ├── test_regularizers.py │ ├── test_loss_weighting.py │ └── test_tasks.py ├── setup.cfg ├── .gitignore ├── docs ├── README.md ├── sources │ ├── layers │ │ ├── containers.md │ │ ├── normalization.md │ │ ├── noise.md │ │ ├── advanced_activations.md │ │ ├── embeddings.md │ │ └── convolutional.md │ ├── constraints.md │ ├── initializations.md │ ├── documentation.md │ ├── objectives.md │ ├── activations.md │ ├── regularizers.md │ ├── preprocessing │ │ ├── image.md │ │ ├── sequence.md │ │ └── text.md │ ├── optimizers.md │ ├── callbacks.md │ ├── datasets.md │ ├── examples.md │ └── index.md └── mkdocs.yml ├── setup.py ├── .travis.yml ├── LICENSE └── examples ├── mnist_mlp.py ├── reuters_mlp.py ├── mnist_cnn.py ├── imdb_lstm.py ├── imdb_cnn.py ├── mnist_irnn.py ├── lstm_text_generation.py ├── kaggle_otto_nn.py ├── cifar10_cnn.py └── addition_rnn.py /keras/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /keras/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /keras/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /keras/layers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /keras/wrappers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/manual/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /keras/preprocessing/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /setup.cfg: 
-------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.DS_Store 2 | *.pyc 3 | *.swp 4 | temp/* 5 | dist/* 6 | build/* 7 | keras/datasets/data/* 8 | keras/datasets/temp/* 9 | docs/site/* 10 | docs/theme/* 11 | tags 12 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # Keras Documentation 2 | 3 | The source for Keras documentation is in this directory under `sources/`. 4 | Our documentation uses extended Markdown, as implemented by [MkDocs](http://mkdocs.org). 5 | 6 | ## Building the documentation 7 | 8 | - install MkDocs: `sudo pip install mkdocs` 9 | - `cd` to the `docs/` folder and run: `mkdocs serve` -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from setuptools import find_packages 3 | 4 | 5 | setup(name='Keras', 6 | version='0.1.2', 7 | description='Theano-based Deep Learning library', 8 | author='Francois Chollet', 9 | author_email='francois.chollet@gmail.com', 10 | url='https://github.com/fchollet/keras', 11 | download_url='https://github.com/fchollet/keras/tarball/0.1.2', 12 | license='MIT', 13 | install_requires=['theano', 'pyyaml', 'h5py'], 14 | packages=find_packages()) 15 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | language: python 3 | # Setup anaconda 4 | before_install: 5 | - wget http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh 6 | - chmod +x miniconda.sh 7 | - ./miniconda.sh -b 8 | - export PATH=/home/travis/miniconda/bin:$PATH 9 | - conda update --yes conda 10 | python: 11 | - "3.4" 12 | # command to install dependencies 13 | install: 14 | - conda install --yes python=$TRAVIS_PYTHON_VERSION numpy scipy matplotlib pandas pytest h5py 15 | # Coverage packages are on my binstar channel 16 | - python setup.py install 17 | # command to run tests 18 | script: py.test tests/ 19 | -------------------------------------------------------------------------------- /keras/datasets/mnist.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import gzip 3 | from .data_utils import get_file 4 | import six.moves.cPickle 5 | import sys 6 | 7 | 8 | def load_data(path="mnist.pkl.gz"): 9 | path = get_file(path, origin="https://s3.amazonaws.com/img-datasets/mnist.pkl.gz") 10 | 11 | if path.endswith(".gz"): 12 | f = gzip.open(path, 'rb') 13 | else: 14 | f = open(path, 'rb') 15 | 16 | if sys.version_info < (3,): 17 | data = six.moves.cPickle.load(f) 18 | else: 19 | data = six.moves.cPickle.load(f, encoding="bytes") 20 | 21 | f.close() 22 | 23 | return data # (X_train, y_train), (X_test, y_test) 24 | -------------------------------------------------------------------------------- /docs/sources/layers/containers.md: -------------------------------------------------------------------------------- 1 | Containers are ensembles of layers that can be interacted with through the same API as `Layer` objects. 
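As a minimal illustration of that shared API (a hedged sketch: the layer sizes are arbitrary placeholders, and passing a container straight to `keras.models.Sequential.add` is an assumption drawn from the description that follows, not code taken from this repository):

```python
from keras.layers import containers
from keras.layers.core import Dense, Activation
from keras.models import Sequential

# Build a small stack of layers inside a container.
block = containers.Sequential()
block.add(Dense(32, 16))        # Dense(input_dim, output_dim), as used throughout these docs
block.add(Activation('relu'))

# Assumption: because the container exposes the same API as a Layer,
# it can be composed like a single layer inside a model.
model = Sequential()
model.add(block)
model.add(Dense(16, 1))
model.add(Activation('sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='sgd', class_mode='binary')
```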
2 | 3 | ## Sequential 4 | 5 | ```python 6 | keras.layers.containers.Sequential(layers=[]) 7 | ``` 8 | 9 | The Sequential container is a linear stack of layers. Apart from the `add` methods and the `layers` constructor argument, the API is identical to that of the `Layer` class. 10 | 11 | This class is also the basis for the `keras.models.Sequential` architecture. 12 | 13 | The `layers` constructor argument is a list of Layer instances. 14 | 15 | __Methods__: 16 | 17 | ```python 18 | add(layer) 19 | ``` 20 | 21 | Add a new layer to the stack. -------------------------------------------------------------------------------- /keras/datasets/cifar.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | import sys 4 | import six.moves.cPickle 5 | from six.moves import range 6 | 7 | def load_batch(fpath, label_key='labels'): 8 | f = open(fpath, 'rb') 9 | if sys.version_info < (3,): 10 | d = six.moves.cPickle.load(f) 11 | else: 12 | d = six.moves.cPickle.load(f, encoding="bytes") 13 | # decode utf8 14 | for k, v in d.items(): 15 | del(d[k]) 16 | d[k.decode("utf8")] = v 17 | f.close() 18 | data = d["data"] 19 | labels = d[label_key] 20 | 21 | data = data.reshape(data.shape[0], 3, 32, 32) 22 | return data, labels 23 | -------------------------------------------------------------------------------- /docs/sources/layers/normalization.md: -------------------------------------------------------------------------------- 1 | 2 | ## BatchNormalization 3 | 4 | ```python 5 | keras.layers.normalization.BatchNormalization(input_shape, epsilon=1e-6, weights=None) 6 | ``` 7 | 8 | Normalize the activations of the previous layer at each batch. 9 | 10 | - __Input shape__: Same as `input_shape`. This layer cannot be used as first layer in a model. 11 | 12 | - __Output shape__: Same as input. 13 | 14 | - __Arguments__: 15 | - __input_shape__: tuple. 16 | - __epsilon__: small float > 0. Fuzz parameter. 17 | - __weights__: Initialization weights. List of 2 numpy arrays, with shapes: `[(input_shape,), (input_shape,)]` 18 | 19 | - __References__: 20 | - [Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift](http://arxiv.org/pdf/1502.03167v3.pdf) -------------------------------------------------------------------------------- /tests/auto/test_loss_masking.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import unittest 3 | from keras.models import Sequential 4 | from keras.layers.core import TimeDistributedDense, Masking 5 | 6 | 7 | class TestLossMasking(unittest.TestCase): 8 | def test_loss_masking(self): 9 | X = np.array( 10 | [[[1, 1], [2, 1], [3, 1], [5, 5]], 11 | [[1, 5], [5, 0], [0, 0], [0, 0]]], dtype=np.int32) 12 | model = Sequential() 13 | model.add(Masking(mask_value=0)) 14 | model.add(TimeDistributedDense(2, 1, init='one')) 15 | model.compile(loss='mse', optimizer='sgd') 16 | y = model.predict(X) 17 | loss = model.fit(X, 4*y, nb_epoch=1, batch_size=2, verbose=1).history['loss'][0] 18 | assert loss == 282.375 19 | 20 | 21 | if __name__ == '__main__': 22 | print('Test loss masking') 23 | unittest.main() 24 | -------------------------------------------------------------------------------- /docs/sources/constraints.md: -------------------------------------------------------------------------------- 1 | ## Usage of constraints 2 | 3 | Functions from the `constraints` module allow setting constraints (eg. 
non-negativity) on network parameters during optimization. 4 | 5 | The penalties are applied on a per-layer basis. The exact API will depend on the layer, but the layers `Dense`, `TimeDistributedDense`, `MaxoutDense`, `Convolution1D` and `Convolution2D` have a unified API. 6 | 7 | These layers expose 2 keyword arguments: 8 | 9 | - `W_constraint` for the main weights matrix 10 | - `b_constraint` for the bias. 11 | 12 | 13 | ```python 14 | from keras.constraints import maxnorm 15 | model.add(Dense(64, 64, W_constraint = maxnorm(2))) 16 | ``` 17 | 18 | ## Available constraints 19 | 20 | - __maxnorm__(m=2): maximum-norm constraint 21 | - __nonneg__(): non-negativity constraint 22 | - __unitnorm__(): unit-norm constraint, enforces the matrix to have unit norm along the last axis -------------------------------------------------------------------------------- /docs/sources/initializations.md: -------------------------------------------------------------------------------- 1 | 2 | ## Usage of initializations 3 | 4 | Initializations define the probability distribution used to set the initial random weights of Keras layers. 5 | 6 | The keyword arguments used for passing initializations to layers will depend on the layer. Usually it is simply `init`: 7 | 8 | ```python 9 | model.add(Dense(64, 64, init='uniform')) 10 | ``` 11 | 12 | ## Available initializations 13 | 14 | - __uniform__ 15 | - __lecun_uniform__: Uniform initialization scaled by the square root of the number of inputs (LeCun 98). 16 | - __normal__ 17 | - __identity__: Use with square 2D layers (`shape[0] == shape[1]`). 18 | - __orthogonal__: Use with square 2D layers (`shape[0] == shape[1]`). 19 | - __zero__ 20 | - __glorot_normal__: Gaussian initialization scaled by fan_in + fan_out (Glorot 2010) 21 | - __glorot_uniform__ 22 | - __he_normal__: Gaussian initialization scaled by fan_in (He et al., 2014) 23 | - __he_uniform__ 24 | -------------------------------------------------------------------------------- /docs/sources/documentation.md: -------------------------------------------------------------------------------- 1 | # Keras Documentation Index 2 | 3 | ## Introduction 4 | 5 | - [Home](index.md) 6 | - [Index](documentation.md) 7 | - [Examples](examples.md) 8 | 9 | --- 10 | 11 | ## Base functionality 12 | 13 | - [Optimizers](optimizers.md) 14 | - [Objectives](objectives.md) 15 | - [Models](models.md) 16 | - [Activations](activations.md) 17 | - [Initializations](initializations.md) 18 | - [Regularizers](regularizers.md) 19 | - [Constraints](constraints.md) 20 | - [Callbacks](callbacks.md) 21 | - [Datasets](datasets.md) 22 | 23 | --- 24 | 25 | ## Layers 26 | - [Core](layers/core.md) 27 | - [Convolutional](layers/convolutional.md) 28 | - [Recurrent](layers/recurrent.md) 29 | - [Advanced Activations](layers/advanced_activations.md) 30 | - [Normalization](layers/normalization.md) 31 | - [Embeddings](layers/embeddings.md) 32 | 33 | --- 34 | 35 | ## Preprocessing 36 | - [Sequence](preprocessing/sequence.md) 37 | - [Text](preprocessing/text.md) 38 | - [Image](preprocessing/image.md) 39 | -------------------------------------------------------------------------------- /keras/datasets/cifar100.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from .cifar import load_batch 3 | from .data_utils import get_file 4 | import numpy as np 5 | import os 6 | 7 | 8 | def load_data(label_mode='fine'): 9 | if label_mode not in ['fine', 'coarse']: 10 | raise 
Exception('label_mode must be one of "fine" "coarse".') 11 | 12 | dirname = "cifar-100-python" 13 | origin = "http://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz" 14 | path = get_file(dirname, origin=origin, untar=True) 15 | 16 | nb_test_samples = 10000 17 | nb_train_samples = 50000 18 | 19 | fpath = os.path.join(path, 'train') 20 | X_train, y_train = load_batch(fpath, label_key=label_mode+'_labels') 21 | 22 | fpath = os.path.join(path, 'test') 23 | X_test, y_test = load_batch(fpath, label_key=label_mode+'_labels') 24 | 25 | y_train = np.reshape(y_train, (len(y_train), 1)) 26 | y_test = np.reshape(y_test, (len(y_test), 1)) 27 | 28 | return (X_train, y_train), (X_test, y_test) 29 | -------------------------------------------------------------------------------- /keras/activations.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import theano.tensor as T 3 | 4 | 5 | def softmax(x): 6 | return T.nnet.softmax(x.reshape((-1, x.shape[-1]))).reshape(x.shape) 7 | 8 | 9 | def time_distributed_softmax(x): 10 | import warnings 11 | warnings.warn("time_distributed_softmax is deprecated. Just use softmax!", DeprecationWarning) 12 | return softmax(x) 13 | 14 | 15 | def softplus(x): 16 | return T.nnet.softplus(x) 17 | 18 | 19 | def relu(x): 20 | return (x + abs(x)) / 2.0 21 | 22 | 23 | def tanh(x): 24 | return T.tanh(x) 25 | 26 | 27 | def sigmoid(x): 28 | return T.nnet.sigmoid(x) 29 | 30 | 31 | def hard_sigmoid(x): 32 | return T.nnet.hard_sigmoid(x) 33 | 34 | 35 | def linear(x): 36 | ''' 37 | The function returns the variable that is passed in, so all types work 38 | ''' 39 | return x 40 | 41 | 42 | from .utils.generic_utils import get_from_module 43 | def get(identifier): 44 | return get_from_module(identifier, globals(), 'activation function') 45 | -------------------------------------------------------------------------------- /keras/datasets/cifar10.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from .cifar import load_batch 3 | from .data_utils import get_file 4 | import numpy as np 5 | import os 6 | 7 | 8 | def load_data(): 9 | dirname = "cifar-10-batches-py" 10 | origin = "http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz" 11 | path = get_file(dirname, origin=origin, untar=True) 12 | 13 | nb_test_samples = 10000 14 | nb_train_samples = 50000 15 | 16 | X_train = np.zeros((nb_train_samples, 3, 32, 32), dtype="uint8") 17 | y_train = np.zeros((nb_train_samples,), dtype="uint8") 18 | 19 | for i in range(1, 6): 20 | fpath = os.path.join(path, 'data_batch_' + str(i)) 21 | data, labels = load_batch(fpath) 22 | X_train[(i-1)*10000:i*10000, :, :, :] = data 23 | y_train[(i-1)*10000:i*10000] = labels 24 | 25 | fpath = os.path.join(path, 'test_batch') 26 | X_test, y_test = load_batch(fpath) 27 | 28 | y_train = np.reshape(y_train, (len(y_train), 1)) 29 | y_test = np.reshape(y_test, (len(y_test), 1)) 30 | 31 | return (X_train, y_train), (X_test, y_test) 32 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, 
copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /keras/utils/theano_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import numpy as np 3 | import theano 4 | import theano.tensor as T 5 | 6 | 7 | def floatX(X): 8 | return np.asarray(X, dtype=theano.config.floatX) 9 | 10 | 11 | def sharedX(X, dtype=theano.config.floatX, name=None): 12 | return theano.shared(np.asarray(X, dtype=dtype), name=name) 13 | 14 | 15 | def shared_zeros(shape, dtype=theano.config.floatX, name=None): 16 | return sharedX(np.zeros(shape), dtype=dtype, name=name) 17 | 18 | 19 | def shared_scalar(val=0., dtype=theano.config.floatX, name=None): 20 | return theano.shared(np.cast[dtype](val)) 21 | 22 | 23 | def shared_ones(shape, dtype=theano.config.floatX, name=None): 24 | return sharedX(np.ones(shape), dtype=dtype, name=name) 25 | 26 | 27 | def alloc_zeros_matrix(*dims): 28 | return T.alloc(np.cast[theano.config.floatX](0.), *dims) 29 | 30 | 31 | def ndim_tensor(ndim): 32 | if ndim == 1: 33 | return T.vector() 34 | elif ndim == 2: 35 | return T.matrix() 36 | elif ndim == 3: 37 | return T.tensor3() 38 | elif ndim == 4: 39 | return T.tensor4() 40 | return T.matrix() 41 | -------------------------------------------------------------------------------- /tests/auto/test_embeddings.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from keras.models import Sequential 4 | from keras.layers.core import Merge, Dense, Activation, Flatten 5 | from keras.layers.embeddings import Embedding 6 | from theano import function 7 | from keras.constraints import unitnorm 8 | 9 | 10 | class TestEmbedding(unittest.TestCase): 11 | def setUp(self): 12 | self.X1 = np.array([[1], [2]], dtype='int32') 13 | self.W1 = np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]], dtype='float32') 14 | 15 | def test_unitnorm_constraint(self): 16 | lookup = Sequential() 17 | lookup.add(Embedding(3, 2, weights=[self.W1], W_constraint=unitnorm())) 18 | lookup.add(Flatten()) 19 | lookup.add(Dense(2, 1)) 20 | lookup.add(Activation('sigmoid')) 21 | lookup.compile(loss='binary_crossentropy', optimizer='sgd', class_mode='binary') 22 | lookup.train_on_batch(self.X1, np.array([[1], [0]], dtype='int32')) 23 | norm = np.linalg.norm(lookup.params[0].get_value(), axis=1) 24 | self.assertTrue(np.allclose(norm, np.ones_like(norm).astype('float32'))) 25 | 26 | if __name__ == '__main__': 27 | unittest.main() 28 | -------------------------------------------------------------------------------- /keras/utils/test_utils.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def get_test_data(nb_train=1000, nb_test=500, input_shape=(10,), output_shape=(2,), 5 | classification=True, nb_class=2): 6 | ''' 7 | classification=True overrides output_shape 8 | (i.e. output_shape is set to (1,)) and the output 9 | consists in integers in [0, nb_class-1]. 10 | 11 | Otherwise: float output with shape output_shape. 12 | ''' 13 | nb_sample = nb_train + nb_test 14 | if classification: 15 | y = np.random.randint(0, nb_class, size=(nb_sample, 1)) 16 | X = np.zeros((nb_sample,) + input_shape) 17 | for i in range(nb_sample): 18 | X[i] = np.random.normal(loc=y[i], scale=1.0, size=input_shape) 19 | else: 20 | y_loc = np.random.random((nb_sample,)) 21 | X = np.zeros((nb_sample,) + input_shape) 22 | y = np.zeros((nb_sample,) + output_shape) 23 | for i in range(nb_sample): 24 | X[i] = np.random.normal(loc=y_loc[i], scale=1.0, size=input_shape) 25 | y[i] = np.random.normal(loc=y_loc[i], scale=1.0, size=output_shape) 26 | 27 | return (X[:nb_train], y[:nb_train]), (X[nb_train:], y[nb_train:]) 28 | -------------------------------------------------------------------------------- /keras/constraints.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import theano 3 | import theano.tensor as T 4 | import numpy as np 5 | 6 | 7 | class Constraint(object): 8 | def __call__(self, p): 9 | return p 10 | 11 | def get_config(self): 12 | return {"name": self.__class__.__name__} 13 | 14 | 15 | class MaxNorm(Constraint): 16 | def __init__(self, m=2): 17 | self.m = m 18 | 19 | def __call__(self, p): 20 | norms = T.sqrt(T.sum(T.sqr(p), axis=0)) 21 | desired = T.clip(norms, 0, self.m) 22 | p = p * (desired / (1e-7 + norms)) 23 | return p 24 | 25 | def get_config(self): 26 | return {"name": self.__class__.__name__, 27 | "m": self.m} 28 | 29 | 30 | class NonNeg(Constraint): 31 | def __call__(self, p): 32 | p *= T.ge(p, 0) 33 | return p 34 | 35 | 36 | class UnitNorm(Constraint): 37 | def __call__(self, p): 38 | return p / T.sqrt(T.sum(p**2, axis=-1, keepdims=True)) 39 | 40 | identity = Constraint 41 | maxnorm = MaxNorm 42 | nonneg = NonNeg 43 | unitnorm = UnitNorm 44 | 45 | from .utils.generic_utils import get_from_module 46 | def get(identifier, kwargs=None): 47 | return get_from_module(identifier, globals(), 'constraint', instantiate=True, kwargs=kwargs) 48 | -------------------------------------------------------------------------------- /docs/sources/layers/noise.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## GaussianNoise 4 | ```python 5 | keras.layers.noise.GaussianNoise(sigma) 6 | ``` 7 | Apply to the input an additive zero-centred gaussian noise with standard deviation `sigma`. This is useful to mitigate overfitting (you could see it as a kind of random data augmentation). Gaussian Noise (GS) is a natural choice as corruption process for real valued inputs. 8 | 9 | The Gaussian noise is only added at training time. 10 | 11 | - __Input shape__: This layer does not assume a specific input shape. 12 | 13 | - __Output shape__: Same as input. 14 | 15 | - __Arguments__: 16 | 17 | - __sigma__: float, standard deviation of the noise distribution. 18 | 19 | --- 20 | 21 | ## GaussianDropout 22 | ```python 23 | keras.layers.noise.GaussianDropout(p) 24 | ``` 25 | Apply to the input an multiplicative one-centred gaussian noise with standard deviation `sqrt(p/(1-p))`. 
p refers to drop probability to match Dropout layer syntax. 26 | 27 | http://www.cs.toronto.edu/~rsalakhu/papers/srivastava14a.pdf 28 | 29 | The Gaussian noise is only used at training time. 30 | 31 | - __Input shape__: This layer does not assume a specific input shape. 32 | 33 | - __Output shape__: Same as input. 34 | 35 | - __Arguments__: 36 | 37 | - __p__: float, drop probability as with Dropout. 38 | 39 | -------------------------------------------------------------------------------- /docs/sources/objectives.md: -------------------------------------------------------------------------------- 1 | 2 | ## Usage of objectives 3 | 4 | An objective function (or loss function, or optimization score function) is one of the two parameters required to compile a model: 5 | 6 | ```python 7 | model.compile(loss='mean_squared_error', optimizer='sgd') 8 | ``` 9 | 10 | You can either pass the name of an existing objective, or pass a Theano symbolic function that returns a scalar for each data-point and takes the following two arguments: 11 | 12 | - __y_true__: True labels. Theano tensor. 13 | - __y_pred__: Predictions. Theano tensor of the same shape as y_true. 14 | 15 | The actual optimized objective is the mean of the output array across all datapoints. 16 | 17 | For a few examples of such functions, check out the [objectives source](https://github.com/fchollet/keras/blob/master/keras/objectives.py). 18 | 19 | ## Available objectives 20 | 21 | - __mean_squared_error__ / __mse__ 22 | - __mean_absolute_error__ / __mae__ 23 | - __mean_absolute_percentage_error__ / __mape__ 24 | - __mean_squared_logarithmic_error__ / __msle__ 25 | - __squared_hinge__ 26 | - __hinge__ 27 | - __binary_crossentropy__: Also known as logloss. 28 | - __categorical_crossentropy__: Also known as multiclass logloss. __Note__: using this objective requires that your labels are binary arrays of shape `(nb_samples, nb_classes)`. 29 | -------------------------------------------------------------------------------- /docs/mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: Keras Documentation 2 | theme: readthedocs 3 | docs_dir: sources 4 | repo_url: http://github.com/fchollet/keras 5 | site_url: http://keras.io/ 6 | #theme_dir: theme 7 | site_description: Documentation for fast and lightweight Keras Deep Learning library. 
8 | 9 | dev_addr: '0.0.0.0:8000' 10 | google_analytics: ['UA-61785484-1', 'keras.io'] 11 | 12 | 13 | pages: 14 | - Home: index.md 15 | - Index: documentation.md 16 | - Examples: examples.md 17 | - Optimizers: optimizers.md 18 | - Objectives: objectives.md 19 | - Models: models.md 20 | - Activations: activations.md 21 | - Initializations: initializations.md 22 | - Regularizers: regularizers.md 23 | - Constraints: constraints.md 24 | - Callbacks: callbacks.md 25 | - Datasets: datasets.md 26 | - Layers: 27 | - Core Layers: layers/core.md 28 | - Convolutional Layers: layers/convolutional.md 29 | - Recurrent Layers: layers/recurrent.md 30 | - Advanced Activations Layers: layers/advanced_activations.md 31 | - Normalization Layers: layers/normalization.md 32 | - Embedding Layers: layers/embeddings.md 33 | - Noise layers: layers/noise.md 34 | - Containers: layers/containers.md 35 | - Preprocessing: 36 | - Sequence Preprocessing: preprocessing/sequence.md 37 | - Text Preprocessing: preprocessing/text.md 38 | - Image Preprocessing: preprocessing/image.md 39 | 40 | -------------------------------------------------------------------------------- /docs/sources/activations.md: -------------------------------------------------------------------------------- 1 | 2 | ## Usage of activations 3 | 4 | Activations can either be used through an `Activation` layer, or through the `activation` argument supported by all forward layers: 5 | 6 | ```python 7 | from keras.layers.core import Activation, Dense 8 | 9 | model.add(Dense(64, 64, init='uniform')) 10 | model.add(Activation('tanh')) 11 | ``` 12 | is equivalent to: 13 | ```python 14 | model.add(Dense(20, 64, init='uniform', activation='tanh')) 15 | ``` 16 | 17 | You can also pass an element-wise Theano function as an activation: 18 | 19 | ```python 20 | def tanh(x): 21 | return theano.tensor.tanh(x) 22 | 23 | model.add(Dense(20, 64, init='uniform', activation=tanh)) 24 | model.add(Activation(tanh)) 25 | ``` 26 | 27 | ## Available activations 28 | 29 | - __softmax__: Softmax applied across inputs last dimension. Expects shape either `(nb_samples, nb_timesteps, nb_dims)` or `(nb_samples, nb_dims)`. 30 | - __softplus__ 31 | - __relu__ 32 | - __tanh__ 33 | - __sigmoid__ 34 | - __hard_sigmoid__ 35 | - __linear__ 36 | 37 | ## On Advanced Activations 38 | 39 | Activations that are more complex than a simple Theano function (eg. learnable activations, configurable activations, etc.) are available as [Advanced Activation layers](layers/advanced_activations.md), and can be found in the module `keras.layers.advanced_activations`. These include PReLU and LeakyReLU. 
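As a hedged sketch of how such a layer might be wired into a model (the layer sizes are arbitrary placeholders; the `Dense(input_dim, output_dim)` call style and the `LeakyReLU(alpha=0.3)` / `PReLU(input_shape)` signatures follow the layer documentation elsewhere in this repository):

```python
from keras.models import Sequential
from keras.layers.core import Dense, Activation
from keras.layers.advanced_activations import LeakyReLU, PReLU

model = Sequential()
model.add(Dense(784, 128))
model.add(LeakyReLU(alpha=0.3))   # small negative slope instead of a hard zero
model.add(Dense(128, 128))
model.add(PReLU((128,)))          # per-unit learnable slope; needs the incoming shape
model.add(Dense(128, 10))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy', optimizer='sgd')
```

Unlike the simple activations listed above, these are added as standalone layers rather than passed by name through the `activation` argument.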
40 | -------------------------------------------------------------------------------- /tests/manual/check_save_weights.py: -------------------------------------------------------------------------------- 1 | from keras.models import Sequential 2 | from keras.layers.core import Dense, Dropout, Activation 3 | from keras.optimizers import SGD 4 | 5 | import sys 6 | sys.setrecursionlimit(10000) # to be able to pickle Theano compiled functions 7 | 8 | import pickle, numpy 9 | 10 | def create_model(): 11 | model = Sequential() 12 | model.add(Dense(256, 2048, init='uniform', activation='relu')) 13 | model.add(Dropout(0.5)) 14 | model.add(Dense(2048, 2048, init='uniform', activation='relu')) 15 | model.add(Dropout(0.5)) 16 | model.add(Dense(2048, 2048, init='uniform', activation='relu')) 17 | model.add(Dropout(0.5)) 18 | model.add(Dense(2048, 2048, init='uniform', activation='relu')) 19 | model.add(Dropout(0.5)) 20 | model.add(Dense(2048, 256, init='uniform', activation='linear')) 21 | return model 22 | 23 | model = create_model() 24 | sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True) 25 | model.compile(loss='mse', optimizer=sgd) 26 | 27 | pickle.dump(model, open('/tmp/model.pkl', 'wb')) 28 | model.save_weights('/tmp/model_weights.hdf5') 29 | 30 | model_loaded = create_model() 31 | model_loaded.load_weights('/tmp/model_weights.hdf5') 32 | 33 | for k in range(len(model.layers)): 34 | weights_orig = model.layers[k].get_weights() 35 | weights_loaded = model_loaded.layers[k].get_weights() 36 | for x, y in zip(weights_orig, weights_loaded): 37 | if numpy.any(x != y): 38 | raise ValueError('Loaded weights are different from pickled weights!') 39 | 40 | 41 | -------------------------------------------------------------------------------- /tests/auto/test_datasets.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import unittest 3 | from keras.datasets import cifar10, cifar100, reuters, imdb, mnist 4 | 5 | 6 | class TestDatasets(unittest.TestCase): 7 | def test_cifar(self): 8 | print('cifar10') 9 | (X_train, y_train), (X_test, y_test) = cifar10.load_data() 10 | print(X_train.shape) 11 | print(X_test.shape) 12 | print(y_train.shape) 13 | print(y_test.shape) 14 | 15 | print('cifar100 fine') 16 | (X_train, y_train), (X_test, y_test) = cifar100.load_data('fine') 17 | print(X_train.shape) 18 | print(X_test.shape) 19 | print(y_train.shape) 20 | print(y_test.shape) 21 | 22 | print('cifar100 coarse') 23 | (X_train, y_train), (X_test, y_test) = cifar100.load_data('coarse') 24 | print(X_train.shape) 25 | print(X_test.shape) 26 | print(y_train.shape) 27 | print(y_test.shape) 28 | 29 | def test_reuters(self): 30 | print('reuters') 31 | (X_train, y_train), (X_test, y_test) = reuters.load_data() 32 | 33 | def test_mnist(self): 34 | print('mnist') 35 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 36 | print(X_train.shape) 37 | print(X_test.shape) 38 | print(y_train.shape) 39 | print(y_test.shape) 40 | 41 | def test_imdb(self): 42 | print('imdb') 43 | (X_train, y_train), (X_test, y_test) = imdb.load_data() 44 | 45 | 46 | if __name__ == '__main__': 47 | print('Test datasets') 48 | unittest.main() 49 | -------------------------------------------------------------------------------- /keras/utils/np_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import numpy as np 3 | import scipy as sp 4 | from six.moves import range 5 | from 
six.moves import zip 6 | 7 | 8 | def to_categorical(y, nb_classes=None): 9 | '''Convert class vector (integers from 0 to nb_classes) 10 | to binary class matrix, for use with categorical_crossentropy 11 | ''' 12 | y = np.asarray(y, dtype='int32') 13 | if not nb_classes: 14 | nb_classes = np.max(y)+1 15 | Y = np.zeros((len(y), nb_classes)) 16 | for i in range(len(y)): 17 | Y[i, y[i]] = 1. 18 | return Y 19 | 20 | 21 | def normalize(a, axis=-1, order=2): 22 | l2 = np.atleast_1d(np.linalg.norm(a, order, axis)) 23 | l2[l2 == 0] = 1 24 | return a / np.expand_dims(l2, axis) 25 | 26 | 27 | def binary_logloss(p, y): 28 | epsilon = 1e-15 29 | p = sp.maximum(epsilon, p) 30 | p = sp.minimum(1-epsilon, p) 31 | res = sum(y * sp.log(p) + sp.subtract(1, y) * sp.log(sp.subtract(1, p))) 32 | res *= -1.0/len(y) 33 | return res 34 | 35 | 36 | def multiclass_logloss(P, Y): 37 | score = 0. 38 | npreds = [P[i][Y[i]-1] for i in range(len(Y))] 39 | score = -(1. / len(Y)) * np.sum(np.log(npreds)) 40 | return score 41 | 42 | 43 | def accuracy(p, y): 44 | return np.mean([a == b for a, b in zip(p, y)]) 45 | 46 | 47 | def probas_to_classes(y_pred): 48 | if len(y_pred.shape) > 1 and y_pred.shape[1] > 1: 49 | return categorical_probas_to_classes(y_pred) 50 | return np.array([1 if p > 0.5 else 0 for p in y_pred]) 51 | 52 | 53 | def categorical_probas_to_classes(p): 54 | return np.argmax(p, axis=1) 55 | -------------------------------------------------------------------------------- /docs/sources/regularizers.md: -------------------------------------------------------------------------------- 1 | ## Usage of regularizers 2 | 3 | Regularizers allow to apply penalties on layer parameters or layer activity during optimization. These penalties are incorporated in the loss function that the network optimizes. 4 | 5 | The penalties are applied on a per-layer basis. The exact API will depend on the layer, but the layers `Dense`, `TimeDistributedDense`, `MaxoutDense`, `Convolution1D` and `Convolution2D` have a unified API. 6 | 7 | These layers expose 3 keyword arguments: 8 | 9 | - `W_regularizer`: instance of `keras.regularizers.WeightRegularizer` 10 | - `b_regularizer`: instance of `keras.regularizers.WeightRegularizer` 11 | - `activity_regularizer`: instance of `keras.regularizers.ActivityRegularizer` 12 | 13 | 14 | ## Example 15 | 16 | ```python 17 | from keras.regularizers import l2, activity_l2 18 | model.add(Dense(64, 64, W_regularizer=l2(0.01), activity_regularizer=activity_l2(0.01))) 19 | ``` 20 | 21 | ## Available penalties 22 | 23 | ```python 24 | keras.regularizers.WeightRegularizer(l1=0., l2=0.) 25 | ``` 26 | 27 | ```python 28 | keras.regularizers.ActivityRegularizer(l1=0., l2=0.) 29 | ``` 30 | 31 | ## Shortcuts 32 | 33 | These are shortcut functions available in `keras.regularizers`. 
34 | 35 | - __l1__(l=0.01): L1 weight regularization penalty, also known as LASSO 36 | - __l2__(l=0.01): L2 weight regularization penalty, also known as weight decay, or Ridge 37 | - __l1l2__(l1=0.01, l2=0.01): L1-L2 weight regularization penalty, also known as ElasticNet 38 | - __activity_l1__(l=0.01): L1 activity regularization 39 | - __activity_l2__(l=0.01): L2 activity regularization 40 | - __activity_l1l2__(l1=0.01, l2=0.01): L1+L2 activity regularization 41 | -------------------------------------------------------------------------------- /keras/datasets/data_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | 4 | import tarfile, inspect, os 5 | from six.moves.urllib.request import FancyURLopener 6 | 7 | from ..utils.generic_utils import Progbar 8 | 9 | class ParanoidURLopener(FancyURLopener): 10 | def http_error_default(self, url, fp, errcode, errmsg, headers): 11 | raise Exception('URL fetch failure on {}: {} -- {}'.format(url, errcode, errmsg)) 12 | 13 | def get_file(fname, origin, untar=False): 14 | datadir = os.path.expanduser(os.path.join('~', '.keras', 'datasets')) 15 | if not os.path.exists(datadir): 16 | os.makedirs(datadir) 17 | 18 | if untar: 19 | untar_fpath = os.path.join(datadir, fname) 20 | fpath = untar_fpath + '.tar.gz' 21 | else: 22 | fpath = os.path.join(datadir, fname) 23 | 24 | try: 25 | f = open(fpath) 26 | except: 27 | print('Downloading data from', origin) 28 | 29 | global progbar 30 | progbar = None 31 | 32 | def dl_progress(count, block_size, total_size): 33 | global progbar 34 | if progbar is None: 35 | progbar = Progbar(total_size) 36 | else: 37 | progbar.update(count*block_size) 38 | 39 | ParanoidURLopener().retrieve(origin, fpath, dl_progress) 40 | progbar = None 41 | 42 | if untar: 43 | if not os.path.exists(untar_fpath): 44 | print('Untaring file...') 45 | tfile = tarfile.open(fpath, 'r:gz') 46 | tfile.extractall(path=datadir) 47 | tfile.close() 48 | return untar_fpath 49 | 50 | return fpath 51 | -------------------------------------------------------------------------------- /tests/manual/check_model_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | from keras.models import Sequential, Graph 4 | from keras.layers.core import Layer, Activation, Dense, Flatten, Reshape, Merge 5 | from keras.layers.convolutional import Convolution2D, MaxPooling2D 6 | import keras.utils.layer_utils as layer_utils 7 | 8 | print('-- Sequential model') 9 | left = Sequential() 10 | left.add(Convolution2D(32, 1, 3, 3, border_mode='valid')) 11 | left.add(MaxPooling2D(poolsize=(2, 2))) 12 | left.add(Flatten()) 13 | left.add(Dense(32 * 13 * 13, 50)) 14 | left.add(Activation('relu')) 15 | 16 | right = Sequential() 17 | right.add(Dense(784, 30)) 18 | right.add(Activation('relu')) 19 | 20 | model = Sequential() 21 | model.add(Merge([left, right], mode='concat')) 22 | 23 | model.add(Dense(80, 10)) 24 | model.add(Activation('softmax')) 25 | 26 | layer_utils.print_layer_shapes(model, [(1, 1, 28, 28), (1, 784)]) 27 | 28 | print('-- Graph model') 29 | graph = Graph() 30 | graph.add_input(name='input1', ndim=2) 31 | graph.add_input(name='input2', ndim=4) 32 | graph.add_node(Dense(32, 16), name='dense1', input='input1') 33 | graph.add_node(Dense(16, 4), name='dense3', input='dense1') 34 | 35 | graph.add_node(Convolution2D(32, 1, 3, 3), 
name='conv1', input='input2') 36 | graph.add_node(Flatten(), name='flatten1', input='conv1') 37 | graph.add_node(Dense(32 * 13 * 13, 10), name='dense4', input='flatten1') 38 | 39 | graph.add_output(name='output1', inputs=['dense1', 'dense3'], merge_mode='sum') 40 | graph.add_output(name='output2', inputs=['dense1', 'dense4'], merge_mode='concat') 41 | 42 | layer_utils.print_layer_shapes(graph, {'input1': (1, 32), 'input2': (1, 1, 28, 28)}) 43 | 44 | print('Test script complete') 45 | -------------------------------------------------------------------------------- /tests/auto/keras/layers/test_recurrent.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | import theano 4 | 5 | from keras.layers import recurrent 6 | 7 | nb_samples, timesteps, input_dim, output_dim = 3, 3, 10, 5 8 | 9 | 10 | def _runner(layer_class): 11 | """ 12 | All the recurrent layers share the same interface, so we can run through them with a single 13 | function. 14 | """ 15 | for weights in [None, [np.ones((input_dim, output_dim))]]: 16 | for ret_seq in [True, False]: 17 | layer = layer_class(input_dim, output_dim, return_sequences=ret_seq, weights=weights) 18 | layer.input = theano.shared(value=np.ones((nb_samples, timesteps, input_dim))) 19 | config = layer.get_config() 20 | 21 | for train in [True, False]: 22 | out = layer.get_output(train).eval() 23 | # Make sure the output has the desired shape 24 | if ret_seq: 25 | assert(out.shape == (nb_samples, timesteps, output_dim)) 26 | else: 27 | assert(out.shape == (nb_samples, output_dim)) 28 | 29 | mask = layer.get_output_mask(train) 30 | 31 | 32 | class TestRNNS(unittest.TestCase): 33 | """ 34 | Test all the RNNs using a generic test runner function defined above. 35 | """ 36 | def test_simple(self): 37 | _runner(recurrent.SimpleRNN) 38 | 39 | def test_simple_deep(self): 40 | _runner(recurrent.SimpleDeepRNN) 41 | 42 | def test_gru(self): 43 | _runner(recurrent.GRU) 44 | 45 | def test_lstm(self): 46 | _runner(recurrent.LSTM) 47 | 48 | def test_jzs1(self): 49 | _runner(recurrent.JZS1) 50 | 51 | def test_jzs2(self): 52 | _runner(recurrent.JZS2) 53 | 54 | def test_jzs3(self): 55 | _runner(recurrent.JZS3) 56 | 57 | 58 | if __name__ == '__main__': 59 | unittest.main() 60 | -------------------------------------------------------------------------------- /examples/mnist_mlp.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | import numpy as np 4 | np.random.seed(1337) # for reproducibility 5 | 6 | from keras.datasets import mnist 7 | from keras.models import Sequential 8 | from keras.layers.core import Dense, Dropout, Activation 9 | from keras.optimizers import SGD, Adam, RMSprop 10 | from keras.utils import np_utils 11 | 12 | ''' 13 | Train a simple deep NN on the MNIST dataset. 14 | 15 | Get to 98.30% test accuracy after 20 epochs (there is *a lot* of margin for parameter tuning). 16 | 2 seconds per epoch on a GRID K520 GPU. 
17 | ''' 18 | 19 | batch_size = 128 20 | nb_classes = 10 21 | nb_epoch = 20 22 | 23 | # the data, shuffled and split between tran and test sets 24 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 25 | 26 | X_train = X_train.reshape(60000, 784) 27 | X_test = X_test.reshape(10000, 784) 28 | X_train = X_train.astype("float32") 29 | X_test = X_test.astype("float32") 30 | X_train /= 255 31 | X_test /= 255 32 | print(X_train.shape[0], 'train samples') 33 | print(X_test.shape[0], 'test samples') 34 | 35 | # convert class vectors to binary class matrices 36 | Y_train = np_utils.to_categorical(y_train, nb_classes) 37 | Y_test = np_utils.to_categorical(y_test, nb_classes) 38 | 39 | model = Sequential() 40 | model.add(Dense(784, 128)) 41 | model.add(Activation('relu')) 42 | model.add(Dropout(0.2)) 43 | model.add(Dense(128, 128)) 44 | model.add(Activation('relu')) 45 | model.add(Dropout(0.2)) 46 | model.add(Dense(128, 10)) 47 | model.add(Activation('softmax')) 48 | 49 | rms = RMSprop() 50 | model.compile(loss='categorical_crossentropy', optimizer=rms) 51 | 52 | model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch, show_accuracy=True, verbose=2, validation_data=(X_test, Y_test)) 53 | score = model.evaluate(X_test, Y_test, show_accuracy=True, verbose=0) 54 | print('Test score:', score[0]) 55 | print('Test accuracy:', score[1]) 56 | -------------------------------------------------------------------------------- /keras/layers/noise.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import numpy as np 3 | from .core import MaskedLayer 4 | import theano 5 | import theano.tensor as T 6 | from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams 7 | 8 | 9 | class GaussianNoise(MaskedLayer): 10 | ''' 11 | Corruption process with GaussianNoise 12 | ''' 13 | def __init__(self, sigma): 14 | super(GaussianNoise, self).__init__() 15 | self.sigma = sigma 16 | self.srng = RandomStreams(seed=np.random.randint(10e6)) 17 | 18 | def get_output(self, train=False): 19 | X = self.get_input(train) 20 | if not train or self.sigma == 0: 21 | return X 22 | else: 23 | return X + self.srng.normal(size=X.shape, avg=0.0, std=self.sigma, 24 | dtype=theano.config.floatX) 25 | 26 | def get_config(self): 27 | return {"name": self.__class__.__name__, 28 | "sigma": self.sigma} 29 | 30 | 31 | class GaussianDropout(MaskedLayer): 32 | ''' 33 | Multiplicative Gaussian Noise 34 | Reference: 35 | Dropout: A Simple Way to Prevent Neural Networks from Overfitting 36 | Srivastava, Hinton, et al. 
2014 37 | http://www.cs.toronto.edu/~rsalakhu/papers/srivastava14a.pdf 38 | ''' 39 | def __init__(self, p): 40 | super(GaussianDropout, self).__init__() 41 | self.p = p 42 | self.srng = RandomStreams(seed=np.random.randint(10e6)) 43 | 44 | def get_output(self, train): 45 | X = self.get_input(train) 46 | if train: 47 | # self.p refers to drop probability rather than retain probability (as in paper) to match Dropout layer syntax 48 | X *= self.srng.normal(size=X.shape, avg=1.0, std=T.sqrt(self.p / (1.0 - self.p)), dtype=theano.config.floatX) 49 | return X 50 | 51 | def get_config(self): 52 | return {"name": self.__class__.__name__, 53 | "p": self.p} 54 | -------------------------------------------------------------------------------- /keras/objectives.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import theano 3 | import theano.tensor as T 4 | import numpy as np 5 | from six.moves import range 6 | 7 | if theano.config.floatX == 'float64': 8 | epsilon = 1.0e-9 9 | else: 10 | epsilon = 1.0e-7 11 | 12 | 13 | def mean_squared_error(y_true, y_pred): 14 | return T.sqr(y_pred - y_true).mean(axis=-1) 15 | 16 | 17 | def mean_absolute_error(y_true, y_pred): 18 | return T.abs_(y_pred - y_true).mean(axis=-1) 19 | 20 | 21 | def mean_absolute_percentage_error(y_true, y_pred): 22 | return T.abs_((y_true - y_pred) / T.clip(T.abs_(y_true), epsilon, np.inf)).mean(axis=-1) * 100. 23 | 24 | 25 | def mean_squared_logarithmic_error(y_true, y_pred): 26 | return T.sqr(T.log(T.clip(y_pred, epsilon, np.inf) + 1.) - T.log(T.clip(y_true, epsilon, np.inf) + 1.)).mean(axis=-1) 27 | 28 | 29 | def squared_hinge(y_true, y_pred): 30 | return T.sqr(T.maximum(1. - y_true * y_pred, 0.)).mean(axis=-1) 31 | 32 | 33 | def hinge(y_true, y_pred): 34 | return T.maximum(1. 
- y_true * y_pred, 0.).mean(axis=-1) 35 | 36 | 37 | def categorical_crossentropy(y_true, y_pred): 38 | '''Expects a binary class matrix instead of a vector of scalar classes 39 | ''' 40 | y_pred = T.clip(y_pred, epsilon, 1.0 - epsilon) 41 | # scale preds so that the class probas of each sample sum to 1 42 | y_pred /= y_pred.sum(axis=-1, keepdims=True) 43 | cce = T.nnet.categorical_crossentropy(y_pred, y_true) 44 | return cce 45 | 46 | 47 | def binary_crossentropy(y_true, y_pred): 48 | y_pred = T.clip(y_pred, epsilon, 1.0 - epsilon) 49 | bce = T.nnet.binary_crossentropy(y_pred, y_true).mean(axis=-1) 50 | return bce 51 | 52 | 53 | def poisson_loss(y_true, y_pred): 54 | return T.mean(y_pred - y_true * T.log(y_pred), axis=-1) 55 | 56 | # aliases 57 | mse = MSE = mean_squared_error 58 | mae = MAE = mean_absolute_error 59 | mape = MAPE = mean_absolute_percentage_error 60 | msle = MSLE = mean_squared_logarithmic_error 61 | 62 | from .utils.generic_utils import get_from_module 63 | def get(identifier): 64 | return get_from_module(identifier, globals(), 'objective') 65 | -------------------------------------------------------------------------------- /keras/datasets/imdb.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from six.moves import cPickle 3 | import gzip 4 | from .data_utils import get_file 5 | import random 6 | from six.moves import zip 7 | import numpy as np 8 | 9 | 10 | def load_data(path="imdb.pkl", nb_words=None, skip_top=0, maxlen=None, test_split=0.2, seed=113, 11 | start_char=1, oov_char=2, index_from=3): 12 | 13 | path = get_file(path, origin="https://s3.amazonaws.com/text-datasets/imdb.pkl") 14 | 15 | if path.endswith(".gz"): 16 | f = gzip.open(path, 'rb') 17 | else: 18 | f = open(path, 'rb') 19 | 20 | X, labels = six.moves.cPickle.load(f) 21 | f.close() 22 | 23 | np.random.seed(seed) 24 | np.random.shuffle(X) 25 | np.random.seed(seed) 26 | np.random.shuffle(labels) 27 | 28 | if start_char is not None: 29 | X = [[start_char] + [w + index_from for w in x] for x in X] 30 | elif index_from: 31 | X = [[w + index_from for w in x] for x in X] 32 | 33 | if maxlen: 34 | new_X = [] 35 | new_labels = [] 36 | for x, y in zip(X, labels): 37 | if len(x) < maxlen: 38 | new_X.append(x) 39 | new_labels.append(y) 40 | X = new_X 41 | labels = new_labels 42 | 43 | if not nb_words: 44 | nb_words = max([max(x) for x in X]) 45 | 46 | # by convention, use 2 as OOV word 47 | # reserve 'index_from' (=3 by default) characters: 0 (padding), 1 (start), 2 (OOV) 48 | if oov_char is not None: 49 | X = [[oov_char if (w >= nb_words or w < skip_top) else w for w in x] for x in X] 50 | else: 51 | nX = [] 52 | for x in X: 53 | nx = [] 54 | for w in x: 55 | if (w >= nb_words or w < skip_top): 56 | nx.append(w) 57 | nX.append(nx) 58 | X = nX 59 | 60 | X_train = X[:int(len(X)*(1-test_split))] 61 | y_train = labels[:int(len(X)*(1-test_split))] 62 | 63 | X_test = X[int(len(X)*(1-test_split)):] 64 | y_test = labels[int(len(X)*(1-test_split)):] 65 | 66 | return (X_train, y_train), (X_test, y_test) 67 | -------------------------------------------------------------------------------- /docs/sources/layers/advanced_activations.md: -------------------------------------------------------------------------------- 1 | 2 | ## LeakyReLU 3 | 4 | ```python 5 | keras.layers.advanced_activations.LeakyReLU(alpha=0.3) 6 | ``` 7 | 8 | Special version of a Rectified Linear Unit that allows a small gradient when the unit is not active (`f(x) = alpha*x for x 
< 0`). 9 | 10 | - __Input shape__: This layer does not assume a specific input shape. As a result, it cannot be used as the first layer in a model. 11 | 12 | - __Output shape__: Same as input. 13 | 14 | - __Arguments__: 15 | - __alpha__: float >= 0. Negative slope coefficient. 16 | 17 | --- 18 | 19 | ## PReLU 20 | 21 | ```python 22 | keras.layers.advanced_activations.PReLU(input_shape) 23 | ``` 24 | 25 | Parametrized linear unit. Similar to a LeakyReLU, where each input unit has its alpha coefficient, and where these coefficients are learned during training. 26 | 27 | - __Input shape__: Same as `input_shape`. This layer cannot be used as first layer in a model. 28 | 29 | - __Output shape__: Same as input. 30 | 31 | - __Arguments__: 32 | - __input_shape__: tuple. 33 | 34 | - __References__: 35 | - [Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification](http://arxiv.org/pdf/1502.01852v1.pdf) 36 | 37 | --- 38 | 39 | ## ParametricSoftplus 40 | 41 | ```python 42 | keras.layers.advanced_activations.ParametricSoftplus(input_shape) 43 | ``` 44 | 45 | Parametric Softplus of the form: (`f(x) = alpha * (1 + exp(beta * x))`). This is essentially a smooth version of ReLU where the parameters control the sharpness of the rectification. The parameters are initialized to more closely approximate a ReLU than the standard `softplus`: `alpha` initialized to `0.2` and `beta` initialized to `5.0`. The parameters are fit separately for each hidden unit. 46 | 47 | - __Input shape__: Same as `input_shape`. This layer cannot be used as first layer in a model. 48 | 49 | - __Output shape__: Same as input. 50 | 51 | - __Arguments__: 52 | - __input_shape__: tuple. 53 | 54 | - __References__: 55 | - [Inferring Nonlinear Neuronal Computation Based on Physiologically Plausible Inputs](http://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1003143) -------------------------------------------------------------------------------- /tests/auto/test_optimizers.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import numpy as np 3 | np.random.seed(1337) 4 | 5 | from keras.utils.test_utils import get_test_data 6 | from keras.optimizers import SGD, RMSprop, Adagrad, Adadelta, Adam 7 | from keras.models import Sequential 8 | from keras.layers.core import Dense, Activation 9 | from keras.utils.np_utils import to_categorical 10 | import unittest 11 | 12 | 13 | (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=1000, nb_test=200, input_shape=(10,), 14 | classification=True, nb_class=2) 15 | y_train = to_categorical(y_train) 16 | y_test = to_categorical(y_test) 17 | 18 | 19 | def get_model(input_dim, nb_hidden, output_dim): 20 | model = Sequential() 21 | model.add(Dense(input_dim, nb_hidden)) 22 | model.add(Activation('relu')) 23 | model.add(Dense(nb_hidden, output_dim)) 24 | model.add(Activation('softmax')) 25 | return model 26 | 27 | 28 | def _test_optimizer(optimizer, target=0.9): 29 | model = get_model(X_train.shape[1], 10, y_train.shape[1]) 30 | model.compile(loss='categorical_crossentropy', optimizer=optimizer) 31 | history = model.fit(X_train, y_train, nb_epoch=12, batch_size=16, validation_data=(X_test, y_test), show_accuracy=True, verbose=2) 32 | return history.history['val_acc'][-1] > target 33 | 34 | 35 | class TestOptimizers(unittest.TestCase): 36 | def test_sgd(self): 37 | print('test SGD') 38 | sgd = SGD(lr=0.01, momentum=0.9, nesterov=True) 39 | 
self.assertTrue(_test_optimizer(sgd)) 40 | 41 | def test_rmsprop(self): 42 | print('test RMSprop') 43 | self.assertTrue(_test_optimizer(RMSprop())) 44 | 45 | def test_adagrad(self): 46 | print('test Adagrad') 47 | self.assertTrue(_test_optimizer(Adagrad())) 48 | 49 | def test_adadelta(self): 50 | print('test Adadelta') 51 | self.assertTrue(_test_optimizer(Adadelta())) 52 | 53 | def test_adam(self): 54 | print('test Adam') 55 | self.assertTrue(_test_optimizer(Adam())) 56 | 57 | if __name__ == '__main__': 58 | print('Test optimizers') 59 | unittest.main() 60 | -------------------------------------------------------------------------------- /keras/regularizers.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import theano.tensor as T 3 | 4 | 5 | class Regularizer(object): 6 | def set_param(self, p): 7 | self.p = p 8 | 9 | def set_layer(self, layer): 10 | self.layer = layer 11 | 12 | def __call__(self, loss): 13 | return loss 14 | 15 | def get_config(self): 16 | return {"name": self.__class__.__name__} 17 | 18 | 19 | class WeightRegularizer(Regularizer): 20 | def __init__(self, l1=0., l2=0.): 21 | self.l1 = l1 22 | self.l2 = l2 23 | 24 | def set_param(self, p): 25 | self.p = p 26 | 27 | def __call__(self, loss): 28 | loss += T.sum(abs(self.p)) * self.l1 29 | loss += T.sum(self.p ** 2) * self.l2 30 | return loss 31 | 32 | def get_config(self): 33 | return {"name": self.__class__.__name__, 34 | "l1": self.l1, 35 | "l2": self.l2} 36 | 37 | 38 | class ActivityRegularizer(Regularizer): 39 | def __init__(self, l1=0., l2=0.): 40 | self.l1 = l1 41 | self.l2 = l2 42 | 43 | def set_layer(self, layer): 44 | self.layer = layer 45 | 46 | def __call__(self, loss): 47 | loss += self.l1 * T.sum(T.mean(abs(self.layer.get_output(True)), axis=0)) 48 | loss += self.l2 * T.sum(T.mean(self.layer.get_output(True) ** 2, axis=0)) 49 | return loss 50 | 51 | def get_config(self): 52 | return {"name": self.__class__.__name__, 53 | "l1": self.l1, 54 | "l2": self.l2} 55 | 56 | 57 | def l1(l=0.01): 58 | return WeightRegularizer(l1=l) 59 | 60 | 61 | def l2(l=0.01): 62 | return WeightRegularizer(l2=l) 63 | 64 | 65 | def l1l2(l1=0.01, l2=0.01): 66 | return WeightRegularizer(l1=l1, l2=l2) 67 | 68 | 69 | def activity_l1(l=0.01): 70 | return ActivityRegularizer(l1=l) 71 | 72 | 73 | def activity_l2(l=0.01): 74 | return ActivityRegularizer(l2=l) 75 | 76 | 77 | def activity_l1l2(l1=0.01, l2=0.01): 78 | return ActivityRegularizer(l1=l1, l2=l2) 79 | 80 | identity = Regularizer 81 | 82 | from .utils.generic_utils import get_from_module 83 | def get(identifier, kwargs=None): 84 | return get_from_module(identifier, globals(), 'regularizer', instantiate=True, kwargs=kwargs) 85 | -------------------------------------------------------------------------------- /keras/utils/io_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import h5py 3 | import numpy as np 4 | from collections import defaultdict 5 | 6 | 7 | class HDF5Matrix(): 8 | refs = defaultdict(int) 9 | 10 | def __init__(self, datapath, dataset, start, end, normalizer=None): 11 | if datapath not in list(self.refs.keys()): 12 | f = h5py.File(datapath) 13 | self.refs[datapath] = f 14 | else: 15 | f = self.refs[datapath] 16 | self.start = start 17 | self.end = end 18 | self.data = f[dataset] 19 | self.normalizer = normalizer 20 | 21 | def __len__(self): 22 | return self.end - self.start 23 | 24 | def __getitem__(self, 
key): 25 | if isinstance(key, slice): 26 | if key.stop + self.start <= self.end: 27 | idx = slice(key.start+self.start, key.stop + self.start) 28 | else: 29 | raise IndexError 30 | elif isinstance(key, int): 31 | if key + self.start < self.end: 32 | idx = key+self.start 33 | else: 34 | raise IndexError 35 | elif isinstance(key, np.ndarray): 36 | if np.max(key) + self.start < self.end: 37 | idx = (self.start + key).tolist() 38 | else: 39 | raise IndexError 40 | elif isinstance(key, list): 41 | if max(key) + self.start < self.end: 42 | idx = [x + self.start for x in key] 43 | else: 44 | raise IndexError 45 | if self.normalizer is not None: 46 | return self.normalizer(self.data[idx]) 47 | else: 48 | return self.data[idx] 49 | 50 | @property 51 | def shape(self): 52 | return tuple([self.end - self.start, self.data.shape[1]]) 53 | 54 | 55 | def save_array(array, name): 56 | import tables 57 | f = tables.open_file(name, 'w') 58 | atom = tables.Atom.from_dtype(array.dtype) 59 | ds = f.createCArray(f.root, 'data', atom, array.shape) 60 | ds[:] = array 61 | f.close() 62 | 63 | 64 | def load_array(name): 65 | import tables 66 | f = tables.open_file(name) 67 | array = f.root.data 68 | a = np.empty(shape=array.shape, dtype=array.dtype) 69 | a[:] = array[:] 70 | f.close() 71 | return a 72 | -------------------------------------------------------------------------------- /examples/reuters_mlp.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | import numpy as np 4 | np.random.seed(1337) # for reproducibility 5 | 6 | from keras.datasets import reuters 7 | from keras.models import Sequential 8 | from keras.layers.core import Dense, Dropout, Activation 9 | from keras.layers.normalization import BatchNormalization 10 | from keras.utils import np_utils 11 | from keras.preprocessing.text import Tokenizer 12 | 13 | ''' 14 | Train and evaluate a simple MLP on the Reuters newswire topic classification task. 
15 | GPU run command: 16 | THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 python examples/reuters_mlp.py 17 | CPU run command: 18 | python examples/reuters_mlp.py 19 | ''' 20 | 21 | max_words = 1000 22 | batch_size = 32 23 | nb_epoch = 5 24 | 25 | print("Loading data...") 26 | (X_train, y_train), (X_test, y_test) = reuters.load_data(nb_words=max_words, test_split=0.2) 27 | print(len(X_train), 'train sequences') 28 | print(len(X_test), 'test sequences') 29 | 30 | nb_classes = np.max(y_train)+1 31 | print(nb_classes, 'classes') 32 | 33 | print("Vectorizing sequence data...") 34 | tokenizer = Tokenizer(nb_words=max_words) 35 | X_train = tokenizer.sequences_to_matrix(X_train, mode="binary") 36 | X_test = tokenizer.sequences_to_matrix(X_test, mode="binary") 37 | print('X_train shape:', X_train.shape) 38 | print('X_test shape:', X_test.shape) 39 | 40 | print("Convert class vector to binary class matrix (for use with categorical_crossentropy)") 41 | Y_train = np_utils.to_categorical(y_train, nb_classes) 42 | Y_test = np_utils.to_categorical(y_test, nb_classes) 43 | print('Y_train shape:', Y_train.shape) 44 | print('Y_test shape:', Y_test.shape) 45 | 46 | print("Building model...") 47 | model = Sequential() 48 | model.add(Dense(max_words, 512)) 49 | model.add(Activation('relu')) 50 | model.add(Dropout(0.5)) 51 | model.add(Dense(512, nb_classes)) 52 | model.add(Activation('softmax')) 53 | 54 | model.compile(loss='categorical_crossentropy', optimizer='adam') 55 | 56 | history = model.fit(X_train, Y_train, nb_epoch=nb_epoch, batch_size=batch_size, verbose=1, show_accuracy=True, validation_split=0.1) 57 | score = model.evaluate(X_test, Y_test, batch_size=batch_size, verbose=1, show_accuracy=True) 58 | print('Test score:', score[0]) 59 | print('Test accuracy:', score[1]) 60 | -------------------------------------------------------------------------------- /examples/mnist_cnn.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | import numpy as np 4 | np.random.seed(1337) # for reproducibility 5 | 6 | from keras.datasets import mnist 7 | from keras.models import Sequential 8 | from keras.layers.core import Dense, Dropout, Activation, Flatten 9 | from keras.layers.convolutional import Convolution2D, MaxPooling2D 10 | from keras.utils import np_utils 11 | 12 | ''' 13 | Train a simple convnet on the MNIST dataset. 14 | 15 | Run on GPU: THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 python mnist_cnn.py 16 | 17 | Get to 99.25% test accuracy after 12 epochs (there is still a lot of margin for parameter tuning). 18 | 16 seconds per epoch on a GRID K520 GPU. 
19 | ''' 20 | 21 | batch_size = 128 22 | nb_classes = 10 23 | nb_epoch = 12 24 | 25 | # the data, shuffled and split between tran and test sets 26 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 27 | 28 | X_train = X_train.reshape(X_train.shape[0], 1, 28, 28) 29 | X_test = X_test.reshape(X_test.shape[0], 1, 28, 28) 30 | X_train = X_train.astype("float32") 31 | X_test = X_test.astype("float32") 32 | X_train /= 255 33 | X_test /= 255 34 | print('X_train shape:', X_train.shape) 35 | print(X_train.shape[0], 'train samples') 36 | print(X_test.shape[0], 'test samples') 37 | 38 | # convert class vectors to binary class matrices 39 | Y_train = np_utils.to_categorical(y_train, nb_classes) 40 | Y_test = np_utils.to_categorical(y_test, nb_classes) 41 | 42 | model = Sequential() 43 | 44 | model.add(Convolution2D(32, 1, 3, 3, border_mode='full')) 45 | model.add(Activation('relu')) 46 | model.add(Convolution2D(32, 32, 3, 3)) 47 | model.add(Activation('relu')) 48 | model.add(MaxPooling2D(poolsize=(2, 2))) 49 | model.add(Dropout(0.25)) 50 | 51 | model.add(Flatten()) 52 | model.add(Dense(32*196, 128)) 53 | model.add(Activation('relu')) 54 | model.add(Dropout(0.5)) 55 | 56 | model.add(Dense(128, nb_classes)) 57 | model.add(Activation('softmax')) 58 | 59 | model.compile(loss='categorical_crossentropy', optimizer='adadelta') 60 | 61 | model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch, show_accuracy=True, verbose=1, validation_data=(X_test, Y_test)) 62 | score = model.evaluate(X_test, Y_test, show_accuracy=True, verbose=0) 63 | print('Test score:', score[0]) 64 | print('Test accuracy:', score[1]) 65 | -------------------------------------------------------------------------------- /examples/imdb_lstm.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | import numpy as np 4 | np.random.seed(1337) # for reproducibility 5 | 6 | from keras.preprocessing import sequence 7 | from keras.optimizers import SGD, RMSprop, Adagrad 8 | from keras.utils import np_utils 9 | from keras.models import Sequential 10 | from keras.layers.core import Dense, Dropout, Activation 11 | from keras.layers.embeddings import Embedding 12 | from keras.layers.recurrent import LSTM, GRU 13 | from keras.datasets import imdb 14 | 15 | ''' 16 | Train a LSTM on the IMDB sentiment classification task. 17 | 18 | The dataset is actually too small for LSTM to be of any advantage 19 | compared to simpler, much faster methods such as TF-IDF+LogReg. 20 | 21 | Notes: 22 | 23 | - RNNs are tricky. Choice of batch size is important, 24 | choice of loss and optimizer is critical, etc. 25 | Some configurations won't converge. 26 | 27 | - LSTM loss decrease patterns during training can be quite different 28 | from what you see with CNNs/MLPs/etc. 
29 | 30 | GPU command: 31 | THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 python imdb_lstm.py 32 | ''' 33 | 34 | max_features = 20000 35 | maxlen = 100 # cut texts after this number of words (among top max_features most common words) 36 | batch_size = 32 37 | 38 | print("Loading data...") 39 | (X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features, test_split=0.2) 40 | print(len(X_train), 'train sequences') 41 | print(len(X_test), 'test sequences') 42 | 43 | print("Pad sequences (samples x time)") 44 | X_train = sequence.pad_sequences(X_train, maxlen=maxlen) 45 | X_test = sequence.pad_sequences(X_test, maxlen=maxlen) 46 | print('X_train shape:', X_train.shape) 47 | print('X_test shape:', X_test.shape) 48 | 49 | print('Build model...') 50 | model = Sequential() 51 | model.add(Embedding(max_features, 128)) 52 | model.add(LSTM(128, 128)) # try using a GRU instead, for fun 53 | model.add(Dropout(0.5)) 54 | model.add(Dense(128, 1)) 55 | model.add(Activation('sigmoid')) 56 | 57 | # try using different optimizers and different optimizer configs 58 | model.compile(loss='binary_crossentropy', optimizer='adam', class_mode="binary") 59 | 60 | print("Train...") 61 | model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=4, validation_data=(X_test, y_test), show_accuracy=True) 62 | score, acc = model.evaluate(X_test, y_test, batch_size=batch_size, show_accuracy=True) 63 | print('Test score:', score) 64 | print('Test accuracy:', acc) 65 | -------------------------------------------------------------------------------- /tests/auto/test_regularizers.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | np.random.seed(1337) # for reproducibility 4 | 5 | from keras.models import Sequential 6 | from keras.layers.core import Merge, Dense, Activation, Flatten, ActivityRegularization 7 | from keras.layers.embeddings import Embedding 8 | from keras.datasets import mnist 9 | from keras.utils import np_utils 10 | from keras import regularizers 11 | 12 | nb_classes = 10 13 | batch_size = 128 14 | nb_epoch = 5 15 | weighted_class = 9 16 | standard_weight = 1 17 | high_weight = 5 18 | max_train_samples = 5000 19 | max_test_samples = 1000 20 | 21 | # the data, shuffled and split between tran and test sets 22 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 23 | X_train = X_train.reshape(60000, 784)[:max_train_samples] 24 | X_test = X_test.reshape(10000, 784)[:max_test_samples] 25 | X_train = X_train.astype("float32") / 255 26 | X_test = X_test.astype("float32") / 255 27 | 28 | # convert class vectors to binary class matrices 29 | y_train = y_train[:max_train_samples] 30 | y_test = y_test[:max_test_samples] 31 | Y_train = np_utils.to_categorical(y_train, nb_classes) 32 | Y_test = np_utils.to_categorical(y_test, nb_classes) 33 | test_ids = np.where(y_test == np.array(weighted_class))[0] 34 | 35 | 36 | def create_model(weight_reg=None, activity_reg=None): 37 | model = Sequential() 38 | model.add(Dense(784, 50)) 39 | model.add(Activation('relu')) 40 | model.add(Dense(50, 10, W_regularizer=weight_reg, activity_regularizer=activity_reg)) 41 | model.add(Activation('softmax')) 42 | return model 43 | 44 | 45 | class TestRegularizers(unittest.TestCase): 46 | def test_W_reg(self): 47 | for reg in [regularizers.identity(), regularizers.l1(), regularizers.l2(), regularizers.l1l2()]: 48 | model = create_model(weight_reg=reg) 49 | model.compile(loss='categorical_crossentropy', optimizer='rmsprop') 50 | 
model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch, verbose=0) 51 | model.evaluate(X_test[test_ids, :], Y_test[test_ids, :], verbose=0) 52 | 53 | def test_A_reg(self): 54 | for reg in [regularizers.activity_l1(), regularizers.activity_l2()]: 55 | model = create_model(activity_reg=reg) 56 | model.compile(loss='categorical_crossentropy', optimizer='rmsprop') 57 | model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch, verbose=0) 58 | model.evaluate(X_test[test_ids, :], Y_test[test_ids, :], verbose=0) 59 | 60 | if __name__ == '__main__': 61 | print('Test weight and activity regularizers') 62 | unittest.main() 63 | -------------------------------------------------------------------------------- /keras/initializations.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import theano 3 | import theano.tensor as T 4 | import numpy as np 5 | 6 | from .utils.theano_utils import sharedX, shared_zeros, shared_ones 7 | 8 | 9 | def get_fans(shape): 10 | fan_in = shape[0] if len(shape) == 2 else np.prod(shape[1:]) 11 | fan_out = shape[1] if len(shape) == 2 else shape[0] 12 | return fan_in, fan_out 13 | 14 | 15 | def uniform(shape, scale=0.05): 16 | return sharedX(np.random.uniform(low=-scale, high=scale, size=shape)) 17 | 18 | 19 | def normal(shape, scale=0.05): 20 | return sharedX(np.random.randn(*shape) * scale) 21 | 22 | 23 | def lecun_uniform(shape): 24 | ''' Reference: LeCun 98, Efficient Backprop 25 | http://yann.lecun.com/exdb/publis/pdf/lecun-98b.pdf 26 | ''' 27 | fan_in, fan_out = get_fans(shape) 28 | scale = np.sqrt(3. / fan_in) 29 | return uniform(shape, scale) 30 | 31 | 32 | def glorot_normal(shape): 33 | ''' Reference: Glorot & Bengio, AISTATS 2010 34 | ''' 35 | fan_in, fan_out = get_fans(shape) 36 | s = np.sqrt(2. / (fan_in + fan_out)) 37 | return normal(shape, s) 38 | 39 | 40 | def glorot_uniform(shape): 41 | fan_in, fan_out = get_fans(shape) 42 | s = np.sqrt(6. / (fan_in + fan_out)) 43 | return uniform(shape, s) 44 | 45 | 46 | def he_normal(shape): 47 | ''' Reference: He et al., http://arxiv.org/abs/1502.01852 48 | ''' 49 | fan_in, fan_out = get_fans(shape) 50 | s = np.sqrt(2. / fan_in) 51 | return normal(shape, s) 52 | 53 | 54 | def he_uniform(shape): 55 | fan_in, fan_out = get_fans(shape) 56 | s = np.sqrt(6. 
/ fan_in) 57 | return uniform(shape, s) 58 | 59 | 60 | def orthogonal(shape, scale=1.1): 61 | ''' From Lasagne 62 | ''' 63 | flat_shape = (shape[0], np.prod(shape[1:])) 64 | a = np.random.normal(0.0, 1.0, flat_shape) 65 | u, _, v = np.linalg.svd(a, full_matrices=False) 66 | # pick the one with the correct shape 67 | q = u if u.shape == flat_shape else v 68 | q = q.reshape(shape) 69 | return sharedX(scale * q[:shape[0], :shape[1]]) 70 | 71 | 72 | def identity(shape, scale=1): 73 | if len(shape) != 2 or shape[0] != shape[1]: 74 | raise Exception("Identity matrix initialization can only be used for 2D square matrices") 75 | else: 76 | return sharedX(scale * np.identity(shape[0])) 77 | 78 | 79 | def zero(shape): 80 | return shared_zeros(shape) 81 | 82 | 83 | def one(shape): 84 | return shared_ones(shape) 85 | 86 | 87 | from .utils.generic_utils import get_from_module 88 | def get(identifier): 89 | return get_from_module(identifier, globals(), 'initialization') 90 | -------------------------------------------------------------------------------- /tests/auto/keras/test_constraints.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from numpy.testing import assert_allclose 4 | from theano import tensor as T 5 | 6 | 7 | class TestConstraints(unittest.TestCase): 8 | def setUp(self): 9 | self.some_values = [0.1, 0.5, 3, 8, 1e-7] 10 | np.random.seed(3537) 11 | self.example_array = np.random.random((100, 100)) * 100. - 50. 12 | self.example_array[0, 0] = 0. # 0 could possibly cause trouble 13 | 14 | def test_maxnorm(self): 15 | from keras.constraints import maxnorm 16 | 17 | for m in self.some_values: 18 | norm_instance = maxnorm(m) 19 | normed = norm_instance(self.example_array) 20 | assert (np.all(normed.eval() < m)) 21 | 22 | # a more explicit example 23 | norm_instance = maxnorm(2.0) 24 | x = np.array([[0, 0, 0], [1.0, 0, 0], [3, 0, 0], [3, 3, 3]]).T 25 | x_normed_target = np.array([[0, 0, 0], [1.0, 0, 0], [2.0, 0, 0], [2./np.sqrt(3), 2./np.sqrt(3), 2./np.sqrt(3)]]).T 26 | x_normed_actual = norm_instance(x).eval() 27 | assert_allclose(x_normed_actual, x_normed_target) 28 | 29 | def test_nonneg(self): 30 | from keras.constraints import nonneg 31 | 32 | nonneg_instance = nonneg() 33 | 34 | normed = nonneg_instance(self.example_array) 35 | assert (np.all(np.min(normed.eval(), axis=1) == 0.)) 36 | 37 | def test_identity(self): 38 | from keras.constraints import identity 39 | 40 | identity_instance = identity() 41 | 42 | normed = identity_instance(self.example_array) 43 | assert (np.all(normed == self.example_array)) 44 | 45 | def test_identity_oddballs(self): 46 | """ 47 | test the identity constraint on some more exotic input. 48 | this does not need to pass for the desired real life behaviour, 49 | but it should in the current implementation. 50 | """ 51 | from keras.constraints import identity 52 | identity_instance = identity() 53 | 54 | oddball_examples = ["Hello", [1], -1, None] 55 | assert(oddball_examples == identity_instance(oddball_examples)) 56 | 57 | def test_unitnorm(self): 58 | from keras.constraints import unitnorm 59 | unitnorm_instance = unitnorm() 60 | 61 | normalized = unitnorm_instance(self.example_array) 62 | 63 | norm_of_normalized = np.sqrt(np.sum(normalized.eval()**2, axis=1)) 64 | difference = norm_of_normalized - 1. #in the unit norm constraint, it should be equal to 1. 65 | largest_difference = np.max(np.abs(difference)) 66 | self.assertAlmostEqual(largest_difference, 0.) 
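        # note: the unitnorm constraint should leave every row of the example
        # array with (approximately) unit L2 norm, so the largest deviation
        # from 1. must vanish up to floating point error, as asserted above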
67 | 68 | if __name__ == '__main__': 69 | unittest.main() 70 | -------------------------------------------------------------------------------- /tests/auto/keras/test_activations.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import keras 4 | import theano 5 | import theano.tensor as T 6 | 7 | import numpy 8 | 9 | def list_assert_equal(a, b, round_to=7): 10 | ''' 11 | This will do a pairwise, rounded equality test across two lists of 12 | numbers. 13 | ''' 14 | pairs = zip(a, b) 15 | for i, j in pairs: 16 | assert round(i, round_to) == round(j, round_to) 17 | 18 | def get_standard_values(): 19 | ''' 20 | These are just a set of floats used for testing the activation 21 | functions, and are useful in multiple tests. 22 | ''' 23 | 24 | return [0,0.1,0.5,0.9,1.0] 25 | 26 | def test_softmax(): 27 | 28 | from keras.activations import softmax as s 29 | 30 | # Test using a reference implementation of softmax 31 | def softmax(values): 32 | m = max(values) 33 | values = numpy.array(values) 34 | e = numpy.exp(values - m) 35 | dist = list(e / numpy.sum(e)) 36 | 37 | return dist 38 | 39 | x = T.vector() 40 | exp = s(x) 41 | f = theano.function([x], exp) 42 | test_values=get_standard_values() 43 | 44 | result = f(test_values) 45 | expected = softmax(test_values) 46 | 47 | print(str(result)) 48 | print(str(expected)) 49 | 50 | list_assert_equal(result, expected) 51 | 52 | def test_relu(): 53 | ''' 54 | Relu implementation doesn't depend on the value being 55 | a theano variable. Testing ints, floats and theano tensors. 56 | ''' 57 | 58 | from keras.activations import relu as r 59 | 60 | assert r(5) == 5 61 | assert r(-5) == 0 62 | assert r(-0.1) == 0 63 | assert r(0.1) == 0.1 64 | 65 | x = T.vector() 66 | exp = r(x) 67 | f = theano.function([x], exp) 68 | 69 | test_values = get_standard_values() 70 | result = f(test_values) 71 | 72 | list_assert_equal(result, test_values) # because no negatives in test values 73 | 74 | 75 | def test_tanh(): 76 | 77 | from keras.activations import tanh as t 78 | test_values = get_standard_values() 79 | 80 | x = T.vector() 81 | exp = t(x) 82 | f = theano.function([x], exp) 83 | 84 | result = f(test_values) 85 | expected = [math.tanh(v) for v in test_values] 86 | 87 | print(result) 88 | print(expected) 89 | 90 | list_assert_equal(result, expected) 91 | 92 | 93 | def test_linear(): 94 | ''' 95 | This function does no input validation, it just returns the thing 96 | that was passed in. 97 | ''' 98 | 99 | from keras.activations import linear as l 100 | 101 | xs = [1, 5, True, None, 'foo'] 102 | 103 | for x in xs: 104 | assert x == l(x) 105 | -------------------------------------------------------------------------------- /keras/layers/advanced_activations.py: -------------------------------------------------------------------------------- 1 | from .. 
import initializations 2 | from ..layers.core import Layer, MaskedLayer 3 | from ..utils.theano_utils import shared_zeros, shared_ones, sharedX 4 | import theano.tensor as T 5 | import numpy as np 6 | 7 | 8 | class LeakyReLU(MaskedLayer): 9 | def __init__(self, alpha=0.3): 10 | super(LeakyReLU, self).__init__() 11 | self.alpha = alpha 12 | 13 | def get_output(self, train): 14 | X = self.get_input(train) 15 | return ((X + abs(X)) / 2.0) + self.alpha * ((X - abs(X)) / 2.0) 16 | 17 | def get_config(self): 18 | return {"name": self.__class__.__name__, 19 | "alpha": self.alpha} 20 | 21 | 22 | class PReLU(MaskedLayer): 23 | ''' 24 | Reference: 25 | Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification 26 | http://arxiv.org/pdf/1502.01852v1.pdf 27 | ''' 28 | def __init__(self, input_shape, init='zero', weights=None): 29 | super(PReLU, self).__init__() 30 | self.init = initializations.get(init) 31 | self.alphas = self.init(input_shape) 32 | self.params = [self.alphas] 33 | self.input_shape = input_shape 34 | 35 | if weights is not None: 36 | self.set_weights(weights) 37 | 38 | def get_output(self, train): 39 | X = self.get_input(train) 40 | pos = ((X + abs(X)) / 2.0) 41 | neg = self.alphas * ((X - abs(X)) / 2.0) 42 | return pos + neg 43 | 44 | def get_config(self): 45 | return {"name": self.__class__.__name__, 46 | "input_shape": self.input_shape, 47 | "init": self.init.__name__} 48 | 49 | 50 | class ParametricSoftplus(MaskedLayer): 51 | ''' 52 | Parametric Softplus of the form: alpha * (1 + exp(beta * X)) 53 | 54 | Reference: 55 | Inferring Nonlinear Neuronal Computation Based on Physiologically Plausible Inputs 56 | http://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1003143 57 | ''' 58 | def __init__(self, input_shape, alpha_init=0.2, beta_init=5.0, weights=None): 59 | 60 | super(ParametricSoftplus, self).__init__() 61 | self.alpha_init = alpha_init 62 | self.beta_init = beta_init 63 | self.alphas = sharedX(alpha_init * np.ones(input_shape)) 64 | self.betas = sharedX(beta_init * np.ones(input_shape)) 65 | self.params = [self.alphas, self.betas] 66 | self.input_shape = input_shape 67 | 68 | if weights is not None: 69 | self.set_weights(weights) 70 | 71 | def get_output(self, train): 72 | X = self.get_input(train) 73 | return T.nnet.softplus(self.betas * X) * self.alphas 74 | 75 | def get_config(self): 76 | return {"name": self.__class__.__name__, 77 | "input_shape": self.input_shape, 78 | "alpha_init": self.alpha_init, 79 | "beta_init": self.beta_init} 80 | -------------------------------------------------------------------------------- /examples/imdb_cnn.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | import numpy as np 4 | np.random.seed(1337) # for reproducibility 5 | 6 | from keras.preprocessing import sequence 7 | from keras.optimizers import RMSprop 8 | from keras.models import Sequential 9 | from keras.layers.core import Dense, Dropout, Activation, Flatten 10 | from keras.layers.embeddings import Embedding 11 | from keras.layers.convolutional import Convolution1D, MaxPooling1D 12 | from keras.datasets import imdb 13 | 14 | ''' 15 | This example demonstrates the use of Convolution1D 16 | for text classification. 17 | 18 | Run on GPU: THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 python imdb_cnn.py 19 | 20 | Get to 0.8330 test accuracy after 3 epochs. 100s/epoch on K520 GPU. 
21 | ''' 22 | 23 | # set parameters: 24 | max_features = 5000 25 | maxlen = 100 26 | batch_size = 32 27 | embedding_dims = 100 28 | nb_filters = 250 29 | filter_length = 3 30 | hidden_dims = 250 31 | nb_epoch = 3 32 | 33 | print("Loading data...") 34 | (X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features, 35 | test_split=0.2) 36 | print(len(X_train), 'train sequences') 37 | print(len(X_test), 'test sequences') 38 | 39 | print("Pad sequences (samples x time)") 40 | X_train = sequence.pad_sequences(X_train, maxlen=maxlen) 41 | X_test = sequence.pad_sequences(X_test, maxlen=maxlen) 42 | print('X_train shape:', X_train.shape) 43 | print('X_test shape:', X_test.shape) 44 | 45 | print('Build model...') 46 | model = Sequential() 47 | 48 | # we start off with an efficient embedding layer which maps 49 | # our vocab indices into embedding_dims dimensions 50 | model.add(Embedding(max_features, embedding_dims)) 51 | model.add(Dropout(0.25)) 52 | 53 | # we add a Convolution1D, which will learn nb_filters 54 | # word group filters of size filter_length: 55 | model.add(Convolution1D(input_dim=embedding_dims, 56 | nb_filter=nb_filters, 57 | filter_length=filter_length, 58 | border_mode="valid", 59 | activation="relu", 60 | subsample_length=1)) 61 | 62 | # we use standard max pooling (halving the output of the previous layer): 63 | model.add(MaxPooling1D(pool_length=2)) 64 | 65 | # We flatten the output of the conv layer, so that we can add a vanilla dense layer: 66 | model.add(Flatten()) 67 | 68 | # Computing the output shape of a conv layer can be tricky; 69 | # for a good tutorial, see: http://cs231n.github.io/convolutional-networks/ 70 | output_size = nb_filters * (((maxlen - filter_length) / 1) + 1) / 2 71 | 72 | # We add a vanilla hidden layer: 73 | model.add(Dense(output_size, hidden_dims)) 74 | model.add(Dropout(0.25)) 75 | model.add(Activation('relu')) 76 | 77 | # We project onto a single unit output layer, and squash it with a sigmoid: 78 | model.add(Dense(hidden_dims, 1)) 79 | model.add(Activation('sigmoid')) 80 | 81 | model.compile(loss='binary_crossentropy', optimizer='rmsprop', class_mode="binary") 82 | model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=nb_epoch, show_accuracy=True, validation_data=(X_test, y_test)) 83 | -------------------------------------------------------------------------------- /docs/sources/preprocessing/image.md: -------------------------------------------------------------------------------- 1 | 2 | ## ImageDataGenerator 3 | 4 | ```python 5 | keras.preprocessing.image.ImageDataGenerator(featurewise_center=True, 6 | samplewise_center=False, 7 | featurewise_std_normalization=True, 8 | samplewise_std_normalization=False, 9 | zca_whitening=False, 10 | rotation_range=0., 11 | width_shift_range=0., 12 | height_shift_range=0., 13 | horizontal_flip=False, 14 | vertical_flip=False) 15 | ``` 16 | 17 | Generate batches of tensor image data with real-time data augmentation. 18 | 19 | - __Arguments__: 20 | - __featurewise_center__: Boolean. Set input mean to 0 over the dataset. 21 | - __samplewise_center__: Boolean. Set each sample mean to 0. 22 | - __featurewise_std_normalization__: Boolean. Divide inputs by std of the dataset. 23 | - __samplewise_std_normalization__: Boolean. Divide each input by its std. 24 | - __zca_whitening__: Boolean. Apply ZCA whitening. 25 | - __rotation_range__: Int. Degree range for random rotations. 26 | - __width_shift_range__: Float (fraction of total width). Range for random horizontal shifts. 
27 | - __height_shift_range__: Float (fraction of total height). Range for random vertical shifts. 28 | - __horizontal_flip__: Boolean. Randomly flip inputs horizontally. 29 | - __vertical_flip__: Boolean. Randomly flip inputs vertically. 30 | 31 | - __Methods__: 32 | - __fit(X)__: Required if featurewise_center or featurewise_std_normalization or zca_whitening. Compute necessary quantities on some sample data. 33 | - __Arguments__: 34 | - __X__: sample data. 35 | - __augment__: Boolean (default: False). Whether to fit on randomly augmented samples. 36 | - __rounds__: int (default: 1). If augment, how many augmentation passes over the data to use. 37 | - __flow(X, y)__: 38 | - __Arguments__: 39 | - __X__: data. 40 | - __y__: labels. 41 | - __batch_size__: int (default: 32). 42 | - __shuffle__: boolean (defaut: False). 43 | - __save_to_dir__: None or str. This allows you to optimally specify a directory to which to save the augmented pictures being generated (useful for visualizing what you are doing). 44 | - __save_prefix__: str. Prefix to use for filenames of saved pictures. 45 | - __save_format__: one of "png", jpeg". 46 | 47 | - __Example__: 48 | ```python 49 | (X_train, y_train), (X_test, y_test) = cifar10.load_data(test_split=0.1) 50 | Y_train = np_utils.to_categorical(y_train, nb_classes) 51 | Y_test = np_utils.to_categorical(y_test, nb_classes) 52 | 53 | datagen = ImageDataGenerator( 54 | featurewise_center=True, 55 | featurewise_std_normalization=True, 56 | rotation_range=20, 57 | width_shift_range=0.2, 58 | height_shift_range=0.2, 59 | horizontal_flip=True) 60 | 61 | # compute quantities required for featurewise normalization 62 | # (std, mean, and principal components if ZCA whitening is applied) 63 | datagen.fit(X_train) 64 | 65 | for e in range(nb_epoch): 66 | print 'Epoch', e 67 | # batch train with realtime data augmentation 68 | for X_batch, Y_batch in datagen.flow(X_train, Y_train): 69 | loss = model.train(X_batch, Y_batch) 70 | ``` -------------------------------------------------------------------------------- /tests/manual/check_constraints.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | import keras 4 | from keras.datasets import mnist 5 | import keras.models 6 | from keras.models import Sequential 7 | from keras.layers.core import Dense, Dropout, Activation 8 | from keras.regularizers import l2, l1 9 | from keras.constraints import maxnorm, nonneg 10 | from keras.optimizers import SGD, Adam, RMSprop 11 | from keras.utils import np_utils, generic_utils 12 | import theano 13 | import theano.tensor as T 14 | import numpy as np 15 | import scipy 16 | 17 | batch_size = 100 18 | nb_classes = 10 19 | nb_epoch = 10 20 | 21 | # the data, shuffled and split between tran and test sets 22 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 23 | X_train=X_train.reshape(60000,784) 24 | X_test=X_test.reshape(10000,784) 25 | X_train = X_train.astype("float32") 26 | X_test = X_test.astype("float32") 27 | X_train /= 255 28 | X_test /= 255 29 | 30 | # convert class vectors to binary class matrices 31 | Y_train = np_utils.to_categorical(y_train, nb_classes) 32 | Y_test = np_utils.to_categorical(y_test, nb_classes) 33 | 34 | model = Sequential() 35 | model.add(Dense(784, 20, W_constraint=maxnorm(1))) 36 | model.add(Activation('relu')) 37 | model.add(Dropout(0.1)) 38 | model.add(Dense(20, 20, W_constraint=nonneg())) 39 | model.add(Activation('relu')) 40 | 
model.add(Dropout(0.1)) 41 | model.add(Dense(20, 10, W_constraint=maxnorm(1))) 42 | model.add(Activation('softmax')) 43 | 44 | 45 | rms = RMSprop() 46 | model.compile(loss='categorical_crossentropy', optimizer=rms) 47 | 48 | model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch, show_accuracy=True, verbose=0) 49 | 50 | a=model.params[0].eval() 51 | if np.isclose(np.max(np.sqrt(np.sum(a**2, axis=0))),1): 52 | print('Maxnorm test passed') 53 | else: 54 | raise ValueError('Maxnorm test failed!') 55 | 56 | b=model.params[2].eval() 57 | if np.min(b)==0 and np.min(a)!=0: 58 | print('Nonneg test passed') 59 | else: 60 | raise ValueError('Nonneg test failed!') 61 | 62 | 63 | model = Sequential() 64 | model.add(Dense(784, 20)) 65 | model.add(Activation('relu')) 66 | model.add(Dense(20, 20, W_regularizer=l1(.01))) 67 | model.add(Activation('relu')) 68 | model.add(Dense(20, 10)) 69 | model.add(Activation('softmax')) 70 | 71 | 72 | rms = RMSprop() 73 | model.compile(loss='categorical_crossentropy', optimizer=rms) 74 | 75 | model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=20, show_accuracy=True, verbose=0) 76 | 77 | a=model.params[2].eval().reshape(400) 78 | (D, p1) = scipy.stats.kurtosistest(a) 79 | 80 | model = Sequential() 81 | model.add(Dense(784, 20)) 82 | model.add(Activation('relu')) 83 | model.add(Dense(20, 20, W_regularizer=l2(.01))) 84 | model.add(Activation('relu')) 85 | model.add(Dense(20, 10)) 86 | model.add(Activation('softmax')) 87 | 88 | 89 | rms = RMSprop() 90 | model.compile(loss='categorical_crossentropy', optimizer=rms) 91 | 92 | model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=20, show_accuracy=True, verbose=0) 93 | 94 | a=model.params[2].eval().reshape(400) 95 | (D, p2) = scipy.stats.kurtosistest(a) 96 | 97 | if p1<.01 and p2>.01: 98 | print('L1 and L2 regularization tests passed') 99 | else: 100 | raise ValueError('L1 and L2 regularization tests failed!') -------------------------------------------------------------------------------- /docs/sources/optimizers.md: -------------------------------------------------------------------------------- 1 | 2 | ## Usage of optimizers 3 | 4 | An optimizer is one of the two arguments required for compiling a Keras model: 5 | 6 | ```python 7 | model = Sequential() 8 | model.add(Dense(20, 64, init='uniform')) 9 | model.add(Activation('tanh')) 10 | model.add(Activation('softmax')) 11 | 12 | sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True) 13 | model.compile(loss='mean_squared_error', optimizer=sgd) 14 | ``` 15 | 16 | You can either instantiate an optimizer before passing it to `model.compile()` , as in the above example, or you can call it by its name. In the latter case, the default parameters for the optimizer will be used. 17 | 18 | ```python 19 | # pass optimizer by name: default parameters will be used 20 | model.compile(loss='mean_squared_error', optimizer='sgd') 21 | ``` 22 | 23 | --- 24 | 25 | ## Base class 26 | 27 | ```python 28 | keras.optimizers.Optimizer(**kwargs) 29 | ``` 30 | 31 | All optimizers descended from this class support the following keyword argument: 32 | 33 | - __clipnorm__: float >= 0. 34 | 35 | Note: this is base class for building optimizers, not an actual optimizer that can be used for training models. 36 | 37 | --- 38 | 39 | ## SGD 40 | 41 | ```python 42 | keras.optimizers.SGD(lr=0.01, momentum=0., decay=0., nesterov=False) 43 | ``` 44 | 45 | __Arguments__: 46 | 47 | - __lr__: float >= 0. Learning rate. 48 | - __momentum__: float >= 0. Parameter updates momentum. 
49 | - __decay__: float >= 0. Learning rate decay over each update. 50 | - __nesterov__: boolean. Whether to apply Nesterov momentum. 51 | 52 | --- 53 | 54 | ## Adagrad 55 | 56 | ```python 57 | keras.optimizers.Adagrad(lr=0.01, epsilon=1e-6) 58 | ``` 59 | 60 | It is recommended to leave the parameters of this optimizer at their default values. 61 | 62 | __Arguments__: 63 | 64 | - __lr__: float >= 0. Learning rate. 65 | - __epsilon__: float >= 0. 66 | 67 | --- 68 | 69 | ## Adadelta 70 | 71 | ```python 72 | keras.optimizers.Adadelta(lr=1.0, rho=0.95, epsilon=1e-6) 73 | ``` 74 | 75 | It is recommended to leave the parameters of this optimizer at their default values. 76 | 77 | __Arguments__: 78 | 79 | - __lr__: float >= 0. Learning rate. It is recommended to leave it at the default value. 80 | - __rho__: float >= 0. 81 | - __epsilon__: float >= 0. Fuzz factor. 82 | 83 | For more info, see *"Adadelta: an adaptive learning rate method"* by Matthew Zeiler. 84 | 85 | --- 86 | 87 | ## RMSprop 88 | 89 | ```python 90 | keras.optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=1e-6) 91 | ``` 92 | 93 | It is recommended to leave the parameters of this optimizer at their default values. 94 | 95 | __Arguments__: 96 | 97 | - __lr__: float >= 0. Learning rate. 98 | - __rho__: float >= 0. 99 | - __epsilon__: float >= 0. Fuzz factor. 100 | 101 | --- 102 | 103 | ## Adam 104 | 105 | ```python 106 | keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-8) 107 | ``` 108 | 109 | Adam optimizer, proposed by Kingma and Lei Ba in [Adam: A Method For Stochastic Optimization](http://arxiv.org/pdf/1412.6980v8.pdf). Default parameters are those suggested in the paper. 110 | 111 | __Arguments__: 112 | 113 | - __lr__: float >= 0. Learning rate. 114 | - __beta_1__, __beta_2__: floats, 0 < beta < 1. Generally close to 1. 115 | - __epsilon__: float >= 0. Fuzz factor. 116 | 117 | --- 118 | -------------------------------------------------------------------------------- /examples/mnist_irnn.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | import numpy as np 4 | np.random.seed(1337) # for reproducibility 5 | 6 | from keras.datasets import mnist 7 | from keras.models import Sequential 8 | from keras.layers.core import Dense, Activation 9 | from keras.initializations import normal, identity 10 | from keras.layers.recurrent import SimpleRNN, LSTM 11 | from keras.optimizers import RMSprop 12 | from keras.utils import np_utils 13 | 14 | ''' 15 | This is a reproduction of the IRNN experiment 16 | with pixel-by-pixel sequential MNIST in 17 | "A Simple Way to Initialize Recurrent Networks of Rectified Linear Units " 18 | by Quoc V. Le, Navdeep Jaitly, Geoffrey E. Hinton 19 | 20 | arXiv:1504.00941v2 [cs.NE] 7 Apr 201 21 | http://arxiv.org/pdf/1504.00941v2.pdf 22 | 23 | Optimizer is replaced with RMSprop which yields more stable and steady 24 | improvement. 25 | 26 | Reaches 0.93 train/test accuracy after 900 epochs (which roughly corresponds 27 | to 1687500 steps in the original paper.) 
28 | ''' 29 | 30 | batch_size = 32 31 | nb_classes = 10 32 | nb_epochs = 200 33 | hidden_units = 100 34 | 35 | learning_rate = 1e-6 36 | clip_norm = 1.0 37 | BPTT_truncate = 28*28 38 | 39 | # the data, shuffled and split between train and test sets 40 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 41 | 42 | X_train = X_train.reshape(X_train.shape[0], -1, 1) 43 | X_test = X_test.reshape(X_test.shape[0], -1, 1) 44 | X_train = X_train.astype("float32") 45 | X_test = X_test.astype("float32") 46 | X_train /= 255 47 | X_test /= 255 48 | print('X_train shape:', X_train.shape) 49 | print(X_train.shape[0], 'train samples') 50 | print(X_test.shape[0], 'test samples') 51 | 52 | # convert class vectors to binary class matrices 53 | Y_train = np_utils.to_categorical(y_train, nb_classes) 54 | Y_test = np_utils.to_categorical(y_test, nb_classes) 55 | 56 | print('Evaluate IRNN...') 57 | model = Sequential() 58 | model.add(SimpleRNN(input_dim=1, output_dim=hidden_units, 59 | init=lambda shape: normal(shape, scale=0.001), 60 | inner_init=lambda shape: identity(shape, scale=1.0), 61 | activation='relu', truncate_gradient=BPTT_truncate)) 62 | model.add(Dense(hidden_units, nb_classes)) 63 | model.add(Activation('softmax')) 64 | rmsprop = RMSprop(lr=learning_rate) 65 | model.compile(loss='categorical_crossentropy', optimizer=rmsprop) 66 | 67 | model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epochs, 68 | show_accuracy=True, verbose=1, validation_data=(X_test, Y_test)) 69 | 70 | scores = model.evaluate(X_test, Y_test, show_accuracy=True, verbose=0) 71 | print('IRNN test score:', scores[0]) 72 | print('IRNN test accuracy:', scores[1]) 73 | 74 | print('Compare to LSTM...') 75 | model = Sequential() 76 | model.add(LSTM(1, hidden_units)) 77 | model.add(Dense(hidden_units, nb_classes)) 78 | model.add(Activation('softmax')) 79 | rmsprop = RMSprop(lr=learning_rate) 80 | model.compile(loss='categorical_crossentropy', optimizer=rmsprop) 81 | 82 | model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epochs, 83 | show_accuracy=True, verbose=1, validation_data=(X_test, Y_test)) 84 | 85 | scores = model.evaluate(X_test, Y_test, show_accuracy=True, verbose=0) 86 | print('LSTM test score:', scores[0]) 87 | print('LSTM test accuracy:', scores[1]) 88 | -------------------------------------------------------------------------------- /docs/sources/preprocessing/sequence.md: -------------------------------------------------------------------------------- 1 | ## pad_sequences 2 | 3 | ```python 4 | keras.preprocessing.sequence.pad_sequences(sequences, maxlen=None, dtype='int32') 5 | ``` 6 | 7 | Transform a list of `nb_samples sequences` (lists of scalars) into a 2D numpy array of shape `(nb_samples, nb_timesteps)`. `nb_timesteps` is either the `maxlen` argument if provided, or the length of the longest sequence otherwise. Sequences that are shorter than `nb_timesteps` are padded with zeros at the end. 8 | 9 | - __Return__: 2D numpy array of shape `(nb_samples, nb_timesteps)`. 10 | 11 | - __Arguments__: 12 | - __sequences__: List of lists of int or float. 13 | - __maxlen__: None or int. Maximum sequence length, longer sequences are truncated and shorter sequences are padded with zeros at the end. 14 | - __dtype__: datatype of the numpy array returned. 
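A minimal usage sketch (the input sequences and `maxlen` below are illustrative):

```python
from keras.preprocessing import sequence

sequences = [[1, 2, 3], [4, 5], [6, 7, 8, 9, 10]]
X = sequence.pad_sequences(sequences, maxlen=4)
# X is a (3, 4) int32 array: the shorter sequences are zero-padded,
# and the 5-element sequence is truncated to 4 timesteps
```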
15 | 16 | --- 17 | 18 | ## skipgrams 19 | 20 | ```python 21 | keras.preprocessing.sequence.skipgrams(sequence, vocabulary_size, 22 | window_size=4, negative_samples=1., shuffle=True, 23 | categorical=False, sampling_table=None) 24 | ``` 25 | 26 | Transforms a sequence of word indexes (list of int) into couples of the form: 27 | 28 | - (word, word in the same window), with label 1 (positive samples). 29 | - (word, random word from the vocabulary), with label 0 (negative samples). 30 | 31 | Read more about Skipgram in this gnomic paper by Mikolov et al.: [Efficient Estimation of Word Representations in 32 | Vector Space](http://arxiv.org/pdf/1301.3781v3.pdf) 33 | 34 | - __Return__: tuple `(couples, labels)`. 35 | - `couples` is a list of 2-elements lists of int: `[word_index, other_word_index]`. 36 | - `labels` is a list of 0 and 1, where 1 indicates that `other_word_index` was found in the same window as `word_index`, and 0 indicates that `other_word_index` was random. 37 | - if categorical is set to True, the labels are categorical, ie. 1 becomes [0,1], and 0 becomes [1, 0]. 38 | 39 | - __Arguments__: 40 | - __sequence__: list of int indexes. If using a sampling_table, the index of a word should be its the rank in the dataset (starting at 1). 41 | - __vocabulary_size__: int. 42 | - __window_size__: int. maximum distance between two words in a positive couple. 43 | - __negative_samples__: float >= 0. 0 for no negative (=random) samples. 1 for same number as positive samples. etc. 44 | - __shuffle__: boolean. Whether to shuffle the samples. 45 | - __categorical__: boolean. Whether to make the returned labels categorical. 46 | - __sampling_table__: numpy array of shape `(vocabulary_size,)` where `sampling_table[i]` is the probability of sampling the word with index i (assumed to be i-th most common word in the dataset). 47 | 48 | 49 | --- 50 | 51 | ## make_sampling_table 52 | 53 | ```python 54 | keras.preprocessing.sequence.make_sampling_table(size, sampling_factor=1e-5) 55 | ``` 56 | 57 | Used for generating the `sampling_table` argument for `skipgrams`. `sampling_table[i]` is the probability of sampling the word i-th most common word in a dataset (more common words should be sampled less frequently, for balance). 58 | 59 | - __Return__: numpy array of shape `(size,)`. 60 | 61 | - __Arguments__: 62 | - __size__: size of the vocabulary considered. 63 | - __sampling_factor__: lower values result in a longer probability decay (common words will be sampled less frequently). If set to 1, no subsampling will be performed (all sampling probabilities will be 1). 64 | -------------------------------------------------------------------------------- /docs/sources/preprocessing/text.md: -------------------------------------------------------------------------------- 1 | 2 | ## text_to_word_sequence 3 | 4 | ```python 5 | keras.preprocessing.text.text_to_word_sequence(text, 6 | filters=base_filter(), lower=True, split=" ") 7 | ``` 8 | 9 | Split a sentence into a list of words. 10 | 11 | - __Return__: List of words (str). 12 | 13 | - __Arguments__: 14 | - __text__: str. 15 | - __filters__: list (or concatenation) of characters to filter out, such as punctuation. Default: base_filter(), includes basic punctuation, tabs, and newlines. 16 | - __lower__: boolean. Whether to set the text to lowercase. 17 | - __split__: str. Separator for word splitting. 
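For illustration, with the default filters, lowercasing and whitespace splitting:

```python
from keras.preprocessing.text import text_to_word_sequence

text_to_word_sequence("The quick brown fox jumped over the lazy dog.")
# -> ['the', 'quick', 'brown', 'fox', 'jumped', 'over', 'the', 'lazy', 'dog']
```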
18 | 19 | ## one_hot 20 | 21 | ```python 22 | keras.preprocessing.text.one_hot(text, n, 23 | filters=base_filter(), lower=True, split=" ") 24 | ``` 25 | 26 | One-hot encode a text into a list of word indexes in a vocabulary of size n. 27 | 28 | - __Return__: List of integers in [1, n]. Each integer encodes a word (unicity non-guaranteed). 29 | 30 | - __Arguments__: Same as `text_to_word_sequence` above. 31 | - __n__: int. Size of vocabulary. 32 | 33 | ## Tokenizer 34 | 35 | ```python 36 | keras.preprocessing.text.Tokenizer(nb_words=None, filters=base_filter(), 37 | lower=True, split=" ") 38 | ``` 39 | 40 | Class for vectorizing texts, or/and turning texts into sequences (=list of word indexes, where the word of rank i in the dataset (starting at 1) has index i). 41 | 42 | - __Arguments__: Same as `text_to_word_sequence` above. 43 | - __nb_words__: None or int. Maximum number of words to work with (if set, tokenization will be restricted to the top nb_words most common words in the dataset). 44 | 45 | - __Methods__: 46 | 47 | - __fit_on_texts(texts)__: 48 | - __Arguments__: 49 | - __texts__: list of texts to train on. 50 | 51 | - __texts_to_sequences(texts)__ 52 | - __Arguments__: 53 | - __texts__: list of texts to turn to sequences. 54 | - __Return__: list of sequences (one per text input). 55 | 56 | - __texts_to_sequences_generator(texts)__: generator version of the above. 57 | - __Return__: yield one sequence per input text. 58 | 59 | - __texts_to_matrix(texts)__: 60 | - __Return__: numpy array of shape `(len(texts), nb_words)`. 61 | - __Arguments__: 62 | - __texts__: list of texts to vectorize. 63 | - __mode__: one of "binary", "count", "tfidf", "freq" (default: "binary"). 64 | 65 | - __fit_on_sequences(sequences)__: 66 | - __Arguments__: 67 | - __sequences__: list of sequences to train on. 68 | 69 | - __sequences_to_matrix(sequences)__: 70 | - __Return__: numpy array of shape `(len(sequences), nb_words)`. 71 | - __Arguments__: 72 | - __sequences__: list of sequences to vectorize. 73 | - __mode__: one of "binary", "count", "tfidf", "freq" (default: "binary"). 74 | 75 | - __Attributes__: 76 | - __word_counts__: dictionary mapping words (str) to the number of times they appeared on during fit. Only set after fit_on_texts was called. 77 | - __word_docs__: dictionary mapping words (str) to the number of documents/texts they appeared on during fit. Only set after fit_on_texts was called. 78 | - __word_index__: dictionary mapping words (str) to their rank/index (int). Only set after fit_on_texts was called. 79 | - __document_count__: int. Number of documents (texts/sequences) the tokenizer was trained on. Only set after fit_on_texts or fit_on_sequences was called. 80 | 81 | 82 | -------------------------------------------------------------------------------- /docs/sources/layers/embeddings.md: -------------------------------------------------------------------------------- 1 | 2 | ## Embedding 3 | 4 | ```python 5 | keras.layers.embeddings.Embedding(input_dim, output_dim, init='uniform', weights=None, W_regularizer=None, W_constraint=None, mask_zero=False) 6 | ``` 7 | 8 | Turn positive integers (indexes) into denses vectors of fixed size, 9 | eg. `[[4], [20]] -> [[0.25, 0.1], [0.6, -0.2]]` 10 | 11 | - __Input shape__: 2D tensor with shape: `(nb_samples, maxlen)`. 12 | 13 | - __Output shape__: 3D tensor with shape: `(nb_samples, maxlen, output_dim)`. 14 | 15 | - __Arguments__: 16 | 17 | - __input_dim__: int >= 0. Size of the vocabulary, ie. 1+maximum integer index occuring in the input data. 
18 | - __output_dim__: int >= 0. Dimension of the dense embedding. 19 | - __init__: name of initialization function for the weights of the layer (see: [initializations](../initializations.md)), or alternatively, Theano function to use for weights initialization. This parameter is only relevant if you don't pass a `weights` argument. 20 | - __weights__: list of numpy arrays to set as initial weights. The list should have 1 element, of shape `(input_dim, output_dim)`. 21 | - __W_regularizer__: instance of the [regularizers](../regularizers.md) module (eg. L1 or L2 regularization), applied to the embedding matrix. 22 | - __W_constraint__: instance of the [constraints](../constraints.md) module (eg. maxnorm, nonneg), applied to the embedding matrix. 23 | - __mask_zero__: Whether or not the input value 0 is a special "padding" value that should be masked out. This is useful for [recurrent layers](recurrent.md) which may take variable length input. If this is `True` then all subsequent layers in the model need to support masking or an exception will be raised. 24 | 25 | 26 | ## WordContextProduct 27 | 28 | ```python 29 | keras.layers.embeddings.WordContextProduct(input_dim, proj_dim=128, 30 | init='uniform', activation='sigmoid', weights=None) 31 | ``` 32 | 33 | This layer turns a pair of words (a pivot word + a context word, ie. a word from the same context as a pivot, or a random, out-of-context word), identified by their indices in a vocabulary, into two dense representations (word representation and context representation). 34 | 35 | Then it returns `activation(dot(pivot_embedding, context_embedding))`, which can be trained to encode the probability of finding the context word in the context of the pivot word (or reciprocally, depending on your training procedure). 36 | 37 | For more context, see Mikolov et al.: [Efficient Estimation of Word Representations in Vector Space](http://arxiv.org/pdf/1301.3781v3.pdf) 38 | 39 | - __Input shape__: 2D tensor with shape: `(nb_samples, 2)`. 40 | 41 | - __Output shape__: 2D tensor with shape: `(nb_samples, 1)`. 42 | 43 | - __Arguments__: 44 | 45 | - __input_dim__: int >= 0. Size of the vocabulary, ie. 1+maximum integer index occurring in the input data. 46 | - __proj_dim__: int >= 0. Dimension of the dense embedding used internally. 47 | - __init__: name of initialization function for the embeddings (see: [initializations](../initializations.md)), or alternatively, Theano function to use for weights initialization. This parameter is only relevant if you don't pass a `weights` argument. 48 | - __activation__: name of activation function to use (see: [activations](../activations.md)), or alternatively, elementwise Theano function. 49 | - __weights__: list of numpy arrays to set as initial weights. The list should have 2 elements, both of shape `(input_dim, proj_dim)`. The first element is the word embedding weights, the second one is the context embedding weights.
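- __Example__: a rough training sketch. The word-index couples and labels below are illustrative stand-ins for the output of `keras.preprocessing.sequence.skipgrams`, and the loss/optimizer choices are not prescriptive:

```python
import numpy as np
from keras.models import Sequential
from keras.layers.embeddings import WordContextProduct

vocab_size = 5000                                    # illustrative vocabulary size
couples = np.array([[12, 17], [12, 4031], [89, 5]])  # shape (nb_samples, 2): (pivot, context) index pairs
labels = np.array([[1], [0], [1]])                   # shape (nb_samples, 1): 1 = real context, 0 = negative sample

model = Sequential()
model.add(WordContextProduct(vocab_size, proj_dim=128, init='uniform'))
model.compile(loss='mse', optimizer='rmsprop')

loss = model.train(couples, labels)  # train on one batch of couples
```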
50 | -------------------------------------------------------------------------------- /tests/manual/check_yaml.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | import numpy as np 4 | 5 | from keras.utils.test_utils import get_test_data 6 | from keras.preprocessing import sequence 7 | from keras.optimizers import SGD, RMSprop, Adagrad 8 | from keras.utils import np_utils 9 | from keras.models import Sequential, Graph 10 | from keras.layers.core import Dense, Dropout, Activation, Merge 11 | from keras.layers.embeddings import Embedding 12 | from keras.layers.recurrent import LSTM, GRU 13 | from keras.datasets import imdb 14 | from keras.models import model_from_yaml 15 | 16 | ''' 17 | This is essentially the IMDB test. Deserialized models should yield 18 | the same config as the original one. 19 | ''' 20 | 21 | max_features = 10000 22 | maxlen = 100 23 | batch_size = 32 24 | 25 | (X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features, test_split=0.2) 26 | 27 | X_train = sequence.pad_sequences(X_train, maxlen=maxlen) 28 | X_test = sequence.pad_sequences(X_test, maxlen=maxlen) 29 | 30 | model = Sequential() 31 | model.add(Embedding(max_features, 128)) 32 | model.add(LSTM(128, 128)) 33 | model.add(Dropout(0.5)) 34 | model.add(Dense(128, 1, W_regularizer='identity', b_constraint='maxnorm')) 35 | model.add(Activation('sigmoid')) 36 | 37 | model.get_config(verbose=1) 38 | 39 | ##################################### 40 | # save model w/o parameters to yaml # 41 | ##################################### 42 | 43 | yaml_no_params = model.to_yaml() 44 | 45 | no_param_model = model_from_yaml(yaml_no_params) 46 | no_param_model.get_config(verbose=1) 47 | 48 | ###################################### 49 | # save multi-branch sequential model # 50 | ###################################### 51 | 52 | seq = Sequential() 53 | seq.add(Merge([model, model], mode='sum')) 54 | seq.get_config(verbose=1) 55 | merge_yaml = seq.to_yaml() 56 | merge_model = model_from_yaml(merge_yaml) 57 | 58 | large_model = Sequential() 59 | large_model.add(Merge([seq,model], mode='concat')) 60 | large_model.get_config(verbose=1) 61 | large_model.to_yaml() 62 | 63 | #################### 64 | # save graph model # 65 | #################### 66 | 67 | X = np.random.random((100, 32)) 68 | X2 = np.random.random((100, 32)) 69 | y = np.random.random((100, 4)) 70 | y2 = np.random.random((100,)) 71 | 72 | (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=1000, nb_test=200, input_shape=(32,), 73 | classification=False, output_shape=(4,)) 74 | 75 | graph = Graph() 76 | 77 | graph.add_input(name='input1', ndim=2) 78 | 79 | graph.add_node(Dense(32, 16), name='dense1', input='input1') 80 | graph.add_node(Dense(32, 4), name='dense2', input='input1') 81 | graph.add_node(Dense(16, 4), name='dense3', input='dense1') 82 | 83 | graph.add_output(name='output1', inputs=['dense2', 'dense3'], merge_mode='sum') 84 | graph.compile('rmsprop', {'output1': 'mse'}) 85 | 86 | graph.get_config(verbose=1) 87 | 88 | history = graph.fit({'input1': X_train, 'output1': y_train}, nb_epoch=10) 89 | original_pred = graph.predict({'input1': X_test}) 90 | 91 | graph_yaml = graph.to_yaml() 92 | graph.save_weights('temp.h5', overwrite=True) 93 | 94 | reloaded_graph = model_from_yaml(graph_yaml) 95 | reloaded_graph.load_weights('temp.h5') 96 | reloaded_graph.get_config(verbose=1) 97 | 98 | reloaded_graph.compile('rmsprop', 
{'output1': 'mse'}) 99 | new_pred = reloaded_graph.predict({'input1': X_test}) 100 | 101 | assert(np.sum(new_pred['output1'] - original_pred['output1']) == 0) 102 | -------------------------------------------------------------------------------- /examples/lstm_text_generation.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from keras.models import Sequential 3 | from keras.layers.core import Dense, Activation, Dropout 4 | from keras.layers.recurrent import LSTM 5 | from keras.datasets.data_utils import get_file 6 | import numpy as np 7 | import random, sys 8 | 9 | ''' 10 | Example script to generate text from Nietzsche's writings. 11 | 12 | At least 20 epochs are required before the generated text 13 | starts sounding coherent. 14 | 15 | It is recommended to run this script on GPU, as recurrent 16 | networks are quite computationally intensive. 17 | 18 | If you try this script on new data, make sure your corpus 19 | has at least ~100k characters. ~1M is better. 20 | ''' 21 | 22 | path = get_file('nietzsche.txt', origin="https://s3.amazonaws.com/text-datasets/nietzsche.txt") 23 | text = open(path).read().lower() 24 | print('corpus length:', len(text)) 25 | 26 | chars = set(text) 27 | print('total chars:', len(chars)) 28 | char_indices = dict((c, i) for i, c in enumerate(chars)) 29 | indices_char = dict((i, c) for i, c in enumerate(chars)) 30 | 31 | # cut the text in semi-redundant sequences of maxlen characters 32 | maxlen = 20 33 | step = 3 34 | sentences = [] 35 | next_chars = [] 36 | for i in range(0, len(text) - maxlen, step): 37 | sentences.append(text[i : i + maxlen]) 38 | next_chars.append(text[i + maxlen]) 39 | print('nb sequences:', len(sentences)) 40 | 41 | print('Vectorization...') 42 | X = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool) 43 | y = np.zeros((len(sentences), len(chars)), dtype=np.bool) 44 | for i, sentence in enumerate(sentences): 45 | for t, char in enumerate(sentence): 46 | X[i, t, char_indices[char]] = 1 47 | y[i, char_indices[next_chars[i]]] = 1 48 | 49 | 50 | # build the model: 2 stacked LSTM 51 | print('Build model...') 52 | model = Sequential() 53 | model.add(LSTM(len(chars), 512, return_sequences=True)) 54 | model.add(Dropout(0.2)) 55 | model.add(LSTM(512, 512, return_sequences=False)) 56 | model.add(Dropout(0.2)) 57 | model.add(Dense(512, len(chars))) 58 | model.add(Activation('softmax')) 59 | 60 | model.compile(loss='categorical_crossentropy', optimizer='rmsprop') 61 | 62 | # helper function to sample an index from a probability array 63 | def sample(a, temperature=1.0): 64 | a = np.log(a)/temperature 65 | a = np.exp(a)/np.sum(np.exp(a)) 66 | return np.argmax(np.random.multinomial(1,a,1)) 67 | 68 | # train the model, output generated text after each iteration 69 | for iteration in range(1, 60): 70 | print() 71 | print('-' * 50) 72 | print('Iteration', iteration) 73 | model.fit(X, y, batch_size=128, nb_epoch=1) 74 | 75 | start_index = random.randint(0, len(text) - maxlen - 1) 76 | 77 | for diversity in [0.2, 0.5, 1.0, 1.2]: 78 | print() 79 | print('----- diversity:', diversity) 80 | 81 | generated = '' 82 | sentence = text[start_index : start_index + maxlen] 83 | generated += sentence 84 | print('----- Generating with seed: "' + sentence + '"') 85 | sys.stdout.write(generated) 86 | 87 | for iteration in range(400): 88 | x = np.zeros((1, maxlen, len(chars))) 89 | for t, char in enumerate(sentence): 90 | x[0, t, char_indices[char]] = 1. 
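            # at this point x is a one-hot encoding of the current seed window;
            # the model then predicts a distribution over the next character,
            # and sample() draws from it with the chosen diversity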
91 | 92 | preds = model.predict(x, verbose=0)[0] 93 | next_index = sample(preds, diversity) 94 | next_char = indices_char[next_index] 95 | 96 | generated += next_char 97 | sentence = sentence[1:] + next_char 98 | 99 | sys.stdout.write(next_char) 100 | sys.stdout.flush() 101 | print() 102 | -------------------------------------------------------------------------------- /keras/layers/normalization.py: -------------------------------------------------------------------------------- 1 | from ..layers.core import Layer 2 | from ..utils.theano_utils import shared_zeros, shared_ones, ndim_tensor 3 | from .. import initializations 4 | 5 | import theano.tensor as T 6 | 7 | 8 | class BatchNormalization(Layer): 9 | ''' 10 | Reference: 11 | Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift 12 | http://arxiv.org/pdf/1502.03167v3.pdf 13 | 14 | mode: 0 -> featurewise normalization 15 | 1 -> samplewise normalization (may sometimes outperform featurewise mode) 16 | 17 | momentum: momentum term in the computation of a running estimate of the mean and std of the data 18 | ''' 19 | def __init__(self, input_shape, epsilon=1e-6, mode=0, momentum=0.9, weights=None): 20 | super(BatchNormalization, self).__init__() 21 | self.init = initializations.get("uniform") 22 | self.input_shape = input_shape 23 | self.epsilon = epsilon 24 | self.mode = mode 25 | self.momentum = momentum 26 | self.input = ndim_tensor(len(self.input_shape) + 1) 27 | 28 | self.gamma = self.init((self.input_shape)) 29 | self.beta = shared_zeros(self.input_shape) 30 | 31 | self.params = [self.gamma, self.beta] 32 | if weights is not None: 33 | self.set_weights(weights) 34 | 35 | def init_updates(self): 36 | self.running_mean = shared_zeros(self.input_shape) 37 | self.running_std = shared_ones((self.input_shape)) 38 | X = self.get_input(train=True) 39 | m = X.mean(axis=0) 40 | std = T.mean((X - m) ** 2 + self.epsilon, axis=0) ** 0.5 41 | mean_update = self.momentum * self.running_mean + (1-self.momentum) * m 42 | std_update = self.momentum * self.running_std + (1-self.momentum) * std 43 | self.updates = [(self.running_mean, mean_update), (self.running_std, std_update)] 44 | 45 | def get_output(self, train): 46 | X = self.get_input(train) 47 | 48 | if self.mode == 0: 49 | X_normed = (X - self.running_mean) / (self.running_std + self.epsilon) 50 | 51 | elif self.mode == 1: 52 | m = X.mean(axis=-1, keepdims=True) 53 | std = X.std(axis=-1, keepdims=True) 54 | X_normed = (X - m) / (std + self.epsilon) 55 | 56 | out = self.gamma * X_normed + self.beta 57 | return out 58 | 59 | def get_config(self): 60 | return {"name": self.__class__.__name__, 61 | "input_shape": self.input_shape, 62 | "epsilon": self.epsilon, 63 | "mode": self.mode} 64 | 65 | 66 | class LRN2D(Layer): 67 | """ 68 | This code is adapted from pylearn2. 69 | License at: https://github.com/lisa-lab/pylearn2/blob/master/LICENSE.txt 70 | """ 71 | 72 | def __init__(self, alpha=1e-4, k=2, beta=0.75, n=5): 73 | if n % 2 == 0: 74 | raise NotImplementedError("LRN2D only works with odd n. 
n provided: " + str(n)) 75 | super(LRN2D, self).__init__() 76 | self.alpha = alpha 77 | self.k = k 78 | self.beta = beta 79 | self.n = n 80 | 81 | def get_output(self, train): 82 | X = self.get_input(train) 83 | b, ch, r, c = X.shape 84 | half_n = self.n // 2 85 | input_sqr = T.sqr(X) 86 | extra_channels = T.alloc(0., b, ch + 2*half_n, r, c) 87 | input_sqr = T.set_subtensor(extra_channels[:, half_n:half_n+ch, :, :], input_sqr) 88 | scale = self.k 89 | for i in range(self.n): 90 | scale += self.alpha * input_sqr[:, i:i+ch, :, :] 91 | scale = scale ** self.beta 92 | return X / scale 93 | 94 | def get_config(self): 95 | return {"name": self.__class__.__name__, 96 | "alpha": self.alpha, 97 | "k": self.k, 98 | "beta": self.beta, 99 | "n": self.n} 100 | -------------------------------------------------------------------------------- /tests/manual/check_wrappers.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | from keras.datasets import mnist 4 | from keras.models import Sequential 5 | from keras.layers.core import Dense, Activation 6 | from keras.utils import np_utils 7 | from keras.wrappers.scikit_learn import * 8 | import numpy as np 9 | 10 | batch_size = 128 11 | nb_epoch = 1 12 | 13 | nb_classes = 10 14 | max_train_samples = 5000 15 | max_test_samples = 1000 16 | 17 | np.random.seed(1337) # for reproducibility 18 | 19 | ############################################ 20 | # scikit-learn classification wrapper test # 21 | ############################################ 22 | print('Beginning scikit-learn classification wrapper test') 23 | 24 | print('Loading data') 25 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 26 | 27 | X_train = X_train.reshape(60000, 784)[:max_train_samples] 28 | X_test = X_test.reshape(10000, 784)[:max_test_samples] 29 | X_train = X_train.astype('float32') 30 | X_test = X_test.astype('float32') 31 | X_train /= 255 32 | X_test /= 255 33 | 34 | Y_train = np_utils.to_categorical(y_train, nb_classes)[:max_train_samples] 35 | Y_test = np_utils.to_categorical(y_test, nb_classes)[:max_test_samples] 36 | 37 | print('Defining model') 38 | model = Sequential() 39 | model.add(Dense(784, 50)) 40 | model.add(Activation('relu')) 41 | model.add(Dense(50, 10)) 42 | model.add(Activation('softmax')) 43 | 44 | print('Creating wrapper') 45 | classifier = KerasClassifier(model, train_batch_size=batch_size, nb_epoch=nb_epoch) 46 | 47 | print('Fitting model') 48 | classifier.fit(X_train, Y_train) 49 | 50 | print('Testing score function') 51 | score = classifier.score(X_train, Y_train) 52 | print('Score: ', score) 53 | 54 | print('Testing predict function') 55 | preds = classifier.predict(X_test) 56 | print('Preds.shape: ', preds.shape) 57 | 58 | print('Testing predict proba function') 59 | proba = classifier.predict_proba(X_test) 60 | print('Proba.shape: ', proba.shape) 61 | 62 | print('Testing get params') 63 | print(classifier.get_params()) 64 | 65 | print('Testing set params') 66 | classifier.set_params(optimizer='sgd', loss='binary_crossentropy') 67 | print(classifier.get_params()) 68 | 69 | print('Testing attributes') 70 | print('Classes') 71 | print(classifier.classes_) 72 | print('Config') 73 | print(classifier.config_) 74 | print('Weights') 75 | print(classifier.weights_) 76 | print('Compiled model') 77 | print(classifier.compiled_model_) 78 | 79 | ######################################## 80 | # scikit-learn regression wrapper test # 81 | 
######################################## 82 | print('Beginning scikit-learn regression wrapper test') 83 | 84 | print('Generating data') 85 | X_train = np.random.random((5000, 100)) 86 | X_test = np.random.random((1000, 100)) 87 | y_train = np.random.random(5000) 88 | y_test = np.random.random(1000) 89 | 90 | print('Defining model') 91 | model = Sequential() 92 | model.add(Dense(100, 50)) 93 | model.add(Activation('relu')) 94 | model.add(Dense(50, 1)) 95 | model.add(Activation('linear')) 96 | 97 | print('Creating wrapper') 98 | regressor = KerasRegressor(model, train_batch_size=batch_size, nb_epoch=nb_epoch) 99 | 100 | print('Fitting model') 101 | regressor.fit(X_train, y_train) 102 | 103 | print('Testing score function') 104 | score = regressor.score(X_train, y_train) 105 | print('Score: ', score) 106 | 107 | print('Testing predict function') 108 | preds = regressor.predict(X_test) 109 | print('Preds.shape: ', preds.shape) 110 | 111 | print('Testing get params') 112 | print(regressor.get_params()) 113 | 114 | print('Testing set params') 115 | regressor.set_params(optimizer='sgd', loss='mean_absolute_error') 116 | print(regressor.get_params()) 117 | 118 | print('Testing attributes') 119 | print('Config') 120 | print(regressor.config_) 121 | print('Weights') 122 | print(regressor.weights_) 123 | print('Compiled model') 124 | print(regressor.compiled_model_) 125 | 126 | print('Test script complete.') 127 | -------------------------------------------------------------------------------- /tests/auto/keras/test_normalization.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from numpy.testing import assert_allclose 4 | from theano import tensor as T 5 | from keras.layers import normalization 6 | from keras.models import Sequential 7 | 8 | 9 | class TestBatchNormalization(unittest.TestCase): 10 | def setUp(self): 11 | self.input_1 = np.arange(10) 12 | self.input_2 = np.zeros(10) 13 | self.input_3 = np.ones((10)) 14 | 15 | self.input_shapes = [np.ones((10, 10)), np.ones((10, 10, 10))] 16 | 17 | def test_setup(self): 18 | norm_m0 = normalization.BatchNormalization((10, 10)) 19 | norm_m1 = normalization.BatchNormalization((10, 10), mode=1) 20 | 21 | # mode 3 does not exist 22 | self.assertRaises(Exception, normalization.BatchNormalization((10, 10), mode=3)) 23 | 24 | def test_mode_0(self): 25 | model = Sequential() 26 | norm_m0 = normalization.BatchNormalization((10,)) 27 | model.add(norm_m0) 28 | model.compile(loss='mse', optimizer='sgd') 29 | 30 | # centered on 5.0, variance 10.0 31 | X = np.random.normal(loc=5.0, scale=10.0, size=(1000, 10)) 32 | model.fit(X, X, nb_epoch=5, verbose=0) 33 | norm_m0.input = X 34 | out = (norm_m0.get_output(train=True) - norm_m0.beta) / norm_m0.gamma 35 | 36 | self.assertAlmostEqual(out.mean().eval(), 0.0, places=1) 37 | self.assertAlmostEqual(out.std().eval(), 1.0, places=1) 38 | 39 | def test_mode_1(self): 40 | norm_m1 = normalization.BatchNormalization((10,), mode=1) 41 | norm_m1.init_updates() 42 | 43 | for inp in [self.input_1, self.input_2, self.input_3]: 44 | norm_m1.input = inp 45 | out = (norm_m1.get_output(train=True) - norm_m1.beta) / norm_m1.gamma 46 | self.assertAlmostEqual(out.mean().eval(), 0.0) 47 | if inp.std() > 0.: 48 | self.assertAlmostEqual(out.std().eval(), 1.0, places=2) 49 | else: 50 | self.assertAlmostEqual(out.std().eval(), 0.0, places=2) 51 | 52 | def test_shapes(self): 53 | """ 54 | Test batch normalization with various input shapes 55 | """ 56 | for inp in 
self.input_shapes: 57 | norm_m0 = normalization.BatchNormalization(inp.shape, mode=0) 58 | norm_m0.init_updates() 59 | norm_m0.input = inp 60 | out = (norm_m0.get_output(train=True) - norm_m0.beta) / norm_m0.gamma 61 | 62 | norm_m1 = normalization.BatchNormalization(inp.shape, mode=1) 63 | norm_m1.input = inp 64 | out = (norm_m1.get_output(train=True) - norm_m1.beta) / norm_m1.gamma 65 | 66 | def test_weight_init(self): 67 | """ 68 | Test weight initialization 69 | """ 70 | 71 | norm_m1 = normalization.BatchNormalization((10,), mode=1, weights=[np.ones(10), np.ones(10)]) 72 | norm_m1.init_updates() 73 | 74 | for inp in [self.input_1, self.input_2, self.input_3]: 75 | norm_m1.input = inp 76 | out = (norm_m1.get_output(train=True) - np.ones(10)) / 1. 77 | self.assertAlmostEqual(out.mean().eval(), 0.0) 78 | if inp.std() > 0.: 79 | self.assertAlmostEqual(out.std().eval(), 1.0, places=2) 80 | else: 81 | self.assertAlmostEqual(out.std().eval(), 0.0, places=2) 82 | 83 | assert_allclose(norm_m1.gamma.eval(), np.ones(10)) 84 | assert_allclose(norm_m1.beta.eval(), np.ones(10)) 85 | 86 | # Weights must be an iterable of gamma AND beta. 87 | self.assertRaises(Exception, normalization.BatchNormalization((10,)), weights=np.ones(10)) 88 | 89 | def test_config(self): 90 | norm = normalization.BatchNormalization((10, 10), mode=1, epsilon=0.1) 91 | conf = norm.get_config() 92 | conf_target = {"input_shape": (10, 10), "name": normalization.BatchNormalization.__name__, 93 | "epsilon": 0.1, "mode": 1} 94 | 95 | self.assertDictEqual(conf, conf_target) 96 | 97 | 98 | if __name__ == '__main__': 99 | unittest.main() 100 | -------------------------------------------------------------------------------- /examples/kaggle_otto_nn.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | 4 | import numpy as np 5 | import pandas as pd 6 | np.random.seed(1337) # for reproducibility 7 | 8 | from keras.models import Sequential 9 | from keras.layers.core import Dense, Dropout, Activation 10 | from keras.layers.normalization import BatchNormalization 11 | from keras.layers.advanced_activations import PReLU 12 | from keras.utils import np_utils, generic_utils 13 | 14 | from sklearn.preprocessing import LabelEncoder 15 | from sklearn.preprocessing import StandardScaler 16 | 17 | ''' 18 | This demonstrates how to reach a score of 0.4890 (local validation) 19 | on the Kaggle Otto challenge, with a deep net using Keras. 20 | 21 | Compatible Python 2.7-3.4. Requires Scikit-Learn and Pandas. 22 | 23 | Recommended to run on GPU: 24 | Command: THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 python kaggle_otto_nn.py 25 | On EC2 g2.2xlarge instance: 19s/epoch. 6-7 minutes total training time. 26 | 27 | Best validation score at epoch 21: 0.4881 28 | 29 | Try it at home: 30 | - with/without BatchNormalization (BatchNormalization helps!) 31 | - with ReLU or with PReLU (PReLU helps!) 32 | - with smaller layers, largers layers 33 | - with more layers, less layers 34 | - with different optimizers (SGD+momentum+decay is probably better than Adam!) 
35 | 36 | Get the data from Kaggle: https://www.kaggle.com/c/otto-group-product-classification-challenge/data 37 | ''' 38 | 39 | 40 | def load_data(path, train=True): 41 | df = pd.read_csv(path) 42 | X = df.values.copy() 43 | if train: 44 | np.random.shuffle(X) # https://youtu.be/uyUXoap67N8 45 | X, labels = X[:, 1:-1].astype(np.float32), X[:, -1] 46 | return X, labels 47 | else: 48 | X, ids = X[:, 1:].astype(np.float32), X[:, 0].astype(str) 49 | return X, ids 50 | 51 | 52 | def preprocess_data(X, scaler=None): 53 | if not scaler: 54 | scaler = StandardScaler() 55 | scaler.fit(X) 56 | X = scaler.transform(X) 57 | return X, scaler 58 | 59 | 60 | def preprocess_labels(labels, encoder=None, categorical=True): 61 | if not encoder: 62 | encoder = LabelEncoder() 63 | encoder.fit(labels) 64 | y = encoder.transform(labels).astype(np.int32) 65 | if categorical: 66 | y = np_utils.to_categorical(y) 67 | return y, encoder 68 | 69 | 70 | def make_submission(y_prob, ids, encoder, fname): 71 | with open(fname, 'w') as f: 72 | f.write('id,') 73 | f.write(','.join([str(i) for i in encoder.classes_])) 74 | f.write('\n') 75 | for i, probs in zip(ids, y_prob): 76 | probas = ','.join([i] + [str(p) for p in probs.tolist()]) 77 | f.write(probas) 78 | f.write('\n') 79 | print("Wrote submission to file {}.".format(fname)) 80 | 81 | 82 | print("Loading data...") 83 | X, labels = load_data('train.csv', train=True) 84 | X, scaler = preprocess_data(X) 85 | y, encoder = preprocess_labels(labels) 86 | 87 | X_test, ids = load_data('test.csv', train=False) 88 | X_test, _ = preprocess_data(X_test, scaler) 89 | 90 | nb_classes = y.shape[1] 91 | print(nb_classes, 'classes') 92 | 93 | dims = X.shape[1] 94 | print(dims, 'dims') 95 | 96 | print("Building model...") 97 | 98 | model = Sequential() 99 | model.add(Dense(dims, 512, init='glorot_uniform')) 100 | model.add(PReLU((512,))) 101 | model.add(BatchNormalization((512,))) 102 | model.add(Dropout(0.5)) 103 | 104 | model.add(Dense(512, 512, init='glorot_uniform')) 105 | model.add(PReLU((512,))) 106 | model.add(BatchNormalization((512,))) 107 | model.add(Dropout(0.5)) 108 | 109 | model.add(Dense(512, 512, init='glorot_uniform')) 110 | model.add(PReLU((512,))) 111 | model.add(BatchNormalization((512,))) 112 | model.add(Dropout(0.5)) 113 | 114 | model.add(Dense(512, nb_classes, init='glorot_uniform')) 115 | model.add(Activation('softmax')) 116 | 117 | model.compile(loss='categorical_crossentropy', optimizer="adam") 118 | 119 | print("Training model...") 120 | 121 | model.fit(X, y, nb_epoch=20, batch_size=128, validation_split=0.15) 122 | 123 | print("Generating submission...") 124 | 125 | proba = model.predict_proba(X_test) 126 | make_submission(proba, ids, encoder, fname='keras-otto.csv') 127 | -------------------------------------------------------------------------------- /docs/sources/callbacks.md: -------------------------------------------------------------------------------- 1 | ## Usage of callbacks 2 | 3 | A callback is a set of functions to be applied at given stages of the training procedure. You can use callbacks to get a view on internal states and statistics of the model during training. You can pass a list of callbacks (as the keyword argument `callbacks`) to the `.fit()` method of the `Sequential` model. The relevant methods of the callbacks will then be called at each stage of the training. 4 | 5 | --- 6 | 7 | ## Base class 8 | 9 | ```python 10 | keras.callbacks.Callback() 11 | ``` 12 | - __Properties__: 13 | - __params__: dict. Training parameters (eg. 
verbosity, batch size, number of epochs...). 14 | - __model__: `keras.models.Model`. Reference of the model being trained. 15 | - __Methods__: 16 | - __on_train_begin__(logs={}): Method called at the beginning of training. 17 | - __on_train_end__(logs={}): Method called at the end of training. 18 | - __on_epoch_begin__(epoch, logs={}): Method called at the beginning of epoch `epoch`. 19 | - __on_epoch_end__(epoch, logs={}): Method called at the end of epoch `epoch`. 20 | - __on_batch_begin__(batch, logs={}): Method called at the beginning of batch `batch`. 21 | - __on_batch_end__(batch, logs={}): Method called at the end of batch `batch`. 22 | 23 | The `logs` dictionary will contain keys for quantities relevant to the current batch or epoch. Currently, the `.fit()` method of the `Sequential` model class will include the following quantities in the `logs` that it passes to its callbacks: 24 | - __on_epoch_end__: logs optionally include `val_loss` (if validation is enabled in `fit`), and `val_accuracy` (if validation and accuracy monitoring are enabled). 25 | - __on_batch_begin__: logs include `size`, the number of samples in the current batch. 26 | - __on_batch_end__: logs include `loss`, and optionally `accuracy` (if accuracy monitoring is enabled). 27 | 28 | --- 29 | 30 | ## Available callbacks 31 | 32 | ```python 33 | keras.callbacks.ModelCheckpoint(filepath, verbose=0, save_best_only=False) 34 | ``` 35 | 36 | Save the model after every epoch. If `save_best_only=True`, the latest best model according to the validation loss will not be overwritten. 37 | 38 | 39 | ```python 40 | keras.callbacks.EarlyStopping(monitor='val_loss', patience=0, verbose=0) 41 | ``` 42 | 43 | Stop training after no improvement of the metric `monitor` is seen for `patience` epochs. 44 | 45 | --- 46 | 47 | 48 | ## Create a callback 49 | 50 | You can create a custom callback by extending the base class `keras.callbacks.Callback`. A callback has access to its associated model through the class property `self.model`. 
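For instance, a callback can act on `self.model` directly. Here is a minimal sketch (a hypothetical custom callback, not one of the built-ins) that saves the model weights at the end of every epoch:

```python
class EpochWeightsSaver(keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs={}):
        # self.model is set by .fit() before training starts
        self.model.save_weights('weights_epoch_%d.hdf5' % epoch, overwrite=True)
```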
51 | 52 | Here's a simple example saving a list of losses over each batch during training: 53 | ```python 54 | class LossHistory(keras.callbacks.Callback): 55 | def on_train_begin(self, logs={}): 56 | self.losses = [] 57 | 58 | def on_batch_end(self, batch, logs={}): 59 | self.losses.append(logs.get('loss')) 60 | ``` 61 | 62 | --- 63 | 64 | ### Example: recording loss history 65 | 66 | ```python 67 | class LossHistory(keras.callbacks.Callback): 68 | def on_train_begin(self, logs={}): 69 | self.losses = [] 70 | 71 | def on_batch_end(self, batch, logs={}): 72 | self.losses.append(logs.get('loss')) 73 | 74 | model = Sequential() 75 | model.add(Dense(784, 10, init='uniform')) 76 | model.add(Activation('softmax')) 77 | model.compile(loss='categorical_crossentropy', optimizer='rmsprop') 78 | 79 | history = LossHistory() 80 | model.fit(X_train, Y_train, batch_size=128, nb_epoch=20, verbose=0, callbacks=[history]) 81 | 82 | print history.losses 83 | # outputs 84 | ''' 85 | [0.66047596406559383, 0.3547245744908703, ..., 0.25953155204159617, 0.25901699725311789] 86 | ''' 87 | ``` 88 | 89 | --- 90 | 91 | ### Example: model checkpoints 92 | 93 | ```python 94 | from keras.callbacks import ModelCheckpoint 95 | 96 | model = Sequential() 97 | model.add(Dense(784, 10, init='uniform')) 98 | model.add(Activation('softmax')) 99 | model.compile(loss='categorical_crossentropy', optimizer='rmsprop') 100 | 101 | ''' 102 | saves the model weights after each epoch if the validation loss decreased 103 | ''' 104 | checkpointer = ModelCheckpoint(filepath="/tmp/weights.hdf5", verbose=1, save_best_only=True) 105 | model.fit(X_train, Y_train, batch_size=128, nb_epoch=20, verbose=0, validation_data=(X_test, Y_test), callbacks=[checkpointer]) 106 | 107 | ``` 108 | 109 | -------------------------------------------------------------------------------- /keras/utils/generic_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import numpy as np 3 | import time 4 | import sys 5 | import six 6 | 7 | 8 | def get_from_module(identifier, module_params, module_name, instantiate=False, kwargs=None): 9 | if isinstance(identifier, six.string_types): 10 | res = module_params.get(identifier) 11 | if not res: 12 | raise Exception('Invalid ' + str(module_name) + ': ' + str(identifier)) 13 | if instantiate and not kwargs: 14 | return res() 15 | elif instantiate and kwargs: 16 | return res(**kwargs) 17 | else: 18 | return res 19 | return identifier 20 | 21 | 22 | def make_tuple(*args): 23 | return args 24 | 25 | 26 | def printv(v, prefix=''): 27 | if type(v) == dict: 28 | if 'name' in v: 29 | print(prefix + '#' + v['name']) 30 | del v['name'] 31 | prefix += '...' 32 | for nk, nv in v.items(): 33 | if type(nv) in [dict, list]: 34 | print(prefix + nk + ':') 35 | printv(nv, prefix) 36 | else: 37 | print(prefix + nk + ':' + str(nv)) 38 | elif type(v) == list: 39 | prefix += '...' 40 | for i, nv in enumerate(v): 41 | print(prefix + '#' + str(i)) 42 | printv(nv, prefix) 43 | else: 44 | prefix += '...' 
45 | print(prefix + str(v)) 46 | 47 | 48 | class Progbar(object): 49 | def __init__(self, target, width=30, verbose=1): 50 | ''' 51 | @param target: total number of steps expected 52 | ''' 53 | self.width = width 54 | self.target = target 55 | self.sum_values = {} 56 | self.unique_values = [] 57 | self.start = time.time() 58 | self.total_width = 0 59 | self.seen_so_far = 0 60 | self.verbose = verbose 61 | 62 | def update(self, current, values=[]): 63 | ''' 64 | @param current: index of current step 65 | @param values: list of tuples (name, value_for_last_step). 66 | The progress bar will display averages for these values. 67 | ''' 68 | for k, v in values: 69 | if k not in self.sum_values: 70 | self.sum_values[k] = [v * (current - self.seen_so_far), current - self.seen_so_far] 71 | self.unique_values.append(k) 72 | else: 73 | self.sum_values[k][0] += v * (current - self.seen_so_far) 74 | self.sum_values[k][1] += (current - self.seen_so_far) 75 | self.seen_so_far = current 76 | 77 | now = time.time() 78 | if self.verbose == 1: 79 | prev_total_width = self.total_width 80 | sys.stdout.write("\b" * prev_total_width) 81 | sys.stdout.write("\r") 82 | 83 | numdigits = int(np.floor(np.log10(self.target))) + 1 84 | barstr = '%%%dd/%%%dd [' % (numdigits, numdigits) 85 | bar = barstr % (current, self.target) 86 | prog = float(current)/self.target 87 | prog_width = int(self.width*prog) 88 | if prog_width > 0: 89 | bar += ('='*(prog_width-1)) 90 | if current < self.target: 91 | bar += '>' 92 | else: 93 | bar += '=' 94 | bar += ('.'*(self.width-prog_width)) 95 | bar += ']' 96 | sys.stdout.write(bar) 97 | self.total_width = len(bar) 98 | 99 | if current: 100 | time_per_unit = (now - self.start) / current 101 | else: 102 | time_per_unit = 0 103 | eta = time_per_unit*(self.target - current) 104 | info = '' 105 | if current < self.target: 106 | info += ' - ETA: %ds' % eta 107 | else: 108 | info += ' - %ds' % (now - self.start) 109 | for k in self.unique_values: 110 | info += ' - %s: %.4f' % (k, self.sum_values[k][0] / max(1, self.sum_values[k][1])) 111 | 112 | self.total_width += len(info) 113 | if prev_total_width > self.total_width: 114 | info += ((prev_total_width-self.total_width) * " ") 115 | 116 | sys.stdout.write(info) 117 | sys.stdout.flush() 118 | 119 | if current >= self.target: 120 | sys.stdout.write("\n") 121 | 122 | if self.verbose == 2: 123 | if current >= self.target: 124 | info = '%ds' % (now - self.start) 125 | for k in self.unique_values: 126 | info += ' - %s: %.4f' % (k, self.sum_values[k][0] / max(1, self.sum_values[k][1])) 127 | sys.stdout.write(info + "\n") 128 | 129 | def add(self, n, values=[]): 130 | self.update(self.seen_so_far+n, values) 131 | -------------------------------------------------------------------------------- /examples/cifar10_cnn.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | from keras.datasets import cifar10 4 | from keras.preprocessing.image import ImageDataGenerator 5 | from keras.models import Sequential 6 | from keras.layers.core import Dense, Dropout, Activation, Flatten 7 | from keras.layers.convolutional import Convolution2D, MaxPooling2D 8 | from keras.optimizers import SGD, Adadelta, Adagrad 9 | from keras.utils import np_utils, generic_utils 10 | from six.moves import range 11 | 12 | ''' 13 | Train a (fairly simple) deep CNN on the CIFAR10 small images dataset. 
14 | 15 | GPU run command: 16 | THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 python cifar10_cnn.py 17 | 18 | It gets down to 0.65 test logloss in 25 epochs, and down to 0.55 after 50 epochs. 19 | (it's still underfitting at that point, though). 20 | 21 | Note: the data was pickled with Python 2, and some encoding issues might prevent you 22 | from loading it in Python 3. You might have to load it in Python 2, 23 | save it in a different format, load it in Python 3 and repickle it. 24 | ''' 25 | 26 | batch_size = 32 27 | nb_classes = 10 28 | nb_epoch = 200 29 | data_augmentation = True 30 | 31 | # the data, shuffled and split between tran and test sets 32 | (X_train, y_train), (X_test, y_test) = cifar10.load_data() 33 | print('X_train shape:', X_train.shape) 34 | print(X_train.shape[0], 'train samples') 35 | print(X_test.shape[0], 'test samples') 36 | 37 | # convert class vectors to binary class matrices 38 | Y_train = np_utils.to_categorical(y_train, nb_classes) 39 | Y_test = np_utils.to_categorical(y_test, nb_classes) 40 | 41 | model = Sequential() 42 | 43 | model.add(Convolution2D(32, 3, 3, 3, border_mode='full')) 44 | model.add(Activation('relu')) 45 | model.add(Convolution2D(32, 32, 3, 3)) 46 | model.add(Activation('relu')) 47 | model.add(MaxPooling2D(poolsize=(2, 2))) 48 | model.add(Dropout(0.25)) 49 | 50 | model.add(Convolution2D(64, 32, 3, 3, border_mode='full')) 51 | model.add(Activation('relu')) 52 | model.add(Convolution2D(64, 64, 3, 3)) 53 | model.add(Activation('relu')) 54 | model.add(MaxPooling2D(poolsize=(2, 2))) 55 | model.add(Dropout(0.25)) 56 | 57 | model.add(Flatten()) 58 | model.add(Dense(64*8*8, 512)) 59 | model.add(Activation('relu')) 60 | model.add(Dropout(0.5)) 61 | 62 | model.add(Dense(512, nb_classes)) 63 | model.add(Activation('softmax')) 64 | 65 | # let's train the model using SGD + momentum (how original). 
66 | sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True) 67 | model.compile(loss='categorical_crossentropy', optimizer=sgd) 68 | 69 | if not data_augmentation: 70 | print("Not using data augmentation or normalization") 71 | 72 | X_train = X_train.astype("float32") 73 | X_test = X_test.astype("float32") 74 | X_train /= 255 75 | X_test /= 255 76 | model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch) 77 | score = model.evaluate(X_test, Y_test, batch_size=batch_size) 78 | print('Test score:', score) 79 | 80 | else: 81 | print("Using real time data augmentation") 82 | 83 | # this will do preprocessing and realtime data augmentation 84 | datagen = ImageDataGenerator( 85 | featurewise_center=True, # set input mean to 0 over the dataset 86 | samplewise_center=False, # set each sample mean to 0 87 | featurewise_std_normalization=True, # divide inputs by std of the dataset 88 | samplewise_std_normalization=False, # divide each input by its std 89 | zca_whitening=False, # apply ZCA whitening 90 | rotation_range=20, # randomly rotate images in the range (degrees, 0 to 180) 91 | width_shift_range=0.2, # randomly shift images horizontally (fraction of total width) 92 | height_shift_range=0.2, # randomly shift images vertically (fraction of total height) 93 | horizontal_flip=True, # randomly flip images 94 | vertical_flip=False) # randomly flip images 95 | 96 | # compute quantities required for featurewise normalization 97 | # (std, mean, and principal components if ZCA whitening is applied) 98 | datagen.fit(X_train) 99 | 100 | for e in range(nb_epoch): 101 | print('-'*40) 102 | print('Epoch', e) 103 | print('-'*40) 104 | print("Training...") 105 | # batch train with realtime data augmentation 106 | progbar = generic_utils.Progbar(X_train.shape[0]) 107 | for X_batch, Y_batch in datagen.flow(X_train, Y_train): 108 | loss = model.train_on_batch(X_batch, Y_batch) 109 | progbar.add(X_batch.shape[0], values=[("train loss", loss)]) 110 | 111 | print("Testing...") 112 | # test time! 113 | progbar = generic_utils.Progbar(X_test.shape[0]) 114 | for X_batch, Y_batch in datagen.flow(X_test, Y_test): 115 | score = model.test_on_batch(X_batch, Y_batch) 116 | progbar.add(X_batch.shape[0], values=[("test loss", score)]) 117 | -------------------------------------------------------------------------------- /keras/preprocessing/sequence.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | # -*- coding: utf-8 -*- 3 | import numpy as np 4 | import random 5 | from six.moves import range 6 | 7 | def pad_sequences(sequences, maxlen=None, dtype='int32', padding='pre', truncating='pre', value=0.): 8 | """ 9 | Pad each sequence to the same length: 10 | the length of the longuest sequence. 11 | 12 | If maxlen is provided, any sequence longer 13 | than maxlen is truncated to maxlen. Truncation happens off either the beginning (default) or 14 | the end of the sequence. 15 | 16 | Supports post-padding and pre-padding (default). 
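    For example (a small illustration of the default pre-padding and pre-truncating behaviour):

        pad_sequences([[1, 2], [3, 4, 5, 6]], maxlen=3)
        # -> [[0, 1, 2],
        #     [4, 5, 6]]   (an integer numpy array)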
17 | 18 | """ 19 | lengths = [len(s) for s in sequences] 20 | 21 | nb_samples = len(sequences) 22 | if maxlen is None: 23 | maxlen = np.max(lengths) 24 | 25 | x = (np.ones((nb_samples, maxlen)) * value).astype(dtype) 26 | for idx, s in enumerate(sequences): 27 | if truncating == 'pre': 28 | trunc = s[-maxlen:] 29 | elif truncating == 'post': 30 | trunc = s[:maxlen] 31 | else: 32 | raise ValueError("Truncating type '%s' not understood" % padding) 33 | 34 | if padding == 'post': 35 | x[idx, :len(trunc)] = trunc 36 | elif padding == 'pre': 37 | x[idx, -len(trunc):] = trunc 38 | else: 39 | raise ValueError("Padding type '%s' not understood" % padding) 40 | return x 41 | 42 | 43 | def make_sampling_table(size, sampling_factor=1e-5): 44 | ''' 45 | This generates an array where the ith element 46 | is the probability that a word of rank i would be sampled, 47 | according to the sampling distribution used in word2vec. 48 | 49 | The word2vec formula is: 50 | p(word) = min(1, sqrt(word.frequency/sampling_factor) / (word.frequency/sampling_factor)) 51 | 52 | We assume that the word frequencies follow Zipf's law (s=1) to derive 53 | a numerical approximation of frequency(rank): 54 | frequency(rank) ~ 1/(rank * (log(rank) + gamma) + 1/2 - 1/(12*rank)) 55 | where gamma is the Euler-Mascheroni constant. 56 | ''' 57 | gamma = 0.577 58 | rank = np.array(list(range(size))) 59 | rank[0] = 1 60 | inv_fq = rank * (np.log(rank) + gamma) + 0.5 - 1./(12.*rank) 61 | f = sampling_factor * inv_fq 62 | return np.minimum(1., f / np.sqrt(f)) 63 | 64 | 65 | def skipgrams(sequence, vocabulary_size, 66 | window_size=4, negative_samples=1., shuffle=True, 67 | categorical=False, sampling_table=None): 68 | ''' 69 | Take a sequence (list of indexes of words), 70 | returns couples of [word_index, other_word index] and labels (1s or 0s), 71 | where label = 1 if 'other_word' belongs to the context of 'word', 72 | and label=0 if 'other_word' is ramdomly sampled 73 | 74 | @param vocabulary_size: int. maximum possible word index + 1 75 | @param window_size: int. actually half-window. The window of a word wi will be [i-window_size, i+window_size+1] 76 | @param negative_samples: float >= 0. 0 for no negative (=random) samples. 1 for same number as positive samples. etc. 77 | @param categorical: bool. if False, labels will be integers (eg. [0, 1, 1 .. ]), 78 | if True labels will be categorical eg. [[1,0],[0,1],[0,1] .. ] 79 | 80 | Note: by convention, index 0 in the vocabulary is a non-word and will be skipped. 
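    Example (illustrative; the returned order depends on shuffling and on random negative sampling):

        couples, labels = skipgrams([1, 2, 3], vocabulary_size=4, window_size=1)
        # positive couples such as [1, 2], [2, 1], [2, 3], [3, 2] carry label 1,
        # plus an equal number of randomly drawn negative couples carrying label 0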
81 | ''' 82 | couples = [] 83 | labels = [] 84 | for i, wi in enumerate(sequence): 85 | if not wi: 86 | continue 87 | if sampling_table is not None: 88 | if sampling_table[wi] < random.random(): 89 | continue 90 | 91 | window_start = max(0, i-window_size) 92 | window_end = min(len(sequence), i+window_size+1) 93 | for j in range(window_start, window_end): 94 | if j != i: 95 | wj = sequence[j] 96 | if not wj: 97 | continue 98 | couples.append([wi, wj]) 99 | if categorical: 100 | labels.append([0,1]) 101 | else: 102 | labels.append(1) 103 | 104 | if negative_samples > 0: 105 | nb_negative_samples = int(len(labels) * negative_samples) 106 | words = [c[0] for c in couples] 107 | random.shuffle(words) 108 | 109 | couples += [[words[i%len(words)], random.randint(1, vocabulary_size-1)] for i in range(nb_negative_samples)] 110 | if categorical: 111 | labels += [[1,0]]*nb_negative_samples 112 | else: 113 | labels += [0]*nb_negative_samples 114 | 115 | if shuffle: 116 | seed = random.randint(0,10e6) 117 | random.seed(seed) 118 | random.shuffle(couples) 119 | random.seed(seed) 120 | random.shuffle(labels) 121 | 122 | return couples, labels 123 | -------------------------------------------------------------------------------- /docs/sources/datasets.md: -------------------------------------------------------------------------------- 1 | # Datasets 2 | 3 | ## CIFAR10 small image classification 4 | 5 | `keras.datasets.cifar10` 6 | 7 | Dataset of 50,000 32x32 color training images, labeled over 10 categories, and 10,000 test images. 8 | 9 | ### Usage: 10 | 11 | ```python 12 | (X_train, y_train), (X_test, y_test) = cifar10.load_data() 13 | ``` 14 | 15 | - __Return:__ 16 | - 2 tuples: 17 | - __X_train, X_test__: uint8 array of RGB image data with shape (nb_samples, 3, 32, 32). 18 | - __y_train, y_test__: uint8 array of category labels (integers in range 0-9) with shape (nb_samples,). 19 | 20 | --- 21 | 22 | ## CIFAR100 small image classification 23 | 24 | `keras.datasets.cifar100` 25 | 26 | Dataset of 50,000 32x32 color training images, labeled over 100 categories, and 10,000 test images. 27 | 28 | ### Usage: 29 | 30 | ```python 31 | (X_train, y_train), (X_test, y_test) = cifar100.load_data(label_mode='fine') 32 | ``` 33 | 34 | - __Return:__ 35 | - 2 tuples: 36 | - __X_train, X_test__: uint8 array of RGB image data with shape (nb_samples, 3, 32, 32). 37 | - __y_train, y_test__: uint8 array of category labels with shape (nb_samples,). 38 | 39 | - __Arguments:__ 40 | 41 | - __label_mode__: "fine" or "coarse". 42 | 43 | --- 44 | 45 | ## IMDB Movie reviews sentiment classification 46 | 47 | `keras.datasets.imdb` 48 | 49 | Dataset of 25,000 movies reviews from IMDB, labeled by sentiment (positive/negative). Reviews have been preprocessed, and each review is encoded as a [sequence](preprocessing/sequence.md) of word indexes (integers). For convenience, words are indexed by overall frequency in the dataset, so that for instance the integer "3" encodes the 3rd most frequent word in the data. This allows for quick filtering operations such as: "only consider the top 10,000 most common words, but eliminate the top 20 most common words". 50 | 51 | As a convention, "0" does not stand for a specific word, but instead is used to encode any unknown word. 
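For example, the filtering operation described above maps directly onto the `nb_words` and `skip_top` arguments of `load_data` (documented below); words falling outside the kept range are replaced by the placeholder index:

```python
from keras.datasets import imdb

# keep the 10,000 most frequent words, but ignore the 20 most frequent ones
(X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=10000, skip_top=20)
```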
52 | 53 | ### Usage: 54 | 55 | ```python 56 | (X_train, y_train), (X_test, y_test) = imdb.load_data(path="imdb.pkl", \ 57 | nb_words=None, skip_top=0, maxlen=None, test_split=0.1, seed=113) 58 | ``` 59 | - __Return:__ 60 | - 2 tuples: 61 | - __X_train, X_test__: list of sequences, which are lists of indexes (integers). If the nb_words argument was specific, the maximum possible index value is nb_words-1. If the maxlen argument was specified, the largest possible sequence length is maxlen. 62 | - __y_train, y_test__: list of integer labels (1 or 0). 63 | 64 | - __Arguments:__ 65 | 66 | - __path__: if you do have the data locally (at `'~/.keras/datasets/' + path`), if will be downloaded to this location (in cPickle format). 67 | - __nb_words__: integer or None. Top most frequent words to consider. Any less frequent word will appear as 0 in the sequence data. 68 | - __skip_top__: integer. Top most frequent words to ignore (they will appear as 0s in the sequence data). 69 | - __maxlen__: int. Maximum sequence length. Any longer sequence will be truncated. 70 | - __test_split__: float. Fraction of the dataset to be used as test data. 71 | - __seed__: int. Seed for reproducible data shuffling. 72 | 73 | --- 74 | 75 | ## Reuters newswire topics classification 76 | 77 | `keras.datasets.reuters` 78 | 79 | Dataset of 11,228 newswires from Reuters, labeled over 46 topics. As with the IMDB dataset, each wire is encoded as a sequence of word indexes (same conventions). 80 | 81 | ### Usage: 82 | 83 | ```python 84 | (X_train, y_train), (X_test, y_test) = reuters.load_data(path="reuters.pkl", \ 85 | nb_words=None, skip_top=0, maxlen=None, test_split=0.1, seed=113) 86 | ``` 87 | 88 | The specifications are the same as that of the IMDB dataset. 89 | 90 | This dataset also makes available the word index used for encoding the sequences: 91 | 92 | ```python 93 | word_index = reuters.get_word_index(path="reuters_word_index.pkl") 94 | ``` 95 | 96 | - __Return:__ A dictionary where key are words (str) and values are indexes (integer). eg. `word_index["giraffe"]` might return `1234`. 97 | 98 | - __Arguments:__ 99 | 100 | - __path__: if you do have the index file locally (at `'~/.keras/datasets/' + path`), if will be downloaded to this location (in cPickle format). 101 | 102 | ## MNIST database of handwritten digits 103 | 104 | `keras.datasets.mnist` 105 | 106 | Dataset of 60,000 28x28 grayscale images of the 10 digits, along with a test set of 10,000 images. 107 | 108 | ### Usage: 109 | 110 | ```python 111 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 112 | ``` 113 | 114 | - __Return:__ 115 | - 2 tuples: 116 | - __X_train, X_test__: uint8 array of grayscale image data with shape (nb_samples, 28, 28). 117 | - __y_train, y_test__: uint8 array of digit labels (integers in range 0-9) with shape (nb_samples,). 118 | 119 | - __Arguments:__ 120 | 121 | - __path__: if you do have the index file locally (at `'~/.keras/datasets/' + path`), if will be downloaded to this location (in cPickle format). 
122 | -------------------------------------------------------------------------------- /keras/datasets/reuters.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from __future__ import print_function 4 | from .data_utils import get_file 5 | import string 6 | import random 7 | import os 8 | import six.moves.cPickle 9 | from six.moves import zip 10 | import numpy as np 11 | 12 | 13 | def make_reuters_dataset(path=os.path.join('datasets', 'temp', 'reuters21578'), min_samples_per_topic=15): 14 | import re 15 | from ..preprocessing.text import Tokenizer 16 | 17 | wire_topics = [] 18 | topic_counts = {} 19 | wire_bodies = [] 20 | 21 | for fname in os.listdir(path): 22 | if 'sgm' in fname: 23 | s = open(os.path.join(path, fname)).read() 24 | tag = '' 25 | while tag in s: 26 | s = s[s.find(tag)+len(tag):] 27 | topics = s[:s.find('' not in topics: 29 | topic = topics.replace('', '').replace('', '') 30 | wire_topics.append(topic) 31 | topic_counts[topic] = topic_counts.get(topic, 0) + 1 32 | else: 33 | continue 34 | 35 | bodytag = '' 36 | body = s[s.find(bodytag)+len(bodytag):] 37 | body = body[:body.find('= min_samples_per_topic: 47 | kept_topics.add(x[0]) 48 | print('-') 49 | print('Kept topics:', len(kept_topics)) 50 | 51 | # filter wires with rare topics 52 | kept_wires = [] 53 | labels = [] 54 | topic_indexes = {} 55 | for t, b in zip(wire_topics, wire_bodies): 56 | if t in kept_topics: 57 | if t not in topic_indexes: 58 | topic_index = len(topic_indexes) 59 | topic_indexes[t] = topic_index 60 | else: 61 | topic_index = topic_indexes[t] 62 | 63 | labels.append(topic_index) 64 | kept_wires.append(b) 65 | 66 | # vectorize wires 67 | tokenizer = Tokenizer() 68 | tokenizer.fit_on_texts(kept_wires) 69 | X = tokenizer.texts_to_sequences(kept_wires) 70 | 71 | print('Sanity check:') 72 | for w in ["banana", "oil", "chocolate", "the", "dsft"]: 73 | print('...index of', w, ':', tokenizer.word_index.get(w)) 74 | print('text reconstruction:') 75 | reverse_word_index = dict([(v, k) for k, v in tokenizer.word_index.items()]) 76 | print(' '.join(reverse_word_index[i] for i in X[10])) 77 | 78 | dataset = (X, labels) 79 | print('-') 80 | print('Saving...') 81 | six.moves.cPickle.dump(dataset, open(os.path.join('datasets', 'data', 'reuters.pkl'), 'w')) 82 | six.moves.cPickle.dump(tokenizer.word_index, open(os.path.join('datasets', 'data', 'reuters_word_index.pkl'), 'w')) 83 | 84 | 85 | def load_data(path="reuters.pkl", nb_words=None, skip_top=0, maxlen=None, test_split=0.2, seed=113, 86 | start_char=1, oov_char=2, index_from=3): 87 | 88 | path = get_file(path, origin="https://s3.amazonaws.com/text-datasets/reuters.pkl") 89 | f = open(path, 'rb') 90 | 91 | X, labels = six.moves.cPickle.load(f) 92 | f.close() 93 | 94 | np.random.seed(seed) 95 | np.random.shuffle(X) 96 | np.random.seed(seed) 97 | np.random.shuffle(labels) 98 | 99 | if start_char is not None: 100 | X = [[start_char] + [w + index_from for w in x] for x in X] 101 | elif index_from: 102 | X = [[w + index_from for w in x] for x in X] 103 | 104 | if maxlen: 105 | new_X = [] 106 | new_labels = [] 107 | for x, y in zip(X, labels): 108 | if len(x) < maxlen: 109 | new_X.append(x) 110 | new_labels.append(y) 111 | X = new_X 112 | labels = new_labels 113 | 114 | if not nb_words: 115 | nb_words = max([max(x) for x in X]) 116 | 117 | # by convention, use 2 as OOV word 118 | # reserve 'index_from' (=3 by default) characters: 0 (padding), 1 (start), 2 (OOV) 
119 | if oov_char is not None: 120 | X = [[oov_char if (w >= nb_words or w < skip_top) else w for w in x] for x in X] 121 | else: 122 | nX = [] 123 | for x in X: 124 | nx = [] 125 | for w in x: 126 | if (w >= nb_words or w < skip_top): 127 | nx.append(w) 128 | nX.append(nx) 129 | X = nX 130 | 131 | X_train = X[:int(len(X)*(1-test_split))] 132 | y_train = labels[:int(len(X)*(1-test_split))] 133 | 134 | X_test = X[int(len(X)*(1-test_split)):] 135 | y_test = labels[int(len(X)*(1-test_split)):] 136 | 137 | return (X_train, y_train), (X_test, y_test) 138 | 139 | 140 | def get_word_index(path="reuters_word_index.pkl"): 141 | path = get_file(path, origin="https://s3.amazonaws.com/text-datasets/reuters_word_index.pkl") 142 | f = open(path, 'rb') 143 | return six.moves.cPickle.load(f) 144 | 145 | 146 | if __name__ == "__main__": 147 | make_reuters_dataset() 148 | (X_train, y_train), (X_test, y_test) = load_data() 149 | -------------------------------------------------------------------------------- /keras/utils/layer_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import inspect 3 | import numpy as np 4 | import theano 5 | import copy 6 | 7 | from ..layers.advanced_activations import LeakyReLU, PReLU 8 | from ..layers.core import Dense, Merge, Dropout, Activation, Reshape, Flatten, RepeatVector, Layer, AutoEncoder 9 | from ..layers.core import ActivityRegularization, TimeDistributedDense, AutoEncoder, MaxoutDense 10 | from ..layers.convolutional import Convolution1D, Convolution2D, MaxPooling1D, MaxPooling2D, ZeroPadding2D 11 | from ..layers.embeddings import Embedding, WordContextProduct 12 | from ..layers.noise import GaussianNoise, GaussianDropout 13 | from ..layers.normalization import BatchNormalization 14 | from ..layers.recurrent import SimpleRNN, SimpleDeepRNN, GRU, LSTM, JZS1, JZS2, JZS3 15 | from ..layers import containers 16 | from .. import regularizers 17 | from .. 
import constraints 18 | 19 | 20 | def container_from_config(original_layer_dict): 21 | layer_dict = copy.deepcopy(original_layer_dict) 22 | name = layer_dict.get('name') 23 | 24 | if name == 'Merge': 25 | mode = layer_dict.get('mode') 26 | layers = layer_dict.get('layers') 27 | layer_list = [] 28 | for layer in layers: 29 | init_layer = container_from_config(layer) 30 | layer_list.append(init_layer) 31 | merge_layer = Merge(layer_list, mode) 32 | return merge_layer 33 | 34 | elif name == 'Sequential': 35 | layers = layer_dict.get('layers') 36 | layer_list = [] 37 | for layer in layers: 38 | init_layer = container_from_config(layer) 39 | layer_list.append(init_layer) 40 | seq_layer = containers.Sequential(layer_list) 41 | return seq_layer 42 | 43 | elif name == 'Graph': 44 | graph_layer = containers.Graph() 45 | inputs = layer_dict.get('input_config') 46 | 47 | for input in inputs: 48 | graph_layer.add_input(**input) 49 | 50 | nodes = layer_dict.get('node_config') 51 | for node in nodes: 52 | layer = container_from_config(layer_dict['nodes'].get(node['name'])) 53 | node['layer'] = layer 54 | graph_layer.add_node(**node) 55 | 56 | outputs = layer_dict.get('output_config') 57 | for output in outputs: 58 | graph_layer.add_output(**output) 59 | return graph_layer 60 | 61 | elif name == 'AutoEncoder': 62 | kwargs = {'encoder': container_from_config(layer_dict.get('encoder_config')), 63 | 'decoder': container_from_config(layer_dict.get('decoder_config'))} 64 | for kwarg in ['output_reconstruction', 'weights']: 65 | if kwarg in layer_dict: 66 | kwargs[kwarg] = layer_dict[kwarg] 67 | return AutoEncoder(**kwargs) 68 | 69 | else: 70 | layer_dict.pop('name') 71 | 72 | for k, v in layer_dict.items(): 73 | # For now, this can only happen for regularizers and constraints 74 | if isinstance(v, dict): 75 | vname = v.get('name') 76 | v.pop('name') 77 | if vname in [x for x, y in inspect.getmembers(constraints, predicate=inspect.isclass)]: 78 | layer_dict[k] = constraints.get(vname, v) 79 | if vname in [x for x, y in inspect.getmembers(regularizers, predicate=inspect.isclass)]: 80 | layer_dict[k] = regularizers.get(vname, v) 81 | 82 | base_layer = get_layer(name, layer_dict) 83 | return base_layer 84 | 85 | 86 | def print_layer_shapes(model, input_shapes): 87 | """ 88 | Utility function to print the shape of the output at each layer of a Model 89 | 90 | Arguments: 91 | model: instance of Model / Merge 92 | input_shapes: dict (Graph), list of tuples (Merge) or tuple (Sequential) 93 | """ 94 | if model.__class__.__name__ in ['Sequential', 'Merge']: 95 | # in this case input_shapes is a tuple, or a list [shape1, shape2] 96 | if not isinstance(input_shapes[0], tuple): 97 | input_shapes = [input_shapes] 98 | 99 | inputs = model.get_input(train=False) 100 | if not isinstance(inputs, list): 101 | inputs = [inputs] 102 | input_dummy = [np.zeros(shape, dtype=np.float32) 103 | for shape in input_shapes] 104 | layers = model.layers 105 | 106 | elif model.__class__.__name__ == 'Graph': 107 | # in this case input_shapes is a dictionary 108 | inputs = [model.inputs[name].input 109 | for name in model.input_order] 110 | input_dummy = [np.zeros(input_shapes[name], dtype=np.float32) 111 | for name in model.input_order] 112 | layers = [model.nodes[c['name']] for c in model.node_config] 113 | 114 | print("input shapes : ", input_shapes) 115 | for l in layers: 116 | shape_f = theano.function(inputs, l.get_output(train=False).shape, 117 | on_unused_input='ignore') 118 | out_shape = tuple(shape_f(*input_dummy)) 119 | config = 
l.get_config() 120 | print('shape after %s: %s' % (config['name'], out_shape)) 121 | 122 | 123 | from .generic_utils import get_from_module 124 | def get_layer(identifier, kwargs=None): 125 | return get_from_module(identifier, globals(), 'layer', instantiate=True, kwargs=kwargs) 126 | -------------------------------------------------------------------------------- /docs/sources/examples.md: -------------------------------------------------------------------------------- 1 | 2 | Here are a few examples to get you started! 3 | 4 | ### Multilayer Perceptron (MLP) 5 | 6 | ```python 7 | from keras.models import Sequential 8 | from keras.layers.core import Dense, Dropout, Activation 9 | from keras.optimizers import SGD 10 | 11 | model = Sequential() 12 | model.add(Dense(20, 64, init='uniform')) 13 | model.add(Activation('tanh')) 14 | model.add(Dropout(0.5)) 15 | model.add(Dense(64, 64, init='uniform')) 16 | model.add(Activation('tanh')) 17 | model.add(Dropout(0.5)) 18 | model.add(Dense(64, 2, init='uniform')) 19 | model.add(Activation('softmax')) 20 | 21 | sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True) 22 | model.compile(loss='mean_squared_error', optimizer=sgd) 23 | 24 | model.fit(X_train, y_train, nb_epoch=20, batch_size=16) 25 | score = model.evaluate(X_test, y_test, batch_size=16) 26 | ``` 27 | 28 | --- 29 | 30 | ### Alternative implementation of MLP 31 | 32 | ```python 33 | model = Sequential() 34 | model.add(Dense(20, 64, init='uniform', activation='tanh')) 35 | model.add(Dropout(0.5)) 36 | model.add(Dense(64, 64, init='uniform', activation='tanh')) 37 | model.add(Dropout(0.5)) 38 | model.add(Dense(64, 2, init='uniform', activation='softmax')) 39 | 40 | sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True) 41 | model.compile(loss='mean_squared_error', optimizer=sgd) 42 | ``` 43 | 44 | --- 45 | 46 | ### VGG-like convnet 47 | 48 | ```python 49 | from keras.models import Sequential 50 | from keras.layers.core import Dense, Dropout, Activation, Flatten 51 | from keras.layers.convolutional import Convolution2D, MaxPooling2D 52 | from keras.optimizers import SGD 53 | 54 | model = Sequential() 55 | model.add(Convolution2D(32, 3, 3, 3, border_mode='full')) 56 | model.add(Activation('relu')) 57 | model.add(Convolution2D(32, 32, 3, 3)) 58 | model.add(Activation('relu')) 59 | model.add(MaxPooling2D(poolsize=(2, 2))) 60 | model.add(Dropout(0.25)) 61 | 62 | model.add(Convolution2D(64, 32, 3, 3, border_mode='full')) 63 | model.add(Activation('relu')) 64 | model.add(Convolution2D(64, 64, 3, 3)) 65 | model.add(Activation('relu')) 66 | model.add(MaxPooling2D(poolsize=(2, 2))) 67 | model.add(Dropout(0.25)) 68 | 69 | model.add(Flatten()) 70 | model.add(Dense(64*8*8, 256)) 71 | model.add(Activation('relu')) 72 | model.add(Dropout(0.5)) 73 | 74 | model.add(Dense(256, 10)) 75 | model.add(Activation('softmax')) 76 | 77 | sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True) 78 | model.compile(loss='categorical_crossentropy', optimizer=sgd) 79 | 80 | model.fit(X_train, Y_train, batch_size=32, nb_epoch=1) 81 | 82 | ``` 83 | 84 | --- 85 | 86 | ### Sequence classification with LSTM 87 | 88 | ```python 89 | from keras.models import Sequential 90 | from keras.layers.core import Dense, Dropout, Activation 91 | from keras.layers.embeddings import Embedding 92 | from keras.layers.recurrent import LSTM 93 | 94 | model = Sequential() 95 | # Add a mask_zero=True to the Embedding connstructor if 0 is a left-padding value in your data 96 | model.add(Embedding(max_features, 256)) 97 | model.add(LSTM(256, 
128, activation='sigmoid', inner_activation='hard_sigmoid')) 98 | model.add(Dropout(0.5)) 99 | model.add(Dense(128, 1)) 100 | model.add(Activation('sigmoid')) 101 | 102 | model.compile(loss='binary_crossentropy', optimizer='rmsprop') 103 | 104 | model.fit(X_train, Y_train, batch_size=16, nb_epoch=10) 105 | score = model.evaluate(X_test, Y_test, batch_size=16) 106 | ``` 107 | 108 | --- 109 | 110 | ### Image captioning 111 | 112 | Architecture for learning image captions with a convnet and a Gated Recurrent Unit (word-level embedding, caption of maximum length 16 words). 113 | 114 | Note that getting this to actually "work" will require using a bigger convnet, initialized with pre-trained weights. 115 | Displaying readable results will also require an embedding decoder. 116 | 117 | ```python 118 | max_caption_len = 16 119 | 120 | model = Sequential() 121 | model.add(Convolution2D(32, 3, 3, 3, border_mode='full')) 122 | model.add(Activation('relu')) 123 | model.add(Convolution2D(32, 32, 3, 3)) 124 | model.add(Activation('relu')) 125 | model.add(MaxPooling2D(poolsize=(2, 2))) 126 | 127 | model.add(Convolution2D(64, 32, 3, 3, border_mode='full')) 128 | model.add(Activation('relu')) 129 | model.add(Convolution2D(64, 64, 3, 3)) 130 | model.add(Activation('relu')) 131 | model.add(MaxPooling2D(poolsize=(2, 2))) 132 | 133 | model.add(Convolution2D(128, 64, 3, 3, border_mode='full')) 134 | model.add(Activation('relu')) 135 | model.add(Convolution2D(128, 128, 3, 3)) 136 | model.add(Activation('relu')) 137 | model.add(MaxPooling2D(poolsize=(2, 2))) 138 | 139 | model.add(Flatten()) 140 | model.add(Dense(128*4*4, 256)) 141 | model.add(Activation('relu')) 142 | model.add(Dropout(0.5)) 143 | 144 | model.add(RepeatVector(max_caption_len)) 145 | # the GRU below returns sequences of max_caption_len vectors of size 256 (our word embedding size) 146 | model.add(GRU(256, 256, return_sequences=True)) 147 | 148 | model.compile(loss='mean_squared_error', optimizer='rmsprop') 149 | 150 | # "images" is a numpy array of shape (nb_samples, nb_channels=3, width, height) 151 | # "captions" is a numpy array of shape (nb_samples, max_caption_len=16, embedding_dim=256) 152 | # captions are supposed already embedded (dense vectors). 153 | model.fit(images, captions, batch_size=16, nb_epoch=100) 154 | 155 | ``` 156 | 157 | --- 158 | 159 | In the [examples folder](https://github.com/fchollet/keras/tree/master/examples), you will find example models for real datasets: 160 | 161 | - CIFAR10 small images classification: Convnet with realtime data augmentation 162 | - IMDB movie review sentiment classification: LSTM over sequences of words 163 | - Reuters newswires topic classification: Multilayer Perceptron 164 | -------------------------------------------------------------------------------- /tests/manual/check_masked_recurrent.py: -------------------------------------------------------------------------------- 1 | # Dummy test data as input to RNN. This input is 3 timesteps long where the third timestep always matches the 2 | # first. Without masking it should be able to learn it, with masking it should fail. 
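# With mask_zero=True on the Embedding below, any timestep whose input value is 0 is flagged as
# masked and the recurrent layers are expected to ignore it. The checks further down zero out
# chosen timesteps and assert that the model only learns targets whose informative timestep was
# left unmasked.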
3 | 4 | import numpy as np 5 | from keras.utils.theano_utils import sharedX 6 | from keras.models import Sequential 7 | from keras.layers.core import Dense, Activation, Merge, Dropout, TimeDistributedDense 8 | from keras.layers.embeddings import Embedding 9 | from keras.layers.recurrent import SimpleRNN, SimpleDeepRNN, LSTM, GRU 10 | import theano 11 | 12 | theano.config.exception_verbosity = 'high' 13 | 14 | # (nb_samples, timesteps, dimensions) 15 | X = np.random.random_integers(1, 4, size=(500000, 15)) 16 | 17 | print("About to compile the first model") 18 | model = Sequential() 19 | model.add(Embedding(5, 4, mask_zero=True)) 20 | model.add(TimeDistributedDense(4, 4)) # obviously this is redundant. Just testing. 21 | model.add(SimpleRNN(4, 4, activation='relu', return_sequences=True)) 22 | model.add(Dropout(0.5)) 23 | model.add(SimpleDeepRNN(4, 4, depth=2, activation='relu')) 24 | model.add(Dropout(0.5)) 25 | model.add(Dense(4, 4, activation='softmax')) 26 | model.compile(loss='categorical_crossentropy', 27 | optimizer='rmsprop', theano_mode=theano.compile.mode.FAST_RUN) 28 | print("Compiled model") 29 | 30 | W = model.get_weights() # We'll save these so we can reset it later 31 | 32 | X[:, : 10] = 0 33 | Xmask0 = X.copy() 34 | Xmask0[:, 10] = 0 35 | 36 | Xmask12 = X.copy() 37 | Xmask12[:, 11] = 0 38 | Xmask12[:, 12] = 0 39 | 40 | X0_onehot = np.zeros((X.shape[0], 4)) 41 | X1_onehot = np.zeros((X.shape[0], 4)) 42 | for i, row in enumerate(X): 43 | X0_onehot[i, row[10] - 1] = 1 44 | X1_onehot[i, row[11] - 1] = 1 45 | 46 | # Uniform score: 4 options = ln(4) nats (2 bits) 47 | # we should not do better than this when we mask out the part of the input 48 | # that gives us the correct answer 49 | uniform_score = np.log(4) 50 | batch_size=512 51 | 52 | # Train it to guess 0th dim 53 | model.fit(X, X0_onehot, nb_epoch=1, batch_size=batch_size) 54 | score = model.evaluate(X, X0_onehot, batch_size=batch_size) 55 | if score > uniform_score * 0.9: 56 | raise Exception('Failed to learn to copy timestep 0, score %f' % score) 57 | 58 | 59 | model.set_weights(W) 60 | 61 | # Train without showing it the 0th dim to learn 1st dim 62 | model.fit(X[: , 1:], X1_onehot, nb_epoch=1, batch_size=batch_size) 63 | score = model.evaluate(X[:, 1:], X1_onehot, batch_size=batch_size) 64 | if score > uniform_score * 0.9: 65 | raise Exception('Failed to learn to copy timestep 1, score %f' % score) 66 | 67 | model.set_weights(W) 68 | 69 | # Train to guess 0th dim when 0th dim has been masked (should fail) 70 | model.fit(Xmask0, X0_onehot, nb_epoch=1, batch_size=batch_size) 71 | score = model.evaluate(Xmask0, X0_onehot, batch_size=batch_size) 72 | if score < uniform_score * 0.9: 73 | raise Exception('Somehow learned to copy timestep 0 despite mask, score %f' % score) 74 | 75 | model.set_weights(W) 76 | 77 | # Train to guess 1st dim when 0th dim has been masked (should succeed) 78 | model.fit(Xmask0, X1_onehot, nb_epoch=1, batch_size=batch_size) 79 | score = model.evaluate(Xmask0, X1_onehot, batch_size=batch_size) 80 | if score > uniform_score * 0.9: 81 | raise Exception('Failed to learn to copy timestep 1 in masked model, score %f' % score) 82 | 83 | model.set_weights(W) 84 | 85 | # Finally, make sure the mask is actually blocking input, mask out timesteps 1 and 2, and see if 86 | # it can learn timestep 0 (should fail) 87 | model.fit(Xmask12, X0_onehot, nb_epoch=1, batch_size=batch_size) 88 | 89 | score = model.evaluate(Xmask12, X0_onehot, batch_size=batch_size) 90 | if score < uniform_score * 0.9: 91 | raise 
Exception('Somehow learned to copy timestep 0 despite masking 1, score %f' % score) 92 | 93 | # Another testing approach, just initialize models and make sure that prepending zeros doesn't affect 94 | # their output 95 | print("About to compile the second model") 96 | model2 = Sequential() 97 | model2.add(Embedding(5, 4, mask_zero=True)) 98 | model2.add(TimeDistributedDense(4, 4)) 99 | model2.add(Activation('time_distributed_softmax')) 100 | model2.add(LSTM(4, 4, return_sequences=True)) 101 | model2.add(Activation('tanh')) 102 | model2.add(GRU(4, 4, activation='softmax', return_sequences=True)) 103 | model2.add(SimpleDeepRNN(4, 4, depth=2, activation='relu', return_sequences=True)) 104 | model2.add(SimpleRNN(4, 4, activation='relu', return_sequences=True)) 105 | model2.compile(loss='categorical_crossentropy', 106 | optimizer='rmsprop', theano_mode=theano.compile.mode.FAST_RUN) 107 | print("Compiled model2") 108 | 109 | X2 = np.random.random_integers(1, 4, size=(2, 5)) 110 | y2 = np.random.random((X2.shape[0], X2.shape[1], 4)) 111 | 112 | ref = model2.predict(X2) 113 | ref_eval = model2.evaluate(X2, y2) 114 | mask = np.ones((y2.shape[0], y2.shape[1], 1)) 115 | 116 | for pre_zeros in range(1, 10): 117 | padded_X2 = np.concatenate((np.zeros((X2.shape[0], pre_zeros)), X2), axis=1) 118 | padded_mask = np.concatenate((np.zeros((mask.shape[0], pre_zeros, mask.shape[2])), mask), axis=1) 119 | padded_y2 = np.concatenate((np.zeros((y2.shape[0], pre_zeros, y2.shape[2])), y2), axis=1) 120 | 121 | pred = model2.predict(padded_X2) 122 | if not np.allclose(ref[:, -1, :], pred[:, -1, :]): 123 | raise Exception("Different result after left-padding %d zeros. Ref: %s, Pred: %s" % (pre_zeros, ref, pred)) 124 | 125 | pad_eval = model2.evaluate(padded_X2, padded_y2, weights=padded_mask) 126 | if not np.allclose([pad_eval], [ref_eval]): 127 | raise Exception("Got dissimilar categorical_crossentropy after left-padding %d zeros. Ref: %f, Pred %f" %\ 128 | (pre_zeros, ref_eval, pad_eval)) 129 | 130 | 131 | -------------------------------------------------------------------------------- /docs/sources/layers/convolutional.md: -------------------------------------------------------------------------------- 1 | 2 | ## Convolution1D 3 | 4 | ```python 5 | keras.layers.convolutional.Convolution1D(input_dim, nb_filter, filter_length, 6 | init='uniform', activation='linear', weights=None, 7 | border_mode='valid', subsample_length=1, 8 | W_regularizer=None, b_regularizer=None, W_constraint=None, 9 | b_constraint=None) 10 | ``` 11 | 12 | Convolution operator for filtering neighborhoods of one-dimensional inputs. 13 | 14 | 15 | - __Input shape__: 3D tensor with shape: `(nb_samples, steps, input_dim)`. 16 | 17 | - __Output shape__: 3D tensor with shape: `(nb_samples, steps, nb_filter)`. `steps` value might have changed due to padding. 18 | 19 | - __Arguments__: 20 | - __input_dim__: Number of channels/dimensions in the input. 21 | - __nb_filter__: Number of convolution kernels to use (dimensionality of the output). 22 | - __filter_length__: The extension (spatial or temporal) of each filter. 23 | - __init__: name of initialization function for the weights of the layer (see: [initializations](../initializations.md)), or alternatively, Theano function to use for weights initialization. This parameter is only relevant if you don't pass a `weights` argument. 24 | - __activation__: name of activation function to use (see: [activations](../activations.md)), or alternatively, elementwise Theano function.
If you don't specify anything, no activation is applied (ie. "linear" activation: a(x) = x). 25 | - __weights__: list of numpy arrays to set as initial weights. 26 | - __border_mode__: 'valid' or 'full'. see scipy.signal.convolve2d. 27 | - __subsample_length__: factor by which to subsample output. 28 | - __W_regularizer__: instance of [WeightRegularizer](../regularizers.md) (eg. L1 or L2 regularization), applied to the main weights matrix. 29 | - __b_regularizer__: instance of [WeightRegularizer](../regularizers.md), applied to the bias. 30 | - __activity_regularizer__: instance of [ActivityRegularizer](../regularizers.md), applied to the network output. 31 | - __W_constraint__: instance of the [constraints](../constraints.md) module (eg. maxnorm, nonneg), applied to the main weights matrix. 32 | - __b_constraint__: instance of the [constraints](../constraints.md) module, applied to the bias. 33 | 34 | --- 35 | 36 | ## Convolution2D 37 | 38 | ```python 39 | keras.layers.convolutional.Convolution2D(nb_filter, stack_size, nb_row, nb_col, 40 | init='glorot_uniform', activation='linear', weights=None, 41 | border_mode='valid', subsample=(1, 1), 42 | W_regularizer=None, b_regularizer=None, W_constraint=None) 43 | ``` 44 | 45 | Convolution operator for filtering windows of two-dimensional inputs. 46 | 47 | - __Input shape__: 4D tensor with shape: `(nb_samples, stack_size, nb_row, nb_col)`. 48 | 49 | - __Output shape__: 4D tensor with shape: `(nb_samples, nb_filter, nb_row, nb_col)`. `nb_row`, `nb_col` might have changed due to padding. 50 | 51 | 52 | - __Arguments__: 53 | 54 | - __nb_filter__: Number of convolution kernels to use. 55 | - __stack_size__: Number of channels in the input. 56 | - __nb_row__: Number of rows in the convolution kernels 57 | - __nb_col__: Number of columns in the convolution kernels 58 | - __init__: name of initialization function for the weights of the layer (see: [initializations](../initializations.md)), or alternatively, Theano function to use for weights initialization. This parameter is only relevant if you don't pass a `weights` argument. 59 | - __activation__: name of activation function to use (see: [activations](../activations.md)), or alternatively, elementwise Theano function. If you don't specify anything, no activation is applied (ie. "linear" activation: a(x) = x). 60 | - __weights__: list of numpy arrays to set as initial weights. 61 | - __border_mode__: 'valid', 'full', or 'same'. [See scipy.signal.convolve2d](http://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.convolve2d.html). 62 | - __subsample__: tuple of length 2. Factor by which to subsample output. Also called strides elsewhere. 63 | - __W_regularizer__: instance of [WeightRegularizer](../regularizers.md) (eg. L1 or L2 regularization), applied to the main weights matrix. 64 | - __b_regularizer__: instance of [WeightRegularizer](../regularizers.md), applied to the bias. 65 | - __activity_regularizer__: instance of [ActivityRegularizer](../regularizers.md), applied to the network output. 66 | - __W_constraint__: instance of the [constraints](../constraints.md) module (eg. maxnorm, nonneg), applied to the main weights matrix. 67 | - __b_constraint__: instance of the [constraints](../constraints.md) module, applied to the bias. 68 | 69 | 70 | --- 71 | 72 | ## MaxPooling1D 73 | 74 | ```python 75 | keras.layers.convolutional.MaxPooling1D(pool_length=2, stride=None, ignore_border=True) 76 | ``` 77 | 78 | - __Input shape__: 3D tensor with shape: `(nb_samples, steps, dim)`. 
79 | 80 | - __Output shape__: 3D tensor with shape: `(nb_samples, downsampled_steps, dim)`. 81 | 82 | - __Arguments__: 83 | 84 | - __pool_length__: factor by which to downscale. 2 will halve the input. 85 | - __stride__: integer or None. Stride value. 86 | - __ignore_border__: boolean. 87 | 88 | --- 89 | 90 | ## MaxPooling2D 91 | 92 | ```python 93 | keras.layers.convolutional.MaxPooling2D(poolsize=(2, 2), ignore_border=True) 94 | ``` 95 | 96 | - __Input shape__: 4D tensor with shape: `(nb_samples, stack_size, nb_row, nb_col)`. 97 | 98 | - __Output shape__: 4D tensor with shape: `(nb_samples, stack_size, new_nb_row, new_nb_col)`. 99 | 100 | - __Arguments__: 101 | 102 | - __pool_size__: factor by which to downscale (vertical ds, horizontal ds). (2, 2) will halve the image in each dimension. 103 | - __ignore_border__: boolean. When True, (5, 5) input with pool_size=(2, 2) will generate a (2, 2) output, (3, 3) otherwise. 104 | 105 | -------------------------------------------------------------------------------- /tests/auto/keras/layers/test_core.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from numpy.testing import assert_allclose 4 | import theano 5 | 6 | from keras.layers import core 7 | 8 | 9 | class TestLayerBase(unittest.TestCase): 10 | def test_input_output(self): 11 | nb_samples = 10 12 | input_dim = 5 13 | layer = core.Layer() 14 | 15 | # As long as there is no input, an error should be raised. 16 | for train in [True, False]: 17 | self.assertRaises(AttributeError, layer.get_input, train) 18 | self.assertRaises(AttributeError, layer.get_output, train) 19 | 20 | # Once an input is provided, it should be reachable through the 21 | # appropriate getters 22 | input = np.ones((nb_samples, input_dim)) 23 | layer.input = theano.shared(value=input) 24 | for train in [True, False]: 25 | assert_allclose(layer.get_input(train).eval(), input) 26 | assert_allclose(layer.get_output(train).eval(), input) 27 | 28 | def test_connections(self): 29 | nb_samples = 10 30 | input_dim = 5 31 | layer1 = core.Layer() 32 | layer2 = core.Layer() 33 | 34 | input = np.ones((nb_samples, input_dim)) 35 | layer1.input = theano.shared(value=input) 36 | 37 | # As long as there is no previous layer, an error should be raised. 38 | for train in [True, False]: 39 | self.assertRaises(AttributeError, layer2.get_input, train) 40 | 41 | # After connecting, input of layer1 should be passed through 42 | layer2.set_previous(layer1) 43 | for train in [True, False]: 44 | assert_allclose(layer2.get_input(train).eval(), input) 45 | assert_allclose(layer2.get_output(train).eval(), input) 46 | 47 | 48 | class TestConfigParams(unittest.TestCase): 49 | """ 50 | Test the constructor, config and params functions of all layers in core. 
51 | """ 52 | 53 | def _runner(self, layer): 54 | conf = layer.get_config() 55 | assert (type(conf) == dict) 56 | 57 | param = layer.get_params() 58 | # Typically a list or a tuple, but may be any iterable 59 | assert hasattr(param, '__iter__') 60 | 61 | def test_base(self): 62 | layer = core.Layer() 63 | self._runner(layer) 64 | 65 | def test_masked(self): 66 | layer = core.MaskedLayer() 67 | self._runner(layer) 68 | 69 | def test_merge(self): 70 | layer_1 = core.Layer() 71 | layer_2 = core.Layer() 72 | layer = core.Merge([layer_1, layer_2]) 73 | self._runner(layer) 74 | 75 | def test_dropout(self): 76 | layer = core.Dropout(0.5) 77 | self._runner(layer) 78 | 79 | def test_activation(self): 80 | layer = core.Activation('linear') 81 | self._runner(layer) 82 | 83 | def test_reshape(self): 84 | layer = core.Reshape(10, 10) 85 | self._runner(layer) 86 | 87 | def test_flatten(self): 88 | layer = core.Flatten() 89 | self._runner(layer) 90 | 91 | def test_repeat_vector(self): 92 | layer = core.RepeatVector(10) 93 | self._runner(layer) 94 | 95 | def test_dense(self): 96 | layer = core.Dense(10, 10) 97 | self._runner(layer) 98 | 99 | def test_act_reg(self): 100 | layer = core.ActivityRegularization(0.5, 0.5) 101 | self._runner(layer) 102 | 103 | def test_time_dist_dense(self): 104 | layer = core.TimeDistributedDense(10, 10) 105 | self._runner(layer) 106 | 107 | def test_autoencoder(self): 108 | layer_1 = core.Layer() 109 | layer_2 = core.Layer() 110 | 111 | layer = core.AutoEncoder(layer_1, layer_2) 112 | self._runner(layer) 113 | 114 | def test_maxout_dense(self): 115 | layer = core.MaxoutDense(10, 10) 116 | self._runner(layer) 117 | 118 | 119 | class TestMasking(unittest.TestCase): 120 | """Test the Masking class""" 121 | 122 | def test_sequences(self): 123 | """Test masking sequences with zeroes as padding""" 124 | # integer inputs, one per timestep, like embeddings 125 | layer = core.Masking() 126 | func = theano.function([layer.input], layer.get_output_mask()) 127 | self.assertTrue(np.all( 128 | # get mask for this input 129 | func(np.array( 130 | [[[1], [2], [3], [0]], 131 | [[0], [4], [5], [0]]], dtype=np.int32)) == 132 | # This is the expected output mask, one dimension less 133 | np.array([[1, 1, 1, 0], [0, 1, 1, 0]]))) 134 | 135 | def test_non_zero(self): 136 | """Test masking with non-zero mask value""" 137 | layer = core.Masking(5) 138 | func = theano.function([layer.input], layer.get_output_mask()) 139 | self.assertTrue(np.all( 140 | # get mask for this input, if not all the values are 5, shouldn't masked 141 | func(np.array( 142 | [[[1, 1], [2, 1], [3, 1], [5, 5]], 143 | [[1, 5], [5, 0], [0, 0], [0, 0]]], dtype=np.int32)) == 144 | # This is the expected output mask, one dimension less 145 | np.array([[1, 1, 1, 0], [1, 1, 1, 1]]))) 146 | 147 | def test_non_zero_output(self): 148 | """Test output of masking layer with non-zero mask value""" 149 | layer = core.Masking(5) 150 | func = theano.function([layer.input], layer.get_output()) 151 | self.assertTrue(np.all( 152 | # get output for this input, replace padding with 0 153 | func(np.array( 154 | [[[1, 1], [2, 1], [3, 1], [5, 5]], 155 | [[1, 5], [5, 0], [0, 0], [0, 0]]], dtype=np.int32)) == 156 | # This is the expected output 157 | np.array([[[1, 1], [2, 1], [3, 1], [0, 0]], 158 | [[1, 5], [5, 0], [0, 0], [0, 0]]]))) 159 | 160 | 161 | if __name__ == '__main__': 162 | unittest.main() 163 | -------------------------------------------------------------------------------- /tests/auto/test_loss_weighting.py: 
-------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | 4 | import numpy as np 5 | np.random.seed(1336) # for reproducibility 6 | 7 | from keras.datasets import mnist 8 | from keras.models import Sequential, Graph 9 | from keras.layers.core import Dense, Activation 10 | from keras.utils import np_utils 11 | import unittest 12 | 13 | nb_classes = 10 14 | batch_size = 128 15 | nb_epoch = 5 16 | weighted_class = 9 17 | standard_weight = 1 18 | high_weight = 5 19 | max_train_samples = 5000 20 | max_test_samples = 1000 21 | 22 | # the data, shuffled and split between tran and test sets 23 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 24 | X_train = X_train.reshape(60000, 784)[:max_train_samples] 25 | X_test = X_test.reshape(10000, 784)[:max_test_samples] 26 | X_train = X_train.astype("float32") / 255 27 | X_test = X_test.astype("float32") / 255 28 | 29 | # convert class vectors to binary class matrices 30 | y_train = y_train[:max_train_samples] 31 | y_test = y_test[:max_test_samples] 32 | Y_train = np_utils.to_categorical(y_train, nb_classes) 33 | Y_test = np_utils.to_categorical(y_test, nb_classes) 34 | test_ids = np.where(y_test == np.array(weighted_class))[0] 35 | 36 | class_weight = dict([(i, standard_weight) for i in range(nb_classes)]) 37 | class_weight[weighted_class] = high_weight 38 | 39 | sample_weight = np.ones((y_train.shape[0])) * standard_weight 40 | sample_weight[y_train == weighted_class] = high_weight 41 | 42 | 43 | def create_sequential_model(): 44 | model = Sequential() 45 | model.add(Dense(784, 50)) 46 | model.add(Activation('relu')) 47 | model.add(Dense(50, 10)) 48 | model.add(Activation('softmax')) 49 | return model 50 | 51 | 52 | def create_graph_model(): 53 | model = Graph() 54 | model.add_input(name='input') 55 | model.add_node(Dense(784, 50, activation='relu'), name='d1', input='input') 56 | model.add_node(Dense(50, 10, activation='softmax'), name='d2', input='d1') 57 | model.add_output(name='output', input='d2') 58 | return model 59 | 60 | 61 | def _test_weights_sequential(model, class_weight=None, sample_weight=None): 62 | model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch, verbose=0, 63 | class_weight=class_weight, sample_weight=sample_weight) 64 | model.train_on_batch(X_train[:32], Y_train[:32], 65 | class_weight=class_weight, sample_weight=sample_weight[:32] if sample_weight is not None else None) 66 | model.test_on_batch(X_train[:32], Y_train[:32], 67 | sample_weight=sample_weight[:32] if sample_weight is not None else None) 68 | score = model.evaluate(X_test[test_ids, :], Y_test[test_ids, :], verbose=0) 69 | return score 70 | 71 | 72 | def _test_weights_graph(model, class_weight=None, sample_weight=None): 73 | model.fit({'input': X_train, 'output': Y_train}, batch_size=batch_size, nb_epoch=nb_epoch, verbose=0, 74 | class_weight={'output': class_weight}, sample_weight={'output': sample_weight}) 75 | model.train_on_batch({'input': X_train[:32], 'output': Y_train[:32]}, 76 | class_weight={'output': class_weight}, sample_weight={'output': sample_weight[:32] if sample_weight is not None else None}) 77 | model.test_on_batch({'input': X_train[:32], 'output': Y_train[:32]}, 78 | sample_weight={'output': sample_weight[:32] if sample_weight is not None else None}) 79 | score = model.evaluate({'input': X_test[test_ids, :], 'output': Y_test[test_ids, :]}, verbose=0) 80 | return score 81 | 82 | 83 | class 
TestLossWeighting(unittest.TestCase): 84 | def test_sequential(self): 85 | for loss in ['mae', 'mse', 'categorical_crossentropy']: 86 | print('loss:', loss) 87 | print('sequential') 88 | # no weights: reference point 89 | model = create_sequential_model() 90 | model.compile(loss='categorical_crossentropy', optimizer='rmsprop') 91 | standard_score = _test_weights_sequential(model) 92 | # test class_weight 93 | model = create_sequential_model() 94 | model.compile(loss=loss, optimizer='rmsprop') 95 | score = _test_weights_sequential(model, class_weight=class_weight) 96 | print('score:', score, ' vs.', standard_score) 97 | self.assertTrue(score < standard_score) 98 | # test sample_weight 99 | model = create_sequential_model() 100 | model.compile(loss=loss, optimizer='rmsprop') 101 | score = _test_weights_sequential(model, sample_weight=sample_weight) 102 | print('score:', score, ' vs.', standard_score) 103 | self.assertTrue(score < standard_score) 104 | 105 | def test_graph(self): 106 | for loss in ['mae', 'mse', 'categorical_crossentropy']: 107 | print('loss:', loss) 108 | print('graph') 109 | # no weights: reference point 110 | model = create_graph_model() 111 | model.compile(loss={'output': 'categorical_crossentropy'}, optimizer='rmsprop') 112 | standard_score = _test_weights_graph(model) 113 | # test class_weight 114 | model = create_graph_model() 115 | model.compile(loss={'output': 'categorical_crossentropy'}, optimizer='rmsprop') 116 | score = _test_weights_graph(model, class_weight=class_weight) 117 | print('score:', score, ' vs.', standard_score) 118 | self.assertTrue(score < standard_score) 119 | # test sample_weight 120 | model = create_graph_model() 121 | model.compile(loss={'output': 'categorical_crossentropy'}, optimizer='rmsprop') 122 | score = _test_weights_graph(model, sample_weight=sample_weight) 123 | print('score:', score, ' vs.', standard_score) 124 | self.assertTrue(score < standard_score) 125 | 126 | 127 | if __name__ == '__main__': 128 | print('Test class_weight and sample_weight') 129 | unittest.main() 130 | -------------------------------------------------------------------------------- /tests/manual/check_autoencoder.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | from keras.datasets import mnist 4 | from keras.models import Sequential, model_from_config 5 | from keras.layers.core import AutoEncoder, Dense, Activation, TimeDistributedDense, Flatten 6 | from keras.layers.recurrent import LSTM 7 | from keras.layers.embeddings import Embedding 8 | from keras.layers.core import Layer 9 | from keras.layers import containers 10 | from keras.utils import np_utils 11 | import numpy as np 12 | 13 | nb_classes = 10 14 | batch_size = 128 15 | nb_epoch = 5 16 | activation = 'linear' 17 | 18 | input_dim = 784 19 | hidden_dim = 392 20 | 21 | max_train_samples = 5000 22 | max_test_samples = 1000 23 | 24 | # the data, shuffled and split between tran and test sets 25 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 26 | 27 | X_train = X_train.reshape(60000, input_dim)[:max_train_samples] 28 | X_test = X_test.reshape(10000, input_dim)[:max_test_samples] 29 | X_train = X_train.astype("float32") 30 | X_test = X_test.astype("float32") 31 | X_train /= 255 32 | X_test /= 255 33 | 34 | # convert class vectors to binary class matrices 35 | Y_train = np_utils.to_categorical(y_train, nb_classes)[:max_train_samples] 36 | Y_test = np_utils.to_categorical(y_test, 
nb_classes)[:max_test_samples] 37 | 38 | print("X_train: ", X_train.shape) 39 | print("X_test: ", X_test.shape) 40 | 41 | 42 | ########################## 43 | # dense model test # 44 | ########################## 45 | 46 | print("Training classical fully connected layer for classification") 47 | model_classical = Sequential() 48 | model_classical.add(Dense(input_dim, 10, activation=activation)) 49 | model_classical.add(Activation('softmax')) 50 | model_classical.get_config(verbose=1) 51 | model_classical.compile(loss='categorical_crossentropy', optimizer='adam') 52 | model_classical.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch, show_accuracy=False, verbose=0, validation_data=(X_test, Y_test)) 53 | classical_score = model_classical.evaluate(X_test, Y_test, verbose=0, show_accuracy=True) 54 | print('\nclassical_score:', classical_score) 55 | 56 | ########################## 57 | # autoencoder model test # 58 | ########################## 59 | 60 | 61 | def build_lstm_autoencoder(autoencoder, X_train, X_test): 62 | X_train = X_train[:, np.newaxis, :] 63 | X_test = X_test[:, np.newaxis, :] 64 | print("Modified X_train: ", X_train.shape) 65 | print("Modified X_test: ", X_test.shape) 66 | 67 | # The TimeDistributedDense isn't really necessary, however you need a lot of GPU memory to do 784x394-394x784 68 | autoencoder.add(TimeDistributedDense(input_dim, 16)) 69 | autoencoder.add(AutoEncoder(encoder=LSTM(16, 8, activation=activation, return_sequences=True), 70 | decoder=LSTM(8, input_dim, activation=activation, return_sequences=True), 71 | output_reconstruction=False)) 72 | return autoencoder, X_train, X_test 73 | 74 | 75 | def build_deep_classical_autoencoder(autoencoder): 76 | encoder = containers.Sequential([Dense(input_dim, hidden_dim, activation=activation), Dense(hidden_dim, hidden_dim/2, activation=activation)]) 77 | decoder = containers.Sequential([Dense(hidden_dim/2, hidden_dim, activation=activation), Dense(hidden_dim, input_dim, activation=activation)]) 78 | autoencoder.add(AutoEncoder(encoder=encoder, decoder=decoder, output_reconstruction=False)) 79 | return autoencoder 80 | 81 | # Try different things here: 'lstm' or 'classical' or 'denoising' 82 | # or 'deep_denoising' 83 | 84 | for autoencoder_type in ['classical', 'lstm']: 85 | print(autoencoder_type) 86 | print('-'*40) 87 | # Build our autoencoder model 88 | autoencoder = Sequential() 89 | if autoencoder_type == 'lstm': 90 | print("Training LSTM AutoEncoder") 91 | autoencoder, X_train, X_test = build_lstm_autoencoder(autoencoder, X_train, X_test) 92 | elif autoencoder_type == 'classical': 93 | print("Training Classical AutoEncoder") 94 | autoencoder = build_deep_classical_autoencoder(autoencoder) 95 | else: 96 | print("Error: unknown autoencoder type!") 97 | exit(-1) 98 | 99 | autoencoder.compile(loss='mean_squared_error', optimizer='adam') 100 | # Do NOT use validation data with return output_reconstruction=True 101 | autoencoder.fit(X_train, X_train, batch_size=batch_size, nb_epoch=nb_epoch, show_accuracy=False, verbose=1) 102 | 103 | # Do an inference pass 104 | prefilter_train = autoencoder.predict(X_train, verbose=0) 105 | prefilter_test = autoencoder.predict(X_test, verbose=0) 106 | print("prefilter_train: ", prefilter_train.shape) 107 | print("prefilter_test: ", prefilter_test.shape) 108 | 109 | # Classify results from Autoencoder 110 | print("Building classical fully connected layer for classification") 111 | model = Sequential() 112 | if autoencoder_type == 'lstm': 113 | model.add(TimeDistributedDense(8, 
nb_classes, activation=activation)) 114 | model.add(Flatten()) 115 | elif autoencoder_type == 'classical': 116 | model.add(Dense(prefilter_train.shape[1], nb_classes, activation=activation)) 117 | else: 118 | model.add(Dense(prefilter_train.shape[1], nb_classes, activation=activation)) 119 | 120 | model.add(Activation('softmax')) 121 | 122 | model.get_config(verbose=1) 123 | model.compile(loss='categorical_crossentropy', optimizer='adam') 124 | model.fit(prefilter_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch, show_accuracy=False, verbose=0, validation_data=(prefilter_test, Y_test)) 125 | 126 | score = model.evaluate(prefilter_test, Y_test, verbose=0, show_accuracy=True) 127 | print('\nscore:', score) 128 | 129 | print('Loss change:', (score[0] - classical_score[0])/classical_score[0], '%') 130 | print('Accuracy change:', (score[1] - classical_score[1])/classical_score[1], '%') 131 | 132 | # check serialization 133 | config = autoencoder.get_config(verbose=1) 134 | autoencoder = model_from_config(config) 135 | -------------------------------------------------------------------------------- /tests/auto/test_tasks.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import numpy as np 3 | np.random.seed(1337) 4 | 5 | from keras.utils.test_utils import get_test_data 6 | from keras.models import Sequential 7 | from keras.layers.core import Dense, Activation, TimeDistributedDense, Flatten 8 | from keras.layers.recurrent import GRU 9 | from keras.layers.convolutional import Convolution2D 10 | from keras.utils.np_utils import to_categorical 11 | import unittest 12 | 13 | 14 | class TestRegularizers(unittest.TestCase): 15 | def test_vector_clf(self): 16 | nb_hidden = 10 17 | 18 | print('vector classification data:') 19 | (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=1000, nb_test=200, input_shape=(10,), 20 | classification=True, nb_class=2) 21 | print('X_train:', X_train.shape) 22 | print('X_test:', X_test.shape) 23 | print('y_train:', y_train.shape) 24 | print('y_test:', y_test.shape) 25 | 26 | y_train = to_categorical(y_train) 27 | y_test = to_categorical(y_test) 28 | 29 | model = Sequential() 30 | model.add(Dense(X_train.shape[-1], nb_hidden)) 31 | model.add(Activation('relu')) 32 | model.add(Dense(nb_hidden, y_train.shape[-1])) 33 | model.add(Activation('softmax')) 34 | model.compile(loss='categorical_crossentropy', optimizer='rmsprop') 35 | history = model.fit(X_train, y_train, nb_epoch=12, batch_size=16, validation_data=(X_test, y_test), show_accuracy=True, verbose=2) 36 | print(history.history) 37 | self.assertTrue(history.history['val_acc'][-1] > 0.9) 38 | 39 | def test_vector_reg(self): 40 | nb_hidden = 10 41 | print('vector regression data:') 42 | (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=1000, nb_test=200, input_shape=(10,), output_shape=(2,), 43 | classification=False) 44 | print('X_train:', X_train.shape) 45 | print('X_test:', X_test.shape) 46 | print('y_train:', y_train.shape) 47 | print('y_test:', y_test.shape) 48 | 49 | model = Sequential() 50 | model.add(Dense(X_train.shape[-1], nb_hidden)) 51 | model.add(Activation('tanh')) 52 | model.add(Dense(nb_hidden, y_train.shape[-1])) 53 | model.compile(loss='hinge', optimizer='adagrad') 54 | history = model.fit(X_train, y_train, nb_epoch=12, batch_size=16, validation_data=(X_test, y_test), verbose=2) 55 | self.assertTrue(history.history['val_loss'][-1] < 0.9) 56 | 57 | def test_temporal_clf(self): 58 | 
print('temporal classification data:') 59 | (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=1000, nb_test=200, input_shape=(5,10), 60 | classification=True, nb_class=2) 61 | print('X_train:', X_train.shape) 62 | print('X_test:', X_test.shape) 63 | print('y_train:', y_train.shape) 64 | print('y_test:', y_test.shape) 65 | 66 | y_train = to_categorical(y_train) 67 | y_test = to_categorical(y_test) 68 | 69 | model = Sequential() 70 | model.add(GRU(X_train.shape[-1], y_train.shape[-1])) 71 | model.add(Activation('softmax')) 72 | model.compile(loss='categorical_crossentropy', optimizer='adadelta') 73 | history = model.fit(X_train, y_train, nb_epoch=12, batch_size=16, validation_data=(X_test, y_test), show_accuracy=True, verbose=2) 74 | self.assertTrue(history.history['val_acc'][-1] > 0.9) 75 | 76 | def test_temporal_reg(self): 77 | print('temporal regression data:') 78 | (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=1000, nb_test=200, input_shape=(5, 10), output_shape=(2,), 79 | classification=False) 80 | print('X_train:', X_train.shape) 81 | print('X_test:', X_test.shape) 82 | print('y_train:', y_train.shape) 83 | print('y_test:', y_test.shape) 84 | 85 | model = Sequential() 86 | model.add(GRU(X_train.shape[-1], y_train.shape[-1])) 87 | model.compile(loss='hinge', optimizer='adam') 88 | history = model.fit(X_train, y_train, nb_epoch=12, batch_size=16, validation_data=(X_test, y_test), verbose=2) 89 | self.assertTrue(history.history['val_loss'][-1] < 0.8) 90 | 91 | def test_seq_to_seq(self): 92 | print('sequence to sequence data:') 93 | (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=1000, nb_test=200, input_shape=(5, 10), output_shape=(5, 10), 94 | classification=False) 95 | print('X_train:', X_train.shape) 96 | print('X_test:', X_test.shape) 97 | print('y_train:', y_train.shape) 98 | print('y_test:', y_test.shape) 99 | 100 | model = Sequential() 101 | model.add(TimeDistributedDense(X_train.shape[-1], y_train.shape[-1])) 102 | model.compile(loss='hinge', optimizer='rmsprop') 103 | history = model.fit(X_train, y_train, nb_epoch=12, batch_size=16, validation_data=(X_test, y_test), verbose=2) 104 | self.assertTrue(history.history['val_loss'][-1] < 0.75) 105 | 106 | def test_img_clf(self): 107 | print('image classification data:') 108 | (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=1000, nb_test=200, input_shape=(3, 32, 32), 109 | classification=True, nb_class=2) 110 | print('X_train:', X_train.shape) 111 | print('X_test:', X_test.shape) 112 | print('y_train:', y_train.shape) 113 | print('y_test:', y_test.shape) 114 | 115 | y_train = to_categorical(y_train) 116 | y_test = to_categorical(y_test) 117 | 118 | model = Sequential() 119 | model.add(Convolution2D(32, 3, 32, 32)) 120 | model.add(Activation('sigmoid')) 121 | model.add(Flatten()) 122 | model.add(Dense(32, y_test.shape[-1])) 123 | model.add(Activation('softmax')) 124 | model.compile(loss='categorical_crossentropy', optimizer='sgd') 125 | history = model.fit(X_train, y_train, nb_epoch=12, batch_size=16, validation_data=(X_test, y_test), show_accuracy=True, verbose=2) 126 | self.assertTrue(history.history['val_acc'][-1] > 0.9) 127 | 128 | 129 | if __name__ == '__main__': 130 | print('Test different types of classification and regression tasks') 131 | unittest.main() 132 | -------------------------------------------------------------------------------- /examples/addition_rnn.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 
2 | from __future__ import print_function 3 | from keras.models import Sequential, slice_X 4 | from keras.layers.core import Activation, Dense, RepeatVector 5 | from keras.layers import recurrent 6 | from sklearn.utils import shuffle 7 | import numpy as np 8 | 9 | """ 10 | An implementation of sequence to sequence learning for performing addition 11 | Input: "535+61" 12 | Output: "596" 13 | Padding is handled by using a repeated sentinel character (space) 14 | 15 | By default, the JZS1 recurrent neural network is used 16 | JZS1 was an "evolved" recurrent neural network performing well on arithmetic benchmark in: 17 | "An Empirical Exploration of Recurrent Network Architectures" 18 | http://jmlr.org/proceedings/papers/v37/jozefowicz15.pdf 19 | 20 | Input may optionally be inverted, shown to increase performance in many tasks in: 21 | "Learning to Execute" 22 | http://arxiv.org/abs/1410.4615 23 | and 24 | "Sequence to Sequence Learning with Neural Networks" 25 | http://papers.nips.cc/paper/5346-sequence-to-sequence-learning-with-neural-networks.pdf 26 | Theoretically it introduces shorter term dependencies between source and target. 27 | 28 | 29 | Two digits inverted: 30 | + One layer JZS1 (128 HN), 5k training examples = 99% train/test accuracy in 55 epochs 31 | 32 | Three digits inverted: 33 | + One layer JZS1 (128 HN), 50k training examples = 99% train/test accuracy in 100 epochs 34 | 35 | 36 | Four digits inverted: 37 | + One layer JZS1 (128 HN), 400k training examples = 99% train/test accuracy in 20 epochs 38 | 39 | 40 | Five digits inverted: 41 | + One layer JZS1 (128 HN), 550k training examples = 99% train/test accuracy in 30 epochs 42 | 43 | """ 44 | 45 | 46 | class CharacterTable(object): 47 | """ 48 | Given a set of characters: 49 | + Encode them to a one hot integer representation 50 | + Decode the one hot integer representation to their character output 51 | + Decode a vector of probabilties to their character output 52 | """ 53 | def __init__(self, chars, maxlen): 54 | self.chars = sorted(set(chars)) 55 | self.char_indices = dict((c, i) for i, c in enumerate(self.chars)) 56 | self.indices_char = dict((i, c) for i, c in enumerate(self.chars)) 57 | self.maxlen = maxlen 58 | 59 | def encode(self, C, maxlen=None): 60 | maxlen = maxlen if maxlen else self.maxlen 61 | X = np.zeros((maxlen, len(self.chars))) 62 | for i, c in enumerate(C): 63 | X[i, self.char_indices[c]] = 1 64 | return X 65 | 66 | def decode(self, X, calc_argmax=True): 67 | if calc_argmax: 68 | X = X.argmax(axis=-1) 69 | return ''.join(self.indices_char[x] for x in X) 70 | 71 | 72 | class colors: 73 | ok = '\033[92m' 74 | fail = '\033[91m' 75 | close = '\033[0m' 76 | 77 | # Parameters for the model and dataset 78 | TRAINING_SIZE = 50000 79 | DIGITS = 3 80 | INVERT = True 81 | # Try replacing JZS1 with LSTM, GRU, or SimpleRNN 82 | RNN = recurrent.JZS1 83 | HIDDEN_SIZE = 128 84 | BATCH_SIZE = 128 85 | LAYERS = 1 86 | MAXLEN = DIGITS + 1 + DIGITS 87 | 88 | chars = '0123456789+ ' 89 | ctable = CharacterTable(chars, MAXLEN) 90 | 91 | questions = [] 92 | expected = [] 93 | seen = set() 94 | print('Generating data...') 95 | while len(questions) < TRAINING_SIZE: 96 | f = lambda: int(''.join(np.random.choice(list('0123456789')) for i in xrange(np.random.randint(1, DIGITS + 1)))) 97 | a, b = f(), f() 98 | # Skip any addition questions we've already seen 99 | # Also skip any such that X+Y == Y+X (hence the sorting) 100 | key = tuple(sorted((a, b))) 101 | if key in seen: 102 | continue 103 | seen.add(key) 104 | # Pad the data with 
spaces such that it is always MAXLEN 105 | q = '{}+{}'.format(a, b) 106 | query = q + ' ' * (MAXLEN - len(q)) 107 | ans = str(a + b) 108 | # Answers can be of maximum size DIGITS + 1 109 | ans += ' ' * (DIGITS + 1 - len(ans)) 110 | if INVERT: 111 | query = query[::-1] 112 | questions.append(query) 113 | expected.append(ans) 114 | print('Total addition questions:', len(questions)) 115 | 116 | print('Vectorization...') 117 | X = np.zeros((len(questions), MAXLEN, len(chars)), dtype=np.bool) 118 | y = np.zeros((len(questions), DIGITS + 1, len(chars)), dtype=np.bool) 119 | for i, sentence in enumerate(questions): 120 | X[i] = ctable.encode(sentence, maxlen=MAXLEN) 121 | for i, sentence in enumerate(expected): 122 | y[i] = ctable.encode(sentence, maxlen=DIGITS + 1) 123 | 124 | # Shuffle (X, y) in unison as the later parts of X will almost all be larger digits 125 | X, y = shuffle(X, y) 126 | # Explicitly set apart 10% for validation data that we never train over 127 | split_at = len(X) - len(X) / 10 128 | (X_train, X_val) = (slice_X(X, 0, split_at), slice_X(X, split_at)) 129 | (y_train, y_val) = (y[:split_at], y[split_at:]) 130 | 131 | print('Build model...') 132 | model = Sequential() 133 | # "Encode" the input sequence using an RNN, producing an output of HIDDEN_SIZE 134 | model.add(RNN(len(chars), HIDDEN_SIZE)) 135 | # For the decoder's input, we repeat the encoded input for each time step 136 | model.add(RepeatVector(DIGITS + 1)) 137 | # The decoder RNN could be multiple layers stacked or a single layer 138 | for _ in xrange(LAYERS): 139 | model.add(RNN(HIDDEN_SIZE, HIDDEN_SIZE, return_sequences=True)) 140 | # For each of step of the output sequence, decide which character should be chosen 141 | model.add(Dense(HIDDEN_SIZE, len(chars))) 142 | model.add(Activation('softmax')) 143 | 144 | model.compile(loss='categorical_crossentropy', optimizer='adam') 145 | 146 | # Train the model each generation and show predictions against the validation dataset 147 | for iteration in range(1, 200): 148 | print() 149 | print('-' * 50) 150 | print('Iteration', iteration) 151 | model.fit(X, y, batch_size=BATCH_SIZE, nb_epoch=1, validation_data=(X_val, y_val), show_accuracy=True) 152 | ### 153 | # Select 10 samples from the validation set at random so we can visualize errors 154 | for i in xrange(10): 155 | ind = np.random.randint(0, len(X_val)) 156 | rowX, rowy = X_val[np.array([ind])], y_val[np.array([ind])] 157 | preds = model.predict_classes(rowX, verbose=0) 158 | q = ctable.decode(rowX[0]) 159 | correct = ctable.decode(rowy[0]) 160 | guess = ctable.decode(preds[0], calc_argmax=False) 161 | print('Q', q[::-1] if INVERT else q) 162 | print('T', correct) 163 | print(colors.ok + '☑' + colors.close if correct == guess else colors.fail + '☒' + colors.close, guess) 164 | print('---') 165 | -------------------------------------------------------------------------------- /keras/preprocessing/text.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | ''' 3 | These preprocessing utils would greatly benefit 4 | from a fast Cython rewrite. 
5 | ''' 6 | from __future__ import absolute_import 7 | 8 | import string, sys 9 | import numpy as np 10 | from six.moves import range 11 | from six.moves import zip 12 | 13 | if sys.version_info < (3,): 14 | maketrans = string.maketrans 15 | else: 16 | maketrans = str.maketrans 17 | 18 | 19 | def base_filter(): 20 | f = string.punctuation 21 | f = f.replace("'", '') 22 | f += '\t\n' 23 | return f 24 | 25 | 26 | def text_to_word_sequence(text, filters=base_filter(), lower=True, split=" "): 27 | '''prune: sequence of characters to filter out 28 | ''' 29 | if lower: 30 | text = text.lower() 31 | text = text.translate(maketrans(filters, split*len(filters))) 32 | seq = text.split(split) 33 | return [_f for _f in seq if _f] 34 | 35 | 36 | def one_hot(text, n, filters=base_filter(), lower=True, split=" "): 37 | seq = text_to_word_sequence(text, filters=filters, lower=lower, split=split) 38 | return [(abs(hash(w)) % (n - 1) + 1) for w in seq] 39 | 40 | 41 | class Tokenizer(object): 42 | def __init__(self, nb_words=None, filters=base_filter(), lower=True, split=" "): 43 | self.word_counts = {} 44 | self.word_docs = {} 45 | self.filters = filters 46 | self.split = split 47 | self.lower = lower 48 | self.nb_words = nb_words 49 | self.document_count = 0 50 | 51 | def fit_on_texts(self, texts): 52 | ''' 53 | required before using texts_to_sequences or texts_to_matrix 54 | @param texts: can be a list or a generator (for memory-efficiency) 55 | ''' 56 | self.document_count = 0 57 | for text in texts: 58 | self.document_count += 1 59 | seq = text_to_word_sequence(text, self.filters, self.lower, self.split) 60 | for w in seq: 61 | if w in self.word_counts: 62 | self.word_counts[w] += 1 63 | else: 64 | self.word_counts[w] = 1 65 | for w in set(seq): 66 | if w in self.word_docs: 67 | self.word_docs[w] += 1 68 | else: 69 | self.word_docs[w] = 1 70 | 71 | wcounts = list(self.word_counts.items()) 72 | wcounts.sort(key=lambda x: x[1], reverse=True) 73 | sorted_voc = [wc[0] for wc in wcounts] 74 | self.word_index = dict(list(zip(sorted_voc, list(range(1, len(sorted_voc) + 1))))) 75 | 76 | self.index_docs = {} 77 | for w, c in list(self.word_docs.items()): 78 | self.index_docs[self.word_index[w]] = c 79 | 80 | def fit_on_sequences(self, sequences): 81 | ''' 82 | required before using sequences_to_matrix 83 | (if fit_on_texts was never called) 84 | ''' 85 | self.document_count = len(sequences) 86 | self.index_docs = {} 87 | for seq in sequences: 88 | seq = set(seq) 89 | for i in seq: 90 | if i not in self.index_docs: 91 | self.index_docs[i] = 1 92 | else: 93 | self.index_docs[i] += 1 94 | 95 | def texts_to_sequences(self, texts): 96 | ''' 97 | Transform each text in texts in a sequence of integers. 98 | Only top "nb_words" most frequent words will be taken into account. 99 | Only words known by the tokenizer will be taken into account. 100 | 101 | Returns a list of sequences. 102 | ''' 103 | res = [] 104 | for vect in self.texts_to_sequences_generator(texts): 105 | res.append(vect) 106 | return res 107 | 108 | def texts_to_sequences_generator(self, texts): 109 | ''' 110 | Transform each text in texts in a sequence of integers. 111 | Only top "nb_words" most frequent words will be taken into account. 112 | Only words known by the tokenizer will be taken into account. 113 | 114 | Yields individual sequences. 
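Example (a hypothetical sketch; the integer indices depend on the fitted vocabulary):

    tokenizer = Tokenizer(nb_words=10)
    tokenizer.fit_on_texts(["the cat sat on the mat", "the dog sat"])
    for vect in tokenizer.texts_to_sequences_generator(["the cat sat"]):
        print(vect)  # a list of word indices, e.g. [1, 3, 2]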
115 | ''' 116 | nb_words = self.nb_words 117 | for text in texts: 118 | seq = text_to_word_sequence(text, self.filters, self.lower, self.split) 119 | vect = [] 120 | for w in seq: 121 | i = self.word_index.get(w) 122 | if i is not None: 123 | if nb_words and i >= nb_words: 124 | pass 125 | else: 126 | vect.append(i) 127 | yield vect 128 | 129 | def texts_to_matrix(self, texts, mode="binary"): 130 | ''' 131 | modes: binary, count, tfidf, freq 132 | ''' 133 | sequences = self.texts_to_sequences(texts) 134 | return self.sequences_to_matrix(sequences, mode=mode) 135 | 136 | def sequences_to_matrix(self, sequences, mode="binary"): 137 | ''' 138 | modes: binary, count, tfidf, freq 139 | ''' 140 | if not self.nb_words: 141 | if self.word_index: 142 | nb_words = len(self.word_index) + 1 143 | else: 144 | raise Exception("Specify a dimension (nb_words argument), or fit on some text data first") 145 | else: 146 | nb_words = self.nb_words 147 | 148 | if mode == "tfidf" and not self.document_count: 149 | raise Exception("Fit the Tokenizer on some data before using tfidf mode") 150 | 151 | X = np.zeros((len(sequences), nb_words)) 152 | for i, seq in enumerate(sequences): 153 | if not seq: 154 | continue 155 | counts = {} 156 | for j in seq: 157 | if j >= nb_words: 158 | continue 159 | if j not in counts: 160 | counts[j] = 1. 161 | else: 162 | counts[j] += 1 163 | for j, c in list(counts.items()): 164 | if mode == "count": 165 | X[i][j] = c 166 | elif mode == "freq": 167 | X[i][j] = c / len(seq) 168 | elif mode == "binary": 169 | X[i][j] = 1 170 | elif mode == "tfidf": 171 | tf = np.log(c / len(seq)) 172 | df = (1 + np.log(1 + self.index_docs.get(j, 0) / (1 + self.document_count))) 173 | X[i][j] = tf / df 174 | else: 175 | raise Exception("Unknown vectorization mode: " + str(mode)) 176 | return X 177 | -------------------------------------------------------------------------------- /docs/sources/index.md: -------------------------------------------------------------------------------- 1 | # Keras: Theano-based Deep Learning library 2 | 3 | ## Overview 4 | 5 | Keras is a minimalist, highly modular neural network library in the spirit of Torch, written in Python, that uses [Theano](http://deeplearning.net/software/theano/) under the hood for optimized tensor manipulation on GPU and CPU. It was developed with a focus on enabling fast experimentation. 6 | 7 | Use Keras if you need a deep learning library that: 8 | 9 | - allows for easy and fast prototyping (through total modularity, minimalism, and extensibility). 10 | - supports both convolutional networks and recurrent networks, as well as combinations of the two. 11 | - supports arbitrary connectivity schemes (including multi-input and multi-output training). 12 | - runs seamlessly on CPU and GPU. 13 | 14 | ## Guiding principles 15 | 16 | - __Modularity.__ A model is understood as a sequence or a graph of standalone, fully-configurable modules that can be plugged together with as few restrictions as possible. In particular, neural layers, cost functions, optimizers, initialization schemes, activation functions, regularization schemes are all standalone modules that you can combine to create new models. 17 | 18 | - __Minimalism.__ Each module should be kept short and simple (<100 lines of code). Every piece of code should be transparent upon first reading. No black magic: it hurts iteration speed and ability to innovate.
19 | 20 | - __Easy extensibility.__ New modules are dead simple to add (as new classes/functions), and existing modules provide ample examples. To be able to easily create new modules allows for total expressiveness, making Keras suitable for advanced research. 21 | 22 | - __Work with Python__. No separate model configuration files in a declarative format (like in Caffe or PyLearn2). Models are described in Python code, which is compact, easier to debug, and allows for ease of extensibility. 23 | 24 | ## Code 25 | 26 | Find the code on Github: [fchollet/keras](https://github.com/fchollet/keras). 27 | 28 | ## License 29 | 30 | Keras is licensed under the [MIT license](http://opensource.org/licenses/MIT). 31 | 32 | ## Getting started: 30 seconds to Keras 33 | 34 | The core data structure of Keras is a __model__, a way to organize layers. There are two types of models: [`Sequential`](/models/#sequential) and [`Graph`](/models/#graph). 35 | 36 | Here's the `Sequential` model (a linear pile of layers): 37 | 38 | ```python 39 | from keras.models import Sequential 40 | 41 | model = Sequential() 42 | ``` 43 | 44 | Stacking layers is as easy as `.add()`: 45 | 46 | ```python 47 | from keras.layers.core import Dense, Activation 48 | 49 | model.add(Dense(input_dim=100, output_dim=64, init="glorot_uniform")) 50 | model.add(Activation("relu")) 51 | model.add(Dense(input_dim=64, output_dim=10, init="glorot_uniform")) 52 | model.add(Activation("softmax")) 53 | ``` 54 | 55 | Once your model looks good, configure its learning process with `.compile()`: 56 | ```python 57 | model.compile(loss='categorical_crossentropy', optimizer='sgd') 58 | ``` 59 | 60 | If you need to, you can further configure your optimizer. A core principle of Keras is to make things reasonably simple, while allowing the user to be fully in control when they need to (the ultimate control being the easy extensibility of the source code). 61 | ```python 62 | from keras.optimizers import SGD 63 | model.compile(loss='categorical_crossentropy', optimizer=SGD(lr=0.01, momentum=0.9, nesterov=True)) 64 | ``` 65 | 66 | You can now iterate on your training data in batches: 67 | ```python 68 | model.fit(X_train, Y_train, nb_epoch=5, batch_size=32) 69 | ``` 70 | 71 | Alternatively, you can feed batches to your model manually: 72 | ```python 73 | model.train_on_batch(X_batch, Y_batch) 74 | ``` 75 | 76 | Evaluate your performance in one line: 77 | ```python 78 | objective_score = model.evaluate(X_test, Y_test, batch_size=32) 79 | ``` 80 | 81 | Or generate predictions on new data: 82 | ```python 83 | classes = model.predict_classes(X_test, batch_size=32) 84 | proba = model.predict_proba(X_test, batch_size=32) 85 | ``` 86 | 87 | Building a network of LSTMs, a deep CNN, a Neural Turing Machine, a word2vec embedder or any other model is just as fast. The ideas behind deep learning are simple, so why should their implementation be painful? 88 | 89 | Have a look at the [examples](examples.md). 90 | 91 | ## Installation 92 | 93 | Keras uses the following dependencies: 94 | 95 | - __numpy__, __scipy__ 96 | - __pyyaml__ 97 | - __Theano__ 98 | - See [installation instructions](http://deeplearning.net/software/theano/install.html#install). 99 | - __HDF5__ and __h5py__ (optional, required if you use model saving/loading functions) 100 | - Optional but recommended if you use CNNs: __cuDNN__.
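A quick way to check that the dependencies are importable before installing Keras (a minimal sketch; adapt it to your environment):

```python
# Each import should succeed without error once the dependencies are installed.
import numpy
import scipy
import yaml    # provided by the pyyaml package
import theano
print(theano.config.device)  # 'cpu' by default, or a gpu device if Theano is configured for it
```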
101 | 102 | Once you have the dependencies installed, clone the repo: 103 | ```bash 104 | git clone https://github.com/fchollet/keras.git 105 | ``` 106 | Go to the Keras folder and run the install command: 107 | ```bash 108 | cd keras 109 | sudo python setup.py install 110 | ``` 111 | You can also install Keras from PyPI: 112 | ``` 113 | sudo pip install keras 114 | ``` 115 | 116 | ## Support 117 | 118 | You can ask questions and join the development discussion on the [Keras Google group](https://groups.google.com/forum/#!forum/keras-users). 119 | 120 | ## Contribution Guidelines 121 | 122 | Keras welcomes all contributions from the community. 123 | 124 | - Keep a pragmatic mindset and avoid bloat. Only add to the source if that is the only path forward. 125 | - New features should be documented. Make sure you update the documentation along with your Pull Request. 126 | - The documentation for every new feature should include a usage example in the form of a code snippet. 127 | - All changes should be tested. Make sure any new feature you add has a corresponding unit test. 128 | - Please no Pull Requests about coding style. 129 | - Even if you don't contribute to the Keras source code, if you have an application of Keras that is concise and powerful, please consider adding it to our collection of [examples](https://github.com/fchollet/keras/tree/master/examples). 130 | 131 | 132 | ## Why this name, Keras? 133 | 134 | Keras (κέρας) means _horn_ in Greek. It is a reference to a literary image from ancient Greek and Latin literature, first found in the _Odyssey_, where dream spirits (_Oneiroi_, singular _Oneiros_) are divided between those who deceive men with false visions, who arrive to Earth through a gate of ivory, and those who announce a future that will come to pass, who arrive through a gate of horn. It's a play on the words κέρας (horn) / κραίνω (fulfill), and ἐλέφας (ivory) / ἐλεφαίρομαι (deceive). 135 | 136 | Keras was developed as part of the research effort of project __ONEIROS__ (*Open-ended Neuro-Electronic Intelligent Robot Operating System*). 137 | 138 | > _"Oneiroi are beyond our unravelling --who can be sure what tale they tell? Not all that men look for comes to pass. Two gates there are that give passage to fleeting Oneiroi; one is made of horn, one of ivory. The Oneiroi that pass through sawn ivory are deceitful, bearing a message that will not be fulfilled; those that come out through polished horn have truth behind them, to be accomplished for men who see them."_ 139 | 140 | > -- Homer, Odyssey 19. 562 ff (Shewring translation). 141 | --------------------------------------------------------------------------------