├── keras ├── legacy │ ├── __init__.py │ └── models.py ├── wrappers │ └── __init__.py ├── preprocessing │ └── __init__.py ├── objectives.py ├── datasets │ ├── __init__.py │ ├── mnist.py │ ├── cifar.py │ ├── boston_housing.py │ ├── fashion_mnist.py │ ├── cifar10.py │ ├── cifar100.py │ ├── reuters.py │ └── imdb.py ├── applications │ └── __init__.py ├── engine │ └── __init__.py ├── __init__.py ├── utils │ ├── __init__.py │ ├── np_utils.py │ ├── io_utils.py │ └── vis_utils.py ├── layers │ └── __init__.py ├── regularizers.py ├── metrics.py ├── activations.py ├── backend │ ├── __init__.py │ └── common.py └── losses.py ├── setup.cfg ├── docker ├── theanorc ├── Makefile ├── Dockerfile └── README.md ├── docs ├── templates │ ├── index.md │ ├── models │ │ ├── sequential.md │ │ ├── model.md │ │ └── about-keras-models.md │ ├── visualization.md │ ├── constraints.md │ ├── activations.md │ ├── initializers.md │ ├── layers │ │ ├── about-keras-layers.md │ │ └── writing-your-own-keras-layers.md │ ├── optimizers.md │ ├── losses.md │ ├── regularizers.md │ ├── metrics.md │ ├── scikit-learn-api.md │ ├── callbacks.md │ ├── preprocessing │ │ └── sequence.md │ └── backend.md ├── README.md └── mkdocs.yml ├── MANIFEST.in ├── .gitignore ├── pytest.ini ├── .github └── stale.yml ├── ISSUE_TEMPLATE.md ├── tests ├── keras │ ├── layers │ │ ├── advanced_activations_test.py │ │ ├── noise_test.py │ │ ├── embeddings_test.py │ │ ├── local_test.py │ │ └── normalization_test.py │ ├── utils │ │ ├── vis_utils_test.py │ │ ├── layer_utils_test.py │ │ ├── generic_utils_test.py │ │ └── io_utils_test.py │ ├── regularizers_test.py │ ├── preprocessing │ │ ├── text_test.py │ │ └── sequence_test.py │ ├── losses_test.py │ ├── constraints_test.py │ ├── datasets │ │ └── test_datasets.py │ ├── metrics_test.py │ └── optimizers_test.py ├── test_loss_masking.py ├── integration_tests │ ├── test_image_data_tasks.py │ └── test_vector_data_tasks.py └── test_dynamic_trainability.py ├── setup.py ├── examples ├── imdb_bidirectional_lstm.py ├── mnist_mlp.py ├── imdb_lstm.py ├── reuters_mlp.py ├── imdb_cnn_lstm.py ├── imdb_cnn.py ├── mnist_cnn.py ├── mnist_irnn.py ├── mnist_hierarchical_rnn.py ├── lstm_text_generation.py ├── antirectifier.py ├── mnist_sklearn_wrapper.py ├── mnist_transfer_cnn.py ├── variational_autoencoder.py ├── README.md ├── cifar10_cnn.py ├── conv_filter_visualization.py └── mnist_siamese.py ├── LICENSE └── .travis.yml /keras/legacy/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /keras/wrappers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /keras/preprocessing/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md -------------------------------------------------------------------------------- /docker/theanorc: -------------------------------------------------------------------------------- 1 | [global] 2 | floatX = float32 3 | optimizer=None 4 | device = cuda 5 | 6 | -------------------------------------------------------------------------------- /docs/templates/index.md: 
-------------------------------------------------------------------------------- 1 | # Keras: The Python Deep Learning library 2 | 3 | {{autogenerated}} -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | include README.md 3 | graft docs 4 | graft examples 5 | graft tests 6 | -------------------------------------------------------------------------------- /keras/objectives.py: -------------------------------------------------------------------------------- 1 | """Legacy objectives module. 2 | 3 | Only kept for backwards API compatibility. 4 | """ 5 | from __future__ import absolute_import 6 | from .losses import * 7 | -------------------------------------------------------------------------------- /keras/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from . import mnist 4 | from . import imdb 5 | from . import reuters 6 | from . import cifar10 7 | from . import cifar100 8 | from . import boston_housing 9 | from . import fashion_mnist 10 | -------------------------------------------------------------------------------- /keras/applications/__init__.py: -------------------------------------------------------------------------------- 1 | from .vgg16 import VGG16 2 | from .vgg19 import VGG19 3 | from .resnet50 import ResNet50 4 | from .inception_v3 import InceptionV3 5 | from .inception_resnet_v2 import InceptionResNetV2 6 | from .xception import Xception 7 | from .mobilenet import MobileNet 8 | -------------------------------------------------------------------------------- /keras/engine/__init__.py: -------------------------------------------------------------------------------- 1 | # note: topology.Node is an internal class, 2 | # it isn't meant to be used by Keras users. 3 | from .topology import InputSpec 4 | from .topology import Input 5 | from .topology import InputLayer 6 | from .topology import Layer 7 | from .topology import get_source_inputs 8 | from .training import Model 9 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.DS_Store 2 | *.pyc 3 | *.swp 4 | temp/* 5 | dist/* 6 | build/* 7 | keras/datasets/data/* 8 | keras/datasets/temp/* 9 | docs/site/* 10 | docs/theme/* 11 | docs/sources/* 12 | tags 13 | Keras.egg-info 14 | examples/img/* 15 | 16 | # test-related 17 | .coverage 18 | .cache 19 | 20 | # developer environments 21 | .idea 22 | .vscode 23 | -------------------------------------------------------------------------------- /docs/templates/models/sequential.md: -------------------------------------------------------------------------------- 1 | # The Sequential model API 2 | 3 | To get started, read [this guide to the Keras Sequential model](/getting-started/sequential-model-guide). 4 | 5 | ## Useful attributes of Model 6 | 7 | - `model.layers` is a list of the layers added to the model. 8 | 9 | 10 | ---- 11 | 12 | ## Sequential model methods 13 | 14 | {{autogenerated}} -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # Keras Documentation 2 | 3 | The source for Keras documentation is in this directory under `sources/`. 
4 | Our documentation uses extended Markdown, as implemented by [MkDocs](http://mkdocs.org). 5 | 6 | ## Building the documentation 7 | 8 | - install MkDocs: `pip install mkdocs` 9 | - `cd` to the `docs/` folder and run: 10 | - `python autogen.py` 11 | - `mkdocs serve` # Starts a local webserver: [localhost:8000](localhost:8000) 12 | - `mkdocs build` # Builds a static site in "site" directory 13 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | # Configuration of py.test 2 | [pytest] 3 | addopts=-v 4 | -n 2 5 | --durations=10 6 | 7 | # Do not run tests in the build folder 8 | norecursedirs= build 9 | 10 | # PEP-8 The following are ignored: 11 | # E501 line too long (82 > 79 characters) 12 | # E402 module level import not at top of file - temporary measure to continue adding ros python packaged in sys.path 13 | # E731 do not assign a lambda expression, use a def 14 | 15 | pep8ignore=* E501 \ 16 | * E402 \ 17 | * E731 \ 18 | 19 | -------------------------------------------------------------------------------- /keras/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from . import utils 4 | from . import activations 5 | from . import applications 6 | from . import backend 7 | from . import datasets 8 | from . import engine 9 | from . import layers 10 | from . import preprocessing 11 | from . import wrappers 12 | from . import callbacks 13 | from . import constraints 14 | from . import initializers 15 | from . import metrics 16 | from . import models 17 | from . import losses 18 | from . import optimizers 19 | from . import regularizers 20 | # Importable from root because it's technically not a layer 21 | from .layers import Input 22 | 23 | __version__ = '2.0.8' 24 | -------------------------------------------------------------------------------- /keras/datasets/mnist.py: -------------------------------------------------------------------------------- 1 | from ..utils.data_utils import get_file 2 | import numpy as np 3 | 4 | 5 | def load_data(path='mnist.npz'): 6 | """Loads the MNIST dataset. 7 | 8 | # Arguments 9 | path: path where to cache the dataset locally 10 | (relative to ~/.keras/datasets). 11 | 12 | # Returns 13 | Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`. 14 | """ 15 | path = get_file(path, 16 | origin='https://s3.amazonaws.com/img-datasets/mnist.npz', 17 | file_hash='8a61469f7ea1b51cbae51d4f78837e45') 18 | f = np.load(path) 19 | x_train, y_train = f['x_train'], f['y_train'] 20 | x_test, y_test = f['x_test'], f['y_test'] 21 | f.close() 22 | return (x_train, y_train), (x_test, y_test) 23 | -------------------------------------------------------------------------------- /.github/stale.yml: -------------------------------------------------------------------------------- 1 | # Configuration for probot-stale - https://github.com/probot/stale 2 | 3 | # Number of days of inactivity before an Issue or Pull Request becomes stale 4 | daysUntilStale: 90 5 | # Number of days of inactivity before a stale Issue or Pull Request is closed 6 | daysUntilClose: 30 7 | # Issues or Pull Requests with these labels will never be considered stale 8 | exemptLabels: 9 | - bug 10 | - Announcement 11 | - help wanted 12 | - To investigate 13 | # Label to use when marking as stale 14 | staleLabel: stale 15 | # Comment to post when marking as stale. 
Set to `false` to disable 16 | markComment: > 17 | This issue has been automatically marked as stale because it has not had 18 | recent activity. It will be closed after 30 days if no further activity 19 | occurs, but feel free to re-open a closed issue if needed. 20 | -------------------------------------------------------------------------------- /docs/templates/visualization.md: -------------------------------------------------------------------------------- 1 | 2 | ## Model visualization 3 | 4 | The `keras.utils.vis_utils` module provides utility functions to plot 5 | a Keras model (using `graphviz`). 6 | 7 | This will plot a graph of the model and save it to a file: 8 | ```python 9 | from keras.utils import plot_model 10 | plot_model(model, to_file='model.png') 11 | ``` 12 | 13 | `plot_model` takes two optional arguments: 14 | 15 | - `show_shapes` (defaults to False) controls whether output shapes are shown in the graph. 16 | - `show_layer_names` (defaults to True) controls whether layer names are shown in the graph. 17 | 18 | You can also directly obtain the `pydot.Graph` object and render it yourself, 19 | for example to show it in an IPython notebook: 20 | ```python 21 | from IPython.display import SVG 22 | from keras.utils.vis_utils import model_to_dot 23 | 24 | SVG(model_to_dot(model).create(prog='dot', format='svg')) 25 | ``` 26 | -------------------------------------------------------------------------------- /docs/templates/constraints.md: -------------------------------------------------------------------------------- 1 | ## Usage of constraints 2 | 3 | Functions from the `constraints` module allow setting constraints (e.g. non-negativity) on network parameters during optimization. 4 | 5 | These constraints are applied on a per-layer basis. The exact API will depend on the layer, but the layers `Dense`, `Conv1D`, `Conv2D` and `Conv3D` have a unified API. 6 | 7 | These layers expose 2 keyword arguments: 8 | 9 | - `kernel_constraint` for the main weights matrix 10 | - `bias_constraint` for the bias. 11 | 12 | 13 | ```python 14 | from keras.constraints import max_norm 15 | model.add(Dense(64, kernel_constraint=max_norm(2.))) 16 | ``` 17 | 18 | ## Available constraints 19 | 20 | - __max_norm(max_value=2, axis=0)__: maximum-norm constraint 21 | - __non_neg()__: non-negativity constraint 22 | - __unit_norm(axis=0)__: unit-norm constraint 23 | - __min_max_norm(min_value=0.0, max_value=1.0, rate=1.0, axis=0)__: minimum/maximum-norm constraint 24 | -------------------------------------------------------------------------------- /keras/datasets/cifar.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | import sys 4 | from six.moves import cPickle 5 | 6 | 7 | def load_batch(fpath, label_key='labels'): 8 | """Internal utility for parsing CIFAR data. 9 | 10 | # Arguments 11 | fpath: path to the file to parse. 12 | label_key: key for label data in the retrieved 13 | dictionary. 14 | 15 | # Returns 16 | A tuple `(data, labels)`.
17 | """ 18 | f = open(fpath, 'rb') 19 | if sys.version_info < (3,): 20 | d = cPickle.load(f) 21 | else: 22 | d = cPickle.load(f, encoding='bytes') 23 | # decode utf8 24 | d_decoded = {} 25 | for k, v in d.items(): 26 | d_decoded[k.decode('utf8')] = v 27 | d = d_decoded 28 | f.close() 29 | data = d['data'] 30 | labels = d[label_key] 31 | 32 | data = data.reshape(data.shape[0], 3, 32, 32) 33 | return data, labels 34 | -------------------------------------------------------------------------------- /keras/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from . import np_utils 3 | from . import generic_utils 4 | from . import data_utils 5 | from . import io_utils 6 | from . import conv_utils 7 | 8 | # Globally-importable utils. 9 | from .io_utils import HDF5Matrix 10 | from .data_utils import get_file 11 | from .data_utils import Sequence 12 | from .data_utils import GeneratorEnqueuer 13 | from .data_utils import OrderedEnqueuer 14 | from .generic_utils import CustomObjectScope 15 | from .generic_utils import custom_object_scope 16 | from .generic_utils import get_custom_objects 17 | from .generic_utils import serialize_keras_object 18 | from .generic_utils import deserialize_keras_object 19 | from .generic_utils import Progbar 20 | from .layer_utils import convert_all_kernels_in_model 21 | from .vis_utils import plot_model 22 | from .np_utils import to_categorical 23 | from .np_utils import normalize 24 | from .training_utils import multi_gpu_model 25 | -------------------------------------------------------------------------------- /keras/legacy/models.py: -------------------------------------------------------------------------------- 1 | from .layers import Merge 2 | 3 | 4 | def needs_legacy_support(model): 5 | return isinstance(model.layers[0], Merge) 6 | 7 | 8 | def legacy_sequential_layers(model): 9 | layers = [] 10 | if model.layers: 11 | if isinstance(model.layers[0], Merge): 12 | merge = model.layers[0] 13 | for layer in merge.layers: 14 | if hasattr(layer, 'layers'): 15 | for sublayer in layer.layers: 16 | if sublayer not in layers: 17 | layers.append(sublayer) 18 | else: 19 | if layer not in layers: 20 | layers.append(layer) 21 | else: 22 | if model.layers[0] not in layers: 23 | layers.append(model.layers[0]) 24 | for layer in model.layers[1:]: 25 | if layer not in layers: 26 | layers.append(layer) 27 | return layers 28 | -------------------------------------------------------------------------------- /docker/Makefile: -------------------------------------------------------------------------------- 1 | help: 2 | @cat Makefile 3 | 4 | DATA?="${HOME}/Data" 5 | GPU?=0 6 | DOCKER_FILE=Dockerfile 7 | DOCKER=GPU=$(GPU) nvidia-docker 8 | BACKEND=tensorflow 9 | PYTHON_VERSION?=3.5 10 | CUDA_VERSION?=8.0 11 | CUDNN_VERSION?=6 12 | TEST=tests/ 13 | SRC?=$(shell dirname `pwd`) 14 | 15 | build: 16 | docker build -t keras --build-arg python_version=$(PYTHON_VERSION) --build-arg cuda_version=$(CUDA_VERSION) --build-arg cudnn_version=$(CUDNN_VERSION) -f $(DOCKER_FILE) . 
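# The ?= variables above can be overridden per invocation, e.g.: make build PYTHON_VERSION=2.7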
17 | 18 | bash: build 19 | $(DOCKER) run -it -v $(SRC):/src/workspace -v $(DATA):/data --env KERAS_BACKEND=$(BACKEND) keras bash 20 | 21 | ipython: build 22 | $(DOCKER) run -it -v $(SRC):/src/workspace -v $(DATA):/data --env KERAS_BACKEND=$(BACKEND) keras ipython 23 | 24 | notebook: build 25 | $(DOCKER) run -it -v $(SRC):/src/workspace -v $(DATA):/data --net=host --env KERAS_BACKEND=$(BACKEND) keras 26 | 27 | test: build 28 | $(DOCKER) run -it -v $(SRC):/src/workspace -v $(DATA):/data --env KERAS_BACKEND=$(BACKEND) keras py.test $(TEST) 29 | 30 | -------------------------------------------------------------------------------- /docs/templates/models/model.md: -------------------------------------------------------------------------------- 1 | # Model class API 2 | 3 | In the functional API, given some input tensor(s) and output tensor(s), you can instantiate a `Model` via: 4 | 5 | ```python 6 | from keras.models import Model 7 | from keras.layers import Input, Dense 8 | 9 | a = Input(shape=(32,)) 10 | b = Dense(32)(a) 11 | model = Model(inputs=a, outputs=b) 12 | ``` 13 | 14 | This model will include all layers required in the computation of `b` given `a`. 15 | 16 | In the case of multi-input or multi-output models, you can use lists as well: 17 | 18 | ```python 19 | model = Model(inputs=[a1, a2], outputs=[b1, b2, b3]) 20 | ``` 21 | 22 | For a detailed introduction to what `Model` can do, read [this guide to the Keras functional API](/getting-started/functional-api-guide). 23 | 24 | ## Useful attributes of Model 25 | 26 | - `model.layers` is a flattened list of the layers comprising the model graph. 27 | - `model.inputs` is the list of input tensors. 28 | - `model.outputs` is the list of output tensors. 29 | 30 | ## Methods 31 | 32 | {{autogenerated}} 33 | -------------------------------------------------------------------------------- /ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | Please make sure that the boxes below are checked before you submit your issue. If your issue is an implementation question, please ask your question on [StackOverflow](http://stackoverflow.com/questions/tagged/keras) or [join the Keras Slack channel](https://keras-slack-autojoin.herokuapp.com/) and ask there instead of filing a GitHub issue. 2 | 3 | Thank you! 4 | 5 | - [ ] Check that you are up-to-date with the master branch of Keras. You can update with: 6 | pip install git+git://github.com/fchollet/keras.git --upgrade --no-deps 7 | 8 | - [ ] If running on TensorFlow, check that you are up-to-date with the latest version. The installation instructions can be found [here](https://www.tensorflow.org/get_started/os_setup). 9 | 10 | - [ ] If running on Theano, check that you are up-to-date with the master branch of Theano. You can update with: 11 | pip install git+git://github.com/Theano/Theano.git --upgrade --no-deps 12 | 13 | - [ ] Provide a link to a GitHub Gist of a Python script that can reproduce your issue (or just copy the script here if it is short).
14 | -------------------------------------------------------------------------------- /docs/templates/activations.md: -------------------------------------------------------------------------------- 1 | 2 | ## Usage of activations 3 | 4 | Activations can either be used through an `Activation` layer, or through the `activation` argument supported by all forward layers: 5 | 6 | ```python 7 | from keras.layers import Activation, Dense 8 | 9 | model.add(Dense(64)) 10 | model.add(Activation('tanh')) 11 | ``` 12 | 13 | This is equivalent to: 14 | 15 | ```python 16 | model.add(Dense(64, activation='tanh')) 17 | ``` 18 | 19 | You can also pass an element-wise TensorFlow/Theano/CNTK function as an activation: 20 | 21 | ```python 22 | from keras import backend as K 23 | 24 | model.add(Dense(64, activation=K.tanh)) 25 | model.add(Activation(K.tanh)) 26 | ``` 27 | 28 | ## Available activations 29 | 30 | {{autogenerated}} 31 | 32 | ## On "Advanced Activations" 33 | 34 | Activations that are more complex than a simple TensorFlow/Theano/CNTK function (eg. learnable activations, which maintain a state) are available as [Advanced Activation layers](layers/advanced-activations.md), and can be found in the module `keras.layers.advanced_activations`. These include `PReLU` and `LeakyReLU`. 35 | -------------------------------------------------------------------------------- /tests/keras/layers/advanced_activations_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from keras.utils.test_utils import layer_test 3 | from keras.utils.test_utils import keras_test 4 | from keras import layers 5 | 6 | 7 | @keras_test 8 | def test_leaky_relu(): 9 | for alpha in [0., .5, -1.]: 10 | layer_test(layers.LeakyReLU, kwargs={'alpha': alpha}, 11 | input_shape=(2, 3, 4)) 12 | 13 | 14 | @keras_test 15 | def test_prelu(): 16 | layer_test(layers.PReLU, kwargs={}, 17 | input_shape=(2, 3, 4)) 18 | 19 | 20 | @keras_test 21 | def test_prelu_share(): 22 | layer_test(layers.PReLU, kwargs={'shared_axes': 1}, 23 | input_shape=(2, 3, 4)) 24 | 25 | 26 | @keras_test 27 | def test_elu(): 28 | for alpha in [0., .5, -1.]: 29 | layer_test(layers.ELU, kwargs={'alpha': alpha}, 30 | input_shape=(2, 3, 4)) 31 | 32 | 33 | @keras_test 34 | def test_thresholded_relu(): 35 | layer_test(layers.ThresholdedReLU, kwargs={'theta': 0.5}, 36 | input_shape=(2, 3, 4)) 37 | 38 | 39 | if __name__ == '__main__': 40 | pytest.main([__file__]) 41 | -------------------------------------------------------------------------------- /tests/keras/layers/noise_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from keras.utils.test_utils import layer_test 3 | from keras.utils.test_utils import keras_test 4 | from keras.layers import noise 5 | from keras import backend as K 6 | 7 | 8 | @keras_test 9 | @pytest.mark.skipif((K.backend() == 'cntk'), 10 | reason="cntk does not support it yet") 11 | def test_GaussianNoise(): 12 | layer_test(noise.GaussianNoise, 13 | kwargs={'stddev': 1.}, 14 | input_shape=(3, 2, 3)) 15 | 16 | 17 | @keras_test 18 | @pytest.mark.skipif((K.backend() == 'cntk'), 19 | reason="cntk does not support it yet") 20 | def test_GaussianDropout(): 21 | layer_test(noise.GaussianDropout, 22 | kwargs={'rate': 0.5}, 23 | input_shape=(3, 2, 3)) 24 | 25 | 26 | @keras_test 27 | @pytest.mark.skipif((K.backend() == 'cntk'), 28 | reason="cntk does not support it yet") 29 | def test_AlphaDropout(): 30 | layer_test(noise.AlphaDropout, 31 | 
kwargs={'rate': 0.1}, 32 | input_shape=(3, 2, 3)) 33 | 34 | 35 | if __name__ == '__main__': 36 | pytest.main([__file__]) 37 | -------------------------------------------------------------------------------- /tests/keras/utils/vis_utils_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import os 3 | import sys 4 | import numpy as np 5 | from keras.layers import Conv2D 6 | from keras.layers import Dense 7 | from keras.layers import Flatten 8 | from keras.layers import LSTM 9 | from keras.layers import TimeDistributed 10 | from keras.models import Sequential 11 | from keras.utils import vis_utils 12 | 13 | 14 | @pytest.mark.skipif(sys.version_info > (3, 0), reason='pydot-ng currently supports python 3.4') 15 | def test_plot_model(): 16 | model = Sequential() 17 | model.add(Conv2D(filters=2, kernel_size=(2, 3), input_shape=(3, 5, 5), name='conv')) 18 | model.add(Flatten(name='flat')) 19 | model.add(Dense(5, name='dense1')) 20 | vis_utils.plot_model(model, to_file='model1.png', show_layer_names=False) 21 | os.remove('model1.png') 22 | 23 | model = Sequential() 24 | model.add(LSTM(16, return_sequences=True, input_shape=(2, 3), name='lstm')) 25 | model.add(TimeDistributed(Dense(5, name='dense2'))) 26 | vis_utils.plot_model(model, to_file='model2.png', show_shapes=True) 27 | os.remove('model2.png') 28 | 29 | 30 | if __name__ == '__main__': 31 | pytest.main([__file__]) 32 | -------------------------------------------------------------------------------- /keras/utils/np_utils.py: -------------------------------------------------------------------------------- 1 | """Numpy-related utilities.""" 2 | from __future__ import absolute_import 3 | 4 | import numpy as np 5 | 6 | 7 | def to_categorical(y, num_classes=None): 8 | """Converts a class vector (integers) to binary class matrix. 9 | 10 | E.g. for use with categorical_crossentropy. 11 | 12 | # Arguments 13 | y: class vector to be converted into a matrix 14 | (integers from 0 to num_classes). 15 | num_classes: total number of classes. 16 | 17 | # Returns 18 | A binary matrix representation of the input. 19 | """ 20 | y = np.array(y, dtype='int').ravel() 21 | if not num_classes: 22 | num_classes = np.max(y) + 1 23 | n = y.shape[0] 24 | categorical = np.zeros((n, num_classes)) 25 | categorical[np.arange(n), y] = 1 26 | return categorical 27 | 28 | 29 | def normalize(x, axis=-1, order=2): 30 | """Normalizes a Numpy array. 31 | 32 | # Arguments 33 | x: Numpy array to normalize. 34 | axis: axis along which to normalize. 35 | order: Normalization order (e.g. 2 for L2 norm). 36 | 37 | # Returns 38 | A normalized copy of the array. 
39 | """ 40 | l2 = np.atleast_1d(np.linalg.norm(x, order, axis)) 41 | l2[l2 == 0] = 1 42 | return x / np.expand_dims(l2, axis) 43 | -------------------------------------------------------------------------------- /tests/keras/layers/embeddings_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from keras.utils.test_utils import layer_test, keras_test 3 | from keras.layers.embeddings import Embedding 4 | import keras.backend as K 5 | 6 | 7 | @keras_test 8 | def test_embedding(): 9 | layer_test(Embedding, 10 | kwargs={'output_dim': 4, 'input_dim': 10, 'input_length': 2}, 11 | input_shape=(3, 2), 12 | input_dtype='int32', 13 | expected_output_dtype=K.floatx()) 14 | layer_test(Embedding, 15 | kwargs={'output_dim': 4, 'input_dim': 10, 'mask_zero': True}, 16 | input_shape=(3, 2), 17 | input_dtype='int32', 18 | expected_output_dtype=K.floatx()) 19 | layer_test(Embedding, 20 | kwargs={'output_dim': 4, 'input_dim': 10, 'mask_zero': True}, 21 | input_shape=(3, 2, 5), 22 | input_dtype='int32', 23 | expected_output_dtype=K.floatx()) 24 | layer_test(Embedding, 25 | kwargs={'output_dim': 4, 'input_dim': 10, 'mask_zero': True, 'input_length': (None, 5)}, 26 | input_shape=(3, 2, 5), 27 | input_dtype='int32', 28 | expected_output_dtype=K.floatx()) 29 | 30 | 31 | if __name__ == '__main__': 32 | pytest.main([__file__]) 33 | -------------------------------------------------------------------------------- /keras/datasets/boston_housing.py: -------------------------------------------------------------------------------- 1 | from ..utils.data_utils import get_file 2 | import numpy as np 3 | 4 | 5 | def load_data(path='boston_housing.npz', seed=113, test_split=0.2): 6 | """Loads the Boston Housing dataset. 7 | 8 | # Arguments 9 | path: path where to cache the dataset locally 10 | (relative to ~/.keras/datasets). 11 | seed: Random seed for shuffling the data 12 | before computing the test split. 13 | test_split: fraction of the data to reserve as test set. 14 | 15 | # Returns 16 | Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`. 17 | """ 18 | assert 0 <= test_split < 1 19 | path = get_file(path, 20 | origin='https://s3.amazonaws.com/keras-datasets/boston_housing.npz', 21 | file_hash='f553886a1f8d56431e820c5b82552d9d95cfcb96d1e678153f8839538947dff5') 22 | f = np.load(path) 23 | x = f['x'] 24 | y = f['y'] 25 | f.close() 26 | 27 | np.random.seed(seed) 28 | np.random.shuffle(x) 29 | np.random.seed(seed) 30 | np.random.shuffle(y) 31 | 32 | x_train = np.array(x[:int(len(x) * (1 - test_split))]) 33 | y_train = np.array(y[:int(len(x) * (1 - test_split))]) 34 | x_test = np.array(x[int(len(x) * (1 - test_split)):]) 35 | y_test = np.array(y[int(len(x) * (1 - test_split)):]) 36 | return (x_train, y_train), (x_test, y_test) 37 | -------------------------------------------------------------------------------- /keras/datasets/fashion_mnist.py: -------------------------------------------------------------------------------- 1 | import gzip 2 | import os 3 | 4 | from ..utils.data_utils import get_file 5 | import numpy as np 6 | 7 | 8 | def load_data(): 9 | """Loads the Fashion-MNIST dataset. 10 | 11 | # Returns 12 | Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`. 
13 | """ 14 | dirname = os.path.join('datasets', 'fashion-mnist') 15 | base = 'http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/' 16 | files = ['train-labels-idx1-ubyte.gz', 'train-images-idx3-ubyte.gz', 17 | 't10k-labels-idx1-ubyte.gz', 't10k-images-idx3-ubyte.gz'] 18 | 19 | paths = [] 20 | for file in files: 21 | paths.append(get_file(file, origin=base + file, cache_subdir=dirname)) 22 | 23 | with gzip.open(paths[0], 'rb') as lbpath: 24 | y_train = np.frombuffer(lbpath.read(), np.uint8, offset=8) 25 | 26 | with gzip.open(paths[1], 'rb') as imgpath: 27 | x_train = np.frombuffer(imgpath.read(), np.uint8, 28 | offset=16).reshape(len(y_train), 28, 28) 29 | 30 | with gzip.open(paths[2], 'rb') as lbpath: 31 | y_test = np.frombuffer(lbpath.read(), np.uint8, offset=8) 32 | 33 | with gzip.open(paths[3], 'rb') as imgpath: 34 | x_test = np.frombuffer(imgpath.read(), np.uint8, 35 | offset=16).reshape(len(y_test), 28, 28) 36 | 37 | return (x_train, y_train), (x_test, y_test) 38 | -------------------------------------------------------------------------------- /keras/datasets/cifar10.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from .cifar import load_batch 3 | from ..utils.data_utils import get_file 4 | from .. import backend as K 5 | import numpy as np 6 | import os 7 | 8 | 9 | def load_data(): 10 | """Loads CIFAR10 dataset. 11 | 12 | # Returns 13 | Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`. 14 | """ 15 | dirname = 'cifar-10-batches-py' 16 | origin = 'http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz' 17 | path = get_file(dirname, origin=origin, untar=True) 18 | 19 | num_train_samples = 50000 20 | 21 | x_train = np.zeros((num_train_samples, 3, 32, 32), dtype='uint8') 22 | y_train = np.zeros((num_train_samples,), dtype='uint8') 23 | 24 | for i in range(1, 6): 25 | fpath = os.path.join(path, 'data_batch_' + str(i)) 26 | data, labels = load_batch(fpath) 27 | x_train[(i - 1) * 10000: i * 10000, :, :, :] = data 28 | y_train[(i - 1) * 10000: i * 10000] = labels 29 | 30 | fpath = os.path.join(path, 'test_batch') 31 | x_test, y_test = load_batch(fpath) 32 | 33 | y_train = np.reshape(y_train, (len(y_train), 1)) 34 | y_test = np.reshape(y_test, (len(y_test), 1)) 35 | 36 | if K.image_data_format() == 'channels_last': 37 | x_train = x_train.transpose(0, 2, 3, 1) 38 | x_test = x_test.transpose(0, 2, 3, 1) 39 | 40 | return (x_train, y_train), (x_test, y_test) 41 | -------------------------------------------------------------------------------- /docs/templates/initializers.md: -------------------------------------------------------------------------------- 1 | ## Usage of initializers 2 | 3 | Initializations define the way to set the initial random weights of Keras layers. 4 | 5 | The keyword arguments used for passing initializers to layers will depend on the layer. 
Usually it is simply `kernel_initializer` and `bias_initializer`: 6 | 7 | ```python 8 | model.add(Dense(64, 9 | kernel_initializer='random_uniform', 10 | bias_initializer='zeros')) 11 | ``` 12 | 13 | ## Available initializers 14 | 15 | The following built-in initializers are available as part of the `keras.initializers` module: 16 | 17 | {{autogenerated}} 18 | 19 | 20 | An initializer may be passed as a string (must match one of the available initializers above), or as a callable: 21 | 22 | ```python 23 | from keras import initializers 24 | 25 | model.add(Dense(64, kernel_initializer=initializers.random_normal(stddev=0.01))) 26 | 27 | # also works; will use the default parameters. 28 | model.add(Dense(64, kernel_initializer='random_normal')) 29 | ``` 30 | 31 | 32 | ## Using custom initializers 33 | 34 | If passing a custom callable, then it must take the argument `shape` (shape of the variable to initialize) and `dtype` (dtype of generated values): 35 | 36 | ```python 37 | from keras import backend as K 38 | 39 | def my_init(shape, dtype=None): 40 | return K.random_normal(shape, dtype=dtype) 41 | 42 | model.add(Dense(64, kernel_initializer=my_init)) 43 | ``` 44 | -------------------------------------------------------------------------------- /keras/datasets/cifar100.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from .cifar import load_batch 3 | from ..utils.data_utils import get_file 4 | from .. import backend as K 5 | import numpy as np 6 | import os 7 | 8 | 9 | def load_data(label_mode='fine'): 10 | """Loads CIFAR100 dataset. 11 | 12 | # Arguments 13 | label_mode: one of "fine", "coarse". 14 | 15 | # Returns 16 | Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`. 17 | 18 | # Raises 19 | ValueError: in case of invalid `label_mode`. 20 | """ 21 | if label_mode not in ['fine', 'coarse']: 22 | raise ValueError('`label_mode` must be one of `"fine"`, `"coarse"`.') 23 | 24 | dirname = 'cifar-100-python' 25 | origin = 'http://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz' 26 | path = get_file(dirname, origin=origin, untar=True) 27 | 28 | fpath = os.path.join(path, 'train') 29 | x_train, y_train = load_batch(fpath, label_key=label_mode + '_labels') 30 | 31 | fpath = os.path.join(path, 'test') 32 | x_test, y_test = load_batch(fpath, label_key=label_mode + '_labels') 33 | 34 | y_train = np.reshape(y_train, (len(y_train), 1)) 35 | y_test = np.reshape(y_test, (len(y_test), 1)) 36 | 37 | if K.image_data_format() == 'channels_last': 38 | x_train = x_train.transpose(0, 2, 3, 1) 39 | x_test = x_test.transpose(0, 2, 3, 1) 40 | 41 | return (x_train, y_train), (x_test, y_test) 42 | -------------------------------------------------------------------------------- /docs/templates/layers/about-keras-layers.md: -------------------------------------------------------------------------------- 1 | # About Keras layers 2 | 3 | All Keras layers have a number of methods in common: 4 | 5 | - `layer.get_weights()`: returns the weights of the layer as a list of Numpy arrays. 6 | - `layer.set_weights(weights)`: sets the weights of the layer from a list of Numpy arrays (with the same shapes as the output of `get_weights`). 7 | - `layer.get_config()`: returns a dictionary containing the configuration of the layer. 
The layer can be reinstantiated from its config via: 8 | 9 | ```python 10 | layer = Dense(32) 11 | config = layer.get_config() 12 | reconstructed_layer = Dense.from_config(config) 13 | ``` 14 | 15 | Or: 16 | 17 | ```python 18 | from keras import layers 19 | 20 | config = layer.get_config() 21 | layer = layers.deserialize({'class_name': layer.__class__.__name__, 22 | 'config': config}) 23 | ``` 24 | 25 | If a layer has a single node (i.e. if it isn't a shared layer), you can get its input tensor, output tensor, input shape and output shape via: 26 | 27 | - `layer.input` 28 | - `layer.output` 29 | - `layer.input_shape` 30 | - `layer.output_shape` 31 | 32 | If the layer has multiple nodes (see: [the concept of layer node and shared layers](/getting-started/functional-api-guide/#the-concept-of-layer-node)), you can use the following methods: 33 | 34 | - `layer.get_input_at(node_index)` 35 | - `layer.get_output_at(node_index)` 36 | - `layer.get_input_shape_at(node_index)` 37 | - `layer.get_output_shape_at(node_index)` -------------------------------------------------------------------------------- /tests/test_loss_masking.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from keras.models import Sequential 5 | from keras.engine.training import _weighted_masked_objective 6 | from keras.layers import TimeDistributed, Masking, Dense 7 | from keras.utils.test_utils import keras_test 8 | from keras import losses 9 | from keras import backend as K 10 | 11 | 12 | @keras_test 13 | def test_masking(): 14 | np.random.seed(1337) 15 | x = np.array([[[1], [1]], 16 | [[0], [0]]]) 17 | model = Sequential() 18 | model.add(Masking(mask_value=0, input_shape=(2, 1))) 19 | model.add(TimeDistributed(Dense(1, kernel_initializer='one'))) 20 | model.compile(loss='mse', optimizer='sgd') 21 | y = np.array([[[1], [1]], 22 | [[1], [1]]]) 23 | loss = model.train_on_batch(x, y) 24 | assert loss == 0 25 | 26 | 27 | @keras_test 28 | def test_loss_masking(): 29 | weighted_loss = _weighted_masked_objective(losses.get('mae')) 30 | shape = (3, 4, 2) 31 | x = np.arange(24).reshape(shape) 32 | y = 2 * x 33 | 34 | # Normally the trailing 1 is added by standardize_weights 35 | weights = np.ones((3,)) 36 | mask = np.ones((3, 4)) 37 | mask[1, 0] = 0 38 | 39 | out = K.eval(weighted_loss(K.variable(x), 40 | K.variable(y), 41 | K.variable(weights), 42 | K.variable(mask))) 43 | 44 | 45 | if __name__ == '__main__': 46 | pytest.main([__file__]) 47 | -------------------------------------------------------------------------------- /docs/templates/optimizers.md: -------------------------------------------------------------------------------- 1 | 2 | ## Usage of optimizers 3 | 4 | An optimizer is one of the two arguments required for compiling a Keras model: 5 | 6 | ```python 7 | from keras import optimizers 8 | 9 | model = Sequential() 10 | model.add(Dense(64, kernel_initializer='uniform', input_shape=(10,))) 11 | model.add(Activation('tanh')) 12 | model.add(Activation('softmax')) 13 | 14 | sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True) 15 | model.compile(loss='mean_squared_error', optimizer=sgd) 16 | ``` 17 | 18 | You can either instantiate an optimizer before passing it to `model.compile()` , as in the above example, or you can call it by its name. In the latter case, the default parameters for the optimizer will be used. 
19 | 20 | ```python 21 | # pass optimizer by name: default parameters will be used 22 | model.compile(loss='mean_squared_error', optimizer='sgd') 23 | ``` 24 | 25 | --- 26 | 27 | ## Parameters common to all Keras optimizers 28 | 29 | The parameters `clipnorm` and `clipvalue` can be used with all optimizers to control gradient clipping: 30 | 31 | ```python 32 | from keras import optimizers 33 | 34 | # All parameter gradients will be clipped to 35 | # a maximum norm of 1. 36 | sgd = optimizers.SGD(lr=0.01, clipnorm=1.) 37 | ``` 38 | 39 | ```python 40 | from keras import optimizers 41 | 42 | # All parameter gradients will be clipped to 43 | # a maximum value of 0.5 and 44 | # a minimum value of -0.5. 45 | sgd = optimizers.SGD(lr=0.01, clipvalue=0.5) 46 | ``` 47 | 48 | --- 49 | 50 | {{autogenerated}} 51 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from setuptools import find_packages 3 | 4 | 5 | setup(name='Keras', 6 | version='2.0.8', 7 | description='Deep Learning for Python', 8 | author='Francois Chollet', 9 | author_email='francois.chollet@gmail.com', 10 | url='https://github.com/fchollet/keras', 11 | download_url='https://github.com/fchollet/keras/tarball/2.0.8', 12 | license='MIT', 13 | install_requires=['numpy>=1.9.1', 14 | 'scipy>=0.14', 15 | 'six>=1.9.0', 16 | 'pyyaml'], 17 | extras_require={ 18 | 'h5py': ['h5py'], 19 | 'visualize': ['pydot>=1.2.0'], 20 | 'tests': ['pytest', 21 | 'pytest-pep8', 22 | 'pytest-xdist', 23 | 'pytest-cov'], 24 | }, 25 | classifiers=[ 26 | 'Development Status :: 5 - Production/Stable', 27 | 'Intended Audience :: Developers', 28 | 'Intended Audience :: Education', 29 | 'Intended Audience :: Science/Research', 30 | 'License :: OSI Approved :: MIT License', 31 | 'Programming Language :: Python :: 2', 32 | 'Programming Language :: Python :: 2.7', 33 | 'Programming Language :: Python :: 3', 34 | 'Programming Language :: Python :: 3.6', 35 | 'Topic :: Software Development :: Libraries', 36 | 'Topic :: Software Development :: Libraries :: Python Modules' 37 | ], 38 | packages=find_packages()) 39 | -------------------------------------------------------------------------------- /docs/templates/losses.md: -------------------------------------------------------------------------------- 1 | 2 | ## Usage of loss functions 3 | 4 | A loss function (or objective function, or optimization score function) is one of the two parameters required to compile a model: 5 | 6 | ```python 7 | model.compile(loss='mean_squared_error', optimizer='sgd') 8 | ``` 9 | 10 | ```python 11 | from keras import losses 12 | 13 | model.compile(loss=losses.mean_squared_error, optimizer='sgd') 14 | ``` 15 | 16 | You can either pass the name of an existing loss function, or pass a TensorFlow/Theano symbolic function that returns a scalar for each data-point and takes the following two arguments: 17 | 18 | - __y_true__: True labels. TensorFlow/Theano tensor. 19 | - __y_pred__: Predictions. TensorFlow/Theano tensor of the same shape as y_true. 20 | 21 | The actual optimized objective is the mean of the output array across all datapoints. 22 | 23 | For a few examples of such functions, check out the [losses source](https://github.com/fchollet/keras/blob/master/keras/losses.py). 
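As a minimal illustration (the function name below is our own, not part of the Keras API), such a custom loss can be a single backend expression passed straight to `compile`:

```python
from keras import backend as K

def my_mean_absolute_error(y_true, y_pred):
    # Return one scalar per data-point: the absolute error averaged over the last axis.
    return K.mean(K.abs(y_pred - y_true), axis=-1)

model.compile(loss=my_mean_absolute_error, optimizer='sgd')
```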
24 | 25 | ## Available loss functions 26 | 27 | {{autogenerated}} 28 | 29 | ---- 30 | 31 | **Note**: when using the `categorical_crossentropy` loss, your targets should be in categorical format (e.g. if you have 10 classes, the target for each sample should be a 10-dimensional vector that is all-zeros except for a 1 at the index corresponding to the class of the sample). In order to convert *integer targets* into *categorical targets*, you can use the Keras utility `to_categorical`: 32 | 33 | ```python 34 | from keras.utils.np_utils import to_categorical 35 | 36 | categorical_labels = to_categorical(int_labels, num_classes=None) 37 | ``` 38 | -------------------------------------------------------------------------------- /examples/imdb_bidirectional_lstm.py: -------------------------------------------------------------------------------- 1 | '''Train a Bidirectional LSTM on the IMDB sentiment classification task. 2 | 3 | Output after 4 epochs on CPU: ~0.8146 4 | Time per epoch on CPU (Core i7): ~150s. 5 | ''' 6 | 7 | from __future__ import print_function 8 | import numpy as np 9 | 10 | from keras.preprocessing import sequence 11 | from keras.models import Sequential 12 | from keras.layers import Dense, Dropout, Embedding, LSTM, Bidirectional 13 | from keras.datasets import imdb 14 | 15 | 16 | max_features = 20000 17 | # cut texts after this number of words 18 | # (among top max_features most common words) 19 | maxlen = 100 20 | batch_size = 32 21 | 22 | print('Loading data...') 23 | (x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features) 24 | print(len(x_train), 'train sequences') 25 | print(len(x_test), 'test sequences') 26 | 27 | print('Pad sequences (samples x time)') 28 | x_train = sequence.pad_sequences(x_train, maxlen=maxlen) 29 | x_test = sequence.pad_sequences(x_test, maxlen=maxlen) 30 | print('x_train shape:', x_train.shape) 31 | print('x_test shape:', x_test.shape) 32 | y_train = np.array(y_train) 33 | y_test = np.array(y_test) 34 | 35 | model = Sequential() 36 | model.add(Embedding(max_features, 128, input_length=maxlen)) 37 | model.add(Bidirectional(LSTM(64))) 38 | model.add(Dropout(0.5)) 39 | model.add(Dense(1, activation='sigmoid')) 40 | 41 | # try using different optimizers and different optimizer configs 42 | model.compile('adam', 'binary_crossentropy', metrics=['accuracy']) 43 | 44 | print('Train...') 45 | model.fit(x_train, y_train, 46 | batch_size=batch_size, 47 | epochs=4, 48 | validation_data=[x_test, y_test]) 49 | -------------------------------------------------------------------------------- /docs/templates/regularizers.md: -------------------------------------------------------------------------------- 1 | ## Usage of regularizers 2 | 3 | Regularizers allow you to apply penalties on layer parameters or layer activity during optimization. These penalties are incorporated in the loss function that the network optimizes. 4 | 5 | The penalties are applied on a per-layer basis. The exact API will depend on the layer, but the layers `Dense`, `Conv1D`, `Conv2D` and `Conv3D` have a unified API.
6 | 7 | These layers expose 3 keyword arguments: 8 | 9 | - `kernel_regularizer`: instance of `keras.regularizers.Regularizer` 10 | - `bias_regularizer`: instance of `keras.regularizers.Regularizer` 11 | - `activity_regularizer`: instance of `keras.regularizers.Regularizer` 12 | 13 | 14 | ## Example 15 | 16 | ```python 17 | from keras import regularizers 18 | model.add(Dense(64, input_dim=64, 19 | kernel_regularizer=regularizers.l2(0.01), 20 | activity_regularizer=regularizers.l1(0.01))) 21 | ``` 22 | 23 | ## Available penalties 24 | 25 | ```python 26 | keras.regularizers.l1(0.) 27 | keras.regularizers.l2(0.) 28 | keras.regularizers.l1_l2(0.) 29 | ``` 30 | 31 | ## Developing new regularizers 32 | 33 | Any function that takes in a weight matrix and returns a loss contribution tensor can be used as a regularizer, e.g.: 34 | 35 | ```python 36 | from keras import backend as K 37 | 38 | def l1_reg(weight_matrix): 39 | return 0.01 * K.sum(K.abs(weight_matrix)) 40 | 41 | model.add(Dense(64, input_dim=64, 42 | kernel_regularizer=l1_reg)) 43 | ``` 44 | 45 | Alternatively, you can write your regularizers in an object-oriented way; 46 | see the [keras/regularizers.py](https://github.com/fchollet/keras/blob/master/keras/regularizers.py) module for examples. -------------------------------------------------------------------------------- /docs/templates/metrics.md: -------------------------------------------------------------------------------- 1 | 2 | ## Usage of metrics 3 | 4 | A metric is a function that is used to judge the performance of your model. Metric functions are to be supplied in the `metrics` parameter when a model is compiled. 5 | 6 | ```python 7 | model.compile(loss='mean_squared_error', 8 | optimizer='sgd', 9 | metrics=['mae', 'acc']) 10 | ``` 11 | 12 | ```python 13 | from keras import metrics 14 | 15 | model.compile(loss='mean_squared_error', 16 | optimizer='sgd', 17 | metrics=[metrics.mae, metrics.categorical_accuracy]) 18 | ``` 19 | 20 | A metric function is similar to a [loss function](/losses), except that the results from evaluating a metric are not used when training the model. 21 | 22 | You can either pass the name of an existing metric, or pass a Theano/TensorFlow symbolic function (see [Custom metrics](#custom-metrics)). 23 | 24 | #### Arguments 25 | - __y_true__: True labels. Theano/TensorFlow tensor. 26 | - __y_pred__: Predictions. Theano/TensorFlow tensor of the same shape as y_true. 27 | 28 | #### Returns 29 | Single tensor value representing the mean of the output array across all 30 | datapoints. 31 | 32 | ---- 33 | 34 | ## Available metrics 35 | 36 | 37 | {{autogenerated}} 38 | 39 | ---- 40 | 41 | ## Custom metrics 42 | 43 | Custom metrics can be passed at the compilation step. The 44 | function would need to take `(y_true, y_pred)` as arguments and return 45 | a single tensor value. 46 | 47 | ```python 48 | import keras.backend as K 49 | 50 | def mean_pred(y_true, y_pred): 51 | return K.mean(y_pred) 52 | 53 | model.compile(optimizer='rmsprop', 54 | loss='binary_crossentropy', 55 | metrics=['accuracy', mean_pred]) 56 | ``` -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | COPYRIGHT 2 | 3 | All contributions by François Chollet: 4 | Copyright (c) 2015, François Chollet. 5 | All rights reserved. 6 | 7 | All contributions by Google: 8 | Copyright (c) 2015, Google, Inc. 9 | All rights reserved.
10 | 11 | All contributions by Microsoft: 12 | Copyright (c) 2017, Microsoft, Inc. 13 | All rights reserved. 14 | 15 | All other contributions: 16 | Copyright (c) 2015 - 2017, the respective contributors. 17 | All rights reserved. 18 | 19 | Each contributor holds copyright over their respective contributions. 20 | The project versioning (Git) records all such contribution source information. 21 | 22 | LICENSE 23 | 24 | The MIT License (MIT) 25 | 26 | Permission is hereby granted, free of charge, to any person obtaining a copy 27 | of this software and associated documentation files (the "Software"), to deal 28 | in the Software without restriction, including without limitation the rights 29 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 30 | copies of the Software, and to permit persons to whom the Software is 31 | furnished to do so, subject to the following conditions: 32 | 33 | The above copyright notice and this permission notice shall be included in all 34 | copies or substantial portions of the Software. 35 | 36 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 37 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 38 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 39 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 40 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 41 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 42 | SOFTWARE. 43 | 44 | -------------------------------------------------------------------------------- /keras/layers/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from ..utils.generic_utils import deserialize_keras_object 4 | from ..engine import Layer 5 | from ..engine import Input 6 | from ..engine import InputLayer 7 | from ..engine import InputSpec 8 | from .merge import * 9 | from .core import * 10 | from .convolutional import * 11 | from .pooling import * 12 | from .local import * 13 | from .recurrent import * 14 | from .cudnn_recurrent import * 15 | from .normalization import * 16 | from .embeddings import * 17 | from .noise import * 18 | from .advanced_activations import * 19 | from .wrappers import * 20 | from .convolutional_recurrent import * 21 | from ..legacy.layers import * 22 | 23 | 24 | def serialize(layer): 25 | """Serialize a layer. 26 | 27 | # Arguments 28 | layer: a Layer object. 29 | 30 | # Returns 31 | dictionary with config. 32 | """ 33 | return {'class_name': layer.__class__.__name__, 34 | 'config': layer.get_config()} 35 | 36 | 37 | def deserialize(config, custom_objects=None): 38 | """Instantiate a layer from a config dictionary. 39 | 40 | # Arguments 41 | config: dict of the form {'class_name': str, 'config': dict} 42 | custom_objects: dict mapping class names (or function names) 43 | of custom (non-Keras) objects to class/functions 44 | 45 | # Returns 46 | Layer instance (may be Model, Sequential, Layer...) 47 | """ 48 | from .. import models 49 | globs = globals() # All layers. 
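# Model and Sequential are defined in keras.models rather than in this module, so add them to the lookup table explicitly.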
50 | globs['Model'] = models.Model 51 | globs['Sequential'] = models.Sequential 52 | return deserialize_keras_object(config, 53 | module_objects=globs, 54 | custom_objects=custom_objects, 55 | printable_module_name='layer') 56 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG cuda_version=8.0 2 | ARG cudnn_version=6 3 | FROM nvidia/cuda:${cuda_version}-cudnn${cudnn_version}-devel 4 | 5 | ENV CONDA_DIR /opt/conda 6 | ENV PATH $CONDA_DIR/bin:$PATH 7 | 8 | RUN mkdir -p $CONDA_DIR && \ 9 | echo export PATH=$CONDA_DIR/bin:'$PATH' > /etc/profile.d/conda.sh && \ 10 | apt-get update && \ 11 | apt-get install -y wget git libhdf5-dev g++ graphviz openmpi-bin && \ 12 | wget --quiet https://repo.continuum.io/miniconda/Miniconda3-4.2.12-Linux-x86_64.sh && \ 13 | echo "c59b3dd3cad550ac7596e0d599b91e75d88826db132e4146030ef471bb434e9a *Miniconda3-4.2.12-Linux-x86_64.sh" | sha256sum -c - && \ 14 | /bin/bash /Miniconda3-4.2.12-Linux-x86_64.sh -f -b -p $CONDA_DIR && \ 15 | rm Miniconda3-4.2.12-Linux-x86_64.sh 16 | 17 | ENV NB_USER keras 18 | ENV NB_UID 1000 19 | 20 | RUN useradd -m -s /bin/bash -N -u $NB_UID $NB_USER && \ 21 | mkdir -p $CONDA_DIR && \ 22 | chown keras $CONDA_DIR -R && \ 23 | mkdir -p /src && \ 24 | chown keras /src 25 | 26 | USER keras 27 | 28 | # Python 29 | ARG python_version=3.5 30 | 31 | RUN conda install -y python=${python_version} && \ 32 | pip install --upgrade pip && \ 33 | pip install tensorflow-gpu && \ 34 | pip install https://cntk.ai/PythonWheel/GPU/cntk-2.1-cp35-cp35m-linux_x86_64.whl && \ 35 | conda install Pillow scikit-learn notebook pandas matplotlib mkl nose pyyaml six h5py && \ 36 | conda install theano pygpu bcolz && \ 37 | pip install sklearn_pandas && \ 38 | git clone git://github.com/fchollet/keras.git /src && pip install -e /src[tests] && \ 39 | pip install git+git://github.com/fchollet/keras.git && \ 40 | conda clean -yt 41 | 42 | ADD theanorc /home/keras/.theanorc 43 | 44 | ENV PYTHONPATH='/src/:$PYTHONPATH' 45 | 46 | WORKDIR /src 47 | 48 | EXPOSE 8888 49 | 50 | CMD jupyter notebook --port=8888 --ip=0.0.0.0 51 | 52 | -------------------------------------------------------------------------------- /examples/mnist_mlp.py: -------------------------------------------------------------------------------- 1 | '''Trains a simple deep NN on the MNIST dataset. 2 | 3 | Gets to 98.40% test accuracy after 20 epochs 4 | (there is *a lot* of margin for parameter tuning). 5 | 2 seconds per epoch on a K520 GPU. 
6 | ''' 7 | 8 | from __future__ import print_function 9 | 10 | import keras 11 | from keras.datasets import mnist 12 | from keras.models import Sequential 13 | from keras.layers import Dense, Dropout 14 | from keras.optimizers import RMSprop 15 | 16 | batch_size = 128 17 | num_classes = 10 18 | epochs = 20 19 | 20 | # the data, shuffled and split between train and test sets 21 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 22 | 23 | x_train = x_train.reshape(60000, 784) 24 | x_test = x_test.reshape(10000, 784) 25 | x_train = x_train.astype('float32') 26 | x_test = x_test.astype('float32') 27 | x_train /= 255 28 | x_test /= 255 29 | print(x_train.shape[0], 'train samples') 30 | print(x_test.shape[0], 'test samples') 31 | 32 | # convert class vectors to binary class matrices 33 | y_train = keras.utils.to_categorical(y_train, num_classes) 34 | y_test = keras.utils.to_categorical(y_test, num_classes) 35 | 36 | model = Sequential() 37 | model.add(Dense(512, activation='relu', input_shape=(784,))) 38 | model.add(Dropout(0.2)) 39 | model.add(Dense(512, activation='relu')) 40 | model.add(Dropout(0.2)) 41 | model.add(Dense(num_classes, activation='softmax')) 42 | 43 | model.summary() 44 | 45 | model.compile(loss='categorical_crossentropy', 46 | optimizer=RMSprop(), 47 | metrics=['accuracy']) 48 | 49 | history = model.fit(x_train, y_train, 50 | batch_size=batch_size, 51 | epochs=epochs, 52 | verbose=1, 53 | validation_data=(x_test, y_test)) 54 | score = model.evaluate(x_test, y_test, verbose=0) 55 | print('Test loss:', score[0]) 56 | print('Test accuracy:', score[1]) 57 | -------------------------------------------------------------------------------- /tests/integration_tests/test_image_data_tasks.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import numpy as np 3 | import pytest 4 | 5 | from keras.utils.test_utils import get_test_data, keras_test 6 | from keras.models import Sequential 7 | from keras import layers 8 | from keras.utils.np_utils import to_categorical 9 | 10 | 11 | @keras_test 12 | def test_image_classification(): 13 | np.random.seed(1337) 14 | input_shape = (16, 16, 3) 15 | (x_train, y_train), (x_test, y_test) = get_test_data(num_train=500, 16 | num_test=200, 17 | input_shape=input_shape, 18 | classification=True, 19 | num_classes=4) 20 | y_train = to_categorical(y_train) 21 | y_test = to_categorical(y_test) 22 | 23 | model = Sequential([ 24 | layers.Conv2D(filters=8, kernel_size=3, 25 | activation='relu', 26 | input_shape=input_shape), 27 | layers.MaxPooling2D(pool_size=2), 28 | layers.Conv2D(filters=4, kernel_size=(3, 3), 29 | activation='relu', padding='same'), 30 | layers.GlobalAveragePooling2D(), 31 | layers.Dense(y_test.shape[-1], activation='softmax') 32 | ]) 33 | model.compile(loss='categorical_crossentropy', 34 | optimizer='rmsprop', 35 | metrics=['accuracy']) 36 | model.summary() 37 | history = model.fit(x_train, y_train, epochs=10, batch_size=16, 38 | validation_data=(x_test, y_test), 39 | verbose=0) 40 | assert history.history['val_acc'][-1] > 0.75 41 | config = model.get_config() 42 | model = Sequential.from_config(config) 43 | 44 | 45 | if __name__ == '__main__': 46 | pytest.main([__file__]) 47 | -------------------------------------------------------------------------------- /docker/README.md: -------------------------------------------------------------------------------- 1 | # Using Keras via Docker 2 | 3 | This directory contains `Dockerfile` to make it easy to get up and 
running with 4 | Keras via [Docker](http://www.docker.com/). 5 | 6 | ## Installing Docker 7 | 8 | General installation instructions are 9 | [on the Docker site](https://docs.docker.com/installation/), but we give some 10 | quick links here: 11 | 12 | * [OSX](https://docs.docker.com/installation/mac/): [docker toolbox](https://www.docker.com/toolbox) 13 | * [ubuntu](https://docs.docker.com/installation/ubuntulinux/) 14 | 15 | ## Running the container 16 | 17 | We use a `Makefile` to wrap the common docker commands in simple make targets. 18 | 19 | Build the container and start a Jupyter Notebook 20 | 21 | $ make notebook 22 | 23 | Build the container and start an IPython shell 24 | 25 | $ make ipython 26 | 27 | Build the container and start a bash shell 28 | 29 | $ make bash 30 | 31 | For GPU support, install the NVIDIA drivers (ideally the latest) and 32 | [nvidia-docker](https://github.com/NVIDIA/nvidia-docker). Run using 33 | 34 | $ make notebook GPU=0 # or [ipython, bash] 35 | 36 | Switch between Theano and TensorFlow 37 | 38 | $ make notebook BACKEND=theano 39 | $ make notebook BACKEND=tensorflow 40 | 41 | Mount a volume for external data sets 42 | 43 | $ make DATA=~/mydata 44 | 45 | Print all make tasks 46 | 47 | $ make help 48 | 49 | You can change Theano parameters by editing `/docker/theanorc`. 50 | 51 | 52 | Note: If you run into problems with nvidia-docker, you can fall back to the older approach 53 | shown below, but it is not recommended. If you find a bug in nvidia-docker, please report 54 | it there and keep using nvidia-docker as described above. 55 | 56 | $ export CUDA_SO=$(\ls /usr/lib/x86_64-linux-gnu/libcuda.* | xargs -I{} echo '-v {}:{}') 57 | $ export DEVICES=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}') 58 | $ docker run -it -p 8888:8888 $CUDA_SO $DEVICES gcr.io/tensorflow/tensorflow:latest-gpu 59 | -------------------------------------------------------------------------------- /docs/templates/layers/writing-your-own-keras-layers.md: -------------------------------------------------------------------------------- 1 | # Writing your own Keras layers 2 | 3 | For simple, stateless custom operations, you are probably better off using `layers.core.Lambda` layers. But for any custom operation that has trainable weights, you should implement your own layer. 4 | 5 | Here is the skeleton of a Keras layer, **as of Keras 2.0** (if you have an older version, please upgrade). There are only three methods you need to implement: 6 | 7 | - `build(input_shape)`: this is where you will define your weights. This method must set `self.built = True`, which can be done by calling `super([Layer], self).build()`. 8 | - `call(x)`: this is where the layer's logic lives. Unless you want your layer to support masking, you only have to care about the first argument passed to `call`: the input tensor. 9 | - `compute_output_shape(input_shape)`: in case your layer modifies the shape of its input, you should specify here the shape transformation logic. This allows Keras to do automatic shape inference. 10 | 11 | ```python 12 | from keras import backend as K 13 | from keras.engine.topology import Layer 14 | import numpy as np 15 | 16 | class MyLayer(Layer): 17 | 18 | def __init__(self, output_dim, **kwargs): 19 | self.output_dim = output_dim 20 | super(MyLayer, self).__init__(**kwargs) 21 | 22 | def build(self, input_shape): 23 | # Create a trainable weight variable for this layer.
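# Note: add_weight both creates the variable and registers it with the
# layer, so it appears in `self.trainable_weights` and is updated during
# training. The (input_shape[1], output_dim) shape used here assumes a
# 2D input of shape (batch_size, input_dim).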
24 | self.kernel = self.add_weight(name='kernel', 25 | shape=(input_shape[1], self.output_dim), 26 | initializer='uniform', 27 | trainable=True) 28 | super(MyLayer, self).build(input_shape) # Be sure to call this somewhere! 29 | 30 | def call(self, x): 31 | return K.dot(x, self.kernel) 32 | 33 | def compute_output_shape(self, input_shape): 34 | return (input_shape[0], self.output_dim) 35 | ``` 36 | 37 | The existing Keras layers provide examples of how to implement almost anything. Never hesitate to read the source code! 38 | -------------------------------------------------------------------------------- /docs/mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: Keras Documentation 2 | theme: readthedocs 3 | #theme_dir: theme 4 | docs_dir: sources 5 | repo_url: http://github.com/fchollet/keras 6 | site_url: http://keras.io/ 7 | site_description: 'Documentation for Keras, the Python Deep Learning library.' 8 | 9 | dev_addr: '0.0.0.0:8000' 10 | google_analytics: ['UA-61785484-1', 'keras.io'] 11 | 12 | pages: 13 | - Home: index.md 14 | - Getting started: 15 | - Guide to the Sequential model: getting-started/sequential-model-guide.md 16 | - Guide to the Functional API: getting-started/functional-api-guide.md 17 | - FAQ: getting-started/faq.md 18 | - Models: 19 | - About Keras models: models/about-keras-models.md 20 | - Sequential: models/sequential.md 21 | - Model (functional API): models/model.md 22 | - Layers: 23 | - About Keras layers: layers/about-keras-layers.md 24 | - Core Layers: layers/core.md 25 | - Convolutional Layers: layers/convolutional.md 26 | - Pooling Layers: layers/pooling.md 27 | - Locally-connected Layers: layers/local.md 28 | - Recurrent Layers: layers/recurrent.md 29 | - Embedding Layers: layers/embeddings.md 30 | - Merge Layers: layers/merge.md 31 | - Advanced Activations Layers: layers/advanced-activations.md 32 | - Normalization Layers: layers/normalization.md 33 | - Noise layers: layers/noise.md 34 | - Layer wrappers: layers/wrappers.md 35 | - Writing your own Keras layers: layers/writing-your-own-keras-layers.md 36 | - Preprocessing: 37 | - Sequence Preprocessing: preprocessing/sequence.md 38 | - Text Preprocessing: preprocessing/text.md 39 | - Image Preprocessing: preprocessing/image.md 40 | - Losses: losses.md 41 | - Metrics: metrics.md 42 | - Optimizers: optimizers.md 43 | - Activations: activations.md 44 | - Callbacks: callbacks.md 45 | - Datasets: datasets.md 46 | - Applications: applications.md 47 | - Backend: backend.md 48 | - Initializers: initializers.md 49 | - Regularizers: regularizers.md 50 | - Constraints: constraints.md 51 | - Visualization: visualization.md 52 | - Scikit-learn API: scikit-learn-api.md 53 | - Utils: utils.md 54 | - Contributing: contributing.md 55 | -------------------------------------------------------------------------------- /examples/imdb_lstm.py: -------------------------------------------------------------------------------- 1 | '''Trains an LSTM model on the IMDB sentiment classification task. 2 | The dataset is actually too small for LSTM to be of any advantage 3 | compared to simpler, much faster methods such as TF-IDF + LogReg. 4 | Notes: 5 | 6 | - RNNs are tricky. Choice of batch size is important, 7 | choice of loss and optimizer is critical, etc. 8 | Some configurations won't converge. 9 | 10 | - LSTM loss decrease patterns during training can be quite different 11 | from what you see with CNNs/MLPs/etc. 
12 | ''' 13 | from __future__ import print_function 14 | 15 | from keras.preprocessing import sequence 16 | from keras.models import Sequential 17 | from keras.layers import Dense, Embedding 18 | from keras.layers import LSTM 19 | from keras.datasets import imdb 20 | 21 | max_features = 20000 22 | maxlen = 80 # cut texts after this number of words (among top max_features most common words) 23 | batch_size = 32 24 | 25 | print('Loading data...') 26 | (x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features) 27 | print(len(x_train), 'train sequences') 28 | print(len(x_test), 'test sequences') 29 | 30 | print('Pad sequences (samples x time)') 31 | x_train = sequence.pad_sequences(x_train, maxlen=maxlen) 32 | x_test = sequence.pad_sequences(x_test, maxlen=maxlen) 33 | print('x_train shape:', x_train.shape) 34 | print('x_test shape:', x_test.shape) 35 | 36 | print('Build model...') 37 | model = Sequential() 38 | model.add(Embedding(max_features, 128)) 39 | model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2)) 40 | model.add(Dense(1, activation='sigmoid')) 41 | 42 | # try using different optimizers and different optimizer configs 43 | model.compile(loss='binary_crossentropy', 44 | optimizer='adam', 45 | metrics=['accuracy']) 46 | 47 | print('Train...') 48 | model.fit(x_train, y_train, 49 | batch_size=batch_size, 50 | epochs=15, 51 | validation_data=(x_test, y_test)) 52 | score, acc = model.evaluate(x_test, y_test, 53 | batch_size=batch_size) 54 | print('Test score:', score) 55 | print('Test accuracy:', acc) 56 | -------------------------------------------------------------------------------- /docs/templates/models/about-keras-models.md: -------------------------------------------------------------------------------- 1 | # About Keras models 2 | 3 | There are two types of models available in Keras: [the Sequential model](/models/sequential) and [the Model class used with the functional API](/models/model). 4 | 5 | These models have a number of methods in common: 6 | 7 | - `model.summary()`: prints a summary representation of your model. 8 | - `model.get_config()`: returns a dictionary containing the configuration of the model. The model can be reinstantiated from its config via: 9 | ```python 10 | config = model.get_config() 11 | model = Model.from_config(config) 12 | # or, for Sequential: 13 | model = Sequential.from_config(config) 14 | ``` 15 | 16 | - `model.get_weights()`: returns a list of all weight tensors in the model, as Numpy arrays. 17 | - `model.set_weights(weights)`: sets the values of the weights of the model, from a list of Numpy arrays. The arrays in the list should have the same shape as those returned by `get_weights()`. 18 | - `model.to_json()`: returns a representation of the model as a JSON string. Note that the representation does not include the weights, only the architecture. You can reinstantiate the same model (with reinitialized weights) from the JSON string via: 19 | ```python 20 | from keras.models import model_from_json 21 | 22 | json_string = model.to_json() 23 | model = model_from_json(json_string) 24 | ``` 25 | - `model.to_yaml()`: returns a representation of the model as a YAML string. Note that the representation does not include the weights, only the architecture.
You can reinstantiate the same model (with reinitialized weights) from the YAML string via: 26 | ```python 27 | from keras.models import model_from_yaml 28 | 29 | yaml_string = model.to_yaml() 30 | model = model_from_yaml(yaml_string) 31 | ``` 32 | - `model.save_weights(filepath)`: saves the weights of the model as an HDF5 file. 33 | - `model.load_weights(filepath, by_name=False)`: loads the weights of the model from an HDF5 file (created by `save_weights`). By default, the architecture is expected to be unchanged. To load weights into a different architecture (with some layers in common), use `by_name=True` to load only those layers with the same name. -------------------------------------------------------------------------------- /tests/keras/regularizers_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from keras.models import Sequential 4 | from keras.layers import Dense 5 | from keras.utils import np_utils 6 | from keras.utils import test_utils 7 | from keras import regularizers 8 | 9 | data_dim = 5 10 | num_classes = 2 11 | epochs = 1 12 | batch_size = 10 13 | 14 | 15 | def get_data(): 16 | (x_train, y_train), (x_test, y_test) = test_utils.get_test_data( 17 | num_train=batch_size, 18 | num_test=batch_size, 19 | input_shape=(data_dim,), 20 | classification=True, 21 | num_classes=num_classes) 22 | y_train = np_utils.to_categorical(y_train, num_classes) 23 | y_test = np_utils.to_categorical(y_test, num_classes) 24 | 25 | return (x_train, y_train), (x_test, y_test) 26 | 27 | 28 | def create_model(kernel_regularizer=None, activity_regularizer=None): 29 | model = Sequential() 30 | model.add(Dense(num_classes, 31 | kernel_regularizer=kernel_regularizer, 32 | activity_regularizer=activity_regularizer, 33 | input_shape=(data_dim,))) 34 | return model 35 | 36 | 37 | def test_kernel_regularization(): 38 | (x_train, y_train), (x_test, y_test) = get_data() 39 | for reg in [regularizers.l1(), 40 | regularizers.l2(), 41 | regularizers.l1_l2()]: 42 | model = create_model(kernel_regularizer=reg) 43 | model.compile(loss='categorical_crossentropy', optimizer='sgd') 44 | assert len(model.losses) == 1 45 | model.fit(x_train, y_train, batch_size=batch_size, 46 | epochs=epochs, verbose=0) 47 | 48 | 49 | def test_activity_regularization(): 50 | (x_train, y_train), (x_test, y_test) = get_data() 51 | for reg in [regularizers.l1(), regularizers.l2()]: 52 | model = create_model(activity_regularizer=reg) 53 | model.compile(loss='categorical_crossentropy', optimizer='sgd') 54 | assert len(model.losses) == 1 55 | model.fit(x_train, y_train, batch_size=batch_size, 56 | epochs=epochs, verbose=0) 57 | 58 | 59 | if __name__ == '__main__': 60 | pytest.main([__file__]) 61 | -------------------------------------------------------------------------------- /examples/reuters_mlp.py: -------------------------------------------------------------------------------- 1 | '''Trains and evaluates a simple MLP 2 | on the Reuters newswire topic classification task.
3 | ''' 4 | from __future__ import print_function 5 | 6 | import numpy as np 7 | import keras 8 | from keras.datasets import reuters 9 | from keras.models import Sequential 10 | from keras.layers import Dense, Dropout, Activation 11 | from keras.preprocessing.text import Tokenizer 12 | 13 | max_words = 1000 14 | batch_size = 32 15 | epochs = 5 16 | 17 | print('Loading data...') 18 | (x_train, y_train), (x_test, y_test) = reuters.load_data(num_words=max_words, 19 | test_split=0.2) 20 | print(len(x_train), 'train sequences') 21 | print(len(x_test), 'test sequences') 22 | 23 | num_classes = np.max(y_train) + 1 24 | print(num_classes, 'classes') 25 | 26 | print('Vectorizing sequence data...') 27 | tokenizer = Tokenizer(num_words=max_words) 28 | x_train = tokenizer.sequences_to_matrix(x_train, mode='binary') 29 | x_test = tokenizer.sequences_to_matrix(x_test, mode='binary') 30 | print('x_train shape:', x_train.shape) 31 | print('x_test shape:', x_test.shape) 32 | 33 | print('Convert class vector to binary class matrix ' 34 | '(for use with categorical_crossentropy)') 35 | y_train = keras.utils.to_categorical(y_train, num_classes) 36 | y_test = keras.utils.to_categorical(y_test, num_classes) 37 | print('y_train shape:', y_train.shape) 38 | print('y_test shape:', y_test.shape) 39 | 40 | print('Building model...') 41 | model = Sequential() 42 | model.add(Dense(512, input_shape=(max_words,))) 43 | model.add(Activation('relu')) 44 | model.add(Dropout(0.5)) 45 | model.add(Dense(num_classes)) 46 | model.add(Activation('softmax')) 47 | 48 | model.compile(loss='categorical_crossentropy', 49 | optimizer='adam', 50 | metrics=['accuracy']) 51 | 52 | history = model.fit(x_train, y_train, 53 | batch_size=batch_size, 54 | epochs=epochs, 55 | verbose=1, 56 | validation_split=0.1) 57 | score = model.evaluate(x_test, y_test, 58 | batch_size=batch_size, verbose=1) 59 | print('Test score:', score[0]) 60 | print('Test accuracy:', score[1]) 61 | -------------------------------------------------------------------------------- /tests/keras/preprocessing/text_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | import pytest 5 | 6 | from keras.preprocessing.text import Tokenizer, one_hot, hashing_trick, text_to_word_sequence 7 | 8 | 9 | def test_one_hot(): 10 | text = 'The cat sat on the mat.' 11 | encoded = one_hot(text, 5) 12 | assert len(encoded) == 6 13 | assert np.max(encoded) <= 4 14 | assert np.min(encoded) >= 0 15 | 16 | 17 | def test_hashing_trick_hash(): 18 | text = 'The cat sat on the mat.' 19 | encoded = hashing_trick(text, 5) 20 | assert len(encoded) == 6 21 | assert np.max(encoded) <= 4 22 | assert np.min(encoded) >= 1 23 | 24 | 25 | def test_hashing_trick_md5(): 26 | text = 'The cat sat on the mat.' 
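# 'md5' selects a deterministic hash function, so the encoding below is
# reproducible across runs (unlike Python's built-in, salted `hash`).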
27 | encoded = hashing_trick(text, 5, hash_function='md5') 28 | assert len(encoded) == 6 29 | assert np.max(encoded) <= 4 30 | assert np.min(encoded) >= 1 31 | 32 | 33 | def test_tokenizer(): 34 | texts = ['The cat sat on the mat.', 35 | 'The dog sat on the log.', 36 | 'Dogs and cats living together.'] 37 | tokenizer = Tokenizer(num_words=10) 38 | tokenizer.fit_on_texts(texts) 39 | 40 | sequences = [] 41 | for seq in tokenizer.texts_to_sequences_generator(texts): 42 | sequences.append(seq) 43 | assert np.max(np.max(sequences)) < 10 44 | assert np.min(np.min(sequences)) == 1 45 | 46 | tokenizer.fit_on_sequences(sequences) 47 | 48 | for mode in ['binary', 'count', 'tfidf', 'freq']: 49 | matrix = tokenizer.texts_to_matrix(texts, mode) 50 | 51 | 52 | def test_text_to_word_sequence(): 53 | text = 'hello! ? world!' 54 | assert text_to_word_sequence(text) == ['hello', 'world'] 55 | 56 | 57 | def test_text_to_word_sequence_unicode(): 58 | text = u'ali! veli? kırk dokuz elli' 59 | assert text_to_word_sequence(text) == [u'ali', u'veli', u'kırk', u'dokuz', u'elli'] 60 | 61 | 62 | def test_tokenizer_unicode(): 63 | texts = [u'ali veli kırk dokuz elli', u'ali veli kırk dokuz elli veli kırk dokuz'] 64 | tokenizer = Tokenizer(num_words=5) 65 | tokenizer.fit_on_texts(texts) 66 | 67 | assert len(tokenizer.word_counts) == 5 68 | 69 | 70 | if __name__ == '__main__': 71 | pytest.main([__file__]) 72 | -------------------------------------------------------------------------------- /docs/templates/scikit-learn-api.md: -------------------------------------------------------------------------------- 1 | # Wrappers for the Scikit-Learn API 2 | 3 | You can use `Sequential` Keras models (single-input only) as part of your Scikit-Learn workflow via the wrappers found at `keras.wrappers.scikit_learn.py`. 4 | 5 | There are two wrappers available: 6 | 7 | `keras.wrappers.scikit_learn.KerasClassifier(build_fn=None, **sk_params)`, which implements the Scikit-Learn classifier interface, 8 | 9 | `keras.wrappers.scikit_learn.KerasRegressor(build_fn=None, **sk_params)`, which implements the Scikit-Learn regressor interface. 10 | 11 | ### Arguments 12 | 13 | - __build_fn__: callable function or class instance 14 | - __sk_params__: model parameters & fitting parameters 15 | 16 | `build_fn` should construct, compile and return a Keras model, which 17 | will then be used to fit/predict. One of the following 18 | three values could be passed to build_fn: 19 | 20 | 1. A function 21 | 2. An instance of a class that implements the __call__ method 22 | 3. None. This means you implement a class that inherits from either 23 | `KerasClassifier` or `KerasRegressor`. The __call__ method of the 24 | present class will then be treated as the default build_fn. 25 | 26 | `sk_params` takes both model parameters and fitting parameters. Legal model 27 | parameters are the arguments of `build_fn`. Note that like all other 28 | estimators in scikit-learn, 'build_fn' should provide default values for 29 | its arguments, so that you could create the estimator without passing any 30 | values to `sk_params`. 31 | 32 | `sk_params` could also accept parameters for calling `fit`, `predict`, 33 | `predict_proba`, and `score` methods (e.g., `epochs`, `batch_size`). 34 | fitting (predicting) parameters are selected in the following order: 35 | 36 | 1. Values passed to the dictionary arguments of 37 | `fit`, `predict`, `predict_proba`, and `score` methods 38 | 2. Values passed to `sk_params` 39 | 3. 
The default values of the `keras.models.Sequential` 40 | `fit`, `predict`, `predict_proba` and `score` methods 41 | 42 | When using scikit-learn's `grid_search` API, legal tunable parameters are 43 | those you could pass to `sk_params`, including fitting parameters. 44 | In other words, you could use `grid_search` to search for the best 45 | `batch_size` or `epochs` as well as the model parameters. 46 | -------------------------------------------------------------------------------- /examples/imdb_cnn_lstm.py: -------------------------------------------------------------------------------- 1 | '''Train a recurrent convolutional network on the IMDB sentiment 2 | classification task. 3 | 4 | Gets to 0.8498 test accuracy after 2 epochs. 41s/epoch on K520 GPU. 5 | ''' 6 | from __future__ import print_function 7 | 8 | from keras.preprocessing import sequence 9 | from keras.models import Sequential 10 | from keras.layers import Dense, Dropout, Activation 11 | from keras.layers import Embedding 12 | from keras.layers import LSTM 13 | from keras.layers import Conv1D, MaxPooling1D 14 | from keras.datasets import imdb 15 | 16 | # Embedding 17 | max_features = 20000 18 | maxlen = 100 19 | embedding_size = 128 20 | 21 | # Convolution 22 | kernel_size = 5 23 | filters = 64 24 | pool_size = 4 25 | 26 | # LSTM 27 | lstm_output_size = 70 28 | 29 | # Training 30 | batch_size = 30 31 | epochs = 2 32 | 33 | ''' 34 | Note: 35 | batch_size is highly sensitive. 36 | Only 2 epochs are needed as the dataset is very small. 37 | ''' 38 | 39 | print('Loading data...') 40 | (x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features) 41 | print(len(x_train), 'train sequences') 42 | print(len(x_test), 'test sequences') 43 | 44 | print('Pad sequences (samples x time)') 45 | x_train = sequence.pad_sequences(x_train, maxlen=maxlen) 46 | x_test = sequence.pad_sequences(x_test, maxlen=maxlen) 47 | print('x_train shape:', x_train.shape) 48 | print('x_test shape:', x_test.shape) 49 | 50 | print('Build model...') 51 | 52 | model = Sequential() 53 | model.add(Embedding(max_features, embedding_size, input_length=maxlen)) 54 | model.add(Dropout(0.25)) 55 | model.add(Conv1D(filters, 56 | kernel_size, 57 | padding='valid', 58 | activation='relu', 59 | strides=1)) 60 | model.add(MaxPooling1D(pool_size=pool_size)) 61 | model.add(LSTM(lstm_output_size)) 62 | model.add(Dense(1)) 63 | model.add(Activation('sigmoid')) 64 | 65 | model.compile(loss='binary_crossentropy', 66 | optimizer='adam', 67 | metrics=['accuracy']) 68 | 69 | print('Train...') 70 | model.fit(x_train, y_train, 71 | batch_size=batch_size, 72 | epochs=epochs, 73 | validation_data=(x_test, y_test)) 74 | score, acc = model.evaluate(x_test, y_test, batch_size=batch_size) 75 | print('Test score:', score) 76 | print('Test accuracy:', acc) 77 | -------------------------------------------------------------------------------- /tests/keras/layers/local_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from keras.utils.test_utils import layer_test 4 | from keras.utils.test_utils import keras_test 5 | from keras.layers import local 6 | 7 | 8 | @keras_test 9 | def test_locallyconnected_1d(): 10 | num_samples = 2 11 | num_steps = 8 12 | input_dim = 5 13 | filter_length = 3 14 | filters = 4 15 | padding = 'valid' 16 | strides = 1 17 | 18 | layer_test(local.LocallyConnected1D, 19 | kwargs={'filters': filters, 20 | 'kernel_size': filter_length, 21 | 'padding': padding, 22 | 'kernel_regularizer': 'l2', 23 
| 'bias_regularizer': 'l2', 24 | 'activity_regularizer': 'l2', 25 | 'strides': strides}, 26 | input_shape=(num_samples, num_steps, input_dim)) 27 | 28 | 29 | @keras_test 30 | def test_locallyconnected_2d(): 31 | num_samples = 5 32 | filters = 3 33 | stack_size = 4 34 | num_row = 6 35 | num_col = 8 36 | padding = 'valid' 37 | 38 | for strides in [(1, 1), (2, 2)]: 39 | layer_test(local.LocallyConnected2D, 40 | kwargs={'filters': filters, 41 | 'kernel_size': 3, 42 | 'padding': padding, 43 | 'kernel_regularizer': 'l2', 44 | 'bias_regularizer': 'l2', 45 | 'activity_regularizer': 'l2', 46 | 'strides': strides, 47 | 'data_format': 'channels_last'}, 48 | input_shape=(num_samples, num_row, num_col, stack_size)) 49 | 50 | layer_test(local.LocallyConnected2D, 51 | kwargs={'filters': filters, 52 | 'kernel_size': (3, 3), 53 | 'padding': padding, 54 | 'kernel_regularizer': 'l2', 55 | 'bias_regularizer': 'l2', 56 | 'activity_regularizer': 'l2', 57 | 'strides': strides, 58 | 'data_format': 'channels_first'}, 59 | input_shape=(num_samples, stack_size, num_row, num_col)) 60 | 61 | 62 | if __name__ == '__main__': 63 | pytest.main([__file__]) 64 | -------------------------------------------------------------------------------- /keras/regularizers.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import six 3 | from . import backend as K 4 | from .utils.generic_utils import serialize_keras_object 5 | from .utils.generic_utils import deserialize_keras_object 6 | 7 | 8 | class Regularizer(object): 9 | """Regularizer base class. 10 | """ 11 | 12 | def __call__(self, x): 13 | return 0. 14 | 15 | @classmethod 16 | def from_config(cls, config): 17 | return cls(**config) 18 | 19 | 20 | class L1L2(Regularizer): 21 | """Regularizer for L1 and L2 regularization. 22 | 23 | # Arguments 24 | l1: Float; L1 regularization factor. 25 | l2: Float; L2 regularization factor. 26 | """ 27 | 28 | def __init__(self, l1=0., l2=0.): 29 | self.l1 = K.cast_to_floatx(l1) 30 | self.l2 = K.cast_to_floatx(l2) 31 | 32 | def __call__(self, x): 33 | regularization = 0. 34 | if self.l1: 35 | regularization += K.sum(self.l1 * K.abs(x)) 36 | if self.l2: 37 | regularization += K.sum(self.l2 * K.square(x)) 38 | return regularization 39 | 40 | def get_config(self): 41 | return {'l1': float(self.l1), 42 | 'l2': float(self.l2)} 43 | 44 | 45 | # Aliases. 
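# A minimal usage sketch (for illustration only): the shorthands below build
# L1L2 instances that can be passed directly to layer constructors, e.g.
#   from keras import regularizers
#   Dense(64, kernel_regularizer=regularizers.l2(0.01),
#         activity_regularizer=regularizers.l1(0.001))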
46 | 47 | 48 | def l1(l=0.01): 49 | return L1L2(l1=l) 50 | 51 | 52 | def l2(l=0.01): 53 | return L1L2(l2=l) 54 | 55 | 56 | def l1_l2(l1=0.01, l2=0.01): 57 | return L1L2(l1=l1, l2=l2) 58 | 59 | 60 | def serialize(regularizer): 61 | return serialize_keras_object(regularizer) 62 | 63 | 64 | def deserialize(config, custom_objects=None): 65 | return deserialize_keras_object(config, 66 | module_objects=globals(), 67 | custom_objects=custom_objects, 68 | printable_module_name='regularizer') 69 | 70 | 71 | def get(identifier): 72 | if identifier is None: 73 | return None 74 | if isinstance(identifier, dict): 75 | return deserialize(identifier) 76 | elif isinstance(identifier, six.string_types): 77 | config = {'class_name': str(identifier), 'config': {}} 78 | return deserialize(config) 79 | elif callable(identifier): 80 | return identifier 81 | else: 82 | raise ValueError('Could not interpret regularizer identifier:', 83 | identifier) 84 | -------------------------------------------------------------------------------- /examples/imdb_cnn.py: -------------------------------------------------------------------------------- 1 | '''This example demonstrates the use of Convolution1D for text classification. 2 | 3 | Gets to 0.89 test accuracy after 2 epochs. 4 | 90s/epoch on Intel i5 2.4Ghz CPU. 5 | 10s/epoch on Tesla K40 GPU. 6 | 7 | ''' 8 | 9 | from __future__ import print_function 10 | 11 | from keras.preprocessing import sequence 12 | from keras.models import Sequential 13 | from keras.layers import Dense, Dropout, Activation 14 | from keras.layers import Embedding 15 | from keras.layers import Conv1D, GlobalMaxPooling1D 16 | from keras.datasets import imdb 17 | 18 | # set parameters: 19 | max_features = 5000 20 | maxlen = 400 21 | batch_size = 32 22 | embedding_dims = 50 23 | filters = 250 24 | kernel_size = 3 25 | hidden_dims = 250 26 | epochs = 2 27 | 28 | print('Loading data...') 29 | (x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features) 30 | print(len(x_train), 'train sequences') 31 | print(len(x_test), 'test sequences') 32 | 33 | print('Pad sequences (samples x time)') 34 | x_train = sequence.pad_sequences(x_train, maxlen=maxlen) 35 | x_test = sequence.pad_sequences(x_test, maxlen=maxlen) 36 | print('x_train shape:', x_train.shape) 37 | print('x_test shape:', x_test.shape) 38 | 39 | print('Build model...') 40 | model = Sequential() 41 | 42 | # we start off with an efficient embedding layer which maps 43 | # our vocab indices into embedding_dims dimensions 44 | model.add(Embedding(max_features, 45 | embedding_dims, 46 | input_length=maxlen)) 47 | model.add(Dropout(0.2)) 48 | 49 | # we add a Convolution1D, which will learn filters 50 | # word group filters of size filter_length: 51 | model.add(Conv1D(filters, 52 | kernel_size, 53 | padding='valid', 54 | activation='relu', 55 | strides=1)) 56 | # we use max pooling: 57 | model.add(GlobalMaxPooling1D()) 58 | 59 | # We add a vanilla hidden layer: 60 | model.add(Dense(hidden_dims)) 61 | model.add(Dropout(0.2)) 62 | model.add(Activation('relu')) 63 | 64 | # We project onto a single unit output layer, and squash it with a sigmoid: 65 | model.add(Dense(1)) 66 | model.add(Activation('sigmoid')) 67 | 68 | model.compile(loss='binary_crossentropy', 69 | optimizer='adam', 70 | metrics=['accuracy']) 71 | model.fit(x_train, y_train, 72 | batch_size=batch_size, 73 | epochs=epochs, 74 | validation_data=(x_test, y_test)) 75 | -------------------------------------------------------------------------------- /docs/templates/callbacks.md: 
-------------------------------------------------------------------------------- 1 | ## Usage of callbacks 2 | 3 | A callback is a set of functions to be applied at given stages of the training procedure. You can use callbacks to get a view on internal states and statistics of the model during training. You can pass a list of callbacks (as the keyword argument `callbacks`) to the `.fit()` method of the `Sequential` or `Model` classes. The relevant methods of the callbacks will then be called at each stage of the training. 4 | 5 | --- 6 | 7 | {{autogenerated}} 8 | 9 | --- 10 | 11 | 12 | # Create a callback 13 | 14 | You can create a custom callback by extending the base class `keras.callbacks.Callback`. A callback has access to its associated model through the class property `self.model`. 15 | 16 | Here's a simple example saving a list of losses over each batch during training: 17 | ```python 18 | class LossHistory(keras.callbacks.Callback): 19 | def on_train_begin(self, logs={}): 20 | self.losses = [] 21 | 22 | def on_batch_end(self, batch, logs={}): 23 | self.losses.append(logs.get('loss')) 24 | ``` 25 | 26 | --- 27 | 28 | ### Example: recording loss history 29 | 30 | ```python 31 | class LossHistory(keras.callbacks.Callback): 32 | def on_train_begin(self, logs={}): 33 | self.losses = [] 34 | 35 | def on_batch_end(self, batch, logs={}): 36 | self.losses.append(logs.get('loss')) 37 | 38 | model = Sequential() 39 | model.add(Dense(10, input_dim=784, kernel_initializer='uniform')) 40 | model.add(Activation('softmax')) 41 | model.compile(loss='categorical_crossentropy', optimizer='rmsprop') 42 | 43 | history = LossHistory() 44 | model.fit(x_train, y_train, batch_size=128, epochs=20, verbose=0, callbacks=[history]) 45 | 46 | print(history.losses) 47 | # outputs 48 | ''' 49 | [0.66047596406559383, 0.3547245744908703, ..., 0.25953155204159617, 0.25901699725311789] 50 | ''' 51 | ``` 52 | 53 | --- 54 | 55 | ### Example: model checkpoints 56 | 57 | ```python 58 | from keras.callbacks import ModelCheckpoint 59 | 60 | model = Sequential() 61 | model.add(Dense(10, input_dim=784, kernel_initializer='uniform')) 62 | model.add(Activation('softmax')) 63 | model.compile(loss='categorical_crossentropy', optimizer='rmsprop') 64 | 65 | ''' 66 | saves the model weights after each epoch if the validation loss decreased 67 | ''' 68 | checkpointer = ModelCheckpoint(filepath='/tmp/weights.hdf5', verbose=1, save_best_only=True) 69 | model.fit(x_train, y_train, batch_size=128, epochs=20, verbose=0, validation_data=(X_test, Y_test), callbacks=[checkpointer]) 70 | ``` 71 | -------------------------------------------------------------------------------- /examples/mnist_cnn.py: -------------------------------------------------------------------------------- 1 | '''Trains a simple convnet on the MNIST dataset. 2 | 3 | Gets to 99.25% test accuracy after 12 epochs 4 | (there is still a lot of margin for parameter tuning). 5 | 16 seconds per epoch on a GRID K520 GPU. 
6 | ''' 7 | 8 | from __future__ import print_function 9 | import keras 10 | from keras.datasets import mnist 11 | from keras.models import Sequential 12 | from keras.layers import Dense, Dropout, Flatten 13 | from keras.layers import Conv2D, MaxPooling2D 14 | from keras import backend as K 15 | 16 | batch_size = 128 17 | num_classes = 10 18 | epochs = 12 19 | 20 | # input image dimensions 21 | img_rows, img_cols = 28, 28 22 | 23 | # the data, shuffled and split between train and test sets 24 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 25 | 26 | if K.image_data_format() == 'channels_first': 27 | x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols) 28 | x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols) 29 | input_shape = (1, img_rows, img_cols) 30 | else: 31 | x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1) 32 | x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1) 33 | input_shape = (img_rows, img_cols, 1) 34 | 35 | x_train = x_train.astype('float32') 36 | x_test = x_test.astype('float32') 37 | x_train /= 255 38 | x_test /= 255 39 | print('x_train shape:', x_train.shape) 40 | print(x_train.shape[0], 'train samples') 41 | print(x_test.shape[0], 'test samples') 42 | 43 | # convert class vectors to binary class matrices 44 | y_train = keras.utils.to_categorical(y_train, num_classes) 45 | y_test = keras.utils.to_categorical(y_test, num_classes) 46 | 47 | model = Sequential() 48 | model.add(Conv2D(32, kernel_size=(3, 3), 49 | activation='relu', 50 | input_shape=input_shape)) 51 | model.add(Conv2D(64, (3, 3), activation='relu')) 52 | model.add(MaxPooling2D(pool_size=(2, 2))) 53 | model.add(Dropout(0.25)) 54 | model.add(Flatten()) 55 | model.add(Dense(128, activation='relu')) 56 | model.add(Dropout(0.5)) 57 | model.add(Dense(num_classes, activation='softmax')) 58 | 59 | model.compile(loss=keras.losses.categorical_crossentropy, 60 | optimizer=keras.optimizers.Adadelta(), 61 | metrics=['accuracy']) 62 | 63 | model.fit(x_train, y_train, 64 | batch_size=batch_size, 65 | epochs=epochs, 66 | verbose=1, 67 | validation_data=(x_test, y_test)) 68 | score = model.evaluate(x_test, y_test, verbose=0) 69 | print('Test loss:', score[0]) 70 | print('Test accuracy:', score[1]) 71 | -------------------------------------------------------------------------------- /keras/metrics.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import six 3 | from . 
import backend as K 4 | from .losses import mean_squared_error 5 | from .losses import mean_absolute_error 6 | from .losses import mean_absolute_percentage_error 7 | from .losses import mean_squared_logarithmic_error 8 | from .losses import hinge 9 | from .losses import logcosh 10 | from .losses import squared_hinge 11 | from .losses import categorical_crossentropy 12 | from .losses import sparse_categorical_crossentropy 13 | from .losses import binary_crossentropy 14 | from .losses import kullback_leibler_divergence 15 | from .losses import poisson 16 | from .losses import cosine_proximity 17 | from .utils.generic_utils import deserialize_keras_object 18 | 19 | 20 | def binary_accuracy(y_true, y_pred): 21 | return K.mean(K.equal(y_true, K.round(y_pred)), axis=-1) 22 | 23 | 24 | def categorical_accuracy(y_true, y_pred): 25 | return K.cast(K.equal(K.argmax(y_true, axis=-1), 26 | K.argmax(y_pred, axis=-1)), 27 | K.floatx()) 28 | 29 | 30 | def sparse_categorical_accuracy(y_true, y_pred): 31 | return K.cast(K.equal(K.max(y_true, axis=-1), 32 | K.cast(K.argmax(y_pred, axis=-1), K.floatx())), 33 | K.floatx()) 34 | 35 | 36 | def top_k_categorical_accuracy(y_true, y_pred, k=5): 37 | return K.mean(K.in_top_k(y_pred, K.argmax(y_true, axis=-1), k), axis=-1) 38 | 39 | 40 | def sparse_top_k_categorical_accuracy(y_true, y_pred, k=5): 41 | return K.mean(K.in_top_k(y_pred, K.cast(K.max(y_true, axis=-1), 'int32'), k), axis=-1) 42 | 43 | 44 | # Aliases 45 | 46 | mse = MSE = mean_squared_error 47 | mae = MAE = mean_absolute_error 48 | mape = MAPE = mean_absolute_percentage_error 49 | msle = MSLE = mean_squared_logarithmic_error 50 | cosine = cosine_proximity 51 | 52 | 53 | def serialize(metric): 54 | return metric.__name__ 55 | 56 | 57 | def deserialize(name, custom_objects=None): 58 | return deserialize_keras_object(name, 59 | module_objects=globals(), 60 | custom_objects=custom_objects, 61 | printable_module_name='metric function') 62 | 63 | 64 | def get(identifier): 65 | if isinstance(identifier, six.string_types): 66 | identifier = str(identifier) 67 | return deserialize(identifier) 68 | elif callable(identifier): 69 | return identifier 70 | else: 71 | raise ValueError('Could not interpret ' 72 | 'metric function identifier:', identifier) 73 | -------------------------------------------------------------------------------- /examples/mnist_irnn.py: -------------------------------------------------------------------------------- 1 | '''This is a reproduction of the IRNN experiment 2 | with pixel-by-pixel sequential MNIST in 3 | "A Simple Way to Initialize Recurrent Networks of Rectified Linear Units" 4 | by Quoc V. Le, Navdeep Jaitly, Geoffrey E. Hinton 5 | 6 | arxiv:1504.00941v2 [cs.NE] 7 Apr 2015 7 | http://arxiv.org/pdf/1504.00941v2.pdf 8 | 9 | Optimizer is replaced with RMSprop which yields more stable and steady 10 | improvement. 11 | 12 | Reaches 0.93 train/test accuracy after 900 epochs 13 | (which roughly corresponds to 1687500 steps in the original paper.) 
14 | ''' 15 | 16 | from __future__ import print_function 17 | 18 | import keras 19 | from keras.datasets import mnist 20 | from keras.models import Sequential 21 | from keras.layers import Dense, Activation 22 | from keras.layers import SimpleRNN 23 | from keras import initializers 24 | from keras.optimizers import RMSprop 25 | 26 | batch_size = 32 27 | num_classes = 10 28 | epochs = 200 29 | hidden_units = 100 30 | 31 | learning_rate = 1e-6 32 | clip_norm = 1.0 33 | 34 | # the data, shuffled and split between train and test sets 35 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 36 | 37 | x_train = x_train.reshape(x_train.shape[0], -1, 1) 38 | x_test = x_test.reshape(x_test.shape[0], -1, 1) 39 | x_train = x_train.astype('float32') 40 | x_test = x_test.astype('float32') 41 | x_train /= 255 42 | x_test /= 255 43 | print('x_train shape:', x_train.shape) 44 | print(x_train.shape[0], 'train samples') 45 | print(x_test.shape[0], 'test samples') 46 | 47 | # convert class vectors to binary class matrices 48 | y_train = keras.utils.to_categorical(y_train, num_classes) 49 | y_test = keras.utils.to_categorical(y_test, num_classes) 50 | 51 | print('Evaluate IRNN...') 52 | model = Sequential() 53 | model.add(SimpleRNN(hidden_units, 54 | kernel_initializer=initializers.RandomNormal(stddev=0.001), 55 | recurrent_initializer=initializers.Identity(gain=1.0), 56 | activation='relu', 57 | input_shape=x_train.shape[1:])) 58 | model.add(Dense(num_classes)) 59 | model.add(Activation('softmax')) 60 | rmsprop = RMSprop(lr=learning_rate) 61 | model.compile(loss='categorical_crossentropy', 62 | optimizer=rmsprop, 63 | metrics=['accuracy']) 64 | 65 | model.fit(x_train, y_train, 66 | batch_size=batch_size, 67 | epochs=epochs, 68 | verbose=1, 69 | validation_data=(x_test, y_test)) 70 | 71 | scores = model.evaluate(x_test, y_test, verbose=0) 72 | print('IRNN test score:', scores[0]) 73 | print('IRNN test accuracy:', scores[1]) 74 | -------------------------------------------------------------------------------- /tests/keras/utils/layer_utils_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | from numpy.testing import assert_allclose 4 | from keras import backend as K 5 | from keras.layers import Conv2D 6 | from keras.layers import Dense 7 | from keras.layers import Flatten 8 | from keras.models import Sequential 9 | from keras.utils import layer_utils 10 | from keras.utils.test_utils import keras_test 11 | 12 | 13 | @keras_test 14 | def test_convert_weights(): 15 | def get_model(shape, data_format): 16 | model = Sequential() 17 | model.add(Conv2D(filters=2, 18 | kernel_size=(4, 3), 19 | input_shape=shape, 20 | data_format=data_format)) 21 | model.add(Flatten()) 22 | model.add(Dense(5)) 23 | return model 24 | 25 | for data_format in ['channels_first', 'channels_last']: 26 | if data_format == 'channels_first': 27 | shape = (3, 5, 5) 28 | target_shape = (5, 5, 3) 29 | prev_shape = (2, 3, 2) 30 | flip = lambda x: np.flip(np.flip(x, axis=2), axis=3) 31 | transpose = lambda x: np.transpose(x, (0, 2, 3, 1)) 32 | target_data_format = 'channels_last' 33 | elif data_format == 'channels_last': 34 | shape = (5, 5, 3) 35 | target_shape = (3, 5, 5) 36 | prev_shape = (2, 2, 3) 37 | flip = lambda x: np.flip(np.flip(x, axis=1), axis=2) 38 | transpose = lambda x: np.transpose(x, (0, 3, 1, 2)) 39 | target_data_format = 'channels_first' 40 | 41 | model1 = get_model(shape, data_format) 42 | model2 = get_model(target_shape, target_data_format) 43 
| conv = K.function([model1.input], [model1.layers[0].output]) 44 | 45 | x = np.random.random((1,) + shape) 46 | 47 | # Test equivalence of convert_all_kernels_in_model 48 | convout1 = conv([x])[0] 49 | layer_utils.convert_all_kernels_in_model(model1) 50 | convout2 = flip(conv([flip(x)])[0]) 51 | 52 | assert_allclose(convout1, convout2, atol=1e-5) 53 | 54 | # Test equivalence of convert_dense_weights_data_format 55 | out1 = model1.predict(x) 56 | layer_utils.convert_dense_weights_data_format(model1.layers[2], prev_shape, target_data_format) 57 | for (src, dst) in zip(model1.layers, model2.layers): 58 | dst.set_weights(src.get_weights()) 59 | out2 = model2.predict(transpose(x)) 60 | 61 | assert_allclose(out1, out2, atol=1e-5) 62 | 63 | 64 | if __name__ == '__main__': 65 | pytest.main([__file__]) 66 | -------------------------------------------------------------------------------- /tests/keras/losses_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | 4 | from keras import losses 5 | from keras import backend as K 6 | 7 | 8 | allobj = [losses.mean_squared_error, 9 | losses.mean_absolute_error, 10 | losses.mean_absolute_percentage_error, 11 | losses.mean_squared_logarithmic_error, 12 | losses.squared_hinge, 13 | losses.hinge, 14 | losses.categorical_crossentropy, 15 | losses.binary_crossentropy, 16 | losses.kullback_leibler_divergence, 17 | losses.poisson, 18 | losses.cosine_proximity, 19 | losses.logcosh, 20 | losses.categorical_hinge] 21 | 22 | 23 | def test_objective_shapes_3d(): 24 | y_a = K.variable(np.random.random((5, 6, 7))) 25 | y_b = K.variable(np.random.random((5, 6, 7))) 26 | for obj in allobj: 27 | objective_output = obj(y_a, y_b) 28 | assert K.eval(objective_output).shape == (5, 6) 29 | 30 | 31 | def test_objective_shapes_2d(): 32 | y_a = K.variable(np.random.random((6, 7))) 33 | y_b = K.variable(np.random.random((6, 7))) 34 | for obj in allobj: 35 | objective_output = obj(y_a, y_b) 36 | assert K.eval(objective_output).shape == (6,) 37 | 38 | 39 | def test_cce_one_hot(): 40 | y_a = K.variable(np.random.randint(0, 7, (5, 6))) 41 | y_b = K.variable(np.random.random((5, 6, 7))) 42 | objective_output = losses.sparse_categorical_crossentropy(y_a, y_b) 43 | assert K.eval(objective_output).shape == (5, 6) 44 | 45 | y_a = K.variable(np.random.randint(0, 7, (6,))) 46 | y_b = K.variable(np.random.random((6, 7))) 47 | assert K.eval(losses.sparse_categorical_crossentropy(y_a, y_b)).shape == (6,) 48 | 49 | 50 | def test_categorical_hinge(): 51 | y_pred = K.variable(np.array([[0.3, 0.2, 0.1], 52 | [0.1, 0.2, 0.7]])) 53 | y_true = K.variable(np.array([[0, 1, 0], 54 | [1, 0, 0]])) 55 | expected_loss = ((0.3 - 0.2 + 1) + (0.7 - 0.1 + 1)) / 2.0 56 | loss = K.eval(losses.categorical_hinge(y_true, y_pred)) 57 | assert np.isclose(expected_loss, np.mean(loss)) 58 | 59 | 60 | def test_sparse_categorical_crossentropy(): 61 | y_pred = K.variable(np.array([[0.3, 0.6, 0.1], 62 | [0.1, 0.2, 0.7]])) 63 | y_true = K.variable(np.array([1, 2])) 64 | expected_loss = - (np.log(0.6) + np.log(0.7)) / 2 65 | loss = K.eval(losses.sparse_categorical_crossentropy(y_true, y_pred)) 66 | assert np.isclose(expected_loss, np.mean(loss)) 67 | 68 | 69 | if __name__ == '__main__': 70 | pytest.main([__file__]) 71 | -------------------------------------------------------------------------------- /tests/keras/utils/generic_utils_test.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import 
pytest 3 | import numpy as np 4 | from keras.utils.generic_utils import custom_object_scope 5 | from keras.utils.generic_utils import has_arg 6 | from keras.utils.generic_utils import Progbar 7 | from keras.utils.test_utils import keras_test 8 | from keras import activations 9 | from keras import regularizers 10 | 11 | 12 | @keras_test 13 | def test_progbar(): 14 | n = 2 15 | input_arr = np.random.random((n, n, n)) 16 | bar = Progbar(n) 17 | 18 | for i, arr in enumerate(input_arr): 19 | bar.update(i, list(arr)) 20 | 21 | 22 | def test_custom_objects_scope(): 23 | 24 | def custom_fn(): 25 | pass 26 | 27 | class CustomClass(object): 28 | pass 29 | 30 | with custom_object_scope({'CustomClass': CustomClass, 31 | 'custom_fn': custom_fn}): 32 | act = activations.get('custom_fn') 33 | assert act == custom_fn 34 | cl = regularizers.get('CustomClass') 35 | assert cl.__class__ == CustomClass 36 | 37 | 38 | @pytest.mark.parametrize('fn, name, accept_all, expected', [ 39 | ('f(x)', 'x', False, True), 40 | ('f(x)', 'y', False, False), 41 | ('f(x)', 'y', True, False), 42 | ('f(x, y)', 'y', False, True), 43 | ('f(x, y=1)', 'y', False, True), 44 | ('f(x, **kwargs)', 'x', False, True), 45 | ('f(x, **kwargs)', 'y', False, False), 46 | ('f(x, **kwargs)', 'y', True, True), 47 | ('f(x, y=1, **kwargs)', 'y', False, True), 48 | # Keyword-only arguments (Python 3 only) 49 | ('f(x, *args, y=1)', 'y', False, True), 50 | ('f(x, *args, y=1)', 'z', True, False), 51 | ('f(x, *, y=1)', 'x', False, True), 52 | ('f(x, *, y=1)', 'y', False, True), 53 | # lambda 54 | (lambda x: x, 'x', False, True), 55 | (lambda x: x, 'y', False, False), 56 | (lambda x: x, 'y', True, False), 57 | ]) 58 | def test_has_arg(fn, name, accept_all, expected): 59 | if isinstance(fn, str): 60 | context = dict() 61 | try: 62 | exec('def {}: pass'.format(fn), context) 63 | except SyntaxError: 64 | if sys.version_info >= (3,): 65 | raise 66 | pytest.skip('Function is not compatible with Python 2') 67 | context.pop('__builtins__', None) # Sometimes exec adds builtins to the context 68 | fn, = context.values() 69 | 70 | assert has_arg(fn, name, accept_all) is expected 71 | 72 | 73 | @pytest.mark.xfail(sys.version_info < (3, 3), 74 | reason='inspect API does not reveal positional-only arguments') 75 | def test_has_arg_positional_only(): 76 | assert has_arg(pow, 'x') is False 77 | 78 | 79 | if __name__ == '__main__': 80 | pytest.main([__file__]) 81 | -------------------------------------------------------------------------------- /tests/keras/constraints_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | from numpy.testing import assert_allclose 4 | 5 | from keras import backend as K 6 | from keras import constraints 7 | from keras.utils.test_utils import keras_test 8 | 9 | 10 | def get_test_values(): 11 | return [0.1, 0.5, 3, 8, 1e-7] 12 | 13 | 14 | def get_example_array(): 15 | np.random.seed(3537) 16 | example_array = np.random.random((100, 100)) * 100. - 50. 17 | example_array[0, 0] = 0. 
# 0 could possibly cause trouble 18 | return example_array 19 | 20 | 21 | def test_serialization(): 22 | all_activations = ['max_norm', 'non_neg', 23 | 'unit_norm', 'min_max_norm'] 24 | for name in all_activations: 25 | fn = constraints.get(name) 26 | ref_fn = getattr(constraints, name)() 27 | assert fn.__class__ == ref_fn.__class__ 28 | config = constraints.serialize(fn) 29 | fn = constraints.deserialize(config) 30 | assert fn.__class__ == ref_fn.__class__ 31 | 32 | 33 | @keras_test 34 | def test_max_norm(): 35 | array = get_example_array() 36 | for m in get_test_values(): 37 | norm_instance = constraints.max_norm(m) 38 | normed = norm_instance(K.variable(array)) 39 | assert(np.all(K.eval(normed) < m)) 40 | 41 | # a more explicit example 42 | norm_instance = constraints.max_norm(2.0) 43 | x = np.array([[0, 0, 0], [1.0, 0, 0], [3, 0, 0], [3, 3, 3]]).T 44 | x_normed_target = np.array([[0, 0, 0], [1.0, 0, 0], 45 | [2.0, 0, 0], 46 | [2. / np.sqrt(3), 47 | 2. / np.sqrt(3), 48 | 2. / np.sqrt(3)]]).T 49 | x_normed_actual = K.eval(norm_instance(K.variable(x))) 50 | assert_allclose(x_normed_actual, x_normed_target, rtol=1e-05) 51 | 52 | 53 | @keras_test 54 | def test_non_neg(): 55 | non_neg_instance = constraints.non_neg() 56 | normed = non_neg_instance(K.variable(get_example_array())) 57 | assert(np.all(np.min(K.eval(normed), axis=1) == 0.)) 58 | 59 | 60 | @keras_test 61 | def test_unit_norm(): 62 | unit_norm_instance = constraints.unit_norm() 63 | normalized = unit_norm_instance(K.variable(get_example_array())) 64 | norm_of_normalized = np.sqrt(np.sum(K.eval(normalized) ** 2, axis=0)) 65 | # In the unit norm constraint, it should be equal to 1. 66 | difference = norm_of_normalized - 1. 67 | largest_difference = np.max(np.abs(difference)) 68 | assert(np.abs(largest_difference) < 10e-5) 69 | 70 | 71 | @keras_test 72 | def test_min_max_norm(): 73 | array = get_example_array() 74 | for m in get_test_values(): 75 | norm_instance = constraints.min_max_norm(min_value=m, max_value=m * 2) 76 | normed = norm_instance(K.variable(array)) 77 | value = K.eval(normed) 78 | l2 = np.sqrt(np.sum(np.square(value), axis=0)) 79 | assert not l2[l2 < m] 80 | assert not l2[l2 > m * 2 + 1e-5] 81 | 82 | 83 | if __name__ == '__main__': 84 | pytest.main([__file__]) 85 | -------------------------------------------------------------------------------- /tests/keras/preprocessing/sequence_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy.testing import assert_allclose 3 | 4 | import pytest 5 | 6 | from keras.preprocessing.sequence import pad_sequences 7 | from keras.preprocessing.sequence import make_sampling_table 8 | from keras.preprocessing.sequence import skipgrams 9 | 10 | 11 | def test_pad_sequences(): 12 | a = [[1], [1, 2], [1, 2, 3]] 13 | 14 | # test padding 15 | b = pad_sequences(a, maxlen=3, padding='pre') 16 | assert_allclose(b, [[0, 0, 1], [0, 1, 2], [1, 2, 3]]) 17 | b = pad_sequences(a, maxlen=3, padding='post') 18 | assert_allclose(b, [[1, 0, 0], [1, 2, 0], [1, 2, 3]]) 19 | 20 | # test truncating 21 | b = pad_sequences(a, maxlen=2, truncating='pre') 22 | assert_allclose(b, [[0, 1], [1, 2], [2, 3]]) 23 | b = pad_sequences(a, maxlen=2, truncating='post') 24 | assert_allclose(b, [[0, 1], [1, 2], [1, 2]]) 25 | 26 | # test value 27 | b = pad_sequences(a, maxlen=3, value=1) 28 | assert_allclose(b, [[1, 1, 1], [1, 1, 2], [1, 2, 3]]) 29 | 30 | 31 | def test_pad_sequences_vector(): 32 | a = [[[1, 1]], 33 | [[2, 1], [2, 2]], 34 | [[3, 1], [3, 2], 
[3, 3]]] 35 | 36 | # test padding 37 | b = pad_sequences(a, maxlen=3, padding='pre') 38 | assert_allclose(b, [[[0, 0], [0, 0], [1, 1]], 39 | [[0, 0], [2, 1], [2, 2]], 40 | [[3, 1], [3, 2], [3, 3]]]) 41 | b = pad_sequences(a, maxlen=3, padding='post') 42 | assert_allclose(b, [[[1, 1], [0, 0], [0, 0]], 43 | [[2, 1], [2, 2], [0, 0]], 44 | [[3, 1], [3, 2], [3, 3]]]) 45 | 46 | # test truncating 47 | b = pad_sequences(a, maxlen=2, truncating='pre') 48 | assert_allclose(b, [[[0, 0], [1, 1]], 49 | [[2, 1], [2, 2]], 50 | [[3, 2], [3, 3]]]) 51 | 52 | b = pad_sequences(a, maxlen=2, truncating='post') 53 | assert_allclose(b, [[[0, 0], [1, 1]], 54 | [[2, 1], [2, 2]], 55 | [[3, 1], [3, 2]]]) 56 | 57 | # test value 58 | b = pad_sequences(a, maxlen=3, value=1) 59 | assert_allclose(b, [[[1, 1], [1, 1], [1, 1]], 60 | [[1, 1], [2, 1], [2, 2]], 61 | [[3, 1], [3, 2], [3, 3]]]) 62 | 63 | 64 | def test_make_sampling_table(): 65 | a = make_sampling_table(3) 66 | assert_allclose(a, np.asarray([0.00315225, 0.00315225, 0.00547597]), 67 | rtol=.1) 68 | 69 | 70 | def test_skipgrams(): 71 | # test with no window size and binary labels 72 | couples, labels = skipgrams(np.arange(3), vocabulary_size=3) 73 | for couple in couples: 74 | assert couple[0] in [0, 1, 2] and couple[1] in [0, 1, 2] 75 | 76 | # test window size and categorical labels 77 | couples, labels = skipgrams(np.arange(5), vocabulary_size=5, window_size=1, 78 | categorical=True) 79 | for couple in couples: 80 | assert couple[0] - couple[1] <= 3 81 | for l in labels: 82 | assert len(l) == 2 83 | 84 | 85 | if __name__ == '__main__': 86 | pytest.main([__file__]) 87 | -------------------------------------------------------------------------------- /keras/activations.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import six 3 | import warnings 4 | from . import backend as K 5 | from .utils.generic_utils import deserialize_keras_object 6 | from .engine import Layer 7 | 8 | 9 | def softmax(x, axis=-1): 10 | """Softmax activation function. 11 | 12 | # Arguments 13 | x : Tensor. 14 | axis: Integer, axis along which the softmax normalization is applied. 15 | 16 | # Returns 17 | Tensor, output of softmax transformation. 18 | 19 | # Raises 20 | ValueError: In case `dim(x) == 1`. 21 | """ 22 | ndim = K.ndim(x) 23 | if ndim == 2: 24 | return K.softmax(x) 25 | elif ndim > 2: 26 | e = K.exp(x - K.max(x, axis=axis, keepdims=True)) 27 | s = K.sum(e, axis=axis, keepdims=True) 28 | return e / s 29 | else: 30 | raise ValueError('Cannot apply softmax to a tensor that is 1D') 31 | 32 | 33 | def elu(x, alpha=1.0): 34 | return K.elu(x, alpha) 35 | 36 | 37 | def selu(x): 38 | """Scaled Exponential Linear Unit. (Klambauer et al., 2017) 39 | 40 | # Arguments 41 | x: A tensor or variable to compute the activation function for. 
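# Returns
        The scaled exponential linear activation: `scale * elu(x, alpha)`,
        using the `alpha` and `scale` constants defined below.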
42 | 43 | # References 44 | - [Self-Normalizing Neural Networks](https://arxiv.org/abs/1706.02515) 45 | """ 46 | alpha = 1.6732632423543772848170429916717 47 | scale = 1.0507009873554804934193349852946 48 | return scale * K.elu(x, alpha) 49 | 50 | 51 | def softplus(x): 52 | return K.softplus(x) 53 | 54 | 55 | def softsign(x): 56 | return K.softsign(x) 57 | 58 | 59 | def relu(x, alpha=0., max_value=None): 60 | return K.relu(x, alpha=alpha, max_value=max_value) 61 | 62 | 63 | def tanh(x): 64 | return K.tanh(x) 65 | 66 | 67 | def sigmoid(x): 68 | return K.sigmoid(x) 69 | 70 | 71 | def hard_sigmoid(x): 72 | return K.hard_sigmoid(x) 73 | 74 | 75 | def linear(x): 76 | return x 77 | 78 | 79 | def serialize(activation): 80 | return activation.__name__ 81 | 82 | 83 | def deserialize(name, custom_objects=None): 84 | return deserialize_keras_object(name, 85 | module_objects=globals(), 86 | custom_objects=custom_objects, 87 | printable_module_name='activation function') 88 | 89 | 90 | def get(identifier): 91 | if identifier is None: 92 | return linear 93 | if isinstance(identifier, six.string_types): 94 | identifier = str(identifier) 95 | return deserialize(identifier) 96 | elif callable(identifier): 97 | if isinstance(identifier, Layer): 98 | warnings.warn(( 99 | 'Do not pass a layer instance (such as {identifier}) as the ' 100 | 'activation argument of another layer. Instead, advanced ' 101 | 'activation layers should be used just like any other ' 102 | 'layer in a model.' 103 | ).format(identifier=identifier.__class__.__name__)) 104 | return identifier 105 | else: 106 | raise ValueError('Could not interpret ' 107 | 'activation function identifier:', identifier) 108 | -------------------------------------------------------------------------------- /tests/keras/utils/io_utils_test.py: -------------------------------------------------------------------------------- 1 | '''Tests for functions in io_utils.py. 2 | ''' 3 | import os 4 | import pytest 5 | from keras.models import Sequential 6 | from keras.layers import Dense 7 | from keras.utils.io_utils import HDF5Matrix 8 | import numpy as np 9 | import warnings 10 | import h5py 11 | 12 | 13 | @pytest.fixture 14 | def in_tmpdir(tmpdir): 15 | """Runs a function in a temporary directory. 16 | 17 | Checks that the directory is empty afterwards. 
18 | """ 19 | with tmpdir.as_cwd(): 20 | yield None 21 | assert not tmpdir.listdir() 22 | 23 | 24 | def create_dataset(h5_path='test.h5'): 25 | X = np.random.randn(200, 10).astype('float32') 26 | y = np.random.randint(0, 2, size=(200, 1)) 27 | f = h5py.File(h5_path, 'w') 28 | # Creating dataset to store features 29 | X_dset = f.create_dataset('my_data', (200, 10), dtype='f') 30 | X_dset[:] = X 31 | # Creating dataset to store labels 32 | y_dset = f.create_dataset('my_labels', (200, 1), dtype='i') 33 | y_dset[:] = y 34 | f.close() 35 | 36 | 37 | def test_io_utils(in_tmpdir): 38 | '''Tests the HDF5Matrix code using the sample from @jfsantos at 39 | https://gist.github.com/jfsantos/e2ef822c744357a4ed16ec0c885100a3 40 | ''' 41 | h5_path = 'test.h5' 42 | create_dataset(h5_path) 43 | 44 | # Instantiating HDF5Matrix for the training set, which is a slice of the first 150 elements 45 | X_train = HDF5Matrix(h5_path, 'my_data', start=0, end=150) 46 | y_train = HDF5Matrix(h5_path, 'my_labels', start=0, end=150) 47 | 48 | # Likewise for the test set 49 | X_test = HDF5Matrix(h5_path, 'my_data', start=150, end=200) 50 | y_test = HDF5Matrix(h5_path, 'my_labels', start=150, end=200) 51 | 52 | # HDF5Matrix behave more or less like Numpy matrices with regards to indexing 53 | assert y_train.shape == (150, 1), 'HDF5Matrix shape should match input array' 54 | # But they do not support negative indices, so don't try print(X_train[-1]) 55 | 56 | assert y_train.dtype == np.dtype('i'), 'HDF5Matrix dtype should match input array' 57 | assert y_train.ndim == 2, 'HDF5Matrix ndim should match input array' 58 | assert y_train.size == 150, 'HDF5Matrix ndim should match input array' 59 | 60 | model = Sequential() 61 | model.add(Dense(64, input_shape=(10,), activation='relu')) 62 | model.add(Dense(1, activation='sigmoid')) 63 | 64 | model.compile(loss='binary_crossentropy', optimizer='sgd') 65 | 66 | # Note: you have to use shuffle='batch' or False with HDF5Matrix 67 | model.fit(X_train, y_train, batch_size=32, shuffle='batch', verbose=False) 68 | # test that evalutation and prediction don't crash and return reasonable results 69 | out_pred = model.predict(X_test, batch_size=32, verbose=False) 70 | out_eval = model.evaluate(X_test, y_test, batch_size=32, verbose=False) 71 | 72 | assert out_pred.shape == (50, 1), 'Prediction shape does not match' 73 | assert out_eval.shape == (), 'Shape of evaluation does not match' 74 | assert out_eval > 0, 'Evaluation value does not meet criteria: {}'.format(out_eval) 75 | 76 | # test slicing for shortened array 77 | assert len(X_train[0:]) == len(X_train), 'Incorrect shape for sliced data' 78 | 79 | os.remove(h5_path) 80 | 81 | 82 | if __name__ == '__main__': 83 | pytest.main([__file__]) 84 | -------------------------------------------------------------------------------- /keras/backend/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | import os 4 | import json 5 | import sys 6 | from .common import epsilon 7 | from .common import floatx 8 | from .common import set_epsilon 9 | from .common import set_floatx 10 | from .common import cast_to_floatx 11 | from .common import image_data_format 12 | from .common import set_image_data_format 13 | 14 | # Obtain Keras base dir path: either ~/.keras or /tmp. 
15 | _keras_base_dir = os.path.expanduser('~') 16 | if not os.access(_keras_base_dir, os.W_OK): 17 | _keras_base_dir = '/tmp' 18 | _keras_dir = os.path.join(_keras_base_dir, '.keras') 19 | 20 | # Default backend: TensorFlow. 21 | _BACKEND = 'tensorflow' 22 | 23 | # Attempt to read Keras config file. 24 | _config_path = os.path.expanduser(os.path.join(_keras_dir, 'keras.json')) 25 | if os.path.exists(_config_path): 26 | try: 27 | _config = json.load(open(_config_path)) 28 | except ValueError: 29 | _config = {} 30 | _floatx = _config.get('floatx', floatx()) 31 | assert _floatx in {'float16', 'float32', 'float64'} 32 | _epsilon = _config.get('epsilon', epsilon()) 33 | assert isinstance(_epsilon, float) 34 | _backend = _config.get('backend', _BACKEND) 35 | assert _backend in {'theano', 'tensorflow', 'cntk'} 36 | _image_data_format = _config.get('image_data_format', 37 | image_data_format()) 38 | assert _image_data_format in {'channels_last', 'channels_first'} 39 | 40 | set_floatx(_floatx) 41 | set_epsilon(_epsilon) 42 | set_image_data_format(_image_data_format) 43 | _BACKEND = _backend 44 | 45 | # Save config file, if possible. 46 | if not os.path.exists(_keras_dir): 47 | try: 48 | os.makedirs(_keras_dir) 49 | except OSError: 50 | # Except permission denied and potential race conditions 51 | # in multi-threaded environments. 52 | pass 53 | 54 | if not os.path.exists(_config_path): 55 | _config = { 56 | 'floatx': floatx(), 57 | 'epsilon': epsilon(), 58 | 'backend': _BACKEND, 59 | 'image_data_format': image_data_format() 60 | } 61 | try: 62 | with open(_config_path, 'w') as f: 63 | f.write(json.dumps(_config, indent=4)) 64 | except IOError: 65 | # Except permission denied. 66 | pass 67 | 68 | # Set backend based on KERAS_BACKEND flag, if applicable. 69 | if 'KERAS_BACKEND' in os.environ: 70 | _backend = os.environ['KERAS_BACKEND'] 71 | assert _backend in {'theano', 'tensorflow', 'cntk'} 72 | _BACKEND = _backend 73 | 74 | # Import backend functions. 75 | if _BACKEND == 'cntk': 76 | sys.stderr.write('Using CNTK backend\n') 77 | from .cntk_backend import * 78 | elif _BACKEND == 'theano': 79 | sys.stderr.write('Using Theano backend.\n') 80 | from .theano_backend import * 81 | elif _BACKEND == 'tensorflow': 82 | sys.stderr.write('Using TensorFlow backend.\n') 83 | from .tensorflow_backend import * 84 | else: 85 | raise ValueError('Unknown backend: ' + str(_BACKEND)) 86 | 87 | 88 | def backend(): 89 | """Publicly accessible method 90 | for determining the current backend. 91 | 92 | # Returns 93 | String, the name of the backend Keras is currently using. 
94 | 95 | # Example 96 | ```python 97 | >>> keras.backend.backend() 98 | 'tensorflow' 99 | ``` 100 | """ 101 | return _BACKEND 102 | -------------------------------------------------------------------------------- /tests/keras/datasets/test_datasets.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import pytest 3 | import time 4 | import random 5 | from keras.datasets import cifar10 6 | from keras.datasets import cifar100 7 | from keras.datasets import reuters 8 | from keras.datasets import imdb 9 | from keras.datasets import mnist 10 | from keras.datasets import boston_housing 11 | from keras.datasets import fashion_mnist 12 | 13 | 14 | def test_cifar(): 15 | # only run data download tests 20% of the time 16 | # to speed up frequent testing 17 | random.seed(time.time()) 18 | if random.random() > 0.8: 19 | (x_train, y_train), (x_test, y_test) = cifar10.load_data() 20 | assert len(x_train) == len(y_train) == 50000 21 | assert len(x_test) == len(y_test) == 10000 22 | (x_train, y_train), (x_test, y_test) = cifar100.load_data('fine') 23 | assert len(x_train) == len(y_train) == 50000 24 | assert len(x_test) == len(y_test) == 10000 25 | (x_train, y_train), (x_test, y_test) = cifar100.load_data('coarse') 26 | assert len(x_train) == len(y_train) == 50000 27 | assert len(x_test) == len(y_test) == 10000 28 | 29 | 30 | def test_reuters(): 31 | # only run data download tests 20% of the time 32 | # to speed up frequent testing 33 | random.seed(time.time()) 34 | if random.random() > 0.8: 35 | (x_train, y_train), (x_test, y_test) = reuters.load_data() 36 | assert len(x_train) == len(y_train) 37 | assert len(x_test) == len(y_test) 38 | assert len(x_train) + len(x_test) == 11228 39 | (x_train, y_train), (x_test, y_test) = reuters.load_data(maxlen=10) 40 | assert len(x_train) == len(y_train) 41 | assert len(x_test) == len(y_test) 42 | word_index = reuters.get_word_index() 43 | assert isinstance(word_index, dict) 44 | 45 | 46 | def test_mnist(): 47 | # only run data download tests 20% of the time 48 | # to speed up frequent testing 49 | random.seed(time.time()) 50 | if random.random() > 0.8: 51 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 52 | assert len(x_train) == len(y_train) == 60000 53 | assert len(x_test) == len(y_test) == 10000 54 | 55 | 56 | def test_imdb(): 57 | # only run data download tests 20% of the time 58 | # to speed up frequent testing 59 | random.seed(time.time()) 60 | if random.random() > 0.8: 61 | (x_train, y_train), (x_test, y_test) = imdb.load_data() 62 | (x_train, y_train), (x_test, y_test) = imdb.load_data(maxlen=40) 63 | assert len(x_train) == len(y_train) 64 | assert len(x_test) == len(y_test) 65 | word_index = imdb.get_word_index() 66 | assert isinstance(word_index, dict) 67 | 68 | 69 | def test_boston_housing(): 70 | # only run data download tests 20% of the time 71 | # to speed up frequent testing 72 | random.seed(time.time()) 73 | if random.random() > 0.8: 74 | (x_train, y_train), (x_test, y_test) = boston_housing.load_data() 75 | assert len(x_train) == len(y_train) 76 | assert len(x_test) == len(y_test) 77 | 78 | 79 | def test_fashion_mnist(): 80 | # only run data download tests 20% of the time 81 | # to speed up frequent testing 82 | random.seed(time.time()) 83 | if random.random() > 0.8: 84 | (x_train, y_train), (x_test, y_test) = fashion_mnist.load_data() 85 | assert len(x_train) == len(y_train) == 60000 86 | assert len(x_test) == len(y_test) == 10000 87 | 88 | 89 | if __name__ == 
'__main__': 90 | pytest.main([__file__]) 91 | -------------------------------------------------------------------------------- /keras/losses.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import six 3 | from . import backend as K 4 | from .utils.generic_utils import deserialize_keras_object 5 | 6 | 7 | # noinspection SpellCheckingInspection 8 | def mean_squared_error(y_true, y_pred): 9 | return K.mean(K.square(y_pred - y_true), axis=-1) 10 | 11 | 12 | def mean_absolute_error(y_true, y_pred): 13 | return K.mean(K.abs(y_pred - y_true), axis=-1) 14 | 15 | 16 | def mean_absolute_percentage_error(y_true, y_pred): 17 | diff = K.abs((y_true - y_pred) / K.clip(K.abs(y_true), 18 | K.epsilon(), 19 | None)) 20 | return 100. * K.mean(diff, axis=-1) 21 | 22 | 23 | def mean_squared_logarithmic_error(y_true, y_pred): 24 | first_log = K.log(K.clip(y_pred, K.epsilon(), None) + 1.) 25 | second_log = K.log(K.clip(y_true, K.epsilon(), None) + 1.) 26 | return K.mean(K.square(first_log - second_log), axis=-1) 27 | 28 | 29 | def squared_hinge(y_true, y_pred): 30 | return K.mean(K.square(K.maximum(1. - y_true * y_pred, 0.)), axis=-1) 31 | 32 | 33 | def hinge(y_true, y_pred): 34 | return K.mean(K.maximum(1. - y_true * y_pred, 0.), axis=-1) 35 | 36 | 37 | def categorical_hinge(y_true, y_pred): 38 | pos = K.sum(y_true * y_pred, axis=-1) 39 | neg = K.max((1. - y_true) * y_pred, axis=-1) 40 | return K.maximum(0., neg - pos + 1.) 41 | 42 | 43 | def logcosh(y_true, y_pred): 44 | def cosh(x): 45 | return (K.exp(x) + K.exp(-x)) / 2 46 | return K.mean(K.log(cosh(y_pred - y_true)), axis=-1) 47 | 48 | 49 | def categorical_crossentropy(y_true, y_pred): 50 | return K.categorical_crossentropy(y_true, y_pred) 51 | 52 | 53 | def sparse_categorical_crossentropy(y_true, y_pred): 54 | return K.sparse_categorical_crossentropy(y_true, y_pred) 55 | 56 | 57 | def binary_crossentropy(y_true, y_pred): 58 | return K.mean(K.binary_crossentropy(y_true, y_pred), axis=-1) 59 | 60 | 61 | def kullback_leibler_divergence(y_true, y_pred): 62 | y_true = K.clip(y_true, K.epsilon(), 1) 63 | y_pred = K.clip(y_pred, K.epsilon(), 1) 64 | return K.sum(y_true * K.log(y_true / y_pred), axis=-1) 65 | 66 | 67 | def poisson(y_true, y_pred): 68 | return K.mean(y_pred - y_true * K.log(y_pred + K.epsilon()), axis=-1) 69 | 70 | 71 | def cosine_proximity(y_true, y_pred): 72 | y_true = K.l2_normalize(y_true, axis=-1) 73 | y_pred = K.l2_normalize(y_pred, axis=-1) 74 | return -K.sum(y_true * y_pred, axis=-1) 75 | 76 | 77 | # Aliases. 
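# These short and upper-case names resolve to the functions above through `get()`/`deserialize()` (which look names up in `globals()`), so e.g. `model.compile(loss='mse')` selects `mean_squared_error`.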
78 | 79 | mse = MSE = mean_squared_error 80 | mae = MAE = mean_absolute_error 81 | mape = MAPE = mean_absolute_percentage_error 82 | msle = MSLE = mean_squared_logarithmic_error 83 | kld = KLD = kullback_leibler_divergence 84 | cosine = cosine_proximity 85 | 86 | 87 | def serialize(loss): 88 | return loss.__name__ 89 | 90 | 91 | def deserialize(name, custom_objects=None): 92 | return deserialize_keras_object(name, 93 | module_objects=globals(), 94 | custom_objects=custom_objects, 95 | printable_module_name='loss function') 96 | 97 | 98 | def get(identifier): 99 | if identifier is None: 100 | return None 101 | if isinstance(identifier, six.string_types): 102 | identifier = str(identifier) 103 | return deserialize(identifier) 104 | elif callable(identifier): 105 | return identifier 106 | else: 107 | raise ValueError('Could not interpret ' 108 | 'loss function identifier:', identifier) 109 | -------------------------------------------------------------------------------- /examples/mnist_hierarchical_rnn.py: -------------------------------------------------------------------------------- 1 | """This is an example of using Hierarchical RNN (HRNN) to classify MNIST digits. 2 | 3 | HRNNs can learn across multiple levels of temporal hierarchy over a complex sequence. 4 | Usually, the first recurrent layer of an HRNN encodes a sentence (e.g. of word vectors) 5 | into a sentence vector. The second recurrent layer then encodes a sequence of 6 | such vectors (encoded by the first layer) into a document vector. This 7 | document vector is considered to preserve both the word-level and 8 | sentence-level structure of the context. 9 | 10 | # References 11 | - [A Hierarchical Neural Autoencoder for Paragraphs and Documents](https://arxiv.org/abs/1506.01057) 12 | Encodes paragraphs and documents with HRNN. 13 | Results have shown that HRNN outperforms standard 14 | RNNs and may play some role in more sophisticated generation tasks like 15 | summarization or question answering. 16 | - [Hierarchical recurrent neural network for skeleton based action recognition](http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=7298714) 17 | Achieved state-of-the-art results on skeleton-based action recognition with 3 levels 18 | of bidirectional HRNN combined with fully connected layers. 19 | 20 | In the MNIST example below, the first LSTM layer encodes every 21 | column of pixels of shape (28, 1) into a column vector of shape (128,). The second LSTM 22 | layer then encodes these 28 column vectors of shape (28, 128) into an image vector 23 | representing the whole image. A final Dense layer is added for prediction. 24 | 25 | After 5 epochs: train acc: 0.9858, val acc: 0.9864 26 | """ 27 | from __future__ import print_function 28 | 29 | import keras 30 | from keras.datasets import mnist 31 | from keras.models import Model 32 | from keras.layers import Input, Dense, TimeDistributed 33 | from keras.layers import LSTM 34 | 35 | # Training parameters. 36 | batch_size = 32 37 | num_classes = 10 38 | epochs = 5 39 | 40 | # Embedding dimensions. 41 | row_hidden = 128 42 | col_hidden = 128 43 | 44 | # The data, shuffled and split between train and test sets. 45 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 46 | 47 | # Reshapes data to 4D for Hierarchical RNN.
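# Shape becomes (samples, 28, 28, 1): the trailing singleton axis turns each pixel into a 1-D feature vector, so the first (TimeDistributed) LSTM can encode each 28-pixel slice and the second LSTM can encode the resulting sequence of 28 vectors.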
48 | x_train = x_train.reshape(x_train.shape[0], 28, 28, 1) 49 | x_test = x_test.reshape(x_test.shape[0], 28, 28, 1) 50 | x_train = x_train.astype('float32') 51 | x_test = x_test.astype('float32') 52 | x_train /= 255 53 | x_test /= 255 54 | print('x_train shape:', x_train.shape) 55 | print(x_train.shape[0], 'train samples') 56 | print(x_test.shape[0], 'test samples') 57 | 58 | # Converts class vectors to binary class matrices. 59 | y_train = keras.utils.to_categorical(y_train, num_classes) 60 | y_test = keras.utils.to_categorical(y_test, num_classes) 61 | 62 | row, col, pixel = x_train.shape[1:] 63 | 64 | # 4D input. 65 | x = Input(shape=(row, col, pixel)) 66 | 67 | # Encodes a row of pixels using TimeDistributed Wrapper. 68 | encoded_rows = TimeDistributed(LSTM(row_hidden))(x) 69 | 70 | # Encodes columns of encoded rows. 71 | encoded_columns = LSTM(col_hidden)(encoded_rows) 72 | 73 | # Final predictions and model. 74 | prediction = Dense(num_classes, activation='softmax')(encoded_columns) 75 | model = Model(x, prediction) 76 | model.compile(loss='categorical_crossentropy', 77 | optimizer='rmsprop', 78 | metrics=['accuracy']) 79 | 80 | # Training. 81 | model.fit(x_train, y_train, 82 | batch_size=batch_size, 83 | epochs=epochs, 84 | verbose=1, 85 | validation_data=(x_test, y_test)) 86 | 87 | # Evaluation. 88 | scores = model.evaluate(x_test, y_test, verbose=0) 89 | print('Test loss:', scores[0]) 90 | print('Test accuracy:', scores[1]) 91 | -------------------------------------------------------------------------------- /tests/keras/metrics_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | 4 | from keras import metrics 5 | from keras import backend as K 6 | 7 | all_metrics = [ 8 | metrics.binary_accuracy, 9 | metrics.categorical_accuracy, 10 | metrics.mean_squared_error, 11 | metrics.mean_absolute_error, 12 | metrics.mean_absolute_percentage_error, 13 | metrics.mean_squared_logarithmic_error, 14 | metrics.squared_hinge, 15 | metrics.hinge, 16 | metrics.categorical_crossentropy, 17 | metrics.binary_crossentropy, 18 | metrics.poisson, 19 | metrics.cosine_proximity, 20 | metrics.logcosh, 21 | ] 22 | 23 | all_sparse_metrics = [ 24 | metrics.sparse_categorical_accuracy, 25 | metrics.sparse_categorical_crossentropy, 26 | ] 27 | 28 | 29 | def test_metrics(): 30 | y_a = K.variable(np.random.random((6, 7))) 31 | y_b = K.variable(np.random.random((6, 7))) 32 | for metric in all_metrics: 33 | output = metric(y_a, y_b) 34 | print(metric.__name__) 35 | assert K.eval(output).shape == (6,) 36 | 37 | 38 | def test_sparse_metrics(): 39 | for metric in all_sparse_metrics: 40 | y_a = K.variable(np.random.randint(0, 7, (6,)), dtype=K.floatx()) 41 | y_b = K.variable(np.random.random((6, 7)), dtype=K.floatx()) 42 | assert K.eval(metric(y_a, y_b)).shape == (6,) 43 | 44 | 45 | def test_serialize(): 46 | '''This is a mock 'round trip' of serialize and deserialize. 
47 | ''' 48 | 49 | class MockMetric: 50 | def __init__(self): 51 | self.__name__ = "mock_metric" 52 | 53 | mock = MockMetric() 54 | found = metrics.serialize(mock) 55 | assert found == "mock_metric" 56 | 57 | found = metrics.deserialize('mock_metric', 58 | custom_objects={'mock_metric': True}) 59 | assert found is True 60 | 61 | 62 | def test_invalid_get(): 63 | 64 | with pytest.raises(ValueError): 65 | metrics.get(5) 66 | 67 | 68 | @pytest.mark.skipif((K.backend() == 'cntk'), 69 | reason="keras cntk backend does not support top_k yet") 70 | def test_top_k_categorical_accuracy(): 71 | y_pred = K.variable(np.array([[0.3, 0.2, 0.1], [0.1, 0.2, 0.7]])) 72 | y_true = K.variable(np.array([[0, 1, 0], [1, 0, 0]])) 73 | success_result = K.eval(metrics.top_k_categorical_accuracy(y_true, y_pred, 74 | k=3)) 75 | assert success_result == 1 76 | partial_result = K.eval(metrics.top_k_categorical_accuracy(y_true, y_pred, 77 | k=2)) 78 | assert partial_result == 0.5 79 | failure_result = K.eval(metrics.top_k_categorical_accuracy(y_true, y_pred, 80 | k=1)) 81 | assert failure_result == 0 82 | 83 | 84 | @pytest.mark.skipif((K.backend() == 'cntk'), 85 | reason="keras cntk backend does not support top_k yet") 86 | def test_sparse_top_k_categorical_accuracy(): 87 | y_pred = K.variable(np.array([[0.3, 0.2, 0.1], [0.1, 0.2, 0.7]])) 88 | y_true = K.variable(np.array([[1], [0]])) 89 | success_result = K.eval( 90 | metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=3)) 91 | 92 | assert success_result == 1 93 | partial_result = K.eval( 94 | metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=2)) 95 | 96 | assert partial_result == 0.5 97 | failure_result = K.eval( 98 | metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=1)) 99 | 100 | assert failure_result == 0 101 | 102 | 103 | if __name__ == '__main__': 104 | pytest.main([__file__]) 105 | -------------------------------------------------------------------------------- /examples/lstm_text_generation.py: -------------------------------------------------------------------------------- 1 | '''Example script to generate text from Nietzsche's writings. 2 | 3 | At least 20 epochs are required before the generated text 4 | starts sounding coherent. 5 | 6 | It is recommended to run this script on GPU, as recurrent 7 | networks are quite computationally intensive. 8 | 9 | If you try this script on new data, make sure your corpus 10 | has at least ~100k characters. ~1M is better. 
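Generation below uses temperature sampling: the predicted character distribution is rescaled as exp(log(p) / temperature) and re-normalized before drawing the next character, so low temperatures ('diversity') give conservative text and higher values give more surprising output.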
11 | ''' 12 | 13 | from __future__ import print_function 14 | from keras.models import Sequential 15 | from keras.layers import Dense, Activation 16 | from keras.layers import LSTM 17 | from keras.optimizers import RMSprop 18 | from keras.utils.data_utils import get_file 19 | import numpy as np 20 | import random 21 | import sys 22 | 23 | path = get_file('nietzsche.txt', origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt') 24 | text = open(path).read().lower() 25 | print('corpus length:', len(text)) 26 | 27 | chars = sorted(list(set(text))) 28 | print('total chars:', len(chars)) 29 | char_indices = dict((c, i) for i, c in enumerate(chars)) 30 | indices_char = dict((i, c) for i, c in enumerate(chars)) 31 | 32 | # cut the text in semi-redundant sequences of maxlen characters 33 | maxlen = 40 34 | step = 3 35 | sentences = [] 36 | next_chars = [] 37 | for i in range(0, len(text) - maxlen, step): 38 | sentences.append(text[i: i + maxlen]) 39 | next_chars.append(text[i + maxlen]) 40 | print('nb sequences:', len(sentences)) 41 | 42 | print('Vectorization...') 43 | X = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool) 44 | y = np.zeros((len(sentences), len(chars)), dtype=np.bool) 45 | for i, sentence in enumerate(sentences): 46 | for t, char in enumerate(sentence): 47 | X[i, t, char_indices[char]] = 1 48 | y[i, char_indices[next_chars[i]]] = 1 49 | 50 | 51 | # build the model: a single LSTM 52 | print('Build model...') 53 | model = Sequential() 54 | model.add(LSTM(128, input_shape=(maxlen, len(chars)))) 55 | model.add(Dense(len(chars))) 56 | model.add(Activation('softmax')) 57 | 58 | optimizer = RMSprop(lr=0.01) 59 | model.compile(loss='categorical_crossentropy', optimizer=optimizer) 60 | 61 | 62 | def sample(preds, temperature=1.0): 63 | # helper function to sample an index from a probability array 64 | preds = np.asarray(preds).astype('float64') 65 | preds = np.log(preds) / temperature 66 | exp_preds = np.exp(preds) 67 | preds = exp_preds / np.sum(exp_preds) 68 | probas = np.random.multinomial(1, preds, 1) 69 | return np.argmax(probas) 70 | 71 | # train the model, output generated text after each iteration 72 | for iteration in range(1, 60): 73 | print() 74 | print('-' * 50) 75 | print('Iteration', iteration) 76 | model.fit(X, y, 77 | batch_size=128, 78 | epochs=1) 79 | 80 | start_index = random.randint(0, len(text) - maxlen - 1) 81 | 82 | for diversity in [0.2, 0.5, 1.0, 1.2]: 83 | print() 84 | print('----- diversity:', diversity) 85 | 86 | generated = '' 87 | sentence = text[start_index: start_index + maxlen] 88 | generated += sentence 89 | print('----- Generating with seed: "' + sentence + '"') 90 | sys.stdout.write(generated) 91 | 92 | for i in range(400): 93 | x = np.zeros((1, maxlen, len(chars))) 94 | for t, char in enumerate(sentence): 95 | x[0, t, char_indices[char]] = 1. 96 | 97 | preds = model.predict(x, verbose=0)[0] 98 | next_index = sample(preds, diversity) 99 | next_char = indices_char[next_index] 100 | 101 | generated += next_char 102 | sentence = sentence[1:] + next_char 103 | 104 | sys.stdout.write(next_char) 105 | sys.stdout.flush() 106 | print() 107 | -------------------------------------------------------------------------------- /examples/antirectifier.py: -------------------------------------------------------------------------------- 1 | '''The example demonstrates how to write custom layers for Keras. 2 | 3 | We build a custom activation layer called 'Antirectifier', 4 | which modifies the shape of the tensor that passes through it. 
5 | We need to specify two methods: `compute_output_shape` and `call`. 6 | 7 | Note that the same result can also be achieved via a Lambda layer. 8 | 9 | Because our custom layer is written with primitives from the Keras 10 | backend (`K`), our code can run both on TensorFlow and Theano. 11 | ''' 12 | 13 | from __future__ import print_function 14 | import keras 15 | from keras.models import Sequential 16 | from keras import layers 17 | from keras.datasets import mnist 18 | from keras import backend as K 19 | 20 | 21 | class Antirectifier(layers.Layer): 22 | '''This is the combination of a sample-wise 23 | L2 normalization with the concatenation of the 24 | positive part of the input with the negative part 25 | of the input. The result is a tensor of samples that are 26 | twice as large as the input samples. 27 | 28 | It can be used in place of a ReLU. 29 | 30 | # Input shape 31 | 2D tensor of shape (samples, n) 32 | 33 | # Output shape 34 | 2D tensor of shape (samples, 2*n) 35 | 36 | # Theoretical justification 37 | When applying ReLU, assuming that the distribution 38 | of the previous output is approximately centered around 0., 39 | you are discarding half of your input. This is inefficient. 40 | 41 | Antirectifier allows to return all-positive outputs like ReLU, 42 | without discarding any data. 43 | 44 | Tests on MNIST show that Antirectifier allows to train networks 45 | with twice less parameters yet with comparable 46 | classification accuracy as an equivalent ReLU-based network. 47 | ''' 48 | 49 | def compute_output_shape(self, input_shape): 50 | shape = list(input_shape) 51 | assert len(shape) == 2 # only valid for 2D tensors 52 | shape[-1] *= 2 53 | return tuple(shape) 54 | 55 | def call(self, inputs): 56 | inputs -= K.mean(inputs, axis=1, keepdims=True) 57 | inputs = K.l2_normalize(inputs, axis=1) 58 | pos = K.relu(inputs) 59 | neg = K.relu(-inputs) 60 | return K.concatenate([pos, neg], axis=1) 61 | 62 | # global parameters 63 | batch_size = 128 64 | num_classes = 10 65 | epochs = 40 66 | 67 | # the data, shuffled and split between train and test sets 68 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 69 | 70 | x_train = x_train.reshape(60000, 784) 71 | x_test = x_test.reshape(10000, 784) 72 | x_train = x_train.astype('float32') 73 | x_test = x_test.astype('float32') 74 | x_train /= 255 75 | x_test /= 255 76 | print(x_train.shape[0], 'train samples') 77 | print(x_test.shape[0], 'test samples') 78 | 79 | # convert class vectors to binary class matrices 80 | y_train = keras.utils.to_categorical(y_train, num_classes) 81 | y_test = keras.utils.to_categorical(y_test, num_classes) 82 | 83 | # build the model 84 | model = Sequential() 85 | model.add(layers.Dense(256, input_shape=(784,))) 86 | model.add(Antirectifier()) 87 | model.add(layers.Dropout(0.1)) 88 | model.add(layers.Dense(256)) 89 | model.add(Antirectifier()) 90 | model.add(layers.Dropout(0.1)) 91 | model.add(layers.Dense(num_classes)) 92 | model.add(layers.Activation('softmax')) 93 | 94 | # compile the model 95 | model.compile(loss='categorical_crossentropy', 96 | optimizer='rmsprop', 97 | metrics=['accuracy']) 98 | 99 | # train the model 100 | model.fit(x_train, y_train, 101 | batch_size=batch_size, 102 | epochs=epochs, 103 | verbose=1, 104 | validation_data=(x_test, y_test)) 105 | 106 | # next, compare with an equivalent network 107 | # with2x bigger Dense layers and ReLU 108 | -------------------------------------------------------------------------------- /docs/templates/preprocessing/sequence.md: 
-------------------------------------------------------------------------------- 1 | ## pad_sequences 2 | 3 | ```python 4 | keras.preprocessing.sequence.pad_sequences(sequences, maxlen=None, dtype='int32', 5 | padding='pre', truncating='pre', value=0.) 6 | ``` 7 | 8 | Transforms a list of `num_samples` sequences (lists of scalars) into a 2D Numpy array of shape `(num_samples, num_timesteps)`. `num_timesteps` is either the `maxlen` argument if provided, or the length of the longest sequence otherwise. Sequences that are shorter than `num_timesteps` are padded with `value`. Sequences longer than `num_timesteps` are truncated so that they fit the desired length. The position where padding or truncation happens is determined by `padding` and `truncating`, respectively. 9 | 10 | - __Return__: 2D Numpy array of shape `(num_samples, num_timesteps)`. 11 | 12 | - __Arguments__: 13 | - __sequences__: List of lists of int or float. 14 | - __maxlen__: None or int. Maximum sequence length; longer sequences are truncated and shorter sequences are padded. 15 | - __dtype__: datatype of the Numpy array returned. 16 | - __padding__: 'pre' or 'post', pad either before or after each sequence. 17 | - __truncating__: 'pre' or 'post', remove values from sequences larger than `maxlen` either at the beginning or at the end of the sequence. 18 | - __value__: float, padding value. 19 | 20 | --- 21 | 22 | ## skipgrams 23 | 24 | ```python 25 | keras.preprocessing.sequence.skipgrams(sequence, vocabulary_size, 26 | window_size=4, negative_samples=1., shuffle=True, 27 | categorical=False, sampling_table=None) 28 | ``` 29 | 30 | Transforms a sequence of word indexes (list of int) into couples of the form: 31 | 32 | - (word, word in the same window), with label 1 (positive samples). 33 | - (word, random word from the vocabulary), with label 0 (negative samples). 34 | 35 | Read more about Skipgram in this gnomic paper by Mikolov et al.: [Efficient Estimation of Word Representations in 36 | Vector Space](http://arxiv.org/pdf/1301.3781v3.pdf) 37 | 38 | - __Return__: tuple `(couples, labels)`. 39 | - `couples` is a list of 2-element lists of int: `[word_index, other_word_index]`. 40 | - `labels` is a list of 0 and 1, where 1 indicates that `other_word_index` was found in the same window as `word_index`, and 0 indicates that `other_word_index` was random. 41 | - If `categorical` is set to True, the labels are categorical, i.e. 1 becomes [0, 1], and 0 becomes [1, 0]. 42 | 43 | - __Arguments__: 44 | - __sequence__: list of int indexes. If using a sampling_table, the index of a word should be its rank in the dataset (starting at 1). 45 | - __vocabulary_size__: int. 46 | - __window_size__: int. maximum distance between two words in a positive couple. 47 | - __negative_samples__: float >= 0. 0 for no negative (=random) samples, 1 for the same number as positive samples, etc. 48 | - __shuffle__: boolean. Whether to shuffle the samples. 49 | - __categorical__: boolean. Whether to make the returned labels categorical. 50 | - __sampling_table__: Numpy array of shape `(vocabulary_size,)` where `sampling_table[i]` is the probability of sampling the word with index i (assumed to be the i-th most common word in the dataset). 51 | 52 | 53 | --- 54 | 55 | ## make_sampling_table 56 | 57 | ```python 58 | keras.preprocessing.sequence.make_sampling_table(size, sampling_factor=1e-5) 59 | ``` 60 | 61 | Used for generating the `sampling_table` argument for `skipgrams`.
`sampling_table[i]` is the probability of sampling the i-th most common word in a dataset (more common words should be sampled less frequently, for balance). 62 | 63 | - __Return__: Numpy array of shape `(size,)`. 64 | 65 | - __Arguments__: 66 | - __size__: size of the vocabulary considered. 67 | - __sampling_factor__: lower values result in a longer probability decay (common words will be sampled less frequently). If set to 1, no subsampling will be performed (all sampling probabilities will be 1). 68 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: required 2 | dist: trusty 3 | language: python 4 | matrix: 5 | include: 6 | - python: 2.7 7 | env: KERAS_BACKEND=tensorflow TEST_MODE=PEP8 8 | - python: 2.7 9 | env: KERAS_BACKEND=tensorflow TEST_MODE=INTEGRATION_TESTS 10 | - python: 3.6 11 | env: KERAS_BACKEND=tensorflow TEST_MODE=DOC 12 | - python: 2.7 13 | env: KERAS_BACKEND=tensorflow 14 | - python: 3.6 15 | env: KERAS_BACKEND=tensorflow 16 | - python: 2.7 17 | env: KERAS_BACKEND=theano THEANO_FLAGS=optimizer=fast_compile 18 | - python: 3.6 19 | env: KERAS_BACKEND=theano THEANO_FLAGS=optimizer=fast_compile 20 | - python: 2.7 21 | env: KERAS_BACKEND=cntk PYTHONWARNINGS=ignore 22 | - python: 3.6 23 | env: KERAS_BACKEND=cntk PYTHONWARNINGS=ignore 24 | install: 25 | # code below is taken from http://conda.pydata.org/docs/travis.html 26 | # We do this conditionally because it saves us some downloading if the 27 | # version is the same. 28 | - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then 29 | wget https://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh; 30 | else 31 | wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh; 32 | fi 33 | - bash miniconda.sh -b -p $HOME/miniconda 34 | - export PATH="$HOME/miniconda/bin:$PATH" 35 | - hash -r 36 | - conda config --set always_yes yes --set changeps1 no 37 | - conda update -q conda 38 | # Useful for debugging any issues with conda 39 | - conda info -a 40 | 41 | - conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION numpy scipy matplotlib pandas pytest h5py 42 | - source activate test-environment 43 | - pip install theano 44 | 45 | # set library path 46 | - export LD_LIBRARY_PATH=$HOME/miniconda/envs/test-environment/lib/:$LD_LIBRARY_PATH 47 | 48 | # install PIL for preprocessing tests 49 | - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then 50 | conda install pil; 51 | elif [[ "$TRAVIS_PYTHON_VERSION" == "3.6" ]]; then 52 | conda install Pillow; 53 | fi 54 | 55 | - pip install -e .[tests] 56 | 57 | # install TensorFlow (CPU version).
58 | - pip install tensorflow 59 | 60 | # install cntk 61 | - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then 62 | pip install https://cntk.ai/PythonWheel/CPU-Only/cntk-2.2-cp27-cp27mu-linux_x86_64.whl; 63 | elif [[ "$TRAVIS_PYTHON_VERSION" == "3.6" ]]; then 64 | pip install https://cntk.ai/PythonWheel/CPU-Only/cntk-2.2-cp36-cp36m-linux_x86_64.whl; 65 | fi 66 | 67 | # install pydot for visualization tests 68 | - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then 69 | conda install pydot graphviz; 70 | fi 71 | 72 | #install open mpi 73 | - rm -rf ~/mpi 74 | - mkdir ~/mpi 75 | - pushd ~/mpi 76 | - wget http://cntk.ai/PythonWheel/ForKeras/depends/openmpi_1.10-3.zip 77 | - unzip ./openmpi_1.10-3.zip 78 | - sudo dpkg -i openmpi_1.10-3.deb 79 | - popd 80 | 81 | # command to run tests 82 | script: 83 | # run keras backend init to initialize backend config 84 | - python -c "import keras.backend" 85 | # create dataset directory to avoid concurrent directory creation at runtime 86 | - mkdir ~/.keras/datasets 87 | # set up keras backend 88 | - sed -i -e 's/"backend":[[:space:]]*"[^"]*/"backend":\ "'$KERAS_BACKEND'/g' ~/.keras/keras.json; 89 | - echo -e "Running tests with the following config:\n$(cat ~/.keras/keras.json)" 90 | - if [[ "$TEST_MODE" == "INTEGRATION_TESTS" ]]; then 91 | PYTHONPATH=$PWD:$PYTHONPATH py.test tests/integration_tests; 92 | elif [[ "$TEST_MODE" == "PEP8" ]]; then 93 | PYTHONPATH=$PWD:$PYTHONPATH py.test --pep8 -m pep8 -n0; 94 | elif [[ "$TEST_MODE" == "DOC" ]]; then 95 | PYTHONPATH=$PWD:$PYTHONPATH py.test tests/test_documentation.py; 96 | else 97 | PYTHONPATH=$PWD:$PYTHONPATH py.test tests/ --ignore=tests/integration_tests --ignore=tests/test_documentation.py --cov=keras tests/ --cov-fail-under 79 --cov-report term-missing; 98 | fi 99 | -------------------------------------------------------------------------------- /tests/test_dynamic_trainability.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | import pytest 4 | 5 | from keras.utils.test_utils import keras_test 6 | from keras.models import Model, Sequential 7 | from keras.layers import Dense, Input 8 | 9 | 10 | @keras_test 11 | def test_layer_trainability_switch(): 12 | # with constructor argument, in Sequential 13 | model = Sequential() 14 | model.add(Dense(2, trainable=False, input_dim=1)) 15 | assert model.trainable_weights == [] 16 | 17 | # by setting the `trainable` argument, in Sequential 18 | model = Sequential() 19 | layer = Dense(2, input_dim=1) 20 | model.add(layer) 21 | assert model.trainable_weights == layer.trainable_weights 22 | layer.trainable = False 23 | assert model.trainable_weights == [] 24 | 25 | # with constructor argument, in Model 26 | x = Input(shape=(1,)) 27 | y = Dense(2, trainable=False)(x) 28 | model = Model(x, y) 29 | assert model.trainable_weights == [] 30 | 31 | # by setting the `trainable` argument, in Model 32 | x = Input(shape=(1,)) 33 | layer = Dense(2) 34 | y = layer(x) 35 | model = Model(x, y) 36 | assert model.trainable_weights == layer.trainable_weights 37 | layer.trainable = False 38 | assert model.trainable_weights == [] 39 | 40 | 41 | @keras_test 42 | def test_model_trainability_switch(): 43 | # a non-trainable model has no trainable weights 44 | x = Input(shape=(1,)) 45 | y = Dense(2)(x) 46 | model = Model(x, y) 47 | model.trainable = False 48 | assert model.trainable_weights == [] 49 | 50 | # same for Sequential 51 | model = Sequential() 52 | 
model.add(Dense(2, input_dim=1)) 53 | model.trainable = False 54 | assert model.trainable_weights == [] 55 | 56 | 57 | @keras_test 58 | def test_nested_model_trainability(): 59 | # a Sequential inside a Model 60 | inner_model = Sequential() 61 | inner_model.add(Dense(2, input_dim=1)) 62 | 63 | x = Input(shape=(1,)) 64 | y = inner_model(x) 65 | outer_model = Model(x, y) 66 | assert outer_model.trainable_weights == inner_model.trainable_weights 67 | inner_model.trainable = False 68 | assert outer_model.trainable_weights == [] 69 | inner_model.trainable = True 70 | inner_model.layers[-1].trainable = False 71 | assert outer_model.trainable_weights == [] 72 | 73 | # a Sequential inside a Sequential 74 | inner_model = Sequential() 75 | inner_model.add(Dense(2, input_dim=1)) 76 | outer_model = Sequential() 77 | outer_model.add(inner_model) 78 | assert outer_model.trainable_weights == inner_model.trainable_weights 79 | inner_model.trainable = False 80 | assert outer_model.trainable_weights == [] 81 | inner_model.trainable = True 82 | inner_model.layers[-1].trainable = False 83 | assert outer_model.trainable_weights == [] 84 | 85 | # a Model inside a Model 86 | x = Input(shape=(1,)) 87 | y = Dense(2)(x) 88 | inner_model = Model(x, y) 89 | x = Input(shape=(1,)) 90 | y = inner_model(x) 91 | outer_model = Model(x, y) 92 | assert outer_model.trainable_weights == inner_model.trainable_weights 93 | inner_model.trainable = False 94 | assert outer_model.trainable_weights == [] 95 | inner_model.trainable = True 96 | inner_model.layers[-1].trainable = False 97 | assert outer_model.trainable_weights == [] 98 | 99 | # a Model inside a Sequential 100 | x = Input(shape=(1,)) 101 | y = Dense(2)(x) 102 | inner_model = Model(x, y) 103 | outer_model = Sequential() 104 | outer_model.add(inner_model) 105 | assert outer_model.trainable_weights == inner_model.trainable_weights 106 | inner_model.trainable = False 107 | assert outer_model.trainable_weights == [] 108 | inner_model.trainable = True 109 | inner_model.layers[-1].trainable = False 110 | assert outer_model.trainable_weights == [] 111 | 112 | 113 | if __name__ == '__main__': 114 | pytest.main([__file__]) 115 | -------------------------------------------------------------------------------- /tests/integration_tests/test_vector_data_tasks.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import pytest 3 | 4 | from keras.utils.test_utils import get_test_data, keras_test 5 | from keras.models import Sequential 6 | from keras import layers 7 | import keras 8 | from keras.utils.np_utils import to_categorical 9 | 10 | num_classes = 2 11 | 12 | 13 | @keras_test 14 | def test_vector_classification(): 15 | ''' 16 | Classify random float vectors into 2 classes with logistic regression 17 | using 2 layer neural network with ReLU hidden units. 
18 | ''' 19 | (x_train, y_train), (x_test, y_test) = get_test_data(num_train=500, 20 | num_test=200, 21 | input_shape=(20,), 22 | classification=True, 23 | num_classes=num_classes) 24 | y_train = to_categorical(y_train) 25 | y_test = to_categorical(y_test) 26 | 27 | # Test with Sequential API 28 | model = Sequential([ 29 | layers.Dense(16, input_shape=(x_train.shape[-1],), activation='relu'), 30 | layers.Dense(8), 31 | layers.Activation('relu'), 32 | layers.Dense(num_classes, activation='softmax') 33 | ]) 34 | model.compile(loss='categorical_crossentropy', 35 | optimizer='rmsprop', 36 | metrics=['accuracy']) 37 | model.summary() 38 | history = model.fit(x_train, y_train, epochs=15, batch_size=16, 39 | validation_data=(x_test, y_test), 40 | verbose=0) 41 | assert(history.history['val_acc'][-1] > 0.8) 42 | config = model.get_config() 43 | model = Sequential.from_config(config) 44 | 45 | 46 | @keras_test 47 | def test_vector_classification_functional(): 48 | (x_train, y_train), (x_test, y_test) = get_test_data(num_train=500, 49 | num_test=200, 50 | input_shape=(20,), 51 | classification=True, 52 | num_classes=num_classes) 53 | # Test with functional API 54 | inputs = layers.Input(shape=(x_train.shape[-1],)) 55 | x = layers.Dense(16, activation=keras.activations.relu)(inputs) 56 | x = layers.Dense(8)(x) 57 | x = layers.Activation('relu')(x) 58 | outputs = layers.Dense(num_classes, activation='softmax')(x) 59 | model = keras.models.Model(inputs, outputs) 60 | model.compile(loss=keras.losses.sparse_categorical_crossentropy, 61 | optimizer=keras.optimizers.RMSprop(), 62 | metrics=['acc']) 63 | history = model.fit(x_train, y_train, epochs=15, batch_size=16, 64 | validation_data=(x_test, y_test), 65 | verbose=0) 66 | assert(history.history['val_acc'][-1] > 0.8) 67 | 68 | 69 | @keras_test 70 | def test_vector_regression(): 71 | ''' 72 | Perform float data prediction (regression) using 2 layer MLP 73 | with tanh and sigmoid activations. 
74 | ''' 75 | (x_train, y_train), (x_test, y_test) = get_test_data(num_train=500, 76 | num_test=200, 77 | input_shape=(20,), 78 | output_shape=(num_classes,), 79 | classification=False) 80 | 81 | model = Sequential([ 82 | layers.Dense(16, input_shape=(x_train.shape[-1],), activation='tanh'), 83 | layers.Dense(num_classes) 84 | ]) 85 | 86 | model.compile(loss='hinge', optimizer='adagrad') 87 | history = model.fit(x_train, y_train, epochs=20, batch_size=16, 88 | validation_data=(x_test, y_test), verbose=0) 89 | assert (history.history['val_loss'][-1] < 0.9) 90 | 91 | 92 | if __name__ == '__main__': 93 | pytest.main([__file__]) 94 | -------------------------------------------------------------------------------- /examples/mnist_sklearn_wrapper.py: -------------------------------------------------------------------------------- 1 | '''Example of how to use sklearn wrapper 2 | 3 | Builds simple CNN models on MNIST and uses sklearn's GridSearchCV to find best model 4 | ''' 5 | 6 | from __future__ import print_function 7 | 8 | import keras 9 | from keras.datasets import mnist 10 | from keras.models import Sequential 11 | from keras.layers import Dense, Dropout, Activation, Flatten 12 | from keras.layers import Conv2D, MaxPooling2D 13 | from keras.wrappers.scikit_learn import KerasClassifier 14 | from keras import backend as K 15 | from sklearn.grid_search import GridSearchCV 16 | 17 | 18 | num_classes = 10 19 | 20 | # input image dimensions 21 | img_rows, img_cols = 28, 28 22 | 23 | # load training data and do basic data normalization 24 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 25 | 26 | if K.image_data_format() == 'channels_first': 27 | x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols) 28 | x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols) 29 | input_shape = (1, img_rows, img_cols) 30 | else: 31 | x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1) 32 | x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1) 33 | input_shape = (img_rows, img_cols, 1) 34 | 35 | x_train = x_train.astype('float32') 36 | x_test = x_test.astype('float32') 37 | x_train /= 255 38 | x_test /= 255 39 | 40 | # convert class vectors to binary class matrices 41 | y_train = keras.utils.to_categorical(y_train, num_classes) 42 | y_test = keras.utils.to_categorical(y_test, num_classes) 43 | 44 | 45 | def make_model(dense_layer_sizes, filters, kernel_size, pool_size): 46 | '''Creates model comprised of 2 convolutional layers followed by dense layers 47 | 48 | dense_layer_sizes: List of layer sizes. 
49 | This list has one number for each layer 50 | filters: Number of convolutional filters in each convolutional layer 51 | kernel_size: Convolutional kernel size 52 | pool_size: Size of pooling area for max pooling 53 | ''' 54 | 55 | model = Sequential() 56 | model.add(Conv2D(filters, kernel_size, 57 | padding='valid', 58 | input_shape=input_shape)) 59 | model.add(Activation('relu')) 60 | model.add(Conv2D(filters, kernel_size)) 61 | model.add(Activation('relu')) 62 | model.add(MaxPooling2D(pool_size=pool_size)) 63 | model.add(Dropout(0.25)) 64 | 65 | model.add(Flatten()) 66 | for layer_size in dense_layer_sizes: 67 | model.add(Dense(layer_size)) 68 | model.add(Activation('relu')) 69 | model.add(Dropout(0.5)) 70 | model.add(Dense(num_classes)) 71 | model.add(Activation('softmax')) 72 | 73 | model.compile(loss='categorical_crossentropy', 74 | optimizer='adadelta', 75 | metrics=['accuracy']) 76 | 77 | return model 78 | 79 | dense_size_candidates = [[32], [64], [32, 32], [64, 64]] 80 | my_classifier = KerasClassifier(make_model, batch_size=32) 81 | validator = GridSearchCV(my_classifier, 82 | param_grid={'dense_layer_sizes': dense_size_candidates, 83 | # epochs is avail for tuning even when not 84 | # an argument to model building function 85 | 'epochs': [3, 6], 86 | 'filters': [8], 87 | 'kernel_size': [3], 88 | 'pool_size': [2]}, 89 | scoring='neg_log_loss', 90 | n_jobs=1) 91 | validator.fit(x_train, y_train) 92 | 93 | print('The parameters of the best model are: ') 94 | print(validator.best_params_) 95 | 96 | # validator.best_estimator_ returns sklearn-wrapped version of best model. 97 | # validator.best_estimator_.model returns the (unwrapped) keras model 98 | best_model = validator.best_estimator_.model 99 | metric_names = best_model.metrics_names 100 | metric_values = best_model.evaluate(x_test, y_test) 101 | for metric, value in zip(metric_names, metric_values): 102 | print(metric, ': ', value) 103 | -------------------------------------------------------------------------------- /keras/datasets/reuters.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from ..utils.data_utils import get_file 4 | from ..preprocessing.sequence import _remove_long_seq 5 | from six.moves import zip 6 | import numpy as np 7 | import json 8 | import warnings 9 | 10 | 11 | def load_data(path='reuters.npz', num_words=None, skip_top=0, 12 | maxlen=None, test_split=0.2, seed=113, 13 | start_char=1, oov_char=2, index_from=3, **kwargs): 14 | """Loads the Reuters newswire classification dataset. 15 | 16 | # Arguments 17 | path: where to cache the data (relative to `~/.keras/dataset`). 18 | num_words: max number of words to include. Words are ranked 19 | by how often they occur (in the training set) and only 20 | the most frequent words are kept 21 | skip_top: skip the top N most frequently occurring words 22 | (which may not be informative). 23 | maxlen: truncate sequences after this length. 24 | test_split: Fraction of the dataset to be used as test data. 25 | seed: random seed for sample shuffling. 26 | start_char: The start of a sequence will be marked with this character. 27 | Set to 1 because 0 is usually the padding character. 28 | oov_char: words that were cut out because of the `num_words` 29 | or `skip_top` limit will be replaced with this character. 30 | index_from: index actual words with this index and higher. 31 | 32 | # Returns 33 | Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`. 
34 | 35 | Note that the 'out of vocabulary' character is only used for 36 | words that were present in the training set but are not included 37 | because they're not making the `num_words` cut here. 38 | Words that were not seen in the training set but are in the test set 39 | have simply been skipped. 40 | """ 41 | # Legacy support 42 | if 'nb_words' in kwargs: 43 | warnings.warn('The `nb_words` argument in `load_data` ' 44 | 'has been renamed `num_words`.') 45 | num_words = kwargs.pop('nb_words') 46 | if kwargs: 47 | raise TypeError('Unrecognized keyword arguments: ' + str(kwargs)) 48 | 49 | path = get_file(path, 50 | origin='https://s3.amazonaws.com/text-datasets/reuters.npz', 51 | file_hash='87aedbeb0cb229e378797a632c1997b6') 52 | with np.load(path) as f: 53 | xs, labels = f['x'], f['y'] 54 | 55 | np.random.seed(seed) 56 | np.random.shuffle(xs) 57 | np.random.seed(seed) 58 | np.random.shuffle(labels) 59 | 60 | if start_char is not None: 61 | xs = [[start_char] + [w + index_from for w in x] for x in xs] 62 | elif index_from: 63 | xs = [[w + index_from for w in x] for x in xs] 64 | 65 | if maxlen: 66 | xs, labels = _remove_long_seq(maxlen, xs, labels) 67 | 68 | if not num_words: 69 | num_words = max([max(x) for x in xs]) 70 | 71 | # by convention, use 2 as OOV word 72 | # reserve 'index_from' (=3 by default) characters: 73 | # 0 (padding), 1 (start), 2 (OOV) 74 | if oov_char is not None: 75 | xs = [[w if (skip_top <= w < num_words) else oov_char for w in x] for x in xs] 76 | else: 77 | xs = [[w for w in x if (skip_top <= w < num_words)] for x in xs] 78 | 79 | idx = int(len(xs) * (1 - test_split)) 80 | x_train, y_train = np.array(xs[:idx]), np.array(labels[:idx]) 81 | x_test, y_test = np.array(xs[idx:]), np.array(labels[idx:]) 82 | 83 | return (x_train, y_train), (x_test, y_test) 84 | 85 | 86 | def get_word_index(path='reuters_word_index.json'): 87 | """Retrieves the dictionary mapping word indices back to words. 88 | 89 | # Arguments 90 | path: where to cache the data (relative to `~/.keras/dataset`). 91 | 92 | # Returns 93 | The word index dictionary. 94 | """ 95 | path = get_file(path, 96 | origin='https://s3.amazonaws.com/text-datasets/reuters_word_index.json', 97 | file_hash='4d44cc38712099c9e383dc6e5f11a921') 98 | f = open(path) 99 | data = json.load(f) 100 | f.close() 101 | return data 102 | -------------------------------------------------------------------------------- /examples/mnist_transfer_cnn.py: -------------------------------------------------------------------------------- 1 | '''Transfer learning toy example: 2 | 3 | 1- Train a simple convnet on the MNIST dataset the first 5 digits [0..4]. 4 | 2- Freeze convolutional layers and fine-tune dense layers 5 | for the classification of digits [5..9]. 6 | 7 | Run on GPU: THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 python mnist_transfer_cnn.py 8 | 9 | Get to 99.8% test accuracy after 5 epochs 10 | for the first five digits classifier 11 | and 99.2% for the last five digits after transfer + fine-tuning. 
12 | ''' 13 | 14 | from __future__ import print_function 15 | 16 | import datetime 17 | import keras 18 | from keras.datasets import mnist 19 | from keras.models import Sequential 20 | from keras.layers import Dense, Dropout, Activation, Flatten 21 | from keras.layers import Conv2D, MaxPooling2D 22 | from keras import backend as K 23 | 24 | now = datetime.datetime.now 25 | 26 | batch_size = 128 27 | num_classes = 5 28 | epochs = 5 29 | 30 | # input image dimensions 31 | img_rows, img_cols = 28, 28 32 | # number of convolutional filters to use 33 | filters = 32 34 | # size of pooling area for max pooling 35 | pool_size = 2 36 | # convolution kernel size 37 | kernel_size = 3 38 | 39 | if K.image_data_format() == 'channels_first': 40 | input_shape = (1, img_rows, img_cols) 41 | else: 42 | input_shape = (img_rows, img_cols, 1) 43 | 44 | 45 | def train_model(model, train, test, num_classes): 46 | x_train = train[0].reshape((train[0].shape[0],) + input_shape) 47 | x_test = test[0].reshape((test[0].shape[0],) + input_shape) 48 | x_train = x_train.astype('float32') 49 | x_test = x_test.astype('float32') 50 | x_train /= 255 51 | x_test /= 255 52 | print('x_train shape:', x_train.shape) 53 | print(x_train.shape[0], 'train samples') 54 | print(x_test.shape[0], 'test samples') 55 | 56 | # convert class vectors to binary class matrices 57 | y_train = keras.utils.to_categorical(train[1], num_classes) 58 | y_test = keras.utils.to_categorical(test[1], num_classes) 59 | 60 | model.compile(loss='categorical_crossentropy', 61 | optimizer='adadelta', 62 | metrics=['accuracy']) 63 | 64 | t = now() 65 | model.fit(x_train, y_train, 66 | batch_size=batch_size, 67 | epochs=epochs, 68 | verbose=1, 69 | validation_data=(x_test, y_test)) 70 | print('Training time: %s' % (now() - t)) 71 | score = model.evaluate(x_test, y_test, verbose=0) 72 | print('Test score:', score[0]) 73 | print('Test accuracy:', score[1]) 74 | 75 | 76 | # the data, shuffled and split between train and test sets 77 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 78 | 79 | # create two datasets one with digits below 5 and one with 5 and above 80 | x_train_lt5 = x_train[y_train < 5] 81 | y_train_lt5 = y_train[y_train < 5] 82 | x_test_lt5 = x_test[y_test < 5] 83 | y_test_lt5 = y_test[y_test < 5] 84 | 85 | x_train_gte5 = x_train[y_train >= 5] 86 | y_train_gte5 = y_train[y_train >= 5] - 5 87 | x_test_gte5 = x_test[y_test >= 5] 88 | y_test_gte5 = y_test[y_test >= 5] - 5 89 | 90 | # define two groups of layers: feature (convolutions) and classification (dense) 91 | feature_layers = [ 92 | Conv2D(filters, kernel_size, 93 | padding='valid', 94 | input_shape=input_shape), 95 | Activation('relu'), 96 | Conv2D(filters, kernel_size), 97 | Activation('relu'), 98 | MaxPooling2D(pool_size=pool_size), 99 | Dropout(0.25), 100 | Flatten(), 101 | ] 102 | 103 | classification_layers = [ 104 | Dense(128), 105 | Activation('relu'), 106 | Dropout(0.5), 107 | Dense(num_classes), 108 | Activation('softmax') 109 | ] 110 | 111 | # create complete model 112 | model = Sequential(feature_layers + classification_layers) 113 | 114 | # train model for 5-digit classification [0..4] 115 | train_model(model, 116 | (x_train_lt5, y_train_lt5), 117 | (x_test_lt5, y_test_lt5), num_classes) 118 | 119 | # freeze feature layers and rebuild model 120 | for l in feature_layers: 121 | l.trainable = False 122 | 123 | # transfer: train dense layers for new classification task [5..9] 124 | train_model(model, 125 | (x_train_gte5, y_train_gte5), 126 | (x_test_gte5, y_test_gte5), 
num_classes) 127 | -------------------------------------------------------------------------------- /examples/variational_autoencoder.py: -------------------------------------------------------------------------------- 1 | '''This script demonstrates how to build a variational autoencoder with Keras. 2 | 3 | Reference: "Auto-Encoding Variational Bayes" https://arxiv.org/abs/1312.6114 4 | ''' 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | from scipy.stats import norm 8 | 9 | from keras.layers import Input, Dense, Lambda, Layer 10 | from keras.models import Model 11 | from keras import backend as K 12 | from keras import metrics 13 | from keras.datasets import mnist 14 | 15 | batch_size = 100 16 | original_dim = 784 17 | latent_dim = 2 18 | intermediate_dim = 256 19 | epochs = 50 20 | epsilon_std = 1.0 21 | 22 | 23 | x = Input(shape=(original_dim,)) 24 | h = Dense(intermediate_dim, activation='relu')(x) 25 | z_mean = Dense(latent_dim)(h) 26 | z_log_var = Dense(latent_dim)(h) 27 | 28 | 29 | def sampling(args): 30 | z_mean, z_log_var = args 31 | epsilon = K.random_normal(shape=(K.shape(z_mean)[0], latent_dim), mean=0., 32 | stddev=epsilon_std) 33 | return z_mean + K.exp(z_log_var / 2) * epsilon 34 | 35 | # note that "output_shape" isn't necessary with the TensorFlow backend 36 | z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_var]) 37 | 38 | # we instantiate these layers separately so as to reuse them later 39 | decoder_h = Dense(intermediate_dim, activation='relu') 40 | decoder_mean = Dense(original_dim, activation='sigmoid') 41 | h_decoded = decoder_h(z) 42 | x_decoded_mean = decoder_mean(h_decoded) 43 | 44 | 45 | # Custom loss layer 46 | class CustomVariationalLayer(Layer): 47 | def __init__(self, **kwargs): 48 | self.is_placeholder = True 49 | super(CustomVariationalLayer, self).__init__(**kwargs) 50 | 51 | def vae_loss(self, x, x_decoded_mean): 52 | xent_loss = original_dim * metrics.binary_crossentropy(x, x_decoded_mean) 53 | kl_loss = - 0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1) 54 | return K.mean(xent_loss + kl_loss) 55 | 56 | def call(self, inputs): 57 | x = inputs[0] 58 | x_decoded_mean = inputs[1] 59 | loss = self.vae_loss(x, x_decoded_mean) 60 | self.add_loss(loss, inputs=inputs) 61 | # We won't actually use the output. 62 | return x 63 | 64 | y = CustomVariationalLayer()([x, x_decoded_mean]) 65 | vae = Model(x, y) 66 | vae.compile(optimizer='rmsprop', loss=None) 67 | 68 | 69 | # train the VAE on MNIST digits 70 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 71 | 72 | x_train = x_train.astype('float32') / 255. 73 | x_test = x_test.astype('float32') / 255. 
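# Flatten each 28x28 image into a 784-dimensional vector (original_dim) for the dense encoder.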
74 | x_train = x_train.reshape((len(x_train), np.prod(x_train.shape[1:]))) 75 | x_test = x_test.reshape((len(x_test), np.prod(x_test.shape[1:]))) 76 | 77 | vae.fit(x_train, 78 | shuffle=True, 79 | epochs=epochs, 80 | batch_size=batch_size, 81 | validation_data=(x_test, None)) 82 | 83 | # build a model to project inputs on the latent space 84 | encoder = Model(x, z_mean) 85 | 86 | # display a 2D plot of the digit classes in the latent space 87 | x_test_encoded = encoder.predict(x_test, batch_size=batch_size) 88 | plt.figure(figsize=(6, 6)) 89 | plt.scatter(x_test_encoded[:, 0], x_test_encoded[:, 1], c=y_test) 90 | plt.colorbar() 91 | plt.show() 92 | 93 | # build a digit generator that can sample from the learned distribution 94 | decoder_input = Input(shape=(latent_dim,)) 95 | _h_decoded = decoder_h(decoder_input) 96 | _x_decoded_mean = decoder_mean(_h_decoded) 97 | generator = Model(decoder_input, _x_decoded_mean) 98 | 99 | # display a 2D manifold of the digits 100 | n = 15 # figure with 15x15 digits 101 | digit_size = 28 102 | figure = np.zeros((digit_size * n, digit_size * n)) 103 | # linearly spaced coordinates on the unit square were transformed through the inverse CDF (ppf) of the Gaussian 104 | # to produce values of the latent variables z, since the prior of the latent space is Gaussian 105 | grid_x = norm.ppf(np.linspace(0.05, 0.95, n)) 106 | grid_y = norm.ppf(np.linspace(0.05, 0.95, n)) 107 | 108 | for i, yi in enumerate(grid_x): 109 | for j, xi in enumerate(grid_y): 110 | z_sample = np.array([[xi, yi]]) 111 | x_decoded = generator.predict(z_sample) 112 | digit = x_decoded[0].reshape(digit_size, digit_size) 113 | figure[i * digit_size: (i + 1) * digit_size, 114 | j * digit_size: (j + 1) * digit_size] = digit 115 | 116 | plt.figure(figsize=(10, 10)) 117 | plt.imshow(figure, cmap='Greys_r') 118 | plt.show() 119 | -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | # Keras examples directory 2 | 3 | ## Vision models examples 4 | 5 | [mnist_mlp.py](mnist_mlp.py) 6 | Trains a simple deep multi-layer perceptron on the MNIST dataset. 7 | 8 | [mnist_cnn.py](mnist_cnn.py) 9 | Trains a simple convnet on the MNIST dataset. 10 | 11 | [cifar10_cnn.py](cifar10_cnn.py) 12 | Trains a simple deep CNN on the CIFAR10 small images dataset. 13 | 14 | [cifar10_resnet.py](cifar10_resnet.py) 15 | Trains a ResNet on the CIFAR10 small images dataset. 16 | 17 | [conv_lstm.py](conv_lstm.py) 18 | Demonstrates the use of a convolutional LSTM network. 19 | 20 | [image_ocr.py](image_ocr.py) 21 | Trains a convolutional stack followed by a recurrent stack and a CTC logloss function to perform optical character recognition (OCR). 22 | 23 | [mnist_acgan.py](mnist_acgan.py) 24 | Implementation of AC-GAN (Auxiliary Classifier GAN) on the MNIST dataset 25 | 26 | [mnist_hierarchical_rnn.py](mnist_hierarchical_rnn.py) 27 | Trains a Hierarchical RNN (HRNN) to classify MNIST digits. 28 | 29 | [mnist_siamese.py](mnist_siamese.py) 30 | Trains a Siamese multi-layer perceptron on pairs of digits from the MNIST dataset. 31 | 32 | [mnist_swwae.py](mnist_swwae.py) 33 | Trains a Stacked What-Where AutoEncoder built on residual blocks on the MNIST dataset. 34 | 35 | [mnist_transfer_cnn.py](mnist_transfer_cnn.py) 36 | Transfer learning toy example. 
37 | 38 | ---- 39 | 40 | ## Text & sequences examples 41 | 42 | [addition_rnn.py](addition_rnn.py) 43 | Implementation of sequence to sequence learning for performing addition of two numbers (as strings). 44 | 45 | [babi_rnn.py](babi_rnn.py) 46 | Trains a two-branch recurrent network on the bAbI dataset for reading comprehension. 47 | 48 | [babi_memnn.py](babi_memnn.py) 49 | Trains a memory network on the bAbI dataset for reading comprehension. 50 | 51 | [imdb_bidirectional_lstm.py](imdb_bidirectional_lstm.py) 52 | Trains a Bidirectional LSTM on the IMDB sentiment classification task. 53 | 54 | [imdb_cnn.py](imdb_cnn.py) 55 | Demonstrates the use of Convolution1D for text classification. 56 | 57 | [imdb_cnn_lstm.py](imdb_cnn_lstm.py) 58 | Trains a convolutional stack followed by a recurrent stack network on the IMDB sentiment classification task. 59 | 60 | [imdb_fasttext.py](imdb_fasttext.py) 61 | Trains a FastText model on the IMDB sentiment classification task. 62 | 63 | [imdb_lstm.py](imdb_lstm.py) 64 | Trains an LSTM model on the IMDB sentiment classification task. 65 | 66 | [lstm_stateful.py](lstm_stateful.py) 67 | Demonstrates how to use stateful RNNs to model long sequences efficiently. 68 | 69 | [pretrained_word_embeddings.py](pretrained_word_embeddings.py) 70 | Loads pre-trained word embeddings (GloVe embeddings) into a frozen Keras Embedding layer, and uses it to train a text classification model on the 20 Newsgroups dataset. 71 | 72 | [reuters_mlp.py](reuters_mlp.py) 73 | Trains and evaluates a simple MLP on the Reuters newswire topic classification task. 74 | 75 | ---- 76 | 77 | ## Generative models examples 78 | 79 | [lstm_text_generation.py](lstm_text_generation.py) 80 | Generates text from Nietzsche's writings. 81 | 82 | [conv_filter_visualization.py](conv_filter_visualization.py) 83 | Visualization of the filters of VGG16, via gradient ascent in input space. 84 | 85 | [deep_dream.py](deep_dream.py) 86 | Deep Dreams in Keras. 87 | 88 | [neural_doodle.py](neural_doodle.py) 89 | Neural doodle. 90 | 91 | [neural_style_transfer.py](neural_style_transfer.py) 92 | Neural style transfer. 93 | 94 | [variational_autoencoder.py](variational_autoencoder.py) 95 | Demonstrates how to build a variational autoencoder. 96 | 97 | [variational_autoencoder_deconv.py](variational_autoencoder_deconv.py) 98 | Demonstrates how to build a variational autoencoder with Keras using deconvolution layers. 99 | 100 | ---- 101 | 102 | ## Examples demonstrating specific Keras functionality 103 | 104 | [antirectifier.py](antirectifier.py) 105 | Demonstrates how to write custom layers for Keras. 106 | 107 | [mnist_sklearn_wrapper.py](mnist_sklearn_wrapper.py) 108 | Demonstrates how to use the sklearn wrapper. 109 | 110 | [mnist_irnn.py](mnist_irnn.py) 111 | Reproduction of the IRNN experiment with pixel-by-pixel sequential MNIST in "A Simple Way to Initialize Recurrent Networks of Rectified Linear Units" by Le et al. 112 | 113 | [mnist_net2net.py](mnist_net2net.py) 114 | Reproduction of the Net2Net experiment with MNIST in "Net2Net: Accelerating Learning via Knowledge Transfer". 115 | 116 | [reuters_mlp_relu_vs_selu.py](reuters_mlp_relu_vs_selu.py) 117 | Compares self-normalizing MLPs with regular MLPs. 118 | 119 | [mnist_tfrecord.py](mnist_tfrecord.py) 120 | MNIST dataset with TFRecords, the standard TensorFlow data format.
121 | -------------------------------------------------------------------------------- /examples/cifar10_cnn.py: -------------------------------------------------------------------------------- 1 | '''Train a simple deep CNN on the CIFAR10 small images dataset. 2 | 3 | GPU run command with Theano backend (with TensorFlow, the GPU is automatically used): 4 | THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatx=float32 python cifar10_cnn.py 5 | 6 | It gets down to 0.65 test logloss in 25 epochs, and down to 0.55 after 50 epochs. 7 | (it's still underfitting at that point, though). 8 | ''' 9 | 10 | from __future__ import print_function 11 | import keras 12 | from keras.datasets import cifar10 13 | from keras.preprocessing.image import ImageDataGenerator 14 | from keras.models import Sequential 15 | from keras.layers import Dense, Dropout, Activation, Flatten 16 | from keras.layers import Conv2D, MaxPooling2D 17 | 18 | import os 19 | 20 | batch_size = 32 21 | num_classes = 10 22 | epochs = 200 23 | data_augmentation = True 24 | num_predictions = 20 25 | save_dir = os.path.join(os.getcwd(), 'saved_models') 26 | model_name = 'keras_cifar10_trained_model.h5' 27 | 28 | # The data, shuffled and split between train and test sets: 29 | (x_train, y_train), (x_test, y_test) = cifar10.load_data() 30 | print('x_train shape:', x_train.shape) 31 | print(x_train.shape[0], 'train samples') 32 | print(x_test.shape[0], 'test samples') 33 | 34 | # Convert class vectors to binary class matrices. 35 | y_train = keras.utils.to_categorical(y_train, num_classes) 36 | y_test = keras.utils.to_categorical(y_test, num_classes) 37 | 38 | model = Sequential() 39 | model.add(Conv2D(32, (3, 3), padding='same', 40 | input_shape=x_train.shape[1:])) 41 | model.add(Activation('relu')) 42 | model.add(Conv2D(32, (3, 3))) 43 | model.add(Activation('relu')) 44 | model.add(MaxPooling2D(pool_size=(2, 2))) 45 | model.add(Dropout(0.25)) 46 | 47 | model.add(Conv2D(64, (3, 3), padding='same')) 48 | model.add(Activation('relu')) 49 | model.add(Conv2D(64, (3, 3))) 50 | model.add(Activation('relu')) 51 | model.add(MaxPooling2D(pool_size=(2, 2))) 52 | model.add(Dropout(0.25)) 53 | 54 | model.add(Flatten()) 55 | model.add(Dense(512)) 56 | model.add(Activation('relu')) 57 | model.add(Dropout(0.5)) 58 | model.add(Dense(num_classes)) 59 | model.add(Activation('softmax')) 60 | 61 | # initiate RMSprop optimizer 62 | opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6) 63 | 64 | # Let's train the model using RMSprop 65 | model.compile(loss='categorical_crossentropy', 66 | optimizer=opt, 67 | metrics=['accuracy']) 68 | 69 | x_train = x_train.astype('float32') 70 | x_test = x_test.astype('float32') 71 | x_train /= 255 72 | x_test /= 255 73 | 74 | if not data_augmentation: 75 | print('Not using data augmentation.') 76 | model.fit(x_train, y_train, 77 | batch_size=batch_size, 78 | epochs=epochs, 79 | validation_data=(x_test, y_test), 80 | shuffle=True) 81 | else: 82 | print('Using real-time data augmentation.') 83 | # This will do preprocessing and realtime data augmentation: 84 | datagen = ImageDataGenerator( 85 | featurewise_center=False, # set input mean to 0 over the dataset 86 | samplewise_center=False, # set each sample mean to 0 87 | featurewise_std_normalization=False, # divide inputs by std of the dataset 88 | samplewise_std_normalization=False, # divide each input by its std 89 | zca_whitening=False, # apply ZCA whitening 90 | rotation_range=0, # randomly rotate images in the range (degrees, 0 to 180) 91 | width_shift_range=0.1, # randomly shift 
images horizontally (fraction of total width) 92 | height_shift_range=0.1, # randomly shift images vertically (fraction of total height) 93 | horizontal_flip=True, # randomly flip images 94 | vertical_flip=False) # randomly flip images 95 | 96 | # Compute quantities required for feature-wise normalization 97 | # (std, mean, and principal components if ZCA whitening is applied). 98 | datagen.fit(x_train) 99 | 100 | # Fit the model on the batches generated by datagen.flow(). 101 | model.fit_generator(datagen.flow(x_train, y_train, 102 | batch_size=batch_size), 103 | steps_per_epoch=x_train.shape[0] // batch_size, 104 | epochs=epochs, 105 | validation_data=(x_test, y_test), 106 | workers=4) 107 | 108 | # Save model and weights 109 | if not os.path.isdir(save_dir): 110 | os.makedirs(save_dir) 111 | model_path = os.path.join(save_dir, model_name) 112 | model.save(model_path) 113 | print('Saved trained model at %s ' % model_path) 114 | 115 | # Score trained model. 116 | scores = model.evaluate(x_test, y_test, verbose=1) 117 | print('Test loss:', scores[0]) 118 | print('Test accuracy:', scores[1]) 119 | -------------------------------------------------------------------------------- /keras/datasets/imdb.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from ..utils.data_utils import get_file 3 | from ..preprocessing.sequence import _remove_long_seq 4 | from six.moves import zip 5 | import numpy as np 6 | import json 7 | import warnings 8 | 9 | 10 | def load_data(path='imdb.npz', num_words=None, skip_top=0, 11 | maxlen=None, seed=113, 12 | start_char=1, oov_char=2, index_from=3, **kwargs): 13 | """Loads the IMDB dataset. 14 | 15 | # Arguments 16 | path: where to cache the data (relative to `~/.keras/dataset`). 17 | num_words: max number of words to include. Words are ranked 18 | by how often they occur (in the training set) and only 19 | the most frequent words are kept 20 | skip_top: skip the top N most frequently occurring words 21 | (which may not be informative). 22 | maxlen: truncate sequences after this length. 23 | seed: random seed for sample shuffling. 24 | start_char: The start of a sequence will be marked with this character. 25 | Set to 1 because 0 is usually the padding character. 26 | oov_char: words that were cut out because of the `num_words` 27 | or `skip_top` limit will be replaced with this character. 28 | index_from: index actual words with this index and higher. 29 | 30 | # Returns 31 | Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`. 32 | 33 | # Raises 34 | ValueError: in case `maxlen` is so low 35 | that no input sequence could be kept. 36 | 37 | Note that the 'out of vocabulary' character is only used for 38 | words that were present in the training set but are not included 39 | because they're not making the `num_words` cut here. 40 | Words that were not seen in the training set but are in the test set 41 | have simply been skipped. 
42 | """ 43 | # Legacy support 44 | if 'nb_words' in kwargs: 45 | warnings.warn('The `nb_words` argument in `load_data` ' 46 | 'has been renamed `num_words`.') 47 | num_words = kwargs.pop('nb_words') 48 | if kwargs: 49 | raise TypeError('Unrecognized keyword arguments: ' + str(kwargs)) 50 | 51 | path = get_file(path, 52 | origin='https://s3.amazonaws.com/text-datasets/imdb.npz', 53 | file_hash='599dadb1135973df5b59232a0e9a887c') 54 | with np.load(path) as f: 55 | x_train, labels_train = f['x_train'], f['y_train'] 56 | x_test, labels_test = f['x_test'], f['y_test'] 57 | 58 | np.random.seed(seed) 59 | np.random.shuffle(x_train) 60 | np.random.seed(seed) 61 | np.random.shuffle(labels_train) 62 | 63 | np.random.seed(seed * 2) 64 | np.random.shuffle(x_test) 65 | np.random.seed(seed * 2) 66 | np.random.shuffle(labels_test) 67 | 68 | xs = np.concatenate([x_train, x_test]) 69 | labels = np.concatenate([labels_train, labels_test]) 70 | 71 | if start_char is not None: 72 | xs = [[start_char] + [w + index_from for w in x] for x in xs] 73 | elif index_from: 74 | xs = [[w + index_from for w in x] for x in xs] 75 | 76 | if maxlen: 77 | xs, labels = _remove_long_seq(maxlen, xs, labels) 78 | if not xs: 79 | raise ValueError('After filtering for sequences shorter than maxlen=' + 80 | str(maxlen) + ', no sequence was kept. ' 81 | 'Increase maxlen.') 82 | if not num_words: 83 | num_words = max([max(x) for x in xs]) 84 | 85 | # by convention, use 2 as OOV word 86 | # reserve 'index_from' (=3 by default) characters: 87 | # 0 (padding), 1 (start), 2 (OOV) 88 | if oov_char is not None: 89 | xs = [[w if (skip_top <= w < num_words) else oov_char for w in x] for x in xs] 90 | else: 91 | xs = [[w for w in x if (skip_top <= w < num_words)] for x in xs] 92 | 93 | idx = len(x_train) 94 | x_train, y_train = np.array(xs[:idx]), np.array(labels[:idx]) 95 | x_test, y_test = np.array(xs[idx:]), np.array(labels[idx:]) 96 | 97 | return (x_train, y_train), (x_test, y_test) 98 | 99 | 100 | def get_word_index(path='imdb_word_index.json'): 101 | """Retrieves the dictionary mapping word indices back to words. 102 | 103 | # Arguments 104 | path: where to cache the data (relative to `~/.keras/dataset`). 105 | 106 | # Returns 107 | The word index dictionary. 108 | """ 109 | path = get_file(path, 110 | origin='https://s3.amazonaws.com/text-datasets/imdb_word_index.json', 111 | file_hash='bfafd718b763782e994055a2d397834f') 112 | f = open(path) 113 | data = json.load(f) 114 | f.close() 115 | return data 116 | -------------------------------------------------------------------------------- /examples/conv_filter_visualization.py: -------------------------------------------------------------------------------- 1 | '''Visualization of the filters of VGG16, via gradient ascent in input space. 2 | 3 | This script can run on CPU in a few minutes (with the TensorFlow backend). 4 | 5 | Results example: http://i.imgur.com/4nj4KjN.jpg 6 | ''' 7 | from __future__ import print_function 8 | 9 | from scipy.misc import imsave 10 | import numpy as np 11 | import time 12 | from keras.applications import vgg16 13 | from keras import backend as K 14 | 15 | # dimensions of the generated pictures for each filter. 
16 | img_width = 128 17 | img_height = 128 18 | 19 | # the name of the layer we want to visualize 20 | # (see model definition at keras/applications/vgg16.py) 21 | layer_name = 'block5_conv1' 22 | 23 | # util function to convert a tensor into a valid image 24 | 25 | 26 | def deprocess_image(x): 27 | # normalize tensor: center on 0., ensure std is 0.1 28 | x -= x.mean() 29 | x /= (x.std() + 1e-5) 30 | x *= 0.1 31 | 32 | # clip to [0, 1] 33 | x += 0.5 34 | x = np.clip(x, 0, 1) 35 | 36 | # convert to RGB array 37 | x *= 255 38 | if K.image_data_format() == 'channels_first': 39 | x = x.transpose((1, 2, 0)) 40 | x = np.clip(x, 0, 255).astype('uint8') 41 | return x 42 | 43 | # build the VGG16 network with ImageNet weights 44 | model = vgg16.VGG16(weights='imagenet', include_top=False) 45 | print('Model loaded.') 46 | 47 | model.summary() 48 | 49 | # this is the placeholder for the input images 50 | input_img = model.input 51 | 52 | # get the symbolic outputs of each "key" layer (we gave them unique names). 53 | layer_dict = dict([(layer.name, layer) for layer in model.layers[1:]]) 54 | 55 | 56 | def normalize(x): 57 | # utility function to normalize a tensor by its L2 norm 58 | return x / (K.sqrt(K.mean(K.square(x))) + 1e-5) 59 | 60 | 61 | kept_filters = [] 62 | for filter_index in range(200): 63 | # we only scan through the first 200 filters, 64 | # but there are actually 512 of them 65 | print('Processing filter %d' % filter_index) 66 | start_time = time.time() 67 | 68 | # we build a loss function that maximizes the activation 69 | # of the nth filter of the layer considered 70 | layer_output = layer_dict[layer_name].output 71 | if K.image_data_format() == 'channels_first': 72 | loss = K.mean(layer_output[:, filter_index, :, :]) 73 | else: 74 | loss = K.mean(layer_output[:, :, :, filter_index]) 75 | 76 | # we compute the gradient of the input picture wrt this loss 77 | grads = K.gradients(loss, input_img)[0] 78 | 79 | # normalization trick: we normalize the gradient 80 | grads = normalize(grads) 81 | 82 | # this function returns the loss and grads given the input picture 83 | iterate = K.function([input_img], [loss, grads]) 84 | 85 | # step size for gradient ascent 86 | step = 1. 87 | 88 | # we start from a gray image with some random noise 89 | if K.image_data_format() == 'channels_first': 90 | input_img_data = np.random.random((1, 3, img_width, img_height)) 91 | else: 92 | input_img_data = np.random.random((1, img_width, img_height, 3)) 93 | input_img_data = (input_img_data - 0.5) * 20 + 128 94 | 95 | # we run gradient ascent for 20 steps 96 | for i in range(20): 97 | loss_value, grads_value = iterate([input_img_data]) 98 | input_img_data += grads_value * step 99 | 100 | print('Current loss value:', loss_value) 101 | if loss_value <= 0.: 102 | # some filters get stuck to 0, we can skip them 103 | break 104 | 105 | # decode the resulting input image 106 | if loss_value > 0: 107 | img = deprocess_image(input_img_data[0]) 108 | kept_filters.append((img, loss_value)) 109 | end_time = time.time() 110 | print('Filter %d processed in %ds' % (filter_index, end_time - start_time)) 111 | 112 | # we will stitch the best 64 filters on an 8 x 8 grid. 113 | n = 8 114 | 115 | # the filters that have the highest loss are assumed to be better-looking. 116 | # we will only keep the top 64 filters.
117 | kept_filters.sort(key=lambda x: x[1], reverse=True) 118 | kept_filters = kept_filters[:n * n] 119 | 120 | # build a black picture with enough space for 121 | # our 8 x 8 filters of size 128 x 128, with a 5px margin in between 122 | margin = 5 123 | width = n * img_width + (n - 1) * margin 124 | height = n * img_height + (n - 1) * margin 125 | stitched_filters = np.zeros((width, height, 3)) 126 | 127 | # fill the picture with our saved filters 128 | for i in range(n): 129 | for j in range(n): 130 | img, loss = kept_filters[i * n + j] 131 | stitched_filters[(img_width + margin) * i: (img_width + margin) * i + img_width, 132 | (img_height + margin) * j: (img_height + margin) * j + img_height, :] = img 133 | 134 | # save the result to disk 135 | imsave('stitched_filters_%dx%d.png' % (n, n), stitched_filters) 136 | -------------------------------------------------------------------------------- /tests/keras/optimizers_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import pytest 3 | import numpy as np 4 | from numpy.testing import assert_allclose 5 | 6 | from keras.utils import test_utils 7 | from keras import optimizers 8 | from keras.models import Sequential 9 | from keras.layers.core import Dense, Activation 10 | from keras.utils.test_utils import keras_test 11 | from keras.utils.np_utils import to_categorical 12 | from keras import backend as K 13 | 14 | num_classes = 2 15 | 16 | 17 | def get_test_data(): 18 | np.random.seed(1337) 19 | (x_train, y_train), _ = test_utils.get_test_data(num_train=1000, 20 | num_test=200, 21 | input_shape=(10,), 22 | classification=True, 23 | num_classes=num_classes) 24 | y_train = to_categorical(y_train) 25 | return x_train, y_train 26 | 27 | 28 | def _test_optimizer(optimizer, target=0.75): 29 | x_train, y_train = get_test_data() 30 | 31 | model = Sequential() 32 | model.add(Dense(10, input_shape=(x_train.shape[1],))) 33 | model.add(Activation('relu')) 34 | model.add(Dense(y_train.shape[1])) 35 | model.add(Activation('softmax')) 36 | model.compile(loss='categorical_crossentropy', 37 | optimizer=optimizer, 38 | metrics=['accuracy']) 39 | 40 | history = model.fit(x_train, y_train, epochs=2, batch_size=16, verbose=0) 41 | assert history.history['acc'][-1] >= target 42 | config = optimizers.serialize(optimizer) 43 | optim = optimizers.deserialize(config) 44 | new_config = optimizers.serialize(optim) 45 | new_config['class_name'] = new_config['class_name'].lower() 46 | assert config == new_config 47 | 48 | # Test constraints. 49 | model = Sequential() 50 | dense = Dense(10, 51 | input_shape=(x_train.shape[1],), 52 | kernel_constraint=lambda x: 0. * x + 1., 53 | bias_constraint=lambda x: 0. * x + 2.,) 54 | model.add(dense) 55 | model.add(Activation('relu')) 56 | model.add(Dense(y_train.shape[1])) 57 | model.add(Activation('softmax')) 58 | model.compile(loss='categorical_crossentropy', 59 | optimizer=optimizer, 60 | metrics=['accuracy']) 61 | model.train_on_batch(x_train[:10], y_train[:10]) 62 | kernel, bias = dense.get_weights() 63 | assert_allclose(kernel, 1.) 64 | assert_allclose(bias, 2.) 
65 | 66 | 67 | @keras_test 68 | def test_sgd(): 69 | sgd = optimizers.SGD(lr=0.01, momentum=0.9, nesterov=True) 70 | _test_optimizer(sgd) 71 | 72 | 73 | @keras_test 74 | def test_rmsprop(): 75 | _test_optimizer(optimizers.RMSprop()) 76 | _test_optimizer(optimizers.RMSprop(decay=1e-3)) 77 | 78 | 79 | @keras_test 80 | def test_adagrad(): 81 | _test_optimizer(optimizers.Adagrad()) 82 | _test_optimizer(optimizers.Adagrad(decay=1e-3)) 83 | 84 | 85 | @keras_test 86 | def test_adadelta(): 87 | _test_optimizer(optimizers.Adadelta(), target=0.6) 88 | _test_optimizer(optimizers.Adadelta(decay=1e-3), target=0.6) 89 | 90 | 91 | @keras_test 92 | def test_adam(): 93 | _test_optimizer(optimizers.Adam()) 94 | _test_optimizer(optimizers.Adam(decay=1e-3)) 95 | 96 | 97 | @keras_test 98 | def test_adamax(): 99 | _test_optimizer(optimizers.Adamax()) 100 | _test_optimizer(optimizers.Adamax(decay=1e-3)) 101 | 102 | 103 | @keras_test 104 | def test_nadam(): 105 | _test_optimizer(optimizers.Nadam()) 106 | 107 | 108 | @keras_test 109 | def test_clipnorm(): 110 | sgd = optimizers.SGD(lr=0.01, momentum=0.9, clipnorm=0.5) 111 | _test_optimizer(sgd) 112 | 113 | 114 | @keras_test 115 | def test_clipvalue(): 116 | sgd = optimizers.SGD(lr=0.01, momentum=0.9, clipvalue=0.5) 117 | _test_optimizer(sgd) 118 | 119 | 120 | @keras_test 121 | @pytest.mark.skipif((K.backend() != 'tensorflow'), 122 | reason='Requires TensorFlow backend') 123 | def test_tfoptimizer(): 124 | from keras import constraints 125 | from tensorflow import train 126 | optimizer = optimizers.TFOptimizer(train.AdamOptimizer()) 127 | model = Sequential() 128 | model.add(Dense(num_classes, input_shape=(3,), kernel_constraint=constraints.MaxNorm(1))) 129 | model.compile(loss='mean_squared_error', optimizer=optimizer) 130 | model.fit(np.random.random((5, 3)), np.random.random((5, num_classes)), 131 | epochs=1, batch_size=5, verbose=0) 132 | # not supported 133 | with pytest.raises(NotImplementedError): 134 | optimizer.weights 135 | with pytest.raises(NotImplementedError): 136 | optimizer.get_config() 137 | with pytest.raises(NotImplementedError): 138 | optimizer.from_config(None) 139 | 140 | 141 | if __name__ == '__main__': 142 | pytest.main([__file__]) 143 | -------------------------------------------------------------------------------- /tests/keras/layers/normalization_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | from numpy.testing import assert_allclose 4 | 5 | from keras.layers import Input 6 | from keras.utils.test_utils import layer_test, keras_test 7 | from keras.layers import normalization 8 | from keras.models import Sequential, Model 9 | from keras import backend as K 10 | 11 | input_1 = np.arange(10) 12 | input_2 = np.zeros(10) 13 | input_3 = np.ones((10)) 14 | input_shapes = [np.ones((10, 10)), np.ones((10, 10, 10))] 15 | 16 | 17 | @keras_test 18 | def test_basic_batchnorm(): 19 | from keras import regularizers 20 | layer_test(normalization.BatchNormalization, 21 | kwargs={'momentum': 0.9, 22 | 'epsilon': 0.1, 23 | 'gamma_regularizer': regularizers.l2(0.01), 24 | 'beta_regularizer': regularizers.l2(0.01)}, 25 | input_shape=(3, 4, 2)) 26 | layer_test(normalization.BatchNormalization, 27 | kwargs={'gamma_initializer': 'ones', 28 | 'beta_initializer': 'ones', 29 | 'moving_mean_initializer': 'zeros', 30 | 'moving_variance_initializer': 'ones'}, 31 | input_shape=(3, 4, 2)) 32 | 33 | 34 | @keras_test 35 | def test_batchnorm_correctness(): 36 | model = Sequential() 37 
| norm = normalization.BatchNormalization(input_shape=(10,), momentum=0.8) 38 | model.add(norm) 39 | model.compile(loss='mse', optimizer='sgd') 40 | 41 | # centered on 5.0, variance 10.0 42 | x = np.random.normal(loc=5.0, scale=10.0, size=(1000, 10)) 43 | model.fit(x, x, epochs=4, verbose=0) 44 | out = model.predict(x) 45 | out -= K.eval(norm.beta) 46 | out /= K.eval(norm.gamma) 47 | 48 | assert_allclose(out.mean(), 0.0, atol=1e-1) 49 | assert_allclose(out.std(), 1.0, atol=1e-1) 50 | 51 | 52 | @keras_test 53 | def test_batchnorm_training_argument(): 54 | bn1 = normalization.BatchNormalization(input_shape=(10,)) 55 | x1 = Input(shape=(10,)) 56 | y1 = bn1(x1, training=True) 57 | assert bn1.updates 58 | 59 | model1 = Model(x1, y1) 60 | np.random.seed(123) 61 | x = np.random.normal(loc=5.0, scale=10.0, size=(20, 10)) 62 | output_a = model1.predict(x) 63 | 64 | model1.compile(loss='mse', optimizer='rmsprop') 65 | model1.fit(x, x, epochs=1, verbose=0) 66 | output_b = model1.predict(x) 67 | assert np.abs(np.sum(output_a - output_b)) > 0.1 68 | assert_allclose(output_b.mean(), 0.0, atol=1e-1) 69 | assert_allclose(output_b.std(), 1.0, atol=1e-1) 70 | 71 | bn2 = normalization.BatchNormalization(input_shape=(10,)) 72 | x2 = Input(shape=(10,)) 73 | bn2(x2, training=False) 74 | assert not bn2.updates 75 | 76 | 77 | @keras_test 78 | def test_batchnorm_mode_twice(): 79 | # This is a regression test for issue #4881 with the old 80 | # batch normalization functions in the Theano backend. 81 | model = Sequential() 82 | model.add(normalization.BatchNormalization(input_shape=(10, 5, 5), axis=1)) 83 | model.add(normalization.BatchNormalization(input_shape=(10, 5, 5), axis=1)) 84 | model.compile(loss='mse', optimizer='sgd') 85 | 86 | x = np.random.normal(loc=5.0, scale=10.0, size=(20, 10, 5, 5)) 87 | model.fit(x, x, epochs=1, verbose=0) 88 | model.predict(x) 89 | 90 | 91 | @keras_test 92 | def test_batchnorm_convnet(): 93 | model = Sequential() 94 | norm = normalization.BatchNormalization(axis=1, input_shape=(3, 4, 4), momentum=0.8) 95 | model.add(norm) 96 | model.compile(loss='mse', optimizer='sgd') 97 | 98 | # centered on 5.0, variance 10.0 99 | x = np.random.normal(loc=5.0, scale=10.0, size=(1000, 3, 4, 4)) 100 | model.fit(x, x, epochs=4, verbose=0) 101 | out = model.predict(x) 102 | out -= np.reshape(K.eval(norm.beta), (1, 3, 1, 1)) 103 | out /= np.reshape(K.eval(norm.gamma), (1, 3, 1, 1)) 104 | 105 | assert_allclose(np.mean(out, axis=(0, 2, 3)), 0.0, atol=1e-1) 106 | assert_allclose(np.std(out, axis=(0, 2, 3)), 1.0, atol=1e-1) 107 | 108 | 109 | @keras_test 110 | def test_shared_batchnorm(): 111 | '''Test that a BN layer can be shared 112 | across different data streams. 
113 | ''' 114 | # Test single layer reuse 115 | bn = normalization.BatchNormalization(input_shape=(10,)) 116 | x1 = Input(shape=(10,)) 117 | bn(x1) 118 | 119 | x2 = Input(shape=(10,)) 120 | y2 = bn(x2) 121 | 122 | x = np.random.normal(loc=5.0, scale=10.0, size=(2, 10)) 123 | model = Model(x2, y2) 124 | assert len(model.updates) == 2 125 | model.compile('sgd', 'mse') 126 | model.train_on_batch(x, x) 127 | 128 | # Test model-level reuse 129 | x3 = Input(shape=(10,)) 130 | y3 = model(x3) 131 | new_model = Model(x3, y3) 132 | assert len(model.updates) == 2 133 | new_model.compile('sgd', 'mse') 134 | new_model.train_on_batch(x, x) 135 | 136 | 137 | if __name__ == '__main__': 138 | pytest.main([__file__]) 139 | -------------------------------------------------------------------------------- /examples/mnist_siamese.py: -------------------------------------------------------------------------------- 1 | '''Train a Siamese MLP on pairs of digits from the MNIST dataset. 2 | 3 | It follows Hadsell-et-al.'06 [1] by computing the Euclidean distance on the 4 | output of the shared network and by optimizing the contrastive loss (see paper 5 | for more details). 6 | 7 | [1] "Dimensionality Reduction by Learning an Invariant Mapping" 8 | http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf 9 | 10 | Gets to 97.2% test accuracy after 20 epochs. 11 | 2 seconds per epoch on a Titan X Maxwell GPU. 12 | ''' 13 | from __future__ import absolute_import 14 | from __future__ import print_function 15 | import numpy as np 16 | 17 | import random 18 | from keras.datasets import mnist 19 | from keras.models import Model 20 | from keras.layers import Input, Flatten, Dense, Dropout, Lambda 21 | from keras.optimizers import RMSprop 22 | from keras import backend as K 23 | 24 | num_classes = 10 25 | epochs = 20 26 | 27 | 28 | def euclidean_distance(vects): 29 | x, y = vects 30 | return K.sqrt(K.maximum(K.sum(K.square(x - y), axis=1, keepdims=True), K.epsilon())) 31 | 32 | 33 | def eucl_dist_output_shape(shapes): 34 | shape1, shape2 = shapes 35 | return (shape1[0], 1) 36 | 37 | 38 | def contrastive_loss(y_true, y_pred): 39 | '''Contrastive loss from Hadsell-et-al.'06 40 | http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf 41 | ''' 42 | margin = 1 43 | return K.mean(y_true * K.square(y_pred) + 44 | (1 - y_true) * K.square(K.maximum(margin - y_pred, 0))) 45 | 46 | 47 | def create_pairs(x, digit_indices): 48 | '''Positive and negative pair creation. 49 | Alternates between positive and negative pairs. 50 | ''' 51 | pairs = [] 52 | labels = [] 53 | n = min([len(digit_indices[d]) for d in range(num_classes)]) - 1 54 | for d in range(num_classes): 55 | for i in range(n): 56 | z1, z2 = digit_indices[d][i], digit_indices[d][i + 1] 57 | pairs += [[x[z1], x[z2]]] 58 | inc = random.randrange(1, num_classes) 59 | dn = (d + inc) % num_classes 60 | z1, z2 = digit_indices[d][i], digit_indices[dn][i] 61 | pairs += [[x[z1], x[z2]]] 62 | labels += [1, 0] 63 | return np.array(pairs), np.array(labels) 64 | 65 | 66 | def create_base_network(input_shape): 67 | '''Base network to be shared (eq. to feature extraction). 68 | ''' 69 | input = Input(shape=input_shape) 70 | x = Flatten()(input) 71 | x = Dense(128, activation='relu')(x) 72 | x = Dropout(0.1)(x) 73 | x = Dense(128, activation='relu')(x) 74 | x = Dropout(0.1)(x) 75 | x = Dense(128, activation='relu')(x) 76 | return Model(input, x) 77 | 78 | 79 | def compute_accuracy(y_true, y_pred): 80 | '''Compute classification accuracy with a fixed threshold on distances.
81 | ''' 82 | pred = y_pred.ravel() < 0.5 83 | return np.mean(pred == y_true) 84 | 85 | 86 | def accuracy(y_true, y_pred): 87 | '''Compute classification accuracy with a fixed threshold on distances. 88 | ''' 89 | return K.mean(K.equal(y_true, K.cast(y_pred < 0.5, y_true.dtype))) 90 | 91 | 92 | # the data, shuffled and split between train and test sets 93 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 94 | x_train = x_train.astype('float32') 95 | x_test = x_test.astype('float32') 96 | x_train /= 255 97 | x_test /= 255 98 | input_shape = x_train.shape[1:] 99 | 100 | # create training+test positive and negative pairs 101 | digit_indices = [np.where(y_train == i)[0] for i in range(num_classes)] 102 | tr_pairs, tr_y = create_pairs(x_train, digit_indices) 103 | 104 | digit_indices = [np.where(y_test == i)[0] for i in range(num_classes)] 105 | te_pairs, te_y = create_pairs(x_test, digit_indices) 106 | 107 | # network definition 108 | base_network = create_base_network(input_shape) 109 | 110 | input_a = Input(shape=input_shape) 111 | input_b = Input(shape=input_shape) 112 | 113 | # because we re-use the same instance `base_network`, 114 | # the weights of the network 115 | # will be shared across the two branches 116 | processed_a = base_network(input_a) 117 | processed_b = base_network(input_b) 118 | 119 | distance = Lambda(euclidean_distance, 120 | output_shape=eucl_dist_output_shape)([processed_a, processed_b]) 121 | 122 | model = Model([input_a, input_b], distance) 123 | 124 | # train 125 | rms = RMSprop() 126 | model.compile(loss=contrastive_loss, optimizer=rms, metrics=[accuracy]) 127 | model.fit([tr_pairs[:, 0], tr_pairs[:, 1]], tr_y, 128 | batch_size=128, 129 | epochs=epochs, 130 | validation_data=([te_pairs[:, 0], te_pairs[:, 1]], te_y)) 131 | 132 | # compute final accuracy on training and test sets 133 | y_pred = model.predict([tr_pairs[:, 0], tr_pairs[:, 1]]) 134 | tr_acc = compute_accuracy(tr_y, y_pred) 135 | y_pred = model.predict([te_pairs[:, 0], te_pairs[:, 1]]) 136 | te_acc = compute_accuracy(te_y, y_pred) 137 | 138 | print('* Accuracy on training set: %0.2f%%' % (100 * tr_acc)) 139 | print('* Accuracy on test set: %0.2f%%' % (100 * te_acc)) 140 | -------------------------------------------------------------------------------- /keras/backend/common.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | # the type of float to use throughout the session. 4 | _FLOATX = 'float32' 5 | _EPSILON = 10e-8 6 | _IMAGE_DATA_FORMAT = 'channels_last' 7 | 8 | 9 | def epsilon(): 10 | """Returns the value of the fuzz factor used in numeric expressions. 11 | 12 | # Returns 13 | A float. 14 | 15 | # Example 16 | ```python 17 | >>> keras.backend.epsilon() 18 | 1e-08 19 | ``` 20 | """ 21 | return _EPSILON 22 | 23 | 24 | def set_epsilon(e): 25 | """Sets the value of the fuzz factor used in numeric expressions. 26 | 27 | # Arguments 28 | e: float. New value of epsilon. 29 | 30 | # Example 31 | ```python 32 | >>> from keras import backend as K 33 | >>> K.epsilon() 34 | 1e-08 35 | >>> K.set_epsilon(1e-05) 36 | >>> K.epsilon() 37 | 1e-05 38 | ``` 39 | """ 40 | global _EPSILON 41 | _EPSILON = e 42 | 43 | 44 | def floatx(): 45 | """Returns the default float type, as a string. 46 | (e.g. 'float16', 'float32', 'float64'). 47 | 48 | # Returns 49 | String, the current default float type. 
50 | 51 | # Example 52 | ```python 53 | >>> keras.backend.floatx() 54 | 'float32' 55 | ``` 56 | """ 57 | return _FLOATX 58 | 59 | 60 | def set_floatx(floatx): 61 | """Sets the default float type. 62 | 63 | # Arguments 64 | floatx: String, 'float16', 'float32', or 'float64'. 65 | 66 | # Example 67 | ```python 68 | >>> from keras import backend as K 69 | >>> K.floatx() 70 | 'float32' 71 | >>> K.set_floatx('float16') 72 | >>> K.floatx() 73 | 'float16' 74 | ``` 75 | """ 76 | global _FLOATX 77 | if floatx not in {'float16', 'float32', 'float64'}: 78 | raise ValueError('Unknown floatx type: ' + str(floatx)) 79 | _FLOATX = str(floatx) 80 | 81 | 82 | def cast_to_floatx(x): 83 | """Cast a Numpy array to the default Keras float type. 84 | 85 | # Arguments 86 | x: Numpy array. 87 | 88 | # Returns 89 | The same Numpy array, cast to its new type. 90 | 91 | # Example 92 | ```python 93 | >>> from keras import backend as K 94 | >>> K.floatx() 95 | 'float32' 96 | >>> arr = numpy.array([1.0, 2.0], dtype='float64') 97 | >>> arr.dtype 98 | dtype('float64') 99 | >>> new_arr = K.cast_to_floatx(arr) 100 | >>> new_arr 101 | array([ 1., 2.], dtype=float32) 102 | >>> new_arr.dtype 103 | dtype('float32') 104 | ``` 105 | """ 106 | return np.asarray(x, dtype=_FLOATX) 107 | 108 | 109 | def image_data_format(): 110 | """Returns the default image data format convention ('channels_first' or 'channels_last'). 111 | 112 | # Returns 113 | A string, either `'channels_first'` or `'channels_last'` 114 | 115 | # Example 116 | ```python 117 | >>> keras.backend.image_data_format() 118 | 'channels_first' 119 | ``` 120 | """ 121 | return _IMAGE_DATA_FORMAT 122 | 123 | 124 | def set_image_data_format(data_format): 125 | """Sets the value of the data format convention. 126 | 127 | # Arguments 128 | data_format: string. `'channels_first'` or `'channels_last'`. 129 | 130 | # Example 131 | ```python 132 | >>> from keras import backend as K 133 | >>> K.image_data_format() 134 | 'channels_first' 135 | >>> K.set_image_data_format('channels_last') 136 | >>> K.image_data_format() 137 | 'channels_last' 138 | ``` 139 | """ 140 | global _IMAGE_DATA_FORMAT 141 | if data_format not in {'channels_last', 'channels_first'}: 142 | raise ValueError('Unknown data_format:', data_format) 143 | _IMAGE_DATA_FORMAT = str(data_format) 144 | 145 | 146 | # Legacy methods 147 | 148 | def set_image_dim_ordering(dim_ordering): 149 | """Legacy setter for `image_data_format`. 150 | 151 | # Arguments 152 | dim_ordering: string. `tf` or `th`. 153 | 154 | # Example 155 | ```python 156 | >>> from keras import backend as K 157 | >>> K.image_data_format() 158 | 'channels_first' 159 | >>> K.set_image_data_format('channels_last') 160 | >>> K.image_data_format() 161 | 'channels_last' 162 | ``` 163 | 164 | # Raises 165 | ValueError: if `dim_ordering` is invalid. 166 | """ 167 | global _IMAGE_DATA_FORMAT 168 | if dim_ordering not in {'tf', 'th'}: 169 | raise ValueError('Unknown dim_ordering:', dim_ordering) 170 | if dim_ordering == 'th': 171 | data_format = 'channels_first' 172 | else: 173 | data_format = 'channels_last' 174 | _IMAGE_DATA_FORMAT = data_format 175 | 176 | 177 | def image_dim_ordering(): 178 | """Legacy getter for `image_data_format`. 
179 | 180 | # Returns 181 | string, one of `'th'`, `'tf'` 182 | """ 183 | if _IMAGE_DATA_FORMAT == 'channels_first': 184 | return 'th' 185 | else: 186 | return 'tf' 187 | -------------------------------------------------------------------------------- /docs/templates/backend.md: -------------------------------------------------------------------------------- 1 | # Keras backends 2 | 3 | ## What is a "backend"? 4 | 5 | Keras is a model-level library, providing high-level building blocks for developing deep learning models. It does not itself handle low-level operations such as tensor products, convolutions, and so on. Instead, it relies on a specialized, well-optimized tensor manipulation library to do so, serving as the "backend engine" of Keras. Rather than picking one single tensor library and making the implementation of Keras tied to that library, Keras handles the problem in a modular way, and several different backend engines can be plugged seamlessly into Keras. 6 | 7 | At this time, Keras has three backend implementations available: the **TensorFlow** backend, the **Theano** backend, and the **CNTK** backend. 8 | 9 | - [TensorFlow](http://www.tensorflow.org/) is an open-source symbolic tensor manipulation framework developed by Google. 10 | - [Theano](http://deeplearning.net/software/theano/) is an open-source symbolic tensor manipulation framework developed by LISA Lab at Université de Montréal. 11 | - [CNTK](https://www.microsoft.com/en-us/cognitive-toolkit/) is an open-source toolkit for deep learning developed by Microsoft. 12 | 13 | In the future, we are likely to add more backend options. 14 | 15 | ---- 16 | 17 | ## Switching from one backend to another 18 | 19 | If you have run Keras at least once, you will find the Keras configuration file at: 20 | 21 | `$HOME/.keras/keras.json` 22 | 23 | If it isn't there, you can create it. 24 | 25 | **NOTE for Windows Users:** Please replace `$HOME` with `%USERPROFILE%`. 26 | 27 | The default configuration file looks like this: 28 | 29 | ``` 30 | { 31 | "image_data_format": "channels_last", 32 | "epsilon": 1e-07, 33 | "floatx": "float32", 34 | "backend": "tensorflow" 35 | } 36 | ``` 37 | 38 | Simply change the field `backend` to `"theano"`, `"tensorflow"`, or `"cntk"`, and Keras will use the new configuration next time you run any Keras code. 39 | 40 | You can also define the environment variable `KERAS_BACKEND`, and this will 41 | override what is defined in your config file: 42 | 43 | ```bash 44 | KERAS_BACKEND=tensorflow python -c "from keras import backend" 45 | Using TensorFlow backend. 46 | ``` 47 | 48 | ---- 49 | 50 | ## keras.json details 51 | 52 | 53 | The `keras.json` configuration file contains the following settings: 54 | 55 | ``` 56 | { 57 | "image_data_format": "channels_last", 58 | "epsilon": 1e-07, 59 | "floatx": "float32", 60 | "backend": "tensorflow" 61 | } 62 | ``` 63 | 64 | You can change these settings by editing `$HOME/.keras/keras.json`. 65 | 66 | * `image_data_format`: String, either `"channels_last"` or `"channels_first"`. It specifies which data format convention Keras will follow. (`keras.backend.image_data_format()` returns it.) 67 | - For 2D data (e.g. image), `"channels_last"` assumes `(rows, cols, channels)` while `"channels_first"` assumes `(channels, rows, cols)`. 68 | - For 3D data, `"channels_last"` assumes `(conv_dim1, conv_dim2, conv_dim3, channels)` while `"channels_first"` assumes `(channels, conv_dim1, conv_dim2, conv_dim3)`.
69 | * `epsilon`: Float, a numeric fuzzing constant used to avoid dividing by zero in some operations. 70 | * `floatx`: String, `"float16"`, `"float32"`, or `"float64"`. Default float precision. 71 | * `backend`: String, `"tensorflow"`, `"theano"`, or `"cntk"`. 72 | 73 | ---- 74 | 75 | ## Using the abstract Keras backend to write new code 76 | 77 | If you want the Keras modules you write to be compatible with both Theano (`th`) and TensorFlow (`tf`), you have to write them via the abstract Keras backend API. Here's an intro. 78 | 79 | You can import the backend module via: 80 | ```python 81 | from keras import backend as K 82 | ``` 83 | 84 | The code below instantiates an input placeholder. It's equivalent to `tf.placeholder()` or `th.tensor.matrix()`, `th.tensor.tensor3()`, etc. 85 | 86 | ```python 87 | inputs = K.placeholder(shape=(2, 4, 5)) 88 | # also works: 89 | inputs = K.placeholder(shape=(None, 4, 5)) 90 | # also works: 91 | inputs = K.placeholder(ndim=3) 92 | ``` 93 | 94 | The code below instantiates a variable. It's equivalent to `tf.Variable()` or `th.shared()`. 95 | 96 | ```python 97 | import numpy as np 98 | val = np.random.random((3, 4, 5)) 99 | var = K.variable(value=val) 100 | 101 | # all-zeros variable: 102 | var = K.zeros(shape=(3, 4, 5)) 103 | # all-ones: 104 | var = K.ones(shape=(3, 4, 5)) 105 | ``` 106 | 107 | Most tensor operations you will need can be done as you would in TensorFlow or Theano: 108 | 109 | ```python 110 | # Initializing Tensors with Random Numbers 111 | b = K.random_uniform_variable(shape=(3, 4), low=0, high=1)  # Uniform distribution 112 | c = K.random_normal_variable(shape=(3, 4), mean=0, scale=1)  # Gaussian distribution 113 | d = K.random_normal_variable(shape=(3, 4), mean=0, scale=1) 114 | 115 | # Tensor Arithmetic 116 | a = b + c * K.abs(d) 117 | c = K.dot(a, K.transpose(b)) 118 | a = K.sum(b, axis=1) 119 | a = K.softmax(b) 120 | a = K.concatenate([b, c], axis=-1) 121 | # etc... 122 | ``` 123 | 124 | ---- 125 | 126 | ## Backend functions 127 | 128 | 129 | {{autogenerated}} 130 | 131 | 132 | 133 | 134 | 135 | -------------------------------------------------------------------------------- /keras/utils/io_utils.py: -------------------------------------------------------------------------------- 1 | """Utilities related to disk I/O.""" 2 | from __future__ import absolute_import 3 | from __future__ import print_function 4 | 5 | import numpy as np 6 | import sys 7 | from collections import defaultdict 8 | 9 | try: 10 | import h5py 11 | except ImportError: 12 | h5py = None 13 | 14 | 15 | class HDF5Matrix(object): 16 | """Representation of HDF5 dataset to be used instead of a Numpy array. 17 | 18 | # Example 19 | 20 | ```python 21 | x_data = HDF5Matrix('input/file.hdf5', 'data') 22 | model.predict(x_data) 23 | ``` 24 | 25 | Providing `start` and `end` allows use of a slice of the dataset. 26 | 27 | Optionally, a normalizer function (or lambda) can be given. This will 28 | be called on every slice of data retrieved. 29 | 30 | # Arguments 31 | datapath: string, path to an HDF5 file 32 | dataset: string, name of the HDF5 dataset in the file specified 33 | in datapath 34 | start: int, start of desired slice of the specified dataset 35 | end: int, end of desired slice of the specified dataset 36 | normalizer: function to be called on data when retrieved 37 | 38 | # Returns 39 | An array-like HDF5 dataset.
40 | """ 41 | refs = defaultdict(int) 42 | 43 | def __init__(self, datapath, dataset, start=0, end=None, normalizer=None): 44 | if h5py is None: 45 | raise ImportError('The use of HDF5Matrix requires ' 46 | 'HDF5 and h5py installed.') 47 | 48 | if datapath not in list(self.refs.keys()): 49 | f = h5py.File(datapath) 50 | self.refs[datapath] = f 51 | else: 52 | f = self.refs[datapath] 53 | self.data = f[dataset] 54 | self.start = start 55 | if end is None: 56 | self.end = self.data.shape[0] 57 | else: 58 | self.end = end 59 | self.normalizer = normalizer 60 | 61 | def __len__(self): 62 | return self.end - self.start 63 | 64 | def __getitem__(self, key): 65 | if isinstance(key, slice): 66 | start, stop = key.start, key.stop 67 | if start is None: 68 | start = 0 69 | if stop is None: 70 | stop = self.shape[0] 71 | if stop + self.start <= self.end: 72 | idx = slice(start + self.start, stop + self.start) 73 | else: 74 | raise IndexError 75 | elif isinstance(key, (int, np.integer)): 76 | if key + self.start < self.end: 77 | idx = key + self.start 78 | else: 79 | raise IndexError 80 | elif isinstance(key, np.ndarray): 81 | if np.max(key) + self.start < self.end: 82 | idx = (self.start + key).tolist() 83 | else: 84 | raise IndexError 85 | elif isinstance(key, list): 86 | if max(key) + self.start < self.end: 87 | idx = [x + self.start for x in key] 88 | else: 89 | raise IndexError 90 | else: 91 | raise IndexError 92 | if self.normalizer is not None: 93 | return self.normalizer(self.data[idx]) 94 | else: 95 | return self.data[idx] 96 | 97 | @property 98 | def shape(self): 99 | """Gets a numpy-style shape tuple giving the dataset dimensions. 100 | 101 | # Returns 102 | A numpy-style shape tuple. 103 | """ 104 | return (self.end - self.start,) + self.data.shape[1:] 105 | 106 | @property 107 | def dtype(self): 108 | """Gets the datatype of the dataset. 109 | 110 | # Returns 111 | A numpy dtype string. 112 | """ 113 | return self.data.dtype 114 | 115 | @property 116 | def ndim(self): 117 | """Gets the number of dimensions (rank) of the dataset. 118 | 119 | # Returns 120 | An integer denoting the number of dimensions (rank) of the dataset. 121 | """ 122 | return self.data.ndim 123 | 124 | @property 125 | def size(self): 126 | """Gets the total dataset size (number of elements). 127 | 128 | # Returns 129 | An integer denoting the number of elements in the dataset. 130 | """ 131 | return np.prod(self.shape) 132 | 133 | 134 | def ask_to_proceed_with_overwrite(filepath): 135 | """Produces a prompt asking about overwriting a file. 136 | 137 | # Arguments 138 | filepath: the path to the file to be overwritten. 139 | 140 | # Returns 141 | True if we can proceed with overwrite, False otherwise. 142 | """ 143 | get_input = input 144 | if sys.version_info[:2] <= (2, 7): 145 | get_input = raw_input 146 | overwrite = get_input('[WARNING] %s already exists - overwrite? ' 147 | '[y/n]' % (filepath)) 148 | while overwrite not in ['y', 'n']: 149 | overwrite = get_input('Enter "y" (overwrite) or "n" (cancel).') 150 | if overwrite == 'n': 151 | return False 152 | print('[TIP] Next time specify overwrite=True!') 153 | return True 154 | -------------------------------------------------------------------------------- /keras/utils/vis_utils.py: -------------------------------------------------------------------------------- 1 | """Utilities related to model visualization.""" 2 | import os 3 | 4 | try: 5 | # pydot-ng is a fork of pydot that is better maintained. 
6 | import pydot_ng as pydot 7 | except ImportError: 8 | # pydotplus is an improved version of pydot 9 | try: 10 | import pydotplus as pydot 11 | except ImportError: 12 | # Fall back on pydot if necessary. 13 | try: 14 | import pydot 15 | except ImportError: 16 | pydot = None 17 | 18 | 19 | def _check_pydot(): 20 | try: 21 | # Attempt to create an image of a blank graph 22 | # to check the pydot/graphviz installation. 23 | pydot.Dot.create(pydot.Dot()) 24 | except Exception: 25 | # pydot raises a generic Exception here, 26 | # so no specific class can be caught. 27 | raise ImportError('Failed to import pydot. You must install pydot' 28 | ' and graphviz for `pydotprint` to work.') 29 | 30 | 31 | def model_to_dot(model, 32 | show_shapes=False, 33 | show_layer_names=True, 34 | rankdir='TB'): 35 | """Convert a Keras model to dot format. 36 | 37 | # Arguments 38 | model: A Keras model instance. 39 | show_shapes: whether to display shape information. 40 | show_layer_names: whether to display layer names. 41 | rankdir: `rankdir` argument passed to PyDot, 42 | a string specifying the format of the plot: 43 | 'TB' creates a vertical plot; 44 | 'LR' creates a horizontal plot. 45 | 46 | # Returns 47 | A `pydot.Dot` instance representing the Keras model. 48 | """ 49 | from ..layers.wrappers import Wrapper 50 | from ..models import Sequential 51 | 52 | _check_pydot() 53 | dot = pydot.Dot() 54 | dot.set('rankdir', rankdir) 55 | dot.set('concentrate', True) 56 | dot.set_node_defaults(shape='record') 57 | 58 | if isinstance(model, Sequential): 59 | if not model.built: 60 | model.build() 61 | model = model.model 62 | layers = model.layers 63 | 64 | # Create graph nodes. 65 | for layer in layers: 66 | layer_id = str(id(layer)) 67 | 68 | # Append a wrapped layer's label to node's label, if it exists. 69 | layer_name = layer.name 70 | class_name = layer.__class__.__name__ 71 | if isinstance(layer, Wrapper): 72 | layer_name = '{}({})'.format(layer_name, layer.layer.name) 73 | child_class_name = layer.layer.__class__.__name__ 74 | class_name = '{}({})'.format(class_name, child_class_name) 75 | 76 | # Create node's label. 77 | if show_layer_names: 78 | label = '{}: {}'.format(layer_name, class_name) 79 | else: 80 | label = class_name 81 | 82 | # Rebuild the label as a table including input/output shapes. 83 | if show_shapes: 84 | try: 85 | outputlabels = str(layer.output_shape) 86 | except AttributeError: 87 | outputlabels = 'multiple' 88 | if hasattr(layer, 'input_shape'): 89 | inputlabels = str(layer.input_shape) 90 | elif hasattr(layer, 'input_shapes'): 91 | inputlabels = ', '.join( 92 | [str(ishape) for ishape in layer.input_shapes]) 93 | else: 94 | inputlabels = 'multiple' 95 | label = '%s\n|{input:|output:}|{{%s}|{%s}}' % (label, 96 | inputlabels, 97 | outputlabels) 98 | node = pydot.Node(layer_id, label=label) 99 | dot.add_node(node) 100 | 101 | # Connect nodes with edges. 102 | for layer in layers: 103 | layer_id = str(id(layer)) 104 | for i, node in enumerate(layer.inbound_nodes): 105 | node_key = layer.name + '_ib-' + str(i) 106 | if node_key in model.container_nodes: 107 | for inbound_layer in node.inbound_layers: 108 | inbound_layer_id = str(id(inbound_layer)) 109 | layer_id = str(id(layer)) 110 | dot.add_edge(pydot.Edge(inbound_layer_id, layer_id)) 111 | return dot 112 | 113 | 114 | def plot_model(model, 115 | to_file='model.png', 116 | show_shapes=False, 117 | show_layer_names=True, 118 | rankdir='TB'): 119 | """Converts a Keras model to dot format and save to a file. 
120 | 121 | # Arguments 122 | model: A Keras model instance 123 | to_file: File name of the plot image. 124 | show_shapes: whether to display shape information. 125 | show_layer_names: whether to display layer names. 126 | rankdir: `rankdir` argument passed to PyDot, 127 | a string specifying the format of the plot: 128 | 'TB' creates a vertical plot; 129 | 'LR' creates a horizontal plot. 130 | """ 131 | dot = model_to_dot(model, show_shapes, show_layer_names, rankdir) 132 | _, extension = os.path.splitext(to_file) 133 | if not extension: 134 | extension = 'png' 135 | else: 136 | extension = extension[1:] 137 | dot.write(to_file, format=extension) 138 | --------------------------------------------------------------------------------