├── .gitignore ├── .travis.yml ├── CONTRIBUTING.md ├── ISSUE_TEMPLATE.md ├── LICENSE ├── README.md ├── docker ├── Dockerfile ├── Makefile ├── README.md └── theanorc ├── docs ├── README.md ├── autogen.py ├── mkdocs.yml └── templates │ ├── activations.md │ ├── backend.md │ ├── callbacks.md │ ├── constraints.md │ ├── datasets.md │ ├── getting-started │ ├── faq.md │ ├── functional-api-guide.md │ └── sequential-model-guide.md │ ├── index.md │ ├── initializations.md │ ├── layers │ ├── about-keras-layers.md │ └── writing-your-own-keras-layers.md │ ├── models │ ├── about-keras-models.md │ ├── model.md │ └── sequential.md │ ├── objectives.md │ ├── optimizers.md │ ├── preprocessing │ ├── image.md │ ├── sequence.md │ └── text.md │ ├── regularizers.md │ ├── scikit-learn-api.md │ └── visualization.md ├── examples ├── addition_rnn.py ├── antirectifier.py ├── babi_memnn.py ├── babi_rnn.py ├── cifar10_cnn.py ├── conv_filter_visualization.py ├── deep_dream.py ├── imdb_bidirectional_lstm.py ├── imdb_cnn.py ├── imdb_cnn_lstm.py ├── imdb_lstm.py ├── inception_v3.py ├── lstm_benchmark.py ├── lstm_text_generation.py ├── mnist_cnn.py ├── mnist_irnn.py ├── mnist_mlp.py ├── mnist_siamese_graph.py ├── mnist_sklearn_wrapper.py ├── mnist_transfer_cnn.py ├── neural_style_transfer.py ├── pretrained_word_embeddings.py ├── reuters_mlp.py ├── stateful_lstm.py ├── variational_autoencoder.py └── variational_autoencoder_deconv.py ├── keras ├── __init__.py ├── activations.py ├── backend │ ├── __init__.py │ ├── common.py │ ├── tensorflow_backend.py │ └── theano_backend.py ├── callbacks.py ├── constraints.py ├── datasets │ ├── __init__.py │ ├── cifar.py │ ├── cifar10.py │ ├── cifar100.py │ ├── data_utils.py │ ├── imdb.py │ ├── mnist.py │ └── reuters.py ├── engine │ ├── __init__.py │ ├── topology.py │ └── training.py ├── initializations.py ├── layers │ ├── __init__.py │ ├── advanced_activations.py │ ├── convolutional.py │ ├── core.py │ ├── embeddings.py │ ├── local.py │ ├── noise.py │ ├── normalization.py │ ├── pooling.py │ ├── recurrent.py │ └── wrappers.py ├── legacy │ ├── __init__.py │ └── models.py ├── metrics.py ├── models.py ├── objectives.py ├── optimizers.py ├── preprocessing │ ├── __init__.py │ ├── image.py │ ├── sequence.py │ └── text.py ├── regularizers.py ├── utils │ ├── __init__.py │ ├── data_utils.py │ ├── generic_utils.py │ ├── io_utils.py │ ├── layer_utils.py │ ├── np_utils.py │ ├── test_utils.py │ └── visualize_util.py └── wrappers │ ├── __init__.py │ └── scikit_learn.py ├── pytest.ini ├── setup.cfg ├── setup.py └── tests ├── integration_tests ├── test_image_data_tasks.py ├── test_temporal_data_tasks.py └── test_vector_data_tasks.py ├── keras ├── backend │ └── test_backends.py ├── datasets │ └── test_datasets.py ├── engine │ ├── test_topology.py │ └── test_training.py ├── layers │ ├── test_advanced_activations.py │ ├── test_convolutional.py │ ├── test_core.py │ ├── test_embeddings.py │ ├── test_local.py │ ├── test_noise.py │ ├── test_normalization.py │ ├── test_recurrent.py │ └── test_wrappers.py ├── preprocessing │ ├── test_image.py │ ├── test_sequence.py │ └── test_text.py ├── test_activations.py ├── test_callbacks.py ├── test_constraints.py ├── test_graph_model.py ├── test_initializations.py ├── test_metrics.py ├── test_multiprocessing.py ├── test_objectives.py ├── test_optimizers.py ├── test_regularizers.py ├── test_sequential_model.py └── wrappers │ └── test_scikit_learn.py ├── test_loss_masking.py ├── test_loss_weighting.py └── test_model_saving.py /.gitignore: 
-------------------------------------------------------------------------------- 1 | *.DS_Store 2 | *.pyc 3 | *.swp 4 | temp/* 5 | dist/* 6 | build/* 7 | keras/datasets/data/* 8 | keras/datasets/temp/* 9 | docs/site/* 10 | docs/theme/* 11 | tags 12 | Keras.egg-info 13 | 14 | # test-related 15 | .coverage 16 | .cache 17 | 18 | # developer environments 19 | .idea 20 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: required 2 | dist: trusty 3 | language: python 4 | matrix: 5 | include: 6 | - python: 3.4 7 | env: KERAS_BACKEND=theano 8 | - python: 3.4 9 | env: KERAS_BACKEND=tensorflow 10 | - python: 2.7 11 | env: KERAS_BACKEND=theano 12 | - python: 2.7 13 | env: KERAS_BACKEND=tensorflow 14 | - python: 2.7 15 | env: KERAS_BACKEND=theano TEST_MODE=INTEGRATION_TESTS 16 | - python: 2.7 17 | env: KERAS_BACKEND=theano TEST_MODE=PEP8 18 | install: 19 | # code below is taken from http://conda.pydata.org/docs/travis.html 20 | # We do this conditionally because it saves us some downloading if the 21 | # version is the same. 22 | - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then 23 | wget https://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh; 24 | else 25 | wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh; 26 | fi 27 | - bash miniconda.sh -b -p $HOME/miniconda 28 | - export PATH="$HOME/miniconda/bin:$PATH" 29 | - hash -r 30 | - conda config --set always_yes yes --set changeps1 no 31 | - conda update -q conda 32 | # Useful for debugging any issues with conda 33 | - conda info -a 34 | 35 | - conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION numpy scipy matplotlib pandas pytest h5py 36 | - source activate test-environment 37 | - pip install pytest-cov python-coveralls pytest-xdist coverage==3.7.1 #we need this version of coverage for coveralls.io to work 38 | - pip install pep8 pytest-pep8 39 | - pip install git+git://github.com/Theano/Theano.git 40 | 41 | # install PIL for preprocessing tests 42 | - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then 43 | conda install pil; 44 | elif [[ "$TRAVIS_PYTHON_VERSION" == "3.4" ]]; then 45 | conda install Pillow; 46 | fi 47 | 48 | - python setup.py install 49 | 50 | # install TensorFlow 51 | - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then 52 | pip install https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.9.0-cp27-none-linux_x86_64.whl; 53 | elif [[ "$TRAVIS_PYTHON_VERSION" == "3.4" ]]; then 54 | pip install https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.9.0-cp34-cp34m-linux_x86_64.whl; 55 | fi 56 | # command to run tests 57 | script: 58 | # run keras backend init to initialize backend config 59 | - python -c "import keras.backend" 60 | # create dataset directory to avoid concurrent directory creation at runtime 61 | - mkdir ~/.keras/datasets 62 | # set up keras backend 63 | - sed -i -e 's/"backend":[[:space:]]*"[^"]*/"backend":\ "'$KERAS_BACKEND'/g' ~/.keras/keras.json; 64 | - echo -e "Running tests with the following config:\n$(cat ~/.keras/keras.json)" 65 | - if [[ "$TEST_MODE" == "INTEGRATION_TESTS" ]]; then 66 | PYTHONPATH=$PWD:$PYTHONPATH py.test tests/integration_tests; 67 | elif [[ "$TEST_MODE" == "PEP8" ]]; then 68 | PYTHONPATH=$PWD:$PYTHONPATH py.test --pep8 -m pep8 -n0; 69 | else 70 | PYTHONPATH=$PWD:$PYTHONPATH py.test tests/ --ignore=tests/integration_tests; 71 | fi 72 | after_success: 73 | - 
coveralls 74 | -------------------------------------------------------------------------------- /ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | Please make sure that the boxes below are checked before you submit your issue. Thank you! 2 | 3 | - [ ] Check that you are up-to-date with the master branch of Keras. You can update with: 4 | pip install git+git://github.com/fchollet/keras.git --upgrade --no-deps 5 | 6 | - [ ] If running on Theano, check that you are up-to-date with the master branch of Theano. You can update with: 7 | pip install git+git://github.com/Theano/Theano.git --upgrade --no-deps 8 | 9 | - [ ] Provide a link to a GitHub Gist of a Python script that can reproduce your issue (or just copy the script here if it is short). -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | COPYRIGHT 2 | 3 | All contributions by François Chollet: 4 | Copyright (c) 2015, François Chollet. 5 | All rights reserved. 6 | 7 | All contributions by Google: 8 | Copyright (c) 2015, Google, Inc. 9 | All rights reserved. 10 | 11 | All other contributions: 12 | Copyright (c) 2015, the respective contributors. 13 | All rights reserved. 14 | 15 | Each contributor holds copyright over their respective contributions. 16 | The project versioning (Git) records all such contribution source information. 17 | 18 | LICENSE 19 | 20 | The MIT License (MIT) 21 | 22 | Permission is hereby granted, free of charge, to any person obtaining a copy 23 | of this software and associated documentation files (the "Software"), to deal 24 | in the Software without restriction, including without limitation the rights 25 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 26 | copies of the Software, and to permit persons to whom the Software is 27 | furnished to do so, subject to the following conditions: 28 | 29 | The above copyright notice and this permission notice shall be included in all 30 | copies or substantial portions of the Software. 31 | 32 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 33 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 34 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 35 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 36 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 37 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 38 | SOFTWARE. 
39 | 40 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:7.5-cudnn5-devel 2 | 3 | ENV CONDA_DIR /opt/conda 4 | ENV PATH $CONDA_DIR/bin:$PATH 5 | 6 | RUN mkdir -p $CONDA_DIR && \ 7 | echo export PATH=$CONDA_DIR/bin:'$PATH' > /etc/profile.d/conda.sh && \ 8 | apt-get update && \ 9 | apt-get install -y wget git libhdf5-dev g++ graphviz && \ 10 | wget --quiet https://repo.continuum.io/miniconda/Miniconda3-3.9.1-Linux-x86_64.sh && \ 11 | echo "6c6b44acdd0bc4229377ee10d52c8ac6160c336d9cdd669db7371aa9344e1ac3 *Miniconda3-3.9.1-Linux-x86_64.sh" | sha256sum -c - && \ 12 | /bin/bash /Miniconda3-3.9.1-Linux-x86_64.sh -f -b -p $CONDA_DIR && \ 13 | rm Miniconda3-3.9.1-Linux-x86_64.sh 14 | 15 | ENV NB_USER keras 16 | ENV NB_UID 1000 17 | 18 | RUN useradd -m -s /bin/bash -N -u $NB_UID $NB_USER && \ 19 | mkdir -p $CONDA_DIR && \ 20 | chown keras $CONDA_DIR -R && \ 21 | mkdir -p /src && \ 22 | chown keras /src 23 | 24 | USER keras 25 | 26 | # Python 27 | ARG python_version=3.5.1 28 | ARG tensorflow_version=0.9.0rc0-cp35-cp35m 29 | RUN conda install -y python=${python_version} && \ 30 | pip install https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-${tensorflow_version}-linux_x86_64.whl && \ 31 | pip install git+git://github.com/Theano/Theano.git && \ 32 | pip install ipdb pytest pytest-cov python-coveralls coverage==3.7.1 pytest-xdist pep8 pytest-pep8 pydot_ng && \ 33 | conda install Pillow scikit-learn notebook pandas matplotlib nose pyyaml six h5py && \ 34 | pip install git+git://github.com/fchollet/keras.git && \ 35 | conda clean -yt 36 | 37 | ADD theanorc /home/keras/.theanorc 38 | 39 | ENV PYTHONPATH='/src/:$PYTHONPATH' 40 | 41 | WORKDIR /src 42 | 43 | EXPOSE 8888 44 | 45 | CMD jupyter notebook --port=8888 --ip=0.0.0.0 46 | 47 | -------------------------------------------------------------------------------- /docker/Makefile: -------------------------------------------------------------------------------- 1 | help: 2 | @cat Makefile 3 | 4 | DATA?="${HOME}/Data" 5 | GPU?=0 6 | DOCKER_FILE=Dockerfile 7 | DOCKER=GPU=$(GPU) nvidia-docker 8 | BACKEND=tensorflow 9 | TEST=tests/ 10 | SRC=$(shell dirname `pwd`) 11 | 12 | build: 13 | docker build -t keras --build-arg python_version=3.5 -f $(DOCKER_FILE) . 14 | 15 | bash: build 16 | $(DOCKER) run -it -v $(SRC):/src -v $(DATA):/data --env KERAS_BACKEND=$(BACKEND) keras bash 17 | 18 | ipython: build 19 | $(DOCKER) run -it -v $(SRC):/src -v $(DATA):/data --env KERAS_BACKEND=$(BACKEND) keras ipython 20 | 21 | notebook: build 22 | $(DOCKER) run -it -v $(SRC):/src -v $(DATA):/data --net=host --env KERAS_BACKEND=$(BACKEND) keras 23 | 24 | test: build 25 | $(DOCKER) run -it -v $(SRC):/src -v $(DATA):/data --env KERAS_BACKEND=$(BACKEND) keras py.test $(TEST) 26 | 27 | -------------------------------------------------------------------------------- /docker/README.md: -------------------------------------------------------------------------------- 1 | # Using Keras via Docker 2 | 3 | This directory contains `Dockerfile` to make it easy to get up and running with 4 | Keras via [Docker](http://www.docker.com/). 
5 | 6 | ## Installing Docker 7 | 8 | General installation instructions are 9 | [on the Docker site](https://docs.docker.com/installation/), but we give some 10 | quick links here: 11 | 12 | * [OSX](https://docs.docker.com/installation/mac/): [docker toolbox](https://www.docker.com/toolbox) 13 | * [ubuntu](https://docs.docker.com/installation/ubuntulinux/) 14 | 15 | ## Running the container 16 | 17 | We are using `Makefile` to simplify docker commands within make commands. 18 | 19 | Build the container and start a jupyter notebook 20 | 21 | $ make notebook 22 | 23 | Build the container and start an iPython shell 24 | 25 | $ make ipython 26 | 27 | Build the container and start a bash 28 | 29 | $ make bash 30 | 31 | For GPU support install NVidia drivers (ideally latest) and 32 | [nvidia-docker](https://github.com/NVIDIA/nvidia-docker). Run using 33 | 34 | $ make notebook GPU=0 # or [ipython, bash] 35 | 36 | Switch between Theano and TensorFlow 37 | 38 | $ make notebook BACKEND=theano 39 | $ make notebook BACKEND=tensorflow 40 | 41 | Mount a volume for external data sets 42 | 43 | $ make DATA=~/mydata 44 | 45 | Prints all make tasks 46 | 47 | $ make help 48 | 49 | You can change Theano parameters by editing `/docker/theanorc`. 50 | 51 | 52 | Note: If you would have a problem running nvidia-docker you may try the old way 53 | we have used. But it is not recommended. If you find a bug in the nvidia-docker report 54 | it there please and try using the nvidia-docker as described above. 55 | 56 | $ export CUDA_SO=$(\ls /usr/lib/x86_64-linux-gnu/libcuda.* | xargs -I{} echo '-v {}:{}') 57 | $ export DEVICES=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}') 58 | $ docker run -it -p 8888:8888 $CUDA_SO $DEVICES gcr.io/tensorflow/tensorflow:latest-gpu 59 | -------------------------------------------------------------------------------- /docker/theanorc: -------------------------------------------------------------------------------- 1 | [global] 2 | floatX = float32 3 | optimizer=None 4 | device = gpu 5 | 6 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # Keras Documentation 2 | 3 | The source for Keras documentation is in this directory under `sources/`. 4 | Our documentation uses extended Markdown, as implemented by [MkDocs](http://mkdocs.org). 5 | 6 | ## Building the documentation 7 | 8 | - install MkDocs: `pip install mkdocs` 9 | - `cd` to the `docs/` folder and run: 10 | - `python autogen.py` 11 | - `mkdocs serve` # Starts a local webserver: [localhost:8000](localhost:8000) 12 | - `mkdocs build` # Builds a static site in "site" directory 13 | -------------------------------------------------------------------------------- /docs/mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: Keras Documentation 2 | theme: readthedocs 3 | docs_dir: sources 4 | repo_url: http://github.com/fchollet/keras 5 | site_url: http://keras.io/ 6 | # theme_dir: theme 7 | site_description: 'Documentation for Keras, the Python Deep Learning library.' 
8 | 9 | dev_addr: '0.0.0.0:8000' 10 | google_analytics: ['UA-61785484-1', 'keras.io'] 11 | 12 | 13 | pages: 14 | - Home: index.md 15 | - Getting started: 16 | - Guide to the Sequential model: getting-started/sequential-model-guide.md 17 | - Guide to the Functional API: getting-started/functional-api-guide.md 18 | - FAQ: getting-started/faq.md 19 | - Models: 20 | - About Keras models: models/about-keras-models.md 21 | - Sequential: models/sequential.md 22 | - Model (functional API): models/model.md 23 | - Layers: 24 | - About Keras layers: layers/about-keras-layers.md 25 | - Core Layers: layers/core.md 26 | - Convolutional Layers: layers/convolutional.md 27 | - Pooling Layers: layers/pooling.md 28 | - Recurrent Layers: layers/recurrent.md 29 | - Embedding Layers: layers/embeddings.md 30 | - Advanced Activations Layers: layers/advanced-activations.md 31 | - Normalization Layers: layers/normalization.md 32 | - Noise layers: layers/noise.md 33 | - Layer wrappers: layers/wrappers.md 34 | - Writing your own Keras layers: layers/writing-your-own-keras-layers.md 35 | - Preprocessing: 36 | - Sequence Preprocessing: preprocessing/sequence.md 37 | - Text Preprocessing: preprocessing/text.md 38 | - Image Preprocessing: preprocessing/image.md 39 | - Objectives: objectives.md 40 | - Optimizers: optimizers.md 41 | - Activations: activations.md 42 | - Callbacks: callbacks.md 43 | - Datasets: datasets.md 44 | - Backend: backend.md 45 | - Initializations: initializations.md 46 | - Regularizers: regularizers.md 47 | - Constraints: constraints.md 48 | - Visualization: visualization.md 49 | - Scikit-learn API: scikit-learn-api.md 50 | 51 | 52 | 53 | 54 | -------------------------------------------------------------------------------- /docs/templates/activations.md: -------------------------------------------------------------------------------- 1 | 2 | ## Usage of activations 3 | 4 | Activations can either be used through an `Activation` layer, or through the `activation` argument supported by all forward layers: 5 | 6 | ```python 7 | from keras.layers.core import Activation, Dense 8 | 9 | model.add(Dense(64)) 10 | model.add(Activation('tanh')) 11 | ``` 12 | is equivalent to: 13 | ```python 14 | model.add(Dense(64, activation='tanh')) 15 | ``` 16 | 17 | You can also pass an element-wise Theano/TensorFlow function as an activation: 18 | 19 | ```python 20 | from keras import backend as K 21 | 22 | def tanh(x): 23 | return K.tanh(x) 24 | 25 | model.add(Dense(64, activation=tanh)) 26 | model.add(Activation(tanh)) 27 | ``` 28 | 29 | ## Available activations 30 | 31 | - __softmax__: Softmax applied across inputs last dimension. Expects shape either `(nb_samples, nb_timesteps, nb_dims)` or `(nb_samples, nb_dims)`. 32 | - __softplus__ 33 | - __softsign__ 34 | - __relu__ 35 | - __tanh__ 36 | - __sigmoid__ 37 | - __hard_sigmoid__ 38 | - __linear__ 39 | 40 | ## On Advanced Activations 41 | 42 | Activations that are more complex than a simple Theano/TensorFlow function (eg. learnable activations, configurable activations, etc.) are available as [Advanced Activation layers](layers/advanced-activations.md), and can be found in the module `keras.layers.advanced_activations`. These include PReLU and LeakyReLU. 43 | -------------------------------------------------------------------------------- /docs/templates/backend.md: -------------------------------------------------------------------------------- 1 | # Keras backends 2 | 3 | ## What is a "backend"? 
4 | 5 | Keras is a model-level library, providing high-level building blocks for developing deep learning models. It does not handle itself low-level operations such as tensor products, convolutions and so on. Instead, it relies on a specialized, well-optimized tensor manipulation library to do so, serving as the "backend engine" of Keras. Rather than picking one single tensor library and making the implementation of Keras tied to that library, Keras handles the problem in a modular way, and several different backend engines can be plugged seamlessly into Keras. 6 | 7 | At this time, Keras has two backend implementations available: the **Theano** backend and the **TensorFlow** backend. 8 | 9 | - [Theano](http://deeplearning.net/software/theano/) is an open-source symbolic tensor manipulation framework developed by LISA/MILA Lab at Université de Montréal. 10 | - [TensorFlow](http://www.tensorflow.org/) is an open-source symbolic tensor manipulation framework developed by Google, Inc. 11 | 12 | ---- 13 | 14 | ## Switching from one backend to another 15 | 16 | If you have run Keras at least once, you will find the Keras configuration file at: 17 | 18 | `~/.keras/keras.json` 19 | 20 | If it isn't there, you can create it. 21 | 22 | It probably looks like this: 23 | 24 | `{"epsilon": 1e-07, "floatx": "float32", "backend": "theano"}` 25 | 26 | Simply change the field `backend` to either `"theano"` or `"tensorflow"`, and Keras will use the new configuration next time you run any Keras code. 27 | 28 | You can also define the environment variable ``KERAS_BACKEND`` and this will 29 | override what is defined in your config file : 30 | 31 | ```bash 32 | KERAS_BACKEND=tensorflow python -c "from keras import backend; print(backend._BACKEND)" 33 | Using TensorFlow backend. 34 | tensorflow 35 | ``` 36 | 37 | ---- 38 | 39 | ## Using the abstract Keras backend to write new code 40 | 41 | If you want the Keras modules you write to be compatible with both Theano and TensorFlow, you have to write them via the abstract Keras backend API. Here's an intro. 42 | 43 | You can import the backend module via: 44 | ```python 45 | from keras import backend as K 46 | ``` 47 | 48 | The code below instantiates an input placeholder. It's equivalent to `tf.placeholder()` or `T.matrix()`, `T.tensor3()`, etc. 49 | 50 | ```python 51 | input = K.placeholder(shape=(2, 4, 5)) 52 | # also works: 53 | input = K.placeholder(shape=(None, 4, 5)) 54 | # also works: 55 | input = K.placeholder(ndim=3) 56 | ``` 57 | 58 | The code below instantiates a shared variable. It's equivalent to `tf.variable()` or `theano.shared()`. 59 | 60 | ```python 61 | val = np.random.random((3, 4, 5)) 62 | var = K.variable(value=val) 63 | 64 | # all-zeros variable: 65 | var = K.zeros(shape=(3, 4, 5)) 66 | # all-ones: 67 | var = K.ones(shape=(3, 4, 5)) 68 | ``` 69 | 70 | Most tensor operations you will need can be done as you would in TensorFlow or Theano: 71 | 72 | ```python 73 | a = b + c * K.abs(d) 74 | c = K.dot(a, K.transpose(b)) 75 | a = K.sum(b, axis=2) 76 | a = K.softmax(b) 77 | a = concatenate([b, c], axis=-1) 78 | # etc... 79 | ``` 80 | 81 | ---- 82 | 83 | ## Backend functions 84 | 85 | 86 | {{autogenerated}} 87 | 88 | 89 | 90 | 91 | 92 | -------------------------------------------------------------------------------- /docs/templates/callbacks.md: -------------------------------------------------------------------------------- 1 | ## Usage of callbacks 2 | 3 | A callback is a set of functions to be applied at given stages of the training procedure. 
You can use callbacks to get a view on internal states and statistics of the model during training. You can pass a list of callbacks (as the keyword argument `callbacks`) to the `.fit()` method of the `Sequential` model. The relevant methods of the callbacks will then be called at each stage of the training. 4 | 5 | --- 6 | 7 | {{autogenerated}} 8 | 9 | --- 10 | 11 | 12 | # Create a callback 13 | 14 | You can create a custom callback by extending the base class `keras.callbacks.Callback`. A callback has access to its associated model through the class property `self.model`. 15 | 16 | Here's a simple example saving a list of losses over each batch during training: 17 | ```python 18 | class LossHistory(keras.callbacks.Callback): 19 | def on_train_begin(self, logs={}): 20 | self.losses = [] 21 | 22 | def on_batch_end(self, batch, logs={}): 23 | self.losses.append(logs.get('loss')) 24 | ``` 25 | 26 | --- 27 | 28 | ### Example: recording loss history 29 | 30 | ```python 31 | class LossHistory(keras.callbacks.Callback): 32 | def on_train_begin(self, logs={}): 33 | self.losses = [] 34 | 35 | def on_batch_end(self, batch, logs={}): 36 | self.losses.append(logs.get('loss')) 37 | 38 | model = Sequential() 39 | model.add(Dense(10, input_dim=784, init='uniform')) 40 | model.add(Activation('softmax')) 41 | model.compile(loss='categorical_crossentropy', optimizer='rmsprop') 42 | 43 | history = LossHistory() 44 | model.fit(X_train, Y_train, batch_size=128, nb_epoch=20, verbose=0, callbacks=[history]) 45 | 46 | print history.losses 47 | # outputs 48 | ''' 49 | [0.66047596406559383, 0.3547245744908703, ..., 0.25953155204159617, 0.25901699725311789] 50 | ''' 51 | ``` 52 | 53 | --- 54 | 55 | ### Example: model checkpoints 56 | 57 | ```python 58 | from keras.callbacks import ModelCheckpoint 59 | 60 | model = Sequential() 61 | model.add(Dense(10, input_dim=784, init='uniform')) 62 | model.add(Activation('softmax')) 63 | model.compile(loss='categorical_crossentropy', optimizer='rmsprop') 64 | 65 | ''' 66 | saves the model weights after each epoch if the validation loss decreased 67 | ''' 68 | checkpointer = ModelCheckpoint(filepath="/tmp/weights.hdf5", verbose=1, save_best_only=True) 69 | model.fit(X_train, Y_train, batch_size=128, nb_epoch=20, verbose=0, validation_data=(X_test, Y_test), callbacks=[checkpointer]) 70 | 71 | ``` 72 | 73 | -------------------------------------------------------------------------------- /docs/templates/constraints.md: -------------------------------------------------------------------------------- 1 | ## Usage of constraints 2 | 3 | Functions from the `constraints` module allow setting constraints (eg. non-negativity) on network parameters during optimization. 4 | 5 | The penalties are applied on a per-layer basis. The exact API will depend on the layer, but the layers `Dense`, `TimeDistributedDense`, `MaxoutDense`, `Convolution1D` and `Convolution2D` have a unified API. 6 | 7 | These layers expose 2 keyword arguments: 8 | 9 | - `W_constraint` for the main weights matrix 10 | - `b_constraint` for the bias. 
11 | 12 | 13 | ```python 14 | from keras.constraints import maxnorm 15 | model.add(Dense(64, W_constraint = maxnorm(2))) 16 | ``` 17 | 18 | ## Available constraints 19 | 20 | - __maxnorm__(m=2): maximum-norm constraint 21 | - __nonneg__(): non-negativity constraint 22 | - __unitnorm__(): unit-norm constraint, enforces the matrix to have unit norm along the last axis -------------------------------------------------------------------------------- /docs/templates/initializations.md: -------------------------------------------------------------------------------- 1 | 2 | ## Usage of initializations 3 | 4 | Initializations define the way to set the initial random weights of Keras layers. 5 | 6 | The keyword arguments used for passing initializations to layers will depend on the layer. Usually it is simply `init`: 7 | 8 | ```python 9 | model.add(Dense(64, init='uniform')) 10 | ``` 11 | 12 | ## Available initializations 13 | 14 | - __uniform__ 15 | - __lecun_uniform__: Uniform initialization scaled by the square root of the number of inputs (LeCun 98). 16 | - __normal__ 17 | - __identity__: Use with square 2D layers (`shape[0] == shape[1]`). 18 | - __orthogonal__: Use with square 2D layers (`shape[0] == shape[1]`). 19 | - __zero__ 20 | - __glorot_normal__: Gaussian initialization scaled by fan_in + fan_out (Glorot 2010) 21 | - __glorot_uniform__ 22 | - __he_normal__: Gaussian initialization scaled by fan_in (He et al., 2014) 23 | - __he_uniform__ 24 | 25 | 26 | An initialization may be passed as a string (must match one of the available initializations above), or as a callable. 27 | If a callable, then it must take two arguments: `shape` (shape of the variable to initialize) and `name` (name of the variable), 28 | and it must return a variable (e.g. output of `K.variable()`): 29 | 30 | ```python 31 | from keras import backend as K 32 | import numpy as np 33 | 34 | def my_init(shape, name=None): 35 | value = np.random.random(shape) 36 | return K.variable(value, name=name) 37 | 38 | model.add(Dense(64, init=my_init)) 39 | ``` 40 | 41 | You could also use functions from `keras.initializations` in this way: 42 | 43 | ```python 44 | from keras import initializations 45 | 46 | def my_init(shape, name=None): 47 | return initializations.normal(shape, scale=0.01, name=name) 48 | 49 | model.add(Dense(64, init=my_init)) 50 | ``` -------------------------------------------------------------------------------- /docs/templates/layers/about-keras-layers.md: -------------------------------------------------------------------------------- 1 | # About Keras layers 2 | 3 | All Keras layers have a number of methods in common: 4 | 5 | - `layer.get_weights()`: returns the weights of the layer as a list of Numpy arrays. 6 | - `layer.set_weights(weights)`: sets the weights of the layer from a list of Numpy arrays (with the same shapes as the output of `get_weights`). 7 | - `layer.get_config()`: returns a dictionary containing the configuration of the layer. The layer can be reinstantiated from its config via: 8 | ```python 9 | from keras.utils.layer_utils import layer_from_config 10 | 11 | config = layer.get_config() 12 | layer = layer_from_config(config) 13 | ``` 14 | 15 | If a layer has a single node (i.e. 
if it isn't a shared layer), you can get its input tensor, output tensor, input shape and output shape via: 16 | 17 | - `layer.input` 18 | - `layer.output` 19 | - `layer.input_shape` 20 | - `layer.output_shape` 21 | 22 | If the layer has multiple nodes (see: [the concept of layer node and shared layers](/getting-started/functional-api-guide/#the-concept-of-layer-node)), you can use the following methods: 23 | 24 | - `layer.get_input_at(node_index)` 25 | - `layer.get_output_at(node_index)` 26 | - `layer.get_input_shape_at(node_index)` 27 | - `layer.get_output_shape_at(node_index)` -------------------------------------------------------------------------------- /docs/templates/layers/writing-your-own-keras-layers.md: -------------------------------------------------------------------------------- 1 | # Writing your own Keras layers 2 | 3 | For simple, stateless custom operations, you are probably better off using `layers.core.Lambda` layers. But for any custom operation that has trainable weights, you should implement your own layer. 4 | 5 | Here is the skeleton of a Keras layer. There are only three methods you need to implement: 6 | 7 | - `build(input_shape)`: this is where you will define your weights. Trainable weights should be added to the list `self.trainable_weights`. Other attributes of note are: `self.non_trainable_weights` (list) and `self.updates` (list of update tuples (tensor, new_tensor)). For an example of how to use `non_trainable_weights` and `updates`, see the code for the `BatchNormalization` layer. 8 | - `call(x)`: this is where the layer's logic lives. Unless you want your layer to support masking, you only have to care about the first argument passed to `call`: the input tensor. 9 | - `get_output_shape_for(input_shape)`: in case your layer modifies the shape of its input, you should specify here the shape transformation logic. This allows Keras to do automatic shape inference. 10 | 11 | ```python 12 | from keras import backend as K 13 | from keras.engine.topology import Layer 14 | import numpy as np 15 | 16 | class MyLayer(Layer): 17 | def __init__(self, output_dim, **kwargs): 18 | self.output_dim = output_dim 19 | super(MyLayer, self).__init__(**kwargs) 20 | 21 | def build(self, input_shape): 22 | input_dim = input_shape[1] 23 | initial_weight_value = np.random.random((input_dim, output_dim)) 24 | self.W = K.variable(initial_weight_value) 25 | self.trainable_weights = [self.W] 26 | 27 | def call(self, x, mask=None): 28 | return K.dot(x, self.W) 29 | 30 | def get_output_shape_for(self, input_shape): 31 | return (input_shape[0], self.output_dim) 32 | ``` 33 | 34 | The existing Keras layers provide ample examples of how to implement almost anything. Never hesitate to read the source code! -------------------------------------------------------------------------------- /docs/templates/models/about-keras-models.md: -------------------------------------------------------------------------------- 1 | # About Keras models 2 | 3 | There are two types of models available in Keras: [the Sequential model](/models/sequential) and [the Model class used with functional API](/models/model). 4 | 5 | These models have a number of methods in common: 6 | 7 | - `model.summary()`: prints a summary representation of your model. 8 | - `model.get_config()`: returns a dictionary containing the configuration of the model. 
The model can be reinstantiated from its config via: 9 | ```python 10 | config = model.get_config() 11 | model = Model.from_config(config) 12 | # or, for Sequential: 13 | model = Sequential.from_config(config) 14 | ``` 15 | 16 | - `model.get_weights()`: returns a list of all weight tensors in the model, as Numpy arrays. 17 | - `model.set_weights(weights)`: sets the values of the weights of the model, from a list of Numpy arrays. The arrays in the list should have the same shape as those returned by `get_weights()`. 18 | - `model.to_json()`: returns a representation of the model as a JSON string. Note that the representation does not include the weights, only the architecture. You can reinstantiate the same model (with reinitialized weights) from the JSON string via: 19 | ```python 20 | from models import model_from_json 21 | 22 | json_string = model.to_json() 23 | model = model_from_json(json_string) 24 | ``` 25 | - `model.to_yaml()`: returns a representation of the model as a YAML string. Note that the representation does not include the weights, only the architecture. You can reinstantiate the same model (with reinitialized weights) from the YAML string via: 26 | ```python 27 | from models import model_from_yaml 28 | 29 | yaml_string = model.to_yaml() 30 | model = model_from_yaml(yaml_string) 31 | ``` 32 | - `model.save_weights(filepath)`: saves the weights of the model as a HDF5 file. 33 | - `model.load_weights(filepath)`: loads the weights of the model from a HDF5 file (created by `save_weights`). -------------------------------------------------------------------------------- /docs/templates/models/model.md: -------------------------------------------------------------------------------- 1 | # Model class API 2 | 3 | In the functional API, given an input tensor and output tensor, you can instantiate a `Model` via: 4 | 5 | ```python 6 | from keras.models import Model 7 | from keras.layers import Input, Dense 8 | 9 | a = Input(shape=(32,)) 10 | b = Dense(32)(a) 11 | model = Model(input=a, output=b) 12 | ``` 13 | 14 | This model will include all layers required in the computation of `b` given `a`. 15 | 16 | In the case of multi-input or multi-output models, you can use lists as well: 17 | 18 | ```python 19 | model = Model(input=[a1, a2], output=[b1, b3, b3]) 20 | ``` 21 | 22 | For a detailed introduction of what `Model` can do, read [this guide to the Keras functional API](/getting-started/functional-api-guide). 23 | 24 | ## Useful attributes of Model 25 | 26 | - `model.layers` is a flattened list of the layers comprising the model graph. 27 | - `model.inputs` is the list of input tensors. 28 | - `model.outputs` is the list of output tensors. 29 | 30 | ## Methods 31 | 32 | {{autogenerated}} 33 | -------------------------------------------------------------------------------- /docs/templates/models/sequential.md: -------------------------------------------------------------------------------- 1 | # The Sequential model API 2 | 3 | To get started, read [this guide to the Keras Sequential model](/getting-started/sequential-model-guide). 4 | 5 | ## Useful attributes of Model 6 | 7 | - `model.layers` is a list of the layers added to the model. 
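For example, a minimal sketch of inspecting `model.layers` (the layer sizes below are only illustrative):

```python
from keras.models import Sequential
from keras.layers import Dense, Activation

model = Sequential()
model.add(Dense(32, input_dim=784))
model.add(Activation('relu'))

# model.layers holds the layer instances in the order they were added
print(len(model.layers))             # 2
print(model.layers[0].output_shape)  # (None, 32)
```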
8 | 9 | 10 | ---- 11 | 12 | ## Sequential model methods 13 | 14 | {{autogenerated}} -------------------------------------------------------------------------------- /docs/templates/objectives.md: -------------------------------------------------------------------------------- 1 | 2 | ## Usage of objectives 3 | 4 | An objective function (or loss function, or optimization score function) is one of the two parameters required to compile a model: 5 | 6 | ```python 7 | model.compile(loss='mean_squared_error', optimizer='sgd') 8 | ``` 9 | 10 | You can either pass the name of an existing objective, or pass a Theano/TensorFlow symbolic function that returns a scalar for each data-point and takes the following two arguments: 11 | 12 | - __y_true__: True labels. Theano/TensorFlow tensor. 13 | - __y_pred__: Predictions. Theano/TensorFlow tensor of the same shape as y_true. 14 | 15 | The actual optimized objective is the mean of the output array across all datapoints. 16 | 17 | For a few examples of such functions, check out the [objectives source](https://github.com/fchollet/keras/blob/master/keras/objectives.py). 18 | 19 | ## Available objectives 20 | 21 | - __mean_squared_error__ / __mse__ 22 | - __mean_absolute_error__ / __mae__ 23 | - __mean_absolute_percentage_error__ / __mape__ 24 | - __mean_squared_logarithmic_error__ / __msle__ 25 | - __squared_hinge__ 26 | - __hinge__ 27 | - __binary_crossentropy__: Also known as logloss. 28 | - __categorical_crossentropy__: Also known as multiclass logloss. __Note__: using this objective requires that your labels are binary arrays of shape `(nb_samples, nb_classes)`. 29 | - __sparse_categorical_crossentropy__: As above but accepts sparse labels. __Note__: this objective still requires that your labels have the same number of dimensions as your outputs; you may need to add a length-1 dimension to the shape of your labels, e.g with `np.expand_dims(y, -1)`. 30 | - __kullback_leibler_divergence__ / __kld__: Information gain from a predicted probability distribution Q to a true probability distribution P. Gives a measure of difference between both distributions. 31 | - __poisson__: Mean of `(predictions - targets * log(predictions))` 32 | - __cosine_proximity__: The opposite (negative) of the mean cosine proximity between predictions and targets. 33 | -------------------------------------------------------------------------------- /docs/templates/optimizers.md: -------------------------------------------------------------------------------- 1 | 2 | ## Usage of optimizers 3 | 4 | An optimizer is one of the two arguments required for compiling a Keras model: 5 | 6 | ```python 7 | model = Sequential() 8 | model.add(Dense(64, init='uniform', input_dim=10)) 9 | model.add(Activation('tanh')) 10 | model.add(Activation('softmax')) 11 | 12 | sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True) 13 | model.compile(loss='mean_squared_error', optimizer=sgd) 14 | ``` 15 | 16 | You can either instantiate an optimizer before passing it to `model.compile()` , as in the above example, or you can call it by its name. In the latter case, the default parameters for the optimizer will be used. 
17 | 18 | ```python 19 | # pass optimizer by name: default parameters will be used 20 | model.compile(loss='mean_squared_error', optimizer='sgd') 21 | ``` 22 | 23 | --- 24 | 25 | {{autogenerated}} -------------------------------------------------------------------------------- /docs/templates/preprocessing/sequence.md: -------------------------------------------------------------------------------- 1 | ## pad_sequences 2 | 3 | ```python 4 | keras.preprocessing.sequence.pad_sequences(sequences, maxlen=None, dtype='int32') 5 | ``` 6 | 7 | Transform a list of `nb_samples sequences` (lists of scalars) into a 2D Numpy array of shape `(nb_samples, nb_timesteps)`. `nb_timesteps` is either the `maxlen` argument if provided, or the length of the longest sequence otherwise. Sequences that are shorter than `nb_timesteps` are padded with zeros at the end. 8 | 9 | - __Return__: 2D Numpy array of shape `(nb_samples, nb_timesteps)`. 10 | 11 | - __Arguments__: 12 | - __sequences__: List of lists of int or float. 13 | - __maxlen__: None or int. Maximum sequence length, longer sequences are truncated and shorter sequences are padded with zeros at the end. 14 | - __dtype__: datatype of the Numpy array returned. 15 | - __padding__: 'pre' or 'post', pad either before or after each sequence. 16 | - __truncating__: 'pre' or 'post', remove values from sequences larger than maxlen either in the beginning or in the end of the sequence 17 | - __value__: float, value to pad the sequences to the desired value. 18 | 19 | --- 20 | 21 | ## skipgrams 22 | 23 | ```python 24 | keras.preprocessing.sequence.skipgrams(sequence, vocabulary_size, 25 | window_size=4, negative_samples=1., shuffle=True, 26 | categorical=False, sampling_table=None) 27 | ``` 28 | 29 | Transforms a sequence of word indexes (list of int) into couples of the form: 30 | 31 | - (word, word in the same window), with label 1 (positive samples). 32 | - (word, random word from the vocabulary), with label 0 (negative samples). 33 | 34 | Read more about Skipgram in this gnomic paper by Mikolov et al.: [Efficient Estimation of Word Representations in 35 | Vector Space](http://arxiv.org/pdf/1301.3781v3.pdf) 36 | 37 | - __Return__: tuple `(couples, labels)`. 38 | - `couples` is a list of 2-elements lists of int: `[word_index, other_word_index]`. 39 | - `labels` is a list of 0 and 1, where 1 indicates that `other_word_index` was found in the same window as `word_index`, and 0 indicates that `other_word_index` was random. 40 | - if categorical is set to True, the labels are categorical, ie. 1 becomes [0,1], and 0 becomes [1, 0]. 41 | 42 | - __Arguments__: 43 | - __sequence__: list of int indexes. If using a sampling_table, the index of a word should be its the rank in the dataset (starting at 1). 44 | - __vocabulary_size__: int. 45 | - __window_size__: int. maximum distance between two words in a positive couple. 46 | - __negative_samples__: float >= 0. 0 for no negative (=random) samples. 1 for same number as positive samples. etc. 47 | - __shuffle__: boolean. Whether to shuffle the samples. 48 | - __categorical__: boolean. Whether to make the returned labels categorical. 49 | - __sampling_table__: Numpy array of shape `(vocabulary_size,)` where `sampling_table[i]` is the probability of sampling the word with index i (assumed to be i-th most common word in the dataset). 
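A minimal usage sketch (the toy sentence, vocabulary size and window size below are made up for illustration):

```python
from keras.preprocessing.sequence import skipgrams

# word indexes for a short sentence, drawn from a 10-word vocabulary
sentence = [1, 4, 2, 5, 3]
couples, labels = skipgrams(sentence, vocabulary_size=10,
                            window_size=2, negative_samples=1.)
# couples is a list of [word_index, other_word_index] pairs, and labels the
# matching list of 1s (same window) and 0s (random negative samples)
```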
50 | 51 | 52 | --- 53 | 54 | ## make_sampling_table 55 | 56 | ```python 57 | keras.preprocessing.sequence.make_sampling_table(size, sampling_factor=1e-5) 58 | ``` 59 | 60 | Used for generating the `sampling_table` argument for `skipgrams`. `sampling_table[i]` is the probability of sampling the word i-th most common word in a dataset (more common words should be sampled less frequently, for balance). 61 | 62 | - __Return__: Numpy array of shape `(size,)`. 63 | 64 | - __Arguments__: 65 | - __size__: size of the vocabulary considered. 66 | - __sampling_factor__: lower values result in a longer probability decay (common words will be sampled less frequently). If set to 1, no subsampling will be performed (all sampling probabilities will be 1). 67 | -------------------------------------------------------------------------------- /docs/templates/preprocessing/text.md: -------------------------------------------------------------------------------- 1 | 2 | ## text_to_word_sequence 3 | 4 | ```python 5 | keras.preprocessing.text.text_to_word_sequence(text, 6 | filters=base_filter(), lower=True, split=" ") 7 | ``` 8 | 9 | Split a sentence into a list of words. 10 | 11 | - __Return__: List of words (str). 12 | 13 | - __Arguments__: 14 | - __text__: str. 15 | - __filters__: list (or concatenation) of characters to filter out, such as punctuation. Default: base_filter(), includes basic punctuation, tabs, and newlines. 16 | - __lower__: boolean. Whether to set the text to lowercase. 17 | - __split__: str. Separator for word splitting. 18 | 19 | ## one_hot 20 | 21 | ```python 22 | keras.preprocessing.text.one_hot(text, n, 23 | filters=base_filter(), lower=True, split=" ") 24 | ``` 25 | 26 | One-hot encode a text into a list of word indexes in a vocabulary of size n. 27 | 28 | - __Return__: List of integers in [1, n]. Each integer encodes a word (unicity non-guaranteed). 29 | 30 | - __Arguments__: Same as `text_to_word_sequence` above. 31 | - __n__: int. Size of vocabulary. 32 | 33 | ## Tokenizer 34 | 35 | ```python 36 | keras.preprocessing.text.Tokenizer(nb_words=None, filters=base_filter(), 37 | lower=True, split=" ") 38 | ``` 39 | 40 | Class for vectorizing texts, or/and turning texts into sequences (=list of word indexes, where the word of rank i in the dataset (starting at 1) has index i). 41 | 42 | - __Arguments__: Same as `text_to_word_sequence` above. 43 | - __nb_words__: None or int. Maximum number of words to work with (if set, tokenization will be restricted to the top nb_words most common words in the dataset). 44 | 45 | - __Methods__: 46 | 47 | - __fit_on_texts(texts)__: 48 | - __Arguments__: 49 | - __texts__: list of texts to train on. 50 | 51 | - __texts_to_sequences(texts)__ 52 | - __Arguments__: 53 | - __texts__: list of texts to turn to sequences. 54 | - __Return__: list of sequences (one per text input). 55 | 56 | - __texts_to_sequences_generator(texts)__: generator version of the above. 57 | - __Return__: yield one sequence per input text. 58 | 59 | - __texts_to_matrix(texts)__: 60 | - __Return__: numpy array of shape `(len(texts), nb_words)`. 61 | - __Arguments__: 62 | - __texts__: list of texts to vectorize. 63 | - __mode__: one of "binary", "count", "tfidf", "freq" (default: "binary"). 64 | 65 | - __fit_on_sequences(sequences)__: 66 | - __Arguments__: 67 | - __sequences__: list of sequences to train on. 68 | 69 | - __sequences_to_matrix(sequences)__: 70 | - __Return__: numpy array of shape `(len(sequences), nb_words)`. 
71 | - __Arguments__: 72 | - __sequences__: list of sequences to vectorize. 73 | - __mode__: one of "binary", "count", "tfidf", "freq" (default: "binary"). 74 | 75 | - __Attributes__: 76 | - __word_counts__: dictionary mapping words (str) to the number of times they appeared on during fit. Only set after fit_on_texts was called. 77 | - __word_docs__: dictionary mapping words (str) to the number of documents/texts they appeared on during fit. Only set after fit_on_texts was called. 78 | - __word_index__: dictionary mapping words (str) to their rank/index (int). Only set after fit_on_texts was called. 79 | - __document_count__: int. Number of documents (texts/sequences) the tokenizer was trained on. Only set after fit_on_texts or fit_on_sequences was called. 80 | 81 | 82 | -------------------------------------------------------------------------------- /docs/templates/regularizers.md: -------------------------------------------------------------------------------- 1 | ## Usage of regularizers 2 | 3 | Regularizers allow to apply penalties on layer parameters or layer activity during optimization. These penalties are incorporated in the loss function that the network optimizes. 4 | 5 | The penalties are applied on a per-layer basis. The exact API will depend on the layer, but the layers `Dense`, `TimeDistributedDense`, `MaxoutDense`, `Convolution1D` and `Convolution2D` have a unified API. 6 | 7 | These layers expose 3 keyword arguments: 8 | 9 | - `W_regularizer`: instance of `keras.regularizers.WeightRegularizer` 10 | - `b_regularizer`: instance of `keras.regularizers.WeightRegularizer` 11 | - `activity_regularizer`: instance of `keras.regularizers.ActivityRegularizer` 12 | 13 | 14 | ## Example 15 | 16 | ```python 17 | from keras.regularizers import l2, activity_l2 18 | model.add(Dense(64, input_dim=64, W_regularizer=l2(0.01), activity_regularizer=activity_l2(0.01))) 19 | ``` 20 | 21 | ## Available penalties 22 | 23 | ```python 24 | keras.regularizers.WeightRegularizer(l1=0., l2=0.) 25 | ``` 26 | 27 | ```python 28 | keras.regularizers.ActivityRegularizer(l1=0., l2=0.) 29 | ``` 30 | 31 | ## Shortcuts 32 | 33 | These are shortcut functions available in `keras.regularizers`. 34 | 35 | - __l1__(l=0.01): L1 weight regularization penalty, also known as LASSO 36 | - __l2__(l=0.01): L2 weight regularization penalty, also known as weight decay, or Ridge 37 | - __l1l2__(l1=0.01, l2=0.01): L1-L2 weight regularization penalty, also known as ElasticNet 38 | - __activity_l1__(l=0.01): L1 activity regularization 39 | - __activity_l2__(l=0.01): L2 activity regularization 40 | - __activity_l1l2__(l1=0.01, l2=0.01): L1+L2 activity regularization 41 | -------------------------------------------------------------------------------- /docs/templates/scikit-learn-api.md: -------------------------------------------------------------------------------- 1 | # Wrappers for the Scikit-Learn API 2 | 3 | You can use `Sequential` Keras models (single-input only) as part of your Scikit-Learn workflow via the wrappers found at `keras.wrappers.sklearn.py`. 4 | 5 | There are two wrappers available: 6 | 7 | `keras.wrappers.sklearn.KerasClassifier(build_fn=None, **sk_params)`, which implements the sklearn classifier interface, 8 | 9 | `keras.wrappers.sklearn.KerasRegressor(build_fn=None, **sk_params)`, which implements the sklearn regressor interface. 
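A minimal usage sketch (the model architecture, dummy data and parameter values are illustrative only; the import path follows `keras/wrappers/scikit_learn.py` in this repository):

```python
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.wrappers.scikit_learn import KerasClassifier

def build_model():
    # build_fn must construct, compile and return the Keras model
    model = Sequential()
    model.add(Dense(64, input_dim=20))
    model.add(Activation('relu'))
    model.add(Dense(2))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop',
                  metrics=['accuracy'])
    return model

# dummy data, for illustration only
X = np.random.random((100, 20))
y = np.random.randint(2, size=(100,))

clf = KerasClassifier(build_fn=build_model, nb_epoch=10, batch_size=32, verbose=0)
clf.fit(X, y)
predictions = clf.predict(X)
```

The wrapped estimator can then be handed to scikit-learn utilities such as grid search, as described below.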
10 | 11 | ### Arguments 12 | 13 | - __build_fn__: callable function or class instance 14 | - __sk_params__: model parameters & fitting parameters 15 | 16 | `build_fn` should construct, compile and return a Keras model, which 17 | will then be used to fit/predict. One of the following 18 | three values could be passed to build_fn: 19 | 20 | 1. A function 21 | 2. An instance of a class that implements the __call__ method 22 | 3. None. This means you implement a class that inherits from either 23 | `KerasClassifier` or `KerasRegressor`. The __call__ method of the 24 | present class will then be treated as the default build_fn. 25 | 26 | `sk_params` takes both model parameters and fitting parameters. Legal model 27 | parameters are the arguments of `build_fn`. Note that like all other 28 | estimators in scikit-learn, 'build_fn' should provide default values for 29 | its arguments, so that you could create the estimator without passing any 30 | values to `sk_params`. 31 | 32 | `sk_params` could also accept parameters for calling `fit`, `predict`, 33 | `predict_proba`, and `score` methods (e.g., `nb_epoch`, `batch_size`). 34 | fitting (predicting) parameters are selected in the following order: 35 | 36 | 1. Values passed to the dictionary arguments of 37 | `fit`, `predict`, `predict_proba`, and `score` methods 38 | 2. Values passed to `sk_params` 39 | 3. The default values of the `keras.models.Sequential` 40 | `fit`, `predict`, `predict_proba` and `score` methods 41 | 42 | When using scikit-learn's `grid_search` API, legal tunable parameters are 43 | those you could pass to `sk_params`, including fitting parameters. 44 | In other words, you could use `grid_search` to search for the best 45 | `batch_size` or `nb_epoch` as well as the model parameters. 46 | -------------------------------------------------------------------------------- /docs/templates/visualization.md: -------------------------------------------------------------------------------- 1 | 2 | ## Model visualization 3 | 4 | The `keras.utils.visualize_util` module provides utility functions to plot 5 | a Keras model (using graphviz). 6 | 7 | This will plot a graph of the model and save it to a file: 8 | ```python 9 | from keras.utils.visualize_util import plot 10 | plot(model, to_file='model.png') 11 | ``` 12 | 13 | `plot` takes two optional arguments: 14 | 15 | - `show_shapes` (defaults to False) controls whether output shapes are shown in the graph. 16 | - `show_layer_names` (defaults to True) controls whether layer names are shown in the graph. 17 | 18 | You can also directly obtain the `pydot.Graph` object and render it yourself, 19 | for example to show it in an ipython notebook : 20 | ```python 21 | from IPython.display import SVG 22 | from keras.utils.visualize_util import model_to_dot 23 | 24 | SVG(model_to_dot(model).create(prog='dot', format='svg')) 25 | ``` 26 | -------------------------------------------------------------------------------- /examples/antirectifier.py: -------------------------------------------------------------------------------- 1 | '''The example demonstrates how to write custom layers for Keras. 2 | 3 | We build a custom activation layer called 'Antirectifier', 4 | which modifies the shape of the tensor that passes through it. 5 | We need to specify two methods: `get_output_shape_for` and `call`. 6 | 7 | Note that the same result can also be achieved via a Lambda layer. 
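As a rough sketch only (assuming `Lambda` is imported from keras.layers and
`model` is a Sequential model; the helper names are made up and not used in
this script), a Lambda-based version could look like:

    def antirectifier(x):
        x -= K.mean(x, axis=1, keepdims=True)
        x = K.l2_normalize(x, axis=1)
        return K.concatenate([K.relu(x), K.relu(-x)], axis=1)

    def antirectifier_output_shape(input_shape):
        shape = list(input_shape)
        shape[-1] *= 2
        return tuple(shape)

    model.add(Lambda(antirectifier, output_shape=antirectifier_output_shape))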
8 | 9 | Because our custom layer is written with primitives from the Keras 10 | backend (`K`), our code can run both on TensorFlow and Theano. 11 | ''' 12 | 13 | from __future__ import print_function 14 | from keras.models import Sequential 15 | from keras.layers import Dense, Dropout, Layer, Activation 16 | from keras.datasets import mnist 17 | from keras import backend as K 18 | from keras.utils import np_utils 19 | 20 | 21 | class Antirectifier(Layer): 22 | '''This is the combination of a sample-wise 23 | L2 normalization with the concatenation of the 24 | positive part of the input with the negative part 25 | of the input. The result is a tensor of samples that are 26 | twice as large as the input samples. 27 | 28 | It can be used in place of a ReLU. 29 | 30 | # Input shape 31 | 2D tensor of shape (samples, n) 32 | 33 | # Output shape 34 | 2D tensor of shape (samples, 2*n) 35 | 36 | # Theoretical justification 37 | When applying ReLU, assuming that the distribution 38 | of the previous output is approximately centered around 0., 39 | you are discarding half of your input. This is inefficient. 40 | 41 | Antirectifier allows to return all-positive outputs like ReLU, 42 | without discarding any data. 43 | 44 | Tests on MNIST show that Antirectifier allows to train networks 45 | with twice less parameters yet with comparable 46 | classification accuracy as an equivalent ReLU-based network. 47 | ''' 48 | def get_output_shape_for(self, input_shape): 49 | shape = list(input_shape) 50 | assert len(shape) == 2 # only valid for 2D tensors 51 | shape[-1] *= 2 52 | return tuple(shape) 53 | 54 | def call(self, x, mask=None): 55 | x -= K.mean(x, axis=1, keepdims=True) 56 | x = K.l2_normalize(x, axis=1) 57 | pos = K.relu(x) 58 | neg = K.relu(-x) 59 | return K.concatenate([pos, neg], axis=1) 60 | 61 | # global parameters 62 | batch_size = 128 63 | nb_classes = 10 64 | nb_epoch = 40 65 | 66 | # the data, shuffled and split between train and test sets 67 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 68 | 69 | X_train = X_train.reshape(60000, 784) 70 | X_test = X_test.reshape(10000, 784) 71 | X_train = X_train.astype('float32') 72 | X_test = X_test.astype('float32') 73 | X_train /= 255 74 | X_test /= 255 75 | print(X_train.shape[0], 'train samples') 76 | print(X_test.shape[0], 'test samples') 77 | 78 | # convert class vectors to binary class matrices 79 | Y_train = np_utils.to_categorical(y_train, nb_classes) 80 | Y_test = np_utils.to_categorical(y_test, nb_classes) 81 | 82 | # build the model 83 | model = Sequential() 84 | model.add(Dense(256, input_shape=(784,))) 85 | model.add(Antirectifier()) 86 | model.add(Dropout(0.1)) 87 | model.add(Dense(256)) 88 | model.add(Antirectifier()) 89 | model.add(Dropout(0.1)) 90 | model.add(Dense(10)) 91 | model.add(Activation('softmax')) 92 | 93 | # compile the model 94 | model.compile(loss='categorical_crossentropy', 95 | optimizer='rmsprop', 96 | metrics=['accuracy']) 97 | 98 | # train the model 99 | model.fit(X_train, Y_train, 100 | batch_size=batch_size, nb_epoch=nb_epoch, 101 | verbose=1, validation_data=(X_test, Y_test)) 102 | 103 | # next, compare with an equivalent network 104 | # with2x bigger Dense layers and ReLU 105 | -------------------------------------------------------------------------------- /examples/cifar10_cnn.py: -------------------------------------------------------------------------------- 1 | '''Train a simple deep CNN on the CIFAR10 small images dataset. 
2 | 3 | GPU run command: 4 | THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 python cifar10_cnn.py 5 | 6 | It gets down to 0.65 test logloss in 25 epochs, and down to 0.55 after 50 epochs. 7 | (it's still underfitting at that point, though). 8 | 9 | Note: the data was pickled with Python 2, and some encoding issues might prevent you 10 | from loading it in Python 3. You might have to load it in Python 2, 11 | save it in a different format, load it in Python 3 and repickle it. 12 | ''' 13 | 14 | from __future__ import print_function 15 | from keras.datasets import cifar10 16 | from keras.preprocessing.image import ImageDataGenerator 17 | from keras.models import Sequential 18 | from keras.layers import Dense, Dropout, Activation, Flatten 19 | from keras.layers import Convolution2D, MaxPooling2D 20 | from keras.optimizers import SGD 21 | from keras.utils import np_utils 22 | 23 | batch_size = 32 24 | nb_classes = 10 25 | nb_epoch = 200 26 | data_augmentation = True 27 | 28 | # input image dimensions 29 | img_rows, img_cols = 32, 32 30 | # the CIFAR10 images are RGB 31 | img_channels = 3 32 | 33 | # the data, shuffled and split between train and test sets 34 | (X_train, y_train), (X_test, y_test) = cifar10.load_data() 35 | print('X_train shape:', X_train.shape) 36 | print(X_train.shape[0], 'train samples') 37 | print(X_test.shape[0], 'test samples') 38 | 39 | # convert class vectors to binary class matrices 40 | Y_train = np_utils.to_categorical(y_train, nb_classes) 41 | Y_test = np_utils.to_categorical(y_test, nb_classes) 42 | 43 | model = Sequential() 44 | 45 | model.add(Convolution2D(32, 3, 3, border_mode='same', 46 | input_shape=(img_channels, img_rows, img_cols))) 47 | model.add(Activation('relu')) 48 | model.add(Convolution2D(32, 3, 3)) 49 | model.add(Activation('relu')) 50 | model.add(MaxPooling2D(pool_size=(2, 2))) 51 | model.add(Dropout(0.25)) 52 | 53 | model.add(Convolution2D(64, 3, 3, border_mode='same')) 54 | model.add(Activation('relu')) 55 | model.add(Convolution2D(64, 3, 3)) 56 | model.add(Activation('relu')) 57 | model.add(MaxPooling2D(pool_size=(2, 2))) 58 | model.add(Dropout(0.25)) 59 | 60 | model.add(Flatten()) 61 | model.add(Dense(512)) 62 | model.add(Activation('relu')) 63 | model.add(Dropout(0.5)) 64 | model.add(Dense(nb_classes)) 65 | model.add(Activation('softmax')) 66 | 67 | # let's train the model using SGD + momentum (how original). 
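# lr is the initial learning rate; `decay` shrinks it over updates
# (roughly lr / (1 + decay * iterations)); momentum=0.9 with nesterov=True
# uses Nesterov momentum rather than plain momentum.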
68 | sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True) 69 | model.compile(loss='categorical_crossentropy', 70 | optimizer=sgd, 71 | metrics=['accuracy']) 72 | 73 | X_train = X_train.astype('float32') 74 | X_test = X_test.astype('float32') 75 | X_train /= 255 76 | X_test /= 255 77 | 78 | if not data_augmentation: 79 | print('Not using data augmentation.') 80 | model.fit(X_train, Y_train, 81 | batch_size=batch_size, 82 | nb_epoch=nb_epoch, 83 | validation_data=(X_test, Y_test), 84 | shuffle=True) 85 | else: 86 | print('Using real-time data augmentation.') 87 | 88 | # this will do preprocessing and realtime data augmentation 89 | datagen = ImageDataGenerator( 90 | featurewise_center=False, # set input mean to 0 over the dataset 91 | samplewise_center=False, # set each sample mean to 0 92 | featurewise_std_normalization=False, # divide inputs by std of the dataset 93 | samplewise_std_normalization=False, # divide each input by its std 94 | zca_whitening=False, # apply ZCA whitening 95 | rotation_range=0, # randomly rotate images in the range (degrees, 0 to 180) 96 | width_shift_range=0.1, # randomly shift images horizontally (fraction of total width) 97 | height_shift_range=0.1, # randomly shift images vertically (fraction of total height) 98 | horizontal_flip=True, # randomly flip images 99 | vertical_flip=False) # randomly flip images 100 | 101 | # compute quantities required for featurewise normalization 102 | # (std, mean, and principal components if ZCA whitening is applied) 103 | datagen.fit(X_train) 104 | 105 | # fit the model on the batches generated by datagen.flow() 106 | model.fit_generator(datagen.flow(X_train, Y_train, 107 | batch_size=batch_size), 108 | samples_per_epoch=X_train.shape[0], 109 | nb_epoch=nb_epoch, 110 | validation_data=(X_test, Y_test)) 111 | -------------------------------------------------------------------------------- /examples/imdb_bidirectional_lstm.py: -------------------------------------------------------------------------------- 1 | '''Train a Bidirectional LSTM on the IMDB sentiment classification task. 2 | 3 | Output after 4 epochs on CPU: ~0.8146 4 | Time per epoch on CPU (Core i7): ~150s. 
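# Side note: datagen.flow() used above is just an endless generator of augmented
# (X, Y) batches; fit_generator draws samples_per_epoch images from it per epoch.
# A hand-rolled consumption sketch (assumes X_train, Y_train and datagen from the
# CIFAR10 script above):
for i, (x_batch, y_batch) in enumerate(datagen.flow(X_train, Y_train, batch_size=32)):
    print(x_batch.shape, y_batch.shape)   # (32, 3, 32, 32) (32, 10)
    if i >= 2:
        break                             # the generator never ends on its own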
5 | ''' 6 | 7 | from __future__ import print_function 8 | import numpy as np 9 | np.random.seed(1337) # for reproducibility 10 | 11 | from keras.preprocessing import sequence 12 | from keras.models import Model 13 | from keras.layers import Dense, Dropout, Embedding, LSTM, Input, merge 14 | from keras.datasets import imdb 15 | 16 | 17 | max_features = 20000 18 | maxlen = 100 # cut texts after this number of words (among top max_features most common words) 19 | batch_size = 32 20 | 21 | print('Loading data...') 22 | (X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features) 23 | print(len(X_train), 'train sequences') 24 | print(len(X_test), 'test sequences') 25 | 26 | print("Pad sequences (samples x time)") 27 | X_train = sequence.pad_sequences(X_train, maxlen=maxlen) 28 | X_test = sequence.pad_sequences(X_test, maxlen=maxlen) 29 | print('X_train shape:', X_train.shape) 30 | print('X_test shape:', X_test.shape) 31 | y_train = np.array(y_train) 32 | y_test = np.array(y_test) 33 | 34 | 35 | # this is the placeholder tensor for the input sequences 36 | sequence = Input(shape=(maxlen,), dtype='int32') 37 | # this embedding layer will transform the sequences of integers 38 | # into vectors of size 128 39 | embedded = Embedding(max_features, 128, input_length=maxlen)(sequence) 40 | 41 | # apply forwards LSTM 42 | forwards = LSTM(64)(embedded) 43 | # apply backwards LSTM 44 | backwards = LSTM(64, go_backwards=True)(embedded) 45 | 46 | # concatenate the outputs of the 2 LSTMs 47 | merged = merge([forwards, backwards], mode='concat', concat_axis=-1) 48 | after_dp = Dropout(0.5)(merged) 49 | output = Dense(1, activation='sigmoid')(after_dp) 50 | 51 | model = Model(input=sequence, output=output) 52 | 53 | # try using different optimizers and different optimizer configs 54 | model.compile('adam', 'binary_crossentropy', metrics=['accuracy']) 55 | 56 | print('Train...') 57 | model.fit(X_train, y_train, 58 | batch_size=batch_size, 59 | nb_epoch=4, 60 | validation_data=[X_test, y_test]) 61 | -------------------------------------------------------------------------------- /examples/imdb_cnn.py: -------------------------------------------------------------------------------- 1 | '''This example demonstrates the use of Convolution1D for text classification. 2 | 3 | Gets to 0.89 test accuracy after 2 epochs. 4 | 90s/epoch on Intel i5 2.4Ghz CPU. 5 | 10s/epoch on Tesla K40 GPU. 
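# Side note: a tiny illustration of what sequence.pad_sequences does to the
# variable-length IMDB reviews -- by default shorter sequences are zero-padded on
# the left and longer ones are truncated from the front.
from keras.preprocessing import sequence

toy = [[1, 2, 3], [4, 5], [6, 7, 8, 9, 10, 11]]
print(sequence.pad_sequences(toy, maxlen=5))
# [[ 0  0  1  2  3]
#  [ 0  0  0  4  5]
#  [ 7  8  9 10 11]]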
6 | 7 | ''' 8 | 9 | from __future__ import print_function 10 | import numpy as np 11 | np.random.seed(1337) # for reproducibility 12 | 13 | from keras.preprocessing import sequence 14 | from keras.models import Sequential 15 | from keras.layers import Dense, Dropout, Activation, Flatten 16 | from keras.layers import Embedding 17 | from keras.layers import Convolution1D, MaxPooling1D 18 | from keras.datasets import imdb 19 | from keras import backend as K 20 | 21 | 22 | # set parameters: 23 | max_features = 5000 24 | maxlen = 400 25 | batch_size = 32 26 | embedding_dims = 50 27 | nb_filter = 250 28 | filter_length = 3 29 | hidden_dims = 250 30 | nb_epoch = 2 31 | 32 | print('Loading data...') 33 | (X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features) 34 | print(len(X_train), 'train sequences') 35 | print(len(X_test), 'test sequences') 36 | 37 | print('Pad sequences (samples x time)') 38 | X_train = sequence.pad_sequences(X_train, maxlen=maxlen) 39 | X_test = sequence.pad_sequences(X_test, maxlen=maxlen) 40 | print('X_train shape:', X_train.shape) 41 | print('X_test shape:', X_test.shape) 42 | 43 | print('Build model...') 44 | model = Sequential() 45 | 46 | # we start off with an efficient embedding layer which maps 47 | # our vocab indices into embedding_dims dimensions 48 | model.add(Embedding(max_features, 49 | embedding_dims, 50 | input_length=maxlen, 51 | dropout=0.2)) 52 | 53 | # we add a Convolution1D, which will learn nb_filter 54 | # word group filters of size filter_length: 55 | model.add(Convolution1D(nb_filter=nb_filter, 56 | filter_length=filter_length, 57 | border_mode='valid', 58 | activation='relu', 59 | subsample_length=1)) 60 | # we use max pooling: 61 | model.add(MaxPooling1D(pool_length=model.output_shape[1])) 62 | 63 | # We flatten the output of the conv layer, 64 | # so that we can add a vanilla dense layer: 65 | model.add(Flatten()) 66 | 67 | # We add a vanilla hidden layer: 68 | model.add(Dense(hidden_dims)) 69 | model.add(Dropout(0.2)) 70 | model.add(Activation('relu')) 71 | 72 | # We project onto a single unit output layer, and squash it with a sigmoid: 73 | model.add(Dense(1)) 74 | model.add(Activation('sigmoid')) 75 | 76 | model.compile(loss='binary_crossentropy', 77 | optimizer='adam', 78 | metrics=['accuracy']) 79 | model.fit(X_train, y_train, 80 | batch_size=batch_size, 81 | nb_epoch=nb_epoch, 82 | validation_data=(X_test, y_test)) 83 | -------------------------------------------------------------------------------- /examples/imdb_cnn_lstm.py: -------------------------------------------------------------------------------- 1 | '''Train a recurrent convolutional network on the IMDB sentiment 2 | classification task. 3 | 4 | Gets to 0.8498 test accuracy after 2 epochs. 41s/epoch on K520 GPU. 
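# Side note: the MaxPooling1D(pool_length=model.output_shape[1]) + Flatten() pair
# above amounts to a global max pool over time: each of the 250 filters keeps only
# its largest activation across the 398 valid timesteps. NumPy equivalent:
import numpy as np

conv_out = np.random.rand(2, 398, 250)   # (samples, timesteps, nb_filter) after the 'valid' conv
global_max = conv_out.max(axis=1)        # (samples, nb_filter) -- what the Dense layer sees
print(global_max.shape)                  # (2, 250)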
5 | ''' 6 | from __future__ import print_function 7 | import numpy as np 8 | np.random.seed(1337) # for reproducibility 9 | 10 | from keras.preprocessing import sequence 11 | from keras.models import Sequential 12 | from keras.layers import Dense, Dropout, Activation 13 | from keras.layers import Embedding 14 | from keras.layers import LSTM, GRU, SimpleRNN 15 | from keras.layers import Convolution1D, MaxPooling1D 16 | from keras.datasets import imdb 17 | 18 | 19 | # Embedding 20 | max_features = 20000 21 | maxlen = 100 22 | embedding_size = 128 23 | 24 | # Convolution 25 | filter_length = 5 26 | nb_filter = 64 27 | pool_length = 4 28 | 29 | # LSTM 30 | lstm_output_size = 70 31 | 32 | # Training 33 | batch_size = 30 34 | nb_epoch = 2 35 | 36 | ''' 37 | Note: 38 | batch_size is highly sensitive. 39 | Only 2 epochs are needed as the dataset is very small. 40 | ''' 41 | 42 | print('Loading data...') 43 | (X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features) 44 | print(len(X_train), 'train sequences') 45 | print(len(X_test), 'test sequences') 46 | 47 | print('Pad sequences (samples x time)') 48 | X_train = sequence.pad_sequences(X_train, maxlen=maxlen) 49 | X_test = sequence.pad_sequences(X_test, maxlen=maxlen) 50 | print('X_train shape:', X_train.shape) 51 | print('X_test shape:', X_test.shape) 52 | 53 | print('Build model...') 54 | 55 | model = Sequential() 56 | model.add(Embedding(max_features, embedding_size, input_length=maxlen)) 57 | model.add(Dropout(0.25)) 58 | model.add(Convolution1D(nb_filter=nb_filter, 59 | filter_length=filter_length, 60 | border_mode='valid', 61 | activation='relu', 62 | subsample_length=1)) 63 | model.add(MaxPooling1D(pool_length=pool_length)) 64 | model.add(LSTM(lstm_output_size)) 65 | model.add(Dense(1)) 66 | model.add(Activation('sigmoid')) 67 | 68 | model.compile(loss='binary_crossentropy', 69 | optimizer='adam', 70 | metrics=['accuracy']) 71 | 72 | print('Train...') 73 | model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=nb_epoch, 74 | validation_data=(X_test, y_test)) 75 | score, acc = model.evaluate(X_test, y_test, batch_size=batch_size) 76 | print('Test score:', score) 77 | print('Test accuracy:', acc) 78 | -------------------------------------------------------------------------------- /examples/imdb_lstm.py: -------------------------------------------------------------------------------- 1 | '''Trains a LSTM on the IMDB sentiment classification task. 2 | 3 | The dataset is actually too small for LSTM to be of any advantage 4 | compared to simpler, much faster methods such as TF-IDF + LogReg. 5 | 6 | Notes: 7 | 8 | - RNNs are tricky. Choice of batch size is important, 9 | choice of loss and optimizer is critical, etc. 10 | Some configurations won't converge. 11 | 12 | - LSTM loss decrease patterns during training can be quite different 13 | from what you see with CNNs/MLPs/etc. 
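# Side note: rough shape bookkeeping for the CNN+LSTM stack above (batch axis
# omitted), assuming the hyperparameters set earlier in that script:
#   Embedding        -> (100, 128)
#   Convolution1D    -> (96, 64)    since 100 - filter_length + 1 = 96 with border_mode='valid'
#   MaxPooling1D     -> (24, 64)    since 96 // pool_length = 24
#   LSTM             -> (70,)       lstm_output_size, last timestep only
#   Dense + sigmoid  -> (1,)        sentiment probability
maxlen, filter_length, pool_length = 100, 5, 4
conv_steps = maxlen - filter_length + 1
print(conv_steps, conv_steps // pool_length)   # 96 24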
14 | ''' 15 | from __future__ import print_function 16 | import numpy as np 17 | np.random.seed(1337) # for reproducibility 18 | 19 | from keras.preprocessing import sequence 20 | from keras.utils import np_utils 21 | from keras.models import Sequential 22 | from keras.layers import Dense, Dropout, Activation, Embedding 23 | from keras.layers import LSTM, SimpleRNN, GRU 24 | from keras.datasets import imdb 25 | 26 | max_features = 20000 27 | maxlen = 80 # cut texts after this number of words (among top max_features most common words) 28 | batch_size = 32 29 | 30 | print('Loading data...') 31 | (X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features) 32 | print(len(X_train), 'train sequences') 33 | print(len(X_test), 'test sequences') 34 | 35 | print('Pad sequences (samples x time)') 36 | X_train = sequence.pad_sequences(X_train, maxlen=maxlen) 37 | X_test = sequence.pad_sequences(X_test, maxlen=maxlen) 38 | print('X_train shape:', X_train.shape) 39 | print('X_test shape:', X_test.shape) 40 | 41 | print('Build model...') 42 | model = Sequential() 43 | model.add(Embedding(max_features, 128, input_length=maxlen, dropout=0.2)) 44 | model.add(LSTM(128, dropout_W=0.2, dropout_U=0.2)) # try using a GRU instead, for fun 45 | model.add(Dense(1)) 46 | model.add(Activation('sigmoid')) 47 | 48 | # try using different optimizers and different optimizer configs 49 | model.compile(loss='binary_crossentropy', 50 | optimizer='adam', 51 | metrics=['accuracy']) 52 | 53 | print('Train...') 54 | model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=15, 55 | validation_data=(X_test, y_test)) 56 | score, acc = model.evaluate(X_test, y_test, 57 | batch_size=batch_size) 58 | print('Test score:', score) 59 | print('Test accuracy:', acc) 60 | -------------------------------------------------------------------------------- /examples/lstm_benchmark.py: -------------------------------------------------------------------------------- 1 | '''Compare LSTM implementations on the IMDB sentiment classification task. 2 | 3 | consume_less='cpu' preprocesses input to the LSTM which typically results in 4 | faster computations at the expense of increased peak memory usage as the 5 | preprocessed input must be kept in memory. 6 | 7 | consume_less='mem' does away with the preprocessing, meaning that it might take 8 | a little longer, but should require less peak memory. 9 | 10 | consume_less='gpu' concatenates the input, output and forget gate's weights 11 | into one, large matrix, resulting in faster computation time as the GPU can 12 | utilize more cores, at the expense of reduced regularization because the same 13 | dropout is shared across the gates. 14 | 15 | Note that the relative performance of the different `consume_less` modes 16 | can vary depending on your device, your model and the size of your data. 
17 | ''' 18 | 19 | import time 20 | import numpy as np 21 | import matplotlib.pyplot as plt 22 | 23 | from keras.preprocessing import sequence 24 | from keras.models import Sequential 25 | from keras.layers import Embedding, Dense, LSTM 26 | from keras.datasets import imdb 27 | 28 | max_features = 20000 29 | max_length = 80 30 | embedding_dim = 256 31 | batch_size = 128 32 | epochs = 10 33 | modes = ['cpu', 'mem', 'gpu'] 34 | 35 | print('Loading data...') 36 | (X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features) 37 | X_train = sequence.pad_sequences(X_train, max_length) 38 | X_test = sequence.pad_sequences(X_test, max_length) 39 | 40 | # Compile and train different models while meauring performance. 41 | results = [] 42 | for mode in modes: 43 | print('Testing mode: consume_less="{}"'.format(mode)) 44 | 45 | model = Sequential() 46 | model.add(Embedding(max_features, embedding_dim, input_length=max_length, dropout=0.2)) 47 | model.add(LSTM(embedding_dim, dropout_W=0.2, dropout_U=0.2, consume_less=mode)) 48 | model.add(Dense(1, activation='sigmoid')) 49 | model.compile(loss='binary_crossentropy', 50 | optimizer='adam', 51 | metrics=['accuracy']) 52 | 53 | start_time = time.time() 54 | history = model.fit(X_train, y_train, 55 | batch_size=batch_size, 56 | nb_epoch=epochs, 57 | validation_data=(X_test, y_test)) 58 | average_time_per_epoch = (time.time() - start_time) / epochs 59 | 60 | results.append((history, average_time_per_epoch)) 61 | 62 | # Compare models' accuracy, loss and elapsed time per epoch. 63 | plt.style.use('ggplot') 64 | ax1 = plt.subplot2grid((2, 2), (0, 0)) 65 | ax1.set_title('Accuracy') 66 | ax1.set_ylabel('Validation Accuracy') 67 | ax1.set_xlabel('Epochs') 68 | ax2 = plt.subplot2grid((2, 2), (1, 0)) 69 | ax2.set_title('Loss') 70 | ax2.set_ylabel('Validation Loss') 71 | ax2.set_xlabel('Epochs') 72 | ax3 = plt.subplot2grid((2, 2), (0, 1), rowspan=2) 73 | ax3.set_title('Time') 74 | ax3.set_ylabel('Seconds') 75 | for mode, result in zip(modes, results): 76 | ax1.plot(result[0].epoch, result[0].history['val_acc'], label=mode) 77 | ax2.plot(result[0].epoch, result[0].history['val_loss'], label=mode) 78 | ax1.legend() 79 | ax2.legend() 80 | ax3.bar(np.arange(len(results)), [x[1] for x in results], 81 | tick_label=modes, align='center') 82 | plt.tight_layout() 83 | plt.show() 84 | -------------------------------------------------------------------------------- /examples/lstm_text_generation.py: -------------------------------------------------------------------------------- 1 | '''Example script to generate text from Nietzsche's writings. 2 | 3 | At least 20 epochs are required before the generated text 4 | starts sounding coherent. 5 | 6 | It is recommended to run this script on GPU, as recurrent 7 | networks are quite computationally intensive. 8 | 9 | If you try this script on new data, make sure your corpus 10 | has at least ~100k characters. ~1M is better. 
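# Side note: on a headless machine the matplotlib comparison above can be replaced
# by a plain printout; this assumes the `modes` and `results` variables collected
# by the benchmark loop above.
for mode, (history, seconds_per_epoch) in zip(modes, results):
    print('consume_less=%s: best val_acc=%.4f, %.1fs/epoch'
          % (mode, max(history.history['val_acc']), seconds_per_epoch))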
11 | ''' 12 | 13 | from __future__ import print_function 14 | from keras.models import Sequential 15 | from keras.layers import Dense, Activation, Dropout 16 | from keras.layers import LSTM 17 | from keras.optimizers import RMSprop 18 | from keras.utils.data_utils import get_file 19 | import numpy as np 20 | import random 21 | import sys 22 | 23 | path = get_file('nietzsche.txt', origin="https://s3.amazonaws.com/text-datasets/nietzsche.txt") 24 | text = open(path).read().lower() 25 | print('corpus length:', len(text)) 26 | 27 | chars = sorted(list(set(text))) 28 | print('total chars:', len(chars)) 29 | char_indices = dict((c, i) for i, c in enumerate(chars)) 30 | indices_char = dict((i, c) for i, c in enumerate(chars)) 31 | 32 | # cut the text in semi-redundant sequences of maxlen characters 33 | maxlen = 40 34 | step = 3 35 | sentences = [] 36 | next_chars = [] 37 | for i in range(0, len(text) - maxlen, step): 38 | sentences.append(text[i: i + maxlen]) 39 | next_chars.append(text[i + maxlen]) 40 | print('nb sequences:', len(sentences)) 41 | 42 | print('Vectorization...') 43 | X = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool) 44 | y = np.zeros((len(sentences), len(chars)), dtype=np.bool) 45 | for i, sentence in enumerate(sentences): 46 | for t, char in enumerate(sentence): 47 | X[i, t, char_indices[char]] = 1 48 | y[i, char_indices[next_chars[i]]] = 1 49 | 50 | 51 | # build the model: 2 stacked LSTM 52 | print('Build model...') 53 | model = Sequential() 54 | model.add(LSTM(128, input_shape=(maxlen, len(chars)))) 55 | model.add(Dense(len(chars))) 56 | model.add(Activation('softmax')) 57 | 58 | optimizer = RMSprop(lr=0.01) 59 | model.compile(loss='categorical_crossentropy', optimizer=optimizer) 60 | 61 | 62 | def sample(preds, temperature=1.0): 63 | # helper function to sample an index from a probability array 64 | preds = np.asarray(preds).astype('float64') 65 | preds = np.log(preds) / temperature 66 | exp_preds = np.exp(preds) 67 | preds = exp_preds / np.sum(exp_preds) 68 | probas = np.random.multinomial(1, preds, 1) 69 | return np.argmax(probas) 70 | 71 | # train the model, output generated text after each iteration 72 | for iteration in range(1, 60): 73 | print() 74 | print('-' * 50) 75 | print('Iteration', iteration) 76 | model.fit(X, y, batch_size=128, nb_epoch=1) 77 | 78 | start_index = random.randint(0, len(text) - maxlen - 1) 79 | 80 | for diversity in [0.2, 0.5, 1.0, 1.2]: 81 | print() 82 | print('----- diversity:', diversity) 83 | 84 | generated = '' 85 | sentence = text[start_index: start_index + maxlen] 86 | generated += sentence 87 | print('----- Generating with seed: "' + sentence + '"') 88 | sys.stdout.write(generated) 89 | 90 | for i in range(400): 91 | x = np.zeros((1, maxlen, len(chars))) 92 | for t, char in enumerate(sentence): 93 | x[0, t, char_indices[char]] = 1. 94 | 95 | preds = model.predict(x, verbose=0)[0] 96 | next_index = sample(preds, diversity) 97 | next_char = indices_char[next_index] 98 | 99 | generated += next_char 100 | sentence = sentence[1:] + next_char 101 | 102 | sys.stdout.write(next_char) 103 | sys.stdout.flush() 104 | print() 105 | -------------------------------------------------------------------------------- /examples/mnist_cnn.py: -------------------------------------------------------------------------------- 1 | '''Trains a simple convnet on the MNIST dataset. 2 | 3 | Gets to 99.25% test accuracy after 12 epochs 4 | (there is still a lot of margin for parameter tuning). 5 | 16 seconds per epoch on a GRID K520 GPU. 
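# Side note: a quick NumPy illustration of the `sample` helper above -- temperatures
# below 1 sharpen the character distribution (near-greedy sampling), temperatures
# above 1 flatten it (more surprising text).
import numpy as np

preds = np.array([0.1, 0.2, 0.7])
for temperature in [0.2, 1.0, 1.2]:
    scaled = np.exp(np.log(preds) / temperature)
    scaled /= scaled.sum()
    print(temperature, np.round(scaled, 3))
# 0.2 -> [0.    0.002 0.998]
# 1.0 -> [0.1   0.2   0.7  ]
# 1.2 -> [0.127 0.227 0.645]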
6 | ''' 7 | 8 | from __future__ import print_function 9 | import numpy as np 10 | np.random.seed(1337) # for reproducibility 11 | 12 | from keras.datasets import mnist 13 | from keras.models import Sequential 14 | from keras.layers import Dense, Dropout, Activation, Flatten 15 | from keras.layers import Convolution2D, MaxPooling2D 16 | from keras.utils import np_utils 17 | 18 | batch_size = 128 19 | nb_classes = 10 20 | nb_epoch = 12 21 | 22 | # input image dimensions 23 | img_rows, img_cols = 28, 28 24 | # number of convolutional filters to use 25 | nb_filters = 32 26 | # size of pooling area for max pooling 27 | nb_pool = 2 28 | # convolution kernel size 29 | nb_conv = 3 30 | 31 | # the data, shuffled and split between train and test sets 32 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 33 | 34 | X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols) 35 | X_test = X_test.reshape(X_test.shape[0], 1, img_rows, img_cols) 36 | X_train = X_train.astype('float32') 37 | X_test = X_test.astype('float32') 38 | X_train /= 255 39 | X_test /= 255 40 | print('X_train shape:', X_train.shape) 41 | print(X_train.shape[0], 'train samples') 42 | print(X_test.shape[0], 'test samples') 43 | 44 | # convert class vectors to binary class matrices 45 | Y_train = np_utils.to_categorical(y_train, nb_classes) 46 | Y_test = np_utils.to_categorical(y_test, nb_classes) 47 | 48 | model = Sequential() 49 | 50 | model.add(Convolution2D(nb_filters, nb_conv, nb_conv, 51 | border_mode='valid', 52 | input_shape=(1, img_rows, img_cols))) 53 | model.add(Activation('relu')) 54 | model.add(Convolution2D(nb_filters, nb_conv, nb_conv)) 55 | model.add(Activation('relu')) 56 | model.add(MaxPooling2D(pool_size=(nb_pool, nb_pool))) 57 | model.add(Dropout(0.25)) 58 | 59 | model.add(Flatten()) 60 | model.add(Dense(128)) 61 | model.add(Activation('relu')) 62 | model.add(Dropout(0.5)) 63 | model.add(Dense(nb_classes)) 64 | model.add(Activation('softmax')) 65 | 66 | model.compile(loss='categorical_crossentropy', 67 | optimizer='adadelta', 68 | metrics=['accuracy']) 69 | 70 | model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch, 71 | verbose=1, validation_data=(X_test, Y_test)) 72 | score = model.evaluate(X_test, Y_test, verbose=0) 73 | print('Test score:', score[0]) 74 | print('Test accuracy:', score[1]) 75 | -------------------------------------------------------------------------------- /examples/mnist_irnn.py: -------------------------------------------------------------------------------- 1 | '''This is a reproduction of the IRNN experiment 2 | with pixel-by-pixel sequential MNIST in 3 | "A Simple Way to Initialize Recurrent Networks of Rectified Linear Units" 4 | by Quoc V. Le, Navdeep Jaitly, Geoffrey E. Hinton 5 | 6 | arXiv:1504.00941v2 [cs.NE] 7 Apr 2015 7 | http://arxiv.org/pdf/1504.00941v2.pdf 8 | 9 | Optimizer is replaced with RMSprop which yields more stable and steady 10 | improvement. 11 | 12 | Reaches 0.93 train/test accuracy after 900 epochs 13 | (which roughly corresponds to 1687500 steps in the original paper.) 
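# Side note: shape bookkeeping for the MNIST convnet above ('th' ordering, batch
# axis omitted); the flattened size explains what feeds the Dense(128) layer.
rows = cols = 28
rows, cols = rows - 3 + 1, cols - 3 + 1   # 26x26 after the first 'valid' 3x3 convolution
rows, cols = rows - 3 + 1, cols - 3 + 1   # 24x24 after the second 3x3 convolution
rows, cols = rows // 2, cols // 2         # 12x12 after 2x2 max pooling
print(32 * rows * cols)                   # 4608 features after Flatten()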
14 | ''' 15 | 16 | from __future__ import print_function 17 | 18 | from keras.datasets import mnist 19 | from keras.models import Sequential 20 | from keras.layers import Dense, Activation 21 | from keras.layers import SimpleRNN 22 | from keras.initializations import normal, identity 23 | from keras.optimizers import RMSprop 24 | from keras.utils import np_utils 25 | 26 | batch_size = 32 27 | nb_classes = 10 28 | nb_epochs = 200 29 | hidden_units = 100 30 | 31 | learning_rate = 1e-6 32 | clip_norm = 1.0 33 | 34 | # the data, shuffled and split between train and test sets 35 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 36 | 37 | X_train = X_train.reshape(X_train.shape[0], -1, 1) 38 | X_test = X_test.reshape(X_test.shape[0], -1, 1) 39 | X_train = X_train.astype('float32') 40 | X_test = X_test.astype('float32') 41 | X_train /= 255 42 | X_test /= 255 43 | print('X_train shape:', X_train.shape) 44 | print(X_train.shape[0], 'train samples') 45 | print(X_test.shape[0], 'test samples') 46 | 47 | # convert class vectors to binary class matrices 48 | Y_train = np_utils.to_categorical(y_train, nb_classes) 49 | Y_test = np_utils.to_categorical(y_test, nb_classes) 50 | 51 | print('Evaluate IRNN...') 52 | model = Sequential() 53 | model.add(SimpleRNN(output_dim=hidden_units, 54 | init=lambda shape, name: normal(shape, scale=0.001, name=name), 55 | inner_init=lambda shape, name: identity(shape, scale=1.0, name=name), 56 | activation='relu', 57 | input_shape=X_train.shape[1:])) 58 | model.add(Dense(nb_classes)) 59 | model.add(Activation('softmax')) 60 | rmsprop = RMSprop(lr=learning_rate) 61 | model.compile(loss='categorical_crossentropy', 62 | optimizer=rmsprop, 63 | metrics=['accuracy']) 64 | 65 | model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epochs, 66 | verbose=1, validation_data=(X_test, Y_test)) 67 | 68 | scores = model.evaluate(X_test, Y_test, verbose=0) 69 | print('IRNN test score:', scores[0]) 70 | print('IRNN test accuracy:', scores[1]) 71 | -------------------------------------------------------------------------------- /examples/mnist_mlp.py: -------------------------------------------------------------------------------- 1 | '''Trains a simple deep NN on the MNIST dataset. 2 | 3 | Gets to 98.40% test accuracy after 20 epochs 4 | (there is *a lot* of margin for parameter tuning). 5 | 2 seconds per epoch on a K520 GPU. 
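# Side note: roughly what the two init lambdas in the IRNN above produce -- input
# weights drawn from a tight Gaussian, recurrent weights starting as the identity
# matrix (the core trick of the paper, combined with ReLU activations).
import numpy as np

hidden_units = 100
W_in = np.random.normal(loc=0.0, scale=0.001, size=(1, hidden_units))  # input dim is 1 (one pixel per step)
W_rec = 1.0 * np.identity(hidden_units)                                # identity recurrence, scale 1.0
print(W_in.shape, W_rec.shape)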
6 | ''' 7 | 8 | from __future__ import print_function 9 | import numpy as np 10 | np.random.seed(1337) # for reproducibility 11 | 12 | from keras.datasets import mnist 13 | from keras.models import Sequential 14 | from keras.layers.core import Dense, Dropout, Activation 15 | from keras.optimizers import SGD, Adam, RMSprop 16 | from keras.utils import np_utils 17 | 18 | 19 | batch_size = 128 20 | nb_classes = 10 21 | nb_epoch = 20 22 | 23 | # the data, shuffled and split between train and test sets 24 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 25 | 26 | X_train = X_train.reshape(60000, 784) 27 | X_test = X_test.reshape(10000, 784) 28 | X_train = X_train.astype('float32') 29 | X_test = X_test.astype('float32') 30 | X_train /= 255 31 | X_test /= 255 32 | print(X_train.shape[0], 'train samples') 33 | print(X_test.shape[0], 'test samples') 34 | 35 | # convert class vectors to binary class matrices 36 | Y_train = np_utils.to_categorical(y_train, nb_classes) 37 | Y_test = np_utils.to_categorical(y_test, nb_classes) 38 | 39 | model = Sequential() 40 | model.add(Dense(512, input_shape=(784,))) 41 | model.add(Activation('relu')) 42 | model.add(Dropout(0.2)) 43 | model.add(Dense(512)) 44 | model.add(Activation('relu')) 45 | model.add(Dropout(0.2)) 46 | model.add(Dense(10)) 47 | model.add(Activation('softmax')) 48 | 49 | model.summary() 50 | 51 | model.compile(loss='categorical_crossentropy', 52 | optimizer=RMSprop(), 53 | metrics=['accuracy']) 54 | 55 | history = model.fit(X_train, Y_train, 56 | batch_size=batch_size, nb_epoch=nb_epoch, 57 | verbose=1, validation_data=(X_test, Y_test)) 58 | score = model.evaluate(X_test, Y_test, verbose=0) 59 | print('Test score:', score[0]) 60 | print('Test accuracy:', score[1]) 61 | -------------------------------------------------------------------------------- /examples/mnist_siamese_graph.py: -------------------------------------------------------------------------------- 1 | '''Train a Siamese MLP on pairs of digits from the MNIST dataset. 2 | 3 | It follows Hadsell-et-al.'06 [1] by computing the Euclidean distance on the 4 | output of the shared network and by optimizing the contrastive loss (see paper 5 | for mode details). 6 | 7 | [1] "Dimensionality Reduction by Learning an Invariant Mapping" 8 | http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf 9 | 10 | Gets to 99.5% test accuracy after 20 epochs. 11 | 3 seconds per epoch on a Titan X GPU 12 | ''' 13 | from __future__ import absolute_import 14 | from __future__ import print_function 15 | import numpy as np 16 | np.random.seed(1337) # for reproducibility 17 | 18 | import random 19 | from keras.datasets import mnist 20 | from keras.models import Sequential, Model 21 | from keras.layers import Dense, Dropout, Input, Lambda 22 | from keras.optimizers import SGD, RMSprop 23 | from keras import backend as K 24 | 25 | 26 | def euclidean_distance(vects): 27 | x, y = vects 28 | return K.sqrt(K.sum(K.square(x - y), axis=1, keepdims=True)) 29 | 30 | 31 | def eucl_dist_output_shape(shapes): 32 | shape1, shape2 = shapes 33 | return (shape1[0], 1) 34 | 35 | 36 | def contrastive_loss(y_true, y_pred): 37 | '''Contrastive loss from Hadsell-et-al.'06 38 | http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf 39 | ''' 40 | margin = 1 41 | return K.mean(y_true * K.square(y_pred) + (1 - y_true) * K.square(K.maximum(margin - y_pred, 0))) 42 | 43 | 44 | def create_pairs(x, digit_indices): 45 | '''Positive and negative pair creation. 46 | Alternates between positive and negative pairs. 
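# Side note: a NumPy twin of contrastive_loss defined above, convenient for
# sanity-checking a few distances by hand (margin = 1; y_true = 1 means "same digit").
import numpy as np

def contrastive_loss_np(y_true, d, margin=1.0):
    return np.mean(y_true * d ** 2 + (1 - y_true) * np.maximum(margin - d, 0) ** 2)

print(contrastive_loss_np(np.array([1.0]), np.array([0.1])))  # similar pair, small distance    -> 0.01
print(contrastive_loss_np(np.array([0.0]), np.array([0.1])))  # dissimilar pair, small distance -> 0.81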
47 | ''' 48 | pairs = [] 49 | labels = [] 50 | n = min([len(digit_indices[d]) for d in range(10)]) - 1 51 | for d in range(10): 52 | for i in range(n): 53 | z1, z2 = digit_indices[d][i], digit_indices[d][i+1] 54 | pairs += [[x[z1], x[z2]]] 55 | inc = random.randrange(1, 10) 56 | dn = (d + inc) % 10 57 | z1, z2 = digit_indices[d][i], digit_indices[dn][i] 58 | pairs += [[x[z1], x[z2]]] 59 | labels += [1, 0] 60 | return np.array(pairs), np.array(labels) 61 | 62 | 63 | def create_base_network(input_dim): 64 | '''Base network to be shared (eq. to feature extraction). 65 | ''' 66 | seq = Sequential() 67 | seq.add(Dense(128, input_shape=(input_dim,), activation='relu')) 68 | seq.add(Dropout(0.1)) 69 | seq.add(Dense(128, activation='relu')) 70 | seq.add(Dropout(0.1)) 71 | seq.add(Dense(128, activation='relu')) 72 | return seq 73 | 74 | 75 | def compute_accuracy(predictions, labels): 76 | '''Compute classification accuracy with a fixed threshold on distances. 77 | ''' 78 | return labels[predictions.ravel() < 0.5].mean() 79 | 80 | 81 | # the data, shuffled and split between train and test sets 82 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 83 | X_train = X_train.reshape(60000, 784) 84 | X_test = X_test.reshape(10000, 784) 85 | X_train = X_train.astype('float32') 86 | X_test = X_test.astype('float32') 87 | X_train /= 255 88 | X_test /= 255 89 | input_dim = 784 90 | nb_epoch = 20 91 | 92 | # create training+test positive and negative pairs 93 | digit_indices = [np.where(y_train == i)[0] for i in range(10)] 94 | tr_pairs, tr_y = create_pairs(X_train, digit_indices) 95 | 96 | digit_indices = [np.where(y_test == i)[0] for i in range(10)] 97 | te_pairs, te_y = create_pairs(X_test, digit_indices) 98 | 99 | # network definition 100 | base_network = create_base_network(input_dim) 101 | 102 | input_a = Input(shape=(input_dim,)) 103 | input_b = Input(shape=(input_dim,)) 104 | 105 | # because we re-use the same instance `base_network`, 106 | # the weights of the network 107 | # will be shared across the two branches 108 | processed_a = base_network(input_a) 109 | processed_b = base_network(input_b) 110 | 111 | distance = Lambda(euclidean_distance, output_shape=eucl_dist_output_shape)([processed_a, processed_b]) 112 | 113 | model = Model(input=[input_a, input_b], output=distance) 114 | 115 | # train 116 | rms = RMSprop() 117 | model.compile(loss=contrastive_loss, optimizer=rms) 118 | model.fit([tr_pairs[:, 0], tr_pairs[:, 1]], tr_y, 119 | validation_data=([te_pairs[:, 0], te_pairs[:, 1]], te_y), 120 | batch_size=128, 121 | nb_epoch=nb_epoch) 122 | 123 | # compute final accuracy on training and test sets 124 | pred = model.predict([tr_pairs[:, 0], tr_pairs[:, 1]]) 125 | tr_acc = compute_accuracy(pred, tr_y) 126 | pred = model.predict([te_pairs[:, 0], te_pairs[:, 1]]) 127 | te_acc = compute_accuracy(pred, te_y) 128 | 129 | print('* Accuracy on training set: %0.2f%%' % (100 * tr_acc)) 130 | print('* Accuracy on test set: %0.2f%%' % (100 * te_acc)) 131 | -------------------------------------------------------------------------------- /examples/mnist_sklearn_wrapper.py: -------------------------------------------------------------------------------- 1 | '''Example of how to use sklearn wrapper 2 | 3 | Builds simple CNN models on MNIST and uses sklearn's GridSearchCV to find best model 4 | ''' 5 | 6 | from __future__ import print_function 7 | import numpy as np 8 | np.random.seed(1337) # for reproducibility 9 | 10 | from keras.datasets import mnist 11 | from keras.models import Sequential 12 | from 
keras.layers import Dense, Dropout, Activation, Flatten 13 | from keras.layers import Convolution2D, MaxPooling2D 14 | from keras.utils import np_utils 15 | from keras.wrappers.scikit_learn import KerasClassifier 16 | from sklearn.grid_search import GridSearchCV 17 | 18 | 19 | nb_classes = 10 20 | 21 | # input image dimensions 22 | img_rows, img_cols = 28, 28 23 | 24 | # load training data and do basic data normalization 25 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 26 | X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols) 27 | X_test = X_test.reshape(X_test.shape[0], 1, img_rows, img_cols) 28 | X_train = X_train.astype('float32') 29 | X_test = X_test.astype('float32') 30 | X_train /= 255 31 | X_test /= 255 32 | 33 | # convert class vectors to binary class matrices 34 | y_train = np_utils.to_categorical(y_train, nb_classes) 35 | y_test = np_utils.to_categorical(y_test, nb_classes) 36 | 37 | def make_model(dense_layer_sizes, nb_filters, nb_conv, nb_pool): 38 | '''Creates model comprised of 2 convolutional layers followed by dense layers 39 | 40 | dense_layer_sizes: List of layer sizes. This list has one number for each layer 41 | nb_filters: Number of convolutional filters in each convolutional layer 42 | nb_conv: Convolutional kernel size 43 | nb_pool: Size of pooling area for max pooling 44 | ''' 45 | 46 | model = Sequential() 47 | 48 | model.add(Convolution2D(nb_filters, nb_conv, nb_conv, 49 | border_mode='valid', 50 | input_shape=(1, img_rows, img_cols))) 51 | model.add(Activation('relu')) 52 | model.add(Convolution2D(nb_filters, nb_conv, nb_conv)) 53 | model.add(Activation('relu')) 54 | model.add(MaxPooling2D(pool_size=(nb_pool, nb_pool))) 55 | model.add(Dropout(0.25)) 56 | 57 | model.add(Flatten()) 58 | for layer_size in dense_layer_sizes: 59 | model.add(Dense(layer_size)) 60 | model.add(Activation('relu')) 61 | model.add(Dropout(0.5)) 62 | model.add(Dense(nb_classes)) 63 | model.add(Activation('softmax')) 64 | 65 | model.compile(loss='categorical_crossentropy', 66 | optimizer='adadelta', 67 | metrics=['accuracy']) 68 | 69 | return model 70 | 71 | dense_size_candidates = [[32], [64], [32, 32], [64, 64]] 72 | my_classifier = KerasClassifier(make_model, batch_size=32) 73 | validator = GridSearchCV(my_classifier, 74 | param_grid={'dense_layer_sizes': dense_size_candidates, 75 | # nb_epoch is avail for tuning even when not 76 | # an argument to model building function 77 | 'nb_epoch': [3, 6], 78 | 'nb_filters': [8], 79 | 'nb_conv': [3], 80 | 'nb_pool': [2]}, 81 | scoring='log_loss', 82 | n_jobs=1) 83 | validator.fit(X_train, y_train) 84 | 85 | print('The parameters of the best model are: ') 86 | print(validator.best_params_) 87 | 88 | # validator.best_estimator_ returns sklearn-wrapped version of best model. 89 | # validator.best_estimator_.model returns the (unwrapped) keras model 90 | best_model = validator.best_estimator_.model 91 | metric_names = best_model.metrics_names 92 | metric_values = best_model.evaluate(X_test, y_test) 93 | for metric, value in zip(metric_names, metric_values): 94 | print(metric, ': ', value) 95 | -------------------------------------------------------------------------------- /examples/mnist_transfer_cnn.py: -------------------------------------------------------------------------------- 1 | '''Transfer learning toy example: 2 | 3 | 1- Train a simple convnet on the MNIST dataset the first 5 digits [0..4]. 4 | 2- Freeze convolutional layers and fine-tune dense layers 5 | for the classification of digits [5..9]. 
6 | 7 | Run on GPU: THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 python mnist_transfer_cnn.py 8 | 9 | Get to 99.8% test accuracy after 5 epochs 10 | for the first five digits classifier 11 | and 99.2% for the last five digits after transfer + fine-tuning. 12 | ''' 13 | 14 | from __future__ import print_function 15 | import numpy as np 16 | import datetime 17 | 18 | np.random.seed(1337) # for reproducibility 19 | 20 | from keras.datasets import mnist 21 | from keras.models import Sequential 22 | from keras.layers import Dense, Dropout, Activation, Flatten 23 | from keras.layers import Convolution2D, MaxPooling2D 24 | from keras.utils import np_utils 25 | 26 | 27 | now = datetime.datetime.now 28 | 29 | batch_size = 128 30 | nb_classes = 5 31 | nb_epoch = 5 32 | 33 | # input image dimensions 34 | img_rows, img_cols = 28, 28 35 | # number of convolutional filters to use 36 | nb_filters = 32 37 | # size of pooling area for max pooling 38 | nb_pool = 2 39 | # convolution kernel size 40 | nb_conv = 3 41 | 42 | 43 | def train_model(model, train, test, nb_classes): 44 | X_train = train[0].reshape(train[0].shape[0], 1, img_rows, img_cols) 45 | X_test = test[0].reshape(test[0].shape[0], 1, img_rows, img_cols) 46 | X_train = X_train.astype('float32') 47 | X_test = X_test.astype('float32') 48 | X_train /= 255 49 | X_test /= 255 50 | print('X_train shape:', X_train.shape) 51 | print(X_train.shape[0], 'train samples') 52 | print(X_test.shape[0], 'test samples') 53 | 54 | # convert class vectors to binary class matrices 55 | Y_train = np_utils.to_categorical(train[1], nb_classes) 56 | Y_test = np_utils.to_categorical(test[1], nb_classes) 57 | 58 | model.compile(loss='categorical_crossentropy', 59 | optimizer='adadelta', 60 | metrics=['accuracy']) 61 | 62 | t = now() 63 | model.fit(X_train, Y_train, 64 | batch_size=batch_size, nb_epoch=nb_epoch, 65 | verbose=1, 66 | validation_data=(X_test, Y_test)) 67 | print('Training time: %s' % (now() - t)) 68 | score = model.evaluate(X_test, Y_test, verbose=0) 69 | print('Test score:', score[0]) 70 | print('Test accuracy:', score[1]) 71 | 72 | 73 | # the data, shuffled and split between train and test sets 74 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 75 | 76 | # create two datasets one with digits below 5 and one with 5 and above 77 | X_train_lt5 = X_train[y_train < 5] 78 | y_train_lt5 = y_train[y_train < 5] 79 | X_test_lt5 = X_test[y_test < 5] 80 | y_test_lt5 = y_test[y_test < 5] 81 | 82 | X_train_gte5 = X_train[y_train >= 5] 83 | y_train_gte5 = y_train[y_train >= 5] - 5 # make classes start at 0 for 84 | X_test_gte5 = X_test[y_test >= 5] # np_utils.to_categorical 85 | y_test_gte5 = y_test[y_test >= 5] - 5 86 | 87 | # define two groups of layers: feature (convolutions) and classification (dense) 88 | feature_layers = [ 89 | Convolution2D(nb_filters, nb_conv, nb_conv, 90 | border_mode='valid', 91 | input_shape=(1, img_rows, img_cols)), 92 | Activation('relu'), 93 | Convolution2D(nb_filters, nb_conv, nb_conv), 94 | Activation('relu'), 95 | MaxPooling2D(pool_size=(nb_pool, nb_pool)), 96 | Dropout(0.25), 97 | Flatten(), 98 | ] 99 | classification_layers = [ 100 | Dense(128), 101 | Activation('relu'), 102 | Dropout(0.5), 103 | Dense(nb_classes), 104 | Activation('softmax') 105 | ] 106 | 107 | # create complete model 108 | model = Sequential() 109 | for l in feature_layers + classification_layers: 110 | model.add(l) 111 | 112 | # train model for 5-digit classification [0..4] 113 | train_model(model, 114 | (X_train_lt5, y_train_lt5), 115 | 
(X_test_lt5, y_test_lt5), nb_classes) 116 | 117 | # freeze feature layers and rebuild model 118 | for l in feature_layers: 119 | l.trainable = False 120 | 121 | # transfer: train dense layers for new classification task [5..9] 122 | train_model(model, 123 | (X_train_gte5, y_train_gte5), 124 | (X_test_gte5, y_test_gte5), nb_classes) 125 | -------------------------------------------------------------------------------- /examples/reuters_mlp.py: -------------------------------------------------------------------------------- 1 | '''Trains and evaluate a simple MLP 2 | on the Reuters newswire topic classification task. 3 | ''' 4 | 5 | from __future__ import print_function 6 | import numpy as np 7 | np.random.seed(1337) # for reproducibility 8 | 9 | from keras.datasets import reuters 10 | from keras.models import Sequential 11 | from keras.layers import Dense, Dropout, Activation 12 | from keras.utils import np_utils 13 | from keras.preprocessing.text import Tokenizer 14 | 15 | max_words = 1000 16 | batch_size = 32 17 | nb_epoch = 5 18 | 19 | print('Loading data...') 20 | (X_train, y_train), (X_test, y_test) = reuters.load_data(nb_words=max_words, test_split=0.2) 21 | print(len(X_train), 'train sequences') 22 | print(len(X_test), 'test sequences') 23 | 24 | nb_classes = np.max(y_train)+1 25 | print(nb_classes, 'classes') 26 | 27 | print('Vectorizing sequence data...') 28 | tokenizer = Tokenizer(nb_words=max_words) 29 | X_train = tokenizer.sequences_to_matrix(X_train, mode='binary') 30 | X_test = tokenizer.sequences_to_matrix(X_test, mode='binary') 31 | print('X_train shape:', X_train.shape) 32 | print('X_test shape:', X_test.shape) 33 | 34 | print('Convert class vector to binary class matrix (for use with categorical_crossentropy)') 35 | Y_train = np_utils.to_categorical(y_train, nb_classes) 36 | Y_test = np_utils.to_categorical(y_test, nb_classes) 37 | print('Y_train shape:', Y_train.shape) 38 | print('Y_test shape:', Y_test.shape) 39 | 40 | print('Building model...') 41 | model = Sequential() 42 | model.add(Dense(512, input_shape=(max_words,))) 43 | model.add(Activation('relu')) 44 | model.add(Dropout(0.5)) 45 | model.add(Dense(nb_classes)) 46 | model.add(Activation('softmax')) 47 | 48 | model.compile(loss='categorical_crossentropy', 49 | optimizer='adam', 50 | metrics=['accuracy']) 51 | 52 | history = model.fit(X_train, Y_train, 53 | nb_epoch=nb_epoch, batch_size=batch_size, 54 | verbose=1, validation_split=0.1) 55 | score = model.evaluate(X_test, Y_test, 56 | batch_size=batch_size, verbose=1) 57 | print('Test score:', score[0]) 58 | print('Test accuracy:', score[1]) 59 | -------------------------------------------------------------------------------- /examples/stateful_lstm.py: -------------------------------------------------------------------------------- 1 | '''Example script showing how to use stateful RNNs 2 | to model long sequences efficiently. 
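# Side note: for the stateful LSTM below, the batch size has to divide the number
# of samples exactly, because the state computed for sample i of one batch is
# carried over to sample i of the next batch. Quick check with this script's numbers:
samples, batch_size = 50000, 25     # gen_cosine_amp() yields (xn - x0) * step = 50000 points
assert samples % batch_size == 0
print(samples // batch_size)        # 2000 batches per epoch, no leftover samples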
3 | ''' 4 | from __future__ import print_function 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | from keras.models import Sequential 8 | from keras.layers import Dense, LSTM 9 | 10 | 11 | # since we are using stateful rnn tsteps can be set to 1 12 | tsteps = 1 13 | batch_size = 25 14 | epochs = 25 15 | # number of elements ahead that are used to make the prediction 16 | lahead = 1 17 | 18 | 19 | def gen_cosine_amp(amp=100, period=1000, x0=0, xn=50000, step=1, k=0.0001): 20 | """Generates an absolute cosine time series with the amplitude 21 | exponentially decreasing 22 | 23 | Arguments: 24 | amp: amplitude of the cosine function 25 | period: period of the cosine function 26 | x0: initial x of the time series 27 | xn: final x of the time series 28 | step: step of the time series discretization 29 | k: exponential rate 30 | """ 31 | cos = np.zeros(((xn - x0) * step, 1, 1)) 32 | for i in range(len(cos)): 33 | idx = x0 + i * step 34 | cos[i, 0, 0] = amp * np.cos(2 * np.pi * idx / period) 35 | cos[i, 0, 0] = cos[i, 0, 0] * np.exp(-k * idx) 36 | return cos 37 | 38 | 39 | print('Generating Data') 40 | cos = gen_cosine_amp() 41 | print('Input shape:', cos.shape) 42 | 43 | expected_output = np.zeros((len(cos), 1)) 44 | for i in range(len(cos) - lahead): 45 | expected_output[i, 0] = np.mean(cos[i + 1:i + lahead + 1]) 46 | 47 | print('Output shape') 48 | print(expected_output.shape) 49 | 50 | print('Creating Model') 51 | model = Sequential() 52 | model.add(LSTM(50, 53 | batch_input_shape=(batch_size, tsteps, 1), 54 | return_sequences=True, 55 | stateful=True)) 56 | model.add(LSTM(50, 57 | batch_input_shape=(batch_size, tsteps, 1), 58 | return_sequences=False, 59 | stateful=True)) 60 | model.add(Dense(1)) 61 | model.compile(loss='mse', optimizer='rmsprop') 62 | 63 | print('Training') 64 | for i in range(epochs): 65 | print('Epoch', i, '/', epochs) 66 | model.fit(cos, 67 | expected_output, 68 | batch_size=batch_size, 69 | verbose=1, 70 | nb_epoch=1, 71 | shuffle=False) 72 | model.reset_states() 73 | 74 | print('Predicting') 75 | predicted_output = model.predict(cos, batch_size=batch_size) 76 | 77 | print('Plotting Results') 78 | plt.subplot(2, 1, 1) 79 | plt.plot(expected_output) 80 | plt.title('Expected') 81 | plt.subplot(2, 1, 2) 82 | plt.plot(predicted_output) 83 | plt.title('Predicted') 84 | plt.show() 85 | -------------------------------------------------------------------------------- /examples/variational_autoencoder.py: -------------------------------------------------------------------------------- 1 | '''This script demonstrates how to build a variational autoencoder with Keras. 2 | 3 | Reference: "Auto-Encoding Variational Bayes" https://arxiv.org/abs/1312.6114 4 | ''' 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | 8 | from keras.layers import Input, Dense, Lambda 9 | from keras.models import Model 10 | from keras import backend as K 11 | from keras import objectives 12 | from keras.datasets import mnist 13 | 14 | batch_size = 100 15 | original_dim = 784 16 | latent_dim = 2 17 | intermediate_dim = 256 18 | nb_epoch = 50 19 | 20 | x = Input(batch_shape=(batch_size, original_dim)) 21 | h = Dense(intermediate_dim, activation='relu')(x) 22 | z_mean = Dense(latent_dim)(h) 23 | z_log_var = Dense(latent_dim)(h) 24 | 25 | 26 | def sampling(args): 27 | z_mean, z_log_var = args 28 | epsilon = K.random_normal(shape=(batch_size, latent_dim), mean=0.) 
29 | return z_mean + K.exp(z_log_var / 2) * epsilon 30 | 31 | # note that "output_shape" isn't necessary with the TensorFlow backend 32 | z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_var]) 33 | 34 | # we instantiate these layers separately so as to reuse them later 35 | decoder_h = Dense(intermediate_dim, activation='relu') 36 | decoder_mean = Dense(original_dim, activation='sigmoid') 37 | h_decoded = decoder_h(z) 38 | x_decoded_mean = decoder_mean(h_decoded) 39 | 40 | 41 | def vae_loss(x, x_decoded_mean): 42 | xent_loss = objectives.binary_crossentropy(x, x_decoded_mean) 43 | kl_loss = - 0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1) 44 | return xent_loss + kl_loss 45 | 46 | vae = Model(x, x_decoded_mean) 47 | vae.compile(optimizer='rmsprop', loss=vae_loss) 48 | 49 | # train the VAE on MNIST digits 50 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 51 | 52 | x_train = x_train.astype('float32') / 255. 53 | x_test = x_test.astype('float32') / 255. 54 | x_train = x_train.reshape((len(x_train), np.prod(x_train.shape[1:]))) 55 | x_test = x_test.reshape((len(x_test), np.prod(x_test.shape[1:]))) 56 | 57 | vae.fit(x_train, x_train, 58 | shuffle=True, 59 | nb_epoch=nb_epoch, 60 | batch_size=batch_size, 61 | validation_data=(x_test, x_test)) 62 | 63 | # build a model to project inputs on the latent space 64 | encoder = Model(x, z_mean) 65 | 66 | # display a 2D plot of the digit classes in the latent space 67 | x_test_encoded = encoder.predict(x_test, batch_size=batch_size) 68 | plt.figure(figsize=(6, 6)) 69 | plt.scatter(x_test_encoded[:, 0], x_test_encoded[:, 1], c=y_test) 70 | plt.colorbar() 71 | plt.show() 72 | 73 | # build a digit generator that can sample from the learned distribution 74 | decoder_input = Input(shape=(latent_dim,)) 75 | _h_decoded = decoder_h(decoder_input) 76 | _x_decoded_mean = decoder_mean(_h_decoded) 77 | generator = Model(decoder_input, _x_decoded_mean) 78 | 79 | # display a 2D manifold of the digits 80 | n = 15 # figure with 15x15 digits 81 | digit_size = 28 82 | figure = np.zeros((digit_size * n, digit_size * n)) 83 | # we will sample n points within [-15, 15] standard deviations 84 | grid_x = np.linspace(-15, 15, n) 85 | grid_y = np.linspace(-15, 15, n) 86 | 87 | for i, yi in enumerate(grid_x): 88 | for j, xi in enumerate(grid_y): 89 | z_sample = np.array([[xi, yi]]) 90 | x_decoded = generator.predict(z_sample) 91 | digit = x_decoded[0].reshape(digit_size, digit_size) 92 | figure[i * digit_size: (i + 1) * digit_size, 93 | j * digit_size: (j + 1) * digit_size] = digit 94 | 95 | plt.figure(figsize=(10, 10)) 96 | plt.imshow(figure) 97 | plt.show() 98 | -------------------------------------------------------------------------------- /examples/variational_autoencoder_deconv.py: -------------------------------------------------------------------------------- 1 | '''This script demonstrates how to build a variational autoencoder with Keras and deconvolution layers. 
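# Side note: the kl_loss term in vae_loss above is the closed-form KL divergence
# between the approximate posterior N(mu, sigma^2) and the standard normal prior,
# summed over latent dimensions. NumPy sanity check on made-up values:
import numpy as np

z_mean = np.array([0.5, -0.3])
z_log_var = np.array([0.1, -0.2])
kl = -0.5 * np.sum(1 + z_log_var - z_mean ** 2 - np.exp(z_log_var), axis=-1)
print(kl)   # ~0.18, non-negative; it is zero only when mu = 0 and log(var) = 0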
2 | 3 | Reference: "Auto-Encoding Variational Bayes" https://arxiv.org/abs/1312.6114 4 | ''' 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | 8 | from keras.layers import Input, Dense, Lambda, Flatten, Reshape 9 | from keras.layers import Convolution2D, Deconvolution2D, MaxPooling2D 10 | from keras.models import Model 11 | from keras import backend as K 12 | from keras import objectives 13 | from keras.datasets import mnist 14 | 15 | # input image dimensions 16 | img_rows, img_cols, img_chns = 28, 28, 1 17 | # number of convolutional filters to use 18 | nb_filters = 32 19 | # convolution kernel size 20 | nb_conv = 3 21 | 22 | batch_size = 16 23 | original_dim = (img_chns, img_rows, img_cols) 24 | latent_dim = 2 25 | intermediate_dim = 128 26 | epsilon_std = 0.01 27 | nb_epoch = 5 28 | 29 | 30 | x = Input(batch_shape=(batch_size,) + original_dim) 31 | c = Convolution2D(nb_filters, nb_conv, nb_conv, border_mode='same', activation='relu')(x) 32 | f = Flatten()(c) 33 | h = Dense(intermediate_dim, activation='relu')(f) 34 | 35 | z_mean = Dense(latent_dim)(h) 36 | z_log_var = Dense(latent_dim)(h) 37 | 38 | 39 | def sampling(args): 40 | z_mean, z_log_var = args 41 | epsilon = K.random_normal(shape=(batch_size, latent_dim), 42 | mean=0., std=epsilon_std) 43 | return z_mean + K.exp(z_log_var) * epsilon 44 | 45 | # note that "output_shape" isn't necessary with the TensorFlow backend 46 | # so you could write `Lambda(sampling)([z_mean, z_log_var])` 47 | z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_var]) 48 | 49 | # we instantiate these layers separately so as to reuse them later 50 | decoder_h = Dense(intermediate_dim, activation='relu') 51 | decoder_f = Dense(nb_filters*img_rows*img_cols, activation='relu') 52 | decoder_c = Reshape((nb_filters, img_rows, img_cols)) 53 | decoder_mean = Deconvolution2D(img_chns, nb_conv, nb_conv, 54 | (batch_size, img_chns, img_rows, img_cols), 55 | border_mode='same') 56 | 57 | h_decoded = decoder_h(z) 58 | f_decoded = decoder_f(h_decoded) 59 | c_decoded = decoder_c(f_decoded) 60 | x_decoded_mean = decoder_mean(c_decoded) 61 | 62 | 63 | def vae_loss(x, x_decoded_mean): 64 | # NOTE: binary_crossentropy expects a batch_size by dim for x and x_decoded_mean, so we MUST flatten these! 65 | x = K.flatten(x) 66 | x_decoded_mean = K.flatten(x_decoded_mean) 67 | xent_loss = objectives.binary_crossentropy(x, x_decoded_mean) 68 | kl_loss = - 0.5 * K.mean(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1) 69 | return xent_loss + kl_loss 70 | 71 | vae = Model(x, x_decoded_mean) 72 | vae.compile(optimizer='rmsprop', loss=vae_loss) 73 | vae.summary() 74 | 75 | # train the VAE on MNIST digits 76 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 77 | 78 | x_train = x_train.astype('float32')[:, None, :, :] / 255. 79 | x_test = x_test.astype('float32')[:, None, :, :] / 255. 
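# Side note: decoder shape flow for the deconvolutional VAE above ('th' ordering);
# as used here, Deconvolution2D is given the full target output shape explicitly.
#   z             (16, 2)
#   decoder_h     (16, 128)
#   decoder_f     (16, 25088)         since nb_filters * 28 * 28 = 25088
#   decoder_c     (16, 32, 28, 28)    Reshape back to feature maps
#   decoder_mean  (16, 1, 28, 28)     'same' deconvolution, matches the input batches
nb_filters, img_rows, img_cols = 32, 28, 28
print(nb_filters * img_rows * img_cols)   # 25088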
80 | 81 | vae.fit(x_train, x_train, 82 | shuffle=True, 83 | nb_epoch=nb_epoch, 84 | batch_size=batch_size, 85 | validation_data=(x_test, x_test)) 86 | 87 | 88 | # build a model to project inputs on the latent space 89 | encoder = Model(x, z_mean) 90 | 91 | # display a 2D plot of the digit classes in the latent space 92 | x_test_encoded = encoder.predict(x_test, batch_size=batch_size) 93 | plt.figure(figsize=(6, 6)) 94 | plt.scatter(x_test_encoded[:, 0], x_test_encoded[:, 1], c=y_test) 95 | plt.colorbar() 96 | plt.show() 97 | 98 | # build a digit generator that can sample from the learned distribution 99 | decoder_input = Input(shape=(latent_dim,)) 100 | _h_decoded = decoder_h(decoder_input) 101 | _f_decoded = decoder_f(_h_decoded) 102 | _c_decoded = decoder_c(_f_decoded) 103 | _x_decoded_mean = decoder_mean(_c_decoded) 104 | generator = Model(decoder_input, _x_decoded_mean) 105 | 106 | # display a 2D manifold of the digits 107 | n = 15 # figure with 15x15 digits 108 | digit_size = 28 109 | figure = np.zeros((digit_size * n, digit_size * n)) 110 | # we will sample n points within [-15, 15] standard deviations 111 | grid_x = np.linspace(-15, 15, n) 112 | grid_y = np.linspace(-15, 15, n) 113 | 114 | for i, yi in enumerate(grid_x): 115 | for j, xi in enumerate(grid_y): 116 | z_sample = np.array([[xi, yi]]) 117 | x_decoded = generator.predict(z_sample) 118 | digit = x_decoded[0].reshape(digit_size, digit_size) 119 | figure[i * digit_size: (i + 1) * digit_size, 120 | j * digit_size: (j + 1) * digit_size] = digit 121 | 122 | plt.figure(figsize=(10, 10)) 123 | plt.imshow(figure) 124 | plt.show() 125 | -------------------------------------------------------------------------------- /keras/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from . import backend 3 | from . import datasets 4 | from . import engine 5 | from . import layers 6 | from . import preprocessing 7 | from . import utils 8 | from . import wrappers 9 | from . import callbacks 10 | from . import constraints 11 | from . import initializations 12 | from . import metrics 13 | from . import models 14 | from . import objectives 15 | from . import optimizers 16 | from . import regularizers 17 | 18 | __version__ = '1.0.6' 19 | -------------------------------------------------------------------------------- /keras/activations.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from . import backend as K 3 | 4 | 5 | def softmax(x): 6 | ndim = K.ndim(x) 7 | if ndim == 2: 8 | return K.softmax(x) 9 | elif ndim == 3: 10 | e = K.exp(x - K.max(x, axis=-1, keepdims=True)) 11 | s = K.sum(e, axis=-1, keepdims=True) 12 | return e / s 13 | else: 14 | raise Exception('Cannot apply softmax to a tensor that is not 2D or 3D. ' + 15 | 'Here, ndim=' + str(ndim)) 16 | 17 | 18 | def softplus(x): 19 | return K.softplus(x) 20 | 21 | 22 | def softsign(x): 23 | return K.softsign(x) 24 | 25 | 26 | def relu(x, alpha=0., max_value=None): 27 | return K.relu(x, alpha=alpha, max_value=max_value) 28 | 29 | 30 | def tanh(x): 31 | return K.tanh(x) 32 | 33 | 34 | def sigmoid(x): 35 | return K.sigmoid(x) 36 | 37 | 38 | def hard_sigmoid(x): 39 | return K.hard_sigmoid(x) 40 | 41 | 42 | def linear(x): 43 | ''' 44 | The function returns the variable that is passed in, so all types work. 
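# Side note: the 3D branch of softmax() above subtracts the per-row max before
# exponentiating; that is the usual overflow guard and it leaves the result
# unchanged. NumPy demonstration:
import numpy as np

x = np.array([[1000., 1001., 1002.]])
naive = np.exp(x) / np.exp(x).sum(axis=-1, keepdims=True)       # overflows to nan
shifted = np.exp(x - x.max(axis=-1, keepdims=True))
stable = shifted / shifted.sum(axis=-1, keepdims=True)
print(naive, stable)   # [nan nan nan] vs approximately [0.09 0.245 0.665]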
45 | ''' 46 | return x 47 | 48 | 49 | from .utils.generic_utils import get_from_module 50 | def get(identifier): 51 | if identifier is None: 52 | return linear 53 | return get_from_module(identifier, globals(), 'activation function') 54 | -------------------------------------------------------------------------------- /keras/backend/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | import os 4 | import json 5 | import sys 6 | from .common import epsilon 7 | from .common import floatx 8 | from .common import set_epsilon 9 | from .common import set_floatx 10 | from .common import get_uid 11 | from .common import cast_to_floatx 12 | from .common import image_dim_ordering 13 | from .common import set_image_dim_ordering 14 | 15 | _keras_base_dir = os.path.expanduser('~') 16 | if not os.access(_keras_base_dir, os.W_OK): 17 | _keras_base_dir = '/tmp' 18 | 19 | _keras_dir = os.path.join(_keras_base_dir, '.keras') 20 | if not os.path.exists(_keras_dir): 21 | os.makedirs(_keras_dir) 22 | 23 | _BACKEND = 'theano' 24 | _config_path = os.path.expanduser(os.path.join(_keras_dir, 'keras.json')) 25 | if os.path.exists(_config_path): 26 | _config = json.load(open(_config_path)) 27 | _floatx = _config.get('floatx', floatx()) 28 | assert _floatx in {'float16', 'float32', 'float64'} 29 | _epsilon = _config.get('epsilon', epsilon()) 30 | assert type(_epsilon) == float 31 | _backend = _config.get('backend', _BACKEND) 32 | assert _backend in {'theano', 'tensorflow'} 33 | _image_dim_ordering = _config.get('image_dim_ordering', image_dim_ordering()) 34 | assert _image_dim_ordering in {'tf', 'th'} 35 | 36 | set_floatx(_floatx) 37 | set_epsilon(_epsilon) 38 | set_image_dim_ordering(_image_dim_ordering) 39 | _BACKEND = _backend 40 | 41 | # save config file 42 | _config = {'floatx': floatx(), 43 | 'epsilon': epsilon(), 44 | 'backend': _BACKEND, 45 | 'image_dim_ordering': image_dim_ordering()} 46 | with open(_config_path, 'w') as f: 47 | f.write(json.dumps(_config, indent=4)) 48 | 49 | if 'KERAS_BACKEND' in os.environ: 50 | _backend = os.environ['KERAS_BACKEND'] 51 | assert _backend in {'theano', 'tensorflow'} 52 | _BACKEND = _backend 53 | 54 | # import backend 55 | if _BACKEND == 'theano': 56 | sys.stderr.write('Using Theano backend.\n') 57 | from .theano_backend import * 58 | elif _BACKEND == 'tensorflow': 59 | sys.stderr.write('Using TensorFlow backend.\n') 60 | from .tensorflow_backend import * 61 | else: 62 | raise Exception('Unknown backend: ' + str(_BACKEND)) 63 | -------------------------------------------------------------------------------- /keras/backend/common.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from collections import defaultdict 4 | 5 | # the type of float to use throughout the session. 6 | _FLOATX = 'float32' 7 | _EPSILON = 10e-8 8 | _UID_PREFIXES = defaultdict(int) 9 | _IMAGE_DIM_ORDERING = 'th' 10 | 11 | 12 | def epsilon(): 13 | '''Returns the value of the fuzz 14 | factor used in numeric expressions. 15 | ''' 16 | return _EPSILON 17 | 18 | 19 | def set_epsilon(e): 20 | '''Sets the value of the fuzz 21 | factor used in numeric expressions. 22 | ''' 23 | global _EPSILON 24 | _EPSILON = e 25 | 26 | 27 | def floatx(): 28 | '''Returns the default float type, as a string 29 | (e.g. 'float16', 'float32', 'float64'). 
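# Side note: the config read above normally lives in ~/.keras/keras.json and, as
# written back by this module, holds exactly these four keys; the KERAS_BACKEND
# environment variable overrides the "backend" entry at import time. Inspecting it:
import json
import os

with open(os.path.expanduser('~/.keras/keras.json')) as f:
    print(json.load(f))
# e.g. {"floatx": "float32", "epsilon": 1e-07, "backend": "theano", "image_dim_ordering": "th"}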
30 | ''' 31 | return _FLOATX 32 | 33 | 34 | def set_floatx(floatx): 35 | global _FLOATX 36 | if floatx not in {'float16', 'float32', 'float64'}: 37 | raise Exception('Unknown floatx type: ' + str(floatx)) 38 | _FLOATX = str(floatx) 39 | 40 | 41 | def cast_to_floatx(x): 42 | '''Cast a Numpy array to floatx. 43 | ''' 44 | return np.asarray(x, dtype=_FLOATX) 45 | 46 | 47 | def image_dim_ordering(): 48 | '''Returns the image dimension ordering 49 | convention ('th' or 'tf'). 50 | ''' 51 | return _IMAGE_DIM_ORDERING 52 | 53 | 54 | def set_image_dim_ordering(dim_ordering): 55 | '''Sets the value of the image dimension 56 | ordering convention ('th' or 'tf'). 57 | ''' 58 | global _IMAGE_DIM_ORDERING 59 | if dim_ordering not in {'tf', 'th'}: 60 | raise Exception('Unknown dim_ordering:', dim_ordering) 61 | _IMAGE_DIM_ORDERING = str(dim_ordering) 62 | 63 | 64 | def get_uid(prefix=''): 65 | _UID_PREFIXES[prefix] += 1 66 | return _UID_PREFIXES[prefix] 67 | 68 | 69 | def reset_uids(): 70 | global _UID_PREFIXES 71 | _UID_PREFIXES = defaultdict(int) 72 | -------------------------------------------------------------------------------- /keras/constraints.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from . import backend as K 3 | 4 | 5 | class Constraint(object): 6 | def __call__(self, p): 7 | return p 8 | 9 | def get_config(self): 10 | return {'name': self.__class__.__name__} 11 | 12 | 13 | class MaxNorm(Constraint): 14 | '''Constrain the weights incident to each hidden unit to have a norm less than or equal to a desired value. 15 | 16 | # Arguments 17 | m: the maximum norm for the incoming weights. 18 | axis: integer, axis along which to calculate weight norms. For instance, 19 | in a `Dense` layer the weight matrix has shape (input_dim, output_dim), 20 | set `axis` to `0` to constrain each weight vector of length (input_dim). 21 | In a `MaxoutDense` layer the weight tensor has shape (nb_feature, input_dim, output_dim), 22 | set `axis` to `1` to constrain each weight vector of length (input_dim), 23 | i.e. constrain the filters incident to the `max` operation. 24 | In a `Convolution2D` layer with the Theano backend, the weight tensor 25 | has shape (nb_filter, stack_size, nb_row, nb_col), set `axis` to `[1,2,3]` 26 | to constrain the weights of each filter tensor of size (stack_size, nb_row, nb_col). 27 | In a `Convolution2D` layer with the TensorFlow backend, the weight tensor 28 | has shape (nb_row, nb_col, stack_size, nb_filter), set `axis` to `[0,1,2]` 29 | to constrain the weights of each filter tensor of size (nb_row, nb_col, stack_size). 30 | 31 | # References 32 | - [Dropout: A Simple Way to Prevent Neural Networks from Overfitting Srivastava, Hinton, et al. 2014](http://www.cs.toronto.edu/~rsalakhu/papers/srivastava14a.pdf) 33 | ''' 34 | def __init__(self, m=2, axis=0): 35 | self.m = m 36 | self.axis = axis 37 | 38 | def __call__(self, p): 39 | norms = K.sqrt(K.sum(K.square(p), axis=self.axis, keepdims=True)) 40 | desired = K.clip(norms, 0, self.m) 41 | p = p * (desired / (K.epsilon() + norms)) 42 | return p 43 | 44 | def get_config(self): 45 | return {'name': self.__class__.__name__, 46 | 'm': self.m, 47 | 'axis': self.axis} 48 | 49 | 50 | class NonNeg(Constraint): 51 | '''Constrain the weights to be non-negative. 
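Constraints such as this one are typically attached through a layer's `W_constraint`/`b_constraint` arguments, e.g. `Dense(64, W_constraint=nonneg())` (illustrative usage for this Keras version).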
52 | ''' 53 | def __call__(self, p): 54 | p *= K.cast(p >= 0., K.floatx()) 55 | return p 56 | 57 | 58 | class UnitNorm(Constraint): 59 | '''Constrain the weights incident to each hidden unit to have unit norm. 60 | 61 | # Arguments 62 | axis: integer, axis along which to calculate weight norms. For instance, 63 | in a `Dense` layer the weight matrix has shape (input_dim, output_dim), 64 | set `axis` to `0` to constrain each weight vector of length (input_dim). 65 | In a `MaxoutDense` layer the weight tensor has shape (nb_feature, input_dim, output_dim), 66 | set `axis` to `1` to constrain each weight vector of length (input_dim), 67 | i.e. constrain the filters incident to the `max` operation. 68 | In a `Convolution2D` layer with the Theano backend, the weight tensor 69 | has shape (nb_filter, stack_size, nb_row, nb_col), set `axis` to `[1,2,3]` 70 | to constrain the weights of each filter tensor of size (stack_size, nb_row, nb_col). 71 | In a `Convolution2D` layer with the TensorFlow backend, the weight tensor 72 | has shape (nb_row, nb_col, stack_size, nb_filter), set `axis` to `[0,1,2]` 73 | to constrain the weights of each filter tensor of size (nb_row, nb_col, stack_size). 74 | ''' 75 | def __init__(self, axis=0): 76 | self.axis = axis 77 | 78 | def __call__(self, p): 79 | return p / (K.epsilon() + K.sqrt(K.sum(K.square(p), axis=self.axis, keepdims=True))) 80 | 81 | def get_config(self): 82 | return {'name': self.__class__.__name__, 83 | 'axis': self.axis} 84 | 85 | 86 | maxnorm = MaxNorm 87 | nonneg = NonNeg 88 | unitnorm = UnitNorm 89 | 90 | from .utils.generic_utils import get_from_module 91 | def get(identifier, kwargs=None): 92 | return get_from_module(identifier, globals(), 'constraint', 93 | instantiate=True, kwargs=kwargs) 94 | -------------------------------------------------------------------------------- /keras/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oeway/keras/833aa73b4959a7c14096083a03fc035595842cc7/keras/datasets/__init__.py -------------------------------------------------------------------------------- /keras/datasets/cifar.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | import sys 4 | from six.moves import cPickle 5 | 6 | 7 | def load_batch(fpath, label_key='labels'): 8 | f = open(fpath, 'rb') 9 | if sys.version_info < (3,): 10 | d = cPickle.load(f) 11 | else: 12 | d = cPickle.load(f, encoding="bytes") 13 | # decode utf8 14 | for k, v in d.items(): 15 | del(d[k]) 16 | d[k.decode("utf8")] = v 17 | f.close() 18 | data = d["data"] 19 | labels = d[label_key] 20 | 21 | data = data.reshape(data.shape[0], 3, 32, 32) 22 | return data, labels 23 | -------------------------------------------------------------------------------- /keras/datasets/cifar10.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from .cifar import load_batch 3 | from ..utils.data_utils import get_file 4 | import numpy as np 5 | import os 6 | 7 | 8 | def load_data(): 9 | dirname = "cifar-10-batches-py" 10 | origin = "http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz" 11 | path = get_file(dirname, origin=origin, untar=True) 12 | 13 | nb_train_samples = 50000 14 | 15 | X_train = np.zeros((nb_train_samples, 3, 32, 32), dtype="uint8") 16 | y_train = np.zeros((nb_train_samples,), dtype="uint8") 17 | 18 | for i in 
range(1, 6): 19 | fpath = os.path.join(path, 'data_batch_' + str(i)) 20 | data, labels = load_batch(fpath) 21 | X_train[(i-1)*10000:i*10000, :, :, :] = data 22 | y_train[(i-1)*10000:i*10000] = labels 23 | 24 | fpath = os.path.join(path, 'test_batch') 25 | X_test, y_test = load_batch(fpath) 26 | 27 | y_train = np.reshape(y_train, (len(y_train), 1)) 28 | y_test = np.reshape(y_test, (len(y_test), 1)) 29 | 30 | return (X_train, y_train), (X_test, y_test) 31 | -------------------------------------------------------------------------------- /keras/datasets/cifar100.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from .cifar import load_batch 3 | from ..utils.data_utils import get_file 4 | import numpy as np 5 | import os 6 | 7 | 8 | def load_data(label_mode='fine'): 9 | if label_mode not in ['fine', 'coarse']: 10 | raise Exception('label_mode must be one of "fine" "coarse".') 11 | 12 | dirname = "cifar-100-python" 13 | origin = "http://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz" 14 | path = get_file(dirname, origin=origin, untar=True) 15 | 16 | nb_test_samples = 10000 17 | nb_train_samples = 50000 18 | 19 | fpath = os.path.join(path, 'train') 20 | X_train, y_train = load_batch(fpath, label_key=label_mode+'_labels') 21 | 22 | fpath = os.path.join(path, 'test') 23 | X_test, y_test = load_batch(fpath, label_key=label_mode+'_labels') 24 | 25 | y_train = np.reshape(y_train, (len(y_train), 1)) 26 | y_test = np.reshape(y_test, (len(y_test), 1)) 27 | 28 | return (X_train, y_train), (X_test, y_test) 29 | -------------------------------------------------------------------------------- /keras/datasets/data_utils.py: -------------------------------------------------------------------------------- 1 | from ..utils.data_utils import * 2 | import warnings 3 | 4 | warnings.warn('data_utils has been moved to keras.utils.data_utils.') 5 | -------------------------------------------------------------------------------- /keras/datasets/imdb.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from six.moves import cPickle 3 | import gzip 4 | from ..utils.data_utils import get_file 5 | from six.moves import zip 6 | import numpy as np 7 | import sys 8 | 9 | 10 | def load_data(path='imdb_full.pkl', nb_words=None, skip_top=0, 11 | maxlen=None, seed=113, 12 | start_char=1, oov_char=2, index_from=3): 13 | ''' 14 | # Arguments 15 | path: where to store the data (in `/.keras/dataset`) 16 | nb_words: max number of words to include. Words are ranked 17 | by how often they occur (in the training set) and only 18 | the most frequent words are kept 19 | skip_top: skip the top N most frequently occuring words 20 | (which may not be informative). 21 | maxlen: truncate sequences after this length. 22 | seed: random seed for sample shuffling. 23 | start_char: The start of a sequence will be marked with this character. 24 | Set to 1 because 0 is usually the padding character. 25 | oov_char: words that were cut out because of the `nb_words` 26 | or `skip_top` limit will be replaced with this character. 27 | index_from: index actual words with this index and higher. 28 | 29 | Note that the 'out of vocabulary' character is only used for 30 | words that were present in the training set but are not included 31 | because they're not making the `nb_words` cut here. 32 | Words that were not seen in the trining set but are in the test set 33 | have simply been skipped. 
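For example, with `nb_words=10000` and `skip_top=10`, word indices below 10 or at/above 10000 are replaced by `oov_char` (2 by default).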
34 | ''' 35 | path = get_file(path, 36 | origin='https://s3.amazonaws.com/text-datasets/imdb_full.pkl', 37 | md5_hash='d091312047c43cf9e4e38fef92437263') 38 | 39 | if path.endswith('.gz'): 40 | f = gzip.open(path, 'rb') 41 | else: 42 | f = open(path, 'rb') 43 | 44 | (x_train, labels_train), (x_test, labels_test) = cPickle.load(f) 45 | f.close() 46 | 47 | np.random.seed(seed) 48 | np.random.shuffle(x_train) 49 | np.random.seed(seed) 50 | np.random.shuffle(labels_train) 51 | 52 | np.random.seed(seed * 2) 53 | np.random.shuffle(x_test) 54 | np.random.seed(seed * 2) 55 | np.random.shuffle(labels_test) 56 | 57 | X = x_train + x_test 58 | labels = labels_train + labels_test 59 | 60 | if start_char is not None: 61 | X = [[start_char] + [w + index_from for w in x] for x in X] 62 | elif index_from: 63 | X = [[w + index_from for w in x] for x in X] 64 | 65 | if maxlen: 66 | new_X = [] 67 | new_labels = [] 68 | for x, y in zip(X, labels): 69 | if len(x) < maxlen: 70 | new_X.append(x) 71 | new_labels.append(y) 72 | X = new_X 73 | labels = new_labels 74 | if not X: 75 | raise Exception('After filtering for sequences shorter than maxlen=' + 76 | str(maxlen) + ', no sequence was kept. ' 77 | 'Increase maxlen.') 78 | if not nb_words: 79 | nb_words = max([max(x) for x in X]) 80 | 81 | # by convention, use 2 as OOV word 82 | # reserve 'index_from' (=3 by default) characters: 0 (padding), 1 (start), 2 (OOV) 83 | if oov_char is not None: 84 | X = [[oov_char if (w >= nb_words or w < skip_top) else w for w in x] for x in X] 85 | else: 86 | nX = [] 87 | for x in X: 88 | nx = [] 89 | for w in x: 90 | if (w >= nb_words or w < skip_top): 91 | nx.append(w) 92 | nX.append(nx) 93 | X = nX 94 | 95 | X_train = np.array(X[:len(x_train)]) 96 | y_train = np.array(labels[:len(x_train)]) 97 | 98 | X_test = np.array(X[len(x_train):]) 99 | y_test = np.array(labels[len(x_train):]) 100 | 101 | return (X_train, y_train), (X_test, y_test) 102 | 103 | 104 | def get_word_index(path='imdb_word_index.pkl'): 105 | path = get_file(path, 106 | origin='https://s3.amazonaws.com/text-datasets/imdb_word_index.pkl', 107 | md5_hash='72d94b01291be4ff843198d3b0e1e4d7') 108 | f = open(path, 'rb') 109 | 110 | if sys.version_info < (3,): 111 | data = cPickle.load(f) 112 | else: 113 | data = cPickle.load(f, encoding='latin1') 114 | 115 | f.close() 116 | return data 117 | -------------------------------------------------------------------------------- /keras/datasets/mnist.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import gzip 3 | from ..utils.data_utils import get_file 4 | from six.moves import cPickle 5 | import sys 6 | 7 | 8 | def load_data(path="mnist.pkl.gz"): 9 | path = get_file(path, origin="https://s3.amazonaws.com/img-datasets/mnist.pkl.gz") 10 | 11 | if path.endswith(".gz"): 12 | f = gzip.open(path, 'rb') 13 | else: 14 | f = open(path, 'rb') 15 | 16 | if sys.version_info < (3,): 17 | data = cPickle.load(f) 18 | else: 19 | data = cPickle.load(f, encoding="bytes") 20 | 21 | f.close() 22 | return data # (X_train, y_train), (X_test, y_test) 23 | -------------------------------------------------------------------------------- /keras/datasets/reuters.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from ..utils.data_utils import get_file 4 | from six.moves import cPickle 5 | from six.moves import zip 6 | import numpy as np 7 | import sys 8 | 9 | 10 | def 
load_data(path='reuters.pkl', nb_words=None, skip_top=0, 11 | maxlen=None, test_split=0.2, seed=113, 12 | start_char=1, oov_char=2, index_from=3): 13 | 14 | path = get_file(path, origin='https://s3.amazonaws.com/text-datasets/reuters.pkl') 15 | f = open(path, 'rb') 16 | X, labels = cPickle.load(f) 17 | f.close() 18 | 19 | np.random.seed(seed) 20 | np.random.shuffle(X) 21 | np.random.seed(seed) 22 | np.random.shuffle(labels) 23 | 24 | if start_char is not None: 25 | X = [[start_char] + [w + index_from for w in x] for x in X] 26 | elif index_from: 27 | X = [[w + index_from for w in x] for x in X] 28 | 29 | if maxlen: 30 | new_X = [] 31 | new_labels = [] 32 | for x, y in zip(X, labels): 33 | if len(x) < maxlen: 34 | new_X.append(x) 35 | new_labels.append(y) 36 | X = new_X 37 | labels = new_labels 38 | 39 | if not nb_words: 40 | nb_words = max([max(x) for x in X]) 41 | 42 | # by convention, use 2 as OOV word 43 | # reserve 'index_from' (=3 by default) characters: 0 (padding), 1 (start), 2 (OOV) 44 | if oov_char is not None: 45 | X = [[oov_char if (w >= nb_words or w < skip_top) else w for w in x] for x in X] 46 | else: 47 | nX = [] 48 | for x in X: 49 | nx = [] 50 | for w in x: 51 | if (w >= nb_words or w < skip_top): 52 | nx.append(w) 53 | nX.append(nx) 54 | X = nX 55 | 56 | X_train = X[:int(len(X) * (1 - test_split))] 57 | y_train = labels[:int(len(X) * (1 - test_split))] 58 | 59 | X_test = X[int(len(X) * (1 - test_split)):] 60 | y_test = labels[int(len(X) * (1 - test_split)):] 61 | 62 | return (X_train, y_train), (X_test, y_test) 63 | 64 | 65 | def get_word_index(path='reuters_word_index.pkl'): 66 | path = get_file(path, origin='https://s3.amazonaws.com/text-datasets/reuters_word_index.pkl') 67 | f = open(path, 'rb') 68 | 69 | if sys.version_info < (3,): 70 | data = cPickle.load(f) 71 | else: 72 | data = cPickle.load(f, encoding='latin1') 73 | 74 | f.close() 75 | return data 76 | -------------------------------------------------------------------------------- /keras/engine/__init__.py: -------------------------------------------------------------------------------- 1 | # note: topology.Node is an internal class, 2 | # it isn't meant to be used by Keras users. 3 | from .topology import InputSpec 4 | from .topology import Input 5 | from .topology import InputLayer 6 | from .topology import Layer 7 | from .topology import Merge 8 | from .topology import merge 9 | from .topology import get_source_inputs 10 | from .training import Model 11 | -------------------------------------------------------------------------------- /keras/initializations.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import numpy as np 3 | from . import backend as K 4 | 5 | 6 | def get_fans(shape, dim_ordering='th'): 7 | if len(shape) == 2: 8 | fan_in = shape[0] 9 | fan_out = shape[1] 10 | elif len(shape) == 4 or len(shape) == 5: 11 | # assuming convolution kernels (2D or 3D). 12 | # TH kernel shape: (depth, input_depth, ...) 
13 | # TF kernel shape: (..., input_depth, depth) 14 | if dim_ordering == 'th': 15 | receptive_field_size = np.prod(shape[2:]) 16 | fan_in = shape[1] * receptive_field_size 17 | fan_out = shape[0] * receptive_field_size 18 | elif dim_ordering == 'tf': 19 | receptive_field_size = np.prod(shape[:2]) 20 | fan_in = shape[-2] * receptive_field_size 21 | fan_out = shape[-1] * receptive_field_size 22 | else: 23 | raise Exception('Invalid dim_ordering: ' + dim_ordering) 24 | else: 25 | # no specific assumptions 26 | fan_in = np.sqrt(np.prod(shape)) 27 | fan_out = np.sqrt(np.prod(shape)) 28 | return fan_in, fan_out 29 | 30 | 31 | def uniform(shape, scale=0.05, name=None): 32 | return K.random_uniform_variable(shape, -scale, scale, name=name) 33 | 34 | 35 | def normal(shape, scale=0.05, name=None): 36 | return K.random_normal_variable(shape, 0.0, scale, name=name) 37 | 38 | 39 | def lecun_uniform(shape, name=None, dim_ordering='th'): 40 | ''' Reference: LeCun 98, Efficient Backprop 41 | http://yann.lecun.com/exdb/publis/pdf/lecun-98b.pdf 42 | ''' 43 | fan_in, fan_out = get_fans(shape, dim_ordering=dim_ordering) 44 | scale = np.sqrt(3. / fan_in) 45 | return uniform(shape, scale, name=name) 46 | 47 | 48 | def glorot_normal(shape, name=None, dim_ordering='th'): 49 | ''' Reference: Glorot & Bengio, AISTATS 2010 50 | ''' 51 | fan_in, fan_out = get_fans(shape, dim_ordering=dim_ordering) 52 | s = np.sqrt(2. / (fan_in + fan_out)) 53 | return normal(shape, s, name=name) 54 | 55 | 56 | def glorot_uniform(shape, name=None, dim_ordering='th'): 57 | fan_in, fan_out = get_fans(shape, dim_ordering=dim_ordering) 58 | s = np.sqrt(6. / (fan_in + fan_out)) 59 | return uniform(shape, s, name=name) 60 | 61 | 62 | def he_normal(shape, name=None, dim_ordering='th'): 63 | ''' Reference: He et al., http://arxiv.org/abs/1502.01852 64 | ''' 65 | fan_in, fan_out = get_fans(shape, dim_ordering=dim_ordering) 66 | s = np.sqrt(2. / fan_in) 67 | return normal(shape, s, name=name) 68 | 69 | 70 | def he_uniform(shape, name=None, dim_ordering='th'): 71 | fan_in, fan_out = get_fans(shape, dim_ordering=dim_ordering) 72 | s = np.sqrt(6. / fan_in) 73 | return uniform(shape, s, name=name) 74 | 75 | 76 | def orthogonal(shape, scale=1.1, name=None): 77 | ''' From Lasagne. 
Reference: Saxe et al., http://arxiv.org/abs/1312.6120 78 | ''' 79 | flat_shape = (shape[0], np.prod(shape[1:])) 80 | a = np.random.normal(0.0, 1.0, flat_shape) 81 | u, _, v = np.linalg.svd(a, full_matrices=False) 82 | # pick the one with the correct shape 83 | q = u if u.shape == flat_shape else v 84 | q = q.reshape(shape) 85 | return K.variable(scale * q[:shape[0], :shape[1]], name=name) 86 | 87 | 88 | def identity(shape, scale=1, name=None): 89 | if len(shape) != 2 or shape[0] != shape[1]: 90 | raise Exception('Identity matrix initialization can only be used ' 91 | 'for 2D square matrices.') 92 | else: 93 | return K.variable(scale * np.identity(shape[0]), name=name) 94 | 95 | 96 | def zero(shape, name=None): 97 | return K.zeros(shape, name=name) 98 | 99 | 100 | def one(shape, name=None): 101 | return K.ones(shape, name=name) 102 | 103 | 104 | from .utils.generic_utils import get_from_module 105 | def get(identifier, **kwargs): 106 | return get_from_module(identifier, globals(), 107 | 'initialization', kwargs=kwargs) 108 | -------------------------------------------------------------------------------- /keras/layers/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from ..engine import Layer, Input, InputLayer, Merge, merge, InputSpec 3 | from .core import * 4 | from .convolutional import * 5 | from .pooling import * 6 | from .local import * 7 | from .recurrent import * 8 | from .normalization import * 9 | from .embeddings import * 10 | from .noise import * 11 | from .advanced_activations import * 12 | from .wrappers import * 13 | -------------------------------------------------------------------------------- /keras/layers/noise.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from ..engine import Layer 3 | from .. import backend as K 4 | import numpy as np 5 | 6 | 7 | class GaussianNoise(Layer): 8 | '''Apply to the input an additive zero-centered Gaussian noise with 9 | standard deviation `sigma`. This is useful to mitigate overfitting 10 | (you could see it as a kind of random data augmentation). 11 | Gaussian Noise (GS) is a natural choice as corruption process 12 | for real valued inputs. 13 | 14 | As it is a regularization layer, it is only active at training time. 15 | 16 | # Arguments 17 | sigma: float, standard deviation of the noise distribution. 18 | 19 | # Input shape 20 | Arbitrary. Use the keyword argument `input_shape` 21 | (tuple of integers, does not include the samples axis) 22 | when using this layer as the first layer in a model. 23 | 24 | # Output shape 25 | Same shape as input. 26 | ''' 27 | def __init__(self, sigma, **kwargs): 28 | self.supports_masking = True 29 | self.sigma = sigma 30 | self.uses_learning_phase = True 31 | super(GaussianNoise, self).__init__(**kwargs) 32 | 33 | def call(self, x, mask=None): 34 | noise_x = x + K.random_normal(shape=K.shape(x), 35 | mean=0., 36 | std=self.sigma) 37 | return K.in_train_phase(noise_x, x) 38 | 39 | def get_config(self): 40 | config = {'sigma': self.sigma} 41 | base_config = super(GaussianNoise, self).get_config() 42 | return dict(list(base_config.items()) + list(config.items())) 43 | 44 | 45 | class GaussianDropout(Layer): 46 | '''Apply to the input an multiplicative one-centered Gaussian noise 47 | with standard deviation `sqrt(p/(1-p))`. 48 | 49 | As it is a regularization layer, it is only active at training time. 
50 | 51 | # Arguments 52 | p: float, drop probability (as with `Dropout`). 53 | 54 | # Input shape 55 | Arbitrary. Use the keyword argument `input_shape` 56 | (tuple of integers, does not include the samples axis) 57 | when using this layer as the first layer in a model. 58 | 59 | # Output shape 60 | Same shape as input. 61 | 62 | # References 63 | [Dropout: A Simple Way to Prevent Neural Networks from Overfitting Srivastava, Hinton, et al. 2014](http://www.cs.toronto.edu/~rsalakhu/papers/srivastava14a.pdf) 64 | ''' 65 | def __init__(self, p, **kwargs): 66 | self.supports_masking = True 67 | self.p = p 68 | if 0 < p < 1: 69 | self.uses_learning_phase = True 70 | super(GaussianDropout, self).__init__(**kwargs) 71 | 72 | def call(self, x, mask=None): 73 | if 0 < self.p < 1: 74 | noise_x = x * K.random_normal(shape=K.shape(x), mean=1.0, 75 | std=np.sqrt(self.p / (1.0 - self.p))) 76 | return K.in_train_phase(noise_x, x) 77 | return x 78 | 79 | def get_config(self): 80 | config = {'p': self.p} 81 | base_config = super(GaussianDropout, self).get_config() 82 | return dict(list(base_config.items()) + list(config.items())) 83 | -------------------------------------------------------------------------------- /keras/legacy/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oeway/keras/833aa73b4959a7c14096083a03fc035595842cc7/keras/legacy/__init__.py -------------------------------------------------------------------------------- /keras/metrics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from . import backend as K 3 | 4 | 5 | def binary_accuracy(y_true, y_pred): 6 | return K.mean(K.equal(y_true, K.round(y_pred))) 7 | 8 | 9 | def categorical_accuracy(y_true, y_pred): 10 | return K.mean(K.equal(K.argmax(y_true, axis=-1), 11 | K.argmax(y_pred, axis=-1))) 12 | 13 | 14 | def sparse_categorical_accuracy(y_true, y_pred): 15 | return K.mean(K.equal(K.max(y_true, axis=-1), 16 | K.cast(K.argmax(y_pred, axis=-1), K.floatx()))) 17 | 18 | 19 | def mean_squared_error(y_true, y_pred): 20 | return K.mean(K.square(y_pred - y_true)) 21 | 22 | 23 | def mean_absolute_error(y_true, y_pred): 24 | return K.mean(K.abs(y_pred - y_true)) 25 | 26 | 27 | def mean_absolute_percentage_error(y_true, y_pred): 28 | diff = K.abs((y_true - y_pred) / K.clip(K.abs(y_true), K.epsilon(), np.inf)) 29 | return 100. * K.mean(diff) 30 | 31 | 32 | def mean_squared_logarithmic_error(y_true, y_pred): 33 | first_log = K.log(K.clip(y_pred, K.epsilon(), np.inf) + 1.) 34 | second_log = K.log(K.clip(y_true, K.epsilon(), np.inf) + 1.) 35 | return K.mean(K.square(first_log - second_log)) 36 | 37 | 38 | def squared_hinge(y_true, y_pred): 39 | return K.mean(K.square(K.maximum(1. - y_true * y_pred, 0.))) 40 | 41 | 42 | def hinge(y_true, y_pred): 43 | return K.mean(K.maximum(1. - y_true * y_pred, 0.)) 44 | 45 | 46 | def categorical_crossentropy(y_true, y_pred): 47 | '''Expects a binary class matrix instead of a vector of scalar classes. 48 | ''' 49 | return K.mean(K.categorical_crossentropy(y_pred, y_true)) 50 | 51 | 52 | def sparse_categorical_crossentropy(y_true, y_pred): 53 | '''expects an array of integer classes. 54 | Note: labels shape must have the same number of dimensions as output shape. 55 | If you get a shape error, add a length-1 dimension to labels. 
56 | ''' 57 | return K.mean(K.sparse_categorical_crossentropy(y_pred, y_true)) 58 | 59 | 60 | def binary_crossentropy(y_true, y_pred): 61 | return K.mean(K.binary_crossentropy(y_pred, y_true)) 62 | 63 | 64 | def poisson(y_true, y_pred): 65 | return K.mean(y_pred - y_true * K.log(y_pred + K.epsilon())) 66 | 67 | 68 | def cosine_proximity(y_true, y_pred): 69 | y_true = K.l2_normalize(y_true, axis=-1) 70 | y_pred = K.l2_normalize(y_pred, axis=-1) 71 | return -K.mean(y_true * y_pred) 72 | 73 | 74 | # aliases 75 | mse = MSE = mean_squared_error 76 | mae = MAE = mean_absolute_error 77 | mape = MAPE = mean_absolute_percentage_error 78 | msle = MSLE = mean_squared_logarithmic_error 79 | cosine = cosine_proximity 80 | 81 | 82 | from .utils.generic_utils import get_from_module 83 | def get(identifier): 84 | return get_from_module(identifier, globals(), 'metric') 85 | -------------------------------------------------------------------------------- /keras/objectives.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import numpy as np 3 | from . import backend as K 4 | 5 | 6 | def mean_squared_error(y_true, y_pred): 7 | return K.mean(K.square(y_pred - y_true), axis=-1) 8 | 9 | 10 | def mean_absolute_error(y_true, y_pred): 11 | return K.mean(K.abs(y_pred - y_true), axis=-1) 12 | 13 | 14 | def mean_absolute_percentage_error(y_true, y_pred): 15 | diff = K.abs((y_true - y_pred) / K.clip(K.abs(y_true), K.epsilon(), np.inf)) 16 | return 100. * K.mean(diff, axis=-1) 17 | 18 | 19 | def mean_squared_logarithmic_error(y_true, y_pred): 20 | first_log = K.log(K.clip(y_pred, K.epsilon(), np.inf) + 1.) 21 | second_log = K.log(K.clip(y_true, K.epsilon(), np.inf) + 1.) 22 | return K.mean(K.square(first_log - second_log), axis=-1) 23 | 24 | 25 | def squared_hinge(y_true, y_pred): 26 | return K.mean(K.square(K.maximum(1. - y_true * y_pred, 0.)), axis=-1) 27 | 28 | 29 | def hinge(y_true, y_pred): 30 | return K.mean(K.maximum(1. - y_true * y_pred, 0.), axis=-1) 31 | 32 | 33 | def categorical_crossentropy(y_true, y_pred): 34 | '''Expects a binary class matrix instead of a vector of scalar classes. 35 | ''' 36 | return K.categorical_crossentropy(y_pred, y_true) 37 | 38 | 39 | def sparse_categorical_crossentropy(y_true, y_pred): 40 | '''expects an array of integer classes. 41 | Note: labels shape must have the same number of dimensions as output shape. 42 | If you get a shape error, add a length-1 dimension to labels. 
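For 1D integer labels this can be done with, e.g., `np.expand_dims(labels, -1)` or `labels.reshape((-1, 1))`.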
43 | ''' 44 | return K.sparse_categorical_crossentropy(y_pred, y_true) 45 | 46 | 47 | def binary_crossentropy(y_true, y_pred): 48 | return K.mean(K.binary_crossentropy(y_pred, y_true), axis=-1) 49 | 50 | 51 | def kullback_leibler_divergence(y_true, y_pred): 52 | y_true = K.clip(y_true, K.epsilon(), 1) 53 | y_pred = K.clip(y_pred, K.epsilon(), 1) 54 | return K.sum(y_true * K.log(y_true / y_pred), axis=-1) 55 | 56 | 57 | def poisson(y_true, y_pred): 58 | return K.mean(y_pred - y_true * K.log(y_pred + K.epsilon()), axis=-1) 59 | 60 | 61 | def cosine_proximity(y_true, y_pred): 62 | y_true = K.l2_normalize(y_true, axis=-1) 63 | y_pred = K.l2_normalize(y_pred, axis=-1) 64 | return -K.mean(y_true * y_pred, axis=-1) 65 | 66 | 67 | # aliases 68 | mse = MSE = mean_squared_error 69 | mae = MAE = mean_absolute_error 70 | mape = MAPE = mean_absolute_percentage_error 71 | msle = MSLE = mean_squared_logarithmic_error 72 | kld = KLD = kullback_leibler_divergence 73 | cosine = cosine_proximity 74 | 75 | from .utils.generic_utils import get_from_module 76 | def get(identifier): 77 | return get_from_module(identifier, globals(), 'objective') 78 | -------------------------------------------------------------------------------- /keras/preprocessing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oeway/keras/833aa73b4959a7c14096083a03fc035595842cc7/keras/preprocessing/__init__.py -------------------------------------------------------------------------------- /keras/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oeway/keras/833aa73b4959a7c14096083a03fc035595842cc7/keras/utils/__init__.py -------------------------------------------------------------------------------- /keras/utils/data_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | 4 | import tarfile 5 | import os 6 | import sys 7 | import shutil 8 | import hashlib 9 | from six.moves.urllib.request import urlopen 10 | from six.moves.urllib.error import URLError, HTTPError 11 | 12 | from ..utils.generic_utils import Progbar 13 | 14 | 15 | # Under Python 2, 'urlretrieve' relies on FancyURLopener from legacy 16 | # urllib module, known to have issues with proxy management 17 | if sys.version_info[0] == 2: 18 | def urlretrieve(url, filename, reporthook=None, data=None): 19 | def chunk_read(response, chunk_size=8192, reporthook=None): 20 | total_size = response.info().get('Content-Length').strip() 21 | total_size = int(total_size) 22 | count = 0 23 | while 1: 24 | chunk = response.read(chunk_size) 25 | if not chunk: 26 | break 27 | count += 1 28 | if reporthook: 29 | reporthook(count, chunk_size, total_size) 30 | yield chunk 31 | 32 | response = urlopen(url, data) 33 | with open(filename, 'wb') as fd: 34 | for chunk in chunk_read(response, reporthook=reporthook): 35 | fd.write(chunk) 36 | else: 37 | from six.moves.urllib.request import urlretrieve 38 | 39 | 40 | def get_file(fname, origin, untar=False, md5_hash=None): 41 | datadir_base = os.path.expanduser(os.path.join('~', '.keras')) 42 | if not os.access(datadir_base, os.W_OK): 43 | datadir_base = os.path.join('/tmp', '.keras') 44 | datadir = os.path.join(datadir_base, 'datasets') 45 | if not os.path.exists(datadir): 46 | os.makedirs(datadir) 47 | 48 | if untar: 49 | untar_fpath = os.path.join(datadir, fname) 50 | fpath 
= untar_fpath + '.tar.gz' 51 | else: 52 | fpath = os.path.join(datadir, fname) 53 | 54 | download = False 55 | if os.path.exists(fpath): 56 | # file found; verify integrity if a hash was provided 57 | if md5_hash is not None: 58 | if not validate_file(fpath, md5_hash): 59 | print('A local file was found, but it seems to be ' 60 | 'incomplete or outdated.') 61 | download = True 62 | else: 63 | download = True 64 | 65 | if download: 66 | print('Downloading data from', origin) 67 | global progbar 68 | progbar = None 69 | 70 | def dl_progress(count, block_size, total_size): 71 | global progbar 72 | if progbar is None: 73 | progbar = Progbar(total_size) 74 | else: 75 | progbar.update(count*block_size) 76 | 77 | error_msg = 'URL fetch failure on {}: {} -- {}' 78 | try: 79 | try: 80 | urlretrieve(origin, fpath, dl_progress) 81 | except URLError as e: 82 | raise Exception(error_msg.format(origin, e.errno, e.reason)) 83 | except HTTPError as e: 84 | raise Exception(error_msg.format(origin, e.code, e.msg)) 85 | except (Exception, KeyboardInterrupt) as e: 86 | if os.path.exists(fpath): 87 | os.remove(fpath) 88 | raise 89 | progbar = None 90 | 91 | if untar: 92 | if not os.path.exists(untar_fpath): 93 | print('Untaring file...') 94 | tfile = tarfile.open(fpath, 'r:gz') 95 | try: 96 | tfile.extractall(path=datadir) 97 | except (Exception, KeyboardInterrupt) as e: 98 | if os.path.exists(untar_fpath): 99 | if os.path.isfile(untar_fpath): 100 | os.remove(untar_fpath) 101 | else: 102 | shutil.rmtree(untar_fpath) 103 | raise 104 | tfile.close() 105 | return untar_fpath 106 | 107 | return fpath 108 | 109 | 110 | def validate_file(fpath, md5_hash): 111 | hasher = hashlib.md5() 112 | with open(fpath, 'rb') as f: 113 | buf = f.read() 114 | hasher.update(buf) 115 | if str(hasher.hexdigest()) == str(md5_hash): 116 | return True 117 | else: 118 | return False 119 | -------------------------------------------------------------------------------- /keras/utils/io_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | import h5py 4 | import numpy as np 5 | import sys 6 | from collections import defaultdict 7 | 8 | 9 | class HDF5Matrix(): 10 | refs = defaultdict(int) 11 | 12 | def __init__(self, datapath, dataset, start, end, normalizer=None): 13 | if datapath not in list(self.refs.keys()): 14 | f = h5py.File(datapath) 15 | self.refs[datapath] = f 16 | else: 17 | f = self.refs[datapath] 18 | self.start = start 19 | self.end = end 20 | self.data = f[dataset] 21 | self.normalizer = normalizer 22 | 23 | def __len__(self): 24 | return self.end - self.start 25 | 26 | def __getitem__(self, key): 27 | if isinstance(key, slice): 28 | if key.stop + self.start <= self.end: 29 | idx = slice(key.start+self.start, key.stop + self.start) 30 | else: 31 | raise IndexError 32 | elif isinstance(key, int): 33 | if key + self.start < self.end: 34 | idx = key+self.start 35 | else: 36 | raise IndexError 37 | elif isinstance(key, np.ndarray): 38 | if np.max(key) + self.start < self.end: 39 | idx = (self.start + key).tolist() 40 | else: 41 | raise IndexError 42 | elif isinstance(key, list): 43 | if max(key) + self.start < self.end: 44 | idx = [x + self.start for x in key] 45 | else: 46 | raise IndexError 47 | if self.normalizer is not None: 48 | return self.normalizer(self.data[idx]) 49 | else: 50 | return self.data[idx] 51 | 52 | @property 53 | def shape(self): 54 | return tuple([self.end - self.start, 
self.data.shape[1]]) 55 | 56 | 57 | def save_array(array, name): 58 | import tables 59 | f = tables.open_file(name, 'w') 60 | atom = tables.Atom.from_dtype(array.dtype) 61 | ds = f.createCArray(f.root, 'data', atom, array.shape) 62 | ds[:] = array 63 | f.close() 64 | 65 | 66 | def load_array(name): 67 | import tables 68 | f = tables.open_file(name) 69 | array = f.root.data 70 | a = np.empty(shape=array.shape, dtype=array.dtype) 71 | a[:] = array[:] 72 | f.close() 73 | return a 74 | 75 | 76 | def ask_to_proceed_with_overwrite(filepath): 77 | get_input = input 78 | if sys.version_info[:2] <= (2, 7): 79 | get_input = raw_input 80 | overwrite = get_input('[WARNING] %s already exists - overwrite? ' 81 | '[y/n]' % (filepath)) 82 | while overwrite not in ['y', 'n']: 83 | overwrite = get_input('Enter "y" (overwrite) or "n" (cancel).') 84 | if overwrite == 'n': 85 | return False 86 | print('[TIP] Next time specify overwrite=True!') 87 | return True 88 | -------------------------------------------------------------------------------- /keras/utils/layer_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | from .generic_utils import get_from_module 4 | from ..layers import * 5 | from ..models import Model, Sequential, Graph 6 | from .. import backend as K 7 | 8 | 9 | def layer_from_config(config, custom_objects={}): 10 | ''' 11 | # Arguments 12 | config: dict of the form {'class_name': str, 'config': dict} 13 | custom_objects: dict mapping class names (or function names) 14 | of custom (non-Keras) objects to class/functions 15 | 16 | # Returns 17 | Layer instance (may be Model, Sequential, Graph, Layer...) 18 | ''' 19 | # Insert custom layers into globals so they can 20 | # be accessed by `get_from_module`. 
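    # Illustrative round-trip sketch (`some_dense` is a hypothetical layer instance):
    #   config = {'class_name': 'Dense', 'config': some_dense.get_config()}
    #   rebuilt = layer_from_config(config)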
21 | for cls_key in custom_objects: 22 | globals()[cls_key] = custom_objects[cls_key] 23 | 24 | class_name = config['class_name'] 25 | 26 | if class_name == 'Sequential': 27 | layer_class = Sequential 28 | elif class_name == 'Graph': 29 | layer_class = Graph 30 | elif class_name in ['Model', 'Container']: 31 | layer_class = Model 32 | else: 33 | layer_class = get_from_module(class_name, globals(), 'layer', 34 | instantiate=False) 35 | return layer_class.from_config(config['config']) 36 | 37 | 38 | def print_summary(layers, relevant_nodes=None, line_length=100, positions=[.33, .55, .67, 1.]): 39 | # line_length: total length of printed lines 40 | # positions: relative or absolute positions of log elements in each line 41 | if positions[-1] <= 1: 42 | positions = [int(line_length * p) for p in positions] 43 | # header names for the different log elements 44 | to_display = ['Layer (type)', 'Output Shape', 'Param #', 'Connected to'] 45 | 46 | def print_row(fields, positions): 47 | line = '' 48 | for i in range(len(fields)): 49 | line += str(fields[i]) 50 | line = line[:positions[i]] 51 | line += ' ' * (positions[i] - len(line)) 52 | print(line) 53 | 54 | print('_' * line_length) 55 | print_row(to_display, positions) 56 | print('=' * line_length) 57 | 58 | def print_layer_summary(layer): 59 | try: 60 | output_shape = layer.output_shape 61 | except: 62 | output_shape = 'multiple' 63 | connections = [] 64 | for node_index, node in enumerate(layer.inbound_nodes): 65 | if relevant_nodes: 66 | node_key = layer.name + '_ib-' + str(node_index) 67 | if node_key not in relevant_nodes: 68 | # node is node part of the current network 69 | continue 70 | for i in range(len(node.inbound_layers)): 71 | inbound_layer = node.inbound_layers[i].name 72 | inbound_node_index = node.node_indices[i] 73 | inbound_tensor_index = node.tensor_indices[i] 74 | connections.append(inbound_layer + '[' + str(inbound_node_index) + '][' + str(inbound_tensor_index) + ']') 75 | 76 | name = layer.name 77 | cls_name = layer.__class__.__name__ 78 | if not connections: 79 | first_connection = '' 80 | else: 81 | first_connection = connections[0] 82 | fields = [name + ' (' + cls_name + ')', output_shape, layer.count_params(), first_connection] 83 | print_row(fields, positions) 84 | if len(connections) > 1: 85 | for i in range(1, len(connections)): 86 | fields = ['', '', '', connections[i]] 87 | print_row(fields, positions) 88 | 89 | total_params = 0 90 | for i in range(len(layers)): 91 | print_layer_summary(layers[i]) 92 | if i == len(layers) - 1: 93 | print('=' * line_length) 94 | else: 95 | print('_' * line_length) 96 | total_params += layers[i].count_params() 97 | 98 | print('Total params: %s' % total_params) 99 | print('_' * line_length) 100 | -------------------------------------------------------------------------------- /keras/utils/np_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import numpy as np 3 | import scipy as sp 4 | from six.moves import range 5 | from six.moves import zip 6 | 7 | 8 | def to_categorical(y, nb_classes=None): 9 | '''Convert class vector (integers from 0 to nb_classes) 10 | to binary class matrix, for use with categorical_crossentropy. 11 | ''' 12 | if not nb_classes: 13 | nb_classes = np.max(y)+1 14 | Y = np.zeros((len(y), nb_classes)) 15 | for i in range(len(y)): 16 | Y[i, y[i]] = 1. 
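    # e.g. to_categorical([0, 2, 1], nb_classes=3) gives
    # [[1., 0., 0.], [0., 0., 1.], [0., 1., 0.]]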
17 | return Y 18 | 19 | 20 | def normalize(a, axis=-1, order=2): 21 | l2 = np.atleast_1d(np.linalg.norm(a, order, axis)) 22 | l2[l2 == 0] = 1 23 | return a / np.expand_dims(l2, axis) 24 | 25 | 26 | def binary_logloss(p, y): 27 | epsilon = 1e-15 28 | p = sp.maximum(epsilon, p) 29 | p = sp.minimum(1-epsilon, p) 30 | res = sum(y * sp.log(p) + sp.subtract(1, y) * sp.log(sp.subtract(1, p))) 31 | res *= -1.0/len(y) 32 | return res 33 | 34 | 35 | def multiclass_logloss(P, Y): 36 | npreds = [P[i][Y[i]-1] for i in range(len(Y))] 37 | score = -(1. / len(Y)) * np.sum(np.log(npreds)) 38 | return score 39 | 40 | 41 | def accuracy(p, y): 42 | return np.mean([a == b for a, b in zip(p, y)]) 43 | 44 | 45 | def probas_to_classes(y_pred): 46 | if len(y_pred.shape) > 1 and y_pred.shape[1] > 1: 47 | return categorical_probas_to_classes(y_pred) 48 | return np.array([1 if p > 0.5 else 0 for p in y_pred]) 49 | 50 | 51 | def categorical_probas_to_classes(p): 52 | return np.argmax(p, axis=1) 53 | 54 | 55 | def convert_kernel(kernel, dim_ordering='th'): 56 | '''Converts a kernel matrix (Numpy array) 57 | from Theano format to TensorFlow format 58 | (or reciprocally, since the transformation 59 | is its own inverse). 60 | ''' 61 | new_kernel = np.copy(kernel) 62 | if kernel.ndim == 4: 63 | # conv 2d 64 | # TH kernel shape: (depth, input_depth, rows, cols) 65 | # TF kernel shape: (rows, cols, input_depth, depth) 66 | if dim_ordering == 'th': 67 | w = kernel.shape[2] 68 | h = kernel.shape[3] 69 | for i in range(w): 70 | for j in range(h): 71 | new_kernel[:, :, i, j] = kernel[:, :, w - i - 1, h - j - 1] 72 | elif dim_ordering == 'tf': 73 | w = kernel.shape[0] 74 | h = kernel.shape[1] 75 | for i in range(w): 76 | for j in range(h): 77 | new_kernel[i, j, :, :] = kernel[w - i - 1, h - j - 1, :, :] 78 | else: 79 | raise Exception('Invalid dim_ordering: ' + str(dim_ordering)) 80 | elif kernel.ndim == 5: 81 | # conv 3d 82 | # TH kernel shape: (out_depth, input_depth, kernel_dim1, kernel_dim2, kernel_dim3) 83 | # TF kernel shape: (kernel_dim1, kernel_dim2, kernel_dim3, input_depth, out_depth) 84 | if dim_ordering == 'th': 85 | w = kernel.shape[2] 86 | h = kernel.shape[3] 87 | z = kernel.shape[4] 88 | for i in range(w): 89 | for j in range(h): 90 | for k in range(z): 91 | new_kernel[:, :, i, j, k] = kernel[:, :, 92 | w - i - 1, 93 | h - j - 1, 94 | z - k - 1] 95 | elif dim_ordering == 'tf': 96 | w = kernel.shape[0] 97 | h = kernel.shape[1] 98 | z = kernel.shape[2] 99 | for i in range(w): 100 | for j in range(h): 101 | for k in range(z): 102 | new_kernel[i, j, k, :, :] = kernel[w - i - 1, 103 | h - j - 1, 104 | z - k - 1, 105 | :, :] 106 | else: 107 | raise Exception('Invalid dim_ordering: ' + str(dim_ordering)) 108 | else: 109 | raise ValueError('Invalid kernel shape:', kernel.shape) 110 | return new_kernel 111 | 112 | 113 | def conv_output_length(input_length, filter_size, border_mode, stride, dilation=1): 114 | if input_length is None: 115 | return None 116 | assert border_mode in {'same', 'valid'} 117 | dilated_filter_size = filter_size + (filter_size - 1) * (dilation - 1) 118 | if border_mode == 'same': 119 | output_length = input_length 120 | elif border_mode == 'valid': 121 | output_length = input_length - dilated_filter_size + 1 122 | return (output_length + stride - 1) // stride 123 | 124 | def conv_input_length(output_length, filter_size, border_mode, stride): 125 | if output_length is None: 126 | return None 127 | assert border_mode in {'same', 'valid'} 128 | if border_mode == 'same': 129 | pad = filter_size // 2 130 
| elif border_mode == 'valid': 131 | pad = 0 132 | return (output_length - 1) * stride - 2 * pad + filter_size 133 | -------------------------------------------------------------------------------- /keras/utils/test_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy.testing import assert_allclose 3 | import inspect 4 | import functools 5 | 6 | from ..engine import Model, Input 7 | from ..models import Sequential, model_from_json 8 | from .. import backend as K 9 | 10 | 11 | def get_test_data(nb_train=1000, nb_test=500, input_shape=(10,), 12 | output_shape=(2,), 13 | classification=True, nb_class=2): 14 | ''' 15 | classification=True overrides output_shape 16 | (i.e. output_shape is set to (1,)) and the output 17 | consists in integers in [0, nb_class-1]. 18 | 19 | Otherwise: float output with shape output_shape. 20 | ''' 21 | nb_sample = nb_train + nb_test 22 | if classification: 23 | y = np.random.randint(0, nb_class, size=(nb_sample,)) 24 | X = np.zeros((nb_sample,) + input_shape) 25 | for i in range(nb_sample): 26 | X[i] = np.random.normal(loc=y[i], scale=0.7, size=input_shape) 27 | else: 28 | y_loc = np.random.random((nb_sample,)) 29 | X = np.zeros((nb_sample,) + input_shape) 30 | y = np.zeros((nb_sample,) + output_shape) 31 | for i in range(nb_sample): 32 | X[i] = np.random.normal(loc=y_loc[i], scale=0.7, size=input_shape) 33 | y[i] = np.random.normal(loc=y_loc[i], scale=0.7, size=output_shape) 34 | 35 | return (X[:nb_train], y[:nb_train]), (X[nb_train:], y[nb_train:]) 36 | 37 | 38 | def layer_test(layer_cls, kwargs={}, input_shape=None, input_dtype=None, 39 | input_data=None, expected_output=None, 40 | expected_output_dtype=None, fixed_batch_size=False): 41 | '''Test routine for a layer with a single input tensor 42 | and single output tensor. 
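It instantiates the layer, exercises `get_weights`/`set_weights`, builds the layer both through the functional API and as the first layer of a `Sequential` model, checks the output shape and dtype, and round-trips the model config (including JSON serialization).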
43 | ''' 44 | if input_data is None: 45 | assert input_shape 46 | if not input_dtype: 47 | input_dtype = K.floatx() 48 | input_data = (10 * np.random.random(input_shape)).astype(input_dtype) 49 | elif input_shape is None: 50 | input_shape = input_data.shape 51 | 52 | if expected_output_dtype is None: 53 | expected_output_dtype = input_dtype 54 | 55 | # instantiation 56 | layer = layer_cls(**kwargs) 57 | 58 | # test get_weights , set_weights 59 | weights = layer.get_weights() 60 | layer.set_weights(weights) 61 | 62 | # test and instantiation from weights 63 | if 'weights' in inspect.getargspec(layer_cls.__init__): 64 | kwargs['weights'] = weights 65 | layer = layer_cls(**kwargs) 66 | 67 | # test in functional API 68 | if fixed_batch_size: 69 | x = Input(batch_shape=input_shape, dtype=input_dtype) 70 | else: 71 | x = Input(shape=input_shape[1:], dtype=input_dtype) 72 | y = layer(x) 73 | assert K.dtype(y) == expected_output_dtype 74 | 75 | model = Model(input=x, output=y) 76 | model.compile('rmsprop', 'mse') 77 | 78 | expected_output_shape = layer.get_output_shape_for(input_shape) 79 | actual_output = model.predict(input_data) 80 | actual_output_shape = actual_output.shape 81 | assert expected_output_shape == actual_output_shape 82 | if expected_output is not None: 83 | assert_allclose(actual_output, expected_output, rtol=1e-3) 84 | 85 | # test serialization 86 | model_config = model.get_config() 87 | model = Model.from_config(model_config) 88 | model.compile('rmsprop', 'mse') 89 | 90 | # test as first layer in Sequential API 91 | layer_config = layer.get_config() 92 | layer_config['batch_input_shape'] = input_shape 93 | layer = layer.__class__.from_config(layer_config) 94 | 95 | model = Sequential() 96 | model.add(layer) 97 | model.compile('rmsprop', 'mse') 98 | actual_output = model.predict(input_data) 99 | actual_output_shape = actual_output.shape 100 | assert expected_output_shape == actual_output_shape 101 | if expected_output is not None: 102 | assert_allclose(actual_output, expected_output, rtol=1e-3) 103 | 104 | # test JSON serialization 105 | json_model = model.to_json() 106 | model = model_from_json(json_model) 107 | 108 | # for further checks in the caller function 109 | return actual_output 110 | 111 | 112 | def keras_test(func): 113 | '''Clean up after tensorflow tests. 114 | ''' 115 | @functools.wraps(func) 116 | def wrapper(*args, **kwargs): 117 | output = func(*args, **kwargs) 118 | if K._BACKEND == 'tensorflow': 119 | K.clear_session() 120 | return output 121 | return wrapper 122 | -------------------------------------------------------------------------------- /keras/utils/visualize_util.py: -------------------------------------------------------------------------------- 1 | try: 2 | # pydot-ng is a fork of pydot that is better maintained 3 | import pydot_ng as pydot 4 | except ImportError: 5 | # fall back on pydot if necessary 6 | import pydot 7 | if not pydot.find_graphviz(): 8 | raise RuntimeError('Failed to import pydot. 
You must install pydot' 9 | ' and graphviz for `pydotprint` to work.') 10 | 11 | 12 | def model_to_dot(model, show_shapes=False, show_layer_names=True): 13 | dot = pydot.Dot() 14 | dot.set('rankdir', 'TB') 15 | dot.set('concentrate', True) 16 | dot.set_node_defaults(shape='record') 17 | 18 | if model.__class__.__name__ == 'Sequential': 19 | if not model.built: 20 | model.build() 21 | model = model.model 22 | layers = model.layers 23 | 24 | # first, populate the nodes of the graph 25 | for layer in layers: 26 | layer_id = str(id(layer)) 27 | if show_layer_names: 28 | label = str(layer.name) + ' (' + layer.__class__.__name__ + ')' 29 | else: 30 | label = layer.__class__.__name__ 31 | 32 | if show_shapes: 33 | # Build the label that will actually contain a table with the 34 | # input/output 35 | try: 36 | outputlabels = str(layer.output_shape) 37 | except: 38 | outputlabels = 'multiple' 39 | if hasattr(layer, 'input_shape'): 40 | inputlabels = str(layer.input_shape) 41 | elif hasattr(layer, 'input_shapes'): 42 | inputlabels = ', '.join( 43 | [str(ishape) for ishape in layer.input_shapes]) 44 | else: 45 | inputlabels = 'multiple' 46 | label = '%s\n|{input:|output:}|{{%s}|{%s}}' % (label, inputlabels, outputlabels) 47 | 48 | node = pydot.Node(layer_id, label=label) 49 | dot.add_node(node) 50 | 51 | # second, add the edges 52 | for layer in layers: 53 | layer_id = str(id(layer)) 54 | for i, node in enumerate(layer.inbound_nodes): 55 | node_key = layer.name + '_ib-' + str(i) 56 | if node_key in model.container_nodes: 57 | # add edges 58 | for inbound_layer in node.inbound_layers: 59 | inbound_layer_id = str(id(inbound_layer)) 60 | layer_id = str(id(layer)) 61 | dot.add_edge(pydot.Edge(inbound_layer_id, layer_id)) 62 | return dot 63 | 64 | 65 | def plot(model, to_file='model.png', show_shapes=False, show_layer_names=True): 66 | dot = model_to_dot(model, show_shapes, show_layer_names) 67 | dot.write_png(to_file) 68 | -------------------------------------------------------------------------------- /keras/wrappers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oeway/keras/833aa73b4959a7c14096083a03fc035595842cc7/keras/wrappers/__init__.py -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | # Configuration of py.test 2 | [pytest] 3 | addopts=-v 4 | -n 2 5 | --durations=10 6 | --cov-report term-missing 7 | --cov=keras 8 | 9 | # Do not run tests in the build folder 10 | norecursedirs= build 11 | 12 | # PEP-8 The following are ignored: 13 | # E251 unexpected spaces around keyword / parameter equals 14 | # E225 missing whitespace around operator 15 | # E226 missing whitespace around arithmetic operator 16 | # W291 trailing whitespace 17 | # W293 blank line contains whitespace 18 | # E501 line too long (82 > 79 characters) 19 | # E402 module level import not at top of file - temporary measure to coninue adding ros python packaged in sys.path 20 | # E731 do not assign a lambda expression, use a def 21 | # E302 two blank lines between the functions 22 | # E231 missing whitespace after , 23 | # E241 multiple spaces after ',' 24 | # E261 at least two spaces before inline comment 25 | 26 | 27 | pep8ignore=* E251 \ 28 | * E225 \ 29 | * E226 \ 30 | * W291 \ 31 | * W293 \ 32 | * E501 \ 33 | * E402 \ 34 | * E731 \ 35 | * E302 \ 36 | * E231 \ 37 | * E241 \ 38 | * E261 39 | 
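# With the addopts above, a bare `py.test` run from the repository root behaves like:
#   py.test -v -n 2 --durations=10 --cov-report term-missing --cov=keras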
-------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from setuptools import find_packages 3 | 4 | 5 | setup(name='Keras', 6 | version='1.0.6', 7 | description='Deep Learning for Python', 8 | author='Francois Chollet', 9 | author_email='francois.chollet@gmail.com', 10 | url='https://github.com/fchollet/keras', 11 | download_url='https://github.com/fchollet/keras/tarball/1.0.6', 12 | license='MIT', 13 | install_requires=['theano', 'pyyaml', 'six'], 14 | extras_require={ 15 | 'h5py': ['h5py'], 16 | }, 17 | packages=find_packages()) 18 | -------------------------------------------------------------------------------- /tests/integration_tests/test_image_data_tasks.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import numpy as np 3 | import pytest 4 | 5 | from keras.utils.test_utils import get_test_data, keras_test 6 | from keras.models import Sequential 7 | from keras.layers.core import Dense, Flatten, Activation 8 | from keras.layers.convolutional import Convolution2D, MaxPooling2D 9 | from keras.utils.np_utils import to_categorical 10 | 11 | 12 | @keras_test 13 | def test_image_classification(): 14 | ''' 15 | Classify random 16x16 color images into several classes using logistic regression 16 | with convolutional hidden layer. 17 | ''' 18 | np.random.seed(1337) 19 | input_shape = (3, 16, 16) 20 | (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=500, 21 | nb_test=200, 22 | input_shape=input_shape, 23 | classification=True, 24 | nb_class=4) 25 | y_train = to_categorical(y_train) 26 | y_test = to_categorical(y_test) 27 | # convolution kernel size 28 | nb_conv = 3 29 | # size of pooling area for max pooling 30 | nb_pool = 2 31 | 32 | model = Sequential([ 33 | Convolution2D(nb_filter=8, nb_row=nb_conv, nb_col=nb_conv, input_shape=input_shape), 34 | MaxPooling2D(pool_size=(nb_pool, nb_pool)), 35 | Flatten(), 36 | Activation('relu'), 37 | Dense(y_test.shape[-1], activation='softmax') 38 | ]) 39 | model.compile(loss='categorical_crossentropy', 40 | optimizer='rmsprop', 41 | metrics=['accuracy']) 42 | history = model.fit(X_train, y_train, nb_epoch=10, batch_size=16, 43 | validation_data=(X_test, y_test), 44 | verbose=0) 45 | assert(history.history['val_acc'][-1] > 0.85) 46 | 47 | 48 | if __name__ == '__main__': 49 | pytest.main([__file__]) 50 | -------------------------------------------------------------------------------- /tests/integration_tests/test_vector_data_tasks.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import numpy as np 3 | import pytest 4 | 5 | from keras.utils.test_utils import get_test_data, keras_test 6 | from keras.models import Sequential 7 | from keras.layers.core import Dense 8 | from keras.utils.np_utils import to_categorical 9 | 10 | 11 | @keras_test 12 | def test_vector_classification(): 13 | ''' 14 | Classify random float vectors into 2 classes with logistic regression 15 | using 2 layer neural network with ReLU hidden units. 
16 | ''' 17 | np.random.seed(1337) 18 | nb_hidden = 10 19 | 20 | (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=500, 21 | nb_test=200, 22 | input_shape=(20,), 23 | classification=True, 24 | nb_class=2) 25 | y_train = to_categorical(y_train) 26 | y_test = to_categorical(y_test) 27 | 28 | model = Sequential([ 29 | Dense(nb_hidden, input_shape=(X_train.shape[-1],), activation='relu'), 30 | Dense(y_train.shape[-1], activation='softmax') 31 | ]) 32 | model.compile(loss='categorical_crossentropy', 33 | optimizer='rmsprop', 34 | metrics=['accuracy']) 35 | history = model.fit(X_train, y_train, nb_epoch=15, batch_size=16, 36 | validation_data=(X_test, y_test), 37 | verbose=0) 38 | assert(history.history['val_acc'][-1] > 0.8) 39 | 40 | 41 | @keras_test 42 | def test_vector_regression(): 43 | ''' 44 | Perform float data prediction (regression) using 2 layer MLP 45 | with tanh and sigmoid activations. 46 | ''' 47 | np.random.seed(1337) 48 | nb_hidden = 10 49 | (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=500, 50 | nb_test=200, 51 | input_shape=(20,), 52 | output_shape=(2,), 53 | classification=False) 54 | 55 | model = Sequential([ 56 | Dense(nb_hidden, input_shape=(X_train.shape[-1],), activation='tanh'), 57 | Dense(y_train.shape[-1]) 58 | ]) 59 | 60 | model.compile(loss='hinge', optimizer='adagrad') 61 | history = model.fit(X_train, y_train, nb_epoch=20, batch_size=16, 62 | validation_data=(X_test, y_test), verbose=0) 63 | assert (history.history['val_loss'][-1] < 0.9) 64 | 65 | 66 | if __name__ == '__main__': 67 | pytest.main([__file__]) 68 | -------------------------------------------------------------------------------- /tests/keras/datasets/test_datasets.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import pytest 3 | import time 4 | import random 5 | from keras.datasets import cifar10, cifar100, reuters, imdb, mnist 6 | 7 | 8 | def test_cifar(): 9 | # only run data download tests 20% of the time 10 | # to speed up frequent testing 11 | random.seed(time.time()) 12 | if random.random() > 0.8: 13 | (X_train, y_train), (X_test, y_test) = cifar10.load_data() 14 | (X_train, y_train), (X_test, y_test) = cifar100.load_data('fine') 15 | (X_train, y_train), (X_test, y_test) = cifar100.load_data('coarse') 16 | 17 | 18 | def test_reuters(): 19 | # only run data download tests 20% of the time 20 | # to speed up frequent testing 21 | random.seed(time.time()) 22 | if random.random() > 0.8: 23 | (X_train, y_train), (X_test, y_test) = reuters.load_data() 24 | (X_train, y_train), (X_test, y_test) = reuters.load_data(maxlen=10) 25 | 26 | 27 | def test_mnist(): 28 | # only run data download tests 20% of the time 29 | # to speed up frequent testing 30 | random.seed(time.time()) 31 | if random.random() > 0.8: 32 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 33 | 34 | 35 | def test_imdb(): 36 | # only run data download tests 20% of the time 37 | # to speed up frequent testing 38 | random.seed(time.time()) 39 | if random.random() > 0.8: 40 | (X_train, y_train), (X_test, y_test) = imdb.load_data() 41 | (X_train, y_train), (X_test, y_test) = imdb.load_data(maxlen=40) 42 | 43 | 44 | if __name__ == '__main__': 45 | pytest.main([__file__]) 46 | -------------------------------------------------------------------------------- /tests/keras/layers/test_advanced_activations.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from 
keras.utils.test_utils import layer_test, keras_test 3 | 4 | 5 | @keras_test 6 | def test_leaky_relu(): 7 | from keras.layers.advanced_activations import LeakyReLU 8 | for alpha in [0., .5, -1.]: 9 | layer_test(LeakyReLU, kwargs={'alpha': alpha}, 10 | input_shape=(2, 3, 4)) 11 | 12 | 13 | @keras_test 14 | def test_prelu(): 15 | from keras.layers.advanced_activations import PReLU 16 | layer_test(PReLU, kwargs={}, 17 | input_shape=(2, 3, 4)) 18 | 19 | 20 | @keras_test 21 | def test_elu(): 22 | from keras.layers.advanced_activations import ELU 23 | for alpha in [0., .5, -1.]: 24 | layer_test(ELU, kwargs={'alpha': alpha}, 25 | input_shape=(2, 3, 4)) 26 | 27 | 28 | @keras_test 29 | def test_parametric_softplus(): 30 | from keras.layers.advanced_activations import ParametricSoftplus 31 | for alpha in [0., .5, -1.]: 32 | layer_test(ParametricSoftplus, 33 | kwargs={'alpha_init': 1., 34 | 'beta_init': -1}, 35 | input_shape=(2, 3, 4)) 36 | 37 | 38 | @keras_test 39 | def test_thresholded_relu(): 40 | from keras.layers.advanced_activations import ThresholdedReLU 41 | layer_test(ThresholdedReLU, kwargs={'theta': 0.5}, 42 | input_shape=(2, 3, 4)) 43 | 44 | 45 | @keras_test 46 | def test_srelu(): 47 | from keras.layers.advanced_activations import SReLU 48 | layer_test(SReLU, kwargs={}, 49 | input_shape=(2, 3, 4)) 50 | 51 | 52 | if __name__ == '__main__': 53 | pytest.main([__file__]) 54 | -------------------------------------------------------------------------------- /tests/keras/layers/test_embeddings.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from keras.utils.test_utils import layer_test, keras_test 3 | from keras.layers.embeddings import Embedding 4 | import keras.backend as K 5 | 6 | 7 | @keras_test 8 | def test_embedding(): 9 | layer_test(Embedding, 10 | kwargs={'output_dim': 4, 'input_dim': 10, 'input_length': 2}, 11 | input_shape=(3, 2), 12 | input_dtype='int32', 13 | expected_output_dtype=K.floatx()) 14 | 15 | 16 | if __name__ == '__main__': 17 | pytest.main([__file__]) 18 | -------------------------------------------------------------------------------- /tests/keras/layers/test_local.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from keras.utils.test_utils import layer_test, keras_test 4 | from keras.layers import local 5 | 6 | 7 | @keras_test 8 | def test_locallyconnected_1d(): 9 | nb_samples = 2 10 | nb_steps = 8 11 | input_dim = 5 12 | filter_length = 3 13 | nb_filter = 4 14 | 15 | for border_mode in ['valid']: 16 | for subsample_length in [1]: 17 | if border_mode == 'same' and subsample_length != 1: 18 | continue 19 | layer_test(local.LocallyConnected1D, 20 | kwargs={'nb_filter': nb_filter, 21 | 'filter_length': filter_length, 22 | 'border_mode': border_mode, 23 | 'subsample_length': subsample_length}, 24 | input_shape=(nb_samples, nb_steps, input_dim)) 25 | 26 | layer_test(local.LocallyConnected1D, 27 | kwargs={'nb_filter': nb_filter, 28 | 'filter_length': filter_length, 29 | 'border_mode': border_mode, 30 | 'W_regularizer': 'l2', 31 | 'b_regularizer': 'l2', 32 | 'activity_regularizer': 'activity_l2', 33 | 'subsample_length': subsample_length}, 34 | input_shape=(nb_samples, nb_steps, input_dim)) 35 | 36 | 37 | @keras_test 38 | def test_locallyconnected_2d(): 39 | nb_samples = 8 40 | nb_filter = 3 41 | stack_size = 4 42 | nb_row = 6 43 | nb_col = 10 44 | 45 | for border_mode in ['valid']: 46 | for subsample in [(1, 1), (2, 2)]: 47 | if border_mode == 'same' and 
subsample != (1, 1): 48 | continue 49 | 50 | layer_test(local.LocallyConnected2D, 51 | kwargs={'nb_filter': nb_filter, 52 | 'nb_row': 3, 53 | 'nb_col': 3, 54 | 'border_mode': border_mode, 55 | 'W_regularizer': 'l2', 56 | 'b_regularizer': 'l2', 57 | 'activity_regularizer': 'activity_l2', 58 | 'subsample': subsample, 59 | 'dim_ordering': 'tf'}, 60 | input_shape=(nb_samples, nb_row, nb_col, stack_size)) 61 | 62 | layer_test(local.LocallyConnected2D, 63 | kwargs={'nb_filter': nb_filter, 64 | 'nb_row': 3, 65 | 'nb_col': 3, 66 | 'border_mode': border_mode, 67 | 'W_regularizer': 'l2', 68 | 'b_regularizer': 'l2', 69 | 'activity_regularizer': 'activity_l2', 70 | 'subsample': subsample, 71 | 'dim_ordering': 'th'}, 72 | input_shape=(nb_samples, stack_size, nb_row, nb_col)) 73 | 74 | 75 | if __name__ == '__main__': 76 | pytest.main([__file__]) 77 | -------------------------------------------------------------------------------- /tests/keras/layers/test_noise.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from keras.utils.test_utils import layer_test, keras_test 3 | from keras.layers import noise 4 | 5 | 6 | @keras_test 7 | def test_GaussianNoise(): 8 | layer_test(noise.GaussianNoise, 9 | kwargs={'sigma': 1.}, 10 | input_shape=(3, 2, 3)) 11 | 12 | 13 | @keras_test 14 | def test_GaussianDropout(): 15 | layer_test(noise.GaussianDropout, 16 | kwargs={'p': 0.5}, 17 | input_shape=(3, 2, 3)) 18 | 19 | 20 | if __name__ == '__main__': 21 | pytest.main([__file__]) 22 | -------------------------------------------------------------------------------- /tests/keras/layers/test_normalization.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | from numpy.testing import assert_allclose 4 | 5 | from keras.layers.core import Dense, Activation 6 | from keras.utils.test_utils import layer_test, keras_test 7 | from keras.layers import normalization 8 | from keras.models import Sequential, Graph 9 | from keras import backend as K 10 | 11 | input_1 = np.arange(10) 12 | input_2 = np.zeros(10) 13 | input_3 = np.ones((10)) 14 | input_shapes = [np.ones((10, 10)), np.ones((10, 10, 10))] 15 | 16 | 17 | @keras_test 18 | def basic_batchnorm_test(): 19 | layer_test(normalization.BatchNormalization, 20 | kwargs={'mode': 1}, 21 | input_shape=(3, 4, 2)) 22 | layer_test(normalization.BatchNormalization, 23 | kwargs={'mode': 0}, 24 | input_shape=(3, 4, 2)) 25 | 26 | 27 | @keras_test 28 | def test_batchnorm_mode_0_or_2(): 29 | for mode in [0, 2]: 30 | model = Sequential() 31 | norm_m0 = normalization.BatchNormalization(mode=mode, input_shape=(10,), momentum=0.8) 32 | model.add(norm_m0) 33 | model.compile(loss='mse', optimizer='sgd') 34 | 35 | # centered on 5.0, variance 10.0 36 | X = np.random.normal(loc=5.0, scale=10.0, size=(1000, 10)) 37 | model.fit(X, X, nb_epoch=4, verbose=0) 38 | out = model.predict(X) 39 | out -= K.eval(norm_m0.beta) 40 | out /= K.eval(norm_m0.gamma) 41 | 42 | assert_allclose(out.mean(), 0.0, atol=1e-1) 43 | assert_allclose(out.std(), 1.0, atol=1e-1) 44 | 45 | 46 | @keras_test 47 | def test_batchnorm_mode_0_convnet(): 48 | model = Sequential() 49 | norm_m0 = normalization.BatchNormalization(mode=0, axis=1, input_shape=(3, 4, 4), momentum=0.8) 50 | model.add(norm_m0) 51 | model.compile(loss='mse', optimizer='sgd') 52 | 53 | # centered on 5.0, variance 10.0 54 | X = np.random.normal(loc=5.0, scale=10.0, size=(1000, 3, 4, 4)) 55 | model.fit(X, X, nb_epoch=4, verbose=0) 56 | out = 
model.predict(X) 57 | out -= np.reshape(K.eval(norm_m0.beta), (1, 3, 1, 1)) 58 | out /= np.reshape(K.eval(norm_m0.gamma), (1, 3, 1, 1)) 59 | 60 | assert_allclose(np.mean(out, axis=(0, 2, 3)), 0.0, atol=1e-1) 61 | assert_allclose(np.std(out, axis=(0, 2, 3)), 1.0, atol=1e-1) 62 | 63 | 64 | @keras_test 65 | def test_batchnorm_mode_1(): 66 | norm_m1 = normalization.BatchNormalization(input_shape=(10,), mode=1) 67 | norm_m1.build(input_shape=(None, 10)) 68 | 69 | for inp in [input_1, input_2, input_3]: 70 | out = (norm_m1.call(K.variable(inp)) - norm_m1.beta) / norm_m1.gamma 71 | assert_allclose(K.eval(K.mean(out)), 0.0, atol=1e-1) 72 | if inp.std() > 0.: 73 | assert_allclose(K.eval(K.std(out)), 1.0, atol=1e-1) 74 | else: 75 | assert_allclose(K.eval(K.std(out)), 0.0, atol=1e-1) 76 | 77 | 78 | if __name__ == '__main__': 79 | pytest.main([__file__]) 80 | -------------------------------------------------------------------------------- /tests/keras/layers/test_recurrent.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | from numpy.testing import assert_allclose 4 | 5 | from keras.utils.test_utils import layer_test 6 | from keras.layers import recurrent, embeddings 7 | from keras.models import Sequential 8 | from keras.layers.core import Masking 9 | from keras import regularizers 10 | from keras.utils.test_utils import keras_test 11 | 12 | from keras import backend as K 13 | 14 | nb_samples, timesteps, embedding_dim, output_dim = 2, 5, 4, 3 15 | embedding_num = 12 16 | 17 | 18 | def _runner(layer_class): 19 | """ 20 | All the recurrent layers share the same interface, 21 | so we can run through them with a single function. 22 | """ 23 | # check return_sequences 24 | layer_test(layer_class, 25 | kwargs={'output_dim': output_dim, 26 | 'return_sequences': True}, 27 | input_shape=(nb_samples, timesteps, embedding_dim)) 28 | 29 | # check dropout 30 | layer_test(layer_class, 31 | kwargs={'output_dim': output_dim, 32 | 'dropout_U': 0.1, 33 | 'dropout_W': 0.1}, 34 | input_shape=(nb_samples, timesteps, embedding_dim)) 35 | 36 | # check implementation modes 37 | for mode in ['cpu', 'mem', 'gpu']: 38 | layer_test(layer_class, 39 | kwargs={'output_dim': output_dim, 40 | 'consume_less': mode}, 41 | input_shape=(nb_samples, timesteps, embedding_dim)) 42 | 43 | # check statefulness 44 | model = Sequential() 45 | model.add(embeddings.Embedding(embedding_num, embedding_dim, 46 | mask_zero=True, 47 | input_length=timesteps, 48 | batch_input_shape=(nb_samples, timesteps))) 49 | layer = layer_class(output_dim, return_sequences=False, 50 | stateful=True, 51 | weights=None) 52 | model.add(layer) 53 | model.compile(optimizer='sgd', loss='mse') 54 | out1 = model.predict(np.ones((nb_samples, timesteps))) 55 | assert(out1.shape == (nb_samples, output_dim)) 56 | 57 | # train once so that the states change 58 | model.train_on_batch(np.ones((nb_samples, timesteps)), 59 | np.ones((nb_samples, output_dim))) 60 | out2 = model.predict(np.ones((nb_samples, timesteps))) 61 | 62 | # if the state is not reset, output should be different 63 | assert(out1.max() != out2.max()) 64 | 65 | # check that output changes after states are reset 66 | # (even though the model itself didn't change) 67 | layer.reset_states() 68 | out3 = model.predict(np.ones((nb_samples, timesteps))) 69 | assert(out2.max() != out3.max()) 70 | 71 | # check that container-level reset_states() works 72 | model.reset_states() 73 | out4 = model.predict(np.ones((nb_samples, timesteps))) 74 | 
assert_allclose(out3, out4, atol=1e-5) 75 | 76 | # check that the call to `predict` updated the states 77 | out5 = model.predict(np.ones((nb_samples, timesteps))) 78 | assert(out4.max() != out5.max()) 79 | 80 | # Check masking 81 | layer.reset_states() 82 | 83 | left_padded_input = np.ones((nb_samples, timesteps)) 84 | left_padded_input[0, :1] = 0 85 | left_padded_input[1, :2] = 0 86 | out6 = model.predict(left_padded_input) 87 | 88 | layer.reset_states() 89 | 90 | right_padded_input = np.ones((nb_samples, timesteps)) 91 | right_padded_input[0, -1:] = 0 92 | right_padded_input[1, -2:] = 0 93 | out7 = model.predict(right_padded_input) 94 | 95 | assert_allclose(out7, out6, atol=1e-5) 96 | 97 | # check regularizers 98 | layer = layer_class(output_dim, return_sequences=False, weights=None, 99 | batch_input_shape=(nb_samples, timesteps, embedding_dim), 100 | W_regularizer=regularizers.WeightRegularizer(l1=0.01), 101 | U_regularizer=regularizers.WeightRegularizer(l1=0.01), 102 | b_regularizer='l2') 103 | shape = (nb_samples, timesteps, embedding_dim) 104 | layer.set_input(K.variable(np.ones(shape)), 105 | shape=shape) 106 | K.eval(layer.output) 107 | 108 | 109 | @keras_test 110 | def test_SimpleRNN(): 111 | _runner(recurrent.SimpleRNN) 112 | 113 | 114 | @keras_test 115 | def test_GRU(): 116 | _runner(recurrent.GRU) 117 | 118 | 119 | @keras_test 120 | def test_LSTM(): 121 | _runner(recurrent.LSTM) 122 | 123 | 124 | @keras_test 125 | def test_masking_layer(): 126 | ''' This test based on a previously failing issue here: 127 | https://github.com/fchollet/keras/issues/1567 128 | 129 | ''' 130 | model = Sequential() 131 | model.add(Masking(input_shape=(3, 4))) 132 | model.add(recurrent.LSTM(output_dim=5, return_sequences=True)) 133 | model.compile(loss='categorical_crossentropy', optimizer='adam') 134 | I = np.random.random((6, 3, 4)) 135 | V = np.abs(np.random.random((6, 3, 5))) 136 | V /= V.sum(axis=-1, keepdims=True) 137 | model.fit(I, V, nb_epoch=1, batch_size=100, verbose=1) 138 | 139 | 140 | if __name__ == '__main__': 141 | pytest.main([__file__]) 142 | -------------------------------------------------------------------------------- /tests/keras/layers/test_wrappers.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | from numpy.testing import assert_allclose 4 | from keras.utils.test_utils import keras_test 5 | from keras.layers import wrappers, Input 6 | from keras.layers import core, convolutional 7 | from keras.models import Sequential, Model, model_from_json 8 | 9 | 10 | @keras_test 11 | def test_TimeDistributed(): 12 | # first, test with Dense layer 13 | model = Sequential() 14 | model.add(wrappers.TimeDistributed(core.Dense(2), input_shape=(3, 4))) 15 | model.add(core.Activation('relu')) 16 | model.compile(optimizer='rmsprop', loss='mse') 17 | model.fit(np.random.random((10, 3, 4)), np.random.random((10, 3, 2)), nb_epoch=1, batch_size=10) 18 | 19 | # test config 20 | model.get_config() 21 | 22 | # compare to TimeDistributedDense 23 | test_input = np.random.random((1, 3, 4)) 24 | test_output = model.predict(test_input) 25 | weights = model.layers[0].get_weights() 26 | 27 | reference = Sequential() 28 | reference.add(core.TimeDistributedDense(2, input_shape=(3, 4), weights=weights)) 29 | reference.add(core.Activation('relu')) 30 | reference.compile(optimizer='rmsprop', loss='mse') 31 | 32 | reference_output = reference.predict(test_input) 33 | assert_allclose(test_output, reference_output, atol=1e-05) 34 | 35 | # 
test when specifying a batch_input_shape 36 | reference = Sequential() 37 | reference.add(core.TimeDistributedDense(2, batch_input_shape=(1, 3, 4), weights=weights)) 38 | reference.add(core.Activation('relu')) 39 | reference.compile(optimizer='rmsprop', loss='mse') 40 | 41 | reference_output = reference.predict(test_input) 42 | assert_allclose(test_output, reference_output, atol=1e-05) 43 | 44 | # test with Convolution2D 45 | model = Sequential() 46 | model.add(wrappers.TimeDistributed(convolutional.Convolution2D(5, 2, 2, border_mode='same'), input_shape=(2, 3, 4, 4))) 47 | model.add(core.Activation('relu')) 48 | model.compile(optimizer='rmsprop', loss='mse') 49 | model.train_on_batch(np.random.random((1, 2, 3, 4, 4)), np.random.random((1, 2, 5, 4, 4))) 50 | 51 | model = model_from_json(model.to_json()) 52 | model.summary() 53 | 54 | # test stacked layers 55 | model = Sequential() 56 | model.add(wrappers.TimeDistributed(core.Dense(2), input_shape=(3, 4))) 57 | model.add(wrappers.TimeDistributed(core.Dense(3))) 58 | model.add(core.Activation('relu')) 59 | model.compile(optimizer='rmsprop', loss='mse') 60 | 61 | model.fit(np.random.random((10, 3, 4)), np.random.random((10, 3, 3)), nb_epoch=1, batch_size=10) 62 | 63 | # test wrapping Sequential model 64 | model = Sequential() 65 | model.add(core.Dense(3, input_dim=2)) 66 | outer_model = Sequential() 67 | outer_model.add(wrappers.TimeDistributed(model, input_shape=(3, 2))) 68 | outer_model.compile(optimizer='rmsprop', loss='mse') 69 | outer_model.fit(np.random.random((10, 3, 2)), np.random.random((10, 3, 3)), nb_epoch=1, batch_size=10) 70 | 71 | # test with functional API 72 | x = Input(shape=(3, 2)) 73 | y = wrappers.TimeDistributed(model)(x) 74 | outer_model = Model(x, y) 75 | outer_model.compile(optimizer='rmsprop', loss='mse') 76 | outer_model.fit(np.random.random((10, 3, 2)), np.random.random((10, 3, 3)), nb_epoch=1, batch_size=10) 77 | 78 | 79 | if __name__ == '__main__': 80 | pytest.main([__file__]) 81 | -------------------------------------------------------------------------------- /tests/keras/preprocessing/test_image.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from keras.preprocessing.image import * 3 | from PIL import Image 4 | import numpy as np 5 | import os 6 | import shutil 7 | import tempfile 8 | 9 | 10 | class TestImage: 11 | 12 | def setup_class(cls): 13 | img_w = img_h = 20 14 | rgb_images = [] 15 | gray_images = [] 16 | for n in range(8): 17 | bias = np.random.rand(img_w, img_h, 1) * 64 18 | variance = np.random.rand(img_w, img_h, 1) * (255-64) 19 | imarray = np.random.rand(img_w, img_h, 3) * variance + bias 20 | im = Image.fromarray(imarray.astype('uint8')).convert('RGB') 21 | rgb_images.append(im) 22 | 23 | imarray = np.random.rand(img_w, img_h, 1) * variance + bias 24 | im = Image.fromarray(imarray.astype('uint8').squeeze()).convert('L') 25 | gray_images.append(im) 26 | 27 | cls.all_test_images = [rgb_images, gray_images] 28 | 29 | def teardown_class(cls): 30 | del cls.all_test_images 31 | 32 | def test_image_data_generator(self): 33 | for test_images in self.all_test_images: 34 | img_list = [] 35 | for im in test_images: 36 | img_list.append(img_to_array(im)[None, ...]) 37 | 38 | images = np.vstack(img_list) 39 | generator = ImageDataGenerator( 40 | featurewise_center=True, 41 | samplewise_center=True, 42 | featurewise_std_normalization=True, 43 | samplewise_std_normalization=True, 44 | zca_whitening=True, 45 | rotation_range=90., 46 | 
width_shift_range=0.1, 47 | height_shift_range=0.1, 48 | shear_range=0.5, 49 | zoom_range=0.2, 50 | channel_shift_range=0., 51 | fill_mode='nearest', 52 | cval=0.5, 53 | horizontal_flip=True, 54 | vertical_flip=True) 55 | generator.fit(images, augment=True) 56 | 57 | tmp_folder = tempfile.mkdtemp(prefix='test_images') 58 | for x, y in generator.flow(images, np.arange(images.shape[0]), 59 | shuffle=True, save_to_dir=tmp_folder): 60 | assert x.shape[1:] == images.shape[1:] 61 | break 62 | shutil.rmtree(tmp_folder) 63 | 64 | def test_img_flip(self): 65 | x = np.array(range(4)).reshape([1, 1, 2, 2]) 66 | assert (flip_axis(x, 0) == x).all() 67 | assert (flip_axis(x, 1) == x).all() 68 | assert (flip_axis(x, 2) == [[[[2, 3], [0, 1]]]]).all() 69 | assert (flip_axis(x, 3) == [[[[1, 0], [3, 2]]]]).all() 70 | 71 | dim_ordering_and_col_index = (('tf', 2), ('th', 3)) 72 | for dim_ordering, col_index in dim_ordering_and_col_index: 73 | image_generator_th = ImageDataGenerator( 74 | featurewise_center=False, 75 | samplewise_center=False, 76 | featurewise_std_normalization=False, 77 | samplewise_std_normalization=False, 78 | zca_whitening=False, 79 | rotation_range=0, 80 | width_shift_range=0, 81 | height_shift_range=0, 82 | shear_range=0, 83 | zoom_range=0, 84 | channel_shift_range=0, 85 | horizontal_flip=True, 86 | vertical_flip=False, 87 | dim_ordering=dim_ordering).flow(x, [1]) 88 | for i in range(10): 89 | potentially_flipped_x, _ = next(image_generator_th) 90 | assert ((potentially_flipped_x == x).all() or 91 | (potentially_flipped_x == flip_axis(x, col_index)).all()) 92 | 93 | 94 | if __name__ == '__main__': 95 | pytest.main([__file__]) 96 | -------------------------------------------------------------------------------- /tests/keras/preprocessing/test_sequence.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy.testing import assert_allclose 3 | 4 | import pytest 5 | 6 | from keras.preprocessing.sequence import pad_sequences 7 | from keras.preprocessing.sequence import make_sampling_table 8 | from keras.preprocessing.sequence import skipgrams 9 | 10 | 11 | def test_pad_sequences(): 12 | a = [[1], [1, 2], [1, 2, 3]] 13 | 14 | # test padding 15 | b = pad_sequences(a, maxlen=3, padding='pre') 16 | assert_allclose(b, [[0, 0, 1], [0, 1, 2], [1, 2, 3]]) 17 | b = pad_sequences(a, maxlen=3, padding='post') 18 | assert_allclose(b, [[1, 0, 0], [1, 2, 0], [1, 2, 3]]) 19 | 20 | # test truncating 21 | b = pad_sequences(a, maxlen=2, truncating='pre') 22 | assert_allclose(b, [[0, 1], [1, 2], [2, 3]]) 23 | b = pad_sequences(a, maxlen=2, truncating='post') 24 | assert_allclose(b, [[0, 1], [1, 2], [1, 2]]) 25 | 26 | # test value 27 | b = pad_sequences(a, maxlen=3, value=1) 28 | assert_allclose(b, [[1, 1, 1], [1, 1, 2], [1, 2, 3]]) 29 | 30 | 31 | def test_pad_sequences_vector(): 32 | a = [[[1, 1]], 33 | [[2, 1], [2, 2]], 34 | [[3, 1], [3, 2], [3, 3]]] 35 | 36 | # test padding 37 | b = pad_sequences(a, maxlen=3, padding='pre') 38 | assert_allclose(b, [[[0, 0], [0, 0], [1, 1]], 39 | [[0, 0], [2, 1], [2, 2]], 40 | [[3, 1], [3, 2], [3, 3]]]) 41 | b = pad_sequences(a, maxlen=3, padding='post') 42 | assert_allclose(b, [[[1, 1], [0, 0], [0, 0]], 43 | [[2, 1], [2, 2], [0, 0]], 44 | [[3, 1], [3, 2], [3, 3]]]) 45 | 46 | # test truncating 47 | b = pad_sequences(a, maxlen=2, truncating='pre') 48 | assert_allclose(b, [[[0, 0], [1, 1]], 49 | [[2, 1], [2, 2]], 50 | [[3, 2], [3, 3]]]) 51 | 52 | b = pad_sequences(a, maxlen=2, truncating='post') 53 | 
assert_allclose(b, [[[0, 0], [1, 1]], 54 | [[2, 1], [2, 2]], 55 | [[3, 1], [3, 2]]]) 56 | 57 | # test value 58 | b = pad_sequences(a, maxlen=3, value=1) 59 | assert_allclose(b, [[[1, 1], [1, 1], [1, 1]], 60 | [[1, 1], [2, 1], [2, 2]], 61 | [[3, 1], [3, 2], [3, 3]]]) 62 | 63 | 64 | def test_make_sampling_table(): 65 | a = make_sampling_table(3) 66 | assert_allclose(a, np.asarray([0.00315225, 0.00315225, 0.00547597]), 67 | rtol=.1) 68 | 69 | 70 | def test_skipgrams(): 71 | # test with no window size and binary labels 72 | couples, labels = skipgrams(np.arange(3), vocabulary_size=3) 73 | for couple in couples: 74 | assert couple[0] in [0, 1, 2] and couple[1] in [0, 1, 2] 75 | 76 | # test window size and categorical labels 77 | couples, labels = skipgrams(np.arange(5), vocabulary_size=5, window_size=1, 78 | categorical=True) 79 | for couple in couples: 80 | assert couple[0] - couple[1] <= 3 81 | for l in labels: 82 | assert len(l) == 2 83 | 84 | 85 | if __name__ == '__main__': 86 | pytest.main([__file__]) 87 | -------------------------------------------------------------------------------- /tests/keras/preprocessing/test_text.py: -------------------------------------------------------------------------------- 1 | from keras.preprocessing.text import Tokenizer, one_hot 2 | import pytest 3 | import numpy as np 4 | 5 | 6 | def test_one_hot(): 7 | text = 'The cat sat on the mat.' 8 | encoded = one_hot(text, 5) 9 | assert len(encoded) == 6 10 | assert np.max(encoded) <= 4 11 | assert np.min(encoded) >= 0 12 | 13 | 14 | def test_tokenizer(): 15 | texts = ['The cat sat on the mat.', 16 | 'The dog sat on the log.', 17 | 'Dogs and cats living together.'] 18 | tokenizer = Tokenizer(nb_words=10) 19 | tokenizer.fit_on_texts(texts) 20 | 21 | sequences = [] 22 | for seq in tokenizer.texts_to_sequences_generator(texts): 23 | sequences.append(seq) 24 | assert np.max(np.max(sequences)) < 10 25 | assert np.min(np.min(sequences)) == 1 26 | 27 | tokenizer.fit_on_sequences(sequences) 28 | 29 | for mode in ['binary', 'count', 'tfidf', 'freq']: 30 | matrix = tokenizer.texts_to_matrix(texts, mode) 31 | 32 | 33 | if __name__ == '__main__': 34 | pytest.main([__file__]) 35 | -------------------------------------------------------------------------------- /tests/keras/test_activations.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | from numpy.testing import assert_allclose 4 | 5 | from keras import backend as K 6 | from keras import activations 7 | 8 | 9 | def get_standard_values(): 10 | ''' 11 | These are just a set of floats used for testing the activation 12 | functions, and are useful in multiple tests. 
13 | ''' 14 | return np.array([[0, 0.1, 0.5, 0.9, 1.0]], dtype=K.floatx()) 15 | 16 | 17 | def test_softmax(): 18 | ''' 19 | Test using a reference implementation of softmax 20 | ''' 21 | def softmax(values): 22 | m = np.max(values) 23 | e = np.exp(values - m) 24 | return e / np.sum(e) 25 | 26 | x = K.placeholder(ndim=2) 27 | f = K.function([x], [activations.softmax(x)]) 28 | test_values = get_standard_values() 29 | 30 | result = f([test_values])[0] 31 | expected = softmax(test_values) 32 | assert_allclose(result, expected, rtol=1e-05) 33 | 34 | 35 | def test_time_distributed_softmax(): 36 | x = K.placeholder(shape=(1, 1, 5)) 37 | f = K.function([x], [activations.softmax(x)]) 38 | test_values = get_standard_values() 39 | test_values = np.reshape(test_values, (1, 1, np.size(test_values))) 40 | f([test_values])[0] 41 | 42 | 43 | def test_softplus(): 44 | ''' 45 | Test using a reference softplus implementation 46 | ''' 47 | def softplus(x): 48 | return np.log(np.ones_like(x) + np.exp(x)) 49 | 50 | x = K.placeholder(ndim=2) 51 | f = K.function([x], [activations.softplus(x)]) 52 | test_values = get_standard_values() 53 | 54 | result = f([test_values])[0] 55 | expected = softplus(test_values) 56 | assert_allclose(result, expected, rtol=1e-05) 57 | 58 | 59 | def test_softsign(): 60 | ''' 61 | Test using a reference softsign implementation 62 | ''' 63 | def softsign(x): 64 | return np.divide(x, np.ones_like(x) + np.absolute(x)) 65 | 66 | x = K.placeholder(ndim=2) 67 | f = K.function([x], [activations.softsign(x)]) 68 | test_values = get_standard_values() 69 | 70 | result = f([test_values])[0] 71 | expected = softsign(test_values) 72 | assert_allclose(result, expected, rtol=1e-05) 73 | 74 | 75 | def test_sigmoid(): 76 | ''' 77 | Test using a numerically stable reference sigmoid implementation 78 | ''' 79 | def ref_sigmoid(x): 80 | if x >= 0: 81 | return 1 / (1 + np.exp(-x)) 82 | else: 83 | z = np.exp(x) 84 | return z / (1 + z) 85 | sigmoid = np.vectorize(ref_sigmoid) 86 | 87 | x = K.placeholder(ndim=2) 88 | f = K.function([x], [activations.sigmoid(x)]) 89 | test_values = get_standard_values() 90 | 91 | result = f([test_values])[0] 92 | expected = sigmoid(test_values) 93 | assert_allclose(result, expected, rtol=1e-05) 94 | 95 | 96 | def test_hard_sigmoid(): 97 | ''' 98 | Test using a reference hard sigmoid implementation 99 | ''' 100 | def ref_hard_sigmoid(x): 101 | ''' 102 | Reference hard sigmoid with slope and shift values from theano, see 103 | https://github.com/Theano/Theano/blob/master/theano/tensor/nnet/sigm.py 104 | ''' 105 | x = (x * 0.2) + 0.5 106 | z = 0.0 if x <= 0 else (1.0 if x >= 1 else x) 107 | return z 108 | hard_sigmoid = np.vectorize(ref_hard_sigmoid) 109 | 110 | x = K.placeholder(ndim=2) 111 | f = K.function([x], [activations.hard_sigmoid(x)]) 112 | test_values = get_standard_values() 113 | 114 | result = f([test_values])[0] 115 | expected = hard_sigmoid(test_values) 116 | assert_allclose(result, expected, rtol=1e-05) 117 | 118 | 119 | def test_relu(): 120 | ''' 121 | Relu implementation doesn't depend on the value being 122 | a theano variable. Testing ints, floats and theano tensors. 
123 | ''' 124 | x = K.placeholder(ndim=2) 125 | f = K.function([x], [activations.relu(x)]) 126 | 127 | test_values = get_standard_values() 128 | result = f([test_values])[0] 129 | 130 | # because no negatives in test values 131 | assert_allclose(result, test_values, rtol=1e-05) 132 | 133 | 134 | def test_tanh(): 135 | test_values = get_standard_values() 136 | 137 | x = K.placeholder(ndim=2) 138 | exp = activations.tanh(x) 139 | f = K.function([x], [exp]) 140 | 141 | result = f([test_values])[0] 142 | expected = np.tanh(test_values) 143 | assert_allclose(result, expected, rtol=1e-05) 144 | 145 | 146 | def test_linear(): 147 | ''' 148 | This function does no input validation, it just returns the thing 149 | that was passed in. 150 | ''' 151 | xs = [1, 5, True, None, 'foo'] 152 | for x in xs: 153 | assert(x == activations.linear(x)) 154 | 155 | 156 | if __name__ == '__main__': 157 | pytest.main([__file__]) 158 | -------------------------------------------------------------------------------- /tests/keras/test_constraints.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | from numpy.testing import assert_allclose 4 | 5 | from keras import backend as K 6 | from keras import constraints 7 | 8 | 9 | test_values = [0.1, 0.5, 3, 8, 1e-7] 10 | np.random.seed(3537) 11 | example_array = np.random.random((100, 100)) * 100. - 50. 12 | example_array[0, 0] = 0. # 0 could possibly cause trouble 13 | 14 | 15 | def test_maxnorm(): 16 | for m in test_values: 17 | norm_instance = constraints.maxnorm(m) 18 | normed = norm_instance(K.variable(example_array)) 19 | assert(np.all(K.eval(normed) < m)) 20 | 21 | # a more explicit example 22 | norm_instance = constraints.maxnorm(2.0) 23 | x = np.array([[0, 0, 0], [1.0, 0, 0], [3, 0, 0], [3, 3, 3]]).T 24 | x_normed_target = np.array([[0, 0, 0], [1.0, 0, 0], 25 | [2.0, 0, 0], 26 | [2. / np.sqrt(3), 2. / np.sqrt(3), 2. / np.sqrt(3)]]).T 27 | x_normed_actual = K.eval(norm_instance(K.variable(x))) 28 | assert_allclose(x_normed_actual, x_normed_target, rtol=1e-05) 29 | 30 | 31 | def test_nonneg(): 32 | nonneg_instance = constraints.nonneg() 33 | normed = nonneg_instance(K.variable(example_array)) 34 | assert(np.all(np.min(K.eval(normed), axis=1) == 0.)) 35 | 36 | 37 | def test_unitnorm(): 38 | unitnorm_instance = constraints.unitnorm() 39 | normalized = unitnorm_instance(K.variable(example_array)) 40 | norm_of_normalized = np.sqrt(np.sum(K.eval(normalized)**2, axis=0)) 41 | # in the unit norm constraint, it should be equal to 1. 42 | difference = norm_of_normalized - 1. 43 | largest_difference = np.max(np.abs(difference)) 44 | assert(np.abs(largest_difference) < 10e-5) 45 | 46 | 47 | if __name__ == '__main__': 48 | pytest.main([__file__]) 49 | -------------------------------------------------------------------------------- /tests/keras/test_initializations.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | 4 | from keras import initializations 5 | from keras import backend as K 6 | 7 | # 2D tensor test fixture 8 | FC_SHAPE = (100, 100) 9 | 10 | # 4D convolution in th order. 
This shape has the same effective shape as FC_SHAPE 11 | CONV_SHAPE = (25, 25, 2, 2) 12 | 13 | # The equivalent shape of both test fixtures 14 | SHAPE = (100, 100) 15 | 16 | def _runner(init, shape, target_mean=None, target_std=None, 17 | target_max=None, target_min=None): 18 | variable = init(shape) 19 | output = K.get_value(variable) 20 | lim = 1e-2 21 | if target_std is not None: 22 | assert abs(output.std() - target_std) < lim 23 | if target_mean is not None: 24 | assert abs(output.mean() - target_mean) < lim 25 | if target_max is not None: 26 | assert abs(output.max() - target_max) < lim 27 | if target_min is not None: 28 | assert abs(output.min() - target_min) < lim 29 | 30 | 31 | @pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV']) 32 | def test_uniform(tensor_shape): 33 | _runner(initializations.uniform, tensor_shape, target_mean=0., 34 | target_max=0.05, target_min=-0.05) 35 | 36 | 37 | @pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV']) 38 | def test_normal(tensor_shape): 39 | _runner(initializations.normal, tensor_shape, target_mean=0., target_std=0.05) 40 | 41 | 42 | @pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV']) 43 | def test_lecun_uniform(tensor_shape): 44 | scale = np.sqrt(3. / SHAPE[0]) 45 | _runner(initializations.lecun_uniform, tensor_shape, 46 | target_mean=0., target_max=scale, target_min=-scale) 47 | 48 | 49 | @pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV']) 50 | def test_glorot_uniform(tensor_shape): 51 | scale = np.sqrt(6. / (SHAPE[0] + SHAPE[1])) 52 | _runner(initializations.glorot_uniform, tensor_shape, target_mean=0., 53 | target_max=scale, target_min=-scale) 54 | 55 | 56 | @pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV']) 57 | def test_glorot_normal(tensor_shape): 58 | scale = np.sqrt(2. / (SHAPE[0] + SHAPE[1])) 59 | _runner(initializations.glorot_normal, tensor_shape, 60 | target_mean=0., target_std=scale) 61 | 62 | 63 | @pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV']) 64 | def test_he_uniform(tensor_shape): 65 | scale = np.sqrt(6. / SHAPE[0]) 66 | _runner(initializations.he_uniform, tensor_shape, target_mean=0., 67 | target_max=scale, target_min=-scale) 68 | 69 | 70 | @pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV']) 71 | def test_he_normal(tensor_shape): 72 | scale = np.sqrt(2. / SHAPE[0]) 73 | _runner(initializations.he_normal, tensor_shape, 74 | target_mean=0., target_std=scale) 75 | 76 | 77 | @pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV']) 78 | def test_orthogonal(tensor_shape): 79 | _runner(initializations.orthogonal, tensor_shape, 80 | target_mean=0.) 81 | 82 | 83 | @pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV']) 84 | def test_identity(tensor_shape): 85 | if len(tensor_shape) > 2: 86 | with pytest.raises(Exception): 87 | _runner(initializations.identity, tensor_shape, 88 | target_mean=1./SHAPE[0], target_max=1.) 89 | else: 90 | _runner(initializations.identity, tensor_shape, 91 | target_mean=1./SHAPE[0], target_max=1.) 92 | 93 | 94 | @pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV']) 95 | def test_zero(tensor_shape): 96 | _runner(initializations.zero, tensor_shape, 97 | target_mean=0., target_max=0.) 
98 | 99 | 100 | @pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV']) 101 | def test_one(tensor_shape): 102 | _runner(initializations.one, tensor_shape, 103 | target_mean=1., target_max=1.) 104 | 105 | 106 | if __name__ == '__main__': 107 | pytest.main([__file__]) 108 | -------------------------------------------------------------------------------- /tests/keras/test_metrics.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | 4 | from keras import metrics 5 | from keras import backend as K 6 | 7 | all_metrics = [ 8 | metrics.binary_accuracy, 9 | metrics.categorical_accuracy, 10 | metrics.mean_squared_error, 11 | metrics.mean_absolute_error, 12 | metrics.mean_absolute_percentage_error, 13 | metrics.mean_squared_logarithmic_error, 14 | metrics.squared_hinge, 15 | metrics.hinge, 16 | metrics.categorical_crossentropy, 17 | metrics.binary_crossentropy, 18 | metrics.poisson, 19 | metrics.cosine_proximity, 20 | ] 21 | 22 | all_sparse_metrics = [ 23 | metrics.sparse_categorical_accuracy, 24 | metrics.sparse_categorical_crossentropy, 25 | ] 26 | 27 | 28 | def test_metrics(): 29 | y_a = K.variable(np.random.random((6, 7))) 30 | y_b = K.variable(np.random.random((6, 7))) 31 | for metric in all_metrics: 32 | output = metric(y_a, y_b) 33 | assert K.eval(output).shape == () 34 | 35 | 36 | def test_sparse_metrics(): 37 | for metric in all_sparse_metrics: 38 | y_a = K.variable(np.random.randint(0, 7, (6,)), dtype=K.floatx()) 39 | y_b = K.variable(np.random.random((6, 7)), dtype=K.floatx()) 40 | assert K.eval(metric(y_a, y_b)).shape == () 41 | 42 | 43 | if __name__ == "__main__": 44 | pytest.main([__file__]) 45 | -------------------------------------------------------------------------------- /tests/keras/test_objectives.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | 4 | from keras import objectives 5 | from keras import backend as K 6 | 7 | 8 | allobj = [objectives.mean_squared_error, 9 | objectives.mean_absolute_error, 10 | objectives.mean_absolute_percentage_error, 11 | objectives.mean_squared_logarithmic_error, 12 | objectives.squared_hinge, 13 | objectives.hinge, objectives.categorical_crossentropy, 14 | objectives.binary_crossentropy, 15 | objectives.kullback_leibler_divergence, 16 | objectives.poisson, 17 | objectives.cosine_proximity] 18 | 19 | 20 | def test_objective_shapes_3d(): 21 | y_a = K.variable(np.random.random((5, 6, 7))) 22 | y_b = K.variable(np.random.random((5, 6, 7))) 23 | for obj in allobj: 24 | objective_output = obj(y_a, y_b) 25 | assert K.eval(objective_output).shape == (5, 6) 26 | 27 | 28 | def test_objective_shapes_2d(): 29 | y_a = K.variable(np.random.random((6, 7))) 30 | y_b = K.variable(np.random.random((6, 7))) 31 | for obj in allobj: 32 | objective_output = obj(y_a, y_b) 33 | assert K.eval(objective_output).shape == (6,) 34 | 35 | 36 | def test_cce_one_hot(): 37 | y_a = K.variable(np.random.randint(0, 7, (5, 6))) 38 | y_b = K.variable(np.random.random((5, 6, 7))) 39 | objective_output = objectives.sparse_categorical_crossentropy(y_a, y_b) 40 | assert K.eval(objective_output).shape == (5, 6) 41 | 42 | y_a = K.variable(np.random.randint(0, 7, (6,))) 43 | y_b = K.variable(np.random.random((6, 7))) 44 | assert K.eval(objectives.sparse_categorical_crossentropy(y_a, y_b)).shape == (6,) 45 | 46 | 47 | if __name__ == "__main__": 48 | pytest.main([__file__]) 49 | 
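As a quick illustration of the shape convention these objective tests assert, here is a minimal NumPy sketch (the helper name np_mean_squared_error is invented for illustration and is not part of Keras): an objective reduces only the trailing feature axis, so a (5, 6, 7) prediction yields one loss value per sample and timestep, shape (5, 6), while a (6, 7) prediction yields shape (6,).

import numpy as np

def np_mean_squared_error(y_true, y_pred):
    # reduce only the last (feature) axis, leaving one loss per sample/timestep
    return np.mean(np.square(y_pred - y_true), axis=-1)

assert np_mean_squared_error(np.zeros((5, 6, 7)), np.ones((5, 6, 7))).shape == (5, 6)
assert np_mean_squared_error(np.zeros((6, 7)), np.ones((6, 7))).shape == (6,)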
-------------------------------------------------------------------------------- /tests/keras/test_optimizers.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import pytest 3 | 4 | from keras.utils.test_utils import get_test_data 5 | from keras.optimizers import SGD, RMSprop, Adagrad, Adadelta, Adam, Adamax, Nadam 6 | from keras.models import Sequential 7 | from keras.layers.core import Dense, Activation 8 | from keras.utils.np_utils import to_categorical 9 | 10 | 11 | (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=1000, 12 | nb_test=200, 13 | input_shape=(10,), 14 | classification=True, 15 | nb_class=2) 16 | y_train = to_categorical(y_train) 17 | y_test = to_categorical(y_test) 18 | 19 | 20 | def get_model(input_dim, nb_hidden, output_dim): 21 | model = Sequential() 22 | model.add(Dense(nb_hidden, input_shape=(input_dim,))) 23 | model.add(Activation('relu')) 24 | model.add(Dense(output_dim)) 25 | model.add(Activation('softmax')) 26 | return model 27 | 28 | 29 | def _test_optimizer(optimizer, target=0.89): 30 | model = get_model(X_train.shape[1], 10, y_train.shape[1]) 31 | model.compile(loss='categorical_crossentropy', 32 | optimizer=optimizer, 33 | metrics=['accuracy']) 34 | history = model.fit(X_train, y_train, nb_epoch=12, batch_size=16, 35 | validation_data=(X_test, y_test), verbose=2) 36 | config = optimizer.get_config() 37 | assert type(config) == dict 38 | assert history.history['val_acc'][-1] >= target 39 | 40 | 41 | def test_sgd(): 42 | sgd = SGD(lr=0.01, momentum=0.9, nesterov=True) 43 | _test_optimizer(sgd) 44 | 45 | 46 | def test_rmsprop(): 47 | _test_optimizer(RMSprop()) 48 | 49 | 50 | def test_adagrad(): 51 | _test_optimizer(Adagrad()) 52 | 53 | 54 | def test_adadelta(): 55 | _test_optimizer(Adadelta()) 56 | 57 | 58 | def test_adam(): 59 | _test_optimizer(Adam()) 60 | 61 | 62 | def test_adamax(): 63 | _test_optimizer(Adamax()) 64 | 65 | 66 | def test_nadam(): 67 | _test_optimizer(Nadam()) 68 | 69 | 70 | if __name__ == '__main__': 71 | pytest.main([__file__]) 72 | -------------------------------------------------------------------------------- /tests/keras/test_regularizers.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | np.random.seed(1337) 4 | 5 | from keras.models import Sequential 6 | from keras.layers import Merge 7 | from keras.layers import Dense 8 | from keras.layers import Activation 9 | from keras.layers import Flatten 10 | from keras.layers import ActivityRegularization 11 | from keras.layers import Embedding 12 | from keras.datasets import mnist 13 | from keras.utils import np_utils 14 | from keras import regularizers 15 | 16 | nb_classes = 10 17 | batch_size = 128 18 | nb_epoch = 5 19 | weighted_class = 9 20 | standard_weight = 1 21 | high_weight = 5 22 | max_train_samples = 5000 23 | max_test_samples = 1000 24 | 25 | 26 | def get_data(): 27 | # the data, shuffled and split between tran and test sets 28 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 29 | X_train = X_train.reshape(60000, 784)[:max_train_samples] 30 | X_test = X_test.reshape(10000, 784)[:max_test_samples] 31 | X_train = X_train.astype("float32") / 255 32 | X_test = X_test.astype("float32") / 255 33 | 34 | # convert class vectors to binary class matrices 35 | y_train = y_train[:max_train_samples] 36 | y_test = y_test[:max_test_samples] 37 | Y_train = np_utils.to_categorical(y_train, nb_classes) 38 | Y_test = 
np_utils.to_categorical(y_test, nb_classes) 39 | test_ids = np.where(y_test == np.array(weighted_class))[0] 40 | 41 | return (X_train, Y_train), (X_test, Y_test), test_ids 42 | 43 | 44 | def create_model(weight_reg=None, activity_reg=None): 45 | model = Sequential() 46 | model.add(Dense(50, input_shape=(784,))) 47 | model.add(Activation('relu')) 48 | model.add(Dense(10, W_regularizer=weight_reg, 49 | activity_regularizer=activity_reg)) 50 | model.add(Activation('softmax')) 51 | return model 52 | 53 | 54 | def test_Eigenvalue_reg(): 55 | (X_train, Y_train), (X_test, Y_test), test_ids = get_data() 56 | reg = regularizers.EigenvalueRegularizer(0.01) 57 | model = create_model(weight_reg=reg) 58 | model.compile(loss='categorical_crossentropy', optimizer='rmsprop') 59 | model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch, verbose=0) 60 | model.evaluate(X_test[test_ids, :], Y_test[test_ids, :], verbose=0) 61 | 62 | 63 | def test_W_reg(): 64 | (X_train, Y_train), (X_test, Y_test), test_ids = get_data() 65 | for reg in [regularizers.l1(), 66 | regularizers.l2(), 67 | regularizers.l1l2()]: 68 | model = create_model(weight_reg=reg) 69 | model.compile(loss='categorical_crossentropy', optimizer='rmsprop') 70 | model.fit(X_train, Y_train, batch_size=batch_size, 71 | nb_epoch=nb_epoch, verbose=0) 72 | model.evaluate(X_test[test_ids, :], Y_test[test_ids, :], verbose=0) 73 | 74 | 75 | def test_A_reg(): 76 | (X_train, Y_train), (X_test, Y_test), test_ids = get_data() 77 | for reg in [regularizers.activity_l1(), regularizers.activity_l2()]: 78 | model = create_model(activity_reg=reg) 79 | model.compile(loss='categorical_crossentropy', optimizer='rmsprop') 80 | model.fit(X_train, Y_train, batch_size=batch_size, 81 | nb_epoch=nb_epoch, verbose=0) 82 | model.evaluate(X_test[test_ids, :], Y_test[test_ids, :], verbose=0) 83 | 84 | 85 | if __name__ == '__main__': 86 | pytest.main([__file__]) 87 | -------------------------------------------------------------------------------- /tests/keras/wrappers/test_scikit_learn.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | 4 | from keras.utils.test_utils import get_test_data 5 | from keras.utils import np_utils 6 | from keras import backend as K 7 | 8 | from keras.models import Sequential 9 | from keras.layers.core import Dense, Activation 10 | from keras.wrappers.scikit_learn import KerasClassifier, KerasRegressor 11 | 12 | np.random.seed(1337) 13 | 14 | input_dim = 10 15 | nb_class = 3 16 | batch_size = 32 17 | nb_epoch = 1 18 | verbosity = 0 19 | optim = 'adam' 20 | loss = 'categorical_crossentropy' 21 | 22 | 23 | (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=400, 24 | nb_test=200, 25 | input_shape=(input_dim,), 26 | classification=True, 27 | nb_class=nb_class) 28 | y_train = np_utils.to_categorical(y_train, nb_classes=nb_class) 29 | y_test = np_utils.to_categorical(y_test, nb_classes=nb_class) 30 | 31 | 32 | (X_train_reg, y_train_reg), (X_test_reg, y_test_reg) = get_test_data(nb_train=400, 33 | nb_test=200, 34 | input_shape=(input_dim,), 35 | classification=False, 36 | nb_class=1, 37 | output_shape=(1,)) 38 | 39 | 40 | def build_fn_clf(hidden_dims=50): 41 | model = Sequential() 42 | model.add(Dense(input_dim, input_shape=(input_dim,))) 43 | model.add(Activation('relu')) 44 | model.add(Dense(hidden_dims)) 45 | model.add(Activation('relu')) 46 | model.add(Dense(nb_class)) 47 | model.add(Activation('softmax')) 48 | model.compile(optimizer='sgd', 
loss='categorical_crossentropy', 49 | metrics=['accuracy']) 50 | return model 51 | 52 | 53 | class Class_build_fn_clf(object): 54 | def __call__(self, hidden_dims): 55 | return build_fn_clf(hidden_dims) 56 | 57 | 58 | class Inherit_class_build_fn_clf(KerasClassifier): 59 | def __call__(self, hidden_dims): 60 | return build_fn_clf(hidden_dims) 61 | 62 | 63 | def build_fn_reg(hidden_dims=50): 64 | model = Sequential() 65 | model.add(Dense(input_dim, input_shape=(input_dim,))) 66 | model.add(Activation('relu')) 67 | model.add(Dense(hidden_dims)) 68 | model.add(Activation('relu')) 69 | model.add(Dense(1)) 70 | model.add(Activation('linear')) 71 | model.compile(optimizer='sgd', loss='mean_absolute_error', 72 | metrics=['accuracy']) 73 | return model 74 | 75 | 76 | class Class_build_fn_reg(object): 77 | def __call__(self, hidden_dims): 78 | return build_fn_reg(hidden_dims) 79 | 80 | 81 | class Inherit_class_build_fn_reg(KerasRegressor): 82 | def __call__(self, hidden_dims): 83 | return build_fn_reg(hidden_dims) 84 | 85 | for fn in [build_fn_clf, Class_build_fn_clf(), Inherit_class_build_fn_clf]: 86 | if fn is Inherit_class_build_fn_clf: 87 | classifier = Inherit_class_build_fn_clf( 88 | build_fn=None, hidden_dims=50, batch_size=batch_size, nb_epoch=nb_epoch) 89 | else: 90 | classifier = KerasClassifier( 91 | build_fn=fn, hidden_dims=50, batch_size=batch_size, nb_epoch=nb_epoch) 92 | 93 | classifier.fit(X_train, y_train, batch_size=batch_size, nb_epoch=nb_epoch) 94 | score = classifier.score(X_train, y_train, batch_size=batch_size) 95 | preds = classifier.predict(X_test, batch_size=batch_size) 96 | proba = classifier.predict_proba(X_test, batch_size=batch_size) 97 | 98 | 99 | for fn in [build_fn_reg, Class_build_fn_reg(), Inherit_class_build_fn_reg]: 100 | if fn is Inherit_class_build_fn_reg: 101 | regressor = Inherit_class_build_fn_reg( 102 | build_fn=None, hidden_dims=50, batch_size=batch_size, nb_epoch=nb_epoch) 103 | else: 104 | regressor = KerasRegressor( 105 | build_fn=fn, hidden_dims=50, batch_size=batch_size, nb_epoch=nb_epoch) 106 | 107 | regressor.fit(X_train_reg, y_train_reg, 108 | batch_size=batch_size, nb_epoch=nb_epoch) 109 | score = regressor.score(X_train_reg, y_train_reg, batch_size=batch_size) 110 | preds = regressor.predict(X_test, batch_size=batch_size) 111 | 112 | 113 | # Usage of sklearn's grid_search 114 | # from sklearn import grid_search 115 | # parameters = dict(hidden_dims = [20, 30], batch_size=[64, 128], nb_epoch=[2], verbose=[0]) 116 | # classifier = Inherit_class_build_fn_clf() 117 | # clf = grid_search.GridSearchCV(classifier, parameters) 118 | # clf.fit(X_train, y_train) 119 | # parameters = dict(hidden_dims = [20, 30], batch_size=[64, 128], nb_epoch=[2], verbose=[0]) 120 | # regressor = Inherit_class_build_fn_reg() 121 | # reg = grid_search.GridSearchCV(regressor, parameters, scoring='mean_squared_error', n_jobs=1, cv=2, verbose=2) 122 | # reg.fit(X_train_reg, y_train_reg) 123 | -------------------------------------------------------------------------------- /tests/test_loss_masking.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from keras.models import Sequential 5 | from keras.engine.training import weighted_objective 6 | from keras.layers.core import TimeDistributedDense, Masking 7 | from keras.utils.test_utils import keras_test 8 | from keras import objectives 9 | from keras import backend as K 10 | 11 | 12 | @keras_test 13 | def test_masking(): 14 | np.random.seed(1337) 15 
| X = np.array([[[1], [1]], 16 | [[0], [0]]]) 17 | model = Sequential() 18 | model.add(Masking(mask_value=0, input_shape=(2, 1))) 19 | model.add(TimeDistributedDense(1, init='one')) 20 | model.compile(loss='mse', optimizer='sgd') 21 | y = np.array([[[1], [1]], 22 | [[1], [1]]]) 23 | loss = model.train_on_batch(X, y) 24 | assert loss == 0 25 | 26 | 27 | @keras_test 28 | def test_loss_masking(): 29 | weighted_loss = weighted_objective(objectives.get('mae')) 30 | shape = (3, 4, 2) 31 | X = np.arange(24).reshape(shape) 32 | Y = 2 * X 33 | 34 | # Normally the trailing 1 is added by standardize_weights 35 | weights = np.ones((3,)) 36 | mask = np.ones((3, 4)) 37 | mask[1, 0] = 0 38 | 39 | out = K.eval(weighted_loss(K.variable(X), 40 | K.variable(Y), 41 | K.variable(weights), 42 | K.variable(mask))) 43 | 44 | 45 | if __name__ == '__main__': 46 | pytest.main([__file__]) 47 | --------------------------------------------------------------------------------
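To make the masking behaviour exercised in test_loss_masking concrete, here is a rough NumPy sketch of what a masked, sample-weighted objective computes (the function masked_weighted_mae is invented for illustration; the exact normalization inside keras.engine.training.weighted_objective may differ): per-timestep losses are zeroed where the mask is 0, rescaled by the mask mean so padded steps do not dilute the average, reduced to one value per sample, and finally scaled by the per-sample weights.

import numpy as np

def masked_weighted_mae(y_true, y_pred, sample_weights, mask):
    # per-timestep mean absolute error, shape (nb_samples, timesteps)
    per_step = np.mean(np.abs(y_pred - y_true), axis=-1)
    # zero out masked timesteps, then rescale so masked steps do not dilute the mean
    per_step = per_step * mask / np.maximum(mask.mean(), 1e-8)
    # one loss per sample, scaled by its sample weight
    per_sample = per_step.mean(axis=-1) * sample_weights
    return per_sample.mean()

X = np.arange(24).reshape((3, 4, 2)).astype('float64')
Y = 2. * X
weights = np.ones((3,))
mask = np.ones((3, 4))
mask[1, 0] = 0.
print(masked_weighted_mae(Y, X, weights, mask))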