├── .gitignore ├── .travis.yml ├── CONTRIBUTING.md ├── ISSUE_TEMPLATE.md ├── LICENSE ├── README.md ├── docker ├── Dockerfile ├── Makefile ├── README.md └── theanorc ├── docs ├── README.md ├── autogen.py ├── mkdocs.yml └── templates │ ├── activations.md │ ├── applications.md │ ├── backend.md │ ├── callbacks.md │ ├── constraints.md │ ├── datasets.md │ ├── getting-started │ ├── faq.md │ ├── functional-api-guide.md │ └── sequential-model-guide.md │ ├── index.md │ ├── initializations.md │ ├── layers │ ├── about-keras-layers.md │ └── writing-your-own-keras-layers.md │ ├── metrics.md │ ├── models │ ├── about-keras-models.md │ ├── model.md │ └── sequential.md │ ├── objectives.md │ ├── optimizers.md │ ├── preprocessing │ ├── image.md │ ├── sequence.md │ └── text.md │ ├── regularizers.md │ ├── scikit-learn-api.md │ └── visualization.md ├── examples ├── README.md ├── addition_rnn.py ├── antirectifier.py ├── babi_memnn.py ├── babi_rnn.py ├── cifar10_cnn.py ├── conv_filter_visualization.py ├── deep_dream.py ├── image_ocr.py ├── imdb_bidirectional_lstm.py ├── imdb_cnn.py ├── imdb_cnn_lstm.py ├── imdb_fasttext.py ├── imdb_lstm.py ├── lstm_benchmark.py ├── lstm_text_generation.py ├── mnist_cnn.py ├── mnist_hierarchical_rnn.py ├── mnist_irnn.py ├── mnist_mlp.py ├── mnist_net2net.py ├── mnist_siamese_graph.py ├── mnist_sklearn_wrapper.py ├── mnist_swwae.py ├── mnist_transfer_cnn.py ├── neural_doodle.py ├── neural_style_transfer.py ├── opti │ └── addition_rnn.py ├── pretrained_word_embeddings.py ├── reuters_mlp.py ├── stateful_lstm.py ├── variational_autoencoder.py └── variational_autoencoder_deconv.py ├── keras ├── __init__.py ├── activations.py ├── applications │ ├── __init__.py │ ├── audio_conv_utils.py │ ├── imagenet_utils.py │ ├── inception_v3.py │ ├── music_tagger_crnn.py │ ├── resnet50.py │ ├── vgg16.py │ ├── vgg19.py │ └── xception.py ├── backend │ ├── __init__.py │ ├── common.py │ ├── tensorflow_backend.py │ └── theano_backend.py ├── callbacks.py ├── constraints.py ├── datasets │ ├── __init__.py │ ├── cifar.py │ ├── cifar10.py │ ├── cifar100.py │ ├── data_utils.py │ ├── imdb.py │ ├── mnist.py │ └── reuters.py ├── engine │ ├── __init__.py │ ├── topology.py │ └── training.py ├── initializations.py ├── layers │ ├── __init__.py │ ├── advanced_activations.py │ ├── convolutional.py │ ├── core.py │ ├── embeddings.py │ ├── local.py │ ├── noise.py │ ├── normalization.py │ ├── pooling.py │ ├── recurrent.py │ └── wrappers.py ├── legacy │ ├── __init__.py │ └── models.py ├── metrics.py ├── models.py ├── objectives.py ├── optimizers.py ├── preprocessing │ ├── __init__.py │ ├── image.py │ ├── sequence.py │ └── text.py ├── regularizers.py ├── utils │ ├── __init__.py │ ├── data_utils.py │ ├── generic_utils.py │ ├── io_utils.py │ ├── layer_utils.py │ ├── np_utils.py │ ├── test_utils.py │ └── visualize_util.py └── wrappers │ ├── __init__.py │ └── scikit_learn.py ├── pytest.ini ├── setup.cfg ├── setup.py └── tests ├── integration_tests ├── test_image_data_tasks.py ├── test_temporal_data_tasks.py └── test_vector_data_tasks.py ├── keras ├── backend │ └── test_backends.py ├── datasets │ └── test_datasets.py ├── engine │ ├── test_topology.py │ └── test_training.py ├── layers │ ├── test_advanced_activations.py │ ├── test_convolutional.py │ ├── test_core.py │ ├── test_embeddings.py │ ├── test_local.py │ ├── test_noise.py │ ├── test_normalization.py │ ├── test_recurrent.py │ └── test_wrappers.py ├── preprocessing │ ├── test_image.py │ ├── test_sequence.py │ └── test_text.py ├── test_activations.py ├── test_callbacks.py ├── 
test_constraints.py ├── test_initializations.py ├── test_metrics.py ├── test_multiprocessing.py ├── test_objectives.py ├── test_optimizers.py ├── test_regularizers.py ├── test_sequential_model.py ├── test_sparse.py └── wrappers │ └── test_scikit_learn.py ├── test_loss_masking.py ├── test_loss_weighting.py └── test_model_saving.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.DS_Store 2 | *.pyc 3 | *.swp 4 | temp/* 5 | dist/* 6 | build/* 7 | keras/datasets/data/* 8 | keras/datasets/temp/* 9 | docs/site/* 10 | docs/theme/* 11 | tags 12 | Keras.egg-info 13 | 14 | # test-related 15 | .coverage 16 | .cache 17 | 18 | # developer environments 19 | .idea 20 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: required 2 | dist: trusty 3 | language: python 4 | matrix: 5 | include: 6 | - python: 3.4 7 | env: KERAS_BACKEND=theano 8 | - python: 3.4 9 | env: KERAS_BACKEND=tensorflow 10 | - python: 2.7 11 | env: KERAS_BACKEND=theano 12 | - python: 2.7 13 | env: KERAS_BACKEND=tensorflow 14 | - python: 2.7 15 | env: KERAS_BACKEND=theano TEST_MODE=INTEGRATION_TESTS 16 | - python: 2.7 17 | env: KERAS_BACKEND=theano TEST_MODE=PEP8 18 | install: 19 | # code below is taken from http://conda.pydata.org/docs/travis.html 20 | # We do this conditionally because it saves us some downloading if the 21 | # version is the same. 22 | - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then 23 | wget https://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh; 24 | else 25 | wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh; 26 | fi 27 | - bash miniconda.sh -b -p $HOME/miniconda 28 | - export PATH="$HOME/miniconda/bin:$PATH" 29 | - hash -r 30 | - conda config --set always_yes yes --set changeps1 no 31 | - conda update -q conda 32 | # Useful for debugging any issues with conda 33 | - conda info -a 34 | 35 | - conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION numpy scipy matplotlib pandas pytest h5py 36 | - source activate test-environment 37 | - pip install pytest-cov python-coveralls pytest-xdist coverage==3.7.1 #we need this version of coverage for coveralls.io to work 38 | - pip install pep8 pytest-pep8 39 | - pip install git+git://github.com/Theano/Theano.git 40 | 41 | # install PIL for preprocessing tests 42 | - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then 43 | conda install pil; 44 | elif [[ "$TRAVIS_PYTHON_VERSION" == "3.4" ]]; then 45 | conda install Pillow; 46 | fi 47 | 48 | - python setup.py install 49 | 50 | # install TensorFlow 51 | - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then 52 | pip install https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.9.0-cp27-none-linux_x86_64.whl; 53 | elif [[ "$TRAVIS_PYTHON_VERSION" == "3.4" ]]; then 54 | pip install https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.9.0-cp34-cp34m-linux_x86_64.whl; 55 | fi 56 | # command to run tests 57 | script: 58 | # run keras backend init to initialize backend config 59 | - python -c "import keras.backend" 60 | # create dataset directory to avoid concurrent directory creation at runtime 61 | - mkdir ~/.keras/datasets 62 | # set up keras backend 63 | - sed -i -e 's/"backend":[[:space:]]*"[^"]*/"backend":\ "'$KERAS_BACKEND'/g' ~/.keras/keras.json; 64 | - echo -e "Running tests with the following config:\n$(cat ~/.keras/keras.json)" 65 | - if [[ 
"$TEST_MODE" == "INTEGRATION_TESTS" ]]; then 66 | PYTHONPATH=$PWD:$PYTHONPATH py.test tests/integration_tests; 67 | elif [[ "$TEST_MODE" == "PEP8" ]]; then 68 | PYTHONPATH=$PWD:$PYTHONPATH py.test --pep8 -m pep8 -n0; 69 | else 70 | PYTHONPATH=$PWD:$PYTHONPATH py.test tests/ --ignore=tests/integration_tests; 71 | fi 72 | after_success: 73 | - coveralls 74 | -------------------------------------------------------------------------------- /ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | Please make sure that the boxes below are checked before you submit your issue. Thank you! 2 | 3 | - [ ] Check that you are up-to-date with the master branch of Keras. You can update with: 4 | pip install git+git://github.com/fchollet/keras.git --upgrade --no-deps 5 | 6 | - [ ] If running on Theano, check that you are up-to-date with the master branch of Theano. You can update with: 7 | pip install git+git://github.com/Theano/Theano.git --upgrade --no-deps 8 | 9 | - [ ] Provide a link to a GitHub Gist of a Python script that can reproduce your issue (or just copy the script here if it is short). -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | COPYRIGHT 2 | 3 | All contributions by François Chollet: 4 | Copyright (c) 2015, François Chollet. 5 | All rights reserved. 6 | 7 | All contributions by Google: 8 | Copyright (c) 2015, Google, Inc. 9 | All rights reserved. 10 | 11 | All other contributions: 12 | Copyright (c) 2015, the respective contributors. 13 | All rights reserved. 14 | 15 | Each contributor holds copyright over their respective contributions. 16 | The project versioning (Git) records all such contribution source information. 17 | 18 | LICENSE 19 | 20 | The MIT License (MIT) 21 | 22 | Permission is hereby granted, free of charge, to any person obtaining a copy 23 | of this software and associated documentation files (the "Software"), to deal 24 | in the Software without restriction, including without limitation the rights 25 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 26 | copies of the Software, and to permit persons to whom the Software is 27 | furnished to do so, subject to the following conditions: 28 | 29 | The above copyright notice and this permission notice shall be included in all 30 | copies or substantial portions of the Software. 31 | 32 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 33 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 34 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 35 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 36 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 37 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 38 | SOFTWARE. 
39 | 40 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:7.5-cudnn5-devel 2 | 3 | ENV CONDA_DIR /opt/conda 4 | ENV PATH $CONDA_DIR/bin:$PATH 5 | 6 | RUN mkdir -p $CONDA_DIR && \ 7 | echo export PATH=$CONDA_DIR/bin:'$PATH' > /etc/profile.d/conda.sh && \ 8 | apt-get update && \ 9 | apt-get install -y wget git libhdf5-dev g++ graphviz && \ 10 | wget --quiet https://repo.continuum.io/miniconda/Miniconda3-3.9.1-Linux-x86_64.sh && \ 11 | echo "6c6b44acdd0bc4229377ee10d52c8ac6160c336d9cdd669db7371aa9344e1ac3 *Miniconda3-3.9.1-Linux-x86_64.sh" | sha256sum -c - && \ 12 | /bin/bash /Miniconda3-3.9.1-Linux-x86_64.sh -f -b -p $CONDA_DIR && \ 13 | rm Miniconda3-3.9.1-Linux-x86_64.sh 14 | 15 | ENV NB_USER keras 16 | ENV NB_UID 1000 17 | 18 | RUN useradd -m -s /bin/bash -N -u $NB_UID $NB_USER && \ 19 | mkdir -p $CONDA_DIR && \ 20 | chown keras $CONDA_DIR -R && \ 21 | mkdir -p /src && \ 22 | chown keras /src 23 | 24 | USER keras 25 | 26 | # Python 27 | ARG python_version=3.5.1 28 | ARG tensorflow_version=0.9.0rc0-cp35-cp35m 29 | RUN conda install -y python=${python_version} && \ 30 | pip install https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-${tensorflow_version}-linux_x86_64.whl && \ 31 | pip install git+git://github.com/Theano/Theano.git && \ 32 | pip install ipdb pytest pytest-cov python-coveralls coverage==3.7.1 pytest-xdist pep8 pytest-pep8 pydot_ng && \ 33 | conda install Pillow scikit-learn notebook pandas matplotlib nose pyyaml six h5py && \ 34 | pip install git+git://github.com/fchollet/keras.git && \ 35 | conda clean -yt 36 | 37 | ADD theanorc /home/keras/.theanorc 38 | 39 | ENV PYTHONPATH='/src/:$PYTHONPATH' 40 | 41 | WORKDIR /src 42 | 43 | EXPOSE 8888 44 | 45 | CMD jupyter notebook --port=8888 --ip=0.0.0.0 46 | 47 | -------------------------------------------------------------------------------- /docker/Makefile: -------------------------------------------------------------------------------- 1 | help: 2 | @cat Makefile 3 | 4 | DATA?="${HOME}/Data" 5 | GPU?=0 6 | DOCKER_FILE=Dockerfile 7 | DOCKER=GPU=$(GPU) nvidia-docker 8 | BACKEND=tensorflow 9 | TEST=tests/ 10 | SRC=$(shell dirname `pwd`) 11 | 12 | build: 13 | docker build -t keras --build-arg python_version=3.5 -f $(DOCKER_FILE) . 14 | 15 | bash: build 16 | $(DOCKER) run -it -v $(SRC):/src -v $(DATA):/data --env KERAS_BACKEND=$(BACKEND) keras bash 17 | 18 | ipython: build 19 | $(DOCKER) run -it -v $(SRC):/src -v $(DATA):/data --env KERAS_BACKEND=$(BACKEND) keras ipython 20 | 21 | notebook: build 22 | $(DOCKER) run -it -v $(SRC):/src -v $(DATA):/data --net=host --env KERAS_BACKEND=$(BACKEND) keras 23 | 24 | test: build 25 | $(DOCKER) run -it -v $(SRC):/src -v $(DATA):/data --env KERAS_BACKEND=$(BACKEND) keras py.test $(TEST) 26 | 27 | -------------------------------------------------------------------------------- /docker/README.md: -------------------------------------------------------------------------------- 1 | # Using Keras via Docker 2 | 3 | This directory contains `Dockerfile` to make it easy to get up and running with 4 | Keras via [Docker](http://www.docker.com/). 
5 | 6 | ## Installing Docker 7 | 8 | General installation instructions are 9 | [on the Docker site](https://docs.docker.com/installation/), but we give some 10 | quick links here: 11 | 12 | * [OSX](https://docs.docker.com/installation/mac/): [docker toolbox](https://www.docker.com/toolbox) 13 | * [ubuntu](https://docs.docker.com/installation/ubuntulinux/) 14 | 15 | ## Running the container 16 | 17 | We are using `Makefile` to simplify docker commands within make commands. 18 | 19 | Build the container and start a jupyter notebook 20 | 21 | $ make notebook 22 | 23 | Build the container and start an iPython shell 24 | 25 | $ make ipython 26 | 27 | Build the container and start a bash 28 | 29 | $ make bash 30 | 31 | For GPU support install NVidia drivers (ideally latest) and 32 | [nvidia-docker](https://github.com/NVIDIA/nvidia-docker). Run using 33 | 34 | $ make notebook GPU=0 # or [ipython, bash] 35 | 36 | Switch between Theano and TensorFlow 37 | 38 | $ make notebook BACKEND=theano 39 | $ make notebook BACKEND=tensorflow 40 | 41 | Mount a volume for external data sets 42 | 43 | $ make DATA=~/mydata 44 | 45 | Prints all make tasks 46 | 47 | $ make help 48 | 49 | You can change Theano parameters by editing `/docker/theanorc`. 50 | 51 | 52 | Note: If you would have a problem running nvidia-docker you may try the old way 53 | we have used. But it is not recommended. If you find a bug in the nvidia-docker report 54 | it there please and try using the nvidia-docker as described above. 55 | 56 | $ export CUDA_SO=$(\ls /usr/lib/x86_64-linux-gnu/libcuda.* | xargs -I{} echo '-v {}:{}') 57 | $ export DEVICES=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}') 58 | $ docker run -it -p 8888:8888 $CUDA_SO $DEVICES gcr.io/tensorflow/tensorflow:latest-gpu 59 | -------------------------------------------------------------------------------- /docker/theanorc: -------------------------------------------------------------------------------- 1 | [global] 2 | floatX = float32 3 | optimizer=None 4 | device = gpu 5 | 6 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # Keras Documentation 2 | 3 | The source for Keras documentation is in this directory under `sources/`. 4 | Our documentation uses extended Markdown, as implemented by [MkDocs](http://mkdocs.org). 5 | 6 | ## Building the documentation 7 | 8 | - install MkDocs: `pip install mkdocs` 9 | - `cd` to the `docs/` folder and run: 10 | - `python autogen.py` 11 | - `mkdocs serve` # Starts a local webserver: [localhost:8000](localhost:8000) 12 | - `mkdocs build` # Builds a static site in "site" directory 13 | -------------------------------------------------------------------------------- /docs/mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: Keras Documentation 2 | theme: readthedocs 3 | docs_dir: sources 4 | repo_url: http://github.com/fchollet/keras 5 | site_url: http://keras.io/ 6 | # theme_dir: theme 7 | site_description: 'Documentation for Keras, the Python Deep Learning library.' 
8 | 9 | dev_addr: '0.0.0.0:8000' 10 | google_analytics: ['UA-61785484-1', 'keras.io'] 11 | 12 | 13 | pages: 14 | - Home: index.md 15 | - Getting started: 16 | - Guide to the Sequential model: getting-started/sequential-model-guide.md 17 | - Guide to the Functional API: getting-started/functional-api-guide.md 18 | - FAQ: getting-started/faq.md 19 | - Models: 20 | - About Keras models: models/about-keras-models.md 21 | - Sequential: models/sequential.md 22 | - Model (functional API): models/model.md 23 | - Layers: 24 | - About Keras layers: layers/about-keras-layers.md 25 | - Core Layers: layers/core.md 26 | - Convolutional Layers: layers/convolutional.md 27 | - Pooling Layers: layers/pooling.md 28 | - Locally-connected Layers: layers/local.md 29 | - Recurrent Layers: layers/recurrent.md 30 | - Embedding Layers: layers/embeddings.md 31 | - Advanced Activations Layers: layers/advanced-activations.md 32 | - Normalization Layers: layers/normalization.md 33 | - Noise layers: layers/noise.md 34 | - Layer wrappers: layers/wrappers.md 35 | - Writing your own Keras layers: layers/writing-your-own-keras-layers.md 36 | - Preprocessing: 37 | - Sequence Preprocessing: preprocessing/sequence.md 38 | - Text Preprocessing: preprocessing/text.md 39 | - Image Preprocessing: preprocessing/image.md 40 | - Objectives: objectives.md 41 | - Metrics: metrics.md 42 | - Optimizers: optimizers.md 43 | - Activations: activations.md 44 | - Callbacks: callbacks.md 45 | - Datasets: datasets.md 46 | - Applications: applications.md 47 | - Backend: backend.md 48 | - Initializations: initializations.md 49 | - Regularizers: regularizers.md 50 | - Constraints: constraints.md 51 | - Visualization: visualization.md 52 | - Scikit-learn API: scikit-learn-api.md 53 | - Utils: 54 | - Data Utils: utils/data_utils.md 55 | - I/O Utils: utils/io_utils.md 56 | - Layer Utils: utils/layer_utils.md 57 | - Numpy Utils: utils/np_utils.md 58 | 59 | 60 | 61 | -------------------------------------------------------------------------------- /docs/templates/activations.md: -------------------------------------------------------------------------------- 1 | 2 | ## Usage of activations 3 | 4 | Activations can either be used through an `Activation` layer, or through the `activation` argument supported by all forward layers: 5 | 6 | ```python 7 | from keras.layers.core import Activation, Dense 8 | 9 | model.add(Dense(64)) 10 | model.add(Activation('tanh')) 11 | ``` 12 | is equivalent to: 13 | ```python 14 | model.add(Dense(64, activation='tanh')) 15 | ``` 16 | 17 | You can also pass an element-wise Theano/TensorFlow function as an activation: 18 | 19 | ```python 20 | from keras import backend as K 21 | 22 | def tanh(x): 23 | return K.tanh(x) 24 | 25 | model.add(Dense(64, activation=tanh)) 26 | model.add(Activation(tanh)) 27 | ``` 28 | 29 | ## Available activations 30 | 31 | - __softmax__: Softmax applied across inputs last dimension. Expects shape either `(nb_samples, nb_timesteps, nb_dims)` or `(nb_samples, nb_dims)`. 32 | - __softplus__ 33 | - __softsign__ 34 | - __relu__ 35 | - __tanh__ 36 | - __sigmoid__ 37 | - __hard_sigmoid__ 38 | - __linear__ 39 | 40 | ## On Advanced Activations 41 | 42 | Activations that are more complex than a simple Theano/TensorFlow function (eg. learnable activations, configurable activations, etc.) are available as [Advanced Activation layers](layers/advanced-activations.md), and can be found in the module `keras.layers.advanced_activations`. These include PReLU and LeakyReLU. 
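As a quick illustration (a minimal sketch, assuming the `Sequential` model and `Dense` layer described elsewhere in these docs), an advanced activation is added to a model as a layer of its own:

```python
from keras.models import Sequential
from keras.layers.core import Dense
from keras.layers.advanced_activations import LeakyReLU

model = Sequential()
model.add(Dense(64, input_dim=20))
model.add(LeakyReLU(alpha=0.3))  # configurable activation used as a standalone layer
```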
43 | -------------------------------------------------------------------------------- /docs/templates/backend.md: -------------------------------------------------------------------------------- 1 | # Keras backends 2 | 3 | ## What is a "backend"? 4 | 5 | Keras is a model-level library, providing high-level building blocks for developing deep learning models. It does not handle itself low-level operations such as tensor products, convolutions and so on. Instead, it relies on a specialized, well-optimized tensor manipulation library to do so, serving as the "backend engine" of Keras. Rather than picking one single tensor library and making the implementation of Keras tied to that library, Keras handles the problem in a modular way, and several different backend engines can be plugged seamlessly into Keras. 6 | 7 | At this time, Keras has two backend implementations available: the **TensorFlow** backend and the **Theano** backend. 8 | 9 | - [TensorFlow](http://www.tensorflow.org/) is an open-source symbolic tensor manipulation framework developed by Google, Inc. 10 | - [Theano](http://deeplearning.net/software/theano/) is an open-source symbolic tensor manipulation framework developed by LISA/MILA Lab at Université de Montréal. 11 | 12 | In the future, we are likely to add more backend options. If you are interested in developing a new backend, get in touch! 13 | 14 | ---- 15 | 16 | ## Switching from one backend to another 17 | 18 | If you have run Keras at least once, you will find the Keras configuration file at: 19 | 20 | `~/.keras/keras.json` 21 | 22 | If it isn't there, you can create it. 23 | 24 | The default configuration file looks like this: 25 | 26 | ``` 27 | { 28 | "image_dim_ordering": "tf", 29 | "epsilon": 1e-07, 30 | "floatx": "float32", 31 | "backend": "tensorflow" 32 | } 33 | ``` 34 | 35 | Simply change the field `backend` to either `"theano"` or `"tensorflow"`, and Keras will use the new configuration next time you run any Keras code. 36 | 37 | You can also define the environment variable ``KERAS_BACKEND`` and this will 38 | override what is defined in your config file : 39 | 40 | ```bash 41 | KERAS_BACKEND=tensorflow python -c "from keras import backend" 42 | Using TensorFlow backend. 43 | ``` 44 | 45 | ---- 46 | 47 | ## Using the abstract Keras backend to write new code 48 | 49 | If you want the Keras modules you write to be compatible with both Theano and TensorFlow, you have to write them via the abstract Keras backend API. Here's an intro. 50 | 51 | You can import the backend module via: 52 | ```python 53 | from keras import backend as K 54 | ``` 55 | 56 | The code below instantiates an input placeholder. It's equivalent to `tf.placeholder()` or `T.matrix()`, `T.tensor3()`, etc. 57 | 58 | ```python 59 | input = K.placeholder(shape=(2, 4, 5)) 60 | # also works: 61 | input = K.placeholder(shape=(None, 4, 5)) 62 | # also works: 63 | input = K.placeholder(ndim=3) 64 | ``` 65 | 66 | The code below instantiates a shared variable. It's equivalent to `tf.variable()` or `theano.shared()`. 67 | 68 | ```python 69 | val = np.random.random((3, 4, 5)) 70 | var = K.variable(value=val) 71 | 72 | # all-zeros variable: 73 | var = K.zeros(shape=(3, 4, 5)) 74 | # all-ones: 75 | var = K.ones(shape=(3, 4, 5)) 76 | ``` 77 | 78 | Most tensor operations you will need can be done as you would in TensorFlow or Theano: 79 | 80 | ```python 81 | a = b + c * K.abs(d) 82 | c = K.dot(a, K.transpose(b)) 83 | a = K.sum(b, axis=2) 84 | a = K.softmax(b) 85 | a = concatenate([b, c], axis=-1) 86 | # etc... 
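# a few more calls in the same style (a sketch; the full function list is generated below):
a = K.mean(b)          # mean over all axes, returns a scalar tensor
a = K.maximum(b, c)    # element-wise maximum of two tensors
a = K.square(d)        # element-wise square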
87 | ``` 88 | 89 | ---- 90 | 91 | ## Backend functions 92 | 93 | 94 | {{autogenerated}} 95 | 96 | 97 | 98 | 99 | 100 | -------------------------------------------------------------------------------- /docs/templates/callbacks.md: -------------------------------------------------------------------------------- 1 | ## Usage of callbacks 2 | 3 | A callback is a set of functions to be applied at given stages of the training procedure. You can use callbacks to get a view on internal states and statistics of the model during training. You can pass a list of callbacks (as the keyword argument `callbacks`) to the `.fit()` method of the `Sequential` model. The relevant methods of the callbacks will then be called at each stage of the training. 4 | 5 | --- 6 | 7 | {{autogenerated}} 8 | 9 | --- 10 | 11 | 12 | # Create a callback 13 | 14 | You can create a custom callback by extending the base class `keras.callbacks.Callback`. A callback has access to its associated model through the class property `self.model`. 15 | 16 | Here's a simple example saving a list of losses over each batch during training: 17 | ```python 18 | class LossHistory(keras.callbacks.Callback): 19 | def on_train_begin(self, logs={}): 20 | self.losses = [] 21 | 22 | def on_batch_end(self, batch, logs={}): 23 | self.losses.append(logs.get('loss')) 24 | ``` 25 | 26 | --- 27 | 28 | ### Example: recording loss history 29 | 30 | ```python 31 | class LossHistory(keras.callbacks.Callback): 32 | def on_train_begin(self, logs={}): 33 | self.losses = [] 34 | 35 | def on_batch_end(self, batch, logs={}): 36 | self.losses.append(logs.get('loss')) 37 | 38 | model = Sequential() 39 | model.add(Dense(10, input_dim=784, init='uniform')) 40 | model.add(Activation('softmax')) 41 | model.compile(loss='categorical_crossentropy', optimizer='rmsprop') 42 | 43 | history = LossHistory() 44 | model.fit(X_train, Y_train, batch_size=128, nb_epoch=20, verbose=0, callbacks=[history]) 45 | 46 | print history.losses 47 | # outputs 48 | ''' 49 | [0.66047596406559383, 0.3547245744908703, ..., 0.25953155204159617, 0.25901699725311789] 50 | ''' 51 | ``` 52 | 53 | --- 54 | 55 | ### Example: model checkpoints 56 | 57 | ```python 58 | from keras.callbacks import ModelCheckpoint 59 | 60 | model = Sequential() 61 | model.add(Dense(10, input_dim=784, init='uniform')) 62 | model.add(Activation('softmax')) 63 | model.compile(loss='categorical_crossentropy', optimizer='rmsprop') 64 | 65 | ''' 66 | saves the model weights after each epoch if the validation loss decreased 67 | ''' 68 | checkpointer = ModelCheckpoint(filepath="/tmp/weights.hdf5", verbose=1, save_best_only=True) 69 | model.fit(X_train, Y_train, batch_size=128, nb_epoch=20, verbose=0, validation_data=(X_test, Y_test), callbacks=[checkpointer]) 70 | 71 | ``` 72 | 73 | -------------------------------------------------------------------------------- /docs/templates/constraints.md: -------------------------------------------------------------------------------- 1 | ## Usage of constraints 2 | 3 | Functions from the `constraints` module allow setting constraints (eg. non-negativity) on network parameters during optimization. 4 | 5 | The penalties are applied on a per-layer basis. The exact API will depend on the layer, but the layers `Dense`, `TimeDistributedDense`, `MaxoutDense`, `Convolution1D` and `Convolution2D` have a unified API. 6 | 7 | These layers expose 2 keyword arguments: 8 | 9 | - `W_constraint` for the main weights matrix 10 | - `b_constraint` for the bias. 
11 | 12 | 13 | ```python 14 | from keras.constraints import maxnorm 15 | model.add(Dense(64, W_constraint = maxnorm(2))) 16 | ``` 17 | 18 | ## Available constraints 19 | 20 | - __maxnorm__(m=2): maximum-norm constraint 21 | - __nonneg__(): non-negativity constraint 22 | - __unitnorm__(): unit-norm constraint, enforces the matrix to have unit norm along the last axis -------------------------------------------------------------------------------- /docs/templates/initializations.md: -------------------------------------------------------------------------------- 1 | 2 | ## Usage of initializations 3 | 4 | Initializations define the way to set the initial random weights of Keras layers. 5 | 6 | The keyword arguments used for passing initializations to layers will depend on the layer. Usually it is simply `init`: 7 | 8 | ```python 9 | model.add(Dense(64, init='uniform')) 10 | ``` 11 | 12 | ## Available initializations 13 | 14 | - __uniform__ 15 | - __lecun_uniform__: Uniform initialization scaled by the square root of the number of inputs (LeCun 98). 16 | - __normal__ 17 | - __identity__: Use with square 2D layers (`shape[0] == shape[1]`). 18 | - __orthogonal__: Use with square 2D layers (`shape[0] == shape[1]`). 19 | - __zero__ 20 | - __glorot_normal__: Gaussian initialization scaled by fan_in + fan_out (Glorot 2010) 21 | - __glorot_uniform__ 22 | - __he_normal__: Gaussian initialization scaled by fan_in (He et al., 2014) 23 | - __he_uniform__ 24 | 25 | 26 | An initialization may be passed as a string (must match one of the available initializations above), or as a callable. 27 | If a callable, then it must take two arguments: `shape` (shape of the variable to initialize) and `name` (name of the variable), 28 | and it must return a variable (e.g. output of `K.variable()`): 29 | 30 | ```python 31 | from keras import backend as K 32 | import numpy as np 33 | 34 | def my_init(shape, name=None): 35 | value = np.random.random(shape) 36 | return K.variable(value, name=name) 37 | 38 | model.add(Dense(64, init=my_init)) 39 | ``` 40 | 41 | You could also use functions from `keras.initializations` in this way: 42 | 43 | ```python 44 | from keras import initializations 45 | 46 | def my_init(shape, name=None): 47 | return initializations.normal(shape, scale=0.01, name=name) 48 | 49 | model.add(Dense(64, init=my_init)) 50 | ``` -------------------------------------------------------------------------------- /docs/templates/layers/about-keras-layers.md: -------------------------------------------------------------------------------- 1 | # About Keras layers 2 | 3 | All Keras layers have a number of methods in common: 4 | 5 | - `layer.get_weights()`: returns the weights of the layer as a list of Numpy arrays. 6 | - `layer.set_weights(weights)`: sets the weights of the layer from a list of Numpy arrays (with the same shapes as the output of `get_weights`). 7 | - `layer.get_config()`: returns a dictionary containing the configuration of the layer. The layer can be reinstantiated from its config via: 8 | ```python 9 | from keras.utils.layer_utils import layer_from_config 10 | 11 | config = layer.get_config() 12 | layer = layer_from_config(config) 13 | ``` 14 | 15 | If a layer has a single node (i.e. 
if it isn't a shared layer), you can get its input tensor, output tensor, input shape and output shape via: 16 | 17 | - `layer.input` 18 | - `layer.output` 19 | - `layer.input_shape` 20 | - `layer.output_shape` 21 | 22 | If the layer has multiple nodes (see: [the concept of layer node and shared layers](/getting-started/functional-api-guide/#the-concept-of-layer-node)), you can use the following methods: 23 | 24 | - `layer.get_input_at(node_index)` 25 | - `layer.get_output_at(node_index)` 26 | - `layer.get_input_shape_at(node_index)` 27 | - `layer.get_output_shape_at(node_index)` -------------------------------------------------------------------------------- /docs/templates/layers/writing-your-own-keras-layers.md: -------------------------------------------------------------------------------- 1 | # Writing your own Keras layers 2 | 3 | For simple, stateless custom operations, you are probably better off using `layers.core.Lambda` layers. But for any custom operation that has trainable weights, you should implement your own layer. 4 | 5 | Here is the skeleton of a Keras layer. There are only three methods you need to implement: 6 | 7 | - `build(input_shape)`: this is where you will define your weights. Trainable weights should be added to the list `self.trainable_weights`. Other attributes of note are: `self.non_trainable_weights` (list) and `self.updates` (list of update tuples (tensor, new_tensor)). For an example of how to use `non_trainable_weights` and `updates`, see the code for the `BatchNormalization` layer. 8 | - `call(x)`: this is where the layer's logic lives. Unless you want your layer to support masking, you only have to care about the first argument passed to `call`: the input tensor. 9 | - `get_output_shape_for(input_shape)`: in case your layer modifies the shape of its input, you should specify here the shape transformation logic. This allows Keras to do automatic shape inference. 10 | 11 | ```python 12 | from keras import backend as K 13 | from keras.engine.topology import Layer 14 | import numpy as np 15 | 16 | class MyLayer(Layer): 17 | def __init__(self, output_dim, **kwargs): 18 | self.output_dim = output_dim 19 | super(MyLayer, self).__init__(**kwargs) 20 | 21 | def build(self, input_shape): 22 | input_dim = input_shape[1] 23 | initial_weight_value = np.random.random((input_dim, self.output_dim)) 24 | self.W = K.variable(initial_weight_value) 25 | self.trainable_weights = [self.W] 26 | 27 | def call(self, x, mask=None): 28 | return K.dot(x, self.W) 29 | 30 | def get_output_shape_for(self, input_shape): 31 | return (input_shape[0], self.output_dim) 32 | ``` 33 | 34 | The existing Keras layers provide ample examples of how to implement almost anything. Never hesitate to read the source code! -------------------------------------------------------------------------------- /docs/templates/metrics.md: -------------------------------------------------------------------------------- 1 | 2 | ## Usage of metrics 3 | 4 | A metric is a function that is used to judge the performance of your model. Metric functions are to be supplied in the `metrics` parameter when a model is compiled. 5 | 6 | A metric function is similar to an [objective function](/objectives), except that the results from evaluating a metric are not used when training the model. 7 | 8 | You can either pass the name of an existing metric, or pass a Theano/TensorFlow symbolic function (see [Custom metrics](#custom-metrics)). 9 | 10 | #### Arguments 11 | - __y_true__: True labels. Theano/TensorFlow tensor.
12 | - __y_pred__: Predictions. Theano/TensorFlow tensor of the same shape as y_true. 13 | 14 | #### Returns 15 | Single tensor value representing the mean of the output array across all 16 | datapoints. 17 | 18 | ---- 19 | 20 | ## Available metrics 21 | 22 | 23 | {{autogenerated}} 24 | 25 | ---- 26 | 27 | ## Custom metrics 28 | 29 | Custom metrics can be defined and passed via the compilation step. The 30 | function would need to take `(y_true, y_pred)` as arguments and return 31 | either a single tensor value or a dict `metric_name -> metric_value`. 32 | 33 | ```python 34 | # for custom metrics 35 | import keras.backend as K 36 | 37 | def mean_pred(y_true, y_pred): 38 | return K.mean(y_pred) 39 | 40 | def false_rates(y_true, y_pred): 41 | false_neg = ... 42 | false_pos = ... 43 | return { 44 | 'false_neg': false_neg, 45 | 'false_pos': false_pos, 46 | } 47 | 48 | model.compile(optimizer='rmsprop', 49 | loss='binary_crossentropy', 50 | metrics=['accuracy', mean_pred, false_rates]) 51 | ``` 52 | -------------------------------------------------------------------------------- /docs/templates/models/about-keras-models.md: -------------------------------------------------------------------------------- 1 | # About Keras models 2 | 3 | There are two types of models available in Keras: [the Sequential model](/models/sequential) and [the Model class used with functional API](/models/model). 4 | 5 | These models have a number of methods in common: 6 | 7 | - `model.summary()`: prints a summary representation of your model. 8 | - `model.get_config()`: returns a dictionary containing the configuration of the model. The model can be reinstantiated from its config via: 9 | ```python 10 | config = model.get_config() 11 | model = Model.from_config(config) 12 | # or, for Sequential: 13 | model = Sequential.from_config(config) 14 | ``` 15 | 16 | - `model.get_weights()`: returns a list of all weight tensors in the model, as Numpy arrays. 17 | - `model.set_weights(weights)`: sets the values of the weights of the model, from a list of Numpy arrays. The arrays in the list should have the same shape as those returned by `get_weights()`. 18 | - `model.to_json()`: returns a representation of the model as a JSON string. Note that the representation does not include the weights, only the architecture. You can reinstantiate the same model (with reinitialized weights) from the JSON string via: 19 | ```python 20 | from models import model_from_json 21 | 22 | json_string = model.to_json() 23 | model = model_from_json(json_string) 24 | ``` 25 | - `model.to_yaml()`: returns a representation of the model as a YAML string. Note that the representation does not include the weights, only the architecture. You can reinstantiate the same model (with reinitialized weights) from the YAML string via: 26 | ```python 27 | from models import model_from_yaml 28 | 29 | yaml_string = model.to_yaml() 30 | model = model_from_yaml(yaml_string) 31 | ``` 32 | - `model.save_weights(filepath)`: saves the weights of the model as a HDF5 file. 33 | - `model.load_weights(filepath, by_name=False)`: loads the weights of the model from a HDF5 file (created by `save_weights`). By default, the architecture is expected to be unchanged. To load weights into a different architecture (with some layers in common), use `by_name=True` to load only those layers with the same name. 
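As an illustration, here is a minimal sketch of a typical save/restore round trip using the methods above (the file name is arbitrary):

```python
from keras.models import model_from_json

json_string = model.to_json()              # architecture only
model.save_weights('my_model_weights.h5')  # weights only (HDF5)

# later: rebuild the same architecture and restore the weights
model = model_from_json(json_string)
model.load_weights('my_model_weights.h5')
```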
-------------------------------------------------------------------------------- /docs/templates/models/model.md: -------------------------------------------------------------------------------- 1 | # Model class API 2 | 3 | In the functional API, given an input tensor and output tensor, you can instantiate a `Model` via: 4 | 5 | ```python 6 | from keras.models import Model 7 | from keras.layers import Input, Dense 8 | 9 | a = Input(shape=(32,)) 10 | b = Dense(32)(a) 11 | model = Model(input=a, output=b) 12 | ``` 13 | 14 | This model will include all layers required in the computation of `b` given `a`. 15 | 16 | In the case of multi-input or multi-output models, you can use lists as well: 17 | 18 | ```python 19 | model = Model(input=[a1, a2], output=[b1, b3, b3]) 20 | ``` 21 | 22 | For a detailed introduction of what `Model` can do, read [this guide to the Keras functional API](/getting-started/functional-api-guide). 23 | 24 | ## Useful attributes of Model 25 | 26 | - `model.layers` is a flattened list of the layers comprising the model graph. 27 | - `model.inputs` is the list of input tensors. 28 | - `model.outputs` is the list of output tensors. 29 | 30 | ## Methods 31 | 32 | {{autogenerated}} 33 | -------------------------------------------------------------------------------- /docs/templates/models/sequential.md: -------------------------------------------------------------------------------- 1 | # The Sequential model API 2 | 3 | To get started, read [this guide to the Keras Sequential model](/getting-started/sequential-model-guide). 4 | 5 | ## Useful attributes of Model 6 | 7 | - `model.layers` is a list of the layers added to the model. 8 | 9 | 10 | ---- 11 | 12 | ## Sequential model methods 13 | 14 | {{autogenerated}} -------------------------------------------------------------------------------- /docs/templates/objectives.md: -------------------------------------------------------------------------------- 1 | 2 | ## Usage of objectives 3 | 4 | An objective function (or loss function, or optimization score function) is one of the two parameters required to compile a model: 5 | 6 | ```python 7 | model.compile(loss='mean_squared_error', optimizer='sgd') 8 | ``` 9 | 10 | You can either pass the name of an existing objective, or pass a Theano/TensorFlow symbolic function that returns a scalar for each data-point and takes the following two arguments: 11 | 12 | - __y_true__: True labels. Theano/TensorFlow tensor. 13 | - __y_pred__: Predictions. Theano/TensorFlow tensor of the same shape as y_true. 14 | 15 | The actual optimized objective is the mean of the output array across all datapoints. 16 | 17 | For a few examples of such functions, check out the [objectives source](https://github.com/fchollet/keras/blob/master/keras/objectives.py). 18 | 19 | ## Available objectives 20 | 21 | - __mean_squared_error__ / __mse__ 22 | - __mean_absolute_error__ / __mae__ 23 | - __mean_absolute_percentage_error__ / __mape__ 24 | - __mean_squared_logarithmic_error__ / __msle__ 25 | - __squared_hinge__ 26 | - __hinge__ 27 | - __binary_crossentropy__: Also known as logloss. 28 | - __categorical_crossentropy__: Also known as multiclass logloss. __Note__: using this objective requires that your labels are binary arrays of shape `(nb_samples, nb_classes)`. 29 | - __sparse_categorical_crossentropy__: As above but accepts sparse labels. 
__Note__: this objective still requires that your labels have the same number of dimensions as your outputs; you may need to add a length-1 dimension to the shape of your labels, e.g with `np.expand_dims(y, -1)`. 30 | - __kullback_leibler_divergence__ / __kld__: Information gain from a predicted probability distribution Q to a true probability distribution P. Gives a measure of difference between both distributions. 31 | - __poisson__: Mean of `(predictions - targets * log(predictions))` 32 | - __cosine_proximity__: The opposite (negative) of the mean cosine proximity between predictions and targets. 33 | 34 | **Note**: when using the `categorical_crossentropy` objective, your targets should be in categorical format (e.g. if you have 10 classes, the target for each sample should be a 10-dimensional vector that is all-zeros expect for a 1 at the index corresponding to the class of the sample). In order to convert *integer targets* into *categorical targets*, you can use the Keras utility `to_categorical`: 35 | 36 | ```python 37 | from keras.utils.np_utils import to_categorical 38 | 39 | categorical_labels = to_categorical(int_labels, nb_classes=None) 40 | ``` 41 | -------------------------------------------------------------------------------- /docs/templates/optimizers.md: -------------------------------------------------------------------------------- 1 | 2 | ## Usage of optimizers 3 | 4 | An optimizer is one of the two arguments required for compiling a Keras model: 5 | 6 | ```python 7 | model = Sequential() 8 | model.add(Dense(64, init='uniform', input_dim=10)) 9 | model.add(Activation('tanh')) 10 | model.add(Activation('softmax')) 11 | 12 | sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True) 13 | model.compile(loss='mean_squared_error', optimizer=sgd) 14 | ``` 15 | 16 | You can either instantiate an optimizer before passing it to `model.compile()` , as in the above example, or you can call it by its name. In the latter case, the default parameters for the optimizer will be used. 17 | 18 | ```python 19 | # pass optimizer by name: default parameters will be used 20 | model.compile(loss='mean_squared_error', optimizer='sgd') 21 | ``` 22 | 23 | --- 24 | 25 | ## Parameters common to all Keras optimizers 26 | 27 | The parameters `clipnorm` and `clipvalue` can be used with all optimizers to control gradient clipping: 28 | 29 | ```python 30 | # all parameter gradients will be clipped to 31 | # a maximum norm of 1. 32 | sgd = SGD(lr=0.01, clipnorm=1.) 33 | ``` 34 | 35 | ```python 36 | # all parameter gradients will be clipped to 37 | # a maximum value of 0.5 and 38 | # a minimum value of -0.5. 39 | sgd = SGD(lr=0.01, clipvalue=0.5) 40 | ``` 41 | 42 | --- 43 | 44 | {{autogenerated}} -------------------------------------------------------------------------------- /docs/templates/preprocessing/sequence.md: -------------------------------------------------------------------------------- 1 | ## pad_sequences 2 | 3 | ```python 4 | keras.preprocessing.sequence.pad_sequences(sequences, maxlen=None, dtype='int32') 5 | ``` 6 | 7 | Transform a list of `nb_samples sequences` (lists of scalars) into a 2D Numpy array of shape `(nb_samples, nb_timesteps)`. `nb_timesteps` is either the `maxlen` argument if provided, or the length of the longest sequence otherwise. Sequences that are shorter than `nb_timesteps` are padded with zeros at the end. 8 | 9 | - __Return__: 2D Numpy array of shape `(nb_samples, nb_timesteps)`. 10 | 11 | - __Arguments__: 12 | - __sequences__: List of lists of int or float. 
13 | - __maxlen__: None or int. Maximum sequence length, longer sequences are truncated and shorter sequences are padded with zeros at the end. 14 | - __dtype__: datatype of the Numpy array returned. 15 | - __padding__: 'pre' or 'post', pad either before or after each sequence. 16 | - __truncating__: 'pre' or 'post', remove values from sequences larger than maxlen either in the beginning or in the end of the sequence 17 | - __value__: float, value to pad the sequences to the desired value. 18 | 19 | --- 20 | 21 | ## skipgrams 22 | 23 | ```python 24 | keras.preprocessing.sequence.skipgrams(sequence, vocabulary_size, 25 | window_size=4, negative_samples=1., shuffle=True, 26 | categorical=False, sampling_table=None) 27 | ``` 28 | 29 | Transforms a sequence of word indexes (list of int) into couples of the form: 30 | 31 | - (word, word in the same window), with label 1 (positive samples). 32 | - (word, random word from the vocabulary), with label 0 (negative samples). 33 | 34 | Read more about Skipgram in this gnomic paper by Mikolov et al.: [Efficient Estimation of Word Representations in 35 | Vector Space](http://arxiv.org/pdf/1301.3781v3.pdf) 36 | 37 | - __Return__: tuple `(couples, labels)`. 38 | - `couples` is a list of 2-elements lists of int: `[word_index, other_word_index]`. 39 | - `labels` is a list of 0 and 1, where 1 indicates that `other_word_index` was found in the same window as `word_index`, and 0 indicates that `other_word_index` was random. 40 | - if categorical is set to True, the labels are categorical, ie. 1 becomes [0,1], and 0 becomes [1, 0]. 41 | 42 | - __Arguments__: 43 | - __sequence__: list of int indexes. If using a sampling_table, the index of a word should be its the rank in the dataset (starting at 1). 44 | - __vocabulary_size__: int. 45 | - __window_size__: int. maximum distance between two words in a positive couple. 46 | - __negative_samples__: float >= 0. 0 for no negative (=random) samples. 1 for same number as positive samples. etc. 47 | - __shuffle__: boolean. Whether to shuffle the samples. 48 | - __categorical__: boolean. Whether to make the returned labels categorical. 49 | - __sampling_table__: Numpy array of shape `(vocabulary_size,)` where `sampling_table[i]` is the probability of sampling the word with index i (assumed to be i-th most common word in the dataset). 50 | 51 | 52 | --- 53 | 54 | ## make_sampling_table 55 | 56 | ```python 57 | keras.preprocessing.sequence.make_sampling_table(size, sampling_factor=1e-5) 58 | ``` 59 | 60 | Used for generating the `sampling_table` argument for `skipgrams`. `sampling_table[i]` is the probability of sampling the word i-th most common word in a dataset (more common words should be sampled less frequently, for balance). 61 | 62 | - __Return__: Numpy array of shape `(size,)`. 63 | 64 | - __Arguments__: 65 | - __size__: size of the vocabulary considered. 66 | - __sampling_factor__: lower values result in a longer probability decay (common words will be sampled less frequently). If set to 1, no subsampling will be performed (all sampling probabilities will be 1). 67 | -------------------------------------------------------------------------------- /docs/templates/preprocessing/text.md: -------------------------------------------------------------------------------- 1 | 2 | ## text_to_word_sequence 3 | 4 | ```python 5 | keras.preprocessing.text.text_to_word_sequence(text, 6 | filters=base_filter(), lower=True, split=" ") 7 | ``` 8 | 9 | Split a sentence into a list of words. 
10 | 11 | - __Return__: List of words (str). 12 | 13 | - __Arguments__: 14 | - __text__: str. 15 | - __filters__: list (or concatenation) of characters to filter out, such as punctuation. Default: base_filter(), includes basic punctuation, tabs, and newlines. 16 | - __lower__: boolean. Whether to set the text to lowercase. 17 | - __split__: str. Separator for word splitting. 18 | 19 | ## one_hot 20 | 21 | ```python 22 | keras.preprocessing.text.one_hot(text, n, 23 | filters=base_filter(), lower=True, split=" ") 24 | ``` 25 | 26 | One-hot encode a text into a list of word indexes in a vocabulary of size n. 27 | 28 | - __Return__: List of integers in [1, n]. Each integer encodes a word (unicity non-guaranteed). 29 | 30 | - __Arguments__: Same as `text_to_word_sequence` above. 31 | - __n__: int. Size of vocabulary. 32 | 33 | ## Tokenizer 34 | 35 | ```python 36 | keras.preprocessing.text.Tokenizer(nb_words=None, filters=base_filter(), 37 | lower=True, split=" ") 38 | ``` 39 | 40 | Class for vectorizing texts, or/and turning texts into sequences (=list of word indexes, where the word of rank i in the dataset (starting at 1) has index i). 41 | 42 | - __Arguments__: Same as `text_to_word_sequence` above. 43 | - __nb_words__: None or int. Maximum number of words to work with (if set, tokenization will be restricted to the top nb_words most common words in the dataset). 44 | 45 | - __Methods__: 46 | 47 | - __fit_on_texts(texts)__: 48 | - __Arguments__: 49 | - __texts__: list of texts to train on. 50 | 51 | - __texts_to_sequences(texts)__ 52 | - __Arguments__: 53 | - __texts__: list of texts to turn to sequences. 54 | - __Return__: list of sequences (one per text input). 55 | 56 | - __texts_to_sequences_generator(texts)__: generator version of the above. 57 | - __Return__: yield one sequence per input text. 58 | 59 | - __texts_to_matrix(texts)__: 60 | - __Return__: numpy array of shape `(len(texts), nb_words)`. 61 | - __Arguments__: 62 | - __texts__: list of texts to vectorize. 63 | - __mode__: one of "binary", "count", "tfidf", "freq" (default: "binary"). 64 | 65 | - __fit_on_sequences(sequences)__: 66 | - __Arguments__: 67 | - __sequences__: list of sequences to train on. 68 | 69 | - __sequences_to_matrix(sequences)__: 70 | - __Return__: numpy array of shape `(len(sequences), nb_words)`. 71 | - __Arguments__: 72 | - __sequences__: list of sequences to vectorize. 73 | - __mode__: one of "binary", "count", "tfidf", "freq" (default: "binary"). 74 | 75 | - __Attributes__: 76 | - __word_counts__: dictionary mapping words (str) to the number of times they appeared on during fit. Only set after fit_on_texts was called. 77 | - __word_docs__: dictionary mapping words (str) to the number of documents/texts they appeared on during fit. Only set after fit_on_texts was called. 78 | - __word_index__: dictionary mapping words (str) to their rank/index (int). Only set after fit_on_texts was called. 79 | - __document_count__: int. Number of documents (texts/sequences) the tokenizer was trained on. Only set after fit_on_texts or fit_on_sequences was called. 80 | 81 | 82 | -------------------------------------------------------------------------------- /docs/templates/regularizers.md: -------------------------------------------------------------------------------- 1 | ## Usage of regularizers 2 | 3 | Regularizers allow to apply penalties on layer parameters or layer activity during optimization. These penalties are incorporated in the loss function that the network optimizes. 
4 | 5 | The penalties are applied on a per-layer basis. The exact API will depend on the layer, but the layers `Dense`, `TimeDistributedDense`, `MaxoutDense`, `Convolution1D` and `Convolution2D` have a unified API. 6 | 7 | These layers expose 3 keyword arguments: 8 | 9 | - `W_regularizer`: instance of `keras.regularizers.WeightRegularizer` 10 | - `b_regularizer`: instance of `keras.regularizers.WeightRegularizer` 11 | - `activity_regularizer`: instance of `keras.regularizers.ActivityRegularizer` 12 | 13 | 14 | ## Example 15 | 16 | ```python 17 | from keras.regularizers import l2, activity_l2 18 | model.add(Dense(64, input_dim=64, W_regularizer=l2(0.01), activity_regularizer=activity_l2(0.01))) 19 | ``` 20 | 21 | ## Available penalties 22 | 23 | ```python 24 | keras.regularizers.WeightRegularizer(l1=0., l2=0.) 25 | ``` 26 | 27 | ```python 28 | keras.regularizers.ActivityRegularizer(l1=0., l2=0.) 29 | ``` 30 | 31 | ## Shortcuts 32 | 33 | These are shortcut functions available in `keras.regularizers`. 34 | 35 | - __l1__(l=0.01): L1 weight regularization penalty, also known as LASSO 36 | - __l2__(l=0.01): L2 weight regularization penalty, also known as weight decay, or Ridge 37 | - __l1l2__(l1=0.01, l2=0.01): L1-L2 weight regularization penalty, also known as ElasticNet 38 | - __activity_l1__(l=0.01): L1 activity regularization 39 | - __activity_l2__(l=0.01): L2 activity regularization 40 | - __activity_l1l2__(l1=0.01, l2=0.01): L1+L2 activity regularization 41 | -------------------------------------------------------------------------------- /docs/templates/scikit-learn-api.md: -------------------------------------------------------------------------------- 1 | # Wrappers for the Scikit-Learn API 2 | 3 | You can use `Sequential` Keras models (single-input only) as part of your Scikit-Learn workflow via the wrappers found at `keras.wrappers.scikit_learn.py`. 4 | 5 | There are two wrappers available: 6 | 7 | `keras.wrappers.scikit_learn.KerasClassifier(build_fn=None, **sk_params)`, which implements the Scikit-Learn classifier interface, 8 | 9 | `keras.wrappers.scikit_learn.KerasRegressor(build_fn=None, **sk_params)`, which implements the Scikit-Learn regressor interface. 10 | 11 | ### Arguments 12 | 13 | - __build_fn__: callable function or class instance 14 | - __sk_params__: model parameters & fitting parameters 15 | 16 | `build_fn` should construct, compile and return a Keras model, which 17 | will then be used to fit/predict. One of the following 18 | three values could be passed to build_fn: 19 | 20 | 1. A function 21 | 2. An instance of a class that implements the __call__ method 22 | 3. None. This means you implement a class that inherits from either 23 | `KerasClassifier` or `KerasRegressor`. The __call__ method of the 24 | present class will then be treated as the default build_fn. 25 | 26 | `sk_params` takes both model parameters and fitting parameters. Legal model 27 | parameters are the arguments of `build_fn`. Note that like all other 28 | estimators in scikit-learn, 'build_fn' should provide default values for 29 | its arguments, so that you could create the estimator without passing any 30 | values to `sk_params`. 31 | 32 | `sk_params` could also accept parameters for calling `fit`, `predict`, 33 | `predict_proba`, and `score` methods (e.g., `nb_epoch`, `batch_size`). 34 | fitting (predicting) parameters are selected in the following order: 35 | 36 | 1. Values passed to the dictionary arguments of 37 | `fit`, `predict`, `predict_proba`, and `score` methods 38 | 2. 
Values passed to `sk_params` 39 | 3. The default values of the `keras.models.Sequential` 40 | `fit`, `predict`, `predict_proba` and `score` methods 41 | 42 | When using scikit-learn's `grid_search` API, legal tunable parameters are 43 | those you could pass to `sk_params`, including fitting parameters. 44 | In other words, you could use `grid_search` to search for the best 45 | `batch_size` or `nb_epoch` as well as the model parameters. 46 | -------------------------------------------------------------------------------- /docs/templates/visualization.md: -------------------------------------------------------------------------------- 1 | 2 | ## Model visualization 3 | 4 | The `keras.utils.visualize_util` module provides utility functions to plot 5 | a Keras model (using graphviz). 6 | 7 | This will plot a graph of the model and save it to a file: 8 | ```python 9 | from keras.utils.visualize_util import plot 10 | plot(model, to_file='model.png') 11 | ``` 12 | 13 | `plot` takes two optional arguments: 14 | 15 | - `show_shapes` (defaults to False) controls whether output shapes are shown in the graph. 16 | - `show_layer_names` (defaults to True) controls whether layer names are shown in the graph. 17 | 18 | You can also directly obtain the `pydot.Graph` object and render it yourself, 19 | for example to show it in an ipython notebook : 20 | ```python 21 | from IPython.display import SVG 22 | from keras.utils.visualize_util import model_to_dot 23 | 24 | SVG(model_to_dot(model).create(prog='dot', format='svg')) 25 | ``` 26 | -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | # Keras examples directory 2 | 3 | [addition_rnn.py](addition_rnn.py) 4 | Implementation of sequence to sequence learning for performing addition of two numbers (as strings). 5 | 6 | [antirectifier.py](antirectifier.py) 7 | Demonstrates how to write custom layers for Keras. 8 | 9 | [babi_memnn.py](babi_memnn.py) 10 | Trains a memory network on the bAbI dataset for reading comprehension. 11 | 12 | [babi_rnn.py](babi_rnn.py) 13 | Trains a two-branch recurrent network on the bAbI dataset for reading comprehension. 14 | 15 | [cifar10_cnn.py](cifar10_cnn.py) 16 | Trains a simple deep CNN on the CIFAR10 small images dataset. 17 | 18 | [conv_filter_visualization.py](conv_filter_visualization.py) 19 | Visualization of the filters of VGG16, via gradient ascent in input space. 20 | 21 | [deep_dream.py](deep_dream.py) 22 | Deep Dreams in Keras. 23 | 24 | [image_ocr.py](image_ocr.py) 25 | Trains a convolutional stack followed by a recurrent stack and a CTC logloss function to perform optical character recognition (OCR). 26 | 27 | [imdb_bidirectional_lstm.py](imdb_bidirectional_lstm.py) 28 | Trains a Bidirectional LSTM on the IMDB sentiment classification task. 29 | 30 | [imdb_cnn.py](imdb_cnn.py) 31 | Demonstrates the use of Convolution1D for text classification. 32 | 33 | [imdb_cnn_lstm.py](imdb_cnn_lstm.py) 34 | Trains a convolutional stack followed by a recurrent stack network on the IMDB sentiment classification task. 35 | 36 | [imdb_fasttext.py](imdb_fasttext.py) 37 | Trains a FastText model on the IMDB sentiment classification task. 38 | 39 | [imdb_lstm.py](imdb_lstm.py) 40 | Trains a LSTM on the IMDB sentiment classification task. 41 | 42 | [lstm_benchmark.py](lstm_benchmark.py) 43 | Compares different LSTM implementations on the IMDB sentiment classification task. 
44 | 45 | [lstm_text_generation.py](lstm_text_generation.py) 46 | Generates text from Nietzsche's writings. 47 | 48 | [mnist_cnn.py](mnist_cnn.py) 49 | Trains a simple convnet on the MNIST dataset. 50 | 51 | [mnist_hierarchical_rnn.py](mnist_hierarchical_rnn.py) 52 | Trains a Hierarchical RNN (HRNN) to classify MNIST digits. 53 | 54 | [mnist_irnn.py](mnist_irnn.py) 55 | Reproduction of the IRNN experiment with pixel-by-pixel sequential MNIST in "A Simple Way to Initialize Recurrent Networks of Rectified Linear Units" by Le et al. 56 | 57 | [mnist_mlp.py](mnist_mlp.py) 58 | Trains a simple deep multi-layer perceptron on the MNIST dataset. 59 | 60 | [mnist_net2net.py](mnist_net2net.py) 61 | Reproduction of the Net2Net experiment with MNIST in "Net2Net: Accelerating Learning via Knowledge Transfer". 62 | 63 | [mnist_siamese_graph.py](mnist_siamese_graph.py) 64 | Trains a Siamese multi-layer perceptron on pairs of digits from the MNIST dataset. 65 | 66 | [mnist_sklearn_wrapper.py](mnist_sklearn_wrapper.py) 67 | Demonstrates how to use the sklearn wrapper. 68 | 69 | [mnist_swwae.py](mnist_swwae.py) 70 | Trains a Stacked What-Where AutoEncoder built on residual blocks on the MNIST dataset. 71 | 72 | [mnist_transfer_cnn.py](mnist_transfer_cnn.py) 73 | Transfer learning toy example. 74 | 75 | [neural_doodle.py](neural_doodle.py) 76 | Neural doodle. 77 | 78 | [neural_style_transfer.py](neural_style_transfer.py) 79 | Neural style transfer. 80 | 81 | [pretrained_word_embeddings.py](pretrained_word_embeddings.py) 82 | Loads pre-trained word embeddings (GloVe embeddings) into a frozen Keras Embedding layer, and uses it to train a text classification model on the 20 Newsgroup dataset. 83 | 84 | [reuters_mlp.py](reuters_mlp.py) 85 | Trains and evaluate a simple MLP on the Reuters newswire topic classification task. 86 | 87 | [stateful_lstm.py](stateful_lstm.py) 88 | Demonstrates how to use stateful RNNs to model long sequences efficiently. 89 | 90 | [variational_autoencoder.py](variational_autoencoder.py) 91 | Demonstrates how to build a variational autoencoder. 92 | 93 | [variational_autoencoder_deconv.py](variational_autoencoder_deconv.py) 94 | Demonstrates how to build a variational autoencoder with Keras using deconvolution layers. 95 | -------------------------------------------------------------------------------- /examples/antirectifier.py: -------------------------------------------------------------------------------- 1 | '''The example demonstrates how to write custom layers for Keras. 2 | 3 | We build a custom activation layer called 'Antirectifier', 4 | which modifies the shape of the tensor that passes through it. 5 | We need to specify two methods: `get_output_shape_for` and `call`. 6 | 7 | Note that the same result can also be achieved via a Lambda layer. 8 | 9 | Because our custom layer is written with primitives from the Keras 10 | backend (`K`), our code can run both on TensorFlow and Theano. 11 | ''' 12 | 13 | from __future__ import print_function 14 | from keras.models import Sequential 15 | from keras.layers import Dense, Dropout, Layer, Activation 16 | from keras.datasets import mnist 17 | from keras import backend as K 18 | from keras.utils import np_utils 19 | 20 | 21 | class Antirectifier(Layer): 22 | '''This is the combination of a sample-wise 23 | L2 normalization with the concatenation of the 24 | positive part of the input with the negative part 25 | of the input. The result is a tensor of samples that are 26 | twice as large as the input samples. 
27 | 28 | It can be used in place of a ReLU. 29 | 30 | # Input shape 31 | 2D tensor of shape (samples, n) 32 | 33 | # Output shape 34 | 2D tensor of shape (samples, 2*n) 35 | 36 | # Theoretical justification 37 | When applying ReLU, assuming that the distribution 38 | of the previous output is approximately centered around 0., 39 | you are discarding half of your input. This is inefficient. 40 | 41 | Antirectifier returns all-positive outputs like ReLU, 42 | without discarding any data. 43 | 44 | Tests on MNIST show that Antirectifier can train networks 45 | with half as many parameters yet with comparable 46 | classification accuracy to an equivalent ReLU-based network. 47 | ''' 48 | def get_output_shape_for(self, input_shape): 49 | shape = list(input_shape) 50 | assert len(shape) == 2 # only valid for 2D tensors 51 | shape[-1] *= 2 52 | return tuple(shape) 53 | 54 | def call(self, x, mask=None): 55 | x -= K.mean(x, axis=1, keepdims=True) 56 | x = K.l2_normalize(x, axis=1) 57 | pos = K.relu(x) 58 | neg = K.relu(-x) 59 | return K.concatenate([pos, neg], axis=1) 60 | 61 | # global parameters 62 | batch_size = 128 63 | nb_classes = 10 64 | nb_epoch = 40 65 | 66 | # the data, shuffled and split between train and test sets 67 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 68 | 69 | X_train = X_train.reshape(60000, 784) 70 | X_test = X_test.reshape(10000, 784) 71 | X_train = X_train.astype('float32') 72 | X_test = X_test.astype('float32') 73 | X_train /= 255 74 | X_test /= 255 75 | print(X_train.shape[0], 'train samples') 76 | print(X_test.shape[0], 'test samples') 77 | 78 | # convert class vectors to binary class matrices 79 | Y_train = np_utils.to_categorical(y_train, nb_classes) 80 | Y_test = np_utils.to_categorical(y_test, nb_classes) 81 | 82 | # build the model 83 | model = Sequential() 84 | model.add(Dense(256, input_shape=(784,))) 85 | model.add(Antirectifier()) 86 | model.add(Dropout(0.1)) 87 | model.add(Dense(256)) 88 | model.add(Antirectifier()) 89 | model.add(Dropout(0.1)) 90 | model.add(Dense(10)) 91 | model.add(Activation('softmax')) 92 | 93 | # compile the model 94 | model.compile(loss='categorical_crossentropy', 95 | optimizer='rmsprop', 96 | metrics=['accuracy']) 97 | 98 | # train the model 99 | model.fit(X_train, Y_train, 100 | batch_size=batch_size, nb_epoch=nb_epoch, 101 | verbose=1, validation_data=(X_test, Y_test)) 102 | 103 | # next, compare with an equivalent network 104 | # with 2x bigger Dense layers and ReLU 105 | -------------------------------------------------------------------------------- /examples/cifar10_cnn.py: -------------------------------------------------------------------------------- 1 | '''Train a simple deep CNN on the CIFAR10 small images dataset. 2 | 3 | GPU run command: 4 | THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 python cifar10_cnn.py 5 | 6 | It gets down to 0.65 test logloss in 25 epochs, and down to 0.55 after 50 epochs. 7 | (it's still underfitting at that point, though). 8 | 9 | Note: the data was pickled with Python 2, and some encoding issues might prevent you 10 | from loading it in Python 3. You might have to load it in Python 2, 11 | save it in a different format, load it in Python 3 and repickle it.
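The Antirectifier example above notes that the same result can be achieved with a `Lambda` layer. A minimal sketch of that equivalent, reusing the same backend primitives (the helper names and the small model around them are illustrative, not part of the original script):

```python
from keras.models import Sequential
from keras.layers import Dense, Lambda, Activation
from keras import backend as K


def antirectifier(x):
    # same computation as Antirectifier.call: center, L2-normalize, split signs
    x -= K.mean(x, axis=1, keepdims=True)
    x = K.l2_normalize(x, axis=1)
    return K.concatenate([K.relu(x), K.relu(-x)], axis=1)


def antirectifier_output_shape(input_shape):
    # the feature dimension doubles, exactly as in get_output_shape_for
    shape = list(input_shape)
    shape[-1] *= 2
    return tuple(shape)


model = Sequential()
model.add(Dense(256, input_shape=(784,)))
model.add(Lambda(antirectifier, output_shape=antirectifier_output_shape))
model.add(Dense(10))
model.add(Activation('softmax'))
```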
12 | ''' 13 | 14 | from __future__ import print_function 15 | from keras.datasets import cifar10 16 | from keras.preprocessing.image import ImageDataGenerator 17 | from keras.models import Sequential 18 | from keras.layers import Dense, Dropout, Activation, Flatten 19 | from keras.layers import Convolution2D, MaxPooling2D 20 | from keras.optimizers import SGD 21 | from keras.utils import np_utils 22 | 23 | batch_size = 32 24 | nb_classes = 10 25 | nb_epoch = 200 26 | data_augmentation = True 27 | 28 | # input image dimensions 29 | img_rows, img_cols = 32, 32 30 | # the CIFAR10 images are RGB 31 | img_channels = 3 32 | 33 | # the data, shuffled and split between train and test sets 34 | (X_train, y_train), (X_test, y_test) = cifar10.load_data() 35 | print('X_train shape:', X_train.shape) 36 | print(X_train.shape[0], 'train samples') 37 | print(X_test.shape[0], 'test samples') 38 | 39 | # convert class vectors to binary class matrices 40 | Y_train = np_utils.to_categorical(y_train, nb_classes) 41 | Y_test = np_utils.to_categorical(y_test, nb_classes) 42 | 43 | model = Sequential() 44 | 45 | model.add(Convolution2D(32, 3, 3, border_mode='same', 46 | input_shape=X_train.shape[1:])) 47 | model.add(Activation('relu')) 48 | model.add(Convolution2D(32, 3, 3)) 49 | model.add(Activation('relu')) 50 | model.add(MaxPooling2D(pool_size=(2, 2))) 51 | model.add(Dropout(0.25)) 52 | 53 | model.add(Convolution2D(64, 3, 3, border_mode='same')) 54 | model.add(Activation('relu')) 55 | model.add(Convolution2D(64, 3, 3)) 56 | model.add(Activation('relu')) 57 | model.add(MaxPooling2D(pool_size=(2, 2))) 58 | model.add(Dropout(0.25)) 59 | 60 | model.add(Flatten()) 61 | model.add(Dense(512)) 62 | model.add(Activation('relu')) 63 | model.add(Dropout(0.5)) 64 | model.add(Dense(nb_classes)) 65 | model.add(Activation('softmax')) 66 | 67 | # let's train the model using SGD + momentum (how original). 
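The optimizer configured just below combines Nesterov momentum with a small time-based decay. Assuming the usual Keras 1.x rule that the effective learning rate at update `t` is `lr / (1 + decay * t)` (an assumption worth confirming against `keras/optimizers.py`), the decay term only becomes noticeable after a very large number of updates:

```python
# rough sketch of the assumed time-based decay schedule
lr0, decay = 0.01, 1e-6
for t in (0, 10000, 100000, 1000000):
    print(t, lr0 / (1. + decay * t))
```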
68 | sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True) 69 | model.compile(loss='categorical_crossentropy', 70 | optimizer=sgd, 71 | metrics=['accuracy']) 72 | 73 | X_train = X_train.astype('float32') 74 | X_test = X_test.astype('float32') 75 | X_train /= 255 76 | X_test /= 255 77 | 78 | if not data_augmentation: 79 | print('Not using data augmentation.') 80 | model.fit(X_train, Y_train, 81 | batch_size=batch_size, 82 | nb_epoch=nb_epoch, 83 | validation_data=(X_test, Y_test), 84 | shuffle=True) 85 | else: 86 | print('Using real-time data augmentation.') 87 | 88 | # this will do preprocessing and realtime data augmentation 89 | datagen = ImageDataGenerator( 90 | featurewise_center=False, # set input mean to 0 over the dataset 91 | samplewise_center=False, # set each sample mean to 0 92 | featurewise_std_normalization=False, # divide inputs by std of the dataset 93 | samplewise_std_normalization=False, # divide each input by its std 94 | zca_whitening=False, # apply ZCA whitening 95 | rotation_range=0, # randomly rotate images in the range (degrees, 0 to 180) 96 | width_shift_range=0.1, # randomly shift images horizontally (fraction of total width) 97 | height_shift_range=0.1, # randomly shift images vertically (fraction of total height) 98 | horizontal_flip=True, # randomly flip images 99 | vertical_flip=False) # randomly flip images 100 | 101 | # compute quantities required for featurewise normalization 102 | # (std, mean, and principal components if ZCA whitening is applied) 103 | datagen.fit(X_train) 104 | 105 | # fit the model on the batches generated by datagen.flow() 106 | model.fit_generator(datagen.flow(X_train, Y_train, 107 | batch_size=batch_size), 108 | samples_per_epoch=X_train.shape[0], 109 | nb_epoch=nb_epoch, 110 | validation_data=(X_test, Y_test)) 111 | -------------------------------------------------------------------------------- /examples/conv_filter_visualization.py: -------------------------------------------------------------------------------- 1 | '''Visualization of the filters of VGG16, via gradient ascent in input space. 2 | 3 | This script can run on CPU in a few minutes (with the TensorFlow backend). 4 | 5 | Results example: http://i.imgur.com/4nj4KjN.jpg 6 | ''' 7 | from __future__ import print_function 8 | from scipy.misc import imsave 9 | import numpy as np 10 | import time 11 | from keras.applications import vgg16 12 | from keras import backend as K 13 | 14 | # dimensions of the generated pictures for each filter. 15 | img_width = 128 16 | img_height = 128 17 | 18 | # the name of the layer we want to visualize 19 | # (see model definition at keras/applications/vgg16.py) 20 | layer_name = 'block5_conv1' 21 | 22 | # util function to convert a tensor into a valid image 23 | def deprocess_image(x): 24 | # normalize tensor: center on 0., ensure std is 0.1 25 | x -= x.mean() 26 | x /= (x.std() + 1e-5) 27 | x *= 0.1 28 | 29 | # clip to [0, 1] 30 | x += 0.5 31 | x = np.clip(x, 0, 1) 32 | 33 | # convert to RGB array 34 | x *= 255 35 | if K.image_dim_ordering() == 'th': 36 | x = x.transpose((1, 2, 0)) 37 | x = np.clip(x, 0, 255).astype('uint8') 38 | return x 39 | 40 | # build the VGG16 network with ImageNet weights 41 | model = vgg16.VGG16(weights='imagenet', include_top=False) 42 | print('Model loaded.') 43 | 44 | model.summary() 45 | 46 | # this is the placeholder for the input images 47 | input_img = model.input 48 | 49 | # get the symbolic outputs of each "key" layer (we gave them unique names). 
50 | layer_dict = dict([(layer.name, layer) for layer in model.layers[1:]]) 51 | 52 | 53 | def normalize(x): 54 | # utility function to normalize a tensor by its L2 norm 55 | return x / (K.sqrt(K.mean(K.square(x))) + 1e-5) 56 | 57 | 58 | kept_filters = [] 59 | for filter_index in range(0, 200): 60 | # we only scan through the first 200 filters, 61 | # but there are actually 512 of them 62 | print('Processing filter %d' % filter_index) 63 | start_time = time.time() 64 | 65 | # we build a loss function that maximizes the activation 66 | # of the nth filter of the layer considered 67 | layer_output = layer_dict[layer_name].output 68 | if K.image_dim_ordering() == 'th': 69 | loss = K.mean(layer_output[:, filter_index, :, :]) 70 | else: 71 | loss = K.mean(layer_output[:, :, :, filter_index]) 72 | 73 | # we compute the gradient of the input picture wrt this loss 74 | grads = K.gradients(loss, input_img)[0] 75 | 76 | # normalization trick: we normalize the gradient 77 | grads = normalize(grads) 78 | 79 | # this function returns the loss and grads given the input picture 80 | iterate = K.function([input_img], [loss, grads]) 81 | 82 | # step size for gradient ascent 83 | step = 1. 84 | 85 | # we start from a gray image with some random noise 86 | if K.image_dim_ordering() == 'th': 87 | input_img_data = np.random.random((1, 3, img_width, img_height)) 88 | else: 89 | input_img_data = np.random.random((1, img_width, img_height, 3)) 90 | input_img_data = (input_img_data - 0.5) * 20 + 128 91 | 92 | # we run gradient ascent for 20 steps 93 | for i in range(20): 94 | loss_value, grads_value = iterate([input_img_data]) 95 | input_img_data += grads_value * step 96 | 97 | print('Current loss value:', loss_value) 98 | if loss_value <= 0.: 99 | # some filters get stuck to 0, we can skip them 100 | break 101 | 102 | # decode the resulting input image 103 | if loss_value > 0: 104 | img = deprocess_image(input_img_data[0]) 105 | kept_filters.append((img, loss_value)) 106 | end_time = time.time() 107 | print('Filter %d processed in %ds' % (filter_index, end_time - start_time)) 108 | 109 | # we will stich the best 64 filters on a 8 x 8 grid. 110 | n = 8 111 | 112 | # the filters that have the highest loss are assumed to be better-looking. 113 | # we will only keep the top 64 filters. 114 | kept_filters.sort(key=lambda x: x[1], reverse=True) 115 | kept_filters = kept_filters[:n * n] 116 | 117 | # build a black picture with enough space for 118 | # our 8 x 8 filters of size 128 x 128, with a 5px margin in between 119 | margin = 5 120 | width = n * img_width + (n - 1) * margin 121 | height = n * img_height + (n - 1) * margin 122 | stitched_filters = np.zeros((width, height, 3)) 123 | 124 | # fill the picture with our saved filters 125 | for i in range(n): 126 | for j in range(n): 127 | img, loss = kept_filters[i * n + j] 128 | stitched_filters[(img_width + margin) * i: (img_width + margin) * i + img_width, 129 | (img_height + margin) * j: (img_height + margin) * j + img_height, :] = img 130 | 131 | # save the result to disk 132 | imsave('stitched_filters_%dx%d.png' % (n, n), stitched_filters) 133 | -------------------------------------------------------------------------------- /examples/imdb_bidirectional_lstm.py: -------------------------------------------------------------------------------- 1 | '''Train a Bidirectional LSTM on the IMDB sentiment classification task. 2 | 3 | Output after 4 epochs on CPU: ~0.8146 4 | Time per epoch on CPU (Core i7): ~150s. 
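The per-filter loop in conv_filter_visualization.py above repeats the same gradient-ascent recipe for every filter; factored into a helper (an illustrative refactor using only the backend calls already present in the script), the core reads:

```python
import numpy as np
from keras import backend as K


def gradient_ascent(loss, input_img, start_img, steps=20, step_size=1.):
    # gradient of the loss w.r.t. the input picture, with the same L2 normalization trick
    grads = K.gradients(loss, input_img)[0]
    grads = grads / (K.sqrt(K.mean(K.square(grads))) + 1e-5)
    iterate = K.function([input_img], [loss, grads])

    img = np.copy(start_img)
    loss_value = 0.
    for _ in range(steps):
        loss_value, grads_value = iterate([img])
        img += grads_value * step_size
        if loss_value <= 0.:
            break  # this filter got stuck at zero activation; give up early
    return img, loss_value
```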
5 | ''' 6 | 7 | from __future__ import print_function 8 | import numpy as np 9 | np.random.seed(1337) # for reproducibility 10 | 11 | from keras.preprocessing import sequence 12 | from keras.models import Sequential 13 | from keras.layers import Dense, Dropout, Embedding, LSTM, Input, Bidirectional 14 | from keras.datasets import imdb 15 | 16 | 17 | max_features = 20000 18 | maxlen = 100 # cut texts after this number of words (among top max_features most common words) 19 | batch_size = 32 20 | 21 | print('Loading data...') 22 | (X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features) 23 | print(len(X_train), 'train sequences') 24 | print(len(X_test), 'test sequences') 25 | 26 | print("Pad sequences (samples x time)") 27 | X_train = sequence.pad_sequences(X_train, maxlen=maxlen) 28 | X_test = sequence.pad_sequences(X_test, maxlen=maxlen) 29 | print('X_train shape:', X_train.shape) 30 | print('X_test shape:', X_test.shape) 31 | y_train = np.array(y_train) 32 | y_test = np.array(y_test) 33 | 34 | model = Sequential() 35 | model.add(Embedding(max_features, 128, input_length=maxlen)) 36 | model.add(Bidirectional(LSTM(64))) 37 | model.add(Dropout(0.5)) 38 | model.add(Dense(1, activation='sigmoid')) 39 | 40 | # try using different optimizers and different optimizer configs 41 | model.compile('adam', 'binary_crossentropy', metrics=['accuracy']) 42 | 43 | print('Train...') 44 | model.fit(X_train, y_train, 45 | batch_size=batch_size, 46 | nb_epoch=4, 47 | validation_data=[X_test, y_test]) 48 | -------------------------------------------------------------------------------- /examples/imdb_cnn.py: -------------------------------------------------------------------------------- 1 | '''This example demonstrates the use of Convolution1D for text classification. 2 | 3 | Gets to 0.89 test accuracy after 2 epochs. 4 | 90s/epoch on Intel i5 2.4Ghz CPU. 5 | 10s/epoch on Tesla K40 GPU. 
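The `Bidirectional` wrapper used above concatenates the forward and backward outputs by default; the wrapper also takes a `merge_mode` argument in this era of the API (an assumption worth checking against `keras/layers/wrappers.py`), so summing instead of concatenating is a one-line change:

```python
from keras.models import Sequential
from keras.layers import Embedding, Bidirectional, LSTM, Dropout, Dense

model = Sequential()
model.add(Embedding(20000, 128, input_length=100))
# 'sum' keeps the output at 64 features instead of the 128 produced by 'concat'
model.add(Bidirectional(LSTM(64), merge_mode='sum'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))
model.compile('adam', 'binary_crossentropy', metrics=['accuracy'])
```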
6 | 7 | ''' 8 | 9 | from __future__ import print_function 10 | import numpy as np 11 | np.random.seed(1337) # for reproducibility 12 | 13 | from keras.preprocessing import sequence 14 | from keras.models import Sequential 15 | from keras.layers import Dense, Dropout, Activation 16 | from keras.layers import Embedding 17 | from keras.layers import Convolution1D, GlobalMaxPooling1D 18 | from keras.datasets import imdb 19 | from keras import backend as K 20 | 21 | 22 | # set parameters: 23 | max_features = 5000 24 | maxlen = 400 25 | batch_size = 32 26 | embedding_dims = 50 27 | nb_filter = 250 28 | filter_length = 3 29 | hidden_dims = 250 30 | nb_epoch = 2 31 | 32 | print('Loading data...') 33 | (X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features) 34 | print(len(X_train), 'train sequences') 35 | print(len(X_test), 'test sequences') 36 | 37 | print('Pad sequences (samples x time)') 38 | X_train = sequence.pad_sequences(X_train, maxlen=maxlen) 39 | X_test = sequence.pad_sequences(X_test, maxlen=maxlen) 40 | print('X_train shape:', X_train.shape) 41 | print('X_test shape:', X_test.shape) 42 | 43 | print('Build model...') 44 | model = Sequential() 45 | 46 | # we start off with an efficient embedding layer which maps 47 | # our vocab indices into embedding_dims dimensions 48 | model.add(Embedding(max_features, 49 | embedding_dims, 50 | input_length=maxlen, 51 | dropout=0.2)) 52 | 53 | # we add a Convolution1D, which will learn nb_filter 54 | # word group filters of size filter_length: 55 | model.add(Convolution1D(nb_filter=nb_filter, 56 | filter_length=filter_length, 57 | border_mode='valid', 58 | activation='relu', 59 | subsample_length=1)) 60 | # we use max pooling: 61 | model.add(GlobalMaxPooling1D()) 62 | 63 | # We add a vanilla hidden layer: 64 | model.add(Dense(hidden_dims)) 65 | model.add(Dropout(0.2)) 66 | model.add(Activation('relu')) 67 | 68 | # We project onto a single unit output layer, and squash it with a sigmoid: 69 | model.add(Dense(1)) 70 | model.add(Activation('sigmoid')) 71 | 72 | model.compile(loss='binary_crossentropy', 73 | optimizer='adam', 74 | metrics=['accuracy']) 75 | model.fit(X_train, y_train, 76 | batch_size=batch_size, 77 | nb_epoch=nb_epoch, 78 | validation_data=(X_test, y_test)) 79 | -------------------------------------------------------------------------------- /examples/imdb_cnn_lstm.py: -------------------------------------------------------------------------------- 1 | '''Train a recurrent convolutional network on the IMDB sentiment 2 | classification task. 3 | 4 | Gets to 0.8498 test accuracy after 2 epochs. 41s/epoch on K520 GPU. 5 | ''' 6 | from __future__ import print_function 7 | import numpy as np 8 | np.random.seed(1337) # for reproducibility 9 | 10 | from keras.preprocessing import sequence 11 | from keras.models import Sequential 12 | from keras.layers import Dense, Dropout, Activation 13 | from keras.layers import Embedding 14 | from keras.layers import LSTM 15 | from keras.layers import Convolution1D, MaxPooling1D 16 | from keras.datasets import imdb 17 | 18 | 19 | # Embedding 20 | max_features = 20000 21 | maxlen = 100 22 | embedding_size = 128 23 | 24 | # Convolution 25 | filter_length = 5 26 | nb_filter = 64 27 | pool_length = 4 28 | 29 | # LSTM 30 | lstm_output_size = 70 31 | 32 | # Training 33 | batch_size = 30 34 | nb_epoch = 2 35 | 36 | ''' 37 | Note: 38 | batch_size is highly sensitive. 39 | Only 2 epochs are needed as the dataset is very small. 
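`GlobalMaxPooling1D`, used in imdb_cnn.py above, simply keeps each filter's strongest response over all time steps; in plain numpy terms (the random array here is a stand-in for the `(samples, steps, features)` output of `Convolution1D`):

```python
import numpy as np

activations = np.random.rand(2, 398, 250)   # (samples, steps, nb_filter)
pooled = activations.max(axis=1)            # one max per filter, per sample
print(pooled.shape)                         # (2, 250)
```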
40 | ''' 41 | 42 | print('Loading data...') 43 | (X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features) 44 | print(len(X_train), 'train sequences') 45 | print(len(X_test), 'test sequences') 46 | 47 | print('Pad sequences (samples x time)') 48 | X_train = sequence.pad_sequences(X_train, maxlen=maxlen) 49 | X_test = sequence.pad_sequences(X_test, maxlen=maxlen) 50 | print('X_train shape:', X_train.shape) 51 | print('X_test shape:', X_test.shape) 52 | 53 | print('Build model...') 54 | 55 | model = Sequential() 56 | model.add(Embedding(max_features, embedding_size, input_length=maxlen)) 57 | model.add(Dropout(0.25)) 58 | model.add(Convolution1D(nb_filter=nb_filter, 59 | filter_length=filter_length, 60 | border_mode='valid', 61 | activation='relu', 62 | subsample_length=1)) 63 | model.add(MaxPooling1D(pool_length=pool_length)) 64 | model.add(LSTM(lstm_output_size)) 65 | model.add(Dense(1)) 66 | model.add(Activation('sigmoid')) 67 | 68 | model.compile(loss='binary_crossentropy', 69 | optimizer='adam', 70 | metrics=['accuracy']) 71 | 72 | print('Train...') 73 | model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=nb_epoch, 74 | validation_data=(X_test, y_test)) 75 | score, acc = model.evaluate(X_test, y_test, batch_size=batch_size) 76 | print('Test score:', score) 77 | print('Test accuracy:', acc) 78 | -------------------------------------------------------------------------------- /examples/imdb_lstm.py: -------------------------------------------------------------------------------- 1 | '''Trains a LSTM on the IMDB sentiment classification task. 2 | The dataset is actually too small for LSTM to be of any advantage 3 | compared to simpler, much faster methods such as TF-IDF + LogReg. 4 | Notes: 5 | 6 | - RNNs are tricky. Choice of batch size is important, 7 | choice of loss and optimizer is critical, etc. 8 | Some configurations won't converge. 9 | 10 | - LSTM loss decrease patterns during training can be quite different 11 | from what you see with CNNs/MLPs/etc. 
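In the convolution-plus-LSTM stack above, the pooling step also decides how many time steps the LSTM actually sees; with the script's settings this works out as follows (standard 'valid' convolution arithmetic):

```python
maxlen, filter_length, pool_length = 100, 5, 4
conv_steps = maxlen - filter_length + 1   # 96 positions after the 'valid' Convolution1D
lstm_steps = conv_steps // pool_length    # 24 time steps reach the LSTM
print(conv_steps, lstm_steps)
```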
12 | ''' 13 | from __future__ import print_function 14 | import numpy as np 15 | np.random.seed(1337) # for reproducibility 16 | 17 | from keras.preprocessing import sequence 18 | from keras.utils import np_utils 19 | from keras.models import Sequential 20 | from keras.layers import Dense, Dropout, Activation, Embedding 21 | from keras.layers import LSTM, SimpleRNN, GRU 22 | from keras.datasets import imdb 23 | 24 | max_features = 20000 25 | maxlen = 80 # cut texts after this number of words (among top max_features most common words) 26 | batch_size = 32 27 | 28 | print('Loading data...') 29 | (X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features) 30 | print(len(X_train), 'train sequences') 31 | print(len(X_test), 'test sequences') 32 | 33 | print('Pad sequences (samples x time)') 34 | X_train = sequence.pad_sequences(X_train, maxlen=maxlen) 35 | X_test = sequence.pad_sequences(X_test, maxlen=maxlen) 36 | print('X_train shape:', X_train.shape) 37 | print('X_test shape:', X_test.shape) 38 | 39 | print('Build model...') 40 | model = Sequential() 41 | model.add(Embedding(max_features, 128, dropout=0.2)) 42 | model.add(LSTM(128, dropout_W=0.2, dropout_U=0.2)) # try using a GRU instead, for fun 43 | model.add(Dense(1)) 44 | model.add(Activation('sigmoid')) 45 | 46 | # try using different optimizers and different optimizer configs 47 | model.compile(loss='binary_crossentropy', 48 | optimizer='adam', 49 | metrics=['accuracy']) 50 | 51 | print('Train...') 52 | model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=15, 53 | validation_data=(X_test, y_test)) 54 | score, acc = model.evaluate(X_test, y_test, 55 | batch_size=batch_size) 56 | print('Test score:', score) 57 | print('Test accuracy:', acc) 58 | -------------------------------------------------------------------------------- /examples/lstm_benchmark.py: -------------------------------------------------------------------------------- 1 | '''Compare LSTM implementations on the IMDB sentiment classification task. 2 | 3 | consume_less='cpu' preprocesses input to the LSTM which typically results in 4 | faster computations at the expense of increased peak memory usage as the 5 | preprocessed input must be kept in memory. 6 | 7 | consume_less='mem' does away with the preprocessing, meaning that it might take 8 | a little longer, but should require less peak memory. 9 | 10 | consume_less='gpu' concatenates the input, output and forget gate's weights 11 | into one, large matrix, resulting in faster computation time as the GPU can 12 | utilize more cores, at the expense of reduced regularization because the same 13 | dropout is shared across the gates. 14 | 15 | Note that the relative performance of the different `consume_less` modes 16 | can vary depending on your device, your model and the size of your data. 
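Since imdb_lstm.py above warns that the dataset is small for an LSTM, stopping on validation loss is a common guard against the later epochs overfitting; a sketch with the stock callback, reusing the model and data names from that script (the patience value is arbitrary, not tuned):

```python
from keras.callbacks import EarlyStopping

early_stopping = EarlyStopping(monitor='val_loss', patience=2, verbose=1)
model.fit(X_train, y_train,
          batch_size=batch_size, nb_epoch=15,
          validation_data=(X_test, y_test),
          callbacks=[early_stopping])
```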
17 | ''' 18 | 19 | import time 20 | import numpy as np 21 | import matplotlib.pyplot as plt 22 | 23 | from keras.preprocessing import sequence 24 | from keras.models import Sequential 25 | from keras.layers import Embedding, Dense, LSTM 26 | from keras.datasets import imdb 27 | 28 | max_features = 20000 29 | max_length = 80 30 | embedding_dim = 256 31 | batch_size = 128 32 | epochs = 10 33 | modes = ['cpu', 'mem', 'gpu'] 34 | 35 | print('Loading data...') 36 | (X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features) 37 | X_train = sequence.pad_sequences(X_train, max_length) 38 | X_test = sequence.pad_sequences(X_test, max_length) 39 | 40 | # Compile and train different models while meauring performance. 41 | results = [] 42 | for mode in modes: 43 | print('Testing mode: consume_less="{}"'.format(mode)) 44 | 45 | model = Sequential() 46 | model.add(Embedding(max_features, embedding_dim, input_length=max_length, dropout=0.2)) 47 | model.add(LSTM(embedding_dim, dropout_W=0.2, dropout_U=0.2, consume_less=mode)) 48 | model.add(Dense(1, activation='sigmoid')) 49 | model.compile(loss='binary_crossentropy', 50 | optimizer='adam', 51 | metrics=['accuracy']) 52 | 53 | start_time = time.time() 54 | history = model.fit(X_train, y_train, 55 | batch_size=batch_size, 56 | nb_epoch=epochs, 57 | validation_data=(X_test, y_test)) 58 | average_time_per_epoch = (time.time() - start_time) / epochs 59 | 60 | results.append((history, average_time_per_epoch)) 61 | 62 | # Compare models' accuracy, loss and elapsed time per epoch. 63 | plt.style.use('ggplot') 64 | ax1 = plt.subplot2grid((2, 2), (0, 0)) 65 | ax1.set_title('Accuracy') 66 | ax1.set_ylabel('Validation Accuracy') 67 | ax1.set_xlabel('Epochs') 68 | ax2 = plt.subplot2grid((2, 2), (1, 0)) 69 | ax2.set_title('Loss') 70 | ax2.set_ylabel('Validation Loss') 71 | ax2.set_xlabel('Epochs') 72 | ax3 = plt.subplot2grid((2, 2), (0, 1), rowspan=2) 73 | ax3.set_title('Time') 74 | ax3.set_ylabel('Seconds') 75 | for mode, result in zip(modes, results): 76 | ax1.plot(result[0].epoch, result[0].history['val_acc'], label=mode) 77 | ax2.plot(result[0].epoch, result[0].history['val_loss'], label=mode) 78 | ax1.legend() 79 | ax2.legend() 80 | ax3.bar(np.arange(len(results)), [x[1] for x in results], 81 | tick_label=modes, align='center') 82 | plt.tight_layout() 83 | plt.show() 84 | -------------------------------------------------------------------------------- /examples/lstm_text_generation.py: -------------------------------------------------------------------------------- 1 | '''Example script to generate text from Nietzsche's writings. 2 | 3 | At least 20 epochs are required before the generated text 4 | starts sounding coherent. 5 | 6 | It is recommended to run this script on GPU, as recurrent 7 | networks are quite computationally intensive. 8 | 9 | If you try this script on new data, make sure your corpus 10 | has at least ~100k characters. ~1M is better. 
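The corpus-size advice above maps directly onto a number of training windows once `maxlen` and `step` are fixed; a quick estimate, mirroring the slicing loop used further down in the script (which uses `maxlen=40`, `step=3`):

```python
maxlen, step = 40, 3
for corpus_chars in (100000, 1000000):
    nb_sequences = len(range(0, corpus_chars - maxlen, step))
    print(corpus_chars, 'characters ->', nb_sequences, 'training windows')
```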
11 | ''' 12 | 13 | from __future__ import print_function 14 | from keras.models import Sequential 15 | from keras.layers import Dense, Activation, Dropout 16 | from keras.layers import LSTM 17 | from keras.optimizers import RMSprop 18 | from keras.utils.data_utils import get_file 19 | import numpy as np 20 | import random 21 | import sys 22 | 23 | path = get_file('nietzsche.txt', origin="https://s3.amazonaws.com/text-datasets/nietzsche.txt") 24 | text = open(path).read().lower() 25 | print('corpus length:', len(text)) 26 | 27 | chars = sorted(list(set(text))) 28 | print('total chars:', len(chars)) 29 | char_indices = dict((c, i) for i, c in enumerate(chars)) 30 | indices_char = dict((i, c) for i, c in enumerate(chars)) 31 | 32 | # cut the text in semi-redundant sequences of maxlen characters 33 | maxlen = 40 34 | step = 3 35 | sentences = [] 36 | next_chars = [] 37 | for i in range(0, len(text) - maxlen, step): 38 | sentences.append(text[i: i + maxlen]) 39 | next_chars.append(text[i + maxlen]) 40 | print('nb sequences:', len(sentences)) 41 | 42 | print('Vectorization...') 43 | X = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool) 44 | y = np.zeros((len(sentences), len(chars)), dtype=np.bool) 45 | for i, sentence in enumerate(sentences): 46 | for t, char in enumerate(sentence): 47 | X[i, t, char_indices[char]] = 1 48 | y[i, char_indices[next_chars[i]]] = 1 49 | 50 | 51 | # build the model: a single LSTM 52 | print('Build model...') 53 | model = Sequential() 54 | model.add(LSTM(128, input_shape=(maxlen, len(chars)))) 55 | model.add(Dense(len(chars))) 56 | model.add(Activation('softmax')) 57 | 58 | optimizer = RMSprop(lr=0.01) 59 | model.compile(loss='categorical_crossentropy', optimizer=optimizer) 60 | 61 | 62 | def sample(preds, temperature=1.0): 63 | # helper function to sample an index from a probability array 64 | preds = np.asarray(preds).astype('float64') 65 | preds = np.log(preds) / temperature 66 | exp_preds = np.exp(preds) 67 | preds = exp_preds / np.sum(exp_preds) 68 | probas = np.random.multinomial(1, preds, 1) 69 | return np.argmax(probas) 70 | 71 | # train the model, output generated text after each iteration 72 | for iteration in range(1, 60): 73 | print() 74 | print('-' * 50) 75 | print('Iteration', iteration) 76 | model.fit(X, y, batch_size=128, nb_epoch=1) 77 | 78 | start_index = random.randint(0, len(text) - maxlen - 1) 79 | 80 | for diversity in [0.2, 0.5, 1.0, 1.2]: 81 | print() 82 | print('----- diversity:', diversity) 83 | 84 | generated = '' 85 | sentence = text[start_index: start_index + maxlen] 86 | generated += sentence 87 | print('----- Generating with seed: "' + sentence + '"') 88 | sys.stdout.write(generated) 89 | 90 | for i in range(400): 91 | x = np.zeros((1, maxlen, len(chars))) 92 | for t, char in enumerate(sentence): 93 | x[0, t, char_indices[char]] = 1. 94 | 95 | preds = model.predict(x, verbose=0)[0] 96 | next_index = sample(preds, diversity) 97 | next_char = indices_char[next_index] 98 | 99 | generated += next_char 100 | sentence = sentence[1:] + next_char 101 | 102 | sys.stdout.write(next_char) 103 | sys.stdout.flush() 104 | print() 105 | -------------------------------------------------------------------------------- /examples/mnist_cnn.py: -------------------------------------------------------------------------------- 1 | '''Trains a simple convnet on the MNIST dataset. 2 | 3 | Gets to 99.25% test accuracy after 12 epochs 4 | (there is still a lot of margin for parameter tuning). 5 | 16 seconds per epoch on a GRID K520 GPU. 
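The `sample()` helper above rescales the predicted character distribution before drawing from it; the effect of the diversity (temperature) knob is easy to see with plain numpy:

```python
import numpy as np

preds = np.array([0.5, 0.3, 0.15, 0.05])
for temperature in (0.2, 1.0, 1.2):
    p = np.exp(np.log(preds) / temperature)
    p /= p.sum()
    print(temperature, np.round(p, 3))  # low temperature sharpens, high temperature flattens
```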
6 | ''' 7 | 8 | from __future__ import print_function 9 | import numpy as np 10 | np.random.seed(1337) # for reproducibility 11 | 12 | from keras.datasets import mnist 13 | from keras.models import Sequential 14 | from keras.layers import Dense, Dropout, Activation, Flatten 15 | from keras.layers import Convolution2D, MaxPooling2D 16 | from keras.utils import np_utils 17 | from keras import backend as K 18 | 19 | batch_size = 128 20 | nb_classes = 10 21 | nb_epoch = 12 22 | 23 | # input image dimensions 24 | img_rows, img_cols = 28, 28 25 | # number of convolutional filters to use 26 | nb_filters = 32 27 | # size of pooling area for max pooling 28 | pool_size = (2, 2) 29 | # convolution kernel size 30 | kernel_size = (3, 3) 31 | 32 | # the data, shuffled and split between train and test sets 33 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 34 | 35 | if K.image_dim_ordering() == 'th': 36 | X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols) 37 | X_test = X_test.reshape(X_test.shape[0], 1, img_rows, img_cols) 38 | input_shape = (1, img_rows, img_cols) 39 | else: 40 | X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1) 41 | X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 1) 42 | input_shape = (img_rows, img_cols, 1) 43 | 44 | X_train = X_train.astype('float32') 45 | X_test = X_test.astype('float32') 46 | X_train /= 255 47 | X_test /= 255 48 | print('X_train shape:', X_train.shape) 49 | print(X_train.shape[0], 'train samples') 50 | print(X_test.shape[0], 'test samples') 51 | 52 | # convert class vectors to binary class matrices 53 | Y_train = np_utils.to_categorical(y_train, nb_classes) 54 | Y_test = np_utils.to_categorical(y_test, nb_classes) 55 | 56 | model = Sequential() 57 | 58 | model.add(Convolution2D(nb_filters, kernel_size[0], kernel_size[1], 59 | border_mode='valid', 60 | input_shape=input_shape)) 61 | model.add(Activation('relu')) 62 | model.add(Convolution2D(nb_filters, kernel_size[0], kernel_size[1])) 63 | model.add(Activation('relu')) 64 | model.add(MaxPooling2D(pool_size=pool_size)) 65 | model.add(Dropout(0.25)) 66 | 67 | model.add(Flatten()) 68 | model.add(Dense(128)) 69 | model.add(Activation('relu')) 70 | model.add(Dropout(0.5)) 71 | model.add(Dense(nb_classes)) 72 | model.add(Activation('softmax')) 73 | 74 | model.compile(loss='categorical_crossentropy', 75 | optimizer='adadelta', 76 | metrics=['accuracy']) 77 | 78 | model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch, 79 | verbose=1, validation_data=(X_test, Y_test)) 80 | score = model.evaluate(X_test, Y_test, verbose=0) 81 | print('Test score:', score[0]) 82 | print('Test accuracy:', score[1]) 83 | -------------------------------------------------------------------------------- /examples/mnist_hierarchical_rnn.py: -------------------------------------------------------------------------------- 1 | """This is an example of using Hierarchical RNN (HRNN) to classify MNIST digits. 2 | 3 | HRNNs can learn across multiple levels of temporal hiearchy over a complex sequence. 4 | Usually, the first recurrent layer of an HRNN encodes a sentence (e.g. of word vectors) 5 | into a sentence vector. The second recurrent layer then encodes a sequence of 6 | such vectors (encoded by the first layer) into a document vector. This 7 | document vector is considered to preserve both the word-level and 8 | sentence-level structure of the context. 
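After a run of mnist_cnn.py above, persisting the trained network is usually the next step; a minimal sketch using the architecture-as-JSON plus HDF5-weights pair (file names are placeholders, and `h5py` must be installed):

```python
# save architecture and weights separately
open('mnist_cnn_architecture.json', 'w').write(model.to_json())
model.save_weights('mnist_cnn_weights.h5', overwrite=True)

# ...and rebuild the model later
from keras.models import model_from_json
model = model_from_json(open('mnist_cnn_architecture.json').read())
model.load_weights('mnist_cnn_weights.h5')
```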
9 | 10 | # References 11 | - [A Hierarchical Neural Autoencoder for Paragraphs and Documents](https://web.stanford.edu/~jurafsky/pubs/P15-1107.pdf) 12 | Encodes paragraphs and documents with HRNN. 13 | Results have shown that HRNN outperforms standard 14 | RNNs and may play some role in more sophisticated generation tasks like 15 | summarization or question answering. 16 | - [Hierarchical recurrent neural network for skeleton based action recognition](http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=7298714) 17 | Achieved state-of-the-art results on skeleton based action recognition with 3 levels 18 | of bidirectional HRNN combined with fully connected layers. 19 | 20 | In the below MNIST example the first LSTM layer first encodes every 21 | column of pixels of shape (28, 1) to a column vector of shape (128,). The second LSTM 22 | layer encodes then these 28 column vectors of shape (28, 128) to a image vector 23 | representing the whole image. A final Dense layer is added for prediction. 24 | 25 | After 5 epochs: train acc: 0.9858, val acc: 0.9864 26 | """ 27 | from __future__ import print_function 28 | 29 | from keras.datasets import mnist 30 | from keras.models import Sequential, Model 31 | from keras.layers import Input, Dense, TimeDistributed 32 | from keras.layers import LSTM 33 | from keras.utils import np_utils 34 | 35 | # Training parameters. 36 | batch_size = 32 37 | nb_classes = 10 38 | nb_epochs = 5 39 | 40 | # Embedding dimensions. 41 | row_hidden = 128 42 | col_hidden = 128 43 | 44 | # The data, shuffled and split between train and test sets. 45 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 46 | 47 | # Reshapes data to 4D for Hierarchical RNN. 48 | X_train = X_train.reshape(X_train.shape[0], 28, 28, 1) 49 | X_test = X_test.reshape(X_test.shape[0], 28, 28, 1) 50 | X_train = X_train.astype('float32') 51 | X_test = X_test.astype('float32') 52 | X_train /= 255 53 | X_test /= 255 54 | print('X_train shape:', X_train.shape) 55 | print(X_train.shape[0], 'train samples') 56 | print(X_test.shape[0], 'test samples') 57 | 58 | # Converts class vectors to binary class matrices. 59 | Y_train = np_utils.to_categorical(y_train, nb_classes) 60 | Y_test = np_utils.to_categorical(y_test, nb_classes) 61 | 62 | row, col, pixel = X_train.shape[1:] 63 | 64 | # 4D input. 65 | x = Input(shape=(row, col, pixel)) 66 | 67 | # Encodes a row of pixels using TimeDistributed Wrapper. 68 | encoded_rows = TimeDistributed(LSTM(output_dim=row_hidden))(x) 69 | 70 | # Encodes columns of encoded rows. 71 | encoded_columns = LSTM(col_hidden)(encoded_rows) 72 | 73 | # Final predictions and model. 74 | prediction = Dense(nb_classes, activation='softmax')(encoded_columns) 75 | model = Model(input=x, output=prediction) 76 | model.compile(loss='categorical_crossentropy', 77 | optimizer='rmsprop', 78 | metrics=['accuracy']) 79 | 80 | # Training. 81 | model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epochs, 82 | verbose=1, validation_data=(X_test, Y_test)) 83 | 84 | # Evaluation. 85 | scores = model.evaluate(X_test, Y_test, verbose=0) 86 | print('Test loss:', scores[0]) 87 | print('Test accuracy:', scores[1]) 88 | -------------------------------------------------------------------------------- /examples/mnist_irnn.py: -------------------------------------------------------------------------------- 1 | '''This is a reproduction of the IRNN experiment 2 | with pixel-by-pixel sequential MNIST in 3 | "A Simple Way to Initialize Recurrent Networks of Rectified Linear Units" 4 | by Quoc V. 
Le, Navdeep Jaitly, Geoffrey E. Hinton 5 | 6 | arXiv:1504.00941v2 [cs.NE] 7 Apr 2015 7 | http://arxiv.org/pdf/1504.00941v2.pdf 8 | 9 | Optimizer is replaced with RMSprop which yields more stable and steady 10 | improvement. 11 | 12 | Reaches 0.93 train/test accuracy after 900 epochs 13 | (which roughly corresponds to 1687500 steps in the original paper.) 14 | ''' 15 | 16 | from __future__ import print_function 17 | 18 | from keras.datasets import mnist 19 | from keras.models import Sequential 20 | from keras.layers import Dense, Activation 21 | from keras.layers import SimpleRNN 22 | from keras.initializations import normal, identity 23 | from keras.optimizers import RMSprop 24 | from keras.utils import np_utils 25 | 26 | batch_size = 32 27 | nb_classes = 10 28 | nb_epochs = 200 29 | hidden_units = 100 30 | 31 | learning_rate = 1e-6 32 | clip_norm = 1.0 33 | 34 | # the data, shuffled and split between train and test sets 35 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 36 | 37 | X_train = X_train.reshape(X_train.shape[0], -1, 1) 38 | X_test = X_test.reshape(X_test.shape[0], -1, 1) 39 | X_train = X_train.astype('float32') 40 | X_test = X_test.astype('float32') 41 | X_train /= 255 42 | X_test /= 255 43 | print('X_train shape:', X_train.shape) 44 | print(X_train.shape[0], 'train samples') 45 | print(X_test.shape[0], 'test samples') 46 | 47 | # convert class vectors to binary class matrices 48 | Y_train = np_utils.to_categorical(y_train, nb_classes) 49 | Y_test = np_utils.to_categorical(y_test, nb_classes) 50 | 51 | print('Evaluate IRNN...') 52 | model = Sequential() 53 | model.add(SimpleRNN(output_dim=hidden_units, 54 | init=lambda shape, name: normal(shape, scale=0.001, name=name), 55 | inner_init=lambda shape, name: identity(shape, scale=1.0, name=name), 56 | activation='relu', 57 | input_shape=X_train.shape[1:])) 58 | model.add(Dense(nb_classes)) 59 | model.add(Activation('softmax')) 60 | rmsprop = RMSprop(lr=learning_rate) 61 | model.compile(loss='categorical_crossentropy', 62 | optimizer=rmsprop, 63 | metrics=['accuracy']) 64 | 65 | model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epochs, 66 | verbose=1, validation_data=(X_test, Y_test)) 67 | 68 | scores = model.evaluate(X_test, Y_test, verbose=0) 69 | print('IRNN test score:', scores[0]) 70 | print('IRNN test accuracy:', scores[1]) 71 | -------------------------------------------------------------------------------- /examples/mnist_mlp.py: -------------------------------------------------------------------------------- 1 | '''Trains a simple deep NN on the MNIST dataset. 2 | 3 | Gets to 98.40% test accuracy after 20 epochs 4 | (there is *a lot* of margin for parameter tuning). 5 | 2 seconds per epoch on a K520 GPU. 
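mnist_irnn.py above defines `clip_norm = 1.0` but never hands it to the optimizer; if gradient clipping is wanted, as in the original IRNN setup, Keras optimizers accept it as a keyword, for example (reusing the names already defined in that script):

```python
from keras.optimizers import RMSprop

# clip gradient norms to the value defined earlier in the script
rmsprop = RMSprop(lr=learning_rate, clipnorm=clip_norm)
model.compile(loss='categorical_crossentropy',
              optimizer=rmsprop,
              metrics=['accuracy'])
```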
6 | ''' 7 | 8 | from __future__ import print_function 9 | import numpy as np 10 | np.random.seed(1337) # for reproducibility 11 | 12 | from keras.datasets import mnist 13 | from keras.models import Sequential 14 | from keras.layers.core import Dense, Dropout, Activation 15 | from keras.optimizers import SGD, Adam, RMSprop 16 | from keras.utils import np_utils 17 | 18 | 19 | batch_size = 128 20 | nb_classes = 10 21 | nb_epoch = 20 22 | 23 | # the data, shuffled and split between train and test sets 24 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 25 | 26 | X_train = X_train.reshape(60000, 784) 27 | X_test = X_test.reshape(10000, 784) 28 | X_train = X_train.astype('float32') 29 | X_test = X_test.astype('float32') 30 | X_train /= 255 31 | X_test /= 255 32 | print(X_train.shape[0], 'train samples') 33 | print(X_test.shape[0], 'test samples') 34 | 35 | # convert class vectors to binary class matrices 36 | Y_train = np_utils.to_categorical(y_train, nb_classes) 37 | Y_test = np_utils.to_categorical(y_test, nb_classes) 38 | 39 | model = Sequential() 40 | model.add(Dense(512, input_shape=(784,))) 41 | model.add(Activation('relu')) 42 | model.add(Dropout(0.2)) 43 | model.add(Dense(512)) 44 | model.add(Activation('relu')) 45 | model.add(Dropout(0.2)) 46 | model.add(Dense(10)) 47 | model.add(Activation('softmax')) 48 | 49 | model.summary() 50 | 51 | model.compile(loss='categorical_crossentropy', 52 | optimizer=RMSprop(), 53 | metrics=['accuracy']) 54 | 55 | history = model.fit(X_train, Y_train, 56 | batch_size=batch_size, nb_epoch=nb_epoch, 57 | verbose=1, validation_data=(X_test, Y_test)) 58 | score = model.evaluate(X_test, Y_test, verbose=0) 59 | print('Test score:', score[0]) 60 | print('Test accuracy:', score[1]) 61 | -------------------------------------------------------------------------------- /examples/mnist_siamese_graph.py: -------------------------------------------------------------------------------- 1 | '''Train a Siamese MLP on pairs of digits from the MNIST dataset. 2 | 3 | It follows Hadsell-et-al.'06 [1] by computing the Euclidean distance on the 4 | output of the shared network and by optimizing the contrastive loss (see paper 5 | for mode details). 6 | 7 | [1] "Dimensionality Reduction by Learning an Invariant Mapping" 8 | http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf 9 | 10 | Gets to 99.5% test accuracy after 20 epochs. 11 | 3 seconds per epoch on a Titan X GPU 12 | ''' 13 | from __future__ import absolute_import 14 | from __future__ import print_function 15 | import numpy as np 16 | np.random.seed(1337) # for reproducibility 17 | 18 | import random 19 | from keras.datasets import mnist 20 | from keras.models import Sequential, Model 21 | from keras.layers import Dense, Dropout, Input, Lambda 22 | from keras.optimizers import SGD, RMSprop 23 | from keras import backend as K 24 | 25 | 26 | def euclidean_distance(vects): 27 | x, y = vects 28 | return K.sqrt(K.sum(K.square(x - y), axis=1, keepdims=True)) 29 | 30 | 31 | def eucl_dist_output_shape(shapes): 32 | shape1, shape2 = shapes 33 | return (shape1[0], 1) 34 | 35 | 36 | def contrastive_loss(y_true, y_pred): 37 | '''Contrastive loss from Hadsell-et-al.'06 38 | http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf 39 | ''' 40 | margin = 1 41 | return K.mean(y_true * K.square(y_pred) + (1 - y_true) * K.square(K.maximum(margin - y_pred, 0))) 42 | 43 | 44 | def create_pairs(x, digit_indices): 45 | '''Positive and negative pair creation. 46 | Alternates between positive and negative pairs. 
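The Dense layers in mnist_mlp.py above can also carry the weight penalties documented in regularizers.md earlier in this tree; a minimal variant of the first hidden layer with L2 weight decay (the 0.01 factor is just the documented default, not a tuned value):

```python
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.regularizers import l2

model = Sequential()
model.add(Dense(512, input_shape=(784,), W_regularizer=l2(0.01)))
model.add(Activation('relu'))
model.add(Dropout(0.2))
```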
47 | ''' 48 | pairs = [] 49 | labels = [] 50 | n = min([len(digit_indices[d]) for d in range(10)]) - 1 51 | for d in range(10): 52 | for i in range(n): 53 | z1, z2 = digit_indices[d][i], digit_indices[d][i+1] 54 | pairs += [[x[z1], x[z2]]] 55 | inc = random.randrange(1, 10) 56 | dn = (d + inc) % 10 57 | z1, z2 = digit_indices[d][i], digit_indices[dn][i] 58 | pairs += [[x[z1], x[z2]]] 59 | labels += [1, 0] 60 | return np.array(pairs), np.array(labels) 61 | 62 | 63 | def create_base_network(input_dim): 64 | '''Base network to be shared (eq. to feature extraction). 65 | ''' 66 | seq = Sequential() 67 | seq.add(Dense(128, input_shape=(input_dim,), activation='relu')) 68 | seq.add(Dropout(0.1)) 69 | seq.add(Dense(128, activation='relu')) 70 | seq.add(Dropout(0.1)) 71 | seq.add(Dense(128, activation='relu')) 72 | return seq 73 | 74 | 75 | def compute_accuracy(predictions, labels): 76 | '''Compute classification accuracy with a fixed threshold on distances. 77 | ''' 78 | return labels[predictions.ravel() < 0.5].mean() 79 | 80 | 81 | # the data, shuffled and split between train and test sets 82 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 83 | X_train = X_train.reshape(60000, 784) 84 | X_test = X_test.reshape(10000, 784) 85 | X_train = X_train.astype('float32') 86 | X_test = X_test.astype('float32') 87 | X_train /= 255 88 | X_test /= 255 89 | input_dim = 784 90 | nb_epoch = 20 91 | 92 | # create training+test positive and negative pairs 93 | digit_indices = [np.where(y_train == i)[0] for i in range(10)] 94 | tr_pairs, tr_y = create_pairs(X_train, digit_indices) 95 | 96 | digit_indices = [np.where(y_test == i)[0] for i in range(10)] 97 | te_pairs, te_y = create_pairs(X_test, digit_indices) 98 | 99 | # network definition 100 | base_network = create_base_network(input_dim) 101 | 102 | input_a = Input(shape=(input_dim,)) 103 | input_b = Input(shape=(input_dim,)) 104 | 105 | # because we re-use the same instance `base_network`, 106 | # the weights of the network 107 | # will be shared across the two branches 108 | processed_a = base_network(input_a) 109 | processed_b = base_network(input_b) 110 | 111 | distance = Lambda(euclidean_distance, output_shape=eucl_dist_output_shape)([processed_a, processed_b]) 112 | 113 | model = Model(input=[input_a, input_b], output=distance) 114 | 115 | # train 116 | rms = RMSprop() 117 | model.compile(loss=contrastive_loss, optimizer=rms) 118 | model.fit([tr_pairs[:, 0], tr_pairs[:, 1]], tr_y, 119 | validation_data=([te_pairs[:, 0], te_pairs[:, 1]], te_y), 120 | batch_size=128, 121 | nb_epoch=nb_epoch) 122 | 123 | # compute final accuracy on training and test sets 124 | pred = model.predict([tr_pairs[:, 0], tr_pairs[:, 1]]) 125 | tr_acc = compute_accuracy(pred, tr_y) 126 | pred = model.predict([te_pairs[:, 0], te_pairs[:, 1]]) 127 | te_acc = compute_accuracy(pred, te_y) 128 | 129 | print('* Accuracy on training set: %0.2f%%' % (100 * tr_acc)) 130 | print('* Accuracy on test set: %0.2f%%' % (100 * te_acc)) 131 | -------------------------------------------------------------------------------- /examples/mnist_sklearn_wrapper.py: -------------------------------------------------------------------------------- 1 | '''Example of how to use sklearn wrapper 2 | 3 | Builds simple CNN models on MNIST and uses sklearn's GridSearchCV to find best model 4 | ''' 5 | 6 | from __future__ import print_function 7 | import numpy as np 8 | np.random.seed(1337) # for reproducibility 9 | 10 | from keras.datasets import mnist 11 | from keras.models import Sequential 12 | from 
keras.layers import Dense, Dropout, Activation, Flatten 13 | from keras.layers import Convolution2D, MaxPooling2D 14 | from keras.utils import np_utils 15 | from keras.wrappers.scikit_learn import KerasClassifier 16 | from sklearn.grid_search import GridSearchCV 17 | 18 | 19 | nb_classes = 10 20 | 21 | # input image dimensions 22 | img_rows, img_cols = 28, 28 23 | 24 | # load training data and do basic data normalization 25 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 26 | X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols) 27 | X_test = X_test.reshape(X_test.shape[0], 1, img_rows, img_cols) 28 | X_train = X_train.astype('float32') 29 | X_test = X_test.astype('float32') 30 | X_train /= 255 31 | X_test /= 255 32 | 33 | # convert class vectors to binary class matrices 34 | y_train = np_utils.to_categorical(y_train, nb_classes) 35 | y_test = np_utils.to_categorical(y_test, nb_classes) 36 | 37 | def make_model(dense_layer_sizes, nb_filters, nb_conv, nb_pool): 38 | '''Creates model comprised of 2 convolutional layers followed by dense layers 39 | 40 | dense_layer_sizes: List of layer sizes. This list has one number for each layer 41 | nb_filters: Number of convolutional filters in each convolutional layer 42 | nb_conv: Convolutional kernel size 43 | nb_pool: Size of pooling area for max pooling 44 | ''' 45 | 46 | model = Sequential() 47 | 48 | model.add(Convolution2D(nb_filters, nb_conv, nb_conv, 49 | border_mode='valid', 50 | input_shape=(1, img_rows, img_cols))) 51 | model.add(Activation('relu')) 52 | model.add(Convolution2D(nb_filters, nb_conv, nb_conv)) 53 | model.add(Activation('relu')) 54 | model.add(MaxPooling2D(pool_size=(nb_pool, nb_pool))) 55 | model.add(Dropout(0.25)) 56 | 57 | model.add(Flatten()) 58 | for layer_size in dense_layer_sizes: 59 | model.add(Dense(layer_size)) 60 | model.add(Activation('relu')) 61 | model.add(Dropout(0.5)) 62 | model.add(Dense(nb_classes)) 63 | model.add(Activation('softmax')) 64 | 65 | model.compile(loss='categorical_crossentropy', 66 | optimizer='adadelta', 67 | metrics=['accuracy']) 68 | 69 | return model 70 | 71 | dense_size_candidates = [[32], [64], [32, 32], [64, 64]] 72 | my_classifier = KerasClassifier(make_model, batch_size=32) 73 | validator = GridSearchCV(my_classifier, 74 | param_grid={'dense_layer_sizes': dense_size_candidates, 75 | # nb_epoch is avail for tuning even when not 76 | # an argument to model building function 77 | 'nb_epoch': [3, 6], 78 | 'nb_filters': [8], 79 | 'nb_conv': [3], 80 | 'nb_pool': [2]}, 81 | scoring='log_loss', 82 | n_jobs=1) 83 | validator.fit(X_train, y_train) 84 | 85 | print('The parameters of the best model are: ') 86 | print(validator.best_params_) 87 | 88 | # validator.best_estimator_ returns sklearn-wrapped version of best model. 89 | # validator.best_estimator_.model returns the (unwrapped) keras model 90 | best_model = validator.best_estimator_.model 91 | metric_names = best_model.metrics_names 92 | metric_values = best_model.evaluate(X_test, y_test) 93 | for metric, value in zip(metric_names, metric_values): 94 | print(metric, ': ', value) 95 | -------------------------------------------------------------------------------- /examples/mnist_transfer_cnn.py: -------------------------------------------------------------------------------- 1 | '''Transfer learning toy example: 2 | 3 | 1- Train a simple convnet on the MNIST dataset the first 5 digits [0..4]. 4 | 2- Freeze convolutional layers and fine-tune dense layers 5 | for the classification of digits [5..9]. 
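Once the grid search in mnist_sklearn_wrapper.py above has run, the wrapped estimator behaves like any other scikit-learn classifier; a short usage sketch reusing `validator` and `X_test` from that script:

```python
best_clf = validator.best_estimator_
print(best_clf.predict(X_test[:5]))        # predicted class labels
print(best_clf.predict_proba(X_test[:5]))  # per-class probabilities
```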
6 | 7 | Run on GPU: THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 python mnist_transfer_cnn.py 8 | 9 | Get to 99.8% test accuracy after 5 epochs 10 | for the first five digits classifier 11 | and 99.2% for the last five digits after transfer + fine-tuning. 12 | ''' 13 | 14 | from __future__ import print_function 15 | import numpy as np 16 | import datetime 17 | 18 | np.random.seed(1337) # for reproducibility 19 | 20 | from keras.datasets import mnist 21 | from keras.models import Sequential 22 | from keras.layers import Dense, Dropout, Activation, Flatten 23 | from keras.layers import Convolution2D, MaxPooling2D 24 | from keras.utils import np_utils 25 | from keras import backend as K 26 | 27 | now = datetime.datetime.now 28 | 29 | batch_size = 128 30 | nb_classes = 5 31 | nb_epoch = 5 32 | 33 | # input image dimensions 34 | img_rows, img_cols = 28, 28 35 | # number of convolutional filters to use 36 | nb_filters = 32 37 | # size of pooling area for max pooling 38 | pool_size = 2 39 | # convolution kernel size 40 | kernel_size = 3 41 | 42 | if K.image_dim_ordering() == 'th': 43 | input_shape = (1, img_rows, img_cols) 44 | else: 45 | input_shape = (img_rows, img_cols, 1) 46 | 47 | 48 | def train_model(model, train, test, nb_classes): 49 | X_train = train[0].reshape((train[0].shape[0],) + input_shape) 50 | X_test = test[0].reshape((test[0].shape[0],) + input_shape) 51 | X_train = X_train.astype('float32') 52 | X_test = X_test.astype('float32') 53 | X_train /= 255 54 | X_test /= 255 55 | print('X_train shape:', X_train.shape) 56 | print(X_train.shape[0], 'train samples') 57 | print(X_test.shape[0], 'test samples') 58 | 59 | # convert class vectors to binary class matrices 60 | Y_train = np_utils.to_categorical(train[1], nb_classes) 61 | Y_test = np_utils.to_categorical(test[1], nb_classes) 62 | 63 | model.compile(loss='categorical_crossentropy', 64 | optimizer='adadelta', 65 | metrics=['accuracy']) 66 | 67 | t = now() 68 | model.fit(X_train, Y_train, 69 | batch_size=batch_size, nb_epoch=nb_epoch, 70 | verbose=1, 71 | validation_data=(X_test, Y_test)) 72 | print('Training time: %s' % (now() - t)) 73 | score = model.evaluate(X_test, Y_test, verbose=0) 74 | print('Test score:', score[0]) 75 | print('Test accuracy:', score[1]) 76 | 77 | 78 | # the data, shuffled and split between train and test sets 79 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 80 | 81 | # create two datasets one with digits below 5 and one with 5 and above 82 | X_train_lt5 = X_train[y_train < 5] 83 | y_train_lt5 = y_train[y_train < 5] 84 | X_test_lt5 = X_test[y_test < 5] 85 | y_test_lt5 = y_test[y_test < 5] 86 | 87 | X_train_gte5 = X_train[y_train >= 5] 88 | y_train_gte5 = y_train[y_train >= 5] - 5 # make classes start at 0 for 89 | X_test_gte5 = X_test[y_test >= 5] # np_utils.to_categorical 90 | y_test_gte5 = y_test[y_test >= 5] - 5 91 | 92 | # define two groups of layers: feature (convolutions) and classification (dense) 93 | feature_layers = [ 94 | Convolution2D(nb_filters, kernel_size, kernel_size, 95 | border_mode='valid', 96 | input_shape=input_shape), 97 | Activation('relu'), 98 | Convolution2D(nb_filters, kernel_size, kernel_size), 99 | Activation('relu'), 100 | MaxPooling2D(pool_size=(pool_size, pool_size)), 101 | Dropout(0.25), 102 | Flatten(), 103 | ] 104 | classification_layers = [ 105 | Dense(128), 106 | Activation('relu'), 107 | Dropout(0.5), 108 | Dense(nb_classes), 109 | Activation('softmax') 110 | ] 111 | 112 | # create complete model 113 | model = Sequential(feature_layers + 
classification_layers) 114 | 115 | # train model for 5-digit classification [0..4] 116 | train_model(model, 117 | (X_train_lt5, y_train_lt5), 118 | (X_test_lt5, y_test_lt5), nb_classes) 119 | 120 | # freeze feature layers and rebuild model 121 | for l in feature_layers: 122 | l.trainable = False 123 | 124 | # transfer: train dense layers for new classification task [5..9] 125 | train_model(model, 126 | (X_train_gte5, y_train_gte5), 127 | (X_test_gte5, y_test_gte5), nb_classes) 128 | -------------------------------------------------------------------------------- /examples/reuters_mlp.py: -------------------------------------------------------------------------------- 1 | '''Trains and evaluate a simple MLP 2 | on the Reuters newswire topic classification task. 3 | ''' 4 | 5 | from __future__ import print_function 6 | import numpy as np 7 | np.random.seed(1337) # for reproducibility 8 | 9 | from keras.datasets import reuters 10 | from keras.models import Sequential 11 | from keras.layers import Dense, Dropout, Activation 12 | from keras.utils import np_utils 13 | from keras.preprocessing.text import Tokenizer 14 | 15 | max_words = 1000 16 | batch_size = 32 17 | nb_epoch = 5 18 | 19 | print('Loading data...') 20 | (X_train, y_train), (X_test, y_test) = reuters.load_data(nb_words=max_words, test_split=0.2) 21 | print(len(X_train), 'train sequences') 22 | print(len(X_test), 'test sequences') 23 | 24 | nb_classes = np.max(y_train)+1 25 | print(nb_classes, 'classes') 26 | 27 | print('Vectorizing sequence data...') 28 | tokenizer = Tokenizer(nb_words=max_words) 29 | X_train = tokenizer.sequences_to_matrix(X_train, mode='binary') 30 | X_test = tokenizer.sequences_to_matrix(X_test, mode='binary') 31 | print('X_train shape:', X_train.shape) 32 | print('X_test shape:', X_test.shape) 33 | 34 | print('Convert class vector to binary class matrix (for use with categorical_crossentropy)') 35 | Y_train = np_utils.to_categorical(y_train, nb_classes) 36 | Y_test = np_utils.to_categorical(y_test, nb_classes) 37 | print('Y_train shape:', Y_train.shape) 38 | print('Y_test shape:', Y_test.shape) 39 | 40 | print('Building model...') 41 | model = Sequential() 42 | model.add(Dense(512, input_shape=(max_words,))) 43 | model.add(Activation('relu')) 44 | model.add(Dropout(0.5)) 45 | model.add(Dense(nb_classes)) 46 | model.add(Activation('softmax')) 47 | 48 | model.compile(loss='categorical_crossentropy', 49 | optimizer='adam', 50 | metrics=['accuracy']) 51 | 52 | history = model.fit(X_train, Y_train, 53 | nb_epoch=nb_epoch, batch_size=batch_size, 54 | verbose=1, validation_split=0.1) 55 | score = model.evaluate(X_test, Y_test, 56 | batch_size=batch_size, verbose=1) 57 | print('Test score:', score[0]) 58 | print('Test accuracy:', score[1]) 59 | -------------------------------------------------------------------------------- /examples/stateful_lstm.py: -------------------------------------------------------------------------------- 1 | '''Example script showing how to use stateful RNNs 2 | to model long sequences efficiently. 
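reuters_mlp.py above vectorizes the newswires with `mode='binary'`; the same `Tokenizer` call supports other matrix modes (for example `'count'`, which needs no fitting step), so switching the representation is a one-line change to the existing calls, assuming the tokenizer and raw sequences from that script:

```python
# per-document word counts instead of 0/1 indicators
X_train = tokenizer.sequences_to_matrix(X_train, mode='count')
X_test = tokenizer.sequences_to_matrix(X_test, mode='count')
```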
3 | ''' 4 | from __future__ import print_function 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | from keras.models import Sequential 8 | from keras.layers import Dense, LSTM 9 | 10 | 11 | # since we are using stateful rnn tsteps can be set to 1 12 | tsteps = 1 13 | batch_size = 25 14 | epochs = 25 15 | # number of elements ahead that are used to make the prediction 16 | lahead = 1 17 | 18 | 19 | def gen_cosine_amp(amp=100, period=1000, x0=0, xn=50000, step=1, k=0.0001): 20 | """Generates an absolute cosine time series with the amplitude 21 | exponentially decreasing 22 | 23 | Arguments: 24 | amp: amplitude of the cosine function 25 | period: period of the cosine function 26 | x0: initial x of the time series 27 | xn: final x of the time series 28 | step: step of the time series discretization 29 | k: exponential rate 30 | """ 31 | cos = np.zeros(((xn - x0) * step, 1, 1)) 32 | for i in range(len(cos)): 33 | idx = x0 + i * step 34 | cos[i, 0, 0] = amp * np.cos(2 * np.pi * idx / period) 35 | cos[i, 0, 0] = cos[i, 0, 0] * np.exp(-k * idx) 36 | return cos 37 | 38 | 39 | print('Generating Data') 40 | cos = gen_cosine_amp() 41 | print('Input shape:', cos.shape) 42 | 43 | expected_output = np.zeros((len(cos), 1)) 44 | for i in range(len(cos) - lahead): 45 | expected_output[i, 0] = np.mean(cos[i + 1:i + lahead + 1]) 46 | 47 | print('Output shape') 48 | print(expected_output.shape) 49 | 50 | print('Creating Model') 51 | model = Sequential() 52 | model.add(LSTM(50, 53 | batch_input_shape=(batch_size, tsteps, 1), 54 | return_sequences=True, 55 | stateful=True)) 56 | model.add(LSTM(50, 57 | batch_input_shape=(batch_size, tsteps, 1), 58 | return_sequences=False, 59 | stateful=True)) 60 | model.add(Dense(1)) 61 | model.compile(loss='mse', optimizer='rmsprop') 62 | 63 | print('Training') 64 | for i in range(epochs): 65 | print('Epoch', i, '/', epochs) 66 | model.fit(cos, 67 | expected_output, 68 | batch_size=batch_size, 69 | verbose=1, 70 | nb_epoch=1, 71 | shuffle=False) 72 | model.reset_states() 73 | 74 | print('Predicting') 75 | predicted_output = model.predict(cos, batch_size=batch_size) 76 | 77 | print('Plotting Results') 78 | plt.subplot(2, 1, 1) 79 | plt.plot(expected_output) 80 | plt.title('Expected') 81 | plt.subplot(2, 1, 2) 82 | plt.plot(predicted_output) 83 | plt.title('Predicted') 84 | plt.show() 85 | -------------------------------------------------------------------------------- /examples/variational_autoencoder.py: -------------------------------------------------------------------------------- 1 | '''This script demonstrates how to build a variational autoencoder with Keras. 
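The encoder maps each input to the parameters (z_mean, z_log_var) of a Gaussian
over a 2-dimensional latent space, a latent point is drawn via the
reparameterization trick in the `sampling` Lambda below, and the decoder
reconstructs the input from it; the training loss is the sum of a binary
cross-entropy reconstruction term and a KL-divergence term.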
2 | 3 | Reference: "Auto-Encoding Variational Bayes" https://arxiv.org/abs/1312.6114 4 | ''' 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | 8 | from keras.layers import Input, Dense, Lambda 9 | from keras.models import Model 10 | from keras import backend as K 11 | from keras import objectives 12 | from keras.datasets import mnist 13 | 14 | batch_size = 100 15 | original_dim = 784 16 | latent_dim = 2 17 | intermediate_dim = 256 18 | nb_epoch = 50 19 | epsilon_std = 0.01 20 | 21 | x = Input(batch_shape=(batch_size, original_dim)) 22 | h = Dense(intermediate_dim, activation='relu')(x) 23 | z_mean = Dense(latent_dim)(h) 24 | z_log_var = Dense(latent_dim)(h) 25 | 26 | 27 | def sampling(args): 28 | z_mean, z_log_var = args 29 | epsilon = K.random_normal(shape=(batch_size, latent_dim), mean=0., 30 | std=epsilon_std) 31 | return z_mean + K.exp(z_log_var / 2) * epsilon 32 | 33 | # note that "output_shape" isn't necessary with the TensorFlow backend 34 | z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_var]) 35 | 36 | # we instantiate these layers separately so as to reuse them later 37 | decoder_h = Dense(intermediate_dim, activation='relu') 38 | decoder_mean = Dense(original_dim, activation='sigmoid') 39 | h_decoded = decoder_h(z) 40 | x_decoded_mean = decoder_mean(h_decoded) 41 | 42 | 43 | def vae_loss(x, x_decoded_mean): 44 | xent_loss = original_dim * objectives.binary_crossentropy(x, x_decoded_mean) 45 | kl_loss = - 0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1) 46 | return xent_loss + kl_loss 47 | 48 | vae = Model(x, x_decoded_mean) 49 | vae.compile(optimizer='rmsprop', loss=vae_loss) 50 | 51 | # train the VAE on MNIST digits 52 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 53 | 54 | x_train = x_train.astype('float32') / 255. 55 | x_test = x_test.astype('float32') / 255. 
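# flatten each 28x28 MNIST image into a vector of length original_dim (784)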
56 | x_train = x_train.reshape((len(x_train), np.prod(x_train.shape[1:]))) 57 | x_test = x_test.reshape((len(x_test), np.prod(x_test.shape[1:]))) 58 | 59 | vae.fit(x_train, x_train, 60 | shuffle=True, 61 | nb_epoch=nb_epoch, 62 | batch_size=batch_size, 63 | validation_data=(x_test, x_test)) 64 | 65 | # build a model to project inputs on the latent space 66 | encoder = Model(x, z_mean) 67 | 68 | # display a 2D plot of the digit classes in the latent space 69 | x_test_encoded = encoder.predict(x_test, batch_size=batch_size) 70 | plt.figure(figsize=(6, 6)) 71 | plt.scatter(x_test_encoded[:, 0], x_test_encoded[:, 1], c=y_test) 72 | plt.colorbar() 73 | plt.show() 74 | 75 | # build a digit generator that can sample from the learned distribution 76 | decoder_input = Input(shape=(latent_dim,)) 77 | _h_decoded = decoder_h(decoder_input) 78 | _x_decoded_mean = decoder_mean(_h_decoded) 79 | generator = Model(decoder_input, _x_decoded_mean) 80 | 81 | # display a 2D manifold of the digits 82 | n = 15 # figure with 15x15 digits 83 | digit_size = 28 84 | figure = np.zeros((digit_size * n, digit_size * n)) 85 | # we will sample n points within [-15, 15] standard deviations 86 | grid_x = np.linspace(-15, 15, n) 87 | grid_y = np.linspace(-15, 15, n) 88 | 89 | for i, yi in enumerate(grid_x): 90 | for j, xi in enumerate(grid_y): 91 | z_sample = np.array([[xi, yi]]) 92 | x_decoded = generator.predict(z_sample) 93 | digit = x_decoded[0].reshape(digit_size, digit_size) 94 | figure[i * digit_size: (i + 1) * digit_size, 95 | j * digit_size: (j + 1) * digit_size] = digit 96 | 97 | plt.figure(figsize=(10, 10)) 98 | plt.imshow(figure) 99 | plt.show() 100 | -------------------------------------------------------------------------------- /keras/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from . import backend 3 | from . import datasets 4 | from . import engine 5 | from . import layers 6 | from . import preprocessing 7 | from . import utils 8 | from . import wrappers 9 | from . import callbacks 10 | from . import constraints 11 | from . import initializations 12 | from . import metrics 13 | from . import models 14 | from . import objectives 15 | from . import optimizers 16 | from . import regularizers 17 | 18 | __version__ = '1.1.0' 19 | -------------------------------------------------------------------------------- /keras/activations.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from . import backend as K 3 | 4 | 5 | def softmax(x): 6 | ndim = K.ndim(x) 7 | if ndim == 2: 8 | return K.softmax(x) 9 | elif ndim == 3: 10 | e = K.exp(x - K.max(x, axis=-1, keepdims=True)) 11 | s = K.sum(e, axis=-1, keepdims=True) 12 | return e / s 13 | else: 14 | raise Exception('Cannot apply softmax to a tensor that is not 2D or 3D. ' + 15 | 'Here, ndim=' + str(ndim)) 16 | 17 | 18 | def elu(x, alpha=1.0): 19 | return K.elu(x, alpha) 20 | 21 | def softplus(x): 22 | return K.softplus(x) 23 | 24 | 25 | def softsign(x): 26 | return K.softsign(x) 27 | 28 | 29 | def relu(x, alpha=0., max_value=None): 30 | return K.relu(x, alpha=alpha, max_value=max_value) 31 | 32 | 33 | def tanh(x): 34 | return K.tanh(x) 35 | 36 | 37 | def sigmoid(x): 38 | return K.sigmoid(x) 39 | 40 | 41 | def hard_sigmoid(x): 42 | return K.hard_sigmoid(x) 43 | 44 | 45 | def linear(x): 46 | ''' 47 | The function returns the variable that is passed in, so all types work. 
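    For example, a layer declared as `Dense(10, activation='linear')` applies
    no nonlinearity: its output is exactly the affine transform computed by the
    layer.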
48 | ''' 49 | return x 50 | 51 | 52 | from .utils.generic_utils import get_from_module 53 | def get(identifier): 54 | if identifier is None: 55 | return linear 56 | return get_from_module(identifier, globals(), 'activation function') 57 | -------------------------------------------------------------------------------- /keras/applications/__init__.py: -------------------------------------------------------------------------------- 1 | from .vgg16 import VGG16 2 | from .vgg19 import VGG19 3 | from .resnet50 import ResNet50 4 | from .inception_v3 import InceptionV3 5 | from .xception import Xception 6 | -------------------------------------------------------------------------------- /keras/applications/audio_conv_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from .. import backend as K 3 | 4 | 5 | TAGS = ['rock', 'pop', 'alternative', 'indie', 'electronic', 6 | 'female vocalists', 'dance', '00s', 'alternative rock', 'jazz', 7 | 'beautiful', 'metal', 'chillout', 'male vocalists', 8 | 'classic rock', 'soul', 'indie rock', 'Mellow', 'electronica', 9 | '80s', 'folk', '90s', 'chill', 'instrumental', 'punk', 10 | 'oldies', 'blues', 'hard rock', 'ambient', 'acoustic', 11 | 'experimental', 'female vocalist', 'guitar', 'Hip-Hop', 12 | '70s', 'party', 'country', 'easy listening', 13 | 'sexy', 'catchy', 'funk', 'electro', 'heavy metal', 14 | 'Progressive rock', '60s', 'rnb', 'indie pop', 15 | 'sad', 'House', 'happy'] 16 | 17 | 18 | def librosa_exists(): 19 | try: 20 | __import__('librosa') 21 | except ImportError: 22 | return False 23 | else: 24 | return True 25 | 26 | 27 | def preprocess_input(audio_path, dim_ordering='default'): 28 | '''Reads an audio file and outputs a Mel-spectrogram. 29 | ''' 30 | if dim_ordering == 'default': 31 | dim_ordering = K.image_dim_ordering() 32 | assert dim_ordering in {'tf', 'th'} 33 | 34 | if librosa_exists(): 35 | import librosa 36 | else: 37 | raise RuntimeError('Librosa is required to process audio files.\n' + 38 | 'Install it via `pip install librosa` \nor visit ' + 39 | 'http://librosa.github.io/librosa/ for details.') 40 | 41 | # mel-spectrogram parameters 42 | SR = 12000 43 | N_FFT = 512 44 | N_MELS = 96 45 | HOP_LEN = 256 46 | DURA = 29.12 47 | 48 | src, sr = librosa.load(audio_path, sr=SR) 49 | n_sample = src.shape[0] 50 | n_sample_wanted = int(DURA * SR) 51 | 52 | # trim the signal at the center 53 | if n_sample < n_sample_wanted: # if too short 54 | src = np.hstack((src, np.zeros((int(DURA * SR) - n_sample,)))) 55 | elif n_sample > n_sample_wanted: # if too long 56 | src = src[(n_sample - n_sample_wanted) / 2: 57 | (n_sample + n_sample_wanted) / 2] 58 | 59 | logam = librosa.logamplitude 60 | melgram = librosa.feature.melspectrogram 61 | x = logam(melgram(y=src, sr=SR, hop_length=HOP_LEN, 62 | n_fft=N_FFT, n_mels=N_MELS) ** 2, 63 | ref_power=1.0) 64 | 65 | if dim_ordering == 'th': 66 | x = np.expand_dims(x, axis=0) 67 | elif dim_ordering == 'tf': 68 | x = np.expand_dims(x, axis=3) 69 | return x 70 | 71 | 72 | def decode_predictions(preds, top_n=5): 73 | '''Decode the output of a music tagger model. 
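    Returns a list with one entry per prediction row; each entry holds the
    `top_n` (tag, score) pairs sorted by decreasing score, drawn from the
    50 tags in `TAGS` above.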
74 | 75 | # Arguments 76 | preds: 2-dimensional numpy array 77 | top_n: integer in [0, 50], number of items to show 78 | 79 | ''' 80 | assert len(preds.shape) == 2 and preds.shape[1] == 50 81 | results = [] 82 | for pred in preds: 83 | result = zip(TAGS, pred) 84 | result = sorted(result, key=lambda x: x[1], reverse=True) 85 | results.append(result[:top_n]) 86 | return results 87 | -------------------------------------------------------------------------------- /keras/applications/imagenet_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import json 3 | 4 | from ..utils.data_utils import get_file 5 | from .. import backend as K 6 | 7 | CLASS_INDEX = None 8 | CLASS_INDEX_PATH = 'https://s3.amazonaws.com/deep-learning-models/image-models/imagenet_class_index.json' 9 | 10 | 11 | def preprocess_input(x, dim_ordering='default'): 12 | if dim_ordering == 'default': 13 | dim_ordering = K.image_dim_ordering() 14 | assert dim_ordering in {'tf', 'th'} 15 | 16 | if dim_ordering == 'th': 17 | # 'RGB'->'BGR' 18 | x = x[:, ::-1, :, :] 19 | # Zero-center by mean pixel 20 | x[:, 0, :, :] -= 103.939 21 | x[:, 1, :, :] -= 116.779 22 | x[:, 2, :, :] -= 123.68 23 | else: 24 | # 'RGB'->'BGR' 25 | x = x[:, :, :, ::-1] 26 | # Zero-center by mean pixel 27 | x[:, :, :, 0] -= 103.939 28 | x[:, :, :, 1] -= 116.779 29 | x[:, :, :, 2] -= 123.68 30 | return x 31 | 32 | 33 | def decode_predictions(preds, top=5): 34 | global CLASS_INDEX 35 | if len(preds.shape) != 2 or preds.shape[1] != 1000: 36 | raise ValueError('`decode_predictions` expects ' 37 | 'a batch of predictions ' 38 | '(i.e. a 2D array of shape (samples, 1000)). ' 39 | 'Found array with shape: ' + str(preds.shape)) 40 | if CLASS_INDEX is None: 41 | fpath = get_file('imagenet_class_index.json', 42 | CLASS_INDEX_PATH, 43 | cache_subdir='models') 44 | CLASS_INDEX = json.load(open(fpath)) 45 | results = [] 46 | for pred in preds: 47 | top_indices = np.argpartition(pred, -top)[-top:][::-1] 48 | result = [tuple(CLASS_INDEX[str(i)]) + (pred[i],) for i in top_indices] 49 | results.append(result) 50 | return results 51 | -------------------------------------------------------------------------------- /keras/backend/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | import os 4 | import json 5 | import sys 6 | from .common import epsilon 7 | from .common import floatx 8 | from .common import set_epsilon 9 | from .common import set_floatx 10 | from .common import get_uid 11 | from .common import cast_to_floatx 12 | from .common import image_dim_ordering 13 | from .common import set_image_dim_ordering 14 | from .common import is_keras_tensor 15 | from .common import legacy_weight_ordering 16 | from .common import set_legacy_weight_ordering 17 | 18 | _keras_base_dir = os.path.expanduser('~') 19 | if not os.access(_keras_base_dir, os.W_OK): 20 | _keras_base_dir = '/tmp' 21 | 22 | _keras_dir = os.path.join(_keras_base_dir, '.keras') 23 | if not os.path.exists(_keras_dir): 24 | os.makedirs(_keras_dir) 25 | 26 | # Set theano as default backend for Windows users since tensorflow is not available for Windows yet. 
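# For reference, a minimal sketch of the configuration file this module reads
# and, if absent, creates at ~/.keras/keras.json (or under /tmp/.keras if the
# home directory is not writable); the values shown are the defaults used below
# and are only illustrative:
#
#     {
#         "floatx": "float32",
#         "epsilon": 1e-07,
#         "backend": "theano",
#         "image_dim_ordering": "tf"
#     }
#
# A KERAS_BACKEND environment variable, if set, overrides the "backend" entry
# (handled near the end of this module).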
27 | if os.name == 'nt': 28 | _BACKEND = 'theano' 29 | else: 30 | _BACKEND = 'theano' 31 | 32 | _config_path = os.path.expanduser(os.path.join(_keras_dir, 'keras.json')) 33 | if os.path.exists(_config_path): 34 | _config = json.load(open(_config_path)) 35 | _floatx = _config.get('floatx', floatx()) 36 | assert _floatx in {'float16', 'float32', 'float64'} 37 | _epsilon = _config.get('epsilon', epsilon()) 38 | assert type(_epsilon) == float 39 | _backend = _config.get('backend', _BACKEND) 40 | assert _backend in {'theano', 'tensorflow'} 41 | _image_dim_ordering = _config.get('image_dim_ordering', image_dim_ordering()) 42 | assert _image_dim_ordering in {'tf', 'th'} 43 | 44 | set_floatx(_floatx) 45 | set_epsilon(_epsilon) 46 | set_image_dim_ordering(_image_dim_ordering) 47 | _BACKEND = _backend 48 | 49 | # save config file 50 | if not os.path.exists(_config_path): 51 | _config = {'floatx': floatx(), 52 | 'epsilon': epsilon(), 53 | 'backend': _BACKEND, 54 | 'image_dim_ordering': image_dim_ordering()} 55 | with open(_config_path, 'w') as f: 56 | f.write(json.dumps(_config, indent=4)) 57 | 58 | if 'KERAS_BACKEND' in os.environ: 59 | _backend = os.environ['KERAS_BACKEND'] 60 | assert _backend in {'theano', 'tensorflow'} 61 | _BACKEND = _backend 62 | 63 | # import backend 64 | if _BACKEND == 'theano': 65 | sys.stderr.write('Using Theano backend.\n') 66 | from .theano_backend import * 67 | elif _BACKEND == 'tensorflow': 68 | sys.stderr.write('Using TensorFlow backend.\n') 69 | from .tensorflow_backend import * 70 | else: 71 | raise Exception('Unknown backend: ' + str(_BACKEND)) 72 | 73 | 74 | def backend(): 75 | '''Publicly accessible method 76 | for determining the current backend. 77 | ''' 78 | return _BACKEND 79 | -------------------------------------------------------------------------------- /keras/backend/common.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from collections import defaultdict 4 | 5 | # the type of float to use throughout the session. 6 | _FLOATX = 'float32' 7 | _EPSILON = 10e-8 8 | _UID_PREFIXES = defaultdict(int) 9 | _IMAGE_DIM_ORDERING = 'tf' 10 | _LEGACY_WEIGHT_ORDERING = False 11 | 12 | 13 | def epsilon(): 14 | '''Returns the value of the fuzz 15 | factor used in numeric expressions. 16 | ''' 17 | return _EPSILON 18 | 19 | 20 | def set_epsilon(e): 21 | '''Sets the value of the fuzz 22 | factor used in numeric expressions. 23 | ''' 24 | global _EPSILON 25 | _EPSILON = e 26 | 27 | 28 | def floatx(): 29 | '''Returns the default float type, as a string 30 | (e.g. 'float16', 'float32', 'float64'). 31 | ''' 32 | return _FLOATX 33 | 34 | 35 | def set_floatx(floatx): 36 | global _FLOATX 37 | if floatx not in {'float16', 'float32', 'float64'}: 38 | raise Exception('Unknown floatx type: ' + str(floatx)) 39 | _FLOATX = str(floatx) 40 | 41 | 42 | def cast_to_floatx(x): 43 | '''Cast a Numpy array to floatx. 44 | ''' 45 | return np.asarray(x, dtype=_FLOATX) 46 | 47 | 48 | def image_dim_ordering(): 49 | '''Returns the image dimension ordering 50 | convention ('th' or 'tf'). 51 | ''' 52 | return _IMAGE_DIM_ORDERING 53 | 54 | 55 | def set_image_dim_ordering(dim_ordering): 56 | '''Sets the value of the image dimension 57 | ordering convention ('th' or 'tf'). 
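    For example, `set_image_dim_ordering('th')` makes image tensors be
    interpreted as (channels, rows, cols), while 'tf' corresponds to
    (rows, cols, channels), in both cases excluding the batch axis.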
58 | ''' 59 | global _IMAGE_DIM_ORDERING 60 | if dim_ordering not in {'tf', 'th'}: 61 | raise Exception('Unknown dim_ordering:', dim_ordering) 62 | _IMAGE_DIM_ORDERING = str(dim_ordering) 63 | 64 | 65 | def get_uid(prefix=''): 66 | _UID_PREFIXES[prefix] += 1 67 | return _UID_PREFIXES[prefix] 68 | 69 | 70 | def reset_uids(): 71 | global _UID_PREFIXES 72 | _UID_PREFIXES = defaultdict(int) 73 | 74 | 75 | def is_keras_tensor(x): 76 | if hasattr(x, '_keras_shape'): 77 | return True 78 | else: 79 | return False 80 | 81 | 82 | def set_legacy_weight_ordering(value): 83 | global _LEGACY_WEIGHT_ORDERING 84 | assert value in {True, False} 85 | _LEGACY_WEIGHT_ORDERING = value 86 | 87 | 88 | def legacy_weight_ordering(): 89 | return _LEGACY_WEIGHT_ORDERING 90 | -------------------------------------------------------------------------------- /keras/constraints.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from . import backend as K 3 | 4 | 5 | class Constraint(object): 6 | def __call__(self, p): 7 | return p 8 | 9 | def get_config(self): 10 | return {'name': self.__class__.__name__} 11 | 12 | 13 | class MaxNorm(Constraint): 14 | '''Constrain the weights incident to each hidden unit to have a norm less than or equal to a desired value. 15 | 16 | # Arguments 17 | m: the maximum norm for the incoming weights. 18 | axis: integer, axis along which to calculate weight norms. For instance, 19 | in a `Dense` layer the weight matrix has shape (input_dim, output_dim), 20 | set `axis` to `0` to constrain each weight vector of length (input_dim). 21 | In a `MaxoutDense` layer the weight tensor has shape (nb_feature, input_dim, output_dim), 22 | set `axis` to `1` to constrain each weight vector of length (input_dim), 23 | i.e. constrain the filters incident to the `max` operation. 24 | In a `Convolution2D` layer with the Theano backend, the weight tensor 25 | has shape (nb_filter, stack_size, nb_row, nb_col), set `axis` to `[1,2,3]` 26 | to constrain the weights of each filter tensor of size (stack_size, nb_row, nb_col). 27 | In a `Convolution2D` layer with the TensorFlow backend, the weight tensor 28 | has shape (nb_row, nb_col, stack_size, nb_filter), set `axis` to `[0,1,2]` 29 | to constrain the weights of each filter tensor of size (nb_row, nb_col, stack_size). 30 | 31 | # References 32 | - [Dropout: A Simple Way to Prevent Neural Networks from Overfitting Srivastava, Hinton, et al. 2014](http://www.cs.toronto.edu/~rsalakhu/papers/srivastava14a.pdf) 33 | ''' 34 | def __init__(self, m=2, axis=0): 35 | self.m = m 36 | self.axis = axis 37 | 38 | def __call__(self, p): 39 | norms = K.sqrt(K.sum(K.square(p), axis=self.axis, keepdims=True)) 40 | desired = K.clip(norms, 0, self.m) 41 | p = p * (desired / (K.epsilon() + norms)) 42 | return p 43 | 44 | def get_config(self): 45 | return {'name': self.__class__.__name__, 46 | 'm': self.m, 47 | 'axis': self.axis} 48 | 49 | 50 | class NonNeg(Constraint): 51 | '''Constrain the weights to be non-negative. 52 | ''' 53 | def __call__(self, p): 54 | p *= K.cast(p >= 0., K.floatx()) 55 | return p 56 | 57 | 58 | class UnitNorm(Constraint): 59 | '''Constrain the weights incident to each hidden unit to have unit norm. 60 | 61 | # Arguments 62 | axis: integer, axis along which to calculate weight norms. For instance, 63 | in a `Dense` layer the weight matrix has shape (input_dim, output_dim), 64 | set `axis` to `0` to constrain each weight vector of length (input_dim). 
65 | In a `MaxoutDense` layer the weight tensor has shape (nb_feature, input_dim, output_dim), 66 | set `axis` to `1` to constrain each weight vector of length (input_dim), 67 | i.e. constrain the filters incident to the `max` operation. 68 | In a `Convolution2D` layer with the Theano backend, the weight tensor 69 | has shape (nb_filter, stack_size, nb_row, nb_col), set `axis` to `[1,2,3]` 70 | to constrain the weights of each filter tensor of size (stack_size, nb_row, nb_col). 71 | In a `Convolution2D` layer with the TensorFlow backend, the weight tensor 72 | has shape (nb_row, nb_col, stack_size, nb_filter), set `axis` to `[0,1,2]` 73 | to constrain the weights of each filter tensor of size (nb_row, nb_col, stack_size). 74 | ''' 75 | def __init__(self, axis=0): 76 | self.axis = axis 77 | 78 | def __call__(self, p): 79 | return p / (K.epsilon() + K.sqrt(K.sum(K.square(p), axis=self.axis, keepdims=True))) 80 | 81 | def get_config(self): 82 | return {'name': self.__class__.__name__, 83 | 'axis': self.axis} 84 | 85 | 86 | maxnorm = MaxNorm 87 | nonneg = NonNeg 88 | unitnorm = UnitNorm 89 | 90 | from .utils.generic_utils import get_from_module 91 | def get(identifier, kwargs=None): 92 | return get_from_module(identifier, globals(), 'constraint', 93 | instantiate=True, kwargs=kwargs) 94 | -------------------------------------------------------------------------------- /keras/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/keras/ced92ff0293f95bf1c200b55af098e8e136686c2/keras/datasets/__init__.py -------------------------------------------------------------------------------- /keras/datasets/cifar.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | import sys 4 | from six.moves import cPickle 5 | 6 | 7 | def load_batch(fpath, label_key='labels'): 8 | f = open(fpath, 'rb') 9 | if sys.version_info < (3,): 10 | d = cPickle.load(f) 11 | else: 12 | d = cPickle.load(f, encoding="bytes") 13 | # decode utf8 14 | for k, v in d.items(): 15 | del(d[k]) 16 | d[k.decode("utf8")] = v 17 | f.close() 18 | data = d["data"] 19 | labels = d[label_key] 20 | 21 | data = data.reshape(data.shape[0], 3, 32, 32) 22 | return data, labels 23 | -------------------------------------------------------------------------------- /keras/datasets/cifar10.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from .cifar import load_batch 3 | from ..utils.data_utils import get_file 4 | from .. 
import backend as K 5 | import numpy as np 6 | import os 7 | 8 | 9 | def load_data(): 10 | dirname = "cifar-10-batches-py" 11 | origin = "http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz" 12 | path = get_file(dirname, origin=origin, untar=True) 13 | 14 | nb_train_samples = 50000 15 | 16 | X_train = np.zeros((nb_train_samples, 3, 32, 32), dtype="uint8") 17 | y_train = np.zeros((nb_train_samples,), dtype="uint8") 18 | 19 | for i in range(1, 6): 20 | fpath = os.path.join(path, 'data_batch_' + str(i)) 21 | data, labels = load_batch(fpath) 22 | X_train[(i - 1) * 10000: i * 10000, :, :, :] = data 23 | y_train[(i - 1) * 10000: i * 10000] = labels 24 | 25 | fpath = os.path.join(path, 'test_batch') 26 | X_test, y_test = load_batch(fpath) 27 | 28 | y_train = np.reshape(y_train, (len(y_train), 1)) 29 | y_test = np.reshape(y_test, (len(y_test), 1)) 30 | 31 | if K.image_dim_ordering() == 'tf': 32 | X_train = X_train.transpose(0, 2, 3, 1) 33 | X_test = X_test.transpose(0, 2, 3, 1) 34 | 35 | return (X_train, y_train), (X_test, y_test) 36 | -------------------------------------------------------------------------------- /keras/datasets/cifar100.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from .cifar import load_batch 3 | from ..utils.data_utils import get_file 4 | from .. import backend as K 5 | import numpy as np 6 | import os 7 | 8 | 9 | def load_data(label_mode='fine'): 10 | if label_mode not in ['fine', 'coarse']: 11 | raise Exception('label_mode must be one of "fine" "coarse".') 12 | 13 | dirname = "cifar-100-python" 14 | origin = "http://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz" 15 | path = get_file(dirname, origin=origin, untar=True) 16 | 17 | fpath = os.path.join(path, 'train') 18 | X_train, y_train = load_batch(fpath, label_key=label_mode+'_labels') 19 | 20 | fpath = os.path.join(path, 'test') 21 | X_test, y_test = load_batch(fpath, label_key=label_mode+'_labels') 22 | 23 | y_train = np.reshape(y_train, (len(y_train), 1)) 24 | y_test = np.reshape(y_test, (len(y_test), 1)) 25 | 26 | if K.image_dim_ordering() == 'tf': 27 | X_train = X_train.transpose(0, 2, 3, 1) 28 | X_test = X_test.transpose(0, 2, 3, 1) 29 | 30 | return (X_train, y_train), (X_test, y_test) 31 | -------------------------------------------------------------------------------- /keras/datasets/data_utils.py: -------------------------------------------------------------------------------- 1 | from ..utils.data_utils import * 2 | import warnings 3 | 4 | warnings.warn('data_utils has been moved to keras.utils.data_utils.') 5 | -------------------------------------------------------------------------------- /keras/datasets/imdb.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from six.moves import cPickle 3 | import gzip 4 | from ..utils.data_utils import get_file 5 | from six.moves import zip 6 | import numpy as np 7 | import sys 8 | 9 | 10 | def load_data(path='imdb_full.pkl', nb_words=None, skip_top=0, 11 | maxlen=None, seed=113, 12 | start_char=1, oov_char=2, index_from=3): 13 | ''' 14 | # Arguments 15 | path: where to store the data (in `/.keras/dataset`) 16 | nb_words: max number of words to include. Words are ranked 17 | by how often they occur (in the training set) and only 18 | the most frequent words are kept 19 | skip_top: skip the top N most frequently occuring words 20 | (which may not be informative). 
21 | maxlen: truncate sequences after this length. 22 | seed: random seed for sample shuffling. 23 | start_char: The start of a sequence will be marked with this character. 24 | Set to 1 because 0 is usually the padding character. 25 | oov_char: words that were cut out because of the `nb_words` 26 | or `skip_top` limit will be replaced with this character. 27 | index_from: index actual words with this index and higher. 28 | 29 | Note that the 'out of vocabulary' character is only used for 30 | words that were present in the training set but are not included 31 | because they're not making the `nb_words` cut here. 32 | Words that were not seen in the trining set but are in the test set 33 | have simply been skipped. 34 | ''' 35 | path = get_file(path, 36 | origin='https://s3.amazonaws.com/text-datasets/imdb_full.pkl', 37 | md5_hash='d091312047c43cf9e4e38fef92437263') 38 | 39 | if path.endswith('.gz'): 40 | f = gzip.open(path, 'rb') 41 | else: 42 | f = open(path, 'rb') 43 | 44 | (x_train, labels_train), (x_test, labels_test) = cPickle.load(f) 45 | f.close() 46 | 47 | np.random.seed(seed) 48 | np.random.shuffle(x_train) 49 | np.random.seed(seed) 50 | np.random.shuffle(labels_train) 51 | 52 | np.random.seed(seed * 2) 53 | np.random.shuffle(x_test) 54 | np.random.seed(seed * 2) 55 | np.random.shuffle(labels_test) 56 | 57 | X = x_train + x_test 58 | labels = labels_train + labels_test 59 | 60 | if start_char is not None: 61 | X = [[start_char] + [w + index_from for w in x] for x in X] 62 | elif index_from: 63 | X = [[w + index_from for w in x] for x in X] 64 | 65 | if maxlen: 66 | new_X = [] 67 | new_labels = [] 68 | for x, y in zip(X, labels): 69 | if len(x) < maxlen: 70 | new_X.append(x) 71 | new_labels.append(y) 72 | X = new_X 73 | labels = new_labels 74 | if not X: 75 | raise Exception('After filtering for sequences shorter than maxlen=' + 76 | str(maxlen) + ', no sequence was kept. 
' 77 | 'Increase maxlen.') 78 | if not nb_words: 79 | nb_words = max([max(x) for x in X]) 80 | 81 | # by convention, use 2 as OOV word 82 | # reserve 'index_from' (=3 by default) characters: 0 (padding), 1 (start), 2 (OOV) 83 | if oov_char is not None: 84 | X = [[oov_char if (w >= nb_words or w < skip_top) else w for w in x] for x in X] 85 | else: 86 | nX = [] 87 | for x in X: 88 | nx = [] 89 | for w in x: 90 | if (w >= nb_words or w < skip_top): 91 | nx.append(w) 92 | nX.append(nx) 93 | X = nX 94 | 95 | X_train = np.array(X[:len(x_train)]) 96 | y_train = np.array(labels[:len(x_train)]) 97 | 98 | X_test = np.array(X[len(x_train):]) 99 | y_test = np.array(labels[len(x_train):]) 100 | 101 | return (X_train, y_train), (X_test, y_test) 102 | 103 | 104 | def get_word_index(path='imdb_word_index.pkl'): 105 | path = get_file(path, 106 | origin='https://s3.amazonaws.com/text-datasets/imdb_word_index.pkl', 107 | md5_hash='72d94b01291be4ff843198d3b0e1e4d7') 108 | f = open(path, 'rb') 109 | 110 | if sys.version_info < (3,): 111 | data = cPickle.load(f) 112 | else: 113 | data = cPickle.load(f, encoding='latin1') 114 | 115 | f.close() 116 | return data 117 | -------------------------------------------------------------------------------- /keras/datasets/mnist.py: -------------------------------------------------------------------------------- 1 | import gzip 2 | from ..utils.data_utils import get_file 3 | from six.moves import cPickle 4 | import sys 5 | 6 | 7 | def load_data(path='mnist.pkl.gz'): 8 | path = get_file(path, origin='https://s3.amazonaws.com/img-datasets/mnist.pkl.gz') 9 | 10 | if path.endswith('.gz'): 11 | f = gzip.open(path, 'rb') 12 | else: 13 | f = open(path, 'rb') 14 | 15 | if sys.version_info < (3,): 16 | data = cPickle.load(f) 17 | else: 18 | data = cPickle.load(f, encoding='bytes') 19 | 20 | f.close() 21 | return data # (X_train, y_train), (X_test, y_test) 22 | -------------------------------------------------------------------------------- /keras/datasets/reuters.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from ..utils.data_utils import get_file 4 | from six.moves import cPickle 5 | from six.moves import zip 6 | import numpy as np 7 | import sys 8 | 9 | 10 | def load_data(path='reuters.pkl', nb_words=None, skip_top=0, 11 | maxlen=None, test_split=0.2, seed=113, 12 | start_char=1, oov_char=2, index_from=3): 13 | '''Loads the Reuters newswire classification dataset. 14 | 15 | # Arguments 16 | path: where to store the data (in `/.keras/dataset`) 17 | nb_words: max number of words to include. Words are ranked 18 | by how often they occur (in the training set) and only 19 | the most frequent words are kept 20 | skip_top: skip the top N most frequently occuring words 21 | (which may not be informative). 22 | maxlen: truncate sequences after this length. 23 | test_split: Fraction of the dataset to be used as test data. 24 | seed: random seed for sample shuffling. 25 | start_char: The start of a sequence will be marked with this character. 26 | Set to 1 because 0 is usually the padding character. 27 | oov_char: words that were cut out because of the `nb_words` 28 | or `skip_top` limit will be replaced with this character. 29 | index_from: index actual words with this index and higher. 
30 | 31 | Note that the 'out of vocabulary' character is only used for 32 | words that were present in the training set but are not included 33 | because they're not making the `nb_words` cut here. 34 | Words that were not seen in the trining set but are in the test set 35 | have simply been skipped. 36 | ''' 37 | 38 | path = get_file(path, origin='https://s3.amazonaws.com/text-datasets/reuters.pkl') 39 | f = open(path, 'rb') 40 | X, labels = cPickle.load(f) 41 | f.close() 42 | 43 | np.random.seed(seed) 44 | np.random.shuffle(X) 45 | np.random.seed(seed) 46 | np.random.shuffle(labels) 47 | 48 | if start_char is not None: 49 | X = [[start_char] + [w + index_from for w in x] for x in X] 50 | elif index_from: 51 | X = [[w + index_from for w in x] for x in X] 52 | 53 | if maxlen: 54 | new_X = [] 55 | new_labels = [] 56 | for x, y in zip(X, labels): 57 | if len(x) < maxlen: 58 | new_X.append(x) 59 | new_labels.append(y) 60 | X = new_X 61 | labels = new_labels 62 | 63 | if not nb_words: 64 | nb_words = max([max(x) for x in X]) 65 | 66 | # by convention, use 2 as OOV word 67 | # reserve 'index_from' (=3 by default) characters: 0 (padding), 1 (start), 2 (OOV) 68 | if oov_char is not None: 69 | X = [[oov_char if (w >= nb_words or w < skip_top) else w for w in x] for x in X] 70 | else: 71 | nX = [] 72 | for x in X: 73 | nx = [] 74 | for w in x: 75 | if (w >= nb_words or w < skip_top): 76 | nx.append(w) 77 | nX.append(nx) 78 | X = nX 79 | 80 | X_train = X[:int(len(X) * (1 - test_split))] 81 | y_train = labels[:int(len(X) * (1 - test_split))] 82 | 83 | X_test = X[int(len(X) * (1 - test_split)):] 84 | y_test = labels[int(len(X) * (1 - test_split)):] 85 | 86 | return (X_train, y_train), (X_test, y_test) 87 | 88 | 89 | def get_word_index(path='reuters_word_index.pkl'): 90 | path = get_file(path, origin='https://s3.amazonaws.com/text-datasets/reuters_word_index.pkl') 91 | f = open(path, 'rb') 92 | 93 | if sys.version_info < (3,): 94 | data = cPickle.load(f) 95 | else: 96 | data = cPickle.load(f, encoding='latin1') 97 | 98 | f.close() 99 | return data 100 | -------------------------------------------------------------------------------- /keras/engine/__init__.py: -------------------------------------------------------------------------------- 1 | # note: topology.Node is an internal class, 2 | # it isn't meant to be used by Keras users. 3 | from .topology import InputSpec 4 | from .topology import Input 5 | from .topology import InputLayer 6 | from .topology import Layer 7 | from .topology import Merge 8 | from .topology import merge 9 | from .topology import get_source_inputs 10 | from .training import Model 11 | -------------------------------------------------------------------------------- /keras/initializations.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import numpy as np 3 | from . import backend as K 4 | 5 | 6 | def get_fans(shape, dim_ordering='th'): 7 | if len(shape) == 2: 8 | fan_in = shape[0] 9 | fan_out = shape[1] 10 | elif len(shape) == 4 or len(shape) == 5: 11 | # assuming convolution kernels (2D or 3D). 12 | # TH kernel shape: (depth, input_depth, ...) 
13 | # TF kernel shape: (..., input_depth, depth) 14 | if dim_ordering == 'th': 15 | receptive_field_size = np.prod(shape[2:]) 16 | fan_in = shape[1] * receptive_field_size 17 | fan_out = shape[0] * receptive_field_size 18 | elif dim_ordering == 'tf': 19 | receptive_field_size = np.prod(shape[:2]) 20 | fan_in = shape[-2] * receptive_field_size 21 | fan_out = shape[-1] * receptive_field_size 22 | else: 23 | raise Exception('Invalid dim_ordering: ' + dim_ordering) 24 | else: 25 | # no specific assumptions 26 | fan_in = np.sqrt(np.prod(shape)) 27 | fan_out = np.sqrt(np.prod(shape)) 28 | return fan_in, fan_out 29 | 30 | 31 | def uniform(shape, scale=0.05, name=None): 32 | return K.random_uniform_variable(shape, -scale, scale, name=name) 33 | 34 | 35 | def normal(shape, scale=0.05, name=None): 36 | return K.random_normal_variable(shape, 0.0, scale, name=name) 37 | 38 | 39 | def lecun_uniform(shape, name=None, dim_ordering='th'): 40 | ''' Reference: LeCun 98, Efficient Backprop 41 | http://yann.lecun.com/exdb/publis/pdf/lecun-98b.pdf 42 | ''' 43 | fan_in, fan_out = get_fans(shape, dim_ordering=dim_ordering) 44 | scale = np.sqrt(3. / fan_in) 45 | return uniform(shape, scale, name=name) 46 | 47 | 48 | def glorot_normal(shape, name=None, dim_ordering='th'): 49 | ''' Reference: Glorot & Bengio, AISTATS 2010 50 | ''' 51 | fan_in, fan_out = get_fans(shape, dim_ordering=dim_ordering) 52 | s = np.sqrt(2. / (fan_in + fan_out)) 53 | return normal(shape, s, name=name) 54 | 55 | 56 | def glorot_uniform(shape, name=None, dim_ordering='th'): 57 | fan_in, fan_out = get_fans(shape, dim_ordering=dim_ordering) 58 | s = np.sqrt(6. / (fan_in + fan_out)) 59 | return uniform(shape, s, name=name) 60 | 61 | 62 | def he_normal(shape, name=None, dim_ordering='th'): 63 | ''' Reference: He et al., http://arxiv.org/abs/1502.01852 64 | ''' 65 | fan_in, fan_out = get_fans(shape, dim_ordering=dim_ordering) 66 | s = np.sqrt(2. / fan_in) 67 | return normal(shape, s, name=name) 68 | 69 | 70 | def he_uniform(shape, name=None, dim_ordering='th'): 71 | fan_in, fan_out = get_fans(shape, dim_ordering=dim_ordering) 72 | s = np.sqrt(6. / fan_in) 73 | return uniform(shape, s, name=name) 74 | 75 | 76 | def orthogonal(shape, scale=1.1, name=None): 77 | ''' From Lasagne. 
Reference: Saxe et al., http://arxiv.org/abs/1312.6120 78 | ''' 79 | flat_shape = (shape[0], np.prod(shape[1:])) 80 | a = np.random.normal(0.0, 1.0, flat_shape) 81 | u, _, v = np.linalg.svd(a, full_matrices=False) 82 | # pick the one with the correct shape 83 | q = u if u.shape == flat_shape else v 84 | q = q.reshape(shape) 85 | return K.variable(scale * q[:shape[0], :shape[1]], name=name) 86 | 87 | 88 | def identity(shape, scale=1, name=None): 89 | if len(shape) != 2 or shape[0] != shape[1]: 90 | raise Exception('Identity matrix initialization can only be used ' 91 | 'for 2D square matrices.') 92 | else: 93 | return K.variable(scale * np.identity(shape[0]), name=name) 94 | 95 | 96 | def zero(shape, name=None): 97 | return K.zeros(shape, name=name) 98 | 99 | 100 | def one(shape, name=None): 101 | return K.ones(shape, name=name) 102 | 103 | 104 | from .utils.generic_utils import get_from_module 105 | def get(identifier, **kwargs): 106 | return get_from_module(identifier, globals(), 107 | 'initialization', kwargs=kwargs) 108 | -------------------------------------------------------------------------------- /keras/layers/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from ..engine import Layer, Input, InputLayer, Merge, merge, InputSpec 3 | from .core import * 4 | from .convolutional import * 5 | from .pooling import * 6 | from .local import * 7 | from .recurrent import * 8 | from .normalization import * 9 | from .embeddings import * 10 | from .noise import * 11 | from .advanced_activations import * 12 | from .wrappers import * 13 | -------------------------------------------------------------------------------- /keras/layers/noise.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from ..engine import Layer 3 | from .. import backend as K 4 | import numpy as np 5 | 6 | 7 | class GaussianNoise(Layer): 8 | '''Apply to the input an additive zero-centered Gaussian noise with 9 | standard deviation `sigma`. This is useful to mitigate overfitting 10 | (you could see it as a kind of random data augmentation). 11 | Gaussian Noise (GS) is a natural choice as corruption process 12 | for real valued inputs. 13 | 14 | As it is a regularization layer, it is only active at training time. 15 | 16 | # Arguments 17 | sigma: float, standard deviation of the noise distribution. 18 | 19 | # Input shape 20 | Arbitrary. Use the keyword argument `input_shape` 21 | (tuple of integers, does not include the samples axis) 22 | when using this layer as the first layer in a model. 23 | 24 | # Output shape 25 | Same shape as input. 26 | ''' 27 | def __init__(self, sigma, **kwargs): 28 | self.supports_masking = True 29 | self.sigma = sigma 30 | self.uses_learning_phase = True 31 | super(GaussianNoise, self).__init__(**kwargs) 32 | 33 | def call(self, x, mask=None): 34 | noise_x = x + K.random_normal(shape=K.shape(x), 35 | mean=0., 36 | std=self.sigma) 37 | return K.in_train_phase(noise_x, x) 38 | 39 | def get_config(self): 40 | config = {'sigma': self.sigma} 41 | base_config = super(GaussianNoise, self).get_config() 42 | return dict(list(base_config.items()) + list(config.items())) 43 | 44 | 45 | class GaussianDropout(Layer): 46 | '''Apply to the input an multiplicative one-centered Gaussian noise 47 | with standard deviation `sqrt(p/(1-p))`. 48 | 49 | As it is a regularization layer, it is only active at training time. 
50 | 51 | # Arguments 52 | p: float, drop probability (as with `Dropout`). 53 | 54 | # Input shape 55 | Arbitrary. Use the keyword argument `input_shape` 56 | (tuple of integers, does not include the samples axis) 57 | when using this layer as the first layer in a model. 58 | 59 | # Output shape 60 | Same shape as input. 61 | 62 | # References 63 | [Dropout: A Simple Way to Prevent Neural Networks from Overfitting Srivastava, Hinton, et al. 2014](http://www.cs.toronto.edu/~rsalakhu/papers/srivastava14a.pdf) 64 | ''' 65 | def __init__(self, p, **kwargs): 66 | self.supports_masking = True 67 | self.p = p 68 | if 0 < p < 1: 69 | self.uses_learning_phase = True 70 | super(GaussianDropout, self).__init__(**kwargs) 71 | 72 | def call(self, x, mask=None): 73 | if 0 < self.p < 1: 74 | noise_x = x * K.random_normal(shape=K.shape(x), mean=1.0, 75 | std=np.sqrt(self.p / (1.0 - self.p))) 76 | return K.in_train_phase(noise_x, x) 77 | return x 78 | 79 | def get_config(self): 80 | config = {'p': self.p} 81 | base_config = super(GaussianDropout, self).get_config() 82 | return dict(list(base_config.items()) + list(config.items())) 83 | -------------------------------------------------------------------------------- /keras/legacy/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/keras/ced92ff0293f95bf1c200b55af098e8e136686c2/keras/legacy/__init__.py -------------------------------------------------------------------------------- /keras/objectives.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import numpy as np 3 | from . import backend as K 4 | 5 | 6 | def mean_squared_error(y_true, y_pred): 7 | return K.mean(K.square(y_pred - y_true), axis=-1) 8 | 9 | 10 | def mean_absolute_error(y_true, y_pred): 11 | return K.mean(K.abs(y_pred - y_true), axis=-1) 12 | 13 | 14 | def mean_absolute_percentage_error(y_true, y_pred): 15 | diff = K.abs((y_true - y_pred) / K.clip(K.abs(y_true), K.epsilon(), np.inf)) 16 | return 100. * K.mean(diff, axis=-1) 17 | 18 | 19 | def mean_squared_logarithmic_error(y_true, y_pred): 20 | first_log = K.log(K.clip(y_pred, K.epsilon(), np.inf) + 1.) 21 | second_log = K.log(K.clip(y_true, K.epsilon(), np.inf) + 1.) 22 | return K.mean(K.square(first_log - second_log), axis=-1) 23 | 24 | 25 | def squared_hinge(y_true, y_pred): 26 | return K.mean(K.square(K.maximum(1. - y_true * y_pred, 0.)), axis=-1) 27 | 28 | 29 | def hinge(y_true, y_pred): 30 | return K.mean(K.maximum(1. - y_true * y_pred, 0.), axis=-1) 31 | 32 | 33 | def categorical_crossentropy(y_true, y_pred): 34 | '''Expects a binary class matrix instead of a vector of scalar classes. 35 | ''' 36 | return K.categorical_crossentropy(y_pred, y_true) 37 | 38 | 39 | def sparse_categorical_crossentropy(y_true, y_pred): 40 | '''expects an array of integer classes. 41 | Note: labels shape must have the same number of dimensions as output shape. 42 | If you get a shape error, add a length-1 dimension to labels. 
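    For example, if the model output has shape (nb_samples, nb_classes),
    integer labels of shape (nb_samples,) should first be expanded to
    (nb_samples, 1), e.g. with `np.expand_dims(labels, -1)`.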
43 | ''' 44 | return K.sparse_categorical_crossentropy(y_pred, y_true) 45 | 46 | 47 | def binary_crossentropy(y_true, y_pred): 48 | return K.mean(K.binary_crossentropy(y_pred, y_true), axis=-1) 49 | 50 | 51 | def kullback_leibler_divergence(y_true, y_pred): 52 | y_true = K.clip(y_true, K.epsilon(), 1) 53 | y_pred = K.clip(y_pred, K.epsilon(), 1) 54 | return K.sum(y_true * K.log(y_true / y_pred), axis=-1) 55 | 56 | 57 | def poisson(y_true, y_pred): 58 | return K.mean(y_pred - y_true * K.log(y_pred + K.epsilon()), axis=-1) 59 | 60 | 61 | def cosine_proximity(y_true, y_pred): 62 | y_true = K.l2_normalize(y_true, axis=-1) 63 | y_pred = K.l2_normalize(y_pred, axis=-1) 64 | return -K.mean(y_true * y_pred, axis=-1) 65 | 66 | 67 | # aliases 68 | mse = MSE = mean_squared_error 69 | mae = MAE = mean_absolute_error 70 | mape = MAPE = mean_absolute_percentage_error 71 | msle = MSLE = mean_squared_logarithmic_error 72 | kld = KLD = kullback_leibler_divergence 73 | cosine = cosine_proximity 74 | 75 | from .utils.generic_utils import get_from_module 76 | def get(identifier): 77 | return get_from_module(identifier, globals(), 'objective') 78 | -------------------------------------------------------------------------------- /keras/preprocessing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/keras/ced92ff0293f95bf1c200b55af098e8e136686c2/keras/preprocessing/__init__.py -------------------------------------------------------------------------------- /keras/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/keras/ced92ff0293f95bf1c200b55af098e8e136686c2/keras/utils/__init__.py -------------------------------------------------------------------------------- /keras/utils/data_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | 4 | import tarfile 5 | import os 6 | import sys 7 | import shutil 8 | import hashlib 9 | from six.moves.urllib.request import urlopen 10 | from six.moves.urllib.error import URLError, HTTPError 11 | 12 | from ..utils.generic_utils import Progbar 13 | 14 | 15 | # Under Python 2, 'urlretrieve' relies on FancyURLopener from legacy 16 | # urllib module, known to have issues with proxy management 17 | if sys.version_info[0] == 2: 18 | def urlretrieve(url, filename, reporthook=None, data=None): 19 | def chunk_read(response, chunk_size=8192, reporthook=None): 20 | total_size = response.info().get('Content-Length').strip() 21 | total_size = int(total_size) 22 | count = 0 23 | while 1: 24 | chunk = response.read(chunk_size) 25 | count += 1 26 | if not chunk: 27 | reporthook(count, total_size, total_size) 28 | break 29 | if reporthook: 30 | reporthook(count, chunk_size, total_size) 31 | yield chunk 32 | 33 | response = urlopen(url, data) 34 | with open(filename, 'wb') as fd: 35 | for chunk in chunk_read(response, reporthook=reporthook): 36 | fd.write(chunk) 37 | else: 38 | from six.moves.urllib.request import urlretrieve 39 | 40 | 41 | def get_file(fname, origin, untar=False, 42 | md5_hash=None, cache_subdir='datasets'): 43 | '''Downloads a file from a URL if it not already in the cache. 44 | 45 | Passing the MD5 hash will verify the file after download as well as if it is already present in the cache. 
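    For example, `get_file('mnist.pkl.gz',
    origin='https://s3.amazonaws.com/img-datasets/mnist.pkl.gz')` (the call made
    by `keras.datasets.mnist`) stores the file under ~/.keras/datasets/ (or
    /tmp/.keras/datasets/ if the home directory is not writable) and returns the
    local path.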
46 | 47 | # Arguments 48 | fname: name of the file 49 | origin: original URL of the file 50 | untar: boolean, whether the file should be decompressed 51 | md5_hash: MD5 hash of the file for verification 52 | cache_subdir: directory being used as the cache 53 | 54 | # Returns 55 | Path to the downloaded file 56 | ''' 57 | datadir_base = os.path.expanduser(os.path.join('~', '.keras')) 58 | if not os.access(datadir_base, os.W_OK): 59 | datadir_base = os.path.join('/tmp', '.keras') 60 | datadir = os.path.join(datadir_base, cache_subdir) 61 | if not os.path.exists(datadir): 62 | os.makedirs(datadir) 63 | 64 | if untar: 65 | untar_fpath = os.path.join(datadir, fname) 66 | fpath = untar_fpath + '.tar.gz' 67 | else: 68 | fpath = os.path.join(datadir, fname) 69 | 70 | download = False 71 | if os.path.exists(fpath): 72 | # file found; verify integrity if a hash was provided 73 | if md5_hash is not None: 74 | if not validate_file(fpath, md5_hash): 75 | print('A local file was found, but it seems to be ' 76 | 'incomplete or outdated.') 77 | download = True 78 | else: 79 | download = True 80 | 81 | if download: 82 | print('Downloading data from', origin) 83 | global progbar 84 | progbar = None 85 | 86 | def dl_progress(count, block_size, total_size): 87 | global progbar 88 | if progbar is None: 89 | progbar = Progbar(total_size) 90 | else: 91 | progbar.update(count * block_size) 92 | 93 | error_msg = 'URL fetch failure on {}: {} -- {}' 94 | try: 95 | try: 96 | urlretrieve(origin, fpath, dl_progress) 97 | except URLError as e: 98 | raise Exception(error_msg.format(origin, e.errno, e.reason)) 99 | except HTTPError as e: 100 | raise Exception(error_msg.format(origin, e.code, e.msg)) 101 | except (Exception, KeyboardInterrupt) as e: 102 | if os.path.exists(fpath): 103 | os.remove(fpath) 104 | raise 105 | progbar = None 106 | 107 | if untar: 108 | if not os.path.exists(untar_fpath): 109 | print('Untaring file...') 110 | tfile = tarfile.open(fpath, 'r:gz') 111 | try: 112 | tfile.extractall(path=datadir) 113 | except (Exception, KeyboardInterrupt) as e: 114 | if os.path.exists(untar_fpath): 115 | if os.path.isfile(untar_fpath): 116 | os.remove(untar_fpath) 117 | else: 118 | shutil.rmtree(untar_fpath) 119 | raise 120 | tfile.close() 121 | return untar_fpath 122 | 123 | return fpath 124 | 125 | 126 | def validate_file(fpath, md5_hash): 127 | '''Validates a file against a MD5 hash 128 | 129 | # Arguments 130 | fpath: path to the file being validated 131 | md5_hash: the MD5 hash being validated against 132 | 133 | # Returns 134 | Whether the file is valid 135 | ''' 136 | hasher = hashlib.md5() 137 | with open(fpath, 'rb') as f: 138 | buf = f.read() 139 | hasher.update(buf) 140 | if str(hasher.hexdigest()) == str(md5_hash): 141 | return True 142 | else: 143 | return False 144 | -------------------------------------------------------------------------------- /keras/utils/io_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | import numpy as np 4 | import sys 5 | from collections import defaultdict 6 | 7 | 8 | class HDF5Matrix(): 9 | '''Representation of HDF5 dataset which can be used instead of a 10 | Numpy array. 11 | 12 | # Example 13 | 14 | ```python 15 | X_data = HDF5Matrix('input/file.hdf5', 'data') 16 | model.predict(X_data) 17 | ``` 18 | 19 | Providing start and end allows use of a slice of the dataset. 20 | 21 | Optionally, a normalizer function (or lambda) can be given. 
This will 22 | be called on every slice of data retrieved. 23 | 24 | # Arguments 25 | datapath: string, path to a HDF5 file 26 | dataset: string, name of the HDF5 dataset in the file specified 27 | in datapath 28 | start: int, start of desired slice of the specified dataset 29 | end: int, end of desired slice of the specified dataset 30 | normalizer: function to be called on data when retrieved 31 | 32 | ''' 33 | refs = defaultdict(int) 34 | 35 | def __init__(self, datapath, dataset, start=0, end=None, normalizer=None): 36 | import h5py 37 | 38 | if datapath not in list(self.refs.keys()): 39 | f = h5py.File(datapath) 40 | self.refs[datapath] = f 41 | else: 42 | f = self.refs[datapath] 43 | self.data = f[dataset] 44 | self.start = start 45 | if end is None: 46 | self.end = self.data.shape[0] 47 | else: 48 | self.end = end 49 | self.normalizer = normalizer 50 | 51 | def __len__(self): 52 | return self.end - self.start 53 | 54 | def __getitem__(self, key): 55 | if isinstance(key, slice): 56 | if key.stop + self.start <= self.end: 57 | idx = slice(key.start+self.start, key.stop + self.start) 58 | else: 59 | raise IndexError 60 | elif isinstance(key, int): 61 | if key + self.start < self.end: 62 | idx = key + self.start 63 | else: 64 | raise IndexError 65 | elif isinstance(key, np.ndarray): 66 | if np.max(key) + self.start < self.end: 67 | idx = (self.start + key).tolist() 68 | else: 69 | raise IndexError 70 | elif isinstance(key, list): 71 | if max(key) + self.start < self.end: 72 | idx = [x + self.start for x in key] 73 | else: 74 | raise IndexError 75 | if self.normalizer is not None: 76 | return self.normalizer(self.data[idx]) 77 | else: 78 | return self.data[idx] 79 | 80 | @property 81 | def shape(self): 82 | return (self.end - self.start,) + self.data.shape[1:] 83 | 84 | 85 | def save_array(array, name): 86 | import tables 87 | f = tables.open_file(name, 'w') 88 | atom = tables.Atom.from_dtype(array.dtype) 89 | ds = f.createCArray(f.root, 'data', atom, array.shape) 90 | ds[:] = array 91 | f.close() 92 | 93 | 94 | def load_array(name): 95 | import tables 96 | f = tables.open_file(name) 97 | array = f.root.data 98 | a = np.empty(shape=array.shape, dtype=array.dtype) 99 | a[:] = array[:] 100 | f.close() 101 | return a 102 | 103 | 104 | def ask_to_proceed_with_overwrite(filepath): 105 | get_input = input 106 | if sys.version_info[:2] <= (2, 7): 107 | get_input = raw_input 108 | overwrite = get_input('[WARNING] %s already exists - overwrite? ' 109 | '[y/n]' % (filepath)) 110 | while overwrite not in ['y', 'n']: 111 | overwrite = get_input('Enter "y" (overwrite) or "n" (cancel).') 112 | if overwrite == 'n': 113 | return False 114 | print('[TIP] Next time specify overwrite=True!') 115 | return True 116 | -------------------------------------------------------------------------------- /keras/utils/test_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy.testing import assert_allclose 3 | import inspect 4 | import functools 5 | 6 | from ..engine import Model, Input 7 | from ..models import Sequential, model_from_json 8 | from .. import backend as K 9 | 10 | 11 | def get_test_data(nb_train=1000, nb_test=500, input_shape=(10,), 12 | output_shape=(2,), 13 | classification=True, nb_class=2): 14 | ''' 15 | classification=True overrides output_shape 16 | (i.e. output_shape is set to (1,)) and the output 17 | consists in integers in [0, nb_class-1]. 18 | 19 | Otherwise: float output with shape output_shape. 
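    Example (illustrative): `(X_train, y_train), (X_test, y_test) =
    get_test_data(nb_train=200, nb_test=100, input_shape=(10,), nb_class=3)`
    yields 10-dimensional Gaussian clusters with integer class labels in [0, 2].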
20 | ''' 21 | nb_sample = nb_train + nb_test 22 | if classification: 23 | y = np.random.randint(0, nb_class, size=(nb_sample,)) 24 | X = np.zeros((nb_sample,) + input_shape) 25 | for i in range(nb_sample): 26 | X[i] = np.random.normal(loc=y[i], scale=0.7, size=input_shape) 27 | else: 28 | y_loc = np.random.random((nb_sample,)) 29 | X = np.zeros((nb_sample,) + input_shape) 30 | y = np.zeros((nb_sample,) + output_shape) 31 | for i in range(nb_sample): 32 | X[i] = np.random.normal(loc=y_loc[i], scale=0.7, size=input_shape) 33 | y[i] = np.random.normal(loc=y_loc[i], scale=0.7, size=output_shape) 34 | 35 | return (X[:nb_train], y[:nb_train]), (X[nb_train:], y[nb_train:]) 36 | 37 | 38 | def layer_test(layer_cls, kwargs={}, input_shape=None, input_dtype=None, 39 | input_data=None, expected_output=None, 40 | expected_output_dtype=None, fixed_batch_size=False): 41 | '''Test routine for a layer with a single input tensor 42 | and single output tensor. 43 | ''' 44 | if input_data is None: 45 | assert input_shape 46 | if not input_dtype: 47 | input_dtype = K.floatx() 48 | input_data = (10 * np.random.random(input_shape)).astype(input_dtype) 49 | elif input_shape is None: 50 | input_shape = input_data.shape 51 | 52 | if expected_output_dtype is None: 53 | expected_output_dtype = input_dtype 54 | 55 | # instantiation 56 | layer = layer_cls(**kwargs) 57 | 58 | # test get_weights , set_weights 59 | weights = layer.get_weights() 60 | layer.set_weights(weights) 61 | 62 | # test and instantiation from weights 63 | if 'weights' in inspect.getargspec(layer_cls.__init__): 64 | kwargs['weights'] = weights 65 | layer = layer_cls(**kwargs) 66 | 67 | # test in functional API 68 | if fixed_batch_size: 69 | x = Input(batch_shape=input_shape, dtype=input_dtype) 70 | else: 71 | x = Input(shape=input_shape[1:], dtype=input_dtype) 72 | y = layer(x) 73 | assert K.dtype(y) == expected_output_dtype 74 | 75 | model = Model(input=x, output=y) 76 | model.compile('rmsprop', 'mse') 77 | 78 | expected_output_shape = layer.get_output_shape_for(input_shape) 79 | actual_output = model.predict(input_data) 80 | actual_output_shape = actual_output.shape 81 | assert expected_output_shape == actual_output_shape 82 | if expected_output is not None: 83 | assert_allclose(actual_output, expected_output, rtol=1e-3) 84 | 85 | # test serialization 86 | model_config = model.get_config() 87 | model = Model.from_config(model_config) 88 | model.compile('rmsprop', 'mse') 89 | 90 | # test as first layer in Sequential API 91 | layer_config = layer.get_config() 92 | layer_config['batch_input_shape'] = input_shape 93 | layer = layer.__class__.from_config(layer_config) 94 | 95 | model = Sequential() 96 | model.add(layer) 97 | model.compile('rmsprop', 'mse') 98 | actual_output = model.predict(input_data) 99 | actual_output_shape = actual_output.shape 100 | assert expected_output_shape == actual_output_shape 101 | if expected_output is not None: 102 | assert_allclose(actual_output, expected_output, rtol=1e-3) 103 | 104 | # test JSON serialization 105 | json_model = model.to_json() 106 | model = model_from_json(json_model) 107 | 108 | # for further checks in the caller function 109 | return actual_output 110 | 111 | 112 | def keras_test(func): 113 | '''Clean up after tensorflow tests. 
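    Meant to be used as a decorator on test functions (`@keras_test` above a
    `def test_...` function) so that, when the TensorFlow backend is active, the
    session is cleared once the wrapped test returns.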
114 | ''' 115 | @functools.wraps(func) 116 | def wrapper(*args, **kwargs): 117 | output = func(*args, **kwargs) 118 | if K._BACKEND == 'tensorflow': 119 | K.clear_session() 120 | return output 121 | return wrapper 122 | -------------------------------------------------------------------------------- /keras/utils/visualize_util.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from ..layers.wrappers import Wrapper 4 | 5 | try: 6 | # pydot-ng is a fork of pydot that is better maintained 7 | import pydot_ng as pydot 8 | except ImportError: 9 | # fall back on pydot if necessary 10 | import pydot 11 | if not pydot.find_graphviz(): 12 | raise RuntimeError('Failed to import pydot. You must install pydot' 13 | ' and graphviz for `pydotprint` to work.') 14 | 15 | 16 | def model_to_dot(model, show_shapes=False, show_layer_names=True): 17 | dot = pydot.Dot() 18 | dot.set('rankdir', 'TB') 19 | dot.set('concentrate', True) 20 | dot.set_node_defaults(shape='record') 21 | 22 | if model.__class__.__name__ == 'Sequential': 23 | if not model.built: 24 | model.build() 25 | model = model.model 26 | layers = model.layers 27 | 28 | # Create graph nodes. 29 | for layer in layers: 30 | layer_id = str(id(layer)) 31 | 32 | # Append a wrapped layer's label to node's label, if it exists. 33 | layer_name = layer.name 34 | class_name = layer.__class__.__name__ 35 | if isinstance(layer, Wrapper): 36 | layer_name = '{}({})'.format(layer_name, layer.layer.name) 37 | class_name = '{}({})'.format(class_name, layer.layer.__class__.__name__) 38 | 39 | # Create node's label. 40 | if show_layer_names: 41 | label = '{}: {}'.format(layer_name, class_name) 42 | else: 43 | label = class_name 44 | 45 | # Rebuild the label as a table including input/output shapes. 46 | if show_shapes: 47 | try: 48 | outputlabels = str(layer.output_shape) 49 | except: 50 | outputlabels = 'multiple' 51 | if hasattr(layer, 'input_shape'): 52 | inputlabels = str(layer.input_shape) 53 | elif hasattr(layer, 'input_shapes'): 54 | inputlabels = ', '.join( 55 | [str(ishape) for ishape in layer.input_shapes]) 56 | else: 57 | inputlabels = 'multiple' 58 | label = '%s\n|{input:|output:}|{{%s}|{%s}}' % (label, inputlabels, outputlabels) 59 | 60 | node = pydot.Node(layer_id, label=label) 61 | dot.add_node(node) 62 | 63 | # Connect nodes with edges. 
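# Only inbound nodes registered in model.container_nodes produce edges, so uses of a shared layer outside this model are not drawn.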
64 | for layer in layers:
65 | layer_id = str(id(layer))
66 | for i, node in enumerate(layer.inbound_nodes):
67 | node_key = layer.name + '_ib-' + str(i)
68 | if node_key in model.container_nodes:
69 | for inbound_layer in node.inbound_layers:
70 | inbound_layer_id = str(id(inbound_layer))
71 | layer_id = str(id(layer))
72 | dot.add_edge(pydot.Edge(inbound_layer_id, layer_id))
73 | return dot
74 |
75 |
76 | def plot(model, to_file='model.png', show_shapes=False, show_layer_names=True):
77 | dot = model_to_dot(model, show_shapes, show_layer_names)
78 | _, format = os.path.splitext(to_file)
79 | if not format:
80 | format = 'png'
81 | else:
82 | format = format[1:]
83 | dot.write(to_file, format=format)
84 |
-------------------------------------------------------------------------------- /keras/wrappers/__init__.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/intel/keras/ced92ff0293f95bf1c200b55af098e8e136686c2/keras/wrappers/__init__.py
-------------------------------------------------------------------------------- /pytest.ini: --------------------------------------------------------------------------------
1 | # Configuration of py.test
2 | [pytest]
3 | addopts=-v
4 | -n 2
5 | --durations=10
6 | --cov-report term-missing
7 | --cov=keras
8 |
9 | # Do not run tests in the build folder
10 | norecursedirs= build
11 |
12 | # PEP-8 The following are ignored:
13 | # E251 unexpected spaces around keyword / parameter equals
14 | # E225 missing whitespace around operator
15 | # E226 missing whitespace around arithmetic operator
16 | # W293 blank line contains whitespace
17 | # E501 line too long (82 > 79 characters)
18 | # E402 module level import not at top of file - temporary measure to continue adding ros python packages in sys.path
19 | # E731 do not assign a lambda expression, use a def
20 | # E302 two blank lines between the functions
21 | # E261 at least two spaces before inline comment
22 |
23 |
24 | pep8ignore=* E251 \
25 | * E225 \
26 | * E226 \
27 | * W293 \
28 | * E501 \
29 | * E402 \
30 | * E731 \
31 | * E302 \
32 | * E261
33 |
-------------------------------------------------------------------------------- /setup.cfg: --------------------------------------------------------------------------------
1 | [metadata]
2 | description-file = README.md
-------------------------------------------------------------------------------- /setup.py: --------------------------------------------------------------------------------
1 | from setuptools import setup
2 | from setuptools import find_packages
3 |
4 |
5 | setup(name='Keras',
6 | version='1.1.0',
7 | description='Deep Learning for Python',
8 | author='Francois Chollet',
9 | author_email='francois.chollet@gmail.com',
10 | url='https://github.com/fchollet/keras',
11 | download_url='https://github.com/fchollet/keras/tarball/1.1.0',
12 | license='MIT',
13 | install_requires=['theano', 'pyyaml', 'six'],
14 | extras_require={
15 | 'h5py': ['h5py'],
16 | 'visualize': ['pydot-ng'],
17 | },
18 | packages=find_packages())
19 |
-------------------------------------------------------------------------------- /tests/integration_tests/test_image_data_tasks.py: --------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import numpy as np
3 | import pytest
4 |
5 | from keras.utils.test_utils import get_test_data, keras_test
6 | from keras.models import Sequential
7 | from keras.layers.core import Dense, Flatten, Activation
8 | from keras.layers.convolutional import Convolution2D, MaxPooling2D 9 | from keras.utils.np_utils import to_categorical 10 | 11 | 12 | @keras_test 13 | def test_image_classification(): 14 | ''' 15 | Classify random 16x16 color images into several classes using logistic regression 16 | with convolutional hidden layer. 17 | ''' 18 | np.random.seed(1337) 19 | input_shape = (16, 16, 3) 20 | (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=500, 21 | nb_test=200, 22 | input_shape=input_shape, 23 | classification=True, 24 | nb_class=4) 25 | y_train = to_categorical(y_train) 26 | y_test = to_categorical(y_test) 27 | # convolution kernel size 28 | nb_conv = 3 29 | # size of pooling area for max pooling 30 | nb_pool = 2 31 | 32 | model = Sequential([ 33 | Convolution2D(nb_filter=8, nb_row=nb_conv, nb_col=nb_conv, input_shape=input_shape), 34 | MaxPooling2D(pool_size=(nb_pool, nb_pool)), 35 | Flatten(), 36 | Activation('relu'), 37 | Dense(y_test.shape[-1], activation='softmax') 38 | ]) 39 | model.compile(loss='categorical_crossentropy', 40 | optimizer='rmsprop', 41 | metrics=['accuracy']) 42 | history = model.fit(X_train, y_train, nb_epoch=10, batch_size=16, 43 | validation_data=(X_test, y_test), 44 | verbose=0) 45 | assert(history.history['val_acc'][-1] > 0.85) 46 | 47 | 48 | if __name__ == '__main__': 49 | pytest.main([__file__]) 50 | -------------------------------------------------------------------------------- /tests/integration_tests/test_vector_data_tasks.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import numpy as np 3 | import pytest 4 | 5 | from keras.utils.test_utils import get_test_data, keras_test 6 | from keras.models import Sequential 7 | from keras.layers.core import Dense 8 | from keras.utils.np_utils import to_categorical 9 | 10 | 11 | @keras_test 12 | def test_vector_classification(): 13 | ''' 14 | Classify random float vectors into 2 classes with logistic regression 15 | using 2 layer neural network with ReLU hidden units. 16 | ''' 17 | np.random.seed(1337) 18 | nb_hidden = 10 19 | 20 | (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=500, 21 | nb_test=200, 22 | input_shape=(20,), 23 | classification=True, 24 | nb_class=2) 25 | y_train = to_categorical(y_train) 26 | y_test = to_categorical(y_test) 27 | 28 | model = Sequential([ 29 | Dense(nb_hidden, input_shape=(X_train.shape[-1],), activation='relu'), 30 | Dense(y_train.shape[-1], activation='softmax') 31 | ]) 32 | model.compile(loss='categorical_crossentropy', 33 | optimizer='rmsprop', 34 | metrics=['accuracy']) 35 | history = model.fit(X_train, y_train, nb_epoch=15, batch_size=16, 36 | validation_data=(X_test, y_test), 37 | verbose=0) 38 | assert(history.history['val_acc'][-1] > 0.8) 39 | 40 | 41 | @keras_test 42 | def test_vector_regression(): 43 | ''' 44 | Perform float data prediction (regression) using 2 layer MLP 45 | with tanh and sigmoid activations. 
46 | ''' 47 | np.random.seed(1337) 48 | nb_hidden = 10 49 | (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=500, 50 | nb_test=200, 51 | input_shape=(20,), 52 | output_shape=(2,), 53 | classification=False) 54 | 55 | model = Sequential([ 56 | Dense(nb_hidden, input_shape=(X_train.shape[-1],), activation='tanh'), 57 | Dense(y_train.shape[-1]) 58 | ]) 59 | 60 | model.compile(loss='hinge', optimizer='adagrad') 61 | history = model.fit(X_train, y_train, nb_epoch=20, batch_size=16, 62 | validation_data=(X_test, y_test), verbose=0) 63 | assert (history.history['val_loss'][-1] < 0.9) 64 | 65 | 66 | if __name__ == '__main__': 67 | pytest.main([__file__]) 68 | -------------------------------------------------------------------------------- /tests/keras/datasets/test_datasets.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import pytest 3 | import time 4 | import random 5 | from keras.datasets import cifar10, cifar100, reuters, imdb, mnist 6 | 7 | 8 | def test_cifar(): 9 | # only run data download tests 20% of the time 10 | # to speed up frequent testing 11 | random.seed(time.time()) 12 | if random.random() > 0.8: 13 | (X_train, y_train), (X_test, y_test) = cifar10.load_data() 14 | (X_train, y_train), (X_test, y_test) = cifar100.load_data('fine') 15 | (X_train, y_train), (X_test, y_test) = cifar100.load_data('coarse') 16 | 17 | 18 | def test_reuters(): 19 | # only run data download tests 20% of the time 20 | # to speed up frequent testing 21 | random.seed(time.time()) 22 | if random.random() > 0.8: 23 | (X_train, y_train), (X_test, y_test) = reuters.load_data() 24 | (X_train, y_train), (X_test, y_test) = reuters.load_data(maxlen=10) 25 | 26 | 27 | def test_mnist(): 28 | # only run data download tests 20% of the time 29 | # to speed up frequent testing 30 | random.seed(time.time()) 31 | if random.random() > 0.8: 32 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 33 | 34 | 35 | def test_imdb(): 36 | # only run data download tests 20% of the time 37 | # to speed up frequent testing 38 | random.seed(time.time()) 39 | if random.random() > 0.8: 40 | (X_train, y_train), (X_test, y_test) = imdb.load_data() 41 | (X_train, y_train), (X_test, y_test) = imdb.load_data(maxlen=40) 42 | 43 | 44 | if __name__ == '__main__': 45 | pytest.main([__file__]) 46 | -------------------------------------------------------------------------------- /tests/keras/layers/test_advanced_activations.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from keras.utils.test_utils import layer_test, keras_test 3 | 4 | 5 | @keras_test 6 | def test_leaky_relu(): 7 | from keras.layers.advanced_activations import LeakyReLU 8 | for alpha in [0., .5, -1.]: 9 | layer_test(LeakyReLU, kwargs={'alpha': alpha}, 10 | input_shape=(2, 3, 4)) 11 | 12 | 13 | @keras_test 14 | def test_prelu(): 15 | from keras.layers.advanced_activations import PReLU 16 | layer_test(PReLU, kwargs={}, 17 | input_shape=(2, 3, 4)) 18 | 19 | 20 | @keras_test 21 | def test_elu(): 22 | from keras.layers.advanced_activations import ELU 23 | for alpha in [0., .5, -1.]: 24 | layer_test(ELU, kwargs={'alpha': alpha}, 25 | input_shape=(2, 3, 4)) 26 | 27 | 28 | @keras_test 29 | def test_parametric_softplus(): 30 | from keras.layers.advanced_activations import ParametricSoftplus 31 | for alpha in [0., .5, -1.]: 32 | layer_test(ParametricSoftplus, 33 | kwargs={'alpha_init': 1., 34 | 'beta_init': -1}, 35 | input_shape=(2, 3, 
4)) 36 | 37 | 38 | @keras_test 39 | def test_thresholded_relu(): 40 | from keras.layers.advanced_activations import ThresholdedReLU 41 | layer_test(ThresholdedReLU, kwargs={'theta': 0.5}, 42 | input_shape=(2, 3, 4)) 43 | 44 | 45 | @keras_test 46 | def test_srelu(): 47 | from keras.layers.advanced_activations import SReLU 48 | layer_test(SReLU, kwargs={}, 49 | input_shape=(2, 3, 4)) 50 | 51 | 52 | if __name__ == '__main__': 53 | pytest.main([__file__]) 54 | -------------------------------------------------------------------------------- /tests/keras/layers/test_embeddings.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from keras.utils.test_utils import layer_test, keras_test 3 | from keras.layers.embeddings import Embedding 4 | import keras.backend as K 5 | 6 | 7 | @keras_test 8 | def test_embedding(): 9 | layer_test(Embedding, 10 | kwargs={'output_dim': 4, 'input_dim': 10, 'input_length': 2}, 11 | input_shape=(3, 2), 12 | input_dtype='int32', 13 | expected_output_dtype=K.floatx()) 14 | 15 | 16 | if __name__ == '__main__': 17 | pytest.main([__file__]) 18 | -------------------------------------------------------------------------------- /tests/keras/layers/test_local.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from keras.utils.test_utils import layer_test, keras_test 4 | from keras.layers import local 5 | 6 | 7 | @keras_test 8 | def test_locallyconnected_1d(): 9 | nb_samples = 2 10 | nb_steps = 8 11 | input_dim = 5 12 | filter_length = 3 13 | nb_filter = 4 14 | 15 | for border_mode in ['valid']: 16 | for subsample_length in [1]: 17 | if border_mode == 'same' and subsample_length != 1: 18 | continue 19 | layer_test(local.LocallyConnected1D, 20 | kwargs={'nb_filter': nb_filter, 21 | 'filter_length': filter_length, 22 | 'border_mode': border_mode, 23 | 'subsample_length': subsample_length}, 24 | input_shape=(nb_samples, nb_steps, input_dim)) 25 | 26 | layer_test(local.LocallyConnected1D, 27 | kwargs={'nb_filter': nb_filter, 28 | 'filter_length': filter_length, 29 | 'border_mode': border_mode, 30 | 'W_regularizer': 'l2', 31 | 'b_regularizer': 'l2', 32 | 'activity_regularizer': 'activity_l2', 33 | 'subsample_length': subsample_length}, 34 | input_shape=(nb_samples, nb_steps, input_dim)) 35 | 36 | 37 | @keras_test 38 | def test_locallyconnected_2d(): 39 | nb_samples = 8 40 | nb_filter = 3 41 | stack_size = 4 42 | nb_row = 6 43 | nb_col = 10 44 | 45 | for border_mode in ['valid']: 46 | for subsample in [(1, 1), (2, 2)]: 47 | if border_mode == 'same' and subsample != (1, 1): 48 | continue 49 | 50 | layer_test(local.LocallyConnected2D, 51 | kwargs={'nb_filter': nb_filter, 52 | 'nb_row': 3, 53 | 'nb_col': 3, 54 | 'border_mode': border_mode, 55 | 'W_regularizer': 'l2', 56 | 'b_regularizer': 'l2', 57 | 'activity_regularizer': 'activity_l2', 58 | 'subsample': subsample, 59 | 'dim_ordering': 'tf'}, 60 | input_shape=(nb_samples, nb_row, nb_col, stack_size)) 61 | 62 | layer_test(local.LocallyConnected2D, 63 | kwargs={'nb_filter': nb_filter, 64 | 'nb_row': 3, 65 | 'nb_col': 3, 66 | 'border_mode': border_mode, 67 | 'W_regularizer': 'l2', 68 | 'b_regularizer': 'l2', 69 | 'activity_regularizer': 'activity_l2', 70 | 'subsample': subsample, 71 | 'dim_ordering': 'th'}, 72 | input_shape=(nb_samples, stack_size, nb_row, nb_col)) 73 | 74 | 75 | if __name__ == '__main__': 76 | pytest.main([__file__]) 77 | -------------------------------------------------------------------------------- 
/tests/keras/layers/test_noise.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from keras.utils.test_utils import layer_test, keras_test 3 | from keras.layers import noise 4 | 5 | 6 | @keras_test 7 | def test_GaussianNoise(): 8 | layer_test(noise.GaussianNoise, 9 | kwargs={'sigma': 1.}, 10 | input_shape=(3, 2, 3)) 11 | 12 | 13 | @keras_test 14 | def test_GaussianDropout(): 15 | layer_test(noise.GaussianDropout, 16 | kwargs={'p': 0.5}, 17 | input_shape=(3, 2, 3)) 18 | 19 | 20 | if __name__ == '__main__': 21 | pytest.main([__file__]) 22 | -------------------------------------------------------------------------------- /tests/keras/layers/test_normalization.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | from numpy.testing import assert_allclose 4 | 5 | from keras.layers.core import Dense, Activation 6 | from keras.utils.test_utils import layer_test, keras_test 7 | from keras.layers import normalization 8 | from keras.models import Sequential 9 | from keras import backend as K 10 | 11 | input_1 = np.arange(10) 12 | input_2 = np.zeros(10) 13 | input_3 = np.ones((10)) 14 | input_shapes = [np.ones((10, 10)), np.ones((10, 10, 10))] 15 | 16 | 17 | @keras_test 18 | def basic_batchnorm_test(): 19 | from keras import regularizers 20 | layer_test(normalization.BatchNormalization, 21 | kwargs={'mode': 1, 22 | 'gamma_regularizer': regularizers.l2(0.01), 23 | 'beta_regularizer': regularizers.l2(0.01)}, 24 | input_shape=(3, 4, 2)) 25 | layer_test(normalization.BatchNormalization, 26 | kwargs={'mode': 0}, 27 | input_shape=(3, 4, 2)) 28 | 29 | 30 | @keras_test 31 | def test_batchnorm_mode_0_or_2(): 32 | for mode in [0, 2]: 33 | model = Sequential() 34 | norm_m0 = normalization.BatchNormalization(mode=mode, input_shape=(10,), momentum=0.8) 35 | model.add(norm_m0) 36 | model.compile(loss='mse', optimizer='sgd') 37 | 38 | # centered on 5.0, variance 10.0 39 | X = np.random.normal(loc=5.0, scale=10.0, size=(1000, 10)) 40 | model.fit(X, X, nb_epoch=4, verbose=0) 41 | out = model.predict(X) 42 | out -= K.eval(norm_m0.beta) 43 | out /= K.eval(norm_m0.gamma) 44 | 45 | assert_allclose(out.mean(), 0.0, atol=1e-1) 46 | assert_allclose(out.std(), 1.0, atol=1e-1) 47 | 48 | 49 | @keras_test 50 | def test_batchnorm_mode_0_convnet(): 51 | model = Sequential() 52 | norm_m0 = normalization.BatchNormalization(mode=0, axis=1, input_shape=(3, 4, 4), momentum=0.8) 53 | model.add(norm_m0) 54 | model.compile(loss='mse', optimizer='sgd') 55 | 56 | # centered on 5.0, variance 10.0 57 | X = np.random.normal(loc=5.0, scale=10.0, size=(1000, 3, 4, 4)) 58 | model.fit(X, X, nb_epoch=4, verbose=0) 59 | out = model.predict(X) 60 | out -= np.reshape(K.eval(norm_m0.beta), (1, 3, 1, 1)) 61 | out /= np.reshape(K.eval(norm_m0.gamma), (1, 3, 1, 1)) 62 | 63 | assert_allclose(np.mean(out, axis=(0, 2, 3)), 0.0, atol=1e-1) 64 | assert_allclose(np.std(out, axis=(0, 2, 3)), 1.0, atol=1e-1) 65 | 66 | 67 | @keras_test 68 | def test_batchnorm_mode_1(): 69 | norm_m1 = normalization.BatchNormalization(input_shape=(10,), mode=1) 70 | norm_m1.build(input_shape=(None, 10)) 71 | 72 | for inp in [input_1, input_2, input_3]: 73 | out = (norm_m1.call(K.variable(inp)) - norm_m1.beta) / norm_m1.gamma 74 | assert_allclose(K.eval(K.mean(out)), 0.0, atol=1e-1) 75 | if inp.std() > 0.: 76 | assert_allclose(K.eval(K.std(out)), 1.0, atol=1e-1) 77 | else: 78 | assert_allclose(K.eval(K.std(out)), 0.0, atol=1e-1) 79 | 80 | 81 | if 
__name__ == '__main__': 82 | pytest.main([__file__]) 83 | -------------------------------------------------------------------------------- /tests/keras/layers/test_wrappers.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | from numpy.testing import assert_allclose 4 | from keras.utils.test_utils import keras_test 5 | from keras.layers import wrappers, Input 6 | from keras.layers import core, convolutional, recurrent 7 | from keras.models import Sequential, Model, model_from_json 8 | 9 | 10 | @keras_test 11 | def test_TimeDistributed(): 12 | # first, test with Dense layer 13 | model = Sequential() 14 | model.add(wrappers.TimeDistributed(core.Dense(2), input_shape=(3, 4))) 15 | model.add(core.Activation('relu')) 16 | model.compile(optimizer='rmsprop', loss='mse') 17 | model.fit(np.random.random((10, 3, 4)), np.random.random((10, 3, 2)), nb_epoch=1, batch_size=10) 18 | 19 | # test config 20 | model.get_config() 21 | 22 | # compare to TimeDistributedDense 23 | test_input = np.random.random((1, 3, 4)) 24 | test_output = model.predict(test_input) 25 | weights = model.layers[0].get_weights() 26 | 27 | reference = Sequential() 28 | reference.add(core.TimeDistributedDense(2, input_shape=(3, 4), weights=weights)) 29 | reference.add(core.Activation('relu')) 30 | reference.compile(optimizer='rmsprop', loss='mse') 31 | 32 | reference_output = reference.predict(test_input) 33 | assert_allclose(test_output, reference_output, atol=1e-05) 34 | 35 | # test when specifying a batch_input_shape 36 | reference = Sequential() 37 | reference.add(core.TimeDistributedDense(2, batch_input_shape=(1, 3, 4), weights=weights)) 38 | reference.add(core.Activation('relu')) 39 | reference.compile(optimizer='rmsprop', loss='mse') 40 | 41 | reference_output = reference.predict(test_input) 42 | assert_allclose(test_output, reference_output, atol=1e-05) 43 | 44 | # test with Convolution2D 45 | model = Sequential() 46 | model.add(wrappers.TimeDistributed(convolutional.Convolution2D(5, 2, 2, border_mode='same'), input_shape=(2, 4, 4, 3))) 47 | model.add(core.Activation('relu')) 48 | model.compile(optimizer='rmsprop', loss='mse') 49 | model.train_on_batch(np.random.random((1, 2, 4, 4, 3)), np.random.random((1, 2, 4, 4, 5))) 50 | 51 | model = model_from_json(model.to_json()) 52 | model.summary() 53 | 54 | # test stacked layers 55 | model = Sequential() 56 | model.add(wrappers.TimeDistributed(core.Dense(2), input_shape=(3, 4))) 57 | model.add(wrappers.TimeDistributed(core.Dense(3))) 58 | model.add(core.Activation('relu')) 59 | model.compile(optimizer='rmsprop', loss='mse') 60 | 61 | model.fit(np.random.random((10, 3, 4)), np.random.random((10, 3, 3)), nb_epoch=1, batch_size=10) 62 | 63 | # test wrapping Sequential model 64 | model = Sequential() 65 | model.add(core.Dense(3, input_dim=2)) 66 | outer_model = Sequential() 67 | outer_model.add(wrappers.TimeDistributed(model, input_shape=(3, 2))) 68 | outer_model.compile(optimizer='rmsprop', loss='mse') 69 | outer_model.fit(np.random.random((10, 3, 2)), np.random.random((10, 3, 3)), nb_epoch=1, batch_size=10) 70 | 71 | # test with functional API 72 | x = Input(shape=(3, 2)) 73 | y = wrappers.TimeDistributed(model)(x) 74 | outer_model = Model(x, y) 75 | outer_model.compile(optimizer='rmsprop', loss='mse') 76 | outer_model.fit(np.random.random((10, 3, 2)), np.random.random((10, 3, 3)), nb_epoch=1, batch_size=10) 77 | 78 | 79 | @keras_test 80 | def test_Bidirectional(): 81 | rnn = recurrent.SimpleRNN 82 | 
nb_sample = 2 83 | dim = 2 84 | timesteps = 2 85 | output_dim = 2 86 | for mode in ['sum', 'concat']: 87 | x = np.random.random((nb_sample, timesteps, dim)) 88 | target_dim = 2 * output_dim if mode == 'concat' else output_dim 89 | y = np.random.random((nb_sample, target_dim)) 90 | 91 | # test with Sequential model 92 | model = Sequential() 93 | model.add(wrappers.Bidirectional(rnn(output_dim), 94 | merge_mode=mode, input_shape=(timesteps, dim))) 95 | model.compile(loss='mse', optimizer='sgd') 96 | model.fit(x, y, nb_epoch=1, batch_size=1) 97 | 98 | # test config 99 | model.get_config() 100 | model = model_from_json(model.to_json()) 101 | model.summary() 102 | 103 | # test stacked bidirectional layers 104 | model = Sequential() 105 | model.add(wrappers.Bidirectional(rnn(output_dim, return_sequences=True), 106 | merge_mode=mode, input_shape=(timesteps, dim))) 107 | model.add(wrappers.Bidirectional(rnn(output_dim), merge_mode=mode)) 108 | model.compile(loss='mse', optimizer='sgd') 109 | model.fit(x, y, nb_epoch=1, batch_size=1) 110 | 111 | # test with functional API 112 | input = Input((timesteps, dim)) 113 | output = wrappers.Bidirectional(rnn(output_dim), merge_mode=mode)(input) 114 | model = Model(input, output) 115 | model.compile(loss='mse', optimizer='sgd') 116 | model.fit(x, y, nb_epoch=1, batch_size=1) 117 | 118 | 119 | if __name__ == '__main__': 120 | pytest.main([__file__]) 121 | -------------------------------------------------------------------------------- /tests/keras/preprocessing/test_image.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from keras.preprocessing.image import * 3 | from PIL import Image 4 | import numpy as np 5 | import os 6 | import shutil 7 | import tempfile 8 | 9 | 10 | class TestImage: 11 | 12 | def setup_class(cls): 13 | img_w = img_h = 20 14 | rgb_images = [] 15 | gray_images = [] 16 | for n in range(8): 17 | bias = np.random.rand(img_w, img_h, 1) * 64 18 | variance = np.random.rand(img_w, img_h, 1) * (255-64) 19 | imarray = np.random.rand(img_w, img_h, 3) * variance + bias 20 | im = Image.fromarray(imarray.astype('uint8')).convert('RGB') 21 | rgb_images.append(im) 22 | 23 | imarray = np.random.rand(img_w, img_h, 1) * variance + bias 24 | im = Image.fromarray(imarray.astype('uint8').squeeze()).convert('L') 25 | gray_images.append(im) 26 | 27 | cls.all_test_images = [rgb_images, gray_images] 28 | 29 | def teardown_class(cls): 30 | del cls.all_test_images 31 | 32 | def test_image_data_generator(self): 33 | for test_images in self.all_test_images: 34 | img_list = [] 35 | for im in test_images: 36 | img_list.append(img_to_array(im)[None, ...]) 37 | 38 | images = np.vstack(img_list) 39 | generator = ImageDataGenerator( 40 | featurewise_center=True, 41 | samplewise_center=True, 42 | featurewise_std_normalization=True, 43 | samplewise_std_normalization=True, 44 | zca_whitening=True, 45 | rotation_range=90., 46 | width_shift_range=0.1, 47 | height_shift_range=0.1, 48 | shear_range=0.5, 49 | zoom_range=0.2, 50 | channel_shift_range=0., 51 | fill_mode='nearest', 52 | cval=0.5, 53 | horizontal_flip=True, 54 | vertical_flip=True) 55 | generator.fit(images, augment=True) 56 | 57 | tmp_folder = tempfile.mkdtemp(prefix='test_images') 58 | for x, y in generator.flow(images, np.arange(images.shape[0]), 59 | shuffle=True, save_to_dir=tmp_folder): 60 | assert x.shape[1:] == images.shape[1:] 61 | break 62 | shutil.rmtree(tmp_folder) 63 | 64 | def test_img_flip(self): 65 | x = np.array(range(4)).reshape([1, 1, 2, 2]) 
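# Axes 0 and 1 have size 1 here, so flipping them is a no-op; flipping axis 2 reverses the rows and flipping axis 3 reverses the columns.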
66 | assert (flip_axis(x, 0) == x).all() 67 | assert (flip_axis(x, 1) == x).all() 68 | assert (flip_axis(x, 2) == [[[[2, 3], [0, 1]]]]).all() 69 | assert (flip_axis(x, 3) == [[[[1, 0], [3, 2]]]]).all() 70 | 71 | dim_ordering_and_col_index = (('tf', 2), ('th', 3)) 72 | for dim_ordering, col_index in dim_ordering_and_col_index: 73 | image_generator_th = ImageDataGenerator( 74 | featurewise_center=False, 75 | samplewise_center=False, 76 | featurewise_std_normalization=False, 77 | samplewise_std_normalization=False, 78 | zca_whitening=False, 79 | rotation_range=0, 80 | width_shift_range=0, 81 | height_shift_range=0, 82 | shear_range=0, 83 | zoom_range=0, 84 | channel_shift_range=0, 85 | horizontal_flip=True, 86 | vertical_flip=False, 87 | dim_ordering=dim_ordering).flow(x, [1]) 88 | for i in range(10): 89 | potentially_flipped_x, _ = next(image_generator_th) 90 | assert ((potentially_flipped_x == x).all() or 91 | (potentially_flipped_x == flip_axis(x, col_index)).all()) 92 | 93 | 94 | if __name__ == '__main__': 95 | pytest.main([__file__]) 96 | -------------------------------------------------------------------------------- /tests/keras/preprocessing/test_sequence.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy.testing import assert_allclose 3 | 4 | import pytest 5 | 6 | from keras.preprocessing.sequence import pad_sequences 7 | from keras.preprocessing.sequence import make_sampling_table 8 | from keras.preprocessing.sequence import skipgrams 9 | 10 | 11 | def test_pad_sequences(): 12 | a = [[1], [1, 2], [1, 2, 3]] 13 | 14 | # test padding 15 | b = pad_sequences(a, maxlen=3, padding='pre') 16 | assert_allclose(b, [[0, 0, 1], [0, 1, 2], [1, 2, 3]]) 17 | b = pad_sequences(a, maxlen=3, padding='post') 18 | assert_allclose(b, [[1, 0, 0], [1, 2, 0], [1, 2, 3]]) 19 | 20 | # test truncating 21 | b = pad_sequences(a, maxlen=2, truncating='pre') 22 | assert_allclose(b, [[0, 1], [1, 2], [2, 3]]) 23 | b = pad_sequences(a, maxlen=2, truncating='post') 24 | assert_allclose(b, [[0, 1], [1, 2], [1, 2]]) 25 | 26 | # test value 27 | b = pad_sequences(a, maxlen=3, value=1) 28 | assert_allclose(b, [[1, 1, 1], [1, 1, 2], [1, 2, 3]]) 29 | 30 | 31 | def test_pad_sequences_vector(): 32 | a = [[[1, 1]], 33 | [[2, 1], [2, 2]], 34 | [[3, 1], [3, 2], [3, 3]]] 35 | 36 | # test padding 37 | b = pad_sequences(a, maxlen=3, padding='pre') 38 | assert_allclose(b, [[[0, 0], [0, 0], [1, 1]], 39 | [[0, 0], [2, 1], [2, 2]], 40 | [[3, 1], [3, 2], [3, 3]]]) 41 | b = pad_sequences(a, maxlen=3, padding='post') 42 | assert_allclose(b, [[[1, 1], [0, 0], [0, 0]], 43 | [[2, 1], [2, 2], [0, 0]], 44 | [[3, 1], [3, 2], [3, 3]]]) 45 | 46 | # test truncating 47 | b = pad_sequences(a, maxlen=2, truncating='pre') 48 | assert_allclose(b, [[[0, 0], [1, 1]], 49 | [[2, 1], [2, 2]], 50 | [[3, 2], [3, 3]]]) 51 | 52 | b = pad_sequences(a, maxlen=2, truncating='post') 53 | assert_allclose(b, [[[0, 0], [1, 1]], 54 | [[2, 1], [2, 2]], 55 | [[3, 1], [3, 2]]]) 56 | 57 | # test value 58 | b = pad_sequences(a, maxlen=3, value=1) 59 | assert_allclose(b, [[[1, 1], [1, 1], [1, 1]], 60 | [[1, 1], [2, 1], [2, 2]], 61 | [[3, 1], [3, 2], [3, 3]]]) 62 | 63 | 64 | def test_make_sampling_table(): 65 | a = make_sampling_table(3) 66 | assert_allclose(a, np.asarray([0.00315225, 0.00315225, 0.00547597]), 67 | rtol=.1) 68 | 69 | 70 | def test_skipgrams(): 71 | # test with no window size and binary labels 72 | couples, labels = skipgrams(np.arange(3), vocabulary_size=3) 73 | for couple in couples: 74 
| assert couple[0] in [0, 1, 2] and couple[1] in [0, 1, 2] 75 | 76 | # test window size and categorical labels 77 | couples, labels = skipgrams(np.arange(5), vocabulary_size=5, window_size=1, 78 | categorical=True) 79 | for couple in couples: 80 | assert couple[0] - couple[1] <= 3 81 | for l in labels: 82 | assert len(l) == 2 83 | 84 | 85 | if __name__ == '__main__': 86 | pytest.main([__file__]) 87 | -------------------------------------------------------------------------------- /tests/keras/preprocessing/test_text.py: -------------------------------------------------------------------------------- 1 | from keras.preprocessing.text import Tokenizer, one_hot 2 | import pytest 3 | import numpy as np 4 | 5 | 6 | def test_one_hot(): 7 | text = 'The cat sat on the mat.' 8 | encoded = one_hot(text, 5) 9 | assert len(encoded) == 6 10 | assert np.max(encoded) <= 4 11 | assert np.min(encoded) >= 0 12 | 13 | 14 | def test_tokenizer(): 15 | texts = ['The cat sat on the mat.', 16 | 'The dog sat on the log.', 17 | 'Dogs and cats living together.'] 18 | tokenizer = Tokenizer(nb_words=10) 19 | tokenizer.fit_on_texts(texts) 20 | 21 | sequences = [] 22 | for seq in tokenizer.texts_to_sequences_generator(texts): 23 | sequences.append(seq) 24 | assert np.max(np.max(sequences)) < 10 25 | assert np.min(np.min(sequences)) == 1 26 | 27 | tokenizer.fit_on_sequences(sequences) 28 | 29 | for mode in ['binary', 'count', 'tfidf', 'freq']: 30 | matrix = tokenizer.texts_to_matrix(texts, mode) 31 | 32 | 33 | if __name__ == '__main__': 34 | pytest.main([__file__]) 35 | -------------------------------------------------------------------------------- /tests/keras/test_constraints.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | from numpy.testing import assert_allclose 4 | 5 | from keras import backend as K 6 | from keras import constraints 7 | 8 | 9 | test_values = [0.1, 0.5, 3, 8, 1e-7] 10 | np.random.seed(3537) 11 | example_array = np.random.random((100, 100)) * 100. - 50. 12 | example_array[0, 0] = 0. # 0 could possibly cause trouble 13 | 14 | 15 | def test_maxnorm(): 16 | for m in test_values: 17 | norm_instance = constraints.maxnorm(m) 18 | normed = norm_instance(K.variable(example_array)) 19 | assert(np.all(K.eval(normed) < m)) 20 | 21 | # a more explicit example 22 | norm_instance = constraints.maxnorm(2.0) 23 | x = np.array([[0, 0, 0], [1.0, 0, 0], [3, 0, 0], [3, 3, 3]]).T 24 | x_normed_target = np.array([[0, 0, 0], [1.0, 0, 0], 25 | [2.0, 0, 0], 26 | [2. / np.sqrt(3), 2. / np.sqrt(3), 2. / np.sqrt(3)]]).T 27 | x_normed_actual = K.eval(norm_instance(K.variable(x))) 28 | assert_allclose(x_normed_actual, x_normed_target, rtol=1e-05) 29 | 30 | 31 | def test_nonneg(): 32 | nonneg_instance = constraints.nonneg() 33 | normed = nonneg_instance(K.variable(example_array)) 34 | assert(np.all(np.min(K.eval(normed), axis=1) == 0.)) 35 | 36 | 37 | def test_unitnorm(): 38 | unitnorm_instance = constraints.unitnorm() 39 | normalized = unitnorm_instance(K.variable(example_array)) 40 | norm_of_normalized = np.sqrt(np.sum(K.eval(normalized)**2, axis=0)) 41 | # in the unit norm constraint, it should be equal to 1. 42 | difference = norm_of_normalized - 1. 
43 | largest_difference = np.max(np.abs(difference)) 44 | assert(np.abs(largest_difference) < 10e-5) 45 | 46 | 47 | if __name__ == '__main__': 48 | pytest.main([__file__]) 49 | -------------------------------------------------------------------------------- /tests/keras/test_initializations.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | 4 | from keras import initializations 5 | from keras import backend as K 6 | 7 | # 2D tensor test fixture 8 | FC_SHAPE = (100, 100) 9 | 10 | # 4D convolution in th order. This shape has the same effective shape as FC_SHAPE 11 | CONV_SHAPE = (25, 25, 2, 2) 12 | 13 | # The equivalent shape of both test fixtures 14 | SHAPE = (100, 100) 15 | 16 | def _runner(init, shape, target_mean=None, target_std=None, 17 | target_max=None, target_min=None): 18 | variable = init(shape) 19 | output = K.get_value(variable) 20 | lim = 1e-2 21 | if target_std is not None: 22 | assert abs(output.std() - target_std) < lim 23 | if target_mean is not None: 24 | assert abs(output.mean() - target_mean) < lim 25 | if target_max is not None: 26 | assert abs(output.max() - target_max) < lim 27 | if target_min is not None: 28 | assert abs(output.min() - target_min) < lim 29 | 30 | 31 | @pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV']) 32 | def test_uniform(tensor_shape): 33 | _runner(initializations.uniform, tensor_shape, target_mean=0., 34 | target_max=0.05, target_min=-0.05) 35 | 36 | 37 | @pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV']) 38 | def test_normal(tensor_shape): 39 | _runner(initializations.normal, tensor_shape, target_mean=0., target_std=0.05) 40 | 41 | 42 | @pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV']) 43 | def test_lecun_uniform(tensor_shape): 44 | scale = np.sqrt(3. / SHAPE[0]) 45 | _runner(initializations.lecun_uniform, tensor_shape, 46 | target_mean=0., target_max=scale, target_min=-scale) 47 | 48 | 49 | @pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV']) 50 | def test_glorot_uniform(tensor_shape): 51 | scale = np.sqrt(6. / (SHAPE[0] + SHAPE[1])) 52 | _runner(initializations.glorot_uniform, tensor_shape, target_mean=0., 53 | target_max=scale, target_min=-scale) 54 | 55 | 56 | @pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV']) 57 | def test_glorot_normal(tensor_shape): 58 | scale = np.sqrt(2. / (SHAPE[0] + SHAPE[1])) 59 | _runner(initializations.glorot_normal, tensor_shape, 60 | target_mean=0., target_std=scale) 61 | 62 | 63 | @pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV']) 64 | def test_he_uniform(tensor_shape): 65 | scale = np.sqrt(6. / SHAPE[0]) 66 | _runner(initializations.he_uniform, tensor_shape, target_mean=0., 67 | target_max=scale, target_min=-scale) 68 | 69 | 70 | @pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV']) 71 | def test_he_normal(tensor_shape): 72 | scale = np.sqrt(2. / SHAPE[0]) 73 | _runner(initializations.he_normal, tensor_shape, 74 | target_mean=0., target_std=scale) 75 | 76 | 77 | @pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV']) 78 | def test_orthogonal(tensor_shape): 79 | _runner(initializations.orthogonal, tensor_shape, 80 | target_mean=0.) 
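# the orthogonal init has no fixed std or extrema, so only its (near-zero) mean is checked here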
81 | 82 | 83 | @pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV']) 84 | def test_identity(tensor_shape): 85 | if len(tensor_shape) > 2: 86 | with pytest.raises(Exception): 87 | _runner(initializations.identity, tensor_shape, 88 | target_mean=1./SHAPE[0], target_max=1.) 89 | else: 90 | _runner(initializations.identity, tensor_shape, 91 | target_mean=1./SHAPE[0], target_max=1.) 92 | 93 | 94 | @pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV']) 95 | def test_zero(tensor_shape): 96 | _runner(initializations.zero, tensor_shape, 97 | target_mean=0., target_max=0.) 98 | 99 | 100 | @pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV']) 101 | def test_one(tensor_shape): 102 | _runner(initializations.one, tensor_shape, 103 | target_mean=1., target_max=1.) 104 | 105 | 106 | if __name__ == '__main__': 107 | pytest.main([__file__]) 108 | -------------------------------------------------------------------------------- /tests/keras/test_metrics.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | 4 | from keras import metrics 5 | from keras import backend as K 6 | 7 | all_metrics = [ 8 | metrics.binary_accuracy, 9 | metrics.categorical_accuracy, 10 | metrics.mean_squared_error, 11 | metrics.mean_absolute_error, 12 | metrics.mean_absolute_percentage_error, 13 | metrics.mean_squared_logarithmic_error, 14 | metrics.squared_hinge, 15 | metrics.hinge, 16 | metrics.categorical_crossentropy, 17 | metrics.binary_crossentropy, 18 | metrics.poisson, 19 | metrics.cosine_proximity, 20 | metrics.matthews_correlation, 21 | ] 22 | 23 | all_sparse_metrics = [ 24 | metrics.sparse_categorical_accuracy, 25 | metrics.sparse_categorical_crossentropy, 26 | ] 27 | 28 | 29 | def test_metrics(): 30 | y_a = K.variable(np.random.random((6, 7))) 31 | y_b = K.variable(np.random.random((6, 7))) 32 | for metric in all_metrics: 33 | output = metric(y_a, y_b) 34 | assert K.eval(output).shape == () 35 | 36 | 37 | def test_matthews_correlation(): 38 | y_true = K.variable(np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0])) 39 | y_pred = K.variable(np.array([1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0])) 40 | 41 | # Calculated using sklearn.metrics.matthews_corrcoef 42 | expected = -0.14907119849998601 43 | 44 | actual = K.eval(metrics.matthews_correlation(y_true, y_pred)) 45 | epsilon = 1e-05 46 | assert expected - epsilon <= actual <= expected + epsilon 47 | 48 | 49 | def test_fbeta_score(): 50 | y_true = K.variable(np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0])) 51 | y_pred = K.variable(np.array([1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0])) 52 | 53 | # Calculated using sklearn.metrics.f1_score 54 | expected = 0.33333333333333331 55 | 56 | actual = K.eval(metrics.fbeta_score(y_true, y_pred)) 57 | epsilon = 1e-05 58 | assert expected - epsilon <= actual <= expected + epsilon 59 | 60 | 61 | def test_sparse_metrics(): 62 | for metric in all_sparse_metrics: 63 | y_a = K.variable(np.random.randint(0, 7, (6,)), dtype=K.floatx()) 64 | y_b = K.variable(np.random.random((6, 7)), dtype=K.floatx()) 65 | assert K.eval(metric(y_a, y_b)).shape == () 66 | 67 | 68 | def test_top_k_categorical_accuracy(): 69 | y_pred = K.variable(np.array([[0.3, 0.2, 0.1], [0.1, 0.2, 0.7]])) 70 | y_true = K.variable(np.array([[0, 1, 0], [1, 0, 0]])) 71 | success_result = K.eval(metrics.top_k_categorical_accuracy(y_true, y_pred, 72 | k=3)) 73 | assert success_result == 1 74 | partial_result = 
K.eval(metrics.top_k_categorical_accuracy(y_true, y_pred, 75 | k=2)) 76 | assert partial_result == 0.5 77 | failure_result = K.eval(metrics.top_k_categorical_accuracy(y_true, y_pred, 78 | k=1)) 79 | assert failure_result == 0 80 | 81 | 82 | if __name__ == "__main__": 83 | pytest.main([__file__]) 84 | -------------------------------------------------------------------------------- /tests/keras/test_objectives.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | 4 | from keras import objectives 5 | from keras import backend as K 6 | 7 | 8 | allobj = [objectives.mean_squared_error, 9 | objectives.mean_absolute_error, 10 | objectives.mean_absolute_percentage_error, 11 | objectives.mean_squared_logarithmic_error, 12 | objectives.squared_hinge, 13 | objectives.hinge, objectives.categorical_crossentropy, 14 | objectives.binary_crossentropy, 15 | objectives.kullback_leibler_divergence, 16 | objectives.poisson, 17 | objectives.cosine_proximity] 18 | 19 | 20 | def test_objective_shapes_3d(): 21 | y_a = K.variable(np.random.random((5, 6, 7))) 22 | y_b = K.variable(np.random.random((5, 6, 7))) 23 | for obj in allobj: 24 | objective_output = obj(y_a, y_b) 25 | assert K.eval(objective_output).shape == (5, 6) 26 | 27 | 28 | def test_objective_shapes_2d(): 29 | y_a = K.variable(np.random.random((6, 7))) 30 | y_b = K.variable(np.random.random((6, 7))) 31 | for obj in allobj: 32 | objective_output = obj(y_a, y_b) 33 | assert K.eval(objective_output).shape == (6,) 34 | 35 | 36 | def test_cce_one_hot(): 37 | y_a = K.variable(np.random.randint(0, 7, (5, 6))) 38 | y_b = K.variable(np.random.random((5, 6, 7))) 39 | objective_output = objectives.sparse_categorical_crossentropy(y_a, y_b) 40 | assert K.eval(objective_output).shape == (5, 6) 41 | 42 | y_a = K.variable(np.random.randint(0, 7, (6,))) 43 | y_b = K.variable(np.random.random((6, 7))) 44 | assert K.eval(objectives.sparse_categorical_crossentropy(y_a, y_b)).shape == (6,) 45 | 46 | 47 | if __name__ == "__main__": 48 | pytest.main([__file__]) 49 | -------------------------------------------------------------------------------- /tests/keras/test_optimizers.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import pytest 3 | 4 | from keras.utils.test_utils import get_test_data 5 | from keras.optimizers import SGD, RMSprop, Adagrad, Adadelta, Adam, Adamax, Nadam 6 | from keras.models import Sequential 7 | from keras.layers.core import Dense, Activation 8 | from keras.utils.np_utils import to_categorical 9 | 10 | 11 | (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=1000, 12 | nb_test=200, 13 | input_shape=(10,), 14 | classification=True, 15 | nb_class=2) 16 | y_train = to_categorical(y_train) 17 | y_test = to_categorical(y_test) 18 | 19 | 20 | def get_model(input_dim, nb_hidden, output_dim): 21 | model = Sequential() 22 | model.add(Dense(nb_hidden, input_shape=(input_dim,))) 23 | model.add(Activation('relu')) 24 | model.add(Dense(output_dim)) 25 | model.add(Activation('softmax')) 26 | return model 27 | 28 | 29 | def _test_optimizer(optimizer, target=0.89): 30 | model = get_model(X_train.shape[1], 10, y_train.shape[1]) 31 | model.compile(loss='categorical_crossentropy', 32 | optimizer=optimizer, 33 | metrics=['accuracy']) 34 | history = model.fit(X_train, y_train, nb_epoch=12, batch_size=16, 35 | validation_data=(X_test, y_test), verbose=2) 36 | config = optimizer.get_config() 37 | assert 
type(config) == dict
38 | assert history.history['val_acc'][-1] >= target
39 |
40 |
41 | def test_sgd():
42 | sgd = SGD(lr=0.01, momentum=0.9, nesterov=True)
43 | _test_optimizer(sgd)
44 |
45 |
46 | def test_rmsprop():
47 | _test_optimizer(RMSprop())
48 | _test_optimizer(RMSprop(decay=1e-3))
49 |
50 |
51 | def test_adagrad():
52 | _test_optimizer(Adagrad())
53 | _test_optimizer(Adagrad(decay=1e-3))
54 |
55 |
56 | def test_adadelta():
57 | _test_optimizer(Adadelta(), target=0.83)
58 | _test_optimizer(Adadelta(decay=1e-3), target=0.83)
59 |
60 |
61 | def test_adam():
62 | _test_optimizer(Adam())
63 | _test_optimizer(Adam(decay=1e-3))
64 |
65 |
66 | def test_adamax():
67 | _test_optimizer(Adamax())
68 | _test_optimizer(Adamax(decay=1e-3))
69 |
70 |
71 | def test_nadam():
72 | _test_optimizer(Nadam())
73 |
74 |
75 | if __name__ == '__main__':
76 | pytest.main([__file__])
77 |
-------------------------------------------------------------------------------- /tests/keras/test_regularizers.py: --------------------------------------------------------------------------------
1 | import pytest
2 | import numpy as np
3 | np.random.seed(1337)
4 |
5 | from keras.models import Sequential
6 | from keras.layers import Merge
7 | from keras.layers import Dense
8 | from keras.layers import Activation
9 | from keras.layers import Flatten
10 | from keras.layers import ActivityRegularization
11 | from keras.layers import Embedding
12 | from keras.datasets import mnist
13 | from keras.utils import np_utils
14 | from keras import regularizers
15 |
16 | nb_classes = 10
17 | batch_size = 128
18 | nb_epoch = 5
19 | weighted_class = 9
20 | standard_weight = 1
21 | high_weight = 5
22 | max_train_samples = 5000
23 | max_test_samples = 1000
24 |
25 |
26 | def get_data():
27 | # the data, shuffled and split between train and test sets
28 | (X_train, y_train), (X_test, y_test) = mnist.load_data()
29 | X_train = X_train.reshape(60000, 784)[:max_train_samples]
30 | X_test = X_test.reshape(10000, 784)[:max_test_samples]
31 | X_train = X_train.astype("float32") / 255
32 | X_test = X_test.astype("float32") / 255
33 |
34 | # convert class vectors to binary class matrices
35 | y_train = y_train[:max_train_samples]
36 | y_test = y_test[:max_test_samples]
37 | Y_train = np_utils.to_categorical(y_train, nb_classes)
38 | Y_test = np_utils.to_categorical(y_test, nb_classes)
39 | test_ids = np.where(y_test == np.array(weighted_class))[0]
40 |
41 | return (X_train, Y_train), (X_test, Y_test), test_ids
42 |
43 |
44 | def create_model(weight_reg=None, activity_reg=None):
45 | model = Sequential()
46 | model.add(Dense(50, input_shape=(784,)))
47 | model.add(Activation('relu'))
48 | model.add(Dense(10, W_regularizer=weight_reg,
49 | activity_regularizer=activity_reg))
50 | model.add(Activation('softmax'))
51 | return model
52 |
53 |
54 | def test_Eigenvalue_reg():
55 | (X_train, Y_train), (X_test, Y_test), test_ids = get_data()
56 | reg = regularizers.EigenvalueRegularizer(0.01)
57 | model = create_model(weight_reg=reg)
58 | model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
59 | model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch, verbose=0)
60 | model.evaluate(X_test[test_ids, :], Y_test[test_ids, :], verbose=0)
61 |
62 |
63 | def test_W_reg():
64 | (X_train, Y_train), (X_test, Y_test), test_ids = get_data()
65 | for reg in [regularizers.l1(),
66 | regularizers.l2(),
67 | regularizers.l1l2()]:
68 | model = create_model(weight_reg=reg)
69 | model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
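# a brief fit/evaluate cycle per regularizer is enough to check that each weight penalty runs end to end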
70 | model.fit(X_train, Y_train, batch_size=batch_size, 71 | nb_epoch=nb_epoch, verbose=0) 72 | model.evaluate(X_test[test_ids, :], Y_test[test_ids, :], verbose=0) 73 | 74 | 75 | def test_A_reg(): 76 | (X_train, Y_train), (X_test, Y_test), test_ids = get_data() 77 | for reg in [regularizers.activity_l1(), regularizers.activity_l2()]: 78 | model = create_model(activity_reg=reg) 79 | model.compile(loss='categorical_crossentropy', optimizer='rmsprop') 80 | model.fit(X_train, Y_train, batch_size=batch_size, 81 | nb_epoch=nb_epoch, verbose=0) 82 | model.evaluate(X_test[test_ids, :], Y_test[test_ids, :], verbose=0) 83 | 84 | 85 | if __name__ == '__main__': 86 | pytest.main([__file__]) 87 | -------------------------------------------------------------------------------- /tests/keras/test_sparse.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | import pytest 4 | 5 | from keras.models import Model 6 | from keras.layers import Dense, Input 7 | from keras.utils.test_utils import keras_test 8 | from keras import backend as K 9 | from keras.backend import theano_backend as KTH 10 | from keras.backend import tensorflow_backend as KTF 11 | 12 | import scipy.sparse as sparse 13 | import numpy as np 14 | np.random.seed(1337) 15 | 16 | 17 | input_dim = 16 18 | nb_hidden = 8 19 | nb_class = 4 20 | batch_size = 32 21 | nb_epoch = 1 22 | 23 | 24 | def do_sparse(): 25 | return K == KTF or KTH.th_sparse_module 26 | 27 | 28 | @keras_test 29 | def test_sparse_mlp(): 30 | if not do_sparse(): 31 | return 32 | 33 | input = Input(batch_shape=(None, input_dim), sparse=True) 34 | hidden = Dense(nb_hidden, activation='relu')(input) 35 | hidden = Dense(nb_hidden, activation='relu')(hidden) 36 | predictions = Dense(nb_class, activation='sigmoid')(hidden) 37 | model = Model(input=[input], output=predictions) 38 | model.compile(loss='mse', optimizer='sgd') 39 | x = sparse.rand(batch_size, input_dim, density=0.1, format='csr') 40 | y = np.random.random((batch_size, nb_class)) 41 | model.fit(x, y, nb_epoch=1) 42 | -------------------------------------------------------------------------------- /tests/test_loss_masking.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from keras.models import Sequential 5 | from keras.engine.training import weighted_objective 6 | from keras.layers.core import TimeDistributedDense, Masking 7 | from keras.utils.test_utils import keras_test 8 | from keras import objectives 9 | from keras import backend as K 10 | 11 | 12 | @keras_test 13 | def test_masking(): 14 | np.random.seed(1337) 15 | X = np.array([[[1], [1]], 16 | [[0], [0]]]) 17 | model = Sequential() 18 | model.add(Masking(mask_value=0, input_shape=(2, 1))) 19 | model.add(TimeDistributedDense(1, init='one')) 20 | model.compile(loss='mse', optimizer='sgd') 21 | y = np.array([[[1], [1]], 22 | [[1], [1]]]) 23 | loss = model.train_on_batch(X, y) 24 | assert loss == 0 25 | 26 | 27 | @keras_test 28 | def test_loss_masking(): 29 | weighted_loss = weighted_objective(objectives.get('mae')) 30 | shape = (3, 4, 2) 31 | X = np.arange(24).reshape(shape) 32 | Y = 2 * X 33 | 34 | # Normally the trailing 1 is added by standardize_weights 35 | weights = np.ones((3,)) 36 | mask = np.ones((3, 4)) 37 | mask[1, 0] = 0 38 | 39 | out = K.eval(weighted_loss(K.variable(X), 40 | K.variable(Y), 41 | K.variable(weights), 42 | K.variable(mask))) 43 | 44 | 45 | if 
__name__ == '__main__': 46 | pytest.main([__file__]) 47 | --------------------------------------------------------------------------------