├── .gitignore ├── .travis.yml ├── CONTRIBUTING.md ├── ISSUE_TEMPLATE.md ├── LICENSE ├── README.md ├── docker ├── Dockerfile ├── Makefile ├── README.md └── theanorc ├── docs ├── README.md ├── autogen.py ├── mkdocs.yml └── templates │ ├── activations.md │ ├── applications.md │ ├── backend.md │ ├── callbacks.md │ ├── constraints.md │ ├── datasets.md │ ├── getting-started │ ├── faq.md │ ├── functional-api-guide.md │ └── sequential-model-guide.md │ ├── index.md │ ├── initializations.md │ ├── layers │ ├── about-keras-layers.md │ └── writing-your-own-keras-layers.md │ ├── metrics.md │ ├── models │ ├── about-keras-models.md │ ├── model.md │ └── sequential.md │ ├── objectives.md │ ├── optimizers.md │ ├── preprocessing │ ├── image.md │ ├── sequence.md │ └── text.md │ ├── regularizers.md │ ├── scikit-learn-api.md │ └── visualization.md ├── examples ├── README.md ├── addition_rnn.py ├── antirectifier.py ├── babi_memnn.py ├── babi_rnn.py ├── cifar10_cnn.py ├── cifar10_resnet50.py ├── conv_filter_visualization.py ├── conv_lstm.py ├── deep_dream.py ├── image_ocr.py ├── imdb_bidirectional_lstm.py ├── imdb_cnn.py ├── imdb_cnn_lstm.py ├── imdb_fasttext.py ├── imdb_lstm.py ├── lstm_benchmark.py ├── lstm_text_generation.py ├── mnist_acgan.py ├── mnist_cnn.py ├── mnist_hierarchical_rnn.py ├── mnist_irnn.py ├── mnist_mlp.py ├── mnist_net2net.py ├── mnist_siamese_graph.py ├── mnist_sklearn_wrapper.py ├── mnist_swwae.py ├── mnist_transfer_cnn.py ├── neural_doodle.py ├── neural_style_transfer.py ├── pretrained_word_embeddings.py ├── reuters_mlp.py ├── stateful_lstm.py ├── variational_autoencoder.py └── variational_autoencoder_deconv.py ├── keras ├── __init__.py ├── activations.py ├── applications │ ├── __init__.py │ ├── audio_conv_utils.py │ ├── imagenet_utils.py │ ├── inception_v3.py │ ├── music_tagger_crnn.py │ ├── resnet50.py │ ├── vgg16.py │ ├── vgg19.py │ └── xception.py ├── backend │ ├── __init__.py │ ├── common.py │ ├── mxnet_backend.py │ ├── tensorflow_backend.py │ └── theano_backend.py ├── callbacks.py ├── constraints.py ├── datasets │ ├── __init__.py │ ├── cifar.py │ ├── cifar10.py │ ├── cifar100.py │ ├── imdb.py │ ├── mnist.py │ └── reuters.py ├── engine │ ├── __init__.py │ ├── topology.py │ └── training.py ├── initializations.py ├── layers │ ├── __init__.py │ ├── advanced_activations.py │ ├── convolutional.py │ ├── convolutional_recurrent.py │ ├── core.py │ ├── embeddings.py │ ├── local.py │ ├── noise.py │ ├── normalization.py │ ├── pooling.py │ ├── recurrent.py │ └── wrappers.py ├── metrics.py ├── models.py ├── objectives.py ├── optimizers.py ├── preprocessing │ ├── __init__.py │ ├── image.py │ ├── sequence.py │ └── text.py ├── regularizers.py ├── utils │ ├── __init__.py │ ├── data_utils.py │ ├── generic_utils.py │ ├── io_utils.py │ ├── layer_utils.py │ ├── np_utils.py │ ├── test_utils.py │ └── visualize_util.py └── wrappers │ ├── __init__.py │ └── scikit_learn.py ├── pytest.ini ├── setup.cfg ├── setup.py └── tests ├── integration_tests ├── test_image_data_tasks.py ├── test_temporal_data_tasks.py └── test_vector_data_tasks.py ├── keras ├── backend │ └── test_backends.py ├── datasets │ └── test_datasets.py ├── engine │ ├── test_topology.py │ └── test_training.py ├── layers │ ├── test_advanced_activations.py │ ├── test_convolutional.py │ ├── test_convolutional_recurrent.py │ ├── test_core.py │ ├── test_embeddings.py │ ├── test_local.py │ ├── test_noise.py │ ├── test_normalization.py │ ├── test_recurrent.py │ └── test_wrappers.py ├── preprocessing │ ├── test_image.py │ ├── test_sequence.py │ └── 
test_text.py ├── test_activations.py ├── test_callbacks.py ├── test_constraints.py ├── test_initializations.py ├── test_metrics.py ├── test_multiprocessing.py ├── test_objectives.py ├── test_optimizers.py ├── test_regularizers.py ├── test_sequential_model.py ├── test_sparse.py ├── utils │ └── test_generic_utils.py └── wrappers │ └── test_scikit_learn.py ├── test_dynamic_trainability.py ├── test_loss_masking.py ├── test_loss_weighting.py └── test_model_saving.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.DS_Store 2 | *.pyc 3 | *.swp 4 | temp/* 5 | dist/* 6 | build/* 7 | keras/datasets/data/* 8 | keras/datasets/temp/* 9 | docs/site/* 10 | docs/theme/* 11 | tags 12 | Keras.egg-info 13 | 14 | # test-related 15 | .coverage 16 | .cache 17 | 18 | # developer environments 19 | .idea 20 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: required 2 | dist: trusty 3 | language: python 4 | matrix: 5 | include: 6 | - python: 2.7 7 | env: KERAS_BACKEND=theano TEST_MODE=PEP8 8 | - python: 2.7 9 | env: KERAS_BACKEND=theano TEST_MODE=INTEGRATION_TESTS 10 | - python: 2.7 11 | env: KERAS_BACKEND=tensorflow 12 | - python: 3.5 13 | env: KERAS_BACKEND=tensorflow 14 | - python: 2.7 15 | env: KERAS_BACKEND=theano 16 | - python: 3.5 17 | env: KERAS_BACKEND=theano 18 | - python: 2.7 19 | env: KERAS_BACKEND=mxnet 20 | - python: 3.5 21 | env: KERAS_BACKEND=mxnet 22 | install: 23 | # code below is taken from http://conda.pydata.org/docs/travis.html 24 | # We do this conditionally because it saves us some downloading if the 25 | # version is the same. 26 | - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then 27 | wget https://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh; 28 | else 29 | wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh; 30 | fi 31 | - bash miniconda.sh -b -p $HOME/miniconda 32 | - export PATH="$HOME/miniconda/bin:$PATH" 33 | - hash -r 34 | - conda config --set always_yes yes --set changeps1 no 35 | - conda update -q conda 36 | # Useful for debugging any issues with conda 37 | - conda info -a 38 | 39 | - conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION numpy scipy matplotlib pandas pytest h5py 40 | - source activate test-environment 41 | - pip install git+git://github.com/Theano/Theano.git 42 | 43 | # install PIL for preprocessing tests 44 | - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then 45 | conda install pil; 46 | elif [[ "$TRAVIS_PYTHON_VERSION" == "3.5" ]]; then 47 | conda install Pillow; 48 | fi 49 | 50 | - pip install -e .[tests] 51 | 52 | # install TensorFlow 53 | - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then 54 | pip install https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.12.1-cp27-none-linux_x86_64.whl; 55 | elif [[ "$TRAVIS_PYTHON_VERSION" == "3.5" ]]; then 56 | pip install https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.12.1-cp35-cp35m-linux_x86_64.whl; 57 | fi 58 | 59 | # install MXNet 60 | - pip install mxnet 61 | # command to run tests 62 | script: 63 | # run keras backend init to initialize backend config 64 | - python -c "import keras.backend" 65 | # create dataset directory to avoid concurrent directory creation at runtime 66 | - mkdir ~/.keras/datasets 67 | # set up keras backend 68 | - sed -i -e 's/"backend":[[:space:]]*"[^"]*/"backend":\ "'$KERAS_BACKEND'/g' 
~/.keras/keras.json; 69 | - echo -e "Running tests with the following config:\n$(cat ~/.keras/keras.json)" 70 | - if [[ "$TEST_MODE" == "INTEGRATION_TESTS" ]]; then 71 | PYTHONPATH=$PWD:$PYTHONPATH py.test tests/integration_tests; 72 | elif [[ "$TEST_MODE" == "PEP8" ]]; then 73 | PYTHONPATH=$PWD:$PYTHONPATH py.test --pep8 -m pep8 -n0; 74 | else 75 | PYTHONPATH=$PWD:$PYTHONPATH py.test tests/ --ignore=tests/integration_tests; 76 | fi 77 | after_success: 78 | - coveralls 79 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # On Github Issues and Pull Requests 2 | 3 | Found a bug? Have a new feature to suggest? Want to contribute changes to the codebase? Make sure to read this first. 4 | 5 | ## Bug reporting 6 | 7 | Your code doesn't work, and you have determined that the issue lies with Keras? Follow these steps to report a bug. 8 | 9 | 1. Your bug may already be fixed. Make sure to update to the current Keras master branch, as well as the latest Theano/TensorFlow master branch. 10 | To easily update Theano: `pip install git+git://github.com/Theano/Theano.git --upgrade` 11 | 12 | 2. Search for similar issues. Make sure to delete `is:open` on the issue search to find solved tickets as well. It's possible somebody has encountered this bug already. Also remember to check out Keras' [FAQ](http://keras.io/faq/). Still having a problem? Open an issue on Github to let us know. 13 | 14 | 3. Make sure you provide us with useful information about your configuration: what OS are you using? What Keras backend are you using? Are you running on GPU? If so, what is your version of Cuda, of cuDNN? What is your GPU? 15 | 16 | 4. Provide us with a script to reproduce the issue. This script should be runnable as-is and should not require external data download (use randomly generated data if you need to run a model on some test data). We recommend that you use Github Gists to post your code. Any issue that cannot be reproduced is likely to be closed. 17 | 18 | 5. If possible, take a stab at fixing the bug yourself --if you can! 19 | 20 | The more information you provide, the easier it is for us to validate that there is a bug and the faster we'll be able to take action. If you want your issue to be resolved quickly, following the steps above is crucial. 21 | 22 | 23 | ## Requesting a Feature 24 | 25 | You can also use Github issues to request features you would like to see in Keras, or changes in the Keras API. 26 | 27 | 1. Provide a clear and detailed explanation of the feature you want and why it's important to add. Keep in mind that we want features that will be useful to the majority of our users and not just a small subset. If you're just targeting a minority of users, consider writing an add-on library for Keras. It is crucial for Keras to avoid bloating the API and codebase. 28 | 29 | 2. Provide code snippets demonstrating the API you have in mind and illustrating the use cases of your feature. Of course, you don't need to write any real code at this point! 30 | 31 | 3. After discussing the feature you may choose to attempt a Pull Request. If you're at all able, start writing some code. We always have more work to do than time to do it. If you can write some code then that will speed the process along. 32 | 33 | ## Pull Requests 34 | 35 | We love pull requests. Here's a quick guide: 36 | 37 | 1. 
If your PR introduces a change in functionality, make sure you start by opening an issue to discuss whether the change should be made, and how to handle it. This will save you from having your PR closed down the road! Of course, if your PR is a simple bug fix, you don't need to do that. 38 | 39 | 2. Write the code. This is the hard part! 40 | 41 | 3. Make sure any new function or class you introduce has proper docstrings. Make sure any code you touch still has up-to-date docstrings and documentation. 42 | 43 | 4. Write tests. Your code should have full unit test coverage. If you want to see your PR merged promptly, this is crucial. 44 | 45 | 5. Run our test suite locally. It's easy: from the Keras folder, simply run: `py.test tests/`. 46 | - You will need to install the test requirements as well: `pip install -e .[tests]`. 47 | 48 | 6. Make sure all tests are passing: 49 | - with the Theano backend, on Python 2.7 and Python 3.5 50 | - with the TensorFlow backend, on Python 2.7 51 | 52 | 7. We use PEP8 syntax conventions, but we aren't dogmatic when it comes to line length. Make sure your lines stay reasonably sized, though. To make your life easier, we recommend running a PEP8 linter: 53 | - Install PEP8 packages: `pip install pep8 pytest-pep8 autopep8` 54 | - Run a standalone PEP8 check: `py.test --pep8 -m pep8` 55 | - You can automatically fix some PEP8 error by running: `autopep8 -i --select ` for example: `autopep8 -i --select E128 tests/keras/backend/test_backends.py` 56 | 57 | 8. When committing, use appropriate, descriptive commit messages. Make sure that your branch history is not a string of "bug fix", "fix", "oops", etc. When submitting your PR, squash your commits into a single commit with an appropriate commit message, to make sure the project history stays clean and readable. See ['rebase and squash'](http://rebaseandsqua.sh/) for technical help on how to squash your commits. 58 | 59 | 9. Update the documentation. If introducing new functionality, make sure you include code snippets demonstrating the usage of your new feature. 60 | 61 | 10. Submit your PR. If your changes have been approved in a previous discussion, and if you have complete (and passing) unit tests, your PR is likely to be merged promptly. Otherwise, well... 62 | 63 | ## Adding new examples 64 | 65 | Even if you don't contribute to the Keras source code, if you have an application of Keras that is concise and powerful, please consider adding it to our collection of examples. [Existing examples](https://github.com/fchollet/keras/tree/master/examples) show idiomatic Keras code: make sure to keep your own script in the same spirit. 66 | -------------------------------------------------------------------------------- /ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | Please make sure that the boxes below are checked before you submit your issue. If your issue is an implementation question, please ask your question on [StackOverflow](http://stackoverflow.com/questions/tagged/keras) or [join the Keras Slack channel](https://keras-slack-autojoin.herokuapp.com/) and ask there instead of filing a GitHub issue. 2 | 3 | Thank you! 4 | 5 | - [ ] Check that you are up-to-date with the master branch of Keras. You can update with: 6 | pip install git+git://github.com/fchollet/keras.git --upgrade --no-deps 7 | 8 | - [ ] If running on TensorFlow, check that you are up-to-date with the latest version. 
The installation instructions can be found [here](https://www.tensorflow.org/get_started/os_setup). 9 | 10 | - [ ] If running on Theano, check that you are up-to-date with the master branch of Theano. You can update with: 11 | pip install git+git://github.com/Theano/Theano.git --upgrade --no-deps 12 | 13 | - [ ] Provide a link to a GitHub Gist of a Python script that can reproduce your issue (or just copy the script here if it is short). 14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | COPYRIGHT 2 | 3 | All contributions by François Chollet: 4 | Copyright (c) 2015, François Chollet. 5 | All rights reserved. 6 | 7 | All contributions by Google: 8 | Copyright (c) 2015, Google, Inc. 9 | All rights reserved. 10 | 11 | All other contributions: 12 | Copyright (c) 2015, the respective contributors. 13 | All rights reserved. 14 | 15 | Each contributor holds copyright over their respective contributions. 16 | The project versioning (Git) records all such contribution source information. 17 | 18 | LICENSE 19 | 20 | The MIT License (MIT) 21 | 22 | Permission is hereby granted, free of charge, to any person obtaining a copy 23 | of this software and associated documentation files (the "Software"), to deal 24 | in the Software without restriction, including without limitation the rights 25 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 26 | copies of the Software, and to permit persons to whom the Software is 27 | furnished to do so, subject to the following conditions: 28 | 29 | The above copyright notice and this permission notice shall be included in all 30 | copies or substantial portions of the Software. 31 | 32 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 33 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 34 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 35 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 36 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 37 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 38 | SOFTWARE. 
39 | 40 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:8.0-cudnn5-devel 2 | 3 | ENV CONDA_DIR /opt/conda 4 | ENV PATH $CONDA_DIR/bin:$PATH 5 | 6 | RUN mkdir -p $CONDA_DIR && \ 7 | echo export PATH=$CONDA_DIR/bin:'$PATH' > /etc/profile.d/conda.sh && \ 8 | apt-get update && \ 9 | apt-get install -y wget git libhdf5-dev g++ graphviz && \ 10 | wget --quiet https://repo.continuum.io/miniconda/Miniconda3-3.9.1-Linux-x86_64.sh && \ 11 | echo "6c6b44acdd0bc4229377ee10d52c8ac6160c336d9cdd669db7371aa9344e1ac3 *Miniconda3-3.9.1-Linux-x86_64.sh" | sha256sum -c - && \ 12 | /bin/bash /Miniconda3-3.9.1-Linux-x86_64.sh -f -b -p $CONDA_DIR && \ 13 | rm Miniconda3-3.9.1-Linux-x86_64.sh 14 | 15 | ENV NB_USER keras 16 | ENV NB_UID 1000 17 | 18 | RUN useradd -m -s /bin/bash -N -u $NB_UID $NB_USER && \ 19 | mkdir -p $CONDA_DIR && \ 20 | chown keras $CONDA_DIR -R && \ 21 | mkdir -p /src && \ 22 | chown keras /src 23 | 24 | USER keras 25 | 26 | # Python 27 | ARG python_version=3.5.2 28 | ARG tensorflow_version=0.12.0rc0-cp35-cp35m 29 | RUN conda install -y python=${python_version} && \ 30 | pip install https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-${tensorflow_version}-linux_x86_64.whl && \ 31 | pip install git+git://github.com/Theano/Theano.git && \ 32 | pip install ipdb pytest pytest-cov python-coveralls coverage==3.7.1 pytest-xdist pep8 pytest-pep8 pydot_ng && \ 33 | conda install Pillow scikit-learn notebook pandas matplotlib nose pyyaml six h5py && \ 34 | pip install git+git://github.com/fchollet/keras.git && \ 35 | conda clean -yt 36 | 37 | ADD theanorc /home/keras/.theanorc 38 | 39 | ENV PYTHONPATH='/src/:$PYTHONPATH' 40 | 41 | WORKDIR /src 42 | 43 | EXPOSE 8888 44 | 45 | CMD jupyter notebook --port=8888 --ip=0.0.0.0 46 | 47 | -------------------------------------------------------------------------------- /docker/Makefile: -------------------------------------------------------------------------------- 1 | help: 2 | @cat Makefile 3 | 4 | DATA?="${HOME}/Data" 5 | GPU?=0 6 | DOCKER_FILE=Dockerfile 7 | DOCKER=GPU=$(GPU) nvidia-docker 8 | BACKEND=tensorflow 9 | TEST=tests/ 10 | SRC=$(shell dirname `pwd`) 11 | 12 | build: 13 | docker build -t keras --build-arg python_version=3.5 -f $(DOCKER_FILE) . 14 | 15 | bash: build 16 | $(DOCKER) run -it -v $(SRC):/src -v $(DATA):/data --env KERAS_BACKEND=$(BACKEND) keras bash 17 | 18 | ipython: build 19 | $(DOCKER) run -it -v $(SRC):/src -v $(DATA):/data --env KERAS_BACKEND=$(BACKEND) keras ipython 20 | 21 | notebook: build 22 | $(DOCKER) run -it -v $(SRC):/src -v $(DATA):/data --net=host --env KERAS_BACKEND=$(BACKEND) keras 23 | 24 | test: build 25 | $(DOCKER) run -it -v $(SRC):/src -v $(DATA):/data --env KERAS_BACKEND=$(BACKEND) keras py.test $(TEST) 26 | 27 | -------------------------------------------------------------------------------- /docker/README.md: -------------------------------------------------------------------------------- 1 | # Using Keras via Docker 2 | 3 | This directory contains `Dockerfile` to make it easy to get up and running with 4 | Keras via [Docker](http://www.docker.com/). 
5 | 6 | ## Installing Docker 7 | 8 | General installation instructions are 9 | [on the Docker site](https://docs.docker.com/installation/), but we give some 10 | quick links here: 11 | 12 | * [OSX](https://docs.docker.com/installation/mac/): [docker toolbox](https://www.docker.com/toolbox) 13 | * [ubuntu](https://docs.docker.com/installation/ubuntulinux/) 14 | 15 | ## Running the container 16 | 17 | We use a `Makefile` to wrap the Docker commands in simple make targets. 18 | 19 | Build the container and start a Jupyter notebook 20 | 21 | $ make notebook 22 | 23 | Build the container and start an IPython shell 24 | 25 | $ make ipython 26 | 27 | Build the container and start a bash shell 28 | 29 | $ make bash 30 | 31 | For GPU support, install the NVIDIA drivers (ideally the latest) and 32 | [nvidia-docker](https://github.com/NVIDIA/nvidia-docker). Run using 33 | 34 | $ make notebook GPU=0 # or [ipython, bash] 35 | 36 | Switch between Theano and TensorFlow 37 | 38 | $ make notebook BACKEND=theano 39 | $ make notebook BACKEND=tensorflow 40 | 41 | Mount a volume for external data sets 42 | 43 | $ make DATA=~/mydata 44 | 45 | Print all make tasks 46 | 47 | $ make help 48 | 49 | You can change Theano parameters by editing `/docker/theanorc`. 50 | 51 | 52 | Note: if you run into problems with nvidia-docker, you can fall back on the older method 53 | shown below, but it is not recommended. If you find a bug in nvidia-docker, please report 54 | it there and use nvidia-docker as described above. 55 | 56 | $ export CUDA_SO=$(\ls /usr/lib/x86_64-linux-gnu/libcuda.* | xargs -I{} echo '-v {}:{}') 57 | $ export DEVICES=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}') 58 | $ docker run -it -p 8888:8888 $CUDA_SO $DEVICES gcr.io/tensorflow/tensorflow:latest-gpu 59 | -------------------------------------------------------------------------------- /docker/theanorc: -------------------------------------------------------------------------------- 1 | [global] 2 | floatX = float32 3 | optimizer=None 4 | device = gpu 5 | 6 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # Keras Documentation 2 | 3 | The source for Keras documentation is in this directory under `sources/`. 4 | Our documentation uses extended Markdown, as implemented by [MkDocs](http://mkdocs.org). 5 | 6 | ## Building the documentation 7 | 8 | - install MkDocs: `pip install mkdocs` 9 | - `cd` to the `docs/` folder and run: 10 | - `python autogen.py` 11 | - `mkdocs serve` # Starts a local webserver: [localhost:8000](localhost:8000) 12 | - `mkdocs build` # Builds a static site in "site" directory 13 | -------------------------------------------------------------------------------- /docs/mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: Keras Documentation 2 | theme: readthedocs 3 | docs_dir: sources 4 | repo_url: http://github.com/fchollet/keras 5 | site_url: http://keras.io/ 6 | # theme_dir: theme 7 | site_description: 'Documentation for Keras, the Python Deep Learning library.'
8 | 9 | dev_addr: '0.0.0.0:8000' 10 | google_analytics: ['UA-61785484-1', 'keras.io'] 11 | 12 | 13 | pages: 14 | - Home: index.md 15 | - Getting started: 16 | - Guide to the Sequential model: getting-started/sequential-model-guide.md 17 | - Guide to the Functional API: getting-started/functional-api-guide.md 18 | - FAQ: getting-started/faq.md 19 | - Models: 20 | - About Keras models: models/about-keras-models.md 21 | - Sequential: models/sequential.md 22 | - Model (functional API): models/model.md 23 | - Layers: 24 | - About Keras layers: layers/about-keras-layers.md 25 | - Core Layers: layers/core.md 26 | - Convolutional Layers: layers/convolutional.md 27 | - Pooling Layers: layers/pooling.md 28 | - Locally-connected Layers: layers/local.md 29 | - Recurrent Layers: layers/recurrent.md 30 | - Embedding Layers: layers/embeddings.md 31 | - Advanced Activations Layers: layers/advanced-activations.md 32 | - Normalization Layers: layers/normalization.md 33 | - Noise layers: layers/noise.md 34 | - Layer wrappers: layers/wrappers.md 35 | - Writing your own Keras layers: layers/writing-your-own-keras-layers.md 36 | - Preprocessing: 37 | - Sequence Preprocessing: preprocessing/sequence.md 38 | - Text Preprocessing: preprocessing/text.md 39 | - Image Preprocessing: preprocessing/image.md 40 | - Objectives: objectives.md 41 | - Metrics: metrics.md 42 | - Optimizers: optimizers.md 43 | - Activations: activations.md 44 | - Callbacks: callbacks.md 45 | - Datasets: datasets.md 46 | - Applications: applications.md 47 | - Backend: backend.md 48 | - Initializations: initializations.md 49 | - Regularizers: regularizers.md 50 | - Constraints: constraints.md 51 | - Visualization: visualization.md 52 | - Scikit-learn API: scikit-learn-api.md 53 | - Utils: 54 | - Data Utils: utils/data_utils.md 55 | - I/O Utils: utils/io_utils.md 56 | - Layer Utils: utils/layer_utils.md 57 | - Numpy Utils: utils/np_utils.md 58 | - Generic Utils: utils/generic_utils.md 59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /docs/templates/activations.md: -------------------------------------------------------------------------------- 1 | 2 | ## Usage of activations 3 | 4 | Activations can either be used through an `Activation` layer, or through the `activation` argument supported by all forward layers: 5 | 6 | ```python 7 | from keras.layers.core import Activation, Dense 8 | 9 | model.add(Dense(64)) 10 | model.add(Activation('tanh')) 11 | ``` 12 | is equivalent to: 13 | ```python 14 | model.add(Dense(64, activation='tanh')) 15 | ``` 16 | 17 | You can also pass an element-wise Theano/TensorFlow function as an activation: 18 | 19 | ```python 20 | from keras import backend as K 21 | 22 | def tanh(x): 23 | return K.tanh(x) 24 | 25 | model.add(Dense(64, activation=tanh)) 26 | model.add(Activation(tanh)) 27 | ``` 28 | 29 | ## Available activations 30 | 31 | - __softmax__: Softmax applied across inputs last dimension. Expects shape either `(nb_samples, nb_timesteps, nb_dims)` or `(nb_samples, nb_dims)`. 32 | - __softplus__ 33 | - __softsign__ 34 | - __relu__ 35 | - __tanh__ 36 | - __sigmoid__ 37 | - __hard_sigmoid__ 38 | - __linear__ 39 | 40 | ## On Advanced Activations 41 | 42 | Activations that are more complex than a simple Theano/TensorFlow function (eg. learnable activations, configurable activations, etc.) are available as [Advanced Activation layers](layers/advanced-activations.md), and can be found in the module `keras.layers.advanced_activations`. These include PReLU and LeakyReLU. 
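For example, here is a minimal sketch of using one of these layers (it assumes an existing `model`, as in the snippets above; the `alpha` value is only illustrative):

```python
from keras.layers.core import Dense
from keras.layers.advanced_activations import LeakyReLU

# Advanced activations are added as standalone layers,
# rather than passed through the `activation` argument:
model.add(Dense(64))
model.add(LeakyReLU(alpha=0.3))
```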
43 | -------------------------------------------------------------------------------- /docs/templates/backend.md: -------------------------------------------------------------------------------- 1 | # Keras backends 2 | 3 | ## What is a "backend"? 4 | 5 | Keras is a model-level library, providing high-level building blocks for developing deep learning models. It does not handle itself low-level operations such as tensor products, convolutions and so on. Instead, it relies on a specialized, well-optimized tensor manipulation library to do so, serving as the "backend engine" of Keras. Rather than picking one single tensor library and making the implementation of Keras tied to that library, Keras handles the problem in a modular way, and several different backend engines can be plugged seamlessly into Keras. 6 | 7 | At this time, Keras has two backend implementations available: the **TensorFlow** backend and the **Theano** backend. 8 | 9 | - [TensorFlow](http://www.tensorflow.org/) is an open-source symbolic tensor manipulation framework developed by Google, Inc. 10 | - [Theano](http://deeplearning.net/software/theano/) is an open-source symbolic tensor manipulation framework developed by LISA/MILA Lab at Université de Montréal. 11 | 12 | In the future, we are likely to add more backend options. If you are interested in developing a new backend, get in touch! 13 | 14 | ---- 15 | 16 | ## Switching from one backend to another 17 | 18 | If you have run Keras at least once, you will find the Keras configuration file at: 19 | 20 | `~/.keras/keras.json` 21 | 22 | If it isn't there, you can create it. 23 | 24 | The default configuration file looks like this: 25 | 26 | ``` 27 | { 28 | "image_dim_ordering": "tf", 29 | "epsilon": 1e-07, 30 | "floatx": "float32", 31 | "backend": "tensorflow" 32 | } 33 | ``` 34 | 35 | Simply change the field `backend` to either `"theano"` or `"tensorflow"`, and Keras will use the new configuration next time you run any Keras code. 36 | 37 | You can also define the environment variable ``KERAS_BACKEND`` and this will 38 | override what is defined in your config file : 39 | 40 | ```bash 41 | KERAS_BACKEND=tensorflow python -c "from keras import backend" 42 | Using TensorFlow backend. 43 | ``` 44 | 45 | ---- 46 | 47 | ## keras.json details 48 | 49 | 50 | ``` 51 | { 52 | "image_dim_ordering": "tf", 53 | "epsilon": 1e-07, 54 | "floatx": "float32", 55 | "backend": "tensorflow" 56 | } 57 | ``` 58 | 59 | You can change these settings by editing `~/.keras/keras.json`. 60 | 61 | * `image_dim_ordering`: string, either `"tf"` or `"th"`. It specifies which dimension ordering convention Keras will follow. (`keras.backend.image_dim_ordering()` returns it.) 62 | - For 2D data (e.g. image), `"tf"` assumes `(rows, cols, channels)` while `"th"` assumes `(channels, rows, cols)`. 63 | - For 3D data, `"tf"` assumes `(conv_dim1, conv_dim2, conv_dim3, channels)` while `"th"` assumes `(channels, conv_dim1, conv_dim2, conv_dim3)`. 64 | * `epsilon`: float, a numeric fuzzing constant used to avoid dividing by zero in some operations. 65 | * `floatx`: string, `"float16"`, `"float32"`, or `"float64"`. Default float precision. 66 | * `backend`: string, `"tensorflow"` or `"theano"`. 67 | 68 | ---- 69 | 70 | ## Using the abstract Keras backend to write new code 71 | 72 | If you want the Keras modules you write to be compatible with both Theano and TensorFlow, you have to write them via the abstract Keras backend API. Here's an intro. 
73 | 74 | You can import the backend module via: 75 | ```python 76 | from keras import backend as K 77 | ``` 78 | 79 | The code below instantiates an input placeholder. It's equivalent to `tf.placeholder()` or `T.matrix()`, `T.tensor3()`, etc. 80 | 81 | ```python 82 | input = K.placeholder(shape=(2, 4, 5)) 83 | # also works: 84 | input = K.placeholder(shape=(None, 4, 5)) 85 | # also works: 86 | input = K.placeholder(ndim=3) 87 | ``` 88 | 89 | The code below instantiates a shared variable. It's equivalent to `tf.variable()` or `theano.shared()`. 90 | 91 | ```python 92 | val = np.random.random((3, 4, 5)) 93 | var = K.variable(value=val) 94 | 95 | # all-zeros variable: 96 | var = K.zeros(shape=(3, 4, 5)) 97 | # all-ones: 98 | var = K.ones(shape=(3, 4, 5)) 99 | ``` 100 | 101 | Most tensor operations you will need can be done as you would in TensorFlow or Theano: 102 | 103 | ```python 104 | a = b + c * K.abs(d) 105 | c = K.dot(a, K.transpose(b)) 106 | a = K.sum(b, axis=2) 107 | a = K.softmax(b) 108 | a = concatenate([b, c], axis=-1) 109 | # etc... 110 | ``` 111 | 112 | ---- 113 | 114 | ## Backend functions 115 | 116 | 117 | {{autogenerated}} 118 | 119 | 120 | 121 | 122 | 123 | -------------------------------------------------------------------------------- /docs/templates/callbacks.md: -------------------------------------------------------------------------------- 1 | ## Usage of callbacks 2 | 3 | A callback is a set of functions to be applied at given stages of the training procedure. You can use callbacks to get a view on internal states and statistics of the model during training. You can pass a list of callbacks (as the keyword argument `callbacks`) to the `.fit()` method of the `Sequential` model. The relevant methods of the callbacks will then be called at each stage of the training. 4 | 5 | --- 6 | 7 | {{autogenerated}} 8 | 9 | --- 10 | 11 | 12 | # Create a callback 13 | 14 | You can create a custom callback by extending the base class `keras.callbacks.Callback`. A callback has access to its associated model through the class property `self.model`. 
15 | 16 | Here's a simple example saving a list of losses over each batch during training: 17 | ```python 18 | class LossHistory(keras.callbacks.Callback): 19 | def on_train_begin(self, logs={}): 20 | self.losses = [] 21 | 22 | def on_batch_end(self, batch, logs={}): 23 | self.losses.append(logs.get('loss')) 24 | ``` 25 | 26 | --- 27 | 28 | ### Example: recording loss history 29 | 30 | ```python 31 | class LossHistory(keras.callbacks.Callback): 32 | def on_train_begin(self, logs={}): 33 | self.losses = [] 34 | 35 | def on_batch_end(self, batch, logs={}): 36 | self.losses.append(logs.get('loss')) 37 | 38 | model = Sequential() 39 | model.add(Dense(10, input_dim=784, init='uniform')) 40 | model.add(Activation('softmax')) 41 | model.compile(loss='categorical_crossentropy', optimizer='rmsprop') 42 | 43 | history = LossHistory() 44 | model.fit(X_train, Y_train, batch_size=128, nb_epoch=20, verbose=0, callbacks=[history]) 45 | 46 | print history.losses 47 | # outputs 48 | ''' 49 | [0.66047596406559383, 0.3547245744908703, ..., 0.25953155204159617, 0.25901699725311789] 50 | ''' 51 | ``` 52 | 53 | --- 54 | 55 | ### Example: model checkpoints 56 | 57 | ```python 58 | from keras.callbacks import ModelCheckpoint 59 | 60 | model = Sequential() 61 | model.add(Dense(10, input_dim=784, init='uniform')) 62 | model.add(Activation('softmax')) 63 | model.compile(loss='categorical_crossentropy', optimizer='rmsprop') 64 | 65 | ''' 66 | saves the model weights after each epoch if the validation loss decreased 67 | ''' 68 | checkpointer = ModelCheckpoint(filepath="/tmp/weights.hdf5", verbose=1, save_best_only=True) 69 | model.fit(X_train, Y_train, batch_size=128, nb_epoch=20, verbose=0, validation_data=(X_test, Y_test), callbacks=[checkpointer]) 70 | 71 | ``` 72 | 73 | -------------------------------------------------------------------------------- /docs/templates/constraints.md: -------------------------------------------------------------------------------- 1 | ## Usage of constraints 2 | 3 | Functions from the `constraints` module allow setting constraints (eg. non-negativity) on network parameters during optimization. 4 | 5 | The penalties are applied on a per-layer basis. The exact API will depend on the layer, but the layers `Dense`, `TimeDistributedDense`, `MaxoutDense`, `Convolution1D`, `Convolution2D` and `Convolution3D` have a unified API. 6 | 7 | These layers expose 2 keyword arguments: 8 | 9 | - `W_constraint` for the main weights matrix 10 | - `b_constraint` for the bias. 11 | 12 | 13 | ```python 14 | from keras.constraints import maxnorm 15 | model.add(Dense(64, W_constraint = maxnorm(2))) 16 | ``` 17 | 18 | ## Available constraints 19 | 20 | - __maxnorm__(m=2): maximum-norm constraint 21 | - __nonneg__(): non-negativity constraint 22 | - __unitnorm__(): unit-norm constraint, enforces the matrix to have unit norm along the last axis -------------------------------------------------------------------------------- /docs/templates/initializations.md: -------------------------------------------------------------------------------- 1 | 2 | ## Usage of initializations 3 | 4 | Initializations define the way to set the initial random weights of Keras layers. 5 | 6 | The keyword arguments used for passing initializations to layers will depend on the layer. 
Usually it is simply `init`: 7 | 8 | ```python 9 | model.add(Dense(64, init='uniform')) 10 | ``` 11 | 12 | ## Available initializations 13 | 14 | - __uniform__ 15 | - __lecun_uniform__: Uniform initialization scaled by the square root of the number of inputs (LeCun 98). 16 | - __normal__ 17 | - __identity__: Use with square 2D layers (`shape[0] == shape[1]`). 18 | - __orthogonal__: Use with square 2D layers (`shape[0] == shape[1]`). 19 | - __zero__ 20 | - __one__ 21 | - __glorot_normal__: Gaussian initialization scaled by fan_in + fan_out (Glorot 2010) 22 | - __glorot_uniform__ 23 | - __he_normal__: Gaussian initialization scaled by fan_in (He et al., 2014) 24 | - __he_uniform__ 25 | 26 | 27 | An initialization may be passed as a string (must match one of the available initializations above), or as a callable. 28 | If a callable, then it must take two arguments: `shape` (shape of the variable to initialize) and `name` (name of the variable), 29 | and it must return a variable (e.g. output of `K.variable()`): 30 | 31 | ```python 32 | from keras import backend as K 33 | import numpy as np 34 | 35 | def my_init(shape, name=None): 36 | value = np.random.random(shape) 37 | return K.variable(value, name=name) 38 | 39 | model.add(Dense(64, init=my_init)) 40 | ``` 41 | 42 | You could also use functions from `keras.initializations` in this way: 43 | 44 | ```python 45 | from keras import initializations 46 | 47 | def my_init(shape, name=None): 48 | return initializations.normal(shape, scale=0.01, name=name) 49 | 50 | model.add(Dense(64, init=my_init)) 51 | ``` 52 | -------------------------------------------------------------------------------- /docs/templates/layers/about-keras-layers.md: -------------------------------------------------------------------------------- 1 | # About Keras layers 2 | 3 | All Keras layers have a number of methods in common: 4 | 5 | - `layer.get_weights()`: returns the weights of the layer as a list of Numpy arrays. 6 | - `layer.set_weights(weights)`: sets the weights of the layer from a list of Numpy arrays (with the same shapes as the output of `get_weights`). 7 | - `layer.get_config()`: returns a dictionary containing the configuration of the layer. The layer can be reinstantiated from its config via: 8 | 9 | ```python 10 | layer = Dense(32) 11 | config = layer.get_config() 12 | reconstructed_layer = Dense.from_config(config) 13 | ``` 14 | 15 | Or: 16 | 17 | ```python 18 | from keras.utils.layer_utils import layer_from_config 19 | 20 | config = layer.get_config() 21 | layer = layer_from_config({'class_name': layer.__class__.__name__, 22 | 'config': config}) 23 | ``` 24 | 25 | If a layer has a single node (i.e. 
if it isn't a shared layer), you can get its input tensor, output tensor, input shape and output shape via: 26 | 27 | - `layer.input` 28 | - `layer.output` 29 | - `layer.input_shape` 30 | - `layer.output_shape` 31 | 32 | If the layer has multiple nodes (see: [the concept of layer node and shared layers](/getting-started/functional-api-guide/#the-concept-of-layer-node)), you can use the following methods: 33 | 34 | - `layer.get_input_at(node_index)` 35 | - `layer.get_output_at(node_index)` 36 | - `layer.get_input_shape_at(node_index)` 37 | - `layer.get_output_shape_at(node_index)` -------------------------------------------------------------------------------- /docs/templates/layers/writing-your-own-keras-layers.md: -------------------------------------------------------------------------------- 1 | # Writing your own Keras layers 2 | 3 | For simple, stateless custom operations, you are probably better off using `layers.core.Lambda` layers. But for any custom operation that has trainable weights, you should implement your own layer. 4 | 5 | Here is the skeleton of a Keras layer, **as of Keras 1.2.0** (if you have an older version, please upgrade). There are only three methods you need to implement: 6 | 7 | - `build(input_shape)`: this is where you will define your weights. This method must set `self.built = True`, which can be done by calling `super([Layer], self).build()`. 8 | - `call(x)`: this is where the layer's logic lives. Unless you want your layer to support masking, you only have to care about the first argument passed to `call`: the input tensor. 9 | - `get_output_shape_for(input_shape)`: in case your layer modifies the shape of its input, you should specify here the shape transformation logic. This allows Keras to do automatic shape inference. 10 | 11 | ```python 12 | from keras import backend as K 13 | from keras.engine.topology import Layer 14 | import numpy as np 15 | 16 | class MyLayer(Layer): 17 | def __init__(self, output_dim, **kwargs): 18 | self.output_dim = output_dim 19 | super(MyLayer, self).__init__(**kwargs) 20 | 21 | def build(self, input_shape): 22 | # Create a trainable weight variable for this layer. 23 | self.W = self.add_weight(shape=(input_shape[1], self.output_dim), 24 | initializer='uniform', 25 | trainable=True) 26 | super(MyLayer, self).build(input_shape) # Be sure to call this somewhere! 27 | 28 | def call(self, x, mask=None): 29 | return K.dot(x, self.W) 30 | 31 | def get_output_shape_for(self, input_shape): 32 | return (input_shape[0], self.output_dim) 33 | ``` 34 | 35 | The existing Keras layers provide examples of how to implement almost anything. Never hesitate to read the source code! 36 | -------------------------------------------------------------------------------- /docs/templates/metrics.md: -------------------------------------------------------------------------------- 1 | 2 | ## Usage of metrics 3 | 4 | A metric is a function that is used to judge the performance of your model. Metric functions are to be supplied in the `metrics` parameter when a model is compiled. 5 | 6 | A metric function is similar to an [objective function](/objectives), except that the results from evaluating a metric are not used when training the model. 7 | 8 | You can either pass the name of an existing metric, or pass a Theano/TensorFlow symbolic function (see [Custom metrics](#custom-metrics)). 9 | 10 | #### Arguments 11 | - __y_true__: True labels. Theano/TensorFlow tensor. 12 | - __y_pred__: Predictions. Theano/TensorFlow tensor of the same shape as y_true. 
13 | 14 | #### Returns 15 | Single tensor value representing the mean of the output array across all 16 | datapoints. 17 | 18 | ---- 19 | 20 | ## Available metrics 21 | 22 | 23 | {{autogenerated}} 24 | 25 | ---- 26 | 27 | ## Custom metrics 28 | 29 | Custom metrics can be defined and passed via the compilation step. The 30 | function would need to take `(y_true, y_pred)` as arguments and return 31 | either a single tensor value or a dict `metric_name -> metric_value`. 32 | 33 | ```python 34 | # for custom metrics 35 | import keras.backend as K 36 | 37 | def mean_pred(y_true, y_pred): 38 | return K.mean(y_pred) 39 | 40 | def false_rates(y_true, y_pred): 41 | false_neg = ... 42 | false_pos = ... 43 | return { 44 | 'false_neg': false_neg, 45 | 'false_pos': false_pos, 46 | } 47 | 48 | model.compile(optimizer='rmsprop', 49 | loss='binary_crossentropy', 50 | metrics=['accuracy', mean_pred, false_rates]) 51 | ``` 52 | -------------------------------------------------------------------------------- /docs/templates/models/about-keras-models.md: -------------------------------------------------------------------------------- 1 | # About Keras models 2 | 3 | There are two types of models available in Keras: [the Sequential model](/models/sequential) and [the Model class used with the functional API](/models/model). 4 | 5 | These models have a number of methods in common: 6 | 7 | - `model.summary()`: prints a summary representation of your model. 8 | - `model.get_config()`: returns a dictionary containing the configuration of the model. The model can be reinstantiated from its config via: 9 | ```python 10 | config = model.get_config() 11 | model = Model.from_config(config) 12 | # or, for Sequential: 13 | model = Sequential.from_config(config) 14 | ``` 15 | 16 | - `model.get_weights()`: returns a list of all weight tensors in the model, as Numpy arrays. 17 | - `model.set_weights(weights)`: sets the values of the weights of the model, from a list of Numpy arrays. The arrays in the list should have the same shape as those returned by `get_weights()`. 18 | - `model.to_json()`: returns a representation of the model as a JSON string. Note that the representation does not include the weights, only the architecture. You can reinstantiate the same model (with reinitialized weights) from the JSON string via: 19 | ```python 20 | from keras.models import model_from_json 21 | 22 | json_string = model.to_json() 23 | model = model_from_json(json_string) 24 | ``` 25 | - `model.to_yaml()`: returns a representation of the model as a YAML string. Note that the representation does not include the weights, only the architecture. You can reinstantiate the same model (with reinitialized weights) from the YAML string via: 26 | ```python 27 | from keras.models import model_from_yaml 28 | 29 | yaml_string = model.to_yaml() 30 | model = model_from_yaml(yaml_string) 31 | ``` 32 | - `model.save_weights(filepath)`: saves the weights of the model as an HDF5 file. 33 | - `model.load_weights(filepath, by_name=False)`: loads the weights of the model from an HDF5 file (created by `save_weights`). By default, the architecture is expected to be unchanged. To load weights into a different architecture (with some layers in common), use `by_name=True` to load only those layers with the same name.
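As an illustration of `by_name=True`, here is a minimal sketch of transferring weights between two models that share one layer name (the layer names, sizes and file path are made up for this example):

```python
from keras.models import Sequential
from keras.layers import Dense

# original model, assumed already trained, then saved:
model = Sequential()
model.add(Dense(32, input_dim=784, name='dense_1'))
model.add(Dense(10, name='dense_2'))
model.save_weights('weights.h5')

# new model with a different second layer; only 'dense_1' matches by name,
# so only that layer's weights are loaded:
new_model = Sequential()
new_model.add(Dense(32, input_dim=784, name='dense_1'))
new_model.add(Dense(5, name='new_dense'))
new_model.load_weights('weights.h5', by_name=True)
```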
-------------------------------------------------------------------------------- /docs/templates/models/model.md: -------------------------------------------------------------------------------- 1 | # Model class API 2 | 3 | In the functional API, given an input tensor and output tensor, you can instantiate a `Model` via: 4 | 5 | ```python 6 | from keras.models import Model 7 | from keras.layers import Input, Dense 8 | 9 | a = Input(shape=(32,)) 10 | b = Dense(32)(a) 11 | model = Model(input=a, output=b) 12 | ``` 13 | 14 | This model will include all layers required in the computation of `b` given `a`. 15 | 16 | In the case of multi-input or multi-output models, you can use lists as well: 17 | 18 | ```python 19 | model = Model(input=[a1, a2], output=[b1, b3, b3]) 20 | ``` 21 | 22 | For a detailed introduction of what `Model` can do, read [this guide to the Keras functional API](/getting-started/functional-api-guide). 23 | 24 | ## Useful attributes of Model 25 | 26 | - `model.layers` is a flattened list of the layers comprising the model graph. 27 | - `model.inputs` is the list of input tensors. 28 | - `model.outputs` is the list of output tensors. 29 | 30 | ## Methods 31 | 32 | {{autogenerated}} 33 | -------------------------------------------------------------------------------- /docs/templates/models/sequential.md: -------------------------------------------------------------------------------- 1 | # The Sequential model API 2 | 3 | To get started, read [this guide to the Keras Sequential model](/getting-started/sequential-model-guide). 4 | 5 | ## Useful attributes of Model 6 | 7 | - `model.layers` is a list of the layers added to the model. 8 | 9 | 10 | ---- 11 | 12 | ## Sequential model methods 13 | 14 | {{autogenerated}} -------------------------------------------------------------------------------- /docs/templates/objectives.md: -------------------------------------------------------------------------------- 1 | 2 | ## Usage of objectives 3 | 4 | An objective function (or loss function, or optimization score function) is one of the two parameters required to compile a model: 5 | 6 | ```python 7 | model.compile(loss='mean_squared_error', optimizer='sgd') 8 | ``` 9 | 10 | You can either pass the name of an existing objective, or pass a Theano/TensorFlow symbolic function that returns a scalar for each data-point and takes the following two arguments: 11 | 12 | - __y_true__: True labels. Theano/TensorFlow tensor. 13 | - __y_pred__: Predictions. Theano/TensorFlow tensor of the same shape as y_true. 14 | 15 | The actual optimized objective is the mean of the output array across all datapoints. 16 | 17 | For a few examples of such functions, check out the [objectives source](https://github.com/fchollet/keras/blob/master/keras/objectives.py). 18 | 19 | ## Available objectives 20 | 21 | - __mean_squared_error__ / __mse__ 22 | - __mean_absolute_error__ / __mae__ 23 | - __mean_absolute_percentage_error__ / __mape__ 24 | - __mean_squared_logarithmic_error__ / __msle__ 25 | - __squared_hinge__ 26 | - __hinge__ 27 | - __binary_crossentropy__: Also known as logloss. 28 | - __categorical_crossentropy__: Also known as multiclass logloss. __Note__: using this objective requires that your labels are binary arrays of shape `(nb_samples, nb_classes)`. 29 | - __sparse_categorical_crossentropy__: As above but accepts sparse labels. 
__Note__: this objective still requires that your labels have the same number of dimensions as your outputs; you may need to add a length-1 dimension to the shape of your labels, e.g with `np.expand_dims(y, -1)`. 30 | - __kullback_leibler_divergence__ / __kld__: Information gain from a predicted probability distribution Q to a true probability distribution P. Gives a measure of difference between both distributions. 31 | - __poisson__: Mean of `(predictions - targets * log(predictions))` 32 | - __cosine_proximity__: The opposite (negative) of the mean cosine proximity between predictions and targets. 33 | 34 | **Note**: when using the `categorical_crossentropy` objective, your targets should be in categorical format (e.g. if you have 10 classes, the target for each sample should be a 10-dimensional vector that is all-zeros expect for a 1 at the index corresponding to the class of the sample). In order to convert *integer targets* into *categorical targets*, you can use the Keras utility `to_categorical`: 35 | 36 | ```python 37 | from keras.utils.np_utils import to_categorical 38 | 39 | categorical_labels = to_categorical(int_labels, nb_classes=None) 40 | ``` 41 | -------------------------------------------------------------------------------- /docs/templates/optimizers.md: -------------------------------------------------------------------------------- 1 | 2 | ## Usage of optimizers 3 | 4 | An optimizer is one of the two arguments required for compiling a Keras model: 5 | 6 | ```python 7 | model = Sequential() 8 | model.add(Dense(64, init='uniform', input_dim=10)) 9 | model.add(Activation('tanh')) 10 | model.add(Activation('softmax')) 11 | 12 | sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True) 13 | model.compile(loss='mean_squared_error', optimizer=sgd) 14 | ``` 15 | 16 | You can either instantiate an optimizer before passing it to `model.compile()` , as in the above example, or you can call it by its name. In the latter case, the default parameters for the optimizer will be used. 17 | 18 | ```python 19 | # pass optimizer by name: default parameters will be used 20 | model.compile(loss='mean_squared_error', optimizer='sgd') 21 | ``` 22 | 23 | --- 24 | 25 | ## Parameters common to all Keras optimizers 26 | 27 | The parameters `clipnorm` and `clipvalue` can be used with all optimizers to control gradient clipping: 28 | 29 | ```python 30 | # all parameter gradients will be clipped to 31 | # a maximum norm of 1. 32 | sgd = SGD(lr=0.01, clipnorm=1.) 33 | ``` 34 | 35 | ```python 36 | # all parameter gradients will be clipped to 37 | # a maximum value of 0.5 and 38 | # a minimum value of -0.5. 39 | sgd = SGD(lr=0.01, clipvalue=0.5) 40 | ``` 41 | 42 | --- 43 | 44 | {{autogenerated}} -------------------------------------------------------------------------------- /docs/templates/preprocessing/sequence.md: -------------------------------------------------------------------------------- 1 | ## pad_sequences 2 | 3 | ```python 4 | keras.preprocessing.sequence.pad_sequences(sequences, maxlen=None, dtype='int32', 5 | padding='pre', truncating='pre', value=0.) 6 | ``` 7 | 8 | Transform a list of `nb_samples` sequences (lists of scalars) into a 2D Numpy array of shape `(nb_samples, nb_timesteps)`. `nb_timesteps` is either the `maxlen` argument if provided, or the length of the longest sequence otherwise. Sequences that are shorter than `nb_timesteps` are padded with `value` at the end. Sequences longer than `nb_timesteps` are truncated so that it fits the desired length. 
Position where padding or truncation happens is determined by `padding` or `truncating`, respectively. 9 | 10 | - __Return__: 2D Numpy array of shape `(nb_samples, nb_timesteps)`. 11 | 12 | - __Arguments__: 13 | - __sequences__: List of lists of int or float. 14 | - __maxlen__: None or int. Maximum sequence length; longer sequences are truncated and shorter sequences are padded (see `padding`, `truncating` and `value`). 15 | - __dtype__: datatype of the Numpy array returned. 16 | - __padding__: 'pre' or 'post', pad either before or after each sequence. 17 | - __truncating__: 'pre' or 'post', remove values from sequences larger than maxlen either at the beginning or at the end of the sequence. 18 | - __value__: float, value used to pad the sequences. 19 | 20 | --- 21 | 22 | ## skipgrams 23 | 24 | ```python 25 | keras.preprocessing.sequence.skipgrams(sequence, vocabulary_size, 26 | window_size=4, negative_samples=1., shuffle=True, 27 | categorical=False, sampling_table=None) 28 | ``` 29 | 30 | Transforms a sequence of word indexes (list of int) into couples of the form: 31 | 32 | - (word, word in the same window), with label 1 (positive samples). 33 | - (word, random word from the vocabulary), with label 0 (negative samples). 34 | 35 | Read more about Skipgram in this gnomic paper by Mikolov et al.: [Efficient Estimation of Word Representations in 36 | Vector Space](http://arxiv.org/pdf/1301.3781v3.pdf) 37 | 38 | - __Return__: tuple `(couples, labels)`. 39 | - `couples` is a list of 2-element lists of int: `[word_index, other_word_index]`. 40 | - `labels` is a list of 0s and 1s, where 1 indicates that `other_word_index` was found in the same window as `word_index`, and 0 indicates that `other_word_index` was random. 41 | - if `categorical` is set to True, the labels are categorical, i.e. 1 becomes [0, 1], and 0 becomes [1, 0]. 42 | 43 | - __Arguments__: 44 | - __sequence__: list of int indexes. If using a sampling_table, the index of a word should be its rank in the dataset (starting at 1). 45 | - __vocabulary_size__: int. 46 | - __window_size__: int. Maximum distance between two words in a positive couple. 47 | - __negative_samples__: float >= 0. 0 for no negative (=random) samples. 1 for the same number as positive samples. etc. 48 | - __shuffle__: boolean. Whether to shuffle the samples. 49 | - __categorical__: boolean. Whether to make the returned labels categorical. 50 | - __sampling_table__: Numpy array of shape `(vocabulary_size,)` where `sampling_table[i]` is the probability of sampling the word with index i (assumed to be the i-th most common word in the dataset). 51 | 52 | 53 | --- 54 | 55 | ## make_sampling_table 56 | 57 | ```python 58 | keras.preprocessing.sequence.make_sampling_table(size, sampling_factor=1e-5) 59 | ``` 60 | 61 | Used for generating the `sampling_table` argument for `skipgrams`. `sampling_table[i]` is the probability of sampling the i-th most common word in a dataset (more common words should be sampled less frequently, for balance). 62 | 63 | - __Return__: Numpy array of shape `(size,)`. 64 | 65 | - __Arguments__: 66 | - __size__: size of the vocabulary considered. 67 | - __sampling_factor__: lower values result in a longer probability decay (common words will be sampled less frequently). If set to 1, no subsampling will be performed (all sampling probabilities will be 1).
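Putting the two functions together, here is a minimal sketch on a toy sequence of word indexes (the indexes and sizes are arbitrary):

```python
from keras.preprocessing.sequence import make_sampling_table, skipgrams

vocabulary_size = 50
# toy sequence; each int is the rank of a word in the dataset (starting at 1)
sequence = [3, 1, 7, 2, 1, 12, 5]

sampling_table = make_sampling_table(vocabulary_size)
couples, labels = skipgrams(sequence, vocabulary_size,
                            window_size=2,
                            sampling_table=sampling_table)
# couples: [[word_index, other_word_index], ...]
# labels:  matching list of 1s (positive) and 0s (negative)
```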
68 | -------------------------------------------------------------------------------- /docs/templates/preprocessing/text.md: -------------------------------------------------------------------------------- 1 | 2 | ## text_to_word_sequence 3 | 4 | ```python 5 | keras.preprocessing.text.text_to_word_sequence(text, 6 | filters=base_filter(), lower=True, split=" ") 7 | ``` 8 | 9 | Split a sentence into a list of words. 10 | 11 | - __Return__: List of words (str). 12 | 13 | - __Arguments__: 14 | - __text__: str. 15 | - __filters__: list (or concatenation) of characters to filter out, such as punctuation. Default: base_filter(), includes basic punctuation, tabs, and newlines. 16 | - __lower__: boolean. Whether to set the text to lowercase. 17 | - __split__: str. Separator for word splitting. 18 | 19 | ## one_hot 20 | 21 | ```python 22 | keras.preprocessing.text.one_hot(text, n, 23 | filters=base_filter(), lower=True, split=" ") 24 | ``` 25 | 26 | One-hot encode a text into a list of word indexes in a vocabulary of size n. 27 | 28 | - __Return__: List of integers in [1, n]. Each integer encodes a word (unicity non-guaranteed). 29 | 30 | - __Arguments__: Same as `text_to_word_sequence` above. 31 | - __n__: int. Size of vocabulary. 32 | 33 | ## Tokenizer 34 | 35 | ```python 36 | keras.preprocessing.text.Tokenizer(nb_words=None, filters=base_filter(), 37 | lower=True, split=" ") 38 | ``` 39 | 40 | Class for vectorizing texts, or/and turning texts into sequences (=list of word indexes, where the word of rank i in the dataset (starting at 1) has index i). 41 | 42 | - __Arguments__: Same as `text_to_word_sequence` above. 43 | - __nb_words__: None or int. Maximum number of words to work with (if set, tokenization will be restricted to the top nb_words most common words in the dataset). 44 | 45 | - __Methods__: 46 | 47 | - __fit_on_texts(texts)__: 48 | - __Arguments__: 49 | - __texts__: list of texts to train on. 50 | 51 | - __texts_to_sequences(texts)__ 52 | - __Arguments__: 53 | - __texts__: list of texts to turn to sequences. 54 | - __Return__: list of sequences (one per text input). 55 | 56 | - __texts_to_sequences_generator(texts)__: generator version of the above. 57 | - __Return__: yield one sequence per input text. 58 | 59 | - __texts_to_matrix(texts)__: 60 | - __Return__: numpy array of shape `(len(texts), nb_words)`. 61 | - __Arguments__: 62 | - __texts__: list of texts to vectorize. 63 | - __mode__: one of "binary", "count", "tfidf", "freq" (default: "binary"). 64 | 65 | - __fit_on_sequences(sequences)__: 66 | - __Arguments__: 67 | - __sequences__: list of sequences to train on. 68 | 69 | - __sequences_to_matrix(sequences)__: 70 | - __Return__: numpy array of shape `(len(sequences), nb_words)`. 71 | - __Arguments__: 72 | - __sequences__: list of sequences to vectorize. 73 | - __mode__: one of "binary", "count", "tfidf", "freq" (default: "binary"). 74 | 75 | - __Attributes__: 76 | - __word_counts__: dictionary mapping words (str) to the number of times they appeared on during fit. Only set after fit_on_texts was called. 77 | - __word_docs__: dictionary mapping words (str) to the number of documents/texts they appeared on during fit. Only set after fit_on_texts was called. 78 | - __word_index__: dictionary mapping words (str) to their rank/index (int). Only set after fit_on_texts was called. 79 | - __document_count__: int. Number of documents (texts/sequences) the tokenizer was trained on. Only set after fit_on_texts or fit_on_sequences was called. 
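To tie these methods together, here is a minimal usage sketch (the example texts are made up):

```python
from keras.preprocessing.text import Tokenizer

texts = ['The cat sat on the mat.',
         'The dog ate my homework.']

tokenizer = Tokenizer(nb_words=20)
tokenizer.fit_on_texts(texts)

sequences = tokenizer.texts_to_sequences(texts)          # lists of word indexes
matrix = tokenizer.texts_to_matrix(texts, mode='count')  # shape: (len(texts), nb_words)

print(tokenizer.word_index)  # e.g. {'the': 1, 'cat': 2, ...}
```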
80 | 81 | 82 | -------------------------------------------------------------------------------- /docs/templates/regularizers.md: -------------------------------------------------------------------------------- 1 | ## Usage of regularizers 2 | 3 | Regularizers allow you to apply penalties to layer parameters or layer activity during optimization. These penalties are incorporated into the loss function that the network optimizes. 4 | 5 | The penalties are applied on a per-layer basis. The exact API will depend on the layer, but the layers `Dense`, `TimeDistributedDense`, `MaxoutDense`, `Convolution1D`, `Convolution2D` and `Convolution3D` have a unified API. 6 | 7 | These layers expose 3 keyword arguments: 8 | 9 | - `W_regularizer`: instance of `keras.regularizers.WeightRegularizer` 10 | - `b_regularizer`: instance of `keras.regularizers.WeightRegularizer` 11 | - `activity_regularizer`: instance of `keras.regularizers.ActivityRegularizer` 12 | 13 | 14 | ## Example 15 | 16 | ```python 17 | from keras.regularizers import l2, activity_l2 18 | model.add(Dense(64, input_dim=64, W_regularizer=l2(0.01), activity_regularizer=activity_l2(0.01))) 19 | ``` 20 | 21 | ## Available penalties 22 | 23 | ```python 24 | keras.regularizers.WeightRegularizer(l1=0., l2=0.) 25 | ``` 26 | 27 | ```python 28 | keras.regularizers.ActivityRegularizer(l1=0., l2=0.) 29 | ``` 30 | 31 | ## Shortcuts 32 | 33 | These are shortcut functions available in `keras.regularizers`. 34 | 35 | - __l1__(l=0.01): L1 weight regularization penalty, also known as LASSO 36 | - __l2__(l=0.01): L2 weight regularization penalty, also known as weight decay, or Ridge 37 | - __l1l2__(l1=0.01, l2=0.01): L1-L2 weight regularization penalty, also known as ElasticNet 38 | - __activity_l1__(l=0.01): L1 activity regularization 39 | - __activity_l2__(l=0.01): L2 activity regularization 40 | - __activity_l1l2__(l1=0.01, l2=0.01): L1+L2 activity regularization 41 | -------------------------------------------------------------------------------- /docs/templates/scikit-learn-api.md: -------------------------------------------------------------------------------- 1 | # Wrappers for the Scikit-Learn API 2 | 3 | You can use `Sequential` Keras models (single-input only) as part of your Scikit-Learn workflow via the wrappers found in `keras.wrappers.scikit_learn`. 4 | 5 | There are two wrappers available: 6 | 7 | `keras.wrappers.scikit_learn.KerasClassifier(build_fn=None, **sk_params)`, which implements the Scikit-Learn classifier interface, and 8 | 9 | `keras.wrappers.scikit_learn.KerasRegressor(build_fn=None, **sk_params)`, which implements the Scikit-Learn regressor interface. 10 | 11 | ### Arguments 12 | 13 | - __build_fn__: callable function or class instance 14 | - __sk_params__: model parameters & fitting parameters 15 | 16 | `build_fn` should construct, compile and return a Keras model, which 17 | will then be used to fit/predict. One of the following 18 | three values can be passed to `build_fn`: 19 | 20 | 1. A function 21 | 2. An instance of a class that implements the `__call__` method 22 | 3. None. This means you implement a class that inherits from either 23 | `KerasClassifier` or `KerasRegressor`. The `__call__` method of that 24 | class will then be treated as the default `build_fn`. 25 | 26 | `sk_params` takes both model parameters and fitting parameters. Legal model 27 | parameters are the arguments of `build_fn`.
Note that like all other 28 | estimators in scikit-learn, `build_fn` should provide default values for 29 | its arguments, so that you can create the estimator without passing any 30 | values to `sk_params`. 31 | 32 | `sk_params` can also accept parameters for calling the `fit`, `predict`, 33 | `predict_proba`, and `score` methods (e.g., `nb_epoch`, `batch_size`). 34 | Fitting (predicting) parameters are selected in the following order: 35 | 36 | 1. Values passed to the dictionary arguments of the 37 | `fit`, `predict`, `predict_proba`, and `score` methods 38 | 2. Values passed to `sk_params` 39 | 3. The default values of the `keras.models.Sequential` 40 | `fit`, `predict`, `predict_proba` and `score` methods 41 | 42 | When using scikit-learn's `grid_search` API, legal tunable parameters are 43 | those you can pass to `sk_params`, including fitting parameters. 44 | In other words, you can use `grid_search` to search for the best 45 | `batch_size` or `nb_epoch` as well as the model parameters. 46 | -------------------------------------------------------------------------------- /docs/templates/visualization.md: -------------------------------------------------------------------------------- 1 | 2 | ## Model visualization 3 | 4 | The `keras.utils.visualize_util` module provides utility functions to plot 5 | a Keras model (using graphviz). 6 | 7 | This will plot a graph of the model and save it to a file: 8 | ```python 9 | from keras.utils.visualize_util import plot 10 | plot(model, to_file='model.png') 11 | ``` 12 | 13 | `plot` takes two optional arguments: 14 | 15 | - `show_shapes` (defaults to False) controls whether output shapes are shown in the graph. 16 | - `show_layer_names` (defaults to True) controls whether layer names are shown in the graph. 17 | 18 | You can also directly obtain the `pydot.Graph` object and render it yourself, 19 | for example to show it in an IPython notebook: 20 | ```python 21 | from IPython.display import SVG 22 | from keras.utils.visualize_util import model_to_dot 23 | 24 | SVG(model_to_dot(model).create(prog='dot', format='svg')) 25 | ``` 26 | -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | # Keras examples directory 2 | 3 | [addition_rnn.py](addition_rnn.py) 4 | Implementation of sequence-to-sequence learning for performing addition of two numbers (as strings). 5 | 6 | [antirectifier.py](antirectifier.py) 7 | Demonstrates how to write custom layers for Keras. 8 | 9 | [babi_memnn.py](babi_memnn.py) 10 | Trains a memory network on the bAbI dataset for reading comprehension. 11 | 12 | [babi_rnn.py](babi_rnn.py) 13 | Trains a two-branch recurrent network on the bAbI dataset for reading comprehension. 14 | 15 | [cifar10_cnn.py](cifar10_cnn.py) 16 | Trains a simple deep CNN on the CIFAR10 small images dataset. 17 | 18 | [conv_filter_visualization.py](conv_filter_visualization.py) 19 | Visualization of the filters of VGG16, via gradient ascent in input space. 20 | 21 | [conv_lstm.py](conv_lstm.py) 22 | Demonstrates the use of a convolutional LSTM network. 23 | 24 | [deep_dream.py](deep_dream.py) 25 | Deep Dreams in Keras. 26 | 27 | [image_ocr.py](image_ocr.py) 28 | Trains a convolutional stack followed by a recurrent stack and a CTC logloss function to perform optical character recognition (OCR).
29 | 30 | [imdb_bidirectional_lstm.py](imdb_bidirectional_lstm.py) 31 | Trains a Bidirectional LSTM on the IMDB sentiment classification task. 32 | 33 | [imdb_cnn.py](imdb_cnn.py) 34 | Demonstrates the use of Convolution1D for text classification. 35 | 36 | [imdb_cnn_lstm.py](imdb_cnn_lstm.py) 37 | Trains a convolutional stack followed by a recurrent stack network on the IMDB sentiment classification task. 38 | 39 | [imdb_fasttext.py](imdb_fasttext.py) 40 | Trains a FastText model on the IMDB sentiment classification task. 41 | 42 | [imdb_lstm.py](imdb_lstm.py) 43 | Trains a LSTM on the IMDB sentiment classification task. 44 | 45 | [lstm_benchmark.py](lstm_benchmark.py) 46 | Compares different LSTM implementations on the IMDB sentiment classification task. 47 | 48 | [lstm_text_generation.py](lstm_text_generation.py) 49 | Generates text from Nietzsche's writings. 50 | 51 | [mnist_acgan.py](mnist_acgan.py) 52 | Implementation of AC-GAN ( Auxiliary Classifier GAN ) on the MNIST dataset 53 | 54 | [mnist_cnn.py](mnist_cnn.py) 55 | Trains a simple convnet on the MNIST dataset. 56 | 57 | [mnist_hierarchical_rnn.py](mnist_hierarchical_rnn.py) 58 | Trains a Hierarchical RNN (HRNN) to classify MNIST digits. 59 | 60 | [mnist_irnn.py](mnist_irnn.py) 61 | Reproduction of the IRNN experiment with pixel-by-pixel sequential MNIST in "A Simple Way to Initialize Recurrent Networks of Rectified Linear Units" by Le et al. 62 | 63 | [mnist_mlp.py](mnist_mlp.py) 64 | Trains a simple deep multi-layer perceptron on the MNIST dataset. 65 | 66 | [mnist_net2net.py](mnist_net2net.py) 67 | Reproduction of the Net2Net experiment with MNIST in "Net2Net: Accelerating Learning via Knowledge Transfer". 68 | 69 | [mnist_siamese_graph.py](mnist_siamese_graph.py) 70 | Trains a Siamese multi-layer perceptron on pairs of digits from the MNIST dataset. 71 | 72 | [mnist_sklearn_wrapper.py](mnist_sklearn_wrapper.py) 73 | Demonstrates how to use the sklearn wrapper. 74 | 75 | [mnist_swwae.py](mnist_swwae.py) 76 | Trains a Stacked What-Where AutoEncoder built on residual blocks on the MNIST dataset. 77 | 78 | [mnist_transfer_cnn.py](mnist_transfer_cnn.py) 79 | Transfer learning toy example. 80 | 81 | [neural_doodle.py](neural_doodle.py) 82 | Neural doodle. 83 | 84 | [neural_style_transfer.py](neural_style_transfer.py) 85 | Neural style transfer. 86 | 87 | [pretrained_word_embeddings.py](pretrained_word_embeddings.py) 88 | Loads pre-trained word embeddings (GloVe embeddings) into a frozen Keras Embedding layer, and uses it to train a text classification model on the 20 Newsgroup dataset. 89 | 90 | [reuters_mlp.py](reuters_mlp.py) 91 | Trains and evaluate a simple MLP on the Reuters newswire topic classification task. 92 | 93 | [stateful_lstm.py](stateful_lstm.py) 94 | Demonstrates how to use stateful RNNs to model long sequences efficiently. 95 | 96 | [variational_autoencoder.py](variational_autoencoder.py) 97 | Demonstrates how to build a variational autoencoder. 98 | 99 | [variational_autoencoder_deconv.py](variational_autoencoder_deconv.py) 100 | Demonstrates how to build a variational autoencoder with Keras using deconvolution layers. 101 | -------------------------------------------------------------------------------- /examples/antirectifier.py: -------------------------------------------------------------------------------- 1 | '''The example demonstrates how to write custom layers for Keras. 2 | 3 | We build a custom activation layer called 'Antirectifier', 4 | which modifies the shape of the tensor that passes through it. 
5 | We need to specify two methods: `get_output_shape_for` and `call`. 6 | 7 | Note that the same result can also be achieved via a Lambda layer. 8 | 9 | Because our custom layer is written with primitives from the Keras 10 | backend (`K`), our code can run both on TensorFlow and Theano. 11 | ''' 12 | 13 | from __future__ import print_function 14 | from keras.models import Sequential 15 | from keras.layers import Dense, Dropout, Layer, Activation 16 | from keras.datasets import mnist 17 | from keras import backend as K 18 | from keras.utils import np_utils 19 | 20 | 21 | class Antirectifier(Layer): 22 | '''This is the combination of a sample-wise 23 | L2 normalization with the concatenation of the 24 | positive part of the input with the negative part 25 | of the input. The result is a tensor of samples that are 26 | twice as large as the input samples. 27 | 28 | It can be used in place of a ReLU. 29 | 30 | # Input shape 31 | 2D tensor of shape (samples, n) 32 | 33 | # Output shape 34 | 2D tensor of shape (samples, 2*n) 35 | 36 | # Theoretical justification 37 | When applying ReLU, assuming that the distribution 38 | of the previous output is approximately centered around 0., 39 | you are discarding half of your input. This is inefficient. 40 | 41 | Antirectifier allows to return all-positive outputs like ReLU, 42 | without discarding any data. 43 | 44 | Tests on MNIST show that Antirectifier allows to train networks 45 | with twice less parameters yet with comparable 46 | classification accuracy as an equivalent ReLU-based network. 47 | ''' 48 | 49 | def get_output_shape_for(self, input_shape): 50 | shape = list(input_shape) 51 | assert len(shape) == 2 # only valid for 2D tensors 52 | shape[-1] *= 2 53 | return tuple(shape) 54 | 55 | def call(self, x, mask=None): 56 | x -= K.mean(x, axis=1, keepdims=True) 57 | x = K.l2_normalize(x, axis=1) 58 | pos = K.relu(x) 59 | neg = K.relu(-x) 60 | return K.concatenate([pos, neg], axis=1) 61 | 62 | # global parameters 63 | batch_size = 128 64 | nb_classes = 10 65 | nb_epoch = 40 66 | 67 | # the data, shuffled and split between train and test sets 68 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 69 | 70 | X_train = X_train.reshape(60000, 784) 71 | X_test = X_test.reshape(10000, 784) 72 | X_train = X_train.astype('float32') 73 | X_test = X_test.astype('float32') 74 | X_train /= 255 75 | X_test /= 255 76 | print(X_train.shape[0], 'train samples') 77 | print(X_test.shape[0], 'test samples') 78 | 79 | # convert class vectors to binary class matrices 80 | Y_train = np_utils.to_categorical(y_train, nb_classes) 81 | Y_test = np_utils.to_categorical(y_test, nb_classes) 82 | 83 | # build the model 84 | model = Sequential() 85 | model.add(Dense(256, input_shape=(784,))) 86 | model.add(Antirectifier()) 87 | model.add(Dropout(0.1)) 88 | model.add(Dense(256)) 89 | model.add(Antirectifier()) 90 | model.add(Dropout(0.1)) 91 | model.add(Dense(10)) 92 | model.add(Activation('softmax')) 93 | 94 | # compile the model 95 | model.compile(loss='categorical_crossentropy', 96 | optimizer='rmsprop', 97 | metrics=['accuracy']) 98 | 99 | # train the model 100 | model.fit(X_train, Y_train, 101 | batch_size=batch_size, nb_epoch=nb_epoch, 102 | verbose=1, validation_data=(X_test, Y_test)) 103 | 104 | # next, compare with an equivalent network 105 | # with2x bigger Dense layers and ReLU 106 | -------------------------------------------------------------------------------- /examples/cifar10_cnn.py: 
-------------------------------------------------------------------------------- 1 | '''Train a simple deep CNN on the CIFAR10 small images dataset. 2 | 3 | GPU run command with Theano backend (with TensorFlow, the GPU is automatically used): 4 | THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 python cifar10_cnn.py 5 | 6 | It gets down to 0.65 test logloss in 25 epochs, and down to 0.55 after 50 epochs. 7 | (it's still underfitting at that point, though). 8 | ''' 9 | 10 | from __future__ import print_function 11 | from keras.datasets import cifar10 12 | from keras.preprocessing.image import ImageDataGenerator 13 | from keras.models import Sequential 14 | from keras.layers import Dense, Dropout, Activation, Flatten 15 | from keras.layers import Convolution2D, MaxPooling2D 16 | from keras.utils import np_utils 17 | 18 | batch_size = 32 19 | nb_classes = 10 20 | nb_epoch = 200 21 | data_augmentation = True 22 | 23 | # input image dimensions 24 | img_rows, img_cols = 32, 32 25 | # The CIFAR10 images are RGB. 26 | img_channels = 3 27 | 28 | # The data, shuffled and split between train and test sets: 29 | (X_train, y_train), (X_test, y_test) = cifar10.load_data() 30 | print('X_train shape:', X_train.shape) 31 | print(X_train.shape[0], 'train samples') 32 | print(X_test.shape[0], 'test samples') 33 | 34 | # Convert class vectors to binary class matrices. 35 | Y_train = np_utils.to_categorical(y_train, nb_classes) 36 | Y_test = np_utils.to_categorical(y_test, nb_classes) 37 | 38 | model = Sequential() 39 | 40 | model.add(Convolution2D(32, 3, 3, border_mode='same', 41 | input_shape=X_train.shape[1:])) 42 | model.add(Activation('relu')) 43 | model.add(Convolution2D(32, 3, 3)) 44 | model.add(Activation('relu')) 45 | model.add(MaxPooling2D(pool_size=(2, 2))) 46 | model.add(Dropout(0.25)) 47 | 48 | model.add(Convolution2D(64, 3, 3, border_mode='same')) 49 | model.add(Activation('relu')) 50 | model.add(Convolution2D(64, 3, 3)) 51 | model.add(Activation('relu')) 52 | model.add(MaxPooling2D(pool_size=(2, 2))) 53 | model.add(Dropout(0.25)) 54 | 55 | model.add(Flatten()) 56 | model.add(Dense(512)) 57 | model.add(Activation('relu')) 58 | model.add(Dropout(0.5)) 59 | model.add(Dense(nb_classes)) 60 | model.add(Activation('softmax')) 61 | 62 | # Let's train the model using RMSprop 63 | model.compile(loss='categorical_crossentropy', 64 | optimizer='rmsprop', 65 | metrics=['accuracy']) 66 | 67 | X_train = X_train.astype('float32') 68 | X_test = X_test.astype('float32') 69 | X_train /= 255 70 | X_test /= 255 71 | 72 | if not data_augmentation: 73 | print('Not using data augmentation.') 74 | model.fit(X_train, Y_train, 75 | batch_size=batch_size, 76 | nb_epoch=nb_epoch, 77 | validation_data=(X_test, Y_test), 78 | shuffle=True) 79 | else: 80 | print('Using real-time data augmentation.') 81 | # This will do preprocessing and realtime data augmentation: 82 | datagen = ImageDataGenerator( 83 | featurewise_center=False, # set input mean to 0 over the dataset 84 | samplewise_center=False, # set each sample mean to 0 85 | featurewise_std_normalization=False, # divide inputs by std of the dataset 86 | samplewise_std_normalization=False, # divide each input by its std 87 | zca_whitening=False, # apply ZCA whitening 88 | rotation_range=0, # randomly rotate images in the range (degrees, 0 to 180) 89 | width_shift_range=0.1, # randomly shift images horizontally (fraction of total width) 90 | height_shift_range=0.1, # randomly shift images vertically (fraction of total height) 91 | horizontal_flip=True, # randomly flip 
images 92 | vertical_flip=False) # randomly flip images 93 | 94 | # Compute quantities required for featurewise normalization 95 | # (std, mean, and principal components if ZCA whitening is applied). 96 | datagen.fit(X_train) 97 | 98 | # Fit the model on the batches generated by datagen.flow(). 99 | model.fit_generator(datagen.flow(X_train, Y_train, 100 | batch_size=batch_size), 101 | samples_per_epoch=X_train.shape[0], 102 | nb_epoch=nb_epoch, 103 | validation_data=(X_test, Y_test)) 104 | -------------------------------------------------------------------------------- /examples/conv_filter_visualization.py: -------------------------------------------------------------------------------- 1 | '''Visualization of the filters of VGG16, via gradient ascent in input space. 2 | 3 | This script can run on CPU in a few minutes (with the TensorFlow backend). 4 | 5 | Results example: http://i.imgur.com/4nj4KjN.jpg 6 | ''' 7 | from __future__ import print_function 8 | from scipy.misc import imsave 9 | import numpy as np 10 | import time 11 | from keras.applications import vgg16 12 | from keras import backend as K 13 | 14 | # dimensions of the generated pictures for each filter. 15 | img_width = 128 16 | img_height = 128 17 | 18 | # the name of the layer we want to visualize 19 | # (see model definition at keras/applications/vgg16.py) 20 | layer_name = 'block5_conv1' 21 | 22 | # util function to convert a tensor into a valid image 23 | 24 | 25 | def deprocess_image(x): 26 | # normalize tensor: center on 0., ensure std is 0.1 27 | x -= x.mean() 28 | x /= (x.std() + 1e-5) 29 | x *= 0.1 30 | 31 | # clip to [0, 1] 32 | x += 0.5 33 | x = np.clip(x, 0, 1) 34 | 35 | # convert to RGB array 36 | x *= 255 37 | if K.image_dim_ordering() == 'th': 38 | x = x.transpose((1, 2, 0)) 39 | x = np.clip(x, 0, 255).astype('uint8') 40 | return x 41 | 42 | # build the VGG16 network with ImageNet weights 43 | model = vgg16.VGG16(weights='imagenet', include_top=False) 44 | print('Model loaded.') 45 | 46 | model.summary() 47 | 48 | # this is the placeholder for the input images 49 | input_img = model.input 50 | 51 | # get the symbolic outputs of each "key" layer (we gave them unique names). 52 | layer_dict = dict([(layer.name, layer) for layer in model.layers[1:]]) 53 | 54 | 55 | def normalize(x): 56 | # utility function to normalize a tensor by its L2 norm 57 | return x / (K.sqrt(K.mean(K.square(x))) + 1e-5) 58 | 59 | 60 | kept_filters = [] 61 | for filter_index in range(0, 200): 62 | # we only scan through the first 200 filters, 63 | # but there are actually 512 of them 64 | print('Processing filter %d' % filter_index) 65 | start_time = time.time() 66 | 67 | # we build a loss function that maximizes the activation 68 | # of the nth filter of the layer considered 69 | layer_output = layer_dict[layer_name].output 70 | if K.image_dim_ordering() == 'th': 71 | loss = K.mean(layer_output[:, filter_index, :, :]) 72 | else: 73 | loss = K.mean(layer_output[:, :, :, filter_index]) 74 | 75 | # we compute the gradient of the input picture wrt this loss 76 | grads = K.gradients(loss, input_img)[0] 77 | 78 | # normalization trick: we normalize the gradient 79 | grads = normalize(grads) 80 | 81 | # this function returns the loss and grads given the input picture 82 | iterate = K.function([input_img], [loss, grads]) 83 | 84 | # step size for gradient ascent 85 | step = 1. 
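# Because `grads` was L2-normalized above, `step` directly sets the magnitude
# of each pixel-space update in the gradient-ascent loop below.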
86 | 87 | # we start from a gray image with some random noise 88 | if K.image_dim_ordering() == 'th': 89 | input_img_data = np.random.random((1, 3, img_width, img_height)) 90 | else: 91 | input_img_data = np.random.random((1, img_width, img_height, 3)) 92 | input_img_data = (input_img_data - 0.5) * 20 + 128 93 | 94 | # we run gradient ascent for 20 steps 95 | for i in range(20): 96 | loss_value, grads_value = iterate([input_img_data]) 97 | input_img_data += grads_value * step 98 | 99 | print('Current loss value:', loss_value) 100 | if loss_value <= 0.: 101 | # some filters get stuck to 0, we can skip them 102 | break 103 | 104 | # decode the resulting input image 105 | if loss_value > 0: 106 | img = deprocess_image(input_img_data[0]) 107 | kept_filters.append((img, loss_value)) 108 | end_time = time.time() 109 | print('Filter %d processed in %ds' % (filter_index, end_time - start_time)) 110 | 111 | # we will stich the best 64 filters on a 8 x 8 grid. 112 | n = 8 113 | 114 | # the filters that have the highest loss are assumed to be better-looking. 115 | # we will only keep the top 64 filters. 116 | kept_filters.sort(key=lambda x: x[1], reverse=True) 117 | kept_filters = kept_filters[:n * n] 118 | 119 | # build a black picture with enough space for 120 | # our 8 x 8 filters of size 128 x 128, with a 5px margin in between 121 | margin = 5 122 | width = n * img_width + (n - 1) * margin 123 | height = n * img_height + (n - 1) * margin 124 | stitched_filters = np.zeros((width, height, 3)) 125 | 126 | # fill the picture with our saved filters 127 | for i in range(n): 128 | for j in range(n): 129 | img, loss = kept_filters[i * n + j] 130 | stitched_filters[(img_width + margin) * i: (img_width + margin) * i + img_width, 131 | (img_height + margin) * j: (img_height + margin) * j + img_height, :] = img 132 | 133 | # save the result to disk 134 | imsave('stitched_filters_%dx%d.png' % (n, n), stitched_filters) 135 | -------------------------------------------------------------------------------- /examples/imdb_bidirectional_lstm.py: -------------------------------------------------------------------------------- 1 | '''Train a Bidirectional LSTM on the IMDB sentiment classification task. 2 | 3 | Output after 4 epochs on CPU: ~0.8146 4 | Time per epoch on CPU (Core i7): ~150s. 
5 | ''' 6 | 7 | from __future__ import print_function 8 | import numpy as np 9 | np.random.seed(1337) # for reproducibility 10 | 11 | from keras.preprocessing import sequence 12 | from keras.models import Sequential 13 | from keras.layers import Dense, Dropout, Embedding, LSTM, Bidirectional 14 | from keras.datasets import imdb 15 | 16 | 17 | max_features = 20000 18 | maxlen = 100 # cut texts after this number of words (among top max_features most common words) 19 | batch_size = 32 20 | 21 | print('Loading data...') 22 | (X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features) 23 | print(len(X_train), 'train sequences') 24 | print(len(X_test), 'test sequences') 25 | 26 | print("Pad sequences (samples x time)") 27 | X_train = sequence.pad_sequences(X_train, maxlen=maxlen) 28 | X_test = sequence.pad_sequences(X_test, maxlen=maxlen) 29 | print('X_train shape:', X_train.shape) 30 | print('X_test shape:', X_test.shape) 31 | y_train = np.array(y_train) 32 | y_test = np.array(y_test) 33 | 34 | model = Sequential() 35 | model.add(Embedding(max_features, 128, input_length=maxlen)) 36 | model.add(Bidirectional(LSTM(64))) 37 | model.add(Dropout(0.5)) 38 | model.add(Dense(1, activation='sigmoid')) 39 | 40 | # try using different optimizers and different optimizer configs 41 | model.compile('adam', 'binary_crossentropy', metrics=['accuracy']) 42 | 43 | print('Train...') 44 | model.fit(X_train, y_train, 45 | batch_size=batch_size, 46 | nb_epoch=4, 47 | validation_data=[X_test, y_test]) 48 | -------------------------------------------------------------------------------- /examples/imdb_cnn.py: -------------------------------------------------------------------------------- 1 | '''This example demonstrates the use of Convolution1D for text classification. 2 | 3 | Gets to 0.89 test accuracy after 2 epochs. 4 | 90s/epoch on Intel i5 2.4Ghz CPU. 5 | 10s/epoch on Tesla K40 GPU. 
6 | 7 | ''' 8 | 9 | from __future__ import print_function 10 | import numpy as np 11 | np.random.seed(1337) # for reproducibility 12 | 13 | from keras.preprocessing import sequence 14 | from keras.models import Sequential 15 | from keras.layers import Dense, Dropout, Activation 16 | from keras.layers import Embedding 17 | from keras.layers import Convolution1D, GlobalMaxPooling1D 18 | from keras.datasets import imdb 19 | 20 | 21 | # set parameters: 22 | max_features = 5000 23 | maxlen = 400 24 | batch_size = 32 25 | embedding_dims = 50 26 | nb_filter = 250 27 | filter_length = 3 28 | hidden_dims = 250 29 | nb_epoch = 2 30 | 31 | print('Loading data...') 32 | (X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features) 33 | print(len(X_train), 'train sequences') 34 | print(len(X_test), 'test sequences') 35 | 36 | print('Pad sequences (samples x time)') 37 | X_train = sequence.pad_sequences(X_train, maxlen=maxlen) 38 | X_test = sequence.pad_sequences(X_test, maxlen=maxlen) 39 | print('X_train shape:', X_train.shape) 40 | print('X_test shape:', X_test.shape) 41 | 42 | print('Build model...') 43 | model = Sequential() 44 | 45 | # we start off with an efficient embedding layer which maps 46 | # our vocab indices into embedding_dims dimensions 47 | model.add(Embedding(max_features, 48 | embedding_dims, 49 | input_length=maxlen, 50 | dropout=0.2)) 51 | 52 | # we add a Convolution1D, which will learn nb_filter 53 | # word group filters of size filter_length: 54 | model.add(Convolution1D(nb_filter=nb_filter, 55 | filter_length=filter_length, 56 | border_mode='valid', 57 | activation='relu', 58 | subsample_length=1)) 59 | # we use max pooling: 60 | model.add(GlobalMaxPooling1D()) 61 | 62 | # We add a vanilla hidden layer: 63 | model.add(Dense(hidden_dims)) 64 | model.add(Dropout(0.2)) 65 | model.add(Activation('relu')) 66 | 67 | # We project onto a single unit output layer, and squash it with a sigmoid: 68 | model.add(Dense(1)) 69 | model.add(Activation('sigmoid')) 70 | 71 | model.compile(loss='binary_crossentropy', 72 | optimizer='adam', 73 | metrics=['accuracy']) 74 | model.fit(X_train, y_train, 75 | batch_size=batch_size, 76 | nb_epoch=nb_epoch, 77 | validation_data=(X_test, y_test)) 78 | -------------------------------------------------------------------------------- /examples/imdb_cnn_lstm.py: -------------------------------------------------------------------------------- 1 | '''Train a recurrent convolutional network on the IMDB sentiment 2 | classification task. 3 | 4 | Gets to 0.8498 test accuracy after 2 epochs. 41s/epoch on K520 GPU. 5 | ''' 6 | from __future__ import print_function 7 | import numpy as np 8 | np.random.seed(1337) # for reproducibility 9 | 10 | from keras.preprocessing import sequence 11 | from keras.models import Sequential 12 | from keras.layers import Dense, Dropout, Activation 13 | from keras.layers import Embedding 14 | from keras.layers import LSTM 15 | from keras.layers import Convolution1D, MaxPooling1D 16 | from keras.datasets import imdb 17 | 18 | 19 | # Embedding 20 | max_features = 20000 21 | maxlen = 100 22 | embedding_size = 128 23 | 24 | # Convolution 25 | filter_length = 5 26 | nb_filter = 64 27 | pool_length = 4 28 | 29 | # LSTM 30 | lstm_output_size = 70 31 | 32 | # Training 33 | batch_size = 30 34 | nb_epoch = 2 35 | 36 | ''' 37 | Note: 38 | batch_size is highly sensitive. 39 | Only 2 epochs are needed as the dataset is very small. 
40 | ''' 41 | 42 | print('Loading data...') 43 | (X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features) 44 | print(len(X_train), 'train sequences') 45 | print(len(X_test), 'test sequences') 46 | 47 | print('Pad sequences (samples x time)') 48 | X_train = sequence.pad_sequences(X_train, maxlen=maxlen) 49 | X_test = sequence.pad_sequences(X_test, maxlen=maxlen) 50 | print('X_train shape:', X_train.shape) 51 | print('X_test shape:', X_test.shape) 52 | 53 | print('Build model...') 54 | 55 | model = Sequential() 56 | model.add(Embedding(max_features, embedding_size, input_length=maxlen)) 57 | model.add(Dropout(0.25)) 58 | model.add(Convolution1D(nb_filter=nb_filter, 59 | filter_length=filter_length, 60 | border_mode='valid', 61 | activation='relu', 62 | subsample_length=1)) 63 | model.add(MaxPooling1D(pool_length=pool_length)) 64 | model.add(LSTM(lstm_output_size)) 65 | model.add(Dense(1)) 66 | model.add(Activation('sigmoid')) 67 | 68 | model.compile(loss='binary_crossentropy', 69 | optimizer='adam', 70 | metrics=['accuracy']) 71 | 72 | print('Train...') 73 | model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=nb_epoch, 74 | validation_data=(X_test, y_test)) 75 | score, acc = model.evaluate(X_test, y_test, batch_size=batch_size) 76 | print('Test score:', score) 77 | print('Test accuracy:', acc) 78 | -------------------------------------------------------------------------------- /examples/imdb_lstm.py: -------------------------------------------------------------------------------- 1 | '''Trains a LSTM on the IMDB sentiment classification task. 2 | The dataset is actually too small for LSTM to be of any advantage 3 | compared to simpler, much faster methods such as TF-IDF + LogReg. 4 | Notes: 5 | 6 | - RNNs are tricky. Choice of batch size is important, 7 | choice of loss and optimizer is critical, etc. 8 | Some configurations won't converge. 9 | 10 | - LSTM loss decrease patterns during training can be quite different 11 | from what you see with CNNs/MLPs/etc. 
12 | ''' 13 | from __future__ import print_function 14 | import numpy as np 15 | np.random.seed(1337) # for reproducibility 16 | 17 | from keras.preprocessing import sequence 18 | from keras.models import Sequential 19 | from keras.layers import Dense, Activation, Embedding 20 | from keras.layers import LSTM 21 | from keras.datasets import imdb 22 | 23 | max_features = 20000 24 | maxlen = 80 # cut texts after this number of words (among top max_features most common words) 25 | batch_size = 32 26 | 27 | print('Loading data...') 28 | (X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features) 29 | print(len(X_train), 'train sequences') 30 | print(len(X_test), 'test sequences') 31 | 32 | print('Pad sequences (samples x time)') 33 | X_train = sequence.pad_sequences(X_train, maxlen=maxlen) 34 | X_test = sequence.pad_sequences(X_test, maxlen=maxlen) 35 | print('X_train shape:', X_train.shape) 36 | print('X_test shape:', X_test.shape) 37 | 38 | print('Build model...') 39 | model = Sequential() 40 | model.add(Embedding(max_features, 128, dropout=0.2)) 41 | model.add(LSTM(128, dropout_W=0.2, dropout_U=0.2)) # try using a GRU instead, for fun 42 | model.add(Dense(1)) 43 | model.add(Activation('sigmoid')) 44 | 45 | # try using different optimizers and different optimizer configs 46 | model.compile(loss='binary_crossentropy', 47 | optimizer='adam', 48 | metrics=['accuracy']) 49 | 50 | print('Train...') 51 | model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=15, 52 | validation_data=(X_test, y_test)) 53 | score, acc = model.evaluate(X_test, y_test, 54 | batch_size=batch_size) 55 | print('Test score:', score) 56 | print('Test accuracy:', acc) 57 | -------------------------------------------------------------------------------- /examples/lstm_benchmark.py: -------------------------------------------------------------------------------- 1 | '''Compare LSTM implementations on the IMDB sentiment classification task. 2 | 3 | consume_less='cpu' preprocesses input to the LSTM which typically results in 4 | faster computations at the expense of increased peak memory usage as the 5 | preprocessed input must be kept in memory. 6 | 7 | consume_less='mem' does away with the preprocessing, meaning that it might take 8 | a little longer, but should require less peak memory. 9 | 10 | consume_less='gpu' concatenates the input, output and forget gate's weights 11 | into one, large matrix, resulting in faster computation time as the GPU can 12 | utilize more cores, at the expense of reduced regularization because the same 13 | dropout is shared across the gates. 14 | 15 | Note that the relative performance of the different `consume_less` modes 16 | can vary depending on your device, your model and the size of your data. 17 | ''' 18 | 19 | import time 20 | import numpy as np 21 | import matplotlib.pyplot as plt 22 | 23 | from keras.preprocessing import sequence 24 | from keras.models import Sequential 25 | from keras.layers import Embedding, Dense, LSTM 26 | from keras.datasets import imdb 27 | 28 | max_features = 20000 29 | max_length = 80 30 | embedding_dim = 256 31 | batch_size = 128 32 | epochs = 10 33 | modes = ['cpu', 'mem', 'gpu'] 34 | 35 | print('Loading data...') 36 | (X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features) 37 | X_train = sequence.pad_sequences(X_train, max_length) 38 | X_test = sequence.pad_sequences(X_test, max_length) 39 | 40 | # Compile and train different models while meauring performance. 
41 | results = [] 42 | for mode in modes: 43 | print('Testing mode: consume_less="{}"'.format(mode)) 44 | 45 | model = Sequential() 46 | model.add(Embedding(max_features, embedding_dim, input_length=max_length, dropout=0.2)) 47 | model.add(LSTM(embedding_dim, dropout_W=0.2, dropout_U=0.2, consume_less=mode)) 48 | model.add(Dense(1, activation='sigmoid')) 49 | model.compile(loss='binary_crossentropy', 50 | optimizer='adam', 51 | metrics=['accuracy']) 52 | 53 | start_time = time.time() 54 | history = model.fit(X_train, y_train, 55 | batch_size=batch_size, 56 | nb_epoch=epochs, 57 | validation_data=(X_test, y_test)) 58 | average_time_per_epoch = (time.time() - start_time) / epochs 59 | 60 | results.append((history, average_time_per_epoch)) 61 | 62 | # Compare models' accuracy, loss and elapsed time per epoch. 63 | plt.style.use('ggplot') 64 | ax1 = plt.subplot2grid((2, 2), (0, 0)) 65 | ax1.set_title('Accuracy') 66 | ax1.set_ylabel('Validation Accuracy') 67 | ax1.set_xlabel('Epochs') 68 | ax2 = plt.subplot2grid((2, 2), (1, 0)) 69 | ax2.set_title('Loss') 70 | ax2.set_ylabel('Validation Loss') 71 | ax2.set_xlabel('Epochs') 72 | ax3 = plt.subplot2grid((2, 2), (0, 1), rowspan=2) 73 | ax3.set_title('Time') 74 | ax3.set_ylabel('Seconds') 75 | for mode, result in zip(modes, results): 76 | ax1.plot(result[0].epoch, result[0].history['val_acc'], label=mode) 77 | ax2.plot(result[0].epoch, result[0].history['val_loss'], label=mode) 78 | ax1.legend() 79 | ax2.legend() 80 | ax3.bar(np.arange(len(results)), [x[1] for x in results], 81 | tick_label=modes, align='center') 82 | plt.tight_layout() 83 | plt.show() 84 | -------------------------------------------------------------------------------- /examples/lstm_text_generation.py: -------------------------------------------------------------------------------- 1 | '''Example script to generate text from Nietzsche's writings. 2 | 3 | At least 20 epochs are required before the generated text 4 | starts sounding coherent. 5 | 6 | It is recommended to run this script on GPU, as recurrent 7 | networks are quite computationally intensive. 8 | 9 | If you try this script on new data, make sure your corpus 10 | has at least ~100k characters. ~1M is better. 
11 | ''' 12 | 13 | from __future__ import print_function 14 | from keras.models import Sequential 15 | from keras.layers import Dense, Activation 16 | from keras.layers import LSTM 17 | from keras.optimizers import RMSprop 18 | from keras.utils.data_utils import get_file 19 | import numpy as np 20 | import random 21 | import sys 22 | 23 | path = get_file('nietzsche.txt', origin="https://s3.amazonaws.com/text-datasets/nietzsche.txt") 24 | text = open(path).read().lower() 25 | print('corpus length:', len(text)) 26 | 27 | chars = sorted(list(set(text))) 28 | print('total chars:', len(chars)) 29 | char_indices = dict((c, i) for i, c in enumerate(chars)) 30 | indices_char = dict((i, c) for i, c in enumerate(chars)) 31 | 32 | # cut the text in semi-redundant sequences of maxlen characters 33 | maxlen = 40 34 | step = 3 35 | sentences = [] 36 | next_chars = [] 37 | for i in range(0, len(text) - maxlen, step): 38 | sentences.append(text[i: i + maxlen]) 39 | next_chars.append(text[i + maxlen]) 40 | print('nb sequences:', len(sentences)) 41 | 42 | print('Vectorization...') 43 | X = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool) 44 | y = np.zeros((len(sentences), len(chars)), dtype=np.bool) 45 | for i, sentence in enumerate(sentences): 46 | for t, char in enumerate(sentence): 47 | X[i, t, char_indices[char]] = 1 48 | y[i, char_indices[next_chars[i]]] = 1 49 | 50 | 51 | # build the model: a single LSTM 52 | print('Build model...') 53 | model = Sequential() 54 | model.add(LSTM(128, input_shape=(maxlen, len(chars)))) 55 | model.add(Dense(len(chars))) 56 | model.add(Activation('softmax')) 57 | 58 | optimizer = RMSprop(lr=0.01) 59 | model.compile(loss='categorical_crossentropy', optimizer=optimizer) 60 | 61 | 62 | def sample(preds, temperature=1.0): 63 | # helper function to sample an index from a probability array 64 | preds = np.asarray(preds).astype('float64') 65 | preds = np.log(preds) / temperature 66 | exp_preds = np.exp(preds) 67 | preds = exp_preds / np.sum(exp_preds) 68 | probas = np.random.multinomial(1, preds, 1) 69 | return np.argmax(probas) 70 | 71 | # train the model, output generated text after each iteration 72 | for iteration in range(1, 60): 73 | print() 74 | print('-' * 50) 75 | print('Iteration', iteration) 76 | model.fit(X, y, batch_size=128, nb_epoch=1) 77 | 78 | start_index = random.randint(0, len(text) - maxlen - 1) 79 | 80 | for diversity in [0.2, 0.5, 1.0, 1.2]: 81 | print() 82 | print('----- diversity:', diversity) 83 | 84 | generated = '' 85 | sentence = text[start_index: start_index + maxlen] 86 | generated += sentence 87 | print('----- Generating with seed: "' + sentence + '"') 88 | sys.stdout.write(generated) 89 | 90 | for i in range(400): 91 | x = np.zeros((1, maxlen, len(chars))) 92 | for t, char in enumerate(sentence): 93 | x[0, t, char_indices[char]] = 1. 94 | 95 | preds = model.predict(x, verbose=0)[0] 96 | next_index = sample(preds, diversity) 97 | next_char = indices_char[next_index] 98 | 99 | generated += next_char 100 | sentence = sentence[1:] + next_char 101 | 102 | sys.stdout.write(next_char) 103 | sys.stdout.flush() 104 | print() 105 | -------------------------------------------------------------------------------- /examples/mnist_cnn.py: -------------------------------------------------------------------------------- 1 | '''Trains a simple convnet on the MNIST dataset. 2 | 3 | Gets to 99.25% test accuracy after 12 epochs 4 | (there is still a lot of margin for parameter tuning). 5 | 16 seconds per epoch on a GRID K520 GPU. 
6 | ''' 7 | 8 | from __future__ import print_function 9 | import numpy as np 10 | np.random.seed(1337) # for reproducibility 11 | 12 | from keras.datasets import mnist 13 | from keras.models import Sequential 14 | from keras.layers import Dense, Dropout, Activation, Flatten 15 | from keras.layers import Convolution2D, MaxPooling2D 16 | from keras.utils import np_utils 17 | from keras import backend as K 18 | 19 | batch_size = 128 20 | nb_classes = 10 21 | nb_epoch = 12 22 | 23 | # input image dimensions 24 | img_rows, img_cols = 28, 28 25 | # number of convolutional filters to use 26 | nb_filters = 32 27 | # size of pooling area for max pooling 28 | pool_size = (2, 2) 29 | # convolution kernel size 30 | kernel_size = (3, 3) 31 | 32 | # the data, shuffled and split between train and test sets 33 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 34 | 35 | if K.image_dim_ordering() == 'th': 36 | X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols) 37 | X_test = X_test.reshape(X_test.shape[0], 1, img_rows, img_cols) 38 | input_shape = (1, img_rows, img_cols) 39 | else: 40 | X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1) 41 | X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 1) 42 | input_shape = (img_rows, img_cols, 1) 43 | 44 | X_train = X_train.astype('float32') 45 | X_test = X_test.astype('float32') 46 | X_train /= 255 47 | X_test /= 255 48 | print('X_train shape:', X_train.shape) 49 | print(X_train.shape[0], 'train samples') 50 | print(X_test.shape[0], 'test samples') 51 | 52 | # convert class vectors to binary class matrices 53 | Y_train = np_utils.to_categorical(y_train, nb_classes) 54 | Y_test = np_utils.to_categorical(y_test, nb_classes) 55 | 56 | model = Sequential() 57 | 58 | model.add(Convolution2D(nb_filters, kernel_size[0], kernel_size[1], 59 | border_mode='valid', 60 | input_shape=input_shape)) 61 | model.add(Activation('relu')) 62 | model.add(Convolution2D(nb_filters, kernel_size[0], kernel_size[1])) 63 | model.add(Activation('relu')) 64 | model.add(MaxPooling2D(pool_size=pool_size)) 65 | model.add(Dropout(0.25)) 66 | 67 | model.add(Flatten()) 68 | model.add(Dense(128)) 69 | model.add(Activation('relu')) 70 | model.add(Dropout(0.5)) 71 | model.add(Dense(nb_classes)) 72 | model.add(Activation('softmax')) 73 | 74 | model.compile(loss='categorical_crossentropy', 75 | optimizer='adadelta', 76 | metrics=['accuracy']) 77 | 78 | model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch, 79 | verbose=1, validation_data=(X_test, Y_test)) 80 | score = model.evaluate(X_test, Y_test, verbose=0) 81 | print('Test score:', score[0]) 82 | print('Test accuracy:', score[1]) 83 | -------------------------------------------------------------------------------- /examples/mnist_hierarchical_rnn.py: -------------------------------------------------------------------------------- 1 | """This is an example of using Hierarchical RNN (HRNN) to classify MNIST digits. 2 | 3 | HRNNs can learn across multiple levels of temporal hiearchy over a complex sequence. 4 | Usually, the first recurrent layer of an HRNN encodes a sentence (e.g. of word vectors) 5 | into a sentence vector. The second recurrent layer then encodes a sequence of 6 | such vectors (encoded by the first layer) into a document vector. This 7 | document vector is considered to preserve both the word-level and 8 | sentence-level structure of the context. 
9 | 10 | # References 11 | - [A Hierarchical Neural Autoencoder for Paragraphs and Documents](https://arxiv.org/abs/1506.01057) 12 | Encodes paragraphs and documents with HRNN. 13 | Results have shown that HRNN outperforms standard 14 | RNNs and may play some role in more sophisticated generation tasks like 15 | summarization or question answering. 16 | - [Hierarchical recurrent neural network for skeleton based action recognition](http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=7298714) 17 | Achieved state-of-the-art results on skeleton based action recognition with 3 levels 18 | of bidirectional HRNN combined with fully connected layers. 19 | 20 | In the below MNIST example the first LSTM layer first encodes every 21 | column of pixels of shape (28, 1) to a column vector of shape (128,). The second LSTM 22 | layer encodes then these 28 column vectors of shape (28, 128) to a image vector 23 | representing the whole image. A final Dense layer is added for prediction. 24 | 25 | After 5 epochs: train acc: 0.9858, val acc: 0.9864 26 | """ 27 | from __future__ import print_function 28 | 29 | from keras.datasets import mnist 30 | from keras.models import Model 31 | from keras.layers import Input, Dense, TimeDistributed 32 | from keras.layers import LSTM 33 | from keras.utils import np_utils 34 | 35 | # Training parameters. 36 | batch_size = 32 37 | nb_classes = 10 38 | nb_epochs = 5 39 | 40 | # Embedding dimensions. 41 | row_hidden = 128 42 | col_hidden = 128 43 | 44 | # The data, shuffled and split between train and test sets. 45 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 46 | 47 | # Reshapes data to 4D for Hierarchical RNN. 48 | X_train = X_train.reshape(X_train.shape[0], 28, 28, 1) 49 | X_test = X_test.reshape(X_test.shape[0], 28, 28, 1) 50 | X_train = X_train.astype('float32') 51 | X_test = X_test.astype('float32') 52 | X_train /= 255 53 | X_test /= 255 54 | print('X_train shape:', X_train.shape) 55 | print(X_train.shape[0], 'train samples') 56 | print(X_test.shape[0], 'test samples') 57 | 58 | # Converts class vectors to binary class matrices. 59 | Y_train = np_utils.to_categorical(y_train, nb_classes) 60 | Y_test = np_utils.to_categorical(y_test, nb_classes) 61 | 62 | row, col, pixel = X_train.shape[1:] 63 | 64 | # 4D input. 65 | x = Input(shape=(row, col, pixel)) 66 | 67 | # Encodes a row of pixels using TimeDistributed Wrapper. 68 | encoded_rows = TimeDistributed(LSTM(output_dim=row_hidden))(x) 69 | 70 | # Encodes columns of encoded rows. 71 | encoded_columns = LSTM(col_hidden)(encoded_rows) 72 | 73 | # Final predictions and model. 74 | prediction = Dense(nb_classes, activation='softmax')(encoded_columns) 75 | model = Model(input=x, output=prediction) 76 | model.compile(loss='categorical_crossentropy', 77 | optimizer='rmsprop', 78 | metrics=['accuracy']) 79 | 80 | # Training. 81 | model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epochs, 82 | verbose=1, validation_data=(X_test, Y_test)) 83 | 84 | # Evaluation. 85 | scores = model.evaluate(X_test, Y_test, verbose=0) 86 | print('Test loss:', scores[0]) 87 | print('Test accuracy:', scores[1]) 88 | -------------------------------------------------------------------------------- /examples/mnist_irnn.py: -------------------------------------------------------------------------------- 1 | '''This is a reproduction of the IRNN experiment 2 | with pixel-by-pixel sequential MNIST in 3 | "A Simple Way to Initialize Recurrent Networks of Rectified Linear Units" 4 | by Quoc V. Le, Navdeep Jaitly, Geoffrey E. 
Hinton 5 | 6 | arXiv:1504.00941v2 [cs.NE] 7 Apr 2015 7 | http://arxiv.org/pdf/1504.00941v2.pdf 8 | 9 | Optimizer is replaced with RMSprop which yields more stable and steady 10 | improvement. 11 | 12 | Reaches 0.93 train/test accuracy after 900 epochs 13 | (which roughly corresponds to 1687500 steps in the original paper.) 14 | ''' 15 | 16 | from __future__ import print_function 17 | 18 | from keras.datasets import mnist 19 | from keras.models import Sequential 20 | from keras.layers import Dense, Activation 21 | from keras.layers import SimpleRNN 22 | from keras.initializations import normal, identity 23 | from keras.optimizers import RMSprop 24 | from keras.utils import np_utils 25 | 26 | batch_size = 32 27 | nb_classes = 10 28 | nb_epochs = 200 29 | hidden_units = 100 30 | 31 | learning_rate = 1e-6 32 | clip_norm = 1.0 33 | 34 | # the data, shuffled and split between train and test sets 35 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 36 | 37 | X_train = X_train.reshape(X_train.shape[0], -1, 1) 38 | X_test = X_test.reshape(X_test.shape[0], -1, 1) 39 | X_train = X_train.astype('float32') 40 | X_test = X_test.astype('float32') 41 | X_train /= 255 42 | X_test /= 255 43 | print('X_train shape:', X_train.shape) 44 | print(X_train.shape[0], 'train samples') 45 | print(X_test.shape[0], 'test samples') 46 | 47 | # convert class vectors to binary class matrices 48 | Y_train = np_utils.to_categorical(y_train, nb_classes) 49 | Y_test = np_utils.to_categorical(y_test, nb_classes) 50 | 51 | print('Evaluate IRNN...') 52 | model = Sequential() 53 | model.add(SimpleRNN(output_dim=hidden_units, 54 | init=lambda shape, name: normal(shape, scale=0.001, name=name), 55 | inner_init=lambda shape, name: identity(shape, scale=1.0, name=name), 56 | activation='relu', 57 | input_shape=X_train.shape[1:])) 58 | model.add(Dense(nb_classes)) 59 | model.add(Activation('softmax')) 60 | rmsprop = RMSprop(lr=learning_rate) 61 | model.compile(loss='categorical_crossentropy', 62 | optimizer=rmsprop, 63 | metrics=['accuracy']) 64 | 65 | model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epochs, 66 | verbose=1, validation_data=(X_test, Y_test)) 67 | 68 | scores = model.evaluate(X_test, Y_test, verbose=0) 69 | print('IRNN test score:', scores[0]) 70 | print('IRNN test accuracy:', scores[1]) 71 | -------------------------------------------------------------------------------- /examples/mnist_mlp.py: -------------------------------------------------------------------------------- 1 | '''Trains a simple deep NN on the MNIST dataset. 2 | 3 | Gets to 98.40% test accuracy after 20 epochs 4 | (there is *a lot* of margin for parameter tuning). 5 | 2 seconds per epoch on a K520 GPU. 
6 | ''' 7 | 8 | from __future__ import print_function 9 | import numpy as np 10 | np.random.seed(1337) # for reproducibility 11 | 12 | from keras.datasets import mnist 13 | from keras.models import Sequential 14 | from keras.layers.core import Dense, Dropout, Activation 15 | from keras.optimizers import SGD 16 | from keras.utils import np_utils 17 | 18 | 19 | batch_size = 128 20 | nb_classes = 10 21 | nb_epoch = 20 22 | 23 | # the data, shuffled and split between train and test sets 24 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 25 | 26 | X_train = X_train.reshape(60000, 784) 27 | X_test = X_test.reshape(10000, 784) 28 | X_train = X_train.astype('float32') 29 | X_test = X_test.astype('float32') 30 | X_train /= 255 31 | X_test /= 255 32 | print(X_train.shape[0], 'train samples') 33 | print(X_test.shape[0], 'test samples') 34 | 35 | # convert class vectors to binary class matrices 36 | Y_train = np_utils.to_categorical(y_train, nb_classes) 37 | Y_test = np_utils.to_categorical(y_test, nb_classes) 38 | 39 | model = Sequential() 40 | model.add(Dense(512, input_shape=(784,))) 41 | model.add(Activation('relu')) 42 | model.add(Dropout(0.2)) 43 | model.add(Dense(512)) 44 | model.add(Activation('relu')) 45 | model.add(Dropout(0.2)) 46 | model.add(Dense(10)) 47 | model.add(Activation('softmax')) 48 | 49 | model.summary() 50 | 51 | model.compile(loss='categorical_crossentropy', 52 | optimizer=SGD(), 53 | metrics=['accuracy']) 54 | 55 | history = model.fit(X_train, Y_train, 56 | batch_size=batch_size, nb_epoch=nb_epoch, 57 | verbose=1, validation_data=(X_test, Y_test)) 58 | score = model.evaluate(X_test, Y_test, verbose=0) 59 | print('Test score:', score[0]) 60 | print('Test accuracy:', score[1]) 61 | -------------------------------------------------------------------------------- /examples/mnist_siamese_graph.py: -------------------------------------------------------------------------------- 1 | '''Train a Siamese MLP on pairs of digits from the MNIST dataset. 2 | 3 | It follows Hadsell-et-al.'06 [1] by computing the Euclidean distance on the 4 | output of the shared network and by optimizing the contrastive loss (see paper 5 | for mode details). 6 | 7 | [1] "Dimensionality Reduction by Learning an Invariant Mapping" 8 | http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf 9 | 10 | Gets to 99.5% test accuracy after 20 epochs. 11 | 3 seconds per epoch on a Titan X GPU 12 | ''' 13 | from __future__ import absolute_import 14 | from __future__ import print_function 15 | import numpy as np 16 | np.random.seed(1337) # for reproducibility 17 | 18 | import random 19 | from keras.datasets import mnist 20 | from keras.models import Sequential, Model 21 | from keras.layers import Dense, Dropout, Input, Lambda 22 | from keras.optimizers import RMSprop 23 | from keras import backend as K 24 | 25 | 26 | def euclidean_distance(vects): 27 | x, y = vects 28 | return K.sqrt(K.sum(K.square(x - y), axis=1, keepdims=True)) 29 | 30 | 31 | def eucl_dist_output_shape(shapes): 32 | shape1, shape2 = shapes 33 | return (shape1[0], 1) 34 | 35 | 36 | def contrastive_loss(y_true, y_pred): 37 | '''Contrastive loss from Hadsell-et-al.'06 38 | http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf 39 | ''' 40 | margin = 1 41 | return K.mean(y_true * K.square(y_pred) + (1 - y_true) * K.square(K.maximum(margin - y_pred, 0))) 42 | 43 | 44 | def create_pairs(x, digit_indices): 45 | '''Positive and negative pair creation. 46 | Alternates between positive and negative pairs. 
47 | ''' 48 | pairs = [] 49 | labels = [] 50 | n = min([len(digit_indices[d]) for d in range(10)]) - 1 51 | for d in range(10): 52 | for i in range(n): 53 | z1, z2 = digit_indices[d][i], digit_indices[d][i + 1] 54 | pairs += [[x[z1], x[z2]]] 55 | inc = random.randrange(1, 10) 56 | dn = (d + inc) % 10 57 | z1, z2 = digit_indices[d][i], digit_indices[dn][i] 58 | pairs += [[x[z1], x[z2]]] 59 | labels += [1, 0] 60 | return np.array(pairs), np.array(labels) 61 | 62 | 63 | def create_base_network(input_dim): 64 | '''Base network to be shared (eq. to feature extraction). 65 | ''' 66 | seq = Sequential() 67 | seq.add(Dense(128, input_shape=(input_dim,), activation='relu')) 68 | seq.add(Dropout(0.1)) 69 | seq.add(Dense(128, activation='relu')) 70 | seq.add(Dropout(0.1)) 71 | seq.add(Dense(128, activation='relu')) 72 | return seq 73 | 74 | 75 | def compute_accuracy(predictions, labels): 76 | '''Compute classification accuracy with a fixed threshold on distances. 77 | ''' 78 | return labels[predictions.ravel() < 0.5].mean() 79 | 80 | 81 | # the data, shuffled and split between train and test sets 82 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 83 | X_train = X_train.reshape(60000, 784) 84 | X_test = X_test.reshape(10000, 784) 85 | X_train = X_train.astype('float32') 86 | X_test = X_test.astype('float32') 87 | X_train /= 255 88 | X_test /= 255 89 | input_dim = 784 90 | nb_epoch = 20 91 | 92 | # create training+test positive and negative pairs 93 | digit_indices = [np.where(y_train == i)[0] for i in range(10)] 94 | tr_pairs, tr_y = create_pairs(X_train, digit_indices) 95 | 96 | digit_indices = [np.where(y_test == i)[0] for i in range(10)] 97 | te_pairs, te_y = create_pairs(X_test, digit_indices) 98 | 99 | # network definition 100 | base_network = create_base_network(input_dim) 101 | 102 | input_a = Input(shape=(input_dim,)) 103 | input_b = Input(shape=(input_dim,)) 104 | 105 | # because we re-use the same instance `base_network`, 106 | # the weights of the network 107 | # will be shared across the two branches 108 | processed_a = base_network(input_a) 109 | processed_b = base_network(input_b) 110 | 111 | distance = Lambda(euclidean_distance, output_shape=eucl_dist_output_shape)([processed_a, processed_b]) 112 | 113 | model = Model(input=[input_a, input_b], output=distance) 114 | 115 | # train 116 | rms = RMSprop() 117 | model.compile(loss=contrastive_loss, optimizer=rms) 118 | model.fit([tr_pairs[:, 0], tr_pairs[:, 1]], tr_y, 119 | validation_data=([te_pairs[:, 0], te_pairs[:, 1]], te_y), 120 | batch_size=128, 121 | nb_epoch=nb_epoch) 122 | 123 | # compute final accuracy on training and test sets 124 | pred = model.predict([tr_pairs[:, 0], tr_pairs[:, 1]]) 125 | tr_acc = compute_accuracy(pred, tr_y) 126 | pred = model.predict([te_pairs[:, 0], te_pairs[:, 1]]) 127 | te_acc = compute_accuracy(pred, te_y) 128 | 129 | print('* Accuracy on training set: %0.2f%%' % (100 * tr_acc)) 130 | print('* Accuracy on test set: %0.2f%%' % (100 * te_acc)) 131 | -------------------------------------------------------------------------------- /examples/mnist_sklearn_wrapper.py: -------------------------------------------------------------------------------- 1 | '''Example of how to use sklearn wrapper 2 | 3 | Builds simple CNN models on MNIST and uses sklearn's GridSearchCV to find best model 4 | ''' 5 | 6 | from __future__ import print_function 7 | import numpy as np 8 | np.random.seed(1337) # for reproducibility 9 | 10 | from keras.datasets import mnist 11 | from keras.models import Sequential 12 | from 
keras.layers import Dense, Dropout, Activation, Flatten 13 | from keras.layers import Convolution2D, MaxPooling2D 14 | from keras.utils import np_utils 15 | from keras.wrappers.scikit_learn import KerasClassifier 16 | from keras import backend as K 17 | from sklearn.grid_search import GridSearchCV 18 | 19 | 20 | nb_classes = 10 21 | 22 | # input image dimensions 23 | img_rows, img_cols = 28, 28 24 | 25 | # load training data and do basic data normalization 26 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 27 | 28 | if K.image_dim_ordering() == 'th': 29 | X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols) 30 | X_test = X_test.reshape(X_test.shape[0], 1, img_rows, img_cols) 31 | input_shape = (1, img_rows, img_cols) 32 | else: 33 | X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1) 34 | X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 1) 35 | input_shape = (img_rows, img_cols, 1) 36 | 37 | X_train = X_train.astype('float32') 38 | X_test = X_test.astype('float32') 39 | X_train /= 255 40 | X_test /= 255 41 | 42 | # convert class vectors to binary class matrices 43 | y_train = np_utils.to_categorical(y_train, nb_classes) 44 | y_test = np_utils.to_categorical(y_test, nb_classes) 45 | 46 | 47 | def make_model(dense_layer_sizes, nb_filters, nb_conv, nb_pool): 48 | '''Creates model comprised of 2 convolutional layers followed by dense layers 49 | 50 | dense_layer_sizes: List of layer sizes. This list has one number for each layer 51 | nb_filters: Number of convolutional filters in each convolutional layer 52 | nb_conv: Convolutional kernel size 53 | nb_pool: Size of pooling area for max pooling 54 | ''' 55 | 56 | model = Sequential() 57 | 58 | model.add(Convolution2D(nb_filters, nb_conv, nb_conv, 59 | border_mode='valid', 60 | input_shape=input_shape)) 61 | model.add(Activation('relu')) 62 | model.add(Convolution2D(nb_filters, nb_conv, nb_conv)) 63 | model.add(Activation('relu')) 64 | model.add(MaxPooling2D(pool_size=(nb_pool, nb_pool))) 65 | model.add(Dropout(0.25)) 66 | 67 | model.add(Flatten()) 68 | for layer_size in dense_layer_sizes: 69 | model.add(Dense(layer_size)) 70 | model.add(Activation('relu')) 71 | model.add(Dropout(0.5)) 72 | model.add(Dense(nb_classes)) 73 | model.add(Activation('softmax')) 74 | 75 | model.compile(loss='categorical_crossentropy', 76 | optimizer='adadelta', 77 | metrics=['accuracy']) 78 | 79 | return model 80 | 81 | dense_size_candidates = [[32], [64], [32, 32], [64, 64]] 82 | my_classifier = KerasClassifier(make_model, batch_size=32) 83 | validator = GridSearchCV(my_classifier, 84 | param_grid={'dense_layer_sizes': dense_size_candidates, 85 | # nb_epoch is avail for tuning even when not 86 | # an argument to model building function 87 | 'nb_epoch': [3, 6], 88 | 'nb_filters': [8], 89 | 'nb_conv': [3], 90 | 'nb_pool': [2]}, 91 | scoring='log_loss', 92 | n_jobs=1) 93 | validator.fit(X_train, y_train) 94 | 95 | print('The parameters of the best model are: ') 96 | print(validator.best_params_) 97 | 98 | # validator.best_estimator_ returns sklearn-wrapped version of best model. 
99 | # validator.best_estimator_.model returns the (unwrapped) keras model 100 | best_model = validator.best_estimator_.model 101 | metric_names = best_model.metrics_names 102 | metric_values = best_model.evaluate(X_test, y_test) 103 | for metric, value in zip(metric_names, metric_values): 104 | print(metric, ': ', value) 105 | -------------------------------------------------------------------------------- /examples/mnist_transfer_cnn.py: -------------------------------------------------------------------------------- 1 | '''Transfer learning toy example: 2 | 3 | 1- Train a simple convnet on the MNIST dataset the first 5 digits [0..4]. 4 | 2- Freeze convolutional layers and fine-tune dense layers 5 | for the classification of digits [5..9]. 6 | 7 | Run on GPU: THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 python mnist_transfer_cnn.py 8 | 9 | Get to 99.8% test accuracy after 5 epochs 10 | for the first five digits classifier 11 | and 99.2% for the last five digits after transfer + fine-tuning. 12 | ''' 13 | 14 | from __future__ import print_function 15 | import numpy as np 16 | import datetime 17 | 18 | np.random.seed(1337) # for reproducibility 19 | 20 | from keras.datasets import mnist 21 | from keras.models import Sequential 22 | from keras.layers import Dense, Dropout, Activation, Flatten 23 | from keras.layers import Convolution2D, MaxPooling2D 24 | from keras.utils import np_utils 25 | from keras import backend as K 26 | 27 | now = datetime.datetime.now 28 | 29 | batch_size = 128 30 | nb_classes = 5 31 | nb_epoch = 5 32 | 33 | # input image dimensions 34 | img_rows, img_cols = 28, 28 35 | # number of convolutional filters to use 36 | nb_filters = 32 37 | # size of pooling area for max pooling 38 | pool_size = 2 39 | # convolution kernel size 40 | kernel_size = 3 41 | 42 | if K.image_dim_ordering() == 'th': 43 | input_shape = (1, img_rows, img_cols) 44 | else: 45 | input_shape = (img_rows, img_cols, 1) 46 | 47 | 48 | def train_model(model, train, test, nb_classes): 49 | X_train = train[0].reshape((train[0].shape[0],) + input_shape) 50 | X_test = test[0].reshape((test[0].shape[0],) + input_shape) 51 | X_train = X_train.astype('float32') 52 | X_test = X_test.astype('float32') 53 | X_train /= 255 54 | X_test /= 255 55 | print('X_train shape:', X_train.shape) 56 | print(X_train.shape[0], 'train samples') 57 | print(X_test.shape[0], 'test samples') 58 | 59 | # convert class vectors to binary class matrices 60 | Y_train = np_utils.to_categorical(train[1], nb_classes) 61 | Y_test = np_utils.to_categorical(test[1], nb_classes) 62 | 63 | model.compile(loss='categorical_crossentropy', 64 | optimizer='adadelta', 65 | metrics=['accuracy']) 66 | 67 | t = now() 68 | model.fit(X_train, Y_train, 69 | batch_size=batch_size, nb_epoch=nb_epoch, 70 | verbose=1, 71 | validation_data=(X_test, Y_test)) 72 | print('Training time: %s' % (now() - t)) 73 | score = model.evaluate(X_test, Y_test, verbose=0) 74 | print('Test score:', score[0]) 75 | print('Test accuracy:', score[1]) 76 | 77 | 78 | # the data, shuffled and split between train and test sets 79 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 80 | 81 | # create two datasets one with digits below 5 and one with 5 and above 82 | X_train_lt5 = X_train[y_train < 5] 83 | y_train_lt5 = y_train[y_train < 5] 84 | X_test_lt5 = X_test[y_test < 5] 85 | y_test_lt5 = y_test[y_test < 5] 86 | 87 | X_train_gte5 = X_train[y_train >= 5] 88 | y_train_gte5 = y_train[y_train >= 5] - 5 # make classes start at 0 for 89 | X_test_gte5 = X_test[y_test >= 5] # 
np_utils.to_categorical 90 | y_test_gte5 = y_test[y_test >= 5] - 5 91 | 92 | # define two groups of layers: feature (convolutions) and classification (dense) 93 | feature_layers = [ 94 | Convolution2D(nb_filters, kernel_size, kernel_size, 95 | border_mode='valid', 96 | input_shape=input_shape), 97 | Activation('relu'), 98 | Convolution2D(nb_filters, kernel_size, kernel_size), 99 | Activation('relu'), 100 | MaxPooling2D(pool_size=(pool_size, pool_size)), 101 | Dropout(0.25), 102 | Flatten(), 103 | ] 104 | classification_layers = [ 105 | Dense(128), 106 | Activation('relu'), 107 | Dropout(0.5), 108 | Dense(nb_classes), 109 | Activation('softmax') 110 | ] 111 | 112 | # create complete model 113 | model = Sequential(feature_layers + classification_layers) 114 | 115 | # train model for 5-digit classification [0..4] 116 | train_model(model, 117 | (X_train_lt5, y_train_lt5), 118 | (X_test_lt5, y_test_lt5), nb_classes) 119 | 120 | # freeze feature layers and rebuild model 121 | for l in feature_layers: 122 | l.trainable = False 123 | 124 | # transfer: train dense layers for new classification task [5..9] 125 | train_model(model, 126 | (X_train_gte5, y_train_gte5), 127 | (X_test_gte5, y_test_gte5), nb_classes) 128 | -------------------------------------------------------------------------------- /examples/reuters_mlp.py: -------------------------------------------------------------------------------- 1 | '''Trains and evaluate a simple MLP 2 | on the Reuters newswire topic classification task. 3 | ''' 4 | 5 | from __future__ import print_function 6 | import numpy as np 7 | np.random.seed(1337) # for reproducibility 8 | 9 | from keras.datasets import reuters 10 | from keras.models import Sequential 11 | from keras.layers import Dense, Dropout, Activation 12 | from keras.utils import np_utils 13 | from keras.preprocessing.text import Tokenizer 14 | 15 | max_words = 1000 16 | batch_size = 32 17 | nb_epoch = 5 18 | 19 | print('Loading data...') 20 | (X_train, y_train), (X_test, y_test) = reuters.load_data(nb_words=max_words, test_split=0.2) 21 | print(len(X_train), 'train sequences') 22 | print(len(X_test), 'test sequences') 23 | 24 | nb_classes = np.max(y_train) + 1 25 | print(nb_classes, 'classes') 26 | 27 | print('Vectorizing sequence data...') 28 | tokenizer = Tokenizer(nb_words=max_words) 29 | X_train = tokenizer.sequences_to_matrix(X_train, mode='binary') 30 | X_test = tokenizer.sequences_to_matrix(X_test, mode='binary') 31 | print('X_train shape:', X_train.shape) 32 | print('X_test shape:', X_test.shape) 33 | 34 | print('Convert class vector to binary class matrix (for use with categorical_crossentropy)') 35 | Y_train = np_utils.to_categorical(y_train, nb_classes) 36 | Y_test = np_utils.to_categorical(y_test, nb_classes) 37 | print('Y_train shape:', Y_train.shape) 38 | print('Y_test shape:', Y_test.shape) 39 | 40 | print('Building model...') 41 | model = Sequential() 42 | model.add(Dense(512, input_shape=(max_words,))) 43 | model.add(Activation('relu')) 44 | model.add(Dropout(0.5)) 45 | model.add(Dense(nb_classes)) 46 | model.add(Activation('softmax')) 47 | 48 | model.compile(loss='categorical_crossentropy', 49 | optimizer='adam', 50 | metrics=['accuracy']) 51 | 52 | history = model.fit(X_train, Y_train, 53 | nb_epoch=nb_epoch, batch_size=batch_size, 54 | verbose=1, validation_split=0.1) 55 | score = model.evaluate(X_test, Y_test, 56 | batch_size=batch_size, verbose=1) 57 | print('Test score:', score[0]) 58 | print('Test accuracy:', score[1]) 59 | 
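The `mode='binary'` vectorization above simply marks which of the `max_words` most frequent word indices occur in each newswire. A minimal standalone sketch of that idea (illustrative only; `sequences_to_binary_matrix` is a hypothetical helper, not the Keras `Tokenizer` implementation):

```python
import numpy as np

def sequences_to_binary_matrix(sequences, max_words):
    """Turn lists of word indices into fixed-length 0/1 indicator vectors."""
    matrix = np.zeros((len(sequences), max_words))
    for row, seq in enumerate(sequences):
        for idx in seq:
            if idx < max_words:
                matrix[row, idx] = 1.  # repeated indices still yield a single 1
    return matrix

# Two toy "documents" with a 10-word vocabulary:
print(sequences_to_binary_matrix([[1, 3, 3, 7], [2, 9]], max_words=10))
```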
-------------------------------------------------------------------------------- /examples/stateful_lstm.py: -------------------------------------------------------------------------------- 1 | '''Example script showing how to use stateful RNNs 2 | to model long sequences efficiently. 3 | ''' 4 | from __future__ import print_function 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | from keras.models import Sequential 8 | from keras.layers import Dense, LSTM 9 | 10 | 11 | # since we are using stateful rnn tsteps can be set to 1 12 | tsteps = 1 13 | batch_size = 25 14 | epochs = 25 15 | # number of elements ahead that are used to make the prediction 16 | lahead = 1 17 | 18 | 19 | def gen_cosine_amp(amp=100, period=1000, x0=0, xn=50000, step=1, k=0.0001): 20 | """Generates an absolute cosine time series with the amplitude 21 | exponentially decreasing 22 | 23 | Arguments: 24 | amp: amplitude of the cosine function 25 | period: period of the cosine function 26 | x0: initial x of the time series 27 | xn: final x of the time series 28 | step: step of the time series discretization 29 | k: exponential rate 30 | """ 31 | cos = np.zeros(((xn - x0) * step, 1, 1)) 32 | for i in range(len(cos)): 33 | idx = x0 + i * step 34 | cos[i, 0, 0] = amp * np.cos(2 * np.pi * idx / period) 35 | cos[i, 0, 0] = cos[i, 0, 0] * np.exp(-k * idx) 36 | return cos 37 | 38 | 39 | print('Generating Data') 40 | cos = gen_cosine_amp() 41 | print('Input shape:', cos.shape) 42 | 43 | expected_output = np.zeros((len(cos), 1)) 44 | for i in range(len(cos) - lahead): 45 | expected_output[i, 0] = np.mean(cos[i + 1:i + lahead + 1]) 46 | 47 | print('Output shape') 48 | print(expected_output.shape) 49 | 50 | print('Creating Model') 51 | model = Sequential() 52 | model.add(LSTM(50, 53 | batch_input_shape=(batch_size, tsteps, 1), 54 | return_sequences=True, 55 | stateful=True)) 56 | model.add(LSTM(50, 57 | return_sequences=False, 58 | stateful=True)) 59 | model.add(Dense(1)) 60 | model.compile(loss='mse', optimizer='rmsprop') 61 | 62 | print('Training') 63 | for i in range(epochs): 64 | print('Epoch', i, '/', epochs) 65 | model.fit(cos, 66 | expected_output, 67 | batch_size=batch_size, 68 | verbose=1, 69 | nb_epoch=1, 70 | shuffle=False) 71 | model.reset_states() 72 | 73 | print('Predicting') 74 | predicted_output = model.predict(cos, batch_size=batch_size) 75 | 76 | print('Plotting Results') 77 | plt.subplot(2, 1, 1) 78 | plt.plot(expected_output) 79 | plt.title('Expected') 80 | plt.subplot(2, 1, 2) 81 | plt.plot(predicted_output) 82 | plt.title('Predicted') 83 | plt.show() 84 | -------------------------------------------------------------------------------- /examples/variational_autoencoder.py: -------------------------------------------------------------------------------- 1 | '''This script demonstrates how to build a variational autoencoder with Keras. 
2 | 3 | Reference: "Auto-Encoding Variational Bayes" https://arxiv.org/abs/1312.6114 4 | ''' 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | from scipy.stats import norm 8 | 9 | from keras.layers import Input, Dense, Lambda 10 | from keras.models import Model 11 | from keras import backend as K 12 | from keras import objectives 13 | from keras.datasets import mnist 14 | 15 | batch_size = 100 16 | original_dim = 784 17 | latent_dim = 2 18 | intermediate_dim = 256 19 | nb_epoch = 50 20 | epsilon_std = 1.0 21 | 22 | x = Input(batch_shape=(batch_size, original_dim)) 23 | h = Dense(intermediate_dim, activation='relu')(x) 24 | z_mean = Dense(latent_dim)(h) 25 | z_log_var = Dense(latent_dim)(h) 26 | 27 | 28 | def sampling(args): 29 | z_mean, z_log_var = args 30 | epsilon = K.random_normal(shape=(batch_size, latent_dim), mean=0., 31 | std=epsilon_std) 32 | return z_mean + K.exp(z_log_var / 2) * epsilon 33 | 34 | # note that "output_shape" isn't necessary with the TensorFlow backend 35 | z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_var]) 36 | 37 | # we instantiate these layers separately so as to reuse them later 38 | decoder_h = Dense(intermediate_dim, activation='relu') 39 | decoder_mean = Dense(original_dim, activation='sigmoid') 40 | h_decoded = decoder_h(z) 41 | x_decoded_mean = decoder_mean(h_decoded) 42 | 43 | 44 | def vae_loss(x, x_decoded_mean): 45 | xent_loss = original_dim * objectives.binary_crossentropy(x, x_decoded_mean) 46 | kl_loss = - 0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1) 47 | return xent_loss + kl_loss 48 | 49 | vae = Model(x, x_decoded_mean) 50 | vae.compile(optimizer='rmsprop', loss=vae_loss) 51 | 52 | # train the VAE on MNIST digits 53 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 54 | 55 | x_train = x_train.astype('float32') / 255. 56 | x_test = x_test.astype('float32') / 255. 
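# Flatten each 28x28 MNIST image into a 784-dimensional vector so the inputs
# match `original_dim`, the size expected by the encoder's Input layer.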
57 | x_train = x_train.reshape((len(x_train), np.prod(x_train.shape[1:]))) 58 | x_test = x_test.reshape((len(x_test), np.prod(x_test.shape[1:]))) 59 | 60 | vae.fit(x_train, x_train, 61 | shuffle=True, 62 | nb_epoch=nb_epoch, 63 | batch_size=batch_size, 64 | validation_data=(x_test, x_test)) 65 | 66 | # build a model to project inputs on the latent space 67 | encoder = Model(x, z_mean) 68 | 69 | # display a 2D plot of the digit classes in the latent space 70 | x_test_encoded = encoder.predict(x_test, batch_size=batch_size) 71 | plt.figure(figsize=(6, 6)) 72 | plt.scatter(x_test_encoded[:, 0], x_test_encoded[:, 1], c=y_test) 73 | plt.colorbar() 74 | plt.show() 75 | 76 | # build a digit generator that can sample from the learned distribution 77 | decoder_input = Input(shape=(latent_dim,)) 78 | _h_decoded = decoder_h(decoder_input) 79 | _x_decoded_mean = decoder_mean(_h_decoded) 80 | generator = Model(decoder_input, _x_decoded_mean) 81 | 82 | # display a 2D manifold of the digits 83 | n = 15 # figure with 15x15 digits 84 | digit_size = 28 85 | figure = np.zeros((digit_size * n, digit_size * n)) 86 | # linearly spaced coordinates on the unit square were transformed through the inverse CDF (ppf) of the Gaussian 87 | # to produce values of the latent variables z, since the prior of the latent space is Gaussian 88 | grid_x = norm.ppf(np.linspace(0.05, 0.95, n)) 89 | grid_y = norm.ppf(np.linspace(0.05, 0.95, n)) 90 | 91 | for i, yi in enumerate(grid_x): 92 | for j, xi in enumerate(grid_y): 93 | z_sample = np.array([[xi, yi]]) 94 | x_decoded = generator.predict(z_sample) 95 | digit = x_decoded[0].reshape(digit_size, digit_size) 96 | figure[i * digit_size: (i + 1) * digit_size, 97 | j * digit_size: (j + 1) * digit_size] = digit 98 | 99 | plt.figure(figsize=(10, 10)) 100 | plt.imshow(figure, cmap='Greys_r') 101 | plt.show() 102 | -------------------------------------------------------------------------------- /keras/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from . import backend 3 | from . import datasets 4 | from . import engine 5 | from . import layers 6 | from . import preprocessing 7 | from . import utils 8 | from . import wrappers 9 | from . import callbacks 10 | from . import constraints 11 | from . import initializations 12 | from . import metrics 13 | from . import models 14 | from . import objectives 15 | from . import optimizers 16 | from . import regularizers 17 | 18 | __version__ = '1.2.2' 19 | -------------------------------------------------------------------------------- /keras/activations.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from . import backend as K 3 | from .utils.generic_utils import get_from_module 4 | 5 | 6 | def softmax(x): 7 | ndim = K.ndim(x) 8 | if ndim == 2: 9 | return K.softmax(x) 10 | elif ndim == 3: 11 | e = K.exp(x - K.max(x, axis=-1, keepdims=True)) 12 | s = K.sum(e, axis=-1, keepdims=True) 13 | return e / s 14 | else: 15 | raise ValueError('Cannot apply softmax to a tensor ' 16 | 'that is not 2D or 3D. 
' 17 | 'Here, ndim=' + str(ndim)) 18 | 19 | 20 | def elu(x, alpha=1.0): 21 | return K.elu(x, alpha) 22 | 23 | 24 | def softplus(x): 25 | return K.softplus(x) 26 | 27 | 28 | def softsign(x): 29 | return K.softsign(x) 30 | 31 | 32 | def relu(x, alpha=0., max_value=None): 33 | return K.relu(x, alpha=alpha, max_value=max_value) 34 | 35 | 36 | def tanh(x): 37 | return K.tanh(x) 38 | 39 | 40 | def sigmoid(x): 41 | return K.sigmoid(x) 42 | 43 | 44 | def hard_sigmoid(x): 45 | return K.hard_sigmoid(x) 46 | 47 | 48 | def linear(x): 49 | return x 50 | 51 | 52 | def get(identifier): 53 | if identifier is None: 54 | return linear 55 | return get_from_module(identifier, globals(), 'activation function') 56 | -------------------------------------------------------------------------------- /keras/applications/__init__.py: -------------------------------------------------------------------------------- 1 | from .vgg16 import VGG16 2 | from .vgg19 import VGG19 3 | from .resnet50 import ResNet50 4 | from .inception_v3 import InceptionV3 5 | from .xception import Xception 6 | -------------------------------------------------------------------------------- /keras/applications/audio_conv_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from .. import backend as K 3 | 4 | try: 5 | import librosa 6 | except ImportError: 7 | librosa = None 8 | 9 | 10 | TAGS = ['rock', 'pop', 'alternative', 'indie', 'electronic', 11 | 'female vocalists', 'dance', '00s', 'alternative rock', 'jazz', 12 | 'beautiful', 'metal', 'chillout', 'male vocalists', 13 | 'classic rock', 'soul', 'indie rock', 'Mellow', 'electronica', 14 | '80s', 'folk', '90s', 'chill', 'instrumental', 'punk', 15 | 'oldies', 'blues', 'hard rock', 'ambient', 'acoustic', 16 | 'experimental', 'female vocalist', 'guitar', 'Hip-Hop', 17 | '70s', 'party', 'country', 'easy listening', 18 | 'sexy', 'catchy', 'funk', 'electro', 'heavy metal', 19 | 'Progressive rock', '60s', 'rnb', 'indie pop', 20 | 'sad', 'House', 'happy'] 21 | 22 | 23 | def preprocess_input(audio_path, dim_ordering='default'): 24 | """Reads an audio file and outputs a Mel-spectrogram. 25 | 26 | # Arguments 27 | audio_path: path to the target audio file. 28 | dim_ordering: data format for the output spectrogram image. 29 | 30 | # Returns 31 | 3D Numpy tensor encoding the Mel-spectrogram. 32 | 33 | # Raises 34 | ImportError: if librosa is not available. 35 | """ 36 | if dim_ordering == 'default': 37 | dim_ordering = K.image_dim_ordering() 38 | assert dim_ordering in {'tf', 'th'} 39 | 40 | if librosa is None: 41 | raise ImportError('Librosa is required to process audio files. 
' 42 | 'Install it via `pip install librosa` or visit ' 43 | 'http://librosa.github.io/librosa/ for details.') 44 | 45 | # mel-spectrogram parameters 46 | sr = 12000 47 | n_fft = 512 48 | n_mels = 96 49 | hop_length = 256 50 | duration = 29.12 51 | 52 | src, sr = librosa.load(audio_path, sr=sr) 53 | n_sample = src.shape[0] 54 | n_sample_wanted = int(duration * sr) 55 | 56 | # trim the signal at the center 57 | if n_sample < n_sample_wanted: # if too short 58 | src = np.hstack((src, np.zeros((int(duration * sr) - n_sample,)))) 59 | elif n_sample > n_sample_wanted: # if too long 60 | src = src[(n_sample - n_sample_wanted) // 2: 61 | (n_sample + n_sample_wanted) // 2] 62 | 63 | logam = librosa.logamplitude 64 | melgram = librosa.feature.melspectrogram 65 | x = logam(melgram(y=src, sr=sr, hop_length=hop_length, 66 | n_fft=n_fft, n_mels=n_mels) ** 2, 67 | ref_power=1.0) 68 | 69 | if dim_ordering == 'th': 70 | x = np.expand_dims(x, axis=0) 71 | elif dim_ordering == 'tf': 72 | x = np.expand_dims(x, axis=3) 73 | return x 74 | 75 | 76 | def decode_predictions(preds, top_n=5): 77 | """Decode the output of a music tagger model. 78 | 79 | # Arguments 80 | preds: 2-dimensional numpy array 81 | top_n: integer in [0, 50], number of items to show. 82 | 83 | # Returns 84 | Decoded output. 85 | """ 86 | assert len(preds.shape) == 2 and preds.shape[1] == 50 87 | results = [] 88 | for pred in preds: 89 | result = zip(TAGS, pred) 90 | result = sorted(result, key=lambda x: x[1], reverse=True) 91 | results.append(result[:top_n]) 92 | return results 93 | -------------------------------------------------------------------------------- /keras/backend/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | import os 4 | import json 5 | import sys 6 | from .common import epsilon 7 | from .common import floatx 8 | from .common import set_epsilon 9 | from .common import set_floatx 10 | from .common import get_uid 11 | from .common import cast_to_floatx 12 | from .common import image_dim_ordering 13 | from .common import set_image_dim_ordering 14 | from .common import is_keras_tensor 15 | from .common import legacy_weight_ordering 16 | from .common import set_legacy_weight_ordering 17 | 18 | _keras_base_dir = os.path.expanduser('~') 19 | if not os.access(_keras_base_dir, os.W_OK): 20 | _keras_base_dir = '/tmp' 21 | 22 | _keras_dir = os.path.join(_keras_base_dir, '.keras') 23 | if not os.path.exists(_keras_dir): 24 | os.makedirs(_keras_dir) 25 | 26 | # Default backend: TensorFlow. 
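# (Note: this fork's hard-coded fallback below is the MXNet backend; as with
# upstream Keras, it is still overridden by ~/.keras/keras.json or by the
# KERAS_BACKEND environment variable, both handled later in this module.)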
27 | _BACKEND = 'mxnet' 28 | 29 | _config_path = os.path.expanduser(os.path.join(_keras_dir, 'keras.json')) 30 | if os.path.exists(_config_path): 31 | _config = json.load(open(_config_path)) 32 | _floatx = _config.get('floatx', floatx()) 33 | assert _floatx in {'float16', 'float32', 'float64'} 34 | _epsilon = _config.get('epsilon', epsilon()) 35 | assert isinstance(_epsilon, float) 36 | _backend = _config.get('backend', _BACKEND) 37 | assert _backend in {'theano', 'tensorflow', 'mxnet'} 38 | _image_dim_ordering = _config.get('image_dim_ordering', 39 | image_dim_ordering()) 40 | assert _image_dim_ordering in {'tf', 'th'} 41 | 42 | set_floatx(_floatx) 43 | set_epsilon(_epsilon) 44 | set_image_dim_ordering(_image_dim_ordering) 45 | _BACKEND = _backend 46 | 47 | # save config file 48 | if not os.path.exists(_config_path): 49 | _config = {'floatx': floatx(), 50 | 'epsilon': epsilon(), 51 | 'backend': _BACKEND, 52 | 'image_dim_ordering': image_dim_ordering()} 53 | with open(_config_path, 'w') as f: 54 | f.write(json.dumps(_config, indent=4)) 55 | 56 | if 'KERAS_BACKEND' in os.environ: 57 | _backend = os.environ['KERAS_BACKEND'] 58 | assert _backend in {'theano', 'tensorflow', 'mxnet'} 59 | _BACKEND = _backend 60 | 61 | # import backend 62 | if _BACKEND == 'theano': 63 | sys.stderr.write('Using Theano backend.\n') 64 | from .theano_backend import * 65 | elif _BACKEND == 'tensorflow': 66 | sys.stderr.write('Using TensorFlow backend.\n') 67 | from .tensorflow_backend import * 68 | elif _BACKEND == 'mxnet': 69 | sys.stderr.write('Using MXNet backend.\n') 70 | from .mxnet_backend import * 71 | else: 72 | raise ValueError('Unknown backend: ' + str(_BACKEND)) 73 | 74 | 75 | def backend(): 76 | """Publicly accessible method 77 | for determining the current backend. 78 | """ 79 | return _BACKEND 80 | -------------------------------------------------------------------------------- /keras/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/keras/a73b319264459b43462fba8c78669d3d185710d0/keras/datasets/__init__.py -------------------------------------------------------------------------------- /keras/datasets/cifar.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | import sys 4 | from six.moves import cPickle 5 | 6 | 7 | def load_batch(fpath, label_key='labels'): 8 | """Internal utility for parsing CIFAR data. 9 | 10 | # Arguments 11 | fpath: path the file to parse. 12 | label_key: key for label data in the retrieve 13 | dictionary. 14 | 15 | # Returns 16 | A tuple `(data, labels)`. 17 | """ 18 | f = open(fpath, 'rb') 19 | if sys.version_info < (3,): 20 | d = cPickle.load(f) 21 | else: 22 | d = cPickle.load(f, encoding='bytes') 23 | # decode utf8 24 | d_decoded = {} 25 | for k, v in d.items(): 26 | d_decoded[k.decode('utf8')] = v 27 | d = d_decoded 28 | f.close() 29 | data = d['data'] 30 | labels = d[label_key] 31 | 32 | data = data.reshape(data.shape[0], 3, 32, 32) 33 | return data, labels 34 | -------------------------------------------------------------------------------- /keras/datasets/cifar10.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from .cifar import load_batch 3 | from ..utils.data_utils import get_file 4 | from .. 
import backend as K 5 | import numpy as np 6 | import os 7 | 8 | 9 | def load_data(): 10 | """Loads CIFAR10 dataset. 11 | 12 | # Returns 13 | Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`. 14 | """ 15 | dirname = 'cifar-10-batches-py' 16 | origin = 'http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz' 17 | path = get_file(dirname, origin=origin, untar=True) 18 | 19 | nb_train_samples = 50000 20 | 21 | x_train = np.zeros((nb_train_samples, 3, 32, 32), dtype='uint8') 22 | y_train = np.zeros((nb_train_samples,), dtype='uint8') 23 | 24 | for i in range(1, 6): 25 | fpath = os.path.join(path, 'data_batch_' + str(i)) 26 | data, labels = load_batch(fpath) 27 | x_train[(i - 1) * 10000: i * 10000, :, :, :] = data 28 | y_train[(i - 1) * 10000: i * 10000] = labels 29 | 30 | fpath = os.path.join(path, 'test_batch') 31 | x_test, y_test = load_batch(fpath) 32 | 33 | y_train = np.reshape(y_train, (len(y_train), 1)) 34 | y_test = np.reshape(y_test, (len(y_test), 1)) 35 | 36 | if K.image_dim_ordering() == 'tf': 37 | x_train = x_train.transpose(0, 2, 3, 1) 38 | x_test = x_test.transpose(0, 2, 3, 1) 39 | 40 | return (x_train, y_train), (x_test, y_test) 41 | -------------------------------------------------------------------------------- /keras/datasets/cifar100.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from .cifar import load_batch 3 | from ..utils.data_utils import get_file 4 | from .. import backend as K 5 | import numpy as np 6 | import os 7 | 8 | 9 | def load_data(label_mode='fine'): 10 | """Loads CIFAR100 dataset. 11 | 12 | # Arguments 13 | label_mode: one of "fine", "coarse". 14 | 15 | # Returns 16 | Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`. 17 | 18 | # Raises 19 | ValueError: in case of invalid `label_mode`. 20 | """ 21 | if label_mode not in ['fine', 'coarse']: 22 | raise ValueError('label_mode must be one of "fine" "coarse".') 23 | 24 | dirname = 'cifar-100-python' 25 | origin = 'http://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz' 26 | path = get_file(dirname, origin=origin, untar=True) 27 | 28 | fpath = os.path.join(path, 'train') 29 | x_train, y_train = load_batch(fpath, label_key=label_mode + '_labels') 30 | 31 | fpath = os.path.join(path, 'test') 32 | x_test, y_test = load_batch(fpath, label_key=label_mode + '_labels') 33 | 34 | y_train = np.reshape(y_train, (len(y_train), 1)) 35 | y_test = np.reshape(y_test, (len(y_test), 1)) 36 | 37 | if K.image_dim_ordering() == 'tf': 38 | x_train = x_train.transpose(0, 2, 3, 1) 39 | x_test = x_test.transpose(0, 2, 3, 1) 40 | 41 | return (x_train, y_train), (x_test, y_test) 42 | -------------------------------------------------------------------------------- /keras/datasets/imdb.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from six.moves import cPickle 3 | import gzip 4 | from ..utils.data_utils import get_file 5 | from six.moves import zip 6 | import numpy as np 7 | import sys 8 | 9 | 10 | def load_data(path='imdb_full.pkl', nb_words=None, skip_top=0, 11 | maxlen=None, seed=113, 12 | start_char=1, oov_char=2, index_from=3): 13 | """Loads the IMDB dataset. 14 | 15 | # Arguments 16 | path: where to cache the data (relative to `~/.keras/dataset`). 17 | nb_words: max number of words to include. 
Words are ranked 18 | by how often they occur (in the training set) and only 19 | the most frequent words are kept 20 | skip_top: skip the top N most frequently occuring words 21 | (which may not be informative). 22 | maxlen: truncate sequences after this length. 23 | seed: random seed for sample shuffling. 24 | start_char: The start of a sequence will be marked with this character. 25 | Set to 1 because 0 is usually the padding character. 26 | oov_char: words that were cut out because of the `nb_words` 27 | or `skip_top` limit will be replaced with this character. 28 | index_from: index actual words with this index and higher. 29 | 30 | # Returns 31 | Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`. 32 | 33 | # Raises 34 | ValueError: in case `maxlen` is so low 35 | that no input sequence could be kept. 36 | 37 | Note that the 'out of vocabulary' character is only used for 38 | words that were present in the training set but are not included 39 | because they're not making the `nb_words` cut here. 40 | Words that were not seen in the trining set but are in the test set 41 | have simply been skipped. 42 | """ 43 | path = get_file(path, 44 | origin='https://s3.amazonaws.com/text-datasets/imdb_full.pkl', 45 | md5_hash='d091312047c43cf9e4e38fef92437263') 46 | 47 | if path.endswith('.gz'): 48 | f = gzip.open(path, 'rb') 49 | else: 50 | f = open(path, 'rb') 51 | 52 | (x_train, labels_train), (x_test, labels_test) = cPickle.load(f) 53 | f.close() 54 | 55 | np.random.seed(seed) 56 | np.random.shuffle(x_train) 57 | np.random.seed(seed) 58 | np.random.shuffle(labels_train) 59 | 60 | np.random.seed(seed * 2) 61 | np.random.shuffle(x_test) 62 | np.random.seed(seed * 2) 63 | np.random.shuffle(labels_test) 64 | 65 | xs = x_train + x_test 66 | labels = labels_train + labels_test 67 | 68 | if start_char is not None: 69 | xs = [[start_char] + [w + index_from for w in x] for x in xs] 70 | elif index_from: 71 | xs = [[w + index_from for w in x] for x in xs] 72 | 73 | if maxlen: 74 | new_xs = [] 75 | new_labels = [] 76 | for x, y in zip(xs, labels): 77 | if len(x) < maxlen: 78 | new_xs.append(x) 79 | new_labels.append(y) 80 | xs = new_xs 81 | labels = new_labels 82 | if not xs: 83 | raise ValueError('After filtering for sequences shorter than maxlen=' + 84 | str(maxlen) + ', no sequence was kept. ' 85 | 'Increase maxlen.') 86 | if not nb_words: 87 | nb_words = max([max(x) for x in xs]) 88 | 89 | # by convention, use 2 as OOV word 90 | # reserve 'index_from' (=3 by default) characters: 91 | # 0 (padding), 1 (start), 2 (OOV) 92 | if oov_char is not None: 93 | xs = [[oov_char if (w >= nb_words or w < skip_top) else w for w in x] for x in xs] 94 | else: 95 | new_xs = [] 96 | for x in xs: 97 | nx = [] 98 | for w in x: 99 | if w >= nb_words or w < skip_top: 100 | nx.append(w) 101 | new_xs.append(nx) 102 | xs = new_xs 103 | 104 | x_train = np.array(xs[:len(x_train)]) 105 | y_train = np.array(labels[:len(x_train)]) 106 | 107 | x_test = np.array(xs[len(x_train):]) 108 | y_test = np.array(labels[len(x_train):]) 109 | 110 | return (x_train, y_train), (x_test, y_test) 111 | 112 | 113 | def get_word_index(path='imdb_word_index.pkl'): 114 | """Retrieves the dictionary mapping word indices back to words. 115 | 116 | # Arguments 117 | path: where to cache the data (relative to `~/.keras/dataset`). 118 | 119 | # Returns 120 | The word index dictionary. 
121 | """ 122 | path = get_file(path, 123 | origin='https://s3.amazonaws.com/text-datasets/imdb_word_index.pkl', 124 | md5_hash='72d94b01291be4ff843198d3b0e1e4d7') 125 | f = open(path, 'rb') 126 | 127 | if sys.version_info < (3,): 128 | data = cPickle.load(f) 129 | else: 130 | data = cPickle.load(f, encoding='latin1') 131 | 132 | f.close() 133 | return data 134 | -------------------------------------------------------------------------------- /keras/datasets/mnist.py: -------------------------------------------------------------------------------- 1 | import gzip 2 | from ..utils.data_utils import get_file 3 | from six.moves import cPickle 4 | import sys 5 | 6 | 7 | def load_data(path='mnist.pkl.gz'): 8 | """Loads the MNIST dataset. 9 | 10 | # Arguments 11 | path: path where to cache the dataset locally 12 | (relative to ~/.keras/datasets). 13 | 14 | # Returns 15 | Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`. 16 | """ 17 | path = get_file(path, origin='https://s3.amazonaws.com/img-datasets/mnist.pkl.gz') 18 | 19 | if path.endswith('.gz'): 20 | f = gzip.open(path, 'rb') 21 | else: 22 | f = open(path, 'rb') 23 | 24 | if sys.version_info < (3,): 25 | data = cPickle.load(f) 26 | else: 27 | data = cPickle.load(f, encoding='bytes') 28 | 29 | f.close() 30 | return data # (x_train, y_train), (x_test, y_test) 31 | -------------------------------------------------------------------------------- /keras/datasets/reuters.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from ..utils.data_utils import get_file 4 | from six.moves import cPickle 5 | from six.moves import zip 6 | import numpy as np 7 | import sys 8 | 9 | 10 | def load_data(path='reuters.pkl', nb_words=None, skip_top=0, 11 | maxlen=None, test_split=0.2, seed=113, 12 | start_char=1, oov_char=2, index_from=3): 13 | """Loads the Reuters newswire classification dataset. 14 | 15 | # Arguments 16 | path: where to cache the data (relative to `~/.keras/dataset`). 17 | nb_words: max number of words to include. Words are ranked 18 | by how often they occur (in the training set) and only 19 | the most frequent words are kept 20 | skip_top: skip the top N most frequently occuring words 21 | (which may not be informative). 22 | maxlen: truncate sequences after this length. 23 | test_split: Fraction of the dataset to be used as test data. 24 | seed: random seed for sample shuffling. 25 | start_char: The start of a sequence will be marked with this character. 26 | Set to 1 because 0 is usually the padding character. 27 | oov_char: words that were cut out because of the `nb_words` 28 | or `skip_top` limit will be replaced with this character. 29 | index_from: index actual words with this index and higher. 30 | 31 | # Returns 32 | Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`. 33 | 34 | Note that the 'out of vocabulary' character is only used for 35 | words that were present in the training set but are not included 36 | because they're not making the `nb_words` cut here. 37 | Words that were not seen in the trining set but are in the test set 38 | have simply been skipped. 
39 | """ 40 | 41 | path = get_file(path, origin='https://s3.amazonaws.com/text-datasets/reuters.pkl') 42 | f = open(path, 'rb') 43 | xs, labels = cPickle.load(f) 44 | f.close() 45 | 46 | np.random.seed(seed) 47 | np.random.shuffle(xs) 48 | np.random.seed(seed) 49 | np.random.shuffle(labels) 50 | 51 | if start_char is not None: 52 | xs = [[start_char] + [w + index_from for w in x] for x in xs] 53 | elif index_from: 54 | xs = [[w + index_from for w in x] for x in xs] 55 | 56 | if maxlen: 57 | new_xs = [] 58 | new_labels = [] 59 | for x, y in zip(xs, labels): 60 | if len(x) < maxlen: 61 | new_xs.append(x) 62 | new_labels.append(y) 63 | xs = new_xs 64 | labels = new_labels 65 | 66 | if not nb_words: 67 | nb_words = max([max(x) for x in xs]) 68 | 69 | # by convention, use 2 as OOV word 70 | # reserve 'index_from' (=3 by default) characters: 71 | # 0 (padding), 1 (start), 2 (OOV) 72 | if oov_char is not None: 73 | xs = [[oov_char if (w >= nb_words or w < skip_top) else w for w in x] for x in xs] 74 | else: 75 | new_xs = [] 76 | for x in xs: 77 | nx = [] 78 | for w in x: 79 | if w >= nb_words or w < skip_top: 80 | nx.append(w) 81 | new_xs.append(nx) 82 | xs = new_xs 83 | 84 | x_train = xs[:int(len(xs) * (1 - test_split))] 85 | y_train = labels[:int(len(xs) * (1 - test_split))] 86 | 87 | x_test = xs[int(len(xs) * (1 - test_split)):] 88 | y_test = labels[int(len(xs) * (1 - test_split)):] 89 | 90 | return (x_train, y_train), (x_test, y_test) 91 | 92 | 93 | def get_word_index(path='reuters_word_index.pkl'): 94 | """Retrieves the dictionary mapping word indices back to words. 95 | 96 | # Arguments 97 | path: where to cache the data (relative to `~/.keras/dataset`). 98 | 99 | # Returns 100 | The word index dictionary. 101 | """ 102 | path = get_file(path, origin='https://s3.amazonaws.com/text-datasets/reuters_word_index.pkl') 103 | f = open(path, 'rb') 104 | 105 | if sys.version_info < (3,): 106 | data = cPickle.load(f) 107 | else: 108 | data = cPickle.load(f, encoding='latin1') 109 | 110 | f.close() 111 | return data 112 | -------------------------------------------------------------------------------- /keras/engine/__init__.py: -------------------------------------------------------------------------------- 1 | # note: topology.Node is an internal class, 2 | # it isn't meant to be used by Keras users. 3 | from .topology import InputSpec 4 | from .topology import Input 5 | from .topology import InputLayer 6 | from .topology import Layer 7 | from .topology import Merge 8 | from .topology import merge 9 | from .topology import get_source_inputs 10 | from .training import Model 11 | -------------------------------------------------------------------------------- /keras/initializations.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import numpy as np 3 | from . import backend as K 4 | from .utils.generic_utils import get_from_module 5 | 6 | 7 | def get_fans(shape, dim_ordering='th'): 8 | if len(shape) == 2: 9 | fan_in = shape[0] 10 | fan_out = shape[1] 11 | elif len(shape) == 4 or len(shape) == 5: 12 | # Assuming convolution kernels (2D or 3D). 13 | # TH kernel shape: (depth, input_depth, ...) 
14 | # TF kernel shape: (..., input_depth, depth) 15 | if dim_ordering == 'th': 16 | receptive_field_size = np.prod(shape[2:]) 17 | fan_in = shape[1] * receptive_field_size 18 | fan_out = shape[0] * receptive_field_size 19 | elif dim_ordering == 'tf': 20 | receptive_field_size = np.prod(shape[:2]) 21 | fan_in = shape[-2] * receptive_field_size 22 | fan_out = shape[-1] * receptive_field_size 23 | else: 24 | raise ValueError('Invalid dim_ordering: ' + dim_ordering) 25 | else: 26 | # No specific assumptions. 27 | fan_in = np.sqrt(np.prod(shape)) 28 | fan_out = np.sqrt(np.prod(shape)) 29 | return fan_in, fan_out 30 | 31 | 32 | def uniform(shape, scale=0.05, name=None, dim_ordering='th'): 33 | return K.random_uniform_variable(shape, -scale, scale, name=name) 34 | 35 | 36 | def normal(shape, scale=0.05, name=None, dim_ordering='th'): 37 | return K.random_normal_variable(shape, 0.0, scale, name=name) 38 | 39 | 40 | def lecun_uniform(shape, name=None, dim_ordering='th'): 41 | """LeCun uniform variance scaling initializer. 42 | 43 | # References 44 | LeCun 98, Efficient Backprop, 45 | http://yann.lecun.com/exdb/publis/pdf/lecun-98b.pdf 46 | """ 47 | fan_in, fan_out = get_fans(shape, dim_ordering=dim_ordering) 48 | scale = np.sqrt(3. / fan_in) 49 | return uniform(shape, scale, name=name) 50 | 51 | 52 | def glorot_normal(shape, name=None, dim_ordering='th'): 53 | """Glorot normal variance scaling initializer. 54 | 55 | # References 56 | Glorot & Bengio, AISTATS 2010 57 | """ 58 | fan_in, fan_out = get_fans(shape, dim_ordering=dim_ordering) 59 | s = np.sqrt(2. / (fan_in + fan_out)) 60 | return normal(shape, s, name=name) 61 | 62 | 63 | def glorot_uniform(shape, name=None, dim_ordering='th'): 64 | fan_in, fan_out = get_fans(shape, dim_ordering=dim_ordering) 65 | s = np.sqrt(6. / (fan_in + fan_out)) 66 | return uniform(shape, s, name=name) 67 | 68 | 69 | def he_normal(shape, name=None, dim_ordering='th'): 70 | """He normal variance scaling initializer. 71 | 72 | # References 73 | He et al., http://arxiv.org/abs/1502.01852 74 | """ 75 | fan_in, fan_out = get_fans(shape, dim_ordering=dim_ordering) 76 | s = np.sqrt(2. / fan_in) 77 | return normal(shape, s, name=name) 78 | 79 | 80 | def he_uniform(shape, name=None, dim_ordering='th'): 81 | """He uniform variance scaling initializer. 82 | """ 83 | fan_in, fan_out = get_fans(shape, dim_ordering=dim_ordering) 84 | s = np.sqrt(6. / fan_in) 85 | return uniform(shape, s, name=name) 86 | 87 | 88 | def orthogonal(shape, scale=1.1, name=None, dim_ordering='th'): 89 | """Orthogonal initializer. 90 | 91 | # References 92 | Saxe et al., http://arxiv.org/abs/1312.6120 93 | """ 94 | flat_shape = (shape[0], np.prod(shape[1:])) 95 | a = np.random.normal(0.0, 1.0, flat_shape) 96 | u, _, v = np.linalg.svd(a, full_matrices=False) 97 | # Pick the one with the correct shape. 
98 | q = u if u.shape == flat_shape else v 99 | q = q.reshape(shape) 100 | return K.variable(scale * q[:shape[0], :shape[1]], name=name) 101 | 102 | 103 | def identity(shape, scale=1, name=None, dim_ordering='th'): 104 | if len(shape) != 2 or shape[0] != shape[1]: 105 | raise ValueError('Identity matrix initialization can only be used ' 106 | 'for 2D square matrices.') 107 | else: 108 | return K.variable(scale * np.identity(shape[0]), name=name) 109 | 110 | 111 | def zero(shape, name=None, dim_ordering='th'): 112 | return K.zeros(shape, name=name) 113 | 114 | 115 | def one(shape, name=None, dim_ordering='th'): 116 | return K.ones(shape, name=name) 117 | 118 | 119 | def get(identifier, **kwargs): 120 | return get_from_module(identifier, globals(), 121 | 'initialization', kwargs=kwargs) 122 | -------------------------------------------------------------------------------- /keras/layers/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from ..engine import Layer, Input, InputLayer, Merge, merge, InputSpec 3 | from .core import * 4 | from .convolutional import * 5 | from .pooling import * 6 | from .local import * 7 | from .recurrent import * 8 | from .normalization import * 9 | from .embeddings import * 10 | from .noise import * 11 | from .advanced_activations import * 12 | from .wrappers import * 13 | from .convolutional_recurrent import * 14 | -------------------------------------------------------------------------------- /keras/layers/noise.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from ..engine import Layer 3 | from .. import backend as K 4 | import numpy as np 5 | 6 | 7 | class GaussianNoise(Layer): 8 | """Apply additive zero-centered Gaussian noise. 9 | 10 | This is useful to mitigate overfitting 11 | (you could see it as a form of random data augmentation). 12 | Gaussian Noise (GS) is a natural choice as corruption process 13 | for real valued inputs. 14 | 15 | As it is a regularization layer, it is only active at training time. 16 | 17 | # Arguments 18 | sigma: float, standard deviation of the noise distribution. 19 | 20 | # Input shape 21 | Arbitrary. Use the keyword argument `input_shape` 22 | (tuple of integers, does not include the samples axis) 23 | when using this layer as the first layer in a model. 24 | 25 | # Output shape 26 | Same shape as input. 27 | """ 28 | 29 | def __init__(self, sigma, **kwargs): 30 | self.supports_masking = True 31 | self.sigma = sigma 32 | self.uses_learning_phase = True 33 | super(GaussianNoise, self).__init__(**kwargs) 34 | 35 | def call(self, x, mask=None): 36 | noise_x = x + K.random_normal(shape=K.shape(x), 37 | mean=0., 38 | std=self.sigma) 39 | return K.in_train_phase(noise_x, x) 40 | 41 | def get_config(self): 42 | config = {'sigma': self.sigma} 43 | base_config = super(GaussianNoise, self).get_config() 44 | return dict(list(base_config.items()) + list(config.items())) 45 | 46 | 47 | class GaussianDropout(Layer): 48 | """Apply multiplicative 1-centered Gaussian noise. 49 | 50 | As it is a regularization layer, it is only active at training time. 51 | 52 | # Arguments 53 | p: float, drop probability (as with `Dropout`). 54 | The multiplicative noise will have 55 | standard deviation `sqrt(p / (1 - p))`. 56 | 57 | # Input shape 58 | Arbitrary. 
Use the keyword argument `input_shape` 59 | (tuple of integers, does not include the samples axis) 60 | when using this layer as the first layer in a model. 61 | 62 | # Output shape 63 | Same shape as input. 64 | 65 | # References 66 | - [Dropout: A Simple Way to Prevent Neural Networks from Overfitting Srivastava, Hinton, et al. 2014](http://www.cs.toronto.edu/~rsalakhu/papers/srivastava14a.pdf) 67 | """ 68 | 69 | def __init__(self, p, **kwargs): 70 | self.supports_masking = True 71 | self.p = p 72 | if 0 < p < 1: 73 | self.uses_learning_phase = True 74 | super(GaussianDropout, self).__init__(**kwargs) 75 | 76 | def call(self, x, mask=None): 77 | if 0 < self.p < 1: 78 | noise_x = x * K.random_normal(shape=K.shape(x), mean=1.0, 79 | std=np.sqrt(self.p / (1.0 - self.p))) 80 | return K.in_train_phase(noise_x, x) 81 | return x 82 | 83 | def get_config(self): 84 | config = {'p': self.p} 85 | base_config = super(GaussianDropout, self).get_config() 86 | return dict(list(base_config.items()) + list(config.items())) 87 | -------------------------------------------------------------------------------- /keras/objectives.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from . import backend as K 4 | from .utils.generic_utils import get_from_module 5 | 6 | 7 | def mean_squared_error(y_true, y_pred): 8 | return K.mean(K.square(y_pred - y_true), axis=-1) 9 | 10 | 11 | def mean_absolute_error(y_true, y_pred): 12 | return K.mean(K.abs(y_pred - y_true), axis=-1) 13 | 14 | 15 | def mean_absolute_percentage_error(y_true, y_pred): 16 | diff = K.abs((y_true - y_pred) / K.clip(K.abs(y_true), 17 | K.epsilon(), 18 | None)) 19 | return 100. * K.mean(diff, axis=-1) 20 | 21 | 22 | def mean_squared_logarithmic_error(y_true, y_pred): 23 | first_log = K.log(K.clip(y_pred, K.epsilon(), None) + 1.) 24 | second_log = K.log(K.clip(y_true, K.epsilon(), None) + 1.) 25 | return K.mean(K.square(first_log - second_log), axis=-1) 26 | 27 | 28 | def squared_hinge(y_true, y_pred): 29 | return K.mean(K.square(K.maximum(1. - y_true * y_pred, 0.)), axis=-1) 30 | 31 | 32 | def hinge(y_true, y_pred): 33 | return K.mean(K.maximum(1. - y_true * y_pred, 0.), axis=-1) 34 | 35 | 36 | def categorical_crossentropy(y_true, y_pred): 37 | return K.categorical_crossentropy(y_pred, y_true) 38 | 39 | 40 | def sparse_categorical_crossentropy(y_true, y_pred): 41 | return K.sparse_categorical_crossentropy(y_pred, y_true) 42 | 43 | 44 | def binary_crossentropy(y_true, y_pred): 45 | return K.mean(K.binary_crossentropy(y_pred, y_true), axis=-1) 46 | 47 | 48 | def kullback_leibler_divergence(y_true, y_pred): 49 | y_true = K.clip(y_true, K.epsilon(), 1) 50 | y_pred = K.clip(y_pred, K.epsilon(), 1) 51 | return K.sum(y_true * K.log(y_true / y_pred), axis=-1) 52 | 53 | 54 | def poisson(y_true, y_pred): 55 | return K.mean(y_pred - y_true * K.log(y_pred + K.epsilon()), axis=-1) 56 | 57 | 58 | def cosine_proximity(y_true, y_pred): 59 | y_true = K.l2_normalize(y_true, axis=-1) 60 | y_pred = K.l2_normalize(y_pred, axis=-1) 61 | return -K.mean(y_true * y_pred, axis=-1) 62 | 63 | 64 | # Aliases. 
65 | 66 | mse = MSE = mean_squared_error 67 | mae = MAE = mean_absolute_error 68 | mape = MAPE = mean_absolute_percentage_error 69 | msle = MSLE = mean_squared_logarithmic_error 70 | kld = KLD = kullback_leibler_divergence 71 | cosine = cosine_proximity 72 | 73 | 74 | def get(identifier): 75 | return get_from_module(identifier, globals(), 'objective') 76 | -------------------------------------------------------------------------------- /keras/preprocessing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/keras/a73b319264459b43462fba8c78669d3d185710d0/keras/preprocessing/__init__.py -------------------------------------------------------------------------------- /keras/regularizers.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from . import backend as K 3 | from .utils.generic_utils import get_from_module 4 | import warnings 5 | 6 | 7 | class Regularizer(object): 8 | """Regularizer base class. 9 | """ 10 | 11 | def __call__(self, x): 12 | return 0 13 | 14 | def get_config(self): 15 | return {'name': self.__class__.__name__} 16 | 17 | def set_param(self, _): 18 | warnings.warn('The `set_param` method on regularizers is deprecated. ' 19 | 'It no longer does anything, ' 20 | 'and it will be removed after 06/2017.') 21 | 22 | def set_layer(self, _): 23 | warnings.warn('The `set_layer` method on regularizers is deprecated. ' 24 | 'It no longer does anything, ' 25 | 'and it will be removed after 06/2017.') 26 | 27 | 28 | class EigenvalueRegularizer(Regularizer): 29 | """Regularizer based on the eignvalues of a weight matrix. 30 | 31 | Only available for tensors of rank 2. 32 | 33 | # Arguments 34 | k: Float; modulates the amount of regularization to apply. 35 | """ 36 | 37 | def __init__(self, k): 38 | self.k = k 39 | 40 | def __call__(self, x): 41 | if K.ndim(x) != 2: 42 | raise ValueError('EigenvalueRegularizer ' 43 | 'is only available for tensors of rank 2.') 44 | covariance = K.dot(K.transpose(x), x) 45 | dim1, dim2 = K.eval(K.shape(covariance)) 46 | 47 | # Power method for approximating the dominant eigenvector: 48 | power = 9 # Number of iterations of the power method. 49 | o = K.ones([dim1, 1]) # Initial values for the dominant eigenvector. 50 | main_eigenvect = K.dot(covariance, o) 51 | for n in range(power - 1): 52 | main_eigenvect = K.dot(covariance, main_eigenvect) 53 | covariance_d = K.dot(covariance, main_eigenvect) 54 | 55 | # The corresponding dominant eigenvalue: 56 | main_eigenval = (K.dot(K.transpose(covariance_d), main_eigenvect) / 57 | K.dot(K.transpose(main_eigenvect), main_eigenvect)) 58 | # Multiply by the given regularization gain. 59 | regularization = (main_eigenval ** 0.5) * self.k 60 | return K.sum(regularization) 61 | 62 | 63 | class L1L2Regularizer(Regularizer): 64 | """Regularizer for L1 and L2 regularization. 65 | 66 | # Arguments 67 | l1: Float; L1 regularization factor. 68 | l2: Float; L2 regularization factor. 
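    # Example
        A usage sketch with the Keras 1.x layer keyword arguments (assumes
        the `W_regularizer` argument of `Dense`, which penalizes the layer's
        weight matrix):

        ```python
        from keras.layers import Dense
        from keras.regularizers import l1l2

        dense = Dense(64, W_regularizer=l1l2(l1=0.01, l2=0.01))
        ```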
69 | """ 70 | 71 | def __init__(self, l1=0., l2=0.): 72 | self.l1 = K.cast_to_floatx(l1) 73 | self.l2 = K.cast_to_floatx(l2) 74 | 75 | def __call__(self, x): 76 | regularization = 0 77 | if self.l1: 78 | regularization += K.sum(self.l1 * K.abs(x)) 79 | if self.l2: 80 | regularization += K.sum(self.l2 * K.square(x)) 81 | return regularization 82 | 83 | def get_config(self): 84 | return {'name': self.__class__.__name__, 85 | 'l1': float(self.l1), 86 | 'l2': float(self.l2)} 87 | 88 | 89 | # Aliases. 90 | 91 | WeightRegularizer = L1L2Regularizer 92 | ActivityRegularizer = L1L2Regularizer 93 | 94 | 95 | def l1(l=0.01): 96 | return L1L2Regularizer(l1=l) 97 | 98 | 99 | def l2(l=0.01): 100 | return L1L2Regularizer(l2=l) 101 | 102 | 103 | def l1l2(l1=0.01, l2=0.01): 104 | return L1L2Regularizer(l1=l1, l2=l2) 105 | 106 | 107 | def activity_l1(l=0.01): 108 | return L1L2Regularizer(l1=l) 109 | 110 | 111 | def activity_l2(l=0.01): 112 | return L1L2Regularizer(l2=l) 113 | 114 | 115 | def activity_l1l2(l1=0.01, l2=0.01): 116 | return L1L2Regularizer(l1=l1, l2=l2) 117 | 118 | 119 | def get(identifier, kwargs=None): 120 | return get_from_module(identifier, globals(), 'regularizer', 121 | instantiate=True, kwargs=kwargs) 122 | -------------------------------------------------------------------------------- /keras/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/keras/a73b319264459b43462fba8c78669d3d185710d0/keras/utils/__init__.py -------------------------------------------------------------------------------- /keras/utils/io_utils.py: -------------------------------------------------------------------------------- 1 | """Utilities related to disk I/O.""" 2 | from __future__ import absolute_import 3 | from __future__ import print_function 4 | 5 | import numpy as np 6 | import sys 7 | from collections import defaultdict 8 | 9 | try: 10 | import h5py 11 | except ImportError: 12 | h5py = None 13 | 14 | try: 15 | import tables 16 | except ImportError: 17 | tables = None 18 | 19 | 20 | class HDF5Matrix(object): 21 | """Representation of HDF5 dataset to be used instead of a Numpy array. 22 | 23 | # Example 24 | 25 | ```python 26 | x_data = HDF5Matrix('input/file.hdf5', 'data') 27 | model.predict(x_data) 28 | ``` 29 | 30 | Providing `start` and `end` allows use of a slice of the dataset. 31 | 32 | Optionally, a normalizer function (or lambda) can be given. This will 33 | be called on every slice of data retrieved. 34 | 35 | # Arguments 36 | datapath: string, path to a HDF5 file 37 | dataset: string, name of the HDF5 dataset in the file specified 38 | in datapath 39 | start: int, start of desired slice of the specified dataset 40 | end: int, end of desired slice of the specified dataset 41 | normalizer: function to be called on data when retrieved 42 | 43 | # Returns 44 | An array-like HDF5 dataset. 
45 | """ 46 | refs = defaultdict(int) 47 | 48 | def __init__(self, datapath, dataset, start=0, end=None, normalizer=None): 49 | if h5py is None: 50 | raise ImportError('The use of HDF5Matrix requires ' 51 | 'HDF5 and h5py installed.') 52 | 53 | if datapath not in list(self.refs.keys()): 54 | f = h5py.File(datapath) 55 | self.refs[datapath] = f 56 | else: 57 | f = self.refs[datapath] 58 | self.data = f[dataset] 59 | self.start = start 60 | if end is None: 61 | self.end = self.data.shape[0] 62 | else: 63 | self.end = end 64 | self.normalizer = normalizer 65 | 66 | def __len__(self): 67 | return self.end - self.start 68 | 69 | def __getitem__(self, key): 70 | if isinstance(key, slice): 71 | if key.stop + self.start <= self.end: 72 | idx = slice(key.start + self.start, key.stop + self.start) 73 | else: 74 | raise IndexError 75 | elif isinstance(key, int): 76 | if key + self.start < self.end: 77 | idx = key + self.start 78 | else: 79 | raise IndexError 80 | elif isinstance(key, np.ndarray): 81 | if np.max(key) + self.start < self.end: 82 | idx = (self.start + key).tolist() 83 | else: 84 | raise IndexError 85 | elif isinstance(key, list): 86 | if max(key) + self.start < self.end: 87 | idx = [x + self.start for x in key] 88 | else: 89 | raise IndexError 90 | if self.normalizer is not None: 91 | return self.normalizer(self.data[idx]) 92 | else: 93 | return self.data[idx] 94 | 95 | @property 96 | def shape(self): 97 | return (self.end - self.start,) + self.data.shape[1:] 98 | 99 | 100 | def save_array(array, name): 101 | if tables is None: 102 | raise ImportError('The use of `save_array` requires ' 103 | 'the tables module.') 104 | f = tables.open_file(name, 'w') 105 | atom = tables.Atom.from_dtype(array.dtype) 106 | ds = f.create_carray(f.root, 'data', atom, array.shape) 107 | ds[:] = array 108 | f.close() 109 | 110 | 111 | def load_array(name): 112 | if tables is None: 113 | raise ImportError('The use of `load_array` requires ' 114 | 'the tables module.') 115 | f = tables.open_file(name) 116 | array = f.root.data 117 | a = np.empty(shape=array.shape, dtype=array.dtype) 118 | a[:] = array[:] 119 | f.close() 120 | return a 121 | 122 | 123 | def ask_to_proceed_with_overwrite(filepath): 124 | """Produces a prompt asking about overwriting a file. 125 | 126 | # Arguments 127 | filepath: the path to the file to be overwritten. 128 | 129 | # Returns 130 | True if we can proceed with overwrite, False otherwise. 131 | """ 132 | get_input = input 133 | if sys.version_info[:2] <= (2, 7): 134 | get_input = raw_input 135 | overwrite = get_input('[WARNING] %s already exists - overwrite? ' 136 | '[y/n]' % (filepath)) 137 | while overwrite not in ['y', 'n']: 138 | overwrite = get_input('Enter "y" (overwrite) or "n" (cancel).') 139 | if overwrite == 'n': 140 | return False 141 | print('[TIP] Next time specify overwrite=True!') 142 | return True 143 | -------------------------------------------------------------------------------- /keras/utils/np_utils.py: -------------------------------------------------------------------------------- 1 | """Numpy-related utilities.""" 2 | from __future__ import absolute_import 3 | 4 | import numpy as np 5 | from six.moves import range 6 | from six.moves import zip 7 | from .. import backend as K 8 | 9 | 10 | def to_categorical(y, nb_classes=None): 11 | """Converts a class vector (integers) to binary class matrix. 12 | 13 | E.g. for use with categorical_crossentropy. 14 | 15 | # Arguments 16 | y: class vector to be converted into a matrix 17 | (integers from 0 to nb_classes). 
18 | nb_classes: total number of classes. 19 | 20 | # Returns 21 | A binary matrix representation of the input. 22 | """ 23 | y = np.array(y, dtype='int').ravel() 24 | if not nb_classes: 25 | nb_classes = np.max(y) + 1 26 | n = y.shape[0] 27 | categorical = np.zeros((n, nb_classes)) 28 | categorical[np.arange(n), y] = 1 29 | return categorical 30 | 31 | 32 | def normalize(a, axis=-1, order=2): 33 | l2 = np.atleast_1d(np.linalg.norm(a, order, axis)) 34 | l2[l2 == 0] = 1 35 | return a / np.expand_dims(l2, axis) 36 | 37 | 38 | def binary_logloss(p, y): 39 | epsilon = 1e-15 40 | p = np.maximum(epsilon, p) 41 | p = np.minimum(1 - epsilon, p) 42 | res = sum(y * np.log(p) + np.subtract(1, y) * np.log(np.subtract(1, p))) 43 | res *= -1.0 / len(y) 44 | return res 45 | 46 | 47 | def multiclass_logloss(p, y): 48 | npreds = [p[i][y[i] - 1] for i in range(len(y))] 49 | score = -(1. / len(y)) * np.sum(np.log(npreds)) 50 | return score 51 | 52 | 53 | def accuracy(p, y): 54 | return np.mean([a == b for a, b in zip(p, y)]) 55 | 56 | 57 | def probas_to_classes(y_pred): 58 | if len(y_pred.shape) > 1 and y_pred.shape[1] > 1: 59 | return categorical_probas_to_classes(y_pred) 60 | return np.array([1 if p > 0.5 else 0 for p in y_pred]) 61 | 62 | 63 | def categorical_probas_to_classes(p): 64 | return np.argmax(p, axis=1) 65 | 66 | 67 | def convert_kernel(kernel, dim_ordering=None): 68 | """Converts a Numpy kernel matrix from Theano format to TensorFlow format. 69 | 70 | Also works reciprocally, since the transformation is its own inverse. 71 | 72 | # Arguments 73 | kernel: Numpy array (4D or 5D). 74 | dim_ordering: the data format. 75 | 76 | # Returns 77 | The converted kernel. 78 | 79 | # Raises 80 | ValueError: in case of invalid kernel shape or invalid dim_ordering. 81 | """ 82 | if dim_ordering is None: 83 | dim_ordering = K.image_dim_ordering() 84 | if not 4 <= kernel.ndim <= 5: 85 | raise ValueError('Invalid kernel shape:', kernel.shape) 86 | 87 | slices = [slice(None, None, -1) for _ in range(kernel.ndim)] 88 | no_flip = (slice(None, None), slice(None, None)) 89 | if dim_ordering == 'th': # (out_depth, input_depth, ...) 90 | slices[:2] = no_flip 91 | elif dim_ordering == 'tf': # (..., input_depth, out_depth) 92 | slices[-2:] = no_flip 93 | else: 94 | raise ValueError('Invalid dim_ordering:', dim_ordering) 95 | 96 | return np.copy(kernel[slices]) 97 | 98 | 99 | def conv_output_length(input_length, filter_size, 100 | border_mode, stride, dilation=1): 101 | """Determines output length of a convolution given input length. 102 | 103 | # Arguments 104 | input_length: integer. 105 | filter_size: integer. 106 | border_mode: one of "same", "valid", "full". 107 | stride: integer. 108 | dilation: dilation rate, integer. 109 | 110 | # Returns 111 | The output length (integer). 112 | """ 113 | if input_length is None: 114 | return None 115 | assert border_mode in {'same', 'valid', 'full'} 116 | dilated_filter_size = filter_size + (filter_size - 1) * (dilation - 1) 117 | if border_mode == 'same': 118 | output_length = input_length 119 | elif border_mode == 'valid': 120 | output_length = input_length - dilated_filter_size + 1 121 | elif border_mode == 'full': 122 | output_length = input_length + dilated_filter_size - 1 123 | return (output_length + stride - 1) // stride 124 | 125 | 126 | def conv_input_length(output_length, filter_size, border_mode, stride): 127 | """Determines input length of a convolution given output length. 128 | 129 | # Arguments 130 | output_length: integer. 131 | filter_size: integer. 
132 | border_mode: one of "same", "valid", "full". 133 | stride: integer. 134 | 135 | # Returns 136 | The input length (integer). 137 | """ 138 | if output_length is None: 139 | return None 140 | assert border_mode in {'same', 'valid', 'full'} 141 | if border_mode == 'same': 142 | pad = filter_size // 2 143 | elif border_mode == 'valid': 144 | pad = 0 145 | elif border_mode == 'full': 146 | pad = filter_size - 1 147 | return (output_length - 1) * stride - 2 * pad + filter_size 148 | -------------------------------------------------------------------------------- /keras/utils/visualize_util.py: -------------------------------------------------------------------------------- 1 | """Utilities related to model visualization.""" 2 | import os 3 | 4 | from ..layers.wrappers import Wrapper 5 | from ..models import Sequential 6 | 7 | try: 8 | # pydot-ng is a fork of pydot that is better maintained. 9 | import pydot_ng as pydot 10 | except ImportError: 11 | # Fall back on pydot if necessary. 12 | import pydot 13 | if not pydot.find_graphviz(): 14 | raise ImportError('Failed to import pydot. You must install pydot' 15 | ' and graphviz for `pydotprint` to work.') 16 | 17 | 18 | def model_to_dot(model, show_shapes=False, show_layer_names=True): 19 | """Converts a Keras model to dot format. 20 | 21 | # Arguments 22 | model: A Keras model instance. 23 | show_shapes: whether to display shape information. 24 | show_layer_names: whether to display layer names. 25 | 26 | # Returns 27 | A `pydot.Dot` instance representing the Keras model. 28 | """ 29 | dot = pydot.Dot() 30 | dot.set('rankdir', 'TB') 31 | dot.set('concentrate', True) 32 | dot.set_node_defaults(shape='record') 33 | 34 | if isinstance(model, Sequential): 35 | if not model.built: 36 | model.build() 37 | model = model.model 38 | layers = model.layers 39 | 40 | # Create graph nodes. 41 | for layer in layers: 42 | layer_id = str(id(layer)) 43 | 44 | # Append a wrapped layer's label to node's label, if it exists. 45 | layer_name = layer.name 46 | class_name = layer.__class__.__name__ 47 | if isinstance(layer, Wrapper): 48 | layer_name = '{}({})'.format(layer_name, layer.layer.name) 49 | child_class_name = layer.layer.__class__.__name__ 50 | class_name = '{}({})'.format(class_name, child_class_name) 51 | 52 | # Create node's label. 53 | if show_layer_names: 54 | label = '{}: {}'.format(layer_name, class_name) 55 | else: 56 | label = class_name 57 | 58 | # Rebuild the label as a table including input/output shapes. 59 | if show_shapes: 60 | try: 61 | outputlabels = str(layer.output_shape) 62 | except AttributeError: 63 | outputlabels = 'multiple' 64 | if hasattr(layer, 'input_shape'): 65 | inputlabels = str(layer.input_shape) 66 | elif hasattr(layer, 'input_shapes'): 67 | inputlabels = ', '.join( 68 | [str(ishape) for ishape in layer.input_shapes]) 69 | else: 70 | inputlabels = 'multiple' 71 | label = '%s\n|{input:|output:}|{{%s}|{%s}}' % (label, inputlabels, outputlabels) 72 | 73 | node = pydot.Node(layer_id, label=label) 74 | dot.add_node(node) 75 | 76 | # Connect nodes with edges. 
77 | for layer in layers: 78 | layer_id = str(id(layer)) 79 | for i, node in enumerate(layer.inbound_nodes): 80 | node_key = layer.name + '_ib-' + str(i) 81 | if node_key in model.container_nodes: 82 | for inbound_layer in node.inbound_layers: 83 | inbound_layer_id = str(id(inbound_layer)) 84 | layer_id = str(id(layer)) 85 | dot.add_edge(pydot.Edge(inbound_layer_id, layer_id)) 86 | return dot 87 | 88 | 89 | def plot(model, to_file='model.png', show_shapes=False, show_layer_names=True): 90 | dot = model_to_dot(model, show_shapes, show_layer_names) 91 | _, extension = os.path.splitext(to_file) 92 | if not extension: 93 | extension = 'png' 94 | else: 95 | extension = extension[1:] 96 | dot.write(to_file, format=extension) 97 | -------------------------------------------------------------------------------- /keras/wrappers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/keras/a73b319264459b43462fba8c78669d3d185710d0/keras/wrappers/__init__.py -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | # Configuration of py.test 2 | [pytest] 3 | addopts=-v 4 | -n 2 5 | --durations=10 6 | --cov-report term-missing 7 | --cov=keras 8 | 9 | # Do not run tests in the build folder 10 | norecursedirs= build 11 | 12 | # PEP-8 The following are ignored: 13 | # E501 line too long (82 > 79 characters) 14 | # E402 module level import not at top of file - temporary measure to continue adding ros python packaged in sys.path 15 | # E731 do not assign a lambda expression, use a def 16 | 17 | pep8ignore=* E501 \ 18 | * E402 \ 19 | * E731 \ 20 | 21 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from setuptools import find_packages 3 | 4 | 5 | setup(name='Keras', 6 | version='1.2.2', 7 | description='Deep Learning for Python', 8 | author='Francois Chollet', 9 | author_email='francois.chollet@gmail.com', 10 | url='https://github.com/fchollet/keras', 11 | download_url='https://github.com/fchollet/keras/tarball/1.2.2', 12 | license='MIT', 13 | install_requires=['theano', 'pyyaml', 'six'], 14 | extras_require={ 15 | 'h5py': ['h5py'], 16 | 'visualize': ['pydot-ng'], 17 | 'tests': ['pytest', 18 | 'pytest-cov', 19 | 'pytest-pep8', 20 | 'pytest-xdist', 21 | 'python-coveralls', 22 | 'coverage==3.7.1'], 23 | }, 24 | packages=find_packages()) 25 | -------------------------------------------------------------------------------- /tests/integration_tests/test_image_data_tasks.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import numpy as np 3 | import pytest 4 | 5 | from keras.utils.test_utils import get_test_data, keras_test 6 | from keras.models import Sequential 7 | from keras.layers.core import Dense, Flatten, Activation 8 | from keras.layers.convolutional import Convolution2D, MaxPooling2D 9 | from keras.utils.np_utils import to_categorical 10 | 11 | 12 | @keras_test 13 | @pytest.mark.skip(reason="Test case has image ordering issue.") 14 | def 
test_image_classification(): 15 | ''' 16 | Classify random 16x16 color images into several classes using logistic regression 17 | with convolutional hidden layer. 18 | ''' 19 | np.random.seed(1337) 20 | input_shape = (16, 16, 3) 21 | (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=500, 22 | nb_test=200, 23 | input_shape=input_shape, 24 | classification=True, 25 | nb_class=4) 26 | y_train = to_categorical(y_train) 27 | y_test = to_categorical(y_test) 28 | # convolution kernel size 29 | nb_conv = 3 30 | # size of pooling area for max pooling 31 | nb_pool = 2 32 | 33 | model = Sequential([ 34 | Convolution2D(nb_filter=8, nb_row=nb_conv, nb_col=nb_conv, input_shape=input_shape), 35 | MaxPooling2D(pool_size=(nb_pool, nb_pool)), 36 | Flatten(), 37 | Activation('relu'), 38 | Dense(y_test.shape[-1], activation='softmax') 39 | ]) 40 | model.compile(loss='categorical_crossentropy', 41 | optimizer='rmsprop', 42 | metrics=['accuracy']) 43 | history = model.fit(X_train, y_train, nb_epoch=10, batch_size=16, 44 | validation_data=(X_test, y_test), 45 | verbose=0) 46 | assert(history.history['val_acc'][-1] > 0.85) 47 | 48 | 49 | if __name__ == '__main__': 50 | pytest.main([__file__]) 51 | -------------------------------------------------------------------------------- /tests/integration_tests/test_vector_data_tasks.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import numpy as np 3 | import pytest 4 | 5 | from keras.utils.test_utils import get_test_data, keras_test 6 | from keras.models import Sequential 7 | from keras.layers.core import Dense 8 | from keras.utils.np_utils import to_categorical 9 | 10 | 11 | @keras_test 12 | def test_vector_classification(): 13 | ''' 14 | Classify random float vectors into 2 classes with logistic regression 15 | using 2 layer neural network with ReLU hidden units. 16 | ''' 17 | np.random.seed(1337) 18 | nb_hidden = 10 19 | 20 | (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=500, 21 | nb_test=200, 22 | input_shape=(20,), 23 | classification=True, 24 | nb_class=2) 25 | y_train = to_categorical(y_train) 26 | y_test = to_categorical(y_test) 27 | 28 | model = Sequential([ 29 | Dense(nb_hidden, input_shape=(X_train.shape[-1],), activation='relu'), 30 | Dense(y_train.shape[-1], activation='softmax') 31 | ]) 32 | model.compile(loss='categorical_crossentropy', 33 | optimizer='rmsprop', 34 | metrics=['accuracy']) 35 | history = model.fit(X_train, y_train, nb_epoch=15, batch_size=16, 36 | validation_data=(X_test, y_test), 37 | verbose=0) 38 | assert(history.history['val_acc'][-1] > 0.8) 39 | 40 | 41 | @keras_test 42 | def test_vector_regression(): 43 | ''' 44 | Perform float data prediction (regression) using 2 layer MLP 45 | with tanh and sigmoid activations. 
46 | ''' 47 | np.random.seed(1337) 48 | nb_hidden = 10 49 | (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=500, 50 | nb_test=200, 51 | input_shape=(20,), 52 | output_shape=(2,), 53 | classification=False) 54 | 55 | model = Sequential([ 56 | Dense(nb_hidden, input_shape=(X_train.shape[-1],), activation='tanh'), 57 | Dense(y_train.shape[-1]) 58 | ]) 59 | 60 | model.compile(loss='hinge', optimizer='adagrad') 61 | history = model.fit(X_train, y_train, nb_epoch=20, batch_size=16, 62 | validation_data=(X_test, y_test), verbose=0) 63 | assert (history.history['val_loss'][-1] < 0.9) 64 | 65 | 66 | if __name__ == '__main__': 67 | pytest.main([__file__]) 68 | -------------------------------------------------------------------------------- /tests/keras/datasets/test_datasets.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import pytest 3 | import time 4 | import random 5 | from keras.datasets import cifar10, cifar100, reuters, imdb, mnist 6 | 7 | 8 | @pytest.mark.skip(reason="too slow to do download") 9 | def test_cifar(): 10 | # only run data download tests 20% of the time 11 | # to speed up frequent testing 12 | random.seed(time.time()) 13 | if random.random() > 0.8: 14 | (X_train, y_train), (X_test, y_test) = cifar10.load_data() 15 | (X_train, y_train), (X_test, y_test) = cifar100.load_data('fine') 16 | (X_train, y_train), (X_test, y_test) = cifar100.load_data('coarse') 17 | 18 | 19 | @pytest.mark.skip(reason="too slow to do download") 20 | def test_reuters(): 21 | # only run data download tests 20% of the time 22 | # to speed up frequent testing 23 | random.seed(time.time()) 24 | if random.random() > 0.8: 25 | (X_train, y_train), (X_test, y_test) = reuters.load_data() 26 | (X_train, y_train), (X_test, y_test) = reuters.load_data(maxlen=10) 27 | 28 | 29 | def test_mnist(): 30 | # only run data download tests 20% of the time 31 | # to speed up frequent testing 32 | random.seed(time.time()) 33 | if random.random() > 0.8: 34 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 35 | 36 | 37 | @pytest.mark.skip(reason="too slow to do download") 38 | def test_imdb(): 39 | # only run data download tests 20% of the time 40 | # to speed up frequent testing 41 | random.seed(time.time()) 42 | if random.random() > 0.8: 43 | (X_train, y_train), (X_test, y_test) = imdb.load_data() 44 | (X_train, y_train), (X_test, y_test) = imdb.load_data(maxlen=40) 45 | 46 | 47 | if __name__ == '__main__': 48 | pytest.main([__file__]) 49 | -------------------------------------------------------------------------------- /tests/keras/layers/test_advanced_activations.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from keras.utils.test_utils import layer_test, keras_test 3 | 4 | 5 | @keras_test 6 | def test_leaky_relu(): 7 | from keras.layers.advanced_activations import LeakyReLU 8 | for alpha in [0., .5, -1.]: 9 | layer_test(LeakyReLU, kwargs={'alpha': alpha}, 10 | input_shape=(2, 3, 4)) 11 | 12 | 13 | @keras_test 14 | def test_prelu(): 15 | from keras.layers.advanced_activations import PReLU 16 | layer_test(PReLU, kwargs={}, 17 | input_shape=(2, 3, 4)) 18 | 19 | 20 | @keras_test 21 | def test_prelu_share(): 22 | from keras.layers.advanced_activations import PReLU 23 | layer_test(PReLU, kwargs={'shared_axes': 1}, 24 | input_shape=(2, 3, 4)) 25 | 26 | 27 | @keras_test 28 | def test_elu(): 29 | from keras.layers.advanced_activations import ELU 30 | for alpha in [0., .5, 
-1.]: 31 | layer_test(ELU, kwargs={'alpha': alpha}, 32 | input_shape=(2, 3, 4)) 33 | 34 | 35 | @keras_test 36 | def test_parametric_softplus(): 37 | from keras.layers.advanced_activations import ParametricSoftplus 38 | layer_test(ParametricSoftplus, 39 | kwargs={'alpha_init': 1., 40 | 'beta_init': -1}, 41 | input_shape=(2, 3, 4)) 42 | 43 | 44 | @keras_test 45 | def test_parametric_softplus_share(): 46 | from keras.layers.advanced_activations import ParametricSoftplus 47 | layer_test(ParametricSoftplus, 48 | kwargs={'shared_axes': 1, 49 | 'alpha_init': 1., 50 | 'beta_init': -1}, 51 | input_shape=(2, 3, 4)) 52 | 53 | 54 | @keras_test 55 | def test_thresholded_relu(): 56 | from keras.layers.advanced_activations import ThresholdedReLU 57 | layer_test(ThresholdedReLU, kwargs={'theta': 0.5}, 58 | input_shape=(2, 3, 4)) 59 | 60 | 61 | @keras_test 62 | def test_srelu(): 63 | from keras.layers.advanced_activations import SReLU 64 | layer_test(SReLU, kwargs={}, 65 | input_shape=(2, 3, 4)) 66 | 67 | 68 | @keras_test 69 | def test_srelu_share(): 70 | from keras.layers.advanced_activations import SReLU 71 | layer_test(SReLU, kwargs={'shared_axes': 1}, 72 | input_shape=(2, 3, 4)) 73 | 74 | 75 | if __name__ == '__main__': 76 | pytest.main([__file__]) 77 | -------------------------------------------------------------------------------- /tests/keras/layers/test_embeddings.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from keras.utils.test_utils import layer_test, keras_test 3 | from keras.layers.embeddings import Embedding 4 | import keras.backend as K 5 | 6 | 7 | @keras_test 8 | def test_embedding(): 9 | layer_test(Embedding, 10 | kwargs={'output_dim': 4, 'input_dim': 10, 'input_length': 2}, 11 | input_shape=(3, 2), 12 | input_dtype='int32', 13 | expected_output_dtype=K.floatx()) 14 | 15 | 16 | if __name__ == '__main__': 17 | pytest.main([__file__]) 18 | -------------------------------------------------------------------------------- /tests/keras/layers/test_local.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from keras.utils.test_utils import layer_test, keras_test 4 | from keras.layers import local 5 | 6 | 7 | @keras_test 8 | def test_locallyconnected_1d(): 9 | nb_samples = 2 10 | nb_steps = 8 11 | input_dim = 5 12 | filter_length = 3 13 | nb_filter = 4 14 | 15 | for border_mode in ['valid']: 16 | for subsample_length in [1]: 17 | if border_mode == 'same' and subsample_length != 1: 18 | continue 19 | layer_test(local.LocallyConnected1D, 20 | kwargs={'nb_filter': nb_filter, 21 | 'filter_length': filter_length, 22 | 'border_mode': border_mode, 23 | 'subsample_length': subsample_length}, 24 | input_shape=(nb_samples, nb_steps, input_dim)) 25 | 26 | layer_test(local.LocallyConnected1D, 27 | kwargs={'nb_filter': nb_filter, 28 | 'filter_length': filter_length, 29 | 'border_mode': border_mode, 30 | 'W_regularizer': 'l2', 31 | 'b_regularizer': 'l2', 32 | 'activity_regularizer': 'activity_l2', 33 | 'subsample_length': subsample_length}, 34 | input_shape=(nb_samples, nb_steps, input_dim)) 35 | 36 | 37 | @keras_test 38 | def test_locallyconnected_2d(): 39 | nb_samples = 8 40 | nb_filter = 3 41 | stack_size = 4 42 | nb_row = 6 43 | nb_col = 10 44 | 45 | for border_mode in ['valid']: 46 | for subsample in [(1, 1), (2, 2)]: 47 | if border_mode == 'same' and subsample != (1, 1): 48 | continue 49 | 50 | layer_test(local.LocallyConnected2D, 51 | kwargs={'nb_filter': nb_filter, 52 | 'nb_row': 3, 
53 | 'nb_col': 3, 54 | 'border_mode': border_mode, 55 | 'W_regularizer': 'l2', 56 | 'b_regularizer': 'l2', 57 | 'activity_regularizer': 'activity_l2', 58 | 'subsample': subsample, 59 | 'dim_ordering': 'tf'}, 60 | input_shape=(nb_samples, nb_row, nb_col, stack_size)) 61 | 62 | layer_test(local.LocallyConnected2D, 63 | kwargs={'nb_filter': nb_filter, 64 | 'nb_row': 3, 65 | 'nb_col': 3, 66 | 'border_mode': border_mode, 67 | 'W_regularizer': 'l2', 68 | 'b_regularizer': 'l2', 69 | 'activity_regularizer': 'activity_l2', 70 | 'subsample': subsample, 71 | 'dim_ordering': 'th'}, 72 | input_shape=(nb_samples, stack_size, nb_row, nb_col)) 73 | 74 | 75 | if __name__ == '__main__': 76 | pytest.main([__file__]) 77 | -------------------------------------------------------------------------------- /tests/keras/layers/test_noise.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from keras.utils.test_utils import layer_test, keras_test 3 | from keras.layers import noise 4 | 5 | 6 | @keras_test 7 | def test_GaussianNoise(): 8 | layer_test(noise.GaussianNoise, 9 | kwargs={'sigma': 1.}, 10 | input_shape=(3, 2, 3)) 11 | 12 | 13 | @keras_test 14 | def test_GaussianDropout(): 15 | layer_test(noise.GaussianDropout, 16 | kwargs={'p': 0.5}, 17 | input_shape=(3, 2, 3)) 18 | 19 | 20 | if __name__ == '__main__': 21 | pytest.main([__file__]) 22 | -------------------------------------------------------------------------------- /tests/keras/layers/test_normalization.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | from numpy.testing import assert_allclose 4 | 5 | from keras.layers import Dense, Activation, Input 6 | from keras.utils.test_utils import layer_test, keras_test 7 | from keras.layers import normalization 8 | from keras.models import Sequential, Model 9 | from keras import backend as K 10 | 11 | input_1 = np.arange(10) 12 | input_2 = np.zeros(10) 13 | input_3 = np.ones((10)) 14 | input_shapes = [np.ones((10, 10)), np.ones((10, 10, 10))] 15 | 16 | 17 | @keras_test 18 | def basic_batchnorm_test(): 19 | from keras import regularizers 20 | layer_test(normalization.BatchNormalization, 21 | kwargs={'mode': 1, 22 | 'gamma_regularizer': regularizers.l2(0.01), 23 | 'beta_regularizer': regularizers.l2(0.01)}, 24 | input_shape=(3, 4, 2)) 25 | layer_test(normalization.BatchNormalization, 26 | kwargs={'mode': 0}, 27 | input_shape=(3, 4, 2)) 28 | 29 | 30 | @keras_test 31 | @pytest.mark.skip(reason="Unsupported function.") 32 | def test_batchnorm_mode_0_or_2(): 33 | for mode in [0, 2]: 34 | model = Sequential() 35 | norm_m0 = normalization.BatchNormalization(mode=mode, input_shape=(10,), momentum=0.8) 36 | model.add(norm_m0) 37 | model.compile(loss='mse', optimizer='sgd') 38 | 39 | # centered on 5.0, variance 10.0 40 | X = np.random.normal(loc=5.0, scale=10.0, size=(1000, 10)) 41 | model.fit(X, X, nb_epoch=4, verbose=0) 42 | out = model.predict(X) 43 | out -= K.eval(norm_m0.beta) 44 | out /= K.eval(norm_m0.gamma) 45 | 46 | assert_allclose(out.mean(), 0.0, atol=1e-1) 47 | assert_allclose(out.std(), 1.0, atol=1e-1) 48 | 49 | 50 | @keras_test 51 | def test_batchnorm_mode_0_or_2_twice(): 52 | # This is a regression test for issue #4881 with the old 53 | # batch normalization functions in the Theano backend. 
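    # Illustrative note (not part of the upstream file): the regression check below is
    # intentionally assertion-free -- building two stacked axis=1 BatchNormalization
    # layers, fitting them, and calling predict() should simply complete without raising.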
54 | model = Sequential() 55 | model.add(normalization.BatchNormalization(mode=0, input_shape=(10, 5, 5), axis=1)) 56 | model.add(normalization.BatchNormalization(mode=0, input_shape=(10, 5, 5), axis=1)) 57 | model.compile(loss='mse', optimizer='sgd') 58 | 59 | X = np.random.normal(loc=5.0, scale=10.0, size=(20, 10, 5, 5)) 60 | model.fit(X, X, nb_epoch=1, verbose=0) 61 | model.predict(X) 62 | 63 | 64 | @keras_test 65 | def test_batchnorm_mode_0_convnet(): 66 | model = Sequential() 67 | norm_m0 = normalization.BatchNormalization(mode=0, axis=1, input_shape=(3, 4, 4), momentum=0.8) 68 | model.add(norm_m0) 69 | model.compile(loss='mse', optimizer='sgd') 70 | 71 | # centered on 5.0, variance 10.0 72 | X = np.random.normal(loc=5.0, scale=10.0, size=(1000, 3, 4, 4)) 73 | model.fit(X, X, nb_epoch=4, verbose=0) 74 | out = model.predict(X) 75 | out -= np.reshape(K.eval(norm_m0.beta), (1, 3, 1, 1)) 76 | out /= np.reshape(K.eval(norm_m0.gamma), (1, 3, 1, 1)) 77 | 78 | assert_allclose(np.mean(out, axis=(0, 2, 3)), 0.0, atol=1e-1) 79 | assert_allclose(np.std(out, axis=(0, 2, 3)), 1.0, atol=1e-1) 80 | 81 | 82 | @keras_test 83 | def test_batchnorm_mode_1(): 84 | norm_m1 = normalization.BatchNormalization(input_shape=(10,), mode=1) 85 | norm_m1.build(input_shape=(None, 10)) 86 | 87 | for inp in [input_1, input_2, input_3]: 88 | out = (norm_m1.call(K.variable(inp)) - norm_m1.beta) / norm_m1.gamma 89 | assert_allclose(K.eval(K.mean(out)), 0.0, atol=1e-1) 90 | if inp.std() > 0.: 91 | assert_allclose(K.eval(K.std(out)), 1.0, atol=1e-1) 92 | else: 93 | assert_allclose(K.eval(K.std(out)), 0.0, atol=1e-1) 94 | 95 | 96 | @keras_test 97 | @pytest.mark.skip(reason="Unsupported function.") 98 | def test_shared_batchnorm(): 99 | '''Test that a BN layer can be shared 100 | across different data streams. 
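    A single BatchNormalization instance is applied to two different Input tensors,
    and the resulting model is then reused on a third input, checking that the
    expected number of update ops is collected each time.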
101 | ''' 102 | # Test single layer reuse 103 | bn = normalization.BatchNormalization(input_shape=(10,), mode=0) 104 | x1 = Input(shape=(10,)) 105 | bn(x1) 106 | 107 | x2 = Input(shape=(10,)) 108 | y2 = bn(x2) 109 | 110 | x = np.random.normal(loc=5.0, scale=10.0, size=(2, 10)) 111 | model = Model(x2, y2) 112 | assert len(model.updates) == 2 113 | model.compile('sgd', 'mse') 114 | model.train_on_batch(x, x) 115 | 116 | # Test model-level reuse 117 | x3 = Input(shape=(10,)) 118 | y3 = model(x3) 119 | new_model = Model(x3, y3) 120 | assert len(model.updates) == 2 121 | new_model.compile('sgd', 'mse') 122 | new_model.train_on_batch(x, x) 123 | 124 | 125 | if __name__ == '__main__': 126 | pytest.main([__file__]) 127 | -------------------------------------------------------------------------------- /tests/keras/preprocessing/test_sequence.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy.testing import assert_allclose 3 | 4 | import pytest 5 | 6 | from keras.preprocessing.sequence import pad_sequences 7 | from keras.preprocessing.sequence import make_sampling_table 8 | from keras.preprocessing.sequence import skipgrams 9 | 10 | 11 | def test_pad_sequences(): 12 | a = [[1], [1, 2], [1, 2, 3]] 13 | 14 | # test padding 15 | b = pad_sequences(a, maxlen=3, padding='pre') 16 | assert_allclose(b, [[0, 0, 1], [0, 1, 2], [1, 2, 3]]) 17 | b = pad_sequences(a, maxlen=3, padding='post') 18 | assert_allclose(b, [[1, 0, 0], [1, 2, 0], [1, 2, 3]]) 19 | 20 | # test truncating 21 | b = pad_sequences(a, maxlen=2, truncating='pre') 22 | assert_allclose(b, [[0, 1], [1, 2], [2, 3]]) 23 | b = pad_sequences(a, maxlen=2, truncating='post') 24 | assert_allclose(b, [[0, 1], [1, 2], [1, 2]]) 25 | 26 | # test value 27 | b = pad_sequences(a, maxlen=3, value=1) 28 | assert_allclose(b, [[1, 1, 1], [1, 1, 2], [1, 2, 3]]) 29 | 30 | 31 | def test_pad_sequences_vector(): 32 | a = [[[1, 1]], 33 | [[2, 1], [2, 2]], 34 | [[3, 1], [3, 2], [3, 3]]] 35 | 36 | # test padding 37 | b = pad_sequences(a, maxlen=3, padding='pre') 38 | assert_allclose(b, [[[0, 0], [0, 0], [1, 1]], 39 | [[0, 0], [2, 1], [2, 2]], 40 | [[3, 1], [3, 2], [3, 3]]]) 41 | b = pad_sequences(a, maxlen=3, padding='post') 42 | assert_allclose(b, [[[1, 1], [0, 0], [0, 0]], 43 | [[2, 1], [2, 2], [0, 0]], 44 | [[3, 1], [3, 2], [3, 3]]]) 45 | 46 | # test truncating 47 | b = pad_sequences(a, maxlen=2, truncating='pre') 48 | assert_allclose(b, [[[0, 0], [1, 1]], 49 | [[2, 1], [2, 2]], 50 | [[3, 2], [3, 3]]]) 51 | 52 | b = pad_sequences(a, maxlen=2, truncating='post') 53 | assert_allclose(b, [[[0, 0], [1, 1]], 54 | [[2, 1], [2, 2]], 55 | [[3, 1], [3, 2]]]) 56 | 57 | # test value 58 | b = pad_sequences(a, maxlen=3, value=1) 59 | assert_allclose(b, [[[1, 1], [1, 1], [1, 1]], 60 | [[1, 1], [2, 1], [2, 2]], 61 | [[3, 1], [3, 2], [3, 3]]]) 62 | 63 | 64 | def test_make_sampling_table(): 65 | a = make_sampling_table(3) 66 | assert_allclose(a, np.asarray([0.00315225, 0.00315225, 0.00547597]), 67 | rtol=.1) 68 | 69 | 70 | def test_skipgrams(): 71 | # test with no window size and binary labels 72 | couples, labels = skipgrams(np.arange(3), vocabulary_size=3) 73 | for couple in couples: 74 | assert couple[0] in [0, 1, 2] and couple[1] in [0, 1, 2] 75 | 76 | # test window size and categorical labels 77 | couples, labels = skipgrams(np.arange(5), vocabulary_size=5, window_size=1, 78 | categorical=True) 79 | for couple in couples: 80 | assert couple[0] - couple[1] <= 3 81 | for l in labels: 82 | assert len(l) == 2 83 | 
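# Illustrative sketch (not part of the original test file): `skipgrams` returns
# (couples, labels), where each couple is a [target, context] index pair and each
# label marks a true pair (1) vs. a negatively sampled one (0). A typical consumer
# unpacks them into parallel sequences before feeding an embedding model:
def _skipgrams_unpack_example():
    couples, labels = skipgrams([1, 2, 3, 2, 1], vocabulary_size=4, window_size=1)
    word_target, word_context = zip(*couples)  # parallel tuples of word indices
    assert len(word_target) == len(word_context) == len(labels)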
84 | 85 | if __name__ == '__main__': 86 | pytest.main([__file__]) 87 | -------------------------------------------------------------------------------- /tests/keras/preprocessing/test_text.py: -------------------------------------------------------------------------------- 1 | from keras.preprocessing.text import Tokenizer, one_hot 2 | import pytest 3 | import numpy as np 4 | 5 | 6 | def test_one_hot(): 7 | text = 'The cat sat on the mat.' 8 | encoded = one_hot(text, 5) 9 | assert len(encoded) == 6 10 | assert np.max(encoded) <= 4 11 | assert np.min(encoded) >= 0 12 | 13 | 14 | def test_tokenizer(): 15 | texts = ['The cat sat on the mat.', 16 | 'The dog sat on the log.', 17 | 'Dogs and cats living together.'] 18 | tokenizer = Tokenizer(nb_words=10) 19 | tokenizer.fit_on_texts(texts) 20 | 21 | sequences = [] 22 | for seq in tokenizer.texts_to_sequences_generator(texts): 23 | sequences.append(seq) 24 | assert np.max(np.max(sequences)) < 10 25 | assert np.min(np.min(sequences)) == 1 26 | 27 | tokenizer.fit_on_sequences(sequences) 28 | 29 | for mode in ['binary', 'count', 'tfidf', 'freq']: 30 | matrix = tokenizer.texts_to_matrix(texts, mode) 31 | 32 | 33 | if __name__ == '__main__': 34 | pytest.main([__file__]) 35 | -------------------------------------------------------------------------------- /tests/keras/test_constraints.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | from numpy.testing import assert_allclose 4 | 5 | from keras import backend as K 6 | from keras import constraints 7 | 8 | 9 | test_values = [0.1, 0.5, 3, 8, 1e-7] 10 | np.random.seed(3537) 11 | example_array = np.random.random((100, 100)) * 100. - 50. 12 | example_array[0, 0] = 0. # 0 could possibly cause trouble 13 | 14 | 15 | def test_maxnorm(): 16 | for m in test_values: 17 | norm_instance = constraints.maxnorm(m) 18 | normed = norm_instance(K.variable(example_array)) 19 | assert(np.all(K.eval(normed) < m)) 20 | 21 | # a more explicit example 22 | norm_instance = constraints.maxnorm(2.0) 23 | x = np.array([[0, 0, 0], [1.0, 0, 0], [3, 0, 0], [3, 3, 3]]).T 24 | x_normed_target = np.array([[0, 0, 0], [1.0, 0, 0], 25 | [2.0, 0, 0], 26 | [2. / np.sqrt(3), 2. / np.sqrt(3), 2. / np.sqrt(3)]]).T 27 | x_normed_actual = K.eval(norm_instance(K.variable(x))) 28 | assert_allclose(x_normed_actual, x_normed_target, rtol=1e-05) 29 | 30 | 31 | def test_nonneg(): 32 | nonneg_instance = constraints.nonneg() 33 | normed = nonneg_instance(K.variable(example_array)) 34 | assert(np.all(np.min(K.eval(normed), axis=1) == 0.)) 35 | 36 | 37 | def test_unitnorm(): 38 | unitnorm_instance = constraints.unitnorm() 39 | normalized = unitnorm_instance(K.variable(example_array)) 40 | norm_of_normalized = np.sqrt(np.sum(K.eval(normalized)**2, axis=0)) 41 | # in the unit norm constraint, it should be equal to 1. 42 | difference = norm_of_normalized - 1. 43 | largest_difference = np.max(np.abs(difference)) 44 | assert(np.abs(largest_difference) < 10e-5) 45 | 46 | 47 | if __name__ == '__main__': 48 | pytest.main([__file__]) 49 | -------------------------------------------------------------------------------- /tests/keras/test_initializations.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | 4 | from keras import initializations 5 | from keras import backend as K 6 | 7 | # 2D tensor test fixture 8 | FC_SHAPE = (100, 100) 9 | 10 | # 4D convolution in th order. 
This shape has the same effective shape as FC_SHAPE 11 | CONV_SHAPE = (25, 25, 2, 2) 12 | 13 | # The equivalent shape of both test fixtures 14 | SHAPE = (100, 100) 15 | 16 | 17 | def _runner(init, shape, target_mean=None, target_std=None, 18 | target_max=None, target_min=None): 19 | variable = init(shape) 20 | output = K.get_value(variable) 21 | lim = 1e-2 22 | if target_std is not None: 23 | assert abs(output.std() - target_std) < lim 24 | if target_mean is not None: 25 | assert abs(output.mean() - target_mean) < lim 26 | if target_max is not None: 27 | assert abs(output.max() - target_max) < lim 28 | if target_min is not None: 29 | assert abs(output.min() - target_min) < lim 30 | 31 | 32 | @pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV']) 33 | def test_uniform(tensor_shape): 34 | _runner(initializations.uniform, tensor_shape, target_mean=0., 35 | target_max=0.05, target_min=-0.05) 36 | 37 | 38 | @pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV']) 39 | def test_normal(tensor_shape): 40 | _runner(initializations.normal, tensor_shape, target_mean=0., target_std=0.05) 41 | 42 | 43 | @pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV']) 44 | def test_lecun_uniform(tensor_shape): 45 | scale = np.sqrt(3. / SHAPE[0]) 46 | _runner(initializations.lecun_uniform, tensor_shape, 47 | target_mean=0., target_max=scale, target_min=-scale) 48 | 49 | 50 | @pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV']) 51 | def test_glorot_uniform(tensor_shape): 52 | scale = np.sqrt(6. / (SHAPE[0] + SHAPE[1])) 53 | _runner(initializations.glorot_uniform, tensor_shape, target_mean=0., 54 | target_max=scale, target_min=-scale) 55 | 56 | 57 | @pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV']) 58 | def test_glorot_normal(tensor_shape): 59 | scale = np.sqrt(2. / (SHAPE[0] + SHAPE[1])) 60 | _runner(initializations.glorot_normal, tensor_shape, 61 | target_mean=0., target_std=scale) 62 | 63 | 64 | @pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV']) 65 | def test_he_uniform(tensor_shape): 66 | scale = np.sqrt(6. / SHAPE[0]) 67 | _runner(initializations.he_uniform, tensor_shape, target_mean=0., 68 | target_max=scale, target_min=-scale) 69 | 70 | 71 | @pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV']) 72 | def test_he_normal(tensor_shape): 73 | scale = np.sqrt(2. / SHAPE[0]) 74 | _runner(initializations.he_normal, tensor_shape, 75 | target_mean=0., target_std=scale) 76 | 77 | 78 | @pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV']) 79 | def test_orthogonal(tensor_shape): 80 | _runner(initializations.orthogonal, tensor_shape, 81 | target_mean=0.) 82 | 83 | 84 | @pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV']) 85 | def test_identity(tensor_shape): 86 | if len(tensor_shape) > 2: 87 | with pytest.raises(Exception): 88 | _runner(initializations.identity, tensor_shape, 89 | target_mean=1. / SHAPE[0], target_max=1.) 90 | else: 91 | _runner(initializations.identity, tensor_shape, 92 | target_mean=1. / SHAPE[0], target_max=1.) 93 | 94 | 95 | @pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV']) 96 | def test_zero(tensor_shape): 97 | _runner(initializations.zero, tensor_shape, 98 | target_mean=0., target_max=0.) 
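# Worked example (illustrative, not part of the original file): with SHAPE = (100, 100),
# the glorot_uniform limit asserted above is sqrt(6. / (100 + 100)) ~= 0.173 and the
# he_uniform limit is sqrt(6. / 100) ~= 0.245; _runner checks these bounds via its
# target_max / target_min arguments.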
99 | 100 | 101 | @pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV']) 102 | def test_one(tensor_shape): 103 | _runner(initializations.one, tensor_shape, 104 | target_mean=1., target_max=1.) 105 | 106 | 107 | if __name__ == '__main__': 108 | pytest.main([__file__]) 109 | -------------------------------------------------------------------------------- /tests/keras/test_metrics.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | 4 | from keras import metrics 5 | from keras import backend as K 6 | 7 | all_metrics = [ 8 | metrics.binary_accuracy, 9 | metrics.categorical_accuracy, 10 | metrics.mean_squared_error, 11 | metrics.mean_absolute_error, 12 | metrics.mean_absolute_percentage_error, 13 | metrics.mean_squared_logarithmic_error, 14 | metrics.squared_hinge, 15 | metrics.hinge, 16 | metrics.categorical_crossentropy, 17 | metrics.binary_crossentropy, 18 | metrics.poisson, 19 | metrics.cosine_proximity, 20 | metrics.matthews_correlation, 21 | ] 22 | 23 | all_sparse_metrics = [ 24 | metrics.sparse_categorical_accuracy, 25 | metrics.sparse_categorical_crossentropy, 26 | ] 27 | 28 | 29 | def test_metrics(): 30 | y_a = K.variable(np.random.random((6, 7))) 31 | y_b = K.variable(np.random.random((6, 7))) 32 | for metric in all_metrics: 33 | output = metric(y_a, y_b) 34 | assert K.eval(output).shape == () 35 | 36 | 37 | def test_matthews_correlation(): 38 | y_true = K.variable(np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0])) 39 | y_pred = K.variable(np.array([1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0])) 40 | 41 | # Calculated using sklearn.metrics.matthews_corrcoef 42 | expected = -0.14907119849998601 43 | 44 | actual = K.eval(metrics.matthews_correlation(y_true, y_pred)) 45 | epsilon = 1e-05 46 | assert expected - epsilon <= actual <= expected + epsilon 47 | 48 | 49 | def test_precision(): 50 | y_true = K.variable(np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0])) 51 | y_pred = K.variable(np.array([1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0])) 52 | 53 | # Calculated using sklearn.metrics.precision_score 54 | expected = 0.40000000000000002 55 | 56 | actual = K.eval(metrics.precision(y_true, y_pred)) 57 | epsilon = 1e-05 58 | assert expected - epsilon <= actual <= expected + epsilon 59 | 60 | 61 | def test_recall(): 62 | y_true = K.variable(np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0])) 63 | y_pred = K.variable(np.array([1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0])) 64 | 65 | # Calculated using sklearn.metrics.recall_score 66 | expected = 0.2857142857142857 67 | 68 | actual = K.eval(metrics.recall(y_true, y_pred)) 69 | epsilon = 1e-05 70 | assert expected - epsilon <= actual <= expected + epsilon 71 | 72 | 73 | def test_fbeta_score(): 74 | y_true = K.variable(np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0])) 75 | y_pred = K.variable(np.array([1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0])) 76 | 77 | # Calculated using sklearn.metrics.fbeta_score 78 | expected = 0.30303030303030304 79 | 80 | actual = K.eval(metrics.fbeta_score(y_true, y_pred, beta=2)) 81 | epsilon = 1e-05 82 | assert expected - epsilon <= actual <= expected + epsilon 83 | 84 | 85 | def test_fmeasure(): 86 | y_true = K.variable(np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0])) 87 | y_pred = K.variable(np.array([1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0])) 88 | 89 | # Calculated using sklearn.metrics.f1_score 90 | expected = 0.33333333333333331 91 | 92 | actual = K.eval(metrics.fmeasure(y_true, y_pred)) 93 | epsilon 
= 1e-05 94 | assert expected - epsilon <= actual <= expected + epsilon 95 | 96 | 97 | @pytest.mark.skip(reason="no sparse support") 98 | def test_sparse_metrics(): 99 | for metric in all_sparse_metrics: 100 | y_a = K.variable(np.random.randint(0, 7, (6,)), dtype=K.floatx()) 101 | y_b = K.variable(np.random.random((6, 7)), dtype=K.floatx()) 102 | assert K.eval(metric(y_a, y_b)).shape == () 103 | 104 | 105 | @pytest.mark.xfail 106 | def test_top_k_categorical_accuracy(): 107 | y_pred = K.variable(np.array([[0.3, 0.2, 0.1], [0.1, 0.2, 0.7]])) 108 | y_true = K.variable(np.array([[0, 1, 0], [1, 0, 0]])) 109 | success_result = K.eval(metrics.top_k_categorical_accuracy(y_true, y_pred, 110 | k=3)) 111 | assert success_result == 1 112 | partial_result = K.eval(metrics.top_k_categorical_accuracy(y_true, y_pred, 113 | k=2)) 114 | assert partial_result == 0.5 115 | failure_result = K.eval(metrics.top_k_categorical_accuracy(y_true, y_pred, 116 | k=1)) 117 | assert failure_result == 0 118 | 119 | 120 | if __name__ == "__main__": 121 | pytest.main([__file__]) 122 | -------------------------------------------------------------------------------- /tests/keras/test_objectives.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | 4 | from keras import objectives 5 | from keras import backend as K 6 | 7 | 8 | allobj = [objectives.mean_squared_error, 9 | objectives.mean_absolute_error, 10 | objectives.mean_absolute_percentage_error, 11 | objectives.mean_squared_logarithmic_error, 12 | objectives.squared_hinge, 13 | objectives.hinge, objectives.categorical_crossentropy, 14 | objectives.binary_crossentropy, 15 | objectives.kullback_leibler_divergence, 16 | objectives.poisson, 17 | objectives.cosine_proximity] 18 | 19 | 20 | def test_objective_shapes_3d(): 21 | y_a = K.variable(np.random.random((5, 6, 7))) 22 | y_b = K.variable(np.random.random((5, 6, 7))) 23 | for obj in allobj: 24 | objective_output = obj(y_a, y_b) 25 | assert K.eval(objective_output).shape == (5, 6) 26 | 27 | 28 | def test_objective_shapes_2d(): 29 | y_a = K.variable(np.random.random((6, 7))) 30 | y_b = K.variable(np.random.random((6, 7))) 31 | for obj in allobj: 32 | objective_output = obj(y_a, y_b) 33 | assert K.eval(objective_output).shape == (6,) 34 | 35 | 36 | @pytest.mark.skip(reason="no sparse support") 37 | def test_cce_one_hot(): 38 | y_a = K.variable(np.random.randint(0, 7, (5, 6))) 39 | y_b = K.variable(np.random.random((5, 6, 7))) 40 | objective_output = objectives.sparse_categorical_crossentropy(y_a, y_b) 41 | assert K.eval(objective_output).shape == (5, 6) 42 | 43 | y_a = K.variable(np.random.randint(0, 7, (6,))) 44 | y_b = K.variable(np.random.random((6, 7))) 45 | assert K.eval(objectives.sparse_categorical_crossentropy(y_a, y_b)).shape == (6,) 46 | 47 | 48 | if __name__ == "__main__": 49 | pytest.main([__file__]) 50 | -------------------------------------------------------------------------------- /tests/keras/test_optimizers.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import pytest 3 | import numpy as np 4 | np.random.seed(1337) 5 | 6 | from keras.utils.test_utils import get_test_data 7 | from keras.optimizers import SGD, RMSprop, Adagrad, Adadelta, Adam, Adamax, Nadam 8 | from keras.models import Sequential 9 | from keras.layers.core import Dense, Activation 10 | from keras.utils.np_utils import to_categorical 11 | 12 | 13 | (X_train, y_train), (X_test, y_test) = 
get_test_data(nb_train=1000, 14 | nb_test=200, 15 | input_shape=(10,), 16 | classification=True, 17 | nb_class=2) 18 | y_train = to_categorical(y_train) 19 | y_test = to_categorical(y_test) 20 | 21 | 22 | def get_model(input_dim, nb_hidden, output_dim): 23 | model = Sequential() 24 | model.add(Dense(nb_hidden, input_shape=(input_dim,))) 25 | model.add(Activation('relu')) 26 | model.add(Dense(output_dim)) 27 | model.add(Activation('softmax')) 28 | return model 29 | 30 | 31 | def _test_optimizer(optimizer, target=0.89): 32 | model = get_model(X_train.shape[1], 10, y_train.shape[1]) 33 | model.compile(loss='categorical_crossentropy', 34 | optimizer=optimizer, 35 | metrics=['accuracy']) 36 | history = model.fit(X_train, y_train, nb_epoch=12, batch_size=16, 37 | validation_data=(X_test, y_test), verbose=2) 38 | config = optimizer.get_config() 39 | assert type(config) == dict 40 | assert history.history['val_acc'][-1] >= target 41 | 42 | 43 | def test_sgd(): 44 | sgd = SGD(lr=0.01, momentum=0.9, nesterov=True) 45 | _test_optimizer(sgd) 46 | 47 | 48 | def test_rmsprop(): 49 | _test_optimizer(RMSprop()) 50 | _test_optimizer(RMSprop(decay=1e-3)) 51 | 52 | 53 | def test_adagrad(): 54 | _test_optimizer(Adagrad()) 55 | _test_optimizer(Adagrad(decay=1e-3)) 56 | 57 | 58 | def test_adadelta(): 59 | _test_optimizer(Adadelta(), target=0.83) 60 | _test_optimizer(Adadelta(decay=1e-3), target=0.83) 61 | 62 | 63 | def test_adam(): 64 | _test_optimizer(Adam()) 65 | _test_optimizer(Adam(decay=1e-3)) 66 | 67 | 68 | @pytest.mark.xfail 69 | def test_adamax(): 70 | _test_optimizer(Adamax()) 71 | _test_optimizer(Adamax(decay=1e-3)) 72 | 73 | 74 | @pytest.mark.xfail 75 | def test_nadam(): 76 | _test_optimizer(Nadam()) 77 | 78 | 79 | def test_clipnorm(): 80 | sgd = SGD(lr=0.01, momentum=0.9, clipnorm=0.5) 81 | _test_optimizer(sgd) 82 | 83 | 84 | @pytest.mark.xfail 85 | def test_clipvalue(): 86 | sgd = SGD(lr=0.01, momentum=0.9, clipvalue=0.5) 87 | _test_optimizer(sgd) 88 | 89 | 90 | if __name__ == '__main__': 91 | pytest.main([__file__]) 92 | -------------------------------------------------------------------------------- /tests/keras/test_regularizers.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | np.random.seed(1337) 4 | 5 | from keras.models import Sequential 6 | from keras.layers import Merge 7 | from keras.layers import Dense 8 | from keras.layers import Activation 9 | from keras.layers import Flatten 10 | from keras.layers import ActivityRegularization 11 | from keras.layers import Embedding 12 | from keras.datasets import mnist 13 | from keras.utils import np_utils 14 | from keras import regularizers 15 | 16 | nb_classes = 10 17 | batch_size = 128 18 | nb_epoch = 5 19 | weighted_class = 9 20 | standard_weight = 1 21 | high_weight = 5 22 | max_train_samples = 5000 23 | max_test_samples = 1000 24 | 25 | 26 | def get_data(): 27 | # the data, shuffled and split between tran and test sets 28 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 29 | X_train = X_train.reshape(60000, 784)[:max_train_samples] 30 | X_test = X_test.reshape(10000, 784)[:max_test_samples] 31 | X_train = X_train.astype("float32") / 255 32 | X_test = X_test.astype("float32") / 255 33 | 34 | # convert class vectors to binary class matrices 35 | y_train = y_train[:max_train_samples] 36 | y_test = y_test[:max_test_samples] 37 | Y_train = np_utils.to_categorical(y_train, nb_classes) 38 | Y_test = np_utils.to_categorical(y_test, nb_classes) 39 | test_ids = 
np.where(y_test == np.array(weighted_class))[0] 40 | 41 | return (X_train, Y_train), (X_test, Y_test), test_ids 42 | 43 | 44 | def create_model(weight_reg=None, activity_reg=None): 45 | model = Sequential() 46 | model.add(Dense(50, input_shape=(784,))) 47 | model.add(Activation('relu')) 48 | model.add(Dense(10, W_regularizer=weight_reg, 49 | activity_regularizer=activity_reg)) 50 | model.add(Activation('softmax')) 51 | return model 52 | 53 | @pytest.mark.skip(reason="Unsupported function.") 54 | def test_Eigenvalue_reg(): 55 | (X_train, Y_train), (X_test, Y_test), test_ids = get_data() 56 | reg = regularizers.EigenvalueRegularizer(0.01) 57 | model = create_model(weight_reg=reg) 58 | model.compile(loss='categorical_crossentropy', optimizer='rmsprop') 59 | model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch, verbose=0) 60 | model.evaluate(X_test[test_ids, :], Y_test[test_ids, :], verbose=0) 61 | 62 | 63 | def test_W_reg(): 64 | (X_train, Y_train), (X_test, Y_test), test_ids = get_data() 65 | for reg in [regularizers.l1(), 66 | regularizers.l2(), 67 | regularizers.l1l2()]: 68 | model = create_model(weight_reg=reg) 69 | model.compile(loss='categorical_crossentropy', optimizer='rmsprop') 70 | assert len(model.losses) == 1 71 | model.fit(X_train, Y_train, batch_size=batch_size, 72 | nb_epoch=nb_epoch, verbose=0) 73 | model.evaluate(X_test[test_ids, :], Y_test[test_ids, :], verbose=0) 74 | 75 | 76 | def test_A_reg(): 77 | (X_train, Y_train), (X_test, Y_test), test_ids = get_data() 78 | for reg in [regularizers.activity_l1(), regularizers.activity_l2()]: 79 | model = create_model(activity_reg=reg) 80 | model.compile(loss='categorical_crossentropy', optimizer='rmsprop') 81 | assert len(model.losses) == 1 82 | model.fit(X_train, Y_train, batch_size=batch_size, 83 | nb_epoch=nb_epoch, verbose=0) 84 | model.evaluate(X_test[test_ids, :], Y_test[test_ids, :], verbose=0) 85 | 86 | 87 | if __name__ == '__main__': 88 | pytest.main([__file__]) 89 | -------------------------------------------------------------------------------- /tests/keras/test_sparse.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | import pytest 4 | 5 | from keras.models import Model 6 | from keras.layers import Dense, Input 7 | from keras.utils.test_utils import keras_test 8 | from keras import backend as K 9 | from keras.backend import theano_backend as KTH 10 | from keras.backend import tensorflow_backend as KTF 11 | 12 | import scipy.sparse as sparse 13 | import numpy as np 14 | np.random.seed(1337) 15 | 16 | 17 | input_dim = 16 18 | nb_hidden = 8 19 | nb_class = 4 20 | batch_size = 32 21 | nb_epoch = 1 22 | 23 | 24 | def do_sparse(): 25 | return K == KTF or KTH.th_sparse_module 26 | 27 | 28 | @keras_test 29 | @pytest.mark.skip(reason="MXNet doesn't support sparse yet.") 30 | def test_sparse_mlp(): 31 | if not do_sparse(): 32 | return 33 | 34 | input = Input(batch_shape=(None, input_dim), sparse=True) 35 | hidden = Dense(nb_hidden, activation='relu')(input) 36 | hidden = Dense(nb_hidden, activation='relu')(hidden) 37 | predictions = Dense(nb_class, activation='sigmoid')(hidden) 38 | model = Model(input=[input], output=predictions) 39 | model.compile(loss='mse', optimizer='sgd') 40 | x = sparse.rand(batch_size, input_dim, density=0.1, format='csr') 41 | y = np.random.random((batch_size, nb_class)) 42 | model.fit(x, y, nb_epoch=1) 43 | 
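# Illustrative note (not part of the original file): test_sparse_mlp above is a smoke
# test -- fitting a small functional-API MLP on a scipy.sparse CSR matrix for one epoch
# should run end-to-end, and do_sparse() gates it to backends with sparse support.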
-------------------------------------------------------------------------------- /tests/keras/utils/test_generic_utils.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import keras 3 | from keras import backend as K 4 | from keras.utils.generic_utils import custom_object_scope, get_custom_objects, get_from_module 5 | 6 | 7 | def test_custom_object_scope_adds_objects(): 8 | get_custom_objects().clear() 9 | assert (len(get_custom_objects()) == 0) 10 | with custom_object_scope({"Test1": object, "Test2": object}, {"Test3": object}): 11 | assert (len(get_custom_objects()) == 3) 12 | assert (len(get_custom_objects()) == 0) 13 | 14 | 15 | class CustomObject(object): 16 | def __init__(self): 17 | pass 18 | 19 | 20 | def test_get_from_module_uses_custom_object(): 21 | get_custom_objects().clear() 22 | assert (get_from_module("CustomObject", globals(), "test_generic_utils") == CustomObject) 23 | with pytest.raises(ValueError): 24 | get_from_module("TestObject", globals(), "test_generic_utils") 25 | with custom_object_scope({"TestObject": CustomObject}): 26 | assert (get_from_module("TestObject", globals(), "test_generic_utils") == CustomObject) 27 | 28 | 29 | if __name__ == '__main__': 30 | pytest.main([__file__]) 31 | -------------------------------------------------------------------------------- /tests/test_dynamic_trainability.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | import pytest 4 | 5 | from keras.utils.test_utils import keras_test 6 | from keras.models import Model, Sequential 7 | from keras.layers import Dense, Input 8 | 9 | 10 | @keras_test 11 | def test_layer_trainability_switch(): 12 | # with constructor argument, in Sequential 13 | model = Sequential() 14 | model.add(Dense(2, trainable=False, input_dim=1)) 15 | assert model.trainable_weights == [] 16 | 17 | # by setting the `trainable` argument, in Sequential 18 | model = Sequential() 19 | layer = Dense(2, input_dim=1) 20 | model.add(layer) 21 | assert model.trainable_weights == layer.trainable_weights 22 | layer.trainable = False 23 | assert model.trainable_weights == [] 24 | 25 | # with constructor argument, in Model 26 | x = Input(shape=(1,)) 27 | y = Dense(2, trainable=False)(x) 28 | model = Model(x, y) 29 | assert model.trainable_weights == [] 30 | 31 | # by setting the `trainable` argument, in Model 32 | x = Input(shape=(1,)) 33 | layer = Dense(2) 34 | y = layer(x) 35 | model = Model(x, y) 36 | assert model.trainable_weights == layer.trainable_weights 37 | layer.trainable = False 38 | assert model.trainable_weights == [] 39 | 40 | 41 | @keras_test 42 | def test_model_trainability_switch(): 43 | # a non-trainable model has no trainable weights 44 | x = Input(shape=(1,)) 45 | y = Dense(2)(x) 46 | model = Model(x, y) 47 | model.trainable = False 48 | assert model.trainable_weights == [] 49 | 50 | # same for Sequential 51 | model = Sequential() 52 | model.add(Dense(2, input_dim=1)) 53 | model.trainable = False 54 | assert model.trainable_weights == [] 55 | 56 | 57 | @keras_test 58 | def test_nested_model_trainability(): 59 | # a Sequential inside a Model 60 | inner_model = Sequential() 61 | inner_model.add(Dense(2, input_dim=1)) 62 | 63 | x = Input(shape=(1,)) 64 | y = inner_model(x) 65 | outer_model = Model(x, y) 66 | assert outer_model.trainable_weights == inner_model.trainable_weights 67 | inner_model.trainable = False 68 | assert 
outer_model.trainable_weights == [] 69 | inner_model.trainable = True 70 | inner_model.layers[-1].trainable = False 71 | assert outer_model.trainable_weights == [] 72 | 73 | # a Sequential inside a Sequential 74 | inner_model = Sequential() 75 | inner_model.add(Dense(2, input_dim=1)) 76 | outer_model = Sequential() 77 | outer_model.add(inner_model) 78 | assert outer_model.trainable_weights == inner_model.trainable_weights 79 | inner_model.trainable = False 80 | assert outer_model.trainable_weights == [] 81 | inner_model.trainable = True 82 | inner_model.layers[-1].trainable = False 83 | assert outer_model.trainable_weights == [] 84 | 85 | # a Model inside a Model 86 | x = Input(shape=(1,)) 87 | y = Dense(2)(x) 88 | inner_model = Model(x, y) 89 | x = Input(shape=(1,)) 90 | y = inner_model(x) 91 | outer_model = Model(x, y) 92 | assert outer_model.trainable_weights == inner_model.trainable_weights 93 | inner_model.trainable = False 94 | assert outer_model.trainable_weights == [] 95 | inner_model.trainable = True 96 | inner_model.layers[-1].trainable = False 97 | assert outer_model.trainable_weights == [] 98 | 99 | # a Model inside a Sequential 100 | x = Input(shape=(1,)) 101 | y = Dense(2)(x) 102 | inner_model = Model(x, y) 103 | outer_model = Sequential() 104 | outer_model.add(inner_model) 105 | assert outer_model.trainable_weights == inner_model.trainable_weights 106 | inner_model.trainable = False 107 | assert outer_model.trainable_weights == [] 108 | inner_model.trainable = True 109 | inner_model.layers[-1].trainable = False 110 | assert outer_model.trainable_weights == [] 111 | 112 | 113 | if __name__ == '__main__': 114 | pytest.main([__file__]) 115 | -------------------------------------------------------------------------------- /tests/test_loss_masking.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from keras.models import Sequential 5 | from keras.engine.training import weighted_objective 6 | from keras.layers.core import TimeDistributedDense, Masking 7 | from keras.utils.test_utils import keras_test 8 | from keras import objectives 9 | from keras import backend as K 10 | 11 | 12 | @keras_test 13 | def test_masking(): 14 | np.random.seed(1337) 15 | X = np.array([[[1], [1]], 16 | [[0], [0]]]) 17 | model = Sequential() 18 | model.add(Masking(mask_value=0, input_shape=(2, 1))) 19 | model.add(TimeDistributedDense(1, init='one')) 20 | model.compile(loss='mse', optimizer='sgd') 21 | y = np.array([[[1], [1]], 22 | [[1], [1]]]) 23 | loss = model.train_on_batch(X, y) 24 | assert loss == 0 25 | 26 | 27 | @keras_test 28 | def test_loss_masking(): 29 | weighted_loss = weighted_objective(objectives.get('mae')) 30 | shape = (3, 4, 2) 31 | X = np.arange(24).reshape(shape) 32 | Y = 2 * X 33 | 34 | # Normally the trailing 1 is added by standardize_weights 35 | weights = np.ones((3,)) 36 | mask = np.ones((3, 4)) 37 | mask[1, 0] = 0 38 | 39 | out = K.eval(weighted_loss(K.variable(X), 40 | K.variable(Y), 41 | K.variable(weights), 42 | K.variable(mask))) 43 | 44 | 45 | if __name__ == '__main__': 46 | pytest.main([__file__]) 47 | --------------------------------------------------------------------------------