├── .gitignore
├── LICENSE
├── MANIFEST.in
├── README.md
├── keras_gradient_noise
│   ├── __init__.py
│   └── gradient_noise.py
├── setup.py
└── test_general.py

/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# dotenv
.env

# virtualenv
.venv
venv/
ENV/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2017 Max Schumacher

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
include README.md
include LICENSE
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# keras_gradient_noise

A simple way to add gradient noise to any Keras / TensorFlow Keras optimizer.

Install via: `pip install keras_gradient_noise`


## Gradient Noise

Introduced in
["Adding Gradient Noise Improves Learning for Very Deep Networks" (Neelakantan et al., 2015)](https://arxiv.org/abs/1511.06807),
the idea is to add a small amount of decaying Gaussian noise to your gradients before
each update step. The paper shows that this can reduce overfitting and lead to lower
training loss, especially for very deep networks.

Equation 1 of the paper defines two parameters for the method: at update step t, the
noise is drawn from a Gaussian with variance σ²_t = η / (1 + t)^γ, where

* η sets the overall amount of noise (the paper recommends picking one of {0.01, 0.3, 1.0})
* γ sets the decay rate of the noise (the paper recommends 0.55)


## How to use in your code

Simply wrap your optimizer class with the provided `add_gradient_noise()`
function:

```python
from keras.optimizers import Adam
from keras_gradient_noise import add_gradient_noise

# ...

NoisyAdam = add_gradient_noise(Adam)

model.compile(optimizer=NoisyAdam())
```

Note the parentheses in `NoisyAdam()`: `add_gradient_noise()` expects a Keras-compatible
optimizer *class*, not an *instance* of one, so the wrapped class still has to be
instantiated before it is passed to `model.compile()`.

You can adjust the two parameters η and γ via initialization arguments. They
have the following default values:

```python
NoisyOptimizer(noise_eta=0.3, noise_gamma=0.55)
```


## Keras vs TF.Keras

The package tries to be smart about whether to use `tf.keras` or standalone `keras`.
If you get an error in your case, try passing the specific Keras module to the
`add_gradient_noise()` function, e.g.:

```python
import keras

...

add_gradient_noise(MyOptim, keras=keras)
```


## Feedback, contributions, etc.

Please don't hesitate to reach out via GitHub issues or a quick email! Thanks!
--------------------------------------------------------------------------------
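To get a feel for how quickly the noise fades with the default settings, here is a small standalone sketch. It is not part of the package and only assumes NumPy; it simply evaluates the schedule σ²_t = η / (1 + t)^γ from the README at a few step counts:

```python
import numpy as np

# Default parameters used by the wrapped optimizer
noise_eta = 0.3     # η: overall noise level
noise_gamma = 0.55  # γ: decay rate

for t in [0, 10, 100, 1000, 10000]:
    variance = noise_eta / ((1 + t) ** noise_gamma)
    stddev = np.sqrt(variance)
    print(f"step {t:>6}: noise stddev = {stddev:.4f}")
```

With these defaults the noise standard deviation starts around 0.55 and drops below 0.05 after roughly 10,000 steps: strong perturbation early in training, vanishing perturbation later.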
/keras_gradient_noise/__init__.py:
--------------------------------------------------------------------------------
from .gradient_noise import add_gradient_noise
--------------------------------------------------------------------------------
/keras_gradient_noise/gradient_noise.py:
--------------------------------------------------------------------------------
import inspect
import importlib


def add_gradient_noise(BaseOptimizer, keras=None):
    """
    Given a Keras-compatible optimizer class, returns a modified class that
    supports adding gradient noise as introduced in this paper:
    https://arxiv.org/abs/1511.06807
    The relevant parameters from equation 1 in the paper can be set via
    noise_eta and noise_gamma, set by default to 0.3 and 0.55 respectively.
    By default, tries to guess whether to use standalone Keras or tf.keras based
    on where the optimizer was imported from. You can also specify which Keras
    to use by passing the imported module.
    """
    if keras is None:
        # Import it automatically: try to guess from the optimizer's module.
        if hasattr(BaseOptimizer, '__module__') and BaseOptimizer.__module__.startswith('keras'):
            keras = importlib.import_module('keras')
        else:
            keras = importlib.import_module('tensorflow.keras')

    K = keras.backend

    if not (
        inspect.isclass(BaseOptimizer) and
        issubclass(BaseOptimizer, keras.optimizers.Optimizer)
    ):
        raise ValueError(
            'add_gradient_noise() expects a valid Keras optimizer'
        )

    def _get_shape(x):
        if hasattr(x, 'dense_shape'):
            return x.dense_shape

        return K.shape(x)

    class NoisyOptimizer(BaseOptimizer):
        def __init__(self, noise_eta=0.3, noise_gamma=0.55, **kwargs):
            super(NoisyOptimizer, self).__init__(**kwargs)
            with K.name_scope(self.__class__.__name__):
                self.noise_eta = K.variable(noise_eta, name='noise_eta')
                self.noise_gamma = K.variable(noise_gamma, name='noise_gamma')

        def get_gradients(self, loss, params):
            grads = super(NoisyOptimizer, self).get_gradients(loss, params)

            # Add decaying Gaussian noise with variance eta / (1 + t) ** gamma
            # (equation 1 of the paper), where t is the current iteration.
            t = K.cast(self.iterations, K.dtype(grads[0]))
            variance = self.noise_eta / ((1 + t) ** self.noise_gamma)

            grads = [
                grad + K.random_normal(
                    _get_shape(grad),
                    mean=0.0,
                    stddev=K.sqrt(variance),
                    dtype=K.dtype(grads[0])
                )
                for grad in grads
            ]

            return grads

        def get_config(self):
            config = {'noise_eta': float(K.get_value(self.noise_eta)),
                      'noise_gamma': float(K.get_value(self.noise_gamma))}
            base_config = super(NoisyOptimizer, self).get_config()
            return dict(list(base_config.items()) + list(config.items()))

    NoisyOptimizer.__name__ = 'Noisy{}'.format(BaseOptimizer.__name__)

    return NoisyOptimizer
--------------------------------------------------------------------------------
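Because `get_config()` above merges `noise_eta` and `noise_gamma` into the base optimizer's config, the wrapped optimizer carries its noise settings through the usual Keras config round trip. The following is a minimal sketch, assuming the standalone `keras` package with a TensorFlow 1.x-style backend and its classic `Adam` class; it also shows the explicit `keras=` argument mentioned in the README:

```python
import keras
from keras.optimizers import Adam

from keras_gradient_noise import add_gradient_noise

# Passing the Keras module explicitly skips the auto-detection in gradient_noise.py
NoisyAdam = add_gradient_noise(Adam, keras=keras)

opt = NoisyAdam(noise_eta=0.01, noise_gamma=0.55)

# get_config() merges the noise parameters into Adam's own config dict
config = opt.get_config()
print(config['noise_eta'], config['noise_gamma'])  # roughly 0.01 and 0.55 (float32 backend variables)
```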
/setup.py:
--------------------------------------------------------------------------------
from setuptools import setup

setup(
    name='keras_gradient_noise',
    version='0.11',
    description='Gradient Noise for Keras',
    classifiers=[
        'Development Status :: 3 - Alpha',
        'License :: OSI Approved :: MIT License',
        'Programming Language :: Python',
        'Topic :: Scientific/Engineering :: Artificial Intelligence',
        'Intended Audience :: Developers',
        'Intended Audience :: Education',
        'Intended Audience :: Science/Research',
        'Topic :: Software Development :: Libraries',
        'Topic :: Software Development :: Libraries :: Python Modules',
    ],
    url='https://github.com/cpury/keras_gradient_noise',
    author='Max Schumacher',
    author_email='max@maxschumacher.info',
    license='MIT',
    packages=['keras_gradient_noise'],
    install_requires=[
        'keras'
    ],
    extras_require={
        'tests': [
            'pytest'
        ]
    },
    zip_safe=False
)
--------------------------------------------------------------------------------
/test_general.py:
--------------------------------------------------------------------------------
import numpy as np
import pytest
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam

from keras_gradient_noise import add_gradient_noise


NoisyAdam = add_gradient_noise(Adam)

x = np.array([
    [0.1],
    [0.5],
    [0.8],
    [0.3],
])
y = np.array([
    [0.9],
    [0.5],
    [0.2],
    [0.7],
])


def test_noisy_optimizer_with_simple_model_training():
    try:
        model = Sequential()
        model.add(Dense(1, input_shape=(1,)))
        model.compile(optimizer=NoisyAdam(), loss='mse')
        model.fit(x, y, epochs=4, batch_size=2, verbose=0)
    except Exception as e:
        pytest.fail('Training with the noisy optimizer raised an unexpected error: ' + str(e))
--------------------------------------------------------------------------------
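The test above only asserts that training runs without raising (run it with `pytest test_general.py`). Two further sanity checks follow directly from how `add_gradient_noise()` constructs the wrapped class; this is a sketch of possible additions, not part of the existing suite:

```python
from keras.optimizers import Adam

from keras_gradient_noise import add_gradient_noise

NoisyAdam = add_gradient_noise(Adam)

# The wrapped optimizer is a genuine subclass of the original class,
# so anything that accepts an Adam instance also accepts a NoisyAdam.
assert issubclass(NoisyAdam, Adam)

# Its name is derived from the base class name by add_gradient_noise().
assert NoisyAdam.__name__ == 'NoisyAdam'
```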