├── .gitattributes ├── utilities ├── webfiles.zip ├── __init__.py ├── tboard.py └── callbacks.py ├── MDNT.code-workspace ├── layers ├── deprecated │ ├── __init__.py │ ├── external.py │ └── conv.py ├── __init__.py ├── utils.py ├── dropout.py ├── external.py ├── dense.py └── normalize.py ├── data ├── deprecated │ ├── __init__.py │ └── h5py.py └── __init__.py ├── LICENSE ├── functions ├── __init__.py ├── others.py ├── metrics.py └── losses.py ├── optimizers ├── __init__.py ├── _default.py ├── adaptive.py └── mixture.py ├── .gitignore ├── compat.py ├── __init__.py └── README.md /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /utilities/webfiles.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cainmagi/MDNT/HEAD/utilities/webfiles.zip -------------------------------------------------------------------------------- /MDNT.code-workspace: -------------------------------------------------------------------------------- 1 | { 2 | "folders": [ 3 | { 4 | "path": "." 5 | } 6 | ], 7 | "settings": {} 8 | } -------------------------------------------------------------------------------- /layers/deprecated/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ################################################################ 3 | # Layers (deprecated) 4 | # @ Modern Deep Network Toolkits for Tensorflow-Keras 5 | # Yuchen Jin @ cainmagi@gmail.com 6 | # Requirements: (Pay attention to version) 7 | # python 3.6+ 8 | # tensorflow r1.13+ 9 | # Put the deprecated libs here. 10 | # Version: 0.10 # 2019/5/23 11 | # Comments: 12 | # Create this submodule. 13 | ################################################################ 14 | ''' 15 | 16 | # Import sub-modules 17 | from .external import External 18 | 19 | __all__ = ['External'] -------------------------------------------------------------------------------- /data/deprecated/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ################################################################ 3 | # Data (deprecated) 4 | # @ Modern Deep Network Toolkits for Tensorflow-Keras 5 | # Yuchen Jin @ cainmagi@gmail.com 6 | # Requirements: (Pay attention to version) 7 | # python 3.6+ 8 | # tensorflow r1.13+ 9 | # Put the deprecated libs here. 10 | # Version: 0.10 # 2019/3/26 11 | # Comments: 12 | # Create this submodule. 
13 | ################################################################ 14 | ''' 15 | 16 | # Import sub-modules 17 | from .h5py import H5HGParser, H5SupSaver, H5GParser 18 | 19 | __all__ = ['H5HGParser', 'H5SupSaver', 'H5GParser'] -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Yuchen Jin 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /functions/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ################################################################ 3 | # Functions 4 | # @ Modern Deep Network Toolkits for Tensorflow-Keras 5 | # Yuchen Jin @ cainmagi@gmail.com 6 | # Requirements: (Pay attention to version) 7 | # python 3.6+ 8 | # tensorflow r1.13+ 9 | # Extended functions for MDNT. Mainly including new losses and 10 | # metrics. 11 | # Version: 0.10 # 2019/6/13 12 | # Comments: 13 | # Create this submodule. 14 | ################################################################ 15 | ''' 16 | 17 | # Import sub-modules 18 | from . import losses 19 | from . import metrics 20 | 21 | # Set layer dictionaries 22 | customObjects = { 23 | 'linear_jaccard_index': losses.linear_jaccard_loss, 24 | 'lovasz_jaccard_loss': losses.lovasz_jaccard_loss, 25 | 'signal_to_noise': metrics.signal_to_noise, 26 | 'correlation': metrics.correlation, 27 | 'jaccard_index': metrics.jaccard_index 28 | } 29 | 30 | __all__ = list(customObjects.keys()) 31 | 32 | # Set this local module as the prefered one 33 | from pkgutil import extend_path 34 | __path__ = extend_path(__path__, __name__) 35 | 36 | # Delete private sub-modules 37 | del extend_path -------------------------------------------------------------------------------- /data/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ################################################################ 3 | # Data 4 | # @ Modern Deep Network Toolkits for Tensorflow-Keras 5 | # Yuchen Jin @ cainmagi@gmail.com 6 | # Requirements: (Pay attention to version) 7 | # python 3.6+ 8 | # tensorflow r1.13+ 9 | # Extended data parser for tf-K standard IO APIs. 10 | # Version: 0.18 # 2020/02/10 11 | # Comments: 12 | # Add `H5Converter` into this module. 
13 | # Version: 0.16 # 2019/10/23 14 | # Comments: 15 | # Add `H5VGParser` into this module. 16 | # Version: 0.15 # 2019/3/30 17 | # Comments: 18 | # Add `H5GCombiner` into this module. 19 | # Version: 0.10 # 2019/3/26 20 | # Comments: 21 | # Create this submodule. 22 | ################################################################ 23 | ''' 24 | 25 | # Import sub-modules 26 | from .h5py import H5HGParser, H5SupSaver, H5GParser, H5GCombiner, H5VGParser, H5Converter 27 | 28 | __all__ = ['H5HGParser', 'H5SupSaver', 'H5GParser', 'H5GCombiner', 'H5VGParser', 'H5Converter'] 29 | 30 | # Set this local module as the prefered one 31 | from pkgutil import extend_path 32 | __path__ = extend_path(__path__, __name__) 33 | 34 | # Delete private sub-modules 35 | del extend_path -------------------------------------------------------------------------------- /optimizers/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ################################################################ 3 | # Optimizers 4 | # @ Modern Deep Network Toolkits for Tensorflow-Keras 5 | # Yuchen Jin @ cainmagi@gmail.com 6 | # Requirements: (Pay attention to version) 7 | # python 3.6+ 8 | # tensorflow r1.13+ 9 | # Wrapping the optimizers in tf-K with default options. In this 10 | # module, we would also try to propose some newly introduced 11 | # optimizers if need. 12 | # Version: 0.10 # 2019/3/23 13 | # Comments: 14 | # Create this submodule. 15 | ################################################################ 16 | ''' 17 | # Import sub-modules 18 | from ._default import optimizer as optimizer 19 | from .mixture import Adam2SGD, Nadam2NSGD, SWATS 20 | from .adaptive import Nadabound, Adabound, MNadam 21 | 22 | # Set optimizer dictionaries 23 | customObjects = { 24 | 'SWATS': SWATS, 25 | 'Adam2SGD': Adam2SGD, 26 | 'Nadam2NSGD': Nadam2NSGD, 27 | 'Nadabound': Nadabound, 28 | 'Adabound': Adabound, 29 | 'MNadam': MNadam 30 | } 31 | 32 | # Set this local module as the prefered one 33 | from pkgutil import extend_path 34 | __path__ = extend_path(__path__, __name__) 35 | 36 | __all__ = list(customObjects.keys()) 37 | 38 | # Delete private sub-modules and objects 39 | del _default 40 | del extend_path -------------------------------------------------------------------------------- /utilities/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ################################################################ 3 | # Utilities 4 | # @ Modern Deep Network Toolkits for Tensorflow-Keras 5 | # Yuchen Jin @ cainmagi@gmail.com 6 | # Requirements: (Pay attention to version) 7 | # python 3.6+ 8 | # tensorflow r1.13+ 9 | # matplotlib 3.1.1+ 10 | # Extended utilities for MDNT. This module includes useful tools 11 | # that are not directly related to deep network architecture. 12 | # For example, it has callbacks for fitting a network, the pre- 13 | # processing and postprocessing tools and APIs for drawing 14 | # figures. 15 | # Version: 0.30 # 2019/11/27 16 | # Comments: 17 | # Finish the submodule: tboard. 18 | # Version: 0.20 # 2019/11/26 19 | # Comments: 20 | # Finish the submodule: draw. 21 | # Version: 0.10 # 2019/6/16 22 | # Comments: 23 | # Create this submodule. 24 | ################################################################ 25 | ''' 26 | 27 | # Import sub-modules 28 | from . 
import callbacks, draw, tboard 29 | from ._default import save_model, load_model 30 | 31 | __all__ = [ 32 | 'callbacks', 'draw', 'tboard', 33 | 'save_model', 'load_model' 34 | ] 35 | 36 | # Set this local module as the prefered one 37 | from pkgutil import extend_path 38 | __path__ = extend_path(__path__, __name__) 39 | 40 | # Delete private sub-modules 41 | del extend_path -------------------------------------------------------------------------------- /functions/others.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ################################################################ 3 | # Functions - Others 4 | # @ Modern Deep Network Toolkits for Tensorflow-Keras 5 | # Yuchen Jin @ cainmagi@gmail.com 6 | # Requirements: (Pay attention to version) 7 | # python 3.6+ 8 | # tensorflow r1.13+ 9 | # ============================================================= 10 | # Warning: 11 | # THIS MODULE IS A PRIVATE ONE, USERS SHOULD NOT GET ACCESS TO 12 | # THIS PART. 13 | # ============================================================= 14 | # Some basic functions. 15 | # Version: 0.10 # 2019/6/13 16 | # Comments: 17 | # Create this submodule. 18 | ################################################################ 19 | ''' 20 | 21 | from tensorflow.python.keras import backend as K 22 | 23 | def get_channels(y, data_format=None): 24 | '''get channels 25 | Get all dimensions other than the channel dimension and the batch dimension. 26 | Arguments: 27 | data_format: 'channels_first' or 'channels_last', 28 | Input: 29 | y: tensor, where we need to find the dimension list. 30 | Output: 31 | tuple, the channel (dimension) list. 32 | ''' 33 | get_dims = len(y.get_shape()) 34 | if get_dims < 3: 35 | raise ValueError('The input tensor should has channel dimension, i.e. it should have at least 3 axes.') 36 | if data_format is None: 37 | data_format = K.image_data_format() 38 | if data_format == 'channels_last': 39 | get_reduced_axes = tuple(range(1, get_dims-1)) 40 | else: 41 | get_reduced_axes = tuple(range(2, get_dims)) 42 | return get_reduced_axes -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Datafiles 2 | /checkpoints/ 3 | /logs/ 4 | *.h5 5 | 6 | # vscode 7 | /.vscode/ 8 | 9 | # Byte-compiled / optimized / DLL files 10 | __pycache__/ 11 | *.py[cod] 12 | *$py.class 13 | 14 | # C extensions 15 | *.so 16 | 17 | # Distribution / packaging 18 | .Python 19 | build/ 20 | develop-eggs/ 21 | dist/ 22 | downloads/ 23 | eggs/ 24 | .eggs/ 25 | lib/ 26 | lib64/ 27 | parts/ 28 | sdist/ 29 | var/ 30 | wheels/ 31 | *.egg-info/ 32 | .installed.cfg 33 | *.egg 34 | MANIFEST 35 | 36 | # PyInstaller 37 | # Usually these files are written by a python script from a template 38 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
39 | *.manifest 40 | *.spec 41 | 42 | # Installer logs 43 | pip-log.txt 44 | pip-delete-this-directory.txt 45 | 46 | # Unit test / coverage reports 47 | htmlcov/ 48 | .tox/ 49 | .coverage 50 | .coverage.* 51 | .cache 52 | nosetests.xml 53 | coverage.xml 54 | *.cover 55 | .hypothesis/ 56 | .pytest_cache/ 57 | 58 | # Translations 59 | *.mo 60 | *.pot 61 | 62 | # Django stuff: 63 | *.log 64 | local_settings.py 65 | db.sqlite3 66 | 67 | # Flask stuff: 68 | instance/ 69 | .webassets-cache 70 | 71 | # Scrapy stuff: 72 | .scrapy 73 | 74 | # Sphinx documentation 75 | docs/_build/ 76 | 77 | # PyBuilder 78 | target/ 79 | 80 | # Jupyter Notebook 81 | .ipynb_checkpoints 82 | 83 | # pyenv 84 | .python-version 85 | 86 | # celery beat schedule file 87 | celerybeat-schedule 88 | 89 | # SageMath parsed files 90 | *.sage.py 91 | 92 | # Environments 93 | .env 94 | .venv 95 | env/ 96 | venv/ 97 | ENV/ 98 | env.bak/ 99 | venv.bak/ 100 | 101 | # Spyder project settings 102 | .spyderproject 103 | .spyproject 104 | 105 | # Rope project settings 106 | .ropeproject 107 | 108 | # mkdocs documentation 109 | /site 110 | 111 | # mypy 112 | .mypy_cache/ -------------------------------------------------------------------------------- /compat.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ################################################################ 3 | # Compatibility check 4 | # @ Modern Deep Network Toolkits for Tensorflow-Keras 5 | # Yuchen Jin @ cainmagi@gmail.com 6 | # Requirements: (Pay attention to version) 7 | # python 3.6+ 8 | # tensorflow r1.13+ 9 | # Use this module to check whether we need to open the 10 | # compatible mode. 11 | # Version: 0.20 # 2020/8/30 12 | # Comments: 13 | # 1. Extend the compatible mode for future updates. 14 | # Version: 0.20 # 2019/6/12 15 | # Comments: 16 | # 1. Modify the required version for compatible mode. 17 | # 2. Provide a stronger property collecting method for 18 | # compatibility. 19 | # Version: 0.10 # 2019/3/27 20 | # Comments: 21 | # Create this compatible module. 
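#      For example, other submodules read `compat.COMPATIBLE_MODE` to pick the right code path: `layers.dropout` checks the '1.12' flag before importing `InputSpec`, and `optimizers._default` checks the '1.14' flag before enabling contrib-based optimizers.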
22 | ################################################################ 23 | ''' 24 | 25 | # Check compatibility 26 | import tensorflow 27 | 28 | def set_compatible(): 29 | compat_mode = { 30 | '1.12': False, 31 | '1.14': False 32 | } 33 | parse_ver = [int(i) for i in tensorflow.__version__.split('-')[0].split('.')] 34 | if parse_ver >= [1, 14]: 35 | compat_mode['1.14'] = True 36 | if parse_ver < [1, 13]: 37 | compat_mode['1.12'] = True 38 | return compat_mode 39 | 40 | COMPATIBLE_MODE = set_compatible() 41 | 42 | def collect_properties(layer, sublayer): 43 | ''' 44 | Collect the following parameters from sublayer to layer: 45 | _trainable_weights 46 | _non_trainable_weights 47 | _updates 48 | _losses 49 | ''' 50 | if COMPATIBLE_MODE['1.12']: # for compatibility 51 | layer._trainable_weights.extend(sublayer._trainable_weights) 52 | layer._non_trainable_weights.extend(sublayer._non_trainable_weights) 53 | layer._updates.extend(sublayer._updates) 54 | layer._losses.extend(sublayer._losses) 55 | if hasattr(layer, '_callable_losses') and hasattr(sublayer, '_callable_losses'): # for compatibility on 1.12.0 56 | layer._callable_losses.extend(sublayer._callable_losses) 57 | -------------------------------------------------------------------------------- /layers/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ################################################################ 3 | # Layers 4 | # @ Modern Deep Network Toolkits for Tensorflow-Keras 5 | # Yuchen Jin @ cainmagi@gmail.com 6 | # Requirements: (Pay attention to version) 7 | # python 3.6+ 8 | # tensorflow r1.13+ 9 | # Modern network layers. This sub-module would include some 10 | # effective network layers which are not introduced in tf-K. 11 | # All of these modules are produced by standard tf-K APIs. 12 | # Version: 0.10 # 2019/3/23 13 | # Comments: 14 | # Create this submodule. 
15 | ################################################################ 16 | ''' 17 | 18 | # Import sub-modules 19 | from .activation import Slice, Restrict, RestrictSub, ExpandDims 20 | from .dense import Ghost, DenseTied 21 | from .dropout import InstanceGaussianNoise 22 | from .normalize import InstanceNormalization, GroupNormalization 23 | from .conv import AConv1D, AConv2D, AConv3D, AConv1DTranspose, AConv2DTranspose, AConv3DTranspose, GroupConv1D, GroupConv2D, GroupConv3D, Conv1DTied, Conv2DTied, Conv3DTied 24 | from .external import PyExternal 25 | from .residual import Residual1D, Residual1DTranspose, Residual2D, Residual2DTranspose, Residual3D, Residual3DTranspose, Resnext1D, Resnext1DTranspose, Resnext2D, Resnext2DTranspose, Resnext3D, Resnext3DTranspose 26 | from .inception import Inception1D, Inception2D, Inception3D, Inception1DTranspose, Inception2DTranspose, Inception3DTranspose, Inceptres1D, Inceptres2D, Inceptres3D, Inceptres1DTranspose, Inceptres2DTranspose, Inceptres3DTranspose, Inceptplus1D, Inceptplus2D, Inceptplus3D, Inceptplus1DTranspose, Inceptplus2DTranspose, Inceptplus3DTranspose 27 | 28 | # Set layer dictionaries 29 | customObjects = { 30 | 'Slice': Slice, 31 | 'Restrict': Restrict, 32 | 'RestrictSub': RestrictSub, 33 | 'ExpandDims': ExpandDims, 34 | 'Ghost': Ghost, 35 | 'DenseTied': DenseTied, 36 | 'InstanceGaussianNoise': InstanceGaussianNoise, 37 | 'InstanceNormalization': InstanceNormalization, 38 | 'GroupNormalization': GroupNormalization, 39 | 'Conv1DTied': Conv1DTied, 40 | 'Conv2DTied': Conv2DTied, 41 | 'Conv3DTied': Conv3DTied, 42 | 'AConv1D': AConv1D, 43 | 'AConv2D': AConv2D, 44 | 'AConv3D': AConv3D, 45 | 'GroupConv1D': GroupConv1D, 46 | 'GroupConv2D': GroupConv2D, 47 | 'GroupConv3D': GroupConv3D, 48 | 'AConv1DTranspose': AConv1DTranspose, 49 | 'AConv2DTranspose': AConv2DTranspose, 50 | 'AConv3DTranspose': AConv3DTranspose, 51 | 'Residual1D': Residual1D, 52 | 'Residual2D': Residual2D, 53 | 'Residual3D': Residual3D, 54 | 'Residual1DTranspose': Residual1DTranspose, 55 | 'Residual2DTranspose': Residual2DTranspose, 56 | 'Residual3DTranspose': Residual3DTranspose, 57 | 'Resnext1D': Resnext1D, 58 | 'Resnext2D': Resnext2D, 59 | 'Resnext3D': Resnext3D, 60 | 'Resnext1DTranspose': Resnext1DTranspose, 61 | 'Resnext2DTranspose': Resnext2DTranspose, 62 | 'Resnext3DTranspose': Resnext3DTranspose, 63 | 'Inception1D': Inception1D, 64 | 'Inception2D': Inception2D, 65 | 'Inception3D': Inception3D, 66 | 'Inception1DTranspose': Inception1DTranspose, 67 | 'Inception2DTranspose': Inception2DTranspose, 68 | 'Inception3DTranspose': Inception3DTranspose, 69 | 'Inceptres1D': Inceptres1D, 70 | 'Inceptres2D': Inceptres2D, 71 | 'Inceptres3D': Inceptres3D, 72 | 'Inceptres1DTranspose': Inceptres1DTranspose, 73 | 'Inceptres2DTranspose': Inceptres2DTranspose, 74 | 'Inceptres3DTranspose': Inceptres3DTranspose, 75 | 'Inceptplus1D': Inceptplus1D, 76 | 'Inceptplus2D': Inceptplus2D, 77 | 'Inceptplus3D': Inceptplus3D, 78 | 'Inceptplus1DTranspose': Inceptplus1DTranspose, 79 | 'Inceptplus2DTranspose': Inceptplus2DTranspose, 80 | 'Inceptplus3DTranspose': Inceptplus3DTranspose, 81 | 'PyExternal': PyExternal 82 | } 83 | 84 | __all__ = list(customObjects.keys()) 85 | 86 | # Set alias 87 | #res = residual 88 | 89 | # Set this local module as the prefered one 90 | from pkgutil import extend_path 91 | __path__ = extend_path(__path__, __name__) 92 | 93 | # Delete private sub-modules 94 | del extend_path -------------------------------------------------------------------------------- 
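A minimal usage sketch (assuming the package is importable as `mdnt`; the file name is only illustrative): the `customObjects` dictionary defined in `layers/__init__.py` above can be handed to the plain tf.keras loader when restoring a model that uses these layers,

    import tensorflow as tf
    import mdnt

    # Register every MDNT layer class during deserialization.
    model = tf.keras.models.load_model('my_model.h5',
                                       custom_objects=mdnt.layers.customObjects)

mdnt's own `save_model`/`load_model` in `.utilities` provide an alternative path.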
/optimizers/_default.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ################################################################ 3 | # Optimizers - Default tools. 4 | # @ Modern Deep Network Toolkits for Tensorflow-Keras 5 | # Yuchen Jin @ cainmagi@gmail.com 6 | # Requirements: (Pay attention to version) 7 | # python 3.6+ 8 | # tensorflow r1.13+ 9 | # Basic tools for this module. 10 | # The default tools would be imported directly into the current 11 | # sub-module. 12 | # Version: 0.16 # 2019/6/24 13 | # Comments: 14 | # Change the quick interface to support more MDNT optimizers. 15 | # Version: 0.16 # 2019/6/24 16 | # Comments: 17 | # Change the warning interface to tensorflow version. 18 | # Version: 0.15 # 2019/6/23 19 | # Comments: 20 | # Add support for plain momentum SGD. 21 | # Version: 0.12 # 2019/6/21 22 | # Comments: 23 | # 1. Support two more tensorflow based optimizers in fast 24 | # interface. 25 | # 2. Adjust the default momentum rate of Nesterov SGD to 0.9. 26 | # Version: 0.10 # 2019/3/23 27 | # Comments: 28 | # Create this submodule. 29 | ################################################################ 30 | ''' 31 | 32 | from tensorflow.python.keras import optimizers 33 | from tensorflow.python.training import adagrad_da, proximal_gradient_descent 34 | from tensorflow.contrib.opt.python.training import weight_decay_optimizers 35 | from tensorflow.python.platform import tf_logging as logging 36 | from .adaptive import MNadam, Adabound, Nadabound 37 | 38 | from .. import compat 39 | 40 | def _raise_TF_warn(): 41 | logging.warning('You are using TFOptimizer in this case. ' 42 | 'It does not support saving/loading optimizer' 43 | ' via save_model() and load_model(). In some ' 44 | 'cases, the option decay may not apply to this' 45 | ' interface.') 46 | 47 | def optimizer(name='adam', l_rate=0.01, decay=0.0, **kwargs): 48 | ''' 49 | Define the optimizer by default parameters except learning rate. 50 | Note that most optimizers do not suggest that users modify their 51 | specially designed parameters. 52 | We suggest that users specify gamma according to common practice when 53 | using Adabound optimizers. 54 | Options: 55 | name: the name of optimizer (default='adam') (available: 'adam', 56 | 'amsgrad', 'adamax', 'adabound', 'amsbound', 'nadam', 57 | 'namsgrad', 'nadabound', 'namsbound', 'adadelta', 'rms', 58 | 'adagrad', 'adamw', 'nmoment', 'sgd', 'proximal') 59 | l_rate: learning rate (default=0.01) 60 | decay: decay ratio ('adadeltaDA' does not support this option) 61 | other parameters: see the usage of the specific optimizer. 62 | Return: 63 | the particular optimizer object.
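        Example (a sketch; the values are only illustrative):
            opt = optimizer('amsgrad', l_rate=1e-3, decay=1e-5)
            model.compile(optimizer=opt, loss='mse')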
64 | ''' 65 | name = name.casefold() 66 | if name == 'adam': 67 | return optimizers.Adam(l_rate, decay=decay, **kwargs) 68 | elif name == 'amsgrad': 69 | return optimizers.Adam(l_rate, decay=decay, amsgrad=True, **kwargs) 70 | elif name == 'adamax': 71 | return optimizers.Adamax(l_rate, decay=decay, **kwargs) 72 | elif name == 'adabound': 73 | return Adabound(l_rate, decay=decay, **kwargs) 74 | elif name == 'amsbound': 75 | return Adabound(l_rate, decay=decay, amsgrad=True, **kwargs) 76 | elif name == 'nadam': 77 | return MNadam(l_rate, decay=decay, **kwargs) 78 | elif name == 'namsgrad': 79 | return MNadam(l_rate, decay=decay, amsgrad=True, **kwargs) 80 | elif name == 'nadabound': 81 | return Nadabound(l_rate, decay=decay, **kwargs) 82 | elif name == 'namsbound': 83 | return Nadabound(l_rate, decay=decay, amsgrad=True, **kwargs) 84 | elif name == 'adadelta': 85 | return optimizers.Adadelta(l_rate, decay=decay, **kwargs) 86 | elif name == 'rms': 87 | return optimizers.RMSprop(l_rate, decay=decay, **kwargs) 88 | elif name == 'adagrad': 89 | return optimizers.Adagrad(l_rate, decay=decay, **kwargs) 90 | elif name == 'adamw': 91 | if compat.COMPATIBLE_MODE['1.14']: 92 | raise ImportError('This optimizer is not allowed for compatibility, because it require contrib lib.') 93 | _raise_TF_warn() 94 | if decay != 0.0: 95 | logging.warning('This optimizer uses \'decay\' as \'weight_decay\'.') 96 | else: 97 | raise ValueError('Should use \'decay\' > 0 for AdamW.') 98 | return weight_decay_optimizers.AdamWOptimizer(weight_decay=decay, learning_rate=l_rate, **kwargs) 99 | elif name == 'nmoment': 100 | return optimizers.SGD(lr=l_rate, momentum=0.9, decay=decay, nesterov=True, **kwargs) 101 | elif name == 'moment': 102 | return optimizers.SGD(lr=l_rate, momentum=0.9, decay=decay, nesterov=False, **kwargs) 103 | elif name == 'sgd': 104 | return optimizers.SGD(lr=l_rate, decay=decay, **kwargs) 105 | elif name == 'proximal': 106 | _raise_TF_warn() 107 | if decay != 0.0: 108 | logging.warning('This optimizer does not support \'decay\'.') 109 | return proximal_gradient_descent.ProximalGradientDescentOptimizer(l_rate, **kwargs) -------------------------------------------------------------------------------- /layers/utils.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ################################################################ 3 | # Layers - Utilities 4 | # @ Modern Deep Network Toolkits for Tensorflow-Keras 5 | # Yuchen Jin @ cainmagi@gmail.com 6 | # Requirements: (Pay attention to version) 7 | # python 3.6+ 8 | # tensorflow r1.13+ 9 | # Utilities for some newly defined layers. 10 | # Version: 0.10 # 2019/10/20 11 | # Comments: 12 | # Create this submodule. 13 | ################################################################ 14 | ''' 15 | 16 | def normalize_slice(value): 17 | '''Transform an iterable of integers (or `None`) into a slice. 18 | Arguments: 19 | value: The input iterable object, which would be converted to a slice 20 | indicating tuple. 21 | Returns: 22 | A tuple of 2/3 integers or `None`. 23 | Raises: 24 | ValueError: If something else than an int/long or iterable thereof was 25 | passed. 
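    Example:
        `normalize_slice((1, None))` returns `(1, None)` (the slice `1:`),
        and `normalize_slice((1, 9, 2))` returns `(1, 9, 2)`.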
26 | ''' 27 | try: 28 | value_tuple = tuple(value) 29 | except TypeError: 30 | raise ValueError('One input value object could not be converted into' 31 | ' a slice: ' + str(value)) 32 | if len(value_tuple) not in (2, 3): 33 | raise ValueError(str(value) + ' should has 2/3 integers or `None`, but' 34 | ' actually has ' + str(len(value_tuple)) + ' elements.') 35 | for single_value in value_tuple: 36 | if single_value is None: 37 | continue 38 | try: 39 | int(single_value) 40 | except (ValueError, TypeError): 41 | raise ValueError(str(value) + ' should only include integers or `None`' 42 | ', but actually includes element ' + str(single_value) + 43 | ' of type ' + str(type(single_value))) 44 | return value_tuple 45 | 46 | def normalize_slices(value, name): 47 | '''Transforms an iterable of tuples into a slice tuple. 48 | Arguments: 49 | value: The input iterable object, which would be converted to a slice 50 | indicating tuple. 51 | name: The name of the argument being validated. 52 | Returns: 53 | A tuple of n tuples, where n is inferred by input value. 54 | Raises: 55 | ValueError: If something else than an int/long or iterable thereof was 56 | passed. 57 | ''' 58 | try: 59 | value_tuple = (normalize_slice(value),) 60 | except ValueError: 61 | value_tuple = [] 62 | try: 63 | value_tuple = tuple(map(normalize_slice, value)) 64 | except ValueError as e: 65 | raise ValueError('The `' + name + '` argument must be a tuple of slices' + 66 | '. Received: ' + str(value) + ' including element with' + 67 | ' error: ' + str(e)) 68 | return value_tuple 69 | 70 | def normalize_abtuple(value, name, n=None): 71 | '''Transforms a single integer or iterable of integers into an integer tuple 72 | with an arbitrary length. 73 | Arguments: 74 | value: The value to validate and convert. Could an int, or any iterable 75 | of ints. 76 | n: The size of the tuple to be returned, if set `None`, the tuple would 77 | have an arbitrary length. 78 | name: The name of the argument being validated, e.g. "strides" or 79 | "dims". This is only used to format error messages. 80 | Returns: 81 | A tuple of n integers. If n is None, the tuple length is inferred by 82 | input value. 83 | Raises: 84 | ValueError: If something else than an int/long or iterable thereof was 85 | passed. 86 | ''' 87 | str_n = ('a tuple of ' + str(n) + 'integers') if n is None else ('a tuple') 88 | if isinstance(value, int): 89 | if n is None: 90 | n = 1 91 | return (value,) * n 92 | else: 93 | try: 94 | value_tuple = tuple(value) 95 | except TypeError: 96 | raise ValueError('The `' + name + '` argument must be ' + str_n + 97 | '. Received: ' + str(value)) 98 | if n is not None and len(value_tuple) != n: 99 | raise ValueError('The `' + name + '` argument must be ' + str_n + 100 | '. Received: ' + str(value)) 101 | for single_value in value_tuple: 102 | try: 103 | int(single_value) 104 | except (ValueError, TypeError): 105 | raise ValueError('The `' + name + '` argument must be ' + str_n + 106 | '. Received: ' + str(value) + ' including element ' + 107 | str(single_value) + ' of type ' + 108 | str(type(single_value))) 109 | return value_tuple 110 | 111 | def slice_len_for(slc, seqlen): 112 | ''' 113 | Infer the length of a slice object 114 | slc: Slice object. 115 | seqlen: Where the slice is applied. 
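        Example: `slice_len_for(slice(1, 7, 2), 10)` returns 3, since the
        slice selects indices 1, 3 and 5.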
116 | ''' 117 | start, stop, step = slc.indices(seqlen) 118 | return max(0, (stop - start + (step - (1 if step > 0 else -1))) // step) -------------------------------------------------------------------------------- /functions/metrics.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ################################################################ 3 | # Functions - Metrics 4 | # @ Modern Deep Network Toolkits for Tensorflow-Keras 5 | # Yuchen Jin @ cainmagi@gmail.com 6 | # Requirements: (Pay attention to version) 7 | # python 3.6+ 8 | # tensorflow r1.13+ 9 | # Extend metrics. These functions should not be used as train- 10 | # ing losses. 11 | # Version: 0.10 # 2019/6/13 12 | # Comments: 13 | # Create this submodule, and finish signal_to_noise, 14 | # correlation and jaccard_index. 15 | ################################################################ 16 | ''' 17 | 18 | from tensorflow.python.framework import constant_op 19 | from tensorflow.python.ops import array_ops 20 | from tensorflow.python.ops import math_ops 21 | from tensorflow.python.ops import gen_math_ops 22 | from tensorflow.python.ops import control_flow_ops 23 | from .others import get_channels 24 | 25 | def signal_to_noise(y_true, y_pred, mode='snr', data_format=None, epsilon=1e-8): 26 | '''Signal-to-noise ratio. (metric) 27 | Calculate the signal-to-noise ratio. It supports different modes. 28 | Arguments: 29 | mode: (1) snr: mean [ y_true^2 / (y_pred - y_true)^2 ] 30 | (2) psnr: mean [ max( y_true^2 ) / (y_pred - y_true)^2 ] 31 | data_format: 'channels_first' or 'channels_last'. The default setting is generally 32 | 'channels_last' like other tf.keras APIs. 33 | epsilon: used to avoid zero division. 34 | Input: 35 | y_true: label, tensor in any shape. 36 | y_pred: prediction, tensor in any shape. 37 | Output: 38 | scalar, the mean SNR. 39 | ''' 40 | get_reduced_axes = get_channels(y_true, data_format) 41 | if mode.casefold() == 'psnr': 42 | signal = math_ops.reduce_max(gen_math_ops.square(y_true), axis=get_reduced_axes) 43 | else: 44 | signal = math_ops.reduce_sum(gen_math_ops.square(y_true), axis=get_reduced_axes) 45 | noise = math_ops.reduce_sum(gen_math_ops.square(y_true - y_pred), axis=get_reduced_axes) + epsilon 46 | coeff = (10.0/2.3025851) # 10/log_e(10) 47 | return coeff*math_ops.reduce_mean(gen_math_ops.log(math_ops.divide(signal, noise))) 48 | 49 | def correlation(y_true, y_pred): 50 | '''Pearson correlation coefficient. (metric) 51 | The linear correlation between y_true and y_pred is between -1.0 and 1.0, indicating 52 | positive correlation and negative correlation respectively. In particular, if the 53 | correlation is 0.0, it means y_true and y_pred are linearly uncorrelated. 54 | This function is implemented by: 55 | corr = [mean(y_true * y_pred) - mean(y_true) * mean(y_pred)] 56 | / [ std(y_true) * std(y_pred) ] 57 | This function has been revised to prevent division failures (0/0). When either y_true 58 | or y_pred has zero variance, the correlation would be set as 0.0. 59 | Input: 60 | y_true: label, tensor in any shape. 61 | y_pred: prediction, tensor in any shape. 62 | Output: 63 | scalar, the mean linear correlation between y_true and y_pred.
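        Example (a sketch; assumes `from mdnt.functions import metrics`):
            model.compile(optimizer='adam', loss='mse',
                          metrics=[metrics.correlation])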
64 | ''' 65 | m_y_true = math_ops.reduce_mean(y_true, axis=0) 66 | m_y_pred = math_ops.reduce_mean(y_pred, axis=0) 67 | s_y_true = gen_math_ops.sqrt(math_ops.reduce_mean(gen_math_ops.square(y_true), axis=0) - gen_math_ops.square(m_y_true)) 68 | s_y_pred = gen_math_ops.sqrt(math_ops.reduce_mean(gen_math_ops.square(y_pred), axis=0) - gen_math_ops.square(m_y_pred)) 69 | s_denom = s_y_true * s_y_pred 70 | s_numer = math_ops.reduce_mean(y_true * y_pred, axis=0) - m_y_true * m_y_pred 71 | s_index = gen_math_ops.greater(s_denom, 0) 72 | f1 = lambda: constant_op.constant(0.0) 73 | f2 = lambda: math_ops.reduce_mean(array_ops.boolean_mask(s_numer,s_index)/array_ops.boolean_mask(s_denom,s_index)) 74 | return control_flow_ops.case([(math_ops.reduce_any(s_index), f2)], default=f1) 75 | 76 | def jaccard_index(y_true, y_pred, data_format=None): 77 | '''Jaccard index, or Intersection over Union (IoU). (metric) 78 | The IoU is thought to be a better measurement to estimate the accuracy for segmentation. 79 | If both y_true and y_pred are binary, the intersection I(y_true, y_pred) shows the part 80 | where the prediction is correct, while the union U(y_true, y_pred) contains both correct 81 | prediction and wrong prediction. I/U shows the proportion of correct prediction. 82 | Compared to other error functions (like MSE), it is more concentrated on the part where 83 | y_true=1 or y_pred=1. 84 | This function is implemented by: 85 | jacc = logical_and(y_true, y_pred) / logical_or(y_true, y_pred) 86 | Arguments: 87 | data_format: 'channels_first' or 'channels_last'. The default setting is generally 88 | 'channels_last' like other tf.keras APIs. 89 | Input: 90 | y_true: label, tensor in any shape, should have at least 3 axes. 91 | y_pred: prediction, tensor in any shape, should have at least 3 axes. 92 | Output: 93 | scalar, the mean Jaccard index between y_true and y_pred over all channels. 94 | ''' 95 | get_reduced_axes = get_channels(y_true, data_format) 96 | bin_y_true = gen_math_ops.greater(y_true, 0.5) 97 | bin_y_pred = gen_math_ops.greater(y_pred, 0.5) 98 | valNumer = gen_math_ops.logical_and(bin_y_pred, bin_y_true) 99 | valDomin = gen_math_ops.logical_or(bin_y_pred, bin_y_true) 100 | valNumer = math_ops.reduce_sum(math_ops.cast(valNumer, dtype=y_pred.dtype), axis=get_reduced_axes) 101 | valDomin = math_ops.reduce_sum(math_ops.cast(valDomin, dtype=y_pred.dtype), axis=get_reduced_axes) 102 | return math_ops.reduce_mean(math_ops.div_no_nan(valNumer, valDomin)) -------------------------------------------------------------------------------- /layers/dropout.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ################################################################ 3 | # Layers - Extended dropout and noise layers 4 | # @ Modern Deep Network Toolkits for Tensorflow-Keras 5 | # Yuchen Jin @ cainmagi@gmail.com 6 | # Requirements: (Pay attention to version) 7 | # python 3.6+ 8 | # tensorflow r1.13+ 9 | # Extend the methods for adding dropouts and noises. Such 10 | # methods may help the network avoid overfitting problems. 11 | # Version: 0.10 # 2019/6/11 12 | # Comments: 13 | # Create this submodule. 
14 | ################################################################ 15 | ''' 16 | 17 | from tensorflow.python.keras import backend as K 18 | from tensorflow.python.keras.engine.base_layer import Layer 19 | 20 | from tensorflow.python.keras.layers.core import Dropout, SpatialDropout1D, SpatialDropout2D, SpatialDropout3D 21 | from tensorflow.python.keras.layers.noise import GaussianDropout, AlphaDropout 22 | 23 | from .. import compat 24 | if compat.COMPATIBLE_MODE['1.12']: 25 | from tensorflow.python.keras.engine.base_layer import InputSpec 26 | else: 27 | from tensorflow.python.keras.engine.input_spec import InputSpec 28 | 29 | class InstanceGaussianNoise(Layer): 30 | """Apply additive zero-centered Gaussian noise. 31 | This is useful to mitigate overfitting 32 | (you could see it as a form of random data augmentation). 33 | Gaussian Noise (GS) is a natural choice as corruption process 34 | for real valued inputs. 35 | As it is a regularization layer, it is only active at training time. 36 | Different from tf.keras.layers.GaussianNoise, in this method, we 37 | add the noise in the instance normalized space: 38 | `output = std * ( (input-mean) / std + N(0, eps) ) + mean`. 39 | where `eps ~ U(0, alpha)`. So the noise strength would be scale- 40 | invariant to the input data. 41 | # Arguments 42 | axis: Integer, the axis that should be normalized 43 | (typically the features axis). 44 | For instance, after a `Conv2D` layer with 45 | `data_format="channels_first"`, 46 | set `axis=1` in `InstanceGaussianNoise`. 47 | Setting `axis=None` will normalize all values in each 48 | instance of the batch (Layer Normalization). 49 | Axis 0 is the batch dimension. `axis` cannot be set to 0 50 | to avoid errors. 51 | alpha: float, maximal standard deviation of the noise 52 | distribution. For example, when alpha = 0.3, it means 53 | the noise would be at most 30% of the input. 54 | epsilon: Small float added to variance to avoid dividing by 55 | zero. 56 | # Input shape 57 | Arbitrary. Use the keyword argument `input_shape` 58 | (tuple of integers, does not include the samples axis) 59 | when using this layer as the first layer in a model. 60 | # Output shape 61 | Same shape as input. 
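    # Example
        A sketch with illustrative values: for a channels-last feature map,
            x = InstanceGaussianNoise(axis=-1, alpha=0.3)(x)
        injects, during training only, additive noise of at most 30% of the
        per-instance standard deviation.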
62 | """ 63 | def __init__(self, 64 | axis=None, 65 | alpha=0.3, 66 | epsilon=1e-3, 67 | **kwargs): 68 | super(InstanceGaussianNoise, self).__init__(**kwargs) 69 | self.supports_masking = True 70 | self.axis = axis 71 | self.alpha = alpha 72 | self.epsilon = epsilon 73 | 74 | def build(self, input_shape): 75 | ndim = len(input_shape) 76 | if self.axis == 0: 77 | raise ValueError('Axis cannot be zero') 78 | 79 | if (self.axis is not None) and (ndim == 2): 80 | raise ValueError('Cannot specify axis for rank 1 tensor') 81 | 82 | self.input_spec = InputSpec(ndim=ndim) 83 | 84 | if self.axis is None: 85 | shape = (1,) 86 | else: 87 | shape = (input_shape[self.axis],) 88 | 89 | self.built = True 90 | 91 | def call(self, inputs, training=None): 92 | input_shape = K.int_shape(inputs) 93 | reduction_axes = list(range(0, len(input_shape))) 94 | 95 | if self.axis is not None: 96 | del reduction_axes[self.axis] 97 | 98 | del reduction_axes[0] 99 | 100 | mean = K.mean(inputs, reduction_axes, keepdims=True) 101 | stddev = K.std(inputs, reduction_axes, keepdims=True) + self.epsilon 102 | normed = (inputs - mean) / stddev 103 | 104 | def noised(): 105 | eps = K.random_uniform(shape=[1], maxval=self.alpha) 106 | return inputs + K.random_normal(shape=K.shape(inputs), 107 | mean=0., 108 | stddev=eps) 109 | get_noised = K.in_train_phase(noised, normed, training=training) 110 | 111 | retrived = stddev * get_noised + mean 112 | return retrived 113 | 114 | def compute_output_shape(self, input_shape): 115 | return input_shape 116 | 117 | def get_config(self): 118 | config = { 119 | 'axis': self.axis, 120 | 'alpha': self.alpha 121 | } 122 | base_config = super(InstanceGaussianNoise, self).get_config() 123 | return dict(list(base_config.items()) + list(config.items())) 124 | 125 | def return_dropout(dropout_type, dropout_rate, axis=-1, rank=None): 126 | if dropout_type is None: 127 | return None 128 | elif dropout_type == 'plain': 129 | return Dropout(rate=dropout_rate) 130 | elif dropout_type == 'add': 131 | return InstanceGaussianNoise(axis=axis, alpha=dropout_rate) 132 | elif dropout_type == 'mul': 133 | return GaussianDropout(rate=dropout_rate) 134 | elif dropout_type == 'alpha': 135 | return AlphaDropout(rate=dropout_rate) 136 | elif dropout_type == 'spatial': 137 | if axis == 1: 138 | dformat = 'channels_first' 139 | else: 140 | dformat = 'channels_last' 141 | if rank == 1: 142 | return SpatialDropout1D(rate=dropout_rate) 143 | elif rank == 2: 144 | return SpatialDropout2D(rate=dropout_rate, data_format=dformat) 145 | elif rank == 3: 146 | return SpatialDropout3D(rate=dropout_rate, data_format=dformat) 147 | else: 148 | return None 149 | else: 150 | return None -------------------------------------------------------------------------------- /functions/losses.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ################################################################ 3 | # Functions - Losses 4 | # @ Modern Deep Network Toolkits for Tensorflow-Keras 5 | # Yuchen Jin @ cainmagi@gmail.com 6 | # Requirements: (Pay attention to version) 7 | # python 3.6+ 8 | # tensorflow r1.13+ 9 | # Extend loss functions. These functions could serve as both 10 | # losses and metrics. 11 | # Version: 0.10 # 2019/6/13 12 | # Comments: 13 | # Create this submodule, and finish linear_jaccard_loss 14 | # and lovasz_jaccard_loss. 
15 | ################################################################ 16 | ''' 17 | 18 | from tensorflow.python.keras import backend as K 19 | from tensorflow.python.keras import losses 20 | from tensorflow.python.ops import sort_ops 21 | from tensorflow.python.ops import array_ops 22 | from tensorflow.python.ops import math_ops 23 | from tensorflow.python.ops import functional_ops 24 | from tensorflow.python.ops import gen_math_ops 25 | from tensorflow.python.ops import gen_array_ops 26 | 27 | from .others import get_channels 28 | 29 | from functools import reduce 30 | def _get_prod(x): 31 | try: 32 | return reduce(lambda a,b:a*b, x) 33 | except TypeError: 34 | return x 35 | 36 | def linear_jaccard_loss(y_true, y_pred, data_format=None): 37 | '''Simple linear approximation for Jaccard index, 38 | or Intersection over Union (IoU). (loss) 39 | This function is a simple and linear approximation for IoU. The main idea is: 40 | 1. logical_and(y_true * y_pred) could be approximated by y_true * y_pred; 41 | 2. logical_or(y_true * y_pred) could be approximated by 42 | y_true + y_pred - y_true * y_pred. 43 | Such an approximation could ensure that when both y_true and y_pred are 44 | binary, this approximation would returns the exact same value compared to 45 | the original metric, IoU. 46 | It has been proved that when both x, y in [0, 1], there is 47 | x * y < x + y - x * y. 48 | To learn more about IoU, please check mdnt.metrics.jaccard_index. 49 | This function is implemented by: 50 | appx_jacc = 1 - [ sum(y_true * y_pred) ] / [ sum(y_true + y_pred - y_true * y_pred) ] 51 | We use unsafe division in the above equation. When x / y = 0, the unsafe division would 52 | returns 0. 53 | NOTE THAT THIS IMPLEMENTATION IS THE COMPLEMENTARY OF JACCARD INDEX. 54 | Arguments: 55 | data_format: 'channels_first' or 'channels_last'. The default setting is generally 56 | 'channels_last' like other tf.keras APIs. 57 | Input: 58 | y_true: label, tensor in any shape, should have at least 3 axes. 59 | y_pred: prediction, tensor in any shape, should have at least 3 axes. 60 | Output: 61 | scalar, the approximated and complementary mean Jaccard index between y_true and 62 | y_pred over all channels. 63 | ''' 64 | get_reduced_axes = get_channels(y_true, data_format) 65 | get_mul = y_true * y_pred 66 | valNumer = math_ops.reduce_sum(get_mul, axis=get_reduced_axes) 67 | valDomin = math_ops.reduce_sum(y_true + y_pred - get_mul, axis=get_reduced_axes) 68 | return 1-math_ops.reduce_mean(math_ops.div_no_nan(valNumer, valDomin)) 69 | 70 | def _lovasz_jaccard_flat(errors, y_true): 71 | '''PRIVATE: calculate lovasz extension for jaccard index along a vector. 72 | Input: 73 | errors: error vector (should be in 0~1). 74 | y_true: labels. 75 | Output: 76 | scalar: the jaccard index calculated on the input vector. 
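        Note: the errors are sorted in descending order, the discrete
        gradient `g` of the Jaccard loss is derived from cumulative sums of
        the sorted (binarized) labels, and the result is the inner product
        of the sorted errors with `g`, where gradients are stopped on `g`.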
77 | ''' 78 | p = errors.get_shape().as_list() 79 | if len(p) != 1: 80 | raise ValueError('Input should be vectors (1D).') 81 | p = p[0] 82 | bin_y_true = math_ops.cast(gen_math_ops.greater(y_true, 0.5), dtype=errors.dtype) 83 | error_ind = sort_ops.argsort(errors, direction='DESCENDING') 84 | sorted_errors = array_ops.gather(errors, error_ind) 85 | sorted_labels = array_ops.gather(bin_y_true, error_ind) 86 | get_sum = math_ops.reduce_sum(sorted_labels) 87 | intersection = get_sum - math_ops.cumsum(sorted_labels) 88 | union = get_sum + math_ops.cumsum(1.0 - sorted_labels) 89 | g = 1.0 - math_ops.div_no_nan(intersection, union) 90 | if p > 1: 91 | g = array_ops.concat((g[0:1], g[1:] - g[:-1]), axis=0) 92 | return math_ops.reduce_sum(sorted_errors*gen_array_ops.stop_gradient(g)) 93 | 94 | def lovasz_jaccard_loss(y_true, y_pred, error_func=None, data_format=None): 95 | '''Lovasz extension for Jaccard index, or Intersection over Union (IoU). (loss) 96 | This function applies the theory of Lovasz extension. Although Lovasz extension could 97 | be used on any submodular set function, the implementation is aimed at constructing 98 | the trainable complementary of IoU. 99 | To learn more about this topic, please refer: 100 | The Lovasz-Softmax loss: A tractable surrogate for the optimization of the 101 | intersection-over-union measure in neural networks 102 | https://arxiv.org/abs/1705.08790 103 | This implementation is not adapted from the author's github codes. It computes the 104 | Lovasz loss on each channel of each sample independently, and then calculate the 105 | average value. 106 | NOTE THAT THIS IMPLEMENTATION IS THE COMPLEMENTARY OF JACCARD INDEX. 107 | Arguments: 108 | error_func: the function that is used to calculate errors. If set None, would use 109 | L1 norm (linear interpolation). 110 | data_format: 'channels_first' or 'channels_last'. The default setting is generally 111 | 'channels_last' like other tf.keras APIs. 112 | Input: 113 | y_true: label, tensor in any shape, should have at least 3 axes. 114 | y_pred: prediction, tensor in any shape, should have at least 3 axes. 115 | Output: 116 | scalar, the approximated and complementary mean Jaccard index between y_true and 117 | y_pred over all channels. 118 | ''' 119 | get_shapes = y_true.get_shape().as_list() 120 | get_dims = len(get_shapes) 121 | if get_dims < 3: 122 | raise ValueError('The input tensor should has channel dimension, i.e. 
it should have at least 3 axes.') 123 | if data_format is None: 124 | data_format = K.image_data_format() 125 | if data_format == 'channels_last': 126 | get_permute_axes = (0, get_dims-1, *range(1, get_dims-1)) 127 | get_length = _get_prod(get_shapes[1:-1]) 128 | y_true = array_ops.transpose(y_true, perm=get_permute_axes) # switch to channels_first 129 | y_pred = array_ops.transpose(y_pred, perm=get_permute_axes) 130 | else: 131 | get_length = _get_prod(get_shapes[2:]) 132 | y_true = gen_array_ops.reshape([-1, get_length]) 133 | y_pred = gen_array_ops.reshape([-1, get_length]) 134 | if error_func is None: 135 | error_func = losses.mean_absolute_error 136 | def split_process(inputs): 137 | get_y_true, get_y_pred = inputs 138 | get_errors = error_func(get_y_true, get_y_pred) 139 | return _lovasz_jaccard_flat(get_errors, get_y_true) 140 | get_losses = functional_ops.map_fn(split_process, (y_true, y_pred), dtype=y_pred.dtype) 141 | return math_ops.reduce_mean(get_losses) -------------------------------------------------------------------------------- /layers/deprecated/external.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ################################################################ 3 | # Layers - External API layer (deprecated) 4 | # @ Modern Deep Network Toolkits for Tensorflow-Keras 5 | # Yuchen Jin @ cainmagi@gmail.com 6 | # Requirements: (Pay attention to version) 7 | # python 3.6+ 8 | # tensorflow r1.13+ 9 | # An abstract utility for introducing the outside API into the 10 | # tf-keras architecture. 11 | # Version: 0.20 # 2019/5/23 12 | # Comments: 13 | # Add class 'External' to this submodule. 14 | # Version: 0.10 # 2019/3/23 15 | # Comments: 16 | # Create this submodule. 17 | ################################################################ 18 | ''' 19 | 20 | import uuid 21 | from tensorflow.python.eager import context 22 | from tensorflow.python.framework import common_shapes 23 | from tensorflow.python.framework import ops 24 | from tensorflow.python.framework import tensor_shape 25 | from tensorflow.python.framework import dtypes 26 | from tensorflow.python.keras import activations 27 | from tensorflow.python.keras import backend as K 28 | from tensorflow.python.keras import constraints 29 | from tensorflow.python.keras import initializers 30 | from tensorflow.python.keras import regularizers 31 | from tensorflow.python.keras.engine.base_layer import Layer 32 | from tensorflow.python.ops import gen_math_ops 33 | from tensorflow.python.ops import nn 34 | from tensorflow.python.ops import standard_ops 35 | from tensorflow.python.ops import script_ops 36 | from tensorflow.python.ops import variables 37 | from tensorflow.python.keras.utils import tf_utils 38 | 39 | from .. import compat 40 | if compat.COMPATIBLE_MODE['1.12']: 41 | from tensorflow.python.keras.engine.base_layer import InputSpec 42 | else: 43 | from tensorflow.python.keras.engine.input_spec import InputSpec 44 | 45 | def dtype_serialize(input_dtypes): 46 | if isinstance(input_dtypes, list): 47 | return [dtypes.as_dtype(get_dt).as_datatype_enum() for get_dt in input_dtypes] 48 | else: 49 | return [dtypes.as_dtype(input_dtypes).as_datatype_enum()] 50 | 51 | def dtype_get(input_serials): 52 | if isinstance(input_serials, list): 53 | return [dtypes.as_dtype(get_dt) for get_dt in input_serials] 54 | else: 55 | return [dtypes.as_dtype(input_serials)] 56 | 57 | class External(Layer): 58 | """External API layer. 
59 | `External` is used to introduce a non-parameter function from an 60 | external library and enable it to participate the learning workflow. 61 | Therefore, this layer is requires users to provide: 62 | 1. The forward propagation function `forward()`. 63 | 2. The back propagation function `backward()`. 64 | Arguments: 65 | forward: the forward propagating function. 66 | backward: the back propagation function. 67 | Tin: a list of input tf.DType. 68 | Tout: a list of output tf.DType. 69 | stateful: a bool flag used to define whether the forward/backward 70 | function is calculated based on previous calculation. 71 | output_shape: a tf.TensorShape, a tuple/list or a function. It is 72 | used for estimating the output shape fast. 73 | Input shape: 74 | nD tensor with shape: `(batch_size, ..., output_dim_of_tied_layer)`. 75 | The most common situation would be 76 | a 2D input with shape `(batch_size, output_dim_of_tied_layer)`. 77 | Output shape: 78 | nD tensor with shape: `(batch_size, ..., units)`. 79 | For instance, for a 2D input with shape 80 | `(batch_size, input_dim_of_tied_layer)`, 81 | the output would have shape `(batch_size, input_dim_of_tied_layer)`. 82 | """ 83 | 84 | def __init__(self, 85 | forward, 86 | backward, 87 | Tin, 88 | Tout, 89 | output_shape=None, 90 | id=None, 91 | **kwargs): 92 | if 'input_shape' not in kwargs and 'input_dim' in kwargs: 93 | kwargs['input_shape'] = (kwargs.pop('input_dim'),) 94 | 95 | super(External, self).__init__(**kwargs) 96 | self.forward = activations.get(forward) 97 | self.backward = activations.get(backward) 98 | self.Tin = dtype_get(Tin) 99 | self.Tout = dtype_get(Tout) 100 | 101 | if id is None: # id is used to tag the newly created instance 102 | self._id = 'PyExternal' + str(uuid.uuid4()) 103 | else: 104 | self._id = id 105 | 106 | if output_shape is None: 107 | raise NotImplementedError('We could not automatically infer ' 108 | 'the static shape of the External\'s output.' 109 | ' Please specify the `output_shape` for' 110 | ' this External.') 111 | else: 112 | self._output_shape = activations.get(output_shape) 113 | 114 | self.supports_masking = True 115 | 116 | def backward_tensor(self, op, *grad): 117 | x = op.inputs 118 | return script_ops.py_func(self.backward, [*x, *grad], self.Tin, name=self.name+'Grad') 119 | 120 | def call(self, inputs): 121 | if isinstance(inputs, list): 122 | inputs = [ops.convert_to_tensor(one_input) for one_input in inputs] 123 | else: 124 | inputs = [ops.convert_to_tensor(inputs)] 125 | # Register and override the gradients 126 | ops.RegisterGradient(self._id)(self.backward_tensor) 127 | g = ops.get_default_graph() 128 | with g.gradient_override_map({"PyFunc": self._id, "pyfunc_0": self._id, "PyFuncStateless": self._id}): 129 | res = script_ops.py_func(self.forward, inputs, self.Tout, name=self.name) 130 | oshape = self._output_shape([inp.get_shape() for inp in inputs]) 131 | if isinstance(res, list): 132 | for i in range(len(res)): 133 | res[i].set_shape(oshape[i]) 134 | return res 135 | 136 | @tf_utils.shape_type_conversion 137 | def compute_output_shape(self, input_shape): 138 | if self._output_shape is None: 139 | raise NotImplementedError('We could not automatically infer ' 140 | 'the static shape of the External\'s output.' 
141 | ' Please specify the `output_shape` for' 142 | ' this External.') 143 | else: 144 | shape = self._output_shape(input_shape) 145 | if not isinstance(shape, (list, tuple)): 146 | raise ValueError( 147 | '`output_shape` function must return a tuple or a list of tuples.') 148 | # List here can represent multiple outputs or single output. 149 | if isinstance(shape, list): 150 | # Convert list representing single output into a tuple. 151 | if isinstance(shape[0], (int, type(None))): 152 | shape = tuple(shape) 153 | else: 154 | return [ 155 | tensor_shape.TensorShape(single_shape) for single_shape in shape 156 | ] 157 | return tensor_shape.TensorShape(shape) 158 | 159 | def get_config(self): 160 | config = { 161 | 'forward': activations.serialize(self.forward), 162 | 'backward': activations.serialize(self.backward), 163 | 'Tin': dtype_serialize(self.Tin), 164 | 'Tout': dtype_serialize(self.Tout), 165 | 'output_shape': activations.serialize(self._output_shape), 166 | 'id': self._id, 167 | } 168 | base_config = super(External, self).get_config() 169 | return dict(list(base_config.items()) + list(config.items())) 170 | -------------------------------------------------------------------------------- /layers/external.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ################################################################ 3 | # Layers - External API layer 4 | # @ Modern Deep Network Toolkits for Tensorflow-Keras 5 | # Yuchen Jin @ cainmagi@gmail.com 6 | # Requirements: (Pay attention to version) 7 | # python 3.6+ 8 | # tensorflow r1.13+ 9 | # An abstract utility for introducing the outside API into the 10 | # tf-keras architecture. 11 | # Version: 0.25 # 2019/11/27 12 | # Comments: 13 | # Fix a bug for 'External' when having multiple inputs. 14 | # Version: 0.20 # 2019/5/23 15 | # Comments: 16 | # Add class 'External' to this submodule. 17 | # Version: 0.10 # 2019/3/23 18 | # Comments: 19 | # Create this submodule. 20 | ################################################################ 21 | ''' 22 | 23 | from tensorflow.python.framework import ops 24 | from tensorflow.python.framework import tensor_shape 25 | from tensorflow.python.framework import dtypes 26 | from tensorflow.python.keras import activations 27 | from tensorflow.python.keras import backend as K 28 | from tensorflow.python.keras.engine.base_layer import Layer 29 | from tensorflow.python.ops import array_ops 30 | from tensorflow.python.ops import script_ops 31 | from tensorflow.python.ops import custom_gradient 32 | from tensorflow.python.keras.utils import tf_utils 33 | 34 | def dtype_serialize(input_dtypes): 35 | if isinstance(input_dtypes, list): 36 | return [dtypes.as_dtype(get_dt).as_datatype_enum for get_dt in input_dtypes] 37 | else: 38 | return [dtypes.as_dtype(input_dtypes).as_datatype_enum] 39 | 40 | def dtype_get(input_serials): 41 | if isinstance(input_serials, list): 42 | return [dtypes.as_dtype(get_dt) for get_dt in input_serials] 43 | else: 44 | return [dtypes.as_dtype(input_serials)] 45 | 46 | class PyExternal(Layer): 47 | """External API layer for generic python function. 48 | `PyExternal` is used to introduce a non-parameter function from an 49 | external library and enable it to participate the learning workflow. 50 | Therefore, this layer is requires users to provide: 51 | 1. The forward propagation function `forward()`. 52 | 2. The back propagation function `backward()`. 53 | 3. The shape calculation function `output_shape()`. 
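    For example (a hypothetical sketch; the wrapped function is only
    illustrative), a numpy-style doubling op could be introduced as
        PyExternal(forward=lambda x: 2.0 * x,
                   backward=lambda x, dy: 2.0 * dy,
                   output_shape=lambda shapes: shapes,
                   Tin=['float32'], Tout=['float32'])
    where, under the default switches (`xEnable=True`, `yEnable=False`,
    `dyEnable=True`), `backward` receives `(x, dy)` and returns `dx`.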
54 | Arguments: 55 | forward: the forward propagating function. It serves as 56 | `y=F(x)`, where `x` may be a list of multiple inputs. 57 | backward: the back propagation function. It serves as 58 | `dx=B(...)`, where the input of this function is 59 | determined by `xEnable`, `yEnable`, `dyEnable`. 60 | Tin: a list of input tf.DType. 61 | Tout: a list of output tf.DType. 62 | output_shape: a tf.TensorShape, a tuple/list or a function. It is 63 | used for estimating the output shape fast. 64 | xEnable, 65 | yEnable, 66 | dyEnable: enable users to customize the input of the backward 67 | function. If only the `xEnable` is `True`, the input 68 | of the function would be `B(x)`, For another example, 69 | if only both `yEnable` and `dyEnable` are `True`, the 70 | input of the function would be `B(y, dy)`. 71 | Input shape: 72 | nD tensor with shape: `(batch_size, ..., output_dim_of_tied_layer)`. 73 | The most common situation would be 74 | a 2D input with shape `(batch_size, output_dim_of_tied_layer)`. 75 | Output shape: 76 | nD tensor with shape: `(batch_size, ..., units)`. 77 | For instance, for a 2D input with shape 78 | `(batch_size, input_dim_of_tied_layer)`, 79 | the output would have shape `(batch_size, input_dim_of_tied_layer)`. 80 | """ 81 | 82 | def __init__(self, 83 | forward, 84 | backward, 85 | output_shape, 86 | Tin, 87 | Tout, 88 | xEnable=True, 89 | yEnable=False, 90 | dyEnable=True, 91 | **kwargs): 92 | if 'input_shape' not in kwargs and 'input_dim' in kwargs: 93 | kwargs['input_shape'] = (kwargs.pop('input_dim'),) 94 | 95 | super(PyExternal, self).__init__(**kwargs) 96 | self.forward = activations.get(forward) 97 | self.backward = activations.get(backward) 98 | self.Tin = dtype_get(Tin) 99 | self.Tout = dtype_get(Tout) 100 | self.xEnable = xEnable 101 | self.yEnable = yEnable 102 | self.dyEnable = dyEnable 103 | 104 | if output_shape is None: 105 | raise NotImplementedError('We could not automatically infer ' 106 | 'the static shape of the PyExternal\'s output.' 107 | ' Please specify the `output_shape` for' 108 | ' this PyExternal.') 109 | else: 110 | self._output_shape = activations.get(output_shape) 111 | 112 | self.supports_masking = True 113 | 114 | def call(self, inputs): 115 | if isinstance(inputs, list): 116 | inputs = [ops.convert_to_tensor(one_input) for one_input in inputs] 117 | else: 118 | inputs = [ops.convert_to_tensor(inputs)] 119 | 120 | # Define ops with first-order gradients 121 | @custom_gradient.custom_gradient 122 | def _external_func(*x): 123 | y = script_ops.eager_py_func(self.forward, x, self.Tout, name='pyfunc') 124 | def _external_func_grad(*grad): 125 | iList = [] 126 | if self.xEnable: 127 | iList.extend(x) 128 | if self.yEnable: 129 | if isinstance(y, (list, tuple)): 130 | iList.extend(y) 131 | else: 132 | iList.append(y) 133 | if self.dyEnable: 134 | iList.extend(grad) 135 | return script_ops.eager_py_func(self.backward, iList, self.Tin) 136 | return y, _external_func_grad 137 | 138 | res = _external_func(*inputs) 139 | oshape = self._output_shape([inp.get_shape() for inp in inputs]) 140 | if isinstance(res, list): 141 | for i in range(len(res)): 142 | res[i].set_shape(oshape[i]) 143 | return res 144 | 145 | @tf_utils.shape_type_conversion 146 | def compute_output_shape(self, input_shape): 147 | shape = self._output_shape(input_shape) 148 | if not isinstance(shape, (list, tuple)): 149 | raise ValueError( 150 | '`output_shape` function must return a tuple or a list of tuples.') 151 | # List here can represent multiple outputs or single output. 
152 | if isinstance(shape, list): 153 | # Convert list representing single output into a tuple. 154 | if isinstance(shape[0], (int, type(None))): 155 | shape = tuple(shape) 156 | else: 157 | return [ 158 | tensor_shape.TensorShape(single_shape) for single_shape in shape 159 | ] 160 | return tensor_shape.TensorShape(shape) 161 | 162 | def compute_mask(self, inputs, mask=None): 163 | if mask is None: 164 | return None 165 | if (not isinstance(inputs, list)) and (not isinstance(mask, list)): 166 | return super(PyExternal, self).compute_mask(inputs=inputs, mask=mask) 167 | if not isinstance(mask, list): 168 | raise ValueError('`mask` should be a list.') 169 | if not isinstance(inputs, list): 170 | raise ValueError('`inputs` should be a list.') 171 | if len(mask) != len(inputs): 172 | raise ValueError('The lists `inputs` and `mask` ' 173 | 'should have the same length.') 174 | if all(m is None for m in mask): 175 | return None 176 | masks = [array_ops.expand_dims(m, axis=0) for m in mask if m is not None] 177 | return K.all(K.concatenate(masks, axis=0), axis=0, keepdims=False) 178 | 179 | def get_config(self): 180 | config = { 181 | 'forward': activations.serialize(self.forward), 182 | 'backward': activations.serialize(self.backward), 183 | 'Tin': dtype_serialize(self.Tin), 184 | 'Tout': dtype_serialize(self.Tout), 185 | 'output_shape': activations.serialize(self._output_shape), 186 | 'xEnable': self.xEnable, 187 | 'yEnable': self.yEnable, 188 | 'dyEnable': self.dyEnable 189 | } 190 | base_config = super(PyExternal, self).get_config() 191 | return dict(list(base_config.items()) + list(config.items())) 192 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ################################################################ 3 | # Modern Deep Network Toolkits for Tensorflow-Keras 4 | # Yuchen Jin @ cainmagi@gmail.com 5 | # Requirements: (Pay attention to version) 6 | # python 3.6+ 7 | # tensorflow r1.13+ 8 | # This is a pakage for extending the tensorflow-keras to modern 9 | # deep network design. It would introduce some state-of-art 10 | # network blocks, data parsing utilities, logging modules and 11 | # more extensions. 12 | # Loading this module would not cause conflictions on other 13 | # modules (if users do not use `from mdnt import *` to override 14 | # utilites from other modules. However, it will provide some 15 | # tools with the same/similar name and functions compared to 16 | # plain tensorflow-keras. 17 | # Version: 0.80 # 2020/08/30 18 | # Comments: 19 | # 1. Extend the compatible mode. 20 | # 2. Fix bugs and add features in .utilities.draw. 21 | # 3. Add features in .data.h5py 22 | # 4. Fix bugs in .layers.activation. 23 | # Version: 0.79 # 2020/02/10 24 | # Comments: 25 | # 1. Finish H5Converter in .data. 26 | # Version: 0.78-b # 2019/12/05 27 | # Comments: 28 | # 1. Fix some bugs and add features in .utilities.draw. 29 | # 2. Add webfiles.zip for .utilities.tboard. 30 | # 3. Fix a small bug in .utilities. 31 | # Version: 0.78 # 2019/11/27 32 | # Comments: 33 | # 1. Enhance the save_model/load_model for supportting 34 | # storing/recovering customized loss/metric class. 35 | # 2. Finish the submodule .utilities.draw for providing 36 | # extended visualizations. 37 | # 3. Finish the submodule .utilities.tboard for providing 38 | # extended tensorboard interfaces. 39 | # 4. Fix some bugs. 
40 | # Version: 0.73-b # 2019/10/27 41 | # Comments: 42 | # 1. Let save_model support compression. 43 | # 2. Revise the optional arguments for RestrictSub in 44 | # .layers. 45 | # Version 0.73 # 2019/10/24 46 | # Comments: 47 | # 1. Fix a bug for H5GCombiner in .data. 48 | # 2. Finish H5VGParser in .data. 49 | # 3. Finish ExpandDims in .layers. 50 | # 4. Enable ModelCheckpoint in .utilities.callbacks to 51 | # support the option for not saving optimizer. 52 | # Version 0.72 # 2019/10/22 53 | # Comments: 54 | # 1. Fix a bug for Ghost in .layers. 55 | # 2. Finish Slice, Restrict and RestrictSub in .layers. 56 | # Version 0.70 # 2019/10/15 57 | # Comments: 58 | # 1. Let save_model/load_model support storing/recovering 59 | # variable loss weights. 60 | # 2. Finish LossWeightsScheduler in .utilities.callbacks. 61 | # Version 0.69-b # 2019/10/7 62 | # Comments: 63 | # Enable the H5SupSaver to add more data to an existed file. 64 | # Version 0.69 # 2019/9/10 65 | # Comments: 66 | # Enable the H5SupSaver in .data to expand if data is dumped 67 | # in series. 68 | # Version 0.68 # 2019/6/27 69 | # Comments: 70 | # 1. Finish MNadam, Adabound and Nadabound in .optimizers. 71 | # 2. Slightly change .optimizers.mixture. 72 | # 3. Change the quick interface in .optimizers. 73 | # Version 0.64-b # 2019/6/26 74 | # Comments: 75 | # 1. Finish the demo version for SWATS in .optimizers. 76 | # 2. Fix a small bug for .load_model 77 | # Version 0.64 # 2019/6/24 78 | # Comments: 79 | # 1. Finish ModelWeightsReducer in .utilities.callbacks. 80 | # 2. Finish Ghost in .layers. 81 | # 3. Fix small bugs. 82 | # Version 0.63 # 2019/6/23 83 | # Comments: 84 | # 1. Fix the bugs of manually switched optimizers in 85 | # .optimizers. Now they require to be used with a callback 86 | # or switch the phase by switch(). 87 | # 2. Add a plain momentum SGD optimizer to fast interface in 88 | # .optimizers. 89 | # 3. Finish OptimizerSwitcher in .utilities.callbacks. It 90 | # is used to control the phase of the manually swtiched 91 | # optimizers. 92 | # 4. Improve the efficiency for Adam2SGD and NAdam2NSGD in 93 | # .optimizers. 94 | # Version 0.62 # 2019/6/21 95 | # Comments: 96 | # 1. Finish the manually switched optimizers in .optimizers: 97 | # Adam2SGD and NAdam2NSGD. Both of them supports amsgrad 98 | # mode. 99 | # 2. Adjust the fast interface .optimizers.optimizer. Now 100 | # it supports 2 more tensorflow based optimizers and the 101 | # default momentum of Nesterov SGD optimizer is changed 102 | # to 0.9. 103 | # Version 0.60-b # 2019/6/20 104 | # Comments: 105 | # 1. Fix some bugs in .layers.conv and .layers.unit. 106 | # 2. Remove the normalization layer from all projection 107 | # branches in .layers.residual and .layers.inception. 108 | # Version 0.60 # 2019/6/19 109 | # Comments: 110 | # 1. Support totally new save_model and load_model APIs in 111 | # .utilites. 112 | # 2. Finish ModelCheckpoint in .utilities.callbacks. 113 | # Version: 0.56 # 2019/6/13 114 | # Comments: 115 | # Finish losses.linear_jaccard_index, 116 | # losses.lovasz_jaccard_loss, 117 | # metrics.signal_to_noise, 118 | # metrics.correlation, 119 | # metrics.jaccard_index 120 | # in .functions (may require tests in the future). 121 | # Version: 0.54 # 2019/6/12 122 | # Comments: 123 | # 1. Add dropout options to all advanced blocks (including 124 | # residual, ResNeXt, inception, incept-res and incept- 125 | # plus). 126 | # 2. Strengthen the compatibility. 127 | # 3. Fix minor bugs for spatial dropout in 0.50-b. 128 | # 4. Thanks to GOD! 
.layers has been finished, although it 129 | # may require modification in the future. 130 | # Version: 0.50-b # 2019/6/11 131 | # Comments: 132 | # 1. Fix a bug for implementing the channel_first mode for 133 | # AConv in .layers. 134 | # 2. Finish InstanceGaussianNoise in .layers. 135 | # 3. Prepare the test for adding dropout to residual layers 136 | # in .layers. 137 | # Version: 0.50 # 2019/6/11 138 | # Comments: 139 | # 1. Finish Conv1DTied, Conv2DTied, Conv3DTied in .layers. 140 | # 2. Switch back to the 0.48 version for .layers.DenseTied 141 | # APIs because testing show that the modification in 142 | # 0.48-b will cause bugs. 143 | # Version: 0.48-b # 2019/6/10 144 | # Comments: 145 | # A Test on replacing the .layers.DenseTied APIs like 146 | # tf.keras.layers.Wrappers. 147 | # Version: 0.48 # 2019/6/9 148 | # Comments: 149 | # 1. Finish Inceptplus1D, Inceptplus2D, Inceptplus3D, 150 | # Inceptplus1DTranspose, Inceptplus2DTranspose, 151 | # Inceptplus3DTranspose in .layers. 152 | # 2. Minor changes for docstrings and default settings in 153 | # .layers.inception. 154 | # Version: 0.45-b # 2019/6/7 155 | # Comments: 156 | # 1. Enable the ResNeXt to estimate the latent group and local 157 | # filter number. 158 | # 2. Make a failed try on implementing quick group convolution, 159 | # testing results show that using tf.nn.depthwise_conv2d 160 | # to replace multiple convND ops would cause the computation 161 | # to be even slower. 162 | # Version: 0.45 # 2019/6/6 163 | # Comments: 164 | # 1. Enable Modern convolutional layers to work with group 165 | # convolution. 166 | # 2. Reduce the memory consumption for network construction 167 | # when using ResNeXt layers in case of out of memory (OOM) 168 | # problems. 169 | # 3. Fix a minor bug for group convolution. 170 | # Version: 0.42 # 2019/6/5 171 | # Comments: 172 | # 1. Add GroupConv1D, GroupConv2D, GroupConv3D in .layers. 173 | # 2. Fix the bugs in channel detections for residual and 174 | # inception layers. 175 | # Version: 0.40 # 2019/6/5 176 | # Comments: 177 | # 1. Finish Resnext1D, Resnext2D, Resnext3D, 178 | # Resnext1DTranspose, Resnext2DTranspose, 179 | # Resnext3DTranspose in .layers. 180 | # 2. Fix the repeating biases problems in inception-residual 181 | # layers. 182 | # Version: 0.38 # 2019/6/4 183 | # Comments: 184 | # 1. Finish Inceptres1D, Inceptres2D, Inceptres3D, 185 | # Inceptres1DTranspose, Inceptres2DTranspose, 186 | # Inceptres3DTranspose in .layers. 187 | # 2. Fix some bugs and revise docstrings for .layers.residual and 188 | # .layers.inception. 189 | # Version: 0.36 # 2019/6/1 190 | # Comments: 191 | # Finish Inception1D, Inception2D, Inception3D, 192 | # Inception1DTranspose, Inception2DTranspose, 193 | # Inception3DTranspose in .layers. 194 | # Version: 0.32 # 2019/5/31 195 | # Comments: 196 | # Finish Residual1D, Residual2D, Residual3D, Residual1DTranspose, 197 | # Residual2DTranspose, Residual3DTranspose in .layers. 198 | # Version: 0.28 # 2019/5/24 199 | # Comments: 200 | # 1. Fix the bug about padding for transposed dilation 201 | # convolutional layers. 202 | # 2. Add a new option output_mshape to help transposed 203 | # convolutional layers to control the desired output shape. 204 | # 3. Finish PyExternal in .layers. 205 | # Version: 0.24 # 2019/3/31 206 | # Comments: 207 | # Finish H5GCombiner in .data. 208 | # Version: 0.23 # 2019/3/26 209 | # Comments: 210 | # 1. Use keras.Sequence() to redefine H5GParser and 211 | # H5HGParser. 212 | # 2. Add compatible check. 
213 | # Version: 0.22 # 2019/3/26 214 | # Comments: 215 | # Adjust the .data.h5py module to make it more generalized. 216 | # Version: 0.20 # 2019/3/26 217 | # Comments: 218 | # Finish H5HGParser, H5SupSaver, H5GParser in .data. 219 | # Finish DenseTied, InstanceNormalization, GroupNormalization, 220 | # AConv1D, AConv2D, AConv3D, AConv1DTranspose, 221 | # AConv2DTranspose, AConv3DTranspose in .layers. 222 | # Version: 0.10 # 2019/3/23 223 | # Comments: 224 | # Create this project. 225 | ################################################################ 226 | ''' 227 | 228 | # Import sub-modules 229 | from . import optimizers 230 | from . import layers 231 | from . import data 232 | from . import functions 233 | from . import utilities 234 | 235 | __version__ = '0.80' 236 | 237 | # Alias 238 | save_model = utilities.save_model 239 | load_model = utilities.load_model 240 | 241 | __all__ = [ 242 | 'optimizers', 'layers', 'data', 'functions', 'utilities', 243 | 'save_model', 'load_model' 244 | ] 245 | 246 | # Set this local module as the prefered one 247 | from pkgutil import extend_path 248 | __path__ = extend_path(__path__, __name__) 249 | 250 | # Delete private sub-modules and objects 251 | del extend_path -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Modern Deep Network Toolkits for Tensorflow-Keras 2 | 3 | We proudly present our newest produce, a totally well-defined extension for Tensorflow-Keras users! 4 | 5 | ## Documentation 6 | 7 | Still not available now, will implement in the future. 8 | 9 | ## Progress 10 | 11 | Now we have such progress on the semi-product: 12 | 13 | - [x] optimzers: 14 | - [x] Manually switched optimizers (`Adam2SGD` and `NAdam2NSGD`). 15 | - [x] Automatically switched optimizer (`SWATS`). 16 | - [x] Advanced adaptive optimizers ( `Adabound`, `Nadabound` and `MNadam` supporting `amsgrad`). 17 | - [x] Wrapped default optimizers. 18 | - [x] layers: 19 | - [x] Ghost layer (used to construct trainable input layer). 20 | - [x] Tied dense layer for the symmetric autoencoder. 21 | - [x] Extended dropout and noise layers. 22 | - [x] Extended activation layers. 23 | - [x] Extended normalization layers. 24 | - [x] Group convolutional layers. 25 | - [x] Modern convolutional layers (support group convolution). 26 | - [x] Modern transposed convolutional layers (support group convolution). 27 | - [x] Tied (trivial) transposed convolutional layers for the symmetric autoencoder. 28 | - [x] Residual layers (or blocks) and their transposed versions. 29 | - [x] ResNeXt layers (or blocks) and their transposed versions. 30 | - [x] Inception-v4 layers (or blocks) and their transposed versions. 31 | - [x] InceptionRes-v2 layers (or blocks) and their transposed versions. 32 | - [x] InceptionPlus layers (or blocks) and their transposed versions. 33 | - [x] External interface for using generic python function. 34 | - [x] Droupout method options for all avaliable modern layers. 35 | - [ ] data: 36 | - [x] Basic h5py (HDF5) IO handles. 37 | - [ ] Basic SQLite IO handles. 38 | - [ ] Basic Bcolz IO handles. 39 | - [ ] Basic CSV IO handles. 40 | - [ ] Basic JSON IO handles. 41 | - [ ] Data parsing utilities. 
42 | - [ ] estimators: 43 | - [ ] VGG16 44 | - [ ] U-Net 45 | - [ ] ResNet 46 | - [x] functions: 47 | - [x] (loss): Lovasz loss for IoU 48 | - [x] (loss): Linear interpolated loss for IoU 49 | - [x] (metrics): signal-to-noise ratio (SNR and PSNR) 50 | - [x] (metrics): Pearson correlation coefficient 51 | - [x] (metrics): IoU / Jaccard index 52 | - [ ] utilities: 53 | - [x] Revised save and load model functions. 54 | - [ ] Beholder plug-in callback. 55 | - [x] Revised ModelCheckpoint callback. 56 | - [x] LossWeightsScheduler callback (for changing the loss weights during the training). 57 | - [x] OptimizerSwitcher callback (for using manually switched optimizers). 58 | - [x] ModelWeightsReducer callback (parameter decay strategy including L1 decay and L2 decay). 59 | - [x] Extended data visualization tools. 60 | - [x] Tensorboard log file parser. 61 | 62 | ## Demos 63 | 64 | Check the branch [`demos`][brch-demos] to learn more details. 65 | 66 | ## Update records 67 | 68 | ### 0.79 @ 02/10/2020 69 | 70 | 1. Finish H5Converter `H5Converter` in `.data`. 71 | 72 | ### 0.78-b @ 12/05/2019 73 | 74 | 1. Fix some bugs and add features in `.utilities.draw`. 75 | 2. Add `webfiles.zip` for `.utilities.tboard`. 76 | 3. Fix a small bug in `.utilities`. 77 | 78 | ### 0.78 @ 11/27/2019 79 | 80 | 1. Enhance the `save_model`/`load_model` for supportting storing/recovering customized loss/metric class. 81 | 2. Finish the submodule `.utilities.draw` for providing extended visualizations. 82 | 3. Finish the submodule `.utilities.tboard` for providing extended tensorboard interfaces. 83 | 4. Fix some bugs. 84 | 85 | ### 0.73-b @ 10/27/2019 86 | 87 | 1. Let `.save_model` support compression. 88 | 2. Revise the optional arguments for `RestrictSub` in `.layers`. 89 | 90 | ### 0.73 @ 10/24/2019 91 | 92 | 1. Fix a bug for `H5GCombiner` in `.data` when adding more parsers. 93 | 2. Finish `H5VGParser` in `.data`, this parser is used for splitting validation set from a dataset. 94 | 3. Finish `ExpandDims` in `.layers`, it is a layer version of `tf.expand_dims`. 95 | 4. Enable `ModelCheckpoint` in `.utilities.callbacks` to support the option for not saving optimizer. 96 | 97 | ### 0.72 @ 10/22/2019 98 | 99 | 1. Fix a bug for serializing `Ghost` in `.layers`. 100 | 2. Finish activation layers in `.layers`, including `Slice`, `Restrict` and `RestrictSub`. 101 | 102 | ### 0.70 @ 10/15/2019 103 | 104 | 1. Let `.save_model`/`.load_model` supports storing/recovering variable loss weights. 105 | 2. Finish `LossWeightsScheduler` in `.utilities.callbacks`. 106 | 107 | ### 0.69-b @ 10/07/2019 108 | 109 | Enable the `H5SupSaver` in `.data` to add more data to an existed file. 110 | 111 | ### 0.69 @ 09/10/2019 112 | 113 | Enable the `H5SupSaver` in `.data` to expand if data is dumped in series. 114 | 115 | ### 0.68 @ 06/27/2019 116 | 117 | 1. Finish `MNadam`, `Adabound` and `Nadabound` in `.optimizers`. 118 | 2. Slightly change `.optimizers.mixture`. 119 | 3. Change the quick interface in `.optimizers`. 120 | 121 | ### 0.64-b @ 06/26/2019 122 | 123 | 1. Finish the demo version for `SWATS` in `.optimizers`. Need further tests. 124 | 2. Fix a small bug for `.load_model`. 125 | 3. Change the warning backend to tensorflow version. 126 | 127 | ### 0.64 @ 06/24/2019 128 | 129 | 1. Finish `ModelWeightsReducer` in `.utilities.callbacks`. 130 | 2. Finish `Ghost` in `.layers`. 131 | 3. Fix small bugs. 132 | 133 | ### 0.63 @ 06/23/2019 134 | 135 | 1. 
Fix the bugs of manually switched optimizers in `.optimizers.` Now they require to be used with a callback or switch the phase by `switch()`. 136 | 2. Add a plain momentum SGD optimizer to fast interface in `.optimizers`. 137 | 3. Finish `OptimizerSwitcher` in `.utilities.callbacks`. It is used to control the phase of the manually swtiched optimizers. 138 | 4. Improve the efficiency for `Adam2SGD` and `NAdam2NSGD` in `.optimizers`. 139 | 140 | ### 0.62 @ 06/21/2019 141 | 142 | 1. Finish the manually switched optimizers in `.optimizers`: `Adam2SGD` and `NAdam2NSGD`. Both of them supports amsgrad mode. 143 | 2. Adjust the fast interface `.optimizers.optimizer`. Now it supports 2 more tensorflow based optimizers and the default momentum of Nesterov SGD optimizer is changed to 0.9. 144 | 145 | ### 0.60-b @ 06/20/2019 146 | 147 | 1. Fix some bugs in `.layers.conv` and `.layers.unit`. 148 | 2. Remove the normalization layer from all projection branches in `.layers.residual` and `.layers.inception`. 149 | 150 | ### 0.60 @ 06/19/2019 151 | 152 | 1. Support totally new `save_model` and `load_model` APIs in `.utilites`. 153 | 2. Finish `ModelCheckpoint` in `.utilities.callbacks`. 154 | 155 | ### 0.56 @ 06/13/2019 156 | 157 | Finish `losses.linear_jaccard_index`, `losses.lovasz_jaccard_loss`, `metrics.signal_to_noise`, `metrics.correlation`, `metrics.jaccard_index` in `.functions` (may require tests in the future). 158 | 159 | ### 0.54 @ 06/12/2019 160 | 161 | 1. Add dropout options to all advanced blocks (including residual, ResNeXt, inception, incept-res and incept-plus). 162 | 2. Strengthen the compatibility. 163 | 3. Fix minor bugs for spatial dropout in `0.50-b`. 164 | 4. Thanks to GOD! `.layers` has been finished, although it may require modification in the future. 165 | 166 | ### 0.50-b @ 06/11/2019 167 | 168 | 1. Fix a bug for implementing the channel_first mode for `AConv` in `.layers`. 169 | 2. Finish `InstanceGaussianNoise` in `.layers`. 170 | 3. Prepare the test for adding dropout to residual layers in `.layers`. 171 | 172 | ### 0.50 @ 06/11/2019 173 | 174 | 1. Finish `Conv1DTied`, `Conv2DTied`, `Conv3DTied` in `.layers`. 175 | 2. Switch back to the 0.48 version for `.layers.DenseTied` APIs because testing show that the modification in 0.48-b will cause bugs. 176 | 177 | ### 0.48-b @ 06/10/2019 178 | 179 | A Test on replacing the `.layers.DenseTied` APIs like `tf.keras.layers.Wrappers`. 180 | 181 | ### 0.48 @ 06/09/2019 182 | 183 | 1. Finish `Inceptplus1D`, `Inceptplus2D`, `Inceptplus3D`, `Inceptplus1DTranspose`, `Inceptplus2DTranspose`, `Inceptplus3DTranspose` in `.layers`. 184 | 2. Minor changes for docstrings and default settings in `.layers.inception`. 185 | 186 | ### 0.45-b @ 06/07/2019 187 | 188 | 1. Enable the `ResNeXt` to estimate the latent group and local filter number. 189 | 2. Make a failed try on implementing quick group convolution, testing results show that using `tf.nn.depthwise_conv2d` to replace multiple `convND` ops would cause the computation to be even slower. 190 | 191 | ### 0.45 @ 06/06/2019 192 | 193 | 1. Enable Modern convolutional layers to work with group convolution. 194 | 2. Reduce the memory consumption for network construction when using ResNeXt layers in case of out of memory (OOM) problems. 195 | 3. Fix a minor bug for group convolution. 196 | 197 | ### 0.42 @ 06/05/2019 198 | 199 | 1. Finish `GroupConv1D`, `GroupConv2D`, `GroupConv3D` in `.layers`. 200 | 2. Fix the bugs in channel detections for residual and inception layers. 
201 | 202 | ### 0.40 @ 06/05/2019 203 | 204 | 1. Finish `Resnext1D`, `Resnext2D`, `Resnext3D`, `Resnext1DTranspose`, `Resnext2DTranspose`, `Resnext3DTranspose` in `.layers`. 205 | 2. Fix the repeating biases problems in inception-residual layers. 206 | 207 | ### 0.38 @ 06/04/2019 208 | 209 | 1. Finish `Inceptres1D`, `Inceptres2D`, `Inceptres3D`, `Inceptres1DTranspose`, `Inceptres2DTranspose`, `Inceptres3DTranspose` in `.layers`. 210 | 2. Fix some bugs and revise docstrings for `.layers.residual` and `.layers.inception`. 211 | 212 | ### 0.36 @ 06/01/2019 213 | 214 | Finish `Inception1D`, `Inception2D`, `Inception3D`, `Inception1DTranspose`, `Inception2DTranspose`, `Inception3DTranspose` in `.layers`. 215 | 216 | ### 0.32 @ 05/31/2019 217 | 218 | Finish `Residual1D`, `Residual2D`, `Residual3D`, `Residual1DTranspose`, `Residual2DTranspose`, `Residual3DTranspose` in `.layers`. 219 | 220 | ### 0.28 @ 05/24/2019 221 | 222 | 1. Fix the bug about padding for transposed dilation convolutional layers. 223 | 2. Add a new option `output_mshape` to help transposed convolutional layers to control the desired output shape. 224 | 3. Finish `PyExternal` in `.layers`. 225 | 226 | ### 0.24 @ 03/31/2019 227 | 228 | Finish `H5GCombiner` in `.data`. 229 | 230 | ### 0.23 @ 03/27/2019 231 | 232 | 1. Use `keras.Sequence()` to redefine `H5GParser` and `H5HGParser`. 233 | 2. Add compatible check. 234 | 235 | ### 0.22 @ 03/26/2019 236 | 237 | Adjust the `.data.h5py` module to make it more generalized. 238 | 239 | ### 0.20 @ 03/26/2019 240 | 241 | 1. Finish `H5HGParser`, `H5SupSaver`, `H5GParser` in `.data`. 242 | 2. Finish `DenseTied`, `InstanceNormalization`, `GroupNormalization`, `AConv1D`, `AConv2D`, `AConv3D`, `AConv1DTranspose`, `AConv2DTranspose`, `AConv3DTranspose` in `.layers`. 243 | 244 | ### 0.10 @ 03/23/2019 245 | 246 | Create this project. 247 | 248 | [brch-demos]:https://github.com/cainmagi/MDNT/tree/demos -------------------------------------------------------------------------------- /data/deprecated/h5py.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ################################################################ 3 | # Data - h5py (deprecated) 4 | # @ Modern Deep Network Toolkits for Tensorflow-Keras 5 | # Yuchen Jin @ cainmagi@gmail.com 6 | # Requirements: (Pay attention to version) 7 | # python 3.6+ 8 | # tensorflow r1.13+ 9 | # Use tf-K standard dataset API to wrap the h5py APIs. 10 | # Warning: 11 | # The standard tf dataset is proved to be incompatible with 12 | # tf-K architecture. We need to wait until tf fix the bug. 13 | # Version: 0.10 # 2019/3/26 14 | # Comments: 15 | # Create this submodule. 16 | ################################################################ 17 | ''' 18 | 19 | import h5py 20 | import numpy as np 21 | import tensorflow as tf 22 | import os 23 | 24 | REMOVE_DEPRECATION = False 25 | 26 | def depcatedInfo(): 27 | try: 28 | raise DeprecationWarning('This library has been deprecated.') 29 | except Exception as e: 30 | if not REMOVE_DEPRECATION: 31 | raise e 32 | 33 | class H5SupSaver: 34 | '''Save supervised data set as .h5 file 35 | This class allows users to dump multiple datasets into one file 36 | handle, then it would save it as a .h5 file. The keywords of the 37 | sets should be assigned by users. 38 | ''' 39 | def __init__(self, fileName): 40 | ''' 41 | Create the .h5 file while initialization. 42 | Arguments: 43 | fileName: a path where we save the file. 
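        A workflow sketch (the file name and the dumped array are made up;
        note that this deprecated version raises DeprecationWarning unless
        REMOVE_DEPRECATION is set to True):
            saver = H5SupSaver('train-set')
            saver.config(logver=1, compression='gzip')
            saver.dump('X', np.zeros([100, 32, 32]))
            saver.close()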
44 | ''' 45 | self.f = None 46 | depcatedInfo() 47 | self.logver = 0 48 | self.__kwargs = dict() 49 | self.open(fileName) 50 | self.config(dtype='f') 51 | 52 | def config(self, **kwargs): 53 | ''' 54 | Make configuration for the saver. 55 | Argumetns for this class: 56 | logver (int): the log level for dumping files. 57 | Arguments often used: 58 | chunks (tuple): size of data blocks. 59 | compression (str): compression method. 60 | compression_opts (int): compression parameter. 61 | shuffle (bool): shuffle filter for data compression. 62 | fletcher32 (bool): check sum for chunks. 63 | Learn more available arguments here: 64 | http://docs.h5py.org/en/latest/high/dataset.html 65 | ''' 66 | logver = kwargs.pop('logver', None) 67 | if logver is not None: 68 | self.logver = logver 69 | self.__kwargs.update(kwargs) 70 | if self.logver > 0: 71 | print('Current configuration is:', self.__kwargs) 72 | 73 | def dump(self, keyword, data): 74 | ''' 75 | Dump the dataset with a keyword into the file. 76 | Arguments: 77 | keyword: the keyword of the dumped dataset. 78 | data: dataset, should be a numpy array. 79 | ''' 80 | if self.f is None: 81 | raise OSError('Should not dump data before opening a file.') 82 | self.f.create_dataset(keyword, data=data, **self.__kwargs) 83 | if self.logver > 0: 84 | print('Dump {0} into the file. The data shape is {1}.'.format(keyword, data.shape)) 85 | 86 | def open(self, fileName): 87 | ''' 88 | The dumped file name (path), it will produce a .h5 file. 89 | Arguments: 90 | fileName: a path where we save the file. 91 | ''' 92 | if fileName[-3:] != '.h5': 93 | fileName += '.h5' 94 | self.close() 95 | self.f = h5py.File(fileName, 'w') 96 | if self.logver > 0: 97 | print('Open a new file:', fileName) 98 | 99 | def close(self): 100 | if self.f is not None: 101 | self.f.close() 102 | self.f = None 103 | 104 | class H5HGParser: 105 | '''Homogeneously parsing .h5 file by h5py module 106 | This class allows users to feed one .h5 file, and convert it to 107 | tf.data.Dataset. The realization could be described as: 108 | (1) Create .h5 file handle. 109 | (2) Estimate the dataset size, and generate indexes. 110 | (3) Use the indexes to create a tf.data.Dataset, and allows it 111 | to randomly shuffle the indexes in each epoch. 112 | (4) Use Dataset.map() to address the data by the index from the 113 | index dataset. 114 | Note that in the file, there may be multiple datasets. This parser 115 | supports reading both single set and multiple sets. 116 | Note that all datasets in the same file should share the same shape. 117 | ''' 118 | def __init__(self, fileName, batchSize=32): 119 | ''' 120 | Create the parser and its h5py file handle. 121 | Arguments: 122 | fileName: the data path of the file (could be without postfix). 123 | batchSize: number of samples in each batch. 
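        A usage sketch (the file name is made up; all datasets in the file
        must share the same shape, and this deprecated class also raises
        DeprecationWarning unless REMOVE_DEPRECATION is set to True):
            parser = H5HGParser('train-set', batchSize=16)
            tf_dataset = parser.getDataset()  # shuffled, batched and repeated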
124 | ''' 125 | self.f = None 126 | depcatedInfo() 127 | if (not os.path.isfile(fileName)) and (os.path.isfile(fileName+'.h5')): 128 | fileName += '.h5' 129 | self.f = h5py.File(fileName, 'r') 130 | self.size = self.__createSize() 131 | self.__dataset = self.__indexDataset() 132 | allNum = np.sum(self.size) 133 | self.__dataset = self.__dataset.shuffle(buffer_size=allNum) 134 | if self.mutlipleMode: 135 | self.__dataset = self.__dataset.map(lambda index: tf.py_function(self.__mapMultiple, [index], [tf.int32])) 136 | else: 137 | self.__dataset = self.__dataset.map(lambda index: tf.py_function(self.__mapSingle, [index], [tf.int32])) 138 | self.__dataset = self.__dataset.batch(batchSize) 139 | self.__dataset = self.__dataset.repeat() 140 | 141 | def getDataset(self): 142 | ''' 143 | Get the produced tf dataset. 144 | ''' 145 | return self.__dataset 146 | 147 | def __createSize(self): 148 | ''' 149 | Find the number of items in the dataset, only need to be run for once. 150 | ''' 151 | if len(self.f) == 1: 152 | self.mutlipleMode = False 153 | self.__dnameIndex = list(self.f.keys())[0] 154 | return len(self.f[self.__dnameIndex]) 155 | else: 156 | self.mutlipleMode = True 157 | self.__dnameIndex = list(self.f.keys()) 158 | return tuple(len(self.f[fk]) for fk in self.__dnameIndex) 159 | 160 | def __indexDataset(self): 161 | ''' 162 | Create a tensorflow index dataset, only need to be run for once. 163 | Should be run after __createSize 164 | ''' 165 | if self.mutlipleMode: 166 | def genOneIndex(num): 167 | return np.stack((num*np.ones(self.size[num], dtype=np.int), np.arange(self.size[num], dtype=np.int)), axis=1) 168 | indexes = np.concatenate(list(genOneIndex(n) for n in range(len(self.size))), axis=0) 169 | return tf.data.Dataset.from_tensor_slices(indexes) 170 | else: 171 | return tf.data.Dataset.from_tensor_slices(np.arange(self.size, dtype=np.int)) 172 | 173 | def __mapMultiple(self, index): 174 | ''' 175 | Map function, for multiple datasets mode. 176 | ''' 177 | dname = self.__dnameIndex[index[0]] 178 | numSample = index[1] 179 | return self.f[dname][numSample] 180 | 181 | def __mapSingle(self, index): 182 | ''' 183 | Map function, for multiple datasets mode. 184 | ''' 185 | dname = self.__dnameIndex 186 | numSample = index 187 | return self.f[dname][numSample] 188 | 189 | def __del__(self): 190 | ''' 191 | Destructor 192 | ''' 193 | if self.f is not None: 194 | self.f.close() 195 | 196 | class H5GParser: 197 | '''Grouply parsing dataset 198 | This class allows users to feed one .h5 file, and convert it to 199 | tf.data.Dataset. The realization could be described as: 200 | (1) Create .h5 file handle. 201 | (2) Using the user defined keywords to get a group of datasets. 202 | (3) Estimate the dataset sizes, and generate indexes. Note each 203 | dataset should share the same size (but could be different 204 | shapes). 205 | (4) Use the indexes to create a tf.data.Dataset, and allows it 206 | to randomly shuffle the indexes in each epoch. 207 | (5) Use Dataset.map() to address the data by the index from the 208 | index dataset. 209 | Certainly, you could use this parser to load a single dataset. 210 | ''' 211 | def __init__(self, fileName, keywords, batchSize=32, preprocfunc=None): 212 | ''' 213 | Create the parser and its h5py file handle. 214 | Arguments: 215 | fileName: the data path of the file (could be without postfix). 216 | keywords: should be a list of keywords (or a single keyword). 217 | batchSize: number of samples in each batch. 
218 | preprocfunc: this function would be added to the produced data 219 | so that it could serve as a pre-processing tool. 220 | ''' 221 | self.f = None 222 | depcatedInfo() 223 | if isinstance(keywords, str): 224 | self.keywords = (keywords,) 225 | else: 226 | self.keywords = keywords 227 | if (not os.path.isfile(fileName)) and (os.path.isfile(fileName+'.h5')): 228 | fileName += '.h5' 229 | self.f = h5py.File(fileName, 'r') 230 | self.__dsets = self.__creatDataSets() 231 | self.size = self.__createSize() 232 | self.__dataset = self.__indexDataset() 233 | allNum = np.sum(self.size) 234 | self.__dataset = self.__dataset.shuffle(buffer_size=allNum) 235 | self.__dataset = self.__dataset.map(lambda index: tf.py_function(self.__mapSingle, [index], [tf.float32]*len(self.__dsets))) 236 | if preprocfunc is not None: 237 | self.__dataset = self.__dataset.map(preprocfunc) 238 | self.__dataset = self.__dataset.batch(batchSize) 239 | self.__dataset = self.__dataset.repeat() 240 | 241 | def getDataset(self): 242 | ''' 243 | Get the produced tf dataset. 244 | ''' 245 | return self.__dataset 246 | 247 | def __creatDataSets(self): 248 | ''' 249 | Find all desired dataset handles, and store them. 250 | ''' 251 | dsets = [] 252 | for key in self.keywords: 253 | dsets.append(self.f[key]) 254 | if not dsets: 255 | raise KeyError('Keywords are not mapped to datasets in the file.') 256 | return dsets 257 | 258 | def __createSize(self): 259 | ''' 260 | Find the number of items in the dataset, only need to be run for once. 261 | Should be run after __creatDataSets. 262 | ''' 263 | sze = len(self.__dsets[0]) 264 | for dset in self.__dsets: 265 | if sze != len(dset): 266 | raise TypeError('The assigned keywords do not correspond to each other.') 267 | return sze 268 | 269 | def __indexDataset(self): 270 | ''' 271 | Create a tensorflow index dataset, only need to be run for once. 272 | Should be run after __createSize. 273 | ''' 274 | return tf.data.Dataset.from_tensor_slices(np.arange(self.size, dtype=np.int)) 275 | 276 | def __mapSingle(self, index): 277 | ''' 278 | Map function, for multiple datasets mode. 279 | ''' 280 | return tuple(dset[index] for dset in self.__dsets) 281 | 282 | def __del__(self): 283 | ''' 284 | Destructor 285 | ''' 286 | if self.f is not None: 287 | self.f.close() -------------------------------------------------------------------------------- /layers/dense.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ################################################################ 3 | # Layers - Dense 4 | # @ Modern Deep Network Toolkits for Tensorflow-Keras 5 | # Yuchen Jin @ cainmagi@gmail.com 6 | # Requirements: (Pay attention to version) 7 | # python 3.6+ 8 | # tensorflow r1.13+ 9 | # Extend the dense layer API with tied version. 10 | # Version: 0.16 # 2019/10/22 11 | # Comments: 12 | # Fix a small bug for Ghost. 13 | # Version: 0.15 # 2019/6/24 14 | # Comments: 15 | # 1. Add the Ghost layer for implementing trainable input layer. 16 | # 2. Fix a small bug for Ghost. 17 | # Version: 0.11 # 2019/3/27 18 | # Comments: 19 | # Add compatible support. 20 | # Version: 0.10 # 2019/3/23 21 | # Comments: 22 | # Create this submodule. 
23 | ################################################################ 24 | ''' 25 | 26 | from tensorflow.python.eager import context 27 | from tensorflow.python.framework import common_shapes 28 | from tensorflow.python.framework import ops 29 | from tensorflow.python.framework import tensor_shape 30 | from tensorflow.python.keras import activations 31 | from tensorflow.python.keras import backend as K 32 | from tensorflow.python.keras import constraints 33 | from tensorflow.python.keras import initializers 34 | from tensorflow.python.keras import regularizers 35 | from tensorflow.python.keras.engine.base_layer import Layer 36 | from tensorflow.python.ops import gen_math_ops 37 | from tensorflow.python.ops import nn 38 | from tensorflow.python.ops import standard_ops 39 | from tensorflow.python.ops import variables 40 | 41 | from .. import compat 42 | if compat.COMPATIBLE_MODE['1.12']: 43 | from tensorflow.python.keras.engine.base_layer import InputSpec 44 | else: 45 | from tensorflow.python.keras.engine.input_spec import InputSpec 46 | 47 | class Ghost(Layer): 48 | """Ghost layer for setting a tunable input 49 | Since tf-Keras does not allow users to build a trainable input layer, we use 50 | an interesting trick, i.e. "Ghost", to realize the trainable input. Our Ghost 51 | layer is implemented like this: 52 | output = kernel * input + bias 53 | where both kernel and bias share the same shape as the input tensor. 54 | There are two ways to build a tunable input layer. The first way is using 55 | the kernel solely: 56 | input = Input(shape=shape) # feeding constant 1.0 57 | tunable_input = Ghost(use_kernel=True)(input) = kernel * 1.0 = kernel 58 | The second way is using the bias solely: 59 | input = Input(shape=shape) # feeding constant 0.0 60 | tunable_input = Ghost(use_bias=True)(input) = bias + 0.0 = bias 61 | Because both kernel and bias are trainable, such a technique enables tf-Keras 62 | users to create a tunable input layer easily. 63 | It is not allowed to use the kernel and the bias at the same time, because in this 64 | case the solution for the Ghost layer would become ill-posed. 65 | Arguments: 66 | use_kernel: Boolean, whether the layer uses the multiplicative strategy to 67 | define the variable. 68 | use_bias: Boolean, whether the layer uses the additive strategy to define 69 | the variable. 70 | var_initializer: Initializer for the tunable variable. The variable 71 | depends on setting use_kernel or setting use_bias. 72 | var_regularizer: Regularizer function applied to the tunable variable. 73 | var_constraint: Constraint function applied to the tunable variable. 74 | Input shape: 75 | Any shape. The shape should be fully known except the batch dimension. 76 | Output shape: 77 | The same as input shape.
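    A sketch of the first strategy (the shape and the constant feed are
    illustrative assumptions):
        x_in = tf.keras.layers.Input(shape=(16,))
        x_tune = Ghost(use_kernel=True)(x_in)
        # Feed x_in with constant ones (e.g. np.ones([batch_size, 16])) when
        # fitting or predicting, so that x_tune equals the trainable kernel.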
78 | """ 79 | def __init__(self, 80 | use_kernel=False, 81 | use_bias=False, 82 | var_initializer='glorot_uniform', 83 | var_regularizer=None, 84 | var_constraint=None, 85 | **kwargs): 86 | if 'input_shape' not in kwargs and 'input_dim' in kwargs: 87 | kwargs['input_shape'] = (kwargs.pop('input_dim'),) 88 | 89 | super(Ghost, self).__init__( 90 | activity_regularizer=None, **kwargs) 91 | if not (use_kernel or use_bias): 92 | raise ValueError('Need to specify either "use_kernel" or "use_bias".') 93 | if use_kernel and use_bias: 94 | raise ValueError('Should not specify "use_kernel" and "use_bias" in the same time.') 95 | self.use_kernel = use_kernel 96 | self.use_bias = use_bias 97 | self.var_initializer = initializers.get(var_initializer) 98 | self.var_regularizer = regularizers.get(var_regularizer) 99 | self.var_constraint = constraints.get(var_constraint) 100 | self.supports_masking = True 101 | 102 | def build(self, input_shape): 103 | input_shape = tensor_shape.TensorShape(input_shape) 104 | for i in range(1, len(input_shape)): 105 | if tensor_shape.dimension_value(input_shape[i]) is None: 106 | raise ValueError('The input shape [1:] should be defined, but found element `None`.') 107 | if self.use_kernel: 108 | varName = 'kernel' 109 | elif self.use_bias: 110 | varName = 'bias' 111 | get_in = input_shape.as_list()[1:] 112 | self.get_var = self.add_weight( 113 | varName, 114 | shape=get_in, 115 | initializer=self.var_initializer, 116 | regularizer=self.var_regularizer, 117 | constraint=self.var_constraint, 118 | dtype=self.dtype, 119 | trainable=True) 120 | super(Ghost, self).build(input_shape) 121 | 122 | def call(self, inputs): 123 | inputs = ops.convert_to_tensor(inputs) 124 | input_shape = K.int_shape(inputs) 125 | broadcast_shape = [1] + list(input_shape[1:]) 126 | broadcast_var = K.reshape(self.get_var, broadcast_shape) 127 | if self.use_kernel: 128 | return broadcast_var * inputs 129 | elif self.use_bias: 130 | return broadcast_var + inputs 131 | 132 | def compute_output_shape(self, input_shape): 133 | return input_shape 134 | 135 | def get_config(self): 136 | config = { 137 | 'use_kernel': self.use_kernel, 138 | 'use_bias': self.use_bias, 139 | 'var_initializer': initializers.serialize(self.var_initializer), 140 | 'var_regularizer': regularizers.serialize(self.var_regularizer), 141 | 'var_constraint': constraints.serialize(self.var_constraint) 142 | } 143 | base_config = super(Ghost, self).get_config() 144 | return dict(list(base_config.items()) + list(config.items())) 145 | 146 | class DenseTied(Layer): 147 | """Tied densely-connected NN layer. 148 | `DenseTied` implements the operation: 149 | `output = activation(dot(input, kernel.T) + bias)` 150 | where kernel comes from another Dense layer. 151 | NOTE THAT ALTHOUGH WE HAVE SUCCESSED TO MAKE THIS LAYER SERIALIZABLE, 152 | IT MAY BE STILL PROBLEMATIC FOR TRAINING ALGORITHM. PLEASE BE CAREFUL 153 | WHEN USING SUCH KIND OF LAYERS. 154 | IN MULTIPLE MODELS, THIS INSTANCE MAY CAUSING CONFLICTS BECAUSE IT 155 | USES GLOBAL VARIABLE NAME TO SERIALIZE CROSSED LAYERS. IT IS 156 | RECOMMENDED TO SEPARATE NAME SCOPES WHEN USING MULTIPLE MODELS. 157 | Arguments: 158 | tied_layer: A Dense layer instance where this layer is tied. 159 | activation: Activation function to use. 160 | If you don't specify anything, no activation is applied 161 | (ie. "linear" activation: `a(x) = x`). 162 | use_bias: Boolean, whether the layer uses a bias vector. 163 | bias_initializer: Initializer for the bias vector. 
164 | bias_regularizer: Regularizer function applied to the bias vector. 165 | activity_regularizer: Regularizer function applied to 166 | the output of the layer (its "activation").. 167 | bias_constraint: Constraint function applied to the bias vector. 168 | Reserved arguments: 169 | varName, varShape: only used when saving and restoring the layer. 170 | Input shape: 171 | nD tensor with shape: `(batch_size, ..., output_dim_of_tied_layer)`. 172 | The most common situation would be 173 | a 2D input with shape `(batch_size, output_dim_of_tied_layer)`. 174 | Output shape: 175 | nD tensor with shape: `(batch_size, ..., units)`. 176 | For instance, for a 2D input with shape 177 | `(batch_size, input_dim_of_tied_layer)`, 178 | the output would have shape `(batch_size, input_dim_of_tied_layer)`. 179 | """ 180 | def __init__(self, 181 | tied_layer='', 182 | activation=None, 183 | use_bias=True, 184 | bias_initializer='zeros', 185 | bias_regularizer=None, 186 | activity_regularizer=None, 187 | bias_constraint=None, 188 | varName='', varShape=[], 189 | **kwargs): 190 | if 'input_shape' not in kwargs and 'input_dim' in kwargs: 191 | kwargs['input_shape'] = (kwargs.pop('input_dim'),) 192 | 193 | super(DenseTied, self).__init__( 194 | activity_regularizer=regularizers.get(activity_regularizer), **kwargs) 195 | if tied_layer != '': 196 | self.kernelFrom = tied_layer.kernel.name 197 | self.varName = varName 198 | self.varShape = varShape 199 | self.activation = activations.get(activation) 200 | self.use_bias = use_bias 201 | self.bias_initializer = initializers.get(bias_initializer) 202 | self.bias_regularizer = regularizers.get(bias_regularizer) 203 | self.bias_constraint = constraints.get(bias_constraint) 204 | 205 | self.supports_masking = True 206 | self.input_spec = InputSpec(min_ndim=2) 207 | 208 | def build(self, input_shape): 209 | input_shape = tensor_shape.TensorShape(input_shape) 210 | if tensor_shape.dimension_value(input_shape[-1]) is None: 211 | raise ValueError('The last dimension of the inputs to `Dense` ' 212 | 'should be defined. Found `None`.') 213 | last_dim = tensor_shape.dimension_value(input_shape[-1]) 214 | self.input_spec = InputSpec(min_ndim=2, 215 | axes={-1: last_dim}) 216 | if self.varName == '': 217 | kernelFrom = list(filter(lambda x:x.name==self.kernelFrom, [op for op in variables.global_variables(scope=None)]))[0] 218 | self.kernel = K.transpose(kernelFrom) 219 | self.o_shape = self.kernel.get_shape().as_list() 220 | self.varName = kernelFrom.name 221 | self.varShape = kernelFrom.get_shape().as_list() 222 | else: 223 | kernelFrom = list(filter(lambda x:x.name==self.varName, [op for op in variables.global_variables(scope=None)]))[0] 224 | self.kernel = K.transpose(kernelFrom) 225 | self.o_shape = self.kernel.get_shape().as_list() 226 | if self.use_bias: 227 | self.bias = self.add_weight( 228 | 'bias', 229 | shape=[self.o_shape[-1],], 230 | initializer=self.bias_initializer, 231 | regularizer=self.bias_regularizer, 232 | constraint=self.bias_constraint, 233 | dtype=self.dtype, 234 | trainable=True) 235 | else: 236 | self.bias = None 237 | self.built = True 238 | 239 | def call(self, inputs): 240 | inputs = ops.convert_to_tensor(inputs) 241 | rank = common_shapes.rank(inputs) 242 | if rank > 2: 243 | # Broadcasting is required for the inputs. 244 | outputs = standard_ops.tensordot(inputs, self.kernel, [[rank - 1], [0]]) 245 | # Reshape the output back to the original ndim of the input. 
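            # (This broadcasting branch mirrors tf.keras.layers.Dense.call
            # for inputs with rank > 2.)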
246 | if not context.executing_eagerly(): 247 | shape = inputs.get_shape().as_list() 248 | output_shape = shape[:-1] + [self.o_shape] 249 | outputs.set_shape(output_shape) 250 | else: 251 | outputs = gen_math_ops.mat_mul(inputs, self.kernel) 252 | if self.use_bias: 253 | outputs = nn.bias_add(outputs, self.bias) 254 | if self.activation is not None: 255 | return self.activation(outputs) # pylint: disable=not-callable 256 | return outputs 257 | 258 | def compute_output_shape(self, input_shape): 259 | input_shape = tensor_shape.TensorShape(input_shape) 260 | input_shape = input_shape.with_rank_at_least(2) 261 | if tensor_shape.dimension_value(input_shape[-1]) is None: 262 | raise ValueError( 263 | 'The innermost dimension of input_shape must be defined, but saw: %s' 264 | % input_shape) 265 | return input_shape[:-1].concatenate(self.o_shape) 266 | 267 | def get_config(self): 268 | config = { 269 | 'tied_layer': '', 270 | 'activation': activations.serialize(self.activation), 271 | 'use_bias': self.use_bias, 272 | 'bias_initializer': initializers.serialize(self.bias_initializer), 273 | 'bias_regularizer': regularizers.serialize(self.bias_regularizer), 274 | 'activity_regularizer': 275 | regularizers.serialize(self.activity_regularizer), 276 | 'bias_constraint': constraints.serialize(self.bias_constraint), 277 | 'varName': self.varName, 'varShape': self.varShape 278 | } 279 | base_config = super(DenseTied, self).get_config() 280 | return dict(list(base_config.items()) + list(config.items())) -------------------------------------------------------------------------------- /layers/deprecated/conv.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ################################################################ 3 | # Layers - Modern convolutional layers (deprecated) 4 | # @ Modern Deep Network Toolkits for Tensorflow-Keras 5 | # Yuchen Jin @ cainmagi@gmail.com 6 | # Requirements: (Pay attention to version) 7 | # python 3.6+ 8 | # tensorflow r1.13+ 9 | # We store the failed versions of APIs for .conv here. 10 | # Version: 0.10 # 2019/6/7 11 | # Comments: 12 | # A failed try for quick group convolution (QGroupConv), move 13 | # it to deprecated. 14 | ################################################################ 15 | ''' 16 | 17 | from tensorflow.python.framework import tensor_shape 18 | from tensorflow.python.keras import activations 19 | from tensorflow.python.keras import backend as K 20 | from tensorflow.python.keras import constraints 21 | from tensorflow.python.keras import initializers 22 | from tensorflow.python.keras import regularizers 23 | from tensorflow.python.keras.utils import conv_utils 24 | from tensorflow.python.keras.engine.base_layer import Layer 25 | from tensorflow.python.ops import array_ops 26 | from tensorflow.python.ops import nn 27 | from tensorflow.python.ops import nn_ops 28 | from tensorflow.python.ops import nn_impl 29 | from tensorflow.python.ops import math_ops 30 | 31 | from tensorflow.keras.layers import BatchNormalization, LeakyReLU, PReLU 32 | from tensorflow.python.keras.layers.convolutional import Conv, Conv2DTranspose, Conv3DTranspose, UpSampling1D, UpSampling2D, UpSampling3D, ZeroPadding1D, ZeroPadding2D, ZeroPadding3D, Cropping1D, Cropping2D, Cropping3D 33 | from .normalize import InstanceNormalization, GroupNormalization 34 | 35 | from .. 
import compat 36 | if compat.COMPATIBLE_MODE['1.12']: 37 | from tensorflow.python.keras.engine.base_layer import InputSpec 38 | else: 39 | from tensorflow.python.keras.engine.input_spec import InputSpec 40 | 41 | NEW_CONV_TRANSPOSE = True 42 | USE_QUICK_GCONV = False 43 | 44 | def _get_macro_conv(key='NEW_CONV_TRANSPOSE'): 45 | if key == 'USE_QUICK_GCONV': 46 | return USE_QUICK_GCONV 47 | else: 48 | return NEW_CONV_TRANSPOSE 49 | 50 | class _QGroupConv(_GroupConv): 51 | """Quick computing version for abstract nD group convolution layer. 52 | This is the quick computing version of the convolution. 53 | The work flow of `GroupConv` could be viewed as 54 | output = concat (i=1~G) ( convND(input[group_i]) ) 55 | which means if we have G groups, we need to compute the `convND` op for G times. 56 | The original implementation calls operator `convND` for many times, which is 57 | inefficient. To solve this problem, we use such a work flow: 58 | output = sum (i=1~G) ( depth_convND(input)[group_i] ) 59 | The difference is, we only need to call `depth_convND` (tf.nn.depthwise_conv2d) 60 | once. Furthermore, if we apply tf.reshape and tf.sum, we could also calculate 61 | the sum operator once. This is why we could use the above method to improve the 62 | efficiency. 63 | However, since there is only tf.nn.depthwise_conv2d in tensorflow, we could not 64 | use it to calculate GroupConv3D. But we could still calculate GroupConv1D by 65 | reducing the 2D convolution to 1D case. 66 | To learn more about group convolution, see the docstring for `GroupConv`. 67 | Arguments: 68 | rank: An integer, the rank of the convolution, e.g. "2" for 2D convolution. 69 | (rank > 2 is not allowed.) 70 | lgroups: Integer, the group number of the latent convolution branch. The 71 | number of filters in the whole latent space is lgroups * lfilters. 72 | lfilters: Integer, the dimensionality in each the lattent group (i.e. the 73 | number of filters in each latent convolution branch). 74 | kernel_size: An integer or tuple/list of n integers, specifying the 75 | length of the convolution window. 76 | strides: An integer or tuple/list of n integers, 77 | specifying the stride length of the convolution. 78 | Specifying any stride value != 1 is incompatible with specifying 79 | any `dilation_rate` value != 1. 80 | padding: One of `"valid"` or `"same"` (case-insensitive). 81 | data_format: A string, one of `channels_last` (default) or `channels_first`. 82 | The ordering of the dimensions in the inputs. 83 | `channels_last` corresponds to inputs with shape 84 | `(batch, ..., channels)` while `channels_first` corresponds to 85 | inputs with shape `(batch, channels, ...)`. 86 | dilation_rate: An integer or tuple/list of n integers, specifying 87 | the dilation rate to use for dilated convolution. 88 | Currently, specifying any `dilation_rate` value != 1 is 89 | incompatible with specifying any `strides` value != 1. 90 | activation: Activation function. Set it to None to maintain a 91 | linear activation. 92 | use_bias: Boolean, whether the layer uses a bias. 93 | kernel_initializer: An initializer for the convolution kernel. 94 | bias_initializer: An initializer for the bias vector. If None, the default 95 | initializer will be used. 96 | kernel_regularizer: Optional regularizer for the convolution kernel. 97 | bias_regularizer: Optional regularizer for the bias vector. 98 | activity_regularizer: Optional regularizer function for the output. 
99 | kernel_constraint: Optional projection function to be applied to the 100 | kernel after being updated by an `Optimizer` (e.g. used to implement 101 | norm constraints or value constraints for layer weights). The function 102 | must take as input the unprojected variable and must return the 103 | projected variable (which must have the same shape). Constraints are 104 | not safe to use when doing asynchronous distributed training. 105 | bias_constraint: Optional projection function to be applied to the 106 | bias after being updated by an `Optimizer`. 107 | trainable: Boolean, if `True` also add variables to the graph collection 108 | `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). 109 | name: A string, the name of the layer. 110 | """ 111 | 112 | def __init__(self, rank, 113 | lgroups, 114 | lfilters, 115 | kernel_size, 116 | strides=1, 117 | padding='valid', 118 | data_format=None, 119 | dilation_rate=1, 120 | activation=None, 121 | use_bias=True, 122 | kernel_initializer='glorot_uniform', 123 | bias_initializer='zeros', 124 | kernel_regularizer=None, 125 | bias_regularizer=None, 126 | activity_regularizer=None, 127 | kernel_constraint=None, 128 | bias_constraint=None, 129 | trainable=True, 130 | name=None, 131 | **kwargs): 132 | super(_GroupConv, self).__init__( 133 | trainable=trainable, 134 | name=name, 135 | activity_regularizer=regularizers.get(activity_regularizer), 136 | **kwargs) 137 | self.rank = rank 138 | if rank > 2: 139 | raise ValueError('The quick group convolution does not support 3D or any higher dimension.') 140 | initRank = rank 141 | self.lgroups = lgroups 142 | self.lfilters = lfilters 143 | self.kernel_size = conv_utils.normalize_tuple( 144 | kernel_size, rank, 'kernel_size') 145 | self.strides = conv_utils.normalize_tuple(strides, rank, 'strides') 146 | self.padding = conv_utils.normalize_padding(padding) 147 | if (self.padding == 'causal' and not isinstance(self, (Conv1D, SeparableConv1D))): 148 | raise ValueError('Causal padding is only supported for `Conv1D` and ``SeparableConv1D`.') 149 | self.data_format = conv_utils.normalize_data_format(data_format) 150 | self.dilation_rate = conv_utils.normalize_tuple( 151 | dilation_rate, rank, 'dilation_rate') 152 | if rank == 1: # when rank=1, expand the tuples to 2D case. 153 | self.kernel_size = (1, *self.kernel_size) 154 | self.strides = (1, *self.strides) 155 | self.dilation_rate = (1, *self.dilation_rate) 156 | self.activation = activations.get(activation) 157 | self.use_bias = use_bias 158 | self.kernel_initializer = initializers.get(kernel_initializer) 159 | self.bias_initializer = initializers.get(bias_initializer) 160 | self.kernel_regularizer = regularizers.get(kernel_regularizer) 161 | self.bias_regularizer = regularizers.get(bias_regularizer) 162 | self.kernel_constraint = constraints.get(kernel_constraint) 163 | self.bias_constraint = constraints.get(bias_constraint) 164 | self.input_spec = InputSpec(ndim=self.rank + 2) 165 | 166 | self.group_input_dim = None 167 | self.exp_dim_pos = None 168 | 169 | def build(self, input_shape): 170 | input_shape = tensor_shape.TensorShape(input_shape) 171 | if self.data_format == 'channels_first': 172 | channel_axis = 1 173 | self._data_format = 'NCHW' 174 | if self.rank == 1: 175 | self.exp_dim_pos = 2 176 | else: 177 | channel_axis = -1 178 | if self.rank == 1: 179 | self.exp_dim_pos = 1 180 | self._data_format = 'NHWC' 181 | if input_shape.dims[channel_axis].value is None: 182 | raise ValueError('The channel dimension of the inputs should be defined. 
Found `None`.') 183 | input_dim = int(input_shape[channel_axis]) 184 | if input_dim % self.lgroups != 0: 185 | raise ValueError('To grouplize the input channels, the input channel number should be a multiple of group number (N*{0}), but given {1}'.format(self.lgroups, input_dim)) 186 | self.group_input_dim = input_dim // self.lgroups 187 | self._strides = (1, *self.strides, 1) 188 | kernel_shape = self.kernel_size + (input_dim, self.lfilters) 189 | 190 | self.kernel = self.add_weight( 191 | name='kernel', 192 | shape=kernel_shape, 193 | initializer=self.kernel_initializer, 194 | regularizer=self.kernel_regularizer, 195 | constraint=self.kernel_constraint, 196 | trainable=True, 197 | dtype=self.dtype) 198 | if self.use_bias: 199 | self.bias = self.add_weight( 200 | name='bias', 201 | shape=(self.lfilters * self.lgroups,), 202 | initializer=self.bias_initializer, 203 | regularizer=self.bias_regularizer, 204 | constraint=self.bias_constraint, 205 | trainable=True, 206 | dtype=self.dtype) 207 | else: 208 | self.bias = None 209 | self.input_spec = InputSpec(ndim=self.rank + 2, axes={channel_axis: input_dim}) 210 | if self.padding == 'causal': 211 | self.op_padding = 'valid' 212 | else: 213 | self.op_padding = self.padding 214 | self.built = True 215 | 216 | def call(self, inputs): 217 | if self.rank == 1: 218 | inputs = array_ops.expand_dims(inputs, axis=self.exp_dim_pos) 219 | outputs= nn_impl.depthwise_conv2d(input=inputs, 220 | filter=self.kernel, 221 | strides=self._strides, 222 | padding=self.op_padding.upper(), 223 | rate=self.dilation_rate, 224 | data_format=self._data_format) 225 | # Grouplize the output channels. 226 | r2_outputs_shape = outputs.get_shape().as_list() 227 | if self.data_format == 'channels_first': 228 | #get_oshape = r2_outputs_shape[:1].concatenate([self.lgroups*self.lfilters, self.group_input_dim]).concatenate(r2_outputs_shape[2:]) 229 | get_oshape = [-1, self.lgroups*self.lfilters, self.group_input_dim, *r2_outputs_shape[2:]] 230 | outputs = array_ops.reshape(outputs, get_oshape) 231 | outputs = math_ops.reduce_sum(outputs, axis=1, keepdims=False) 232 | else: 233 | #get_oshape = r2_outputs_shape[:-1].concatenate([self.lgroups*self.lfilters, self.group_input_dim]) 234 | get_oshape = [-1, *r2_outputs_shape[1:-1], self.lgroups*self.lfilters, self.group_input_dim] 235 | outputs = array_ops.reshape(outputs, get_oshape) 236 | outputs = math_ops.reduce_sum(outputs, axis=-1, keepdims=False) 237 | if self.rank == 1: 238 | outputs = array_ops.squeeze(outputs, axis=self.exp_dim_pos) 239 | outputs_list = [] 240 | 241 | if self.use_bias: 242 | if self.data_format == 'channels_first': 243 | if self.rank == 1: 244 | # nn.bias_add does not accept a 1D input tensor. 
245 | bias = array_ops.reshape(self.bias, (1, self.lfilters * self.lgroups, 1)) 246 | outputs += bias 247 | if self.rank == 2: 248 | outputs = nn.bias_add(outputs, self.bias, data_format='NCHW') 249 | else: 250 | outputs = nn.bias_add(outputs, self.bias, data_format='NHWC') 251 | 252 | if self.activation is not None: 253 | return self.activation(outputs) 254 | return outputs -------------------------------------------------------------------------------- /layers/normalize.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ################################################################ 3 | # Layers - Extended normalization layers 4 | # @ Modern Deep Network Toolkits for Tensorflow-Keras 5 | # Yuchen Jin @ cainmagi@gmail.com 6 | # Requirements: (Pay attention to version) 7 | # python 3.6+ 8 | # tensorflow r1.13+ 9 | # Extend the normalization with instance normalization and 10 | # layer normalization. 11 | # See here to learn the differences between different kinds of 12 | # normalizations: 13 | # https://arxiv.org/abs/1803.08494 14 | # Version: 0.11 # 2019/3/27 15 | # Comments: 16 | # Add compatible support. 17 | # Version: 0.10 # 2019/3/24 18 | # Comments: 19 | # Create this submodule. 20 | ################################################################ 21 | ''' 22 | 23 | from tensorflow.python.keras import backend as K 24 | from tensorflow.python.keras import constraints 25 | from tensorflow.python.keras import initializers 26 | from tensorflow.python.keras import regularizers 27 | from tensorflow.python.keras.engine.base_layer import Layer 28 | from tensorflow.python.ops import nn_impl 29 | 30 | from .. import compat 31 | if compat.COMPATIBLE_MODE['1.12']: 32 | from tensorflow.python.keras.engine.base_layer import InputSpec 33 | else: 34 | from tensorflow.python.keras.engine.input_spec import InputSpec 35 | 36 | class InstanceNormalization(Layer): 37 | """Instance normalization layer. 38 | This layer is borrorwed from 39 | https://github.com/keras-team/keras-contrib/blob/master/ 40 | keras_contrib/layers/normalization/instancenormalization.py 41 | Normalize the activations of the previous layer at each step, 42 | i.e. applies a transformation that maintains the mean activation 43 | close to 0 and the activation standard deviation close to 1. 44 | Arguments: 45 | axis: Integer, the axis that should be normalized 46 | (typically the features axis). 47 | For instance, after a `Conv2D` layer with 48 | `data_format="channels_first"`, 49 | set `axis=1` in `InstanceNormalization` (Instance Normalization). 50 | Setting `axis=None` will normalize all values in each 51 | instance of the batch (Layer Normalization). 52 | Axis 0 is the batch dimension. `axis` cannot be set to 0 to avoid errors. 53 | epsilon: Small float added to variance to avoid dividing by zero. 54 | center: If True, add offset of `beta` to normalized tensor. 55 | If False, `beta` is ignored. 56 | scale: If True, multiply by `gamma`. 57 | If False, `gamma` is not used. 58 | When the next layer is linear (also e.g. `nn.relu`), 59 | this can be disabled since the scaling 60 | will be done by the next layer. 61 | beta_initializer: Initializer for the beta weight. 62 | gamma_initializer: Initializer for the gamma weight. 63 | beta_regularizer: Optional regularizer for the beta weight. 64 | gamma_regularizer: Optional regularizer for the gamma weight. 65 | beta_constraint: Optional constraint for the beta weight. 66 | gamma_constraint: Optional constraint for the gamma weight. 
67 | Input shape: 68 | Arbitrary. Use the keyword argument `input_shape` 69 | (tuple of integers, does not include the samples axis) 70 | when using this layer as the first layer in a Sequential model. 71 | Output shape: 72 | Same shape as input. 73 | References: 74 | - [Layer Normalization](https://arxiv.org/abs/1607.06450) 75 | - [Instance Normalization: The Missing Ingredient for Fast Stylization]( 76 | https://arxiv.org/abs/1607.08022) 77 | """ 78 | def __init__(self, 79 | axis=None, 80 | epsilon=1e-3, 81 | center=True, 82 | scale=True, 83 | beta_initializer='zeros', 84 | gamma_initializer='ones', 85 | beta_regularizer=None, 86 | gamma_regularizer=None, 87 | beta_constraint=None, 88 | gamma_constraint=None, 89 | **kwargs): 90 | super(InstanceNormalization, self).__init__(**kwargs) 91 | self.supports_masking = True 92 | self.axis = axis 93 | self.epsilon = epsilon 94 | self.center = center 95 | self.scale = scale 96 | self.beta_initializer = initializers.get(beta_initializer) 97 | self.gamma_initializer = initializers.get(gamma_initializer) 98 | self.beta_regularizer = regularizers.get(beta_regularizer) 99 | self.gamma_regularizer = regularizers.get(gamma_regularizer) 100 | self.beta_constraint = constraints.get(beta_constraint) 101 | self.gamma_constraint = constraints.get(gamma_constraint) 102 | 103 | def build(self, input_shape): 104 | ndim = len(input_shape) 105 | if self.axis == 0: 106 | raise ValueError('Axis cannot be zero') 107 | 108 | if (self.axis is not None) and (ndim == 2): 109 | raise ValueError('Cannot specify axis for rank 1 tensor') 110 | 111 | self.input_spec = InputSpec(ndim=ndim) 112 | 113 | if self.axis is None: 114 | shape = (1,) 115 | else: 116 | shape = (input_shape[self.axis],) 117 | 118 | if self.scale: 119 | self.gamma = self.add_weight(shape=shape, 120 | name='gamma', 121 | initializer=self.gamma_initializer, 122 | regularizer=self.gamma_regularizer, 123 | constraint=self.gamma_constraint) 124 | else: 125 | self.gamma = None 126 | if self.center: 127 | self.beta = self.add_weight(shape=shape, 128 | name='beta', 129 | initializer=self.beta_initializer, 130 | regularizer=self.beta_regularizer, 131 | constraint=self.beta_constraint) 132 | else: 133 | self.beta = None 134 | self.built = True 135 | 136 | def call(self, inputs, training=None): 137 | input_shape = K.int_shape(inputs) 138 | reduction_axes = list(range(0, len(input_shape))) 139 | 140 | if self.axis is not None: 141 | del reduction_axes[self.axis] 142 | 143 | del reduction_axes[0] 144 | 145 | mean = K.mean(inputs, reduction_axes, keepdims=True) 146 | stddev = K.std(inputs, reduction_axes, keepdims=True) + self.epsilon 147 | normed = (inputs - mean) / stddev 148 | 149 | broadcast_shape = [1] * len(input_shape) 150 | if self.axis is not None: 151 | broadcast_shape[self.axis] = input_shape[self.axis] 152 | 153 | if self.scale: 154 | broadcast_gamma = K.reshape(self.gamma, broadcast_shape) 155 | normed = normed * broadcast_gamma 156 | if self.center: 157 | broadcast_beta = K.reshape(self.beta, broadcast_shape) 158 | normed = normed + broadcast_beta 159 | return normed 160 | 161 | def compute_output_shape(self, input_shape): 162 | return input_shape 163 | 164 | def get_config(self): 165 | config = { 166 | 'axis': self.axis, 167 | 'epsilon': self.epsilon, 168 | 'center': self.center, 169 | 'scale': self.scale, 170 | 'beta_initializer': initializers.serialize(self.beta_initializer), 171 | 'gamma_initializer': initializers.serialize(self.gamma_initializer), 172 | 'beta_regularizer': 
regularizers.serialize(self.beta_regularizer), 173 | 'gamma_regularizer': regularizers.serialize(self.gamma_regularizer), 174 | 'beta_constraint': constraints.serialize(self.beta_constraint), 175 | 'gamma_constraint': constraints.serialize(self.gamma_constraint) 176 | } 177 | base_config = super(InstanceNormalization, self).get_config() 178 | return dict(list(base_config.items()) + list(config.items())) 179 | 180 | class GroupNormalization(Layer): 181 | """Group normalization layer. 182 | This layer is borrorwed from 183 | https://github.com/keras-team/keras-contrib/blob/master/ 184 | keras_contrib/layers/normalization/groupnormalization.py 185 | Group Normalization divides the channels into groups and computes 186 | within each group 187 | the mean and variance for normalization. 188 | Group Normalization's computation is independent 189 | of batch sizes, and its accuracy is stable in a wide range of batch sizes. 190 | Relation to Layer Normalization: 191 | If the number of groups is set to 1, then this operation becomes identical to 192 | Layer Normalization. 193 | Relation to Instance Normalization: 194 | If the number of groups is set to the 195 | input dimension (number of groups is equal 196 | to number of channels), then this operation becomes 197 | identical to Instance Normalization. 198 | Arguments: 199 | groups: Integer, the number of groups for Group Normalization. 200 | Can be in the range [1, N] where N is the input dimension. 201 | The input dimension must be divisible by the number of groups. 202 | axis: Integer, the axis that should be normalized 203 | (typically the features axis). 204 | For instance, after a `Conv2D` layer with 205 | `data_format="channels_first"`, 206 | set `axis=1` in `BatchNormalization`. 207 | epsilon: Small float added to variance to avoid dividing by zero. 208 | center: If True, add offset of `beta` to normalized tensor. 209 | If False, `beta` is ignored. 210 | scale: If True, multiply by `gamma`. 211 | If False, `gamma` is not used. 212 | When the next layer is linear (also e.g. `nn.relu`), 213 | this can be disabled since the scaling 214 | will be done by the next layer. 215 | beta_initializer: Initializer for the beta weight. 216 | gamma_initializer: Initializer for the gamma weight. 217 | beta_regularizer: Optional regularizer for the beta weight. 218 | gamma_regularizer: Optional regularizer for the gamma weight. 219 | beta_constraint: Optional constraint for the beta weight. 220 | gamma_constraint: Optional constraint for the gamma weight. 221 | Input shape: 222 | Arbitrary. Use the keyword argument `input_shape` 223 | (tuple of integers, does not include the samples axis) 224 | when using this layer as the first layer in a model. 225 | Output shape: 226 | Same shape as input. 
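    A brief usage sketch (illustrative; `inputs` is an existing Keras tensor
    and the 32 channels are split into 8 groups):
    ```python
    x = tf.keras.layers.Conv2D(32, 3, padding='same')(inputs)
    x = GroupNormalization(groups=8, axis=-1)(x)
    x = tf.keras.layers.Activation('relu')(x)
    ```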
227 | References: 228 | - [Group Normalization](https://arxiv.org/abs/1803.08494) 229 | """ 230 | 231 | def __init__(self, 232 | groups=32, 233 | axis=-1, 234 | epsilon=1e-5, 235 | center=True, 236 | scale=True, 237 | beta_initializer='zeros', 238 | gamma_initializer='ones', 239 | beta_regularizer=None, 240 | gamma_regularizer=None, 241 | beta_constraint=None, 242 | gamma_constraint=None, 243 | **kwargs): 244 | super(GroupNormalization, self).__init__(**kwargs) 245 | self.supports_masking = True 246 | self.groups = groups 247 | self.axis = axis 248 | self.epsilon = epsilon 249 | self.center = center 250 | self.scale = scale 251 | self.beta_initializer = initializers.get(beta_initializer) 252 | self.gamma_initializer = initializers.get(gamma_initializer) 253 | self.beta_regularizer = regularizers.get(beta_regularizer) 254 | self.gamma_regularizer = regularizers.get(gamma_regularizer) 255 | self.beta_constraint = constraints.get(beta_constraint) 256 | self.gamma_constraint = constraints.get(gamma_constraint) 257 | 258 | def build(self, input_shape): 259 | dim = input_shape[self.axis] 260 | 261 | if dim is None: 262 | raise ValueError('Axis ' + str(self.axis) + ' of ' 263 | 'input tensor should have a defined dimension ' 264 | 'but the layer received an input with shape ' + 265 | str(input_shape) + '.') 266 | 267 | if dim < self.groups: 268 | raise ValueError('Number of groups (' + str(self.groups) + ') cannot be ' 269 | 'more than the number of channels (' + 270 | str(dim) + ').') 271 | 272 | if dim % self.groups != 0: 273 | raise ValueError('Number of groups (' + str(self.groups) + ') must be a ' 274 | 'multiple of the number of channels (' + 275 | str(dim) + ').') 276 | 277 | self.input_spec = InputSpec(ndim=len(input_shape), 278 | axes={self.axis: dim}) 279 | shape = (dim,) 280 | 281 | if self.scale: 282 | self.gamma = self.add_weight(shape=shape, 283 | name='gamma', 284 | initializer=self.gamma_initializer, 285 | regularizer=self.gamma_regularizer, 286 | constraint=self.gamma_constraint) 287 | else: 288 | self.gamma = None 289 | if self.center: 290 | self.beta = self.add_weight(shape=shape, 291 | name='beta', 292 | initializer=self.beta_initializer, 293 | regularizer=self.beta_regularizer, 294 | constraint=self.beta_constraint) 295 | else: 296 | self.beta = None 297 | self.built = True 298 | 299 | def call(self, inputs, **kwargs): 300 | input_shape = K.int_shape(inputs) 301 | tensor_input_shape = K.shape(inputs) 302 | 303 | # Prepare broadcasting shape. 
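        # For example (channels_last, axis=-1, shapes illustrative only): an
        # input of shape (N, H, W, 32) with groups=8 is reshaped to
        # (N, 8, H, W, 4) below, the mean/variance are reduced over the last
        # three axes, and the result is reshaped back to (N, H, W, 32) at the
        # end of this method.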
304 | reduction_axes = list(range(len(input_shape))) 305 | del reduction_axes[self.axis] 306 | broadcast_shape = [1] * len(input_shape) 307 | broadcast_shape[self.axis] = input_shape[self.axis] // self.groups 308 | broadcast_shape.insert(1, self.groups) 309 | 310 | reshape_group_shape = K.shape(inputs) 311 | group_axes = [reshape_group_shape[i] for i in range(len(input_shape))] 312 | group_axes[self.axis] = input_shape[self.axis] // self.groups 313 | group_axes.insert(1, self.groups) 314 | 315 | # reshape inputs to new group shape 316 | group_shape = [group_axes[0], self.groups] + group_axes[2:] 317 | group_shape = K.stack(group_shape) 318 | inputs = K.reshape(inputs, group_shape) 319 | 320 | group_reduction_axes = list(range(len(group_axes))) 321 | mean, variance = nn_impl.moments(inputs, group_reduction_axes[2:], shift=None, keep_dims=True) 322 | inputs = (inputs - mean) / (K.sqrt(variance + self.epsilon)) 323 | 324 | # prepare broadcast shape 325 | inputs = K.reshape(inputs, group_shape) 326 | 327 | outputs = inputs 328 | 329 | # In this case we must explicitly broadcast all parameters. 330 | if self.scale: 331 | broadcast_gamma = K.reshape(self.gamma, broadcast_shape) 332 | outputs = outputs * broadcast_gamma 333 | 334 | if self.center: 335 | broadcast_beta = K.reshape(self.beta, broadcast_shape) 336 | outputs = outputs + broadcast_beta 337 | 338 | # finally we reshape the output back to the input shape 339 | outputs = K.reshape(outputs, tensor_input_shape) 340 | 341 | return outputs 342 | 343 | def get_config(self): 344 | config = { 345 | 'groups': self.groups, 346 | 'axis': self.axis, 347 | 'epsilon': self.epsilon, 348 | 'center': self.center, 349 | 'scale': self.scale, 350 | 'beta_initializer': initializers.serialize(self.beta_initializer), 351 | 'gamma_initializer': initializers.serialize(self.gamma_initializer), 352 | 'beta_regularizer': regularizers.serialize(self.beta_regularizer), 353 | 'gamma_regularizer': regularizers.serialize(self.gamma_regularizer), 354 | 'beta_constraint': constraints.serialize(self.beta_constraint), 355 | 'gamma_constraint': constraints.serialize(self.gamma_constraint) 356 | } 357 | base_config = super(GroupNormalization, self).get_config() 358 | return dict(list(base_config.items()) + list(config.items())) 359 | 360 | def compute_output_shape(self, input_shape): 361 | return input_shape -------------------------------------------------------------------------------- /utilities/tboard.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ################################################################ 3 | # Utilities - Extended tensorboard tools 4 | # @ Modern Deep Network Toolkits for Tensorflow-Keras 5 | # Yuchen Jin @ cainmagi@gmail.com 6 | # Requirements: (Pay attention to version) 7 | # python 3.6+ 8 | # tensorflow r1.13+ 9 | # Extended tools for parsing the logs in the tensorboard file. 10 | # It enables users to extract records from tensorboard without 11 | # launching the web interface. It also provides a python func- 12 | # tion for launching the web interface. 13 | # Version: 0.22 # 2019/12/05 14 | # Comments: 15 | # 1. Minor change for provdiding verbose option to 16 | # TensorLogHandle.tohdf5. 17 | # Version: 0.20 # 2019/11/27 18 | # Comments: 19 | # 1. Finish TensorLogHandle. It may be updated in future 20 | # versions. 21 | # 2. Fix a minor bug for TensorLogHandle.tohdf5. 22 | # Version: 0.10 # 2019/11/26 23 | # Comments: 24 | # Create this submodule and finish TensorBoardTool, launch. 
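# A quick usage sketch (the import path and the log directory are
# illustrative; this assumes the package is importable as `mdnt`):
#   from mdnt.utilities import tboard
#   tboard.launch('./logs', ip='localhost', port=8000)
# or, to parse the records without launching the web interface:
#   records = tboard.TensorLogHandle('./logs', mode='scalars')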
25 | ################################################################ 26 | ''' 27 | 28 | import os, sys, logging 29 | import numpy as np 30 | import h5py 31 | from tensorboard import default 32 | from tensorboard import program 33 | from tensorboard.backend.event_processing import event_accumulator 34 | 35 | class TensorBoardTool: 36 | '''Tensorboard web interface launcher. 37 | Adapted from the original work here: 38 | https://stackoverflow.com/a/52295534 39 | This class is equivalent to call launch() in this module. 40 | Arguments: 41 | log_dir: the path where we store the logs. 42 | ip [optional]: the IP address for the web interface. 43 | port [optional]: the port number for the web interface. 44 | ''' 45 | def __init__(self, log_dir, ip=None, port=None): 46 | '''Initialization 47 | see the docstring of this class. 48 | ''' 49 | self.log_dir = log_dir 50 | self.ip = ip 51 | self.port = port 52 | 53 | def __collect_argvs(self): 54 | argvs = [None, '--logdir', str(self.log_dir)] 55 | if self.ip: 56 | argvs.extend(['--host', str(self.ip)]) 57 | if self.port: 58 | argvs.extend(['--port', str(self.port)]) 59 | return argvs 60 | 61 | def run(self): 62 | '''Launch the tensorboard. 63 | Note that this method would not block the main thread, we 64 | suggest to use launch() instead of this when you do not need 65 | to work with subthread. 66 | ''' 67 | program.setup_environment() 68 | # Remove http messages 69 | log = logging.getLogger('werkzeug').setLevel(logging.ERROR) 70 | # Start tensorboard server 71 | _tb = program.TensorBoard( 72 | default.get_plugins(), 73 | program.get_default_assets_zip_provider()) 74 | _tb.configure(argv=self.__collect_argvs()) 75 | url = _tb.launch() 76 | print('TensorBoard at {0}, working on path: {1}.'.format(url, self.log_dir)) 77 | 78 | class TensorLogHandle: 79 | '''Read a tensorboard log file. 80 | This is a dictionary-lite interface for parsing a tensorboard 81 | file. It manages a EventAccumulator and wrap it with key-driven 82 | interfaces. 83 | Sometimes the handle may be slow, this is caused by the backend 84 | EventAccumulator. A possible way for solving this problem is 85 | passing a size guide during the initialization, but this sugge- 86 | stion could not guarantee the efficiency. 87 | Arguments: 88 | path: A file path to a directory containing tf events 89 | files, or a single tf events file. The accumulator 90 | will load events from this path. 91 | mode: The default working mode. Should be one of the 92 | avaliable list: 93 | (1) scalars (2) images (3) audio (4) histograms 94 | (5) distributions (6) tensors (7) metadata 95 | size_guidance: Information on how much data the 96 | EventAccumulator should store in memory. The 97 | DEFAULT_SIZE_GUIDANCE tries not to store too much so as 98 | to avoid OOMing the client. The size_guidance should be 99 | a map from a `tagType` string to an integer representing 100 | the number of items to keep per tag for items of that 101 | `tagType`. If the size is 0, all events are stored. 102 | compression_bps: Information on how the `EventAccumulator` 103 | should compress histogram data for the 104 | `CompressedHistograms` tag (for details see 105 | `ProcessCompressedHistogram`). 106 | purge_orphaned_data: Whether to discard any events that 107 | were "orphaned" by a TensorFlow restart. 
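    A brief usage sketch (the log path and the tag name are illustrative):
    ```python
    handle = TensorLogHandle('./logs/run1', mode='scalars')
    print(list(handle.keys()))       # all scalar tags found in the events file
    record = handle['loss']          # a dict of numpy arrays with the keys
                                     # 'wall_time', 'step' and 'value'
    handle.tohdf5('./run1_scalars')  # dump every scalar tag to run1_scalars.h5
    ```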
108 | ''' 109 | MODE_LIST = {'scalars':event_accumulator.SCALARS, 110 | 'images': event_accumulator.IMAGES, 111 | 'audio': event_accumulator.AUDIO, 112 | 'histograms': event_accumulator.HISTOGRAMS, 113 | 'distributions': event_accumulator.COMPRESSED_HISTOGRAMS, 114 | 'tensors': event_accumulator.TENSORS, 115 | 'metadata': event_accumulator.RUN_METADATA} 116 | 117 | def __init__(self, path, mode='scalars', size_guidance=None, 118 | compression_bps=event_accumulator.NORMAL_HISTOGRAM_BPS, 119 | purge_orphaned_data=True): 120 | '''Initialization 121 | see the docstring of this class. 122 | ''' 123 | self.__curMode = None 124 | self.setDefaultMode(mode) 125 | self.accumulator = event_accumulator.EventAccumulator(path=path, 126 | size_guidance=size_guidance, compression_bps=compression_bps, 127 | purge_orphaned_data=purge_orphaned_data) 128 | self.accumulator.Reload() 129 | self.__keys = self.accumulator.Tags() 130 | 131 | def setDefaultMode(self, mode): 132 | '''Set the default working mode. 133 | Arguments: 134 | mode: The default mode, should be chosen from the avaliable 135 | list: 136 | (1) scalars (2) images (3) audio (4) histograms 137 | (5) tensors 138 | ''' 139 | if self.__checkMode(mode): 140 | self.__curMode = self.MODE_LIST[mode] 141 | else: 142 | raise KeyError('Should choose mode from: {0}.'.format(self.MODE_LIST.keys())) 143 | 144 | @classmethod 145 | def __checkMode(cls, mode): 146 | return mode in cls.MODE_LIST 147 | 148 | def __contains__(self, key): 149 | return key in self.__keys[self.__curMode] 150 | 151 | def __getitem__(self, key): 152 | try: 153 | if isinstance(key, tuple) and len(key) == 2: 154 | if not (key[1] in self.__keys[self.MODE_LIST[key[0]]]): 155 | raise KeyError 156 | return self.__getval(self.MODE_LIST[key[0]], key[1]) 157 | else: 158 | if not (key in self.__keys[self.__curMode]): 159 | raise KeyError 160 | return self.__getval(self.__curMode, key) 161 | except KeyError: 162 | raise KeyError('Could not find the item: {0}.'.format(key)) 163 | 164 | def __len__(self): 165 | return len(self.__keys[self.__curMode]) 166 | 167 | def __bool__(self): 168 | return bool(self.__keys[self.__curMode]) 169 | 170 | def __iter__(self): 171 | return iter(self.__keys[self.__curMode]) 172 | 173 | def keys(self, mode=None): 174 | '''Get all avaliable keys. 175 | Arguments: 176 | mode: The working mode, if not specified, would use 177 | default mode. 178 | ''' 179 | if mode is not None: 180 | if not self.__checkMode(mode): 181 | raise KeyError('The specified mode is invalid, should choose from {0}.'.format(self.MODE_LIST.keys())) 182 | return iter(self.__keys[self.MODE_LIST[mode]]) 183 | else: 184 | return iter(self.__keys[self.__curMode]) 185 | 186 | def items(self, mode=None): 187 | '''Get all avaliable (k, v) pairs. 188 | Arguments: 189 | mode: The working mode, if not specified, would use 190 | default mode. 191 | ''' 192 | if mode is not None: 193 | if not self.__checkMode(mode): 194 | raise KeyError('The specified mode is invalid, should choose from {0}.'.format(self.MODE_LIST.keys())) 195 | return map(lambda key: (key, self.__getval(self.MODE_LIST[mode], key)), self.__keys[self.MODE_LIST[mode]]) 196 | else: 197 | return map(lambda key: (key, self.__getval(self.__curMode, key)), self.__keys[self.__curMode]) 198 | 199 | def values(self, mode=None): 200 | '''Get all avaliable values. 201 | Arguments: 202 | mode: The working mode, if not specified, would use 203 | default mode. 
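        For example (a sketch; `handle` is a TensorLogHandle instance):
        ```python
        for record in handle.values(mode='scalars'):
            print(record['step'][-1], record['value'][-1])
        ```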
204 | ''' 205 | if mode is not None: 206 | if not self.__checkMode(mode): 207 | raise KeyError('The specified mode is invalid, should choose from {0}.'.format(self.MODE_LIST.keys())) 208 | return map(lambda key: self.__getval(mode, key), self.__keys[self.MODE_LIST[mode]]) 209 | else: 210 | return map(lambda key: self.__getval(self.__curMode, key), self.__keys[self.__curMode]) 211 | 212 | def __getval(self, mode, key): 213 | '''Protected function for getting item. 214 | Should not be called by users. 215 | ''' 216 | if mode == event_accumulator.SCALARS: 217 | return self.__parserScalar(self.accumulator.Scalars(key)) 218 | elif mode == event_accumulator.IMAGES: 219 | return self.accumulator.Images(key) 220 | elif mode == event_accumulator.AUDIO: 221 | return self.accumulator.Audio(key) 222 | elif mode == event_accumulator.HISTOGRAMS: 223 | return self.__parserHistogram(self.accumulator.Histograms(key)) 224 | elif mode == event_accumulator.COMPRESSED_HISTOGRAMS: 225 | return self.__parserDistribution(self.accumulator.CompressedHistograms(key)) 226 | elif mode == event_accumulator.RUN_METADATA: 227 | return self.accumulator.RunMetadata(key) 228 | elif mode == event_accumulator.TENSORS: 229 | return self.accumulator.Tensors(key) 230 | else: 231 | raise KeyError('The specified mode is invalid.') 232 | 233 | @staticmethod 234 | def __parserScalar(scalars): 235 | '''Parse the scalar list, and arrange the results.''' 236 | resDict = dict() 237 | if not scalars: 238 | return resDict 239 | else: 240 | for k in scalars[0]._asdict(): 241 | resDict[k] = [] 242 | for i in scalars: 243 | for k, v in i._asdict().items(): 244 | resDict[k].append(v) 245 | for k, v in resDict.items(): 246 | resDict[k] = np.asarray(v, dtype=np.float32) 247 | return resDict 248 | 249 | @staticmethod 250 | def __parserHistogram(histograms): 251 | '''Parse the histogram list, and arrange the results.''' 252 | resDict = dict() 253 | if not histograms: 254 | return resDict 255 | else: 256 | for k in histograms[0]._asdict(): 257 | resDict[k] = [] 258 | for i in histograms: 259 | for k, v in i._asdict().items(): 260 | if k == 'histogram_value': 261 | v = { 262 | 'x': np.asarray(v.bucket_limit, dtype=np.float32), 263 | 'n': np.asarray(v.bucket, dtype=np.float32), 264 | 'count': v.num 265 | } 266 | resDict[k].append(v) 267 | for k, v in resDict.items(): 268 | if k in ('wall_time', 'step'): 269 | resDict[k] = np.asarray(v, dtype=np.float32) 270 | return resDict 271 | 272 | @staticmethod 273 | def __parserDistribution(distributions): 274 | '''Parse the distribution list, and arrange the results.''' 275 | resDict = dict() 276 | if not distributions: 277 | return resDict 278 | else: 279 | for k in distributions[0]._asdict(): 280 | resDict[k] = [] 281 | for i in distributions: 282 | for k, v in i._asdict().items(): 283 | if k == 'compressed_histogram_values': 284 | x = [] 285 | val = [] 286 | for j in v: 287 | x.append(j.basis_point) 288 | val.append(j.value) 289 | v = np.stack([x, val], axis=0) 290 | resDict[k].append(v) 291 | for k, v in resDict.items(): 292 | resDict[k] = np.asarray(v, dtype=np.float32) 293 | return resDict 294 | 295 | def tohdf5(self, f, mode=None, compressed=True, verbose=1): 296 | '''Convert all data in a specific mode to HDF5 format. 297 | Arguments: 298 | f: a file path (would create a new file). 299 | or an h5py file object. 300 | or an h5py data group object. 301 | mode: the selected mode, if left None, would use the 302 | default mode. 303 | compressed: whether to apply the compression. 
304 | verbose: The level for showing messages during the 305 | conversion. 306 | ''' 307 | if mode is None: 308 | mode = self.__curMode 309 | if mode not in (event_accumulator.SCALARS, 310 | event_accumulator.HISTOGRAMS, 311 | event_accumulator.COMPRESSED_HISTOGRAMS): 312 | raise ValueError('Your current mode is {0}, this type does' 313 | 'not support HDF5 conversion.'.format(mode)) 314 | holdF = isinstance(f, str) 315 | if holdF: 316 | f = os.path.splitext(f)[0] + '.h5' 317 | f = h5py.File(f, 'w') 318 | try: 319 | name = f.filename 320 | except AttributeError: 321 | name = f.name 322 | f.attrs['type'] = mode 323 | for k, v in self.items(): 324 | g = f.create_group(k) 325 | self.__recursive_writer(g=g, obj=v, compressed=compressed) 326 | if verbose > 0: 327 | print('Having dumped {0}.'.format(k)) 328 | if holdF: 329 | f.close() 330 | if verbose > 0: 331 | print('Having dumped the data {0} successfully.'.format(name)) 332 | 333 | @classmethod 334 | def __recursive_writer(cls, g, obj, compressed=True): 335 | '''Recursive writer 336 | Should not be gotten accessed by users''' 337 | if isinstance(obj, dict): 338 | for k, v in obj.items(): 339 | cls.__recursive_writer_work(g, k, v, compressed) 340 | elif isinstance(obj, (list, tuple)): 341 | for i, v in enumerate(obj): 342 | cls.__recursive_writer_work(g, str(i), v, compressed) 343 | else: 344 | raise ValueError('The data part could not get parsed, check {0}'.format(obj)) 345 | 346 | @classmethod 347 | def __recursive_writer_work(cls, g, k, v, compressed=True): 348 | compression = 'gzip' if compressed else None 349 | if isinstance(v, (int, float)): 350 | g.create_dataset(k, data=float(v), dtype=np.float32) 351 | elif isinstance(v, np.ndarray): 352 | g.create_dataset(k, data=v, dtype=np.float32, chunks=((v.ndim>1) or compressed), compression=compression, maxshape=(None, *v.shape[1:])) 353 | elif isinstance(v, (dict, list, tuple)): 354 | newg = g.create_group(k) 355 | cls.__recursive_writer(newg, obj=v, compressed=compressed) 356 | else: 357 | raise ValueError('The data part could not get parsed, check {0}: {1}'.format(k, v)) 358 | 359 | def launch(log_dir, ip=None, port=None): 360 | '''Tensorboard web interface launcher (function). 361 | Functional interface for launching a tensorboard. 362 | This class is equivalent to call TensorBoardTool.run() in this 363 | module. 364 | Arguments: 365 | log_dir: the path where we store the logs. 366 | ip [optional]: the IP address for the web interface. 367 | port [optional]: the port number for the web interface. 368 | ''' 369 | osKey = 'GCS_READ_CACHE_DISABLED' 370 | getOS = os.environ.get(osKey, None) 371 | os.environ[osKey] = '1' 372 | tb = TensorBoardTool(log_dir, ip=ip, port=port) 373 | tb.run() 374 | input('Press Enter to ternimate this program.') 375 | if getOS is None: 376 | os.environ.pop(osKey) 377 | else: 378 | os.environ[osKey] = getOS 379 | 380 | if __name__ == '__main__': 381 | os.chdir(sys.path[0]) 382 | def test_thandle(): 383 | th = TensorLogHandle('../../logs/test', 'scalars') 384 | #print(th['residual2d_transpose/alpha_0']) 385 | th.tohdf5('../../getscalar') 386 | 387 | #launch('../../logs/test', 'localhost', 8000) 388 | test_thandle() 389 | -------------------------------------------------------------------------------- /optimizers/adaptive.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ################################################################ 3 | # Optimizers - Extended adaptive learning rate optimizers. 
4 | # @ Modern Deep Network Toolkits for Tensorflow-Keras 5 | # Yuchen Jin @ cainmagi@gmail.com 6 | # Requirements: (Pay attention to version) 7 | # python 3.6+ 8 | # tensorflow r1.13+ 9 | # This module contains extended optimizers that are based on 10 | # adaptive learning rate theory. Generally these optimizers 11 | # could converge more quickly while the solution is easier to 12 | # be overfitting. 13 | # Version: 0.10 # 2019/6/27 14 | # Comments: 15 | # Create this submodule, finish MNadam, Adabound and 16 | # Nadabound. 17 | ################################################################ 18 | ''' 19 | 20 | from tensorflow.python.framework import ops 21 | from tensorflow.python.keras import optimizers 22 | from tensorflow.python.keras import backend as K 23 | from tensorflow.python.ops import math_ops 24 | from tensorflow.python.ops import state_ops 25 | from tensorflow.python.ops import gen_math_ops 26 | 27 | class Nadabound(optimizers.Optimizer): 28 | """Nesterov Adabound optimizer 29 | The Nesterov version of the Adabound optimizer. This implementation is 30 | modified from mdnt.optimizers.Nadam and mdnt.optimizers.Adabound. Compared 31 | to Adabound optimizer, it uses estimated Nesterov gradient to update the 32 | momentum. 33 | Arguments: 34 | lr: float >= 0. Learning rate. 35 | lr_boost: float >=0. Suggest to > 1, because generally SGD optimizer 36 | requires a larger learning rate than Adam. 37 | gamma: float > 0. learning rate converging speed control factor. 38 | beta_1/beta_2: floats, 0 < beta < 1. Generally close to 1. 39 | epsilon: float >= 0. Fuzz factor. If `None`, defaults to `K.epsilon()`. 40 | decay: float >= 0. Learning rate decay over each update. 41 | amsgrad: boolean. Whether to apply the AMSGrad variant of this 42 | algorithm from the paper "On the Convergence of Adam and 43 | Beyond". 44 | sgdcorr: boolean. Because adam and SGD update momentum by different ways, 45 | when setting this flag True, the momentum updating rate would be 46 | approaching from 1. - beta_1 to 1. This correction is not applied in 47 | the original paper. Users should determine whether to use it carefully. 48 | """ 49 | 50 | def __init__(self, 51 | lr=0.002, 52 | lr_boost=10.0, 53 | gamma=1e-3, 54 | beta_1=0.9, 55 | beta_2=0.999, 56 | epsilon=None, 57 | decay=0., 58 | schedule_decay=0.004, 59 | amsgrad=False, 60 | sgdcorr=True, 61 | **kwargs): 62 | super(Nadabound, self).__init__(**kwargs) 63 | with K.name_scope(self.__class__.__name__): 64 | self.iterations = K.variable(0, dtype='int64', name='iterations') 65 | self.m_schedule = K.variable(1., name='m_schedule') 66 | self.lr = K.variable(lr, name='lr') 67 | self.beta_1 = K.variable(beta_1, name='beta_1') 68 | self.beta_2 = K.variable(beta_2, name='beta_2') 69 | self.decay = K.variable(decay, name='decay') 70 | self.lr_boost = K.variable(lr_boost, name='lr_boost') 71 | self.gamma = K.variable(gamma, name='gamma') 72 | if epsilon is None: 73 | epsilon = K.epsilon() 74 | self.epsilon = epsilon 75 | self.initial_decay = decay 76 | self.schedule_decay = schedule_decay 77 | self.amsgrad = amsgrad 78 | self.sgdcorr = sgdcorr 79 | 80 | def get_updates(self, loss, params): 81 | grads = self.get_gradients(loss, params) 82 | self.updates = [state_ops.assign_add(self.iterations, 1)] 83 | 84 | lr = self.lr 85 | if self.initial_decay > 0: 86 | lr = lr * ( 1. / (1. + self.decay * math_ops.cast(self.iterations,K.dtype(self.decay))) ) 87 | 88 | t = math_ops.cast(self.iterations, K.floatx()) + 1 89 | 90 | lower_bound = self.lr_boost * (1. - 1. 
/ (self.gamma * t + 1.)) 91 | upper_bound = self.lr_boost * (1. + 1. / (self.gamma * t)) 92 | if self.sgdcorr: 93 | m_rate = 1. - self.beta_1 / (self.gamma * t + 1.) 94 | else: 95 | m_rate = 1. - self.beta_1 96 | 97 | # Due to the recommendations in [2], i.e. warming momentum schedule 98 | momentum_cache_t = self.beta_1 * ( 99 | 1. - 0.5 * 100 | (math_ops.pow(K.cast_to_floatx(0.96), t * self.schedule_decay))) 101 | momentum_cache_t_1 = self.beta_1 * ( 102 | 1. - 0.5 * 103 | (math_ops.pow(K.cast_to_floatx(0.96), (t + 1) * self.schedule_decay))) 104 | m_schedule_new = self.m_schedule * momentum_cache_t 105 | m_schedule_next = self.m_schedule * momentum_cache_t * momentum_cache_t_1 106 | self.updates.append((self.m_schedule, m_schedule_new)) 107 | 108 | ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] 109 | vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] 110 | if self.amsgrad: 111 | vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] 112 | else: 113 | vhats = [K.zeros(1) for _ in params] 114 | 115 | self.weights = [self.iterations] + ms + vs + vhats 116 | 117 | for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats): 118 | # the following equations given in [1] 119 | g_prime = g / (1. - m_schedule_new) 120 | m_t = self.beta_1 * m + m_rate * g 121 | m_t_prime = m_t / (1. - m_schedule_next) 122 | v_t = self.beta_2 * v + (1. - self.beta_2) * math_ops.square(g) 123 | if self.amsgrad: 124 | vhat_t = math_ops.maximum(vhat, v_t) 125 | self.updates.append(state_ops.assign(vhat, vhat_t)) 126 | v_t_prime = vhat_t / (1. - math_ops.pow(self.beta_2, t)) 127 | else: 128 | v_t_prime = v_t / (1. - math_ops.pow(self.beta_2, t)) 129 | m_t_bar = (m_rate / (1.-self.beta_1)) * (1. - momentum_cache_t) * g_prime + momentum_cache_t_1 * m_t_prime 130 | beta_1_reduce = 1. - math_ops.pow(self.beta_1, t) 131 | lr_v = gen_math_ops.reciprocal((gen_math_ops.sqrt(v_t_prime) + self.epsilon) * beta_1_reduce) 132 | 133 | self.updates.append(state_ops.assign(m, m_t)) 134 | self.updates.append(state_ops.assign(v, v_t)) 135 | 136 | lr_bound = gen_math_ops.minimum(gen_math_ops.maximum(lr_v, lower_bound), upper_bound) 137 | p_t = p - lr * lr_bound * beta_1_reduce * m_t_bar 138 | 139 | new_p = p_t 140 | 141 | # Apply constraints. 142 | if getattr(p, 'constraint', None) is not None: 143 | new_p = p.constraint(new_p) 144 | 145 | self.updates.append(state_ops.assign(p, new_p)) 146 | return self.updates 147 | 148 | def get_config(self): 149 | config = { 150 | 'lr': float(K.get_value(self.lr)), 151 | 'lr_boost': float(K.get_value(self.lr_boost)), 152 | 'gamma': float(K.get_value(self.gamma)), 153 | 'beta_1': float(K.get_value(self.beta_1)), 154 | 'beta_2': float(K.get_value(self.beta_2)), 155 | 'epsilon': self.epsilon, 156 | 'decay': float(K.get_value(self.decay)), 157 | 'schedule_decay': self.schedule_decay, 158 | 'amsgrad': self.amsgrad, 159 | 'sgdcorr': self.sgdcorr 160 | } 161 | base_config = super(Nadabound, self).get_config() 162 | return dict(list(base_config.items()) + list(config.items())) 163 | 164 | class Adabound(optimizers.Optimizer): 165 | """Adabound optimizer. 166 | This optimizer would get initialized by an initial learning rate, a final 167 | learning rate and a converging speed control parameter, i.e. gamma which 168 | would define the upper bound and the lower bound of the adaptive learning 169 | rate. At the beginning, the optimizer behaves like Adam, but when its 170 | learning rate get converged, it would behaves like SGD+Momentum. 
171 | The converging speed control function is defined as: 172 | lower_bound = final_lr * (1. - 1. / (self.gamma * t + 1.)) 173 | upper_bound = final_lr * (1. + 1. / (self.gamma * t)) 174 | The lower_bound would be (0.95 * final_lr) after (gamma * t = 19). 175 | The upper_bound would be (0.95 * final_lr) after (gamma * t = 20). 176 | Users need to specify proper gamma to make sure that the algorithm would not 177 | get converged too quickly. 178 | Arguments: 179 | lr: float >= 0. Learning rate. 180 | lr_boost: float >=0. Final learning rate (for SGD) is defined as: 181 | final_lr = lr * lr_boost. 182 | gamma: float > 0. learning rate converging speed control factor. 183 | beta_1: float, 0 < beta < 1. Generally close to 1. 184 | beta_2: float, 0 < beta < 1. Generally close to 1. 185 | epsilon: float >= 0. Fuzz factor. If `None`, defaults to `K.epsilon()`. 186 | decay: float >= 0. Learning rate decay over each update. 187 | amsgrad: boolean. Whether to apply the AMSGrad variant of this 188 | algorithm from the paper "On the Convergence of Adam and 189 | Beyond". 190 | sgdcorr: boolean. Because adam and SGD update momentum by different ways, 191 | when setting this flag True, the momentum updating rate would be 192 | approaching from 1. - beta_1 to 1. This correction is not applied in 193 | the original paper. Users should determine whether to use it carefully. 194 | """ 195 | 196 | def __init__(self, 197 | lr=0.001, 198 | lr_boost=10.0, 199 | gamma=1e-3, 200 | beta_1=0.9, 201 | beta_2=0.999, 202 | epsilon=None, 203 | decay=0., 204 | amsgrad=False, 205 | sgdcorr=True, 206 | **kwargs): 207 | super(Adabound, self).__init__(**kwargs) 208 | with K.name_scope(self.__class__.__name__): 209 | self.iterations = K.variable(0, dtype='int64', name='iterations') 210 | self.lr = K.variable(lr, name='lr') 211 | self.beta_1 = K.variable(beta_1, name='beta_1') 212 | self.beta_2 = K.variable(beta_2, name='beta_2') 213 | self.decay = K.variable(decay, name='decay') 214 | self.lr_boost = K.variable(lr_boost, name='lr_boost') 215 | self.gamma = K.variable(gamma, name='gamma') 216 | if epsilon is None: 217 | epsilon = K.epsilon() 218 | self.epsilon = epsilon 219 | self.initial_decay = decay 220 | self.amsgrad = amsgrad 221 | self.sgdcorr = sgdcorr 222 | 223 | def get_updates(self, loss, params): 224 | grads = self.get_gradients(loss, params) 225 | self.updates = [] 226 | 227 | lr = self.lr 228 | if self.initial_decay > 0: 229 | lr = lr * ( 1. / (1. + self.decay * math_ops.cast(self.iterations,K.dtype(self.decay))) ) 230 | 231 | with ops.control_dependencies([state_ops.assign_add(self.iterations, 1)]): 232 | t = math_ops.cast(self.iterations, K.floatx()) 233 | lr_t = gen_math_ops.sqrt(1. - math_ops.pow(self.beta_2, t)) / (1. - math_ops.pow(self.beta_1, t)) 234 | 235 | lower_bound = self.lr_boost * (1. - 1. / (self.gamma * t + 1.)) 236 | upper_bound = self.lr_boost * (1. + 1. / (self.gamma * t)) 237 | if self.sgdcorr: 238 | m_rate = 1. - self.beta_1 / (self.gamma * t + 1.) 239 | else: 240 | m_rate = 1. - self.beta_1 241 | 242 | ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] 243 | vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] 244 | if self.amsgrad: 245 | vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] 246 | else: 247 | vhats = [K.zeros(1) for _ in params] 248 | self.weights = [self.iterations] + ms + vs + vhats 249 | 250 | for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats): 251 | m_t = (self.beta_1 * m) + m_rate * g 252 | v_t = (self.beta_2 * v) + (1. 
- self.beta_2) * math_ops.square(g) 253 | if self.amsgrad: 254 | vhat_t = math_ops.maximum(vhat, v_t) 255 | lr_v = gen_math_ops.reciprocal(gen_math_ops.sqrt(vhat_t) + self.epsilon) 256 | self.updates.append(state_ops.assign(vhat, vhat_t)) 257 | else: 258 | lr_v = gen_math_ops.reciprocal(gen_math_ops.sqrt(v_t) + self.epsilon) 259 | 260 | lr_bound = gen_math_ops.minimum(gen_math_ops.maximum(lr_t * lr_v, lower_bound), upper_bound) 261 | p_t = p - lr * lr_bound * m_t 262 | 263 | self.updates.append(state_ops.assign(m, m_t)) 264 | self.updates.append(state_ops.assign(v, v_t)) 265 | 266 | new_p = p_t 267 | 268 | # Apply constraints. 269 | if getattr(p, 'constraint', None) is not None: 270 | new_p = p.constraint(new_p) 271 | 272 | self.updates.append(state_ops.assign(p, new_p)) 273 | return self.updates 274 | 275 | def get_config(self): 276 | config = { 277 | 'lr': float(K.get_value(self.lr)), 278 | 'lr_boost': float(K.get_value(self.lr_boost)), 279 | 'gamma': float(K.get_value(self.gamma)), 280 | 'beta_1': float(K.get_value(self.beta_1)), 281 | 'beta_2': float(K.get_value(self.beta_2)), 282 | 'decay': float(K.get_value(self.decay)), 283 | 'epsilon': self.epsilon, 284 | 'amsgrad': self.amsgrad, 285 | 'sgdcorr': self.sgdcorr 286 | } 287 | base_config = super(Adabound, self).get_config() 288 | return dict(list(base_config.items()) + list(config.items())) 289 | 290 | class MNadam(optimizers.Optimizer): 291 | """Nesterov Adam optimizer (MDNT version) 292 | We use MNadam here to avoid the name conflict on tf.keras.optimizers.Nadam. 293 | Much like Adam is essentially RMSprop with momentum, Nadam is Adam RMSprop with 294 | Nesterov momentum. Default parameters follow those provided in the paper. 295 | It is recommended to leave the parameters of this optimizer at their default 296 | values. 297 | This optimizer is modifed based on tf.keras.optimizers.Nadam. Compared to 298 | original implementation, this version supports two more things: 299 | 1. Decay rate for the initial learning rate. 300 | 2. Amsgrad option. 301 | Arguments: 302 | lr: float >= 0. Learning rate. 303 | beta_1/beta_2: floats, 0 < beta < 1. Generally close to 1. 304 | epsilon: float >= 0. Fuzz factor. If `None`, defaults to `K.epsilon()`. 305 | decay: float >= 0. Learning rate decay over each update. 306 | amsgrad: boolean. Whether to apply the AMSGrad variant of this 307 | algorithm from the paper "On the Convergence of Adam and Beyond". 308 | """ 309 | 310 | def __init__(self, 311 | lr=0.002, 312 | beta_1=0.9, 313 | beta_2=0.999, 314 | epsilon=None, 315 | decay=0., 316 | schedule_decay=0.004, 317 | amsgrad=False, 318 | **kwargs): 319 | super(MNadam, self).__init__(**kwargs) 320 | with K.name_scope(self.__class__.__name__): 321 | self.iterations = K.variable(0, dtype='int64', name='iterations') 322 | self.m_schedule = K.variable(1., name='m_schedule') 323 | self.lr = K.variable(lr, name='lr') 324 | self.beta_1 = K.variable(beta_1, name='beta_1') 325 | self.beta_2 = K.variable(beta_2, name='beta_2') 326 | self.decay = K.variable(decay, name='decay') 327 | if epsilon is None: 328 | epsilon = K.epsilon() 329 | self.epsilon = epsilon 330 | self.initial_decay = decay 331 | self.schedule_decay = schedule_decay 332 | self.amsgrad = amsgrad 333 | 334 | def get_updates(self, loss, params): 335 | grads = self.get_gradients(loss, params) 336 | self.updates = [state_ops.assign_add(self.iterations, 1)] 337 | 338 | lr = self.lr 339 | if self.initial_decay > 0: 340 | lr = lr * ( 1. / (1. 
+ self.decay * math_ops.cast(self.iterations,K.dtype(self.decay))) ) 341 | 342 | t = math_ops.cast(self.iterations, K.floatx()) + 1 343 | 344 | # Due to the recommendations in [2], i.e. warming momentum schedule 345 | momentum_cache_t = self.beta_1 * ( 346 | 1. - 0.5 * 347 | (math_ops.pow(K.cast_to_floatx(0.96), t * self.schedule_decay))) 348 | momentum_cache_t_1 = self.beta_1 * ( 349 | 1. - 0.5 * 350 | (math_ops.pow(K.cast_to_floatx(0.96), (t + 1) * self.schedule_decay))) 351 | m_schedule_new = self.m_schedule * momentum_cache_t 352 | m_schedule_next = self.m_schedule * momentum_cache_t * momentum_cache_t_1 353 | self.updates.append((self.m_schedule, m_schedule_new)) 354 | 355 | ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] 356 | vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] 357 | if self.amsgrad: 358 | vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] 359 | else: 360 | vhats = [K.zeros(1) for _ in params] 361 | 362 | self.weights = [self.iterations] + ms + vs + vhats 363 | 364 | for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats): 365 | # the following equations given in [1] 366 | g_prime = g / (1. - m_schedule_new) 367 | m_t = self.beta_1 * m + (1. - self.beta_1) * g 368 | m_t_prime = m_t / (1. - m_schedule_next) 369 | v_t = self.beta_2 * v + (1. - self.beta_2) * math_ops.square(g) 370 | if self.amsgrad: 371 | vhat_t = math_ops.maximum(vhat, v_t) 372 | self.updates.append(state_ops.assign(vhat, vhat_t)) 373 | v_t_prime = vhat_t / (1. - math_ops.pow(self.beta_2, t)) 374 | else: 375 | v_t_prime = v_t / (1. - math_ops.pow(self.beta_2, t)) 376 | m_t_bar = (1. - momentum_cache_t) * g_prime + momentum_cache_t_1 * m_t_prime 377 | 378 | self.updates.append(state_ops.assign(m, m_t)) 379 | self.updates.append(state_ops.assign(v, v_t)) 380 | 381 | p_t = p - lr * m_t_bar / (gen_math_ops.sqrt(v_t_prime) + self.epsilon) 382 | 383 | new_p = p_t 384 | 385 | # Apply constraints. 386 | if getattr(p, 'constraint', None) is not None: 387 | new_p = p.constraint(new_p) 388 | 389 | self.updates.append(state_ops.assign(p, new_p)) 390 | return self.updates 391 | 392 | def get_config(self): 393 | config = { 394 | 'lr': float(K.get_value(self.lr)), 395 | 'beta_1': float(K.get_value(self.beta_1)), 396 | 'beta_2': float(K.get_value(self.beta_2)), 397 | 'epsilon': self.epsilon, 398 | 'decay': float(K.get_value(self.decay)), 399 | 'schedule_decay': self.schedule_decay, 400 | 'amsgrad': self.amsgrad 401 | } 402 | base_config = super(MNadam, self).get_config() 403 | return dict(list(base_config.items()) + list(config.items())) -------------------------------------------------------------------------------- /utilities/callbacks.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ################################################################ 3 | # Utilities - Callbacks 4 | # @ Modern Deep Network Toolkits for Tensorflow-Keras 5 | # Yuchen Jin @ cainmagi@gmail.com 6 | # Requirements: (Pay attention to version) 7 | # python 3.6+ 8 | # tensorflow r1.13+ 9 | # Extend loss functions. These functions could serve as both 10 | # losses and metrics. 11 | # Version: 0.23 # 2019/10/27 12 | # Comments: 13 | # Enable ModelCheckpoint to use compression to save models. 14 | # Version: 0.22 # 2019/10/23 15 | # Comments: 16 | # Enable ModelCheckpoint to not save optimizer. 17 | # Version: 0.20 # 2019/10/15 18 | # Comments: 19 | # Finish LossWeightsScheduler. 20 | # Version: 0.18 # 2019/6/24 21 | # Comments: 22 | # 1. 
Finish ModelWeightsReducer. 23 | # 2. Fix bugs for ModelWeightsReducer. 24 | # 3. Find a better way for implementing the soft thresholding 25 | # for ModelWeightsReducer. 26 | # Version: 0.16 # 2019/6/23 27 | # Comments: 28 | # Add OptimizerSwitcher and fix a bug. 29 | # Version: 0.10 # 2019/6/13 30 | # Comments: 31 | # Create this submodule, and finish ModelCheckpoint. 32 | ################################################################ 33 | ''' 34 |
35 | from datetime import datetime 36 | import os 37 | import numpy as np 38 | from tensorflow.python.ops import variables 39 | from tensorflow.python.keras import callbacks 40 | from tensorflow.python.keras import backend as K 41 | from tensorflow.python.platform import tf_logging as logging 42 | from tensorflow.python.ops import array_ops 43 | from tensorflow.python.ops import math_ops 44 | from tensorflow.python.ops import random_ops 45 | from tensorflow.python.ops import state_ops 46 | from tensorflow.python.ops import gen_math_ops 47 | 48 | from . import _default 49 |
50 | class LossWeightsScheduler(callbacks.Callback): 51 | """Loss weights scheduler. 52 | Arguments: 53 | schedule: a function that takes an epoch index as input 54 | (integer, indexed from 0) and returns the new 55 | loss weights as output. 56 | verbose: int. 0: quiet, 1: update messages. 57 | Here we show two examples: 58 | ```python 59 | # This function is designed for a two-phase training. In the 60 | # first phase, the loss weights are (0.8, 0.2); 61 | # In the second phase, the loss weights are 62 | # (0.2, 0.8); 63 | def scheduler(epoch): 64 | if epoch < 10: 65 | return [0.8, 0.2] 66 | else: 67 | return [0.2, 0.8] 68 | model.compile(..., loss_weights=[K.variable(0.5), 69 | K.variable(0.5)]) 70 | callback = mdnt.utilities.callbacks.LossWeightsScheduler(scheduler) 71 | model.fit(data, labels, epochs=100, callbacks=[callback], 72 | validation_data=(val_data, val_labels)) 73 | ``` 74 | ```python 75 | # This function is designed for a two-phase training. In the 76 | # first phase, the loss weights are (alpha=0.8, beta=0.2); 77 | # In the second phase, the loss weights are 78 | # (alpha=0.2, beta=0.8); 79 | def scheduler(epoch): 80 | if epoch < 10: 81 | return {'alpha':0.8, 'beta':0.2} 82 | else: 83 | return {'alpha':0.2, 'beta':0.8} 84 | model.compile(..., loss_weights={'alpha':K.variable(0.5), 85 | 'beta':K.variable(0.5)}) 86 | callback = mdnt.utilities.callbacks.LossWeightsScheduler(scheduler) 87 | model.fit(data, labels, epochs=100, callbacks=[callback], 88 | validation_data=(val_data, val_labels)) 89 | ``` 90 | """ 91 |
92 | def __init__(self, schedule, verbose=0): 93 | super(LossWeightsScheduler, self).__init__() 94 | self.schedule = schedule 95 | self.verbose = verbose 96 |
97 | def on_epoch_begin(self, epoch, logs=None): 98 | if not hasattr(self.model, 'loss_weights'): 99 | raise ValueError('Model must have a "loss_weights" attribute.') 100 | lw = self.model.loss_weights 101 | if lw is None: 102 | raise ValueError('model.loss_weights needs to be set.') 103 | lw_val = self.schedule(epoch) # Get the scheduled loss weights 104 | if isinstance(lw, dict): 105 | if not isinstance(lw_val, dict): 106 | raise ValueError('model.loss_weights is a dict, you need to ' 107 | 'provide a corresponding dict for updating it.') 108 | for k, v in lw.items(): 109 | if isinstance(v, variables.Variable): 110 | K.set_value(v, lw_val[k]) 111 | elif isinstance(lw, (list, tuple)): 112 | if not isinstance(lw_val, (list, tuple, np.ndarray)): 113 | raise ValueError('model.loss_weights is a sequence, you need to ' 114 | 'provide a corresponding sequence for updating it.') 115 | s = 0 116 | for v in lw: 117 | if isinstance(v, variables.Variable): 118 | K.set_value(v, lw_val[s]) 119 | s += 1 120 | else: 121 | raise ValueError('model.loss_weights could not be updated, please check ' 122 | 'your definition.') 123 | if self.verbose > 0: 124 | print('\nEpoch %05d: LossWeightsScheduler set the loss weights to %s.' % (epoch + 1, lw_val)) 125 |
126 | def on_epoch_end(self, epoch, logs=None): 127 | logs = logs or {} 128 | lw = self.model.loss_weights 129 | lw_var = None 130 | if isinstance(lw, dict): 131 | lw_var = {} 132 | for k, v in lw.items(): 133 | if isinstance(v, variables.Variable): 134 | lw_var[k] = K.get_value(v) 135 | else: 136 | lw_var[k] = v 137 | elif isinstance(lw, (list, tuple)): 138 | lw_var = [] 139 | for v in lw: 140 | if isinstance(v, variables.Variable): 141 | lw_var.append(K.get_value(v)) 142 | else: 143 | lw_var.append(v) 144 | logs['loss_weights'] = lw_var 145 |
146 | class ModelWeightsReducer(callbacks.Callback): 147 | """Model weights reducer 148 | Insert a weight decay operation before each iteration during the training. 149 | When it is applied to pure SGD, this callback is equivalent to adding 150 | L1/L2 regularization to each kernel. 151 | However, an optimizer with momentum or an adaptive learning rate would make 152 | the regularization terms not equivalent to weight decay. As an alternative, 153 | Tensorflow provides AdamW (weight decayed Adam) in the contribution module. 154 | This callback serves as an alternative for using weight decayed optimizers. 155 | For example, using ModelWeightsReducer(mu=0.1) + Adam is equivalent to 156 | using AdamW(weight_decay=0.1). 157 | This callback provides both the soft thresholding method and the weight decay 158 | method, which are used for maintaining the sparsity and the small module 159 | length, respectively. Compared to adding regularization terms, this callback 160 | does not get influenced by a specific optimizing algorithm.
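    A minimal sketch of the intended usage is shown below (the optimizer, the
    data and the coefficient are illustrative):
    ```python
    model.compile(optimizer='adam', loss='mse')
    decay = mdnt.utilities.callbacks.ModelWeightsReducer(mu=0.1)
    model.fit(data, labels, epochs=10, callbacks=[decay])
    ```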
161 | Arguments: 162 | lam: proximal coefficient. It is used to apply soft thresholding and 163 | maintain the sparsity of all kernels. 164 | It only take effects when > 0.0. 165 | mu: Tikhonov coefficient. It is used to apply the weight decay method 166 | and maintain the reduced length of the weight module. 167 | It only take effects when > 0.0. 168 | """ 169 | def __init__(self, lam=0.0, mu=0.0, epsilon=1e-5): 170 | with K.name_scope(self.__class__.__name__): 171 | self.get_lambda = K.variable(lam, name='lambda') 172 | self.get_mu = K.variable(mu, name='mu') 173 | self.bool_l1 = lam > 0.0 174 | self.bool_l2 = mu > 0.0 175 | self.session = None 176 | if not (self.bool_l1 or self.bool_l2): 177 | raise ValueError('Need to specify either one of "lam" and "mu".') 178 | 179 | def on_train_begin(self, logs=None): 180 | # First collect all trainable weights 181 | self.model._check_trainable_weights_consistency() 182 | get_w_list = self.model.trainable_weights 183 | get_w_dec_list = [] 184 | # Filter all weights and select those named 'kernel' 185 | for w in get_w_list: 186 | getname = w.name 187 | pos = getname.rfind('/') 188 | if pos != -1: 189 | checked = 'kernel' in getname[pos+1:] 190 | else: 191 | checked = 'kernel' in getname 192 | if checked: 193 | get_w_dec_list.append(w) 194 | if not get_w_dec_list: 195 | raise ValueError('The trainable weights of the model do not include any kernel.') 196 | # Define the update ops 197 | getlr = self.model.optimizer.lr 198 | with K.name_scope(self.__class__.__name__): 199 | self.w_updates = [] 200 | self.w_updates_aft = [] 201 | for w in get_w_dec_list: 202 | w_l = w 203 | if self.bool_l2: 204 | w_l = (1 - getlr * self.get_mu) * w_l 205 | if self.bool_l1: 206 | w_abs = math_ops.abs(w_l) + self.get_lambda 207 | w_l = ( gen_math_ops.sign(w_l) + gen_math_ops.sign(random_ops.random_uniform(w_l.get_shape(), minval=-1.0, maxval=1.0)) * math_ops.cast(gen_math_ops.equal(w_l, 0), dtype=w_l.dtype) ) * w_abs 208 | w_abs_x = math_ops.abs(w) - self.get_lambda 209 | w_x = gen_math_ops.sign(w) * math_ops.cast(gen_math_ops.greater(w_abs_x, 0), dtype=w.dtype) * w_abs_x 210 | self.w_updates_aft.append(state_ops.assign(w, w_x)) 211 | self.w_updates.append(state_ops.assign(w, w_l)) 212 | # Get and store the session 213 | self.session = K.get_session() 214 | 215 | def on_train_end(self, logs=None): 216 | self.session = None 217 | 218 | def on_train_batch_begin(self, batch, logs=None): 219 | # Define the updating function 220 | self.session.run(fetches=self.w_updates) 221 | 222 | def on_train_batch_end(self, batch, logs=None): 223 | if self.bool_l1: 224 | self.session.run(fetches=self.w_updates_aft) 225 | 226 | class OptimizerSwitcher(callbacks.Callback): 227 | """Optimizer switcher 228 | Need to use with MDNT optimizers that support mannual phase-switching 229 | method `optimizer.switch()`. 230 | Now such optimizers include: 231 | mdnt.optimizers.Adam2SGD 232 | mdnt.optimizers.NAdam2NSGD 233 | Arguments: 234 | switch_epochs: an int or an int list which determines when to switch 235 | the optimizer phase. The switch would happens on the end of 236 | assigned epochs. Should start with 1 (the first epoch). 237 | verbose: int. 0: quiet, 1: update messages. 
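    A brief sketch of the intended workflow (the optimizer settings and the
    epoch numbers are illustrative):
    ```python
    opt = mdnt.optimizers.Adam2SGD()
    model.compile(optimizer=opt, loss='mse')
    switcher = mdnt.utilities.callbacks.OptimizerSwitcher(switch_epochs=10, verbose=1)
    model.fit(data, labels, epochs=30, callbacks=[switcher])
    ```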
238 | """ 239 | 240 | def __init__(self, switch_epochs, verbose=0): 241 | super(OptimizerSwitcher, self).__init__() 242 | if isinstance(switch_epochs, (list, tuple)): 243 | if all(type(i)==int for i in switch_epochs): 244 | self.switch_epochs = list(switch_epochs) 245 | else: 246 | raise ValueError('The input list switch_epochs should only contains int elements.') 247 | else: 248 | if type(switch_epochs) != int: 249 | raise ValueError('The input scalar switch_epochs should be an int element.') 250 | self.switch_epochs = [switch_epochs] 251 | self.switch_epochs.sort(reverse=True) 252 | self.verbose = verbose 253 | 254 | def on_train_begin(self, logs=None): 255 | if not callable(getattr(self.model.optimizer, 'switch')): 256 | raise ValueError('Optimizer must have a "switch" method to support manually switching the training phase.') 257 | popflag = False 258 | while self.switch_epochs and self.switch_epochs[-1] < 1: 259 | self.switch_epochs.pop() 260 | popflag = True 261 | if popflag and self.verbose > 0: 262 | print('The input switch_epochs is revised as {0}.'.format(self.switch_epochs)) 263 | 264 | def on_epoch_end(self, epoch, logs=None, mode='train'): 265 | if mode == 'train' and self.switch_epochs: 266 | if self.switch_epochs[-1] == (epoch + 1): 267 | self.model.optimizer.switch(None) 268 | if self.verbose > 0: 269 | print('\nEpoch {0:05d}: Optimizer switcher switches the optimizer phase'.format(epoch + 1)) 270 | self.switch_epochs.pop() 271 | 272 | class ModelCheckpoint(callbacks.Callback): 273 | """Save the model after every epoch. (Revised) 274 | Revised Model checkpointer. Compared to original version, it supports 275 | such new features: 276 | 1. When `save_weights_only` is set `False`, it uses the MDNT version 277 | of model saver and avoid the heading excessing problem of saving 278 | HDF5 file. 279 | 2. The model configurations and the network weights are splitted. 280 | It will be easier for user to see the configuration through the 281 | saved JSON file. 282 | 3. When setting `keep_max`, only recent weights would be retained. 283 | Now `filepath` should not contain named formatting options, because 284 | the format options are moved into `record_format`. The final output 285 | configuration file name should be: 286 | `filapath + '.json'` 287 | while the weights file name should be: 288 | `filepath + '-' + record_format.format(...) + '.h5'` 289 | For example, if `filepath` is `'model'` while `record_format` is 290 | `'e{epoch:02d}_v{val_loss:.2f}'`, the latter part will be filled the 291 | value of `epoch` and keys in `logs` (passed in `on_epoch_end`). The 292 | output may be like: 293 | `'model.json'` and `'model-e05_v0.33.h5'`. 294 | Then the model checkpoints will be saved with the epoch number and 295 | the validation loss in the filename. 296 | Arguments: 297 | filepath: string, path to save the model file. 298 | record_format: the format of the using records. If set None, it 299 | would be set as a time stamp. 300 | monitor: quantity to monitor. 301 | verbose: verbosity mode, 0 or 1. 302 | keep_max: the maximum of kept weight file during the training 303 | phase. If set None, all files would be kept. This option 304 | requires users to have the authority to delete files in the 305 | saved path. 306 | save_optimizer: If `save_optimizer=True`, the optimizer configu- 307 | rations would be dumped as a json file. 308 | save_best_only: if `save_best_only=True`, 309 | the latest best model according to 310 | the quantity monitored will not be overwritten. 
311 | compress: whether to apply the compression for saving models. 312 | This option is only available when save_weights_only=False. 313 | mode: one of {auto, min, max}. 314 | If `save_best_only=True`, the decision 315 | to overwrite the current save file is made 316 | based on either the maximization or the 317 | minimization of the monitored quantity. For `val_acc`, 318 | this should be `max`, for `val_loss` this should 319 | be `min`, etc. In `auto` mode, the direction is 320 | automatically inferred from the name of the monitored quantity. 321 | save_weights_only: if True, then only the model's weights will be 322 | saved (`model.save_weights(filepath)`), else the full model 323 | is saved (`model.save(filepath)`). 324 | period: Interval (number of epochs) between checkpoints. 325 | """ 326 |
327 | def __init__(self, 328 | filepath, 329 | record_format=None, 330 | monitor='val_loss', 331 | verbose=0, 332 | keep_max=None, 333 | save_optimizer=True, 334 | save_best_only=False, 335 | save_weights_only=False, 336 | compress=True, 337 | mode='auto', 338 | period=1): 339 | super(ModelCheckpoint, self).__init__() 340 | self.monitor = monitor 341 | self.verbose = verbose 342 | self.filepath = filepath 343 | self.record_format = record_format 344 | if any(char in self.filepath for char in '{}%'): 345 | raise TypeError('filepath should not contain formats anymore. Use `record_format` to define that part.') 346 | self.keep_max = keep_max 347 | if keep_max is not None: 348 | self.__keep_list = [] 349 | self.__current_num = 0 350 | else: 351 | self.__keep_list = None 352 | self.__current_num = None 353 | self.save_optimizer = save_optimizer 354 | self.save_best_only = save_best_only 355 | self.save_weights_only = save_weights_only 356 | self.compress = compress 357 | self.period = period 358 | self.epochs_since_last_save = 0 359 |
360 | if mode not in ['auto', 'min', 'max']: 361 | logging.warning('ModelCheckpoint mode %s is unknown, ' 362 | 'fallback to auto mode.', mode) 363 | mode = 'auto' 364 |
365 | if mode == 'min': 366 | self.monitor_op = np.less 367 | self.best = np.Inf 368 | elif mode == 'max': 369 | self.monitor_op = np.greater 370 | self.best = -np.Inf 371 | else: 372 | if 'acc' in self.monitor or self.monitor.startswith('fmeasure'): 373 | self.monitor_op = np.greater 374 | self.best = -np.Inf 375 | else: 376 | self.monitor_op = np.less 377 | self.best = np.Inf 378 |
379 | def __keep_max_function(self, new_file_names): 380 | if self.keep_max is None: 381 | return 382 | if self.__current_num < self.keep_max: 383 | self.__current_num += 1 384 | self.__keep_list.append(new_file_names) 385 | else: 386 | old_file_names = self.__keep_list.pop(0) 387 | for old_file_name in old_file_names: 388 | if os.path.exists(old_file_name): 389 | os.remove(old_file_name) 390 | self.__keep_list.append(new_file_names) 391 |
392 | def on_epoch_end(self, epoch, logs=None): 393 | logs = logs or {} 394 | self.epochs_since_last_save += 1 395 | if self.epochs_since_last_save >= self.period: 396 | self.epochs_since_last_save = 0 397 | configpath = self.filepath + '.json' 398 | if self.record_format: 399 | weightpath = self.filepath + '-' + self.record_format 400 | weightpath = weightpath.format(epoch=epoch + 1, **logs) 401 | else: 402 | weightpath = self.filepath + '-' + str(int(datetime.timestamp(datetime.now())))  # fall back to a time stamp record 403 | optmpath = weightpath + '.json' 404 | weightpath = weightpath + '.h5' 405 | if self.save_best_only: 406 | current = logs.get(self.monitor) 407 | if current is None: 408 | logging.warning('Can save best model only with
--------------------------------------------------------------------------------
/optimizers/mixture.py:
--------------------------------------------------------------------------------
1 | '''
2 | ################################################################
3 | # Optimizers - Phase-mixed optimizers.
4 | # @ Modern Deep Network Toolkits for Tensorflow-Keras
5 | # Yuchen Jin @ cainmagi@gmail.com
6 | # Requirements: (Pay attention to version)
7 | #   python 3.6+
8 | #   tensorflow r1.13+
9 | # This module contains the optimizers that have multiple phases.
10 | # In different phases, those optimizers would adopt different
11 | # algorithms. A typical example is the SWATS optimizer.
12 | # Version: 0.21 # 2019/6/27
13 | # Comments:
14 | #   Slightly change the implementation.
15 | # Version: 0.20 # 2019/6/26
16 | # Comments:
17 | #   Finish the demo version for SWATS.
18 | # Version: 0.17 # 2019/6/23
19 | # Comments:
20 | #   Improve the efficiency of Adam2SGD and Nadam2NSGD.
21 | # Version: 0.15 # 2019/6/23
22 | # Comments:
23 | #   1. Fix the bugs in manually switched optimizers. Now it
24 | #      requires users to call switch() to change the phase or
25 | #      using mdnt.utilities.callbacks.OptimizerSwitcher.
26 | #   2. Revise the manually switched optimizers to ensure that
27 | #      they use an equivalent algorithm during the SGD phases.
28 | # Version: 0.10 # 2019/6/21
29 | # Comments:
30 | #   Create this submodule, finish Adam2SGD and Nadam2NSGD.
31 | ################################################################
32 | '''
33 | 
34 | from tensorflow.python.framework import ops
35 | from tensorflow.python.keras import optimizers
36 | from tensorflow.python.keras import backend as K
37 | from tensorflow.python.ops import math_ops
38 | from tensorflow.python.ops import state_ops
39 | from tensorflow.python.ops import control_flow_ops
40 | from tensorflow.python.ops import gen_math_ops
41 | 
42 | def m_switch(pred, tensor_a, tensor_b):
43 |     '''
44 |     Use a cleaner API to replace the previous implementation of `m_switch` and accelerate the computation.
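    It simply wraps `control_flow_ops.cond(pred, ..., strict=True)`: `pred` should be
    a scalar boolean tensor, and the two branches should return values with matching
    structures and dtypes; only the selected branch value is returned.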
45 |     '''
46 |     def f_true(): return tensor_a
47 |     def f_false(): return tensor_b
48 |     return control_flow_ops.cond(pred, f_true, f_false, strict=True)
49 | 
50 | class SWATS(optimizers.Optimizer):
51 |     """Switches from Adam to SGD (SWATS)
52 |     Switches from the Adam optimizer to the SGD optimizer automatically.
53 |     This method provides an automatic scheme for switching from Adam/Amsgrad to
54 |     the SGD optimizer. Different from the manually switched optimizers, the switching
55 |     point where Adam switches to SGD is determined by the algorithm, so users do not
56 |     need to call the switch notifier or use the manual switching callback.
57 |     The key technique of this method is checking the convergence of the
58 |     relationship between the adaptive momentum and the SGD gradient. If the proportion
59 |     of the gradient projected in the direction of the momentum converges, the
60 |     algorithm would switch to SGD silently.
61 |     One difference between SWATS and the manually switched optimizers is that SWATS
62 |     determines the algorithm phase for each optimized parameter independently, i.e.
63 |     different parameters may be optimized in different phases.
64 |     Another difference is that SWATS determines the learning rate of SGD
65 |     automatically to ensure that the learning rate would not change drastically
66 |     after the switching point.
67 |     This implementation makes the following modifications compared to the original work:
68 |         1. The manually set learning rate is separated from lambda to make sure
69 |            that users could modify the learning rate after switching to SGD.
70 |         2. The finally converged lambda is restricted to > 0, otherwise it will
71 |            not switch from Adam to SGD.
72 |         3. It supports Amsgrad -> SGD.
73 |     Due to the limitations of TensorFlow, this implementation is not highly
74 |     efficient. Users should estimate whether it is necessary to use this
75 |     optimizer.
76 |     Arguments:
77 |         lr: float >= 0. Learning rate.
78 |         beta_1: float, 0 < beta < 1. Generally close to 1.
79 |         beta_2: float, 0 < beta < 1. Generally close to 1.
80 |         epsilon: float >= 0. Fuzz factor. If `None`, defaults to `K.epsilon()`.
81 |         decay: float >= 0. Learning rate decay over each update.
82 |         amsgrad: boolean. Whether to apply the AMSGrad variant of this
83 |             algorithm from the paper "On the Convergence of Adam and
84 |             Beyond".
85 |     Reference:
86 |         This optimizer is derived from this paper:
87 |             Improving Generalization Performance by Switching from Adam to SGD
88 |             https://arxiv.org/abs/1712.07628
89 |         An unofficial implementation which inspired this work can be found
90 |         here:
91 |             https://github.com/sloth2012/scalaML
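    Example:
        A minimal sketch (the import path assumes the package is installed as
        `mdnt`; `model` and the training data are placeholders, not part of
        this module):
            from mdnt.optimizers.mixture import SWATS
            model.compile(optimizer=SWATS(lr=0.001, amsgrad=False), loss='mse')
            model.fit(x_train, y_train, epochs=50)
        No switching callback is required: each parameter moves from the Adam
        phase to the SGD phase on its own once its lambda estimate converges.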
92 |     """
93 | 
94 |     def __init__(self,
95 |                  lr=0.001,
96 |                  lr_boost=10.0,
97 |                  beta_1=0.9,
98 |                  beta_2=0.999,
99 |                  epsilon=None,
100 |                  decay=0.,
101 |                  amsgrad=False,
102 |                  **kwargs):
103 |         super(SWATS, self).__init__(**kwargs)
104 |         with K.name_scope(self.__class__.__name__):
105 |             self.iterations = K.variable(0, dtype='int64', name='iterations')
106 |             self.lr = K.variable(lr, name='lr')
107 |             self.beta_1 = K.variable(beta_1, name='beta_1')
108 |             self.beta_2 = K.variable(beta_2, name='beta_2')
109 |             self.decay = K.variable(decay, name='decay')
110 |         if epsilon is None:
111 |             epsilon = K.epsilon()
112 |         self.epsilon = epsilon
113 |         self.initial_decay = decay
114 |         self.amsgrad = amsgrad
115 | 
116 |     def get_updates(self, loss, params):
117 |         grads = self.get_gradients(loss, params)
118 |         self.updates = []
119 | 
120 |         lr = self.lr
121 |         if self.initial_decay > 0:
122 |             lr = lr * ( 1. / (1. + self.decay * math_ops.cast(self.iterations, K.dtype(self.decay))) )
123 | 
124 |         with ops.control_dependencies([state_ops.assign_add(self.iterations, 1)]):
125 |             t = math_ops.cast(self.iterations, K.floatx())
126 |             lr_bc = gen_math_ops.sqrt(1. - math_ops.pow(self.beta_2, t)) / (1. - math_ops.pow(self.beta_1, t))
127 | 
128 |         ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
129 |         vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
130 |         lams = [K.zeros(1, dtype=K.dtype(p)) for p in params]
131 |         conds = [K.variable(False, dtype='bool') for p in params]
132 |         if self.amsgrad:
133 |             vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
134 |         else:
135 |             vhats = [K.zeros(1) for _ in params]
136 |         self.weights = [self.iterations] + ms + vs + vhats + lams + conds
137 | 
138 |         for p, g, m, v, vhat, lam, cond in zip(params, grads, ms, vs, vhats, lams, conds):
139 |             beta_g = m_switch(cond, 1.0, 1.0 - self.beta_1)  # 1.0 in the SGD phase, (1 - beta_1) in the Adam phase
140 |             m_t = (self.beta_1 * m) + beta_g * g
141 |             v_t = (self.beta_2 * v) + (1. - self.beta_2) * math_ops.square(g)
142 |             if self.amsgrad:
143 |                 vhat_t = math_ops.maximum(vhat, v_t)
144 |                 p_t_ada = lr_bc * m_t / (gen_math_ops.sqrt(vhat_t) + self.epsilon)
145 |                 self.updates.append(state_ops.assign(vhat, vhat_t))
146 |             else:
147 |                 p_t_ada = lr_bc * m_t / (gen_math_ops.sqrt(v_t) + self.epsilon)
148 |             gamma_den = math_ops.reduce_sum(p_t_ada * g)  # projection of the Adam step onto the gradient
149 |             gamma = math_ops.reduce_sum(gen_math_ops.square(p_t_ada)) / (math_ops.abs(gamma_den) + self.epsilon) * (gen_math_ops.sign(gamma_den) + self.epsilon)
150 |             lam_t = (self.beta_2 * lam) + (1. - self.beta_2) * gamma
151 |             lam_prime = lam / (1. - math_ops.pow(self.beta_2, t))
152 |             lam_t_prime = lam_t / (1. - math_ops.pow(self.beta_2, t))
153 |             lg_err = math_ops.abs( lam_t_prime - gamma )  # convergence error of the estimated SGD learning rate
154 |             cond_update = gen_math_ops.logical_or(gen_math_ops.logical_and(gen_math_ops.logical_and( self.iterations > 1, lg_err < 1e-5 ), lam_t > 0 ), cond )[0]
155 |             lam_update = m_switch(cond_update, lam, lam_t)
156 |             self.updates.append(state_ops.assign(lam, lam_update))
157 |             self.updates.append(state_ops.assign(cond, cond_update))
158 | 
159 |             p_t_sgd = (1. - self.beta_1) * lam_prime * m_t
160 | 
161 |             self.updates.append(state_ops.assign(m, m_t))
162 |             self.updates.append(state_ops.assign(v, v_t))
163 | 
164 |             new_p = m_switch(cond, p - lr * p_t_sgd, p - lr * p_t_ada)
165 | 
166 |             # Apply constraints.
167 |             if getattr(p, 'constraint', None) is not None:
168 |                 new_p = p.constraint(new_p)
169 | 
170 |             self.updates.append(state_ops.assign(p, new_p))
171 |         return self.updates
172 | 
173 |     def get_config(self):
174 |         config = {
175 |             'lr': float(K.get_value(self.lr)),
176 |             'beta_1': float(K.get_value(self.beta_1)),
177 |             'beta_2': float(K.get_value(self.beta_2)),
178 |             'decay': float(K.get_value(self.decay)),
179 |             'epsilon': self.epsilon,
180 |             'amsgrad': self.amsgrad
181 |         }
182 |         base_config = super(SWATS, self).get_config()
183 |         return dict(list(base_config.items()) + list(config.items()))
184 | 
185 | class Adam2SGD(optimizers.Optimizer):
186 |     """Adam optimizer -> SGD optimizer.
187 |     Switches from the Adam optimizer to the SGD optimizer manually.
188 |     This optimizer needs users to control the switch point manually. After switching
189 |     to SGD, the momentum from Adam would be retained so the optimizer could switch
190 |     to SGD smoothly. beta_1 would also be applied to SGD for calculating the
191 |     momentum.
192 |     Special tips:
193 |         This optimizer needs to be used with
194 |             mdnt.utilities.callbacks.OptimizerSwitcher
195 |         together. That callback would trigger the method `self.switch(True)` and
196 |         notify the optimizer to enter the SGD phase. Otherwise, it would stay in
197 |         the Adam/Amsgrad phase. Users could also call `self.switch` manually if
198 |         using `train_on_batch()` to train the model.
199 |     Arguments:
200 |         lr: float >= 0. Learning rate.
201 |         lr_boost: float >= 0. Suggested to be > 1, because the SGD optimizer
202 |             generally requires a larger learning rate than Adam.
203 |         beta_1: float, 0 < beta < 1. Generally close to 1.
204 |         beta_2: float, 0 < beta < 1. Generally close to 1.
205 |         epsilon: float >= 0. Fuzz factor. If `None`, defaults to `K.epsilon()`.
206 |         decay: float >= 0. Learning rate decay over each update.
207 |         amsgrad: boolean. Whether to apply the AMSGrad variant of this
208 |             algorithm from the paper "On the Convergence of Adam and
209 |             Beyond".
210 |         switch_flag: the initial state of the optimizer phase. If set `False`,
211 |             start with Adam/Amsgrad, otherwise start with SGD.
212 |     """
213 | 
214 |     def __init__(self,
215 |                  lr=0.001,
216 |                  lr_boost=10.0,
217 |                  beta_1=0.9,
218 |                  beta_2=0.999,
219 |                  epsilon=None,
220 |                  decay=0.,
221 |                  amsgrad=False,
222 |                  switch_flag=False,
223 |                  **kwargs):
224 |         super(Adam2SGD, self).__init__(**kwargs)
225 |         with K.name_scope(self.__class__.__name__):
226 |             self.iterations = K.variable(0, dtype='int64', name='iterations')
227 |             self.lr = K.variable(lr, name='lr')
228 |             self.beta_1 = K.variable(beta_1, name='beta_1')
229 |             if switch_flag:  # using SGD
230 |                 self.beta_g = K.variable(1.0, name='beta_g')
231 |             else:  # using Adam
232 |                 self.beta_g = K.variable(1.0 - beta_1, name='beta_g')
233 |             self.beta_2 = K.variable(beta_2, name='beta_2')
234 |             self.decay = K.variable(decay, name='decay')
235 |             self.switch_flag = K.variable(switch_flag, dtype='bool', name='switch_flag')
236 |         if epsilon is None:
237 |             epsilon = K.epsilon()
238 |         self.epsilon = epsilon
239 |         self.initial_decay = decay
240 |         self.amsgrad = amsgrad
241 |         self.lr_boost = lr_boost
242 | 
243 |     def switch(self, switch_flag=None):
244 |         '''
245 |         Switch the phase of the optimizer.
246 |         Arguments:
247 |             switch_flag: if set `True`, use SGD with momentum; otherwise, use
248 |                 Adam/Amsgrad. If set `None`, the phase would be toggled from
249 |                 the current one.
250 |         '''
251 |         if switch_flag is None:
252 |             switch_flag = not bool(K.get_value(self.switch_flag))
253 |         else:
254 |             switch_flag = bool(switch_flag)
255 |         if switch_flag:  # using SGD
256 |             K.set_value(self.beta_g, 1.0)
257 |         else:  # using Adam
258 |             K.set_value(self.beta_g, 1.0 - K.get_value(self.beta_1))
259 |         K.set_value(self.switch_flag, bool(switch_flag))
260 | 
261 |     def get_updates(self, loss, params):
262 |         grads = self.get_gradients(loss, params)
263 |         self.updates = []
264 | 
265 |         lr = self.lr
266 |         if self.initial_decay > 0:
267 |             lr = lr * ( 1. / (1. + self.decay * math_ops.cast(self.iterations, K.dtype(self.decay))) )
268 | 
269 |         with ops.control_dependencies([state_ops.assign_add(self.iterations, 1)]):
270 |             t = math_ops.cast(self.iterations, K.floatx())
271 |             lr_t = lr * ( gen_math_ops.sqrt(1. - math_ops.pow(self.beta_2, t)) / (1. - math_ops.pow(self.beta_1, t)) )
272 | 
273 |         ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
274 |         vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
275 |         if self.amsgrad:
276 |             vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
277 |         else:
278 |             vhats = [K.zeros(1) for _ in params]
279 |         self.weights = [self.iterations] + ms + vs + vhats
280 | 
281 |         for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
282 |             m_t = (self.beta_1 * m) + self.beta_g * g
283 |             v_t = (self.beta_2 * v) + (1. - self.beta_2) * math_ops.square(g)
284 |             if self.amsgrad:
285 |                 vhat_t = math_ops.maximum(vhat, v_t)
286 |                 p_t_ada = p - lr_t * m_t / (gen_math_ops.sqrt(vhat_t) + self.epsilon)
287 |                 self.updates.append(state_ops.assign(vhat, vhat_t))
288 |             else:
289 |                 p_t_ada = p - lr_t * m_t / (gen_math_ops.sqrt(v_t) + self.epsilon)
290 |             p_t_sgd = p - self.lr_boost * lr * m_t
291 | 
292 |             self.updates.append(state_ops.assign(m, m_t))
293 |             self.updates.append(state_ops.assign(v, v_t))
294 | 
295 |             new_p = m_switch(self.switch_flag, p_t_sgd, p_t_ada)
296 | 
297 |             # Apply constraints.
298 |             if getattr(p, 'constraint', None) is not None:
299 |                 new_p = p.constraint(new_p)
300 | 
301 |             self.updates.append(state_ops.assign(p, new_p))
302 |         return self.updates
303 | 
304 |     def get_config(self):
305 |         config = {
306 |             'lr': float(K.get_value(self.lr)),
307 |             'lr_boost': self.lr_boost,
308 |             'beta_1': float(K.get_value(self.beta_1)),
309 |             'beta_2': float(K.get_value(self.beta_2)),
310 |             'decay': float(K.get_value(self.decay)),
311 |             'epsilon': self.epsilon,
312 |             'amsgrad': self.amsgrad,
313 |             'switch_flag': bool(K.get_value(self.switch_flag))
314 |         }
315 |         base_config = super(Adam2SGD, self).get_config()
316 |         return dict(list(base_config.items()) + list(config.items()))
317 | 
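# Example usage (a sketch only; the constructor arguments of OptimizerSwitcher are
# defined in mdnt.utilities.callbacks and are not shown here, so they are elided;
# `model` and the training data are placeholders):
#
#     from mdnt.optimizers.mixture import Adam2SGD
#     from mdnt.utilities.callbacks import OptimizerSwitcher
#
#     optimizer = Adam2SGD(lr=0.001, lr_boost=10.0, amsgrad=True)
#     model.compile(optimizer=optimizer, loss='categorical_crossentropy')
#     # Either let the callback call `optimizer.switch(True)` at the chosen point ...
#     model.fit(x_train, y_train, epochs=100, callbacks=[OptimizerSwitcher(...)])
#     # ... or trigger the switch manually when training with train_on_batch():
#     optimizer.switch(True)   # enter the SGD (momentum) phase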
318 | class Nadam2NSGD(optimizers.Optimizer):
319 |     """Nesterov Adam optimizer -> Nesterov SGD optimizer.
320 |     Much like Adam is essentially RMSprop with momentum, Nadam is Adam with
321 |     Nesterov momentum. Default parameters follow those provided in the paper.
322 |     It is recommended to leave the parameters of this optimizer at their default
323 |     values.
324 |     This optimizer needs users to control the switch point manually. After switching
325 |     to SGD, the momentum from Adam would be retained so the optimizer could switch
326 |     to SGD smoothly. beta_1 would also be applied to SGD for calculating the
327 |     momentum.
328 |     Special tips:
329 |         This optimizer needs to be used with
330 |             mdnt.utilities.callbacks.OptimizerSwitcher
331 |         together. That callback would trigger the method `self.switch(True)` and
332 |         notify the optimizer to enter the NSGD phase. Otherwise, it would stay in
333 |         the Nadam/Namsgrad phase. Users could also call `self.switch` manually if
334 |         using `train_on_batch()` to train the model.
335 |     Arguments:
336 |         lr: float >= 0. Learning rate.
337 |         lr_boost: float >= 0. Suggested to be > 1, because the SGD optimizer
338 |             generally requires a larger learning rate than Adam.
339 |         beta_1/beta_2: floats, 0 < beta < 1. Generally close to 1.
340 |         epsilon: float >= 0. Fuzz factor. If `None`, defaults to `K.epsilon()`.
341 |         decay: float >= 0. Learning rate decay over each update.
342 |         amsgrad: boolean. Whether to apply the AMSGrad variant of this
343 |             algorithm from the paper "On the Convergence of Adam and Beyond".
344 |         switch_flag: the initial state of the optimizer phase. If set `False`,
345 |             start with Nadam/Namsgrad, otherwise start with NSGD.
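        schedule_decay: float >= 0. Decay rate of the warming momentum schedule
            used by the Nadam phase (applied through `momentum_cache_t`).
    Example:
        A minimal sketch of the manual switching path (`model`, the batches and
        `switch_step` are placeholders, not part of this module):
            from mdnt.optimizers.mixture import Nadam2NSGD
            optimizer = Nadam2NSGD(lr=0.002, lr_boost=10.0)
            model.compile(optimizer=optimizer, loss='mse')
            for step, (x_batch, y_batch) in enumerate(batches):
                if step == switch_step:
                    optimizer.switch(True)   # Nadam/Namsgrad -> NSGD
                model.train_on_batch(x_batch, y_batch)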
346 | """ 347 | 348 | def __init__(self, 349 | lr=0.002, 350 | lr_boost=10.0, 351 | beta_1=0.9, 352 | beta_2=0.999, 353 | epsilon=None, 354 | decay=0., 355 | schedule_decay=0.004, 356 | amsgrad=False, 357 | switch_flag=False, 358 | **kwargs): 359 | super(Nadam2NSGD, self).__init__(**kwargs) 360 | with K.name_scope(self.__class__.__name__): 361 | self.iterations = K.variable(0, dtype='int64', name='iterations') 362 | self.m_schedule = K.variable(1., name='m_schedule') 363 | self.lr = K.variable(lr, name='lr') 364 | self.beta_1 = K.variable(beta_1, name='beta_1') 365 | if switch_flag: # using NSGD 366 | self.beta_g = K.variable(1.0, name='beta_g') 367 | else: # using Nadam 368 | self.beta_g = K.variable(1.0 - beta_1, name='beta_g') 369 | self.beta_2 = K.variable(beta_2, name='beta_2') 370 | self.decay = K.variable(decay, name='decay') 371 | self.switch_flag = K.variable(switch_flag, dtype='bool', name='switch_flag') 372 | if epsilon is None: 373 | epsilon = K.epsilon() 374 | self.epsilon = epsilon 375 | self.initial_decay = decay 376 | self.schedule_decay = schedule_decay 377 | self.amsgrad = amsgrad 378 | self.lr_boost = lr_boost 379 | 380 | def switch(self, switch_flag=None): 381 | ''' 382 | Switch the phase of the optimizer. 383 | Arguments: 384 | switch_flag: if set `True`, use SGD with nesterov momentum; Otherwise, 385 | use NAdam/NAmsgrad. If set None, it would switch the phase according to 386 | the current phase. 387 | ''' 388 | if switch_flag is None: 389 | switch_flag = not bool(K.get_value(self.switch_flag)) 390 | else: 391 | switch_flag = bool(switch_flag) 392 | if switch_flag: # using NSGD 393 | self.beta_g = K.set_value(self.beta_g, 1.0) 394 | else: # using Nadam 395 | self.beta_g = K.set_value(self.beta_g, 1.0 - K.get_value(self.beta_1)) 396 | K.set_value(self.switch_flag, bool(switch_flag)) 397 | 398 | def get_updates(self, loss, params): 399 | grads = self.get_gradients(loss, params) 400 | self.updates = [state_ops.assign_add(self.iterations, 1)] 401 | 402 | lr = self.lr 403 | if self.initial_decay > 0: 404 | lr = lr * ( 1. / (1. + self.decay * math_ops.cast(self.iterations,K.dtype(self.decay))) ) 405 | 406 | t = math_ops.cast(self.iterations, K.floatx()) + 1 407 | 408 | # Due to the recommendations in [2], i.e. warming momentum schedule 409 | momentum_cache_t = self.beta_1 * ( 410 | 1. - 0.5 * 411 | (math_ops.pow(K.cast_to_floatx(0.96), t * self.schedule_decay))) 412 | momentum_cache_t_1 = self.beta_1 * ( 413 | 1. - 0.5 * 414 | (math_ops.pow(K.cast_to_floatx(0.96), (t + 1) * self.schedule_decay))) 415 | m_schedule_new = self.m_schedule * momentum_cache_t 416 | m_schedule_next = self.m_schedule * momentum_cache_t * momentum_cache_t_1 417 | self.updates.append((self.m_schedule, m_schedule_new)) 418 | 419 | ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] 420 | vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] 421 | if self.amsgrad: 422 | vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] 423 | else: 424 | vhats = [K.zeros(1) for _ in params] 425 | 426 | self.weights = [self.iterations] + ms + vs + vhats 427 | 428 | for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats): 429 | # the following equations given in [1] 430 | g_prime = g / (1. - m_schedule_new) 431 | m_t = self.beta_1 * m + self.beta_g * g 432 | m_t_prime = m_t / (1. - m_schedule_next) 433 | v_t = self.beta_2 * v + (1. 
434 |             if self.amsgrad:
435 |                 vhat_t = math_ops.maximum(vhat, v_t)
436 |                 self.updates.append(state_ops.assign(vhat, vhat_t))
437 |                 v_t_prime = vhat_t / (1. - math_ops.pow(self.beta_2, t))
438 |             else:
439 |                 v_t_prime = v_t / (1. - math_ops.pow(self.beta_2, t))
440 |             m_t_bar = (self.beta_g / (1. - self.beta_1)) * (1. - momentum_cache_t) * g_prime + momentum_cache_t_1 * m_t_prime
441 | 
442 |             self.updates.append(state_ops.assign(m, m_t))
443 |             self.updates.append(state_ops.assign(v, v_t))
444 | 
445 |             p_t_ada = p - lr * m_t_bar / (gen_math_ops.sqrt(v_t_prime) + self.epsilon)
446 |             p_t_sgd = p - self.lr_boost * lr * m_t_bar
447 | 
448 |             new_p = m_switch(self.switch_flag, p_t_sgd, p_t_ada)
449 | 
450 |             # Apply constraints.
451 |             if getattr(p, 'constraint', None) is not None:
452 |                 new_p = p.constraint(new_p)
453 | 
454 |             self.updates.append(state_ops.assign(p, new_p))
455 |         return self.updates
456 | 
457 |     def get_config(self):
458 |         config = {
459 |             'lr': float(K.get_value(self.lr)),
460 |             'lr_boost': self.lr_boost,
461 |             'beta_1': float(K.get_value(self.beta_1)),
462 |             'beta_2': float(K.get_value(self.beta_2)),
463 |             'epsilon': self.epsilon,
464 |             'decay': float(K.get_value(self.decay)),
465 |             'schedule_decay': self.schedule_decay,
466 |             'amsgrad': self.amsgrad,
467 |             'switch_flag': bool(K.get_value(self.switch_flag))
468 |         }
469 |         base_config = super(Nadam2NSGD, self).get_config()
470 |         return dict(list(base_config.items()) + list(config.items()))
--------------------------------------------------------------------------------