├── .gitattributes ├── utilities ├── webfiles.zip ├── __init__.py ├── tboard.py └── callbacks.py ├── MDNT.code-workspace ├── layers ├── deprecated │ ├── __init__.py │ ├── external.py │ └── conv.py ├── __init__.py ├── utils.py ├── dropout.py ├── external.py ├── dense.py └── normalize.py ├── data ├── deprecated │ ├── __init__.py │ └── h5py.py └── __init__.py ├── LICENSE ├── functions ├── __init__.py ├── others.py ├── metrics.py └── losses.py ├── optimizers ├── __init__.py ├── _default.py ├── adaptive.py └── mixture.py ├── .gitignore ├── compat.py ├── __init__.py └── README.md /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /utilities/webfiles.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cainmagi/MDNT/HEAD/utilities/webfiles.zip -------------------------------------------------------------------------------- /MDNT.code-workspace: -------------------------------------------------------------------------------- 1 | { 2 | "folders": [ 3 | { 4 | "path": "." 5 | } 6 | ], 7 | "settings": {} 8 | } -------------------------------------------------------------------------------- /layers/deprecated/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ################################################################ 3 | # Layers (deprecated) 4 | # @ Modern Deep Network Toolkits for Tensorflow-Keras 5 | # Yuchen Jin @ cainmagi@gmail.com 6 | # Requirements: (Pay attention to version) 7 | # python 3.6+ 8 | # tensorflow r1.13+ 9 | # Put the deprecated libs here. 10 | # Version: 0.10 # 2019/5/23 11 | # Comments: 12 | # Create this submodule. 13 | ################################################################ 14 | ''' 15 | 16 | # Import sub-modules 17 | from .external import External 18 | 19 | __all__ = ['External'] -------------------------------------------------------------------------------- /data/deprecated/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ################################################################ 3 | # Data (deprecated) 4 | # @ Modern Deep Network Toolkits for Tensorflow-Keras 5 | # Yuchen Jin @ cainmagi@gmail.com 6 | # Requirements: (Pay attention to version) 7 | # python 3.6+ 8 | # tensorflow r1.13+ 9 | # Put the deprecated libs here. 10 | # Version: 0.10 # 2019/3/26 11 | # Comments: 12 | # Create this submodule. 
13 | ################################################################ 14 | ''' 15 | 16 | # Import sub-modules 17 | from .h5py import H5HGParser, H5SupSaver, H5GParser 18 | 19 | __all__ = ['H5HGParser', 'H5SupSaver', 'H5GParser'] -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Yuchen Jin 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /functions/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ################################################################ 3 | # Functions 4 | # @ Modern Deep Network Toolkits for Tensorflow-Keras 5 | # Yuchen Jin @ cainmagi@gmail.com 6 | # Requirements: (Pay attention to version) 7 | # python 3.6+ 8 | # tensorflow r1.13+ 9 | # Extended functions for MDNT. Mainly including new losses and 10 | # metrics. 11 | # Version: 0.10 # 2019/6/13 12 | # Comments: 13 | # Create this submodule. 14 | ################################################################ 15 | ''' 16 | 17 | # Import sub-modules 18 | from . import losses 19 | from . import metrics 20 | 21 | # Set layer dictionaries 22 | customObjects = { 23 | 'linear_jaccard_index': losses.linear_jaccard_loss, 24 | 'lovasz_jaccard_loss': losses.lovasz_jaccard_loss, 25 | 'signal_to_noise': metrics.signal_to_noise, 26 | 'correlation': metrics.correlation, 27 | 'jaccard_index': metrics.jaccard_index 28 | } 29 | 30 | __all__ = list(customObjects.keys()) 31 | 32 | # Set this local module as the prefered one 33 | from pkgutil import extend_path 34 | __path__ = extend_path(__path__, __name__) 35 | 36 | # Delete private sub-modules 37 | del extend_path -------------------------------------------------------------------------------- /data/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ################################################################ 3 | # Data 4 | # @ Modern Deep Network Toolkits for Tensorflow-Keras 5 | # Yuchen Jin @ cainmagi@gmail.com 6 | # Requirements: (Pay attention to version) 7 | # python 3.6+ 8 | # tensorflow r1.13+ 9 | # Extended data parser for tf-K standard IO APIs. 10 | # Version: 0.18 # 2020/02/10 11 | # Comments: 12 | # Add `H5Converter` into this module. 
13 | # Version: 0.16 # 2019/10/23 14 | # Comments: 15 | # Add `H5VGParser` into this module. 16 | # Version: 0.15 # 2019/3/30 17 | # Comments: 18 | # Add `H5GCombiner` into this module. 19 | # Version: 0.10 # 2019/3/26 20 | # Comments: 21 | # Create this submodule. 22 | ################################################################ 23 | ''' 24 | 25 | # Import sub-modules 26 | from .h5py import H5HGParser, H5SupSaver, H5GParser, H5GCombiner, H5VGParser, H5Converter 27 | 28 | __all__ = ['H5HGParser', 'H5SupSaver', 'H5GParser', 'H5GCombiner', 'H5VGParser', 'H5Converter'] 29 | 30 | # Set this local module as the prefered one 31 | from pkgutil import extend_path 32 | __path__ = extend_path(__path__, __name__) 33 | 34 | # Delete private sub-modules 35 | del extend_path -------------------------------------------------------------------------------- /optimizers/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ################################################################ 3 | # Optimizers 4 | # @ Modern Deep Network Toolkits for Tensorflow-Keras 5 | # Yuchen Jin @ cainmagi@gmail.com 6 | # Requirements: (Pay attention to version) 7 | # python 3.6+ 8 | # tensorflow r1.13+ 9 | # Wrapping the optimizers in tf-K with default options. In this 10 | # module, we would also try to propose some newly introduced 11 | # optimizers if need. 12 | # Version: 0.10 # 2019/3/23 13 | # Comments: 14 | # Create this submodule. 15 | ################################################################ 16 | ''' 17 | # Import sub-modules 18 | from ._default import optimizer as optimizer 19 | from .mixture import Adam2SGD, Nadam2NSGD, SWATS 20 | from .adaptive import Nadabound, Adabound, MNadam 21 | 22 | # Set optimizer dictionaries 23 | customObjects = { 24 | 'SWATS': SWATS, 25 | 'Adam2SGD': Adam2SGD, 26 | 'Nadam2NSGD': Nadam2NSGD, 27 | 'Nadabound': Nadabound, 28 | 'Adabound': Adabound, 29 | 'MNadam': MNadam 30 | } 31 | 32 | # Set this local module as the prefered one 33 | from pkgutil import extend_path 34 | __path__ = extend_path(__path__, __name__) 35 | 36 | __all__ = list(customObjects.keys()) 37 | 38 | # Delete private sub-modules and objects 39 | del _default 40 | del extend_path -------------------------------------------------------------------------------- /utilities/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ################################################################ 3 | # Utilities 4 | # @ Modern Deep Network Toolkits for Tensorflow-Keras 5 | # Yuchen Jin @ cainmagi@gmail.com 6 | # Requirements: (Pay attention to version) 7 | # python 3.6+ 8 | # tensorflow r1.13+ 9 | # matplotlib 3.1.1+ 10 | # Extended utilities for MDNT. This module includes useful tools 11 | # that are not directly related to deep network architecture. 12 | # For example, it has callbacks for fitting a network, the pre- 13 | # processing and postprocessing tools and APIs for drawing 14 | # figures. 15 | # Version: 0.30 # 2019/11/27 16 | # Comments: 17 | # Finish the submodule: tboard. 18 | # Version: 0.20 # 2019/11/26 19 | # Comments: 20 | # Finish the submodule: draw. 21 | # Version: 0.10 # 2019/6/16 22 | # Comments: 23 | # Create this submodule. 24 | ################################################################ 25 | ''' 26 | 27 | # Import sub-modules 28 | from . 
import callbacks, draw, tboard 29 | from ._default import save_model, load_model 30 | 31 | __all__ = [ 32 | 'callbacks', 'draw', 'tboard', 33 | 'save_model', 'load_model' 34 | ] 35 | 36 | # Set this local module as the prefered one 37 | from pkgutil import extend_path 38 | __path__ = extend_path(__path__, __name__) 39 | 40 | # Delete private sub-modules 41 | del extend_path -------------------------------------------------------------------------------- /functions/others.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ################################################################ 3 | # Functions - Others 4 | # @ Modern Deep Network Toolkits for Tensorflow-Keras 5 | # Yuchen Jin @ cainmagi@gmail.com 6 | # Requirements: (Pay attention to version) 7 | # python 3.6+ 8 | # tensorflow r1.13+ 9 | # ============================================================= 10 | # Warning: 11 | # THIS MODULE IS A PRIVATE ONE, USERS SHOULD NOT GET ACCESS TO 12 | # THIS PART. 13 | # ============================================================= 14 | # Some basic functions. 15 | # Version: 0.10 # 2019/6/13 16 | # Comments: 17 | # Create this submodule. 18 | ################################################################ 19 | ''' 20 | 21 | from tensorflow.python.keras import backend as K 22 | 23 | def get_channels(y, data_format=None): 24 | '''get channels 25 | Get all dimensions other than the channel dimension and the batch dimension. 26 | Arguments: 27 | data_format: 'channels_first' or 'channels_last', 28 | Input: 29 | y: tensor, where we need to find the dimension list. 30 | Output: 31 | tuple, the channel (dimension) list. 32 | ''' 33 | get_dims = len(y.get_shape()) 34 | if get_dims < 3: 35 | raise ValueError('The input tensor should has channel dimension, i.e. it should have at least 3 axes.') 36 | if data_format is None: 37 | data_format = K.image_data_format() 38 | if data_format == 'channels_last': 39 | get_reduced_axes = tuple(range(1, get_dims-1)) 40 | else: 41 | get_reduced_axes = tuple(range(2, get_dims)) 42 | return get_reduced_axes -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Datafiles 2 | /checkpoints/ 3 | /logs/ 4 | *.h5 5 | 6 | # vscode 7 | /.vscode/ 8 | 9 | # Byte-compiled / optimized / DLL files 10 | __pycache__/ 11 | *.py[cod] 12 | *$py.class 13 | 14 | # C extensions 15 | *.so 16 | 17 | # Distribution / packaging 18 | .Python 19 | build/ 20 | develop-eggs/ 21 | dist/ 22 | downloads/ 23 | eggs/ 24 | .eggs/ 25 | lib/ 26 | lib64/ 27 | parts/ 28 | sdist/ 29 | var/ 30 | wheels/ 31 | *.egg-info/ 32 | .installed.cfg 33 | *.egg 34 | MANIFEST 35 | 36 | # PyInstaller 37 | # Usually these files are written by a python script from a template 38 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
39 | *.manifest 40 | *.spec 41 | 42 | # Installer logs 43 | pip-log.txt 44 | pip-delete-this-directory.txt 45 | 46 | # Unit test / coverage reports 47 | htmlcov/ 48 | .tox/ 49 | .coverage 50 | .coverage.* 51 | .cache 52 | nosetests.xml 53 | coverage.xml 54 | *.cover 55 | .hypothesis/ 56 | .pytest_cache/ 57 | 58 | # Translations 59 | *.mo 60 | *.pot 61 | 62 | # Django stuff: 63 | *.log 64 | local_settings.py 65 | db.sqlite3 66 | 67 | # Flask stuff: 68 | instance/ 69 | .webassets-cache 70 | 71 | # Scrapy stuff: 72 | .scrapy 73 | 74 | # Sphinx documentation 75 | docs/_build/ 76 | 77 | # PyBuilder 78 | target/ 79 | 80 | # Jupyter Notebook 81 | .ipynb_checkpoints 82 | 83 | # pyenv 84 | .python-version 85 | 86 | # celery beat schedule file 87 | celerybeat-schedule 88 | 89 | # SageMath parsed files 90 | *.sage.py 91 | 92 | # Environments 93 | .env 94 | .venv 95 | env/ 96 | venv/ 97 | ENV/ 98 | env.bak/ 99 | venv.bak/ 100 | 101 | # Spyder project settings 102 | .spyderproject 103 | .spyproject 104 | 105 | # Rope project settings 106 | .ropeproject 107 | 108 | # mkdocs documentation 109 | /site 110 | 111 | # mypy 112 | .mypy_cache/ -------------------------------------------------------------------------------- /compat.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ################################################################ 3 | # Compatibility check 4 | # @ Modern Deep Network Toolkits for Tensorflow-Keras 5 | # Yuchen Jin @ cainmagi@gmail.com 6 | # Requirements: (Pay attention to version) 7 | # python 3.6+ 8 | # tensorflow r1.13+ 9 | # Use this module to check whether we need to open the 10 | # compatible mode. 11 | # Version: 0.20 # 2020/8/30 12 | # Comments: 13 | # 1. Extend the compatible mode for future updates. 14 | # Version: 0.20 # 2019/6/12 15 | # Comments: 16 | # 1. Modify the required version for compatible mode. 17 | # 2. Provide a stronger property collecting method for 18 | # compatibility. 19 | # Version: 0.10 # 2019/3/27 20 | # Comments: 21 | # Create this compatible module. 
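#      For example, other submodules read `compat.COMPATIBLE_MODE` to pick the right code path: `layers.dropout` checks the '1.12' flag before importing `InputSpec`, and `optimizers._default` checks the '1.14' flag before enabling contrib-based optimizers.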
22 | ################################################################ 23 | ''' 24 | 25 | # Check compatibility 26 | import tensorflow 27 | 28 | def set_compatible(): 29 | compat_mode = { 30 | '1.12': False, 31 | '1.14': False 32 | } 33 | parse_ver = [int(i) for i in tensorflow.__version__.split('-')[0].split('.')] 34 | if parse_ver >= [1, 14]: 35 | compat_mode['1.14'] = True 36 | if parse_ver < [1, 13]: 37 | compat_mode['1.12'] = True 38 | return compat_mode 39 | 40 | COMPATIBLE_MODE = set_compatible() 41 | 42 | def collect_properties(layer, sublayer): 43 | ''' 44 | Collect the following parameters from sublayer to layer: 45 | _trainable_weights 46 | _non_trainable_weights 47 | _updates 48 | _losses 49 | ''' 50 | if COMPATIBLE_MODE['1.12']: # for compatibility 51 | layer._trainable_weights.extend(sublayer._trainable_weights) 52 | layer._non_trainable_weights.extend(sublayer._non_trainable_weights) 53 | layer._updates.extend(sublayer._updates) 54 | layer._losses.extend(sublayer._losses) 55 | if hasattr(layer, '_callable_losses') and hasattr(sublayer, '_callable_losses'): # for compatibility on 1.12.0 56 | layer._callable_losses.extend(sublayer._callable_losses) 57 | -------------------------------------------------------------------------------- /layers/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ################################################################ 3 | # Layers 4 | # @ Modern Deep Network Toolkits for Tensorflow-Keras 5 | # Yuchen Jin @ cainmagi@gmail.com 6 | # Requirements: (Pay attention to version) 7 | # python 3.6+ 8 | # tensorflow r1.13+ 9 | # Modern network layers. This sub-module would include some 10 | # effective network layers which are not introduced in tf-K. 11 | # All of these modules are produced by standard tf-K APIs. 12 | # Version: 0.10 # 2019/3/23 13 | # Comments: 14 | # Create this submodule. 
15 | ################################################################ 16 | ''' 17 | 18 | # Import sub-modules 19 | from .activation import Slice, Restrict, RestrictSub, ExpandDims 20 | from .dense import Ghost, DenseTied 21 | from .dropout import InstanceGaussianNoise 22 | from .normalize import InstanceNormalization, GroupNormalization 23 | from .conv import AConv1D, AConv2D, AConv3D, AConv1DTranspose, AConv2DTranspose, AConv3DTranspose, GroupConv1D, GroupConv2D, GroupConv3D, Conv1DTied, Conv2DTied, Conv3DTied 24 | from .external import PyExternal 25 | from .residual import Residual1D, Residual1DTranspose, Residual2D, Residual2DTranspose, Residual3D, Residual3DTranspose, Resnext1D, Resnext1DTranspose, Resnext2D, Resnext2DTranspose, Resnext3D, Resnext3DTranspose 26 | from .inception import Inception1D, Inception2D, Inception3D, Inception1DTranspose, Inception2DTranspose, Inception3DTranspose, Inceptres1D, Inceptres2D, Inceptres3D, Inceptres1DTranspose, Inceptres2DTranspose, Inceptres3DTranspose, Inceptplus1D, Inceptplus2D, Inceptplus3D, Inceptplus1DTranspose, Inceptplus2DTranspose, Inceptplus3DTranspose 27 | 28 | # Set layer dictionaries 29 | customObjects = { 30 | 'Slice': Slice, 31 | 'Restrict': Restrict, 32 | 'RestrictSub': RestrictSub, 33 | 'ExpandDims': ExpandDims, 34 | 'Ghost': Ghost, 35 | 'DenseTied': DenseTied, 36 | 'InstanceGaussianNoise': InstanceGaussianNoise, 37 | 'InstanceNormalization': InstanceNormalization, 38 | 'GroupNormalization': GroupNormalization, 39 | 'Conv1DTied': Conv1DTied, 40 | 'Conv2DTied': Conv2DTied, 41 | 'Conv3DTied': Conv3DTied, 42 | 'AConv1D': AConv1D, 43 | 'AConv2D': AConv2D, 44 | 'AConv3D': AConv3D, 45 | 'GroupConv1D': GroupConv1D, 46 | 'GroupConv2D': GroupConv2D, 47 | 'GroupConv3D': GroupConv3D, 48 | 'AConv1DTranspose': AConv1DTranspose, 49 | 'AConv2DTranspose': AConv2DTranspose, 50 | 'AConv3DTranspose': AConv3DTranspose, 51 | 'Residual1D': Residual1D, 52 | 'Residual2D': Residual2D, 53 | 'Residual3D': Residual3D, 54 | 'Residual1DTranspose': Residual1DTranspose, 55 | 'Residual2DTranspose': Residual2DTranspose, 56 | 'Residual3DTranspose': Residual3DTranspose, 57 | 'Resnext1D': Resnext1D, 58 | 'Resnext2D': Resnext2D, 59 | 'Resnext3D': Resnext3D, 60 | 'Resnext1DTranspose': Resnext1DTranspose, 61 | 'Resnext2DTranspose': Resnext2DTranspose, 62 | 'Resnext3DTranspose': Resnext3DTranspose, 63 | 'Inception1D': Inception1D, 64 | 'Inception2D': Inception2D, 65 | 'Inception3D': Inception3D, 66 | 'Inception1DTranspose': Inception1DTranspose, 67 | 'Inception2DTranspose': Inception2DTranspose, 68 | 'Inception3DTranspose': Inception3DTranspose, 69 | 'Inceptres1D': Inceptres1D, 70 | 'Inceptres2D': Inceptres2D, 71 | 'Inceptres3D': Inceptres3D, 72 | 'Inceptres1DTranspose': Inceptres1DTranspose, 73 | 'Inceptres2DTranspose': Inceptres2DTranspose, 74 | 'Inceptres3DTranspose': Inceptres3DTranspose, 75 | 'Inceptplus1D': Inceptplus1D, 76 | 'Inceptplus2D': Inceptplus2D, 77 | 'Inceptplus3D': Inceptplus3D, 78 | 'Inceptplus1DTranspose': Inceptplus1DTranspose, 79 | 'Inceptplus2DTranspose': Inceptplus2DTranspose, 80 | 'Inceptplus3DTranspose': Inceptplus3DTranspose, 81 | 'PyExternal': PyExternal 82 | } 83 | 84 | __all__ = list(customObjects.keys()) 85 | 86 | # Set alias 87 | #res = residual 88 | 89 | # Set this local module as the prefered one 90 | from pkgutil import extend_path 91 | __path__ = extend_path(__path__, __name__) 92 | 93 | # Delete private sub-modules 94 | del extend_path -------------------------------------------------------------------------------- 
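A minimal usage sketch (assuming the package is importable as `mdnt`; the file name is only illustrative): the `customObjects` dictionary defined in `layers/__init__.py` above can be handed to the plain tf.keras loader when restoring a model that uses these layers,

    import tensorflow as tf
    import mdnt

    # Register every MDNT layer class during deserialization.
    model = tf.keras.models.load_model('my_model.h5',
                                       custom_objects=mdnt.layers.customObjects)

mdnt's own `save_model`/`load_model` in `.utilities` provide an alternative path.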
/optimizers/_default.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ################################################################ 3 | # Optimizers - Default tools. 4 | # @ Modern Deep Network Toolkits for Tensorflow-Keras 5 | # Yuchen Jin @ cainmagi@gmail.com 6 | # Requirements: (Pay attention to version) 7 | # python 3.6+ 8 | # tensorflow r1.13+ 9 | # Basic tools for this module. 10 | # The default tools would be imported directly into the current 11 | # sub-module. 12 | # Version: 0.16 # 2019/6/24 13 | # Comments: 14 | # Change the quick interface to support more MDNT optimizers. 15 | # Version: 0.16 # 2019/6/24 16 | # Comments: 17 | # Change the warning interface to tensorflow version. 18 | # Version: 0.15 # 2019/6/23 19 | # Comments: 20 | # Add support for plain momentum SGD. 21 | # Version: 0.12 # 2019/6/21 22 | # Comments: 23 | # 1. Support two more tensorflow based optimizers in fast 24 | # interface. 25 | # 2. Adjust the default momentum rate of Nesterov SGD to 0.9. 26 | # Version: 0.10 # 2019/3/23 27 | # Comments: 28 | # Create this submodule. 29 | ################################################################ 30 | ''' 31 | 32 | from tensorflow.python.keras import optimizers 33 | from tensorflow.python.training import adagrad_da, proximal_gradient_descent 34 | from tensorflow.contrib.opt.python.training import weight_decay_optimizers 35 | from tensorflow.python.platform import tf_logging as logging 36 | from .adaptive import MNadam, Adabound, Nadabound 37 | 38 | from .. import compat 39 | 40 | def _raise_TF_warn(): 41 | logging.warning('You are using TFOptimizer in this case. ' 42 | 'It does not support saving/loading optimizer' 43 | ' via save_model() and load_model(). In some ' 44 | 'cases, the option decay may not apply to this' 45 | ' interface.') 46 | 47 | def optimizer(name='adam', l_rate=0.01, decay=0.0, **kwargs): 48 | ''' 49 | Define the optimizer by default parameters except learning rate. 50 | Note that most optimizers do not suggest that users modify their 51 | specially designed parameters. 52 | We suggest that users specify gamma according to common practice when 53 | using Adabound optimizers. 54 | Options: 55 | name: the name of optimizer (default='adam') (available: 'adam', 56 | 'amsgrad', 'adamax', 'adabound', 'amsbound', 'nadam', 57 | 'namsgrad', 'nadabound', 'namsbound', 'adadelta', 'rms', 58 | 'adagrad', 'adamw', 'nmoment', 'sgd', 'proximal') 59 | l_rate: learning rate (default=0.01) 60 | decay: decay ratio ('adadeltaDA' does not support this option) 61 | other parameters: see the usage of the specific optimizer. 62 | Return: 63 | the particular optimizer object.
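        Example (a sketch; the values are only illustrative):
            opt = optimizer('amsgrad', l_rate=1e-3, decay=1e-5)
            model.compile(optimizer=opt, loss='mse')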
64 | ''' 65 | name = name.casefold() 66 | if name == 'adam': 67 | return optimizers.Adam(l_rate, decay=decay, **kwargs) 68 | elif name == 'amsgrad': 69 | return optimizers.Adam(l_rate, decay=decay, amsgrad=True, **kwargs) 70 | elif name == 'adamax': 71 | return optimizers.Adamax(l_rate, decay=decay, **kwargs) 72 | elif name == 'adabound': 73 | return Adabound(l_rate, decay=decay, **kwargs) 74 | elif name == 'amsbound': 75 | return Adabound(l_rate, decay=decay, amsgrad=True, **kwargs) 76 | elif name == 'nadam': 77 | return MNadam(l_rate, decay=decay, **kwargs) 78 | elif name == 'namsgrad': 79 | return MNadam(l_rate, decay=decay, amsgrad=True, **kwargs) 80 | elif name == 'nadabound': 81 | return Nadabound(l_rate, decay=decay, **kwargs) 82 | elif name == 'namsbound': 83 | return Nadabound(l_rate, decay=decay, amsgrad=True, **kwargs) 84 | elif name == 'adadelta': 85 | return optimizers.Adadelta(l_rate, decay=decay, **kwargs) 86 | elif name == 'rms': 87 | return optimizers.RMSprop(l_rate, decay=decay, **kwargs) 88 | elif name == 'adagrad': 89 | return optimizers.Adagrad(l_rate, decay=decay, **kwargs) 90 | elif name == 'adamw': 91 | if compat.COMPATIBLE_MODE['1.14']: 92 | raise ImportError('This optimizer is not allowed for compatibility, because it require contrib lib.') 93 | _raise_TF_warn() 94 | if decay != 0.0: 95 | logging.warning('This optimizer uses \'decay\' as \'weight_decay\'.') 96 | else: 97 | raise ValueError('Should use \'decay\' > 0 for AdamW.') 98 | return weight_decay_optimizers.AdamWOptimizer(weight_decay=decay, learning_rate=l_rate, **kwargs) 99 | elif name == 'nmoment': 100 | return optimizers.SGD(lr=l_rate, momentum=0.9, decay=decay, nesterov=True, **kwargs) 101 | elif name == 'moment': 102 | return optimizers.SGD(lr=l_rate, momentum=0.9, decay=decay, nesterov=False, **kwargs) 103 | elif name == 'sgd': 104 | return optimizers.SGD(lr=l_rate, decay=decay, **kwargs) 105 | elif name == 'proximal': 106 | _raise_TF_warn() 107 | if decay != 0.0: 108 | logging.warning('This optimizer does not support \'decay\'.') 109 | return proximal_gradient_descent.ProximalGradientDescentOptimizer(l_rate, **kwargs) -------------------------------------------------------------------------------- /layers/utils.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ################################################################ 3 | # Layers - Utilities 4 | # @ Modern Deep Network Toolkits for Tensorflow-Keras 5 | # Yuchen Jin @ cainmagi@gmail.com 6 | # Requirements: (Pay attention to version) 7 | # python 3.6+ 8 | # tensorflow r1.13+ 9 | # Utilities for some newly defined layers. 10 | # Version: 0.10 # 2019/10/20 11 | # Comments: 12 | # Create this submodule. 13 | ################################################################ 14 | ''' 15 | 16 | def normalize_slice(value): 17 | '''Transform an iterable of integers (or `None`) into a slice. 18 | Arguments: 19 | value: The input iterable object, which would be converted to a slice 20 | indicating tuple. 21 | Returns: 22 | A tuple of 2/3 integers or `None`. 23 | Raises: 24 | ValueError: If something else than an int/long or iterable thereof was 25 | passed. 
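    Example:
        `normalize_slice((1, None))` returns `(1, None)` (the slice `1:`),
        and `normalize_slice((1, 9, 2))` returns `(1, 9, 2)`.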
26 | ''' 27 | try: 28 | value_tuple = tuple(value) 29 | except TypeError: 30 | raise ValueError('One input value object could not be converted into' 31 | ' a slice: ' + str(value)) 32 | if len(value_tuple) not in (2, 3): 33 | raise ValueError(str(value) + ' should has 2/3 integers or `None`, but' 34 | ' actually has ' + str(len(value_tuple)) + ' elements.') 35 | for single_value in value_tuple: 36 | if single_value is None: 37 | continue 38 | try: 39 | int(single_value) 40 | except (ValueError, TypeError): 41 | raise ValueError(str(value) + ' should only include integers or `None`' 42 | ', but actually includes element ' + str(single_value) + 43 | ' of type ' + str(type(single_value))) 44 | return value_tuple 45 | 46 | def normalize_slices(value, name): 47 | '''Transforms an iterable of tuples into a slice tuple. 48 | Arguments: 49 | value: The input iterable object, which would be converted to a slice 50 | indicating tuple. 51 | name: The name of the argument being validated. 52 | Returns: 53 | A tuple of n tuples, where n is inferred by input value. 54 | Raises: 55 | ValueError: If something else than an int/long or iterable thereof was 56 | passed. 57 | ''' 58 | try: 59 | value_tuple = (normalize_slice(value),) 60 | except ValueError: 61 | value_tuple = [] 62 | try: 63 | value_tuple = tuple(map(normalize_slice, value)) 64 | except ValueError as e: 65 | raise ValueError('The `' + name + '` argument must be a tuple of slices' + 66 | '. Received: ' + str(value) + ' including element with' + 67 | ' error: ' + str(e)) 68 | return value_tuple 69 | 70 | def normalize_abtuple(value, name, n=None): 71 | '''Transforms a single integer or iterable of integers into an integer tuple 72 | with an arbitrary length. 73 | Arguments: 74 | value: The value to validate and convert. Could an int, or any iterable 75 | of ints. 76 | n: The size of the tuple to be returned, if set `None`, the tuple would 77 | have an arbitrary length. 78 | name: The name of the argument being validated, e.g. "strides" or 79 | "dims". This is only used to format error messages. 80 | Returns: 81 | A tuple of n integers. If n is None, the tuple length is inferred by 82 | input value. 83 | Raises: 84 | ValueError: If something else than an int/long or iterable thereof was 85 | passed. 86 | ''' 87 | str_n = ('a tuple of ' + str(n) + 'integers') if n is None else ('a tuple') 88 | if isinstance(value, int): 89 | if n is None: 90 | n = 1 91 | return (value,) * n 92 | else: 93 | try: 94 | value_tuple = tuple(value) 95 | except TypeError: 96 | raise ValueError('The `' + name + '` argument must be ' + str_n + 97 | '. Received: ' + str(value)) 98 | if n is not None and len(value_tuple) != n: 99 | raise ValueError('The `' + name + '` argument must be ' + str_n + 100 | '. Received: ' + str(value)) 101 | for single_value in value_tuple: 102 | try: 103 | int(single_value) 104 | except (ValueError, TypeError): 105 | raise ValueError('The `' + name + '` argument must be ' + str_n + 106 | '. Received: ' + str(value) + ' including element ' + 107 | str(single_value) + ' of type ' + 108 | str(type(single_value))) 109 | return value_tuple 110 | 111 | def slice_len_for(slc, seqlen): 112 | ''' 113 | Infer the length of a slice object 114 | slc: Slice object. 115 | seqlen: Where the slice is applied. 
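        Example: `slice_len_for(slice(1, 7, 2), 10)` returns 3, since the
        slice selects indices 1, 3 and 5.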
116 | ''' 117 | start, stop, step = slc.indices(seqlen) 118 | return max(0, (stop - start + (step - (1 if step > 0 else -1))) // step) -------------------------------------------------------------------------------- /functions/metrics.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ################################################################ 3 | # Functions - Metrics 4 | # @ Modern Deep Network Toolkits for Tensorflow-Keras 5 | # Yuchen Jin @ cainmagi@gmail.com 6 | # Requirements: (Pay attention to version) 7 | # python 3.6+ 8 | # tensorflow r1.13+ 9 | # Extend metrics. These functions should not be used as train- 10 | # ing losses. 11 | # Version: 0.10 # 2019/6/13 12 | # Comments: 13 | # Create this submodule, and finish signal_to_noise, 14 | # correlation and jaccard_index. 15 | ################################################################ 16 | ''' 17 | 18 | from tensorflow.python.framework import constant_op 19 | from tensorflow.python.ops import array_ops 20 | from tensorflow.python.ops import math_ops 21 | from tensorflow.python.ops import gen_math_ops 22 | from tensorflow.python.ops import control_flow_ops 23 | from .others import get_channels 24 | 25 | def signal_to_noise(y_true, y_pred, mode='snr', data_format=None, epsilon=1e-8): 26 | '''Signal-to-noise ratio. (metric) 27 | Calculate the signal-to-noise ratio. It supports different modes. 28 | Arguments: 29 | mode: (1) snr: mean [ y_true^2 / (y_pred - y_true)^2 ] 30 | (2) psnr: mean [ max( y_true^2 ) / (y_pred - y_true)^2 ] 31 | data_format: 'channels_first' or 'channels_last'. The default setting is generally 32 | 'channels_last' like other tf.keras APIs. 33 | epsilon: used to avoid zero division. 34 | Input: 35 | y_true: label, tensor in any shape. 36 | y_pred: prediction, tensor in any shape. 37 | Output: 38 | scalar, the mean SNR. 39 | ''' 40 | get_reduced_axes = get_channels(y_true, data_format) 41 | if mode.casefold() == 'psnr': 42 | signal = math_ops.reduce_max(gen_math_ops.square(y_true), axis=get_reduced_axes) 43 | else: 44 | signal = math_ops.reduce_sum(gen_math_ops.square(y_true), axis=get_reduced_axes) 45 | noise = math_ops.reduce_sum(gen_math_ops.square(y_true - y_pred), axis=get_reduced_axes) + epsilon 46 | coeff = (10.0/2.3025851) # 10/log_e(10) 47 | return coeff*math_ops.reduce_mean(gen_math_ops.log(math_ops.divide(signal, noise))) 48 | 49 | def correlation(y_true, y_pred): 50 | '''Pearson correlation coefficient. (metric) 51 | The linear correlation between y_true and y_pred is between -1.0 and 1.0, indicating 52 | positive correlation and negative correlation respectively. In particular, if the 53 | correlation is 0.0, it means y_true and y_pred are linearly uncorrelated. 54 | This function is implemented by: 55 | corr = [mean(y_true * y_pred) - mean(y_true) * mean(y_pred)] 56 | / [ std(y_true) * std(y_pred) ] 57 | This function has been revised to prevent division failures (0/0). When either y_true 58 | or y_pred has zero variance, the correlation would be set as 0.0. 59 | Input: 60 | y_true: label, tensor in any shape. 61 | y_pred: prediction, tensor in any shape. 62 | Output: 63 | scalar, the mean linear correlation between y_true and y_pred.
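        Example (a sketch; assumes `from mdnt.functions import metrics`):
            model.compile(optimizer='adam', loss='mse',
                          metrics=[metrics.correlation])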
64 | ''' 65 | m_y_true = math_ops.reduce_mean(y_true, axis=0) 66 | m_y_pred = math_ops.reduce_mean(y_pred, axis=0) 67 | s_y_true = gen_math_ops.sqrt(math_ops.reduce_mean(gen_math_ops.square(y_true), axis=0) - gen_math_ops.square(m_y_true)) 68 | s_y_pred = gen_math_ops.sqrt(math_ops.reduce_mean(gen_math_ops.square(y_pred), axis=0) - gen_math_ops.square(m_y_pred)) 69 | s_denom = s_y_true * s_y_pred 70 | s_numer = math_ops.reduce_mean(y_true * y_pred, axis=0) - m_y_true * m_y_pred 71 | s_index = gen_math_ops.greater(s_denom, 0) 72 | f1 = lambda: constant_op.constant(0.0) 73 | f2 = lambda: math_ops.reduce_mean(array_ops.boolean_mask(s_numer,s_index)/array_ops.boolean_mask(s_denom,s_index)) 74 | return control_flow_ops.case([(math_ops.reduce_any(s_index), f2)], default=f1) 75 | 76 | def jaccard_index(y_true, y_pred, data_format=None): 77 | '''Jaccard index, or Intersection over Union (IoU). (metric) 78 | The IoU is thought to be a better measurement to estimate the accuracy for segmentation. 79 | If both y_true and y_pred are binary, the intersection I(y_true, y_pred) shows the part 80 | where the prediction is correct, while the union U(y_true, y_pred) contains both correct 81 | prediction and wrong prediction. I/U shows the proportion of correct prediction. 82 | Compared to other error functions (like MSE), it is more concentrated on the part where 83 | y_true=1 or y_pred=1. 84 | This function is implemented by: 85 | jacc = logical_and(y_true, y_pred) / logical_or(y_true, y_pred) 86 | Arguments: 87 | data_format: 'channels_first' or 'channels_last'. The default setting is generally 88 | 'channels_last' like other tf.keras APIs. 89 | Input: 90 | y_true: label, tensor in any shape, should have at least 3 axes. 91 | y_pred: prediction, tensor in any shape, should have at least 3 axes. 92 | Output: 93 | scalar, the mean Jaccard index between y_true and y_pred over all channels. 94 | ''' 95 | get_reduced_axes = get_channels(y_true, data_format) 96 | bin_y_true = gen_math_ops.greater(y_true, 0.5) 97 | bin_y_pred = gen_math_ops.greater(y_pred, 0.5) 98 | valNumer = gen_math_ops.logical_and(bin_y_pred, bin_y_true) 99 | valDomin = gen_math_ops.logical_or(bin_y_pred, bin_y_true) 100 | valNumer = math_ops.reduce_sum(math_ops.cast(valNumer, dtype=y_pred.dtype), axis=get_reduced_axes) 101 | valDomin = math_ops.reduce_sum(math_ops.cast(valDomin, dtype=y_pred.dtype), axis=get_reduced_axes) 102 | return math_ops.reduce_mean(math_ops.div_no_nan(valNumer, valDomin)) -------------------------------------------------------------------------------- /layers/dropout.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ################################################################ 3 | # Layers - Extended dropout and noise layers 4 | # @ Modern Deep Network Toolkits for Tensorflow-Keras 5 | # Yuchen Jin @ cainmagi@gmail.com 6 | # Requirements: (Pay attention to version) 7 | # python 3.6+ 8 | # tensorflow r1.13+ 9 | # Extend the methods for adding dropouts and noises. Such 10 | # methods may help the network avoid overfitting problems. 11 | # Version: 0.10 # 2019/6/11 12 | # Comments: 13 | # Create this submodule. 
14 | ################################################################ 15 | ''' 16 | 17 | from tensorflow.python.keras import backend as K 18 | from tensorflow.python.keras.engine.base_layer import Layer 19 | 20 | from tensorflow.python.keras.layers.core import Dropout, SpatialDropout1D, SpatialDropout2D, SpatialDropout3D 21 | from tensorflow.python.keras.layers.noise import GaussianDropout, AlphaDropout 22 | 23 | from .. import compat 24 | if compat.COMPATIBLE_MODE['1.12']: 25 | from tensorflow.python.keras.engine.base_layer import InputSpec 26 | else: 27 | from tensorflow.python.keras.engine.input_spec import InputSpec 28 | 29 | class InstanceGaussianNoise(Layer): 30 | """Apply additive zero-centered Gaussian noise. 31 | This is useful to mitigate overfitting 32 | (you could see it as a form of random data augmentation). 33 | Gaussian Noise (GS) is a natural choice as corruption process 34 | for real valued inputs. 35 | As it is a regularization layer, it is only active at training time. 36 | Different from tf.keras.layers.GaussianNoise, in this method, we 37 | add the noise in the instance normalized space: 38 | `output = std * ( (input-mean) / std + N(0, eps) ) + mean`. 39 | where `eps ~ U(0, alpha)`. So the noise strength would be scale- 40 | invariant to the input data. 41 | # Arguments 42 | axis: Integer, the axis that should be normalized 43 | (typically the features axis). 44 | For instance, after a `Conv2D` layer with 45 | `data_format="channels_first"`, 46 | set `axis=1` in `InstanceGaussianNoise`. 47 | Setting `axis=None` will normalize all values in each 48 | instance of the batch (Layer Normalization). 49 | Axis 0 is the batch dimension. `axis` cannot be set to 0 50 | to avoid errors. 51 | alpha: float, maximal standard deviation of the noise 52 | distribution. For example, when alpha = 0.3, it means 53 | the noise would be at most 30% of the input. 54 | epsilon: Small float added to variance to avoid dividing by 55 | zero. 56 | # Input shape 57 | Arbitrary. Use the keyword argument `input_shape` 58 | (tuple of integers, does not include the samples axis) 59 | when using this layer as the first layer in a model. 60 | # Output shape 61 | Same shape as input. 
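    # Example
        A sketch with illustrative values: for a channels-last feature map,
            x = InstanceGaussianNoise(axis=-1, alpha=0.3)(x)
        injects, during training only, additive noise of at most 30% of the
        per-instance standard deviation.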
62 | """ 63 | def __init__(self, 64 | axis=None, 65 | alpha=0.3, 66 | epsilon=1e-3, 67 | **kwargs): 68 | super(InstanceGaussianNoise, self).__init__(**kwargs) 69 | self.supports_masking = True 70 | self.axis = axis 71 | self.alpha = alpha 72 | self.epsilon = epsilon 73 | 74 | def build(self, input_shape): 75 | ndim = len(input_shape) 76 | if self.axis == 0: 77 | raise ValueError('Axis cannot be zero') 78 | 79 | if (self.axis is not None) and (ndim == 2): 80 | raise ValueError('Cannot specify axis for rank 1 tensor') 81 | 82 | self.input_spec = InputSpec(ndim=ndim) 83 | 84 | if self.axis is None: 85 | shape = (1,) 86 | else: 87 | shape = (input_shape[self.axis],) 88 | 89 | self.built = True 90 | 91 | def call(self, inputs, training=None): 92 | input_shape = K.int_shape(inputs) 93 | reduction_axes = list(range(0, len(input_shape))) 94 | 95 | if self.axis is not None: 96 | del reduction_axes[self.axis] 97 | 98 | del reduction_axes[0] 99 | 100 | mean = K.mean(inputs, reduction_axes, keepdims=True) 101 | stddev = K.std(inputs, reduction_axes, keepdims=True) + self.epsilon 102 | normed = (inputs - mean) / stddev 103 | 104 | def noised(): 105 | eps = K.random_uniform(shape=[1], maxval=self.alpha) 106 | return inputs + K.random_normal(shape=K.shape(inputs), 107 | mean=0., 108 | stddev=eps) 109 | get_noised = K.in_train_phase(noised, normed, training=training) 110 | 111 | retrived = stddev * get_noised + mean 112 | return retrived 113 | 114 | def compute_output_shape(self, input_shape): 115 | return input_shape 116 | 117 | def get_config(self): 118 | config = { 119 | 'axis': self.axis, 120 | 'alpha': self.alpha 121 | } 122 | base_config = super(InstanceGaussianNoise, self).get_config() 123 | return dict(list(base_config.items()) + list(config.items())) 124 | 125 | def return_dropout(dropout_type, dropout_rate, axis=-1, rank=None): 126 | if dropout_type is None: 127 | return None 128 | elif dropout_type == 'plain': 129 | return Dropout(rate=dropout_rate) 130 | elif dropout_type == 'add': 131 | return InstanceGaussianNoise(axis=axis, alpha=dropout_rate) 132 | elif dropout_type == 'mul': 133 | return GaussianDropout(rate=dropout_rate) 134 | elif dropout_type == 'alpha': 135 | return AlphaDropout(rate=dropout_rate) 136 | elif dropout_type == 'spatial': 137 | if axis == 1: 138 | dformat = 'channels_first' 139 | else: 140 | dformat = 'channels_last' 141 | if rank == 1: 142 | return SpatialDropout1D(rate=dropout_rate) 143 | elif rank == 2: 144 | return SpatialDropout2D(rate=dropout_rate, data_format=dformat) 145 | elif rank == 3: 146 | return SpatialDropout3D(rate=dropout_rate, data_format=dformat) 147 | else: 148 | return None 149 | else: 150 | return None -------------------------------------------------------------------------------- /functions/losses.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ################################################################ 3 | # Functions - Losses 4 | # @ Modern Deep Network Toolkits for Tensorflow-Keras 5 | # Yuchen Jin @ cainmagi@gmail.com 6 | # Requirements: (Pay attention to version) 7 | # python 3.6+ 8 | # tensorflow r1.13+ 9 | # Extend loss functions. These functions could serve as both 10 | # losses and metrics. 11 | # Version: 0.10 # 2019/6/13 12 | # Comments: 13 | # Create this submodule, and finish linear_jaccard_loss 14 | # and lovasz_jaccard_loss. 
15 | ################################################################ 16 | ''' 17 | 18 | from tensorflow.python.keras import backend as K 19 | from tensorflow.python.keras import losses 20 | from tensorflow.python.ops import sort_ops 21 | from tensorflow.python.ops import array_ops 22 | from tensorflow.python.ops import math_ops 23 | from tensorflow.python.ops import functional_ops 24 | from tensorflow.python.ops import gen_math_ops 25 | from tensorflow.python.ops import gen_array_ops 26 | 27 | from .others import get_channels 28 | 29 | from functools import reduce 30 | def _get_prod(x): 31 | try: 32 | return reduce(lambda a,b:a*b, x) 33 | except TypeError: 34 | return x 35 | 36 | def linear_jaccard_loss(y_true, y_pred, data_format=None): 37 | '''Simple linear approximation for Jaccard index, 38 | or Intersection over Union (IoU). (loss) 39 | This function is a simple and linear approximation for IoU. The main idea is: 40 | 1. logical_and(y_true * y_pred) could be approximated by y_true * y_pred; 41 | 2. logical_or(y_true * y_pred) could be approximated by 42 | y_true + y_pred - y_true * y_pred. 43 | Such an approximation could ensure that when both y_true and y_pred are 44 | binary, this approximation would returns the exact same value compared to 45 | the original metric, IoU. 46 | It has been proved that when both x, y in [0, 1], there is 47 | x * y < x + y - x * y. 48 | To learn more about IoU, please check mdnt.metrics.jaccard_index. 49 | This function is implemented by: 50 | appx_jacc = 1 - [ sum(y_true * y_pred) ] / [ sum(y_true + y_pred - y_true * y_pred) ] 51 | We use unsafe division in the above equation. When x / y = 0, the unsafe division would 52 | returns 0. 53 | NOTE THAT THIS IMPLEMENTATION IS THE COMPLEMENTARY OF JACCARD INDEX. 54 | Arguments: 55 | data_format: 'channels_first' or 'channels_last'. The default setting is generally 56 | 'channels_last' like other tf.keras APIs. 57 | Input: 58 | y_true: label, tensor in any shape, should have at least 3 axes. 59 | y_pred: prediction, tensor in any shape, should have at least 3 axes. 60 | Output: 61 | scalar, the approximated and complementary mean Jaccard index between y_true and 62 | y_pred over all channels. 63 | ''' 64 | get_reduced_axes = get_channels(y_true, data_format) 65 | get_mul = y_true * y_pred 66 | valNumer = math_ops.reduce_sum(get_mul, axis=get_reduced_axes) 67 | valDomin = math_ops.reduce_sum(y_true + y_pred - get_mul, axis=get_reduced_axes) 68 | return 1-math_ops.reduce_mean(math_ops.div_no_nan(valNumer, valDomin)) 69 | 70 | def _lovasz_jaccard_flat(errors, y_true): 71 | '''PRIVATE: calculate lovasz extension for jaccard index along a vector. 72 | Input: 73 | errors: error vector (should be in 0~1). 74 | y_true: labels. 75 | Output: 76 | scalar: the jaccard index calculated on the input vector. 
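        Note: the errors are sorted in descending order, the discrete
        gradient `g` of the Jaccard loss is derived from cumulative sums of
        the sorted (binarized) labels, and the result is the inner product
        of the sorted errors with `g`, where gradients are stopped on `g`.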
77 | ''' 78 | p = errors.get_shape().as_list() 79 | if len(p) != 1: 80 | raise ValueError('Input should be vectors (1D).') 81 | p = p[0] 82 | bin_y_true = math_ops.cast(gen_math_ops.greater(y_true, 0.5), dtype=errors.dtype) 83 | error_ind = sort_ops.argsort(errors, direction='DESCENDING') 84 | sorted_errors = array_ops.gather(errors, error_ind) 85 | sorted_labels = array_ops.gather(bin_y_true, error_ind) 86 | get_sum = math_ops.reduce_sum(sorted_labels) 87 | intersection = get_sum - math_ops.cumsum(sorted_labels) 88 | union = get_sum + math_ops.cumsum(1.0 - sorted_labels) 89 | g = 1.0 - math_ops.div_no_nan(intersection, union) 90 | if p > 1: 91 | g = array_ops.concat((g[0:1], g[1:] - g[:-1]), axis=0) 92 | return math_ops.reduce_sum(sorted_errors*gen_array_ops.stop_gradient(g)) 93 | 94 | def lovasz_jaccard_loss(y_true, y_pred, error_func=None, data_format=None): 95 | '''Lovasz extension for Jaccard index, or Intersection over Union (IoU). (loss) 96 | This function applies the theory of Lovasz extension. Although Lovasz extension could 97 | be used on any submodular set function, the implementation is aimed at constructing 98 | the trainable complementary of IoU. 99 | To learn more about this topic, please refer: 100 | The Lovasz-Softmax loss: A tractable surrogate for the optimization of the 101 | intersection-over-union measure in neural networks 102 | https://arxiv.org/abs/1705.08790 103 | This implementation is not adapted from the author's github codes. It computes the 104 | Lovasz loss on each channel of each sample independently, and then calculate the 105 | average value. 106 | NOTE THAT THIS IMPLEMENTATION IS THE COMPLEMENTARY OF JACCARD INDEX. 107 | Arguments: 108 | error_func: the function that is used to calculate errors. If set None, would use 109 | L1 norm (linear interpolation). 110 | data_format: 'channels_first' or 'channels_last'. The default setting is generally 111 | 'channels_last' like other tf.keras APIs. 112 | Input: 113 | y_true: label, tensor in any shape, should have at least 3 axes. 114 | y_pred: prediction, tensor in any shape, should have at least 3 axes. 115 | Output: 116 | scalar, the approximated and complementary mean Jaccard index between y_true and 117 | y_pred over all channels. 118 | ''' 119 | get_shapes = y_true.get_shape().as_list() 120 | get_dims = len(get_shapes) 121 | if get_dims < 3: 122 | raise ValueError('The input tensor should has channel dimension, i.e. 
it should have at least 3 axes.') 123 | if data_format is None: 124 | data_format = K.image_data_format() 125 | if data_format == 'channels_last': 126 | get_permute_axes = (0, get_dims-1, *range(1, get_dims-1)) 127 | get_length = _get_prod(get_shapes[1:-1]) 128 | y_true = array_ops.transpose(y_true, perm=get_permute_axes) # switch to channels_first 129 | y_pred = array_ops.transpose(y_pred, perm=get_permute_axes) 130 | else: 131 | get_length = _get_prod(get_shapes[2:]) 132 | y_true = gen_array_ops.reshape([-1, get_length]) 133 | y_pred = gen_array_ops.reshape([-1, get_length]) 134 | if error_func is None: 135 | error_func = losses.mean_absolute_error 136 | def split_process(inputs): 137 | get_y_true, get_y_pred = inputs 138 | get_errors = error_func(get_y_true, get_y_pred) 139 | return _lovasz_jaccard_flat(get_errors, get_y_true) 140 | get_losses = functional_ops.map_fn(split_process, (y_true, y_pred), dtype=y_pred.dtype) 141 | return math_ops.reduce_mean(get_losses) -------------------------------------------------------------------------------- /layers/deprecated/external.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ################################################################ 3 | # Layers - External API layer (deprecated) 4 | # @ Modern Deep Network Toolkits for Tensorflow-Keras 5 | # Yuchen Jin @ cainmagi@gmail.com 6 | # Requirements: (Pay attention to version) 7 | # python 3.6+ 8 | # tensorflow r1.13+ 9 | # An abstract utility for introducing the outside API into the 10 | # tf-keras architecture. 11 | # Version: 0.20 # 2019/5/23 12 | # Comments: 13 | # Add class 'External' to this submodule. 14 | # Version: 0.10 # 2019/3/23 15 | # Comments: 16 | # Create this submodule. 17 | ################################################################ 18 | ''' 19 | 20 | import uuid 21 | from tensorflow.python.eager import context 22 | from tensorflow.python.framework import common_shapes 23 | from tensorflow.python.framework import ops 24 | from tensorflow.python.framework import tensor_shape 25 | from tensorflow.python.framework import dtypes 26 | from tensorflow.python.keras import activations 27 | from tensorflow.python.keras import backend as K 28 | from tensorflow.python.keras import constraints 29 | from tensorflow.python.keras import initializers 30 | from tensorflow.python.keras import regularizers 31 | from tensorflow.python.keras.engine.base_layer import Layer 32 | from tensorflow.python.ops import gen_math_ops 33 | from tensorflow.python.ops import nn 34 | from tensorflow.python.ops import standard_ops 35 | from tensorflow.python.ops import script_ops 36 | from tensorflow.python.ops import variables 37 | from tensorflow.python.keras.utils import tf_utils 38 | 39 | from .. import compat 40 | if compat.COMPATIBLE_MODE['1.12']: 41 | from tensorflow.python.keras.engine.base_layer import InputSpec 42 | else: 43 | from tensorflow.python.keras.engine.input_spec import InputSpec 44 | 45 | def dtype_serialize(input_dtypes): 46 | if isinstance(input_dtypes, list): 47 | return [dtypes.as_dtype(get_dt).as_datatype_enum() for get_dt in input_dtypes] 48 | else: 49 | return [dtypes.as_dtype(input_dtypes).as_datatype_enum()] 50 | 51 | def dtype_get(input_serials): 52 | if isinstance(input_serials, list): 53 | return [dtypes.as_dtype(get_dt) for get_dt in input_serials] 54 | else: 55 | return [dtypes.as_dtype(input_serials)] 56 | 57 | class External(Layer): 58 | """External API layer. 
59 | `External` is used to introduce a non-parameter function from an 60 | external library and enable it to participate the learning workflow. 61 | Therefore, this layer is requires users to provide: 62 | 1. The forward propagation function `forward()`. 63 | 2. The back propagation function `backward()`. 64 | Arguments: 65 | forward: the forward propagating function. 66 | backward: the back propagation function. 67 | Tin: a list of input tf.DType. 68 | Tout: a list of output tf.DType. 69 | stateful: a bool flag used to define whether the forward/backward 70 | function is calculated based on previous calculation. 71 | output_shape: a tf.TensorShape, a tuple/list or a function. It is 72 | used for estimating the output shape fast. 73 | Input shape: 74 | nD tensor with shape: `(batch_size, ..., output_dim_of_tied_layer)`. 75 | The most common situation would be 76 | a 2D input with shape `(batch_size, output_dim_of_tied_layer)`. 77 | Output shape: 78 | nD tensor with shape: `(batch_size, ..., units)`. 79 | For instance, for a 2D input with shape 80 | `(batch_size, input_dim_of_tied_layer)`, 81 | the output would have shape `(batch_size, input_dim_of_tied_layer)`. 82 | """ 83 | 84 | def __init__(self, 85 | forward, 86 | backward, 87 | Tin, 88 | Tout, 89 | output_shape=None, 90 | id=None, 91 | **kwargs): 92 | if 'input_shape' not in kwargs and 'input_dim' in kwargs: 93 | kwargs['input_shape'] = (kwargs.pop('input_dim'),) 94 | 95 | super(External, self).__init__(**kwargs) 96 | self.forward = activations.get(forward) 97 | self.backward = activations.get(backward) 98 | self.Tin = dtype_get(Tin) 99 | self.Tout = dtype_get(Tout) 100 | 101 | if id is None: # id is used to tag the newly created instance 102 | self._id = 'PyExternal' + str(uuid.uuid4()) 103 | else: 104 | self._id = id 105 | 106 | if output_shape is None: 107 | raise NotImplementedError('We could not automatically infer ' 108 | 'the static shape of the External\'s output.' 109 | ' Please specify the `output_shape` for' 110 | ' this External.') 111 | else: 112 | self._output_shape = activations.get(output_shape) 113 | 114 | self.supports_masking = True 115 | 116 | def backward_tensor(self, op, *grad): 117 | x = op.inputs 118 | return script_ops.py_func(self.backward, [*x, *grad], self.Tin, name=self.name+'Grad') 119 | 120 | def call(self, inputs): 121 | if isinstance(inputs, list): 122 | inputs = [ops.convert_to_tensor(one_input) for one_input in inputs] 123 | else: 124 | inputs = [ops.convert_to_tensor(inputs)] 125 | # Register and override the gradients 126 | ops.RegisterGradient(self._id)(self.backward_tensor) 127 | g = ops.get_default_graph() 128 | with g.gradient_override_map({"PyFunc": self._id, "pyfunc_0": self._id, "PyFuncStateless": self._id}): 129 | res = script_ops.py_func(self.forward, inputs, self.Tout, name=self.name) 130 | oshape = self._output_shape([inp.get_shape() for inp in inputs]) 131 | if isinstance(res, list): 132 | for i in range(len(res)): 133 | res[i].set_shape(oshape[i]) 134 | return res 135 | 136 | @tf_utils.shape_type_conversion 137 | def compute_output_shape(self, input_shape): 138 | if self._output_shape is None: 139 | raise NotImplementedError('We could not automatically infer ' 140 | 'the static shape of the External\'s output.' 
141 | ' Please specify the `output_shape` for' 142 | ' this External.') 143 | else: 144 | shape = self._output_shape(input_shape) 145 | if not isinstance(shape, (list, tuple)): 146 | raise ValueError( 147 | '`output_shape` function must return a tuple or a list of tuples.') 148 | # List here can represent multiple outputs or single output. 149 | if isinstance(shape, list): 150 | # Convert list representing single output into a tuple. 151 | if isinstance(shape[0], (int, type(None))): 152 | shape = tuple(shape) 153 | else: 154 | return [ 155 | tensor_shape.TensorShape(single_shape) for single_shape in shape 156 | ] 157 | return tensor_shape.TensorShape(shape) 158 | 159 | def get_config(self): 160 | config = { 161 | 'forward': activations.serialize(self.forward), 162 | 'backward': activations.serialize(self.backward), 163 | 'Tin': dtype_serialize(self.Tin), 164 | 'Tout': dtype_serialize(self.Tout), 165 | 'output_shape': activations.serialize(self._output_shape), 166 | 'id': self._id, 167 | } 168 | base_config = super(External, self).get_config() 169 | return dict(list(base_config.items()) + list(config.items())) 170 | -------------------------------------------------------------------------------- /layers/external.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ################################################################ 3 | # Layers - External API layer 4 | # @ Modern Deep Network Toolkits for Tensorflow-Keras 5 | # Yuchen Jin @ cainmagi@gmail.com 6 | # Requirements: (Pay attention to version) 7 | # python 3.6+ 8 | # tensorflow r1.13+ 9 | # An abstract utility for introducing the outside API into the 10 | # tf-keras architecture. 11 | # Version: 0.25 # 2019/11/27 12 | # Comments: 13 | # Fix a bug for 'External' when having multiple inputs. 14 | # Version: 0.20 # 2019/5/23 15 | # Comments: 16 | # Add class 'External' to this submodule. 17 | # Version: 0.10 # 2019/3/23 18 | # Comments: 19 | # Create this submodule. 20 | ################################################################ 21 | ''' 22 | 23 | from tensorflow.python.framework import ops 24 | from tensorflow.python.framework import tensor_shape 25 | from tensorflow.python.framework import dtypes 26 | from tensorflow.python.keras import activations 27 | from tensorflow.python.keras import backend as K 28 | from tensorflow.python.keras.engine.base_layer import Layer 29 | from tensorflow.python.ops import array_ops 30 | from tensorflow.python.ops import script_ops 31 | from tensorflow.python.ops import custom_gradient 32 | from tensorflow.python.keras.utils import tf_utils 33 | 34 | def dtype_serialize(input_dtypes): 35 | if isinstance(input_dtypes, list): 36 | return [dtypes.as_dtype(get_dt).as_datatype_enum for get_dt in input_dtypes] 37 | else: 38 | return [dtypes.as_dtype(input_dtypes).as_datatype_enum] 39 | 40 | def dtype_get(input_serials): 41 | if isinstance(input_serials, list): 42 | return [dtypes.as_dtype(get_dt) for get_dt in input_serials] 43 | else: 44 | return [dtypes.as_dtype(input_serials)] 45 | 46 | class PyExternal(Layer): 47 | """External API layer for generic python function. 48 | `PyExternal` is used to introduce a non-parameter function from an 49 | external library and enable it to participate the learning workflow. 50 | Therefore, this layer is requires users to provide: 51 | 1. The forward propagation function `forward()`. 52 | 2. The back propagation function `backward()`. 53 | 3. The shape calculation function `output_shape()`. 
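    For example (a hypothetical sketch; the wrapped function is only
    illustrative), a numpy-style doubling op could be introduced as
        PyExternal(forward=lambda x: 2.0 * x,
                   backward=lambda x, dy: 2.0 * dy,
                   output_shape=lambda shapes: shapes,
                   Tin=['float32'], Tout=['float32'])
    where, under the default switches (`xEnable=True`, `yEnable=False`,
    `dyEnable=True`), `backward` receives `(x, dy)` and returns `dx`.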
54 | Arguments: 55 | forward: the forward propagating function. It serves as 56 | `y=F(x)`, where `x` may be a list of multiple inputs. 57 | backward: the back propagation function. It serves as 58 | `dx=B(...)`, where the input of this function is 59 | determined by `xEnable`, `yEnable`, `dyEnable`. 60 | Tin: a list of input tf.DType. 61 | Tout: a list of output tf.DType. 62 | output_shape: a tf.TensorShape, a tuple/list or a function. It is 63 | used for estimating the output shape fast. 64 | xEnable, 65 | yEnable, 66 | dyEnable: enable users to customize the input of the backward 67 | function. If only the `xEnable` is `True`, the input 68 | of the function would be `B(x)`, For another example, 69 | if only both `yEnable` and `dyEnable` are `True`, the 70 | input of the function would be `B(y, dy)`. 71 | Input shape: 72 | nD tensor with shape: `(batch_size, ..., output_dim_of_tied_layer)`. 73 | The most common situation would be 74 | a 2D input with shape `(batch_size, output_dim_of_tied_layer)`. 75 | Output shape: 76 | nD tensor with shape: `(batch_size, ..., units)`. 77 | For instance, for a 2D input with shape 78 | `(batch_size, input_dim_of_tied_layer)`, 79 | the output would have shape `(batch_size, input_dim_of_tied_layer)`. 80 | """ 81 | 82 | def __init__(self, 83 | forward, 84 | backward, 85 | output_shape, 86 | Tin, 87 | Tout, 88 | xEnable=True, 89 | yEnable=False, 90 | dyEnable=True, 91 | **kwargs): 92 | if 'input_shape' not in kwargs and 'input_dim' in kwargs: 93 | kwargs['input_shape'] = (kwargs.pop('input_dim'),) 94 | 95 | super(PyExternal, self).__init__(**kwargs) 96 | self.forward = activations.get(forward) 97 | self.backward = activations.get(backward) 98 | self.Tin = dtype_get(Tin) 99 | self.Tout = dtype_get(Tout) 100 | self.xEnable = xEnable 101 | self.yEnable = yEnable 102 | self.dyEnable = dyEnable 103 | 104 | if output_shape is None: 105 | raise NotImplementedError('We could not automatically infer ' 106 | 'the static shape of the PyExternal\'s output.' 107 | ' Please specify the `output_shape` for' 108 | ' this PyExternal.') 109 | else: 110 | self._output_shape = activations.get(output_shape) 111 | 112 | self.supports_masking = True 113 | 114 | def call(self, inputs): 115 | if isinstance(inputs, list): 116 | inputs = [ops.convert_to_tensor(one_input) for one_input in inputs] 117 | else: 118 | inputs = [ops.convert_to_tensor(inputs)] 119 | 120 | # Define ops with first-order gradients 121 | @custom_gradient.custom_gradient 122 | def _external_func(*x): 123 | y = script_ops.eager_py_func(self.forward, x, self.Tout, name='pyfunc') 124 | def _external_func_grad(*grad): 125 | iList = [] 126 | if self.xEnable: 127 | iList.extend(x) 128 | if self.yEnable: 129 | if isinstance(y, (list, tuple)): 130 | iList.extend(y) 131 | else: 132 | iList.append(y) 133 | if self.dyEnable: 134 | iList.extend(grad) 135 | return script_ops.eager_py_func(self.backward, iList, self.Tin) 136 | return y, _external_func_grad 137 | 138 | res = _external_func(*inputs) 139 | oshape = self._output_shape([inp.get_shape() for inp in inputs]) 140 | if isinstance(res, list): 141 | for i in range(len(res)): 142 | res[i].set_shape(oshape[i]) 143 | return res 144 | 145 | @tf_utils.shape_type_conversion 146 | def compute_output_shape(self, input_shape): 147 | shape = self._output_shape(input_shape) 148 | if not isinstance(shape, (list, tuple)): 149 | raise ValueError( 150 | '`output_shape` function must return a tuple or a list of tuples.') 151 | # List here can represent multiple outputs or single output. 
152 | if isinstance(shape, list): 153 | # Convert list representing single output into a tuple. 154 | if isinstance(shape[0], (int, type(None))): 155 | shape = tuple(shape) 156 | else: 157 | return [ 158 | tensor_shape.TensorShape(single_shape) for single_shape in shape 159 | ] 160 | return tensor_shape.TensorShape(shape) 161 | 162 | def compute_mask(self, inputs, mask=None): 163 | if mask is None: 164 | return None 165 | if (not isinstance(inputs, list)) and (not isinstance(mask, list)): 166 | return super(PyExternal, self).compute_mask(inputs=inputs, mask=mask) 167 | if not isinstance(mask, list): 168 | raise ValueError('`mask` should be a list.') 169 | if not isinstance(inputs, list): 170 | raise ValueError('`inputs` should be a list.') 171 | if len(mask) != len(inputs): 172 | raise ValueError('The lists `inputs` and `mask` ' 173 | 'should have the same length.') 174 | if all(m is None for m in mask): 175 | return None 176 | masks = [array_ops.expand_dims(m, axis=0) for m in mask if m is not None] 177 | return K.all(K.concatenate(masks, axis=0), axis=0, keepdims=False) 178 | 179 | def get_config(self): 180 | config = { 181 | 'forward': activations.serialize(self.forward), 182 | 'backward': activations.serialize(self.backward), 183 | 'Tin': dtype_serialize(self.Tin), 184 | 'Tout': dtype_serialize(self.Tout), 185 | 'output_shape': activations.serialize(self._output_shape), 186 | 'xEnable': self.xEnable, 187 | 'yEnable': self.yEnable, 188 | 'dyEnable': self.dyEnable 189 | } 190 | base_config = super(PyExternal, self).get_config() 191 | return dict(list(base_config.items()) + list(config.items())) 192 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ################################################################ 3 | # Modern Deep Network Toolkits for Tensorflow-Keras 4 | # Yuchen Jin @ cainmagi@gmail.com 5 | # Requirements: (Pay attention to version) 6 | # python 3.6+ 7 | # tensorflow r1.13+ 8 | # This is a pakage for extending the tensorflow-keras to modern 9 | # deep network design. It would introduce some state-of-art 10 | # network blocks, data parsing utilities, logging modules and 11 | # more extensions. 12 | # Loading this module would not cause conflictions on other 13 | # modules (if users do not use `from mdnt import *` to override 14 | # utilites from other modules. However, it will provide some 15 | # tools with the same/similar name and functions compared to 16 | # plain tensorflow-keras. 17 | # Version: 0.80 # 2020/08/30 18 | # Comments: 19 | # 1. Extend the compatible mode. 20 | # 2. Fix bugs and add features in .utilities.draw. 21 | # 3. Add features in .data.h5py 22 | # 4. Fix bugs in .layers.activation. 23 | # Version: 0.79 # 2020/02/10 24 | # Comments: 25 | # 1. Finish H5Converter in .data. 26 | # Version: 0.78-b # 2019/12/05 27 | # Comments: 28 | # 1. Fix some bugs and add features in .utilities.draw. 29 | # 2. Add webfiles.zip for .utilities.tboard. 30 | # 3. Fix a small bug in .utilities. 31 | # Version: 0.78 # 2019/11/27 32 | # Comments: 33 | # 1. Enhance the save_model/load_model for supportting 34 | # storing/recovering customized loss/metric class. 35 | # 2. Finish the submodule .utilities.draw for providing 36 | # extended visualizations. 37 | # 3. Finish the submodule .utilities.tboard for providing 38 | # extended tensorboard interfaces. 39 | # 4. Fix some bugs. 
40 | # Version: 0.73-b # 2019/10/27 41 | # Comments: 42 | # 1. Let save_model support compression. 43 | # 2. Revise the optional arguments for RestrictSub in 44 | # .layers. 45 | # Version 0.73 # 2019/10/24 46 | # Comments: 47 | # 1. Fix a bug for H5GCombiner in .data. 48 | # 2. Finish H5VGParser in .data. 49 | # 3. Finish ExpandDims in .layers. 50 | # 4. Enable ModelCheckpoint in .utilities.callbacks to 51 | # support the option for not saving optimizer. 52 | # Version 0.72 # 2019/10/22 53 | # Comments: 54 | # 1. Fix a bug for Ghost in .layers. 55 | # 2. Finish Slice, Restrict and RestrictSub in .layers. 56 | # Version 0.70 # 2019/10/15 57 | # Comments: 58 | # 1. Let save_model/load_model support storing/recovering 59 | # variable loss weights. 60 | # 2. Finish LossWeightsScheduler in .utilities.callbacks. 61 | # Version 0.69-b # 2019/10/7 62 | # Comments: 63 | # Enable the H5SupSaver to add more data to an existed file. 64 | # Version 0.69 # 2019/9/10 65 | # Comments: 66 | # Enable the H5SupSaver in .data to expand if data is dumped 67 | # in series. 68 | # Version 0.68 # 2019/6/27 69 | # Comments: 70 | # 1. Finish MNadam, Adabound and Nadabound in .optimizers. 71 | # 2. Slightly change .optimizers.mixture. 72 | # 3. Change the quick interface in .optimizers. 73 | # Version 0.64-b # 2019/6/26 74 | # Comments: 75 | # 1. Finish the demo version for SWATS in .optimizers. 76 | # 2. Fix a small bug for .load_model 77 | # Version 0.64 # 2019/6/24 78 | # Comments: 79 | # 1. Finish ModelWeightsReducer in .utilities.callbacks. 80 | # 2. Finish Ghost in .layers. 81 | # 3. Fix small bugs. 82 | # Version 0.63 # 2019/6/23 83 | # Comments: 84 | # 1. Fix the bugs of manually switched optimizers in 85 | # .optimizers. Now they require to be used with a callback 86 | # or switch the phase by switch(). 87 | # 2. Add a plain momentum SGD optimizer to fast interface in 88 | # .optimizers. 89 | # 3. Finish OptimizerSwitcher in .utilities.callbacks. It 90 | # is used to control the phase of the manually swtiched 91 | # optimizers. 92 | # 4. Improve the efficiency for Adam2SGD and NAdam2NSGD in 93 | # .optimizers. 94 | # Version 0.62 # 2019/6/21 95 | # Comments: 96 | # 1. Finish the manually switched optimizers in .optimizers: 97 | # Adam2SGD and NAdam2NSGD. Both of them supports amsgrad 98 | # mode. 99 | # 2. Adjust the fast interface .optimizers.optimizer. Now 100 | # it supports 2 more tensorflow based optimizers and the 101 | # default momentum of Nesterov SGD optimizer is changed 102 | # to 0.9. 103 | # Version 0.60-b # 2019/6/20 104 | # Comments: 105 | # 1. Fix some bugs in .layers.conv and .layers.unit. 106 | # 2. Remove the normalization layer from all projection 107 | # branches in .layers.residual and .layers.inception. 108 | # Version 0.60 # 2019/6/19 109 | # Comments: 110 | # 1. Support totally new save_model and load_model APIs in 111 | # .utilites. 112 | # 2. Finish ModelCheckpoint in .utilities.callbacks. 113 | # Version: 0.56 # 2019/6/13 114 | # Comments: 115 | # Finish losses.linear_jaccard_index, 116 | # losses.lovasz_jaccard_loss, 117 | # metrics.signal_to_noise, 118 | # metrics.correlation, 119 | # metrics.jaccard_index 120 | # in .functions (may require tests in the future). 121 | # Version: 0.54 # 2019/6/12 122 | # Comments: 123 | # 1. Add dropout options to all advanced blocks (including 124 | # residual, ResNeXt, inception, incept-res and incept- 125 | # plus). 126 | # 2. Strengthen the compatibility. 127 | # 3. Fix minor bugs for spatial dropout in 0.50-b. 128 | # 4. Thanks to GOD! 
.layers has been finished, although it 129 | # may require modification in the future. 130 | # Version: 0.50-b # 2019/6/11 131 | # Comments: 132 | # 1. Fix a bug for implementing the channel_first mode for 133 | # AConv in .layers. 134 | # 2. Finish InstanceGaussianNoise in .layers. 135 | # 3. Prepare the test for adding dropout to residual layers 136 | # in .layers. 137 | # Version: 0.50 # 2019/6/11 138 | # Comments: 139 | # 1. Finish Conv1DTied, Conv2DTied, Conv3DTied in .layers. 140 | # 2. Switch back to the 0.48 version for .layers.DenseTied 141 | # APIs because testing show that the modification in 142 | # 0.48-b will cause bugs. 143 | # Version: 0.48-b # 2019/6/10 144 | # Comments: 145 | # A Test on replacing the .layers.DenseTied APIs like 146 | # tf.keras.layers.Wrappers. 147 | # Version: 0.48 # 2019/6/9 148 | # Comments: 149 | # 1. Finish Inceptplus1D, Inceptplus2D, Inceptplus3D, 150 | # Inceptplus1DTranspose, Inceptplus2DTranspose, 151 | # Inceptplus3DTranspose in .layers. 152 | # 2. Minor changes for docstrings and default settings in 153 | # .layers.inception. 154 | # Version: 0.45-b # 2019/6/7 155 | # Comments: 156 | # 1. Enable the ResNeXt to estimate the latent group and local 157 | # filter number. 158 | # 2. Make a failed try on implementing quick group convolution, 159 | # testing results show that using tf.nn.depthwise_conv2d 160 | # to replace multiple convND ops would cause the computation 161 | # to be even slower. 162 | # Version: 0.45 # 2019/6/6 163 | # Comments: 164 | # 1. Enable Modern convolutional layers to work with group 165 | # convolution. 166 | # 2. Reduce the memory consumption for network construction 167 | # when using ResNeXt layers in case of out of memory (OOM) 168 | # problems. 169 | # 3. Fix a minor bug for group convolution. 170 | # Version: 0.42 # 2019/6/5 171 | # Comments: 172 | # 1. Add GroupConv1D, GroupConv2D, GroupConv3D in .layers. 173 | # 2. Fix the bugs in channel detections for residual and 174 | # inception layers. 175 | # Version: 0.40 # 2019/6/5 176 | # Comments: 177 | # 1. Finish Resnext1D, Resnext2D, Resnext3D, 178 | # Resnext1DTranspose, Resnext2DTranspose, 179 | # Resnext3DTranspose in .layers. 180 | # 2. Fix the repeating biases problems in inception-residual 181 | # layers. 182 | # Version: 0.38 # 2019/6/4 183 | # Comments: 184 | # 1. Finish Inceptres1D, Inceptres2D, Inceptres3D, 185 | # Inceptres1DTranspose, Inceptres2DTranspose, 186 | # Inceptres3DTranspose in .layers. 187 | # 2. Fix some bugs and revise docstrings for .layers.residual and 188 | # .layers.inception. 189 | # Version: 0.36 # 2019/6/1 190 | # Comments: 191 | # Finish Inception1D, Inception2D, Inception3D, 192 | # Inception1DTranspose, Inception2DTranspose, 193 | # Inception3DTranspose in .layers. 194 | # Version: 0.32 # 2019/5/31 195 | # Comments: 196 | # Finish Residual1D, Residual2D, Residual3D, Residual1DTranspose, 197 | # Residual2DTranspose, Residual3DTranspose in .layers. 198 | # Version: 0.28 # 2019/5/24 199 | # Comments: 200 | # 1. Fix the bug about padding for transposed dilation 201 | # convolutional layers. 202 | # 2. Add a new option output_mshape to help transposed 203 | # convolutional layers to control the desired output shape. 204 | # 3. Finish PyExternal in .layers. 205 | # Version: 0.24 # 2019/3/31 206 | # Comments: 207 | # Finish H5GCombiner in .data. 208 | # Version: 0.23 # 2019/3/26 209 | # Comments: 210 | # 1. Use keras.Sequence() to redefine H5GParser and 211 | # H5HGParser. 212 | # 2. Add compatible check. 
213 | # Version: 0.22 # 2019/3/26 214 | # Comments: 215 | # Adjust the .data.h5py module to make it more generalized. 216 | # Version: 0.20 # 2019/3/26 217 | # Comments: 218 | # Finish H5HGParser, H5SupSaver, H5GParser in .data. 219 | # Finish DenseTied, InstanceNormalization, GroupNormalization, 220 | # AConv1D, AConv2D, AConv3D, AConv1DTranspose, 221 | # AConv2DTranspose, AConv3DTranspose in .layers. 222 | # Version: 0.10 # 2019/3/23 223 | # Comments: 224 | # Create this project. 225 | ################################################################ 226 | ''' 227 | 228 | # Import sub-modules 229 | from . import optimizers 230 | from . import layers 231 | from . import data 232 | from . import functions 233 | from . import utilities 234 | 235 | __version__ = '0.80' 236 | 237 | # Alias 238 | save_model = utilities.save_model 239 | load_model = utilities.load_model 240 | 241 | __all__ = [ 242 | 'optimizers', 'layers', 'data', 'functions', 'utilities', 243 | 'save_model', 'load_model' 244 | ] 245 | 246 | # Set this local module as the prefered one 247 | from pkgutil import extend_path 248 | __path__ = extend_path(__path__, __name__) 249 | 250 | # Delete private sub-modules and objects 251 | del extend_path -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Modern Deep Network Toolkits for Tensorflow-Keras 2 | 3 | We proudly present our newest produce, a totally well-defined extension for Tensorflow-Keras users! 4 | 5 | ## Documentation 6 | 7 | Still not available now, will implement in the future. 8 | 9 | ## Progress 10 | 11 | Now we have such progress on the semi-product: 12 | 13 | - [x] optimzers: 14 | - [x] Manually switched optimizers (`Adam2SGD` and `NAdam2NSGD`). 15 | - [x] Automatically switched optimizer (`SWATS`). 16 | - [x] Advanced adaptive optimizers ( `Adabound`, `Nadabound` and `MNadam` supporting `amsgrad`). 17 | - [x] Wrapped default optimizers. 18 | - [x] layers: 19 | - [x] Ghost layer (used to construct trainable input layer). 20 | - [x] Tied dense layer for the symmetric autoencoder. 21 | - [x] Extended dropout and noise layers. 22 | - [x] Extended activation layers. 23 | - [x] Extended normalization layers. 24 | - [x] Group convolutional layers. 25 | - [x] Modern convolutional layers (support group convolution). 26 | - [x] Modern transposed convolutional layers (support group convolution). 27 | - [x] Tied (trivial) transposed convolutional layers for the symmetric autoencoder. 28 | - [x] Residual layers (or blocks) and their transposed versions. 29 | - [x] ResNeXt layers (or blocks) and their transposed versions. 30 | - [x] Inception-v4 layers (or blocks) and their transposed versions. 31 | - [x] InceptionRes-v2 layers (or blocks) and their transposed versions. 32 | - [x] InceptionPlus layers (or blocks) and their transposed versions. 33 | - [x] External interface for using generic python function. 34 | - [x] Droupout method options for all avaliable modern layers. 35 | - [ ] data: 36 | - [x] Basic h5py (HDF5) IO handles. 37 | - [ ] Basic SQLite IO handles. 38 | - [ ] Basic Bcolz IO handles. 39 | - [ ] Basic CSV IO handles. 40 | - [ ] Basic JSON IO handles. 41 | - [ ] Data parsing utilities. 
42 | - [ ] estimators: 43 | - [ ] VGG16 44 | - [ ] U-Net 45 | - [ ] ResNet 46 | - [x] functions: 47 | - [x] (loss): Lovasz loss for IoU 48 | - [x] (loss): Linear interpolated loss for IoU 49 | - [x] (metrics): signal-to-noise ratio (SNR and PSNR) 50 | - [x] (metrics): Pearson correlation coefficient 51 | - [x] (metrics): IoU / Jaccard index 52 | - [ ] utilities: 53 | - [x] Revised save and load model functions. 54 | - [ ] Beholder plug-in callback. 55 | - [x] Revised ModelCheckpoint callback. 56 | - [x] LossWeightsScheduler callback (for changing the loss weights during the training). 57 | - [x] OptimizerSwitcher callback (for using manually switched optimizers). 58 | - [x] ModelWeightsReducer callback (parameter decay strategy including L1 decay and L2 decay). 59 | - [x] Extended data visualization tools. 60 | - [x] Tensorboard log file parser. 61 | 62 | ## Demos 63 | 64 | Check the branch [`demos`][brch-demos] to learn more details. 65 | 66 | ## Update records 67 | 68 | ### 0.79 @ 02/10/2020 69 | 70 | 1. Finish H5Converter `H5Converter` in `.data`. 71 | 72 | ### 0.78-b @ 12/05/2019 73 | 74 | 1. Fix some bugs and add features in `.utilities.draw`. 75 | 2. Add `webfiles.zip` for `.utilities.tboard`. 76 | 3. Fix a small bug in `.utilities`. 77 | 78 | ### 0.78 @ 11/27/2019 79 | 80 | 1. Enhance the `save_model`/`load_model` for supportting storing/recovering customized loss/metric class. 81 | 2. Finish the submodule `.utilities.draw` for providing extended visualizations. 82 | 3. Finish the submodule `.utilities.tboard` for providing extended tensorboard interfaces. 83 | 4. Fix some bugs. 84 | 85 | ### 0.73-b @ 10/27/2019 86 | 87 | 1. Let `.save_model` support compression. 88 | 2. Revise the optional arguments for `RestrictSub` in `.layers`. 89 | 90 | ### 0.73 @ 10/24/2019 91 | 92 | 1. Fix a bug for `H5GCombiner` in `.data` when adding more parsers. 93 | 2. Finish `H5VGParser` in `.data`, this parser is used for splitting validation set from a dataset. 94 | 3. Finish `ExpandDims` in `.layers`, it is a layer version of `tf.expand_dims`. 95 | 4. Enable `ModelCheckpoint` in `.utilities.callbacks` to support the option for not saving optimizer. 96 | 97 | ### 0.72 @ 10/22/2019 98 | 99 | 1. Fix a bug for serializing `Ghost` in `.layers`. 100 | 2. Finish activation layers in `.layers`, including `Slice`, `Restrict` and `RestrictSub`. 101 | 102 | ### 0.70 @ 10/15/2019 103 | 104 | 1. Let `.save_model`/`.load_model` supports storing/recovering variable loss weights. 105 | 2. Finish `LossWeightsScheduler` in `.utilities.callbacks`. 106 | 107 | ### 0.69-b @ 10/07/2019 108 | 109 | Enable the `H5SupSaver` in `.data` to add more data to an existed file. 110 | 111 | ### 0.69 @ 09/10/2019 112 | 113 | Enable the `H5SupSaver` in `.data` to expand if data is dumped in series. 114 | 115 | ### 0.68 @ 06/27/2019 116 | 117 | 1. Finish `MNadam`, `Adabound` and `Nadabound` in `.optimizers`. 118 | 2. Slightly change `.optimizers.mixture`. 119 | 3. Change the quick interface in `.optimizers`. 120 | 121 | ### 0.64-b @ 06/26/2019 122 | 123 | 1. Finish the demo version for `SWATS` in `.optimizers`. Need further tests. 124 | 2. Fix a small bug for `.load_model`. 125 | 3. Change the warning backend to tensorflow version. 126 | 127 | ### 0.64 @ 06/24/2019 128 | 129 | 1. Finish `ModelWeightsReducer` in `.utilities.callbacks`. 130 | 2. Finish `Ghost` in `.layers`. 131 | 3. Fix small bugs. 132 | 133 | ### 0.63 @ 06/23/2019 134 | 135 | 1. 
Fix the bugs of manually switched optimizers in `.optimizers.` Now they require to be used with a callback or switch the phase by `switch()`. 136 | 2. Add a plain momentum SGD optimizer to fast interface in `.optimizers`. 137 | 3. Finish `OptimizerSwitcher` in `.utilities.callbacks`. It is used to control the phase of the manually swtiched optimizers. 138 | 4. Improve the efficiency for `Adam2SGD` and `NAdam2NSGD` in `.optimizers`. 139 | 140 | ### 0.62 @ 06/21/2019 141 | 142 | 1. Finish the manually switched optimizers in `.optimizers`: `Adam2SGD` and `NAdam2NSGD`. Both of them supports amsgrad mode. 143 | 2. Adjust the fast interface `.optimizers.optimizer`. Now it supports 2 more tensorflow based optimizers and the default momentum of Nesterov SGD optimizer is changed to 0.9. 144 | 145 | ### 0.60-b @ 06/20/2019 146 | 147 | 1. Fix some bugs in `.layers.conv` and `.layers.unit`. 148 | 2. Remove the normalization layer from all projection branches in `.layers.residual` and `.layers.inception`. 149 | 150 | ### 0.60 @ 06/19/2019 151 | 152 | 1. Support totally new `save_model` and `load_model` APIs in `.utilites`. 153 | 2. Finish `ModelCheckpoint` in `.utilities.callbacks`. 154 | 155 | ### 0.56 @ 06/13/2019 156 | 157 | Finish `losses.linear_jaccard_index`, `losses.lovasz_jaccard_loss`, `metrics.signal_to_noise`, `metrics.correlation`, `metrics.jaccard_index` in `.functions` (may require tests in the future). 158 | 159 | ### 0.54 @ 06/12/2019 160 | 161 | 1. Add dropout options to all advanced blocks (including residual, ResNeXt, inception, incept-res and incept-plus). 162 | 2. Strengthen the compatibility. 163 | 3. Fix minor bugs for spatial dropout in `0.50-b`. 164 | 4. Thanks to GOD! `.layers` has been finished, although it may require modification in the future. 165 | 166 | ### 0.50-b @ 06/11/2019 167 | 168 | 1. Fix a bug for implementing the channel_first mode for `AConv` in `.layers`. 169 | 2. Finish `InstanceGaussianNoise` in `.layers`. 170 | 3. Prepare the test for adding dropout to residual layers in `.layers`. 171 | 172 | ### 0.50 @ 06/11/2019 173 | 174 | 1. Finish `Conv1DTied`, `Conv2DTied`, `Conv3DTied` in `.layers`. 175 | 2. Switch back to the 0.48 version for `.layers.DenseTied` APIs because testing show that the modification in 0.48-b will cause bugs. 176 | 177 | ### 0.48-b @ 06/10/2019 178 | 179 | A Test on replacing the `.layers.DenseTied` APIs like `tf.keras.layers.Wrappers`. 180 | 181 | ### 0.48 @ 06/09/2019 182 | 183 | 1. Finish `Inceptplus1D`, `Inceptplus2D`, `Inceptplus3D`, `Inceptplus1DTranspose`, `Inceptplus2DTranspose`, `Inceptplus3DTranspose` in `.layers`. 184 | 2. Minor changes for docstrings and default settings in `.layers.inception`. 185 | 186 | ### 0.45-b @ 06/07/2019 187 | 188 | 1. Enable the `ResNeXt` to estimate the latent group and local filter number. 189 | 2. Make a failed try on implementing quick group convolution, testing results show that using `tf.nn.depthwise_conv2d` to replace multiple `convND` ops would cause the computation to be even slower. 190 | 191 | ### 0.45 @ 06/06/2019 192 | 193 | 1. Enable Modern convolutional layers to work with group convolution. 194 | 2. Reduce the memory consumption for network construction when using ResNeXt layers in case of out of memory (OOM) problems. 195 | 3. Fix a minor bug for group convolution. 196 | 197 | ### 0.42 @ 06/05/2019 198 | 199 | 1. Finish `GroupConv1D`, `GroupConv2D`, `GroupConv3D` in `.layers`. 200 | 2. Fix the bugs in channel detections for residual and inception layers. 
201 | 202 | ### 0.40 @ 06/05/2019 203 | 204 | 1. Finish `Resnext1D`, `Resnext2D`, `Resnext3D`, `Resnext1DTranspose`, `Resnext2DTranspose`, `Resnext3DTranspose` in `.layers`. 205 | 2. Fix the repeating biases problems in inception-residual layers. 206 | 207 | ### 0.38 @ 06/04/2019 208 | 209 | 1. Finish `Inceptres1D`, `Inceptres2D`, `Inceptres3D`, `Inceptres1DTranspose`, `Inceptres2DTranspose`, `Inceptres3DTranspose` in `.layers`. 210 | 2. Fix some bugs and revise docstrings for `.layers.residual` and `.layers.inception`. 211 | 212 | ### 0.36 @ 06/01/2019 213 | 214 | Finish `Inception1D`, `Inception2D`, `Inception3D`, `Inception1DTranspose`, `Inception2DTranspose`, `Inception3DTranspose` in `.layers`. 215 | 216 | ### 0.32 @ 05/31/2019 217 | 218 | Finish `Residual1D`, `Residual2D`, `Residual3D`, `Residual1DTranspose`, `Residual2DTranspose`, `Residual3DTranspose` in `.layers`. 219 | 220 | ### 0.28 @ 05/24/2019 221 | 222 | 1. Fix the bug about padding for transposed dilation convolutional layers. 223 | 2. Add a new option `output_mshape` to help transposed convolutional layers to control the desired output shape. 224 | 3. Finish `PyExternal` in `.layers`. 225 | 226 | ### 0.24 @ 03/31/2019 227 | 228 | Finish `H5GCombiner` in `.data`. 229 | 230 | ### 0.23 @ 03/27/2019 231 | 232 | 1. Use `keras.Sequence()` to redefine `H5GParser` and `H5HGParser`. 233 | 2. Add compatible check. 234 | 235 | ### 0.22 @ 03/26/2019 236 | 237 | Adjust the `.data.h5py` module to make it more generalized. 238 | 239 | ### 0.20 @ 03/26/2019 240 | 241 | 1. Finish `H5HGParser`, `H5SupSaver`, `H5GParser` in `.data`. 242 | 2. Finish `DenseTied`, `InstanceNormalization`, `GroupNormalization`, `AConv1D`, `AConv2D`, `AConv3D`, `AConv1DTranspose`, `AConv2DTranspose`, `AConv3DTranspose` in `.layers`. 243 | 244 | ### 0.10 @ 03/23/2019 245 | 246 | Create this project. 247 | 248 | [brch-demos]:https://github.com/cainmagi/MDNT/tree/demos -------------------------------------------------------------------------------- /data/deprecated/h5py.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ################################################################ 3 | # Data - h5py (deprecated) 4 | # @ Modern Deep Network Toolkits for Tensorflow-Keras 5 | # Yuchen Jin @ cainmagi@gmail.com 6 | # Requirements: (Pay attention to version) 7 | # python 3.6+ 8 | # tensorflow r1.13+ 9 | # Use tf-K standard dataset API to wrap the h5py APIs. 10 | # Warning: 11 | # The standard tf dataset is proved to be incompatible with 12 | # tf-K architecture. We need to wait until tf fix the bug. 13 | # Version: 0.10 # 2019/3/26 14 | # Comments: 15 | # Create this submodule. 16 | ################################################################ 17 | ''' 18 | 19 | import h5py 20 | import numpy as np 21 | import tensorflow as tf 22 | import os 23 | 24 | REMOVE_DEPRECATION = False 25 | 26 | def depcatedInfo(): 27 | try: 28 | raise DeprecationWarning('This library has been deprecated.') 29 | except Exception as e: 30 | if not REMOVE_DEPRECATION: 31 | raise e 32 | 33 | class H5SupSaver: 34 | '''Save supervised data set as .h5 file 35 | This class allows users to dump multiple datasets into one file 36 | handle, then it would save it as a .h5 file. The keywords of the 37 | sets should be assigned by users. 38 | ''' 39 | def __init__(self, fileName): 40 | ''' 41 | Create the .h5 file while initialization. 42 | Arguments: 43 | fileName: a path where we save the file. 
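        A workflow sketch (the file name and the dumped array are made up;
        note that this deprecated version raises DeprecationWarning unless
        REMOVE_DEPRECATION is set to True):
            saver = H5SupSaver('train-set')
            saver.config(logver=1, compression='gzip')
            saver.dump('X', np.zeros([100, 32, 32]))
            saver.close()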
44 | ''' 45 | self.f = None 46 | depcatedInfo() 47 | self.logver = 0 48 | self.__kwargs = dict() 49 | self.open(fileName) 50 | self.config(dtype='f') 51 | 52 | def config(self, **kwargs): 53 | ''' 54 | Make configuration for the saver. 55 | Argumetns for this class: 56 | logver (int): the log level for dumping files. 57 | Arguments often used: 58 | chunks (tuple): size of data blocks. 59 | compression (str): compression method. 60 | compression_opts (int): compression parameter. 61 | shuffle (bool): shuffle filter for data compression. 62 | fletcher32 (bool): check sum for chunks. 63 | Learn more available arguments here: 64 | http://docs.h5py.org/en/latest/high/dataset.html 65 | ''' 66 | logver = kwargs.pop('logver', None) 67 | if logver is not None: 68 | self.logver = logver 69 | self.__kwargs.update(kwargs) 70 | if self.logver > 0: 71 | print('Current configuration is:', self.__kwargs) 72 | 73 | def dump(self, keyword, data): 74 | ''' 75 | Dump the dataset with a keyword into the file. 76 | Arguments: 77 | keyword: the keyword of the dumped dataset. 78 | data: dataset, should be a numpy array. 79 | ''' 80 | if self.f is None: 81 | raise OSError('Should not dump data before opening a file.') 82 | self.f.create_dataset(keyword, data=data, **self.__kwargs) 83 | if self.logver > 0: 84 | print('Dump {0} into the file. The data shape is {1}.'.format(keyword, data.shape)) 85 | 86 | def open(self, fileName): 87 | ''' 88 | The dumped file name (path), it will produce a .h5 file. 89 | Arguments: 90 | fileName: a path where we save the file. 91 | ''' 92 | if fileName[-3:] != '.h5': 93 | fileName += '.h5' 94 | self.close() 95 | self.f = h5py.File(fileName, 'w') 96 | if self.logver > 0: 97 | print('Open a new file:', fileName) 98 | 99 | def close(self): 100 | if self.f is not None: 101 | self.f.close() 102 | self.f = None 103 | 104 | class H5HGParser: 105 | '''Homogeneously parsing .h5 file by h5py module 106 | This class allows users to feed one .h5 file, and convert it to 107 | tf.data.Dataset. The realization could be described as: 108 | (1) Create .h5 file handle. 109 | (2) Estimate the dataset size, and generate indexes. 110 | (3) Use the indexes to create a tf.data.Dataset, and allows it 111 | to randomly shuffle the indexes in each epoch. 112 | (4) Use Dataset.map() to address the data by the index from the 113 | index dataset. 114 | Note that in the file, there may be multiple datasets. This parser 115 | supports reading both single set and multiple sets. 116 | Note that all datasets in the same file should share the same shape. 117 | ''' 118 | def __init__(self, fileName, batchSize=32): 119 | ''' 120 | Create the parser and its h5py file handle. 121 | Arguments: 122 | fileName: the data path of the file (could be without postfix). 123 | batchSize: number of samples in each batch. 
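        A usage sketch (the file name is made up; all datasets in the file
        must share the same shape, and this deprecated class also raises
        DeprecationWarning unless REMOVE_DEPRECATION is set to True):
            parser = H5HGParser('train-set', batchSize=16)
            tf_dataset = parser.getDataset()  # shuffled, batched and repeated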
124 | ''' 125 | self.f = None 126 | depcatedInfo() 127 | if (not os.path.isfile(fileName)) and (os.path.isfile(fileName+'.h5')): 128 | fileName += '.h5' 129 | self.f = h5py.File(fileName, 'r') 130 | self.size = self.__createSize() 131 | self.__dataset = self.__indexDataset() 132 | allNum = np.sum(self.size) 133 | self.__dataset = self.__dataset.shuffle(buffer_size=allNum) 134 | if self.mutlipleMode: 135 | self.__dataset = self.__dataset.map(lambda index: tf.py_function(self.__mapMultiple, [index], [tf.int32])) 136 | else: 137 | self.__dataset = self.__dataset.map(lambda index: tf.py_function(self.__mapSingle, [index], [tf.int32])) 138 | self.__dataset = self.__dataset.batch(batchSize) 139 | self.__dataset = self.__dataset.repeat() 140 | 141 | def getDataset(self): 142 | ''' 143 | Get the produced tf dataset. 144 | ''' 145 | return self.__dataset 146 | 147 | def __createSize(self): 148 | ''' 149 | Find the number of items in the dataset, only need to be run for once. 150 | ''' 151 | if len(self.f) == 1: 152 | self.mutlipleMode = False 153 | self.__dnameIndex = list(self.f.keys())[0] 154 | return len(self.f[self.__dnameIndex]) 155 | else: 156 | self.mutlipleMode = True 157 | self.__dnameIndex = list(self.f.keys()) 158 | return tuple(len(self.f[fk]) for fk in self.__dnameIndex) 159 | 160 | def __indexDataset(self): 161 | ''' 162 | Create a tensorflow index dataset, only need to be run for once. 163 | Should be run after __createSize 164 | ''' 165 | if self.mutlipleMode: 166 | def genOneIndex(num): 167 | return np.stack((num*np.ones(self.size[num], dtype=np.int), np.arange(self.size[num], dtype=np.int)), axis=1) 168 | indexes = np.concatenate(list(genOneIndex(n) for n in range(len(self.size))), axis=0) 169 | return tf.data.Dataset.from_tensor_slices(indexes) 170 | else: 171 | return tf.data.Dataset.from_tensor_slices(np.arange(self.size, dtype=np.int)) 172 | 173 | def __mapMultiple(self, index): 174 | ''' 175 | Map function, for multiple datasets mode. 176 | ''' 177 | dname = self.__dnameIndex[index[0]] 178 | numSample = index[1] 179 | return self.f[dname][numSample] 180 | 181 | def __mapSingle(self, index): 182 | ''' 183 | Map function, for multiple datasets mode. 184 | ''' 185 | dname = self.__dnameIndex 186 | numSample = index 187 | return self.f[dname][numSample] 188 | 189 | def __del__(self): 190 | ''' 191 | Destructor 192 | ''' 193 | if self.f is not None: 194 | self.f.close() 195 | 196 | class H5GParser: 197 | '''Grouply parsing dataset 198 | This class allows users to feed one .h5 file, and convert it to 199 | tf.data.Dataset. The realization could be described as: 200 | (1) Create .h5 file handle. 201 | (2) Using the user defined keywords to get a group of datasets. 202 | (3) Estimate the dataset sizes, and generate indexes. Note each 203 | dataset should share the same size (but could be different 204 | shapes). 205 | (4) Use the indexes to create a tf.data.Dataset, and allows it 206 | to randomly shuffle the indexes in each epoch. 207 | (5) Use Dataset.map() to address the data by the index from the 208 | index dataset. 209 | Certainly, you could use this parser to load a single dataset. 210 | ''' 211 | def __init__(self, fileName, keywords, batchSize=32, preprocfunc=None): 212 | ''' 213 | Create the parser and its h5py file handle. 214 | Arguments: 215 | fileName: the data path of the file (could be without postfix). 216 | keywords: should be a list of keywords (or a single keyword). 217 | batchSize: number of samples in each batch. 
218 | preprocfunc: this function would be added to the produced data 219 | so that it could serve as a pre-processing tool. 220 | ''' 221 | self.f = None 222 | depcatedInfo() 223 | if isinstance(keywords, str): 224 | self.keywords = (keywords,) 225 | else: 226 | self.keywords = keywords 227 | if (not os.path.isfile(fileName)) and (os.path.isfile(fileName+'.h5')): 228 | fileName += '.h5' 229 | self.f = h5py.File(fileName, 'r') 230 | self.__dsets = self.__creatDataSets() 231 | self.size = self.__createSize() 232 | self.__dataset = self.__indexDataset() 233 | allNum = np.sum(self.size) 234 | self.__dataset = self.__dataset.shuffle(buffer_size=allNum) 235 | self.__dataset = self.__dataset.map(lambda index: tf.py_function(self.__mapSingle, [index], [tf.float32]*len(self.__dsets))) 236 | if preprocfunc is not None: 237 | self.__dataset = self.__dataset.map(preprocfunc) 238 | self.__dataset = self.__dataset.batch(batchSize) 239 | self.__dataset = self.__dataset.repeat() 240 | 241 | def getDataset(self): 242 | ''' 243 | Get the produced tf dataset. 244 | ''' 245 | return self.__dataset 246 | 247 | def __creatDataSets(self): 248 | ''' 249 | Find all desired dataset handles, and store them. 250 | ''' 251 | dsets = [] 252 | for key in self.keywords: 253 | dsets.append(self.f[key]) 254 | if not dsets: 255 | raise KeyError('Keywords are not mapped to datasets in the file.') 256 | return dsets 257 | 258 | def __createSize(self): 259 | ''' 260 | Find the number of items in the dataset, only need to be run for once. 261 | Should be run after __creatDataSets. 262 | ''' 263 | sze = len(self.__dsets[0]) 264 | for dset in self.__dsets: 265 | if sze != len(dset): 266 | raise TypeError('The assigned keywords do not correspond to each other.') 267 | return sze 268 | 269 | def __indexDataset(self): 270 | ''' 271 | Create a tensorflow index dataset, only need to be run for once. 272 | Should be run after __createSize. 273 | ''' 274 | return tf.data.Dataset.from_tensor_slices(np.arange(self.size, dtype=np.int)) 275 | 276 | def __mapSingle(self, index): 277 | ''' 278 | Map function, for multiple datasets mode. 279 | ''' 280 | return tuple(dset[index] for dset in self.__dsets) 281 | 282 | def __del__(self): 283 | ''' 284 | Destructor 285 | ''' 286 | if self.f is not None: 287 | self.f.close() -------------------------------------------------------------------------------- /layers/dense.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ################################################################ 3 | # Layers - Dense 4 | # @ Modern Deep Network Toolkits for Tensorflow-Keras 5 | # Yuchen Jin @ cainmagi@gmail.com 6 | # Requirements: (Pay attention to version) 7 | # python 3.6+ 8 | # tensorflow r1.13+ 9 | # Extend the dense layer API with tied version. 10 | # Version: 0.16 # 2019/10/22 11 | # Comments: 12 | # Fix a small bug for Ghost. 13 | # Version: 0.15 # 2019/6/24 14 | # Comments: 15 | # 1. Add the Ghost layer for implementing trainable input layer. 16 | # 2. Fix a small bug for Ghost. 17 | # Version: 0.11 # 2019/3/27 18 | # Comments: 19 | # Add compatible support. 20 | # Version: 0.10 # 2019/3/23 21 | # Comments: 22 | # Create this submodule. 
23 | ################################################################ 24 | ''' 25 | 26 | from tensorflow.python.eager import context 27 | from tensorflow.python.framework import common_shapes 28 | from tensorflow.python.framework import ops 29 | from tensorflow.python.framework import tensor_shape 30 | from tensorflow.python.keras import activations 31 | from tensorflow.python.keras import backend as K 32 | from tensorflow.python.keras import constraints 33 | from tensorflow.python.keras import initializers 34 | from tensorflow.python.keras import regularizers 35 | from tensorflow.python.keras.engine.base_layer import Layer 36 | from tensorflow.python.ops import gen_math_ops 37 | from tensorflow.python.ops import nn 38 | from tensorflow.python.ops import standard_ops 39 | from tensorflow.python.ops import variables 40 | 41 | from .. import compat 42 | if compat.COMPATIBLE_MODE['1.12']: 43 | from tensorflow.python.keras.engine.base_layer import InputSpec 44 | else: 45 | from tensorflow.python.keras.engine.input_spec import InputSpec 46 | 47 | class Ghost(Layer): 48 | """Ghost layer for setting a tunable input 49 | Since tf-Keras does not allow users to build a trainable input layer, we use 50 | an interesting trick, i.e. "Ghost", to realize the trainable input. Our Ghost 51 | layer is implemented like this: 52 | output = kernel * input + bias 53 | where both kernel and bias share the same shape as the input tensor. 54 | There are two ways to build a tunable input layer. The first way is using 55 | the kernel solely: 56 | input = Input(shape=shape) # feeding constant 1.0 57 | tunable_input = Ghost(use_kernel=True)(input) = kernel * 1.0 = kernel 58 | The second way is using the bias solely: 59 | input = Input(shape=shape) # feeding constant 0.0 60 | tunable_input = Ghost(use_bias=True)(input) = bias + 0.0 = bias 61 | Because both kernel and bias are trainable, such a technique enables tf-Keras 62 | users to create a tunable input layer easily. 63 | It is not allowed to use the kernel and the bias at the same time, because in this 64 | case the solution for the Ghost layer would become ill-posed. 65 | Arguments: 66 | use_kernel: Boolean, whether the layer uses the multiplicative strategy to 67 | define the variable. 68 | use_bias: Boolean, whether the layer uses the additive strategy to define 69 | the variable. 70 | var_initializer: Initializer for the tunable variable. The variable 71 | depends on setting use_kernel or setting use_bias. 72 | var_regularizer: Regularizer function applied to the tunable variable. 73 | var_constraint: Constraint function applied to the tunable variable. 74 | Input shape: 75 | Any shape. The shape should be fully known except the batch dimension. 76 | Output shape: 77 | The same as input shape.
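    A sketch of the first strategy (the shape and the constant feed are
    illustrative assumptions):
        x_in = tf.keras.layers.Input(shape=(16,))
        x_tune = Ghost(use_kernel=True)(x_in)
        # Feed x_in with constant ones (e.g. np.ones([batch_size, 16])) when
        # fitting or predicting, so that x_tune equals the trainable kernel.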
78 | """ 79 | def __init__(self, 80 | use_kernel=False, 81 | use_bias=False, 82 | var_initializer='glorot_uniform', 83 | var_regularizer=None, 84 | var_constraint=None, 85 | **kwargs): 86 | if 'input_shape' not in kwargs and 'input_dim' in kwargs: 87 | kwargs['input_shape'] = (kwargs.pop('input_dim'),) 88 | 89 | super(Ghost, self).__init__( 90 | activity_regularizer=None, **kwargs) 91 | if not (use_kernel or use_bias): 92 | raise ValueError('Need to specify either "use_kernel" or "use_bias".') 93 | if use_kernel and use_bias: 94 | raise ValueError('Should not specify "use_kernel" and "use_bias" in the same time.') 95 | self.use_kernel = use_kernel 96 | self.use_bias = use_bias 97 | self.var_initializer = initializers.get(var_initializer) 98 | self.var_regularizer = regularizers.get(var_regularizer) 99 | self.var_constraint = constraints.get(var_constraint) 100 | self.supports_masking = True 101 | 102 | def build(self, input_shape): 103 | input_shape = tensor_shape.TensorShape(input_shape) 104 | for i in range(1, len(input_shape)): 105 | if tensor_shape.dimension_value(input_shape[i]) is None: 106 | raise ValueError('The input shape [1:] should be defined, but found element `None`.') 107 | if self.use_kernel: 108 | varName = 'kernel' 109 | elif self.use_bias: 110 | varName = 'bias' 111 | get_in = input_shape.as_list()[1:] 112 | self.get_var = self.add_weight( 113 | varName, 114 | shape=get_in, 115 | initializer=self.var_initializer, 116 | regularizer=self.var_regularizer, 117 | constraint=self.var_constraint, 118 | dtype=self.dtype, 119 | trainable=True) 120 | super(Ghost, self).build(input_shape) 121 | 122 | def call(self, inputs): 123 | inputs = ops.convert_to_tensor(inputs) 124 | input_shape = K.int_shape(inputs) 125 | broadcast_shape = [1] + list(input_shape[1:]) 126 | broadcast_var = K.reshape(self.get_var, broadcast_shape) 127 | if self.use_kernel: 128 | return broadcast_var * inputs 129 | elif self.use_bias: 130 | return broadcast_var + inputs 131 | 132 | def compute_output_shape(self, input_shape): 133 | return input_shape 134 | 135 | def get_config(self): 136 | config = { 137 | 'use_kernel': self.use_kernel, 138 | 'use_bias': self.use_bias, 139 | 'var_initializer': initializers.serialize(self.var_initializer), 140 | 'var_regularizer': regularizers.serialize(self.var_regularizer), 141 | 'var_constraint': constraints.serialize(self.var_constraint) 142 | } 143 | base_config = super(Ghost, self).get_config() 144 | return dict(list(base_config.items()) + list(config.items())) 145 | 146 | class DenseTied(Layer): 147 | """Tied densely-connected NN layer. 148 | `DenseTied` implements the operation: 149 | `output = activation(dot(input, kernel.T) + bias)` 150 | where kernel comes from another Dense layer. 151 | NOTE THAT ALTHOUGH WE HAVE SUCCESSED TO MAKE THIS LAYER SERIALIZABLE, 152 | IT MAY BE STILL PROBLEMATIC FOR TRAINING ALGORITHM. PLEASE BE CAREFUL 153 | WHEN USING SUCH KIND OF LAYERS. 154 | IN MULTIPLE MODELS, THIS INSTANCE MAY CAUSING CONFLICTS BECAUSE IT 155 | USES GLOBAL VARIABLE NAME TO SERIALIZE CROSSED LAYERS. IT IS 156 | RECOMMENDED TO SEPARATE NAME SCOPES WHEN USING MULTIPLE MODELS. 157 | Arguments: 158 | tied_layer: A Dense layer instance where this layer is tied. 159 | activation: Activation function to use. 160 | If you don't specify anything, no activation is applied 161 | (ie. "linear" activation: `a(x) = x`). 162 | use_bias: Boolean, whether the layer uses a bias vector. 163 | bias_initializer: Initializer for the bias vector. 
164 | bias_regularizer: Regularizer function applied to the bias vector. 165 | activity_regularizer: Regularizer function applied to 166 | the output of the layer (its "activation").. 167 | bias_constraint: Constraint function applied to the bias vector. 168 | Reserved arguments: 169 | varName, varShape: only used when saving and restoring the layer. 170 | Input shape: 171 | nD tensor with shape: `(batch_size, ..., output_dim_of_tied_layer)`. 172 | The most common situation would be 173 | a 2D input with shape `(batch_size, output_dim_of_tied_layer)`. 174 | Output shape: 175 | nD tensor with shape: `(batch_size, ..., units)`. 176 | For instance, for a 2D input with shape 177 | `(batch_size, input_dim_of_tied_layer)`, 178 | the output would have shape `(batch_size, input_dim_of_tied_layer)`. 179 | """ 180 | def __init__(self, 181 | tied_layer='', 182 | activation=None, 183 | use_bias=True, 184 | bias_initializer='zeros', 185 | bias_regularizer=None, 186 | activity_regularizer=None, 187 | bias_constraint=None, 188 | varName='', varShape=[], 189 | **kwargs): 190 | if 'input_shape' not in kwargs and 'input_dim' in kwargs: 191 | kwargs['input_shape'] = (kwargs.pop('input_dim'),) 192 | 193 | super(DenseTied, self).__init__( 194 | activity_regularizer=regularizers.get(activity_regularizer), **kwargs) 195 | if tied_layer != '': 196 | self.kernelFrom = tied_layer.kernel.name 197 | self.varName = varName 198 | self.varShape = varShape 199 | self.activation = activations.get(activation) 200 | self.use_bias = use_bias 201 | self.bias_initializer = initializers.get(bias_initializer) 202 | self.bias_regularizer = regularizers.get(bias_regularizer) 203 | self.bias_constraint = constraints.get(bias_constraint) 204 | 205 | self.supports_masking = True 206 | self.input_spec = InputSpec(min_ndim=2) 207 | 208 | def build(self, input_shape): 209 | input_shape = tensor_shape.TensorShape(input_shape) 210 | if tensor_shape.dimension_value(input_shape[-1]) is None: 211 | raise ValueError('The last dimension of the inputs to `Dense` ' 212 | 'should be defined. Found `None`.') 213 | last_dim = tensor_shape.dimension_value(input_shape[-1]) 214 | self.input_spec = InputSpec(min_ndim=2, 215 | axes={-1: last_dim}) 216 | if self.varName == '': 217 | kernelFrom = list(filter(lambda x:x.name==self.kernelFrom, [op for op in variables.global_variables(scope=None)]))[0] 218 | self.kernel = K.transpose(kernelFrom) 219 | self.o_shape = self.kernel.get_shape().as_list() 220 | self.varName = kernelFrom.name 221 | self.varShape = kernelFrom.get_shape().as_list() 222 | else: 223 | kernelFrom = list(filter(lambda x:x.name==self.varName, [op for op in variables.global_variables(scope=None)]))[0] 224 | self.kernel = K.transpose(kernelFrom) 225 | self.o_shape = self.kernel.get_shape().as_list() 226 | if self.use_bias: 227 | self.bias = self.add_weight( 228 | 'bias', 229 | shape=[self.o_shape[-1],], 230 | initializer=self.bias_initializer, 231 | regularizer=self.bias_regularizer, 232 | constraint=self.bias_constraint, 233 | dtype=self.dtype, 234 | trainable=True) 235 | else: 236 | self.bias = None 237 | self.built = True 238 | 239 | def call(self, inputs): 240 | inputs = ops.convert_to_tensor(inputs) 241 | rank = common_shapes.rank(inputs) 242 | if rank > 2: 243 | # Broadcasting is required for the inputs. 244 | outputs = standard_ops.tensordot(inputs, self.kernel, [[rank - 1], [0]]) 245 | # Reshape the output back to the original ndim of the input. 
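            # (This broadcasting branch mirrors tf.keras.layers.Dense.call
            # for inputs with rank > 2.)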
246 | if not context.executing_eagerly(): 247 | shape = inputs.get_shape().as_list() 248 | output_shape = shape[:-1] + [self.o_shape] 249 | outputs.set_shape(output_shape) 250 | else: 251 | outputs = gen_math_ops.mat_mul(inputs, self.kernel) 252 | if self.use_bias: 253 | outputs = nn.bias_add(outputs, self.bias) 254 | if self.activation is not None: 255 | return self.activation(outputs) # pylint: disable=not-callable 256 | return outputs 257 | 258 | def compute_output_shape(self, input_shape): 259 | input_shape = tensor_shape.TensorShape(input_shape) 260 | input_shape = input_shape.with_rank_at_least(2) 261 | if tensor_shape.dimension_value(input_shape[-1]) is None: 262 | raise ValueError( 263 | 'The innermost dimension of input_shape must be defined, but saw: %s' 264 | % input_shape) 265 | return input_shape[:-1].concatenate(self.o_shape) 266 | 267 | def get_config(self): 268 | config = { 269 | 'tied_layer': '', 270 | 'activation': activations.serialize(self.activation), 271 | 'use_bias': self.use_bias, 272 | 'bias_initializer': initializers.serialize(self.bias_initializer), 273 | 'bias_regularizer': regularizers.serialize(self.bias_regularizer), 274 | 'activity_regularizer': 275 | regularizers.serialize(self.activity_regularizer), 276 | 'bias_constraint': constraints.serialize(self.bias_constraint), 277 | 'varName': self.varName, 'varShape': self.varShape 278 | } 279 | base_config = super(DenseTied, self).get_config() 280 | return dict(list(base_config.items()) + list(config.items())) -------------------------------------------------------------------------------- /layers/deprecated/conv.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ################################################################ 3 | # Layers - Modern convolutional layers (deprecated) 4 | # @ Modern Deep Network Toolkits for Tensorflow-Keras 5 | # Yuchen Jin @ cainmagi@gmail.com 6 | # Requirements: (Pay attention to version) 7 | # python 3.6+ 8 | # tensorflow r1.13+ 9 | # We store the failed versions of APIs for .conv here. 10 | # Version: 0.10 # 2019/6/7 11 | # Comments: 12 | # A failed try for quick group convolution (QGroupConv), move 13 | # it to deprecated. 14 | ################################################################ 15 | ''' 16 | 17 | from tensorflow.python.framework import tensor_shape 18 | from tensorflow.python.keras import activations 19 | from tensorflow.python.keras import backend as K 20 | from tensorflow.python.keras import constraints 21 | from tensorflow.python.keras import initializers 22 | from tensorflow.python.keras import regularizers 23 | from tensorflow.python.keras.utils import conv_utils 24 | from tensorflow.python.keras.engine.base_layer import Layer 25 | from tensorflow.python.ops import array_ops 26 | from tensorflow.python.ops import nn 27 | from tensorflow.python.ops import nn_ops 28 | from tensorflow.python.ops import nn_impl 29 | from tensorflow.python.ops import math_ops 30 | 31 | from tensorflow.keras.layers import BatchNormalization, LeakyReLU, PReLU 32 | from tensorflow.python.keras.layers.convolutional import Conv, Conv2DTranspose, Conv3DTranspose, UpSampling1D, UpSampling2D, UpSampling3D, ZeroPadding1D, ZeroPadding2D, ZeroPadding3D, Cropping1D, Cropping2D, Cropping3D 33 | from .normalize import InstanceNormalization, GroupNormalization 34 | 35 | from .. 
import compat 36 | if compat.COMPATIBLE_MODE['1.12']: 37 | from tensorflow.python.keras.engine.base_layer import InputSpec 38 | else: 39 | from tensorflow.python.keras.engine.input_spec import InputSpec 40 | 41 | NEW_CONV_TRANSPOSE = True 42 | USE_QUICK_GCONV = False 43 | 44 | def _get_macro_conv(key='NEW_CONV_TRANSPOSE'): 45 | if key == 'USE_QUICK_GCONV': 46 | return USE_QUICK_GCONV 47 | else: 48 | return NEW_CONV_TRANSPOSE 49 | 50 | class _QGroupConv(_GroupConv): 51 | """Quick computing version for abstract nD group convolution layer. 52 | This is the quick computing version of the convolution. 53 | The work flow of `GroupConv` could be viewed as 54 | output = concat (i=1~G) ( convND(input[group_i]) ) 55 | which means if we have G groups, we need to compute the `convND` op for G times. 56 | The original implementation calls operator `convND` for many times, which is 57 | inefficient. To solve this problem, we use such a work flow: 58 | output = sum (i=1~G) ( depth_convND(input)[group_i] ) 59 | The difference is, we only need to call `depth_convND` (tf.nn.depthwise_conv2d) 60 | once. Furthermore, if we apply tf.reshape and tf.sum, we could also calculate 61 | the sum operator once. This is why we could use the above method to improve the 62 | efficiency. 63 | However, since there is only tf.nn.depthwise_conv2d in tensorflow, we could not 64 | use it to calculate GroupConv3D. But we could still calculate GroupConv1D by 65 | reducing the 2D convolution to 1D case. 66 | To learn more about group convolution, see the docstring for `GroupConv`. 67 | Arguments: 68 | rank: An integer, the rank of the convolution, e.g. "2" for 2D convolution. 69 | (rank > 2 is not allowed.) 70 | lgroups: Integer, the group number of the latent convolution branch. The 71 | number of filters in the whole latent space is lgroups * lfilters. 72 | lfilters: Integer, the dimensionality in each the lattent group (i.e. the 73 | number of filters in each latent convolution branch). 74 | kernel_size: An integer or tuple/list of n integers, specifying the 75 | length of the convolution window. 76 | strides: An integer or tuple/list of n integers, 77 | specifying the stride length of the convolution. 78 | Specifying any stride value != 1 is incompatible with specifying 79 | any `dilation_rate` value != 1. 80 | padding: One of `"valid"` or `"same"` (case-insensitive). 81 | data_format: A string, one of `channels_last` (default) or `channels_first`. 82 | The ordering of the dimensions in the inputs. 83 | `channels_last` corresponds to inputs with shape 84 | `(batch, ..., channels)` while `channels_first` corresponds to 85 | inputs with shape `(batch, channels, ...)`. 86 | dilation_rate: An integer or tuple/list of n integers, specifying 87 | the dilation rate to use for dilated convolution. 88 | Currently, specifying any `dilation_rate` value != 1 is 89 | incompatible with specifying any `strides` value != 1. 90 | activation: Activation function. Set it to None to maintain a 91 | linear activation. 92 | use_bias: Boolean, whether the layer uses a bias. 93 | kernel_initializer: An initializer for the convolution kernel. 94 | bias_initializer: An initializer for the bias vector. If None, the default 95 | initializer will be used. 96 | kernel_regularizer: Optional regularizer for the convolution kernel. 97 | bias_regularizer: Optional regularizer for the bias vector. 98 | activity_regularizer: Optional regularizer function for the output. 
99 | kernel_constraint: Optional projection function to be applied to the 100 | kernel after being updated by an `Optimizer` (e.g. used to implement 101 | norm constraints or value constraints for layer weights). The function 102 | must take as input the unprojected variable and must return the 103 | projected variable (which must have the same shape). Constraints are 104 | not safe to use when doing asynchronous distributed training. 105 | bias_constraint: Optional projection function to be applied to the 106 | bias after being updated by an `Optimizer`. 107 | trainable: Boolean, if `True` also add variables to the graph collection 108 | `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). 109 | name: A string, the name of the layer. 110 | """ 111 | 112 | def __init__(self, rank, 113 | lgroups, 114 | lfilters, 115 | kernel_size, 116 | strides=1, 117 | padding='valid', 118 | data_format=None, 119 | dilation_rate=1, 120 | activation=None, 121 | use_bias=True, 122 | kernel_initializer='glorot_uniform', 123 | bias_initializer='zeros', 124 | kernel_regularizer=None, 125 | bias_regularizer=None, 126 | activity_regularizer=None, 127 | kernel_constraint=None, 128 | bias_constraint=None, 129 | trainable=True, 130 | name=None, 131 | **kwargs): 132 | super(_GroupConv, self).__init__( 133 | trainable=trainable, 134 | name=name, 135 | activity_regularizer=regularizers.get(activity_regularizer), 136 | **kwargs) 137 | self.rank = rank 138 | if rank > 2: 139 | raise ValueError('The quick group convolution does not support 3D or any higher dimension.') 140 | initRank = rank 141 | self.lgroups = lgroups 142 | self.lfilters = lfilters 143 | self.kernel_size = conv_utils.normalize_tuple( 144 | kernel_size, rank, 'kernel_size') 145 | self.strides = conv_utils.normalize_tuple(strides, rank, 'strides') 146 | self.padding = conv_utils.normalize_padding(padding) 147 | if (self.padding == 'causal' and not isinstance(self, (Conv1D, SeparableConv1D))): 148 | raise ValueError('Causal padding is only supported for `Conv1D` and ``SeparableConv1D`.') 149 | self.data_format = conv_utils.normalize_data_format(data_format) 150 | self.dilation_rate = conv_utils.normalize_tuple( 151 | dilation_rate, rank, 'dilation_rate') 152 | if rank == 1: # when rank=1, expand the tuples to 2D case. 153 | self.kernel_size = (1, *self.kernel_size) 154 | self.strides = (1, *self.strides) 155 | self.dilation_rate = (1, *self.dilation_rate) 156 | self.activation = activations.get(activation) 157 | self.use_bias = use_bias 158 | self.kernel_initializer = initializers.get(kernel_initializer) 159 | self.bias_initializer = initializers.get(bias_initializer) 160 | self.kernel_regularizer = regularizers.get(kernel_regularizer) 161 | self.bias_regularizer = regularizers.get(bias_regularizer) 162 | self.kernel_constraint = constraints.get(kernel_constraint) 163 | self.bias_constraint = constraints.get(bias_constraint) 164 | self.input_spec = InputSpec(ndim=self.rank + 2) 165 | 166 | self.group_input_dim = None 167 | self.exp_dim_pos = None 168 | 169 | def build(self, input_shape): 170 | input_shape = tensor_shape.TensorShape(input_shape) 171 | if self.data_format == 'channels_first': 172 | channel_axis = 1 173 | self._data_format = 'NCHW' 174 | if self.rank == 1: 175 | self.exp_dim_pos = 2 176 | else: 177 | channel_axis = -1 178 | if self.rank == 1: 179 | self.exp_dim_pos = 1 180 | self._data_format = 'NHWC' 181 | if input_shape.dims[channel_axis].value is None: 182 | raise ValueError('The channel dimension of the inputs should be defined. 
Found `None`.') 183 | input_dim = int(input_shape[channel_axis]) 184 | if input_dim % self.lgroups != 0: 185 | raise ValueError('To grouplize the input channels, the input channel number should be a multiple of group number (N*{0}), but given {1}'.format(self.lgroups, input_dim)) 186 | self.group_input_dim = input_dim // self.lgroups 187 | self._strides = (1, *self.strides, 1) 188 | kernel_shape = self.kernel_size + (input_dim, self.lfilters) 189 | 190 | self.kernel = self.add_weight( 191 | name='kernel', 192 | shape=kernel_shape, 193 | initializer=self.kernel_initializer, 194 | regularizer=self.kernel_regularizer, 195 | constraint=self.kernel_constraint, 196 | trainable=True, 197 | dtype=self.dtype) 198 | if self.use_bias: 199 | self.bias = self.add_weight( 200 | name='bias', 201 | shape=(self.lfilters * self.lgroups,), 202 | initializer=self.bias_initializer, 203 | regularizer=self.bias_regularizer, 204 | constraint=self.bias_constraint, 205 | trainable=True, 206 | dtype=self.dtype) 207 | else: 208 | self.bias = None 209 | self.input_spec = InputSpec(ndim=self.rank + 2, axes={channel_axis: input_dim}) 210 | if self.padding == 'causal': 211 | self.op_padding = 'valid' 212 | else: 213 | self.op_padding = self.padding 214 | self.built = True 215 | 216 | def call(self, inputs): 217 | if self.rank == 1: 218 | inputs = array_ops.expand_dims(inputs, axis=self.exp_dim_pos) 219 | outputs= nn_impl.depthwise_conv2d(input=inputs, 220 | filter=self.kernel, 221 | strides=self._strides, 222 | padding=self.op_padding.upper(), 223 | rate=self.dilation_rate, 224 | data_format=self._data_format) 225 | # Grouplize the output channels. 226 | r2_outputs_shape = outputs.get_shape().as_list() 227 | if self.data_format == 'channels_first': 228 | #get_oshape = r2_outputs_shape[:1].concatenate([self.lgroups*self.lfilters, self.group_input_dim]).concatenate(r2_outputs_shape[2:]) 229 | get_oshape = [-1, self.lgroups*self.lfilters, self.group_input_dim, *r2_outputs_shape[2:]] 230 | outputs = array_ops.reshape(outputs, get_oshape) 231 | outputs = math_ops.reduce_sum(outputs, axis=1, keepdims=False) 232 | else: 233 | #get_oshape = r2_outputs_shape[:-1].concatenate([self.lgroups*self.lfilters, self.group_input_dim]) 234 | get_oshape = [-1, *r2_outputs_shape[1:-1], self.lgroups*self.lfilters, self.group_input_dim] 235 | outputs = array_ops.reshape(outputs, get_oshape) 236 | outputs = math_ops.reduce_sum(outputs, axis=-1, keepdims=False) 237 | if self.rank == 1: 238 | outputs = array_ops.squeeze(outputs, axis=self.exp_dim_pos) 239 | outputs_list = [] 240 | 241 | if self.use_bias: 242 | if self.data_format == 'channels_first': 243 | if self.rank == 1: 244 | # nn.bias_add does not accept a 1D input tensor. 
245 | bias = array_ops.reshape(self.bias, (1, self.lfilters * self.lgroups, 1)) 246 | outputs += bias 247 | if self.rank == 2: 248 | outputs = nn.bias_add(outputs, self.bias, data_format='NCHW') 249 | else: 250 | outputs = nn.bias_add(outputs, self.bias, data_format='NHWC') 251 | 252 | if self.activation is not None: 253 | return self.activation(outputs) 254 | return outputs -------------------------------------------------------------------------------- /layers/normalize.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ################################################################ 3 | # Layers - Extended normalization layers 4 | # @ Modern Deep Network Toolkits for Tensorflow-Keras 5 | # Yuchen Jin @ cainmagi@gmail.com 6 | # Requirements: (Pay attention to version) 7 | # python 3.6+ 8 | # tensorflow r1.13+ 9 | # Extend the normalization with instance normalization and 10 | # layer normalization. 11 | # See here to learn the differences between different kinds of 12 | # normalizations: 13 | # https://arxiv.org/abs/1803.08494 14 | # Version: 0.11 # 2019/3/27 15 | # Comments: 16 | # Add compatible support. 17 | # Version: 0.10 # 2019/3/24 18 | # Comments: 19 | # Create this submodule. 20 | ################################################################ 21 | ''' 22 | 23 | from tensorflow.python.keras import backend as K 24 | from tensorflow.python.keras import constraints 25 | from tensorflow.python.keras import initializers 26 | from tensorflow.python.keras import regularizers 27 | from tensorflow.python.keras.engine.base_layer import Layer 28 | from tensorflow.python.ops import nn_impl 29 | 30 | from .. import compat 31 | if compat.COMPATIBLE_MODE['1.12']: 32 | from tensorflow.python.keras.engine.base_layer import InputSpec 33 | else: 34 | from tensorflow.python.keras.engine.input_spec import InputSpec 35 | 36 | class InstanceNormalization(Layer): 37 | """Instance normalization layer. 38 | This layer is borrorwed from 39 | https://github.com/keras-team/keras-contrib/blob/master/ 40 | keras_contrib/layers/normalization/instancenormalization.py 41 | Normalize the activations of the previous layer at each step, 42 | i.e. applies a transformation that maintains the mean activation 43 | close to 0 and the activation standard deviation close to 1. 44 | Arguments: 45 | axis: Integer, the axis that should be normalized 46 | (typically the features axis). 47 | For instance, after a `Conv2D` layer with 48 | `data_format="channels_first"`, 49 | set `axis=1` in `InstanceNormalization` (Instance Normalization). 50 | Setting `axis=None` will normalize all values in each 51 | instance of the batch (Layer Normalization). 52 | Axis 0 is the batch dimension. `axis` cannot be set to 0 to avoid errors. 53 | epsilon: Small float added to variance to avoid dividing by zero. 54 | center: If True, add offset of `beta` to normalized tensor. 55 | If False, `beta` is ignored. 56 | scale: If True, multiply by `gamma`. 57 | If False, `gamma` is not used. 58 | When the next layer is linear (also e.g. `nn.relu`), 59 | this can be disabled since the scaling 60 | will be done by the next layer. 61 | beta_initializer: Initializer for the beta weight. 62 | gamma_initializer: Initializer for the gamma weight. 63 | beta_regularizer: Optional regularizer for the beta weight. 64 | gamma_regularizer: Optional regularizer for the gamma weight. 65 | beta_constraint: Optional constraint for the beta weight. 66 | gamma_constraint: Optional constraint for the gamma weight. 
67 | Input shape: 68 | Arbitrary. Use the keyword argument `input_shape` 69 | (tuple of integers, does not include the samples axis) 70 | when using this layer as the first layer in a Sequential model. 71 | Output shape: 72 | Same shape as input. 73 | References: 74 | - [Layer Normalization](https://arxiv.org/abs/1607.06450) 75 | - [Instance Normalization: The Missing Ingredient for Fast Stylization]( 76 | https://arxiv.org/abs/1607.08022) 77 | """ 78 | def __init__(self, 79 | axis=None, 80 | epsilon=1e-3, 81 | center=True, 82 | scale=True, 83 | beta_initializer='zeros', 84 | gamma_initializer='ones', 85 | beta_regularizer=None, 86 | gamma_regularizer=None, 87 | beta_constraint=None, 88 | gamma_constraint=None, 89 | **kwargs): 90 | super(InstanceNormalization, self).__init__(**kwargs) 91 | self.supports_masking = True 92 | self.axis = axis 93 | self.epsilon = epsilon 94 | self.center = center 95 | self.scale = scale 96 | self.beta_initializer = initializers.get(beta_initializer) 97 | self.gamma_initializer = initializers.get(gamma_initializer) 98 | self.beta_regularizer = regularizers.get(beta_regularizer) 99 | self.gamma_regularizer = regularizers.get(gamma_regularizer) 100 | self.beta_constraint = constraints.get(beta_constraint) 101 | self.gamma_constraint = constraints.get(gamma_constraint) 102 | 103 | def build(self, input_shape): 104 | ndim = len(input_shape) 105 | if self.axis == 0: 106 | raise ValueError('Axis cannot be zero') 107 | 108 | if (self.axis is not None) and (ndim == 2): 109 | raise ValueError('Cannot specify axis for rank 1 tensor') 110 | 111 | self.input_spec = InputSpec(ndim=ndim) 112 | 113 | if self.axis is None: 114 | shape = (1,) 115 | else: 116 | shape = (input_shape[self.axis],) 117 | 118 | if self.scale: 119 | self.gamma = self.add_weight(shape=shape, 120 | name='gamma', 121 | initializer=self.gamma_initializer, 122 | regularizer=self.gamma_regularizer, 123 | constraint=self.gamma_constraint) 124 | else: 125 | self.gamma = None 126 | if self.center: 127 | self.beta = self.add_weight(shape=shape, 128 | name='beta', 129 | initializer=self.beta_initializer, 130 | regularizer=self.beta_regularizer, 131 | constraint=self.beta_constraint) 132 | else: 133 | self.beta = None 134 | self.built = True 135 | 136 | def call(self, inputs, training=None): 137 | input_shape = K.int_shape(inputs) 138 | reduction_axes = list(range(0, len(input_shape))) 139 | 140 | if self.axis is not None: 141 | del reduction_axes[self.axis] 142 | 143 | del reduction_axes[0] 144 | 145 | mean = K.mean(inputs, reduction_axes, keepdims=True) 146 | stddev = K.std(inputs, reduction_axes, keepdims=True) + self.epsilon 147 | normed = (inputs - mean) / stddev 148 | 149 | broadcast_shape = [1] * len(input_shape) 150 | if self.axis is not None: 151 | broadcast_shape[self.axis] = input_shape[self.axis] 152 | 153 | if self.scale: 154 | broadcast_gamma = K.reshape(self.gamma, broadcast_shape) 155 | normed = normed * broadcast_gamma 156 | if self.center: 157 | broadcast_beta = K.reshape(self.beta, broadcast_shape) 158 | normed = normed + broadcast_beta 159 | return normed 160 | 161 | def compute_output_shape(self, input_shape): 162 | return input_shape 163 | 164 | def get_config(self): 165 | config = { 166 | 'axis': self.axis, 167 | 'epsilon': self.epsilon, 168 | 'center': self.center, 169 | 'scale': self.scale, 170 | 'beta_initializer': initializers.serialize(self.beta_initializer), 171 | 'gamma_initializer': initializers.serialize(self.gamma_initializer), 172 | 'beta_regularizer': 
regularizers.serialize(self.beta_regularizer), 173 | 'gamma_regularizer': regularizers.serialize(self.gamma_regularizer), 174 | 'beta_constraint': constraints.serialize(self.beta_constraint), 175 | 'gamma_constraint': constraints.serialize(self.gamma_constraint) 176 | } 177 | base_config = super(InstanceNormalization, self).get_config() 178 | return dict(list(base_config.items()) + list(config.items())) 179 | 180 | class GroupNormalization(Layer): 181 | """Group normalization layer. 182 | This layer is borrorwed from 183 | https://github.com/keras-team/keras-contrib/blob/master/ 184 | keras_contrib/layers/normalization/groupnormalization.py 185 | Group Normalization divides the channels into groups and computes 186 | within each group 187 | the mean and variance for normalization. 188 | Group Normalization's computation is independent 189 | of batch sizes, and its accuracy is stable in a wide range of batch sizes. 190 | Relation to Layer Normalization: 191 | If the number of groups is set to 1, then this operation becomes identical to 192 | Layer Normalization. 193 | Relation to Instance Normalization: 194 | If the number of groups is set to the 195 | input dimension (number of groups is equal 196 | to number of channels), then this operation becomes 197 | identical to Instance Normalization. 198 | Arguments: 199 | groups: Integer, the number of groups for Group Normalization. 200 | Can be in the range [1, N] where N is the input dimension. 201 | The input dimension must be divisible by the number of groups. 202 | axis: Integer, the axis that should be normalized 203 | (typically the features axis). 204 | For instance, after a `Conv2D` layer with 205 | `data_format="channels_first"`, 206 | set `axis=1` in `BatchNormalization`. 207 | epsilon: Small float added to variance to avoid dividing by zero. 208 | center: If True, add offset of `beta` to normalized tensor. 209 | If False, `beta` is ignored. 210 | scale: If True, multiply by `gamma`. 211 | If False, `gamma` is not used. 212 | When the next layer is linear (also e.g. `nn.relu`), 213 | this can be disabled since the scaling 214 | will be done by the next layer. 215 | beta_initializer: Initializer for the beta weight. 216 | gamma_initializer: Initializer for the gamma weight. 217 | beta_regularizer: Optional regularizer for the beta weight. 218 | gamma_regularizer: Optional regularizer for the gamma weight. 219 | beta_constraint: Optional constraint for the beta weight. 220 | gamma_constraint: Optional constraint for the gamma weight. 221 | Input shape: 222 | Arbitrary. Use the keyword argument `input_shape` 223 | (tuple of integers, does not include the samples axis) 224 | when using this layer as the first layer in a model. 225 | Output shape: 226 | Same shape as input. 
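    A brief usage sketch (illustrative; `inputs` is an existing Keras tensor
    and the 32 channels are split into 8 groups):
    ```python
    x = tf.keras.layers.Conv2D(32, 3, padding='same')(inputs)
    x = GroupNormalization(groups=8, axis=-1)(x)
    x = tf.keras.layers.Activation('relu')(x)
    ```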
227 | References: 228 | - [Group Normalization](https://arxiv.org/abs/1803.08494) 229 | """ 230 | 231 | def __init__(self, 232 | groups=32, 233 | axis=-1, 234 | epsilon=1e-5, 235 | center=True, 236 | scale=True, 237 | beta_initializer='zeros', 238 | gamma_initializer='ones', 239 | beta_regularizer=None, 240 | gamma_regularizer=None, 241 | beta_constraint=None, 242 | gamma_constraint=None, 243 | **kwargs): 244 | super(GroupNormalization, self).__init__(**kwargs) 245 | self.supports_masking = True 246 | self.groups = groups 247 | self.axis = axis 248 | self.epsilon = epsilon 249 | self.center = center 250 | self.scale = scale 251 | self.beta_initializer = initializers.get(beta_initializer) 252 | self.gamma_initializer = initializers.get(gamma_initializer) 253 | self.beta_regularizer = regularizers.get(beta_regularizer) 254 | self.gamma_regularizer = regularizers.get(gamma_regularizer) 255 | self.beta_constraint = constraints.get(beta_constraint) 256 | self.gamma_constraint = constraints.get(gamma_constraint) 257 | 258 | def build(self, input_shape): 259 | dim = input_shape[self.axis] 260 | 261 | if dim is None: 262 | raise ValueError('Axis ' + str(self.axis) + ' of ' 263 | 'input tensor should have a defined dimension ' 264 | 'but the layer received an input with shape ' + 265 | str(input_shape) + '.') 266 | 267 | if dim < self.groups: 268 | raise ValueError('Number of groups (' + str(self.groups) + ') cannot be ' 269 | 'more than the number of channels (' + 270 | str(dim) + ').') 271 | 272 | if dim % self.groups != 0: 273 | raise ValueError('Number of groups (' + str(self.groups) + ') must be a ' 274 | 'multiple of the number of channels (' + 275 | str(dim) + ').') 276 | 277 | self.input_spec = InputSpec(ndim=len(input_shape), 278 | axes={self.axis: dim}) 279 | shape = (dim,) 280 | 281 | if self.scale: 282 | self.gamma = self.add_weight(shape=shape, 283 | name='gamma', 284 | initializer=self.gamma_initializer, 285 | regularizer=self.gamma_regularizer, 286 | constraint=self.gamma_constraint) 287 | else: 288 | self.gamma = None 289 | if self.center: 290 | self.beta = self.add_weight(shape=shape, 291 | name='beta', 292 | initializer=self.beta_initializer, 293 | regularizer=self.beta_regularizer, 294 | constraint=self.beta_constraint) 295 | else: 296 | self.beta = None 297 | self.built = True 298 | 299 | def call(self, inputs, **kwargs): 300 | input_shape = K.int_shape(inputs) 301 | tensor_input_shape = K.shape(inputs) 302 | 303 | # Prepare broadcasting shape. 
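        # For example (channels_last, axis=-1, shapes illustrative only): an
        # input of shape (N, H, W, 32) with groups=8 is reshaped to
        # (N, 8, H, W, 4) below, the mean/variance are reduced over the last
        # three axes, and the result is reshaped back to (N, H, W, 32) at the
        # end of this method.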
304 | reduction_axes = list(range(len(input_shape))) 305 | del reduction_axes[self.axis] 306 | broadcast_shape = [1] * len(input_shape) 307 | broadcast_shape[self.axis] = input_shape[self.axis] // self.groups 308 | broadcast_shape.insert(1, self.groups) 309 | 310 | reshape_group_shape = K.shape(inputs) 311 | group_axes = [reshape_group_shape[i] for i in range(len(input_shape))] 312 | group_axes[self.axis] = input_shape[self.axis] // self.groups 313 | group_axes.insert(1, self.groups) 314 | 315 | # reshape inputs to new group shape 316 | group_shape = [group_axes[0], self.groups] + group_axes[2:] 317 | group_shape = K.stack(group_shape) 318 | inputs = K.reshape(inputs, group_shape) 319 | 320 | group_reduction_axes = list(range(len(group_axes))) 321 | mean, variance = nn_impl.moments(inputs, group_reduction_axes[2:], shift=None, keep_dims=True) 322 | inputs = (inputs - mean) / (K.sqrt(variance + self.epsilon)) 323 | 324 | # prepare broadcast shape 325 | inputs = K.reshape(inputs, group_shape) 326 | 327 | outputs = inputs 328 | 329 | # In this case we must explicitly broadcast all parameters. 330 | if self.scale: 331 | broadcast_gamma = K.reshape(self.gamma, broadcast_shape) 332 | outputs = outputs * broadcast_gamma 333 | 334 | if self.center: 335 | broadcast_beta = K.reshape(self.beta, broadcast_shape) 336 | outputs = outputs + broadcast_beta 337 | 338 | # finally we reshape the output back to the input shape 339 | outputs = K.reshape(outputs, tensor_input_shape) 340 | 341 | return outputs 342 | 343 | def get_config(self): 344 | config = { 345 | 'groups': self.groups, 346 | 'axis': self.axis, 347 | 'epsilon': self.epsilon, 348 | 'center': self.center, 349 | 'scale': self.scale, 350 | 'beta_initializer': initializers.serialize(self.beta_initializer), 351 | 'gamma_initializer': initializers.serialize(self.gamma_initializer), 352 | 'beta_regularizer': regularizers.serialize(self.beta_regularizer), 353 | 'gamma_regularizer': regularizers.serialize(self.gamma_regularizer), 354 | 'beta_constraint': constraints.serialize(self.beta_constraint), 355 | 'gamma_constraint': constraints.serialize(self.gamma_constraint) 356 | } 357 | base_config = super(GroupNormalization, self).get_config() 358 | return dict(list(base_config.items()) + list(config.items())) 359 | 360 | def compute_output_shape(self, input_shape): 361 | return input_shape -------------------------------------------------------------------------------- /utilities/tboard.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ################################################################ 3 | # Utilities - Extended tensorboard tools 4 | # @ Modern Deep Network Toolkits for Tensorflow-Keras 5 | # Yuchen Jin @ cainmagi@gmail.com 6 | # Requirements: (Pay attention to version) 7 | # python 3.6+ 8 | # tensorflow r1.13+ 9 | # Extended tools for parsing the logs in the tensorboard file. 10 | # It enables users to extract records from tensorboard without 11 | # launching the web interface. It also provides a python func- 12 | # tion for launching the web interface. 13 | # Version: 0.22 # 2019/12/05 14 | # Comments: 15 | # 1. Minor change for provdiding verbose option to 16 | # TensorLogHandle.tohdf5. 17 | # Version: 0.20 # 2019/11/27 18 | # Comments: 19 | # 1. Finish TensorLogHandle. It may be updated in future 20 | # versions. 21 | # 2. Fix a minor bug for TensorLogHandle.tohdf5. 22 | # Version: 0.10 # 2019/11/26 23 | # Comments: 24 | # Create this submodule and finish TensorBoardTool, launch. 
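# A quick usage sketch (the import path and the log directory are
# illustrative; this assumes the package is importable as `mdnt`):
#   from mdnt.utilities import tboard
#   tboard.launch('./logs', ip='localhost', port=8000)
# or, to parse the records without launching the web interface:
#   records = tboard.TensorLogHandle('./logs', mode='scalars')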
25 | ################################################################ 26 | ''' 27 | 28 | import os, sys, logging 29 | import numpy as np 30 | import h5py 31 | from tensorboard import default 32 | from tensorboard import program 33 | from tensorboard.backend.event_processing import event_accumulator 34 | 35 | class TensorBoardTool: 36 | '''Tensorboard web interface launcher. 37 | Adapted from the original work here: 38 | https://stackoverflow.com/a/52295534 39 | This class is equivalent to call launch() in this module. 40 | Arguments: 41 | log_dir: the path where we store the logs. 42 | ip [optional]: the IP address for the web interface. 43 | port [optional]: the port number for the web interface. 44 | ''' 45 | def __init__(self, log_dir, ip=None, port=None): 46 | '''Initialization 47 | see the docstring of this class. 48 | ''' 49 | self.log_dir = log_dir 50 | self.ip = ip 51 | self.port = port 52 | 53 | def __collect_argvs(self): 54 | argvs = [None, '--logdir', str(self.log_dir)] 55 | if self.ip: 56 | argvs.extend(['--host', str(self.ip)]) 57 | if self.port: 58 | argvs.extend(['--port', str(self.port)]) 59 | return argvs 60 | 61 | def run(self): 62 | '''Launch the tensorboard. 63 | Note that this method would not block the main thread, we 64 | suggest to use launch() instead of this when you do not need 65 | to work with subthread. 66 | ''' 67 | program.setup_environment() 68 | # Remove http messages 69 | log = logging.getLogger('werkzeug').setLevel(logging.ERROR) 70 | # Start tensorboard server 71 | _tb = program.TensorBoard( 72 | default.get_plugins(), 73 | program.get_default_assets_zip_provider()) 74 | _tb.configure(argv=self.__collect_argvs()) 75 | url = _tb.launch() 76 | print('TensorBoard at {0}, working on path: {1}.'.format(url, self.log_dir)) 77 | 78 | class TensorLogHandle: 79 | '''Read a tensorboard log file. 80 | This is a dictionary-lite interface for parsing a tensorboard 81 | file. It manages a EventAccumulator and wrap it with key-driven 82 | interfaces. 83 | Sometimes the handle may be slow, this is caused by the backend 84 | EventAccumulator. A possible way for solving this problem is 85 | passing a size guide during the initialization, but this sugge- 86 | stion could not guarantee the efficiency. 87 | Arguments: 88 | path: A file path to a directory containing tf events 89 | files, or a single tf events file. The accumulator 90 | will load events from this path. 91 | mode: The default working mode. Should be one of the 92 | avaliable list: 93 | (1) scalars (2) images (3) audio (4) histograms 94 | (5) distributions (6) tensors (7) metadata 95 | size_guidance: Information on how much data the 96 | EventAccumulator should store in memory. The 97 | DEFAULT_SIZE_GUIDANCE tries not to store too much so as 98 | to avoid OOMing the client. The size_guidance should be 99 | a map from a `tagType` string to an integer representing 100 | the number of items to keep per tag for items of that 101 | `tagType`. If the size is 0, all events are stored. 102 | compression_bps: Information on how the `EventAccumulator` 103 | should compress histogram data for the 104 | `CompressedHistograms` tag (for details see 105 | `ProcessCompressedHistogram`). 106 | purge_orphaned_data: Whether to discard any events that 107 | were "orphaned" by a TensorFlow restart. 
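    A brief usage sketch (the log path and the tag name are illustrative):
    ```python
    handle = TensorLogHandle('./logs/run1', mode='scalars')
    print(list(handle.keys()))       # all scalar tags found in the events file
    record = handle['loss']          # a dict of numpy arrays with the keys
                                     # 'wall_time', 'step' and 'value'
    handle.tohdf5('./run1_scalars')  # dump every scalar tag to run1_scalars.h5
    ```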
108 | ''' 109 | MODE_LIST = {'scalars':event_accumulator.SCALARS, 110 | 'images': event_accumulator.IMAGES, 111 | 'audio': event_accumulator.AUDIO, 112 | 'histograms': event_accumulator.HISTOGRAMS, 113 | 'distributions': event_accumulator.COMPRESSED_HISTOGRAMS, 114 | 'tensors': event_accumulator.TENSORS, 115 | 'metadata': event_accumulator.RUN_METADATA} 116 | 117 | def __init__(self, path, mode='scalars', size_guidance=None, 118 | compression_bps=event_accumulator.NORMAL_HISTOGRAM_BPS, 119 | purge_orphaned_data=True): 120 | '''Initialization 121 | see the docstring of this class. 122 | ''' 123 | self.__curMode = None 124 | self.setDefaultMode(mode) 125 | self.accumulator = event_accumulator.EventAccumulator(path=path, 126 | size_guidance=size_guidance, compression_bps=compression_bps, 127 | purge_orphaned_data=purge_orphaned_data) 128 | self.accumulator.Reload() 129 | self.__keys = self.accumulator.Tags() 130 | 131 | def setDefaultMode(self, mode): 132 | '''Set the default working mode. 133 | Arguments: 134 | mode: The default mode, should be chosen from the avaliable 135 | list: 136 | (1) scalars (2) images (3) audio (4) histograms 137 | (5) tensors 138 | ''' 139 | if self.__checkMode(mode): 140 | self.__curMode = self.MODE_LIST[mode] 141 | else: 142 | raise KeyError('Should choose mode from: {0}.'.format(self.MODE_LIST.keys())) 143 | 144 | @classmethod 145 | def __checkMode(cls, mode): 146 | return mode in cls.MODE_LIST 147 | 148 | def __contains__(self, key): 149 | return key in self.__keys[self.__curMode] 150 | 151 | def __getitem__(self, key): 152 | try: 153 | if isinstance(key, tuple) and len(key) == 2: 154 | if not (key[1] in self.__keys[self.MODE_LIST[key[0]]]): 155 | raise KeyError 156 | return self.__getval(self.MODE_LIST[key[0]], key[1]) 157 | else: 158 | if not (key in self.__keys[self.__curMode]): 159 | raise KeyError 160 | return self.__getval(self.__curMode, key) 161 | except KeyError: 162 | raise KeyError('Could not find the item: {0}.'.format(key)) 163 | 164 | def __len__(self): 165 | return len(self.__keys[self.__curMode]) 166 | 167 | def __bool__(self): 168 | return bool(self.__keys[self.__curMode]) 169 | 170 | def __iter__(self): 171 | return iter(self.__keys[self.__curMode]) 172 | 173 | def keys(self, mode=None): 174 | '''Get all avaliable keys. 175 | Arguments: 176 | mode: The working mode, if not specified, would use 177 | default mode. 178 | ''' 179 | if mode is not None: 180 | if not self.__checkMode(mode): 181 | raise KeyError('The specified mode is invalid, should choose from {0}.'.format(self.MODE_LIST.keys())) 182 | return iter(self.__keys[self.MODE_LIST[mode]]) 183 | else: 184 | return iter(self.__keys[self.__curMode]) 185 | 186 | def items(self, mode=None): 187 | '''Get all avaliable (k, v) pairs. 188 | Arguments: 189 | mode: The working mode, if not specified, would use 190 | default mode. 191 | ''' 192 | if mode is not None: 193 | if not self.__checkMode(mode): 194 | raise KeyError('The specified mode is invalid, should choose from {0}.'.format(self.MODE_LIST.keys())) 195 | return map(lambda key: (key, self.__getval(self.MODE_LIST[mode], key)), self.__keys[self.MODE_LIST[mode]]) 196 | else: 197 | return map(lambda key: (key, self.__getval(self.__curMode, key)), self.__keys[self.__curMode]) 198 | 199 | def values(self, mode=None): 200 | '''Get all avaliable values. 201 | Arguments: 202 | mode: The working mode, if not specified, would use 203 | default mode. 
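        For example (a sketch; `handle` is a TensorLogHandle instance):
        ```python
        for record in handle.values(mode='scalars'):
            print(record['step'][-1], record['value'][-1])
        ```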
204 | ''' 205 | if mode is not None: 206 | if not self.__checkMode(mode): 207 | raise KeyError('The specified mode is invalid, should choose from {0}.'.format(self.MODE_LIST.keys())) 208 | return map(lambda key: self.__getval(mode, key), self.__keys[self.MODE_LIST[mode]]) 209 | else: 210 | return map(lambda key: self.__getval(self.__curMode, key), self.__keys[self.__curMode]) 211 | 212 | def __getval(self, mode, key): 213 | '''Protected function for getting item. 214 | Should not be called by users. 215 | ''' 216 | if mode == event_accumulator.SCALARS: 217 | return self.__parserScalar(self.accumulator.Scalars(key)) 218 | elif mode == event_accumulator.IMAGES: 219 | return self.accumulator.Images(key) 220 | elif mode == event_accumulator.AUDIO: 221 | return self.accumulator.Audio(key) 222 | elif mode == event_accumulator.HISTOGRAMS: 223 | return self.__parserHistogram(self.accumulator.Histograms(key)) 224 | elif mode == event_accumulator.COMPRESSED_HISTOGRAMS: 225 | return self.__parserDistribution(self.accumulator.CompressedHistograms(key)) 226 | elif mode == event_accumulator.RUN_METADATA: 227 | return self.accumulator.RunMetadata(key) 228 | elif mode == event_accumulator.TENSORS: 229 | return self.accumulator.Tensors(key) 230 | else: 231 | raise KeyError('The specified mode is invalid.') 232 | 233 | @staticmethod 234 | def __parserScalar(scalars): 235 | '''Parse the scalar list, and arrange the results.''' 236 | resDict = dict() 237 | if not scalars: 238 | return resDict 239 | else: 240 | for k in scalars[0]._asdict(): 241 | resDict[k] = [] 242 | for i in scalars: 243 | for k, v in i._asdict().items(): 244 | resDict[k].append(v) 245 | for k, v in resDict.items(): 246 | resDict[k] = np.asarray(v, dtype=np.float32) 247 | return resDict 248 | 249 | @staticmethod 250 | def __parserHistogram(histograms): 251 | '''Parse the histogram list, and arrange the results.''' 252 | resDict = dict() 253 | if not histograms: 254 | return resDict 255 | else: 256 | for k in histograms[0]._asdict(): 257 | resDict[k] = [] 258 | for i in histograms: 259 | for k, v in i._asdict().items(): 260 | if k == 'histogram_value': 261 | v = { 262 | 'x': np.asarray(v.bucket_limit, dtype=np.float32), 263 | 'n': np.asarray(v.bucket, dtype=np.float32), 264 | 'count': v.num 265 | } 266 | resDict[k].append(v) 267 | for k, v in resDict.items(): 268 | if k in ('wall_time', 'step'): 269 | resDict[k] = np.asarray(v, dtype=np.float32) 270 | return resDict 271 | 272 | @staticmethod 273 | def __parserDistribution(distributions): 274 | '''Parse the distribution list, and arrange the results.''' 275 | resDict = dict() 276 | if not distributions: 277 | return resDict 278 | else: 279 | for k in distributions[0]._asdict(): 280 | resDict[k] = [] 281 | for i in distributions: 282 | for k, v in i._asdict().items(): 283 | if k == 'compressed_histogram_values': 284 | x = [] 285 | val = [] 286 | for j in v: 287 | x.append(j.basis_point) 288 | val.append(j.value) 289 | v = np.stack([x, val], axis=0) 290 | resDict[k].append(v) 291 | for k, v in resDict.items(): 292 | resDict[k] = np.asarray(v, dtype=np.float32) 293 | return resDict 294 | 295 | def tohdf5(self, f, mode=None, compressed=True, verbose=1): 296 | '''Convert all data in a specific mode to HDF5 format. 297 | Arguments: 298 | f: a file path (would create a new file). 299 | or an h5py file object. 300 | or an h5py data group object. 301 | mode: the selected mode, if left None, would use the 302 | default mode. 303 | compressed: whether to apply the compression. 
304 | verbose: The level for showing messages during the 305 | conversion. 306 | ''' 307 | if mode is None: 308 | mode = self.__curMode 309 | if mode not in (event_accumulator.SCALARS, 310 | event_accumulator.HISTOGRAMS, 311 | event_accumulator.COMPRESSED_HISTOGRAMS): 312 | raise ValueError('Your current mode is {0}, this type does' 313 | 'not support HDF5 conversion.'.format(mode)) 314 | holdF = isinstance(f, str) 315 | if holdF: 316 | f = os.path.splitext(f)[0] + '.h5' 317 | f = h5py.File(f, 'w') 318 | try: 319 | name = f.filename 320 | except AttributeError: 321 | name = f.name 322 | f.attrs['type'] = mode 323 | for k, v in self.items(): 324 | g = f.create_group(k) 325 | self.__recursive_writer(g=g, obj=v, compressed=compressed) 326 | if verbose > 0: 327 | print('Having dumped {0}.'.format(k)) 328 | if holdF: 329 | f.close() 330 | if verbose > 0: 331 | print('Having dumped the data {0} successfully.'.format(name)) 332 | 333 | @classmethod 334 | def __recursive_writer(cls, g, obj, compressed=True): 335 | '''Recursive writer 336 | Should not be gotten accessed by users''' 337 | if isinstance(obj, dict): 338 | for k, v in obj.items(): 339 | cls.__recursive_writer_work(g, k, v, compressed) 340 | elif isinstance(obj, (list, tuple)): 341 | for i, v in enumerate(obj): 342 | cls.__recursive_writer_work(g, str(i), v, compressed) 343 | else: 344 | raise ValueError('The data part could not get parsed, check {0}'.format(obj)) 345 | 346 | @classmethod 347 | def __recursive_writer_work(cls, g, k, v, compressed=True): 348 | compression = 'gzip' if compressed else None 349 | if isinstance(v, (int, float)): 350 | g.create_dataset(k, data=float(v), dtype=np.float32) 351 | elif isinstance(v, np.ndarray): 352 | g.create_dataset(k, data=v, dtype=np.float32, chunks=((v.ndim>1) or compressed), compression=compression, maxshape=(None, *v.shape[1:])) 353 | elif isinstance(v, (dict, list, tuple)): 354 | newg = g.create_group(k) 355 | cls.__recursive_writer(newg, obj=v, compressed=compressed) 356 | else: 357 | raise ValueError('The data part could not get parsed, check {0}: {1}'.format(k, v)) 358 | 359 | def launch(log_dir, ip=None, port=None): 360 | '''Tensorboard web interface launcher (function). 361 | Functional interface for launching a tensorboard. 362 | This class is equivalent to call TensorBoardTool.run() in this 363 | module. 364 | Arguments: 365 | log_dir: the path where we store the logs. 366 | ip [optional]: the IP address for the web interface. 367 | port [optional]: the port number for the web interface. 368 | ''' 369 | osKey = 'GCS_READ_CACHE_DISABLED' 370 | getOS = os.environ.get(osKey, None) 371 | os.environ[osKey] = '1' 372 | tb = TensorBoardTool(log_dir, ip=ip, port=port) 373 | tb.run() 374 | input('Press Enter to ternimate this program.') 375 | if getOS is None: 376 | os.environ.pop(osKey) 377 | else: 378 | os.environ[osKey] = getOS 379 | 380 | if __name__ == '__main__': 381 | os.chdir(sys.path[0]) 382 | def test_thandle(): 383 | th = TensorLogHandle('../../logs/test', 'scalars') 384 | #print(th['residual2d_transpose/alpha_0']) 385 | th.tohdf5('../../getscalar') 386 | 387 | #launch('../../logs/test', 'localhost', 8000) 388 | test_thandle() 389 | -------------------------------------------------------------------------------- /optimizers/adaptive.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ################################################################ 3 | # Optimizers - Extended adaptive learning rate optimizers. 
4 | # @ Modern Deep Network Toolkits for Tensorflow-Keras 5 | # Yuchen Jin @ cainmagi@gmail.com 6 | # Requirements: (Pay attention to version) 7 | # python 3.6+ 8 | # tensorflow r1.13+ 9 | # This module contains extended optimizers that are based on 10 | # adaptive learning rate theory. Generally these optimizers 11 | # could converge more quickly while the solution is easier to 12 | # be overfitting. 13 | # Version: 0.10 # 2019/6/27 14 | # Comments: 15 | # Create this submodule, finish MNadam, Adabound and 16 | # Nadabound. 17 | ################################################################ 18 | ''' 19 | 20 | from tensorflow.python.framework import ops 21 | from tensorflow.python.keras import optimizers 22 | from tensorflow.python.keras import backend as K 23 | from tensorflow.python.ops import math_ops 24 | from tensorflow.python.ops import state_ops 25 | from tensorflow.python.ops import gen_math_ops 26 | 27 | class Nadabound(optimizers.Optimizer): 28 | """Nesterov Adabound optimizer 29 | The Nesterov version of the Adabound optimizer. This implementation is 30 | modified from mdnt.optimizers.Nadam and mdnt.optimizers.Adabound. Compared 31 | to Adabound optimizer, it uses estimated Nesterov gradient to update the 32 | momentum. 33 | Arguments: 34 | lr: float >= 0. Learning rate. 35 | lr_boost: float >=0. Suggest to > 1, because generally SGD optimizer 36 | requires a larger learning rate than Adam. 37 | gamma: float > 0. learning rate converging speed control factor. 38 | beta_1/beta_2: floats, 0 < beta < 1. Generally close to 1. 39 | epsilon: float >= 0. Fuzz factor. If `None`, defaults to `K.epsilon()`. 40 | decay: float >= 0. Learning rate decay over each update. 41 | amsgrad: boolean. Whether to apply the AMSGrad variant of this 42 | algorithm from the paper "On the Convergence of Adam and 43 | Beyond". 44 | sgdcorr: boolean. Because adam and SGD update momentum by different ways, 45 | when setting this flag True, the momentum updating rate would be 46 | approaching from 1. - beta_1 to 1. This correction is not applied in 47 | the original paper. Users should determine whether to use it carefully. 48 | """ 49 | 50 | def __init__(self, 51 | lr=0.002, 52 | lr_boost=10.0, 53 | gamma=1e-3, 54 | beta_1=0.9, 55 | beta_2=0.999, 56 | epsilon=None, 57 | decay=0., 58 | schedule_decay=0.004, 59 | amsgrad=False, 60 | sgdcorr=True, 61 | **kwargs): 62 | super(Nadabound, self).__init__(**kwargs) 63 | with K.name_scope(self.__class__.__name__): 64 | self.iterations = K.variable(0, dtype='int64', name='iterations') 65 | self.m_schedule = K.variable(1., name='m_schedule') 66 | self.lr = K.variable(lr, name='lr') 67 | self.beta_1 = K.variable(beta_1, name='beta_1') 68 | self.beta_2 = K.variable(beta_2, name='beta_2') 69 | self.decay = K.variable(decay, name='decay') 70 | self.lr_boost = K.variable(lr_boost, name='lr_boost') 71 | self.gamma = K.variable(gamma, name='gamma') 72 | if epsilon is None: 73 | epsilon = K.epsilon() 74 | self.epsilon = epsilon 75 | self.initial_decay = decay 76 | self.schedule_decay = schedule_decay 77 | self.amsgrad = amsgrad 78 | self.sgdcorr = sgdcorr 79 | 80 | def get_updates(self, loss, params): 81 | grads = self.get_gradients(loss, params) 82 | self.updates = [state_ops.assign_add(self.iterations, 1)] 83 | 84 | lr = self.lr 85 | if self.initial_decay > 0: 86 | lr = lr * ( 1. / (1. + self.decay * math_ops.cast(self.iterations,K.dtype(self.decay))) ) 87 | 88 | t = math_ops.cast(self.iterations, K.floatx()) + 1 89 | 90 | lower_bound = self.lr_boost * (1. - 1. 
/ (self.gamma * t + 1.)) 91 | upper_bound = self.lr_boost * (1. + 1. / (self.gamma * t)) 92 | if self.sgdcorr: 93 | m_rate = 1. - self.beta_1 / (self.gamma * t + 1.) 94 | else: 95 | m_rate = 1. - self.beta_1 96 | 97 | # Due to the recommendations in [2], i.e. warming momentum schedule 98 | momentum_cache_t = self.beta_1 * ( 99 | 1. - 0.5 * 100 | (math_ops.pow(K.cast_to_floatx(0.96), t * self.schedule_decay))) 101 | momentum_cache_t_1 = self.beta_1 * ( 102 | 1. - 0.5 * 103 | (math_ops.pow(K.cast_to_floatx(0.96), (t + 1) * self.schedule_decay))) 104 | m_schedule_new = self.m_schedule * momentum_cache_t 105 | m_schedule_next = self.m_schedule * momentum_cache_t * momentum_cache_t_1 106 | self.updates.append((self.m_schedule, m_schedule_new)) 107 | 108 | ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] 109 | vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] 110 | if self.amsgrad: 111 | vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] 112 | else: 113 | vhats = [K.zeros(1) for _ in params] 114 | 115 | self.weights = [self.iterations] + ms + vs + vhats 116 | 117 | for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats): 118 | # the following equations given in [1] 119 | g_prime = g / (1. - m_schedule_new) 120 | m_t = self.beta_1 * m + m_rate * g 121 | m_t_prime = m_t / (1. - m_schedule_next) 122 | v_t = self.beta_2 * v + (1. - self.beta_2) * math_ops.square(g) 123 | if self.amsgrad: 124 | vhat_t = math_ops.maximum(vhat, v_t) 125 | self.updates.append(state_ops.assign(vhat, vhat_t)) 126 | v_t_prime = vhat_t / (1. - math_ops.pow(self.beta_2, t)) 127 | else: 128 | v_t_prime = v_t / (1. - math_ops.pow(self.beta_2, t)) 129 | m_t_bar = (m_rate / (1.-self.beta_1)) * (1. - momentum_cache_t) * g_prime + momentum_cache_t_1 * m_t_prime 130 | beta_1_reduce = 1. - math_ops.pow(self.beta_1, t) 131 | lr_v = gen_math_ops.reciprocal((gen_math_ops.sqrt(v_t_prime) + self.epsilon) * beta_1_reduce) 132 | 133 | self.updates.append(state_ops.assign(m, m_t)) 134 | self.updates.append(state_ops.assign(v, v_t)) 135 | 136 | lr_bound = gen_math_ops.minimum(gen_math_ops.maximum(lr_v, lower_bound), upper_bound) 137 | p_t = p - lr * lr_bound * beta_1_reduce * m_t_bar 138 | 139 | new_p = p_t 140 | 141 | # Apply constraints. 142 | if getattr(p, 'constraint', None) is not None: 143 | new_p = p.constraint(new_p) 144 | 145 | self.updates.append(state_ops.assign(p, new_p)) 146 | return self.updates 147 | 148 | def get_config(self): 149 | config = { 150 | 'lr': float(K.get_value(self.lr)), 151 | 'lr_boost': float(K.get_value(self.lr_boost)), 152 | 'gamma': float(K.get_value(self.gamma)), 153 | 'beta_1': float(K.get_value(self.beta_1)), 154 | 'beta_2': float(K.get_value(self.beta_2)), 155 | 'epsilon': self.epsilon, 156 | 'decay': float(K.get_value(self.decay)), 157 | 'schedule_decay': self.schedule_decay, 158 | 'amsgrad': self.amsgrad, 159 | 'sgdcorr': self.sgdcorr 160 | } 161 | base_config = super(Nadabound, self).get_config() 162 | return dict(list(base_config.items()) + list(config.items())) 163 | 164 | class Adabound(optimizers.Optimizer): 165 | """Adabound optimizer. 166 | This optimizer would get initialized by an initial learning rate, a final 167 | learning rate and a converging speed control parameter, i.e. gamma which 168 | would define the upper bound and the lower bound of the adaptive learning 169 | rate. At the beginning, the optimizer behaves like Adam, but when its 170 | learning rate get converged, it would behaves like SGD+Momentum. 
171 | The converging speed control function is defined as: 172 | lower_bound = final_lr * (1. - 1. / (self.gamma * t + 1.)) 173 | upper_bound = final_lr * (1. + 1. / (self.gamma * t)) 174 | The lower_bound would be (0.95 * final_lr) after (gamma * t = 19). 175 | The upper_bound would be (0.95 * final_lr) after (gamma * t = 20). 176 | Users need to specify proper gamma to make sure that the algorithm would not 177 | get converged too quickly. 178 | Arguments: 179 | lr: float >= 0. Learning rate. 180 | lr_boost: float >=0. Final learning rate (for SGD) is defined as: 181 | final_lr = lr * lr_boost. 182 | gamma: float > 0. learning rate converging speed control factor. 183 | beta_1: float, 0 < beta < 1. Generally close to 1. 184 | beta_2: float, 0 < beta < 1. Generally close to 1. 185 | epsilon: float >= 0. Fuzz factor. If `None`, defaults to `K.epsilon()`. 186 | decay: float >= 0. Learning rate decay over each update. 187 | amsgrad: boolean. Whether to apply the AMSGrad variant of this 188 | algorithm from the paper "On the Convergence of Adam and 189 | Beyond". 190 | sgdcorr: boolean. Because adam and SGD update momentum by different ways, 191 | when setting this flag True, the momentum updating rate would be 192 | approaching from 1. - beta_1 to 1. This correction is not applied in 193 | the original paper. Users should determine whether to use it carefully. 194 | """ 195 | 196 | def __init__(self, 197 | lr=0.001, 198 | lr_boost=10.0, 199 | gamma=1e-3, 200 | beta_1=0.9, 201 | beta_2=0.999, 202 | epsilon=None, 203 | decay=0., 204 | amsgrad=False, 205 | sgdcorr=True, 206 | **kwargs): 207 | super(Adabound, self).__init__(**kwargs) 208 | with K.name_scope(self.__class__.__name__): 209 | self.iterations = K.variable(0, dtype='int64', name='iterations') 210 | self.lr = K.variable(lr, name='lr') 211 | self.beta_1 = K.variable(beta_1, name='beta_1') 212 | self.beta_2 = K.variable(beta_2, name='beta_2') 213 | self.decay = K.variable(decay, name='decay') 214 | self.lr_boost = K.variable(lr_boost, name='lr_boost') 215 | self.gamma = K.variable(gamma, name='gamma') 216 | if epsilon is None: 217 | epsilon = K.epsilon() 218 | self.epsilon = epsilon 219 | self.initial_decay = decay 220 | self.amsgrad = amsgrad 221 | self.sgdcorr = sgdcorr 222 | 223 | def get_updates(self, loss, params): 224 | grads = self.get_gradients(loss, params) 225 | self.updates = [] 226 | 227 | lr = self.lr 228 | if self.initial_decay > 0: 229 | lr = lr * ( 1. / (1. + self.decay * math_ops.cast(self.iterations,K.dtype(self.decay))) ) 230 | 231 | with ops.control_dependencies([state_ops.assign_add(self.iterations, 1)]): 232 | t = math_ops.cast(self.iterations, K.floatx()) 233 | lr_t = gen_math_ops.sqrt(1. - math_ops.pow(self.beta_2, t)) / (1. - math_ops.pow(self.beta_1, t)) 234 | 235 | lower_bound = self.lr_boost * (1. - 1. / (self.gamma * t + 1.)) 236 | upper_bound = self.lr_boost * (1. + 1. / (self.gamma * t)) 237 | if self.sgdcorr: 238 | m_rate = 1. - self.beta_1 / (self.gamma * t + 1.) 239 | else: 240 | m_rate = 1. - self.beta_1 241 | 242 | ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] 243 | vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] 244 | if self.amsgrad: 245 | vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] 246 | else: 247 | vhats = [K.zeros(1) for _ in params] 248 | self.weights = [self.iterations] + ms + vs + vhats 249 | 250 | for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats): 251 | m_t = (self.beta_1 * m) + m_rate * g 252 | v_t = (self.beta_2 * v) + (1. 
- self.beta_2) * math_ops.square(g) 253 | if self.amsgrad: 254 | vhat_t = math_ops.maximum(vhat, v_t) 255 | lr_v = gen_math_ops.reciprocal(gen_math_ops.sqrt(vhat_t) + self.epsilon) 256 | self.updates.append(state_ops.assign(vhat, vhat_t)) 257 | else: 258 | lr_v = gen_math_ops.reciprocal(gen_math_ops.sqrt(v_t) + self.epsilon) 259 | 260 | lr_bound = gen_math_ops.minimum(gen_math_ops.maximum(lr_t * lr_v, lower_bound), upper_bound) 261 | p_t = p - lr * lr_bound * m_t 262 | 263 | self.updates.append(state_ops.assign(m, m_t)) 264 | self.updates.append(state_ops.assign(v, v_t)) 265 | 266 | new_p = p_t 267 | 268 | # Apply constraints. 269 | if getattr(p, 'constraint', None) is not None: 270 | new_p = p.constraint(new_p) 271 | 272 | self.updates.append(state_ops.assign(p, new_p)) 273 | return self.updates 274 | 275 | def get_config(self): 276 | config = { 277 | 'lr': float(K.get_value(self.lr)), 278 | 'lr_boost': float(K.get_value(self.lr_boost)), 279 | 'gamma': float(K.get_value(self.gamma)), 280 | 'beta_1': float(K.get_value(self.beta_1)), 281 | 'beta_2': float(K.get_value(self.beta_2)), 282 | 'decay': float(K.get_value(self.decay)), 283 | 'epsilon': self.epsilon, 284 | 'amsgrad': self.amsgrad, 285 | 'sgdcorr': self.sgdcorr 286 | } 287 | base_config = super(Adabound, self).get_config() 288 | return dict(list(base_config.items()) + list(config.items())) 289 | 290 | class MNadam(optimizers.Optimizer): 291 | """Nesterov Adam optimizer (MDNT version) 292 | We use MNadam here to avoid the name conflict on tf.keras.optimizers.Nadam. 293 | Much like Adam is essentially RMSprop with momentum, Nadam is Adam RMSprop with 294 | Nesterov momentum. Default parameters follow those provided in the paper. 295 | It is recommended to leave the parameters of this optimizer at their default 296 | values. 297 | This optimizer is modifed based on tf.keras.optimizers.Nadam. Compared to 298 | original implementation, this version supports two more things: 299 | 1. Decay rate for the initial learning rate. 300 | 2. Amsgrad option. 301 | Arguments: 302 | lr: float >= 0. Learning rate. 303 | beta_1/beta_2: floats, 0 < beta < 1. Generally close to 1. 304 | epsilon: float >= 0. Fuzz factor. If `None`, defaults to `K.epsilon()`. 305 | decay: float >= 0. Learning rate decay over each update. 306 | amsgrad: boolean. Whether to apply the AMSGrad variant of this 307 | algorithm from the paper "On the Convergence of Adam and Beyond". 308 | """ 309 | 310 | def __init__(self, 311 | lr=0.002, 312 | beta_1=0.9, 313 | beta_2=0.999, 314 | epsilon=None, 315 | decay=0., 316 | schedule_decay=0.004, 317 | amsgrad=False, 318 | **kwargs): 319 | super(MNadam, self).__init__(**kwargs) 320 | with K.name_scope(self.__class__.__name__): 321 | self.iterations = K.variable(0, dtype='int64', name='iterations') 322 | self.m_schedule = K.variable(1., name='m_schedule') 323 | self.lr = K.variable(lr, name='lr') 324 | self.beta_1 = K.variable(beta_1, name='beta_1') 325 | self.beta_2 = K.variable(beta_2, name='beta_2') 326 | self.decay = K.variable(decay, name='decay') 327 | if epsilon is None: 328 | epsilon = K.epsilon() 329 | self.epsilon = epsilon 330 | self.initial_decay = decay 331 | self.schedule_decay = schedule_decay 332 | self.amsgrad = amsgrad 333 | 334 | def get_updates(self, loss, params): 335 | grads = self.get_gradients(loss, params) 336 | self.updates = [state_ops.assign_add(self.iterations, 1)] 337 | 338 | lr = self.lr 339 | if self.initial_decay > 0: 340 | lr = lr * ( 1. / (1. 
+ self.decay * math_ops.cast(self.iterations,K.dtype(self.decay))) ) 341 | 342 | t = math_ops.cast(self.iterations, K.floatx()) + 1 343 | 344 | # Due to the recommendations in [2], i.e. warming momentum schedule 345 | momentum_cache_t = self.beta_1 * ( 346 | 1. - 0.5 * 347 | (math_ops.pow(K.cast_to_floatx(0.96), t * self.schedule_decay))) 348 | momentum_cache_t_1 = self.beta_1 * ( 349 | 1. - 0.5 * 350 | (math_ops.pow(K.cast_to_floatx(0.96), (t + 1) * self.schedule_decay))) 351 | m_schedule_new = self.m_schedule * momentum_cache_t 352 | m_schedule_next = self.m_schedule * momentum_cache_t * momentum_cache_t_1 353 | self.updates.append((self.m_schedule, m_schedule_new)) 354 | 355 | ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] 356 | vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] 357 | if self.amsgrad: 358 | vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] 359 | else: 360 | vhats = [K.zeros(1) for _ in params] 361 | 362 | self.weights = [self.iterations] + ms + vs + vhats 363 | 364 | for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats): 365 | # the following equations given in [1] 366 | g_prime = g / (1. - m_schedule_new) 367 | m_t = self.beta_1 * m + (1. - self.beta_1) * g 368 | m_t_prime = m_t / (1. - m_schedule_next) 369 | v_t = self.beta_2 * v + (1. - self.beta_2) * math_ops.square(g) 370 | if self.amsgrad: 371 | vhat_t = math_ops.maximum(vhat, v_t) 372 | self.updates.append(state_ops.assign(vhat, vhat_t)) 373 | v_t_prime = vhat_t / (1. - math_ops.pow(self.beta_2, t)) 374 | else: 375 | v_t_prime = v_t / (1. - math_ops.pow(self.beta_2, t)) 376 | m_t_bar = (1. - momentum_cache_t) * g_prime + momentum_cache_t_1 * m_t_prime 377 | 378 | self.updates.append(state_ops.assign(m, m_t)) 379 | self.updates.append(state_ops.assign(v, v_t)) 380 | 381 | p_t = p - lr * m_t_bar / (gen_math_ops.sqrt(v_t_prime) + self.epsilon) 382 | 383 | new_p = p_t 384 | 385 | # Apply constraints. 386 | if getattr(p, 'constraint', None) is not None: 387 | new_p = p.constraint(new_p) 388 | 389 | self.updates.append(state_ops.assign(p, new_p)) 390 | return self.updates 391 | 392 | def get_config(self): 393 | config = { 394 | 'lr': float(K.get_value(self.lr)), 395 | 'beta_1': float(K.get_value(self.beta_1)), 396 | 'beta_2': float(K.get_value(self.beta_2)), 397 | 'epsilon': self.epsilon, 398 | 'decay': float(K.get_value(self.decay)), 399 | 'schedule_decay': self.schedule_decay, 400 | 'amsgrad': self.amsgrad 401 | } 402 | base_config = super(MNadam, self).get_config() 403 | return dict(list(base_config.items()) + list(config.items())) -------------------------------------------------------------------------------- /utilities/callbacks.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ################################################################ 3 | # Utilities - Callbacks 4 | # @ Modern Deep Network Toolkits for Tensorflow-Keras 5 | # Yuchen Jin @ cainmagi@gmail.com 6 | # Requirements: (Pay attention to version) 7 | # python 3.6+ 8 | # tensorflow r1.13+ 9 | # Extend loss functions. These functions could serve as both 10 | # losses and metrics. 11 | # Version: 0.23 # 2019/10/27 12 | # Comments: 13 | # Enable ModelCheckpoint to use compression to save models. 14 | # Version: 0.22 # 2019/10/23 15 | # Comments: 16 | # Enable ModelCheckpoint to not save optimizer. 17 | # Version: 0.20 # 2019/10/15 18 | # Comments: 19 | # Finish LossWeightsScheduler. 20 | # Version: 0.18 # 2019/6/24 21 | # Comments: 22 | # 1. 
Finish ModelWeightsReducer. 23 | # 2. Fix bugs for ModelWeightsReducer. 24 | # 3. Find a better way for implementing the soft thresholding 25 | # for ModelWeightsReducer. 26 | # Version: 0.16 # 2019/6/23 27 | # Comments: 28 | # Add OptimizerSwitcher and fix a bug. 29 | # Version: 0.10 # 2019/6/13 30 | # Comments: 31 | # Create this submodule, and finish ModelCheckpoint. 32 | ################################################################ 33 | ''' 34 |
35 | from datetime import datetime 36 | import os 37 | import numpy as np 38 | from tensorflow.python.ops import variables 39 | from tensorflow.python.keras import callbacks 40 | from tensorflow.python.keras import backend as K 41 | from tensorflow.python.platform import tf_logging as logging 42 | from tensorflow.python.ops import array_ops 43 | from tensorflow.python.ops import math_ops 44 | from tensorflow.python.ops import random_ops 45 | from tensorflow.python.ops import state_ops 46 | from tensorflow.python.ops import gen_math_ops 47 | 48 | from . import _default 49 |
50 | class LossWeightsScheduler(callbacks.Callback): 51 | """Loss weights scheduler. 52 | Arguments: 53 | schedule: a function that takes an epoch index as input 54 | (integer, indexed from 0) and returns the new 55 | loss weights as output. 56 | verbose: int. 0: quiet, 1: update messages. 57 | Here we show two examples: 58 | ```python 59 | # This function is designed for a two-phase training. In the 60 | # first phase, the loss weights are (0.8, 0.2); 61 | # In the second phase, the loss weights are 62 | # (0.2, 0.8); 63 | def scheduler(epoch): 64 | if epoch < 10: 65 | return [0.8, 0.2] 66 | else: 67 | return [0.2, 0.8] 68 | model.compile(..., loss_weights=[K.variable(0.5), 69 | K.variable(0.5)]) 70 | callback = mdnt.utilities.callbacks.LossWeightsScheduler(scheduler) 71 | model.fit(data, labels, epochs=100, callbacks=[callback], 72 | validation_data=(val_data, val_labels)) 73 | ``` 74 | ```python 75 | # This function is designed for a two-phase training. In the 76 | # first phase, the loss weights are (alpha=0.8, beta=0.2); 77 | # In the second phase, the loss weights are 78 | # (alpha=0.2, beta=0.8); 79 | def scheduler(epoch): 80 | if epoch < 10: 81 | return {'alpha':0.8, 'beta':0.2} 82 | else: 83 | return {'alpha':0.2, 'beta':0.8} 84 | model.compile(..., loss_weights={'alpha':K.variable(0.5), 85 | 'beta':K.variable(0.5)}) 86 | callback = mdnt.utilities.callbacks.LossWeightsScheduler(scheduler) 87 | model.fit(data, labels, epochs=100, callbacks=[callback], 88 | validation_data=(val_data, val_labels)) 89 | ``` 90 | """ 91 |
92 | def __init__(self, schedule, verbose=0): 93 | super(LossWeightsScheduler, self).__init__() 94 | self.schedule = schedule 95 | self.verbose = verbose 96 |
97 | def on_epoch_begin(self, epoch, logs=None): 98 | if not hasattr(self.model, 'loss_weights'): 99 | raise ValueError('Model must have a "loss_weights" attribute.') 100 | lw = self.model.loss_weights 101 | if lw is None: 102 | raise ValueError('model.loss_weights needs to be set.') 103 | lw_val = self.schedule(epoch) # Get the scheduled loss weights 104 | if isinstance(lw, dict): 105 | if not isinstance(lw_val, dict): 106 | raise ValueError('model.loss_weights is a dict, you need to ' 107 | 'provide a corresponding dict for updating it.') 108 | for k, v in lw.items(): 109 | if isinstance(v, variables.Variable): 110 | K.set_value(v, lw_val[k]) 111 | elif isinstance(lw, (list, tuple)): 112 | if not isinstance(lw_val, (list, tuple, np.ndarray)): 113 | raise ValueError('model.loss_weights is a sequence, you need to ' 114 | 'provide a corresponding sequence for updating it.') 115 | s = 0 116 | for v in lw: 117 | if isinstance(v, variables.Variable): 118 | K.set_value(v, lw_val[s]) 119 | s += 1 120 | else: 121 | raise ValueError('model.loss_weights could not be updated, please check ' 122 | 'your definition.') 123 | if self.verbose > 0: 124 | print('\nEpoch %05d: LossWeightsScheduler set the loss weights to %s.' % (epoch + 1, lw_val)) 125 |
126 | def on_epoch_end(self, epoch, logs=None): 127 | logs = logs or {} 128 | lw = self.model.loss_weights 129 | lw_var = None 130 | if isinstance(lw, dict): 131 | lw_var = {} 132 | for k, v in lw.items(): 133 | if isinstance(v, variables.Variable): 134 | lw_var[k] = K.get_value(v) 135 | else: 136 | lw_var[k] = v 137 | elif isinstance(lw, (list, tuple)): 138 | lw_var = [] 139 | for v in lw: 140 | if isinstance(v, variables.Variable): 141 | lw_var.append(K.get_value(v)) 142 | else: 143 | lw_var.append(v) 144 | logs['loss_weights'] = lw_var 145 |
146 | class ModelWeightsReducer(callbacks.Callback): 147 | """Model weights reducer 148 | Insert a weight decay operation before each iteration during the training. 149 | When it is applied to pure SGD, this callback is equivalent to adding 150 | L1/L2 regularization to each kernel. 151 | However, an optimizer with momentum or an adaptive learning rate would make 152 | the regularization terms not equivalent to weight decay. As an alternative, 153 | Tensorflow provides AdamW (weight decayed Adam) in the contribution module. 154 | This callback serves as an alternative for using weight decayed optimizers. 155 | For example, using ModelWeightsReducer(mu=0.1) + Adam is equivalent to 156 | using AdamW(weight_decay=0.1). 157 | This callback provides both the soft thresholding method and the weight decay 158 | method, which are used for maintaining the sparsity and the small module 159 | length, respectively. Compared to adding regularization terms, this callback 160 | does not get influenced by a specific optimizing algorithm.
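    A minimal sketch of the intended usage is shown below (the optimizer, the
    data and the coefficient are illustrative):
    ```python
    model.compile(optimizer='adam', loss='mse')
    decay = mdnt.utilities.callbacks.ModelWeightsReducer(mu=0.1)
    model.fit(data, labels, epochs=10, callbacks=[decay])
    ```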
161 | Arguments: 162 | lam: proximal coefficient. It is used to apply soft thresholding and 163 | maintain the sparsity of all kernels. 164 | It only take effects when > 0.0. 165 | mu: Tikhonov coefficient. It is used to apply the weight decay method 166 | and maintain the reduced length of the weight module. 167 | It only take effects when > 0.0. 168 | """ 169 | def __init__(self, lam=0.0, mu=0.0, epsilon=1e-5): 170 | with K.name_scope(self.__class__.__name__): 171 | self.get_lambda = K.variable(lam, name='lambda') 172 | self.get_mu = K.variable(mu, name='mu') 173 | self.bool_l1 = lam > 0.0 174 | self.bool_l2 = mu > 0.0 175 | self.session = None 176 | if not (self.bool_l1 or self.bool_l2): 177 | raise ValueError('Need to specify either one of "lam" and "mu".') 178 | 179 | def on_train_begin(self, logs=None): 180 | # First collect all trainable weights 181 | self.model._check_trainable_weights_consistency() 182 | get_w_list = self.model.trainable_weights 183 | get_w_dec_list = [] 184 | # Filter all weights and select those named 'kernel' 185 | for w in get_w_list: 186 | getname = w.name 187 | pos = getname.rfind('/') 188 | if pos != -1: 189 | checked = 'kernel' in getname[pos+1:] 190 | else: 191 | checked = 'kernel' in getname 192 | if checked: 193 | get_w_dec_list.append(w) 194 | if not get_w_dec_list: 195 | raise ValueError('The trainable weights of the model do not include any kernel.') 196 | # Define the update ops 197 | getlr = self.model.optimizer.lr 198 | with K.name_scope(self.__class__.__name__): 199 | self.w_updates = [] 200 | self.w_updates_aft = [] 201 | for w in get_w_dec_list: 202 | w_l = w 203 | if self.bool_l2: 204 | w_l = (1 - getlr * self.get_mu) * w_l 205 | if self.bool_l1: 206 | w_abs = math_ops.abs(w_l) + self.get_lambda 207 | w_l = ( gen_math_ops.sign(w_l) + gen_math_ops.sign(random_ops.random_uniform(w_l.get_shape(), minval=-1.0, maxval=1.0)) * math_ops.cast(gen_math_ops.equal(w_l, 0), dtype=w_l.dtype) ) * w_abs 208 | w_abs_x = math_ops.abs(w) - self.get_lambda 209 | w_x = gen_math_ops.sign(w) * math_ops.cast(gen_math_ops.greater(w_abs_x, 0), dtype=w.dtype) * w_abs_x 210 | self.w_updates_aft.append(state_ops.assign(w, w_x)) 211 | self.w_updates.append(state_ops.assign(w, w_l)) 212 | # Get and store the session 213 | self.session = K.get_session() 214 | 215 | def on_train_end(self, logs=None): 216 | self.session = None 217 | 218 | def on_train_batch_begin(self, batch, logs=None): 219 | # Define the updating function 220 | self.session.run(fetches=self.w_updates) 221 | 222 | def on_train_batch_end(self, batch, logs=None): 223 | if self.bool_l1: 224 | self.session.run(fetches=self.w_updates_aft) 225 | 226 | class OptimizerSwitcher(callbacks.Callback): 227 | """Optimizer switcher 228 | Need to use with MDNT optimizers that support mannual phase-switching 229 | method `optimizer.switch()`. 230 | Now such optimizers include: 231 | mdnt.optimizers.Adam2SGD 232 | mdnt.optimizers.NAdam2NSGD 233 | Arguments: 234 | switch_epochs: an int or an int list which determines when to switch 235 | the optimizer phase. The switch would happens on the end of 236 | assigned epochs. Should start with 1 (the first epoch). 237 | verbose: int. 0: quiet, 1: update messages. 
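    A brief sketch of the intended workflow (the optimizer settings and the
    epoch numbers are illustrative):
    ```python
    opt = mdnt.optimizers.Adam2SGD()
    model.compile(optimizer=opt, loss='mse')
    switcher = mdnt.utilities.callbacks.OptimizerSwitcher(switch_epochs=10, verbose=1)
    model.fit(data, labels, epochs=30, callbacks=[switcher])
    ```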
238 | """ 239 | 240 | def __init__(self, switch_epochs, verbose=0): 241 | super(OptimizerSwitcher, self).__init__() 242 | if isinstance(switch_epochs, (list, tuple)): 243 | if all(type(i)==int for i in switch_epochs): 244 | self.switch_epochs = list(switch_epochs) 245 | else: 246 | raise ValueError('The input list switch_epochs should only contains int elements.') 247 | else: 248 | if type(switch_epochs) != int: 249 | raise ValueError('The input scalar switch_epochs should be an int element.') 250 | self.switch_epochs = [switch_epochs] 251 | self.switch_epochs.sort(reverse=True) 252 | self.verbose = verbose 253 | 254 | def on_train_begin(self, logs=None): 255 | if not callable(getattr(self.model.optimizer, 'switch')): 256 | raise ValueError('Optimizer must have a "switch" method to support manually switching the training phase.') 257 | popflag = False 258 | while self.switch_epochs and self.switch_epochs[-1] < 1: 259 | self.switch_epochs.pop() 260 | popflag = True 261 | if popflag and self.verbose > 0: 262 | print('The input switch_epochs is revised as {0}.'.format(self.switch_epochs)) 263 | 264 | def on_epoch_end(self, epoch, logs=None, mode='train'): 265 | if mode == 'train' and self.switch_epochs: 266 | if self.switch_epochs[-1] == (epoch + 1): 267 | self.model.optimizer.switch(None) 268 | if self.verbose > 0: 269 | print('\nEpoch {0:05d}: Optimizer switcher switches the optimizer phase'.format(epoch + 1)) 270 | self.switch_epochs.pop() 271 | 272 | class ModelCheckpoint(callbacks.Callback): 273 | """Save the model after every epoch. (Revised) 274 | Revised Model checkpointer. Compared to original version, it supports 275 | such new features: 276 | 1. When `save_weights_only` is set `False`, it uses the MDNT version 277 | of model saver and avoid the heading excessing problem of saving 278 | HDF5 file. 279 | 2. The model configurations and the network weights are splitted. 280 | It will be easier for user to see the configuration through the 281 | saved JSON file. 282 | 3. When setting `keep_max`, only recent weights would be retained. 283 | Now `filepath` should not contain named formatting options, because 284 | the format options are moved into `record_format`. The final output 285 | configuration file name should be: 286 | `filapath + '.json'` 287 | while the weights file name should be: 288 | `filepath + '-' + record_format.format(...) + '.h5'` 289 | For example, if `filepath` is `'model'` while `record_format` is 290 | `'e{epoch:02d}_v{val_loss:.2f}'`, the latter part will be filled the 291 | value of `epoch` and keys in `logs` (passed in `on_epoch_end`). The 292 | output may be like: 293 | `'model.json'` and `'model-e05_v0.33.h5'`. 294 | Then the model checkpoints will be saved with the epoch number and 295 | the validation loss in the filename. 296 | Arguments: 297 | filepath: string, path to save the model file. 298 | record_format: the format of the using records. If set None, it 299 | would be set as a time stamp. 300 | monitor: quantity to monitor. 301 | verbose: verbosity mode, 0 or 1. 302 | keep_max: the maximum of kept weight file during the training 303 | phase. If set None, all files would be kept. This option 304 | requires users to have the authority to delete files in the 305 | saved path. 306 | save_optimizer: If `save_optimizer=True`, the optimizer configu- 307 | rations would be dumped as a json file. 308 | save_best_only: if `save_best_only=True`, 309 | the latest best model according to 310 | the quantity monitored will not be overwritten. 
311 | compress: whether to apply the compression for saving models. 312 | This option is only available when save_weights_only=False. 313 | mode: one of {auto, min, max}. 314 | If `save_best_only=True`, the decision 315 | to overwrite the current save file is made 316 | based on either the maximization or the 317 | minimization of the monitored quantity. For `val_acc`, 318 | this should be `max`, for `val_loss` this should 319 | be `min`, etc. In `auto` mode, the direction is 320 | automatically inferred from the name of the monitored quantity. 321 | save_weights_only: if True, then only the model's weights will be 322 | saved (`model.save_weights(filepath)`), else the full model 323 | is saved (`model.save(filepath)`). 324 | period: Interval (number of epochs) between checkpoints. 325 | """ 326 |
327 | def __init__(self, 328 | filepath, 329 | record_format=None, 330 | monitor='val_loss', 331 | verbose=0, 332 | keep_max=None, 333 | save_optimizer=True, 334 | save_best_only=False, 335 | save_weights_only=False, 336 | compress=True, 337 | mode='auto', 338 | period=1): 339 | super(ModelCheckpoint, self).__init__() 340 | self.monitor = monitor 341 | self.verbose = verbose 342 | self.filepath = filepath 343 | self.record_format = record_format 344 | if any(char in self.filepath for char in '{}%'): 345 | raise TypeError('filepath should not contain formats anymore. Use `record_format` to define that part.') 346 | self.keep_max = keep_max 347 | if keep_max is not None: 348 | self.__keep_list = [] 349 | self.__current_num = 0 350 | else: 351 | self.__keep_list = None 352 | self.__current_num = None 353 | self.save_optimizer = save_optimizer 354 | self.save_best_only = save_best_only 355 | self.save_weights_only = save_weights_only 356 | self.compress = compress 357 | self.period = period 358 | self.epochs_since_last_save = 0 359 |
360 | if mode not in ['auto', 'min', 'max']: 361 | logging.warning('ModelCheckpoint mode %s is unknown, ' 362 | 'fallback to auto mode.', mode) 363 | mode = 'auto' 364 |
365 | if mode == 'min': 366 | self.monitor_op = np.less 367 | self.best = np.Inf 368 | elif mode == 'max': 369 | self.monitor_op = np.greater 370 | self.best = -np.Inf 371 | else: 372 | if 'acc' in self.monitor or self.monitor.startswith('fmeasure'): 373 | self.monitor_op = np.greater 374 | self.best = -np.Inf 375 | else: 376 | self.monitor_op = np.less 377 | self.best = np.Inf 378 |
379 | def __keep_max_function(self, new_file_names): 380 | if self.keep_max is None: 381 | return 382 | if self.__current_num < self.keep_max: 383 | self.__current_num += 1 384 | self.__keep_list.append(new_file_names) 385 | else: 386 | old_file_names = self.__keep_list.pop(0) 387 | for old_file_name in old_file_names: 388 | if os.path.exists(old_file_name): 389 | os.remove(old_file_name) 390 | self.__keep_list.append(new_file_names) 391 |
392 | def on_epoch_end(self, epoch, logs=None): 393 | logs = logs or {} 394 | self.epochs_since_last_save += 1 395 | if self.epochs_since_last_save >= self.period: 396 | self.epochs_since_last_save = 0 397 | configpath = self.filepath + '.json' 398 | if self.record_format: 399 | weightpath = self.filepath + '-' + self.record_format 400 | weightpath = weightpath.format(epoch=epoch + 1, **logs) 401 | else: 402 | weightpath = self.filepath + '-' + str(int(datetime.timestamp(datetime.now())))  # fall back to a time stamp record 403 | optmpath = weightpath + '.json' 404 | weightpath = weightpath + '.h5' 405 | if self.save_best_only: 406 | current = logs.get(self.monitor) 407 | if current is None: 408 | logging.warning('Can save best model only with
--------------------------------------------------------------------------------
/optimizers/mixture.py:
--------------------------------------------------------------------------------
1 | '''
2 | ################################################################
3 | # Optimizers - Phase-mixed optimizers.
4 | # @ Modern Deep Network Toolkits for Tensorflow-Keras
5 | # Yuchen Jin @ cainmagi@gmail.com
6 | # Requirements: (Pay attention to version)
7 | #   python 3.6+
8 | #   tensorflow r1.13+
9 | # This module contains the optimizers that have multiple phases.
10 | # In different phases, those optimizers would adopt different
11 | # algorithms. A typical example is the SWATS optimizer.
12 | # Version: 0.21 # 2019/6/27
13 | # Comments:
14 | #   Slightly change the implementation.
15 | # Version: 0.20 # 2019/6/26
16 | # Comments:
17 | #   Finish the demo version for SWATS.
18 | # Version: 0.17 # 2019/6/23
19 | # Comments:
20 | #   Improve the efficiency of Adam2SGD and Nadam2NSGD.
21 | # Version: 0.15 # 2019/6/23
22 | # Comments:
23 | #   1. Fix the bugs in manually switched optimizers. Now it
24 | #      requires users to call switch() to change the phase or
25 | #      using mdnt.utilities.callbacks.OptimizerSwitcher.
26 | #   2. Revise the manually switched optimizers to ensure that
27 | #      they use an equivalent algorithm during the SGD phases.
28 | # Version: 0.10 # 2019/6/21
29 | # Comments:
30 | #   Create this submodule, finish Adam2SGD and Nadam2NSGD.
31 | ################################################################
32 | '''
33 | 
34 | from tensorflow.python.framework import ops
35 | from tensorflow.python.keras import optimizers
36 | from tensorflow.python.keras import backend as K
37 | from tensorflow.python.ops import math_ops
38 | from tensorflow.python.ops import state_ops
39 | from tensorflow.python.ops import control_flow_ops
40 | from tensorflow.python.ops import gen_math_ops
41 | 
42 | def m_switch(pred, tensor_a, tensor_b):
43 |     '''
44 |     Use a cleaner API to replace the previous implementation of `m_switch` and accelerate the computation.
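    It simply wraps `control_flow_ops.cond(pred, ..., strict=True)`: `pred` should be
    a scalar boolean tensor, and the two branches should return values with matching
    structures and dtypes; only the selected branch value is returned.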
45 |     '''
46 |     def f_true(): return tensor_a
47 |     def f_false(): return tensor_b
48 |     return control_flow_ops.cond(pred, f_true, f_false, strict=True)
49 | 
50 | class SWATS(optimizers.Optimizer):
51 |     """Switches from Adam to SGD (SWATS)
52 |     Switches from the Adam optimizer to the SGD optimizer automatically.
53 |     This method provides an automatic scheme for switching from Adam/Amsgrad to
54 |     the SGD optimizer. Different from the manually switched optimizers, the switching
55 |     point where Adam switches to SGD is determined by the algorithm, so users do not
56 |     need to call the switch notifier or use the manual switching callback.
57 |     The key technique of this method is checking the convergence of the
58 |     relationship between the adaptive momentum and the SGD gradient. If the proportion
59 |     of the gradient projected in the direction of the momentum converges, the
60 |     algorithm would switch to SGD silently.
61 |     One difference between SWATS and the manually switched optimizers is that SWATS
62 |     determines the algorithm phase for each optimized parameter independently, i.e.
63 |     different parameters may be optimized in different phases.
64 |     Another difference is that SWATS determines the learning rate of SGD
65 |     automatically to ensure that the learning rate would not change drastically
66 |     after the switching point.
67 |     This implementation makes the following modifications compared to the original work:
68 |         1. The manually set learning rate is separated from lambda to make sure
69 |            that users could modify the learning rate after switching to SGD.
70 |         2. The finally converged lambda is restricted to > 0, otherwise it will
71 |            not switch from Adam to SGD.
72 |         3. It supports Amsgrad -> SGD.
73 |     Due to the limitations of TensorFlow, this implementation is not highly
74 |     efficient. Users should estimate whether it is necessary to use this
75 |     optimizer.
76 |     Arguments:
77 |         lr: float >= 0. Learning rate.
78 |         beta_1: float, 0 < beta < 1. Generally close to 1.
79 |         beta_2: float, 0 < beta < 1. Generally close to 1.
80 |         epsilon: float >= 0. Fuzz factor. If `None`, defaults to `K.epsilon()`.
81 |         decay: float >= 0. Learning rate decay over each update.
82 |         amsgrad: boolean. Whether to apply the AMSGrad variant of this
83 |             algorithm from the paper "On the Convergence of Adam and
84 |             Beyond".
85 |     Reference:
86 |         This optimizer is derived from this paper:
87 |             Improving Generalization Performance by Switching from Adam to SGD
88 |             https://arxiv.org/abs/1712.07628
89 |         An unofficial implementation which inspired this work can be found
90 |         here:
91 |             https://github.com/sloth2012/scalaML
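    Example:
        A minimal sketch (the import path assumes the package is installed as
        `mdnt`; `model` and the training data are placeholders, not part of
        this module):
            from mdnt.optimizers.mixture import SWATS
            model.compile(optimizer=SWATS(lr=0.001, amsgrad=False), loss='mse')
            model.fit(x_train, y_train, epochs=50)
        No switching callback is required: each parameter moves from the Adam
        phase to the SGD phase on its own once its lambda estimate converges.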
92 |     """
93 | 
94 |     def __init__(self,
95 |                  lr=0.001,
96 |                  lr_boost=10.0,
97 |                  beta_1=0.9,
98 |                  beta_2=0.999,
99 |                  epsilon=None,
100 |                  decay=0.,
101 |                  amsgrad=False,
102 |                  **kwargs):
103 |         super(SWATS, self).__init__(**kwargs)
104 |         with K.name_scope(self.__class__.__name__):
105 |             self.iterations = K.variable(0, dtype='int64', name='iterations')
106 |             self.lr = K.variable(lr, name='lr')
107 |             self.beta_1 = K.variable(beta_1, name='beta_1')
108 |             self.beta_2 = K.variable(beta_2, name='beta_2')
109 |             self.decay = K.variable(decay, name='decay')
110 |         if epsilon is None:
111 |             epsilon = K.epsilon()
112 |         self.epsilon = epsilon
113 |         self.initial_decay = decay
114 |         self.amsgrad = amsgrad
115 | 
116 |     def get_updates(self, loss, params):
117 |         grads = self.get_gradients(loss, params)
118 |         self.updates = []
119 | 
120 |         lr = self.lr
121 |         if self.initial_decay > 0:
122 |             lr = lr * ( 1. / (1. + self.decay * math_ops.cast(self.iterations, K.dtype(self.decay))) )
123 | 
124 |         with ops.control_dependencies([state_ops.assign_add(self.iterations, 1)]):
125 |             t = math_ops.cast(self.iterations, K.floatx())
126 |             lr_bc = gen_math_ops.sqrt(1. - math_ops.pow(self.beta_2, t)) / (1. - math_ops.pow(self.beta_1, t))
127 | 
128 |         ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
129 |         vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
130 |         lams = [K.zeros(1, dtype=K.dtype(p)) for p in params]
131 |         conds = [K.variable(False, dtype='bool') for p in params]
132 |         if self.amsgrad:
133 |             vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
134 |         else:
135 |             vhats = [K.zeros(1) for _ in params]
136 |         self.weights = [self.iterations] + ms + vs + vhats + lams + conds
137 | 
138 |         for p, g, m, v, vhat, lam, cond in zip(params, grads, ms, vs, vhats, lams, conds):
139 |             beta_g = m_switch(cond, 1.0, 1.0 - self.beta_1)  # 1.0 in the SGD phase, (1 - beta_1) in the Adam phase
140 |             m_t = (self.beta_1 * m) + beta_g * g
141 |             v_t = (self.beta_2 * v) + (1. - self.beta_2) * math_ops.square(g)
142 |             if self.amsgrad:
143 |                 vhat_t = math_ops.maximum(vhat, v_t)
144 |                 p_t_ada = lr_bc * m_t / (gen_math_ops.sqrt(vhat_t) + self.epsilon)
145 |                 self.updates.append(state_ops.assign(vhat, vhat_t))
146 |             else:
147 |                 p_t_ada = lr_bc * m_t / (gen_math_ops.sqrt(v_t) + self.epsilon)
148 |             gamma_den = math_ops.reduce_sum(p_t_ada * g)  # projection of the Adam step onto the gradient
149 |             gamma = math_ops.reduce_sum(gen_math_ops.square(p_t_ada)) / (math_ops.abs(gamma_den) + self.epsilon) * (gen_math_ops.sign(gamma_den) + self.epsilon)
150 |             lam_t = (self.beta_2 * lam) + (1. - self.beta_2) * gamma
151 |             lam_prime = lam / (1. - math_ops.pow(self.beta_2, t))
152 |             lam_t_prime = lam_t / (1. - math_ops.pow(self.beta_2, t))
153 |             lg_err = math_ops.abs( lam_t_prime - gamma )  # convergence error of the estimated SGD learning rate
154 |             cond_update = gen_math_ops.logical_or(gen_math_ops.logical_and(gen_math_ops.logical_and( self.iterations > 1, lg_err < 1e-5 ), lam_t > 0 ), cond )[0]
155 |             lam_update = m_switch(cond_update, lam, lam_t)
156 |             self.updates.append(state_ops.assign(lam, lam_update))
157 |             self.updates.append(state_ops.assign(cond, cond_update))
158 | 
159 |             p_t_sgd = (1. - self.beta_1) * lam_prime * m_t
160 | 
161 |             self.updates.append(state_ops.assign(m, m_t))
162 |             self.updates.append(state_ops.assign(v, v_t))
163 | 
164 |             new_p = m_switch(cond, p - lr * p_t_sgd, p - lr * p_t_ada)
165 | 
166 |             # Apply constraints.
167 |             if getattr(p, 'constraint', None) is not None:
168 |                 new_p = p.constraint(new_p)
169 | 
170 |             self.updates.append(state_ops.assign(p, new_p))
171 |         return self.updates
172 | 
173 |     def get_config(self):
174 |         config = {
175 |             'lr': float(K.get_value(self.lr)),
176 |             'beta_1': float(K.get_value(self.beta_1)),
177 |             'beta_2': float(K.get_value(self.beta_2)),
178 |             'decay': float(K.get_value(self.decay)),
179 |             'epsilon': self.epsilon,
180 |             'amsgrad': self.amsgrad
181 |         }
182 |         base_config = super(SWATS, self).get_config()
183 |         return dict(list(base_config.items()) + list(config.items()))
184 | 
185 | class Adam2SGD(optimizers.Optimizer):
186 |     """Adam optimizer -> SGD optimizer.
187 |     Switches from the Adam optimizer to the SGD optimizer manually.
188 |     This optimizer needs users to control the switch point manually. After switching
189 |     to SGD, the momentum from Adam would be retained so the optimizer could switch
190 |     to SGD smoothly. beta_1 would also be applied to SGD for calculating the
191 |     momentum.
192 |     Special tips:
193 |         This optimizer needs to be used with
194 |             mdnt.utilities.callbacks.OptimizerSwitcher
195 |         together. That callback would trigger the method `self.switch(True)` and
196 |         notify the optimizer to enter the SGD phase. Otherwise, it would stay in
197 |         the Adam/Amsgrad phase. Users could also call `self.switch` manually if
198 |         using `train_on_batch()` to train the model.
199 |     Arguments:
200 |         lr: float >= 0. Learning rate.
201 |         lr_boost: float >= 0. Suggested to be > 1, because the SGD optimizer
202 |             generally requires a larger learning rate than Adam.
203 |         beta_1: float, 0 < beta < 1. Generally close to 1.
204 |         beta_2: float, 0 < beta < 1. Generally close to 1.
205 |         epsilon: float >= 0. Fuzz factor. If `None`, defaults to `K.epsilon()`.
206 |         decay: float >= 0. Learning rate decay over each update.
207 |         amsgrad: boolean. Whether to apply the AMSGrad variant of this
208 |             algorithm from the paper "On the Convergence of Adam and
209 |             Beyond".
210 |         switch_flag: the initial state of the optimizer phase. If set `False`,
211 |             start with Adam/Amsgrad, otherwise start with SGD.
212 |     """
213 | 
214 |     def __init__(self,
215 |                  lr=0.001,
216 |                  lr_boost=10.0,
217 |                  beta_1=0.9,
218 |                  beta_2=0.999,
219 |                  epsilon=None,
220 |                  decay=0.,
221 |                  amsgrad=False,
222 |                  switch_flag=False,
223 |                  **kwargs):
224 |         super(Adam2SGD, self).__init__(**kwargs)
225 |         with K.name_scope(self.__class__.__name__):
226 |             self.iterations = K.variable(0, dtype='int64', name='iterations')
227 |             self.lr = K.variable(lr, name='lr')
228 |             self.beta_1 = K.variable(beta_1, name='beta_1')
229 |             if switch_flag:  # using SGD
230 |                 self.beta_g = K.variable(1.0, name='beta_g')
231 |             else:  # using Adam
232 |                 self.beta_g = K.variable(1.0 - beta_1, name='beta_g')
233 |             self.beta_2 = K.variable(beta_2, name='beta_2')
234 |             self.decay = K.variable(decay, name='decay')
235 |             self.switch_flag = K.variable(switch_flag, dtype='bool', name='switch_flag')
236 |         if epsilon is None:
237 |             epsilon = K.epsilon()
238 |         self.epsilon = epsilon
239 |         self.initial_decay = decay
240 |         self.amsgrad = amsgrad
241 |         self.lr_boost = lr_boost
242 | 
243 |     def switch(self, switch_flag=None):
244 |         '''
245 |         Switch the phase of the optimizer.
246 |         Arguments:
247 |             switch_flag: if set `True`, use SGD with momentum; otherwise, use
248 |                 Adam/Amsgrad. If set `None`, the phase would be toggled from
249 |                 the current one.
250 |         '''
251 |         if switch_flag is None:
252 |             switch_flag = not bool(K.get_value(self.switch_flag))
253 |         else:
254 |             switch_flag = bool(switch_flag)
255 |         if switch_flag:  # using SGD
256 |             K.set_value(self.beta_g, 1.0)
257 |         else:  # using Adam
258 |             K.set_value(self.beta_g, 1.0 - K.get_value(self.beta_1))
259 |         K.set_value(self.switch_flag, bool(switch_flag))
260 | 
261 |     def get_updates(self, loss, params):
262 |         grads = self.get_gradients(loss, params)
263 |         self.updates = []
264 | 
265 |         lr = self.lr
266 |         if self.initial_decay > 0:
267 |             lr = lr * ( 1. / (1. + self.decay * math_ops.cast(self.iterations, K.dtype(self.decay))) )
268 | 
269 |         with ops.control_dependencies([state_ops.assign_add(self.iterations, 1)]):
270 |             t = math_ops.cast(self.iterations, K.floatx())
271 |             lr_t = lr * ( gen_math_ops.sqrt(1. - math_ops.pow(self.beta_2, t)) / (1. - math_ops.pow(self.beta_1, t)) )
272 | 
273 |         ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
274 |         vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
275 |         if self.amsgrad:
276 |             vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
277 |         else:
278 |             vhats = [K.zeros(1) for _ in params]
279 |         self.weights = [self.iterations] + ms + vs + vhats
280 | 
281 |         for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
282 |             m_t = (self.beta_1 * m) + self.beta_g * g
283 |             v_t = (self.beta_2 * v) + (1. - self.beta_2) * math_ops.square(g)
284 |             if self.amsgrad:
285 |                 vhat_t = math_ops.maximum(vhat, v_t)
286 |                 p_t_ada = p - lr_t * m_t / (gen_math_ops.sqrt(vhat_t) + self.epsilon)
287 |                 self.updates.append(state_ops.assign(vhat, vhat_t))
288 |             else:
289 |                 p_t_ada = p - lr_t * m_t / (gen_math_ops.sqrt(v_t) + self.epsilon)
290 |             p_t_sgd = p - self.lr_boost * lr * m_t
291 | 
292 |             self.updates.append(state_ops.assign(m, m_t))
293 |             self.updates.append(state_ops.assign(v, v_t))
294 | 
295 |             new_p = m_switch(self.switch_flag, p_t_sgd, p_t_ada)
296 | 
297 |             # Apply constraints.
298 |             if getattr(p, 'constraint', None) is not None:
299 |                 new_p = p.constraint(new_p)
300 | 
301 |             self.updates.append(state_ops.assign(p, new_p))
302 |         return self.updates
303 | 
304 |     def get_config(self):
305 |         config = {
306 |             'lr': float(K.get_value(self.lr)),
307 |             'lr_boost': self.lr_boost,
308 |             'beta_1': float(K.get_value(self.beta_1)),
309 |             'beta_2': float(K.get_value(self.beta_2)),
310 |             'decay': float(K.get_value(self.decay)),
311 |             'epsilon': self.epsilon,
312 |             'amsgrad': self.amsgrad,
313 |             'switch_flag': bool(K.get_value(self.switch_flag))
314 |         }
315 |         base_config = super(Adam2SGD, self).get_config()
316 |         return dict(list(base_config.items()) + list(config.items()))
317 | 
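# Example usage (a sketch only; the constructor arguments of OptimizerSwitcher are
# defined in mdnt.utilities.callbacks and are not shown here, so they are elided;
# `model` and the training data are placeholders):
#
#     from mdnt.optimizers.mixture import Adam2SGD
#     from mdnt.utilities.callbacks import OptimizerSwitcher
#
#     optimizer = Adam2SGD(lr=0.001, lr_boost=10.0, amsgrad=True)
#     model.compile(optimizer=optimizer, loss='categorical_crossentropy')
#     # Either let the callback call `optimizer.switch(True)` at the chosen point ...
#     model.fit(x_train, y_train, epochs=100, callbacks=[OptimizerSwitcher(...)])
#     # ... or trigger the switch manually when training with train_on_batch():
#     optimizer.switch(True)   # enter the SGD (momentum) phase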
318 | class Nadam2NSGD(optimizers.Optimizer):
319 |     """Nesterov Adam optimizer -> Nesterov SGD optimizer.
320 |     Much like Adam is essentially RMSprop with momentum, Nadam is Adam with
321 |     Nesterov momentum. Default parameters follow those provided in the paper.
322 |     It is recommended to leave the parameters of this optimizer at their default
323 |     values.
324 |     This optimizer needs users to control the switch point manually. After switching
325 |     to SGD, the momentum from Adam would be retained so the optimizer could switch
326 |     to SGD smoothly. beta_1 would also be applied to SGD for calculating the
327 |     momentum.
328 |     Special tips:
329 |         This optimizer needs to be used with
330 |             mdnt.utilities.callbacks.OptimizerSwitcher
331 |         together. That callback would trigger the method `self.switch(True)` and
332 |         notify the optimizer to enter the NSGD phase. Otherwise, it would stay in
333 |         the Nadam/Namsgrad phase. Users could also call `self.switch` manually if
334 |         using `train_on_batch()` to train the model.
335 |     Arguments:
336 |         lr: float >= 0. Learning rate.
337 |         lr_boost: float >= 0. Suggested to be > 1, because the SGD optimizer
338 |             generally requires a larger learning rate than Adam.
339 |         beta_1/beta_2: floats, 0 < beta < 1. Generally close to 1.
340 |         epsilon: float >= 0. Fuzz factor. If `None`, defaults to `K.epsilon()`.
341 |         decay: float >= 0. Learning rate decay over each update.
342 |         amsgrad: boolean. Whether to apply the AMSGrad variant of this
343 |             algorithm from the paper "On the Convergence of Adam and Beyond".
344 |         switch_flag: the initial state of the optimizer phase. If set `False`,
345 |             start with Nadam/Namsgrad, otherwise start with NSGD.
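        schedule_decay: float >= 0. Decay rate of the warming momentum schedule
            used by the Nadam phase (applied through `momentum_cache_t`).
    Example:
        A minimal sketch of the manual switching path (`model`, the batches and
        `switch_step` are placeholders, not part of this module):
            from mdnt.optimizers.mixture import Nadam2NSGD
            optimizer = Nadam2NSGD(lr=0.002, lr_boost=10.0)
            model.compile(optimizer=optimizer, loss='mse')
            for step, (x_batch, y_batch) in enumerate(batches):
                if step == switch_step:
                    optimizer.switch(True)   # Nadam/Namsgrad -> NSGD
                model.train_on_batch(x_batch, y_batch)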
346 | """ 347 | 348 | def __init__(self, 349 | lr=0.002, 350 | lr_boost=10.0, 351 | beta_1=0.9, 352 | beta_2=0.999, 353 | epsilon=None, 354 | decay=0., 355 | schedule_decay=0.004, 356 | amsgrad=False, 357 | switch_flag=False, 358 | **kwargs): 359 | super(Nadam2NSGD, self).__init__(**kwargs) 360 | with K.name_scope(self.__class__.__name__): 361 | self.iterations = K.variable(0, dtype='int64', name='iterations') 362 | self.m_schedule = K.variable(1., name='m_schedule') 363 | self.lr = K.variable(lr, name='lr') 364 | self.beta_1 = K.variable(beta_1, name='beta_1') 365 | if switch_flag: # using NSGD 366 | self.beta_g = K.variable(1.0, name='beta_g') 367 | else: # using Nadam 368 | self.beta_g = K.variable(1.0 - beta_1, name='beta_g') 369 | self.beta_2 = K.variable(beta_2, name='beta_2') 370 | self.decay = K.variable(decay, name='decay') 371 | self.switch_flag = K.variable(switch_flag, dtype='bool', name='switch_flag') 372 | if epsilon is None: 373 | epsilon = K.epsilon() 374 | self.epsilon = epsilon 375 | self.initial_decay = decay 376 | self.schedule_decay = schedule_decay 377 | self.amsgrad = amsgrad 378 | self.lr_boost = lr_boost 379 | 380 | def switch(self, switch_flag=None): 381 | ''' 382 | Switch the phase of the optimizer. 383 | Arguments: 384 | switch_flag: if set `True`, use SGD with nesterov momentum; Otherwise, 385 | use NAdam/NAmsgrad. If set None, it would switch the phase according to 386 | the current phase. 387 | ''' 388 | if switch_flag is None: 389 | switch_flag = not bool(K.get_value(self.switch_flag)) 390 | else: 391 | switch_flag = bool(switch_flag) 392 | if switch_flag: # using NSGD 393 | self.beta_g = K.set_value(self.beta_g, 1.0) 394 | else: # using Nadam 395 | self.beta_g = K.set_value(self.beta_g, 1.0 - K.get_value(self.beta_1)) 396 | K.set_value(self.switch_flag, bool(switch_flag)) 397 | 398 | def get_updates(self, loss, params): 399 | grads = self.get_gradients(loss, params) 400 | self.updates = [state_ops.assign_add(self.iterations, 1)] 401 | 402 | lr = self.lr 403 | if self.initial_decay > 0: 404 | lr = lr * ( 1. / (1. + self.decay * math_ops.cast(self.iterations,K.dtype(self.decay))) ) 405 | 406 | t = math_ops.cast(self.iterations, K.floatx()) + 1 407 | 408 | # Due to the recommendations in [2], i.e. warming momentum schedule 409 | momentum_cache_t = self.beta_1 * ( 410 | 1. - 0.5 * 411 | (math_ops.pow(K.cast_to_floatx(0.96), t * self.schedule_decay))) 412 | momentum_cache_t_1 = self.beta_1 * ( 413 | 1. - 0.5 * 414 | (math_ops.pow(K.cast_to_floatx(0.96), (t + 1) * self.schedule_decay))) 415 | m_schedule_new = self.m_schedule * momentum_cache_t 416 | m_schedule_next = self.m_schedule * momentum_cache_t * momentum_cache_t_1 417 | self.updates.append((self.m_schedule, m_schedule_new)) 418 | 419 | ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] 420 | vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] 421 | if self.amsgrad: 422 | vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] 423 | else: 424 | vhats = [K.zeros(1) for _ in params] 425 | 426 | self.weights = [self.iterations] + ms + vs + vhats 427 | 428 | for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats): 429 | # the following equations given in [1] 430 | g_prime = g / (1. - m_schedule_new) 431 | m_t = self.beta_1 * m + self.beta_g * g 432 | m_t_prime = m_t / (1. - m_schedule_next) 433 | v_t = self.beta_2 * v + (1. 
434 |             if self.amsgrad:
435 |                 vhat_t = math_ops.maximum(vhat, v_t)
436 |                 self.updates.append(state_ops.assign(vhat, vhat_t))
437 |                 v_t_prime = vhat_t / (1. - math_ops.pow(self.beta_2, t))
438 |             else:
439 |                 v_t_prime = v_t / (1. - math_ops.pow(self.beta_2, t))
440 |             m_t_bar = (self.beta_g / (1. - self.beta_1)) * (1. - momentum_cache_t) * g_prime + momentum_cache_t_1 * m_t_prime
441 | 
442 |             self.updates.append(state_ops.assign(m, m_t))
443 |             self.updates.append(state_ops.assign(v, v_t))
444 | 
445 |             p_t_ada = p - lr * m_t_bar / (gen_math_ops.sqrt(v_t_prime) + self.epsilon)
446 |             p_t_sgd = p - self.lr_boost * lr * m_t_bar
447 | 
448 |             new_p = m_switch(self.switch_flag, p_t_sgd, p_t_ada)
449 | 
450 |             # Apply constraints.
451 |             if getattr(p, 'constraint', None) is not None:
452 |                 new_p = p.constraint(new_p)
453 | 
454 |             self.updates.append(state_ops.assign(p, new_p))
455 |         return self.updates
456 | 
457 |     def get_config(self):
458 |         config = {
459 |             'lr': float(K.get_value(self.lr)),
460 |             'lr_boost': self.lr_boost,
461 |             'beta_1': float(K.get_value(self.beta_1)),
462 |             'beta_2': float(K.get_value(self.beta_2)),
463 |             'epsilon': self.epsilon,
464 |             'decay': float(K.get_value(self.decay)),
465 |             'schedule_decay': self.schedule_decay,
466 |             'amsgrad': self.amsgrad,
467 |             'switch_flag': bool(K.get_value(self.switch_flag))
468 |         }
469 |         base_config = super(Nadam2NSGD, self).get_config()
470 |         return dict(list(base_config.items()) + list(config.items()))
--------------------------------------------------------------------------------