├── .gitignore
├── .travis.yml
├── CONTRIBUTING.md
├── ISSUE_TEMPLATE.md
├── LICENSE
├── MANIFEST.in
├── README.md
├── keras_applications
│   ├── __init__.py
│   ├── densenet.py
│   ├── efficientnet.py
│   ├── imagenet_utils.py
│   ├── inception_resnet_v2.py
│   ├── inception_v3.py
│   ├── mobilenet.py
│   ├── mobilenet_v2.py
│   ├── mobilenet_v3.py
│   ├── nasnet.py
│   ├── resnet.py
│   ├── resnet50.py
│   ├── resnet_common.py
│   ├── resnet_v2.py
│   ├── resnext.py
│   ├── vgg16.py
│   ├── vgg19.py
│   └── xception.py
├── pytest.ini
├── setup.py
└── tests
    ├── applications_test.py
    ├── data
    │   └── elephant.jpg
    └── imagenet_utils_test.py
/.gitignore: -------------------------------------------------------------------------------- 1 | *.DS_Store 2 | *.pyc 3 | temp/* 4 | dist/* 5 | build/* 6 | tags 7 | Keras_Applications.egg-info 8 | 9 | # test-related 10 | .coverage 11 | .cache 12 | .pytest_cache 13 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: required 2 | dist: trusty 3 | language: python 4 | matrix: 5 | include: 6 | - python: 2.7 7 | env: KERAS_BACKEND=tensorflow TEST_MODE=PEP8 8 | - python: 2.7 9 | env: KERAS_BACKEND=tensorflow 10 | - python: 2.7 11 | env: KERAS_BACKEND=tensorflow KERAS_HEAD=true 12 | - python: 3.6 13 | env: KERAS_BACKEND=tensorflow 14 | - python: 2.7 15 | env: KERAS_BACKEND=theano KERAS_HEAD=true THEANO_FLAGS=optimizer=fast_compile 16 | - python: 3.6 17 | env: KERAS_BACKEND=theano THEANO_FLAGS=optimizer=fast_compile 18 | - python: 2.7 19 | env: KERAS_BACKEND=cntk KERAS_HEAD=true PYTHONWARNINGS=ignore 20 | - python: 3.6 21 | env: KERAS_BACKEND=cntk PYTHONWARNINGS=ignore 22 | install: 23 | # code below is taken from http://conda.pydata.org/docs/travis.html 24 | # We do this conditionally because it saves us some downloading if the 25 | # version is the same. 26 | - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then 27 | wget https://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh; 28 | else 29 | wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh; 30 | fi 31 | - bash miniconda.sh -b -p $HOME/miniconda 32 | - export PATH="$HOME/miniconda/bin:$PATH" 33 | - hash -r 34 | - conda config --set always_yes yes --set changeps1 no 35 | - conda update -q conda 36 | # Useful for debugging any issues with conda 37 | - conda info -a 38 | 39 | - conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION pytest pandas 40 | - source activate test-environment 41 | - pip install --only-binary=numpy,scipy numpy nose scipy matplotlib h5py theano keras==2.2.4 42 | - conda install mkl mkl-service 43 | 44 | # set library path 45 | - export LD_LIBRARY_PATH=$HOME/miniconda/envs/test-environment/lib/:$LD_LIBRARY_PATH 46 | 47 | # install PIL 48 | - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then 49 | conda install pil; 50 | elif [[ "$TRAVIS_PYTHON_VERSION" == "3.6" ]]; then 51 | conda install Pillow; 52 | fi 53 | 54 | #- if [[ $KERAS_HEAD == "true" ]]; then 55 | # pip install --no-deps git+https://github.com/keras-team/keras.git; 56 | # fi 57 | - pip install -e .[tests] 58 | 59 | # install TensorFlow (CPU version).
60 | - pip install tensorflow==1.9 61 | 62 | # install cntk 63 | - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then 64 | pip install https://cntk.ai/PythonWheel/CPU-Only/cntk-2.3.1-cp27-cp27mu-linux_x86_64.whl; 65 | elif [[ "$TRAVIS_PYTHON_VERSION" == "3.6" ]]; then 66 | pip install https://cntk.ai/PythonWheel/CPU-Only/cntk-2.3.1-cp36-cp36m-linux_x86_64.whl; 67 | fi 68 | 69 | # install pydot for visualization tests 70 | - conda install pydot graphviz 71 | 72 | # detect whether only markdown files were changed 73 | - export DOC_ONLY_CHANGED=False; 74 | - if [ $(git diff --name-only HEAD~1 | wc -l) == "1" ] && [[ "$(git diff --name-only HEAD~1)" == *"md" ]]; then 75 | export DOC_ONLY_CHANGED=True; 76 | fi 77 | 78 | # install Open MPI 79 | - rm -rf ~/mpi 80 | - mkdir ~/mpi 81 | - pushd ~/mpi 82 | - wget http://cntk.ai/PythonWheel/ForKeras/depends/openmpi_1.10-3.zip 83 | - unzip ./openmpi_1.10-3.zip 84 | - sudo dpkg -i openmpi_1.10-3.deb 85 | - popd 86 | 87 | # command to run tests 88 | script: 89 | - export MKL_THREADING_LAYER="GNU" 90 | # run keras backend init to initialize backend config 91 | - python -c "import keras.backend" 92 | # create models directory to avoid concurrent directory creation at runtime 93 | - mkdir ~/.keras/models 94 | # set up keras backend 95 | - sed -i -e 's/"backend":[[:space:]]*"[^"]*/"backend":\ "'$KERAS_BACKEND'/g' ~/.keras/keras.json; 96 | - echo -e "Running tests with the following config:\n$(cat ~/.keras/keras.json)" 97 | - if [[ "$DOC_ONLY_CHANGED" == "False" ]]; then 98 | if [[ "$TEST_MODE" == "PEP8" ]]; then 99 | PYTHONPATH=$PWD:$PYTHONPATH py.test --pep8 -m pep8 -n0; 100 | else 101 | PYTHONPATH=$PWD:$PYTHONPATH py.test tests/ --cov-config .coveragerc --cov=keras_applications tests/; 102 | fi; 103 | fi 104 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keras-team/keras-applications/06fbeb0f16e1304f239b2296578d1c50b15a983a/CONTRIBUTING.md -------------------------------------------------------------------------------- /ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | ### Summary 18 | 19 | ### Environment 20 | - Python version: 21 | - Keras version: 22 | - Keras-applications version: 23 | - Keras backend with version: 24 | 25 | ### Logs or source code for reproduction 26 | 27 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | COPYRIGHT 2 | 3 | Copyright (c) 2016 - 2018, the respective contributors. 4 | All rights reserved. 5 | 6 | Each contributor holds copyright over their respective contributions. 7 | The project versioning (Git) records all such contribution source information. 8 | The initial code of this repository came from https://github.com/keras-team/keras 9 | (the Keras repository), hence, for author information regarding commits 10 | that occurred earlier than the first commit in the present repository, 11 | please see the original Keras repository.
12 | 13 | LICENSE 14 | 15 | The MIT License (MIT) 16 | 17 | Permission is hereby granted, free of charge, to any person obtaining a copy 18 | of this software and associated documentation files (the "Software"), to deal 19 | in the Software without restriction, including without limitation the rights 20 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 21 | copies of the Software, and to permit persons to whom the Software is 22 | furnished to do so, subject to the following conditions: 23 | 24 | The above copyright notice and this permission notice shall be included in all 25 | copies or substantial portions of the Software. 26 | 27 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 28 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 29 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 30 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 31 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 32 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 33 | SOFTWARE. 34 | 35 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | include README.md 3 | include CONTRIBUTING.md 4 | graft tests 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Keras Applications 2 | 3 | ⚠️ This GitHub repository is now deprecated -- All Keras Applications models have moved into the core Keras repository and the TensorFlow pip package. All code changes and discussion should move to the Keras repository. 4 | 5 | For users looking for a place to start using premade models, consult the [Keras API documentation](https://keras.io/api/applications/). 6 | -------------------------------------------------------------------------------- /keras_applications/__init__.py: -------------------------------------------------------------------------------- 1 | """Enables dynamic setting of underlying Keras module. 2 | """ 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | _KERAS_BACKEND = None 8 | _KERAS_LAYERS = None 9 | _KERAS_MODELS = None 10 | _KERAS_UTILS = None 11 | 12 | 13 | def get_submodules_from_kwargs(kwargs): 14 | backend = kwargs.get('backend', _KERAS_BACKEND) 15 | layers = kwargs.get('layers', _KERAS_LAYERS) 16 | models = kwargs.get('models', _KERAS_MODELS) 17 | utils = kwargs.get('utils', _KERAS_UTILS) 18 | for key in kwargs.keys(): 19 | if key not in ['backend', 'layers', 'models', 'utils']: 20 | raise TypeError('Invalid keyword argument: %s' % key) 21 | return backend, layers, models, utils 22 | 23 | 24 | def correct_pad(backend, inputs, kernel_size): 25 | """Returns a tuple for zero-padding for 2D convolution with downsampling. 26 | 27 | # Arguments 28 | inputs: input tensor (the padding is computed from its spatial shape). 29 | kernel_size: An integer or tuple/list of 2 integers. 30 | 31 | # Returns 32 | A tuple.
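    # Example
        For a 224x224 input in `'channels_last'` format and `kernel_size=3`,
        `correct` is `(1, 1)` and, both spatial dimensions being even,
        `adjust` is `(1, 1)`, so the function returns `((0, 1), (0, 1))` --
        one extra zero row/column on the bottom and right only. For an odd
        225x225 input, `adjust` is `(0, 0)` and the result is the symmetric
        `((1, 1), (1, 1))`.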
33 | """ 34 | img_dim = 2 if backend.image_data_format() == 'channels_first' else 1 35 | input_size = backend.int_shape(inputs)[img_dim:(img_dim + 2)] 36 | 37 | if isinstance(kernel_size, int): 38 | kernel_size = (kernel_size, kernel_size) 39 | 40 | if input_size[0] is None: 41 | adjust = (1, 1) 42 | else: 43 | adjust = (1 - input_size[0] % 2, 1 - input_size[1] % 2) 44 | 45 | correct = (kernel_size[0] // 2, kernel_size[1] // 2) 46 | 47 | return ((correct[0] - adjust[0], correct[0]), 48 | (correct[1] - adjust[1], correct[1])) 49 | 50 | __version__ = '1.0.8' 51 | 52 | 53 | from . import vgg16 54 | from . import vgg19 55 | from . import inception_v3 56 | from . import inception_resnet_v2 57 | from . import xception 58 | from . import mobilenet 59 | from . import mobilenet_v2 60 | from . import mobilenet_v3 61 | from . import densenet 62 | from . import nasnet 63 | from . import resnet 64 | from . import resnet_v2 65 | from . import resnext 66 | from . import efficientnet 67 | -------------------------------------------------------------------------------- /keras_applications/densenet.py: -------------------------------------------------------------------------------- 1 | """DenseNet models for Keras. 2 | 3 | # Reference paper 4 | 5 | - [Densely Connected Convolutional Networks] 6 | (https://arxiv.org/abs/1608.06993) (CVPR 2017 Best Paper Award) 7 | 8 | # Reference implementation 9 | 10 | - [Torch DenseNets] 11 | (https://github.com/liuzhuang13/DenseNet/blob/master/models/densenet.lua) 12 | - [TensorNets] 13 | (https://github.com/taehoonlee/tensornets/blob/master/tensornets/densenets.py) 14 | """ 15 | from __future__ import absolute_import 16 | from __future__ import division 17 | from __future__ import print_function 18 | 19 | import os 20 | 21 | from . import get_submodules_from_kwargs 22 | from . import imagenet_utils 23 | from .imagenet_utils import decode_predictions 24 | from .imagenet_utils import _obtain_input_shape 25 | 26 | 27 | BASE_WEIGTHS_PATH = ( 28 | 'https://github.com/keras-team/keras-applications/' 29 | 'releases/download/densenet/') 30 | DENSENET121_WEIGHT_PATH = ( 31 | BASE_WEIGTHS_PATH + 32 | 'densenet121_weights_tf_dim_ordering_tf_kernels.h5') 33 | DENSENET121_WEIGHT_PATH_NO_TOP = ( 34 | BASE_WEIGTHS_PATH + 35 | 'densenet121_weights_tf_dim_ordering_tf_kernels_notop.h5') 36 | DENSENET169_WEIGHT_PATH = ( 37 | BASE_WEIGTHS_PATH + 38 | 'densenet169_weights_tf_dim_ordering_tf_kernels.h5') 39 | DENSENET169_WEIGHT_PATH_NO_TOP = ( 40 | BASE_WEIGTHS_PATH + 41 | 'densenet169_weights_tf_dim_ordering_tf_kernels_notop.h5') 42 | DENSENET201_WEIGHT_PATH = ( 43 | BASE_WEIGTHS_PATH + 44 | 'densenet201_weights_tf_dim_ordering_tf_kernels.h5') 45 | DENSENET201_WEIGHT_PATH_NO_TOP = ( 46 | BASE_WEIGTHS_PATH + 47 | 'densenet201_weights_tf_dim_ordering_tf_kernels_notop.h5') 48 | 49 | backend = None 50 | layers = None 51 | models = None 52 | keras_utils = None 53 | 54 | 55 | def dense_block(x, blocks, name): 56 | """A dense block. 57 | 58 | # Arguments 59 | x: input tensor. 60 | blocks: integer, the number of building blocks. 61 | name: string, block label. 62 | 63 | # Returns 64 | output tensor for the block. 65 | """ 66 | for i in range(blocks): 67 | x = conv_block(x, 32, name=name + '_block' + str(i + 1)) 68 | return x 69 | 70 | 71 | def transition_block(x, reduction, name): 72 | """A transition block. 73 | 74 | # Arguments 75 | x: input tensor. 76 | reduction: float, compression rate at transition layers. 77 | name: string, block label. 78 | 79 | # Returns 80 | output tensor for the block. 
81 | """ 82 | bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1 83 | x = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5, 84 | name=name + '_bn')(x) 85 | x = layers.Activation('relu', name=name + '_relu')(x) 86 | x = layers.Conv2D(int(backend.int_shape(x)[bn_axis] * reduction), 1, 87 | use_bias=False, 88 | name=name + '_conv')(x) 89 | x = layers.AveragePooling2D(2, strides=2, name=name + '_pool')(x) 90 | return x 91 | 92 | 93 | def conv_block(x, growth_rate, name): 94 | """A building block for a dense block. 95 | 96 | # Arguments 97 | x: input tensor. 98 | growth_rate: float, growth rate at dense layers. 99 | name: string, block label. 100 | 101 | # Returns 102 | Output tensor for the block. 103 | """ 104 | bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1 105 | x1 = layers.BatchNormalization(axis=bn_axis, 106 | epsilon=1.001e-5, 107 | name=name + '_0_bn')(x) 108 | x1 = layers.Activation('relu', name=name + '_0_relu')(x1) 109 | x1 = layers.Conv2D(4 * growth_rate, 1, 110 | use_bias=False, 111 | name=name + '_1_conv')(x1) 112 | x1 = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5, 113 | name=name + '_1_bn')(x1) 114 | x1 = layers.Activation('relu', name=name + '_1_relu')(x1) 115 | x1 = layers.Conv2D(growth_rate, 3, 116 | padding='same', 117 | use_bias=False, 118 | name=name + '_2_conv')(x1) 119 | x = layers.Concatenate(axis=bn_axis, name=name + '_concat')([x, x1]) 120 | return x 121 | 122 | 123 | def DenseNet(blocks, 124 | include_top=True, 125 | weights='imagenet', 126 | input_tensor=None, 127 | input_shape=None, 128 | pooling=None, 129 | classes=1000, 130 | **kwargs): 131 | """Instantiates the DenseNet architecture. 132 | 133 | Optionally loads weights pre-trained on ImageNet. 134 | Note that the data format convention used by the model is 135 | the one specified in your Keras config at `~/.keras/keras.json`. 136 | 137 | # Arguments 138 | blocks: numbers of building blocks for the four dense layers. 139 | include_top: whether to include the fully-connected 140 | layer at the top of the network. 141 | weights: one of `None` (random initialization), 142 | 'imagenet' (pre-training on ImageNet), 143 | or the path to the weights file to be loaded. 144 | input_tensor: optional Keras tensor 145 | (i.e. output of `layers.Input()`) 146 | to use as image input for the model. 147 | input_shape: optional shape tuple, only to be specified 148 | if `include_top` is False (otherwise the input shape 149 | has to be `(224, 224, 3)` (with `'channels_last'` data format) 150 | or `(3, 224, 224)` (with `'channels_first'` data format). 151 | It should have exactly 3 inputs channels, 152 | and width and height should be no smaller than 32. 153 | E.g. `(200, 200, 3)` would be one valid value. 154 | pooling: optional pooling mode for feature extraction 155 | when `include_top` is `False`. 156 | - `None` means that the output of the model will be 157 | the 4D tensor output of the 158 | last convolutional block. 159 | - `avg` means that global average pooling 160 | will be applied to the output of the 161 | last convolutional block, and thus 162 | the output of the model will be a 2D tensor. 163 | - `max` means that global max pooling will 164 | be applied. 165 | classes: optional number of classes to classify images 166 | into, only to be specified if `include_top` is True, and 167 | if no `weights` argument is specified. 168 | 169 | # Returns 170 | A Keras model instance. 
171 | 172 | # Raises 173 | ValueError: in case of invalid argument for `weights`, 174 | or invalid input shape. 175 | """ 176 | global backend, layers, models, keras_utils 177 | backend, layers, models, keras_utils = get_submodules_from_kwargs(kwargs) 178 | 179 | if not (weights in {'imagenet', None} or os.path.exists(weights)): 180 | raise ValueError('The `weights` argument should be either ' 181 | '`None` (random initialization), `imagenet` ' 182 | '(pre-training on ImageNet), ' 183 | 'or the path to the weights file to be loaded.') 184 | 185 | if weights == 'imagenet' and include_top and classes != 1000: 186 | raise ValueError('If using `weights` as `"imagenet"` with `include_top`' 187 | ' as true, `classes` should be 1000') 188 | 189 | # Determine proper input shape 190 | input_shape = _obtain_input_shape(input_shape, 191 | default_size=224, 192 | min_size=32, 193 | data_format=backend.image_data_format(), 194 | require_flatten=include_top, 195 | weights=weights) 196 | 197 | if input_tensor is None: 198 | img_input = layers.Input(shape=input_shape) 199 | else: 200 | if not backend.is_keras_tensor(input_tensor): 201 | img_input = layers.Input(tensor=input_tensor, shape=input_shape) 202 | else: 203 | img_input = input_tensor 204 | 205 | bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1 206 | 207 | x = layers.ZeroPadding2D(padding=((3, 3), (3, 3)))(img_input) 208 | x = layers.Conv2D(64, 7, strides=2, use_bias=False, name='conv1/conv')(x) 209 | x = layers.BatchNormalization( 210 | axis=bn_axis, epsilon=1.001e-5, name='conv1/bn')(x) 211 | x = layers.Activation('relu', name='conv1/relu')(x) 212 | x = layers.ZeroPadding2D(padding=((1, 1), (1, 1)))(x) 213 | x = layers.MaxPooling2D(3, strides=2, name='pool1')(x) 214 | 215 | x = dense_block(x, blocks[0], name='conv2') 216 | x = transition_block(x, 0.5, name='pool2') 217 | x = dense_block(x, blocks[1], name='conv3') 218 | x = transition_block(x, 0.5, name='pool3') 219 | x = dense_block(x, blocks[2], name='conv4') 220 | x = transition_block(x, 0.5, name='pool4') 221 | x = dense_block(x, blocks[3], name='conv5') 222 | 223 | x = layers.BatchNormalization( 224 | axis=bn_axis, epsilon=1.001e-5, name='bn')(x) 225 | x = layers.Activation('relu', name='relu')(x) 226 | 227 | if include_top: 228 | x = layers.GlobalAveragePooling2D(name='avg_pool')(x) 229 | x = layers.Dense(classes, activation='softmax', name='fc1000')(x) 230 | else: 231 | if pooling == 'avg': 232 | x = layers.GlobalAveragePooling2D(name='avg_pool')(x) 233 | elif pooling == 'max': 234 | x = layers.GlobalMaxPooling2D(name='max_pool')(x) 235 | 236 | # Ensure that the model takes into account 237 | # any potential predecessors of `input_tensor`. 238 | if input_tensor is not None: 239 | inputs = keras_utils.get_source_inputs(input_tensor) 240 | else: 241 | inputs = img_input 242 | 243 | # Create model. 244 | if blocks == [6, 12, 24, 16]: 245 | model = models.Model(inputs, x, name='densenet121') 246 | elif blocks == [6, 12, 32, 32]: 247 | model = models.Model(inputs, x, name='densenet169') 248 | elif blocks == [6, 12, 48, 32]: 249 | model = models.Model(inputs, x, name='densenet201') 250 | else: 251 | model = models.Model(inputs, x, name='densenet') 252 | 253 | # Load weights. 
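    # (`get_file` caches each download under ~/.keras/models/, and the
    # `file_hash` values below let it detect and re-fetch a corrupted or
    # stale cached file.)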
254 | if weights == 'imagenet': 255 | if include_top: 256 | if blocks == [6, 12, 24, 16]: 257 | weights_path = keras_utils.get_file( 258 | 'densenet121_weights_tf_dim_ordering_tf_kernels.h5', 259 | DENSENET121_WEIGHT_PATH, 260 | cache_subdir='models', 261 | file_hash='9d60b8095a5708f2dcce2bca79d332c7') 262 | elif blocks == [6, 12, 32, 32]: 263 | weights_path = keras_utils.get_file( 264 | 'densenet169_weights_tf_dim_ordering_tf_kernels.h5', 265 | DENSENET169_WEIGHT_PATH, 266 | cache_subdir='models', 267 | file_hash='d699b8f76981ab1b30698df4c175e90b') 268 | elif blocks == [6, 12, 48, 32]: 269 | weights_path = keras_utils.get_file( 270 | 'densenet201_weights_tf_dim_ordering_tf_kernels.h5', 271 | DENSENET201_WEIGHT_PATH, 272 | cache_subdir='models', 273 | file_hash='1ceb130c1ea1b78c3bf6114dbdfd8807') 274 | else: 275 | if blocks == [6, 12, 24, 16]: 276 | weights_path = keras_utils.get_file( 277 | 'densenet121_weights_tf_dim_ordering_tf_kernels_notop.h5', 278 | DENSENET121_WEIGHT_PATH_NO_TOP, 279 | cache_subdir='models', 280 | file_hash='30ee3e1110167f948a6b9946edeeb738') 281 | elif blocks == [6, 12, 32, 32]: 282 | weights_path = keras_utils.get_file( 283 | 'densenet169_weights_tf_dim_ordering_tf_kernels_notop.h5', 284 | DENSENET169_WEIGHT_PATH_NO_TOP, 285 | cache_subdir='models', 286 | file_hash='b8c4d4c20dd625c148057b9ff1c1176b') 287 | elif blocks == [6, 12, 48, 32]: 288 | weights_path = keras_utils.get_file( 289 | 'densenet201_weights_tf_dim_ordering_tf_kernels_notop.h5', 290 | DENSENET201_WEIGHT_PATH_NO_TOP, 291 | cache_subdir='models', 292 | file_hash='c13680b51ded0fb44dff2d8f86ac8bb1') 293 | model.load_weights(weights_path) 294 | elif weights is not None: 295 | model.load_weights(weights) 296 | 297 | return model 298 | 299 | 300 | def DenseNet121(include_top=True, 301 | weights='imagenet', 302 | input_tensor=None, 303 | input_shape=None, 304 | pooling=None, 305 | classes=1000, 306 | **kwargs): 307 | return DenseNet([6, 12, 24, 16], 308 | include_top, weights, 309 | input_tensor, input_shape, 310 | pooling, classes, 311 | **kwargs) 312 | 313 | 314 | def DenseNet169(include_top=True, 315 | weights='imagenet', 316 | input_tensor=None, 317 | input_shape=None, 318 | pooling=None, 319 | classes=1000, 320 | **kwargs): 321 | return DenseNet([6, 12, 32, 32], 322 | include_top, weights, 323 | input_tensor, input_shape, 324 | pooling, classes, 325 | **kwargs) 326 | 327 | 328 | def DenseNet201(include_top=True, 329 | weights='imagenet', 330 | input_tensor=None, 331 | input_shape=None, 332 | pooling=None, 333 | classes=1000, 334 | **kwargs): 335 | return DenseNet([6, 12, 48, 32], 336 | include_top, weights, 337 | input_tensor, input_shape, 338 | pooling, classes, 339 | **kwargs) 340 | 341 | 342 | def preprocess_input(x, data_format=None, **kwargs): 343 | """Preprocesses a numpy array encoding a batch of images. 344 | 345 | # Arguments 346 | x: a 3D or 4D numpy array consists of RGB values within [0, 255]. 347 | data_format: data format of the image tensor. 348 | 349 | # Returns 350 | Preprocessed array. 
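    # Example
        The `'torch'` mode scales pixels to [0, 1] and then normalizes each
        channel with the ImageNet statistics (mean `[0.485, 0.456, 0.406]`,
        std `[0.229, 0.224, 0.225]`), so a red-channel value of 255 maps to
        `(1.0 - 0.485) / 0.229 ~= 2.25`.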
351 | """ 352 | return imagenet_utils.preprocess_input(x, data_format, 353 | mode='torch', **kwargs) 354 | 355 | 356 | setattr(DenseNet121, '__doc__', DenseNet.__doc__) 357 | setattr(DenseNet169, '__doc__', DenseNet.__doc__) 358 | setattr(DenseNet201, '__doc__', DenseNet.__doc__) 359 | -------------------------------------------------------------------------------- /keras_applications/efficientnet.py: -------------------------------------------------------------------------------- 1 | """EfficientNet models for Keras. 2 | 3 | # Reference paper 4 | 5 | - [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks] 6 | (https://arxiv.org/abs/1905.11946) (ICML 2019) 7 | 8 | # Reference implementation 9 | 10 | - [TensorFlow] 11 | (https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet) 12 | """ 13 | from __future__ import absolute_import 14 | from __future__ import division 15 | from __future__ import print_function 16 | 17 | import os 18 | import math 19 | 20 | from . import correct_pad 21 | from . import get_submodules_from_kwargs 22 | from . import imagenet_utils 23 | from .imagenet_utils import decode_predictions 24 | from .imagenet_utils import _obtain_input_shape 25 | 26 | 27 | backend = None 28 | layers = None 29 | models = None 30 | keras_utils = None 31 | 32 | 33 | BASE_WEIGHTS_PATH = ( 34 | 'https://github.com/Callidior/keras-applications/' 35 | 'releases/download/efficientnet/') 36 | WEIGHTS_HASHES = { 37 | 'b0': ('e9e877068bd0af75e0a36691e03c072c', 38 | '345255ed8048c2f22c793070a9c1a130'), 39 | 'b1': ('8f83b9aecab222a9a2480219843049a1', 40 | 'b20160ab7b79b7a92897fcb33d52cc61'), 41 | 'b2': ('b6185fdcd190285d516936c09dceeaa4', 42 | 'c6e46333e8cddfa702f4d8b8b6340d70'), 43 | 'b3': ('b2db0f8aac7c553657abb2cb46dcbfbb', 44 | 'e0cf8654fad9d3625190e30d70d0c17d'), 45 | 'b4': ('ab314d28135fe552e2f9312b31da6926', 46 | 'b46702e4754d2022d62897e0618edc7b'), 47 | 'b5': ('8d60b903aff50b09c6acf8eaba098e09', 48 | '0a839ac36e46552a881f2975aaab442f'), 49 | 'b6': ('a967457886eac4f5ab44139bdd827920', 50 | '375a35c17ef70d46f9c664b03b4437f2'), 51 | 'b7': ('e964fd6e26e9a4c144bcb811f2a10f20', 52 | 'd55674cc46b805f4382d18bc08ed43c1') 53 | } 54 | 55 | 56 | DEFAULT_BLOCKS_ARGS = [ 57 | {'kernel_size': 3, 'repeats': 1, 'filters_in': 32, 'filters_out': 16, 58 | 'expand_ratio': 1, 'id_skip': True, 'strides': 1, 'se_ratio': 0.25}, 59 | {'kernel_size': 3, 'repeats': 2, 'filters_in': 16, 'filters_out': 24, 60 | 'expand_ratio': 6, 'id_skip': True, 'strides': 2, 'se_ratio': 0.25}, 61 | {'kernel_size': 5, 'repeats': 2, 'filters_in': 24, 'filters_out': 40, 62 | 'expand_ratio': 6, 'id_skip': True, 'strides': 2, 'se_ratio': 0.25}, 63 | {'kernel_size': 3, 'repeats': 3, 'filters_in': 40, 'filters_out': 80, 64 | 'expand_ratio': 6, 'id_skip': True, 'strides': 2, 'se_ratio': 0.25}, 65 | {'kernel_size': 5, 'repeats': 3, 'filters_in': 80, 'filters_out': 112, 66 | 'expand_ratio': 6, 'id_skip': True, 'strides': 1, 'se_ratio': 0.25}, 67 | {'kernel_size': 5, 'repeats': 4, 'filters_in': 112, 'filters_out': 192, 68 | 'expand_ratio': 6, 'id_skip': True, 'strides': 2, 'se_ratio': 0.25}, 69 | {'kernel_size': 3, 'repeats': 1, 'filters_in': 192, 'filters_out': 320, 70 | 'expand_ratio': 6, 'id_skip': True, 'strides': 1, 'se_ratio': 0.25} 71 | ] 72 | 73 | CONV_KERNEL_INITIALIZER = { 74 | 'class_name': 'VarianceScaling', 75 | 'config': { 76 | 'scale': 2.0, 77 | 'mode': 'fan_out', 78 | # EfficientNet actually uses an untruncated normal distribution for 79 | # initializing conv layers, but 
keras.initializers.VarianceScaling uses 80 | # a truncated distribution. 81 | # We decided against a custom initializer for better serializability. 82 | 'distribution': 'normal' 83 | } 84 | } 85 | 86 | DENSE_KERNEL_INITIALIZER = { 87 | 'class_name': 'VarianceScaling', 88 | 'config': { 89 | 'scale': 1. / 3., 90 | 'mode': 'fan_out', 91 | 'distribution': 'uniform' 92 | } 93 | } 94 | 95 | 96 | def swish(x): 97 | """Swish activation function. 98 | 99 | # Arguments 100 | x: Input tensor. 101 | 102 | # Returns 103 | The Swish activation: `x * sigmoid(x)`. 104 | 105 | # References 106 | [Searching for Activation Functions](https://arxiv.org/abs/1710.05941) 107 | """ 108 | if backend.backend() == 'tensorflow': 109 | try: 110 | # The native TF implementation has a more 111 | # memory-efficient gradient implementation 112 | return backend.tf.nn.swish(x) 113 | except AttributeError: 114 | pass 115 | 116 | return x * backend.sigmoid(x) 117 | 118 | 119 | def block(inputs, activation_fn=swish, drop_rate=0., name='', 120 | filters_in=32, filters_out=16, kernel_size=3, strides=1, 121 | expand_ratio=1, se_ratio=0., id_skip=True): 122 | """A mobile inverted residual block. 123 | 124 | # Arguments 125 | inputs: input tensor. 126 | activation_fn: activation function. 127 | drop_rate: float between 0 and 1, fraction of the input units to drop. 128 | name: string, block label. 129 | filters_in: integer, the number of input filters. 130 | filters_out: integer, the number of output filters. 131 | kernel_size: integer, the dimension of the convolution window. 132 | strides: integer, the stride of the convolution. 133 | expand_ratio: integer, scaling coefficient for the input filters. 134 | se_ratio: float between 0 and 1, fraction to squeeze the input filters. 135 | id_skip: boolean. 136 | 137 | # Returns 138 | output tensor for the block.
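    # Example
        With `filters_in=32`, `expand_ratio=6` and `se_ratio=0.25`, the
        expansion convolution widens the input to `32 * 6 = 192` channels,
        the squeeze-and-excitation reduce convolution uses
        `max(1, int(32 * 0.25)) = 8` filters, and the final projection
        convolution narrows the result back to `filters_out` channels.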
139 | """ 140 | bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1 141 | 142 | # Expansion phase 143 | filters = filters_in * expand_ratio 144 | if expand_ratio != 1: 145 | x = layers.Conv2D(filters, 1, 146 | padding='same', 147 | use_bias=False, 148 | kernel_initializer=CONV_KERNEL_INITIALIZER, 149 | name=name + 'expand_conv')(inputs) 150 | x = layers.BatchNormalization(axis=bn_axis, name=name + 'expand_bn')(x) 151 | x = layers.Activation(activation_fn, name=name + 'expand_activation')(x) 152 | else: 153 | x = inputs 154 | 155 | # Depthwise Convolution 156 | if strides == 2: 157 | x = layers.ZeroPadding2D(padding=correct_pad(backend, x, kernel_size), 158 | name=name + 'dwconv_pad')(x) 159 | conv_pad = 'valid' 160 | else: 161 | conv_pad = 'same' 162 | x = layers.DepthwiseConv2D(kernel_size, 163 | strides=strides, 164 | padding=conv_pad, 165 | use_bias=False, 166 | depthwise_initializer=CONV_KERNEL_INITIALIZER, 167 | name=name + 'dwconv')(x) 168 | x = layers.BatchNormalization(axis=bn_axis, name=name + 'bn')(x) 169 | x = layers.Activation(activation_fn, name=name + 'activation')(x) 170 | 171 | # Squeeze and Excitation phase 172 | if 0 < se_ratio <= 1: 173 | filters_se = max(1, int(filters_in * se_ratio)) 174 | se = layers.GlobalAveragePooling2D(name=name + 'se_squeeze')(x) 175 | if bn_axis == 1: 176 | se = layers.Reshape((filters, 1, 1), name=name + 'se_reshape')(se) 177 | else: 178 | se = layers.Reshape((1, 1, filters), name=name + 'se_reshape')(se) 179 | se = layers.Conv2D(filters_se, 1, 180 | padding='same', 181 | activation=activation_fn, 182 | kernel_initializer=CONV_KERNEL_INITIALIZER, 183 | name=name + 'se_reduce')(se) 184 | se = layers.Conv2D(filters, 1, 185 | padding='same', 186 | activation='sigmoid', 187 | kernel_initializer=CONV_KERNEL_INITIALIZER, 188 | name=name + 'se_expand')(se) 189 | if backend.backend() == 'theano': 190 | # For the Theano backend, we have to explicitly make 191 | # the excitation weights broadcastable. 192 | se = layers.Lambda( 193 | lambda x: backend.pattern_broadcast(x, [True, True, True, False]), 194 | output_shape=lambda input_shape: input_shape, 195 | name=name + 'se_broadcast')(se) 196 | x = layers.multiply([x, se], name=name + 'se_excite') 197 | 198 | # Output phase 199 | x = layers.Conv2D(filters_out, 1, 200 | padding='same', 201 | use_bias=False, 202 | kernel_initializer=CONV_KERNEL_INITIALIZER, 203 | name=name + 'project_conv')(x) 204 | x = layers.BatchNormalization(axis=bn_axis, name=name + 'project_bn')(x) 205 | if (id_skip is True and strides == 1 and filters_in == filters_out): 206 | if drop_rate > 0: 207 | x = layers.Dropout(drop_rate, 208 | noise_shape=(None, 1, 1, 1), 209 | name=name + 'drop')(x) 210 | x = layers.add([x, inputs], name=name + 'add') 211 | 212 | return x 213 | 214 | 215 | def EfficientNet(width_coefficient, 216 | depth_coefficient, 217 | default_size, 218 | dropout_rate=0.2, 219 | drop_connect_rate=0.2, 220 | depth_divisor=8, 221 | activation_fn=swish, 222 | blocks_args=DEFAULT_BLOCKS_ARGS, 223 | model_name='efficientnet', 224 | include_top=True, 225 | weights='imagenet', 226 | input_tensor=None, 227 | input_shape=None, 228 | pooling=None, 229 | classes=1000, 230 | **kwargs): 231 | """Instantiates the EfficientNet architecture using given scaling coefficients. 232 | 233 | Optionally loads weights pre-trained on ImageNet. 234 | Note that the data format convention used by the model is 235 | the one specified in your Keras config at `~/.keras/keras.json`. 
236 | 237 | # Arguments 238 | width_coefficient: float, scaling coefficient for network width. 239 | depth_coefficient: float, scaling coefficient for network depth. 240 | default_size: integer, default input image size. 241 | dropout_rate: float, dropout rate before final classifier layer. 242 | drop_connect_rate: float, dropout rate at skip connections. 243 | depth_divisor: integer, a unit of network width. 244 | activation_fn: activation function. 245 | blocks_args: list of dicts, parameters to construct block modules. 246 | model_name: string, model name. 247 | include_top: whether to include the fully-connected 248 | layer at the top of the network. 249 | weights: one of `None` (random initialization), 250 | 'imagenet' (pre-training on ImageNet), 251 | or the path to the weights file to be loaded. 252 | input_tensor: optional Keras tensor 253 | (i.e. output of `layers.Input()`) 254 | to use as image input for the model. 255 | input_shape: optional shape tuple, only to be specified 256 | if `include_top` is False. 257 | It should have exactly 3 inputs channels. 258 | pooling: optional pooling mode for feature extraction 259 | when `include_top` is `False`. 260 | - `None` means that the output of the model will be 261 | the 4D tensor output of the 262 | last convolutional layer. 263 | - `avg` means that global average pooling 264 | will be applied to the output of the 265 | last convolutional layer, and thus 266 | the output of the model will be a 2D tensor. 267 | - `max` means that global max pooling will 268 | be applied. 269 | classes: optional number of classes to classify images 270 | into, only to be specified if `include_top` is True, and 271 | if no `weights` argument is specified. 272 | 273 | # Returns 274 | A Keras model instance. 275 | 276 | # Raises 277 | ValueError: in case of invalid argument for `weights`, 278 | or invalid input shape. 279 | """ 280 | global backend, layers, models, keras_utils 281 | backend, layers, models, keras_utils = get_submodules_from_kwargs(kwargs) 282 | 283 | if not (weights in {'imagenet', None} or os.path.exists(weights)): 284 | raise ValueError('The `weights` argument should be either ' 285 | '`None` (random initialization), `imagenet` ' 286 | '(pre-training on ImageNet), ' 287 | 'or the path to the weights file to be loaded.') 288 | 289 | if weights == 'imagenet' and include_top and classes != 1000: 290 | raise ValueError('If using `weights` as `"imagenet"` with `include_top`' 291 | ' as true, `classes` should be 1000') 292 | 293 | # Determine proper input shape 294 | input_shape = _obtain_input_shape(input_shape, 295 | default_size=default_size, 296 | min_size=32, 297 | data_format=backend.image_data_format(), 298 | require_flatten=include_top, 299 | weights=weights) 300 | 301 | if input_tensor is None: 302 | img_input = layers.Input(shape=input_shape) 303 | else: 304 | if not backend.is_keras_tensor(input_tensor): 305 | img_input = layers.Input(tensor=input_tensor, shape=input_shape) 306 | else: 307 | img_input = input_tensor 308 | 309 | bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1 310 | 311 | def round_filters(filters, divisor=depth_divisor): 312 | """Round number of filters based on depth multiplier.""" 313 | filters *= width_coefficient 314 | new_filters = max(divisor, int(filters + divisor / 2) // divisor * divisor) 315 | # Make sure that round down does not go down by more than 10%. 
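        # (e.g. with width_coefficient=1.1, 32 filters scale to 35.2 and
        # round to 32, a multiple of 8; since 32 >= 0.9 * 35.2, no
        # adjustment is needed.)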
316 | if new_filters < 0.9 * filters: 317 | new_filters += divisor 318 | return int(new_filters) 319 | 320 | def round_repeats(repeats): 321 | """Round number of repeats based on depth multiplier.""" 322 | return int(math.ceil(depth_coefficient * repeats)) 323 | 324 | # Build stem 325 | x = img_input 326 | x = layers.ZeroPadding2D(padding=correct_pad(backend, x, 3), 327 | name='stem_conv_pad')(x) 328 | x = layers.Conv2D(round_filters(32), 3, 329 | strides=2, 330 | padding='valid', 331 | use_bias=False, 332 | kernel_initializer=CONV_KERNEL_INITIALIZER, 333 | name='stem_conv')(x) 334 | x = layers.BatchNormalization(axis=bn_axis, name='stem_bn')(x) 335 | x = layers.Activation(activation_fn, name='stem_activation')(x) 336 | 337 | # Build blocks 338 | from copy import deepcopy 339 | blocks_args = deepcopy(blocks_args) 340 | 341 | b = 0 342 | blocks = float(sum(args['repeats'] for args in blocks_args)) 343 | for (i, args) in enumerate(blocks_args): 344 | assert args['repeats'] > 0 345 | # Update block input and output filters based on depth multiplier. 346 | args['filters_in'] = round_filters(args['filters_in']) 347 | args['filters_out'] = round_filters(args['filters_out']) 348 | 349 | for j in range(round_repeats(args.pop('repeats'))): 350 | # The first block needs to take care of stride and filter size increase. 351 | if j > 0: 352 | args['strides'] = 1 353 | args['filters_in'] = args['filters_out'] 354 | x = block(x, activation_fn, drop_connect_rate * b / blocks, 355 | name='block{}{}_'.format(i + 1, chr(j + 97)), **args) 356 | b += 1 357 | 358 | # Build top 359 | x = layers.Conv2D(round_filters(1280), 1, 360 | padding='same', 361 | use_bias=False, 362 | kernel_initializer=CONV_KERNEL_INITIALIZER, 363 | name='top_conv')(x) 364 | x = layers.BatchNormalization(axis=bn_axis, name='top_bn')(x) 365 | x = layers.Activation(activation_fn, name='top_activation')(x) 366 | if include_top: 367 | x = layers.GlobalAveragePooling2D(name='avg_pool')(x) 368 | if dropout_rate > 0: 369 | x = layers.Dropout(dropout_rate, name='top_dropout')(x) 370 | x = layers.Dense(classes, 371 | activation='softmax', 372 | kernel_initializer=DENSE_KERNEL_INITIALIZER, 373 | name='probs')(x) 374 | else: 375 | if pooling == 'avg': 376 | x = layers.GlobalAveragePooling2D(name='avg_pool')(x) 377 | elif pooling == 'max': 378 | x = layers.GlobalMaxPooling2D(name='max_pool')(x) 379 | 380 | # Ensure that the model takes into account 381 | # any potential predecessors of `input_tensor`. 382 | if input_tensor is not None: 383 | inputs = keras_utils.get_source_inputs(input_tensor) 384 | else: 385 | inputs = img_input 386 | 387 | # Create model. 388 | model = models.Model(inputs, x, name=model_name) 389 | 390 | # Load weights. 
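    # (`model_name` ends in 'b0'..'b7', which indexes WEIGHTS_HASHES above;
    # the first hash of each pair is for the full model, the second for the
    # notop variant.)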
391 | if weights == 'imagenet': 392 | if include_top: 393 | file_suff = '_weights_tf_dim_ordering_tf_kernels_autoaugment.h5' 394 | file_hash = WEIGHTS_HASHES[model_name[-2:]][0] 395 | else: 396 | file_suff = '_weights_tf_dim_ordering_tf_kernels_autoaugment_notop.h5' 397 | file_hash = WEIGHTS_HASHES[model_name[-2:]][1] 398 | file_name = model_name + file_suff 399 | weights_path = keras_utils.get_file(file_name, 400 | BASE_WEIGHTS_PATH + file_name, 401 | cache_subdir='models', 402 | file_hash=file_hash) 403 | model.load_weights(weights_path) 404 | elif weights is not None: 405 | model.load_weights(weights) 406 | 407 | return model 408 | 409 | 410 | def EfficientNetB0(include_top=True, 411 | weights='imagenet', 412 | input_tensor=None, 413 | input_shape=None, 414 | pooling=None, 415 | classes=1000, 416 | **kwargs): 417 | return EfficientNet(1.0, 1.0, 224, 0.2, 418 | model_name='efficientnet-b0', 419 | include_top=include_top, weights=weights, 420 | input_tensor=input_tensor, input_shape=input_shape, 421 | pooling=pooling, classes=classes, 422 | **kwargs) 423 | 424 | 425 | def EfficientNetB1(include_top=True, 426 | weights='imagenet', 427 | input_tensor=None, 428 | input_shape=None, 429 | pooling=None, 430 | classes=1000, 431 | **kwargs): 432 | return EfficientNet(1.0, 1.1, 240, 0.2, 433 | model_name='efficientnet-b1', 434 | include_top=include_top, weights=weights, 435 | input_tensor=input_tensor, input_shape=input_shape, 436 | pooling=pooling, classes=classes, 437 | **kwargs) 438 | 439 | 440 | def EfficientNetB2(include_top=True, 441 | weights='imagenet', 442 | input_tensor=None, 443 | input_shape=None, 444 | pooling=None, 445 | classes=1000, 446 | **kwargs): 447 | return EfficientNet(1.1, 1.2, 260, 0.3, 448 | model_name='efficientnet-b2', 449 | include_top=include_top, weights=weights, 450 | input_tensor=input_tensor, input_shape=input_shape, 451 | pooling=pooling, classes=classes, 452 | **kwargs) 453 | 454 | 455 | def EfficientNetB3(include_top=True, 456 | weights='imagenet', 457 | input_tensor=None, 458 | input_shape=None, 459 | pooling=None, 460 | classes=1000, 461 | **kwargs): 462 | return EfficientNet(1.2, 1.4, 300, 0.3, 463 | model_name='efficientnet-b3', 464 | include_top=include_top, weights=weights, 465 | input_tensor=input_tensor, input_shape=input_shape, 466 | pooling=pooling, classes=classes, 467 | **kwargs) 468 | 469 | 470 | def EfficientNetB4(include_top=True, 471 | weights='imagenet', 472 | input_tensor=None, 473 | input_shape=None, 474 | pooling=None, 475 | classes=1000, 476 | **kwargs): 477 | return EfficientNet(1.4, 1.8, 380, 0.4, 478 | model_name='efficientnet-b4', 479 | include_top=include_top, weights=weights, 480 | input_tensor=input_tensor, input_shape=input_shape, 481 | pooling=pooling, classes=classes, 482 | **kwargs) 483 | 484 | 485 | def EfficientNetB5(include_top=True, 486 | weights='imagenet', 487 | input_tensor=None, 488 | input_shape=None, 489 | pooling=None, 490 | classes=1000, 491 | **kwargs): 492 | return EfficientNet(1.6, 2.2, 456, 0.4, 493 | model_name='efficientnet-b5', 494 | include_top=include_top, weights=weights, 495 | input_tensor=input_tensor, input_shape=input_shape, 496 | pooling=pooling, classes=classes, 497 | **kwargs) 498 | 499 | 500 | def EfficientNetB6(include_top=True, 501 | weights='imagenet', 502 | input_tensor=None, 503 | input_shape=None, 504 | pooling=None, 505 | classes=1000, 506 | **kwargs): 507 | return EfficientNet(1.8, 2.6, 528, 0.5, 508 | model_name='efficientnet-b6', 509 | include_top=include_top, weights=weights, 510 | 
input_tensor=input_tensor, input_shape=input_shape, 511 | pooling=pooling, classes=classes, 512 | **kwargs) 513 | 514 | 515 | def EfficientNetB7(include_top=True, 516 | weights='imagenet', 517 | input_tensor=None, 518 | input_shape=None, 519 | pooling=None, 520 | classes=1000, 521 | **kwargs): 522 | return EfficientNet(2.0, 3.1, 600, 0.5, 523 | model_name='efficientnet-b7', 524 | include_top=include_top, weights=weights, 525 | input_tensor=input_tensor, input_shape=input_shape, 526 | pooling=pooling, classes=classes, 527 | **kwargs) 528 | 529 | 530 | def preprocess_input(x, data_format=None, **kwargs): 531 | """Preprocesses a numpy array encoding a batch of images. 532 | 533 | # Arguments 534 | x: a 3D or 4D numpy array consists of RGB values within [0, 255]. 535 | data_format: data format of the image tensor. 536 | 537 | # Returns 538 | Preprocessed array. 539 | """ 540 | return imagenet_utils.preprocess_input(x, data_format, 541 | mode='torch', **kwargs) 542 | 543 | 544 | setattr(EfficientNetB0, '__doc__', EfficientNet.__doc__) 545 | setattr(EfficientNetB1, '__doc__', EfficientNet.__doc__) 546 | setattr(EfficientNetB2, '__doc__', EfficientNet.__doc__) 547 | setattr(EfficientNetB3, '__doc__', EfficientNet.__doc__) 548 | setattr(EfficientNetB4, '__doc__', EfficientNet.__doc__) 549 | setattr(EfficientNetB5, '__doc__', EfficientNet.__doc__) 550 | setattr(EfficientNetB6, '__doc__', EfficientNet.__doc__) 551 | setattr(EfficientNetB7, '__doc__', EfficientNet.__doc__) 552 | -------------------------------------------------------------------------------- /keras_applications/imagenet_utils.py: -------------------------------------------------------------------------------- 1 | """Utilities for ImageNet data preprocessing & prediction decoding. 2 | """ 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import json 8 | import warnings 9 | import numpy as np 10 | 11 | from . import get_submodules_from_kwargs 12 | 13 | CLASS_INDEX = None 14 | CLASS_INDEX_PATH = ('https://storage.googleapis.com/download.tensorflow.org/' 15 | 'data/imagenet_class_index.json') 16 | 17 | 18 | def _preprocess_numpy_input(x, data_format, mode, **kwargs): 19 | """Preprocesses a Numpy array encoding a batch of images. 20 | 21 | # Arguments 22 | x: Input array, 3D or 4D. 23 | data_format: Data format of the image array. 24 | mode: One of "caffe", "tf" or "torch". 25 | - caffe: will convert the images from RGB to BGR, 26 | then will zero-center each color channel with 27 | respect to the ImageNet dataset, 28 | without scaling. 29 | - tf: will scale pixels between -1 and 1, 30 | sample-wise. 31 | - torch: will scale pixels between 0 and 1 and then 32 | will normalize each channel with respect to the 33 | ImageNet dataset. 34 | 35 | # Returns 36 | Preprocessed Numpy array. 37 | """ 38 | backend, _, _, _ = get_submodules_from_kwargs(kwargs) 39 | if not issubclass(x.dtype.type, np.floating): 40 | x = x.astype(backend.floatx(), copy=False) 41 | 42 | if mode == 'tf': 43 | x /= 127.5 44 | x -= 1. 45 | return x 46 | 47 | if mode == 'torch': 48 | x /= 255. 49 | mean = [0.485, 0.456, 0.406] 50 | std = [0.229, 0.224, 0.225] 51 | else: 52 | if data_format == 'channels_first': 53 | # 'RGB'->'BGR' 54 | if x.ndim == 3: 55 | x = x[::-1, ...] 56 | else: 57 | x = x[:, ::-1, ...] 
58 | else: 59 | # 'RGB'->'BGR' 60 | x = x[..., ::-1] 61 | mean = [103.939, 116.779, 123.68] 62 | std = None 63 | 64 | # Zero-center by mean pixel 65 | if data_format == 'channels_first': 66 | if x.ndim == 3: 67 | x[0, :, :] -= mean[0] 68 | x[1, :, :] -= mean[1] 69 | x[2, :, :] -= mean[2] 70 | if std is not None: 71 | x[0, :, :] /= std[0] 72 | x[1, :, :] /= std[1] 73 | x[2, :, :] /= std[2] 74 | else: 75 | x[:, 0, :, :] -= mean[0] 76 | x[:, 1, :, :] -= mean[1] 77 | x[:, 2, :, :] -= mean[2] 78 | if std is not None: 79 | x[:, 0, :, :] /= std[0] 80 | x[:, 1, :, :] /= std[1] 81 | x[:, 2, :, :] /= std[2] 82 | else: 83 | x[..., 0] -= mean[0] 84 | x[..., 1] -= mean[1] 85 | x[..., 2] -= mean[2] 86 | if std is not None: 87 | x[..., 0] /= std[0] 88 | x[..., 1] /= std[1] 89 | x[..., 2] /= std[2] 90 | return x 91 | 92 | 93 | def _preprocess_symbolic_input(x, data_format, mode, **kwargs): 94 | """Preprocesses a tensor encoding a batch of images. 95 | 96 | # Arguments 97 | x: Input tensor, 3D or 4D. 98 | data_format: Data format of the image tensor. 99 | mode: One of "caffe", "tf" or "torch". 100 | - caffe: will convert the images from RGB to BGR, 101 | then will zero-center each color channel with 102 | respect to the ImageNet dataset, 103 | without scaling. 104 | - tf: will scale pixels between -1 and 1, 105 | sample-wise. 106 | - torch: will scale pixels between 0 and 1 and then 107 | will normalize each channel with respect to the 108 | ImageNet dataset. 109 | 110 | # Returns 111 | Preprocessed tensor. 112 | """ 113 | 114 | backend, _, _, _ = get_submodules_from_kwargs(kwargs) 115 | 116 | if mode == 'tf': 117 | x /= 127.5 118 | x -= 1. 119 | return x 120 | 121 | if mode == 'torch': 122 | x /= 255. 123 | mean = [0.485, 0.456, 0.406] 124 | std = [0.229, 0.224, 0.225] 125 | else: 126 | if data_format == 'channels_first': 127 | # 'RGB'->'BGR' 128 | if backend.ndim(x) == 3: 129 | x = x[::-1, ...] 130 | else: 131 | x = x[:, ::-1, ...] 132 | else: 133 | # 'RGB'->'BGR' 134 | x = x[..., ::-1] 135 | mean = [103.939, 116.779, 123.68] 136 | std = None 137 | 138 | mean_tensor = backend.constant(-np.array(mean)) 139 | 140 | # Zero-center by mean pixel 141 | if backend.dtype(x) != backend.dtype(mean_tensor): 142 | x = backend.bias_add( 143 | x, backend.cast(mean_tensor, backend.dtype(x)), 144 | data_format=data_format) 145 | else: 146 | x = backend.bias_add(x, mean_tensor, data_format) 147 | if std is not None: 148 | x /= std 149 | return x 150 | 151 | 152 | def preprocess_input(x, data_format=None, mode='caffe', **kwargs): 153 | """Preprocesses a tensor or Numpy array encoding a batch of images. 154 | 155 | # Arguments 156 | x: Input Numpy or symbolic tensor, 3D or 4D. 157 | The preprocessed data is written over the input data 158 | if the data types are compatible. To avoid this 159 | behaviour, `numpy.copy(x)` can be used. 160 | data_format: Data format of the image tensor/array. 161 | mode: One of "caffe", "tf" or "torch". 162 | - caffe: will convert the images from RGB to BGR, 163 | then will zero-center each color channel with 164 | respect to the ImageNet dataset, 165 | without scaling. 166 | - tf: will scale pixels between -1 and 1, 167 | sample-wise. 168 | - torch: will scale pixels between 0 and 1 and then 169 | will normalize each channel with respect to the 170 | ImageNet dataset. 171 | 172 | # Returns 173 | Preprocessed tensor or Numpy array. 174 | 175 | # Raises 176 | ValueError: In case of unknown `data_format` argument. 
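    # Example
        In the default `'caffe'` mode, a pure-red RGB pixel `(255, 0, 0)`
        is flipped to BGR `(0, 0, 255)` and zero-centered with the ImageNet
        mean pixel, giving approximately `(-103.94, -116.78, 131.32)`.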
177 | """ 178 | backend, _, _, _ = get_submodules_from_kwargs(kwargs) 179 | 180 | if data_format is None: 181 | data_format = backend.image_data_format() 182 | if data_format not in {'channels_first', 'channels_last'}: 183 | raise ValueError('Unknown data_format ' + str(data_format)) 184 | 185 | if isinstance(x, np.ndarray): 186 | return _preprocess_numpy_input(x, data_format=data_format, 187 | mode=mode, **kwargs) 188 | else: 189 | return _preprocess_symbolic_input(x, data_format=data_format, 190 | mode=mode, **kwargs) 191 | 192 | 193 | def decode_predictions(preds, top=5, **kwargs): 194 | """Decodes the prediction of an ImageNet model. 195 | 196 | # Arguments 197 | preds: Numpy tensor encoding a batch of predictions. 198 | top: Integer, how many top-guesses to return. 199 | 200 | # Returns 201 | A list of lists of top class prediction tuples 202 | `(class_name, class_description, score)`. 203 | One list of tuples per sample in batch input. 204 | 205 | # Raises 206 | ValueError: In case of invalid shape of the `pred` array 207 | (must be 2D). 208 | """ 209 | global CLASS_INDEX 210 | 211 | backend, _, _, keras_utils = get_submodules_from_kwargs(kwargs) 212 | 213 | if len(preds.shape) != 2 or preds.shape[1] != 1000: 214 | raise ValueError('`decode_predictions` expects ' 215 | 'a batch of predictions ' 216 | '(i.e. a 2D array of shape (samples, 1000)). ' 217 | 'Found array with shape: ' + str(preds.shape)) 218 | if CLASS_INDEX is None: 219 | fpath = keras_utils.get_file( 220 | 'imagenet_class_index.json', 221 | CLASS_INDEX_PATH, 222 | cache_subdir='models', 223 | file_hash='c2c37ea517e94d9795004a39431a14cb') 224 | with open(fpath) as f: 225 | CLASS_INDEX = json.load(f) 226 | results = [] 227 | for pred in preds: 228 | top_indices = pred.argsort()[-top:][::-1] 229 | result = [tuple(CLASS_INDEX[str(i)]) + (pred[i],) for i in top_indices] 230 | result.sort(key=lambda x: x[2], reverse=True) 231 | results.append(result) 232 | return results 233 | 234 | 235 | def _obtain_input_shape(input_shape, 236 | default_size, 237 | min_size, 238 | data_format, 239 | require_flatten, 240 | weights=None): 241 | """Internal utility to compute/validate a model's input shape. 242 | 243 | # Arguments 244 | input_shape: Either None (will return the default network input shape), 245 | or a user-provided shape to be validated. 246 | default_size: Default input width/height for the model. 247 | min_size: Minimum input width/height accepted by the model. 248 | data_format: Image data format to use. 249 | require_flatten: Whether the model is expected to 250 | be linked to a classifier via a Flatten layer. 251 | weights: One of `None` (random initialization) 252 | or 'imagenet' (pre-training on ImageNet). 253 | If weights='imagenet' input channels must be equal to 3. 254 | 255 | # Returns 256 | An integer shape tuple (may include None entries). 257 | 258 | # Raises 259 | ValueError: In case of invalid argument values. 260 | """ 261 | if weights != 'imagenet' and input_shape and len(input_shape) == 3: 262 | if data_format == 'channels_first': 263 | if input_shape[0] not in {1, 3}: 264 | warnings.warn( 265 | 'This model usually expects 1 or 3 input channels. ' 266 | 'However, it was passed an input_shape with ' + 267 | str(input_shape[0]) + ' input channels.') 268 | default_shape = (input_shape[0], default_size, default_size) 269 | else: 270 | if input_shape[-1] not in {1, 3}: 271 | warnings.warn( 272 | 'This model usually expects 1 or 3 input channels. 
' 273 | 'However, it was passed an input_shape with ' + 274 | str(input_shape[-1]) + ' input channels.') 275 | default_shape = (default_size, default_size, input_shape[-1]) 276 | else: 277 | if data_format == 'channels_first': 278 | default_shape = (3, default_size, default_size) 279 | else: 280 | default_shape = (default_size, default_size, 3) 281 | if weights == 'imagenet' and require_flatten: 282 | if input_shape is not None: 283 | if input_shape != default_shape: 284 | raise ValueError('When setting `include_top=True` ' 285 | 'and loading `imagenet` weights, ' 286 | '`input_shape` should be ' + 287 | str(default_shape) + '.') 288 | return default_shape 289 | if input_shape: 290 | if data_format == 'channels_first': 291 | if input_shape is not None: 292 | if len(input_shape) != 3: 293 | raise ValueError( 294 | '`input_shape` must be a tuple of three integers.') 295 | if input_shape[0] != 3 and weights == 'imagenet': 296 | raise ValueError('The input must have 3 channels; got ' 297 | '`input_shape=' + str(input_shape) + '`') 298 | if ((input_shape[1] is not None and input_shape[1] < min_size) or 299 | (input_shape[2] is not None and input_shape[2] < min_size)): 300 | raise ValueError('Input size must be at least ' + 301 | str(min_size) + 'x' + str(min_size) + 302 | '; got `input_shape=' + 303 | str(input_shape) + '`') 304 | else: 305 | if input_shape is not None: 306 | if len(input_shape) != 3: 307 | raise ValueError( 308 | '`input_shape` must be a tuple of three integers.') 309 | if input_shape[-1] != 3 and weights == 'imagenet': 310 | raise ValueError('The input must have 3 channels; got ' 311 | '`input_shape=' + str(input_shape) + '`') 312 | if ((input_shape[0] is not None and input_shape[0] < min_size) or 313 | (input_shape[1] is not None and input_shape[1] < min_size)): 314 | raise ValueError('Input size must be at least ' + 315 | str(min_size) + 'x' + str(min_size) + 316 | '; got `input_shape=' + 317 | str(input_shape) + '`') 318 | else: 319 | if require_flatten: 320 | input_shape = default_shape 321 | else: 322 | if data_format == 'channels_first': 323 | input_shape = (3, None, None) 324 | else: 325 | input_shape = (None, None, 3) 326 | if require_flatten: 327 | if None in input_shape: 328 | raise ValueError('If `include_top` is True, ' 329 | 'you should specify a static `input_shape`. ' 330 | 'Got `input_shape=' + str(input_shape) + '`') 331 | return input_shape 332 | -------------------------------------------------------------------------------- /keras_applications/inception_resnet_v2.py: -------------------------------------------------------------------------------- 1 | """Inception-ResNet V2 model for Keras. 2 | 3 | Model naming and structure follows TF-slim implementation 4 | (which has some additional layers and different number of 5 | filters from the original arXiv paper): 6 | https://github.com/tensorflow/models/blob/master/research/slim/nets/inception_resnet_v2.py 7 | 8 | Pre-trained ImageNet weights are also converted from TF-slim, 9 | which can be found in: 10 | https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models 11 | 12 | # Reference 13 | - [Inception-v4, Inception-ResNet and the Impact of 14 | Residual Connections on Learning](https://arxiv.org/abs/1602.07261) (AAAI 2017) 15 | 16 | """ 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import os 22 | 23 | from . import get_submodules_from_kwargs 24 | from . 
import imagenet_utils 25 | from .imagenet_utils import decode_predictions 26 | from .imagenet_utils import _obtain_input_shape 27 | 28 | 29 | BASE_WEIGHT_URL = ('https://github.com/fchollet/deep-learning-models/' 30 | 'releases/download/v0.7/') 31 | 32 | backend = None 33 | layers = None 34 | models = None 35 | keras_utils = None 36 | 37 | 38 | def preprocess_input(x, **kwargs): 39 | """Preprocesses a numpy array encoding a batch of images. 40 | 41 | # Arguments 42 | x: a 4D numpy array consisting of RGB values within [0, 255]. 43 | 44 | # Returns 45 | Preprocessed array. 46 | """ 47 | return imagenet_utils.preprocess_input(x, mode='tf', **kwargs) 48 | 49 | 50 | def conv2d_bn(x, 51 | filters, 52 | kernel_size, 53 | strides=1, 54 | padding='same', 55 | activation='relu', 56 | use_bias=False, 57 | name=None): 58 | """Utility function to apply conv + BN. 59 | 60 | # Arguments 61 | x: input tensor. 62 | filters: filters in `Conv2D`. 63 | kernel_size: kernel size as in `Conv2D`. 64 | strides: strides in `Conv2D`. 65 | padding: padding mode in `Conv2D`. 66 | activation: activation in `Conv2D`. 67 | use_bias: whether to use a bias in `Conv2D`. 68 | name: name of the ops; will become `name + '_ac'` for the activation 69 | and `name + '_bn'` for the batch norm layer. 70 | 71 | # Returns 72 | Output tensor after applying `Conv2D` and `BatchNormalization`. 73 | """ 74 | x = layers.Conv2D(filters, 75 | kernel_size, 76 | strides=strides, 77 | padding=padding, 78 | use_bias=use_bias, 79 | name=name)(x) 80 | if not use_bias: 81 | bn_axis = 1 if backend.image_data_format() == 'channels_first' else 3 82 | bn_name = None if name is None else name + '_bn' 83 | x = layers.BatchNormalization(axis=bn_axis, 84 | scale=False, 85 | name=bn_name)(x) 86 | if activation is not None: 87 | ac_name = None if name is None else name + '_ac' 88 | x = layers.Activation(activation, name=ac_name)(x) 89 | return x 90 | 91 | 92 | def inception_resnet_block(x, scale, block_type, block_idx, activation='relu'): 93 | """Adds an Inception-ResNet block. 94 | 95 | This function builds 3 types of Inception-ResNet blocks mentioned 96 | in the paper, controlled by the `block_type` argument (which is the 97 | block name used in the official TF-slim implementation): 98 | - Inception-ResNet-A: `block_type='block35'` 99 | - Inception-ResNet-B: `block_type='block17'` 100 | - Inception-ResNet-C: `block_type='block8'` 101 | 102 | # Arguments 103 | x: input tensor. 104 | scale: scaling factor to scale the residuals (i.e., the output of 105 | passing `x` through an inception module) before adding them 106 | to the shortcut branch. 107 | Let `r` be the output from the residual branch; 108 | the output of this block will then be `x + scale * r`. 109 | block_type: `'block35'`, `'block17'` or `'block8'`, determines 110 | the network structure in the residual branch. 111 | block_idx: an `int` used for generating layer names. 112 | The Inception-ResNet blocks 113 | are repeated many times in this network. 114 | We use `block_idx` to identify 115 | each of the repetitions. For example, 116 | the first Inception-ResNet-A block 117 | will have `block_type='block35', block_idx=0`, 118 | and the layer names will have 119 | a common prefix `'block35_0'`. 120 | activation: activation function to use at the end of the block 121 | (see [activations](../activations.md)). 122 | When `activation=None`, no activation is applied 123 | (i.e., "linear" activation: `a(x) = x`). 124 | 125 | # Returns 126 | Output tensor for the block.
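    # Example
        `InceptionResNetV2` below repeats this block 10 times with
        `block_type='block35', scale=0.17` and 20 times with
        `block_type='block17', scale=0.1`; with `scale=0.17`, each
        repetition contributes `x + 0.17 * r` for residual branch `r`.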
127 | 128 | # Raises 129 | ValueError: if `block_type` is not one of `'block35'`, 130 | `'block17'` or `'block8'`. 131 | """ 132 | if block_type == 'block35': 133 | branch_0 = conv2d_bn(x, 32, 1) 134 | branch_1 = conv2d_bn(x, 32, 1) 135 | branch_1 = conv2d_bn(branch_1, 32, 3) 136 | branch_2 = conv2d_bn(x, 32, 1) 137 | branch_2 = conv2d_bn(branch_2, 48, 3) 138 | branch_2 = conv2d_bn(branch_2, 64, 3) 139 | branches = [branch_0, branch_1, branch_2] 140 | elif block_type == 'block17': 141 | branch_0 = conv2d_bn(x, 192, 1) 142 | branch_1 = conv2d_bn(x, 128, 1) 143 | branch_1 = conv2d_bn(branch_1, 160, [1, 7]) 144 | branch_1 = conv2d_bn(branch_1, 192, [7, 1]) 145 | branches = [branch_0, branch_1] 146 | elif block_type == 'block8': 147 | branch_0 = conv2d_bn(x, 192, 1) 148 | branch_1 = conv2d_bn(x, 192, 1) 149 | branch_1 = conv2d_bn(branch_1, 224, [1, 3]) 150 | branch_1 = conv2d_bn(branch_1, 256, [3, 1]) 151 | branches = [branch_0, branch_1] 152 | else: 153 | raise ValueError('Unknown Inception-ResNet block type. ' 154 | 'Expects "block35", "block17" or "block8", ' 155 | 'but got: ' + str(block_type)) 156 | 157 | block_name = block_type + '_' + str(block_idx) 158 | channel_axis = 1 if backend.image_data_format() == 'channels_first' else 3 159 | mixed = layers.Concatenate( 160 | axis=channel_axis, name=block_name + '_mixed')(branches) 161 | up = conv2d_bn(mixed, 162 | backend.int_shape(x)[channel_axis], 163 | 1, 164 | activation=None, 165 | use_bias=True, 166 | name=block_name + '_conv') 167 | 168 | x = layers.Lambda(lambda inputs, scale: inputs[0] + inputs[1] * scale, 169 | output_shape=backend.int_shape(x)[1:], 170 | arguments={'scale': scale}, 171 | name=block_name)([x, up]) 172 | if activation is not None: 173 | x = layers.Activation(activation, name=block_name + '_ac')(x) 174 | return x 175 | 176 | 177 | def InceptionResNetV2(include_top=True, 178 | weights='imagenet', 179 | input_tensor=None, 180 | input_shape=None, 181 | pooling=None, 182 | classes=1000, 183 | **kwargs): 184 | """Instantiates the Inception-ResNet v2 architecture. 185 | 186 | Optionally loads weights pre-trained on ImageNet. 187 | Note that the data format convention used by the model is 188 | the one specified in your Keras config at `~/.keras/keras.json`. 189 | 190 | # Arguments 191 | include_top: whether to include the fully-connected 192 | layer at the top of the network. 193 | weights: one of `None` (random initialization), 194 | 'imagenet' (pre-training on ImageNet), 195 | or the path to the weights file to be loaded. 196 | input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) 197 | to use as image input for the model. 198 | input_shape: optional shape tuple, only to be specified 199 | if `include_top` is `False` (otherwise the input shape 200 | has to be `(299, 299, 3)` (with `'channels_last'` data format) 201 | or `(3, 299, 299)` (with `'channels_first'` data format). 202 | It should have exactly 3 inputs channels, 203 | and width and height should be no smaller than 75. 204 | E.g. `(150, 150, 3)` would be one valid value. 205 | pooling: Optional pooling mode for feature extraction 206 | when `include_top` is `False`. 207 | - `None` means that the output of the model will be 208 | the 4D tensor output of the last convolutional block. 209 | - `'avg'` means that global average pooling 210 | will be applied to the output of the 211 | last convolutional block, and thus 212 | the output of the model will be a 2D tensor. 213 | - `'max'` means that global max pooling will be applied. 
214 | classes: optional number of classes to classify images 215 | into, only to be specified if `include_top` is `True`, and 216 | if no `weights` argument is specified. 217 | 218 | # Returns 219 | A Keras `Model` instance. 220 | 221 | # Raises 222 | ValueError: in case of invalid argument for `weights`, 223 | or invalid input shape. 224 | """ 225 | global backend, layers, models, keras_utils 226 | backend, layers, models, keras_utils = get_submodules_from_kwargs(kwargs) 227 | 228 | if not (weights in {'imagenet', None} or os.path.exists(weights)): 229 | raise ValueError('The `weights` argument should be either ' 230 | '`None` (random initialization), `imagenet` ' 231 | '(pre-training on ImageNet), ' 232 | 'or the path to the weights file to be loaded.') 233 | 234 | if weights == 'imagenet' and include_top and classes != 1000: 235 | raise ValueError('If using `weights` as `"imagenet"` with `include_top`' 236 | ' as true, `classes` should be 1000') 237 | 238 | # Determine proper input shape 239 | input_shape = _obtain_input_shape( 240 | input_shape, 241 | default_size=299, 242 | min_size=75, 243 | data_format=backend.image_data_format(), 244 | require_flatten=include_top, 245 | weights=weights) 246 | 247 | if input_tensor is None: 248 | img_input = layers.Input(shape=input_shape) 249 | else: 250 | if not backend.is_keras_tensor(input_tensor): 251 | img_input = layers.Input(tensor=input_tensor, shape=input_shape) 252 | else: 253 | img_input = input_tensor 254 | 255 | # Stem block: 35 x 35 x 192 256 | x = conv2d_bn(img_input, 32, 3, strides=2, padding='valid') 257 | x = conv2d_bn(x, 32, 3, padding='valid') 258 | x = conv2d_bn(x, 64, 3) 259 | x = layers.MaxPooling2D(3, strides=2)(x) 260 | x = conv2d_bn(x, 80, 1, padding='valid') 261 | x = conv2d_bn(x, 192, 3, padding='valid') 262 | x = layers.MaxPooling2D(3, strides=2)(x) 263 | 264 | # Mixed 5b (Inception-A block): 35 x 35 x 320 265 | branch_0 = conv2d_bn(x, 96, 1) 266 | branch_1 = conv2d_bn(x, 48, 1) 267 | branch_1 = conv2d_bn(branch_1, 64, 5) 268 | branch_2 = conv2d_bn(x, 64, 1) 269 | branch_2 = conv2d_bn(branch_2, 96, 3) 270 | branch_2 = conv2d_bn(branch_2, 96, 3) 271 | branch_pool = layers.AveragePooling2D(3, strides=1, padding='same')(x) 272 | branch_pool = conv2d_bn(branch_pool, 64, 1) 273 | branches = [branch_0, branch_1, branch_2, branch_pool] 274 | channel_axis = 1 if backend.image_data_format() == 'channels_first' else 3 275 | x = layers.Concatenate(axis=channel_axis, name='mixed_5b')(branches) 276 | 277 | # 10x block35 (Inception-ResNet-A block): 35 x 35 x 320 278 | for block_idx in range(1, 11): 279 | x = inception_resnet_block(x, 280 | scale=0.17, 281 | block_type='block35', 282 | block_idx=block_idx) 283 | 284 | # Mixed 6a (Reduction-A block): 17 x 17 x 1088 285 | branch_0 = conv2d_bn(x, 384, 3, strides=2, padding='valid') 286 | branch_1 = conv2d_bn(x, 256, 1) 287 | branch_1 = conv2d_bn(branch_1, 256, 3) 288 | branch_1 = conv2d_bn(branch_1, 384, 3, strides=2, padding='valid') 289 | branch_pool = layers.MaxPooling2D(3, strides=2, padding='valid')(x) 290 | branches = [branch_0, branch_1, branch_pool] 291 | x = layers.Concatenate(axis=channel_axis, name='mixed_6a')(branches) 292 | 293 | # 20x block17 (Inception-ResNet-B block): 17 x 17 x 1088 294 | for block_idx in range(1, 21): 295 | x = inception_resnet_block(x, 296 | scale=0.1, 297 | block_type='block17', 298 | block_idx=block_idx) 299 | 300 | # Mixed 7a (Reduction-B block): 8 x 8 x 2080 301 | branch_0 = conv2d_bn(x, 256, 1) 302 | branch_0 = conv2d_bn(branch_0, 384, 3, 
strides=2, padding='valid') 303 | branch_1 = conv2d_bn(x, 256, 1) 304 | branch_1 = conv2d_bn(branch_1, 288, 3, strides=2, padding='valid') 305 | branch_2 = conv2d_bn(x, 256, 1) 306 | branch_2 = conv2d_bn(branch_2, 288, 3) 307 | branch_2 = conv2d_bn(branch_2, 320, 3, strides=2, padding='valid') 308 | branch_pool = layers.MaxPooling2D(3, strides=2, padding='valid')(x) 309 | branches = [branch_0, branch_1, branch_2, branch_pool] 310 | x = layers.Concatenate(axis=channel_axis, name='mixed_7a')(branches) 311 | 312 | # 10x block8 (Inception-ResNet-C block): 8 x 8 x 2080 313 | for block_idx in range(1, 10): 314 | x = inception_resnet_block(x, 315 | scale=0.2, 316 | block_type='block8', 317 | block_idx=block_idx) 318 | x = inception_resnet_block(x, 319 | scale=1., 320 | activation=None, 321 | block_type='block8', 322 | block_idx=10) 323 | 324 | # Final convolution block: 8 x 8 x 1536 325 | x = conv2d_bn(x, 1536, 1, name='conv_7b') 326 | 327 | if include_top: 328 | # Classification block 329 | x = layers.GlobalAveragePooling2D(name='avg_pool')(x) 330 | x = layers.Dense(classes, activation='softmax', name='predictions')(x) 331 | else: 332 | if pooling == 'avg': 333 | x = layers.GlobalAveragePooling2D()(x) 334 | elif pooling == 'max': 335 | x = layers.GlobalMaxPooling2D()(x) 336 | 337 | # Ensure that the model takes into account 338 | # any potential predecessors of `input_tensor`. 339 | if input_tensor is not None: 340 | inputs = keras_utils.get_source_inputs(input_tensor) 341 | else: 342 | inputs = img_input 343 | 344 | # Create model. 345 | model = models.Model(inputs, x, name='inception_resnet_v2') 346 | 347 | # Load weights. 348 | if weights == 'imagenet': 349 | if include_top: 350 | fname = 'inception_resnet_v2_weights_tf_dim_ordering_tf_kernels.h5' 351 | weights_path = keras_utils.get_file( 352 | fname, 353 | BASE_WEIGHT_URL + fname, 354 | cache_subdir='models', 355 | file_hash='e693bd0210a403b3192acc6073ad2e96') 356 | else: 357 | fname = ('inception_resnet_v2_weights_' 358 | 'tf_dim_ordering_tf_kernels_notop.h5') 359 | weights_path = keras_utils.get_file( 360 | fname, 361 | BASE_WEIGHT_URL + fname, 362 | cache_subdir='models', 363 | file_hash='d19885ff4a710c122648d3b5c3b684e4') 364 | model.load_weights(weights_path) 365 | elif weights is not None: 366 | model.load_weights(weights) 367 | 368 | return model 369 | -------------------------------------------------------------------------------- /keras_applications/inception_v3.py: -------------------------------------------------------------------------------- 1 | """Inception V3 model for Keras. 2 | 3 | Note that the input image format for this model is different than for 4 | the VGG16 and ResNet models (299x299 instead of 224x224), 5 | and that the input preprocessing function is also different (same as Xception). 6 | 7 | # Reference 8 | 9 | - [Rethinking the Inception Architecture for Computer Vision]( 10 | http://arxiv.org/abs/1512.00567) (CVPR 2016) 11 | 12 | """ 13 | from __future__ import absolute_import 14 | from __future__ import division 15 | from __future__ import print_function 16 | 17 | import os 18 | 19 | from . import get_submodules_from_kwargs 20 | from . 
import imagenet_utils 21 | from .imagenet_utils import decode_predictions 22 | from .imagenet_utils import _obtain_input_shape 23 | 24 | 25 | WEIGHTS_PATH = ( 26 | 'https://github.com/fchollet/deep-learning-models/' 27 | 'releases/download/v0.5/' 28 | 'inception_v3_weights_tf_dim_ordering_tf_kernels.h5') 29 | WEIGHTS_PATH_NO_TOP = ( 30 | 'https://github.com/fchollet/deep-learning-models/' 31 | 'releases/download/v0.5/' 32 | 'inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5') 33 | 34 | backend = None 35 | layers = None 36 | models = None 37 | keras_utils = None 38 | 39 | 40 | def conv2d_bn(x, 41 | filters, 42 | num_row, 43 | num_col, 44 | padding='same', 45 | strides=(1, 1), 46 | name=None): 47 | """Utility function to apply conv + BN. 48 | 49 | # Arguments 50 | x: input tensor. 51 | filters: filters in `Conv2D`. 52 | num_row: height of the convolution kernel. 53 | num_col: width of the convolution kernel. 54 | padding: padding mode in `Conv2D`. 55 | strides: strides in `Conv2D`. 56 | name: name of the ops; will become `name + '_conv'` 57 | for the convolution and `name + '_bn'` for the 58 | batch norm layer. 59 | 60 | # Returns 61 | Output tensor after applying `Conv2D` and `BatchNormalization`. 62 | """ 63 | if name is not None: 64 | bn_name = name + '_bn' 65 | conv_name = name + '_conv' 66 | else: 67 | bn_name = None 68 | conv_name = None 69 | if backend.image_data_format() == 'channels_first': 70 | bn_axis = 1 71 | else: 72 | bn_axis = 3 73 | x = layers.Conv2D( 74 | filters, (num_row, num_col), 75 | strides=strides, 76 | padding=padding, 77 | use_bias=False, 78 | name=conv_name)(x) 79 | x = layers.BatchNormalization(axis=bn_axis, scale=False, name=bn_name)(x) 80 | x = layers.Activation('relu', name=name)(x) 81 | return x 82 | 83 | 84 | def InceptionV3(include_top=True, 85 | weights='imagenet', 86 | input_tensor=None, 87 | input_shape=None, 88 | pooling=None, 89 | classes=1000, 90 | **kwargs): 91 | """Instantiates the Inception v3 architecture. 92 | 93 | Optionally loads weights pre-trained on ImageNet. 94 | Note that the data format convention used by the model is 95 | the one specified in your Keras config at `~/.keras/keras.json`. 96 | 97 | # Arguments 98 | include_top: whether to include the fully-connected 99 | layer at the top of the network. 100 | weights: one of `None` (random initialization), 101 | 'imagenet' (pre-training on ImageNet), 102 | or the path to the weights file to be loaded. 103 | input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) 104 | to use as image input for the model. 105 | input_shape: optional shape tuple, only to be specified 106 | if `include_top` is False (otherwise the input shape 107 | has to be `(299, 299, 3)` (with `channels_last` data format) 108 | or `(3, 299, 299)` (with `channels_first` data format). 109 | It should have exactly 3 inputs channels, 110 | and width and height should be no smaller than 75. 111 | E.g. `(150, 150, 3)` would be one valid value. 112 | pooling: Optional pooling mode for feature extraction 113 | when `include_top` is `False`. 114 | - `None` means that the output of the model will be 115 | the 4D tensor output of the 116 | last convolutional block. 117 | - `avg` means that global average pooling 118 | will be applied to the output of the 119 | last convolutional block, and thus 120 | the output of the model will be a 2D tensor. 121 | - `max` means that global max pooling will 122 | be applied. 
123 | classes: optional number of classes to classify images 124 | into, only to be specified if `include_top` is True, and 125 | if no `weights` argument is specified. 126 | 127 | # Returns 128 | A Keras model instance. 129 | 130 | # Raises 131 | ValueError: in case of invalid argument for `weights`, 132 | or invalid input shape. 133 | """ 134 | global backend, layers, models, keras_utils 135 | backend, layers, models, keras_utils = get_submodules_from_kwargs(kwargs) 136 | 137 | if not (weights in {'imagenet', None} or os.path.exists(weights)): 138 | raise ValueError('The `weights` argument should be either ' 139 | '`None` (random initialization), `imagenet` ' 140 | '(pre-training on ImageNet), ' 141 | 'or the path to the weights file to be loaded.') 142 | 143 | if weights == 'imagenet' and include_top and classes != 1000: 144 | raise ValueError('If using `weights` as `"imagenet"` with `include_top`' 145 | ' as true, `classes` should be 1000') 146 | 147 | # Determine proper input shape 148 | input_shape = _obtain_input_shape( 149 | input_shape, 150 | default_size=299, 151 | min_size=75, 152 | data_format=backend.image_data_format(), 153 | require_flatten=include_top, 154 | weights=weights) 155 | 156 | if input_tensor is None: 157 | img_input = layers.Input(shape=input_shape) 158 | else: 159 | if not backend.is_keras_tensor(input_tensor): 160 | img_input = layers.Input(tensor=input_tensor, shape=input_shape) 161 | else: 162 | img_input = input_tensor 163 | 164 | if backend.image_data_format() == 'channels_first': 165 | channel_axis = 1 166 | else: 167 | channel_axis = 3 168 | 169 | x = conv2d_bn(img_input, 32, 3, 3, strides=(2, 2), padding='valid') 170 | x = conv2d_bn(x, 32, 3, 3, padding='valid') 171 | x = conv2d_bn(x, 64, 3, 3) 172 | x = layers.MaxPooling2D((3, 3), strides=(2, 2))(x) 173 | 174 | x = conv2d_bn(x, 80, 1, 1, padding='valid') 175 | x = conv2d_bn(x, 192, 3, 3, padding='valid') 176 | x = layers.MaxPooling2D((3, 3), strides=(2, 2))(x) 177 | 178 | # mixed 0: 35 x 35 x 256 179 | branch1x1 = conv2d_bn(x, 64, 1, 1) 180 | 181 | branch5x5 = conv2d_bn(x, 48, 1, 1) 182 | branch5x5 = conv2d_bn(branch5x5, 64, 5, 5) 183 | 184 | branch3x3dbl = conv2d_bn(x, 64, 1, 1) 185 | branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) 186 | branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) 187 | 188 | branch_pool = layers.AveragePooling2D((3, 3), 189 | strides=(1, 1), 190 | padding='same')(x) 191 | branch_pool = conv2d_bn(branch_pool, 32, 1, 1) 192 | x = layers.concatenate( 193 | [branch1x1, branch5x5, branch3x3dbl, branch_pool], 194 | axis=channel_axis, 195 | name='mixed0') 196 | 197 | # mixed 1: 35 x 35 x 288 198 | branch1x1 = conv2d_bn(x, 64, 1, 1) 199 | 200 | branch5x5 = conv2d_bn(x, 48, 1, 1) 201 | branch5x5 = conv2d_bn(branch5x5, 64, 5, 5) 202 | 203 | branch3x3dbl = conv2d_bn(x, 64, 1, 1) 204 | branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) 205 | branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) 206 | 207 | branch_pool = layers.AveragePooling2D((3, 3), 208 | strides=(1, 1), 209 | padding='same')(x) 210 | branch_pool = conv2d_bn(branch_pool, 64, 1, 1) 211 | x = layers.concatenate( 212 | [branch1x1, branch5x5, branch3x3dbl, branch_pool], 213 | axis=channel_axis, 214 | name='mixed1') 215 | 216 | # mixed 2: 35 x 35 x 288 217 | branch1x1 = conv2d_bn(x, 64, 1, 1) 218 | 219 | branch5x5 = conv2d_bn(x, 48, 1, 1) 220 | branch5x5 = conv2d_bn(branch5x5, 64, 5, 5) 221 | 222 | branch3x3dbl = conv2d_bn(x, 64, 1, 1) 223 | branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) 224 | branch3x3dbl = conv2d_bn(branch3x3dbl, 
96, 3, 3) 225 | 226 | branch_pool = layers.AveragePooling2D((3, 3), 227 | strides=(1, 1), 228 | padding='same')(x) 229 | branch_pool = conv2d_bn(branch_pool, 64, 1, 1) 230 | x = layers.concatenate( 231 | [branch1x1, branch5x5, branch3x3dbl, branch_pool], 232 | axis=channel_axis, 233 | name='mixed2') 234 | 235 | # mixed 3: 17 x 17 x 768 236 | branch3x3 = conv2d_bn(x, 384, 3, 3, strides=(2, 2), padding='valid') 237 | 238 | branch3x3dbl = conv2d_bn(x, 64, 1, 1) 239 | branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) 240 | branch3x3dbl = conv2d_bn( 241 | branch3x3dbl, 96, 3, 3, strides=(2, 2), padding='valid') 242 | 243 | branch_pool = layers.MaxPooling2D((3, 3), strides=(2, 2))(x) 244 | x = layers.concatenate( 245 | [branch3x3, branch3x3dbl, branch_pool], 246 | axis=channel_axis, 247 | name='mixed3') 248 | 249 | # mixed 4: 17 x 17 x 768 250 | branch1x1 = conv2d_bn(x, 192, 1, 1) 251 | 252 | branch7x7 = conv2d_bn(x, 128, 1, 1) 253 | branch7x7 = conv2d_bn(branch7x7, 128, 1, 7) 254 | branch7x7 = conv2d_bn(branch7x7, 192, 7, 1) 255 | 256 | branch7x7dbl = conv2d_bn(x, 128, 1, 1) 257 | branch7x7dbl = conv2d_bn(branch7x7dbl, 128, 7, 1) 258 | branch7x7dbl = conv2d_bn(branch7x7dbl, 128, 1, 7) 259 | branch7x7dbl = conv2d_bn(branch7x7dbl, 128, 7, 1) 260 | branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7) 261 | 262 | branch_pool = layers.AveragePooling2D((3, 3), 263 | strides=(1, 1), 264 | padding='same')(x) 265 | branch_pool = conv2d_bn(branch_pool, 192, 1, 1) 266 | x = layers.concatenate( 267 | [branch1x1, branch7x7, branch7x7dbl, branch_pool], 268 | axis=channel_axis, 269 | name='mixed4') 270 | 271 | # mixed 5, 6: 17 x 17 x 768 272 | for i in range(2): 273 | branch1x1 = conv2d_bn(x, 192, 1, 1) 274 | 275 | branch7x7 = conv2d_bn(x, 160, 1, 1) 276 | branch7x7 = conv2d_bn(branch7x7, 160, 1, 7) 277 | branch7x7 = conv2d_bn(branch7x7, 192, 7, 1) 278 | 279 | branch7x7dbl = conv2d_bn(x, 160, 1, 1) 280 | branch7x7dbl = conv2d_bn(branch7x7dbl, 160, 7, 1) 281 | branch7x7dbl = conv2d_bn(branch7x7dbl, 160, 1, 7) 282 | branch7x7dbl = conv2d_bn(branch7x7dbl, 160, 7, 1) 283 | branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7) 284 | 285 | branch_pool = layers.AveragePooling2D( 286 | (3, 3), strides=(1, 1), padding='same')(x) 287 | branch_pool = conv2d_bn(branch_pool, 192, 1, 1) 288 | x = layers.concatenate( 289 | [branch1x1, branch7x7, branch7x7dbl, branch_pool], 290 | axis=channel_axis, 291 | name='mixed' + str(5 + i)) 292 | 293 | # mixed 7: 17 x 17 x 768 294 | branch1x1 = conv2d_bn(x, 192, 1, 1) 295 | 296 | branch7x7 = conv2d_bn(x, 192, 1, 1) 297 | branch7x7 = conv2d_bn(branch7x7, 192, 1, 7) 298 | branch7x7 = conv2d_bn(branch7x7, 192, 7, 1) 299 | 300 | branch7x7dbl = conv2d_bn(x, 192, 1, 1) 301 | branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 7, 1) 302 | branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7) 303 | branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 7, 1) 304 | branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7) 305 | 306 | branch_pool = layers.AveragePooling2D((3, 3), 307 | strides=(1, 1), 308 | padding='same')(x) 309 | branch_pool = conv2d_bn(branch_pool, 192, 1, 1) 310 | x = layers.concatenate( 311 | [branch1x1, branch7x7, branch7x7dbl, branch_pool], 312 | axis=channel_axis, 313 | name='mixed7') 314 | 315 | # mixed 8: 8 x 8 x 1280 316 | branch3x3 = conv2d_bn(x, 192, 1, 1) 317 | branch3x3 = conv2d_bn(branch3x3, 320, 3, 3, 318 | strides=(2, 2), padding='valid') 319 | 320 | branch7x7x3 = conv2d_bn(x, 192, 1, 1) 321 | branch7x7x3 = conv2d_bn(branch7x7x3, 192, 1, 7) 322 | branch7x7x3 = conv2d_bn(branch7x7x3, 192, 7, 1) 323 
| branch7x7x3 = conv2d_bn( 324 | branch7x7x3, 192, 3, 3, strides=(2, 2), padding='valid') 325 | 326 | branch_pool = layers.MaxPooling2D((3, 3), strides=(2, 2))(x) 327 | x = layers.concatenate( 328 | [branch3x3, branch7x7x3, branch_pool], 329 | axis=channel_axis, 330 | name='mixed8') 331 | 332 | # mixed 9: 8 x 8 x 2048 333 | for i in range(2): 334 | branch1x1 = conv2d_bn(x, 320, 1, 1) 335 | 336 | branch3x3 = conv2d_bn(x, 384, 1, 1) 337 | branch3x3_1 = conv2d_bn(branch3x3, 384, 1, 3) 338 | branch3x3_2 = conv2d_bn(branch3x3, 384, 3, 1) 339 | branch3x3 = layers.concatenate( 340 | [branch3x3_1, branch3x3_2], 341 | axis=channel_axis, 342 | name='mixed9_' + str(i)) 343 | 344 | branch3x3dbl = conv2d_bn(x, 448, 1, 1) 345 | branch3x3dbl = conv2d_bn(branch3x3dbl, 384, 3, 3) 346 | branch3x3dbl_1 = conv2d_bn(branch3x3dbl, 384, 1, 3) 347 | branch3x3dbl_2 = conv2d_bn(branch3x3dbl, 384, 3, 1) 348 | branch3x3dbl = layers.concatenate( 349 | [branch3x3dbl_1, branch3x3dbl_2], axis=channel_axis) 350 | 351 | branch_pool = layers.AveragePooling2D( 352 | (3, 3), strides=(1, 1), padding='same')(x) 353 | branch_pool = conv2d_bn(branch_pool, 192, 1, 1) 354 | x = layers.concatenate( 355 | [branch1x1, branch3x3, branch3x3dbl, branch_pool], 356 | axis=channel_axis, 357 | name='mixed' + str(9 + i)) 358 | if include_top: 359 | # Classification block 360 | x = layers.GlobalAveragePooling2D(name='avg_pool')(x) 361 | x = layers.Dense(classes, activation='softmax', name='predictions')(x) 362 | else: 363 | if pooling == 'avg': 364 | x = layers.GlobalAveragePooling2D()(x) 365 | elif pooling == 'max': 366 | x = layers.GlobalMaxPooling2D()(x) 367 | 368 | # Ensure that the model takes into account 369 | # any potential predecessors of `input_tensor`. 370 | if input_tensor is not None: 371 | inputs = keras_utils.get_source_inputs(input_tensor) 372 | else: 373 | inputs = img_input 374 | # Create model. 375 | model = models.Model(inputs, x, name='inception_v3') 376 | 377 | # Load weights. 378 | if weights == 'imagenet': 379 | if include_top: 380 | weights_path = keras_utils.get_file( 381 | 'inception_v3_weights_tf_dim_ordering_tf_kernels.h5', 382 | WEIGHTS_PATH, 383 | cache_subdir='models', 384 | file_hash='9a0d58056eeedaa3f26cb7ebd46da564') 385 | else: 386 | weights_path = keras_utils.get_file( 387 | 'inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5', 388 | WEIGHTS_PATH_NO_TOP, 389 | cache_subdir='models', 390 | file_hash='bcbd6486424b2319ff4ef7d526e38f63') 391 | model.load_weights(weights_path) 392 | elif weights is not None: 393 | model.load_weights(weights) 394 | 395 | return model 396 | 397 | 398 | def preprocess_input(x, **kwargs): 399 | """Preprocesses a numpy array encoding a batch of images. 400 | 401 | # Arguments 402 | x: a 4D numpy array consists of RGB values within [0, 255]. 403 | 404 | # Returns 405 | Preprocessed array. 406 | """ 407 | return imagenet_utils.preprocess_input(x, mode='tf', **kwargs) 408 | -------------------------------------------------------------------------------- /keras_applications/mobilenet.py: -------------------------------------------------------------------------------- 1 | """MobileNet v1 models for Keras. 2 | 3 | MobileNet is a general architecture and can be used for multiple use cases. 4 | Depending on the use case, it can use different input layer size and 5 | different width factors. This allows different width models to reduce 6 | the number of multiply-adds and thereby 7 | reduce inference cost on mobile devices. 
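# Illustrative sketch (not part of mobilenet.py): the `alpha` width
# multiplier described below simply rescales every layer's filter count.
# Plain Python; the list holds the distinct stage widths used in the
# network further down:
base_filters = [32, 64, 128, 256, 512, 1024]
alpha = 0.25
print([int(f * alpha) for f in base_filters])  # [8, 16, 32, 64, 128, 256]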
8 | 9 | MobileNets support any input size greater than 32 x 32, with larger image sizes 10 | offering better performance. 11 | The number of parameters and number of multiply-adds 12 | can be modified by using the `alpha` parameter, 13 | which increases/decreases the number of filters in each layer. 14 | By altering the image size and `alpha` parameter, 15 | all 16 models from the paper can be built, with ImageNet weights provided. 16 | 17 | The paper demonstrates the performance of MobileNets using `alpha` values of 18 | 1.0 (also called 100 % MobileNet), 0.75, 0.5 and 0.25. 19 | For each of these `alpha` values, weights for 4 different input image sizes 20 | are provided (224, 192, 160, 128). 21 | 22 | The following table describes the size and accuracy of the 100% MobileNet 23 | on size 224 x 224: 24 | ---------------------------------------------------------------------------- 25 | Width Multiplier (alpha) | ImageNet Acc | Multiply-Adds (M) | Params (M) 26 | ---------------------------------------------------------------------------- 27 | | 1.0 MobileNet-224 | 70.6 % | 529 | 4.2 | 28 | | 0.75 MobileNet-224 | 68.4 % | 325 | 2.6 | 29 | | 0.50 MobileNet-224 | 63.7 % | 149 | 1.3 | 30 | | 0.25 MobileNet-224 | 50.6 % | 41 | 0.5 | 31 | ---------------------------------------------------------------------------- 32 | 33 | The following table describes the performance of 34 | the 100 % MobileNet on various input sizes: 35 | ------------------------------------------------------------------------ 36 | Resolution | ImageNet Acc | Multiply-Adds (M) | Params (M) 37 | ------------------------------------------------------------------------ 38 | | 1.0 MobileNet-224 | 70.6 % | 529 | 4.2 | 39 | | 1.0 MobileNet-192 | 69.1 % | 529 | 4.2 | 40 | | 1.0 MobileNet-160 | 67.2 % | 529 | 4.2 | 41 | | 1.0 MobileNet-128 | 64.4 % | 529 | 4.2 | 42 | ------------------------------------------------------------------------ 43 | 44 | The weights for all 16 models are obtained and translated 45 | from TensorFlow checkpoints found at 46 | https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet_v1.md 47 | 48 | # Reference 49 | 50 | - [MobileNets: Efficient Convolutional Neural Networks for 51 | Mobile Vision Applications](https://arxiv.org/abs/1704.04861) 52 | """ 53 | from __future__ import print_function 54 | from __future__ import absolute_import 55 | from __future__ import division 56 | 57 | import os 58 | import warnings 59 | 60 | from . import get_submodules_from_kwargs 61 | from . import imagenet_utils 62 | from .imagenet_utils import decode_predictions 63 | from .imagenet_utils import _obtain_input_shape 64 | 65 | 66 | BASE_WEIGHT_PATH = ('https://github.com/fchollet/deep-learning-models/' 67 | 'releases/download/v0.6/') 68 | 69 | backend = None 70 | layers = None 71 | models = None 72 | keras_utils = None 73 | 74 | 75 | def preprocess_input(x, **kwargs): 76 | """Preprocesses a numpy array encoding a batch of images. 77 | 78 | # Arguments 79 | x: a 4D numpy array consists of RGB values within [0, 255]. 80 | 81 | # Returns 82 | Preprocessed array. 83 | """ 84 | return imagenet_utils.preprocess_input(x, mode='tf', **kwargs) 85 | 86 | 87 | def MobileNet(input_shape=None, 88 | alpha=1.0, 89 | depth_multiplier=1, 90 | dropout=1e-3, 91 | include_top=True, 92 | weights='imagenet', 93 | input_tensor=None, 94 | pooling=None, 95 | classes=1000, 96 | **kwargs): 97 | """Instantiates the MobileNet architecture. 
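# Illustrative sketch (not part of mobilenet.py): instantiating the model
# through the keras==2.2.4 wrapper pinned in .travis.yml; alpha=0.25 with a
# 128 x 128 input picks the smallest of the 16 published ImageNet checkpoints.
from keras.applications.mobilenet import MobileNet

model = MobileNet(input_shape=(128, 128, 3), alpha=0.25, weights='imagenet')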
98 | 99 | # Arguments 100 | input_shape: optional shape tuple, only to be specified 101 | if `include_top` is False (otherwise the input shape 102 | has to be `(224, 224, 3)` 103 | (with `channels_last` data format) 104 | or (3, 224, 224) (with `channels_first` data format). 105 | It should have exactly 3 inputs channels, 106 | and width and height should be no smaller than 32. 107 | E.g. `(200, 200, 3)` would be one valid value. 108 | alpha: controls the width of the network. This is known as the 109 | width multiplier in the MobileNet paper. 110 | - If `alpha` < 1.0, proportionally decreases the number 111 | of filters in each layer. 112 | - If `alpha` > 1.0, proportionally increases the number 113 | of filters in each layer. 114 | - If `alpha` = 1, default number of filters from the paper 115 | are used at each layer. 116 | depth_multiplier: depth multiplier for depthwise convolution. This 117 | is called the resolution multiplier in the MobileNet paper. 118 | dropout: dropout rate 119 | include_top: whether to include the fully-connected 120 | layer at the top of the network. 121 | weights: one of `None` (random initialization), 122 | 'imagenet' (pre-training on ImageNet), 123 | or the path to the weights file to be loaded. 124 | input_tensor: optional Keras tensor (i.e. output of 125 | `layers.Input()`) 126 | to use as image input for the model. 127 | pooling: Optional pooling mode for feature extraction 128 | when `include_top` is `False`. 129 | - `None` means that the output of the model 130 | will be the 4D tensor output of the 131 | last convolutional block. 132 | - `avg` means that global average pooling 133 | will be applied to the output of the 134 | last convolutional block, and thus 135 | the output of the model will be a 136 | 2D tensor. 137 | - `max` means that global max pooling will 138 | be applied. 139 | classes: optional number of classes to classify images 140 | into, only to be specified if `include_top` is True, and 141 | if no `weights` argument is specified. 142 | 143 | # Returns 144 | A Keras model instance. 145 | 146 | # Raises 147 | ValueError: in case of invalid argument for `weights`, 148 | or invalid input shape. 149 | RuntimeError: If attempting to run this model with a 150 | backend that does not support separable convolutions. 151 | """ 152 | global backend, layers, models, keras_utils 153 | backend, layers, models, keras_utils = get_submodules_from_kwargs(kwargs) 154 | 155 | if not (weights in {'imagenet', None} or os.path.exists(weights)): 156 | raise ValueError('The `weights` argument should be either ' 157 | '`None` (random initialization), `imagenet` ' 158 | '(pre-training on ImageNet), ' 159 | 'or the path to the weights file to be loaded.') 160 | 161 | if weights == 'imagenet' and include_top and classes != 1000: 162 | raise ValueError('If using `weights` as `"imagenet"` with `include_top` ' 163 | 'as true, `classes` should be 1000') 164 | 165 | # Determine proper input shape and default size. 
166 | if input_shape is None: 167 | default_size = 224 168 | else: 169 | if backend.image_data_format() == 'channels_first': 170 | rows = input_shape[1] 171 | cols = input_shape[2] 172 | else: 173 | rows = input_shape[0] 174 | cols = input_shape[1] 175 | 176 | if rows == cols and rows in [128, 160, 192, 224]: 177 | default_size = rows 178 | else: 179 | default_size = 224 180 | 181 | input_shape = _obtain_input_shape(input_shape, 182 | default_size=default_size, 183 | min_size=32, 184 | data_format=backend.image_data_format(), 185 | require_flatten=include_top, 186 | weights=weights) 187 | 188 | if backend.image_data_format() == 'channels_last': 189 | row_axis, col_axis = (0, 1) 190 | else: 191 | row_axis, col_axis = (1, 2) 192 | rows = input_shape[row_axis] 193 | cols = input_shape[col_axis] 194 | 195 | if weights == 'imagenet': 196 | if depth_multiplier != 1: 197 | raise ValueError('If imagenet weights are being loaded, ' 198 | 'depth multiplier must be 1') 199 | 200 | if alpha not in [0.25, 0.50, 0.75, 1.0]: 201 | raise ValueError('If imagenet weights are being loaded, ' 202 | 'alpha can be one of' 203 | '`0.25`, `0.50`, `0.75` or `1.0` only.') 204 | 205 | if rows != cols or rows not in [128, 160, 192, 224]: 206 | rows = 224 207 | warnings.warn('`input_shape` is undefined or non-square, ' 208 | 'or `rows` is not in [128, 160, 192, 224]. ' 209 | 'Weights for input shape (224, 224) will be' 210 | ' loaded as the default.') 211 | 212 | if input_tensor is None: 213 | img_input = layers.Input(shape=input_shape) 214 | else: 215 | if not backend.is_keras_tensor(input_tensor): 216 | img_input = layers.Input(tensor=input_tensor, shape=input_shape) 217 | else: 218 | img_input = input_tensor 219 | 220 | x = _conv_block(img_input, 32, alpha, strides=(2, 2)) 221 | x = _depthwise_conv_block(x, 64, alpha, depth_multiplier, block_id=1) 222 | 223 | x = _depthwise_conv_block(x, 128, alpha, depth_multiplier, 224 | strides=(2, 2), block_id=2) 225 | x = _depthwise_conv_block(x, 128, alpha, depth_multiplier, block_id=3) 226 | 227 | x = _depthwise_conv_block(x, 256, alpha, depth_multiplier, 228 | strides=(2, 2), block_id=4) 229 | x = _depthwise_conv_block(x, 256, alpha, depth_multiplier, block_id=5) 230 | 231 | x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, 232 | strides=(2, 2), block_id=6) 233 | x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=7) 234 | x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=8) 235 | x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=9) 236 | x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=10) 237 | x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=11) 238 | 239 | x = _depthwise_conv_block(x, 1024, alpha, depth_multiplier, 240 | strides=(2, 2), block_id=12) 241 | x = _depthwise_conv_block(x, 1024, alpha, depth_multiplier, block_id=13) 242 | 243 | if include_top: 244 | if backend.image_data_format() == 'channels_first': 245 | shape = (int(1024 * alpha), 1, 1) 246 | else: 247 | shape = (1, 1, int(1024 * alpha)) 248 | 249 | x = layers.GlobalAveragePooling2D()(x) 250 | x = layers.Reshape(shape, name='reshape_1')(x) 251 | x = layers.Dropout(dropout, name='dropout')(x) 252 | x = layers.Conv2D(classes, (1, 1), 253 | padding='same', 254 | name='conv_preds')(x) 255 | x = layers.Reshape((classes,), name='reshape_2')(x) 256 | x = layers.Activation('softmax', name='act_softmax')(x) 257 | else: 258 | if pooling == 'avg': 259 | x = layers.GlobalAveragePooling2D()(x) 260 | elif pooling == 'max': 
261 | x = layers.GlobalMaxPooling2D()(x) 262 | 263 | # Ensure that the model takes into account 264 | # any potential predecessors of `input_tensor`. 265 | if input_tensor is not None: 266 | inputs = keras_utils.get_source_inputs(input_tensor) 267 | else: 268 | inputs = img_input 269 | 270 | # Create model. 271 | model = models.Model(inputs, x, name='mobilenet_%0.2f_%s' % (alpha, rows)) 272 | 273 | # Load weights. 274 | if weights == 'imagenet': 275 | if alpha == 1.0: 276 | alpha_text = '1_0' 277 | elif alpha == 0.75: 278 | alpha_text = '7_5' 279 | elif alpha == 0.50: 280 | alpha_text = '5_0' 281 | else: 282 | alpha_text = '2_5' 283 | 284 | if include_top: 285 | model_name = 'mobilenet_%s_%d_tf.h5' % (alpha_text, rows) 286 | weight_path = BASE_WEIGHT_PATH + model_name 287 | weights_path = keras_utils.get_file(model_name, 288 | weight_path, 289 | cache_subdir='models') 290 | else: 291 | model_name = 'mobilenet_%s_%d_tf_no_top.h5' % (alpha_text, rows) 292 | weight_path = BASE_WEIGHT_PATH + model_name 293 | weights_path = keras_utils.get_file(model_name, 294 | weight_path, 295 | cache_subdir='models') 296 | model.load_weights(weights_path) 297 | elif weights is not None: 298 | model.load_weights(weights) 299 | 300 | return model 301 | 302 | 303 | def _conv_block(inputs, filters, alpha, kernel=(3, 3), strides=(1, 1)): 304 | """Adds an initial convolution layer (with batch normalization and relu6). 305 | 306 | # Arguments 307 | inputs: Input tensor of shape `(rows, cols, 3)` 308 | (with `channels_last` data format) or 309 | (3, rows, cols) (with `channels_first` data format). 310 | It should have exactly 3 inputs channels, 311 | and width and height should be no smaller than 32. 312 | E.g. `(224, 224, 3)` would be one valid value. 313 | filters: Integer, the dimensionality of the output space 314 | (i.e. the number of output filters in the convolution). 315 | alpha: controls the width of the network. 316 | - If `alpha` < 1.0, proportionally decreases the number 317 | of filters in each layer. 318 | - If `alpha` > 1.0, proportionally increases the number 319 | of filters in each layer. 320 | - If `alpha` = 1, default number of filters from the paper 321 | are used at each layer. 322 | kernel: An integer or tuple/list of 2 integers, specifying the 323 | width and height of the 2D convolution window. 324 | Can be a single integer to specify the same value for 325 | all spatial dimensions. 326 | strides: An integer or tuple/list of 2 integers, 327 | specifying the strides of the convolution 328 | along the width and height. 329 | Can be a single integer to specify the same value for 330 | all spatial dimensions. 331 | Specifying any stride value != 1 is incompatible with specifying 332 | any `dilation_rate` value != 1. 333 | 334 | # Input shape 335 | 4D tensor with shape: 336 | `(samples, channels, rows, cols)` if data_format='channels_first' 337 | or 4D tensor with shape: 338 | `(samples, rows, cols, channels)` if data_format='channels_last'. 339 | 340 | # Output shape 341 | 4D tensor with shape: 342 | `(samples, filters, new_rows, new_cols)` 343 | if data_format='channels_first' 344 | or 4D tensor with shape: 345 | `(samples, new_rows, new_cols, filters)` 346 | if data_format='channels_last'. 347 | `rows` and `cols` values might have changed due to stride. 348 | 349 | # Returns 350 | Output tensor of block. 
351 | """ 352 | channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1 353 | filters = int(filters * alpha) 354 | x = layers.ZeroPadding2D(padding=((0, 1), (0, 1)), name='conv1_pad')(inputs) 355 | x = layers.Conv2D(filters, kernel, 356 | padding='valid', 357 | use_bias=False, 358 | strides=strides, 359 | name='conv1')(x) 360 | x = layers.BatchNormalization(axis=channel_axis, name='conv1_bn')(x) 361 | return layers.ReLU(6., name='conv1_relu')(x) 362 | 363 | 364 | def _depthwise_conv_block(inputs, pointwise_conv_filters, alpha, 365 | depth_multiplier=1, strides=(1, 1), block_id=1): 366 | """Adds a depthwise convolution block. 367 | 368 | A depthwise convolution block consists of a depthwise conv, 369 | batch normalization, relu6, pointwise convolution, 370 | batch normalization and relu6 activation. 371 | 372 | # Arguments 373 | inputs: Input tensor of shape `(rows, cols, channels)` 374 | (with `channels_last` data format) or 375 | (channels, rows, cols) (with `channels_first` data format). 376 | pointwise_conv_filters: Integer, the dimensionality of the output space 377 | (i.e. the number of output filters in the pointwise convolution). 378 | alpha: controls the width of the network. 379 | - If `alpha` < 1.0, proportionally decreases the number 380 | of filters in each layer. 381 | - If `alpha` > 1.0, proportionally increases the number 382 | of filters in each layer. 383 | - If `alpha` = 1, default number of filters from the paper 384 | are used at each layer. 385 | depth_multiplier: The number of depthwise convolution output channels 386 | for each input channel. 387 | The total number of depthwise convolution output 388 | channels will be equal to `filters_in * depth_multiplier`. 389 | strides: An integer or tuple/list of 2 integers, 390 | specifying the strides of the convolution 391 | along the width and height. 392 | Can be a single integer to specify the same value for 393 | all spatial dimensions. 394 | Specifying any stride value != 1 is incompatible with specifying 395 | any `dilation_rate` value != 1. 396 | block_id: Integer, a unique identification designating 397 | the block number. 398 | 399 | # Input shape 400 | 4D tensor with shape: 401 | `(batch, channels, rows, cols)` if data_format='channels_first' 402 | or 4D tensor with shape: 403 | `(batch, rows, cols, channels)` if data_format='channels_last'. 404 | 405 | # Output shape 406 | 4D tensor with shape: 407 | `(batch, filters, new_rows, new_cols)` 408 | if data_format='channels_first' 409 | or 4D tensor with shape: 410 | `(batch, new_rows, new_cols, filters)` 411 | if data_format='channels_last'. 412 | `rows` and `cols` values might have changed due to stride. 413 | 414 | # Returns 415 | Output tensor of block. 
416 | """ 417 | channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1 418 | pointwise_conv_filters = int(pointwise_conv_filters * alpha) 419 | 420 | if strides == (1, 1): 421 | x = inputs 422 | else: 423 | x = layers.ZeroPadding2D(((0, 1), (0, 1)), 424 | name='conv_pad_%d' % block_id)(inputs) 425 | x = layers.DepthwiseConv2D((3, 3), 426 | padding='same' if strides == (1, 1) else 'valid', 427 | depth_multiplier=depth_multiplier, 428 | strides=strides, 429 | use_bias=False, 430 | name='conv_dw_%d' % block_id)(x) 431 | x = layers.BatchNormalization( 432 | axis=channel_axis, name='conv_dw_%d_bn' % block_id)(x) 433 | x = layers.ReLU(6., name='conv_dw_%d_relu' % block_id)(x) 434 | 435 | x = layers.Conv2D(pointwise_conv_filters, (1, 1), 436 | padding='same', 437 | use_bias=False, 438 | strides=(1, 1), 439 | name='conv_pw_%d' % block_id)(x) 440 | x = layers.BatchNormalization(axis=channel_axis, 441 | name='conv_pw_%d_bn' % block_id)(x) 442 | return layers.ReLU(6., name='conv_pw_%d_relu' % block_id)(x) 443 | -------------------------------------------------------------------------------- /keras_applications/mobilenet_v2.py: -------------------------------------------------------------------------------- 1 | """MobileNet v2 models for Keras. 2 | 3 | MobileNetV2 is a general architecture and can be used for multiple use cases. 4 | Depending on the use case, it can use different input layer size and 5 | different width factors. This allows different width models to reduce 6 | the number of multiply-adds and thereby 7 | reduce inference cost on mobile devices. 8 | 9 | MobileNetV2 is very similar to the original MobileNet, 10 | except that it uses inverted residual blocks with 11 | bottlenecking features. It has a drastically lower 12 | parameter count than the original MobileNet. 13 | MobileNets support any input size greater 14 | than 32 x 32, with larger image sizes 15 | offering better performance. 16 | 17 | The number of parameters and number of multiply-adds 18 | can be modified by using the `alpha` parameter, 19 | which increases/decreases the number of filters in each layer. 20 | By altering the image size and `alpha` parameter, 21 | all 22 models from the paper can be built, with ImageNet weights provided. 22 | 23 | The paper demonstrates the performance of MobileNets using `alpha` values of 24 | 1.0 (also called 100 % MobileNet), 0.35, 0.5, 0.75, 1.0, 1.3, and 1.4 25 | 26 | For each of these `alpha` values, weights for 5 different input image sizes 27 | are provided (224, 192, 160, 128, and 96). 
28 | 29 | 30 | The following table describes the performance of 31 | MobileNet on various input sizes: 32 | ------------------------------------------------------------------------ 33 | MACs stands for Multiply Adds 34 | 35 | Classification Checkpoint| MACs (M) | Parameters (M)| Top 1 Accuracy| Top 5 Accuracy 36 | --------------------------|------------|---------------|---------|----|------------- 37 | | [mobilenet_v2_1.4_224] | 582 | 6.06 | 75.0 | 92.5 | 38 | | [mobilenet_v2_1.3_224] | 509 | 5.34 | 74.4 | 92.1 | 39 | | [mobilenet_v2_1.0_224] | 300 | 3.47 | 71.8 | 91.0 | 40 | | [mobilenet_v2_1.0_192] | 221 | 3.47 | 70.7 | 90.1 | 41 | | [mobilenet_v2_1.0_160] | 154 | 3.47 | 68.8 | 89.0 | 42 | | [mobilenet_v2_1.0_128] | 99 | 3.47 | 65.3 | 86.9 | 43 | | [mobilenet_v2_1.0_96] | 56 | 3.47 | 60.3 | 83.2 | 44 | | [mobilenet_v2_0.75_224] | 209 | 2.61 | 69.8 | 89.6 | 45 | | [mobilenet_v2_0.75_192] | 153 | 2.61 | 68.7 | 88.9 | 46 | | [mobilenet_v2_0.75_160] | 107 | 2.61 | 66.4 | 87.3 | 47 | | [mobilenet_v2_0.75_128] | 69 | 2.61 | 63.2 | 85.3 | 48 | | [mobilenet_v2_0.75_96] | 39 | 2.61 | 58.8 | 81.6 | 49 | | [mobilenet_v2_0.5_224] | 97 | 1.95 | 65.4 | 86.4 | 50 | | [mobilenet_v2_0.5_192] | 71 | 1.95 | 63.9 | 85.4 | 51 | | [mobilenet_v2_0.5_160] | 50 | 1.95 | 61.0 | 83.2 | 52 | | [mobilenet_v2_0.5_128] | 32 | 1.95 | 57.7 | 80.8 | 53 | | [mobilenet_v2_0.5_96] | 18 | 1.95 | 51.2 | 75.8 | 54 | | [mobilenet_v2_0.35_224] | 59 | 1.66 | 60.3 | 82.9 | 55 | | [mobilenet_v2_0.35_192] | 43 | 1.66 | 58.2 | 81.2 | 56 | | [mobilenet_v2_0.35_160] | 30 | 1.66 | 55.7 | 79.1 | 57 | | [mobilenet_v2_0.35_128] | 20 | 1.66 | 50.8 | 75.0 | 58 | | [mobilenet_v2_0.35_96] | 11 | 1.66 | 45.5 | 70.4 | 59 | 60 | The weights for all 16 models are obtained and 61 | translated from the Tensorflow checkpoints 62 | from TensorFlow checkpoints found [here] 63 | (https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/README.md). 64 | 65 | # Reference 66 | 67 | This file contains building code for MobileNetV2, based on 68 | [MobileNetV2: Inverted Residuals and Linear Bottlenecks] 69 | (https://arxiv.org/abs/1801.04381) (CVPR 2018) 70 | 71 | Tests comparing this model to the existing Tensorflow model can be 72 | found at [mobilenet_v2_keras] 73 | (https://github.com/JonathanCMitchell/mobilenet_v2_keras) 74 | """ 75 | from __future__ import print_function 76 | from __future__ import absolute_import 77 | from __future__ import division 78 | 79 | import os 80 | import warnings 81 | import numpy as np 82 | 83 | from . import correct_pad 84 | from . import get_submodules_from_kwargs 85 | from . import imagenet_utils 86 | from .imagenet_utils import decode_predictions 87 | from .imagenet_utils import _obtain_input_shape 88 | 89 | # TODO Change path to v1.1 90 | BASE_WEIGHT_PATH = ('https://github.com/JonathanCMitchell/mobilenet_v2_keras/' 91 | 'releases/download/v1.1/') 92 | 93 | backend = None 94 | layers = None 95 | models = None 96 | keras_utils = None 97 | 98 | 99 | def preprocess_input(x, **kwargs): 100 | """Preprocesses a numpy array encoding a batch of images. 101 | 102 | # Arguments 103 | x: a 4D numpy array consists of RGB values within [0, 255]. 104 | 105 | # Returns 106 | Preprocessed array. 107 | """ 108 | return imagenet_utils.preprocess_input(x, mode='tf', **kwargs) 109 | 110 | 111 | # This function is taken from the original tf repo. 
112 | # It ensures that all layers have a channel number that is divisible by 8 113 | # It can be seen here: 114 | # https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py 115 | 116 | 117 | def _make_divisible(v, divisor, min_value=None): 118 | if min_value is None: 119 | min_value = divisor 120 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 121 | # Make sure that round down does not go down by more than 10%. 122 | if new_v < 0.9 * v: 123 | new_v += divisor 124 | return new_v 125 | 126 | 127 | def MobileNetV2(input_shape=None, 128 | alpha=1.0, 129 | include_top=True, 130 | weights='imagenet', 131 | input_tensor=None, 132 | pooling=None, 133 | classes=1000, 134 | **kwargs): 135 | """Instantiates the MobileNetV2 architecture. 136 | 137 | # Arguments 138 | input_shape: optional shape tuple, to be specified if you would 139 | like to use a model with an input img resolution that is not 140 | (224, 224, 3). 141 | It should have exactly 3 inputs channels (224, 224, 3). 142 | You can also omit this option if you would like 143 | to infer input_shape from an input_tensor. 144 | If you choose to include both input_tensor and input_shape then 145 | input_shape will be used if they match, if the shapes 146 | do not match then we will throw an error. 147 | E.g. `(160, 160, 3)` would be one valid value. 148 | alpha: controls the width of the network. This is known as the 149 | width multiplier in the MobileNetV2 paper, but the name is kept for 150 | consistency with MobileNetV1 in Keras. 151 | - If `alpha` < 1.0, proportionally decreases the number 152 | of filters in each layer. 153 | - If `alpha` > 1.0, proportionally increases the number 154 | of filters in each layer. 155 | - If `alpha` = 1, default number of filters from the paper 156 | are used at each layer. 157 | include_top: whether to include the fully-connected 158 | layer at the top of the network. 159 | weights: one of `None` (random initialization), 160 | 'imagenet' (pre-training on ImageNet), 161 | or the path to the weights file to be loaded. 162 | input_tensor: optional Keras tensor (i.e. output of 163 | `layers.Input()`) 164 | to use as image input for the model. 165 | pooling: Optional pooling mode for feature extraction 166 | when `include_top` is `False`. 167 | - `None` means that the output of the model 168 | will be the 4D tensor output of the 169 | last convolutional block. 170 | - `avg` means that global average pooling 171 | will be applied to the output of the 172 | last convolutional block, and thus 173 | the output of the model will be a 174 | 2D tensor. 175 | - `max` means that global max pooling will 176 | be applied. 177 | classes: optional number of classes to classify images 178 | into, only to be specified if `include_top` is True, and 179 | if no `weights` argument is specified. 180 | 181 | # Returns 182 | A Keras model instance. 
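# Illustrative sketch (not part of mobilenet_v2.py): a quick check of
# `_make_divisible` defined above -- channel counts are rounded to a
# multiple of 8 and never fall below 90% of the requested value. Assumes
# the function is in scope:
assert _make_divisible(32 * 1.0, 8) == 32
assert _make_divisible(32 * 0.5, 8) == 16
assert _make_divisible(32 * 0.35, 8) == 16  # 11.2 rounds to 8, below the 90% floor, so bump to 16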
183 | 184 | # Raises 185 | ValueError: in case of invalid argument for `weights`, 186 | or invalid input shape or invalid alpha, rows when 187 | weights='imagenet' 188 | """ 189 | global backend, layers, models, keras_utils 190 | backend, layers, models, keras_utils = get_submodules_from_kwargs(kwargs) 191 | 192 | if not (weights in {'imagenet', None} or os.path.exists(weights)): 193 | raise ValueError('The `weights` argument should be either ' 194 | '`None` (random initialization), `imagenet` ' 195 | '(pre-training on ImageNet), ' 196 | 'or the path to the weights file to be loaded.') 197 | 198 | if weights == 'imagenet' and include_top and classes != 1000: 199 | raise ValueError('If using `weights` as `"imagenet"` with `include_top` ' 200 | 'as true, `classes` should be 1000') 201 | 202 | # Determine proper input shape and default size. 203 | # If both input_shape and input_tensor are used, they should match 204 | if input_shape is not None and input_tensor is not None: 205 | try: 206 | is_input_t_tensor = backend.is_keras_tensor(input_tensor) 207 | except ValueError: 208 | try: 209 | is_input_t_tensor = backend.is_keras_tensor( 210 | keras_utils.get_source_inputs(input_tensor)) 211 | except ValueError: 212 | raise ValueError('input_tensor: ', input_tensor, 213 | 'is not type input_tensor') 214 | if is_input_t_tensor: 215 | if backend.image_data_format() == 'channels_first': 216 | if backend.int_shape(input_tensor)[1] != input_shape[1]: 217 | raise ValueError('input_shape: ', input_shape, 218 | 'and input_tensor: ', input_tensor, 219 | 'do not meet the same shape requirements') 220 | else: 221 | if backend.int_shape(input_tensor)[2] != input_shape[1]: 222 | raise ValueError('input_shape: ', input_shape, 223 | 'and input_tensor: ', input_tensor, 224 | 'do not meet the same shape requirements') 225 | else: 226 | raise ValueError('input_tensor specified: ', input_tensor, 227 | 'is not a keras tensor') 228 | 229 | # If input_shape is None, infer shape from input_tensor 230 | if input_shape is None and input_tensor is not None: 231 | 232 | try: 233 | backend.is_keras_tensor(input_tensor) 234 | except ValueError: 235 | raise ValueError('input_tensor: ', input_tensor, 236 | 'is type: ', type(input_tensor), 237 | 'which is not a valid type') 238 | 239 | if input_shape is None and not backend.is_keras_tensor(input_tensor): 240 | default_size = 224 241 | elif input_shape is None and backend.is_keras_tensor(input_tensor): 242 | if backend.image_data_format() == 'channels_first': 243 | rows = backend.int_shape(input_tensor)[2] 244 | cols = backend.int_shape(input_tensor)[3] 245 | else: 246 | rows = backend.int_shape(input_tensor)[1] 247 | cols = backend.int_shape(input_tensor)[2] 248 | 249 | if rows == cols and rows in [96, 128, 160, 192, 224]: 250 | default_size = rows 251 | else: 252 | default_size = 224 253 | 254 | # If input_shape is None and no input_tensor 255 | elif input_shape is None: 256 | default_size = 224 257 | 258 | # If input_shape is not None, assume default size 259 | else: 260 | if backend.image_data_format() == 'channels_first': 261 | rows = input_shape[1] 262 | cols = input_shape[2] 263 | else: 264 | rows = input_shape[0] 265 | cols = input_shape[1] 266 | 267 | if rows == cols and rows in [96, 128, 160, 192, 224]: 268 | default_size = rows 269 | else: 270 | default_size = 224 271 | 272 | input_shape = _obtain_input_shape(input_shape, 273 | default_size=default_size, 274 | min_size=32, 275 | data_format=backend.image_data_format(), 276 | require_flatten=include_top, 277 | 
weights=weights) 278 | 279 | if backend.image_data_format() == 'channels_last': 280 | row_axis, col_axis = (0, 1) 281 | else: 282 | row_axis, col_axis = (1, 2) 283 | rows = input_shape[row_axis] 284 | cols = input_shape[col_axis] 285 | 286 | if weights == 'imagenet': 287 | if alpha not in [0.35, 0.50, 0.75, 1.0, 1.3, 1.4]: 288 | raise ValueError('If imagenet weights are being loaded, ' 289 | 'alpha can be one of `0.35`, `0.50`, `0.75`, ' 290 | '`1.0`, `1.3` or `1.4` only.') 291 | 292 | if rows != cols or rows not in [96, 128, 160, 192, 224]: 293 | rows = 224 294 | warnings.warn('`input_shape` is undefined or non-square, ' 295 | 'or `rows` is not in [96, 128, 160, 192, 224].' 296 | ' Weights for input shape (224, 224) will be' 297 | ' loaded as the default.') 298 | 299 | if input_tensor is None: 300 | img_input = layers.Input(shape=input_shape) 301 | else: 302 | if not backend.is_keras_tensor(input_tensor): 303 | img_input = layers.Input(tensor=input_tensor, shape=input_shape) 304 | else: 305 | img_input = input_tensor 306 | 307 | channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1 308 | 309 | first_block_filters = _make_divisible(32 * alpha, 8) 310 | x = layers.ZeroPadding2D(padding=correct_pad(backend, img_input, 3), 311 | name='Conv1_pad')(img_input) 312 | x = layers.Conv2D(first_block_filters, 313 | kernel_size=3, 314 | strides=(2, 2), 315 | padding='valid', 316 | use_bias=False, 317 | name='Conv1')(x) 318 | x = layers.BatchNormalization(axis=channel_axis, 319 | epsilon=1e-3, 320 | momentum=0.999, 321 | name='bn_Conv1')(x) 322 | x = layers.ReLU(6., name='Conv1_relu')(x) 323 | 324 | x = _inverted_res_block(x, filters=16, alpha=alpha, stride=1, 325 | expansion=1, block_id=0) 326 | 327 | x = _inverted_res_block(x, filters=24, alpha=alpha, stride=2, 328 | expansion=6, block_id=1) 329 | x = _inverted_res_block(x, filters=24, alpha=alpha, stride=1, 330 | expansion=6, block_id=2) 331 | 332 | x = _inverted_res_block(x, filters=32, alpha=alpha, stride=2, 333 | expansion=6, block_id=3) 334 | x = _inverted_res_block(x, filters=32, alpha=alpha, stride=1, 335 | expansion=6, block_id=4) 336 | x = _inverted_res_block(x, filters=32, alpha=alpha, stride=1, 337 | expansion=6, block_id=5) 338 | 339 | x = _inverted_res_block(x, filters=64, alpha=alpha, stride=2, 340 | expansion=6, block_id=6) 341 | x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1, 342 | expansion=6, block_id=7) 343 | x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1, 344 | expansion=6, block_id=8) 345 | x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1, 346 | expansion=6, block_id=9) 347 | 348 | x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1, 349 | expansion=6, block_id=10) 350 | x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1, 351 | expansion=6, block_id=11) 352 | x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1, 353 | expansion=6, block_id=12) 354 | 355 | x = _inverted_res_block(x, filters=160, alpha=alpha, stride=2, 356 | expansion=6, block_id=13) 357 | x = _inverted_res_block(x, filters=160, alpha=alpha, stride=1, 358 | expansion=6, block_id=14) 359 | x = _inverted_res_block(x, filters=160, alpha=alpha, stride=1, 360 | expansion=6, block_id=15) 361 | 362 | x = _inverted_res_block(x, filters=320, alpha=alpha, stride=1, 363 | expansion=6, block_id=16) 364 | 365 | # no alpha applied to last conv as stated in the paper: 366 | # if the width multiplier is greater than 1 we 367 | # increase the number of output channels 368 | if alpha > 1.0: 369 
| last_block_filters = _make_divisible(1280 * alpha, 8) 370 | else: 371 | last_block_filters = 1280 372 | 373 | x = layers.Conv2D(last_block_filters, 374 | kernel_size=1, 375 | use_bias=False, 376 | name='Conv_1')(x) 377 | x = layers.BatchNormalization(axis=channel_axis, 378 | epsilon=1e-3, 379 | momentum=0.999, 380 | name='Conv_1_bn')(x) 381 | x = layers.ReLU(6., name='out_relu')(x) 382 | 383 | if include_top: 384 | x = layers.GlobalAveragePooling2D()(x) 385 | x = layers.Dense(classes, activation='softmax', 386 | use_bias=True, name='Logits')(x) 387 | else: 388 | if pooling == 'avg': 389 | x = layers.GlobalAveragePooling2D()(x) 390 | elif pooling == 'max': 391 | x = layers.GlobalMaxPooling2D()(x) 392 | 393 | # Ensure that the model takes into account 394 | # any potential predecessors of `input_tensor`. 395 | if input_tensor is not None: 396 | inputs = keras_utils.get_source_inputs(input_tensor) 397 | else: 398 | inputs = img_input 399 | 400 | # Create model. 401 | model = models.Model(inputs, x, 402 | name='mobilenetv2_%0.2f_%s' % (alpha, rows)) 403 | 404 | # Load weights. 405 | if weights == 'imagenet': 406 | if include_top: 407 | model_name = ('mobilenet_v2_weights_tf_dim_ordering_tf_kernels_' + 408 | str(alpha) + '_' + str(rows) + '.h5') 409 | weight_path = BASE_WEIGHT_PATH + model_name 410 | weights_path = keras_utils.get_file( 411 | model_name, weight_path, cache_subdir='models') 412 | else: 413 | model_name = ('mobilenet_v2_weights_tf_dim_ordering_tf_kernels_' + 414 | str(alpha) + '_' + str(rows) + '_no_top' + '.h5') 415 | weight_path = BASE_WEIGHT_PATH + model_name 416 | weights_path = keras_utils.get_file( 417 | model_name, weight_path, cache_subdir='models') 418 | model.load_weights(weights_path) 419 | elif weights is not None: 420 | model.load_weights(weights) 421 | 422 | return model 423 | 424 | 425 | def _inverted_res_block(inputs, expansion, stride, alpha, filters, block_id): 426 | channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1 427 | 428 | in_channels = backend.int_shape(inputs)[channel_axis] 429 | pointwise_conv_filters = int(filters * alpha) 430 | pointwise_filters = _make_divisible(pointwise_conv_filters, 8) 431 | x = inputs 432 | prefix = 'block_{}_'.format(block_id) 433 | 434 | if block_id: 435 | # Expand 436 | x = layers.Conv2D(expansion * in_channels, 437 | kernel_size=1, 438 | padding='same', 439 | use_bias=False, 440 | activation=None, 441 | name=prefix + 'expand')(x) 442 | x = layers.BatchNormalization(axis=channel_axis, 443 | epsilon=1e-3, 444 | momentum=0.999, 445 | name=prefix + 'expand_BN')(x) 446 | x = layers.ReLU(6., name=prefix + 'expand_relu')(x) 447 | else: 448 | prefix = 'expanded_conv_' 449 | 450 | # Depthwise 451 | if stride == 2: 452 | x = layers.ZeroPadding2D(padding=correct_pad(backend, x, 3), 453 | name=prefix + 'pad')(x) 454 | x = layers.DepthwiseConv2D(kernel_size=3, 455 | strides=stride, 456 | activation=None, 457 | use_bias=False, 458 | padding='same' if stride == 1 else 'valid', 459 | name=prefix + 'depthwise')(x) 460 | x = layers.BatchNormalization(axis=channel_axis, 461 | epsilon=1e-3, 462 | momentum=0.999, 463 | name=prefix + 'depthwise_BN')(x) 464 | 465 | x = layers.ReLU(6., name=prefix + 'depthwise_relu')(x) 466 | 467 | # Project 468 | x = layers.Conv2D(pointwise_filters, 469 | kernel_size=1, 470 | padding='same', 471 | use_bias=False, 472 | activation=None, 473 | name=prefix + 'project')(x) 474 | x = layers.BatchNormalization(axis=channel_axis, 475 | epsilon=1e-3, 476 | momentum=0.999, 477 | name=prefix + 
'project_BN')(x) 478 | 479 | if in_channels == pointwise_filters and stride == 1: 480 | return layers.Add(name=prefix + 'add')([inputs, x]) 481 | return x 482 | -------------------------------------------------------------------------------- /keras_applications/resnet.py: -------------------------------------------------------------------------------- 1 | """ResNet models for Keras. 2 | 3 | # Reference paper 4 | 5 | - [Deep Residual Learning for Image Recognition] 6 | (https://arxiv.org/abs/1512.03385) (CVPR 2016 Best Paper Award) 7 | 8 | # Reference implementations 9 | 10 | - [TensorNets] 11 | (https://github.com/taehoonlee/tensornets/blob/master/tensornets/resnets.py) 12 | - [Caffe ResNet] 13 | (https://github.com/KaimingHe/deep-residual-networks/tree/master/prototxt) 14 | 15 | """ 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | from . import imagenet_utils 21 | from .imagenet_utils import decode_predictions 22 | from .resnet_common import ResNet50 23 | from .resnet_common import ResNet101 24 | from .resnet_common import ResNet152 25 | 26 | 27 | def preprocess_input(x, **kwargs): 28 | """Preprocesses a numpy array encoding a batch of images. 29 | 30 | # Arguments 31 | x: a 4D numpy array consists of RGB values within [0, 255]. 32 | data_format: data format of the image tensor. 33 | 34 | # Returns 35 | Preprocessed array. 36 | """ 37 | return imagenet_utils.preprocess_input(x, mode='caffe', **kwargs) 38 | -------------------------------------------------------------------------------- /keras_applications/resnet50.py: -------------------------------------------------------------------------------- 1 | """ResNet50 model for Keras. 2 | 3 | # Reference: 4 | 5 | - [Deep Residual Learning for Image Recognition]( 6 | https://arxiv.org/abs/1512.03385) (CVPR 2016 Best Paper Award) 7 | 8 | Adapted from code contributed by BigMoyan. 9 | """ 10 | from __future__ import absolute_import 11 | from __future__ import division 12 | from __future__ import print_function 13 | 14 | import os 15 | import warnings 16 | 17 | from . import get_submodules_from_kwargs 18 | from . import imagenet_utils 19 | from .imagenet_utils import decode_predictions 20 | from .imagenet_utils import _obtain_input_shape 21 | 22 | preprocess_input = imagenet_utils.preprocess_input 23 | 24 | WEIGHTS_PATH = ('https://github.com/fchollet/deep-learning-models/' 25 | 'releases/download/v0.2/' 26 | 'resnet50_weights_tf_dim_ordering_tf_kernels.h5') 27 | WEIGHTS_PATH_NO_TOP = ('https://github.com/fchollet/deep-learning-models/' 28 | 'releases/download/v0.2/' 29 | 'resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5') 30 | 31 | backend = None 32 | layers = None 33 | models = None 34 | keras_utils = None 35 | 36 | 37 | def identity_block(input_tensor, kernel_size, filters, stage, block): 38 | """The identity block is the block that has no conv layer in the shortcut. 39 | 40 | # Arguments 41 | input_tensor: input tensor 42 | kernel_size: default 3, the kernel size of 43 | the middle conv layer in the main path 44 | filters: list of integers, the filters of the 3 conv layers in the main path 45 | stage: integer, current stage label, used for generating layer names 46 | block: 'a','b'..., current block label, used for generating layer names 47 | 48 | # Returns 49 | Output tensor for the block.
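
    Note that the channel count of `input_tensor` must equal `filters[2]`,
    because the block adds its output back onto the unmodified shortcut:
    e.g. with `filters=[64, 64, 256]`, a `(None, 56, 56, 256)` input
    returns a tensor of the same shape.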
50 | """ 51 | filters1, filters2, filters3 = filters 52 | if backend.image_data_format() == 'channels_last': 53 | bn_axis = 3 54 | else: 55 | bn_axis = 1 56 | conv_name_base = 'res' + str(stage) + block + '_branch' 57 | bn_name_base = 'bn' + str(stage) + block + '_branch' 58 | 59 | x = layers.Conv2D(filters1, (1, 1), 60 | kernel_initializer='he_normal', 61 | name=conv_name_base + '2a')(input_tensor) 62 | x = layers.BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x) 63 | x = layers.Activation('relu')(x) 64 | 65 | x = layers.Conv2D(filters2, kernel_size, 66 | padding='same', 67 | kernel_initializer='he_normal', 68 | name=conv_name_base + '2b')(x) 69 | x = layers.BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x) 70 | x = layers.Activation('relu')(x) 71 | 72 | x = layers.Conv2D(filters3, (1, 1), 73 | kernel_initializer='he_normal', 74 | name=conv_name_base + '2c')(x) 75 | x = layers.BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x) 76 | 77 | x = layers.add([x, input_tensor]) 78 | x = layers.Activation('relu')(x) 79 | return x 80 | 81 | 82 | def conv_block(input_tensor, 83 | kernel_size, 84 | filters, 85 | stage, 86 | block, 87 | strides=(2, 2)): 88 | """A block that has a conv layer at shortcut. 89 | 90 | # Arguments 91 | input_tensor: input tensor 92 | kernel_size: default 3, the kernel size of 93 | middle conv layer at main path 94 | filters: list of integers, the filters of 3 conv layer at main path 95 | stage: integer, current stage label, used for generating layer names 96 | block: 'a','b'..., current block label, used for generating layer names 97 | strides: Strides for the first conv layer in the block. 98 | 99 | # Returns 100 | Output tensor for the block. 101 | 102 | Note that from stage 3, 103 | the first conv layer at main path is with strides=(2, 2) 104 | And the shortcut should have strides=(2, 2) as well 105 | """ 106 | filters1, filters2, filters3 = filters 107 | if backend.image_data_format() == 'channels_last': 108 | bn_axis = 3 109 | else: 110 | bn_axis = 1 111 | conv_name_base = 'res' + str(stage) + block + '_branch' 112 | bn_name_base = 'bn' + str(stage) + block + '_branch' 113 | 114 | x = layers.Conv2D(filters1, (1, 1), strides=strides, 115 | kernel_initializer='he_normal', 116 | name=conv_name_base + '2a')(input_tensor) 117 | x = layers.BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x) 118 | x = layers.Activation('relu')(x) 119 | 120 | x = layers.Conv2D(filters2, kernel_size, padding='same', 121 | kernel_initializer='he_normal', 122 | name=conv_name_base + '2b')(x) 123 | x = layers.BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x) 124 | x = layers.Activation('relu')(x) 125 | 126 | x = layers.Conv2D(filters3, (1, 1), 127 | kernel_initializer='he_normal', 128 | name=conv_name_base + '2c')(x) 129 | x = layers.BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x) 130 | 131 | shortcut = layers.Conv2D(filters3, (1, 1), strides=strides, 132 | kernel_initializer='he_normal', 133 | name=conv_name_base + '1')(input_tensor) 134 | shortcut = layers.BatchNormalization( 135 | axis=bn_axis, name=bn_name_base + '1')(shortcut) 136 | 137 | x = layers.add([x, shortcut]) 138 | x = layers.Activation('relu')(x) 139 | return x 140 | 141 | 142 | def ResNet50(include_top=True, 143 | weights='imagenet', 144 | input_tensor=None, 145 | input_shape=None, 146 | pooling=None, 147 | classes=1000, 148 | **kwargs): 149 | """Instantiates the ResNet50 architecture. 150 | 151 | Optionally loads weights pre-trained on ImageNet. 
152 | Note that the data format convention used by the model is 153 | the one specified in your Keras config at `~/.keras/keras.json`. 154 | 155 | # Arguments 156 | include_top: whether to include the fully-connected 157 | layer at the top of the network. 158 | weights: one of `None` (random initialization), 159 | 'imagenet' (pre-training on ImageNet), 160 | or the path to the weights file to be loaded. 161 | input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) 162 | to use as image input for the model. 163 | input_shape: optional shape tuple, only to be specified 164 | if `include_top` is False (otherwise the input shape 165 | has to be `(224, 224, 3)` (with `channels_last` data format) 166 | or `(3, 224, 224)` (with `channels_first` data format)). 167 | It should have exactly 3 input channels, 168 | and width and height should be no smaller than 32. 169 | E.g. `(200, 200, 3)` would be one valid value. 170 | pooling: Optional pooling mode for feature extraction 171 | when `include_top` is `False`. 172 | - `None` means that the output of the model will be 173 | the 4D tensor output of the 174 | last convolutional block. 175 | - `avg` means that global average pooling 176 | will be applied to the output of the 177 | last convolutional block, and thus 178 | the output of the model will be a 2D tensor. 179 | - `max` means that global max pooling will 180 | be applied. 181 | classes: optional number of classes to classify images 182 | into, only to be specified if `include_top` is True, and 183 | if no `weights` argument is specified. 184 | 185 | # Returns 186 | A Keras model instance. 187 | 188 | # Raises 189 | ValueError: in case of invalid argument for `weights`, 190 | or invalid input shape. 191 | """ 192 | global backend, layers, models, keras_utils 193 | backend, layers, models, keras_utils = get_submodules_from_kwargs(kwargs) 194 | 195 | if not (weights in {'imagenet', None} or os.path.exists(weights)): 196 | raise ValueError('The `weights` argument should be either ' 197 | '`None` (random initialization), `imagenet` ' 198 | '(pre-training on ImageNet), ' 199 | 'or the path to the weights file to be loaded.') 200 | 201 | if weights == 'imagenet' and include_top and classes != 1000: 202 | raise ValueError('If using `weights` as `"imagenet"` with `include_top`' 203 | ' as true, `classes` should be 1000') 204 | 205 | # Determine proper input shape 206 | input_shape = _obtain_input_shape(input_shape, 207 | default_size=224, 208 | min_size=32, 209 | data_format=backend.image_data_format(), 210 | require_flatten=include_top, 211 | weights=weights) 212 | 213 | if input_tensor is None: 214 | img_input = layers.Input(shape=input_shape) 215 | else: 216 | if not backend.is_keras_tensor(input_tensor): 217 | img_input = layers.Input(tensor=input_tensor, shape=input_shape) 218 | else: 219 | img_input = input_tensor 220 | if backend.image_data_format() == 'channels_last': 221 | bn_axis = 3 222 | else: 223 | bn_axis = 1 224 | 225 | x = layers.ZeroPadding2D(padding=(3, 3), name='conv1_pad')(img_input) 226 | x = layers.Conv2D(64, (7, 7), 227 | strides=(2, 2), 228 | padding='valid', 229 | kernel_initializer='he_normal', 230 | name='conv1')(x) 231 | x = layers.BatchNormalization(axis=bn_axis, name='bn_conv1')(x) 232 | x = layers.Activation('relu')(x) 233 | x = layers.ZeroPadding2D(padding=(1, 1), name='pool1_pad')(x) 234 | x = layers.MaxPooling2D((3, 3), strides=(2, 2))(x) 235 | 236 | x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1)) 237 | x = identity_block(x, 3,
[64, 64, 256], stage=2, block='b') 238 | x = identity_block(x, 3, [64, 64, 256], stage=2, block='c') 239 | 240 | x = conv_block(x, 3, [128, 128, 512], stage=3, block='a') 241 | x = identity_block(x, 3, [128, 128, 512], stage=3, block='b') 242 | x = identity_block(x, 3, [128, 128, 512], stage=3, block='c') 243 | x = identity_block(x, 3, [128, 128, 512], stage=3, block='d') 244 | 245 | x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a') 246 | x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b') 247 | x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c') 248 | x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d') 249 | x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e') 250 | x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f') 251 | 252 | x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a') 253 | x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b') 254 | x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c') 255 | 256 | if include_top: 257 | x = layers.GlobalAveragePooling2D(name='avg_pool')(x) 258 | x = layers.Dense(classes, activation='softmax', name='fc1000')(x) 259 | else: 260 | if pooling == 'avg': 261 | x = layers.GlobalAveragePooling2D()(x) 262 | elif pooling == 'max': 263 | x = layers.GlobalMaxPooling2D()(x) 264 | else: 265 | warnings.warn('The output shape of `ResNet50(include_top=False)` ' 266 | 'has been changed since Keras 2.2.0.') 267 | 268 | # Ensure that the model takes into account 269 | # any potential predecessors of `input_tensor`. 270 | if input_tensor is not None: 271 | inputs = keras_utils.get_source_inputs(input_tensor) 272 | else: 273 | inputs = img_input 274 | # Create model. 275 | model = models.Model(inputs, x, name='resnet50') 276 | 277 | # Load weights. 278 | if weights == 'imagenet': 279 | if include_top: 280 | weights_path = keras_utils.get_file( 281 | 'resnet50_weights_tf_dim_ordering_tf_kernels.h5', 282 | WEIGHTS_PATH, 283 | cache_subdir='models', 284 | md5_hash='a7b3fe01876f51b976af0dea6bc144eb') 285 | else: 286 | weights_path = keras_utils.get_file( 287 | 'resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5', 288 | WEIGHTS_PATH_NO_TOP, 289 | cache_subdir='models', 290 | md5_hash='a268eb855778b3df3c7506639542a6af') 291 | model.load_weights(weights_path) 292 | if backend.backend() == 'theano': 293 | keras_utils.convert_all_kernels_in_model(model) 294 | elif weights is not None: 295 | model.load_weights(weights) 296 | 297 | return model 298 | -------------------------------------------------------------------------------- /keras_applications/resnet_v2.py: -------------------------------------------------------------------------------- 1 | """ResNetV2 models for Keras. 2 | 3 | # Reference paper 4 | 5 | - [Identity Mappings in Deep Residual Networks] 6 | (https://arxiv.org/abs/1603.05027) (ECCV 2016) 7 | 8 | # Reference implementations 9 | 10 | - [TensorNets] 11 | (https://github.com/taehoonlee/tensornets/blob/master/tensornets/resnets.py) 12 | - [Torch ResNetV2] 13 | (https://github.com/facebook/fb.resnet.torch/blob/master/models/preresnet.lua) 14 | 15 | """ 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | from .
import imagenet_utils 21 | from .imagenet_utils import decode_predictions 22 | from .resnet_common import ResNet50V2 23 | from .resnet_common import ResNet101V2 24 | from .resnet_common import ResNet152V2 25 | 26 | 27 | def preprocess_input(x, **kwargs): 28 | """Preprocesses a numpy array encoding a batch of images. 29 | 30 | # Arguments 31 | x: a 4D numpy array consists of RGB values within [0, 255]. 32 | data_format: data format of the image tensor. 33 | 34 | # Returns 35 | Preprocessed array. 36 | """ 37 | return imagenet_utils.preprocess_input(x, mode='tf', **kwargs) 38 | -------------------------------------------------------------------------------- /keras_applications/resnext.py: -------------------------------------------------------------------------------- 1 | """ResNeXt models for Keras. 2 | 3 | # Reference paper 4 | 5 | - [Aggregated Residual Transformations for Deep Neural Networks] 6 | (https://arxiv.org/abs/1611.05431) (CVPR 2017) 7 | 8 | # Reference implementations 9 | 10 | - [TensorNets] 11 | (https://github.com/taehoonlee/tensornets/blob/master/tensornets/resnets.py) 12 | - [Torch ResNeXt] 13 | (https://github.com/facebookresearch/ResNeXt/blob/master/models/resnext.lua) 14 | 15 | """ 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | import os 21 | 22 | from . import imagenet_utils 23 | from .imagenet_utils import decode_predictions 24 | from .resnet_common import ResNeXt50 25 | from .resnet_common import ResNeXt101 26 | 27 | 28 | def preprocess_input(x, **kwargs): 29 | """Preprocesses a numpy array encoding a batch of images. 30 | 31 | # Arguments 32 | x: a 4D numpy array consists of RGB values within [0, 255]. 33 | data_format: data format of the image tensor. 34 | 35 | # Returns 36 | Preprocessed array. 37 | """ 38 | return imagenet_utils.preprocess_input(x, mode='torch', **kwargs) 39 | -------------------------------------------------------------------------------- /keras_applications/vgg16.py: -------------------------------------------------------------------------------- 1 | """VGG16 model for Keras. 2 | 3 | # Reference 4 | 5 | - [Very Deep Convolutional Networks for Large-Scale Image Recognition]( 6 | https://arxiv.org/abs/1409.1556) (ICLR 2015) 7 | 8 | """ 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | 13 | import os 14 | 15 | from . import get_submodules_from_kwargs 16 | from . import imagenet_utils 17 | from .imagenet_utils import decode_predictions 18 | from .imagenet_utils import _obtain_input_shape 19 | 20 | preprocess_input = imagenet_utils.preprocess_input 21 | 22 | WEIGHTS_PATH = ('https://github.com/fchollet/deep-learning-models/' 23 | 'releases/download/v0.1/' 24 | 'vgg16_weights_tf_dim_ordering_tf_kernels.h5') 25 | WEIGHTS_PATH_NO_TOP = ('https://github.com/fchollet/deep-learning-models/' 26 | 'releases/download/v0.1/' 27 | 'vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5') 28 | 29 | 30 | def VGG16(include_top=True, 31 | weights='imagenet', 32 | input_tensor=None, 33 | input_shape=None, 34 | pooling=None, 35 | classes=1000, 36 | **kwargs): 37 | """Instantiates the VGG16 architecture. 38 | 39 | Optionally loads weights pre-trained on ImageNet. 40 | Note that the data format convention used by the model is 41 | the one specified in your Keras config at `~/.keras/keras.json`. 
42 | 43 | # Arguments 44 | include_top: whether to include the 3 fully-connected 45 | layers at the top of the network. 46 | weights: one of `None` (random initialization), 47 | 'imagenet' (pre-training on ImageNet), 48 | or the path to the weights file to be loaded. 49 | input_tensor: optional Keras tensor 50 | (i.e. output of `layers.Input()`) 51 | to use as image input for the model. 52 | input_shape: optional shape tuple, only to be specified 53 | if `include_top` is False (otherwise the input shape 54 | has to be `(224, 224, 3)` 55 | (with `channels_last` data format) 56 | or `(3, 224, 224)` (with `channels_first` data format)). 57 | It should have exactly 3 input channels, 58 | and width and height should be no smaller than 32. 59 | E.g. `(200, 200, 3)` would be one valid value. 60 | pooling: Optional pooling mode for feature extraction 61 | when `include_top` is `False`. 62 | - `None` means that the output of the model will be 63 | the 4D tensor output of the 64 | last convolutional block. 65 | - `avg` means that global average pooling 66 | will be applied to the output of the 67 | last convolutional block, and thus 68 | the output of the model will be a 2D tensor. 69 | - `max` means that global max pooling will 70 | be applied. 71 | classes: optional number of classes to classify images 72 | into, only to be specified if `include_top` is True, and 73 | if no `weights` argument is specified. 74 | 75 | # Returns 76 | A Keras model instance. 77 | 78 | # Raises 79 | ValueError: in case of invalid argument for `weights`, 80 | or invalid input shape. 81 | """ 82 | backend, layers, models, keras_utils = get_submodules_from_kwargs(kwargs) 83 | 84 | if not (weights in {'imagenet', None} or os.path.exists(weights)): 85 | raise ValueError('The `weights` argument should be either ' 86 | '`None` (random initialization), `imagenet` ' 87 | '(pre-training on ImageNet), ' 88 | 'or the path to the weights file to be loaded.') 89 | 90 | if weights == 'imagenet' and include_top and classes != 1000: 91 | raise ValueError('If using `weights` as `"imagenet"` with `include_top`' 92 | ' as true, `classes` should be 1000') 93 | # Determine proper input shape 94 | input_shape = _obtain_input_shape(input_shape, 95 | default_size=224, 96 | min_size=32, 97 | data_format=backend.image_data_format(), 98 | require_flatten=include_top, 99 | weights=weights) 100 | 101 | if input_tensor is None: 102 | img_input = layers.Input(shape=input_shape) 103 | else: 104 | if not backend.is_keras_tensor(input_tensor): 105 | img_input = layers.Input(tensor=input_tensor, shape=input_shape) 106 | else: 107 | img_input = input_tensor 108 | # Block 1 109 | x = layers.Conv2D(64, (3, 3), 110 | activation='relu', 111 | padding='same', 112 | name='block1_conv1')(img_input) 113 | x = layers.Conv2D(64, (3, 3), 114 | activation='relu', 115 | padding='same', 116 | name='block1_conv2')(x) 117 | x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x) 118 | 119 | # Block 2 120 | x = layers.Conv2D(128, (3, 3), 121 | activation='relu', 122 | padding='same', 123 | name='block2_conv1')(x) 124 | x = layers.Conv2D(128, (3, 3), 125 | activation='relu', 126 | padding='same', 127 | name='block2_conv2')(x) 128 | x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x) 129 | 130 | # Block 3 131 | x = layers.Conv2D(256, (3, 3), 132 | activation='relu', 133 | padding='same', 134 | name='block3_conv1')(x) 135 | x = layers.Conv2D(256, (3, 3), 136 | activation='relu', 137 | padding='same', 138 | name='block3_conv2')(x) 139 | x =
layers.Conv2D(256, (3, 3), 140 | activation='relu', 141 | padding='same', 142 | name='block3_conv3')(x) 143 | x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x) 144 | 145 | # Block 4 146 | x = layers.Conv2D(512, (3, 3), 147 | activation='relu', 148 | padding='same', 149 | name='block4_conv1')(x) 150 | x = layers.Conv2D(512, (3, 3), 151 | activation='relu', 152 | padding='same', 153 | name='block4_conv2')(x) 154 | x = layers.Conv2D(512, (3, 3), 155 | activation='relu', 156 | padding='same', 157 | name='block4_conv3')(x) 158 | x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x) 159 | 160 | # Block 5 161 | x = layers.Conv2D(512, (3, 3), 162 | activation='relu', 163 | padding='same', 164 | name='block5_conv1')(x) 165 | x = layers.Conv2D(512, (3, 3), 166 | activation='relu', 167 | padding='same', 168 | name='block5_conv2')(x) 169 | x = layers.Conv2D(512, (3, 3), 170 | activation='relu', 171 | padding='same', 172 | name='block5_conv3')(x) 173 | x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x) 174 | 175 | if include_top: 176 | # Classification block 177 | x = layers.Flatten(name='flatten')(x) 178 | x = layers.Dense(4096, activation='relu', name='fc1')(x) 179 | x = layers.Dense(4096, activation='relu', name='fc2')(x) 180 | x = layers.Dense(classes, activation='softmax', name='predictions')(x) 181 | else: 182 | if pooling == 'avg': 183 | x = layers.GlobalAveragePooling2D()(x) 184 | elif pooling == 'max': 185 | x = layers.GlobalMaxPooling2D()(x) 186 | 187 | # Ensure that the model takes into account 188 | # any potential predecessors of `input_tensor`. 189 | if input_tensor is not None: 190 | inputs = keras_utils.get_source_inputs(input_tensor) 191 | else: 192 | inputs = img_input 193 | # Create model. 194 | model = models.Model(inputs, x, name='vgg16') 195 | 196 | # Load weights. 197 | if weights == 'imagenet': 198 | if include_top: 199 | weights_path = keras_utils.get_file( 200 | 'vgg16_weights_tf_dim_ordering_tf_kernels.h5', 201 | WEIGHTS_PATH, 202 | cache_subdir='models', 203 | file_hash='64373286793e3c8b2b4e3219cbf3544b') 204 | else: 205 | weights_path = keras_utils.get_file( 206 | 'vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5', 207 | WEIGHTS_PATH_NO_TOP, 208 | cache_subdir='models', 209 | file_hash='6d6bbae143d832006294945121d1f1fc') 210 | model.load_weights(weights_path) 211 | if backend.backend() == 'theano': 212 | keras_utils.convert_all_kernels_in_model(model) 213 | elif weights is not None: 214 | model.load_weights(weights) 215 | 216 | return model 217 | -------------------------------------------------------------------------------- /keras_applications/vgg19.py: -------------------------------------------------------------------------------- 1 | """VGG19 model for Keras. 2 | 3 | # Reference 4 | 5 | - [Very Deep Convolutional Networks for Large-Scale Image Recognition]( 6 | https://arxiv.org/abs/1409.1556) (ICLR 2015) 7 | 8 | """ 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | 13 | import os 14 | 15 | from . import get_submodules_from_kwargs 16 | from . 
import imagenet_utils 17 | from .imagenet_utils import decode_predictions 18 | from .imagenet_utils import _obtain_input_shape 19 | 20 | preprocess_input = imagenet_utils.preprocess_input 21 | 22 | WEIGHTS_PATH = ('https://github.com/fchollet/deep-learning-models/' 23 | 'releases/download/v0.1/' 24 | 'vgg19_weights_tf_dim_ordering_tf_kernels.h5') 25 | WEIGHTS_PATH_NO_TOP = ('https://github.com/fchollet/deep-learning-models/' 26 | 'releases/download/v0.1/' 27 | 'vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5') 28 | 29 | 30 | def VGG19(include_top=True, 31 | weights='imagenet', 32 | input_tensor=None, 33 | input_shape=None, 34 | pooling=None, 35 | classes=1000, 36 | **kwargs): 37 | """Instantiates the VGG19 architecture. 38 | 39 | Optionally loads weights pre-trained on ImageNet. 40 | Note that the data format convention used by the model is 41 | the one specified in your Keras config at `~/.keras/keras.json`. 42 | 43 | # Arguments 44 | include_top: whether to include the 3 fully-connected 45 | layers at the top of the network. 46 | weights: one of `None` (random initialization), 47 | 'imagenet' (pre-training on ImageNet), 48 | or the path to the weights file to be loaded. 49 | input_tensor: optional Keras tensor 50 | (i.e. output of `layers.Input()`) 51 | to use as image input for the model. 52 | input_shape: optional shape tuple, only to be specified 53 | if `include_top` is False (otherwise the input shape 54 | has to be `(224, 224, 3)` 55 | (with `channels_last` data format) 56 | or `(3, 224, 224)` (with `channels_first` data format)). 57 | It should have exactly 3 input channels, 58 | and width and height should be no smaller than 32. 59 | E.g. `(200, 200, 3)` would be one valid value. 60 | pooling: Optional pooling mode for feature extraction 61 | when `include_top` is `False`. 62 | - `None` means that the output of the model will be 63 | the 4D tensor output of the 64 | last convolutional block. 65 | - `avg` means that global average pooling 66 | will be applied to the output of the 67 | last convolutional block, and thus 68 | the output of the model will be a 2D tensor. 69 | - `max` means that global max pooling will 70 | be applied. 71 | classes: optional number of classes to classify images 72 | into, only to be specified if `include_top` is True, and 73 | if no `weights` argument is specified. 74 | 75 | # Returns 76 | A Keras model instance. 77 | 78 | # Raises 79 | ValueError: in case of invalid argument for `weights`, 80 | or invalid input shape.
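
    # Example

    A minimal usage sketch; it assumes VGG19 is consumed through an installed
    Keras distribution (which injects the `backend`, `layers`, `models` and
    `utils` submodules for you) and uses the sample image shipped with this
    repository's tests:

    ```python
    from keras.applications.vgg19 import VGG19
    from keras.applications.vgg19 import preprocess_input, decode_predictions
    from keras.preprocessing import image
    import numpy as np

    # Build the model with ImageNet weights and classify one image.
    model = VGG19(weights='imagenet')
    img = image.load_img('tests/data/elephant.jpg', target_size=(224, 224))
    x = preprocess_input(np.expand_dims(image.img_to_array(img), axis=0))
    print(decode_predictions(model.predict(x), top=3)[0])
    ```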
81 | """ 82 | backend, layers, models, keras_utils = get_submodules_from_kwargs(kwargs) 83 | 84 | if not (weights in {'imagenet', None} or os.path.exists(weights)): 85 | raise ValueError('The `weights` argument should be either ' 86 | '`None` (random initialization), `imagenet` ' 87 | '(pre-training on ImageNet), ' 88 | 'or the path to the weights file to be loaded.') 89 | 90 | if weights == 'imagenet' and include_top and classes != 1000: 91 | raise ValueError('If using `weights` as `"imagenet"` with `include_top`' 92 | ' as true, `classes` should be 1000') 93 | # Determine proper input shape 94 | input_shape = _obtain_input_shape(input_shape, 95 | default_size=224, 96 | min_size=32, 97 | data_format=backend.image_data_format(), 98 | require_flatten=include_top, 99 | weights=weights) 100 | 101 | if input_tensor is None: 102 | img_input = layers.Input(shape=input_shape) 103 | else: 104 | if not backend.is_keras_tensor(input_tensor): 105 | img_input = layers.Input(tensor=input_tensor, shape=input_shape) 106 | else: 107 | img_input = input_tensor 108 | # Block 1 109 | x = layers.Conv2D(64, (3, 3), 110 | activation='relu', 111 | padding='same', 112 | name='block1_conv1')(img_input) 113 | x = layers.Conv2D(64, (3, 3), 114 | activation='relu', 115 | padding='same', 116 | name='block1_conv2')(x) 117 | x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x) 118 | 119 | # Block 2 120 | x = layers.Conv2D(128, (3, 3), 121 | activation='relu', 122 | padding='same', 123 | name='block2_conv1')(x) 124 | x = layers.Conv2D(128, (3, 3), 125 | activation='relu', 126 | padding='same', 127 | name='block2_conv2')(x) 128 | x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x) 129 | 130 | # Block 3 131 | x = layers.Conv2D(256, (3, 3), 132 | activation='relu', 133 | padding='same', 134 | name='block3_conv1')(x) 135 | x = layers.Conv2D(256, (3, 3), 136 | activation='relu', 137 | padding='same', 138 | name='block3_conv2')(x) 139 | x = layers.Conv2D(256, (3, 3), 140 | activation='relu', 141 | padding='same', 142 | name='block3_conv3')(x) 143 | x = layers.Conv2D(256, (3, 3), 144 | activation='relu', 145 | padding='same', 146 | name='block3_conv4')(x) 147 | x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x) 148 | 149 | # Block 4 150 | x = layers.Conv2D(512, (3, 3), 151 | activation='relu', 152 | padding='same', 153 | name='block4_conv1')(x) 154 | x = layers.Conv2D(512, (3, 3), 155 | activation='relu', 156 | padding='same', 157 | name='block4_conv2')(x) 158 | x = layers.Conv2D(512, (3, 3), 159 | activation='relu', 160 | padding='same', 161 | name='block4_conv3')(x) 162 | x = layers.Conv2D(512, (3, 3), 163 | activation='relu', 164 | padding='same', 165 | name='block4_conv4')(x) 166 | x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x) 167 | 168 | # Block 5 169 | x = layers.Conv2D(512, (3, 3), 170 | activation='relu', 171 | padding='same', 172 | name='block5_conv1')(x) 173 | x = layers.Conv2D(512, (3, 3), 174 | activation='relu', 175 | padding='same', 176 | name='block5_conv2')(x) 177 | x = layers.Conv2D(512, (3, 3), 178 | activation='relu', 179 | padding='same', 180 | name='block5_conv3')(x) 181 | x = layers.Conv2D(512, (3, 3), 182 | activation='relu', 183 | padding='same', 184 | name='block5_conv4')(x) 185 | x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x) 186 | 187 | if include_top: 188 | # Classification block 189 | x = layers.Flatten(name='flatten')(x) 190 | x = layers.Dense(4096, activation='relu', name='fc1')(x) 191 | x = 
layers.Dense(4096, activation='relu', name='fc2')(x) 192 | x = layers.Dense(classes, activation='softmax', name='predictions')(x) 193 | else: 194 | if pooling == 'avg': 195 | x = layers.GlobalAveragePooling2D()(x) 196 | elif pooling == 'max': 197 | x = layers.GlobalMaxPooling2D()(x) 198 | 199 | # Ensure that the model takes into account 200 | # any potential predecessors of `input_tensor`. 201 | if input_tensor is not None: 202 | inputs = keras_utils.get_source_inputs(input_tensor) 203 | else: 204 | inputs = img_input 205 | # Create model. 206 | model = models.Model(inputs, x, name='vgg19') 207 | 208 | # Load weights. 209 | if weights == 'imagenet': 210 | if include_top: 211 | weights_path = keras_utils.get_file( 212 | 'vgg19_weights_tf_dim_ordering_tf_kernels.h5', 213 | WEIGHTS_PATH, 214 | cache_subdir='models', 215 | file_hash='cbe5617147190e668d6c5d5026f83318') 216 | else: 217 | weights_path = keras_utils.get_file( 218 | 'vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5', 219 | WEIGHTS_PATH_NO_TOP, 220 | cache_subdir='models', 221 | file_hash='253f8cb515780f3b799900260a226db6') 222 | model.load_weights(weights_path) 223 | if backend.backend() == 'theano': 224 | keras_utils.convert_all_kernels_in_model(model) 225 | elif weights is not None: 226 | model.load_weights(weights) 227 | 228 | return model 229 | -------------------------------------------------------------------------------- /keras_applications/xception.py: -------------------------------------------------------------------------------- 1 | """Xception V1 model for Keras. 2 | 3 | On ImageNet, this model gets to a top-1 validation accuracy of 0.790 4 | and a top-5 validation accuracy of 0.945. 5 | 6 | Do note that the input image format for this model is different than for 7 | the VGG16 and ResNet models (299x299 instead of 224x224), 8 | and that the input preprocessing function 9 | is also different (same as Inception V3). 10 | 11 | # Reference 12 | 13 | - [Xception: Deep Learning with Depthwise Separable Convolutions]( 14 | https://arxiv.org/abs/1610.02357) (CVPR 2017) 15 | 16 | """ 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import os 22 | import warnings 23 | 24 | from . import get_submodules_from_kwargs 25 | from . import imagenet_utils 26 | from .imagenet_utils import decode_predictions 27 | from .imagenet_utils import _obtain_input_shape 28 | 29 | 30 | TF_WEIGHTS_PATH = ( 31 | 'https://github.com/fchollet/deep-learning-models/' 32 | 'releases/download/v0.4/' 33 | 'xception_weights_tf_dim_ordering_tf_kernels.h5') 34 | TF_WEIGHTS_PATH_NO_TOP = ( 35 | 'https://github.com/fchollet/deep-learning-models/' 36 | 'releases/download/v0.4/' 37 | 'xception_weights_tf_dim_ordering_tf_kernels_notop.h5') 38 | 39 | 40 | def Xception(include_top=True, 41 | weights='imagenet', 42 | input_tensor=None, 43 | input_shape=None, 44 | pooling=None, 45 | classes=1000, 46 | **kwargs): 47 | """Instantiates the Xception architecture. 48 | 49 | Optionally loads weights pre-trained on ImageNet. 50 | Note that the data format convention used by the model is 51 | the one specified in your Keras config at `~/.keras/keras.json`. 52 | 53 | Note that the default input image size for this model is 299x299. 54 | 55 | # Arguments 56 | include_top: whether to include the fully-connected 57 | layer at the top of the network. 58 | weights: one of `None` (random initialization), 59 | 'imagenet' (pre-training on ImageNet), 60 | or the path to the weights file to be loaded. 
61 | input_tensor: optional Keras tensor 62 | (i.e. output of `layers.Input()`) 63 | to use as image input for the model. 64 | input_shape: optional shape tuple, only to be specified 65 | if `include_top` is False (otherwise the input shape 66 | has to be `(299, 299, 3)`). 67 | It should have exactly 3 input channels, 68 | and width and height should be no smaller than 71. 69 | E.g. `(150, 150, 3)` would be one valid value. 70 | pooling: Optional pooling mode for feature extraction 71 | when `include_top` is `False`. 72 | - `None` means that the output of the model will be 73 | the 4D tensor output of the 74 | last convolutional block. 75 | - `avg` means that global average pooling 76 | will be applied to the output of the 77 | last convolutional block, and thus 78 | the output of the model will be a 2D tensor. 79 | - `max` means that global max pooling will 80 | be applied. 81 | classes: optional number of classes to classify images 82 | into, only to be specified if `include_top` is True, 83 | and if no `weights` argument is specified. 84 | 85 | # Returns 86 | A Keras model instance. 87 | 88 | # Raises 89 | ValueError: in case of invalid argument for `weights`, 90 | or invalid input shape. 91 | RuntimeError: If attempting to run this model with a 92 | backend that does not support separable convolutions. 93 | """ 94 | backend, layers, models, keras_utils = get_submodules_from_kwargs(kwargs) 95 | 96 | if not (weights in {'imagenet', None} or os.path.exists(weights)): 97 | raise ValueError('The `weights` argument should be either ' 98 | '`None` (random initialization), `imagenet` ' 99 | '(pre-training on ImageNet), ' 100 | 'or the path to the weights file to be loaded.') 101 | 102 | if weights == 'imagenet' and include_top and classes != 1000: 103 | raise ValueError('If using `weights` as `"imagenet"` with `include_top`' 104 | ' as true, `classes` should be 1000') 105 | 106 | # Determine proper input shape 107 | input_shape = _obtain_input_shape(input_shape, 108 | default_size=299, 109 | min_size=71, 110 | data_format=backend.image_data_format(), 111 | require_flatten=include_top, 112 | weights=weights) 113 | 114 | if input_tensor is None: 115 | img_input = layers.Input(shape=input_shape) 116 | else: 117 | if not backend.is_keras_tensor(input_tensor): 118 | img_input = layers.Input(tensor=input_tensor, shape=input_shape) 119 | else: 120 | img_input = input_tensor 121 | 122 | channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1 123 | 124 | x = layers.Conv2D(32, (3, 3), 125 | strides=(2, 2), 126 | use_bias=False, 127 | name='block1_conv1')(img_input) 128 | x = layers.BatchNormalization(axis=channel_axis, name='block1_conv1_bn')(x) 129 | x = layers.Activation('relu', name='block1_conv1_act')(x) 130 | x = layers.Conv2D(64, (3, 3), use_bias=False, name='block1_conv2')(x) 131 | x = layers.BatchNormalization(axis=channel_axis, name='block1_conv2_bn')(x) 132 | x = layers.Activation('relu', name='block1_conv2_act')(x) 133 | 134 | residual = layers.Conv2D(128, (1, 1), 135 | strides=(2, 2), 136 | padding='same', 137 | use_bias=False)(x) 138 | residual = layers.BatchNormalization(axis=channel_axis)(residual) 139 | 140 | x = layers.SeparableConv2D(128, (3, 3), 141 | padding='same', 142 | use_bias=False, 143 | name='block2_sepconv1')(x) 144 | x = layers.BatchNormalization(axis=channel_axis, name='block2_sepconv1_bn')(x) 145 | x = layers.Activation('relu', name='block2_sepconv2_act')(x) 146 | x = layers.SeparableConv2D(128, (3, 3), 147 | padding='same', 148 | use_bias=False, 149 |
name='block2_sepconv2')(x) 150 | x = layers.BatchNormalization(axis=channel_axis, name='block2_sepconv2_bn')(x) 151 | 152 | x = layers.MaxPooling2D((3, 3), 153 | strides=(2, 2), 154 | padding='same', 155 | name='block2_pool')(x) 156 | x = layers.add([x, residual]) 157 | 158 | residual = layers.Conv2D(256, (1, 1), strides=(2, 2), 159 | padding='same', use_bias=False)(x) 160 | residual = layers.BatchNormalization(axis=channel_axis)(residual) 161 | 162 | x = layers.Activation('relu', name='block3_sepconv1_act')(x) 163 | x = layers.SeparableConv2D(256, (3, 3), 164 | padding='same', 165 | use_bias=False, 166 | name='block3_sepconv1')(x) 167 | x = layers.BatchNormalization(axis=channel_axis, name='block3_sepconv1_bn')(x) 168 | x = layers.Activation('relu', name='block3_sepconv2_act')(x) 169 | x = layers.SeparableConv2D(256, (3, 3), 170 | padding='same', 171 | use_bias=False, 172 | name='block3_sepconv2')(x) 173 | x = layers.BatchNormalization(axis=channel_axis, name='block3_sepconv2_bn')(x) 174 | 175 | x = layers.MaxPooling2D((3, 3), strides=(2, 2), 176 | padding='same', 177 | name='block3_pool')(x) 178 | x = layers.add([x, residual]) 179 | 180 | residual = layers.Conv2D(728, (1, 1), 181 | strides=(2, 2), 182 | padding='same', 183 | use_bias=False)(x) 184 | residual = layers.BatchNormalization(axis=channel_axis)(residual) 185 | 186 | x = layers.Activation('relu', name='block4_sepconv1_act')(x) 187 | x = layers.SeparableConv2D(728, (3, 3), 188 | padding='same', 189 | use_bias=False, 190 | name='block4_sepconv1')(x) 191 | x = layers.BatchNormalization(axis=channel_axis, name='block4_sepconv1_bn')(x) 192 | x = layers.Activation('relu', name='block4_sepconv2_act')(x) 193 | x = layers.SeparableConv2D(728, (3, 3), 194 | padding='same', 195 | use_bias=False, 196 | name='block4_sepconv2')(x) 197 | x = layers.BatchNormalization(axis=channel_axis, name='block4_sepconv2_bn')(x) 198 | 199 | x = layers.MaxPooling2D((3, 3), strides=(2, 2), 200 | padding='same', 201 | name='block4_pool')(x) 202 | x = layers.add([x, residual]) 203 | 204 | for i in range(8): 205 | residual = x 206 | prefix = 'block' + str(i + 5) 207 | 208 | x = layers.Activation('relu', name=prefix + '_sepconv1_act')(x) 209 | x = layers.SeparableConv2D(728, (3, 3), 210 | padding='same', 211 | use_bias=False, 212 | name=prefix + '_sepconv1')(x) 213 | x = layers.BatchNormalization(axis=channel_axis, 214 | name=prefix + '_sepconv1_bn')(x) 215 | x = layers.Activation('relu', name=prefix + '_sepconv2_act')(x) 216 | x = layers.SeparableConv2D(728, (3, 3), 217 | padding='same', 218 | use_bias=False, 219 | name=prefix + '_sepconv2')(x) 220 | x = layers.BatchNormalization(axis=channel_axis, 221 | name=prefix + '_sepconv2_bn')(x) 222 | x = layers.Activation('relu', name=prefix + '_sepconv3_act')(x) 223 | x = layers.SeparableConv2D(728, (3, 3), 224 | padding='same', 225 | use_bias=False, 226 | name=prefix + '_sepconv3')(x) 227 | x = layers.BatchNormalization(axis=channel_axis, 228 | name=prefix + '_sepconv3_bn')(x) 229 | 230 | x = layers.add([x, residual]) 231 | 232 | residual = layers.Conv2D(1024, (1, 1), strides=(2, 2), 233 | padding='same', use_bias=False)(x) 234 | residual = layers.BatchNormalization(axis=channel_axis)(residual) 235 | 236 | x = layers.Activation('relu', name='block13_sepconv1_act')(x) 237 | x = layers.SeparableConv2D(728, (3, 3), 238 | padding='same', 239 | use_bias=False, 240 | name='block13_sepconv1')(x) 241 | x = layers.BatchNormalization(axis=channel_axis, name='block13_sepconv1_bn')(x) 242 | x = layers.Activation('relu', 
name='block13_sepconv2_act')(x) 243 | x = layers.SeparableConv2D(1024, (3, 3), 244 | padding='same', 245 | use_bias=False, 246 | name='block13_sepconv2')(x) 247 | x = layers.BatchNormalization(axis=channel_axis, name='block13_sepconv2_bn')(x) 248 | 249 | x = layers.MaxPooling2D((3, 3), 250 | strides=(2, 2), 251 | padding='same', 252 | name='block13_pool')(x) 253 | x = layers.add([x, residual]) 254 | 255 | x = layers.SeparableConv2D(1536, (3, 3), 256 | padding='same', 257 | use_bias=False, 258 | name='block14_sepconv1')(x) 259 | x = layers.BatchNormalization(axis=channel_axis, name='block14_sepconv1_bn')(x) 260 | x = layers.Activation('relu', name='block14_sepconv1_act')(x) 261 | 262 | x = layers.SeparableConv2D(2048, (3, 3), 263 | padding='same', 264 | use_bias=False, 265 | name='block14_sepconv2')(x) 266 | x = layers.BatchNormalization(axis=channel_axis, name='block14_sepconv2_bn')(x) 267 | x = layers.Activation('relu', name='block14_sepconv2_act')(x) 268 | 269 | if include_top: 270 | x = layers.GlobalAveragePooling2D(name='avg_pool')(x) 271 | x = layers.Dense(classes, activation='softmax', name='predictions')(x) 272 | else: 273 | if pooling == 'avg': 274 | x = layers.GlobalAveragePooling2D()(x) 275 | elif pooling == 'max': 276 | x = layers.GlobalMaxPooling2D()(x) 277 | 278 | # Ensure that the model takes into account 279 | # any potential predecessors of `input_tensor`. 280 | if input_tensor is not None: 281 | inputs = keras_utils.get_source_inputs(input_tensor) 282 | else: 283 | inputs = img_input 284 | # Create model. 285 | model = models.Model(inputs, x, name='xception') 286 | 287 | # Load weights. 288 | if weights == 'imagenet': 289 | if include_top: 290 | weights_path = keras_utils.get_file( 291 | 'xception_weights_tf_dim_ordering_tf_kernels.h5', 292 | TF_WEIGHTS_PATH, 293 | cache_subdir='models', 294 | file_hash='0a58e3b7378bc2990ea3b43d5981f1f6') 295 | else: 296 | weights_path = keras_utils.get_file( 297 | 'xception_weights_tf_dim_ordering_tf_kernels_notop.h5', 298 | TF_WEIGHTS_PATH_NO_TOP, 299 | cache_subdir='models', 300 | file_hash='b0042744bf5b25fce3cb969f33bebb97') 301 | model.load_weights(weights_path) 302 | if backend.backend() == 'theano': 303 | keras_utils.convert_all_kernels_in_model(model) 304 | elif weights is not None: 305 | model.load_weights(weights) 306 | 307 | return model 308 | 309 | 310 | def preprocess_input(x, **kwargs): 311 | """Preprocesses a numpy array encoding a batch of images. 312 | 313 | # Arguments 314 | x: a 4D numpy array consists of RGB values within [0, 255]. 315 | 316 | # Returns 317 | Preprocessed array. 318 | """ 319 | return imagenet_utils.preprocess_input(x, mode='tf', **kwargs) 320 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | # Configuration of py.test 2 | [pytest] 3 | addopts=-v 4 | -n 2 5 | --durations=20 6 | 7 | # Do not run tests in the build folder 8 | norecursedirs= build 9 | 10 | # Use 85 as max line length in PEP8 test. 
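# (pytest-pep8 reads this option from the [pytest] section; the stock PEP8 limit is 79.)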
11 | pep8maxlinelength=85 12 | 13 | # PEP-8: the following are ignored: 14 | # E731 do not assign a lambda expression, use a def 15 | # E402 module level import not at top of file 16 | 17 | pep8ignore=* E731 \ 18 | * E402 -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from setuptools import find_packages 3 | 4 | long_description = ''' 5 | Keras Applications is the `applications` module of 6 | the Keras deep learning library. 7 | It provides model definitions and pre-trained weights for a number 8 | of popular architectures, such as VGG16, ResNet50, Xception, MobileNet, and more. 9 | 10 | Read the documentation at: https://keras.io/applications/ 11 | 12 | Keras Applications may be imported directly 13 | from an up-to-date installation of Keras: 14 | 15 | ``` 16 | from keras import applications 17 | ``` 18 | 19 | Keras Applications is compatible with Python 2.7-3.6 20 | and is distributed under the MIT license. 21 | ''' 22 | 23 | setup(name='Keras_Applications', 24 | version='1.0.8', 25 | description='Reference implementations of popular deep learning models', 26 | long_description=long_description, 27 | author='Keras Team', 28 | url='https://github.com/keras-team/keras-applications', 29 | download_url='https://github.com/keras-team/' 30 | 'keras-applications/tarball/1.0.8', 31 | license='MIT', 32 | install_requires=['numpy>=1.9.1', 33 | 'h5py'], 34 | extras_require={ 35 | 'tests': ['pytest', 36 | 'pytest-pep8', 37 | 'pytest-xdist', 38 | 'pytest-cov'], 39 | }, 40 | classifiers=[ 41 | 'Development Status :: 5 - Production/Stable', 42 | 'Intended Audience :: Developers', 43 | 'Intended Audience :: Education', 44 | 'Intended Audience :: Science/Research', 45 | 'License :: OSI Approved :: MIT License', 46 | 'Programming Language :: Python :: 2', 47 | 'Programming Language :: Python :: 2.7', 48 | 'Programming Language :: Python :: 3', 49 | 'Programming Language :: Python :: 3.6', 50 | 'Topic :: Software Development :: Libraries', 51 | 'Topic :: Software Development :: Libraries :: Python Modules' 52 | ], 53 | packages=find_packages()) 54 | -------------------------------------------------------------------------------- /tests/applications_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import random 3 | import six 4 | import numpy as np 5 | 6 | import keras_applications 7 | from keras.applications import densenet 8 | from keras.applications import inception_resnet_v2 9 | from keras.applications import inception_v3 10 | from keras.applications import mobilenet 11 | try: 12 | from keras.applications import mobilenet_v2 13 | except ImportError: 14 | from keras.applications import mobilenetv2 as mobilenet_v2 15 | from keras.applications import nasnet 16 | from keras.applications import resnet50 17 | from keras.applications import vgg16 18 | from keras.applications import vgg19 19 | from keras.applications import xception 20 | from keras.preprocessing import image 21 | from keras import backend 22 | from keras import layers 23 | from keras import models 24 | from keras import utils 25 | 26 | from multiprocessing import Process, Queue 27 | 28 | 29 | def keras_modules_injection(base_fun): 30 | 31 | def wrapper(*args, **kwargs): 32 | kwargs['backend'] = backend 33 | kwargs['layers'] = layers 34 | kwargs['models'] = models 35 | kwargs['utils'] = utils 36 | return base_fun(*args,
**kwargs) 37 | return wrapper 38 | 39 | 40 | for (name, module) in [('resnet', keras_applications.resnet), 41 | ('resnet_v2', keras_applications.resnet_v2), 42 | ('resnext', keras_applications.resnext), 43 | ('efficientnet', keras_applications.efficientnet), 44 | ('mobilenet_v3', keras_applications.mobilenet_v3)]: 45 | module.decode_predictions = keras_modules_injection(module.decode_predictions) 46 | module.preprocess_input = keras_modules_injection(module.preprocess_input) 47 | for app in dir(module): 48 | if app[0].isupper() and callable(getattr(module, app)): 49 | setattr(module, app, keras_modules_injection(getattr(module, app))) 50 | setattr(keras_applications, name, module) 51 | 52 | 53 | RESNET_LIST = [keras_applications.resnet.ResNet50, 54 | keras_applications.resnet.ResNet101, 55 | keras_applications.resnet.ResNet152] 56 | RESNETV2_LIST = [keras_applications.resnet_v2.ResNet50V2, 57 | keras_applications.resnet_v2.ResNet101V2, 58 | keras_applications.resnet_v2.ResNet152V2] 59 | RESNEXT_LIST = [keras_applications.resnext.ResNeXt50, 60 | keras_applications.resnext.ResNeXt101] 61 | MOBILENET_LIST = [(mobilenet.MobileNet, mobilenet, 1024), 62 | (mobilenet_v2.MobileNetV2, mobilenet_v2, 1280), 63 | (keras_applications.mobilenet_v3.MobileNetV3Small, 64 | keras_applications.mobilenet_v3, 576), 65 | (keras_applications.mobilenet_v3.MobileNetV3Large, 66 | keras_applications.mobilenet_v3, 960)] 67 | DENSENET_LIST = [(densenet.DenseNet121, 1024), 68 | (densenet.DenseNet169, 1664), 69 | (densenet.DenseNet201, 1920)] 70 | NASNET_LIST = [(nasnet.NASNetMobile, 1056), 71 | (nasnet.NASNetLarge, 4032)] 72 | EFFICIENTNET_LIST = [(keras_applications.efficientnet.EfficientNetB0, 1280), 73 | (keras_applications.efficientnet.EfficientNetB1, 1280), 74 | (keras_applications.efficientnet.EfficientNetB2, 1408), 75 | (keras_applications.efficientnet.EfficientNetB3, 1536), 76 | (keras_applications.efficientnet.EfficientNetB4, 1792), 77 | (keras_applications.efficientnet.EfficientNetB5, 2048)] 78 | 79 | 80 | def keras_test(func): 81 | """Function wrapper to clean up after TensorFlow tests. 82 | # Arguments 83 | func: test function to clean up after. 84 | # Returns 85 | A function wrapping the input function. 86 | """ 87 | @six.wraps(func) 88 | def wrapper(*args, **kwargs): 89 | output = func(*args, **kwargs) 90 | if backend.backend() == 'tensorflow' or backend.backend() == 'cntk': 91 | backend.clear_session() 92 | return output 93 | return wrapper 94 | 95 | 96 | def _get_elephant(target_size): 97 | # For models that don't include a Flatten step, 98 | # the default is to accept variable-size inputs 99 | # even when loading ImageNet weights (since it is possible). 100 | # In this case, default to 299x299. 101 | if target_size[0] is None: 102 | target_size = (299, 299) 103 | img = image.load_img('tests/data/elephant.jpg', 104 | target_size=tuple(target_size)) 105 | x = image.img_to_array(img) 106 | return np.expand_dims(x, axis=0) 107 | 108 | 109 | def _get_output_shape(model_fn, preprocess_input=None): 110 | if backend.backend() == 'cntk': 111 | # Create model in a subprocess so that 112 | # the memory consumed by InceptionResNetV2 will be 113 | # released back to the system after this test 114 | # (to deal with OOM error on CNTK backend). 115 | # TODO: remove the use of multiprocessing from these tests 116 | # once a memory clearing mechanism 117 | # is implemented in the CNTK backend. 
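        # The child process builds the model (and, when `preprocess_input` is
        # given, also runs a forward pass on the test image), then reports the
        # results back through `queue`; the parent blocks on `join()` and
        # treats an empty queue as a failed model creation.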
118 | def target(queue): 119 | model = model_fn() 120 | if preprocess_input is None: 121 | queue.put(model.output_shape) 122 | else: 123 | x = _get_elephant(model.input_shape[1:3]) 124 | x = preprocess_input(x) 125 | queue.put((model.output_shape, model.predict(x))) 126 | queue = Queue() 127 | p = Process(target=target, args=(queue,)) 128 | p.start() 129 | p.join() 130 | # The error in a subprocess won't propagate 131 | # to the main process, so we check if the model 132 | # is successfully created by checking if the output shape 133 | # has been put into the queue 134 | assert not queue.empty(), 'Model creation failed.' 135 | return queue.get_nowait() 136 | else: 137 | model = model_fn() 138 | if preprocess_input is None: 139 | return model.output_shape 140 | else: 141 | x = _get_elephant(model.input_shape[1:3]) 142 | x = preprocess_input(x) 143 | return (model.output_shape, model.predict(x)) 144 | 145 | 146 | @keras_test 147 | def _test_application_basic(app, last_dim=1000, module=None): 148 | if module is None: 149 | output_shape = _get_output_shape(lambda: app(weights=None)) 150 | assert output_shape == (None, None, None, last_dim) 151 | else: 152 | output_shape, preds = _get_output_shape( 153 | lambda: app(weights='imagenet'), module.preprocess_input) 154 | assert output_shape == (None, last_dim) 155 | 156 | names = [p[1] for p in module.decode_predictions(preds)[0]] 157 | # Test correct label is in top 3 (weak correctness test). 158 | assert 'African_elephant' in names[:3] 159 | 160 | 161 | @keras_test 162 | def _test_application_notop(app, last_dim): 163 | output_shape = _get_output_shape( 164 | lambda: app(weights=None, include_top=False)) 165 | assert output_shape == (None, None, None, last_dim) 166 | 167 | 168 | @keras_test 169 | def _test_application_variable_input_channels(app, last_dim): 170 | if backend.image_data_format() == 'channels_first': 171 | input_shape = (1, None, None) 172 | else: 173 | input_shape = (None, None, 1) 174 | output_shape = _get_output_shape( 175 | lambda: app(weights=None, include_top=False, input_shape=input_shape)) 176 | assert output_shape == (None, None, None, last_dim) 177 | 178 | if backend.image_data_format() == 'channels_first': 179 | input_shape = (4, None, None) 180 | else: 181 | input_shape = (None, None, 4) 182 | output_shape = _get_output_shape( 183 | lambda: app(weights=None, include_top=False, input_shape=input_shape)) 184 | assert output_shape == (None, None, None, last_dim) 185 | 186 | 187 | @keras_test 188 | def _test_app_pooling(app, last_dim): 189 | output_shape = _get_output_shape( 190 | lambda: app(weights=None, 191 | include_top=False, 192 | pooling=random.choice(['avg', 'max']))) 193 | assert output_shape == (None, last_dim) 194 | 195 | 196 | def test_resnet(): 197 | app = random.choice(RESNET_LIST) 198 | module = keras_applications.resnet 199 | last_dim = 2048 200 | _test_application_basic(app, module=module) 201 | _test_application_notop(app, last_dim) 202 | _test_application_variable_input_channels(app, last_dim) 203 | _test_app_pooling(app, last_dim) 204 | 205 | 206 | def test_resnetv2(): 207 | app = random.choice(RESNETV2_LIST) 208 | module = keras_applications.resnet_v2 209 | last_dim = 2048 210 | _test_application_basic(app, module=module) 211 | _test_application_notop(app, last_dim) 212 | _test_application_variable_input_channels(app, last_dim) 213 | _test_app_pooling(app, last_dim) 214 | 215 | 216 | def test_resnext(): 217 | app = random.choice(RESNEXT_LIST) 218 | module = keras_applications.resnext 219 | last_dim = 
2048 220 | _test_application_basic(app, module=module) 221 | _test_application_notop(app, last_dim) 222 | _test_application_variable_input_channels(app, last_dim) 223 | _test_app_pooling(app, last_dim) 224 | 225 | 226 | def test_vgg(): 227 | app = random.choice([vgg16.VGG16, vgg19.VGG19]) 228 | module = vgg16 229 | last_dim = 512 230 | _test_application_basic(app, module=module) 231 | _test_application_notop(app, last_dim) 232 | _test_application_variable_input_channels(app, last_dim) 233 | _test_app_pooling(app, last_dim) 234 | 235 | 236 | def test_xception(): 237 | app = xception.Xception 238 | module = xception 239 | last_dim = 2048 240 | _test_application_basic(app, module=module) 241 | _test_application_notop(app, last_dim) 242 | _test_application_variable_input_channels(app, last_dim) 243 | _test_app_pooling(app, last_dim) 244 | 245 | 246 | def test_inceptionv3(): 247 | app = inception_v3.InceptionV3 248 | module = inception_v3 249 | last_dim = 2048 250 | _test_application_basic(app, module=module) 251 | _test_application_notop(app, last_dim) 252 | _test_application_variable_input_channels(app, last_dim) 253 | _test_app_pooling(app, last_dim) 254 | 255 | 256 | def test_inceptionresnetv2(): 257 | app = inception_resnet_v2.InceptionResNetV2 258 | module = inception_resnet_v2 259 | last_dim = 1536 260 | _test_application_basic(app, module=module) 261 | _test_application_notop(app, last_dim) 262 | _test_application_variable_input_channels(app, last_dim) 263 | _test_app_pooling(app, last_dim) 264 | 265 | 266 | def test_mobilenet(): 267 | app, module, last_dim = random.choice(MOBILENET_LIST) 268 | _test_application_basic(app, module=module) 269 | _test_application_notop(app, last_dim) 270 | _test_application_variable_input_channels(app, last_dim) 271 | _test_app_pooling(app, last_dim) 272 | 273 | 274 | def test_densenet(): 275 | app, last_dim = random.choice(DENSENET_LIST) 276 | module = densenet 277 | _test_application_basic(app, module=module) 278 | _test_application_notop(app, last_dim) 279 | _test_application_variable_input_channels(app, last_dim) 280 | _test_app_pooling(app, last_dim) 281 | 282 | 283 | def test_nasnet(): 284 | app, last_dim = NASNET_LIST[0] # NASNetLarge is too heavy to test on Travis 285 | module = nasnet 286 | _test_application_basic(app, module=module) 287 | # _test_application_notop(app, last_dim) 288 | # _test_application_variable_input_channels(app, last_dim) 289 | _test_app_pooling(app, last_dim) 290 | 291 | 292 | def test_efficientnet(): 293 | app, last_dim = random.choice(EFFICIENTNET_LIST) 294 | module = keras_applications.efficientnet 295 | _test_application_basic(app, module=module) 296 | _test_application_notop(app, last_dim) 297 | _test_application_variable_input_channels(app, last_dim) 298 | _test_app_pooling(app, last_dim) 299 | 300 | 301 | if __name__ == '__main__': 302 | pytest.main([__file__]) 303 | -------------------------------------------------------------------------------- /tests/data/elephant.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/keras-team/keras-applications/06fbeb0f16e1304f239b2296578d1c50b15a983a/tests/data/elephant.jpg -------------------------------------------------------------------------------- /tests/imagenet_utils_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | from numpy.testing import assert_allclose 4 | 5 | # We don't use keras.applications.imagenet_utils here 6 | 
/tests/data/elephant.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/keras-team/keras-applications/06fbeb0f16e1304f239b2296578d1c50b15a983a/tests/data/elephant.jpg
--------------------------------------------------------------------------------
/tests/imagenet_utils_test.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import numpy as np
3 | from numpy.testing import assert_allclose
4 | 
5 | # We don't use keras.applications.imagenet_utils here
6 | # because we also test _obtain_input_shape which is not exposed.
7 | from keras_applications import imagenet_utils as utils
8 | from keras import backend
9 | from keras import models
10 | from keras import layers
11 | from keras import utils as keras_utils
12 | 
13 | 
14 | def decode_predictions(*args, **kwargs):
15 |     kwargs['backend'] = backend
16 |     kwargs['utils'] = keras_utils
17 |     return utils.decode_predictions(*args, **kwargs)
18 | 
19 | 
20 | def preprocess_input(*args, **kwargs):
21 |     kwargs['backend'] = backend
22 |     return utils.preprocess_input(*args, **kwargs)
23 | 
24 | 
25 | def test_preprocess_input():
26 |     # Test image batch with float and int image input
27 |     x = np.random.uniform(0, 255, (2, 10, 10, 3))
28 |     xint = x.astype('int32')
29 |     assert preprocess_input(x).shape == x.shape
30 |     assert preprocess_input(xint).shape == xint.shape
31 | 
32 |     out1 = preprocess_input(x, 'channels_last')
33 |     out1int = preprocess_input(xint, 'channels_last')
34 |     out2 = preprocess_input(np.transpose(x, (0, 3, 1, 2)), 'channels_first')
35 |     out2int = preprocess_input(np.transpose(xint, (0, 3, 1, 2)), 'channels_first')
36 |     assert_allclose(out1, out2.transpose(0, 2, 3, 1))
37 |     assert_allclose(out1int, out2int.transpose(0, 2, 3, 1))
38 | 
39 |     # Test single image
40 |     x = np.random.uniform(0, 255, (10, 10, 3))
41 |     xint = x.astype('int32')
42 |     assert preprocess_input(x).shape == x.shape
43 |     assert preprocess_input(xint).shape == xint.shape
44 | 
45 |     out1 = preprocess_input(x, 'channels_last')
46 |     out1int = preprocess_input(xint, 'channels_last')
47 |     out2 = preprocess_input(np.transpose(x, (2, 0, 1)), 'channels_first')
48 |     out2int = preprocess_input(np.transpose(xint, (2, 0, 1)), 'channels_first')
49 |     assert_allclose(out1, out2.transpose(1, 2, 0))
50 |     assert_allclose(out1int, out2int.transpose(1, 2, 0))
51 | 
52 |     # Test that writing over the input data works predictably
53 |     for mode in ['torch', 'tf']:
54 |         x = np.random.uniform(0, 255, (2, 10, 10, 3))
55 |         xint = x.astype('int')
56 |         x2 = preprocess_input(x, mode=mode)
57 |         xint2 = preprocess_input(xint)
58 |         assert_allclose(x, x2)
59 |         assert xint.astype('float').max() != xint2.max()
60 |     # Caffe mode works differently from the others
61 |     x = np.random.uniform(0, 255, (2, 10, 10, 3))
62 |     xint = x.astype('int')
63 |     x2 = preprocess_input(x, data_format='channels_last', mode='caffe')
64 |     xint2 = preprocess_input(xint)
65 |     assert_allclose(x, x2[..., ::-1])
66 |     assert xint.astype('float').max() != xint2.max()
67 | 
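# For orientation before the symbolic variant below, a NumPy sketch of the
# three `mode` branches that test_preprocess_input exercises. This is assumed
# from the documented behaviour of imagenet_utils.preprocess_input for
# channels_last inputs; it is not the library code, and it returns fresh
# arrays, whereas the real implementation may write over float inputs in
# place (which the aliasing assertions above rely on).
def _preprocess_modes_sketch(x, mode='caffe'):
    x = np.asarray(x, dtype='float64')
    if mode == 'tf':
        # Scale [0, 255] down to [-1, 1].
        return x / 127.5 - 1.
    if mode == 'torch':
        # Scale to [0, 1], then normalize by the ImageNet mean/std.
        x = x / 255.
        return (x - [0.485, 0.456, 0.406]) / [0.229, 0.224, 0.225]
    # 'caffe' (the default): convert RGB to BGR and zero-center each channel
    # by the ImageNet mean pixel, with no scaling. The BGR flip is what the
    # x2[..., ::-1] comparison above accounts for.
    return x[..., ::-1] - [103.939, 116.779, 123.68]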
68 | 
69 | def test_preprocess_input_symbolic():
70 |     # Test image batch
71 |     x = np.random.uniform(0, 255, (2, 10, 10, 3))
72 |     inputs = layers.Input(shape=x.shape[1:])
73 |     outputs = layers.Lambda(preprocess_input, output_shape=x.shape[1:])(inputs)
74 |     model = models.Model(inputs, outputs)
75 |     assert model.predict(x).shape == x.shape
76 | 
77 |     outputs1 = layers.Lambda(
78 |         lambda x: preprocess_input(x, 'channels_last'),
79 |         output_shape=x.shape[1:])(inputs)
80 |     model1 = models.Model(inputs, outputs1)
81 |     out1 = model1.predict(x)
82 |     x2 = np.transpose(x, (0, 3, 1, 2))
83 |     inputs2 = layers.Input(shape=x2.shape[1:])
84 |     outputs2 = layers.Lambda(
85 |         lambda x: preprocess_input(x, 'channels_first'),
86 |         output_shape=x2.shape[1:])(inputs2)
87 |     model2 = models.Model(inputs2, outputs2)
88 |     out2 = model2.predict(x2)
89 |     assert_allclose(out1, out2.transpose(0, 2, 3, 1))
90 | 
91 |     # Test single image
92 |     x = np.random.uniform(0, 255, (10, 10, 3))
93 |     inputs = layers.Input(shape=x.shape)
94 |     outputs = layers.Lambda(preprocess_input, output_shape=x.shape)(inputs)
95 |     model = models.Model(inputs, outputs)
96 |     assert model.predict(x[np.newaxis])[0].shape == x.shape
97 | 
98 |     outputs1 = layers.Lambda(
99 |         lambda x: preprocess_input(x, 'channels_last'),
100 |         output_shape=x.shape)(inputs)
101 |     model1 = models.Model(inputs, outputs1)
102 |     out1 = model1.predict(x[np.newaxis])[0]
103 |     x2 = np.transpose(x, (2, 0, 1))
104 |     inputs2 = layers.Input(shape=x2.shape)
105 |     outputs2 = layers.Lambda(
106 |         lambda x: preprocess_input(x, 'channels_first'),
107 |         output_shape=x2.shape)(inputs2)
108 |     model2 = models.Model(inputs2, outputs2)
109 |     out2 = model2.predict(x2[np.newaxis])[0]
110 |     assert_allclose(out1, out2.transpose(1, 2, 0))
111 | 
112 | 
113 | def test_decode_predictions():
114 |     x = np.zeros((2, 1000))
115 |     x[0, 372] = 1.0
116 |     x[1, 549] = 1.0
117 |     outs = decode_predictions(x, top=1)
118 |     scores = [out[0][2] for out in outs]
119 |     assert scores[0] == scores[1]
120 | 
121 |     # Raises when the number of columns does not match the 1000 ImageNet classes.
122 |     with pytest.raises(ValueError):
123 |         decode_predictions(np.ones((2, 100)))
124 | 
125 | 
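# For reference, the structure decode_predictions returns: one list per input
# row, each a list of (wordnet_id, class_name, score) tuples sorted by
# descending score. A hedged sketch follows (underscore-prefixed so pytest
# does not collect it; running it needs network access to fetch the
# class-index file, and the label behind any given index is not spelled out
# here):
def _decode_predictions_structure_demo():
    x = np.zeros((1, 1000))
    x[0, 372] = 1.0
    outs = decode_predictions(x, top=2)
    assert len(outs) == 1 and len(outs[0]) == 2
    wordnet_id, class_name, score = outs[0][0]
    assert score == 1.0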
126 | def test_obtain_input_shape():
127 |     # input_shape does not match default_size when weights='imagenet'.
128 |     with pytest.raises(ValueError):
129 |         utils._obtain_input_shape(
130 |             input_shape=(224, 224, 3),
131 |             default_size=299,
132 |             min_size=139,
133 |             data_format='channels_last',
134 |             require_flatten=True,
135 |             weights='imagenet')
136 | 
137 |     # Test invalid use cases
138 |     for data_format in ['channels_last', 'channels_first']:
139 |         # Test warning
140 |         shape = (139, 139)
141 |         if data_format == 'channels_last':
142 |             input_shape = shape + (99,)
143 |         else:
144 |             input_shape = (99,) + shape
145 |         with pytest.warns(UserWarning):
146 |             utils._obtain_input_shape(
147 |                 input_shape=input_shape,
148 |                 default_size=None,
149 |                 min_size=139,
150 |                 data_format=data_format,
151 |                 require_flatten=False,
152 |                 weights='fake_weights')
153 | 
154 |         # input_shape is smaller than min_size.
155 |         shape = (100, 100)
156 |         if data_format == 'channels_last':
157 |             input_shape = shape + (3,)
158 |         else:
159 |             input_shape = (3,) + shape
160 |         with pytest.raises(ValueError):
161 |             utils._obtain_input_shape(
162 |                 input_shape=input_shape,
163 |                 default_size=None,
164 |                 min_size=139,
165 |                 data_format=data_format,
166 |                 require_flatten=False)
167 | 
168 |         # shape is 1D.
169 |         shape = (100,)
170 |         if data_format == 'channels_last':
171 |             input_shape = shape + (3,)
172 |         else:
173 |             input_shape = (3,) + shape
174 |         with pytest.raises(ValueError):
175 |             utils._obtain_input_shape(
176 |                 input_shape=input_shape,
177 |                 default_size=None,
178 |                 min_size=139,
179 |                 data_format=data_format,
180 |                 require_flatten=False)
181 | 
182 |         # the number of channels is 5, not 3.
183 |         shape = (100, 100)
184 |         if data_format == 'channels_last':
185 |             input_shape = shape + (5,)
186 |         else:
187 |             input_shape = (5,) + shape
188 |         with pytest.raises(ValueError):
189 |             utils._obtain_input_shape(
190 |                 input_shape=input_shape,
191 |                 default_size=None,
192 |                 min_size=139,
193 |                 data_format=data_format,
194 |                 require_flatten=False)
195 | 
196 |     # require_flatten=True with dynamic input shape.
197 |     with pytest.raises(ValueError):
198 |         utils._obtain_input_shape(
199 |             input_shape=None,
200 |             default_size=None,
201 |             min_size=139,
202 |             data_format='channels_first',
203 |             require_flatten=True)
204 | 
205 |     # Test include_top
206 |     assert utils._obtain_input_shape(
207 |         input_shape=(3, 200, 200),
208 |         default_size=None,
209 |         min_size=139,
210 |         data_format='channels_first',
211 |         require_flatten=True) == (3, 200, 200)
212 | 
213 |     assert utils._obtain_input_shape(
214 |         input_shape=None,
215 |         default_size=None,
216 |         min_size=139,
217 |         data_format='channels_last',
218 |         require_flatten=False) == (None, None, 3)
219 | 
220 |     assert utils._obtain_input_shape(
221 |         input_shape=None,
222 |         default_size=None,
223 |         min_size=139,
224 |         data_format='channels_first',
225 |         require_flatten=False) == (3, None, None)
226 | 
227 |     assert utils._obtain_input_shape(
228 |         input_shape=None,
229 |         default_size=None,
230 |         min_size=139,
231 |         data_format='channels_last',
232 |         require_flatten=False) == (None, None, 3)
233 | 
234 |     assert utils._obtain_input_shape(
235 |         input_shape=(150, 150, 3),
236 |         default_size=None,
237 |         min_size=139,
238 |         data_format='channels_last',
239 |         require_flatten=False) == (150, 150, 3)
240 | 
241 |     assert utils._obtain_input_shape(
242 |         input_shape=(3, None, None),
243 |         default_size=None,
244 |         min_size=139,
245 |         data_format='channels_first',
246 |         require_flatten=False) == (3, None, None)
247 | 
248 | 
249 | if __name__ == '__main__':
250 |     pytest.main([__file__])
251 | 
--------------------------------------------------------------------------------
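Taken together, the two test files above cover the pieces of the canonical inference workflow. For completeness, a minimal end-to-end sketch of that workflow, using the elephant image shipped under tests/data/ and the public keras.applications wrapper (which injects the backend for you); it assumes the pretrained ImageNet weights and the class-index file can be downloaded, and the decoded labels shown are whatever the model actually predicts:

    import numpy as np
    from keras.preprocessing import image
    from keras.applications import resnet50

    model = resnet50.ResNet50(weights='imagenet')

    img = image.load_img('tests/data/elephant.jpg', target_size=(224, 224))
    x = image.img_to_array(img)[np.newaxis]   # batch of one, (1, 224, 224, 3)
    x = resnet50.preprocess_input(x)          # 'caffe'-style mode by default

    preds = model.predict(x)
    print(resnet50.decode_predictions(preds, top=3)[0])
    # Expect elephant-related classes at the top; exact labels and scores vary.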