├── .gitignore ├── .gitmodules ├── LICENSE ├── README.md ├── cifar10_deepncm.py ├── cifar10_download_and_extract.py ├── deepncm_do.sh ├── deepncm_experiments.txt ├── figs ├── c100dict.npy ├── c10dict.npy ├── cifar10_rmd_eval.npz ├── cifar10_rmd_train.npz ├── deepncm_overview.ipynb ├── deepncm_rmd.ipynb ├── exp_cifar10_rmd.pdf ├── exp_cifar_best.pdf ├── exp_cifar_overview.pdf └── exp_cifar_overview.png ├── imagenet_deepncm.py ├── resnet_deepncm_run_loop.py ├── resnet_deepx.py ├── resnet_model.py ├── resnet_ncm.py └── resnet_ncmequal.py /.gitignore: -------------------------------------------------------------------------------- 1 | # DeepNCM Experiment Log Files: 2 | logs/ 3 | 4 | # GitHub provided Python .gitignore 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | env/ 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | downloads/ 20 | eggs/ 21 | .eggs/ 22 | lib/ 23 | lib64/ 24 | parts/ 25 | sdist/ 26 | var/ 27 | wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | .hypothesis/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | local_settings.py 60 | 61 | # Flask stuff: 62 | instance/ 63 | .webassets-cache 64 | 65 | # Scrapy stuff: 66 | .scrapy 67 | 68 | # Sphinx documentation 69 | docs/_build/ 70 | 71 | # PyBuilder 72 | target/ 73 | 74 | # Jupyter Notebook 75 | .ipynb_checkpoints 76 | 77 | # pyenv 78 | .python-version 79 | 80 | # celery beat schedule file 81 | celerybeat-schedule 82 | 83 | # SageMath parsed files 84 | *.sage.py 85 | 86 | # dotenv 87 | .env 88 | 89 | # virtualenv 90 | .venv 91 | venv/ 92 | ENV/ 93 | 94 | # Spyder project settings 95 | .spyderproject 96 | .spyproject 97 | 98 | # Rope project settings 99 | .ropeproject 100 | 101 | # mkdocs documentation 102 | /site 103 | 104 | # mypy 105 | .mypy_cache/ 106 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "tf/models"] 2 | path = tf/models 3 | url = https://github.com/tensorflow/models.git 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 2-Clause License 2 | 3 | Copyright (c) 2018, Thomas Mensink 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 
11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DeepNCM: Deep Nearest Class Means 2 | This software provides DeepNCM models based on the TensorFlow Models Official ResNet implementation. 3 | 4 | ## Citation 5 | When using this code, or the ideas of DeepNCM, please cite the following paper ([openreview](https://openreview.net/forum?id=rkPLZ4JPM)) 6 | 7 | @INPROCEEDINGS{guerriero18openreview, 8 | author = {Samantha Guerriero and Barbara Caputo and Thomas Mensink}, 9 | title = {DeepNCM: Deep Nearest Class Mean Classifiers}, 10 | booktitle = {International Conference on Learning Representations - Workshop (ICLRw)}, 11 | year = {2018}, 12 | } 13 | 14 | ### Dependencies / Notes 15 | DeepNCM is written in Python, and follows (as closely as possible) the TensorFlow official ResNet implementation. 
16 | - The code is developed with Python 3.6 and TensorFlow 1.6.0 (with GPU support) on Linux 17 | - Reported to work also with Python 2.7. For Python 2.7 change `resnet_ncm.py`, line 113 `ncm['method'].casefold()` to `ncm['method'].lower()` 18 | - Requires TensorFlow Models 19 | - Included as submodule, so to get the required version, after cloning/getting DeepNCM do 20 | `git submodule update --init` 21 | - For reasons of my convenience, `model_dir` and `data_dir` are fixed by the code: for CIFAR-10, `data_dir = /tmp/deepncm/cifar10_data` and `model_dir` is created under `/tmp/deepncm/cifar10_resnet/` (CIFAR-100 uses the corresponding `cifar100_data` and `cifar100_resnet` directories) -- errors might pop up when other directories are used. 22 | - The experiments (deepncm_do.sh) use GNU Parallel for parallelisation (Tange, GNU Parallel - The Command-Line Power Tool, 2011) 23 | 24 | ## Experimental overview on Cifar10/Cifar100 25 | Below are the full experiments, using two learning rates, different condensation (omreset) and decay rates. 26 | ![DeepNCM Experimental Overview](https://github.com/tmensink/deepncm/blob/master/figs/exp_cifar_overview.png) 27 | Comparison of the following methods: Softmax (sof), Online Means (onl), Mean Condensation (con), Decay Mean (dec); the legend reports the maximum Top-1 accuracy. 28 | 29 | The code for the figures above can be found in `figs/deepncm_overview.ipynb` 30 | 31 | # Future research (ideas) 32 | - The current optimiser and learning-rate schedule are optimised for softmax learning. 33 | - Gradient clipping is now set to (-1.0,1.0); this is not tuned 34 | - Experiments on larger datasets, _e.g._, ImageNet 35 | - Class incremental / Open Set learning 36 | 37 | Please contact me if you're interested in collaborating on this! 38 | 39 | ### Copyright (2017-2018) 40 | Thomas Mensink, University of Amsterdam, thomas.mensink@uva.nl 41 | Some preliminary source code was written by Samantha Guerriero and Thomas Mensink. 
42 | -------------------------------------------------------------------------------- /cifar10_deepncm.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Thomas Mensink, University of Amsterdam, thomas.mensink@uva.nl 2 | # 3 | # Belonging to the DeepNCM repository 4 | # DeepNCM is proposed in 5 | # Samantha Guerriero, Barbara Caputo, and Thomas Mensink 6 | # DeepNCM: Deep Nearest Class Mean Classifiers 7 | # ICLR Workshop 2018 8 | # https://openreview.net/forum?id=rkPLZ4JPM 9 | # 10 | # This file (cifar10_deepncm) is based on cifar10_main.py from the 11 | # TensorFlow Models Official ResNet library (release 1.8.0/1.7.0) 12 | # https://github.com/tensorflow/models/tree/master/official/resnet 13 | # With the following copyright notice: 14 | # 15 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 16 | # 17 | # Licensed under the Apache License, Version 2.0 (the "License"); 18 | # you may not use this file except in compliance with the License. 19 | # You may obtain a copy of the License at 20 | # 21 | # http://www.apache.org/licenses/LICENSE-2.0 22 | # 23 | # Unless required by applicable law or agreed to in writing, software 24 | # distributed under the License is distributed on an "AS IS" BASIS, 25 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 26 | # See the License for the specific language governing permissions and 27 | # limitations under the License. 
28 | # ============================================================================== 29 | """Runs a ResNet model on the CIFAR-10 dataset.""" 30 | 31 | # Changed to include as well CIFAR100 experiments 32 | # Based on the Research ResNet model: 33 | # https://github.com/tensorflow/models/blob/master/research/resnet/cifar_input.py 34 | 35 | from __future__ import absolute_import 36 | from __future__ import division 37 | from __future__ import print_function 38 | 39 | import os 40 | import sys 41 | sys.path.append("./tf/models/") 42 | 43 | import tensorflow as tf # pylint: disable=g-bad-import-order 44 | 45 | import resnet_ncm as resnet 46 | import resnet_deepncm_run_loop as rrl 47 | 48 | ALLOW_MULTIPLE_MODELS = True 49 | DS = None 50 | 51 | def set_DS_global(dataset="cifar10"): 52 | global DS 53 | DS = set_dataset(dataset=dataset) 54 | 55 | def set_dataset(dataset="cifar10"): 56 | print("set_dataset") 57 | s = type('', (), {})() 58 | 59 | s.NUM_IMAGES = { 60 | 'train': 50000, 61 | 'validation': 10000, 62 | } 63 | s.HEIGHT = 32 64 | s.WIDTH = 32 65 | s.NUM_CHANNELS = 3 66 | s.DEFAULT_IMAGE_BYTES = s.HEIGHT * s.WIDTH * s.NUM_CHANNELS 67 | if dataset == "cifar10": 68 | s.DATASET = 'cifar10' 69 | s.NUM_CLASSES = 10 70 | s.NUM_DATA_FILES = 5 71 | s.DEFAULT_MODEL_DIR = "/tmp/deepncm/cifar10_resnet/" 72 | s.DATA_DIR = '/tmp/deepncm/cifar10_data' 73 | s.DATA_PATH = 'cifar-10-batches-bin' 74 | s.DATA_LABEL_O = 0 75 | s.DATA_LABEL_B = 1 76 | else: 77 | s.DATASET = 'cifar100' 78 | s.DATA_LABEL_O = 1 79 | s.DATA_LABEL_B = 1 80 | s.NUM_CLASSES = 100 81 | s.NUM_DATA_FILES = 1 82 | s.DATA_DIR = '/tmp/deepncm/cifar100_data' 83 | s.DEFAULT_MODEL_DIR = "/tmp/deepncm/cifar100_resnet/" 84 | s.DATA_PATH = 'cifar-100-binary' 85 | 86 | s.RECORD_BYTES = s.DEFAULT_IMAGE_BYTES + s.DATA_LABEL_O + s.DATA_LABEL_B 87 | return s 88 | 89 | ############################################################################### 90 | # Data processing 91 | 
############################################################################### 92 | def get_filenames(is_training, data_dir): 93 | """Returns a list of filenames.""" 94 | data_dir = os.path.join(data_dir, DS.DATA_PATH) 95 | 96 | assert os.path.exists(data_dir), ( 97 | 'Run cifar10_download_and_extract.py first to download and extract the ' 98 | 'CIFAR-10/CIFAR-100 data.') 99 | 100 | if DS.DATASET == 'cifar10': 101 | if is_training: 102 | return [ 103 | os.path.join(data_dir, 'data_batch_%d.bin' % i) 104 | for i in range(1, DS.NUM_DATA_FILES + 1) 105 | ] 106 | else: 107 | return [os.path.join(data_dir, 'test_batch.bin')] 108 | 109 | else: 110 | if is_training: 111 | return [os.path.join(data_dir, 'train.bin')] 112 | else: 113 | return [os.path.join(data_dir, 'test.bin')] 114 | 115 | 116 | 117 | def parse_record(raw_record, is_training): 118 | """Parse CIFAR-10/100 image and label from a raw record.""" 119 | # Convert bytes to a vector of uint8 that is record_bytes long. 120 | record_vector = tf.decode_raw(raw_record, tf.uint8) 121 | 122 | # The first byte represents the label, which we convert from uint8 to int32 123 | # and then to one-hot. 124 | label = tf.cast(tf.slice(record_vector, [DS.DATA_LABEL_O], [DS.DATA_LABEL_B]), tf.int32) 125 | #label = tf.cast(record_vector[0],tf.int32) 126 | label = tf.one_hot(tf.squeeze(label), DS.NUM_CLASSES) 127 | 128 | # The remaining bytes after the label represent the image, which we reshape 129 | # from [depth * height * width] to [depth, height, width]. 130 | depth_major = tf.reshape(tf.slice(record_vector, [DS.DATA_LABEL_O + DS.DATA_LABEL_B], [DS.DEFAULT_IMAGE_BYTES]),[DS.NUM_CHANNELS, DS.HEIGHT, DS.WIDTH]) 131 | 132 | # Convert from [depth, height, width] to [height, width, depth], and cast as 133 | # float32. 
134 | image = tf.cast(tf.transpose(depth_major, [1, 2, 0]), tf.float32) 135 | 136 | image = preprocess_image(image, is_training) 137 | 138 | return image, label 139 | 140 | 141 | 142 | def preprocess_image(image, is_training): 143 | """Preprocess a single image of layout [height, width, depth].""" 144 | if is_training: 145 | # Resize the image to add four extra pixels on each side. 146 | image = tf.image.resize_image_with_crop_or_pad( 147 | image, DS.HEIGHT + 8, DS.WIDTH + 8) 148 | 149 | # Randomly crop a [_HEIGHT, _WIDTH] section of the image. 150 | image = tf.random_crop(image, [DS.HEIGHT, DS.WIDTH, DS.NUM_CHANNELS]) 151 | 152 | # Randomly flip the image horizontally. 153 | image = tf.image.random_flip_left_right(image) 154 | 155 | # Subtract off the mean and divide by the variance of the pixels. 156 | image = tf.image.per_image_standardization(image) 157 | return image 158 | 159 | 160 | def input_fn(is_training, data_dir, batch_size, num_epochs=1, 161 | num_parallel_calls=1, multi_gpu=False): 162 | """Input_fn using the tf.data input pipeline for CIFAR-10 dataset. 163 | 164 | Args: 165 | is_training: A boolean denoting whether the input is for training. 166 | data_dir: The directory containing the input data. 167 | batch_size: The number of samples per batch. 168 | num_epochs: The number of epochs to repeat the dataset. 169 | num_parallel_calls: The number of records that are processed in parallel. 170 | This can be optimized per data set but for generally homogeneous data 171 | sets, should be approximately the number of available CPU cores. 172 | multi_gpu: Whether this is run multi-GPU. Note that this is only required 173 | currently to handle the batch leftovers, and can be removed 174 | when that is handled directly by Estimator. 175 | 176 | Returns: 177 | A dataset that can be used for iteration. 
178 | """ 179 | filenames = get_filenames(is_training, data_dir) 180 | dataset = tf.data.FixedLengthRecordDataset(filenames, DS.RECORD_BYTES) 181 | 182 | num_images = is_training and DS.NUM_IMAGES['train'] or DS.NUM_IMAGES['validation'] 183 | 184 | return rrl.process_record_dataset(dataset, is_training, batch_size, DS.NUM_IMAGES['train'], 185 | parse_record, num_epochs, num_parallel_calls, 186 | examples_per_epoch=num_images, multi_gpu=multi_gpu) 187 | 188 | 189 | 190 | def get_synth_input_fn(): 191 | return rrl.get_synth_input_fn(DS.HEIGHT, DS.WIDTH, DS.NUM_CHANNELS, DS.NUM_CLASSES) 192 | 193 | ############################################################################### 194 | # Running the model 195 | ############################################################################### 196 | ############################################################################### 197 | # Running the model 198 | ############################################################################### 199 | class Cifar10Model(resnet.NCMResModel): 200 | """Model class with appropriate defaults for CIFAR-10 data.""" 201 | 202 | def __init__(self, resnet_size, data_format=None, num_classes=None,version=resnet.RESNET_DEFAULT_VERSION,ncm=resnet.NCM_DEFAULT): 203 | """These are the parameters that work for CIFAR-10 data. 204 | 205 | Args: 206 | resnet_size: The number of convolutional layers needed in the model. 207 | data_format: Either 'channels_first' or 'channels_last', specifying which 208 | data format to use when setting up the model. 209 | num_classes: The number of output classes needed from the model. This 210 | enables users to extend the same model to their own datasets. 211 | version: Integer representing which version of the ResNet network to use. 212 | See README for details. 
Valid values: [1, 2] 213 | 214 | Raises: 215 | ValueError: if invalid resnet_size is chosen 216 | """ 217 | if resnet_size % 6 != 2: 218 | raise ValueError('resnet_size must be 6n + 2:', resnet_size) 219 | 220 | num_blocks = (resnet_size - 2) // 6 221 | 222 | super(Cifar10Model, self).__init__(resnet_size=resnet_size,bottleneck=False,num_classes=num_classes,num_filters=16,kernel_size=3,conv_stride=1,first_pool_size=None,first_pool_stride=None,second_pool_size=8,second_pool_stride=1,block_sizes=[num_blocks] * 3,block_strides=[1, 2, 2],final_size=64,version=version,data_format=data_format,ncm=ncm) 223 | 224 | 225 | def cifar10_model_fn(features, labels, mode, params): 226 | """Model function for CIFAR-10.""" 227 | features = tf.reshape(features, [-1, DS.HEIGHT, DS.WIDTH, DS.NUM_CHANNELS]) 228 | 229 | learning_rate_fn = rrl.learning_rate_with_decay(batch_size=params['batch_size'], batch_denom=params['batch_size'],num_images=DS.NUM_IMAGES['train'], boundary_epochs=[100, 150, 200],decay_rates=[1, 0.1, 0.01, 0.001],initial_learning_scale=params['initial_learning_scale']) 230 | 231 | # We use a weight decay of 0.0002, which performs better 232 | # than the 0.0001 that was originally suggested. 233 | weight_decay = 2e-4 234 | 235 | # Empirical testing showed that including batch_normalization variables 236 | # in the calculation of regularized loss helped validation accuracy 237 | # for the CIFAR-10 dataset, perhaps because the regularization prevents 238 | # overfitting on the small data set. We therefore include all vars when 239 | # regularizing and computing loss during training. 
240 | def loss_filter_fn(_): 241 | return True 242 | 243 | ncm = {'method' : params['ncmmethod'],'param' : params['ncmparam']} 244 | 245 | return rrl.resnet_model_fn(features, labels, mode, 246 | Cifar10Model,resnet_size=params['resnet_size'], 247 | weight_decay=weight_decay,learning_rate_fn=learning_rate_fn, 248 | momentum=0.9,data_format=params['data_format'], 249 | version=params['version'],loss_filter_fn=loss_filter_fn, 250 | multi_gpu=params['multi_gpu'],ncm=ncm) 251 | 252 | def main(argv): 253 | global DS 254 | parser = rrl.ResnetArgParser() 255 | # Set defaults that are reasonable for this model. 256 | parser.set_defaults(resnet_size=32, 257 | train_epochs=250, 258 | epochs_between_evals=1, 259 | batch_size=128, 260 | ) 261 | 262 | flags = parser.parse_args(args=argv[1:]) 263 | 264 | #if not flags.dataset == DS.DATASET: 265 | DS = set_dataset(flags.dataset) 266 | 267 | 268 | flags.model_dir = DS.DEFAULT_MODEL_DIR 269 | flags.model_dir += flags.ncmmethod 270 | 271 | if flags.ncmmethod[-2:] == "eq": 272 | flags.ncmmethod= flags.ncmmethod[:-2] 273 | 274 | if flags.ncmmethod == "decaymean": 275 | flags.model_dir += "_d%02d" %(flags.ncmparam*100) 276 | elif flags.ncmmethod == "omreset": 277 | flags.model_dir += "_r%04d" %(flags.ncmparam) 278 | 279 | flags.model_dir += "_lr%5.0e" %(flags.initial_learning_scale) 280 | 281 | print(flags.model_dir) 282 | flags.data_dir = DS.DATA_DIR 283 | print(flags.data_dir) 284 | 285 | if flags.scratch > 0 and os.path.isdir(flags.model_dir): 286 | print ("Clear model_directory") 287 | import shutil 288 | shutil.rmtree(flags.model_dir) 289 | elif flags.continu > 0: 290 | assert os.path.isdir(flags.model_dir), "Model dir is empty, while continue is set" 291 | elif flags.continu == 0 and flags.scratch == 0: 292 | assert not os.path.isdir(flags.model_dir), "Model dir is not empty, nor continu or scratch is set" 293 | 294 | 295 | input_function = input_fn 296 | 297 | rrl.resnet_main(flags, cifar10_model_fn, 
input_function,shape=[DS.HEIGHT, DS.WIDTH, DS.NUM_CHANNELS]) 298 | 299 | if __name__ == '__main__': 300 | tf.logging.set_verbosity(tf.logging.INFO) 301 | main(argv=sys.argv) 302 | -------------------------------------------------------------------------------- /cifar10_download_and_extract.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Thomas Mensink, University of Amsterdam, thomas.mensink@uva.nl 2 | # 3 | # Beloning to the DeepNCM repository 4 | # DeepNCM is proposed in 5 | # Samantha Guerriero, Barbara Caputo, and Thomas Mensink 6 | # DeepNCM: Deep Nearest Class Mean Classifiers 7 | # ICLR Workshop 2018 8 | # https://openreview.net/forum?id=rkPLZ4JPM 9 | # 10 | # This file (cifar10cifar10_download_and_extract) is based on the 11 | # TensorFlow Models Official ResNet library (release 1.8.0/1.7.0) 12 | # https://github.com/tensorflow/models/tree/master/official/resnet 13 | # It is changed to include both CIFAR10 as well as CIFAR100 dataset 14 | 15 | # Copyright 2015 The TensorFlow Authors. All Rights Reserved. 16 | # 17 | # Licensed under the Apache License, Version 2.0 (the "License"); 18 | # you may not use this file except in compliance with the License. 19 | # You may obtain a copy of the License at 20 | # 21 | # http://www.apache.org/licenses/LICENSE-2.0 22 | # 23 | # Unless required by applicable law or agreed to in writing, software 24 | # distributed under the License is distributed on an "AS IS" BASIS, 25 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 26 | # See the License for the specific language governing permissions and 27 | # limitations under the License. 
28 | # ============================================================================== 29 | 30 | """Downloads and extracts the binary version of the CIFAR-10/CIFAR-100 dataset.""" 31 | 32 | from __future__ import absolute_import 33 | from __future__ import division 34 | from __future__ import print_function 35 | 36 | import argparse 37 | import os 38 | import sys 39 | import tarfile 40 | 41 | from six.moves import urllib 42 | import tensorflow as tf 43 | 44 | C10_DIR = '/tmp/deepncm/cifar10_data' 45 | C100_DIR = '/tmp/deepncm/cifar100_data' 46 | 47 | C10_URL = 'https://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz' 48 | C100_URL = 'https://www.cs.toronto.edu/~kriz/cifar-100-binary.tar.gz' 49 | 50 | parser = argparse.ArgumentParser() 51 | 52 | parser.add_argument( 53 | '--dataset', type=str, default='cifar10', 54 | help='Dataset to download Cifar10 or Cifar100') 55 | 56 | parser.add_argument( 57 | '--data_dir', type=str, default=C10_DIR, 58 | help='Directory to download data and extract the tarball') 59 | 60 | 61 | def main(_): 62 | """Download and extract the tarball from Alex's website.""" 63 | print(FLAGS.dataset) 64 | 65 | if FLAGS.dataset == 'cifar10': 66 | DATA_URL = C10_URL 67 | else: 68 | DATA_URL = C100_URL 69 | if FLAGS.data_dir == C10_DIR: 70 | FLAGS.data_dir = C100_DIR 71 | 72 | print(FLAGS.data_dir) 73 | print(DATA_URL) 74 | 75 | if not os.path.exists(FLAGS.data_dir): 76 | os.makedirs(FLAGS.data_dir) 77 | 78 | filename = DATA_URL.split('/')[-1] 79 | filepath = os.path.join(FLAGS.data_dir, filename) 80 | 81 | if not os.path.exists(filepath): 82 | def _progress(count, block_size, total_size): 83 | sys.stdout.write('\r>> Downloading %s %.1f%%' % ( 84 | filename, 100.0 * count * block_size / total_size)) 85 | sys.stdout.flush() 86 | 87 | filepath, _ = urllib.request.urlretrieve(DATA_URL, filepath, _progress) 88 | print() 89 | statinfo = os.stat(filepath) 90 | print('Successfully downloaded', filename, statinfo.st_size, 'bytes.') 91 | 92 | 
tarfile.open(filepath, 'r:gz').extractall(FLAGS.data_dir) 93 | 94 | 95 | if __name__ == '__main__': 96 | FLAGS, unparsed = parser.parse_known_args() 97 | tf.app.run(argv=[sys.argv[0]] + unparsed) 98 | -------------------------------------------------------------------------------- /deepncm_do.sh: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Thomas Mensink, University of Amsterdam, thomas.mensink@uva.nl 2 | # 3 | # Beloning to the DeepNCM repository 4 | # DeepNCM is proposed in 5 | # Samantha Guerriero, Barbara Caputo, and Thomas Mensink 6 | # DeepNCM: Deep Nearest Class Mean Classifiers 7 | # ICLR Workshop 2018 8 | # https://openreview.net/forum?id=rkPLZ4JPM 9 | # 10 | # This file runs the experiments. Making uses of Parallel 11 | # Tange, GNU Parallel - The Command-Line Power Tool, 2011 12 | # 13 | # Define an experiment run 14 | doexp() { 15 | x=$1 16 | dataset=$(echo "$x" | cut -f 1 -d ";" | xargs) 17 | method=$(echo "$x" | cut -f 2 -d ";" | xargs) 18 | param=$(echo "$x" | cut -f 3 -d ";" | xargs) 19 | lr=$(echo "$x" | cut -f 4 -d ";" | xargs) 20 | logfile="logs/${dataset}_${method}_${param}_${lr}.log" 21 | cmd="python cifar10_deepncm.py --dataset ${dataset} --ncmmethod ${method} --ncmparam ${param} -l ${lr} >> ${logfile} 2>&1" 22 | echo ${cmd} 23 | rm ${logfile} 24 | eval ${cmd} 25 | } 26 | export -f doexp 27 | # parallel -P ## (4) inidcates number of parallel calls: 28 | cat deepncm_experiments.txt | parallel -P 4 doexp 29 | -------------------------------------------------------------------------------- /deepncm_experiments.txt: -------------------------------------------------------------------------------- 1 | cifar10 ; softmax ; 0 ; 0.1 2 | cifar10 ; softmax ; 0 ; 0.01 3 | cifar10 ; onlinemean ; 0 ; 0.1 4 | cifar10 ; onlinemean ; 0 ; 0.01 5 | cifar100 ; softmax ; 0 ; 0.1 6 | cifar100 ; softmax ; 0 ; 0.01 7 | cifar100 ; onlinemean ; 0 ; 0.1 8 | cifar100 ; onlinemean ; 0 ; 0.01 9 | cifar10 ; decaymean 
; 0.9 ; 0.1 10 | cifar10 ; decaymean ; 0.95 ; 0.1 11 | cifar10 ; decaymean ; 0.75 ; 0.1 12 | cifar10 ; decaymean ; 0.5 ; 0.1 13 | cifar100 ; decaymean ; 0.9 ; 0.1 14 | cifar100 ; decaymean ; 0.95 ; 0.1 15 | cifar100 ; decaymean ; 0.75 ; 0.1 16 | cifar100 ; decaymean ; 0.5 ; 0.1 17 | cifar10 ; omreset ; 100 ; 0.1 18 | cifar10 ; omreset ; 195 ; 0.1 19 | cifar10 ; omreset ; 390 ; 0.1 20 | cifar10 ; omreset ; 781 ; 0.1 21 | cifar100 ; omreset ; 100 ; 0.1 22 | cifar100 ; omreset ; 195 ; 0.1 23 | cifar100 ; omreset ; 390 ; 0.1 24 | cifar100 ; omreset ; 781 ; 0.1 25 | -------------------------------------------------------------------------------- /figs/c100dict.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmensink/deepncm/fe7cdd43eb7276f4374c9c51715bf6cf417f994b/figs/c100dict.npy -------------------------------------------------------------------------------- /figs/c10dict.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmensink/deepncm/fe7cdd43eb7276f4374c9c51715bf6cf417f994b/figs/c10dict.npy -------------------------------------------------------------------------------- /figs/cifar10_rmd_eval.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmensink/deepncm/fe7cdd43eb7276f4374c9c51715bf6cf417f994b/figs/cifar10_rmd_eval.npz -------------------------------------------------------------------------------- /figs/cifar10_rmd_train.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmensink/deepncm/fe7cdd43eb7276f4374c9c51715bf6cf417f994b/figs/cifar10_rmd_train.npz -------------------------------------------------------------------------------- /figs/deepncm_rmd.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": 
"markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Experimental Overview\n", 8 | "This jupyter notebook shows the code to generate the plots as used in\n", 9 | "\n", 10 | "**Samantha Guerriero and Barbara Caputo and Thomas Mensink**, \n", 11 | "*DeepNCM: Deep Nearest Class Mean Classifiers* \n", 12 | "ICLR-Workshop 2018" 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": {}, 18 | "source": [ 19 | "#### code update:\n", 20 | "When running the \"RMD\" experiment below, some parts of the resnet_deepncm_run_loop has to be adjusted:\n", 21 | "\n", 22 | " # Resnet_deepncm_run_loop.py (line 217)\n", 23 | " # The following is only required for the Relative Mean Distance Experiment\n", 24 | " # Uncomment the following two lines:\n", 25 | " # metrics['batchmeans'] = tf.metrics.mean_tensor(tf.transpose(bm),weights=bmc)\n", 26 | " # metrics['deepmean'] = tf.metrics.mean_tensor(dm)" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 1, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "from __future__ import absolute_import\n", 36 | "from __future__ import division\n", 37 | "from __future__ import print_function\n", 38 | "\n", 39 | "import os\n", 40 | "import sys\n", 41 | "\n", 42 | "import tensorflow as tf # pylint: disable=g-bad-import-order\n", 43 | "import numpy as np\n", 44 | "\n", 45 | "sys.path.append(\"../tf/models/\")\n", 46 | "from official.utils.arg_parsers import parsers\n", 47 | "from official.utils.export import export\n", 48 | "from official.utils.logging import hooks_helper\n", 49 | "from official.utils.logging import logger\n", 50 | "\n", 51 | "sys.path.append(\"..\")\n", 52 | "import resnet_ncm as resnet\n", 53 | "import resnet_deepncm_run_loop as rrl\n", 54 | "import cifar10_deepncm as c10\n", 55 | "\n", 56 | "ALLOW_MULTIPLE_MODELS = True" 57 | ] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "# Code from cifar10_ncmnet.py" 64 | ] 65 | }, 66 | { 67 | "cell_type": 
"code", 68 | "execution_count": 3, 69 | "metadata": {}, 70 | "outputs": [ 71 | { 72 | "name": "stdout", 73 | "output_type": "stream", 74 | "text": [ 75 | "set_dataset\n", 76 | "set_dataset\n" 77 | ] 78 | } 79 | ], 80 | "source": [ 81 | "ds = 'cifar10'\n", 82 | "c10.set_DS_global(ds)\n", 83 | "DS = c10.set_dataset(ds)" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 4, 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "parser = rrl.ResnetArgParser()\n", 93 | "parser.set_defaults(resnet_size=32,\n", 94 | " train_epochs=250,\n", 95 | " epochs_between_evals=1,\n", 96 | " batch_size=128,\n", 97 | " )\n", 98 | "flags = parser.parse_args([\"--dataset\",\"cifar10\",\"--ncmmethod\",\"onlinemean\",\"--ncmparam\",\"10\",\"-l\",\"0.1\"])" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": 5, 104 | "metadata": {}, 105 | "outputs": [ 106 | { 107 | "name": "stdout", 108 | "output_type": "stream", 109 | "text": [ 110 | "/tmp/deepncm/cifar10_resnet/onlinemean_lr1e-01\n", 111 | "/tmp/deepncm/cifar10_data\n" 112 | ] 113 | } 114 | ], 115 | "source": [ 116 | "flags.model_dir = DS.DEFAULT_MODEL_DIR\n", 117 | "flags.model_dir += flags.ncmmethod\n", 118 | "\n", 119 | "if flags.ncmmethod == \"decaymean\":\n", 120 | " flags.model_dir += \"_d%02d\" %(flags.ncmparam*100)\n", 121 | "elif flags.ncmmethod == \"omreset\":\n", 122 | " flags.model_dir += \"_r%04d\" %(flags.ncmparam)\n", 123 | "\n", 124 | "flags.model_dir += \"_lr%5.0e\" %(flags.initial_learning_scale)\n", 125 | "flags.data_dir = DS.DATA_DIR\n", 126 | "\n", 127 | "print(flags.model_dir)\n", 128 | "print(flags.data_dir)" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 6, 134 | "metadata": {}, 135 | "outputs": [], 136 | "source": [ 137 | "input_function = c10.input_fn\n", 138 | "model_function = c10.cifar10_model_fn\n", 139 | "shape=[DS.HEIGHT, DS.WIDTH, DS.NUM_CHANNELS]" 140 | ] 141 | }, 142 | { 143 | "cell_type": "markdown", 144 | "metadata": 
{}, 145 | "source": [ 146 | "## Insert RRL ResNet Main code to see if mean reset works as expected" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 7, 152 | "metadata": {}, 153 | "outputs": [], 154 | "source": [ 155 | "os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'\n", 156 | "session_config = tf.ConfigProto(\n", 157 | " inter_op_parallelism_threads=flags.inter_op_parallelism_threads,\n", 158 | " intra_op_parallelism_threads=flags.intra_op_parallelism_threads,\n", 159 | " allow_soft_placement=True)\n", 160 | "\n", 161 | "if ALLOW_MULTIPLE_MODELS:\n", 162 | " session_config.gpu_options.allow_growth = True\n", 163 | "\n", 164 | "run_config = tf.estimator.RunConfig().replace(\n", 165 | " save_checkpoints_secs = 5*60, # Save checkpoints every X minutes.\n", 166 | " keep_checkpoint_max = 1000, # Retain the 1000 most recent checkpoints.\n", 167 | " #tf_random_seed = 5739, # Set random seed for \"reproducible\" results\n", 168 | " save_summary_steps = 10, # Number of steps between summaries\n", 169 | " session_config=session_config)" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": 8, 175 | "metadata": {}, 176 | "outputs": [ 177 | { 178 | "name": "stdout", 179 | "output_type": "stream", 180 | "text": [ 181 | "INFO:tensorflow:Using config: {'_model_dir': '/tmp/deepncm/cifar10_resnet/onlinemean_lr1e-01', '_tf_random_seed': None, '_save_summary_steps': 10, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 300, '_session_config': gpu_options {\n", 182 | " allow_growth: true\n", 183 | "}\n", 184 | "allow_soft_placement: true\n", 185 | ", '_keep_checkpoint_max': 1000, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_service': None, '_cluster_spec': , '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}\n" 186 | ] 187 | } 188 | ], 189 | "source": [ 190 | "classifier = 
tf.estimator.Estimator(\n", 191 | " model_fn=model_function, \n", 192 | " model_dir=flags.model_dir, \n", 193 | " config=run_config,\n", 194 | " params={\n", 195 | " 'resnet_size': flags.resnet_size,\n", 196 | " 'data_format': flags.data_format,\n", 197 | " 'batch_size': flags.batch_size,\n", 198 | " 'multi_gpu': flags.multi_gpu,\n", 199 | " 'version': flags.version,\n", 200 | " 'ncmmethod': flags.ncmmethod,\n", 201 | " 'ncmparam' : flags.ncmparam,\n", 202 | " 'initial_learning_scale' : flags.initial_learning_scale\n", 203 | " })" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": 9, 209 | "metadata": {}, 210 | "outputs": [], 211 | "source": [ 212 | "def input_fn_eval():\n", 213 | " return input_function(False, flags.data_dir, flags.batch_size,1, flags.num_parallel_calls, flags.multi_gpu)\n", 214 | " \n", 215 | "def input_fn_evaltrain():\n", 216 | " return input_function(True, flags.data_dir, flags.batch_size,1, flags.num_parallel_calls, flags.multi_gpu)" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": 10, 222 | "metadata": {}, 223 | "outputs": [ 224 | { 225 | "data": { 226 | "text/plain": [ 227 | "True" 228 | ] 229 | }, 230 | "execution_count": 10, 231 | "metadata": {}, 232 | "output_type": "execute_result" 233 | } 234 | ], 235 | "source": [ 236 | "MFILENAME = \"cifar10_rmd_train.npz\"\n", 237 | "os.path.exists(MFILENAME)" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": 11, 243 | "metadata": {}, 244 | "outputs": [ 245 | { 246 | "name": "stdout", 247 | "output_type": "stream", 248 | "text": [ 249 | "Load cifar10_rmd_train.npz\n" 250 | ] 251 | } 252 | ], 253 | "source": [ 254 | "tf.logging.set_verbosity(tf.logging.FATAL)\n", 255 | "MFILENAME = \"cifar10_rmd_train.npz\"\n", 256 | "RUN = False\n", 257 | "\n", 258 | "if (not os.path.exists(MFILENAME)) or RUN:\n", 259 | " inx = [(i*391 + 1) for i in range(0,251)]\n", 260 | " inx[-1] = inx[-1]-1\n", 261 | " #inx = [(i*391 + 1) for i in 
range(0,10)]\n", 262 | " \n", 263 | " methods = [\"onlinemean_lr1e-01\",\"omreset_r0390_lr1e-01\",\"decaymean_d90_lr1e-01\"]\n", 264 | " M = np.zeros((len(inx),3))\n", 265 | " for m in range(len(methods)):\n", 266 | " for i in range(len(inx)): \n", 267 | " iinx = inx[i]\n", 268 | " chckptf = \"/tmp/deepncm/cifar10_resnet/%s/model.ckpt-%d\" %(methods[m],inx[i])\n", 269 | " eval_results = classifier.evaluate(input_fn=input_fn_evaltrain,checkpoint_path=chckptf)\n", 270 | " \n", 271 | " bmnorm = np.power((eval_results['batchmeans']),2).sum(axis=0)\n", 272 | " diffnorm = np.power((eval_results['batchmeans']-eval_results['deepmean']),2).sum(axis=0)\n", 273 | " rmd = (diffnorm/bmnorm).mean()\n", 274 | " M[i,m] = rmd\n", 275 | " print (\"%03d %d %30s %7d | %10.5f\" %(i,m,methods[m],inx[i],rmd))\n", 276 | " np.savez(MFILENAME,M=M,methods=methods,inx=inx)\n", 277 | "else:\n", 278 | " print(\"Load %s\" %(MFILENAME))\n", 279 | " npzfile = np.load(MFILENAME)\n", 280 | " methods = npzfile['methods']\n", 281 | " inx = npzfile['inx']\n", 282 | " M = npzfile['M']" 283 | ] 284 | }, 285 | { 286 | "cell_type": "code", 287 | "execution_count": 12, 288 | "metadata": {}, 289 | "outputs": [ 290 | { 291 | "name": "stdout", 292 | "output_type": "stream", 293 | "text": [ 294 | "['onlinemean_lr1e-01', 'condensation_r0390_lr1e-01', 'decaymean_d90_lr1e-01']\n" 295 | ] 296 | } 297 | ], 298 | "source": [ 299 | "methodsname = [m.replace(\"omreset\",\"condensation\") for m in methods]\n", 300 | "print(methodsname)" 301 | ] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": 13, 306 | "metadata": {}, 307 | "outputs": [], 308 | "source": [ 309 | "import matplotlib.pyplot as plt" 310 | ] 311 | }, 312 | { 313 | "cell_type": "markdown", 314 | "metadata": {}, 315 | "source": [ 316 | "# Workshop/Poster Figure" 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": 23, 322 | "metadata": {}, 323 | "outputs": [ 324 | { 325 | "data": { 326 | "image/png": "\n", 327 | 
"text/plain": [ 328 | "
" 329 | ] 330 | }, 331 | "metadata": {}, 332 | "output_type": "display_data" 333 | } 334 | ], 335 | "source": [ 336 | "import matplotlib.pylab as pylab\n", 337 | "params = {'font.size' : 14,\n", 338 | " 'legend.fontsize': 'x-large',\n", 339 | " 'figure.figsize': (10, 5),\n", 340 | " 'axes.labelsize': 'x-large',\n", 341 | " 'axes.titlesize':'x-large',\n", 342 | " 'xtick.labelsize':'x-large',\n", 343 | " 'ytick.labelsize':'x-large'}\n", 344 | "pylab.rcParams.update(params)\n", 345 | "\n", 346 | "plt.plot(np.arange(0,251),M)\n", 347 | "plt.legend([m.split('_')[0] for m in methodsname],markerfirst=False,loc=\"lower right\")\n", 348 | "plt.xlim(0,100)\n", 349 | "plt.yscale(\"log\")\n", 350 | "plt.grid()\n", 351 | "plt.title(\"Relative Mean Distance\")\n", 352 | "plt.savefig(\"./exp_cifar10_rmd.pdf\",dpi=250,bbox_inches=\"tight\",pad_inches=0.1,transparent=False,facecolor='w')\n", 353 | "plt.show()" 354 | ] 355 | } 356 | ], 357 | "metadata": { 358 | "kernelspec": { 359 | "display_name": "Python 3", 360 | "language": "python", 361 | "name": "python3" 362 | }, 363 | "language_info": { 364 | "codemirror_mode": { 365 | "name": "ipython", 366 | "version": 3 367 | }, 368 | "file_extension": ".py", 369 | "mimetype": "text/x-python", 370 | "name": "python", 371 | "nbconvert_exporter": "python", 372 | "pygments_lexer": "ipython3", 373 | "version": "3.6.4" 374 | } 375 | }, 376 | "nbformat": 4, 377 | "nbformat_minor": 2 378 | } 379 | -------------------------------------------------------------------------------- /figs/exp_cifar10_rmd.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmensink/deepncm/fe7cdd43eb7276f4374c9c51715bf6cf417f994b/figs/exp_cifar10_rmd.pdf -------------------------------------------------------------------------------- /figs/exp_cifar_best.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tmensink/deepncm/fe7cdd43eb7276f4374c9c51715bf6cf417f994b/figs/exp_cifar_best.pdf -------------------------------------------------------------------------------- /figs/exp_cifar_overview.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmensink/deepncm/fe7cdd43eb7276f4374c9c51715bf6cf417f994b/figs/exp_cifar_overview.pdf -------------------------------------------------------------------------------- /figs/exp_cifar_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tmensink/deepncm/fe7cdd43eb7276f4374c9c51715bf6cf417f994b/figs/exp_cifar_overview.png -------------------------------------------------------------------------------- /imagenet_deepncm.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | """Runs a ResNet model on the ImageNet dataset.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import os 22 | import sys 23 | 24 | import tensorflow as tf # pylint: disable=g-bad-import-order 25 | 26 | import os 27 | import sys 28 | sys.path.append("./tf/models/") 29 | 30 | from official.resnet import imagenet_preprocessing 31 | 32 | import resnet_ncm as resnet 33 | import resnet_deepncm_run_loop as rrl 34 | 35 | _DEFAULT_IMAGE_SIZE = 224 36 | _NUM_CHANNELS = 3 37 | _NUM_CLASSES = 1001 38 | 39 | _NUM_IMAGES = { 40 | 'train': 1281167, 41 | 'validation': 50000, 42 | } 43 | 44 | _NUM_TRAIN_FILES = 1024 45 | _SHUFFLE_BUFFER = 1500 46 | 47 | 48 | ############################################################################### 49 | # Data processing 50 | ############################################################################### 51 | def get_filenames(is_training, data_dir): 52 | """Return filenames for dataset.""" 53 | if is_training: 54 | return [ 55 | os.path.join(data_dir, 'train-%05d-of-01024' % i) 56 | for i in range(_NUM_TRAIN_FILES)] 57 | else: 58 | return [ 59 | os.path.join(data_dir, 'validation-%05d-of-00128' % i) 60 | for i in range(128)] 61 | 62 | 63 | def _parse_example_proto(example_serialized): 64 | """Parses an Example proto containing a training example of an image. 65 | 66 | The output of the build_image_data.py image preprocessing script is a dataset 67 | containing serialized Example protocol buffers. 
Each Example proto contains 68 | the following fields (values are included as examples): 69 | 70 | image/height: 462 71 | image/width: 581 72 | image/colorspace: 'RGB' 73 | image/channels: 3 74 | image/class/label: 615 75 | image/class/synset: 'n03623198' 76 | image/class/text: 'knee pad' 77 | image/object/bbox/xmin: 0.1 78 | image/object/bbox/xmax: 0.9 79 | image/object/bbox/ymin: 0.2 80 | image/object/bbox/ymax: 0.6 81 | image/object/bbox/label: 615 82 | image/format: 'JPEG' 83 | image/filename: 'ILSVRC2012_val_00041207.JPEG' 84 | image/encoded: 85 | 86 | Args: 87 | example_serialized: scalar Tensor tf.string containing a serialized 88 | Example protocol buffer. 89 | 90 | Returns: 91 | image_buffer: Tensor tf.string containing the contents of a JPEG file. 92 | label: Tensor tf.int32 containing the label. 93 | bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] 94 | where each coordinate is [0, 1) and the coordinates are arranged as 95 | [ymin, xmin, ymax, xmax]. 96 | """ 97 | # Dense features in Example proto. 98 | feature_map = { 99 | 'image/encoded': tf.FixedLenFeature([], dtype=tf.string, 100 | default_value=''), 101 | 'image/class/label': tf.FixedLenFeature([1], dtype=tf.int64, 102 | default_value=-1), 103 | 'image/class/text': tf.FixedLenFeature([], dtype=tf.string, 104 | default_value=''), 105 | } 106 | sparse_float32 = tf.VarLenFeature(dtype=tf.float32) 107 | # Sparse features in Example proto. 
108 | feature_map.update( 109 | {k: sparse_float32 for k in ['image/object/bbox/xmin', 110 | 'image/object/bbox/ymin', 111 | 'image/object/bbox/xmax', 112 | 'image/object/bbox/ymax']}) 113 | 114 | features = tf.parse_single_example(example_serialized, feature_map) 115 | label = tf.cast(features['image/class/label'], dtype=tf.int32) 116 | 117 | xmin = tf.expand_dims(features['image/object/bbox/xmin'].values, 0) 118 | ymin = tf.expand_dims(features['image/object/bbox/ymin'].values, 0) 119 | xmax = tf.expand_dims(features['image/object/bbox/xmax'].values, 0) 120 | ymax = tf.expand_dims(features['image/object/bbox/ymax'].values, 0) 121 | 122 | # Note that we impose an ordering of (y, x) just to make life difficult. 123 | bbox = tf.concat([ymin, xmin, ymax, xmax], 0) 124 | 125 | # Force the variable number of bounding boxes into the shape 126 | # [1, num_boxes, coords]. 127 | bbox = tf.expand_dims(bbox, 0) 128 | bbox = tf.transpose(bbox, [0, 2, 1]) 129 | 130 | return features['image/encoded'], label, bbox 131 | 132 | 133 | def parse_record(raw_record, is_training): 134 | """Parses a record containing a training example of an image. 135 | 136 | The input record is parsed into a label and image, and the image is passed 137 | through preprocessing steps (cropping, flipping, and so on). 138 | 139 | Args: 140 | raw_record: scalar Tensor tf.string containing a serialized 141 | Example protocol buffer. 142 | is_training: A boolean denoting whether the input is for training. 143 | 144 | Returns: 145 | Tuple with processed image tensor and one-hot-encoded label tensor. 
146 | """ 147 | image_buffer, label, bbox = _parse_example_proto(raw_record) 148 | 149 | image = imagenet_preprocessing.preprocess_image( 150 | image_buffer=image_buffer, 151 | bbox=bbox, 152 | output_height=_DEFAULT_IMAGE_SIZE, 153 | output_width=_DEFAULT_IMAGE_SIZE, 154 | num_channels=_NUM_CHANNELS, 155 | is_training=is_training) 156 | 157 | label = tf.one_hot(tf.reshape(label, shape=[]), _NUM_CLASSES) 158 | 159 | return image, label 160 | 161 | 162 | def input_fn(is_training, data_dir, batch_size, num_epochs=1, 163 | num_parallel_calls=1, multi_gpu=False): 164 | """Input function which provides batches for train or eval. 165 | 166 | Args: 167 | is_training: A boolean denoting whether the input is for training. 168 | data_dir: The directory containing the input data. 169 | batch_size: The number of samples per batch. 170 | num_epochs: The number of epochs to repeat the dataset. 171 | num_parallel_calls: The number of records that are processed in parallel. 172 | This can be optimized per data set but for generally homogeneous data 173 | sets, should be approximately the number of available CPU cores. 174 | multi_gpu: Whether this is run multi-GPU. Note that this is only required 175 | currently to handle the batch leftovers, and can be removed 176 | when that is handled directly by Estimator. 177 | 178 | Returns: 179 | A dataset that can be used for iteration. 
180 | """ 181 | filenames = get_filenames(is_training, data_dir) 182 | dataset = tf.data.Dataset.from_tensor_slices(filenames) 183 | 184 | if is_training: 185 | # Shuffle the input files 186 | dataset = dataset.shuffle(buffer_size=_NUM_TRAIN_FILES) 187 | 188 | num_images = is_training and _NUM_IMAGES['train'] or _NUM_IMAGES['validation'] 189 | 190 | # Convert to individual records 191 | dataset = dataset.flat_map(tf.data.TFRecordDataset) 192 | 193 | return rrl.process_record_dataset( 194 | dataset, is_training, batch_size, _SHUFFLE_BUFFER, parse_record, 195 | num_epochs, num_parallel_calls, examples_per_epoch=num_images, 196 | multi_gpu=multi_gpu) 197 | 198 | 199 | def get_synth_input_fn(): 200 | return rrl.get_synth_input_fn( 201 | _DEFAULT_IMAGE_SIZE, _DEFAULT_IMAGE_SIZE, _NUM_CHANNELS, _NUM_CLASSES) 202 | 203 | 204 | ############################################################################### 205 | # Running the model 206 | ############################################################################### 207 | class ImagenetModel(resnet.NCMResModel): 208 | """Model class with appropriate defaults for Imagenet data.""" 209 | 210 | def __init__(self, resnet_size, data_format=None, num_classes=_NUM_CLASSES, 211 | version=resnet.RESNET_DEFAULT_VERSION,ncm=resnet.NCM_DEFAULT): 212 | """These are the parameters that work for Imagenet data. 213 | 214 | Args: 215 | resnet_size: The number of convolutional layers needed in the model. 216 | data_format: Either 'channels_first' or 'channels_last', specifying which 217 | data format to use when setting up the model. 218 | num_classes: The number of output classes needed from the model. This 219 | enables users to extend the same model to their own datasets. 220 | version: Integer representing which version of the ResNet network to use. 221 | See README for details. 
Valid values: [1, 2] 222 | """ 223 | 224 | # For bigger models, we want to use "bottleneck" layers 225 | if resnet_size < 50: 226 | bottleneck = False 227 | final_size = 512 228 | else: 229 | bottleneck = True 230 | final_size = 2048 231 | 232 | super(ImagenetModel, self).__init__( 233 | resnet_size=resnet_size, 234 | bottleneck=bottleneck, 235 | num_classes=num_classes, 236 | num_filters=64, 237 | kernel_size=7, 238 | conv_stride=2, 239 | first_pool_size=3, 240 | first_pool_stride=2, 241 | second_pool_size=7, 242 | second_pool_stride=1, 243 | block_sizes=_get_block_sizes(resnet_size), 244 | block_strides=[1, 2, 2, 2], 245 | final_size=final_size, 246 | version=version, 247 | data_format=data_format) 248 | 249 | 250 | def _get_block_sizes(resnet_size): 251 | """Retrieve the size of each block_layer in the ResNet model. 252 | 253 | The number of block layers used for the Resnet model varies according 254 | to the size of the model. This helper grabs the layer set we want, throwing 255 | an error if a non-standard size has been selected. 256 | 257 | Args: 258 | resnet_size: The number of convolutional layers needed in the model. 259 | 260 | Returns: 261 | A list of block sizes to use in building the model. 262 | 263 | Raises: 264 | KeyError: if invalid resnet_size is received. 
265 | """ 266 | choices = { 267 | 18: [2, 2, 2, 2], 268 | 34: [3, 4, 6, 3], 269 | 50: [3, 4, 6, 3], 270 | 101: [3, 4, 23, 3], 271 | 152: [3, 8, 36, 3], 272 | 200: [3, 24, 36, 3] 273 | } 274 | 275 | try: 276 | return choices[resnet_size] 277 | except KeyError: 278 | err = ('Could not find layers for selected Resnet size.\n' 279 | 'Size received: {}; sizes allowed: {}.'.format( 280 | resnet_size, choices.keys())) 281 | raise ValueError(err) 282 | 283 | 284 | def imagenet_model_fn(features, labels, mode, params): 285 | """Our model_fn for ResNet to be used with our Estimator.""" 286 | learning_rate_fn = rrl.learning_rate_with_decay( 287 | batch_size=params['batch_size'], batch_denom=256, 288 | num_images=_NUM_IMAGES['train'], boundary_epochs=[30, 60, 80, 90], 289 | decay_rates=[1, 0.1, 0.01, 0.001, 1e-4]) 290 | 291 | return rrl.resnet_model_fn(features, labels, mode, ImagenetModel, 292 | resnet_size=params['resnet_size'], 293 | weight_decay=1e-4, 294 | learning_rate_fn=learning_rate_fn, 295 | momentum=0.9, 296 | data_format=params['data_format'], 297 | version=params['version'], 298 | loss_filter_fn=None, 299 | multi_gpu=params['multi_gpu']) 300 | 301 | 302 | def main(argv): 303 | parser = rrl.ResnetArgParser( 304 | resnet_size_choices=[18, 34, 50, 101, 152, 200]) 305 | 306 | parser.set_defaults( 307 | resnet_size=50, 308 | train_epochs=100, 309 | data_dir= "/tmp/deepncm/data/imagenet/tf/", 310 | model_dir="/tmp/deepncm/exp/imagenet/" 311 | ) 312 | 313 | flags = parser.parse_args(args=argv[1:]) 314 | 315 | flags.model_dir += "resnet-%d/%s" %(flags.resnet_size,flags.ncmmethod) 316 | 317 | if flags.ncmmethod == "decaymean": 318 | flags.model_dir += "_d%02d" %(flags.ncmparam*100) 319 | elif flags.ncmmethod == "omreset": 320 | flags.model_dir += "_r%04d" %(flags.ncmparam) 321 | 322 | flags.model_dir += "_lr%5.0e" %(flags.initial_learning_scale) 323 | 324 | print(flags.model_dir) 325 | 326 | 327 | input_function = flags.use_synthetic_data and get_synth_input_fn() or 
input_fn 328 | 329 | rrl.resnet_main( 330 | flags, imagenet_model_fn, input_function, 331 | shape=[_DEFAULT_IMAGE_SIZE, _DEFAULT_IMAGE_SIZE, _NUM_CHANNELS]) 332 | 333 | 334 | if __name__ == '__main__': 335 | tf.logging.set_verbosity(tf.logging.INFO) 336 | main(argv=sys.argv) 337 | -------------------------------------------------------------------------------- /resnet_deepncm_run_loop.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Thomas Mensink, University of Amsterdam, thomas.mensink@uva.nl 2 | # 3 | # Beloning to the DeepNCM repository 4 | # DeepNCM is proposed in 5 | # Samantha Guerriero, Barbara Caputo, and Thomas Mensink 6 | # DeepNCM: Deep Nearest Class Mean Classifiers 7 | # ICLR Workshop 2018 8 | # https://openreview.net/forum?id=rkPLZ4JPM 9 | # 10 | # This file (resnet_deepncm_run_loop) is based on resnet_run_loop from the 11 | # TensorFlow Models Official ResNet library (release 1.8.0/1.7.0) 12 | # https://github.com/tensorflow/models/tree/master/official/resnet 13 | # 14 | # It contains code to support the ResNet DeepNCM models 15 | # Modifications are made to 16 | # - resnet_model_fn call, to incorporate the NCM update ops 17 | # - ResnetArgParser, to include different command line arguments 18 | # - Main, to allow multiple models at the same GPU 19 | """Contains utility and supporting functions for ResNet. 20 | 21 | This module contains ResNet code which does not directly build layers. This 22 | includes dataset management, hyperparameter and optimizer code, and argument 23 | parsing. Code for defining the ResNet layers can be found in resnet_ncm.py. 
24 | """ 25 | 26 | from __future__ import absolute_import 27 | from __future__ import division 28 | from __future__ import print_function 29 | 30 | import argparse 31 | import os 32 | 33 | import tensorflow as tf # pylint: disable=g-bad-import-order 34 | 35 | import numpy as np 36 | 37 | #import resnet_ncm as rncm 38 | import resnet_ncmequal as rncm 39 | 40 | ALLOW_MULTIPLE_MODELS = True 41 | 42 | import sys 43 | sys.path.append("./tf/models/") 44 | from official.utils.arg_parsers import parsers 45 | from official.utils.export import export 46 | from official.utils.logging import hooks_helper 47 | from official.utils.logging import logger 48 | from official.resnet import resnet_run_loop as rrl 49 | ################################################################################ 50 | # Functions for input processing. 51 | ################################################################################ 52 | def process_record_dataset(dataset, is_training, batch_size, shuffle_buffer, 53 | parse_record_fn, num_epochs=1, num_parallel_calls=1, 54 | examples_per_epoch=0, multi_gpu=False): 55 | return rrl.process_record_dataset(dataset, is_training, batch_size, shuffle_buffer, 56 | parse_record_fn, num_epochs=num_epochs, num_parallel_calls=num_parallel_calls, 57 | examples_per_epoch=examples_per_epoch, multi_gpu=multi_gpu) 58 | 59 | def get_synth_input_fn(height, width, num_channels, num_classes): 60 | return rrl.get_synth_input_fn(height, width, num_channles, num_classes) 61 | 62 | ################################################################################ 63 | # Functions for running training/eval/validation loops for the model. 64 | ################################################################################ 65 | def learning_rate_with_decay( 66 | batch_size, batch_denom, num_images, boundary_epochs, decay_rates,initial_learning_scale=0.1): 67 | """Get a learning rate that decays step-wise as training progresses. 
68 | 69 | Args: 70 | batch_size: the number of examples processed in each training batch. 71 | batch_denom: this value will be used to scale the base learning rate. 72 | `0.1 * batch size` is divided by this number, such that when 73 | batch_denom == batch_size, the initial learning rate will be 0.1. 74 | num_images: total number of images that will be used for training. 75 | boundary_epochs: list of ints representing the epochs at which we 76 | decay the learning rate. 77 | decay_rates: list of floats representing the decay rates to be used 78 | for scaling the learning rate. It should have one more element 79 | than `boundary_epochs`, and all elements should have the same type. 80 | 81 | Returns: 82 | Returns a function that takes a single argument - the number of batches 83 | trained so far (global_step)- and returns the learning rate to be used 84 | for training the next batch. 85 | """ 86 | initial_learning_rate = initial_learning_scale * batch_size / batch_denom 87 | batches_per_epoch = num_images / batch_size 88 | 89 | # Multiply the learning rate by 0.1 at 100, 150, and 200 epochs. 90 | boundaries = [int(batches_per_epoch * epoch) for epoch in boundary_epochs] 91 | vals = [initial_learning_rate * decay for decay in decay_rates] 92 | 93 | def learning_rate_fn(global_step): 94 | global_step = tf.cast(global_step, tf.int32) 95 | return tf.train.piecewise_constant(global_step, boundaries, vals) 96 | 97 | return learning_rate_fn 98 | 99 | 100 | def resnet_model_fn(features, labels, mode, model_class, 101 | resnet_size, weight_decay, learning_rate_fn, momentum, 102 | data_format, version, loss_filter_fn=None, multi_gpu=False, ncm=rncm.NCM_DEFAULT): 103 | """Shared functionality for different resnet model_fns. 104 | 105 | Initializes the ResnetModel representing the model layers 106 | and uses that model to build the necessary EstimatorSpecs for 107 | the `mode` in question. 
For training, this means building losses, 108 | the optimizer, and the train op that get passed into the EstimatorSpec. 109 | For evaluation and prediction, the EstimatorSpec is returned without 110 | a train op, but with the necessary parameters for the given mode. 111 | 112 | Args: 113 | features: tensor representing input images 114 | labels: tensor representing class labels for all input images 115 | mode: current estimator mode; should be one of 116 | `tf.estimator.ModeKeys.TRAIN`, `EVALUATE`, `PREDICT` 117 | model_class: a class representing a TensorFlow model that has a __call__ 118 | function. We assume here that this is a subclass of ResnetModel. 119 | resnet_size: A single integer for the size of the ResNet model. 120 | weight_decay: weight decay loss rate used to regularize learned variables. 121 | learning_rate_fn: function that returns the current learning rate given 122 | the current global_step 123 | momentum: momentum term used for optimization 124 | data_format: Input format ('channels_last', 'channels_first', or None). 125 | If set to None, the format is dependent on whether a GPU is available. 126 | version: Integer representing which version of the ResNet network to use. 127 | See README for details. Valid values: [1, 2] 128 | loss_filter_fn: function that takes a string variable name and returns 129 | True if the var should be included in loss calculation, and False 130 | otherwise. If None, batch_normalization variables will be excluded 131 | from the loss. 132 | multi_gpu: If True, wrap the optimizer in a TowerOptimizer suitable for 133 | data-parallel distribution across multiple GPUs. 134 | 135 | Returns: 136 | EstimatorSpec parameterized according to the input params and the 137 | current mode. 
138 | """ 139 | # Generate a summary node for the images 140 | tf.summary.image('images', features, max_outputs=6) 141 | 142 | model = model_class(resnet_size, data_format=data_format, num_classes=labels.shape[1].value, version=version,ncm=ncm) 143 | logits, deep_x, deepmean = model(features, mode == tf.estimator.ModeKeys.TRAIN) 144 | 145 | predictions = { 146 | 'classes': tf.argmax(logits, axis=1), 147 | 'probabilities': tf.nn.softmax(logits, name='softmax_tensor'), 148 | } 149 | 150 | 151 | dm = tf.identity(deepmean,"DM") 152 | if not (model.ncmmethod == "softmax"): 153 | rdist,mmsk = model.get_relative_mean_distance(deep_x=deep_x,labels=labels) 154 | mcmd = tf.metrics.mean(rdist,weights=mmsk) 155 | rmd = tf.identity(mcmd[1],name="rmd") 156 | rmd = tf.summary.scalar('rmd', rmd) 157 | predictions['rmd'] = tf.identity(rmd,name="rmd") 158 | 159 | if mode == tf.estimator.ModeKeys.PREDICT: 160 | # Return the predictions and the specification for serving a SavedModel 161 | return tf.estimator.EstimatorSpec( 162 | mode=mode, 163 | predictions=predictions, 164 | export_outputs={ 165 | 'predict': tf.estimator.export.PredictOutput(predictions) 166 | } 167 | ) 168 | 169 | # Calculate loss, which includes softmax cross entropy and L2 regularization. 170 | cross_entropy = tf.losses.softmax_cross_entropy(logits=logits, onehot_labels=labels) 171 | 172 | # Create a tensor named cross_entropy for logging purposes. 173 | tf.identity(cross_entropy, name='cross_entropy') 174 | tf.summary.scalar('cross_entropy', cross_entropy) 175 | 176 | # If no loss_filter_fn is passed, assume we want the default behavior, 177 | # which is that batch_normalization variables are excluded from loss. 178 | def exclude_batch_norm(name): 179 | return 'batch_normalization' not in name 180 | loss_filter_fn = loss_filter_fn or exclude_batch_norm 181 | 182 | # Add weight decay to the loss. 
183 | l2_loss = weight_decay * tf.add_n( 184 | [tf.nn.l2_loss(v) for v in tf.trainable_variables() 185 | if loss_filter_fn(v.name)]) 186 | tf.summary.scalar('l2_loss', l2_loss) 187 | loss = cross_entropy + l2_loss 188 | 189 | if mode == tf.estimator.ModeKeys.TRAIN: 190 | ncm_ops = model.get_ncm_ops(deep_x=deep_x,labels=labels) 191 | 192 | # Create a tensor named learning_rate for logging purposes 193 | global_step = tf.train.get_or_create_global_step() 194 | learning_rate = learning_rate_fn(global_step) 195 | tf.identity(learning_rate, name='learning_rate') 196 | tf.summary.scalar('learning_rate', learning_rate) 197 | 198 | # Create loss_op using Gradient clipping 199 | optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,momentum=momentum) 200 | gavs = optimizer.compute_gradients(loss) 201 | gavsc= [(tf.clip_by_value(grad, -1., 1.), var) for grad, var in gavs] 202 | loss_op = optimizer.apply_gradients(gavsc,global_step=global_step) 203 | 204 | # Update ops from Graph 205 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) 206 | 207 | train_op = tf.group(loss_op, update_ops, ncm_ops) 208 | else: 209 | train_op = None 210 | 211 | accuracy = tf.metrics.accuracy(tf.argmax(labels, axis=1), predictions['classes']) 212 | 213 | dm,bm,bmc = model.get_mean_and_batch_mean(deep_x=deep_x,labels=labels) 214 | 215 | metrics = {'accuracy': accuracy} 216 | if not (model.ncmmethod == "softmax"): 217 | metrics['mcmdistance'] = mcmd 218 | # The following is only required for the Relative Mean Distance Experiment 219 | #metrics['batchmeans'] = tf.metrics.mean_tensor(tf.transpose(bm),weights=bmc) 220 | #metrics['deepmean'] = tf.metrics.mean_tensor(dm) 221 | 222 | # Create a tensor named train_accuracy for logging purposes 223 | tf.identity(accuracy[1], name='train_accuracy') 224 | tf.summary.scalar('train_accuracy', accuracy[1]) 225 | 226 | return tf.estimator.EstimatorSpec( 227 | mode=mode, 228 | predictions=predictions, 229 | loss=loss, 230 | train_op=train_op, 
# Tail of the preceding definition (it begins above this chunk), kept verbatim:
#       eval_metric_ops=metrics)


def resnet_main(flags, model_function, input_function, shape=None):
  """Shared main loop for ResNet Models.

  Builds a `tf.estimator.Estimator` around `model_function`, runs
  `flags.train_epochs // flags.epochs_between_evals` train-then-evaluate
  cycles (any remainder of epochs is not run) and, when `flags.export_dir`
  is set, exports a SavedModel at the end.

  Args:
    flags: FLAGS object that contains the params for running. See
      ResnetArgParser for created flags.
    model_function: the function that instantiates the Model and builds the
      ops for train/eval. This will be passed directly into the estimator.
    input_function: the function that processes the dataset and returns a
      dataset that the estimator can train on. It is called positionally with
      (True/False for train/eval, flags.data_dir, flags.batch_size,
      flags.epochs_between_evals for training or 1 for evaluation,
      flags.num_parallel_calls, flags.multi_gpu).
    shape: list of ints representing the shape of the images used for training.
      This is only used if flags.export_dir is passed.
  """

  # Using the Winograd non-fused algorithms provides a small performance boost.
  os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'

  # Create session config based on values of inter_op_parallelism_threads and
  # intra_op_parallelism_threads. Note that we default to having
  # allow_soft_placement = True, which is required for multi-GPU and not
  # harmful for other modes.
  session_config = tf.ConfigProto(
      inter_op_parallelism_threads=flags.inter_op_parallelism_threads,
      intra_op_parallelism_threads=flags.intra_op_parallelism_threads,
      allow_soft_placement=True)

  if ALLOW_MULTIPLE_MODELS:
    # Let the process grab GPU memory on demand instead of all at once.
    session_config.gpu_options.allow_growth = True

  # Set up a RunConfig to save checkpoints and set the session config.
  # (Pass tf_random_seed=<int> here for "reproducible" results.)
  run_config = tf.estimator.RunConfig().replace(
      save_checkpoints_secs=5 * 60,   # Save checkpoints every 5 minutes.
      keep_checkpoint_max=1000,       # Retain the 1000 most recent checkpoints.
      save_summary_steps=10000,       # Number of steps between summaries.
      session_config=session_config)

  classifier = tf.estimator.Estimator(
      model_fn=model_function, model_dir=flags.model_dir, config=run_config,
      params={
          'resnet_size': flags.resnet_size,
          'data_format': flags.data_format,
          'batch_size': flags.batch_size,
          'multi_gpu': flags.multi_gpu,
          'version': flags.version,
          'ncmmethod': flags.ncmmethod,
          'ncmparam': flags.ncmparam,
          'initial_learning_scale': flags.initial_learning_scale,
      })

  if flags.benchmark_log_dir is not None:
    benchmark_logger = logger.BenchmarkLogger(flags.benchmark_log_dir)
    benchmark_logger.log_run_info("resnet")
  else:
    benchmark_logger = None

  # The input functions only close over `flags`, so they are built once
  # instead of being redefined in every cycle.
  def input_fn_train():
    return input_function(True, flags.data_dir, flags.batch_size,
                          flags.epochs_between_evals,
                          flags.num_parallel_calls, flags.multi_gpu)

  def input_fn_eval():
    return input_function(False, flags.data_dir, flags.batch_size,
                          1, flags.num_parallel_calls, flags.multi_gpu)

  num_cycles = flags.train_epochs // flags.epochs_between_evals
  if num_cycles == 0:
    # Previously this case was a silent no-op; make it visible.
    print('Warning: train_epochs (%s) // epochs_between_evals (%s) is 0; '
          'no training cycle will be run.'
          % (flags.train_epochs, flags.epochs_between_evals))

  for _ in range(num_cycles):
    train_hooks = hooks_helper.get_train_hooks(
        flags.hooks,
        batch_size=flags.batch_size,
        benchmark_log_dir=flags.benchmark_log_dir)

    print('Starting a training cycle.')
    classifier.train(input_fn=input_fn_train, hooks=train_hooks,
                     max_steps=flags.max_train_steps)

    print('Starting to evaluate.')
    # flags.max_train_steps is generally associated with testing and profiling.
    # As a result it is frequently called with synthetic data, which will
    # iterate forever. Passing steps=flags.max_train_steps allows the eval
    # (which is generally unimportant in those circumstances) to terminate.
    # Note that eval will run for max_train_steps each loop, regardless of the
    # global_step count.
    eval_results = classifier.evaluate(input_fn=input_fn_eval,
                                       steps=flags.max_train_steps)
    print(eval_results)

    if benchmark_logger:
      benchmark_logger.log_estimator_evaluation_result(eval_results)

  if flags.export_dir is not None:
    # Exports a saved model for the given classifier.
    input_receiver_fn = export.build_tensor_serving_input_receiver_fn(
        shape, batch_size=flags.batch_size)
    classifier.export_savedmodel(flags.export_dir, input_receiver_fn)


class ResnetArgParser(argparse.ArgumentParser):
  """Arguments for configuring and running a Resnet Model.

  Combines the shared base/performance/image-model/export/benchmark parsers
  with the DeepNCM specific options (dataset, NCM method and parameter,
  continue/scratch switches and the initial learning scale).
  """

  def __init__(self, resnet_size_choices=None):
    """Registers all command line flags.

    Args:
      resnet_size_choices: optional list of allowed values for
        `--resnet_size`; None leaves the value unrestricted.
    """
    super(ResnetArgParser, self).__init__(parents=[
        parsers.BaseParser(),
        parsers.PerformanceParser(),
        parsers.ImageModelParser(),
        parsers.ExportParser(),
        parsers.BenchmarkParser(),
    ])

    self.add_argument(
        '--dataset', '-d', default="cifar10",
        help='Which dataset to use (currently cifar10/cifar100)'
    )

    self.add_argument(
        '--version', '-v', type=int, choices=[1, 2],
        default=rncm.RESNET_DEFAULT_VERSION,
        help='Version of ResNet. (1 or 2) See README.md for details.'
    )

    self.add_argument(
        '--resnet_size', '-rs', type=int, default=50,
        choices=resnet_size_choices,
        help='[default: %(default)s] The size of the ResNet model to use.',
        metavar='' if resnet_size_choices is None else None
    )

    # --continu and --scratch are integer switches (0/1) read by the callers.
    self.add_argument(
        '--continu', type=int, default=0,
        help='Continue with an existing model, or start from scratch'
    )

    self.add_argument(
        '--scratch', type=int, default=0,
        help='Start from scratch even if model exists'
    )

    self.add_argument(
        '--ncmmethod', default=rncm.NCM_DEFAULT_METHOD,
        help='[default: %(default)s] Which NCM method to use',
    )

    self.add_argument(
        '--ncmparam', default=rncm.NCM_DEFAULT_PARAMETER, type=float,
        help='[default: %(default)s] additional NCM parameter to use',
    )

    self.add_argument(
        '--initial_learning_scale', '-l', default=0.1, type=float,
        help='Initial Learning Scale (default: %(default)s)',
    )

# --------------------------------------------------------------------------------
# /resnet_deepx.py:
# --------------------------------------------------------------------------------
# Copyright 2018 Thomas Mensink, University of Amsterdam, thomas.mensink@uva.nl
#
# Belonging to the DeepNCM repository
# DeepNCM is proposed in
#    Samantha Guerriero, Barbara Caputo, and Thomas Mensink
#    DeepNCM: Deep Nearest Class Mean Classifiers
#    ICLR Workshop 2018
#    https://openreview.net/forum?id=rkPLZ4JPM
#
# This file (resnet_deepx) is based on resnet_model from the
# TensorFlow Models Official ResNet library (release 1.8.0/1.7.0)
# https://github.com/tensorflow/models/tree/master/official/resnet
#
# It contains the ResNet code to create a deepnetwork, without a final layer.
#
# resnet_model.py has the following copyright notice:
# Copyright 2017 The TensorFlow Authors.
All Rights Reserved. 18 | # 19 | # Licensed under the Apache License, Version 2.0 (the "License"); 20 | # you may not use this file except in compliance with the License. 21 | # You may obtain a copy of the License at 22 | # 23 | # http://www.apache.org/licenses/LICENSE-2.0 24 | # 25 | # Unless required by applicable law or agreed to in writing, software 26 | # distributed under the License is distributed on an "AS IS" BASIS, 27 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 28 | # See the License for the specific language governing permissions and 29 | # limitations under the License. 30 | # ============================================================================== 31 | """Contains definitions for Residual Networks. 32 | 33 | Residual networks ('v1' ResNets) were originally proposed in: 34 | [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun 35 | Deep Residual Learning for Image Recognition. arXiv:1512.03385 36 | 37 | The full preactivation 'v2' ResNet variant was introduced by: 38 | [2] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun 39 | Identity Mappings in Deep Residual Networks. arXiv: 1603.05027 40 | 41 | The key difference of the full preactivation 'v2' variant compared to the 42 | 'v1' variant in [1] is the use of batch normalization before every weight layer 43 | rather than after. 44 | """ 45 | 46 | from __future__ import absolute_import 47 | from __future__ import division 48 | from __future__ import print_function 49 | 50 | import tensorflow as tf 51 | 52 | _BATCH_NORM_DECAY = 0.997 53 | _BATCH_NORM_EPSILON = 1e-5 54 | DEFAULT_VERSION = 2 55 | 56 | 57 | ################################################################################ 58 | # Convenience functions for building the ResNet model. 
################################################################################
def batch_norm(inputs, training, data_format):
  """Applies batch normalization with the module's standard parameters.

  Args:
    inputs: The tensor to normalize.
    training: Whether the model is in training mode.
    data_format: 'channels_first' normalizes over axis 1, anything else over
      axis 3.

  Returns:
    The normalized tensor.
  """
  channel_axis = 1 if data_format == 'channels_first' else 3
  # fused=True gives a significant performance boost. See
  # https://www.tensorflow.org/performance/performance_guide#common_fused_ops
  return tf.layers.batch_normalization(
      inputs=inputs,
      axis=channel_axis,
      momentum=_BATCH_NORM_DECAY,
      epsilon=_BATCH_NORM_EPSILON,
      center=True,
      scale=True,
      training=training,
      fused=True)


def fixed_padding(inputs, kernel_size, data_format):
  """Pads the input along the spatial dimensions independently of input size.

  Args:
    inputs: A tensor of size [batch, channels, height_in, width_in] or
      [batch, height_in, width_in, channels] depending on data_format.
    kernel_size: The kernel to be used in the conv2d or max_pool2d operation.
      Should be a positive integer.
    data_format: The input format ('channels_last' or 'channels_first').

  Returns:
    A tensor with the same format as the input with the data either intact
    (if kernel_size == 1) or padded (if kernel_size > 1).
  """
  total = kernel_size - 1
  before = total // 2
  after = total - before

  no_pad = [0, 0]
  spatial_pad = [before, after]
  if data_format == 'channels_first':
    paddings = [no_pad, no_pad, spatial_pad, spatial_pad]
  else:
    paddings = [no_pad, spatial_pad, spatial_pad, no_pad]
  return tf.pad(inputs, paddings)


def conv2d_fixed_padding(inputs, filters, kernel_size, strides, data_format):
  """Strided 2-D convolution with explicit padding.

  The padding is consistent and is based only on `kernel_size`, not on the
  dimensions of `inputs` (as opposed to using `tf.layers.conv2d` alone).
  """
  if strides > 1:
    # Pad by hand and let the convolution itself run unpadded ('VALID').
    inputs = fixed_padding(inputs, kernel_size, data_format)
    padding_mode = 'VALID'
  else:
    padding_mode = 'SAME' if strides == 1 else 'VALID'

  return tf.layers.conv2d(
      inputs=inputs,
      filters=filters,
      kernel_size=kernel_size,
      strides=strides,
      padding=padding_mode,
      use_bias=False,
      kernel_initializer=tf.variance_scaling_initializer(),
      data_format=data_format)


################################################################################
# ResNet block definitions.
################################################################################
def _building_block_v1(inputs, filters, training, projection_shortcut, strides,
                       data_format):
  """A single block for ResNet v1, without a bottleneck.

  Convolution then batch normalization then ReLU as described by:
    Deep Residual Learning for Image Recognition
    https://arxiv.org/pdf/1512.03385.pdf
    by Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun, Dec 2015.

  Args:
    inputs: A tensor of size [batch, channels, height_in, width_in] or
      [batch, height_in, width_in, channels] depending on data_format.
    filters: The number of filters for the convolutions.
    training: A Boolean for whether the model is in training or inference
      mode. Needed for batch normalization.
    projection_shortcut: The function to use for projection shortcuts
      (typically a 1x1 convolution when downsampling the input), or None to
      use the identity shortcut.
    strides: The block's stride. If greater than 1, this block will ultimately
      downsample the input.
    data_format: The input format ('channels_last' or 'channels_first').

  Returns:
    The output tensor of the block.
  """
  # The shortcut branch is built first so layer creation order is stable.
  if projection_shortcut is None:
    shortcut = inputs
  else:
    shortcut = batch_norm(inputs=projection_shortcut(inputs),
                          training=training, data_format=data_format)

  net = conv2d_fixed_padding(
      inputs=inputs, filters=filters, kernel_size=3, strides=strides,
      data_format=data_format)
  net = tf.nn.relu(batch_norm(net, training, data_format))

  net = conv2d_fixed_padding(
      inputs=net, filters=filters, kernel_size=3, strides=1,
      data_format=data_format)
  net = batch_norm(net, training, data_format)

  return tf.nn.relu(net + shortcut)


def _building_block_v2(inputs, filters, training, projection_shortcut, strides,
                       data_format):
  """A single block for ResNet v2, without a bottleneck.

  Batch normalization then ReLU then convolution as described by:
    Identity Mappings in Deep Residual Networks
    https://arxiv.org/pdf/1603.05027.pdf
    by Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun, Jul 2016.

  Args:
    inputs: A tensor of size [batch, channels, height_in, width_in] or
      [batch, height_in, width_in, channels] depending on data_format.
    filters: The number of filters for the convolutions.
    training: A Boolean for whether the model is in training or inference
      mode. Needed for batch normalization.
    projection_shortcut: The function to use for projection shortcuts
      (typically a 1x1 convolution when downsampling the input), or None to
      use the identity shortcut.
    strides: The block's stride. If greater than 1, this block will ultimately
      downsample the input.
    data_format: The input format ('channels_last' or 'channels_first').

  Returns:
    The output tensor of the block.
  """
  preact = tf.nn.relu(batch_norm(inputs, training, data_format))

  # The projection shortcut should come after the first batch norm and ReLU
  # since it performs a 1x1 convolution; otherwise the raw input is reused.
  shortcut = inputs if projection_shortcut is None else projection_shortcut(
      preact)

  net = conv2d_fixed_padding(
      inputs=preact, filters=filters, kernel_size=3, strides=strides,
      data_format=data_format)

  net = tf.nn.relu(batch_norm(net, training, data_format))
  net = conv2d_fixed_padding(
      inputs=net, filters=filters, kernel_size=3, strides=1,
      data_format=data_format)

  return net + shortcut


def _bottleneck_block_v1(inputs, filters, training, projection_shortcut,
                         strides, data_format):
  """A single block for ResNet v1, with a bottleneck.

  Similar to _building_block_v1(), except using the "bottleneck" blocks
  (1x1, 3x3, 1x1 convolutions; the last one has 4 * filters outputs)
  described in:
    Deep Residual Learning for Image Recognition
    https://arxiv.org/pdf/1512.03385.pdf
    by Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun, Dec 2015.

  Args:
    inputs: A tensor of size [batch, channels, height_in, width_in] or
      [batch, height_in, width_in, channels] depending on data_format.
    filters: The number of filters for the first two convolutions.
    training: A Boolean for whether the model is in training or inference
      mode. Needed for batch normalization.
    projection_shortcut: The function to use for projection shortcuts
      (typically a 1x1 convolution when downsampling the input), or None to
      use the identity shortcut.
    strides: The block's stride. If greater than 1, this block will ultimately
      downsample the input.
    data_format: The input format ('channels_last' or 'channels_first').

  Returns:
    The output tensor of the block.
  """
  if projection_shortcut is None:
    shortcut = inputs
  else:
    shortcut = batch_norm(inputs=projection_shortcut(inputs),
                          training=training, data_format=data_format)

  net = conv2d_fixed_padding(
      inputs=inputs, filters=filters, kernel_size=1, strides=1,
      data_format=data_format)
  net = tf.nn.relu(batch_norm(net, training, data_format))

  net = conv2d_fixed_padding(
      inputs=net, filters=filters, kernel_size=3, strides=strides,
      data_format=data_format)
  net = tf.nn.relu(batch_norm(net, training, data_format))

  net = conv2d_fixed_padding(
      inputs=net, filters=4 * filters, kernel_size=1, strides=1,
      data_format=data_format)
  net = batch_norm(net, training, data_format)

  return tf.nn.relu(net + shortcut)


def _bottleneck_block_v2(inputs, filters, training, projection_shortcut,
                         strides, data_format):
  """A single block for ResNet v2, with a bottleneck.

  Similar to _building_block_v2(), except using the "bottleneck" blocks
  described in:
    Deep Residual Learning for Image Recognition
    https://arxiv.org/pdf/1512.03385.pdf
    by Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun, Dec 2015.

  Adapted to the ordering conventions (batch normalization then ReLU then
  convolution) of:
    Identity Mappings in Deep Residual Networks
    https://arxiv.org/pdf/1603.05027.pdf
    by Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun, Jul 2016.

  Args:
    inputs: A tensor of size [batch, channels, height_in, width_in] or
      [batch, height_in, width_in, channels] depending on data_format.
    filters: The number of filters for the first two convolutions.
    training: A Boolean for whether the model is in training or inference
      mode. Needed for batch normalization.
    projection_shortcut: The function to use for projection shortcuts
      (typically a 1x1 convolution when downsampling the input), or None to
      use the identity shortcut.
    strides: The block's stride. If greater than 1, this block will ultimately
      downsample the input.
    data_format: The input format ('channels_last' or 'channels_first').

  Returns:
    The output tensor of the block.
  """
  preact = tf.nn.relu(batch_norm(inputs, training, data_format))

  # The projection shortcut should come after the first batch norm and ReLU
  # since it performs a 1x1 convolution; otherwise the raw input is reused.
  shortcut = inputs if projection_shortcut is None else projection_shortcut(
      preact)

  net = conv2d_fixed_padding(
      inputs=preact, filters=filters, kernel_size=1, strides=1,
      data_format=data_format)

  net = tf.nn.relu(batch_norm(net, training, data_format))
  net = conv2d_fixed_padding(
      inputs=net, filters=filters, kernel_size=3, strides=strides,
      data_format=data_format)

  net = tf.nn.relu(batch_norm(net, training, data_format))
  net = conv2d_fixed_padding(
      inputs=net, filters=4 * filters, kernel_size=1, strides=1,
      data_format=data_format)

  return net + shortcut


def block_layer(inputs, filters, bottleneck, block_fn, blocks, strides,
                training, name, data_format):
  """Creates one layer of blocks for the ResNet model.

  Args:
    inputs: A tensor of size [batch, channels, height_in, width_in] or
      [batch, height_in, width_in, channels] depending on data_format.
    filters: The number of filters for the first convolution of the layer.
    bottleneck: Is the block created a bottleneck block.
    block_fn: The block function to use within the model (one of the
      `_building_block_*` / `_bottleneck_block_*` functions).
    blocks: The number of blocks contained in the layer.
    strides: The stride to use for the first convolution of the layer. If
      greater than 1, this layer will ultimately downsample the input.
    training: Either True or False, whether we are currently training the
      model. Needed for batch norm.
    name: A string name for the tensor output of the block layer.
    data_format: The input format ('channels_last' or 'channels_first').

  Returns:
    The output tensor of the block layer.
  """
  # Bottleneck blocks end with 4x the number of filters as they start with.
  shortcut_filters = 4 * filters if bottleneck else filters

  def projection_shortcut(inputs):
    return conv2d_fixed_padding(
        inputs=inputs, filters=shortcut_filters, kernel_size=1,
        strides=strides, data_format=data_format)

  # Only the first block per block_layer uses projection_shortcut and strides.
  net = block_fn(inputs, filters, training, projection_shortcut, strides,
                 data_format)

  remaining = blocks - 1
  for _ in range(remaining):
    net = block_fn(net, filters, training, None, 1, data_format)

  return tf.identity(net, name)


class ResNetX(object):
  """ResNet trunk without the final dense layer.

  Calling an instance returns the pooled, flattened representation; the
  classification layer is left to subclasses/callers.
  """

  def __init__(self, resnet_size, bottleneck, num_classes, num_filters,
               kernel_size,
               conv_stride, first_pool_size, first_pool_stride,
               second_pool_size, second_pool_stride, block_sizes, block_strides,
               final_size, version=DEFAULT_VERSION, data_format=None):
    """Stores the network configuration and selects the block function.

    Args:
      resnet_size: A single integer for the size of the ResNet model.
      bottleneck: Use regular blocks or bottleneck blocks.
      num_classes: The number of classes used as labels (only stored here).
      num_filters: The number of filters to use for the first block layer
        of the model. This number is then doubled for each subsequent block
        layer.
      kernel_size: The kernel size to use for the initial convolution.
      conv_stride: stride size for the initial convolutional layer
      first_pool_size: Pool size to be used for the first pooling layer.
        If none, the first pooling layer is skipped.
      first_pool_stride: stride size for the first pooling layer. Not used
        if first_pool_size is None.
      second_pool_size: Pool size for the second pooling layer (only stored
        here; `__call__` uses a reduce_mean instead).
      second_pool_stride: stride size for the final pooling layer (only
        stored here).
      block_sizes: A list containing n values, where n is the number of sets of
        block layers desired. Each value should be the number of blocks in the
        i-th set.
      block_strides: List of integers representing the desired stride size for
        each of the sets of block layers. Should be same length as block_sizes.
      final_size: The expected size of the model after the second pooling.
      version: Integer representing which version of the ResNet network to use.
        See README for details. Valid values: [1, 2]
      data_format: Input format ('channels_last', 'channels_first', or None).
        If set to None, 'channels_first' is used when TensorFlow is built with
        CUDA and 'channels_last' otherwise.

    Raises:
      ValueError: if invalid version is selected.
    """
    self.resnet_size = resnet_size

    if not data_format:
      built_with_cuda = tf.test.is_built_with_cuda()
      data_format = 'channels_first' if built_with_cuda else 'channels_last'

    self.resnet_version = version
    if version not in (1, 2):
      raise ValueError(
          'Resnet version should be 1 or 2. See README for citations.')

    self.bottleneck = bottleneck
    if bottleneck:
      self.block_fn = (
          _bottleneck_block_v1 if version == 1 else _bottleneck_block_v2)
    else:
      self.block_fn = (
          _building_block_v1 if version == 1 else _building_block_v2)

    self.data_format = data_format
    self.num_classes = num_classes
    self.num_filters = num_filters
    self.kernel_size = kernel_size
    self.conv_stride = conv_stride
    self.first_pool_size = first_pool_size
    self.first_pool_stride = first_pool_stride
    self.second_pool_size = second_pool_size
    self.second_pool_stride = second_pool_stride
    self.block_sizes = block_sizes
    self.block_strides = block_strides
    self.final_size = final_size

  def __call__(self, inputs, training):
    """Add operations to compute the deep representation of input images.

    Args:
      inputs: A Tensor representing a batch of input images (channels last;
        it is transposed here when data_format is 'channels_first').
      training: A boolean. Set to True to add operations required only when
        training the classifier.

    Returns:
      A Tensor reshaped to [-1, self.final_size]; no dense/logits layer is
      applied here.
    """
    net = inputs
    if self.data_format == 'channels_first':
      # Convert the inputs from channels_last (NHWC) to channels_first (NCHW).
      # This provides a large performance boost on GPU. See
      # https://www.tensorflow.org/performance/performance_guide#data_formats
      net = tf.transpose(net, [0, 3, 1, 2])

    net = conv2d_fixed_padding(
        inputs=net, filters=self.num_filters, kernel_size=self.kernel_size,
        strides=self.conv_stride, data_format=self.data_format)
    net = tf.identity(net, 'initial_conv')

    if self.first_pool_size:
      net = tf.layers.max_pooling2d(
          inputs=net, pool_size=self.first_pool_size,
          strides=self.first_pool_stride, padding='SAME',
          data_format=self.data_format)
      net = tf.identity(net, 'initial_max_pool')

    for index, num_blocks in enumerate(self.block_sizes):
      # Filters double with every block layer.
      layer_filters = self.num_filters * (2**index)
      net = block_layer(
          inputs=net, filters=layer_filters, bottleneck=self.bottleneck,
          block_fn=self.block_fn, blocks=num_blocks,
          strides=self.block_strides[index], training=training,
          name='block_layer{}'.format(index + 1),
          data_format=self.data_format)

    net = tf.nn.relu(batch_norm(net, training, self.data_format))

    # The current top layer has shape
    # `batch_size x pool_size x pool_size x final_size`.
    # ResNet does an Average Pooling layer over pool_size,
    # but that is the same as doing a reduce_mean. We do a reduce_mean
    # here because it performs better than AveragePooling2D.
    if self.data_format == 'channels_first':
      spatial_axes = [2, 3]
    else:
      spatial_axes = [1, 2]
    net = tf.reduce_mean(net, spatial_axes, keepdims=True)
    net = tf.identity(net, 'final_reduce_mean')

    return tf.reshape(net, [-1, self.final_size])

# --------------------------------------------------------------------------------
# /resnet_model.py:
# --------------------------------------------------------------------------------
# Copyright 2018 Thomas Mensink, University of Amsterdam, thomas.mensink@uva.nl
#
# Belonging to the DeepNCM repository
# DeepNCM is proposed in
#    Samantha Guerriero, Barbara Caputo, and Thomas Mensink
#    DeepNCM: Deep Nearest Class Mean Classifiers
#    ICLR Workshop 2018
#    https://openreview.net/forum?id=rkPLZ4JPM
#
# This file (resnet_model) is based on resnet_model from the
# TensorFlow Models Official ResNet library (release 1.8.0/1.7.0)
# https://github.com/tensorflow/models/tree/master/official/resnet
#
# It contains the ResNet code to mimic the resnet_model making use of resnet_deepx.
# ==============================================================================
"""Contains definitions for Residual Networks based on resnet_deepx
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
import resnet_deepx as rn

DEFAULT_VERSION = rn.DEFAULT_VERSION


class ResNetModel(rn.ResNetX):
  """ResNet classifier: the `rn.ResNetX` trunk followed by a dense layer."""

  def __init__(self, resnet_size, bottleneck, num_classes, num_filters,
               kernel_size,
               conv_stride, first_pool_size, first_pool_stride,
               second_pool_size, second_pool_stride, block_sizes, block_strides,
               final_size, version=DEFAULT_VERSION, data_format=None):
    """Forwards every argument unchanged to `rn.ResNetX.__init__`."""
    super(ResNetModel, self).__init__(
        resnet_size=resnet_size,
        bottleneck=bottleneck,
        num_classes=num_classes,
        num_filters=num_filters,
        kernel_size=kernel_size,
        conv_stride=conv_stride,
        first_pool_size=first_pool_size,
        first_pool_stride=first_pool_stride,
        second_pool_size=second_pool_size,
        second_pool_stride=second_pool_stride,
        block_sizes=block_sizes,
        block_strides=block_strides,
        final_size=final_size,
        version=version,
        data_format=data_format)

  def __call__(self, inputs, training):
    """Builds the trunk and a dense layer with `self.num_classes` units.

    Args:
      inputs: A Tensor representing a batch of input images.
      training: A boolean, True when building the training graph.

    Returns:
      The dense layer output, named 'final_dense'.
    """
    features = super(ResNetModel, self).__call__(inputs, training)
    logits = tf.layers.dense(inputs=features, units=self.num_classes)
    return tf.identity(logits, 'final_dense')

# --------------------------------------------------------------------------------
# /resnet_ncm.py:
# --------------------------------------------------------------------------------
# Copyright 2018 Thomas Mensink, University of Amsterdam, thomas.mensink@uva.nl
#
# Belonging to the DeepNCM repository
# DeepNCM is proposed in
#    Samantha Guerriero, Barbara Caputo, and Thomas Mensink
#    DeepNCM: Deep Nearest Class Mean Classifiers
#    ICLR Workshop 2018
#    https://openreview.net/forum?id=rkPLZ4JPM
#
# This file (resnet_ncm) has the code for different DeepNCM variants
# including:
#    softmax             (as baseline)
#    online means
#                        (onlinemean)
#    mean condensation   (omreset)
#    decay mean          (decaymean)
#
"""Contains definitions for DeepNCM Residual Networks.

DeepNCM is proposed in:
[1] Samantha Guerriero, Barbara Caputo, and Thomas Mensink
    DeepNCM: Deep Nearest Class Mean Classifiers
    ICLR Workshop 2018
    https://openreview.net/forum?id=rkPLZ4JPM
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
import resnet_deepx as rn

from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import state_ops

RESNET_DEFAULT_VERSION = rn.DEFAULT_VERSION
NCM_DEFAULT_METHOD = "omreset"
NCM_DEFAULT_PARAMETER = 100
NCM_DEFAULT = {
    'method' : NCM_DEFAULT_METHOD,
    'param'  : NCM_DEFAULT_PARAMETER,
}

# Methods whose logits are (negative) squared distances to the class means.
_NCM_MEAN_METHODS = ("onlinemean", "omreset", "decaymean")


##############################################################
### Code to compute batch counts and means
##############################################################
def _resolve_num_classes(num_classes):
  """Returns the one-hot depth for integer labels, or raises ValueError."""
  if num_classes is None:
    # Backward compatibility: the original code read a module-level
    # `_TRAININGCLASSES` that this file never defines; honour it if a caller
    # injected one.
    num_classes = globals().get('_TRAININGCLASSES')
  if num_classes is None:
    raise ValueError(
        'num_classes is required to one-hot encode labels (oneHot=False).')
  return num_classes


def ncm_batch_counts(batch_x, batch_y, oneHot=True, num_classes=None):
  """Computes per-class feature sums and per-class sample counts of a batch.

  Args:
    batch_x: 2-D feature tensor, one row per sample.
    batch_y: Labels; one-hot rows when `oneHot` is True, class indices
      otherwise.
    oneHot: Whether `batch_y` is already one-hot encoded.
    num_classes: Depth used to one-hot encode `batch_y` when `oneHot` is
      False. Ignored otherwise.

  Returns:
    A tuple (sums, counts): `sums` is the transposed product
    labels^T * features (features x classes) and `counts` is the column sum
    of the one-hot labels with the reduced axis kept (1 x classes).

  Raises:
    ValueError: if `oneHot` is False and no number of classes is available.
  """
  if oneHot:
    by = tf.identity(batch_y)
  else:
    by = tf.one_hot(batch_y, depth=_resolve_num_classes(num_classes),
                    dtype=tf.float32)

  lBMn = tf.reduce_sum(by, axis=0, keepdims=True)
  lBM = tf.matmul(by, batch_x, transpose_a=True)
  lBM = tf.transpose(lBM)
  return lBM, lBMn


def ncm_batch_means(batch_x, batch_y, oneHot=True, num_classes=None):
  """Computes per-class feature means and per-class sample counts of a batch.

  Classes without samples in the batch get a zero mean (their count is
  replaced by 1 before dividing their zero sum).

  Args:
    batch_x: 2-D feature tensor, one row per sample.
    batch_y: Labels, see `ncm_batch_counts`.
    oneHot: Whether `batch_y` is already one-hot encoded.
    num_classes: One-hot depth when `oneHot` is False.

  Returns:
    A tuple (means, counts): `means` is classes x features (note: transposed
    with respect to the sums of `ncm_batch_counts`), `counts` is 1 x classes.
  """
  lBC, lBMn = ncm_batch_counts(batch_x, batch_y, oneHot=oneHot,
                               num_classes=num_classes)
  # Avoid division by zero for classes absent from the batch.
  lBMz = lBMn + tf.cast(tf.equal(lBMn, 0), dtype=tf.float32)
  lBM = tf.transpose(lBC / lBMz)
  return lBM, lBMn


def ncm_sq_dist_bt_norm(a, b):
  """Squared Euclidean distances between rows of `a` and columns of `b`.

  Uses ||a||^2 - 2 a.b + ||b||^2.

  Returns:
    A tuple (d, anorm): the distance matrix and the squared row norms of `a`
    as a column vector.
  """
  anorm = tf.reshape(tf.reduce_sum(tf.square(a), 1), [-1, 1])
  bnorm = tf.reshape(tf.reduce_sum(tf.square(b), 0), [1, -1])
  d = -2 * tf.matmul(a, b, transpose_b=False) + anorm + bnorm
  return d, anorm


def ncm_sq_dist_bt(a, b):
  """Same as `ncm_sq_dist_bt_norm` but returns only the distance matrix."""
  d, _ = ncm_sq_dist_bt_norm(a, b)
  return d


def save_batch_mean(batch_mean, batch_counts, decay_mean):
  """Falls back to the stored mean for classes absent from the batch.

  Args:
    batch_mean: Per-class batch means.
    batch_counts: Per-class counts, shaped to broadcast against `batch_mean`.
    decay_mean: Stored means; transposed here before being mixed in.

  Returns:
    `batch_mean` where the count is positive, the transposed `decay_mean`
    elsewhere.
  """
  condition = tf.cast(math_ops.greater(batch_counts, 0), dtype=tf.float32)
  sbm1 = tf.multiply(batch_mean, condition)
  sbm2 = tf.multiply(tf.transpose(decay_mean), 1 - condition)
  return sbm1 + sbm2


def _safe_div(numerator, denominator, name):
  """Divides two tensors element-wise, returning 0 if the denominator is <= 0.

  Args:
    numerator: A real `Tensor`.
    denominator: A real `Tensor`, with dtype matching `numerator`.
    name: Name for the returned op.

  Returns:
    0 if `denominator` <= 0, else `numerator` / `denominator`

  Copied from TensorFlow Metrics
  """
  t = math_ops.truediv(numerator, denominator)
  zero = array_ops.zeros_like(t, dtype=denominator.dtype)
  condition = math_ops.greater(denominator, zero)
  zero = math_ops.cast(zero, t.dtype)
  return array_ops.where(condition, t, zero, name=name)


class NCMResModel(rn.ResNetX):
  """NCM ResNet class for building the DeepNCM Resnet Model.

  Keeps three non-trainable variables: `iter` (update counter), `total`
  (final_size x num_classes) and `count` (1 x num_classes). The class means
  used for classification are `total / count`.
  """

  def __init__(self, resnet_size, bottleneck, num_classes, num_filters,
               kernel_size,
               conv_stride, first_pool_size, first_pool_stride,
               second_pool_size, second_pool_stride, block_sizes, block_strides,
               final_size, version=RESNET_DEFAULT_VERSION, ncm=NCM_DEFAULT, data_format=None):
    """Creates the ResNet trunk configuration and the NCM state variables.

    Args:
      ncm: dict with keys 'method' (compared case-insensitively) and 'param'.
      (all other arguments are forwarded to `rn.ResNetX.__init__`)

    Raises:
      ValueError: if the method is "decaymean" and 'param' is not in [0, 1).
    """
    super(NCMResModel, self).__init__(
        resnet_size, bottleneck, num_classes, num_filters,
        kernel_size,
        conv_stride, first_pool_size, first_pool_stride,
        second_pool_size, second_pool_stride, block_sizes, block_strides,
        final_size, version, data_format)
    self.ncmmethod = ncm['method'].casefold()
    self.ncmparam = ncm['param']

    if self.ncmmethod == "decaymean" and not 0 <= self.ncmparam < 1:
      # Raised explicitly (not asserted) so the check survives `python -O`.
      raise ValueError("Decay means requires ncmparam between 0 and 1")

    self.iter = tf.get_variable(
        "iter", [], dtype=tf.float32, trainable=False,
        initializer=tf.initializers.constant(0))
    self.total = tf.get_variable(
        "total", [final_size, num_classes], dtype=tf.float32, trainable=False,
        initializer=tf.initializers.constant(0))
    self.count = tf.get_variable(
        "count", [1, num_classes], dtype=tf.float32, trainable=False,
        initializer=tf.initializers.constant(0))

  def get_mean_and_batch_mean(self, deep_x=None, labels=None):
    """Returns (stored class means, batch class means, batch class counts)."""
    bmean, bcounts = ncm_batch_means(deep_x, labels)
    return _safe_div(self.total, self.count, name="deepmean"), bmean, bcounts

  def get_relative_mean_distance(self, deep_x=None, labels=None):
    """Relative distance between batch means and the stored class means.

    Returns:
      A tuple (rdist, mask) where mask is 1 for classes present in the batch.
    """
    bmean, bcounts = ncm_batch_means(deep_x, labels)
    dm, dmnorm = ncm_sq_dist_bt_norm(
        bmean, _safe_div(self.total, self.count, name='deepmean'))
    # NOTE(review): tf.diag_part(dm) is rank 1 while dmnorm is a column
    # (reshaped to [-1, 1]), so this division broadcasts to a square matrix
    # rather than one ratio per class -- confirm this is what the caller's
    # metric expects before changing it.
    rdist = _safe_div(tf.diag_part(dm), dmnorm, name='relativedist')

    return rdist, tf.cast(tf.greater(bcounts, 0), rdist.dtype)

  def get_reset_op(self, update_op):
    """Condenses the running sums: total <- current mean, count <- 1."""
    reset_total_op = state_ops.assign(self.total, update_op, use_locking=True)
    with ops.control_dependencies([update_op]):
      reset_count_op = state_ops.assign(
          self.count, array_ops.ones_like(self.count), use_locking=True)

    return _safe_div(reset_total_op, reset_count_op, 'reset_op')

  def get_ncm_ops(self, deep_x=None, labels=None):
    """Builds the op that updates the class means for one training step.

    Args:
      deep_x: Deep representations of the batch.
      labels: One-hot labels of the batch.

    Returns:
      An op that increments `iter` and, depending on the method, updates the
      running sums (onlinemean), additionally condenses them when
      `iter % ncmparam == 0` (omreset), or applies the decayed mean update
      (decaymean). For any other method only `iter` is incremented.
    """
    iter_op = state_ops.assign_add(self.iter, tf.ones([]))

    if self.ncmmethod in ("onlinemean", "omreset"):
      batchsums, batchcnts = ncm_batch_counts(deep_x, labels)
      update_total_op = state_ops.assign_add(
          self.total, batchsums, use_locking=True)
      with ops.control_dependencies([batchsums]):
        update_count_op = state_ops.assign_add(
            self.count, batchcnts, use_locking=True)
    elif self.ncmmethod == "decaymean":
      batchmeans, batchcnts = ncm_batch_means(deep_x, labels)
      batchcnts = tf.transpose(batchcnts)
      sbm = save_batch_mean(batchmeans, batchcnts, self.total)
      sbm = tf.transpose(sbm)
      ndm = self.ncmparam * self.total + (1 - self.ncmparam) * sbm
      update_total_op = state_ops.assign(self.total, ndm, use_locking=True)
      with ops.control_dependencies([ndm]):
        # `total` holds means directly for this method, so counts stay 1.
        update_count_op = state_ops.assign(
            self.count, array_ops.ones_like(self.count), use_locking=True)
    else:
      # SOFTMAX case: nothing to update besides the iteration counter.
      return iter_op

    update_op = _safe_div(update_total_op, update_count_op, 'update_op')

    if self.ncmmethod == "omreset":
      return tf.cond(
          tf.equal(tf.mod(self.iter, self.ncmparam), 0),
          true_fn=lambda: tf.group(iter_op, self.get_reset_op(update_op)),
          false_fn=lambda: tf.group(iter_op, update_op))
    return tf.group(iter_op, update_op)

  def __call__(self, inputs, training):
    """Builds the network and the NCM (or softmax) logits.

    Args:
      inputs: A Tensor representing a batch of input images.
      training: A boolean, True when building the training graph.

    Returns:
      A tuple (logits, deepx, deepmean).

    Raises:
      ValueError: if `self.ncmmethod` is not a known method (previously this
        surfaced as an unbound `logits` variable).
    """
    if self.ncmmethod != "softmax" and self.ncmmethod not in _NCM_MEAN_METHODS:
      raise ValueError("Unknown ncmmethod: %r" % (self.ncmmethod,))

    deepx = super(NCMResModel, self).__call__(inputs, training)
    deepx = tf.identity(deepx, 'deep-representation')

    deepmean = _safe_div(self.total, self.count, 'deepmean')
    deepmean = tf.identity(deepmean, name="DeepMeanValue")

    if self.ncmmethod == "softmax":
      logits = tf.layers.dense(inputs=deepx, units=self.num_classes)
    else:
      # Nearest class mean: closer means get larger logits.
      logits = -ncm_sq_dist_bt(deepx, deepmean)

    logits = tf.identity(logits, 'logits')
    return logits, deepx, deepmean

# --------------------------------------------------------------------------------
# /resnet_ncmequal.py:
# --------------------------------------------------------------------------------
# Copyright 2018 Thomas Mensink, University of Amsterdam, thomas.mensink@uva.nl
#
# Belonging to the DeepNCM repository
# DeepNCM is proposed in
#    Samantha Guerriero, Barbara Caputo, and Thomas Mensink
#    DeepNCM:
#    Deep Nearest Class Mean Classifiers
#    ICLR Workshop 2018
#    https://openreview.net/forum?id=rkPLZ4JPM
#
# This file (resnet_ncmequal) has the code for different DeepNCM variants
# including:
#    softmax (as baseline)
#    online means (onlinemean)
#    mean condensation (omreset)
#    decay mean (decaymean)
#
"""Contains definitions for DeepNCM Residual Networks.

DeepNCM is proposed in:
[1] Samantha Guerriero, Barbara Caputo, and Thomas Mensink
    DeepNCM: Deep Nearest Class Mean Classifiers
    ICLR Workshop 2018
    https://openreview.net/forum?id=rkPLZ4JPM
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import state_ops

import resnet_deepx as rn

RESNET_DEFAULT_VERSION = rn.DEFAULT_VERSION
NCM_DEFAULT_METHOD = "omreset"
NCM_DEFAULT_PARAMETER = 100
NCM_DEFAULT = {
    'method': NCM_DEFAULT_METHOD,
    'param': NCM_DEFAULT_PARAMETER,
}


##############################################################
### Code to compute batch counts and means
##############################################################
def ncm_batch_counts(batch_x, batch_y, oneHot=True, num_classes=None):
    """Computes per-class feature sums and per-class sample counts of a batch.

    Args:
      batch_x: Feature tensor of the batch (presumably [batch, dim] -- it is
        multiplied as `labels^T * batch_x`).
      batch_y: Labels; one-hot encoded when `oneHot` is True, class indices
        otherwise.
      oneHot: Whether `batch_y` is already one-hot encoded.
      num_classes: Number of classes used as `depth` of `tf.one_hot`; required
        when `oneHot` is False.
    Returns:
      A tuple `(sums, counts)`: the transposed product `labels^T * batch_x`
      (one column per class) and the column sums of the one-hot labels with
      the reduced axis kept.
    Raises:
      ValueError: If `oneHot` is False and the number of classes is unknown.
    """
    if oneHot:
        by = tf.identity(batch_y)
    else:
        if num_classes is None:
            # Legacy fallback: this path used to read a module-level
            # `_TRAININGCLASSES`, which this file never defines (NameError).
            num_classes = globals().get('_TRAININGCLASSES')
        if num_classes is None:
            raise ValueError(
                "num_classes must be given when oneHot is False")
        by = tf.one_hot(batch_y, depth=num_classes, dtype=tf.float32)

    lBMn = tf.reduce_sum(by, axis=0, keepdims=True)
    lBM = tf.matmul(by, batch_x, transpose_a=True)
    lBM = tf.transpose(lBM)
    return lBM, lBMn


def ncm_batch_means(batch_x, batch_y, oneHot=True, num_classes=None):
    """Computes per-class feature means and per-class sample counts of a batch.

    Args:
      batch_x: Feature tensor of the batch.
      batch_y: Labels; one-hot encoded when `oneHot` is True, class indices
        otherwise.
      oneHot: Whether `batch_y` is already one-hot encoded.
      num_classes: Number of classes; required when `oneHot` is False.
    Returns:
      A tuple `(means, counts)`. `means` is the transpose of the per-class
      sums divided by the counts (one row per class); classes without samples
      are divided by 1 instead of 0, so their mean is 0.
    """
    lBC, lBMn = ncm_batch_counts(
        batch_x, batch_y, oneHot=oneHot, num_classes=num_classes)
    # Replace zero counts by one to avoid dividing by zero.
    lBMz = lBMn + tf.cast(tf.equal(lBMn, 0), dtype=tf.float32)
    lBM = tf.transpose(lBC / lBMz)
    return lBM, lBMn


def ncm_sq_dist_bt_norm(a, b):
    """Computes squared distances between rows of `a` and columns of `b`.

    Uses the expansion `-2 * a.b + |a|^2 + |b|^2`.

    Args:
      a: Matrix whose rows are the first set of vectors.
      b: Matrix whose columns are the second set of vectors (it is multiplied
        without transposition).
    Returns:
      A tuple `(d, anorm)`: the distance matrix and the squared row norms of
      `a` as a column vector.
    """
    anorm = tf.reshape(tf.reduce_sum(tf.square(a), 1), [-1, 1])
    bnorm = tf.reshape(tf.reduce_sum(tf.square(b), 0), [1, -1])
    d = -2 * tf.matmul(a, b, transpose_b=False) + anorm + bnorm
    return d, anorm


def ncm_sq_dist_bt(a, b):
    """Returns only the distance matrix of `ncm_sq_dist_bt_norm(a, b)`."""
    d, _ = ncm_sq_dist_bt_norm(a, b)
    return d


def save_batch_mean(batch_mean, batch_counts, decay_mean):
    """Returns the batch mean, falling back to the stored mean for empty classes.

    Args:
      batch_mean: Per-class batch means.
      batch_counts: Per-class batch counts, broadcastable against `batch_mean`.
      decay_mean: Stored means; transposed before being combined with
        `batch_mean`.
    Returns:
      `batch_mean` where `batch_counts > 0`, the transposed `decay_mean`
      elsewhere.
    """
    condition = tf.cast(math_ops.greater(batch_counts, 0), dtype=tf.float32)
    sbm1 = tf.multiply(batch_mean, condition)
    sbm2 = tf.multiply(tf.transpose(decay_mean), 1 - condition)
    return sbm1 + sbm2


def _safe_div(numerator, denominator, name):
    """Divides two tensors element-wise, returning 0 if the denominator is <= 0.

    Args:
      numerator: A real `Tensor`.
      denominator: A real `Tensor`, with dtype matching `numerator`.
      name: Name for the returned op.
    Returns:
      0 if `denominator` <= 0, else `numerator` / `denominator`

    Copied from TensorFlow Metrics
    """
    t = math_ops.truediv(numerator, denominator)
    zero = array_ops.zeros_like(t, dtype=denominator.dtype)
    condition = math_ops.greater(denominator, zero)
    zero = math_ops.cast(zero, t.dtype)
    return array_ops.where(condition, t, zero, name=name)


class NCMResModel(rn.ResNetX):
    """NCM ResNet class for building the DeepNCM Resnet Model.

    In this variant the deep representation is projected by a dense layer to
    `num_classes` dimensions before the class means are applied, so the
    `total` statistics have shape [num_classes, num_classes].
    """

    def __init__(self, resnet_size, bottleneck, num_classes, num_filters,
                 kernel_size,
                 conv_stride, first_pool_size, first_pool_stride,
                 second_pool_size, second_pool_stride, block_sizes, block_strides,
                 final_size, version=RESNET_DEFAULT_VERSION, ncm=NCM_DEFAULT,
                 data_format=None):
        """Creates the model and the variables holding the class statistics.

        All arguments except `ncm` are forwarded unchanged to `rn.ResNetX`.

        Args:
          ncm: Dict with the keys 'method' (softmax, onlinemean, omreset or
            decaymean; compared case-insensitively) and 'param' (the reset
            period for omreset, the decay factor for decaymean).
        """
        super(NCMResModel, self).__init__(
            resnet_size, bottleneck, num_classes, num_filters,
            kernel_size,
            conv_stride, first_pool_size, first_pool_stride,
            second_pool_size, second_pool_stride, block_sizes, block_strides,
            final_size, version, data_format)
        self.ncmmethod = ncm['method'].casefold()
        self.ncmparam = ncm['param']

        if self.ncmmethod == "decaymean":
            assert 0 <= self.ncmparam < 1, "Decay means requires ncmparam between 0 and 1"

        # Non-trainable state: update counter, per-class statistics (one
        # column per class) and per-class counts.
        self.iter = tf.get_variable(
            "iter", [], dtype=tf.float32, trainable=False,
            initializer=tf.initializers.constant(0))
        self.total = tf.get_variable(
            "total", [num_classes, num_classes], dtype=tf.float32,
            trainable=False, initializer=tf.initializers.constant(0))
        self.count = tf.get_variable(
            "count", [1, num_classes], dtype=tf.float32,
            trainable=False, initializer=tf.initializers.constant(0))

    def get_mean_and_batch_mean(self, deep_x=None, labels=None):
        """Returns the stored class means together with the batch statistics.

        Args:
          deep_x: Feature tensor of the current batch.
          labels: Labels of the current batch (one-hot, as `ncm_batch_means`
            is called with its defaults).
        Returns:
          A tuple `(deepmean, bmean, bcounts)`: the stored means
          (`total / count`, 0 where `count` is 0), the per-class batch means
          and the per-class batch counts.
        """
        bmean, bcounts = ncm_batch_means(deep_x, labels)
        return _safe_div(self.total, self.count, name="deepmean"), bmean, bcounts

    def get_relative_mean_distance(self, deep_x=None, labels=None):
        """Returns the distance between batch means and stored means, normalised.

        Args:
          deep_x: Feature tensor of the current batch.
          labels: Labels of the current batch.
        Returns:
          A tuple `(rdist, present)`. `rdist` is the diagonal of the squared
          distance matrix between batch means and stored means divided by the
          squared norm of the batch means; `present` is 1 for classes with at
          least one sample in the batch and 0 otherwise.
        """
        bmean, bcounts = ncm_batch_means(deep_x, labels)
        dm, dmnorm = ncm_sq_dist_bt_norm(
            bmean, _safe_div(self.total, self.count, name='deepmean'))
        # NOTE(review): `tf.diag_part(dm)` is rank 1 while `dmnorm` has shape
        # [-1, 1], so this division appears to broadcast to a square matrix
        # rather than a per-class vector -- confirm this is intended.
        rdist = _safe_div(tf.diag_part(dm), dmnorm, name='relativedist')

        return rdist, tf.cast(tf.greater(bcounts, 0), rdist.dtype)

    def get_reset_op(self, update_op):
        """Builds the op that condenses the statistics into the current means.

        `total` is overwritten with `update_op` (the current means) and
        `count` is set to ones, so that `total / count` is unchanged while the
        accumulated counts are discarded.

        Args:
          update_op: Tensor holding the up-to-date class means.
        Returns:
          The tensor `total / count` evaluated on the freshly assigned values.
        """
        reset_total_op = state_ops.assign(self.total, update_op, use_locking=True)
        with ops.control_dependencies([update_op]):
            reset_count_op = state_ops.assign(
                self.count, array_ops.ones_like(self.count), use_locking=True)

        reset_op = _safe_div(reset_total_op, reset_count_op, 'reset_op')
        return reset_op

    def get_ncm_ops(self, deep_x=None, labels=None):
        """Builds the op that updates the class-mean statistics for one batch.

        Args:
          deep_x: Feature tensor of the current batch.
          labels: Labels of the current batch (one-hot).
        Returns:
          An op that increments `iter` and, depending on `self.ncmmethod`:
            onlinemean: adds the batch sums / counts to `total` / `count`;
            omreset: as onlinemean, and additionally runs the reset op when
              `iter mod ncmparam == 0`;
            decaymean: stores `ncmparam * total + (1 - ncmparam) * batch mean`
              in `total` and sets `count` to ones;
            anything else (softmax): only increments `iter`.
        """
        iter_op = state_ops.assign_add(self.iter, tf.ones([]))

        if self.ncmmethod == "onlinemean" or self.ncmmethod == "omreset":
            batchsums, batchcnts = ncm_batch_counts(deep_x, labels)
            update_total_op = state_ops.assign_add(
                self.total, batchsums, use_locking=True)
            with ops.control_dependencies([batchsums]):
                update_count_op = state_ops.assign_add(
                    self.count, batchcnts, use_locking=True)

            update_op = _safe_div(update_total_op, update_count_op, 'update_op')

        if self.ncmmethod == "decaymean":
            batchmeans, batchcnts = ncm_batch_means(deep_x, labels)
            batchcnts = tf.transpose(batchcnts)
            # Classes absent from the batch keep their stored mean.
            sbm = save_batch_mean(batchmeans, batchcnts, self.total)
            sbm = tf.transpose(sbm)
            ndm = self.ncmparam * self.total + (1 - self.ncmparam) * sbm
            update_total_op = state_ops.assign(self.total, ndm, use_locking=True)
            with ops.control_dependencies([ndm]):
                # `total` now stores means directly, so every count is 1.
                update_count_op = state_ops.assign(
                    self.count, array_ops.ones_like(self.count), use_locking=True)

            update_op = _safe_div(update_total_op, update_count_op, 'update_op')

        if self.ncmmethod == "onlinemean" or self.ncmmethod == "decaymean":
            ncm_op = tf.group(iter_op, update_op)
        elif self.ncmmethod == "omreset":
            ncm_op = tf.cond(
                tf.equal(tf.mod(self.iter, self.ncmparam), 0),
                true_fn=lambda: tf.group(iter_op, self.get_reset_op(update_op)),
                false_fn=lambda: tf.group(iter_op, update_op))
        else:  # SOFTMAX case
            ncm_op = iter_op

        return ncm_op

    def __call__(self, inputs, training):
        """Builds the network and the classification logits.

        Args:
          inputs: Input tensor, forwarded to the parent model.
          training: Training-mode flag, forwarded to the parent model.
        Returns:
          A tuple `(logits, deepx, deepmean)`. For the NCM methods `deepx` is
          the representation after the dense projection to `num_classes`
          dimensions and `logits` are the negative squared distances to the
          class means; for softmax `deepx` is the parent model output and
          `logits` come from a dense layer.
        Raises:
          ValueError: If `self.ncmmethod` is not one of softmax, onlinemean,
            omreset or decaymean.
        """
        deepx = super(NCMResModel, self).__call__(inputs, training)
        deepx = tf.identity(deepx, 'deep-representation')

        deepmean = _safe_div(self.total, self.count, 'deepmean')
        deepmean = tf.identity(deepmean, name="DeepMeanValue")

        if self.ncmmethod == "softmax":
            logits = tf.layers.dense(inputs=deepx, units=self.num_classes)
        elif self.ncmmethod in ("onlinemean", "omreset", "decaymean"):
            # Project to `num_classes` dimensions so the representation
            # matches the [num_classes, num_classes] class means.
            deepx = tf.layers.dense(inputs=deepx, units=self.num_classes)
            logits = -ncm_sq_dist_bt(deepx, deepmean)
        else:
            # Previously an unknown method left `logits` unbound and failed
            # below with an UnboundLocalError.
            raise ValueError(
                "Unknown NCM method %r; expected one of: softmax, onlinemean, "
                "omreset, decaymean" % (self.ncmmethod,))

        logits = tf.identity(logits, 'logits')
        return logits, deepx, deepmean