├── .gitignore
├── LICENSE
├── NIPS17-TernGrad-slides-v3.pdf
├── Poster_Wen_NIPS2017.pdf
├── README.md
├── slim
│   ├── BUILD
│   ├── README.md
│   ├── __init__.py
│   ├── datasets
│   │   ├── __init__.py
│   │   ├── cifar10.py
│   │   ├── dataset_factory.py
│   │   ├── dataset_utils.py
│   │   ├── download_and_convert_cifar10.py
│   │   ├── download_and_convert_flowers.py
│   │   ├── download_and_convert_mnist.py
│   │   ├── download_convert_and_shard_cifar10.py
│   │   ├── flowers.py
│   │   ├── imagenet.py
│   │   └── mnist.py
│   ├── deployment
│   │   ├── __init__.py
│   │   ├── model_deploy.py
│   │   └── model_deploy_test.py
│   ├── download_and_convert_data.py
│   ├── eval_image_classifier.py
│   ├── nets
│   │   ├── __init__.py
│   │   ├── alexnet.py
│   │   ├── alexnet_test.py
│   │   ├── cifarnet.py
│   │   ├── inception.py
│   │   ├── inception_resnet_v2.py
│   │   ├── inception_resnet_v2_test.py
│   │   ├── inception_utils.py
│   │   ├── inception_v1.py
│   │   ├── inception_v1_test.py
│   │   ├── inception_v2.py
│   │   ├── inception_v2_test.py
│   │   ├── inception_v3.py
│   │   ├── inception_v3_test.py
│   │   ├── inception_v4.py
│   │   ├── inception_v4_test.py
│   │   ├── lenet.py
│   │   ├── nets_factory.py
│   │   ├── nets_factory_test.py
│   │   ├── overfeat.py
│   │   ├── overfeat_test.py
│   │   ├── resnet_utils.py
│   │   ├── resnet_v1.py
│   │   ├── resnet_v1_test.py
│   │   ├── resnet_v2.py
│   │   ├── resnet_v2_test.py
│   │   ├── vgg.py
│   │   └── vgg_test.py
│   ├── preprocessing
│   │   ├── __init__.py
│   │   ├── cifarnet_preprocessing.py
│   │   ├── inception_preprocessing.py
│   │   ├── lenet_preprocessing.py
│   │   ├── preprocessing_factory.py
│   │   └── vgg_preprocessing.py
│   ├── scripts
│   │   ├── finetune_inception_v1_on_flowers.sh
│   │   ├── finetune_inception_v3_on_flowers.sh
│   │   ├── finetune_resnet_v1_50_on_flowers.sh
│   │   ├── train_cifarnet_on_cifar10.sh
│   │   └── train_lenet_on_mnist.sh
│   ├── slim_walkthrough.ipynb
│   └── train_image_classifier.py
└── terngrad
    ├── WORKSPACE
    ├── build_all.sh
    ├── config_dist.sh
    ├── deprecated
    │   ├── run_multi_gpus.sh
    │   ├── run_multi_gpus_cifar10_quick.sh
    │   ├── run_multi_gpus_googlenet.sh
    │   └── run_multi_gpus_vggnet.sh
    ├── g3doc
    │   └── inception_v3_architecture.png
    ├── inception
    │   ├── BUILD
    │   ├── __init__.py
    │   ├── bingrad_common.py
    │   ├── cifar10_data.py
    │   ├── cifar10_distributed_train.py
    │   ├── cifar10_eval.py
    │   ├── cifar10_train.py
    │   ├── data
    │   │   ├── __init__.py
    │   │   ├── build_image_data.py
    │   │   ├── build_imagenet_data.py
    │   │   ├── download_and_preprocess_flowers.sh
    │   │   ├── download_and_preprocess_flowers_mac.sh
    │   │   ├── download_and_preprocess_imagenet.sh
    │   │   ├── download_imagenet.sh
    │   │   ├── imagenet_2012_validation_synset_labels.txt
    │   │   ├── imagenet_lsvrc_2015_synsets.txt
    │   │   ├── imagenet_metadata.txt
    │   │   ├── preprocess_imagenet_validation_data.py
    │   │   └── process_bounding_boxes.py
    │   ├── dataset.py
    │   ├── flowers_data.py
    │   ├── flowers_eval.py
    │   ├── flowers_train.py
    │   ├── image_processing.py
    │   ├── imagenet_data.py
    │   ├── imagenet_distributed_train.py
    │   ├── imagenet_eval.py
    │   ├── imagenet_train.py
    │   ├── inception_distributed_train.py
    │   ├── inception_eval.py
    │   ├── inception_model.py
    │   ├── inception_train.py
    │   ├── lenet_preprocessing.py
    │   ├── mnist_data.py
    │   ├── mnist_eval.py
    │   ├── mnist_train.py
    │   ├── models.py
    │   ├── slim
    │   │   ├── BUILD
    │   │   ├── README.md
    │   │   ├── __init__.py
    │   │   ├── collections_test.py
    │   │   ├── inception_model.py
    │   │   ├── inception_test.py
    │   │   ├── inception_utils.py
    │   │   ├── inception_v1.py
    │   │   ├── losses.py
    │   │   ├── losses_test.py
    │   │   ├── models.py
    │   │   ├── ops.py
    │   │   ├── ops_test.py
    │   │   ├── scopes.py
    │   │   ├── scopes_test.py
    │   │   ├── slim.py
    │   │   ├── variables.py
    │   │   └── variables_test.py
    │   └── vgg_preprocessing.py
    ├── kill_local.sh
    ├── run_dist.sh
    ├── run_dist_cifar10.sh
    ├── run_multi_gpus_alexnet.sh
    ├── run_multi_gpus_cifar10.sh
    ├── run_multi_gpus_googlenet_quick.sh
    ├── run_multi_gpus_lenet.sh
    ├── run_single_ps_cifar10.sh
    ├── run_single_ps_imagenet.sh
    ├── run_single_worker_alexnet.sh
    ├── run_single_worker_cifarnet.sh
    ├── serialize_tensorboard.py
    ├── split_dataset.sh
    ├── stop_dist.sh
    └── test
        └── test_ternary_encoder_decoder.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | terngrad/bazel-bin
 2 | terngrad/bazel-ci_build-cache
 3 | terngrad/bazel-genfiles
 4 | terngrad/bazel-out
 5 | terngrad/bazel-inception
 6 | terngrad/bazel-terngrad
 7 | terngrad/bazel-testlogs
 8 | terngrad/bazel-tf
 9 | /.idea
10 | */.idea
11 | *.pyc

--------------------------------------------------------------------------------
/NIPS17-TernGrad-slides-v3.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenwei202/terngrad/ec4f75e9a3a1e1c4b2e6494d830fbdfdd2e03ddc/NIPS17-TernGrad-slides-v3.pdf

--------------------------------------------------------------------------------
/Poster_Wen_NIPS2017.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenwei202/terngrad/ec4f75e9a3a1e1c4b2e6494d830fbdfdd2e03ddc/Poster_Wen_NIPS2017.pdf

--------------------------------------------------------------------------------
/slim/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |

--------------------------------------------------------------------------------
/slim/datasets/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |

--------------------------------------------------------------------------------
/slim/datasets/cifar10.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Provides data for the Cifar10 dataset.
16 |
17 | The dataset scripts used to create the dataset can be found at:
18 | tensorflow/models/slim/datasets/download_and_convert_cifar10.py
19 | """
20 |
21 | from __future__ import absolute_import
22 | from __future__ import division
23 | from __future__ import print_function
24 |
25 | import os
26 | import tensorflow as tf
27 |
28 | from datasets import dataset_utils
29 |
30 | slim = tf.contrib.slim
31 |
32 | _FILE_PATTERN = 'cifar10_%s.tfrecord'
33 |
34 | SPLITS_TO_SIZES = {'train': 50000, 'test': 10000}
35 |
36 | _NUM_CLASSES = 10
37 |
38 | _ITEMS_TO_DESCRIPTIONS = {
39 |     'image': 'A [32 x 32 x 3] color image.',
40 |     'label': 'A single integer between 0 and 9',
41 | }
42 |
43 |
44 | def get_split(split_name, dataset_dir, file_pattern=None, reader=None):
45 |   """Gets a dataset tuple with instructions for reading cifar10.
46 |
47 |   Args:
48 |     split_name: A train/test split name.
49 |     dataset_dir: The base directory of the dataset sources.
50 |     file_pattern: The file pattern to use when matching the dataset sources.
51 | It is assumed that the pattern contains a '%s' string so that the split 52 | name can be inserted. 53 | reader: The TensorFlow reader type. 54 | 55 | Returns: 56 | A `Dataset` namedtuple. 57 | 58 | Raises: 59 | ValueError: if `split_name` is not a valid train/test split. 60 | """ 61 | if split_name not in SPLITS_TO_SIZES: 62 | raise ValueError('split name %s was not recognized.' % split_name) 63 | 64 | if not file_pattern: 65 | file_pattern = _FILE_PATTERN 66 | file_pattern = os.path.join(dataset_dir, file_pattern % split_name) 67 | 68 | # Allowing None in the signature so that dataset_factory can use the default. 69 | if not reader: 70 | reader = tf.TFRecordReader 71 | 72 | keys_to_features = { 73 | 'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''), 74 | 'image/format': tf.FixedLenFeature((), tf.string, default_value='png'), 75 | 'image/class/label': tf.FixedLenFeature( 76 | [], tf.int64, default_value=tf.zeros([], dtype=tf.int64)), 77 | } 78 | 79 | items_to_handlers = { 80 | 'image': slim.tfexample_decoder.Image(shape=[32, 32, 3]), 81 | 'label': slim.tfexample_decoder.Tensor('image/class/label'), 82 | } 83 | 84 | decoder = slim.tfexample_decoder.TFExampleDecoder( 85 | keys_to_features, items_to_handlers) 86 | 87 | labels_to_names = None 88 | if dataset_utils.has_labels(dataset_dir): 89 | labels_to_names = dataset_utils.read_label_file(dataset_dir) 90 | 91 | return slim.dataset.Dataset( 92 | data_sources=file_pattern, 93 | reader=reader, 94 | decoder=decoder, 95 | num_samples=SPLITS_TO_SIZES[split_name], 96 | items_to_descriptions=_ITEMS_TO_DESCRIPTIONS, 97 | num_classes=_NUM_CLASSES, 98 | labels_to_names=labels_to_names) 99 | -------------------------------------------------------------------------------- /slim/datasets/dataset_factory.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """A factory-pattern class which returns classification image/label pairs.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | from datasets import cifar10 22 | from datasets import flowers 23 | from datasets import imagenet 24 | from datasets import mnist 25 | 26 | datasets_map = { 27 | 'cifar10': cifar10, 28 | 'flowers': flowers, 29 | 'imagenet': imagenet, 30 | 'mnist': mnist, 31 | } 32 | 33 | 34 | def get_dataset(name, split_name, dataset_dir, file_pattern=None, reader=None): 35 | """Given a dataset name and a split_name returns a Dataset. 36 | 37 | Args: 38 | name: String, the name of the dataset. 39 | split_name: A train/test split name. 40 | dataset_dir: The directory where the dataset files are stored. 41 | file_pattern: The file pattern to use for matching the dataset source files. 
42 | reader: The subclass of tf.ReaderBase. If left as `None`, then the default 43 | reader defined by each dataset is used. 44 | 45 | Returns: 46 | A `Dataset` class. 47 | 48 | Raises: 49 | ValueError: If the dataset `name` is unknown. 50 | """ 51 | if name not in datasets_map: 52 | raise ValueError('Name of dataset unknown %s' % name) 53 | return datasets_map[name].get_split( 54 | split_name, 55 | dataset_dir, 56 | file_pattern, 57 | reader) 58 | -------------------------------------------------------------------------------- /slim/datasets/dataset_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains utilities for downloading and converting datasets.""" 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | import os 21 | import sys 22 | import tarfile 23 | 24 | from six.moves import urllib 25 | import tensorflow as tf 26 | 27 | LABELS_FILENAME = 'labels.txt' 28 | 29 | 30 | def int64_feature(values): 31 | """Returns a TF-Feature of int64s. 32 | 33 | Args: 34 | values: A scalar or list of values. 35 | 36 | Returns: 37 | a TF-Feature. 38 | """ 39 | if not isinstance(values, (tuple, list)): 40 | values = [values] 41 | return tf.train.Feature(int64_list=tf.train.Int64List(value=values)) 42 | 43 | def float_feature(value): 44 | """Wrapper for inserting float features into Example proto.""" 45 | if not isinstance(value, list): 46 | value = [value] 47 | return tf.train.Feature(float_list=tf.train.FloatList(value=value)) 48 | 49 | def bytes_feature(values): 50 | """Returns a TF-Feature of bytes. 51 | 52 | Args: 53 | values: A string. 54 | 55 | Returns: 56 | a TF-Feature. 
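
As a quick, hedged illustration of the factory just defined (not part of the repository sources; the /tmp/cifar10 path is hypothetical and assumed to already hold TFRecords produced by download_and_convert_data.py), get_dataset plugs straight into a slim data provider:

import tensorflow as tf
from datasets import dataset_factory

slim = tf.contrib.slim

# Look up the cifar10 module via the factory and build its train split.
dataset = dataset_factory.get_dataset('cifar10', 'train', '/tmp/cifar10')
# A DatasetDataProvider turns the Dataset into image/label tensors.
provider = slim.dataset_data_provider.DatasetDataProvider(dataset)
image, label = provider.get(['image', 'label'])
print(dataset.num_samples, dataset.num_classes)  # 50000 10
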
57 | """ 58 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=[values])) 59 | 60 | 61 | def image_to_tfexample(image_data, image_format, height, width, class_id, human_label='', channels=3): 62 | colorspace = b'RGB' 63 | #channels = 3 64 | xmin = [] 65 | ymin = [] 66 | xmax = [] 67 | ymax = [] 68 | return tf.train.Example(features=tf.train.Features(feature={ 69 | 'image/encoded': bytes_feature(image_data), 70 | 'image/format': bytes_feature(image_format), 71 | 'image/class/label': int64_feature(class_id), 72 | 'image/height': int64_feature(height), 73 | 'image/width': int64_feature(width), 74 | 'image/colorspace': bytes_feature(colorspace), 75 | 'image/channels': int64_feature(channels), 76 | 'image/object/bbox/xmin': float_feature(xmin), 77 | 'image/object/bbox/xmax': float_feature(xmax), 78 | 'image/object/bbox/ymin': float_feature(ymin), 79 | 'image/object/bbox/ymax': float_feature(ymax), 80 | 'image/class/text': bytes_feature(human_label), 81 | 82 | })) 83 | 84 | 85 | def download_and_uncompress_tarball(tarball_url, dataset_dir): 86 | """Downloads the `tarball_url` and uncompresses it locally. 87 | 88 | Args: 89 | tarball_url: The URL of a tarball file. 90 | dataset_dir: The directory where the temporary files are stored. 91 | """ 92 | filename = tarball_url.split('/')[-1] 93 | filepath = os.path.join(dataset_dir, filename) 94 | 95 | def _progress(count, block_size, total_size): 96 | sys.stdout.write('\r>> Downloading %s %.1f%%' % ( 97 | filename, float(count * block_size) / float(total_size) * 100.0)) 98 | sys.stdout.flush() 99 | filepath, _ = urllib.request.urlretrieve(tarball_url, filepath, _progress) 100 | print() 101 | statinfo = os.stat(filepath) 102 | print('Successfully downloaded', filename, statinfo.st_size, 'bytes.') 103 | tarfile.open(filepath, 'r:gz').extractall(dataset_dir) 104 | 105 | 106 | def write_label_file(labels_to_class_names, dataset_dir, 107 | filename=LABELS_FILENAME): 108 | """Writes a file with the list of class names. 109 | 110 | Args: 111 | labels_to_class_names: A map of (integer) labels to class names. 112 | dataset_dir: The directory in which the labels file should be written. 113 | filename: The filename where the class names are written. 114 | """ 115 | labels_filename = os.path.join(dataset_dir, filename) 116 | with tf.gfile.Open(labels_filename, 'w') as f: 117 | for label in labels_to_class_names: 118 | class_name = labels_to_class_names[label] 119 | f.write('%d:%s\n' % (label, class_name)) 120 | 121 | 122 | def has_labels(dataset_dir, filename=LABELS_FILENAME): 123 | """Specifies whether or not the dataset directory contains a label map file. 124 | 125 | Args: 126 | dataset_dir: The directory in which the labels file is found. 127 | filename: The filename where the class names are written. 128 | 129 | Returns: 130 | `True` if the labels file exists and `False` otherwise. 131 | """ 132 | return tf.gfile.Exists(os.path.join(dataset_dir, filename)) 133 | 134 | 135 | def read_label_file(dataset_dir, filename=LABELS_FILENAME): 136 | """Reads the labels file and returns a mapping from ID to class name. 137 | 138 | Args: 139 | dataset_dir: The directory in which the labels file is found. 140 | filename: The filename where the class names are written. 141 | 142 | Returns: 143 | A map from a label (integer) to class name. 
144 |   """
145 |   labels_filename = os.path.join(dataset_dir, filename)
146 |   with tf.gfile.Open(labels_filename, 'rb') as f:
147 |     lines = f.read().decode()
148 |   lines = lines.split('\n')
149 |   lines = filter(None, lines)
150 |
151 |   labels_to_class_names = {}
152 |   for line in lines:
153 |     index = line.index(':')
154 |     labels_to_class_names[int(line[:index])] = line[index+1:]
155 |   return labels_to_class_names
156 |

--------------------------------------------------------------------------------
/slim/datasets/flowers.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Provides data for the flowers dataset.
16 |
17 | The dataset scripts used to create the dataset can be found at:
18 | tensorflow/models/slim/datasets/download_and_convert_flowers.py
19 | """
20 |
21 | from __future__ import absolute_import
22 | from __future__ import division
23 | from __future__ import print_function
24 |
25 | import os
26 | import tensorflow as tf
27 |
28 | from datasets import dataset_utils
29 |
30 | slim = tf.contrib.slim
31 |
32 | _FILE_PATTERN = 'flowers_%s_*.tfrecord'
33 |
34 | SPLITS_TO_SIZES = {'train': 3320, 'validation': 350}
35 |
36 | _NUM_CLASSES = 5
37 |
38 | _ITEMS_TO_DESCRIPTIONS = {
39 |     'image': 'A color image of varying size.',
40 |     'label': 'A single integer between 0 and 4',
41 | }
42 |
43 |
44 | def get_split(split_name, dataset_dir, file_pattern=None, reader=None):
45 |   """Gets a dataset tuple with instructions for reading flowers.
46 |
47 |   Args:
48 |     split_name: A train/validation split name.
49 |     dataset_dir: The base directory of the dataset sources.
50 |     file_pattern: The file pattern to use when matching the dataset sources.
51 |       It is assumed that the pattern contains a '%s' string so that the split
52 |       name can be inserted.
53 |     reader: The TensorFlow reader type.
54 |
55 |   Returns:
56 |     A `Dataset` namedtuple.
57 |
58 |   Raises:
59 |     ValueError: if `split_name` is not a valid train/validation split.
60 |   """
61 |   if split_name not in SPLITS_TO_SIZES:
62 |     raise ValueError('split name %s was not recognized.' % split_name)
63 |
64 |   if not file_pattern:
65 |     file_pattern = _FILE_PATTERN
66 |   file_pattern = os.path.join(dataset_dir, file_pattern % split_name)
67 |
68 |   # Allowing None in the signature so that dataset_factory can use the default.
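
A minimal sketch of the label-file helpers from dataset_utils above (the /tmp/labels_demo directory is hypothetical):

import tensorflow as tf
from datasets import dataset_utils

dataset_dir = '/tmp/labels_demo'
tf.gfile.MakeDirs(dataset_dir)
# Write an integer-label -> class-name map, then read it back the same way
# get_split() does when it populates labels_to_names.
dataset_utils.write_label_file({0: 'daisy', 1: 'rose'}, dataset_dir)
if dataset_utils.has_labels(dataset_dir):
  print(dataset_utils.read_label_file(dataset_dir))  # {0: 'daisy', 1: 'rose'}
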
69 | if reader is None: 70 | reader = tf.TFRecordReader 71 | 72 | keys_to_features = { 73 | 'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''), 74 | 'image/format': tf.FixedLenFeature((), tf.string, default_value='png'), 75 | 'image/class/label': tf.FixedLenFeature( 76 | [], tf.int64, default_value=tf.zeros([], dtype=tf.int64)), 77 | } 78 | 79 | items_to_handlers = { 80 | 'image': slim.tfexample_decoder.Image(), 81 | 'label': slim.tfexample_decoder.Tensor('image/class/label'), 82 | } 83 | 84 | decoder = slim.tfexample_decoder.TFExampleDecoder( 85 | keys_to_features, items_to_handlers) 86 | 87 | labels_to_names = None 88 | if dataset_utils.has_labels(dataset_dir): 89 | labels_to_names = dataset_utils.read_label_file(dataset_dir) 90 | 91 | return slim.dataset.Dataset( 92 | data_sources=file_pattern, 93 | reader=reader, 94 | decoder=decoder, 95 | num_samples=SPLITS_TO_SIZES[split_name], 96 | items_to_descriptions=_ITEMS_TO_DESCRIPTIONS, 97 | num_classes=_NUM_CLASSES, 98 | labels_to_names=labels_to_names) 99 | -------------------------------------------------------------------------------- /slim/datasets/mnist.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Provides data for the MNIST dataset. 16 | 17 | The dataset scripts used to create the dataset can be found at: 18 | tensorflow/models/slim/datasets/download_and_convert_mnist.py 19 | """ 20 | 21 | from __future__ import absolute_import 22 | from __future__ import division 23 | from __future__ import print_function 24 | 25 | import os 26 | import tensorflow as tf 27 | 28 | from datasets import dataset_utils 29 | 30 | slim = tf.contrib.slim 31 | 32 | _FILE_PATTERN = 'mnist_%s.tfrecord' 33 | 34 | _SPLITS_TO_SIZES = {'train': 60000, 'test': 10000} 35 | 36 | _NUM_CLASSES = 10 37 | 38 | _ITEMS_TO_DESCRIPTIONS = { 39 | 'image': 'A [28 x 28 x 1] grayscale image.', 40 | 'label': 'A single integer between 0 and 9', 41 | } 42 | 43 | 44 | def get_split(split_name, dataset_dir, file_pattern=None, reader=None): 45 | """Gets a dataset tuple with instructions for reading MNIST. 46 | 47 | Args: 48 | split_name: A train/test split name. 49 | dataset_dir: The base directory of the dataset sources. 50 | file_pattern: The file pattern to use when matching the dataset sources. 51 | It is assumed that the pattern contains a '%s' string so that the split 52 | name can be inserted. 53 | reader: The TensorFlow reader type. 54 | 55 | Returns: 56 | A `Dataset` namedtuple. 57 | 58 | Raises: 59 | ValueError: if `split_name` is not a valid train/test split. 60 | """ 61 | if split_name not in _SPLITS_TO_SIZES: 62 | raise ValueError('split name %s was not recognized.' 
% split_name) 63 | 64 | if not file_pattern: 65 | file_pattern = _FILE_PATTERN 66 | file_pattern = os.path.join(dataset_dir, file_pattern % split_name) 67 | 68 | # Allowing None in the signature so that dataset_factory can use the default. 69 | if reader is None: 70 | reader = tf.TFRecordReader 71 | 72 | keys_to_features = { 73 | 'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''), 74 | 'image/format': tf.FixedLenFeature((), tf.string, default_value='raw'), 75 | 'image/class/label': tf.FixedLenFeature( 76 | [1], tf.int64, default_value=tf.zeros([1], dtype=tf.int64)), 77 | } 78 | 79 | items_to_handlers = { 80 | 'image': slim.tfexample_decoder.Image(shape=[28, 28, 1], channels=1), 81 | 'label': slim.tfexample_decoder.Tensor('image/class/label', shape=[]), 82 | } 83 | 84 | decoder = slim.tfexample_decoder.TFExampleDecoder( 85 | keys_to_features, items_to_handlers) 86 | 87 | labels_to_names = None 88 | if dataset_utils.has_labels(dataset_dir): 89 | labels_to_names = dataset_utils.read_label_file(dataset_dir) 90 | 91 | return slim.dataset.Dataset( 92 | data_sources=file_pattern, 93 | reader=reader, 94 | decoder=decoder, 95 | num_samples=_SPLITS_TO_SIZES[split_name], 96 | num_classes=_NUM_CLASSES, 97 | items_to_descriptions=_ITEMS_TO_DESCRIPTIONS, 98 | labels_to_names=labels_to_names) 99 | -------------------------------------------------------------------------------- /slim/deployment/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /slim/download_and_convert_data.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | r"""Downloads and converts a particular dataset. 
16 |
17 | Usage:
18 | ```shell
19 |
20 | $ python download_and_convert_data.py \
21 |     --dataset_name=mnist \
22 |     --dataset_dir=/tmp/mnist
23 |
24 | $ python download_and_convert_data.py \
25 |     --dataset_name=cifar10 \
26 |     --dataset_dir=/tmp/cifar10
27 |
28 | $ python download_and_convert_data.py \
29 |     --dataset_name=flowers \
30 |     --dataset_dir=/tmp/flowers
31 | ```
32 | """
33 | from __future__ import absolute_import
34 | from __future__ import division
35 | from __future__ import print_function
36 |
37 | import tensorflow as tf
38 |
39 | from datasets import download_and_convert_cifar10
40 | from datasets import download_convert_and_shard_cifar10
41 | from datasets import download_and_convert_flowers
42 | from datasets import download_and_convert_mnist
43 |
44 | FLAGS = tf.app.flags.FLAGS
45 |
46 | tf.app.flags.DEFINE_string(
47 |     'dataset_name',
48 |     None,
49 |     'The name of the dataset to convert, one of "cifar10", "flowers", "mnist".')
50 |
51 | tf.app.flags.DEFINE_string(
52 |     'dataset_dir',
53 |     None,
54 |     'The directory where the output TFRecords and temporary files are saved.')
55 |
56 | tf.app.flags.DEFINE_bool(
57 |     'shard',
58 |     False,
59 |     'Whether to break the cifar10 train dataset into multiple TFRecord shards.')
60 |
61 | def main(_):
62 |   if not FLAGS.dataset_name:
63 |     raise ValueError('You must supply the dataset name with --dataset_name')
64 |   if not FLAGS.dataset_dir:
65 |     raise ValueError('You must supply the dataset directory with --dataset_dir')
66 |
67 |   if FLAGS.dataset_name == 'cifar10':
68 |     if FLAGS.shard:
69 |       download_convert_and_shard_cifar10.run(FLAGS.dataset_dir)
70 |     else:
71 |       download_and_convert_cifar10.run(FLAGS.dataset_dir)
72 |   elif FLAGS.dataset_name == 'flowers':
73 |     download_and_convert_flowers.run(FLAGS.dataset_dir)
74 |   elif FLAGS.dataset_name == 'mnist':
75 |     download_and_convert_mnist.run(FLAGS.dataset_dir)
76 |   else:
77 |     raise ValueError(
78 |         'dataset_name [%s] was not recognized.' % FLAGS.dataset_name)
79 |
80 | if __name__ == '__main__':
81 |   tf.app.run()
82 |
83 |

--------------------------------------------------------------------------------
/slim/nets/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |

--------------------------------------------------------------------------------
/slim/nets/alexnet.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Contains a model definition for AlexNet.
16 |
17 | This work was first described in:
18 |   ImageNet Classification with Deep Convolutional Neural Networks
19 |   Alex Krizhevsky, Ilya Sutskever and Geoffrey E. Hinton
20 |
21 | and later refined in:
22 |   One weird trick for parallelizing convolutional neural networks
23 |   Alex Krizhevsky, 2014
24 |
25 | Here we provide the implementation proposed in "One weird trick" and not
26 | "ImageNet Classification"; as per that paper, the LRN layers have been removed.
27 |
28 | Usage:
29 |   with slim.arg_scope(alexnet.alexnet_v2_arg_scope()):
30 |     outputs, end_points = alexnet.alexnet_v2(inputs)
31 |
32 | @@alexnet_v2
33 | """
34 |
35 | from __future__ import absolute_import
36 | from __future__ import division
37 | from __future__ import print_function
38 |
39 | import tensorflow as tf
40 |
41 | slim = tf.contrib.slim
42 | trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev)
43 |
44 |
45 | def alexnet_v2_arg_scope(weight_decay=0.0005):
46 |   with slim.arg_scope([slim.conv2d, slim.fully_connected],
47 |                       activation_fn=tf.nn.relu,
48 |                       biases_initializer=tf.constant_initializer(0.1),
49 |                       weights_regularizer=slim.l2_regularizer(weight_decay)):
50 |     with slim.arg_scope([slim.conv2d], padding='SAME'):
51 |       with slim.arg_scope([slim.max_pool2d], padding='VALID') as arg_sc:
52 |         return arg_sc
53 |
54 |
55 | def alexnet_v2(inputs,
56 |                num_classes=1000,
57 |                is_training=True,
58 |                dropout_keep_prob=0.5,
59 |                spatial_squeeze=True,
60 |                scope='alexnet_v2'):
61 |   """AlexNet version 2.
62 |
63 |   Described in: http://arxiv.org/pdf/1404.5997v2.pdf
64 |   Parameters from:
65 |   github.com/akrizhevsky/cuda-convnet2/blob/master/layers/
66 |   layers-imagenet-1gpu.cfg
67 |
68 |   Note: All the fully_connected layers have been transformed to conv2d layers.
69 |         To use in classification mode, resize input to 224x224. To use in fully
70 |         convolutional mode, set spatial_squeeze to false.
71 |         The LRN layers have been removed and the initializers changed from
72 |         random_normal_initializer to xavier_initializer.
73 |
74 |   Args:
75 |     inputs: a tensor of size [batch_size, height, width, channels].
76 |     num_classes: number of predicted classes.
77 |     is_training: whether or not the model is being trained.
78 |     dropout_keep_prob: the probability that activations are kept in the dropout
79 |       layers during training.
80 |     spatial_squeeze: whether or not to squeeze the spatial dimensions of the
81 |       outputs. Useful to remove unnecessary dimensions for classification.
82 |     scope: Optional scope for the variables.
83 |
84 |   Returns:
85 |     the last op containing the log predictions and end_points dict.
86 |   """
87 |   with tf.variable_scope(scope, 'alexnet_v2', [inputs]) as sc:
88 |     end_points_collection = sc.name + '_end_points'
89 |     # Collect outputs for conv2d, fully_connected and max_pool2d.
90 |     with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
91 |                         outputs_collections=[end_points_collection]):
92 |       net = slim.conv2d(inputs, 64, [11, 11], 4, padding='VALID',
93 |                         scope='conv1')
94 |       net = slim.max_pool2d(net, [3, 3], 2, scope='pool1')
95 |       net = slim.conv2d(net, 192, [5, 5], scope='conv2')
96 |       net = slim.max_pool2d(net, [3, 3], 2, scope='pool2')
97 |       net = slim.conv2d(net, 384, [3, 3], scope='conv3')
98 |       net = slim.conv2d(net, 384, [3, 3], scope='conv4')
99 |       net = slim.conv2d(net, 256, [3, 3], scope='conv5')
100 |       net = slim.max_pool2d(net, [3, 3], 2, scope='pool5')
101 |
102 |       # Use conv2d instead of fully_connected layers.
103 | with slim.arg_scope([slim.conv2d], 104 | weights_initializer=trunc_normal(0.005), 105 | biases_initializer=tf.constant_initializer(0.1)): 106 | net = slim.conv2d(net, 4096, [5, 5], padding='VALID', 107 | scope='fc6') 108 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 109 | scope='dropout6') 110 | net = slim.conv2d(net, 4096, [1, 1], scope='fc7') 111 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 112 | scope='dropout7') 113 | net = slim.conv2d(net, num_classes, [1, 1], 114 | activation_fn=None, 115 | normalizer_fn=None, 116 | biases_initializer=tf.zeros_initializer(), 117 | scope='fc8') 118 | 119 | # Convert end_points_collection into a end_point dict. 120 | end_points = slim.utils.convert_collection_to_dict(end_points_collection) 121 | if spatial_squeeze: 122 | net = tf.squeeze(net, [1, 2], name='fc8/squeezed') 123 | end_points[sc.name + '/fc8'] = net 124 | return net, end_points 125 | alexnet_v2.default_image_size = 224 126 | -------------------------------------------------------------------------------- /slim/nets/alexnet_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
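
A minimal inference sketch (not part of the original sources) showing alexnet_v2 combined with its arg scope, per the Usage note in the module docstring:

import tensorflow as tf
from nets import alexnet

slim = tf.contrib.slim

images = tf.random_uniform((1, 224, 224, 3))  # alexnet_v2.default_image_size
with slim.arg_scope(alexnet.alexnet_v2_arg_scope()):
  outputs, end_points = alexnet.alexnet_v2(images, num_classes=1000,
                                           is_training=False)
print(outputs.get_shape())  # (1, 1000) after the fc8/squeezed op
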
14 | # ============================================================================== 15 | """Tests for slim.nets.alexnet.""" 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | import tensorflow as tf 21 | 22 | from nets import alexnet 23 | 24 | slim = tf.contrib.slim 25 | 26 | 27 | class AlexnetV2Test(tf.test.TestCase): 28 | 29 | def testBuild(self): 30 | batch_size = 5 31 | height, width = 224, 224 32 | num_classes = 1000 33 | with self.test_session(): 34 | inputs = tf.random_uniform((batch_size, height, width, 3)) 35 | logits, _ = alexnet.alexnet_v2(inputs, num_classes) 36 | self.assertEquals(logits.op.name, 'alexnet_v2/fc8/squeezed') 37 | self.assertListEqual(logits.get_shape().as_list(), 38 | [batch_size, num_classes]) 39 | 40 | def testFullyConvolutional(self): 41 | batch_size = 1 42 | height, width = 300, 400 43 | num_classes = 1000 44 | with self.test_session(): 45 | inputs = tf.random_uniform((batch_size, height, width, 3)) 46 | logits, _ = alexnet.alexnet_v2(inputs, num_classes, spatial_squeeze=False) 47 | self.assertEquals(logits.op.name, 'alexnet_v2/fc8/BiasAdd') 48 | self.assertListEqual(logits.get_shape().as_list(), 49 | [batch_size, 4, 7, num_classes]) 50 | 51 | def testEndPoints(self): 52 | batch_size = 5 53 | height, width = 224, 224 54 | num_classes = 1000 55 | with self.test_session(): 56 | inputs = tf.random_uniform((batch_size, height, width, 3)) 57 | _, end_points = alexnet.alexnet_v2(inputs, num_classes) 58 | expected_names = ['alexnet_v2/conv1', 59 | 'alexnet_v2/pool1', 60 | 'alexnet_v2/conv2', 61 | 'alexnet_v2/pool2', 62 | 'alexnet_v2/conv3', 63 | 'alexnet_v2/conv4', 64 | 'alexnet_v2/conv5', 65 | 'alexnet_v2/pool5', 66 | 'alexnet_v2/fc6', 67 | 'alexnet_v2/fc7', 68 | 'alexnet_v2/fc8' 69 | ] 70 | self.assertSetEqual(set(end_points.keys()), set(expected_names)) 71 | 72 | def testModelVariables(self): 73 | batch_size = 5 74 | height, width = 224, 224 75 | num_classes = 1000 76 | with self.test_session(): 77 | inputs = tf.random_uniform((batch_size, height, width, 3)) 78 | alexnet.alexnet_v2(inputs, num_classes) 79 | expected_names = ['alexnet_v2/conv1/weights', 80 | 'alexnet_v2/conv1/biases', 81 | 'alexnet_v2/conv2/weights', 82 | 'alexnet_v2/conv2/biases', 83 | 'alexnet_v2/conv3/weights', 84 | 'alexnet_v2/conv3/biases', 85 | 'alexnet_v2/conv4/weights', 86 | 'alexnet_v2/conv4/biases', 87 | 'alexnet_v2/conv5/weights', 88 | 'alexnet_v2/conv5/biases', 89 | 'alexnet_v2/fc6/weights', 90 | 'alexnet_v2/fc6/biases', 91 | 'alexnet_v2/fc7/weights', 92 | 'alexnet_v2/fc7/biases', 93 | 'alexnet_v2/fc8/weights', 94 | 'alexnet_v2/fc8/biases', 95 | ] 96 | model_variables = [v.op.name for v in slim.get_model_variables()] 97 | self.assertSetEqual(set(model_variables), set(expected_names)) 98 | 99 | def testEvaluation(self): 100 | batch_size = 2 101 | height, width = 224, 224 102 | num_classes = 1000 103 | with self.test_session(): 104 | eval_inputs = tf.random_uniform((batch_size, height, width, 3)) 105 | logits, _ = alexnet.alexnet_v2(eval_inputs, is_training=False) 106 | self.assertListEqual(logits.get_shape().as_list(), 107 | [batch_size, num_classes]) 108 | predictions = tf.argmax(logits, 1) 109 | self.assertListEqual(predictions.get_shape().as_list(), [batch_size]) 110 | 111 | def testTrainEvalWithReuse(self): 112 | train_batch_size = 2 113 | eval_batch_size = 1 114 | train_height, train_width = 224, 224 115 | eval_height, eval_width = 300, 400 116 | num_classes = 1000 117 | with self.test_session(): 
118 |       train_inputs = tf.random_uniform(
119 |           (train_batch_size, train_height, train_width, 3))
120 |       logits, _ = alexnet.alexnet_v2(train_inputs)
121 |       self.assertListEqual(logits.get_shape().as_list(),
122 |                            [train_batch_size, num_classes])
123 |       tf.get_variable_scope().reuse_variables()
124 |       eval_inputs = tf.random_uniform(
125 |           (eval_batch_size, eval_height, eval_width, 3))
126 |       logits, _ = alexnet.alexnet_v2(eval_inputs, is_training=False,
127 |                                      spatial_squeeze=False)
128 |       self.assertListEqual(logits.get_shape().as_list(),
129 |                            [eval_batch_size, 4, 7, num_classes])
130 |       logits = tf.reduce_mean(logits, [1, 2])
131 |       predictions = tf.argmax(logits, 1)
132 |       self.assertEquals(predictions.get_shape().as_list(), [eval_batch_size])
133 |
134 |   def testForward(self):
135 |     batch_size = 1
136 |     height, width = 224, 224
137 |     with self.test_session() as sess:
138 |       inputs = tf.random_uniform((batch_size, height, width, 3))
139 |       logits, _ = alexnet.alexnet_v2(inputs)
140 |       sess.run(tf.global_variables_initializer())
141 |       output = sess.run(logits)
142 |       self.assertTrue(output.any())
143 |
144 | if __name__ == '__main__':
145 |   tf.test.main()
146 |

--------------------------------------------------------------------------------
/slim/nets/cifarnet.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Contains a variant of the CIFAR-10 model definition."""
16 |
17 | from __future__ import absolute_import
18 | from __future__ import division
19 | from __future__ import print_function
20 |
21 | import tensorflow as tf
22 |
23 | slim = tf.contrib.slim
24 |
25 | trunc_normal = lambda stddev: tf.truncated_normal_initializer(stddev=stddev)
26 |
27 |
28 | def cifarnet(images, num_classes=10, is_training=False,
29 |              dropout_keep_prob=0.5,
30 |              prediction_fn=slim.softmax,
31 |              scope='CifarNet'):
32 |   """Creates a variant of the CifarNet model.
33 |
34 |   Note that since the output is a set of 'logits', the values fall in the
35 |   interval of (-infinity, infinity). Consequently, to convert the outputs to a
36 |   probability distribution over the classes, one will need to convert them
37 |   using the softmax function:
38 |
39 |       logits = cifarnet.cifarnet(images, is_training=False)
40 |       probabilities = tf.nn.softmax(logits)
41 |       predictions = tf.argmax(logits, 1)
42 |
43 |   Args:
44 |     images: A batch of `Tensors` of size [batch_size, height, width, channels].
45 |     num_classes: the number of classes in the dataset.
46 |     is_training: specifies whether or not we're currently training the model.
47 |       This variable will determine the behaviour of the dropout layer.
48 |     dropout_keep_prob: the percentage of activation values that are retained.
49 |     prediction_fn: a function to get predictions out of logits.
50 |     scope: Optional variable_scope.
51 |
52 |   Returns:
53 |     logits: the pre-softmax activations, a tensor of size
54 |       [batch_size, `num_classes`]
55 |     end_points: a dictionary from components of the network to the corresponding
56 |       activation.
57 |   """
58 |   end_points = {}
59 |
60 |   with tf.variable_scope(scope, 'CifarNet', [images, num_classes]):
61 |     net = slim.conv2d(images, 64, [5, 5], scope='conv1')
62 |     end_points['conv1'] = net
63 |     net = slim.max_pool2d(net, [2, 2], 2, scope='pool1')
64 |     end_points['pool1'] = net
65 |     net = tf.nn.lrn(net, 4, bias=1.0, alpha=0.001/9.0, beta=0.75, name='norm1')
66 |     net = slim.conv2d(net, 64, [5, 5], scope='conv2')
67 |     end_points['conv2'] = net
68 |     net = tf.nn.lrn(net, 4, bias=1.0, alpha=0.001/9.0, beta=0.75, name='norm2')
69 |     net = slim.max_pool2d(net, [2, 2], 2, scope='pool2')
70 |     end_points['pool2'] = net
71 |     net = slim.flatten(net)
72 |     end_points['Flatten'] = net
73 |     net = slim.fully_connected(net, 384, scope='fc3')
74 |     end_points['fc3'] = net
75 |     net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
76 |                        scope='dropout3')
77 |     net = slim.fully_connected(net, 192, scope='fc4')
78 |     end_points['fc4'] = net
79 |     logits = slim.fully_connected(net, num_classes,
80 |                                   biases_initializer=tf.zeros_initializer(),
81 |                                   weights_initializer=trunc_normal(1/192.0),
82 |                                   weights_regularizer=None,
83 |                                   activation_fn=None,
84 |                                   scope='logits')
85 |
86 |     end_points['Logits'] = logits
87 |     end_points['Predictions'] = prediction_fn(logits, scope='Predictions')
88 |
89 |   return logits, end_points
90 | cifarnet.default_image_size = 32
91 |
92 |
93 | def cifarnet_arg_scope(weight_decay=0.004):
94 |   """Defines the default cifarnet argument scope.
95 |
96 |   Args:
97 |     weight_decay: The weight decay to use for regularizing the model.
98 |
99 |   Returns:
100 |     An `arg_scope` to use for the cifarnet model.
101 |   """
102 |   with slim.arg_scope(
103 |       [slim.conv2d],
104 |       weights_initializer=tf.truncated_normal_initializer(stddev=5e-2),
105 |       activation_fn=tf.nn.relu):
106 |     with slim.arg_scope(
107 |         [slim.fully_connected],
108 |         biases_initializer=tf.constant_initializer(0.1),
109 |         weights_initializer=trunc_normal(0.04),
110 |         weights_regularizer=slim.l2_regularizer(weight_decay),
111 |         activation_fn=tf.nn.relu) as sc:
112 |       return sc
113 |

--------------------------------------------------------------------------------
/slim/nets/inception.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
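
A short sketch of driving the CifarNet definition above, expanding the logits-to-probabilities recipe from its docstring (shapes assume the default 32x32 input):

import tensorflow as tf
from nets import cifarnet

slim = tf.contrib.slim

images = tf.random_uniform((8, 32, 32, 3))  # cifarnet.default_image_size is 32
with slim.arg_scope(cifarnet.cifarnet_arg_scope(weight_decay=0.004)):
  logits, end_points = cifarnet.cifarnet(images, num_classes=10,
                                         is_training=True)
probabilities = tf.nn.softmax(logits)  # same values as end_points['Predictions']
predictions = tf.argmax(logits, 1)
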
14 | # ============================================================================== 15 | """Brings all inception models under one namespace.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | # pylint: disable=unused-import 22 | from nets.inception_resnet_v2 import inception_resnet_v2 23 | from nets.inception_resnet_v2 import inception_resnet_v2_arg_scope 24 | from nets.inception_v1 import inception_v1 25 | from nets.inception_v1 import inception_v1_arg_scope 26 | from nets.inception_v1 import inception_v1_base 27 | from nets.inception_v2 import inception_v2 28 | from nets.inception_v2 import inception_v2_arg_scope 29 | from nets.inception_v2 import inception_v2_base 30 | from nets.inception_v3 import inception_v3 31 | from nets.inception_v3 import inception_v3_arg_scope 32 | from nets.inception_v3 import inception_v3_base 33 | from nets.inception_v4 import inception_v4 34 | from nets.inception_v4 import inception_v4_arg_scope 35 | from nets.inception_v4 import inception_v4_base 36 | # pylint: enable=unused-import 37 | -------------------------------------------------------------------------------- /slim/nets/inception_resnet_v2_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | """Tests for slim.inception_resnet_v2.""" 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | import tensorflow as tf 21 | 22 | from nets import inception 23 | 24 | 25 | class InceptionTest(tf.test.TestCase): 26 | 27 | def testBuildLogits(self): 28 | batch_size = 5 29 | height, width = 299, 299 30 | num_classes = 1000 31 | with self.test_session(): 32 | inputs = tf.random_uniform((batch_size, height, width, 3)) 33 | logits, _ = inception.inception_resnet_v2(inputs, num_classes) 34 | self.assertTrue(logits.op.name.startswith('InceptionResnetV2/Logits')) 35 | self.assertListEqual(logits.get_shape().as_list(), 36 | [batch_size, num_classes]) 37 | 38 | def testBuildEndPoints(self): 39 | batch_size = 5 40 | height, width = 299, 299 41 | num_classes = 1000 42 | with self.test_session(): 43 | inputs = tf.random_uniform((batch_size, height, width, 3)) 44 | _, end_points = inception.inception_resnet_v2(inputs, num_classes) 45 | self.assertTrue('Logits' in end_points) 46 | logits = end_points['Logits'] 47 | self.assertListEqual(logits.get_shape().as_list(), 48 | [batch_size, num_classes]) 49 | self.assertTrue('AuxLogits' in end_points) 50 | aux_logits = end_points['AuxLogits'] 51 | self.assertListEqual(aux_logits.get_shape().as_list(), 52 | [batch_size, num_classes]) 53 | pre_pool = end_points['PrePool'] 54 | self.assertListEqual(pre_pool.get_shape().as_list(), 55 | [batch_size, 8, 8, 1536]) 56 | 57 | def testVariablesSetDevice(self): 58 | batch_size = 5 59 | height, width = 299, 299 60 | num_classes = 1000 61 | with self.test_session(): 62 | inputs = tf.random_uniform((batch_size, height, width, 3)) 63 | # Force all Variables to reside on the device. 
64 | with tf.variable_scope('on_cpu'), tf.device('/cpu:0'): 65 | inception.inception_resnet_v2(inputs, num_classes) 66 | with tf.variable_scope('on_gpu'), tf.device('/gpu:0'): 67 | inception.inception_resnet_v2(inputs, num_classes) 68 | for v in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='on_cpu'): 69 | self.assertDeviceEqual(v.device, '/cpu:0') 70 | for v in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='on_gpu'): 71 | self.assertDeviceEqual(v.device, '/gpu:0') 72 | 73 | def testHalfSizeImages(self): 74 | batch_size = 5 75 | height, width = 150, 150 76 | num_classes = 1000 77 | with self.test_session(): 78 | inputs = tf.random_uniform((batch_size, height, width, 3)) 79 | logits, end_points = inception.inception_resnet_v2(inputs, num_classes) 80 | self.assertTrue(logits.op.name.startswith('InceptionResnetV2/Logits')) 81 | self.assertListEqual(logits.get_shape().as_list(), 82 | [batch_size, num_classes]) 83 | pre_pool = end_points['PrePool'] 84 | self.assertListEqual(pre_pool.get_shape().as_list(), 85 | [batch_size, 3, 3, 1536]) 86 | 87 | def testUnknownBatchSize(self): 88 | batch_size = 1 89 | height, width = 299, 299 90 | num_classes = 1000 91 | with self.test_session() as sess: 92 | inputs = tf.placeholder(tf.float32, (None, height, width, 3)) 93 | logits, _ = inception.inception_resnet_v2(inputs, num_classes) 94 | self.assertTrue(logits.op.name.startswith('InceptionResnetV2/Logits')) 95 | self.assertListEqual(logits.get_shape().as_list(), 96 | [None, num_classes]) 97 | images = tf.random_uniform((batch_size, height, width, 3)) 98 | sess.run(tf.global_variables_initializer()) 99 | output = sess.run(logits, {inputs: images.eval()}) 100 | self.assertEquals(output.shape, (batch_size, num_classes)) 101 | 102 | def testEvaluation(self): 103 | batch_size = 2 104 | height, width = 299, 299 105 | num_classes = 1000 106 | with self.test_session() as sess: 107 | eval_inputs = tf.random_uniform((batch_size, height, width, 3)) 108 | logits, _ = inception.inception_resnet_v2(eval_inputs, 109 | num_classes, 110 | is_training=False) 111 | predictions = tf.argmax(logits, 1) 112 | sess.run(tf.global_variables_initializer()) 113 | output = sess.run(predictions) 114 | self.assertEquals(output.shape, (batch_size,)) 115 | 116 | def testTrainEvalWithReuse(self): 117 | train_batch_size = 5 118 | eval_batch_size = 2 119 | height, width = 150, 150 120 | num_classes = 1000 121 | with self.test_session() as sess: 122 | train_inputs = tf.random_uniform((train_batch_size, height, width, 3)) 123 | inception.inception_resnet_v2(train_inputs, num_classes) 124 | eval_inputs = tf.random_uniform((eval_batch_size, height, width, 3)) 125 | logits, _ = inception.inception_resnet_v2(eval_inputs, 126 | num_classes, 127 | is_training=False, 128 | reuse=True) 129 | predictions = tf.argmax(logits, 1) 130 | sess.run(tf.global_variables_initializer()) 131 | output = sess.run(predictions) 132 | self.assertEquals(output.shape, (eval_batch_size,)) 133 | 134 | 135 | if __name__ == '__main__': 136 | tf.test.main() 137 | -------------------------------------------------------------------------------- /slim/nets/inception_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Contains common code shared by all inception models.
16 |
17 | Usage of arg scope:
18 |   with slim.arg_scope(inception_arg_scope()):
19 |     logits, end_points = inception.inception_v3(images, num_classes,
20 |                                                 is_training=is_training)
21 |
22 | """
23 | from __future__ import absolute_import
24 | from __future__ import division
25 | from __future__ import print_function
26 |
27 | import tensorflow as tf
28 |
29 | slim = tf.contrib.slim
30 |
31 |
32 | def inception_arg_scope(weight_decay=0.00004,
33 |                         use_batch_norm=True,
34 |                         batch_norm_decay=0.9997,
35 |                         batch_norm_epsilon=0.001):
36 |   """Defines the default arg scope for inception models.
37 |
38 |   Args:
39 |     weight_decay: The weight decay to use for regularizing the model.
40 |     use_batch_norm: If `True`, batch_norm is applied after each convolution.
41 |     batch_norm_decay: Decay for batch norm moving average.
42 |     batch_norm_epsilon: Small float added to variance to avoid dividing by zero
43 |       in batch norm.
44 |
45 |   Returns:
46 |     An `arg_scope` to use for the inception models.
47 |   """
48 |   batch_norm_params = {
49 |       # Decay for the moving averages.
50 |       'decay': batch_norm_decay,
51 |       # epsilon to prevent 0s in variance.
52 |       'epsilon': batch_norm_epsilon,
53 |       # collection containing update_ops.
54 |       'updates_collections': tf.GraphKeys.UPDATE_OPS,
55 |   }
56 |   if use_batch_norm:
57 |     normalizer_fn = slim.batch_norm
58 |     normalizer_params = batch_norm_params
59 |   else:
60 |     normalizer_fn = None
61 |     normalizer_params = {}
62 |   # Set weight_decay for weights in Conv and FC layers.
63 |   with slim.arg_scope([slim.conv2d, slim.fully_connected],
64 |                       weights_regularizer=slim.l2_regularizer(weight_decay)):
65 |     with slim.arg_scope(
66 |         [slim.conv2d],
67 |         weights_initializer=slim.variance_scaling_initializer(),
68 |         activation_fn=tf.nn.relu,
69 |         normalizer_fn=normalizer_fn,
70 |         normalizer_params=normalizer_params) as sc:
71 |       return sc
72 |

--------------------------------------------------------------------------------
/slim/nets/lenet.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
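
Expanding the docstring's usage note above into a self-contained, hedged sketch (inception_v3 is re-exported by nets/inception.py shown earlier; 299 is its default input size):

import tensorflow as tf
from nets import inception
from nets.inception_utils import inception_arg_scope

slim = tf.contrib.slim

images = tf.random_uniform((4, 299, 299, 3))
with slim.arg_scope(inception_arg_scope(use_batch_norm=True)):
  logits, end_points = inception.inception_v3(images, num_classes=1000,
                                              is_training=True)
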
14 | # ==============================================================================
15 | """Contains a variant of the LeNet model definition."""
16 |
17 | from __future__ import absolute_import
18 | from __future__ import division
19 | from __future__ import print_function
20 |
21 | import tensorflow as tf
22 |
23 | slim = tf.contrib.slim
24 |
25 |
26 | def lenet(images, num_classes=10, is_training=False,
27 |           dropout_keep_prob=0.5,
28 |           prediction_fn=slim.softmax,
29 |           scope='LeNet'):
30 |   """Creates a variant of the LeNet model.
31 |
32 |   Note that since the output is a set of 'logits', the values fall in the
33 |   interval of (-infinity, infinity). Consequently, to convert the outputs to a
34 |   probability distribution over the characters, one will need to convert them
35 |   using the softmax function:
36 |
37 |       logits = lenet.lenet(images, is_training=False)
38 |       probabilities = tf.nn.softmax(logits)
39 |       predictions = tf.argmax(logits, 1)
40 |
41 |   Args:
42 |     images: A batch of `Tensors` of size [batch_size, height, width, channels].
43 |     num_classes: the number of classes in the dataset.
44 |     is_training: specifies whether or not we're currently training the model.
45 |       This variable will determine the behaviour of the dropout layer.
46 |     dropout_keep_prob: the percentage of activation values that are retained.
47 |     prediction_fn: a function to get predictions out of logits.
48 |     scope: Optional variable_scope.
49 |
50 |   Returns:
51 |     logits: the pre-softmax activations, a tensor of size
52 |       [batch_size, `num_classes`]
53 |     end_points: a dictionary from components of the network to the corresponding
54 |       activation.
55 |   """
56 |   end_points = {}
57 |
58 |   with tf.variable_scope(scope, 'LeNet', [images, num_classes]):
59 |     net = slim.conv2d(images, 32, [5, 5], scope='conv1')
60 |     net = slim.max_pool2d(net, [2, 2], 2, scope='pool1')
61 |     net = slim.conv2d(net, 64, [5, 5], scope='conv2')
62 |     net = slim.max_pool2d(net, [2, 2], 2, scope='pool2')
63 |     net = slim.flatten(net)
64 |     end_points['Flatten'] = net
65 |
66 |     net = slim.fully_connected(net, 1024, scope='fc3')
67 |     net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
68 |                        scope='dropout3')
69 |     logits = slim.fully_connected(net, num_classes, activation_fn=None,
70 |                                   scope='fc4')
71 |
72 |     end_points['Logits'] = logits
73 |     end_points['Predictions'] = prediction_fn(logits, scope='Predictions')
74 |
75 |   return logits, end_points
76 | lenet.default_image_size = 28
77 |
78 |
79 | def lenet_arg_scope(weight_decay=0.0):
80 |   """Defines the default lenet argument scope.
81 |
82 |   Args:
83 |     weight_decay: The weight decay to use for regularizing the model.
84 |
85 |   Returns:
86 |     An `arg_scope` to use for the lenet model.
87 |   """
88 |   with slim.arg_scope(
89 |       [slim.conv2d, slim.fully_connected],
90 |       weights_regularizer=slim.l2_regularizer(weight_decay),
91 |       weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
92 |       activation_fn=tf.nn.relu) as sc:
93 |     return sc
94 |

--------------------------------------------------------------------------------
/slim/nets/nets_factory.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains a factory for building various models.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | import functools 21 | 22 | import tensorflow as tf 23 | 24 | from nets import alexnet 25 | from nets import cifarnet 26 | from nets import inception 27 | from nets import lenet 28 | from nets import overfeat 29 | from nets import resnet_v1 30 | from nets import resnet_v2 31 | from nets import vgg 32 | 33 | slim = tf.contrib.slim 34 | 35 | networks_map = {'alexnet_v2': alexnet.alexnet_v2, 36 | 'cifarnet': cifarnet.cifarnet, 37 | 'overfeat': overfeat.overfeat, 38 | 'vgg_a': vgg.vgg_a, 39 | 'vgg_16': vgg.vgg_16, 40 | 'vgg_19': vgg.vgg_19, 41 | 'inception_v1': inception.inception_v1, 42 | 'inception_v2': inception.inception_v2, 43 | 'inception_v3': inception.inception_v3, 44 | 'inception_v4': inception.inception_v4, 45 | 'inception_resnet_v2': inception.inception_resnet_v2, 46 | 'lenet': lenet.lenet, 47 | 'resnet_v1_50': resnet_v1.resnet_v1_50, 48 | 'resnet_v1_101': resnet_v1.resnet_v1_101, 49 | 'resnet_v1_152': resnet_v1.resnet_v1_152, 50 | 'resnet_v1_200': resnet_v1.resnet_v1_200, 51 | 'resnet_v2_50': resnet_v2.resnet_v2_50, 52 | 'resnet_v2_101': resnet_v2.resnet_v2_101, 53 | 'resnet_v2_152': resnet_v2.resnet_v2_152, 54 | 'resnet_v2_200': resnet_v2.resnet_v2_200, 55 | } 56 | 57 | arg_scopes_map = {'alexnet_v2': alexnet.alexnet_v2_arg_scope, 58 | 'cifarnet': cifarnet.cifarnet_arg_scope, 59 | 'overfeat': overfeat.overfeat_arg_scope, 60 | 'vgg_a': vgg.vgg_arg_scope, 61 | 'vgg_16': vgg.vgg_arg_scope, 62 | 'vgg_19': vgg.vgg_arg_scope, 63 | 'inception_v1': inception.inception_v3_arg_scope, 64 | 'inception_v2': inception.inception_v3_arg_scope, 65 | 'inception_v3': inception.inception_v3_arg_scope, 66 | 'inception_v4': inception.inception_v4_arg_scope, 67 | 'inception_resnet_v2': 68 | inception.inception_resnet_v2_arg_scope, 69 | 'lenet': lenet.lenet_arg_scope, 70 | 'resnet_v1_50': resnet_v1.resnet_arg_scope, 71 | 'resnet_v1_101': resnet_v1.resnet_arg_scope, 72 | 'resnet_v1_152': resnet_v1.resnet_arg_scope, 73 | 'resnet_v1_200': resnet_v1.resnet_arg_scope, 74 | 'resnet_v2_50': resnet_v2.resnet_arg_scope, 75 | 'resnet_v2_101': resnet_v2.resnet_arg_scope, 76 | 'resnet_v2_152': resnet_v2.resnet_arg_scope, 77 | 'resnet_v2_200': resnet_v2.resnet_arg_scope, 78 | } 79 | 80 | 81 | def get_network_fn(name, num_classes, weight_decay=0.0, is_training=False): 82 | """Returns a network_fn such as `logits, end_points = network_fn(images)`. 83 | 84 | Args: 85 | name: The name of the network. 86 | num_classes: The number of classes to use for classification. 87 | weight_decay: The l2 coefficient for the model weights. 88 | is_training: `True` if the model is being used for training and `False` 89 | otherwise. 90 | 91 | Returns: 92 | network_fn: A function that applies the model to a batch of images. 
It has 93 | the following signature: 94 | logits, end_points = network_fn(images) 95 | Raises: 96 | ValueError: If network `name` is not recognized. 97 | """ 98 | if name not in networks_map: 99 | raise ValueError('Name of network unknown: %s' % name) 100 | arg_scope = arg_scopes_map[name](weight_decay=weight_decay) 101 | func = networks_map[name] 102 | @functools.wraps(func) 103 | def network_fn(images): 104 | with slim.arg_scope(arg_scope): 105 | return func(images, num_classes, is_training=is_training) 106 | if hasattr(func, 'default_image_size'): 107 | network_fn.default_image_size = func.default_image_size 108 | 109 | return network_fn 110 | -------------------------------------------------------------------------------- /slim/nets/nets_factory_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Tests for slim.nets_factory.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | 23 | import tensorflow as tf 24 | 25 | from nets import nets_factory 26 | 27 | 28 | class NetworksTest(tf.test.TestCase): 29 | 30 | def testGetNetworkFn(self): 31 | batch_size = 5 32 | num_classes = 1000 33 | for net in nets_factory.networks_map: 34 | with self.test_session(): 35 | net_fn = nets_factory.get_network_fn(net, num_classes) 36 | # Most networks use 224 as their default_image_size 37 | image_size = getattr(net_fn, 'default_image_size', 224) 38 | inputs = tf.random_uniform((batch_size, image_size, image_size, 3)) 39 | logits, end_points = net_fn(inputs) 40 | self.assertTrue(isinstance(logits, tf.Tensor)) 41 | self.assertTrue(isinstance(end_points, dict)) 42 | self.assertEqual(logits.get_shape().as_list()[0], batch_size) 43 | self.assertEqual(logits.get_shape().as_list()[-1], num_classes) 44 | 45 | if __name__ == '__main__': 46 | tf.test.main() 47 | -------------------------------------------------------------------------------- /slim/nets/overfeat.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
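# A short sketch of how the factory above is typically wired together (the
# model name and flag values here are illustrative assumptions, not values
# prescribed by this repo):
#
#   import tensorflow as tf
#   from nets import nets_factory
#   network_fn = nets_factory.get_network_fn(
#       'inception_v3', num_classes=1000, weight_decay=0.00004,
#       is_training=True)
#   size = network_fn.default_image_size
#   images = tf.random_uniform((32, size, size, 3))
#   logits, end_points = network_fn(images)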
14 | # ============================================================================== 15 | """Contains the model definition for the OverFeat network. 16 | 17 | The definition for the network was obtained from: 18 | OverFeat: Integrated Recognition, Localization and Detection using 19 | Convolutional Networks 20 | Pierre Sermanet, David Eigen, Xiang Zhang, Michael Mathieu, Rob Fergus and 21 | Yann LeCun, 2014 22 | http://arxiv.org/abs/1312.6229 23 | 24 | Usage: 25 | with slim.arg_scope(overfeat.overfeat_arg_scope()): 26 | outputs, end_points = overfeat.overfeat(inputs) 27 | 28 | @@overfeat 29 | """ 30 | from __future__ import absolute_import 31 | from __future__ import division 32 | from __future__ import print_function 33 | 34 | import tensorflow as tf 35 | 36 | slim = tf.contrib.slim 37 | trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev) 38 | 39 | 40 | def overfeat_arg_scope(weight_decay=0.0005): 41 | with slim.arg_scope([slim.conv2d, slim.fully_connected], 42 | activation_fn=tf.nn.relu, 43 | weights_regularizer=slim.l2_regularizer(weight_decay), 44 | biases_initializer=tf.zeros_initializer()): 45 | with slim.arg_scope([slim.conv2d], padding='SAME'): 46 | with slim.arg_scope([slim.max_pool2d], padding='VALID') as arg_sc: 47 | return arg_sc 48 | 49 | 50 | def overfeat(inputs, 51 | num_classes=1000, 52 | is_training=True, 53 | dropout_keep_prob=0.5, 54 | spatial_squeeze=True, 55 | scope='overfeat'): 56 | """Contains the model definition for the OverFeat network. 57 | 58 | The definition for the network was obtained from: 59 | OverFeat: Integrated Recognition, Localization and Detection using 60 | Convolutional Networks 61 | Pierre Sermanet, David Eigen, Xiang Zhang, Michael Mathieu, Rob Fergus and 62 | Yann LeCun, 2014 63 | http://arxiv.org/abs/1312.6229 64 | 65 | Note: All the fully_connected layers have been transformed to conv2d layers. 66 | To use in classification mode, resize input to 231x231. To use in fully 67 | convolutional mode, set spatial_squeeze to false. 68 | 69 | Args: 70 | inputs: a tensor of size [batch_size, height, width, channels]. 71 | num_classes: number of predicted classes. 72 | is_training: whether or not the model is being trained. 73 | dropout_keep_prob: the probability that activations are kept in the dropout 74 | layers during training. 75 | spatial_squeeze: whether or not to squeeze the spatial dimensions of the 76 | outputs. Useful to remove unnecessary dimensions for classification. 77 | scope: Optional scope for the variables. 78 | 79 | Returns: 80 | the last op containing the log predictions and end_points dict.
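  Example (a sketch of the two modes; the shapes below match the unit tests
  in overfeat_test.py):

    # Classification mode: 231x231 inputs give [batch, num_classes] logits.
    inputs = tf.random_uniform((5, 231, 231, 3))
    logits, _ = overfeat(inputs, num_classes=1000)

    # Fully convolutional mode: larger inputs keep a spatial map of logits,
    # e.g. 281x281 inputs yield logits of shape [1, 2, 2, 1000].
    inputs = tf.random_uniform((1, 281, 281, 3))
    logits, _ = overfeat(inputs, num_classes=1000, spatial_squeeze=False)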
81 | 82 | """ 83 | with tf.variable_scope(scope, 'overfeat', [inputs]) as sc: 84 | end_points_collection = sc.name + '_end_points' 85 | # Collect outputs for conv2d, fully_connected and max_pool2d 86 | with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d], 87 | outputs_collections=end_points_collection): 88 | net = slim.conv2d(inputs, 64, [11, 11], 4, padding='VALID', 89 | scope='conv1') 90 | net = slim.max_pool2d(net, [2, 2], scope='pool1') 91 | net = slim.conv2d(net, 256, [5, 5], padding='VALID', scope='conv2') 92 | net = slim.max_pool2d(net, [2, 2], scope='pool2') 93 | net = slim.conv2d(net, 512, [3, 3], scope='conv3') 94 | net = slim.conv2d(net, 1024, [3, 3], scope='conv4') 95 | net = slim.conv2d(net, 1024, [3, 3], scope='conv5') 96 | net = slim.max_pool2d(net, [2, 2], scope='pool5') 97 | with slim.arg_scope([slim.conv2d], 98 | weights_initializer=trunc_normal(0.005), 99 | biases_initializer=tf.constant_initializer(0.1)): 100 | # Use conv2d instead of fully_connected layers. 101 | net = slim.conv2d(net, 3072, [6, 6], padding='VALID', scope='fc6') 102 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 103 | scope='dropout6') 104 | net = slim.conv2d(net, 4096, [1, 1], scope='fc7') 105 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 106 | scope='dropout7') 107 | net = slim.conv2d(net, num_classes, [1, 1], 108 | activation_fn=None, 109 | normalizer_fn=None, 110 | biases_initializer=tf.zeros_initializer(), 111 | scope='fc8') 112 | # Convert end_points_collection into an end_points dict. 113 | end_points = slim.utils.convert_collection_to_dict(end_points_collection) 114 | if spatial_squeeze: 115 | net = tf.squeeze(net, [1, 2], name='fc8/squeezed') 116 | end_points[sc.name + '/fc8'] = net 117 | return net, end_points 118 | overfeat.default_image_size = 231 119 | -------------------------------------------------------------------------------- /slim/nets/overfeat_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | # ============================================================================== 15 | """Tests for slim.nets.overfeat.""" 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | import tensorflow as tf 21 | 22 | from nets import overfeat 23 | 24 | slim = tf.contrib.slim 25 | 26 | 27 | class OverFeatTest(tf.test.TestCase): 28 | 29 | def testBuild(self): 30 | batch_size = 5 31 | height, width = 231, 231 32 | num_classes = 1000 33 | with self.test_session(): 34 | inputs = tf.random_uniform((batch_size, height, width, 3)) 35 | logits, _ = overfeat.overfeat(inputs, num_classes) 36 | self.assertEquals(logits.op.name, 'overfeat/fc8/squeezed') 37 | self.assertListEqual(logits.get_shape().as_list(), 38 | [batch_size, num_classes]) 39 | 40 | def testFullyConvolutional(self): 41 | batch_size = 1 42 | height, width = 281, 281 43 | num_classes = 1000 44 | with self.test_session(): 45 | inputs = tf.random_uniform((batch_size, height, width, 3)) 46 | logits, _ = overfeat.overfeat(inputs, num_classes, spatial_squeeze=False) 47 | self.assertEquals(logits.op.name, 'overfeat/fc8/BiasAdd') 48 | self.assertListEqual(logits.get_shape().as_list(), 49 | [batch_size, 2, 2, num_classes]) 50 | 51 | def testEndPoints(self): 52 | batch_size = 5 53 | height, width = 231, 231 54 | num_classes = 1000 55 | with self.test_session(): 56 | inputs = tf.random_uniform((batch_size, height, width, 3)) 57 | _, end_points = overfeat.overfeat(inputs, num_classes) 58 | expected_names = ['overfeat/conv1', 59 | 'overfeat/pool1', 60 | 'overfeat/conv2', 61 | 'overfeat/pool2', 62 | 'overfeat/conv3', 63 | 'overfeat/conv4', 64 | 'overfeat/conv5', 65 | 'overfeat/pool5', 66 | 'overfeat/fc6', 67 | 'overfeat/fc7', 68 | 'overfeat/fc8' 69 | ] 70 | self.assertSetEqual(set(end_points.keys()), set(expected_names)) 71 | 72 | def testModelVariables(self): 73 | batch_size = 5 74 | height, width = 231, 231 75 | num_classes = 1000 76 | with self.test_session(): 77 | inputs = tf.random_uniform((batch_size, height, width, 3)) 78 | overfeat.overfeat(inputs, num_classes) 79 | expected_names = ['overfeat/conv1/weights', 80 | 'overfeat/conv1/biases', 81 | 'overfeat/conv2/weights', 82 | 'overfeat/conv2/biases', 83 | 'overfeat/conv3/weights', 84 | 'overfeat/conv3/biases', 85 | 'overfeat/conv4/weights', 86 | 'overfeat/conv4/biases', 87 | 'overfeat/conv5/weights', 88 | 'overfeat/conv5/biases', 89 | 'overfeat/fc6/weights', 90 | 'overfeat/fc6/biases', 91 | 'overfeat/fc7/weights', 92 | 'overfeat/fc7/biases', 93 | 'overfeat/fc8/weights', 94 | 'overfeat/fc8/biases', 95 | ] 96 | model_variables = [v.op.name for v in slim.get_model_variables()] 97 | self.assertSetEqual(set(model_variables), set(expected_names)) 98 | 99 | def testEvaluation(self): 100 | batch_size = 2 101 | height, width = 231, 231 102 | num_classes = 1000 103 | with self.test_session(): 104 | eval_inputs = tf.random_uniform((batch_size, height, width, 3)) 105 | logits, _ = overfeat.overfeat(eval_inputs, is_training=False) 106 | self.assertListEqual(logits.get_shape().as_list(), 107 | [batch_size, num_classes]) 108 | predictions = tf.argmax(logits, 1) 109 | self.assertListEqual(predictions.get_shape().as_list(), [batch_size]) 110 | 111 | def testTrainEvalWithReuse(self): 112 | train_batch_size = 2 113 | eval_batch_size = 1 114 | train_height, train_width = 231, 231 115 | eval_height, eval_width = 281, 281 116 | num_classes = 1000 117 | with self.test_session(): 118 | train_inputs = tf.random_uniform( 119 | 
(train_batch_size, train_height, train_width, 3)) 120 | logits, _ = overfeat.overfeat(train_inputs) 121 | self.assertListEqual(logits.get_shape().as_list(), 122 | [train_batch_size, num_classes]) 123 | tf.get_variable_scope().reuse_variables() 124 | eval_inputs = tf.random_uniform( 125 | (eval_batch_size, eval_height, eval_width, 3)) 126 | logits, _ = overfeat.overfeat(eval_inputs, is_training=False, 127 | spatial_squeeze=False) 128 | self.assertListEqual(logits.get_shape().as_list(), 129 | [eval_batch_size, 2, 2, num_classes]) 130 | logits = tf.reduce_mean(logits, [1, 2]) 131 | predictions = tf.argmax(logits, 1) 132 | self.assertEquals(predictions.get_shape().as_list(), [eval_batch_size]) 133 | 134 | def testForward(self): 135 | batch_size = 1 136 | height, width = 231, 231 137 | with self.test_session() as sess: 138 | inputs = tf.random_uniform((batch_size, height, width, 3)) 139 | logits, _ = overfeat.overfeat(inputs) 140 | sess.run(tf.global_variables_initializer()) 141 | output = sess.run(logits) 142 | self.assertTrue(output.any()) 143 | 144 | if __name__ == '__main__': 145 | tf.test.main() 146 | -------------------------------------------------------------------------------- /slim/preprocessing/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /slim/preprocessing/cifarnet_preprocessing.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Provides utilities to preprocess images in CIFAR-10. 16 | 17 | """ 18 | 19 | from __future__ import absolute_import 20 | from __future__ import division 21 | from __future__ import print_function 22 | 23 | import tensorflow as tf 24 | 25 | _PADDING = 4 26 | 27 | slim = tf.contrib.slim 28 | 29 | 30 | def preprocess_for_train(image, 31 | output_height, 32 | output_width, 33 | padding=_PADDING): 34 | """Preprocesses the given image for training. 35 | 36 | Note that the image is zero-padded with `padding` pixels, then randomly 37 | cropped, flipped and color-distorted. 38 | 39 | Args: 40 | image: A `Tensor` representing an image of arbitrary size. 41 | output_height: The height of the image after preprocessing. 42 | output_width: The width of the image after preprocessing. 43 | padding: The amount of padding before and after each dimension of the image. 44 | 45 | Returns: 46 | A preprocessed image. 47 | """ 48 | tf.summary.image('image', tf.expand_dims(image, 0)) 49 | 50 | # Transform the image to floats. 51 | image = tf.to_float(image) 52 | if padding > 0: 53 | image = tf.pad(image, [[padding, padding], [padding, padding], [0, 0]]) 54 | # Randomly crop a [height, width] section of the image.
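  # With the default _PADDING of 4, a 32x32 CIFAR-10 image is zero-padded to
  # 40x40 by tf.pad above, so the random crop below can translate the training
  # image by a few pixels in each direction (a common augmentation; the exact
  # shift range depends on the requested output size).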
55 | distorted_image = tf.random_crop(image, 56 | [output_height, output_width, 3]) 57 | 58 | # Randomly flip the image horizontally. 59 | distorted_image = tf.image.random_flip_left_right(distorted_image) 60 | 61 | tf.summary.image('distorted_image', tf.expand_dims(distorted_image, 0)) 62 | 63 | # Because these operations are not commutative, consider randomizing 64 | # the order of their operation. 65 | distorted_image = tf.image.random_brightness(distorted_image, 66 | max_delta=63) 67 | distorted_image = tf.image.random_contrast(distorted_image, 68 | lower=0.2, upper=1.8) 69 | # Subtract off the mean and divide by the standard deviation of the pixels. 70 | return tf.image.per_image_standardization(distorted_image) 71 | 72 | 73 | def preprocess_for_eval(image, output_height, output_width): 74 | """Preprocesses the given image for evaluation. 75 | 76 | Args: 77 | image: A `Tensor` representing an image of arbitrary size. 78 | output_height: The height of the image after preprocessing. 79 | output_width: The width of the image after preprocessing. 80 | 81 | Returns: 82 | A preprocessed image. 83 | """ 84 | tf.summary.image('image', tf.expand_dims(image, 0)) 85 | # Transform the image to floats. 86 | image = tf.to_float(image) 87 | 88 | # Resize and crop if needed. Note the tf.image API expects height first. 89 | resized_image = tf.image.resize_image_with_crop_or_pad(image, 90 | output_height, 91 | output_width) 92 | tf.summary.image('resized_image', tf.expand_dims(resized_image, 0)) 93 | 94 | # Subtract off the mean and divide by the standard deviation of the pixels. 95 | return tf.image.per_image_standardization(resized_image) 96 | 97 | 98 | def preprocess_image(image, output_height, output_width, is_training=False): 99 | """Preprocesses the given image. 100 | 101 | Args: 102 | image: A `Tensor` representing an image of arbitrary size. 103 | output_height: The height of the image after preprocessing. 104 | output_width: The width of the image after preprocessing. 105 | is_training: `True` if we're preprocessing the image for training and 106 | `False` otherwise. 107 | 108 | Returns: 109 | A preprocessed image. 110 | """ 111 | if is_training: 112 | return preprocess_for_train(image, output_height, output_width) 113 | else: 114 | return preprocess_for_eval(image, output_height, output_width) 115 | -------------------------------------------------------------------------------- /slim/preprocessing/lenet_preprocessing.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | # ============================================================================== 15 | """Provides utilities for preprocessing.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import tensorflow as tf 22 | 23 | slim = tf.contrib.slim 24 | 25 | 26 | def preprocess_image(image, output_height, output_width, is_training): 27 | """Preprocesses the given image. 28 | 29 | Args: 30 | image: A `Tensor` representing an image of arbitrary size. 31 | output_height: The height of the image after preprocessing. 32 | output_width: The width of the image after preprocessing. 33 | is_training: `True` if we're preprocessing the image for training and 34 | `False` otherwise. 35 | 36 | Returns: 37 | A preprocessed image. 38 | """ 39 | image = tf.to_float(image) 40 | image = tf.image.resize_image_with_crop_or_pad( 41 | image, output_height, output_width) 42 | image = tf.subtract(image, 128.0) 43 | image = tf.div(image, 128.0) 44 | return image 45 | -------------------------------------------------------------------------------- /slim/preprocessing/preprocessing_factory.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains a factory for building various preprocessing functions.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import tensorflow as tf 22 | 23 | from preprocessing import cifarnet_preprocessing 24 | from preprocessing import inception_preprocessing 25 | from preprocessing import lenet_preprocessing 26 | from preprocessing import vgg_preprocessing 27 | 28 | slim = tf.contrib.slim 29 | 30 | 31 | def get_preprocessing(name, is_training=False): 32 | """Returns preprocessing_fn(image, height, width, **kwargs). 33 | 34 | Args: 35 | name: The name of the preprocessing function. 36 | is_training: `True` if the model is being used for training and `False` 37 | otherwise. 38 | 39 | Returns: 40 | preprocessing_fn: A function that preprocesses a single image (prior to 41 | batching). It has the following signature: 42 | image = preprocessing_fn(image, output_height, output_width, ...). 43 | 44 | Raises: 45 | ValueError: If Preprocessing `name` is not recognized.
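  Example (a minimal sketch; the CIFAR-10 sizes are illustrative, and
  `raw_image` is assumed to be an already-decoded image `Tensor`):

    image_preprocessing_fn = get_preprocessing('cifarnet', is_training=True)
    image = image_preprocessing_fn(raw_image, 32, 32)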
46 | """ 47 | preprocessing_fn_map = { 48 | 'cifarnet': cifarnet_preprocessing, 49 | 'inception': inception_preprocessing, 50 | 'inception_v1': inception_preprocessing, 51 | 'inception_v2': inception_preprocessing, 52 | 'inception_v3': inception_preprocessing, 53 | 'inception_v4': inception_preprocessing, 54 | 'inception_resnet_v2': inception_preprocessing, 55 | 'lenet': lenet_preprocessing, 56 | 'resnet_v1_50': vgg_preprocessing, 57 | 'resnet_v1_101': vgg_preprocessing, 58 | 'resnet_v1_152': vgg_preprocessing, 59 | 'resnet_v2_50': vgg_preprocessing, 60 | 'resnet_v2_101': vgg_preprocessing, 61 | 'resnet_v2_152': vgg_preprocessing, 62 | 'vgg': vgg_preprocessing, 63 | 'vgg_a': vgg_preprocessing, 64 | 'vgg_16': vgg_preprocessing, 65 | 'vgg_19': vgg_preprocessing, 66 | } 67 | 68 | if name not in preprocessing_fn_map: 69 | raise ValueError('Preprocessing name [%s] was not recognized' % name) 70 | 71 | def preprocessing_fn(image, output_height, output_width, **kwargs): 72 | return preprocessing_fn_map[name].preprocess_image( 73 | image, output_height, output_width, is_training=is_training, **kwargs) 74 | 75 | return preprocessing_fn 76 | -------------------------------------------------------------------------------- /slim/scripts/finetune_inception_v1_on_flowers.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # This script performs the following operations: 4 | # 1. Downloads the Flowers dataset 5 | # 2. Fine-tunes an InceptionV1 model on the Flowers training set. 6 | # 3. Evaluates the model on the Flowers validation set. 7 | # 8 | # Usage: 9 | # cd slim 10 | # ./slim/scripts/finetune_inception_v1_on_flowers.sh 11 | 12 | # Where the pre-trained InceptionV1 checkpoint is saved to. 13 | PRETRAINED_CHECKPOINT_DIR=/tmp/checkpoints 14 | 15 | # Where the training (fine-tuned) checkpoint and logs will be saved to. 16 | TRAIN_DIR=/tmp/flowers-models/inception_v1 17 | 18 | # Where the dataset is saved to. 19 | DATASET_DIR=/tmp/flowers 20 | 21 | # Download the pre-trained checkpoint. 22 | if [ ! -d "$PRETRAINED_CHECKPOINT_DIR" ]; then 23 | mkdir ${PRETRAINED_CHECKPOINT_DIR} 24 | fi 25 | if [ ! -f ${PRETRAINED_CHECKPOINT_DIR}/inception_v1.ckpt ]; then 26 | wget http://download.tensorflow.org/models/inception_v1_2016_08_28.tar.gz 27 | tar -xvf inception_v1_2016_08_28.tar.gz 28 | mv inception_v1.ckpt ${PRETRAINED_CHECKPOINT_DIR}/inception_v1.ckpt 29 | rm inception_v1_2016_08_28.tar.gz 30 | fi 31 | 32 | # Download the dataset 33 | python download_and_convert_data.py \ 34 | --dataset_name=flowers \ 35 | --dataset_dir=${DATASET_DIR} 36 | 37 | # Fine-tune only the new layers for 2000 steps. 38 | python train_image_classifier.py \ 39 | --train_dir=${TRAIN_DIR} \ 40 | --dataset_name=flowers \ 41 | --dataset_split_name=train \ 42 | --dataset_dir=${DATASET_DIR} \ 43 | --model_name=inception_v1 \ 44 | --checkpoint_path=${PRETRAINED_CHECKPOINT_DIR}/inception_v1.ckpt \ 45 | --checkpoint_exclude_scopes=InceptionV1/Logits \ 46 | --trainable_scopes=InceptionV1/Logits \ 47 | --max_number_of_steps=3000 \ 48 | --batch_size=32 \ 49 | --learning_rate=0.01 \ 50 | --save_interval_secs=60 \ 51 | --save_summaries_secs=60 \ 52 | --log_every_n_steps=100 \ 53 | --optimizer=rmsprop \ 54 | --weight_decay=0.00004 55 | 56 | # Run evaluation. 
57 | python eval_image_classifier.py \ 58 | --checkpoint_path=${TRAIN_DIR} \ 59 | --eval_dir=${TRAIN_DIR} \ 60 | --dataset_name=flowers \ 61 | --dataset_split_name=validation \ 62 | --dataset_dir=${DATASET_DIR} \ 63 | --model_name=inception_v1 64 | 65 | # Fine-tune all layers for 1000 steps. 66 | python train_image_classifier.py \ 67 | --train_dir=${TRAIN_DIR}/all \ 68 | --dataset_name=flowers \ 69 | --dataset_split_name=train \ 70 | --dataset_dir=${DATASET_DIR} \ 71 | --checkpoint_path=${TRAIN_DIR} \ 72 | --model_name=inception_v1 \ 73 | --max_number_of_steps=1000 \ 74 | --batch_size=32 \ 75 | --learning_rate=0.001 \ 76 | --save_interval_secs=60 \ 77 | --save_summaries_secs=60 \ 78 | --log_every_n_steps=100 \ 79 | --optimizer=rmsprop \ 80 | --weight_decay=0.00004 81 | 82 | # Run evaluation. 83 | python eval_image_classifier.py \ 84 | --checkpoint_path=${TRAIN_DIR}/all \ 85 | --eval_dir=${TRAIN_DIR}/all \ 86 | --dataset_name=flowers \ 87 | --dataset_split_name=validation \ 88 | --dataset_dir=${DATASET_DIR} \ 89 | --model_name=inception_v1 90 | -------------------------------------------------------------------------------- /slim/scripts/finetune_inception_v3_on_flowers.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # This script performs the following operations: 4 | # 1. Downloads the Flowers dataset 5 | # 2. Fine-tunes an InceptionV3 model on the Flowers training set. 6 | # 3. Evaluates the model on the Flowers validation set. 7 | # 8 | # Usage: 9 | # cd slim 10 | # ./scripts/finetune_inception_v3_on_flowers.sh 11 | 12 | # Where the pre-trained InceptionV3 checkpoint is saved to. 13 | PRETRAINED_CHECKPOINT_DIR=/tmp/checkpoints 14 | 15 | # Where the training (fine-tuned) checkpoint and logs will be saved to. 16 | TRAIN_DIR=/tmp/flowers-models/inception_v3 17 | 18 | # Where the dataset is saved to. 19 | DATASET_DIR=/tmp/flowers 20 | 21 | # Download the pre-trained checkpoint. 22 | if [ ! -d "$PRETRAINED_CHECKPOINT_DIR" ]; then 23 | mkdir ${PRETRAINED_CHECKPOINT_DIR} 24 | fi 25 | if [ ! -f ${PRETRAINED_CHECKPOINT_DIR}/inception_v3.ckpt ]; then 26 | wget http://download.tensorflow.org/models/inception_v3_2016_08_28.tar.gz 27 | tar -xvf inception_v3_2016_08_28.tar.gz 28 | mv inception_v3.ckpt ${PRETRAINED_CHECKPOINT_DIR}/inception_v3.ckpt 29 | rm inception_v3_2016_08_28.tar.gz 30 | fi 31 | 32 | # Download the dataset 33 | python download_and_convert_data.py \ 34 | --dataset_name=flowers \ 35 | --dataset_dir=${DATASET_DIR} 36 | 37 | # Fine-tune only the new layers for 1000 steps. 38 | python train_image_classifier.py \ 39 | --train_dir=${TRAIN_DIR} \ 40 | --dataset_name=flowers \ 41 | --dataset_split_name=train \ 42 | --dataset_dir=${DATASET_DIR} \ 43 | --model_name=inception_v3 \ 44 | --checkpoint_path=${PRETRAINED_CHECKPOINT_DIR}/inception_v3.ckpt \ 45 | --checkpoint_exclude_scopes=InceptionV3/Logits,InceptionV3/AuxLogits \ 46 | --trainable_scopes=InceptionV3/Logits,InceptionV3/AuxLogits \ 47 | --max_number_of_steps=1000 \ 48 | --batch_size=32 \ 49 | --learning_rate=0.01 \ 50 | --learning_rate_decay_type=fixed \ 51 | --save_interval_secs=60 \ 52 | --save_summaries_secs=60 \ 53 | --log_every_n_steps=100 \ 54 | --optimizer=rmsprop \ 55 | --weight_decay=0.00004 56 | 57 | # Run evaluation.
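# To evaluate one fixed checkpoint rather than the newest one, the
# --checkpoint_path flag also accepts an explicit checkpoint file (the step
# number below is hypothetical):
#   --checkpoint_path=${TRAIN_DIR}/model.ckpt-1000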
58 | python eval_image_classifier.py \ 59 | --checkpoint_path=${TRAIN_DIR} \ 60 | --eval_dir=${TRAIN_DIR} \ 61 | --dataset_name=flowers \ 62 | --dataset_split_name=validation \ 63 | --dataset_dir=${DATASET_DIR} \ 64 | --model_name=inception_v3 65 | 66 | # Fine-tune all layers for 500 steps. 67 | python train_image_classifier.py \ 68 | --train_dir=${TRAIN_DIR}/all \ 69 | --dataset_name=flowers \ 70 | --dataset_split_name=train \ 71 | --dataset_dir=${DATASET_DIR} \ 72 | --model_name=inception_v3 \ 73 | --checkpoint_path=${TRAIN_DIR} \ 74 | --max_number_of_steps=500 \ 75 | --batch_size=32 \ 76 | --learning_rate=0.0001 \ 77 | --learning_rate_decay_type=fixed \ 78 | --save_interval_secs=60 \ 79 | --save_summaries_secs=60 \ 80 | --log_every_n_steps=10 \ 81 | --optimizer=rmsprop \ 82 | --weight_decay=0.00004 83 | 84 | # Run evaluation. 85 | python eval_image_classifier.py \ 86 | --checkpoint_path=${TRAIN_DIR}/all \ 87 | --eval_dir=${TRAIN_DIR}/all \ 88 | --dataset_name=flowers \ 89 | --dataset_split_name=validation \ 90 | --dataset_dir=${DATASET_DIR} \ 91 | --model_name=inception_v3 92 | -------------------------------------------------------------------------------- /slim/scripts/finetune_resnet_v1_50_on_flowers.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # This script performs the following operations: 4 | # 1. Downloads the Flowers dataset 5 | # 2. Fine-tunes a ResNetV1-50 model on the Flowers training set. 6 | # 3. Evaluates the model on the Flowers validation set. 7 | # 8 | # Usage: 9 | # cd slim 10 | # ./scripts/finetune_resnet_v1_50_on_flowers.sh 11 | 12 | # Where the pre-trained ResNetV1-50 checkpoint is saved to. 13 | PRETRAINED_CHECKPOINT_DIR=/tmp/checkpoints 14 | 15 | # Where the training (fine-tuned) checkpoint and logs will be saved to. 16 | TRAIN_DIR=/tmp/flowers-models/resnet_v1_50 17 | 18 | # Where the dataset is saved to. 19 | DATASET_DIR=/tmp/flowers 20 | 21 | # Download the pre-trained checkpoint. 22 | if [ ! -d "$PRETRAINED_CHECKPOINT_DIR" ]; then 23 | mkdir ${PRETRAINED_CHECKPOINT_DIR} 24 | fi 25 | if [ ! -f ${PRETRAINED_CHECKPOINT_DIR}/resnet_v1_50.ckpt ]; then 26 | wget http://download.tensorflow.org/models/resnet_v1_50_2016_08_28.tar.gz 27 | tar -xvf resnet_v1_50_2016_08_28.tar.gz 28 | mv resnet_v1_50.ckpt ${PRETRAINED_CHECKPOINT_DIR}/resnet_v1_50.ckpt 29 | rm resnet_v1_50_2016_08_28.tar.gz 30 | fi 31 | 32 | # Download the dataset 33 | python download_and_convert_data.py \ 34 | --dataset_name=flowers \ 35 | --dataset_dir=${DATASET_DIR} 36 | 37 | # Fine-tune only the new layers for 3000 steps. 38 | python train_image_classifier.py \ 39 | --train_dir=${TRAIN_DIR} \ 40 | --dataset_name=flowers \ 41 | --dataset_split_name=train \ 42 | --dataset_dir=${DATASET_DIR} \ 43 | --model_name=resnet_v1_50 \ 44 | --checkpoint_path=${PRETRAINED_CHECKPOINT_DIR}/resnet_v1_50.ckpt \ 45 | --checkpoint_exclude_scopes=resnet_v1_50/logits \ 46 | --trainable_scopes=resnet_v1_50/logits \ 47 | --max_number_of_steps=3000 \ 48 | --batch_size=32 \ 49 | --learning_rate=0.01 \ 50 | --save_interval_secs=60 \ 51 | --save_summaries_secs=60 \ 52 | --log_every_n_steps=100 \ 53 | --optimizer=rmsprop \ 54 | --weight_decay=0.00004 55 | 56 | # Run evaluation.
57 | python eval_image_classifier.py \ 58 | --checkpoint_path=${TRAIN_DIR} \ 59 | --eval_dir=${TRAIN_DIR} \ 60 | --dataset_name=flowers \ 61 | --dataset_split_name=validation \ 62 | --dataset_dir=${DATASET_DIR} \ 63 | --model_name=resnet_v1_50 64 | 65 | # Fine-tune all layers for 1000 steps. 66 | python train_image_classifier.py \ 67 | --train_dir=${TRAIN_DIR}/all \ 68 | --dataset_name=flowers \ 69 | --dataset_split_name=train \ 70 | --dataset_dir=${DATASET_DIR} \ 71 | --checkpoint_path=${TRAIN_DIR} \ 72 | --model_name=resnet_v1_50 \ 73 | --max_number_of_steps=1000 \ 74 | --batch_size=32 \ 75 | --learning_rate=0.001 \ 76 | --save_interval_secs=60 \ 77 | --save_summaries_secs=60 \ 78 | --log_every_n_steps=100 \ 79 | --optimizer=rmsprop \ 80 | --weight_decay=0.00004 81 | 82 | # Run evaluation. 83 | python eval_image_classifier.py \ 84 | --checkpoint_path=${TRAIN_DIR}/all \ 85 | --eval_dir=${TRAIN_DIR}/all \ 86 | --dataset_name=flowers \ 87 | --dataset_split_name=validation \ 88 | --dataset_dir=${DATASET_DIR} \ 89 | --model_name=resnet_v1_50 90 | -------------------------------------------------------------------------------- /slim/scripts/train_cifarnet_on_cifar10.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # This script performs the following operations: 4 | # 1. Downloads the Cifar10 dataset 5 | # 2. Trains a CifarNet model on the Cifar10 training set. 6 | # 3. Evaluates the model on the Cifar10 testing set. 7 | # 8 | # Usage: 9 | # cd slim 10 | # ./scripts/train_cifarnet_on_cifar10.sh 11 | 12 | # Where the checkpoint and logs will be saved to. 13 | TRAIN_DIR=/tmp/cifarnet-model 14 | 15 | # Where the dataset is saved to. 16 | DATASET_DIR=/tmp/cifar10 17 | 18 | # Download the dataset 19 | python download_and_convert_data.py \ 20 | --dataset_name=cifar10 \ 21 | --dataset_dir=${DATASET_DIR} 22 | 23 | # Run training. 24 | python train_image_classifier.py \ 25 | --train_dir=${TRAIN_DIR} \ 26 | --dataset_name=cifar10 \ 27 | --dataset_split_name=train \ 28 | --dataset_dir=${DATASET_DIR} \ 29 | --model_name=cifarnet \ 30 | --preprocessing_name=cifarnet \ 31 | --max_number_of_steps=100000 \ 32 | --batch_size=128 \ 33 | --save_interval_secs=120 \ 34 | --save_summaries_secs=120 \ 35 | --log_every_n_steps=100 \ 36 | --optimizer=sgd \ 37 | --learning_rate=0.1 \ 38 | --learning_rate_decay_factor=0.1 \ 39 | --num_epochs_per_decay=200 \ 40 | --weight_decay=0.004 41 | 42 | # Run evaluation. 43 | python eval_image_classifier.py \ 44 | --checkpoint_path=${TRAIN_DIR} \ 45 | --eval_dir=${TRAIN_DIR} \ 46 | --dataset_name=cifar10 \ 47 | --dataset_split_name=test \ 48 | --dataset_dir=${DATASET_DIR} \ 49 | --model_name=cifarnet 50 | -------------------------------------------------------------------------------- /slim/scripts/train_lenet_on_mnist.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # This script performs the following operations: 4 | # 1. Downloads the MNIST dataset 5 | # 2. Trains a LeNet model on the MNIST training set. 6 | # 3. Evaluates the model on the MNIST testing set. 7 | # 8 | # Usage: 9 | # cd slim 10 | # ./scripts/train_lenet_on_mnist.sh 11 | 12 | # Where the checkpoint and logs will be saved to. 13 | TRAIN_DIR=/tmp/lenet-model 14 | 15 | # Where the dataset is saved to.
16 | DATASET_DIR=/tmp/mnist 17 | 18 | # Download the dataset 19 | python download_and_convert_data.py \ 20 | --dataset_name=mnist \ 21 | --dataset_dir=${DATASET_DIR} 22 | 23 | # Run training. 24 | python train_image_classifier.py \ 25 | --train_dir=${TRAIN_DIR} \ 26 | --dataset_name=mnist \ 27 | --dataset_split_name=train \ 28 | --dataset_dir=${DATASET_DIR} \ 29 | --model_name=lenet \ 30 | --preprocessing_name=lenet \ 31 | --max_number_of_steps=20000 \ 32 | --batch_size=50 \ 33 | --learning_rate=0.01 \ 34 | --save_interval_secs=60 \ 35 | --save_summaries_secs=60 \ 36 | --log_every_n_steps=100 \ 37 | --optimizer=sgd \ 38 | --learning_rate_decay_type=fixed \ 39 | --weight_decay=0 40 | 41 | # Run evaluation. 42 | python eval_image_classifier.py \ 43 | --checkpoint_path=${TRAIN_DIR} \ 44 | --eval_dir=${TRAIN_DIR} \ 45 | --dataset_name=mnist \ 46 | --dataset_split_name=test \ 47 | --dataset_dir=${DATASET_DIR} \ 48 | --model_name=lenet 49 | -------------------------------------------------------------------------------- /terngrad/WORKSPACE: -------------------------------------------------------------------------------- 1 | workspace(name = "inception") 2 | -------------------------------------------------------------------------------- /terngrad/build_all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | bazel build inception/download_and_preprocess_imagenet 7 | 8 | bazel build inception/mnist_train 9 | bazel build inception/mnist_eval 10 | 11 | bazel build inception/cifar10_train 12 | bazel build inception/cifar10_eval 13 | 14 | bazel build inception/imagenet_train 15 | bazel build inception/imagenet_eval 16 | 17 | bazel build inception/imagenet_distributed_train 18 | bazel build inception/cifar10_distributed_train 19 | -------------------------------------------------------------------------------- /terngrad/config_dist.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | set -x 4 | 5 | ########################## Example setup ############################### 6 | # Parameter server is 10.236.176.29:2222 7 | # Worker 0 is GPU 1 in 10.236.176.28:2224 using ~/dataset/cifar10-data-shard-0-499 as training data 8 | # Worker 1 is GPU 2 in 10.236.176.29:2226 using ~/dataset/cifar10-data-shard-500-999 as training data 9 | # The whole cifar10 dataset is split into cifar10-data-shard-0-499 and cifar10-data-shard-500-999 10 | 11 | 12 | ######################## Workspace of TernGrad ########################## 13 | # The path of executables (terngrad/terngrad).
Must be the same across all nodes 14 | WORKSPACE="~/github/users/wenwei202/terngrad/terngrad" 15 | 16 | 17 | 18 | 19 | #################### Scripts to start workers and ps ##################### 20 | # The script to start worker 21 | # Customize WORKER_SCRIPT for your own training 22 | WORKER_SCRIPT="./run_single_worker_cifarnet.sh" 23 | #WORKER_SCRIPT="./run_single_worker_alexnet.sh" 24 | 25 | # The script to start ps (depending on dataset only) 26 | # Select one from those below 27 | PS_SCRIPT="./run_single_ps_cifar10.sh" 28 | #PS_SCRIPT="./run_single_ps_imagenet.sh" 29 | 30 | 31 | 32 | ######################### Configurations of ps ########################### 33 | # The list of hosts and ports of ps 34 | # Multiple ps not tested yet 35 | PS_HOSTS=( \ 36 | 10.236.176.29:2222 \ 37 | ) 38 | 39 | 40 | 41 | ######################### Configurations of workers ####################### 42 | # The list of hosts and ports of workers 43 | WORKER_HOSTS=( \ 44 | 10.236.176.28:2224 \ 45 | 10.236.176.29:2226 \ 46 | ) 47 | # GPU IDs in corresponding workers 48 | WORKER_DEVICES=( \ 49 | 1 \ 50 | 2 \ 51 | ) 52 | # Paths of dataset shards in corresponding workers 53 | DATA_DIR=( \ 54 | ~/dataset/cifar10-data-shard-0-499 \ 55 | ~/dataset/cifar10-data-shard-500-999 \ 56 | ) 57 | -------------------------------------------------------------------------------- /terngrad/deprecated/run_multi_gpus.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | set -x 4 | 5 | DATASET_NAME=imagenet # imagenet or cifar10 6 | ROOT_WORKSPACE=/tmp/ # the location to store tf.summary and logs 7 | DATA_DIR=${HOME}/dataset/${DATASET_NAME}-data # dataset location 8 | FINETUNED_MODEL_PATH= 9 | NUM_GPUS=2 10 | export CUDA_VISIBLE_DEVICES=0,1 # specify visible gpus to tensorflow 11 | OPTIMIZER=momentum 12 | NET=alexnet 13 | IMAGE_SIZE=224 14 | GRAD_BITS=32 15 | BASE_LR=0.01 16 | CLIP_FACTOR=0.0 # 0.0 means no clipping 17 | # when GRAD_BITS=1 and FLOATING_GRAD_EPOCH>0, switch to floating gradients every FLOATING_GRAD_EPOCH epochs and then switch back 18 | FLOATING_GRAD_EPOCH=0 # 0 means no switching 19 | WEIGHT_DECAY=0.0005 # default - alexnet/vgg_a/vgg_16:0.0005, inception_v3:0.00004, cifar10_alexnet:0.004 20 | DROPOUT_KEEP_PROB=0.5 # The probability to keep in dropout 21 | MOMENTUM=0.9 22 | SIZE_TO_BINARIZE=1 # the min size of variable to enable binarizing. 1 means binarizing all variables when GRAD_BITS=1 23 | TRAIN_BATCH_SIZE=256 # total batch size 24 | VAL_BATCH_SIZE=50 # set smaller to avoid OOM 25 | NUM_EPOCHS_PER_DECAY=20 # epochs per learning rate decay 26 | MAX_STEPS=370000 27 | VAL_TOWER=0 # -1 for cpu 28 | EVAL_INTERVAL_SECS=900 # seconds to evaluate the accuracy 29 | EVAL_DEVICE="/gpu:0" # specify the device to eval. e.g. "/gpu:1", "/cpu:0" 30 | RESTORE_AVG_VAR=True # use the moving average parameters to eval? 31 | SEED=123 # use ${RANDOM} if reproducible results are not required 32 | 33 | if [ ! -d "$ROOT_WORKSPACE" ]; then 34 | echo "${ROOT_WORKSPACE} does not exist!" 35 | exit 36 | fi 37 | 38 | TRAIN_WORKSPACE=${ROOT_WORKSPACE}/${DATASET_NAME}_training_data/ 39 | EVAL_WORKSPACE=${ROOT_WORKSPACE}/${DATASET_NAME}_eval_data/ 40 | INFO_WORKSPACE=${ROOT_WORKSPACE}/${DATASET_NAME}_info/ 41 | if [ ! -d "${INFO_WORKSPACE}" ]; then 42 | echo "Creating ${INFO_WORKSPACE} ..."
43 | mkdir -p ${INFO_WORKSPACE} 44 | fi 45 | current_time=$(date) 46 | current_time=${current_time// /_} 47 | current_time=${current_time//:/-} 48 | FOLDER_NAME=${DATASET_NAME}_${NET}_${IMAGE_SIZE}_${OPTIMIZER}_${GRAD_BITS}_${BASE_LR}_${CLIP_FACTOR}_${FLOATING_GRAD_EPOCH}_${WEIGHT_DECAY}_${MOMENTUM}_${SIZE_TO_BINARIZE}_${TRAIN_BATCH_SIZE}_${NUM_GPUS}_${current_time} 49 | TRAIN_DIR=${TRAIN_WORKSPACE}/${FOLDER_NAME} 50 | EVAL_DIR=${EVAL_WORKSPACE}/${FOLDER_NAME} 51 | if [ ! -d "$TRAIN_DIR" ]; then 52 | echo "Creating ${TRAIN_DIR} ..." 53 | mkdir -p ${TRAIN_DIR} 54 | fi 55 | if [ ! -d "$EVAL_DIR" ]; then 56 | echo "Creating ${EVAL_DIR} ..." 57 | mkdir -p ${EVAL_DIR} 58 | fi 59 | 60 | bazel-bin/inception/${DATASET_NAME}_eval \ 61 | --eval_interval_secs ${EVAL_INTERVAL_SECS} \ 62 | --device ${EVAL_DEVICE} \ 63 | --restore_avg_var ${RESTORE_AVG_VAR} \ 64 | --data_dir ${DATA_DIR} \ 65 | --net ${NET} \ 66 | --image_size ${IMAGE_SIZE} \ 67 | --batch_size ${VAL_BATCH_SIZE} \ 68 | --max_steps ${MAX_STEPS} \ 69 | --checkpoint_dir ${TRAIN_DIR} \ 70 | --tower ${VAL_TOWER} \ 71 | --eval_dir ${EVAL_DIR} > ${INFO_WORKSPACE}/eval_${FOLDER_NAME}_info.txt 2>&1 & 72 | 73 | bazel-bin/inception/${DATASET_NAME}_train \ 74 | --seed ${SEED} \ 75 | --pretrained_model_checkpoint_path "${FINETUNED_MODEL_PATH}" \ 76 | --num_epochs_per_decay ${NUM_EPOCHS_PER_DECAY} \ 77 | --initial_learning_rate ${BASE_LR} \ 78 | --grad_bits ${GRAD_BITS} \ 79 | --clip_factor ${CLIP_FACTOR} \ 80 | --floating_grad_epoch ${FLOATING_GRAD_EPOCH} \ 81 | --weight_decay ${WEIGHT_DECAY} \ 82 | --dropout_keep_prob ${DROPOUT_KEEP_PROB} \ 83 | --momentum ${MOMENTUM} \ 84 | --size_to_binarize ${SIZE_TO_BINARIZE} \ 85 | --optimizer ${OPTIMIZER} \ 86 | --net ${NET} \ 87 | --image_size ${IMAGE_SIZE} \ 88 | --num_gpus ${NUM_GPUS} \ 89 | --batch_size ${TRAIN_BATCH_SIZE} \ 90 | --max_steps ${MAX_STEPS} \ 91 | --train_dir ${TRAIN_DIR} \ 92 | --data_dir ${DATA_DIR} > ${INFO_WORKSPACE}/training_${FOLDER_NAME}_info.txt 2>&1 & 93 | -------------------------------------------------------------------------------- /terngrad/deprecated/run_multi_gpus_cifar10_quick.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | set -x 4 | 5 | DATASET_NAME=cifar10 # imagenet or cifar10 6 | ROOT_WORKSPACE=${HOME}/dataset/results/cifar10/ # the location to store summary and logs 7 | DATA_DIR=${HOME}/dataset/${DATASET_NAME}-data # dataset location 8 | FINETUNED_MODEL_PATH= 9 | NUM_GPUS=2 # num of physical gpus 10 | export CUDA_VISIBLE_DEVICES=0,1 # specify visible gpus to tensorflow 11 | NUM_NODES=2 # num of virtual nodes on physical gpus 12 | OPTIMIZER=momentum 13 | NET=cifar10_alexnet 14 | IMAGE_SIZE=24 15 | GRAD_BITS=32 16 | BASE_LR=0.01 17 | CLIP_FACTOR=0.0 # 0.0 means no clipping 18 | # when GRAD_BITS=1 and FLOATING_GRAD_EPOCH>0, switch to floating gradients every FLOATING_GRAD_EPOCH epochs and then switch back 19 | FLOATING_GRAD_EPOCH=0 # 0 means no switching 20 | WEIGHT_DECAY=0.004 # default - alexnet/vgg_a/vgg_16:0.0005, inception_v3:0.00004, cifar10_alexnet:0.004 21 | MOMENTUM=0.9 22 | LR_DECAY_TYPE="polynomial" 23 | SIZE_TO_BINARIZE=1 # The min size of variable to enable binarizing. e.g., 385 means biases are excluded from binarizing 24 | TRAIN_BATCH_SIZE=128 25 | SAVE_ITER=2000 # Save summaries and checkpoint every SAVE_ITER iterations 26 | QUANTIZE_LOGITS=True # Whether to quantize the gradients in the last logits layer.
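# The settings above run a 32-bit floating-gradient baseline. A TernGrad run
# of this script would flip the gradient knobs instead, e.g. (a suggested
# starting point with gradient clipping enabled; tune per experiment):
#   GRAD_BITS=1
#   CLIP_FACTOR=2.5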
27 | VAL_BATCH_SIZE=50 # set smaller to avoid OOM 28 | MAX_STEPS=80000 29 | VAL_TOWER=0 # -1 for cpu 30 | EVAL_INTERVAL_SECS=10 31 | EVAL_DEVICE="/gpu:0" # specify the device to eval. e.g. "/gpu:1", "/cpu:0" 32 | RESTORE_AVG_VAR=True # use the moving average parameters to eval? 33 | SEED=123 # use ${RANDOM} if reproducible results are not required 34 | 35 | if [ ! -d "$ROOT_WORKSPACE" ]; then 36 | echo "${ROOT_WORKSPACE} does not exist!" 37 | exit 38 | fi 39 | 40 | TRAIN_WORKSPACE=${ROOT_WORKSPACE}/${DATASET_NAME}_training_data/ 41 | EVAL_WORKSPACE=${ROOT_WORKSPACE}/${DATASET_NAME}_eval_data/ 42 | INFO_WORKSPACE=${ROOT_WORKSPACE}/${DATASET_NAME}_info/ 43 | if [ ! -d "${INFO_WORKSPACE}" ]; then 44 | echo "Creating ${INFO_WORKSPACE} ..." 45 | mkdir -p ${INFO_WORKSPACE} 46 | fi 47 | current_time=$(date) 48 | current_time=${current_time// /_} 49 | current_time=${current_time//:/-} 50 | FOLDER_NAME=${DATASET_NAME}_${NET}_${IMAGE_SIZE}_${OPTIMIZER}_${GRAD_BITS}_${BASE_LR}_${CLIP_FACTOR}_${FLOATING_GRAD_EPOCH}_${WEIGHT_DECAY}_${MOMENTUM}_${SIZE_TO_BINARIZE}_${TRAIN_BATCH_SIZE}_${NUM_NODES}_${current_time} 51 | TRAIN_DIR=${TRAIN_WORKSPACE}/${FOLDER_NAME} 52 | EVAL_DIR=${EVAL_WORKSPACE}/${FOLDER_NAME} 53 | if [ ! -d "$TRAIN_DIR" ]; then 54 | echo "Creating ${TRAIN_DIR} ..." 55 | mkdir -p ${TRAIN_DIR} 56 | fi 57 | if [ ! -d "$EVAL_DIR" ]; then 58 | echo "Creating ${EVAL_DIR} ..." 59 | mkdir -p ${EVAL_DIR} 60 | fi 61 | 62 | bazel-bin/inception/${DATASET_NAME}_eval \ 63 | --eval_interval_secs ${EVAL_INTERVAL_SECS} \ 64 | --device ${EVAL_DEVICE} \ 65 | --restore_avg_var ${RESTORE_AVG_VAR} \ 66 | --data_dir ${DATA_DIR} \ 67 | --subset "test" \ 68 | --net ${NET} \ 69 | --image_size ${IMAGE_SIZE} \ 70 | --batch_size ${VAL_BATCH_SIZE} \ 71 | --max_steps ${MAX_STEPS} \ 72 | --checkpoint_dir ${TRAIN_DIR} \ 73 | --tower ${VAL_TOWER} \ 74 | --eval_dir ${EVAL_DIR} > ${INFO_WORKSPACE}/eval_${FOLDER_NAME}_info.txt 2>&1 & 75 | 76 | bazel-bin/inception/${DATASET_NAME}_train \ 77 | --seed ${SEED} \ 78 | --pretrained_model_checkpoint_path "${FINETUNED_MODEL_PATH}" \ 79 | --initial_learning_rate ${BASE_LR} \ 80 | --grad_bits ${GRAD_BITS} \ 81 | --clip_factor ${CLIP_FACTOR} \ 82 | --floating_grad_epoch ${FLOATING_GRAD_EPOCH} \ 83 | --weight_decay ${WEIGHT_DECAY} \ 84 | --momentum ${MOMENTUM} \ 85 | --learning_rate_decay_type ${LR_DECAY_TYPE} \ 86 | --size_to_binarize ${SIZE_TO_BINARIZE} \ 87 | --optimizer ${OPTIMIZER} \ 88 | --net ${NET} \ 89 | --image_size ${IMAGE_SIZE} \ 90 | --num_gpus ${NUM_GPUS} \ 91 | --num_nodes ${NUM_NODES} \ 92 | --batch_size ${TRAIN_BATCH_SIZE} \ 93 | --save_iter ${SAVE_ITER} \ 94 | --quantize_logits ${QUANTIZE_LOGITS} \ 95 | --max_steps ${MAX_STEPS} \ 96 | --train_dir ${TRAIN_DIR} \ 97 | --data_dir ${DATA_DIR} > ${INFO_WORKSPACE}/training_${FOLDER_NAME}_info.txt 2>&1 & 98 | -------------------------------------------------------------------------------- /terngrad/deprecated/run_multi_gpus_googlenet.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | set -x 4 | 5 | DATASET_NAME=imagenet # imagenet or cifar10 6 | ROOT_WORKSPACE=${HOME}/dataset/results/imagenet # the location to store tf.summary and logs 7 | DATA_DIR=${HOME}/dataset/${DATASET_NAME}-data # dataset location 8 | FINETUNED_MODEL_PATH= 9 | NUM_GPUS=2 10 | export CUDA_VISIBLE_DEVICES=0,1 # specify visible gpus to tensorflow 11 | OPTIMIZER=momentum 12 | NET=googlenet 13 | IMAGE_SIZE=224 14 | GRAD_BITS=32 15 | BASE_LR=0.01 16 | CLIP_FACTOR=0.0 # 0.0 means no clipping 17 | #
when GRAD_BITS=1 and FLOATING_GRAD_EPOCH>0, switch to floating gradients every FLOATING_GRAD_EPOCH epochs and then switch back 18 | FLOATING_GRAD_EPOCH=0 # 0 means no switching 19 | WEIGHT_DECAY=0.00004 # default - alexnet/vgg_a/vgg_16:0.0005, inception_v3:0.00004, cifar10_alexnet:0.004 20 | DROPOUT_KEEP_PROB=0.8 # The probability to keep in dropout 21 | MOMENTUM=0.9 22 | LR_DECAY_FACTOR=0.96 # learning rate decay factor 23 | SIZE_TO_BINARIZE=1 # the min size of variable to enable binarizing. 1 means binarizing all variables when GRAD_BITS=1 24 | TRAIN_BATCH_SIZE=32 # total batch size 25 | VAL_BATCH_SIZE=25 # set smaller to avoid OOM 26 | NUM_EPOCHS_PER_DECAY=8 # epochs per learning rate decay 27 | MAX_STEPS=10000000 28 | VAL_TOWER=0 # -1 for cpu 29 | EVAL_INTERVAL_SECS=900 # seconds to evaluate the accuracy 30 | EVAL_DEVICE="/gpu:0" # specify the device to eval. e.g. "/gpu:1", "/cpu:0" 31 | RESTORE_AVG_VAR=True # use the moving average parameters to eval? 32 | SEED=123 # use ${RANDOM} if reproducible results are not required 33 | 34 | if [ ! -d "$ROOT_WORKSPACE" ]; then 35 | echo "${ROOT_WORKSPACE} does not exist!" 36 | exit 37 | fi 38 | 39 | TRAIN_WORKSPACE=${ROOT_WORKSPACE}/${DATASET_NAME}_training_data/ 40 | EVAL_WORKSPACE=${ROOT_WORKSPACE}/${DATASET_NAME}_eval_data/ 41 | INFO_WORKSPACE=${ROOT_WORKSPACE}/${DATASET_NAME}_info/ 42 | if [ ! -d "${INFO_WORKSPACE}" ]; then 43 | echo "Creating ${INFO_WORKSPACE} ..." 44 | mkdir -p ${INFO_WORKSPACE} 45 | fi 46 | current_time=$(date) 47 | current_time=${current_time// /_} 48 | current_time=${current_time//:/-} 49 | FOLDER_NAME=${DATASET_NAME}_${NET}_${IMAGE_SIZE}_${OPTIMIZER}_${GRAD_BITS}_${BASE_LR}_${CLIP_FACTOR}_${FLOATING_GRAD_EPOCH}_${WEIGHT_DECAY}_${MOMENTUM}_${SIZE_TO_BINARIZE}_${TRAIN_BATCH_SIZE}_${NUM_GPUS}_${current_time} 50 | TRAIN_DIR=${TRAIN_WORKSPACE}/${FOLDER_NAME} 51 | EVAL_DIR=${EVAL_WORKSPACE}/${FOLDER_NAME} 52 | if [ ! -d "$TRAIN_DIR" ]; then 53 | echo "Creating ${TRAIN_DIR} ..." 54 | mkdir -p ${TRAIN_DIR} 55 | fi 56 | if [ ! -d "$EVAL_DIR" ]; then 57 | echo "Creating ${EVAL_DIR} ..."
58 | mkdir -p ${EVAL_DIR} 59 | fi 60 | 61 | bazel-bin/inception/${DATASET_NAME}_eval \ 62 | --eval_interval_secs ${EVAL_INTERVAL_SECS} \ 63 | --device ${EVAL_DEVICE} \ 64 | --restore_avg_var ${RESTORE_AVG_VAR} \ 65 | --data_dir ${DATA_DIR} \ 66 | --net ${NET} \ 67 | --image_size ${IMAGE_SIZE} \ 68 | --batch_size ${VAL_BATCH_SIZE} \ 69 | --max_steps ${MAX_STEPS} \ 70 | --checkpoint_dir ${TRAIN_DIR} \ 71 | --tower ${VAL_TOWER} \ 72 | --eval_dir ${EVAL_DIR} > ${INFO_WORKSPACE}/eval_${FOLDER_NAME}_info.txt 2>&1 & 73 | 74 | bazel-bin/inception/${DATASET_NAME}_train \ 75 | --seed ${SEED} \ 76 | --pretrained_model_checkpoint_path "${FINETUNED_MODEL_PATH}" \ 77 | --num_epochs_per_decay ${NUM_EPOCHS_PER_DECAY} \ 78 | --initial_learning_rate ${BASE_LR} \ 79 | --grad_bits ${GRAD_BITS} \ 80 | --clip_factor ${CLIP_FACTOR} \ 81 | --floating_grad_epoch ${FLOATING_GRAD_EPOCH} \ 82 | --weight_decay ${WEIGHT_DECAY} \ 83 | --dropout_keep_prob ${DROPOUT_KEEP_PROB} \ 84 | --momentum ${MOMENTUM} \ 85 | --learning_rate_decay_factor ${LR_DECAY_FACTOR} \ 86 | --size_to_binarize ${SIZE_TO_BINARIZE} \ 87 | --optimizer ${OPTIMIZER} \ 88 | --net ${NET} \ 89 | --image_size ${IMAGE_SIZE} \ 90 | --num_gpus ${NUM_GPUS} \ 91 | --batch_size ${TRAIN_BATCH_SIZE} \ 92 | --max_steps ${MAX_STEPS} \ 93 | --train_dir ${TRAIN_DIR} \ 94 | --data_dir ${DATA_DIR} > ${INFO_WORKSPACE}/training_${FOLDER_NAME}_info.txt 2>&1 & 95 | -------------------------------------------------------------------------------- /terngrad/deprecated/run_multi_gpus_vggnet.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | set -x 4 | 5 | DATASET_NAME=imagenet # imagenet or cifar10 6 | ROOT_WORKSPACE=${HOME}/dataset/results/imagenet # the location to store tf.summary and logs 7 | DATA_DIR=${HOME}/dataset/${DATASET_NAME}-data # dataset location 8 | FINETUNED_MODEL_PATH= 9 | NUM_GPUS=4 10 | # export CUDA_VISIBLE_DEVICES=0,1,2,3 # specify visible gpus to tensorflow 11 | OPTIMIZER=momentum 12 | NET=vgg_a # vgg_a or vgg_16 13 | IMAGE_SIZE=224 14 | GRAD_BITS=32 15 | BASE_LR=0.01 16 | CLIP_FACTOR=0.0 # 0.0 means no clipping 17 | # when GRAD_BITS=1 and FLOATING_GRAD_EPOCH>0, switch to floating gradients every FLOATING_GRAD_EPOCH epochs and then switch back 18 | FLOATING_GRAD_EPOCH=0 # 0 means no switching 19 | WEIGHT_DECAY=0.0005 # default - alexnet/vgg_a/vgg_16:0.0005, inception_v3:0.00004, cifar10_alexnet:0.004 20 | DROPOUT_KEEP_PROB=0.5 # The probability to keep in dropout 21 | MOMENTUM=0.9 22 | SIZE_TO_BINARIZE=1 # the min size of variable to enable binarizing. 1 means binarizing all variables when GRAD_BITS=1 23 | TRAIN_BATCH_SIZE=256 # total batch size 24 | VAL_BATCH_SIZE=25 # set smaller to avoid OOM 25 | NUM_EPOCHS_PER_DECAY=20 # epochs per learning rate decay 26 | MAX_STEPS=370000 27 | VAL_TOWER=0 # -1 for cpu 28 | EVAL_INTERVAL_SECS=900 # seconds to evaluate the accuracy 29 | EVAL_DEVICE="/gpu:0" # specify the device to eval. e.g. "/gpu:1", "/cpu:0" 30 | RESTORE_AVG_VAR=True # use the moving average parameters to eval? 31 | SEED=123 # use ${RANDOM} if reproducible results are not required 32 | 33 | if [ ! -d "$ROOT_WORKSPACE" ]; then 34 | echo "${ROOT_WORKSPACE} does not exist!" 35 | exit 36 | fi 37 | 38 | TRAIN_WORKSPACE=${ROOT_WORKSPACE}/${DATASET_NAME}_training_data/ 39 | EVAL_WORKSPACE=${ROOT_WORKSPACE}/${DATASET_NAME}_eval_data/ 40 | INFO_WORKSPACE=${ROOT_WORKSPACE}/${DATASET_NAME}_info/ 41 | if [ ! -d "${INFO_WORKSPACE}" ]; then 42 | echo "Creating ${INFO_WORKSPACE} ..."
43 | mkdir -p ${INFO_WORKSPACE} 44 | fi 45 | current_time=$(date) 46 | current_time=${current_time// /_} 47 | current_time=${current_time//:/-} 48 | FOLDER_NAME=${DATASET_NAME}_${NET}_${IMAGE_SIZE}_${OPTIMIZER}_${GRAD_BITS}_${BASE_LR}_${CLIP_FACTOR}_${FLOATING_GRAD_EPOCH}_${WEIGHT_DECAY}_${MOMENTUM}_${SIZE_TO_BINARIZE}_${TRAIN_BATCH_SIZE}_${NUM_GPUS}_${current_time} 49 | TRAIN_DIR=${TRAIN_WORKSPACE}/${FOLDER_NAME} 50 | EVAL_DIR=${EVAL_WORKSPACE}/${FOLDER_NAME} 51 | if [ ! -d "$TRAIN_DIR" ]; then 52 | echo "Creating ${TRAIN_DIR} ..." 53 | mkdir -p ${TRAIN_DIR} 54 | fi 55 | if [ ! -d "$EVAL_DIR" ]; then 56 | echo "Creating ${EVAL_DIR} ..." 57 | mkdir -p ${EVAL_DIR} 58 | fi 59 | 60 | bazel-bin/inception/${DATASET_NAME}_eval \ 61 | --eval_interval_secs ${EVAL_INTERVAL_SECS} \ 62 | --device ${EVAL_DEVICE} \ 63 | --restore_avg_var ${RESTORE_AVG_VAR} \ 64 | --data_dir ${DATA_DIR} \ 65 | --net ${NET} \ 66 | --image_size ${IMAGE_SIZE} \ 67 | --batch_size ${VAL_BATCH_SIZE} \ 68 | --max_steps ${MAX_STEPS} \ 69 | --checkpoint_dir ${TRAIN_DIR} \ 70 | --tower ${VAL_TOWER} \ 71 | --eval_dir ${EVAL_DIR} > ${INFO_WORKSPACE}/eval_${FOLDER_NAME}_info.txt 2>&1 & 72 | 73 | bazel-bin/inception/${DATASET_NAME}_train \ 74 | --seed ${SEED} \ 75 | --pretrained_model_checkpoint_path "${FINETUNED_MODEL_PATH}" \ 76 | --num_epochs_per_decay ${NUM_EPOCHS_PER_DECAY} \ 77 | --initial_learning_rate ${BASE_LR} \ 78 | --grad_bits ${GRAD_BITS} \ 79 | --clip_factor ${CLIP_FACTOR} \ 80 | --floating_grad_epoch ${FLOATING_GRAD_EPOCH} \ 81 | --weight_decay ${WEIGHT_DECAY} \ 82 | --dropout_keep_prob ${DROPOUT_KEEP_PROB} \ 83 | --momentum ${MOMENTUM} \ 84 | --size_to_binarize ${SIZE_TO_BINARIZE} \ 85 | --optimizer ${OPTIMIZER} \ 86 | --net ${NET} \ 87 | --image_size ${IMAGE_SIZE} \ 88 | --num_gpus ${NUM_GPUS} \ 89 | --batch_size ${TRAIN_BATCH_SIZE} \ 90 | --max_steps ${MAX_STEPS} \ 91 | --train_dir ${TRAIN_DIR} \ 92 | --data_dir ${DATA_DIR} > ${INFO_WORKSPACE}/training_${FOLDER_NAME}_info.txt 2>&1 & 93 | -------------------------------------------------------------------------------- /terngrad/g3doc/inception_v3_architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenwei202/terngrad/ec4f75e9a3a1e1c4b2e6494d830fbdfdd2e03ddc/terngrad/g3doc/inception_v3_architecture.png -------------------------------------------------------------------------------- /terngrad/inception/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | 16 | """Makes helper libraries available in the inception package.""" 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function -------------------------------------------------------------------------------- /terngrad/inception/cifar10_data.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Small library that points to the cifar-10 data set. 16 | """ 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | 22 | import tensorflow as tf 23 | from inception.dataset import Dataset 24 | import os 25 | 26 | FLAGS = tf.app.flags.FLAGS 27 | 28 | class Cifar10Data(Dataset): 29 | """cifar-10 data set.""" 30 | 31 | def __init__(self, subset): 32 | super(Cifar10Data, self).__init__('cifar10', subset) 33 | 34 | def num_classes(self): 35 | """Returns the number of classes in the data set.""" 36 | return 10 37 | 38 | def num_examples_per_epoch(self): 39 | """Returns the number of examples in the data set.""" 40 | if self.subset == 'train': 41 | return 50000 42 | if self.subset == 'test': 43 | return 10000 44 | 45 | def download_message(self): 46 | """Instruction to download and build the cifar10 data set.""" 47 | 48 | print('Failed to find any cifar10 %s files'% self.subset) 49 | print('') 50 | print('If you have already downloaded and processed the data, then make ' 51 | 'sure to set --data_dir to point to the directory containing the ' 52 | 'location of the sharded TFRecords.\n') 53 | print('If you have not downloaded and prepared the cifar10 data in the ' 54 | 'TFRecord format, you will need to do this at least once. This ' 55 | 'process could take a while depending on the speed of your ' 56 | 'computer and network connection\n') 57 | print('Please see README.md for instructions on how to build ' 58 | 'the cifar10 dataset using download_and_convert_data.py. For example: \n') 59 | print ('cd ./slim\n') 60 | print ('python download_and_convert_data.py ' 61 | '--dataset_name cifar10 --dataset_dir ~/dataset/cifar10-data/ [--shard True]\n') 62 | 63 | def available_subsets(self): 64 | """Returns the list of available subsets.""" 65 | return ['train', 'test'] -------------------------------------------------------------------------------- /terngrad/inception/cifar10_distributed_train.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | # pylint: disable=line-too-long 16 | """A binary to train Inception in a distributed manner using multiple systems. 17 | 18 | Please see accompanying README.md for details and instructions. 19 | """ 20 | from __future__ import absolute_import 21 | from __future__ import division 22 | from __future__ import print_function 23 | 24 | import tensorflow as tf 25 | 26 | from inception import inception_distributed_train 27 | from inception.cifar10_data import Cifar10Data 28 | 29 | FLAGS = tf.app.flags.FLAGS 30 | 31 | 32 | def main(unused_args): 33 | FLAGS.dataset_name = 'cifar10' 34 | 35 | assert FLAGS.job_name in ['ps', 'worker'], 'job_name must be ps or worker' 36 | 37 | # Extract all the hostnames for the ps and worker jobs to construct the 38 | # cluster spec. 39 | ps_hosts = FLAGS.ps_hosts.split(',') 40 | worker_hosts = FLAGS.worker_hosts.split(',') 41 | tf.logging.info('PS hosts are: %s' % ps_hosts) 42 | tf.logging.info('Worker hosts are: %s' % worker_hosts) 43 | 44 | cluster_spec = tf.train.ClusterSpec({'ps': ps_hosts, 45 | 'worker': worker_hosts}) 46 | sess_config = tf.ConfigProto() 47 | sess_config.gpu_options.allow_growth = True 48 | 49 | server = tf.train.Server( 50 | {'ps': ps_hosts, 51 | 'worker': worker_hosts}, 52 | job_name=FLAGS.job_name, 53 | task_index=FLAGS.task_id, 54 | config=sess_config) 55 | 56 | if FLAGS.job_name == 'ps': 57 | # `ps` jobs wait for incoming connections from the workers. 58 | server.join() 59 | else: 60 | # `worker` jobs will actually do the work. 61 | dataset = Cifar10Data(subset=FLAGS.subset) 62 | assert dataset.data_files() 63 | # Only the chief checks for or creates train_dir. 64 | if FLAGS.task_id == 0: 65 | if not tf.gfile.Exists(FLAGS.train_dir): 66 | tf.gfile.MakeDirs(FLAGS.train_dir) 67 | inception_distributed_train.train(server.target, dataset, cluster_spec) 68 | 69 | if __name__ == '__main__': 70 | tf.logging.set_verbosity(tf.logging.INFO) 71 | tf.app.run() 72 | -------------------------------------------------------------------------------- /terngrad/inception/cifar10_eval.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """A binary to evaluate Inception on the CIFAR-10 data set.
16 | 17 | Note that the precision/recall figures quoted for the supplied pre-trained 18 | ImageNet checkpoint (precision @ 1 = 0.7874, recall @ 5 = 0.9436) do not 19 | apply to CIFAR-10. 20 | 21 | See the README.md for more details. 22 | """ 23 | from __future__ import absolute_import 24 | from __future__ import division 25 | from __future__ import print_function 26 | 27 | 28 | import tensorflow as tf 29 | 30 | from inception import inception_eval 31 | from inception.cifar10_data import Cifar10Data 32 | 33 | FLAGS = tf.app.flags.FLAGS 34 | 35 | 36 | def main(unused_argv=None): 37 | dataset = Cifar10Data(subset=FLAGS.subset) 38 | assert dataset.data_files() 39 | if tf.gfile.Exists(FLAGS.eval_dir): 40 | tf.gfile.DeleteRecursively(FLAGS.eval_dir) 41 | tf.gfile.MakeDirs(FLAGS.eval_dir) 42 | FLAGS.dataset_name = 'cifar10' 43 | FLAGS.num_examples = dataset.num_examples_per_epoch() 44 | inception_eval.evaluate(dataset) 45 | 46 | 47 | if __name__ == '__main__': 48 | tf.app.run() 49 | -------------------------------------------------------------------------------- /terngrad/inception/cifar10_train.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """A binary to train Inception on the CIFAR-10 data set. 16 | """ 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | 22 | 23 | import tensorflow as tf 24 | 25 | from inception import inception_train 26 | from inception.cifar10_data import Cifar10Data 27 | 28 | FLAGS = tf.app.flags.FLAGS 29 | 30 | 31 | def main(_): 32 | dataset = Cifar10Data(subset=FLAGS.subset) 33 | assert dataset.data_files() 34 | if tf.gfile.Exists(FLAGS.train_dir): 35 | tf.gfile.DeleteRecursively(FLAGS.train_dir) 36 | tf.gfile.MakeDirs(FLAGS.train_dir) 37 | FLAGS.dataset_name = 'cifar10' 38 | inception_train.train(dataset) 39 | 40 | 41 | if __name__ == '__main__': 42 | tf.app.run() 43 | -------------------------------------------------------------------------------- /terngrad/inception/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | # ============================================================================== 15 | 16 | """Makes helper libraries available in the data package.""" 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function -------------------------------------------------------------------------------- /terngrad/inception/data/download_and_preprocess_flowers.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2016 Google Inc. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ============================================================================== 16 | 17 | # Script to download and preprocess the flowers data set. This data set 18 | # provides a demonstration for how to perform fine-tuning (i.e. transfer 19 | # learning) from one model to a new data set. 20 | # 21 | # This script provides a demonstration for how to prepare an arbitrary 22 | # data set for training an Inception v3 model. 23 | # 24 | # We demonstrate this with the flowers data set, which consists of 25 | # labeled flower images from 5 classes: 26 | # 27 | # daisy, dandelion, roses, sunflowers, tulips 28 | # 29 | # The final output of this script is a set of sharded TFRecord files containing 30 | # serialized Example protocol buffers. See build_image_data.py for 31 | # details of how the Example protocol buffer contains image data. 32 | # 33 | # usage: 34 | # ./download_and_preprocess_flowers.sh [data-dir] 35 | set -e 36 | 37 | if [ -z "$1" ]; then 38 | echo "usage: download_and_preprocess_flowers.sh [data dir]" 39 | exit 40 | fi 41 | 42 | # Create the output and temporary directories. 43 | DATA_DIR="${1%/}" 44 | SCRATCH_DIR="${DATA_DIR}/raw-data/" 45 | mkdir -p "${DATA_DIR}" 46 | mkdir -p "${SCRATCH_DIR}" 47 | WORK_DIR="$0.runfiles/inception/inception" 48 | 49 | # Download the flowers data. 50 | DATA_URL="http://download.tensorflow.org/example_images/flower_photos.tgz" 51 | CURRENT_DIR=$(pwd) 52 | cd "${DATA_DIR}" 53 | TARBALL="flower_photos.tgz" 54 | if [ ! -f ${TARBALL} ]; then 55 | echo "Downloading flower data set." 56 | wget -O ${TARBALL} "${DATA_URL}" 57 | else 58 | echo "Skipping download of flower data." 59 | fi 60 | 61 | # Note the locations of the train and validation data. 62 | TRAIN_DIRECTORY="${SCRATCH_DIR}train/" 63 | VALIDATION_DIRECTORY="${SCRATCH_DIR}validation/" 64 | 65 | # Expand the data into the flower_photos/ directory and rename it as the 66 | # train directory. 67 | tar xf flower_photos.tgz 68 | rm -rf "${TRAIN_DIRECTORY}" "${VALIDATION_DIRECTORY}" 69 | mv flower_photos "${TRAIN_DIRECTORY}" 70 | 71 | # Generate a list of 5 labels: daisy, dandelion, roses, sunflowers, tulips 72 | LABELS_FILE="${SCRATCH_DIR}/labels.txt" 73 | ls -1 "${TRAIN_DIRECTORY}" | grep -v 'LICENSE' | sed 's/\///' | sort > "${LABELS_FILE}" 74 | 75 | # Generate the validation data set.
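# (The loop below implements the split: it reads one label per line from
# ${LABELS_FILE}; for each label it shuffles the image file names under that
# label's train directory and moves the first 100 of them into the matching
# validation sub-directory, yielding a 100-image validation set per class.)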
76 | while read LABEL; do 77 | VALIDATION_DIR_FOR_LABEL="${VALIDATION_DIRECTORY}${LABEL}" 78 | TRAIN_DIR_FOR_LABEL="${TRAIN_DIRECTORY}${LABEL}" 79 | 80 | # Move 100 randomly selected images to the validation set. 81 | mkdir -p "${VALIDATION_DIR_FOR_LABEL}" 82 | VALIDATION_IMAGES=$(ls -1 "${TRAIN_DIR_FOR_LABEL}" | shuf | head -100) 83 | for IMAGE in ${VALIDATION_IMAGES}; do 84 | mv -f "${TRAIN_DIRECTORY}${LABEL}/${IMAGE}" "${VALIDATION_DIR_FOR_LABEL}" 85 | done 86 | done < "${LABELS_FILE}" 87 | 88 | # Build the TFRecords version of the image data. 89 | cd "${CURRENT_DIR}" 90 | BUILD_SCRIPT="${WORK_DIR}/build_image_data" 91 | OUTPUT_DIRECTORY="${DATA_DIR}" 92 | "${BUILD_SCRIPT}" \ 93 | --train_directory="${TRAIN_DIRECTORY}" \ 94 | --validation_directory="${VALIDATION_DIRECTORY}" \ 95 | --output_directory="${OUTPUT_DIRECTORY}" \ 96 | --labels_file="${LABELS_FILE}" 97 | -------------------------------------------------------------------------------- /terngrad/inception/data/download_and_preprocess_flowers_mac.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2016 Google Inc. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ============================================================================== 16 | 17 | # Script to download and preprocess the flowers data set. This data set 18 | # provides a demonstration for how to perform fine-tuning (i.e. transfer 19 | # learning) from one model to a new data set. 20 | # 21 | # This script provides a demonstration for how to prepare an arbitrary 22 | # data set for training an Inception v3 model. 23 | # 24 | # We demonstrate this with the flowers data set, which consists of 25 | # labeled flower images from 5 classes: 26 | # 27 | # daisy, dandelion, roses, sunflowers, tulips 28 | # 29 | # The final output of this script is a set of sharded TFRecord files containing 30 | # serialized Example protocol buffers. See build_image_data.py for 31 | # details of how the Example protocol buffer contains image data. 32 | # 33 | # usage: 34 | # ./download_and_preprocess_flowers.sh [data-dir] 35 | set -e 36 | 37 | if [ -z "$1" ]; then 38 | echo "usage: download_and_preprocess_flowers.sh [data dir]" 39 | exit 40 | fi 41 | 42 | # Create the output and temporary directories. 43 | DATA_DIR="${1%/}" 44 | SCRATCH_DIR="${DATA_DIR}/raw-data/" 45 | mkdir -p "${DATA_DIR}" 46 | mkdir -p "${SCRATCH_DIR}" 47 | WORK_DIR="$0.runfiles/inception/inception" 48 | 49 | # Download the flowers data. 50 | DATA_URL="http://download.tensorflow.org/example_images/flower_photos.tgz" 51 | CURRENT_DIR=$(pwd) 52 | cd "${DATA_DIR}" 53 | TARBALL="flower_photos.tgz" 54 | if [ ! -f ${TARBALL} ]; then 55 | echo "Downloading flower data set." 56 | wget -O ${TARBALL} "${DATA_URL}" 57 | else 58 | echo "Skipping download of flower data." 59 | fi 60 | 61 | # Note the locations of the train and validation data.
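# (Illustrative values only: if the script were invoked as
# ./download_and_preprocess_flowers.sh ~/dataset/flowers-data, then
# SCRATCH_DIR would be ~/dataset/flowers-data/raw-data/ and the two
# directories below would resolve to .../raw-data/train/ and
# .../raw-data/validation/; the example path is an assumption, not a
# requirement of the script.)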
62 | TRAIN_DIRECTORY="${SCRATCH_DIR}train/" 63 | VALIDATION_DIRECTORY="${SCRATCH_DIR}validation/" 64 | 65 | # Expand the data into the flower_photos/ directory and rename it as the 66 | # train directory. 67 | tar xf flower_photos.tgz 68 | rm -rf "${TRAIN_DIRECTORY}" "${VALIDATION_DIRECTORY}" 69 | mv flower_photos "${TRAIN_DIRECTORY}" 70 | 71 | # Generate a list of 5 labels: daisy, dandelion, roses, sunflowers, tulips 72 | LABELS_FILE="${SCRATCH_DIR}/labels.txt" 73 | ls -1 "${TRAIN_DIRECTORY}" | grep -v 'LICENSE' | sed 's/\///' | sort > "${LABELS_FILE}" 74 | 75 | # Generate the validation data set. 76 | while read LABEL; do 77 | VALIDATION_DIR_FOR_LABEL="${VALIDATION_DIRECTORY}${LABEL}" 78 | TRAIN_DIR_FOR_LABEL="${TRAIN_DIRECTORY}${LABEL}" 79 | 80 | # Move 100 randomly selected images to the validation set. 81 | mkdir -p "${VALIDATION_DIR_FOR_LABEL}" 82 | VALIDATION_IMAGES=$(ls -1 "${TRAIN_DIR_FOR_LABEL}" | gshuf | head -100) 83 | for IMAGE in ${VALIDATION_IMAGES}; do 84 | mv -f "${TRAIN_DIRECTORY}${LABEL}/${IMAGE}" "${VALIDATION_DIR_FOR_LABEL}" 85 | done 86 | done < "${LABELS_FILE}" 87 | 88 | # Build the TFRecords version of the image data. 89 | cd "${CURRENT_DIR}" 90 | BUILD_SCRIPT="${WORK_DIR}/build_image_data" 91 | OUTPUT_DIRECTORY="${DATA_DIR}" 92 | "${BUILD_SCRIPT}" \ 93 | --train_directory="${TRAIN_DIRECTORY}" \ 94 | --validation_directory="${VALIDATION_DIRECTORY}" \ 95 | --output_directory="${OUTPUT_DIRECTORY}" \ 96 | --labels_file="${LABELS_FILE}" 97 | -------------------------------------------------------------------------------- /terngrad/inception/data/download_and_preprocess_imagenet.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2016 Google Inc. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ============================================================================== 16 | 17 | # Script to download and preprocess ImageNet Challenge 2012 18 | # training and validation data set. 19 | # 20 | # The final output of this script is a set of sharded TFRecord files containing 21 | # serialized Example protocol buffers. See build_imagenet_data.py for 22 | # details of how the Example protocol buffers contain the ImageNet data. 23 | # 24 | # The final output of this script is laid out as follows: 25 | # 26 | # data_dir/train-00000-of-01024 27 | # data_dir/train-00001-of-01024 28 | # ... 29 | # data_dir/train-01023-of-01024 30 | # 31 | # and 32 | # 33 | # data_dir/validation-00000-of-00128 34 | # data_dir/validation-00001-of-00128 35 | # ... 36 | # data_dir/validation-00127-of-00128 37 | # 38 | # Note that this script may take several hours to run to completion. The 39 | # conversion of the ImageNet data to TFRecords alone takes 2-3 hours depending 40 | # on the speed of your machine. Please be patient.
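# (A usage sketch with placeholder credentials: download_imagenet.sh, which
# this script calls, reads the IMAGENET_USERNAME and IMAGENET_ACCESS_KEY
# environment variables, so a typical invocation might look like
#   IMAGENET_USERNAME=my_user IMAGENET_ACCESS_KEY=my_key \
#     ./download_and_preprocess_imagenet.sh ${HOME}/dataset/imagenet-data
# where my_user/my_key stand in for real image-net.org credentials.)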
41 | # 42 | # **IMPORTANT** 43 | # To download the raw images, the user must create an account with image-net.org 44 | # and generate a username and access_key. The latter two are required for 45 | # downloading the raw images. 46 | # 47 | # usage: 48 | # ./download_and_preprocess_imagenet.sh [data-dir] 49 | set -e 50 | 51 | if [ -z "$1" ]; then 52 | echo "usage: download_and_preprocess_imagenet.sh [data dir]" 53 | exit 54 | fi 55 | 56 | # Store images in raw pixel format (True) or JPEG format (False) 57 | RAW_PIXEL=False 58 | # When RAW_PIXEL=True and RESIZE_DIMEN>0, resize images to the specified size without preserving the original height/width ratio 59 | RESIZE_DIMEN=0 60 | 61 | # Create the output and temporary directories. 62 | DATA_DIR="${1%/}" 63 | SCRATCH_DIR="${DATA_DIR}/raw-data/" 64 | mkdir -p "${DATA_DIR}" 65 | mkdir -p "${SCRATCH_DIR}" 66 | WORK_DIR="$0.runfiles/inception/inception" 67 | 68 | # Download the ImageNet data. 69 | LABELS_FILE="${WORK_DIR}/data/imagenet_lsvrc_2015_synsets.txt" 70 | DOWNLOAD_SCRIPT="${WORK_DIR}/data/download_imagenet.sh" 71 | "${DOWNLOAD_SCRIPT}" "${SCRATCH_DIR}" "${LABELS_FILE}" 72 | 73 | # Note the locations of the train and validation data. 74 | TRAIN_DIRECTORY="${SCRATCH_DIR}train/" 75 | VALIDATION_DIRECTORY="${SCRATCH_DIR}validation/" 76 | 77 | # Preprocess the validation data by moving the images into the appropriate 78 | # sub-directory based on the label (synset) of the image. 79 | echo "Organizing the validation data into sub-directories." 80 | PREPROCESS_VAL_SCRIPT="${WORK_DIR}/data/preprocess_imagenet_validation_data.py" 81 | VAL_LABELS_FILE="${WORK_DIR}/data/imagenet_2012_validation_synset_labels.txt" 82 | 83 | "${PREPROCESS_VAL_SCRIPT}" "${VALIDATION_DIRECTORY}" "${VAL_LABELS_FILE}" 84 | 85 | # Convert the XML files for bounding box annotations into a single CSV. 86 | echo "Extracting bounding box information from XML." 87 | BOUNDING_BOX_SCRIPT="${WORK_DIR}/data/process_bounding_boxes.py" 88 | BOUNDING_BOX_FILE="${SCRATCH_DIR}/imagenet_2012_bounding_boxes.csv" 89 | BOUNDING_BOX_DIR="${SCRATCH_DIR}bounding_boxes/" 90 | 91 | "${BOUNDING_BOX_SCRIPT}" "${BOUNDING_BOX_DIR}" "${LABELS_FILE}" \ 92 | | sort >"${BOUNDING_BOX_FILE}" 93 | echo "Finished downloading and preprocessing the ImageNet data." 94 | 95 | # Build the TFRecords version of the ImageNet data. 96 | BUILD_SCRIPT="${WORK_DIR}/build_imagenet_data" 97 | OUTPUT_DIRECTORY="${DATA_DIR}" 98 | IMAGENET_METADATA_FILE="${WORK_DIR}/data/imagenet_metadata.txt" 99 | 100 | "${BUILD_SCRIPT}" \ 101 | --train_directory="${TRAIN_DIRECTORY}" \ 102 | --validation_directory="${VALIDATION_DIRECTORY}" \ 103 | --raw_pixel=${RAW_PIXEL} \ 104 | --resize_dimen ${RESIZE_DIMEN} \ 105 | --output_directory="${OUTPUT_DIRECTORY}" \ 106 | --imagenet_metadata_file="${IMAGENET_METADATA_FILE}" \ 107 | --labels_file="${LABELS_FILE}" \ 108 | --bounding_box_file="${BOUNDING_BOX_FILE}" 109 | -------------------------------------------------------------------------------- /terngrad/inception/data/download_imagenet.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2016 Google Inc. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ============================================================================== 16 | 17 | # Script to download ImageNet Challenge 2012 training and validation data set. 18 | # 19 | # Downloads and decompresses raw images and bounding boxes. 20 | # 21 | # **IMPORTANT** 22 | # To download the raw images, the user must create an account with image-net.org 23 | # and generate a username and access_key. The latter two are required for 24 | # downloading the raw images. 25 | # 26 | # usage: 27 | # ./download_imagenet.sh [dirname] 28 | set -e 29 | 30 | if [ "x$IMAGENET_ACCESS_KEY" == x -o "x$IMAGENET_USERNAME" == x ]; then 31 | cat < ') 61 | sys.exit(-1) 62 | data_dir = sys.argv[1] 63 | validation_labels_file = sys.argv[2] 64 | 65 | # Read in the 50000 synsets associated with the validation data set. 66 | labels = [l.strip() for l in open(validation_labels_file).readlines()] 67 | unique_labels = set(labels) 68 | 69 | # Make all sub-directories in the validation data dir. 70 | for label in unique_labels: 71 | labeled_data_dir = os.path.join(data_dir, label) 72 | os.makedirs(labeled_data_dir) 73 | 74 | # Move all of the images to the appropriate sub-directory. 75 | for i in range(len(labels)): 76 | basename = 'ILSVRC2012_val_000%.5d.JPEG' % (i + 1) 77 | original_filename = os.path.join(data_dir, basename) 78 | if not os.path.exists(original_filename): 79 | print('Failed to find: %s' % original_filename) 80 | sys.exit(-1) 81 | new_filename = os.path.join(data_dir, labels[i], basename) 82 | os.rename(original_filename, new_filename) 83 | -------------------------------------------------------------------------------- /terngrad/inception/dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Small library that points to a data set. 16 | 17 | Methods of the Dataset class: 18 | data_files: Returns a python list of all (sharded) data set files. 19 | num_examples_per_epoch: Returns the number of examples in the data set. 20 | num_classes: Returns the number of classes in the data set. 21 | reader: Return a reader for a single entry from the data set.
22 | """ 23 | from __future__ import absolute_import 24 | from __future__ import division 25 | from __future__ import print_function 26 | 27 | from abc import ABCMeta 28 | from abc import abstractmethod 29 | import os 30 | 31 | 32 | import tensorflow as tf 33 | 34 | FLAGS = tf.app.flags.FLAGS 35 | 36 | # Basic model parameters. 37 | tf.app.flags.DEFINE_string('data_dir', '/tmp/mydata', 38 | """Path to the processed data, i.e. """ 39 | """TFRecord of Example protos.""") 40 | 41 | 42 | class Dataset(object): 43 | """A simple class for handling data sets.""" 44 | __metaclass__ = ABCMeta 45 | 46 | def __init__(self, name, subset): 47 | """Initialize dataset using a subset and the path to the data.""" 48 | assert subset in self.available_subsets(), self.available_subsets() 49 | self.name = name 50 | self.subset = subset 51 | 52 | @abstractmethod 53 | def num_classes(self): 54 | """Returns the number of classes in the data set.""" 55 | pass 56 | # return 10 57 | 58 | @abstractmethod 59 | def num_examples_per_epoch(self): 60 | """Returns the number of examples in the data subset.""" 61 | pass 62 | # if self.subset == 'train': 63 | # return 10000 64 | # if self.subset == 'validation': 65 | # return 1000 66 | 67 | @abstractmethod 68 | def download_message(self): 69 | """Prints a download message for the Dataset.""" 70 | pass 71 | 72 | def available_subsets(self): 73 | """Returns the list of available subsets.""" 74 | return ['train', 'validation'] 75 | 76 | def data_files(self): 77 | """Returns a python list of all (sharded) data subset files. 78 | 79 | Returns: 80 | python list of all (sharded) data set files. 81 | Raises: 82 | ValueError: if there are not data_files matching the subset. 83 | """ 84 | tf_record_pattern = os.path.join(FLAGS.data_dir, '%s-*' % self.subset) 85 | data_files = tf.gfile.Glob(tf_record_pattern) 86 | if not data_files: 87 | print('No files found for dataset %s/%s at %s' % (self.name, 88 | self.subset, 89 | FLAGS.data_dir)) 90 | 91 | self.download_message() 92 | exit(-1) 93 | return data_files 94 | 95 | def reader(self): 96 | """Return a reader for a single entry from the data set. 97 | 98 | See io_ops.py for details of Reader class. 99 | 100 | Returns: 101 | Reader object that reads the data set. 102 | """ 103 | return tf.TFRecordReader() 104 | -------------------------------------------------------------------------------- /terngrad/inception/flowers_data.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Small library that points to the flowers data set. 
16 | """ 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | 22 | 23 | from inception.dataset import Dataset 24 | 25 | 26 | class FlowersData(Dataset): 27 | """Flowers data set.""" 28 | 29 | def __init__(self, subset): 30 | super(FlowersData, self).__init__('Flowers', subset) 31 | 32 | def num_classes(self): 33 | """Returns the number of classes in the data set.""" 34 | return 5 35 | 36 | def num_examples_per_epoch(self): 37 | """Returns the number of examples in the data subset.""" 38 | if self.subset == 'train': 39 | return 3170 40 | if self.subset == 'validation': 41 | return 500 42 | 43 | def download_message(self): 44 | """Instruction to download and extract the tarball from Flowers website.""" 45 | 46 | print('Failed to find any Flowers %s files'% self.subset) 47 | print('') 48 | print('If you have already downloaded and processed the data, then make ' 49 | 'sure to set --data_dir to point to the directory containing the ' 50 | 'location of the sharded TFRecords.\n') 51 | print('Please see README.md for instructions on how to build ' 52 | 'the flowers dataset using download_and_preprocess_flowers.\n') 53 | -------------------------------------------------------------------------------- /terngrad/inception/flowers_eval.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """A binary to evaluate Inception on the flowers data set. 16 | """ 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | 22 | import tensorflow as tf 23 | 24 | from inception import inception_eval 25 | from inception.flowers_data import FlowersData 26 | 27 | FLAGS = tf.app.flags.FLAGS 28 | 29 | 30 | def main(unused_argv=None): 31 | dataset = FlowersData(subset=FLAGS.subset) 32 | assert dataset.data_files() 33 | if tf.gfile.Exists(FLAGS.eval_dir): 34 | tf.gfile.DeleteRecursively(FLAGS.eval_dir) 35 | tf.gfile.MakeDirs(FLAGS.eval_dir) 36 | inception_eval.evaluate(dataset) 37 | 38 | 39 | if __name__ == '__main__': 40 | tf.app.run() 41 | -------------------------------------------------------------------------------- /terngrad/inception/flowers_train.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """A binary to train Inception on the flowers data set. 16 | """ 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | 22 | 23 | import tensorflow as tf 24 | 25 | from inception import inception_train 26 | from inception.flowers_data import FlowersData 27 | 28 | FLAGS = tf.app.flags.FLAGS 29 | 30 | 31 | def main(_): 32 | dataset = FlowersData(subset=FLAGS.subset) 33 | assert dataset.data_files() 34 | if tf.gfile.Exists(FLAGS.train_dir): 35 | tf.gfile.DeleteRecursively(FLAGS.train_dir) 36 | tf.gfile.MakeDirs(FLAGS.train_dir) 37 | inception_train.train(dataset) 38 | 39 | 40 | if __name__ == '__main__': 41 | tf.app.run() 42 | -------------------------------------------------------------------------------- /terngrad/inception/imagenet_data.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Small library that points to the ImageNet data set. 16 | """ 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | 22 | 23 | from inception.dataset import Dataset 24 | 25 | 26 | class ImagenetData(Dataset): 27 | """ImageNet data set.""" 28 | 29 | def __init__(self, subset): 30 | super(ImagenetData, self).__init__('ImageNet', subset) 31 | 32 | def num_classes(self): 33 | """Returns the number of classes in the data set.""" 34 | return 1000 35 | 36 | def num_examples_per_epoch(self): 37 | """Returns the number of examples in the data set.""" 38 | # Bounding box data consists of 615299 bounding boxes for 544546 images. 39 | if self.subset == 'train': 40 | return 1281167 41 | if self.subset == 'validation': 42 | return 50000 43 | 44 | def download_message(self): 45 | """Instruction to download and prepare the ImageNet data set.""" 46 | 47 | print('Failed to find any ImageNet %s files'% self.subset) 48 | print('') 49 | print('If you have already downloaded and processed the data, then make ' 50 | 'sure to set --data_dir to point to the directory containing the ' 51 | 'location of the sharded TFRecords.\n') 52 | print('If you have not downloaded and prepared the ImageNet data in the ' 53 | 'TFRecord format, you will need to do this at least once. 
This ' 54 | 'process could take several hours depending on the speed of your ' 55 | 'computer and network connection\n') 56 | print('Please see README.md for instructions on how to build ' 57 | 'the ImageNet dataset using download_and_preprocess_imagenet.\n') 58 | print('Note that the raw data size is 300 GB and the processed data size ' 59 | 'is 150 GB. Please ensure you have at least 500GB disk space.') 60 | -------------------------------------------------------------------------------- /terngrad/inception/imagenet_distributed_train.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | # pylint: disable=line-too-long 16 | """A binary to train Inception in a distributed manner using multiple systems. 17 | 18 | Please see accompanying README.md for details and instructions. 19 | """ 20 | from __future__ import absolute_import 21 | from __future__ import division 22 | from __future__ import print_function 23 | 24 | import tensorflow as tf 25 | 26 | from inception import inception_distributed_train 27 | from inception.imagenet_data import ImagenetData 28 | 29 | FLAGS = tf.app.flags.FLAGS 30 | 31 | 32 | def main(unused_args): 33 | FLAGS.dataset_name = 'imagenet' 34 | 35 | assert FLAGS.job_name in ['ps', 'worker'], 'job_name must be ps or worker' 36 | 37 | # Extract all the hostnames for the ps and worker jobs to construct the 38 | # cluster spec. 39 | ps_hosts = FLAGS.ps_hosts.split(',') 40 | worker_hosts = FLAGS.worker_hosts.split(',') 41 | tf.logging.info('PS hosts are: %s' % ps_hosts) 42 | tf.logging.info('Worker hosts are: %s' % worker_hosts) 43 | 44 | cluster_spec = tf.train.ClusterSpec({'ps': ps_hosts, 45 | 'worker': worker_hosts}) 46 | sess_config = tf.ConfigProto() 47 | sess_config.gpu_options.allow_growth = True 48 | 49 | server = tf.train.Server( 50 | {'ps': ps_hosts, 51 | 'worker': worker_hosts}, 52 | job_name=FLAGS.job_name, 53 | task_index=FLAGS.task_id, 54 | config=sess_config) 55 | 56 | if FLAGS.job_name == 'ps': 57 | # `ps` jobs wait for incoming connections from the workers. 58 | server.join() 59 | else: 60 | # `worker` jobs will actually do the work. 61 | dataset = ImagenetData(subset=FLAGS.subset) 62 | assert dataset.data_files() 63 | # Only the chief checks for or creates train_dir. 64 | if FLAGS.task_id == 0: 65 | if not tf.gfile.Exists(FLAGS.train_dir): 66 | tf.gfile.MakeDirs(FLAGS.train_dir) 67 | inception_distributed_train.train(server.target, dataset, cluster_spec) 68 | 69 | if __name__ == '__main__': 70 | tf.logging.set_verbosity(tf.logging.INFO) 71 | tf.app.run() 72 | -------------------------------------------------------------------------------- /terngrad/inception/imagenet_eval.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. 
All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """A binary to evaluate Inception on the ImageNet data set. 16 | 17 | Note that using the supplied pre-trained inception checkpoint, the eval should 18 | achieve: 19 | precision @ 1 = 0.7874 recall @ 5 = 0.9436 [50000 examples] 20 | 21 | See the README.md for more details. 22 | """ 23 | from __future__ import absolute_import 24 | from __future__ import division 25 | from __future__ import print_function 26 | 27 | 28 | import tensorflow as tf 29 | 30 | from inception import inception_eval 31 | from inception.imagenet_data import ImagenetData 32 | 33 | FLAGS = tf.app.flags.FLAGS 34 | 35 | 36 | def main(unused_argv=None): 37 | dataset = ImagenetData(subset=FLAGS.subset) 38 | assert dataset.data_files() 39 | if tf.gfile.Exists(FLAGS.eval_dir): 40 | tf.gfile.DeleteRecursively(FLAGS.eval_dir) 41 | tf.gfile.MakeDirs(FLAGS.eval_dir) 42 | FLAGS.dataset_name = 'imagenet' 43 | FLAGS.num_examples = dataset.num_examples_per_epoch() 44 | inception_eval.evaluate(dataset) 45 | 46 | 47 | if __name__ == '__main__': 48 | tf.app.run() 49 | -------------------------------------------------------------------------------- /terngrad/inception/imagenet_train.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """A binary to train Inception on the ImageNet data set. 
16 | """ 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | 22 | 23 | import tensorflow as tf 24 | 25 | from inception import inception_train 26 | from inception.imagenet_data import ImagenetData 27 | 28 | FLAGS = tf.app.flags.FLAGS 29 | 30 | 31 | def main(_): 32 | dataset = ImagenetData(subset=FLAGS.subset) 33 | assert dataset.data_files() 34 | if tf.gfile.Exists(FLAGS.train_dir): 35 | tf.gfile.DeleteRecursively(FLAGS.train_dir) 36 | tf.gfile.MakeDirs(FLAGS.train_dir) 37 | FLAGS.dataset_name = 'imagenet' 38 | inception_train.train(dataset) 39 | 40 | 41 | if __name__ == '__main__': 42 | tf.app.run() 43 | -------------------------------------------------------------------------------- /terngrad/inception/lenet_preprocessing.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Provides utilities for preprocessing.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import tensorflow as tf 22 | 23 | slim = tf.contrib.slim 24 | 25 | 26 | def preprocess_image(image, output_height, output_width, is_training): 27 | """Preprocesses the given image. 28 | 29 | Args: 30 | image: A `Tensor` representing an image of arbitrary size. 31 | output_height: The height of the image after preprocessing. 32 | output_width: The width of the image after preprocessing. 33 | is_training: `True` if we're preprocessing the image for training and 34 | `False` otherwise. 35 | 36 | Returns: 37 | A preprocessed image. 38 | """ 39 | image = tf.to_float(image) 40 | image = tf.image.resize_image_with_crop_or_pad( 41 | image, output_height, output_width) 42 | image.set_shape([output_height, output_width, 1]) 43 | image = tf.subtract(image, 0.5) 44 | image = tf.multiply(image, 2.0) 45 | return image 46 | -------------------------------------------------------------------------------- /terngrad/inception/mnist_data.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | """Small library that points to the mnist data set. 16 | """ 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | 22 | import tensorflow as tf 23 | from inception.dataset import Dataset 24 | import os 25 | 26 | FLAGS = tf.app.flags.FLAGS 27 | 28 | class MnistData(Dataset): 29 | """mnist data set.""" 30 | 31 | def __init__(self, subset): 32 | super(MnistData, self).__init__('mnist', subset) 33 | 34 | def num_classes(self): 35 | """Returns the number of classes in the data set.""" 36 | return 10 37 | 38 | def num_examples_per_epoch(self): 39 | """Returns the number of examples in the data set.""" 40 | if self.subset == 'train': 41 | return 60000 42 | if self.subset == 'test': 43 | return 10000 44 | 45 | def download_message(self): 46 | """Instruction to download and build the mnist data set.""" 47 | 48 | print('Failed to find any mnist %s files'% self.subset) 49 | print('') 50 | print('If you have already downloaded and processed the data, then make ' 51 | 'sure to set --data_dir to point to the directory containing the ' 52 | 'location of the sharded TFRecords.\n') 53 | print('If you have not downloaded and prepared the mnist data in the ' 54 | 'TFRecord format, you will need to do this at least once. This ' 55 | 'process could take a while depending on the speed of your ' 56 | 'computer and network connection\n') 57 | print('Please see README.md for instructions on how to build ' 58 | 'the mnist dataset using download_and_convert_data.py. For example: \n') 59 | print ('cd ./slim\n') 60 | print ('python download_and_convert_data.py ' 61 | '--dataset_name mnist --dataset_dir ~/dataset/mnist-data/\n') 62 | 63 | def available_subsets(self): 64 | """Returns the list of available subsets.""" 65 | return ['train', 'test'] -------------------------------------------------------------------------------- /terngrad/inception/mnist_eval.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """A binary to evaluate Inception on the MNIST data set. 16 | 17 | Note that the precision/recall figures quoted for the supplied pre-trained 18 | ImageNet checkpoint (precision @ 1 = 0.7874, recall @ 5 = 0.9436) do not 19 | apply to MNIST. 20 | 21 | See the README.md for more details. 
22 | """ 23 | from __future__ import absolute_import 24 | from __future__ import division 25 | from __future__ import print_function 26 | 27 | 28 | import tensorflow as tf 29 | 30 | from inception import inception_eval 31 | from inception.mnist_data import MnistData 32 | 33 | FLAGS = tf.app.flags.FLAGS 34 | 35 | def main(unused_argv=None): 36 | dataset = MnistData(subset=FLAGS.subset) 37 | assert dataset.data_files() 38 | if tf.gfile.Exists(FLAGS.eval_dir): 39 | tf.gfile.DeleteRecursively(FLAGS.eval_dir) 40 | tf.gfile.MakeDirs(FLAGS.eval_dir) 41 | FLAGS.dataset_name = 'mnist' 42 | FLAGS.num_examples = dataset.num_examples_per_epoch() 43 | inception_eval.evaluate(dataset) 44 | 45 | 46 | if __name__ == '__main__': 47 | tf.app.run() 48 | -------------------------------------------------------------------------------- /terngrad/inception/mnist_train.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """A binary to train Inception on the ImageNet data set. 16 | """ 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | 22 | 23 | import tensorflow as tf 24 | 25 | from inception import inception_train 26 | from inception.mnist_data import MnistData 27 | 28 | FLAGS = tf.app.flags.FLAGS 29 | 30 | 31 | def main(_): 32 | dataset = MnistData(subset=FLAGS.subset) 33 | assert dataset.data_files() 34 | if tf.gfile.Exists(FLAGS.train_dir): 35 | tf.gfile.DeleteRecursively(FLAGS.train_dir) 36 | tf.gfile.MakeDirs(FLAGS.train_dir) 37 | FLAGS.dataset_name = 'mnist' 38 | inception_train.train(dataset) 39 | 40 | 41 | if __name__ == '__main__': 42 | tf.app.run() 43 | -------------------------------------------------------------------------------- /terngrad/inception/models.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Build the Inception v3 network on ImageNet data set. 
16 | 17 | The Inception v3 architecture is described in http://arxiv.org/abs/1512.00567 18 | 19 | Summary of available functions: 20 | inference: Compute inference on the model inputs to make a prediction 21 | loss: Compute the loss of the prediction with respect to the labels 22 | """ 23 | from __future__ import absolute_import 24 | from __future__ import division 25 | from __future__ import print_function 26 | 27 | import re 28 | 29 | import tensorflow as tf 30 | 31 | from inception.slim import slim 32 | 33 | FLAGS = tf.app.flags.FLAGS 34 | 35 | # If a model is trained using multiple GPUs, prefix all Op names with tower_name 36 | # to differentiate the operations. Note that this prefix is removed from the 37 | # names of the summaries when visualizing a model. 38 | TOWER_NAME = 'tower' 39 | 40 | # Batch normalization. Constant governing the exponential moving average of 41 | # the 'global' mean and variance for all activations. 42 | BATCHNORM_MOVING_AVERAGE_DECAY = 0.9997 43 | 44 | # The decay to use for the moving average. 45 | MOVING_AVERAGE_DECAY = 0.9999 46 | 47 | 48 | def inference(images, num_classes, for_training=False, restore_logits=True, 49 | scope=None): 50 | pass # Unimplemented placeholder in this file. 51 | 52 | 53 | def loss(logits, labels, batch_size=None): 54 | pass # Unimplemented placeholder in this file. 55 | 56 | 57 | def _activation_summary(x): 58 | """Helper to create summaries for activations. 59 | 60 | Creates a summary that provides a histogram of activations. 61 | Creates a summary that measures the sparsity of activations. 62 | 63 | Args: 64 | x: Tensor 65 | """ 66 | # Remove 'tower_[0-9]/' from the name in case this is a multi-GPU training 67 | # session. This helps the clarity of presentation on tensorboard. 68 | tensor_name = re.sub('%s_[0-9]*/' % TOWER_NAME, '', x.op.name) 69 | tf.contrib.deprecated.histogram_summary(tensor_name + '/activations', x) 70 | tf.contrib.deprecated.scalar_summary(tensor_name + '/sparsity', tf.nn.zero_fraction(x)) 71 | 72 | 73 | def _activation_summaries(endpoints): 74 | with tf.name_scope('summaries'): 75 | for act in endpoints.values(): 76 | _activation_summary(act) 77 | -------------------------------------------------------------------------------- /terngrad/inception/slim/BUILD: -------------------------------------------------------------------------------- 1 | # Description: 2 | # Contains the operations and nets for building TensorFlow-Slim models.
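# A sketch of how these targets are typically consumed (the commands below are # illustrative; see build_all.sh at the terngrad root for the scripted build): # bazel build //inception/slim:slim # after which Python code in the workspace can import the grouped namespace, # as models.py does: from inception.slim import slim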
3 | 4 | package(default_visibility = ["//inception:internal"]) 5 | 6 | licenses(["notice"]) # Apache 2.0 7 | 8 | exports_files(["LICENSE"]) 9 | 10 | py_library( 11 | name = "scopes", 12 | srcs = ["scopes.py"], 13 | ) 14 | 15 | py_library( 16 | name = "inception_utils", 17 | srcs = ["inception_utils.py"], 18 | ) 19 | 20 | py_library( 21 | name = "inception_v1", 22 | srcs = ["inception_v1.py"], 23 | ) 24 | 25 | py_test( 26 | name = "scopes_test", 27 | size = "small", 28 | srcs = ["scopes_test.py"], 29 | deps = [ 30 | ":scopes", 31 | ], 32 | ) 33 | 34 | py_library( 35 | name = "variables", 36 | srcs = ["variables.py"], 37 | deps = [ 38 | ":scopes", 39 | ], 40 | ) 41 | 42 | py_test( 43 | name = "variables_test", 44 | size = "small", 45 | srcs = ["variables_test.py"], 46 | deps = [ 47 | ":variables", 48 | ], 49 | ) 50 | 51 | py_library( 52 | name = "losses", 53 | srcs = ["losses.py"], 54 | ) 55 | 56 | py_test( 57 | name = "losses_test", 58 | size = "small", 59 | srcs = ["losses_test.py"], 60 | deps = [ 61 | ":losses", 62 | ], 63 | ) 64 | 65 | py_library( 66 | name = "ops", 67 | srcs = ["ops.py"], 68 | deps = [ 69 | ":losses", 70 | ":scopes", 71 | ":variables", 72 | ], 73 | ) 74 | 75 | py_test( 76 | name = "ops_test", 77 | size = "small", 78 | srcs = ["ops_test.py"], 79 | deps = [ 80 | ":ops", 81 | ":variables", 82 | ], 83 | ) 84 | 85 | py_library( 86 | name = "inception", 87 | srcs = ["inception_model.py"], 88 | deps = [ 89 | ":ops", 90 | ":scopes", 91 | ], 92 | ) 93 | 94 | py_library( 95 | name = "models", 96 | srcs = ["models.py"], 97 | deps = [ 98 | ":inception_utils", 99 | ":inception_v1", 100 | ":ops", 101 | ":scopes", 102 | ], 103 | ) 104 | 105 | py_test( 106 | name = "inception_test", 107 | size = "medium", 108 | srcs = ["inception_test.py"], 109 | deps = [ 110 | ":inception", 111 | ], 112 | ) 113 | 114 | py_library( 115 | name = "slim", 116 | srcs = ["slim.py"], 117 | deps = [ 118 | ":inception", 119 | ":models", 120 | ":losses", 121 | ":ops", 122 | ":scopes", 123 | ":variables", 124 | ], 125 | ) 126 | 127 | py_test( 128 | name = "collections_test", 129 | size = "small", 130 | srcs = ["collections_test.py"], 131 | deps = [ 132 | ":slim", 133 | ], 134 | ) 135 | -------------------------------------------------------------------------------- /terngrad/inception/slim/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | # ============================================================================== 15 | 16 | """Makes helper libraries available in the inception package.""" 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function -------------------------------------------------------------------------------- /terngrad/inception/slim/inception_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Tests for slim.inception.""" 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | import tensorflow as tf 21 | 22 | from inception.slim import inception_model as inception 23 | 24 | 25 | class InceptionTest(tf.test.TestCase): 26 | 27 | def testBuildLogits(self): 28 | batch_size = 5 29 | height, width = 299, 299 30 | num_classes = 1000 31 | with self.test_session(): 32 | inputs = tf.random_uniform((batch_size, height, width, 3)) 33 | logits, _ = inception.inception_v3(inputs, num_classes) 34 | self.assertTrue(logits.op.name.startswith('logits')) 35 | self.assertListEqual(logits.get_shape().as_list(), 36 | [batch_size, num_classes]) 37 | 38 | def testBuildEndPoints(self): 39 | batch_size = 5 40 | height, width = 299, 299 41 | num_classes = 1000 42 | with self.test_session(): 43 | inputs = tf.random_uniform((batch_size, height, width, 3)) 44 | _, end_points = inception.inception_v3(inputs, num_classes) 45 | self.assertTrue('logits' in end_points) 46 | logits = end_points['logits'] 47 | self.assertListEqual(logits.get_shape().as_list(), 48 | [batch_size, num_classes]) 49 | self.assertTrue('aux_logits' in end_points) 50 | aux_logits = end_points['aux_logits'] 51 | self.assertListEqual(aux_logits.get_shape().as_list(), 52 | [batch_size, num_classes]) 53 | pre_pool = end_points['mixed_8x8x2048b'] 54 | self.assertListEqual(pre_pool.get_shape().as_list(), 55 | [batch_size, 8, 8, 2048]) 56 | 57 | def testVariablesSetDevice(self): 58 | batch_size = 5 59 | height, width = 299, 299 60 | num_classes = 1000 61 | with self.test_session(): 62 | inputs = tf.random_uniform((batch_size, height, width, 3)) 63 | # Force all Variables to reside on the device.
64 | with tf.variable_scope('on_cpu'), tf.device('/cpu:0'): 65 | inception.inception_v3(inputs, num_classes) 66 | with tf.variable_scope('on_gpu'), tf.device('/gpu:0'): 67 | inception.inception_v3(inputs, num_classes) 68 | for v in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='on_cpu'): 69 | self.assertDeviceEqual(v.device, '/cpu:0') 70 | for v in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='on_gpu'): 71 | self.assertDeviceEqual(v.device, '/gpu:0') 72 | 73 | def testHalfSizeImages(self): 74 | batch_size = 5 75 | height, width = 150, 150 76 | num_classes = 1000 77 | with self.test_session(): 78 | inputs = tf.random_uniform((batch_size, height, width, 3)) 79 | logits, end_points = inception.inception_v3(inputs, num_classes) 80 | self.assertTrue(logits.op.name.startswith('logits')) 81 | self.assertListEqual(logits.get_shape().as_list(), 82 | [batch_size, num_classes]) 83 | pre_pool = end_points['mixed_8x8x2048b'] 84 | self.assertListEqual(pre_pool.get_shape().as_list(), 85 | [batch_size, 3, 3, 2048]) 86 | 87 | def testUnknownBatchSize(self): 88 | batch_size = 1 89 | height, width = 299, 299 90 | num_classes = 1000 91 | with self.test_session() as sess: 92 | inputs = tf.placeholder(tf.float32, (None, height, width, 3)) 93 | logits, _ = inception.inception_v3(inputs, num_classes) 94 | self.assertTrue(logits.op.name.startswith('logits')) 95 | self.assertListEqual(logits.get_shape().as_list(), 96 | [None, num_classes]) 97 | images = tf.random_uniform((batch_size, height, width, 3)) 98 | sess.run(tf.global_variables_initializer()) 99 | output = sess.run(logits, {inputs: images.eval()}) 100 | self.assertEqual(output.shape, (batch_size, num_classes)) 101 | 102 | def testEvaluation(self): 103 | batch_size = 2 104 | height, width = 299, 299 105 | num_classes = 1000 106 | with self.test_session() as sess: 107 | eval_inputs = tf.random_uniform((batch_size, height, width, 3)) 108 | logits, _ = inception.inception_v3(eval_inputs, num_classes, 109 | is_training=False) 110 | predictions = tf.argmax(logits, 1) 111 | sess.run(tf.global_variables_initializer()) 112 | output = sess.run(predictions) 113 | self.assertEqual(output.shape, (batch_size,)) 114 | 115 | def testTrainEvalWithReuse(self): 116 | train_batch_size = 5 117 | eval_batch_size = 2 118 | height, width = 150, 150 119 | num_classes = 1000 120 | with self.test_session() as sess: 121 | train_inputs = tf.random_uniform((train_batch_size, height, width, 3)) 122 | inception.inception_v3(train_inputs, num_classes) 123 | tf.get_variable_scope().reuse_variables() 124 | eval_inputs = tf.random_uniform((eval_batch_size, height, width, 3)) 125 | logits, _ = inception.inception_v3(eval_inputs, num_classes, 126 | is_training=False) 127 | predictions = tf.argmax(logits, 1) 128 | sess.run(tf.global_variables_initializer()) 129 | output = sess.run(predictions) 130 | self.assertEqual(output.shape, (eval_batch_size,)) 131 | 132 | 133 | if __name__ == '__main__': 134 | tf.test.main() 135 | -------------------------------------------------------------------------------- /terngrad/inception/slim/inception_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains common code shared by all inception models. 16 | 17 | Usage of arg scope: 18 | with slim.arg_scope(inception_arg_scope()): 19 | logits, end_points = inception.inception_v3(images, num_classes, 20 | is_training=is_training) 21 | 22 | """ 23 | from __future__ import absolute_import 24 | from __future__ import division 25 | from __future__ import print_function 26 | 27 | import tensorflow as tf 28 | 29 | slim = tf.contrib.slim 30 | 31 | 32 | def inception_arg_scope(weight_decay=0.00004, 33 | use_batch_norm=True, 34 | batch_norm_decay=0.9997, 35 | batch_norm_epsilon=0.001): 36 | """Defines the default arg scope for inception models. 37 | 38 | Args: 39 | weight_decay: The weight decay to use for regularizing the model. 40 | use_batch_norm: If `True`, batch_norm is applied after each convolution. 41 | batch_norm_decay: Decay for batch norm moving average. 42 | batch_norm_epsilon: Small float added to variance to avoid dividing by zero 43 | in batch norm. 44 | 45 | Returns: 46 | An `arg_scope` to use for the inception models. 47 | """ 48 | batch_norm_params = { 49 | # Decay for the moving averages. 50 | 'decay': batch_norm_decay, 51 | # epsilon to prevent 0s in variance. 52 | 'epsilon': batch_norm_epsilon, 53 | # collection containing update_ops. 54 | 'updates_collections': tf.GraphKeys.UPDATE_OPS, 55 | } 56 | if use_batch_norm: 57 | normalizer_fn = slim.batch_norm 58 | normalizer_params = batch_norm_params 59 | else: 60 | normalizer_fn = None 61 | normalizer_params = {} 62 | # Set weight_decay for weights in Conv and FC layers. 63 | with slim.arg_scope([slim.conv2d, slim.fully_connected], 64 | weights_regularizer=slim.l2_regularizer(weight_decay)): 65 | with slim.arg_scope( 66 | [slim.conv2d], 67 | weights_initializer=slim.variance_scaling_initializer(), 68 | activation_fn=tf.nn.relu, 69 | normalizer_fn=normalizer_fn, 70 | normalizer_params=normalizer_params) as sc: 71 | return sc 72 | -------------------------------------------------------------------------------- /terngrad/inception/slim/scopes.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains the new arg_scope used for TF-Slim ops. 16 | 17 | Allows one to define models much more compactly by eliminating boilerplate 18 | code.
This is accomplished through the use of argument scoping (arg_scope). 19 | 20 | Example of how to use scopes.arg_scope: 21 | 22 | with scopes.arg_scope(ops.conv2d, padding='SAME', 23 | stddev=0.01, weight_decay=0.0005): 24 | net = ops.conv2d(inputs, 64, [11, 11], 4, padding='VALID', scope='conv1') 25 | net = ops.conv2d(net, 256, [5, 5], scope='conv2') 26 | 27 | The first call to conv2d will overwrite padding: 28 | ops.conv2d(inputs, 64, [11, 11], 4, padding='VALID', 29 | stddev=0.01, weight_decay=0.0005, scope='conv1') 30 | 31 | The second call to conv2d will use the predefined args: 32 | ops.conv2d(inputs, 256, [5, 5], padding='SAME', 33 | stddev=0.01, weight_decay=0.0005, scope='conv2') 34 | 35 | Example of how to reuse an arg_scope: 36 | with scopes.arg_scope(ops.conv2d, padding='SAME', 37 | stddev=0.01, weight_decay=0.0005) as conv2d_arg_scope: 38 | net = ops.conv2d(net, 256, [5, 5], scope='conv1') 39 | .... 40 | 41 | with scopes.arg_scope(conv2d_arg_scope): 42 | net = ops.conv2d(net, 256, [5, 5], scope='conv2') 43 | 44 | Example of how to use scopes.add_arg_scope: 45 | 46 | @scopes.add_arg_scope 47 | def conv2d(*args, **kwargs): 48 | """ 49 | from __future__ import absolute_import 50 | from __future__ import division 51 | from __future__ import print_function 52 | 53 | import contextlib 54 | import functools 55 | 56 | from tensorflow.python.framework import ops 57 | 58 | _ARGSTACK_KEY = ("__arg_stack",) 59 | 60 | _DECORATED_OPS = set() 61 | 62 | 63 | def _get_arg_stack(): 64 | stack = ops.get_collection(_ARGSTACK_KEY) 65 | if stack: 66 | return stack[0] 67 | else: 68 | stack = [{}] 69 | ops.add_to_collection(_ARGSTACK_KEY, stack) 70 | return stack 71 | 72 | 73 | def _current_arg_scope(): 74 | stack = _get_arg_stack() 75 | return stack[-1] 76 | 77 | 78 | def _add_op(op): 79 | key_op = (op.__module__, op.__name__) 80 | if key_op not in _DECORATED_OPS: 81 | _DECORATED_OPS.add(key_op) 82 | 83 | 84 | @contextlib.contextmanager 85 | def arg_scope(list_ops_or_scope, **kwargs): 86 | """Stores the default arguments for the given set of list_ops. 87 | 88 | For usage, please see examples at top of the file. 89 | 90 | Args: 91 | list_ops_or_scope: List or tuple of operations to set argument scope for or 92 | a dictionary containing the current scope. When list_ops_or_scope is a dict, 93 | kwargs must be empty. When list_ops_or_scope is a list or tuple, then 94 | every op in it needs to be decorated with @add_arg_scope to work. 95 | **kwargs: keyword=value that will define the defaults for each op in 96 | list_ops. All the ops need to accept the given set of arguments. 97 | 98 | Yields: 99 | the current_scope, which is a dictionary of {op: {arg: value}} 100 | Raises: 101 | TypeError: if list_ops is not a list or a tuple. 102 | ValueError: if any op in list_ops has not been decorated with @add_arg_scope. 103 | """ 104 | if isinstance(list_ops_or_scope, dict): 105 | # Assumes that list_ops_or_scope is a scope that is being reused. 106 | if kwargs: 107 | raise ValueError("When attempting to re-use a scope by supplying a " 108 | "dictionary, kwargs must be empty.") 109 | current_scope = list_ops_or_scope.copy() 110 | try: 111 | _get_arg_stack().append(current_scope) 112 | yield current_scope 113 | finally: 114 | _get_arg_stack().pop() 115 | else: 116 | # Assumes that list_ops_or_scope is a list/tuple of ops with kwargs. 117 | if not isinstance(list_ops_or_scope, (list, tuple)): 118 | raise TypeError("list_ops_or_scope must either be a list/tuple or a reused " 119 | "scope (i.e.
dict)") 120 | try: 121 | current_scope = _current_arg_scope().copy() 122 | for op in list_ops_or_scope: 123 | key_op = (op.__module__, op.__name__) 124 | if not has_arg_scope(op): 125 | raise ValueError("%s is not decorated with @add_arg_scope", key_op) 126 | if key_op in current_scope: 127 | current_kwargs = current_scope[key_op].copy() 128 | current_kwargs.update(kwargs) 129 | current_scope[key_op] = current_kwargs 130 | else: 131 | current_scope[key_op] = kwargs.copy() 132 | _get_arg_stack().append(current_scope) 133 | yield current_scope 134 | finally: 135 | _get_arg_stack().pop() 136 | 137 | 138 | def add_arg_scope(func): 139 | """Decorates a function with args so it can be used within an arg_scope. 140 | 141 | Args: 142 | func: function to decorate. 143 | 144 | Returns: 145 | A tuple with the decorated function func_with_args(). 146 | """ 147 | @functools.wraps(func) 148 | def func_with_args(*args, **kwargs): 149 | current_scope = _current_arg_scope() 150 | current_args = kwargs 151 | key_func = (func.__module__, func.__name__) 152 | if key_func in current_scope: 153 | current_args = current_scope[key_func].copy() 154 | current_args.update(kwargs) 155 | return func(*args, **current_args) 156 | _add_op(func) 157 | return func_with_args 158 | 159 | 160 | def has_arg_scope(func): 161 | """Checks whether a func has been decorated with @add_arg_scope or not. 162 | 163 | Args: 164 | func: function to check. 165 | 166 | Returns: 167 | a boolean. 168 | """ 169 | key_op = (func.__module__, func.__name__) 170 | return key_op in _DECORATED_OPS 171 | -------------------------------------------------------------------------------- /terngrad/inception/slim/slim.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """TF-Slim grouped API. Please see README.md for details and usage.""" 16 | # pylint: disable=unused-import 17 | 18 | # Collapse tf-slim into a single namespace. 19 | from inception.slim import inception_model as inception 20 | from inception.slim import models as models 21 | from inception.slim import losses 22 | from inception.slim import ops 23 | from inception.slim import scopes 24 | from inception.slim import variables 25 | from inception.slim.scopes import arg_scope 26 | -------------------------------------------------------------------------------- /terngrad/kill_local.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | threadid=$( ps aux | grep python | grep distributed_train | grep ${USER} | awk '{print $2}') 3 | if [[ "$threadid" =~ ^-?[0-9]+.*$ ]] ; 4 | then 5 | kill $threadid 6 | else 7 | echo "Stopped already." 
8 | fi 9 | -------------------------------------------------------------------------------- /terngrad/run_dist.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | #set -x 4 | 5 | . ./config_dist.sh 6 | 7 | WORKER_STRING=$(echo ${WORKER_HOSTS[*]} | sed 's/ /,/g') 8 | PS_STRING=$(echo ${PS_HOSTS[*]} | sed 's/ /,/g') 9 | EXPERIMENT_ID=$(date) 10 | EXPERIMENT_ID=${EXPERIMENT_ID// /_} 11 | EXPERIMENT_ID=${EXPERIMENT_ID//:/-} 12 | 13 | PS_NUM=${#PS_HOSTS[@]} 14 | WORKER_NUM=${#WORKER_HOSTS[@]} 15 | DEVICE_NUM=${#WORKER_DEVICES[@]} 16 | DATA_NUM=${#DATA_DIR[@]} 17 | if [ ${WORKER_NUM} -ne ${DEVICE_NUM} ] 18 | then 19 | echo "The number of workers (${WORKER_NUM}) does not match the number of devices (${DEVICE_NUM})" 20 | exit 21 | fi 22 | if [ ${WORKER_NUM} -ne ${DATA_NUM} ] 23 | then 24 | echo "The number of workers (${WORKER_NUM}) does not match the number of data paths (${DATA_NUM})" 25 | exit 26 | fi 27 | 28 | echo "${PS_NUM} ps hosts: ${PS_STRING}" 29 | echo "${WORKER_NUM} worker hosts: ${WORKER_STRING}" 30 | 31 | # start workers 32 | task_id=0 33 | for HOST in ${WORKER_HOSTS[*]}; do 34 | worker=$(echo ${HOST} |cut -d':' -f1) 35 | ssh ${worker} "hostname; \ 36 | cd ${WORKSPACE}; \ 37 | pwd; \ 38 | ${WORKER_SCRIPT} ${PS_STRING} ${WORKER_STRING} worker ${task_id} ${WORKER_DEVICES[$task_id]} ${DATA_DIR[$task_id]} ${EXPERIMENT_ID}" 39 | task_id=`expr $task_id + 1` 40 | done 41 | 42 | # start ps 43 | task_id=0 44 | for HOST in ${PS_HOSTS[*]}; do 45 | ps=$(echo ${HOST} |cut -d':' -f1) 46 | ssh ${ps} "hostname; \ 47 | cd ${WORKSPACE}; \ 48 | pwd; \ 49 | ${PS_SCRIPT} ${PS_STRING} ${WORKER_STRING} ps ${task_id} ${EXPERIMENT_ID}" 50 | task_id=`expr $task_id + 1` 51 | done 52 | 53 | -------------------------------------------------------------------------------- /terngrad/run_dist_cifar10.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | set -e 4 | PS=localhost 5 | WORKER1=localhost 6 | WORKER2=localhost 7 | 8 | export CUDA_VISIBLE_DEVICES=1 9 | bazel-bin/inception/cifar10_distributed_train \ 10 | --optimizer adam \ 11 | --initial_learning_rate 0.0002 \ 12 | --batch_size 64 \ 13 | --num_epochs_per_decay 200 \ 14 | --max_steps 300000 \ 15 | --seed 123 \ 16 | --weight_decay 0.004 \ 17 | --net cifar10_alexnet \ 18 | --image_size 24 \ 19 | --data_dir="$HOME/dataset/cifar10-data-shard-500-999" \ 20 | --job_name='worker' \ 21 | --task_id=1 \ 22 | --ps_hosts="$PS:2222" \ 23 | --worker_hosts="${WORKER1}:2224,${WORKER2}:2226" \ 24 | --train_dir=/tmp/cifar10_distributed_train & 25 | 26 | export CUDA_VISIBLE_DEVICES=0 27 | bazel-bin/inception/cifar10_distributed_train \ 28 | --optimizer adam \ 29 | --initial_learning_rate 0.0002 \ 30 | --batch_size 64 \ 31 | --num_epochs_per_decay 200 \ 32 | --max_steps 300000 \ 33 | --seed 123 \ 34 | --weight_decay 0.004 \ 35 | --net cifar10_alexnet \ 36 | --image_size 24 \ 37 | --data_dir="$HOME/dataset/cifar10-data-shard-0-499" \ 38 | --job_name='worker' \ 39 | --task_id=0 \ 40 | --ps_hosts="$PS:2222" \ 41 | --worker_hosts="${WORKER1}:2224,${WORKER2}:2226" \ 42 | --train_dir=/tmp/cifar10_distributed_train & 43 | 44 | export CUDA_VISIBLE_DEVICES=1 45 | bazel-bin/inception/cifar10_distributed_train \ 46 | --job_name='ps' \ 47 | --task_id=0 \ 48 | --ps_hosts="$PS:2222" \ 49 | --worker_hosts="${WORKER1}:2224,${WORKER2}:2226" & 50 | -------------------------------------------------------------------------------- /terngrad/run_multi_gpus_alexnet.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | set -x 4 | 5 | DATASET_NAME=imagenet # imagenet or cifar10 6 | ROOT_WORKSPACE=${HOME}/dataset/results/imagenet # the location to store tf.summary and logs 7 | DATA_DIR=${HOME}/dataset/${DATASET_NAME}-data # dataset location 8 | FINETUNED_MODEL_PATH= 9 | NUM_GPUS=8 10 | #export CUDA_VISIBLE_DEVICES=0,1 # specify visible gpus to tensorflow 11 | OPTIMIZER=momentum 12 | NET=alexnet 13 | IMAGE_SIZE=224 14 | GRAD_BITS=1 15 | BASE_LR=0.04 16 | CLIP_FACTOR=2.5 # 0.0 means no clipping 17 | # when GRAD_BITS=1 and FLOATING_GRAD_EPOCH>0, switch to floating gradients every FLOATING_GRAD_EPOCH epoch and then switch back 18 | FLOATING_GRAD_EPOCH=0 # 0 means no switching 19 | WEIGHT_DECAY=0.0005 # default - alexnet/vgg_a/vgg_16:0.0005, inception_v3:0.00004, cifar10_alexnet:0.004 20 | DROPOUT_KEEP_PROB=0.8 # The probability to keep in dropout 21 | MOMENTUM=0.9 22 | SIZE_TO_BINARIZE=9217 # the min size of variable to enable binarizing. 1 means binarizing all variables when GRAD_BITS=1 23 | TRAIN_BATCH_SIZE=1024 # total batch size 24 | VAL_BATCH_SIZE=50 # set smaller to avoid OOM 25 | NUM_EPOCHS_PER_DECAY=20 # per decay learning rate 26 | MAX_STEPS=92500 27 | VAL_TOWER=0 # -1 for cpu 28 | EVAL_INTERVAL_SECS=900 # seconds to evaluate the accuracy 29 | EVAL_DEVICE="/gpu:0" # specify the device to eval. e.g. "/gpu:1", "/cpu:0" 30 | RESTORE_AVG_VAR=True # use the moving average parameters to eval? 31 | SEED=123 # use ${RANDOM} if no duplicable results are required 32 | 33 | if [ ! -d "$ROOT_WORKSPACE" ]; then 34 | echo "${ROOT_WORKSPACE} does not exist!" 35 | exit 36 | fi 37 | 38 | TRAIN_WORKSPACE=${ROOT_WORKSPACE}/${DATASET_NAME}_training_data/ 39 | EVAL_WORKSPACE=${ROOT_WORKSPACE}/${DATASET_NAME}_eval_data/ 40 | INFO_WORKSPACE=${ROOT_WORKSPACE}/${DATASET_NAME}_info/ 41 | if [ ! -d "${INFO_WORKSPACE}" ]; then 42 | echo "Creating ${INFO_WORKSPACE} ..." 43 | mkdir -p ${INFO_WORKSPACE} 44 | fi 45 | current_time=$(date) 46 | current_time=${current_time// /_} 47 | current_time=${current_time//:/-} 48 | FOLDER_NAME=${DATASET_NAME}_${NET}_${IMAGE_SIZE}_${OPTIMIZER}_${GRAD_BITS}_${BASE_LR}_${CLIP_FACTOR}_${FLOATING_GRAD_EPOCH}_${WEIGHT_DECAY}_${MOMENTUM}_${SIZE_TO_BINARIZE}_${TRAIN_BATCH_SIZE}_${NUM_GPUS}_${current_time} 49 | TRAIN_DIR=${TRAIN_WORKSPACE}/${FOLDER_NAME} 50 | EVAL_DIR=${EVAL_WORKSPACE}/${FOLDER_NAME} 51 | if [ ! -d "$TRAIN_DIR" ]; then 52 | echo "Creating ${TRAIN_DIR} ..." 53 | mkdir -p ${TRAIN_DIR} 54 | fi 55 | if [ ! -d "$EVAL_DIR" ]; then 56 | echo "Creating ${EVAL_DIR} ..."
57 | mkdir -p ${EVAL_DIR} 58 | fi 59 | 60 | bazel-bin/inception/${DATASET_NAME}_eval \ 61 | --eval_interval_secs ${EVAL_INTERVAL_SECS} \ 62 | --device ${EVAL_DEVICE} \ 63 | --restore_avg_var ${RESTORE_AVG_VAR} \ 64 | --data_dir ${DATA_DIR} \ 65 | --net ${NET} \ 66 | --image_size ${IMAGE_SIZE} \ 67 | --batch_size ${VAL_BATCH_SIZE} \ 68 | --max_steps ${MAX_STEPS} \ 69 | --checkpoint_dir ${TRAIN_DIR} \ 70 | --tower ${VAL_TOWER} \ 71 | --eval_dir ${EVAL_DIR} > ${INFO_WORKSPACE}/eval_${FOLDER_NAME}_info.txt 2>&1 & 72 | 73 | bazel-bin/inception/${DATASET_NAME}_train \ 74 | --seed ${SEED} \ 75 | --pretrained_model_checkpoint_path "${FINETUNED_MODEL_PATH}" \ 76 | --num_epochs_per_decay ${NUM_EPOCHS_PER_DECAY} \ 77 | --initial_learning_rate ${BASE_LR} \ 78 | --grad_bits ${GRAD_BITS} \ 79 | --clip_factor ${CLIP_FACTOR} \ 80 | --floating_grad_epoch ${FLOATING_GRAD_EPOCH} \ 81 | --weight_decay ${WEIGHT_DECAY} \ 82 | --dropout_keep_prob ${DROPOUT_KEEP_PROB} \ 83 | --momentum ${MOMENTUM} \ 84 | --size_to_binarize ${SIZE_TO_BINARIZE} \ 85 | --optimizer ${OPTIMIZER} \ 86 | --net ${NET} \ 87 | --image_size ${IMAGE_SIZE} \ 88 | --num_gpus ${NUM_GPUS} \ 89 | --batch_size ${TRAIN_BATCH_SIZE} \ 90 | --max_steps ${MAX_STEPS} \ 91 | --train_dir ${TRAIN_DIR} \ 92 | --data_dir ${DATA_DIR} > ${INFO_WORKSPACE}/training_${FOLDER_NAME}_info.txt 2>&1 & 93 | -------------------------------------------------------------------------------- /terngrad/run_multi_gpus_cifar10.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | set -x 4 | 5 | DATASET_NAME=cifar10 # imagenet or cifar10 6 | ROOT_WORKSPACE=${HOME}/dataset/results/cifar10/ # the location to store summary and logs 7 | DATA_DIR=${HOME}/dataset/${DATASET_NAME}-data # dataset location 8 | FINETUNED_MODEL_PATH= 9 | NUM_GPUS=2 # num of physical gpus 10 | export CUDA_VISIBLE_DEVICES=0,1 # specify visible gpus to tensorflow 11 | NUM_NODES=2 # num of virtual nodes on physical gpus 12 | OPTIMIZER=adam 13 | NET=cifar10_alexnet 14 | IMAGE_SIZE=24 15 | GRAD_BITS=1 16 | BASE_LR=0.0002 17 | CLIP_FACTOR=2.5 # 0.0 means no clipping 18 | # when GRAD_BITS=1 and FLOATING_GRAD_EPOCH>0, switch to floating gradients every FLOATING_GRAD_EPOCH epoch and then switch back 19 | FLOATING_GRAD_EPOCH=0 # 0 means no switching 20 | WEIGHT_DECAY=0.004 # default - alexnet/vgg_a/vgg_16:0.0005, inception_v3:0.00004, cifar10_alexnet:0.004 21 | MOMENTUM=0.9 22 | SIZE_TO_BINARIZE=1 # The min size of variable to enable binarizing. e.g., 385 means biases are excluded from binarizing 23 | TRAIN_BATCH_SIZE=128 # total batch size 24 | SAVE_ITER=2000 # Save summaries and a checkpoint every SAVE_ITER iterations 25 | QUANTIZE_LOGITS=True # Whether to quantize the gradients of the last logits layer. 26 | VAL_BATCH_SIZE=50 # set smaller to avoid OOM 27 | NUM_EPOCHS_PER_DECAY=200 28 | MAX_STEPS=300000 29 | VAL_TOWER=0 # -1 for cpu 30 | EVAL_INTERVAL_SECS=10 31 | EVAL_DEVICE="/gpu:0" # specify the device to eval. e.g. "/gpu:1", "/cpu:0" 32 | RESTORE_AVG_VAR=True # use the moving average parameters to eval? 33 | SEED=123 # use ${RANDOM} if no duplicable results are required 34 | 35 | if [ ! -d "$ROOT_WORKSPACE" ]; then 36 | echo "${ROOT_WORKSPACE} does not exist!" 37 | exit 38 | fi 39 | 40 | TRAIN_WORKSPACE=${ROOT_WORKSPACE}/${DATASET_NAME}_training_data/ 41 | EVAL_WORKSPACE=${ROOT_WORKSPACE}/${DATASET_NAME}_eval_data/ 42 | INFO_WORKSPACE=${ROOT_WORKSPACE}/${DATASET_NAME}_info/ 43 | if [ ! -d "${INFO_WORKSPACE}" ]; then 44 | echo "Creating ${INFO_WORKSPACE} ..."
45 | mkdir -p ${INFO_WORKSPACE} 46 | fi 47 | current_time=$(date) 48 | current_time=${current_time// /_} 49 | current_time=${current_time//:/-} 50 | FOLDER_NAME=${DATASET_NAME}_${NET}_${IMAGE_SIZE}_${OPTIMIZER}_${GRAD_BITS}_${BASE_LR}_${CLIP_FACTOR}_${FLOATING_GRAD_EPOCH}_${WEIGHT_DECAY}_${MOMENTUM}_${SIZE_TO_BINARIZE}_${TRAIN_BATCH_SIZE}_${NUM_NODES}_${current_time} 51 | TRAIN_DIR=${TRAIN_WORKSPACE}/${FOLDER_NAME} 52 | EVAL_DIR=${EVAL_WORKSPACE}/${FOLDER_NAME} 53 | if [ ! -d "$TRAIN_DIR" ]; then 54 | echo "Creating ${TRAIN_DIR} ..." 55 | mkdir -p ${TRAIN_DIR} 56 | fi 57 | if [ ! -d "$EVAL_DIR" ]; then 58 | echo "Creating ${EVAL_DIR} ..." 59 | mkdir -p ${EVAL_DIR} 60 | fi 61 | 62 | bazel-bin/inception/${DATASET_NAME}_eval \ 63 | --eval_interval_secs ${EVAL_INTERVAL_SECS} \ 64 | --device ${EVAL_DEVICE} \ 65 | --restore_avg_var ${RESTORE_AVG_VAR} \ 66 | --data_dir ${DATA_DIR} \ 67 | --subset "test" \ 68 | --net ${NET} \ 69 | --image_size ${IMAGE_SIZE} \ 70 | --batch_size ${VAL_BATCH_SIZE} \ 71 | --max_steps ${MAX_STEPS} \ 72 | --checkpoint_dir ${TRAIN_DIR} \ 73 | --tower ${VAL_TOWER} \ 74 | --eval_dir ${EVAL_DIR} > ${INFO_WORKSPACE}/eval_${FOLDER_NAME}_info.txt 2>&1 & 75 | 76 | bazel-bin/inception/${DATASET_NAME}_train \ 77 | --seed ${SEED} \ 78 | --pretrained_model_checkpoint_path "${FINETUNED_MODEL_PATH}" \ 79 | --num_epochs_per_decay ${NUM_EPOCHS_PER_DECAY} \ 80 | --initial_learning_rate ${BASE_LR} \ 81 | --grad_bits ${GRAD_BITS} \ 82 | --clip_factor ${CLIP_FACTOR} \ 83 | --floating_grad_epoch ${FLOATING_GRAD_EPOCH} \ 84 | --weight_decay ${WEIGHT_DECAY} \ 85 | --momentum ${MOMENTUM} \ 86 | --size_to_binarize ${SIZE_TO_BINARIZE} \ 87 | --optimizer ${OPTIMIZER} \ 88 | --net ${NET} \ 89 | --image_size ${IMAGE_SIZE} \ 90 | --num_gpus ${NUM_GPUS} \ 91 | --num_nodes ${NUM_NODES} \ 92 | --batch_size ${TRAIN_BATCH_SIZE} \ 93 | --save_iter ${SAVE_ITER} \ 94 | --quantize_logits ${QUANTIZE_LOGITS} \ 95 | --max_steps ${MAX_STEPS} \ 96 | --train_dir ${TRAIN_DIR} \ 97 | --data_dir ${DATA_DIR} > ${INFO_WORKSPACE}/training_${FOLDER_NAME}_info.txt 2>&1 & 98 | -------------------------------------------------------------------------------- /terngrad/run_multi_gpus_googlenet_quick.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | set -x 4 | 5 | DATASET_NAME=imagenet # imagenet or cifar10 6 | ROOT_WORKSPACE=${HOME}/dataset/results/imagenet # the location to store tf.summary and logs 7 | DATA_DIR=${HOME}/dataset/${DATASET_NAME}-data # dataset location 8 | FINETUNED_MODEL_PATH= 9 | NUM_GPUS=2 10 | export CUDA_VISIBLE_DEVICES=0,1 # specify visible gpus to tensorflow 11 | NUM_NODES=2 # num of virtual nodes on physical gpus 12 | OPTIMIZER=momentum 13 | NET=googlenet 14 | IMAGE_SIZE=224 15 | GRAD_BITS=32 16 | BASE_LR=0.04 17 | CLIP_FACTOR=0.0 # 0.0 means no clipping 18 | # when GRAD_BITS=1 and FLOATING_GRAD_EPOCH>0, switch to floating gradients every FLOATING_GRAD_EPOCH epoch and then switch back 19 | FLOATING_GRAD_EPOCH=0 # 0 means no switching 20 | WEIGHT_DECAY=0.00004 # default - alexnet/vgg_a/vgg_16:0.0005, inception_v3:0.00004, cifar10_alexnet:0.004 21 | DROPOUT_KEEP_PROB=0.8 # The probability to keep in dropout 22 | MOMENTUM=0.9 23 | LR_DECAY_TYPE="polynomial" # learning rate decay type 24 | SIZE_TO_BINARIZE=1 # the min size of variable to enable binarizing. 
1 means binarizing all variables when GRAD_BITS=1 25 | TRAIN_BATCH_SIZE=128 # total batch size 26 | VAL_BATCH_SIZE=25 # set smaller to avoid OOM 27 | MAX_STEPS=600000 28 | VAL_TOWER=0 # -1 for cpu 29 | EVAL_INTERVAL_SECS=1800 # seconds to evaluate the accuracy 30 | EVAL_DEVICE="/gpu:0" # specify the device to eval. e.g. "/gpu:1", "/cpu:0" 31 | RESTORE_AVG_VAR=True # use the moving average parameters to eval? 32 | SEED=123 # use ${RANDOM} if no duplicable results are required 33 | 34 | if [ ! -d "$ROOT_WORKSPACE" ]; then 35 | echo "${ROOT_WORKSPACE} does not exist!" 36 | exit 37 | fi 38 | 39 | TRAIN_WORKSPACE=${ROOT_WORKSPACE}/${DATASET_NAME}_training_data/ 40 | EVAL_WORKSPACE=${ROOT_WORKSPACE}/${DATASET_NAME}_eval_data/ 41 | INFO_WORKSPACE=${ROOT_WORKSPACE}/${DATASET_NAME}_info/ 42 | if [ ! -d "${INFO_WORKSPACE}" ]; then 43 | echo "Creating ${INFO_WORKSPACE} ..." 44 | mkdir -p ${INFO_WORKSPACE} 45 | fi 46 | current_time=$(date) 47 | current_time=${current_time// /_} 48 | current_time=${current_time//:/-} 49 | FOLDER_NAME=${DATASET_NAME}_${NET}_${IMAGE_SIZE}_${OPTIMIZER}_${GRAD_BITS}_${BASE_LR}_${CLIP_FACTOR}_${FLOATING_GRAD_EPOCH}_${WEIGHT_DECAY}_${MOMENTUM}_${SIZE_TO_BINARIZE}_${TRAIN_BATCH_SIZE}_${NUM_GPUS}_${NUM_NODES}_${current_time} 50 | TRAIN_DIR=${TRAIN_WORKSPACE}/${FOLDER_NAME} 51 | EVAL_DIR=${EVAL_WORKSPACE}/${FOLDER_NAME} 52 | if [ ! -d "$TRAIN_DIR" ]; then 53 | echo "Creating ${TRAIN_DIR} ..." 54 | mkdir -p ${TRAIN_DIR} 55 | fi 56 | if [ ! -d "$EVAL_DIR" ]; then 57 | echo "Creating ${EVAL_DIR} ..." 58 | mkdir -p ${EVAL_DIR} 59 | fi 60 | 61 | bazel-bin/inception/${DATASET_NAME}_eval \ 62 | --eval_interval_secs ${EVAL_INTERVAL_SECS} \ 63 | --device ${EVAL_DEVICE} \ 64 | --restore_avg_var ${RESTORE_AVG_VAR} \ 65 | --data_dir ${DATA_DIR} \ 66 | --net ${NET} \ 67 | --image_size ${IMAGE_SIZE} \ 68 | --batch_size ${VAL_BATCH_SIZE} \ 69 | --max_steps ${MAX_STEPS} \ 70 | --checkpoint_dir ${TRAIN_DIR} \ 71 | --tower ${VAL_TOWER} \ 72 | --eval_dir ${EVAL_DIR} > ${INFO_WORKSPACE}/eval_${FOLDER_NAME}_info.txt 2>&1 & 73 | 74 | bazel-bin/inception/${DATASET_NAME}_train \ 75 | --seed ${SEED} \ 76 | --pretrained_model_checkpoint_path "${FINETUNED_MODEL_PATH}" \ 77 | --initial_learning_rate ${BASE_LR} \ 78 | --grad_bits ${GRAD_BITS} \ 79 | --clip_factor ${CLIP_FACTOR} \ 80 | --floating_grad_epoch ${FLOATING_GRAD_EPOCH} \ 81 | --weight_decay ${WEIGHT_DECAY} \ 82 | --dropout_keep_prob ${DROPOUT_KEEP_PROB} \ 83 | --momentum ${MOMENTUM} \ 84 | --learning_rate_decay_type ${LR_DECAY_TYPE} \ 85 | --size_to_binarize ${SIZE_TO_BINARIZE} \ 86 | --optimizer ${OPTIMIZER} \ 87 | --net ${NET} \ 88 | --image_size ${IMAGE_SIZE} \ 89 | --num_gpus ${NUM_GPUS} \ 90 | --num_nodes ${NUM_NODES} \ 91 | --batch_size ${TRAIN_BATCH_SIZE} \ 92 | --max_steps ${MAX_STEPS} \ 93 | --train_dir ${TRAIN_DIR} \ 94 | --data_dir ${DATA_DIR} > ${INFO_WORKSPACE}/training_${FOLDER_NAME}_info.txt 2>&1 & 95 | -------------------------------------------------------------------------------- /terngrad/run_multi_gpus_lenet.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | set -x 4 | 5 | DATASET_NAME=mnist # imagenet or cifar10 or mnist 6 | ROOT_WORKSPACE=${HOME}/dataset/results/${DATASET_NAME} # the location to store tf.summary and logs 7 | DATA_DIR=${HOME}/dataset/${DATASET_NAME}-data # dataset location 8 | FINETUNED_MODEL_PATH= 9 | NUM_GPUS=2 # num of physical gpus 10 | export CUDA_VISIBLE_DEVICES=0,1 # specify visible gpus to tensorflow 11 | NUM_NODES=4 # num of virtual
nodes on physical gpus 12 | OPTIMIZER=momentum 13 | NET=lenet 14 | IMAGE_SIZE=28 15 | GRAD_BITS=32 16 | BASE_LR=0.01 17 | CLIP_FACTOR=0.0 # 0.0 means no clipping 18 | # when GRAD_BITS=1 and FLOATING_GRAD_EPOCH>0, switch to floating gradients every FLOATING_GRAD_EPOCH epoch and then switch back 19 | FLOATING_GRAD_EPOCH=0 # 0 means no switching 20 | WEIGHT_DECAY=0.0005 # default - alexnet/vgg_a/vgg_16:0.0005, inception_v3:0.00004, cifar10_alexnet:0.004 21 | MOMENTUM=0.9 22 | LR_DECAY_TYPE="polynomial" # learning rate decay type 23 | SIZE_TO_BINARIZE=1 # the min size of variable to enable binarizing. 1 means binarizing all variables when GRAD_BITS=1 24 | TRAIN_BATCH_SIZE=64 # total batch size 25 | SAVE_ITER=200 # Save summaries and a checkpoint every SAVE_ITER iterations 26 | QUANTIZE_LOGITS=True # Whether to quantize the gradients of the last logits layer. 27 | VAL_BATCH_SIZE=100 # set smaller to avoid OOM 28 | MAX_STEPS=10000 29 | VAL_TOWER=0 # -1 for cpu 30 | EVAL_INTERVAL_SECS=1 # seconds to evaluate the accuracy 31 | EVAL_DEVICE="/gpu:0" # specify the device to eval. e.g. "/gpu:1", "/cpu:0" 32 | RESTORE_AVG_VAR=True # use the moving average parameters to eval? 33 | SEED=123 # use ${RANDOM} if no duplicable results are required 34 | 35 | if [ ! -d "$ROOT_WORKSPACE" ]; then 36 | echo "${ROOT_WORKSPACE} does not exist!" 37 | exit 38 | fi 39 | 40 | TRAIN_WORKSPACE=${ROOT_WORKSPACE}/${DATASET_NAME}_training_data/ 41 | EVAL_WORKSPACE=${ROOT_WORKSPACE}/${DATASET_NAME}_eval_data/ 42 | INFO_WORKSPACE=${ROOT_WORKSPACE}/${DATASET_NAME}_info/ 43 | if [ ! -d "${INFO_WORKSPACE}" ]; then 44 | echo "Creating ${INFO_WORKSPACE} ..." 45 | mkdir -p ${INFO_WORKSPACE} 46 | fi 47 | current_time=$(date) 48 | current_time=${current_time// /_} 49 | current_time=${current_time//:/-} 50 | FOLDER_NAME=${DATASET_NAME}_${NET}_${IMAGE_SIZE}_${OPTIMIZER}_${GRAD_BITS}_${BASE_LR}_${CLIP_FACTOR}_${FLOATING_GRAD_EPOCH}_${WEIGHT_DECAY}_${MOMENTUM}_${SIZE_TO_BINARIZE}_${TRAIN_BATCH_SIZE}_${NUM_NODES}_${current_time} 51 | TRAIN_DIR=${TRAIN_WORKSPACE}/${FOLDER_NAME} 52 | EVAL_DIR=${EVAL_WORKSPACE}/${FOLDER_NAME} 53 | if [ ! -d "$TRAIN_DIR" ]; then 54 | echo "Creating ${TRAIN_DIR} ..." 55 | mkdir -p ${TRAIN_DIR} 56 | fi 57 | if [ ! -d "$EVAL_DIR" ]; then 58 | echo "Creating ${EVAL_DIR} ..."
59 | mkdir -p ${EVAL_DIR} 60 | fi 61 | 62 | bazel-bin/inception/${DATASET_NAME}_eval \ 63 | --eval_interval_secs ${EVAL_INTERVAL_SECS} \ 64 | --device ${EVAL_DEVICE} \ 65 | --restore_avg_var ${RESTORE_AVG_VAR} \ 66 | --data_dir ${DATA_DIR} \ 67 | --subset "test" \ 68 | --net ${NET} \ 69 | --image_size ${IMAGE_SIZE} \ 70 | --batch_size ${VAL_BATCH_SIZE} \ 71 | --checkpoint_dir ${TRAIN_DIR} \ 72 | --max_steps ${MAX_STEPS} \ 73 | --tower ${VAL_TOWER} \ 74 | --eval_dir ${EVAL_DIR} > ${INFO_WORKSPACE}/eval_${FOLDER_NAME}_info.txt 2>&1 & 75 | 76 | bazel-bin/inception/${DATASET_NAME}_train \ 77 | --seed ${SEED} \ 78 | --pretrained_model_checkpoint_path "${FINETUNED_MODEL_PATH}" \ 79 | --initial_learning_rate ${BASE_LR} \ 80 | --grad_bits ${GRAD_BITS} \ 81 | --clip_factor ${CLIP_FACTOR} \ 82 | --floating_grad_epoch ${FLOATING_GRAD_EPOCH} \ 83 | --weight_decay ${WEIGHT_DECAY} \ 84 | --momentum ${MOMENTUM} \ 85 | --learning_rate_decay_type ${LR_DECAY_TYPE} \ 86 | --size_to_binarize ${SIZE_TO_BINARIZE} \ 87 | --optimizer ${OPTIMIZER} \ 88 | --net ${NET} \ 89 | --image_size ${IMAGE_SIZE} \ 90 | --num_gpus ${NUM_GPUS} \ 91 | --num_nodes ${NUM_NODES} \ 92 | --batch_size ${TRAIN_BATCH_SIZE} \ 93 | --save_iter ${SAVE_ITER} \ 94 | --quantize_logits ${QUANTIZE_LOGITS} \ 95 | --max_steps ${MAX_STEPS} \ 96 | --train_dir ${TRAIN_DIR} \ 97 | --data_dir ${DATA_DIR} > ${INFO_WORKSPACE}/training_${FOLDER_NAME}_info.txt 2>&1 & 98 | -------------------------------------------------------------------------------- /terngrad/run_single_ps_cifar10.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | . ~/.bashrc 3 | set -e 4 | set -x 5 | 6 | if [ "$#" -ne 5 ]; then 7 | echo "Illegal number of parameters" 8 | echo "Usage: $0 PS_HOSTS WORKER_HOSTS JOB_NAME TASK_ID EXPERIMENT_ID" 9 | exit 10 | fi 11 | 12 | # cluster and task 13 | PS_HOSTS=$1 14 | WORKER_HOSTS=$2 15 | JOB_NAME=$3 16 | TASK_ID=$4 17 | EXPERIMENT_ID=$5 18 | 19 | if [ "${JOB_NAME}" != "ps" ] 20 | then 21 | echo "JOB_NAME(${JOB_NAME}) is not ps" 22 | exit 23 | fi 24 | 25 | DATASET_NAME=cifar10 # imagenet or cifar10 26 | INFO_WORKSPACE=${HOME}/tmp/${DATASET_NAME}_info/ 27 | if [ ! -d "${INFO_WORKSPACE}" ]; then 28 | echo "Creating ${INFO_WORKSPACE} ..." 29 | mkdir -p ${INFO_WORKSPACE} 30 | fi 31 | LOG_FILE=${INFO_WORKSPACE}/${EXPERIMENT_ID}_${JOB_NAME}_${TASK_ID}.log 32 | 33 | bazel-bin/inception/${DATASET_NAME}_distributed_train \ 34 | --job_name ${JOB_NAME} \ 35 | --task_id ${TASK_ID} \ 36 | --ps_hosts ${PS_HOSTS} \ 37 | --worker_hosts ${WORKER_HOSTS} > ${LOG_FILE} 2>&1 & 38 | -------------------------------------------------------------------------------- /terngrad/run_single_ps_imagenet.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | set -x 4 | 5 | if [ "$#" -ne 5 ]; then 6 | echo "Illegal number of parameters" 7 | echo "Usage: $0 PS_HOSTS WORKER_HOSTS JOB_NAME TASK_ID EXPERIMENT_ID" 8 | exit 9 | fi 10 | 11 | # cluster and task 12 | PS_HOSTS=$1 13 | WORKER_HOSTS=$2 14 | JOB_NAME=$3 15 | TASK_ID=$4 16 | EXPERIMENT_ID=$5 17 | 18 | if [ "${JOB_NAME}" != "ps" ] 19 | then 20 | echo "JOB_NAME(${JOB_NAME}) is not ps" 21 | exit 22 | fi 23 | 24 | DATASET_NAME=imagenet # imagenet or cifar10 25 | INFO_WORKSPACE=${HOME}/tmp/${DATASET_NAME}_info/ 26 | if [ ! -d "${INFO_WORKSPACE}" ]; then 27 | echo "Creating ${INFO_WORKSPACE} ..." 
28 | mkdir -p ${INFO_WORKSPACE} 29 | fi 30 | LOG_FILE=${INFO_WORKSPACE}/${EXPERIMENT_ID}_${JOB_NAME}_${TASK_ID}.log 31 | 32 | bazel-bin/inception/${DATASET_NAME}_distributed_train \ 33 | --job_name ${JOB_NAME} \ 34 | --task_id ${TASK_ID} \ 35 | --ps_hosts ${PS_HOSTS} \ 36 | --worker_hosts ${WORKER_HOSTS} > ${LOG_FILE} 2>&1 & 37 | -------------------------------------------------------------------------------- /terngrad/run_single_worker_alexnet.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | set -x 4 | 5 | if [ "$#" -ne 7 ]; then 6 | echo "Illegal number of parameters" 7 | echo "Usage: $0 PS_HOSTS WORKER_HOSTS JOB_NAME TASK_ID DEVICE DATA_DIR EXPERIMENT_ID" 8 | exit 9 | fi 10 | 11 | # cluster and task 12 | PS_HOSTS=$1 13 | WORKER_HOSTS=$2 14 | JOB_NAME=$3 15 | TASK_ID=$4 16 | DEVICE=$5 17 | DATA_DIR=$6 # dataset location 18 | EXPERIMENT_ID=$7 19 | 20 | if [ "${JOB_NAME}" != "worker" ] 21 | then 22 | echo "JOB_NAME(${JOB_NAME}) is not worker" 23 | exit 24 | fi 25 | 26 | DATASET_NAME=imagenet # imagenet or cifar10 27 | ROOT_WORKSPACE=${HOME}/tmp/ # the location to store tf.summary and logs 28 | FINETUNED_MODEL_PATH= 29 | OPTIMIZER=momentum 30 | NET=alexnet 31 | IMAGE_SIZE=224 32 | GRAD_BITS=32 33 | BASE_LR=0.02 34 | CLIP_FACTOR=0.0 # 0.0 means no clipping 35 | # when GRAD_BITS=1 and FLOATING_GRAD_EPOCH>0, switch to floating gradients every FLOATING_GRAD_EPOCH epoch and then switch back 36 | FLOATING_GRAD_EPOCH=0 # 0 means no switching 37 | WEIGHT_DECAY=0.0005 # default - alexnet/vgg_a/vgg_16:0.0005, inception_v3:0.00004, cifar10_alexnet:0.004 38 | DROPOUT_KEEP_PROB=0.5 # The probability to keep in dropout 39 | MOMENTUM=0.9 40 | SIZE_TO_BINARIZE=9217 # the min size of variable to enable binarizing. 1 means binarizing all variables when GRAD_BITS=1 41 | TRAIN_BATCH_SIZE=128 # batch size per node 42 | NUM_EPOCHS_PER_DECAY=20 # per decay learning rate 43 | MAX_STEPS=185000 44 | SEED=123 # use ${RANDOM} if no duplicable results are required 45 | 46 | TRAIN_WORKSPACE=${ROOT_WORKSPACE}/${DATASET_NAME}_training_data/ 47 | INFO_WORKSPACE=${ROOT_WORKSPACE}/${DATASET_NAME}_info/ 48 | if [ ! -d "${INFO_WORKSPACE}" ]; then 49 | echo "Creating ${INFO_WORKSPACE} ..." 50 | mkdir -p ${INFO_WORKSPACE} 51 | fi 52 | FOLDER_NAME=${EXPERIMENT_ID}_${JOB_NAME}_${TASK_ID} 53 | TRAIN_DIR=${TRAIN_WORKSPACE}/${FOLDER_NAME} 54 | if [ ! -d "$TRAIN_DIR" ]; then 55 | echo "Creating ${TRAIN_DIR} ..." 
56 | mkdir -p ${TRAIN_DIR} 57 | fi 58 | 59 | export CUDA_VISIBLE_DEVICES=${DEVICE} # specify visible gpus to tensorflow 60 | bazel-bin/inception/${DATASET_NAME}_distributed_train \ 61 | --seed ${SEED} \ 62 | --pretrained_model_checkpoint_path "${FINETUNED_MODEL_PATH}" \ 63 | --num_epochs_per_decay ${NUM_EPOCHS_PER_DECAY} \ 64 | --initial_learning_rate ${BASE_LR} \ 65 | --grad_bits ${GRAD_BITS} \ 66 | --clip_factor ${CLIP_FACTOR} \ 67 | --floating_grad_epoch ${FLOATING_GRAD_EPOCH} \ 68 | --weight_decay ${WEIGHT_DECAY} \ 69 | --dropout_keep_prob ${DROPOUT_KEEP_PROB} \ 70 | --momentum ${MOMENTUM} \ 71 | --size_to_binarize ${SIZE_TO_BINARIZE} \ 72 | --optimizer ${OPTIMIZER} \ 73 | --net ${NET} \ 74 | --image_size ${IMAGE_SIZE} \ 75 | --batch_size ${TRAIN_BATCH_SIZE} \ 76 | --max_steps ${MAX_STEPS} \ 77 | --train_dir ${TRAIN_DIR} \ 78 | --job_name ${JOB_NAME} \ 79 | --task_id ${TASK_ID} \ 80 | --ps_hosts ${PS_HOSTS} \ 81 | --worker_hosts ${WORKER_HOSTS} \ 82 | --data_dir ${DATA_DIR} > ${INFO_WORKSPACE}/${FOLDER_NAME}.log 2>&1 & 83 | -------------------------------------------------------------------------------- /terngrad/run_single_worker_cifarnet.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | . ~/.bashrc 3 | 4 | set -e 5 | set -x 6 | 7 | if [ "$#" -ne 7 ]; then 8 | echo "Illegal number of parameters" 9 | echo "Usage: $0 PS_HOSTS WORKER_HOSTS JOB_NAME TASK_ID DEVICE DATA_DIR EXPERIMENT_ID" 10 | exit 11 | fi 12 | 13 | # cluster and task 14 | PS_HOSTS=$1 15 | WORKER_HOSTS=$2 16 | JOB_NAME=$3 17 | TASK_ID=$4 18 | DEVICE=$5 19 | DATA_DIR=$6 # dataset location 20 | EXPERIMENT_ID=$7 21 | 22 | if [ "${JOB_NAME}" != "worker" ] 23 | then 24 | echo "JOB_NAME(${JOB_NAME}) is not worker" 25 | exit 26 | fi 27 | 28 | DATASET_NAME=cifar10 # imagenet or cifar10 29 | ROOT_WORKSPACE=${HOME}/tmp/ # the location to store tf.summary and logs 30 | FINETUNED_MODEL_PATH= 31 | OPTIMIZER=adam 32 | NET=cifar10_alexnet 33 | IMAGE_SIZE=24 34 | GRAD_BITS=32 35 | BASE_LR=0.0002 36 | CLIP_FACTOR=0.0 # 0.0 means no clipping 37 | # when GRAD_BITS=1 and FLOATING_GRAD_EPOCH>0, switch to floating gradients every FLOATING_GRAD_EPOCH epoch and then switch back 38 | FLOATING_GRAD_EPOCH=0 # 0 means no switching 39 | WEIGHT_DECAY=0.004 # default - alexnet/vgg_a/vgg_16:0.0005, inception_v3:0.00004, cifar10_alexnet:0.004 40 | MOMENTUM=0.9 41 | SIZE_TO_BINARIZE=1 # the min size of variable to enable binarizing. 1 means binarizing all variables when GRAD_BITS=1 42 | TRAIN_BATCH_SIZE=64 # batch size per node 43 | NUM_EPOCHS_PER_DECAY=200 # per decay learning rate 44 | MAX_STEPS=300000 45 | SEED=123 # use ${RANDOM} if no duplicable results are required 46 | 47 | TRAIN_WORKSPACE=${ROOT_WORKSPACE}/${DATASET_NAME}_training_data/ 48 | INFO_WORKSPACE=${ROOT_WORKSPACE}/${DATASET_NAME}_info/ 49 | if [ ! -d "${INFO_WORKSPACE}" ]; then 50 | echo "Creating ${INFO_WORKSPACE} ..." 51 | mkdir -p ${INFO_WORKSPACE} 52 | fi 53 | FOLDER_NAME=${EXPERIMENT_ID}_${JOB_NAME}_${TASK_ID} 54 | TRAIN_DIR=${TRAIN_WORKSPACE}/${FOLDER_NAME} 55 | if [ ! -d "$TRAIN_DIR" ]; then 56 | echo "Creating ${TRAIN_DIR} ..." 
57 | mkdir -p ${TRAIN_DIR} 58 | fi 59 | 60 | export CUDA_VISIBLE_DEVICES=${DEVICE} # specify visible gpus to tensorflow 61 | bazel-bin/inception/${DATASET_NAME}_distributed_train \ 62 | --seed ${SEED} \ 63 | --pretrained_model_checkpoint_path "${FINETUNED_MODEL_PATH}" \ 64 | --num_epochs_per_decay ${NUM_EPOCHS_PER_DECAY} \ 65 | --initial_learning_rate ${BASE_LR} \ 66 | --grad_bits ${GRAD_BITS} \ 67 | --clip_factor ${CLIP_FACTOR} \ 68 | --floating_grad_epoch ${FLOATING_GRAD_EPOCH} \ 69 | --weight_decay ${WEIGHT_DECAY} \ 70 | --momentum ${MOMENTUM} \ 71 | --size_to_binarize ${SIZE_TO_BINARIZE} \ 72 | --optimizer ${OPTIMIZER} \ 73 | --net ${NET} \ 74 | --image_size ${IMAGE_SIZE} \ 75 | --batch_size ${TRAIN_BATCH_SIZE} \ 76 | --max_steps ${MAX_STEPS} \ 77 | --train_dir ${TRAIN_DIR} \ 78 | --job_name ${JOB_NAME} \ 79 | --task_id ${TASK_ID} \ 80 | --ps_hosts ${PS_HOSTS} \ 81 | --worker_hosts ${WORKER_HOSTS} \ 82 | --data_dir ${DATA_DIR} > ${INFO_WORKSPACE}/${FOLDER_NAME}.log 2>&1 & 83 | -------------------------------------------------------------------------------- /terngrad/split_dataset.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #set -e 3 | #set -x 4 | 5 | if [ "$#" -ne 3 ]; then 6 | echo "Illegal number of parameters" 7 | echo "Usage: $0 DATA_DIR WORKER_NUM WORKER_ID" 8 | exit 9 | fi 10 | 11 | DATA_DIR=$1 12 | WORKER_NUM=$2 13 | WORKER_ID=$3 14 | 15 | if [ "${WORKER_ID}" -ge ${WORKER_NUM} ] || [ "${WORKER_ID}" -lt 0 ] ; then 16 | echo "WORKER_ID must be in [0, WORKER_NUM)" 17 | exit 18 | fi 19 | 20 | SPLIT_DIR=${DATA_DIR}/worker_${WORKER_ID}_of_${WORKER_NUM} 21 | if [ ! -d "$SPLIT_DIR" ]; then 22 | export total_files=$( ls -l ${DATA_DIR}/train-* | wc -l ) 23 | split_size=$( expr ${total_files} / ${WORKER_NUM} ) 24 | remainder=$( expr ${total_files} % ${WORKER_NUM} ) 25 | if [ "${remainder}" -ne 0 ]; then 26 | echo "Dataset cannot be evenly split" 27 | exit 28 | fi 29 | echo "Splitting to ${SPLIT_DIR} ..." 30 | mkdir ${SPLIT_DIR} 31 | cd ${SPLIT_DIR} 32 | files=$( ls -dl ${DATA_DIR}/train-*|head -n $( expr $( expr ${WORKER_ID} + 1 ) \* ${split_size} ) | tail -n ${split_size} | awk '{print $9}') 33 | 34 | for file in ${files}; do 35 | ln -s ${file}; 36 | done 37 | 38 | else 39 | echo "${SPLIT_DIR} exists." 40 | fi 41 | 42 | -------------------------------------------------------------------------------- /terngrad/stop_dist.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | set -x 4 | 5 | .
./config_dist.sh 6 | 7 | WORKER_NUM=${#WORKER_HOSTS[@]} 8 | DEVICE_NUM=${#WORKER_DEVICES[@]} 9 | DATA_NUM=${#DATA_DIR[@]} 10 | if [ ${WORKER_NUM} -ne ${DEVICE_NUM} ] 11 | then 12 | echo "The number of workers (${WORKER_NUM}) does not match the number of devices (${DEVICE_NUM})" 13 | exit 14 | fi 15 | if [ ${WORKER_NUM} -ne ${DATA_NUM} ] 16 | then 17 | echo "The number of workers (${WORKER_NUM}) does not match the number of data paths (${DATA_NUM})" 18 | exit 19 | fi 20 | 21 | 22 | # stop workers 23 | for HOST in ${WORKER_HOSTS[*]}; do 24 | worker=$(echo ${HOST} |cut -d':' -f1) 25 | ssh ${worker} "hostname; \ 26 | cd ${WORKSPACE}; \ 27 | pwd; \ 28 | ./kill_local.sh " 29 | done 30 | 31 | # stop ps 32 | for HOST in ${PS_HOSTS[*]}; do 33 | ps=$(echo ${HOST} |cut -d':' -f1) 34 | ssh ${ps} "hostname; \ 35 | cd ${WORKSPACE}; \ 36 | pwd; \ 37 | ./kill_local.sh " 38 | done 39 | 40 | -------------------------------------------------------------------------------- /terngrad/test/test_ternary_encoder_decoder.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | def ternary_encoder(input_data): 4 | """Encodes ternary signs, packing four 2-bit values into one byte.""" 5 | a = tf.sign(input_data) # -1, 0, 1 6 | a = tf.add(a,1) # shift -1,0,1 to 0,1,2 (2'b00,2'b01,2'b10) 7 | a = tf.reshape(a,[-1]) 8 | pad_size = 4 - tf.mod(tf.size(a), 4) 9 | pad = tf.range(0.0, pad_size) # pad values are trimmed off again by the decoder 10 | a = tf.concat([a, pad], 0) 11 | a_split1, a_split2, a_split3, a_split4 = tf.split(a,4) # the padding above makes the size divisible by 4 12 | 13 | # encode 4 grads into 1 Byte 14 | sum_1 = tf.add(a_split1, a_split2*4) 15 | sum_2 = tf.add(a_split3*16, a_split4*64) 16 | sum_all = tf.add(sum_1, sum_2) 17 | encoded = tf.cast(sum_all, tf.uint8) 18 | return encoded 19 | 20 | def ternary_decoder(encoded_data, scaler, shape): 21 | """Decodes the packed bytes back to scaled float ternary values.""" 22 | a = tf.cast(encoded_data, tf.int32) 23 | a_split1 = tf.mod(a,4) 24 | a_split2 = tf.to_int32(tf.mod(a/4,4)) 25 | a_split3 = tf.to_int32(tf.mod(a/16,4)) 26 | a_split4 = tf.to_int32(tf.mod(a/64,4)) 27 | a = tf.concat([a_split1, a_split2, a_split3, a_split4], 0) 28 | real_size = tf.reduce_prod(shape) 29 | a = tf.to_float(a) 30 | a = tf.gather(a, tf.range(0,real_size)) 31 | a = tf.reshape(a, shape) 32 | a = tf.subtract(a, 1) 33 | decoded = a*scaler 34 | return decoded 35 | 36 | shape=[33, 33, 33, 333] 37 | scaler=0.002 38 | with tf.device('/gpu:1'): 39 | # stochastic ternary gradient generator (values in {-scaler, 0, +scaler}) 40 | gradient = tf.random_normal(shape, stddev=0.001, name='a') 41 | zeros = tf.zeros(shape) 42 | abs_gradient = tf.abs(gradient) 43 | sign_gradient = tf.sign( gradient ) 44 | rnd_sample = tf.random_uniform(shape,0,scaler) 45 | where_cond = tf.less(rnd_sample, abs_gradient) 46 | bin_gradient = tf.where(where_cond, sign_gradient * scaler, zeros) 47 | 48 | # encoder: -1 0 1 49 | encoded_a = ternary_encoder(bin_gradient) 50 | 51 | with tf.device('/gpu:0'): 52 | # decoder 53 | decoded_a = ternary_decoder(encoded_a, scaler, shape) 54 | 55 | err = tf.reduce_sum( tf.squared_difference(bin_gradient, decoded_a) ) 56 | 57 | config = tf.ConfigProto() 58 | config.gpu_options.allow_growth = True 59 | config.log_device_placement = True 60 | #config.allow_soft_placement = True 61 | with tf.Session(config=config) as sess: 62 | for i in range(2000): 63 | res = sess.run(err) 64 | print('%d %g' % (i, res)) 65 | --------------------------------------------------------------------------------
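A note on the packing arithmetic exercised above: ternary_encoder shifts each ternary value in {-1, 0, 1} to {0, 1, 2} and packs four such 2-bit codes into one byte as v1 + 4*v2 + 16*v3 + 64*v4; ternary_decoder inverts this with mod-4 arithmetic. (The TensorFlow version packs element i of each quarter of the flattened tensor into byte i, rather than four consecutive elements.) A minimal pure-Python sketch of the per-byte arithmetic follows; pack4/unpack4 are illustrative names, not part of the repo:

def pack4(t1, t2, t3, t4):
  """Packs four ternary values in {-1, 0, 1} into one byte (base-4 digits)."""
  v = [t + 1 for t in (t1, t2, t3, t4)]  # shift {-1, 0, 1} to {0, 1, 2}
  return v[0] + 4 * v[1] + 16 * v[2] + 64 * v[3]

def unpack4(byte):
  """Recovers the four ternary values from one packed byte."""
  return tuple(((byte >> (2 * k)) & 3) - 1 for k in range(4))

assert pack4(-1, 0, 1, -1) == 36
assert unpack4(36) == (-1, 0, 1, -1)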