├── .gitignore
├── LICENSE
├── NIPS17-TernGrad-slides-v3.pdf
├── Poster_Wen_NIPS2017.pdf
├── README.md
├── slim
│   ├── BUILD
│   ├── README.md
│   ├── __init__.py
│   ├── datasets
│   │   ├── __init__.py
│   │   ├── cifar10.py
│   │   ├── dataset_factory.py
│   │   ├── dataset_utils.py
│   │   ├── download_and_convert_cifar10.py
│   │   ├── download_and_convert_flowers.py
│   │   ├── download_and_convert_mnist.py
│   │   ├── download_convert_and_shard_cifar10.py
│   │   ├── flowers.py
│   │   ├── imagenet.py
│   │   └── mnist.py
│   ├── deployment
│   │   ├── __init__.py
│   │   ├── model_deploy.py
│   │   └── model_deploy_test.py
│   ├── download_and_convert_data.py
│   ├── eval_image_classifier.py
│   ├── nets
│   │   ├── __init__.py
│   │   ├── alexnet.py
│   │   ├── alexnet_test.py
│   │   ├── cifarnet.py
│   │   ├── inception.py
│   │   ├── inception_resnet_v2.py
│   │   ├── inception_resnet_v2_test.py
│   │   ├── inception_utils.py
│   │   ├── inception_v1.py
│   │   ├── inception_v1_test.py
│   │   ├── inception_v2.py
│   │   ├── inception_v2_test.py
│   │   ├── inception_v3.py
│   │   ├── inception_v3_test.py
│   │   ├── inception_v4.py
│   │   ├── inception_v4_test.py
│   │   ├── lenet.py
│   │   ├── nets_factory.py
│   │   ├── nets_factory_test.py
│   │   ├── overfeat.py
│   │   ├── overfeat_test.py
│   │   ├── resnet_utils.py
│   │   ├── resnet_v1.py
│   │   ├── resnet_v1_test.py
│   │   ├── resnet_v2.py
│   │   ├── resnet_v2_test.py
│   │   ├── vgg.py
│   │   └── vgg_test.py
│   ├── preprocessing
│   │   ├── __init__.py
│   │   ├── cifarnet_preprocessing.py
│   │   ├── inception_preprocessing.py
│   │   ├── lenet_preprocessing.py
│   │   ├── preprocessing_factory.py
│   │   └── vgg_preprocessing.py
│   ├── scripts
│   │   ├── finetune_inception_v1_on_flowers.sh
│   │   ├── finetune_inception_v3_on_flowers.sh
│   │   ├── finetune_resnet_v1_50_on_flowers.sh
│   │   ├── train_cifarnet_on_cifar10.sh
│   │   └── train_lenet_on_mnist.sh
│   ├── slim_walkthrough.ipynb
│   └── train_image_classifier.py
└── terngrad
    ├── WORKSPACE
    ├── build_all.sh
    ├── config_dist.sh
    ├── deprecated
    │   ├── run_multi_gpus.sh
    │   ├── run_multi_gpus_cifar10_quick.sh
    │   ├── run_multi_gpus_googlenet.sh
    │   └── run_multi_gpus_vggnet.sh
    ├── g3doc
    │   └── inception_v3_architecture.png
    ├── inception
    │   ├── BUILD
    │   ├── __init__.py
    │   ├── bingrad_common.py
    │   ├── cifar10_data.py
    │   ├── cifar10_distributed_train.py
    │   ├── cifar10_eval.py
    │   ├── cifar10_train.py
    │   ├── data
    │   │   ├── __init__.py
    │   │   ├── build_image_data.py
    │   │   ├── build_imagenet_data.py
    │   │   ├── download_and_preprocess_flowers.sh
    │   │   ├── download_and_preprocess_flowers_mac.sh
    │   │   ├── download_and_preprocess_imagenet.sh
    │   │   ├── download_imagenet.sh
    │   │   ├── imagenet_2012_validation_synset_labels.txt
    │   │   ├── imagenet_lsvrc_2015_synsets.txt
    │   │   ├── imagenet_metadata.txt
    │   │   ├── preprocess_imagenet_validation_data.py
    │   │   └── process_bounding_boxes.py
    │   ├── dataset.py
    │   ├── flowers_data.py
    │   ├── flowers_eval.py
    │   ├── flowers_train.py
    │   ├── image_processing.py
    │   ├── imagenet_data.py
    │   ├── imagenet_distributed_train.py
    │   ├── imagenet_eval.py
    │   ├── imagenet_train.py
    │   ├── inception_distributed_train.py
    │   ├── inception_eval.py
    │   ├── inception_model.py
    │   ├── inception_train.py
    │   ├── lenet_preprocessing.py
    │   ├── mnist_data.py
    │   ├── mnist_eval.py
    │   ├── mnist_train.py
    │   ├── models.py
    │   ├── slim
    │   │   ├── BUILD
    │   │   ├── README.md
    │   │   ├── __init__.py
    │   │   ├── collections_test.py
    │   │   ├── inception_model.py
    │   │   ├── inception_test.py
    │   │   ├── inception_utils.py
    │   │   ├── inception_v1.py
    │   │   ├── losses.py
    │   │   ├── losses_test.py
    │   │   ├── models.py
    │   │   ├── ops.py
    │   │   ├── ops_test.py
    │   │   ├── scopes.py
    │   │   ├── scopes_test.py
    │   │   ├── slim.py
    │   │   ├── variables.py
    │   │   └── variables_test.py
    │   └── vgg_preprocessing.py
    ├── kill_local.sh
    ├── run_dist.sh
    ├── run_dist_cifar10.sh
    ├── run_multi_gpus_alexnet.sh
    ├── run_multi_gpus_cifar10.sh
    ├── run_multi_gpus_googlenet_quick.sh
    ├── run_multi_gpus_lenet.sh
    ├── run_single_ps_cifar10.sh
    ├── run_single_ps_imagenet.sh
    ├── run_single_worker_alexnet.sh
    ├── run_single_worker_cifarnet.sh
    ├── serialize_tensorboard.py
    ├── split_dataset.sh
    ├── stop_dist.sh
    └── test
        └── test_ternary_encoder_decoder.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | terngrad/bazel-bin
 2 | terngrad/bazel-ci_build-cache
 3 | terngrad/bazel-genfiles
 4 | terngrad/bazel-out
 5 | terngrad/bazel-inception
 6 | terngrad/bazel-terngrad
 7 | terngrad/bazel-testlogs
 8 | terngrad/bazel-tf
 9 | /.idea
10 | */.idea
11 | *.pyc

--------------------------------------------------------------------------------
/NIPS17-TernGrad-slides-v3.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenwei202/terngrad/ec4f75e9a3a1e1c4b2e6494d830fbdfdd2e03ddc/NIPS17-TernGrad-slides-v3.pdf

--------------------------------------------------------------------------------
/Poster_Wen_NIPS2017.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenwei202/terngrad/ec4f75e9a3a1e1c4b2e6494d830fbdfdd2e03ddc/Poster_Wen_NIPS2017.pdf

--------------------------------------------------------------------------------
/slim/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |

--------------------------------------------------------------------------------
/slim/datasets/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |

--------------------------------------------------------------------------------
/slim/datasets/cifar10.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Provides data for the Cifar10 dataset.
16 |
17 | The dataset scripts used to create the dataset can be found at:
18 | tensorflow/models/slim/datasets/download_and_convert_cifar10.py
19 | """
20 |
21 | from __future__ import absolute_import
22 | from __future__ import division
23 | from __future__ import print_function
24 |
25 | import os
26 | import tensorflow as tf
27 |
28 | from datasets import dataset_utils
29 |
30 | slim = tf.contrib.slim
31 |
32 | _FILE_PATTERN = 'cifar10_%s.tfrecord'
33 |
34 | SPLITS_TO_SIZES = {'train': 50000, 'test': 10000}
35 |
36 | _NUM_CLASSES = 10
37 |
38 | _ITEMS_TO_DESCRIPTIONS = {
39 |     'image': 'A [32 x 32 x 3] color image.',
40 |     'label': 'A single integer between 0 and 9',
41 | }
42 |
43 |
44 | def get_split(split_name, dataset_dir, file_pattern=None, reader=None):
45 |   """Gets a dataset tuple with instructions for reading cifar10.
46 |
47 |   Args:
48 |     split_name: A train/test split name.
49 |     dataset_dir: The base directory of the dataset sources.
50 |     file_pattern: The file pattern to use when matching the dataset sources.
51 | It is assumed that the pattern contains a '%s' string so that the split 52 | name can be inserted. 53 | reader: The TensorFlow reader type. 54 | 55 | Returns: 56 | A `Dataset` namedtuple. 57 | 58 | Raises: 59 | ValueError: if `split_name` is not a valid train/test split. 60 | """ 61 | if split_name not in SPLITS_TO_SIZES: 62 | raise ValueError('split name %s was not recognized.' % split_name) 63 | 64 | if not file_pattern: 65 | file_pattern = _FILE_PATTERN 66 | file_pattern = os.path.join(dataset_dir, file_pattern % split_name) 67 | 68 | # Allowing None in the signature so that dataset_factory can use the default. 69 | if not reader: 70 | reader = tf.TFRecordReader 71 | 72 | keys_to_features = { 73 | 'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''), 74 | 'image/format': tf.FixedLenFeature((), tf.string, default_value='png'), 75 | 'image/class/label': tf.FixedLenFeature( 76 | [], tf.int64, default_value=tf.zeros([], dtype=tf.int64)), 77 | } 78 | 79 | items_to_handlers = { 80 | 'image': slim.tfexample_decoder.Image(shape=[32, 32, 3]), 81 | 'label': slim.tfexample_decoder.Tensor('image/class/label'), 82 | } 83 | 84 | decoder = slim.tfexample_decoder.TFExampleDecoder( 85 | keys_to_features, items_to_handlers) 86 | 87 | labels_to_names = None 88 | if dataset_utils.has_labels(dataset_dir): 89 | labels_to_names = dataset_utils.read_label_file(dataset_dir) 90 | 91 | return slim.dataset.Dataset( 92 | data_sources=file_pattern, 93 | reader=reader, 94 | decoder=decoder, 95 | num_samples=SPLITS_TO_SIZES[split_name], 96 | items_to_descriptions=_ITEMS_TO_DESCRIPTIONS, 97 | num_classes=_NUM_CLASSES, 98 | labels_to_names=labels_to_names) 99 | -------------------------------------------------------------------------------- /slim/datasets/dataset_factory.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """A factory-pattern class which returns classification image/label pairs.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | from datasets import cifar10 22 | from datasets import flowers 23 | from datasets import imagenet 24 | from datasets import mnist 25 | 26 | datasets_map = { 27 | 'cifar10': cifar10, 28 | 'flowers': flowers, 29 | 'imagenet': imagenet, 30 | 'mnist': mnist, 31 | } 32 | 33 | 34 | def get_dataset(name, split_name, dataset_dir, file_pattern=None, reader=None): 35 | """Given a dataset name and a split_name returns a Dataset. 36 | 37 | Args: 38 | name: String, the name of the dataset. 39 | split_name: A train/test split name. 40 | dataset_dir: The directory where the dataset files are stored. 41 | file_pattern: The file pattern to use for matching the dataset source files. 
42 | reader: The subclass of tf.ReaderBase. If left as `None`, then the default 43 | reader defined by each dataset is used. 44 | 45 | Returns: 46 | A `Dataset` class. 47 | 48 | Raises: 49 | ValueError: If the dataset `name` is unknown. 50 | """ 51 | if name not in datasets_map: 52 | raise ValueError('Name of dataset unknown %s' % name) 53 | return datasets_map[name].get_split( 54 | split_name, 55 | dataset_dir, 56 | file_pattern, 57 | reader) 58 | -------------------------------------------------------------------------------- /slim/datasets/dataset_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains utilities for downloading and converting datasets.""" 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | import os 21 | import sys 22 | import tarfile 23 | 24 | from six.moves import urllib 25 | import tensorflow as tf 26 | 27 | LABELS_FILENAME = 'labels.txt' 28 | 29 | 30 | def int64_feature(values): 31 | """Returns a TF-Feature of int64s. 32 | 33 | Args: 34 | values: A scalar or list of values. 35 | 36 | Returns: 37 | a TF-Feature. 38 | """ 39 | if not isinstance(values, (tuple, list)): 40 | values = [values] 41 | return tf.train.Feature(int64_list=tf.train.Int64List(value=values)) 42 | 43 | def float_feature(value): 44 | """Wrapper for inserting float features into Example proto.""" 45 | if not isinstance(value, list): 46 | value = [value] 47 | return tf.train.Feature(float_list=tf.train.FloatList(value=value)) 48 | 49 | def bytes_feature(values): 50 | """Returns a TF-Feature of bytes. 51 | 52 | Args: 53 | values: A string. 54 | 55 | Returns: 56 | a TF-Feature. 
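
As a quick, hedged illustration of the factory just defined (not part of the repository sources; the /tmp/cifar10 path is hypothetical and assumed to already hold TFRecords produced by download_and_convert_data.py), get_dataset plugs straight into a slim data provider:

import tensorflow as tf
from datasets import dataset_factory

slim = tf.contrib.slim

# Look up the cifar10 module via the factory and build its train split.
dataset = dataset_factory.get_dataset('cifar10', 'train', '/tmp/cifar10')
# A DatasetDataProvider turns the Dataset into image/label tensors.
provider = slim.dataset_data_provider.DatasetDataProvider(dataset)
image, label = provider.get(['image', 'label'])
print(dataset.num_samples, dataset.num_classes)  # 50000 10
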
57 | """ 58 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=[values])) 59 | 60 | 61 | def image_to_tfexample(image_data, image_format, height, width, class_id, human_label='', channels=3): 62 | colorspace = b'RGB' 63 | #channels = 3 64 | xmin = [] 65 | ymin = [] 66 | xmax = [] 67 | ymax = [] 68 | return tf.train.Example(features=tf.train.Features(feature={ 69 | 'image/encoded': bytes_feature(image_data), 70 | 'image/format': bytes_feature(image_format), 71 | 'image/class/label': int64_feature(class_id), 72 | 'image/height': int64_feature(height), 73 | 'image/width': int64_feature(width), 74 | 'image/colorspace': bytes_feature(colorspace), 75 | 'image/channels': int64_feature(channels), 76 | 'image/object/bbox/xmin': float_feature(xmin), 77 | 'image/object/bbox/xmax': float_feature(xmax), 78 | 'image/object/bbox/ymin': float_feature(ymin), 79 | 'image/object/bbox/ymax': float_feature(ymax), 80 | 'image/class/text': bytes_feature(human_label), 81 | 82 | })) 83 | 84 | 85 | def download_and_uncompress_tarball(tarball_url, dataset_dir): 86 | """Downloads the `tarball_url` and uncompresses it locally. 87 | 88 | Args: 89 | tarball_url: The URL of a tarball file. 90 | dataset_dir: The directory where the temporary files are stored. 91 | """ 92 | filename = tarball_url.split('/')[-1] 93 | filepath = os.path.join(dataset_dir, filename) 94 | 95 | def _progress(count, block_size, total_size): 96 | sys.stdout.write('\r>> Downloading %s %.1f%%' % ( 97 | filename, float(count * block_size) / float(total_size) * 100.0)) 98 | sys.stdout.flush() 99 | filepath, _ = urllib.request.urlretrieve(tarball_url, filepath, _progress) 100 | print() 101 | statinfo = os.stat(filepath) 102 | print('Successfully downloaded', filename, statinfo.st_size, 'bytes.') 103 | tarfile.open(filepath, 'r:gz').extractall(dataset_dir) 104 | 105 | 106 | def write_label_file(labels_to_class_names, dataset_dir, 107 | filename=LABELS_FILENAME): 108 | """Writes a file with the list of class names. 109 | 110 | Args: 111 | labels_to_class_names: A map of (integer) labels to class names. 112 | dataset_dir: The directory in which the labels file should be written. 113 | filename: The filename where the class names are written. 114 | """ 115 | labels_filename = os.path.join(dataset_dir, filename) 116 | with tf.gfile.Open(labels_filename, 'w') as f: 117 | for label in labels_to_class_names: 118 | class_name = labels_to_class_names[label] 119 | f.write('%d:%s\n' % (label, class_name)) 120 | 121 | 122 | def has_labels(dataset_dir, filename=LABELS_FILENAME): 123 | """Specifies whether or not the dataset directory contains a label map file. 124 | 125 | Args: 126 | dataset_dir: The directory in which the labels file is found. 127 | filename: The filename where the class names are written. 128 | 129 | Returns: 130 | `True` if the labels file exists and `False` otherwise. 131 | """ 132 | return tf.gfile.Exists(os.path.join(dataset_dir, filename)) 133 | 134 | 135 | def read_label_file(dataset_dir, filename=LABELS_FILENAME): 136 | """Reads the labels file and returns a mapping from ID to class name. 137 | 138 | Args: 139 | dataset_dir: The directory in which the labels file is found. 140 | filename: The filename where the class names are written. 141 | 142 | Returns: 143 | A map from a label (integer) to class name. 
144 |   """
145 |   labels_filename = os.path.join(dataset_dir, filename)
146 |   with tf.gfile.Open(labels_filename, 'rb') as f:
147 |     lines = f.read().decode()
148 |   lines = lines.split('\n')
149 |   lines = filter(None, lines)
150 |
151 |   labels_to_class_names = {}
152 |   for line in lines:
153 |     index = line.index(':')
154 |     labels_to_class_names[int(line[:index])] = line[index+1:]
155 |   return labels_to_class_names
156 |

--------------------------------------------------------------------------------
/slim/datasets/flowers.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Provides data for the flowers dataset.
16 |
17 | The dataset scripts used to create the dataset can be found at:
18 | tensorflow/models/slim/datasets/download_and_convert_flowers.py
19 | """
20 |
21 | from __future__ import absolute_import
22 | from __future__ import division
23 | from __future__ import print_function
24 |
25 | import os
26 | import tensorflow as tf
27 |
28 | from datasets import dataset_utils
29 |
30 | slim = tf.contrib.slim
31 |
32 | _FILE_PATTERN = 'flowers_%s_*.tfrecord'
33 |
34 | SPLITS_TO_SIZES = {'train': 3320, 'validation': 350}
35 |
36 | _NUM_CLASSES = 5
37 |
38 | _ITEMS_TO_DESCRIPTIONS = {
39 |     'image': 'A color image of varying size.',
40 |     'label': 'A single integer between 0 and 4',
41 | }
42 |
43 |
44 | def get_split(split_name, dataset_dir, file_pattern=None, reader=None):
45 |   """Gets a dataset tuple with instructions for reading flowers.
46 |
47 |   Args:
48 |     split_name: A train/validation split name.
49 |     dataset_dir: The base directory of the dataset sources.
50 |     file_pattern: The file pattern to use when matching the dataset sources.
51 |       It is assumed that the pattern contains a '%s' string so that the split
52 |       name can be inserted.
53 |     reader: The TensorFlow reader type.
54 |
55 |   Returns:
56 |     A `Dataset` namedtuple.
57 |
58 |   Raises:
59 |     ValueError: if `split_name` is not a valid train/validation split.
60 |   """
61 |   if split_name not in SPLITS_TO_SIZES:
62 |     raise ValueError('split name %s was not recognized.' % split_name)
63 |
64 |   if not file_pattern:
65 |     file_pattern = _FILE_PATTERN
66 |   file_pattern = os.path.join(dataset_dir, file_pattern % split_name)
67 |
68 |   # Allowing None in the signature so that dataset_factory can use the default.
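
A minimal sketch of the label-file helpers from dataset_utils above (the /tmp/labels_demo directory is hypothetical):

import tensorflow as tf
from datasets import dataset_utils

dataset_dir = '/tmp/labels_demo'
tf.gfile.MakeDirs(dataset_dir)
# Write an integer-label -> class-name map, then read it back the same way
# get_split() does when it populates labels_to_names.
dataset_utils.write_label_file({0: 'daisy', 1: 'rose'}, dataset_dir)
if dataset_utils.has_labels(dataset_dir):
  print(dataset_utils.read_label_file(dataset_dir))  # {0: 'daisy', 1: 'rose'}
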
69 | if reader is None: 70 | reader = tf.TFRecordReader 71 | 72 | keys_to_features = { 73 | 'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''), 74 | 'image/format': tf.FixedLenFeature((), tf.string, default_value='png'), 75 | 'image/class/label': tf.FixedLenFeature( 76 | [], tf.int64, default_value=tf.zeros([], dtype=tf.int64)), 77 | } 78 | 79 | items_to_handlers = { 80 | 'image': slim.tfexample_decoder.Image(), 81 | 'label': slim.tfexample_decoder.Tensor('image/class/label'), 82 | } 83 | 84 | decoder = slim.tfexample_decoder.TFExampleDecoder( 85 | keys_to_features, items_to_handlers) 86 | 87 | labels_to_names = None 88 | if dataset_utils.has_labels(dataset_dir): 89 | labels_to_names = dataset_utils.read_label_file(dataset_dir) 90 | 91 | return slim.dataset.Dataset( 92 | data_sources=file_pattern, 93 | reader=reader, 94 | decoder=decoder, 95 | num_samples=SPLITS_TO_SIZES[split_name], 96 | items_to_descriptions=_ITEMS_TO_DESCRIPTIONS, 97 | num_classes=_NUM_CLASSES, 98 | labels_to_names=labels_to_names) 99 | -------------------------------------------------------------------------------- /slim/datasets/mnist.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Provides data for the MNIST dataset. 16 | 17 | The dataset scripts used to create the dataset can be found at: 18 | tensorflow/models/slim/datasets/download_and_convert_mnist.py 19 | """ 20 | 21 | from __future__ import absolute_import 22 | from __future__ import division 23 | from __future__ import print_function 24 | 25 | import os 26 | import tensorflow as tf 27 | 28 | from datasets import dataset_utils 29 | 30 | slim = tf.contrib.slim 31 | 32 | _FILE_PATTERN = 'mnist_%s.tfrecord' 33 | 34 | _SPLITS_TO_SIZES = {'train': 60000, 'test': 10000} 35 | 36 | _NUM_CLASSES = 10 37 | 38 | _ITEMS_TO_DESCRIPTIONS = { 39 | 'image': 'A [28 x 28 x 1] grayscale image.', 40 | 'label': 'A single integer between 0 and 9', 41 | } 42 | 43 | 44 | def get_split(split_name, dataset_dir, file_pattern=None, reader=None): 45 | """Gets a dataset tuple with instructions for reading MNIST. 46 | 47 | Args: 48 | split_name: A train/test split name. 49 | dataset_dir: The base directory of the dataset sources. 50 | file_pattern: The file pattern to use when matching the dataset sources. 51 | It is assumed that the pattern contains a '%s' string so that the split 52 | name can be inserted. 53 | reader: The TensorFlow reader type. 54 | 55 | Returns: 56 | A `Dataset` namedtuple. 57 | 58 | Raises: 59 | ValueError: if `split_name` is not a valid train/test split. 60 | """ 61 | if split_name not in _SPLITS_TO_SIZES: 62 | raise ValueError('split name %s was not recognized.' 
% split_name) 63 | 64 | if not file_pattern: 65 | file_pattern = _FILE_PATTERN 66 | file_pattern = os.path.join(dataset_dir, file_pattern % split_name) 67 | 68 | # Allowing None in the signature so that dataset_factory can use the default. 69 | if reader is None: 70 | reader = tf.TFRecordReader 71 | 72 | keys_to_features = { 73 | 'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''), 74 | 'image/format': tf.FixedLenFeature((), tf.string, default_value='raw'), 75 | 'image/class/label': tf.FixedLenFeature( 76 | [1], tf.int64, default_value=tf.zeros([1], dtype=tf.int64)), 77 | } 78 | 79 | items_to_handlers = { 80 | 'image': slim.tfexample_decoder.Image(shape=[28, 28, 1], channels=1), 81 | 'label': slim.tfexample_decoder.Tensor('image/class/label', shape=[]), 82 | } 83 | 84 | decoder = slim.tfexample_decoder.TFExampleDecoder( 85 | keys_to_features, items_to_handlers) 86 | 87 | labels_to_names = None 88 | if dataset_utils.has_labels(dataset_dir): 89 | labels_to_names = dataset_utils.read_label_file(dataset_dir) 90 | 91 | return slim.dataset.Dataset( 92 | data_sources=file_pattern, 93 | reader=reader, 94 | decoder=decoder, 95 | num_samples=_SPLITS_TO_SIZES[split_name], 96 | num_classes=_NUM_CLASSES, 97 | items_to_descriptions=_ITEMS_TO_DESCRIPTIONS, 98 | labels_to_names=labels_to_names) 99 | -------------------------------------------------------------------------------- /slim/deployment/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /slim/download_and_convert_data.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | r"""Downloads and converts a particular dataset. 
16 |
17 | Usage:
18 | ```shell
19 |
20 | $ python download_and_convert_data.py \
21 |     --dataset_name=mnist \
22 |     --dataset_dir=/tmp/mnist
23 |
24 | $ python download_and_convert_data.py \
25 |     --dataset_name=cifar10 \
26 |     --dataset_dir=/tmp/cifar10
27 |
28 | $ python download_and_convert_data.py \
29 |     --dataset_name=flowers \
30 |     --dataset_dir=/tmp/flowers
31 | ```
32 | """
33 | from __future__ import absolute_import
34 | from __future__ import division
35 | from __future__ import print_function
36 |
37 | import tensorflow as tf
38 |
39 | from datasets import download_and_convert_cifar10
40 | from datasets import download_convert_and_shard_cifar10
41 | from datasets import download_and_convert_flowers
42 | from datasets import download_and_convert_mnist
43 |
44 | FLAGS = tf.app.flags.FLAGS
45 |
46 | tf.app.flags.DEFINE_string(
47 |     'dataset_name',
48 |     None,
49 |     'The name of the dataset to convert, one of "cifar10", "flowers", "mnist".')
50 |
51 | tf.app.flags.DEFINE_string(
52 |     'dataset_dir',
53 |     None,
54 |     'The directory where the output TFRecords and temporary files are saved.')
55 |
56 | tf.app.flags.DEFINE_bool(
57 |     'shard',
58 |     False,
59 |     'Whether to break the cifar10 train dataset into multiple TFRecord shards.')
60 |
61 | def main(_):
62 |   if not FLAGS.dataset_name:
63 |     raise ValueError('You must supply the dataset name with --dataset_name')
64 |   if not FLAGS.dataset_dir:
65 |     raise ValueError('You must supply the dataset directory with --dataset_dir')
66 |
67 |   if FLAGS.dataset_name == 'cifar10':
68 |     if FLAGS.shard:
69 |       download_convert_and_shard_cifar10.run(FLAGS.dataset_dir)
70 |     else:
71 |       download_and_convert_cifar10.run(FLAGS.dataset_dir)
72 |   elif FLAGS.dataset_name == 'flowers':
73 |     download_and_convert_flowers.run(FLAGS.dataset_dir)
74 |   elif FLAGS.dataset_name == 'mnist':
75 |     download_and_convert_mnist.run(FLAGS.dataset_dir)
76 |   else:
77 |     raise ValueError(
78 |         'dataset_name [%s] was not recognized.' % FLAGS.dataset_name)
79 |
80 | if __name__ == '__main__':
81 |   tf.app.run()
82 |
83 |

--------------------------------------------------------------------------------
/slim/nets/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |

--------------------------------------------------------------------------------
/slim/nets/alexnet.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Contains a model definition for AlexNet.
16 |
17 | This work was first described in:
18 |   ImageNet Classification with Deep Convolutional Neural Networks
19 |   Alex Krizhevsky, Ilya Sutskever and Geoffrey E. Hinton
20 |
21 | and later refined in:
22 |   One weird trick for parallelizing convolutional neural networks
23 |   Alex Krizhevsky, 2014
24 |
25 | Here we provide the implementation proposed in "One weird trick" and not
26 | "ImageNet Classification"; as per that paper, the LRN layers have been removed.
27 |
28 | Usage:
29 |   with slim.arg_scope(alexnet.alexnet_v2_arg_scope()):
30 |     outputs, end_points = alexnet.alexnet_v2(inputs)
31 |
32 | @@alexnet_v2
33 | """
34 |
35 | from __future__ import absolute_import
36 | from __future__ import division
37 | from __future__ import print_function
38 |
39 | import tensorflow as tf
40 |
41 | slim = tf.contrib.slim
42 | trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev)
43 |
44 |
45 | def alexnet_v2_arg_scope(weight_decay=0.0005):
46 |   with slim.arg_scope([slim.conv2d, slim.fully_connected],
47 |                       activation_fn=tf.nn.relu,
48 |                       biases_initializer=tf.constant_initializer(0.1),
49 |                       weights_regularizer=slim.l2_regularizer(weight_decay)):
50 |     with slim.arg_scope([slim.conv2d], padding='SAME'):
51 |       with slim.arg_scope([slim.max_pool2d], padding='VALID') as arg_sc:
52 |         return arg_sc
53 |
54 |
55 | def alexnet_v2(inputs,
56 |                num_classes=1000,
57 |                is_training=True,
58 |                dropout_keep_prob=0.5,
59 |                spatial_squeeze=True,
60 |                scope='alexnet_v2'):
61 |   """AlexNet version 2.
62 |
63 |   Described in: http://arxiv.org/pdf/1404.5997v2.pdf
64 |   Parameters from:
65 |   github.com/akrizhevsky/cuda-convnet2/blob/master/layers/
66 |   layers-imagenet-1gpu.cfg
67 |
68 |   Note: All the fully_connected layers have been transformed to conv2d layers.
69 |         To use in classification mode, resize input to 224x224. To use in fully
70 |         convolutional mode, set spatial_squeeze to false.
71 |         The LRN layers have been removed and the initializers changed from
72 |         random_normal_initializer to xavier_initializer.
73 |
74 |   Args:
75 |     inputs: a tensor of size [batch_size, height, width, channels].
76 |     num_classes: number of predicted classes.
77 |     is_training: whether or not the model is being trained.
78 |     dropout_keep_prob: the probability that activations are kept in the dropout
79 |       layers during training.
80 |     spatial_squeeze: whether or not to squeeze the spatial dimensions of the
81 |       outputs. Useful to remove unnecessary dimensions for classification.
82 |     scope: Optional scope for the variables.
83 |
84 |   Returns:
85 |     the last op containing the log predictions and end_points dict.
86 |   """
87 |   with tf.variable_scope(scope, 'alexnet_v2', [inputs]) as sc:
88 |     end_points_collection = sc.name + '_end_points'
89 |     # Collect outputs for conv2d, fully_connected and max_pool2d.
90 |     with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
91 |                         outputs_collections=[end_points_collection]):
92 |       net = slim.conv2d(inputs, 64, [11, 11], 4, padding='VALID',
93 |                         scope='conv1')
94 |       net = slim.max_pool2d(net, [3, 3], 2, scope='pool1')
95 |       net = slim.conv2d(net, 192, [5, 5], scope='conv2')
96 |       net = slim.max_pool2d(net, [3, 3], 2, scope='pool2')
97 |       net = slim.conv2d(net, 384, [3, 3], scope='conv3')
98 |       net = slim.conv2d(net, 384, [3, 3], scope='conv4')
99 |       net = slim.conv2d(net, 256, [3, 3], scope='conv5')
100 |       net = slim.max_pool2d(net, [3, 3], 2, scope='pool5')
101 |
102 |       # Use conv2d instead of fully_connected layers.
103 | with slim.arg_scope([slim.conv2d], 104 | weights_initializer=trunc_normal(0.005), 105 | biases_initializer=tf.constant_initializer(0.1)): 106 | net = slim.conv2d(net, 4096, [5, 5], padding='VALID', 107 | scope='fc6') 108 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 109 | scope='dropout6') 110 | net = slim.conv2d(net, 4096, [1, 1], scope='fc7') 111 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 112 | scope='dropout7') 113 | net = slim.conv2d(net, num_classes, [1, 1], 114 | activation_fn=None, 115 | normalizer_fn=None, 116 | biases_initializer=tf.zeros_initializer(), 117 | scope='fc8') 118 | 119 | # Convert end_points_collection into a end_point dict. 120 | end_points = slim.utils.convert_collection_to_dict(end_points_collection) 121 | if spatial_squeeze: 122 | net = tf.squeeze(net, [1, 2], name='fc8/squeezed') 123 | end_points[sc.name + '/fc8'] = net 124 | return net, end_points 125 | alexnet_v2.default_image_size = 224 126 | -------------------------------------------------------------------------------- /slim/nets/alexnet_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
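
A minimal inference sketch (not part of the original sources) showing alexnet_v2 combined with its arg scope, per the Usage note in the module docstring:

import tensorflow as tf
from nets import alexnet

slim = tf.contrib.slim

images = tf.random_uniform((1, 224, 224, 3))  # alexnet_v2.default_image_size
with slim.arg_scope(alexnet.alexnet_v2_arg_scope()):
  outputs, end_points = alexnet.alexnet_v2(images, num_classes=1000,
                                           is_training=False)
print(outputs.get_shape())  # (1, 1000) after the fc8/squeezed op
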
14 | # ============================================================================== 15 | """Tests for slim.nets.alexnet.""" 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | import tensorflow as tf 21 | 22 | from nets import alexnet 23 | 24 | slim = tf.contrib.slim 25 | 26 | 27 | class AlexnetV2Test(tf.test.TestCase): 28 | 29 | def testBuild(self): 30 | batch_size = 5 31 | height, width = 224, 224 32 | num_classes = 1000 33 | with self.test_session(): 34 | inputs = tf.random_uniform((batch_size, height, width, 3)) 35 | logits, _ = alexnet.alexnet_v2(inputs, num_classes) 36 | self.assertEquals(logits.op.name, 'alexnet_v2/fc8/squeezed') 37 | self.assertListEqual(logits.get_shape().as_list(), 38 | [batch_size, num_classes]) 39 | 40 | def testFullyConvolutional(self): 41 | batch_size = 1 42 | height, width = 300, 400 43 | num_classes = 1000 44 | with self.test_session(): 45 | inputs = tf.random_uniform((batch_size, height, width, 3)) 46 | logits, _ = alexnet.alexnet_v2(inputs, num_classes, spatial_squeeze=False) 47 | self.assertEquals(logits.op.name, 'alexnet_v2/fc8/BiasAdd') 48 | self.assertListEqual(logits.get_shape().as_list(), 49 | [batch_size, 4, 7, num_classes]) 50 | 51 | def testEndPoints(self): 52 | batch_size = 5 53 | height, width = 224, 224 54 | num_classes = 1000 55 | with self.test_session(): 56 | inputs = tf.random_uniform((batch_size, height, width, 3)) 57 | _, end_points = alexnet.alexnet_v2(inputs, num_classes) 58 | expected_names = ['alexnet_v2/conv1', 59 | 'alexnet_v2/pool1', 60 | 'alexnet_v2/conv2', 61 | 'alexnet_v2/pool2', 62 | 'alexnet_v2/conv3', 63 | 'alexnet_v2/conv4', 64 | 'alexnet_v2/conv5', 65 | 'alexnet_v2/pool5', 66 | 'alexnet_v2/fc6', 67 | 'alexnet_v2/fc7', 68 | 'alexnet_v2/fc8' 69 | ] 70 | self.assertSetEqual(set(end_points.keys()), set(expected_names)) 71 | 72 | def testModelVariables(self): 73 | batch_size = 5 74 | height, width = 224, 224 75 | num_classes = 1000 76 | with self.test_session(): 77 | inputs = tf.random_uniform((batch_size, height, width, 3)) 78 | alexnet.alexnet_v2(inputs, num_classes) 79 | expected_names = ['alexnet_v2/conv1/weights', 80 | 'alexnet_v2/conv1/biases', 81 | 'alexnet_v2/conv2/weights', 82 | 'alexnet_v2/conv2/biases', 83 | 'alexnet_v2/conv3/weights', 84 | 'alexnet_v2/conv3/biases', 85 | 'alexnet_v2/conv4/weights', 86 | 'alexnet_v2/conv4/biases', 87 | 'alexnet_v2/conv5/weights', 88 | 'alexnet_v2/conv5/biases', 89 | 'alexnet_v2/fc6/weights', 90 | 'alexnet_v2/fc6/biases', 91 | 'alexnet_v2/fc7/weights', 92 | 'alexnet_v2/fc7/biases', 93 | 'alexnet_v2/fc8/weights', 94 | 'alexnet_v2/fc8/biases', 95 | ] 96 | model_variables = [v.op.name for v in slim.get_model_variables()] 97 | self.assertSetEqual(set(model_variables), set(expected_names)) 98 | 99 | def testEvaluation(self): 100 | batch_size = 2 101 | height, width = 224, 224 102 | num_classes = 1000 103 | with self.test_session(): 104 | eval_inputs = tf.random_uniform((batch_size, height, width, 3)) 105 | logits, _ = alexnet.alexnet_v2(eval_inputs, is_training=False) 106 | self.assertListEqual(logits.get_shape().as_list(), 107 | [batch_size, num_classes]) 108 | predictions = tf.argmax(logits, 1) 109 | self.assertListEqual(predictions.get_shape().as_list(), [batch_size]) 110 | 111 | def testTrainEvalWithReuse(self): 112 | train_batch_size = 2 113 | eval_batch_size = 1 114 | train_height, train_width = 224, 224 115 | eval_height, eval_width = 300, 400 116 | num_classes = 1000 117 | with self.test_session(): 
118 |       train_inputs = tf.random_uniform(
119 |           (train_batch_size, train_height, train_width, 3))
120 |       logits, _ = alexnet.alexnet_v2(train_inputs)
121 |       self.assertListEqual(logits.get_shape().as_list(),
122 |                            [train_batch_size, num_classes])
123 |       tf.get_variable_scope().reuse_variables()
124 |       eval_inputs = tf.random_uniform(
125 |           (eval_batch_size, eval_height, eval_width, 3))
126 |       logits, _ = alexnet.alexnet_v2(eval_inputs, is_training=False,
127 |                                      spatial_squeeze=False)
128 |       self.assertListEqual(logits.get_shape().as_list(),
129 |                            [eval_batch_size, 4, 7, num_classes])
130 |       logits = tf.reduce_mean(logits, [1, 2])
131 |       predictions = tf.argmax(logits, 1)
132 |       self.assertEquals(predictions.get_shape().as_list(), [eval_batch_size])
133 |
134 |   def testForward(self):
135 |     batch_size = 1
136 |     height, width = 224, 224
137 |     with self.test_session() as sess:
138 |       inputs = tf.random_uniform((batch_size, height, width, 3))
139 |       logits, _ = alexnet.alexnet_v2(inputs)
140 |       sess.run(tf.global_variables_initializer())
141 |       output = sess.run(logits)
142 |       self.assertTrue(output.any())
143 |
144 | if __name__ == '__main__':
145 |   tf.test.main()
146 |

--------------------------------------------------------------------------------
/slim/nets/cifarnet.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Contains a variant of the CIFAR-10 model definition."""
16 |
17 | from __future__ import absolute_import
18 | from __future__ import division
19 | from __future__ import print_function
20 |
21 | import tensorflow as tf
22 |
23 | slim = tf.contrib.slim
24 |
25 | trunc_normal = lambda stddev: tf.truncated_normal_initializer(stddev=stddev)
26 |
27 |
28 | def cifarnet(images, num_classes=10, is_training=False,
29 |              dropout_keep_prob=0.5,
30 |              prediction_fn=slim.softmax,
31 |              scope='CifarNet'):
32 |   """Creates a variant of the CifarNet model.
33 |
34 |   Note that since the output is a set of 'logits', the values fall in the
35 |   interval of (-infinity, infinity). Consequently, to convert the outputs to a
36 |   probability distribution over the classes, one will need to convert them
37 |   using the softmax function:
38 |
39 |       logits = cifarnet.cifarnet(images, is_training=False)
40 |       probabilities = tf.nn.softmax(logits)
41 |       predictions = tf.argmax(logits, 1)
42 |
43 |   Args:
44 |     images: A batch of `Tensors` of size [batch_size, height, width, channels].
45 |     num_classes: the number of classes in the dataset.
46 |     is_training: specifies whether or not we're currently training the model.
47 |       This variable will determine the behaviour of the dropout layer.
48 |     dropout_keep_prob: the percentage of activation values that are retained.
49 |     prediction_fn: a function to get predictions out of logits.
50 |     scope: Optional variable_scope.
51 |
52 |   Returns:
53 |     logits: the pre-softmax activations, a tensor of size
54 |       [batch_size, `num_classes`]
55 |     end_points: a dictionary from components of the network to the corresponding
56 |       activation.
57 |   """
58 |   end_points = {}
59 |
60 |   with tf.variable_scope(scope, 'CifarNet', [images, num_classes]):
61 |     net = slim.conv2d(images, 64, [5, 5], scope='conv1')
62 |     end_points['conv1'] = net
63 |     net = slim.max_pool2d(net, [2, 2], 2, scope='pool1')
64 |     end_points['pool1'] = net
65 |     net = tf.nn.lrn(net, 4, bias=1.0, alpha=0.001/9.0, beta=0.75, name='norm1')
66 |     net = slim.conv2d(net, 64, [5, 5], scope='conv2')
67 |     end_points['conv2'] = net
68 |     net = tf.nn.lrn(net, 4, bias=1.0, alpha=0.001/9.0, beta=0.75, name='norm2')
69 |     net = slim.max_pool2d(net, [2, 2], 2, scope='pool2')
70 |     end_points['pool2'] = net
71 |     net = slim.flatten(net)
72 |     end_points['Flatten'] = net
73 |     net = slim.fully_connected(net, 384, scope='fc3')
74 |     end_points['fc3'] = net
75 |     net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
76 |                        scope='dropout3')
77 |     net = slim.fully_connected(net, 192, scope='fc4')
78 |     end_points['fc4'] = net
79 |     logits = slim.fully_connected(net, num_classes,
80 |                                   biases_initializer=tf.zeros_initializer(),
81 |                                   weights_initializer=trunc_normal(1/192.0),
82 |                                   weights_regularizer=None,
83 |                                   activation_fn=None,
84 |                                   scope='logits')
85 |
86 |     end_points['Logits'] = logits
87 |     end_points['Predictions'] = prediction_fn(logits, scope='Predictions')
88 |
89 |   return logits, end_points
90 | cifarnet.default_image_size = 32
91 |
92 |
93 | def cifarnet_arg_scope(weight_decay=0.004):
94 |   """Defines the default cifarnet argument scope.
95 |
96 |   Args:
97 |     weight_decay: The weight decay to use for regularizing the model.
98 |
99 |   Returns:
100 |     An `arg_scope` to use for the cifarnet model.
101 |   """
102 |   with slim.arg_scope(
103 |       [slim.conv2d],
104 |       weights_initializer=tf.truncated_normal_initializer(stddev=5e-2),
105 |       activation_fn=tf.nn.relu):
106 |     with slim.arg_scope(
107 |         [slim.fully_connected],
108 |         biases_initializer=tf.constant_initializer(0.1),
109 |         weights_initializer=trunc_normal(0.04),
110 |         weights_regularizer=slim.l2_regularizer(weight_decay),
111 |         activation_fn=tf.nn.relu) as sc:
112 |       return sc
113 |

--------------------------------------------------------------------------------
/slim/nets/inception.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
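
A short sketch of driving the CifarNet definition above, expanding the logits-to-probabilities recipe from its docstring (shapes assume the default 32x32 input):

import tensorflow as tf
from nets import cifarnet

slim = tf.contrib.slim

images = tf.random_uniform((8, 32, 32, 3))  # cifarnet.default_image_size is 32
with slim.arg_scope(cifarnet.cifarnet_arg_scope(weight_decay=0.004)):
  logits, end_points = cifarnet.cifarnet(images, num_classes=10,
                                         is_training=True)
probabilities = tf.nn.softmax(logits)  # same values as end_points['Predictions']
predictions = tf.argmax(logits, 1)
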
14 | # ============================================================================== 15 | """Brings all inception models under one namespace.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | # pylint: disable=unused-import 22 | from nets.inception_resnet_v2 import inception_resnet_v2 23 | from nets.inception_resnet_v2 import inception_resnet_v2_arg_scope 24 | from nets.inception_v1 import inception_v1 25 | from nets.inception_v1 import inception_v1_arg_scope 26 | from nets.inception_v1 import inception_v1_base 27 | from nets.inception_v2 import inception_v2 28 | from nets.inception_v2 import inception_v2_arg_scope 29 | from nets.inception_v2 import inception_v2_base 30 | from nets.inception_v3 import inception_v3 31 | from nets.inception_v3 import inception_v3_arg_scope 32 | from nets.inception_v3 import inception_v3_base 33 | from nets.inception_v4 import inception_v4 34 | from nets.inception_v4 import inception_v4_arg_scope 35 | from nets.inception_v4 import inception_v4_base 36 | # pylint: enable=unused-import 37 | -------------------------------------------------------------------------------- /slim/nets/inception_resnet_v2_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | """Tests for slim.inception_resnet_v2.""" 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | import tensorflow as tf 21 | 22 | from nets import inception 23 | 24 | 25 | class InceptionTest(tf.test.TestCase): 26 | 27 | def testBuildLogits(self): 28 | batch_size = 5 29 | height, width = 299, 299 30 | num_classes = 1000 31 | with self.test_session(): 32 | inputs = tf.random_uniform((batch_size, height, width, 3)) 33 | logits, _ = inception.inception_resnet_v2(inputs, num_classes) 34 | self.assertTrue(logits.op.name.startswith('InceptionResnetV2/Logits')) 35 | self.assertListEqual(logits.get_shape().as_list(), 36 | [batch_size, num_classes]) 37 | 38 | def testBuildEndPoints(self): 39 | batch_size = 5 40 | height, width = 299, 299 41 | num_classes = 1000 42 | with self.test_session(): 43 | inputs = tf.random_uniform((batch_size, height, width, 3)) 44 | _, end_points = inception.inception_resnet_v2(inputs, num_classes) 45 | self.assertTrue('Logits' in end_points) 46 | logits = end_points['Logits'] 47 | self.assertListEqual(logits.get_shape().as_list(), 48 | [batch_size, num_classes]) 49 | self.assertTrue('AuxLogits' in end_points) 50 | aux_logits = end_points['AuxLogits'] 51 | self.assertListEqual(aux_logits.get_shape().as_list(), 52 | [batch_size, num_classes]) 53 | pre_pool = end_points['PrePool'] 54 | self.assertListEqual(pre_pool.get_shape().as_list(), 55 | [batch_size, 8, 8, 1536]) 56 | 57 | def testVariablesSetDevice(self): 58 | batch_size = 5 59 | height, width = 299, 299 60 | num_classes = 1000 61 | with self.test_session(): 62 | inputs = tf.random_uniform((batch_size, height, width, 3)) 63 | # Force all Variables to reside on the device. 
64 | with tf.variable_scope('on_cpu'), tf.device('/cpu:0'): 65 | inception.inception_resnet_v2(inputs, num_classes) 66 | with tf.variable_scope('on_gpu'), tf.device('/gpu:0'): 67 | inception.inception_resnet_v2(inputs, num_classes) 68 | for v in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='on_cpu'): 69 | self.assertDeviceEqual(v.device, '/cpu:0') 70 | for v in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='on_gpu'): 71 | self.assertDeviceEqual(v.device, '/gpu:0') 72 | 73 | def testHalfSizeImages(self): 74 | batch_size = 5 75 | height, width = 150, 150 76 | num_classes = 1000 77 | with self.test_session(): 78 | inputs = tf.random_uniform((batch_size, height, width, 3)) 79 | logits, end_points = inception.inception_resnet_v2(inputs, num_classes) 80 | self.assertTrue(logits.op.name.startswith('InceptionResnetV2/Logits')) 81 | self.assertListEqual(logits.get_shape().as_list(), 82 | [batch_size, num_classes]) 83 | pre_pool = end_points['PrePool'] 84 | self.assertListEqual(pre_pool.get_shape().as_list(), 85 | [batch_size, 3, 3, 1536]) 86 | 87 | def testUnknownBatchSize(self): 88 | batch_size = 1 89 | height, width = 299, 299 90 | num_classes = 1000 91 | with self.test_session() as sess: 92 | inputs = tf.placeholder(tf.float32, (None, height, width, 3)) 93 | logits, _ = inception.inception_resnet_v2(inputs, num_classes) 94 | self.assertTrue(logits.op.name.startswith('InceptionResnetV2/Logits')) 95 | self.assertListEqual(logits.get_shape().as_list(), 96 | [None, num_classes]) 97 | images = tf.random_uniform((batch_size, height, width, 3)) 98 | sess.run(tf.global_variables_initializer()) 99 | output = sess.run(logits, {inputs: images.eval()}) 100 | self.assertEquals(output.shape, (batch_size, num_classes)) 101 | 102 | def testEvaluation(self): 103 | batch_size = 2 104 | height, width = 299, 299 105 | num_classes = 1000 106 | with self.test_session() as sess: 107 | eval_inputs = tf.random_uniform((batch_size, height, width, 3)) 108 | logits, _ = inception.inception_resnet_v2(eval_inputs, 109 | num_classes, 110 | is_training=False) 111 | predictions = tf.argmax(logits, 1) 112 | sess.run(tf.global_variables_initializer()) 113 | output = sess.run(predictions) 114 | self.assertEquals(output.shape, (batch_size,)) 115 | 116 | def testTrainEvalWithReuse(self): 117 | train_batch_size = 5 118 | eval_batch_size = 2 119 | height, width = 150, 150 120 | num_classes = 1000 121 | with self.test_session() as sess: 122 | train_inputs = tf.random_uniform((train_batch_size, height, width, 3)) 123 | inception.inception_resnet_v2(train_inputs, num_classes) 124 | eval_inputs = tf.random_uniform((eval_batch_size, height, width, 3)) 125 | logits, _ = inception.inception_resnet_v2(eval_inputs, 126 | num_classes, 127 | is_training=False, 128 | reuse=True) 129 | predictions = tf.argmax(logits, 1) 130 | sess.run(tf.global_variables_initializer()) 131 | output = sess.run(predictions) 132 | self.assertEquals(output.shape, (eval_batch_size,)) 133 | 134 | 135 | if __name__ == '__main__': 136 | tf.test.main() 137 | -------------------------------------------------------------------------------- /slim/nets/inception_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Contains common code shared by all inception models.
16 |
17 | Usage of arg scope:
18 |   with slim.arg_scope(inception_arg_scope()):
19 |     logits, end_points = inception.inception_v3(images, num_classes,
20 |                                                 is_training=is_training)
21 |
22 | """
23 | from __future__ import absolute_import
24 | from __future__ import division
25 | from __future__ import print_function
26 |
27 | import tensorflow as tf
28 |
29 | slim = tf.contrib.slim
30 |
31 |
32 | def inception_arg_scope(weight_decay=0.00004,
33 |                         use_batch_norm=True,
34 |                         batch_norm_decay=0.9997,
35 |                         batch_norm_epsilon=0.001):
36 |   """Defines the default arg scope for inception models.
37 |
38 |   Args:
39 |     weight_decay: The weight decay to use for regularizing the model.
40 |     use_batch_norm: If `True`, batch_norm is applied after each convolution.
41 |     batch_norm_decay: Decay for batch norm moving average.
42 |     batch_norm_epsilon: Small float added to variance to avoid dividing by zero
43 |       in batch norm.
44 |
45 |   Returns:
46 |     An `arg_scope` to use for the inception models.
47 |   """
48 |   batch_norm_params = {
49 |       # Decay for the moving averages.
50 |       'decay': batch_norm_decay,
51 |       # epsilon to prevent 0s in variance.
52 |       'epsilon': batch_norm_epsilon,
53 |       # collection containing update_ops.
54 |       'updates_collections': tf.GraphKeys.UPDATE_OPS,
55 |   }
56 |   if use_batch_norm:
57 |     normalizer_fn = slim.batch_norm
58 |     normalizer_params = batch_norm_params
59 |   else:
60 |     normalizer_fn = None
61 |     normalizer_params = {}
62 |   # Set weight_decay for weights in Conv and FC layers.
63 |   with slim.arg_scope([slim.conv2d, slim.fully_connected],
64 |                       weights_regularizer=slim.l2_regularizer(weight_decay)):
65 |     with slim.arg_scope(
66 |         [slim.conv2d],
67 |         weights_initializer=slim.variance_scaling_initializer(),
68 |         activation_fn=tf.nn.relu,
69 |         normalizer_fn=normalizer_fn,
70 |         normalizer_params=normalizer_params) as sc:
71 |       return sc
72 |

--------------------------------------------------------------------------------
/slim/nets/lenet.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
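
Expanding the docstring's usage note above into a self-contained, hedged sketch (inception_v3 is re-exported by nets/inception.py shown earlier; 299 is its default input size):

import tensorflow as tf
from nets import inception
from nets.inception_utils import inception_arg_scope

slim = tf.contrib.slim

images = tf.random_uniform((4, 299, 299, 3))
with slim.arg_scope(inception_arg_scope(use_batch_norm=True)):
  logits, end_points = inception.inception_v3(images, num_classes=1000,
                                              is_training=True)
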
14 | # ==============================================================================
15 | """Contains a variant of the LeNet model definition."""
16 |
17 | from __future__ import absolute_import
18 | from __future__ import division
19 | from __future__ import print_function
20 |
21 | import tensorflow as tf
22 |
23 | slim = tf.contrib.slim
24 |
25 |
26 | def lenet(images, num_classes=10, is_training=False,
27 |           dropout_keep_prob=0.5,
28 |           prediction_fn=slim.softmax,
29 |           scope='LeNet'):
30 |   """Creates a variant of the LeNet model.
31 |
32 |   Note that since the output is a set of 'logits', the values fall in the
33 |   interval of (-infinity, infinity). Consequently, to convert the outputs to a
34 |   probability distribution over the characters, one will need to convert them
35 |   using the softmax function:
36 |
37 |       logits = lenet.lenet(images, is_training=False)
38 |       probabilities = tf.nn.softmax(logits)
39 |       predictions = tf.argmax(logits, 1)
40 |
41 |   Args:
42 |     images: A batch of `Tensors` of size [batch_size, height, width, channels].
43 |     num_classes: the number of classes in the dataset.
44 |     is_training: specifies whether or not we're currently training the model.
45 |       This variable will determine the behaviour of the dropout layer.
46 |     dropout_keep_prob: the percentage of activation values that are retained.
47 |     prediction_fn: a function to get predictions out of logits.
48 |     scope: Optional variable_scope.
49 |
50 |   Returns:
51 |     logits: the pre-softmax activations, a tensor of size
52 |       [batch_size, `num_classes`]
53 |     end_points: a dictionary from components of the network to the corresponding
54 |       activation.
55 |   """
56 |   end_points = {}
57 |
58 |   with tf.variable_scope(scope, 'LeNet', [images, num_classes]):
59 |     net = slim.conv2d(images, 32, [5, 5], scope='conv1')
60 |     net = slim.max_pool2d(net, [2, 2], 2, scope='pool1')
61 |     net = slim.conv2d(net, 64, [5, 5], scope='conv2')
62 |     net = slim.max_pool2d(net, [2, 2], 2, scope='pool2')
63 |     net = slim.flatten(net)
64 |     end_points['Flatten'] = net
65 |
66 |     net = slim.fully_connected(net, 1024, scope='fc3')
67 |     net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
68 |                        scope='dropout3')
69 |     logits = slim.fully_connected(net, num_classes, activation_fn=None,
70 |                                   scope='fc4')
71 |
72 |     end_points['Logits'] = logits
73 |     end_points['Predictions'] = prediction_fn(logits, scope='Predictions')
74 |
75 |   return logits, end_points
76 | lenet.default_image_size = 28
77 |
78 |
79 | def lenet_arg_scope(weight_decay=0.0):
80 |   """Defines the default lenet argument scope.
81 |
82 |   Args:
83 |     weight_decay: The weight decay to use for regularizing the model.
84 |
85 |   Returns:
86 |     An `arg_scope` to use for the lenet model.
87 |   """
88 |   with slim.arg_scope(
89 |       [slim.conv2d, slim.fully_connected],
90 |       weights_regularizer=slim.l2_regularizer(weight_decay),
91 |       weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
92 |       activation_fn=tf.nn.relu) as sc:
93 |     return sc
94 |

--------------------------------------------------------------------------------
/slim/nets/nets_factory.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains a factory for building various models.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | import functools 21 | 22 | import tensorflow as tf 23 | 24 | from nets import alexnet 25 | from nets import cifarnet 26 | from nets import inception 27 | from nets import lenet 28 | from nets import overfeat 29 | from nets import resnet_v1 30 | from nets import resnet_v2 31 | from nets import vgg 32 | 33 | slim = tf.contrib.slim 34 | 35 | networks_map = {'alexnet_v2': alexnet.alexnet_v2, 36 | 'cifarnet': cifarnet.cifarnet, 37 | 'overfeat': overfeat.overfeat, 38 | 'vgg_a': vgg.vgg_a, 39 | 'vgg_16': vgg.vgg_16, 40 | 'vgg_19': vgg.vgg_19, 41 | 'inception_v1': inception.inception_v1, 42 | 'inception_v2': inception.inception_v2, 43 | 'inception_v3': inception.inception_v3, 44 | 'inception_v4': inception.inception_v4, 45 | 'inception_resnet_v2': inception.inception_resnet_v2, 46 | 'lenet': lenet.lenet, 47 | 'resnet_v1_50': resnet_v1.resnet_v1_50, 48 | 'resnet_v1_101': resnet_v1.resnet_v1_101, 49 | 'resnet_v1_152': resnet_v1.resnet_v1_152, 50 | 'resnet_v1_200': resnet_v1.resnet_v1_200, 51 | 'resnet_v2_50': resnet_v2.resnet_v2_50, 52 | 'resnet_v2_101': resnet_v2.resnet_v2_101, 53 | 'resnet_v2_152': resnet_v2.resnet_v2_152, 54 | 'resnet_v2_200': resnet_v2.resnet_v2_200, 55 | } 56 | 57 | arg_scopes_map = {'alexnet_v2': alexnet.alexnet_v2_arg_scope, 58 | 'cifarnet': cifarnet.cifarnet_arg_scope, 59 | 'overfeat': overfeat.overfeat_arg_scope, 60 | 'vgg_a': vgg.vgg_arg_scope, 61 | 'vgg_16': vgg.vgg_arg_scope, 62 | 'vgg_19': vgg.vgg_arg_scope, 63 | 'inception_v1': inception.inception_v3_arg_scope, 64 | 'inception_v2': inception.inception_v3_arg_scope, 65 | 'inception_v3': inception.inception_v3_arg_scope, 66 | 'inception_v4': inception.inception_v4_arg_scope, 67 | 'inception_resnet_v2': 68 | inception.inception_resnet_v2_arg_scope, 69 | 'lenet': lenet.lenet_arg_scope, 70 | 'resnet_v1_50': resnet_v1.resnet_arg_scope, 71 | 'resnet_v1_101': resnet_v1.resnet_arg_scope, 72 | 'resnet_v1_152': resnet_v1.resnet_arg_scope, 73 | 'resnet_v1_200': resnet_v1.resnet_arg_scope, 74 | 'resnet_v2_50': resnet_v2.resnet_arg_scope, 75 | 'resnet_v2_101': resnet_v2.resnet_arg_scope, 76 | 'resnet_v2_152': resnet_v2.resnet_arg_scope, 77 | 'resnet_v2_200': resnet_v2.resnet_arg_scope, 78 | } 79 | 80 | 81 | def get_network_fn(name, num_classes, weight_decay=0.0, is_training=False): 82 | """Returns a network_fn such as `logits, end_points = network_fn(images)`. 83 | 84 | Args: 85 | name: The name of the network. 86 | num_classes: The number of classes to use for classification. 87 | weight_decay: The l2 coefficient for the model weights. 88 | is_training: `True` if the model is being used for training and `False` 89 | otherwise. 90 | 91 | Returns: 92 | network_fn: A function that applies the model to a batch of images. 
It has 93 | the following signature: 94 | logits, end_points = network_fn(images) 95 | Raises: 96 | ValueError: If network `name` is not recognized. 97 | """ 98 | if name not in networks_map: 99 | raise ValueError('Name of network unknown: %s' % name) 100 | arg_scope = arg_scopes_map[name](weight_decay=weight_decay) 101 | func = networks_map[name] 102 | @functools.wraps(func) 103 | def network_fn(images): 104 | with slim.arg_scope(arg_scope): 105 | return func(images, num_classes, is_training=is_training) 106 | if hasattr(func, 'default_image_size'): 107 | network_fn.default_image_size = func.default_image_size 108 | 109 | return network_fn 110 | -------------------------------------------------------------------------------- /slim/nets/nets_factory_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Tests for slim.nets_factory.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | 23 | import tensorflow as tf 24 | 25 | from nets import nets_factory 26 | 27 | 28 | class NetworksTest(tf.test.TestCase): 29 | 30 | def testGetNetworkFn(self): 31 | batch_size = 5 32 | num_classes = 1000 33 | for net in nets_factory.networks_map: 34 | with self.test_session(): 35 | net_fn = nets_factory.get_network_fn(net, num_classes) 36 | # Most networks use 224 as their default_image_size 37 | image_size = getattr(net_fn, 'default_image_size', 224) 38 | inputs = tf.random_uniform((batch_size, image_size, image_size, 3)) 39 | logits, end_points = net_fn(inputs) 40 | self.assertTrue(isinstance(logits, tf.Tensor)) 41 | self.assertTrue(isinstance(end_points, dict)) 42 | self.assertEqual(logits.get_shape().as_list()[0], batch_size) 43 | self.assertEqual(logits.get_shape().as_list()[-1], num_classes) 44 | 45 | if __name__ == '__main__': 46 | tf.test.main() 47 | -------------------------------------------------------------------------------- /slim/nets/overfeat.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
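# A short sketch of how the factory above is typically wired together (the
# model name and flag values here are illustrative assumptions, not values
# prescribed by this repo):
#
#   import tensorflow as tf
#   from nets import nets_factory
#   network_fn = nets_factory.get_network_fn(
#       'inception_v3', num_classes=1000, weight_decay=0.00004,
#       is_training=True)
#   size = network_fn.default_image_size
#   images = tf.random_uniform((32, size, size, 3))
#   logits, end_points = network_fn(images)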
14 | # ============================================================================== 15 | """Contains the model definition for the OverFeat network. 16 | 17 | The definition for the network was obtained from: 18 | OverFeat: Integrated Recognition, Localization and Detection using 19 | Convolutional Networks 20 | Pierre Sermanet, David Eigen, Xiang Zhang, Michael Mathieu, Rob Fergus and 21 | Yann LeCun, 2014 22 | http://arxiv.org/abs/1312.6229 23 | 24 | Usage: 25 | with slim.arg_scope(overfeat.overfeat_arg_scope()): 26 | outputs, end_points = overfeat.overfeat(inputs) 27 | 28 | @@overfeat 29 | """ 30 | from __future__ import absolute_import 31 | from __future__ import division 32 | from __future__ import print_function 33 | 34 | import tensorflow as tf 35 | 36 | slim = tf.contrib.slim 37 | trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev) 38 | 39 | 40 | def overfeat_arg_scope(weight_decay=0.0005): 41 | with slim.arg_scope([slim.conv2d, slim.fully_connected], 42 | activation_fn=tf.nn.relu, 43 | weights_regularizer=slim.l2_regularizer(weight_decay), 44 | biases_initializer=tf.zeros_initializer()): 45 | with slim.arg_scope([slim.conv2d], padding='SAME'): 46 | with slim.arg_scope([slim.max_pool2d], padding='VALID') as arg_sc: 47 | return arg_sc 48 | 49 | 50 | def overfeat(inputs, 51 | num_classes=1000, 52 | is_training=True, 53 | dropout_keep_prob=0.5, 54 | spatial_squeeze=True, 55 | scope='overfeat'): 56 | """Contains the model definition for the OverFeat network. 57 | 58 | The definition for the network was obtained from: 59 | OverFeat: Integrated Recognition, Localization and Detection using 60 | Convolutional Networks 61 | Pierre Sermanet, David Eigen, Xiang Zhang, Michael Mathieu, Rob Fergus and 62 | Yann LeCun, 2014 63 | http://arxiv.org/abs/1312.6229 64 | 65 | Note: All the fully_connected layers have been transformed to conv2d layers. 66 | To use in classification mode, resize input to 231x231. To use in fully 67 | convolutional mode, set spatial_squeeze to false. 68 | 69 | Args: 70 | inputs: a tensor of size [batch_size, height, width, channels]. 71 | num_classes: number of predicted classes. 72 | is_training: whether or not the model is being trained. 73 | dropout_keep_prob: the probability that activations are kept in the dropout 74 | layers during training. 75 | spatial_squeeze: whether or not to squeeze the spatial dimensions of the 76 | outputs. Useful to remove unnecessary dimensions for classification. 77 | scope: Optional scope for the variables. 78 | 79 | Returns: 80 | the last op containing the log predictions and end_points dict.
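  Example (a sketch of the two modes; the shapes below match the unit tests
  in overfeat_test.py):

    # Classification mode: 231x231 inputs give [batch, num_classes] logits.
    inputs = tf.random_uniform((5, 231, 231, 3))
    logits, _ = overfeat(inputs, num_classes=1000)

    # Fully convolutional mode: larger inputs keep a spatial map of logits,
    # e.g. 281x281 inputs yield logits of shape [1, 2, 2, 1000].
    inputs = tf.random_uniform((1, 281, 281, 3))
    logits, _ = overfeat(inputs, num_classes=1000, spatial_squeeze=False)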
81 | 82 | """ 83 | with tf.variable_scope(scope, 'overfeat', [inputs]) as sc: 84 | end_points_collection = sc.name + '_end_points' 85 | # Collect outputs for conv2d, fully_connected and max_pool2d 86 | with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d], 87 | outputs_collections=end_points_collection): 88 | net = slim.conv2d(inputs, 64, [11, 11], 4, padding='VALID', 89 | scope='conv1') 90 | net = slim.max_pool2d(net, [2, 2], scope='pool1') 91 | net = slim.conv2d(net, 256, [5, 5], padding='VALID', scope='conv2') 92 | net = slim.max_pool2d(net, [2, 2], scope='pool2') 93 | net = slim.conv2d(net, 512, [3, 3], scope='conv3') 94 | net = slim.conv2d(net, 1024, [3, 3], scope='conv4') 95 | net = slim.conv2d(net, 1024, [3, 3], scope='conv5') 96 | net = slim.max_pool2d(net, [2, 2], scope='pool5') 97 | with slim.arg_scope([slim.conv2d], 98 | weights_initializer=trunc_normal(0.005), 99 | biases_initializer=tf.constant_initializer(0.1)): 100 | # Use conv2d instead of fully_connected layers. 101 | net = slim.conv2d(net, 3072, [6, 6], padding='VALID', scope='fc6') 102 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 103 | scope='dropout6') 104 | net = slim.conv2d(net, 4096, [1, 1], scope='fc7') 105 | net = slim.dropout(net, dropout_keep_prob, is_training=is_training, 106 | scope='dropout7') 107 | net = slim.conv2d(net, num_classes, [1, 1], 108 | activation_fn=None, 109 | normalizer_fn=None, 110 | biases_initializer=tf.zeros_initializer(), 111 | scope='fc8') 112 | # Convert end_points_collection into an end_points dict. 113 | end_points = slim.utils.convert_collection_to_dict(end_points_collection) 114 | if spatial_squeeze: 115 | net = tf.squeeze(net, [1, 2], name='fc8/squeezed') 116 | end_points[sc.name + '/fc8'] = net 117 | return net, end_points 118 | overfeat.default_image_size = 231 119 | -------------------------------------------------------------------------------- /slim/nets/overfeat_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | # ============================================================================== 15 | """Tests for slim.nets.overfeat.""" 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | import tensorflow as tf 21 | 22 | from nets import overfeat 23 | 24 | slim = tf.contrib.slim 25 | 26 | 27 | class OverFeatTest(tf.test.TestCase): 28 | 29 | def testBuild(self): 30 | batch_size = 5 31 | height, width = 231, 231 32 | num_classes = 1000 33 | with self.test_session(): 34 | inputs = tf.random_uniform((batch_size, height, width, 3)) 35 | logits, _ = overfeat.overfeat(inputs, num_classes) 36 | self.assertEquals(logits.op.name, 'overfeat/fc8/squeezed') 37 | self.assertListEqual(logits.get_shape().as_list(), 38 | [batch_size, num_classes]) 39 | 40 | def testFullyConvolutional(self): 41 | batch_size = 1 42 | height, width = 281, 281 43 | num_classes = 1000 44 | with self.test_session(): 45 | inputs = tf.random_uniform((batch_size, height, width, 3)) 46 | logits, _ = overfeat.overfeat(inputs, num_classes, spatial_squeeze=False) 47 | self.assertEquals(logits.op.name, 'overfeat/fc8/BiasAdd') 48 | self.assertListEqual(logits.get_shape().as_list(), 49 | [batch_size, 2, 2, num_classes]) 50 | 51 | def testEndPoints(self): 52 | batch_size = 5 53 | height, width = 231, 231 54 | num_classes = 1000 55 | with self.test_session(): 56 | inputs = tf.random_uniform((batch_size, height, width, 3)) 57 | _, end_points = overfeat.overfeat(inputs, num_classes) 58 | expected_names = ['overfeat/conv1', 59 | 'overfeat/pool1', 60 | 'overfeat/conv2', 61 | 'overfeat/pool2', 62 | 'overfeat/conv3', 63 | 'overfeat/conv4', 64 | 'overfeat/conv5', 65 | 'overfeat/pool5', 66 | 'overfeat/fc6', 67 | 'overfeat/fc7', 68 | 'overfeat/fc8' 69 | ] 70 | self.assertSetEqual(set(end_points.keys()), set(expected_names)) 71 | 72 | def testModelVariables(self): 73 | batch_size = 5 74 | height, width = 231, 231 75 | num_classes = 1000 76 | with self.test_session(): 77 | inputs = tf.random_uniform((batch_size, height, width, 3)) 78 | overfeat.overfeat(inputs, num_classes) 79 | expected_names = ['overfeat/conv1/weights', 80 | 'overfeat/conv1/biases', 81 | 'overfeat/conv2/weights', 82 | 'overfeat/conv2/biases', 83 | 'overfeat/conv3/weights', 84 | 'overfeat/conv3/biases', 85 | 'overfeat/conv4/weights', 86 | 'overfeat/conv4/biases', 87 | 'overfeat/conv5/weights', 88 | 'overfeat/conv5/biases', 89 | 'overfeat/fc6/weights', 90 | 'overfeat/fc6/biases', 91 | 'overfeat/fc7/weights', 92 | 'overfeat/fc7/biases', 93 | 'overfeat/fc8/weights', 94 | 'overfeat/fc8/biases', 95 | ] 96 | model_variables = [v.op.name for v in slim.get_model_variables()] 97 | self.assertSetEqual(set(model_variables), set(expected_names)) 98 | 99 | def testEvaluation(self): 100 | batch_size = 2 101 | height, width = 231, 231 102 | num_classes = 1000 103 | with self.test_session(): 104 | eval_inputs = tf.random_uniform((batch_size, height, width, 3)) 105 | logits, _ = overfeat.overfeat(eval_inputs, is_training=False) 106 | self.assertListEqual(logits.get_shape().as_list(), 107 | [batch_size, num_classes]) 108 | predictions = tf.argmax(logits, 1) 109 | self.assertListEqual(predictions.get_shape().as_list(), [batch_size]) 110 | 111 | def testTrainEvalWithReuse(self): 112 | train_batch_size = 2 113 | eval_batch_size = 1 114 | train_height, train_width = 231, 231 115 | eval_height, eval_width = 281, 281 116 | num_classes = 1000 117 | with self.test_session(): 118 | train_inputs = tf.random_uniform( 119 | 
(train_batch_size, train_height, train_width, 3)) 120 | logits, _ = overfeat.overfeat(train_inputs) 121 | self.assertListEqual(logits.get_shape().as_list(), 122 | [train_batch_size, num_classes]) 123 | tf.get_variable_scope().reuse_variables() 124 | eval_inputs = tf.random_uniform( 125 | (eval_batch_size, eval_height, eval_width, 3)) 126 | logits, _ = overfeat.overfeat(eval_inputs, is_training=False, 127 | spatial_squeeze=False) 128 | self.assertListEqual(logits.get_shape().as_list(), 129 | [eval_batch_size, 2, 2, num_classes]) 130 | logits = tf.reduce_mean(logits, [1, 2]) 131 | predictions = tf.argmax(logits, 1) 132 | self.assertEquals(predictions.get_shape().as_list(), [eval_batch_size]) 133 | 134 | def testForward(self): 135 | batch_size = 1 136 | height, width = 231, 231 137 | with self.test_session() as sess: 138 | inputs = tf.random_uniform((batch_size, height, width, 3)) 139 | logits, _ = overfeat.overfeat(inputs) 140 | sess.run(tf.global_variables_initializer()) 141 | output = sess.run(logits) 142 | self.assertTrue(output.any()) 143 | 144 | if __name__ == '__main__': 145 | tf.test.main() 146 | -------------------------------------------------------------------------------- /slim/preprocessing/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /slim/preprocessing/cifarnet_preprocessing.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Provides utilities to preprocess images in CIFAR-10. 16 | 17 | """ 18 | 19 | from __future__ import absolute_import 20 | from __future__ import division 21 | from __future__ import print_function 22 | 23 | import tensorflow as tf 24 | 25 | _PADDING = 4 26 | 27 | slim = tf.contrib.slim 28 | 29 | 30 | def preprocess_for_train(image, 31 | output_height, 32 | output_width, 33 | padding=_PADDING): 34 | """Preprocesses the given image for training. 35 | 36 | Note that the image is zero-padded with `padding` pixels, then randomly 37 | cropped, flipped and color-distorted. 38 | 39 | Args: 40 | image: A `Tensor` representing an image of arbitrary size. 41 | output_height: The height of the image after preprocessing. 42 | output_width: The width of the image after preprocessing. 43 | padding: The amount of padding before and after each dimension of the image. 44 | 45 | Returns: 46 | A preprocessed image. 47 | """ 48 | tf.summary.image('image', tf.expand_dims(image, 0)) 49 | 50 | # Transform the image to floats. 51 | image = tf.to_float(image) 52 | if padding > 0: 53 | image = tf.pad(image, [[padding, padding], [padding, padding], [0, 0]]) 54 | # Randomly crop a [height, width] section of the image.
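  # With the default _PADDING of 4, a 32x32 CIFAR-10 image is zero-padded to
  # 40x40 by tf.pad above, so the random crop below can translate the training
  # image by a few pixels in each direction (a common augmentation; the exact
  # shift range depends on the requested output size).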
55 | distorted_image = tf.random_crop(image, 56 | [output_height, output_width, 3]) 57 | 58 | # Randomly flip the image horizontally. 59 | distorted_image = tf.image.random_flip_left_right(distorted_image) 60 | 61 | tf.summary.image('distorted_image', tf.expand_dims(distorted_image, 0)) 62 | 63 | # Because these operations are not commutative, consider randomizing 64 | # the order of their operation. 65 | distorted_image = tf.image.random_brightness(distorted_image, 66 | max_delta=63) 67 | distorted_image = tf.image.random_contrast(distorted_image, 68 | lower=0.2, upper=1.8) 69 | # Subtract off the mean and divide by the standard deviation of the pixels. 70 | return tf.image.per_image_standardization(distorted_image) 71 | 72 | 73 | def preprocess_for_eval(image, output_height, output_width): 74 | """Preprocesses the given image for evaluation. 75 | 76 | Args: 77 | image: A `Tensor` representing an image of arbitrary size. 78 | output_height: The height of the image after preprocessing. 79 | output_width: The width of the image after preprocessing. 80 | 81 | Returns: 82 | A preprocessed image. 83 | """ 84 | tf.summary.image('image', tf.expand_dims(image, 0)) 85 | # Transform the image to floats. 86 | image = tf.to_float(image) 87 | 88 | # Resize and crop if needed. Note the tf.image API expects height first. 89 | resized_image = tf.image.resize_image_with_crop_or_pad(image, 90 | output_height, 91 | output_width) 92 | tf.summary.image('resized_image', tf.expand_dims(resized_image, 0)) 93 | 94 | # Subtract off the mean and divide by the standard deviation of the pixels. 95 | return tf.image.per_image_standardization(resized_image) 96 | 97 | 98 | def preprocess_image(image, output_height, output_width, is_training=False): 99 | """Preprocesses the given image. 100 | 101 | Args: 102 | image: A `Tensor` representing an image of arbitrary size. 103 | output_height: The height of the image after preprocessing. 104 | output_width: The width of the image after preprocessing. 105 | is_training: `True` if we're preprocessing the image for training and 106 | `False` otherwise. 107 | 108 | Returns: 109 | A preprocessed image. 110 | """ 111 | if is_training: 112 | return preprocess_for_train(image, output_height, output_width) 113 | else: 114 | return preprocess_for_eval(image, output_height, output_width) 115 | -------------------------------------------------------------------------------- /slim/preprocessing/lenet_preprocessing.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | # ============================================================================== 15 | """Provides utilities for preprocessing.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import tensorflow as tf 22 | 23 | slim = tf.contrib.slim 24 | 25 | 26 | def preprocess_image(image, output_height, output_width, is_training): 27 | """Preprocesses the given image. 28 | 29 | Args: 30 | image: A `Tensor` representing an image of arbitrary size. 31 | output_height: The height of the image after preprocessing. 32 | output_width: The width of the image after preprocessing. 33 | is_training: `True` if we're preprocessing the image for training and 34 | `False` otherwise. 35 | 36 | Returns: 37 | A preprocessed image. 38 | """ 39 | image = tf.to_float(image) 40 | image = tf.image.resize_image_with_crop_or_pad( 41 | image, output_height, output_width) 42 | image = tf.subtract(image, 128.0) 43 | image = tf.div(image, 128.0) 44 | return image 45 | -------------------------------------------------------------------------------- /slim/preprocessing/preprocessing_factory.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains a factory for building various preprocessing functions.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import tensorflow as tf 22 | 23 | from preprocessing import cifarnet_preprocessing 24 | from preprocessing import inception_preprocessing 25 | from preprocessing import lenet_preprocessing 26 | from preprocessing import vgg_preprocessing 27 | 28 | slim = tf.contrib.slim 29 | 30 | 31 | def get_preprocessing(name, is_training=False): 32 | """Returns preprocessing_fn(image, height, width, **kwargs). 33 | 34 | Args: 35 | name: The name of the preprocessing function. 36 | is_training: `True` if the model is being used for training and `False` 37 | otherwise. 38 | 39 | Returns: 40 | preprocessing_fn: A function that preprocesses a single image (prior to 41 | batching). It has the following signature: 42 | image = preprocessing_fn(image, output_height, output_width, ...). 43 | 44 | Raises: 45 | ValueError: If Preprocessing `name` is not recognized.
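  Example (a minimal sketch; the CIFAR-10 sizes are illustrative, and
  `raw_image` is assumed to be an already-decoded image `Tensor`):

    image_preprocessing_fn = get_preprocessing('cifarnet', is_training=True)
    image = image_preprocessing_fn(raw_image, 32, 32)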
46 | """ 47 | preprocessing_fn_map = { 48 | 'cifarnet': cifarnet_preprocessing, 49 | 'inception': inception_preprocessing, 50 | 'inception_v1': inception_preprocessing, 51 | 'inception_v2': inception_preprocessing, 52 | 'inception_v3': inception_preprocessing, 53 | 'inception_v4': inception_preprocessing, 54 | 'inception_resnet_v2': inception_preprocessing, 55 | 'lenet': lenet_preprocessing, 56 | 'resnet_v1_50': vgg_preprocessing, 57 | 'resnet_v1_101': vgg_preprocessing, 58 | 'resnet_v1_152': vgg_preprocessing, 59 | 'resnet_v2_50': vgg_preprocessing, 60 | 'resnet_v2_101': vgg_preprocessing, 61 | 'resnet_v2_152': vgg_preprocessing, 62 | 'vgg': vgg_preprocessing, 63 | 'vgg_a': vgg_preprocessing, 64 | 'vgg_16': vgg_preprocessing, 65 | 'vgg_19': vgg_preprocessing, 66 | } 67 | 68 | if name not in preprocessing_fn_map: 69 | raise ValueError('Preprocessing name [%s] was not recognized' % name) 70 | 71 | def preprocessing_fn(image, output_height, output_width, **kwargs): 72 | return preprocessing_fn_map[name].preprocess_image( 73 | image, output_height, output_width, is_training=is_training, **kwargs) 74 | 75 | return preprocessing_fn 76 | -------------------------------------------------------------------------------- /slim/scripts/finetune_inception_v1_on_flowers.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # This script performs the following operations: 4 | # 1. Downloads the Flowers dataset 5 | # 2. Fine-tunes an InceptionV1 model on the Flowers training set. 6 | # 3. Evaluates the model on the Flowers validation set. 7 | # 8 | # Usage: 9 | # cd slim 10 | # ./slim/scripts/finetune_inception_v1_on_flowers.sh 11 | 12 | # Where the pre-trained InceptionV1 checkpoint is saved to. 13 | PRETRAINED_CHECKPOINT_DIR=/tmp/checkpoints 14 | 15 | # Where the training (fine-tuned) checkpoint and logs will be saved to. 16 | TRAIN_DIR=/tmp/flowers-models/inception_v1 17 | 18 | # Where the dataset is saved to. 19 | DATASET_DIR=/tmp/flowers 20 | 21 | # Download the pre-trained checkpoint. 22 | if [ ! -d "$PRETRAINED_CHECKPOINT_DIR" ]; then 23 | mkdir ${PRETRAINED_CHECKPOINT_DIR} 24 | fi 25 | if [ ! -f ${PRETRAINED_CHECKPOINT_DIR}/inception_v1.ckpt ]; then 26 | wget http://download.tensorflow.org/models/inception_v1_2016_08_28.tar.gz 27 | tar -xvf inception_v1_2016_08_28.tar.gz 28 | mv inception_v1.ckpt ${PRETRAINED_CHECKPOINT_DIR}/inception_v1.ckpt 29 | rm inception_v1_2016_08_28.tar.gz 30 | fi 31 | 32 | # Download the dataset 33 | python download_and_convert_data.py \ 34 | --dataset_name=flowers \ 35 | --dataset_dir=${DATASET_DIR} 36 | 37 | # Fine-tune only the new layers for 2000 steps. 38 | python train_image_classifier.py \ 39 | --train_dir=${TRAIN_DIR} \ 40 | --dataset_name=flowers \ 41 | --dataset_split_name=train \ 42 | --dataset_dir=${DATASET_DIR} \ 43 | --model_name=inception_v1 \ 44 | --checkpoint_path=${PRETRAINED_CHECKPOINT_DIR}/inception_v1.ckpt \ 45 | --checkpoint_exclude_scopes=InceptionV1/Logits \ 46 | --trainable_scopes=InceptionV1/Logits \ 47 | --max_number_of_steps=3000 \ 48 | --batch_size=32 \ 49 | --learning_rate=0.01 \ 50 | --save_interval_secs=60 \ 51 | --save_summaries_secs=60 \ 52 | --log_every_n_steps=100 \ 53 | --optimizer=rmsprop \ 54 | --weight_decay=0.00004 55 | 56 | # Run evaluation. 
57 | python eval_image_classifier.py \ 58 | --checkpoint_path=${TRAIN_DIR} \ 59 | --eval_dir=${TRAIN_DIR} \ 60 | --dataset_name=flowers \ 61 | --dataset_split_name=validation \ 62 | --dataset_dir=${DATASET_DIR} \ 63 | --model_name=inception_v1 64 | 65 | # Fine-tune all layers for 1000 steps. 66 | python train_image_classifier.py \ 67 | --train_dir=${TRAIN_DIR}/all \ 68 | --dataset_name=flowers \ 69 | --dataset_split_name=train \ 70 | --dataset_dir=${DATASET_DIR} \ 71 | --checkpoint_path=${TRAIN_DIR} \ 72 | --model_name=inception_v1 \ 73 | --max_number_of_steps=1000 \ 74 | --batch_size=32 \ 75 | --learning_rate=0.001 \ 76 | --save_interval_secs=60 \ 77 | --save_summaries_secs=60 \ 78 | --log_every_n_steps=100 \ 79 | --optimizer=rmsprop \ 80 | --weight_decay=0.00004 81 | 82 | # Run evaluation. 83 | python eval_image_classifier.py \ 84 | --checkpoint_path=${TRAIN_DIR}/all \ 85 | --eval_dir=${TRAIN_DIR}/all \ 86 | --dataset_name=flowers \ 87 | --dataset_split_name=validation \ 88 | --dataset_dir=${DATASET_DIR} \ 89 | --model_name=inception_v1 90 | -------------------------------------------------------------------------------- /slim/scripts/finetune_inception_v3_on_flowers.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # This script performs the following operations: 4 | # 1. Downloads the Flowers dataset 5 | # 2. Fine-tunes an InceptionV3 model on the Flowers training set. 6 | # 3. Evaluates the model on the Flowers validation set. 7 | # 8 | # Usage: 9 | # cd slim 10 | # ./scripts/finetune_inception_v3_on_flowers.sh 11 | 12 | # Where the pre-trained InceptionV3 checkpoint is saved to. 13 | PRETRAINED_CHECKPOINT_DIR=/tmp/checkpoints 14 | 15 | # Where the training (fine-tuned) checkpoint and logs will be saved to. 16 | TRAIN_DIR=/tmp/flowers-models/inception_v3 17 | 18 | # Where the dataset is saved to. 19 | DATASET_DIR=/tmp/flowers 20 | 21 | # Download the pre-trained checkpoint. 22 | if [ ! -d "$PRETRAINED_CHECKPOINT_DIR" ]; then 23 | mkdir ${PRETRAINED_CHECKPOINT_DIR} 24 | fi 25 | if [ ! -f ${PRETRAINED_CHECKPOINT_DIR}/inception_v3.ckpt ]; then 26 | wget http://download.tensorflow.org/models/inception_v3_2016_08_28.tar.gz 27 | tar -xvf inception_v3_2016_08_28.tar.gz 28 | mv inception_v3.ckpt ${PRETRAINED_CHECKPOINT_DIR}/inception_v3.ckpt 29 | rm inception_v3_2016_08_28.tar.gz 30 | fi 31 | 32 | # Download the dataset 33 | python download_and_convert_data.py \ 34 | --dataset_name=flowers \ 35 | --dataset_dir=${DATASET_DIR} 36 | 37 | # Fine-tune only the new layers for 1000 steps. 38 | python train_image_classifier.py \ 39 | --train_dir=${TRAIN_DIR} \ 40 | --dataset_name=flowers \ 41 | --dataset_split_name=train \ 42 | --dataset_dir=${DATASET_DIR} \ 43 | --model_name=inception_v3 \ 44 | --checkpoint_path=${PRETRAINED_CHECKPOINT_DIR}/inception_v3.ckpt \ 45 | --checkpoint_exclude_scopes=InceptionV3/Logits,InceptionV3/AuxLogits \ 46 | --trainable_scopes=InceptionV3/Logits,InceptionV3/AuxLogits \ 47 | --max_number_of_steps=1000 \ 48 | --batch_size=32 \ 49 | --learning_rate=0.01 \ 50 | --learning_rate_decay_type=fixed \ 51 | --save_interval_secs=60 \ 52 | --save_summaries_secs=60 \ 53 | --log_every_n_steps=100 \ 54 | --optimizer=rmsprop \ 55 | --weight_decay=0.00004 56 | 57 | # Run evaluation.
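# To evaluate one fixed checkpoint rather than the newest one, the
# --checkpoint_path flag also accepts an explicit checkpoint file (the step
# number below is hypothetical):
#   --checkpoint_path=${TRAIN_DIR}/model.ckpt-1000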
58 | python eval_image_classifier.py \ 59 | --checkpoint_path=${TRAIN_DIR} \ 60 | --eval_dir=${TRAIN_DIR} \ 61 | --dataset_name=flowers \ 62 | --dataset_split_name=validation \ 63 | --dataset_dir=${DATASET_DIR} \ 64 | --model_name=inception_v3 65 | 66 | # Fine-tune all layers for 500 steps. 67 | python train_image_classifier.py \ 68 | --train_dir=${TRAIN_DIR}/all \ 69 | --dataset_name=flowers \ 70 | --dataset_split_name=train \ 71 | --dataset_dir=${DATASET_DIR} \ 72 | --model_name=inception_v3 \ 73 | --checkpoint_path=${TRAIN_DIR} \ 74 | --max_number_of_steps=500 \ 75 | --batch_size=32 \ 76 | --learning_rate=0.0001 \ 77 | --learning_rate_decay_type=fixed \ 78 | --save_interval_secs=60 \ 79 | --save_summaries_secs=60 \ 80 | --log_every_n_steps=10 \ 81 | --optimizer=rmsprop \ 82 | --weight_decay=0.00004 83 | 84 | # Run evaluation. 85 | python eval_image_classifier.py \ 86 | --checkpoint_path=${TRAIN_DIR}/all \ 87 | --eval_dir=${TRAIN_DIR}/all \ 88 | --dataset_name=flowers \ 89 | --dataset_split_name=validation \ 90 | --dataset_dir=${DATASET_DIR} \ 91 | --model_name=inception_v3 92 | -------------------------------------------------------------------------------- /slim/scripts/finetune_resnet_v1_50_on_flowers.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # This script performs the following operations: 4 | # 1. Downloads the Flowers dataset 5 | # 2. Fine-tunes a ResNetV1-50 model on the Flowers training set. 6 | # 3. Evaluates the model on the Flowers validation set. 7 | # 8 | # Usage: 9 | # cd slim 10 | # ./scripts/finetune_resnet_v1_50_on_flowers.sh 11 | 12 | # Where the pre-trained ResNetV1-50 checkpoint is saved to. 13 | PRETRAINED_CHECKPOINT_DIR=/tmp/checkpoints 14 | 15 | # Where the training (fine-tuned) checkpoint and logs will be saved to. 16 | TRAIN_DIR=/tmp/flowers-models/resnet_v1_50 17 | 18 | # Where the dataset is saved to. 19 | DATASET_DIR=/tmp/flowers 20 | 21 | # Download the pre-trained checkpoint. 22 | if [ ! -d "$PRETRAINED_CHECKPOINT_DIR" ]; then 23 | mkdir ${PRETRAINED_CHECKPOINT_DIR} 24 | fi 25 | if [ ! -f ${PRETRAINED_CHECKPOINT_DIR}/resnet_v1_50.ckpt ]; then 26 | wget http://download.tensorflow.org/models/resnet_v1_50_2016_08_28.tar.gz 27 | tar -xvf resnet_v1_50_2016_08_28.tar.gz 28 | mv resnet_v1_50.ckpt ${PRETRAINED_CHECKPOINT_DIR}/resnet_v1_50.ckpt 29 | rm resnet_v1_50_2016_08_28.tar.gz 30 | fi 31 | 32 | # Download the dataset 33 | python download_and_convert_data.py \ 34 | --dataset_name=flowers \ 35 | --dataset_dir=${DATASET_DIR} 36 | 37 | # Fine-tune only the new layers for 3000 steps. 38 | python train_image_classifier.py \ 39 | --train_dir=${TRAIN_DIR} \ 40 | --dataset_name=flowers \ 41 | --dataset_split_name=train \ 42 | --dataset_dir=${DATASET_DIR} \ 43 | --model_name=resnet_v1_50 \ 44 | --checkpoint_path=${PRETRAINED_CHECKPOINT_DIR}/resnet_v1_50.ckpt \ 45 | --checkpoint_exclude_scopes=resnet_v1_50/logits \ 46 | --trainable_scopes=resnet_v1_50/logits \ 47 | --max_number_of_steps=3000 \ 48 | --batch_size=32 \ 49 | --learning_rate=0.01 \ 50 | --save_interval_secs=60 \ 51 | --save_summaries_secs=60 \ 52 | --log_every_n_steps=100 \ 53 | --optimizer=rmsprop \ 54 | --weight_decay=0.00004 55 | 56 | # Run evaluation.
57 | python eval_image_classifier.py \ 58 | --checkpoint_path=${TRAIN_DIR} \ 59 | --eval_dir=${TRAIN_DIR} \ 60 | --dataset_name=flowers \ 61 | --dataset_split_name=validation \ 62 | --dataset_dir=${DATASET_DIR} \ 63 | --model_name=resnet_v1_50 64 | 65 | # Fine-tune all layers for 1000 steps. 66 | python train_image_classifier.py \ 67 | --train_dir=${TRAIN_DIR}/all \ 68 | --dataset_name=flowers \ 69 | --dataset_split_name=train \ 70 | --dataset_dir=${DATASET_DIR} \ 71 | --checkpoint_path=${TRAIN_DIR} \ 72 | --model_name=resnet_v1_50 \ 73 | --max_number_of_steps=1000 \ 74 | --batch_size=32 \ 75 | --learning_rate=0.001 \ 76 | --save_interval_secs=60 \ 77 | --save_summaries_secs=60 \ 78 | --log_every_n_steps=100 \ 79 | --optimizer=rmsprop \ 80 | --weight_decay=0.00004 81 | 82 | # Run evaluation. 83 | python eval_image_classifier.py \ 84 | --checkpoint_path=${TRAIN_DIR}/all \ 85 | --eval_dir=${TRAIN_DIR}/all \ 86 | --dataset_name=flowers \ 87 | --dataset_split_name=validation \ 88 | --dataset_dir=${DATASET_DIR} \ 89 | --model_name=resnet_v1_50 90 | -------------------------------------------------------------------------------- /slim/scripts/train_cifarnet_on_cifar10.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # This script performs the following operations: 4 | # 1. Downloads the Cifar10 dataset 5 | # 2. Trains a CifarNet model on the Cifar10 training set. 6 | # 3. Evaluates the model on the Cifar10 testing set. 7 | # 8 | # Usage: 9 | # cd slim 10 | # ./scripts/train_cifarnet_on_cifar10.sh 11 | 12 | # Where the checkpoint and logs will be saved to. 13 | TRAIN_DIR=/tmp/cifarnet-model 14 | 15 | # Where the dataset is saved to. 16 | DATASET_DIR=/tmp/cifar10 17 | 18 | # Download the dataset 19 | python download_and_convert_data.py \ 20 | --dataset_name=cifar10 \ 21 | --dataset_dir=${DATASET_DIR} 22 | 23 | # Run training. 24 | python train_image_classifier.py \ 25 | --train_dir=${TRAIN_DIR} \ 26 | --dataset_name=cifar10 \ 27 | --dataset_split_name=train \ 28 | --dataset_dir=${DATASET_DIR} \ 29 | --model_name=cifarnet \ 30 | --preprocessing_name=cifarnet \ 31 | --max_number_of_steps=100000 \ 32 | --batch_size=128 \ 33 | --save_interval_secs=120 \ 34 | --save_summaries_secs=120 \ 35 | --log_every_n_steps=100 \ 36 | --optimizer=sgd \ 37 | --learning_rate=0.1 \ 38 | --learning_rate_decay_factor=0.1 \ 39 | --num_epochs_per_decay=200 \ 40 | --weight_decay=0.004 41 | 42 | # Run evaluation. 43 | python eval_image_classifier.py \ 44 | --checkpoint_path=${TRAIN_DIR} \ 45 | --eval_dir=${TRAIN_DIR} \ 46 | --dataset_name=cifar10 \ 47 | --dataset_split_name=test \ 48 | --dataset_dir=${DATASET_DIR} \ 49 | --model_name=cifarnet 50 | -------------------------------------------------------------------------------- /slim/scripts/train_lenet_on_mnist.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # This script performs the following operations: 4 | # 1. Downloads the MNIST dataset 5 | # 2. Trains a LeNet model on the MNIST training set. 6 | # 3. Evaluates the model on the MNIST testing set. 7 | # 8 | # Usage: 9 | # cd slim 10 | # ./scripts/train_lenet_on_mnist.sh 11 | 12 | # Where the checkpoint and logs will be saved to. 13 | TRAIN_DIR=/tmp/lenet-model 14 | 15 | # Where the dataset is saved to.
16 | DATASET_DIR=/tmp/mnist 17 | 18 | # Download the dataset 19 | python download_and_convert_data.py \ 20 | --dataset_name=mnist \ 21 | --dataset_dir=${DATASET_DIR} 22 | 23 | # Run training. 24 | python train_image_classifier.py \ 25 | --train_dir=${TRAIN_DIR} \ 26 | --dataset_name=mnist \ 27 | --dataset_split_name=train \ 28 | --dataset_dir=${DATASET_DIR} \ 29 | --model_name=lenet \ 30 | --preprocessing_name=lenet \ 31 | --max_number_of_steps=20000 \ 32 | --batch_size=50 \ 33 | --learning_rate=0.01 \ 34 | --save_interval_secs=60 \ 35 | --save_summaries_secs=60 \ 36 | --log_every_n_steps=100 \ 37 | --optimizer=sgd \ 38 | --learning_rate_decay_type=fixed \ 39 | --weight_decay=0 40 | 41 | # Run evaluation. 42 | python eval_image_classifier.py \ 43 | --checkpoint_path=${TRAIN_DIR} \ 44 | --eval_dir=${TRAIN_DIR} \ 45 | --dataset_name=mnist \ 46 | --dataset_split_name=test \ 47 | --dataset_dir=${DATASET_DIR} \ 48 | --model_name=lenet 49 | -------------------------------------------------------------------------------- /terngrad/WORKSPACE: -------------------------------------------------------------------------------- 1 | workspace(name = "inception") 2 | -------------------------------------------------------------------------------- /terngrad/build_all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | bazel build inception/download_and_preprocess_imagenet 7 | 8 | bazel build inception/mnist_train 9 | bazel build inception/mnist_eval 10 | 11 | bazel build inception/cifar10_train 12 | bazel build inception/cifar10_eval 13 | 14 | bazel build inception/imagenet_train 15 | bazel build inception/imagenet_eval 16 | 17 | bazel build inception/imagenet_distributed_train 18 | bazel build inception/cifar10_distributed_train 19 | -------------------------------------------------------------------------------- /terngrad/config_dist.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | set -x 4 | 5 | ########################## Example setup ############################### 6 | # Parameter server is 10.236.176.29:2222 7 | # Worker 0 is GPU 1 in 10.236.176.28:2224 using ~/dataset/cifar10-data-shard-0-499 as training data 8 | # Worker 1 is GPU 2 in 10.236.176.29:2226 using ~/dataset/cifar10-data-shard-500-999 as training data 9 | # The whole cifar10 dataset is split into cifar10-data-shard-0-499 and cifar10-data-shard-500-999 10 | 11 | 12 | ######################## Workspace of TernGrad ########################## 13 | # The path of executables (terngrad/terngrad).
Must be the same across all nodes 14 | WORKSPACE="~/github/users/wenwei202/terngrad/terngrad" 15 | 16 | 17 | 18 | 19 | #################### Scripts to start workers and ps ##################### 20 | # The script to start worker 21 | # Customize WORKER_SCRIPT for your own training 22 | WORKER_SCRIPT="./run_single_worker_cifarnet.sh" 23 | #WORKER_SCRIPT="./run_single_worker_alexnet.sh" 24 | 25 | # The script to start ps (depending on dataset only) 26 | # Select one from those below 27 | PS_SCRIPT="./run_single_ps_cifar10.sh" 28 | #PS_SCRIPT="./run_single_ps_imagenet.sh" 29 | 30 | 31 | 32 | ######################### Configurations of ps ########################### 33 | # The list of hosts and ports of ps 34 | # Multiple ps not tested yet 35 | PS_HOSTS=( \ 36 | 10.236.176.29:2222 \ 37 | ) 38 | 39 | 40 | 41 | ######################### Configurations of workers ####################### 42 | # The list of hosts and ports of workers 43 | WORKER_HOSTS=( \ 44 | 10.236.176.28:2224 \ 45 | 10.236.176.29:2226 \ 46 | ) 47 | # GPU IDs in corresponding workers 48 | WORKER_DEVICES=( \ 49 | 1 \ 50 | 2 \ 51 | ) 52 | # Paths of dataset shards in corresponding workers 53 | DATA_DIR=( \ 54 | ~/dataset/cifar10-data-shard-0-499 \ 55 | ~/dataset/cifar10-data-shard-500-999 \ 56 | ) 57 | -------------------------------------------------------------------------------- /terngrad/deprecated/run_multi_gpus.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | set -x 4 | 5 | DATASET_NAME=imagenet # imagenet or cifar10 6 | ROOT_WORKSPACE=/tmp/ # the location to store tf.summary and logs 7 | DATA_DIR=${HOME}/dataset/${DATASET_NAME}-data # dataset location 8 | FINETUNED_MODEL_PATH= 9 | NUM_GPUS=2 10 | export CUDA_VISIBLE_DEVICES=0,1 # specify visible gpus to tensorflow 11 | OPTIMIZER=momentum 12 | NET=alexnet 13 | IMAGE_SIZE=224 14 | GRAD_BITS=32 15 | BASE_LR=0.01 16 | CLIP_FACTOR=0.0 # 0.0 means no clipping 17 | # when GRAD_BITS=1 and FLOATING_GRAD_EPOCH>0, switch to floating gradients every FLOATING_GRAD_EPOCH epochs and then switch back 18 | FLOATING_GRAD_EPOCH=0 # 0 means no switching 19 | WEIGHT_DECAY=0.0005 # default - alexnet/vgg_a/vgg_16:0.0005, inception_v3:0.00004, cifar10_alexnet:0.004 20 | DROPOUT_KEEP_PROB=0.5 # The probability to keep in dropout 21 | MOMENTUM=0.9 22 | SIZE_TO_BINARIZE=1 # the min size of variable to enable binarizing. 1 means binarizing all variables when GRAD_BITS=1 23 | TRAIN_BATCH_SIZE=256 # total batch size 24 | VAL_BATCH_SIZE=50 # set smaller to avoid OOM 25 | NUM_EPOCHS_PER_DECAY=20 # epochs per learning rate decay 26 | MAX_STEPS=370000 27 | VAL_TOWER=0 # -1 for cpu 28 | EVAL_INTERVAL_SECS=900 # seconds to evaluate the accuracy 29 | EVAL_DEVICE="/gpu:0" # specify the device to eval. e.g. "/gpu:1", "/cpu:0" 30 | RESTORE_AVG_VAR=True # use the moving average parameters to eval? 31 | SEED=123 # use ${RANDOM} if reproducible results are not required 32 | 33 | if [ ! -d "$ROOT_WORKSPACE" ]; then 34 | echo "${ROOT_WORKSPACE} does not exist!" 35 | exit 36 | fi 37 | 38 | TRAIN_WORKSPACE=${ROOT_WORKSPACE}/${DATASET_NAME}_training_data/ 39 | EVAL_WORKSPACE=${ROOT_WORKSPACE}/${DATASET_NAME}_eval_data/ 40 | INFO_WORKSPACE=${ROOT_WORKSPACE}/${DATASET_NAME}_info/ 41 | if [ ! -d "${INFO_WORKSPACE}" ]; then 42 | echo "Creating ${INFO_WORKSPACE} ..."
43 | mkdir -p ${INFO_WORKSPACE} 44 | fi 45 | current_time=$(date) 46 | current_time=${current_time// /_} 47 | current_time=${current_time//:/-} 48 | FOLDER_NAME=${DATASET_NAME}_${NET}_${IMAGE_SIZE}_${OPTIMIZER}_${GRAD_BITS}_${BASE_LR}_${CLIP_FACTOR}_${FLOATING_GRAD_EPOCH}_${WEIGHT_DECAY}_${MOMENTUM}_${SIZE_TO_BINARIZE}_${TRAIN_BATCH_SIZE}_${NUM_GPUS}_${current_time} 49 | TRAIN_DIR=${TRAIN_WORKSPACE}/${FOLDER_NAME} 50 | EVAL_DIR=${EVAL_WORKSPACE}/${FOLDER_NAME} 51 | if [ ! -d "$TRAIN_DIR" ]; then 52 | echo "Creating ${TRAIN_DIR} ..." 53 | mkdir -p ${TRAIN_DIR} 54 | fi 55 | if [ ! -d "$EVAL_DIR" ]; then 56 | echo "Creating ${EVAL_DIR} ..." 57 | mkdir -p ${EVAL_DIR} 58 | fi 59 | 60 | bazel-bin/inception/${DATASET_NAME}_eval \ 61 | --eval_interval_secs ${EVAL_INTERVAL_SECS} \ 62 | --device ${EVAL_DEVICE} \ 63 | --restore_avg_var ${RESTORE_AVG_VAR} \ 64 | --data_dir ${DATA_DIR} \ 65 | --net ${NET} \ 66 | --image_size ${IMAGE_SIZE} \ 67 | --batch_size ${VAL_BATCH_SIZE} \ 68 | --max_steps ${MAX_STEPS} \ 69 | --checkpoint_dir ${TRAIN_DIR} \ 70 | --tower ${VAL_TOWER} \ 71 | --eval_dir ${EVAL_DIR} > ${INFO_WORKSPACE}/eval_${FOLDER_NAME}_info.txt 2>&1 & 72 | 73 | bazel-bin/inception/${DATASET_NAME}_train \ 74 | --seed ${SEED} \ 75 | --pretrained_model_checkpoint_path "${FINETUNED_MODEL_PATH}" \ 76 | --num_epochs_per_decay ${NUM_EPOCHS_PER_DECAY} \ 77 | --initial_learning_rate ${BASE_LR} \ 78 | --grad_bits ${GRAD_BITS} \ 79 | --clip_factor ${CLIP_FACTOR} \ 80 | --floating_grad_epoch ${FLOATING_GRAD_EPOCH} \ 81 | --weight_decay ${WEIGHT_DECAY} \ 82 | --dropout_keep_prob ${DROPOUT_KEEP_PROB} \ 83 | --momentum ${MOMENTUM} \ 84 | --size_to_binarize ${SIZE_TO_BINARIZE} \ 85 | --optimizer ${OPTIMIZER} \ 86 | --net ${NET} \ 87 | --image_size ${IMAGE_SIZE} \ 88 | --num_gpus ${NUM_GPUS} \ 89 | --batch_size ${TRAIN_BATCH_SIZE} \ 90 | --max_steps ${MAX_STEPS} \ 91 | --train_dir ${TRAIN_DIR} \ 92 | --data_dir ${DATA_DIR} > ${INFO_WORKSPACE}/training_${FOLDER_NAME}_info.txt 2>&1 & 93 | -------------------------------------------------------------------------------- /terngrad/deprecated/run_multi_gpus_cifar10_quick.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | set -x 4 | 5 | DATASET_NAME=cifar10 # imagenet or cifar10 6 | ROOT_WORKSPACE=${HOME}/dataset/results/cifar10/ # the location to store summary and logs 7 | DATA_DIR=${HOME}/dataset/${DATASET_NAME}-data # dataset location 8 | FINETUNED_MODEL_PATH= 9 | NUM_GPUS=2 # num of physical gpus 10 | export CUDA_VISIBLE_DEVICES=0,1 # specify visible gpus to tensorflow 11 | NUM_NODES=2 # num of virtual nodes on physical gpus 12 | OPTIMIZER=momentum 13 | NET=cifar10_alexnet 14 | IMAGE_SIZE=24 15 | GRAD_BITS=32 16 | BASE_LR=0.01 17 | CLIP_FACTOR=0.0 # 0.0 means no clipping 18 | # when GRAD_BITS=1 and FLOATING_GRAD_EPOCH>0, switch to floating gradients every FLOATING_GRAD_EPOCH epochs and then switch back 19 | FLOATING_GRAD_EPOCH=0 # 0 means no switching 20 | WEIGHT_DECAY=0.004 # default - alexnet/vgg_a/vgg_16:0.0005, inception_v3:0.00004, cifar10_alexnet:0.004 21 | MOMENTUM=0.9 22 | LR_DECAY_TYPE="polynomial" 23 | SIZE_TO_BINARIZE=1 # The min size of variable to enable binarizing. e.g., 385 means biases are excluded from binarizing 24 | TRAIN_BATCH_SIZE=128 25 | SAVE_ITER=2000 # Save summaries and checkpoint every SAVE_ITER iterations 26 | QUANTIZE_LOGITS=True # Whether to quantize the gradients in the last logits layer.
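# The settings above run a 32-bit floating-gradient baseline. A TernGrad run
# of this script would flip the gradient knobs instead, e.g. (a suggested
# starting point with gradient clipping enabled; tune per experiment):
#   GRAD_BITS=1
#   CLIP_FACTOR=2.5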
27 | VAL_BATCH_SIZE=50 # set smaller to avoid OOM 28 | MAX_STEPS=80000 29 | VAL_TOWER=0 # -1 for cpu 30 | EVAL_INTERVAL_SECS=10 31 | EVAL_DEVICE="/gpu:0" # specify the device to eval. e.g. "/gpu:1", "/cpu:0" 32 | RESTORE_AVG_VAR=True # use the moving average parameters to eval? 33 | SEED=123 # use ${RANDOM} if reproducible results are not required 34 | 35 | if [ ! -d "$ROOT_WORKSPACE" ]; then 36 | echo "${ROOT_WORKSPACE} does not exist!" 37 | exit 38 | fi 39 | 40 | TRAIN_WORKSPACE=${ROOT_WORKSPACE}/${DATASET_NAME}_training_data/ 41 | EVAL_WORKSPACE=${ROOT_WORKSPACE}/${DATASET_NAME}_eval_data/ 42 | INFO_WORKSPACE=${ROOT_WORKSPACE}/${DATASET_NAME}_info/ 43 | if [ ! -d "${INFO_WORKSPACE}" ]; then 44 | echo "Creating ${INFO_WORKSPACE} ..." 45 | mkdir -p ${INFO_WORKSPACE} 46 | fi 47 | current_time=$(date) 48 | current_time=${current_time// /_} 49 | current_time=${current_time//:/-} 50 | FOLDER_NAME=${DATASET_NAME}_${NET}_${IMAGE_SIZE}_${OPTIMIZER}_${GRAD_BITS}_${BASE_LR}_${CLIP_FACTOR}_${FLOATING_GRAD_EPOCH}_${WEIGHT_DECAY}_${MOMENTUM}_${SIZE_TO_BINARIZE}_${TRAIN_BATCH_SIZE}_${NUM_NODES}_${current_time} 51 | TRAIN_DIR=${TRAIN_WORKSPACE}/${FOLDER_NAME} 52 | EVAL_DIR=${EVAL_WORKSPACE}/${FOLDER_NAME} 53 | if [ ! -d "$TRAIN_DIR" ]; then 54 | echo "Creating ${TRAIN_DIR} ..." 55 | mkdir -p ${TRAIN_DIR} 56 | fi 57 | if [ ! -d "$EVAL_DIR" ]; then 58 | echo "Creating ${EVAL_DIR} ..." 59 | mkdir -p ${EVAL_DIR} 60 | fi 61 | 62 | bazel-bin/inception/${DATASET_NAME}_eval \ 63 | --eval_interval_secs ${EVAL_INTERVAL_SECS} \ 64 | --device ${EVAL_DEVICE} \ 65 | --restore_avg_var ${RESTORE_AVG_VAR} \ 66 | --data_dir ${DATA_DIR} \ 67 | --subset "test" \ 68 | --net ${NET} \ 69 | --image_size ${IMAGE_SIZE} \ 70 | --batch_size ${VAL_BATCH_SIZE} \ 71 | --max_steps ${MAX_STEPS} \ 72 | --checkpoint_dir ${TRAIN_DIR} \ 73 | --tower ${VAL_TOWER} \ 74 | --eval_dir ${EVAL_DIR} > ${INFO_WORKSPACE}/eval_${FOLDER_NAME}_info.txt 2>&1 & 75 | 76 | bazel-bin/inception/${DATASET_NAME}_train \ 77 | --seed ${SEED} \ 78 | --pretrained_model_checkpoint_path "${FINETUNED_MODEL_PATH}" \ 79 | --initial_learning_rate ${BASE_LR} \ 80 | --grad_bits ${GRAD_BITS} \ 81 | --clip_factor ${CLIP_FACTOR} \ 82 | --floating_grad_epoch ${FLOATING_GRAD_EPOCH} \ 83 | --weight_decay ${WEIGHT_DECAY} \ 84 | --momentum ${MOMENTUM} \ 85 | --learning_rate_decay_type ${LR_DECAY_TYPE} \ 86 | --size_to_binarize ${SIZE_TO_BINARIZE} \ 87 | --optimizer ${OPTIMIZER} \ 88 | --net ${NET} \ 89 | --image_size ${IMAGE_SIZE} \ 90 | --num_gpus ${NUM_GPUS} \ 91 | --num_nodes ${NUM_NODES} \ 92 | --batch_size ${TRAIN_BATCH_SIZE} \ 93 | --save_iter ${SAVE_ITER} \ 94 | --quantize_logits ${QUANTIZE_LOGITS} \ 95 | --max_steps ${MAX_STEPS} \ 96 | --train_dir ${TRAIN_DIR} \ 97 | --data_dir ${DATA_DIR} > ${INFO_WORKSPACE}/training_${FOLDER_NAME}_info.txt 2>&1 & 98 | -------------------------------------------------------------------------------- /terngrad/deprecated/run_multi_gpus_googlenet.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | set -x 4 | 5 | DATASET_NAME=imagenet # imagenet or cifar10 6 | ROOT_WORKSPACE=${HOME}/dataset/results/imagenet # the location to store tf.summary and logs 7 | DATA_DIR=${HOME}/dataset/${DATASET_NAME}-data # dataset location 8 | FINETUNED_MODEL_PATH= 9 | NUM_GPUS=2 10 | export CUDA_VISIBLE_DEVICES=0,1 # specify visible gpus to tensorflow 11 | OPTIMIZER=momentum 12 | NET=googlenet 13 | IMAGE_SIZE=224 14 | GRAD_BITS=32 15 | BASE_LR=0.01 16 | CLIP_FACTOR=0.0 # 0.0 means no clipping 17 | #
when GRAD_BITS=1 and FLOATING_GRAD_EPOCH>0, switch to floating gradients every FLOATING_GRAD_EPOCH epochs and then switch back 18 | FLOATING_GRAD_EPOCH=0 # 0 means no switching 19 | WEIGHT_DECAY=0.00004 # default - alexnet/vgg_a/vgg_16:0.0005, inception_v3:0.00004, cifar10_alexnet:0.004 20 | DROPOUT_KEEP_PROB=0.8 # The probability to keep in dropout 21 | MOMENTUM=0.9 22 | LR_DECAY_FACTOR=0.96 # learning rate decay factor 23 | SIZE_TO_BINARIZE=1 # the min size of variable to enable binarizing. 1 means binarizing all variables when GRAD_BITS=1 24 | TRAIN_BATCH_SIZE=32 # total batch size 25 | VAL_BATCH_SIZE=25 # set smaller to avoid OOM 26 | NUM_EPOCHS_PER_DECAY=8 # epochs per learning rate decay 27 | MAX_STEPS=10000000 28 | VAL_TOWER=0 # -1 for cpu 29 | EVAL_INTERVAL_SECS=900 # seconds to evaluate the accuracy 30 | EVAL_DEVICE="/gpu:0" # specify the device to eval. e.g. "/gpu:1", "/cpu:0" 31 | RESTORE_AVG_VAR=True # use the moving average parameters to eval? 32 | SEED=123 # use ${RANDOM} if reproducible results are not required 33 | 34 | if [ ! -d "$ROOT_WORKSPACE" ]; then 35 | echo "${ROOT_WORKSPACE} does not exist!" 36 | exit 37 | fi 38 | 39 | TRAIN_WORKSPACE=${ROOT_WORKSPACE}/${DATASET_NAME}_training_data/ 40 | EVAL_WORKSPACE=${ROOT_WORKSPACE}/${DATASET_NAME}_eval_data/ 41 | INFO_WORKSPACE=${ROOT_WORKSPACE}/${DATASET_NAME}_info/ 42 | if [ ! -d "${INFO_WORKSPACE}" ]; then 43 | echo "Creating ${INFO_WORKSPACE} ..." 44 | mkdir -p ${INFO_WORKSPACE} 45 | fi 46 | current_time=$(date) 47 | current_time=${current_time// /_} 48 | current_time=${current_time//:/-} 49 | FOLDER_NAME=${DATASET_NAME}_${NET}_${IMAGE_SIZE}_${OPTIMIZER}_${GRAD_BITS}_${BASE_LR}_${CLIP_FACTOR}_${FLOATING_GRAD_EPOCH}_${WEIGHT_DECAY}_${MOMENTUM}_${SIZE_TO_BINARIZE}_${TRAIN_BATCH_SIZE}_${NUM_GPUS}_${current_time} 50 | TRAIN_DIR=${TRAIN_WORKSPACE}/${FOLDER_NAME} 51 | EVAL_DIR=${EVAL_WORKSPACE}/${FOLDER_NAME} 52 | if [ ! -d "$TRAIN_DIR" ]; then 53 | echo "Creating ${TRAIN_DIR} ..." 54 | mkdir -p ${TRAIN_DIR} 55 | fi 56 | if [ ! -d "$EVAL_DIR" ]; then 57 | echo "Creating ${EVAL_DIR} ..."
58 | mkdir -p ${EVAL_DIR} 59 | fi 60 | 61 | bazel-bin/inception/${DATASET_NAME}_eval \ 62 | --eval_interval_secs ${EVAL_INTERVAL_SECS} \ 63 | --device ${EVAL_DEVICE} \ 64 | --restore_avg_var ${RESTORE_AVG_VAR} \ 65 | --data_dir ${DATA_DIR} \ 66 | --net ${NET} \ 67 | --image_size ${IMAGE_SIZE} \ 68 | --batch_size ${VAL_BATCH_SIZE} \ 69 | --max_steps ${MAX_STEPS} \ 70 | --checkpoint_dir ${TRAIN_DIR} \ 71 | --tower ${VAL_TOWER} \ 72 | --eval_dir ${EVAL_DIR} > ${INFO_WORKSPACE}/eval_${FOLDER_NAME}_info.txt 2>&1 & 73 | 74 | bazel-bin/inception/${DATASET_NAME}_train \ 75 | --seed ${SEED} \ 76 | --pretrained_model_checkpoint_path "${FINETUNED_MODEL_PATH}" \ 77 | --num_epochs_per_decay ${NUM_EPOCHS_PER_DECAY} \ 78 | --initial_learning_rate ${BASE_LR} \ 79 | --grad_bits ${GRAD_BITS} \ 80 | --clip_factor ${CLIP_FACTOR} \ 81 | --floating_grad_epoch ${FLOATING_GRAD_EPOCH} \ 82 | --weight_decay ${WEIGHT_DECAY} \ 83 | --dropout_keep_prob ${DROPOUT_KEEP_PROB} \ 84 | --momentum ${MOMENTUM} \ 85 | --learning_rate_decay_factor ${LR_DECAY_FACTOR} \ 86 | --size_to_binarize ${SIZE_TO_BINARIZE} \ 87 | --optimizer ${OPTIMIZER} \ 88 | --net ${NET} \ 89 | --image_size ${IMAGE_SIZE} \ 90 | --num_gpus ${NUM_GPUS} \ 91 | --batch_size ${TRAIN_BATCH_SIZE} \ 92 | --max_steps ${MAX_STEPS} \ 93 | --train_dir ${TRAIN_DIR} \ 94 | --data_dir ${DATA_DIR} > ${INFO_WORKSPACE}/training_${FOLDER_NAME}_info.txt 2>&1 & 95 | -------------------------------------------------------------------------------- /terngrad/deprecated/run_multi_gpus_vggnet.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | set -x 4 | 5 | DATASET_NAME=imagenet # imagenet or cifar10 6 | ROOT_WORKSPACE=${HOME}/dataset/results/imagenet # the location to store tf.summary and logs 7 | DATA_DIR=${HOME}/dataset/${DATASET_NAME}-data # dataset location 8 | FINETUNED_MODEL_PATH= 9 | NUM_GPUS=4 10 | # export CUDA_VISIBLE_DEVICES=0,1,2,3 # specify visible gpus to tensorflow 11 | OPTIMIZER=momentum 12 | NET=vgg_a # vgg_a or vgg_16 13 | IMAGE_SIZE=224 14 | GRAD_BITS=32 15 | BASE_LR=0.01 16 | CLIP_FACTOR=0.0 # 0.0 means no clipping 17 | # when GRAD_BITS=1 and FLOATING_GRAD_EPOCH>0, switch to floating gradients every FLOATING_GRAD_EPOCH epochs and then switch back 18 | FLOATING_GRAD_EPOCH=0 # 0 means no switching 19 | WEIGHT_DECAY=0.0005 # default - alexnet/vgg_a/vgg_16:0.0005, inception_v3:0.00004, cifar10_alexnet:0.004 20 | DROPOUT_KEEP_PROB=0.5 # The probability to keep in dropout 21 | MOMENTUM=0.9 22 | SIZE_TO_BINARIZE=1 # the min size of variable to enable binarizing. 1 means binarizing all variables when GRAD_BITS=1 23 | TRAIN_BATCH_SIZE=256 # total batch size 24 | VAL_BATCH_SIZE=25 # set smaller to avoid OOM 25 | NUM_EPOCHS_PER_DECAY=20 # epochs per learning rate decay 26 | MAX_STEPS=370000 27 | VAL_TOWER=0 # -1 for cpu 28 | EVAL_INTERVAL_SECS=900 # seconds to evaluate the accuracy 29 | EVAL_DEVICE="/gpu:0" # specify the device to eval. e.g. "/gpu:1", "/cpu:0" 30 | RESTORE_AVG_VAR=True # use the moving average parameters to eval? 31 | SEED=123 # use ${RANDOM} if reproducible results are not required 32 | 33 | if [ ! -d "$ROOT_WORKSPACE" ]; then 34 | echo "${ROOT_WORKSPACE} does not exist!" 35 | exit 36 | fi 37 | 38 | TRAIN_WORKSPACE=${ROOT_WORKSPACE}/${DATASET_NAME}_training_data/ 39 | EVAL_WORKSPACE=${ROOT_WORKSPACE}/${DATASET_NAME}_eval_data/ 40 | INFO_WORKSPACE=${ROOT_WORKSPACE}/${DATASET_NAME}_info/ 41 | if [ ! -d "${INFO_WORKSPACE}" ]; then 42 | echo "Creating ${INFO_WORKSPACE} ..."
43 | mkdir -p ${INFO_WORKSPACE} 44 | fi 45 | current_time=$(date) 46 | current_time=${current_time// /_} 47 | current_time=${current_time//:/-} 48 | FOLDER_NAME=${DATASET_NAME}_${NET}_${IMAGE_SIZE}_${OPTIMIZER}_${GRAD_BITS}_${BASE_LR}_${CLIP_FACTOR}_${FLOATING_GRAD_EPOCH}_${WEIGHT_DECAY}_${MOMENTUM}_${SIZE_TO_BINARIZE}_${TRAIN_BATCH_SIZE}_${NUM_GPUS}_${current_time} 49 | TRAIN_DIR=${TRAIN_WORKSPACE}/${FOLDER_NAME} 50 | EVAL_DIR=${EVAL_WORKSPACE}/${FOLDER_NAME} 51 | if [ ! -d "$TRAIN_DIR" ]; then 52 | echo "Creating ${TRAIN_DIR} ..." 53 | mkdir -p ${TRAIN_DIR} 54 | fi 55 | if [ ! -d "$EVAL_DIR" ]; then 56 | echo "Creating ${EVAL_DIR} ..." 57 | mkdir -p ${EVAL_DIR} 58 | fi 59 | 60 | bazel-bin/inception/${DATASET_NAME}_eval \ 61 | --eval_interval_secs ${EVAL_INTERVAL_SECS} \ 62 | --device ${EVAL_DEVICE} \ 63 | --restore_avg_var ${RESTORE_AVG_VAR} \ 64 | --data_dir ${DATA_DIR} \ 65 | --net ${NET} \ 66 | --image_size ${IMAGE_SIZE} \ 67 | --batch_size ${VAL_BATCH_SIZE} \ 68 | --max_steps ${MAX_STEPS} \ 69 | --checkpoint_dir ${TRAIN_DIR} \ 70 | --tower ${VAL_TOWER} \ 71 | --eval_dir ${EVAL_DIR} > ${INFO_WORKSPACE}/eval_${FOLDER_NAME}_info.txt 2>&1 & 72 | 73 | bazel-bin/inception/${DATASET_NAME}_train \ 74 | --seed ${SEED} \ 75 | --pretrained_model_checkpoint_path "${FINETUNED_MODEL_PATH}" \ 76 | --num_epochs_per_decay ${NUM_EPOCHS_PER_DECAY} \ 77 | --initial_learning_rate ${BASE_LR} \ 78 | --grad_bits ${GRAD_BITS} \ 79 | --clip_factor ${CLIP_FACTOR} \ 80 | --floating_grad_epoch ${FLOATING_GRAD_EPOCH} \ 81 | --weight_decay ${WEIGHT_DECAY} \ 82 | --dropout_keep_prob ${DROPOUT_KEEP_PROB} \ 83 | --momentum ${MOMENTUM} \ 84 | --size_to_binarize ${SIZE_TO_BINARIZE} \ 85 | --optimizer ${OPTIMIZER} \ 86 | --net ${NET} \ 87 | --image_size ${IMAGE_SIZE} \ 88 | --num_gpus ${NUM_GPUS} \ 89 | --batch_size ${TRAIN_BATCH_SIZE} \ 90 | --max_steps ${MAX_STEPS} \ 91 | --train_dir ${TRAIN_DIR} \ 92 | --data_dir ${DATA_DIR} > ${INFO_WORKSPACE}/training_${FOLDER_NAME}_info.txt 2>&1 & 93 | -------------------------------------------------------------------------------- /terngrad/g3doc/inception_v3_architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wenwei202/terngrad/ec4f75e9a3a1e1c4b2e6494d830fbdfdd2e03ddc/terngrad/g3doc/inception_v3_architecture.png -------------------------------------------------------------------------------- /terngrad/inception/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | 16 | """Makes helper libraries available in the inception package.""" 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function -------------------------------------------------------------------------------- /terngrad/inception/cifar10_data.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Small library that points to the cifar-10 data set. 16 | """ 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | 22 | import tensorflow as tf 23 | from inception.dataset import Dataset 24 | import os 25 | 26 | FLAGS = tf.app.flags.FLAGS 27 | 28 | class Cifar10Data(Dataset): 29 | """cifar-10 data set.""" 30 | 31 | def __init__(self, subset): 32 | super(Cifar10Data, self).__init__('cifar10', subset) 33 | 34 | def num_classes(self): 35 | """Returns the number of classes in the data set.""" 36 | return 10 37 | 38 | def num_examples_per_epoch(self): 39 | """Returns the number of examples in the data set.""" 40 | if self.subset == 'train': 41 | return 50000 42 | if self.subset == 'test': 43 | return 10000 44 | 45 | def download_message(self): 46 | """Instruction to download and build the cifar10 data set.""" 47 | 48 | print('Failed to find any cifar10 %s files'% self.subset) 49 | print('') 50 | print('If you have already downloaded and processed the data, then make ' 51 | 'sure to set --data_dir to point to the directory containing the ' 52 | 'location of the sharded TFRecords.\n') 53 | print('If you have not downloaded and prepared the cifar10 data in the ' 54 | 'TFRecord format, you will need to do this at least once. This ' 55 | 'process could take a while depending on the speed of your ' 56 | 'computer and network connection\n') 57 | print('Please see README.md for instructions on how to build ' 58 | 'the cifar10 dataset using download_and_convert_data.py. For example: \n') 59 | print ('cd ./slim\n') 60 | print ('python download_and_convert_data.py ' 61 | '--dataset_name cifar10 --dataset_dir ~/dataset/cifar10-data/ [--shard True]\n') 62 | 63 | def available_subsets(self): 64 | """Returns the list of available subsets.""" 65 | return ['train', 'test'] -------------------------------------------------------------------------------- /terngrad/inception/cifar10_distributed_train.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | # pylint: disable=line-too-long 16 | """A binary to train Inception in a distributed manner using multiple systems. 17 | 18 | Please see accompanying README.md for details and instructions. 19 | """ 20 | from __future__ import absolute_import 21 | from __future__ import division 22 | from __future__ import print_function 23 | 24 | import tensorflow as tf 25 | 26 | from inception import inception_distributed_train 27 | from inception.cifar10_data import Cifar10Data 28 | 29 | FLAGS = tf.app.flags.FLAGS 30 | 31 | 32 | def main(unused_args): 33 | FLAGS.dataset_name = 'cifar10' 34 | 35 | assert FLAGS.job_name in ['ps', 'worker'], 'job_name must be ps or worker' 36 | 37 | # Extract all the hostnames for the ps and worker jobs to construct the 38 | # cluster spec. 39 | ps_hosts = FLAGS.ps_hosts.split(',') 40 | worker_hosts = FLAGS.worker_hosts.split(',') 41 | tf.logging.info('PS hosts are: %s' % ps_hosts) 42 | tf.logging.info('Worker hosts are: %s' % worker_hosts) 43 | 44 | cluster_spec = tf.train.ClusterSpec({'ps': ps_hosts, 45 | 'worker': worker_hosts}) 46 | sess_config = tf.ConfigProto() 47 | sess_config.gpu_options.allow_growth = True 48 | 49 | server = tf.train.Server( 50 | {'ps': ps_hosts, 51 | 'worker': worker_hosts}, 52 | job_name=FLAGS.job_name, 53 | task_index=FLAGS.task_id, 54 | config=sess_config) 55 | 56 | if FLAGS.job_name == 'ps': 57 | # `ps` jobs wait for incoming connections from the workers. 58 | server.join() 59 | else: 60 | # `worker` jobs will actually do the work. 61 | dataset = Cifar10Data(subset=FLAGS.subset) 62 | assert dataset.data_files() 63 | # Only the chief checks for or creates train_dir. 64 | if FLAGS.task_id == 0: 65 | if not tf.gfile.Exists(FLAGS.train_dir): 66 | tf.gfile.MakeDirs(FLAGS.train_dir) 67 | inception_distributed_train.train(server.target, dataset, cluster_spec) 68 | 69 | if __name__ == '__main__': 70 | tf.logging.set_verbosity(tf.logging.INFO) 71 | tf.app.run() 72 | -------------------------------------------------------------------------------- /terngrad/inception/cifar10_eval.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """A binary to evaluate Inception on the CIFAR-10 data set.
16 | 17 | Note that the precision/recall figures quoted for the supplied pre-trained 18 | ImageNet checkpoint (precision @ 1 = 0.7874, recall @ 5 = 0.9436) do not 19 | apply to CIFAR-10. 20 | 21 | See the README.md for more details. 22 | """ 23 | from __future__ import absolute_import 24 | from __future__ import division 25 | from __future__ import print_function 26 | 27 | 28 | import tensorflow as tf 29 | 30 | from inception import inception_eval 31 | from inception.cifar10_data import Cifar10Data 32 | 33 | FLAGS = tf.app.flags.FLAGS 34 | 35 | 36 | def main(unused_argv=None): 37 | dataset = Cifar10Data(subset=FLAGS.subset) 38 | assert dataset.data_files() 39 | if tf.gfile.Exists(FLAGS.eval_dir): 40 | tf.gfile.DeleteRecursively(FLAGS.eval_dir) 41 | tf.gfile.MakeDirs(FLAGS.eval_dir) 42 | FLAGS.dataset_name = 'cifar10' 43 | FLAGS.num_examples = dataset.num_examples_per_epoch() 44 | inception_eval.evaluate(dataset) 45 | 46 | 47 | if __name__ == '__main__': 48 | tf.app.run() 49 | -------------------------------------------------------------------------------- /terngrad/inception/cifar10_train.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """A binary to train Inception on the CIFAR-10 data set. 16 | """ 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | 22 | 23 | import tensorflow as tf 24 | 25 | from inception import inception_train 26 | from inception.cifar10_data import Cifar10Data 27 | 28 | FLAGS = tf.app.flags.FLAGS 29 | 30 | 31 | def main(_): 32 | dataset = Cifar10Data(subset=FLAGS.subset) 33 | assert dataset.data_files() 34 | if tf.gfile.Exists(FLAGS.train_dir): 35 | tf.gfile.DeleteRecursively(FLAGS.train_dir) 36 | tf.gfile.MakeDirs(FLAGS.train_dir) 37 | FLAGS.dataset_name = 'cifar10' 38 | inception_train.train(dataset) 39 | 40 | 41 | if __name__ == '__main__': 42 | tf.app.run() 43 | -------------------------------------------------------------------------------- /terngrad/inception/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | # ============================================================================== 15 | 16 | """Makes helper libraries available in the data package.""" 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function -------------------------------------------------------------------------------- /terngrad/inception/data/download_and_preprocess_flowers.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2016 Google Inc. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ============================================================================== 16 | 17 | # Script to download and preprocess the flowers data set. This data set 18 | # provides a demonstration for how to perform fine-tuning (i.e. transfer 19 | # learning) from one model to a new data set. 20 | # 21 | # This script provides a demonstration for how to prepare an arbitrary 22 | # data set for training an Inception v3 model. 23 | # 24 | # We demonstrate this with the flowers data set, which consists of 25 | # labeled flower images from 5 classes: 26 | # 27 | # daisy, dandelion, roses, sunflowers, tulips 28 | # 29 | # The final output of this script is a set of sharded TFRecord files containing 30 | # serialized Example protocol buffers. See build_image_data.py for 31 | # details of how the Example protocol buffer contains image data. 32 | # 33 | # usage: 34 | # ./download_and_preprocess_flowers.sh [data-dir] 35 | set -e 36 | 37 | if [ -z "$1" ]; then 38 | echo "usage: download_and_preprocess_flowers.sh [data dir]" 39 | exit 40 | fi 41 | 42 | # Create the output and temporary directories. 43 | DATA_DIR="${1%/}" 44 | SCRATCH_DIR="${DATA_DIR}/raw-data/" 45 | mkdir -p "${DATA_DIR}" 46 | mkdir -p "${SCRATCH_DIR}" 47 | WORK_DIR="$0.runfiles/inception/inception" 48 | 49 | # Download the flowers data. 50 | DATA_URL="http://download.tensorflow.org/example_images/flower_photos.tgz" 51 | CURRENT_DIR=$(pwd) 52 | cd "${DATA_DIR}" 53 | TARBALL="flower_photos.tgz" 54 | if [ ! -f ${TARBALL} ]; then 55 | echo "Downloading flower data set." 56 | wget -O ${TARBALL} "${DATA_URL}" 57 | else 58 | echo "Skipping download of flower data." 59 | fi 60 | 61 | # Note the locations of the train and validation data. 62 | TRAIN_DIRECTORY="${SCRATCH_DIR}train/" 63 | VALIDATION_DIRECTORY="${SCRATCH_DIR}validation/" 64 | 65 | # Expand the data into the flower_photos/ directory and rename it as the 66 | # train directory. 67 | tar xf flower_photos.tgz 68 | rm -rf "${TRAIN_DIRECTORY}" "${VALIDATION_DIRECTORY}" 69 | mv flower_photos "${TRAIN_DIRECTORY}" 70 | 71 | # Generate a list of 5 labels: daisy, dandelion, roses, sunflowers, tulips 72 | LABELS_FILE="${SCRATCH_DIR}/labels.txt" 73 | ls -1 "${TRAIN_DIRECTORY}" | grep -v 'LICENSE' | sed 's/\///' | sort > "${LABELS_FILE}" 74 | 75 | # Generate the validation data set.
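# (The loop below implements the split: it reads one label per line from
# ${LABELS_FILE}; for each label it shuffles the image file names under that
# label's train directory and moves the first 100 of them into the matching
# validation sub-directory, yielding a 100-image validation set per class.)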
76 | while read LABEL; do 77 | VALIDATION_DIR_FOR_LABEL="${VALIDATION_DIRECTORY}${LABEL}" 78 | TRAIN_DIR_FOR_LABEL="${TRAIN_DIRECTORY}${LABEL}" 79 | 80 | # Move 100 randomly selected images to the validation set. 81 | mkdir -p "${VALIDATION_DIR_FOR_LABEL}" 82 | VALIDATION_IMAGES=$(ls -1 "${TRAIN_DIR_FOR_LABEL}" | shuf | head -100) 83 | for IMAGE in ${VALIDATION_IMAGES}; do 84 | mv -f "${TRAIN_DIRECTORY}${LABEL}/${IMAGE}" "${VALIDATION_DIR_FOR_LABEL}" 85 | done 86 | done < "${LABELS_FILE}" 87 | 88 | # Build the TFRecords version of the image data. 89 | cd "${CURRENT_DIR}" 90 | BUILD_SCRIPT="${WORK_DIR}/build_image_data" 91 | OUTPUT_DIRECTORY="${DATA_DIR}" 92 | "${BUILD_SCRIPT}" \ 93 | --train_directory="${TRAIN_DIRECTORY}" \ 94 | --validation_directory="${VALIDATION_DIRECTORY}" \ 95 | --output_directory="${OUTPUT_DIRECTORY}" \ 96 | --labels_file="${LABELS_FILE}" 97 | -------------------------------------------------------------------------------- /terngrad/inception/data/download_and_preprocess_flowers_mac.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2016 Google Inc. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ============================================================================== 16 | 17 | # Script to download and preprocess the flowers data set. This data set 18 | # provides a demonstration for how to perform fine-tuning (i.e. transfer 19 | # learning) from one model to a new data set. 20 | # 21 | # This script provides a demonstration for how to prepare an arbitrary 22 | # data set for training an Inception v3 model. 23 | # 24 | # We demonstrate this with the flowers data set, which consists of 25 | # labeled flower images from 5 classes: 26 | # 27 | # daisy, dandelion, roses, sunflowers, tulips 28 | # 29 | # The final output of this script is a set of sharded TFRecord files containing 30 | # serialized Example protocol buffers. See build_image_data.py for 31 | # details of how the Example protocol buffer contains image data. 32 | # 33 | # usage: 34 | # ./download_and_preprocess_flowers.sh [data-dir] 35 | set -e 36 | 37 | if [ -z "$1" ]; then 38 | echo "usage: download_and_preprocess_flowers.sh [data dir]" 39 | exit 40 | fi 41 | 42 | # Create the output and temporary directories. 43 | DATA_DIR="${1%/}" 44 | SCRATCH_DIR="${DATA_DIR}/raw-data/" 45 | mkdir -p "${DATA_DIR}" 46 | mkdir -p "${SCRATCH_DIR}" 47 | WORK_DIR="$0.runfiles/inception/inception" 48 | 49 | # Download the flowers data. 50 | DATA_URL="http://download.tensorflow.org/example_images/flower_photos.tgz" 51 | CURRENT_DIR=$(pwd) 52 | cd "${DATA_DIR}" 53 | TARBALL="flower_photos.tgz" 54 | if [ ! -f ${TARBALL} ]; then 55 | echo "Downloading flower data set." 56 | wget -O ${TARBALL} "${DATA_URL}" 57 | else 58 | echo "Skipping download of flower data." 59 | fi 60 | 61 | # Note the locations of the train and validation data.
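# (Illustrative values only: if the script were invoked as
# ./download_and_preprocess_flowers.sh ~/dataset/flowers-data, then
# SCRATCH_DIR would be ~/dataset/flowers-data/raw-data/ and the two
# directories below would resolve to .../raw-data/train/ and
# .../raw-data/validation/; the example path is an assumption, not a
# requirement of the script.)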
62 | TRAIN_DIRECTORY="${SCRATCH_DIR}train/" 63 | VALIDATION_DIRECTORY="${SCRATCH_DIR}validation/" 64 | 65 | # Expand the data into the flower_photos/ directory and rename it as the 66 | # train directory. 67 | tar xf flower_photos.tgz 68 | rm -rf "${TRAIN_DIRECTORY}" "${VALIDATION_DIRECTORY}" 69 | mv flower_photos "${TRAIN_DIRECTORY}" 70 | 71 | # Generate a list of 5 labels: daisy, dandelion, roses, sunflowers, tulips 72 | LABELS_FILE="${SCRATCH_DIR}/labels.txt" 73 | ls -1 "${TRAIN_DIRECTORY}" | grep -v 'LICENSE' | sed 's/\///' | sort > "${LABELS_FILE}" 74 | 75 | # Generate the validation data set. 76 | while read LABEL; do 77 | VALIDATION_DIR_FOR_LABEL="${VALIDATION_DIRECTORY}${LABEL}" 78 | TRAIN_DIR_FOR_LABEL="${TRAIN_DIRECTORY}${LABEL}" 79 | 80 | # Move 100 randomly selected images to the validation set. 81 | mkdir -p "${VALIDATION_DIR_FOR_LABEL}" 82 | VALIDATION_IMAGES=$(ls -1 "${TRAIN_DIR_FOR_LABEL}" | gshuf | head -100) 83 | for IMAGE in ${VALIDATION_IMAGES}; do 84 | mv -f "${TRAIN_DIRECTORY}${LABEL}/${IMAGE}" "${VALIDATION_DIR_FOR_LABEL}" 85 | done 86 | done < "${LABELS_FILE}" 87 | 88 | # Build the TFRecords version of the image data. 89 | cd "${CURRENT_DIR}" 90 | BUILD_SCRIPT="${WORK_DIR}/build_image_data" 91 | OUTPUT_DIRECTORY="${DATA_DIR}" 92 | "${BUILD_SCRIPT}" \ 93 | --train_directory="${TRAIN_DIRECTORY}" \ 94 | --validation_directory="${VALIDATION_DIRECTORY}" \ 95 | --output_directory="${OUTPUT_DIRECTORY}" \ 96 | --labels_file="${LABELS_FILE}" 97 | -------------------------------------------------------------------------------- /terngrad/inception/data/download_and_preprocess_imagenet.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2016 Google Inc. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ============================================================================== 16 | 17 | # Script to download and preprocess ImageNet Challenge 2012 18 | # training and validation data set. 19 | # 20 | # The final output of this script is a set of sharded TFRecord files containing 21 | # serialized Example protocol buffers. See build_imagenet_data.py for 22 | # details of how the Example protocol buffers contain the ImageNet data. 23 | # 24 | # The final output of this script is laid out as follows: 25 | # 26 | # data_dir/train-00000-of-01024 27 | # data_dir/train-00001-of-01024 28 | # ... 29 | # data_dir/train-01023-of-01024 30 | # 31 | # and 32 | # 33 | # data_dir/validation-00000-of-00128 34 | # data_dir/validation-00001-of-00128 35 | # ... 36 | # data_dir/validation-00127-of-00128 37 | # 38 | # Note that this script may take several hours to run to completion. The 39 | # conversion of the ImageNet data to TFRecords alone takes 2-3 hours depending 40 | # on the speed of your machine. Please be patient.
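# (A usage sketch with placeholder credentials: download_imagenet.sh, which
# this script calls, reads the IMAGENET_USERNAME and IMAGENET_ACCESS_KEY
# environment variables, so a typical invocation might look like
#   IMAGENET_USERNAME=my_user IMAGENET_ACCESS_KEY=my_key \
#     ./download_and_preprocess_imagenet.sh ${HOME}/dataset/imagenet-data
# where my_user/my_key stand in for real image-net.org credentials.)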
41 | # 42 | # **IMPORTANT** 43 | # To download the raw images, the user must create an account with image-net.org 44 | # and generate a username and access_key. The latter two are required for 45 | # downloading the raw images. 46 | # 47 | # usage: 48 | # ./download_and_preprocess_imagenet.sh [data-dir] 49 | set -e 50 | 51 | if [ -z "$1" ]; then 52 | echo "usage: download_and_preprocess_imagenet.sh [data dir]" 53 | exit 54 | fi 55 | 56 | # Store images in raw pixel format (True) or JPEG format (False) 57 | RAW_PIXEL=False 58 | # When RAW_PIXEL=True and RESIZE_DIMEN>0, resize images to the specified size without preserving the original height/width ratio 59 | RESIZE_DIMEN=0 60 | 61 | # Create the output and temporary directories. 62 | DATA_DIR="${1%/}" 63 | SCRATCH_DIR="${DATA_DIR}/raw-data/" 64 | mkdir -p "${DATA_DIR}" 65 | mkdir -p "${SCRATCH_DIR}" 66 | WORK_DIR="$0.runfiles/inception/inception" 67 | 68 | # Download the ImageNet data. 69 | LABELS_FILE="${WORK_DIR}/data/imagenet_lsvrc_2015_synsets.txt" 70 | DOWNLOAD_SCRIPT="${WORK_DIR}/data/download_imagenet.sh" 71 | "${DOWNLOAD_SCRIPT}" "${SCRATCH_DIR}" "${LABELS_FILE}" 72 | 73 | # Note the locations of the train and validation data. 74 | TRAIN_DIRECTORY="${SCRATCH_DIR}train/" 75 | VALIDATION_DIRECTORY="${SCRATCH_DIR}validation/" 76 | 77 | # Preprocess the validation data by moving the images into the appropriate 78 | # sub-directory based on the label (synset) of the image. 79 | echo "Organizing the validation data into sub-directories." 80 | PREPROCESS_VAL_SCRIPT="${WORK_DIR}/data/preprocess_imagenet_validation_data.py" 81 | VAL_LABELS_FILE="${WORK_DIR}/data/imagenet_2012_validation_synset_labels.txt" 82 | 83 | "${PREPROCESS_VAL_SCRIPT}" "${VALIDATION_DIRECTORY}" "${VAL_LABELS_FILE}" 84 | 85 | # Convert the XML files for bounding box annotations into a single CSV. 86 | echo "Extracting bounding box information from XML." 87 | BOUNDING_BOX_SCRIPT="${WORK_DIR}/data/process_bounding_boxes.py" 88 | BOUNDING_BOX_FILE="${SCRATCH_DIR}/imagenet_2012_bounding_boxes.csv" 89 | BOUNDING_BOX_DIR="${SCRATCH_DIR}bounding_boxes/" 90 | 91 | "${BOUNDING_BOX_SCRIPT}" "${BOUNDING_BOX_DIR}" "${LABELS_FILE}" \ 92 | | sort >"${BOUNDING_BOX_FILE}" 93 | echo "Finished downloading and preprocessing the ImageNet data." 94 | 95 | # Build the TFRecords version of the ImageNet data. 96 | BUILD_SCRIPT="${WORK_DIR}/build_imagenet_data" 97 | OUTPUT_DIRECTORY="${DATA_DIR}" 98 | IMAGENET_METADATA_FILE="${WORK_DIR}/data/imagenet_metadata.txt" 99 | 100 | "${BUILD_SCRIPT}" \ 101 | --train_directory="${TRAIN_DIRECTORY}" \ 102 | --validation_directory="${VALIDATION_DIRECTORY}" \ 103 | --raw_pixel=${RAW_PIXEL} \ 104 | --resize_dimen ${RESIZE_DIMEN} \ 105 | --output_directory="${OUTPUT_DIRECTORY}" \ 106 | --imagenet_metadata_file="${IMAGENET_METADATA_FILE}" \ 107 | --labels_file="${LABELS_FILE}" \ 108 | --bounding_box_file="${BOUNDING_BOX_FILE}" 109 | -------------------------------------------------------------------------------- /terngrad/inception/data/download_imagenet.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2016 Google Inc. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ============================================================================== 16 | 17 | # Script to download ImageNet Challenge 2012 training and validation data set. 18 | # 19 | # Downloads and decompresses raw images and bounding boxes. 20 | # 21 | # **IMPORTANT** 22 | # To download the raw images, the user must create an account with image-net.org 23 | # and generate a username and access_key. The latter two are required for 24 | # downloading the raw images. 25 | # 26 | # usage: 27 | # ./download_imagenet.sh [dirname] 28 | set -e 29 | 30 | if [ "x$IMAGENET_ACCESS_KEY" == x -o "x$IMAGENET_USERNAME" == x ]; then 31 | cat < ') 61 | sys.exit(-1) 62 | data_dir = sys.argv[1] 63 | validation_labels_file = sys.argv[2] 64 | 65 | # Read in the 50000 synsets associated with the validation data set. 66 | labels = [l.strip() for l in open(validation_labels_file).readlines()] 67 | unique_labels = set(labels) 68 | 69 | # Make all sub-directories in the validation data dir. 70 | for label in unique_labels: 71 | labeled_data_dir = os.path.join(data_dir, label) 72 | os.makedirs(labeled_data_dir) 73 | 74 | # Move all of the images to the appropriate sub-directory. 75 | for i in range(len(labels)): 76 | basename = 'ILSVRC2012_val_000%.5d.JPEG' % (i + 1) 77 | original_filename = os.path.join(data_dir, basename) 78 | if not os.path.exists(original_filename): 79 | print('Failed to find: %s' % original_filename) 80 | sys.exit(-1) 81 | new_filename = os.path.join(data_dir, labels[i], basename) 82 | os.rename(original_filename, new_filename) 83 | -------------------------------------------------------------------------------- /terngrad/inception/dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Small library that points to a data set. 16 | 17 | Methods of the Dataset class: 18 | data_files: Returns a python list of all (sharded) data set files. 19 | num_examples_per_epoch: Returns the number of examples in the data set. 20 | num_classes: Returns the number of classes in the data set. 21 | reader: Return a reader for a single entry from the data set.
22 | """ 23 | from __future__ import absolute_import 24 | from __future__ import division 25 | from __future__ import print_function 26 | 27 | from abc import ABCMeta 28 | from abc import abstractmethod 29 | import os 30 | 31 | 32 | import tensorflow as tf 33 | 34 | FLAGS = tf.app.flags.FLAGS 35 | 36 | # Basic model parameters. 37 | tf.app.flags.DEFINE_string('data_dir', '/tmp/mydata', 38 | """Path to the processed data, i.e. """ 39 | """TFRecord of Example protos.""") 40 | 41 | 42 | class Dataset(object): 43 | """A simple class for handling data sets.""" 44 | __metaclass__ = ABCMeta 45 | 46 | def __init__(self, name, subset): 47 | """Initialize dataset using a subset and the path to the data.""" 48 | assert subset in self.available_subsets(), self.available_subsets() 49 | self.name = name 50 | self.subset = subset 51 | 52 | @abstractmethod 53 | def num_classes(self): 54 | """Returns the number of classes in the data set.""" 55 | pass 56 | # return 10 57 | 58 | @abstractmethod 59 | def num_examples_per_epoch(self): 60 | """Returns the number of examples in the data subset.""" 61 | pass 62 | # if self.subset == 'train': 63 | # return 10000 64 | # if self.subset == 'validation': 65 | # return 1000 66 | 67 | @abstractmethod 68 | def download_message(self): 69 | """Prints a download message for the Dataset.""" 70 | pass 71 | 72 | def available_subsets(self): 73 | """Returns the list of available subsets.""" 74 | return ['train', 'validation'] 75 | 76 | def data_files(self): 77 | """Returns a python list of all (sharded) data subset files. 78 | 79 | Returns: 80 | python list of all (sharded) data set files. 81 | Raises: 82 | ValueError: if there are not data_files matching the subset. 83 | """ 84 | tf_record_pattern = os.path.join(FLAGS.data_dir, '%s-*' % self.subset) 85 | data_files = tf.gfile.Glob(tf_record_pattern) 86 | if not data_files: 87 | print('No files found for dataset %s/%s at %s' % (self.name, 88 | self.subset, 89 | FLAGS.data_dir)) 90 | 91 | self.download_message() 92 | exit(-1) 93 | return data_files 94 | 95 | def reader(self): 96 | """Return a reader for a single entry from the data set. 97 | 98 | See io_ops.py for details of Reader class. 99 | 100 | Returns: 101 | Reader object that reads the data set. 102 | """ 103 | return tf.TFRecordReader() 104 | -------------------------------------------------------------------------------- /terngrad/inception/flowers_data.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Small library that points to the flowers data set. 
16 | """ 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | 22 | 23 | from inception.dataset import Dataset 24 | 25 | 26 | class FlowersData(Dataset): 27 | """Flowers data set.""" 28 | 29 | def __init__(self, subset): 30 | super(FlowersData, self).__init__('Flowers', subset) 31 | 32 | def num_classes(self): 33 | """Returns the number of classes in the data set.""" 34 | return 5 35 | 36 | def num_examples_per_epoch(self): 37 | """Returns the number of examples in the data subset.""" 38 | if self.subset == 'train': 39 | return 3170 40 | if self.subset == 'validation': 41 | return 500 42 | 43 | def download_message(self): 44 | """Instruction to download and extract the tarball from Flowers website.""" 45 | 46 | print('Failed to find any Flowers %s files'% self.subset) 47 | print('') 48 | print('If you have already downloaded and processed the data, then make ' 49 | 'sure to set --data_dir to point to the directory containing the ' 50 | 'location of the sharded TFRecords.\n') 51 | print('Please see README.md for instructions on how to build ' 52 | 'the flowers dataset using download_and_preprocess_flowers.\n') 53 | -------------------------------------------------------------------------------- /terngrad/inception/flowers_eval.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """A binary to evaluate Inception on the flowers data set. 16 | """ 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | 22 | import tensorflow as tf 23 | 24 | from inception import inception_eval 25 | from inception.flowers_data import FlowersData 26 | 27 | FLAGS = tf.app.flags.FLAGS 28 | 29 | 30 | def main(unused_argv=None): 31 | dataset = FlowersData(subset=FLAGS.subset) 32 | assert dataset.data_files() 33 | if tf.gfile.Exists(FLAGS.eval_dir): 34 | tf.gfile.DeleteRecursively(FLAGS.eval_dir) 35 | tf.gfile.MakeDirs(FLAGS.eval_dir) 36 | inception_eval.evaluate(dataset) 37 | 38 | 39 | if __name__ == '__main__': 40 | tf.app.run() 41 | -------------------------------------------------------------------------------- /terngrad/inception/flowers_train.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """A binary to train Inception on the flowers data set. 16 | """ 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | 22 | 23 | import tensorflow as tf 24 | 25 | from inception import inception_train 26 | from inception.flowers_data import FlowersData 27 | 28 | FLAGS = tf.app.flags.FLAGS 29 | 30 | 31 | def main(_): 32 | dataset = FlowersData(subset=FLAGS.subset) 33 | assert dataset.data_files() 34 | if tf.gfile.Exists(FLAGS.train_dir): 35 | tf.gfile.DeleteRecursively(FLAGS.train_dir) 36 | tf.gfile.MakeDirs(FLAGS.train_dir) 37 | inception_train.train(dataset) 38 | 39 | 40 | if __name__ == '__main__': 41 | tf.app.run() 42 | -------------------------------------------------------------------------------- /terngrad/inception/imagenet_data.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Small library that points to the ImageNet data set. 16 | """ 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | 22 | 23 | from inception.dataset import Dataset 24 | 25 | 26 | class ImagenetData(Dataset): 27 | """ImageNet data set.""" 28 | 29 | def __init__(self, subset): 30 | super(ImagenetData, self).__init__('ImageNet', subset) 31 | 32 | def num_classes(self): 33 | """Returns the number of classes in the data set.""" 34 | return 1000 35 | 36 | def num_examples_per_epoch(self): 37 | """Returns the number of examples in the data set.""" 38 | # Bounding box data consists of 615299 bounding boxes for 544546 images. 39 | if self.subset == 'train': 40 | return 1281167 41 | if self.subset == 'validation': 42 | return 50000 43 | 44 | def download_message(self): 45 | """Instruction to download and prepare the ImageNet data set.""" 46 | 47 | print('Failed to find any ImageNet %s files'% self.subset) 48 | print('') 49 | print('If you have already downloaded and processed the data, then make ' 50 | 'sure to set --data_dir to point to the directory containing the ' 51 | 'location of the sharded TFRecords.\n') 52 | print('If you have not downloaded and prepared the ImageNet data in the ' 53 | 'TFRecord format, you will need to do this at least once. 
This ' 54 | 'process could take several hours depending on the speed of your ' 55 | 'computer and network connection\n') 56 | print('Please see README.md for instructions on how to build ' 57 | 'the ImageNet dataset using download_and_preprocess_imagenet.\n') 58 | print('Note that the raw data size is 300 GB and the processed data size ' 59 | 'is 150 GB. Please ensure you have at least 500GB disk space.') 60 | -------------------------------------------------------------------------------- /terngrad/inception/imagenet_distributed_train.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | # pylint: disable=line-too-long 16 | """A binary to train Inception in a distributed manner using multiple systems. 17 | 18 | Please see accompanying README.md for details and instructions. 19 | """ 20 | from __future__ import absolute_import 21 | from __future__ import division 22 | from __future__ import print_function 23 | 24 | import tensorflow as tf 25 | 26 | from inception import inception_distributed_train 27 | from inception.imagenet_data import ImagenetData 28 | 29 | FLAGS = tf.app.flags.FLAGS 30 | 31 | 32 | def main(unused_args): 33 | FLAGS.dataset_name = 'imagenet' 34 | 35 | assert FLAGS.job_name in ['ps', 'worker'], 'job_name must be ps or worker' 36 | 37 | # Extract all the hostnames for the ps and worker jobs to construct the 38 | # cluster spec. 39 | ps_hosts = FLAGS.ps_hosts.split(',') 40 | worker_hosts = FLAGS.worker_hosts.split(',') 41 | tf.logging.info('PS hosts are: %s' % ps_hosts) 42 | tf.logging.info('Worker hosts are: %s' % worker_hosts) 43 | 44 | cluster_spec = tf.train.ClusterSpec({'ps': ps_hosts, 45 | 'worker': worker_hosts}) 46 | sess_config = tf.ConfigProto() 47 | sess_config.gpu_options.allow_growth = True 48 | 49 | server = tf.train.Server( 50 | {'ps': ps_hosts, 51 | 'worker': worker_hosts}, 52 | job_name=FLAGS.job_name, 53 | task_index=FLAGS.task_id, 54 | config=sess_config) 55 | 56 | if FLAGS.job_name == 'ps': 57 | # `ps` jobs wait for incoming connections from the workers. 58 | server.join() 59 | else: 60 | # `worker` jobs will actually do the work. 61 | dataset = ImagenetData(subset=FLAGS.subset) 62 | assert dataset.data_files() 63 | # Only the chief checks for or creates train_dir. 64 | if FLAGS.task_id == 0: 65 | if not tf.gfile.Exists(FLAGS.train_dir): 66 | tf.gfile.MakeDirs(FLAGS.train_dir) 67 | inception_distributed_train.train(server.target, dataset, cluster_spec) 68 | 69 | if __name__ == '__main__': 70 | tf.logging.set_verbosity(tf.logging.INFO) 71 | tf.app.run() 72 | -------------------------------------------------------------------------------- /terngrad/inception/imagenet_eval.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. 
All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """A binary to evaluate Inception on the ImageNet data set. 16 | 17 | Note that using the supplied pre-trained inception checkpoint, the eval should 18 | achieve: 19 | precision @ 1 = 0.7874 recall @ 5 = 0.9436 [50000 examples] 20 | 21 | See the README.md for more details. 22 | """ 23 | from __future__ import absolute_import 24 | from __future__ import division 25 | from __future__ import print_function 26 | 27 | 28 | import tensorflow as tf 29 | 30 | from inception import inception_eval 31 | from inception.imagenet_data import ImagenetData 32 | 33 | FLAGS = tf.app.flags.FLAGS 34 | 35 | 36 | def main(unused_argv=None): 37 | dataset = ImagenetData(subset=FLAGS.subset) 38 | assert dataset.data_files() 39 | if tf.gfile.Exists(FLAGS.eval_dir): 40 | tf.gfile.DeleteRecursively(FLAGS.eval_dir) 41 | tf.gfile.MakeDirs(FLAGS.eval_dir) 42 | FLAGS.dataset_name = 'imagenet' 43 | FLAGS.num_examples = dataset.num_examples_per_epoch() 44 | inception_eval.evaluate(dataset) 45 | 46 | 47 | if __name__ == '__main__': 48 | tf.app.run() 49 | -------------------------------------------------------------------------------- /terngrad/inception/imagenet_train.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """A binary to train Inception on the ImageNet data set. 
16 | """ 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | 22 | 23 | import tensorflow as tf 24 | 25 | from inception import inception_train 26 | from inception.imagenet_data import ImagenetData 27 | 28 | FLAGS = tf.app.flags.FLAGS 29 | 30 | 31 | def main(_): 32 | dataset = ImagenetData(subset=FLAGS.subset) 33 | assert dataset.data_files() 34 | if tf.gfile.Exists(FLAGS.train_dir): 35 | tf.gfile.DeleteRecursively(FLAGS.train_dir) 36 | tf.gfile.MakeDirs(FLAGS.train_dir) 37 | FLAGS.dataset_name = 'imagenet' 38 | inception_train.train(dataset) 39 | 40 | 41 | if __name__ == '__main__': 42 | tf.app.run() 43 | -------------------------------------------------------------------------------- /terngrad/inception/lenet_preprocessing.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Provides utilities for preprocessing.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import tensorflow as tf 22 | 23 | slim = tf.contrib.slim 24 | 25 | 26 | def preprocess_image(image, output_height, output_width, is_training): 27 | """Preprocesses the given image. 28 | 29 | Args: 30 | image: A `Tensor` representing an image of arbitrary size. 31 | output_height: The height of the image after preprocessing. 32 | output_width: The width of the image after preprocessing. 33 | is_training: `True` if we're preprocessing the image for training and 34 | `False` otherwise. 35 | 36 | Returns: 37 | A preprocessed image. 38 | """ 39 | image = tf.to_float(image) 40 | image = tf.image.resize_image_with_crop_or_pad( 41 | image, output_height, output_width) 42 | image.set_shape([output_height, output_width, 1]) 43 | image = tf.subtract(image, 0.5) 44 | image = tf.multiply(image, 2.0) 45 | return image 46 | -------------------------------------------------------------------------------- /terngrad/inception/mnist_data.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | """Small library that points to the mnist data set. 16 | """ 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | 22 | import tensorflow as tf 23 | from inception.dataset import Dataset 24 | import os 25 | 26 | FLAGS = tf.app.flags.FLAGS 27 | 28 | class MnistData(Dataset): 29 | """mnist data set.""" 30 | 31 | def __init__(self, subset): 32 | super(MnistData, self).__init__('mnist', subset) 33 | 34 | def num_classes(self): 35 | """Returns the number of classes in the data set.""" 36 | return 10 37 | 38 | def num_examples_per_epoch(self): 39 | """Returns the number of examples in the data set.""" 40 | if self.subset == 'train': 41 | return 60000 42 | if self.subset == 'test': 43 | return 10000 44 | 45 | def download_message(self): 46 | """Instruction to download and build the mnist data set.""" 47 | 48 | print('Failed to find any mnist %s files'% self.subset) 49 | print('') 50 | print('If you have already downloaded and processed the data, then make ' 51 | 'sure to set --data_dir to point to the directory containing the ' 52 | 'location of the sharded TFRecords.\n') 53 | print('If you have not downloaded and prepared the mnist data in the ' 54 | 'TFRecord format, you will need to do this at least once. This ' 55 | 'process could take a while depending on the speed of your ' 56 | 'computer and network connection\n') 57 | print('Please see README.md for instructions on how to build ' 58 | 'the mnist dataset using download_and_convert_data.py. For example: \n') 59 | print ('cd ./slim\n') 60 | print ('python download_and_convert_data.py ' 61 | '--dataset_name mnist --dataset_dir ~/dataset/mnist-data/\n') 62 | 63 | def available_subsets(self): 64 | """Returns the list of available subsets.""" 65 | return ['train', 'test'] -------------------------------------------------------------------------------- /terngrad/inception/mnist_eval.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """A binary to evaluate Inception on the MNIST data set. 16 | 17 | Note that the precision/recall figures quoted for the supplied pre-trained 18 | ImageNet checkpoint (precision @ 1 = 0.7874, recall @ 5 = 0.9436) do not 19 | apply to MNIST. 20 | 21 | See the README.md for more details. 
22 | """ 23 | from __future__ import absolute_import 24 | from __future__ import division 25 | from __future__ import print_function 26 | 27 | 28 | import tensorflow as tf 29 | 30 | from inception import inception_eval 31 | from inception.mnist_data import MnistData 32 | 33 | FLAGS = tf.app.flags.FLAGS 34 | 35 | def main(unused_argv=None): 36 | dataset = MnistData(subset=FLAGS.subset) 37 | assert dataset.data_files() 38 | if tf.gfile.Exists(FLAGS.eval_dir): 39 | tf.gfile.DeleteRecursively(FLAGS.eval_dir) 40 | tf.gfile.MakeDirs(FLAGS.eval_dir) 41 | FLAGS.dataset_name = 'mnist' 42 | FLAGS.num_examples = dataset.num_examples_per_epoch() 43 | inception_eval.evaluate(dataset) 44 | 45 | 46 | if __name__ == '__main__': 47 | tf.app.run() 48 | -------------------------------------------------------------------------------- /terngrad/inception/mnist_train.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """A binary to train Inception on the ImageNet data set. 16 | """ 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | 22 | 23 | import tensorflow as tf 24 | 25 | from inception import inception_train 26 | from inception.mnist_data import MnistData 27 | 28 | FLAGS = tf.app.flags.FLAGS 29 | 30 | 31 | def main(_): 32 | dataset = MnistData(subset=FLAGS.subset) 33 | assert dataset.data_files() 34 | if tf.gfile.Exists(FLAGS.train_dir): 35 | tf.gfile.DeleteRecursively(FLAGS.train_dir) 36 | tf.gfile.MakeDirs(FLAGS.train_dir) 37 | FLAGS.dataset_name = 'mnist' 38 | inception_train.train(dataset) 39 | 40 | 41 | if __name__ == '__main__': 42 | tf.app.run() 43 | -------------------------------------------------------------------------------- /terngrad/inception/models.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Build the Inception v3 network on ImageNet data set. 
16 | 17 | The Inception v3 architecture is described in http://arxiv.org/abs/1512.00567 18 | 19 | Summary of available functions: 20 | inference: Compute inference on the model inputs to make a prediction 21 | loss: Compute the loss of the prediction with respect to the labels 22 | """ 23 | from __future__ import absolute_import 24 | from __future__ import division 25 | from __future__ import print_function 26 | 27 | import re 28 | 29 | import tensorflow as tf 30 | 31 | from inception.slim import slim 32 | 33 | FLAGS = tf.app.flags.FLAGS 34 | 35 | # If a model is trained using multiple GPUs, prefix all Op names with tower_name 36 | # to differentiate the operations. Note that this prefix is removed from the 37 | # names of the summaries when visualizing a model. 38 | TOWER_NAME = 'tower' 39 | 40 | # Batch normalization. Constant governing the exponential moving average of 41 | # the 'global' mean and variance for all activations. 42 | BATCHNORM_MOVING_AVERAGE_DECAY = 0.9997 43 | 44 | # The decay to use for the moving average. 45 | MOVING_AVERAGE_DECAY = 0.9999 46 | 47 | 48 | def inference(images, num_classes, for_training=False, restore_logits=True, 49 | scope=None): 50 | pass # Unimplemented placeholder in this file. 51 | 52 | 53 | def loss(logits, labels, batch_size=None): 54 | pass # Unimplemented placeholder in this file. 55 | 56 | 57 | def _activation_summary(x): 58 | """Helper to create summaries for activations. 59 | 60 | Creates a summary that provides a histogram of activations. 61 | Creates a summary that measures the sparsity of activations. 62 | 63 | Args: 64 | x: Tensor 65 | """ 66 | # Remove 'tower_[0-9]/' from the name in case this is a multi-GPU training 67 | # session. This helps the clarity of presentation on tensorboard. 68 | tensor_name = re.sub('%s_[0-9]*/' % TOWER_NAME, '', x.op.name) 69 | tf.contrib.deprecated.histogram_summary(tensor_name + '/activations', x) 70 | tf.contrib.deprecated.scalar_summary(tensor_name + '/sparsity', tf.nn.zero_fraction(x)) 71 | 72 | 73 | def _activation_summaries(endpoints): 74 | with tf.name_scope('summaries'): 75 | for act in endpoints.values(): 76 | _activation_summary(act) 77 | -------------------------------------------------------------------------------- /terngrad/inception/slim/BUILD: -------------------------------------------------------------------------------- 1 | # Description: 2 | # Contains the operations and nets for building TensorFlow-Slim models.
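# A sketch of how these targets are typically consumed (the commands below are # illustrative; see build_all.sh at the terngrad root for the scripted build): # bazel build //inception/slim:slim # after which Python code in the workspace can import the grouped namespace, # as models.py does: from inception.slim import slim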
3 | 4 | package(default_visibility = ["//inception:internal"]) 5 | 6 | licenses(["notice"]) # Apache 2.0 7 | 8 | exports_files(["LICENSE"]) 9 | 10 | py_library( 11 | name = "scopes", 12 | srcs = ["scopes.py"], 13 | ) 14 | 15 | py_library( 16 | name = "inception_utils", 17 | srcs = ["inception_utils.py"], 18 | ) 19 | 20 | py_library( 21 | name = "inception_v1", 22 | srcs = ["inception_v1.py"], 23 | ) 24 | 25 | py_test( 26 | name = "scopes_test", 27 | size = "small", 28 | srcs = ["scopes_test.py"], 29 | deps = [ 30 | ":scopes", 31 | ], 32 | ) 33 | 34 | py_library( 35 | name = "variables", 36 | srcs = ["variables.py"], 37 | deps = [ 38 | ":scopes", 39 | ], 40 | ) 41 | 42 | py_test( 43 | name = "variables_test", 44 | size = "small", 45 | srcs = ["variables_test.py"], 46 | deps = [ 47 | ":variables", 48 | ], 49 | ) 50 | 51 | py_library( 52 | name = "losses", 53 | srcs = ["losses.py"], 54 | ) 55 | 56 | py_test( 57 | name = "losses_test", 58 | size = "small", 59 | srcs = ["losses_test.py"], 60 | deps = [ 61 | ":losses", 62 | ], 63 | ) 64 | 65 | py_library( 66 | name = "ops", 67 | srcs = ["ops.py"], 68 | deps = [ 69 | ":losses", 70 | ":scopes", 71 | ":variables", 72 | ], 73 | ) 74 | 75 | py_test( 76 | name = "ops_test", 77 | size = "small", 78 | srcs = ["ops_test.py"], 79 | deps = [ 80 | ":ops", 81 | ":variables", 82 | ], 83 | ) 84 | 85 | py_library( 86 | name = "inception", 87 | srcs = ["inception_model.py"], 88 | deps = [ 89 | ":ops", 90 | ":scopes", 91 | ], 92 | ) 93 | 94 | py_library( 95 | name = "models", 96 | srcs = ["models.py"], 97 | deps = [ 98 | ":inception_utils", 99 | ":inception_v1", 100 | ":ops", 101 | ":scopes", 102 | ], 103 | ) 104 | 105 | py_test( 106 | name = "inception_test", 107 | size = "medium", 108 | srcs = ["inception_test.py"], 109 | deps = [ 110 | ":inception", 111 | ], 112 | ) 113 | 114 | py_library( 115 | name = "slim", 116 | srcs = ["slim.py"], 117 | deps = [ 118 | ":inception", 119 | ":models", 120 | ":losses", 121 | ":ops", 122 | ":scopes", 123 | ":variables", 124 | ], 125 | ) 126 | 127 | py_test( 128 | name = "collections_test", 129 | size = "small", 130 | srcs = ["collections_test.py"], 131 | deps = [ 132 | ":slim", 133 | ], 134 | ) 135 | -------------------------------------------------------------------------------- /terngrad/inception/slim/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | # ============================================================================== 15 | 16 | """Makes helper libraries available in the inception package.""" 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function -------------------------------------------------------------------------------- /terngrad/inception/slim/inception_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Tests for slim.inception.""" 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | import tensorflow as tf 21 | 22 | from inception.slim import inception_model as inception 23 | 24 | 25 | class InceptionTest(tf.test.TestCase): 26 | 27 | def testBuildLogits(self): 28 | batch_size = 5 29 | height, width = 299, 299 30 | num_classes = 1000 31 | with self.test_session(): 32 | inputs = tf.random_uniform((batch_size, height, width, 3)) 33 | logits, _ = inception.inception_v3(inputs, num_classes) 34 | self.assertTrue(logits.op.name.startswith('logits')) 35 | self.assertListEqual(logits.get_shape().as_list(), 36 | [batch_size, num_classes]) 37 | 38 | def testBuildEndPoints(self): 39 | batch_size = 5 40 | height, width = 299, 299 41 | num_classes = 1000 42 | with self.test_session(): 43 | inputs = tf.random_uniform((batch_size, height, width, 3)) 44 | _, end_points = inception.inception_v3(inputs, num_classes) 45 | self.assertTrue('logits' in end_points) 46 | logits = end_points['logits'] 47 | self.assertListEqual(logits.get_shape().as_list(), 48 | [batch_size, num_classes]) 49 | self.assertTrue('aux_logits' in end_points) 50 | aux_logits = end_points['aux_logits'] 51 | self.assertListEqual(aux_logits.get_shape().as_list(), 52 | [batch_size, num_classes]) 53 | pre_pool = end_points['mixed_8x8x2048b'] 54 | self.assertListEqual(pre_pool.get_shape().as_list(), 55 | [batch_size, 8, 8, 2048]) 56 | 57 | def testVariablesSetDevice(self): 58 | batch_size = 5 59 | height, width = 299, 299 60 | num_classes = 1000 61 | with self.test_session(): 62 | inputs = tf.random_uniform((batch_size, height, width, 3)) 63 | # Force all Variables to reside on the device.
64 | with tf.variable_scope('on_cpu'), tf.device('/cpu:0'): 65 | inception.inception_v3(inputs, num_classes) 66 | with tf.variable_scope('on_gpu'), tf.device('/gpu:0'): 67 | inception.inception_v3(inputs, num_classes) 68 | for v in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='on_cpu'): 69 | self.assertDeviceEqual(v.device, '/cpu:0') 70 | for v in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='on_gpu'): 71 | self.assertDeviceEqual(v.device, '/gpu:0') 72 | 73 | def testHalfSizeImages(self): 74 | batch_size = 5 75 | height, width = 150, 150 76 | num_classes = 1000 77 | with self.test_session(): 78 | inputs = tf.random_uniform((batch_size, height, width, 3)) 79 | logits, end_points = inception.inception_v3(inputs, num_classes) 80 | self.assertTrue(logits.op.name.startswith('logits')) 81 | self.assertListEqual(logits.get_shape().as_list(), 82 | [batch_size, num_classes]) 83 | pre_pool = end_points['mixed_8x8x2048b'] 84 | self.assertListEqual(pre_pool.get_shape().as_list(), 85 | [batch_size, 3, 3, 2048]) 86 | 87 | def testUnknownBatchSize(self): 88 | batch_size = 1 89 | height, width = 299, 299 90 | num_classes = 1000 91 | with self.test_session() as sess: 92 | inputs = tf.placeholder(tf.float32, (None, height, width, 3)) 93 | logits, _ = inception.inception_v3(inputs, num_classes) 94 | self.assertTrue(logits.op.name.startswith('logits')) 95 | self.assertListEqual(logits.get_shape().as_list(), 96 | [None, num_classes]) 97 | images = tf.random_uniform((batch_size, height, width, 3)) 98 | sess.run(tf.global_variables_initializer()) 99 | output = sess.run(logits, {inputs: images.eval()}) 100 | self.assertEqual(output.shape, (batch_size, num_classes)) 101 | 102 | def testEvaluation(self): 103 | batch_size = 2 104 | height, width = 299, 299 105 | num_classes = 1000 106 | with self.test_session() as sess: 107 | eval_inputs = tf.random_uniform((batch_size, height, width, 3)) 108 | logits, _ = inception.inception_v3(eval_inputs, num_classes, 109 | is_training=False) 110 | predictions = tf.argmax(logits, 1) 111 | sess.run(tf.global_variables_initializer()) 112 | output = sess.run(predictions) 113 | self.assertEqual(output.shape, (batch_size,)) 114 | 115 | def testTrainEvalWithReuse(self): 116 | train_batch_size = 5 117 | eval_batch_size = 2 118 | height, width = 150, 150 119 | num_classes = 1000 120 | with self.test_session() as sess: 121 | train_inputs = tf.random_uniform((train_batch_size, height, width, 3)) 122 | inception.inception_v3(train_inputs, num_classes) 123 | tf.get_variable_scope().reuse_variables() 124 | eval_inputs = tf.random_uniform((eval_batch_size, height, width, 3)) 125 | logits, _ = inception.inception_v3(eval_inputs, num_classes, 126 | is_training=False) 127 | predictions = tf.argmax(logits, 1) 128 | sess.run(tf.global_variables_initializer()) 129 | output = sess.run(predictions) 130 | self.assertEqual(output.shape, (eval_batch_size,)) 131 | 132 | 133 | if __name__ == '__main__': 134 | tf.test.main() 135 | -------------------------------------------------------------------------------- /terngrad/inception/slim/inception_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains common code shared by all inception models. 16 | 17 | Usage of arg scope: 18 | with slim.arg_scope(inception_arg_scope()): 19 | logits, end_points = inception.inception_v3(images, num_classes, 20 | is_training=is_training) 21 | 22 | """ 23 | from __future__ import absolute_import 24 | from __future__ import division 25 | from __future__ import print_function 26 | 27 | import tensorflow as tf 28 | 29 | slim = tf.contrib.slim 30 | 31 | 32 | def inception_arg_scope(weight_decay=0.00004, 33 | use_batch_norm=True, 34 | batch_norm_decay=0.9997, 35 | batch_norm_epsilon=0.001): 36 | """Defines the default arg scope for inception models. 37 | 38 | Args: 39 | weight_decay: The weight decay to use for regularizing the model. 40 | use_batch_norm: If `True`, batch_norm is applied after each convolution. 41 | batch_norm_decay: Decay for batch norm moving average. 42 | batch_norm_epsilon: Small float added to variance to avoid dividing by zero 43 | in batch norm. 44 | 45 | Returns: 46 | An `arg_scope` to use for the inception models. 47 | """ 48 | batch_norm_params = { 49 | # Decay for the moving averages. 50 | 'decay': batch_norm_decay, 51 | # epsilon to prevent 0s in variance. 52 | 'epsilon': batch_norm_epsilon, 53 | # collection containing update_ops. 54 | 'updates_collections': tf.GraphKeys.UPDATE_OPS, 55 | } 56 | if use_batch_norm: 57 | normalizer_fn = slim.batch_norm 58 | normalizer_params = batch_norm_params 59 | else: 60 | normalizer_fn = None 61 | normalizer_params = {} 62 | # Set weight_decay for weights in Conv and FC layers. 63 | with slim.arg_scope([slim.conv2d, slim.fully_connected], 64 | weights_regularizer=slim.l2_regularizer(weight_decay)): 65 | with slim.arg_scope( 66 | [slim.conv2d], 67 | weights_initializer=slim.variance_scaling_initializer(), 68 | activation_fn=tf.nn.relu, 69 | normalizer_fn=normalizer_fn, 70 | normalizer_params=normalizer_params) as sc: 71 | return sc 72 | -------------------------------------------------------------------------------- /terngrad/inception/slim/scopes.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains the new arg_scope used for TF-Slim ops. 16 | 17 | Allows one to define models much more compactly by eliminating boilerplate 18 | code.
This is accomplished through the use of argument scoping (arg_scope). 19 | 20 | Example of how to use scopes.arg_scope: 21 | 22 | with scopes.arg_scope(ops.conv2d, padding='SAME', 23 | stddev=0.01, weight_decay=0.0005): 24 | net = ops.conv2d(inputs, 64, [11, 11], 4, padding='VALID', scope='conv1') 25 | net = ops.conv2d(net, 256, [5, 5], scope='conv2') 26 | 27 | The first call to conv2d will overwrite padding: 28 | ops.conv2d(inputs, 64, [11, 11], 4, padding='VALID', 29 | stddev=0.01, weight_decay=0.0005, scope='conv1') 30 | 31 | The second call to conv2d will use the predefined args: 32 | ops.conv2d(inputs, 256, [5, 5], padding='SAME', 33 | stddev=0.01, weight_decay=0.0005, scope='conv2') 34 | 35 | Example of how to reuse an arg_scope: 36 | with scopes.arg_scope(ops.conv2d, padding='SAME', 37 | stddev=0.01, weight_decay=0.0005) as conv2d_arg_scope: 38 | net = ops.conv2d(net, 256, [5, 5], scope='conv1') 39 | .... 40 | 41 | with scopes.arg_scope(conv2d_arg_scope): 42 | net = ops.conv2d(net, 256, [5, 5], scope='conv2') 43 | 44 | Example of how to use scopes.add_arg_scope: 45 | 46 | @scopes.add_arg_scope 47 | def conv2d(*args, **kwargs): 48 | """ 49 | from __future__ import absolute_import 50 | from __future__ import division 51 | from __future__ import print_function 52 | 53 | import contextlib 54 | import functools 55 | 56 | from tensorflow.python.framework import ops 57 | 58 | _ARGSTACK_KEY = ("__arg_stack",) 59 | 60 | _DECORATED_OPS = set() 61 | 62 | 63 | def _get_arg_stack(): 64 | stack = ops.get_collection(_ARGSTACK_KEY) 65 | if stack: 66 | return stack[0] 67 | else: 68 | stack = [{}] 69 | ops.add_to_collection(_ARGSTACK_KEY, stack) 70 | return stack 71 | 72 | 73 | def _current_arg_scope(): 74 | stack = _get_arg_stack() 75 | return stack[-1] 76 | 77 | 78 | def _add_op(op): 79 | key_op = (op.__module__, op.__name__) 80 | if key_op not in _DECORATED_OPS: 81 | _DECORATED_OPS.add(key_op) 82 | 83 | 84 | @contextlib.contextmanager 85 | def arg_scope(list_ops_or_scope, **kwargs): 86 | """Stores the default arguments for the given set of list_ops. 87 | 88 | For usage, please see examples at top of the file. 89 | 90 | Args: 91 | list_ops_or_scope: List or tuple of operations to set argument scope for or 92 | a dictionary containing the current scope. When list_ops_or_scope is a dict, 93 | kwargs must be empty. When list_ops_or_scope is a list or tuple, then 94 | every op in it needs to be decorated with @add_arg_scope to work. 95 | **kwargs: keyword=value that will define the defaults for each op in 96 | list_ops. All the ops need to accept the given set of arguments. 97 | 98 | Yields: 99 | the current_scope, which is a dictionary of {op: {arg: value}} 100 | Raises: 101 | TypeError: if list_ops is not a list or a tuple. 102 | ValueError: if any op in list_ops has not been decorated with @add_arg_scope. 103 | """ 104 | if isinstance(list_ops_or_scope, dict): 105 | # Assumes that list_ops_or_scope is a scope that is being reused. 106 | if kwargs: 107 | raise ValueError("When attempting to re-use a scope by supplying a " 108 | "dictionary, kwargs must be empty.") 109 | current_scope = list_ops_or_scope.copy() 110 | try: 111 | _get_arg_stack().append(current_scope) 112 | yield current_scope 113 | finally: 114 | _get_arg_stack().pop() 115 | else: 116 | # Assumes that list_ops_or_scope is a list/tuple of ops with kwargs. 117 | if not isinstance(list_ops_or_scope, (list, tuple)): 118 | raise TypeError("list_ops_or_scope must either be a list/tuple or a reused " 119 | "scope (i.e.
dict)") 120 | try: 121 | current_scope = _current_arg_scope().copy() 122 | for op in list_ops_or_scope: 123 | key_op = (op.__module__, op.__name__) 124 | if not has_arg_scope(op): 125 | raise ValueError("%s is not decorated with @add_arg_scope", key_op) 126 | if key_op in current_scope: 127 | current_kwargs = current_scope[key_op].copy() 128 | current_kwargs.update(kwargs) 129 | current_scope[key_op] = current_kwargs 130 | else: 131 | current_scope[key_op] = kwargs.copy() 132 | _get_arg_stack().append(current_scope) 133 | yield current_scope 134 | finally: 135 | _get_arg_stack().pop() 136 | 137 | 138 | def add_arg_scope(func): 139 | """Decorates a function with args so it can be used within an arg_scope. 140 | 141 | Args: 142 | func: function to decorate. 143 | 144 | Returns: 145 | A tuple with the decorated function func_with_args(). 146 | """ 147 | @functools.wraps(func) 148 | def func_with_args(*args, **kwargs): 149 | current_scope = _current_arg_scope() 150 | current_args = kwargs 151 | key_func = (func.__module__, func.__name__) 152 | if key_func in current_scope: 153 | current_args = current_scope[key_func].copy() 154 | current_args.update(kwargs) 155 | return func(*args, **current_args) 156 | _add_op(func) 157 | return func_with_args 158 | 159 | 160 | def has_arg_scope(func): 161 | """Checks whether a func has been decorated with @add_arg_scope or not. 162 | 163 | Args: 164 | func: function to check. 165 | 166 | Returns: 167 | a boolean. 168 | """ 169 | key_op = (func.__module__, func.__name__) 170 | return key_op in _DECORATED_OPS 171 | -------------------------------------------------------------------------------- /terngrad/inception/slim/slim.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """TF-Slim grouped API. Please see README.md for details and usage.""" 16 | # pylint: disable=unused-import 17 | 18 | # Collapse tf-slim into a single namespace. 19 | from inception.slim import inception_model as inception 20 | from inception.slim import models as models 21 | from inception.slim import losses 22 | from inception.slim import ops 23 | from inception.slim import scopes 24 | from inception.slim import variables 25 | from inception.slim.scopes import arg_scope 26 | -------------------------------------------------------------------------------- /terngrad/kill_local.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | threadid=$( ps aux | grep python | grep distributed_train | grep ${USER} | awk '{print $2}') 3 | if [[ "$threadid" =~ ^-?[0-9]+.*$ ]] ; 4 | then 5 | kill $threadid 6 | else 7 | echo "Stopped already." 
8 | fi 9 | -------------------------------------------------------------------------------- /terngrad/run_dist.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | #set -x 4 | 5 | . ./config_dist.sh 6 | 7 | WORKER_STRING=$(echo ${WORKER_HOSTS[*]} | sed 's/ /,/g') 8 | PS_STRING=$(echo ${PS_HOSTS[*]} | sed 's/ /,/g') 9 | EXPERIMENT_ID=$(date) 10 | EXPERIMENT_ID=${EXPERIMENT_ID// /_} 11 | EXPERIMENT_ID=${EXPERIMENT_ID//:/-} 12 | 13 | PS_NUM=${#PS_HOSTS[@]} 14 | WORKER_NUM=${#WORKER_HOSTS[@]} 15 | DEVICE_NUM=${#WORKER_DEVICES[@]} 16 | DATA_NUM=${#DATA_DIR[@]} 17 | if [ ${WORKER_NUM} -ne ${DEVICE_NUM} ] 18 | then 19 | echo "The number of workers (${WORKER_NUM}) does not match the number of devices (${DEVICE_NUM})" 20 | exit 21 | fi 22 | if [ ${WORKER_NUM} -ne ${DATA_NUM} ] 23 | then 24 | echo "The number of workers (${WORKER_NUM}) does not match the number of data paths (${DATA_NUM})" 25 | exit 26 | fi 27 | 28 | echo "${PS_NUM} ps hosts: ${PS_STRING}" 29 | echo "${WORKER_NUM} worker hosts: ${WORKER_STRING}" 30 | 31 | # start workers 32 | task_id=0 33 | for HOST in ${WORKER_HOSTS[*]}; do 34 | worker=$(echo ${HOST} |cut -d':' -f1) 35 | ssh ${worker} "hostname; \ 36 | cd ${WORKSPACE}; \ 37 | pwd; \ 38 | ${WORKER_SCRIPT} ${PS_STRING} ${WORKER_STRING} worker ${task_id} ${WORKER_DEVICES[$task_id]} ${DATA_DIR[$task_id]} ${EXPERIMENT_ID}" 39 | task_id=`expr $task_id + 1` 40 | done 41 | 42 | # start ps 43 | task_id=0 44 | for HOST in ${PS_HOSTS[*]}; do 45 | ps=$(echo ${HOST} |cut -d':' -f1) 46 | ssh ${ps} "hostname; \ 47 | cd ${WORKSPACE}; \ 48 | pwd; \ 49 | ${PS_SCRIPT} ${PS_STRING} ${WORKER_STRING} ps ${task_id} ${EXPERIMENT_ID}" 50 | task_id=`expr $task_id + 1` 51 | done 52 | 53 | -------------------------------------------------------------------------------- /terngrad/run_dist_cifar10.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | set -e 4 | PS=localhost 5 | WORKER1=localhost 6 | WORKER2=localhost 7 | 8 | export CUDA_VISIBLE_DEVICES=1 9 | bazel-bin/inception/cifar10_distributed_train \ 10 | --optimizer adam \ 11 | --initial_learning_rate 0.0002 \ 12 | --batch_size 64 \ 13 | --num_epochs_per_decay 200 \ 14 | --max_steps 300000 \ 15 | --seed 123 \ 16 | --weight_decay 0.004 \ 17 | --net cifar10_alexnet \ 18 | --image_size 24 \ 19 | --data_dir="$HOME/dataset/cifar10-data-shard-500-999" \ 20 | --job_name='worker' \ 21 | --task_id=1 \ 22 | --ps_hosts="$PS:2222" \ 23 | --worker_hosts="${WORKER1}:2224,${WORKER2}:2226" \ 24 | --train_dir=/tmp/cifar10_distributed_train & 25 | 26 | export CUDA_VISIBLE_DEVICES=0 27 | bazel-bin/inception/cifar10_distributed_train \ 28 | --optimizer adam \ 29 | --initial_learning_rate 0.0002 \ 30 | --batch_size 64 \ 31 | --num_epochs_per_decay 200 \ 32 | --max_steps 300000 \ 33 | --seed 123 \ 34 | --weight_decay 0.004 \ 35 | --net cifar10_alexnet \ 36 | --image_size 24 \ 37 | --data_dir="$HOME/dataset/cifar10-data-shard-0-499" \ 38 | --job_name='worker' \ 39 | --task_id=0 \ 40 | --ps_hosts="$PS:2222" \ 41 | --worker_hosts="${WORKER1}:2224,${WORKER2}:2226" \ 42 | --train_dir=/tmp/cifar10_distributed_train & 43 | 44 | export CUDA_VISIBLE_DEVICES=1 45 | bazel-bin/inception/cifar10_distributed_train \ 46 | --job_name='ps' \ 47 | --task_id=0 \ 48 | --ps_hosts="$PS:2222" \ 49 | --worker_hosts="${WORKER1}:2224,${WORKER2}:2226" & 50 | -------------------------------------------------------------------------------- /terngrad/run_multi_gpus_alexnet.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | set -x 4 | 5 | DATASET_NAME=imagenet # imagenet or cifar10 6 | ROOT_WORKSPACE=${HOME}/dataset/results/imagenet # the location to store tf.summary and logs 7 | DATA_DIR=${HOME}/dataset/${DATASET_NAME}-data # dataset location 8 | FINETUNED_MODEL_PATH= 9 | NUM_GPUS=8 10 | #export CUDA_VISIBLE_DEVICES=0,1 # specify visible gpus to tensorflow 11 | OPTIMIZER=momentum 12 | NET=alexnet 13 | IMAGE_SIZE=224 14 | GRAD_BITS=1 15 | BASE_LR=0.04 16 | CLIP_FACTOR=2.5 # 0.0 means no clipping 17 | # when GRAD_BITS=1 and FLOATING_GRAD_EPOCH>0, switch to floating gradients every FLOATING_GRAD_EPOCH epoch and then switch back 18 | FLOATING_GRAD_EPOCH=0 # 0 means no switching 19 | WEIGHT_DECAY=0.0005 # default - alexnet/vgg_a/vgg_16:0.0005, inception_v3:0.00004, cifar10_alexnet:0.004 20 | DROPOUT_KEEP_PROB=0.8 # The probability to keep in dropout 21 | MOMENTUM=0.9 22 | SIZE_TO_BINARIZE=9217 # the min size of variable to enable binarizing. 1 means binarizing all variables when GRAD_BITS=1 23 | TRAIN_BATCH_SIZE=1024 # total batch size 24 | VAL_BATCH_SIZE=50 # set smaller to avoid OOM 25 | NUM_EPOCHS_PER_DECAY=20 # per decay learning rate 26 | MAX_STEPS=92500 27 | VAL_TOWER=0 # -1 for cpu 28 | EVAL_INTERVAL_SECS=900 # seconds to evaluate the accuracy 29 | EVAL_DEVICE="/gpu:0" # specify the device to eval. e.g. "/gpu:1", "/cpu:0" 30 | RESTORE_AVG_VAR=True # use the moving average parameters to eval? 31 | SEED=123 # use ${RANDOM} if no duplicable results are required 32 | 33 | if [ ! -d "$ROOT_WORKSPACE" ]; then 34 | echo "${ROOT_WORKSPACE} does not exist!" 35 | exit 36 | fi 37 | 38 | TRAIN_WORKSPACE=${ROOT_WORKSPACE}/${DATASET_NAME}_training_data/ 39 | EVAL_WORKSPACE=${ROOT_WORKSPACE}/${DATASET_NAME}_eval_data/ 40 | INFO_WORKSPACE=${ROOT_WORKSPACE}/${DATASET_NAME}_info/ 41 | if [ ! -d "${INFO_WORKSPACE}" ]; then 42 | echo "Creating ${INFO_WORKSPACE} ..." 43 | mkdir -p ${INFO_WORKSPACE} 44 | fi 45 | current_time=$(date) 46 | current_time=${current_time// /_} 47 | current_time=${current_time//:/-} 48 | FOLDER_NAME=${DATASET_NAME}_${NET}_${IMAGE_SIZE}_${OPTIMIZER}_${GRAD_BITS}_${BASE_LR}_${CLIP_FACTOR}_${FLOATING_GRAD_EPOCH}_${WEIGHT_DECAY}_${MOMENTUM}_${SIZE_TO_BINARIZE}_${TRAIN_BATCH_SIZE}_${NUM_GPUS}_${current_time} 49 | TRAIN_DIR=${TRAIN_WORKSPACE}/${FOLDER_NAME} 50 | EVAL_DIR=${EVAL_WORKSPACE}/${FOLDER_NAME} 51 | if [ ! -d "$TRAIN_DIR" ]; then 52 | echo "Creating ${TRAIN_DIR} ..." 53 | mkdir -p ${TRAIN_DIR} 54 | fi 55 | if [ ! -d "$EVAL_DIR" ]; then 56 | echo "Creating ${EVAL_DIR} ..."
57 | mkdir -p ${EVAL_DIR} 58 | fi 59 | 60 | bazel-bin/inception/${DATASET_NAME}_eval \ 61 | --eval_interval_secs ${EVAL_INTERVAL_SECS} \ 62 | --device ${EVAL_DEVICE} \ 63 | --restore_avg_var ${RESTORE_AVG_VAR} \ 64 | --data_dir ${DATA_DIR} \ 65 | --net ${NET} \ 66 | --image_size ${IMAGE_SIZE} \ 67 | --batch_size ${VAL_BATCH_SIZE} \ 68 | --max_steps ${MAX_STEPS} \ 69 | --checkpoint_dir ${TRAIN_DIR} \ 70 | --tower ${VAL_TOWER} \ 71 | --eval_dir ${EVAL_DIR} > ${INFO_WORKSPACE}/eval_${FOLDER_NAME}_info.txt 2>&1 & 72 | 73 | bazel-bin/inception/${DATASET_NAME}_train \ 74 | --seed ${SEED} \ 75 | --pretrained_model_checkpoint_path "${FINETUNED_MODEL_PATH}" \ 76 | --num_epochs_per_decay ${NUM_EPOCHS_PER_DECAY} \ 77 | --initial_learning_rate ${BASE_LR} \ 78 | --grad_bits ${GRAD_BITS} \ 79 | --clip_factor ${CLIP_FACTOR} \ 80 | --floating_grad_epoch ${FLOATING_GRAD_EPOCH} \ 81 | --weight_decay ${WEIGHT_DECAY} \ 82 | --dropout_keep_prob ${DROPOUT_KEEP_PROB} \ 83 | --momentum ${MOMENTUM} \ 84 | --size_to_binarize ${SIZE_TO_BINARIZE} \ 85 | --optimizer ${OPTIMIZER} \ 86 | --net ${NET} \ 87 | --image_size ${IMAGE_SIZE} \ 88 | --num_gpus ${NUM_GPUS} \ 89 | --batch_size ${TRAIN_BATCH_SIZE} \ 90 | --max_steps ${MAX_STEPS} \ 91 | --train_dir ${TRAIN_DIR} \ 92 | --data_dir ${DATA_DIR} > ${INFO_WORKSPACE}/training_${FOLDER_NAME}_info.txt 2>&1 & 93 | -------------------------------------------------------------------------------- /terngrad/run_multi_gpus_cifar10.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | set -x 4 | 5 | DATASET_NAME=cifar10 # imagenet or cifar10 6 | ROOT_WORKSPACE=${HOME}/dataset/results/cifar10/ # the location to store summary and logs 7 | DATA_DIR=${HOME}/dataset/${DATASET_NAME}-data # dataset location 8 | FINETUNED_MODEL_PATH= 9 | NUM_GPUS=2 # num of physical gpus 10 | export CUDA_VISIBLE_DEVICES=0,1 # specify visible gpus to tensorflow 11 | NUM_NODES=2 # num of virtual nodes on physical gpus 12 | OPTIMIZER=adam 13 | NET=cifar10_alexnet 14 | IMAGE_SIZE=24 15 | GRAD_BITS=1 16 | BASE_LR=0.0002 17 | CLIP_FACTOR=2.5 # 0.0 means no clipping 18 | # when GRAD_BITS=1 and FLOATING_GRAD_EPOCH>0, switch to floating gradients every FLOATING_GRAD_EPOCH epoch and then switch back 19 | FLOATING_GRAD_EPOCH=0 # 0 means no switching 20 | WEIGHT_DECAY=0.004 # default - alexnet/vgg_a/vgg_16:0.0005, inception_v3:0.00004, cifar10_alexnet:0.004 21 | MOMENTUM=0.9 22 | SIZE_TO_BINARIZE=1 # The min size of variable to enable binarizing. e.g., 385 means biases are excluded from binarizing 23 | TRAIN_BATCH_SIZE=128 # total batch size 24 | SAVE_ITER=2000 # Save summaries and a checkpoint every SAVE_ITER iterations 25 | QUANTIZE_LOGITS=True # Whether to quantize the gradients of the last logits layer. 26 | VAL_BATCH_SIZE=50 # set smaller to avoid OOM 27 | NUM_EPOCHS_PER_DECAY=200 28 | MAX_STEPS=300000 29 | VAL_TOWER=0 # -1 for cpu 30 | EVAL_INTERVAL_SECS=10 31 | EVAL_DEVICE="/gpu:0" # specify the device to eval. e.g. "/gpu:1", "/cpu:0" 32 | RESTORE_AVG_VAR=True # use the moving average parameters to eval? 33 | SEED=123 # use ${RANDOM} if no duplicable results are required 34 | 35 | if [ ! -d "$ROOT_WORKSPACE" ]; then 36 | echo "${ROOT_WORKSPACE} does not exist!" 37 | exit 38 | fi 39 | 40 | TRAIN_WORKSPACE=${ROOT_WORKSPACE}/${DATASET_NAME}_training_data/ 41 | EVAL_WORKSPACE=${ROOT_WORKSPACE}/${DATASET_NAME}_eval_data/ 42 | INFO_WORKSPACE=${ROOT_WORKSPACE}/${DATASET_NAME}_info/ 43 | if [ ! -d "${INFO_WORKSPACE}" ]; then 44 | echo "Creating ${INFO_WORKSPACE} ..."
45 | mkdir -p ${INFO_WORKSPACE} 46 | fi 47 | current_time=$(date) 48 | current_time=${current_time// /_} 49 | current_time=${current_time//:/-} 50 | FOLDER_NAME=${DATASET_NAME}_${NET}_${IMAGE_SIZE}_${OPTIMIZER}_${GRAD_BITS}_${BASE_LR}_${CLIP_FACTOR}_${FLOATING_GRAD_EPOCH}_${WEIGHT_DECAY}_${MOMENTUM}_${SIZE_TO_BINARIZE}_${TRAIN_BATCH_SIZE}_${NUM_NODES}_${current_time} 51 | TRAIN_DIR=${TRAIN_WORKSPACE}/${FOLDER_NAME} 52 | EVAL_DIR=${EVAL_WORKSPACE}/${FOLDER_NAME} 53 | if [ ! -d "$TRAIN_DIR" ]; then 54 | echo "Creating ${TRAIN_DIR} ..." 55 | mkdir -p ${TRAIN_DIR} 56 | fi 57 | if [ ! -d "$EVAL_DIR" ]; then 58 | echo "Creating ${EVAL_DIR} ..." 59 | mkdir -p ${EVAL_DIR} 60 | fi 61 | 62 | bazel-bin/inception/${DATASET_NAME}_eval \ 63 | --eval_interval_secs ${EVAL_INTERVAL_SECS} \ 64 | --device ${EVAL_DEVICE} \ 65 | --restore_avg_var ${RESTORE_AVG_VAR} \ 66 | --data_dir ${DATA_DIR} \ 67 | --subset "test" \ 68 | --net ${NET} \ 69 | --image_size ${IMAGE_SIZE} \ 70 | --batch_size ${VAL_BATCH_SIZE} \ 71 | --max_steps ${MAX_STEPS} \ 72 | --checkpoint_dir ${TRAIN_DIR} \ 73 | --tower ${VAL_TOWER} \ 74 | --eval_dir ${EVAL_DIR} > ${INFO_WORKSPACE}/eval_${FOLDER_NAME}_info.txt 2>&1 & 75 | 76 | bazel-bin/inception/${DATASET_NAME}_train \ 77 | --seed ${SEED} \ 78 | --pretrained_model_checkpoint_path "${FINETUNED_MODEL_PATH}" \ 79 | --num_epochs_per_decay ${NUM_EPOCHS_PER_DECAY} \ 80 | --initial_learning_rate ${BASE_LR} \ 81 | --grad_bits ${GRAD_BITS} \ 82 | --clip_factor ${CLIP_FACTOR} \ 83 | --floating_grad_epoch ${FLOATING_GRAD_EPOCH} \ 84 | --weight_decay ${WEIGHT_DECAY} \ 85 | --momentum ${MOMENTUM} \ 86 | --size_to_binarize ${SIZE_TO_BINARIZE} \ 87 | --optimizer ${OPTIMIZER} \ 88 | --net ${NET} \ 89 | --image_size ${IMAGE_SIZE} \ 90 | --num_gpus ${NUM_GPUS} \ 91 | --num_nodes ${NUM_NODES} \ 92 | --batch_size ${TRAIN_BATCH_SIZE} \ 93 | --save_iter ${SAVE_ITER} \ 94 | --quantize_logits ${QUANTIZE_LOGITS} \ 95 | --max_steps ${MAX_STEPS} \ 96 | --train_dir ${TRAIN_DIR} \ 97 | --data_dir ${DATA_DIR} > ${INFO_WORKSPACE}/training_${FOLDER_NAME}_info.txt 2>&1 & 98 | -------------------------------------------------------------------------------- /terngrad/run_multi_gpus_googlenet_quick.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | set -x 4 | 5 | DATASET_NAME=imagenet # imagenet or cifar10 6 | ROOT_WORKSPACE=${HOME}/dataset/results/imagenet # the location to store tf.summary and logs 7 | DATA_DIR=${HOME}/dataset/${DATASET_NAME}-data # dataset location 8 | FINETUNED_MODEL_PATH= 9 | NUM_GPUS=2 10 | export CUDA_VISIBLE_DEVICES=0,1 # specify visible gpus to tensorflow 11 | NUM_NODES=2 # num of virtual nodes on physical gpus 12 | OPTIMIZER=momentum 13 | NET=googlenet 14 | IMAGE_SIZE=224 15 | GRAD_BITS=32 16 | BASE_LR=0.04 17 | CLIP_FACTOR=0.0 # 0.0 means no clipping 18 | # when GRAD_BITS=1 and FLOATING_GRAD_EPOCH>0, switch to floating gradients every FLOATING_GRAD_EPOCH epoch and then switch back 19 | FLOATING_GRAD_EPOCH=0 # 0 means no switching 20 | WEIGHT_DECAY=0.00004 # default - alexnet/vgg_a/vgg_16:0.0005, inception_v3:0.00004, cifar10_alexnet:0.004 21 | DROPOUT_KEEP_PROB=0.8 # The probability to keep in dropout 22 | MOMENTUM=0.9 23 | LR_DECAY_TYPE="polynomial" # learning rate decay type 24 | SIZE_TO_BINARIZE=1 # the min size of variable to enable binarizing. 
1 means binarizing all variables when GRAD_BITS=1 25 | TRAIN_BATCH_SIZE=128 # total batch size 26 | VAL_BATCH_SIZE=25 # set smaller to avoid OOM 27 | MAX_STEPS=600000 28 | VAL_TOWER=0 # -1 for cpu 29 | EVAL_INTERVAL_SECS=1800 # seconds to evaluate the accuracy 30 | EVAL_DEVICE="/gpu:0" # specify the device to eval. e.g. "/gpu:1", "/cpu:0" 31 | RESTORE_AVG_VAR=True # use the moving average parameters to eval? 32 | SEED=123 # use ${RANDOM} if no duplicable results are required 33 | 34 | if [ ! -d "$ROOT_WORKSPACE" ]; then 35 | echo "${ROOT_WORKSPACE} does not exist!" 36 | exit 37 | fi 38 | 39 | TRAIN_WORKSPACE=${ROOT_WORKSPACE}/${DATASET_NAME}_training_data/ 40 | EVAL_WORKSPACE=${ROOT_WORKSPACE}/${DATASET_NAME}_eval_data/ 41 | INFO_WORKSPACE=${ROOT_WORKSPACE}/${DATASET_NAME}_info/ 42 | if [ ! -d "${INFO_WORKSPACE}" ]; then 43 | echo "Creating ${INFO_WORKSPACE} ..." 44 | mkdir -p ${INFO_WORKSPACE} 45 | fi 46 | current_time=$(date) 47 | current_time=${current_time// /_} 48 | current_time=${current_time//:/-} 49 | FOLDER_NAME=${DATASET_NAME}_${NET}_${IMAGE_SIZE}_${OPTIMIZER}_${GRAD_BITS}_${BASE_LR}_${CLIP_FACTOR}_${FLOATING_GRAD_EPOCH}_${WEIGHT_DECAY}_${MOMENTUM}_${SIZE_TO_BINARIZE}_${TRAIN_BATCH_SIZE}_${NUM_GPUS}_${NUM_NODES}_${current_time} 50 | TRAIN_DIR=${TRAIN_WORKSPACE}/${FOLDER_NAME} 51 | EVAL_DIR=${EVAL_WORKSPACE}/${FOLDER_NAME} 52 | if [ ! -d "$TRAIN_DIR" ]; then 53 | echo "Creating ${TRAIN_DIR} ..." 54 | mkdir -p ${TRAIN_DIR} 55 | fi 56 | if [ ! -d "$EVAL_DIR" ]; then 57 | echo "Creating ${EVAL_DIR} ..." 58 | mkdir -p ${EVAL_DIR} 59 | fi 60 | 61 | bazel-bin/inception/${DATASET_NAME}_eval \ 62 | --eval_interval_secs ${EVAL_INTERVAL_SECS} \ 63 | --device ${EVAL_DEVICE} \ 64 | --restore_avg_var ${RESTORE_AVG_VAR} \ 65 | --data_dir ${DATA_DIR} \ 66 | --net ${NET} \ 67 | --image_size ${IMAGE_SIZE} \ 68 | --batch_size ${VAL_BATCH_SIZE} \ 69 | --max_steps ${MAX_STEPS} \ 70 | --checkpoint_dir ${TRAIN_DIR} \ 71 | --tower ${VAL_TOWER} \ 72 | --eval_dir ${EVAL_DIR} > ${INFO_WORKSPACE}/eval_${FOLDER_NAME}_info.txt 2>&1 & 73 | 74 | bazel-bin/inception/${DATASET_NAME}_train \ 75 | --seed ${SEED} \ 76 | --pretrained_model_checkpoint_path "${FINETUNED_MODEL_PATH}" \ 77 | --initial_learning_rate ${BASE_LR} \ 78 | --grad_bits ${GRAD_BITS} \ 79 | --clip_factor ${CLIP_FACTOR} \ 80 | --floating_grad_epoch ${FLOATING_GRAD_EPOCH} \ 81 | --weight_decay ${WEIGHT_DECAY} \ 82 | --dropout_keep_prob ${DROPOUT_KEEP_PROB} \ 83 | --momentum ${MOMENTUM} \ 84 | --learning_rate_decay_type ${LR_DECAY_TYPE} \ 85 | --size_to_binarize ${SIZE_TO_BINARIZE} \ 86 | --optimizer ${OPTIMIZER} \ 87 | --net ${NET} \ 88 | --image_size ${IMAGE_SIZE} \ 89 | --num_gpus ${NUM_GPUS} \ 90 | --num_nodes ${NUM_NODES} \ 91 | --batch_size ${TRAIN_BATCH_SIZE} \ 92 | --max_steps ${MAX_STEPS} \ 93 | --train_dir ${TRAIN_DIR} \ 94 | --data_dir ${DATA_DIR} > ${INFO_WORKSPACE}/training_${FOLDER_NAME}_info.txt 2>&1 & 95 | -------------------------------------------------------------------------------- /terngrad/run_multi_gpus_lenet.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | set -x 4 | 5 | DATASET_NAME=mnist # imagenet or cifar10 or mnist 6 | ROOT_WORKSPACE=${HOME}/dataset/results/${DATASET_NAME} # the location to store tf.summary and logs 7 | DATA_DIR=${HOME}/dataset/${DATASET_NAME}-data # dataset location 8 | FINETUNED_MODEL_PATH= 9 | NUM_GPUS=2 # num of physical gpus 10 | export CUDA_VISIBLE_DEVICES=0,1 # specify visible gpus to tensorflow 11 | NUM_NODES=4 # num of virtual
nodes on physical gpus 12 | OPTIMIZER=momentum 13 | NET=lenet 14 | IMAGE_SIZE=28 15 | GRAD_BITS=32 16 | BASE_LR=0.01 17 | CLIP_FACTOR=0.0 # 0.0 means no clipping 18 | # when GRAD_BITS=1 and FLOATING_GRAD_EPOCH>0, switch to floating gradients every FLOATING_GRAD_EPOCH epoch and then switch back 19 | FLOATING_GRAD_EPOCH=0 # 0 means no switching 20 | WEIGHT_DECAY=0.0005 # default - alexnet/vgg_a/vgg_16:0.0005, inception_v3:0.00004, cifar10_alexnet:0.004 21 | MOMENTUM=0.9 22 | LR_DECAY_TYPE="polynomial" # learning rate decay type 23 | SIZE_TO_BINARIZE=1 # the min size of variable to enable binarizing. 1 means binarizing all variables when GRAD_BITS=1 24 | TRAIN_BATCH_SIZE=64 # total batch size 25 | SAVE_ITER=200 # Save summaries and a checkpoint every SAVE_ITER iterations 26 | QUANTIZE_LOGITS=True # Whether to quantize the gradients of the last logits layer. 27 | VAL_BATCH_SIZE=100 # set smaller to avoid OOM 28 | MAX_STEPS=10000 29 | VAL_TOWER=0 # -1 for cpu 30 | EVAL_INTERVAL_SECS=1 # seconds to evaluate the accuracy 31 | EVAL_DEVICE="/gpu:0" # specify the device to eval. e.g. "/gpu:1", "/cpu:0" 32 | RESTORE_AVG_VAR=True # use the moving average parameters to eval? 33 | SEED=123 # use ${RANDOM} if no duplicable results are required 34 | 35 | if [ ! -d "$ROOT_WORKSPACE" ]; then 36 | echo "${ROOT_WORKSPACE} does not exist!" 37 | exit 38 | fi 39 | 40 | TRAIN_WORKSPACE=${ROOT_WORKSPACE}/${DATASET_NAME}_training_data/ 41 | EVAL_WORKSPACE=${ROOT_WORKSPACE}/${DATASET_NAME}_eval_data/ 42 | INFO_WORKSPACE=${ROOT_WORKSPACE}/${DATASET_NAME}_info/ 43 | if [ ! -d "${INFO_WORKSPACE}" ]; then 44 | echo "Creating ${INFO_WORKSPACE} ..." 45 | mkdir -p ${INFO_WORKSPACE} 46 | fi 47 | current_time=$(date) 48 | current_time=${current_time// /_} 49 | current_time=${current_time//:/-} 50 | FOLDER_NAME=${DATASET_NAME}_${NET}_${IMAGE_SIZE}_${OPTIMIZER}_${GRAD_BITS}_${BASE_LR}_${CLIP_FACTOR}_${FLOATING_GRAD_EPOCH}_${WEIGHT_DECAY}_${MOMENTUM}_${SIZE_TO_BINARIZE}_${TRAIN_BATCH_SIZE}_${NUM_NODES}_${current_time} 51 | TRAIN_DIR=${TRAIN_WORKSPACE}/${FOLDER_NAME} 52 | EVAL_DIR=${EVAL_WORKSPACE}/${FOLDER_NAME} 53 | if [ ! -d "$TRAIN_DIR" ]; then 54 | echo "Creating ${TRAIN_DIR} ..." 55 | mkdir -p ${TRAIN_DIR} 56 | fi 57 | if [ ! -d "$EVAL_DIR" ]; then 58 | echo "Creating ${EVAL_DIR} ..."
59 | mkdir -p ${EVAL_DIR} 60 | fi 61 | 62 | bazel-bin/inception/${DATASET_NAME}_eval \ 63 | --eval_interval_secs ${EVAL_INTERVAL_SECS} \ 64 | --device ${EVAL_DEVICE} \ 65 | --restore_avg_var ${RESTORE_AVG_VAR} \ 66 | --data_dir ${DATA_DIR} \ 67 | --subset "test" \ 68 | --net ${NET} \ 69 | --image_size ${IMAGE_SIZE} \ 70 | --batch_size ${VAL_BATCH_SIZE} \ 71 | --checkpoint_dir ${TRAIN_DIR} \ 72 | --max_steps ${MAX_STEPS} \ 73 | --tower ${VAL_TOWER} \ 74 | --eval_dir ${EVAL_DIR} > ${INFO_WORKSPACE}/eval_${FOLDER_NAME}_info.txt 2>&1 & 75 | 76 | bazel-bin/inception/${DATASET_NAME}_train \ 77 | --seed ${SEED} \ 78 | --pretrained_model_checkpoint_path "${FINETUNED_MODEL_PATH}" \ 79 | --initial_learning_rate ${BASE_LR} \ 80 | --grad_bits ${GRAD_BITS} \ 81 | --clip_factor ${CLIP_FACTOR} \ 82 | --floating_grad_epoch ${FLOATING_GRAD_EPOCH} \ 83 | --weight_decay ${WEIGHT_DECAY} \ 84 | --momentum ${MOMENTUM} \ 85 | --learning_rate_decay_type ${LR_DECAY_TYPE} \ 86 | --size_to_binarize ${SIZE_TO_BINARIZE} \ 87 | --optimizer ${OPTIMIZER} \ 88 | --net ${NET} \ 89 | --image_size ${IMAGE_SIZE} \ 90 | --num_gpus ${NUM_GPUS} \ 91 | --num_nodes ${NUM_NODES} \ 92 | --batch_size ${TRAIN_BATCH_SIZE} \ 93 | --save_iter ${SAVE_ITER} \ 94 | --quantize_logits ${QUANTIZE_LOGITS} \ 95 | --max_steps ${MAX_STEPS} \ 96 | --train_dir ${TRAIN_DIR} \ 97 | --data_dir ${DATA_DIR} > ${INFO_WORKSPACE}/training_${FOLDER_NAME}_info.txt 2>&1 & 98 | -------------------------------------------------------------------------------- /terngrad/run_single_ps_cifar10.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | . ~/.bashrc 3 | set -e 4 | set -x 5 | 6 | if [ "$#" -ne 5 ]; then 7 | echo "Illegal number of parameters" 8 | echo "Usage: $0 PS_HOSTS WORKER_HOSTS JOB_NAME TASK_ID EXPERIMENT_ID" 9 | exit 10 | fi 11 | 12 | # cluster and task 13 | PS_HOSTS=$1 14 | WORKER_HOSTS=$2 15 | JOB_NAME=$3 16 | TASK_ID=$4 17 | EXPERIMENT_ID=$5 18 | 19 | if [ "${JOB_NAME}" != "ps" ] 20 | then 21 | echo "JOB_NAME(${JOB_NAME}) is not ps" 22 | exit 23 | fi 24 | 25 | DATASET_NAME=cifar10 # imagenet or cifar10 26 | INFO_WORKSPACE=${HOME}/tmp/${DATASET_NAME}_info/ 27 | if [ ! -d "${INFO_WORKSPACE}" ]; then 28 | echo "Creating ${INFO_WORKSPACE} ..." 29 | mkdir -p ${INFO_WORKSPACE} 30 | fi 31 | LOG_FILE=${INFO_WORKSPACE}/${EXPERIMENT_ID}_${JOB_NAME}_${TASK_ID}.log 32 | 33 | bazel-bin/inception/${DATASET_NAME}_distributed_train \ 34 | --job_name ${JOB_NAME} \ 35 | --task_id ${TASK_ID} \ 36 | --ps_hosts ${PS_HOSTS} \ 37 | --worker_hosts ${WORKER_HOSTS} > ${LOG_FILE} 2>&1 & 38 | -------------------------------------------------------------------------------- /terngrad/run_single_ps_imagenet.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | set -x 4 | 5 | if [ "$#" -ne 5 ]; then 6 | echo "Illegal number of parameters" 7 | echo "Usage: $0 PS_HOSTS WORKER_HOSTS JOB_NAME TASK_ID EXPERIMENT_ID" 8 | exit 9 | fi 10 | 11 | # cluster and task 12 | PS_HOSTS=$1 13 | WORKER_HOSTS=$2 14 | JOB_NAME=$3 15 | TASK_ID=$4 16 | EXPERIMENT_ID=$5 17 | 18 | if [ "${JOB_NAME}" != "ps" ] 19 | then 20 | echo "JOB_NAME(${JOB_NAME}) is not ps" 21 | exit 22 | fi 23 | 24 | DATASET_NAME=imagenet # imagenet or cifar10 25 | INFO_WORKSPACE=${HOME}/tmp/${DATASET_NAME}_info/ 26 | if [ ! -d "${INFO_WORKSPACE}" ]; then 27 | echo "Creating ${INFO_WORKSPACE} ..." 
28 | mkdir -p ${INFO_WORKSPACE} 29 | fi 30 | LOG_FILE=${INFO_WORKSPACE}/${EXPERIMENT_ID}_${JOB_NAME}_${TASK_ID}.log 31 | 32 | bazel-bin/inception/${DATASET_NAME}_distributed_train \ 33 | --job_name ${JOB_NAME} \ 34 | --task_id ${TASK_ID} \ 35 | --ps_hosts ${PS_HOSTS} \ 36 | --worker_hosts ${WORKER_HOSTS} > ${LOG_FILE} 2>&1 & 37 | -------------------------------------------------------------------------------- /terngrad/run_single_worker_alexnet.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | set -x 4 | 5 | if [ "$#" -ne 7 ]; then 6 | echo "Illegal number of parameters" 7 | echo "Usage: $0 PS_HOSTS WORKER_HOSTS JOB_NAME TASK_ID DEVICE DATA_DIR EXPERIMENT_ID" 8 | exit 9 | fi 10 | 11 | # cluster and task 12 | PS_HOSTS=$1 13 | WORKER_HOSTS=$2 14 | JOB_NAME=$3 15 | TASK_ID=$4 16 | DEVICE=$5 17 | DATA_DIR=$6 # dataset location 18 | EXPERIMENT_ID=$7 19 | 20 | if [ "${JOB_NAME}" != "worker" ] 21 | then 22 | echo "JOB_NAME(${JOB_NAME}) is not worker" 23 | exit 24 | fi 25 | 26 | DATASET_NAME=imagenet # imagenet or cifar10 27 | ROOT_WORKSPACE=${HOME}/tmp/ # the location to store tf.summary and logs 28 | FINETUNED_MODEL_PATH= 29 | OPTIMIZER=momentum 30 | NET=alexnet 31 | IMAGE_SIZE=224 32 | GRAD_BITS=32 33 | BASE_LR=0.02 34 | CLIP_FACTOR=0.0 # 0.0 means no clipping 35 | # when GRAD_BITS=1 and FLOATING_GRAD_EPOCH>0, switch to floating gradients every FLOATING_GRAD_EPOCH epoch and then switch back 36 | FLOATING_GRAD_EPOCH=0 # 0 means no switching 37 | WEIGHT_DECAY=0.0005 # default - alexnet/vgg_a/vgg_16:0.0005, inception_v3:0.00004, cifar10_alexnet:0.004 38 | DROPOUT_KEEP_PROB=0.5 # The probability to keep in dropout 39 | MOMENTUM=0.9 40 | SIZE_TO_BINARIZE=9217 # the min size of variable to enable binarizing. 1 means binarizing all variables when GRAD_BITS=1 41 | TRAIN_BATCH_SIZE=128 # batch size per node 42 | NUM_EPOCHS_PER_DECAY=20 # per decay learning rate 43 | MAX_STEPS=185000 44 | SEED=123 # use ${RANDOM} if no duplicable results are required 45 | 46 | TRAIN_WORKSPACE=${ROOT_WORKSPACE}/${DATASET_NAME}_training_data/ 47 | INFO_WORKSPACE=${ROOT_WORKSPACE}/${DATASET_NAME}_info/ 48 | if [ ! -d "${INFO_WORKSPACE}" ]; then 49 | echo "Creating ${INFO_WORKSPACE} ..." 50 | mkdir -p ${INFO_WORKSPACE} 51 | fi 52 | FOLDER_NAME=${EXPERIMENT_ID}_${JOB_NAME}_${TASK_ID} 53 | TRAIN_DIR=${TRAIN_WORKSPACE}/${FOLDER_NAME} 54 | if [ ! -d "$TRAIN_DIR" ]; then 55 | echo "Creating ${TRAIN_DIR} ..." 
56 | mkdir -p ${TRAIN_DIR} 57 | fi 58 | 59 | export CUDA_VISIBLE_DEVICES=${DEVICE} # specify visible gpus to tensorflow 60 | bazel-bin/inception/${DATASET_NAME}_distributed_train \ 61 | --seed ${SEED} \ 62 | --pretrained_model_checkpoint_path "${FINETUNED_MODEL_PATH}" \ 63 | --num_epochs_per_decay ${NUM_EPOCHS_PER_DECAY} \ 64 | --initial_learning_rate ${BASE_LR} \ 65 | --grad_bits ${GRAD_BITS} \ 66 | --clip_factor ${CLIP_FACTOR} \ 67 | --floating_grad_epoch ${FLOATING_GRAD_EPOCH} \ 68 | --weight_decay ${WEIGHT_DECAY} \ 69 | --dropout_keep_prob ${DROPOUT_KEEP_PROB} \ 70 | --momentum ${MOMENTUM} \ 71 | --size_to_binarize ${SIZE_TO_BINARIZE} \ 72 | --optimizer ${OPTIMIZER} \ 73 | --net ${NET} \ 74 | --image_size ${IMAGE_SIZE} \ 75 | --batch_size ${TRAIN_BATCH_SIZE} \ 76 | --max_steps ${MAX_STEPS} \ 77 | --train_dir ${TRAIN_DIR} \ 78 | --job_name ${JOB_NAME} \ 79 | --task_id ${TASK_ID} \ 80 | --ps_hosts ${PS_HOSTS} \ 81 | --worker_hosts ${WORKER_HOSTS} \ 82 | --data_dir ${DATA_DIR} > ${INFO_WORKSPACE}/${FOLDER_NAME}.log 2>&1 & 83 | -------------------------------------------------------------------------------- /terngrad/run_single_worker_cifarnet.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | . ~/.bashrc 3 | 4 | set -e 5 | set -x 6 | 7 | if [ "$#" -ne 7 ]; then 8 | echo "Illegal number of parameters" 9 | echo "Usage: $0 PS_HOSTS WORKER_HOSTS JOB_NAME TASK_ID DEVICE DATA_DIR EXPERIMENT_ID" 10 | exit 11 | fi 12 | 13 | # cluster and task 14 | PS_HOSTS=$1 15 | WORKER_HOSTS=$2 16 | JOB_NAME=$3 17 | TASK_ID=$4 18 | DEVICE=$5 19 | DATA_DIR=$6 # dataset location 20 | EXPERIMENT_ID=$7 21 | 22 | if [ "${JOB_NAME}" != "worker" ] 23 | then 24 | echo "JOB_NAME(${JOB_NAME}) is not worker" 25 | exit 26 | fi 27 | 28 | DATASET_NAME=cifar10 # imagenet or cifar10 29 | ROOT_WORKSPACE=${HOME}/tmp/ # the location to store tf.summary and logs 30 | FINETUNED_MODEL_PATH= 31 | OPTIMIZER=adam 32 | NET=cifar10_alexnet 33 | IMAGE_SIZE=24 34 | GRAD_BITS=32 35 | BASE_LR=0.0002 36 | CLIP_FACTOR=0.0 # 0.0 means no clipping 37 | # when GRAD_BITS=1 and FLOATING_GRAD_EPOCH>0, switch to floating gradients every FLOATING_GRAD_EPOCH epoch and then switch back 38 | FLOATING_GRAD_EPOCH=0 # 0 means no switching 39 | WEIGHT_DECAY=0.004 # default - alexnet/vgg_a/vgg_16:0.0005, inception_v3:0.00004, cifar10_alexnet:0.004 40 | MOMENTUM=0.9 41 | SIZE_TO_BINARIZE=1 # the min size of variable to enable binarizing. 1 means binarizing all variables when GRAD_BITS=1 42 | TRAIN_BATCH_SIZE=64 # batch size per node 43 | NUM_EPOCHS_PER_DECAY=200 # per decay learning rate 44 | MAX_STEPS=300000 45 | SEED=123 # use ${RANDOM} if no duplicable results are required 46 | 47 | TRAIN_WORKSPACE=${ROOT_WORKSPACE}/${DATASET_NAME}_training_data/ 48 | INFO_WORKSPACE=${ROOT_WORKSPACE}/${DATASET_NAME}_info/ 49 | if [ ! -d "${INFO_WORKSPACE}" ]; then 50 | echo "Creating ${INFO_WORKSPACE} ..." 51 | mkdir -p ${INFO_WORKSPACE} 52 | fi 53 | FOLDER_NAME=${EXPERIMENT_ID}_${JOB_NAME}_${TASK_ID} 54 | TRAIN_DIR=${TRAIN_WORKSPACE}/${FOLDER_NAME} 55 | if [ ! -d "$TRAIN_DIR" ]; then 56 | echo "Creating ${TRAIN_DIR} ..." 
57 | mkdir -p ${TRAIN_DIR} 58 | fi 59 | 60 | export CUDA_VISIBLE_DEVICES=${DEVICE} # specify visible gpus to tensorflow 61 | bazel-bin/inception/${DATASET_NAME}_distributed_train \ 62 | --seed ${SEED} \ 63 | --pretrained_model_checkpoint_path "${FINETUNED_MODEL_PATH}" \ 64 | --num_epochs_per_decay ${NUM_EPOCHS_PER_DECAY} \ 65 | --initial_learning_rate ${BASE_LR} \ 66 | --grad_bits ${GRAD_BITS} \ 67 | --clip_factor ${CLIP_FACTOR} \ 68 | --floating_grad_epoch ${FLOATING_GRAD_EPOCH} \ 69 | --weight_decay ${WEIGHT_DECAY} \ 70 | --momentum ${MOMENTUM} \ 71 | --size_to_binarize ${SIZE_TO_BINARIZE} \ 72 | --optimizer ${OPTIMIZER} \ 73 | --net ${NET} \ 74 | --image_size ${IMAGE_SIZE} \ 75 | --batch_size ${TRAIN_BATCH_SIZE} \ 76 | --max_steps ${MAX_STEPS} \ 77 | --train_dir ${TRAIN_DIR} \ 78 | --job_name ${JOB_NAME} \ 79 | --task_id ${TASK_ID} \ 80 | --ps_hosts ${PS_HOSTS} \ 81 | --worker_hosts ${WORKER_HOSTS} \ 82 | --data_dir ${DATA_DIR} > ${INFO_WORKSPACE}/${FOLDER_NAME}.log 2>&1 & 83 | -------------------------------------------------------------------------------- /terngrad/split_dataset.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #set -e 3 | #set -x 4 | 5 | if [ "$#" -ne 3 ]; then 6 | echo "Illegal number of parameters" 7 | echo "Usage: $0 DATA_DIR WORKER_NUM WORKER_ID" 8 | exit 9 | fi 10 | 11 | DATA_DIR=$1 12 | WORKER_NUM=$2 13 | WORKER_ID=$3 14 | 15 | if [ "${WORKER_ID}" -ge ${WORKER_NUM} ] || [ "${WORKER_ID}" -lt 0 ] ; then 16 | echo "WORKER_ID must be in [0, WORKER_NUM)" 17 | exit 18 | fi 19 | 20 | SPLIT_DIR=${DATA_DIR}/worker_${WORKER_ID}_of_${WORKER_NUM} 21 | if [ ! -d "$SPLIT_DIR" ]; then 22 | export total_files=$( ls -l ${DATA_DIR}/train-* | wc -l ) 23 | split_size=$( expr ${total_files} / ${WORKER_NUM} ) 24 | remainder=$( expr ${total_files} % ${WORKER_NUM} ) 25 | if [ "${remainder}" -ne 0 ]; then 26 | echo "Dataset cannot be evenly split" 27 | exit 28 | fi 29 | echo "Splitting to ${SPLIT_DIR} ..." 30 | mkdir ${SPLIT_DIR} 31 | cd ${SPLIT_DIR} 32 | files=$( ls -dl ${DATA_DIR}/train-*|head -n $( expr $( expr ${WORKER_ID} + 1 ) \* ${split_size} ) | tail -n ${split_size} | awk '{print $9}') 33 | 34 | for file in ${files}; do 35 | ln -s ${file}; 36 | done 37 | 38 | else 39 | echo "${SPLIT_DIR} exists." 40 | fi 41 | 42 | -------------------------------------------------------------------------------- /terngrad/stop_dist.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | set -x 4 | 5 | .
./config_dist.sh 6 | 7 | WORKER_NUM=${#WORKER_HOSTS[@]} 8 | DEVICE_NUM=${#WORKER_DEVICES[@]} 9 | DATA_NUM=${#DATA_DIR[@]} 10 | if [ ${WORKER_NUM} -ne ${DEVICE_NUM} ] 11 | then 12 | echo "The number of workers (${WORKER_NUM}) does not match the number of devices (${DEVICE_NUM})" 13 | exit 14 | fi 15 | if [ ${WORKER_NUM} -ne ${DATA_NUM} ] 16 | then 17 | echo "The number of workers (${WORKER_NUM}) does not match the number of data paths (${DATA_NUM})" 18 | exit 19 | fi 20 | 21 | 22 | # stop workers 23 | for HOST in ${WORKER_HOSTS[*]}; do 24 | worker=$(echo ${HOST} |cut -d':' -f1) 25 | ssh ${worker} "hostname; \ 26 | cd ${WORKSPACE}; \ 27 | pwd; \ 28 | ./kill_local.sh " 29 | done 30 | 31 | # stop ps 32 | for HOST in ${PS_HOSTS[*]}; do 33 | ps=$(echo ${HOST} |cut -d':' -f1) 34 | ssh ${ps} "hostname; \ 35 | cd ${WORKSPACE}; \ 36 | pwd; \ 37 | ./kill_local.sh " 38 | done 39 | 40 | -------------------------------------------------------------------------------- /terngrad/test/test_ternary_encoder_decoder.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | def ternary_encoder(input_data): 4 | """Encodes ternary signs, packing four 2-bit values into one byte.""" 5 | a = tf.sign(input_data) # -1, 0, 1 6 | a = tf.add(a,1) # shift -1,0,1 to 0,1,2 (2'b00,2'b01,2'b10) 7 | a = tf.reshape(a,[-1]) 8 | pad_size = 4 - tf.mod(tf.size(a), 4) 9 | pad = tf.range(0.0, pad_size) # pad values are trimmed off again by the decoder 10 | a = tf.concat([a, pad], 0) 11 | a_split1, a_split2, a_split3, a_split4 = tf.split(a,4) # the padding above makes the size divisible by 4 12 | 13 | # encode 4 grads into 1 Byte 14 | sum_1 = tf.add(a_split1, a_split2*4) 15 | sum_2 = tf.add(a_split3*16, a_split4*64) 16 | sum_all = tf.add(sum_1, sum_2) 17 | encoded = tf.cast(sum_all, tf.uint8) 18 | return encoded 19 | 20 | def ternary_decoder(encoded_data, scaler, shape): 21 | """Decodes the packed bytes back to scaled float ternary values.""" 22 | a = tf.cast(encoded_data, tf.int32) 23 | a_split1 = tf.mod(a,4) 24 | a_split2 = tf.to_int32(tf.mod(a/4,4)) 25 | a_split3 = tf.to_int32(tf.mod(a/16,4)) 26 | a_split4 = tf.to_int32(tf.mod(a/64,4)) 27 | a = tf.concat([a_split1, a_split2, a_split3, a_split4], 0) 28 | real_size = tf.reduce_prod(shape) 29 | a = tf.to_float(a) 30 | a = tf.gather(a, tf.range(0,real_size)) 31 | a = tf.reshape(a, shape) 32 | a = tf.subtract(a, 1) 33 | decoded = a*scaler 34 | return decoded 35 | 36 | shape=[33, 33, 33, 333] 37 | scaler=0.002 38 | with tf.device('/gpu:1'): 39 | # stochastic ternary gradient generator (values in {-scaler, 0, +scaler}) 40 | gradient = tf.random_normal(shape, stddev=0.001, name='a') 41 | zeros = tf.zeros(shape) 42 | abs_gradient = tf.abs(gradient) 43 | sign_gradient = tf.sign( gradient ) 44 | rnd_sample = tf.random_uniform(shape,0,scaler) 45 | where_cond = tf.less(rnd_sample, abs_gradient) 46 | bin_gradient = tf.where(where_cond, sign_gradient * scaler, zeros) 47 | 48 | # encoder: -1 0 1 49 | encoded_a = ternary_encoder(bin_gradient) 50 | 51 | with tf.device('/gpu:0'): 52 | # decoder 53 | decoded_a = ternary_decoder(encoded_a, scaler, shape) 54 | 55 | err = tf.reduce_sum( tf.squared_difference(bin_gradient, decoded_a) ) 56 | 57 | config = tf.ConfigProto() 58 | config.gpu_options.allow_growth = True 59 | config.log_device_placement = True 60 | #config.allow_soft_placement = True 61 | with tf.Session(config=config) as sess: 62 | for i in range(2000): 63 | res = sess.run(err) 64 | print('%d %g' % (i, res)) 65 | --------------------------------------------------------------------------------
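A note on the packing arithmetic exercised above: ternary_encoder shifts each ternary value in {-1, 0, 1} to {0, 1, 2} and packs four such 2-bit codes into one byte as v1 + 4*v2 + 16*v3 + 64*v4; ternary_decoder inverts this with mod-4 arithmetic. (The TensorFlow version packs element i of each quarter of the flattened tensor into byte i, rather than four consecutive elements.) A minimal pure-Python sketch of the per-byte arithmetic follows; pack4/unpack4 are illustrative names, not part of the repo:

def pack4(t1, t2, t3, t4):
  """Packs four ternary values in {-1, 0, 1} into one byte (base-4 digits)."""
  v = [t + 1 for t in (t1, t2, t3, t4)]  # shift {-1, 0, 1} to {0, 1, 2}
  return v[0] + 4 * v[1] + 16 * v[2] + 64 * v[3]

def unpack4(byte):
  """Recovers the four ternary values from one packed byte."""
  return tuple(((byte >> (2 * k)) & 3) - 1 for k in range(4))

assert pack4(-1, 0, 1, -1) == 36
assert unpack4(36) == (-1, 0, 1, -1)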